yt_dlp/YoutubeDL.py

   1 #!/usr/bin/env python3
   2 # coding: utf-8
   3
   4 from __future__ import absolute_import, unicode_literals
   5
   6 import collections
   7 import contextlib
   8 import datetime
   9 import errno
  10 import fileinput
  11 import functools
  12 import io
  13 import itertools
  14 import json
  15 import locale
  16 import operator
  17 import os
  18 import platform
  19 import re
  20 import shutil
  21 import subprocess
  22 import sys
  23 import tempfile
  24 import time
  25 import tokenize
  26 import traceback
  27 import random
  28 import unicodedata
  29
  30 from enum import Enum
  31 from string import ascii_letters
  32
  33 from .compat import (
  34     compat_basestring,
  35     compat_get_terminal_size,
  36     compat_kwargs,
  37     compat_numeric_types,
  38     compat_os_name,
  39     compat_pycrypto_AES,
  40     compat_shlex_quote,
  41     compat_str,
  42     compat_tokenize_tokenize,
  43     compat_urllib_error,
  44     compat_urllib_request,
  45     compat_urllib_request_DataHandler,
  46     windows_enable_vt_mode,
  47 )
  48 from .cookies import load_cookies
  49 from .utils import (
  50     age_restricted,
  51     args_to_str,
  52     ContentTooShortError,
  53     date_from_str,
  54     DateRange,
  55     DEFAULT_OUTTMPL,
  56     determine_ext,
  57     determine_protocol,
  58     DownloadCancelled,
  59     DownloadError,
  60     encode_compat_str,
  61     encodeFilename,
  62     EntryNotInPlaylist,
  63     error_to_compat_str,
  64     ExistingVideoReached,
  65     expand_path,
  66     ExtractorError,
  67     float_or_none,
  68     format_bytes,
  69     format_field,
  70     format_decimal_suffix,
  71     formatSeconds,
  72     GeoRestrictedError,
  73     get_domain,
  74     HEADRequest,
  75     int_or_none,
  76     iri_to_uri,
  77     ISO3166Utils,
  78     join_nonempty,
  79     LazyList,
  80     LINK_TEMPLATES,
  81     locked_file,
  82     make_dir,
  83     make_HTTPS_handler,
  84     MaxDownloadsReached,
  85     network_exceptions,
  86     number_of_digits,
  87     orderedSet,
  88     OUTTMPL_TYPES,
  89     PagedList,
  90     parse_filesize,
  91     PerRequestProxyHandler,
  92     platform_name,
  93     Popen,
  94     PostProcessingError,
  95     preferredencoding,
  96     prepend_extension,
  97     ReExtractInfo,
  98     register_socks_protocols,
  99     RejectedVideoReached,
 100     remove_terminal_sequences,
 101     render_table,
 102     replace_extension,
 103     SameFileError,
 104     sanitize_filename,
 105     sanitize_path,
 106     sanitize_url,
 107     sanitized_Request,
 108     std_headers,
 109     STR_FORMAT_RE_TMPL,
 110     STR_FORMAT_TYPES,
 111     str_or_none,
 112     strftime_or_none,
 113     subtitles_filename,
 114     supports_terminal_sequences,
 115     timetuple_from_msec,
 116     to_high_limit_path,
 117     traverse_obj,
 118     try_get,
 119     UnavailableVideoError,
 120     url_basename,
 121     variadic,
 122     version_tuple,
 123     write_json_file,
 124     write_string,
 125     YoutubeDLCookieProcessor,
 126     YoutubeDLHandler,
 127     YoutubeDLRedirectHandler,
 128 )
 129 from .cache import Cache
 130 from .minicurses import format_text
 131 from .extractor import (
 132     gen_extractor_classes,
 133     get_info_extractor,
 134     _LAZY_LOADER,
 135     _PLUGIN_CLASSES as plugin_extractors
 136 )
 137 from .extractor.openload import PhantomJSwrapper
 138 from .downloader import (
 139     FFmpegFD,
 140     get_suitable_downloader,
 141     shorten_protocol_name
 142 )
 143 from .downloader.rtmp import rtmpdump_version
 144 from .postprocessor import (
 145     get_postprocessor,
 146     EmbedThumbnailPP,
 147     FFmpegFixupDuplicateMoovPP,
 148     FFmpegFixupDurationPP,
 149     FFmpegFixupM3u8PP,
 150     FFmpegFixupM4aPP,
 151     FFmpegFixupStretchedPP,
 152     FFmpegFixupTimestampPP,
 153     FFmpegMergerPP,
 154     FFmpegPostProcessor,
 155     MoveFilesAfterDownloadPP,
 156     _PLUGIN_CLASSES as plugin_postprocessors
 157 )
 158 from .update import detect_variant
 159 from .version import __version__, RELEASE_GIT_HEAD
 160
 161 if compat_os_name == 'nt':
 162     import ctypes
 163
 164
 165 class YoutubeDL(object):
 166     """YoutubeDL class.
 167
 168     YoutubeDL objects are the ones responsible of downloading the
 169     actual video file and writing it to disk if the user has requested
 170     it, among some other tasks. In most cases there should be one per
 171     program. As, given a video URL, the downloader doesn't know how to
 172     extract all the needed information, task that InfoExtractors do, it
 173     has to pass the URL to one of them.
 174
 175     For this, YoutubeDL objects have a method that allows
 176     InfoExtractors to be registered in a given order. When it is passed
 177     a URL, the YoutubeDL object handles it to the first InfoExtractor it
 178     finds that reports being able to handle it. The InfoExtractor extracts
 179     all the information about the video or videos the URL refers to, and
 180     YoutubeDL process the extracted information, possibly using a File
 181     Downloader to download the video.
 182
 183     YoutubeDL objects accept a lot of parameters. In order not to saturate
 184     the object constructor with arguments, it receives a dictionary of
 185     options instead. These options are available through the params
 186     attribute for the InfoExtractors to use. The YoutubeDL also
 187     registers itself as the downloader in charge for the InfoExtractors
 188     that are added to it, so this is a "mutual registration".
 189
 190     Available options:
 191
 192     username:          Username for authentication purposes.
 193     password:          Password for authentication purposes.
 194     videopassword:     Password for accessing a video.
 195     ap_mso:            Adobe Pass multiple-system operator identifier.
 196     ap_username:       Multiple-system operator account username.
 197     ap_password:       Multiple-system operator account password.
 198     usenetrc:          Use netrc for authentication instead.
 199     verbose:           Print additional info to stdout.
 200     quiet:             Do not print messages to stdout.
 201     no_warnings:       Do not print out anything for warnings.
 202     forceprint:        A dict with keys video/playlist mapped to
 203                        a list of templates to force print to stdout
 204                        For compatibility, a single list is also accepted
 205     forceurl:          Force printing final URL. (Deprecated)
 206     forcetitle:        Force printing title. (Deprecated)
 207     forceid:           Force printing ID. (Deprecated)
 208     forcethumbnail:    Force printing thumbnail URL. (Deprecated)
 209     forcedescription:  Force printing description. (Deprecated)
 210     forcefilename:     Force printing final filename. (Deprecated)
 211     forceduration:     Force printing duration. (Deprecated)
 212     forcejson:         Force printing info_dict as JSON.
 213     dump_single_json:  Force printing the info_dict of the whole playlist
 214                        (or video) as a single JSON line.
 215     force_write_download_archive: Force writing download archive regardless
 216                        of 'skip_download' or 'simulate'.
 217     simulate:          Do not download the video files. If unset (or None),
 218                        simulate only if listsubtitles, listformats or list_thumbnails is used
 219     format:            Video format code. see "FORMAT SELECTION" for more details.
 220                        You can also pass a function. The function takes 'ctx' as
 221                        argument and returns the formats to download.
 222                        See "build_format_selector" for an implementation
 223     allow_unplayable_formats:   Allow unplayable formats to be extracted and downloaded.
 224     ignore_no_formats_error: Ignore "No video formats" error. Usefull for
 225                        extracting metadata even if the video is not actually
 226                        available for download (experimental)
 227     format_sort:       A list of fields by which to sort the video formats.
 228                        See "Sorting Formats" for more details.
 229     format_sort_force: Force the given format_sort. see "Sorting Formats"
 230                        for more details.
 231     allow_multiple_video_streams:   Allow multiple video streams to be merged
 232                        into a single file
 233     allow_multiple_audio_streams:   Allow multiple audio streams to be merged
 234                        into a single file
 235     check_formats      Whether to test if the formats are downloadable.
 236                        Can be True (check all), False (check none),
 237                        'selected' (check selected formats),
 238                        or None (check only if requested by extractor)
 239     paths:             Dictionary of output paths. The allowed keys are 'home'
 240                        'temp' and the keys of OUTTMPL_TYPES (in utils.py)
 241     outtmpl:           Dictionary of templates for output names. Allowed keys
 242                        are 'default' and the keys of OUTTMPL_TYPES (in utils.py).
 243                        For compatibility with youtube-dl, a single string can also be used
 244     outtmpl_na_placeholder: Placeholder for unavailable meta fields.
 245     restrictfilenames: Do not allow "&" and spaces in file names
 246     trim_file_name:    Limit length of filename (extension excluded)
 247     windowsfilenames:  Force the filenames to be windows compatible
 248     ignoreerrors:      Do not stop on download/postprocessing errors.
 249                        Can be 'only_download' to ignore only download errors.
 250                        Default is 'only_download' for CLI, but False for API
 251     skip_playlist_after_errors: Number of allowed failures until the rest of
 252                        the playlist is skipped
 253     force_generic_extractor: Force downloader to use the generic extractor
 254     overwrites:        Overwrite all video and metadata files if True,
 255                        overwrite only non-video files if None
 256                        and don't overwrite any file if False
 257                        For compatibility with youtube-dl,
 258                        "nooverwrites" may also be used instead
 259     playliststart:     Playlist item to start at.
 260     playlistend:       Playlist item to end at.
 261     playlist_items:    Specific indices of playlist to download.
 262     playlistreverse:   Download playlist items in reverse order.
 263     playlistrandom:    Download playlist items in random order.
 264     matchtitle:        Download only matching titles.
 265     rejecttitle:       Reject downloads for matching titles.
 266     logger:            Log messages to a logging.Logger instance.
 267     logtostderr:       Log messages to stderr instead of stdout.
 268     consoletitle:       Display progress in console window's titlebar.
 269     writedescription:  Write the video description to a .description file
 270     writeinfojson:     Write the video description to a .info.json file
 271     clean_infojson:    Remove private fields from the infojson
 272     getcomments:       Extract video comments. This will not be written to disk
 273                        unless writeinfojson is also given
 274     writeannotations:  Write the video annotations to a .annotations.xml file
 275     writethumbnail:    Write the thumbnail image to a file
 276     allow_playlist_files: Whether to write playlists' description, infojson etc
 277                        also to disk when using the 'write*' options
 278     write_all_thumbnails:  Write all thumbnail formats to files
 279     writelink:         Write an internet shortcut file, depending on the
 280                        current platform (.url/.webloc/.desktop)
 281     writeurllink:      Write a Windows internet shortcut file (.url)
 282     writewebloclink:   Write a macOS internet shortcut file (.webloc)
 283     writedesktoplink:  Write a Linux internet shortcut file (.desktop)
 284     writesubtitles:    Write the video subtitles to a file
 285     writeautomaticsub: Write the automatically generated subtitles to a file
 286     allsubtitles:      Deprecated - Use subtitleslangs = ['all']
 287                        Downloads all the subtitles of the video
 288                        (requires writesubtitles or writeautomaticsub)
 289     listsubtitles:     Lists all available subtitles for the video
 290     subtitlesformat:   The format code for subtitles
 291     subtitleslangs:    List of languages of the subtitles to download (can be regex).
 292                        The list may contain "all" to refer to all the available
 293                        subtitles. The language can be prefixed with a "-" to
 294                        exclude it from the requested languages. Eg: ['all', '-live_chat']
 295     keepvideo:         Keep the video file after post-processing
 296     daterange:         A DateRange object, download only if the upload_date is in the range.
 297     skip_download:     Skip the actual download of the video file
 298     cachedir:          Location of the cache files in the filesystem.
 299                        False to disable filesystem cache.
 300     noplaylist:        Download single video instead of a playlist if in doubt.
 301     age_limit:         An integer representing the user's age in years.
 302                        Unsuitable videos for the given age are skipped.
 303     min_views:         An integer representing the minimum view count the video
 304                        must have in order to not be skipped.
 305                        Videos without view count information are always
 306                        downloaded. None for no limit.
 307     max_views:         An integer representing the maximum view count.
 308                        Videos that are more popular than that are not
 309                        downloaded.
 310                        Videos without view count information are always
 311                        downloaded. None for no limit.
 312     download_archive:  File name of a file where all downloads are recorded.
 313                        Videos already present in the file are not downloaded
 314                        again.
 315     break_on_existing: Stop the download process after attempting to download a
 316                        file that is in the archive.
 317     break_on_reject:   Stop the download process when encountering a video that
 318                        has been filtered out.
 319     break_per_url:     Whether break_on_reject and break_on_existing
 320                        should act on each input URL as opposed to for the entire queue
 321     cookiefile:        File name where cookies should be read from and dumped to
 322     cookiesfrombrowser:  A tuple containing the name of the browser, the profile
 323                        name/pathfrom where cookies are loaded, and the name of the
 324                        keyring. Eg: ('chrome', ) or ('vivaldi', 'default', 'BASICTEXT')
 325     nocheckcertificate:  Do not verify SSL certificates
 326     prefer_insecure:   Use HTTP instead of HTTPS to retrieve information.
 327                        At the moment, this is only supported by YouTube.
 328     proxy:             URL of the proxy server to use
 329     geo_verification_proxy:  URL of the proxy to use for IP address verification
 330                        on geo-restricted sites.
 331     socket_timeout:    Time to wait for unresponsive hosts, in seconds
 332     bidi_workaround:   Work around buggy terminals without bidirectional text
 333                        support, using fridibi
 334     debug_printtraffic:Print out sent and received HTTP traffic
 335     include_ads:       Download ads as well (deprecated)
 336     default_search:    Prepend this string if an input url is not valid.
 337                        'auto' for elaborate guessing
 338     encoding:          Use this encoding instead of the system-specified.
 339     extract_flat:      Do not resolve URLs, return the immediate result.
 340                        Pass in 'in_playlist' to only show this behavior for
 341                        playlist items.
 342     wait_for_video:    If given, wait for scheduled streams to become available.
 343                        The value should be a tuple containing the range
 344                        (min_secs, max_secs) to wait between retries
 345     postprocessors:    A list of dictionaries, each with an entry
 346                        * key:  The name of the postprocessor. See
 347                                yt_dlp/postprocessor/__init__.py for a list.
 348                        * when: When to run the postprocessor. Can be one of
 349                                pre_process|before_dl|post_process|after_move.
 350                                Assumed to be 'post_process' if not given
 351     post_hooks:        Deprecated - Register a custom postprocessor instead
 352                        A list of functions that get called as the final step
 353                        for each video file, after all postprocessors have been
 354                        called. The filename will be passed as the only argument.
 355     progress_hooks:    A list of functions that get called on download
 356                        progress, with a dictionary with the entries
 357                        * status: One of "downloading", "error", or "finished".
 358                                  Check this first and ignore unknown values.
 359                        * info_dict: The extracted info_dict
 360
 361                        If status is one of "downloading", or "finished", the
 362                        following properties may also be present:
 363                        * filename: The final filename (always present)
 364                        * tmpfilename: The filename we're currently writing to
 365                        * downloaded_bytes: Bytes on disk
 366                        * total_bytes: Size of the whole file, None if unknown
 367                        * total_bytes_estimate: Guess of the eventual file size,
 368                                                None if unavailable.
 369                        * elapsed: The number of seconds since download started.
 370                        * eta: The estimated time in seconds, None if unknown
 371                        * speed: The download speed in bytes/second, None if
 372                                 unknown
 373                        * fragment_index: The counter of the currently
 374                                          downloaded video fragment.
 375                        * fragment_count: The number of fragments (= individual
 376                                          files that will be merged)
 377
 378                        Progress hooks are guaranteed to be called at least once
 379                        (with status "finished") if the download is successful.
 380     postprocessor_hooks:  A list of functions that get called on postprocessing
 381                        progress, with a dictionary with the entries
 382                        * status: One of "started", "processing", or "finished".
 383                                  Check this first and ignore unknown values.
 384                        * postprocessor: Name of the postprocessor
 385                        * info_dict: The extracted info_dict
 386
 387                        Progress hooks are guaranteed to be called at least twice
 388                        (with status "started" and "finished") if the processing is successful.
 389     merge_output_format: Extension to use when merging formats.
 390     final_ext:         Expected final extension; used to detect when the file was
 391                        already downloaded and converted
 392     fixup:             Automatically correct known faults of the file.
 393                        One of:
 394                        - "never": do nothing
 395                        - "warn": only emit a warning
 396                        - "detect_or_warn": check whether we can do anything
 397                                            about it, warn otherwise (default)
 398     source_address:    Client-side IP address to bind to.
 399     call_home:         Boolean, true iff we are allowed to contact the
 400                        yt-dlp servers for debugging. (BROKEN)
 401     sleep_interval_requests: Number of seconds to sleep between requests
 402                        during extraction
 403     sleep_interval:    Number of seconds to sleep before each download when
 404                        used alone or a lower bound of a range for randomized
 405                        sleep before each download (minimum possible number
 406                        of seconds to sleep) when used along with
 407                        max_sleep_interval.
 408     max_sleep_interval:Upper bound of a range for randomized sleep before each
 409                        download (maximum possible number of seconds to sleep).
 410                        Must only be used along with sleep_interval.
 411                        Actual sleep time will be a random float from range
 412                        [sleep_interval; max_sleep_interval].
 413     sleep_interval_subtitles: Number of seconds to sleep before each subtitle download
 414     listformats:       Print an overview of available video formats and exit.
 415     list_thumbnails:   Print a table of all thumbnails and exit.
 416     match_filter:      A function that gets called with the info_dict of
 417                        every video.
 418                        If it returns a message, the video is ignored.
 419                        If it returns None, the video is downloaded.
 420                        match_filter_func in utils.py is one example for this.
 421     no_color:          Do not emit color codes in output.
 422     geo_bypass:        Bypass geographic restriction via faking X-Forwarded-For
 423                        HTTP header
 424     geo_bypass_country:
 425                        Two-letter ISO 3166-2 country code that will be used for
 426                        explicit geographic restriction bypassing via faking
 427                        X-Forwarded-For HTTP header
 428     geo_bypass_ip_block:
 429                        IP range in CIDR notation that will be used similarly to
 430                        geo_bypass_country
 431
 432     The following options determine which downloader is picked:
 433     external_downloader: A dictionary of protocol keys and the executable of the
 434                        external downloader to use for it. The allowed protocols
 435                        are default|http|ftp|m3u8|dash|rtsp|rtmp|mms.
 436                        Set the value to 'native' to use the native downloader
 437     hls_prefer_native: Deprecated - Use external_downloader = {'m3u8': 'native'}
 438                        or {'m3u8': 'ffmpeg'} instead.
 439                        Use the native HLS downloader instead of ffmpeg/avconv
 440                        if True, otherwise use ffmpeg/avconv if False, otherwise
 441                        use downloader suggested by extractor if None.
 442     compat_opts:       Compatibility options. See "Differences in default behavior".
 443                        The following options do not work when used through the API:
 444                        filename, abort-on-error, multistreams, no-live-chat, format-sort
 445                        no-clean-infojson, no-playlist-metafiles, no-keep-subs, no-attach-info-json.
 446                        Refer __init__.py for their implementation
 447     progress_template: Dictionary of templates for progress outputs.
 448                        Allowed keys are 'download', 'postprocess',
 449                        'download-title' (console title) and 'postprocess-title'.
 450                        The template is mapped on a dictionary with keys 'progress' and 'info'
 451
 452     The following parameters are not used by YoutubeDL itself, they are used by
 453     the downloader (see yt_dlp/downloader/common.py):
 454     nopart, updatetime, buffersize, ratelimit, throttledratelimit, min_filesize,
 455     max_filesize, test, noresizebuffer, retries, file_access_retries, fragment_retries,
 456     continuedl, noprogress, xattr_set_filesize, hls_use_mpegts, http_chunk_size,
 457     external_downloader_args, concurrent_fragment_downloads.
 458
 459     The following options are used by the post processors:
 460     prefer_ffmpeg:     If False, use avconv instead of ffmpeg if both are available,
 461                        otherwise prefer ffmpeg. (avconv support is deprecated)
 462     ffmpeg_location:   Location of the ffmpeg/avconv binary; either the path
 463                        to the binary or its containing directory.
 464     postprocessor_args: A dictionary of postprocessor/executable keys (in lower case)
 465                        and a list of additional command-line arguments for the
 466                        postprocessor/executable. The dict can also have "PP+EXE" keys
 467                        which are used when the given exe is used by the given PP.
 468                        Use 'default' as the name for arguments to passed to all PP
 469                        For compatibility with youtube-dl, a single list of args
 470                        can also be used
 471
 472     The following options are used by the extractors:
 473     extractor_retries: Number of times to retry for known errors
 474     dynamic_mpd:       Whether to process dynamic DASH manifests (default: True)
 475     hls_split_discontinuity: Split HLS playlists to different formats at
 476                        discontinuities such as ad breaks (default: False)
 477     extractor_args:    A dictionary of arguments to be passed to the extractors.
 478                        See "EXTRACTOR ARGUMENTS" for details.
 479                        Eg: {'youtube': {'skip': ['dash', 'hls']}}
 480     youtube_include_dash_manifest: Deprecated - Use extractor_args instead.
 481                        If True (default), DASH manifests and related
 482                        data will be downloaded and processed by extractor.
 483                        You can reduce network I/O by disabling it if you don't
 484                        care about DASH. (only for youtube)
 485     youtube_include_hls_manifest: Deprecated - Use extractor_args instead.
 486                        If True (default), HLS manifests and related
 487                        data will be downloaded and processed by extractor.
 488                        You can reduce network I/O by disabling it if you don't
 489                        care about HLS. (only for youtube)
 490     """
 491
 492     _NUMERIC_FIELDS = set((
 493         'width', 'height', 'tbr', 'abr', 'asr', 'vbr', 'fps', 'filesize', 'filesize_approx',
 494         'timestamp', 'release_timestamp',
 495         'duration', 'view_count', 'like_count', 'dislike_count', 'repost_count',
 496         'average_rating', 'comment_count', 'age_limit',
 497         'start_time', 'end_time',
 498         'chapter_number', 'season_number', 'episode_number',
 499         'track_number', 'disc_number', 'release_year',
 500     ))
 501
 502     _format_selection_exts = {
 503         'audio': {'m4a', 'mp3', 'ogg', 'aac'},
 504         'video': {'mp4', 'flv', 'webm', '3gp'},
 505         'storyboards': {'mhtml'},
 506     }
 507
 508     params = None
 509     _ies = {}
 510     _pps = {'pre_process': [], 'before_dl': [], 'after_move': [], 'post_process': []}
 511     _printed_messages = set()
 512     _first_webpage_request = True
 513     _download_retcode = None
 514     _num_downloads = None
 515     _playlist_level = 0
 516     _playlist_urls = set()
 517     _screen_file = None
 518
 519     def __init__(self, params=None, auto_init=True):
 520         """Create a FileDownloader object with the given options.
 521         @param auto_init    Whether to load the default extractors and print header (if verbose).
 522                             Set to 'no_verbose_header' to not print the header
 523         """
 524         if params is None:
 525             params = {}
 526         self._ies = {}
 527         self._ies_instances = {}
 528         self._pps = {'pre_process': [], 'before_dl': [], 'after_move': [], 'post_process': []}
 529         self._printed_messages = set()
 530         self._first_webpage_request = True
 531         self._post_hooks = []
 532         self._progress_hooks = []
 533         self._postprocessor_hooks = []
 534         self._download_retcode = 0
 535         self._num_downloads = 0
 536         self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
 537         self._err_file = sys.stderr
 538         self.params = params
 539         self.cache = Cache(self)
 540
 541         windows_enable_vt_mode()
 542         self._allow_colors = {
 543             'screen': not self.params.get('no_color') and supports_terminal_sequences(self._screen_file),
 544             'err': not self.params.get('no_color') and supports_terminal_sequences(self._err_file),
 545         }
 546
 547         if sys.version_info < (3, 6):
 548             self.report_warning(
 549                 'Python version %d.%d is not supported! Please update to Python 3.6 or above' % sys.version_info[:2])
 550
 551         if self.params.get('allow_unplayable_formats'):
 552             self.report_warning(
 553                 f'You have asked for {self._format_err("UNPLAYABLE", self.Styles.EMPHASIS)} formats to be listed/downloaded. '
 554                 'This is a developer option intended for debugging. \n'
 555                 '         If you experience any issues while using this option, '
 556                 f'{self._format_err("DO NOT", self.Styles.ERROR)} open a bug report')
 557
 558         def check_deprecated(param, option, suggestion):
 559             if self.params.get(param) is not None:
 560                 self.report_warning('%s is deprecated. Use %s instead' % (option, suggestion))
 561                 return True
 562             return False
 563
 564         if check_deprecated('cn_verification_proxy', '--cn-verification-proxy', '--geo-verification-proxy'):
 565             if self.params.get('geo_verification_proxy') is None:
 566                 self.params['geo_verification_proxy'] = self.params['cn_verification_proxy']
 567
 568         check_deprecated('autonumber', '--auto-number', '-o "%(autonumber)s-%(title)s.%(ext)s"')
 569         check_deprecated('usetitle', '--title', '-o "%(title)s-%(id)s.%(ext)s"')
 570         check_deprecated('useid', '--id', '-o "%(id)s.%(ext)s"')
 571
 572         for msg in self.params.get('_warnings', []):
 573             self.report_warning(msg)
 574         for msg in self.params.get('_deprecation_warnings', []):
 575             self.deprecation_warning(msg)
 576
 577         if 'list-formats' in self.params.get('compat_opts', []):
 578             self.params['listformats_table'] = False
 579
 580         if 'overwrites' not in self.params and self.params.get('nooverwrites') is not None:
 581             # nooverwrites was unnecessarily changed to overwrites
 582             # in 0c3d0f51778b153f65c21906031c2e091fcfb641
 583             # This ensures compatibility with both keys
 584             self.params['overwrites'] = not self.params['nooverwrites']
 585         elif self.params.get('overwrites') is None:
 586             self.params.pop('overwrites', None)
 587         else:
 588             self.params['nooverwrites'] = not self.params['overwrites']
 589
 590         # Compatibility with older syntax
 591         params.setdefault('forceprint', {})
 592         if not isinstance(params['forceprint'], dict):
 593             params['forceprint'] = {'video': params['forceprint']}
 594
 595         if params.get('bidi_workaround', False):
 596             try:
 597                 import pty
 598                 master, slave = pty.openpty()
 599                 width = compat_get_terminal_size().columns
 600                 if width is None:
 601                     width_args = []
 602                 else:
 603                     width_args = ['-w', str(width)]
 604                 sp_kwargs = dict(
 605                     stdin=subprocess.PIPE,
 606                     stdout=slave,
 607                     stderr=self._err_file)
 608                 try:
 609                     self._output_process = Popen(['bidiv'] + width_args, **sp_kwargs)
 610                 except OSError:
 611                     self._output_process = Popen(['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
 612                 self._output_channel = os.fdopen(master, 'rb')
 613             except OSError as ose:
 614                 if ose.errno == errno.ENOENT:
 615                     self.report_warning(
 616                         'Could not find fribidi executable, ignoring --bidi-workaround. '
 617                         'Make sure that  fribidi  is an executable file in one of the directories in your $PATH.')
 618                 else:
 619                     raise
 620
 621         if (sys.platform != 'win32'
 622                 and sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
 623                 and not params.get('restrictfilenames', False)):
 624             # Unicode filesystem API will throw errors (#1474, #13027)
 625             self.report_warning(
 626                 'Assuming --restrict-filenames since file system encoding '
 627                 'cannot encode all characters. '
 628                 'Set the LC_ALL environment variable to fix this.')
 629             self.params['restrictfilenames'] = True
 630
 631         self.outtmpl_dict = self.parse_outtmpl()
 632
 633         # Creating format selector here allows us to catch syntax errors before the extraction
 634         self.format_selector = (
 635             self.params.get('format') if self.params.get('format') in (None, '-')
 636             else self.params['format'] if callable(self.params['format'])
 637             else self.build_format_selector(self.params['format']))
 638
 639         self._setup_opener()
 640
 641         if auto_init:
 642             if auto_init != 'no_verbose_header':
 643                 self.print_debug_header()
 644             self.add_default_info_extractors()
 645
 646         hooks = {
 647             'post_hooks': self.add_post_hook,
 648             'progress_hooks': self.add_progress_hook,
 649             'postprocessor_hooks': self.add_postprocessor_hook,
 650         }
 651         for opt, fn in hooks.items():
 652             for ph in self.params.get(opt, []):
 653                 fn(ph)
 654
 655         for pp_def_raw in self.params.get('postprocessors', []):
 656             pp_def = dict(pp_def_raw)
 657             when = pp_def.pop('when', 'post_process')
 658             self.add_post_processor(
 659                 get_postprocessor(pp_def.pop('key'))(self, **compat_kwargs(pp_def)),
 660                 when=when)
 661
 662         register_socks_protocols()
 663
 664         def preload_download_archive(fn):
 665             """Preload the archive, if any is specified"""
 666             if fn is None:
 667                 return False
 668             self.write_debug(f'Loading archive file {fn!r}')
 669             try:
 670                 with locked_file(fn, 'r', encoding='utf-8') as archive_file:
 671                     for line in archive_file:
 672                         self.archive.add(line.strip())
 673             except IOError as ioe:
 674                 if ioe.errno != errno.ENOENT:
 675                     raise
 676                 return False
 677             return True
 678
 679         self.archive = set()
 680         preload_download_archive(self.params.get('download_archive'))
 681
 682     def warn_if_short_id(self, argv):
 683         # short YouTube ID starting with dash?
 684         idxs = [
 685             i for i, a in enumerate(argv)
 686             if re.match(r'^-[0-9A-Za-z_-]{10}$', a)]
 687         if idxs:
 688             correct_argv = (
 689                 ['yt-dlp']
 690                 + [a for i, a in enumerate(argv) if i not in idxs]
 691                 + ['--'] + [argv[i] for i in idxs]
 692             )
 693             self.report_warning(
 694                 'Long argument string detected. '
 695                 'Use -- to separate parameters and URLs, like this:\n%s' %
 696                 args_to_str(correct_argv))
 697
 698     def add_info_extractor(self, ie):
 699         """Add an InfoExtractor object to the end of the list."""
 700         ie_key = ie.ie_key()
 701         self._ies[ie_key] = ie
 702         if not isinstance(ie, type):
 703             self._ies_instances[ie_key] = ie
 704             ie.set_downloader(self)
 705
 706     def _get_info_extractor_class(self, ie_key):
 707         ie = self._ies.get(ie_key)
 708         if ie is None:
 709             ie = get_info_extractor(ie_key)
 710             self.add_info_extractor(ie)
 711         return ie
 712
 713     def get_info_extractor(self, ie_key):
 714         """
 715         Get an instance of an IE with name ie_key, it will try to get one from
 716         the _ies list, if there's no instance it will create a new one and add
 717         it to the extractor list.
 718         """
 719         ie = self._ies_instances.get(ie_key)
 720         if ie is None:
 721             ie = get_info_extractor(ie_key)()
 722             self.add_info_extractor(ie)
 723         return ie
 724
 725     def add_default_info_extractors(self):
 726         """
 727         Add the InfoExtractors returned by gen_extractors to the end of the list
 728         """
 729         for ie in gen_extractor_classes():
 730             self.add_info_extractor(ie)
 731
 732     def add_post_processor(self, pp, when='post_process'):
 733         """Add a PostProcessor object to the end of the chain."""
 734         self._pps[when].append(pp)
 735         pp.set_downloader(self)
 736
 737     def add_post_hook(self, ph):
 738         """Add the post hook"""
 739         self._post_hooks.append(ph)
 740
 741     def add_progress_hook(self, ph):
 742         """Add the download progress hook"""
 743         self._progress_hooks.append(ph)
 744
 745     def add_postprocessor_hook(self, ph):
 746         """Add the postprocessing progress hook"""
 747         self._postprocessor_hooks.append(ph)
 748         for pps in self._pps.values():
 749             for pp in pps:
 750                 pp.add_progress_hook(ph)
 751
 752     def _bidi_workaround(self, message):
 753         if not hasattr(self, '_output_channel'):
 754             return message
 755
 756         assert hasattr(self, '_output_process')
 757         assert isinstance(message, compat_str)
 758         line_count = message.count('\n') + 1
 759         self._output_process.stdin.write((message + '\n').encode('utf-8'))
 760         self._output_process.stdin.flush()
 761         res = ''.join(self._output_channel.readline().decode('utf-8')
 762                       for _ in range(line_count))
 763         return res[:-len('\n')]
 764
 765     def _write_string(self, message, out=None, only_once=False):
 766         if only_once:
 767             if message in self._printed_messages:
 768                 return
 769             self._printed_messages.add(message)
 770         write_string(message, out=out, encoding=self.params.get('encoding'))
 771
 772     def to_stdout(self, message, skip_eol=False, quiet=False):
 773         """Print message to stdout"""
 774         if self.params.get('logger'):
 775             self.params['logger'].debug(message)
 776         elif not quiet or self.params.get('verbose'):
 777             self._write_string(
 778                 '%s%s' % (self._bidi_workaround(message), ('' if skip_eol else '\n')),
 779                 self._err_file if quiet else self._screen_file)
 780
 781     def to_stderr(self, message, only_once=False):
 782         """Print message to stderr"""
 783         assert isinstance(message, compat_str)
 784         if self.params.get('logger'):
 785             self.params['logger'].error(message)
 786         else:
 787             self._write_string('%s\n' % self._bidi_workaround(message), self._err_file, only_once=only_once)
 788
 789     def to_console_title(self, message):
 790         if not self.params.get('consoletitle', False):
 791             return
 792         message = remove_terminal_sequences(message)
 793         if compat_os_name == 'nt':
 794             if ctypes.windll.kernel32.GetConsoleWindow():
 795                 # c_wchar_p() might not be necessary if `message` is
 796                 # already of type unicode()
 797                 ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
 798         elif 'TERM' in os.environ:
 799             self._write_string('\033]0;%s\007' % message, self._screen_file)
 800
 801     def save_console_title(self):
 802         if not self.params.get('consoletitle', False):
 803             return
 804         if self.params.get('simulate'):
 805             return
 806         if compat_os_name != 'nt' and 'TERM' in os.environ:
 807             # Save the title on stack
 808             self._write_string('\033[22;0t', self._screen_file)
 809
 810     def restore_console_title(self):
 811         if not self.params.get('consoletitle', False):
 812             return
 813         if self.params.get('simulate'):
 814             return
 815         if compat_os_name != 'nt' and 'TERM' in os.environ:
 816             # Restore the title from stack
 817             self._write_string('\033[23;0t', self._screen_file)
 818
 819     def __enter__(self):
 820         self.save_console_title()
 821         return self
 822
 823     def __exit__(self, *args):
 824         self.restore_console_title()
 825
 826         if self.params.get('cookiefile') is not None:
 827             self.cookiejar.save(ignore_discard=True, ignore_expires=True)
 828
 829     def trouble(self, message=None, tb=None, is_error=True):
 830         """Determine action to take when a download problem appears.
 831
 832         Depending on if the downloader has been configured to ignore
 833         download errors or not, this method may throw an exception or
 834         not when errors are found, after printing the message.
 835
 836         @param tb          If given, is additional traceback information
 837         @param is_error    Whether to raise error according to ignorerrors
 838         """
 839         if message is not None:
 840             self.to_stderr(message)
 841         if self.params.get('verbose'):
 842             if tb is None:
 843                 if sys.exc_info()[0]:  # if .trouble has been called from an except block
 844                     tb = ''
 845                     if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
 846                         tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
 847                     tb += encode_compat_str(traceback.format_exc())
 848                 else:
 849                     tb_data = traceback.format_list(traceback.extract_stack())
 850                     tb = ''.join(tb_data)
 851             if tb:
 852                 self.to_stderr(tb)
 853         if not is_error:
 854             return
 855         if not self.params.get('ignoreerrors'):
 856             if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
 857                 exc_info = sys.exc_info()[1].exc_info
 858             else:
 859                 exc_info = sys.exc_info()
 860             raise DownloadError(message, exc_info)
 861         self._download_retcode = 1
 862
 863     def to_screen(self, message, skip_eol=False):
 864         """Print message to stdout if not in quiet mode"""
 865         self.to_stdout(
 866             message, skip_eol, quiet=self.params.get('quiet', False))
 867
 868     class Styles(Enum):
 869         HEADERS = 'yellow'
 870         EMPHASIS = 'light blue'
 871         ID = 'green'
 872         DELIM = 'blue'
 873         ERROR = 'red'
 874         WARNING = 'yellow'
 875         SUPPRESS = 'light black'
 876
 877     def _format_text(self, handle, allow_colors, text, f, fallback=None, *, test_encoding=False):
 878         if test_encoding:
 879             original_text = text
 880             encoding = self.params.get('encoding') or getattr(handle, 'encoding', 'ascii')
 881             text = text.encode(encoding, 'ignore').decode(encoding)
 882             if fallback is not None and text != original_text:
 883                 text = fallback
 884         if isinstance(f, self.Styles):
 885             f = f.value
 886         return format_text(text, f) if allow_colors else text if fallback is None else fallback
 887
 888     def _format_screen(self, *args, **kwargs):
 889         return self._format_text(
 890             self._screen_file, self._allow_colors['screen'], *args, **kwargs)
 891
 892     def _format_err(self, *args, **kwargs):
 893         return self._format_text(
 894             self._err_file, self._allow_colors['err'], *args, **kwargs)
 895
 896     def report_warning(self, message, only_once=False):
 897         '''
 898         Print the message to stderr, it will be prefixed with 'WARNING:'
 899         If stderr is a tty file the 'WARNING:' will be colored
 900         '''
 901         if self.params.get('logger') is not None:
 902             self.params['logger'].warning(message)
 903         else:
 904             if self.params.get('no_warnings'):
 905                 return
 906             self.to_stderr(f'{self._format_err("WARNING:", self.Styles.WARNING)} {message}', only_once)
 907
 908     def deprecation_warning(self, message):
 909         if self.params.get('logger') is not None:
 910             self.params['logger'].warning('DeprecationWarning: {message}')
 911         else:
 912             self.to_stderr(f'{self._format_err("DeprecationWarning:", self.Styles.ERROR)} {message}', True)
 913
 914     def report_error(self, message, *args, **kwargs):
 915         '''
 916         Do the same as trouble, but prefixes the message with 'ERROR:', colored
 917         in red if stderr is a tty file.
 918         '''
 919         self.trouble(f'{self._format_err("ERROR:", self.Styles.ERROR)} {message}', *args, **kwargs)
 920
 921     def write_debug(self, message, only_once=False):
 922         '''Log debug message or Print message to stderr'''
 923         if not self.params.get('verbose', False):
 924             return
 925         message = '[debug] %s' % message
 926         if self.params.get('logger'):
 927             self.params['logger'].debug(message)
 928         else:
 929             self.to_stderr(message, only_once)
 930
 931     def report_file_already_downloaded(self, file_name):
 932         """Report file has already been fully downloaded."""
 933         try:
 934             self.to_screen('[download] %s has already been downloaded' % file_name)
 935         except UnicodeEncodeError:
 936             self.to_screen('[download] The file has already been downloaded')
 937
 938     def report_file_delete(self, file_name):
 939         """Report that existing file will be deleted."""
 940         try:
 941             self.to_screen('Deleting existing file %s' % file_name)
 942         except UnicodeEncodeError:
 943             self.to_screen('Deleting existing file')
 944
 945     def raise_no_formats(self, info, forced=False):
 946         has_drm = info.get('__has_drm')
 947         msg = 'This video is DRM protected' if has_drm else 'No video formats found!'
 948         expected = self.params.get('ignore_no_formats_error')
 949         if forced or not expected:
 950             raise ExtractorError(msg, video_id=info['id'], ie=info['extractor'],
 951                                  expected=has_drm or expected)
 952         else:
 953             self.report_warning(msg)
 954
 955     def parse_outtmpl(self):
 956         outtmpl_dict = self.params.get('outtmpl', {})
 957         if not isinstance(outtmpl_dict, dict):
 958             outtmpl_dict = {'default': outtmpl_dict}
 959         # Remove spaces in the default template
 960         if self.params.get('restrictfilenames'):
 961             sanitize = lambda x: x.replace(' - ', ' ').replace(' ', '-')
 962         else:
 963             sanitize = lambda x: x
 964         outtmpl_dict.update({
 965             k: sanitize(v) for k, v in DEFAULT_OUTTMPL.items()
 966             if outtmpl_dict.get(k) is None})
 967         for key, val in outtmpl_dict.items():
 968             if isinstance(val, bytes):
 969                 self.report_warning(
 970                     'Parameter outtmpl is bytes, but should be a unicode string. '
 971                     'Put  from __future__ import unicode_literals  at the top of your code file or consider switching to Python 3.x.')
 972         return outtmpl_dict
 973
 974     def get_output_path(self, dir_type='', filename=None):
 975         paths = self.params.get('paths', {})
 976         assert isinstance(paths, dict)
 977         path = os.path.join(
 978             expand_path(paths.get('home', '').strip()),
 979             expand_path(paths.get(dir_type, '').strip()) if dir_type else '',
 980             filename or '')
 981
 982         # Temporary fix for #4787
 983         # 'Treat' all problem characters by passing filename through preferredencoding
 984         # to workaround encoding issues with subprocess on python2 @ Windows
 985         if sys.version_info < (3, 0) and sys.platform == 'win32':
 986             path = encodeFilename(path, True).decode(preferredencoding())
 987         return sanitize_path(path, force=self.params.get('windowsfilenames'))
 988
 989     @staticmethod
 990     def _outtmpl_expandpath(outtmpl):
 991         # expand_path translates '%%' into '%' and '$$' into '$'
 992         # correspondingly that is not what we want since we need to keep
 993         # '%%' intact for template dict substitution step. Working around
 994         # with boundary-alike separator hack.
 995         sep = ''.join([random.choice(ascii_letters) for _ in range(32)])
 996         outtmpl = outtmpl.replace('%%', '%{0}%'.format(sep)).replace('$$', '${0}$'.format(sep))
 997
 998         # outtmpl should be expand_path'ed before template dict substitution
 999         # because meta fields may contain env variables we don't want to
1000         # be expanded. For example, for outtmpl "%(title)s.%(ext)s" and
1001         # title "Hello $PATH", we don't want `$PATH` to be expanded.
1002         return expand_path(outtmpl).replace(sep, '')
1003
1004     @staticmethod
1005     def escape_outtmpl(outtmpl):
1006         ''' Escape any remaining strings like %s, %abc% etc. '''
1007         return re.sub(
1008             STR_FORMAT_RE_TMPL.format('', '(?![%(\0])'),
1009             lambda mobj: ('' if mobj.group('has_key') else '%') + mobj.group(0),
1010             outtmpl)
1011
1012     @classmethod
1013     def validate_outtmpl(cls, outtmpl):
1014         ''' @return None or Exception object '''
1015         outtmpl = re.sub(
1016             STR_FORMAT_RE_TMPL.format('[^)]*', '[ljqBUDS]'),
1017             lambda mobj: f'{mobj.group(0)[:-1]}s',
1018             cls._outtmpl_expandpath(outtmpl))
1019         try:
1020             cls.escape_outtmpl(outtmpl) % collections.defaultdict(int)
1021             return None
1022         except ValueError as err:
1023             return err
1024
1025     @staticmethod
1026     def _copy_infodict(info_dict):
1027         info_dict = dict(info_dict)
1028         for key in ('__original_infodict', '__postprocessors'):
1029             info_dict.pop(key, None)
1030         return info_dict
1031
1032     def prepare_outtmpl(self, outtmpl, info_dict, sanitize=False):
1033         """ Make the outtmpl and info_dict suitable for substitution: ydl.escape_outtmpl(outtmpl) % info_dict
1034         @param sanitize    Whether to sanitize the output as a filename.
1035                            For backward compatibility, a function can also be passed
1036         """
1037
1038         info_dict.setdefault('epoch', int(time.time()))  # keep epoch consistent once set
1039
1040         info_dict = self._copy_infodict(info_dict)
1041         info_dict['duration_string'] = (  # %(duration>%H-%M-%S)s is wrong if duration > 24hrs
1042             formatSeconds(info_dict['duration'], '-' if sanitize else ':')
1043             if info_dict.get('duration', None) is not None
1044             else None)
1045         info_dict['autonumber'] = self.params.get('autonumber_start', 1) - 1 + self._num_downloads
1046         if info_dict.get('resolution') is None:
1047             info_dict['resolution'] = self.format_resolution(info_dict, default=None)
1048
1049         # For fields playlist_index, playlist_autonumber and autonumber convert all occurrences
1050         # of %(field)s to %(field)0Nd for backward compatibility
1051         field_size_compat_map = {
1052             'playlist_index': number_of_digits(info_dict.get('_last_playlist_index') or 0),
1053             'playlist_autonumber': number_of_digits(info_dict.get('n_entries') or 0),
1054             'autonumber': self.params.get('autonumber_size') or 5,
1055         }
1056
1057         TMPL_DICT = {}
1058         EXTERNAL_FORMAT_RE = re.compile(STR_FORMAT_RE_TMPL.format('[^)]*', f'[{STR_FORMAT_TYPES}ljqBUDS]'))
1059         MATH_FUNCTIONS = {
1060             '+': float.__add__,
1061             '-': float.__sub__,
1062         }
1063         # Field is of the form key1.key2...
1064         # where keys (except first) can be string, int or slice
1065         FIELD_RE = r'\w*(?:\.(?:\w+|{num}|{num}?(?::{num}?){{1,2}}))*'.format(num=r'(?:-?\d+)')
1066         MATH_FIELD_RE = r'''(?:{field}|{num})'''.format(field=FIELD_RE, num=r'-?\d+(?:.\d+)?')
1067         MATH_OPERATORS_RE = r'(?:%s)' % '|'.join(map(re.escape, MATH_FUNCTIONS.keys()))
1068         INTERNAL_FORMAT_RE = re.compile(r'''(?x)
1069             (?P<negate>-)?
1070             (?P<fields>{field})
1071             (?P<maths>(?:{math_op}{math_field})*)
1072             (?:>(?P<strf_format>.+?))?
1073             (?P<alternate>(?<!\\),[^|&)]+)?
1074             (?:&(?P<replacement>.*?))?
1075             (?:\|(?P<default>.*?))?
1076             $'''.format(field=FIELD_RE, math_op=MATH_OPERATORS_RE, math_field=MATH_FIELD_RE))
1077
1078         def _traverse_infodict(k):
1079             k = k.split('.')
1080             if k[0] == '':
1081                 k.pop(0)
1082             return traverse_obj(info_dict, k, is_user_input=True, traverse_string=True)
1083
1084         def get_value(mdict):
1085             # Object traversal
1086             value = _traverse_infodict(mdict['fields'])
1087             # Negative
1088             if mdict['negate']:
1089                 value = float_or_none(value)
1090                 if value is not None:
1091                     value *= -1
1092             # Do maths
1093             offset_key = mdict['maths']
1094             if offset_key:
1095                 value = float_or_none(value)
1096                 operator = None
1097                 while offset_key:
1098                     item = re.match(
1099                         MATH_FIELD_RE if operator else MATH_OPERATORS_RE,
1100                         offset_key).group(0)
1101                     offset_key = offset_key[len(item):]
1102                     if operator is None:
1103                         operator = MATH_FUNCTIONS[item]
1104                         continue
1105                     item, multiplier = (item[1:], -1) if item[0] == '-' else (item, 1)
1106                     offset = float_or_none(item)
1107                     if offset is None:
1108                         offset = float_or_none(_traverse_infodict(item))
1109                     try:
1110                         value = operator(value, multiplier * offset)
1111                     except (TypeError, ZeroDivisionError):
1112                         return None
1113                     operator = None
1114             # Datetime formatting
1115             if mdict['strf_format']:
1116                 value = strftime_or_none(value, mdict['strf_format'].replace('\\,', ','))
1117
1118             return value
1119
1120         na = self.params.get('outtmpl_na_placeholder', 'NA')
1121
1122         def filename_sanitizer(key, value, restricted=self.params.get('restrictfilenames')):
1123             return sanitize_filename(str(value), restricted=restricted,
1124                                      is_id=re.search(r'(^|[_.])id(\.|$)', key))
1125
1126         sanitizer = sanitize if callable(sanitize) else filename_sanitizer
1127         sanitize = bool(sanitize)
1128
1129         def _dumpjson_default(obj):
1130             if isinstance(obj, (set, LazyList)):
1131                 return list(obj)
1132             return repr(obj)
1133
1134         def create_key(outer_mobj):
1135             if not outer_mobj.group('has_key'):
1136                 return outer_mobj.group(0)
1137             key = outer_mobj.group('key')
1138             mobj = re.match(INTERNAL_FORMAT_RE, key)
1139             initial_field = mobj.group('fields') if mobj else ''
1140             value, replacement, default = None, None, na
1141             while mobj:
1142                 mobj = mobj.groupdict()
1143                 default = mobj['default'] if mobj['default'] is not None else default
1144                 value = get_value(mobj)
1145                 replacement = mobj['replacement']
1146                 if value is None and mobj['alternate']:
1147                     mobj = re.match(INTERNAL_FORMAT_RE, mobj['alternate'][1:])
1148                 else:
1149                     break
1150
1151             fmt = outer_mobj.group('format')
1152             if fmt == 's' and value is not None and key in field_size_compat_map.keys():
1153                 fmt = '0{:d}d'.format(field_size_compat_map[key])
1154
1155             value = default if value is None else value if replacement is None else replacement
1156
1157             flags = outer_mobj.group('conversion') or ''
1158             str_fmt = f'{fmt[:-1]}s'
1159             if fmt[-1] == 'l':  # list
1160                 delim = '\n' if '#' in flags else ', '
1161                 value, fmt = delim.join(variadic(value, allowed_types=(str, bytes))), str_fmt
1162             elif fmt[-1] == 'j':  # json
1163                 value, fmt = json.dumps(value, default=_dumpjson_default, indent=4 if '#' in flags else None), str_fmt
1164             elif fmt[-1] == 'q':  # quoted
1165                 value = map(str, variadic(value) if '#' in flags else [value])
1166                 value, fmt = ' '.join(map(compat_shlex_quote, value)), str_fmt
1167             elif fmt[-1] == 'B':  # bytes
1168                 value = f'%{str_fmt}'.encode('utf-8') % str(value).encode('utf-8')
1169                 value, fmt = value.decode('utf-8', 'ignore'), 's'
1170             elif fmt[-1] == 'U':  # unicode normalized
1171                 value, fmt = unicodedata.normalize(
1172                     # "+" = compatibility equivalence, "#" = NFD
1173                     'NF%s%s' % ('K' if '+' in flags else '', 'D' if '#' in flags else 'C'),
1174                     value), str_fmt
1175             elif fmt[-1] == 'D':  # decimal suffix
1176                 num_fmt, fmt = fmt[:-1].replace('#', ''), 's'
1177                 value = format_decimal_suffix(value, f'%{num_fmt}f%s' if num_fmt else '%d%s',
1178                                               factor=1024 if '#' in flags else 1000)
1179             elif fmt[-1] == 'S':  # filename sanitization
1180                 value, fmt = filename_sanitizer(initial_field, value, restricted='#' in flags), str_fmt
1181             elif fmt[-1] == 'c':
1182                 if value:
1183                     value = str(value)[0]
1184                 else:
1185                     fmt = str_fmt
1186             elif fmt[-1] not in 'rs':  # numeric
1187                 value = float_or_none(value)
1188                 if value is None:
1189                     value, fmt = default, 's'
1190
1191             if sanitize:
1192                 if fmt[-1] == 'r':
1193                     # If value is an object, sanitize might convert it to a string
1194                     # So we convert it to repr first
1195                     value, fmt = repr(value), str_fmt
1196                 if fmt[-1] in 'csr':
1197                     value = sanitizer(initial_field, value)
1198
1199             key = '%s\0%s' % (key.replace('%', '%\0'), outer_mobj.group('format'))
1200             TMPL_DICT[key] = value
1201             return '{prefix}%({key}){fmt}'.format(key=key, fmt=fmt, prefix=outer_mobj.group('prefix'))
1202
1203         return EXTERNAL_FORMAT_RE.sub(create_key, outtmpl), TMPL_DICT
1204
1205     def evaluate_outtmpl(self, outtmpl, info_dict, *args, **kwargs):
1206         outtmpl, info_dict = self.prepare_outtmpl(outtmpl, info_dict, *args, **kwargs)
1207         return self.escape_outtmpl(outtmpl) % info_dict
1208
1209     def _prepare_filename(self, info_dict, tmpl_type='default'):
1210         try:
1211             outtmpl = self._outtmpl_expandpath(self.outtmpl_dict.get(tmpl_type, self.outtmpl_dict['default']))
1212             filename = self.evaluate_outtmpl(outtmpl, info_dict, True)
1213
1214             force_ext = OUTTMPL_TYPES.get(tmpl_type)
1215             if filename and force_ext is not None:
1216                 filename = replace_extension(filename, force_ext, info_dict.get('ext'))
1217
1218             # https://github.com/blackjack4494/youtube-dlc/issues/85
1219             trim_file_name = self.params.get('trim_file_name', False)
1220             if trim_file_name:
1221                 no_ext, *ext = filename.rsplit('.', 2)
1222                 filename = join_nonempty(no_ext[:trim_file_name], *ext, delim='.')
1223
1224             return filename
1225         except ValueError as err:
1226             self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
1227             return None
1228
1229     def prepare_filename(self, info_dict, dir_type='', warn=False):
1230         """Generate the output filename."""
1231
1232         filename = self._prepare_filename(info_dict, dir_type or 'default')
1233         if not filename and dir_type not in ('', 'temp'):
1234             return ''
1235
1236         if warn:
1237             if not self.params.get('paths'):
1238                 pass
1239             elif filename == '-':
1240                 self.report_warning('--paths is ignored when an outputting to stdout', only_once=True)
1241             elif os.path.isabs(filename):
1242                 self.report_warning('--paths is ignored since an absolute path is given in output template', only_once=True)
1243         if filename == '-' or not filename:
1244             return filename
1245
1246         return self.get_output_path(dir_type, filename)
1247
1248     def _match_entry(self, info_dict, incomplete=False, silent=False):
1249         """ Returns None if the file should be downloaded """
1250
1251         video_title = info_dict.get('title', info_dict.get('id', 'video'))
1252
1253         def check_filter():
1254             if 'title' in info_dict:
1255                 # This can happen when we're just evaluating the playlist
1256                 title = info_dict['title']
1257                 matchtitle = self.params.get('matchtitle', False)
1258                 if matchtitle:
1259                     if not re.search(matchtitle, title, re.IGNORECASE):
1260                         return '"' + title + '" title did not match pattern "' + matchtitle + '"'
1261                 rejecttitle = self.params.get('rejecttitle', False)
1262                 if rejecttitle:
1263                     if re.search(rejecttitle, title, re.IGNORECASE):
1264                         return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
1265             date = info_dict.get('upload_date')
1266             if date is not None:
1267                 dateRange = self.params.get('daterange', DateRange())
1268                 if date not in dateRange:
1269                     return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
1270             view_count = info_dict.get('view_count')
1271             if view_count is not None:
1272                 min_views = self.params.get('min_views')
1273                 if min_views is not None and view_count < min_views:
1274                     return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
1275                 max_views = self.params.get('max_views')
1276                 if max_views is not None and view_count > max_views:
1277                     return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
1278             if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
1279                 return 'Skipping "%s" because it is age restricted' % video_title
1280
1281             match_filter = self.params.get('match_filter')
1282             if match_filter is not None:
1283                 try:
1284                     ret = match_filter(info_dict, incomplete=incomplete)
1285                 except TypeError:
1286                     # For backward compatibility
1287                     ret = None if incomplete else match_filter(info_dict)
1288                 if ret is not None:
1289                     return ret
1290             return None
1291
1292         if self.in_download_archive(info_dict):
1293             reason = '%s has already been recorded in the archive' % video_title
1294             break_opt, break_err = 'break_on_existing', ExistingVideoReached
1295         else:
1296             reason = check_filter()
1297             break_opt, break_err = 'break_on_reject', RejectedVideoReached
1298         if reason is not None:
1299             if not silent:
1300                 self.to_screen('[download] ' + reason)
1301             if self.params.get(break_opt, False):
1302                 raise break_err()
1303         return reason
1304
1305     @staticmethod
1306     def add_extra_info(info_dict, extra_info):
1307         '''Set the keys from extra_info in info dict if they are missing'''
1308         for key, value in extra_info.items():
1309             info_dict.setdefault(key, value)
1310
1311     def extract_info(self, url, download=True, ie_key=None, extra_info=None,
1312                      process=True, force_generic_extractor=False):
1313         """
1314         Return a list with a dictionary for each video extracted.
1315
1316         Arguments:
1317         url -- URL to extract
1318
1319         Keyword arguments:
1320         download -- whether to download videos during extraction
1321         ie_key -- extractor key hint
1322         extra_info -- dictionary containing the extra values to add to each result
1323         process -- whether to resolve all unresolved references (URLs, playlist items),
1324             must be True for download to work.
1325         force_generic_extractor -- force using the generic extractor
1326         """
1327
1328         if extra_info is None:
1329             extra_info = {}
1330
1331         if not ie_key and force_generic_extractor:
1332             ie_key = 'Generic'
1333
1334         if ie_key:
1335             ies = {ie_key: self._get_info_extractor_class(ie_key)}
1336         else:
1337             ies = self._ies
1338
1339         for ie_key, ie in ies.items():
1340             if not ie.suitable(url):
1341                 continue
1342
1343             if not ie.working():
1344                 self.report_warning('The program functionality for this site has been marked as broken, '
1345                                     'and will probably not work.')
1346
1347             temp_id = ie.get_temp_id(url)
1348             if temp_id is not None and self.in_download_archive({'id': temp_id, 'ie_key': ie_key}):
1349                 self.to_screen(f'[{ie_key}] {temp_id}: has already been recorded in the archive')
1350                 if self.params.get('break_on_existing', False):
1351                     raise ExistingVideoReached()
1352                 break
1353             return self.__extract_info(url, self.get_info_extractor(ie_key), download, extra_info, process)
1354         else:
1355             self.report_error('no suitable InfoExtractor for URL %s' % url)
1356
1357     def __handle_extraction_exceptions(func):
1358         @functools.wraps(func)
1359         def wrapper(self, *args, **kwargs):
1360             while True:
1361                 try:
1362                     return func(self, *args, **kwargs)
1363                 except (DownloadCancelled, LazyList.IndexError, PagedList.IndexError):
1364                     raise
1365                 except ReExtractInfo as e:
1366                     if e.expected:
1367                         self.to_screen(f'{e}; Re-extracting data')
1368                     else:
1369                         self.to_stderr('\r')
1370                         self.report_warning(f'{e}; Re-extracting data')
1371                     continue
1372                 except GeoRestrictedError as e:
1373                     msg = e.msg
1374                     if e.countries:
1375                         msg += '\nThis video is available in %s.' % ', '.join(
1376                             map(ISO3166Utils.short2full, e.countries))
1377                     msg += '\nYou might want to use a VPN or a proxy server (with --proxy) to workaround.'
1378                     self.report_error(msg)
1379                 except ExtractorError as e:  # An error we somewhat expected
1380                     self.report_error(str(e), e.format_traceback())
1381                 except Exception as e:
1382                     if self.params.get('ignoreerrors'):
1383                         self.report_error(str(e), tb=encode_compat_str(traceback.format_exc()))
1384                     else:
1385                         raise
1386                 break
1387         return wrapper
1388
1389     def _wait_for_video(self, ie_result):
1390         if (not self.params.get('wait_for_video')
1391                 or ie_result.get('_type', 'video') != 'video'
1392                 or ie_result.get('formats') or ie_result.get('url')):
1393             return
1394
1395         format_dur = lambda dur: '%02d:%02d:%02d' % timetuple_from_msec(dur * 1000)[:-1]
1396         last_msg = ''
1397
1398         def progress(msg):
1399             nonlocal last_msg
1400             self.to_screen(msg + ' ' * (len(last_msg) - len(msg)) + '\r', skip_eol=True)
1401             last_msg = msg
1402
1403         min_wait, max_wait = self.params.get('wait_for_video')
1404         diff = try_get(ie_result, lambda x: x['release_timestamp'] - time.time())
1405         if diff is None and ie_result.get('live_status') == 'is_upcoming':
1406             diff = random.randrange(min_wait, max_wait) if (max_wait and min_wait) else (max_wait or min_wait)
1407             self.report_warning('Release time of video is not known')
1408         elif (diff or 0) <= 0:
1409             self.report_warning('Video should already be available according to extracted info')
1410         diff = min(max(diff or 0, min_wait or 0), max_wait or float('inf'))
1411         self.to_screen(f'[wait] Waiting for {format_dur(diff)} - Press Ctrl+C to try now')
1412
1413         wait_till = time.time() + diff
1414         try:
1415             while True:
1416                 diff = wait_till - time.time()
1417                 if diff <= 0:
1418                     progress('')
1419                     raise ReExtractInfo('[wait] Wait period ended', expected=True)
1420                 progress(f'[wait] Remaining time until next attempt: {self._format_screen(format_dur(diff), self.Styles.EMPHASIS)}')
1421                 time.sleep(1)
1422         except KeyboardInterrupt:
1423             progress('')
1424             raise ReExtractInfo('[wait] Interrupted by user', expected=True)
1425         except BaseException as e:
1426             if not isinstance(e, ReExtractInfo):
1427                 self.to_screen('')
1428             raise
1429
1430     @__handle_extraction_exceptions
1431     def __extract_info(self, url, ie, download, extra_info, process):
1432         ie_result = ie.extract(url)
1433         if ie_result is None:  # Finished already (backwards compatibility; listformats and friends should be moved here)
1434             return
1435         if isinstance(ie_result, list):
1436             # Backwards compatibility: old IE result format
1437             ie_result = {
1438                 '_type': 'compat_list',
1439                 'entries': ie_result,
1440             }
1441         if extra_info.get('original_url'):
1442             ie_result.setdefault('original_url', extra_info['original_url'])
1443         self.add_default_extra_info(ie_result, ie, url)
1444         if process:
1445             self._wait_for_video(ie_result)
1446             return self.process_ie_result(ie_result, download, extra_info)
1447         else:
1448             return ie_result
1449
1450     def add_default_extra_info(self, ie_result, ie, url):
1451         if url is not None:
1452             self.add_extra_info(ie_result, {
1453                 'webpage_url': url,
1454                 'original_url': url,
1455                 'webpage_url_basename': url_basename(url),
1456                 'webpage_url_domain': get_domain(url),
1457             })
1458         if ie is not None:
1459             self.add_extra_info(ie_result, {
1460                 'extractor': ie.IE_NAME,
1461                 'extractor_key': ie.ie_key(),
1462             })
1463
1464     def process_ie_result(self, ie_result, download=True, extra_info=None):
1465         """
1466         Take the result of the ie(may be modified) and resolve all unresolved
1467         references (URLs, playlist items).
1468
1469         It will also download the videos if 'download'.
1470         Returns the resolved ie_result.
1471         """
1472         if extra_info is None:
1473             extra_info = {}
1474         result_type = ie_result.get('_type', 'video')
1475
1476         if result_type in ('url', 'url_transparent'):
1477             ie_result['url'] = sanitize_url(ie_result['url'])
1478             if ie_result.get('original_url'):
1479                 extra_info.setdefault('original_url', ie_result['original_url'])
1480
1481             extract_flat = self.params.get('extract_flat', False)
1482             if ((extract_flat == 'in_playlist' and 'playlist' in extra_info)
1483                     or extract_flat is True):
1484                 info_copy = ie_result.copy()
1485                 ie = try_get(ie_result.get('ie_key'), self.get_info_extractor)
1486                 if ie and not ie_result.get('id'):
1487                     info_copy['id'] = ie.get_temp_id(ie_result['url'])
1488                 self.add_default_extra_info(info_copy, ie, ie_result['url'])
1489                 self.add_extra_info(info_copy, extra_info)
1490                 info_copy, _ = self.pre_process(info_copy)
1491                 self.__forced_printings(info_copy, self.prepare_filename(info_copy), incomplete=True)
1492                 if self.params.get('force_write_download_archive', False):
1493                     self.record_download_archive(info_copy)
1494                 return ie_result
1495
1496         if result_type == 'video':
1497             self.add_extra_info(ie_result, extra_info)
1498             ie_result = self.process_video_result(ie_result, download=download)
1499             additional_urls = (ie_result or {}).get('additional_urls')
1500             if additional_urls:
1501                 # TODO: Improve MetadataParserPP to allow setting a list
1502                 if isinstance(additional_urls, compat_str):
1503                     additional_urls = [additional_urls]
1504                 self.to_screen(
1505                     '[info] %s: %d additional URL(s) requested' % (ie_result['id'], len(additional_urls)))
1506                 self.write_debug('Additional URLs: "%s"' % '", "'.join(additional_urls))
1507                 ie_result['additional_entries'] = [
1508                     self.extract_info(
1509                         url, download, extra_info=extra_info,
1510                         force_generic_extractor=self.params.get('force_generic_extractor'))
1511                     for url in additional_urls
1512                 ]
1513             return ie_result
1514         elif result_type == 'url':
1515             # We have to add extra_info to the results because it may be
1516             # contained in a playlist
1517             return self.extract_info(
1518                 ie_result['url'], download,
1519                 ie_key=ie_result.get('ie_key'),
1520                 extra_info=extra_info)
1521         elif result_type == 'url_transparent':
1522             # Use the information from the embedding page
1523             info = self.extract_info(
1524                 ie_result['url'], ie_key=ie_result.get('ie_key'),
1525                 extra_info=extra_info, download=False, process=False)
1526
1527             # extract_info may return None when ignoreerrors is enabled and
1528             # extraction failed with an error, don't crash and return early
1529             # in this case
1530             if not info:
1531                 return info
1532
1533             force_properties = dict(
1534                 (k, v) for k, v in ie_result.items() if v is not None)
1535             for f in ('_type', 'url', 'id', 'extractor', 'extractor_key', 'ie_key'):
1536                 if f in force_properties:
1537                     del force_properties[f]
1538             new_result = info.copy()
1539             new_result.update(force_properties)
1540
1541             # Extracted info may not be a video result (i.e.
1542             # info.get('_type', 'video') != video) but rather an url or
1543             # url_transparent. In such cases outer metadata (from ie_result)
1544             # should be propagated to inner one (info). For this to happen
1545             # _type of info should be overridden with url_transparent. This
1546             # fixes issue from https://github.com/ytdl-org/youtube-dl/pull/11163.
1547             if new_result.get('_type') == 'url':
1548                 new_result['_type'] = 'url_transparent'
1549
1550             return self.process_ie_result(
1551                 new_result, download=download, extra_info=extra_info)
1552         elif result_type in ('playlist', 'multi_video'):
1553             # Protect from infinite recursion due to recursively nested playlists
1554             # (see https://github.com/ytdl-org/youtube-dl/issues/27833)
1555             webpage_url = ie_result['webpage_url']
1556             if webpage_url in self._playlist_urls:
1557                 self.to_screen(
1558                     '[download] Skipping already downloaded playlist: %s'
1559                     % ie_result.get('title') or ie_result.get('id'))
1560                 return
1561
1562             self._playlist_level += 1
1563             self._playlist_urls.add(webpage_url)
1564             self._sanitize_thumbnails(ie_result)
1565             try:
1566                 return self.__process_playlist(ie_result, download)
1567             finally:
1568                 self._playlist_level -= 1
1569                 if not self._playlist_level:
1570                     self._playlist_urls.clear()
1571         elif result_type == 'compat_list':
1572             self.report_warning(
1573                 'Extractor %s returned a compat_list result. '
1574                 'It needs to be updated.' % ie_result.get('extractor'))
1575
1576             def _fixup(r):
1577                 self.add_extra_info(r, {
1578                     'extractor': ie_result['extractor'],
1579                     'webpage_url': ie_result['webpage_url'],
1580                     'webpage_url_basename': url_basename(ie_result['webpage_url']),
1581                     'webpage_url_domain': get_domain(ie_result['webpage_url']),
1582                     'extractor_key': ie_result['extractor_key'],
1583                 })
1584                 return r
1585             ie_result['entries'] = [
1586                 self.process_ie_result(_fixup(r), download, extra_info)
1587                 for r in ie_result['entries']
1588             ]
1589             return ie_result
1590         else:
1591             raise Exception('Invalid result type: %s' % result_type)
1592
1593     def _ensure_dir_exists(self, path):
1594         return make_dir(path, self.report_error)
1595
1596     def __process_playlist(self, ie_result, download):
1597         # We process each entry in the playlist
1598         playlist = ie_result.get('title') or ie_result.get('id')
1599         self.to_screen('[download] Downloading playlist: %s' % playlist)
1600
1601         if 'entries' not in ie_result:
1602             raise EntryNotInPlaylist('There are no entries')
1603
1604         MissingEntry = object()
1605         incomplete_entries = bool(ie_result.get('requested_entries'))
1606         if incomplete_entries:
1607             def fill_missing_entries(entries, indices):
1608                 ret = [MissingEntry] * max(indices)
1609                 for i, entry in zip(indices, entries):
1610                     ret[i - 1] = entry
1611                 return ret
1612             ie_result['entries'] = fill_missing_entries(ie_result['entries'], ie_result['requested_entries'])
1613
1614         playlist_results = []
1615
1616         playliststart = self.params.get('playliststart', 1)
1617         playlistend = self.params.get('playlistend')
1618         # For backwards compatibility, interpret -1 as whole list
1619         if playlistend == -1:
1620             playlistend = None
1621
1622         playlistitems_str = self.params.get('playlist_items')
1623         playlistitems = None
1624         if playlistitems_str is not None:
1625             def iter_playlistitems(format):
1626                 for string_segment in format.split(','):
1627                     if '-' in string_segment:
1628                         start, end = string_segment.split('-')
1629                         for item in range(int(start), int(end) + 1):
1630                             yield int(item)
1631                     else:
1632                         yield int(string_segment)
1633             playlistitems = orderedSet(iter_playlistitems(playlistitems_str))
1634
1635         ie_entries = ie_result['entries']
1636         msg = (
1637             'Downloading %d videos' if not isinstance(ie_entries, list)
1638             else 'Collected %d videos; downloading %%d of them' % len(ie_entries))
1639
1640         if isinstance(ie_entries, list):
1641             def get_entry(i):
1642                 return ie_entries[i - 1]
1643         else:
1644             if not isinstance(ie_entries, (PagedList, LazyList)):
1645                 ie_entries = LazyList(ie_entries)
1646
1647             def get_entry(i):
1648                 return YoutubeDL.__handle_extraction_exceptions(
1649                     lambda self, i: ie_entries[i - 1]
1650                 )(self, i)
1651
1652         entries = []
1653         items = playlistitems if playlistitems is not None else itertools.count(playliststart)
1654         for i in items:
1655             if i == 0:
1656                 continue
1657             if playlistitems is None and playlistend is not None and playlistend < i:
1658                 break
1659             entry = None
1660             try:
1661                 entry = get_entry(i)
1662                 if entry is MissingEntry:
1663                     raise EntryNotInPlaylist()
1664             except (IndexError, EntryNotInPlaylist):
1665                 if incomplete_entries:
1666                     raise EntryNotInPlaylist(f'Entry {i} cannot be found')
1667                 elif not playlistitems:
1668                     break
1669             entries.append(entry)
1670             try:
1671                 if entry is not None:
1672                     self._match_entry(entry, incomplete=True, silent=True)
1673             except (ExistingVideoReached, RejectedVideoReached):
1674                 break
1675         ie_result['entries'] = entries
1676
1677         # Save playlist_index before re-ordering
1678         entries = [
1679             ((playlistitems[i - 1] if playlistitems else i + playliststart - 1), entry)
1680             for i, entry in enumerate(entries, 1)
1681             if entry is not None]
1682         n_entries = len(entries)
1683
1684         if not playlistitems and (playliststart != 1 or playlistend):
1685             playlistitems = list(range(playliststart, playliststart + n_entries))
1686         ie_result['requested_entries'] = playlistitems
1687
1688         _infojson_written = False
1689         if not self.params.get('simulate') and self.params.get('allow_playlist_files', True):
1690             ie_copy = {
1691                 'playlist': playlist,
1692                 'playlist_id': ie_result.get('id'),
1693                 'playlist_title': ie_result.get('title'),
1694                 'playlist_uploader': ie_result.get('uploader'),
1695                 'playlist_uploader_id': ie_result.get('uploader_id'),
1696                 'playlist_index': 0,
1697                 'n_entries': n_entries,
1698             }
1699             ie_copy.update(dict(ie_result))
1700
1701             _infojson_written = self._write_info_json(
1702                 'playlist', ie_result, self.prepare_filename(ie_copy, 'pl_infojson'))
1703             if _infojson_written is None:
1704                 return
1705             if self._write_description('playlist', ie_result,
1706                                        self.prepare_filename(ie_copy, 'pl_description')) is None:
1707                 return
1708             # TODO: This should be passed to ThumbnailsConvertor if necessary
1709             self._write_thumbnails('playlist', ie_copy, self.prepare_filename(ie_copy, 'pl_thumbnail'))
1710
1711         if self.params.get('playlistreverse', False):
1712             entries = entries[::-1]
1713         if self.params.get('playlistrandom', False):
1714             random.shuffle(entries)
1715
1716         x_forwarded_for = ie_result.get('__x_forwarded_for_ip')
1717
1718         self.to_screen('[%s] playlist %s: %s' % (ie_result['extractor'], playlist, msg % n_entries))
1719         failures = 0
1720         max_failures = self.params.get('skip_playlist_after_errors') or float('inf')
1721         for i, entry_tuple in enumerate(entries, 1):
1722             playlist_index, entry = entry_tuple
1723             if 'playlist-index' in self.params.get('compat_opts', []):
1724                 playlist_index = playlistitems[i - 1] if playlistitems else i + playliststart - 1
1725             self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
1726             # This __x_forwarded_for_ip thing is a bit ugly but requires
1727             # minimal changes
1728             if x_forwarded_for:
1729                 entry['__x_forwarded_for_ip'] = x_forwarded_for
1730             extra = {
1731                 'n_entries': n_entries,
1732                 '_last_playlist_index': max(playlistitems) if playlistitems else (playlistend or n_entries),
1733                 'playlist_index': playlist_index,
1734                 'playlist_autonumber': i,
1735                 'playlist': playlist,
1736                 'playlist_id': ie_result.get('id'),
1737                 'playlist_title': ie_result.get('title'),
1738                 'playlist_uploader': ie_result.get('uploader'),
1739                 'playlist_uploader_id': ie_result.get('uploader_id'),
1740                 'extractor': ie_result['extractor'],
1741                 'webpage_url': ie_result['webpage_url'],
1742                 'webpage_url_basename': url_basename(ie_result['webpage_url']),
1743                 'webpage_url_domain': get_domain(ie_result['webpage_url']),
1744                 'extractor_key': ie_result['extractor_key'],
1745             }
1746
1747             if self._match_entry(entry, incomplete=True) is not None:
1748                 continue
1749
1750             entry_result = self.__process_iterable_entry(entry, download, extra)
1751             if not entry_result:
1752                 failures += 1
1753             if failures >= max_failures:
1754                 self.report_error(
1755                     'Skipping the remaining entries in playlist "%s" since %d items failed extraction' % (playlist, failures))
1756                 break
1757             playlist_results.append(entry_result)
1758         ie_result['entries'] = playlist_results
1759
1760         # Write the updated info to json
1761         if _infojson_written and self._write_info_json(
1762                 'updated playlist', ie_result,
1763                 self.prepare_filename(ie_copy, 'pl_infojson'), overwrite=True) is None:
1764             return
1765
1766         for tmpl in self.params['forceprint'].get('playlist', []):
1767             self._forceprint(tmpl, ie_result)
1768         self.to_screen('[download] Finished downloading playlist: %s' % playlist)
1769         return ie_result
1770
1771     @__handle_extraction_exceptions
1772     def __process_iterable_entry(self, entry, download, extra_info):
1773         return self.process_ie_result(
1774             entry, download=download, extra_info=extra_info)
1775
1776     def _build_format_filter(self, filter_spec):
1777         " Returns a function to filter the formats according to the filter_spec "
1778
1779         OPERATORS = {
1780             '<': operator.lt,
1781             '<=': operator.le,
1782             '>': operator.gt,
1783             '>=': operator.ge,
1784             '=': operator.eq,
1785             '!=': operator.ne,
1786         }
1787         operator_rex = re.compile(r'''(?x)\s*
1788             (?P<key>width|height|tbr|abr|vbr|asr|filesize|filesize_approx|fps)\s*
1789             (?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
1790             (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)\s*
1791             ''' % '|'.join(map(re.escape, OPERATORS.keys())))
1792         m = operator_rex.fullmatch(filter_spec)
1793         if m:
1794             try:
1795                 comparison_value = int(m.group('value'))
1796             except ValueError:
1797                 comparison_value = parse_filesize(m.group('value'))
1798                 if comparison_value is None:
1799                     comparison_value = parse_filesize(m.group('value') + 'B')
1800                 if comparison_value is None:
1801                     raise ValueError(
1802                         'Invalid value %r in format specification %r' % (
1803                             m.group('value'), filter_spec))
1804             op = OPERATORS[m.group('op')]
1805
1806         if not m:
1807             STR_OPERATORS = {
1808                 '=': operator.eq,
1809                 '^=': lambda attr, value: attr.startswith(value),
1810                 '$=': lambda attr, value: attr.endswith(value),
1811                 '*=': lambda attr, value: value in attr,
1812             }
1813             str_operator_rex = re.compile(r'''(?x)\s*
1814                 (?P<key>[a-zA-Z0-9._-]+)\s*
1815                 (?P<negation>!\s*)?(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
1816                 (?P<value>[a-zA-Z0-9._-]+)\s*
1817                 ''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))
1818             m = str_operator_rex.fullmatch(filter_spec)
1819             if m:
1820                 comparison_value = m.group('value')
1821                 str_op = STR_OPERATORS[m.group('op')]
1822                 if m.group('negation'):
1823                     op = lambda attr, value: not str_op(attr, value)
1824                 else:
1825                     op = str_op
1826
1827         if not m:
1828             raise SyntaxError('Invalid filter specification %r' % filter_spec)
1829
1830         def _filter(f):
1831             actual_value = f.get(m.group('key'))
1832             if actual_value is None:
1833                 return m.group('none_inclusive')
1834             return op(actual_value, comparison_value)
1835         return _filter
1836
1837     def _check_formats(self, formats):
1838         for f in formats:
1839             self.to_screen('[info] Testing format %s' % f['format_id'])
1840             path = self.get_output_path('temp')
1841             if not self._ensure_dir_exists(f'{path}/'):
1842                 continue
1843             temp_file = tempfile.NamedTemporaryFile(suffix='.tmp', delete=False, dir=path or None)
1844             temp_file.close()
1845             try:
1846                 success, _ = self.dl(temp_file.name, f, test=True)
1847             except (DownloadError, IOError, OSError, ValueError) + network_exceptions:
1848                 success = False
1849             finally:
1850                 if os.path.exists(temp_file.name):
1851                     try:
1852                         os.remove(temp_file.name)
1853                     except OSError:
1854                         self.report_warning('Unable to delete temporary file "%s"' % temp_file.name)
1855             if success:
1856                 yield f
1857             else:
1858                 self.to_screen('[info] Unable to download format %s. Skipping...' % f['format_id'])
1859
1860     def _default_format_spec(self, info_dict, download=True):
1861
1862         def can_merge():
1863             merger = FFmpegMergerPP(self)
1864             return merger.available and merger.can_merge()
1865
1866         prefer_best = (
1867             not self.params.get('simulate')
1868             and download
1869             and (
1870                 not can_merge()
1871                 or info_dict.get('is_live', False)
1872                 or self.outtmpl_dict['default'] == '-'))
1873         compat = (
1874             prefer_best
1875             or self.params.get('allow_multiple_audio_streams', False)
1876             or 'format-spec' in self.params.get('compat_opts', []))
1877
1878         return (
1879             'best/bestvideo+bestaudio' if prefer_best
1880             else 'bestvideo*+bestaudio/best' if not compat
1881             else 'bestvideo+bestaudio/best')
1882
1883     def build_format_selector(self, format_spec):
1884         def syntax_error(note, start):
1885             message = (
1886                 'Invalid format specification: '
1887                 '{0}\n\t{1}\n\t{2}^'.format(note, format_spec, ' ' * start[1]))
1888             return SyntaxError(message)
1889
1890         PICKFIRST = 'PICKFIRST'
1891         MERGE = 'MERGE'
1892         SINGLE = 'SINGLE'
1893         GROUP = 'GROUP'
1894         FormatSelector = collections.namedtuple('FormatSelector', ['type', 'selector', 'filters'])
1895
1896         allow_multiple_streams = {'audio': self.params.get('allow_multiple_audio_streams', False),
1897                                   'video': self.params.get('allow_multiple_video_streams', False)}
1898
1899         check_formats = self.params.get('check_formats') == 'selected'
1900
1901         def _parse_filter(tokens):
1902             filter_parts = []
1903             for type, string, start, _, _ in tokens:
1904                 if type == tokenize.OP and string == ']':
1905                     return ''.join(filter_parts)
1906                 else:
1907                     filter_parts.append(string)
1908
1909         def _remove_unused_ops(tokens):
1910             # Remove operators that we don't use and join them with the surrounding strings
1911             # for example: 'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9'
1912             ALLOWED_OPS = ('/', '+', ',', '(', ')')
1913             last_string, last_start, last_end, last_line = None, None, None, None
1914             for type, string, start, end, line in tokens:
1915                 if type == tokenize.OP and string == '[':
1916                     if last_string:
1917                         yield tokenize.NAME, last_string, last_start, last_end, last_line
1918                         last_string = None
1919                     yield type, string, start, end, line
1920                     # everything inside brackets will be handled by _parse_filter
1921                     for type, string, start, end, line in tokens:
1922                         yield type, string, start, end, line
1923                         if type == tokenize.OP and string == ']':
1924                             break
1925                 elif type == tokenize.OP and string in ALLOWED_OPS:
1926                     if last_string:
1927                         yield tokenize.NAME, last_string, last_start, last_end, last_line
1928                         last_string = None
1929                     yield type, string, start, end, line
1930                 elif type in [tokenize.NAME, tokenize.NUMBER, tokenize.OP]:
1931                     if not last_string:
1932                         last_string = string
1933                         last_start = start
1934                         last_end = end
1935                     else:
1936                         last_string += string
1937             if last_string:
1938                 yield tokenize.NAME, last_string, last_start, last_end, last_line
1939
1940         def _parse_format_selection(tokens, inside_merge=False, inside_choice=False, inside_group=False):
1941             selectors = []
1942             current_selector = None
1943             for type, string, start, _, _ in tokens:
1944                 # ENCODING is only defined in python 3.x
1945                 if type == getattr(tokenize, 'ENCODING', None):
1946                     continue
1947                 elif type in [tokenize.NAME, tokenize.NUMBER]:
1948                     current_selector = FormatSelector(SINGLE, string, [])
1949                 elif type == tokenize.OP:
1950                     if string == ')':
1951                         if not inside_group:
1952                             # ')' will be handled by the parentheses group
1953                             tokens.restore_last_token()
1954                         break
1955                     elif inside_merge and string in ['/', ',']:
1956                         tokens.restore_last_token()
1957                         break
1958                     elif inside_choice and string == ',':
1959                         tokens.restore_last_token()
1960                         break
1961                     elif string == ',':
1962                         if not current_selector:
1963                             raise syntax_error('"," must follow a format selector', start)
1964                         selectors.append(current_selector)
1965                         current_selector = None
1966                     elif string == '/':
1967                         if not current_selector:
1968                             raise syntax_error('"/" must follow a format selector', start)
1969                         first_choice = current_selector
1970                         second_choice = _parse_format_selection(tokens, inside_choice=True)
1971                         current_selector = FormatSelector(PICKFIRST, (first_choice, second_choice), [])
1972                     elif string == '[':
1973                         if not current_selector:
1974                             current_selector = FormatSelector(SINGLE, 'best', [])
1975                         format_filter = _parse_filter(tokens)
1976                         current_selector.filters.append(format_filter)
1977                     elif string == '(':
1978                         if current_selector:
1979                             raise syntax_error('Unexpected "("', start)
1980                         group = _parse_format_selection(tokens, inside_group=True)
1981                         current_selector = FormatSelector(GROUP, group, [])
1982                     elif string == '+':
1983                         if not current_selector:
1984                             raise syntax_error('Unexpected "+"', start)
1985                         selector_1 = current_selector
1986                         selector_2 = _parse_format_selection(tokens, inside_merge=True)
1987                         if not selector_2:
1988                             raise syntax_error('Expected a selector', start)
1989                         current_selector = FormatSelector(MERGE, (selector_1, selector_2), [])
1990                     else:
1991                         raise syntax_error('Operator not recognized: "{0}"'.format(string), start)
1992                 elif type == tokenize.ENDMARKER:
1993                     break
1994             if current_selector:
1995                 selectors.append(current_selector)
1996             return selectors
1997
1998         def _merge(formats_pair):
1999             format_1, format_2 = formats_pair
2000
2001             formats_info = []
2002             formats_info.extend(format_1.get('requested_formats', (format_1,)))
2003             formats_info.extend(format_2.get('requested_formats', (format_2,)))
2004
2005             if not allow_multiple_streams['video'] or not allow_multiple_streams['audio']:
2006                 get_no_more = {'video': False, 'audio': False}
2007                 for (i, fmt_info) in enumerate(formats_info):
2008                     if fmt_info.get('acodec') == fmt_info.get('vcodec') == 'none':
2009                         formats_info.pop(i)
2010                         continue
2011                     for aud_vid in ['audio', 'video']:
2012                         if not allow_multiple_streams[aud_vid] and fmt_info.get(aud_vid[0] + 'codec') != 'none':
2013                             if get_no_more[aud_vid]:
2014                                 formats_info.pop(i)
2015                                 break
2016                             get_no_more[aud_vid] = True
2017
2018             if len(formats_info) == 1:
2019                 return formats_info[0]
2020
2021             video_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('vcodec') != 'none']
2022             audio_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('acodec') != 'none']
2023
2024             the_only_video = video_fmts[0] if len(video_fmts) == 1 else None
2025             the_only_audio = audio_fmts[0] if len(audio_fmts) == 1 else None
2026
2027             output_ext = self.params.get('merge_output_format')
2028             if not output_ext:
2029                 if the_only_video:
2030                     output_ext = the_only_video['ext']
2031                 elif the_only_audio and not video_fmts:
2032                     output_ext = the_only_audio['ext']
2033                 else:
2034                     output_ext = 'mkv'
2035
2036             filtered = lambda *keys: filter(None, (traverse_obj(fmt, *keys) for fmt in formats_info))
2037
2038             new_dict = {
2039                 'requested_formats': formats_info,
2040                 'format': '+'.join(filtered('format')),
2041                 'format_id': '+'.join(filtered('format_id')),
2042                 'ext': output_ext,
2043                 'protocol': '+'.join(map(determine_protocol, formats_info)),
2044                 'language': '+'.join(orderedSet(filtered('language'))) or None,
2045                 'format_note': '+'.join(orderedSet(filtered('format_note'))) or None,
2046                 'filesize_approx': sum(filtered('filesize', 'filesize_approx')) or None,
2047                 'tbr': sum(filtered('tbr', 'vbr', 'abr')),
2048             }
2049
2050             if the_only_video:
2051                 new_dict.update({
2052                     'width': the_only_video.get('width'),
2053                     'height': the_only_video.get('height'),
2054                     'resolution': the_only_video.get('resolution') or self.format_resolution(the_only_video),
2055                     'fps': the_only_video.get('fps'),
2056                     'dynamic_range': the_only_video.get('dynamic_range'),
2057                     'vcodec': the_only_video.get('vcodec'),
2058                     'vbr': the_only_video.get('vbr'),
2059                     'stretched_ratio': the_only_video.get('stretched_ratio'),
2060                 })
2061
2062             if the_only_audio:
2063                 new_dict.update({
2064                     'acodec': the_only_audio.get('acodec'),
2065                     'abr': the_only_audio.get('abr'),
2066                     'asr': the_only_audio.get('asr'),
2067                 })
2068
2069             return new_dict
2070
2071         def _check_formats(formats):
2072             if not check_formats:
2073                 yield from formats
2074                 return
2075             yield from self._check_formats(formats)
2076
2077         def _build_selector_function(selector):
2078             if isinstance(selector, list):  # ,
2079                 fs = [_build_selector_function(s) for s in selector]
2080
2081                 def selector_function(ctx):
2082                     for f in fs:
2083                         yield from f(ctx)
2084                 return selector_function
2085
2086             elif selector.type == GROUP:  # ()
2087                 selector_function = _build_selector_function(selector.selector)
2088
2089             elif selector.type == PICKFIRST:  # /
2090                 fs = [_build_selector_function(s) for s in selector.selector]
2091
2092                 def selector_function(ctx):
2093                     for f in fs:
2094                         picked_formats = list(f(ctx))
2095                         if picked_formats:
2096                             return picked_formats
2097                     return []
2098
2099             elif selector.type == MERGE:  # +
2100                 selector_1, selector_2 = map(_build_selector_function, selector.selector)
2101
2102                 def selector_function(ctx):
2103                     for pair in itertools.product(selector_1(ctx), selector_2(ctx)):
2104                         yield _merge(pair)
2105
2106             elif selector.type == SINGLE:  # atom
2107                 format_spec = selector.selector or 'best'
2108
2109                 # TODO: Add allvideo, allaudio etc by generalizing the code with best/worst selector
2110                 if format_spec == 'all':
2111                     def selector_function(ctx):
2112                         yield from _check_formats(ctx['formats'][::-1])
2113                 elif format_spec == 'mergeall':
2114                     def selector_function(ctx):
2115                         formats = list(_check_formats(ctx['formats']))
2116                         if not formats:
2117                             return
2118                         merged_format = formats[-1]
2119                         for f in formats[-2::-1]:
2120                             merged_format = _merge((merged_format, f))
2121                         yield merged_format
2122
2123                 else:
2124                     format_fallback, format_reverse, format_idx = False, True, 1
2125                     mobj = re.match(
2126                         r'(?P<bw>best|worst|b|w)(?P<type>video|audio|v|a)?(?P<mod>\*)?(?:\.(?P<n>[1-9]\d*))?$',
2127                         format_spec)
2128                     if mobj is not None:
2129                         format_idx = int_or_none(mobj.group('n'), default=1)
2130                         format_reverse = mobj.group('bw')[0] == 'b'
2131                         format_type = (mobj.group('type') or [None])[0]
2132                         not_format_type = {'v': 'a', 'a': 'v'}.get(format_type)
2133                         format_modified = mobj.group('mod') is not None
2134
2135                         format_fallback = not format_type and not format_modified  # for b, w
2136                         _filter_f = (
2137                             (lambda f: f.get('%scodec' % format_type) != 'none')
2138                             if format_type and format_modified  # bv*, ba*, wv*, wa*
2139                             else (lambda f: f.get('%scodec' % not_format_type) == 'none')
2140                             if format_type  # bv, ba, wv, wa
2141                             else (lambda f: f.get('vcodec') != 'none' and f.get('acodec') != 'none')
2142                             if not format_modified  # b, w
2143                             else lambda f: True)  # b*, w*
2144                         filter_f = lambda f: _filter_f(f) and (
2145                             f.get('vcodec') != 'none' or f.get('acodec') != 'none')
2146                     else:
2147                         if format_spec in self._format_selection_exts['audio']:
2148                             filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') != 'none'
2149                         elif format_spec in self._format_selection_exts['video']:
2150                             filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') != 'none' and f.get('vcodec') != 'none'
2151                         elif format_spec in self._format_selection_exts['storyboards']:
2152                             filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') == 'none' and f.get('vcodec') == 'none'
2153                         else:
2154                             filter_f = lambda f: f.get('format_id') == format_spec  # id
2155
2156                     def selector_function(ctx):
2157                         formats = list(ctx['formats'])
2158                         matches = list(filter(filter_f, formats)) if filter_f is not None else formats
2159                         if format_fallback and ctx['incomplete_formats'] and not matches:
2160                             # for extractors with incomplete formats (audio only (soundcloud)
2161                             # or video only (imgur)) best/worst will fallback to
2162                             # best/worst {video,audio}-only format
2163                             matches = formats
2164                         matches = LazyList(_check_formats(matches[::-1 if format_reverse else 1]))
2165                         try:
2166                             yield matches[format_idx - 1]
2167                         except IndexError:
2168                             return
2169
2170             filters = [self._build_format_filter(f) for f in selector.filters]
2171
2172             def final_selector(ctx):
2173                 ctx_copy = dict(ctx)
2174                 for _filter in filters:
2175                     ctx_copy['formats'] = list(filter(_filter, ctx_copy['formats']))
2176                 return selector_function(ctx_copy)
2177             return final_selector
2178
2179         stream = io.BytesIO(format_spec.encode('utf-8'))
2180         try:
2181             tokens = list(_remove_unused_ops(compat_tokenize_tokenize(stream.readline)))
2182         except tokenize.TokenError:
2183             raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec)))
2184
2185         class TokenIterator(object):
2186             def __init__(self, tokens):
2187                 self.tokens = tokens
2188                 self.counter = 0
2189
2190             def __iter__(self):
2191                 return self
2192
2193             def __next__(self):
2194                 if self.counter >= len(self.tokens):
2195                     raise StopIteration()
2196                 value = self.tokens[self.counter]
2197                 self.counter += 1
2198                 return value
2199
2200             next = __next__
2201
2202             def restore_last_token(self):
2203                 self.counter -= 1
2204
2205         parsed_selector = _parse_format_selection(iter(TokenIterator(tokens)))
2206         return _build_selector_function(parsed_selector)
2207
2208     def _calc_headers(self, info_dict):
2209         res = std_headers.copy()
2210
2211         add_headers = info_dict.get('http_headers')
2212         if add_headers:
2213             res.update(add_headers)
2214
2215         cookies = self._calc_cookies(info_dict)
2216         if cookies:
2217             res['Cookie'] = cookies
2218
2219         if 'X-Forwarded-For' not in res:
2220             x_forwarded_for_ip = info_dict.get('__x_forwarded_for_ip')
2221             if x_forwarded_for_ip:
2222                 res['X-Forwarded-For'] = x_forwarded_for_ip
2223
2224         return res
2225
2226     def _calc_cookies(self, info_dict):
2227         pr = sanitized_Request(info_dict['url'])
2228         self.cookiejar.add_cookie_header(pr)
2229         return pr.get_header('Cookie')
2230
2231     def _sort_thumbnails(self, thumbnails):
2232         thumbnails.sort(key=lambda t: (
2233             t.get('preference') if t.get('preference') is not None else -1,
2234             t.get('width') if t.get('width') is not None else -1,
2235             t.get('height') if t.get('height') is not None else -1,
2236             t.get('id') if t.get('id') is not None else '',
2237             t.get('url')))
2238
2239     def _sanitize_thumbnails(self, info_dict):
2240         thumbnails = info_dict.get('thumbnails')
2241         if thumbnails is None:
2242             thumbnail = info_dict.get('thumbnail')
2243             if thumbnail:
2244                 info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}]
2245         if not thumbnails:
2246             return
2247
2248         def check_thumbnails(thumbnails):
2249             for t in thumbnails:
2250                 self.to_screen(f'[info] Testing thumbnail {t["id"]}')
2251                 try:
2252                     self.urlopen(HEADRequest(t['url']))
2253                 except network_exceptions as err:
2254                     self.to_screen(f'[info] Unable to connect to thumbnail {t["id"]} URL {t["url"]!r} - {err}. Skipping...')
2255                     continue
2256                 yield t
2257
2258         self._sort_thumbnails(thumbnails)
2259         for i, t in enumerate(thumbnails):
2260             if t.get('id') is None:
2261                 t['id'] = '%d' % i
2262             if t.get('width') and t.get('height'):
2263                 t['resolution'] = '%dx%d' % (t['width'], t['height'])
2264             t['url'] = sanitize_url(t['url'])
2265
2266         if self.params.get('check_formats') is True:
2267             info_dict['thumbnails'] = LazyList(check_thumbnails(thumbnails[::-1]), reverse=True)
2268         else:
2269             info_dict['thumbnails'] = thumbnails
2270
2271     def process_video_result(self, info_dict, download=True):
2272         assert info_dict.get('_type', 'video') == 'video'
2273
2274         if 'id' not in info_dict:
2275             raise ExtractorError('Missing "id" field in extractor result')
2276         if 'title' not in info_dict:
2277             raise ExtractorError('Missing "title" field in extractor result',
2278                                  video_id=info_dict['id'], ie=info_dict['extractor'])
2279
2280         def report_force_conversion(field, field_not, conversion):
2281             self.report_warning(
2282                 '"%s" field is not %s - forcing %s conversion, there is an error in extractor'
2283                 % (field, field_not, conversion))
2284
2285         def sanitize_string_field(info, string_field):
2286             field = info.get(string_field)
2287             if field is None or isinstance(field, compat_str):
2288                 return
2289             report_force_conversion(string_field, 'a string', 'string')
2290             info[string_field] = compat_str(field)
2291
2292         def sanitize_numeric_fields(info):
2293             for numeric_field in self._NUMERIC_FIELDS:
2294                 field = info.get(numeric_field)
2295                 if field is None or isinstance(field, compat_numeric_types):
2296                     continue
2297                 report_force_conversion(numeric_field, 'numeric', 'int')
2298                 info[numeric_field] = int_or_none(field)
2299
2300         sanitize_string_field(info_dict, 'id')
2301         sanitize_numeric_fields(info_dict)
2302
2303         if 'playlist' not in info_dict:
2304             # It isn't part of a playlist
2305             info_dict['playlist'] = None
2306             info_dict['playlist_index'] = None
2307
2308         self._sanitize_thumbnails(info_dict)
2309
2310         thumbnail = info_dict.get('thumbnail')
2311         thumbnails = info_dict.get('thumbnails')
2312         if thumbnail:
2313             info_dict['thumbnail'] = sanitize_url(thumbnail)
2314         elif thumbnails:
2315             info_dict['thumbnail'] = thumbnails[-1]['url']
2316
2317         if info_dict.get('display_id') is None and 'id' in info_dict:
2318             info_dict['display_id'] = info_dict['id']
2319
2320         if info_dict.get('duration') is not None:
2321             info_dict['duration_string'] = formatSeconds(info_dict['duration'])
2322
2323         for ts_key, date_key in (
2324                 ('timestamp', 'upload_date'),
2325                 ('release_timestamp', 'release_date'),
2326         ):
2327             if info_dict.get(date_key) is None and info_dict.get(ts_key) is not None:
2328                 # Working around out-of-range timestamp values (e.g. negative ones on Windows,
2329                 # see http://bugs.python.org/issue1646728)
2330                 try:
2331                     upload_date = datetime.datetime.utcfromtimestamp(info_dict[ts_key])
2332                     info_dict[date_key] = upload_date.strftime('%Y%m%d')
2333                 except (ValueError, OverflowError, OSError):
2334                     pass
2335
2336         live_keys = ('is_live', 'was_live')
2337         live_status = info_dict.get('live_status')
2338         if live_status is None:
2339             for key in live_keys:
2340                 if info_dict.get(key) is False:
2341                     continue
2342                 if info_dict.get(key):
2343                     live_status = key
2344                 break
2345             if all(info_dict.get(key) is False for key in live_keys):
2346                 live_status = 'not_live'
2347         if live_status:
2348             info_dict['live_status'] = live_status
2349             for key in live_keys:
2350                 if info_dict.get(key) is None:
2351                     info_dict[key] = (live_status == key)
2352
2353         # Auto generate title fields corresponding to the *_number fields when missing
2354         # in order to always have clean titles. This is very common for TV series.
2355         for field in ('chapter', 'season', 'episode'):
2356             if info_dict.get('%s_number' % field) is not None and not info_dict.get(field):
2357                 info_dict[field] = '%s %d' % (field.capitalize(), info_dict['%s_number' % field])
2358
2359         for cc_kind in ('subtitles', 'automatic_captions'):
2360             cc = info_dict.get(cc_kind)
2361             if cc:
2362                 for _, subtitle in cc.items():
2363                     for subtitle_format in subtitle:
2364                         if subtitle_format.get('url'):
2365                             subtitle_format['url'] = sanitize_url(subtitle_format['url'])
2366                         if subtitle_format.get('ext') is None:
2367                             subtitle_format['ext'] = determine_ext(subtitle_format['url']).lower()
2368
2369         automatic_captions = info_dict.get('automatic_captions')
2370         subtitles = info_dict.get('subtitles')
2371
2372         info_dict['requested_subtitles'] = self.process_subtitles(
2373             info_dict['id'], subtitles, automatic_captions)
2374
2375         if info_dict.get('formats') is None:
2376             # There's only one format available
2377             formats = [info_dict]
2378         else:
2379             formats = info_dict['formats']
2380
2381         info_dict['__has_drm'] = any(f.get('has_drm') for f in formats)
2382         if not self.params.get('allow_unplayable_formats'):
2383             formats = [f for f in formats if not f.get('has_drm')]
2384
2385         if info_dict.get('is_live'):
2386             get_from_start = bool(self.params.get('live_from_start'))
2387             formats = [f for f in formats if bool(f.get('is_from_start')) == get_from_start]
2388             if not get_from_start:
2389                 info_dict['title'] += ' ' + datetime.datetime.now().strftime('%Y-%m-%d %H:%M')
2390
2391         if not formats:
2392             self.raise_no_formats(info_dict)
2393
2394         def is_wellformed(f):
2395             url = f.get('url')
2396             if not url:
2397                 self.report_warning(
2398                     '"url" field is missing or empty - skipping format, '
2399                     'there is an error in extractor')
2400                 return False
2401             if isinstance(url, bytes):
2402                 sanitize_string_field(f, 'url')
2403             return True
2404
2405         # Filter out malformed formats for better extraction robustness
2406         formats = list(filter(is_wellformed, formats))
2407
2408         formats_dict = {}
2409
2410         # We check that all the formats have the format and format_id fields
2411         for i, format in enumerate(formats):
2412             sanitize_string_field(format, 'format_id')
2413             sanitize_numeric_fields(format)
2414             format['url'] = sanitize_url(format['url'])
2415             if not format.get('format_id'):
2416                 format['format_id'] = compat_str(i)
2417             else:
2418                 # Sanitize format_id from characters used in format selector expression
2419                 format['format_id'] = re.sub(r'[\s,/+\[\]()]', '_', format['format_id'])
2420             format_id = format['format_id']
2421             if format_id not in formats_dict:
2422                 formats_dict[format_id] = []
2423             formats_dict[format_id].append(format)
2424
2425         # Make sure all formats have unique format_id
2426         common_exts = set(itertools.chain(*self._format_selection_exts.values()))
2427         for format_id, ambiguous_formats in formats_dict.items():
2428             ambigious_id = len(ambiguous_formats) > 1
2429             for i, format in enumerate(ambiguous_formats):
2430                 if ambigious_id:
2431                     format['format_id'] = '%s-%d' % (format_id, i)
2432                 if format.get('ext') is None:
2433                     format['ext'] = determine_ext(format['url']).lower()
2434                 # Ensure there is no conflict between id and ext in format selection
2435                 # See https://github.com/yt-dlp/yt-dlp/issues/1282
2436                 if format['format_id'] != format['ext'] and format['format_id'] in common_exts:
2437                     format['format_id'] = 'f%s' % format['format_id']
2438
2439         for i, format in enumerate(formats):
2440             if format.get('format') is None:
2441                 format['format'] = '{id} - {res}{note}'.format(
2442                     id=format['format_id'],
2443                     res=self.format_resolution(format),
2444                     note=format_field(format, 'format_note', ' (%s)'),
2445                 )
2446             if format.get('protocol') is None:
2447                 format['protocol'] = determine_protocol(format)
2448             if format.get('resolution') is None:
2449                 format['resolution'] = self.format_resolution(format, default=None)
2450             if format.get('dynamic_range') is None and format.get('vcodec') != 'none':
2451                 format['dynamic_range'] = 'SDR'
2452             if (info_dict.get('duration') and format.get('tbr')
2453                     and not format.get('filesize') and not format.get('filesize_approx')):
2454                 format['filesize_approx'] = info_dict['duration'] * format['tbr'] * (1024 / 8)
2455
2456             # Add HTTP headers, so that external programs can use them from the
2457             # json output
2458             full_format_info = info_dict.copy()
2459             full_format_info.update(format)
2460             format['http_headers'] = self._calc_headers(full_format_info)
2461         # Remove private housekeeping stuff
2462         if '__x_forwarded_for_ip' in info_dict:
2463             del info_dict['__x_forwarded_for_ip']
2464
2465         # TODO Central sorting goes here
2466
2467         if self.params.get('check_formats') is True:
2468             formats = LazyList(self._check_formats(formats[::-1]), reverse=True)
2469
2470         if not formats or formats[0] is not info_dict:
2471             # only set the 'formats' fields if the original info_dict list them
2472             # otherwise we end up with a circular reference, the first (and unique)
2473             # element in the 'formats' field in info_dict is info_dict itself,
2474             # which can't be exported to json
2475             info_dict['formats'] = formats
2476
2477         info_dict, _ = self.pre_process(info_dict)
2478
2479         # The pre-processors may have modified the formats
2480         formats = info_dict.get('formats', [info_dict])
2481
2482         list_only = self.params.get('simulate') is None and (
2483             self.params.get('list_thumbnails') or self.params.get('listformats') or self.params.get('listsubtitles'))
2484         interactive_format_selection = not list_only and self.format_selector == '-'
2485         if self.params.get('list_thumbnails'):
2486             self.list_thumbnails(info_dict)
2487         if self.params.get('listsubtitles'):
2488             if 'automatic_captions' in info_dict:
2489                 self.list_subtitles(
2490                     info_dict['id'], automatic_captions, 'automatic captions')
2491             self.list_subtitles(info_dict['id'], subtitles, 'subtitles')
2492         if self.params.get('listformats') or interactive_format_selection:
2493             self.list_formats(info_dict)
2494         if list_only:
2495             # Without this printing, -F --print-json will not work
2496             self.__forced_printings(info_dict, self.prepare_filename(info_dict), incomplete=True)
2497             return
2498
2499         format_selector = self.format_selector
2500         if format_selector is None:
2501             req_format = self._default_format_spec(info_dict, download=download)
2502             self.write_debug('Default format spec: %s' % req_format)
2503             format_selector = self.build_format_selector(req_format)
2504
2505         while True:
2506             if interactive_format_selection:
2507                 req_format = input(
2508                     self._format_screen('\nEnter format selector: ', self.Styles.EMPHASIS))
2509                 try:
2510                     format_selector = self.build_format_selector(req_format)
2511                 except SyntaxError as err:
2512                     self.report_error(err, tb=False, is_error=False)
2513                     continue
2514
2515             # While in format selection we may need to have an access to the original
2516             # format set in order to calculate some metrics or do some processing.
2517             # For now we need to be able to guess whether original formats provided
2518             # by extractor are incomplete or not (i.e. whether extractor provides only
2519             # video-only or audio-only formats) for proper formats selection for
2520             # extractors with such incomplete formats (see
2521             # https://github.com/ytdl-org/youtube-dl/pull/5556).
2522             # Since formats may be filtered during format selection and may not match
2523             # the original formats the results may be incorrect. Thus original formats
2524             # or pre-calculated metrics should be passed to format selection routines
2525             # as well.
2526             # We will pass a context object containing all necessary additional data
2527             # instead of just formats.
2528             # This fixes incorrect format selection issue (see
2529             # https://github.com/ytdl-org/youtube-dl/issues/10083).
2530             incomplete_formats = (
2531                 # All formats are video-only or
2532                 all(f.get('vcodec') != 'none' and f.get('acodec') == 'none' for f in formats)
2533                 # all formats are audio-only
2534                 or all(f.get('vcodec') == 'none' and f.get('acodec') != 'none' for f in formats))
2535
2536             ctx = {
2537                 'formats': formats,
2538                 'incomplete_formats': incomplete_formats,
2539             }
2540
2541             formats_to_download = list(format_selector(ctx))
2542             if interactive_format_selection and not formats_to_download:
2543                 self.report_error('Requested format is not available', tb=False, is_error=False)
2544                 continue
2545             break
2546
2547         if not formats_to_download:
2548             if not self.params.get('ignore_no_formats_error'):
2549                 raise ExtractorError('Requested format is not available', expected=True,
2550                                      video_id=info_dict['id'], ie=info_dict['extractor'])
2551             else:
2552                 self.report_warning('Requested format is not available')
2553                 # Process what we can, even without any available formats.
2554                 self.process_info(dict(info_dict))
2555         elif download:
2556             self.to_screen(
2557                 '[info] %s: Downloading %d format(s): %s' % (
2558                     info_dict['id'], len(formats_to_download),
2559                     ", ".join([f['format_id'] for f in formats_to_download])))
2560             for fmt in formats_to_download:
2561                 new_info = dict(info_dict)
2562                 # Save a reference to the original info_dict so that it can be modified in process_info if needed
2563                 new_info['__original_infodict'] = info_dict
2564                 new_info.update(fmt)
2565                 self.process_info(new_info)
2566         # We update the info dict with the selected best quality format (backwards compatibility)
2567         if formats_to_download:
2568             info_dict.update(formats_to_download[-1])
2569         return info_dict
2570
2571     def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
2572         """Select the requested subtitles and their format"""
2573         available_subs = {}
2574         if normal_subtitles and self.params.get('writesubtitles'):
2575             available_subs.update(normal_subtitles)
2576         if automatic_captions and self.params.get('writeautomaticsub'):
2577             for lang, cap_info in automatic_captions.items():
2578                 if lang not in available_subs:
2579                     available_subs[lang] = cap_info
2580
2581         if (not self.params.get('writesubtitles') and not
2582                 self.params.get('writeautomaticsub') or not
2583                 available_subs):
2584             return None
2585
2586         all_sub_langs = available_subs.keys()
2587         if self.params.get('allsubtitles', False):
2588             requested_langs = all_sub_langs
2589         elif self.params.get('subtitleslangs', False):
2590             # A list is used so that the order of languages will be the same as
2591             # given in subtitleslangs. See https://github.com/yt-dlp/yt-dlp/issues/1041
2592             requested_langs = []
2593             for lang_re in self.params.get('subtitleslangs'):
2594                 if lang_re == 'all':
2595                     requested_langs.extend(all_sub_langs)
2596                     continue
2597                 discard = lang_re[0] == '-'
2598                 if discard:
2599                     lang_re = lang_re[1:]
2600                 current_langs = filter(re.compile(lang_re + '$').match, all_sub_langs)
2601                 if discard:
2602                     for lang in current_langs:
2603                         while lang in requested_langs:
2604                             requested_langs.remove(lang)
2605                 else:
2606                     requested_langs.extend(current_langs)
2607             requested_langs = orderedSet(requested_langs)
2608         elif 'en' in available_subs:
2609             requested_langs = ['en']
2610         else:
2611             requested_langs = [list(all_sub_langs)[0]]
2612         if requested_langs:
2613             self.write_debug('Downloading subtitles: %s' % ', '.join(requested_langs))
2614
2615         formats_query = self.params.get('subtitlesformat', 'best')
2616         formats_preference = formats_query.split('/') if formats_query else []
2617         subs = {}
2618         for lang in requested_langs:
2619             formats = available_subs.get(lang)
2620             if formats is None:
2621                 self.report_warning('%s subtitles not available for %s' % (lang, video_id))
2622                 continue
2623             for ext in formats_preference:
2624                 if ext == 'best':
2625                     f = formats[-1]
2626                     break
2627                 matches = list(filter(lambda f: f['ext'] == ext, formats))
2628                 if matches:
2629                     f = matches[-1]
2630                     break
2631             else:
2632                 f = formats[-1]
2633                 self.report_warning(
2634                     'No subtitle format found matching "%s" for language %s, '
2635                     'using %s' % (formats_query, lang, f['ext']))
2636             subs[lang] = f
2637         return subs
2638
2639     def _forceprint(self, tmpl, info_dict):
2640         mobj = re.match(r'\w+(=?)$', tmpl)
2641         if mobj and mobj.group(1):
2642             tmpl = f'{tmpl[:-1]} = %({tmpl[:-1]})s'
2643         elif mobj:
2644             tmpl = '%({})s'.format(tmpl)
2645         self.to_stdout(self.evaluate_outtmpl(tmpl, info_dict))
2646
2647     def __forced_printings(self, info_dict, filename, incomplete):
2648         def print_mandatory(field, actual_field=None):
2649             if actual_field is None:
2650                 actual_field = field
2651             if (self.params.get('force%s' % field, False)
2652                     and (not incomplete or info_dict.get(actual_field) is not None)):
2653                 self.to_stdout(info_dict[actual_field])
2654
2655         def print_optional(field):
2656             if (self.params.get('force%s' % field, False)
2657                     and info_dict.get(field) is not None):
2658                 self.to_stdout(info_dict[field])
2659
2660         info_dict = info_dict.copy()
2661         if filename is not None:
2662             info_dict['filename'] = filename
2663         if info_dict.get('requested_formats') is not None:
2664             # For RTMP URLs, also include the playpath
2665             info_dict['urls'] = '\n'.join(f['url'] + f.get('play_path', '') for f in info_dict['requested_formats'])
2666         elif 'url' in info_dict:
2667             info_dict['urls'] = info_dict['url'] + info_dict.get('play_path', '')
2668
2669         if self.params['forceprint'].get('video') or self.params.get('forcejson'):
2670             self.post_extract(info_dict)
2671         for tmpl in self.params['forceprint'].get('video', []):
2672             self._forceprint(tmpl, info_dict)
2673
2674         print_mandatory('title')
2675         print_mandatory('id')
2676         print_mandatory('url', 'urls')
2677         print_optional('thumbnail')
2678         print_optional('description')
2679         print_optional('filename')
2680         if self.params.get('forceduration') and info_dict.get('duration') is not None:
2681             self.to_stdout(formatSeconds(info_dict['duration']))
2682         print_mandatory('format')
2683
2684         if self.params.get('forcejson'):
2685             self.to_stdout(json.dumps(self.sanitize_info(info_dict)))
2686
2687     def dl(self, name, info, subtitle=False, test=False):
2688         if not info.get('url'):
2689             self.raise_no_formats(info, True)
2690
2691         if test:
2692             verbose = self.params.get('verbose')
2693             params = {
2694                 'test': True,
2695                 'quiet': self.params.get('quiet') or not verbose,
2696                 'verbose': verbose,
2697                 'noprogress': not verbose,
2698                 'nopart': True,
2699                 'skip_unavailable_fragments': False,
2700                 'keep_fragments': False,
2701                 'overwrites': True,
2702                 '_no_ytdl_file': True,
2703             }
2704         else:
2705             params = self.params
2706         fd = get_suitable_downloader(info, params, to_stdout=(name == '-'))(self, params)
2707         if not test:
2708             for ph in self._progress_hooks:
2709                 fd.add_progress_hook(ph)
2710             urls = '", "'.join([f['url'] for f in info.get('requested_formats', [])] or [info['url']])
2711             self.write_debug('Invoking downloader on "%s"' % urls)
2712
2713         # Note: Ideally info should be a deep-copied so that hooks cannot modify it.
2714         # But it may contain objects that are not deep-copyable
2715         new_info = self._copy_infodict(info)
2716         if new_info.get('http_headers') is None:
2717             new_info['http_headers'] = self._calc_headers(new_info)
2718         return fd.download(name, new_info, subtitle)
2719
2720     def process_info(self, info_dict):
2721         """Process a single resolved IE result."""
2722
2723         assert info_dict.get('_type', 'video') == 'video'
2724
2725         max_downloads = self.params.get('max_downloads')
2726         if max_downloads is not None:
2727             if self._num_downloads >= int(max_downloads):
2728                 raise MaxDownloadsReached()
2729
2730         # TODO: backward compatibility, to be removed
2731         info_dict['fulltitle'] = info_dict['title']
2732
2733         if 'format' not in info_dict and 'ext' in info_dict:
2734             info_dict['format'] = info_dict['ext']
2735
2736         if self._match_entry(info_dict) is not None:
2737             return
2738
2739         self.post_extract(info_dict)
2740         self._num_downloads += 1
2741
2742         # info_dict['_filename'] needs to be set for backward compatibility
2743         info_dict['_filename'] = full_filename = self.prepare_filename(info_dict, warn=True)
2744         temp_filename = self.prepare_filename(info_dict, 'temp')
2745         files_to_move = {}
2746
2747         # Forced printings
2748         self.__forced_printings(info_dict, full_filename, incomplete=('format' not in info_dict))
2749
2750         if self.params.get('simulate'):
2751             if self.params.get('force_write_download_archive', False):
2752                 self.record_download_archive(info_dict)
2753             # Do nothing else if in simulate mode
2754             return
2755
2756         if full_filename is None:
2757             return
2758         if not self._ensure_dir_exists(encodeFilename(full_filename)):
2759             return
2760         if not self._ensure_dir_exists(encodeFilename(temp_filename)):
2761             return
2762
2763         if self._write_description('video', info_dict,
2764                                    self.prepare_filename(info_dict, 'description')) is None:
2765             return
2766
2767         sub_files = self._write_subtitles(info_dict, temp_filename)
2768         if sub_files is None:
2769             return
2770         files_to_move.update(dict(sub_files))
2771
2772         thumb_files = self._write_thumbnails(
2773             'video', info_dict, temp_filename, self.prepare_filename(info_dict, 'thumbnail'))
2774         if thumb_files is None:
2775             return
2776         files_to_move.update(dict(thumb_files))
2777
2778         infofn = self.prepare_filename(info_dict, 'infojson')
2779         _infojson_written = self._write_info_json('video', info_dict, infofn)
2780         if _infojson_written:
2781             info_dict['infojson_filename'] = infofn
2782             # For backward compatibility, even though it was a private field
2783             info_dict['__infojson_filename'] = infofn
2784         elif _infojson_written is None:
2785             return
2786
2787         # Note: Annotations are deprecated
2788         annofn = None
2789         if self.params.get('writeannotations', False):
2790             annofn = self.prepare_filename(info_dict, 'annotation')
2791         if annofn:
2792             if not self._ensure_dir_exists(encodeFilename(annofn)):
2793                 return
2794             if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(annofn)):
2795                 self.to_screen('[info] Video annotations are already present')
2796             elif not info_dict.get('annotations'):
2797                 self.report_warning('There are no annotations to write.')
2798             else:
2799                 try:
2800                     self.to_screen('[info] Writing video annotations to: ' + annofn)
2801                     with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
2802                         annofile.write(info_dict['annotations'])
2803                 except (KeyError, TypeError):
2804                     self.report_warning('There are no annotations to write.')
2805                 except (OSError, IOError):
2806                     self.report_error('Cannot write annotations file: ' + annofn)
2807                     return
2808
2809         # Write internet shortcut files
2810         def _write_link_file(link_type):
2811             if 'webpage_url' not in info_dict:
2812                 self.report_error('Cannot write internet shortcut file because the "webpage_url" field is missing in the media information')
2813                 return False
2814             linkfn = replace_extension(self.prepare_filename(info_dict, 'link'), link_type, info_dict.get('ext'))
2815             if not self._ensure_dir_exists(encodeFilename(linkfn)):
2816                 return False
2817             if self.params.get('overwrites', True) and os.path.exists(encodeFilename(linkfn)):
2818                 self.to_screen(f'[info] Internet shortcut (.{link_type}) is already present')
2819                 return True
2820             try:
2821                 self.to_screen(f'[info] Writing internet shortcut (.{link_type}) to: {linkfn}')
2822                 with io.open(encodeFilename(to_high_limit_path(linkfn)), 'w', encoding='utf-8',
2823                              newline='\r\n' if link_type == 'url' else '\n') as linkfile:
2824                     template_vars = {'url': iri_to_uri(info_dict['webpage_url'])}
2825                     if link_type == 'desktop':
2826                         template_vars['filename'] = linkfn[:-(len(link_type) + 1)]
2827                     linkfile.write(LINK_TEMPLATES[link_type] % template_vars)
2828             except (OSError, IOError):
2829                 self.report_error(f'Cannot write internet shortcut {linkfn}')
2830                 return False
2831             return True
2832
2833         write_links = {
2834             'url': self.params.get('writeurllink'),
2835             'webloc': self.params.get('writewebloclink'),
2836             'desktop': self.params.get('writedesktoplink'),
2837         }
2838         if self.params.get('writelink'):
2839             link_type = ('webloc' if sys.platform == 'darwin'
2840                          else 'desktop' if sys.platform.startswith('linux')
2841                          else 'url')
2842             write_links[link_type] = True
2843
2844         if any(should_write and not _write_link_file(link_type)
2845                for link_type, should_write in write_links.items()):
2846             return
2847
2848         try:
2849             info_dict, files_to_move = self.pre_process(info_dict, 'before_dl', files_to_move)
2850         except PostProcessingError as err:
2851             self.report_error('Preprocessing: %s' % str(err))
2852             return
2853
2854         must_record_download_archive = False
2855         if self.params.get('skip_download', False):
2856             info_dict['filepath'] = temp_filename
2857             info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))
2858             info_dict['__files_to_move'] = files_to_move
2859             info_dict = self.run_pp(MoveFilesAfterDownloadPP(self, False), info_dict)
2860         else:
2861             # Download
2862             info_dict.setdefault('__postprocessors', [])
2863             try:
2864
2865                 def existing_file(*filepaths):
2866                     ext = info_dict.get('ext')
2867                     final_ext = self.params.get('final_ext', ext)
2868                     existing_files = []
2869                     for file in orderedSet(filepaths):
2870                         if final_ext != ext:
2871                             converted = replace_extension(file, final_ext, ext)
2872                             if os.path.exists(encodeFilename(converted)):
2873                                 existing_files.append(converted)
2874                         if os.path.exists(encodeFilename(file)):
2875                             existing_files.append(file)
2876
2877                     if not existing_files or self.params.get('overwrites', False):
2878                         for file in orderedSet(existing_files):
2879                             self.report_file_delete(file)
2880                             os.remove(encodeFilename(file))
2881                         return None
2882
2883                     info_dict['ext'] = os.path.splitext(existing_files[0])[1][1:]
2884                     return existing_files[0]
2885
2886                 success = True
2887                 if info_dict.get('requested_formats') is not None:
2888
2889                     def compatible_formats(formats):
2890                         # TODO: some formats actually allow this (mkv, webm, ogg, mp4), but not all of them.
2891                         video_formats = [format for format in formats if format.get('vcodec') != 'none']
2892                         audio_formats = [format for format in formats if format.get('acodec') != 'none']
2893                         if len(video_formats) > 2 or len(audio_formats) > 2:
2894                             return False
2895
2896                         # Check extension
2897                         exts = set(format.get('ext') for format in formats)
2898                         COMPATIBLE_EXTS = (
2899                             set(('mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'ismv', 'isma')),
2900                             set(('webm',)),
2901                         )
2902                         for ext_sets in COMPATIBLE_EXTS:
2903                             if ext_sets.issuperset(exts):
2904                                 return True
2905                         # TODO: Check acodec/vcodec
2906                         return False
2907
2908                     requested_formats = info_dict['requested_formats']
2909                     old_ext = info_dict['ext']
2910                     if self.params.get('merge_output_format') is None:
2911                         if not compatible_formats(requested_formats):
2912                             info_dict['ext'] = 'mkv'
2913                             self.report_warning(
2914                                 'Requested formats are incompatible for merge and will be merged into mkv')
2915                         if (info_dict['ext'] == 'webm'
2916                                 and info_dict.get('thumbnails')
2917                                 # check with type instead of pp_key, __name__, or isinstance
2918                                 # since we dont want any custom PPs to trigger this
2919                                 and any(type(pp) == EmbedThumbnailPP for pp in self._pps['post_process'])):
2920                             info_dict['ext'] = 'mkv'
2921                             self.report_warning(
2922                                 'webm doesn\'t support embedding a thumbnail, mkv will be used')
2923                     new_ext = info_dict['ext']
2924
2925                     def correct_ext(filename, ext=new_ext):
2926                         if filename == '-':
2927                             return filename
2928                         filename_real_ext = os.path.splitext(filename)[1][1:]
2929                         filename_wo_ext = (
2930                             os.path.splitext(filename)[0]
2931                             if filename_real_ext in (old_ext, new_ext)
2932                             else filename)
2933                         return '%s.%s' % (filename_wo_ext, ext)
2934
2935                     # Ensure filename always has a correct extension for successful merge
2936                     full_filename = correct_ext(full_filename)
2937                     temp_filename = correct_ext(temp_filename)
2938                     dl_filename = existing_file(full_filename, temp_filename)
2939                     info_dict['__real_download'] = False
2940
2941                     downloaded = []
2942                     merger = FFmpegMergerPP(self)
2943
2944                     fd = get_suitable_downloader(info_dict, self.params, to_stdout=temp_filename == '-')
2945                     if dl_filename is not None:
2946                         self.report_file_already_downloaded(dl_filename)
2947                     elif fd:
2948                         for f in requested_formats if fd != FFmpegFD else []:
2949                             f['filepath'] = fname = prepend_extension(
2950                                 correct_ext(temp_filename, info_dict['ext']),
2951                                 'f%s' % f['format_id'], info_dict['ext'])
2952                             downloaded.append(fname)
2953                         info_dict['url'] = '\n'.join(f['url'] for f in requested_formats)
2954                         success, real_download = self.dl(temp_filename, info_dict)
2955                         info_dict['__real_download'] = real_download
2956                     else:
2957                         if self.params.get('allow_unplayable_formats'):
2958                             self.report_warning(
2959                                 'You have requested merging of multiple formats '
2960                                 'while also allowing unplayable formats to be downloaded. '
2961                                 'The formats won\'t be merged to prevent data corruption.')
2962                         elif not merger.available:
2963                             self.report_warning(
2964                                 'You have requested merging of multiple formats but ffmpeg is not installed. '
2965                                 'The formats won\'t be merged.')
2966
2967                         if temp_filename == '-':
2968                             reason = ('using a downloader other than ffmpeg' if FFmpegFD.can_merge_formats(info_dict, self.params)
2969                                       else 'but the formats are incompatible for simultaneous download' if merger.available
2970                                       else 'but ffmpeg is not installed')
2971                             self.report_warning(
2972                                 f'You have requested downloading multiple formats to stdout {reason}. '
2973                                 'The formats will be streamed one after the other')
2974                             fname = temp_filename
2975                         for f in requested_formats:
2976                             new_info = dict(info_dict)
2977                             del new_info['requested_formats']
2978                             new_info.update(f)
2979                             if temp_filename != '-':
2980                                 fname = prepend_extension(
2981                                     correct_ext(temp_filename, new_info['ext']),
2982                                     'f%s' % f['format_id'], new_info['ext'])
2983                                 if not self._ensure_dir_exists(fname):
2984                                     return
2985                                 f['filepath'] = fname
2986                                 downloaded.append(fname)
2987                             partial_success, real_download = self.dl(fname, new_info)
2988                             info_dict['__real_download'] = info_dict['__real_download'] or real_download
2989                             success = success and partial_success
2990
2991                     if downloaded and merger.available and not self.params.get('allow_unplayable_formats'):
2992                         info_dict['__postprocessors'].append(merger)
2993                         info_dict['__files_to_merge'] = downloaded
2994                         # Even if there were no downloads, it is being merged only now
2995                         info_dict['__real_download'] = True
2996                     else:
2997                         for file in downloaded:
2998                             files_to_move[file] = None
2999                 else:
3000                     # Just a single file
3001                     dl_filename = existing_file(full_filename, temp_filename)
3002                     if dl_filename is None or dl_filename == temp_filename:
3003                         # dl_filename == temp_filename could mean that the file was partially downloaded with --no-part.
3004                         # So we should try to resume the download
3005                         success, real_download = self.dl(temp_filename, info_dict)
3006                         info_dict['__real_download'] = real_download
3007                     else:
3008                         self.report_file_already_downloaded(dl_filename)
3009
3010                 dl_filename = dl_filename or temp_filename
3011                 info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))
3012
3013             except network_exceptions as err:
3014                 self.report_error('unable to download video data: %s' % error_to_compat_str(err))
3015                 return
3016             except (OSError, IOError) as err:
3017                 raise UnavailableVideoError(err)
3018             except (ContentTooShortError, ) as err:
3019                 self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
3020                 return
3021
3022             if success and full_filename != '-':
3023
3024                 def fixup():
3025                     do_fixup = True
3026                     fixup_policy = self.params.get('fixup')
3027                     vid = info_dict['id']
3028
3029                     if fixup_policy in ('ignore', 'never'):
3030                         return
3031                     elif fixup_policy == 'warn':
3032                         do_fixup = False
3033                     elif fixup_policy != 'force':
3034                         assert fixup_policy in ('detect_or_warn', None)
3035                         if not info_dict.get('__real_download'):
3036                             do_fixup = False
3037
3038                     def ffmpeg_fixup(cndn, msg, cls):
3039                         if not cndn:
3040                             return
3041                         if not do_fixup:
3042                             self.report_warning(f'{vid}: {msg}')
3043                             return
3044                         pp = cls(self)
3045                         if pp.available:
3046                             info_dict['__postprocessors'].append(pp)
3047                         else:
3048                             self.report_warning(f'{vid}: {msg}. Install ffmpeg to fix this automatically')
3049
3050                     stretched_ratio = info_dict.get('stretched_ratio')
3051                     ffmpeg_fixup(
3052                         stretched_ratio not in (1, None),
3053                         f'Non-uniform pixel ratio {stretched_ratio}',
3054                         FFmpegFixupStretchedPP)
3055
3056                     ffmpeg_fixup(
3057                         (info_dict.get('requested_formats') is None
3058                          and info_dict.get('container') == 'm4a_dash'
3059                          and info_dict.get('ext') == 'm4a'),
3060                         'writing DASH m4a. Only some players support this container',
3061                         FFmpegFixupM4aPP)
3062
3063                     downloader = get_suitable_downloader(info_dict, self.params) if 'protocol' in info_dict else None
3064                     downloader = downloader.__name__ if downloader else None
3065
3066                     if info_dict.get('requested_formats') is None:  # Not necessary if doing merger
3067                         ffmpeg_fixup(downloader == 'HlsFD',
3068                                      'Possible MPEG-TS in MP4 container or malformed AAC timestamps',
3069                                      FFmpegFixupM3u8PP)
3070                         ffmpeg_fixup(info_dict.get('is_live') and downloader == 'DashSegmentsFD',
3071                                      'Possible duplicate MOOV atoms', FFmpegFixupDuplicateMoovPP)
3072
3073                     ffmpeg_fixup(downloader == 'WebSocketFragmentFD', 'Malformed timestamps detected', FFmpegFixupTimestampPP)
3074                     ffmpeg_fixup(downloader == 'WebSocketFragmentFD', 'Malformed duration detected', FFmpegFixupDurationPP)
3075
3076                 fixup()
3077                 try:
3078                     info_dict = self.post_process(dl_filename, info_dict, files_to_move)
3079                 except PostProcessingError as err:
3080                     self.report_error('Postprocessing: %s' % str(err))
3081                     return
3082                 try:
3083                     for ph in self._post_hooks:
3084                         ph(info_dict['filepath'])
3085                 except Exception as err:
3086                     self.report_error('post hooks: %s' % str(err))
3087                     return
3088                 must_record_download_archive = True
3089
3090         if must_record_download_archive or self.params.get('force_write_download_archive', False):
3091             self.record_download_archive(info_dict)
3092         max_downloads = self.params.get('max_downloads')
3093         if max_downloads is not None and self._num_downloads >= int(max_downloads):
3094             raise MaxDownloadsReached()
3095
3096     def __download_wrapper(self, func):
3097         @functools.wraps(func)
3098         def wrapper(*args, **kwargs):
3099             try:
3100                 res = func(*args, **kwargs)
3101             except UnavailableVideoError as e:
3102                 self.report_error(e)
3103             except MaxDownloadsReached as e:
3104                 self.to_screen(f'[info] {e}')
3105                 raise
3106             except DownloadCancelled as e:
3107                 self.to_screen(f'[info] {e}')
3108                 if not self.params.get('break_per_url'):
3109                     raise
3110             else:
3111                 if self.params.get('dump_single_json', False):
3112                     self.post_extract(res)
3113                     self.to_stdout(json.dumps(self.sanitize_info(res)))
3114         return wrapper
3115
3116     def download(self, url_list):
3117         """Download a given list of URLs."""
3118         url_list = variadic(url_list)  # Passing a single URL is a common mistake
3119         outtmpl = self.outtmpl_dict['default']
3120         if (len(url_list) > 1
3121                 and outtmpl != '-'
3122                 and '%' not in outtmpl
3123                 and self.params.get('max_downloads') != 1):
3124             raise SameFileError(outtmpl)
3125
3126         for url in url_list:
3127             self.__download_wrapper(self.extract_info)(
3128                 url, force_generic_extractor=self.params.get('force_generic_extractor', False))
3129
3130         return self._download_retcode
3131
3132     def download_with_info_file(self, info_filename):
3133         with contextlib.closing(fileinput.FileInput(
3134                 [info_filename], mode='r',
3135                 openhook=fileinput.hook_encoded('utf-8'))) as f:
3136             # FileInput doesn't have a read method, we can't call json.load
3137             info = self.sanitize_info(json.loads('\n'.join(f)), self.params.get('clean_infojson', True))
3138         try:
3139             self.__download_wrapper(self.process_ie_result)(info, download=True)
3140         except (DownloadError, EntryNotInPlaylist, ReExtractInfo) as e:
3141             if not isinstance(e, EntryNotInPlaylist):
3142                 self.to_stderr('\r')
3143             webpage_url = info.get('webpage_url')
3144             if webpage_url is not None:
3145                 self.report_warning(f'The info failed to download: {e}; trying with URL {webpage_url}')
3146                 return self.download([webpage_url])
3147             else:
3148                 raise
3149         return self._download_retcode
3150
3151     @staticmethod
3152     def sanitize_info(info_dict, remove_private_keys=False):
3153         ''' Sanitize the infodict for converting to json '''
3154         if info_dict is None:
3155             return info_dict
3156         info_dict.setdefault('epoch', int(time.time()))
3157         remove_keys = {'__original_infodict'}  # Always remove this since this may contain a copy of the entire dict
3158         keep_keys = ['_type']  # Always keep this to facilitate load-info-json
3159         if remove_private_keys:
3160             remove_keys |= {
3161                 'requested_formats', 'requested_subtitles', 'requested_entries', 'entries',
3162                 'filepath', 'infojson_filename', 'original_url', 'playlist_autonumber',
3163             }
3164             reject = lambda k, v: k not in keep_keys and (
3165                 k.startswith('_') or k in remove_keys or v is None)
3166         else:
3167             reject = lambda k, v: k in remove_keys
3168
3169         def filter_fn(obj):
3170             if isinstance(obj, dict):
3171                 return {k: filter_fn(v) for k, v in obj.items() if not reject(k, v)}
3172             elif isinstance(obj, (list, tuple, set, LazyList)):
3173                 return list(map(filter_fn, obj))
3174             elif obj is None or isinstance(obj, (str, int, float, bool)):
3175                 return obj
3176             else:
3177                 return repr(obj)
3178
3179         return filter_fn(info_dict)
3180
3181     @staticmethod
3182     def filter_requested_info(info_dict, actually_filter=True):
3183         ''' Alias of sanitize_info for backward compatibility '''
3184         return YoutubeDL.sanitize_info(info_dict, actually_filter)
3185
3186     def run_pp(self, pp, infodict):
3187         files_to_delete = []
3188         if '__files_to_move' not in infodict:
3189             infodict['__files_to_move'] = {}
3190         try:
3191             files_to_delete, infodict = pp.run(infodict)
3192         except PostProcessingError as e:
3193             # Must be True and not 'only_download'
3194             if self.params.get('ignoreerrors') is True:
3195                 self.report_error(e)
3196                 return infodict
3197             raise
3198
3199         if not files_to_delete:
3200             return infodict
3201         if self.params.get('keepvideo', False):
3202             for f in files_to_delete:
3203                 infodict['__files_to_move'].setdefault(f, '')
3204         else:
3205             for old_filename in set(files_to_delete):
3206                 self.to_screen('Deleting original file %s (pass -k to keep)' % old_filename)
3207                 try:
3208                     os.remove(encodeFilename(old_filename))
3209                 except (IOError, OSError):
3210                     self.report_warning('Unable to remove downloaded original file')
3211                 if old_filename in infodict['__files_to_move']:
3212                     del infodict['__files_to_move'][old_filename]
3213         return infodict
3214
3215     @staticmethod
3216     def post_extract(info_dict):
3217         def actual_post_extract(info_dict):
3218             if info_dict.get('_type') in ('playlist', 'multi_video'):
3219                 for video_dict in info_dict.get('entries', {}):
3220                     actual_post_extract(video_dict or {})
3221                 return
3222
3223             post_extractor = info_dict.get('__post_extractor') or (lambda: {})
3224             extra = post_extractor().items()
3225             info_dict.update(extra)
3226             info_dict.pop('__post_extractor', None)
3227
3228             original_infodict = info_dict.get('__original_infodict') or {}
3229             original_infodict.update(extra)
3230             original_infodict.pop('__post_extractor', None)
3231
3232         actual_post_extract(info_dict or {})
3233
3234     def pre_process(self, ie_info, key='pre_process', files_to_move=None):
3235         info = dict(ie_info)
3236         info['__files_to_move'] = files_to_move or {}
3237         for pp in self._pps[key]:
3238             info = self.run_pp(pp, info)
3239         return info, info.pop('__files_to_move', None)
3240
3241     def post_process(self, filename, ie_info, files_to_move=None):
3242         """Run all the postprocessors on the given file."""
3243         info = dict(ie_info)
3244         info['filepath'] = filename
3245         info['__files_to_move'] = files_to_move or {}
3246
3247         for pp in ie_info.get('__postprocessors', []) + self._pps['post_process']:
3248             info = self.run_pp(pp, info)
3249         info = self.run_pp(MoveFilesAfterDownloadPP(self), info)
3250         del info['__files_to_move']
3251         for pp in self._pps['after_move']:
3252             info = self.run_pp(pp, info)
3253         return info
3254
3255     def _make_archive_id(self, info_dict):
3256         video_id = info_dict.get('id')
3257         if not video_id:
3258             return
3259         # Future-proof against any change in case
3260         # and backwards compatibility with prior versions
3261         extractor = info_dict.get('extractor_key') or info_dict.get('ie_key')  # key in a playlist
3262         if extractor is None:
3263             url = str_or_none(info_dict.get('url'))
3264             if not url:
3265                 return
3266             # Try to find matching extractor for the URL and take its ie_key
3267             for ie_key, ie in self._ies.items():
3268                 if ie.suitable(url):
3269                     extractor = ie_key
3270                     break
3271             else:
3272                 return
3273         return '%s %s' % (extractor.lower(), video_id)
3274
3275     def in_download_archive(self, info_dict):
3276         fn = self.params.get('download_archive')
3277         if fn is None:
3278             return False
3279
3280         vid_id = self._make_archive_id(info_dict)
3281         if not vid_id:
3282             return False  # Incomplete video information
3283
3284         return vid_id in self.archive
3285
3286     def record_download_archive(self, info_dict):
3287         fn = self.params.get('download_archive')
3288         if fn is None:
3289             return
3290         vid_id = self._make_archive_id(info_dict)
3291         assert vid_id
3292         with locked_file(fn, 'a', encoding='utf-8') as archive_file:
3293             archive_file.write(vid_id + '\n')
3294         self.archive.add(vid_id)
3295
3296     @staticmethod
3297     def format_resolution(format, default='unknown'):
3298         if format.get('vcodec') == 'none' and format.get('acodec') != 'none':
3299             return 'audio only'
3300         if format.get('resolution') is not None:
3301             return format['resolution']
3302         if format.get('width') and format.get('height'):
3303             return '%dx%d' % (format['width'], format['height'])
3304         elif format.get('height'):
3305             return '%sp' % format['height']
3306         elif format.get('width'):
3307             return '%dx?' % format['width']
3308         return default
3309
3310     def _format_note(self, fdict):
3311         res = ''
3312         if fdict.get('ext') in ['f4f', 'f4m']:
3313             res += '(unsupported)'
3314         if fdict.get('language'):
3315             if res:
3316                 res += ' '
3317             res += '[%s]' % fdict['language']
3318         if fdict.get('format_note') is not None:
3319             if res:
3320                 res += ' '
3321             res += fdict['format_note']
3322         if fdict.get('tbr') is not None:
3323             if res:
3324                 res += ', '
3325             res += '%4dk' % fdict['tbr']
3326         if fdict.get('container') is not None:
3327             if res:
3328                 res += ', '
3329             res += '%s container' % fdict['container']
3330         if (fdict.get('vcodec') is not None
3331                 and fdict.get('vcodec') != 'none'):
3332             if res:
3333                 res += ', '
3334             res += fdict['vcodec']
3335             if fdict.get('vbr') is not None:
3336                 res += '@'
3337         elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
3338             res += 'video@'
3339         if fdict.get('vbr') is not None:
3340             res += '%4dk' % fdict['vbr']
3341         if fdict.get('fps') is not None:
3342             if res:
3343                 res += ', '
3344             res += '%sfps' % fdict['fps']
3345         if fdict.get('acodec') is not None:
3346             if res:
3347                 res += ', '
3348             if fdict['acodec'] == 'none':
3349                 res += 'video only'
3350             else:
3351                 res += '%-5s' % fdict['acodec']
3352         elif fdict.get('abr') is not None:
3353             if res:
3354                 res += ', '
3355             res += 'audio'
3356         if fdict.get('abr') is not None:
3357             res += '@%3dk' % fdict['abr']
3358         if fdict.get('asr') is not None:
3359             res += ' (%5dHz)' % fdict['asr']
3360         if fdict.get('filesize') is not None:
3361             if res:
3362                 res += ', '
3363             res += format_bytes(fdict['filesize'])
3364         elif fdict.get('filesize_approx') is not None:
3365             if res:
3366                 res += ', '
3367             res += '~' + format_bytes(fdict['filesize_approx'])
3368         return res
3369
3370     def _list_format_headers(self, *headers):
3371         if self.params.get('listformats_table', True) is not False:
3372             return [self._format_screen(header, self.Styles.HEADERS) for header in headers]
3373         return headers
3374
3375     def list_formats(self, info_dict):
3376         if not info_dict.get('formats') and not info_dict.get('url'):
3377             self.to_screen('%s has no formats' % info_dict['id'])
3378             return
3379         self.to_screen('[info] Available formats for %s:' % info_dict['id'])
3380
3381         formats = info_dict.get('formats', [info_dict])
3382         new_format = self.params.get('listformats_table', True) is not False
3383         if new_format:
3384             delim = self._format_screen('\u2502', self.Styles.DELIM, '|', test_encoding=True)
3385             table = [
3386                 [
3387                     self._format_screen(format_field(f, 'format_id'), self.Styles.ID),
3388                     format_field(f, 'ext'),
3389                     format_field(f, func=self.format_resolution, ignore=('audio only', 'images')),
3390                     format_field(f, 'fps', '\t%d'),
3391                     format_field(f, 'dynamic_range', '%s', ignore=(None, 'SDR')).replace('HDR', ''),
3392                     delim,
3393                     format_field(f, 'filesize', ' \t%s', func=format_bytes) + format_field(f, 'filesize_approx', '~\t%s', func=format_bytes),
3394                     format_field(f, 'tbr', '\t%dk'),
3395                     shorten_protocol_name(f.get('protocol', '')),
3396                     delim,
3397                     format_field(f, 'vcodec', default='unknown').replace(
3398                         'none',
3399                         'images' if f.get('acodec') == 'none'
3400                         else self._format_screen('audio only', self.Styles.SUPPRESS)),
3401                     format_field(f, 'vbr', '\t%dk'),
3402                     format_field(f, 'acodec', default='unknown').replace(
3403                         'none',
3404                         '' if f.get('vcodec') == 'none'
3405                         else self._format_screen('video only', self.Styles.SUPPRESS)),
3406                     format_field(f, 'abr', '\t%dk'),
3407                     format_field(f, 'asr', '\t%dHz'),
3408                     join_nonempty(
3409                         self._format_screen('UNSUPPORTED', 'light red') if f.get('ext') in ('f4f', 'f4m') else None,
3410                         format_field(f, 'language', '[%s]'),
3411                         join_nonempty(
3412                             format_field(f, 'format_note'),
3413                             format_field(f, 'container', ignore=(None, f.get('ext'))),
3414                             delim=', '),
3415                         delim=' '),
3416                 ] for f in formats if f.get('preference') is None or f['preference'] >= -1000]
3417             header_line = self._list_format_headers(
3418                 'ID', 'EXT', 'RESOLUTION', '\tFPS', 'HDR', delim, '\tFILESIZE', '\tTBR', 'PROTO',
3419                 delim, 'VCODEC', '\tVBR', 'ACODEC', '\tABR', '\tASR', 'MORE INFO')
3420         else:
3421             table = [
3422                 [
3423                     format_field(f, 'format_id'),
3424                     format_field(f, 'ext'),
3425                     self.format_resolution(f),
3426                     self._format_note(f)]
3427                 for f in formats
3428                 if f.get('preference') is None or f['preference'] >= -1000]
3429             header_line = ['format code', 'extension', 'resolution', 'note']
3430
3431         self.to_stdout(render_table(
3432             header_line, table,
3433             extra_gap=(0 if new_format else 1),
3434             hide_empty=new_format,
3435             delim=new_format and self._format_screen('\u2500', self.Styles.DELIM, '-', test_encoding=True)))
3436
3437     def list_thumbnails(self, info_dict):
3438         thumbnails = list(info_dict.get('thumbnails'))
3439         if not thumbnails:
3440             self.to_screen('[info] No thumbnails present for %s' % info_dict['id'])
3441             return
3442
3443         self.to_screen(
3444             '[info] Thumbnails for %s:' % info_dict['id'])
3445         self.to_stdout(render_table(
3446             self._list_format_headers('ID', 'Width', 'Height', 'URL'),
3447             [[t['id'], t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']] for t in thumbnails]))
3448
3449     def list_subtitles(self, video_id, subtitles, name='subtitles'):
3450         if not subtitles:
3451             self.to_screen('%s has no %s' % (video_id, name))
3452             return
3453         self.to_screen(
3454             'Available %s for %s:' % (name, video_id))
3455
3456         def _row(lang, formats):
3457             exts, names = zip(*((f['ext'], f.get('name') or 'unknown') for f in reversed(formats)))
3458             if len(set(names)) == 1:
3459                 names = [] if names[0] == 'unknown' else names[:1]
3460             return [lang, ', '.join(names), ', '.join(exts)]
3461
3462         self.to_stdout(render_table(
3463             self._list_format_headers('Language', 'Name', 'Formats'),
3464             [_row(lang, formats) for lang, formats in subtitles.items()],
3465             hide_empty=True))
3466
3467     def urlopen(self, req):
3468         """ Start an HTTP download """
3469         if isinstance(req, compat_basestring):
3470             req = sanitized_Request(req)
3471         return self._opener.open(req, timeout=self._socket_timeout)
3472
3473     def print_debug_header(self):
3474         if not self.params.get('verbose'):
3475             return
3476
3477         def get_encoding(stream):
3478             ret = getattr(stream, 'encoding', 'missing (%s)' % type(stream).__name__)
3479             if not supports_terminal_sequences(stream):
3480                 from .compat import WINDOWS_VT_MODE
3481                 ret += ' (No VT)' if WINDOWS_VT_MODE is False else ' (No ANSI)'
3482             return ret
3483
3484         encoding_str = 'Encodings: locale %s, fs %s, out %s, err %s, pref %s' % (
3485             locale.getpreferredencoding(),
3486             sys.getfilesystemencoding(),
3487             get_encoding(self._screen_file), get_encoding(self._err_file),
3488             self.get_encoding())
3489
3490         logger = self.params.get('logger')
3491         if logger:
3492             write_debug = lambda msg: logger.debug(f'[debug] {msg}')
3493             write_debug(encoding_str)
3494         else:
3495             write_string(f'[debug] {encoding_str}\n', encoding=None)
3496             write_debug = lambda msg: self._write_string(f'[debug] {msg}\n')
3497
3498         source = detect_variant()
3499         write_debug(join_nonempty(
3500             'yt-dlp version', __version__,
3501             f'[{RELEASE_GIT_HEAD}]' if RELEASE_GIT_HEAD else '',
3502             '' if source == 'unknown' else f'({source})',
3503             delim=' '))
3504         if not _LAZY_LOADER:
3505             if os.environ.get('YTDLP_NO_LAZY_EXTRACTORS'):
3506                 write_debug('Lazy loading extractors is forcibly disabled')
3507             else:
3508                 write_debug('Lazy loading extractors is disabled')
3509         if plugin_extractors or plugin_postprocessors:
3510             write_debug('Plugins: %s' % [
3511                 '%s%s' % (klass.__name__, '' if klass.__name__ == name else f' as {name}')
3512                 for name, klass in itertools.chain(plugin_extractors.items(), plugin_postprocessors.items())])
3513         if self.params.get('compat_opts'):
3514             write_debug('Compatibility options: %s' % ', '.join(self.params.get('compat_opts')))
3515
3516         if source == 'source':
3517             try:
3518                 sp = Popen(
3519                     ['git', 'rev-parse', '--short', 'HEAD'],
3520                     stdout=subprocess.PIPE, stderr=subprocess.PIPE,
3521                     cwd=os.path.dirname(os.path.abspath(__file__)))
3522                 out, err = sp.communicate_or_kill()
3523                 out = out.decode().strip()
3524                 if re.match('[0-9a-f]+', out):
3525                     write_debug('Git HEAD: %s' % out)
3526             except Exception:
3527                 try:
3528                     sys.exc_clear()
3529                 except Exception:
3530                     pass
3531
3532         def python_implementation():
3533             impl_name = platform.python_implementation()
3534             if impl_name == 'PyPy' and hasattr(sys, 'pypy_version_info'):
3535                 return impl_name + ' version %d.%d.%d' % sys.pypy_version_info[:3]
3536             return impl_name
3537
3538         write_debug('Python version %s (%s %s) - %s' % (
3539             platform.python_version(),
3540             python_implementation(),
3541             platform.architecture()[0],
3542             platform_name()))
3543
3544         exe_versions, ffmpeg_features = FFmpegPostProcessor.get_versions_and_features(self)
3545         ffmpeg_features = {key for key, val in ffmpeg_features.items() if val}
3546         if ffmpeg_features:
3547             exe_versions['ffmpeg'] += ' (%s)' % ','.join(ffmpeg_features)
3548
3549         exe_versions['rtmpdump'] = rtmpdump_version()
3550         exe_versions['phantomjs'] = PhantomJSwrapper._version()
3551         exe_str = ', '.join(
3552             f'{exe} {v}' for exe, v in sorted(exe_versions.items()) if v
3553         ) or 'none'
3554         write_debug('exe versions: %s' % exe_str)
3555
3556         from .downloader.websocket import has_websockets
3557         from .postprocessor.embedthumbnail import has_mutagen
3558         from .cookies import SQLITE_AVAILABLE, SECRETSTORAGE_AVAILABLE
3559
3560         lib_str = join_nonempty(
3561             compat_pycrypto_AES and compat_pycrypto_AES.__name__.split('.')[0],
3562             SECRETSTORAGE_AVAILABLE and 'secretstorage',
3563             has_mutagen and 'mutagen',
3564             SQLITE_AVAILABLE and 'sqlite',
3565             has_websockets and 'websockets',
3566             delim=', ') or 'none'
3567         write_debug('Optional libraries: %s' % lib_str)
3568
3569         proxy_map = {}
3570         for handler in self._opener.handlers:
3571             if hasattr(handler, 'proxies'):
3572                 proxy_map.update(handler.proxies)
3573         write_debug(f'Proxy map: {proxy_map}')
3574
3575         # Not implemented
3576         if False and self.params.get('call_home'):
3577             ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode('utf-8')
3578             write_debug('Public IP address: %s' % ipaddr)
3579             latest_version = self.urlopen(
3580                 'https://yt-dl.org/latest/version').read().decode('utf-8')
3581             if version_tuple(latest_version) > version_tuple(__version__):
3582                 self.report_warning(
3583                     'You are using an outdated version (newest version: %s)! '
3584                     'See https://yt-dl.org/update if you need help updating.' %
3585                     latest_version)
3586
3587     def _setup_opener(self):
3588         timeout_val = self.params.get('socket_timeout')
3589         self._socket_timeout = 20 if timeout_val is None else float(timeout_val)
3590
3591         opts_cookiesfrombrowser = self.params.get('cookiesfrombrowser')
3592         opts_cookiefile = self.params.get('cookiefile')
3593         opts_proxy = self.params.get('proxy')
3594
3595         self.cookiejar = load_cookies(opts_cookiefile, opts_cookiesfrombrowser, self)
3596
3597         cookie_processor = YoutubeDLCookieProcessor(self.cookiejar)
3598         if opts_proxy is not None:
3599             if opts_proxy == '':
3600                 proxies = {}
3601             else:
3602                 proxies = {'http': opts_proxy, 'https': opts_proxy}
3603         else:
3604             proxies = compat_urllib_request.getproxies()
3605             # Set HTTPS proxy to HTTP one if given (https://github.com/ytdl-org/youtube-dl/issues/805)
3606             if 'http' in proxies and 'https' not in proxies:
3607                 proxies['https'] = proxies['http']
3608         proxy_handler = PerRequestProxyHandler(proxies)
3609
3610         debuglevel = 1 if self.params.get('debug_printtraffic') else 0
3611         https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
3612         ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
3613         redirect_handler = YoutubeDLRedirectHandler()
3614         data_handler = compat_urllib_request_DataHandler()
3615
3616         # When passing our own FileHandler instance, build_opener won't add the
3617         # default FileHandler and allows us to disable the file protocol, which
3618         # can be used for malicious purposes (see
3619         # https://github.com/ytdl-org/youtube-dl/issues/8227)
3620         file_handler = compat_urllib_request.FileHandler()
3621
3622         def file_open(*args, **kwargs):
3623             raise compat_urllib_error.URLError('file:// scheme is explicitly disabled in yt-dlp for security reasons')
3624         file_handler.file_open = file_open
3625
3626         opener = compat_urllib_request.build_opener(
3627             proxy_handler, https_handler, cookie_processor, ydlh, redirect_handler, data_handler, file_handler)
3628
3629         # Delete the default user-agent header, which would otherwise apply in
3630         # cases where our custom HTTP handler doesn't come into play
3631         # (See https://github.com/ytdl-org/youtube-dl/issues/1309 for details)
3632         opener.addheaders = []
3633         self._opener = opener
3634
3635     def encode(self, s):
3636         if isinstance(s, bytes):
3637             return s  # Already encoded
3638
3639         try:
3640             return s.encode(self.get_encoding())
3641         except UnicodeEncodeError as err:
3642             err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
3643             raise
3644
3645     def get_encoding(self):
3646         encoding = self.params.get('encoding')
3647         if encoding is None:
3648             encoding = preferredencoding()
3649         return encoding
3650
3651     def _write_info_json(self, label, ie_result, infofn, overwrite=None):
3652         ''' Write infojson and returns True = written, False = skip, None = error '''
3653         if overwrite is None:
3654             overwrite = self.params.get('overwrites', True)
3655         if not self.params.get('writeinfojson'):
3656             return False
3657         elif not infofn:
3658             self.write_debug(f'Skipping writing {label} infojson')
3659             return False
3660         elif not self._ensure_dir_exists(infofn):
3661             return None
3662         elif not overwrite and os.path.exists(infofn):
3663             self.to_screen(f'[info] {label.title()} metadata is already present')
3664         else:
3665             self.to_screen(f'[info] Writing {label} metadata as JSON to: {infofn}')
3666             try:
3667                 write_json_file(self.sanitize_info(ie_result, self.params.get('clean_infojson', True)), infofn)
3668             except (OSError, IOError):
3669                 self.report_error(f'Cannot write {label} metadata to JSON file {infofn}')
3670                 return None
3671         return True
3672
3673     def _write_description(self, label, ie_result, descfn):
3674         ''' Write description and returns True = written, False = skip, None = error '''
3675         if not self.params.get('writedescription'):
3676             return False
3677         elif not descfn:
3678             self.write_debug(f'Skipping writing {label} description')
3679             return False
3680         elif not self._ensure_dir_exists(descfn):
3681             return None
3682         elif not self.params.get('overwrites', True) and os.path.exists(descfn):
3683             self.to_screen(f'[info] {label.title()} description is already present')
3684         elif ie_result.get('description') is None:
3685             self.report_warning(f'There\'s no {label} description to write')
3686             return False
3687         else:
3688             try:
3689                 self.to_screen(f'[info] Writing {label} description to: {descfn}')
3690                 with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
3691                     descfile.write(ie_result['description'])
3692             except (OSError, IOError):
3693                 self.report_error(f'Cannot write {label} description file {descfn}')
3694                 return None
3695         return True
3696
3697     def _write_subtitles(self, info_dict, filename):
3698         ''' Write subtitles to file and return list of (sub_filename, final_sub_filename); or None if error'''
3699         ret = []
3700         subtitles = info_dict.get('requested_subtitles')
3701         if not subtitles or not (self.params.get('writesubtitles') or self.params.get('writeautomaticsub')):
3702             # subtitles download errors are already managed as troubles in relevant IE
3703             # that way it will silently go on when used with unsupporting IE
3704             return ret
3705
3706         sub_filename_base = self.prepare_filename(info_dict, 'subtitle')
3707         if not sub_filename_base:
3708             self.to_screen('[info] Skipping writing video subtitles')
3709             return ret
3710         for sub_lang, sub_info in subtitles.items():
3711             sub_format = sub_info['ext']
3712             sub_filename = subtitles_filename(filename, sub_lang, sub_format, info_dict.get('ext'))
3713             sub_filename_final = subtitles_filename(sub_filename_base, sub_lang, sub_format, info_dict.get('ext'))
3714             if not self.params.get('overwrites', True) and os.path.exists(sub_filename):
3715                 self.to_screen(f'[info] Video subtitle {sub_lang}.{sub_format} is already present')
3716                 sub_info['filepath'] = sub_filename
3717                 ret.append((sub_filename, sub_filename_final))
3718                 continue
3719
3720             self.to_screen(f'[info] Writing video subtitles to: {sub_filename}')
3721             if sub_info.get('data') is not None:
3722                 try:
3723                     # Use newline='' to prevent conversion of newline characters
3724                     # See https://github.com/ytdl-org/youtube-dl/issues/10268
3725                     with io.open(sub_filename, 'w', encoding='utf-8', newline='') as subfile:
3726                         subfile.write(sub_info['data'])
3727                     sub_info['filepath'] = sub_filename
3728                     ret.append((sub_filename, sub_filename_final))
3729                     continue
3730                 except (OSError, IOError):
3731                     self.report_error(f'Cannot write video subtitles file {sub_filename}')
3732                     return None
3733
3734             try:
3735                 sub_copy = sub_info.copy()
3736                 sub_copy.setdefault('http_headers', info_dict.get('http_headers'))
3737                 self.dl(sub_filename, sub_copy, subtitle=True)
3738                 sub_info['filepath'] = sub_filename
3739                 ret.append((sub_filename, sub_filename_final))
3740             except (ExtractorError, IOError, OSError, ValueError) + network_exceptions as err:
3741                 self.report_warning(f'Unable to download video subtitles for {sub_lang!r}: {err}')
3742                 continue
3743         return ret
3744
3745     def _write_thumbnails(self, label, info_dict, filename, thumb_filename_base=None):
3746         ''' Write thumbnails to file and return list of (thumb_filename, final_thumb_filename) '''
3747         write_all = self.params.get('write_all_thumbnails', False)
3748         thumbnails, ret = [], []
3749         if write_all or self.params.get('writethumbnail', False):
3750             thumbnails = info_dict.get('thumbnails') or []
3751         multiple = write_all and len(thumbnails) > 1
3752
3753         if thumb_filename_base is None:
3754             thumb_filename_base = filename
3755         if thumbnails and not thumb_filename_base:
3756             self.write_debug(f'Skipping writing {label} thumbnail')
3757             return ret
3758
3759         for idx, t in list(enumerate(thumbnails))[::-1]:
3760             thumb_ext = (f'{t["id"]}.' if multiple else '') + determine_ext(t['url'], 'jpg')
3761             thumb_display_id = f'{label} thumbnail {t["id"]}'
3762             thumb_filename = replace_extension(filename, thumb_ext, info_dict.get('ext'))
3763             thumb_filename_final = replace_extension(thumb_filename_base, thumb_ext, info_dict.get('ext'))
3764
3765             if not self.params.get('overwrites', True) and os.path.exists(thumb_filename):
3766                 ret.append((thumb_filename, thumb_filename_final))
3767                 t['filepath'] = thumb_filename
3768                 self.to_screen('[info] %s is already present' % (
3769                     thumb_display_id if multiple else f'{label} thumbnail').capitalize())
3770             else:
3771                 self.to_screen(f'[info] Downloading {thumb_display_id} ...')
3772                 try:
3773                     uf = self.urlopen(t['url'])
3774                     self.to_screen(f'[info] Writing {thumb_display_id} to: {thumb_filename}')
3775                     with open(encodeFilename(thumb_filename), 'wb') as thumbf:
3776                         shutil.copyfileobj(uf, thumbf)
3777                     ret.append((thumb_filename, thumb_filename_final))
3778                     t['filepath'] = thumb_filename
3779                 except network_exceptions as err:
3780                     thumbnails.pop(idx)
3781                     self.report_warning(f'Unable to download {thumb_display_id}: {err}')
3782             if ret and not write_all:
3783                 break
3784         return ret