yt_dlp/YoutubeDL.py

   1 #!/usr/bin/env python3
   2 # coding: utf-8
   3
   4 from __future__ import absolute_import, unicode_literals
   5
   6 import collections
   7 import contextlib
   8 import datetime
   9 import errno
  10 import fileinput
  11 import functools
  12 import io
  13 import itertools
  14 import json
  15 import locale
  16 import operator
  17 import os
  18 import platform
  19 import re
  20 import shutil
  21 import subprocess
  22 import sys
  23 import tempfile
  24 import time
  25 import tokenize
  26 import traceback
  27 import random
  28 import unicodedata
  29
  30 from enum import Enum
  31 from string import ascii_letters
  32
  33 from .compat import (
  34     compat_basestring,
  35     compat_get_terminal_size,
  36     compat_kwargs,
  37     compat_numeric_types,
  38     compat_os_name,
  39     compat_pycrypto_AES,
  40     compat_shlex_quote,
  41     compat_str,
  42     compat_tokenize_tokenize,
  43     compat_urllib_error,
  44     compat_urllib_request,
  45     compat_urllib_request_DataHandler,
  46     windows_enable_vt_mode,
  47 )
  48 from .cookies import load_cookies
  49 from .utils import (
  50     age_restricted,
  51     args_to_str,
  52     ContentTooShortError,
  53     date_from_str,
  54     DateRange,
  55     DEFAULT_OUTTMPL,
  56     determine_ext,
  57     determine_protocol,
  58     DownloadCancelled,
  59     DownloadError,
  60     encode_compat_str,
  61     encodeFilename,
  62     EntryNotInPlaylist,
  63     error_to_compat_str,
  64     ExistingVideoReached,
  65     expand_path,
  66     ExtractorError,
  67     float_or_none,
  68     format_bytes,
  69     format_field,
  70     format_decimal_suffix,
  71     formatSeconds,
  72     GeoRestrictedError,
  73     get_domain,
  74     HEADRequest,
  75     InAdvancePagedList,
  76     int_or_none,
  77     iri_to_uri,
  78     ISO3166Utils,
  79     join_nonempty,
  80     LazyList,
  81     LINK_TEMPLATES,
  82     locked_file,
  83     make_dir,
  84     make_HTTPS_handler,
  85     MaxDownloadsReached,
  86     network_exceptions,
  87     number_of_digits,
  88     orderedSet,
  89     OUTTMPL_TYPES,
  90     PagedList,
  91     parse_filesize,
  92     PerRequestProxyHandler,
  93     platform_name,
  94     Popen,
  95     POSTPROCESS_WHEN,
  96     PostProcessingError,
  97     preferredencoding,
  98     prepend_extension,
  99     ReExtractInfo,
 100     register_socks_protocols,
 101     RejectedVideoReached,
 102     remove_terminal_sequences,
 103     render_table,
 104     replace_extension,
 105     SameFileError,
 106     sanitize_filename,
 107     sanitize_path,
 108     sanitize_url,
 109     sanitized_Request,
 110     std_headers,
 111     STR_FORMAT_RE_TMPL,
 112     STR_FORMAT_TYPES,
 113     str_or_none,
 114     strftime_or_none,
 115     subtitles_filename,
 116     supports_terminal_sequences,
 117     timetuple_from_msec,
 118     to_high_limit_path,
 119     traverse_obj,
 120     try_get,
 121     UnavailableVideoError,
 122     url_basename,
 123     variadic,
 124     version_tuple,
 125     write_json_file,
 126     write_string,
 127     YoutubeDLCookieProcessor,
 128     YoutubeDLHandler,
 129     YoutubeDLRedirectHandler,
 130 )
 131 from .cache import Cache
 132 from .minicurses import format_text
 133 from .extractor import (
 134     gen_extractor_classes,
 135     get_info_extractor,
 136     _LAZY_LOADER,
 137     _PLUGIN_CLASSES as plugin_extractors
 138 )
 139 from .extractor.openload import PhantomJSwrapper
 140 from .downloader import (
 141     FFmpegFD,
 142     get_suitable_downloader,
 143     shorten_protocol_name
 144 )
 145 from .downloader.rtmp import rtmpdump_version
 146 from .postprocessor import (
 147     get_postprocessor,
 148     EmbedThumbnailPP,
 149     FFmpegFixupDuplicateMoovPP,
 150     FFmpegFixupDurationPP,
 151     FFmpegFixupM3u8PP,
 152     FFmpegFixupM4aPP,
 153     FFmpegFixupStretchedPP,
 154     FFmpegFixupTimestampPP,
 155     FFmpegMergerPP,
 156     FFmpegPostProcessor,
 157     MoveFilesAfterDownloadPP,
 158     _PLUGIN_CLASSES as plugin_postprocessors
 159 )
 160 from .update import detect_variant
 161 from .version import __version__, RELEASE_GIT_HEAD
 162
 163 if compat_os_name == 'nt':
 164     import ctypes
 165
 166
 167 class YoutubeDL(object):
 168     """YoutubeDL class.
 169
 170     YoutubeDL objects are the ones responsible for downloading the
 171     actual video file and writing it to disk if the user has requested
 172     it, among some other tasks. In most cases there should be one per
 173     program. As, given a video URL, the downloader doesn't know how to
 174     extract all the needed information, task that InfoExtractors do, it
 175     has to pass the URL to one of them.
 176
 177     For this, YoutubeDL objects have a method that allows
 178     InfoExtractors to be registered in a given order. When it is passed
 179     a URL, the YoutubeDL object hands it to the first InfoExtractor it
 180     finds that reports being able to handle it. The InfoExtractor extracts
 181     all the information about the video or videos the URL refers to, and
 182     YoutubeDL processes the extracted information, possibly using a File
 183     Downloader to download the video.
 184
 185     YoutubeDL objects accept a lot of parameters. In order not to saturate
 186     the object constructor with arguments, it receives a dictionary of
 187     options instead. These options are available through the params
 188     attribute for the InfoExtractors to use. The YoutubeDL also
 189     registers itself as the downloader in charge for the InfoExtractors
 190     that are added to it, so this is a "mutual registration".
 191
 192     Available options:
 193
 194     username:          Username for authentication purposes.
 195     password:          Password for authentication purposes.
 196     videopassword:     Password for accessing a video.
 197     ap_mso:            Adobe Pass multiple-system operator identifier.
 198     ap_username:       Multiple-system operator account username.
 199     ap_password:       Multiple-system operator account password.
 200     usenetrc:          Use netrc for authentication instead.
 201     verbose:           Print additional info to stdout.
 202     quiet:             Do not print messages to stdout.
 203     no_warnings:       Do not print out anything for warnings.
 204     forceprint:        A dict with keys WHEN mapped to a list of templates to
 205                        print to stdout. The allowed keys are video or any of the
 206                        items in utils.POSTPROCESS_WHEN.
 207                        For compatibility, a single list is also accepted
 208     print_to_file:     A dict with keys WHEN (same as forceprint) mapped to
 209                        a list of tuples with (template, filename)
 210     forceurl:          Force printing final URL. (Deprecated)
 211     forcetitle:        Force printing title. (Deprecated)
 212     forceid:           Force printing ID. (Deprecated)
 213     forcethumbnail:    Force printing thumbnail URL. (Deprecated)
 214     forcedescription:  Force printing description. (Deprecated)
 215     forcefilename:     Force printing final filename. (Deprecated)
 216     forceduration:     Force printing duration. (Deprecated)
 217     forcejson:         Force printing info_dict as JSON.
 218     dump_single_json:  Force printing the info_dict of the whole playlist
 219                        (or video) as a single JSON line.
 220     force_write_download_archive: Force writing download archive regardless
 221                        of 'skip_download' or 'simulate'.
 222     simulate:          Do not download the video files. If unset (or None),
 223                        simulate only if listsubtitles, listformats or list_thumbnails is used
 224     format:            Video format code. See "FORMAT SELECTION" for more details.
 225                        You can also pass a function. The function takes 'ctx' as
 226                        argument and returns the formats to download.
 227                        See "build_format_selector" for an implementation
 228     allow_unplayable_formats:   Allow unplayable formats to be extracted and downloaded.
 229     ignore_no_formats_error: Ignore "No video formats" error. Useful for
 230                        extracting metadata even if the video is not actually
 231                        available for download (experimental)
 232     format_sort:       A list of fields by which to sort the video formats.
 233                        See "Sorting Formats" for more details.
 234     format_sort_force: Force the given format_sort. See "Sorting Formats"
 235                        for more details.
 236     allow_multiple_video_streams:   Allow multiple video streams to be merged
 237                        into a single file
 238     allow_multiple_audio_streams:   Allow multiple audio streams to be merged
 239                        into a single file
 240     check_formats:     Whether to test if the formats are downloadable.
 241                        Can be True (check all), False (check none),
 242                        'selected' (check selected formats),
 243                        or None (check only if requested by extractor)
 244     paths:             Dictionary of output paths. The allowed keys are 'home',
 245                        'temp' and the keys of OUTTMPL_TYPES (in utils.py)
 246     outtmpl:           Dictionary of templates for output names. Allowed keys
 247                        are 'default' and the keys of OUTTMPL_TYPES (in utils.py).
 248                        For compatibility with youtube-dl, a single string can also be used
 249     outtmpl_na_placeholder: Placeholder for unavailable meta fields.
 250     restrictfilenames: Do not allow "&" and spaces in file names
 251     trim_file_name:    Limit length of filename (extension excluded)
 252     windowsfilenames:  Force the filenames to be windows compatible
 253     ignoreerrors:      Do not stop on download/postprocessing errors.
 254                        Can be 'only_download' to ignore only download errors.
 255                        Default is 'only_download' for CLI, but False for API
 256     skip_playlist_after_errors: Number of allowed failures until the rest of
 257                        the playlist is skipped
 258     force_generic_extractor: Force downloader to use the generic extractor
 259     overwrites:        Overwrite all video and metadata files if True,
 260                        overwrite only non-video files if None
 261                        and don't overwrite any file if False
 262                        For compatibility with youtube-dl,
 263                        "nooverwrites" may also be used instead
 264     playliststart:     Playlist item to start at.
 265     playlistend:       Playlist item to end at.
 266     playlist_items:    Specific indices of playlist to download.
 267     playlistreverse:   Download playlist items in reverse order.
 268     playlistrandom:    Download playlist items in random order.
 269     matchtitle:        Download only matching titles.
 270     rejecttitle:       Reject downloads for matching titles.
 271     logger:            Log messages to a logging.Logger instance.
 272     logtostderr:       Log messages to stderr instead of stdout.
 273     consoletitle:      Display progress in console window's titlebar.
 274     writedescription:  Write the video description to a .description file
 275     writeinfojson:     Write the video metadata to a .info.json file
 276     clean_infojson:    Remove private fields from the infojson
 277     getcomments:       Extract video comments. This will not be written to disk
 278                        unless writeinfojson is also given
 279     writeannotations:  Write the video annotations to a .annotations.xml file
 280     writethumbnail:    Write the thumbnail image to a file
 281     allow_playlist_files: Whether to write playlists' description, infojson etc
 282                        also to disk when using the 'write*' options
 283     write_all_thumbnails:  Write all thumbnail formats to files
 284     writelink:         Write an internet shortcut file, depending on the
 285                        current platform (.url/.webloc/.desktop)
 286     writeurllink:      Write a Windows internet shortcut file (.url)
 287     writewebloclink:   Write a macOS internet shortcut file (.webloc)
 288     writedesktoplink:  Write a Linux internet shortcut file (.desktop)
 289     writesubtitles:    Write the video subtitles to a file
 290     writeautomaticsub: Write the automatically generated subtitles to a file
 291     allsubtitles:      Deprecated - Use subtitleslangs = ['all']
 292                        Downloads all the subtitles of the video
 293                        (requires writesubtitles or writeautomaticsub)
 294     listsubtitles:     Lists all available subtitles for the video
 295     subtitlesformat:   The format code for subtitles
 296     subtitleslangs:    List of languages of the subtitles to download (can be regex).
 297                        The list may contain "all" to refer to all the available
 298                        subtitles. The language can be prefixed with a "-" to
 299                        exclude it from the requested languages. Eg: ['all', '-live_chat']
 300     keepvideo:         Keep the video file after post-processing
 301     daterange:         A DateRange object, download only if the upload_date is in the range.
 302     skip_download:     Skip the actual download of the video file
 303     cachedir:          Location of the cache files in the filesystem.
 304                        False to disable filesystem cache.
 305     noplaylist:        Download single video instead of a playlist if in doubt.
 306     age_limit:         An integer representing the user's age in years.
 307                        Unsuitable videos for the given age are skipped.
 308     min_views:         An integer representing the minimum view count the video
 309                        must have in order to not be skipped.
 310                        Videos without view count information are always
 311                        downloaded. None for no limit.
 312     max_views:         An integer representing the maximum view count.
 313                        Videos that are more popular than that are not
 314                        downloaded.
 315                        Videos without view count information are always
 316                        downloaded. None for no limit.
 317     download_archive:  File name of a file where all downloads are recorded.
 318                        Videos already present in the file are not downloaded
 319                        again.
 320     break_on_existing: Stop the download process after attempting to download a
 321                        file that is in the archive.
 322     break_on_reject:   Stop the download process when encountering a video that
 323                        has been filtered out.
 324     break_per_url:     Whether break_on_reject and break_on_existing
 325                        should act on each input URL as opposed to for the entire queue
 326     cookiefile:        File name where cookies should be read from and dumped to
 327     cookiesfrombrowser:  A tuple containing the name of the browser, the profile
 328                        name/path from where cookies are loaded, and the name of the
 329                        keyring. Eg: ('chrome', ) or ('vivaldi', 'default', 'BASICTEXT')
 330     legacyserverconnect: Explicitly allow HTTPS connection to servers that do not
 331                        support RFC 5746 secure renegotiation
 332     nocheckcertificate:  Do not verify SSL certificates
 333     prefer_insecure:   Use HTTP instead of HTTPS to retrieve information.
 334                        At the moment, this is only supported by YouTube.
 335     proxy:             URL of the proxy server to use
 336     geo_verification_proxy:  URL of the proxy to use for IP address verification
 337                        on geo-restricted sites.
 338     socket_timeout:    Time to wait for unresponsive hosts, in seconds
 339     bidi_workaround:   Work around buggy terminals without bidirectional text
 340                        support, using fribidi
 341     debug_printtraffic:Print out sent and received HTTP traffic
 342     include_ads:       Download ads as well (deprecated)
 343     default_search:    Prepend this string if an input url is not valid.
 344                        'auto' for elaborate guessing
 345     encoding:          Use this encoding instead of the system-specified.
 346     extract_flat:      Do not resolve URLs, return the immediate result.
 347                        Pass in 'in_playlist' to only show this behavior for
 348                        playlist items.
 349     wait_for_video:    If given, wait for scheduled streams to become available.
 350                        The value should be a tuple containing the range
 351                        (min_secs, max_secs) to wait between retries
 352     postprocessors:    A list of dictionaries, each with an entry
 353                        * key:  The name of the postprocessor. See
 354                                yt_dlp/postprocessor/__init__.py for a list.
 355                        * when: When to run the postprocessor. Allowed values are
 356                                the entries of utils.POSTPROCESS_WHEN
 357                                Assumed to be 'post_process' if not given
 358     post_hooks:        Deprecated - Register a custom postprocessor instead
 359                        A list of functions that get called as the final step
 360                        for each video file, after all postprocessors have been
 361                        called. The filename will be passed as the only argument.
 362     progress_hooks:    A list of functions that get called on download
 363                        progress, with a dictionary with the entries
 364                        * status: One of "downloading", "error", or "finished".
 365                                  Check this first and ignore unknown values.
 366                        * info_dict: The extracted info_dict
 367
 368                        If status is one of "downloading", or "finished", the
 369                        following properties may also be present:
 370                        * filename: The final filename (always present)
 371                        * tmpfilename: The filename we're currently writing to
 372                        * downloaded_bytes: Bytes on disk
 373                        * total_bytes: Size of the whole file, None if unknown
 374                        * total_bytes_estimate: Guess of the eventual file size,
 375                                                None if unavailable.
 376                        * elapsed: The number of seconds since download started.
 377                        * eta: The estimated time in seconds, None if unknown
 378                        * speed: The download speed in bytes/second, None if
 379                                 unknown
 380                        * fragment_index: The counter of the currently
 381                                          downloaded video fragment.
 382                        * fragment_count: The number of fragments (= individual
 383                                          files that will be merged)
 384
 385                        Progress hooks are guaranteed to be called at least once
 386                        (with status "finished") if the download is successful.
 387     postprocessor_hooks:  A list of functions that get called on postprocessing
 388                        progress, with a dictionary with the entries
 389                        * status: One of "started", "processing", or "finished".
 390                                  Check this first and ignore unknown values.
 391                        * postprocessor: Name of the postprocessor
 392                        * info_dict: The extracted info_dict
 393
 394                        Postprocessor hooks are guaranteed to be called at least twice
 395                        (with status "started" and "finished") if the processing is successful.
 396     merge_output_format: Extension to use when merging formats.
 397     final_ext:         Expected final extension; used to detect when the file was
 398                        already downloaded and converted
 399     fixup:             Automatically correct known faults of the file.
 400                        One of:
 401                        - "never": do nothing
 402                        - "warn": only emit a warning
 403                        - "detect_or_warn": check whether we can do anything
 404                                            about it, warn otherwise (default)
 405     source_address:    Client-side IP address to bind to.
 406     call_home:         Boolean, true iff we are allowed to contact the
 407                        yt-dlp servers for debugging. (BROKEN)
 408     sleep_interval_requests: Number of seconds to sleep between requests
 409                        during extraction
 410     sleep_interval:    Number of seconds to sleep before each download when
 411                        used alone or a lower bound of a range for randomized
 412                        sleep before each download (minimum possible number
 413                        of seconds to sleep) when used along with
 414                        max_sleep_interval.
 415     max_sleep_interval:Upper bound of a range for randomized sleep before each
 416                        download (maximum possible number of seconds to sleep).
 417                        Must only be used along with sleep_interval.
 418                        Actual sleep time will be a random float from range
 419                        [sleep_interval; max_sleep_interval].
 420     sleep_interval_subtitles: Number of seconds to sleep before each subtitle download
 421     listformats:       Print an overview of available video formats and exit.
 422     list_thumbnails:   Print a table of all thumbnails and exit.
 423     match_filter:      A function that gets called with the info_dict of
 424                        every video.
 425                        If it returns a message, the video is ignored.
 426                        If it returns None, the video is downloaded.
 427                        match_filter_func in utils.py is one example for this.
 428     no_color:          Do not emit color codes in output.
 429     geo_bypass:        Bypass geographic restriction via faking X-Forwarded-For
 430                        HTTP header
 431     geo_bypass_country:
 432                        Two-letter ISO 3166-1 alpha-2 country code that will be used for
 433                        explicit geographic restriction bypassing via faking
 434                        X-Forwarded-For HTTP header
 435     geo_bypass_ip_block:
 436                        IP range in CIDR notation that will be used similarly to
 437                        geo_bypass_country
 438
 439     The following options determine which downloader is picked:
 440     external_downloader: A dictionary of protocol keys and the executable of the
 441                        external downloader to use for it. The allowed protocols
 442                        are default|http|ftp|m3u8|dash|rtsp|rtmp|mms.
 443                        Set the value to 'native' to use the native downloader
 444     hls_prefer_native: Deprecated - Use external_downloader = {'m3u8': 'native'}
 445                        or {'m3u8': 'ffmpeg'} instead.
 446                        Use the native HLS downloader instead of ffmpeg/avconv
 447                        if True, otherwise use ffmpeg/avconv if False, otherwise
 448                        use downloader suggested by extractor if None.
 449     compat_opts:       Compatibility options. See "Differences in default behavior".
 450                        The following options do not work when used through the API:
 451                        filename, abort-on-error, multistreams, no-live-chat, format-sort,
 452                        no-clean-infojson, no-playlist-metafiles, no-keep-subs, no-attach-info-json.
 453                        Refer to __init__.py for their implementation
 454     progress_template: Dictionary of templates for progress outputs.
 455                        Allowed keys are 'download', 'postprocess',
 456                        'download-title' (console title) and 'postprocess-title'.
 457                        The template is mapped on a dictionary with keys 'progress' and 'info'
 458
 459     The following parameters are not used by YoutubeDL itself, they are used by
 460     the downloader (see yt_dlp/downloader/common.py):
 461     nopart, updatetime, buffersize, ratelimit, throttledratelimit, min_filesize,
 462     max_filesize, test, noresizebuffer, retries, file_access_retries, fragment_retries,
 463     continuedl, noprogress, xattr_set_filesize, hls_use_mpegts, http_chunk_size,
 464     external_downloader_args, concurrent_fragment_downloads.
 465
 466     The following options are used by the post processors:
 467     prefer_ffmpeg:     If False, use avconv instead of ffmpeg if both are available,
 468                        otherwise prefer ffmpeg. (avconv support is deprecated)
 469     ffmpeg_location:   Location of the ffmpeg/avconv binary; either the path
 470                        to the binary or its containing directory.
 471     postprocessor_args: A dictionary of postprocessor/executable keys (in lower case)
 472                        and a list of additional command-line arguments for the
 473                        postprocessor/executable. The dict can also have "PP+EXE" keys
 474                        which are used when the given exe is used by the given PP.
 475                        Use 'default' as the name for arguments to be passed to all PP
 476                        For compatibility with youtube-dl, a single list of args
 477                        can also be used
 478
 479     The following options are used by the extractors:
 480     extractor_retries: Number of times to retry for known errors
 481     dynamic_mpd:       Whether to process dynamic DASH manifests (default: True)
 482     hls_split_discontinuity: Split HLS playlists to different formats at
 483                        discontinuities such as ad breaks (default: False)
 484     extractor_args:    A dictionary of arguments to be passed to the extractors.
 485                        See "EXTRACTOR ARGUMENTS" for details.
 486                        Eg: {'youtube': {'skip': ['dash', 'hls']}}
 487     mark_watched:      Mark videos watched (even with --simulate). Only for YouTube
 488     youtube_include_dash_manifest: Deprecated - Use extractor_args instead.
 489                        If True (default), DASH manifests and related
 490                        data will be downloaded and processed by extractor.
 491                        You can reduce network I/O by disabling it if you don't
 492                        care about DASH. (only for youtube)
 493     youtube_include_hls_manifest: Deprecated - Use extractor_args instead.
 494                        If True (default), HLS manifests and related
 495                        data will be downloaded and processed by extractor.
 496                        You can reduce network I/O by disabling it if you don't
 497                        care about HLS. (only for youtube)
 498     """
 499
 500     _NUMERIC_FIELDS = set((
 501         'width', 'height', 'tbr', 'abr', 'asr', 'vbr', 'fps', 'filesize', 'filesize_approx',
 502         'timestamp', 'release_timestamp',
 503         'duration', 'view_count', 'like_count', 'dislike_count', 'repost_count',
 504         'average_rating', 'comment_count', 'age_limit',
 505         'start_time', 'end_time',
 506         'chapter_number', 'season_number', 'episode_number',
 507         'track_number', 'disc_number', 'release_year',
 508     ))
 509
 510     _format_selection_exts = {
 511         'audio': {'m4a', 'mp3', 'ogg', 'aac'},
 512         'video': {'mp4', 'flv', 'webm', '3gp'},
 513         'storyboards': {'mhtml'},
 514     }
 515
 516     params = None
 517     _ies = {}
 518     _pps = {k: [] for k in POSTPROCESS_WHEN}
 519     _printed_messages = set()
 520     _first_webpage_request = True
 521     _download_retcode = None
 522     _num_downloads = None
 523     _playlist_level = 0
 524     _playlist_urls = set()
 525     _screen_file = None
 526
 527     def __init__(self, params=None, auto_init=True):
 528         """Create a YoutubeDL object with the given options.
 529         @param auto_init    Whether to load the default extractors and print header (if verbose).
 530                             Set to 'no_verbose_header' to not print the header
 531         """
 532         if params is None:
 533             params = {}
 534         self._ies = {}
 535         self._ies_instances = {}
 536         self._pps = {k: [] for k in POSTPROCESS_WHEN}
 537         self._printed_messages = set()
 538         self._first_webpage_request = True
 539         self._post_hooks = []
 540         self._progress_hooks = []
 541         self._postprocessor_hooks = []
 542         self._download_retcode = 0
 543         self._num_downloads = 0
 544         self._num_videos = 0
 545         self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
 546         self._err_file = sys.stderr
 547         self.params = params
 548         self.cache = Cache(self)
 549
 550         windows_enable_vt_mode()
 551         self._allow_colors = {
 552             'screen': not self.params.get('no_color') and supports_terminal_sequences(self._screen_file),
 553             'err': not self.params.get('no_color') and supports_terminal_sequences(self._err_file),
 554         }
 555
 556         if sys.version_info < (3, 6):
 557             self.report_warning(
 558                 'Python version %d.%d is not supported! Please update to Python 3.6 or above' % sys.version_info[:2])
 559
 560         if self.params.get('allow_unplayable_formats'):
 561             self.report_warning(
 562                 f'You have asked for {self._format_err("UNPLAYABLE", self.Styles.EMPHASIS)} formats to be listed/downloaded. '
 563                 'This is a developer option intended for debugging. \n'
 564                 '         If you experience any issues while using this option, '
 565                 f'{self._format_err("DO NOT", self.Styles.ERROR)} open a bug report')
 566
 567         def check_deprecated(param, option, suggestion):
 568             if self.params.get(param) is not None:
 569                 self.report_warning('%s is deprecated. Use %s instead' % (option, suggestion))
 570                 return True
 571             return False
 572
 573         if check_deprecated('cn_verification_proxy', '--cn-verification-proxy', '--geo-verification-proxy'):
 574             if self.params.get('geo_verification_proxy') is None:
 575                 self.params['geo_verification_proxy'] = self.params['cn_verification_proxy']
 576
 577         check_deprecated('autonumber', '--auto-number', '-o "%(autonumber)s-%(title)s.%(ext)s"')
 578         check_deprecated('usetitle', '--title', '-o "%(title)s-%(id)s.%(ext)s"')
 579         check_deprecated('useid', '--id', '-o "%(id)s.%(ext)s"')
 580
 581         for msg in self.params.get('_warnings', []):
 582             self.report_warning(msg)
 583         for msg in self.params.get('_deprecation_warnings', []):
 584             self.deprecation_warning(msg)
 585
 586         if 'list-formats' in self.params.get('compat_opts', []):
 587             self.params['listformats_table'] = False
 588
 589         if 'overwrites' not in self.params and self.params.get('nooverwrites') is not None:
 590             # nooverwrites was unnecessarily changed to overwrites
 591             # in 0c3d0f51778b153f65c21906031c2e091fcfb641
 592             # This ensures compatibility with both keys
 593             self.params['overwrites'] = not self.params['nooverwrites']
 594         elif self.params.get('overwrites') is None:
 595             self.params.pop('overwrites', None)
 596         else:
 597             self.params['nooverwrites'] = not self.params['overwrites']
 598
 599         self.params.setdefault('forceprint', {})
 600         self.params.setdefault('print_to_file', {})
 601
 602         # Compatibility with older syntax
 603         if not isinstance(params['forceprint'], dict):
 604             self.params['forceprint'] = {'video': params['forceprint']}
 605
 606         if self.params.get('bidi_workaround', False):
 607             try:
 608                 import pty
 609                 master, slave = pty.openpty()
 610                 width = compat_get_terminal_size().columns
 611                 if width is None:
 612                     width_args = []
 613                 else:
 614                     width_args = ['-w', str(width)]
 615                 sp_kwargs = dict(
 616                     stdin=subprocess.PIPE,
 617                     stdout=slave,
 618                     stderr=self._err_file)
 619                 try:
 620                     self._output_process = Popen(['bidiv'] + width_args, **sp_kwargs)
 621                 except OSError:
 622                     self._output_process = Popen(['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
 623                 self._output_channel = os.fdopen(master, 'rb')
 624             except OSError as ose:
 625                 if ose.errno == errno.ENOENT:
 626                     self.report_warning(
 627                         'Could not find fribidi executable, ignoring --bidi-workaround. '
 628                         'Make sure that  fribidi  is an executable file in one of the directories in your $PATH.')
 629                 else:
 630                     raise
 631
 632         if (sys.platform != 'win32'
 633                 and sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
 634                 and not self.params.get('restrictfilenames', False)):
 635             # Unicode filesystem API will throw errors (#1474, #13027)
 636             self.report_warning(
 637                 'Assuming --restrict-filenames since file system encoding '
 638                 'cannot encode all characters. '
 639                 'Set the LC_ALL environment variable to fix this.')
 640             self.params['restrictfilenames'] = True
 641
 642         self.outtmpl_dict = self.parse_outtmpl()
 643
 644         # Creating format selector here allows us to catch syntax errors before the extraction
 645         self.format_selector = (
 646             self.params.get('format') if self.params.get('format') in (None, '-')
 647             else self.params['format'] if callable(self.params['format'])
 648             else self.build_format_selector(self.params['format']))
 649
 650         self._setup_opener()
 651
 652         if auto_init:
 653             if auto_init != 'no_verbose_header':
 654                 self.print_debug_header()
 655             self.add_default_info_extractors()
 656
 657         hooks = {
 658             'post_hooks': self.add_post_hook,
 659             'progress_hooks': self.add_progress_hook,
 660             'postprocessor_hooks': self.add_postprocessor_hook,
 661         }
 662         for opt, fn in hooks.items():
 663             for ph in self.params.get(opt, []):
 664                 fn(ph)
 665
 666         for pp_def_raw in self.params.get('postprocessors', []):
 667             pp_def = dict(pp_def_raw)
 668             when = pp_def.pop('when', 'post_process')
 669             self.add_post_processor(
 670                 get_postprocessor(pp_def.pop('key'))(self, **compat_kwargs(pp_def)),
 671                 when=when)
 672
 673         register_socks_protocols()
 674
 675         def preload_download_archive(fn):
 676             """Preload the archive, if any is specified"""
 677             if fn is None:
 678                 return False
 679             self.write_debug(f'Loading archive file {fn!r}')
 680             try:
 681                 with locked_file(fn, 'r', encoding='utf-8') as archive_file:
 682                     for line in archive_file:
 683                         self.archive.add(line.strip())
 684             except IOError as ioe:
 685                 if ioe.errno != errno.ENOENT:
 686                     raise
 687                 return False
 688             return True
 689
 690         self.archive = set()
 691         preload_download_archive(self.params.get('download_archive'))
 692
 693     def warn_if_short_id(self, argv):
 694         # short YouTube ID starting with dash?
 695         idxs = [
 696             i for i, a in enumerate(argv)
 697             if re.match(r'^-[0-9A-Za-z_-]{10}$', a)]
 698         if idxs:
 699             correct_argv = (
 700                 ['yt-dlp']
 701                 + [a for i, a in enumerate(argv) if i not in idxs]
 702                 + ['--'] + [argv[i] for i in idxs]
 703             )
 704             self.report_warning(
 705                 'Long argument string detected. '
 706                 'Use -- to separate parameters and URLs, like this:\n%s' %
 707                 args_to_str(correct_argv))
 708
 709     def add_info_extractor(self, ie):
 710         """Add an InfoExtractor object to the end of the list."""
 711         ie_key = ie.ie_key()
 712         self._ies[ie_key] = ie
 713         if not isinstance(ie, type):
 714             self._ies_instances[ie_key] = ie
 715             ie.set_downloader(self)
 716
 717     def _get_info_extractor_class(self, ie_key):
 718         ie = self._ies.get(ie_key)
 719         if ie is None:
 720             ie = get_info_extractor(ie_key)
 721             self.add_info_extractor(ie)
 722         return ie
 723
 724     def get_info_extractor(self, ie_key):
 725         """
 726         Get an instance of an IE with name ie_key, it will try to get one from
 727         the _ies list, if there's no instance it will create a new one and add
 728         it to the extractor list.
 729         """
 730         ie = self._ies_instances.get(ie_key)
 731         if ie is None:
 732             ie = get_info_extractor(ie_key)()
 733             self.add_info_extractor(ie)
 734         return ie
 735
 736     def add_default_info_extractors(self):
 737         """
 738         Add the InfoExtractors returned by gen_extractors to the end of the list
 739         """
 740         for ie in gen_extractor_classes():
 741             self.add_info_extractor(ie)
 742
 743     def add_post_processor(self, pp, when='post_process'):
 744         """Add a PostProcessor object to the end of the chain."""
 745         self._pps[when].append(pp)
 746         pp.set_downloader(self)
 747
 748     def add_post_hook(self, ph):
 749         """Add the post hook"""
 750         self._post_hooks.append(ph)
 751
 752     def add_progress_hook(self, ph):
 753         """Add the download progress hook"""
 754         self._progress_hooks.append(ph)
 755
 756     def add_postprocessor_hook(self, ph):
 757         """Add the postprocessing progress hook"""
 758         self._postprocessor_hooks.append(ph)
 759         for pps in self._pps.values():
 760             for pp in pps:
 761                 pp.add_progress_hook(ph)
 762
 763     def _bidi_workaround(self, message):
 764         if not hasattr(self, '_output_channel'):
 765             return message
 766
 767         assert hasattr(self, '_output_process')
 768         assert isinstance(message, compat_str)
 769         line_count = message.count('\n') + 1
 770         self._output_process.stdin.write((message + '\n').encode('utf-8'))
 771         self._output_process.stdin.flush()
 772         res = ''.join(self._output_channel.readline().decode('utf-8')
 773                       for _ in range(line_count))
 774         return res[:-len('\n')]
 775
 776     def _write_string(self, message, out=None, only_once=False):
 777         if only_once:
 778             if message in self._printed_messages:
 779                 return
 780             self._printed_messages.add(message)
 781         write_string(message, out=out, encoding=self.params.get('encoding'))
 782
 783     def to_stdout(self, message, skip_eol=False, quiet=False):
 784         """Print message to stdout"""
 785         if self.params.get('logger'):
 786             self.params['logger'].debug(message)
 787         elif not quiet or self.params.get('verbose'):
 788             self._write_string(
 789                 '%s%s' % (self._bidi_workaround(message), ('' if skip_eol else '\n')),
 790                 self._err_file if quiet else self._screen_file)
 791
 792     def to_stderr(self, message, only_once=False):
 793         """Print message to stderr"""
 794         assert isinstance(message, compat_str)
 795         if self.params.get('logger'):
 796             self.params['logger'].error(message)
 797         else:
 798             self._write_string('%s\n' % self._bidi_workaround(message), self._err_file, only_once=only_once)
 799
 800     def to_console_title(self, message):
 801         if not self.params.get('consoletitle', False):
 802             return
 803         message = remove_terminal_sequences(message)
 804         if compat_os_name == 'nt':
 805             if ctypes.windll.kernel32.GetConsoleWindow():
 806                 # c_wchar_p() might not be necessary if `message` is
 807                 # already of type unicode()
 808                 ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
 809         elif 'TERM' in os.environ:
 810             self._write_string('\033]0;%s\007' % message, self._screen_file)
 811
 812     def save_console_title(self):
 813         if not self.params.get('consoletitle', False):
 814             return
 815         if self.params.get('simulate'):
 816             return
 817         if compat_os_name != 'nt' and 'TERM' in os.environ:
 818             # Save the title on stack
 819             self._write_string('\033[22;0t', self._screen_file)
 820
 821     def restore_console_title(self):
 822         if not self.params.get('consoletitle', False):
 823             return
 824         if self.params.get('simulate'):
 825             return
 826         if compat_os_name != 'nt' and 'TERM' in os.environ:
 827             # Restore the title from stack
 828             self._write_string('\033[23;0t', self._screen_file)
 829
 830     def __enter__(self):
 831         self.save_console_title()
 832         return self
 833
 834     def __exit__(self, *args):
 835         self.restore_console_title()
 836
 837         if self.params.get('cookiefile') is not None:
 838             self.cookiejar.save(ignore_discard=True, ignore_expires=True)
 839
 840     def trouble(self, message=None, tb=None, is_error=True):
 841         """Determine action to take when a download problem appears.
 842
 843         Depending on if the downloader has been configured to ignore
 844         download errors or not, this method may throw an exception or
 845         not when errors are found, after printing the message.
 846
 847         @param tb          If given, is additional traceback information
 848         @param is_error    Whether to raise error according to ignorerrors
 849         """
 850         if message is not None:
 851             self.to_stderr(message)
 852         if self.params.get('verbose'):
 853             if tb is None:
 854                 if sys.exc_info()[0]:  # if .trouble has been called from an except block
 855                     tb = ''
 856                     if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
 857                         tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
 858                     tb += encode_compat_str(traceback.format_exc())
 859                 else:
 860                     tb_data = traceback.format_list(traceback.extract_stack())
 861                     tb = ''.join(tb_data)
 862             if tb:
 863                 self.to_stderr(tb)
 864         if not is_error:
 865             return
 866         if not self.params.get('ignoreerrors'):
 867             if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
 868                 exc_info = sys.exc_info()[1].exc_info
 869             else:
 870                 exc_info = sys.exc_info()
 871             raise DownloadError(message, exc_info)
 872         self._download_retcode = 1
 873
 874     def to_screen(self, message, skip_eol=False):
 875         """Print message to stdout if not in quiet mode"""
 876         self.to_stdout(
 877             message, skip_eol, quiet=self.params.get('quiet', False))
 878
    class Styles(Enum):
        """Named text styles; _format_text() passes a member's value on to format_text()"""
        HEADERS = 'yellow'
        EMPHASIS = 'light blue'
        ID = 'green'
        DELIM = 'blue'
        ERROR = 'red'
        # Same value as HEADERS, so Enum treats WARNING as an alias of HEADERS
        WARNING = 'yellow'
        SUPPRESS = 'light black'
 887
 888     def _format_text(self, handle, allow_colors, text, f, fallback=None, *, test_encoding=False):
 889         if test_encoding:
 890             original_text = text
 891             encoding = self.params.get('encoding') or getattr(handle, 'encoding', 'ascii')
 892             text = text.encode(encoding, 'ignore').decode(encoding)
 893             if fallback is not None and text != original_text:
 894                 text = fallback
 895         if isinstance(f, self.Styles):
 896             f = f.value
 897         return format_text(text, f) if allow_colors else text if fallback is None else fallback
 898
 899     def _format_screen(self, *args, **kwargs):
 900         return self._format_text(
 901             self._screen_file, self._allow_colors['screen'], *args, **kwargs)
 902
 903     def _format_err(self, *args, **kwargs):
 904         return self._format_text(
 905             self._err_file, self._allow_colors['err'], *args, **kwargs)
 906
 907     def report_warning(self, message, only_once=False):
 908         '''
 909         Print the message to stderr, it will be prefixed with 'WARNING:'
 910         If stderr is a tty file the 'WARNING:' will be colored
 911         '''
 912         if self.params.get('logger') is not None:
 913             self.params['logger'].warning(message)
 914         else:
 915             if self.params.get('no_warnings'):
 916                 return
 917             self.to_stderr(f'{self._format_err("WARNING:", self.Styles.WARNING)} {message}', only_once)
 918
 919     def deprecation_warning(self, message):
 920         if self.params.get('logger') is not None:
 921             self.params['logger'].warning('DeprecationWarning: {message}')
 922         else:
 923             self.to_stderr(f'{self._format_err("DeprecationWarning:", self.Styles.ERROR)} {message}', True)
 924
 925     def report_error(self, message, *args, **kwargs):
 926         '''
 927         Do the same as trouble, but prefixes the message with 'ERROR:', colored
 928         in red if stderr is a tty file.
 929         '''
 930         self.trouble(f'{self._format_err("ERROR:", self.Styles.ERROR)} {message}', *args, **kwargs)
 931
 932     def write_debug(self, message, only_once=False):
 933         '''Log debug message or Print message to stderr'''
 934         if not self.params.get('verbose', False):
 935             return
 936         message = '[debug] %s' % message
 937         if self.params.get('logger'):
 938             self.params['logger'].debug(message)
 939         else:
 940             self.to_stderr(message, only_once)
 941
 942     def report_file_already_downloaded(self, file_name):
 943         """Report file has already been fully downloaded."""
 944         try:
 945             self.to_screen('[download] %s has already been downloaded' % file_name)
 946         except UnicodeEncodeError:
 947             self.to_screen('[download] The file has already been downloaded')
 948
 949     def report_file_delete(self, file_name):
 950         """Report that existing file will be deleted."""
 951         try:
 952             self.to_screen('Deleting existing file %s' % file_name)
 953         except UnicodeEncodeError:
 954             self.to_screen('Deleting existing file')
 955
 956     def raise_no_formats(self, info, forced=False):
 957         has_drm = info.get('__has_drm')
 958         msg = 'This video is DRM protected' if has_drm else 'No video formats found!'
 959         expected = self.params.get('ignore_no_formats_error')
 960         if forced or not expected:
 961             raise ExtractorError(msg, video_id=info['id'], ie=info['extractor'],
 962                                  expected=has_drm or expected)
 963         else:
 964             self.report_warning(msg)
 965
 966     def parse_outtmpl(self):
 967         outtmpl_dict = self.params.get('outtmpl', {})
 968         if not isinstance(outtmpl_dict, dict):
 969             outtmpl_dict = {'default': outtmpl_dict}
 970         # Remove spaces in the default template
 971         if self.params.get('restrictfilenames'):
 972             sanitize = lambda x: x.replace(' - ', ' ').replace(' ', '-')
 973         else:
 974             sanitize = lambda x: x
 975         outtmpl_dict.update({
 976             k: sanitize(v) for k, v in DEFAULT_OUTTMPL.items()
 977             if outtmpl_dict.get(k) is None})
 978         for key, val in outtmpl_dict.items():
 979             if isinstance(val, bytes):
 980                 self.report_warning(
 981                     'Parameter outtmpl is bytes, but should be a unicode string. '
 982                     'Put  from __future__ import unicode_literals  at the top of your code file or consider switching to Python 3.x.')
 983         return outtmpl_dict
 984
 985     def get_output_path(self, dir_type='', filename=None):
 986         paths = self.params.get('paths', {})
 987         assert isinstance(paths, dict)
 988         path = os.path.join(
 989             expand_path(paths.get('home', '').strip()),
 990             expand_path(paths.get(dir_type, '').strip()) if dir_type else '',
 991             filename or '')
 992
 993         # Temporary fix for #4787
 994         # 'Treat' all problem characters by passing filename through preferredencoding
 995         # to workaround encoding issues with subprocess on python2 @ Windows
 996         if sys.version_info < (3, 0) and sys.platform == 'win32':
 997             path = encodeFilename(path, True).decode(preferredencoding())
 998         return sanitize_path(path, force=self.params.get('windowsfilenames'))
 999
1000     @staticmethod
1001     def _outtmpl_expandpath(outtmpl):
1002         # expand_path translates '%%' into '%' and '$$' into '$'
1003         # correspondingly that is not what we want since we need to keep
1004         # '%%' intact for template dict substitution step. Working around
1005         # with boundary-alike separator hack.
1006         sep = ''.join([random.choice(ascii_letters) for _ in range(32)])
1007         outtmpl = outtmpl.replace('%%', '%{0}%'.format(sep)).replace('$$', '${0}$'.format(sep))
1008
1009         # outtmpl should be expand_path'ed before template dict substitution
1010         # because meta fields may contain env variables we don't want to
1011         # be expanded. For example, for outtmpl "%(title)s.%(ext)s" and
1012         # title "Hello $PATH", we don't want `$PATH` to be expanded.
1013         return expand_path(outtmpl).replace(sep, '')
1014
1015     @staticmethod
1016     def escape_outtmpl(outtmpl):
1017         ''' Escape any remaining strings like %s, %abc% etc. '''
1018         return re.sub(
1019             STR_FORMAT_RE_TMPL.format('', '(?![%(\0])'),
1020             lambda mobj: ('' if mobj.group('has_key') else '%') + mobj.group(0),
1021             outtmpl)
1022
1023     @classmethod
1024     def validate_outtmpl(cls, outtmpl):
1025         ''' @return None or Exception object '''
1026         outtmpl = re.sub(
1027             STR_FORMAT_RE_TMPL.format('[^)]*', '[ljqBUDS]'),
1028             lambda mobj: f'{mobj.group(0)[:-1]}s',
1029             cls._outtmpl_expandpath(outtmpl))
1030         try:
1031             cls.escape_outtmpl(outtmpl) % collections.defaultdict(int)
1032             return None
1033         except ValueError as err:
1034             return err
1035
1036     @staticmethod
1037     def _copy_infodict(info_dict):
1038         info_dict = dict(info_dict)
1039         for key in ('__original_infodict', '__postprocessors'):
1040             info_dict.pop(key, None)
1041         return info_dict
1042
1043     def prepare_outtmpl(self, outtmpl, info_dict, sanitize=False):
1044         """ Make the outtmpl and info_dict suitable for substitution: ydl.escape_outtmpl(outtmpl) % info_dict
1045         @param sanitize    Whether to sanitize the output as a filename.
1046                            For backward compatibility, a function can also be passed
1047         """
1048
1049         info_dict.setdefault('epoch', int(time.time()))  # keep epoch consistent once set
1050
1051         info_dict = self._copy_infodict(info_dict)
1052         info_dict['duration_string'] = (  # %(duration>%H-%M-%S)s is wrong if duration > 24hrs
1053             formatSeconds(info_dict['duration'], '-' if sanitize else ':')
1054             if info_dict.get('duration', None) is not None
1055             else None)
1056         info_dict['autonumber'] = self.params.get('autonumber_start', 1) - 1 + self._num_downloads
1057         info_dict['video_autonumber'] = self._num_videos
1058         if info_dict.get('resolution') is None:
1059             info_dict['resolution'] = self.format_resolution(info_dict, default=None)
1060
1061         # For fields playlist_index, playlist_autonumber and autonumber convert all occurrences
1062         # of %(field)s to %(field)0Nd for backward compatibility
1063         field_size_compat_map = {
1064             'playlist_index': number_of_digits(info_dict.get('_last_playlist_index') or 0),
1065             'playlist_autonumber': number_of_digits(info_dict.get('n_entries') or 0),
1066             'autonumber': self.params.get('autonumber_size') or 5,
1067         }
1068
1069         TMPL_DICT = {}
1070         EXTERNAL_FORMAT_RE = re.compile(STR_FORMAT_RE_TMPL.format('[^)]*', f'[{STR_FORMAT_TYPES}ljqBUDS]'))
1071         MATH_FUNCTIONS = {
1072             '+': float.__add__,
1073             '-': float.__sub__,
1074         }
1075         # Field is of the form key1.key2...
1076         # where keys (except first) can be string, int or slice
1077         FIELD_RE = r'\w*(?:\.(?:\w+|{num}|{num}?(?::{num}?){{1,2}}))*'.format(num=r'(?:-?\d+)')
1078         MATH_FIELD_RE = r'''(?:{field}|{num})'''.format(field=FIELD_RE, num=r'-?\d+(?:.\d+)?')
1079         MATH_OPERATORS_RE = r'(?:%s)' % '|'.join(map(re.escape, MATH_FUNCTIONS.keys()))
1080         INTERNAL_FORMAT_RE = re.compile(r'''(?x)
1081             (?P<negate>-)?
1082             (?P<fields>{field})
1083             (?P<maths>(?:{math_op}{math_field})*)
1084             (?:>(?P<strf_format>.+?))?
1085             (?P<alternate>(?<!\\),[^|&)]+)?
1086             (?:&(?P<replacement>.*?))?
1087             (?:\|(?P<default>.*?))?
1088             $'''.format(field=FIELD_RE, math_op=MATH_OPERATORS_RE, math_field=MATH_FIELD_RE))
1089
1090         def _traverse_infodict(k):
1091             k = k.split('.')
1092             if k[0] == '':
1093                 k.pop(0)
1094             return traverse_obj(info_dict, k, is_user_input=True, traverse_string=True)
1095
1096         def get_value(mdict):
1097             # Object traversal
1098             value = _traverse_infodict(mdict['fields'])
1099             # Negative
1100             if mdict['negate']:
1101                 value = float_or_none(value)
1102                 if value is not None:
1103                     value *= -1
1104             # Do maths
1105             offset_key = mdict['maths']
1106             if offset_key:
1107                 value = float_or_none(value)
1108                 operator = None
1109                 while offset_key:
1110                     item = re.match(
1111                         MATH_FIELD_RE if operator else MATH_OPERATORS_RE,
1112                         offset_key).group(0)
1113                     offset_key = offset_key[len(item):]
1114                     if operator is None:
1115                         operator = MATH_FUNCTIONS[item]
1116                         continue
1117                     item, multiplier = (item[1:], -1) if item[0] == '-' else (item, 1)
1118                     offset = float_or_none(item)
1119                     if offset is None:
1120                         offset = float_or_none(_traverse_infodict(item))
1121                     try:
1122                         value = operator(value, multiplier * offset)
1123                     except (TypeError, ZeroDivisionError):
1124                         return None
1125                     operator = None
1126             # Datetime formatting
1127             if mdict['strf_format']:
1128                 value = strftime_or_none(value, mdict['strf_format'].replace('\\,', ','))
1129
1130             return value
1131
1132         na = self.params.get('outtmpl_na_placeholder', 'NA')
1133
1134         def filename_sanitizer(key, value, restricted=self.params.get('restrictfilenames')):
1135             return sanitize_filename(str(value), restricted=restricted,
1136                                      is_id=re.search(r'(^|[_.])id(\.|$)', key))
1137
1138         sanitizer = sanitize if callable(sanitize) else filename_sanitizer
1139         sanitize = bool(sanitize)
1140
1141         def _dumpjson_default(obj):
1142             if isinstance(obj, (set, LazyList)):
1143                 return list(obj)
1144             return repr(obj)
1145
1146         def create_key(outer_mobj):
1147             if not outer_mobj.group('has_key'):
1148                 return outer_mobj.group(0)
1149             key = outer_mobj.group('key')
1150             mobj = re.match(INTERNAL_FORMAT_RE, key)
1151             initial_field = mobj.group('fields') if mobj else ''
1152             value, replacement, default = None, None, na
1153             while mobj:
1154                 mobj = mobj.groupdict()
1155                 default = mobj['default'] if mobj['default'] is not None else default
1156                 value = get_value(mobj)
1157                 replacement = mobj['replacement']
1158                 if value is None and mobj['alternate']:
1159                     mobj = re.match(INTERNAL_FORMAT_RE, mobj['alternate'][1:])
1160                 else:
1161                     break
1162
1163             fmt = outer_mobj.group('format')
1164             if fmt == 's' and value is not None and key in field_size_compat_map.keys():
1165                 fmt = '0{:d}d'.format(field_size_compat_map[key])
1166
1167             value = default if value is None else value if replacement is None else replacement
1168
1169             flags = outer_mobj.group('conversion') or ''
1170             str_fmt = f'{fmt[:-1]}s'
1171             if fmt[-1] == 'l':  # list
1172                 delim = '\n' if '#' in flags else ', '
1173                 value, fmt = delim.join(map(str, variadic(value, allowed_types=(str, bytes)))), str_fmt
1174             elif fmt[-1] == 'j':  # json
1175                 value, fmt = json.dumps(value, default=_dumpjson_default, indent=4 if '#' in flags else None), str_fmt
1176             elif fmt[-1] == 'q':  # quoted
1177                 value = map(str, variadic(value) if '#' in flags else [value])
1178                 value, fmt = ' '.join(map(compat_shlex_quote, value)), str_fmt
1179             elif fmt[-1] == 'B':  # bytes
1180                 value = f'%{str_fmt}'.encode('utf-8') % str(value).encode('utf-8')
1181                 value, fmt = value.decode('utf-8', 'ignore'), 's'
1182             elif fmt[-1] == 'U':  # unicode normalized
1183                 value, fmt = unicodedata.normalize(
1184                     # "+" = compatibility equivalence, "#" = NFD
1185                     'NF%s%s' % ('K' if '+' in flags else '', 'D' if '#' in flags else 'C'),
1186                     value), str_fmt
1187             elif fmt[-1] == 'D':  # decimal suffix
1188                 num_fmt, fmt = fmt[:-1].replace('#', ''), 's'
1189                 value = format_decimal_suffix(value, f'%{num_fmt}f%s' if num_fmt else '%d%s',
1190                                               factor=1024 if '#' in flags else 1000)
1191             elif fmt[-1] == 'S':  # filename sanitization
1192                 value, fmt = filename_sanitizer(initial_field, value, restricted='#' in flags), str_fmt
1193             elif fmt[-1] == 'c':
1194                 if value:
1195                     value = str(value)[0]
1196                 else:
1197                     fmt = str_fmt
1198             elif fmt[-1] not in 'rs':  # numeric
1199                 value = float_or_none(value)
1200                 if value is None:
1201                     value, fmt = default, 's'
1202
1203             if sanitize:
1204                 if fmt[-1] == 'r':
1205                     # If value is an object, sanitize might convert it to a string
1206                     # So we convert it to repr first
1207                     value, fmt = repr(value), str_fmt
1208                 if fmt[-1] in 'csr':
1209                     value = sanitizer(initial_field, value)
1210
1211             key = '%s\0%s' % (key.replace('%', '%\0'), outer_mobj.group('format'))
1212             TMPL_DICT[key] = value
1213             return '{prefix}%({key}){fmt}'.format(key=key, fmt=fmt, prefix=outer_mobj.group('prefix'))
1214
1215         return EXTERNAL_FORMAT_RE.sub(create_key, outtmpl), TMPL_DICT
1216
1217     def evaluate_outtmpl(self, outtmpl, info_dict, *args, **kwargs):
1218         outtmpl, info_dict = self.prepare_outtmpl(outtmpl, info_dict, *args, **kwargs)
1219         return self.escape_outtmpl(outtmpl) % info_dict
1220
1221     def _prepare_filename(self, info_dict, tmpl_type='default'):
1222         try:
1223             outtmpl = self._outtmpl_expandpath(self.outtmpl_dict.get(tmpl_type, self.outtmpl_dict['default']))
1224             filename = self.evaluate_outtmpl(outtmpl, info_dict, True)
1225             if not filename:
1226                 return None
1227
1228             if tmpl_type in ('default', 'temp'):
1229                 final_ext, ext = self.params.get('final_ext'), info_dict.get('ext')
1230                 if final_ext and ext and final_ext != ext and filename.endswith(f'.{final_ext}'):
1231                     filename = replace_extension(filename, ext, final_ext)
1232             else:
1233                 force_ext = OUTTMPL_TYPES[tmpl_type]
1234                 if force_ext:
1235                     filename = replace_extension(filename, force_ext, info_dict.get('ext'))
1236
1237             # https://github.com/blackjack4494/youtube-dlc/issues/85
1238             trim_file_name = self.params.get('trim_file_name', False)
1239             if trim_file_name:
1240                 no_ext, *ext = filename.rsplit('.', 2)
1241                 filename = join_nonempty(no_ext[:trim_file_name], *ext, delim='.')
1242
1243             return filename
1244         except ValueError as err:
1245             self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
1246             return None
1247
1248     def prepare_filename(self, info_dict, dir_type='', warn=False):
1249         """Generate the output filename."""
1250
1251         filename = self._prepare_filename(info_dict, dir_type or 'default')
1252         if not filename and dir_type not in ('', 'temp'):
1253             return ''
1254
1255         if warn:
1256             if not self.params.get('paths'):
1257                 pass
1258             elif filename == '-':
1259                 self.report_warning('--paths is ignored when an outputting to stdout', only_once=True)
1260             elif os.path.isabs(filename):
1261                 self.report_warning('--paths is ignored since an absolute path is given in output template', only_once=True)
1262         if filename == '-' or not filename:
1263             return filename
1264
1265         return self.get_output_path(dir_type, filename)
1266
1267     def _match_entry(self, info_dict, incomplete=False, silent=False):
1268         """ Returns None if the file should be downloaded """
1269
1270         video_title = info_dict.get('title', info_dict.get('id', 'video'))
1271
1272         def check_filter():
1273             if 'title' in info_dict:
1274                 # This can happen when we're just evaluating the playlist
1275                 title = info_dict['title']
1276                 matchtitle = self.params.get('matchtitle', False)
1277                 if matchtitle:
1278                     if not re.search(matchtitle, title, re.IGNORECASE):
1279                         return '"' + title + '" title did not match pattern "' + matchtitle + '"'
1280                 rejecttitle = self.params.get('rejecttitle', False)
1281                 if rejecttitle:
1282                     if re.search(rejecttitle, title, re.IGNORECASE):
1283                         return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
1284             date = info_dict.get('upload_date')
1285             if date is not None:
1286                 dateRange = self.params.get('daterange', DateRange())
1287                 if date not in dateRange:
1288                     return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
1289             view_count = info_dict.get('view_count')
1290             if view_count is not None:
1291                 min_views = self.params.get('min_views')
1292                 if min_views is not None and view_count < min_views:
1293                     return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
1294                 max_views = self.params.get('max_views')
1295                 if max_views is not None and view_count > max_views:
1296                     return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
1297             if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
1298                 return 'Skipping "%s" because it is age restricted' % video_title
1299
1300             match_filter = self.params.get('match_filter')
1301             if match_filter is not None:
1302                 try:
1303                     ret = match_filter(info_dict, incomplete=incomplete)
1304                 except TypeError:
1305                     # For backward compatibility
1306                     ret = None if incomplete else match_filter(info_dict)
1307                 if ret is not None:
1308                     return ret
1309             return None
1310
1311         if self.in_download_archive(info_dict):
1312             reason = '%s has already been recorded in the archive' % video_title
1313             break_opt, break_err = 'break_on_existing', ExistingVideoReached
1314         else:
1315             reason = check_filter()
1316             break_opt, break_err = 'break_on_reject', RejectedVideoReached
1317         if reason is not None:
1318             if not silent:
1319                 self.to_screen('[download] ' + reason)
1320             if self.params.get(break_opt, False):
1321                 raise break_err()
1322         return reason
1323
1324     @staticmethod
1325     def add_extra_info(info_dict, extra_info):
1326         '''Set the keys from extra_info in info dict if they are missing'''
1327         for key, value in extra_info.items():
1328             info_dict.setdefault(key, value)
1329
1330     def extract_info(self, url, download=True, ie_key=None, extra_info=None,
1331                      process=True, force_generic_extractor=False):
1332         """
1333         Return a list with a dictionary for each video extracted.
1334
1335         Arguments:
1336         url -- URL to extract
1337
1338         Keyword arguments:
1339         download -- whether to download videos during extraction
1340         ie_key -- extractor key hint
1341         extra_info -- dictionary containing the extra values to add to each result
1342         process -- whether to resolve all unresolved references (URLs, playlist items),
1343             must be True for download to work.
1344         force_generic_extractor -- force using the generic extractor
1345         """
1346
1347         if extra_info is None:
1348             extra_info = {}
1349
1350         if not ie_key and force_generic_extractor:
1351             ie_key = 'Generic'
1352
1353         if ie_key:
1354             ies = {ie_key: self._get_info_extractor_class(ie_key)}
1355         else:
1356             ies = self._ies
1357
1358         for ie_key, ie in ies.items():
1359             if not ie.suitable(url):
1360                 continue
1361
1362             if not ie.working():
1363                 self.report_warning('The program functionality for this site has been marked as broken, '
1364                                     'and will probably not work.')
1365
1366             temp_id = ie.get_temp_id(url)
1367             if temp_id is not None and self.in_download_archive({'id': temp_id, 'ie_key': ie_key}):
1368                 self.to_screen(f'[{ie_key}] {temp_id}: has already been recorded in the archive')
1369                 if self.params.get('break_on_existing', False):
1370                     raise ExistingVideoReached()
1371                 break
1372             return self.__extract_info(url, self.get_info_extractor(ie_key), download, extra_info, process)
1373         else:
1374             self.report_error('no suitable InfoExtractor for URL %s' % url)
1375
1376     def __handle_extraction_exceptions(func):
1377         @functools.wraps(func)
1378         def wrapper(self, *args, **kwargs):
1379             while True:
1380                 try:
1381                     return func(self, *args, **kwargs)
1382                 except (DownloadCancelled, LazyList.IndexError, PagedList.IndexError):
1383                     raise
1384                 except ReExtractInfo as e:
1385                     if e.expected:
1386                         self.to_screen(f'{e}; Re-extracting data')
1387                     else:
1388                         self.to_stderr('\r')
1389                         self.report_warning(f'{e}; Re-extracting data')
1390                     continue
1391                 except GeoRestrictedError as e:
1392                     msg = e.msg
1393                     if e.countries:
1394                         msg += '\nThis video is available in %s.' % ', '.join(
1395                             map(ISO3166Utils.short2full, e.countries))
1396                     msg += '\nYou might want to use a VPN or a proxy server (with --proxy) to workaround.'
1397                     self.report_error(msg)
1398                 except ExtractorError as e:  # An error we somewhat expected
1399                     self.report_error(str(e), e.format_traceback())
1400                 except Exception as e:
1401                     if self.params.get('ignoreerrors'):
1402                         self.report_error(str(e), tb=encode_compat_str(traceback.format_exc()))
1403                     else:
1404                         raise
1405                 break
1406         return wrapper
1407
1408     def _wait_for_video(self, ie_result):
1409         if (not self.params.get('wait_for_video')
1410                 or ie_result.get('_type', 'video') != 'video'
1411                 or ie_result.get('formats') or ie_result.get('url')):
1412             return
1413
1414         format_dur = lambda dur: '%02d:%02d:%02d' % timetuple_from_msec(dur * 1000)[:-1]
1415         last_msg = ''
1416
1417         def progress(msg):
1418             nonlocal last_msg
1419             self.to_screen(msg + ' ' * (len(last_msg) - len(msg)) + '\r', skip_eol=True)
1420             last_msg = msg
1421
1422         min_wait, max_wait = self.params.get('wait_for_video')
1423         diff = try_get(ie_result, lambda x: x['release_timestamp'] - time.time())
1424         if diff is None and ie_result.get('live_status') == 'is_upcoming':
1425             diff = random.randrange(min_wait, max_wait) if (max_wait and min_wait) else (max_wait or min_wait)
1426             self.report_warning('Release time of video is not known')
1427         elif (diff or 0) <= 0:
1428             self.report_warning('Video should already be available according to extracted info')
1429         diff = min(max(diff or 0, min_wait or 0), max_wait or float('inf'))
1430         self.to_screen(f'[wait] Waiting for {format_dur(diff)} - Press Ctrl+C to try now')
1431
1432         wait_till = time.time() + diff
1433         try:
1434             while True:
1435                 diff = wait_till - time.time()
1436                 if diff <= 0:
1437                     progress('')
1438                     raise ReExtractInfo('[wait] Wait period ended', expected=True)
1439                 progress(f'[wait] Remaining time until next attempt: {self._format_screen(format_dur(diff), self.Styles.EMPHASIS)}')
1440                 time.sleep(1)
1441         except KeyboardInterrupt:
1442             progress('')
1443             raise ReExtractInfo('[wait] Interrupted by user', expected=True)
1444         except BaseException as e:
1445             if not isinstance(e, ReExtractInfo):
1446                 self.to_screen('')
1447             raise
1448
1449     @__handle_extraction_exceptions
1450     def __extract_info(self, url, ie, download, extra_info, process):
1451         ie_result = ie.extract(url)
1452         if ie_result is None:  # Finished already (backwards compatibility; listformats and friends should be moved here)
1453             return
1454         if isinstance(ie_result, list):
1455             # Backwards compatibility: old IE result format
1456             ie_result = {
1457                 '_type': 'compat_list',
1458                 'entries': ie_result,
1459             }
1460         if extra_info.get('original_url'):
1461             ie_result.setdefault('original_url', extra_info['original_url'])
1462         self.add_default_extra_info(ie_result, ie, url)
1463         if process:
1464             self._wait_for_video(ie_result)
1465             return self.process_ie_result(ie_result, download, extra_info)
1466         else:
1467             return ie_result
1468
1469     def add_default_extra_info(self, ie_result, ie, url):
1470         if url is not None:
1471             self.add_extra_info(ie_result, {
1472                 'webpage_url': url,
1473                 'original_url': url,
1474                 'webpage_url_basename': url_basename(url),
1475                 'webpage_url_domain': get_domain(url),
1476             })
1477         if ie is not None:
1478             self.add_extra_info(ie_result, {
1479                 'extractor': ie.IE_NAME,
1480                 'extractor_key': ie.ie_key(),
1481             })
1482
1483     def process_ie_result(self, ie_result, download=True, extra_info=None):
1484         """
1485         Take the result of the ie(may be modified) and resolve all unresolved
1486         references (URLs, playlist items).
1487
1488         It will also download the videos if 'download'.
1489         Returns the resolved ie_result.
1490         """
1491         if extra_info is None:
1492             extra_info = {}
1493         result_type = ie_result.get('_type', 'video')
1494
1495         if result_type in ('url', 'url_transparent'):
1496             ie_result['url'] = sanitize_url(ie_result['url'])
1497             if ie_result.get('original_url'):
1498                 extra_info.setdefault('original_url', ie_result['original_url'])
1499
1500             extract_flat = self.params.get('extract_flat', False)
1501             if ((extract_flat == 'in_playlist' and 'playlist' in extra_info)
1502                     or extract_flat is True):
1503                 info_copy = ie_result.copy()
1504                 ie = try_get(ie_result.get('ie_key'), self.get_info_extractor)
1505                 if ie and not ie_result.get('id'):
1506                     info_copy['id'] = ie.get_temp_id(ie_result['url'])
1507                 self.add_default_extra_info(info_copy, ie, ie_result['url'])
1508                 self.add_extra_info(info_copy, extra_info)
1509                 info_copy, _ = self.pre_process(info_copy)
1510                 self.__forced_printings(info_copy, self.prepare_filename(info_copy), incomplete=True)
1511                 if self.params.get('force_write_download_archive', False):
1512                     self.record_download_archive(info_copy)
1513                 return ie_result
1514
1515         if result_type == 'video':
1516             self.add_extra_info(ie_result, extra_info)
1517             ie_result = self.process_video_result(ie_result, download=download)
1518             additional_urls = (ie_result or {}).get('additional_urls')
1519             if additional_urls:
1520                 # TODO: Improve MetadataParserPP to allow setting a list
1521                 if isinstance(additional_urls, compat_str):
1522                     additional_urls = [additional_urls]
1523                 self.to_screen(
1524                     '[info] %s: %d additional URL(s) requested' % (ie_result['id'], len(additional_urls)))
1525                 self.write_debug('Additional URLs: "%s"' % '", "'.join(additional_urls))
1526                 ie_result['additional_entries'] = [
1527                     self.extract_info(
1528                         url, download, extra_info=extra_info,
1529                         force_generic_extractor=self.params.get('force_generic_extractor'))
1530                     for url in additional_urls
1531                 ]
1532             return ie_result
1533         elif result_type == 'url':
1534             # We have to add extra_info to the results because it may be
1535             # contained in a playlist
1536             return self.extract_info(
1537                 ie_result['url'], download,
1538                 ie_key=ie_result.get('ie_key'),
1539                 extra_info=extra_info)
1540         elif result_type == 'url_transparent':
1541             # Use the information from the embedding page
1542             info = self.extract_info(
1543                 ie_result['url'], ie_key=ie_result.get('ie_key'),
1544                 extra_info=extra_info, download=False, process=False)
1545
1546             # extract_info may return None when ignoreerrors is enabled and
1547             # extraction failed with an error, don't crash and return early
1548             # in this case
1549             if not info:
1550                 return info
1551
1552             force_properties = dict(
1553                 (k, v) for k, v in ie_result.items() if v is not None)
1554             for f in ('_type', 'url', 'id', 'extractor', 'extractor_key', 'ie_key'):
1555                 if f in force_properties:
1556                     del force_properties[f]
1557             new_result = info.copy()
1558             new_result.update(force_properties)
1559
1560             # Extracted info may not be a video result (i.e.
1561             # info.get('_type', 'video') != video) but rather an url or
1562             # url_transparent. In such cases outer metadata (from ie_result)
1563             # should be propagated to inner one (info). For this to happen
1564             # _type of info should be overridden with url_transparent. This
1565             # fixes issue from https://github.com/ytdl-org/youtube-dl/pull/11163.
1566             if new_result.get('_type') == 'url':
1567                 new_result['_type'] = 'url_transparent'
1568
1569             return self.process_ie_result(
1570                 new_result, download=download, extra_info=extra_info)
1571         elif result_type in ('playlist', 'multi_video'):
1572             # Protect from infinite recursion due to recursively nested playlists
1573             # (see https://github.com/ytdl-org/youtube-dl/issues/27833)
1574             webpage_url = ie_result['webpage_url']
1575             if webpage_url in self._playlist_urls:
1576                 self.to_screen(
1577                     '[download] Skipping already downloaded playlist: %s'
1578                     % ie_result.get('title') or ie_result.get('id'))
1579                 return
1580
1581             self._playlist_level += 1
1582             self._playlist_urls.add(webpage_url)
1583             self._sanitize_thumbnails(ie_result)
1584             try:
1585                 return self.__process_playlist(ie_result, download)
1586             finally:
1587                 self._playlist_level -= 1
1588                 if not self._playlist_level:
1589                     self._playlist_urls.clear()
1590         elif result_type == 'compat_list':
1591             self.report_warning(
1592                 'Extractor %s returned a compat_list result. '
1593                 'It needs to be updated.' % ie_result.get('extractor'))
1594
1595             def _fixup(r):
1596                 self.add_extra_info(r, {
1597                     'extractor': ie_result['extractor'],
1598                     'webpage_url': ie_result['webpage_url'],
1599                     'webpage_url_basename': url_basename(ie_result['webpage_url']),
1600                     'webpage_url_domain': get_domain(ie_result['webpage_url']),
1601                     'extractor_key': ie_result['extractor_key'],
1602                 })
1603                 return r
1604             ie_result['entries'] = [
1605                 self.process_ie_result(_fixup(r), download, extra_info)
1606                 for r in ie_result['entries']
1607             ]
1608             return ie_result
1609         else:
1610             raise Exception('Invalid result type: %s' % result_type)
1611
1612     def _ensure_dir_exists(self, path):
1613         return make_dir(path, self.report_error)
1614
1615     @staticmethod
1616     def _playlist_infodict(ie_result, **kwargs):
1617         return {
1618             **ie_result,
1619             'playlist': ie_result.get('title') or ie_result.get('id'),
1620             'playlist_id': ie_result.get('id'),
1621             'playlist_title': ie_result.get('title'),
1622             'playlist_uploader': ie_result.get('uploader'),
1623             'playlist_uploader_id': ie_result.get('uploader_id'),
1624             'playlist_index': 0,
1625             **kwargs,
1626         }
1627
    def __process_playlist(self, ie_result, download):
        """Select the requested entries of a playlist result and process each of them.

        Arguments:
        ie_result -- the playlist result; its 'entries', 'requested_entries'
            and 'playlist_count' keys are rewritten by this method
        download -- passed on to the processing of every entry

        Entry selection honours the 'playlist_items', 'playliststart' and
        'playlistend' params; ordering honours 'playlistreverse' and
        'playlistrandom'.

        Returns the result of running the 'playlist' postprocessors on
        ie_result, or None when writing one of the playlist files
        (info json, description) returns None.
        Raises EntryNotInPlaylist when ie_result has no 'entries' key, or when
        'requested_entries' is set and a selected entry cannot be found.
        """
        # We process each entry in the playlist
        playlist = ie_result.get('title') or ie_result.get('id')
        self.to_screen('[download] Downloading playlist: %s' % playlist)

        if 'entries' not in ie_result:
            raise EntryNotInPlaylist('There are no entries')

        # Unique placeholder marking positions for which no entry is available
        MissingEntry = object()
        incomplete_entries = bool(ie_result.get('requested_entries'))
        if incomplete_entries:
            # 'entries' holds only the entries at the (1-based) indices listed in
            # 'requested_entries'; expand it so that list position == playlist index
            def fill_missing_entries(entries, indices):
                ret = [MissingEntry] * max(indices)
                for i, entry in zip(indices, entries):
                    ret[i - 1] = entry
                return ret
            ie_result['entries'] = fill_missing_entries(ie_result['entries'], ie_result['requested_entries'])

        playlist_results = []

        playliststart = self.params.get('playliststart', 1)
        playlistend = self.params.get('playlistend')
        # For backwards compatibility, interpret -1 as whole list
        if playlistend == -1:
            playlistend = None

        playlistitems_str = self.params.get('playlist_items')
        playlistitems = None
        if playlistitems_str is not None:
            # Parse a comma separated list of indices and inclusive "start-end" ranges
            def iter_playlistitems(format):
                for string_segment in format.split(','):
                    if '-' in string_segment:
                        start, end = string_segment.split('-')
                        for item in range(int(start), int(end) + 1):
                            yield int(item)
                    else:
                        yield int(string_segment)
            playlistitems = orderedSet(iter_playlistitems(playlistitems_str))

        ie_entries = ie_result['entries']
        if isinstance(ie_entries, list):
            playlist_count = len(ie_entries)
            # The remaining %d is filled in with the number of selected entries later
            msg = f'Collected {playlist_count} videos; downloading %d of them'
            ie_result['playlist_count'] = ie_result.get('playlist_count') or playlist_count

            def get_entry(i):
                return ie_entries[i - 1]
        else:
            msg = 'Downloading %d videos'
            if not isinstance(ie_entries, (PagedList, LazyList)):
                ie_entries = LazyList(ie_entries)
            elif isinstance(ie_entries, InAdvancePagedList):
                if ie_entries._pagesize == 1:
                    # NOTE(review): this local is not read again in this method
                    playlist_count = ie_entries._pagecount

            # Indexing non-list entries goes through the extraction exception
            # handler, so handled errors yield None instead of propagating
            def get_entry(i):
                return YoutubeDL.__handle_extraction_exceptions(
                    lambda self, i: ie_entries[i - 1]
                )(self, i)

        entries, broken = [], False
        # Either the explicitly requested indices, or every index from playliststart on
        items = playlistitems if playlistitems is not None else itertools.count(playliststart)
        for i in items:
            if i == 0:
                continue
            if playlistitems is None and playlistend is not None and playlistend < i:
                break
            entry = None
            try:
                entry = get_entry(i)
                if entry is MissingEntry:
                    raise EntryNotInPlaylist()
            except (IndexError, EntryNotInPlaylist):
                if incomplete_entries:
                    raise EntryNotInPlaylist(f'Entry {i} cannot be found')
                elif not playlistitems:
                    # Ran past the end of the playlist
                    break
            # Unavailable entries are kept as None so that positions stay aligned
            # with the requested items; they are dropped below
            entries.append(entry)
            try:
                if entry is not None:
                    # Called for its side effect only: with the break_on_* params
                    # set, it raises and ends the collection of entries early
                    self._match_entry(entry, incomplete=True, silent=True)
            except (ExistingVideoReached, RejectedVideoReached):
                broken = True
                break
        ie_result['entries'] = entries

        # Save playlist_index before re-ordering
        entries = [
            ((playlistitems[i - 1] if playlistitems else i + playliststart - 1), entry)
            for i, entry in enumerate(entries, 1)
            if entry is not None]
        n_entries = len(entries)

        # Fall back to the number of collected entries only when the whole
        # playlist was iterated without any restriction or early break
        if not (ie_result.get('playlist_count') or broken or playlistitems or playlistend):
            ie_result['playlist_count'] = n_entries

        if not playlistitems and (playliststart != 1 or playlistend):
            playlistitems = list(range(playliststart, playliststart + n_entries))
        ie_result['requested_entries'] = playlistitems

        _infojson_written = False
        write_playlist_files = self.params.get('allow_playlist_files', True)
        if write_playlist_files and self.params.get('list_thumbnails'):
            self.list_thumbnails(ie_result)
        if write_playlist_files and not self.params.get('simulate'):
            ie_copy = self._playlist_infodict(ie_result, n_entries=n_entries)
            _infojson_written = self._write_info_json(
                'playlist', ie_result, self.prepare_filename(ie_copy, 'pl_infojson'))
            # A None result from the writers aborts processing of the playlist
            if _infojson_written is None:
                return
            if self._write_description('playlist', ie_result,
                                       self.prepare_filename(ie_copy, 'pl_description')) is None:
                return
            # TODO: This should be passed to ThumbnailsConvertor if necessary
            self._write_thumbnails('playlist', ie_copy, self.prepare_filename(ie_copy, 'pl_thumbnail'))

        if self.params.get('playlistreverse', False):
            entries = entries[::-1]
        if self.params.get('playlistrandom', False):
            random.shuffle(entries)

        x_forwarded_for = ie_result.get('__x_forwarded_for_ip')

        self.to_screen('[%s] playlist %s: %s' % (ie_result['extractor'], playlist, msg % n_entries))
        failures = 0
        # Without the 'skip_playlist_after_errors' param the limit is never reached
        max_failures = self.params.get('skip_playlist_after_errors') or float('inf')
        for i, entry_tuple in enumerate(entries, 1):
            playlist_index, entry = entry_tuple
            if 'playlist-index' in self.params.get('compat_opts', []):
                # Compat behavior: derive the index from the position after re-ordering
                playlist_index = playlistitems[i - 1] if playlistitems else i + playliststart - 1
            self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
            # This __x_forwarded_for_ip thing is a bit ugly but requires
            # minimal changes
            if x_forwarded_for:
                entry['__x_forwarded_for_ip'] = x_forwarded_for
            # Playlist-level fields handed to the entry as extra info
            extra = {
                'n_entries': n_entries,
                '_last_playlist_index': max(playlistitems) if playlistitems else (playlistend or n_entries),
                'playlist_count': ie_result.get('playlist_count'),
                'playlist_index': playlist_index,
                'playlist_autonumber': i,
                'playlist': playlist,
                'playlist_id': ie_result.get('id'),
                'playlist_title': ie_result.get('title'),
                'playlist_uploader': ie_result.get('uploader'),
                'playlist_uploader_id': ie_result.get('uploader_id'),
                'extractor': ie_result['extractor'],
                'webpage_url': ie_result['webpage_url'],
                'webpage_url_basename': url_basename(ie_result['webpage_url']),
                'webpage_url_domain': get_domain(ie_result['webpage_url']),
                'extractor_key': ie_result['extractor_key'],
            }

            # Entries rejected by the filters are skipped and not counted as failures
            if self._match_entry(entry, incomplete=True) is not None:
                continue

            entry_result = self.__process_iterable_entry(entry, download, extra)
            if not entry_result:
                failures += 1
            if failures >= max_failures:
                self.report_error(
                    'Skipping the remaining entries in playlist "%s" since %d items failed extraction' % (playlist, failures))
                break
            playlist_results.append(entry_result)
        ie_result['entries'] = playlist_results

        # Write the updated info to json
        if _infojson_written and self._write_info_json(
                'updated playlist', ie_result,
                self.prepare_filename(ie_copy, 'pl_infojson'), overwrite=True) is None:
            return

        ie_result = self.run_all_pps('playlist', ie_result)
        self.to_screen(f'[download] Finished downloading playlist: {playlist}')
        return ie_result
1803
1804     @__handle_extraction_exceptions
1805     def __process_iterable_entry(self, entry, download, extra_info):
1806         return self.process_ie_result(
1807             entry, download=download, extra_info=extra_info)
1808
1809     def _build_format_filter(self, filter_spec):
1810         " Returns a function to filter the formats according to the filter_spec "
1811
1812         OPERATORS = {
1813             '<': operator.lt,
1814             '<=': operator.le,
1815             '>': operator.gt,
1816             '>=': operator.ge,
1817             '=': operator.eq,
1818             '!=': operator.ne,
1819         }
1820         operator_rex = re.compile(r'''(?x)\s*
1821             (?P<key>width|height|tbr|abr|vbr|asr|filesize|filesize_approx|fps)\s*
1822             (?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
1823             (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)\s*
1824             ''' % '|'.join(map(re.escape, OPERATORS.keys())))
1825         m = operator_rex.fullmatch(filter_spec)
1826         if m:
1827             try:
1828                 comparison_value = int(m.group('value'))
1829             except ValueError:
1830                 comparison_value = parse_filesize(m.group('value'))
1831                 if comparison_value is None:
1832                     comparison_value = parse_filesize(m.group('value') + 'B')
1833                 if comparison_value is None:
1834                     raise ValueError(
1835                         'Invalid value %r in format specification %r' % (
1836                             m.group('value'), filter_spec))
1837             op = OPERATORS[m.group('op')]
1838
1839         if not m:
1840             STR_OPERATORS = {
1841                 '=': operator.eq,
1842                 '^=': lambda attr, value: attr.startswith(value),
1843                 '$=': lambda attr, value: attr.endswith(value),
1844                 '*=': lambda attr, value: value in attr,
1845             }
1846             str_operator_rex = re.compile(r'''(?x)\s*
1847                 (?P<key>[a-zA-Z0-9._-]+)\s*
1848                 (?P<negation>!\s*)?(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
1849                 (?P<value>[a-zA-Z0-9._-]+)\s*
1850                 ''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))
1851             m = str_operator_rex.fullmatch(filter_spec)
1852             if m:
1853                 comparison_value = m.group('value')
1854                 str_op = STR_OPERATORS[m.group('op')]
1855                 if m.group('negation'):
1856                     op = lambda attr, value: not str_op(attr, value)
1857                 else:
1858                     op = str_op
1859
1860         if not m:
1861             raise SyntaxError('Invalid filter specification %r' % filter_spec)
1862
1863         def _filter(f):
1864             actual_value = f.get(m.group('key'))
1865             if actual_value is None:
1866                 return m.group('none_inclusive')
1867             return op(actual_value, comparison_value)
1868         return _filter
1869
1870     def _check_formats(self, formats):
1871         for f in formats:
1872             self.to_screen('[info] Testing format %s' % f['format_id'])
1873             path = self.get_output_path('temp')
1874             if not self._ensure_dir_exists(f'{path}/'):
1875                 continue
1876             temp_file = tempfile.NamedTemporaryFile(suffix='.tmp', delete=False, dir=path or None)
1877             temp_file.close()
1878             try:
1879                 success, _ = self.dl(temp_file.name, f, test=True)
1880             except (DownloadError, IOError, OSError, ValueError) + network_exceptions:
1881                 success = False
1882             finally:
1883                 if os.path.exists(temp_file.name):
1884                     try:
1885                         os.remove(temp_file.name)
1886                     except OSError:
1887                         self.report_warning('Unable to delete temporary file "%s"' % temp_file.name)
1888             if success:
1889                 yield f
1890             else:
1891                 self.to_screen('[info] Unable to download format %s. Skipping...' % f['format_id'])
1892
1893     def _default_format_spec(self, info_dict, download=True):
1894
1895         def can_merge():
1896             merger = FFmpegMergerPP(self)
1897             return merger.available and merger.can_merge()
1898
1899         prefer_best = (
1900             not self.params.get('simulate')
1901             and download
1902             and (
1903                 not can_merge()
1904                 or info_dict.get('is_live', False)
1905                 or self.outtmpl_dict['default'] == '-'))
1906         compat = (
1907             prefer_best
1908             or self.params.get('allow_multiple_audio_streams', False)
1909             or 'format-spec' in self.params.get('compat_opts', []))
1910
1911         return (
1912             'best/bestvideo+bestaudio' if prefer_best
1913             else 'bestvideo*+bestaudio/best' if not compat
1914             else 'bestvideo+bestaudio/best')
1915
1916     def build_format_selector(self, format_spec):
1917         def syntax_error(note, start):
1918             message = (
1919                 'Invalid format specification: '
1920                 '{0}\n\t{1}\n\t{2}^'.format(note, format_spec, ' ' * start[1]))
1921             return SyntaxError(message)
1922
1923         PICKFIRST = 'PICKFIRST'
1924         MERGE = 'MERGE'
1925         SINGLE = 'SINGLE'
1926         GROUP = 'GROUP'
1927         FormatSelector = collections.namedtuple('FormatSelector', ['type', 'selector', 'filters'])
1928
1929         allow_multiple_streams = {'audio': self.params.get('allow_multiple_audio_streams', False),
1930                                   'video': self.params.get('allow_multiple_video_streams', False)}
1931
1932         check_formats = self.params.get('check_formats') == 'selected'
1933
1934         def _parse_filter(tokens):
1935             filter_parts = []
1936             for type, string, start, _, _ in tokens:
1937                 if type == tokenize.OP and string == ']':
1938                     return ''.join(filter_parts)
1939                 else:
1940                     filter_parts.append(string)
1941
1942         def _remove_unused_ops(tokens):
1943             # Remove operators that we don't use and join them with the surrounding strings
1944             # for example: 'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9'
1945             ALLOWED_OPS = ('/', '+', ',', '(', ')')
1946             last_string, last_start, last_end, last_line = None, None, None, None
1947             for type, string, start, end, line in tokens:
1948                 if type == tokenize.OP and string == '[':
1949                     if last_string:
1950                         yield tokenize.NAME, last_string, last_start, last_end, last_line
1951                         last_string = None
1952                     yield type, string, start, end, line
1953                     # everything inside brackets will be handled by _parse_filter
1954                     for type, string, start, end, line in tokens:
1955                         yield type, string, start, end, line
1956                         if type == tokenize.OP and string == ']':
1957                             break
1958                 elif type == tokenize.OP and string in ALLOWED_OPS:
1959                     if last_string:
1960                         yield tokenize.NAME, last_string, last_start, last_end, last_line
1961                         last_string = None
1962                     yield type, string, start, end, line
1963                 elif type in [tokenize.NAME, tokenize.NUMBER, tokenize.OP]:
1964                     if not last_string:
1965                         last_string = string
1966                         last_start = start
1967                         last_end = end
1968                     else:
1969                         last_string += string
1970             if last_string:
1971                 yield tokenize.NAME, last_string, last_start, last_end, last_line
1972
1973         def _parse_format_selection(tokens, inside_merge=False, inside_choice=False, inside_group=False):
1974             selectors = []
1975             current_selector = None
1976             for type, string, start, _, _ in tokens:
1977                 # ENCODING is only defined in python 3.x
1978                 if type == getattr(tokenize, 'ENCODING', None):
1979                     continue
1980                 elif type in [tokenize.NAME, tokenize.NUMBER]:
1981                     current_selector = FormatSelector(SINGLE, string, [])
1982                 elif type == tokenize.OP:
1983                     if string == ')':
1984                         if not inside_group:
1985                             # ')' will be handled by the parentheses group
1986                             tokens.restore_last_token()
1987                         break
1988                     elif inside_merge and string in ['/', ',']:
1989                         tokens.restore_last_token()
1990                         break
1991                     elif inside_choice and string == ',':
1992                         tokens.restore_last_token()
1993                         break
1994                     elif string == ',':
1995                         if not current_selector:
1996                             raise syntax_error('"," must follow a format selector', start)
1997                         selectors.append(current_selector)
1998                         current_selector = None
1999                     elif string == '/':
2000                         if not current_selector:
2001                             raise syntax_error('"/" must follow a format selector', start)
2002                         first_choice = current_selector
2003                         second_choice = _parse_format_selection(tokens, inside_choice=True)
2004                         current_selector = FormatSelector(PICKFIRST, (first_choice, second_choice), [])
2005                     elif string == '[':
2006                         if not current_selector:
2007                             current_selector = FormatSelector(SINGLE, 'best', [])
2008                         format_filter = _parse_filter(tokens)
2009                         current_selector.filters.append(format_filter)
2010                     elif string == '(':
2011                         if current_selector:
2012                             raise syntax_error('Unexpected "("', start)
2013                         group = _parse_format_selection(tokens, inside_group=True)
2014                         current_selector = FormatSelector(GROUP, group, [])
2015                     elif string == '+':
2016                         if not current_selector:
2017                             raise syntax_error('Unexpected "+"', start)
2018                         selector_1 = current_selector
2019                         selector_2 = _parse_format_selection(tokens, inside_merge=True)
2020                         if not selector_2:
2021                             raise syntax_error('Expected a selector', start)
2022                         current_selector = FormatSelector(MERGE, (selector_1, selector_2), [])
2023                     else:
2024                         raise syntax_error('Operator not recognized: "{0}"'.format(string), start)
2025                 elif type == tokenize.ENDMARKER:
2026                     break
2027             if current_selector:
2028                 selectors.append(current_selector)
2029             return selectors
2030
2031         def _merge(formats_pair):
2032             format_1, format_2 = formats_pair
2033
2034             formats_info = []
2035             formats_info.extend(format_1.get('requested_formats', (format_1,)))
2036             formats_info.extend(format_2.get('requested_formats', (format_2,)))
2037
2038             if not allow_multiple_streams['video'] or not allow_multiple_streams['audio']:
2039                 get_no_more = {'video': False, 'audio': False}
2040                 for (i, fmt_info) in enumerate(formats_info):
2041                     if fmt_info.get('acodec') == fmt_info.get('vcodec') == 'none':
2042                         formats_info.pop(i)
2043                         continue
2044                     for aud_vid in ['audio', 'video']:
2045                         if not allow_multiple_streams[aud_vid] and fmt_info.get(aud_vid[0] + 'codec') != 'none':
2046                             if get_no_more[aud_vid]:
2047                                 formats_info.pop(i)
2048                                 break
2049                             get_no_more[aud_vid] = True
2050
2051             if len(formats_info) == 1:
2052                 return formats_info[0]
2053
2054             video_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('vcodec') != 'none']
2055             audio_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('acodec') != 'none']
2056
2057             the_only_video = video_fmts[0] if len(video_fmts) == 1 else None
2058             the_only_audio = audio_fmts[0] if len(audio_fmts) == 1 else None
2059
2060             output_ext = self.params.get('merge_output_format')
2061             if not output_ext:
2062                 if the_only_video:
2063                     output_ext = the_only_video['ext']
2064                 elif the_only_audio and not video_fmts:
2065                     output_ext = the_only_audio['ext']
2066                 else:
2067                     output_ext = 'mkv'
2068
2069             filtered = lambda *keys: filter(None, (traverse_obj(fmt, *keys) for fmt in formats_info))
2070
2071             new_dict = {
2072                 'requested_formats': formats_info,
2073                 'format': '+'.join(filtered('format')),
2074                 'format_id': '+'.join(filtered('format_id')),
2075                 'ext': output_ext,
2076                 'protocol': '+'.join(map(determine_protocol, formats_info)),
2077                 'language': '+'.join(orderedSet(filtered('language'))) or None,
2078                 'format_note': '+'.join(orderedSet(filtered('format_note'))) or None,
2079                 'filesize_approx': sum(filtered('filesize', 'filesize_approx')) or None,
2080                 'tbr': sum(filtered('tbr', 'vbr', 'abr')),
2081             }
2082
2083             if the_only_video:
2084                 new_dict.update({
2085                     'width': the_only_video.get('width'),
2086                     'height': the_only_video.get('height'),
2087                     'resolution': the_only_video.get('resolution') or self.format_resolution(the_only_video),
2088                     'fps': the_only_video.get('fps'),
2089                     'dynamic_range': the_only_video.get('dynamic_range'),
2090                     'vcodec': the_only_video.get('vcodec'),
2091                     'vbr': the_only_video.get('vbr'),
2092                     'stretched_ratio': the_only_video.get('stretched_ratio'),
2093                 })
2094
2095             if the_only_audio:
2096                 new_dict.update({
2097                     'acodec': the_only_audio.get('acodec'),
2098                     'abr': the_only_audio.get('abr'),
2099                     'asr': the_only_audio.get('asr'),
2100                 })
2101
2102             return new_dict
2103
2104         def _check_formats(formats):
2105             if not check_formats:
2106                 yield from formats
2107                 return
2108             yield from self._check_formats(formats)
2109
2110         def _build_selector_function(selector):
2111             if isinstance(selector, list):  # ,
2112                 fs = [_build_selector_function(s) for s in selector]
2113
2114                 def selector_function(ctx):
2115                     for f in fs:
2116                         yield from f(ctx)
2117                 return selector_function
2118
2119             elif selector.type == GROUP:  # ()
2120                 selector_function = _build_selector_function(selector.selector)
2121
2122             elif selector.type == PICKFIRST:  # /
2123                 fs = [_build_selector_function(s) for s in selector.selector]
2124
2125                 def selector_function(ctx):
2126                     for f in fs:
2127                         picked_formats = list(f(ctx))
2128                         if picked_formats:
2129                             return picked_formats
2130                     return []
2131
2132             elif selector.type == MERGE:  # +
2133                 selector_1, selector_2 = map(_build_selector_function, selector.selector)
2134
2135                 def selector_function(ctx):
2136                     for pair in itertools.product(selector_1(ctx), selector_2(ctx)):
2137                         yield _merge(pair)
2138
2139             elif selector.type == SINGLE:  # atom
2140                 format_spec = selector.selector or 'best'
2141
2142                 # TODO: Add allvideo, allaudio etc by generalizing the code with best/worst selector
2143                 if format_spec == 'all':
2144                     def selector_function(ctx):
2145                         yield from _check_formats(ctx['formats'][::-1])
2146                 elif format_spec == 'mergeall':
2147                     def selector_function(ctx):
2148                         formats = list(_check_formats(ctx['formats']))
2149                         if not formats:
2150                             return
2151                         merged_format = formats[-1]
2152                         for f in formats[-2::-1]:
2153                             merged_format = _merge((merged_format, f))
2154                         yield merged_format
2155
2156                 else:
2157                     format_fallback, format_reverse, format_idx = False, True, 1
2158                     mobj = re.match(
2159                         r'(?P<bw>best|worst|b|w)(?P<type>video|audio|v|a)?(?P<mod>\*)?(?:\.(?P<n>[1-9]\d*))?$',
2160                         format_spec)
2161                     if mobj is not None:
2162                         format_idx = int_or_none(mobj.group('n'), default=1)
2163                         format_reverse = mobj.group('bw')[0] == 'b'
2164                         format_type = (mobj.group('type') or [None])[0]
2165                         not_format_type = {'v': 'a', 'a': 'v'}.get(format_type)
2166                         format_modified = mobj.group('mod') is not None
2167
2168                         format_fallback = not format_type and not format_modified  # for b, w
2169                         _filter_f = (
2170                             (lambda f: f.get('%scodec' % format_type) != 'none')
2171                             if format_type and format_modified  # bv*, ba*, wv*, wa*
2172                             else (lambda f: f.get('%scodec' % not_format_type) == 'none')
2173                             if format_type  # bv, ba, wv, wa
2174                             else (lambda f: f.get('vcodec') != 'none' and f.get('acodec') != 'none')
2175                             if not format_modified  # b, w
2176                             else lambda f: True)  # b*, w*
2177                         filter_f = lambda f: _filter_f(f) and (
2178                             f.get('vcodec') != 'none' or f.get('acodec') != 'none')
2179                     else:
2180                         if format_spec in self._format_selection_exts['audio']:
2181                             filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') != 'none'
2182                         elif format_spec in self._format_selection_exts['video']:
2183                             filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') != 'none' and f.get('vcodec') != 'none'
2184                         elif format_spec in self._format_selection_exts['storyboards']:
2185                             filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') == 'none' and f.get('vcodec') == 'none'
2186                         else:
2187                             filter_f = lambda f: f.get('format_id') == format_spec  # id
2188
2189                     def selector_function(ctx):
2190                         formats = list(ctx['formats'])
2191                         matches = list(filter(filter_f, formats)) if filter_f is not None else formats
2192                         if format_fallback and ctx['incomplete_formats'] and not matches:
2193                             # for extractors with incomplete formats (audio only (soundcloud)
2194                             # or video only (imgur)) best/worst will fallback to
2195                             # best/worst {video,audio}-only format
2196                             matches = formats
2197                         matches = LazyList(_check_formats(matches[::-1 if format_reverse else 1]))
2198                         try:
2199                             yield matches[format_idx - 1]
2200                         except IndexError:
2201                             return
2202
2203             filters = [self._build_format_filter(f) for f in selector.filters]
2204
2205             def final_selector(ctx):
2206                 ctx_copy = dict(ctx)
2207                 for _filter in filters:
2208                     ctx_copy['formats'] = list(filter(_filter, ctx_copy['formats']))
2209                 return selector_function(ctx_copy)
2210             return final_selector
2211
2212         stream = io.BytesIO(format_spec.encode('utf-8'))
2213         try:
2214             tokens = list(_remove_unused_ops(compat_tokenize_tokenize(stream.readline)))
2215         except tokenize.TokenError:
2216             raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec)))
2217
2218         class TokenIterator(object):
2219             def __init__(self, tokens):
2220                 self.tokens = tokens
2221                 self.counter = 0
2222
2223             def __iter__(self):
2224                 return self
2225
2226             def __next__(self):
2227                 if self.counter >= len(self.tokens):
2228                     raise StopIteration()
2229                 value = self.tokens[self.counter]
2230                 self.counter += 1
2231                 return value
2232
2233             next = __next__
2234
2235             def restore_last_token(self):
2236                 self.counter -= 1
2237
2238         parsed_selector = _parse_format_selection(iter(TokenIterator(tokens)))
2239         return _build_selector_function(parsed_selector)
2240
2241     def _calc_headers(self, info_dict):
2242         res = std_headers.copy()
2243         res.update(info_dict.get('http_headers') or {})
2244
2245         cookies = self._calc_cookies(info_dict)
2246         if cookies:
2247             res['Cookie'] = cookies
2248
2249         if 'X-Forwarded-For' not in res:
2250             x_forwarded_for_ip = info_dict.get('__x_forwarded_for_ip')
2251             if x_forwarded_for_ip:
2252                 res['X-Forwarded-For'] = x_forwarded_for_ip
2253
2254         return res
2255
2256     def _calc_cookies(self, info_dict):
2257         pr = sanitized_Request(info_dict['url'])
2258         self.cookiejar.add_cookie_header(pr)
2259         return pr.get_header('Cookie')
2260
2261     def _sort_thumbnails(self, thumbnails):
2262         thumbnails.sort(key=lambda t: (
2263             t.get('preference') if t.get('preference') is not None else -1,
2264             t.get('width') if t.get('width') is not None else -1,
2265             t.get('height') if t.get('height') is not None else -1,
2266             t.get('id') if t.get('id') is not None else '',
2267             t.get('url')))
2268
2269     def _sanitize_thumbnails(self, info_dict):
2270         thumbnails = info_dict.get('thumbnails')
2271         if thumbnails is None:
2272             thumbnail = info_dict.get('thumbnail')
2273             if thumbnail:
2274                 info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}]
2275         if not thumbnails:
2276             return
2277
2278         def check_thumbnails(thumbnails):
2279             for t in thumbnails:
2280                 self.to_screen(f'[info] Testing thumbnail {t["id"]}')
2281                 try:
2282                     self.urlopen(HEADRequest(t['url']))
2283                 except network_exceptions as err:
2284                     self.to_screen(f'[info] Unable to connect to thumbnail {t["id"]} URL {t["url"]!r} - {err}. Skipping...')
2285                     continue
2286                 yield t
2287
2288         self._sort_thumbnails(thumbnails)
2289         for i, t in enumerate(thumbnails):
2290             if t.get('id') is None:
2291                 t['id'] = '%d' % i
2292             if t.get('width') and t.get('height'):
2293                 t['resolution'] = '%dx%d' % (t['width'], t['height'])
2294             t['url'] = sanitize_url(t['url'])
2295
2296         if self.params.get('check_formats') is True:
2297             info_dict['thumbnails'] = LazyList(check_thumbnails(thumbnails[::-1]), reverse=True)
2298         else:
2299             info_dict['thumbnails'] = thumbnails
2300
2301     def process_video_result(self, info_dict, download=True):
2302         assert info_dict.get('_type', 'video') == 'video'
2303         self._num_videos += 1
2304
2305         if 'id' not in info_dict:
2306             raise ExtractorError('Missing "id" field in extractor result', ie=info_dict['extractor'])
2307         elif not info_dict.get('id'):
2308             raise ExtractorError('Extractor failed to obtain "id"', ie=info_dict['extractor'])
2309
2310         info_dict['fulltitle'] = info_dict.get('title')
2311         if 'title' not in info_dict:
2312             raise ExtractorError('Missing "title" field in extractor result',
2313                                  video_id=info_dict['id'], ie=info_dict['extractor'])
2314         elif not info_dict.get('title'):
2315             self.report_warning('Extractor failed to obtain "title". Creating a generic title instead')
2316             info_dict['title'] = f'{info_dict["extractor"]} video #{info_dict["id"]}'
2317
2318         def report_force_conversion(field, field_not, conversion):
2319             self.report_warning(
2320                 '"%s" field is not %s - forcing %s conversion, there is an error in extractor'
2321                 % (field, field_not, conversion))
2322
2323         def sanitize_string_field(info, string_field):
2324             field = info.get(string_field)
2325             if field is None or isinstance(field, compat_str):
2326                 return
2327             report_force_conversion(string_field, 'a string', 'string')
2328             info[string_field] = compat_str(field)
2329
2330         def sanitize_numeric_fields(info):
2331             for numeric_field in self._NUMERIC_FIELDS:
2332                 field = info.get(numeric_field)
2333                 if field is None or isinstance(field, compat_numeric_types):
2334                     continue
2335                 report_force_conversion(numeric_field, 'numeric', 'int')
2336                 info[numeric_field] = int_or_none(field)
2337
2338         sanitize_string_field(info_dict, 'id')
2339         sanitize_numeric_fields(info_dict)
2340
2341         if 'playlist' not in info_dict:
2342             # It isn't part of a playlist
2343             info_dict['playlist'] = None
2344             info_dict['playlist_index'] = None
2345
2346         self._sanitize_thumbnails(info_dict)
2347
2348         thumbnail = info_dict.get('thumbnail')
2349         thumbnails = info_dict.get('thumbnails')
2350         if thumbnail:
2351             info_dict['thumbnail'] = sanitize_url(thumbnail)
2352         elif thumbnails:
2353             info_dict['thumbnail'] = thumbnails[-1]['url']
2354
2355         if info_dict.get('display_id') is None and 'id' in info_dict:
2356             info_dict['display_id'] = info_dict['id']
2357
2358         if info_dict.get('duration') is not None:
2359             info_dict['duration_string'] = formatSeconds(info_dict['duration'])
2360
2361         for ts_key, date_key in (
2362                 ('timestamp', 'upload_date'),
2363                 ('release_timestamp', 'release_date'),
2364                 ('modified_timestamp', 'modified_date'),
2365         ):
2366             if info_dict.get(date_key) is None and info_dict.get(ts_key) is not None:
2367                 # Working around out-of-range timestamp values (e.g. negative ones on Windows,
2368                 # see http://bugs.python.org/issue1646728)
2369                 try:
2370                     upload_date = datetime.datetime.utcfromtimestamp(info_dict[ts_key])
2371                     info_dict[date_key] = upload_date.strftime('%Y%m%d')
2372                 except (ValueError, OverflowError, OSError):
2373                     pass
2374
2375         live_keys = ('is_live', 'was_live')
2376         live_status = info_dict.get('live_status')
2377         if live_status is None:
2378             for key in live_keys:
2379                 if info_dict.get(key) is False:
2380                     continue
2381                 if info_dict.get(key):
2382                     live_status = key
2383                 break
2384             if all(info_dict.get(key) is False for key in live_keys):
2385                 live_status = 'not_live'
2386         if live_status:
2387             info_dict['live_status'] = live_status
2388             for key in live_keys:
2389                 if info_dict.get(key) is None:
2390                     info_dict[key] = (live_status == key)
2391
2392         # Auto generate title fields corresponding to the *_number fields when missing
2393         # in order to always have clean titles. This is very common for TV series.
2394         for field in ('chapter', 'season', 'episode'):
2395             if info_dict.get('%s_number' % field) is not None and not info_dict.get(field):
2396                 info_dict[field] = '%s %d' % (field.capitalize(), info_dict['%s_number' % field])
2397
2398         for cc_kind in ('subtitles', 'automatic_captions'):
2399             cc = info_dict.get(cc_kind)
2400             if cc:
2401                 for _, subtitle in cc.items():
2402                     for subtitle_format in subtitle:
2403                         if subtitle_format.get('url'):
2404                             subtitle_format['url'] = sanitize_url(subtitle_format['url'])
2405                         if subtitle_format.get('ext') is None:
2406                             subtitle_format['ext'] = determine_ext(subtitle_format['url']).lower()
2407
2408         automatic_captions = info_dict.get('automatic_captions')
2409         subtitles = info_dict.get('subtitles')
2410
2411         info_dict['requested_subtitles'] = self.process_subtitles(
2412             info_dict['id'], subtitles, automatic_captions)
2413
2414         if info_dict.get('formats') is None:
2415             # There's only one format available
2416             formats = [info_dict]
2417         else:
2418             formats = info_dict['formats']
2419
2420         info_dict['__has_drm'] = any(f.get('has_drm') for f in formats)
2421         if not self.params.get('allow_unplayable_formats'):
2422             formats = [f for f in formats if not f.get('has_drm')]
2423
2424         if info_dict.get('is_live'):
2425             get_from_start = bool(self.params.get('live_from_start'))
2426             formats = [f for f in formats if bool(f.get('is_from_start')) == get_from_start]
2427             if not get_from_start:
2428                 info_dict['title'] += ' ' + datetime.datetime.now().strftime('%Y-%m-%d %H:%M')
2429
2430         if not formats:
2431             self.raise_no_formats(info_dict)
2432
2433         def is_wellformed(f):
2434             url = f.get('url')
2435             if not url:
2436                 self.report_warning(
2437                     '"url" field is missing or empty - skipping format, '
2438                     'there is an error in extractor')
2439                 return False
2440             if isinstance(url, bytes):
2441                 sanitize_string_field(f, 'url')
2442             return True
2443
2444         # Filter out malformed formats for better extraction robustness
2445         formats = list(filter(is_wellformed, formats))
2446
2447         formats_dict = {}
2448
2449         # We check that all the formats have the format and format_id fields
2450         for i, format in enumerate(formats):
2451             sanitize_string_field(format, 'format_id')
2452             sanitize_numeric_fields(format)
2453             format['url'] = sanitize_url(format['url'])
2454             if not format.get('format_id'):
2455                 format['format_id'] = compat_str(i)
2456             else:
2457                 # Sanitize format_id from characters used in format selector expression
2458                 format['format_id'] = re.sub(r'[\s,/+\[\]()]', '_', format['format_id'])
2459             format_id = format['format_id']
2460             if format_id not in formats_dict:
2461                 formats_dict[format_id] = []
2462             formats_dict[format_id].append(format)
2463
2464         # Make sure all formats have unique format_id
2465         common_exts = set(itertools.chain(*self._format_selection_exts.values()))
2466         for format_id, ambiguous_formats in formats_dict.items():
2467             ambigious_id = len(ambiguous_formats) > 1
2468             for i, format in enumerate(ambiguous_formats):
2469                 if ambigious_id:
2470                     format['format_id'] = '%s-%d' % (format_id, i)
2471                 if format.get('ext') is None:
2472                     format['ext'] = determine_ext(format['url']).lower()
2473                 # Ensure there is no conflict between id and ext in format selection
2474                 # See https://github.com/yt-dlp/yt-dlp/issues/1282
2475                 if format['format_id'] != format['ext'] and format['format_id'] in common_exts:
2476                     format['format_id'] = 'f%s' % format['format_id']
2477
2478         for i, format in enumerate(formats):
2479             if format.get('format') is None:
2480                 format['format'] = '{id} - {res}{note}'.format(
2481                     id=format['format_id'],
2482                     res=self.format_resolution(format),
2483                     note=format_field(format, 'format_note', ' (%s)'),
2484                 )
2485             if format.get('protocol') is None:
2486                 format['protocol'] = determine_protocol(format)
2487             if format.get('resolution') is None:
2488                 format['resolution'] = self.format_resolution(format, default=None)
2489             if format.get('dynamic_range') is None and format.get('vcodec') != 'none':
2490                 format['dynamic_range'] = 'SDR'
2491             if (info_dict.get('duration') and format.get('tbr')
2492                     and not format.get('filesize') and not format.get('filesize_approx')):
2493                 format['filesize_approx'] = info_dict['duration'] * format['tbr'] * (1024 / 8)
2494
2495             # Add HTTP headers, so that external programs can use them from the
2496             # json output
2497             full_format_info = info_dict.copy()
2498             full_format_info.update(format)
2499             format['http_headers'] = self._calc_headers(full_format_info)
2500         # Remove private housekeeping stuff
2501         if '__x_forwarded_for_ip' in info_dict:
2502             del info_dict['__x_forwarded_for_ip']
2503
2504         # TODO Central sorting goes here
2505
2506         if self.params.get('check_formats') is True:
2507             formats = LazyList(self._check_formats(formats[::-1]), reverse=True)
2508
2509         if not formats or formats[0] is not info_dict:
2510             # only set the 'formats' fields if the original info_dict list them
2511             # otherwise we end up with a circular reference, the first (and unique)
2512             # element in the 'formats' field in info_dict is info_dict itself,
2513             # which can't be exported to json
2514             info_dict['formats'] = formats
2515
2516         info_dict, _ = self.pre_process(info_dict)
2517
2518         # The pre-processors may have modified the formats
2519         formats = info_dict.get('formats', [info_dict])
2520
2521         list_only = self.params.get('simulate') is None and (
2522             self.params.get('list_thumbnails') or self.params.get('listformats') or self.params.get('listsubtitles'))
2523         interactive_format_selection = not list_only and self.format_selector == '-'
2524         if self.params.get('list_thumbnails'):
2525             self.list_thumbnails(info_dict)
2526         if self.params.get('listsubtitles'):
2527             if 'automatic_captions' in info_dict:
2528                 self.list_subtitles(
2529                     info_dict['id'], automatic_captions, 'automatic captions')
2530             self.list_subtitles(info_dict['id'], subtitles, 'subtitles')
2531         if self.params.get('listformats') or interactive_format_selection:
2532             self.list_formats(info_dict)
2533         if list_only:
2534             # Without this printing, -F --print-json will not work
2535             self.__forced_printings(info_dict, self.prepare_filename(info_dict), incomplete=True)
2536             return
2537
2538         format_selector = self.format_selector
2539         if format_selector is None:
2540             req_format = self._default_format_spec(info_dict, download=download)
2541             self.write_debug('Default format spec: %s' % req_format)
2542             format_selector = self.build_format_selector(req_format)
2543
2544         while True:
2545             if interactive_format_selection:
2546                 req_format = input(
2547                     self._format_screen('\nEnter format selector: ', self.Styles.EMPHASIS))
2548                 try:
2549                     format_selector = self.build_format_selector(req_format)
2550                 except SyntaxError as err:
2551                     self.report_error(err, tb=False, is_error=False)
2552                     continue
2553
2554             # While in format selection we may need to have an access to the original
2555             # format set in order to calculate some metrics or do some processing.
2556             # For now we need to be able to guess whether original formats provided
2557             # by extractor are incomplete or not (i.e. whether extractor provides only
2558             # video-only or audio-only formats) for proper formats selection for
2559             # extractors with such incomplete formats (see
2560             # https://github.com/ytdl-org/youtube-dl/pull/5556).
2561             # Since formats may be filtered during format selection and may not match
2562             # the original formats the results may be incorrect. Thus original formats
2563             # or pre-calculated metrics should be passed to format selection routines
2564             # as well.
2565             # We will pass a context object containing all necessary additional data
2566             # instead of just formats.
2567             # This fixes incorrect format selection issue (see
2568             # https://github.com/ytdl-org/youtube-dl/issues/10083).
2569             incomplete_formats = (
2570                 # All formats are video-only or
2571                 all(f.get('vcodec') != 'none' and f.get('acodec') == 'none' for f in formats)
2572                 # all formats are audio-only
2573                 or all(f.get('vcodec') == 'none' and f.get('acodec') != 'none' for f in formats))
2574
2575             ctx = {
2576                 'formats': formats,
2577                 'incomplete_formats': incomplete_formats,
2578             }
2579
2580             formats_to_download = list(format_selector(ctx))
2581             if interactive_format_selection and not formats_to_download:
2582                 self.report_error('Requested format is not available', tb=False, is_error=False)
2583                 continue
2584             break
2585
2586         if not formats_to_download:
2587             if not self.params.get('ignore_no_formats_error'):
2588                 raise ExtractorError('Requested format is not available', expected=True,
2589                                      video_id=info_dict['id'], ie=info_dict['extractor'])
2590             self.report_warning('Requested format is not available')
2591             # Process what we can, even without any available formats.
2592             formats_to_download = [{}]
2593
2594         best_format = formats_to_download[-1]
2595         if download:
2596             if best_format:
2597                 self.to_screen(
2598                     f'[info] {info_dict["id"]}: Downloading {len(formats_to_download)} format(s): '
2599                     + ', '.join([f['format_id'] for f in formats_to_download]))
2600             max_downloads_reached = False
2601             for i, fmt in enumerate(formats_to_download):
2602                 formats_to_download[i] = new_info = dict(info_dict)
2603                 # Save a reference to the original info_dict so that it can be modified in process_info if needed
2604                 new_info.update(fmt)
2605                 new_info['__original_infodict'] = info_dict
2606                 try:
2607                     self.process_info(new_info)
2608                 except MaxDownloadsReached:
2609                     max_downloads_reached = True
2610                 new_info.pop('__original_infodict')
2611                 # Remove copied info
2612                 for key, val in tuple(new_info.items()):
2613                     if info_dict.get(key) == val:
2614                         new_info.pop(key)
2615                 if max_downloads_reached:
2616                     break
2617
2618             write_archive = set(f.get('__write_download_archive', False) for f in formats_to_download)
2619             assert write_archive.issubset({True, False, 'ignore'})
2620             if True in write_archive and False not in write_archive:
2621                 self.record_download_archive(info_dict)
2622
2623             info_dict['requested_downloads'] = formats_to_download
2624             info_dict = self.run_all_pps('after_video', info_dict)
2625             if max_downloads_reached:
2626                 raise MaxDownloadsReached()
2627
2628         # We update the info dict with the selected best quality format (backwards compatibility)
2629         info_dict.update(best_format)
2630         return info_dict
2631
2632     def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
2633         """Select the requested subtitles and their format"""
2634         available_subs = {}
2635         if normal_subtitles and self.params.get('writesubtitles'):
2636             available_subs.update(normal_subtitles)
2637         if automatic_captions and self.params.get('writeautomaticsub'):
2638             for lang, cap_info in automatic_captions.items():
2639                 if lang not in available_subs:
2640                     available_subs[lang] = cap_info
2641
2642         if (not self.params.get('writesubtitles') and not
2643                 self.params.get('writeautomaticsub') or not
2644                 available_subs):
2645             return None
2646
2647         all_sub_langs = available_subs.keys()
2648         if self.params.get('allsubtitles', False):
2649             requested_langs = all_sub_langs
2650         elif self.params.get('subtitleslangs', False):
2651             # A list is used so that the order of languages will be the same as
2652             # given in subtitleslangs. See https://github.com/yt-dlp/yt-dlp/issues/1041
2653             requested_langs = []
2654             for lang_re in self.params.get('subtitleslangs'):
2655                 if lang_re == 'all':
2656                     requested_langs.extend(all_sub_langs)
2657                     continue
2658                 discard = lang_re[0] == '-'
2659                 if discard:
2660                     lang_re = lang_re[1:]
2661                 current_langs = filter(re.compile(lang_re + '$').match, all_sub_langs)
2662                 if discard:
2663                     for lang in current_langs:
2664                         while lang in requested_langs:
2665                             requested_langs.remove(lang)
2666                 else:
2667                     requested_langs.extend(current_langs)
2668             requested_langs = orderedSet(requested_langs)
2669         elif 'en' in available_subs:
2670             requested_langs = ['en']
2671         else:
2672             requested_langs = [list(all_sub_langs)[0]]
2673         if requested_langs:
2674             self.write_debug('Downloading subtitles: %s' % ', '.join(requested_langs))
2675
2676         formats_query = self.params.get('subtitlesformat', 'best')
2677         formats_preference = formats_query.split('/') if formats_query else []
2678         subs = {}
2679         for lang in requested_langs:
2680             formats = available_subs.get(lang)
2681             if formats is None:
2682                 self.report_warning('%s subtitles not available for %s' % (lang, video_id))
2683                 continue
2684             for ext in formats_preference:
2685                 if ext == 'best':
2686                     f = formats[-1]
2687                     break
2688                 matches = list(filter(lambda f: f['ext'] == ext, formats))
2689                 if matches:
2690                     f = matches[-1]
2691                     break
2692             else:
2693                 f = formats[-1]
2694                 self.report_warning(
2695                     'No subtitle format found matching "%s" for language %s, '
2696                     'using %s' % (formats_query, lang, f['ext']))
2697             subs[lang] = f
2698         return subs
2699
2700     def _forceprint(self, key, info_dict):
2701         if info_dict is None:
2702             return
2703         info_copy = info_dict.copy()
2704         info_copy['formats_table'] = self.render_formats_table(info_dict)
2705         info_copy['thumbnails_table'] = self.render_thumbnails_table(info_dict)
2706         info_copy['subtitles_table'] = self.render_subtitles_table(info_dict.get('id'), info_dict.get('subtitles'))
2707         info_copy['automatic_captions_table'] = self.render_subtitles_table(info_dict.get('id'), info_dict.get('automatic_captions'))
2708
2709         def format_tmpl(tmpl):
2710             mobj = re.match(r'\w+(=?)$', tmpl)
2711             if mobj and mobj.group(1):
2712                 return f'{tmpl[:-1]} = %({tmpl[:-1]})r'
2713             elif mobj:
2714                 return f'%({tmpl})s'
2715             return tmpl
2716
2717         for tmpl in self.params['forceprint'].get(key, []):
2718             self.to_stdout(self.evaluate_outtmpl(format_tmpl(tmpl), info_copy))
2719
2720         for tmpl, file_tmpl in self.params['print_to_file'].get(key, []):
2721             filename = self.evaluate_outtmpl(file_tmpl, info_dict)
2722             tmpl = format_tmpl(tmpl)
2723             self.to_screen(f'[info] Writing {tmpl!r} to: {filename}')
2724             with io.open(filename, 'a', encoding='utf-8') as f:
2725                 f.write(self.evaluate_outtmpl(tmpl, info_copy) + '\n')
2726
2727     def __forced_printings(self, info_dict, filename, incomplete):
2728         def print_mandatory(field, actual_field=None):
2729             if actual_field is None:
2730                 actual_field = field
2731             if (self.params.get('force%s' % field, False)
2732                     and (not incomplete or info_dict.get(actual_field) is not None)):
2733                 self.to_stdout(info_dict[actual_field])
2734
2735         def print_optional(field):
2736             if (self.params.get('force%s' % field, False)
2737                     and info_dict.get(field) is not None):
2738                 self.to_stdout(info_dict[field])
2739
2740         info_dict = info_dict.copy()
2741         if filename is not None:
2742             info_dict['filename'] = filename
2743         if info_dict.get('requested_formats') is not None:
2744             # For RTMP URLs, also include the playpath
2745             info_dict['urls'] = '\n'.join(f['url'] + f.get('play_path', '') for f in info_dict['requested_formats'])
2746         elif 'url' in info_dict:
2747             info_dict['urls'] = info_dict['url'] + info_dict.get('play_path', '')
2748
2749         if (self.params.get('forcejson')
2750                 or self.params['forceprint'].get('video')
2751                 or self.params['print_to_file'].get('video')):
2752             self.post_extract(info_dict)
2753         self._forceprint('video', info_dict)
2754
2755         print_mandatory('title')
2756         print_mandatory('id')
2757         print_mandatory('url', 'urls')
2758         print_optional('thumbnail')
2759         print_optional('description')
2760         print_optional('filename')
2761         if self.params.get('forceduration') and info_dict.get('duration') is not None:
2762             self.to_stdout(formatSeconds(info_dict['duration']))
2763         print_mandatory('format')
2764
2765         if self.params.get('forcejson'):
2766             self.to_stdout(json.dumps(self.sanitize_info(info_dict)))
2767
2768     def dl(self, name, info, subtitle=False, test=False):
2769         if not info.get('url'):
2770             self.raise_no_formats(info, True)
2771
2772         if test:
2773             verbose = self.params.get('verbose')
2774             params = {
2775                 'test': True,
2776                 'quiet': self.params.get('quiet') or not verbose,
2777                 'verbose': verbose,
2778                 'noprogress': not verbose,
2779                 'nopart': True,
2780                 'skip_unavailable_fragments': False,
2781                 'keep_fragments': False,
2782                 'overwrites': True,
2783                 '_no_ytdl_file': True,
2784             }
2785         else:
2786             params = self.params
2787         fd = get_suitable_downloader(info, params, to_stdout=(name == '-'))(self, params)
2788         if not test:
2789             for ph in self._progress_hooks:
2790                 fd.add_progress_hook(ph)
2791             urls = '", "'.join(
2792                 (f['url'].split(',')[0] + ',<data>' if f['url'].startswith('data:') else f['url'])
2793                 for f in info.get('requested_formats', []) or [info])
2794             self.write_debug('Invoking downloader on "%s"' % urls)
2795
2796         # Note: Ideally info should be a deep-copied so that hooks cannot modify it.
2797         # But it may contain objects that are not deep-copyable
2798         new_info = self._copy_infodict(info)
2799         if new_info.get('http_headers') is None:
2800             new_info['http_headers'] = self._calc_headers(new_info)
2801         return fd.download(name, new_info, subtitle)
2802
2803     def existing_file(self, filepaths, *, default_overwrite=True):
2804         existing_files = list(filter(os.path.exists, orderedSet(filepaths)))
2805         if existing_files and not self.params.get('overwrites', default_overwrite):
2806             return existing_files[0]
2807
2808         for file in existing_files:
2809             self.report_file_delete(file)
2810             os.remove(file)
2811         return None
2812
2813     def process_info(self, info_dict):
2814         """Process a single resolved IE result. (Modified it in-place)"""
2815
2816         assert info_dict.get('_type', 'video') == 'video'
2817         original_infodict = info_dict
2818
2819         if 'format' not in info_dict and 'ext' in info_dict:
2820             info_dict['format'] = info_dict['ext']
2821
2822         if self._match_entry(info_dict) is not None:
2823             info_dict['__write_download_archive'] = 'ignore'
2824             return
2825
2826         self.post_extract(info_dict)
2827         self._num_downloads += 1
2828
2829         # info_dict['_filename'] needs to be set for backward compatibility
2830         info_dict['_filename'] = full_filename = self.prepare_filename(info_dict, warn=True)
2831         temp_filename = self.prepare_filename(info_dict, 'temp')
2832         files_to_move = {}
2833
2834         # Forced printings
2835         self.__forced_printings(info_dict, full_filename, incomplete=('format' not in info_dict))
2836
2837         if self.params.get('simulate'):
2838             info_dict['__write_download_archive'] = self.params.get('force_write_download_archive')
2839             return
2840
2841         if full_filename is None:
2842             return
2843         if not self._ensure_dir_exists(encodeFilename(full_filename)):
2844             return
2845         if not self._ensure_dir_exists(encodeFilename(temp_filename)):
2846             return
2847
2848         if self._write_description('video', info_dict,
2849                                    self.prepare_filename(info_dict, 'description')) is None:
2850             return
2851
2852         sub_files = self._write_subtitles(info_dict, temp_filename)
2853         if sub_files is None:
2854             return
2855         files_to_move.update(dict(sub_files))
2856
2857         thumb_files = self._write_thumbnails(
2858             'video', info_dict, temp_filename, self.prepare_filename(info_dict, 'thumbnail'))
2859         if thumb_files is None:
2860             return
2861         files_to_move.update(dict(thumb_files))
2862
2863         infofn = self.prepare_filename(info_dict, 'infojson')
2864         _infojson_written = self._write_info_json('video', info_dict, infofn)
2865         if _infojson_written:
2866             info_dict['infojson_filename'] = infofn
2867             # For backward compatibility, even though it was a private field
2868             info_dict['__infojson_filename'] = infofn
2869         elif _infojson_written is None:
2870             return
2871
2872         # Note: Annotations are deprecated
2873         annofn = None
2874         if self.params.get('writeannotations', False):
2875             annofn = self.prepare_filename(info_dict, 'annotation')
2876         if annofn:
2877             if not self._ensure_dir_exists(encodeFilename(annofn)):
2878                 return
2879             if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(annofn)):
2880                 self.to_screen('[info] Video annotations are already present')
2881             elif not info_dict.get('annotations'):
2882                 self.report_warning('There are no annotations to write.')
2883             else:
2884                 try:
2885                     self.to_screen('[info] Writing video annotations to: ' + annofn)
2886                     with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
2887                         annofile.write(info_dict['annotations'])
2888                 except (KeyError, TypeError):
2889                     self.report_warning('There are no annotations to write.')
2890                 except (OSError, IOError):
2891                     self.report_error('Cannot write annotations file: ' + annofn)
2892                     return
2893
2894         # Write internet shortcut files
2895         def _write_link_file(link_type):
2896             if 'webpage_url' not in info_dict:
2897                 self.report_error('Cannot write internet shortcut file because the "webpage_url" field is missing in the media information')
2898                 return False
2899             linkfn = replace_extension(self.prepare_filename(info_dict, 'link'), link_type, info_dict.get('ext'))
2900             if not self._ensure_dir_exists(encodeFilename(linkfn)):
2901                 return False
2902             if self.params.get('overwrites', True) and os.path.exists(encodeFilename(linkfn)):
2903                 self.to_screen(f'[info] Internet shortcut (.{link_type}) is already present')
2904                 return True
2905             try:
2906                 self.to_screen(f'[info] Writing internet shortcut (.{link_type}) to: {linkfn}')
2907                 with io.open(encodeFilename(to_high_limit_path(linkfn)), 'w', encoding='utf-8',
2908                              newline='\r\n' if link_type == 'url' else '\n') as linkfile:
2909                     template_vars = {'url': iri_to_uri(info_dict['webpage_url'])}
2910                     if link_type == 'desktop':
2911                         template_vars['filename'] = linkfn[:-(len(link_type) + 1)]
2912                     linkfile.write(LINK_TEMPLATES[link_type] % template_vars)
2913             except (OSError, IOError):
2914                 self.report_error(f'Cannot write internet shortcut {linkfn}')
2915                 return False
2916             return True
2917
2918         write_links = {
2919             'url': self.params.get('writeurllink'),
2920             'webloc': self.params.get('writewebloclink'),
2921             'desktop': self.params.get('writedesktoplink'),
2922         }
2923         if self.params.get('writelink'):
2924             link_type = ('webloc' if sys.platform == 'darwin'
2925                          else 'desktop' if sys.platform.startswith('linux')
2926                          else 'url')
2927             write_links[link_type] = True
2928
2929         if any(should_write and not _write_link_file(link_type)
2930                for link_type, should_write in write_links.items()):
2931             return
2932
2933         def replace_info_dict(new_info):
2934             nonlocal info_dict
2935             if new_info == info_dict:
2936                 return
2937             info_dict.clear()
2938             info_dict.update(new_info)
2939
2940         try:
2941             new_info, files_to_move = self.pre_process(info_dict, 'before_dl', files_to_move)
2942             replace_info_dict(new_info)
2943         except PostProcessingError as err:
2944             self.report_error('Preprocessing: %s' % str(err))
2945             return
2946
2947         if self.params.get('skip_download'):
2948             info_dict['filepath'] = temp_filename
2949             info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))
2950             info_dict['__files_to_move'] = files_to_move
2951             replace_info_dict(self.run_pp(MoveFilesAfterDownloadPP(self, False), info_dict))
2952             info_dict['__write_download_archive'] = self.params.get('force_write_download_archive')
2953         else:
2954             # Download
2955             info_dict.setdefault('__postprocessors', [])
2956             try:
2957
2958                 def existing_video_file(*filepaths):
2959                     ext = info_dict.get('ext')
2960                     converted = lambda file: replace_extension(file, self.params.get('final_ext') or ext, ext)
2961                     file = self.existing_file(itertools.chain(*zip(map(converted, filepaths), filepaths)),
2962                                               default_overwrite=False)
2963                     if file:
2964                         info_dict['ext'] = os.path.splitext(file)[1][1:]
2965                     return file
2966
2967                 success = True
2968                 if info_dict.get('requested_formats') is not None:
2969
2970                     def compatible_formats(formats):
2971                         # TODO: some formats actually allow this (mkv, webm, ogg, mp4), but not all of them.
2972                         video_formats = [format for format in formats if format.get('vcodec') != 'none']
2973                         audio_formats = [format for format in formats if format.get('acodec') != 'none']
2974                         if len(video_formats) > 2 or len(audio_formats) > 2:
2975                             return False
2976
2977                         # Check extension
2978                         exts = set(format.get('ext') for format in formats)
2979                         COMPATIBLE_EXTS = (
2980                             set(('mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'ismv', 'isma')),
2981                             set(('webm',)),
2982                         )
2983                         for ext_sets in COMPATIBLE_EXTS:
2984                             if ext_sets.issuperset(exts):
2985                                 return True
2986                         # TODO: Check acodec/vcodec
2987                         return False
2988
2989                     requested_formats = info_dict['requested_formats']
2990                     old_ext = info_dict['ext']
2991                     if self.params.get('merge_output_format') is None:
2992                         if not compatible_formats(requested_formats):
2993                             info_dict['ext'] = 'mkv'
2994                             self.report_warning(
2995                                 'Requested formats are incompatible for merge and will be merged into mkv')
2996                         if (info_dict['ext'] == 'webm'
2997                                 and info_dict.get('thumbnails')
2998                                 # check with type instead of pp_key, __name__, or isinstance
2999                                 # since we dont want any custom PPs to trigger this
3000                                 and any(type(pp) == EmbedThumbnailPP for pp in self._pps['post_process'])):
3001                             info_dict['ext'] = 'mkv'
3002                             self.report_warning(
3003                                 'webm doesn\'t support embedding a thumbnail, mkv will be used')
3004                     new_ext = info_dict['ext']
3005
3006                     def correct_ext(filename, ext=new_ext):
3007                         if filename == '-':
3008                             return filename
3009                         filename_real_ext = os.path.splitext(filename)[1][1:]
3010                         filename_wo_ext = (
3011                             os.path.splitext(filename)[0]
3012                             if filename_real_ext in (old_ext, new_ext)
3013                             else filename)
3014                         return '%s.%s' % (filename_wo_ext, ext)
3015
3016                     # Ensure filename always has a correct extension for successful merge
3017                     full_filename = correct_ext(full_filename)
3018                     temp_filename = correct_ext(temp_filename)
3019                     dl_filename = existing_video_file(full_filename, temp_filename)
3020                     info_dict['__real_download'] = False
3021
3022                     downloaded = []
3023                     merger = FFmpegMergerPP(self)
3024
3025                     fd = get_suitable_downloader(info_dict, self.params, to_stdout=temp_filename == '-')
3026                     if dl_filename is not None:
3027                         self.report_file_already_downloaded(dl_filename)
3028                     elif fd:
3029                         for f in requested_formats if fd != FFmpegFD else []:
3030                             f['filepath'] = fname = prepend_extension(
3031                                 correct_ext(temp_filename, info_dict['ext']),
3032                                 'f%s' % f['format_id'], info_dict['ext'])
3033                             downloaded.append(fname)
3034                         info_dict['url'] = '\n'.join(f['url'] for f in requested_formats)
3035                         success, real_download = self.dl(temp_filename, info_dict)
3036                         info_dict['__real_download'] = real_download
3037                     else:
3038                         if self.params.get('allow_unplayable_formats'):
3039                             self.report_warning(
3040                                 'You have requested merging of multiple formats '
3041                                 'while also allowing unplayable formats to be downloaded. '
3042                                 'The formats won\'t be merged to prevent data corruption.')
3043                         elif not merger.available:
3044                             self.report_warning(
3045                                 'You have requested merging of multiple formats but ffmpeg is not installed. '
3046                                 'The formats won\'t be merged.')
3047
3048                         if temp_filename == '-':
3049                             reason = ('using a downloader other than ffmpeg' if FFmpegFD.can_merge_formats(info_dict, self.params)
3050                                       else 'but the formats are incompatible for simultaneous download' if merger.available
3051                                       else 'but ffmpeg is not installed')
3052                             self.report_warning(
3053                                 f'You have requested downloading multiple formats to stdout {reason}. '
3054                                 'The formats will be streamed one after the other')
3055                             fname = temp_filename
3056                         for f in requested_formats:
3057                             new_info = dict(info_dict)
3058                             del new_info['requested_formats']
3059                             new_info.update(f)
3060                             if temp_filename != '-':
3061                                 fname = prepend_extension(
3062                                     correct_ext(temp_filename, new_info['ext']),
3063                                     'f%s' % f['format_id'], new_info['ext'])
3064                                 if not self._ensure_dir_exists(fname):
3065                                     return
3066                                 f['filepath'] = fname
3067                                 downloaded.append(fname)
3068                             partial_success, real_download = self.dl(fname, new_info)
3069                             info_dict['__real_download'] = info_dict['__real_download'] or real_download
3070                             success = success and partial_success
3071
3072                     if downloaded and merger.available and not self.params.get('allow_unplayable_formats'):
3073                         info_dict['__postprocessors'].append(merger)
3074                         info_dict['__files_to_merge'] = downloaded
3075                         # Even if there were no downloads, it is being merged only now
3076                         info_dict['__real_download'] = True
3077                     else:
3078                         for file in downloaded:
3079                             files_to_move[file] = None
3080                 else:
3081                     # Just a single file
3082                     dl_filename = existing_video_file(full_filename, temp_filename)
3083                     if dl_filename is None or dl_filename == temp_filename:
3084                         # dl_filename == temp_filename could mean that the file was partially downloaded with --no-part.
3085                         # So we should try to resume the download
3086                         success, real_download = self.dl(temp_filename, info_dict)
3087                         info_dict['__real_download'] = real_download
3088                     else:
3089                         self.report_file_already_downloaded(dl_filename)
3090
3091                 dl_filename = dl_filename or temp_filename
3092                 info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))
3093
3094             except network_exceptions as err:
3095                 self.report_error('unable to download video data: %s' % error_to_compat_str(err))
3096                 return
3097             except (OSError, IOError) as err:
3098                 raise UnavailableVideoError(err)
3099             except (ContentTooShortError, ) as err:
3100                 self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
3101                 return
3102
3103             if success and full_filename != '-':
3104
3105                 def fixup():
3106                     do_fixup = True
3107                     fixup_policy = self.params.get('fixup')
3108                     vid = info_dict['id']
3109
3110                     if fixup_policy in ('ignore', 'never'):
3111                         return
3112                     elif fixup_policy == 'warn':
3113                         do_fixup = False
3114                     elif fixup_policy != 'force':
3115                         assert fixup_policy in ('detect_or_warn', None)
3116                         if not info_dict.get('__real_download'):
3117                             do_fixup = False
3118
3119                     def ffmpeg_fixup(cndn, msg, cls):
3120                         if not cndn:
3121                             return
3122                         if not do_fixup:
3123                             self.report_warning(f'{vid}: {msg}')
3124                             return
3125                         pp = cls(self)
3126                         if pp.available:
3127                             info_dict['__postprocessors'].append(pp)
3128                         else:
3129                             self.report_warning(f'{vid}: {msg}. Install ffmpeg to fix this automatically')
3130
3131                     stretched_ratio = info_dict.get('stretched_ratio')
3132                     ffmpeg_fixup(
3133                         stretched_ratio not in (1, None),
3134                         f'Non-uniform pixel ratio {stretched_ratio}',
3135                         FFmpegFixupStretchedPP)
3136
3137                     ffmpeg_fixup(
3138                         (info_dict.get('requested_formats') is None
3139                          and info_dict.get('container') == 'm4a_dash'
3140                          and info_dict.get('ext') == 'm4a'),
3141                         'writing DASH m4a. Only some players support this container',
3142                         FFmpegFixupM4aPP)
3143
3144                     downloader = get_suitable_downloader(info_dict, self.params) if 'protocol' in info_dict else None
3145                     downloader = downloader.__name__ if downloader else None
3146
3147                     if info_dict.get('requested_formats') is None:  # Not necessary if doing merger
3148                         ffmpeg_fixup(downloader == 'HlsFD',
3149                                      'Possible MPEG-TS in MP4 container or malformed AAC timestamps',
3150                                      FFmpegFixupM3u8PP)
3151                         ffmpeg_fixup(info_dict.get('is_live') and downloader == 'DashSegmentsFD',
3152                                      'Possible duplicate MOOV atoms', FFmpegFixupDuplicateMoovPP)
3153
3154                     ffmpeg_fixup(downloader == 'WebSocketFragmentFD', 'Malformed timestamps detected', FFmpegFixupTimestampPP)
3155                     ffmpeg_fixup(downloader == 'WebSocketFragmentFD', 'Malformed duration detected', FFmpegFixupDurationPP)
3156
3157                 fixup()
3158                 try:
3159                     replace_info_dict(self.post_process(dl_filename, info_dict, files_to_move))
3160                 except PostProcessingError as err:
3161                     self.report_error('Postprocessing: %s' % str(err))
3162                     return
3163                 try:
3164                     for ph in self._post_hooks:
3165                         ph(info_dict['filepath'])
3166                 except Exception as err:
3167                     self.report_error('post hooks: %s' % str(err))
3168                     return
3169                 info_dict['__write_download_archive'] = True
3170
3171         if self.params.get('force_write_download_archive'):
3172             info_dict['__write_download_archive'] = True
3173
3174         # Make sure the info_dict was modified in-place
3175         assert info_dict is original_infodict
3176
3177         max_downloads = self.params.get('max_downloads')
3178         if max_downloads is not None and self._num_downloads >= int(max_downloads):
3179             raise MaxDownloadsReached()
3180
3181     def __download_wrapper(self, func):
3182         @functools.wraps(func)
3183         def wrapper(*args, **kwargs):
3184             try:
3185                 res = func(*args, **kwargs)
3186             except UnavailableVideoError as e:
3187                 self.report_error(e)
3188             except MaxDownloadsReached as e:
3189                 self.to_screen(f'[info] {e}')
3190                 raise
3191             except DownloadCancelled as e:
3192                 self.to_screen(f'[info] {e}')
3193                 if not self.params.get('break_per_url'):
3194                     raise
3195             else:
3196                 if self.params.get('dump_single_json', False):
3197                     self.post_extract(res)
3198                     self.to_stdout(json.dumps(self.sanitize_info(res)))
3199         return wrapper
3200
3201     def download(self, url_list):
3202         """Download a given list of URLs."""
3203         url_list = variadic(url_list)  # Passing a single URL is a common mistake
3204         outtmpl = self.outtmpl_dict['default']
3205         if (len(url_list) > 1
3206                 and outtmpl != '-'
3207                 and '%' not in outtmpl
3208                 and self.params.get('max_downloads') != 1):
3209             raise SameFileError(outtmpl)
3210
3211         for url in url_list:
3212             self.__download_wrapper(self.extract_info)(
3213                 url, force_generic_extractor=self.params.get('force_generic_extractor', False))
3214
3215         return self._download_retcode
3216
3217     def download_with_info_file(self, info_filename):
3218         with contextlib.closing(fileinput.FileInput(
3219                 [info_filename], mode='r',
3220                 openhook=fileinput.hook_encoded('utf-8'))) as f:
3221             # FileInput doesn't have a read method, we can't call json.load
3222             info = self.sanitize_info(json.loads('\n'.join(f)), self.params.get('clean_infojson', True))
3223         try:
3224             self.__download_wrapper(self.process_ie_result)(info, download=True)
3225         except (DownloadError, EntryNotInPlaylist, ReExtractInfo) as e:
3226             if not isinstance(e, EntryNotInPlaylist):
3227                 self.to_stderr('\r')
3228             webpage_url = info.get('webpage_url')
3229             if webpage_url is not None:
3230                 self.report_warning(f'The info failed to download: {e}; trying with URL {webpage_url}')
3231                 return self.download([webpage_url])
3232             else:
3233                 raise
3234         return self._download_retcode
3235
3236     @staticmethod
3237     def sanitize_info(info_dict, remove_private_keys=False):
3238         ''' Sanitize the infodict for converting to json '''
3239         if info_dict is None:
3240             return info_dict
3241         info_dict.setdefault('epoch', int(time.time()))
3242         info_dict.setdefault('_type', 'video')
3243         remove_keys = {'__original_infodict'}  # Always remove this since this may contain a copy of the entire dict
3244         keep_keys = ['_type']  # Always keep this to facilitate load-info-json
3245         if remove_private_keys:
3246             remove_keys |= {
3247                 'requested_downloads', 'requested_formats', 'requested_subtitles', 'requested_entries',
3248                 'entries', 'filepath', 'infojson_filename', 'original_url', 'playlist_autonumber',
3249             }
3250             reject = lambda k, v: k not in keep_keys and (
3251                 k.startswith('_') or k in remove_keys or v is None)
3252         else:
3253             reject = lambda k, v: k in remove_keys
3254
3255         def filter_fn(obj):
3256             if isinstance(obj, dict):
3257                 return {k: filter_fn(v) for k, v in obj.items() if not reject(k, v)}
3258             elif isinstance(obj, (list, tuple, set, LazyList)):
3259                 return list(map(filter_fn, obj))
3260             elif obj is None or isinstance(obj, (str, int, float, bool)):
3261                 return obj
3262             else:
3263                 return repr(obj)
3264
3265         return filter_fn(info_dict)
3266
3267     @staticmethod
3268     def filter_requested_info(info_dict, actually_filter=True):
3269         ''' Alias of sanitize_info for backward compatibility '''
3270         return YoutubeDL.sanitize_info(info_dict, actually_filter)
3271
3272     @staticmethod
3273     def post_extract(info_dict):
3274         def actual_post_extract(info_dict):
3275             if info_dict.get('_type') in ('playlist', 'multi_video'):
3276                 for video_dict in info_dict.get('entries', {}):
3277                     actual_post_extract(video_dict or {})
3278                 return
3279
3280             post_extractor = info_dict.get('__post_extractor') or (lambda: {})
3281             extra = post_extractor().items()
3282             info_dict.update(extra)
3283             info_dict.pop('__post_extractor', None)
3284
3285             original_infodict = info_dict.get('__original_infodict') or {}
3286             original_infodict.update(extra)
3287             original_infodict.pop('__post_extractor', None)
3288
3289         actual_post_extract(info_dict or {})
3290
3291     def run_pp(self, pp, infodict):
3292         files_to_delete = []
3293         if '__files_to_move' not in infodict:
3294             infodict['__files_to_move'] = {}
3295         try:
3296             files_to_delete, infodict = pp.run(infodict)
3297         except PostProcessingError as e:
3298             # Must be True and not 'only_download'
3299             if self.params.get('ignoreerrors') is True:
3300                 self.report_error(e)
3301                 return infodict
3302             raise
3303
3304         if not files_to_delete:
3305             return infodict
3306         if self.params.get('keepvideo', False):
3307             for f in files_to_delete:
3308                 infodict['__files_to_move'].setdefault(f, '')
3309         else:
3310             for old_filename in set(files_to_delete):
3311                 self.to_screen('Deleting original file %s (pass -k to keep)' % old_filename)
3312                 try:
3313                     os.remove(encodeFilename(old_filename))
3314                 except (IOError, OSError):
3315                     self.report_warning('Unable to remove downloaded original file')
3316                 if old_filename in infodict['__files_to_move']:
3317                     del infodict['__files_to_move'][old_filename]
3318         return infodict
3319
3320     def run_all_pps(self, key, info, *, additional_pps=None):
3321         self._forceprint(key, info)
3322         for pp in (additional_pps or []) + self._pps[key]:
3323             info = self.run_pp(pp, info)
3324         return info
3325
3326     def pre_process(self, ie_info, key='pre_process', files_to_move=None):
3327         info = dict(ie_info)
3328         info['__files_to_move'] = files_to_move or {}
3329         info = self.run_all_pps(key, info)
3330         return info, info.pop('__files_to_move', None)
3331
3332     def post_process(self, filename, info, files_to_move=None):
3333         """Run all the postprocessors on the given file."""
3334         info['filepath'] = filename
3335         info['__files_to_move'] = files_to_move or {}
3336         info = self.run_all_pps('post_process', info, additional_pps=info.get('__postprocessors'))
3337         info = self.run_pp(MoveFilesAfterDownloadPP(self), info)
3338         del info['__files_to_move']
3339         return self.run_all_pps('after_move', info)
3340
3341     def _make_archive_id(self, info_dict):
3342         video_id = info_dict.get('id')
3343         if not video_id:
3344             return
3345         # Future-proof against any change in case
3346         # and backwards compatibility with prior versions
3347         extractor = info_dict.get('extractor_key') or info_dict.get('ie_key')  # key in a playlist
3348         if extractor is None:
3349             url = str_or_none(info_dict.get('url'))
3350             if not url:
3351                 return
3352             # Try to find matching extractor for the URL and take its ie_key
3353             for ie_key, ie in self._ies.items():
3354                 if ie.suitable(url):
3355                     extractor = ie_key
3356                     break
3357             else:
3358                 return
3359         return '%s %s' % (extractor.lower(), video_id)
3360
3361     def in_download_archive(self, info_dict):
3362         fn = self.params.get('download_archive')
3363         if fn is None:
3364             return False
3365
3366         vid_id = self._make_archive_id(info_dict)
3367         if not vid_id:
3368             return False  # Incomplete video information
3369
3370         return vid_id in self.archive
3371
3372     def record_download_archive(self, info_dict):
3373         fn = self.params.get('download_archive')
3374         if fn is None:
3375             return
3376         vid_id = self._make_archive_id(info_dict)
3377         assert vid_id
3378         self.write_debug(f'Adding to archive: {vid_id}')
3379         with locked_file(fn, 'a', encoding='utf-8') as archive_file:
3380             archive_file.write(vid_id + '\n')
3381         self.archive.add(vid_id)
3382
3383     @staticmethod
3384     def format_resolution(format, default='unknown'):
3385         if format.get('vcodec') == 'none' and format.get('acodec') != 'none':
3386             return 'audio only'
3387         if format.get('resolution') is not None:
3388             return format['resolution']
3389         if format.get('width') and format.get('height'):
3390             return '%dx%d' % (format['width'], format['height'])
3391         elif format.get('height'):
3392             return '%sp' % format['height']
3393         elif format.get('width'):
3394             return '%dx?' % format['width']
3395         return default
3396
3397     def _list_format_headers(self, *headers):
3398         if self.params.get('listformats_table', True) is not False:
3399             return [self._format_screen(header, self.Styles.HEADERS) for header in headers]
3400         return headers
3401
3402     def _format_note(self, fdict):
3403         res = ''
3404         if fdict.get('ext') in ['f4f', 'f4m']:
3405             res += '(unsupported)'
3406         if fdict.get('language'):
3407             if res:
3408                 res += ' '
3409             res += '[%s]' % fdict['language']
3410         if fdict.get('format_note') is not None:
3411             if res:
3412                 res += ' '
3413             res += fdict['format_note']
3414         if fdict.get('tbr') is not None:
3415             if res:
3416                 res += ', '
3417             res += '%4dk' % fdict['tbr']
3418         if fdict.get('container') is not None:
3419             if res:
3420                 res += ', '
3421             res += '%s container' % fdict['container']
3422         if (fdict.get('vcodec') is not None
3423                 and fdict.get('vcodec') != 'none'):
3424             if res:
3425                 res += ', '
3426             res += fdict['vcodec']
3427             if fdict.get('vbr') is not None:
3428                 res += '@'
3429         elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
3430             res += 'video@'
3431         if fdict.get('vbr') is not None:
3432             res += '%4dk' % fdict['vbr']
3433         if fdict.get('fps') is not None:
3434             if res:
3435                 res += ', '
3436             res += '%sfps' % fdict['fps']
3437         if fdict.get('acodec') is not None:
3438             if res:
3439                 res += ', '
3440             if fdict['acodec'] == 'none':
3441                 res += 'video only'
3442             else:
3443                 res += '%-5s' % fdict['acodec']
3444         elif fdict.get('abr') is not None:
3445             if res:
3446                 res += ', '
3447             res += 'audio'
3448         if fdict.get('abr') is not None:
3449             res += '@%3dk' % fdict['abr']
3450         if fdict.get('asr') is not None:
3451             res += ' (%5dHz)' % fdict['asr']
3452         if fdict.get('filesize') is not None:
3453             if res:
3454                 res += ', '
3455             res += format_bytes(fdict['filesize'])
3456         elif fdict.get('filesize_approx') is not None:
3457             if res:
3458                 res += ', '
3459             res += '~' + format_bytes(fdict['filesize_approx'])
3460         return res
3461
3462     def render_formats_table(self, info_dict):
3463         if not info_dict.get('formats') and not info_dict.get('url'):
3464             return None
3465
3466         formats = info_dict.get('formats', [info_dict])
3467         if not self.params.get('listformats_table', True) is not False:
3468             table = [
3469                 [
3470                     format_field(f, 'format_id'),
3471                     format_field(f, 'ext'),
3472                     self.format_resolution(f),
3473                     self._format_note(f)
3474                 ] for f in formats if f.get('preference') is None or f['preference'] >= -1000]
3475             return render_table(['format code', 'extension', 'resolution', 'note'], table, extra_gap=1)
3476
3477         delim = self._format_screen('\u2502', self.Styles.DELIM, '|', test_encoding=True)
3478         table = [
3479             [
3480                 self._format_screen(format_field(f, 'format_id'), self.Styles.ID),
3481                 format_field(f, 'ext'),
3482                 format_field(f, func=self.format_resolution, ignore=('audio only', 'images')),
3483                 format_field(f, 'fps', '\t%d'),
3484                 format_field(f, 'dynamic_range', '%s', ignore=(None, 'SDR')).replace('HDR', ''),
3485                 delim,
3486                 format_field(f, 'filesize', ' \t%s', func=format_bytes) + format_field(f, 'filesize_approx', '~\t%s', func=format_bytes),
3487                 format_field(f, 'tbr', '\t%dk'),
3488                 shorten_protocol_name(f.get('protocol', '')),
3489                 delim,
3490                 format_field(f, 'vcodec', default='unknown').replace(
3491                     'none', 'images' if f.get('acodec') == 'none'
3492                             else self._format_screen('audio only', self.Styles.SUPPRESS)),
3493                 format_field(f, 'vbr', '\t%dk'),
3494                 format_field(f, 'acodec', default='unknown').replace(
3495                     'none', '' if f.get('vcodec') == 'none'
3496                             else self._format_screen('video only', self.Styles.SUPPRESS)),
3497                 format_field(f, 'abr', '\t%dk'),
3498                 format_field(f, 'asr', '\t%dHz'),
3499                 join_nonempty(
3500                     self._format_screen('UNSUPPORTED', 'light red') if f.get('ext') in ('f4f', 'f4m') else None,
3501                     format_field(f, 'language', '[%s]'),
3502                     join_nonempty(format_field(f, 'format_note'),
3503                                   format_field(f, 'container', ignore=(None, f.get('ext'))),
3504                                   delim=', '),
3505                     delim=' '),
3506             ] for f in formats if f.get('preference') is None or f['preference'] >= -1000]
3507         header_line = self._list_format_headers(
3508             'ID', 'EXT', 'RESOLUTION', '\tFPS', 'HDR', delim, '\tFILESIZE', '\tTBR', 'PROTO',
3509             delim, 'VCODEC', '\tVBR', 'ACODEC', '\tABR', '\tASR', 'MORE INFO')
3510
3511         return render_table(
3512             header_line, table, hide_empty=True,
3513             delim=self._format_screen('\u2500', self.Styles.DELIM, '-', test_encoding=True))
3514
3515     def render_thumbnails_table(self, info_dict):
3516         thumbnails = list(info_dict.get('thumbnails') or [])
3517         if not thumbnails:
3518             return None
3519         return render_table(
3520             self._list_format_headers('ID', 'Width', 'Height', 'URL'),
3521             [[t.get('id'), t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']] for t in thumbnails])
3522
3523     def render_subtitles_table(self, video_id, subtitles):
3524         def _row(lang, formats):
3525             exts, names = zip(*((f['ext'], f.get('name') or 'unknown') for f in reversed(formats)))
3526             if len(set(names)) == 1:
3527                 names = [] if names[0] == 'unknown' else names[:1]
3528             return [lang, ', '.join(names), ', '.join(exts)]
3529
3530         if not subtitles:
3531             return None
3532         return render_table(
3533             self._list_format_headers('Language', 'Name', 'Formats'),
3534             [_row(lang, formats) for lang, formats in subtitles.items()],
3535             hide_empty=True)
3536
3537     def __list_table(self, video_id, name, func, *args):
3538         table = func(*args)
3539         if not table:
3540             self.to_screen(f'{video_id} has no {name}')
3541             return
3542         self.to_screen(f'[info] Available {name} for {video_id}:')
3543         self.to_stdout(table)
3544
3545     def list_formats(self, info_dict):
3546         self.__list_table(info_dict['id'], 'formats', self.render_formats_table, info_dict)
3547
3548     def list_thumbnails(self, info_dict):
3549         self.__list_table(info_dict['id'], 'thumbnails', self.render_thumbnails_table, info_dict)
3550
3551     def list_subtitles(self, video_id, subtitles, name='subtitles'):
3552         self.__list_table(video_id, name, self.render_subtitles_table, video_id, subtitles)
3553
3554     def urlopen(self, req):
3555         """ Start an HTTP download """
3556         if isinstance(req, compat_basestring):
3557             req = sanitized_Request(req)
3558         return self._opener.open(req, timeout=self._socket_timeout)
3559
3560     def print_debug_header(self):
3561         if not self.params.get('verbose'):
3562             return
3563
3564         def get_encoding(stream):
3565             ret = getattr(stream, 'encoding', 'missing (%s)' % type(stream).__name__)
3566             if not supports_terminal_sequences(stream):
3567                 from .compat import WINDOWS_VT_MODE
3568                 ret += ' (No VT)' if WINDOWS_VT_MODE is False else ' (No ANSI)'
3569             return ret
3570
3571         encoding_str = 'Encodings: locale %s, fs %s, out %s, err %s, pref %s' % (
3572             locale.getpreferredencoding(),
3573             sys.getfilesystemencoding(),
3574             get_encoding(self._screen_file), get_encoding(self._err_file),
3575             self.get_encoding())
3576
3577         logger = self.params.get('logger')
3578         if logger:
3579             write_debug = lambda msg: logger.debug(f'[debug] {msg}')
3580             write_debug(encoding_str)
3581         else:
3582             write_string(f'[debug] {encoding_str}\n', encoding=None)
3583             write_debug = lambda msg: self._write_string(f'[debug] {msg}\n')
3584
3585         source = detect_variant()
3586         write_debug(join_nonempty(
3587             'yt-dlp version', __version__,
3588             f'[{RELEASE_GIT_HEAD}]' if RELEASE_GIT_HEAD else '',
3589             '' if source == 'unknown' else f'({source})',
3590             delim=' '))
3591         if not _LAZY_LOADER:
3592             if os.environ.get('YTDLP_NO_LAZY_EXTRACTORS'):
3593                 write_debug('Lazy loading extractors is forcibly disabled')
3594             else:
3595                 write_debug('Lazy loading extractors is disabled')
3596         if plugin_extractors or plugin_postprocessors:
3597             write_debug('Plugins: %s' % [
3598                 '%s%s' % (klass.__name__, '' if klass.__name__ == name else f' as {name}')
3599                 for name, klass in itertools.chain(plugin_extractors.items(), plugin_postprocessors.items())])
3600         if self.params.get('compat_opts'):
3601             write_debug('Compatibility options: %s' % ', '.join(self.params.get('compat_opts')))
3602
3603         if source == 'source':
3604             try:
3605                 sp = Popen(
3606                     ['git', 'rev-parse', '--short', 'HEAD'],
3607                     stdout=subprocess.PIPE, stderr=subprocess.PIPE,
3608                     cwd=os.path.dirname(os.path.abspath(__file__)))
3609                 out, err = sp.communicate_or_kill()
3610                 out = out.decode().strip()
3611                 if re.match('[0-9a-f]+', out):
3612                     write_debug('Git HEAD: %s' % out)
3613             except Exception:
3614                 try:
3615                     sys.exc_clear()
3616                 except Exception:
3617                     pass
3618
3619         def python_implementation():
3620             impl_name = platform.python_implementation()
3621             if impl_name == 'PyPy' and hasattr(sys, 'pypy_version_info'):
3622                 return impl_name + ' version %d.%d.%d' % sys.pypy_version_info[:3]
3623             return impl_name
3624
3625         write_debug('Python version %s (%s %s) - %s' % (
3626             platform.python_version(),
3627             python_implementation(),
3628             platform.architecture()[0],
3629             platform_name()))
3630
3631         exe_versions, ffmpeg_features = FFmpegPostProcessor.get_versions_and_features(self)
3632         ffmpeg_features = {key for key, val in ffmpeg_features.items() if val}
3633         if ffmpeg_features:
3634             exe_versions['ffmpeg'] += ' (%s)' % ','.join(ffmpeg_features)
3635
3636         exe_versions['rtmpdump'] = rtmpdump_version()
3637         exe_versions['phantomjs'] = PhantomJSwrapper._version()
3638         exe_str = ', '.join(
3639             f'{exe} {v}' for exe, v in sorted(exe_versions.items()) if v
3640         ) or 'none'
3641         write_debug('exe versions: %s' % exe_str)
3642
3643         from .downloader.websocket import has_websockets
3644         from .postprocessor.embedthumbnail import has_mutagen
3645         from .cookies import SQLITE_AVAILABLE, SECRETSTORAGE_AVAILABLE
3646
3647         lib_str = join_nonempty(
3648             compat_pycrypto_AES and compat_pycrypto_AES.__name__.split('.')[0],
3649             SECRETSTORAGE_AVAILABLE and 'secretstorage',
3650             has_mutagen and 'mutagen',
3651             SQLITE_AVAILABLE and 'sqlite',
3652             has_websockets and 'websockets',
3653             delim=', ') or 'none'
3654         write_debug('Optional libraries: %s' % lib_str)
3655
3656         proxy_map = {}
3657         for handler in self._opener.handlers:
3658             if hasattr(handler, 'proxies'):
3659                 proxy_map.update(handler.proxies)
3660         write_debug(f'Proxy map: {proxy_map}')
3661
3662         # Not implemented
3663         if False and self.params.get('call_home'):
3664             ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode('utf-8')
3665             write_debug('Public IP address: %s' % ipaddr)
3666             latest_version = self.urlopen(
3667                 'https://yt-dl.org/latest/version').read().decode('utf-8')
3668             if version_tuple(latest_version) > version_tuple(__version__):
3669                 self.report_warning(
3670                     'You are using an outdated version (newest version: %s)! '
3671                     'See https://yt-dl.org/update if you need help updating.' %
3672                     latest_version)
3673
3674     def _setup_opener(self):
3675         timeout_val = self.params.get('socket_timeout')
3676         self._socket_timeout = 20 if timeout_val is None else float(timeout_val)
3677
3678         opts_cookiesfrombrowser = self.params.get('cookiesfrombrowser')
3679         opts_cookiefile = self.params.get('cookiefile')
3680         opts_proxy = self.params.get('proxy')
3681
3682         self.cookiejar = load_cookies(opts_cookiefile, opts_cookiesfrombrowser, self)
3683
3684         cookie_processor = YoutubeDLCookieProcessor(self.cookiejar)
3685         if opts_proxy is not None:
3686             if opts_proxy == '':
3687                 proxies = {}
3688             else:
3689                 proxies = {'http': opts_proxy, 'https': opts_proxy}
3690         else:
3691             proxies = compat_urllib_request.getproxies()
3692             # Set HTTPS proxy to HTTP one if given (https://github.com/ytdl-org/youtube-dl/issues/805)
3693             if 'http' in proxies and 'https' not in proxies:
3694                 proxies['https'] = proxies['http']
3695         proxy_handler = PerRequestProxyHandler(proxies)
3696
3697         debuglevel = 1 if self.params.get('debug_printtraffic') else 0
3698         https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
3699         ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
3700         redirect_handler = YoutubeDLRedirectHandler()
3701         data_handler = compat_urllib_request_DataHandler()
3702
3703         # When passing our own FileHandler instance, build_opener won't add the
3704         # default FileHandler and allows us to disable the file protocol, which
3705         # can be used for malicious purposes (see
3706         # https://github.com/ytdl-org/youtube-dl/issues/8227)
3707         file_handler = compat_urllib_request.FileHandler()
3708
3709         def file_open(*args, **kwargs):
3710             raise compat_urllib_error.URLError('file:// scheme is explicitly disabled in yt-dlp for security reasons')
3711         file_handler.file_open = file_open
3712
3713         opener = compat_urllib_request.build_opener(
3714             proxy_handler, https_handler, cookie_processor, ydlh, redirect_handler, data_handler, file_handler)
3715
3716         # Delete the default user-agent header, which would otherwise apply in
3717         # cases where our custom HTTP handler doesn't come into play
3718         # (See https://github.com/ytdl-org/youtube-dl/issues/1309 for details)
3719         opener.addheaders = []
3720         self._opener = opener
3721
3722     def encode(self, s):
3723         if isinstance(s, bytes):
3724             return s  # Already encoded
3725
3726         try:
3727             return s.encode(self.get_encoding())
3728         except UnicodeEncodeError as err:
3729             err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
3730             raise
3731
3732     def get_encoding(self):
3733         encoding = self.params.get('encoding')
3734         if encoding is None:
3735             encoding = preferredencoding()
3736         return encoding
3737
3738     def _write_info_json(self, label, ie_result, infofn, overwrite=None):
3739         ''' Write infojson and returns True = written, False = skip, None = error '''
3740         if overwrite is None:
3741             overwrite = self.params.get('overwrites', True)
3742         if not self.params.get('writeinfojson'):
3743             return False
3744         elif not infofn:
3745             self.write_debug(f'Skipping writing {label} infojson')
3746             return False
3747         elif not self._ensure_dir_exists(infofn):
3748             return None
3749         elif not overwrite and os.path.exists(infofn):
3750             self.to_screen(f'[info] {label.title()} metadata is already present')
3751         else:
3752             self.to_screen(f'[info] Writing {label} metadata as JSON to: {infofn}')
3753             try:
3754                 write_json_file(self.sanitize_info(ie_result, self.params.get('clean_infojson', True)), infofn)
3755             except (OSError, IOError):
3756                 self.report_error(f'Cannot write {label} metadata to JSON file {infofn}')
3757                 return None
3758         return True
3759
3760     def _write_description(self, label, ie_result, descfn):
3761         ''' Write description and returns True = written, False = skip, None = error '''
3762         if not self.params.get('writedescription'):
3763             return False
3764         elif not descfn:
3765             self.write_debug(f'Skipping writing {label} description')
3766             return False
3767         elif not self._ensure_dir_exists(descfn):
3768             return None
3769         elif not self.params.get('overwrites', True) and os.path.exists(descfn):
3770             self.to_screen(f'[info] {label.title()} description is already present')
3771         elif ie_result.get('description') is None:
3772             self.report_warning(f'There\'s no {label} description to write')
3773             return False
3774         else:
3775             try:
3776                 self.to_screen(f'[info] Writing {label} description to: {descfn}')
3777                 with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
3778                     descfile.write(ie_result['description'])
3779             except (OSError, IOError):
3780                 self.report_error(f'Cannot write {label} description file {descfn}')
3781                 return None
3782         return True
3783
3784     def _write_subtitles(self, info_dict, filename):
3785         ''' Write subtitles to file and return list of (sub_filename, final_sub_filename); or None if error'''
3786         ret = []
3787         subtitles = info_dict.get('requested_subtitles')
3788         if not subtitles or not (self.params.get('writesubtitles') or self.params.get('writeautomaticsub')):
3789             # subtitles download errors are already managed as troubles in relevant IE
3790             # that way it will silently go on when used with unsupporting IE
3791             return ret
3792
3793         sub_filename_base = self.prepare_filename(info_dict, 'subtitle')
3794         if not sub_filename_base:
3795             self.to_screen('[info] Skipping writing video subtitles')
3796             return ret
3797         for sub_lang, sub_info in subtitles.items():
3798             sub_format = sub_info['ext']
3799             sub_filename = subtitles_filename(filename, sub_lang, sub_format, info_dict.get('ext'))
3800             sub_filename_final = subtitles_filename(sub_filename_base, sub_lang, sub_format, info_dict.get('ext'))
3801             existing_sub = self.existing_file((sub_filename_final, sub_filename))
3802             if existing_sub:
3803                 self.to_screen(f'[info] Video subtitle {sub_lang}.{sub_format} is already present')
3804                 sub_info['filepath'] = existing_sub
3805                 ret.append((existing_sub, sub_filename_final))
3806                 continue
3807
3808             self.to_screen(f'[info] Writing video subtitles to: {sub_filename}')
3809             if sub_info.get('data') is not None:
3810                 try:
3811                     # Use newline='' to prevent conversion of newline characters
3812                     # See https://github.com/ytdl-org/youtube-dl/issues/10268
3813                     with io.open(sub_filename, 'w', encoding='utf-8', newline='') as subfile:
3814                         subfile.write(sub_info['data'])
3815                     sub_info['filepath'] = sub_filename
3816                     ret.append((sub_filename, sub_filename_final))
3817                     continue
3818                 except (OSError, IOError):
3819                     self.report_error(f'Cannot write video subtitles file {sub_filename}')
3820                     return None
3821
3822             try:
3823                 sub_copy = sub_info.copy()
3824                 sub_copy.setdefault('http_headers', info_dict.get('http_headers'))
3825                 self.dl(sub_filename, sub_copy, subtitle=True)
3826                 sub_info['filepath'] = sub_filename
3827                 ret.append((sub_filename, sub_filename_final))
3828             except (DownloadError, ExtractorError, IOError, OSError, ValueError) + network_exceptions as err:
3829                 if self.params.get('ignoreerrors') is not True:  # False or 'only_download'
3830                     raise DownloadError(f'Unable to download video subtitles for {sub_lang!r}: {err}', err)
3831                 self.report_warning(f'Unable to download video subtitles for {sub_lang!r}: {err}')
3832         return ret
3833
3834     def _write_thumbnails(self, label, info_dict, filename, thumb_filename_base=None):
3835         ''' Write thumbnails to file and return list of (thumb_filename, final_thumb_filename) '''
3836         write_all = self.params.get('write_all_thumbnails', False)
3837         thumbnails, ret = [], []
3838         if write_all or self.params.get('writethumbnail', False):
3839             thumbnails = info_dict.get('thumbnails') or []
3840         multiple = write_all and len(thumbnails) > 1
3841
3842         if thumb_filename_base is None:
3843             thumb_filename_base = filename
3844         if thumbnails and not thumb_filename_base:
3845             self.write_debug(f'Skipping writing {label} thumbnail')
3846             return ret
3847
3848         for idx, t in list(enumerate(thumbnails))[::-1]:
3849             thumb_ext = (f'{t["id"]}.' if multiple else '') + determine_ext(t['url'], 'jpg')
3850             thumb_display_id = f'{label} thumbnail {t["id"]}'
3851             thumb_filename = replace_extension(filename, thumb_ext, info_dict.get('ext'))
3852             thumb_filename_final = replace_extension(thumb_filename_base, thumb_ext, info_dict.get('ext'))
3853
3854             existing_thumb = self.existing_file((thumb_filename_final, thumb_filename))
3855             if existing_thumb:
3856                 self.to_screen('[info] %s is already present' % (
3857                     thumb_display_id if multiple else f'{label} thumbnail').capitalize())
3858                 t['filepath'] = existing_thumb
3859                 ret.append((existing_thumb, thumb_filename_final))
3860             else:
3861                 self.to_screen(f'[info] Downloading {thumb_display_id} ...')
3862                 try:
3863                     uf = self.urlopen(t['url'])
3864                     self.to_screen(f'[info] Writing {thumb_display_id} to: {thumb_filename}')
3865                     with open(encodeFilename(thumb_filename), 'wb') as thumbf:
3866                         shutil.copyfileobj(uf, thumbf)
3867                     ret.append((thumb_filename, thumb_filename_final))
3868                     t['filepath'] = thumb_filename
3869                 except network_exceptions as err:
3870                     thumbnails.pop(idx)
3871                     self.report_warning(f'Unable to download {thumb_display_id}: {err}')
3872             if ret and not write_all:
3873                 break
3874         return ret