yt_dlp/YoutubeDL.py

   1 #!/usr/bin/env python3
   2 # coding: utf-8
   3
   4 from __future__ import absolute_import, unicode_literals
   5
   6 import collections
   7 import contextlib
   8 import datetime
   9 import errno
  10 import fileinput
  11 import functools
  12 import io
  13 import itertools
  14 import json
  15 import locale
  16 import operator
  17 import os
  18 import platform
  19 import re
  20 import shutil
  21 import subprocess
  22 import sys
  23 import tempfile
  24 import time
  25 import tokenize
  26 import traceback
  27 import random
  28 import unicodedata
  29
  30 from enum import Enum
  31 from string import ascii_letters
  32
  33 from .compat import (
  34     compat_basestring,
  35     compat_get_terminal_size,
  36     compat_kwargs,
  37     compat_numeric_types,
  38     compat_os_name,
  39     compat_pycrypto_AES,
  40     compat_shlex_quote,
  41     compat_str,
  42     compat_tokenize_tokenize,
  43     compat_urllib_error,
  44     compat_urllib_request,
  45     compat_urllib_request_DataHandler,
  46     windows_enable_vt_mode,
  47 )
  48 from .cookies import load_cookies
  49 from .utils import (
  50     age_restricted,
  51     args_to_str,
  52     ContentTooShortError,
  53     date_from_str,
  54     DateRange,
  55     DEFAULT_OUTTMPL,
  56     determine_ext,
  57     determine_protocol,
  58     DownloadCancelled,
  59     DownloadError,
  60     encode_compat_str,
  61     encodeFilename,
  62     EntryNotInPlaylist,
  63     error_to_compat_str,
  64     ExistingVideoReached,
  65     expand_path,
  66     ExtractorError,
  67     float_or_none,
  68     format_bytes,
  69     format_field,
  70     format_decimal_suffix,
  71     formatSeconds,
  72     GeoRestrictedError,
  73     get_domain,
  74     HEADRequest,
  75     InAdvancePagedList,
  76     int_or_none,
  77     iri_to_uri,
  78     ISO3166Utils,
  79     join_nonempty,
  80     LazyList,
  81     LINK_TEMPLATES,
  82     locked_file,
  83     make_dir,
  84     make_HTTPS_handler,
  85     MaxDownloadsReached,
  86     network_exceptions,
  87     number_of_digits,
  88     orderedSet,
  89     OUTTMPL_TYPES,
  90     PagedList,
  91     parse_filesize,
  92     PerRequestProxyHandler,
  93     platform_name,
  94     Popen,
  95     POSTPROCESS_WHEN,
  96     PostProcessingError,
  97     preferredencoding,
  98     prepend_extension,
  99     ReExtractInfo,
 100     register_socks_protocols,
 101     RejectedVideoReached,
 102     remove_terminal_sequences,
 103     render_table,
 104     replace_extension,
 105     SameFileError,
 106     sanitize_filename,
 107     sanitize_path,
 108     sanitize_url,
 109     sanitized_Request,
 110     std_headers,
 111     STR_FORMAT_RE_TMPL,
 112     STR_FORMAT_TYPES,
 113     str_or_none,
 114     strftime_or_none,
 115     subtitles_filename,
 116     supports_terminal_sequences,
 117     timetuple_from_msec,
 118     to_high_limit_path,
 119     traverse_obj,
 120     try_get,
 121     UnavailableVideoError,
 122     url_basename,
 123     variadic,
 124     version_tuple,
 125     write_json_file,
 126     write_string,
 127     YoutubeDLCookieProcessor,
 128     YoutubeDLHandler,
 129     YoutubeDLRedirectHandler,
 130 )
 131 from .cache import Cache
 132 from .minicurses import format_text
 133 from .extractor import (
 134     gen_extractor_classes,
 135     get_info_extractor,
 136     _LAZY_LOADER,
 137     _PLUGIN_CLASSES as plugin_extractors
 138 )
 139 from .extractor.openload import PhantomJSwrapper
 140 from .downloader import (
 141     FFmpegFD,
 142     get_suitable_downloader,
 143     shorten_protocol_name
 144 )
 145 from .downloader.rtmp import rtmpdump_version
 146 from .postprocessor import (
 147     get_postprocessor,
 148     EmbedThumbnailPP,
 149     FFmpegFixupDuplicateMoovPP,
 150     FFmpegFixupDurationPP,
 151     FFmpegFixupM3u8PP,
 152     FFmpegFixupM4aPP,
 153     FFmpegFixupStretchedPP,
 154     FFmpegFixupTimestampPP,
 155     FFmpegMergerPP,
 156     FFmpegPostProcessor,
 157     MoveFilesAfterDownloadPP,
 158     _PLUGIN_CLASSES as plugin_postprocessors
 159 )
 160 from .update import detect_variant
 161 from .version import __version__, RELEASE_GIT_HEAD
 162
 163 if compat_os_name == 'nt':
 164     import ctypes
 165
 166
 167 class YoutubeDL(object):
 168     """YoutubeDL class.
 169
 170     YoutubeDL objects are the ones responsible for downloading the
 171     actual video file and writing it to disk if the user has requested
 172     it, among some other tasks. In most cases there should be one per
 173     program. As, given a video URL, the downloader doesn't know how to
 174     extract all the needed information, task that InfoExtractors do, it
 175     has to pass the URL to one of them.
 176
 177     For this, YoutubeDL objects have a method that allows
 178     InfoExtractors to be registered in a given order. When it is passed
 179     a URL, the YoutubeDL object hands it to the first InfoExtractor it
 180     finds that reports being able to handle it. The InfoExtractor extracts
 181     all the information about the video or videos the URL refers to, and
 182     YoutubeDL processes the extracted information, possibly using a File
 183     Downloader to download the video.
 184
 185     YoutubeDL objects accept a lot of parameters. In order not to saturate
 186     the object constructor with arguments, it receives a dictionary of
 187     options instead. These options are available through the params
 188     attribute for the InfoExtractors to use. The YoutubeDL also
 189     registers itself as the downloader in charge for the InfoExtractors
 190     that are added to it, so this is a "mutual registration".
 191
 192     Available options:
 193
 194     username:          Username for authentication purposes.
 195     password:          Password for authentication purposes.
 196     videopassword:     Password for accessing a video.
 197     ap_mso:            Adobe Pass multiple-system operator identifier.
 198     ap_username:       Multiple-system operator account username.
 199     ap_password:       Multiple-system operator account password.
 200     usenetrc:          Use netrc for authentication instead.
 201     verbose:           Print additional info to stdout.
 202     quiet:             Do not print messages to stdout.
 203     no_warnings:       Do not print out anything for warnings.
 204     forceprint:        A dict with keys WHEN mapped to a list of templates to
 205                        print to stdout. The allowed keys are video or any of the
 206                        items in utils.POSTPROCESS_WHEN.
 207                        For compatibility, a single list is also accepted
 208     print_to_file:     A dict with keys WHEN (same as forceprint) mapped to
 209                        a list of tuples with (template, filename)
 210     forceurl:          Force printing final URL. (Deprecated)
 211     forcetitle:        Force printing title. (Deprecated)
 212     forceid:           Force printing ID. (Deprecated)
 213     forcethumbnail:    Force printing thumbnail URL. (Deprecated)
 214     forcedescription:  Force printing description. (Deprecated)
 215     forcefilename:     Force printing final filename. (Deprecated)
 216     forceduration:     Force printing duration. (Deprecated)
 217     forcejson:         Force printing info_dict as JSON.
 218     dump_single_json:  Force printing the info_dict of the whole playlist
 219                        (or video) as a single JSON line.
 220     force_write_download_archive: Force writing download archive regardless
 221                        of 'skip_download' or 'simulate'.
 222     simulate:          Do not download the video files. If unset (or None),
 223                        simulate only if listsubtitles, listformats or list_thumbnails is used
 224     format:            Video format code. see "FORMAT SELECTION" for more details.
 225                        You can also pass a function. The function takes 'ctx' as
 226                        argument and returns the formats to download.
 227                        See "build_format_selector" for an implementation
 228     allow_unplayable_formats:   Allow unplayable formats to be extracted and downloaded.
 229     ignore_no_formats_error: Ignore "No video formats" error. Useful for
 230                        extracting metadata even if the video is not actually
 231                        available for download (experimental)
 232     format_sort:       A list of fields by which to sort the video formats.
 233                        See "Sorting Formats" for more details.
 234     format_sort_force: Force the given format_sort. see "Sorting Formats"
 235                        for more details.
 236     allow_multiple_video_streams:   Allow multiple video streams to be merged
 237                        into a single file
 238     allow_multiple_audio_streams:   Allow multiple audio streams to be merged
 239                        into a single file
 240     check_formats:     Whether to test if the formats are downloadable.
 241                        Can be True (check all), False (check none),
 242                        'selected' (check selected formats),
 243                        or None (check only if requested by extractor)
 244     paths:             Dictionary of output paths. The allowed keys are 'home',
 245                        'temp' and the keys of OUTTMPL_TYPES (in utils.py)
 246     outtmpl:           Dictionary of templates for output names. Allowed keys
 247                        are 'default' and the keys of OUTTMPL_TYPES (in utils.py).
 248                        For compatibility with youtube-dl, a single string can also be used
 249     outtmpl_na_placeholder: Placeholder for unavailable meta fields.
 250     restrictfilenames: Do not allow "&" and spaces in file names
 251     trim_file_name:    Limit length of filename (extension excluded)
 252     windowsfilenames:  Force the filenames to be windows compatible
 253     ignoreerrors:      Do not stop on download/postprocessing errors.
 254                        Can be 'only_download' to ignore only download errors.
 255                        Default is 'only_download' for CLI, but False for API
 256     skip_playlist_after_errors: Number of allowed failures until the rest of
 257                        the playlist is skipped
 258     force_generic_extractor: Force downloader to use the generic extractor
 259     overwrites:        Overwrite all video and metadata files if True,
 260                        overwrite only non-video files if None
 261                        and don't overwrite any file if False
 262                        For compatibility with youtube-dl,
 263                        "nooverwrites" may also be used instead
 264     playliststart:     Playlist item to start at.
 265     playlistend:       Playlist item to end at.
 266     playlist_items:    Specific indices of playlist to download.
 267     playlistreverse:   Download playlist items in reverse order.
 268     playlistrandom:    Download playlist items in random order.
 269     matchtitle:        Download only matching titles.
 270     rejecttitle:       Reject downloads for matching titles.
 271     logger:            Log messages to a logging.Logger instance.
 272     logtostderr:       Log messages to stderr instead of stdout.
 273     consoletitle:      Display progress in console window's titlebar.
 274     writedescription:  Write the video description to a .description file
 275     writeinfojson:     Write the video metadata to a .info.json file
 276     clean_infojson:    Remove private fields from the infojson
 277     getcomments:       Extract video comments. This will not be written to disk
 278                        unless writeinfojson is also given
 279     writeannotations:  Write the video annotations to a .annotations.xml file
 280     writethumbnail:    Write the thumbnail image to a file
 281     allow_playlist_files: Whether to write playlists' description, infojson etc
 282                        also to disk when using the 'write*' options
 283     write_all_thumbnails:  Write all thumbnail formats to files
 284     writelink:         Write an internet shortcut file, depending on the
 285                        current platform (.url/.webloc/.desktop)
 286     writeurllink:      Write a Windows internet shortcut file (.url)
 287     writewebloclink:   Write a macOS internet shortcut file (.webloc)
 288     writedesktoplink:  Write a Linux internet shortcut file (.desktop)
 289     writesubtitles:    Write the video subtitles to a file
 290     writeautomaticsub: Write the automatically generated subtitles to a file
 291     allsubtitles:      Deprecated - Use subtitleslangs = ['all']
 292                        Downloads all the subtitles of the video
 293                        (requires writesubtitles or writeautomaticsub)
 294     listsubtitles:     Lists all available subtitles for the video
 295     subtitlesformat:   The format code for subtitles
 296     subtitleslangs:    List of languages of the subtitles to download (can be regex).
 297                        The list may contain "all" to refer to all the available
 298                        subtitles. The language can be prefixed with a "-" to
 299                        exclude it from the requested languages. Eg: ['all', '-live_chat']
 300     keepvideo:         Keep the video file after post-processing
 301     daterange:         A DateRange object, download only if the upload_date is in the range.
 302     skip_download:     Skip the actual download of the video file
 303     cachedir:          Location of the cache files in the filesystem.
 304                        False to disable filesystem cache.
 305     noplaylist:        Download single video instead of a playlist if in doubt.
 306     age_limit:         An integer representing the user's age in years.
 307                        Unsuitable videos for the given age are skipped.
 308     min_views:         An integer representing the minimum view count the video
 309                        must have in order to not be skipped.
 310                        Videos without view count information are always
 311                        downloaded. None for no limit.
 312     max_views:         An integer representing the maximum view count.
 313                        Videos that are more popular than that are not
 314                        downloaded.
 315                        Videos without view count information are always
 316                        downloaded. None for no limit.
 317     download_archive:  File name of a file where all downloads are recorded.
 318                        Videos already present in the file are not downloaded
 319                        again.
 320     break_on_existing: Stop the download process after attempting to download a
 321                        file that is in the archive.
 322     break_on_reject:   Stop the download process when encountering a video that
 323                        has been filtered out.
 324     break_per_url:     Whether break_on_reject and break_on_existing
 325                        should act on each input URL as opposed to for the entire queue
 326     cookiefile:        File name where cookies should be read from and dumped to
 327     cookiesfrombrowser:  A tuple containing the name of the browser, the profile
 328                        name/path from where cookies are loaded, and the name of the
 329                        keyring. Eg: ('chrome', ) or ('vivaldi', 'default', 'BASICTEXT')
 330     legacyserverconnect: Explicitly allow HTTPS connection to servers that do not
 331                        support RFC 5746 secure renegotiation
 332     nocheckcertificate:  Do not verify SSL certificates
 333     prefer_insecure:   Use HTTP instead of HTTPS to retrieve information.
 334                        At the moment, this is only supported by YouTube.
 335     proxy:             URL of the proxy server to use
 336     geo_verification_proxy:  URL of the proxy to use for IP address verification
 337                        on geo-restricted sites.
 338     socket_timeout:    Time to wait for unresponsive hosts, in seconds
 339     bidi_workaround:   Work around buggy terminals without bidirectional text
 340                        support, using fribidi
 341     debug_printtraffic:Print out sent and received HTTP traffic
 342     include_ads:       Download ads as well (deprecated)
 343     default_search:    Prepend this string if an input url is not valid.
 344                        'auto' for elaborate guessing
 345     encoding:          Use this encoding instead of the system-specified.
 346     extract_flat:      Do not resolve URLs, return the immediate result.
 347                        Pass in 'in_playlist' to only show this behavior for
 348                        playlist items.
 349     wait_for_video:    If given, wait for scheduled streams to become available.
 350                        The value should be a tuple containing the range
 351                        (min_secs, max_secs) to wait between retries
 352     postprocessors:    A list of dictionaries, each with an entry
 353                        * key:  The name of the postprocessor. See
 354                                yt_dlp/postprocessor/__init__.py for a list.
 355                        * when: When to run the postprocessor. Allowed values are
 356                                the entries of utils.POSTPROCESS_WHEN
 357                                Assumed to be 'post_process' if not given
 358     post_hooks:        Deprecated - Register a custom postprocessor instead
 359                        A list of functions that get called as the final step
 360                        for each video file, after all postprocessors have been
 361                        called. The filename will be passed as the only argument.
 362     progress_hooks:    A list of functions that get called on download
 363                        progress, with a dictionary with the entries
 364                        * status: One of "downloading", "error", or "finished".
 365                                  Check this first and ignore unknown values.
 366                        * info_dict: The extracted info_dict
 367
 368                        If status is one of "downloading", or "finished", the
 369                        following properties may also be present:
 370                        * filename: The final filename (always present)
 371                        * tmpfilename: The filename we're currently writing to
 372                        * downloaded_bytes: Bytes on disk
 373                        * total_bytes: Size of the whole file, None if unknown
 374                        * total_bytes_estimate: Guess of the eventual file size,
 375                                                None if unavailable.
 376                        * elapsed: The number of seconds since download started.
 377                        * eta: The estimated time in seconds, None if unknown
 378                        * speed: The download speed in bytes/second, None if
 379                                 unknown
 380                        * fragment_index: The counter of the currently
 381                                          downloaded video fragment.
 382                        * fragment_count: The number of fragments (= individual
 383                                          files that will be merged)
 384
 385                        Progress hooks are guaranteed to be called at least once
 386                        (with status "finished") if the download is successful.
 387     postprocessor_hooks:  A list of functions that get called on postprocessing
 388                        progress, with a dictionary with the entries
 389                        * status: One of "started", "processing", or "finished".
 390                                  Check this first and ignore unknown values.
 391                        * postprocessor: Name of the postprocessor
 392                        * info_dict: The extracted info_dict
 393
 394                        Postprocessor hooks are guaranteed to be called at least twice
 395                        (with status "started" and "finished") if the processing is successful.
 396     merge_output_format: Extension to use when merging formats.
 397     final_ext:         Expected final extension; used to detect when the file was
 398                        already downloaded and converted
 399     fixup:             Automatically correct known faults of the file.
 400                        One of:
 401                        - "never": do nothing
 402                        - "warn": only emit a warning
 403                        - "detect_or_warn": check whether we can do anything
 404                                            about it, warn otherwise (default)
 405     source_address:    Client-side IP address to bind to.
 406     call_home:         Boolean, true iff we are allowed to contact the
 407                        yt-dlp servers for debugging. (BROKEN)
 408     sleep_interval_requests: Number of seconds to sleep between requests
 409                        during extraction
 410     sleep_interval:    Number of seconds to sleep before each download when
 411                        used alone or a lower bound of a range for randomized
 412                        sleep before each download (minimum possible number
 413                        of seconds to sleep) when used along with
 414                        max_sleep_interval.
 415     max_sleep_interval:Upper bound of a range for randomized sleep before each
 416                        download (maximum possible number of seconds to sleep).
 417                        Must only be used along with sleep_interval.
 418                        Actual sleep time will be a random float from range
 419                        [sleep_interval; max_sleep_interval].
 420     sleep_interval_subtitles: Number of seconds to sleep before each subtitle download
 421     listformats:       Print an overview of available video formats and exit.
 422     list_thumbnails:   Print a table of all thumbnails and exit.
 423     match_filter:      A function that gets called with the info_dict of
 424                        every video.
 425                        If it returns a message, the video is ignored.
 426                        If it returns None, the video is downloaded.
 427                        match_filter_func in utils.py is one example for this.
 428     no_color:          Do not emit color codes in output.
 429     geo_bypass:        Bypass geographic restriction via faking X-Forwarded-For
 430                        HTTP header
 431     geo_bypass_country:
 432                        Two-letter ISO 3166-2 country code that will be used for
 433                        explicit geographic restriction bypassing via faking
 434                        X-Forwarded-For HTTP header
 435     geo_bypass_ip_block:
 436                        IP range in CIDR notation that will be used similarly to
 437                        geo_bypass_country
 438
 439     The following options determine which downloader is picked:
 440     external_downloader: A dictionary of protocol keys and the executable of the
 441                        external downloader to use for it. The allowed protocols
 442                        are default|http|ftp|m3u8|dash|rtsp|rtmp|mms.
 443                        Set the value to 'native' to use the native downloader
 444     hls_prefer_native: Deprecated - Use external_downloader = {'m3u8': 'native'}
 445                        or {'m3u8': 'ffmpeg'} instead.
 446                        Use the native HLS downloader instead of ffmpeg/avconv
 447                        if True, otherwise use ffmpeg/avconv if False, otherwise
 448                        use downloader suggested by extractor if None.
 449     compat_opts:       Compatibility options. See "Differences in default behavior".
 450                        The following options do not work when used through the API:
 451                        filename, abort-on-error, multistreams, no-live-chat, format-sort,
 452                        no-clean-infojson, no-playlist-metafiles, no-keep-subs, no-attach-info-json.
 453                        Refer to __init__.py for their implementation
 454     progress_template: Dictionary of templates for progress outputs.
 455                        Allowed keys are 'download', 'postprocess',
 456                        'download-title' (console title) and 'postprocess-title'.
 457                        The template is mapped on a dictionary with keys 'progress' and 'info'
 458
 459     The following parameters are not used by YoutubeDL itself, they are used by
 460     the downloader (see yt_dlp/downloader/common.py):
 461     nopart, updatetime, buffersize, ratelimit, throttledratelimit, min_filesize,
 462     max_filesize, test, noresizebuffer, retries, file_access_retries, fragment_retries,
 463     continuedl, noprogress, xattr_set_filesize, hls_use_mpegts, http_chunk_size,
 464     external_downloader_args, concurrent_fragment_downloads.
 465
 466     The following options are used by the post processors:
 467     prefer_ffmpeg:     If False, use avconv instead of ffmpeg if both are available,
 468                        otherwise prefer ffmpeg. (avconv support is deprecated)
 469     ffmpeg_location:   Location of the ffmpeg/avconv binary; either the path
 470                        to the binary or its containing directory.
 471     postprocessor_args: A dictionary of postprocessor/executable keys (in lower case)
 472                        and a list of additional command-line arguments for the
 473                        postprocessor/executable. The dict can also have "PP+EXE" keys
 474                        which are used when the given exe is used by the given PP.
 475                        Use 'default' as the name for arguments to be passed to all PP
 476                        For compatibility with youtube-dl, a single list of args
 477                        can also be used
 478
 479     The following options are used by the extractors:
 480     extractor_retries: Number of times to retry for known errors
 481     dynamic_mpd:       Whether to process dynamic DASH manifests (default: True)
 482     hls_split_discontinuity: Split HLS playlists to different formats at
 483                        discontinuities such as ad breaks (default: False)
 484     extractor_args:    A dictionary of arguments to be passed to the extractors.
 485                        See "EXTRACTOR ARGUMENTS" for details.
 486                        Eg: {'youtube': {'skip': ['dash', 'hls']}}
 487     mark_watched:      Mark videos watched (even with --simulate). Only for YouTube
 488     youtube_include_dash_manifest: Deprecated - Use extractor_args instead.
 489                        If True (default), DASH manifests and related
 490                        data will be downloaded and processed by extractor.
 491                        You can reduce network I/O by disabling it if you don't
 492                        care about DASH. (only for youtube)
 493     youtube_include_hls_manifest: Deprecated - Use extractor_args instead.
 494                        If True (default), HLS manifests and related
 495                        data will be downloaded and processed by extractor.
 496                        You can reduce network I/O by disabling it if you don't
 497                        care about HLS. (only for youtube)
 498     """
 499
 500     _NUMERIC_FIELDS = set((
 501         'width', 'height', 'tbr', 'abr', 'asr', 'vbr', 'fps', 'filesize', 'filesize_approx',
 502         'timestamp', 'release_timestamp',
 503         'duration', 'view_count', 'like_count', 'dislike_count', 'repost_count',
 504         'average_rating', 'comment_count', 'age_limit',
 505         'start_time', 'end_time',
 506         'chapter_number', 'season_number', 'episode_number',
 507         'track_number', 'disc_number', 'release_year',
 508     ))
 509
 510     _format_selection_exts = {
 511         'audio': {'m4a', 'mp3', 'ogg', 'aac'},
 512         'video': {'mp4', 'flv', 'webm', '3gp'},
 513         'storyboards': {'mhtml'},
 514     }
 515
 516     params = None
 517     _ies = {}
 518     _pps = {k: [] for k in POSTPROCESS_WHEN}
 519     _printed_messages = set()
 520     _first_webpage_request = True
 521     _download_retcode = None
 522     _num_downloads = None
 523     _playlist_level = 0
 524     _playlist_urls = set()
 525     _screen_file = None
 526
 527     def __init__(self, params=None, auto_init=True):
 528         """Create a YoutubeDL object with the given options.
 529         @param auto_init    Whether to load the default extractors and print header (if verbose).
 530                             Set to 'no_verbose_header' to not print the header
 531         """
 532         if params is None:
 533             params = {}
 534         self._ies = {}
 535         self._ies_instances = {}
 536         self._pps = {k: [] for k in POSTPROCESS_WHEN}
 537         self._printed_messages = set()
 538         self._first_webpage_request = True
 539         self._post_hooks = []
 540         self._progress_hooks = []
 541         self._postprocessor_hooks = []
 542         self._download_retcode = 0
 543         self._num_downloads = 0
 544         self._num_videos = 0
 545         self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
 546         self._err_file = sys.stderr
 547         self.params = params
 548         self.cache = Cache(self)
 549
 550         windows_enable_vt_mode()
 551         self._allow_colors = {
 552             'screen': not self.params.get('no_color') and supports_terminal_sequences(self._screen_file),
 553             'err': not self.params.get('no_color') and supports_terminal_sequences(self._err_file),
 554         }
 555
 556         if sys.version_info < (3, 6):
 557             self.report_warning(
 558                 'Python version %d.%d is not supported! Please update to Python 3.6 or above' % sys.version_info[:2])
 559
 560         if self.params.get('allow_unplayable_formats'):
 561             self.report_warning(
 562                 f'You have asked for {self._format_err("UNPLAYABLE", self.Styles.EMPHASIS)} formats to be listed/downloaded. '
 563                 'This is a developer option intended for debugging. \n'
 564                 '         If you experience any issues while using this option, '
 565                 f'{self._format_err("DO NOT", self.Styles.ERROR)} open a bug report')
 566
 567         def check_deprecated(param, option, suggestion):
 568             if self.params.get(param) is not None:
 569                 self.report_warning('%s is deprecated. Use %s instead' % (option, suggestion))
 570                 return True
 571             return False
 572
 573         if check_deprecated('cn_verification_proxy', '--cn-verification-proxy', '--geo-verification-proxy'):
 574             if self.params.get('geo_verification_proxy') is None:
 575                 self.params['geo_verification_proxy'] = self.params['cn_verification_proxy']
 576
 577         check_deprecated('autonumber', '--auto-number', '-o "%(autonumber)s-%(title)s.%(ext)s"')
 578         check_deprecated('usetitle', '--title', '-o "%(title)s-%(id)s.%(ext)s"')
 579         check_deprecated('useid', '--id', '-o "%(id)s.%(ext)s"')
 580
 581         for msg in self.params.get('_warnings', []):
 582             self.report_warning(msg)
 583         for msg in self.params.get('_deprecation_warnings', []):
 584             self.deprecation_warning(msg)
 585
 586         if 'list-formats' in self.params.get('compat_opts', []):
 587             self.params['listformats_table'] = False
 588
 589         if 'overwrites' not in self.params and self.params.get('nooverwrites') is not None:
 590             # nooverwrites was unnecessarily changed to overwrites
 591             # in 0c3d0f51778b153f65c21906031c2e091fcfb641
 592             # This ensures compatibility with both keys
 593             self.params['overwrites'] = not self.params['nooverwrites']
 594         elif self.params.get('overwrites') is None:
 595             self.params.pop('overwrites', None)
 596         else:
 597             self.params['nooverwrites'] = not self.params['overwrites']
 598
 599         self.params.setdefault('forceprint', {})
 600         self.params.setdefault('print_to_file', {})
 601
 602         # Compatibility with older syntax
 603         if not isinstance(params['forceprint'], dict):
 604             self.params['forceprint'] = {'video': params['forceprint']}
 605
 606         if self.params.get('bidi_workaround', False):
 607             try:
 608                 import pty
 609                 master, slave = pty.openpty()
 610                 width = compat_get_terminal_size().columns
 611                 if width is None:
 612                     width_args = []
 613                 else:
 614                     width_args = ['-w', str(width)]
 615                 sp_kwargs = dict(
 616                     stdin=subprocess.PIPE,
 617                     stdout=slave,
 618                     stderr=self._err_file)
 619                 try:
 620                     self._output_process = Popen(['bidiv'] + width_args, **sp_kwargs)
 621                 except OSError:
 622                     self._output_process = Popen(['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
 623                 self._output_channel = os.fdopen(master, 'rb')
 624             except OSError as ose:
 625                 if ose.errno == errno.ENOENT:
 626                     self.report_warning(
 627                         'Could not find fribidi executable, ignoring --bidi-workaround. '
 628                         'Make sure that  fribidi  is an executable file in one of the directories in your $PATH.')
 629                 else:
 630                     raise
 631
 632         if (sys.platform != 'win32'
 633                 and sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
 634                 and not self.params.get('restrictfilenames', False)):
 635             # Unicode filesystem API will throw errors (#1474, #13027)
 636             self.report_warning(
 637                 'Assuming --restrict-filenames since file system encoding '
 638                 'cannot encode all characters. '
 639                 'Set the LC_ALL environment variable to fix this.')
 640             self.params['restrictfilenames'] = True
 641
 642         self.outtmpl_dict = self.parse_outtmpl()
 643
 644         # Creating format selector here allows us to catch syntax errors before the extraction
 645         self.format_selector = (
 646             self.params.get('format') if self.params.get('format') in (None, '-')
 647             else self.params['format'] if callable(self.params['format'])
 648             else self.build_format_selector(self.params['format']))
 649
 650         self._setup_opener()
 651
 652         if auto_init:
 653             if auto_init != 'no_verbose_header':
 654                 self.print_debug_header()
 655             self.add_default_info_extractors()
 656
 657         hooks = {
 658             'post_hooks': self.add_post_hook,
 659             'progress_hooks': self.add_progress_hook,
 660             'postprocessor_hooks': self.add_postprocessor_hook,
 661         }
 662         for opt, fn in hooks.items():
 663             for ph in self.params.get(opt, []):
 664                 fn(ph)
 665
 666         for pp_def_raw in self.params.get('postprocessors', []):
 667             pp_def = dict(pp_def_raw)
 668             when = pp_def.pop('when', 'post_process')
 669             self.add_post_processor(
 670                 get_postprocessor(pp_def.pop('key'))(self, **compat_kwargs(pp_def)),
 671                 when=when)
 672
 673         register_socks_protocols()
 674
 675         def preload_download_archive(fn):
 676             """Preload the archive, if any is specified"""
 677             if fn is None:
 678                 return False
 679             self.write_debug(f'Loading archive file {fn!r}')
 680             try:
 681                 with locked_file(fn, 'r', encoding='utf-8') as archive_file:
 682                     for line in archive_file:
 683                         self.archive.add(line.strip())
 684             except IOError as ioe:
 685                 if ioe.errno != errno.ENOENT:
 686                     raise
 687                 return False
 688             return True
 689
 690         self.archive = set()
 691         preload_download_archive(self.params.get('download_archive'))
 692
 693     def warn_if_short_id(self, argv):
 694         # short YouTube ID starting with dash?
 695         idxs = [
 696             i for i, a in enumerate(argv)
 697             if re.match(r'^-[0-9A-Za-z_-]{10}$', a)]
 698         if idxs:
 699             correct_argv = (
 700                 ['yt-dlp']
 701                 + [a for i, a in enumerate(argv) if i not in idxs]
 702                 + ['--'] + [argv[i] for i in idxs]
 703             )
 704             self.report_warning(
 705                 'Long argument string detected. '
 706                 'Use -- to separate parameters and URLs, like this:\n%s' %
 707                 args_to_str(correct_argv))
 708
 709     def add_info_extractor(self, ie):
 710         """Add an InfoExtractor object to the end of the list."""
 711         ie_key = ie.ie_key()
 712         self._ies[ie_key] = ie
 713         if not isinstance(ie, type):
 714             self._ies_instances[ie_key] = ie
 715             ie.set_downloader(self)
 716
 717     def _get_info_extractor_class(self, ie_key):
 718         ie = self._ies.get(ie_key)
 719         if ie is None:
 720             ie = get_info_extractor(ie_key)
 721             self.add_info_extractor(ie)
 722         return ie
 723
 724     def get_info_extractor(self, ie_key):
 725         """
 726         Get an instance of an IE with name ie_key, it will try to get one from
 727         the _ies list, if there's no instance it will create a new one and add
 728         it to the extractor list.
 729         """
 730         ie = self._ies_instances.get(ie_key)
 731         if ie is None:
 732             ie = get_info_extractor(ie_key)()
 733             self.add_info_extractor(ie)
 734         return ie
 735
 736     def add_default_info_extractors(self):
 737         """
 738         Add the InfoExtractors returned by gen_extractors to the end of the list
 739         """
 740         for ie in gen_extractor_classes():
 741             self.add_info_extractor(ie)
 742
 743     def add_post_processor(self, pp, when='post_process'):
 744         """Add a PostProcessor object to the end of the chain."""
 745         self._pps[when].append(pp)
 746         pp.set_downloader(self)
 747
 748     def add_post_hook(self, ph):
 749         """Add the post hook"""
 750         self._post_hooks.append(ph)
 751
 752     def add_progress_hook(self, ph):
 753         """Add the download progress hook"""
 754         self._progress_hooks.append(ph)
 755
 756     def add_postprocessor_hook(self, ph):
 757         """Add the postprocessing progress hook"""
 758         self._postprocessor_hooks.append(ph)
 759         for pps in self._pps.values():
 760             for pp in pps:
 761                 pp.add_progress_hook(ph)
 762
 763     def _bidi_workaround(self, message):
 764         if not hasattr(self, '_output_channel'):
 765             return message
 766
 767         assert hasattr(self, '_output_process')
 768         assert isinstance(message, compat_str)
 769         line_count = message.count('\n') + 1
 770         self._output_process.stdin.write((message + '\n').encode('utf-8'))
 771         self._output_process.stdin.flush()
 772         res = ''.join(self._output_channel.readline().decode('utf-8')
 773                       for _ in range(line_count))
 774         return res[:-len('\n')]
 775
 776     def _write_string(self, message, out=None, only_once=False):
 777         if only_once:
 778             if message in self._printed_messages:
 779                 return
 780             self._printed_messages.add(message)
 781         write_string(message, out=out, encoding=self.params.get('encoding'))
 782
 783     def to_stdout(self, message, skip_eol=False, quiet=False):
 784         """Print message to stdout"""
 785         if self.params.get('logger'):
 786             self.params['logger'].debug(message)
 787         elif not quiet or self.params.get('verbose'):
 788             self._write_string(
 789                 '%s%s' % (self._bidi_workaround(message), ('' if skip_eol else '\n')),
 790                 self._err_file if quiet else self._screen_file)
 791
 792     def to_stderr(self, message, only_once=False):
 793         """Print message to stderr"""
 794         assert isinstance(message, compat_str)
 795         if self.params.get('logger'):
 796             self.params['logger'].error(message)
 797         else:
 798             self._write_string('%s\n' % self._bidi_workaround(message), self._err_file, only_once=only_once)
 799
 800     def to_console_title(self, message):
 801         if not self.params.get('consoletitle', False):
 802             return
 803         message = remove_terminal_sequences(message)
 804         if compat_os_name == 'nt':
 805             if ctypes.windll.kernel32.GetConsoleWindow():
 806                 # c_wchar_p() might not be necessary if `message` is
 807                 # already of type unicode()
 808                 ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
 809         elif 'TERM' in os.environ:
 810             self._write_string('\033]0;%s\007' % message, self._screen_file)
 811
 812     def save_console_title(self):
 813         if not self.params.get('consoletitle', False):
 814             return
 815         if self.params.get('simulate'):
 816             return
 817         if compat_os_name != 'nt' and 'TERM' in os.environ:
 818             # Save the title on stack
 819             self._write_string('\033[22;0t', self._screen_file)
 820
 821     def restore_console_title(self):
 822         if not self.params.get('consoletitle', False):
 823             return
 824         if self.params.get('simulate'):
 825             return
 826         if compat_os_name != 'nt' and 'TERM' in os.environ:
 827             # Restore the title from stack
 828             self._write_string('\033[23;0t', self._screen_file)
 829
 830     def __enter__(self):
 831         self.save_console_title()
 832         return self
 833
 834     def __exit__(self, *args):
 835         self.restore_console_title()
 836
 837         if self.params.get('cookiefile') is not None:
 838             self.cookiejar.save(ignore_discard=True, ignore_expires=True)
 839
 840     def trouble(self, message=None, tb=None, is_error=True):
 841         """Determine action to take when a download problem appears.
 842
 843         Depending on if the downloader has been configured to ignore
 844         download errors or not, this method may throw an exception or
 845         not when errors are found, after printing the message.
 846
 847         @param tb          If given, is additional traceback information
 848         @param is_error    Whether to raise error according to ignorerrors
 849         """
 850         if message is not None:
 851             self.to_stderr(message)
 852         if self.params.get('verbose'):
 853             if tb is None:
 854                 if sys.exc_info()[0]:  # if .trouble has been called from an except block
 855                     tb = ''
 856                     if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
 857                         tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
 858                     tb += encode_compat_str(traceback.format_exc())
 859                 else:
 860                     tb_data = traceback.format_list(traceback.extract_stack())
 861                     tb = ''.join(tb_data)
 862             if tb:
 863                 self.to_stderr(tb)
 864         if not is_error:
 865             return
 866         if not self.params.get('ignoreerrors'):
 867             if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
 868                 exc_info = sys.exc_info()[1].exc_info
 869             else:
 870                 exc_info = sys.exc_info()
 871             raise DownloadError(message, exc_info)
 872         self._download_retcode = 1
 873
 874     def to_screen(self, message, skip_eol=False):
 875         """Print message to stdout if not in quiet mode"""
 876         self.to_stdout(
 877             message, skip_eol, quiet=self.params.get('quiet', False))
 878
    class Styles(Enum):
        """Named text styles. _format_text unwraps a member to its value before formatting."""
        # NOTE: Enum treats a member whose value equals an earlier member's
        # value as an alias of that earlier member. WARNING shares 'yellow'
        # with HEADERS, so Styles.WARNING is Styles.HEADERS and iterating the
        # enum does not yield WARNING separately. Declaration order therefore
        # decides which of the two names is the canonical one.
        HEADERS = 'yellow'
        EMPHASIS = 'light blue'
        ID = 'green'
        DELIM = 'blue'
        ERROR = 'red'
        WARNING = 'yellow'  # alias of HEADERS (same value)
        SUPPRESS = 'light black'
 887
 888     def _format_text(self, handle, allow_colors, text, f, fallback=None, *, test_encoding=False):
 889         if test_encoding:
 890             original_text = text
 891             # handle.encoding can be None. See https://github.com/yt-dlp/yt-dlp/issues/2711
 892             encoding = self.params.get('encoding') or getattr(handle, 'encoding', None) or 'ascii'
 893             text = text.encode(encoding, 'ignore').decode(encoding)
 894             if fallback is not None and text != original_text:
 895                 text = fallback
 896         if isinstance(f, self.Styles):
 897             f = f.value
 898         return format_text(text, f) if allow_colors else text if fallback is None else fallback
 899
 900     def _format_screen(self, *args, **kwargs):
 901         return self._format_text(
 902             self._screen_file, self._allow_colors['screen'], *args, **kwargs)
 903
 904     def _format_err(self, *args, **kwargs):
 905         return self._format_text(
 906             self._err_file, self._allow_colors['err'], *args, **kwargs)
 907
 908     def report_warning(self, message, only_once=False):
 909         '''
 910         Print the message to stderr, it will be prefixed with 'WARNING:'
 911         If stderr is a tty file the 'WARNING:' will be colored
 912         '''
 913         if self.params.get('logger') is not None:
 914             self.params['logger'].warning(message)
 915         else:
 916             if self.params.get('no_warnings'):
 917                 return
 918             self.to_stderr(f'{self._format_err("WARNING:", self.Styles.WARNING)} {message}', only_once)
 919
 920     def deprecation_warning(self, message):
 921         if self.params.get('logger') is not None:
 922             self.params['logger'].warning('DeprecationWarning: {message}')
 923         else:
 924             self.to_stderr(f'{self._format_err("DeprecationWarning:", self.Styles.ERROR)} {message}', True)
 925
 926     def report_error(self, message, *args, **kwargs):
 927         '''
 928         Do the same as trouble, but prefixes the message with 'ERROR:', colored
 929         in red if stderr is a tty file.
 930         '''
 931         self.trouble(f'{self._format_err("ERROR:", self.Styles.ERROR)} {message}', *args, **kwargs)
 932
 933     def write_debug(self, message, only_once=False):
 934         '''Log debug message or Print message to stderr'''
 935         if not self.params.get('verbose', False):
 936             return
 937         message = '[debug] %s' % message
 938         if self.params.get('logger'):
 939             self.params['logger'].debug(message)
 940         else:
 941             self.to_stderr(message, only_once)
 942
 943     def report_file_already_downloaded(self, file_name):
 944         """Report file has already been fully downloaded."""
 945         try:
 946             self.to_screen('[download] %s has already been downloaded' % file_name)
 947         except UnicodeEncodeError:
 948             self.to_screen('[download] The file has already been downloaded')
 949
 950     def report_file_delete(self, file_name):
 951         """Report that existing file will be deleted."""
 952         try:
 953             self.to_screen('Deleting existing file %s' % file_name)
 954         except UnicodeEncodeError:
 955             self.to_screen('Deleting existing file')
 956
 957     def raise_no_formats(self, info, forced=False):
 958         has_drm = info.get('__has_drm')
 959         msg = 'This video is DRM protected' if has_drm else 'No video formats found!'
 960         expected = self.params.get('ignore_no_formats_error')
 961         if forced or not expected:
 962             raise ExtractorError(msg, video_id=info['id'], ie=info['extractor'],
 963                                  expected=has_drm or expected)
 964         else:
 965             self.report_warning(msg)
 966
 967     def parse_outtmpl(self):
 968         outtmpl_dict = self.params.get('outtmpl', {})
 969         if not isinstance(outtmpl_dict, dict):
 970             outtmpl_dict = {'default': outtmpl_dict}
 971         # Remove spaces in the default template
 972         if self.params.get('restrictfilenames'):
 973             sanitize = lambda x: x.replace(' - ', ' ').replace(' ', '-')
 974         else:
 975             sanitize = lambda x: x
 976         outtmpl_dict.update({
 977             k: sanitize(v) for k, v in DEFAULT_OUTTMPL.items()
 978             if outtmpl_dict.get(k) is None})
 979         for key, val in outtmpl_dict.items():
 980             if isinstance(val, bytes):
 981                 self.report_warning(
 982                     'Parameter outtmpl is bytes, but should be a unicode string. '
 983                     'Put  from __future__ import unicode_literals  at the top of your code file or consider switching to Python 3.x.')
 984         return outtmpl_dict
 985
 986     def get_output_path(self, dir_type='', filename=None):
 987         paths = self.params.get('paths', {})
 988         assert isinstance(paths, dict)
 989         path = os.path.join(
 990             expand_path(paths.get('home', '').strip()),
 991             expand_path(paths.get(dir_type, '').strip()) if dir_type else '',
 992             filename or '')
 993
 994         # Temporary fix for #4787
 995         # 'Treat' all problem characters by passing filename through preferredencoding
 996         # to workaround encoding issues with subprocess on python2 @ Windows
 997         if sys.version_info < (3, 0) and sys.platform == 'win32':
 998             path = encodeFilename(path, True).decode(preferredencoding())
 999         return sanitize_path(path, force=self.params.get('windowsfilenames'))
1000
1001     @staticmethod
1002     def _outtmpl_expandpath(outtmpl):
1003         # expand_path translates '%%' into '%' and '$$' into '$'
1004         # correspondingly that is not what we want since we need to keep
1005         # '%%' intact for template dict substitution step. Working around
1006         # with boundary-alike separator hack.
1007         sep = ''.join([random.choice(ascii_letters) for _ in range(32)])
1008         outtmpl = outtmpl.replace('%%', '%{0}%'.format(sep)).replace('$$', '${0}$'.format(sep))
1009
1010         # outtmpl should be expand_path'ed before template dict substitution
1011         # because meta fields may contain env variables we don't want to
1012         # be expanded. For example, for outtmpl "%(title)s.%(ext)s" and
1013         # title "Hello $PATH", we don't want `$PATH` to be expanded.
1014         return expand_path(outtmpl).replace(sep, '')
1015
1016     @staticmethod
1017     def escape_outtmpl(outtmpl):
1018         ''' Escape any remaining strings like %s, %abc% etc. '''
1019         return re.sub(
1020             STR_FORMAT_RE_TMPL.format('', '(?![%(\0])'),
1021             lambda mobj: ('' if mobj.group('has_key') else '%') + mobj.group(0),
1022             outtmpl)
1023
1024     @classmethod
1025     def validate_outtmpl(cls, outtmpl):
1026         ''' @return None or Exception object '''
1027         outtmpl = re.sub(
1028             STR_FORMAT_RE_TMPL.format('[^)]*', '[ljqBUDS]'),
1029             lambda mobj: f'{mobj.group(0)[:-1]}s',
1030             cls._outtmpl_expandpath(outtmpl))
1031         try:
1032             cls.escape_outtmpl(outtmpl) % collections.defaultdict(int)
1033             return None
1034         except ValueError as err:
1035             return err
1036
1037     @staticmethod
1038     def _copy_infodict(info_dict):
1039         info_dict = dict(info_dict)
1040         for key in ('__original_infodict', '__postprocessors'):
1041             info_dict.pop(key, None)
1042         return info_dict
1043
1044     def prepare_outtmpl(self, outtmpl, info_dict, sanitize=False):
1045         """ Make the outtmpl and info_dict suitable for substitution: ydl.escape_outtmpl(outtmpl) % info_dict
1046         @param sanitize    Whether to sanitize the output as a filename.
1047                            For backward compatibility, a function can also be passed
1048         """
1049
1050         info_dict.setdefault('epoch', int(time.time()))  # keep epoch consistent once set
1051
1052         info_dict = self._copy_infodict(info_dict)
1053         info_dict['duration_string'] = (  # %(duration>%H-%M-%S)s is wrong if duration > 24hrs
1054             formatSeconds(info_dict['duration'], '-' if sanitize else ':')
1055             if info_dict.get('duration', None) is not None
1056             else None)
1057         info_dict['autonumber'] = self.params.get('autonumber_start', 1) - 1 + self._num_downloads
1058         info_dict['video_autonumber'] = self._num_videos
1059         if info_dict.get('resolution') is None:
1060             info_dict['resolution'] = self.format_resolution(info_dict, default=None)
1061
1062         # For fields playlist_index, playlist_autonumber and autonumber convert all occurrences
1063         # of %(field)s to %(field)0Nd for backward compatibility
1064         field_size_compat_map = {
1065             'playlist_index': number_of_digits(info_dict.get('_last_playlist_index') or 0),
1066             'playlist_autonumber': number_of_digits(info_dict.get('n_entries') or 0),
1067             'autonumber': self.params.get('autonumber_size') or 5,
1068         }
1069
1070         TMPL_DICT = {}
1071         EXTERNAL_FORMAT_RE = re.compile(STR_FORMAT_RE_TMPL.format('[^)]*', f'[{STR_FORMAT_TYPES}ljqBUDS]'))
1072         MATH_FUNCTIONS = {
1073             '+': float.__add__,
1074             '-': float.__sub__,
1075         }
1076         # Field is of the form key1.key2...
1077         # where keys (except first) can be string, int or slice
1078         FIELD_RE = r'\w*(?:\.(?:\w+|{num}|{num}?(?::{num}?){{1,2}}))*'.format(num=r'(?:-?\d+)')
1079         MATH_FIELD_RE = r'''(?:{field}|{num})'''.format(field=FIELD_RE, num=r'-?\d+(?:.\d+)?')
1080         MATH_OPERATORS_RE = r'(?:%s)' % '|'.join(map(re.escape, MATH_FUNCTIONS.keys()))
1081         INTERNAL_FORMAT_RE = re.compile(r'''(?x)
1082             (?P<negate>-)?
1083             (?P<fields>{field})
1084             (?P<maths>(?:{math_op}{math_field})*)
1085             (?:>(?P<strf_format>.+?))?
1086             (?P<alternate>(?<!\\),[^|&)]+)?
1087             (?:&(?P<replacement>.*?))?
1088             (?:\|(?P<default>.*?))?
1089             $'''.format(field=FIELD_RE, math_op=MATH_OPERATORS_RE, math_field=MATH_FIELD_RE))
1090
1091         def _traverse_infodict(k):
1092             k = k.split('.')
1093             if k[0] == '':
1094                 k.pop(0)
1095             return traverse_obj(info_dict, k, is_user_input=True, traverse_string=True)
1096
1097         def get_value(mdict):
1098             # Object traversal
1099             value = _traverse_infodict(mdict['fields'])
1100             # Negative
1101             if mdict['negate']:
1102                 value = float_or_none(value)
1103                 if value is not None:
1104                     value *= -1
1105             # Do maths
1106             offset_key = mdict['maths']
1107             if offset_key:
1108                 value = float_or_none(value)
1109                 operator = None
1110                 while offset_key:
1111                     item = re.match(
1112                         MATH_FIELD_RE if operator else MATH_OPERATORS_RE,
1113                         offset_key).group(0)
1114                     offset_key = offset_key[len(item):]
1115                     if operator is None:
1116                         operator = MATH_FUNCTIONS[item]
1117                         continue
1118                     item, multiplier = (item[1:], -1) if item[0] == '-' else (item, 1)
1119                     offset = float_or_none(item)
1120                     if offset is None:
1121                         offset = float_or_none(_traverse_infodict(item))
1122                     try:
1123                         value = operator(value, multiplier * offset)
1124                     except (TypeError, ZeroDivisionError):
1125                         return None
1126                     operator = None
1127             # Datetime formatting
1128             if mdict['strf_format']:
1129                 value = strftime_or_none(value, mdict['strf_format'].replace('\\,', ','))
1130
1131             return value
1132
1133         na = self.params.get('outtmpl_na_placeholder', 'NA')
1134
1135         def filename_sanitizer(key, value, restricted=self.params.get('restrictfilenames')):
1136             return sanitize_filename(str(value), restricted=restricted,
1137                                      is_id=re.search(r'(^|[_.])id(\.|$)', key))
1138
1139         sanitizer = sanitize if callable(sanitize) else filename_sanitizer
1140         sanitize = bool(sanitize)
1141
1142         def _dumpjson_default(obj):
1143             if isinstance(obj, (set, LazyList)):
1144                 return list(obj)
1145             return repr(obj)
1146
1147         def create_key(outer_mobj):
1148             if not outer_mobj.group('has_key'):
1149                 return outer_mobj.group(0)
1150             key = outer_mobj.group('key')
1151             mobj = re.match(INTERNAL_FORMAT_RE, key)
1152             initial_field = mobj.group('fields') if mobj else ''
1153             value, replacement, default = None, None, na
1154             while mobj:
1155                 mobj = mobj.groupdict()
1156                 default = mobj['default'] if mobj['default'] is not None else default
1157                 value = get_value(mobj)
1158                 replacement = mobj['replacement']
1159                 if value is None and mobj['alternate']:
1160                     mobj = re.match(INTERNAL_FORMAT_RE, mobj['alternate'][1:])
1161                 else:
1162                     break
1163
1164             fmt = outer_mobj.group('format')
1165             if fmt == 's' and value is not None and key in field_size_compat_map.keys():
1166                 fmt = '0{:d}d'.format(field_size_compat_map[key])
1167
1168             value = default if value is None else value if replacement is None else replacement
1169
1170             flags = outer_mobj.group('conversion') or ''
1171             str_fmt = f'{fmt[:-1]}s'
1172             if fmt[-1] == 'l':  # list
1173                 delim = '\n' if '#' in flags else ', '
1174                 value, fmt = delim.join(map(str, variadic(value, allowed_types=(str, bytes)))), str_fmt
1175             elif fmt[-1] == 'j':  # json
1176                 value, fmt = json.dumps(value, default=_dumpjson_default, indent=4 if '#' in flags else None), str_fmt
1177             elif fmt[-1] == 'q':  # quoted
1178                 value = map(str, variadic(value) if '#' in flags else [value])
1179                 value, fmt = ' '.join(map(compat_shlex_quote, value)), str_fmt
1180             elif fmt[-1] == 'B':  # bytes
1181                 value = f'%{str_fmt}'.encode('utf-8') % str(value).encode('utf-8')
1182                 value, fmt = value.decode('utf-8', 'ignore'), 's'
1183             elif fmt[-1] == 'U':  # unicode normalized
1184                 value, fmt = unicodedata.normalize(
1185                     # "+" = compatibility equivalence, "#" = NFD
1186                     'NF%s%s' % ('K' if '+' in flags else '', 'D' if '#' in flags else 'C'),
1187                     value), str_fmt
1188             elif fmt[-1] == 'D':  # decimal suffix
1189                 num_fmt, fmt = fmt[:-1].replace('#', ''), 's'
1190                 value = format_decimal_suffix(value, f'%{num_fmt}f%s' if num_fmt else '%d%s',
1191                                               factor=1024 if '#' in flags else 1000)
1192             elif fmt[-1] == 'S':  # filename sanitization
1193                 value, fmt = filename_sanitizer(initial_field, value, restricted='#' in flags), str_fmt
1194             elif fmt[-1] == 'c':
1195                 if value:
1196                     value = str(value)[0]
1197                 else:
1198                     fmt = str_fmt
1199             elif fmt[-1] not in 'rs':  # numeric
1200                 value = float_or_none(value)
1201                 if value is None:
1202                     value, fmt = default, 's'
1203
1204             if sanitize:
1205                 if fmt[-1] == 'r':
1206                     # If value is an object, sanitize might convert it to a string
1207                     # So we convert it to repr first
1208                     value, fmt = repr(value), str_fmt
1209                 if fmt[-1] in 'csr':
1210                     value = sanitizer(initial_field, value)
1211
1212             key = '%s\0%s' % (key.replace('%', '%\0'), outer_mobj.group('format'))
1213             TMPL_DICT[key] = value
1214             return '{prefix}%({key}){fmt}'.format(key=key, fmt=fmt, prefix=outer_mobj.group('prefix'))
1215
1216         return EXTERNAL_FORMAT_RE.sub(create_key, outtmpl), TMPL_DICT
1217
1218     def evaluate_outtmpl(self, outtmpl, info_dict, *args, **kwargs):
1219         outtmpl, info_dict = self.prepare_outtmpl(outtmpl, info_dict, *args, **kwargs)
1220         return self.escape_outtmpl(outtmpl) % info_dict
1221
1222     def _prepare_filename(self, info_dict, tmpl_type='default'):
1223         try:
1224             outtmpl = self._outtmpl_expandpath(self.outtmpl_dict.get(tmpl_type, self.outtmpl_dict['default']))
1225             filename = self.evaluate_outtmpl(outtmpl, info_dict, True)
1226             if not filename:
1227                 return None
1228
1229             if tmpl_type in ('default', 'temp'):
1230                 final_ext, ext = self.params.get('final_ext'), info_dict.get('ext')
1231                 if final_ext and ext and final_ext != ext and filename.endswith(f'.{final_ext}'):
1232                     filename = replace_extension(filename, ext, final_ext)
1233             else:
1234                 force_ext = OUTTMPL_TYPES[tmpl_type]
1235                 if force_ext:
1236                     filename = replace_extension(filename, force_ext, info_dict.get('ext'))
1237
1238             # https://github.com/blackjack4494/youtube-dlc/issues/85
1239             trim_file_name = self.params.get('trim_file_name', False)
1240             if trim_file_name:
1241                 no_ext, *ext = filename.rsplit('.', 2)
1242                 filename = join_nonempty(no_ext[:trim_file_name], *ext, delim='.')
1243
1244             return filename
1245         except ValueError as err:
1246             self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
1247             return None
1248
1249     def prepare_filename(self, info_dict, dir_type='', warn=False):
1250         """Generate the output filename."""
1251
1252         filename = self._prepare_filename(info_dict, dir_type or 'default')
1253         if not filename and dir_type not in ('', 'temp'):
1254             return ''
1255
1256         if warn:
1257             if not self.params.get('paths'):
1258                 pass
1259             elif filename == '-':
1260                 self.report_warning('--paths is ignored when an outputting to stdout', only_once=True)
1261             elif os.path.isabs(filename):
1262                 self.report_warning('--paths is ignored since an absolute path is given in output template', only_once=True)
1263         if filename == '-' or not filename:
1264             return filename
1265
1266         return self.get_output_path(dir_type, filename)
1267
1268     def _match_entry(self, info_dict, incomplete=False, silent=False):
1269         """ Returns None if the file should be downloaded """
1270
1271         video_title = info_dict.get('title', info_dict.get('id', 'video'))
1272
1273         def check_filter():
1274             if 'title' in info_dict:
1275                 # This can happen when we're just evaluating the playlist
1276                 title = info_dict['title']
1277                 matchtitle = self.params.get('matchtitle', False)
1278                 if matchtitle:
1279                     if not re.search(matchtitle, title, re.IGNORECASE):
1280                         return '"' + title + '" title did not match pattern "' + matchtitle + '"'
1281                 rejecttitle = self.params.get('rejecttitle', False)
1282                 if rejecttitle:
1283                     if re.search(rejecttitle, title, re.IGNORECASE):
1284                         return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
1285             date = info_dict.get('upload_date')
1286             if date is not None:
1287                 dateRange = self.params.get('daterange', DateRange())
1288                 if date not in dateRange:
1289                     return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
1290             view_count = info_dict.get('view_count')
1291             if view_count is not None:
1292                 min_views = self.params.get('min_views')
1293                 if min_views is not None and view_count < min_views:
1294                     return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
1295                 max_views = self.params.get('max_views')
1296                 if max_views is not None and view_count > max_views:
1297                     return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
1298             if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
1299                 return 'Skipping "%s" because it is age restricted' % video_title
1300
1301             match_filter = self.params.get('match_filter')
1302             if match_filter is not None:
1303                 try:
1304                     ret = match_filter(info_dict, incomplete=incomplete)
1305                 except TypeError:
1306                     # For backward compatibility
1307                     ret = None if incomplete else match_filter(info_dict)
1308                 if ret is not None:
1309                     return ret
1310             return None
1311
1312         if self.in_download_archive(info_dict):
1313             reason = '%s has already been recorded in the archive' % video_title
1314             break_opt, break_err = 'break_on_existing', ExistingVideoReached
1315         else:
1316             reason = check_filter()
1317             break_opt, break_err = 'break_on_reject', RejectedVideoReached
1318         if reason is not None:
1319             if not silent:
1320                 self.to_screen('[download] ' + reason)
1321             if self.params.get(break_opt, False):
1322                 raise break_err()
1323         return reason
1324
1325     @staticmethod
1326     def add_extra_info(info_dict, extra_info):
1327         '''Set the keys from extra_info in info dict if they are missing'''
1328         for key, value in extra_info.items():
1329             info_dict.setdefault(key, value)
1330
1331     def extract_info(self, url, download=True, ie_key=None, extra_info=None,
1332                      process=True, force_generic_extractor=False):
1333         """
1334         Return a list with a dictionary for each video extracted.
1335
1336         Arguments:
1337         url -- URL to extract
1338
1339         Keyword arguments:
1340         download -- whether to download videos during extraction
1341         ie_key -- extractor key hint
1342         extra_info -- dictionary containing the extra values to add to each result
1343         process -- whether to resolve all unresolved references (URLs, playlist items),
1344             must be True for download to work.
1345         force_generic_extractor -- force using the generic extractor
1346         """
1347
1348         if extra_info is None:
1349             extra_info = {}
1350
1351         if not ie_key and force_generic_extractor:
1352             ie_key = 'Generic'
1353
1354         if ie_key:
1355             ies = {ie_key: self._get_info_extractor_class(ie_key)}
1356         else:
1357             ies = self._ies
1358
1359         for ie_key, ie in ies.items():
1360             if not ie.suitable(url):
1361                 continue
1362
1363             if not ie.working():
1364                 self.report_warning('The program functionality for this site has been marked as broken, '
1365                                     'and will probably not work.')
1366
1367             temp_id = ie.get_temp_id(url)
1368             if temp_id is not None and self.in_download_archive({'id': temp_id, 'ie_key': ie_key}):
1369                 self.to_screen(f'[{ie_key}] {temp_id}: has already been recorded in the archive')
1370                 if self.params.get('break_on_existing', False):
1371                     raise ExistingVideoReached()
1372                 break
1373             return self.__extract_info(url, self.get_info_extractor(ie_key), download, extra_info, process)
1374         else:
1375             self.report_error('no suitable InfoExtractor for URL %s' % url)
1376
1377     def __handle_extraction_exceptions(func):
1378         @functools.wraps(func)
1379         def wrapper(self, *args, **kwargs):
1380             while True:
1381                 try:
1382                     return func(self, *args, **kwargs)
1383                 except (DownloadCancelled, LazyList.IndexError, PagedList.IndexError):
1384                     raise
1385                 except ReExtractInfo as e:
1386                     if e.expected:
1387                         self.to_screen(f'{e}; Re-extracting data')
1388                     else:
1389                         self.to_stderr('\r')
1390                         self.report_warning(f'{e}; Re-extracting data')
1391                     continue
1392                 except GeoRestrictedError as e:
1393                     msg = e.msg
1394                     if e.countries:
1395                         msg += '\nThis video is available in %s.' % ', '.join(
1396                             map(ISO3166Utils.short2full, e.countries))
1397                     msg += '\nYou might want to use a VPN or a proxy server (with --proxy) to workaround.'
1398                     self.report_error(msg)
1399                 except ExtractorError as e:  # An error we somewhat expected
1400                     self.report_error(str(e), e.format_traceback())
1401                 except Exception as e:
1402                     if self.params.get('ignoreerrors'):
1403                         self.report_error(str(e), tb=encode_compat_str(traceback.format_exc()))
1404                     else:
1405                         raise
1406                 break
1407         return wrapper
1408
1409     def _wait_for_video(self, ie_result):
1410         if (not self.params.get('wait_for_video')
1411                 or ie_result.get('_type', 'video') != 'video'
1412                 or ie_result.get('formats') or ie_result.get('url')):
1413             return
1414
1415         format_dur = lambda dur: '%02d:%02d:%02d' % timetuple_from_msec(dur * 1000)[:-1]
1416         last_msg = ''
1417
1418         def progress(msg):
1419             nonlocal last_msg
1420             self.to_screen(msg + ' ' * (len(last_msg) - len(msg)) + '\r', skip_eol=True)
1421             last_msg = msg
1422
1423         min_wait, max_wait = self.params.get('wait_for_video')
1424         diff = try_get(ie_result, lambda x: x['release_timestamp'] - time.time())
1425         if diff is None and ie_result.get('live_status') == 'is_upcoming':
1426             diff = random.randrange(min_wait, max_wait) if (max_wait and min_wait) else (max_wait or min_wait)
1427             self.report_warning('Release time of video is not known')
1428         elif (diff or 0) <= 0:
1429             self.report_warning('Video should already be available according to extracted info')
1430         diff = min(max(diff or 0, min_wait or 0), max_wait or float('inf'))
1431         self.to_screen(f'[wait] Waiting for {format_dur(diff)} - Press Ctrl+C to try now')
1432
1433         wait_till = time.time() + diff
1434         try:
1435             while True:
1436                 diff = wait_till - time.time()
1437                 if diff <= 0:
1438                     progress('')
1439                     raise ReExtractInfo('[wait] Wait period ended', expected=True)
1440                 progress(f'[wait] Remaining time until next attempt: {self._format_screen(format_dur(diff), self.Styles.EMPHASIS)}')
1441                 time.sleep(1)
1442         except KeyboardInterrupt:
1443             progress('')
1444             raise ReExtractInfo('[wait] Interrupted by user', expected=True)
1445         except BaseException as e:
1446             if not isinstance(e, ReExtractInfo):
1447                 self.to_screen('')
1448             raise
1449
1450     @__handle_extraction_exceptions
1451     def __extract_info(self, url, ie, download, extra_info, process):
1452         ie_result = ie.extract(url)
1453         if ie_result is None:  # Finished already (backwards compatibility; listformats and friends should be moved here)
1454             return
1455         if isinstance(ie_result, list):
1456             # Backwards compatibility: old IE result format
1457             ie_result = {
1458                 '_type': 'compat_list',
1459                 'entries': ie_result,
1460             }
1461         if extra_info.get('original_url'):
1462             ie_result.setdefault('original_url', extra_info['original_url'])
1463         self.add_default_extra_info(ie_result, ie, url)
1464         if process:
1465             self._wait_for_video(ie_result)
1466             return self.process_ie_result(ie_result, download, extra_info)
1467         else:
1468             return ie_result
1469
1470     def add_default_extra_info(self, ie_result, ie, url):
1471         if url is not None:
1472             self.add_extra_info(ie_result, {
1473                 'webpage_url': url,
1474                 'original_url': url,
1475             })
1476         webpage_url = ie_result.get('webpage_url')
1477         if webpage_url:
1478             self.add_extra_info(ie_result, {
1479                 'webpage_url_basename': url_basename(webpage_url),
1480                 'webpage_url_domain': get_domain(webpage_url),
1481             })
1482         if ie is not None:
1483             self.add_extra_info(ie_result, {
1484                 'extractor': ie.IE_NAME,
1485                 'extractor_key': ie.ie_key(),
1486             })
1487
1488     def process_ie_result(self, ie_result, download=True, extra_info=None):
1489         """
1490         Take the result of the ie(may be modified) and resolve all unresolved
1491         references (URLs, playlist items).
1492
1493         It will also download the videos if 'download'.
1494         Returns the resolved ie_result.
1495         """
1496         if extra_info is None:
1497             extra_info = {}
1498         result_type = ie_result.get('_type', 'video')
1499
1500         if result_type in ('url', 'url_transparent'):
1501             ie_result['url'] = sanitize_url(ie_result['url'])
1502             if ie_result.get('original_url'):
1503                 extra_info.setdefault('original_url', ie_result['original_url'])
1504
1505             extract_flat = self.params.get('extract_flat', False)
1506             if ((extract_flat == 'in_playlist' and 'playlist' in extra_info)
1507                     or extract_flat is True):
1508                 info_copy = ie_result.copy()
1509                 ie = try_get(ie_result.get('ie_key'), self.get_info_extractor)
1510                 if ie and not ie_result.get('id'):
1511                     info_copy['id'] = ie.get_temp_id(ie_result['url'])
1512                 self.add_default_extra_info(info_copy, ie, ie_result['url'])
1513                 self.add_extra_info(info_copy, extra_info)
1514                 info_copy, _ = self.pre_process(info_copy)
1515                 self.__forced_printings(info_copy, self.prepare_filename(info_copy), incomplete=True)
1516                 if self.params.get('force_write_download_archive', False):
1517                     self.record_download_archive(info_copy)
1518                 return ie_result
1519
1520         if result_type == 'video':
1521             self.add_extra_info(ie_result, extra_info)
1522             ie_result = self.process_video_result(ie_result, download=download)
1523             additional_urls = (ie_result or {}).get('additional_urls')
1524             if additional_urls:
1525                 # TODO: Improve MetadataParserPP to allow setting a list
1526                 if isinstance(additional_urls, compat_str):
1527                     additional_urls = [additional_urls]
1528                 self.to_screen(
1529                     '[info] %s: %d additional URL(s) requested' % (ie_result['id'], len(additional_urls)))
1530                 self.write_debug('Additional URLs: "%s"' % '", "'.join(additional_urls))
1531                 ie_result['additional_entries'] = [
1532                     self.extract_info(
1533                         url, download, extra_info=extra_info,
1534                         force_generic_extractor=self.params.get('force_generic_extractor'))
1535                     for url in additional_urls
1536                 ]
1537             return ie_result
1538         elif result_type == 'url':
1539             # We have to add extra_info to the results because it may be
1540             # contained in a playlist
1541             return self.extract_info(
1542                 ie_result['url'], download,
1543                 ie_key=ie_result.get('ie_key'),
1544                 extra_info=extra_info)
1545         elif result_type == 'url_transparent':
1546             # Use the information from the embedding page
1547             info = self.extract_info(
1548                 ie_result['url'], ie_key=ie_result.get('ie_key'),
1549                 extra_info=extra_info, download=False, process=False)
1550
1551             # extract_info may return None when ignoreerrors is enabled and
1552             # extraction failed with an error, don't crash and return early
1553             # in this case
1554             if not info:
1555                 return info
1556
1557             force_properties = dict(
1558                 (k, v) for k, v in ie_result.items() if v is not None)
1559             for f in ('_type', 'url', 'id', 'extractor', 'extractor_key', 'ie_key'):
1560                 if f in force_properties:
1561                     del force_properties[f]
1562             new_result = info.copy()
1563             new_result.update(force_properties)
1564
1565             # Extracted info may not be a video result (i.e.
1566             # info.get('_type', 'video') != video) but rather an url or
1567             # url_transparent. In such cases outer metadata (from ie_result)
1568             # should be propagated to inner one (info). For this to happen
1569             # _type of info should be overridden with url_transparent. This
1570             # fixes issue from https://github.com/ytdl-org/youtube-dl/pull/11163.
1571             if new_result.get('_type') == 'url':
1572                 new_result['_type'] = 'url_transparent'
1573
1574             return self.process_ie_result(
1575                 new_result, download=download, extra_info=extra_info)
1576         elif result_type in ('playlist', 'multi_video'):
1577             # Protect from infinite recursion due to recursively nested playlists
1578             # (see https://github.com/ytdl-org/youtube-dl/issues/27833)
1579             webpage_url = ie_result['webpage_url']
1580             if webpage_url in self._playlist_urls:
1581                 self.to_screen(
1582                     '[download] Skipping already downloaded playlist: %s'
1583                     % ie_result.get('title') or ie_result.get('id'))
1584                 return
1585
1586             self._playlist_level += 1
1587             self._playlist_urls.add(webpage_url)
1588             self._sanitize_thumbnails(ie_result)
1589             try:
1590                 return self.__process_playlist(ie_result, download)
1591             finally:
1592                 self._playlist_level -= 1
1593                 if not self._playlist_level:
1594                     self._playlist_urls.clear()
1595         elif result_type == 'compat_list':
1596             self.report_warning(
1597                 'Extractor %s returned a compat_list result. '
1598                 'It needs to be updated.' % ie_result.get('extractor'))
1599
1600             def _fixup(r):
1601                 self.add_extra_info(r, {
1602                     'extractor': ie_result['extractor'],
1603                     'webpage_url': ie_result['webpage_url'],
1604                     'webpage_url_basename': url_basename(ie_result['webpage_url']),
1605                     'webpage_url_domain': get_domain(ie_result['webpage_url']),
1606                     'extractor_key': ie_result['extractor_key'],
1607                 })
1608                 return r
1609             ie_result['entries'] = [
1610                 self.process_ie_result(_fixup(r), download, extra_info)
1611                 for r in ie_result['entries']
1612             ]
1613             return ie_result
1614         else:
1615             raise Exception('Invalid result type: %s' % result_type)
1616
1617     def _ensure_dir_exists(self, path):
1618         return make_dir(path, self.report_error)
1619
1620     @staticmethod
1621     def _playlist_infodict(ie_result, **kwargs):
1622         return {
1623             **ie_result,
1624             'playlist': ie_result.get('title') or ie_result.get('id'),
1625             'playlist_id': ie_result.get('id'),
1626             'playlist_title': ie_result.get('title'),
1627             'playlist_uploader': ie_result.get('uploader'),
1628             'playlist_uploader_id': ie_result.get('uploader_id'),
1629             'playlist_index': 0,
1630             **kwargs,
1631         }
1632
1633     def __process_playlist(self, ie_result, download):
1634         # We process each entry in the playlist
1635         playlist = ie_result.get('title') or ie_result.get('id')
1636         self.to_screen('[download] Downloading playlist: %s' % playlist)
1637
1638         if 'entries' not in ie_result:
1639             raise EntryNotInPlaylist('There are no entries')
1640
1641         MissingEntry = object()
1642         incomplete_entries = bool(ie_result.get('requested_entries'))
1643         if incomplete_entries:
1644             def fill_missing_entries(entries, indices):
1645                 ret = [MissingEntry] * max(indices)
1646                 for i, entry in zip(indices, entries):
1647                     ret[i - 1] = entry
1648                 return ret
1649             ie_result['entries'] = fill_missing_entries(ie_result['entries'], ie_result['requested_entries'])
1650
1651         playlist_results = []
1652
1653         playliststart = self.params.get('playliststart', 1)
1654         playlistend = self.params.get('playlistend')
1655         # For backwards compatibility, interpret -1 as whole list
1656         if playlistend == -1:
1657             playlistend = None
1658
1659         playlistitems_str = self.params.get('playlist_items')
1660         playlistitems = None
1661         if playlistitems_str is not None:
1662             def iter_playlistitems(format):
1663                 for string_segment in format.split(','):
1664                     if '-' in string_segment:
1665                         start, end = string_segment.split('-')
1666                         for item in range(int(start), int(end) + 1):
1667                             yield int(item)
1668                     else:
1669                         yield int(string_segment)
1670             playlistitems = orderedSet(iter_playlistitems(playlistitems_str))
1671
1672         ie_entries = ie_result['entries']
1673         if isinstance(ie_entries, list):
1674             playlist_count = len(ie_entries)
1675             msg = f'Collected {playlist_count} videos; downloading %d of them'
1676             ie_result['playlist_count'] = ie_result.get('playlist_count') or playlist_count
1677
1678             def get_entry(i):
1679                 return ie_entries[i - 1]
1680         else:
1681             msg = 'Downloading %d videos'
1682             if not isinstance(ie_entries, (PagedList, LazyList)):
1683                 ie_entries = LazyList(ie_entries)
1684             elif isinstance(ie_entries, InAdvancePagedList):
1685                 if ie_entries._pagesize == 1:
1686                     playlist_count = ie_entries._pagecount
1687
1688             def get_entry(i):
1689                 return YoutubeDL.__handle_extraction_exceptions(
1690                     lambda self, i: ie_entries[i - 1]
1691                 )(self, i)
1692
1693         entries, broken = [], False
1694         items = playlistitems if playlistitems is not None else itertools.count(playliststart)
1695         for i in items:
1696             if i == 0:
1697                 continue
1698             if playlistitems is None and playlistend is not None and playlistend < i:
1699                 break
1700             entry = None
1701             try:
1702                 entry = get_entry(i)
1703                 if entry is MissingEntry:
1704                     raise EntryNotInPlaylist()
1705             except (IndexError, EntryNotInPlaylist):
1706                 if incomplete_entries:
1707                     raise EntryNotInPlaylist(f'Entry {i} cannot be found')
1708                 elif not playlistitems:
1709                     break
1710             entries.append(entry)
1711             try:
1712                 if entry is not None:
1713                     self._match_entry(entry, incomplete=True, silent=True)
1714             except (ExistingVideoReached, RejectedVideoReached):
1715                 broken = True
1716                 break
1717         ie_result['entries'] = entries
1718
1719         # Save playlist_index before re-ordering
1720         entries = [
1721             ((playlistitems[i - 1] if playlistitems else i + playliststart - 1), entry)
1722             for i, entry in enumerate(entries, 1)
1723             if entry is not None]
1724         n_entries = len(entries)
1725
1726         if not (ie_result.get('playlist_count') or broken or playlistitems or playlistend):
1727             ie_result['playlist_count'] = n_entries
1728
1729         if not playlistitems and (playliststart != 1 or playlistend):
1730             playlistitems = list(range(playliststart, playliststart + n_entries))
1731         ie_result['requested_entries'] = playlistitems
1732
1733         _infojson_written = False
1734         write_playlist_files = self.params.get('allow_playlist_files', True)
1735         if write_playlist_files and self.params.get('list_thumbnails'):
1736             self.list_thumbnails(ie_result)
1737         if write_playlist_files and not self.params.get('simulate'):
1738             ie_copy = self._playlist_infodict(ie_result, n_entries=n_entries)
1739             _infojson_written = self._write_info_json(
1740                 'playlist', ie_result, self.prepare_filename(ie_copy, 'pl_infojson'))
1741             if _infojson_written is None:
1742                 return
1743             if self._write_description('playlist', ie_result,
1744                                        self.prepare_filename(ie_copy, 'pl_description')) is None:
1745                 return
1746             # TODO: This should be passed to ThumbnailsConvertor if necessary
1747             self._write_thumbnails('playlist', ie_copy, self.prepare_filename(ie_copy, 'pl_thumbnail'))
1748
1749         if self.params.get('playlistreverse', False):
1750             entries = entries[::-1]
1751         if self.params.get('playlistrandom', False):
1752             random.shuffle(entries)
1753
1754         x_forwarded_for = ie_result.get('__x_forwarded_for_ip')
1755
1756         self.to_screen('[%s] playlist %s: %s' % (ie_result['extractor'], playlist, msg % n_entries))
1757         failures = 0
1758         max_failures = self.params.get('skip_playlist_after_errors') or float('inf')
1759         for i, entry_tuple in enumerate(entries, 1):
1760             playlist_index, entry = entry_tuple
1761             if 'playlist-index' in self.params.get('compat_opts', []):
1762                 playlist_index = playlistitems[i - 1] if playlistitems else i + playliststart - 1
1763             self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
1764             # This __x_forwarded_for_ip thing is a bit ugly but requires
1765             # minimal changes
1766             if x_forwarded_for:
1767                 entry['__x_forwarded_for_ip'] = x_forwarded_for
1768             extra = {
1769                 'n_entries': n_entries,
1770                 '_last_playlist_index': max(playlistitems) if playlistitems else (playlistend or n_entries),
1771                 'playlist_count': ie_result.get('playlist_count'),
1772                 'playlist_index': playlist_index,
1773                 'playlist_autonumber': i,
1774                 'playlist': playlist,
1775                 'playlist_id': ie_result.get('id'),
1776                 'playlist_title': ie_result.get('title'),
1777                 'playlist_uploader': ie_result.get('uploader'),
1778                 'playlist_uploader_id': ie_result.get('uploader_id'),
1779                 'extractor': ie_result['extractor'],
1780                 'webpage_url': ie_result['webpage_url'],
1781                 'webpage_url_basename': url_basename(ie_result['webpage_url']),
1782                 'webpage_url_domain': get_domain(ie_result['webpage_url']),
1783                 'extractor_key': ie_result['extractor_key'],
1784             }
1785
1786             if self._match_entry(entry, incomplete=True) is not None:
1787                 continue
1788
1789             entry_result = self.__process_iterable_entry(entry, download, extra)
1790             if not entry_result:
1791                 failures += 1
1792             if failures >= max_failures:
1793                 self.report_error(
1794                     'Skipping the remaining entries in playlist "%s" since %d items failed extraction' % (playlist, failures))
1795                 break
1796             playlist_results.append(entry_result)
1797         ie_result['entries'] = playlist_results
1798
1799         # Write the updated info to json
1800         if _infojson_written and self._write_info_json(
1801                 'updated playlist', ie_result,
1802                 self.prepare_filename(ie_copy, 'pl_infojson'), overwrite=True) is None:
1803             return
1804
1805         ie_result = self.run_all_pps('playlist', ie_result)
1806         self.to_screen(f'[download] Finished downloading playlist: {playlist}')
1807         return ie_result
1808
1809     @__handle_extraction_exceptions
1810     def __process_iterable_entry(self, entry, download, extra_info):
1811         return self.process_ie_result(
1812             entry, download=download, extra_info=extra_info)
1813
1814     def _build_format_filter(self, filter_spec):
1815         " Returns a function to filter the formats according to the filter_spec "
1816
1817         OPERATORS = {
1818             '<': operator.lt,
1819             '<=': operator.le,
1820             '>': operator.gt,
1821             '>=': operator.ge,
1822             '=': operator.eq,
1823             '!=': operator.ne,
1824         }
1825         operator_rex = re.compile(r'''(?x)\s*
1826             (?P<key>width|height|tbr|abr|vbr|asr|filesize|filesize_approx|fps)\s*
1827             (?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
1828             (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)\s*
1829             ''' % '|'.join(map(re.escape, OPERATORS.keys())))
1830         m = operator_rex.fullmatch(filter_spec)
1831         if m:
1832             try:
1833                 comparison_value = int(m.group('value'))
1834             except ValueError:
1835                 comparison_value = parse_filesize(m.group('value'))
1836                 if comparison_value is None:
1837                     comparison_value = parse_filesize(m.group('value') + 'B')
1838                 if comparison_value is None:
1839                     raise ValueError(
1840                         'Invalid value %r in format specification %r' % (
1841                             m.group('value'), filter_spec))
1842             op = OPERATORS[m.group('op')]
1843
1844         if not m:
1845             STR_OPERATORS = {
1846                 '=': operator.eq,
1847                 '^=': lambda attr, value: attr.startswith(value),
1848                 '$=': lambda attr, value: attr.endswith(value),
1849                 '*=': lambda attr, value: value in attr,
1850                 '~=': lambda attr, value: value.search(attr) is not None
1851             }
1852             str_operator_rex = re.compile(r'''(?x)\s*
1853                 (?P<key>[a-zA-Z0-9._-]+)\s*
1854                 (?P<negation>!\s*)?(?P<op>%s)\s*(?P<none_inclusive>\?\s*)?
1855                 (?P<quote>["'])?
1856                 (?P<value>(?(quote)(?:(?!(?P=quote))[^\\]|\\.)+|[\w.-]+))
1857                 (?(quote)(?P=quote))\s*
1858                 ''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))
1859             m = str_operator_rex.fullmatch(filter_spec)
1860             if m:
1861                 if m.group('op') == '~=':
1862                     comparison_value = re.compile(m.group('value'))
1863                 else:
1864                     comparison_value = re.sub(r'''\\([\\"'])''', r'\1', m.group('value'))
1865                 str_op = STR_OPERATORS[m.group('op')]
1866                 if m.group('negation'):
1867                     op = lambda attr, value: not str_op(attr, value)
1868                 else:
1869                     op = str_op
1870
1871         if not m:
1872             raise SyntaxError('Invalid filter specification %r' % filter_spec)
1873
1874         def _filter(f):
1875             actual_value = f.get(m.group('key'))
1876             if actual_value is None:
1877                 return m.group('none_inclusive')
1878             return op(actual_value, comparison_value)
1879         return _filter
1880
1881     def _check_formats(self, formats):
1882         for f in formats:
1883             self.to_screen('[info] Testing format %s' % f['format_id'])
1884             path = self.get_output_path('temp')
1885             if not self._ensure_dir_exists(f'{path}/'):
1886                 continue
1887             temp_file = tempfile.NamedTemporaryFile(suffix='.tmp', delete=False, dir=path or None)
1888             temp_file.close()
1889             try:
1890                 success, _ = self.dl(temp_file.name, f, test=True)
1891             except (DownloadError, IOError, OSError, ValueError) + network_exceptions:
1892                 success = False
1893             finally:
1894                 if os.path.exists(temp_file.name):
1895                     try:
1896                         os.remove(temp_file.name)
1897                     except OSError:
1898                         self.report_warning('Unable to delete temporary file "%s"' % temp_file.name)
1899             if success:
1900                 yield f
1901             else:
1902                 self.to_screen('[info] Unable to download format %s. Skipping...' % f['format_id'])
1903
1904     def _default_format_spec(self, info_dict, download=True):
1905
1906         def can_merge():
1907             merger = FFmpegMergerPP(self)
1908             return merger.available and merger.can_merge()
1909
1910         prefer_best = (
1911             not self.params.get('simulate')
1912             and download
1913             and (
1914                 not can_merge()
1915                 or info_dict.get('is_live', False)
1916                 or self.outtmpl_dict['default'] == '-'))
1917         compat = (
1918             prefer_best
1919             or self.params.get('allow_multiple_audio_streams', False)
1920             or 'format-spec' in self.params.get('compat_opts', []))
1921
1922         return (
1923             'best/bestvideo+bestaudio' if prefer_best
1924             else 'bestvideo*+bestaudio/best' if not compat
1925             else 'bestvideo+bestaudio/best')
1926
1927     def build_format_selector(self, format_spec):
1928         def syntax_error(note, start):
1929             message = (
1930                 'Invalid format specification: '
1931                 '{0}\n\t{1}\n\t{2}^'.format(note, format_spec, ' ' * start[1]))
1932             return SyntaxError(message)
1933
1934         PICKFIRST = 'PICKFIRST'
1935         MERGE = 'MERGE'
1936         SINGLE = 'SINGLE'
1937         GROUP = 'GROUP'
1938         FormatSelector = collections.namedtuple('FormatSelector', ['type', 'selector', 'filters'])
1939
1940         allow_multiple_streams = {'audio': self.params.get('allow_multiple_audio_streams', False),
1941                                   'video': self.params.get('allow_multiple_video_streams', False)}
1942
1943         check_formats = self.params.get('check_formats') == 'selected'
1944
1945         def _parse_filter(tokens):
1946             filter_parts = []
1947             for type, string, start, _, _ in tokens:
1948                 if type == tokenize.OP and string == ']':
1949                     return ''.join(filter_parts)
1950                 else:
1951                     filter_parts.append(string)
1952
1953         def _remove_unused_ops(tokens):
1954             # Remove operators that we don't use and join them with the surrounding strings
1955             # for example: 'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9'
1956             ALLOWED_OPS = ('/', '+', ',', '(', ')')
1957             last_string, last_start, last_end, last_line = None, None, None, None
1958             for type, string, start, end, line in tokens:
1959                 if type == tokenize.OP and string == '[':
1960                     if last_string:
1961                         yield tokenize.NAME, last_string, last_start, last_end, last_line
1962                         last_string = None
1963                     yield type, string, start, end, line
1964                     # everything inside brackets will be handled by _parse_filter
1965                     for type, string, start, end, line in tokens:
1966                         yield type, string, start, end, line
1967                         if type == tokenize.OP and string == ']':
1968                             break
1969                 elif type == tokenize.OP and string in ALLOWED_OPS:
1970                     if last_string:
1971                         yield tokenize.NAME, last_string, last_start, last_end, last_line
1972                         last_string = None
1973                     yield type, string, start, end, line
1974                 elif type in [tokenize.NAME, tokenize.NUMBER, tokenize.OP]:
1975                     if not last_string:
1976                         last_string = string
1977                         last_start = start
1978                         last_end = end
1979                     else:
1980                         last_string += string
1981             if last_string:
1982                 yield tokenize.NAME, last_string, last_start, last_end, last_line
1983
1984         def _parse_format_selection(tokens, inside_merge=False, inside_choice=False, inside_group=False):
1985             selectors = []
1986             current_selector = None
1987             for type, string, start, _, _ in tokens:
1988                 # ENCODING is only defined in python 3.x
1989                 if type == getattr(tokenize, 'ENCODING', None):
1990                     continue
1991                 elif type in [tokenize.NAME, tokenize.NUMBER]:
1992                     current_selector = FormatSelector(SINGLE, string, [])
1993                 elif type == tokenize.OP:
1994                     if string == ')':
1995                         if not inside_group:
1996                             # ')' will be handled by the parentheses group
1997                             tokens.restore_last_token()
1998                         break
1999                     elif inside_merge and string in ['/', ',']:
2000                         tokens.restore_last_token()
2001                         break
2002                     elif inside_choice and string == ',':
2003                         tokens.restore_last_token()
2004                         break
2005                     elif string == ',':
2006                         if not current_selector:
2007                             raise syntax_error('"," must follow a format selector', start)
2008                         selectors.append(current_selector)
2009                         current_selector = None
2010                     elif string == '/':
2011                         if not current_selector:
2012                             raise syntax_error('"/" must follow a format selector', start)
2013                         first_choice = current_selector
2014                         second_choice = _parse_format_selection(tokens, inside_choice=True)
2015                         current_selector = FormatSelector(PICKFIRST, (first_choice, second_choice), [])
2016                     elif string == '[':
2017                         if not current_selector:
2018                             current_selector = FormatSelector(SINGLE, 'best', [])
2019                         format_filter = _parse_filter(tokens)
2020                         current_selector.filters.append(format_filter)
2021                     elif string == '(':
2022                         if current_selector:
2023                             raise syntax_error('Unexpected "("', start)
2024                         group = _parse_format_selection(tokens, inside_group=True)
2025                         current_selector = FormatSelector(GROUP, group, [])
2026                     elif string == '+':
2027                         if not current_selector:
2028                             raise syntax_error('Unexpected "+"', start)
2029                         selector_1 = current_selector
2030                         selector_2 = _parse_format_selection(tokens, inside_merge=True)
2031                         if not selector_2:
2032                             raise syntax_error('Expected a selector', start)
2033                         current_selector = FormatSelector(MERGE, (selector_1, selector_2), [])
2034                     else:
2035                         raise syntax_error('Operator not recognized: "{0}"'.format(string), start)
2036                 elif type == tokenize.ENDMARKER:
2037                     break
2038             if current_selector:
2039                 selectors.append(current_selector)
2040             return selectors
2041
2042         def _merge(formats_pair):
2043             format_1, format_2 = formats_pair
2044
2045             formats_info = []
2046             formats_info.extend(format_1.get('requested_formats', (format_1,)))
2047             formats_info.extend(format_2.get('requested_formats', (format_2,)))
2048
2049             if not allow_multiple_streams['video'] or not allow_multiple_streams['audio']:
2050                 get_no_more = {'video': False, 'audio': False}
2051                 for (i, fmt_info) in enumerate(formats_info):
2052                     if fmt_info.get('acodec') == fmt_info.get('vcodec') == 'none':
2053                         formats_info.pop(i)
2054                         continue
2055                     for aud_vid in ['audio', 'video']:
2056                         if not allow_multiple_streams[aud_vid] and fmt_info.get(aud_vid[0] + 'codec') != 'none':
2057                             if get_no_more[aud_vid]:
2058                                 formats_info.pop(i)
2059                                 break
2060                             get_no_more[aud_vid] = True
2061
2062             if len(formats_info) == 1:
2063                 return formats_info[0]
2064
2065             video_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('vcodec') != 'none']
2066             audio_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('acodec') != 'none']
2067
2068             the_only_video = video_fmts[0] if len(video_fmts) == 1 else None
2069             the_only_audio = audio_fmts[0] if len(audio_fmts) == 1 else None
2070
2071             output_ext = self.params.get('merge_output_format')
2072             if not output_ext:
2073                 if the_only_video:
2074                     output_ext = the_only_video['ext']
2075                 elif the_only_audio and not video_fmts:
2076                     output_ext = the_only_audio['ext']
2077                 else:
2078                     output_ext = 'mkv'
2079
2080             filtered = lambda *keys: filter(None, (traverse_obj(fmt, *keys) for fmt in formats_info))
2081
2082             new_dict = {
2083                 'requested_formats': formats_info,
2084                 'format': '+'.join(filtered('format')),
2085                 'format_id': '+'.join(filtered('format_id')),
2086                 'ext': output_ext,
2087                 'protocol': '+'.join(map(determine_protocol, formats_info)),
2088                 'language': '+'.join(orderedSet(filtered('language'))) or None,
2089                 'format_note': '+'.join(orderedSet(filtered('format_note'))) or None,
2090                 'filesize_approx': sum(filtered('filesize', 'filesize_approx')) or None,
2091                 'tbr': sum(filtered('tbr', 'vbr', 'abr')),
2092             }
2093
2094             if the_only_video:
2095                 new_dict.update({
2096                     'width': the_only_video.get('width'),
2097                     'height': the_only_video.get('height'),
2098                     'resolution': the_only_video.get('resolution') or self.format_resolution(the_only_video),
2099                     'fps': the_only_video.get('fps'),
2100                     'dynamic_range': the_only_video.get('dynamic_range'),
2101                     'vcodec': the_only_video.get('vcodec'),
2102                     'vbr': the_only_video.get('vbr'),
2103                     'stretched_ratio': the_only_video.get('stretched_ratio'),
2104                 })
2105
2106             if the_only_audio:
2107                 new_dict.update({
2108                     'acodec': the_only_audio.get('acodec'),
2109                     'abr': the_only_audio.get('abr'),
2110                     'asr': the_only_audio.get('asr'),
2111                 })
2112
2113             return new_dict
2114
2115         def _check_formats(formats):
2116             if not check_formats:
2117                 yield from formats
2118                 return
2119             yield from self._check_formats(formats)
2120
2121         def _build_selector_function(selector):
2122             if isinstance(selector, list):  # ,
2123                 fs = [_build_selector_function(s) for s in selector]
2124
2125                 def selector_function(ctx):
2126                     for f in fs:
2127                         yield from f(ctx)
2128                 return selector_function
2129
2130             elif selector.type == GROUP:  # ()
2131                 selector_function = _build_selector_function(selector.selector)
2132
2133             elif selector.type == PICKFIRST:  # /
2134                 fs = [_build_selector_function(s) for s in selector.selector]
2135
2136                 def selector_function(ctx):
2137                     for f in fs:
2138                         picked_formats = list(f(ctx))
2139                         if picked_formats:
2140                             return picked_formats
2141                     return []
2142
2143             elif selector.type == MERGE:  # +
2144                 selector_1, selector_2 = map(_build_selector_function, selector.selector)
2145
2146                 def selector_function(ctx):
2147                     for pair in itertools.product(selector_1(ctx), selector_2(ctx)):
2148                         yield _merge(pair)
2149
2150             elif selector.type == SINGLE:  # atom
2151                 format_spec = selector.selector or 'best'
2152
2153                 # TODO: Add allvideo, allaudio etc by generalizing the code with best/worst selector
2154                 if format_spec == 'all':
2155                     def selector_function(ctx):
2156                         yield from _check_formats(ctx['formats'][::-1])
2157                 elif format_spec == 'mergeall':
2158                     def selector_function(ctx):
2159                         formats = list(_check_formats(ctx['formats']))
2160                         if not formats:
2161                             return
2162                         merged_format = formats[-1]
2163                         for f in formats[-2::-1]:
2164                             merged_format = _merge((merged_format, f))
2165                         yield merged_format
2166
2167                 else:
2168                     format_fallback, format_reverse, format_idx = False, True, 1
2169                     mobj = re.match(
2170                         r'(?P<bw>best|worst|b|w)(?P<type>video|audio|v|a)?(?P<mod>\*)?(?:\.(?P<n>[1-9]\d*))?$',
2171                         format_spec)
2172                     if mobj is not None:
2173                         format_idx = int_or_none(mobj.group('n'), default=1)
2174                         format_reverse = mobj.group('bw')[0] == 'b'
2175                         format_type = (mobj.group('type') or [None])[0]
2176                         not_format_type = {'v': 'a', 'a': 'v'}.get(format_type)
2177                         format_modified = mobj.group('mod') is not None
2178
2179                         format_fallback = not format_type and not format_modified  # for b, w
2180                         _filter_f = (
2181                             (lambda f: f.get('%scodec' % format_type) != 'none')
2182                             if format_type and format_modified  # bv*, ba*, wv*, wa*
2183                             else (lambda f: f.get('%scodec' % not_format_type) == 'none')
2184                             if format_type  # bv, ba, wv, wa
2185                             else (lambda f: f.get('vcodec') != 'none' and f.get('acodec') != 'none')
2186                             if not format_modified  # b, w
2187                             else lambda f: True)  # b*, w*
2188                         filter_f = lambda f: _filter_f(f) and (
2189                             f.get('vcodec') != 'none' or f.get('acodec') != 'none')
2190                     else:
2191                         if format_spec in self._format_selection_exts['audio']:
2192                             filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') != 'none'
2193                         elif format_spec in self._format_selection_exts['video']:
2194                             filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') != 'none' and f.get('vcodec') != 'none'
2195                         elif format_spec in self._format_selection_exts['storyboards']:
2196                             filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') == 'none' and f.get('vcodec') == 'none'
2197                         else:
2198                             filter_f = lambda f: f.get('format_id') == format_spec  # id
2199
2200                     def selector_function(ctx):
2201                         formats = list(ctx['formats'])
2202                         matches = list(filter(filter_f, formats)) if filter_f is not None else formats
2203                         if format_fallback and ctx['incomplete_formats'] and not matches:
2204                             # for extractors with incomplete formats (audio only (soundcloud)
2205                             # or video only (imgur)) best/worst will fallback to
2206                             # best/worst {video,audio}-only format
2207                             matches = formats
2208                         matches = LazyList(_check_formats(matches[::-1 if format_reverse else 1]))
2209                         try:
2210                             yield matches[format_idx - 1]
2211                         except IndexError:
2212                             return
2213
2214             filters = [self._build_format_filter(f) for f in selector.filters]
2215
2216             def final_selector(ctx):
2217                 ctx_copy = dict(ctx)
2218                 for _filter in filters:
2219                     ctx_copy['formats'] = list(filter(_filter, ctx_copy['formats']))
2220                 return selector_function(ctx_copy)
2221             return final_selector
2222
2223         stream = io.BytesIO(format_spec.encode('utf-8'))
2224         try:
2225             tokens = list(_remove_unused_ops(compat_tokenize_tokenize(stream.readline)))
2226         except tokenize.TokenError:
2227             raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec)))
2228
2229         class TokenIterator(object):
2230             def __init__(self, tokens):
2231                 self.tokens = tokens
2232                 self.counter = 0
2233
2234             def __iter__(self):
2235                 return self
2236
2237             def __next__(self):
2238                 if self.counter >= len(self.tokens):
2239                     raise StopIteration()
2240                 value = self.tokens[self.counter]
2241                 self.counter += 1
2242                 return value
2243
2244             next = __next__
2245
2246             def restore_last_token(self):
2247                 self.counter -= 1
2248
2249         parsed_selector = _parse_format_selection(iter(TokenIterator(tokens)))
2250         return _build_selector_function(parsed_selector)
2251
2252     def _calc_headers(self, info_dict):
2253         res = std_headers.copy()
2254         res.update(info_dict.get('http_headers') or {})
2255
2256         cookies = self._calc_cookies(info_dict)
2257         if cookies:
2258             res['Cookie'] = cookies
2259
2260         if 'X-Forwarded-For' not in res:
2261             x_forwarded_for_ip = info_dict.get('__x_forwarded_for_ip')
2262             if x_forwarded_for_ip:
2263                 res['X-Forwarded-For'] = x_forwarded_for_ip
2264
2265         return res
2266
2267     def _calc_cookies(self, info_dict):
2268         pr = sanitized_Request(info_dict['url'])
2269         self.cookiejar.add_cookie_header(pr)
2270         return pr.get_header('Cookie')
2271
2272     def _sort_thumbnails(self, thumbnails):
2273         thumbnails.sort(key=lambda t: (
2274             t.get('preference') if t.get('preference') is not None else -1,
2275             t.get('width') if t.get('width') is not None else -1,
2276             t.get('height') if t.get('height') is not None else -1,
2277             t.get('id') if t.get('id') is not None else '',
2278             t.get('url')))
2279
2280     def _sanitize_thumbnails(self, info_dict):
2281         thumbnails = info_dict.get('thumbnails')
2282         if thumbnails is None:
2283             thumbnail = info_dict.get('thumbnail')
2284             if thumbnail:
2285                 info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}]
2286         if not thumbnails:
2287             return
2288
2289         def check_thumbnails(thumbnails):
2290             for t in thumbnails:
2291                 self.to_screen(f'[info] Testing thumbnail {t["id"]}')
2292                 try:
2293                     self.urlopen(HEADRequest(t['url']))
2294                 except network_exceptions as err:
2295                     self.to_screen(f'[info] Unable to connect to thumbnail {t["id"]} URL {t["url"]!r} - {err}. Skipping...')
2296                     continue
2297                 yield t
2298
2299         self._sort_thumbnails(thumbnails)
2300         for i, t in enumerate(thumbnails):
2301             if t.get('id') is None:
2302                 t['id'] = '%d' % i
2303             if t.get('width') and t.get('height'):
2304                 t['resolution'] = '%dx%d' % (t['width'], t['height'])
2305             t['url'] = sanitize_url(t['url'])
2306
2307         if self.params.get('check_formats') is True:
2308             info_dict['thumbnails'] = LazyList(check_thumbnails(thumbnails[::-1]), reverse=True)
2309         else:
2310             info_dict['thumbnails'] = thumbnails
2311
2312     def process_video_result(self, info_dict, download=True):
2313         assert info_dict.get('_type', 'video') == 'video'
2314         self._num_videos += 1
2315
2316         if 'id' not in info_dict:
2317             raise ExtractorError('Missing "id" field in extractor result', ie=info_dict['extractor'])
2318         elif not info_dict.get('id'):
2319             raise ExtractorError('Extractor failed to obtain "id"', ie=info_dict['extractor'])
2320
2321         info_dict['fulltitle'] = info_dict.get('title')
2322         if 'title' not in info_dict:
2323             raise ExtractorError('Missing "title" field in extractor result',
2324                                  video_id=info_dict['id'], ie=info_dict['extractor'])
2325         elif not info_dict.get('title'):
2326             self.report_warning('Extractor failed to obtain "title". Creating a generic title instead')
2327             info_dict['title'] = f'{info_dict["extractor"]} video #{info_dict["id"]}'
2328
2329         def report_force_conversion(field, field_not, conversion):
2330             self.report_warning(
2331                 '"%s" field is not %s - forcing %s conversion, there is an error in extractor'
2332                 % (field, field_not, conversion))
2333
2334         def sanitize_string_field(info, string_field):
2335             field = info.get(string_field)
2336             if field is None or isinstance(field, compat_str):
2337                 return
2338             report_force_conversion(string_field, 'a string', 'string')
2339             info[string_field] = compat_str(field)
2340
2341         def sanitize_numeric_fields(info):
2342             for numeric_field in self._NUMERIC_FIELDS:
2343                 field = info.get(numeric_field)
2344                 if field is None or isinstance(field, compat_numeric_types):
2345                     continue
2346                 report_force_conversion(numeric_field, 'numeric', 'int')
2347                 info[numeric_field] = int_or_none(field)
2348
2349         sanitize_string_field(info_dict, 'id')
2350         sanitize_numeric_fields(info_dict)
2351
2352         if 'playlist' not in info_dict:
2353             # It isn't part of a playlist
2354             info_dict['playlist'] = None
2355             info_dict['playlist_index'] = None
2356
2357         self._sanitize_thumbnails(info_dict)
2358
2359         thumbnail = info_dict.get('thumbnail')
2360         thumbnails = info_dict.get('thumbnails')
2361         if thumbnail:
2362             info_dict['thumbnail'] = sanitize_url(thumbnail)
2363         elif thumbnails:
2364             info_dict['thumbnail'] = thumbnails[-1]['url']
2365
2366         if info_dict.get('display_id') is None and 'id' in info_dict:
2367             info_dict['display_id'] = info_dict['id']
2368
2369         if info_dict.get('duration') is not None:
2370             info_dict['duration_string'] = formatSeconds(info_dict['duration'])
2371
2372         for ts_key, date_key in (
2373                 ('timestamp', 'upload_date'),
2374                 ('release_timestamp', 'release_date'),
2375                 ('modified_timestamp', 'modified_date'),
2376         ):
2377             if info_dict.get(date_key) is None and info_dict.get(ts_key) is not None:
2378                 # Working around out-of-range timestamp values (e.g. negative ones on Windows,
2379                 # see http://bugs.python.org/issue1646728)
2380                 try:
2381                     upload_date = datetime.datetime.utcfromtimestamp(info_dict[ts_key])
2382                     info_dict[date_key] = upload_date.strftime('%Y%m%d')
2383                 except (ValueError, OverflowError, OSError):
2384                     pass
2385
2386         live_keys = ('is_live', 'was_live')
2387         live_status = info_dict.get('live_status')
2388         if live_status is None:
2389             for key in live_keys:
2390                 if info_dict.get(key) is False:
2391                     continue
2392                 if info_dict.get(key):
2393                     live_status = key
2394                 break
2395             if all(info_dict.get(key) is False for key in live_keys):
2396                 live_status = 'not_live'
2397         if live_status:
2398             info_dict['live_status'] = live_status
2399             for key in live_keys:
2400                 if info_dict.get(key) is None:
2401                     info_dict[key] = (live_status == key)
2402
2403         # Auto generate title fields corresponding to the *_number fields when missing
2404         # in order to always have clean titles. This is very common for TV series.
2405         for field in ('chapter', 'season', 'episode'):
2406             if info_dict.get('%s_number' % field) is not None and not info_dict.get(field):
2407                 info_dict[field] = '%s %d' % (field.capitalize(), info_dict['%s_number' % field])
2408
2409         for cc_kind in ('subtitles', 'automatic_captions'):
2410             cc = info_dict.get(cc_kind)
2411             if cc:
2412                 for _, subtitle in cc.items():
2413                     for subtitle_format in subtitle:
2414                         if subtitle_format.get('url'):
2415                             subtitle_format['url'] = sanitize_url(subtitle_format['url'])
2416                         if subtitle_format.get('ext') is None:
2417                             subtitle_format['ext'] = determine_ext(subtitle_format['url']).lower()
2418
2419         automatic_captions = info_dict.get('automatic_captions')
2420         subtitles = info_dict.get('subtitles')
2421
2422         info_dict['requested_subtitles'] = self.process_subtitles(
2423             info_dict['id'], subtitles, automatic_captions)
2424
2425         if info_dict.get('formats') is None:
2426             # There's only one format available
2427             formats = [info_dict]
2428         else:
2429             formats = info_dict['formats']
2430
2431         info_dict['__has_drm'] = any(f.get('has_drm') for f in formats)
2432         if not self.params.get('allow_unplayable_formats'):
2433             formats = [f for f in formats if not f.get('has_drm')]
2434
2435         if info_dict.get('is_live'):
2436             get_from_start = bool(self.params.get('live_from_start'))
2437             formats = [f for f in formats if bool(f.get('is_from_start')) == get_from_start]
2438             if not get_from_start:
2439                 info_dict['title'] += ' ' + datetime.datetime.now().strftime('%Y-%m-%d %H:%M')
2440
2441         if not formats:
2442             self.raise_no_formats(info_dict)
2443
2444         def is_wellformed(f):
2445             url = f.get('url')
2446             if not url:
2447                 self.report_warning(
2448                     '"url" field is missing or empty - skipping format, '
2449                     'there is an error in extractor')
2450                 return False
2451             if isinstance(url, bytes):
2452                 sanitize_string_field(f, 'url')
2453             return True
2454
2455         # Filter out malformed formats for better extraction robustness
2456         formats = list(filter(is_wellformed, formats))
2457
2458         formats_dict = {}
2459
2460         # We check that all the formats have the format and format_id fields
2461         for i, format in enumerate(formats):
2462             sanitize_string_field(format, 'format_id')
2463             sanitize_numeric_fields(format)
2464             format['url'] = sanitize_url(format['url'])
2465             if not format.get('format_id'):
2466                 format['format_id'] = compat_str(i)
2467             else:
2468                 # Sanitize format_id from characters used in format selector expression
2469                 format['format_id'] = re.sub(r'[\s,/+\[\]()]', '_', format['format_id'])
2470             format_id = format['format_id']
2471             if format_id not in formats_dict:
2472                 formats_dict[format_id] = []
2473             formats_dict[format_id].append(format)
2474
2475         # Make sure all formats have unique format_id
2476         common_exts = set(itertools.chain(*self._format_selection_exts.values()))
2477         for format_id, ambiguous_formats in formats_dict.items():
2478             ambigious_id = len(ambiguous_formats) > 1
2479             for i, format in enumerate(ambiguous_formats):
2480                 if ambigious_id:
2481                     format['format_id'] = '%s-%d' % (format_id, i)
2482                 if format.get('ext') is None:
2483                     format['ext'] = determine_ext(format['url']).lower()
2484                 # Ensure there is no conflict between id and ext in format selection
2485                 # See https://github.com/yt-dlp/yt-dlp/issues/1282
2486                 if format['format_id'] != format['ext'] and format['format_id'] in common_exts:
2487                     format['format_id'] = 'f%s' % format['format_id']
2488
2489         for i, format in enumerate(formats):
2490             if format.get('format') is None:
2491                 format['format'] = '{id} - {res}{note}'.format(
2492                     id=format['format_id'],
2493                     res=self.format_resolution(format),
2494                     note=format_field(format, 'format_note', ' (%s)'),
2495                 )
2496             if format.get('protocol') is None:
2497                 format['protocol'] = determine_protocol(format)
2498             if format.get('resolution') is None:
2499                 format['resolution'] = self.format_resolution(format, default=None)
2500             if format.get('dynamic_range') is None and format.get('vcodec') != 'none':
2501                 format['dynamic_range'] = 'SDR'
2502             if (info_dict.get('duration') and format.get('tbr')
2503                     and not format.get('filesize') and not format.get('filesize_approx')):
2504                 format['filesize_approx'] = info_dict['duration'] * format['tbr'] * (1024 / 8)
2505
2506             # Add HTTP headers, so that external programs can use them from the
2507             # json output
2508             full_format_info = info_dict.copy()
2509             full_format_info.update(format)
2510             format['http_headers'] = self._calc_headers(full_format_info)
2511         # Remove private housekeeping stuff
2512         if '__x_forwarded_for_ip' in info_dict:
2513             del info_dict['__x_forwarded_for_ip']
2514
2515         # TODO Central sorting goes here
2516
2517         if self.params.get('check_formats') is True:
2518             formats = LazyList(self._check_formats(formats[::-1]), reverse=True)
2519
2520         if not formats or formats[0] is not info_dict:
2521             # only set the 'formats' fields if the original info_dict list them
2522             # otherwise we end up with a circular reference, the first (and unique)
2523             # element in the 'formats' field in info_dict is info_dict itself,
2524             # which can't be exported to json
2525             info_dict['formats'] = formats
2526
2527         info_dict, _ = self.pre_process(info_dict)
2528
2529         # The pre-processors may have modified the formats
2530         formats = info_dict.get('formats', [info_dict])
2531
2532         list_only = self.params.get('simulate') is None and (
2533             self.params.get('list_thumbnails') or self.params.get('listformats') or self.params.get('listsubtitles'))
2534         interactive_format_selection = not list_only and self.format_selector == '-'
2535         if self.params.get('list_thumbnails'):
2536             self.list_thumbnails(info_dict)
2537         if self.params.get('listsubtitles'):
2538             if 'automatic_captions' in info_dict:
2539                 self.list_subtitles(
2540                     info_dict['id'], automatic_captions, 'automatic captions')
2541             self.list_subtitles(info_dict['id'], subtitles, 'subtitles')
2542         if self.params.get('listformats') or interactive_format_selection:
2543             self.list_formats(info_dict)
2544         if list_only:
2545             # Without this printing, -F --print-json will not work
2546             self.__forced_printings(info_dict, self.prepare_filename(info_dict), incomplete=True)
2547             return
2548
2549         format_selector = self.format_selector
2550         if format_selector is None:
2551             req_format = self._default_format_spec(info_dict, download=download)
2552             self.write_debug('Default format spec: %s' % req_format)
2553             format_selector = self.build_format_selector(req_format)
2554
2555         while True:
2556             if interactive_format_selection:
2557                 req_format = input(
2558                     self._format_screen('\nEnter format selector: ', self.Styles.EMPHASIS))
2559                 try:
2560                     format_selector = self.build_format_selector(req_format)
2561                 except SyntaxError as err:
2562                     self.report_error(err, tb=False, is_error=False)
2563                     continue
2564
2565             # While in format selection we may need to have an access to the original
2566             # format set in order to calculate some metrics or do some processing.
2567             # For now we need to be able to guess whether original formats provided
2568             # by extractor are incomplete or not (i.e. whether extractor provides only
2569             # video-only or audio-only formats) for proper formats selection for
2570             # extractors with such incomplete formats (see
2571             # https://github.com/ytdl-org/youtube-dl/pull/5556).
2572             # Since formats may be filtered during format selection and may not match
2573             # the original formats the results may be incorrect. Thus original formats
2574             # or pre-calculated metrics should be passed to format selection routines
2575             # as well.
2576             # We will pass a context object containing all necessary additional data
2577             # instead of just formats.
2578             # This fixes incorrect format selection issue (see
2579             # https://github.com/ytdl-org/youtube-dl/issues/10083).
2580             incomplete_formats = (
2581                 # All formats are video-only or
2582                 all(f.get('vcodec') != 'none' and f.get('acodec') == 'none' for f in formats)
2583                 # all formats are audio-only
2584                 or all(f.get('vcodec') == 'none' and f.get('acodec') != 'none' for f in formats))
2585
2586             ctx = {
2587                 'formats': formats,
2588                 'incomplete_formats': incomplete_formats,
2589             }
2590
2591             formats_to_download = list(format_selector(ctx))
2592             if interactive_format_selection and not formats_to_download:
2593                 self.report_error('Requested format is not available', tb=False, is_error=False)
2594                 continue
2595             break
2596
2597         if not formats_to_download:
2598             if not self.params.get('ignore_no_formats_error'):
2599                 raise ExtractorError('Requested format is not available', expected=True,
2600                                      video_id=info_dict['id'], ie=info_dict['extractor'])
2601             self.report_warning('Requested format is not available')
2602             # Process what we can, even without any available formats.
2603             formats_to_download = [{}]
2604
2605         best_format = formats_to_download[-1]
2606         if download:
2607             if best_format:
2608                 self.to_screen(
2609                     f'[info] {info_dict["id"]}: Downloading {len(formats_to_download)} format(s): '
2610                     + ', '.join([f['format_id'] for f in formats_to_download]))
2611             max_downloads_reached = False
2612             for i, fmt in enumerate(formats_to_download):
2613                 formats_to_download[i] = new_info = dict(info_dict)
2614                 # Save a reference to the original info_dict so that it can be modified in process_info if needed
2615                 new_info.update(fmt)
2616                 new_info['__original_infodict'] = info_dict
2617                 try:
2618                     self.process_info(new_info)
2619                 except MaxDownloadsReached:
2620                     max_downloads_reached = True
2621                 new_info.pop('__original_infodict')
2622                 # Remove copied info
2623                 for key, val in tuple(new_info.items()):
2624                     if info_dict.get(key) == val:
2625                         new_info.pop(key)
2626                 if max_downloads_reached:
2627                     break
2628
2629             write_archive = set(f.get('__write_download_archive', False) for f in formats_to_download)
2630             assert write_archive.issubset({True, False, 'ignore'})
2631             if True in write_archive and False not in write_archive:
2632                 self.record_download_archive(info_dict)
2633
2634             info_dict['requested_downloads'] = formats_to_download
2635             info_dict = self.run_all_pps('after_video', info_dict)
2636             if max_downloads_reached:
2637                 raise MaxDownloadsReached()
2638
2639         # We update the info dict with the selected best quality format (backwards compatibility)
2640         info_dict.update(best_format)
2641         return info_dict
2642
2643     def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
2644         """Select the requested subtitles and their format"""
2645         available_subs = {}
2646         if normal_subtitles and self.params.get('writesubtitles'):
2647             available_subs.update(normal_subtitles)
2648         if automatic_captions and self.params.get('writeautomaticsub'):
2649             for lang, cap_info in automatic_captions.items():
2650                 if lang not in available_subs:
2651                     available_subs[lang] = cap_info
2652
2653         if (not self.params.get('writesubtitles') and not
2654                 self.params.get('writeautomaticsub') or not
2655                 available_subs):
2656             return None
2657
2658         all_sub_langs = available_subs.keys()
2659         if self.params.get('allsubtitles', False):
2660             requested_langs = all_sub_langs
2661         elif self.params.get('subtitleslangs', False):
2662             # A list is used so that the order of languages will be the same as
2663             # given in subtitleslangs. See https://github.com/yt-dlp/yt-dlp/issues/1041
2664             requested_langs = []
2665             for lang_re in self.params.get('subtitleslangs'):
2666                 discard = lang_re[0] == '-'
2667                 if discard:
2668                     lang_re = lang_re[1:]
2669                 if lang_re == 'all':
2670                     if discard:
2671                         requested_langs = []
2672                     else:
2673                         requested_langs.extend(all_sub_langs)
2674                     continue
2675                 current_langs = filter(re.compile(lang_re + '$').match, all_sub_langs)
2676                 if discard:
2677                     for lang in current_langs:
2678                         while lang in requested_langs:
2679                             requested_langs.remove(lang)
2680                 else:
2681                     requested_langs.extend(current_langs)
2682             requested_langs = orderedSet(requested_langs)
2683         elif 'en' in available_subs:
2684             requested_langs = ['en']
2685         else:
2686             requested_langs = [list(all_sub_langs)[0]]
2687         if requested_langs:
2688             self.write_debug('Downloading subtitles: %s' % ', '.join(requested_langs))
2689
2690         formats_query = self.params.get('subtitlesformat', 'best')
2691         formats_preference = formats_query.split('/') if formats_query else []
2692         subs = {}
2693         for lang in requested_langs:
2694             formats = available_subs.get(lang)
2695             if formats is None:
2696                 self.report_warning('%s subtitles not available for %s' % (lang, video_id))
2697                 continue
2698             for ext in formats_preference:
2699                 if ext == 'best':
2700                     f = formats[-1]
2701                     break
2702                 matches = list(filter(lambda f: f['ext'] == ext, formats))
2703                 if matches:
2704                     f = matches[-1]
2705                     break
2706             else:
2707                 f = formats[-1]
2708                 self.report_warning(
2709                     'No subtitle format found matching "%s" for language %s, '
2710                     'using %s' % (formats_query, lang, f['ext']))
2711             subs[lang] = f
2712         return subs
2713
2714     def _forceprint(self, key, info_dict):
2715         if info_dict is None:
2716             return
2717         info_copy = info_dict.copy()
2718         info_copy['formats_table'] = self.render_formats_table(info_dict)
2719         info_copy['thumbnails_table'] = self.render_thumbnails_table(info_dict)
2720         info_copy['subtitles_table'] = self.render_subtitles_table(info_dict.get('id'), info_dict.get('subtitles'))
2721         info_copy['automatic_captions_table'] = self.render_subtitles_table(info_dict.get('id'), info_dict.get('automatic_captions'))
2722
2723         def format_tmpl(tmpl):
2724             mobj = re.match(r'\w+(=?)$', tmpl)
2725             if mobj and mobj.group(1):
2726                 return f'{tmpl[:-1]} = %({tmpl[:-1]})r'
2727             elif mobj:
2728                 return f'%({tmpl})s'
2729             return tmpl
2730
2731         for tmpl in self.params['forceprint'].get(key, []):
2732             self.to_stdout(self.evaluate_outtmpl(format_tmpl(tmpl), info_copy))
2733
2734         for tmpl, file_tmpl in self.params['print_to_file'].get(key, []):
2735             filename = self.evaluate_outtmpl(file_tmpl, info_dict)
2736             tmpl = format_tmpl(tmpl)
2737             self.to_screen(f'[info] Writing {tmpl!r} to: {filename}')
2738             if self._ensure_dir_exists(filename):
2739                 with io.open(filename, 'a', encoding='utf-8') as f:
2740                     f.write(self.evaluate_outtmpl(tmpl, info_copy) + '\n')
2741
2742     def __forced_printings(self, info_dict, filename, incomplete):
2743         def print_mandatory(field, actual_field=None):
2744             if actual_field is None:
2745                 actual_field = field
2746             if (self.params.get('force%s' % field, False)
2747                     and (not incomplete or info_dict.get(actual_field) is not None)):
2748                 self.to_stdout(info_dict[actual_field])
2749
2750         def print_optional(field):
2751             if (self.params.get('force%s' % field, False)
2752                     and info_dict.get(field) is not None):
2753                 self.to_stdout(info_dict[field])
2754
2755         info_dict = info_dict.copy()
2756         if filename is not None:
2757             info_dict['filename'] = filename
2758         if info_dict.get('requested_formats') is not None:
2759             # For RTMP URLs, also include the playpath
2760             info_dict['urls'] = '\n'.join(f['url'] + f.get('play_path', '') for f in info_dict['requested_formats'])
2761         elif 'url' in info_dict:
2762             info_dict['urls'] = info_dict['url'] + info_dict.get('play_path', '')
2763
2764         if (self.params.get('forcejson')
2765                 or self.params['forceprint'].get('video')
2766                 or self.params['print_to_file'].get('video')):
2767             self.post_extract(info_dict)
2768         self._forceprint('video', info_dict)
2769
2770         print_mandatory('title')
2771         print_mandatory('id')
2772         print_mandatory('url', 'urls')
2773         print_optional('thumbnail')
2774         print_optional('description')
2775         print_optional('filename')
2776         if self.params.get('forceduration') and info_dict.get('duration') is not None:
2777             self.to_stdout(formatSeconds(info_dict['duration']))
2778         print_mandatory('format')
2779
2780         if self.params.get('forcejson'):
2781             self.to_stdout(json.dumps(self.sanitize_info(info_dict)))
2782
2783     def dl(self, name, info, subtitle=False, test=False):
2784         if not info.get('url'):
2785             self.raise_no_formats(info, True)
2786
2787         if test:
2788             verbose = self.params.get('verbose')
2789             params = {
2790                 'test': True,
2791                 'quiet': self.params.get('quiet') or not verbose,
2792                 'verbose': verbose,
2793                 'noprogress': not verbose,
2794                 'nopart': True,
2795                 'skip_unavailable_fragments': False,
2796                 'keep_fragments': False,
2797                 'overwrites': True,
2798                 '_no_ytdl_file': True,
2799             }
2800         else:
2801             params = self.params
2802         fd = get_suitable_downloader(info, params, to_stdout=(name == '-'))(self, params)
2803         if not test:
2804             for ph in self._progress_hooks:
2805                 fd.add_progress_hook(ph)
2806             urls = '", "'.join(
2807                 (f['url'].split(',')[0] + ',<data>' if f['url'].startswith('data:') else f['url'])
2808                 for f in info.get('requested_formats', []) or [info])
2809             self.write_debug('Invoking downloader on "%s"' % urls)
2810
2811         # Note: Ideally info should be a deep-copied so that hooks cannot modify it.
2812         # But it may contain objects that are not deep-copyable
2813         new_info = self._copy_infodict(info)
2814         if new_info.get('http_headers') is None:
2815             new_info['http_headers'] = self._calc_headers(new_info)
2816         return fd.download(name, new_info, subtitle)
2817
2818     def existing_file(self, filepaths, *, default_overwrite=True):
2819         existing_files = list(filter(os.path.exists, orderedSet(filepaths)))
2820         if existing_files and not self.params.get('overwrites', default_overwrite):
2821             return existing_files[0]
2822
2823         for file in existing_files:
2824             self.report_file_delete(file)
2825             os.remove(file)
2826         return None
2827
2828     def process_info(self, info_dict):
2829         """Process a single resolved IE result. (Modified it in-place)"""
2830
2831         assert info_dict.get('_type', 'video') == 'video'
2832         original_infodict = info_dict
2833
2834         if 'format' not in info_dict and 'ext' in info_dict:
2835             info_dict['format'] = info_dict['ext']
2836
2837         if self._match_entry(info_dict) is not None:
2838             info_dict['__write_download_archive'] = 'ignore'
2839             return
2840
2841         self.post_extract(info_dict)
2842         self._num_downloads += 1
2843
2844         # info_dict['_filename'] needs to be set for backward compatibility
2845         info_dict['_filename'] = full_filename = self.prepare_filename(info_dict, warn=True)
2846         temp_filename = self.prepare_filename(info_dict, 'temp')
2847         files_to_move = {}
2848
2849         # Forced printings
2850         self.__forced_printings(info_dict, full_filename, incomplete=('format' not in info_dict))
2851
2852         if self.params.get('simulate'):
2853             info_dict['__write_download_archive'] = self.params.get('force_write_download_archive')
2854             return
2855
2856         if full_filename is None:
2857             return
2858         if not self._ensure_dir_exists(encodeFilename(full_filename)):
2859             return
2860         if not self._ensure_dir_exists(encodeFilename(temp_filename)):
2861             return
2862
2863         if self._write_description('video', info_dict,
2864                                    self.prepare_filename(info_dict, 'description')) is None:
2865             return
2866
2867         sub_files = self._write_subtitles(info_dict, temp_filename)
2868         if sub_files is None:
2869             return
2870         files_to_move.update(dict(sub_files))
2871
2872         thumb_files = self._write_thumbnails(
2873             'video', info_dict, temp_filename, self.prepare_filename(info_dict, 'thumbnail'))
2874         if thumb_files is None:
2875             return
2876         files_to_move.update(dict(thumb_files))
2877
2878         infofn = self.prepare_filename(info_dict, 'infojson')
2879         _infojson_written = self._write_info_json('video', info_dict, infofn)
2880         if _infojson_written:
2881             info_dict['infojson_filename'] = infofn
2882             # For backward compatibility, even though it was a private field
2883             info_dict['__infojson_filename'] = infofn
2884         elif _infojson_written is None:
2885             return
2886
2887         # Note: Annotations are deprecated
2888         annofn = None
2889         if self.params.get('writeannotations', False):
2890             annofn = self.prepare_filename(info_dict, 'annotation')
2891         if annofn:
2892             if not self._ensure_dir_exists(encodeFilename(annofn)):
2893                 return
2894             if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(annofn)):
2895                 self.to_screen('[info] Video annotations are already present')
2896             elif not info_dict.get('annotations'):
2897                 self.report_warning('There are no annotations to write.')
2898             else:
2899                 try:
2900                     self.to_screen('[info] Writing video annotations to: ' + annofn)
2901                     with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
2902                         annofile.write(info_dict['annotations'])
2903                 except (KeyError, TypeError):
2904                     self.report_warning('There are no annotations to write.')
2905                 except (OSError, IOError):
2906                     self.report_error('Cannot write annotations file: ' + annofn)
2907                     return
2908
2909         # Write internet shortcut files
2910         def _write_link_file(link_type):
2911             url = try_get(info_dict['webpage_url'], iri_to_uri)
2912             if not url:
2913                 self.report_warning(
2914                     f'Cannot write internet shortcut file because the actual URL of "{info_dict["webpage_url"]}" is unknown')
2915                 return True
2916             linkfn = replace_extension(self.prepare_filename(info_dict, 'link'), link_type, info_dict.get('ext'))
2917             if not self._ensure_dir_exists(encodeFilename(linkfn)):
2918                 return False
2919             if self.params.get('overwrites', True) and os.path.exists(encodeFilename(linkfn)):
2920                 self.to_screen(f'[info] Internet shortcut (.{link_type}) is already present')
2921                 return True
2922             try:
2923                 self.to_screen(f'[info] Writing internet shortcut (.{link_type}) to: {linkfn}')
2924                 with io.open(encodeFilename(to_high_limit_path(linkfn)), 'w', encoding='utf-8',
2925                              newline='\r\n' if link_type == 'url' else '\n') as linkfile:
2926                     template_vars = {'url': url}
2927                     if link_type == 'desktop':
2928                         template_vars['filename'] = linkfn[:-(len(link_type) + 1)]
2929                     linkfile.write(LINK_TEMPLATES[link_type] % template_vars)
2930             except (OSError, IOError):
2931                 self.report_error(f'Cannot write internet shortcut {linkfn}')
2932                 return False
2933             return True
2934
2935         write_links = {
2936             'url': self.params.get('writeurllink'),
2937             'webloc': self.params.get('writewebloclink'),
2938             'desktop': self.params.get('writedesktoplink'),
2939         }
2940         if self.params.get('writelink'):
2941             link_type = ('webloc' if sys.platform == 'darwin'
2942                          else 'desktop' if sys.platform.startswith('linux')
2943                          else 'url')
2944             write_links[link_type] = True
2945
2946         if any(should_write and not _write_link_file(link_type)
2947                for link_type, should_write in write_links.items()):
2948             return
2949
2950         def replace_info_dict(new_info):
2951             nonlocal info_dict
2952             if new_info == info_dict:
2953                 return
2954             info_dict.clear()
2955             info_dict.update(new_info)
2956
2957         try:
2958             new_info, files_to_move = self.pre_process(info_dict, 'before_dl', files_to_move)
2959             replace_info_dict(new_info)
2960         except PostProcessingError as err:
2961             self.report_error('Preprocessing: %s' % str(err))
2962             return
2963
2964         if self.params.get('skip_download'):
2965             info_dict['filepath'] = temp_filename
2966             info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))
2967             info_dict['__files_to_move'] = files_to_move
2968             replace_info_dict(self.run_pp(MoveFilesAfterDownloadPP(self, False), info_dict))
2969             info_dict['__write_download_archive'] = self.params.get('force_write_download_archive')
2970         else:
2971             # Download
2972             info_dict.setdefault('__postprocessors', [])
2973             try:
2974
2975                 def existing_video_file(*filepaths):
2976                     ext = info_dict.get('ext')
2977                     converted = lambda file: replace_extension(file, self.params.get('final_ext') or ext, ext)
2978                     file = self.existing_file(itertools.chain(*zip(map(converted, filepaths), filepaths)),
2979                                               default_overwrite=False)
2980                     if file:
2981                         info_dict['ext'] = os.path.splitext(file)[1][1:]
2982                     return file
2983
2984                 success = True
2985                 if info_dict.get('requested_formats') is not None:
2986
2987                     def compatible_formats(formats):
2988                         # TODO: some formats actually allow this (mkv, webm, ogg, mp4), but not all of them.
2989                         video_formats = [format for format in formats if format.get('vcodec') != 'none']
2990                         audio_formats = [format for format in formats if format.get('acodec') != 'none']
2991                         if len(video_formats) > 2 or len(audio_formats) > 2:
2992                             return False
2993
2994                         # Check extension
2995                         exts = set(format.get('ext') for format in formats)
2996                         COMPATIBLE_EXTS = (
2997                             set(('mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'ismv', 'isma')),
2998                             set(('webm',)),
2999                         )
3000                         for ext_sets in COMPATIBLE_EXTS:
3001                             if ext_sets.issuperset(exts):
3002                                 return True
3003                         # TODO: Check acodec/vcodec
3004                         return False
3005
3006                     requested_formats = info_dict['requested_formats']
3007                     old_ext = info_dict['ext']
3008                     if self.params.get('merge_output_format') is None:
3009                         if not compatible_formats(requested_formats):
3010                             info_dict['ext'] = 'mkv'
3011                             self.report_warning(
3012                                 'Requested formats are incompatible for merge and will be merged into mkv')
3013                         if (info_dict['ext'] == 'webm'
3014                                 and info_dict.get('thumbnails')
3015                                 # check with type instead of pp_key, __name__, or isinstance
3016                                 # since we dont want any custom PPs to trigger this
3017                                 and any(type(pp) == EmbedThumbnailPP for pp in self._pps['post_process'])):
3018                             info_dict['ext'] = 'mkv'
3019                             self.report_warning(
3020                                 'webm doesn\'t support embedding a thumbnail, mkv will be used')
3021                     new_ext = info_dict['ext']
3022
3023                     def correct_ext(filename, ext=new_ext):
3024                         if filename == '-':
3025                             return filename
3026                         filename_real_ext = os.path.splitext(filename)[1][1:]
3027                         filename_wo_ext = (
3028                             os.path.splitext(filename)[0]
3029                             if filename_real_ext in (old_ext, new_ext)
3030                             else filename)
3031                         return '%s.%s' % (filename_wo_ext, ext)
3032
3033                     # Ensure filename always has a correct extension for successful merge
3034                     full_filename = correct_ext(full_filename)
3035                     temp_filename = correct_ext(temp_filename)
3036                     dl_filename = existing_video_file(full_filename, temp_filename)
3037                     info_dict['__real_download'] = False
3038
3039                     downloaded = []
3040                     merger = FFmpegMergerPP(self)
3041
3042                     fd = get_suitable_downloader(info_dict, self.params, to_stdout=temp_filename == '-')
3043                     if dl_filename is not None:
3044                         self.report_file_already_downloaded(dl_filename)
3045                     elif fd:
3046                         for f in requested_formats if fd != FFmpegFD else []:
3047                             f['filepath'] = fname = prepend_extension(
3048                                 correct_ext(temp_filename, info_dict['ext']),
3049                                 'f%s' % f['format_id'], info_dict['ext'])
3050                             downloaded.append(fname)
3051                         info_dict['url'] = '\n'.join(f['url'] for f in requested_formats)
3052                         success, real_download = self.dl(temp_filename, info_dict)
3053                         info_dict['__real_download'] = real_download
3054                     else:
3055                         if self.params.get('allow_unplayable_formats'):
3056                             self.report_warning(
3057                                 'You have requested merging of multiple formats '
3058                                 'while also allowing unplayable formats to be downloaded. '
3059                                 'The formats won\'t be merged to prevent data corruption.')
3060                         elif not merger.available:
3061                             msg = 'You have requested merging of multiple formats but ffmpeg is not installed'
3062                             if not self.params.get('ignoreerrors'):
3063                                 self.report_error(f'{msg}. Aborting due to --abort-on-error')
3064                                 return
3065                             self.report_warning(f'{msg}. The formats won\'t be merged')
3066
3067                         if temp_filename == '-':
3068                             reason = ('using a downloader other than ffmpeg' if FFmpegFD.can_merge_formats(info_dict, self.params)
3069                                       else 'but the formats are incompatible for simultaneous download' if merger.available
3070                                       else 'but ffmpeg is not installed')
3071                             self.report_warning(
3072                                 f'You have requested downloading multiple formats to stdout {reason}. '
3073                                 'The formats will be streamed one after the other')
3074                             fname = temp_filename
3075                         for f in requested_formats:
3076                             new_info = dict(info_dict)
3077                             del new_info['requested_formats']
3078                             new_info.update(f)
3079                             if temp_filename != '-':
3080                                 fname = prepend_extension(
3081                                     correct_ext(temp_filename, new_info['ext']),
3082                                     'f%s' % f['format_id'], new_info['ext'])
3083                                 if not self._ensure_dir_exists(fname):
3084                                     return
3085                                 f['filepath'] = fname
3086                                 downloaded.append(fname)
3087                             partial_success, real_download = self.dl(fname, new_info)
3088                             info_dict['__real_download'] = info_dict['__real_download'] or real_download
3089                             success = success and partial_success
3090
3091                     if downloaded and merger.available and not self.params.get('allow_unplayable_formats'):
3092                         info_dict['__postprocessors'].append(merger)
3093                         info_dict['__files_to_merge'] = downloaded
3094                         # Even if there were no downloads, it is being merged only now
3095                         info_dict['__real_download'] = True
3096                     else:
3097                         for file in downloaded:
3098                             files_to_move[file] = None
3099                 else:
3100                     # Just a single file
3101                     dl_filename = existing_video_file(full_filename, temp_filename)
3102                     if dl_filename is None or dl_filename == temp_filename:
3103                         # dl_filename == temp_filename could mean that the file was partially downloaded with --no-part.
3104                         # So we should try to resume the download
3105                         success, real_download = self.dl(temp_filename, info_dict)
3106                         info_dict['__real_download'] = real_download
3107                     else:
3108                         self.report_file_already_downloaded(dl_filename)
3109
3110                 dl_filename = dl_filename or temp_filename
3111                 info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))
3112
3113             except network_exceptions as err:
3114                 self.report_error('unable to download video data: %s' % error_to_compat_str(err))
3115                 return
3116             except (OSError, IOError) as err:
3117                 raise UnavailableVideoError(err)
3118             except (ContentTooShortError, ) as err:
3119                 self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
3120                 return
3121
3122             if success and full_filename != '-':
3123
3124                 def fixup():
3125                     do_fixup = True
3126                     fixup_policy = self.params.get('fixup')
3127                     vid = info_dict['id']
3128
3129                     if fixup_policy in ('ignore', 'never'):
3130                         return
3131                     elif fixup_policy == 'warn':
3132                         do_fixup = False
3133                     elif fixup_policy != 'force':
3134                         assert fixup_policy in ('detect_or_warn', None)
3135                         if not info_dict.get('__real_download'):
3136                             do_fixup = False
3137
3138                     def ffmpeg_fixup(cndn, msg, cls):
3139                         if not cndn:
3140                             return
3141                         if not do_fixup:
3142                             self.report_warning(f'{vid}: {msg}')
3143                             return
3144                         pp = cls(self)
3145                         if pp.available:
3146                             info_dict['__postprocessors'].append(pp)
3147                         else:
3148                             self.report_warning(f'{vid}: {msg}. Install ffmpeg to fix this automatically')
3149
3150                     stretched_ratio = info_dict.get('stretched_ratio')
3151                     ffmpeg_fixup(
3152                         stretched_ratio not in (1, None),
3153                         f'Non-uniform pixel ratio {stretched_ratio}',
3154                         FFmpegFixupStretchedPP)
3155
3156                     ffmpeg_fixup(
3157                         (info_dict.get('requested_formats') is None
3158                          and info_dict.get('container') == 'm4a_dash'
3159                          and info_dict.get('ext') == 'm4a'),
3160                         'writing DASH m4a. Only some players support this container',
3161                         FFmpegFixupM4aPP)
3162
3163                     downloader = get_suitable_downloader(info_dict, self.params) if 'protocol' in info_dict else None
3164                     downloader = downloader.__name__ if downloader else None
3165
3166                     if info_dict.get('requested_formats') is None:  # Not necessary if doing merger
3167                         ffmpeg_fixup(downloader == 'HlsFD',
3168                                      'Possible MPEG-TS in MP4 container or malformed AAC timestamps',
3169                                      FFmpegFixupM3u8PP)
3170                         ffmpeg_fixup(info_dict.get('is_live') and downloader == 'DashSegmentsFD',
3171                                      'Possible duplicate MOOV atoms', FFmpegFixupDuplicateMoovPP)
3172
3173                     ffmpeg_fixup(downloader == 'WebSocketFragmentFD', 'Malformed timestamps detected', FFmpegFixupTimestampPP)
3174                     ffmpeg_fixup(downloader == 'WebSocketFragmentFD', 'Malformed duration detected', FFmpegFixupDurationPP)
3175
3176                 fixup()
3177                 try:
3178                     replace_info_dict(self.post_process(dl_filename, info_dict, files_to_move))
3179                 except PostProcessingError as err:
3180                     self.report_error('Postprocessing: %s' % str(err))
3181                     return
3182                 try:
3183                     for ph in self._post_hooks:
3184                         ph(info_dict['filepath'])
3185                 except Exception as err:
3186                     self.report_error('post hooks: %s' % str(err))
3187                     return
3188                 info_dict['__write_download_archive'] = True
3189
3190         if self.params.get('force_write_download_archive'):
3191             info_dict['__write_download_archive'] = True
3192
3193         # Make sure the info_dict was modified in-place
3194         assert info_dict is original_infodict
3195
3196         max_downloads = self.params.get('max_downloads')
3197         if max_downloads is not None and self._num_downloads >= int(max_downloads):
3198             raise MaxDownloadsReached()
3199
3200     def __download_wrapper(self, func):
3201         @functools.wraps(func)
3202         def wrapper(*args, **kwargs):
3203             try:
3204                 res = func(*args, **kwargs)
3205             except UnavailableVideoError as e:
3206                 self.report_error(e)
3207             except MaxDownloadsReached as e:
3208                 self.to_screen(f'[info] {e}')
3209                 raise
3210             except DownloadCancelled as e:
3211                 self.to_screen(f'[info] {e}')
3212                 if not self.params.get('break_per_url'):
3213                     raise
3214             else:
3215                 if self.params.get('dump_single_json', False):
3216                     self.post_extract(res)
3217                     self.to_stdout(json.dumps(self.sanitize_info(res)))
3218         return wrapper
3219
3220     def download(self, url_list):
3221         """Download a given list of URLs."""
3222         url_list = variadic(url_list)  # Passing a single URL is a common mistake
3223         outtmpl = self.outtmpl_dict['default']
3224         if (len(url_list) > 1
3225                 and outtmpl != '-'
3226                 and '%' not in outtmpl
3227                 and self.params.get('max_downloads') != 1):
3228             raise SameFileError(outtmpl)
3229
3230         for url in url_list:
3231             self.__download_wrapper(self.extract_info)(
3232                 url, force_generic_extractor=self.params.get('force_generic_extractor', False))
3233
3234         return self._download_retcode
3235
3236     def download_with_info_file(self, info_filename):
3237         with contextlib.closing(fileinput.FileInput(
3238                 [info_filename], mode='r',
3239                 openhook=fileinput.hook_encoded('utf-8'))) as f:
3240             # FileInput doesn't have a read method, we can't call json.load
3241             info = self.sanitize_info(json.loads('\n'.join(f)), self.params.get('clean_infojson', True))
3242         try:
3243             self.__download_wrapper(self.process_ie_result)(info, download=True)
3244         except (DownloadError, EntryNotInPlaylist, ReExtractInfo) as e:
3245             if not isinstance(e, EntryNotInPlaylist):
3246                 self.to_stderr('\r')
3247             webpage_url = info.get('webpage_url')
3248             if webpage_url is not None:
3249                 self.report_warning(f'The info failed to download: {e}; trying with URL {webpage_url}')
3250                 return self.download([webpage_url])
3251             else:
3252                 raise
3253         return self._download_retcode
3254
3255     @staticmethod
3256     def sanitize_info(info_dict, remove_private_keys=False):
3257         ''' Sanitize the infodict for converting to json '''
3258         if info_dict is None:
3259             return info_dict
3260         info_dict.setdefault('epoch', int(time.time()))
3261         info_dict.setdefault('_type', 'video')
3262         remove_keys = {'__original_infodict'}  # Always remove this since this may contain a copy of the entire dict
3263         keep_keys = ['_type']  # Always keep this to facilitate load-info-json
3264         if remove_private_keys:
3265             remove_keys |= {
3266                 'requested_downloads', 'requested_formats', 'requested_subtitles', 'requested_entries',
3267                 'entries', 'filepath', 'infojson_filename', 'original_url', 'playlist_autonumber',
3268             }
3269             reject = lambda k, v: k not in keep_keys and (
3270                 k.startswith('_') or k in remove_keys or v is None)
3271         else:
3272             reject = lambda k, v: k in remove_keys
3273
3274         def filter_fn(obj):
3275             if isinstance(obj, dict):
3276                 return {k: filter_fn(v) for k, v in obj.items() if not reject(k, v)}
3277             elif isinstance(obj, (list, tuple, set, LazyList)):
3278                 return list(map(filter_fn, obj))
3279             elif obj is None or isinstance(obj, (str, int, float, bool)):
3280                 return obj
3281             else:
3282                 return repr(obj)
3283
3284         return filter_fn(info_dict)
3285
3286     @staticmethod
3287     def filter_requested_info(info_dict, actually_filter=True):
3288         ''' Alias of sanitize_info for backward compatibility '''
3289         return YoutubeDL.sanitize_info(info_dict, actually_filter)
3290
3291     @staticmethod
3292     def post_extract(info_dict):
3293         def actual_post_extract(info_dict):
3294             if info_dict.get('_type') in ('playlist', 'multi_video'):
3295                 for video_dict in info_dict.get('entries', {}):
3296                     actual_post_extract(video_dict or {})
3297                 return
3298
3299             post_extractor = info_dict.get('__post_extractor') or (lambda: {})
3300             extra = post_extractor().items()
3301             info_dict.update(extra)
3302             info_dict.pop('__post_extractor', None)
3303
3304             original_infodict = info_dict.get('__original_infodict') or {}
3305             original_infodict.update(extra)
3306             original_infodict.pop('__post_extractor', None)
3307
3308         actual_post_extract(info_dict or {})
3309
3310     def run_pp(self, pp, infodict):
3311         files_to_delete = []
3312         if '__files_to_move' not in infodict:
3313             infodict['__files_to_move'] = {}
3314         try:
3315             files_to_delete, infodict = pp.run(infodict)
3316         except PostProcessingError as e:
3317             # Must be True and not 'only_download'
3318             if self.params.get('ignoreerrors') is True:
3319                 self.report_error(e)
3320                 return infodict
3321             raise
3322
3323         if not files_to_delete:
3324             return infodict
3325         if self.params.get('keepvideo', False):
3326             for f in files_to_delete:
3327                 infodict['__files_to_move'].setdefault(f, '')
3328         else:
3329             for old_filename in set(files_to_delete):
3330                 self.to_screen('Deleting original file %s (pass -k to keep)' % old_filename)
3331                 try:
3332                     os.remove(encodeFilename(old_filename))
3333                 except (IOError, OSError):
3334                     self.report_warning('Unable to remove downloaded original file')
3335                 if old_filename in infodict['__files_to_move']:
3336                     del infodict['__files_to_move'][old_filename]
3337         return infodict
3338
3339     def run_all_pps(self, key, info, *, additional_pps=None):
3340         self._forceprint(key, info)
3341         for pp in (additional_pps or []) + self._pps[key]:
3342             info = self.run_pp(pp, info)
3343         return info
3344
3345     def pre_process(self, ie_info, key='pre_process', files_to_move=None):
3346         info = dict(ie_info)
3347         info['__files_to_move'] = files_to_move or {}
3348         info = self.run_all_pps(key, info)
3349         return info, info.pop('__files_to_move', None)
3350
3351     def post_process(self, filename, info, files_to_move=None):
3352         """Run all the postprocessors on the given file."""
3353         info['filepath'] = filename
3354         info['__files_to_move'] = files_to_move or {}
3355         info = self.run_all_pps('post_process', info, additional_pps=info.get('__postprocessors'))
3356         info = self.run_pp(MoveFilesAfterDownloadPP(self), info)
3357         del info['__files_to_move']
3358         return self.run_all_pps('after_move', info)
3359
3360     def _make_archive_id(self, info_dict):
3361         video_id = info_dict.get('id')
3362         if not video_id:
3363             return
3364         # Future-proof against any change in case
3365         # and backwards compatibility with prior versions
3366         extractor = info_dict.get('extractor_key') or info_dict.get('ie_key')  # key in a playlist
3367         if extractor is None:
3368             url = str_or_none(info_dict.get('url'))
3369             if not url:
3370                 return
3371             # Try to find matching extractor for the URL and take its ie_key
3372             for ie_key, ie in self._ies.items():
3373                 if ie.suitable(url):
3374                     extractor = ie_key
3375                     break
3376             else:
3377                 return
3378         return '%s %s' % (extractor.lower(), video_id)
3379
3380     def in_download_archive(self, info_dict):
3381         fn = self.params.get('download_archive')
3382         if fn is None:
3383             return False
3384
3385         vid_id = self._make_archive_id(info_dict)
3386         if not vid_id:
3387             return False  # Incomplete video information
3388
3389         return vid_id in self.archive
3390
3391     def record_download_archive(self, info_dict):
3392         fn = self.params.get('download_archive')
3393         if fn is None:
3394             return
3395         vid_id = self._make_archive_id(info_dict)
3396         assert vid_id
3397         self.write_debug(f'Adding to archive: {vid_id}')
3398         with locked_file(fn, 'a', encoding='utf-8') as archive_file:
3399             archive_file.write(vid_id + '\n')
3400         self.archive.add(vid_id)
3401
3402     @staticmethod
3403     def format_resolution(format, default='unknown'):
3404         if format.get('vcodec') == 'none' and format.get('acodec') != 'none':
3405             return 'audio only'
3406         if format.get('resolution') is not None:
3407             return format['resolution']
3408         if format.get('width') and format.get('height'):
3409             return '%dx%d' % (format['width'], format['height'])
3410         elif format.get('height'):
3411             return '%sp' % format['height']
3412         elif format.get('width'):
3413             return '%dx?' % format['width']
3414         return default
3415
3416     def _list_format_headers(self, *headers):
3417         if self.params.get('listformats_table', True) is not False:
3418             return [self._format_screen(header, self.Styles.HEADERS) for header in headers]
3419         return headers
3420
3421     def _format_note(self, fdict):
3422         res = ''
3423         if fdict.get('ext') in ['f4f', 'f4m']:
3424             res += '(unsupported)'
3425         if fdict.get('language'):
3426             if res:
3427                 res += ' '
3428             res += '[%s]' % fdict['language']
3429         if fdict.get('format_note') is not None:
3430             if res:
3431                 res += ' '
3432             res += fdict['format_note']
3433         if fdict.get('tbr') is not None:
3434             if res:
3435                 res += ', '
3436             res += '%4dk' % fdict['tbr']
3437         if fdict.get('container') is not None:
3438             if res:
3439                 res += ', '
3440             res += '%s container' % fdict['container']
3441         if (fdict.get('vcodec') is not None
3442                 and fdict.get('vcodec') != 'none'):
3443             if res:
3444                 res += ', '
3445             res += fdict['vcodec']
3446             if fdict.get('vbr') is not None:
3447                 res += '@'
3448         elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
3449             res += 'video@'
3450         if fdict.get('vbr') is not None:
3451             res += '%4dk' % fdict['vbr']
3452         if fdict.get('fps') is not None:
3453             if res:
3454                 res += ', '
3455             res += '%sfps' % fdict['fps']
3456         if fdict.get('acodec') is not None:
3457             if res:
3458                 res += ', '
3459             if fdict['acodec'] == 'none':
3460                 res += 'video only'
3461             else:
3462                 res += '%-5s' % fdict['acodec']
3463         elif fdict.get('abr') is not None:
3464             if res:
3465                 res += ', '
3466             res += 'audio'
3467         if fdict.get('abr') is not None:
3468             res += '@%3dk' % fdict['abr']
3469         if fdict.get('asr') is not None:
3470             res += ' (%5dHz)' % fdict['asr']
3471         if fdict.get('filesize') is not None:
3472             if res:
3473                 res += ', '
3474             res += format_bytes(fdict['filesize'])
3475         elif fdict.get('filesize_approx') is not None:
3476             if res:
3477                 res += ', '
3478             res += '~' + format_bytes(fdict['filesize_approx'])
3479         return res
3480
3481     def render_formats_table(self, info_dict):
3482         if not info_dict.get('formats') and not info_dict.get('url'):
3483             return None
3484
3485         formats = info_dict.get('formats', [info_dict])
3486         if not self.params.get('listformats_table', True) is not False:
3487             table = [
3488                 [
3489                     format_field(f, 'format_id'),
3490                     format_field(f, 'ext'),
3491                     self.format_resolution(f),
3492                     self._format_note(f)
3493                 ] for f in formats if f.get('preference') is None or f['preference'] >= -1000]
3494             return render_table(['format code', 'extension', 'resolution', 'note'], table, extra_gap=1)
3495
3496         delim = self._format_screen('\u2502', self.Styles.DELIM, '|', test_encoding=True)
3497         table = [
3498             [
3499                 self._format_screen(format_field(f, 'format_id'), self.Styles.ID),
3500                 format_field(f, 'ext'),
3501                 format_field(f, func=self.format_resolution, ignore=('audio only', 'images')),
3502                 format_field(f, 'fps', '\t%d'),
3503                 format_field(f, 'dynamic_range', '%s', ignore=(None, 'SDR')).replace('HDR', ''),
3504                 delim,
3505                 format_field(f, 'filesize', ' \t%s', func=format_bytes) + format_field(f, 'filesize_approx', '~\t%s', func=format_bytes),
3506                 format_field(f, 'tbr', '\t%dk'),
3507                 shorten_protocol_name(f.get('protocol', '')),
3508                 delim,
3509                 format_field(f, 'vcodec', default='unknown').replace(
3510                     'none', 'images' if f.get('acodec') == 'none'
3511                             else self._format_screen('audio only', self.Styles.SUPPRESS)),
3512                 format_field(f, 'vbr', '\t%dk'),
3513                 format_field(f, 'acodec', default='unknown').replace(
3514                     'none', '' if f.get('vcodec') == 'none'
3515                             else self._format_screen('video only', self.Styles.SUPPRESS)),
3516                 format_field(f, 'abr', '\t%dk'),
3517                 format_field(f, 'asr', '\t%dHz'),
3518                 join_nonempty(
3519                     self._format_screen('UNSUPPORTED', 'light red') if f.get('ext') in ('f4f', 'f4m') else None,
3520                     format_field(f, 'language', '[%s]'),
3521                     join_nonempty(format_field(f, 'format_note'),
3522                                   format_field(f, 'container', ignore=(None, f.get('ext'))),
3523                                   delim=', '),
3524                     delim=' '),
3525             ] for f in formats if f.get('preference') is None or f['preference'] >= -1000]
3526         header_line = self._list_format_headers(
3527             'ID', 'EXT', 'RESOLUTION', '\tFPS', 'HDR', delim, '\tFILESIZE', '\tTBR', 'PROTO',
3528             delim, 'VCODEC', '\tVBR', 'ACODEC', '\tABR', '\tASR', 'MORE INFO')
3529
3530         return render_table(
3531             header_line, table, hide_empty=True,
3532             delim=self._format_screen('\u2500', self.Styles.DELIM, '-', test_encoding=True))
3533
3534     def render_thumbnails_table(self, info_dict):
3535         thumbnails = list(info_dict.get('thumbnails') or [])
3536         if not thumbnails:
3537             return None
3538         return render_table(
3539             self._list_format_headers('ID', 'Width', 'Height', 'URL'),
3540             [[t.get('id'), t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']] for t in thumbnails])
3541
3542     def render_subtitles_table(self, video_id, subtitles):
3543         def _row(lang, formats):
3544             exts, names = zip(*((f['ext'], f.get('name') or 'unknown') for f in reversed(formats)))
3545             if len(set(names)) == 1:
3546                 names = [] if names[0] == 'unknown' else names[:1]
3547             return [lang, ', '.join(names), ', '.join(exts)]
3548
3549         if not subtitles:
3550             return None
3551         return render_table(
3552             self._list_format_headers('Language', 'Name', 'Formats'),
3553             [_row(lang, formats) for lang, formats in subtitles.items()],
3554             hide_empty=True)
3555
3556     def __list_table(self, video_id, name, func, *args):
3557         table = func(*args)
3558         if not table:
3559             self.to_screen(f'{video_id} has no {name}')
3560             return
3561         self.to_screen(f'[info] Available {name} for {video_id}:')
3562         self.to_stdout(table)
3563
3564     def list_formats(self, info_dict):
3565         self.__list_table(info_dict['id'], 'formats', self.render_formats_table, info_dict)
3566
3567     def list_thumbnails(self, info_dict):
3568         self.__list_table(info_dict['id'], 'thumbnails', self.render_thumbnails_table, info_dict)
3569
3570     def list_subtitles(self, video_id, subtitles, name='subtitles'):
3571         self.__list_table(video_id, name, self.render_subtitles_table, video_id, subtitles)
3572
3573     def urlopen(self, req):
3574         """ Start an HTTP download """
3575         if isinstance(req, compat_basestring):
3576             req = sanitized_Request(req)
3577         return self._opener.open(req, timeout=self._socket_timeout)
3578
    def print_debug_header(self):
        """Write the '[debug]' header lines describing the runtime environment.

        Does nothing unless the 'verbose' param is set. The lines cover, in
        order: encodings, yt-dlp version and variant, lazy-extractor status,
        plugins, compatibility options, git HEAD (when running from source),
        Python version, external executable versions, optional libraries and
        the proxy map. Output goes to the 'logger' param when one is given.
        """
        if not self.params.get('verbose'):
            return

        def get_encoding(stream):
            # Describe the stream's encoding, flagging streams that do not
            # support terminal sequences
            ret = getattr(stream, 'encoding', 'missing (%s)' % type(stream).__name__)
            if not supports_terminal_sequences(stream):
                from .compat import WINDOWS_VT_MODE
                ret += ' (No VT)' if WINDOWS_VT_MODE is False else ' (No ANSI)'
            return ret

        encoding_str = 'Encodings: locale %s, fs %s, out %s, err %s, pref %s' % (
            locale.getpreferredencoding(),
            sys.getfilesystemencoding(),
            get_encoding(self._screen_file), get_encoding(self._err_file),
            self.get_encoding())

        # Pick the sink for all following debug lines. Without a logger, the
        # encodings line is written through write_string with encoding=None,
        # while every later line goes through self._write_string
        logger = self.params.get('logger')
        if logger:
            write_debug = lambda msg: logger.debug(f'[debug] {msg}')
            write_debug(encoding_str)
        else:
            write_string(f'[debug] {encoding_str}\n', encoding=None)
            write_debug = lambda msg: self._write_string(f'[debug] {msg}\n')

        source = detect_variant()
        write_debug(join_nonempty(
            'yt-dlp version', __version__,
            f'[{RELEASE_GIT_HEAD}]' if RELEASE_GIT_HEAD else '',
            '' if source == 'unknown' else f'({source})',
            delim=' '))
        if not _LAZY_LOADER:
            if os.environ.get('YTDLP_NO_LAZY_EXTRACTORS'):
                write_debug('Lazy loading extractors is forcibly disabled')
            else:
                write_debug('Lazy loading extractors is disabled')
        if plugin_extractors or plugin_postprocessors:
            # A plugin is listed as 'Class as name' when it is registered under
            # a name different from its class name
            write_debug('Plugins: %s' % [
                '%s%s' % (klass.__name__, '' if klass.__name__ == name else f' as {name}')
                for name, klass in itertools.chain(plugin_extractors.items(), plugin_postprocessors.items())])
        if self.params.get('compat_opts'):
            write_debug('Compatibility options: %s' % ', '.join(self.params.get('compat_opts')))

        if source == 'source':
            # Best effort: ask git (run in this file's directory) for the
            # current commit; any failure is silently ignored
            try:
                sp = Popen(
                    ['git', 'rev-parse', '--short', 'HEAD'],
                    stdout=subprocess.PIPE, stderr=subprocess.PIPE,
                    cwd=os.path.dirname(os.path.abspath(__file__)))
                out, err = sp.communicate_or_kill()
                out = out.decode().strip()
                if re.match('[0-9a-f]+', out):
                    write_debug('Git HEAD: %s' % out)
            except Exception:
                # sys.exc_clear() does not exist on Python 3; the inner
                # try/except swallows the resulting error
                try:
                    sys.exc_clear()
                except Exception:
                    pass

        def python_implementation():
            # Implementation name, with the PyPy version appended when available
            impl_name = platform.python_implementation()
            if impl_name == 'PyPy' and hasattr(sys, 'pypy_version_info'):
                return impl_name + ' version %d.%d.%d' % sys.pypy_version_info[:3]
            return impl_name

        write_debug('Python version %s (%s %s) - %s' % (
            platform.python_version(),
            python_implementation(),
            platform.architecture()[0],
            platform_name()))

        exe_versions, ffmpeg_features = FFmpegPostProcessor.get_versions_and_features(self)
        # Keep only the names of the features whose value is truthy
        ffmpeg_features = {key for key, val in ffmpeg_features.items() if val}
        if ffmpeg_features:
            exe_versions['ffmpeg'] += ' (%s)' % ','.join(ffmpeg_features)

        exe_versions['rtmpdump'] = rtmpdump_version()
        exe_versions['phantomjs'] = PhantomJSwrapper._version()
        # Executables with a falsy version are left out of the listing
        exe_str = ', '.join(
            f'{exe} {v}' for exe, v in sorted(exe_versions.items()) if v
        ) or 'none'
        write_debug('exe versions: %s' % exe_str)

        from .downloader.websocket import has_websockets
        from .postprocessor.embedthumbnail import has_mutagen
        from .cookies import SQLITE_AVAILABLE, SECRETSTORAGE_AVAILABLE

        # Each entry is either the library's name or a falsy value, which
        # join_nonempty is presumably expected to drop
        lib_str = join_nonempty(
            compat_pycrypto_AES and compat_pycrypto_AES.__name__.split('.')[0],
            SECRETSTORAGE_AVAILABLE and 'secretstorage',
            has_mutagen and 'mutagen',
            SQLITE_AVAILABLE and 'sqlite',
            has_websockets and 'websockets',
            delim=', ') or 'none'
        write_debug('Optional libraries: %s' % lib_str)

        # Merge the proxies of every opener handler that exposes them
        proxy_map = {}
        for handler in self._opener.handlers:
            if hasattr(handler, 'proxies'):
                proxy_map.update(handler.proxies)
        write_debug(f'Proxy map: {proxy_map}')

        # Not implemented
        # (the `False and` guard means this branch never runs)
        if False and self.params.get('call_home'):
            ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode('utf-8')
            write_debug('Public IP address: %s' % ipaddr)
            latest_version = self.urlopen(
                'https://yt-dl.org/latest/version').read().decode('utf-8')
            if version_tuple(latest_version) > version_tuple(__version__):
                self.report_warning(
                    'You are using an outdated version (newest version: %s)! '
                    'See https://yt-dl.org/update if you need help updating.' %
                    latest_version)
3692
3693     def _setup_opener(self):
3694         timeout_val = self.params.get('socket_timeout')
3695         self._socket_timeout = 20 if timeout_val is None else float(timeout_val)
3696
3697         opts_cookiesfrombrowser = self.params.get('cookiesfrombrowser')
3698         opts_cookiefile = self.params.get('cookiefile')
3699         opts_proxy = self.params.get('proxy')
3700
3701         self.cookiejar = load_cookies(opts_cookiefile, opts_cookiesfrombrowser, self)
3702
3703         cookie_processor = YoutubeDLCookieProcessor(self.cookiejar)
3704         if opts_proxy is not None:
3705             if opts_proxy == '':
3706                 proxies = {}
3707             else:
3708                 proxies = {'http': opts_proxy, 'https': opts_proxy}
3709         else:
3710             proxies = compat_urllib_request.getproxies()
3711             # Set HTTPS proxy to HTTP one if given (https://github.com/ytdl-org/youtube-dl/issues/805)
3712             if 'http' in proxies and 'https' not in proxies:
3713                 proxies['https'] = proxies['http']
3714         proxy_handler = PerRequestProxyHandler(proxies)
3715
3716         debuglevel = 1 if self.params.get('debug_printtraffic') else 0
3717         https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
3718         ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
3719         redirect_handler = YoutubeDLRedirectHandler()
3720         data_handler = compat_urllib_request_DataHandler()
3721
3722         # When passing our own FileHandler instance, build_opener won't add the
3723         # default FileHandler and allows us to disable the file protocol, which
3724         # can be used for malicious purposes (see
3725         # https://github.com/ytdl-org/youtube-dl/issues/8227)
3726         file_handler = compat_urllib_request.FileHandler()
3727
3728         def file_open(*args, **kwargs):
3729             raise compat_urllib_error.URLError('file:// scheme is explicitly disabled in yt-dlp for security reasons')
3730         file_handler.file_open = file_open
3731
3732         opener = compat_urllib_request.build_opener(
3733             proxy_handler, https_handler, cookie_processor, ydlh, redirect_handler, data_handler, file_handler)
3734
3735         # Delete the default user-agent header, which would otherwise apply in
3736         # cases where our custom HTTP handler doesn't come into play
3737         # (See https://github.com/ytdl-org/youtube-dl/issues/1309 for details)
3738         opener.addheaders = []
3739         self._opener = opener
3740
3741     def encode(self, s):
3742         if isinstance(s, bytes):
3743             return s  # Already encoded
3744
3745         try:
3746             return s.encode(self.get_encoding())
3747         except UnicodeEncodeError as err:
3748             err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
3749             raise
3750
3751     def get_encoding(self):
3752         encoding = self.params.get('encoding')
3753         if encoding is None:
3754             encoding = preferredencoding()
3755         return encoding
3756
3757     def _write_info_json(self, label, ie_result, infofn, overwrite=None):
3758         ''' Write infojson and returns True = written, False = skip, None = error '''
3759         if overwrite is None:
3760             overwrite = self.params.get('overwrites', True)
3761         if not self.params.get('writeinfojson'):
3762             return False
3763         elif not infofn:
3764             self.write_debug(f'Skipping writing {label} infojson')
3765             return False
3766         elif not self._ensure_dir_exists(infofn):
3767             return None
3768         elif not overwrite and os.path.exists(infofn):
3769             self.to_screen(f'[info] {label.title()} metadata is already present')
3770         else:
3771             self.to_screen(f'[info] Writing {label} metadata as JSON to: {infofn}')
3772             try:
3773                 write_json_file(self.sanitize_info(ie_result, self.params.get('clean_infojson', True)), infofn)
3774             except (OSError, IOError):
3775                 self.report_error(f'Cannot write {label} metadata to JSON file {infofn}')
3776                 return None
3777         return True
3778
3779     def _write_description(self, label, ie_result, descfn):
3780         ''' Write description and returns True = written, False = skip, None = error '''
3781         if not self.params.get('writedescription'):
3782             return False
3783         elif not descfn:
3784             self.write_debug(f'Skipping writing {label} description')
3785             return False
3786         elif not self._ensure_dir_exists(descfn):
3787             return None
3788         elif not self.params.get('overwrites', True) and os.path.exists(descfn):
3789             self.to_screen(f'[info] {label.title()} description is already present')
3790         elif ie_result.get('description') is None:
3791             self.report_warning(f'There\'s no {label} description to write')
3792             return False
3793         else:
3794             try:
3795                 self.to_screen(f'[info] Writing {label} description to: {descfn}')
3796                 with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
3797                     descfile.write(ie_result['description'])
3798             except (OSError, IOError):
3799                 self.report_error(f'Cannot write {label} description file {descfn}')
3800                 return None
3801         return True
3802
3803     def _write_subtitles(self, info_dict, filename):
3804         ''' Write subtitles to file and return list of (sub_filename, final_sub_filename); or None if error'''
3805         ret = []
3806         subtitles = info_dict.get('requested_subtitles')
3807         if not subtitles or not (self.params.get('writesubtitles') or self.params.get('writeautomaticsub')):
3808             # subtitles download errors are already managed as troubles in relevant IE
3809             # that way it will silently go on when used with unsupporting IE
3810             return ret
3811
3812         sub_filename_base = self.prepare_filename(info_dict, 'subtitle')
3813         if not sub_filename_base:
3814             self.to_screen('[info] Skipping writing video subtitles')
3815             return ret
3816         for sub_lang, sub_info in subtitles.items():
3817             sub_format = sub_info['ext']
3818             sub_filename = subtitles_filename(filename, sub_lang, sub_format, info_dict.get('ext'))
3819             sub_filename_final = subtitles_filename(sub_filename_base, sub_lang, sub_format, info_dict.get('ext'))
3820             existing_sub = self.existing_file((sub_filename_final, sub_filename))
3821             if existing_sub:
3822                 self.to_screen(f'[info] Video subtitle {sub_lang}.{sub_format} is already present')
3823                 sub_info['filepath'] = existing_sub
3824                 ret.append((existing_sub, sub_filename_final))
3825                 continue
3826
3827             self.to_screen(f'[info] Writing video subtitles to: {sub_filename}')
3828             if sub_info.get('data') is not None:
3829                 try:
3830                     # Use newline='' to prevent conversion of newline characters
3831                     # See https://github.com/ytdl-org/youtube-dl/issues/10268
3832                     with io.open(sub_filename, 'w', encoding='utf-8', newline='') as subfile:
3833                         subfile.write(sub_info['data'])
3834                     sub_info['filepath'] = sub_filename
3835                     ret.append((sub_filename, sub_filename_final))
3836                     continue
3837                 except (OSError, IOError):
3838                     self.report_error(f'Cannot write video subtitles file {sub_filename}')
3839                     return None
3840
3841             try:
3842                 sub_copy = sub_info.copy()
3843                 sub_copy.setdefault('http_headers', info_dict.get('http_headers'))
3844                 self.dl(sub_filename, sub_copy, subtitle=True)
3845                 sub_info['filepath'] = sub_filename
3846                 ret.append((sub_filename, sub_filename_final))
3847             except (DownloadError, ExtractorError, IOError, OSError, ValueError) + network_exceptions as err:
3848                 if self.params.get('ignoreerrors') is not True:  # False or 'only_download'
3849                     raise DownloadError(f'Unable to download video subtitles for {sub_lang!r}: {err}', err)
3850                 self.report_warning(f'Unable to download video subtitles for {sub_lang!r}: {err}')
3851         return ret
3852
3853     def _write_thumbnails(self, label, info_dict, filename, thumb_filename_base=None):
3854         ''' Write thumbnails to file and return list of (thumb_filename, final_thumb_filename) '''
3855         write_all = self.params.get('write_all_thumbnails', False)
3856         thumbnails, ret = [], []
3857         if write_all or self.params.get('writethumbnail', False):
3858             thumbnails = info_dict.get('thumbnails') or []
3859         multiple = write_all and len(thumbnails) > 1
3860
3861         if thumb_filename_base is None:
3862             thumb_filename_base = filename
3863         if thumbnails and not thumb_filename_base:
3864             self.write_debug(f'Skipping writing {label} thumbnail')
3865             return ret
3866
3867         for idx, t in list(enumerate(thumbnails))[::-1]:
3868             thumb_ext = (f'{t["id"]}.' if multiple else '') + determine_ext(t['url'], 'jpg')
3869             thumb_display_id = f'{label} thumbnail {t["id"]}'
3870             thumb_filename = replace_extension(filename, thumb_ext, info_dict.get('ext'))
3871             thumb_filename_final = replace_extension(thumb_filename_base, thumb_ext, info_dict.get('ext'))
3872
3873             existing_thumb = self.existing_file((thumb_filename_final, thumb_filename))
3874             if existing_thumb:
3875                 self.to_screen('[info] %s is already present' % (
3876                     thumb_display_id if multiple else f'{label} thumbnail').capitalize())
3877                 t['filepath'] = existing_thumb
3878                 ret.append((existing_thumb, thumb_filename_final))
3879             else:
3880                 self.to_screen(f'[info] Downloading {thumb_display_id} ...')
3881                 try:
3882                     uf = self.urlopen(sanitized_Request(t['url'], headers=t.get('http_headers', {})))
3883                     self.to_screen(f'[info] Writing {thumb_display_id} to: {thumb_filename}')
3884                     with open(encodeFilename(thumb_filename), 'wb') as thumbf:
3885                         shutil.copyfileobj(uf, thumbf)
3886                     ret.append((thumb_filename, thumb_filename_final))
3887                     t['filepath'] = thumb_filename
3888                 except network_exceptions as err:
3889                     thumbnails.pop(idx)
3890                     self.report_warning(f'Unable to download {thumb_display_id}: {err}')
3891             if ret and not write_all:
3892                 break
3893         return ret