yt_dlp/YoutubeDL.py

   1 #!/usr/bin/env python3
   2 # coding: utf-8
   3
   4 from __future__ import absolute_import, unicode_literals
   5
   6 import collections
   7 import contextlib
   8 import datetime
   9 import errno
  10 import fileinput
  11 import functools
  12 import io
  13 import itertools
  14 import json
  15 import locale
  16 import operator
  17 import os
  18 import platform
  19 import re
  20 import shutil
  21 import subprocess
  22 import sys
  23 import tempfile
  24 import time
  25 import tokenize
  26 import traceback
  27 import random
  28 import unicodedata
  29
  30 from enum import Enum
  31 from string import ascii_letters
  32
  33 from .compat import (
  34     compat_basestring,
  35     compat_brotli,
  36     compat_get_terminal_size,
  37     compat_kwargs,
  38     compat_numeric_types,
  39     compat_os_name,
  40     compat_pycrypto_AES,
  41     compat_shlex_quote,
  42     compat_str,
  43     compat_tokenize_tokenize,
  44     compat_urllib_error,
  45     compat_urllib_request,
  46     compat_urllib_request_DataHandler,
  47     windows_enable_vt_mode,
  48 )
  49 from .cookies import load_cookies
  50 from .utils import (
  51     age_restricted,
  52     args_to_str,
  53     ContentTooShortError,
  54     date_from_str,
  55     DateRange,
  56     DEFAULT_OUTTMPL,
  57     determine_ext,
  58     determine_protocol,
  59     DownloadCancelled,
  60     DownloadError,
  61     encode_compat_str,
  62     encodeFilename,
  63     EntryNotInPlaylist,
  64     error_to_compat_str,
  65     ExistingVideoReached,
  66     expand_path,
  67     ExtractorError,
  68     float_or_none,
  69     format_bytes,
  70     format_field,
  71     format_decimal_suffix,
  72     formatSeconds,
  73     GeoRestrictedError,
  74     get_domain,
  75     HEADRequest,
  76     InAdvancePagedList,
  77     int_or_none,
  78     iri_to_uri,
  79     ISO3166Utils,
  80     join_nonempty,
  81     LazyList,
  82     LINK_TEMPLATES,
  83     locked_file,
  84     make_dir,
  85     make_HTTPS_handler,
  86     MaxDownloadsReached,
  87     merge_headers,
  88     network_exceptions,
  89     number_of_digits,
  90     orderedSet,
  91     OUTTMPL_TYPES,
  92     PagedList,
  93     parse_filesize,
  94     PerRequestProxyHandler,
  95     platform_name,
  96     Popen,
  97     POSTPROCESS_WHEN,
  98     PostProcessingError,
  99     preferredencoding,
 100     prepend_extension,
 101     ReExtractInfo,
 102     register_socks_protocols,
 103     RejectedVideoReached,
 104     remove_terminal_sequences,
 105     render_table,
 106     replace_extension,
 107     SameFileError,
 108     sanitize_filename,
 109     sanitize_path,
 110     sanitize_url,
 111     sanitized_Request,
 112     std_headers,
 113     STR_FORMAT_RE_TMPL,
 114     STR_FORMAT_TYPES,
 115     str_or_none,
 116     strftime_or_none,
 117     subtitles_filename,
 118     supports_terminal_sequences,
 119     timetuple_from_msec,
 120     to_high_limit_path,
 121     traverse_obj,
 122     try_get,
 123     UnavailableVideoError,
 124     url_basename,
 125     variadic,
 126     version_tuple,
 127     write_json_file,
 128     write_string,
 129     YoutubeDLCookieProcessor,
 130     YoutubeDLHandler,
 131     YoutubeDLRedirectHandler,
 132 )
 133 from .cache import Cache
 134 from .minicurses import format_text
 135 from .extractor import (
 136     gen_extractor_classes,
 137     get_info_extractor,
 138     _LAZY_LOADER,
 139     _PLUGIN_CLASSES as plugin_extractors
 140 )
 141 from .extractor.openload import PhantomJSwrapper
 142 from .downloader import (
 143     FFmpegFD,
 144     get_suitable_downloader,
 145     shorten_protocol_name
 146 )
 147 from .downloader.rtmp import rtmpdump_version
 148 from .postprocessor import (
 149     get_postprocessor,
 150     EmbedThumbnailPP,
 151     FFmpegFixupDuplicateMoovPP,
 152     FFmpegFixupDurationPP,
 153     FFmpegFixupM3u8PP,
 154     FFmpegFixupM4aPP,
 155     FFmpegFixupStretchedPP,
 156     FFmpegFixupTimestampPP,
 157     FFmpegMergerPP,
 158     FFmpegPostProcessor,
 159     MoveFilesAfterDownloadPP,
 160     _PLUGIN_CLASSES as plugin_postprocessors
 161 )
 162 from .update import detect_variant
 163 from .version import __version__, RELEASE_GIT_HEAD
 164
 165 if compat_os_name == 'nt':
 166     import ctypes
 167
 168
 169 class YoutubeDL(object):
 170     """YoutubeDL class.
 171
 172     YoutubeDL objects are the ones responsible for downloading the
 173     actual video file and writing it to disk if the user has requested
 174     it, among some other tasks. In most cases there should be one per
 175     program. As, given a video URL, the downloader doesn't know how to
 176     extract all the needed information, task that InfoExtractors do, it
 177     has to pass the URL to one of them.
 178
 179     For this, YoutubeDL objects have a method that allows
 180     InfoExtractors to be registered in a given order. When it is passed
 181     a URL, the YoutubeDL object hands it to the first InfoExtractor it
 182     finds that reports being able to handle it. The InfoExtractor extracts
 183     all the information about the video or videos the URL refers to, and
 184     YoutubeDL processes the extracted information, possibly using a File
 185     Downloader to download the video.
 186
 187     YoutubeDL objects accept a lot of parameters. In order not to saturate
 188     the object constructor with arguments, it receives a dictionary of
 189     options instead. These options are available through the params
 190     attribute for the InfoExtractors to use. The YoutubeDL also
 191     registers itself as the downloader in charge for the InfoExtractors
 192     that are added to it, so this is a "mutual registration".
 193
 194     Available options:
 195
 196     username:          Username for authentication purposes.
 197     password:          Password for authentication purposes.
 198     videopassword:     Password for accessing a video.
 199     ap_mso:            Adobe Pass multiple-system operator identifier.
 200     ap_username:       Multiple-system operator account username.
 201     ap_password:       Multiple-system operator account password.
 202     usenetrc:          Use netrc for authentication instead.
 203     verbose:           Print additional info to stdout.
 204     quiet:             Do not print messages to stdout.
 205     no_warnings:       Do not print out anything for warnings.
 206     forceprint:        A dict with keys WHEN mapped to a list of templates to
 207                        print to stdout. The allowed keys are video or any of the
 208                        items in utils.POSTPROCESS_WHEN.
 209                        For compatibility, a single list is also accepted
 210     print_to_file:     A dict with keys WHEN (same as forceprint) mapped to
 211                        a list of tuples with (template, filename)
 212     forceurl:          Force printing final URL. (Deprecated)
 213     forcetitle:        Force printing title. (Deprecated)
 214     forceid:           Force printing ID. (Deprecated)
 215     forcethumbnail:    Force printing thumbnail URL. (Deprecated)
 216     forcedescription:  Force printing description. (Deprecated)
 217     forcefilename:     Force printing final filename. (Deprecated)
 218     forceduration:     Force printing duration. (Deprecated)
 219     forcejson:         Force printing info_dict as JSON.
 220     dump_single_json:  Force printing the info_dict of the whole playlist
 221                        (or video) as a single JSON line.
 222     force_write_download_archive: Force writing download archive regardless
 223                        of 'skip_download' or 'simulate'.
 224     simulate:          Do not download the video files. If unset (or None),
 225                        simulate only if listsubtitles, listformats or list_thumbnails is used
 226     format:            Video format code. see "FORMAT SELECTION" for more details.
 227                        You can also pass a function. The function takes 'ctx' as
 228                        argument and returns the formats to download.
 229                        See "build_format_selector" for an implementation
 230     allow_unplayable_formats:   Allow unplayable formats to be extracted and downloaded.
 231     ignore_no_formats_error: Ignore "No video formats" error. Useful for
 232                        extracting metadata even if the video is not actually
 233                        available for download (experimental)
 234     format_sort:       A list of fields by which to sort the video formats.
 235                        See "Sorting Formats" for more details.
 236     format_sort_force: Force the given format_sort. see "Sorting Formats"
 237                        for more details.
 238     prefer_free_formats: Whether to prefer video formats with free containers
 239                        over non-free ones of same quality.
 240     allow_multiple_video_streams:   Allow multiple video streams to be merged
 241                        into a single file
 242     allow_multiple_audio_streams:   Allow multiple audio streams to be merged
 243                        into a single file
 244     check_formats:     Whether to test if the formats are downloadable.
 245                        Can be True (check all), False (check none),
 246                        'selected' (check selected formats),
 247                        or None (check only if requested by extractor)
 248     paths:             Dictionary of output paths. The allowed keys are 'home',
 249                        'temp' and the keys of OUTTMPL_TYPES (in utils.py)
 250     outtmpl:           Dictionary of templates for output names. Allowed keys
 251                        are 'default' and the keys of OUTTMPL_TYPES (in utils.py).
 252                        For compatibility with youtube-dl, a single string can also be used
 253     outtmpl_na_placeholder: Placeholder for unavailable meta fields.
 254     restrictfilenames: Do not allow "&" and spaces in file names
 255     trim_file_name:    Limit length of filename (extension excluded)
 256     windowsfilenames:  Force the filenames to be windows compatible
 257     ignoreerrors:      Do not stop on download/postprocessing errors.
 258                        Can be 'only_download' to ignore only download errors.
 259                        Default is 'only_download' for CLI, but False for API
 260     skip_playlist_after_errors: Number of allowed failures until the rest of
 261                        the playlist is skipped
 262     force_generic_extractor: Force downloader to use the generic extractor
 263     overwrites:        Overwrite all video and metadata files if True,
 264                        overwrite only non-video files if None
 265                        and don't overwrite any file if False
 266                        For compatibility with youtube-dl,
 267                        "nooverwrites" may also be used instead
 268     playliststart:     Playlist item to start at.
 269     playlistend:       Playlist item to end at.
 270     playlist_items:    Specific indices of playlist to download.
 271     playlistreverse:   Download playlist items in reverse order.
 272     playlistrandom:    Download playlist items in random order.
 273     matchtitle:        Download only matching titles.
 274     rejecttitle:       Reject downloads for matching titles.
 275     logger:            Log messages to a logging.Logger instance.
 276     logtostderr:       Log messages to stderr instead of stdout.
 277     consoletitle:      Display progress in console window's titlebar.
 278     writedescription:  Write the video description to a .description file
 279     writeinfojson:     Write the video metadata to a .info.json file
 280     clean_infojson:    Remove private fields from the infojson
 281     getcomments:       Extract video comments. This will not be written to disk
 282                        unless writeinfojson is also given
 283     writeannotations:  Write the video annotations to a .annotations.xml file
 284     writethumbnail:    Write the thumbnail image to a file
 285     allow_playlist_files: Whether to write playlists' description, infojson etc
 286                        also to disk when using the 'write*' options
 287     write_all_thumbnails:  Write all thumbnail formats to files
 288     writelink:         Write an internet shortcut file, depending on the
 289                        current platform (.url/.webloc/.desktop)
 290     writeurllink:      Write a Windows internet shortcut file (.url)
 291     writewebloclink:   Write a macOS internet shortcut file (.webloc)
 292     writedesktoplink:  Write a Linux internet shortcut file (.desktop)
 293     writesubtitles:    Write the video subtitles to a file
 294     writeautomaticsub: Write the automatically generated subtitles to a file
 295     allsubtitles:      Deprecated - Use subtitleslangs = ['all']
 296                        Downloads all the subtitles of the video
 297                        (requires writesubtitles or writeautomaticsub)
 298     listsubtitles:     Lists all available subtitles for the video
 299     subtitlesformat:   The format code for subtitles
 300     subtitleslangs:    List of languages of the subtitles to download (can be regex).
 301                        The list may contain "all" to refer to all the available
 302                        subtitles. The language can be prefixed with a "-" to
 303                        exclude it from the requested languages. Eg: ['all', '-live_chat']
 304     keepvideo:         Keep the video file after post-processing
 305     daterange:         A DateRange object, download only if the upload_date is in the range.
 306     skip_download:     Skip the actual download of the video file
 307     cachedir:          Location of the cache files in the filesystem.
 308                        False to disable filesystem cache.
 309     noplaylist:        Download single video instead of a playlist if in doubt.
 310     age_limit:         An integer representing the user's age in years.
 311                        Unsuitable videos for the given age are skipped.
 312     min_views:         An integer representing the minimum view count the video
 313                        must have in order to not be skipped.
 314                        Videos without view count information are always
 315                        downloaded. None for no limit.
 316     max_views:         An integer representing the maximum view count.
 317                        Videos that are more popular than that are not
 318                        downloaded.
 319                        Videos without view count information are always
 320                        downloaded. None for no limit.
 321     download_archive:  File name of a file where all downloads are recorded.
 322                        Videos already present in the file are not downloaded
 323                        again.
 324     break_on_existing: Stop the download process after attempting to download a
 325                        file that is in the archive.
 326     break_on_reject:   Stop the download process when encountering a video that
 327                        has been filtered out.
 328     break_per_url:     Whether break_on_reject and break_on_existing
 329                        should act on each input URL as opposed to for the entire queue
 330     cookiefile:        File name where cookies should be read from and dumped to
 331     cookiesfrombrowser:  A tuple containing the name of the browser, the profile
 332                        name/path from where cookies are loaded, and the name of the
 333                        keyring. Eg: ('chrome', ) or ('vivaldi', 'default', 'BASICTEXT')
 334     legacyserverconnect: Explicitly allow HTTPS connection to servers that do not
 335                        support RFC 5746 secure renegotiation
 336     nocheckcertificate:  Do not verify SSL certificates
 337     prefer_insecure:   Use HTTP instead of HTTPS to retrieve information.
 338                        At the moment, this is only supported by YouTube.
 339     http_headers:      A dictionary of custom headers to be used for all requests
 340     proxy:             URL of the proxy server to use
 341     geo_verification_proxy:  URL of the proxy to use for IP address verification
 342                        on geo-restricted sites.
 343     socket_timeout:    Time to wait for unresponsive hosts, in seconds
 344     bidi_workaround:   Work around buggy terminals without bidirectional text
 345                        support, using fribidi
 346     debug_printtraffic:Print out sent and received HTTP traffic
 347     include_ads:       Download ads as well (deprecated)
 348     default_search:    Prepend this string if an input url is not valid.
 349                        'auto' for elaborate guessing
 350     encoding:          Use this encoding instead of the system-specified.
 351     extract_flat:      Do not resolve URLs, return the immediate result.
 352                        Pass in 'in_playlist' to only show this behavior for
 353                        playlist items.
 354     wait_for_video:    If given, wait for scheduled streams to become available.
 355                        The value should be a tuple containing the range
 356                        (min_secs, max_secs) to wait between retries
 357     postprocessors:    A list of dictionaries, each with an entry
 358                        * key:  The name of the postprocessor. See
 359                                yt_dlp/postprocessor/__init__.py for a list.
 360                        * when: When to run the postprocessor. Allowed values are
 361                                the entries of utils.POSTPROCESS_WHEN
 362                                Assumed to be 'post_process' if not given
 363     post_hooks:        Deprecated - Register a custom postprocessor instead
 364                        A list of functions that get called as the final step
 365                        for each video file, after all postprocessors have been
 366                        called. The filename will be passed as the only argument.
 367     progress_hooks:    A list of functions that get called on download
 368                        progress, with a dictionary with the entries
 369                        * status: One of "downloading", "error", or "finished".
 370                                  Check this first and ignore unknown values.
 371                        * info_dict: The extracted info_dict
 372
 373                        If status is one of "downloading", or "finished", the
 374                        following properties may also be present:
 375                        * filename: The final filename (always present)
 376                        * tmpfilename: The filename we're currently writing to
 377                        * downloaded_bytes: Bytes on disk
 378                        * total_bytes: Size of the whole file, None if unknown
 379                        * total_bytes_estimate: Guess of the eventual file size,
 380                                                None if unavailable.
 381                        * elapsed: The number of seconds since download started.
 382                        * eta: The estimated time in seconds, None if unknown
 383                        * speed: The download speed in bytes/second, None if
 384                                 unknown
 385                        * fragment_index: The counter of the currently
 386                                          downloaded video fragment.
 387                        * fragment_count: The number of fragments (= individual
 388                                          files that will be merged)
 389
 390                        Progress hooks are guaranteed to be called at least once
 391                        (with status "finished") if the download is successful.
 392     postprocessor_hooks:  A list of functions that get called on postprocessing
 393                        progress, with a dictionary with the entries
 394                        * status: One of "started", "processing", or "finished".
 395                                  Check this first and ignore unknown values.
 396                        * postprocessor: Name of the postprocessor
 397                        * info_dict: The extracted info_dict
 398
 399                        Postprocessor hooks are guaranteed to be called at least twice
 400                        (with status "started" and "finished") if the processing is successful.
 401     merge_output_format: Extension to use when merging formats.
 402     final_ext:         Expected final extension; used to detect when the file was
 403                        already downloaded and converted
 404     fixup:             Automatically correct known faults of the file.
 405                        One of:
 406                        - "never": do nothing
 407                        - "warn": only emit a warning
 408                        - "detect_or_warn": check whether we can do anything
 409                                            about it, warn otherwise (default)
 410     source_address:    Client-side IP address to bind to.
 411     call_home:         Boolean, true iff we are allowed to contact the
 412                        yt-dlp servers for debugging. (BROKEN)
 413     sleep_interval_requests: Number of seconds to sleep between requests
 414                        during extraction
 415     sleep_interval:    Number of seconds to sleep before each download when
 416                        used alone or a lower bound of a range for randomized
 417                        sleep before each download (minimum possible number
 418                        of seconds to sleep) when used along with
 419                        max_sleep_interval.
 420     max_sleep_interval:Upper bound of a range for randomized sleep before each
 421                        download (maximum possible number of seconds to sleep).
 422                        Must only be used along with sleep_interval.
 423                        Actual sleep time will be a random float from range
 424                        [sleep_interval; max_sleep_interval].
 425     sleep_interval_subtitles: Number of seconds to sleep before each subtitle download
 426     listformats:       Print an overview of available video formats and exit.
 427     list_thumbnails:   Print a table of all thumbnails and exit.
 428     match_filter:      A function that gets called with the info_dict of
 429                        every video.
 430                        If it returns a message, the video is ignored.
 431                        If it returns None, the video is downloaded.
 432                        match_filter_func in utils.py is one example for this.
 433     no_color:          Do not emit color codes in output.
 434     geo_bypass:        Bypass geographic restriction via faking X-Forwarded-For
 435                        HTTP header
 436     geo_bypass_country:
 437                        Two-letter ISO 3166-1 alpha-2 country code that will be used for
 438                        explicit geographic restriction bypassing via faking
 439                        X-Forwarded-For HTTP header
 440     geo_bypass_ip_block:
 441                        IP range in CIDR notation that will be used similarly to
 442                        geo_bypass_country
 443
 444     The following options determine which downloader is picked:
 445     external_downloader: A dictionary of protocol keys and the executable of the
 446                        external downloader to use for it. The allowed protocols
 447                        are default|http|ftp|m3u8|dash|rtsp|rtmp|mms.
 448                        Set the value to 'native' to use the native downloader
 449     hls_prefer_native: Deprecated - Use external_downloader = {'m3u8': 'native'}
 450                        or {'m3u8': 'ffmpeg'} instead.
 451                        Use the native HLS downloader instead of ffmpeg/avconv
 452                        if True, otherwise use ffmpeg/avconv if False, otherwise
 453                        use downloader suggested by extractor if None.
 454     compat_opts:       Compatibility options. See "Differences in default behavior".
 455                        The following options do not work when used through the API:
 456                        filename, abort-on-error, multistreams, no-live-chat, format-sort,
 457                        no-clean-infojson, no-playlist-metafiles, no-keep-subs, no-attach-info-json.
 458                        Refer to __init__.py for their implementation
 459     progress_template: Dictionary of templates for progress outputs.
 460                        Allowed keys are 'download', 'postprocess',
 461                        'download-title' (console title) and 'postprocess-title'.
 462                        The template is mapped on a dictionary with keys 'progress' and 'info'
 463
 464     The following parameters are not used by YoutubeDL itself, they are used by
 465     the downloader (see yt_dlp/downloader/common.py):
 466     nopart, updatetime, buffersize, ratelimit, throttledratelimit, min_filesize,
 467     max_filesize, test, noresizebuffer, retries, file_access_retries, fragment_retries,
 468     continuedl, noprogress, xattr_set_filesize, hls_use_mpegts, http_chunk_size,
 469     external_downloader_args, concurrent_fragment_downloads.
 470
 471     The following options are used by the post processors:
 472     prefer_ffmpeg:     If False, use avconv instead of ffmpeg if both are available,
 473                        otherwise prefer ffmpeg. (avconv support is deprecated)
 474     ffmpeg_location:   Location of the ffmpeg/avconv binary; either the path
 475                        to the binary or its containing directory.
 476     postprocessor_args: A dictionary of postprocessor/executable keys (in lower case)
 477                        and a list of additional command-line arguments for the
 478                        postprocessor/executable. The dict can also have "PP+EXE" keys
 479                        which are used when the given exe is used by the given PP.
 480                        Use 'default' as the name for arguments to be passed to all PP
 481                        For compatibility with youtube-dl, a single list of args
 482                        can also be used
 483
 484     The following options are used by the extractors:
 485     extractor_retries: Number of times to retry for known errors
 486     dynamic_mpd:       Whether to process dynamic DASH manifests (default: True)
 487     hls_split_discontinuity: Split HLS playlists to different formats at
 488                        discontinuities such as ad breaks (default: False)
 489     extractor_args:    A dictionary of arguments to be passed to the extractors.
 490                        See "EXTRACTOR ARGUMENTS" for details.
 491                        Eg: {'youtube': {'skip': ['dash', 'hls']}}
 492     mark_watched:      Mark videos watched (even with --simulate). Only for YouTube
 493     youtube_include_dash_manifest: Deprecated - Use extractor_args instead.
 494                        If True (default), DASH manifests and related
 495                        data will be downloaded and processed by extractor.
 496                        You can reduce network I/O by disabling it if you don't
 497                        care about DASH. (only for youtube)
 498     youtube_include_hls_manifest: Deprecated - Use extractor_args instead.
 499                        If True (default), HLS manifests and related
 500                        data will be downloaded and processed by extractor.
 501                        You can reduce network I/O by disabling it if you don't
 502                        care about HLS. (only for youtube)
 503     """
 504
 505     _NUMERIC_FIELDS = set((  # field names expected to hold numbers (per the constant's name)
 506         'width', 'height', 'tbr', 'abr', 'asr', 'vbr', 'fps', 'filesize', 'filesize_approx',
 507         'timestamp', 'release_timestamp',
 508         'duration', 'view_count', 'like_count', 'dislike_count', 'repost_count',
 509         'average_rating', 'comment_count', 'age_limit',
 510         'start_time', 'end_time',
 511         'chapter_number', 'season_number', 'episode_number',
 512         'track_number', 'disc_number', 'release_year',
 513     ))  # NOTE(review): the code that consumes this set is outside this excerpt - confirm usage there
 514
 515     _format_selection_exts = {  # file extensions grouped under 'audio', 'video' and 'storyboards'
 516         'audio': {'m4a', 'mp3', 'ogg', 'aac'},
 517         'video': {'mp4', 'flv', 'webm', '3gp'},
 518         'storyboards': {'mhtml'},
 519     }  # NOTE(review): presumably consulted during format selection - confirm at the use site
 520
 521     params = None  # class-level default; __init__ rebinds it to the options dict
 522     _ies = {}  # class-level default; __init__ rebinds a fresh dict per instance
 523     _pps = {k: [] for k in POSTPROCESS_WHEN}  # one empty list per POSTPROCESS_WHEN entry; rebuilt in __init__
 524     _printed_messages = set()  # __init__ rebinds a fresh set per instance
 525     _first_webpage_request = True  # __init__ sets this to True again per instance
 526     _download_retcode = None  # __init__ sets this to 0
 527     _num_downloads = None  # __init__ sets this to 0
 528     _playlist_level = 0  # NOTE(review): not rebound in the visible part of __init__
 529     _playlist_urls = set()  # NOTE(review): mutable, not rebound in the visible part of __init__; shared by all instances unless rebound later - confirm
 530     _screen_file = None  # __init__ picks sys.stdout or sys.stderr according to params['logtostderr']
 531
 532     def __init__(self, params=None, auto_init=True):
 533         """Create a YoutubeDL object with the given options.
 534         @param auto_init    Whether to load the default extractors and print header (if verbose).
 535                             Set to 'no_verbose_header' to not print the header
 536         """
 537         if params is None:
 538             params = {}
 539         self._ies = {}
 540         self._ies_instances = {}
 541         self._pps = {k: [] for k in POSTPROCESS_WHEN}
 542         self._printed_messages = set()
 543         self._first_webpage_request = True
 544         self._post_hooks = []
 545         self._progress_hooks = []
 546         self._postprocessor_hooks = []
 547         self._download_retcode = 0
 548         self._num_downloads = 0
 549         self._num_videos = 0
 550         self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
 551         self._err_file = sys.stderr
 552         self.params = params
 553         self.cache = Cache(self)
 554
 555         windows_enable_vt_mode()
 556         self._allow_colors = {
 557             'screen': not self.params.get('no_color') and supports_terminal_sequences(self._screen_file),
 558             'err': not self.params.get('no_color') and supports_terminal_sequences(self._err_file),
 559         }
 560
 561         if sys.version_info < (3, 6):
 562             self.report_warning(
 563                 'Python version %d.%d is not supported! Please update to Python 3.6 or above' % sys.version_info[:2])
 564
 565         if self.params.get('allow_unplayable_formats'):
 566             self.report_warning(
 567                 f'You have asked for {self._format_err("UNPLAYABLE", self.Styles.EMPHASIS)} formats to be listed/downloaded. '
 568                 'This is a developer option intended for debugging. \n'
 569                 '         If you experience any issues while using this option, '
 570                 f'{self._format_err("DO NOT", self.Styles.ERROR)} open a bug report')
 571
 572         def check_deprecated(param, option, suggestion):
 573             if self.params.get(param) is not None:
 574                 self.report_warning('%s is deprecated. Use %s instead' % (option, suggestion))
 575                 return True
 576             return False
 577
 578         if check_deprecated('cn_verification_proxy', '--cn-verification-proxy', '--geo-verification-proxy'):
 579             if self.params.get('geo_verification_proxy') is None:
 580                 self.params['geo_verification_proxy'] = self.params['cn_verification_proxy']
 581
 582         check_deprecated('autonumber', '--auto-number', '-o "%(autonumber)s-%(title)s.%(ext)s"')
 583         check_deprecated('usetitle', '--title', '-o "%(title)s-%(id)s.%(ext)s"')
 584         check_deprecated('useid', '--id', '-o "%(id)s.%(ext)s"')
 585
 586         for msg in self.params.get('_warnings', []):
 587             self.report_warning(msg)
 588         for msg in self.params.get('_deprecation_warnings', []):
 589             self.deprecation_warning(msg)
 590
 591         if 'list-formats' in self.params.get('compat_opts', []):
 592             self.params['listformats_table'] = False
 593
 594         if 'overwrites' not in self.params and self.params.get('nooverwrites') is not None:
 595             # nooverwrites was unnecessarily changed to overwrites
 596             # in 0c3d0f51778b153f65c21906031c2e091fcfb641
 597             # This ensures compatibility with both keys
 598             self.params['overwrites'] = not self.params['nooverwrites']
 599         elif self.params.get('overwrites') is None:
 600             self.params.pop('overwrites', None)
 601         else:
 602             self.params['nooverwrites'] = not self.params['overwrites']
 603
 604         self.params.setdefault('forceprint', {})
 605         self.params.setdefault('print_to_file', {})
 606
 607         # Compatibility with older syntax
 608         if not isinstance(params['forceprint'], dict):
 609             self.params['forceprint'] = {'video': params['forceprint']}
 610
 611         if self.params.get('bidi_workaround', False):
 612             try:
 613                 import pty
 614                 master, slave = pty.openpty()
 615                 width = compat_get_terminal_size().columns
 616                 if width is None:
 617                     width_args = []
 618                 else:
 619                     width_args = ['-w', str(width)]
 620                 sp_kwargs = dict(
 621                     stdin=subprocess.PIPE,
 622                     stdout=slave,
 623                     stderr=self._err_file)
 624                 try:
 625                     self._output_process = Popen(['bidiv'] + width_args, **sp_kwargs)
 626                 except OSError:
 627                     self._output_process = Popen(['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
 628                 self._output_channel = os.fdopen(master, 'rb')
 629             except OSError as ose:
 630                 if ose.errno == errno.ENOENT:
 631                     self.report_warning(
 632                         'Could not find fribidi executable, ignoring --bidi-workaround. '
 633                         'Make sure that  fribidi  is an executable file in one of the directories in your $PATH.')
 634                 else:
 635                     raise
 636
 637         if (sys.platform != 'win32'
 638                 and sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
 639                 and not self.params.get('restrictfilenames', False)):
 640             # Unicode filesystem API will throw errors (#1474, #13027)
 641             self.report_warning(
 642                 'Assuming --restrict-filenames since file system encoding '
 643                 'cannot encode all characters. '
 644                 'Set the LC_ALL environment variable to fix this.')
 645             self.params['restrictfilenames'] = True
 646
 647         self.outtmpl_dict = self.parse_outtmpl()
 648
 649         # Creating format selector here allows us to catch syntax errors before the extraction
 650         self.format_selector = (
 651             self.params.get('format') if self.params.get('format') in (None, '-')
 652             else self.params['format'] if callable(self.params['format'])
 653             else self.build_format_selector(self.params['format']))
 654
 655         # Set http_headers defaults according to std_headers
 656         self.params['http_headers'] = merge_headers(std_headers, self.params.get('http_headers', {}))
 657
 658         self._setup_opener()
 659
 660         if auto_init:
 661             if auto_init != 'no_verbose_header':
 662                 self.print_debug_header()
 663             self.add_default_info_extractors()
 664
 665         hooks = {
 666             'post_hooks': self.add_post_hook,
 667             'progress_hooks': self.add_progress_hook,
 668             'postprocessor_hooks': self.add_postprocessor_hook,
 669         }
 670         for opt, fn in hooks.items():
 671             for ph in self.params.get(opt, []):
 672                 fn(ph)
 673
 674         for pp_def_raw in self.params.get('postprocessors', []):
 675             pp_def = dict(pp_def_raw)
 676             when = pp_def.pop('when', 'post_process')
 677             self.add_post_processor(
 678                 get_postprocessor(pp_def.pop('key'))(self, **compat_kwargs(pp_def)),
 679                 when=when)
 680
 681         register_socks_protocols()
 682
 683         def preload_download_archive(fn):
 684             """Preload the archive, if any is specified"""
 685             if fn is None:
 686                 return False
 687             self.write_debug(f'Loading archive file {fn!r}')
 688             try:
 689                 with locked_file(fn, 'r', encoding='utf-8') as archive_file:
 690                     for line in archive_file:
 691                         self.archive.add(line.strip())
 692             except IOError as ioe:
 693                 if ioe.errno != errno.ENOENT:
 694                     raise
 695                 return False
 696             return True
 697
 698         self.archive = set()
 699         preload_download_archive(self.params.get('download_archive'))
 700
 701     def warn_if_short_id(self, argv):
 702         # short YouTube ID starting with dash?
 703         idxs = [
 704             i for i, a in enumerate(argv)
 705             if re.match(r'^-[0-9A-Za-z_-]{10}$', a)]
 706         if idxs:
 707             correct_argv = (
 708                 ['yt-dlp']
 709                 + [a for i, a in enumerate(argv) if i not in idxs]
 710                 + ['--'] + [argv[i] for i in idxs]
 711             )
 712             self.report_warning(
 713                 'Long argument string detected. '
 714                 'Use -- to separate parameters and URLs, like this:\n%s' %
 715                 args_to_str(correct_argv))
 716
 717     def add_info_extractor(self, ie):
 718         """Add an InfoExtractor object to the end of the list."""
 719         ie_key = ie.ie_key()
 720         self._ies[ie_key] = ie
 721         if not isinstance(ie, type):
 722             self._ies_instances[ie_key] = ie
 723             ie.set_downloader(self)
 724
 725     def _get_info_extractor_class(self, ie_key):
 726         ie = self._ies.get(ie_key)
 727         if ie is None:
 728             ie = get_info_extractor(ie_key)
 729             self.add_info_extractor(ie)
 730         return ie
 731
 732     def get_info_extractor(self, ie_key):
 733         """
 734         Get an instance of an IE with name ie_key, it will try to get one from
 735         the _ies list, if there's no instance it will create a new one and add
 736         it to the extractor list.
 737         """
 738         ie = self._ies_instances.get(ie_key)
 739         if ie is None:
 740             ie = get_info_extractor(ie_key)()
 741             self.add_info_extractor(ie)
 742         return ie
 743
 744     def add_default_info_extractors(self):
 745         """
 746         Add the InfoExtractors returned by gen_extractors to the end of the list
 747         """
 748         for ie in gen_extractor_classes():
 749             self.add_info_extractor(ie)
 750
 751     def add_post_processor(self, pp, when='post_process'):
 752         """Add a PostProcessor object to the end of the chain."""
 753         self._pps[when].append(pp)
 754         pp.set_downloader(self)
 755
 756     def add_post_hook(self, ph):
 757         """Add the post hook"""
 758         self._post_hooks.append(ph)
 759
 760     def add_progress_hook(self, ph):
 761         """Add the download progress hook"""
 762         self._progress_hooks.append(ph)
 763
 764     def add_postprocessor_hook(self, ph):
 765         """Add the postprocessing progress hook"""
 766         self._postprocessor_hooks.append(ph)
 767         for pps in self._pps.values():
 768             for pp in pps:
 769                 pp.add_progress_hook(ph)
 770
 771     def _bidi_workaround(self, message):
 772         if not hasattr(self, '_output_channel'):
 773             return message
 774
 775         assert hasattr(self, '_output_process')
 776         assert isinstance(message, compat_str)
 777         line_count = message.count('\n') + 1
 778         self._output_process.stdin.write((message + '\n').encode('utf-8'))
 779         self._output_process.stdin.flush()
 780         res = ''.join(self._output_channel.readline().decode('utf-8')
 781                       for _ in range(line_count))
 782         return res[:-len('\n')]
 783
 784     def _write_string(self, message, out=None, only_once=False):
 785         if only_once:
 786             if message in self._printed_messages:
 787                 return
 788             self._printed_messages.add(message)
 789         write_string(message, out=out, encoding=self.params.get('encoding'))
 790
 791     def to_stdout(self, message, skip_eol=False, quiet=False):
 792         """Print message to stdout"""
 793         if self.params.get('logger'):
 794             self.params['logger'].debug(message)
 795         elif not quiet or self.params.get('verbose'):
 796             self._write_string(
 797                 '%s%s' % (self._bidi_workaround(message), ('' if skip_eol else '\n')),
 798                 self._err_file if quiet else self._screen_file)
 799
 800     def to_stderr(self, message, only_once=False):
 801         """Print message to stderr"""
 802         assert isinstance(message, compat_str)
 803         if self.params.get('logger'):
 804             self.params['logger'].error(message)
 805         else:
 806             self._write_string('%s\n' % self._bidi_workaround(message), self._err_file, only_once=only_once)
 807
 808     def to_console_title(self, message):
 809         if not self.params.get('consoletitle', False):
 810             return
 811         message = remove_terminal_sequences(message)
 812         if compat_os_name == 'nt':
 813             if ctypes.windll.kernel32.GetConsoleWindow():
 814                 # c_wchar_p() might not be necessary if `message` is
 815                 # already of type unicode()
 816                 ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
 817         elif 'TERM' in os.environ:
 818             self._write_string('\033]0;%s\007' % message, self._screen_file)
 819
 820     def save_console_title(self):
 821         if not self.params.get('consoletitle', False):
 822             return
 823         if self.params.get('simulate'):
 824             return
 825         if compat_os_name != 'nt' and 'TERM' in os.environ:
 826             # Save the title on stack
 827             self._write_string('\033[22;0t', self._screen_file)
 828
 829     def restore_console_title(self):
 830         if not self.params.get('consoletitle', False):
 831             return
 832         if self.params.get('simulate'):
 833             return
 834         if compat_os_name != 'nt' and 'TERM' in os.environ:
 835             # Restore the title from stack
 836             self._write_string('\033[23;0t', self._screen_file)
 837
 838     def __enter__(self):
 839         self.save_console_title()
 840         return self
 841
 842     def __exit__(self, *args):
 843         self.restore_console_title()
 844
 845         if self.params.get('cookiefile') is not None:
 846             self.cookiejar.save(ignore_discard=True, ignore_expires=True)
 847
 848     def trouble(self, message=None, tb=None, is_error=True):
 849         """Determine action to take when a download problem appears.
 850
 851         Depending on if the downloader has been configured to ignore
 852         download errors or not, this method may throw an exception or
 853         not when errors are found, after printing the message.
 854
 855         @param tb          If given, is additional traceback information
 856         @param is_error    Whether to raise error according to ignorerrors
 857         """
 858         if message is not None:
 859             self.to_stderr(message)
 860         if self.params.get('verbose'):
 861             if tb is None:
 862                 if sys.exc_info()[0]:  # if .trouble has been called from an except block
 863                     tb = ''
 864                     if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
 865                         tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
 866                     tb += encode_compat_str(traceback.format_exc())
 867                 else:
 868                     tb_data = traceback.format_list(traceback.extract_stack())
 869                     tb = ''.join(tb_data)
 870             if tb:
 871                 self.to_stderr(tb)
 872         if not is_error:
 873             return
 874         if not self.params.get('ignoreerrors'):
 875             if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
 876                 exc_info = sys.exc_info()[1].exc_info
 877             else:
 878                 exc_info = sys.exc_info()
 879             raise DownloadError(message, exc_info)
 880         self._download_retcode = 1
 881
 882     def to_screen(self, message, skip_eol=False):
 883         """Print message to stdout if not in quiet mode"""
 884         self.to_stdout(
 885             message, skip_eol, quiet=self.params.get('quiet', False))
 886
    class Styles(Enum):
        """Named text styles; _format_text() unwraps a member to its value before formatting."""
        # NOTE: HEADERS and WARNING share the value 'yellow'. Enum treats the
        # later member as an alias of the earlier one, so the declaration
        # order here matters (Styles.WARNING is Styles.HEADERS).
        HEADERS = 'yellow'
        EMPHASIS = 'light blue'
        ID = 'green'
        DELIM = 'blue'
        ERROR = 'red'
        WARNING = 'yellow'
        SUPPRESS = 'light black'
 895
 896     def _format_text(self, handle, allow_colors, text, f, fallback=None, *, test_encoding=False):
 897         if test_encoding:
 898             original_text = text
 899             # handle.encoding can be None. See https://github.com/yt-dlp/yt-dlp/issues/2711
 900             encoding = self.params.get('encoding') or getattr(handle, 'encoding', None) or 'ascii'
 901             text = text.encode(encoding, 'ignore').decode(encoding)
 902             if fallback is not None and text != original_text:
 903                 text = fallback
 904         if isinstance(f, self.Styles):
 905             f = f.value
 906         return format_text(text, f) if allow_colors else text if fallback is None else fallback
 907
 908     def _format_screen(self, *args, **kwargs):
 909         return self._format_text(
 910             self._screen_file, self._allow_colors['screen'], *args, **kwargs)
 911
 912     def _format_err(self, *args, **kwargs):
 913         return self._format_text(
 914             self._err_file, self._allow_colors['err'], *args, **kwargs)
 915
 916     def report_warning(self, message, only_once=False):
 917         '''
 918         Print the message to stderr, it will be prefixed with 'WARNING:'
 919         If stderr is a tty file the 'WARNING:' will be colored
 920         '''
 921         if self.params.get('logger') is not None:
 922             self.params['logger'].warning(message)
 923         else:
 924             if self.params.get('no_warnings'):
 925                 return
 926             self.to_stderr(f'{self._format_err("WARNING:", self.Styles.WARNING)} {message}', only_once)
 927
 928     def deprecation_warning(self, message):
 929         if self.params.get('logger') is not None:
 930             self.params['logger'].warning('DeprecationWarning: {message}')
 931         else:
 932             self.to_stderr(f'{self._format_err("DeprecationWarning:", self.Styles.ERROR)} {message}', True)
 933
 934     def report_error(self, message, *args, **kwargs):
 935         '''
 936         Do the same as trouble, but prefixes the message with 'ERROR:', colored
 937         in red if stderr is a tty file.
 938         '''
 939         self.trouble(f'{self._format_err("ERROR:", self.Styles.ERROR)} {message}', *args, **kwargs)
 940
 941     def write_debug(self, message, only_once=False):
 942         '''Log debug message or Print message to stderr'''
 943         if not self.params.get('verbose', False):
 944             return
 945         message = '[debug] %s' % message
 946         if self.params.get('logger'):
 947             self.params['logger'].debug(message)
 948         else:
 949             self.to_stderr(message, only_once)
 950
 951     def report_file_already_downloaded(self, file_name):
 952         """Report file has already been fully downloaded."""
 953         try:
 954             self.to_screen('[download] %s has already been downloaded' % file_name)
 955         except UnicodeEncodeError:
 956             self.to_screen('[download] The file has already been downloaded')
 957
 958     def report_file_delete(self, file_name):
 959         """Report that existing file will be deleted."""
 960         try:
 961             self.to_screen('Deleting existing file %s' % file_name)
 962         except UnicodeEncodeError:
 963             self.to_screen('Deleting existing file')
 964
 965     def raise_no_formats(self, info, forced=False, *, msg=None):
 966         has_drm = info.get('__has_drm')
 967         ignored, expected = self.params.get('ignore_no_formats_error'), bool(msg)
 968         msg = msg or has_drm and 'This video is DRM protected' or 'No video formats found!'
 969         if forced or not ignored:
 970             raise ExtractorError(msg, video_id=info['id'], ie=info['extractor'],
 971                                  expected=has_drm or ignored or expected)
 972         else:
 973             self.report_warning(msg)
 974
 975     def parse_outtmpl(self):
 976         outtmpl_dict = self.params.get('outtmpl', {})
 977         if not isinstance(outtmpl_dict, dict):
 978             outtmpl_dict = {'default': outtmpl_dict}
 979         # Remove spaces in the default template
 980         if self.params.get('restrictfilenames'):
 981             sanitize = lambda x: x.replace(' - ', ' ').replace(' ', '-')
 982         else:
 983             sanitize = lambda x: x
 984         outtmpl_dict.update({
 985             k: sanitize(v) for k, v in DEFAULT_OUTTMPL.items()
 986             if outtmpl_dict.get(k) is None})
 987         for key, val in outtmpl_dict.items():
 988             if isinstance(val, bytes):
 989                 self.report_warning(
 990                     'Parameter outtmpl is bytes, but should be a unicode string. '
 991                     'Put  from __future__ import unicode_literals  at the top of your code file or consider switching to Python 3.x.')
 992         return outtmpl_dict
 993
 994     def get_output_path(self, dir_type='', filename=None):
 995         paths = self.params.get('paths', {})
 996         assert isinstance(paths, dict)
 997         path = os.path.join(
 998             expand_path(paths.get('home', '').strip()),
 999             expand_path(paths.get(dir_type, '').strip()) if dir_type else '',
1000             filename or '')
1001
1002         # Temporary fix for #4787
1003         # 'Treat' all problem characters by passing filename through preferredencoding
1004         # to workaround encoding issues with subprocess on python2 @ Windows
1005         if sys.version_info < (3, 0) and sys.platform == 'win32':
1006             path = encodeFilename(path, True).decode(preferredencoding())
1007         return sanitize_path(path, force=self.params.get('windowsfilenames'))
1008
1009     @staticmethod
1010     def _outtmpl_expandpath(outtmpl):
1011         # expand_path translates '%%' into '%' and '$$' into '$'
1012         # correspondingly that is not what we want since we need to keep
1013         # '%%' intact for template dict substitution step. Working around
1014         # with boundary-alike separator hack.
1015         sep = ''.join([random.choice(ascii_letters) for _ in range(32)])
1016         outtmpl = outtmpl.replace('%%', '%{0}%'.format(sep)).replace('$$', '${0}$'.format(sep))
1017
1018         # outtmpl should be expand_path'ed before template dict substitution
1019         # because meta fields may contain env variables we don't want to
1020         # be expanded. For example, for outtmpl "%(title)s.%(ext)s" and
1021         # title "Hello $PATH", we don't want `$PATH` to be expanded.
1022         return expand_path(outtmpl).replace(sep, '')
1023
1024     @staticmethod
1025     def escape_outtmpl(outtmpl):
1026         ''' Escape any remaining strings like %s, %abc% etc. '''
1027         return re.sub(
1028             STR_FORMAT_RE_TMPL.format('', '(?![%(\0])'),
1029             lambda mobj: ('' if mobj.group('has_key') else '%') + mobj.group(0),
1030             outtmpl)
1031
1032     @classmethod
1033     def validate_outtmpl(cls, outtmpl):
1034         ''' @return None or Exception object '''
1035         outtmpl = re.sub(
1036             STR_FORMAT_RE_TMPL.format('[^)]*', '[ljqBUDS]'),
1037             lambda mobj: f'{mobj.group(0)[:-1]}s',
1038             cls._outtmpl_expandpath(outtmpl))
1039         try:
1040             cls.escape_outtmpl(outtmpl) % collections.defaultdict(int)
1041             return None
1042         except ValueError as err:
1043             return err
1044
1045     @staticmethod
1046     def _copy_infodict(info_dict):
1047         info_dict = dict(info_dict)
1048         info_dict.pop('__postprocessors', None)
1049         return info_dict
1050
    def prepare_outtmpl(self, outtmpl, info_dict, sanitize=False):
        """ Make the outtmpl and info_dict suitable for substitution: ydl.escape_outtmpl(outtmpl) % info_dict

        Every %(...)X placeholder matched in outtmpl is evaluated here (field
        traversal, maths, date formatting, alternates, replacement, default and
        the custom conversions) and rewritten to a plain placeholder whose
        value is stored in a separate dict.

        @param outtmpl     The output template string
        @param info_dict   The info dict to read fields from. Only 'epoch' may be
                           added to it; all other changes are made on a copy
        @param sanitize    Whether to sanitize the output as a filename.
                           For backward compatibility, a function can also be passed
        @return            Tuple (rewritten template, dict of values for it)
        """

        # Done on the caller's dict (before copying) so that later calls see the same value
        info_dict.setdefault('epoch', int(time.time()))  # keep epoch consistent once set

        info_dict = self._copy_infodict(info_dict)
        info_dict['duration_string'] = (  # %(duration>%H-%M-%S)s is wrong if duration > 24hrs
            formatSeconds(info_dict['duration'], '-' if sanitize else ':')
            if info_dict.get('duration', None) is not None
            else None)
        info_dict['autonumber'] = self.params.get('autonumber_start', 1) - 1 + self._num_downloads
        info_dict['video_autonumber'] = self._num_videos
        if info_dict.get('resolution') is None:
            info_dict['resolution'] = self.format_resolution(info_dict, default=None)

        # For fields playlist_index, playlist_autonumber and autonumber convert all occurrences
        # of %(field)s to %(field)0Nd for backward compatibility
        field_size_compat_map = {
            'playlist_index': number_of_digits(info_dict.get('_last_playlist_index') or 0),
            'playlist_autonumber': number_of_digits(info_dict.get('n_entries') or 0),
            'autonumber': self.params.get('autonumber_size') or 5,
        }

        # Filled by create_key(): rewritten placeholder key -> computed value
        TMPL_DICT = {}
        EXTERNAL_FORMAT_RE = re.compile(STR_FORMAT_RE_TMPL.format('[^)]*', f'[{STR_FORMAT_TYPES}ljqBUDS]'))
        MATH_FUNCTIONS = {
            '+': float.__add__,
            '-': float.__sub__,
        }
        # Field is of the form key1.key2...
        # where keys (except first) can be string, int or slice
        FIELD_RE = r'\w*(?:\.(?:\w+|{num}|{num}?(?::{num}?){{1,2}}))*'.format(num=r'(?:-?\d+)')
        # NOTE(review): the '.' in the number pattern below is unescaped, so it matches
        # any character rather than only a decimal point - confirm whether that is intended
        MATH_FIELD_RE = r'''(?:{field}|{num})'''.format(field=FIELD_RE, num=r'-?\d+(?:.\d+)?')
        MATH_OPERATORS_RE = r'(?:%s)' % '|'.join(map(re.escape, MATH_FUNCTIONS.keys()))
        # Grammar of the text between the parentheses of a placeholder
        INTERNAL_FORMAT_RE = re.compile(r'''(?x)
            (?P<negate>-)?
            (?P<fields>{field})
            (?P<maths>(?:{math_op}{math_field})*)
            (?:>(?P<strf_format>.+?))?
            (?P<alternate>(?<!\\),[^|&)]+)?
            (?:&(?P<replacement>.*?))?
            (?:\|(?P<default>.*?))?
            $'''.format(field=FIELD_RE, math_op=MATH_OPERATORS_RE, math_field=MATH_FIELD_RE))

        def _traverse_infodict(k):
            # Look up a dotted key path in info_dict; a leading empty component is dropped
            k = k.split('.')
            if k[0] == '':
                k.pop(0)
            return traverse_obj(info_dict, k, is_user_input=True, traverse_string=True)

        def get_value(mdict):
            # Compute the value for one parsed key (a groupdict of INTERNAL_FORMAT_RE)
            # Object traversal
            value = _traverse_infodict(mdict['fields'])
            # Negative
            if mdict['negate']:
                value = float_or_none(value)
                if value is not None:
                    value *= -1
            # Do maths
            offset_key = mdict['maths']
            if offset_key:
                value = float_or_none(value)
                operator = None
                # Consume offset_key piece by piece, alternating operator and operand
                while offset_key:
                    item = re.match(
                        MATH_FIELD_RE if operator else MATH_OPERATORS_RE,
                        offset_key).group(0)
                    offset_key = offset_key[len(item):]
                    if operator is None:
                        operator = MATH_FUNCTIONS[item]
                        continue
                    item, multiplier = (item[1:], -1) if item[0] == '-' else (item, 1)
                    # The operand is a literal number if it parses as one, else a field name
                    offset = float_or_none(item)
                    if offset is None:
                        offset = float_or_none(_traverse_infodict(item))
                    try:
                        value = operator(value, multiplier * offset)
                    except (TypeError, ZeroDivisionError):
                        # The whole expression evaluates to None if an operation fails
                        return None
                    operator = None
            # Datetime formatting
            if mdict['strf_format']:
                value = strftime_or_none(value, mdict['strf_format'].replace('\\,', ','))

            return value

        # Default text for placeholders that have no value
        na = self.params.get('outtmpl_na_placeholder', 'NA')

        def filename_sanitizer(key, value, restricted=self.params.get('restrictfilenames')):
            return sanitize_filename(str(value), restricted=restricted,
                                     is_id=re.search(r'(^|[_.])id(\.|$)', key))

        # `sanitize` may be a callable (used as the sanitizer itself) or a plain flag
        sanitizer = sanitize if callable(sanitize) else filename_sanitizer
        sanitize = bool(sanitize)

        def _dumpjson_default(obj):
            # json.dumps fallback: sets and LazyLists become lists, anything else its repr
            if isinstance(obj, (set, LazyList)):
                return list(obj)
            return repr(obj)

        def create_key(outer_mobj):
            # Substitution callback for EXTERNAL_FORMAT_RE: evaluates one placeholder,
            # records its value in TMPL_DICT and returns the rewritten placeholder
            if not outer_mobj.group('has_key'):
                return outer_mobj.group(0)
            key = outer_mobj.group('key')
            mobj = re.match(INTERNAL_FORMAT_RE, key)
            initial_field = mobj.group('fields') if mobj else ''
            value, replacement, default = None, None, na
            # Walk the chain of comma-separated alternates until one yields a value
            while mobj:
                mobj = mobj.groupdict()
                default = mobj['default'] if mobj['default'] is not None else default
                value = get_value(mobj)
                replacement = mobj['replacement']
                if value is None and mobj['alternate']:
                    mobj = re.match(INTERNAL_FORMAT_RE, mobj['alternate'][1:])
                else:
                    break

            fmt = outer_mobj.group('format')
            if fmt == 's' and value is not None and key in field_size_compat_map.keys():
                fmt = '0{:d}d'.format(field_size_compat_map[key])

            # No value -> default; otherwise the replacement (if given) stands in for the value
            value = default if value is None else value if replacement is None else replacement

            flags = outer_mobj.group('conversion') or ''
            # Same format spec with the conversion type replaced by 's'
            str_fmt = f'{fmt[:-1]}s'
            if fmt[-1] == 'l':  # list
                delim = '\n' if '#' in flags else ', '
                value, fmt = delim.join(map(str, variadic(value, allowed_types=(str, bytes)))), str_fmt
            elif fmt[-1] == 'j':  # json
                value, fmt = json.dumps(value, default=_dumpjson_default, indent=4 if '#' in flags else None), str_fmt
            elif fmt[-1] == 'q':  # quoted
                value = map(str, variadic(value) if '#' in flags else [value])
                value, fmt = ' '.join(map(compat_shlex_quote, value)), str_fmt
            elif fmt[-1] == 'B':  # bytes
                value = f'%{str_fmt}'.encode('utf-8') % str(value).encode('utf-8')
                value, fmt = value.decode('utf-8', 'ignore'), 's'
            elif fmt[-1] == 'U':  # unicode normalized
                value, fmt = unicodedata.normalize(
                    # "+" = compatibility equivalence, "#" = NFD
                    'NF%s%s' % ('K' if '+' in flags else '', 'D' if '#' in flags else 'C'),
                    value), str_fmt
            elif fmt[-1] == 'D':  # decimal suffix
                num_fmt, fmt = fmt[:-1].replace('#', ''), 's'
                value = format_decimal_suffix(value, f'%{num_fmt}f%s' if num_fmt else '%d%s',
                                              factor=1024 if '#' in flags else 1000)
            elif fmt[-1] == 'S':  # filename sanitization
                value, fmt = filename_sanitizer(initial_field, value, restricted='#' in flags), str_fmt
            elif fmt[-1] == 'c':
                if value:
                    value = str(value)[0]
                else:
                    fmt = str_fmt
            elif fmt[-1] not in 'rs':  # numeric
                value = float_or_none(value)
                if value is None:
                    value, fmt = default, 's'

            if sanitize:
                if fmt[-1] == 'r':
                    # If value is an object, sanitize might convert it to a string
                    # So we convert it to repr first
                    value, fmt = repr(value), str_fmt
                if fmt[-1] in 'csr':
                    value = sanitizer(initial_field, value)

            # The stored key combines the original key text (each '%' followed by a NUL)
            # with a NUL and the original format spec
            key = '%s\0%s' % (key.replace('%', '%\0'), outer_mobj.group('format'))
            TMPL_DICT[key] = value
            return '{prefix}%({key}){fmt}'.format(key=key, fmt=fmt, prefix=outer_mobj.group('prefix'))

        return EXTERNAL_FORMAT_RE.sub(create_key, outtmpl), TMPL_DICT
1224
1225     def evaluate_outtmpl(self, outtmpl, info_dict, *args, **kwargs):
1226         outtmpl, info_dict = self.prepare_outtmpl(outtmpl, info_dict, *args, **kwargs)
1227         return self.escape_outtmpl(outtmpl) % info_dict
1228
1229     def _prepare_filename(self, info_dict, tmpl_type='default'):
1230         try:
1231             outtmpl = self._outtmpl_expandpath(self.outtmpl_dict.get(tmpl_type, self.outtmpl_dict['default']))
1232             filename = self.evaluate_outtmpl(outtmpl, info_dict, True)
1233             if not filename:
1234                 return None
1235
1236             if tmpl_type in ('default', 'temp'):
1237                 final_ext, ext = self.params.get('final_ext'), info_dict.get('ext')
1238                 if final_ext and ext and final_ext != ext and filename.endswith(f'.{final_ext}'):
1239                     filename = replace_extension(filename, ext, final_ext)
1240             else:
1241                 force_ext = OUTTMPL_TYPES[tmpl_type]
1242                 if force_ext:
1243                     filename = replace_extension(filename, force_ext, info_dict.get('ext'))
1244
1245             # https://github.com/blackjack4494/youtube-dlc/issues/85
1246             trim_file_name = self.params.get('trim_file_name', False)
1247             if trim_file_name:
1248                 no_ext, *ext = filename.rsplit('.', 2)
1249                 filename = join_nonempty(no_ext[:trim_file_name], *ext, delim='.')
1250
1251             return filename
1252         except ValueError as err:
1253             self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
1254             return None
1255
1256     def prepare_filename(self, info_dict, dir_type='', warn=False):
1257         """Generate the output filename."""
1258
1259         filename = self._prepare_filename(info_dict, dir_type or 'default')
1260         if not filename and dir_type not in ('', 'temp'):
1261             return ''
1262
1263         if warn:
1264             if not self.params.get('paths'):
1265                 pass
1266             elif filename == '-':
1267                 self.report_warning('--paths is ignored when an outputting to stdout', only_once=True)
1268             elif os.path.isabs(filename):
1269                 self.report_warning('--paths is ignored since an absolute path is given in output template', only_once=True)
1270         if filename == '-' or not filename:
1271             return filename
1272
1273         return self.get_output_path(dir_type, filename)
1274
1275     def _match_entry(self, info_dict, incomplete=False, silent=False):
1276         """ Returns None if the file should be downloaded """
1277
1278         video_title = info_dict.get('title', info_dict.get('id', 'video'))
1279
1280         def check_filter():
1281             if 'title' in info_dict:
1282                 # This can happen when we're just evaluating the playlist
1283                 title = info_dict['title']
1284                 matchtitle = self.params.get('matchtitle', False)
1285                 if matchtitle:
1286                     if not re.search(matchtitle, title, re.IGNORECASE):
1287                         return '"' + title + '" title did not match pattern "' + matchtitle + '"'
1288                 rejecttitle = self.params.get('rejecttitle', False)
1289                 if rejecttitle:
1290                     if re.search(rejecttitle, title, re.IGNORECASE):
1291                         return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
1292             date = info_dict.get('upload_date')
1293             if date is not None:
1294                 dateRange = self.params.get('daterange', DateRange())
1295                 if date not in dateRange:
1296                     return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
1297             view_count = info_dict.get('view_count')
1298             if view_count is not None:
1299                 min_views = self.params.get('min_views')
1300                 if min_views is not None and view_count < min_views:
1301                     return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
1302                 max_views = self.params.get('max_views')
1303                 if max_views is not None and view_count > max_views:
1304                     return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
1305             if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
1306                 return 'Skipping "%s" because it is age restricted' % video_title
1307
1308             match_filter = self.params.get('match_filter')
1309             if match_filter is not None:
1310                 try:
1311                     ret = match_filter(info_dict, incomplete=incomplete)
1312                 except TypeError:
1313                     # For backward compatibility
1314                     ret = None if incomplete else match_filter(info_dict)
1315                 if ret is not None:
1316                     return ret
1317             return None
1318
1319         if self.in_download_archive(info_dict):
1320             reason = '%s has already been recorded in the archive' % video_title
1321             break_opt, break_err = 'break_on_existing', ExistingVideoReached
1322         else:
1323             reason = check_filter()
1324             break_opt, break_err = 'break_on_reject', RejectedVideoReached
1325         if reason is not None:
1326             if not silent:
1327                 self.to_screen('[download] ' + reason)
1328             if self.params.get(break_opt, False):
1329                 raise break_err()
1330         return reason
1331
1332     @staticmethod
1333     def add_extra_info(info_dict, extra_info):
1334         '''Set the keys from extra_info in info dict if they are missing'''
1335         for key, value in extra_info.items():
1336             info_dict.setdefault(key, value)
1337
1338     def extract_info(self, url, download=True, ie_key=None, extra_info=None,
1339                      process=True, force_generic_extractor=False):
1340         """
1341         Return a list with a dictionary for each video extracted.
1342
1343         Arguments:
1344         url -- URL to extract
1345
1346         Keyword arguments:
1347         download -- whether to download videos during extraction
1348         ie_key -- extractor key hint
1349         extra_info -- dictionary containing the extra values to add to each result
1350         process -- whether to resolve all unresolved references (URLs, playlist items),
1351             must be True for download to work.
1352         force_generic_extractor -- force using the generic extractor
1353         """
1354
1355         if extra_info is None:
1356             extra_info = {}
1357
1358         if not ie_key and force_generic_extractor:
1359             ie_key = 'Generic'
1360
1361         if ie_key:
1362             ies = {ie_key: self._get_info_extractor_class(ie_key)}
1363         else:
1364             ies = self._ies
1365
1366         for ie_key, ie in ies.items():
1367             if not ie.suitable(url):
1368                 continue
1369
1370             if not ie.working():
1371                 self.report_warning('The program functionality for this site has been marked as broken, '
1372                                     'and will probably not work.')
1373
1374             temp_id = ie.get_temp_id(url)
1375             if temp_id is not None and self.in_download_archive({'id': temp_id, 'ie_key': ie_key}):
1376                 self.to_screen(f'[{ie_key}] {temp_id}: has already been recorded in the archive')
1377                 if self.params.get('break_on_existing', False):
1378                     raise ExistingVideoReached()
1379                 break
1380             return self.__extract_info(url, self.get_info_extractor(ie_key), download, extra_info, process)
1381         else:
1382             self.report_error('no suitable InfoExtractor for URL %s' % url)
1383
1384     def __handle_extraction_exceptions(func):
1385         @functools.wraps(func)
1386         def wrapper(self, *args, **kwargs):
1387             while True:
1388                 try:
1389                     return func(self, *args, **kwargs)
1390                 except (DownloadCancelled, LazyList.IndexError, PagedList.IndexError):
1391                     raise
1392                 except ReExtractInfo as e:
1393                     if e.expected:
1394                         self.to_screen(f'{e}; Re-extracting data')
1395                     else:
1396                         self.to_stderr('\r')
1397                         self.report_warning(f'{e}; Re-extracting data')
1398                     continue
1399                 except GeoRestrictedError as e:
1400                     msg = e.msg
1401                     if e.countries:
1402                         msg += '\nThis video is available in %s.' % ', '.join(
1403                             map(ISO3166Utils.short2full, e.countries))
1404                     msg += '\nYou might want to use a VPN or a proxy server (with --proxy) to workaround.'
1405                     self.report_error(msg)
1406                 except ExtractorError as e:  # An error we somewhat expected
1407                     self.report_error(str(e), e.format_traceback())
1408                 except Exception as e:
1409                     if self.params.get('ignoreerrors'):
1410                         self.report_error(str(e), tb=encode_compat_str(traceback.format_exc()))
1411                     else:
1412                         raise
1413                 break
1414         return wrapper
1415
1416     def _wait_for_video(self, ie_result):
1417         if (not self.params.get('wait_for_video')
1418                 or ie_result.get('_type', 'video') != 'video'
1419                 or ie_result.get('formats') or ie_result.get('url')):
1420             return
1421
1422         format_dur = lambda dur: '%02d:%02d:%02d' % timetuple_from_msec(dur * 1000)[:-1]
1423         last_msg = ''
1424
1425         def progress(msg):
1426             nonlocal last_msg
1427             self.to_screen(msg + ' ' * (len(last_msg) - len(msg)) + '\r', skip_eol=True)
1428             last_msg = msg
1429
1430         min_wait, max_wait = self.params.get('wait_for_video')
1431         diff = try_get(ie_result, lambda x: x['release_timestamp'] - time.time())
1432         if diff is None and ie_result.get('live_status') == 'is_upcoming':
1433             diff = random.randrange(min_wait, max_wait) if (max_wait and min_wait) else (max_wait or min_wait)
1434             self.report_warning('Release time of video is not known')
1435         elif (diff or 0) <= 0:
1436             self.report_warning('Video should already be available according to extracted info')
1437         diff = min(max(diff or 0, min_wait or 0), max_wait or float('inf'))
1438         self.to_screen(f'[wait] Waiting for {format_dur(diff)} - Press Ctrl+C to try now')
1439
1440         wait_till = time.time() + diff
1441         try:
1442             while True:
1443                 diff = wait_till - time.time()
1444                 if diff <= 0:
1445                     progress('')
1446                     raise ReExtractInfo('[wait] Wait period ended', expected=True)
1447                 progress(f'[wait] Remaining time until next attempt: {self._format_screen(format_dur(diff), self.Styles.EMPHASIS)}')
1448                 time.sleep(1)
1449         except KeyboardInterrupt:
1450             progress('')
1451             raise ReExtractInfo('[wait] Interrupted by user', expected=True)
1452         except BaseException as e:
1453             if not isinstance(e, ReExtractInfo):
1454                 self.to_screen('')
1455             raise
1456
1457     @__handle_extraction_exceptions
1458     def __extract_info(self, url, ie, download, extra_info, process):
1459         ie_result = ie.extract(url)
1460         if ie_result is None:  # Finished already (backwards compatibility; listformats and friends should be moved here)
1461             return
1462         if isinstance(ie_result, list):
1463             # Backwards compatibility: old IE result format
1464             ie_result = {
1465                 '_type': 'compat_list',
1466                 'entries': ie_result,
1467             }
1468         if extra_info.get('original_url'):
1469             ie_result.setdefault('original_url', extra_info['original_url'])
1470         self.add_default_extra_info(ie_result, ie, url)
1471         if process:
1472             self._wait_for_video(ie_result)
1473             return self.process_ie_result(ie_result, download, extra_info)
1474         else:
1475             return ie_result
1476
1477     def add_default_extra_info(self, ie_result, ie, url):
1478         if url is not None:
1479             self.add_extra_info(ie_result, {
1480                 'webpage_url': url,
1481                 'original_url': url,
1482             })
1483         webpage_url = ie_result.get('webpage_url')
1484         if webpage_url:
1485             self.add_extra_info(ie_result, {
1486                 'webpage_url_basename': url_basename(webpage_url),
1487                 'webpage_url_domain': get_domain(webpage_url),
1488             })
1489         if ie is not None:
1490             self.add_extra_info(ie_result, {
1491                 'extractor': ie.IE_NAME,
1492                 'extractor_key': ie.ie_key(),
1493             })
1494
1495     def process_ie_result(self, ie_result, download=True, extra_info=None):
1496         """
1497         Take the result of the ie(may be modified) and resolve all unresolved
1498         references (URLs, playlist items).
1499
1500         It will also download the videos if 'download'.
1501         Returns the resolved ie_result.
1502         """
1503         if extra_info is None:
1504             extra_info = {}
1505         result_type = ie_result.get('_type', 'video')
1506
1507         if result_type in ('url', 'url_transparent'):
1508             ie_result['url'] = sanitize_url(ie_result['url'])
1509             if ie_result.get('original_url'):
1510                 extra_info.setdefault('original_url', ie_result['original_url'])
1511
1512             extract_flat = self.params.get('extract_flat', False)
1513             if ((extract_flat == 'in_playlist' and 'playlist' in extra_info)
1514                     or extract_flat is True):
1515                 info_copy = ie_result.copy()
1516                 ie = try_get(ie_result.get('ie_key'), self.get_info_extractor)
1517                 if ie and not ie_result.get('id'):
1518                     info_copy['id'] = ie.get_temp_id(ie_result['url'])
1519                 self.add_default_extra_info(info_copy, ie, ie_result['url'])
1520                 self.add_extra_info(info_copy, extra_info)
1521                 info_copy, _ = self.pre_process(info_copy)
1522                 self.__forced_printings(info_copy, self.prepare_filename(info_copy), incomplete=True)
1523                 if self.params.get('force_write_download_archive', False):
1524                     self.record_download_archive(info_copy)
1525                 return ie_result
1526
1527         if result_type == 'video':
1528             self.add_extra_info(ie_result, extra_info)
1529             ie_result = self.process_video_result(ie_result, download=download)
1530             additional_urls = (ie_result or {}).get('additional_urls')
1531             if additional_urls:
1532                 # TODO: Improve MetadataParserPP to allow setting a list
1533                 if isinstance(additional_urls, compat_str):
1534                     additional_urls = [additional_urls]
1535                 self.to_screen(
1536                     '[info] %s: %d additional URL(s) requested' % (ie_result['id'], len(additional_urls)))
1537                 self.write_debug('Additional URLs: "%s"' % '", "'.join(additional_urls))
1538                 ie_result['additional_entries'] = [
1539                     self.extract_info(
1540                         url, download, extra_info=extra_info,
1541                         force_generic_extractor=self.params.get('force_generic_extractor'))
1542                     for url in additional_urls
1543                 ]
1544             return ie_result
1545         elif result_type == 'url':
1546             # We have to add extra_info to the results because it may be
1547             # contained in a playlist
1548             return self.extract_info(
1549                 ie_result['url'], download,
1550                 ie_key=ie_result.get('ie_key'),
1551                 extra_info=extra_info)
1552         elif result_type == 'url_transparent':
1553             # Use the information from the embedding page
1554             info = self.extract_info(
1555                 ie_result['url'], ie_key=ie_result.get('ie_key'),
1556                 extra_info=extra_info, download=False, process=False)
1557
1558             # extract_info may return None when ignoreerrors is enabled and
1559             # extraction failed with an error, don't crash and return early
1560             # in this case
1561             if not info:
1562                 return info
1563
1564             force_properties = dict(
1565                 (k, v) for k, v in ie_result.items() if v is not None)
1566             for f in ('_type', 'url', 'id', 'extractor', 'extractor_key', 'ie_key'):
1567                 if f in force_properties:
1568                     del force_properties[f]
1569             new_result = info.copy()
1570             new_result.update(force_properties)
1571
1572             # Extracted info may not be a video result (i.e.
1573             # info.get('_type', 'video') != video) but rather an url or
1574             # url_transparent. In such cases outer metadata (from ie_result)
1575             # should be propagated to inner one (info). For this to happen
1576             # _type of info should be overridden with url_transparent. This
1577             # fixes issue from https://github.com/ytdl-org/youtube-dl/pull/11163.
1578             if new_result.get('_type') == 'url':
1579                 new_result['_type'] = 'url_transparent'
1580
1581             return self.process_ie_result(
1582                 new_result, download=download, extra_info=extra_info)
1583         elif result_type in ('playlist', 'multi_video'):
1584             # Protect from infinite recursion due to recursively nested playlists
1585             # (see https://github.com/ytdl-org/youtube-dl/issues/27833)
1586             webpage_url = ie_result['webpage_url']
1587             if webpage_url in self._playlist_urls:
1588                 self.to_screen(
1589                     '[download] Skipping already downloaded playlist: %s'
1590                     % ie_result.get('title') or ie_result.get('id'))
1591                 return
1592
1593             self._playlist_level += 1
1594             self._playlist_urls.add(webpage_url)
1595             self._fill_common_fields(ie_result, False)
1596             self._sanitize_thumbnails(ie_result)
1597             try:
1598                 return self.__process_playlist(ie_result, download)
1599             finally:
1600                 self._playlist_level -= 1
1601                 if not self._playlist_level:
1602                     self._playlist_urls.clear()
1603         elif result_type == 'compat_list':
1604             self.report_warning(
1605                 'Extractor %s returned a compat_list result. '
1606                 'It needs to be updated.' % ie_result.get('extractor'))
1607
1608             def _fixup(r):
1609                 self.add_extra_info(r, {
1610                     'extractor': ie_result['extractor'],
1611                     'webpage_url': ie_result['webpage_url'],
1612                     'webpage_url_basename': url_basename(ie_result['webpage_url']),
1613                     'webpage_url_domain': get_domain(ie_result['webpage_url']),
1614                     'extractor_key': ie_result['extractor_key'],
1615                 })
1616                 return r
1617             ie_result['entries'] = [
1618                 self.process_ie_result(_fixup(r), download, extra_info)
1619                 for r in ie_result['entries']
1620             ]
1621             return ie_result
1622         else:
1623             raise Exception('Invalid result type: %s' % result_type)
1624
1625     def _ensure_dir_exists(self, path):
1626         return make_dir(path, self.report_error)
1627
1628     @staticmethod
1629     def _playlist_infodict(ie_result, **kwargs):
1630         return {
1631             **ie_result,
1632             'playlist': ie_result.get('title') or ie_result.get('id'),
1633             'playlist_id': ie_result.get('id'),
1634             'playlist_title': ie_result.get('title'),
1635             'playlist_uploader': ie_result.get('uploader'),
1636             'playlist_uploader_id': ie_result.get('uploader_id'),
1637             'playlist_index': 0,
1638             **kwargs,
1639         }
1640
    def __process_playlist(self, ie_result, download):
        """Select, filter and process the entries of a playlist result.

        Reads these params: playliststart, playlistend, playlist_items,
        allow_playlist_files, list_thumbnails, simulate, playlistreverse,
        playlistrandom, skip_playlist_after_errors and compat_opts.

        `ie_result` is modified in place: 'entries', 'requested_entries' and
        possibly 'playlist_count' are rewritten. `download` is passed on to
        the processing of every entry.

        Returns the playlist result as returned by run_all_pps('playlist', ...),
        or None if writing the playlist info json or description returned None.
        Raises EntryNotInPlaylist if there is no 'entries' key, or if a
        requested entry is missing from an incomplete playlist.
        """
        # We process each entry in the playlist
        playlist = ie_result.get('title') or ie_result.get('id')
        self.to_screen('[download] Downloading playlist: %s' % playlist)

        if 'entries' not in ie_result:
            raise EntryNotInPlaylist('There are no entries')

        # Sentinel marking positions that are not part of 'requested_entries'
        MissingEntry = object()
        incomplete_entries = bool(ie_result.get('requested_entries'))
        if incomplete_entries:
            def fill_missing_entries(entries, indices):
                # Put each entry at its 1-based index; the gaps keep the sentinel
                ret = [MissingEntry] * max(indices)
                for i, entry in zip(indices, entries):
                    ret[i - 1] = entry
                return ret
            ie_result['entries'] = fill_missing_entries(ie_result['entries'], ie_result['requested_entries'])

        playlist_results = []

        playliststart = self.params.get('playliststart', 1)
        playlistend = self.params.get('playlistend')
        # For backwards compatibility, interpret -1 as whole list
        if playlistend == -1:
            playlistend = None

        playlistitems_str = self.params.get('playlist_items')
        playlistitems = None
        if playlistitems_str is not None:
            def iter_playlistitems(format):
                # Comma separated list of single indices and inclusive
                # 'start-end' ranges
                for string_segment in format.split(','):
                    if '-' in string_segment:
                        start, end = string_segment.split('-')
                        for item in range(int(start), int(end) + 1):
                            yield int(item)
                    else:
                        yield int(string_segment)
            # presumably orderedSet removes duplicates while keeping the
            # order - TODO confirm against utils
            playlistitems = orderedSet(iter_playlistitems(playlistitems_str))

        ie_entries = ie_result['entries']
        if isinstance(ie_entries, list):
            playlist_count = len(ie_entries)
            msg = f'Collected {playlist_count} videos; downloading %d of them'
            ie_result['playlist_count'] = ie_result.get('playlist_count') or playlist_count

            def get_entry(i):
                return ie_entries[i - 1]
        else:
            msg = 'Downloading %d videos'
            if not isinstance(ie_entries, (PagedList, LazyList)):
                ie_entries = LazyList(ie_entries)
            elif isinstance(ie_entries, InAdvancePagedList):
                if ie_entries._pagesize == 1:
                    # NOTE(review): this local is not read again in this branch
                    playlist_count = ie_entries._pagecount

            def get_entry(i):
                # Fetching an entry may run extractor code, so it goes through
                # the common extraction error handling
                return YoutubeDL.__handle_extraction_exceptions(
                    lambda self, i: ie_entries[i - 1]
                )(self, i)

        # Collect the requested entries (indices are 1-based)
        entries, broken = [], False
        items = playlistitems if playlistitems is not None else itertools.count(playliststart)
        for i in items:
            if i == 0:
                continue
            if playlistitems is None and playlistend is not None and playlistend < i:
                break
            entry = None
            try:
                entry = get_entry(i)
                if entry is MissingEntry:
                    raise EntryNotInPlaylist()
            except (IndexError, EntryNotInPlaylist):
                if incomplete_entries:
                    raise EntryNotInPlaylist(f'Entry {i} cannot be found')
                elif not playlistitems:
                    break
            # With explicit playlist items, an entry that could not be found
            # is kept as None so that the positions stay aligned
            entries.append(entry)
            try:
                if entry is not None:
                    # Only called for the break_on_* exceptions it may raise
                    self._match_entry(entry, incomplete=True, silent=True)
            except (ExistingVideoReached, RejectedVideoReached):
                broken = True
                break
        ie_result['entries'] = entries

        # Save playlist_index before re-ordering
        entries = [
            ((playlistitems[i - 1] if playlistitems else i + playliststart - 1), entry)
            for i, entry in enumerate(entries, 1)
            if entry is not None]
        n_entries = len(entries)

        # The number of collected entries is only stored as the playlist
        # count when nothing restricted or cut short the collection
        if not (ie_result.get('playlist_count') or broken or playlistitems or playlistend):
            ie_result['playlist_count'] = n_entries

        if not playlistitems and (playliststart != 1 or playlistend):
            playlistitems = list(range(playliststart, playliststart + n_entries))
        ie_result['requested_entries'] = playlistitems

        _infojson_written = False
        write_playlist_files = self.params.get('allow_playlist_files', True)
        if write_playlist_files and self.params.get('list_thumbnails'):
            self.list_thumbnails(ie_result)
        if write_playlist_files and not self.params.get('simulate'):
            ie_copy = self._playlist_infodict(ie_result, n_entries=n_entries)
            _infojson_written = self._write_info_json(
                'playlist', ie_result, self.prepare_filename(ie_copy, 'pl_infojson'))
            if _infojson_written is None:
                return
            if self._write_description('playlist', ie_result,
                                       self.prepare_filename(ie_copy, 'pl_description')) is None:
                return
            # TODO: This should be passed to ThumbnailsConvertor if necessary
            self._write_thumbnails('playlist', ie_copy, self.prepare_filename(ie_copy, 'pl_thumbnail'))

        if self.params.get('playlistreverse', False):
            entries = entries[::-1]
        if self.params.get('playlistrandom', False):
            random.shuffle(entries)

        x_forwarded_for = ie_result.get('__x_forwarded_for_ip')

        self.to_screen('[%s] playlist %s: %s' % (ie_result['extractor'], playlist, msg % n_entries))
        failures = 0
        # Without the param (or with a falsy value) there is no limit
        max_failures = self.params.get('skip_playlist_after_errors') or float('inf')
        for i, entry_tuple in enumerate(entries, 1):
            playlist_index, entry = entry_tuple
            if 'playlist-index' in self.params.get('compat_opts', []):
                # With this compat option the index is derived from the
                # position after re-ordering instead of the saved one
                playlist_index = playlistitems[i - 1] if playlistitems else i + playliststart - 1
            self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
            # This __x_forwarded_for_ip thing is a bit ugly but requires
            # minimal changes
            if x_forwarded_for:
                entry['__x_forwarded_for_ip'] = x_forwarded_for
            # Playlist-level fields handed to the entry as extra info
            extra = {
                'n_entries': n_entries,
                '_last_playlist_index': max(playlistitems) if playlistitems else (playlistend or n_entries),
                'playlist_count': ie_result.get('playlist_count'),
                'playlist_index': playlist_index,
                'playlist_autonumber': i,
                'playlist': playlist,
                'playlist_id': ie_result.get('id'),
                'playlist_title': ie_result.get('title'),
                'playlist_uploader': ie_result.get('uploader'),
                'playlist_uploader_id': ie_result.get('uploader_id'),
                'extractor': ie_result['extractor'],
                'webpage_url': ie_result['webpage_url'],
                'webpage_url_basename': url_basename(ie_result['webpage_url']),
                'webpage_url_domain': get_domain(ie_result['webpage_url']),
                'extractor_key': ie_result['extractor_key'],
            }

            if self._match_entry(entry, incomplete=True) is not None:
                continue

            entry_result = self.__process_iterable_entry(entry, download, extra)
            # A falsy result counts as a failed entry
            if not entry_result:
                failures += 1
            if failures >= max_failures:
                self.report_error(
                    'Skipping the remaining entries in playlist "%s" since %d items failed extraction' % (playlist, failures))
                break
            playlist_results.append(entry_result)
        ie_result['entries'] = playlist_results

        # Write the updated info to json
        if _infojson_written and self._write_info_json(
                'updated playlist', ie_result,
                self.prepare_filename(ie_copy, 'pl_infojson'), overwrite=True) is None:
            return

        ie_result = self.run_all_pps('playlist', ie_result)
        self.to_screen(f'[download] Finished downloading playlist: {playlist}')
        return ie_result
1816
1817     @__handle_extraction_exceptions
1818     def __process_iterable_entry(self, entry, download, extra_info):
1819         return self.process_ie_result(
1820             entry, download=download, extra_info=extra_info)
1821
1822     def _build_format_filter(self, filter_spec):
1823         " Returns a function to filter the formats according to the filter_spec "
1824
1825         OPERATORS = {
1826             '<': operator.lt,
1827             '<=': operator.le,
1828             '>': operator.gt,
1829             '>=': operator.ge,
1830             '=': operator.eq,
1831             '!=': operator.ne,
1832         }
1833         operator_rex = re.compile(r'''(?x)\s*
1834             (?P<key>width|height|tbr|abr|vbr|asr|filesize|filesize_approx|fps)\s*
1835             (?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
1836             (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)\s*
1837             ''' % '|'.join(map(re.escape, OPERATORS.keys())))
1838         m = operator_rex.fullmatch(filter_spec)
1839         if m:
1840             try:
1841                 comparison_value = int(m.group('value'))
1842             except ValueError:
1843                 comparison_value = parse_filesize(m.group('value'))
1844                 if comparison_value is None:
1845                     comparison_value = parse_filesize(m.group('value') + 'B')
1846                 if comparison_value is None:
1847                     raise ValueError(
1848                         'Invalid value %r in format specification %r' % (
1849                             m.group('value'), filter_spec))
1850             op = OPERATORS[m.group('op')]
1851
1852         if not m:
1853             STR_OPERATORS = {
1854                 '=': operator.eq,
1855                 '^=': lambda attr, value: attr.startswith(value),
1856                 '$=': lambda attr, value: attr.endswith(value),
1857                 '*=': lambda attr, value: value in attr,
1858                 '~=': lambda attr, value: value.search(attr) is not None
1859             }
1860             str_operator_rex = re.compile(r'''(?x)\s*
1861                 (?P<key>[a-zA-Z0-9._-]+)\s*
1862                 (?P<negation>!\s*)?(?P<op>%s)\s*(?P<none_inclusive>\?\s*)?
1863                 (?P<quote>["'])?
1864                 (?P<value>(?(quote)(?:(?!(?P=quote))[^\\]|\\.)+|[\w.-]+))
1865                 (?(quote)(?P=quote))\s*
1866                 ''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))
1867             m = str_operator_rex.fullmatch(filter_spec)
1868             if m:
1869                 if m.group('op') == '~=':
1870                     comparison_value = re.compile(m.group('value'))
1871                 else:
1872                     comparison_value = re.sub(r'''\\([\\"'])''', r'\1', m.group('value'))
1873                 str_op = STR_OPERATORS[m.group('op')]
1874                 if m.group('negation'):
1875                     op = lambda attr, value: not str_op(attr, value)
1876                 else:
1877                     op = str_op
1878
1879         if not m:
1880             raise SyntaxError('Invalid filter specification %r' % filter_spec)
1881
1882         def _filter(f):
1883             actual_value = f.get(m.group('key'))
1884             if actual_value is None:
1885                 return m.group('none_inclusive')
1886             return op(actual_value, comparison_value)
1887         return _filter
1888
1889     def _check_formats(self, formats):
1890         for f in formats:
1891             self.to_screen('[info] Testing format %s' % f['format_id'])
1892             path = self.get_output_path('temp')
1893             if not self._ensure_dir_exists(f'{path}/'):
1894                 continue
1895             temp_file = tempfile.NamedTemporaryFile(suffix='.tmp', delete=False, dir=path or None)
1896             temp_file.close()
1897             try:
1898                 success, _ = self.dl(temp_file.name, f, test=True)
1899             except (DownloadError, IOError, OSError, ValueError) + network_exceptions:
1900                 success = False
1901             finally:
1902                 if os.path.exists(temp_file.name):
1903                     try:
1904                         os.remove(temp_file.name)
1905                     except OSError:
1906                         self.report_warning('Unable to delete temporary file "%s"' % temp_file.name)
1907             if success:
1908                 yield f
1909             else:
1910                 self.to_screen('[info] Unable to download format %s. Skipping...' % f['format_id'])
1911
1912     def _default_format_spec(self, info_dict, download=True):
1913
1914         def can_merge():
1915             merger = FFmpegMergerPP(self)
1916             return merger.available and merger.can_merge()
1917
1918         prefer_best = (
1919             not self.params.get('simulate')
1920             and download
1921             and (
1922                 not can_merge()
1923                 or info_dict.get('is_live', False)
1924                 or self.outtmpl_dict['default'] == '-'))
1925         compat = (
1926             prefer_best
1927             or self.params.get('allow_multiple_audio_streams', False)
1928             or 'format-spec' in self.params.get('compat_opts', []))
1929
1930         return (
1931             'best/bestvideo+bestaudio' if prefer_best
1932             else 'bestvideo*+bestaudio/best' if not compat
1933             else 'bestvideo+bestaudio/best')
1934
1935     def build_format_selector(self, format_spec):
1936         def syntax_error(note, start):
1937             message = (
1938                 'Invalid format specification: '
1939                 '{0}\n\t{1}\n\t{2}^'.format(note, format_spec, ' ' * start[1]))
1940             return SyntaxError(message)
1941
1942         PICKFIRST = 'PICKFIRST'
1943         MERGE = 'MERGE'
1944         SINGLE = 'SINGLE'
1945         GROUP = 'GROUP'
1946         FormatSelector = collections.namedtuple('FormatSelector', ['type', 'selector', 'filters'])
1947
1948         allow_multiple_streams = {'audio': self.params.get('allow_multiple_audio_streams', False),
1949                                   'video': self.params.get('allow_multiple_video_streams', False)}
1950
1951         check_formats = self.params.get('check_formats') == 'selected'
1952
1953         def _parse_filter(tokens):
1954             filter_parts = []
1955             for type, string, start, _, _ in tokens:
1956                 if type == tokenize.OP and string == ']':
1957                     return ''.join(filter_parts)
1958                 else:
1959                     filter_parts.append(string)
1960
1961         def _remove_unused_ops(tokens):
1962             # Remove operators that we don't use and join them with the surrounding strings
1963             # for example: 'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9'
1964             ALLOWED_OPS = ('/', '+', ',', '(', ')')
1965             last_string, last_start, last_end, last_line = None, None, None, None
1966             for type, string, start, end, line in tokens:
1967                 if type == tokenize.OP and string == '[':
1968                     if last_string:
1969                         yield tokenize.NAME, last_string, last_start, last_end, last_line
1970                         last_string = None
1971                     yield type, string, start, end, line
1972                     # everything inside brackets will be handled by _parse_filter
1973                     for type, string, start, end, line in tokens:
1974                         yield type, string, start, end, line
1975                         if type == tokenize.OP and string == ']':
1976                             break
1977                 elif type == tokenize.OP and string in ALLOWED_OPS:
1978                     if last_string:
1979                         yield tokenize.NAME, last_string, last_start, last_end, last_line
1980                         last_string = None
1981                     yield type, string, start, end, line
1982                 elif type in [tokenize.NAME, tokenize.NUMBER, tokenize.OP]:
1983                     if not last_string:
1984                         last_string = string
1985                         last_start = start
1986                         last_end = end
1987                     else:
1988                         last_string += string
1989             if last_string:
1990                 yield tokenize.NAME, last_string, last_start, last_end, last_line
1991
1992         def _parse_format_selection(tokens, inside_merge=False, inside_choice=False, inside_group=False):
1993             selectors = []
1994             current_selector = None
1995             for type, string, start, _, _ in tokens:
1996                 # ENCODING is only defined in python 3.x
1997                 if type == getattr(tokenize, 'ENCODING', None):
1998                     continue
1999                 elif type in [tokenize.NAME, tokenize.NUMBER]:
2000                     current_selector = FormatSelector(SINGLE, string, [])
2001                 elif type == tokenize.OP:
2002                     if string == ')':
2003                         if not inside_group:
2004                             # ')' will be handled by the parentheses group
2005                             tokens.restore_last_token()
2006                         break
2007                     elif inside_merge and string in ['/', ',']:
2008                         tokens.restore_last_token()
2009                         break
2010                     elif inside_choice and string == ',':
2011                         tokens.restore_last_token()
2012                         break
2013                     elif string == ',':
2014                         if not current_selector:
2015                             raise syntax_error('"," must follow a format selector', start)
2016                         selectors.append(current_selector)
2017                         current_selector = None
2018                     elif string == '/':
2019                         if not current_selector:
2020                             raise syntax_error('"/" must follow a format selector', start)
2021                         first_choice = current_selector
2022                         second_choice = _parse_format_selection(tokens, inside_choice=True)
2023                         current_selector = FormatSelector(PICKFIRST, (first_choice, second_choice), [])
2024                     elif string == '[':
2025                         if not current_selector:
2026                             current_selector = FormatSelector(SINGLE, 'best', [])
2027                         format_filter = _parse_filter(tokens)
2028                         current_selector.filters.append(format_filter)
2029                     elif string == '(':
2030                         if current_selector:
2031                             raise syntax_error('Unexpected "("', start)
2032                         group = _parse_format_selection(tokens, inside_group=True)
2033                         current_selector = FormatSelector(GROUP, group, [])
2034                     elif string == '+':
2035                         if not current_selector:
2036                             raise syntax_error('Unexpected "+"', start)
2037                         selector_1 = current_selector
2038                         selector_2 = _parse_format_selection(tokens, inside_merge=True)
2039                         if not selector_2:
2040                             raise syntax_error('Expected a selector', start)
2041                         current_selector = FormatSelector(MERGE, (selector_1, selector_2), [])
2042                     else:
2043                         raise syntax_error('Operator not recognized: "{0}"'.format(string), start)
2044                 elif type == tokenize.ENDMARKER:
2045                     break
2046             if current_selector:
2047                 selectors.append(current_selector)
2048             return selectors
2049
2050         def _merge(formats_pair):
2051             format_1, format_2 = formats_pair
2052
2053             formats_info = []
2054             formats_info.extend(format_1.get('requested_formats', (format_1,)))
2055             formats_info.extend(format_2.get('requested_formats', (format_2,)))
2056
2057             if not allow_multiple_streams['video'] or not allow_multiple_streams['audio']:
2058                 get_no_more = {'video': False, 'audio': False}
2059                 for (i, fmt_info) in enumerate(formats_info):
2060                     if fmt_info.get('acodec') == fmt_info.get('vcodec') == 'none':
2061                         formats_info.pop(i)
2062                         continue
2063                     for aud_vid in ['audio', 'video']:
2064                         if not allow_multiple_streams[aud_vid] and fmt_info.get(aud_vid[0] + 'codec') != 'none':
2065                             if get_no_more[aud_vid]:
2066                                 formats_info.pop(i)
2067                                 break
2068                             get_no_more[aud_vid] = True
2069
2070             if len(formats_info) == 1:
2071                 return formats_info[0]
2072
2073             video_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('vcodec') != 'none']
2074             audio_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('acodec') != 'none']
2075
2076             the_only_video = video_fmts[0] if len(video_fmts) == 1 else None
2077             the_only_audio = audio_fmts[0] if len(audio_fmts) == 1 else None
2078
2079             output_ext = self.params.get('merge_output_format')
2080             if not output_ext:
2081                 if the_only_video:
2082                     output_ext = the_only_video['ext']
2083                 elif the_only_audio and not video_fmts:
2084                     output_ext = the_only_audio['ext']
2085                 else:
2086                     output_ext = 'mkv'
2087
2088             filtered = lambda *keys: filter(None, (traverse_obj(fmt, *keys) for fmt in formats_info))
2089
2090             new_dict = {
2091                 'requested_formats': formats_info,
2092                 'format': '+'.join(filtered('format')),
2093                 'format_id': '+'.join(filtered('format_id')),
2094                 'ext': output_ext,
2095                 'protocol': '+'.join(map(determine_protocol, formats_info)),
2096                 'language': '+'.join(orderedSet(filtered('language'))) or None,
2097                 'format_note': '+'.join(orderedSet(filtered('format_note'))) or None,
2098                 'filesize_approx': sum(filtered('filesize', 'filesize_approx')) or None,
2099                 'tbr': sum(filtered('tbr', 'vbr', 'abr')),
2100             }
2101
2102             if the_only_video:
2103                 new_dict.update({
2104                     'width': the_only_video.get('width'),
2105                     'height': the_only_video.get('height'),
2106                     'resolution': the_only_video.get('resolution') or self.format_resolution(the_only_video),
2107                     'fps': the_only_video.get('fps'),
2108                     'dynamic_range': the_only_video.get('dynamic_range'),
2109                     'vcodec': the_only_video.get('vcodec'),
2110                     'vbr': the_only_video.get('vbr'),
2111                     'stretched_ratio': the_only_video.get('stretched_ratio'),
2112                 })
2113
2114             if the_only_audio:
2115                 new_dict.update({
2116                     'acodec': the_only_audio.get('acodec'),
2117                     'abr': the_only_audio.get('abr'),
2118                     'asr': the_only_audio.get('asr'),
2119                 })
2120
2121             return new_dict
2122
2123         def _check_formats(formats):
2124             if not check_formats:
2125                 yield from formats
2126                 return
2127             yield from self._check_formats(formats)
2128
2129         def _build_selector_function(selector):
2130             if isinstance(selector, list):  # ,
2131                 fs = [_build_selector_function(s) for s in selector]
2132
2133                 def selector_function(ctx):
2134                     for f in fs:
2135                         yield from f(ctx)
2136                 return selector_function
2137
2138             elif selector.type == GROUP:  # ()
2139                 selector_function = _build_selector_function(selector.selector)
2140
2141             elif selector.type == PICKFIRST:  # /
2142                 fs = [_build_selector_function(s) for s in selector.selector]
2143
2144                 def selector_function(ctx):
2145                     for f in fs:
2146                         picked_formats = list(f(ctx))
2147                         if picked_formats:
2148                             return picked_formats
2149                     return []
2150
2151             elif selector.type == MERGE:  # +
2152                 selector_1, selector_2 = map(_build_selector_function, selector.selector)
2153
2154                 def selector_function(ctx):
2155                     for pair in itertools.product(selector_1(ctx), selector_2(ctx)):
2156                         yield _merge(pair)
2157
2158             elif selector.type == SINGLE:  # atom
2159                 format_spec = selector.selector or 'best'
2160
2161                 # TODO: Add allvideo, allaudio etc by generalizing the code with best/worst selector
2162                 if format_spec == 'all':
2163                     def selector_function(ctx):
2164                         yield from _check_formats(ctx['formats'][::-1])
2165                 elif format_spec == 'mergeall':
2166                     def selector_function(ctx):
2167                         formats = list(_check_formats(ctx['formats']))
2168                         if not formats:
2169                             return
2170                         merged_format = formats[-1]
2171                         for f in formats[-2::-1]:
2172                             merged_format = _merge((merged_format, f))
2173                         yield merged_format
2174
2175                 else:
2176                     format_fallback, format_reverse, format_idx = False, True, 1
2177                     mobj = re.match(
2178                         r'(?P<bw>best|worst|b|w)(?P<type>video|audio|v|a)?(?P<mod>\*)?(?:\.(?P<n>[1-9]\d*))?$',
2179                         format_spec)
2180                     if mobj is not None:
2181                         format_idx = int_or_none(mobj.group('n'), default=1)
2182                         format_reverse = mobj.group('bw')[0] == 'b'
2183                         format_type = (mobj.group('type') or [None])[0]
2184                         not_format_type = {'v': 'a', 'a': 'v'}.get(format_type)
2185                         format_modified = mobj.group('mod') is not None
2186
2187                         format_fallback = not format_type and not format_modified  # for b, w
2188                         _filter_f = (
2189                             (lambda f: f.get('%scodec' % format_type) != 'none')
2190                             if format_type and format_modified  # bv*, ba*, wv*, wa*
2191                             else (lambda f: f.get('%scodec' % not_format_type) == 'none')
2192                             if format_type  # bv, ba, wv, wa
2193                             else (lambda f: f.get('vcodec') != 'none' and f.get('acodec') != 'none')
2194                             if not format_modified  # b, w
2195                             else lambda f: True)  # b*, w*
2196                         filter_f = lambda f: _filter_f(f) and (
2197                             f.get('vcodec') != 'none' or f.get('acodec') != 'none')
2198                     else:
2199                         if format_spec in self._format_selection_exts['audio']:
2200                             filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') != 'none'
2201                         elif format_spec in self._format_selection_exts['video']:
2202                             filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') != 'none' and f.get('vcodec') != 'none'
2203                         elif format_spec in self._format_selection_exts['storyboards']:
2204                             filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') == 'none' and f.get('vcodec') == 'none'
2205                         else:
2206                             filter_f = lambda f: f.get('format_id') == format_spec  # id
2207
2208                     def selector_function(ctx):
2209                         formats = list(ctx['formats'])
2210                         matches = list(filter(filter_f, formats)) if filter_f is not None else formats
2211                         if format_fallback and ctx['incomplete_formats'] and not matches:
2212                             # for extractors with incomplete formats (audio only (soundcloud)
2213                             # or video only (imgur)) best/worst will fallback to
2214                             # best/worst {video,audio}-only format
2215                             matches = formats
2216                         matches = LazyList(_check_formats(matches[::-1 if format_reverse else 1]))
2217                         try:
2218                             yield matches[format_idx - 1]
2219                         except IndexError:
2220                             return
2221
2222             filters = [self._build_format_filter(f) for f in selector.filters]
2223
2224             def final_selector(ctx):
2225                 ctx_copy = dict(ctx)
2226                 for _filter in filters:
2227                     ctx_copy['formats'] = list(filter(_filter, ctx_copy['formats']))
2228                 return selector_function(ctx_copy)
2229             return final_selector
2230
2231         stream = io.BytesIO(format_spec.encode('utf-8'))
2232         try:
2233             tokens = list(_remove_unused_ops(compat_tokenize_tokenize(stream.readline)))
2234         except tokenize.TokenError:
2235             raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec)))
2236
2237         class TokenIterator(object):
2238             def __init__(self, tokens):
2239                 self.tokens = tokens
2240                 self.counter = 0
2241
2242             def __iter__(self):
2243                 return self
2244
2245             def __next__(self):
2246                 if self.counter >= len(self.tokens):
2247                     raise StopIteration()
2248                 value = self.tokens[self.counter]
2249                 self.counter += 1
2250                 return value
2251
2252             next = __next__
2253
2254             def restore_last_token(self):
2255                 self.counter -= 1
2256
2257         parsed_selector = _parse_format_selection(iter(TokenIterator(tokens)))
2258         return _build_selector_function(parsed_selector)
2259
2260     def _calc_headers(self, info_dict):
2261         res = merge_headers(self.params['http_headers'], info_dict.get('http_headers') or {})
2262
2263         cookies = self._calc_cookies(info_dict)
2264         if cookies:
2265             res['Cookie'] = cookies
2266
2267         if 'X-Forwarded-For' not in res:
2268             x_forwarded_for_ip = info_dict.get('__x_forwarded_for_ip')
2269             if x_forwarded_for_ip:
2270                 res['X-Forwarded-For'] = x_forwarded_for_ip
2271
2272         return res
2273
2274     def _calc_cookies(self, info_dict):
2275         pr = sanitized_Request(info_dict['url'])
2276         self.cookiejar.add_cookie_header(pr)
2277         return pr.get_header('Cookie')
2278
2279     def _sort_thumbnails(self, thumbnails):
2280         thumbnails.sort(key=lambda t: (
2281             t.get('preference') if t.get('preference') is not None else -1,
2282             t.get('width') if t.get('width') is not None else -1,
2283             t.get('height') if t.get('height') is not None else -1,
2284             t.get('id') if t.get('id') is not None else '',
2285             t.get('url')))
2286
2287     def _sanitize_thumbnails(self, info_dict):
2288         thumbnails = info_dict.get('thumbnails')
2289         if thumbnails is None:
2290             thumbnail = info_dict.get('thumbnail')
2291             if thumbnail:
2292                 info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}]
2293         if not thumbnails:
2294             return
2295
2296         def check_thumbnails(thumbnails):
2297             for t in thumbnails:
2298                 self.to_screen(f'[info] Testing thumbnail {t["id"]}')
2299                 try:
2300                     self.urlopen(HEADRequest(t['url']))
2301                 except network_exceptions as err:
2302                     self.to_screen(f'[info] Unable to connect to thumbnail {t["id"]} URL {t["url"]!r} - {err}. Skipping...')
2303                     continue
2304                 yield t
2305
2306         self._sort_thumbnails(thumbnails)
2307         for i, t in enumerate(thumbnails):
2308             if t.get('id') is None:
2309                 t['id'] = '%d' % i
2310             if t.get('width') and t.get('height'):
2311                 t['resolution'] = '%dx%d' % (t['width'], t['height'])
2312             t['url'] = sanitize_url(t['url'])
2313
2314         if self.params.get('check_formats') is True:
2315             info_dict['thumbnails'] = LazyList(check_thumbnails(thumbnails[::-1]), reverse=True)
2316         else:
2317             info_dict['thumbnails'] = thumbnails
2318
2319     def _fill_common_fields(self, info_dict, is_video=True):
2320         # TODO: move sanitization here
2321         if is_video:
2322             # playlists are allowed to lack "title"
2323             info_dict['fulltitle'] = info_dict.get('title')
2324             if 'title' not in info_dict:
2325                 raise ExtractorError('Missing "title" field in extractor result',
2326                                      video_id=info_dict['id'], ie=info_dict['extractor'])
2327             elif not info_dict.get('title'):
2328                 self.report_warning('Extractor failed to obtain "title". Creating a generic title instead')
2329                 info_dict['title'] = f'{info_dict["extractor"]} video #{info_dict["id"]}'
2330
2331         if info_dict.get('duration') is not None:
2332             info_dict['duration_string'] = formatSeconds(info_dict['duration'])
2333
2334         for ts_key, date_key in (
2335                 ('timestamp', 'upload_date'),
2336                 ('release_timestamp', 'release_date'),
2337                 ('modified_timestamp', 'modified_date'),
2338         ):
2339             if info_dict.get(date_key) is None and info_dict.get(ts_key) is not None:
2340                 # Working around out-of-range timestamp values (e.g. negative ones on Windows,
2341                 # see http://bugs.python.org/issue1646728)
2342                 try:
2343                     upload_date = datetime.datetime.utcfromtimestamp(info_dict[ts_key])
2344                     info_dict[date_key] = upload_date.strftime('%Y%m%d')
2345                 except (ValueError, OverflowError, OSError):
2346                     pass
2347
2348         live_keys = ('is_live', 'was_live')
2349         live_status = info_dict.get('live_status')
2350         if live_status is None:
2351             for key in live_keys:
2352                 if info_dict.get(key) is False:
2353                     continue
2354                 if info_dict.get(key):
2355                     live_status = key
2356                 break
2357             if all(info_dict.get(key) is False for key in live_keys):
2358                 live_status = 'not_live'
2359         if live_status:
2360             info_dict['live_status'] = live_status
2361             for key in live_keys:
2362                 if info_dict.get(key) is None:
2363                     info_dict[key] = (live_status == key)
2364
2365         # Auto generate title fields corresponding to the *_number fields when missing
2366         # in order to always have clean titles. This is very common for TV series.
2367         for field in ('chapter', 'season', 'episode'):
2368             if info_dict.get('%s_number' % field) is not None and not info_dict.get(field):
2369                 info_dict[field] = '%s %d' % (field.capitalize(), info_dict['%s_number' % field])
2370
2371     def process_video_result(self, info_dict, download=True):
2372         assert info_dict.get('_type', 'video') == 'video'
2373         self._num_videos += 1
2374
2375         if 'id' not in info_dict:
2376             raise ExtractorError('Missing "id" field in extractor result', ie=info_dict['extractor'])
2377         elif not info_dict.get('id'):
2378             raise ExtractorError('Extractor failed to obtain "id"', ie=info_dict['extractor'])
2379
2380         def report_force_conversion(field, field_not, conversion):
2381             self.report_warning(
2382                 '"%s" field is not %s - forcing %s conversion, there is an error in extractor'
2383                 % (field, field_not, conversion))
2384
2385         def sanitize_string_field(info, string_field):
2386             field = info.get(string_field)
2387             if field is None or isinstance(field, compat_str):
2388                 return
2389             report_force_conversion(string_field, 'a string', 'string')
2390             info[string_field] = compat_str(field)
2391
2392         def sanitize_numeric_fields(info):
2393             for numeric_field in self._NUMERIC_FIELDS:
2394                 field = info.get(numeric_field)
2395                 if field is None or isinstance(field, compat_numeric_types):
2396                     continue
2397                 report_force_conversion(numeric_field, 'numeric', 'int')
2398                 info[numeric_field] = int_or_none(field)
2399
2400         sanitize_string_field(info_dict, 'id')
2401         sanitize_numeric_fields(info_dict)
2402         if (info_dict.get('duration') or 0) <= 0 and info_dict.pop('duration', None):
2403             self.report_warning('"duration" field is negative, there is an error in extractor')
2404
2405         if 'playlist' not in info_dict:
2406             # It isn't part of a playlist
2407             info_dict['playlist'] = None
2408             info_dict['playlist_index'] = None
2409
2410         self._sanitize_thumbnails(info_dict)
2411
2412         thumbnail = info_dict.get('thumbnail')
2413         thumbnails = info_dict.get('thumbnails')
2414         if thumbnail:
2415             info_dict['thumbnail'] = sanitize_url(thumbnail)
2416         elif thumbnails:
2417             info_dict['thumbnail'] = thumbnails[-1]['url']
2418
2419         if info_dict.get('display_id') is None and 'id' in info_dict:
2420             info_dict['display_id'] = info_dict['id']
2421
2422         self._fill_common_fields(info_dict)
2423
2424         for cc_kind in ('subtitles', 'automatic_captions'):
2425             cc = info_dict.get(cc_kind)
2426             if cc:
2427                 for _, subtitle in cc.items():
2428                     for subtitle_format in subtitle:
2429                         if subtitle_format.get('url'):
2430                             subtitle_format['url'] = sanitize_url(subtitle_format['url'])
2431                         if subtitle_format.get('ext') is None:
2432                             subtitle_format['ext'] = determine_ext(subtitle_format['url']).lower()
2433
2434         automatic_captions = info_dict.get('automatic_captions')
2435         subtitles = info_dict.get('subtitles')
2436
2437         info_dict['requested_subtitles'] = self.process_subtitles(
2438             info_dict['id'], subtitles, automatic_captions)
2439
2440         if info_dict.get('formats') is None:
2441             # There's only one format available
2442             formats = [info_dict]
2443         else:
2444             formats = info_dict['formats']
2445
2446         info_dict['__has_drm'] = any(f.get('has_drm') for f in formats)
2447         if not self.params.get('allow_unplayable_formats'):
2448             formats = [f for f in formats if not f.get('has_drm')]
2449
2450         get_from_start = not info_dict.get('is_live') or bool(self.params.get('live_from_start'))
2451         if not get_from_start:
2452             info_dict['title'] += ' ' + datetime.datetime.now().strftime('%Y-%m-%d %H:%M')
2453         if info_dict.get('is_live') and formats:
2454             formats = [f for f in formats if bool(f.get('is_from_start')) == get_from_start]
2455             if get_from_start and not formats:
2456                 self.raise_no_formats(info_dict, msg='--live-from-start is passed, but there are no formats that can be downloaded from the start. '
2457                                                      'If you want to download from the current time, pass --no-live-from-start')
2458
2459         if not formats:
2460             self.raise_no_formats(info_dict)
2461
2462         def is_wellformed(f):
2463             url = f.get('url')
2464             if not url:
2465                 self.report_warning(
2466                     '"url" field is missing or empty - skipping format, '
2467                     'there is an error in extractor')
2468                 return False
2469             if isinstance(url, bytes):
2470                 sanitize_string_field(f, 'url')
2471             return True
2472
2473         # Filter out malformed formats for better extraction robustness
2474         formats = list(filter(is_wellformed, formats))
2475
2476         formats_dict = {}
2477
2478         # We check that all the formats have the format and format_id fields
2479         for i, format in enumerate(formats):
2480             sanitize_string_field(format, 'format_id')
2481             sanitize_numeric_fields(format)
2482             format['url'] = sanitize_url(format['url'])
2483             if not format.get('format_id'):
2484                 format['format_id'] = compat_str(i)
2485             else:
2486                 # Sanitize format_id from characters used in format selector expression
2487                 format['format_id'] = re.sub(r'[\s,/+\[\]()]', '_', format['format_id'])
2488             format_id = format['format_id']
2489             if format_id not in formats_dict:
2490                 formats_dict[format_id] = []
2491             formats_dict[format_id].append(format)
2492
2493         # Make sure all formats have unique format_id
2494         common_exts = set(itertools.chain(*self._format_selection_exts.values()))
2495         for format_id, ambiguous_formats in formats_dict.items():
2496             ambigious_id = len(ambiguous_formats) > 1
2497             for i, format in enumerate(ambiguous_formats):
2498                 if ambigious_id:
2499                     format['format_id'] = '%s-%d' % (format_id, i)
2500                 if format.get('ext') is None:
2501                     format['ext'] = determine_ext(format['url']).lower()
2502                 # Ensure there is no conflict between id and ext in format selection
2503                 # See https://github.com/yt-dlp/yt-dlp/issues/1282
2504                 if format['format_id'] != format['ext'] and format['format_id'] in common_exts:
2505                     format['format_id'] = 'f%s' % format['format_id']
2506
2507         for i, format in enumerate(formats):
2508             if format.get('format') is None:
2509                 format['format'] = '{id} - {res}{note}'.format(
2510                     id=format['format_id'],
2511                     res=self.format_resolution(format),
2512                     note=format_field(format, 'format_note', ' (%s)'),
2513                 )
2514             if format.get('protocol') is None:
2515                 format['protocol'] = determine_protocol(format)
2516             if format.get('resolution') is None:
2517                 format['resolution'] = self.format_resolution(format, default=None)
2518             if format.get('dynamic_range') is None and format.get('vcodec') != 'none':
2519                 format['dynamic_range'] = 'SDR'
2520             if (info_dict.get('duration') and format.get('tbr')
2521                     and not format.get('filesize') and not format.get('filesize_approx')):
2522                 format['filesize_approx'] = info_dict['duration'] * format['tbr'] * (1024 / 8)
2523
2524             # Add HTTP headers, so that external programs can use them from the
2525             # json output
2526             full_format_info = info_dict.copy()
2527             full_format_info.update(format)
2528             format['http_headers'] = self._calc_headers(full_format_info)
2529         # Remove private housekeeping stuff
2530         if '__x_forwarded_for_ip' in info_dict:
2531             del info_dict['__x_forwarded_for_ip']
2532
2533         if self.params.get('check_formats') is True:
2534             formats = LazyList(self._check_formats(formats[::-1]), reverse=True)
2535
2536         if not formats or formats[0] is not info_dict:
2537             # only set the 'formats' fields if the original info_dict list them
2538             # otherwise we end up with a circular reference, the first (and unique)
2539             # element in the 'formats' field in info_dict is info_dict itself,
2540             # which can't be exported to json
2541             info_dict['formats'] = formats
2542
2543         info_dict, _ = self.pre_process(info_dict)
2544
2545         if self._match_entry(info_dict) is not None:
2546             return info_dict
2547
2548         self.post_extract(info_dict)
2549         info_dict, _ = self.pre_process(info_dict, 'after_filter')
2550
2551         # The pre-processors may have modified the formats
2552         formats = info_dict.get('formats', [info_dict])
2553
2554         list_only = self.params.get('simulate') is None and (
2555             self.params.get('list_thumbnails') or self.params.get('listformats') or self.params.get('listsubtitles'))
2556         interactive_format_selection = not list_only and self.format_selector == '-'
2557         if self.params.get('list_thumbnails'):
2558             self.list_thumbnails(info_dict)
2559         if self.params.get('listsubtitles'):
2560             if 'automatic_captions' in info_dict:
2561                 self.list_subtitles(
2562                     info_dict['id'], automatic_captions, 'automatic captions')
2563             self.list_subtitles(info_dict['id'], subtitles, 'subtitles')
2564         if self.params.get('listformats') or interactive_format_selection:
2565             self.list_formats(info_dict)
2566         if list_only:
2567             # Without this printing, -F --print-json will not work
2568             self.__forced_printings(info_dict, self.prepare_filename(info_dict), incomplete=True)
2569             return
2570
2571         format_selector = self.format_selector
2572         if format_selector is None:
2573             req_format = self._default_format_spec(info_dict, download=download)
2574             self.write_debug('Default format spec: %s' % req_format)
2575             format_selector = self.build_format_selector(req_format)
2576
2577         while True:
2578             if interactive_format_selection:
2579                 req_format = input(
2580                     self._format_screen('\nEnter format selector: ', self.Styles.EMPHASIS))
2581                 try:
2582                     format_selector = self.build_format_selector(req_format)
2583                 except SyntaxError as err:
2584                     self.report_error(err, tb=False, is_error=False)
2585                     continue
2586
2587             # While in format selection we may need to have an access to the original
2588             # format set in order to calculate some metrics or do some processing.
2589             # For now we need to be able to guess whether original formats provided
2590             # by extractor are incomplete or not (i.e. whether extractor provides only
2591             # video-only or audio-only formats) for proper formats selection for
2592             # extractors with such incomplete formats (see
2593             # https://github.com/ytdl-org/youtube-dl/pull/5556).
2594             # Since formats may be filtered during format selection and may not match
2595             # the original formats the results may be incorrect. Thus original formats
2596             # or pre-calculated metrics should be passed to format selection routines
2597             # as well.
2598             # We will pass a context object containing all necessary additional data
2599             # instead of just formats.
2600             # This fixes incorrect format selection issue (see
2601             # https://github.com/ytdl-org/youtube-dl/issues/10083).
2602             incomplete_formats = (
2603                 # All formats are video-only or
2604                 all(f.get('vcodec') != 'none' and f.get('acodec') == 'none' for f in formats)
2605                 # all formats are audio-only
2606                 or all(f.get('vcodec') == 'none' and f.get('acodec') != 'none' for f in formats))
2607
2608             ctx = {
2609                 'formats': formats,
2610                 'incomplete_formats': incomplete_formats,
2611             }
2612
2613             formats_to_download = list(format_selector(ctx))
2614             if interactive_format_selection and not formats_to_download:
2615                 self.report_error('Requested format is not available', tb=False, is_error=False)
2616                 continue
2617             break
2618
2619         if not formats_to_download:
2620             if not self.params.get('ignore_no_formats_error'):
2621                 raise ExtractorError('Requested format is not available', expected=True,
2622                                      video_id=info_dict['id'], ie=info_dict['extractor'])
2623             self.report_warning('Requested format is not available')
2624             # Process what we can, even without any available formats.
2625             formats_to_download = [{}]
2626
2627         best_format = formats_to_download[-1]
2628         if download:
2629             if best_format:
2630                 self.to_screen(
2631                     f'[info] {info_dict["id"]}: Downloading {len(formats_to_download)} format(s): '
2632                     + ', '.join([f['format_id'] for f in formats_to_download]))
2633             max_downloads_reached = False
2634             for i, fmt in enumerate(formats_to_download):
2635                 formats_to_download[i] = new_info = self._copy_infodict(info_dict)
2636                 new_info.update(fmt)
2637                 try:
2638                     self.process_info(new_info)
2639                 except MaxDownloadsReached:
2640                     max_downloads_reached = True
2641                 # Remove copied info
2642                 for key, val in tuple(new_info.items()):
2643                     if info_dict.get(key) == val:
2644                         new_info.pop(key)
2645                 if max_downloads_reached:
2646                     break
2647
2648             write_archive = set(f.get('__write_download_archive', False) for f in formats_to_download)
2649             assert write_archive.issubset({True, False, 'ignore'})
2650             if True in write_archive and False not in write_archive:
2651                 self.record_download_archive(info_dict)
2652
2653             info_dict['requested_downloads'] = formats_to_download
2654             info_dict = self.run_all_pps('after_video', info_dict)
2655             if max_downloads_reached:
2656                 raise MaxDownloadsReached()
2657
2658         # We update the info dict with the selected best quality format (backwards compatibility)
2659         info_dict.update(best_format)
2660         return info_dict
2661
2662     def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
2663         """Select the requested subtitles and their format"""
2664         available_subs = {}
2665         if normal_subtitles and self.params.get('writesubtitles'):
2666             available_subs.update(normal_subtitles)
2667         if automatic_captions and self.params.get('writeautomaticsub'):
2668             for lang, cap_info in automatic_captions.items():
2669                 if lang not in available_subs:
2670                     available_subs[lang] = cap_info
2671
2672         if (not self.params.get('writesubtitles') and not
2673                 self.params.get('writeautomaticsub') or not
2674                 available_subs):
2675             return None
2676
2677         all_sub_langs = available_subs.keys()
2678         if self.params.get('allsubtitles', False):
2679             requested_langs = all_sub_langs
2680         elif self.params.get('subtitleslangs', False):
2681             # A list is used so that the order of languages will be the same as
2682             # given in subtitleslangs. See https://github.com/yt-dlp/yt-dlp/issues/1041
2683             requested_langs = []
2684             for lang_re in self.params.get('subtitleslangs'):
2685                 discard = lang_re[0] == '-'
2686                 if discard:
2687                     lang_re = lang_re[1:]
2688                 if lang_re == 'all':
2689                     if discard:
2690                         requested_langs = []
2691                     else:
2692                         requested_langs.extend(all_sub_langs)
2693                     continue
2694                 current_langs = filter(re.compile(lang_re + '$').match, all_sub_langs)
2695                 if discard:
2696                     for lang in current_langs:
2697                         while lang in requested_langs:
2698                             requested_langs.remove(lang)
2699                 else:
2700                     requested_langs.extend(current_langs)
2701             requested_langs = orderedSet(requested_langs)
2702         elif 'en' in available_subs:
2703             requested_langs = ['en']
2704         else:
2705             requested_langs = [list(all_sub_langs)[0]]
2706         if requested_langs:
2707             self.write_debug('Downloading subtitles: %s' % ', '.join(requested_langs))
2708
2709         formats_query = self.params.get('subtitlesformat', 'best')
2710         formats_preference = formats_query.split('/') if formats_query else []
2711         subs = {}
2712         for lang in requested_langs:
2713             formats = available_subs.get(lang)
2714             if formats is None:
2715                 self.report_warning('%s subtitles not available for %s' % (lang, video_id))
2716                 continue
2717             for ext in formats_preference:
2718                 if ext == 'best':
2719                     f = formats[-1]
2720                     break
2721                 matches = list(filter(lambda f: f['ext'] == ext, formats))
2722                 if matches:
2723                     f = matches[-1]
2724                     break
2725             else:
2726                 f = formats[-1]
2727                 self.report_warning(
2728                     'No subtitle format found matching "%s" for language %s, '
2729                     'using %s' % (formats_query, lang, f['ext']))
2730             subs[lang] = f
2731         return subs
2732
2733     def _forceprint(self, key, info_dict):
2734         if info_dict is None:
2735             return
2736         info_copy = info_dict.copy()
2737         info_copy['formats_table'] = self.render_formats_table(info_dict)
2738         info_copy['thumbnails_table'] = self.render_thumbnails_table(info_dict)
2739         info_copy['subtitles_table'] = self.render_subtitles_table(info_dict.get('id'), info_dict.get('subtitles'))
2740         info_copy['automatic_captions_table'] = self.render_subtitles_table(info_dict.get('id'), info_dict.get('automatic_captions'))
2741
2742         def format_tmpl(tmpl):
2743             mobj = re.match(r'\w+(=?)$', tmpl)
2744             if mobj and mobj.group(1):
2745                 return f'{tmpl[:-1]} = %({tmpl[:-1]})r'
2746             elif mobj:
2747                 return f'%({tmpl})s'
2748             return tmpl
2749
2750         for tmpl in self.params['forceprint'].get(key, []):
2751             self.to_stdout(self.evaluate_outtmpl(format_tmpl(tmpl), info_copy))
2752
2753         for tmpl, file_tmpl in self.params['print_to_file'].get(key, []):
2754             filename = self.evaluate_outtmpl(file_tmpl, info_dict)
2755             tmpl = format_tmpl(tmpl)
2756             self.to_screen(f'[info] Writing {tmpl!r} to: {filename}')
2757             if self._ensure_dir_exists(filename):
2758                 with io.open(filename, 'a', encoding='utf-8') as f:
2759                     f.write(self.evaluate_outtmpl(tmpl, info_copy) + '\n')
2760
2761     def __forced_printings(self, info_dict, filename, incomplete):
2762         def print_mandatory(field, actual_field=None):
2763             if actual_field is None:
2764                 actual_field = field
2765             if (self.params.get('force%s' % field, False)
2766                     and (not incomplete or info_dict.get(actual_field) is not None)):
2767                 self.to_stdout(info_dict[actual_field])
2768
2769         def print_optional(field):
2770             if (self.params.get('force%s' % field, False)
2771                     and info_dict.get(field) is not None):
2772                 self.to_stdout(info_dict[field])
2773
2774         info_dict = info_dict.copy()
2775         if filename is not None:
2776             info_dict['filename'] = filename
2777         if info_dict.get('requested_formats') is not None:
2778             # For RTMP URLs, also include the playpath
2779             info_dict['urls'] = '\n'.join(f['url'] + f.get('play_path', '') for f in info_dict['requested_formats'])
2780         elif 'url' in info_dict:
2781             info_dict['urls'] = info_dict['url'] + info_dict.get('play_path', '')
2782
2783         if (self.params.get('forcejson')
2784                 or self.params['forceprint'].get('video')
2785                 or self.params['print_to_file'].get('video')):
2786             self.post_extract(info_dict)
2787         self._forceprint('video', info_dict)
2788
2789         print_mandatory('title')
2790         print_mandatory('id')
2791         print_mandatory('url', 'urls')
2792         print_optional('thumbnail')
2793         print_optional('description')
2794         print_optional('filename')
2795         if self.params.get('forceduration') and info_dict.get('duration') is not None:
2796             self.to_stdout(formatSeconds(info_dict['duration']))
2797         print_mandatory('format')
2798
2799         if self.params.get('forcejson'):
2800             self.to_stdout(json.dumps(self.sanitize_info(info_dict)))
2801
2802     def dl(self, name, info, subtitle=False, test=False):
2803         if not info.get('url'):
2804             self.raise_no_formats(info, True)
2805
2806         if test:
2807             verbose = self.params.get('verbose')
2808             params = {
2809                 'test': True,
2810                 'quiet': self.params.get('quiet') or not verbose,
2811                 'verbose': verbose,
2812                 'noprogress': not verbose,
2813                 'nopart': True,
2814                 'skip_unavailable_fragments': False,
2815                 'keep_fragments': False,
2816                 'overwrites': True,
2817                 '_no_ytdl_file': True,
2818             }
2819         else:
2820             params = self.params
2821         fd = get_suitable_downloader(info, params, to_stdout=(name == '-'))(self, params)
2822         if not test:
2823             for ph in self._progress_hooks:
2824                 fd.add_progress_hook(ph)
2825             urls = '", "'.join(
2826                 (f['url'].split(',')[0] + ',<data>' if f['url'].startswith('data:') else f['url'])
2827                 for f in info.get('requested_formats', []) or [info])
2828             self.write_debug('Invoking downloader on "%s"' % urls)
2829
2830         # Note: Ideally info should be a deep-copied so that hooks cannot modify it.
2831         # But it may contain objects that are not deep-copyable
2832         new_info = self._copy_infodict(info)
2833         if new_info.get('http_headers') is None:
2834             new_info['http_headers'] = self._calc_headers(new_info)
2835         return fd.download(name, new_info, subtitle)
2836
2837     def existing_file(self, filepaths, *, default_overwrite=True):
2838         existing_files = list(filter(os.path.exists, orderedSet(filepaths)))
2839         if existing_files and not self.params.get('overwrites', default_overwrite):
2840             return existing_files[0]
2841
2842         for file in existing_files:
2843             self.report_file_delete(file)
2844             os.remove(file)
2845         return None
2846
2847     def process_info(self, info_dict):
2848         """Process a single resolved IE result. (Modifies it in-place)"""
2849
2850         assert info_dict.get('_type', 'video') == 'video'
2851         original_infodict = info_dict
2852
2853         if 'format' not in info_dict and 'ext' in info_dict:
2854             info_dict['format'] = info_dict['ext']
2855
2856         # This is mostly just for backward compatibility of process_info
2857         # As a side-effect, this allows for format-specific filters
2858         if self._match_entry(info_dict) is not None:
2859             info_dict['__write_download_archive'] = 'ignore'
2860             return
2861
2862         # Does nothing under normal operation - for backward compatibility of process_info
2863         self.post_extract(info_dict)
2864
2865         # info_dict['_filename'] needs to be set for backward compatibility
2866         info_dict['_filename'] = full_filename = self.prepare_filename(info_dict, warn=True)
2867         temp_filename = self.prepare_filename(info_dict, 'temp')
2868         files_to_move = {}
2869
2870         self._num_downloads += 1
2871
2872         # Forced printings
2873         self.__forced_printings(info_dict, full_filename, incomplete=('format' not in info_dict))
2874
2875         if self.params.get('simulate'):
2876             info_dict['__write_download_archive'] = self.params.get('force_write_download_archive')
2877             return
2878
2879         if full_filename is None:
2880             return
2881         if not self._ensure_dir_exists(encodeFilename(full_filename)):
2882             return
2883         if not self._ensure_dir_exists(encodeFilename(temp_filename)):
2884             return
2885
2886         if self._write_description('video', info_dict,
2887                                    self.prepare_filename(info_dict, 'description')) is None:
2888             return
2889
2890         sub_files = self._write_subtitles(info_dict, temp_filename)
2891         if sub_files is None:
2892             return
2893         files_to_move.update(dict(sub_files))
2894
2895         thumb_files = self._write_thumbnails(
2896             'video', info_dict, temp_filename, self.prepare_filename(info_dict, 'thumbnail'))
2897         if thumb_files is None:
2898             return
2899         files_to_move.update(dict(thumb_files))
2900
2901         infofn = self.prepare_filename(info_dict, 'infojson')
2902         _infojson_written = self._write_info_json('video', info_dict, infofn)
2903         if _infojson_written:
2904             info_dict['infojson_filename'] = infofn
2905             # For backward compatibility, even though it was a private field
2906             info_dict['__infojson_filename'] = infofn
2907         elif _infojson_written is None:
2908             return
2909
2910         # Note: Annotations are deprecated
2911         annofn = None
2912         if self.params.get('writeannotations', False):
2913             annofn = self.prepare_filename(info_dict, 'annotation')
2914         if annofn:
2915             if not self._ensure_dir_exists(encodeFilename(annofn)):
2916                 return
2917             if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(annofn)):
2918                 self.to_screen('[info] Video annotations are already present')
2919             elif not info_dict.get('annotations'):
2920                 self.report_warning('There are no annotations to write.')
2921             else:
2922                 try:
2923                     self.to_screen('[info] Writing video annotations to: ' + annofn)
2924                     with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
2925                         annofile.write(info_dict['annotations'])
2926                 except (KeyError, TypeError):
2927                     self.report_warning('There are no annotations to write.')
2928                 except (OSError, IOError):
2929                     self.report_error('Cannot write annotations file: ' + annofn)
2930                     return
2931
2932         # Write internet shortcut files
2933         def _write_link_file(link_type):
2934             url = try_get(info_dict['webpage_url'], iri_to_uri)
2935             if not url:
2936                 self.report_warning(
2937                     f'Cannot write internet shortcut file because the actual URL of "{info_dict["webpage_url"]}" is unknown')
2938                 return True
2939             linkfn = replace_extension(self.prepare_filename(info_dict, 'link'), link_type, info_dict.get('ext'))
2940             if not self._ensure_dir_exists(encodeFilename(linkfn)):
2941                 return False
2942             if self.params.get('overwrites', True) and os.path.exists(encodeFilename(linkfn)):
2943                 self.to_screen(f'[info] Internet shortcut (.{link_type}) is already present')
2944                 return True
2945             try:
2946                 self.to_screen(f'[info] Writing internet shortcut (.{link_type}) to: {linkfn}')
2947                 with io.open(encodeFilename(to_high_limit_path(linkfn)), 'w', encoding='utf-8',
2948                              newline='\r\n' if link_type == 'url' else '\n') as linkfile:
2949                     template_vars = {'url': url}
2950                     if link_type == 'desktop':
2951                         template_vars['filename'] = linkfn[:-(len(link_type) + 1)]
2952                     linkfile.write(LINK_TEMPLATES[link_type] % template_vars)
2953             except (OSError, IOError):
2954                 self.report_error(f'Cannot write internet shortcut {linkfn}')
2955                 return False
2956             return True
2957
2958         write_links = {
2959             'url': self.params.get('writeurllink'),
2960             'webloc': self.params.get('writewebloclink'),
2961             'desktop': self.params.get('writedesktoplink'),
2962         }
2963         if self.params.get('writelink'):
2964             link_type = ('webloc' if sys.platform == 'darwin'
2965                          else 'desktop' if sys.platform.startswith('linux')
2966                          else 'url')
2967             write_links[link_type] = True
2968
2969         if any(should_write and not _write_link_file(link_type)
2970                for link_type, should_write in write_links.items()):
2971             return
2972
2973         def replace_info_dict(new_info):
2974             nonlocal info_dict
2975             if new_info == info_dict:
2976                 return
2977             info_dict.clear()
2978             info_dict.update(new_info)
2979
2980         try:
2981             new_info, files_to_move = self.pre_process(info_dict, 'before_dl', files_to_move)
2982             replace_info_dict(new_info)
2983         except PostProcessingError as err:
2984             self.report_error('Preprocessing: %s' % str(err))
2985             return
2986
2987         if self.params.get('skip_download'):
2988             info_dict['filepath'] = temp_filename
2989             info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))
2990             info_dict['__files_to_move'] = files_to_move
2991             replace_info_dict(self.run_pp(MoveFilesAfterDownloadPP(self, False), info_dict))
2992             info_dict['__write_download_archive'] = self.params.get('force_write_download_archive')
2993         else:
2994             # Download
2995             info_dict.setdefault('__postprocessors', [])
2996             try:
2997
2998                 def existing_video_file(*filepaths):
2999                     ext = info_dict.get('ext')
3000                     converted = lambda file: replace_extension(file, self.params.get('final_ext') or ext, ext)
3001                     file = self.existing_file(itertools.chain(*zip(map(converted, filepaths), filepaths)),
3002                                               default_overwrite=False)
3003                     if file:
3004                         info_dict['ext'] = os.path.splitext(file)[1][1:]
3005                     return file
3006
3007                 success = True
3008                 if info_dict.get('requested_formats') is not None:
3009
3010                     def compatible_formats(formats):
3011                         # TODO: some formats actually allow this (mkv, webm, ogg, mp4), but not all of them.
3012                         video_formats = [format for format in formats if format.get('vcodec') != 'none']
3013                         audio_formats = [format for format in formats if format.get('acodec') != 'none']
3014                         if len(video_formats) > 2 or len(audio_formats) > 2:
3015                             return False
3016
3017                         # Check extension
3018                         exts = set(format.get('ext') for format in formats)
3019                         COMPATIBLE_EXTS = (
3020                             set(('mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'ismv', 'isma')),
3021                             set(('webm',)),
3022                         )
3023                         for ext_sets in COMPATIBLE_EXTS:
3024                             if ext_sets.issuperset(exts):
3025                                 return True
3026                         # TODO: Check acodec/vcodec
3027                         return False
3028
3029                     requested_formats = info_dict['requested_formats']
3030                     old_ext = info_dict['ext']
3031                     if self.params.get('merge_output_format') is None:
3032                         if not compatible_formats(requested_formats):
3033                             info_dict['ext'] = 'mkv'
3034                             self.report_warning(
3035                                 'Requested formats are incompatible for merge and will be merged into mkv')
3036                         if (info_dict['ext'] == 'webm'
3037                                 and info_dict.get('thumbnails')
3038                                 # check with type instead of pp_key, __name__, or isinstance
3039                                 # since we dont want any custom PPs to trigger this
3040                                 and any(type(pp) == EmbedThumbnailPP for pp in self._pps['post_process'])):
3041                             info_dict['ext'] = 'mkv'
3042                             self.report_warning(
3043                                 'webm doesn\'t support embedding a thumbnail, mkv will be used')
3044                     new_ext = info_dict['ext']
3045
3046                     def correct_ext(filename, ext=new_ext):
3047                         if filename == '-':
3048                             return filename
3049                         filename_real_ext = os.path.splitext(filename)[1][1:]
3050                         filename_wo_ext = (
3051                             os.path.splitext(filename)[0]
3052                             if filename_real_ext in (old_ext, new_ext)
3053                             else filename)
3054                         return '%s.%s' % (filename_wo_ext, ext)
3055
3056                     # Ensure filename always has a correct extension for successful merge
3057                     full_filename = correct_ext(full_filename)
3058                     temp_filename = correct_ext(temp_filename)
3059                     dl_filename = existing_video_file(full_filename, temp_filename)
3060                     info_dict['__real_download'] = False
3061
3062                     downloaded = []
3063                     merger = FFmpegMergerPP(self)
3064
3065                     fd = get_suitable_downloader(info_dict, self.params, to_stdout=temp_filename == '-')
3066                     if dl_filename is not None:
3067                         self.report_file_already_downloaded(dl_filename)
3068                     elif fd:
3069                         for f in requested_formats if fd != FFmpegFD else []:
3070                             f['filepath'] = fname = prepend_extension(
3071                                 correct_ext(temp_filename, info_dict['ext']),
3072                                 'f%s' % f['format_id'], info_dict['ext'])
3073                             downloaded.append(fname)
3074                         info_dict['url'] = '\n'.join(f['url'] for f in requested_formats)
3075                         success, real_download = self.dl(temp_filename, info_dict)
3076                         info_dict['__real_download'] = real_download
3077                     else:
3078                         if self.params.get('allow_unplayable_formats'):
3079                             self.report_warning(
3080                                 'You have requested merging of multiple formats '
3081                                 'while also allowing unplayable formats to be downloaded. '
3082                                 'The formats won\'t be merged to prevent data corruption.')
3083                         elif not merger.available:
3084                             msg = 'You have requested merging of multiple formats but ffmpeg is not installed'
3085                             if not self.params.get('ignoreerrors'):
3086                                 self.report_error(f'{msg}. Aborting due to --abort-on-error')
3087                                 return
3088                             self.report_warning(f'{msg}. The formats won\'t be merged')
3089
3090                         if temp_filename == '-':
3091                             reason = ('using a downloader other than ffmpeg' if FFmpegFD.can_merge_formats(info_dict, self.params)
3092                                       else 'but the formats are incompatible for simultaneous download' if merger.available
3093                                       else 'but ffmpeg is not installed')
3094                             self.report_warning(
3095                                 f'You have requested downloading multiple formats to stdout {reason}. '
3096                                 'The formats will be streamed one after the other')
3097                             fname = temp_filename
3098                         for f in requested_formats:
3099                             new_info = dict(info_dict)
3100                             del new_info['requested_formats']
3101                             new_info.update(f)
3102                             if temp_filename != '-':
3103                                 fname = prepend_extension(
3104                                     correct_ext(temp_filename, new_info['ext']),
3105                                     'f%s' % f['format_id'], new_info['ext'])
3106                                 if not self._ensure_dir_exists(fname):
3107                                     return
3108                                 f['filepath'] = fname
3109                                 downloaded.append(fname)
3110                             partial_success, real_download = self.dl(fname, new_info)
3111                             info_dict['__real_download'] = info_dict['__real_download'] or real_download
3112                             success = success and partial_success
3113
3114                     if downloaded and merger.available and not self.params.get('allow_unplayable_formats'):
3115                         info_dict['__postprocessors'].append(merger)
3116                         info_dict['__files_to_merge'] = downloaded
3117                         # Even if there were no downloads, it is being merged only now
3118                         info_dict['__real_download'] = True
3119                     else:
3120                         for file in downloaded:
3121                             files_to_move[file] = None
3122                 else:
3123                     # Just a single file
3124                     dl_filename = existing_video_file(full_filename, temp_filename)
3125                     if dl_filename is None or dl_filename == temp_filename:
3126                         # dl_filename == temp_filename could mean that the file was partially downloaded with --no-part.
3127                         # So we should try to resume the download
3128                         success, real_download = self.dl(temp_filename, info_dict)
3129                         info_dict['__real_download'] = real_download
3130                     else:
3131                         self.report_file_already_downloaded(dl_filename)
3132
3133                 dl_filename = dl_filename or temp_filename
3134                 info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))
3135
3136             except network_exceptions as err:
3137                 self.report_error('unable to download video data: %s' % error_to_compat_str(err))
3138                 return
3139             except (OSError, IOError) as err:
3140                 raise UnavailableVideoError(err)
3141             except (ContentTooShortError, ) as err:
3142                 self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
3143                 return
3144
3145             if success and full_filename != '-':
3146
3147                 def fixup():
3148                     do_fixup = True
3149                     fixup_policy = self.params.get('fixup')
3150                     vid = info_dict['id']
3151
3152                     if fixup_policy in ('ignore', 'never'):
3153                         return
3154                     elif fixup_policy == 'warn':
3155                         do_fixup = False
3156                     elif fixup_policy != 'force':
3157                         assert fixup_policy in ('detect_or_warn', None)
3158                         if not info_dict.get('__real_download'):
3159                             do_fixup = False
3160
3161                     def ffmpeg_fixup(cndn, msg, cls):
3162                         if not cndn:
3163                             return
3164                         if not do_fixup:
3165                             self.report_warning(f'{vid}: {msg}')
3166                             return
3167                         pp = cls(self)
3168                         if pp.available:
3169                             info_dict['__postprocessors'].append(pp)
3170                         else:
3171                             self.report_warning(f'{vid}: {msg}. Install ffmpeg to fix this automatically')
3172
3173                     stretched_ratio = info_dict.get('stretched_ratio')
3174                     ffmpeg_fixup(
3175                         stretched_ratio not in (1, None),
3176                         f'Non-uniform pixel ratio {stretched_ratio}',
3177                         FFmpegFixupStretchedPP)
3178
3179                     ffmpeg_fixup(
3180                         (info_dict.get('requested_formats') is None
3181                          and info_dict.get('container') == 'm4a_dash'
3182                          and info_dict.get('ext') == 'm4a'),
3183                         'writing DASH m4a. Only some players support this container',
3184                         FFmpegFixupM4aPP)
3185
3186                     downloader = get_suitable_downloader(info_dict, self.params) if 'protocol' in info_dict else None
3187                     downloader = downloader.__name__ if downloader else None
3188
3189                     if info_dict.get('requested_formats') is None:  # Not necessary if doing merger
3190                         ffmpeg_fixup(downloader == 'HlsFD',
3191                                      'Possible MPEG-TS in MP4 container or malformed AAC timestamps',
3192                                      FFmpegFixupM3u8PP)
3193                         ffmpeg_fixup(info_dict.get('is_live') and downloader == 'DashSegmentsFD',
3194                                      'Possible duplicate MOOV atoms', FFmpegFixupDuplicateMoovPP)
3195
3196                     ffmpeg_fixup(downloader == 'WebSocketFragmentFD', 'Malformed timestamps detected', FFmpegFixupTimestampPP)
3197                     ffmpeg_fixup(downloader == 'WebSocketFragmentFD', 'Malformed duration detected', FFmpegFixupDurationPP)
3198
3199                 fixup()
3200                 try:
3201                     replace_info_dict(self.post_process(dl_filename, info_dict, files_to_move))
3202                 except PostProcessingError as err:
3203                     self.report_error('Postprocessing: %s' % str(err))
3204                     return
3205                 try:
3206                     for ph in self._post_hooks:
3207                         ph(info_dict['filepath'])
3208                 except Exception as err:
3209                     self.report_error('post hooks: %s' % str(err))
3210                     return
3211                 info_dict['__write_download_archive'] = True
3212
3213         if self.params.get('force_write_download_archive'):
3214             info_dict['__write_download_archive'] = True
3215
3216         # Make sure the info_dict was modified in-place
3217         assert info_dict is original_infodict
3218
3219         max_downloads = self.params.get('max_downloads')
3220         if max_downloads is not None and self._num_downloads >= int(max_downloads):
3221             raise MaxDownloadsReached()
3222
3223     def __download_wrapper(self, func):
3224         @functools.wraps(func)
3225         def wrapper(*args, **kwargs):
3226             try:
3227                 res = func(*args, **kwargs)
3228             except UnavailableVideoError as e:
3229                 self.report_error(e)
3230             except MaxDownloadsReached as e:
3231                 self.to_screen(f'[info] {e}')
3232                 raise
3233             except DownloadCancelled as e:
3234                 self.to_screen(f'[info] {e}')
3235                 if not self.params.get('break_per_url'):
3236                     raise
3237             else:
3238                 if self.params.get('dump_single_json', False):
3239                     self.post_extract(res)
3240                     self.to_stdout(json.dumps(self.sanitize_info(res)))
3241         return wrapper
3242
3243     def download(self, url_list):
3244         """Download a given list of URLs."""
3245         url_list = variadic(url_list)  # Passing a single URL is a common mistake
3246         outtmpl = self.outtmpl_dict['default']
3247         if (len(url_list) > 1
3248                 and outtmpl != '-'
3249                 and '%' not in outtmpl
3250                 and self.params.get('max_downloads') != 1):
3251             raise SameFileError(outtmpl)
3252
3253         for url in url_list:
3254             self.__download_wrapper(self.extract_info)(
3255                 url, force_generic_extractor=self.params.get('force_generic_extractor', False))
3256
3257         return self._download_retcode
3258
3259     def download_with_info_file(self, info_filename):
3260         with contextlib.closing(fileinput.FileInput(
3261                 [info_filename], mode='r',
3262                 openhook=fileinput.hook_encoded('utf-8'))) as f:
3263             # FileInput doesn't have a read method, we can't call json.load
3264             info = self.sanitize_info(json.loads('\n'.join(f)), self.params.get('clean_infojson', True))
3265         try:
3266             self.__download_wrapper(self.process_ie_result)(info, download=True)
3267         except (DownloadError, EntryNotInPlaylist, ReExtractInfo) as e:
3268             if not isinstance(e, EntryNotInPlaylist):
3269                 self.to_stderr('\r')
3270             webpage_url = info.get('webpage_url')
3271             if webpage_url is not None:
3272                 self.report_warning(f'The info failed to download: {e}; trying with URL {webpage_url}')
3273                 return self.download([webpage_url])
3274             else:
3275                 raise
3276         return self._download_retcode
3277
3278     @staticmethod
3279     def sanitize_info(info_dict, remove_private_keys=False):
3280         ''' Sanitize the infodict for converting to json '''
3281         if info_dict is None:
3282             return info_dict
3283         info_dict.setdefault('epoch', int(time.time()))
3284         info_dict.setdefault('_type', 'video')
3285
3286         if remove_private_keys:
3287             reject = lambda k, v: v is None or (k.startswith('_') and k != '_type') or k in {
3288                 'requested_downloads', 'requested_formats', 'requested_subtitles', 'requested_entries',
3289                 'entries', 'filepath', 'infojson_filename', 'original_url', 'playlist_autonumber',
3290             }
3291         else:
3292             reject = lambda k, v: False
3293
3294         def filter_fn(obj):
3295             if isinstance(obj, dict):
3296                 return {k: filter_fn(v) for k, v in obj.items() if not reject(k, v)}
3297             elif isinstance(obj, (list, tuple, set, LazyList)):
3298                 return list(map(filter_fn, obj))
3299             elif obj is None or isinstance(obj, (str, int, float, bool)):
3300                 return obj
3301             else:
3302                 return repr(obj)
3303
3304         return filter_fn(info_dict)
3305
3306     @staticmethod
3307     def filter_requested_info(info_dict, actually_filter=True):
3308         ''' Alias of sanitize_info for backward compatibility '''
3309         return YoutubeDL.sanitize_info(info_dict, actually_filter)
3310
3311     @staticmethod
3312     def post_extract(info_dict):
3313         def actual_post_extract(info_dict):
3314             if info_dict.get('_type') in ('playlist', 'multi_video'):
3315                 for video_dict in info_dict.get('entries', {}):
3316                     actual_post_extract(video_dict or {})
3317                 return
3318
3319             post_extractor = info_dict.pop('__post_extractor', None) or (lambda: {})
3320             info_dict.update(post_extractor())
3321
3322         actual_post_extract(info_dict or {})
3323
3324     def run_pp(self, pp, infodict):
3325         files_to_delete = []
3326         if '__files_to_move' not in infodict:
3327             infodict['__files_to_move'] = {}
3328         try:
3329             files_to_delete, infodict = pp.run(infodict)
3330         except PostProcessingError as e:
3331             # Must be True and not 'only_download'
3332             if self.params.get('ignoreerrors') is True:
3333                 self.report_error(e)
3334                 return infodict
3335             raise
3336
3337         if not files_to_delete:
3338             return infodict
3339         if self.params.get('keepvideo', False):
3340             for f in files_to_delete:
3341                 infodict['__files_to_move'].setdefault(f, '')
3342         else:
3343             for old_filename in set(files_to_delete):
3344                 self.to_screen('Deleting original file %s (pass -k to keep)' % old_filename)
3345                 try:
3346                     os.remove(encodeFilename(old_filename))
3347                 except (IOError, OSError):
3348                     self.report_warning('Unable to remove downloaded original file')
3349                 if old_filename in infodict['__files_to_move']:
3350                     del infodict['__files_to_move'][old_filename]
3351         return infodict
3352
3353     def run_all_pps(self, key, info, *, additional_pps=None):
3354         self._forceprint(key, info)
3355         for pp in (additional_pps or []) + self._pps[key]:
3356             info = self.run_pp(pp, info)
3357         return info
3358
3359     def pre_process(self, ie_info, key='pre_process', files_to_move=None):
3360         info = dict(ie_info)
3361         info['__files_to_move'] = files_to_move or {}
3362         info = self.run_all_pps(key, info)
3363         return info, info.pop('__files_to_move', None)
3364
3365     def post_process(self, filename, info, files_to_move=None):
3366         """Run all the postprocessors on the given file."""
3367         info['filepath'] = filename
3368         info['__files_to_move'] = files_to_move or {}
3369         info = self.run_all_pps('post_process', info, additional_pps=info.get('__postprocessors'))
3370         info = self.run_pp(MoveFilesAfterDownloadPP(self), info)
3371         del info['__files_to_move']
3372         return self.run_all_pps('after_move', info)
3373
3374     def _make_archive_id(self, info_dict):
3375         video_id = info_dict.get('id')
3376         if not video_id:
3377             return
3378         # Future-proof against any change in case
3379         # and backwards compatibility with prior versions
3380         extractor = info_dict.get('extractor_key') or info_dict.get('ie_key')  # key in a playlist
3381         if extractor is None:
3382             url = str_or_none(info_dict.get('url'))
3383             if not url:
3384                 return
3385             # Try to find matching extractor for the URL and take its ie_key
3386             for ie_key, ie in self._ies.items():
3387                 if ie.suitable(url):
3388                     extractor = ie_key
3389                     break
3390             else:
3391                 return
3392         return '%s %s' % (extractor.lower(), video_id)
3393
3394     def in_download_archive(self, info_dict):
3395         fn = self.params.get('download_archive')
3396         if fn is None:
3397             return False
3398
3399         vid_id = self._make_archive_id(info_dict)
3400         if not vid_id:
3401             return False  # Incomplete video information
3402
3403         return vid_id in self.archive
3404
3405     def record_download_archive(self, info_dict):
3406         fn = self.params.get('download_archive')
3407         if fn is None:
3408             return
3409         vid_id = self._make_archive_id(info_dict)
3410         assert vid_id
3411         self.write_debug(f'Adding to archive: {vid_id}')
3412         with locked_file(fn, 'a', encoding='utf-8') as archive_file:
3413             archive_file.write(vid_id + '\n')
3414         self.archive.add(vid_id)
3415
3416     @staticmethod
3417     def format_resolution(format, default='unknown'):
3418         if format.get('vcodec') == 'none' and format.get('acodec') != 'none':
3419             return 'audio only'
3420         if format.get('resolution') is not None:
3421             return format['resolution']
3422         if format.get('width') and format.get('height'):
3423             return '%dx%d' % (format['width'], format['height'])
3424         elif format.get('height'):
3425             return '%sp' % format['height']
3426         elif format.get('width'):
3427             return '%dx?' % format['width']
3428         return default
3429
3430     def _list_format_headers(self, *headers):
3431         if self.params.get('listformats_table', True) is not False:
3432             return [self._format_screen(header, self.Styles.HEADERS) for header in headers]
3433         return headers
3434
3435     def _format_note(self, fdict):
3436         res = ''
3437         if fdict.get('ext') in ['f4f', 'f4m']:
3438             res += '(unsupported)'
3439         if fdict.get('language'):
3440             if res:
3441                 res += ' '
3442             res += '[%s]' % fdict['language']
3443         if fdict.get('format_note') is not None:
3444             if res:
3445                 res += ' '
3446             res += fdict['format_note']
3447         if fdict.get('tbr') is not None:
3448             if res:
3449                 res += ', '
3450             res += '%4dk' % fdict['tbr']
3451         if fdict.get('container') is not None:
3452             if res:
3453                 res += ', '
3454             res += '%s container' % fdict['container']
3455         if (fdict.get('vcodec') is not None
3456                 and fdict.get('vcodec') != 'none'):
3457             if res:
3458                 res += ', '
3459             res += fdict['vcodec']
3460             if fdict.get('vbr') is not None:
3461                 res += '@'
3462         elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
3463             res += 'video@'
3464         if fdict.get('vbr') is not None:
3465             res += '%4dk' % fdict['vbr']
3466         if fdict.get('fps') is not None:
3467             if res:
3468                 res += ', '
3469             res += '%sfps' % fdict['fps']
3470         if fdict.get('acodec') is not None:
3471             if res:
3472                 res += ', '
3473             if fdict['acodec'] == 'none':
3474                 res += 'video only'
3475             else:
3476                 res += '%-5s' % fdict['acodec']
3477         elif fdict.get('abr') is not None:
3478             if res:
3479                 res += ', '
3480             res += 'audio'
3481         if fdict.get('abr') is not None:
3482             res += '@%3dk' % fdict['abr']
3483         if fdict.get('asr') is not None:
3484             res += ' (%5dHz)' % fdict['asr']
3485         if fdict.get('filesize') is not None:
3486             if res:
3487                 res += ', '
3488             res += format_bytes(fdict['filesize'])
3489         elif fdict.get('filesize_approx') is not None:
3490             if res:
3491                 res += ', '
3492             res += '~' + format_bytes(fdict['filesize_approx'])
3493         return res
3494
3495     def render_formats_table(self, info_dict):
3496         if not info_dict.get('formats') and not info_dict.get('url'):
3497             return None
3498
3499         formats = info_dict.get('formats', [info_dict])
3500         if not self.params.get('listformats_table', True) is not False:
3501             table = [
3502                 [
3503                     format_field(f, 'format_id'),
3504                     format_field(f, 'ext'),
3505                     self.format_resolution(f),
3506                     self._format_note(f)
3507                 ] for f in formats if f.get('preference') is None or f['preference'] >= -1000]
3508             return render_table(['format code', 'extension', 'resolution', 'note'], table, extra_gap=1)
3509
3510         delim = self._format_screen('\u2502', self.Styles.DELIM, '|', test_encoding=True)
3511         table = [
3512             [
3513                 self._format_screen(format_field(f, 'format_id'), self.Styles.ID),
3514                 format_field(f, 'ext'),
3515                 format_field(f, func=self.format_resolution, ignore=('audio only', 'images')),
3516                 format_field(f, 'fps', '\t%d'),
3517                 format_field(f, 'dynamic_range', '%s', ignore=(None, 'SDR')).replace('HDR', ''),
3518                 delim,
3519                 format_field(f, 'filesize', ' \t%s', func=format_bytes) + format_field(f, 'filesize_approx', '~\t%s', func=format_bytes),
3520                 format_field(f, 'tbr', '\t%dk'),
3521                 shorten_protocol_name(f.get('protocol', '')),
3522                 delim,
3523                 format_field(f, 'vcodec', default='unknown').replace(
3524                     'none', 'images' if f.get('acodec') == 'none'
3525                             else self._format_screen('audio only', self.Styles.SUPPRESS)),
3526                 format_field(f, 'vbr', '\t%dk'),
3527                 format_field(f, 'acodec', default='unknown').replace(
3528                     'none', '' if f.get('vcodec') == 'none'
3529                             else self._format_screen('video only', self.Styles.SUPPRESS)),
3530                 format_field(f, 'abr', '\t%dk'),
3531                 format_field(f, 'asr', '\t%dHz'),
3532                 join_nonempty(
3533                     self._format_screen('UNSUPPORTED', 'light red') if f.get('ext') in ('f4f', 'f4m') else None,
3534                     format_field(f, 'language', '[%s]'),
3535                     join_nonempty(format_field(f, 'format_note'),
3536                                   format_field(f, 'container', ignore=(None, f.get('ext'))),
3537                                   delim=', '),
3538                     delim=' '),
3539             ] for f in formats if f.get('preference') is None or f['preference'] >= -1000]
3540         header_line = self._list_format_headers(
3541             'ID', 'EXT', 'RESOLUTION', '\tFPS', 'HDR', delim, '\tFILESIZE', '\tTBR', 'PROTO',
3542             delim, 'VCODEC', '\tVBR', 'ACODEC', '\tABR', '\tASR', 'MORE INFO')
3543
3544         return render_table(
3545             header_line, table, hide_empty=True,
3546             delim=self._format_screen('\u2500', self.Styles.DELIM, '-', test_encoding=True))
3547
3548     def render_thumbnails_table(self, info_dict):
3549         thumbnails = list(info_dict.get('thumbnails') or [])
3550         if not thumbnails:
3551             return None
3552         return render_table(
3553             self._list_format_headers('ID', 'Width', 'Height', 'URL'),
3554             [[t.get('id'), t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']] for t in thumbnails])
3555
3556     def render_subtitles_table(self, video_id, subtitles):
3557         def _row(lang, formats):
3558             exts, names = zip(*((f['ext'], f.get('name') or 'unknown') for f in reversed(formats)))
3559             if len(set(names)) == 1:
3560                 names = [] if names[0] == 'unknown' else names[:1]
3561             return [lang, ', '.join(names), ', '.join(exts)]
3562
3563         if not subtitles:
3564             return None
3565         return render_table(
3566             self._list_format_headers('Language', 'Name', 'Formats'),
3567             [_row(lang, formats) for lang, formats in subtitles.items()],
3568             hide_empty=True)
3569
3570     def __list_table(self, video_id, name, func, *args):
3571         table = func(*args)
3572         if not table:
3573             self.to_screen(f'{video_id} has no {name}')
3574             return
3575         self.to_screen(f'[info] Available {name} for {video_id}:')
3576         self.to_stdout(table)
3577
3578     def list_formats(self, info_dict):
3579         self.__list_table(info_dict['id'], 'formats', self.render_formats_table, info_dict)
3580
3581     def list_thumbnails(self, info_dict):
3582         self.__list_table(info_dict['id'], 'thumbnails', self.render_thumbnails_table, info_dict)
3583
3584     def list_subtitles(self, video_id, subtitles, name='subtitles'):
3585         self.__list_table(video_id, name, self.render_subtitles_table, video_id, subtitles)
3586
3587     def urlopen(self, req):
3588         """ Start an HTTP download """
3589         if isinstance(req, compat_basestring):
3590             req = sanitized_Request(req)
3591         return self._opener.open(req, timeout=self._socket_timeout)
3592
3593     def print_debug_header(self):
3594         if not self.params.get('verbose'):
3595             return
3596
3597         def get_encoding(stream):
3598             ret = str(getattr(stream, 'encoding', 'missing (%s)' % type(stream).__name__))
3599             if not supports_terminal_sequences(stream):
3600                 from .compat import WINDOWS_VT_MODE
3601                 ret += ' (No VT)' if WINDOWS_VT_MODE is False else ' (No ANSI)'
3602             return ret
3603
3604         encoding_str = 'Encodings: locale %s, fs %s, out %s, err %s, pref %s' % (
3605             locale.getpreferredencoding(),
3606             sys.getfilesystemencoding(),
3607             get_encoding(self._screen_file), get_encoding(self._err_file),
3608             self.get_encoding())
3609
3610         logger = self.params.get('logger')
3611         if logger:
3612             write_debug = lambda msg: logger.debug(f'[debug] {msg}')
3613             write_debug(encoding_str)
3614         else:
3615             write_string(f'[debug] {encoding_str}\n', encoding=None)
3616             write_debug = lambda msg: self._write_string(f'[debug] {msg}\n')
3617
3618         source = detect_variant()
3619         write_debug(join_nonempty(
3620             'yt-dlp version', __version__,
3621             f'[{RELEASE_GIT_HEAD}]' if RELEASE_GIT_HEAD else '',
3622             '' if source == 'unknown' else f'({source})',
3623             delim=' '))
3624         if not _LAZY_LOADER:
3625             if os.environ.get('YTDLP_NO_LAZY_EXTRACTORS'):
3626                 write_debug('Lazy loading extractors is forcibly disabled')
3627             else:
3628                 write_debug('Lazy loading extractors is disabled')
3629         if plugin_extractors or plugin_postprocessors:
3630             write_debug('Plugins: %s' % [
3631                 '%s%s' % (klass.__name__, '' if klass.__name__ == name else f' as {name}')
3632                 for name, klass in itertools.chain(plugin_extractors.items(), plugin_postprocessors.items())])
3633         if self.params.get('compat_opts'):
3634             write_debug('Compatibility options: %s' % ', '.join(self.params.get('compat_opts')))
3635
3636         if source == 'source':
3637             try:
3638                 sp = Popen(
3639                     ['git', 'rev-parse', '--short', 'HEAD'],
3640                     stdout=subprocess.PIPE, stderr=subprocess.PIPE,
3641                     cwd=os.path.dirname(os.path.abspath(__file__)))
3642                 out, err = sp.communicate_or_kill()
3643                 out = out.decode().strip()
3644                 if re.match('[0-9a-f]+', out):
3645                     write_debug('Git HEAD: %s' % out)
3646             except Exception:
3647                 try:
3648                     sys.exc_clear()
3649                 except Exception:
3650                     pass
3651
3652         def python_implementation():
3653             impl_name = platform.python_implementation()
3654             if impl_name == 'PyPy' and hasattr(sys, 'pypy_version_info'):
3655                 return impl_name + ' version %d.%d.%d' % sys.pypy_version_info[:3]
3656             return impl_name
3657
3658         write_debug('Python version %s (%s %s) - %s' % (
3659             platform.python_version(),
3660             python_implementation(),
3661             platform.architecture()[0],
3662             platform_name()))
3663
3664         exe_versions, ffmpeg_features = FFmpegPostProcessor.get_versions_and_features(self)
3665         ffmpeg_features = {key for key, val in ffmpeg_features.items() if val}
3666         if ffmpeg_features:
3667             exe_versions['ffmpeg'] += ' (%s)' % ','.join(ffmpeg_features)
3668
3669         exe_versions['rtmpdump'] = rtmpdump_version()
3670         exe_versions['phantomjs'] = PhantomJSwrapper._version()
3671         exe_str = ', '.join(
3672             f'{exe} {v}' for exe, v in sorted(exe_versions.items()) if v
3673         ) or 'none'
3674         write_debug('exe versions: %s' % exe_str)
3675
3676         from .downloader.websocket import has_websockets
3677         from .postprocessor.embedthumbnail import has_mutagen
3678         from .cookies import SQLITE_AVAILABLE, SECRETSTORAGE_AVAILABLE
3679
3680         lib_str = join_nonempty(
3681             compat_brotli and compat_brotli.__name__,
3682             compat_pycrypto_AES and compat_pycrypto_AES.__name__.split('.')[0],
3683             SECRETSTORAGE_AVAILABLE and 'secretstorage',
3684             has_mutagen and 'mutagen',
3685             SQLITE_AVAILABLE and 'sqlite',
3686             has_websockets and 'websockets',
3687             delim=', ') or 'none'
3688         write_debug('Optional libraries: %s' % lib_str)
3689
3690         proxy_map = {}
3691         for handler in self._opener.handlers:
3692             if hasattr(handler, 'proxies'):
3693                 proxy_map.update(handler.proxies)
3694         write_debug(f'Proxy map: {proxy_map}')
3695
3696         # Not implemented
3697         if False and self.params.get('call_home'):
3698             ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode('utf-8')
3699             write_debug('Public IP address: %s' % ipaddr)
3700             latest_version = self.urlopen(
3701                 'https://yt-dl.org/latest/version').read().decode('utf-8')
3702             if version_tuple(latest_version) > version_tuple(__version__):
3703                 self.report_warning(
3704                     'You are using an outdated version (newest version: %s)! '
3705                     'See https://yt-dl.org/update if you need help updating.' %
3706                     latest_version)
3707
3708     def _setup_opener(self):
3709         timeout_val = self.params.get('socket_timeout')
3710         self._socket_timeout = 20 if timeout_val is None else float(timeout_val)
3711
3712         opts_cookiesfrombrowser = self.params.get('cookiesfrombrowser')
3713         opts_cookiefile = self.params.get('cookiefile')
3714         opts_proxy = self.params.get('proxy')
3715
3716         self.cookiejar = load_cookies(opts_cookiefile, opts_cookiesfrombrowser, self)
3717
3718         cookie_processor = YoutubeDLCookieProcessor(self.cookiejar)
3719         if opts_proxy is not None:
3720             if opts_proxy == '':
3721                 proxies = {}
3722             else:
3723                 proxies = {'http': opts_proxy, 'https': opts_proxy}
3724         else:
3725             proxies = compat_urllib_request.getproxies()
3726             # Set HTTPS proxy to HTTP one if given (https://github.com/ytdl-org/youtube-dl/issues/805)
3727             if 'http' in proxies and 'https' not in proxies:
3728                 proxies['https'] = proxies['http']
3729         proxy_handler = PerRequestProxyHandler(proxies)
3730
3731         debuglevel = 1 if self.params.get('debug_printtraffic') else 0
3732         https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
3733         ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
3734         redirect_handler = YoutubeDLRedirectHandler()
3735         data_handler = compat_urllib_request_DataHandler()
3736
3737         # When passing our own FileHandler instance, build_opener won't add the
3738         # default FileHandler and allows us to disable the file protocol, which
3739         # can be used for malicious purposes (see
3740         # https://github.com/ytdl-org/youtube-dl/issues/8227)
3741         file_handler = compat_urllib_request.FileHandler()
3742
3743         def file_open(*args, **kwargs):
3744             raise compat_urllib_error.URLError('file:// scheme is explicitly disabled in yt-dlp for security reasons')
3745         file_handler.file_open = file_open
3746
3747         opener = compat_urllib_request.build_opener(
3748             proxy_handler, https_handler, cookie_processor, ydlh, redirect_handler, data_handler, file_handler)
3749
3750         # Delete the default user-agent header, which would otherwise apply in
3751         # cases where our custom HTTP handler doesn't come into play
3752         # (See https://github.com/ytdl-org/youtube-dl/issues/1309 for details)
3753         opener.addheaders = []
3754         self._opener = opener
3755
3756     def encode(self, s):
3757         if isinstance(s, bytes):
3758             return s  # Already encoded
3759
3760         try:
3761             return s.encode(self.get_encoding())
3762         except UnicodeEncodeError as err:
3763             err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
3764             raise
3765
3766     def get_encoding(self):
3767         encoding = self.params.get('encoding')
3768         if encoding is None:
3769             encoding = preferredencoding()
3770         return encoding
3771
3772     def _write_info_json(self, label, ie_result, infofn, overwrite=None):
3773         ''' Write infojson and returns True = written, False = skip, None = error '''
3774         if overwrite is None:
3775             overwrite = self.params.get('overwrites', True)
3776         if not self.params.get('writeinfojson'):
3777             return False
3778         elif not infofn:
3779             self.write_debug(f'Skipping writing {label} infojson')
3780             return False
3781         elif not self._ensure_dir_exists(infofn):
3782             return None
3783         elif not overwrite and os.path.exists(infofn):
3784             self.to_screen(f'[info] {label.title()} metadata is already present')
3785         else:
3786             self.to_screen(f'[info] Writing {label} metadata as JSON to: {infofn}')
3787             try:
3788                 write_json_file(self.sanitize_info(ie_result, self.params.get('clean_infojson', True)), infofn)
3789             except (OSError, IOError):
3790                 self.report_error(f'Cannot write {label} metadata to JSON file {infofn}')
3791                 return None
3792         return True
3793
3794     def _write_description(self, label, ie_result, descfn):
3795         ''' Write description and returns True = written, False = skip, None = error '''
3796         if not self.params.get('writedescription'):
3797             return False
3798         elif not descfn:
3799             self.write_debug(f'Skipping writing {label} description')
3800             return False
3801         elif not self._ensure_dir_exists(descfn):
3802             return None
3803         elif not self.params.get('overwrites', True) and os.path.exists(descfn):
3804             self.to_screen(f'[info] {label.title()} description is already present')
3805         elif ie_result.get('description') is None:
3806             self.report_warning(f'There\'s no {label} description to write')
3807             return False
3808         else:
3809             try:
3810                 self.to_screen(f'[info] Writing {label} description to: {descfn}')
3811                 with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
3812                     descfile.write(ie_result['description'])
3813             except (OSError, IOError):
3814                 self.report_error(f'Cannot write {label} description file {descfn}')
3815                 return None
3816         return True
3817
3818     def _write_subtitles(self, info_dict, filename):
3819         ''' Write subtitles to file and return list of (sub_filename, final_sub_filename); or None if error'''
3820         ret = []
3821         subtitles = info_dict.get('requested_subtitles')
3822         if not subtitles or not (self.params.get('writesubtitles') or self.params.get('writeautomaticsub')):
3823             # subtitles download errors are already managed as troubles in relevant IE
3824             # that way it will silently go on when used with unsupporting IE
3825             return ret
3826
3827         sub_filename_base = self.prepare_filename(info_dict, 'subtitle')
3828         if not sub_filename_base:
3829             self.to_screen('[info] Skipping writing video subtitles')
3830             return ret
3831         for sub_lang, sub_info in subtitles.items():
3832             sub_format = sub_info['ext']
3833             sub_filename = subtitles_filename(filename, sub_lang, sub_format, info_dict.get('ext'))
3834             sub_filename_final = subtitles_filename(sub_filename_base, sub_lang, sub_format, info_dict.get('ext'))
3835             existing_sub = self.existing_file((sub_filename_final, sub_filename))
3836             if existing_sub:
3837                 self.to_screen(f'[info] Video subtitle {sub_lang}.{sub_format} is already present')
3838                 sub_info['filepath'] = existing_sub
3839                 ret.append((existing_sub, sub_filename_final))
3840                 continue
3841
3842             self.to_screen(f'[info] Writing video subtitles to: {sub_filename}')
3843             if sub_info.get('data') is not None:
3844                 try:
3845                     # Use newline='' to prevent conversion of newline characters
3846                     # See https://github.com/ytdl-org/youtube-dl/issues/10268
3847                     with io.open(sub_filename, 'w', encoding='utf-8', newline='') as subfile:
3848                         subfile.write(sub_info['data'])
3849                     sub_info['filepath'] = sub_filename
3850                     ret.append((sub_filename, sub_filename_final))
3851                     continue
3852                 except (OSError, IOError):
3853                     self.report_error(f'Cannot write video subtitles file {sub_filename}')
3854                     return None
3855
3856             try:
3857                 sub_copy = sub_info.copy()
3858                 sub_copy.setdefault('http_headers', info_dict.get('http_headers'))
3859                 self.dl(sub_filename, sub_copy, subtitle=True)
3860                 sub_info['filepath'] = sub_filename
3861                 ret.append((sub_filename, sub_filename_final))
3862             except (DownloadError, ExtractorError, IOError, OSError, ValueError) + network_exceptions as err:
3863                 if self.params.get('ignoreerrors') is not True:  # False or 'only_download'
3864                     raise DownloadError(f'Unable to download video subtitles for {sub_lang!r}: {err}', err)
3865                 self.report_warning(f'Unable to download video subtitles for {sub_lang!r}: {err}')
3866         return ret
3867
3868     def _write_thumbnails(self, label, info_dict, filename, thumb_filename_base=None):
3869         ''' Write thumbnails to file and return list of (thumb_filename, final_thumb_filename) '''
3870         write_all = self.params.get('write_all_thumbnails', False)
3871         thumbnails, ret = [], []
3872         if write_all or self.params.get('writethumbnail', False):
3873             thumbnails = info_dict.get('thumbnails') or []
3874         multiple = write_all and len(thumbnails) > 1
3875
3876         if thumb_filename_base is None:
3877             thumb_filename_base = filename
3878         if thumbnails and not thumb_filename_base:
3879             self.write_debug(f'Skipping writing {label} thumbnail')
3880             return ret
3881
3882         for idx, t in list(enumerate(thumbnails))[::-1]:
3883             thumb_ext = (f'{t["id"]}.' if multiple else '') + determine_ext(t['url'], 'jpg')
3884             thumb_display_id = f'{label} thumbnail {t["id"]}'
3885             thumb_filename = replace_extension(filename, thumb_ext, info_dict.get('ext'))
3886             thumb_filename_final = replace_extension(thumb_filename_base, thumb_ext, info_dict.get('ext'))
3887
3888             existing_thumb = self.existing_file((thumb_filename_final, thumb_filename))
3889             if existing_thumb:
3890                 self.to_screen('[info] %s is already present' % (
3891                     thumb_display_id if multiple else f'{label} thumbnail').capitalize())
3892                 t['filepath'] = existing_thumb
3893                 ret.append((existing_thumb, thumb_filename_final))
3894             else:
3895                 self.to_screen(f'[info] Downloading {thumb_display_id} ...')
3896                 try:
3897                     uf = self.urlopen(sanitized_Request(t['url'], headers=t.get('http_headers', {})))
3898                     self.to_screen(f'[info] Writing {thumb_display_id} to: {thumb_filename}')
3899                     with open(encodeFilename(thumb_filename), 'wb') as thumbf:
3900                         shutil.copyfileobj(uf, thumbf)
3901                     ret.append((thumb_filename, thumb_filename_final))
3902                     t['filepath'] = thumb_filename
3903                 except network_exceptions as err:
3904                     thumbnails.pop(idx)
3905                     self.report_warning(f'Unable to download {thumb_display_id}: {err}')
3906             if ret and not write_all:
3907                 break
3908         return ret