yt_dlp/YoutubeDL.py

   1 #!/usr/bin/env python3
   2 # coding: utf-8
   3
   4 from __future__ import absolute_import, unicode_literals
   5
   6 import collections
   7 import contextlib
   8 import datetime
   9 import errno
  10 import fileinput
  11 import functools
  12 import io
  13 import itertools
  14 import json
  15 import locale
  16 import operator
  17 import os
  18 import platform
  19 import re
  20 import shutil
  21 import subprocess
  22 import sys
  23 import tempfile
  24 import time
  25 import tokenize
  26 import traceback
  27 import random
  28 import unicodedata
  29 import urllib.request
  30
  31 from enum import Enum
  32 from string import ascii_letters
  33
  34 from .compat import (
  35     compat_brotli,
  36     compat_get_terminal_size,
  37     compat_os_name,
  38     compat_pycrypto_AES,
  39     compat_shlex_quote,
  40     compat_str,
  41     compat_urllib_error,
  42     compat_urllib_request,
  43     windows_enable_vt_mode,
  44 )
  45 from .cookies import load_cookies
  46 from .utils import (
  47     age_restricted,
  48     args_to_str,
  49     ContentTooShortError,
  50     date_from_str,
  51     DateRange,
  52     DEFAULT_OUTTMPL,
  53     determine_ext,
  54     determine_protocol,
  55     DownloadCancelled,
  56     DownloadError,
  57     encode_compat_str,
  58     encodeFilename,
  59     EntryNotInPlaylist,
  60     error_to_compat_str,
  61     ExistingVideoReached,
  62     expand_path,
  63     ExtractorError,
  64     filter_dict,
  65     float_or_none,
  66     format_bytes,
  67     format_field,
  68     format_decimal_suffix,
  69     formatSeconds,
  70     GeoRestrictedError,
  71     get_domain,
  72     has_certifi,
  73     HEADRequest,
  74     InAdvancePagedList,
  75     int_or_none,
  76     iri_to_uri,
  77     ISO3166Utils,
  78     join_nonempty,
  79     LazyList,
  80     LINK_TEMPLATES,
  81     locked_file,
  82     make_dir,
  83     make_HTTPS_handler,
  84     MaxDownloadsReached,
  85     merge_headers,
  86     network_exceptions,
  87     NO_DEFAULT,
  88     number_of_digits,
  89     orderedSet,
  90     OUTTMPL_TYPES,
  91     PagedList,
  92     parse_filesize,
  93     PerRequestProxyHandler,
  94     platform_name,
  95     Popen,
  96     POSTPROCESS_WHEN,
  97     PostProcessingError,
  98     preferredencoding,
  99     prepend_extension,
 100     ReExtractInfo,
 101     register_socks_protocols,
 102     RejectedVideoReached,
 103     remove_terminal_sequences,
 104     render_table,
 105     replace_extension,
 106     SameFileError,
 107     sanitize_filename,
 108     sanitize_path,
 109     sanitize_url,
 110     sanitized_Request,
 111     std_headers,
 112     STR_FORMAT_RE_TMPL,
 113     STR_FORMAT_TYPES,
 114     str_or_none,
 115     strftime_or_none,
 116     subtitles_filename,
 117     supports_terminal_sequences,
 118     timetuple_from_msec,
 119     to_high_limit_path,
 120     traverse_obj,
 121     try_get,
 122     UnavailableVideoError,
 123     url_basename,
 124     variadic,
 125     version_tuple,
 126     write_json_file,
 127     write_string,
 128     YoutubeDLCookieProcessor,
 129     YoutubeDLHandler,
 130     YoutubeDLRedirectHandler,
 131 )
 132 from .cache import Cache
 133 from .minicurses import format_text
 134 from .extractor import (
 135     gen_extractor_classes,
 136     get_info_extractor,
 137     _LAZY_LOADER,
 138     _PLUGIN_CLASSES as plugin_extractors
 139 )
 140 from .extractor.openload import PhantomJSwrapper
 141 from .downloader import (
 142     FFmpegFD,
 143     get_suitable_downloader,
 144     shorten_protocol_name
 145 )
 146 from .downloader.rtmp import rtmpdump_version
 147 from .postprocessor import (
 148     get_postprocessor,
 149     EmbedThumbnailPP,
 150     FFmpegFixupDuplicateMoovPP,
 151     FFmpegFixupDurationPP,
 152     FFmpegFixupM3u8PP,
 153     FFmpegFixupM4aPP,
 154     FFmpegFixupStretchedPP,
 155     FFmpegFixupTimestampPP,
 156     FFmpegMergerPP,
 157     FFmpegPostProcessor,
 158     MoveFilesAfterDownloadPP,
 159     _PLUGIN_CLASSES as plugin_postprocessors
 160 )
 161 from .update import detect_variant
 162 from .version import __version__, RELEASE_GIT_HEAD
 163
 164 if compat_os_name == 'nt':
 165     import ctypes
 166
 167
 168 class YoutubeDL(object):
 169     """YoutubeDL class.
 170
 171     YoutubeDL objects are the ones responsible for downloading the
 172     actual video file and writing it to disk if the user has requested
 173     it, among some other tasks. In most cases there should be one per
 174     program. As, given a video URL, the downloader doesn't know how to
 175     extract all the needed information, task that InfoExtractors do, it
 176     has to pass the URL to one of them.
 177
 178     For this, YoutubeDL objects have a method that allows
 179     InfoExtractors to be registered in a given order. When it is passed
 180     a URL, the YoutubeDL object hands it to the first InfoExtractor it
 181     finds that reports being able to handle it. The InfoExtractor extracts
 182     all the information about the video or videos the URL refers to, and
 183     YoutubeDL processes the extracted information, possibly using a File
 184     Downloader to download the video.
 185
 186     YoutubeDL objects accept a lot of parameters. In order not to saturate
 187     the object constructor with arguments, it receives a dictionary of
 188     options instead. These options are available through the params
 189     attribute for the InfoExtractors to use. The YoutubeDL also
 190     registers itself as the downloader in charge for the InfoExtractors
 191     that are added to it, so this is a "mutual registration".
 192
 193     Available options:
 194
 195     username:          Username for authentication purposes.
 196     password:          Password for authentication purposes.
 197     videopassword:     Password for accessing a video.
 198     ap_mso:            Adobe Pass multiple-system operator identifier.
 199     ap_username:       Multiple-system operator account username.
 200     ap_password:       Multiple-system operator account password.
 201     usenetrc:          Use netrc for authentication instead.
 202     verbose:           Print additional info to stdout.
 203     quiet:             Do not print messages to stdout.
 204     no_warnings:       Do not print out anything for warnings.
 205     forceprint:        A dict with keys WHEN mapped to a list of templates to
 206                        print to stdout. The allowed keys are video or any of the
 207                        items in utils.POSTPROCESS_WHEN.
 208                        For compatibility, a single list is also accepted
 209     print_to_file:     A dict with keys WHEN (same as forceprint) mapped to
 210                        a list of tuples with (template, filename)
 211     forceurl:          Force printing final URL. (Deprecated)
 212     forcetitle:        Force printing title. (Deprecated)
 213     forceid:           Force printing ID. (Deprecated)
 214     forcethumbnail:    Force printing thumbnail URL. (Deprecated)
 215     forcedescription:  Force printing description. (Deprecated)
 216     forcefilename:     Force printing final filename. (Deprecated)
 217     forceduration:     Force printing duration. (Deprecated)
 218     forcejson:         Force printing info_dict as JSON.
 219     dump_single_json:  Force printing the info_dict of the whole playlist
 220                        (or video) as a single JSON line.
 221     force_write_download_archive: Force writing download archive regardless
 222                        of 'skip_download' or 'simulate'.
 223     simulate:          Do not download the video files. If unset (or None),
 224                        simulate only if listsubtitles, listformats or list_thumbnails is used
 225     format:            Video format code. see "FORMAT SELECTION" for more details.
 226                        You can also pass a function. The function takes 'ctx' as
 227                        argument and returns the formats to download.
 228                        See "build_format_selector" for an implementation
 229     allow_unplayable_formats:   Allow unplayable formats to be extracted and downloaded.
 230     ignore_no_formats_error: Ignore "No video formats" error. Useful for
 231                        extracting metadata even if the video is not actually
 232                        available for download (experimental)
 233     format_sort:       A list of fields by which to sort the video formats.
 234                        See "Sorting Formats" for more details.
 235     format_sort_force: Force the given format_sort. see "Sorting Formats"
 236                        for more details.
 237     prefer_free_formats: Whether to prefer video formats with free containers
 238                        over non-free ones of same quality.
 239     allow_multiple_video_streams:   Allow multiple video streams to be merged
 240                        into a single file
 241     allow_multiple_audio_streams:   Allow multiple audio streams to be merged
 242                        into a single file
 243     check_formats:     Whether to test if the formats are downloadable.
 244                        Can be True (check all), False (check none),
 245                        'selected' (check selected formats),
 246                        or None (check only if requested by extractor)
 247     paths:             Dictionary of output paths. The allowed keys are 'home',
 248                        'temp' and the keys of OUTTMPL_TYPES (in utils.py)
 249     outtmpl:           Dictionary of templates for output names. Allowed keys
 250                        are 'default' and the keys of OUTTMPL_TYPES (in utils.py).
 251                        For compatibility with youtube-dl, a single string can also be used
 252     outtmpl_na_placeholder: Placeholder for unavailable meta fields.
 253     restrictfilenames: Do not allow "&" and spaces in file names
 254     trim_file_name:    Limit length of filename (extension excluded)
 255     windowsfilenames:  Force the filenames to be windows compatible
 256     ignoreerrors:      Do not stop on download/postprocessing errors.
 257                        Can be 'only_download' to ignore only download errors.
 258                        Default is 'only_download' for CLI, but False for API
 259     skip_playlist_after_errors: Number of allowed failures until the rest of
 260                        the playlist is skipped
 261     force_generic_extractor: Force downloader to use the generic extractor
 262     overwrites:        Overwrite all video and metadata files if True,
 263                        overwrite only non-video files if None
 264                        and don't overwrite any file if False
 265                        For compatibility with youtube-dl,
 266                        "nooverwrites" may also be used instead
 267     playliststart:     Playlist item to start at.
 268     playlistend:       Playlist item to end at.
 269     playlist_items:    Specific indices of playlist to download.
 270     playlistreverse:   Download playlist items in reverse order.
 271     playlistrandom:    Download playlist items in random order.
 272     matchtitle:        Download only matching titles.
 273     rejecttitle:       Reject downloads for matching titles.
 274     logger:            Log messages to a logging.Logger instance.
 275     logtostderr:       Log messages to stderr instead of stdout.
 276     consoletitle:       Display progress in console window's titlebar.
 277     writedescription:  Write the video description to a .description file
 278     writeinfojson:     Write the video metadata (info_dict) to a .info.json file
 279     clean_infojson:    Remove private fields from the infojson
 280     getcomments:       Extract video comments. This will not be written to disk
 281                        unless writeinfojson is also given
 282     writeannotations:  Write the video annotations to a .annotations.xml file
 283     writethumbnail:    Write the thumbnail image to a file
 284     allow_playlist_files: Whether to write playlists' description, infojson etc
 285                        also to disk when using the 'write*' options
 286     write_all_thumbnails:  Write all thumbnail formats to files
 287     writelink:         Write an internet shortcut file, depending on the
 288                        current platform (.url/.webloc/.desktop)
 289     writeurllink:      Write a Windows internet shortcut file (.url)
 290     writewebloclink:   Write a macOS internet shortcut file (.webloc)
 291     writedesktoplink:  Write a Linux internet shortcut file (.desktop)
 292     writesubtitles:    Write the video subtitles to a file
 293     writeautomaticsub: Write the automatically generated subtitles to a file
 294     allsubtitles:      Deprecated - Use subtitleslangs = ['all']
 295                        Downloads all the subtitles of the video
 296                        (requires writesubtitles or writeautomaticsub)
 297     listsubtitles:     Lists all available subtitles for the video
 298     subtitlesformat:   The format code for subtitles
 299     subtitleslangs:    List of languages of the subtitles to download (can be regex).
 300                        The list may contain "all" to refer to all the available
 301                        subtitles. The language can be prefixed with a "-" to
 302                        exclude it from the requested languages. Eg: ['all', '-live_chat']
 303     keepvideo:         Keep the video file after post-processing
 304     daterange:         A DateRange object, download only if the upload_date is in the range.
 305     skip_download:     Skip the actual download of the video file
 306     cachedir:          Location of the cache files in the filesystem.
 307                        False to disable filesystem cache.
 308     noplaylist:        Download single video instead of a playlist if in doubt.
 309     age_limit:         An integer representing the user's age in years.
 310                        Unsuitable videos for the given age are skipped.
 311     min_views:         An integer representing the minimum view count the video
 312                        must have in order to not be skipped.
 313                        Videos without view count information are always
 314                        downloaded. None for no limit.
 315     max_views:         An integer representing the maximum view count.
 316                        Videos that are more popular than that are not
 317                        downloaded.
 318                        Videos without view count information are always
 319                        downloaded. None for no limit.
 320     download_archive:  File name of a file where all downloads are recorded.
 321                        Videos already present in the file are not downloaded
 322                        again.
 323     break_on_existing: Stop the download process after attempting to download a
 324                        file that is in the archive.
 325     break_on_reject:   Stop the download process when encountering a video that
 326                        has been filtered out.
 327     break_per_url:     Whether break_on_reject and break_on_existing
 328                        should act on each input URL as opposed to for the entire queue
 329     cookiefile:        File name where cookies should be read from and dumped to
 330     cookiesfrombrowser:  A tuple containing the name of the browser, the profile
 331                        name/path from where cookies are loaded, and the name of the
 332                        keyring. Eg: ('chrome', ) or ('vivaldi', 'default', 'BASICTEXT')
 333     legacyserverconnect: Explicitly allow HTTPS connection to servers that do not
 334                        support RFC 5746 secure renegotiation
 335     nocheckcertificate:  Do not verify SSL certificates
 336     prefer_insecure:   Use HTTP instead of HTTPS to retrieve information.
 337                        At the moment, this is only supported by YouTube.
 338     http_headers:      A dictionary of custom headers to be used for all requests
 339     proxy:             URL of the proxy server to use
 340     geo_verification_proxy:  URL of the proxy to use for IP address verification
 341                        on geo-restricted sites.
 342     socket_timeout:    Time to wait for unresponsive hosts, in seconds
 343     bidi_workaround:   Work around buggy terminals without bidirectional text
 344                        support, using fribidi
 345     debug_printtraffic:Print out sent and received HTTP traffic
 346     include_ads:       Download ads as well (deprecated)
 347     default_search:    Prepend this string if an input url is not valid.
 348                        'auto' for elaborate guessing
 349     encoding:          Use this encoding instead of the system-specified.
 350     extract_flat:      Do not resolve URLs, return the immediate result.
 351                        Pass in 'in_playlist' to only show this behavior for
 352                        playlist items.
 353     wait_for_video:    If given, wait for scheduled streams to become available.
 354                        The value should be a tuple containing the range
 355                        (min_secs, max_secs) to wait between retries
 356     postprocessors:    A list of dictionaries, each with an entry
 357                        * key:  The name of the postprocessor. See
 358                                yt_dlp/postprocessor/__init__.py for a list.
 359                        * when: When to run the postprocessor. Allowed values are
 360                                the entries of utils.POSTPROCESS_WHEN
 361                                Assumed to be 'post_process' if not given
 362     post_hooks:        Deprecated - Register a custom postprocessor instead
 363                        A list of functions that get called as the final step
 364                        for each video file, after all postprocessors have been
 365                        called. The filename will be passed as the only argument.
 366     progress_hooks:    A list of functions that get called on download
 367                        progress, with a dictionary with the entries
 368                        * status: One of "downloading", "error", or "finished".
 369                                  Check this first and ignore unknown values.
 370                        * info_dict: The extracted info_dict
 371
 372                        If status is one of "downloading", or "finished", the
 373                        following properties may also be present:
 374                        * filename: The final filename (always present)
 375                        * tmpfilename: The filename we're currently writing to
 376                        * downloaded_bytes: Bytes on disk
 377                        * total_bytes: Size of the whole file, None if unknown
 378                        * total_bytes_estimate: Guess of the eventual file size,
 379                                                None if unavailable.
 380                        * elapsed: The number of seconds since download started.
 381                        * eta: The estimated time in seconds, None if unknown
 382                        * speed: The download speed in bytes/second, None if
 383                                 unknown
 384                        * fragment_index: The counter of the currently
 385                                          downloaded video fragment.
 386                        * fragment_count: The number of fragments (= individual
 387                                          files that will be merged)
 388
 389                        Progress hooks are guaranteed to be called at least once
 390                        (with status "finished") if the download is successful.
 391     postprocessor_hooks:  A list of functions that get called on postprocessing
 392                        progress, with a dictionary with the entries
 393                        * status: One of "started", "processing", or "finished".
 394                                  Check this first and ignore unknown values.
 395                        * postprocessor: Name of the postprocessor
 396                        * info_dict: The extracted info_dict
 397
 398                        Postprocessor hooks are guaranteed to be called at least twice
 399                        (with status "started" and "finished") if the processing is successful.
 400     merge_output_format: Extension to use when merging formats.
 401     final_ext:         Expected final extension; used to detect when the file was
 402                        already downloaded and converted
 403     fixup:             Automatically correct known faults of the file.
 404                        One of:
 405                        - "never": do nothing
 406                        - "warn": only emit a warning
 407                        - "detect_or_warn": check whether we can do anything
 408                                            about it, warn otherwise (default)
 409     source_address:    Client-side IP address to bind to.
 410     call_home:         Boolean, true iff we are allowed to contact the
 411                        yt-dlp servers for debugging. (BROKEN)
 412     sleep_interval_requests: Number of seconds to sleep between requests
 413                        during extraction
 414     sleep_interval:    Number of seconds to sleep before each download when
 415                        used alone or a lower bound of a range for randomized
 416                        sleep before each download (minimum possible number
 417                        of seconds to sleep) when used along with
 418                        max_sleep_interval.
 419     max_sleep_interval:Upper bound of a range for randomized sleep before each
 420                        download (maximum possible number of seconds to sleep).
 421                        Must only be used along with sleep_interval.
 422                        Actual sleep time will be a random float from range
 423                        [sleep_interval; max_sleep_interval].
 424     sleep_interval_subtitles: Number of seconds to sleep before each subtitle download
 425     listformats:       Print an overview of available video formats and exit.
 426     list_thumbnails:   Print a table of all thumbnails and exit.
 427     match_filter:      A function that gets called with the info_dict of
 428                        every video.
 429                        If it returns a message, the video is ignored.
 430                        If it returns None, the video is downloaded.
 431                        match_filter_func in utils.py is one example for this.
 432     no_color:          Do not emit color codes in output.
 433     geo_bypass:        Bypass geographic restriction via faking X-Forwarded-For
 434                        HTTP header
 435     geo_bypass_country:
 436                        Two-letter ISO 3166-1 alpha-2 country code that will be used for
 437                        explicit geographic restriction bypassing via faking
 438                        X-Forwarded-For HTTP header
 439     geo_bypass_ip_block:
 440                        IP range in CIDR notation that will be used similarly to
 441                        geo_bypass_country
 442
 443     The following options determine which downloader is picked:
 444     external_downloader: A dictionary of protocol keys and the executable of the
 445                        external downloader to use for it. The allowed protocols
 446                        are default|http|ftp|m3u8|dash|rtsp|rtmp|mms.
 447                        Set the value to 'native' to use the native downloader
 448     hls_prefer_native: Deprecated - Use external_downloader = {'m3u8': 'native'}
 449                        or {'m3u8': 'ffmpeg'} instead.
 450                        Use the native HLS downloader instead of ffmpeg/avconv
 451                        if True, otherwise use ffmpeg/avconv if False, otherwise
 452                        use downloader suggested by extractor if None.
 453     compat_opts:       Compatibility options. See "Differences in default behavior".
 454                        The following options do not work when used through the API:
 455                        filename, abort-on-error, multistreams, no-live-chat, format-sort,
 456                        no-clean-infojson, no-playlist-metafiles, no-keep-subs, no-attach-info-json.
 457                        Refer to __init__.py for their implementation
 458     progress_template: Dictionary of templates for progress outputs.
 459                        Allowed keys are 'download', 'postprocess',
 460                        'download-title' (console title) and 'postprocess-title'.
 461                        The template is mapped on a dictionary with keys 'progress' and 'info'
 462
 463     The following parameters are not used by YoutubeDL itself, they are used by
 464     the downloader (see yt_dlp/downloader/common.py):
 465     nopart, updatetime, buffersize, ratelimit, throttledratelimit, min_filesize,
 466     max_filesize, test, noresizebuffer, retries, file_access_retries, fragment_retries,
 467     continuedl, noprogress, xattr_set_filesize, hls_use_mpegts, http_chunk_size,
 468     external_downloader_args, concurrent_fragment_downloads.
 469
 470     The following options are used by the post processors:
 471     prefer_ffmpeg:     If False, use avconv instead of ffmpeg if both are available,
 472                        otherwise prefer ffmpeg. (avconv support is deprecated)
 473     ffmpeg_location:   Location of the ffmpeg/avconv binary; either the path
 474                        to the binary or its containing directory.
 475     postprocessor_args: A dictionary of postprocessor/executable keys (in lower case)
 476                        and a list of additional command-line arguments for the
 477                        postprocessor/executable. The dict can also have "PP+EXE" keys
 478                        which are used when the given exe is used by the given PP.
 479                        Use 'default' as the name for arguments to be passed to all PP
 480                        For compatibility with youtube-dl, a single list of args
 481                        can also be used
 482
 483     The following options are used by the extractors:
 484     extractor_retries: Number of times to retry for known errors
 485     dynamic_mpd:       Whether to process dynamic DASH manifests (default: True)
 486     hls_split_discontinuity: Split HLS playlists to different formats at
 487                        discontinuities such as ad breaks (default: False)
 488     extractor_args:    A dictionary of arguments to be passed to the extractors.
 489                        See "EXTRACTOR ARGUMENTS" for details.
 490                        Eg: {'youtube': {'skip': ['dash', 'hls']}}
 491     mark_watched:      Mark videos watched (even with --simulate). Only for YouTube
 492     youtube_include_dash_manifest: Deprecated - Use extractor_args instead.
 493                        If True (default), DASH manifests and related
 494                        data will be downloaded and processed by extractor.
 495                        You can reduce network I/O by disabling it if you don't
 496                        care about DASH. (only for youtube)
 497     youtube_include_hls_manifest: Deprecated - Use extractor_args instead.
 498                        If True (default), HLS manifests and related
 499                        data will be downloaded and processed by extractor.
 500                        You can reduce network I/O by disabling it if you don't
 501                        care about HLS. (only for youtube)
 502     """
 503
 504     _NUMERIC_FIELDS = set((
 505         'width', 'height', 'tbr', 'abr', 'asr', 'vbr', 'fps', 'filesize', 'filesize_approx',
 506         'timestamp', 'release_timestamp',
 507         'duration', 'view_count', 'like_count', 'dislike_count', 'repost_count',
 508         'average_rating', 'comment_count', 'age_limit',
 509         'start_time', 'end_time',
 510         'chapter_number', 'season_number', 'episode_number',
 511         'track_number', 'disc_number', 'release_year',
 512     ))
 513
 514     _format_fields = {
 515         # NB: Keep in sync with the docstring of extractor/common.py
 516         'url', 'manifest_url', 'manifest_stream_number', 'ext', 'format', 'format_id', 'format_note',
 517         'width', 'height', 'resolution', 'dynamic_range', 'tbr', 'abr', 'acodec', 'asr',
 518         'vbr', 'fps', 'vcodec', 'container', 'filesize', 'filesize_approx',
 519         'player_url', 'protocol', 'fragment_base_url', 'fragments', 'is_from_start',
 520         'preference', 'language', 'language_preference', 'quality', 'source_preference',
 521         'http_headers', 'stretched_ratio', 'no_resume', 'has_drm', 'downloader_options',
 522         'page_url', 'app', 'play_path', 'tc_url', 'flash_version', 'rtmp_live', 'rtmp_conn', 'rtmp_protocol', 'rtmp_real_time'
 523     }
 524     _format_selection_exts = {
 525         'audio': {'m4a', 'mp3', 'ogg', 'aac'},
 526         'video': {'mp4', 'flv', 'webm', '3gp'},
 527         'storyboards': {'mhtml'},
 528     }
 529
 530     def __init__(self, params=None, auto_init=True):
 531         """Create a YoutubeDL object with the given options.
 532         @param auto_init    Whether to load the default extractors and print header (if verbose).
 533                             Set to 'no_verbose_header' to not print the header
 534         """
 535         if params is None:
 536             params = {}
 537         self.params = params
 538         self._ies = {}
 539         self._ies_instances = {}
 540         self._pps = {k: [] for k in POSTPROCESS_WHEN}
 541         self._printed_messages = set()
 542         self._first_webpage_request = True
 543         self._post_hooks = []
 544         self._progress_hooks = []
 545         self._postprocessor_hooks = []
 546         self._download_retcode = 0
 547         self._num_downloads = 0
 548         self._num_videos = 0
 549         self._playlist_level = 0
 550         self._playlist_urls = set()
 551         self.cache = Cache(self)
 552
 553         windows_enable_vt_mode()
 554         self._out_files = {
 555             'error': sys.stderr,
 556             'print': sys.stderr if self.params.get('logtostderr') else sys.stdout,
 557             'console': None if compat_os_name == 'nt' else next(
 558                 filter(supports_terminal_sequences, (sys.stderr, sys.stdout)), None)
 559         }
 560         self._out_files['screen'] = sys.stderr if self.params.get('quiet') else self._out_files['print']
 561         self._allow_colors = {
 562             type_: not self.params.get('no_color') and supports_terminal_sequences(self._out_files[type_])
 563             for type_ in ('screen', 'error')
 564         }
 565
 566         if sys.version_info < (3, 6):
 567             self.report_warning(
 568                 'Python version %d.%d is not supported! Please update to Python 3.6 or above' % sys.version_info[:2])
 569
 570         if self.params.get('allow_unplayable_formats'):
 571             self.report_warning(
 572                 f'You have asked for {self._format_err("UNPLAYABLE", self.Styles.EMPHASIS)} formats to be listed/downloaded. '
 573                 'This is a developer option intended for debugging. \n'
 574                 '         If you experience any issues while using this option, '
 575                 f'{self._format_err("DO NOT", self.Styles.ERROR)} open a bug report')
 576
 577         def check_deprecated(param, option, suggestion):
 578             if self.params.get(param) is not None:
 579                 self.report_warning('%s is deprecated. Use %s instead' % (option, suggestion))
 580                 return True
 581             return False
 582
 583         if check_deprecated('cn_verification_proxy', '--cn-verification-proxy', '--geo-verification-proxy'):
 584             if self.params.get('geo_verification_proxy') is None:
 585                 self.params['geo_verification_proxy'] = self.params['cn_verification_proxy']
 586
 587         check_deprecated('autonumber', '--auto-number', '-o "%(autonumber)s-%(title)s.%(ext)s"')
 588         check_deprecated('usetitle', '--title', '-o "%(title)s-%(id)s.%(ext)s"')
 589         check_deprecated('useid', '--id', '-o "%(id)s.%(ext)s"')
 590
 591         for msg in self.params.get('_warnings', []):
 592             self.report_warning(msg)
 593         for msg in self.params.get('_deprecation_warnings', []):
 594             self.deprecation_warning(msg)
 595
 596         if 'list-formats' in self.params.get('compat_opts', []):
 597             self.params['listformats_table'] = False
 598
 599         if 'overwrites' not in self.params and self.params.get('nooverwrites') is not None:
 600             # nooverwrites was unnecessarily changed to overwrites
 601             # in 0c3d0f51778b153f65c21906031c2e091fcfb641
 602             # This ensures compatibility with both keys
 603             self.params['overwrites'] = not self.params['nooverwrites']
 604         elif self.params.get('overwrites') is None:
 605             self.params.pop('overwrites', None)
 606         else:
 607             self.params['nooverwrites'] = not self.params['overwrites']
 608
 609         self.params.setdefault('forceprint', {})
 610         self.params.setdefault('print_to_file', {})
 611
 612         # Compatibility with older syntax
 613         if not isinstance(params['forceprint'], dict):
 614             self.params['forceprint'] = {'video': params['forceprint']}
 615
 616         if self.params.get('bidi_workaround', False):
 617             try:
 618                 import pty
 619                 master, slave = pty.openpty()
 620                 width = compat_get_terminal_size().columns
 621                 if width is None:
 622                     width_args = []
 623                 else:
 624                     width_args = ['-w', str(width)]
 625                 sp_kwargs = dict(
 626                     stdin=subprocess.PIPE,
 627                     stdout=slave,
 628                     stderr=self._out_files['error'])
 629                 try:
 630                     self._output_process = Popen(['bidiv'] + width_args, **sp_kwargs)
 631                 except OSError:
 632                     self._output_process = Popen(['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
 633                 self._output_channel = os.fdopen(master, 'rb')
 634             except OSError as ose:
 635                 if ose.errno == errno.ENOENT:
 636                     self.report_warning(
 637                         'Could not find fribidi executable, ignoring --bidi-workaround. '
 638                         'Make sure that  fribidi  is an executable file in one of the directories in your $PATH.')
 639                 else:
 640                     raise
 641
 642         if auto_init:
 643             if auto_init != 'no_verbose_header':
 644                 self.print_debug_header()
 645             self.add_default_info_extractors()
 646
 647         if (sys.platform != 'win32'
 648                 and sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
 649                 and not self.params.get('restrictfilenames', False)):
 650             # Unicode filesystem API will throw errors (#1474, #13027)
 651             self.report_warning(
 652                 'Assuming --restrict-filenames since file system encoding '
 653                 'cannot encode all characters. '
 654                 'Set the LC_ALL environment variable to fix this.')
 655             self.params['restrictfilenames'] = True
 656
 657         self.outtmpl_dict = self.parse_outtmpl()
 658
 659         # Creating format selector here allows us to catch syntax errors before the extraction
 660         self.format_selector = (
 661             self.params.get('format') if self.params.get('format') in (None, '-')
 662             else self.params['format'] if callable(self.params['format'])
 663             else self.build_format_selector(self.params['format']))
 664
 665         # Set http_headers defaults according to std_headers
 666         self.params['http_headers'] = merge_headers(std_headers, self.params.get('http_headers', {}))
 667
 668         hooks = {
 669             'post_hooks': self.add_post_hook,
 670             'progress_hooks': self.add_progress_hook,
 671             'postprocessor_hooks': self.add_postprocessor_hook,
 672         }
 673         for opt, fn in hooks.items():
 674             for ph in self.params.get(opt, []):
 675                 fn(ph)
 676
 677         for pp_def_raw in self.params.get('postprocessors', []):
 678             pp_def = dict(pp_def_raw)
 679             when = pp_def.pop('when', 'post_process')
 680             self.add_post_processor(
 681                 get_postprocessor(pp_def.pop('key'))(self, **pp_def),
 682                 when=when)
 683
 684         self._setup_opener()
 685         register_socks_protocols()
 686
 687         def preload_download_archive(fn):
 688             """Preload the archive, if any is specified"""
 689             if fn is None:
 690                 return False
 691             self.write_debug(f'Loading archive file {fn!r}')
 692             try:
 693                 with locked_file(fn, 'r', encoding='utf-8') as archive_file:
 694                     for line in archive_file:
 695                         self.archive.add(line.strip())
 696             except IOError as ioe:
 697                 if ioe.errno != errno.ENOENT:
 698                     raise
 699                 return False
 700             return True
 701
 702         self.archive = set()
 703         preload_download_archive(self.params.get('download_archive'))
 704
 705     def warn_if_short_id(self, argv):
 706         # short YouTube ID starting with dash?
 707         idxs = [
 708             i for i, a in enumerate(argv)
 709             if re.match(r'^-[0-9A-Za-z_-]{10}$', a)]
 710         if idxs:
 711             correct_argv = (
 712                 ['yt-dlp']
 713                 + [a for i, a in enumerate(argv) if i not in idxs]
 714                 + ['--'] + [argv[i] for i in idxs]
 715             )
 716             self.report_warning(
 717                 'Long argument string detected. '
 718                 'Use -- to separate parameters and URLs, like this:\n%s' %
 719                 args_to_str(correct_argv))
 720
 721     def add_info_extractor(self, ie):
 722         """Add an InfoExtractor object to the end of the list."""
 723         ie_key = ie.ie_key()
 724         self._ies[ie_key] = ie
 725         if not isinstance(ie, type):
 726             self._ies_instances[ie_key] = ie
 727             ie.set_downloader(self)
 728
 729     def _get_info_extractor_class(self, ie_key):
 730         ie = self._ies.get(ie_key)
 731         if ie is None:
 732             ie = get_info_extractor(ie_key)
 733             self.add_info_extractor(ie)
 734         return ie
 735
 736     def get_info_extractor(self, ie_key):
 737         """
 738         Get an instance of an IE with name ie_key, it will try to get one from
 739         the _ies list, if there's no instance it will create a new one and add
 740         it to the extractor list.
 741         """
 742         ie = self._ies_instances.get(ie_key)
 743         if ie is None:
 744             ie = get_info_extractor(ie_key)()
 745             self.add_info_extractor(ie)
 746         return ie
 747
 748     def add_default_info_extractors(self):
 749         """
 750         Add the InfoExtractors returned by gen_extractors to the end of the list
 751         """
 752         for ie in gen_extractor_classes():
 753             self.add_info_extractor(ie)
 754
 755     def add_post_processor(self, pp, when='post_process'):
 756         """Add a PostProcessor object to the end of the chain."""
 757         self._pps[when].append(pp)
 758         pp.set_downloader(self)
 759
 760     def add_post_hook(self, ph):
 761         """Add the post hook"""
 762         self._post_hooks.append(ph)
 763
 764     def add_progress_hook(self, ph):
 765         """Add the download progress hook"""
 766         self._progress_hooks.append(ph)
 767
 768     def add_postprocessor_hook(self, ph):
 769         """Add the postprocessing progress hook"""
 770         self._postprocessor_hooks.append(ph)
 771         for pps in self._pps.values():
 772             for pp in pps:
 773                 pp.add_progress_hook(ph)
 774
 775     def _bidi_workaround(self, message):
 776         if not hasattr(self, '_output_channel'):
 777             return message
 778
 779         assert hasattr(self, '_output_process')
 780         assert isinstance(message, compat_str)
 781         line_count = message.count('\n') + 1
 782         self._output_process.stdin.write((message + '\n').encode('utf-8'))
 783         self._output_process.stdin.flush()
 784         res = ''.join(self._output_channel.readline().decode('utf-8')
 785                       for _ in range(line_count))
 786         return res[:-len('\n')]
 787
 788     def _write_string(self, message, out=None, only_once=False):
 789         if only_once:
 790             if message in self._printed_messages:
 791                 return
 792             self._printed_messages.add(message)
 793         write_string(message, out=out, encoding=self.params.get('encoding'))
 794
 795     def to_stdout(self, message, skip_eol=False, quiet=None):
 796         """Print message to stdout"""
 797         if quiet is not None:
 798             self.deprecation_warning('"YoutubeDL.to_stdout" no longer accepts the argument quiet. Use "YoutubeDL.to_screen" instead')
 799         self._write_string(
 800             '%s%s' % (self._bidi_workaround(message), ('' if skip_eol else '\n')),
 801             self._out_files['print'])
 802
 803     def to_screen(self, message, skip_eol=False, quiet=None):
 804         """Print message to screen if not in quiet mode"""
 805         if self.params.get('logger'):
 806             self.params['logger'].debug(message)
 807             return
 808         if (self.params.get('quiet') if quiet is None else quiet) and not self.params.get('verbose'):
 809             return
 810         self._write_string(
 811             '%s%s' % (self._bidi_workaround(message), ('' if skip_eol else '\n')),
 812             self._out_files['screen'])
 813
 814     def to_stderr(self, message, only_once=False):
 815         """Print message to stderr"""
 816         assert isinstance(message, compat_str)
 817         if self.params.get('logger'):
 818             self.params['logger'].error(message)
 819         else:
 820             self._write_string('%s\n' % self._bidi_workaround(message), self._out_files['error'], only_once=only_once)
 821
 822     def _send_console_code(self, code):
 823         if compat_os_name == 'nt' or not self._out_files['console']:
 824             return
 825         self._write_string(code, self._out_files['console'])
 826
 827     def to_console_title(self, message):
 828         if not self.params.get('consoletitle', False):
 829             return
 830         message = remove_terminal_sequences(message)
 831         if compat_os_name == 'nt':
 832             if ctypes.windll.kernel32.GetConsoleWindow():
 833                 # c_wchar_p() might not be necessary if `message` is
 834                 # already of type unicode()
 835                 ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
 836         else:
 837             self._send_console_code(f'\033]0;{message}\007')
 838
 839     def save_console_title(self):
 840         if not self.params.get('consoletitle') or self.params.get('simulate'):
 841             return
 842         self._send_console_code('\033[22;0t')  # Save the title on stack
 843
 844     def restore_console_title(self):
 845         if not self.params.get('consoletitle') or self.params.get('simulate'):
 846             return
 847         self._send_console_code('\033[23;0t')  # Restore the title from stack
 848
 849     def __enter__(self):
 850         self.save_console_title()
 851         return self
 852
 853     def __exit__(self, *args):
 854         self.restore_console_title()
 855
 856         if self.params.get('cookiefile') is not None:
 857             self.cookiejar.save(ignore_discard=True, ignore_expires=True)
 858
 859     def trouble(self, message=None, tb=None, is_error=True):
 860         """Determine action to take when a download problem appears.
 861
 862         Depending on if the downloader has been configured to ignore
 863         download errors or not, this method may throw an exception or
 864         not when errors are found, after printing the message.
 865
 866         @param tb          If given, is additional traceback information
 867         @param is_error    Whether to raise error according to ignorerrors
 868         """
 869         if message is not None:
 870             self.to_stderr(message)
 871         if self.params.get('verbose'):
 872             if tb is None:
 873                 if sys.exc_info()[0]:  # if .trouble has been called from an except block
 874                     tb = ''
 875                     if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
 876                         tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
 877                     tb += encode_compat_str(traceback.format_exc())
 878                 else:
 879                     tb_data = traceback.format_list(traceback.extract_stack())
 880                     tb = ''.join(tb_data)
 881             if tb:
 882                 self.to_stderr(tb)
 883         if not is_error:
 884             return
 885         if not self.params.get('ignoreerrors'):
 886             if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
 887                 exc_info = sys.exc_info()[1].exc_info
 888             else:
 889                 exc_info = sys.exc_info()
 890             raise DownloadError(message, exc_info)
 891         self._download_retcode = 1
 892
    class Styles(Enum):
        """Named text styles; _format_text hands a member's value to format_text as the style"""
        HEADERS = 'yellow'
        EMPHASIS = 'light blue'
        ID = 'green'
        DELIM = 'blue'
        # Used for the "ERROR:" and "DeprecationWarning:" prefixes
        ERROR = 'red'
        # Used for the "WARNING:" prefix. It has the same value as HEADERS,
        # so Enum makes WARNING an alias of HEADERS rather than a separate member
        WARNING = 'yellow'
        SUPPRESS = 'light black'
 901
 902     def _format_text(self, handle, allow_colors, text, f, fallback=None, *, test_encoding=False):
 903         if test_encoding:
 904             original_text = text
 905             # handle.encoding can be None. See https://github.com/yt-dlp/yt-dlp/issues/2711
 906             encoding = self.params.get('encoding') or getattr(handle, 'encoding', None) or 'ascii'
 907             text = text.encode(encoding, 'ignore').decode(encoding)
 908             if fallback is not None and text != original_text:
 909                 text = fallback
 910         if isinstance(f, self.Styles):
 911             f = f.value
 912         return format_text(text, f) if allow_colors else text if fallback is None else fallback
 913
 914     def _format_screen(self, *args, **kwargs):
 915         return self._format_text(
 916             self._out_files['screen'], self._allow_colors['screen'], *args, **kwargs)
 917
 918     def _format_err(self, *args, **kwargs):
 919         return self._format_text(
 920             self._out_files['error'], self._allow_colors['error'], *args, **kwargs)
 921
 922     def report_warning(self, message, only_once=False):
 923         '''
 924         Print the message to stderr, it will be prefixed with 'WARNING:'
 925         If stderr is a tty file the 'WARNING:' will be colored
 926         '''
 927         if self.params.get('logger') is not None:
 928             self.params['logger'].warning(message)
 929         else:
 930             if self.params.get('no_warnings'):
 931                 return
 932             self.to_stderr(f'{self._format_err("WARNING:", self.Styles.WARNING)} {message}', only_once)
 933
 934     def deprecation_warning(self, message):
 935         if self.params.get('logger') is not None:
 936             self.params['logger'].warning(f'DeprecationWarning: {message}')
 937         else:
 938             self.to_stderr(f'{self._format_err("DeprecationWarning:", self.Styles.ERROR)} {message}', True)
 939
 940     def report_error(self, message, *args, **kwargs):
 941         '''
 942         Do the same as trouble, but prefixes the message with 'ERROR:', colored
 943         in red if stderr is a tty file.
 944         '''
 945         self.trouble(f'{self._format_err("ERROR:", self.Styles.ERROR)} {message}', *args, **kwargs)
 946
 947     def write_debug(self, message, only_once=False):
 948         '''Log debug message or Print message to stderr'''
 949         if not self.params.get('verbose', False):
 950             return
 951         message = '[debug] %s' % message
 952         if self.params.get('logger'):
 953             self.params['logger'].debug(message)
 954         else:
 955             self.to_stderr(message, only_once)
 956
 957     def report_file_already_downloaded(self, file_name):
 958         """Report file has already been fully downloaded."""
 959         try:
 960             self.to_screen('[download] %s has already been downloaded' % file_name)
 961         except UnicodeEncodeError:
 962             self.to_screen('[download] The file has already been downloaded')
 963
 964     def report_file_delete(self, file_name):
 965         """Report that existing file will be deleted."""
 966         try:
 967             self.to_screen('Deleting existing file %s' % file_name)
 968         except UnicodeEncodeError:
 969             self.to_screen('Deleting existing file')
 970
 971     def raise_no_formats(self, info, forced=False, *, msg=None):
 972         has_drm = info.get('__has_drm')
 973         ignored, expected = self.params.get('ignore_no_formats_error'), bool(msg)
 974         msg = msg or has_drm and 'This video is DRM protected' or 'No video formats found!'
 975         if forced or not ignored:
 976             raise ExtractorError(msg, video_id=info['id'], ie=info['extractor'],
 977                                  expected=has_drm or ignored or expected)
 978         else:
 979             self.report_warning(msg)
 980
 981     def parse_outtmpl(self):
 982         outtmpl_dict = self.params.get('outtmpl', {})
 983         if not isinstance(outtmpl_dict, dict):
 984             outtmpl_dict = {'default': outtmpl_dict}
 985         # Remove spaces in the default template
 986         if self.params.get('restrictfilenames'):
 987             sanitize = lambda x: x.replace(' - ', ' ').replace(' ', '-')
 988         else:
 989             sanitize = lambda x: x
 990         outtmpl_dict.update({
 991             k: sanitize(v) for k, v in DEFAULT_OUTTMPL.items()
 992             if outtmpl_dict.get(k) is None})
 993         for key, val in outtmpl_dict.items():
 994             if isinstance(val, bytes):
 995                 self.report_warning(
 996                     'Parameter outtmpl is bytes, but should be a unicode string. '
 997                     'Put  from __future__ import unicode_literals  at the top of your code file or consider switching to Python 3.x.')
 998         return outtmpl_dict
 999
1000     def get_output_path(self, dir_type='', filename=None):
1001         paths = self.params.get('paths', {})
1002         assert isinstance(paths, dict)
1003         path = os.path.join(
1004             expand_path(paths.get('home', '').strip()),
1005             expand_path(paths.get(dir_type, '').strip()) if dir_type else '',
1006             filename or '')
1007         return sanitize_path(path, force=self.params.get('windowsfilenames'))
1008
1009     @staticmethod
1010     def _outtmpl_expandpath(outtmpl):
1011         # expand_path translates '%%' into '%' and '$$' into '$'
1012         # correspondingly that is not what we want since we need to keep
1013         # '%%' intact for template dict substitution step. Working around
1014         # with boundary-alike separator hack.
1015         sep = ''.join([random.choice(ascii_letters) for _ in range(32)])
1016         outtmpl = outtmpl.replace('%%', '%{0}%'.format(sep)).replace('$$', '${0}$'.format(sep))
1017
1018         # outtmpl should be expand_path'ed before template dict substitution
1019         # because meta fields may contain env variables we don't want to
1020         # be expanded. For example, for outtmpl "%(title)s.%(ext)s" and
1021         # title "Hello $PATH", we don't want `$PATH` to be expanded.
1022         return expand_path(outtmpl).replace(sep, '')
1023
1024     @staticmethod
1025     def escape_outtmpl(outtmpl):
1026         ''' Escape any remaining strings like %s, %abc% etc. '''
1027         return re.sub(
1028             STR_FORMAT_RE_TMPL.format('', '(?![%(\0])'),
1029             lambda mobj: ('' if mobj.group('has_key') else '%') + mobj.group(0),
1030             outtmpl)
1031
1032     @classmethod
1033     def validate_outtmpl(cls, outtmpl):
1034         ''' @return None or Exception object '''
1035         outtmpl = re.sub(
1036             STR_FORMAT_RE_TMPL.format('[^)]*', '[ljqBUDS]'),
1037             lambda mobj: f'{mobj.group(0)[:-1]}s',
1038             cls._outtmpl_expandpath(outtmpl))
1039         try:
1040             cls.escape_outtmpl(outtmpl) % collections.defaultdict(int)
1041             return None
1042         except ValueError as err:
1043             return err
1044
1045     @staticmethod
1046     def _copy_infodict(info_dict):
1047         info_dict = dict(info_dict)
1048         info_dict.pop('__postprocessors', None)
1049         return info_dict
1050
1051     def prepare_outtmpl(self, outtmpl, info_dict, sanitize=False):
1052         """ Make the outtmpl and info_dict suitable for substitution: ydl.escape_outtmpl(outtmpl) % info_dict
1053         @param sanitize    Whether to sanitize the output as a filename.
1054                            For backward compatibility, a function can also be passed
1055         """
1056
1057         info_dict.setdefault('epoch', int(time.time()))  # keep epoch consistent once set
1058
1059         info_dict = self._copy_infodict(info_dict)
1060         info_dict['duration_string'] = (  # %(duration>%H-%M-%S)s is wrong if duration > 24hrs
1061             formatSeconds(info_dict['duration'], '-' if sanitize else ':')
1062             if info_dict.get('duration', None) is not None
1063             else None)
1064         info_dict['autonumber'] = self.params.get('autonumber_start', 1) - 1 + self._num_downloads
1065         info_dict['video_autonumber'] = self._num_videos
1066         if info_dict.get('resolution') is None:
1067             info_dict['resolution'] = self.format_resolution(info_dict, default=None)
1068
1069         # For fields playlist_index, playlist_autonumber and autonumber convert all occurrences
1070         # of %(field)s to %(field)0Nd for backward compatibility
1071         field_size_compat_map = {
1072             'playlist_index': number_of_digits(info_dict.get('_last_playlist_index') or 0),
1073             'playlist_autonumber': number_of_digits(info_dict.get('n_entries') or 0),
1074             'autonumber': self.params.get('autonumber_size') or 5,
1075         }
1076
1077         TMPL_DICT = {}
1078         EXTERNAL_FORMAT_RE = re.compile(STR_FORMAT_RE_TMPL.format('[^)]*', f'[{STR_FORMAT_TYPES}ljqBUDS]'))
1079         MATH_FUNCTIONS = {
1080             '+': float.__add__,
1081             '-': float.__sub__,
1082         }
1083         # Field is of the form key1.key2...
1084         # where keys (except first) can be string, int or slice
1085         FIELD_RE = r'\w*(?:\.(?:\w+|{num}|{num}?(?::{num}?){{1,2}}))*'.format(num=r'(?:-?\d+)')
1086         MATH_FIELD_RE = r'''(?:{field}|{num})'''.format(field=FIELD_RE, num=r'-?\d+(?:.\d+)?')
1087         MATH_OPERATORS_RE = r'(?:%s)' % '|'.join(map(re.escape, MATH_FUNCTIONS.keys()))
1088         INTERNAL_FORMAT_RE = re.compile(r'''(?x)
1089             (?P<negate>-)?
1090             (?P<fields>{field})
1091             (?P<maths>(?:{math_op}{math_field})*)
1092             (?:>(?P<strf_format>.+?))?
1093             (?P<remaining>
1094                 (?P<alternate>(?<!\\),[^|&)]+)?
1095                 (?:&(?P<replacement>.*?))?
1096                 (?:\|(?P<default>.*?))?
1097             )$'''.format(field=FIELD_RE, math_op=MATH_OPERATORS_RE, math_field=MATH_FIELD_RE))
1098
1099         def _traverse_infodict(k):
1100             k = k.split('.')
1101             if k[0] == '':
1102                 k.pop(0)
1103             return traverse_obj(info_dict, k, is_user_input=True, traverse_string=True)
1104
1105         def get_value(mdict):
1106             # Object traversal
1107             value = _traverse_infodict(mdict['fields'])
1108             # Negative
1109             if mdict['negate']:
1110                 value = float_or_none(value)
1111                 if value is not None:
1112                     value *= -1
1113             # Do maths
1114             offset_key = mdict['maths']
1115             if offset_key:
1116                 value = float_or_none(value)
1117                 operator = None
1118                 while offset_key:
1119                     item = re.match(
1120                         MATH_FIELD_RE if operator else MATH_OPERATORS_RE,
1121                         offset_key).group(0)
1122                     offset_key = offset_key[len(item):]
1123                     if operator is None:
1124                         operator = MATH_FUNCTIONS[item]
1125                         continue
1126                     item, multiplier = (item[1:], -1) if item[0] == '-' else (item, 1)
1127                     offset = float_or_none(item)
1128                     if offset is None:
1129                         offset = float_or_none(_traverse_infodict(item))
1130                     try:
1131                         value = operator(value, multiplier * offset)
1132                     except (TypeError, ZeroDivisionError):
1133                         return None
1134                     operator = None
1135             # Datetime formatting
1136             if mdict['strf_format']:
1137                 value = strftime_or_none(value, mdict['strf_format'].replace('\\,', ','))
1138
1139             return value
1140
1141         na = self.params.get('outtmpl_na_placeholder', 'NA')
1142
1143         def filename_sanitizer(key, value, restricted=self.params.get('restrictfilenames')):
1144             return sanitize_filename(str(value), restricted=restricted, is_id=(
1145                 bool(re.search(r'(^|[_.])id(\.|$)', key))
1146                 if 'filename-sanitization' in self.params.get('compat_opts', [])
1147                 else NO_DEFAULT))
1148
1149         sanitizer = sanitize if callable(sanitize) else filename_sanitizer
1150         sanitize = bool(sanitize)
1151
1152         def _dumpjson_default(obj):
1153             if isinstance(obj, (set, LazyList)):
1154                 return list(obj)
1155             return repr(obj)
1156
1157         def create_key(outer_mobj):
1158             if not outer_mobj.group('has_key'):
1159                 return outer_mobj.group(0)
1160             key = outer_mobj.group('key')
1161             mobj = re.match(INTERNAL_FORMAT_RE, key)
1162             initial_field = mobj.group('fields') if mobj else ''
1163             value, replacement, default = None, None, na
1164             while mobj:
1165                 mobj = mobj.groupdict()
1166                 default = mobj['default'] if mobj['default'] is not None else default
1167                 value = get_value(mobj)
1168                 replacement = mobj['replacement']
1169                 if value is None and mobj['alternate']:
1170                     mobj = re.match(INTERNAL_FORMAT_RE, mobj['remaining'][1:])
1171                 else:
1172                     break
1173
1174             fmt = outer_mobj.group('format')
1175             if fmt == 's' and value is not None and key in field_size_compat_map.keys():
1176                 fmt = '0{:d}d'.format(field_size_compat_map[key])
1177
1178             value = default if value is None else value if replacement is None else replacement
1179
1180             flags = outer_mobj.group('conversion') or ''
1181             str_fmt = f'{fmt[:-1]}s'
1182             if fmt[-1] == 'l':  # list
1183                 delim = '\n' if '#' in flags else ', '
1184                 value, fmt = delim.join(map(str, variadic(value, allowed_types=(str, bytes)))), str_fmt
1185             elif fmt[-1] == 'j':  # json
1186                 value, fmt = json.dumps(value, default=_dumpjson_default, indent=4 if '#' in flags else None), str_fmt
1187             elif fmt[-1] == 'q':  # quoted
1188                 value = map(str, variadic(value) if '#' in flags else [value])
1189                 value, fmt = ' '.join(map(compat_shlex_quote, value)), str_fmt
1190             elif fmt[-1] == 'B':  # bytes
1191                 value = f'%{str_fmt}'.encode('utf-8') % str(value).encode('utf-8')
1192                 value, fmt = value.decode('utf-8', 'ignore'), 's'
1193             elif fmt[-1] == 'U':  # unicode normalized
1194                 value, fmt = unicodedata.normalize(
1195                     # "+" = compatibility equivalence, "#" = NFD
1196                     'NF%s%s' % ('K' if '+' in flags else '', 'D' if '#' in flags else 'C'),
1197                     value), str_fmt
1198             elif fmt[-1] == 'D':  # decimal suffix
1199                 num_fmt, fmt = fmt[:-1].replace('#', ''), 's'
1200                 value = format_decimal_suffix(value, f'%{num_fmt}f%s' if num_fmt else '%d%s',
1201                                               factor=1024 if '#' in flags else 1000)
1202             elif fmt[-1] == 'S':  # filename sanitization
1203                 value, fmt = filename_sanitizer(initial_field, value, restricted='#' in flags), str_fmt
1204             elif fmt[-1] == 'c':
1205                 if value:
1206                     value = str(value)[0]
1207                 else:
1208                     fmt = str_fmt
1209             elif fmt[-1] not in 'rs':  # numeric
1210                 value = float_or_none(value)
1211                 if value is None:
1212                     value, fmt = default, 's'
1213
1214             if sanitize:
1215                 if fmt[-1] == 'r':
1216                     # If value is an object, sanitize might convert it to a string
1217                     # So we convert it to repr first
1218                     value, fmt = repr(value), str_fmt
1219                 if fmt[-1] in 'csr':
1220                     value = sanitizer(initial_field, value)
1221
1222             key = '%s\0%s' % (key.replace('%', '%\0'), outer_mobj.group('format'))
1223             TMPL_DICT[key] = value
1224             return '{prefix}%({key}){fmt}'.format(key=key, fmt=fmt, prefix=outer_mobj.group('prefix'))
1225
1226         return EXTERNAL_FORMAT_RE.sub(create_key, outtmpl), TMPL_DICT
1227
1228     def evaluate_outtmpl(self, outtmpl, info_dict, *args, **kwargs):
1229         outtmpl, info_dict = self.prepare_outtmpl(outtmpl, info_dict, *args, **kwargs)
1230         return self.escape_outtmpl(outtmpl) % info_dict
1231
1232     def _prepare_filename(self, info_dict, *, outtmpl=None, tmpl_type=None):
1233         assert None in (outtmpl, tmpl_type), 'outtmpl and tmpl_type are mutually exclusive'
1234         if outtmpl is None:
1235             outtmpl = self.outtmpl_dict.get(tmpl_type or 'default', self.outtmpl_dict['default'])
1236         try:
1237             outtmpl = self._outtmpl_expandpath(outtmpl)
1238             filename = self.evaluate_outtmpl(outtmpl, info_dict, True)
1239             if not filename:
1240                 return None
1241
1242             if tmpl_type in ('', 'temp'):
1243                 final_ext, ext = self.params.get('final_ext'), info_dict.get('ext')
1244                 if final_ext and ext and final_ext != ext and filename.endswith(f'.{final_ext}'):
1245                     filename = replace_extension(filename, ext, final_ext)
1246             elif tmpl_type:
1247                 force_ext = OUTTMPL_TYPES[tmpl_type]
1248                 if force_ext:
1249                     filename = replace_extension(filename, force_ext, info_dict.get('ext'))
1250
1251             # https://github.com/blackjack4494/youtube-dlc/issues/85
1252             trim_file_name = self.params.get('trim_file_name', False)
1253             if trim_file_name:
1254                 no_ext, *ext = filename.rsplit('.', 2)
1255                 filename = join_nonempty(no_ext[:trim_file_name], *ext, delim='.')
1256
1257             return filename
1258         except ValueError as err:
1259             self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
1260             return None
1261
1262     def prepare_filename(self, info_dict, dir_type='', *, outtmpl=None, warn=False):
1263         """Generate the output filename"""
1264         if outtmpl:
1265             assert not dir_type, 'outtmpl and dir_type are mutually exclusive'
1266             dir_type = None
1267         filename = self._prepare_filename(info_dict, tmpl_type=dir_type, outtmpl=outtmpl)
1268         if not filename and dir_type not in ('', 'temp'):
1269             return ''
1270
1271         if warn:
1272             if not self.params.get('paths'):
1273                 pass
1274             elif filename == '-':
1275                 self.report_warning('--paths is ignored when an outputting to stdout', only_once=True)
1276             elif os.path.isabs(filename):
1277                 self.report_warning('--paths is ignored since an absolute path is given in output template', only_once=True)
1278         if filename == '-' or not filename:
1279             return filename
1280
1281         return self.get_output_path(dir_type, filename)
1282
1283     def _match_entry(self, info_dict, incomplete=False, silent=False):
1284         """ Returns None if the file should be downloaded """
1285
1286         video_title = info_dict.get('title', info_dict.get('id', 'video'))
1287
1288         def check_filter():
1289             if 'title' in info_dict:
1290                 # This can happen when we're just evaluating the playlist
1291                 title = info_dict['title']
1292                 matchtitle = self.params.get('matchtitle', False)
1293                 if matchtitle:
1294                     if not re.search(matchtitle, title, re.IGNORECASE):
1295                         return '"' + title + '" title did not match pattern "' + matchtitle + '"'
1296                 rejecttitle = self.params.get('rejecttitle', False)
1297                 if rejecttitle:
1298                     if re.search(rejecttitle, title, re.IGNORECASE):
1299                         return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
1300             date = info_dict.get('upload_date')
1301             if date is not None:
1302                 dateRange = self.params.get('daterange', DateRange())
1303                 if date not in dateRange:
1304                     return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
1305             view_count = info_dict.get('view_count')
1306             if view_count is not None:
1307                 min_views = self.params.get('min_views')
1308                 if min_views is not None and view_count < min_views:
1309                     return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
1310                 max_views = self.params.get('max_views')
1311                 if max_views is not None and view_count > max_views:
1312                     return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
1313             if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
1314                 return 'Skipping "%s" because it is age restricted' % video_title
1315
1316             match_filter = self.params.get('match_filter')
1317             if match_filter is not None:
1318                 try:
1319                     ret = match_filter(info_dict, incomplete=incomplete)
1320                 except TypeError:
1321                     # For backward compatibility
1322                     ret = None if incomplete else match_filter(info_dict)
1323                 if ret is not None:
1324                     return ret
1325             return None
1326
1327         if self.in_download_archive(info_dict):
1328             reason = '%s has already been recorded in the archive' % video_title
1329             break_opt, break_err = 'break_on_existing', ExistingVideoReached
1330         else:
1331             reason = check_filter()
1332             break_opt, break_err = 'break_on_reject', RejectedVideoReached
1333         if reason is not None:
1334             if not silent:
1335                 self.to_screen('[download] ' + reason)
1336             if self.params.get(break_opt, False):
1337                 raise break_err()
1338         return reason
1339
1340     @staticmethod
1341     def add_extra_info(info_dict, extra_info):
1342         '''Set the keys from extra_info in info dict if they are missing'''
1343         for key, value in extra_info.items():
1344             info_dict.setdefault(key, value)
1345
1346     def extract_info(self, url, download=True, ie_key=None, extra_info=None,
1347                      process=True, force_generic_extractor=False):
1348         """
1349         Return a list with a dictionary for each video extracted.
1350
1351         Arguments:
1352         url -- URL to extract
1353
1354         Keyword arguments:
1355         download -- whether to download videos during extraction
1356         ie_key -- extractor key hint
1357         extra_info -- dictionary containing the extra values to add to each result
1358         process -- whether to resolve all unresolved references (URLs, playlist items),
1359             must be True for download to work.
1360         force_generic_extractor -- force using the generic extractor
1361         """
1362
1363         if extra_info is None:
1364             extra_info = {}
1365
1366         if not ie_key and force_generic_extractor:
1367             ie_key = 'Generic'
1368
1369         if ie_key:
1370             ies = {ie_key: self._get_info_extractor_class(ie_key)}
1371         else:
1372             ies = self._ies
1373
1374         for ie_key, ie in ies.items():
1375             if not ie.suitable(url):
1376                 continue
1377
1378             if not ie.working():
1379                 self.report_warning('The program functionality for this site has been marked as broken, '
1380                                     'and will probably not work.')
1381
1382             temp_id = ie.get_temp_id(url)
1383             if temp_id is not None and self.in_download_archive({'id': temp_id, 'ie_key': ie_key}):
1384                 self.to_screen(f'[{ie_key}] {temp_id}: has already been recorded in the archive')
1385                 if self.params.get('break_on_existing', False):
1386                     raise ExistingVideoReached()
1387                 break
1388             return self.__extract_info(url, self.get_info_extractor(ie_key), download, extra_info, process)
1389         else:
1390             self.report_error('no suitable InfoExtractor for URL %s' % url)
1391
1392     def __handle_extraction_exceptions(func):
1393         @functools.wraps(func)
1394         def wrapper(self, *args, **kwargs):
1395             while True:
1396                 try:
1397                     return func(self, *args, **kwargs)
1398                 except (DownloadCancelled, LazyList.IndexError, PagedList.IndexError):
1399                     raise
1400                 except ReExtractInfo as e:
1401                     if e.expected:
1402                         self.to_screen(f'{e}; Re-extracting data')
1403                     else:
1404                         self.to_stderr('\r')
1405                         self.report_warning(f'{e}; Re-extracting data')
1406                     continue
1407                 except GeoRestrictedError as e:
1408                     msg = e.msg
1409                     if e.countries:
1410                         msg += '\nThis video is available in %s.' % ', '.join(
1411                             map(ISO3166Utils.short2full, e.countries))
1412                     msg += '\nYou might want to use a VPN or a proxy server (with --proxy) to workaround.'
1413                     self.report_error(msg)
1414                 except ExtractorError as e:  # An error we somewhat expected
1415                     self.report_error(str(e), e.format_traceback())
1416                 except Exception as e:
1417                     if self.params.get('ignoreerrors'):
1418                         self.report_error(str(e), tb=encode_compat_str(traceback.format_exc()))
1419                     else:
1420                         raise
1421                 break
1422         return wrapper
1423
    def _wait_for_video(self, ie_result):
        """Wait before retrying extraction of a video that has no formats yet.

        Does nothing unless the 'wait_for_video' param is set, `ie_result` is
        of type 'video' and it has neither 'formats' nor 'url'.

        Otherwise this never returns normally: it shows a countdown and then
        raises ReExtractInfo (with expected=True), either when the wait period
        has ended or when the user presses Ctrl+C.
        """
        if (not self.params.get('wait_for_video')
                or ie_result.get('_type', 'video') != 'video'
                or ie_result.get('formats') or ie_result.get('url')):
            return

        # Formats a duration in seconds as HH:MM:SS (the last tuple item is dropped)
        format_dur = lambda dur: '%02d:%02d:%02d' % timetuple_from_msec(dur * 1000)[:-1]
        last_msg = ''

        def progress(msg):
            # Rewrite the current line, padding with spaces so that leftovers
            # of a longer previous message are overwritten
            nonlocal last_msg
            self.to_screen(msg + ' ' * (len(last_msg) - len(msg)) + '\r', skip_eol=True)
            last_msg = msg

        min_wait, max_wait = self.params.get('wait_for_video')
        # Seconds until release; presumably None when 'release_timestamp' is
        # missing or unusable -- depends on try_get, TODO confirm
        diff = try_get(ie_result, lambda x: x['release_timestamp'] - time.time())
        if diff is None and ie_result.get('live_status') == 'is_upcoming':
            # Unknown release time: pick a random wait within the configured range,
            # or whichever bound is set when only one of them is
            diff = round(random.uniform(min_wait, max_wait) if (max_wait and min_wait) else (max_wait or min_wait), 0)
            self.report_warning('Release time of video is not known')
        elif (diff or 0) <= 0:
            self.report_warning('Video should already be available according to extracted info')
        # Clamp the wait to the configured [min_wait, max_wait] range
        diff = min(max(diff or 0, min_wait or 0), max_wait or float('inf'))
        self.to_screen(f'[wait] Waiting for {format_dur(diff)} - Press Ctrl+C to try now')

        wait_till = time.time() + diff
        try:
            while True:
                diff = wait_till - time.time()
                if diff <= 0:
                    progress('')
                    raise ReExtractInfo('[wait] Wait period ended', expected=True)
                progress(f'[wait] Remaining time until next attempt: {self._format_screen(format_dur(diff), self.Styles.EMPHASIS)}')
                time.sleep(1)
        except KeyboardInterrupt:
            # Ctrl+C does not abort; it just ends the wait early
            progress('')
            raise ReExtractInfo('[wait] Interrupted by user', expected=True)
        except BaseException as e:
            # For anything but our own ReExtractInfo, terminate the progress
            # line before propagating the exception
            if not isinstance(e, ReExtractInfo):
                self.to_screen('')
            raise
1464
1465     @__handle_extraction_exceptions
1466     def __extract_info(self, url, ie, download, extra_info, process):
1467         ie_result = ie.extract(url)
1468         if ie_result is None:  # Finished already (backwards compatibility; listformats and friends should be moved here)
1469             return
1470         if isinstance(ie_result, list):
1471             # Backwards compatibility: old IE result format
1472             ie_result = {
1473                 '_type': 'compat_list',
1474                 'entries': ie_result,
1475             }
1476         if extra_info.get('original_url'):
1477             ie_result.setdefault('original_url', extra_info['original_url'])
1478         self.add_default_extra_info(ie_result, ie, url)
1479         if process:
1480             self._wait_for_video(ie_result)
1481             return self.process_ie_result(ie_result, download, extra_info)
1482         else:
1483             return ie_result
1484
1485     def add_default_extra_info(self, ie_result, ie, url):
1486         if url is not None:
1487             self.add_extra_info(ie_result, {
1488                 'webpage_url': url,
1489                 'original_url': url,
1490             })
1491         webpage_url = ie_result.get('webpage_url')
1492         if webpage_url:
1493             self.add_extra_info(ie_result, {
1494                 'webpage_url_basename': url_basename(webpage_url),
1495                 'webpage_url_domain': get_domain(webpage_url),
1496             })
1497         if ie is not None:
1498             self.add_extra_info(ie_result, {
1499                 'extractor': ie.IE_NAME,
1500                 'extractor_key': ie.ie_key(),
1501             })
1502
1503     def process_ie_result(self, ie_result, download=True, extra_info=None):
1504         """
1505         Take the result of the ie(may be modified) and resolve all unresolved
1506         references (URLs, playlist items).
1507
1508         It will also download the videos if 'download'.
1509         Returns the resolved ie_result.
1510         """
1511         if extra_info is None:
1512             extra_info = {}
1513         result_type = ie_result.get('_type', 'video')
1514
1515         if result_type in ('url', 'url_transparent'):
1516             ie_result['url'] = sanitize_url(ie_result['url'])
1517             if ie_result.get('original_url'):
1518                 extra_info.setdefault('original_url', ie_result['original_url'])
1519
1520             extract_flat = self.params.get('extract_flat', False)
1521             if ((extract_flat == 'in_playlist' and 'playlist' in extra_info)
1522                     or extract_flat is True):
1523                 info_copy = ie_result.copy()
1524                 ie = try_get(ie_result.get('ie_key'), self.get_info_extractor)
1525                 if ie and not ie_result.get('id'):
1526                     info_copy['id'] = ie.get_temp_id(ie_result['url'])
1527                 self.add_default_extra_info(info_copy, ie, ie_result['url'])
1528                 self.add_extra_info(info_copy, extra_info)
1529                 info_copy, _ = self.pre_process(info_copy)
1530                 self.__forced_printings(info_copy, self.prepare_filename(info_copy), incomplete=True)
1531                 if self.params.get('force_write_download_archive', False):
1532                     self.record_download_archive(info_copy)
1533                 return ie_result
1534
1535         if result_type == 'video':
1536             self.add_extra_info(ie_result, extra_info)
1537             ie_result = self.process_video_result(ie_result, download=download)
1538             additional_urls = (ie_result or {}).get('additional_urls')
1539             if additional_urls:
1540                 # TODO: Improve MetadataParserPP to allow setting a list
1541                 if isinstance(additional_urls, compat_str):
1542                     additional_urls = [additional_urls]
1543                 self.to_screen(
1544                     '[info] %s: %d additional URL(s) requested' % (ie_result['id'], len(additional_urls)))
1545                 self.write_debug('Additional URLs: "%s"' % '", "'.join(additional_urls))
1546                 ie_result['additional_entries'] = [
1547                     self.extract_info(
1548                         url, download, extra_info=extra_info,
1549                         force_generic_extractor=self.params.get('force_generic_extractor'))
1550                     for url in additional_urls
1551                 ]
1552             return ie_result
1553         elif result_type == 'url':
1554             # We have to add extra_info to the results because it may be
1555             # contained in a playlist
1556             return self.extract_info(
1557                 ie_result['url'], download,
1558                 ie_key=ie_result.get('ie_key'),
1559                 extra_info=extra_info)
1560         elif result_type == 'url_transparent':
1561             # Use the information from the embedding page
1562             info = self.extract_info(
1563                 ie_result['url'], ie_key=ie_result.get('ie_key'),
1564                 extra_info=extra_info, download=False, process=False)
1565
1566             # extract_info may return None when ignoreerrors is enabled and
1567             # extraction failed with an error, don't crash and return early
1568             # in this case
1569             if not info:
1570                 return info
1571
1572             new_result = info.copy()
1573             new_result.update(filter_dict(ie_result, lambda k, v: (
1574                 v is not None and k not in {'_type', 'url', 'id', 'extractor', 'extractor_key', 'ie_key'})))
1575
1576             # Extracted info may not be a video result (i.e.
1577             # info.get('_type', 'video') != video) but rather an url or
1578             # url_transparent. In such cases outer metadata (from ie_result)
1579             # should be propagated to inner one (info). For this to happen
1580             # _type of info should be overridden with url_transparent. This
1581             # fixes issue from https://github.com/ytdl-org/youtube-dl/pull/11163.
1582             if new_result.get('_type') == 'url':
1583                 new_result['_type'] = 'url_transparent'
1584
1585             return self.process_ie_result(
1586                 new_result, download=download, extra_info=extra_info)
1587         elif result_type in ('playlist', 'multi_video'):
1588             # Protect from infinite recursion due to recursively nested playlists
1589             # (see https://github.com/ytdl-org/youtube-dl/issues/27833)
1590             webpage_url = ie_result['webpage_url']
1591             if webpage_url in self._playlist_urls:
1592                 self.to_screen(
1593                     '[download] Skipping already downloaded playlist: %s'
1594                     % ie_result.get('title') or ie_result.get('id'))
1595                 return
1596
1597             self._playlist_level += 1
1598             self._playlist_urls.add(webpage_url)
1599             self._fill_common_fields(ie_result, False)
1600             self._sanitize_thumbnails(ie_result)
1601             try:
1602                 return self.__process_playlist(ie_result, download)
1603             finally:
1604                 self._playlist_level -= 1
1605                 if not self._playlist_level:
1606                     self._playlist_urls.clear()
1607         elif result_type == 'compat_list':
1608             self.report_warning(
1609                 'Extractor %s returned a compat_list result. '
1610                 'It needs to be updated.' % ie_result.get('extractor'))
1611
1612             def _fixup(r):
1613                 self.add_extra_info(r, {
1614                     'extractor': ie_result['extractor'],
1615                     'webpage_url': ie_result['webpage_url'],
1616                     'webpage_url_basename': url_basename(ie_result['webpage_url']),
1617                     'webpage_url_domain': get_domain(ie_result['webpage_url']),
1618                     'extractor_key': ie_result['extractor_key'],
1619                 })
1620                 return r
1621             ie_result['entries'] = [
1622                 self.process_ie_result(_fixup(r), download, extra_info)
1623                 for r in ie_result['entries']
1624             ]
1625             return ie_result
1626         else:
1627             raise Exception('Invalid result type: %s' % result_type)
1628
1629     def _ensure_dir_exists(self, path):
1630         return make_dir(path, self.report_error)
1631
1632     @staticmethod
1633     def _playlist_infodict(ie_result, **kwargs):
1634         return {
1635             **ie_result,
1636             'playlist': ie_result.get('title') or ie_result.get('id'),
1637             'playlist_id': ie_result.get('id'),
1638             'playlist_title': ie_result.get('title'),
1639             'playlist_uploader': ie_result.get('uploader'),
1640             'playlist_uploader_id': ie_result.get('uploader_id'),
1641             'playlist_index': 0,
1642             **kwargs,
1643         }
1644
    def __process_playlist(self, ie_result, download):
        """Select, filter and process the entries of a playlist result.

        The entries to handle are chosen through the 'playlist_items',
        'playliststart' and 'playlistend' params, optionally reversed or
        shuffled, and passed one by one to process_ie_result.

        Returns the updated `ie_result` (its 'entries' replaced by the
        processed results) after running the 'playlist' postprocessors, or
        None if writing one of the playlist metadata files yields None.

        Raises EntryNotInPlaylist if `ie_result` has no 'entries', or if
        'requested_entries' is set and a selected entry is unavailable.
        """
        # We process each entry in the playlist
        playlist = ie_result.get('title') or ie_result.get('id')
        self.to_screen('[download] Downloading playlist: %s' % playlist)

        if 'entries' not in ie_result:
            raise EntryNotInPlaylist('There are no entries')

        # Sentinel marking positions for which no entry is available
        MissingEntry = object()
        incomplete_entries = bool(ie_result.get('requested_entries'))
        if incomplete_entries:
            def fill_missing_entries(entries, indices):
                # Put each entry at its 1-based playlist position,
                # leaving MissingEntry in all the gaps
                ret = [MissingEntry] * max(indices)
                for i, entry in zip(indices, entries):
                    ret[i - 1] = entry
                return ret
            ie_result['entries'] = fill_missing_entries(ie_result['entries'], ie_result['requested_entries'])

        playlist_results = []

        playliststart = self.params.get('playliststart', 1)
        playlistend = self.params.get('playlistend')
        # For backwards compatibility, interpret -1 as whole list
        if playlistend == -1:
            playlistend = None

        playlistitems_str = self.params.get('playlist_items')
        playlistitems = None
        if playlistitems_str is not None:
            # Expands a spec such as '1,3-5' into 1, 3, 4, 5
            # NOTE(review): the parameter name shadows the builtin `format`
            def iter_playlistitems(format):
                for string_segment in format.split(','):
                    if '-' in string_segment:
                        start, end = string_segment.split('-')
                        for item in range(int(start), int(end) + 1):
                            yield int(item)
                    else:
                        yield int(string_segment)
            playlistitems = orderedSet(iter_playlistitems(playlistitems_str))

        ie_entries = ie_result['entries']
        if isinstance(ie_entries, list):
            playlist_count = len(ie_entries)
            # The remaining %d is filled in with the number of selected entries later
            msg = f'Collected {playlist_count} videos; downloading %d of them'
            ie_result['playlist_count'] = ie_result.get('playlist_count') or playlist_count

            def get_entry(i):
                return ie_entries[i - 1]
        else:
            msg = 'Downloading %d videos'
            if not isinstance(ie_entries, (PagedList, LazyList)):
                ie_entries = LazyList(ie_entries)
            elif isinstance(ie_entries, InAdvancePagedList):
                if ie_entries._pagesize == 1:
                    # NOTE(review): this value is not read again in this function
                    playlist_count = ie_entries._pagecount

            def get_entry(i):
                # Looking up a lazy entry may trigger extraction, so it gets
                # the same error handling as the extraction methods
                return YoutubeDL.__handle_extraction_exceptions(
                    lambda self, i: ie_entries[i - 1]
                )(self, i)

        # First pass: collect the selected entries (None for entries that could not be obtained)
        entries, broken = [], False
        items = playlistitems if playlistitems is not None else itertools.count(playliststart)
        for i in items:
            if i == 0:
                continue
            if playlistitems is None and playlistend is not None and playlistend < i:
                break
            entry = None
            try:
                entry = get_entry(i)
                if entry is MissingEntry:
                    raise EntryNotInPlaylist()
            except (IndexError, EntryNotInPlaylist):
                if incomplete_entries:
                    raise EntryNotInPlaylist(f'Entry {i} cannot be found')
                elif not playlistitems:
                    # Without explicit items, running out of entries ends the playlist
                    break
            entries.append(entry)
            try:
                if entry is not None:
                    # Called only for its side effect of raising when
                    # break_on_existing/break_on_reject applies
                    self._match_entry(entry, incomplete=True, silent=True)
            except (ExistingVideoReached, RejectedVideoReached):
                broken = True
                break
        ie_result['entries'] = entries

        # Save playlist_index before re-ordering
        entries = [
            ((playlistitems[i - 1] if playlistitems else i + playliststart - 1), entry)
            for i, entry in enumerate(entries, 1)
            if entry is not None]
        n_entries = len(entries)

        # The number of collected entries is only the playlist size
        # if the whole playlist was requested and walked through
        if not (ie_result.get('playlist_count') or broken or playlistitems or playlistend):
            ie_result['playlist_count'] = n_entries

        if not playlistitems and (playliststart != 1 or playlistend):
            playlistitems = list(range(playliststart, playliststart + n_entries))
        ie_result['requested_entries'] = playlistitems

        _infojson_written = False
        write_playlist_files = self.params.get('allow_playlist_files', True)
        if write_playlist_files and self.params.get('list_thumbnails'):
            self.list_thumbnails(ie_result)
        if write_playlist_files and not self.params.get('simulate'):
            ie_copy = self._playlist_infodict(ie_result, n_entries=n_entries)
            _infojson_written = self._write_info_json(
                'playlist', ie_result, self.prepare_filename(ie_copy, 'pl_infojson'))
            # A None result from the writers aborts the whole playlist
            if _infojson_written is None:
                return
            if self._write_description('playlist', ie_result,
                                       self.prepare_filename(ie_copy, 'pl_description')) is None:
                return
            # TODO: This should be passed to ThumbnailsConvertor if necessary
            self._write_thumbnails('playlist', ie_copy, self.prepare_filename(ie_copy, 'pl_thumbnail'))

        if self.params.get('playlistreverse', False):
            entries = entries[::-1]
        if self.params.get('playlistrandom', False):
            random.shuffle(entries)

        x_forwarded_for = ie_result.get('__x_forwarded_for_ip')

        self.to_screen('[%s] playlist %s: %s' % (ie_result['extractor'], playlist, msg % n_entries))
        failures = 0
        max_failures = self.params.get('skip_playlist_after_errors') or float('inf')
        # Second pass: process the collected entries
        for i, entry_tuple in enumerate(entries, 1):
            playlist_index, entry = entry_tuple
            if 'playlist-index' in self.params.get('compat_opts', []):
                # Compat behaviour: index by position in the (possibly re-ordered) list
                playlist_index = playlistitems[i - 1] if playlistitems else i + playliststart - 1
            self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
            # This __x_forwarded_for_ip thing is a bit ugly but requires
            # minimal changes
            if x_forwarded_for:
                entry['__x_forwarded_for_ip'] = x_forwarded_for
            # Playlist level fields passed on to every entry
            extra = {
                'n_entries': n_entries,
                '_last_playlist_index': max(playlistitems) if playlistitems else (playlistend or n_entries),
                'playlist_count': ie_result.get('playlist_count'),
                'playlist_index': playlist_index,
                'playlist_autonumber': i,
                'playlist': playlist,
                'playlist_id': ie_result.get('id'),
                'playlist_title': ie_result.get('title'),
                'playlist_uploader': ie_result.get('uploader'),
                'playlist_uploader_id': ie_result.get('uploader_id'),
                'extractor': ie_result['extractor'],
                'webpage_url': ie_result['webpage_url'],
                'webpage_url_basename': url_basename(ie_result['webpage_url']),
                'webpage_url_domain': get_domain(ie_result['webpage_url']),
                'extractor_key': ie_result['extractor_key'],
            }

            if self._match_entry(entry, incomplete=True) is not None:
                continue

            entry_result = self.__process_iterable_entry(entry, download, extra)
            # A falsy result counts as a failed entry
            if not entry_result:
                failures += 1
            if failures >= max_failures:
                self.report_error(
                    'Skipping the remaining entries in playlist "%s" since %d items failed extraction' % (playlist, failures))
                break
            playlist_results.append(entry_result)
        ie_result['entries'] = playlist_results

        # Write the updated info to json
        if _infojson_written is True and self._write_info_json(
                'updated playlist', ie_result,
                self.prepare_filename(ie_copy, 'pl_infojson'), overwrite=True) is None:
            return

        ie_result = self.run_all_pps('playlist', ie_result)
        self.to_screen(f'[download] Finished downloading playlist: {playlist}')
        return ie_result
1820
1821     @__handle_extraction_exceptions
1822     def __process_iterable_entry(self, entry, download, extra_info):
1823         return self.process_ie_result(
1824             entry, download=download, extra_info=extra_info)
1825
1826     def _build_format_filter(self, filter_spec):
1827         " Returns a function to filter the formats according to the filter_spec "
1828
1829         OPERATORS = {
1830             '<': operator.lt,
1831             '<=': operator.le,
1832             '>': operator.gt,
1833             '>=': operator.ge,
1834             '=': operator.eq,
1835             '!=': operator.ne,
1836         }
1837         operator_rex = re.compile(r'''(?x)\s*
1838             (?P<key>width|height|tbr|abr|vbr|asr|filesize|filesize_approx|fps)\s*
1839             (?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
1840             (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)\s*
1841             ''' % '|'.join(map(re.escape, OPERATORS.keys())))
1842         m = operator_rex.fullmatch(filter_spec)
1843         if m:
1844             try:
1845                 comparison_value = int(m.group('value'))
1846             except ValueError:
1847                 comparison_value = parse_filesize(m.group('value'))
1848                 if comparison_value is None:
1849                     comparison_value = parse_filesize(m.group('value') + 'B')
1850                 if comparison_value is None:
1851                     raise ValueError(
1852                         'Invalid value %r in format specification %r' % (
1853                             m.group('value'), filter_spec))
1854             op = OPERATORS[m.group('op')]
1855
1856         if not m:
1857             STR_OPERATORS = {
1858                 '=': operator.eq,
1859                 '^=': lambda attr, value: attr.startswith(value),
1860                 '$=': lambda attr, value: attr.endswith(value),
1861                 '*=': lambda attr, value: value in attr,
1862                 '~=': lambda attr, value: value.search(attr) is not None
1863             }
1864             str_operator_rex = re.compile(r'''(?x)\s*
1865                 (?P<key>[a-zA-Z0-9._-]+)\s*
1866                 (?P<negation>!\s*)?(?P<op>%s)\s*(?P<none_inclusive>\?\s*)?
1867                 (?P<quote>["'])?
1868                 (?P<value>(?(quote)(?:(?!(?P=quote))[^\\]|\\.)+|[\w.-]+))
1869                 (?(quote)(?P=quote))\s*
1870                 ''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))
1871             m = str_operator_rex.fullmatch(filter_spec)
1872             if m:
1873                 if m.group('op') == '~=':
1874                     comparison_value = re.compile(m.group('value'))
1875                 else:
1876                     comparison_value = re.sub(r'''\\([\\"'])''', r'\1', m.group('value'))
1877                 str_op = STR_OPERATORS[m.group('op')]
1878                 if m.group('negation'):
1879                     op = lambda attr, value: not str_op(attr, value)
1880                 else:
1881                     op = str_op
1882
1883         if not m:
1884             raise SyntaxError('Invalid filter specification %r' % filter_spec)
1885
1886         def _filter(f):
1887             actual_value = f.get(m.group('key'))
1888             if actual_value is None:
1889                 return m.group('none_inclusive')
1890             return op(actual_value, comparison_value)
1891         return _filter
1892
1893     def _check_formats(self, formats):
1894         for f in formats:
1895             self.to_screen('[info] Testing format %s' % f['format_id'])
1896             path = self.get_output_path('temp')
1897             if not self._ensure_dir_exists(f'{path}/'):
1898                 continue
1899             temp_file = tempfile.NamedTemporaryFile(suffix='.tmp', delete=False, dir=path or None)
1900             temp_file.close()
1901             try:
1902                 success, _ = self.dl(temp_file.name, f, test=True)
1903             except (DownloadError, IOError, OSError, ValueError) + network_exceptions:
1904                 success = False
1905             finally:
1906                 if os.path.exists(temp_file.name):
1907                     try:
1908                         os.remove(temp_file.name)
1909                     except OSError:
1910                         self.report_warning('Unable to delete temporary file "%s"' % temp_file.name)
1911             if success:
1912                 yield f
1913             else:
1914                 self.to_screen('[info] Unable to download format %s. Skipping...' % f['format_id'])
1915
1916     def _default_format_spec(self, info_dict, download=True):
1917
1918         def can_merge():
1919             merger = FFmpegMergerPP(self)
1920             return merger.available and merger.can_merge()
1921
1922         prefer_best = (
1923             not self.params.get('simulate')
1924             and download
1925             and (
1926                 not can_merge()
1927                 or info_dict.get('is_live', False)
1928                 or self.outtmpl_dict['default'] == '-'))
1929         compat = (
1930             prefer_best
1931             or self.params.get('allow_multiple_audio_streams', False)
1932             or 'format-spec' in self.params.get('compat_opts', []))
1933
1934         return (
1935             'best/bestvideo+bestaudio' if prefer_best
1936             else 'bestvideo*+bestaudio/best' if not compat
1937             else 'bestvideo+bestaudio/best')
1938
1939     def build_format_selector(self, format_spec):
1940         def syntax_error(note, start):
1941             message = (
1942                 'Invalid format specification: '
1943                 '{0}\n\t{1}\n\t{2}^'.format(note, format_spec, ' ' * start[1]))
1944             return SyntaxError(message)
1945
1946         PICKFIRST = 'PICKFIRST'
1947         MERGE = 'MERGE'
1948         SINGLE = 'SINGLE'
1949         GROUP = 'GROUP'
1950         FormatSelector = collections.namedtuple('FormatSelector', ['type', 'selector', 'filters'])
1951
1952         allow_multiple_streams = {'audio': self.params.get('allow_multiple_audio_streams', False),
1953                                   'video': self.params.get('allow_multiple_video_streams', False)}
1954
1955         check_formats = self.params.get('check_formats') == 'selected'
1956
1957         def _parse_filter(tokens):
1958             filter_parts = []
1959             for type, string, start, _, _ in tokens:
1960                 if type == tokenize.OP and string == ']':
1961                     return ''.join(filter_parts)
1962                 else:
1963                     filter_parts.append(string)
1964
1965         def _remove_unused_ops(tokens):
1966             # Remove operators that we don't use and join them with the surrounding strings
1967             # for example: 'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9'
1968             ALLOWED_OPS = ('/', '+', ',', '(', ')')
1969             last_string, last_start, last_end, last_line = None, None, None, None
1970             for type, string, start, end, line in tokens:
1971                 if type == tokenize.OP and string == '[':
1972                     if last_string:
1973                         yield tokenize.NAME, last_string, last_start, last_end, last_line
1974                         last_string = None
1975                     yield type, string, start, end, line
1976                     # everything inside brackets will be handled by _parse_filter
1977                     for type, string, start, end, line in tokens:
1978                         yield type, string, start, end, line
1979                         if type == tokenize.OP and string == ']':
1980                             break
1981                 elif type == tokenize.OP and string in ALLOWED_OPS:
1982                     if last_string:
1983                         yield tokenize.NAME, last_string, last_start, last_end, last_line
1984                         last_string = None
1985                     yield type, string, start, end, line
1986                 elif type in [tokenize.NAME, tokenize.NUMBER, tokenize.OP]:
1987                     if not last_string:
1988                         last_string = string
1989                         last_start = start
1990                         last_end = end
1991                     else:
1992                         last_string += string
1993             if last_string:
1994                 yield tokenize.NAME, last_string, last_start, last_end, last_line
1995
1996         def _parse_format_selection(tokens, inside_merge=False, inside_choice=False, inside_group=False):
1997             selectors = []
1998             current_selector = None
1999             for type, string, start, _, _ in tokens:
2000                 # ENCODING is only defined in python 3.x
2001                 if type == getattr(tokenize, 'ENCODING', None):
2002                     continue
2003                 elif type in [tokenize.NAME, tokenize.NUMBER]:
2004                     current_selector = FormatSelector(SINGLE, string, [])
2005                 elif type == tokenize.OP:
2006                     if string == ')':
2007                         if not inside_group:
2008                             # ')' will be handled by the parentheses group
2009                             tokens.restore_last_token()
2010                         break
2011                     elif inside_merge and string in ['/', ',']:
2012                         tokens.restore_last_token()
2013                         break
2014                     elif inside_choice and string == ',':
2015                         tokens.restore_last_token()
2016                         break
2017                     elif string == ',':
2018                         if not current_selector:
2019                             raise syntax_error('"," must follow a format selector', start)
2020                         selectors.append(current_selector)
2021                         current_selector = None
2022                     elif string == '/':
2023                         if not current_selector:
2024                             raise syntax_error('"/" must follow a format selector', start)
2025                         first_choice = current_selector
2026                         second_choice = _parse_format_selection(tokens, inside_choice=True)
2027                         current_selector = FormatSelector(PICKFIRST, (first_choice, second_choice), [])
2028                     elif string == '[':
2029                         if not current_selector:
2030                             current_selector = FormatSelector(SINGLE, 'best', [])
2031                         format_filter = _parse_filter(tokens)
2032                         current_selector.filters.append(format_filter)
2033                     elif string == '(':
2034                         if current_selector:
2035                             raise syntax_error('Unexpected "("', start)
2036                         group = _parse_format_selection(tokens, inside_group=True)
2037                         current_selector = FormatSelector(GROUP, group, [])
2038                     elif string == '+':
2039                         if not current_selector:
2040                             raise syntax_error('Unexpected "+"', start)
2041                         selector_1 = current_selector
2042                         selector_2 = _parse_format_selection(tokens, inside_merge=True)
2043                         if not selector_2:
2044                             raise syntax_error('Expected a selector', start)
2045                         current_selector = FormatSelector(MERGE, (selector_1, selector_2), [])
2046                     else:
2047                         raise syntax_error('Operator not recognized: "{0}"'.format(string), start)
2048                 elif type == tokenize.ENDMARKER:
2049                     break
2050             if current_selector:
2051                 selectors.append(current_selector)
2052             return selectors
2053
2054         def _merge(formats_pair):
2055             format_1, format_2 = formats_pair
2056
2057             formats_info = []
2058             formats_info.extend(format_1.get('requested_formats', (format_1,)))
2059             formats_info.extend(format_2.get('requested_formats', (format_2,)))
2060
2061             if not allow_multiple_streams['video'] or not allow_multiple_streams['audio']:
2062                 get_no_more = {'video': False, 'audio': False}
2063                 for (i, fmt_info) in enumerate(formats_info):
2064                     if fmt_info.get('acodec') == fmt_info.get('vcodec') == 'none':
2065                         formats_info.pop(i)
2066                         continue
2067                     for aud_vid in ['audio', 'video']:
2068                         if not allow_multiple_streams[aud_vid] and fmt_info.get(aud_vid[0] + 'codec') != 'none':
2069                             if get_no_more[aud_vid]:
2070                                 formats_info.pop(i)
2071                                 break
2072                             get_no_more[aud_vid] = True
2073
2074             if len(formats_info) == 1:
2075                 return formats_info[0]
2076
2077             video_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('vcodec') != 'none']
2078             audio_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('acodec') != 'none']
2079
2080             the_only_video = video_fmts[0] if len(video_fmts) == 1 else None
2081             the_only_audio = audio_fmts[0] if len(audio_fmts) == 1 else None
2082
2083             output_ext = self.params.get('merge_output_format')
2084             if not output_ext:
2085                 if the_only_video:
2086                     output_ext = the_only_video['ext']
2087                 elif the_only_audio and not video_fmts:
2088                     output_ext = the_only_audio['ext']
2089                 else:
2090                     output_ext = 'mkv'
2091
2092             filtered = lambda *keys: filter(None, (traverse_obj(fmt, *keys) for fmt in formats_info))
2093
2094             new_dict = {
2095                 'requested_formats': formats_info,
2096                 'format': '+'.join(filtered('format')),
2097                 'format_id': '+'.join(filtered('format_id')),
2098                 'ext': output_ext,
2099                 'protocol': '+'.join(map(determine_protocol, formats_info)),
2100                 'language': '+'.join(orderedSet(filtered('language'))) or None,
2101                 'format_note': '+'.join(orderedSet(filtered('format_note'))) or None,
2102                 'filesize_approx': sum(filtered('filesize', 'filesize_approx')) or None,
2103                 'tbr': sum(filtered('tbr', 'vbr', 'abr')),
2104             }
2105
2106             if the_only_video:
2107                 new_dict.update({
2108                     'width': the_only_video.get('width'),
2109                     'height': the_only_video.get('height'),
2110                     'resolution': the_only_video.get('resolution') or self.format_resolution(the_only_video),
2111                     'fps': the_only_video.get('fps'),
2112                     'dynamic_range': the_only_video.get('dynamic_range'),
2113                     'vcodec': the_only_video.get('vcodec'),
2114                     'vbr': the_only_video.get('vbr'),
2115                     'stretched_ratio': the_only_video.get('stretched_ratio'),
2116                 })
2117
2118             if the_only_audio:
2119                 new_dict.update({
2120                     'acodec': the_only_audio.get('acodec'),
2121                     'abr': the_only_audio.get('abr'),
2122                     'asr': the_only_audio.get('asr'),
2123                 })
2124
2125             return new_dict
2126
2127         def _check_formats(formats):
2128             if not check_formats:
2129                 yield from formats
2130                 return
2131             yield from self._check_formats(formats)
2132
2133         def _build_selector_function(selector):
2134             if isinstance(selector, list):  # ,
2135                 fs = [_build_selector_function(s) for s in selector]
2136
2137                 def selector_function(ctx):
2138                     for f in fs:
2139                         yield from f(ctx)
2140                 return selector_function
2141
2142             elif selector.type == GROUP:  # ()
2143                 selector_function = _build_selector_function(selector.selector)
2144
2145             elif selector.type == PICKFIRST:  # /
2146                 fs = [_build_selector_function(s) for s in selector.selector]
2147
2148                 def selector_function(ctx):
2149                     for f in fs:
2150                         picked_formats = list(f(ctx))
2151                         if picked_formats:
2152                             return picked_formats
2153                     return []
2154
2155             elif selector.type == MERGE:  # +
2156                 selector_1, selector_2 = map(_build_selector_function, selector.selector)
2157
2158                 def selector_function(ctx):
2159                     for pair in itertools.product(selector_1(ctx), selector_2(ctx)):
2160                         yield _merge(pair)
2161
2162             elif selector.type == SINGLE:  # atom
2163                 format_spec = selector.selector or 'best'
2164
2165                 # TODO: Add allvideo, allaudio etc by generalizing the code with best/worst selector
2166                 if format_spec == 'all':
2167                     def selector_function(ctx):
2168                         yield from _check_formats(ctx['formats'][::-1])
2169                 elif format_spec == 'mergeall':
2170                     def selector_function(ctx):
2171                         formats = list(_check_formats(
2172                             f for f in ctx['formats'] if f.get('vcodec') != 'none' or f.get('acodec') != 'none'))
2173                         if not formats:
2174                             return
2175                         merged_format = formats[-1]
2176                         for f in formats[-2::-1]:
2177                             merged_format = _merge((merged_format, f))
2178                         yield merged_format
2179
2180                 else:
2181                     format_fallback, seperate_fallback, format_reverse, format_idx = False, None, True, 1
2182                     mobj = re.match(
2183                         r'(?P<bw>best|worst|b|w)(?P<type>video|audio|v|a)?(?P<mod>\*)?(?:\.(?P<n>[1-9]\d*))?$',
2184                         format_spec)
2185                     if mobj is not None:
2186                         format_idx = int_or_none(mobj.group('n'), default=1)
2187                         format_reverse = mobj.group('bw')[0] == 'b'
2188                         format_type = (mobj.group('type') or [None])[0]
2189                         not_format_type = {'v': 'a', 'a': 'v'}.get(format_type)
2190                         format_modified = mobj.group('mod') is not None
2191
2192                         format_fallback = not format_type and not format_modified  # for b, w
2193                         _filter_f = (
2194                             (lambda f: f.get('%scodec' % format_type) != 'none')
2195                             if format_type and format_modified  # bv*, ba*, wv*, wa*
2196                             else (lambda f: f.get('%scodec' % not_format_type) == 'none')
2197                             if format_type  # bv, ba, wv, wa
2198                             else (lambda f: f.get('vcodec') != 'none' and f.get('acodec') != 'none')
2199                             if not format_modified  # b, w
2200                             else lambda f: True)  # b*, w*
2201                         filter_f = lambda f: _filter_f(f) and (
2202                             f.get('vcodec') != 'none' or f.get('acodec') != 'none')
2203                     else:
2204                         if format_spec in self._format_selection_exts['audio']:
2205                             filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') != 'none'
2206                         elif format_spec in self._format_selection_exts['video']:
2207                             filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') != 'none' and f.get('vcodec') != 'none'
2208                             seperate_fallback = lambda f: f.get('ext') == format_spec and f.get('vcodec') != 'none'
2209                         elif format_spec in self._format_selection_exts['storyboards']:
2210                             filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') == 'none' and f.get('vcodec') == 'none'
2211                         else:
2212                             filter_f = lambda f: f.get('format_id') == format_spec  # id
2213
2214                     def selector_function(ctx):
2215                         formats = list(ctx['formats'])
2216                         matches = list(filter(filter_f, formats)) if filter_f is not None else formats
2217                         if not matches:
2218                             if format_fallback and ctx['incomplete_formats']:
2219                                 # for extractors with incomplete formats (audio only (soundcloud)
2220                                 # or video only (imgur)) best/worst will fallback to
2221                                 # best/worst {video,audio}-only format
2222                                 matches = formats
2223                             elif seperate_fallback and not ctx['has_merged_format']:
2224                                 # for compatibility with youtube-dl when there is no pre-merged format
2225                                 matches = list(filter(seperate_fallback, formats))
2226                         matches = LazyList(_check_formats(matches[::-1 if format_reverse else 1]))
2227                         try:
2228                             yield matches[format_idx - 1]
2229                         except LazyList.IndexError:
2230                             return
2231
2232             filters = [self._build_format_filter(f) for f in selector.filters]
2233
2234             def final_selector(ctx):
2235                 ctx_copy = dict(ctx)
2236                 for _filter in filters:
2237                     ctx_copy['formats'] = list(filter(_filter, ctx_copy['formats']))
2238                 return selector_function(ctx_copy)
2239             return final_selector
2240
2241         stream = io.BytesIO(format_spec.encode('utf-8'))
2242         try:
2243             tokens = list(_remove_unused_ops(tokenize.tokenize(stream.readline)))
2244         except tokenize.TokenError:
2245             raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec)))
2246
2247         class TokenIterator(object):
2248             def __init__(self, tokens):
2249                 self.tokens = tokens
2250                 self.counter = 0
2251
2252             def __iter__(self):
2253                 return self
2254
2255             def __next__(self):
2256                 if self.counter >= len(self.tokens):
2257                     raise StopIteration()
2258                 value = self.tokens[self.counter]
2259                 self.counter += 1
2260                 return value
2261
2262             next = __next__
2263
2264             def restore_last_token(self):
2265                 self.counter -= 1
2266
2267         parsed_selector = _parse_format_selection(iter(TokenIterator(tokens)))
2268         return _build_selector_function(parsed_selector)
2269
2270     def _calc_headers(self, info_dict):
2271         res = merge_headers(self.params['http_headers'], info_dict.get('http_headers') or {})
2272
2273         cookies = self._calc_cookies(info_dict)
2274         if cookies:
2275             res['Cookie'] = cookies
2276
2277         if 'X-Forwarded-For' not in res:
2278             x_forwarded_for_ip = info_dict.get('__x_forwarded_for_ip')
2279             if x_forwarded_for_ip:
2280                 res['X-Forwarded-For'] = x_forwarded_for_ip
2281
2282         return res
2283
2284     def _calc_cookies(self, info_dict):
2285         pr = sanitized_Request(info_dict['url'])
2286         self.cookiejar.add_cookie_header(pr)
2287         return pr.get_header('Cookie')
2288
2289     def _sort_thumbnails(self, thumbnails):
2290         thumbnails.sort(key=lambda t: (
2291             t.get('preference') if t.get('preference') is not None else -1,
2292             t.get('width') if t.get('width') is not None else -1,
2293             t.get('height') if t.get('height') is not None else -1,
2294             t.get('id') if t.get('id') is not None else '',
2295             t.get('url')))
2296
2297     def _sanitize_thumbnails(self, info_dict):
2298         thumbnails = info_dict.get('thumbnails')
2299         if thumbnails is None:
2300             thumbnail = info_dict.get('thumbnail')
2301             if thumbnail:
2302                 info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}]
2303         if not thumbnails:
2304             return
2305
2306         def check_thumbnails(thumbnails):
2307             for t in thumbnails:
2308                 self.to_screen(f'[info] Testing thumbnail {t["id"]}')
2309                 try:
2310                     self.urlopen(HEADRequest(t['url']))
2311                 except network_exceptions as err:
2312                     self.to_screen(f'[info] Unable to connect to thumbnail {t["id"]} URL {t["url"]!r} - {err}. Skipping...')
2313                     continue
2314                 yield t
2315
2316         self._sort_thumbnails(thumbnails)
2317         for i, t in enumerate(thumbnails):
2318             if t.get('id') is None:
2319                 t['id'] = '%d' % i
2320             if t.get('width') and t.get('height'):
2321                 t['resolution'] = '%dx%d' % (t['width'], t['height'])
2322             t['url'] = sanitize_url(t['url'])
2323
2324         if self.params.get('check_formats') is True:
2325             info_dict['thumbnails'] = LazyList(check_thumbnails(thumbnails[::-1]), reverse=True)
2326         else:
2327             info_dict['thumbnails'] = thumbnails
2328
2329     def _fill_common_fields(self, info_dict, is_video=True):
2330         # TODO: move sanitization here
2331         if is_video:
2332             # playlists are allowed to lack "title"
2333             info_dict['fulltitle'] = info_dict.get('title')
2334             if 'title' not in info_dict:
2335                 raise ExtractorError('Missing "title" field in extractor result',
2336                                      video_id=info_dict['id'], ie=info_dict['extractor'])
2337             elif not info_dict.get('title'):
2338                 self.report_warning('Extractor failed to obtain "title". Creating a generic title instead')
2339                 info_dict['title'] = f'{info_dict["extractor"]} video #{info_dict["id"]}'
2340
2341         if info_dict.get('duration') is not None:
2342             info_dict['duration_string'] = formatSeconds(info_dict['duration'])
2343
2344         for ts_key, date_key in (
2345                 ('timestamp', 'upload_date'),
2346                 ('release_timestamp', 'release_date'),
2347                 ('modified_timestamp', 'modified_date'),
2348         ):
2349             if info_dict.get(date_key) is None and info_dict.get(ts_key) is not None:
2350                 # Working around out-of-range timestamp values (e.g. negative ones on Windows,
2351                 # see http://bugs.python.org/issue1646728)
2352                 try:
2353                     upload_date = datetime.datetime.utcfromtimestamp(info_dict[ts_key])
2354                     info_dict[date_key] = upload_date.strftime('%Y%m%d')
2355                 except (ValueError, OverflowError, OSError):
2356                     pass
2357
2358         live_keys = ('is_live', 'was_live')
2359         live_status = info_dict.get('live_status')
2360         if live_status is None:
2361             for key in live_keys:
2362                 if info_dict.get(key) is False:
2363                     continue
2364                 if info_dict.get(key):
2365                     live_status = key
2366                 break
2367             if all(info_dict.get(key) is False for key in live_keys):
2368                 live_status = 'not_live'
2369         if live_status:
2370             info_dict['live_status'] = live_status
2371             for key in live_keys:
2372                 if info_dict.get(key) is None:
2373                     info_dict[key] = (live_status == key)
2374
2375         # Auto generate title fields corresponding to the *_number fields when missing
2376         # in order to always have clean titles. This is very common for TV series.
2377         for field in ('chapter', 'season', 'episode'):
2378             if info_dict.get('%s_number' % field) is not None and not info_dict.get(field):
2379                 info_dict[field] = '%s %d' % (field.capitalize(), info_dict['%s_number' % field])
2380
2381     def process_video_result(self, info_dict, download=True):
2382         assert info_dict.get('_type', 'video') == 'video'
2383         self._num_videos += 1
2384
2385         if 'id' not in info_dict:
2386             raise ExtractorError('Missing "id" field in extractor result', ie=info_dict['extractor'])
2387         elif not info_dict.get('id'):
2388             raise ExtractorError('Extractor failed to obtain "id"', ie=info_dict['extractor'])
2389
2390         def report_force_conversion(field, field_not, conversion):
2391             self.report_warning(
2392                 '"%s" field is not %s - forcing %s conversion, there is an error in extractor'
2393                 % (field, field_not, conversion))
2394
2395         def sanitize_string_field(info, string_field):
2396             field = info.get(string_field)
2397             if field is None or isinstance(field, compat_str):
2398                 return
2399             report_force_conversion(string_field, 'a string', 'string')
2400             info[string_field] = compat_str(field)
2401
2402         def sanitize_numeric_fields(info):
2403             for numeric_field in self._NUMERIC_FIELDS:
2404                 field = info.get(numeric_field)
2405                 if field is None or isinstance(field, (int, float)):
2406                     continue
2407                 report_force_conversion(numeric_field, 'numeric', 'int')
2408                 info[numeric_field] = int_or_none(field)
2409
2410         sanitize_string_field(info_dict, 'id')
2411         sanitize_numeric_fields(info_dict)
2412         if (info_dict.get('duration') or 0) <= 0 and info_dict.pop('duration', None):
2413             self.report_warning('"duration" field is negative, there is an error in extractor')
2414
2415         if 'playlist' not in info_dict:
2416             # It isn't part of a playlist
2417             info_dict['playlist'] = None
2418             info_dict['playlist_index'] = None
2419
2420         self._sanitize_thumbnails(info_dict)
2421
2422         thumbnail = info_dict.get('thumbnail')
2423         thumbnails = info_dict.get('thumbnails')
2424         if thumbnail:
2425             info_dict['thumbnail'] = sanitize_url(thumbnail)
2426         elif thumbnails:
2427             info_dict['thumbnail'] = thumbnails[-1]['url']
2428
2429         if info_dict.get('display_id') is None and 'id' in info_dict:
2430             info_dict['display_id'] = info_dict['id']
2431
2432         self._fill_common_fields(info_dict)
2433
2434         for cc_kind in ('subtitles', 'automatic_captions'):
2435             cc = info_dict.get(cc_kind)
2436             if cc:
2437                 for _, subtitle in cc.items():
2438                     for subtitle_format in subtitle:
2439                         if subtitle_format.get('url'):
2440                             subtitle_format['url'] = sanitize_url(subtitle_format['url'])
2441                         if subtitle_format.get('ext') is None:
2442                             subtitle_format['ext'] = determine_ext(subtitle_format['url']).lower()
2443
2444         automatic_captions = info_dict.get('automatic_captions')
2445         subtitles = info_dict.get('subtitles')
2446
2447         info_dict['requested_subtitles'] = self.process_subtitles(
2448             info_dict['id'], subtitles, automatic_captions)
2449
2450         if info_dict.get('formats') is None:
2451             # There's only one format available
2452             formats = [info_dict]
2453         else:
2454             formats = info_dict['formats']
2455
2456         info_dict['__has_drm'] = any(f.get('has_drm') for f in formats)
2457         if not self.params.get('allow_unplayable_formats'):
2458             formats = [f for f in formats if not f.get('has_drm')]
2459             if info_dict['__has_drm'] and all(
2460                     f.get('acodec') == f.get('vcodec') == 'none' for f in formats):
2461                 self.report_warning(
2462                     'This video is DRM protected and only images are available for download. '
2463                     'Use --list-formats to see them')
2464
2465         get_from_start = not info_dict.get('is_live') or bool(self.params.get('live_from_start'))
2466         if not get_from_start:
2467             info_dict['title'] += ' ' + datetime.datetime.now().strftime('%Y-%m-%d %H:%M')
2468         if info_dict.get('is_live') and formats:
2469             formats = [f for f in formats if bool(f.get('is_from_start')) == get_from_start]
2470             if get_from_start and not formats:
2471                 self.raise_no_formats(info_dict, msg=(
2472                     '--live-from-start is passed, but there are no formats that can be downloaded from the start. '
2473                     'If you want to download from the current time, use --no-live-from-start'))
2474
2475         if not formats:
2476             self.raise_no_formats(info_dict)
2477
2478         def is_wellformed(f):
2479             url = f.get('url')
2480             if not url:
2481                 self.report_warning(
2482                     '"url" field is missing or empty - skipping format, '
2483                     'there is an error in extractor')
2484                 return False
2485             if isinstance(url, bytes):
2486                 sanitize_string_field(f, 'url')
2487             return True
2488
2489         # Filter out malformed formats for better extraction robustness
2490         formats = list(filter(is_wellformed, formats))
2491
2492         formats_dict = {}
2493
2494         # We check that all the formats have the format and format_id fields
2495         for i, format in enumerate(formats):
2496             sanitize_string_field(format, 'format_id')
2497             sanitize_numeric_fields(format)
2498             format['url'] = sanitize_url(format['url'])
2499             if not format.get('format_id'):
2500                 format['format_id'] = compat_str(i)
2501             else:
2502                 # Sanitize format_id from characters used in format selector expression
2503                 format['format_id'] = re.sub(r'[\s,/+\[\]()]', '_', format['format_id'])
2504             format_id = format['format_id']
2505             if format_id not in formats_dict:
2506                 formats_dict[format_id] = []
2507             formats_dict[format_id].append(format)
2508
2509         # Make sure all formats have unique format_id
2510         common_exts = set(itertools.chain(*self._format_selection_exts.values()))
2511         for format_id, ambiguous_formats in formats_dict.items():
2512             ambigious_id = len(ambiguous_formats) > 1
2513             for i, format in enumerate(ambiguous_formats):
2514                 if ambigious_id:
2515                     format['format_id'] = '%s-%d' % (format_id, i)
2516                 if format.get('ext') is None:
2517                     format['ext'] = determine_ext(format['url']).lower()
2518                 # Ensure there is no conflict between id and ext in format selection
2519                 # See https://github.com/yt-dlp/yt-dlp/issues/1282
2520                 if format['format_id'] != format['ext'] and format['format_id'] in common_exts:
2521                     format['format_id'] = 'f%s' % format['format_id']
2522
2523         for i, format in enumerate(formats):
2524             if format.get('format') is None:
2525                 format['format'] = '{id} - {res}{note}'.format(
2526                     id=format['format_id'],
2527                     res=self.format_resolution(format),
2528                     note=format_field(format, 'format_note', ' (%s)'),
2529                 )
2530             if format.get('protocol') is None:
2531                 format['protocol'] = determine_protocol(format)
2532             if format.get('resolution') is None:
2533                 format['resolution'] = self.format_resolution(format, default=None)
2534             if format.get('dynamic_range') is None and format.get('vcodec') != 'none':
2535                 format['dynamic_range'] = 'SDR'
2536             if (info_dict.get('duration') and format.get('tbr')
2537                     and not format.get('filesize') and not format.get('filesize_approx')):
2538                 format['filesize_approx'] = info_dict['duration'] * format['tbr'] * (1024 / 8)
2539
2540             # Add HTTP headers, so that external programs can use them from the
2541             # json output
2542             full_format_info = info_dict.copy()
2543             full_format_info.update(format)
2544             format['http_headers'] = self._calc_headers(full_format_info)
2545         # Remove private housekeeping stuff
2546         if '__x_forwarded_for_ip' in info_dict:
2547             del info_dict['__x_forwarded_for_ip']
2548
2549         if self.params.get('check_formats') is True:
2550             formats = LazyList(self._check_formats(formats[::-1]), reverse=True)
2551
2552         if not formats or formats[0] is not info_dict:
2553             # only set the 'formats' fields if the original info_dict list them
2554             # otherwise we end up with a circular reference, the first (and unique)
2555             # element in the 'formats' field in info_dict is info_dict itself,
2556             # which can't be exported to json
2557             info_dict['formats'] = formats
2558
2559         info_dict, _ = self.pre_process(info_dict)
2560
2561         if self._match_entry(info_dict, incomplete=self._format_fields) is not None:
2562             return info_dict
2563
2564         self.post_extract(info_dict)
2565         info_dict, _ = self.pre_process(info_dict, 'after_filter')
2566
2567         # The pre-processors may have modified the formats
2568         formats = info_dict.get('formats', [info_dict])
2569
2570         list_only = self.params.get('simulate') is None and (
2571             self.params.get('list_thumbnails') or self.params.get('listformats') or self.params.get('listsubtitles'))
2572         interactive_format_selection = not list_only and self.format_selector == '-'
2573         if self.params.get('list_thumbnails'):
2574             self.list_thumbnails(info_dict)
2575         if self.params.get('listsubtitles'):
2576             if 'automatic_captions' in info_dict:
2577                 self.list_subtitles(
2578                     info_dict['id'], automatic_captions, 'automatic captions')
2579             self.list_subtitles(info_dict['id'], subtitles, 'subtitles')
2580         if self.params.get('listformats') or interactive_format_selection:
2581             self.list_formats(info_dict)
2582         if list_only:
2583             # Without this printing, -F --print-json will not work
2584             self.__forced_printings(info_dict, self.prepare_filename(info_dict), incomplete=True)
2585             return
2586
2587         format_selector = self.format_selector
2588         if format_selector is None:
2589             req_format = self._default_format_spec(info_dict, download=download)
2590             self.write_debug('Default format spec: %s' % req_format)
2591             format_selector = self.build_format_selector(req_format)
2592
2593         while True:
2594             if interactive_format_selection:
2595                 req_format = input(
2596                     self._format_screen('\nEnter format selector: ', self.Styles.EMPHASIS))
2597                 try:
2598                     format_selector = self.build_format_selector(req_format)
2599                 except SyntaxError as err:
2600                     self.report_error(err, tb=False, is_error=False)
2601                     continue
2602
2603             formats_to_download = list(format_selector({
2604                 'formats': formats,
2605                 'has_merged_format': any('none' not in (f.get('acodec'), f.get('vcodec')) for f in formats),
2606                 'incomplete_formats': (
2607                     # All formats are video-only or
2608                     all(f.get('vcodec') != 'none' and f.get('acodec') == 'none' for f in formats)
2609                     # all formats are audio-only
2610                     or all(f.get('vcodec') == 'none' and f.get('acodec') != 'none' for f in formats)),
2611             }))
2612             if interactive_format_selection and not formats_to_download:
2613                 self.report_error('Requested format is not available', tb=False, is_error=False)
2614                 continue
2615             break
2616
2617         if not formats_to_download:
2618             if not self.params.get('ignore_no_formats_error'):
2619                 raise ExtractorError(
2620                     'Requested format is not available. Use --list-formats for a list of available formats',
2621                     expected=True, video_id=info_dict['id'], ie=info_dict['extractor'])
2622             self.report_warning('Requested format is not available')
2623             # Process what we can, even without any available formats.
2624             formats_to_download = [{}]
2625
2626         best_format = formats_to_download[-1]
2627         if download:
2628             if best_format:
2629                 self.to_screen(
2630                     f'[info] {info_dict["id"]}: Downloading {len(formats_to_download)} format(s): '
2631                     + ', '.join([f['format_id'] for f in formats_to_download]))
2632             max_downloads_reached = False
2633             for i, fmt in enumerate(formats_to_download):
2634                 formats_to_download[i] = new_info = self._copy_infodict(info_dict)
2635                 new_info.update(fmt)
2636                 try:
2637                     self.process_info(new_info)
2638                 except MaxDownloadsReached:
2639                     max_downloads_reached = True
2640                 # Remove copied info
2641                 for key, val in tuple(new_info.items()):
2642                     if info_dict.get(key) == val:
2643                         new_info.pop(key)
2644                 if max_downloads_reached:
2645                     break
2646
2647             write_archive = set(f.get('__write_download_archive', False) for f in formats_to_download)
2648             assert write_archive.issubset({True, False, 'ignore'})
2649             if True in write_archive and False not in write_archive:
2650                 self.record_download_archive(info_dict)
2651
2652             info_dict['requested_downloads'] = formats_to_download
2653             info_dict = self.run_all_pps('after_video', info_dict)
2654             if max_downloads_reached:
2655                 raise MaxDownloadsReached()
2656
2657         # We update the info dict with the selected best quality format (backwards compatibility)
2658         info_dict.update(best_format)
2659         return info_dict
2660
2661     def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
2662         """Select the requested subtitles and their format"""
2663         available_subs, normal_sub_langs = {}, []
2664         if normal_subtitles and self.params.get('writesubtitles'):
2665             available_subs.update(normal_subtitles)
2666             normal_sub_langs = tuple(normal_subtitles.keys())
2667         if automatic_captions and self.params.get('writeautomaticsub'):
2668             for lang, cap_info in automatic_captions.items():
2669                 if lang not in available_subs:
2670                     available_subs[lang] = cap_info
2671
2672         if (not self.params.get('writesubtitles') and not
2673                 self.params.get('writeautomaticsub') or not
2674                 available_subs):
2675             return None
2676
2677         all_sub_langs = tuple(available_subs.keys())
2678         if self.params.get('allsubtitles', False):
2679             requested_langs = all_sub_langs
2680         elif self.params.get('subtitleslangs', False):
2681             # A list is used so that the order of languages will be the same as
2682             # given in subtitleslangs. See https://github.com/yt-dlp/yt-dlp/issues/1041
2683             requested_langs = []
2684             for lang_re in self.params.get('subtitleslangs'):
2685                 discard = lang_re[0] == '-'
2686                 if discard:
2687                     lang_re = lang_re[1:]
2688                 if lang_re == 'all':
2689                     if discard:
2690                         requested_langs = []
2691                     else:
2692                         requested_langs.extend(all_sub_langs)
2693                     continue
2694                 current_langs = filter(re.compile(lang_re + '$').match, all_sub_langs)
2695                 if discard:
2696                     for lang in current_langs:
2697                         while lang in requested_langs:
2698                             requested_langs.remove(lang)
2699                 else:
2700                     requested_langs.extend(current_langs)
2701             requested_langs = orderedSet(requested_langs)
2702         elif normal_sub_langs:
2703             requested_langs = ['en'] if 'en' in normal_sub_langs else normal_sub_langs[:1]
2704         else:
2705             requested_langs = ['en'] if 'en' in all_sub_langs else all_sub_langs[:1]
2706         if requested_langs:
2707             self.write_debug('Downloading subtitles: %s' % ', '.join(requested_langs))
2708
2709         formats_query = self.params.get('subtitlesformat', 'best')
2710         formats_preference = formats_query.split('/') if formats_query else []
2711         subs = {}
2712         for lang in requested_langs:
2713             formats = available_subs.get(lang)
2714             if formats is None:
2715                 self.report_warning('%s subtitles not available for %s' % (lang, video_id))
2716                 continue
2717             for ext in formats_preference:
2718                 if ext == 'best':
2719                     f = formats[-1]
2720                     break
2721                 matches = list(filter(lambda f: f['ext'] == ext, formats))
2722                 if matches:
2723                     f = matches[-1]
2724                     break
2725             else:
2726                 f = formats[-1]
2727                 self.report_warning(
2728                     'No subtitle format found matching "%s" for language %s, '
2729                     'using %s' % (formats_query, lang, f['ext']))
2730             subs[lang] = f
2731         return subs
2732
2733     def _forceprint(self, key, info_dict):
2734         if info_dict is None:
2735             return
2736         info_copy = info_dict.copy()
2737         info_copy['formats_table'] = self.render_formats_table(info_dict)
2738         info_copy['thumbnails_table'] = self.render_thumbnails_table(info_dict)
2739         info_copy['subtitles_table'] = self.render_subtitles_table(info_dict.get('id'), info_dict.get('subtitles'))
2740         info_copy['automatic_captions_table'] = self.render_subtitles_table(info_dict.get('id'), info_dict.get('automatic_captions'))
2741
2742         def format_tmpl(tmpl):
2743             mobj = re.match(r'\w+(=?)$', tmpl)
2744             if mobj and mobj.group(1):
2745                 return f'{tmpl[:-1]} = %({tmpl[:-1]})r'
2746             elif mobj:
2747                 return f'%({tmpl})s'
2748             return tmpl
2749
2750         for tmpl in self.params['forceprint'].get(key, []):
2751             self.to_stdout(self.evaluate_outtmpl(format_tmpl(tmpl), info_copy))
2752
2753         for tmpl, file_tmpl in self.params['print_to_file'].get(key, []):
2754             filename = self.prepare_filename(info_dict, outtmpl=file_tmpl)
2755             tmpl = format_tmpl(tmpl)
2756             self.to_screen(f'[info] Writing {tmpl!r} to: {filename}')
2757             if self._ensure_dir_exists(filename):
2758                 with io.open(filename, 'a', encoding='utf-8') as f:
2759                     f.write(self.evaluate_outtmpl(tmpl, info_copy) + '\n')
2760
2761     def __forced_printings(self, info_dict, filename, incomplete):
2762         def print_mandatory(field, actual_field=None):
2763             if actual_field is None:
2764                 actual_field = field
2765             if (self.params.get('force%s' % field, False)
2766                     and (not incomplete or info_dict.get(actual_field) is not None)):
2767                 self.to_stdout(info_dict[actual_field])
2768
2769         def print_optional(field):
2770             if (self.params.get('force%s' % field, False)
2771                     and info_dict.get(field) is not None):
2772                 self.to_stdout(info_dict[field])
2773
2774         info_dict = info_dict.copy()
2775         if filename is not None:
2776             info_dict['filename'] = filename
2777         if info_dict.get('requested_formats') is not None:
2778             # For RTMP URLs, also include the playpath
2779             info_dict['urls'] = '\n'.join(f['url'] + f.get('play_path', '') for f in info_dict['requested_formats'])
2780         elif info_dict.get('url'):
2781             info_dict['urls'] = info_dict['url'] + info_dict.get('play_path', '')
2782
2783         if (self.params.get('forcejson')
2784                 or self.params['forceprint'].get('video')
2785                 or self.params['print_to_file'].get('video')):
2786             self.post_extract(info_dict)
2787         self._forceprint('video', info_dict)
2788
2789         print_mandatory('title')
2790         print_mandatory('id')
2791         print_mandatory('url', 'urls')
2792         print_optional('thumbnail')
2793         print_optional('description')
2794         print_optional('filename')
2795         if self.params.get('forceduration') and info_dict.get('duration') is not None:
2796             self.to_stdout(formatSeconds(info_dict['duration']))
2797         print_mandatory('format')
2798
2799         if self.params.get('forcejson'):
2800             self.to_stdout(json.dumps(self.sanitize_info(info_dict)))
2801
2802     def dl(self, name, info, subtitle=False, test=False):
2803         if not info.get('url'):
2804             self.raise_no_formats(info, True)
2805
2806         if test:
2807             verbose = self.params.get('verbose')
2808             params = {
2809                 'test': True,
2810                 'quiet': self.params.get('quiet') or not verbose,
2811                 'verbose': verbose,
2812                 'noprogress': not verbose,
2813                 'nopart': True,
2814                 'skip_unavailable_fragments': False,
2815                 'keep_fragments': False,
2816                 'overwrites': True,
2817                 '_no_ytdl_file': True,
2818             }
2819         else:
2820             params = self.params
2821         fd = get_suitable_downloader(info, params, to_stdout=(name == '-'))(self, params)
2822         if not test:
2823             for ph in self._progress_hooks:
2824                 fd.add_progress_hook(ph)
2825             urls = '", "'.join(
2826                 (f['url'].split(',')[0] + ',<data>' if f['url'].startswith('data:') else f['url'])
2827                 for f in info.get('requested_formats', []) or [info])
2828             self.write_debug('Invoking downloader on "%s"' % urls)
2829
2830         # Note: Ideally info should be a deep-copied so that hooks cannot modify it.
2831         # But it may contain objects that are not deep-copyable
2832         new_info = self._copy_infodict(info)
2833         if new_info.get('http_headers') is None:
2834             new_info['http_headers'] = self._calc_headers(new_info)
2835         return fd.download(name, new_info, subtitle)
2836
2837     def existing_file(self, filepaths, *, default_overwrite=True):
2838         existing_files = list(filter(os.path.exists, orderedSet(filepaths)))
2839         if existing_files and not self.params.get('overwrites', default_overwrite):
2840             return existing_files[0]
2841
2842         for file in existing_files:
2843             self.report_file_delete(file)
2844             os.remove(file)
2845         return None
2846
2847     def process_info(self, info_dict):
2848         """Process a single resolved IE result. (Modifies it in-place)"""
2849
2850         assert info_dict.get('_type', 'video') == 'video'
2851         original_infodict = info_dict
2852
2853         if 'format' not in info_dict and 'ext' in info_dict:
2854             info_dict['format'] = info_dict['ext']
2855
2856         # This is mostly just for backward compatibility of process_info
2857         # As a side-effect, this allows for format-specific filters
2858         if self._match_entry(info_dict) is not None:
2859             info_dict['__write_download_archive'] = 'ignore'
2860             return
2861
2862         # Does nothing under normal operation - for backward compatibility of process_info
2863         self.post_extract(info_dict)
2864         self._num_downloads += 1
2865
2866         # info_dict['_filename'] needs to be set for backward compatibility
2867         info_dict['_filename'] = full_filename = self.prepare_filename(info_dict, warn=True)
2868         temp_filename = self.prepare_filename(info_dict, 'temp')
2869         files_to_move = {}
2870
2871         # Forced printings
2872         self.__forced_printings(info_dict, full_filename, incomplete=('format' not in info_dict))
2873
2874         if self.params.get('simulate'):
2875             info_dict['__write_download_archive'] = self.params.get('force_write_download_archive')
2876             return
2877
2878         if full_filename is None:
2879             return
2880         if not self._ensure_dir_exists(encodeFilename(full_filename)):
2881             return
2882         if not self._ensure_dir_exists(encodeFilename(temp_filename)):
2883             return
2884
2885         if self._write_description('video', info_dict,
2886                                    self.prepare_filename(info_dict, 'description')) is None:
2887             return
2888
2889         sub_files = self._write_subtitles(info_dict, temp_filename)
2890         if sub_files is None:
2891             return
2892         files_to_move.update(dict(sub_files))
2893
2894         thumb_files = self._write_thumbnails(
2895             'video', info_dict, temp_filename, self.prepare_filename(info_dict, 'thumbnail'))
2896         if thumb_files is None:
2897             return
2898         files_to_move.update(dict(thumb_files))
2899
2900         infofn = self.prepare_filename(info_dict, 'infojson')
2901         _infojson_written = self._write_info_json('video', info_dict, infofn)
2902         if _infojson_written:
2903             info_dict['infojson_filename'] = infofn
2904             # For backward compatibility, even though it was a private field
2905             info_dict['__infojson_filename'] = infofn
2906         elif _infojson_written is None:
2907             return
2908
2909         # Note: Annotations are deprecated
2910         annofn = None
2911         if self.params.get('writeannotations', False):
2912             annofn = self.prepare_filename(info_dict, 'annotation')
2913         if annofn:
2914             if not self._ensure_dir_exists(encodeFilename(annofn)):
2915                 return
2916             if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(annofn)):
2917                 self.to_screen('[info] Video annotations are already present')
2918             elif not info_dict.get('annotations'):
2919                 self.report_warning('There are no annotations to write.')
2920             else:
2921                 try:
2922                     self.to_screen('[info] Writing video annotations to: ' + annofn)
2923                     with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
2924                         annofile.write(info_dict['annotations'])
2925                 except (KeyError, TypeError):
2926                     self.report_warning('There are no annotations to write.')
2927                 except (OSError, IOError):
2928                     self.report_error('Cannot write annotations file: ' + annofn)
2929                     return
2930
2931         # Write internet shortcut files
2932         def _write_link_file(link_type):
2933             url = try_get(info_dict['webpage_url'], iri_to_uri)
2934             if not url:
2935                 self.report_warning(
2936                     f'Cannot write internet shortcut file because the actual URL of "{info_dict["webpage_url"]}" is unknown')
2937                 return True
2938             linkfn = replace_extension(self.prepare_filename(info_dict, 'link'), link_type, info_dict.get('ext'))
2939             if not self._ensure_dir_exists(encodeFilename(linkfn)):
2940                 return False
2941             if self.params.get('overwrites', True) and os.path.exists(encodeFilename(linkfn)):
2942                 self.to_screen(f'[info] Internet shortcut (.{link_type}) is already present')
2943                 return True
2944             try:
2945                 self.to_screen(f'[info] Writing internet shortcut (.{link_type}) to: {linkfn}')
2946                 with io.open(encodeFilename(to_high_limit_path(linkfn)), 'w', encoding='utf-8',
2947                              newline='\r\n' if link_type == 'url' else '\n') as linkfile:
2948                     template_vars = {'url': url}
2949                     if link_type == 'desktop':
2950                         template_vars['filename'] = linkfn[:-(len(link_type) + 1)]
2951                     linkfile.write(LINK_TEMPLATES[link_type] % template_vars)
2952             except (OSError, IOError):
2953                 self.report_error(f'Cannot write internet shortcut {linkfn}')
2954                 return False
2955             return True
2956
2957         write_links = {
2958             'url': self.params.get('writeurllink'),
2959             'webloc': self.params.get('writewebloclink'),
2960             'desktop': self.params.get('writedesktoplink'),
2961         }
2962         if self.params.get('writelink'):
2963             link_type = ('webloc' if sys.platform == 'darwin'
2964                          else 'desktop' if sys.platform.startswith('linux')
2965                          else 'url')
2966             write_links[link_type] = True
2967
2968         if any(should_write and not _write_link_file(link_type)
2969                for link_type, should_write in write_links.items()):
2970             return
2971
2972         def replace_info_dict(new_info):
2973             nonlocal info_dict
2974             if new_info == info_dict:
2975                 return
2976             info_dict.clear()
2977             info_dict.update(new_info)
2978
2979         try:
2980             new_info, files_to_move = self.pre_process(info_dict, 'before_dl', files_to_move)
2981             replace_info_dict(new_info)
2982         except PostProcessingError as err:
2983             self.report_error('Preprocessing: %s' % str(err))
2984             return
2985
2986         if self.params.get('skip_download'):
2987             info_dict['filepath'] = temp_filename
2988             info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))
2989             info_dict['__files_to_move'] = files_to_move
2990             replace_info_dict(self.run_pp(MoveFilesAfterDownloadPP(self, False), info_dict))
2991             info_dict['__write_download_archive'] = self.params.get('force_write_download_archive')
2992         else:
2993             # Download
2994             info_dict.setdefault('__postprocessors', [])
2995             try:
2996
2997                 def existing_video_file(*filepaths):
2998                     ext = info_dict.get('ext')
2999                     converted = lambda file: replace_extension(file, self.params.get('final_ext') or ext, ext)
3000                     file = self.existing_file(itertools.chain(*zip(map(converted, filepaths), filepaths)),
3001                                               default_overwrite=False)
3002                     if file:
3003                         info_dict['ext'] = os.path.splitext(file)[1][1:]
3004                     return file
3005
3006                 success = True
3007                 if info_dict.get('requested_formats') is not None:
3008
3009                     def compatible_formats(formats):
3010                         # TODO: some formats actually allow this (mkv, webm, ogg, mp4), but not all of them.
3011                         video_formats = [format for format in formats if format.get('vcodec') != 'none']
3012                         audio_formats = [format for format in formats if format.get('acodec') != 'none']
3013                         if len(video_formats) > 2 or len(audio_formats) > 2:
3014                             return False
3015
3016                         # Check extension
3017                         exts = set(format.get('ext') for format in formats)
3018                         COMPATIBLE_EXTS = (
3019                             set(('mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'ismv', 'isma')),
3020                             set(('webm',)),
3021                         )
3022                         for ext_sets in COMPATIBLE_EXTS:
3023                             if ext_sets.issuperset(exts):
3024                                 return True
3025                         # TODO: Check acodec/vcodec
3026                         return False
3027
3028                     requested_formats = info_dict['requested_formats']
3029                     old_ext = info_dict['ext']
3030                     if self.params.get('merge_output_format') is None:
3031                         if not compatible_formats(requested_formats):
3032                             info_dict['ext'] = 'mkv'
3033                             self.report_warning(
3034                                 'Requested formats are incompatible for merge and will be merged into mkv')
3035                         if (info_dict['ext'] == 'webm'
3036                                 and info_dict.get('thumbnails')
3037                                 # check with type instead of pp_key, __name__, or isinstance
3038                                 # since we dont want any custom PPs to trigger this
3039                                 and any(type(pp) == EmbedThumbnailPP for pp in self._pps['post_process'])):
3040                             info_dict['ext'] = 'mkv'
3041                             self.report_warning(
3042                                 'webm doesn\'t support embedding a thumbnail, mkv will be used')
3043                     new_ext = info_dict['ext']
3044
3045                     def correct_ext(filename, ext=new_ext):
3046                         if filename == '-':
3047                             return filename
3048                         filename_real_ext = os.path.splitext(filename)[1][1:]
3049                         filename_wo_ext = (
3050                             os.path.splitext(filename)[0]
3051                             if filename_real_ext in (old_ext, new_ext)
3052                             else filename)
3053                         return '%s.%s' % (filename_wo_ext, ext)
3054
3055                     # Ensure filename always has a correct extension for successful merge
3056                     full_filename = correct_ext(full_filename)
3057                     temp_filename = correct_ext(temp_filename)
3058                     dl_filename = existing_video_file(full_filename, temp_filename)
3059                     info_dict['__real_download'] = False
3060
3061                     downloaded = []
3062                     merger = FFmpegMergerPP(self)
3063
3064                     fd = get_suitable_downloader(info_dict, self.params, to_stdout=temp_filename == '-')
3065                     if dl_filename is not None:
3066                         self.report_file_already_downloaded(dl_filename)
3067                     elif fd:
3068                         for f in requested_formats if fd != FFmpegFD else []:
3069                             f['filepath'] = fname = prepend_extension(
3070                                 correct_ext(temp_filename, info_dict['ext']),
3071                                 'f%s' % f['format_id'], info_dict['ext'])
3072                             downloaded.append(fname)
3073                         info_dict['url'] = '\n'.join(f['url'] for f in requested_formats)
3074                         success, real_download = self.dl(temp_filename, info_dict)
3075                         info_dict['__real_download'] = real_download
3076                     else:
3077                         if self.params.get('allow_unplayable_formats'):
3078                             self.report_warning(
3079                                 'You have requested merging of multiple formats '
3080                                 'while also allowing unplayable formats to be downloaded. '
3081                                 'The formats won\'t be merged to prevent data corruption.')
3082                         elif not merger.available:
3083                             msg = 'You have requested merging of multiple formats but ffmpeg is not installed'
3084                             if not self.params.get('ignoreerrors'):
3085                                 self.report_error(f'{msg}. Aborting due to --abort-on-error')
3086                                 return
3087                             self.report_warning(f'{msg}. The formats won\'t be merged')
3088
3089                         if temp_filename == '-':
3090                             reason = ('using a downloader other than ffmpeg' if FFmpegFD.can_merge_formats(info_dict, self.params)
3091                                       else 'but the formats are incompatible for simultaneous download' if merger.available
3092                                       else 'but ffmpeg is not installed')
3093                             self.report_warning(
3094                                 f'You have requested downloading multiple formats to stdout {reason}. '
3095                                 'The formats will be streamed one after the other')
3096                             fname = temp_filename
3097                         for f in requested_formats:
3098                             new_info = dict(info_dict)
3099                             del new_info['requested_formats']
3100                             new_info.update(f)
3101                             if temp_filename != '-':
3102                                 fname = prepend_extension(
3103                                     correct_ext(temp_filename, new_info['ext']),
3104                                     'f%s' % f['format_id'], new_info['ext'])
3105                                 if not self._ensure_dir_exists(fname):
3106                                     return
3107                                 f['filepath'] = fname
3108                                 downloaded.append(fname)
3109                             partial_success, real_download = self.dl(fname, new_info)
3110                             info_dict['__real_download'] = info_dict['__real_download'] or real_download
3111                             success = success and partial_success
3112
3113                     if downloaded and merger.available and not self.params.get('allow_unplayable_formats'):
3114                         info_dict['__postprocessors'].append(merger)
3115                         info_dict['__files_to_merge'] = downloaded
3116                         # Even if there were no downloads, it is being merged only now
3117                         info_dict['__real_download'] = True
3118                     else:
3119                         for file in downloaded:
3120                             files_to_move[file] = None
3121                 else:
3122                     # Just a single file
3123                     dl_filename = existing_video_file(full_filename, temp_filename)
3124                     if dl_filename is None or dl_filename == temp_filename:
3125                         # dl_filename == temp_filename could mean that the file was partially downloaded with --no-part.
3126                         # So we should try to resume the download
3127                         success, real_download = self.dl(temp_filename, info_dict)
3128                         info_dict['__real_download'] = real_download
3129                     else:
3130                         self.report_file_already_downloaded(dl_filename)
3131
3132                 dl_filename = dl_filename or temp_filename
3133                 info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))
3134
3135             except network_exceptions as err:
3136                 self.report_error('unable to download video data: %s' % error_to_compat_str(err))
3137                 return
3138             except (OSError, IOError) as err:
3139                 raise UnavailableVideoError(err)
3140             except (ContentTooShortError, ) as err:
3141                 self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
3142                 return
3143
3144             if success and full_filename != '-':
3145
3146                 def fixup():
3147                     do_fixup = True
3148                     fixup_policy = self.params.get('fixup')
3149                     vid = info_dict['id']
3150
3151                     if fixup_policy in ('ignore', 'never'):
3152                         return
3153                     elif fixup_policy == 'warn':
3154                         do_fixup = False
3155                     elif fixup_policy != 'force':
3156                         assert fixup_policy in ('detect_or_warn', None)
3157                         if not info_dict.get('__real_download'):
3158                             do_fixup = False
3159
3160                     def ffmpeg_fixup(cndn, msg, cls):
3161                         if not cndn:
3162                             return
3163                         if not do_fixup:
3164                             self.report_warning(f'{vid}: {msg}')
3165                             return
3166                         pp = cls(self)
3167                         if pp.available:
3168                             info_dict['__postprocessors'].append(pp)
3169                         else:
3170                             self.report_warning(f'{vid}: {msg}. Install ffmpeg to fix this automatically')
3171
3172                     stretched_ratio = info_dict.get('stretched_ratio')
3173                     ffmpeg_fixup(
3174                         stretched_ratio not in (1, None),
3175                         f'Non-uniform pixel ratio {stretched_ratio}',
3176                         FFmpegFixupStretchedPP)
3177
3178                     ffmpeg_fixup(
3179                         (info_dict.get('requested_formats') is None
3180                          and info_dict.get('container') == 'm4a_dash'
3181                          and info_dict.get('ext') == 'm4a'),
3182                         'writing DASH m4a. Only some players support this container',
3183                         FFmpegFixupM4aPP)
3184
3185                     downloader = get_suitable_downloader(info_dict, self.params) if 'protocol' in info_dict else None
3186                     downloader = downloader.__name__ if downloader else None
3187
3188                     if info_dict.get('requested_formats') is None:  # Not necessary if doing merger
3189                         ffmpeg_fixup(downloader == 'HlsFD',
3190                                      'Possible MPEG-TS in MP4 container or malformed AAC timestamps',
3191                                      FFmpegFixupM3u8PP)
3192                         ffmpeg_fixup(info_dict.get('is_live') and downloader == 'DashSegmentsFD',
3193                                      'Possible duplicate MOOV atoms', FFmpegFixupDuplicateMoovPP)
3194
3195                     ffmpeg_fixup(downloader == 'WebSocketFragmentFD', 'Malformed timestamps detected', FFmpegFixupTimestampPP)
3196                     ffmpeg_fixup(downloader == 'WebSocketFragmentFD', 'Malformed duration detected', FFmpegFixupDurationPP)
3197
3198                 fixup()
3199                 try:
3200                     replace_info_dict(self.post_process(dl_filename, info_dict, files_to_move))
3201                 except PostProcessingError as err:
3202                     self.report_error('Postprocessing: %s' % str(err))
3203                     return
3204                 try:
3205                     for ph in self._post_hooks:
3206                         ph(info_dict['filepath'])
3207                 except Exception as err:
3208                     self.report_error('post hooks: %s' % str(err))
3209                     return
3210                 info_dict['__write_download_archive'] = True
3211
3212         if self.params.get('force_write_download_archive'):
3213             info_dict['__write_download_archive'] = True
3214
3215         # Make sure the info_dict was modified in-place
3216         assert info_dict is original_infodict
3217
3218         max_downloads = self.params.get('max_downloads')
3219         if max_downloads is not None and self._num_downloads >= int(max_downloads):
3220             raise MaxDownloadsReached()
3221
3222     def __download_wrapper(self, func):
3223         @functools.wraps(func)
3224         def wrapper(*args, **kwargs):
3225             try:
3226                 res = func(*args, **kwargs)
3227             except UnavailableVideoError as e:
3228                 self.report_error(e)
3229             except MaxDownloadsReached as e:
3230                 self.to_screen(f'[info] {e}')
3231                 raise
3232             except DownloadCancelled as e:
3233                 self.to_screen(f'[info] {e}')
3234                 if not self.params.get('break_per_url'):
3235                     raise
3236             else:
3237                 if self.params.get('dump_single_json', False):
3238                     self.post_extract(res)
3239                     self.to_stdout(json.dumps(self.sanitize_info(res)))
3240         return wrapper
3241
3242     def download(self, url_list):
3243         """Download a given list of URLs."""
3244         url_list = variadic(url_list)  # Passing a single URL is a common mistake
3245         outtmpl = self.outtmpl_dict['default']
3246         if (len(url_list) > 1
3247                 and outtmpl != '-'
3248                 and '%' not in outtmpl
3249                 and self.params.get('max_downloads') != 1):
3250             raise SameFileError(outtmpl)
3251
3252         for url in url_list:
3253             self.__download_wrapper(self.extract_info)(
3254                 url, force_generic_extractor=self.params.get('force_generic_extractor', False))
3255
3256         return self._download_retcode
3257
3258     def download_with_info_file(self, info_filename):
3259         with contextlib.closing(fileinput.FileInput(
3260                 [info_filename], mode='r',
3261                 openhook=fileinput.hook_encoded('utf-8'))) as f:
3262             # FileInput doesn't have a read method, we can't call json.load
3263             info = self.sanitize_info(json.loads('\n'.join(f)), self.params.get('clean_infojson', True))
3264         try:
3265             self.__download_wrapper(self.process_ie_result)(info, download=True)
3266         except (DownloadError, EntryNotInPlaylist, ReExtractInfo) as e:
3267             if not isinstance(e, EntryNotInPlaylist):
3268                 self.to_stderr('\r')
3269             webpage_url = info.get('webpage_url')
3270             if webpage_url is not None:
3271                 self.report_warning(f'The info failed to download: {e}; trying with URL {webpage_url}')
3272                 return self.download([webpage_url])
3273             else:
3274                 raise
3275         return self._download_retcode
3276
3277     @staticmethod
3278     def sanitize_info(info_dict, remove_private_keys=False):
3279         ''' Sanitize the infodict for converting to json '''
3280         if info_dict is None:
3281             return info_dict
3282         info_dict.setdefault('epoch', int(time.time()))
3283         info_dict.setdefault('_type', 'video')
3284
3285         if remove_private_keys:
3286             reject = lambda k, v: v is None or (k.startswith('_') and k != '_type') or k in {
3287                 'requested_downloads', 'requested_formats', 'requested_subtitles', 'requested_entries',
3288                 'entries', 'filepath', 'infojson_filename', 'original_url', 'playlist_autonumber',
3289             }
3290         else:
3291             reject = lambda k, v: False
3292
3293         def filter_fn(obj):
3294             if isinstance(obj, dict):
3295                 return {k: filter_fn(v) for k, v in obj.items() if not reject(k, v)}
3296             elif isinstance(obj, (list, tuple, set, LazyList)):
3297                 return list(map(filter_fn, obj))
3298             elif obj is None or isinstance(obj, (str, int, float, bool)):
3299                 return obj
3300             else:
3301                 return repr(obj)
3302
3303         return filter_fn(info_dict)
3304
3305     @staticmethod
3306     def filter_requested_info(info_dict, actually_filter=True):
3307         ''' Alias of sanitize_info for backward compatibility '''
3308         return YoutubeDL.sanitize_info(info_dict, actually_filter)
3309
3310     @staticmethod
3311     def post_extract(info_dict):
3312         def actual_post_extract(info_dict):
3313             if info_dict.get('_type') in ('playlist', 'multi_video'):
3314                 for video_dict in info_dict.get('entries', {}):
3315                     actual_post_extract(video_dict or {})
3316                 return
3317
3318             post_extractor = info_dict.pop('__post_extractor', None) or (lambda: {})
3319             info_dict.update(post_extractor())
3320
3321         actual_post_extract(info_dict or {})
3322
3323     def run_pp(self, pp, infodict):
3324         files_to_delete = []
3325         if '__files_to_move' not in infodict:
3326             infodict['__files_to_move'] = {}
3327         try:
3328             files_to_delete, infodict = pp.run(infodict)
3329         except PostProcessingError as e:
3330             # Must be True and not 'only_download'
3331             if self.params.get('ignoreerrors') is True:
3332                 self.report_error(e)
3333                 return infodict
3334             raise
3335
3336         if not files_to_delete:
3337             return infodict
3338         if self.params.get('keepvideo', False):
3339             for f in files_to_delete:
3340                 infodict['__files_to_move'].setdefault(f, '')
3341         else:
3342             for old_filename in set(files_to_delete):
3343                 self.to_screen('Deleting original file %s (pass -k to keep)' % old_filename)
3344                 try:
3345                     os.remove(encodeFilename(old_filename))
3346                 except (IOError, OSError):
3347                     self.report_warning('Unable to remove downloaded original file')
3348                 if old_filename in infodict['__files_to_move']:
3349                     del infodict['__files_to_move'][old_filename]
3350         return infodict
3351
3352     def run_all_pps(self, key, info, *, additional_pps=None):
3353         self._forceprint(key, info)
3354         for pp in (additional_pps or []) + self._pps[key]:
3355             info = self.run_pp(pp, info)
3356         return info
3357
3358     def pre_process(self, ie_info, key='pre_process', files_to_move=None):
3359         info = dict(ie_info)
3360         info['__files_to_move'] = files_to_move or {}
3361         info = self.run_all_pps(key, info)
3362         return info, info.pop('__files_to_move', None)
3363
3364     def post_process(self, filename, info, files_to_move=None):
3365         """Run all the postprocessors on the given file."""
3366         info['filepath'] = filename
3367         info['__files_to_move'] = files_to_move or {}
3368         info = self.run_all_pps('post_process', info, additional_pps=info.get('__postprocessors'))
3369         info = self.run_pp(MoveFilesAfterDownloadPP(self), info)
3370         del info['__files_to_move']
3371         return self.run_all_pps('after_move', info)
3372
3373     def _make_archive_id(self, info_dict):
3374         video_id = info_dict.get('id')
3375         if not video_id:
3376             return
3377         # Future-proof against any change in case
3378         # and backwards compatibility with prior versions
3379         extractor = info_dict.get('extractor_key') or info_dict.get('ie_key')  # key in a playlist
3380         if extractor is None:
3381             url = str_or_none(info_dict.get('url'))
3382             if not url:
3383                 return
3384             # Try to find matching extractor for the URL and take its ie_key
3385             for ie_key, ie in self._ies.items():
3386                 if ie.suitable(url):
3387                     extractor = ie_key
3388                     break
3389             else:
3390                 return
3391         return '%s %s' % (extractor.lower(), video_id)
3392
3393     def in_download_archive(self, info_dict):
3394         fn = self.params.get('download_archive')
3395         if fn is None:
3396             return False
3397
3398         vid_id = self._make_archive_id(info_dict)
3399         if not vid_id:
3400             return False  # Incomplete video information
3401
3402         return vid_id in self.archive
3403
3404     def record_download_archive(self, info_dict):
3405         fn = self.params.get('download_archive')
3406         if fn is None:
3407             return
3408         vid_id = self._make_archive_id(info_dict)
3409         assert vid_id
3410         self.write_debug(f'Adding to archive: {vid_id}')
3411         with locked_file(fn, 'a', encoding='utf-8') as archive_file:
3412             archive_file.write(vid_id + '\n')
3413         self.archive.add(vid_id)
3414
3415     @staticmethod
3416     def format_resolution(format, default='unknown'):
3417         if format.get('vcodec') == 'none' and format.get('acodec') != 'none':
3418             return 'audio only'
3419         if format.get('resolution') is not None:
3420             return format['resolution']
3421         if format.get('width') and format.get('height'):
3422             return '%dx%d' % (format['width'], format['height'])
3423         elif format.get('height'):
3424             return '%sp' % format['height']
3425         elif format.get('width'):
3426             return '%dx?' % format['width']
3427         return default
3428
3429     def _list_format_headers(self, *headers):
3430         if self.params.get('listformats_table', True) is not False:
3431             return [self._format_screen(header, self.Styles.HEADERS) for header in headers]
3432         return headers
3433
3434     def _format_note(self, fdict):
3435         res = ''
3436         if fdict.get('ext') in ['f4f', 'f4m']:
3437             res += '(unsupported)'
3438         if fdict.get('language'):
3439             if res:
3440                 res += ' '
3441             res += '[%s]' % fdict['language']
3442         if fdict.get('format_note') is not None:
3443             if res:
3444                 res += ' '
3445             res += fdict['format_note']
3446         if fdict.get('tbr') is not None:
3447             if res:
3448                 res += ', '
3449             res += '%4dk' % fdict['tbr']
3450         if fdict.get('container') is not None:
3451             if res:
3452                 res += ', '
3453             res += '%s container' % fdict['container']
3454         if (fdict.get('vcodec') is not None
3455                 and fdict.get('vcodec') != 'none'):
3456             if res:
3457                 res += ', '
3458             res += fdict['vcodec']
3459             if fdict.get('vbr') is not None:
3460                 res += '@'
3461         elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
3462             res += 'video@'
3463         if fdict.get('vbr') is not None:
3464             res += '%4dk' % fdict['vbr']
3465         if fdict.get('fps') is not None:
3466             if res:
3467                 res += ', '
3468             res += '%sfps' % fdict['fps']
3469         if fdict.get('acodec') is not None:
3470             if res:
3471                 res += ', '
3472             if fdict['acodec'] == 'none':
3473                 res += 'video only'
3474             else:
3475                 res += '%-5s' % fdict['acodec']
3476         elif fdict.get('abr') is not None:
3477             if res:
3478                 res += ', '
3479             res += 'audio'
3480         if fdict.get('abr') is not None:
3481             res += '@%3dk' % fdict['abr']
3482         if fdict.get('asr') is not None:
3483             res += ' (%5dHz)' % fdict['asr']
3484         if fdict.get('filesize') is not None:
3485             if res:
3486                 res += ', '
3487             res += format_bytes(fdict['filesize'])
3488         elif fdict.get('filesize_approx') is not None:
3489             if res:
3490                 res += ', '
3491             res += '~' + format_bytes(fdict['filesize_approx'])
3492         return res
3493
3494     def render_formats_table(self, info_dict):
3495         if not info_dict.get('formats') and not info_dict.get('url'):
3496             return None
3497
3498         formats = info_dict.get('formats', [info_dict])
3499         if not self.params.get('listformats_table', True) is not False:
3500             table = [
3501                 [
3502                     format_field(f, 'format_id'),
3503                     format_field(f, 'ext'),
3504                     self.format_resolution(f),
3505                     self._format_note(f)
3506                 ] for f in formats if f.get('preference') is None or f['preference'] >= -1000]
3507             return render_table(['format code', 'extension', 'resolution', 'note'], table, extra_gap=1)
3508
3509         delim = self._format_screen('\u2502', self.Styles.DELIM, '|', test_encoding=True)
3510         table = [
3511             [
3512                 self._format_screen(format_field(f, 'format_id'), self.Styles.ID),
3513                 format_field(f, 'ext'),
3514                 format_field(f, func=self.format_resolution, ignore=('audio only', 'images')),
3515                 format_field(f, 'fps', '\t%d'),
3516                 format_field(f, 'dynamic_range', '%s', ignore=(None, 'SDR')).replace('HDR', ''),
3517                 delim,
3518                 format_field(f, 'filesize', ' \t%s', func=format_bytes) + format_field(f, 'filesize_approx', '~\t%s', func=format_bytes),
3519                 format_field(f, 'tbr', '\t%dk'),
3520                 shorten_protocol_name(f.get('protocol', '')),
3521                 delim,
3522                 format_field(f, 'vcodec', default='unknown').replace(
3523                     'none', 'images' if f.get('acodec') == 'none'
3524                             else self._format_screen('audio only', self.Styles.SUPPRESS)),
3525                 format_field(f, 'vbr', '\t%dk'),
3526                 format_field(f, 'acodec', default='unknown').replace(
3527                     'none', '' if f.get('vcodec') == 'none'
3528                             else self._format_screen('video only', self.Styles.SUPPRESS)),
3529                 format_field(f, 'abr', '\t%dk'),
3530                 format_field(f, 'asr', '\t%dHz'),
3531                 join_nonempty(
3532                     self._format_screen('UNSUPPORTED', 'light red') if f.get('ext') in ('f4f', 'f4m') else None,
3533                     format_field(f, 'language', '[%s]'),
3534                     join_nonempty(format_field(f, 'format_note'),
3535                                   format_field(f, 'container', ignore=(None, f.get('ext'))),
3536                                   delim=', '),
3537                     delim=' '),
3538             ] for f in formats if f.get('preference') is None or f['preference'] >= -1000]
3539         header_line = self._list_format_headers(
3540             'ID', 'EXT', 'RESOLUTION', '\tFPS', 'HDR', delim, '\tFILESIZE', '\tTBR', 'PROTO',
3541             delim, 'VCODEC', '\tVBR', 'ACODEC', '\tABR', '\tASR', 'MORE INFO')
3542
3543         return render_table(
3544             header_line, table, hide_empty=True,
3545             delim=self._format_screen('\u2500', self.Styles.DELIM, '-', test_encoding=True))
3546
3547     def render_thumbnails_table(self, info_dict):
3548         thumbnails = list(info_dict.get('thumbnails') or [])
3549         if not thumbnails:
3550             return None
3551         return render_table(
3552             self._list_format_headers('ID', 'Width', 'Height', 'URL'),
3553             [[t.get('id'), t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']] for t in thumbnails])
3554
3555     def render_subtitles_table(self, video_id, subtitles):
3556         def _row(lang, formats):
3557             exts, names = zip(*((f['ext'], f.get('name') or 'unknown') for f in reversed(formats)))
3558             if len(set(names)) == 1:
3559                 names = [] if names[0] == 'unknown' else names[:1]
3560             return [lang, ', '.join(names), ', '.join(exts)]
3561
3562         if not subtitles:
3563             return None
3564         return render_table(
3565             self._list_format_headers('Language', 'Name', 'Formats'),
3566             [_row(lang, formats) for lang, formats in subtitles.items()],
3567             hide_empty=True)
3568
3569     def __list_table(self, video_id, name, func, *args):
3570         table = func(*args)
3571         if not table:
3572             self.to_screen(f'{video_id} has no {name}')
3573             return
3574         self.to_screen(f'[info] Available {name} for {video_id}:')
3575         self.to_stdout(table)
3576
3577     def list_formats(self, info_dict):
3578         self.__list_table(info_dict['id'], 'formats', self.render_formats_table, info_dict)
3579
3580     def list_thumbnails(self, info_dict):
3581         self.__list_table(info_dict['id'], 'thumbnails', self.render_thumbnails_table, info_dict)
3582
3583     def list_subtitles(self, video_id, subtitles, name='subtitles'):
3584         self.__list_table(video_id, name, self.render_subtitles_table, video_id, subtitles)
3585
3586     def urlopen(self, req):
3587         """ Start an HTTP download """
3588         if isinstance(req, str):
3589             req = sanitized_Request(req)
3590         return self._opener.open(req, timeout=self._socket_timeout)
3591
3592     def print_debug_header(self):
3593         if not self.params.get('verbose'):
3594             return
3595
3596         def get_encoding(stream):
3597             ret = str(getattr(stream, 'encoding', 'missing (%s)' % type(stream).__name__))
3598             if not supports_terminal_sequences(stream):
3599                 from .compat import WINDOWS_VT_MODE
3600                 ret += ' (No VT)' if WINDOWS_VT_MODE is False else ' (No ANSI)'
3601             return ret
3602
3603         encoding_str = 'Encodings: locale %s, fs %s, out %s, err %s, pref %s' % (
3604             locale.getpreferredencoding(),
3605             sys.getfilesystemencoding(),
3606             get_encoding(self._out_files['screen']), get_encoding(self._out_files['error']),
3607             self.get_encoding())
3608
3609         logger = self.params.get('logger')
3610         if logger:
3611             write_debug = lambda msg: logger.debug(f'[debug] {msg}')
3612             write_debug(encoding_str)
3613         else:
3614             write_string(f'[debug] {encoding_str}\n', encoding=None)
3615             write_debug = lambda msg: self._write_string(f'[debug] {msg}\n')
3616
3617         source = detect_variant()
3618         write_debug(join_nonempty(
3619             'yt-dlp version', __version__,
3620             f'[{RELEASE_GIT_HEAD}]' if RELEASE_GIT_HEAD else '',
3621             '' if source == 'unknown' else f'({source})',
3622             delim=' '))
3623         if not _LAZY_LOADER:
3624             if os.environ.get('YTDLP_NO_LAZY_EXTRACTORS'):
3625                 write_debug('Lazy loading extractors is forcibly disabled')
3626             else:
3627                 write_debug('Lazy loading extractors is disabled')
3628         if plugin_extractors or plugin_postprocessors:
3629             write_debug('Plugins: %s' % [
3630                 '%s%s' % (klass.__name__, '' if klass.__name__ == name else f' as {name}')
3631                 for name, klass in itertools.chain(plugin_extractors.items(), plugin_postprocessors.items())])
3632         if self.params.get('compat_opts'):
3633             write_debug('Compatibility options: %s' % ', '.join(self.params.get('compat_opts')))
3634
3635         if source == 'source':
3636             try:
3637                 sp = Popen(
3638                     ['git', 'rev-parse', '--short', 'HEAD'],
3639                     stdout=subprocess.PIPE, stderr=subprocess.PIPE,
3640                     cwd=os.path.dirname(os.path.abspath(__file__)))
3641                 out, err = sp.communicate_or_kill()
3642                 out = out.decode().strip()
3643                 if re.match('[0-9a-f]+', out):
3644                     write_debug('Git HEAD: %s' % out)
3645             except Exception:
3646                 try:
3647                     sys.exc_clear()
3648                 except Exception:
3649                     pass
3650
3651         def python_implementation():
3652             impl_name = platform.python_implementation()
3653             if impl_name == 'PyPy' and hasattr(sys, 'pypy_version_info'):
3654                 return impl_name + ' version %d.%d.%d' % sys.pypy_version_info[:3]
3655             return impl_name
3656
3657         write_debug('Python version %s (%s %s) - %s' % (
3658             platform.python_version(),
3659             python_implementation(),
3660             platform.architecture()[0],
3661             platform_name()))
3662
3663         exe_versions, ffmpeg_features = FFmpegPostProcessor.get_versions_and_features(self)
3664         ffmpeg_features = {key for key, val in ffmpeg_features.items() if val}
3665         if ffmpeg_features:
3666             exe_versions['ffmpeg'] += ' (%s)' % ','.join(ffmpeg_features)
3667
3668         exe_versions['rtmpdump'] = rtmpdump_version()
3669         exe_versions['phantomjs'] = PhantomJSwrapper._version()
3670         exe_str = ', '.join(
3671             f'{exe} {v}' for exe, v in sorted(exe_versions.items()) if v
3672         ) or 'none'
3673         write_debug('exe versions: %s' % exe_str)
3674
3675         from .downloader.websocket import has_websockets
3676         from .postprocessor.embedthumbnail import has_mutagen
3677         from .cookies import SQLITE_AVAILABLE, SECRETSTORAGE_AVAILABLE
3678
3679         lib_str = join_nonempty(
3680             compat_brotli and compat_brotli.__name__,
3681             has_certifi and 'certifi',
3682             compat_pycrypto_AES and compat_pycrypto_AES.__name__.split('.')[0],
3683             SECRETSTORAGE_AVAILABLE and 'secretstorage',
3684             has_mutagen and 'mutagen',
3685             SQLITE_AVAILABLE and 'sqlite',
3686             has_websockets and 'websockets',
3687             delim=', ') or 'none'
3688         write_debug('Optional libraries: %s' % lib_str)
3689
3690         self._setup_opener()
3691         proxy_map = {}
3692         for handler in self._opener.handlers:
3693             if hasattr(handler, 'proxies'):
3694                 proxy_map.update(handler.proxies)
3695         write_debug(f'Proxy map: {proxy_map}')
3696
3697         # Not implemented
3698         if False and self.params.get('call_home'):
3699             ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode('utf-8')
3700             write_debug('Public IP address: %s' % ipaddr)
3701             latest_version = self.urlopen(
3702                 'https://yt-dl.org/latest/version').read().decode('utf-8')
3703             if version_tuple(latest_version) > version_tuple(__version__):
3704                 self.report_warning(
3705                     'You are using an outdated version (newest version: %s)! '
3706                     'See https://yt-dl.org/update if you need help updating.' %
3707                     latest_version)
3708
3709     def _setup_opener(self):
3710         if hasattr(self, '_opener'):
3711             return
3712         timeout_val = self.params.get('socket_timeout')
3713         self._socket_timeout = 20 if timeout_val is None else float(timeout_val)
3714
3715         opts_cookiesfrombrowser = self.params.get('cookiesfrombrowser')
3716         opts_cookiefile = self.params.get('cookiefile')
3717         opts_proxy = self.params.get('proxy')
3718
3719         self.cookiejar = load_cookies(opts_cookiefile, opts_cookiesfrombrowser, self)
3720
3721         cookie_processor = YoutubeDLCookieProcessor(self.cookiejar)
3722         if opts_proxy is not None:
3723             if opts_proxy == '':
3724                 proxies = {}
3725             else:
3726                 proxies = {'http': opts_proxy, 'https': opts_proxy}
3727         else:
3728             proxies = compat_urllib_request.getproxies()
3729             # Set HTTPS proxy to HTTP one if given (https://github.com/ytdl-org/youtube-dl/issues/805)
3730             if 'http' in proxies and 'https' not in proxies:
3731                 proxies['https'] = proxies['http']
3732         proxy_handler = PerRequestProxyHandler(proxies)
3733
3734         debuglevel = 1 if self.params.get('debug_printtraffic') else 0
3735         https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
3736         ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
3737         redirect_handler = YoutubeDLRedirectHandler()
3738         data_handler = urllib.request.DataHandler()
3739
3740         # When passing our own FileHandler instance, build_opener won't add the
3741         # default FileHandler and allows us to disable the file protocol, which
3742         # can be used for malicious purposes (see
3743         # https://github.com/ytdl-org/youtube-dl/issues/8227)
3744         file_handler = compat_urllib_request.FileHandler()
3745
3746         def file_open(*args, **kwargs):
3747             raise compat_urllib_error.URLError('file:// scheme is explicitly disabled in yt-dlp for security reasons')
3748         file_handler.file_open = file_open
3749
3750         opener = compat_urllib_request.build_opener(
3751             proxy_handler, https_handler, cookie_processor, ydlh, redirect_handler, data_handler, file_handler)
3752
3753         # Delete the default user-agent header, which would otherwise apply in
3754         # cases where our custom HTTP handler doesn't come into play
3755         # (See https://github.com/ytdl-org/youtube-dl/issues/1309 for details)
3756         opener.addheaders = []
3757         self._opener = opener
3758
3759     def encode(self, s):
3760         if isinstance(s, bytes):
3761             return s  # Already encoded
3762
3763         try:
3764             return s.encode(self.get_encoding())
3765         except UnicodeEncodeError as err:
3766             err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
3767             raise
3768
3769     def get_encoding(self):
3770         encoding = self.params.get('encoding')
3771         if encoding is None:
3772             encoding = preferredencoding()
3773         return encoding
3774
3775     def _write_info_json(self, label, ie_result, infofn, overwrite=None):
3776         ''' Write infojson and returns True = written, 'exists' = Already exists, False = skip, None = error '''
3777         if overwrite is None:
3778             overwrite = self.params.get('overwrites', True)
3779         if not self.params.get('writeinfojson'):
3780             return False
3781         elif not infofn:
3782             self.write_debug(f'Skipping writing {label} infojson')
3783             return False
3784         elif not self._ensure_dir_exists(infofn):
3785             return None
3786         elif not overwrite and os.path.exists(infofn):
3787             self.to_screen(f'[info] {label.title()} metadata is already present')
3788             return 'exists'
3789
3790         self.to_screen(f'[info] Writing {label} metadata as JSON to: {infofn}')
3791         try:
3792             write_json_file(self.sanitize_info(ie_result, self.params.get('clean_infojson', True)), infofn)
3793             return True
3794         except (OSError, IOError):
3795             self.report_error(f'Cannot write {label} metadata to JSON file {infofn}')
3796             return None
3797
3798     def _write_description(self, label, ie_result, descfn):
3799         ''' Write description and returns True = written, False = skip, None = error '''
3800         if not self.params.get('writedescription'):
3801             return False
3802         elif not descfn:
3803             self.write_debug(f'Skipping writing {label} description')
3804             return False
3805         elif not self._ensure_dir_exists(descfn):
3806             return None
3807         elif not self.params.get('overwrites', True) and os.path.exists(descfn):
3808             self.to_screen(f'[info] {label.title()} description is already present')
3809         elif ie_result.get('description') is None:
3810             self.report_warning(f'There\'s no {label} description to write')
3811             return False
3812         else:
3813             try:
3814                 self.to_screen(f'[info] Writing {label} description to: {descfn}')
3815                 with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
3816                     descfile.write(ie_result['description'])
3817             except (OSError, IOError):
3818                 self.report_error(f'Cannot write {label} description file {descfn}')
3819                 return None
3820         return True
3821
3822     def _write_subtitles(self, info_dict, filename):
3823         ''' Write subtitles to file and return list of (sub_filename, final_sub_filename); or None if error'''
3824         ret = []
3825         subtitles = info_dict.get('requested_subtitles')
3826         if not subtitles or not (self.params.get('writesubtitles') or self.params.get('writeautomaticsub')):
3827             # subtitles download errors are already managed as troubles in relevant IE
3828             # that way it will silently go on when used with unsupporting IE
3829             return ret
3830
3831         sub_filename_base = self.prepare_filename(info_dict, 'subtitle')
3832         if not sub_filename_base:
3833             self.to_screen('[info] Skipping writing video subtitles')
3834             return ret
3835         for sub_lang, sub_info in subtitles.items():
3836             sub_format = sub_info['ext']
3837             sub_filename = subtitles_filename(filename, sub_lang, sub_format, info_dict.get('ext'))
3838             sub_filename_final = subtitles_filename(sub_filename_base, sub_lang, sub_format, info_dict.get('ext'))
3839             existing_sub = self.existing_file((sub_filename_final, sub_filename))
3840             if existing_sub:
3841                 self.to_screen(f'[info] Video subtitle {sub_lang}.{sub_format} is already present')
3842                 sub_info['filepath'] = existing_sub
3843                 ret.append((existing_sub, sub_filename_final))
3844                 continue
3845
3846             self.to_screen(f'[info] Writing video subtitles to: {sub_filename}')
3847             if sub_info.get('data') is not None:
3848                 try:
3849                     # Use newline='' to prevent conversion of newline characters
3850                     # See https://github.com/ytdl-org/youtube-dl/issues/10268
3851                     with io.open(sub_filename, 'w', encoding='utf-8', newline='') as subfile:
3852                         subfile.write(sub_info['data'])
3853                     sub_info['filepath'] = sub_filename
3854                     ret.append((sub_filename, sub_filename_final))
3855                     continue
3856                 except (OSError, IOError):
3857                     self.report_error(f'Cannot write video subtitles file {sub_filename}')
3858                     return None
3859
3860             try:
3861                 sub_copy = sub_info.copy()
3862                 sub_copy.setdefault('http_headers', info_dict.get('http_headers'))
3863                 self.dl(sub_filename, sub_copy, subtitle=True)
3864                 sub_info['filepath'] = sub_filename
3865                 ret.append((sub_filename, sub_filename_final))
3866             except (DownloadError, ExtractorError, IOError, OSError, ValueError) + network_exceptions as err:
3867                 msg = f'Unable to download video subtitles for {sub_lang!r}: {err}'
3868                 if self.params.get('ignoreerrors') is not True:  # False or 'only_download'
3869                     if not self.params.get('ignoreerrors'):
3870                         self.report_error(msg)
3871                     raise DownloadError(msg)
3872                 self.report_warning(msg)
3873         return ret
3874
3875     def _write_thumbnails(self, label, info_dict, filename, thumb_filename_base=None):
3876         ''' Write thumbnails to file and return list of (thumb_filename, final_thumb_filename) '''
3877         write_all = self.params.get('write_all_thumbnails', False)
3878         thumbnails, ret = [], []
3879         if write_all or self.params.get('writethumbnail', False):
3880             thumbnails = info_dict.get('thumbnails') or []
3881         multiple = write_all and len(thumbnails) > 1
3882
3883         if thumb_filename_base is None:
3884             thumb_filename_base = filename
3885         if thumbnails and not thumb_filename_base:
3886             self.write_debug(f'Skipping writing {label} thumbnail')
3887             return ret
3888
3889         for idx, t in list(enumerate(thumbnails))[::-1]:
3890             thumb_ext = (f'{t["id"]}.' if multiple else '') + determine_ext(t['url'], 'jpg')
3891             thumb_display_id = f'{label} thumbnail {t["id"]}'
3892             thumb_filename = replace_extension(filename, thumb_ext, info_dict.get('ext'))
3893             thumb_filename_final = replace_extension(thumb_filename_base, thumb_ext, info_dict.get('ext'))
3894
3895             existing_thumb = self.existing_file((thumb_filename_final, thumb_filename))
3896             if existing_thumb:
3897                 self.to_screen('[info] %s is already present' % (
3898                     thumb_display_id if multiple else f'{label} thumbnail').capitalize())
3899                 t['filepath'] = existing_thumb
3900                 ret.append((existing_thumb, thumb_filename_final))
3901             else:
3902                 self.to_screen(f'[info] Downloading {thumb_display_id} ...')
3903                 try:
3904                     uf = self.urlopen(sanitized_Request(t['url'], headers=t.get('http_headers', {})))
3905                     self.to_screen(f'[info] Writing {thumb_display_id} to: {thumb_filename}')
3906                     with open(encodeFilename(thumb_filename), 'wb') as thumbf:
3907                         shutil.copyfileobj(uf, thumbf)
3908                     ret.append((thumb_filename, thumb_filename_final))
3909                     t['filepath'] = thumb_filename
3910                 except network_exceptions as err:
3911                     thumbnails.pop(idx)
3912                     self.report_warning(f'Unable to download {thumb_display_id}: {err}')
3913             if ret and not write_all:
3914                 break
3915         return ret