yt_dlp/YoutubeDL.py

   1 #!/usr/bin/env python3
   2 # coding: utf-8
   3
   4 from __future__ import absolute_import, unicode_literals
   5
   6 import collections
   7 import contextlib
   8 import datetime
   9 import errno
  10 import fileinput
  11 import functools
  12 import io
  13 import itertools
  14 import json
  15 import locale
  16 import operator
  17 import os
  18 import platform
  19 import re
  20 import shutil
  21 import subprocess
  22 import sys
  23 import tempfile
  24 import time
  25 import tokenize
  26 import traceback
  27 import random
  28 import unicodedata
  29
  30 from enum import Enum
  31 from string import ascii_letters
  32
  33 from .compat import (
  34     compat_basestring,
  35     compat_brotli,
  36     compat_get_terminal_size,
  37     compat_kwargs,
  38     compat_numeric_types,
  39     compat_os_name,
  40     compat_pycrypto_AES,
  41     compat_shlex_quote,
  42     compat_str,
  43     compat_tokenize_tokenize,
  44     compat_urllib_error,
  45     compat_urllib_request,
  46     compat_urllib_request_DataHandler,
  47     windows_enable_vt_mode,
  48 )
  49 from .cookies import load_cookies
  50 from .utils import (
  51     age_restricted,
  52     args_to_str,
  53     ContentTooShortError,
  54     date_from_str,
  55     DateRange,
  56     DEFAULT_OUTTMPL,
  57     determine_ext,
  58     determine_protocol,
  59     DownloadCancelled,
  60     DownloadError,
  61     encode_compat_str,
  62     encodeFilename,
  63     EntryNotInPlaylist,
  64     error_to_compat_str,
  65     ExistingVideoReached,
  66     expand_path,
  67     ExtractorError,
  68     float_or_none,
  69     format_bytes,
  70     format_field,
  71     format_decimal_suffix,
  72     formatSeconds,
  73     GeoRestrictedError,
  74     get_domain,
  75     has_certifi,
  76     HEADRequest,
  77     InAdvancePagedList,
  78     int_or_none,
  79     iri_to_uri,
  80     ISO3166Utils,
  81     join_nonempty,
  82     LazyList,
  83     LINK_TEMPLATES,
  84     locked_file,
  85     make_dir,
  86     make_HTTPS_handler,
  87     MaxDownloadsReached,
  88     merge_headers,
  89     network_exceptions,
  90     number_of_digits,
  91     orderedSet,
  92     OUTTMPL_TYPES,
  93     PagedList,
  94     parse_filesize,
  95     PerRequestProxyHandler,
  96     platform_name,
  97     Popen,
  98     POSTPROCESS_WHEN,
  99     PostProcessingError,
 100     preferredencoding,
 101     prepend_extension,
 102     ReExtractInfo,
 103     register_socks_protocols,
 104     RejectedVideoReached,
 105     remove_terminal_sequences,
 106     render_table,
 107     replace_extension,
 108     SameFileError,
 109     sanitize_filename,
 110     sanitize_path,
 111     sanitize_url,
 112     sanitized_Request,
 113     std_headers,
 114     STR_FORMAT_RE_TMPL,
 115     STR_FORMAT_TYPES,
 116     str_or_none,
 117     strftime_or_none,
 118     subtitles_filename,
 119     supports_terminal_sequences,
 120     timetuple_from_msec,
 121     to_high_limit_path,
 122     traverse_obj,
 123     try_get,
 124     UnavailableVideoError,
 125     url_basename,
 126     variadic,
 127     version_tuple,
 128     write_json_file,
 129     write_string,
 130     YoutubeDLCookieProcessor,
 131     YoutubeDLHandler,
 132     YoutubeDLRedirectHandler,
 133 )
 134 from .cache import Cache
 135 from .minicurses import format_text
 136 from .extractor import (
 137     gen_extractor_classes,
 138     get_info_extractor,
 139     _LAZY_LOADER,
 140     _PLUGIN_CLASSES as plugin_extractors
 141 )
 142 from .extractor.openload import PhantomJSwrapper
 143 from .downloader import (
 144     FFmpegFD,
 145     get_suitable_downloader,
 146     shorten_protocol_name
 147 )
 148 from .downloader.rtmp import rtmpdump_version
 149 from .postprocessor import (
 150     get_postprocessor,
 151     EmbedThumbnailPP,
 152     FFmpegFixupDuplicateMoovPP,
 153     FFmpegFixupDurationPP,
 154     FFmpegFixupM3u8PP,
 155     FFmpegFixupM4aPP,
 156     FFmpegFixupStretchedPP,
 157     FFmpegFixupTimestampPP,
 158     FFmpegMergerPP,
 159     FFmpegPostProcessor,
 160     MoveFilesAfterDownloadPP,
 161     _PLUGIN_CLASSES as plugin_postprocessors
 162 )
 163 from .update import detect_variant
 164 from .version import __version__, RELEASE_GIT_HEAD
 165
 166 if compat_os_name == 'nt':
 167     import ctypes
 168
 169
 170 class YoutubeDL(object):
 171     """YoutubeDL class.
 172
 173     YoutubeDL objects are the ones responsible for downloading the
 174     actual video file and writing it to disk if the user has requested
 175     it, among some other tasks. In most cases there should be one per
 176     program. As, given a video URL, the downloader doesn't know how to
 177     extract all the needed information, task that InfoExtractors do, it
 178     has to pass the URL to one of them.
 179
 180     For this, YoutubeDL objects have a method that allows
 181     InfoExtractors to be registered in a given order. When it is passed
 182     a URL, the YoutubeDL object hands it to the first InfoExtractor it
 183     finds that reports being able to handle it. The InfoExtractor extracts
 184     all the information about the video or videos the URL refers to, and
 185     YoutubeDL processes the extracted information, possibly using a File
 186     Downloader to download the video.
 187
 188     YoutubeDL objects accept a lot of parameters. In order not to saturate
 189     the object constructor with arguments, it receives a dictionary of
 190     options instead. These options are available through the params
 191     attribute for the InfoExtractors to use. The YoutubeDL also
 192     registers itself as the downloader in charge for the InfoExtractors
 193     that are added to it, so this is a "mutual registration".
 194
 195     Available options:
 196
 197     username:          Username for authentication purposes.
 198     password:          Password for authentication purposes.
 199     videopassword:     Password for accessing a video.
 200     ap_mso:            Adobe Pass multiple-system operator identifier.
 201     ap_username:       Multiple-system operator account username.
 202     ap_password:       Multiple-system operator account password.
 203     usenetrc:          Use netrc for authentication instead.
 204     verbose:           Print additional info to stdout.
 205     quiet:             Do not print messages to stdout.
 206     no_warnings:       Do not print out anything for warnings.
 207     forceprint:        A dict with keys WHEN mapped to a list of templates to
 208                        print to stdout. The allowed keys are video or any of the
 209                        items in utils.POSTPROCESS_WHEN.
 210                        For compatibility, a single list is also accepted
 211     print_to_file:     A dict with keys WHEN (same as forceprint) mapped to
 212                        a list of tuples with (template, filename)
 213     forceurl:          Force printing final URL. (Deprecated)
 214     forcetitle:        Force printing title. (Deprecated)
 215     forceid:           Force printing ID. (Deprecated)
 216     forcethumbnail:    Force printing thumbnail URL. (Deprecated)
 217     forcedescription:  Force printing description. (Deprecated)
 218     forcefilename:     Force printing final filename. (Deprecated)
 219     forceduration:     Force printing duration. (Deprecated)
 220     forcejson:         Force printing info_dict as JSON.
 221     dump_single_json:  Force printing the info_dict of the whole playlist
 222                        (or video) as a single JSON line.
 223     force_write_download_archive: Force writing download archive regardless
 224                        of 'skip_download' or 'simulate'.
 225     simulate:          Do not download the video files. If unset (or None),
 226                        simulate only if listsubtitles, listformats or list_thumbnails is used
 227     format:            Video format code. See "FORMAT SELECTION" for more details.
 228                        You can also pass a function. The function takes 'ctx' as
 229                        argument and returns the formats to download.
 230                        See "build_format_selector" for an implementation
 231     allow_unplayable_formats:   Allow unplayable formats to be extracted and downloaded.
 232     ignore_no_formats_error: Ignore "No video formats" error. Useful for
 233                        extracting metadata even if the video is not actually
 234                        available for download (experimental)
 235     format_sort:       A list of fields by which to sort the video formats.
 236                        See "Sorting Formats" for more details.
 237     format_sort_force: Force the given format_sort. See "Sorting Formats"
 238                        for more details.
 239     prefer_free_formats: Whether to prefer video formats with free containers
 240                        over non-free ones of same quality.
 241     allow_multiple_video_streams:   Allow multiple video streams to be merged
 242                        into a single file
 243     allow_multiple_audio_streams:   Allow multiple audio streams to be merged
 244                        into a single file
 245     check_formats:     Whether to test if the formats are downloadable.
 246                        Can be True (check all), False (check none),
 247                        'selected' (check selected formats),
 248                        or None (check only if requested by extractor)
 249     paths:             Dictionary of output paths. The allowed keys are 'home',
 250                        'temp' and the keys of OUTTMPL_TYPES (in utils.py)
 251     outtmpl:           Dictionary of templates for output names. Allowed keys
 252                        are 'default' and the keys of OUTTMPL_TYPES (in utils.py).
 253                        For compatibility with youtube-dl, a single string can also be used
 254     outtmpl_na_placeholder: Placeholder for unavailable meta fields.
 255     restrictfilenames: Do not allow "&" and spaces in file names
 256     trim_file_name:    Limit length of filename (extension excluded)
 257     windowsfilenames:  Force the filenames to be windows compatible
 258     ignoreerrors:      Do not stop on download/postprocessing errors.
 259                        Can be 'only_download' to ignore only download errors.
 260                        Default is 'only_download' for CLI, but False for API
 261     skip_playlist_after_errors: Number of allowed failures until the rest of
 262                        the playlist is skipped
 263     force_generic_extractor: Force downloader to use the generic extractor
 264     overwrites:        Overwrite all video and metadata files if True,
 265                        overwrite only non-video files if None
 266                        and don't overwrite any file if False
 267                        For compatibility with youtube-dl,
 268                        "nooverwrites" may also be used instead
 269     playliststart:     Playlist item to start at.
 270     playlistend:       Playlist item to end at.
 271     playlist_items:    Specific indices of playlist to download.
 272     playlistreverse:   Download playlist items in reverse order.
 273     playlistrandom:    Download playlist items in random order.
 274     matchtitle:        Download only matching titles.
 275     rejecttitle:       Reject downloads for matching titles.
 276     logger:            Log messages to a logging.Logger instance.
 277     logtostderr:       Log messages to stderr instead of stdout.
 278     consoletitle:      Display progress in console window's titlebar.
 279     writedescription:  Write the video description to a .description file
 280     writeinfojson:     Write the video metadata to a .info.json file
 281     clean_infojson:    Remove private fields from the infojson
 282     getcomments:       Extract video comments. This will not be written to disk
 283                        unless writeinfojson is also given
 284     writeannotations:  Write the video annotations to a .annotations.xml file
 285     writethumbnail:    Write the thumbnail image to a file
 286     allow_playlist_files: Whether to write playlists' description, infojson etc
 287                        also to disk when using the 'write*' options
 288     write_all_thumbnails:  Write all thumbnail formats to files
 289     writelink:         Write an internet shortcut file, depending on the
 290                        current platform (.url/.webloc/.desktop)
 291     writeurllink:      Write a Windows internet shortcut file (.url)
 292     writewebloclink:   Write a macOS internet shortcut file (.webloc)
 293     writedesktoplink:  Write a Linux internet shortcut file (.desktop)
 294     writesubtitles:    Write the video subtitles to a file
 295     writeautomaticsub: Write the automatically generated subtitles to a file
 296     allsubtitles:      Deprecated - Use subtitleslangs = ['all']
 297                        Downloads all the subtitles of the video
 298                        (requires writesubtitles or writeautomaticsub)
 299     listsubtitles:     Lists all available subtitles for the video
 300     subtitlesformat:   The format code for subtitles
 301     subtitleslangs:    List of languages of the subtitles to download (can be regex).
 302                        The list may contain "all" to refer to all the available
 303                        subtitles. The language can be prefixed with a "-" to
 304                        exclude it from the requested languages. Eg: ['all', '-live_chat']
 305     keepvideo:         Keep the video file after post-processing
 306     daterange:         A DateRange object, download only if the upload_date is in the range.
 307     skip_download:     Skip the actual download of the video file
 308     cachedir:          Location of the cache files in the filesystem.
 309                        False to disable filesystem cache.
 310     noplaylist:        Download single video instead of a playlist if in doubt.
 311     age_limit:         An integer representing the user's age in years.
 312                        Unsuitable videos for the given age are skipped.
 313     min_views:         An integer representing the minimum view count the video
 314                        must have in order to not be skipped.
 315                        Videos without view count information are always
 316                        downloaded. None for no limit.
 317     max_views:         An integer representing the maximum view count.
 318                        Videos that are more popular than that are not
 319                        downloaded.
 320                        Videos without view count information are always
 321                        downloaded. None for no limit.
 322     download_archive:  File name of a file where all downloads are recorded.
 323                        Videos already present in the file are not downloaded
 324                        again.
 325     break_on_existing: Stop the download process after attempting to download a
 326                        file that is in the archive.
 327     break_on_reject:   Stop the download process when encountering a video that
 328                        has been filtered out.
 329     break_per_url:     Whether break_on_reject and break_on_existing
 330                        should act on each input URL as opposed to for the entire queue
 331     cookiefile:        File name where cookies should be read from and dumped to
 332     cookiesfrombrowser:  A tuple containing the name of the browser, the profile
 333                        name/path from where cookies are loaded, and the name of the
 334                        keyring. Eg: ('chrome', ) or ('vivaldi', 'default', 'BASICTEXT')
 335     legacyserverconnect: Explicitly allow HTTPS connection to servers that do not
 336                        support RFC 5746 secure renegotiation
 337     nocheckcertificate:  Do not verify SSL certificates
 338     prefer_insecure:   Use HTTP instead of HTTPS to retrieve information.
 339                        At the moment, this is only supported by YouTube.
 340     http_headers:      A dictionary of custom headers to be used for all requests
 341     proxy:             URL of the proxy server to use
 342     geo_verification_proxy:  URL of the proxy to use for IP address verification
 343                        on geo-restricted sites.
 344     socket_timeout:    Time to wait for unresponsive hosts, in seconds
 345     bidi_workaround:   Work around buggy terminals without bidirectional text
 346                        support, using fribidi
 347     debug_printtraffic:Print out sent and received HTTP traffic
 348     include_ads:       Download ads as well (deprecated)
 349     default_search:    Prepend this string if an input url is not valid.
 350                        'auto' for elaborate guessing
 351     encoding:          Use this encoding instead of the system-specified.
 352     extract_flat:      Do not resolve URLs, return the immediate result.
 353                        Pass in 'in_playlist' to only show this behavior for
 354                        playlist items.
 355     wait_for_video:    If given, wait for scheduled streams to become available.
 356                        The value should be a tuple containing the range
 357                        (min_secs, max_secs) to wait between retries
 358     postprocessors:    A list of dictionaries, each with an entry
 359                        * key:  The name of the postprocessor. See
 360                                yt_dlp/postprocessor/__init__.py for a list.
 361                        * when: When to run the postprocessor. Allowed values are
 362                                the entries of utils.POSTPROCESS_WHEN
 363                                Assumed to be 'post_process' if not given
 364     post_hooks:        Deprecated - Register a custom postprocessor instead
 365                        A list of functions that get called as the final step
 366                        for each video file, after all postprocessors have been
 367                        called. The filename will be passed as the only argument.
 368     progress_hooks:    A list of functions that get called on download
 369                        progress, with a dictionary with the entries
 370                        * status: One of "downloading", "error", or "finished".
 371                                  Check this first and ignore unknown values.
 372                        * info_dict: The extracted info_dict
 373
 374                        If status is one of "downloading", or "finished", the
 375                        following properties may also be present:
 376                        * filename: The final filename (always present)
 377                        * tmpfilename: The filename we're currently writing to
 378                        * downloaded_bytes: Bytes on disk
 379                        * total_bytes: Size of the whole file, None if unknown
 380                        * total_bytes_estimate: Guess of the eventual file size,
 381                                                None if unavailable.
 382                        * elapsed: The number of seconds since download started.
 383                        * eta: The estimated time in seconds, None if unknown
 384                        * speed: The download speed in bytes/second, None if
 385                                 unknown
 386                        * fragment_index: The counter of the currently
 387                                          downloaded video fragment.
 388                        * fragment_count: The number of fragments (= individual
 389                                          files that will be merged)
 390
 391                        Progress hooks are guaranteed to be called at least once
 392                        (with status "finished") if the download is successful.
 393     postprocessor_hooks:  A list of functions that get called on postprocessing
 394                        progress, with a dictionary with the entries
 395                        * status: One of "started", "processing", or "finished".
 396                                  Check this first and ignore unknown values.
 397                        * postprocessor: Name of the postprocessor
 398                        * info_dict: The extracted info_dict
 399
 400                        Postprocessor hooks are guaranteed to be called at least twice
 401                        (with status "started" and "finished") if the processing is successful.
 402     merge_output_format: Extension to use when merging formats.
 403     final_ext:         Expected final extension; used to detect when the file was
 404                        already downloaded and converted
 405     fixup:             Automatically correct known faults of the file.
 406                        One of:
 407                        - "never": do nothing
 408                        - "warn": only emit a warning
 409                        - "detect_or_warn": check whether we can do anything
 410                                            about it, warn otherwise (default)
 411     source_address:    Client-side IP address to bind to.
 412     call_home:         Boolean, true iff we are allowed to contact the
 413                        yt-dlp servers for debugging. (BROKEN)
 414     sleep_interval_requests: Number of seconds to sleep between requests
 415                        during extraction
 416     sleep_interval:    Number of seconds to sleep before each download when
 417                        used alone or a lower bound of a range for randomized
 418                        sleep before each download (minimum possible number
 419                        of seconds to sleep) when used along with
 420                        max_sleep_interval.
 421     max_sleep_interval:Upper bound of a range for randomized sleep before each
 422                        download (maximum possible number of seconds to sleep).
 423                        Must only be used along with sleep_interval.
 424                        Actual sleep time will be a random float from range
 425                        [sleep_interval; max_sleep_interval].
 426     sleep_interval_subtitles: Number of seconds to sleep before each subtitle download
 427     listformats:       Print an overview of available video formats and exit.
 428     list_thumbnails:   Print a table of all thumbnails and exit.
 429     match_filter:      A function that gets called with the info_dict of
 430                        every video.
 431                        If it returns a message, the video is ignored.
 432                        If it returns None, the video is downloaded.
 433                        match_filter_func in utils.py is one example for this.
 434     no_color:          Do not emit color codes in output.
 435     geo_bypass:        Bypass geographic restriction via faking X-Forwarded-For
 436                        HTTP header
 437     geo_bypass_country:
 438                        Two-letter ISO 3166-1 alpha-2 country code that will be used for
 439                        explicit geographic restriction bypassing via faking
 440                        X-Forwarded-For HTTP header
 441     geo_bypass_ip_block:
 442                        IP range in CIDR notation that will be used similarly to
 443                        geo_bypass_country
 444
 445     The following options determine which downloader is picked:
 446     external_downloader: A dictionary of protocol keys and the executable of the
 447                        external downloader to use for it. The allowed protocols
 448                        are default|http|ftp|m3u8|dash|rtsp|rtmp|mms.
 449                        Set the value to 'native' to use the native downloader
 450     hls_prefer_native: Deprecated - Use external_downloader = {'m3u8': 'native'}
 451                        or {'m3u8': 'ffmpeg'} instead.
 452                        Use the native HLS downloader instead of ffmpeg/avconv
 453                        if True, otherwise use ffmpeg/avconv if False, otherwise
 454                        use downloader suggested by extractor if None.
 455     compat_opts:       Compatibility options. See "Differences in default behavior".
 456                        The following options do not work when used through the API:
 457                        filename, abort-on-error, multistreams, no-live-chat, format-sort,
 458                        no-clean-infojson, no-playlist-metafiles, no-keep-subs, no-attach-info-json.
 459                        Refer to __init__.py for their implementation
 460     progress_template: Dictionary of templates for progress outputs.
 461                        Allowed keys are 'download', 'postprocess',
 462                        'download-title' (console title) and 'postprocess-title'.
 463                        The template is mapped on a dictionary with keys 'progress' and 'info'
 464
 465     The following parameters are not used by YoutubeDL itself, they are used by
 466     the downloader (see yt_dlp/downloader/common.py):
 467     nopart, updatetime, buffersize, ratelimit, throttledratelimit, min_filesize,
 468     max_filesize, test, noresizebuffer, retries, file_access_retries, fragment_retries,
 469     continuedl, noprogress, xattr_set_filesize, hls_use_mpegts, http_chunk_size,
 470     external_downloader_args, concurrent_fragment_downloads.
 471
 472     The following options are used by the post processors:
 473     prefer_ffmpeg:     If False, use avconv instead of ffmpeg if both are available,
 474                        otherwise prefer ffmpeg. (avconv support is deprecated)
 475     ffmpeg_location:   Location of the ffmpeg/avconv binary; either the path
 476                        to the binary or its containing directory.
 477     postprocessor_args: A dictionary of postprocessor/executable keys (in lower case)
 478                        and a list of additional command-line arguments for the
 479                        postprocessor/executable. The dict can also have "PP+EXE" keys
 480                        which are used when the given exe is used by the given PP.
 481                        Use 'default' as the name for arguments to be passed to all PP
 482                        For compatibility with youtube-dl, a single list of args
 483                        can also be used
 484
 485     The following options are used by the extractors:
 486     extractor_retries: Number of times to retry for known errors
 487     dynamic_mpd:       Whether to process dynamic DASH manifests (default: True)
 488     hls_split_discontinuity: Split HLS playlists to different formats at
 489                        discontinuities such as ad breaks (default: False)
 490     extractor_args:    A dictionary of arguments to be passed to the extractors.
 491                        See "EXTRACTOR ARGUMENTS" for details.
 492                        Eg: {'youtube': {'skip': ['dash', 'hls']}}
 493     mark_watched:      Mark videos watched (even with --simulate). Only for YouTube
 494     youtube_include_dash_manifest: Deprecated - Use extractor_args instead.
 495                        If True (default), DASH manifests and related
 496                        data will be downloaded and processed by extractor.
 497                        You can reduce network I/O by disabling it if you don't
 498                        care about DASH. (only for youtube)
 499     youtube_include_hls_manifest: Deprecated - Use extractor_args instead.
 500                        If True (default), HLS manifests and related
 501                        data will be downloaded and processed by extractor.
 502                        You can reduce network I/O by disabling it if you don't
 503                        care about HLS. (only for youtube)
 504     """
 505
 506     _NUMERIC_FIELDS = set((
 507         'width', 'height', 'tbr', 'abr', 'asr', 'vbr', 'fps', 'filesize', 'filesize_approx',
 508         'timestamp', 'release_timestamp',
 509         'duration', 'view_count', 'like_count', 'dislike_count', 'repost_count',
 510         'average_rating', 'comment_count', 'age_limit',
 511         'start_time', 'end_time',
 512         'chapter_number', 'season_number', 'episode_number',
 513         'track_number', 'disc_number', 'release_year',
 514     ))
 515
 516     _format_fields = {
 517         # NB: Keep in sync with the docstring of extractor/common.py
 518         'url', 'manifest_url', 'ext', 'format', 'format_id', 'format_note',
 519         'width', 'height', 'resolution', 'dynamic_range', 'tbr', 'abr', 'acodec', 'asr',
 520         'vbr', 'fps', 'vcodec', 'container', 'filesize', 'filesize_approx',
 521         'player_url', 'protocol', 'fragment_base_url', 'fragments', 'is_from_start',
 522         'preference', 'language', 'language_preference', 'quality', 'source_preference',
 523         'http_headers', 'stretched_ratio', 'no_resume', 'has_drm', 'downloader_options',
 524         'page_url', 'app', 'play_path', 'tc_url', 'flash_version', 'rtmp_live', 'rtmp_conn', 'rtmp_protocol', 'rtmp_real_time'
 525     }
 526     _format_selection_exts = {
 527         'audio': {'m4a', 'mp3', 'ogg', 'aac'},
 528         'video': {'mp4', 'flv', 'webm', '3gp'},
 529         'storyboards': {'mhtml'},
 530     }
 531
 532     def __init__(self, params=None, auto_init=True):
 533         """Create a YoutubeDL object with the given options.
 534         @param auto_init    Whether to load the default extractors and print header (if verbose).
 535                             Set to 'no_verbose_header' to not print the header
 536         """
 537         if params is None:
 538             params = {}
 539         self.params = params
 540         self._ies = {}
 541         self._ies_instances = {}
 542         self._pps = {k: [] for k in POSTPROCESS_WHEN}
 543         self._printed_messages = set()
 544         self._first_webpage_request = True
 545         self._post_hooks = []
 546         self._progress_hooks = []
 547         self._postprocessor_hooks = []
 548         self._download_retcode = 0
 549         self._num_downloads = 0
 550         self._num_videos = 0
 551         self._playlist_level = 0
 552         self._playlist_urls = set()
 553         self.cache = Cache(self)
 554
 555         windows_enable_vt_mode()
 556         self._out_files = {
 557             'error': sys.stderr,
 558             'print': sys.stderr if self.params.get('logtostderr') else sys.stdout,
 559             'console': None if compat_os_name == 'nt' else next(
 560                 filter(supports_terminal_sequences, (sys.stderr, sys.stdout)), None)
 561         }
 562         self._out_files['screen'] = sys.stderr if self.params.get('quiet') else self._out_files['print']
 563         self._allow_colors = {
 564             type_: not self.params.get('no_color') and supports_terminal_sequences(self._out_files[type_])
 565             for type_ in ('screen', 'error')
 566         }
 567
 568         if sys.version_info < (3, 6):
 569             self.report_warning(
 570                 'Python version %d.%d is not supported! Please update to Python 3.6 or above' % sys.version_info[:2])
 571
 572         if self.params.get('allow_unplayable_formats'):
 573             self.report_warning(
 574                 f'You have asked for {self._format_err("UNPLAYABLE", self.Styles.EMPHASIS)} formats to be listed/downloaded. '
 575                 'This is a developer option intended for debugging. \n'
 576                 '         If you experience any issues while using this option, '
 577                 f'{self._format_err("DO NOT", self.Styles.ERROR)} open a bug report')
 578
 579         def check_deprecated(param, option, suggestion):
 580             if self.params.get(param) is not None:
 581                 self.report_warning('%s is deprecated. Use %s instead' % (option, suggestion))
 582                 return True
 583             return False
 584
 585         if check_deprecated('cn_verification_proxy', '--cn-verification-proxy', '--geo-verification-proxy'):
 586             if self.params.get('geo_verification_proxy') is None:
 587                 self.params['geo_verification_proxy'] = self.params['cn_verification_proxy']
 588
 589         check_deprecated('autonumber', '--auto-number', '-o "%(autonumber)s-%(title)s.%(ext)s"')
 590         check_deprecated('usetitle', '--title', '-o "%(title)s-%(id)s.%(ext)s"')
 591         check_deprecated('useid', '--id', '-o "%(id)s.%(ext)s"')
 592
 593         for msg in self.params.get('_warnings', []):
 594             self.report_warning(msg)
 595         for msg in self.params.get('_deprecation_warnings', []):
 596             self.deprecation_warning(msg)
 597
 598         if 'list-formats' in self.params.get('compat_opts', []):
 599             self.params['listformats_table'] = False
 600
 601         if 'overwrites' not in self.params and self.params.get('nooverwrites') is not None:
 602             # nooverwrites was unnecessarily changed to overwrites
 603             # in 0c3d0f51778b153f65c21906031c2e091fcfb641
 604             # This ensures compatibility with both keys
 605             self.params['overwrites'] = not self.params['nooverwrites']
 606         elif self.params.get('overwrites') is None:
 607             self.params.pop('overwrites', None)
 608         else:
 609             self.params['nooverwrites'] = not self.params['overwrites']
 610
 611         self.params.setdefault('forceprint', {})
 612         self.params.setdefault('print_to_file', {})
 613
 614         # Compatibility with older syntax
 615         if not isinstance(params['forceprint'], dict):
 616             self.params['forceprint'] = {'video': params['forceprint']}
 617
 618         if self.params.get('bidi_workaround', False):
 619             try:
 620                 import pty
 621                 master, slave = pty.openpty()
 622                 width = compat_get_terminal_size().columns
 623                 if width is None:
 624                     width_args = []
 625                 else:
 626                     width_args = ['-w', str(width)]
 627                 sp_kwargs = dict(
 628                     stdin=subprocess.PIPE,
 629                     stdout=slave,
 630                     stderr=self._out_files['error'])
 631                 try:
 632                     self._output_process = Popen(['bidiv'] + width_args, **sp_kwargs)
 633                 except OSError:
 634                     self._output_process = Popen(['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
 635                 self._output_channel = os.fdopen(master, 'rb')
 636             except OSError as ose:
 637                 if ose.errno == errno.ENOENT:
 638                     self.report_warning(
 639                         'Could not find fribidi executable, ignoring --bidi-workaround. '
 640                         'Make sure that  fribidi  is an executable file in one of the directories in your $PATH.')
 641                 else:
 642                     raise
 643
 644         if (sys.platform != 'win32'
 645                 and sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
 646                 and not self.params.get('restrictfilenames', False)):
 647             # Unicode filesystem API will throw errors (#1474, #13027)
 648             self.report_warning(
 649                 'Assuming --restrict-filenames since file system encoding '
 650                 'cannot encode all characters. '
 651                 'Set the LC_ALL environment variable to fix this.')
 652             self.params['restrictfilenames'] = True
 653
 654         self.outtmpl_dict = self.parse_outtmpl()
 655
 656         # Creating format selector here allows us to catch syntax errors before the extraction
 657         self.format_selector = (
 658             self.params.get('format') if self.params.get('format') in (None, '-')
 659             else self.params['format'] if callable(self.params['format'])
 660             else self.build_format_selector(self.params['format']))
 661
 662         # Set http_headers defaults according to std_headers
 663         self.params['http_headers'] = merge_headers(std_headers, self.params.get('http_headers', {}))
 664
 665         self._setup_opener()
 666
 667         if auto_init:
 668             if auto_init != 'no_verbose_header':
 669                 self.print_debug_header()
 670             self.add_default_info_extractors()
 671
 672         hooks = {
 673             'post_hooks': self.add_post_hook,
 674             'progress_hooks': self.add_progress_hook,
 675             'postprocessor_hooks': self.add_postprocessor_hook,
 676         }
 677         for opt, fn in hooks.items():
 678             for ph in self.params.get(opt, []):
 679                 fn(ph)
 680
 681         for pp_def_raw in self.params.get('postprocessors', []):
 682             pp_def = dict(pp_def_raw)
 683             when = pp_def.pop('when', 'post_process')
 684             self.add_post_processor(
 685                 get_postprocessor(pp_def.pop('key'))(self, **compat_kwargs(pp_def)),
 686                 when=when)
 687
 688         register_socks_protocols()
 689
 690         def preload_download_archive(fn):
 691             """Preload the archive, if any is specified"""
 692             if fn is None:
 693                 return False
 694             self.write_debug(f'Loading archive file {fn!r}')
 695             try:
 696                 with locked_file(fn, 'r', encoding='utf-8') as archive_file:
 697                     for line in archive_file:
 698                         self.archive.add(line.strip())
 699             except IOError as ioe:
 700                 if ioe.errno != errno.ENOENT:
 701                     raise
 702                 return False
 703             return True
 704
 705         self.archive = set()
 706         preload_download_archive(self.params.get('download_archive'))
 707
 708     def warn_if_short_id(self, argv):
 709         # short YouTube ID starting with dash?
 710         idxs = [
 711             i for i, a in enumerate(argv)
 712             if re.match(r'^-[0-9A-Za-z_-]{10}$', a)]
 713         if idxs:
 714             correct_argv = (
 715                 ['yt-dlp']
 716                 + [a for i, a in enumerate(argv) if i not in idxs]
 717                 + ['--'] + [argv[i] for i in idxs]
 718             )
 719             self.report_warning(
 720                 'Long argument string detected. '
 721                 'Use -- to separate parameters and URLs, like this:\n%s' %
 722                 args_to_str(correct_argv))
 723
 724     def add_info_extractor(self, ie):
 725         """Add an InfoExtractor object to the end of the list."""
 726         ie_key = ie.ie_key()
 727         self._ies[ie_key] = ie
 728         if not isinstance(ie, type):
 729             self._ies_instances[ie_key] = ie
 730             ie.set_downloader(self)
 731
 732     def _get_info_extractor_class(self, ie_key):
 733         ie = self._ies.get(ie_key)
 734         if ie is None:
 735             ie = get_info_extractor(ie_key)
 736             self.add_info_extractor(ie)
 737         return ie
 738
 739     def get_info_extractor(self, ie_key):
 740         """
 741         Get an instance of an IE with name ie_key, it will try to get one from
 742         the _ies list, if there's no instance it will create a new one and add
 743         it to the extractor list.
 744         """
 745         ie = self._ies_instances.get(ie_key)
 746         if ie is None:
 747             ie = get_info_extractor(ie_key)()
 748             self.add_info_extractor(ie)
 749         return ie
 750
 751     def add_default_info_extractors(self):
 752         """
 753         Add the InfoExtractors returned by gen_extractors to the end of the list
 754         """
 755         for ie in gen_extractor_classes():
 756             self.add_info_extractor(ie)
 757
 758     def add_post_processor(self, pp, when='post_process'):
 759         """Add a PostProcessor object to the end of the chain."""
 760         self._pps[when].append(pp)
 761         pp.set_downloader(self)
 762
 763     def add_post_hook(self, ph):
 764         """Add the post hook"""
 765         self._post_hooks.append(ph)
 766
 767     def add_progress_hook(self, ph):
 768         """Add the download progress hook"""
 769         self._progress_hooks.append(ph)
 770
 771     def add_postprocessor_hook(self, ph):
 772         """Add the postprocessing progress hook"""
 773         self._postprocessor_hooks.append(ph)
 774         for pps in self._pps.values():
 775             for pp in pps:
 776                 pp.add_progress_hook(ph)
 777
 778     def _bidi_workaround(self, message):
 779         if not hasattr(self, '_output_channel'):
 780             return message
 781
 782         assert hasattr(self, '_output_process')
 783         assert isinstance(message, compat_str)
 784         line_count = message.count('\n') + 1
 785         self._output_process.stdin.write((message + '\n').encode('utf-8'))
 786         self._output_process.stdin.flush()
 787         res = ''.join(self._output_channel.readline().decode('utf-8')
 788                       for _ in range(line_count))
 789         return res[:-len('\n')]
 790
 791     def _write_string(self, message, out=None, only_once=False):
 792         if only_once:
 793             if message in self._printed_messages:
 794                 return
 795             self._printed_messages.add(message)
 796         write_string(message, out=out, encoding=self.params.get('encoding'))
 797
 798     def to_stdout(self, message, skip_eol=False, quiet=None):
 799         """Print message to stdout"""
 800         if quiet is not None:
 801             self.deprecation_warning('"YoutubeDL.to_stdout" no longer accepts the argument quiet. Use "YoutubeDL.to_screen" instead')
 802         self._write_string(
 803             '%s%s' % (self._bidi_workaround(message), ('' if skip_eol else '\n')),
 804             self._out_files['print'])
 805
 806     def to_screen(self, message, skip_eol=False, quiet=None):
 807         """Print message to screen if not in quiet mode"""
 808         if self.params.get('logger'):
 809             self.params['logger'].debug(message)
 810             return
 811         if (self.params.get('quiet') if quiet is None else quiet) and not self.params.get('verbose'):
 812             return
 813         self._write_string(
 814             '%s%s' % (self._bidi_workaround(message), ('' if skip_eol else '\n')),
 815             self._out_files['screen'])
 816
 817     def to_stderr(self, message, only_once=False):
 818         """Print message to stderr"""
 819         assert isinstance(message, compat_str)
 820         if self.params.get('logger'):
 821             self.params['logger'].error(message)
 822         else:
 823             self._write_string('%s\n' % self._bidi_workaround(message), self._out_files['error'], only_once=only_once)
 824
 825     def _send_console_code(self, code):
 826         if compat_os_name == 'nt' or not self._out_files['console']:
 827             return
 828         self._write_string(code, self._out_files['console'])
 829
 830     def to_console_title(self, message):
 831         if not self.params.get('consoletitle', False):
 832             return
 833         message = remove_terminal_sequences(message)
 834         if compat_os_name == 'nt':
 835             if ctypes.windll.kernel32.GetConsoleWindow():
 836                 # c_wchar_p() might not be necessary if `message` is
 837                 # already of type unicode()
 838                 ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
 839         else:
 840             self._send_console_code(f'\033]0;{message}\007')
 841
 842     def save_console_title(self):
 843         if not self.params.get('consoletitle') or self.params.get('simulate'):
 844             return
 845         self._send_console_code('\033[22;0t')  # Save the title on stack
 846
 847     def restore_console_title(self):
 848         if not self.params.get('consoletitle') or self.params.get('simulate'):
 849             return
 850         self._send_console_code('\033[23;0t')  # Restore the title from stack
 851
 852     def __enter__(self):
 853         self.save_console_title()
 854         return self
 855
 856     def __exit__(self, *args):
 857         self.restore_console_title()
 858
 859         if self.params.get('cookiefile') is not None:
 860             self.cookiejar.save(ignore_discard=True, ignore_expires=True)
 861
 862     def trouble(self, message=None, tb=None, is_error=True):
 863         """Determine action to take when a download problem appears.
 864
 865         Depending on if the downloader has been configured to ignore
 866         download errors or not, this method may throw an exception or
 867         not when errors are found, after printing the message.
 868
 869         @param tb          If given, is additional traceback information
 870         @param is_error    Whether to raise error according to ignorerrors
 871         """
 872         if message is not None:
 873             self.to_stderr(message)
 874         if self.params.get('verbose'):
 875             if tb is None:
 876                 if sys.exc_info()[0]:  # if .trouble has been called from an except block
 877                     tb = ''
 878                     if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
 879                         tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
 880                     tb += encode_compat_str(traceback.format_exc())
 881                 else:
 882                     tb_data = traceback.format_list(traceback.extract_stack())
 883                     tb = ''.join(tb_data)
 884             if tb:
 885                 self.to_stderr(tb)
 886         if not is_error:
 887             return
 888         if not self.params.get('ignoreerrors'):
 889             if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
 890                 exc_info = sys.exc_info()[1].exc_info
 891             else:
 892                 exc_info = sys.exc_info()
 893             raise DownloadError(message, exc_info)
 894         self._download_retcode = 1
 895
    class Styles(Enum):
        """Named text styles.

        _format_text unwraps a member to its value and passes that value to
        format_text() as the style description.
        """
        HEADERS = 'yellow'
        EMPHASIS = 'light blue'
        ID = 'green'
        DELIM = 'blue'
        ERROR = 'red'
        # Same value as HEADERS, so Enum makes WARNING an alias of HEADERS
        WARNING = 'yellow'
        SUPPRESS = 'light black'
 904
 905     def _format_text(self, handle, allow_colors, text, f, fallback=None, *, test_encoding=False):
 906         if test_encoding:
 907             original_text = text
 908             # handle.encoding can be None. See https://github.com/yt-dlp/yt-dlp/issues/2711
 909             encoding = self.params.get('encoding') or getattr(handle, 'encoding', None) or 'ascii'
 910             text = text.encode(encoding, 'ignore').decode(encoding)
 911             if fallback is not None and text != original_text:
 912                 text = fallback
 913         if isinstance(f, self.Styles):
 914             f = f.value
 915         return format_text(text, f) if allow_colors else text if fallback is None else fallback
 916
 917     def _format_screen(self, *args, **kwargs):
 918         return self._format_text(
 919             self._out_files['screen'], self._allow_colors['screen'], *args, **kwargs)
 920
 921     def _format_err(self, *args, **kwargs):
 922         return self._format_text(
 923             self._out_files['error'], self._allow_colors['error'], *args, **kwargs)
 924
 925     def report_warning(self, message, only_once=False):
 926         '''
 927         Print the message to stderr, it will be prefixed with 'WARNING:'
 928         If stderr is a tty file the 'WARNING:' will be colored
 929         '''
 930         if self.params.get('logger') is not None:
 931             self.params['logger'].warning(message)
 932         else:
 933             if self.params.get('no_warnings'):
 934                 return
 935             self.to_stderr(f'{self._format_err("WARNING:", self.Styles.WARNING)} {message}', only_once)
 936
 937     def deprecation_warning(self, message):
 938         if self.params.get('logger') is not None:
 939             self.params['logger'].warning('DeprecationWarning: {message}')
 940         else:
 941             self.to_stderr(f'{self._format_err("DeprecationWarning:", self.Styles.ERROR)} {message}', True)
 942
 943     def report_error(self, message, *args, **kwargs):
 944         '''
 945         Do the same as trouble, but prefixes the message with 'ERROR:', colored
 946         in red if stderr is a tty file.
 947         '''
 948         self.trouble(f'{self._format_err("ERROR:", self.Styles.ERROR)} {message}', *args, **kwargs)
 949
 950     def write_debug(self, message, only_once=False):
 951         '''Log debug message or Print message to stderr'''
 952         if not self.params.get('verbose', False):
 953             return
 954         message = '[debug] %s' % message
 955         if self.params.get('logger'):
 956             self.params['logger'].debug(message)
 957         else:
 958             self.to_stderr(message, only_once)
 959
 960     def report_file_already_downloaded(self, file_name):
 961         """Report file has already been fully downloaded."""
 962         try:
 963             self.to_screen('[download] %s has already been downloaded' % file_name)
 964         except UnicodeEncodeError:
 965             self.to_screen('[download] The file has already been downloaded')
 966
 967     def report_file_delete(self, file_name):
 968         """Report that existing file will be deleted."""
 969         try:
 970             self.to_screen('Deleting existing file %s' % file_name)
 971         except UnicodeEncodeError:
 972             self.to_screen('Deleting existing file')
 973
 974     def raise_no_formats(self, info, forced=False, *, msg=None):
 975         has_drm = info.get('__has_drm')
 976         ignored, expected = self.params.get('ignore_no_formats_error'), bool(msg)
 977         msg = msg or has_drm and 'This video is DRM protected' or 'No video formats found!'
 978         if forced or not ignored:
 979             raise ExtractorError(msg, video_id=info['id'], ie=info['extractor'],
 980                                  expected=has_drm or ignored or expected)
 981         else:
 982             self.report_warning(msg)
 983
 984     def parse_outtmpl(self):
 985         outtmpl_dict = self.params.get('outtmpl', {})
 986         if not isinstance(outtmpl_dict, dict):
 987             outtmpl_dict = {'default': outtmpl_dict}
 988         # Remove spaces in the default template
 989         if self.params.get('restrictfilenames'):
 990             sanitize = lambda x: x.replace(' - ', ' ').replace(' ', '-')
 991         else:
 992             sanitize = lambda x: x
 993         outtmpl_dict.update({
 994             k: sanitize(v) for k, v in DEFAULT_OUTTMPL.items()
 995             if outtmpl_dict.get(k) is None})
 996         for key, val in outtmpl_dict.items():
 997             if isinstance(val, bytes):
 998                 self.report_warning(
 999                     'Parameter outtmpl is bytes, but should be a unicode string. '
1000                     'Put  from __future__ import unicode_literals  at the top of your code file or consider switching to Python 3.x.')
1001         return outtmpl_dict
1002
1003     def get_output_path(self, dir_type='', filename=None):
1004         paths = self.params.get('paths', {})
1005         assert isinstance(paths, dict)
1006         path = os.path.join(
1007             expand_path(paths.get('home', '').strip()),
1008             expand_path(paths.get(dir_type, '').strip()) if dir_type else '',
1009             filename or '')
1010
1011         # Temporary fix for #4787
1012         # 'Treat' all problem characters by passing filename through preferredencoding
1013         # to workaround encoding issues with subprocess on python2 @ Windows
1014         if sys.version_info < (3, 0) and sys.platform == 'win32':
1015             path = encodeFilename(path, True).decode(preferredencoding())
1016         return sanitize_path(path, force=self.params.get('windowsfilenames'))
1017
1018     @staticmethod
1019     def _outtmpl_expandpath(outtmpl):
1020         # expand_path translates '%%' into '%' and '$$' into '$'
1021         # correspondingly that is not what we want since we need to keep
1022         # '%%' intact for template dict substitution step. Working around
1023         # with boundary-alike separator hack.
1024         sep = ''.join([random.choice(ascii_letters) for _ in range(32)])
1025         outtmpl = outtmpl.replace('%%', '%{0}%'.format(sep)).replace('$$', '${0}$'.format(sep))
1026
1027         # outtmpl should be expand_path'ed before template dict substitution
1028         # because meta fields may contain env variables we don't want to
1029         # be expanded. For example, for outtmpl "%(title)s.%(ext)s" and
1030         # title "Hello $PATH", we don't want `$PATH` to be expanded.
1031         return expand_path(outtmpl).replace(sep, '')
1032
1033     @staticmethod
1034     def escape_outtmpl(outtmpl):
1035         ''' Escape any remaining strings like %s, %abc% etc. '''
1036         return re.sub(
1037             STR_FORMAT_RE_TMPL.format('', '(?![%(\0])'),
1038             lambda mobj: ('' if mobj.group('has_key') else '%') + mobj.group(0),
1039             outtmpl)
1040
1041     @classmethod
1042     def validate_outtmpl(cls, outtmpl):
1043         ''' @return None or Exception object '''
1044         outtmpl = re.sub(
1045             STR_FORMAT_RE_TMPL.format('[^)]*', '[ljqBUDS]'),
1046             lambda mobj: f'{mobj.group(0)[:-1]}s',
1047             cls._outtmpl_expandpath(outtmpl))
1048         try:
1049             cls.escape_outtmpl(outtmpl) % collections.defaultdict(int)
1050             return None
1051         except ValueError as err:
1052             return err
1053
1054     @staticmethod
1055     def _copy_infodict(info_dict):
1056         info_dict = dict(info_dict)
1057         info_dict.pop('__postprocessors', None)
1058         return info_dict
1059
1060     def prepare_outtmpl(self, outtmpl, info_dict, sanitize=False):
1061         """ Make the outtmpl and info_dict suitable for substitution: ydl.escape_outtmpl(outtmpl) % info_dict
1062         @param sanitize    Whether to sanitize the output as a filename.
1063                            For backward compatibility, a function can also be passed
1064         """
1065
1066         info_dict.setdefault('epoch', int(time.time()))  # keep epoch consistent once set
1067
1068         info_dict = self._copy_infodict(info_dict)
1069         info_dict['duration_string'] = (  # %(duration>%H-%M-%S)s is wrong if duration > 24hrs
1070             formatSeconds(info_dict['duration'], '-' if sanitize else ':')
1071             if info_dict.get('duration', None) is not None
1072             else None)
1073         info_dict['autonumber'] = self.params.get('autonumber_start', 1) - 1 + self._num_downloads
1074         info_dict['video_autonumber'] = self._num_videos
1075         if info_dict.get('resolution') is None:
1076             info_dict['resolution'] = self.format_resolution(info_dict, default=None)
1077
1078         # For fields playlist_index, playlist_autonumber and autonumber convert all occurrences
1079         # of %(field)s to %(field)0Nd for backward compatibility
1080         field_size_compat_map = {
1081             'playlist_index': number_of_digits(info_dict.get('_last_playlist_index') or 0),
1082             'playlist_autonumber': number_of_digits(info_dict.get('n_entries') or 0),
1083             'autonumber': self.params.get('autonumber_size') or 5,
1084         }
1085
1086         TMPL_DICT = {}
1087         EXTERNAL_FORMAT_RE = re.compile(STR_FORMAT_RE_TMPL.format('[^)]*', f'[{STR_FORMAT_TYPES}ljqBUDS]'))
1088         MATH_FUNCTIONS = {
1089             '+': float.__add__,
1090             '-': float.__sub__,
1091         }
1092         # Field is of the form key1.key2...
1093         # where keys (except first) can be string, int or slice
1094         FIELD_RE = r'\w*(?:\.(?:\w+|{num}|{num}?(?::{num}?){{1,2}}))*'.format(num=r'(?:-?\d+)')
1095         MATH_FIELD_RE = r'''(?:{field}|{num})'''.format(field=FIELD_RE, num=r'-?\d+(?:.\d+)?')
1096         MATH_OPERATORS_RE = r'(?:%s)' % '|'.join(map(re.escape, MATH_FUNCTIONS.keys()))
1097         INTERNAL_FORMAT_RE = re.compile(r'''(?x)
1098             (?P<negate>-)?
1099             (?P<fields>{field})
1100             (?P<maths>(?:{math_op}{math_field})*)
1101             (?:>(?P<strf_format>.+?))?
1102             (?P<remaining>
1103                 (?P<alternate>(?<!\\),[^|&)]+)?
1104                 (?:&(?P<replacement>.*?))?
1105                 (?:\|(?P<default>.*?))?
1106             )$'''.format(field=FIELD_RE, math_op=MATH_OPERATORS_RE, math_field=MATH_FIELD_RE))
1107
1108         def _traverse_infodict(k):
1109             k = k.split('.')
1110             if k[0] == '':
1111                 k.pop(0)
1112             return traverse_obj(info_dict, k, is_user_input=True, traverse_string=True)
1113
1114         def get_value(mdict):
1115             # Object traversal
1116             value = _traverse_infodict(mdict['fields'])
1117             # Negative
1118             if mdict['negate']:
1119                 value = float_or_none(value)
1120                 if value is not None:
1121                     value *= -1
1122             # Do maths
1123             offset_key = mdict['maths']
1124             if offset_key:
1125                 value = float_or_none(value)
1126                 operator = None
1127                 while offset_key:
1128                     item = re.match(
1129                         MATH_FIELD_RE if operator else MATH_OPERATORS_RE,
1130                         offset_key).group(0)
1131                     offset_key = offset_key[len(item):]
1132                     if operator is None:
1133                         operator = MATH_FUNCTIONS[item]
1134                         continue
1135                     item, multiplier = (item[1:], -1) if item[0] == '-' else (item, 1)
1136                     offset = float_or_none(item)
1137                     if offset is None:
1138                         offset = float_or_none(_traverse_infodict(item))
1139                     try:
1140                         value = operator(value, multiplier * offset)
1141                     except (TypeError, ZeroDivisionError):
1142                         return None
1143                     operator = None
1144             # Datetime formatting
1145             if mdict['strf_format']:
1146                 value = strftime_or_none(value, mdict['strf_format'].replace('\\,', ','))
1147
1148             return value
1149
1150         na = self.params.get('outtmpl_na_placeholder', 'NA')
1151
1152         def filename_sanitizer(key, value, restricted=self.params.get('restrictfilenames')):
1153             return sanitize_filename(str(value), restricted=restricted,
1154                                      is_id=re.search(r'(^|[_.])id(\.|$)', key))
1155
1156         sanitizer = sanitize if callable(sanitize) else filename_sanitizer
1157         sanitize = bool(sanitize)
1158
1159         def _dumpjson_default(obj):
1160             if isinstance(obj, (set, LazyList)):
1161                 return list(obj)
1162             return repr(obj)
1163
1164         def create_key(outer_mobj):
1165             if not outer_mobj.group('has_key'):
1166                 return outer_mobj.group(0)
1167             key = outer_mobj.group('key')
1168             mobj = re.match(INTERNAL_FORMAT_RE, key)
1169             initial_field = mobj.group('fields') if mobj else ''
1170             value, replacement, default = None, None, na
1171             while mobj:
1172                 mobj = mobj.groupdict()
1173                 default = mobj['default'] if mobj['default'] is not None else default
1174                 value = get_value(mobj)
1175                 replacement = mobj['replacement']
1176                 if value is None and mobj['alternate']:
1177                     mobj = re.match(INTERNAL_FORMAT_RE, mobj['remaining'][1:])
1178                 else:
1179                     break
1180
1181             fmt = outer_mobj.group('format')
1182             if fmt == 's' and value is not None and key in field_size_compat_map.keys():
1183                 fmt = '0{:d}d'.format(field_size_compat_map[key])
1184
1185             value = default if value is None else value if replacement is None else replacement
1186
1187             flags = outer_mobj.group('conversion') or ''
1188             str_fmt = f'{fmt[:-1]}s'
1189             if fmt[-1] == 'l':  # list
1190                 delim = '\n' if '#' in flags else ', '
1191                 value, fmt = delim.join(map(str, variadic(value, allowed_types=(str, bytes)))), str_fmt
1192             elif fmt[-1] == 'j':  # json
1193                 value, fmt = json.dumps(value, default=_dumpjson_default, indent=4 if '#' in flags else None), str_fmt
1194             elif fmt[-1] == 'q':  # quoted
1195                 value = map(str, variadic(value) if '#' in flags else [value])
1196                 value, fmt = ' '.join(map(compat_shlex_quote, value)), str_fmt
1197             elif fmt[-1] == 'B':  # bytes
1198                 value = f'%{str_fmt}'.encode('utf-8') % str(value).encode('utf-8')
1199                 value, fmt = value.decode('utf-8', 'ignore'), 's'
1200             elif fmt[-1] == 'U':  # unicode normalized
1201                 value, fmt = unicodedata.normalize(
1202                     # "+" = compatibility equivalence, "#" = NFD
1203                     'NF%s%s' % ('K' if '+' in flags else '', 'D' if '#' in flags else 'C'),
1204                     value), str_fmt
1205             elif fmt[-1] == 'D':  # decimal suffix
1206                 num_fmt, fmt = fmt[:-1].replace('#', ''), 's'
1207                 value = format_decimal_suffix(value, f'%{num_fmt}f%s' if num_fmt else '%d%s',
1208                                               factor=1024 if '#' in flags else 1000)
1209             elif fmt[-1] == 'S':  # filename sanitization
1210                 value, fmt = filename_sanitizer(initial_field, value, restricted='#' in flags), str_fmt
1211             elif fmt[-1] == 'c':
1212                 if value:
1213                     value = str(value)[0]
1214                 else:
1215                     fmt = str_fmt
1216             elif fmt[-1] not in 'rs':  # numeric
1217                 value = float_or_none(value)
1218                 if value is None:
1219                     value, fmt = default, 's'
1220
1221             if sanitize:
1222                 if fmt[-1] == 'r':
1223                     # If value is an object, sanitize might convert it to a string
1224                     # So we convert it to repr first
1225                     value, fmt = repr(value), str_fmt
1226                 if fmt[-1] in 'csr':
1227                     value = sanitizer(initial_field, value)
1228
1229             key = '%s\0%s' % (key.replace('%', '%\0'), outer_mobj.group('format'))
1230             TMPL_DICT[key] = value
1231             return '{prefix}%({key}){fmt}'.format(key=key, fmt=fmt, prefix=outer_mobj.group('prefix'))
1232
1233         return EXTERNAL_FORMAT_RE.sub(create_key, outtmpl), TMPL_DICT
1234
1235     def evaluate_outtmpl(self, outtmpl, info_dict, *args, **kwargs):
1236         outtmpl, info_dict = self.prepare_outtmpl(outtmpl, info_dict, *args, **kwargs)
1237         return self.escape_outtmpl(outtmpl) % info_dict
1238
1239     def _prepare_filename(self, info_dict, tmpl_type='default'):
1240         try:
1241             outtmpl = self._outtmpl_expandpath(self.outtmpl_dict.get(tmpl_type, self.outtmpl_dict['default']))
1242             filename = self.evaluate_outtmpl(outtmpl, info_dict, True)
1243             if not filename:
1244                 return None
1245
1246             if tmpl_type in ('default', 'temp'):
1247                 final_ext, ext = self.params.get('final_ext'), info_dict.get('ext')
1248                 if final_ext and ext and final_ext != ext and filename.endswith(f'.{final_ext}'):
1249                     filename = replace_extension(filename, ext, final_ext)
1250             else:
1251                 force_ext = OUTTMPL_TYPES[tmpl_type]
1252                 if force_ext:
1253                     filename = replace_extension(filename, force_ext, info_dict.get('ext'))
1254
1255             # https://github.com/blackjack4494/youtube-dlc/issues/85
1256             trim_file_name = self.params.get('trim_file_name', False)
1257             if trim_file_name:
1258                 no_ext, *ext = filename.rsplit('.', 2)
1259                 filename = join_nonempty(no_ext[:trim_file_name], *ext, delim='.')
1260
1261             return filename
1262         except ValueError as err:
1263             self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
1264             return None
1265
1266     def prepare_filename(self, info_dict, dir_type='', warn=False):
1267         """Generate the output filename."""
1268
1269         filename = self._prepare_filename(info_dict, dir_type or 'default')
1270         if not filename and dir_type not in ('', 'temp'):
1271             return ''
1272
1273         if warn:
1274             if not self.params.get('paths'):
1275                 pass
1276             elif filename == '-':
1277                 self.report_warning('--paths is ignored when an outputting to stdout', only_once=True)
1278             elif os.path.isabs(filename):
1279                 self.report_warning('--paths is ignored since an absolute path is given in output template', only_once=True)
1280         if filename == '-' or not filename:
1281             return filename
1282
1283         return self.get_output_path(dir_type, filename)
1284
1285     def _match_entry(self, info_dict, incomplete=False, silent=False):
1286         """ Returns None if the file should be downloaded """
1287
1288         video_title = info_dict.get('title', info_dict.get('id', 'video'))
1289
1290         def check_filter():
1291             if 'title' in info_dict:
1292                 # This can happen when we're just evaluating the playlist
1293                 title = info_dict['title']
1294                 matchtitle = self.params.get('matchtitle', False)
1295                 if matchtitle:
1296                     if not re.search(matchtitle, title, re.IGNORECASE):
1297                         return '"' + title + '" title did not match pattern "' + matchtitle + '"'
1298                 rejecttitle = self.params.get('rejecttitle', False)
1299                 if rejecttitle:
1300                     if re.search(rejecttitle, title, re.IGNORECASE):
1301                         return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
1302             date = info_dict.get('upload_date')
1303             if date is not None:
1304                 dateRange = self.params.get('daterange', DateRange())
1305                 if date not in dateRange:
1306                     return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
1307             view_count = info_dict.get('view_count')
1308             if view_count is not None:
1309                 min_views = self.params.get('min_views')
1310                 if min_views is not None and view_count < min_views:
1311                     return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
1312                 max_views = self.params.get('max_views')
1313                 if max_views is not None and view_count > max_views:
1314                     return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
1315             if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
1316                 return 'Skipping "%s" because it is age restricted' % video_title
1317
1318             match_filter = self.params.get('match_filter')
1319             if match_filter is not None:
1320                 try:
1321                     ret = match_filter(info_dict, incomplete=incomplete)
1322                 except TypeError:
1323                     # For backward compatibility
1324                     ret = None if incomplete else match_filter(info_dict)
1325                 if ret is not None:
1326                     return ret
1327             return None
1328
1329         if self.in_download_archive(info_dict):
1330             reason = '%s has already been recorded in the archive' % video_title
1331             break_opt, break_err = 'break_on_existing', ExistingVideoReached
1332         else:
1333             reason = check_filter()
1334             break_opt, break_err = 'break_on_reject', RejectedVideoReached
1335         if reason is not None:
1336             if not silent:
1337                 self.to_screen('[download] ' + reason)
1338             if self.params.get(break_opt, False):
1339                 raise break_err()
1340         return reason
1341
1342     @staticmethod
1343     def add_extra_info(info_dict, extra_info):
1344         '''Set the keys from extra_info in info dict if they are missing'''
1345         for key, value in extra_info.items():
1346             info_dict.setdefault(key, value)
1347
1348     def extract_info(self, url, download=True, ie_key=None, extra_info=None,
1349                      process=True, force_generic_extractor=False):
1350         """
1351         Return a list with a dictionary for each video extracted.
1352
1353         Arguments:
1354         url -- URL to extract
1355
1356         Keyword arguments:
1357         download -- whether to download videos during extraction
1358         ie_key -- extractor key hint
1359         extra_info -- dictionary containing the extra values to add to each result
1360         process -- whether to resolve all unresolved references (URLs, playlist items),
1361             must be True for download to work.
1362         force_generic_extractor -- force using the generic extractor
1363         """
1364
1365         if extra_info is None:
1366             extra_info = {}
1367
1368         if not ie_key and force_generic_extractor:
1369             ie_key = 'Generic'
1370
1371         if ie_key:
1372             ies = {ie_key: self._get_info_extractor_class(ie_key)}
1373         else:
1374             ies = self._ies
1375
1376         for ie_key, ie in ies.items():
1377             if not ie.suitable(url):
1378                 continue
1379
1380             if not ie.working():
1381                 self.report_warning('The program functionality for this site has been marked as broken, '
1382                                     'and will probably not work.')
1383
1384             temp_id = ie.get_temp_id(url)
1385             if temp_id is not None and self.in_download_archive({'id': temp_id, 'ie_key': ie_key}):
1386                 self.to_screen(f'[{ie_key}] {temp_id}: has already been recorded in the archive')
1387                 if self.params.get('break_on_existing', False):
1388                     raise ExistingVideoReached()
1389                 break
1390             return self.__extract_info(url, self.get_info_extractor(ie_key), download, extra_info, process)
1391         else:
1392             self.report_error('no suitable InfoExtractor for URL %s' % url)
1393
    def __handle_extraction_exceptions(func):
        """Decorator that handles the errors raised by an extraction method.

        The decorated method's return value is passed through on success.
        ReExtractInfo makes the call be repeated; errors that are reported
        instead of re-raised make the wrapper return None.
        """
        @functools.wraps(func)
        def wrapper(self, *args, **kwargs):
            # Loop so that the call can be repeated when re-extraction is requested
            while True:
                try:
                    return func(self, *args, **kwargs)
                except (DownloadCancelled, LazyList.IndexError, PagedList.IndexError):
                    # These are passed on to the caller untouched
                    raise
                except ReExtractInfo as e:
                    # Announce the retry (as a warning if it was unexpected) and call func again
                    if e.expected:
                        self.to_screen(f'{e}; Re-extracting data')
                    else:
                        self.to_stderr('\r')
                        self.report_warning(f'{e}; Re-extracting data')
                    continue
                except GeoRestrictedError as e:
                    # Extend the message with the allowed countries (if known) and a hint
                    msg = e.msg
                    if e.countries:
                        msg += '\nThis video is available in %s.' % ', '.join(
                            map(ISO3166Utils.short2full, e.countries))
                    msg += '\nYou might want to use a VPN or a proxy server (with --proxy) to workaround.'
                    self.report_error(msg)
                except ExtractorError as e:  # An error we somewhat expected
                    self.report_error(str(e), e.format_traceback())
                except Exception as e:
                    # Unexpected errors are only swallowed when 'ignoreerrors' is set
                    if self.params.get('ignoreerrors'):
                        self.report_error(str(e), tb=encode_compat_str(traceback.format_exc()))
                    else:
                        raise
                # Reached only after an error was reported; leave the loop and return None
                break
        return wrapper
1425
1426     def _wait_for_video(self, ie_result):
1427         if (not self.params.get('wait_for_video')
1428                 or ie_result.get('_type', 'video') != 'video'
1429                 or ie_result.get('formats') or ie_result.get('url')):
1430             return
1431
1432         format_dur = lambda dur: '%02d:%02d:%02d' % timetuple_from_msec(dur * 1000)[:-1]
1433         last_msg = ''
1434
1435         def progress(msg):
1436             nonlocal last_msg
1437             self.to_screen(msg + ' ' * (len(last_msg) - len(msg)) + '\r', skip_eol=True)
1438             last_msg = msg
1439
1440         min_wait, max_wait = self.params.get('wait_for_video')
1441         diff = try_get(ie_result, lambda x: x['release_timestamp'] - time.time())
1442         if diff is None and ie_result.get('live_status') == 'is_upcoming':
1443             diff = round(random.uniform(min_wait, max_wait) if (max_wait and min_wait) else (max_wait or min_wait), 0)
1444             self.report_warning('Release time of video is not known')
1445         elif (diff or 0) <= 0:
1446             self.report_warning('Video should already be available according to extracted info')
1447         diff = min(max(diff or 0, min_wait or 0), max_wait or float('inf'))
1448         self.to_screen(f'[wait] Waiting for {format_dur(diff)} - Press Ctrl+C to try now')
1449
1450         wait_till = time.time() + diff
1451         try:
1452             while True:
1453                 diff = wait_till - time.time()
1454                 if diff <= 0:
1455                     progress('')
1456                     raise ReExtractInfo('[wait] Wait period ended', expected=True)
1457                 progress(f'[wait] Remaining time until next attempt: {self._format_screen(format_dur(diff), self.Styles.EMPHASIS)}')
1458                 time.sleep(1)
1459         except KeyboardInterrupt:
1460             progress('')
1461             raise ReExtractInfo('[wait] Interrupted by user', expected=True)
1462         except BaseException as e:
1463             if not isinstance(e, ReExtractInfo):
1464                 self.to_screen('')
1465             raise
1466
1467     @__handle_extraction_exceptions
1468     def __extract_info(self, url, ie, download, extra_info, process):
1469         ie_result = ie.extract(url)
1470         if ie_result is None:  # Finished already (backwards compatibility; listformats and friends should be moved here)
1471             return
1472         if isinstance(ie_result, list):
1473             # Backwards compatibility: old IE result format
1474             ie_result = {
1475                 '_type': 'compat_list',
1476                 'entries': ie_result,
1477             }
1478         if extra_info.get('original_url'):
1479             ie_result.setdefault('original_url', extra_info['original_url'])
1480         self.add_default_extra_info(ie_result, ie, url)
1481         if process:
1482             self._wait_for_video(ie_result)
1483             return self.process_ie_result(ie_result, download, extra_info)
1484         else:
1485             return ie_result
1486
1487     def add_default_extra_info(self, ie_result, ie, url):
1488         if url is not None:
1489             self.add_extra_info(ie_result, {
1490                 'webpage_url': url,
1491                 'original_url': url,
1492             })
1493         webpage_url = ie_result.get('webpage_url')
1494         if webpage_url:
1495             self.add_extra_info(ie_result, {
1496                 'webpage_url_basename': url_basename(webpage_url),
1497                 'webpage_url_domain': get_domain(webpage_url),
1498             })
1499         if ie is not None:
1500             self.add_extra_info(ie_result, {
1501                 'extractor': ie.IE_NAME,
1502                 'extractor_key': ie.ie_key(),
1503             })
1504
1505     def process_ie_result(self, ie_result, download=True, extra_info=None):
1506         """
1507         Take the result of the ie(may be modified) and resolve all unresolved
1508         references (URLs, playlist items).
1509
1510         It will also download the videos if 'download'.
1511         Returns the resolved ie_result.
1512         """
1513         if extra_info is None:
1514             extra_info = {}
1515         result_type = ie_result.get('_type', 'video')
1516
1517         if result_type in ('url', 'url_transparent'):
1518             ie_result['url'] = sanitize_url(ie_result['url'])
1519             if ie_result.get('original_url'):
1520                 extra_info.setdefault('original_url', ie_result['original_url'])
1521
1522             extract_flat = self.params.get('extract_flat', False)
1523             if ((extract_flat == 'in_playlist' and 'playlist' in extra_info)
1524                     or extract_flat is True):
1525                 info_copy = ie_result.copy()
1526                 ie = try_get(ie_result.get('ie_key'), self.get_info_extractor)
1527                 if ie and not ie_result.get('id'):
1528                     info_copy['id'] = ie.get_temp_id(ie_result['url'])
1529                 self.add_default_extra_info(info_copy, ie, ie_result['url'])
1530                 self.add_extra_info(info_copy, extra_info)
1531                 info_copy, _ = self.pre_process(info_copy)
1532                 self.__forced_printings(info_copy, self.prepare_filename(info_copy), incomplete=True)
1533                 if self.params.get('force_write_download_archive', False):
1534                     self.record_download_archive(info_copy)
1535                 return ie_result
1536
1537         if result_type == 'video':
1538             self.add_extra_info(ie_result, extra_info)
1539             ie_result = self.process_video_result(ie_result, download=download)
1540             additional_urls = (ie_result or {}).get('additional_urls')
1541             if additional_urls:
1542                 # TODO: Improve MetadataParserPP to allow setting a list
1543                 if isinstance(additional_urls, compat_str):
1544                     additional_urls = [additional_urls]
1545                 self.to_screen(
1546                     '[info] %s: %d additional URL(s) requested' % (ie_result['id'], len(additional_urls)))
1547                 self.write_debug('Additional URLs: "%s"' % '", "'.join(additional_urls))
1548                 ie_result['additional_entries'] = [
1549                     self.extract_info(
1550                         url, download, extra_info=extra_info,
1551                         force_generic_extractor=self.params.get('force_generic_extractor'))
1552                     for url in additional_urls
1553                 ]
1554             return ie_result
1555         elif result_type == 'url':
1556             # We have to add extra_info to the results because it may be
1557             # contained in a playlist
1558             return self.extract_info(
1559                 ie_result['url'], download,
1560                 ie_key=ie_result.get('ie_key'),
1561                 extra_info=extra_info)
1562         elif result_type == 'url_transparent':
1563             # Use the information from the embedding page
1564             info = self.extract_info(
1565                 ie_result['url'], ie_key=ie_result.get('ie_key'),
1566                 extra_info=extra_info, download=False, process=False)
1567
1568             # extract_info may return None when ignoreerrors is enabled and
1569             # extraction failed with an error, don't crash and return early
1570             # in this case
1571             if not info:
1572                 return info
1573
1574             force_properties = dict(
1575                 (k, v) for k, v in ie_result.items() if v is not None)
1576             for f in ('_type', 'url', 'id', 'extractor', 'extractor_key', 'ie_key'):
1577                 if f in force_properties:
1578                     del force_properties[f]
1579             new_result = info.copy()
1580             new_result.update(force_properties)
1581
1582             # Extracted info may not be a video result (i.e.
1583             # info.get('_type', 'video') != video) but rather an url or
1584             # url_transparent. In such cases outer metadata (from ie_result)
1585             # should be propagated to inner one (info). For this to happen
1586             # _type of info should be overridden with url_transparent. This
1587             # fixes issue from https://github.com/ytdl-org/youtube-dl/pull/11163.
1588             if new_result.get('_type') == 'url':
1589                 new_result['_type'] = 'url_transparent'
1590
1591             return self.process_ie_result(
1592                 new_result, download=download, extra_info=extra_info)
1593         elif result_type in ('playlist', 'multi_video'):
1594             # Protect from infinite recursion due to recursively nested playlists
1595             # (see https://github.com/ytdl-org/youtube-dl/issues/27833)
1596             webpage_url = ie_result['webpage_url']
1597             if webpage_url in self._playlist_urls:
1598                 self.to_screen(
1599                     '[download] Skipping already downloaded playlist: %s'
1600                     % ie_result.get('title') or ie_result.get('id'))
1601                 return
1602
1603             self._playlist_level += 1
1604             self._playlist_urls.add(webpage_url)
1605             self._fill_common_fields(ie_result, False)
1606             self._sanitize_thumbnails(ie_result)
1607             try:
1608                 return self.__process_playlist(ie_result, download)
1609             finally:
1610                 self._playlist_level -= 1
1611                 if not self._playlist_level:
1612                     self._playlist_urls.clear()
1613         elif result_type == 'compat_list':
1614             self.report_warning(
1615                 'Extractor %s returned a compat_list result. '
1616                 'It needs to be updated.' % ie_result.get('extractor'))
1617
1618             def _fixup(r):
1619                 self.add_extra_info(r, {
1620                     'extractor': ie_result['extractor'],
1621                     'webpage_url': ie_result['webpage_url'],
1622                     'webpage_url_basename': url_basename(ie_result['webpage_url']),
1623                     'webpage_url_domain': get_domain(ie_result['webpage_url']),
1624                     'extractor_key': ie_result['extractor_key'],
1625                 })
1626                 return r
1627             ie_result['entries'] = [
1628                 self.process_ie_result(_fixup(r), download, extra_info)
1629                 for r in ie_result['entries']
1630             ]
1631             return ie_result
1632         else:
1633             raise Exception('Invalid result type: %s' % result_type)
1634
1635     def _ensure_dir_exists(self, path):
1636         return make_dir(path, self.report_error)
1637
1638     @staticmethod
1639     def _playlist_infodict(ie_result, **kwargs):
1640         return {
1641             **ie_result,
1642             'playlist': ie_result.get('title') or ie_result.get('id'),
1643             'playlist_id': ie_result.get('id'),
1644             'playlist_title': ie_result.get('title'),
1645             'playlist_uploader': ie_result.get('uploader'),
1646             'playlist_uploader_id': ie_result.get('uploader_id'),
1647             'playlist_index': 0,
1648             **kwargs,
1649         }
1650
    def __process_playlist(self, ie_result, download):
        """Select, filter and process the entries of a playlist result.

        The entries are picked according to the 'playliststart', 'playlistend'
        and 'playlist_items' params, optionally reordered, and passed one by
        one to process_ie_result(). ie_result['entries'] is replaced with the
        processed results.

        Returns the playlist result after running the 'playlist'
        postprocessors, or None if writing one of the playlist files
        returned None. Raises EntryNotInPlaylist if there is no 'entries'
        field or if an entry of an incomplete playlist cannot be found.
        """
        # We process each entry in the playlist
        playlist = ie_result.get('title') or ie_result.get('id')
        self.to_screen('[download] Downloading playlist: %s' % playlist)

        if 'entries' not in ie_result:
            raise EntryNotInPlaylist('There are no entries')

        # Sentinel for positions that have no entry in an incomplete entries list
        MissingEntry = object()
        incomplete_entries = bool(ie_result.get('requested_entries'))
        if incomplete_entries:
            def fill_missing_entries(entries, indices):
                # Put every entry at its (1-based) playlist index; the rest stays MissingEntry
                ret = [MissingEntry] * max(indices)
                for i, entry in zip(indices, entries):
                    ret[i - 1] = entry
                return ret
            ie_result['entries'] = fill_missing_entries(ie_result['entries'], ie_result['requested_entries'])

        playlist_results = []

        playliststart = self.params.get('playliststart', 1)
        playlistend = self.params.get('playlistend')
        # For backwards compatibility, interpret -1 as whole list
        if playlistend == -1:
            playlistend = None

        playlistitems_str = self.params.get('playlist_items')
        playlistitems = None
        if playlistitems_str is not None:
            def iter_playlistitems(format):
                # Yield the indices of a comma separated spec; "a-b" is an inclusive range
                for string_segment in format.split(','):
                    if '-' in string_segment:
                        start, end = string_segment.split('-')
                        for item in range(int(start), int(end) + 1):
                            yield int(item)
                    else:
                        yield int(string_segment)
            # NOTE(review): orderedSet presumably drops duplicates while keeping order - confirm
            playlistitems = orderedSet(iter_playlistitems(playlistitems_str))

        ie_entries = ie_result['entries']
        if isinstance(ie_entries, list):
            playlist_count = len(ie_entries)
            # The remaining "%d" is filled in with the number of selected entries later
            msg = f'Collected {playlist_count} videos; downloading %d of them'
            ie_result['playlist_count'] = ie_result.get('playlist_count') or playlist_count

            def get_entry(i):
                # i is a 1-based playlist index
                return ie_entries[i - 1]
        else:
            msg = 'Downloading %d videos'
            if not isinstance(ie_entries, (PagedList, LazyList)):
                ie_entries = LazyList(ie_entries)
            elif isinstance(ie_entries, InAdvancePagedList):
                if ie_entries._pagesize == 1:
                    # NOTE(review): this value is not read again within this method
                    playlist_count = ie_entries._pagecount

            def get_entry(i):
                # Fetching an entry may trigger extraction, so the lookup is
                # wrapped with the extraction error handling
                return YoutubeDL.__handle_extraction_exceptions(
                    lambda self, i: ie_entries[i - 1]
                )(self, i)

        # "broken" records that collecting stopped because of a break-on-* condition
        entries, broken = [], False
        items = playlistitems if playlistitems is not None else itertools.count(playliststart)
        for i in items:
            if i == 0:
                continue
            if playlistitems is None and playlistend is not None and playlistend < i:
                break
            entry = None
            try:
                entry = get_entry(i)
                if entry is MissingEntry:
                    raise EntryNotInPlaylist()
            except (IndexError, EntryNotInPlaylist):
                if incomplete_entries:
                    raise EntryNotInPlaylist(f'Entry {i} cannot be found')
                elif not playlistitems:
                    # Ran past the end of the playlist
                    break
            # With explicit items, an index that cannot be found is recorded as None
            entries.append(entry)
            try:
                if entry is not None:
                    # The return value is ignored here; the call is only made
                    # so that the break-on-* exceptions stop the collection
                    self._match_entry(entry, incomplete=True, silent=True)
            except (ExistingVideoReached, RejectedVideoReached):
                broken = True
                break
        ie_result['entries'] = entries

        # Save playlist_index before re-ordering
        entries = [
            ((playlistitems[i - 1] if playlistitems else i + playliststart - 1), entry)
            for i, entry in enumerate(entries, 1)
            if entry is not None]
        n_entries = len(entries)

        # Use the number of collected entries as the count only if no count is
        # known and collecting was not limited by a break, an item list or an end index
        if not (ie_result.get('playlist_count') or broken or playlistitems or playlistend):
            ie_result['playlist_count'] = n_entries

        if not playlistitems and (playliststart != 1 or playlistend):
            playlistitems = list(range(playliststart, playliststart + n_entries))
        ie_result['requested_entries'] = playlistitems

        _infojson_written = False
        write_playlist_files = self.params.get('allow_playlist_files', True)
        if write_playlist_files and self.params.get('list_thumbnails'):
            self.list_thumbnails(ie_result)
        if write_playlist_files and not self.params.get('simulate'):
            # ie_copy (with the playlist_* fields) is only used to evaluate templates
            ie_copy = self._playlist_infodict(ie_result, n_entries=n_entries)
            _infojson_written = self._write_info_json(
                'playlist', ie_result, self.prepare_filename(ie_copy, 'pl_infojson'))
            if _infojson_written is None:
                return
            if self._write_description('playlist', ie_result,
                                       self.prepare_filename(ie_copy, 'pl_description')) is None:
                return
            # TODO: This should be passed to ThumbnailsConvertor if necessary
            self._write_thumbnails('playlist', ie_copy, self.prepare_filename(ie_copy, 'pl_thumbnail'))

        if self.params.get('playlistreverse', False):
            entries = entries[::-1]
        if self.params.get('playlistrandom', False):
            random.shuffle(entries)

        x_forwarded_for = ie_result.get('__x_forwarded_for_ip')

        self.to_screen('[%s] playlist %s: %s' % (ie_result['extractor'], playlist, msg % n_entries))
        failures = 0
        max_failures = self.params.get('skip_playlist_after_errors') or float('inf')
        for i, entry_tuple in enumerate(entries, 1):
            playlist_index, entry = entry_tuple
            if 'playlist-index' in self.params.get('compat_opts', []):
                # With this compat option the index follows the position after re-ordering
                playlist_index = playlistitems[i - 1] if playlistitems else i + playliststart - 1
            self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
            # This __x_forwarded_for_ip thing is a bit ugly but requires
            # minimal changes
            if x_forwarded_for:
                entry['__x_forwarded_for_ip'] = x_forwarded_for
            extra = {
                'n_entries': n_entries,
                '_last_playlist_index': max(playlistitems) if playlistitems else (playlistend or n_entries),
                'playlist_count': ie_result.get('playlist_count'),
                'playlist_index': playlist_index,
                'playlist_autonumber': i,
                'playlist': playlist,
                'playlist_id': ie_result.get('id'),
                'playlist_title': ie_result.get('title'),
                'playlist_uploader': ie_result.get('uploader'),
                'playlist_uploader_id': ie_result.get('uploader_id'),
                'extractor': ie_result['extractor'],
                'webpage_url': ie_result['webpage_url'],
                'webpage_url_basename': url_basename(ie_result['webpage_url']),
                'webpage_url_domain': get_domain(ie_result['webpage_url']),
                'extractor_key': ie_result['extractor_key'],
            }

            # Entries rejected by the filters are skipped and not added to the results
            if self._match_entry(entry, incomplete=True) is not None:
                continue

            entry_result = self.__process_iterable_entry(entry, download, extra)
            if not entry_result:
                failures += 1
            if failures >= max_failures:
                self.report_error(
                    'Skipping the remaining entries in playlist "%s" since %d items failed extraction' % (playlist, failures))
                break
            playlist_results.append(entry_result)
        ie_result['entries'] = playlist_results

        # Write the updated info to json
        # (_infojson_written can only be truthy if ie_copy was created above)
        if _infojson_written and self._write_info_json(
                'updated playlist', ie_result,
                self.prepare_filename(ie_copy, 'pl_infojson'), overwrite=True) is None:
            return

        ie_result = self.run_all_pps('playlist', ie_result)
        self.to_screen(f'[download] Finished downloading playlist: {playlist}')
        return ie_result
1826
1827     @__handle_extraction_exceptions
1828     def __process_iterable_entry(self, entry, download, extra_info):
1829         return self.process_ie_result(
1830             entry, download=download, extra_info=extra_info)
1831
1832     def _build_format_filter(self, filter_spec):
1833         " Returns a function to filter the formats according to the filter_spec "
1834
1835         OPERATORS = {
1836             '<': operator.lt,
1837             '<=': operator.le,
1838             '>': operator.gt,
1839             '>=': operator.ge,
1840             '=': operator.eq,
1841             '!=': operator.ne,
1842         }
1843         operator_rex = re.compile(r'''(?x)\s*
1844             (?P<key>width|height|tbr|abr|vbr|asr|filesize|filesize_approx|fps)\s*
1845             (?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
1846             (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)\s*
1847             ''' % '|'.join(map(re.escape, OPERATORS.keys())))
1848         m = operator_rex.fullmatch(filter_spec)
1849         if m:
1850             try:
1851                 comparison_value = int(m.group('value'))
1852             except ValueError:
1853                 comparison_value = parse_filesize(m.group('value'))
1854                 if comparison_value is None:
1855                     comparison_value = parse_filesize(m.group('value') + 'B')
1856                 if comparison_value is None:
1857                     raise ValueError(
1858                         'Invalid value %r in format specification %r' % (
1859                             m.group('value'), filter_spec))
1860             op = OPERATORS[m.group('op')]
1861
1862         if not m:
1863             STR_OPERATORS = {
1864                 '=': operator.eq,
1865                 '^=': lambda attr, value: attr.startswith(value),
1866                 '$=': lambda attr, value: attr.endswith(value),
1867                 '*=': lambda attr, value: value in attr,
1868                 '~=': lambda attr, value: value.search(attr) is not None
1869             }
1870             str_operator_rex = re.compile(r'''(?x)\s*
1871                 (?P<key>[a-zA-Z0-9._-]+)\s*
1872                 (?P<negation>!\s*)?(?P<op>%s)\s*(?P<none_inclusive>\?\s*)?
1873                 (?P<quote>["'])?
1874                 (?P<value>(?(quote)(?:(?!(?P=quote))[^\\]|\\.)+|[\w.-]+))
1875                 (?(quote)(?P=quote))\s*
1876                 ''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))
1877             m = str_operator_rex.fullmatch(filter_spec)
1878             if m:
1879                 if m.group('op') == '~=':
1880                     comparison_value = re.compile(m.group('value'))
1881                 else:
1882                     comparison_value = re.sub(r'''\\([\\"'])''', r'\1', m.group('value'))
1883                 str_op = STR_OPERATORS[m.group('op')]
1884                 if m.group('negation'):
1885                     op = lambda attr, value: not str_op(attr, value)
1886                 else:
1887                     op = str_op
1888
1889         if not m:
1890             raise SyntaxError('Invalid filter specification %r' % filter_spec)
1891
1892         def _filter(f):
1893             actual_value = f.get(m.group('key'))
1894             if actual_value is None:
1895                 return m.group('none_inclusive')
1896             return op(actual_value, comparison_value)
1897         return _filter
1898
1899     def _check_formats(self, formats):
1900         for f in formats:
1901             self.to_screen('[info] Testing format %s' % f['format_id'])
1902             path = self.get_output_path('temp')
1903             if not self._ensure_dir_exists(f'{path}/'):
1904                 continue
1905             temp_file = tempfile.NamedTemporaryFile(suffix='.tmp', delete=False, dir=path or None)
1906             temp_file.close()
1907             try:
1908                 success, _ = self.dl(temp_file.name, f, test=True)
1909             except (DownloadError, IOError, OSError, ValueError) + network_exceptions:
1910                 success = False
1911             finally:
1912                 if os.path.exists(temp_file.name):
1913                     try:
1914                         os.remove(temp_file.name)
1915                     except OSError:
1916                         self.report_warning('Unable to delete temporary file "%s"' % temp_file.name)
1917             if success:
1918                 yield f
1919             else:
1920                 self.to_screen('[info] Unable to download format %s. Skipping...' % f['format_id'])
1921
1922     def _default_format_spec(self, info_dict, download=True):
1923
1924         def can_merge():
1925             merger = FFmpegMergerPP(self)
1926             return merger.available and merger.can_merge()
1927
1928         prefer_best = (
1929             not self.params.get('simulate')
1930             and download
1931             and (
1932                 not can_merge()
1933                 or info_dict.get('is_live', False)
1934                 or self.outtmpl_dict['default'] == '-'))
1935         compat = (
1936             prefer_best
1937             or self.params.get('allow_multiple_audio_streams', False)
1938             or 'format-spec' in self.params.get('compat_opts', []))
1939
1940         return (
1941             'best/bestvideo+bestaudio' if prefer_best
1942             else 'bestvideo*+bestaudio/best' if not compat
1943             else 'bestvideo+bestaudio/best')
1944
1945     def build_format_selector(self, format_spec):
1946         def syntax_error(note, start):
1947             message = (
1948                 'Invalid format specification: '
1949                 '{0}\n\t{1}\n\t{2}^'.format(note, format_spec, ' ' * start[1]))
1950             return SyntaxError(message)
1951
1952         PICKFIRST = 'PICKFIRST'
1953         MERGE = 'MERGE'
1954         SINGLE = 'SINGLE'
1955         GROUP = 'GROUP'
1956         FormatSelector = collections.namedtuple('FormatSelector', ['type', 'selector', 'filters'])
1957
1958         allow_multiple_streams = {'audio': self.params.get('allow_multiple_audio_streams', False),
1959                                   'video': self.params.get('allow_multiple_video_streams', False)}
1960
1961         check_formats = self.params.get('check_formats') == 'selected'
1962
1963         def _parse_filter(tokens):
1964             filter_parts = []
1965             for type, string, start, _, _ in tokens:
1966                 if type == tokenize.OP and string == ']':
1967                     return ''.join(filter_parts)
1968                 else:
1969                     filter_parts.append(string)
1970
1971         def _remove_unused_ops(tokens):
1972             # Remove operators that we don't use and join them with the surrounding strings
1973             # for example: 'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9'
1974             ALLOWED_OPS = ('/', '+', ',', '(', ')')
1975             last_string, last_start, last_end, last_line = None, None, None, None
1976             for type, string, start, end, line in tokens:
1977                 if type == tokenize.OP and string == '[':
1978                     if last_string:
1979                         yield tokenize.NAME, last_string, last_start, last_end, last_line
1980                         last_string = None
1981                     yield type, string, start, end, line
1982                     # everything inside brackets will be handled by _parse_filter
1983                     for type, string, start, end, line in tokens:
1984                         yield type, string, start, end, line
1985                         if type == tokenize.OP and string == ']':
1986                             break
1987                 elif type == tokenize.OP and string in ALLOWED_OPS:
1988                     if last_string:
1989                         yield tokenize.NAME, last_string, last_start, last_end, last_line
1990                         last_string = None
1991                     yield type, string, start, end, line
1992                 elif type in [tokenize.NAME, tokenize.NUMBER, tokenize.OP]:
1993                     if not last_string:
1994                         last_string = string
1995                         last_start = start
1996                         last_end = end
1997                     else:
1998                         last_string += string
1999             if last_string:
2000                 yield tokenize.NAME, last_string, last_start, last_end, last_line
2001
2002         def _parse_format_selection(tokens, inside_merge=False, inside_choice=False, inside_group=False):
2003             selectors = []
2004             current_selector = None
2005             for type, string, start, _, _ in tokens:
2006                 # ENCODING is only defined in python 3.x
2007                 if type == getattr(tokenize, 'ENCODING', None):
2008                     continue
2009                 elif type in [tokenize.NAME, tokenize.NUMBER]:
2010                     current_selector = FormatSelector(SINGLE, string, [])
2011                 elif type == tokenize.OP:
2012                     if string == ')':
2013                         if not inside_group:
2014                             # ')' will be handled by the parentheses group
2015                             tokens.restore_last_token()
2016                         break
2017                     elif inside_merge and string in ['/', ',']:
2018                         tokens.restore_last_token()
2019                         break
2020                     elif inside_choice and string == ',':
2021                         tokens.restore_last_token()
2022                         break
2023                     elif string == ',':
2024                         if not current_selector:
2025                             raise syntax_error('"," must follow a format selector', start)
2026                         selectors.append(current_selector)
2027                         current_selector = None
2028                     elif string == '/':
2029                         if not current_selector:
2030                             raise syntax_error('"/" must follow a format selector', start)
2031                         first_choice = current_selector
2032                         second_choice = _parse_format_selection(tokens, inside_choice=True)
2033                         current_selector = FormatSelector(PICKFIRST, (first_choice, second_choice), [])
2034                     elif string == '[':
2035                         if not current_selector:
2036                             current_selector = FormatSelector(SINGLE, 'best', [])
2037                         format_filter = _parse_filter(tokens)
2038                         current_selector.filters.append(format_filter)
2039                     elif string == '(':
2040                         if current_selector:
2041                             raise syntax_error('Unexpected "("', start)
2042                         group = _parse_format_selection(tokens, inside_group=True)
2043                         current_selector = FormatSelector(GROUP, group, [])
2044                     elif string == '+':
2045                         if not current_selector:
2046                             raise syntax_error('Unexpected "+"', start)
2047                         selector_1 = current_selector
2048                         selector_2 = _parse_format_selection(tokens, inside_merge=True)
2049                         if not selector_2:
2050                             raise syntax_error('Expected a selector', start)
2051                         current_selector = FormatSelector(MERGE, (selector_1, selector_2), [])
2052                     else:
2053                         raise syntax_error('Operator not recognized: "{0}"'.format(string), start)
2054                 elif type == tokenize.ENDMARKER:
2055                     break
2056             if current_selector:
2057                 selectors.append(current_selector)
2058             return selectors
2059
2060         def _merge(formats_pair):
2061             format_1, format_2 = formats_pair
2062
2063             formats_info = []
2064             formats_info.extend(format_1.get('requested_formats', (format_1,)))
2065             formats_info.extend(format_2.get('requested_formats', (format_2,)))
2066
2067             if not allow_multiple_streams['video'] or not allow_multiple_streams['audio']:
2068                 get_no_more = {'video': False, 'audio': False}
2069                 for (i, fmt_info) in enumerate(formats_info):
2070                     if fmt_info.get('acodec') == fmt_info.get('vcodec') == 'none':
2071                         formats_info.pop(i)
2072                         continue
2073                     for aud_vid in ['audio', 'video']:
2074                         if not allow_multiple_streams[aud_vid] and fmt_info.get(aud_vid[0] + 'codec') != 'none':
2075                             if get_no_more[aud_vid]:
2076                                 formats_info.pop(i)
2077                                 break
2078                             get_no_more[aud_vid] = True
2079
2080             if len(formats_info) == 1:
2081                 return formats_info[0]
2082
2083             video_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('vcodec') != 'none']
2084             audio_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('acodec') != 'none']
2085
2086             the_only_video = video_fmts[0] if len(video_fmts) == 1 else None
2087             the_only_audio = audio_fmts[0] if len(audio_fmts) == 1 else None
2088
2089             output_ext = self.params.get('merge_output_format')
2090             if not output_ext:
2091                 if the_only_video:
2092                     output_ext = the_only_video['ext']
2093                 elif the_only_audio and not video_fmts:
2094                     output_ext = the_only_audio['ext']
2095                 else:
2096                     output_ext = 'mkv'
2097
2098             filtered = lambda *keys: filter(None, (traverse_obj(fmt, *keys) for fmt in formats_info))
2099
2100             new_dict = {
2101                 'requested_formats': formats_info,
2102                 'format': '+'.join(filtered('format')),
2103                 'format_id': '+'.join(filtered('format_id')),
2104                 'ext': output_ext,
2105                 'protocol': '+'.join(map(determine_protocol, formats_info)),
2106                 'language': '+'.join(orderedSet(filtered('language'))) or None,
2107                 'format_note': '+'.join(orderedSet(filtered('format_note'))) or None,
2108                 'filesize_approx': sum(filtered('filesize', 'filesize_approx')) or None,
2109                 'tbr': sum(filtered('tbr', 'vbr', 'abr')),
2110             }
2111
2112             if the_only_video:
2113                 new_dict.update({
2114                     'width': the_only_video.get('width'),
2115                     'height': the_only_video.get('height'),
2116                     'resolution': the_only_video.get('resolution') or self.format_resolution(the_only_video),
2117                     'fps': the_only_video.get('fps'),
2118                     'dynamic_range': the_only_video.get('dynamic_range'),
2119                     'vcodec': the_only_video.get('vcodec'),
2120                     'vbr': the_only_video.get('vbr'),
2121                     'stretched_ratio': the_only_video.get('stretched_ratio'),
2122                 })
2123
2124             if the_only_audio:
2125                 new_dict.update({
2126                     'acodec': the_only_audio.get('acodec'),
2127                     'abr': the_only_audio.get('abr'),
2128                     'asr': the_only_audio.get('asr'),
2129                 })
2130
2131             return new_dict
2132
2133         def _check_formats(formats):
2134             if not check_formats:
2135                 yield from formats
2136                 return
2137             yield from self._check_formats(formats)
2138
2139         def _build_selector_function(selector):
2140             if isinstance(selector, list):  # ,
2141                 fs = [_build_selector_function(s) for s in selector]
2142
2143                 def selector_function(ctx):
2144                     for f in fs:
2145                         yield from f(ctx)
2146                 return selector_function
2147
2148             elif selector.type == GROUP:  # ()
2149                 selector_function = _build_selector_function(selector.selector)
2150
2151             elif selector.type == PICKFIRST:  # /
2152                 fs = [_build_selector_function(s) for s in selector.selector]
2153
2154                 def selector_function(ctx):
2155                     for f in fs:
2156                         picked_formats = list(f(ctx))
2157                         if picked_formats:
2158                             return picked_formats
2159                     return []
2160
2161             elif selector.type == MERGE:  # +
2162                 selector_1, selector_2 = map(_build_selector_function, selector.selector)
2163
2164                 def selector_function(ctx):
2165                     for pair in itertools.product(selector_1(ctx), selector_2(ctx)):
2166                         yield _merge(pair)
2167
2168             elif selector.type == SINGLE:  # atom
2169                 format_spec = selector.selector or 'best'
2170
2171                 # TODO: Add allvideo, allaudio etc by generalizing the code with best/worst selector
2172                 if format_spec == 'all':
2173                     def selector_function(ctx):
2174                         yield from _check_formats(ctx['formats'][::-1])
2175                 elif format_spec == 'mergeall':
2176                     def selector_function(ctx):
2177                         formats = list(_check_formats(ctx['formats']))
2178                         if not formats:
2179                             return
2180                         merged_format = formats[-1]
2181                         for f in formats[-2::-1]:
2182                             merged_format = _merge((merged_format, f))
2183                         yield merged_format
2184
2185                 else:
2186                     format_fallback, format_reverse, format_idx = False, True, 1
2187                     mobj = re.match(
2188                         r'(?P<bw>best|worst|b|w)(?P<type>video|audio|v|a)?(?P<mod>\*)?(?:\.(?P<n>[1-9]\d*))?$',
2189                         format_spec)
2190                     if mobj is not None:
2191                         format_idx = int_or_none(mobj.group('n'), default=1)
2192                         format_reverse = mobj.group('bw')[0] == 'b'
2193                         format_type = (mobj.group('type') or [None])[0]
2194                         not_format_type = {'v': 'a', 'a': 'v'}.get(format_type)
2195                         format_modified = mobj.group('mod') is not None
2196
2197                         format_fallback = not format_type and not format_modified  # for b, w
2198                         _filter_f = (
2199                             (lambda f: f.get('%scodec' % format_type) != 'none')
2200                             if format_type and format_modified  # bv*, ba*, wv*, wa*
2201                             else (lambda f: f.get('%scodec' % not_format_type) == 'none')
2202                             if format_type  # bv, ba, wv, wa
2203                             else (lambda f: f.get('vcodec') != 'none' and f.get('acodec') != 'none')
2204                             if not format_modified  # b, w
2205                             else lambda f: True)  # b*, w*
2206                         filter_f = lambda f: _filter_f(f) and (
2207                             f.get('vcodec') != 'none' or f.get('acodec') != 'none')
2208                     else:
2209                         if format_spec in self._format_selection_exts['audio']:
2210                             filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') != 'none'
2211                         elif format_spec in self._format_selection_exts['video']:
2212                             filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') != 'none' and f.get('vcodec') != 'none'
2213                         elif format_spec in self._format_selection_exts['storyboards']:
2214                             filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') == 'none' and f.get('vcodec') == 'none'
2215                         else:
2216                             filter_f = lambda f: f.get('format_id') == format_spec  # id
2217
2218                     def selector_function(ctx):
2219                         formats = list(ctx['formats'])
2220                         matches = list(filter(filter_f, formats)) if filter_f is not None else formats
2221                         if format_fallback and ctx['incomplete_formats'] and not matches:
2222                             # for extractors with incomplete formats (audio only (soundcloud)
2223                             # or video only (imgur)) best/worst will fallback to
2224                             # best/worst {video,audio}-only format
2225                             matches = formats
2226                         matches = LazyList(_check_formats(matches[::-1 if format_reverse else 1]))
2227                         try:
2228                             yield matches[format_idx - 1]
2229                         except IndexError:
2230                             return
2231
2232             filters = [self._build_format_filter(f) for f in selector.filters]
2233
2234             def final_selector(ctx):
2235                 ctx_copy = dict(ctx)
2236                 for _filter in filters:
2237                     ctx_copy['formats'] = list(filter(_filter, ctx_copy['formats']))
2238                 return selector_function(ctx_copy)
2239             return final_selector
2240
2241         stream = io.BytesIO(format_spec.encode('utf-8'))
2242         try:
2243             tokens = list(_remove_unused_ops(compat_tokenize_tokenize(stream.readline)))
2244         except tokenize.TokenError:
2245             raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec)))
2246
2247         class TokenIterator(object):
2248             def __init__(self, tokens):
2249                 self.tokens = tokens
2250                 self.counter = 0
2251
2252             def __iter__(self):
2253                 return self
2254
2255             def __next__(self):
2256                 if self.counter >= len(self.tokens):
2257                     raise StopIteration()
2258                 value = self.tokens[self.counter]
2259                 self.counter += 1
2260                 return value
2261
2262             next = __next__
2263
2264             def restore_last_token(self):
2265                 self.counter -= 1
2266
2267         parsed_selector = _parse_format_selection(iter(TokenIterator(tokens)))
2268         return _build_selector_function(parsed_selector)
2269
2270     def _calc_headers(self, info_dict):
2271         res = merge_headers(self.params['http_headers'], info_dict.get('http_headers') or {})
2272
2273         cookies = self._calc_cookies(info_dict)
2274         if cookies:
2275             res['Cookie'] = cookies
2276
2277         if 'X-Forwarded-For' not in res:
2278             x_forwarded_for_ip = info_dict.get('__x_forwarded_for_ip')
2279             if x_forwarded_for_ip:
2280                 res['X-Forwarded-For'] = x_forwarded_for_ip
2281
2282         return res
2283
2284     def _calc_cookies(self, info_dict):
2285         pr = sanitized_Request(info_dict['url'])
2286         self.cookiejar.add_cookie_header(pr)
2287         return pr.get_header('Cookie')
2288
2289     def _sort_thumbnails(self, thumbnails):
2290         thumbnails.sort(key=lambda t: (
2291             t.get('preference') if t.get('preference') is not None else -1,
2292             t.get('width') if t.get('width') is not None else -1,
2293             t.get('height') if t.get('height') is not None else -1,
2294             t.get('id') if t.get('id') is not None else '',
2295             t.get('url')))
2296
2297     def _sanitize_thumbnails(self, info_dict):
2298         thumbnails = info_dict.get('thumbnails')
2299         if thumbnails is None:
2300             thumbnail = info_dict.get('thumbnail')
2301             if thumbnail:
2302                 info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}]
2303         if not thumbnails:
2304             return
2305
2306         def check_thumbnails(thumbnails):
2307             for t in thumbnails:
2308                 self.to_screen(f'[info] Testing thumbnail {t["id"]}')
2309                 try:
2310                     self.urlopen(HEADRequest(t['url']))
2311                 except network_exceptions as err:
2312                     self.to_screen(f'[info] Unable to connect to thumbnail {t["id"]} URL {t["url"]!r} - {err}. Skipping...')
2313                     continue
2314                 yield t
2315
2316         self._sort_thumbnails(thumbnails)
2317         for i, t in enumerate(thumbnails):
2318             if t.get('id') is None:
2319                 t['id'] = '%d' % i
2320             if t.get('width') and t.get('height'):
2321                 t['resolution'] = '%dx%d' % (t['width'], t['height'])
2322             t['url'] = sanitize_url(t['url'])
2323
2324         if self.params.get('check_formats') is True:
2325             info_dict['thumbnails'] = LazyList(check_thumbnails(thumbnails[::-1]), reverse=True)
2326         else:
2327             info_dict['thumbnails'] = thumbnails
2328
2329     def _fill_common_fields(self, info_dict, is_video=True):
2330         # TODO: move sanitization here
2331         if is_video:
2332             # playlists are allowed to lack "title"
2333             info_dict['fulltitle'] = info_dict.get('title')
2334             if 'title' not in info_dict:
2335                 raise ExtractorError('Missing "title" field in extractor result',
2336                                      video_id=info_dict['id'], ie=info_dict['extractor'])
2337             elif not info_dict.get('title'):
2338                 self.report_warning('Extractor failed to obtain "title". Creating a generic title instead')
2339                 info_dict['title'] = f'{info_dict["extractor"]} video #{info_dict["id"]}'
2340
2341         if info_dict.get('duration') is not None:
2342             info_dict['duration_string'] = formatSeconds(info_dict['duration'])
2343
2344         for ts_key, date_key in (
2345                 ('timestamp', 'upload_date'),
2346                 ('release_timestamp', 'release_date'),
2347                 ('modified_timestamp', 'modified_date'),
2348         ):
2349             if info_dict.get(date_key) is None and info_dict.get(ts_key) is not None:
2350                 # Working around out-of-range timestamp values (e.g. negative ones on Windows,
2351                 # see http://bugs.python.org/issue1646728)
2352                 try:
2353                     upload_date = datetime.datetime.utcfromtimestamp(info_dict[ts_key])
2354                     info_dict[date_key] = upload_date.strftime('%Y%m%d')
2355                 except (ValueError, OverflowError, OSError):
2356                     pass
2357
2358         live_keys = ('is_live', 'was_live')
2359         live_status = info_dict.get('live_status')
2360         if live_status is None:
2361             for key in live_keys:
2362                 if info_dict.get(key) is False:
2363                     continue
2364                 if info_dict.get(key):
2365                     live_status = key
2366                 break
2367             if all(info_dict.get(key) is False for key in live_keys):
2368                 live_status = 'not_live'
2369         if live_status:
2370             info_dict['live_status'] = live_status
2371             for key in live_keys:
2372                 if info_dict.get(key) is None:
2373                     info_dict[key] = (live_status == key)
2374
2375         # Auto generate title fields corresponding to the *_number fields when missing
2376         # in order to always have clean titles. This is very common for TV series.
2377         for field in ('chapter', 'season', 'episode'):
2378             if info_dict.get('%s_number' % field) is not None and not info_dict.get(field):
2379                 info_dict[field] = '%s %d' % (field.capitalize(), info_dict['%s_number' % field])
2380
2381     def process_video_result(self, info_dict, download=True):
2382         assert info_dict.get('_type', 'video') == 'video'
2383         self._num_videos += 1
2384
2385         if 'id' not in info_dict:
2386             raise ExtractorError('Missing "id" field in extractor result', ie=info_dict['extractor'])
2387         elif not info_dict.get('id'):
2388             raise ExtractorError('Extractor failed to obtain "id"', ie=info_dict['extractor'])
2389
2390         def report_force_conversion(field, field_not, conversion):
2391             self.report_warning(
2392                 '"%s" field is not %s - forcing %s conversion, there is an error in extractor'
2393                 % (field, field_not, conversion))
2394
2395         def sanitize_string_field(info, string_field):
2396             field = info.get(string_field)
2397             if field is None or isinstance(field, compat_str):
2398                 return
2399             report_force_conversion(string_field, 'a string', 'string')
2400             info[string_field] = compat_str(field)
2401
2402         def sanitize_numeric_fields(info):
2403             for numeric_field in self._NUMERIC_FIELDS:
2404                 field = info.get(numeric_field)
2405                 if field is None or isinstance(field, compat_numeric_types):
2406                     continue
2407                 report_force_conversion(numeric_field, 'numeric', 'int')
2408                 info[numeric_field] = int_or_none(field)
2409
2410         sanitize_string_field(info_dict, 'id')
2411         sanitize_numeric_fields(info_dict)
2412         if (info_dict.get('duration') or 0) <= 0 and info_dict.pop('duration', None):
2413             self.report_warning('"duration" field is negative, there is an error in extractor')
2414
2415         if 'playlist' not in info_dict:
2416             # It isn't part of a playlist
2417             info_dict['playlist'] = None
2418             info_dict['playlist_index'] = None
2419
2420         self._sanitize_thumbnails(info_dict)
2421
2422         thumbnail = info_dict.get('thumbnail')
2423         thumbnails = info_dict.get('thumbnails')
2424         if thumbnail:
2425             info_dict['thumbnail'] = sanitize_url(thumbnail)
2426         elif thumbnails:
2427             info_dict['thumbnail'] = thumbnails[-1]['url']
2428
2429         if info_dict.get('display_id') is None and 'id' in info_dict:
2430             info_dict['display_id'] = info_dict['id']
2431
2432         self._fill_common_fields(info_dict)
2433
2434         for cc_kind in ('subtitles', 'automatic_captions'):
2435             cc = info_dict.get(cc_kind)
2436             if cc:
2437                 for _, subtitle in cc.items():
2438                     for subtitle_format in subtitle:
2439                         if subtitle_format.get('url'):
2440                             subtitle_format['url'] = sanitize_url(subtitle_format['url'])
2441                         if subtitle_format.get('ext') is None:
2442                             subtitle_format['ext'] = determine_ext(subtitle_format['url']).lower()
2443
2444         automatic_captions = info_dict.get('automatic_captions')
2445         subtitles = info_dict.get('subtitles')
2446
2447         info_dict['requested_subtitles'] = self.process_subtitles(
2448             info_dict['id'], subtitles, automatic_captions)
2449
2450         if info_dict.get('formats') is None:
2451             # There's only one format available
2452             formats = [info_dict]
2453         else:
2454             formats = info_dict['formats']
2455
2456         info_dict['__has_drm'] = any(f.get('has_drm') for f in formats)
2457         if not self.params.get('allow_unplayable_formats'):
2458             formats = [f for f in formats if not f.get('has_drm')]
2459
2460         get_from_start = not info_dict.get('is_live') or bool(self.params.get('live_from_start'))
2461         if not get_from_start:
2462             info_dict['title'] += ' ' + datetime.datetime.now().strftime('%Y-%m-%d %H:%M')
2463         if info_dict.get('is_live') and formats:
2464             formats = [f for f in formats if bool(f.get('is_from_start')) == get_from_start]
2465             if get_from_start and not formats:
2466                 self.raise_no_formats(info_dict, msg='--live-from-start is passed, but there are no formats that can be downloaded from the start. '
2467                                                      'If you want to download from the current time, pass --no-live-from-start')
2468
2469         if not formats:
2470             self.raise_no_formats(info_dict)
2471
2472         def is_wellformed(f):
2473             url = f.get('url')
2474             if not url:
2475                 self.report_warning(
2476                     '"url" field is missing or empty - skipping format, '
2477                     'there is an error in extractor')
2478                 return False
2479             if isinstance(url, bytes):
2480                 sanitize_string_field(f, 'url')
2481             return True
2482
2483         # Filter out malformed formats for better extraction robustness
2484         formats = list(filter(is_wellformed, formats))
2485
2486         formats_dict = {}
2487
2488         # We check that all the formats have the format and format_id fields
2489         for i, format in enumerate(formats):
2490             sanitize_string_field(format, 'format_id')
2491             sanitize_numeric_fields(format)
2492             format['url'] = sanitize_url(format['url'])
2493             if not format.get('format_id'):
2494                 format['format_id'] = compat_str(i)
2495             else:
2496                 # Sanitize format_id from characters used in format selector expression
2497                 format['format_id'] = re.sub(r'[\s,/+\[\]()]', '_', format['format_id'])
2498             format_id = format['format_id']
2499             if format_id not in formats_dict:
2500                 formats_dict[format_id] = []
2501             formats_dict[format_id].append(format)
2502
2503         # Make sure all formats have unique format_id
2504         common_exts = set(itertools.chain(*self._format_selection_exts.values()))
2505         for format_id, ambiguous_formats in formats_dict.items():
2506             ambigious_id = len(ambiguous_formats) > 1
2507             for i, format in enumerate(ambiguous_formats):
2508                 if ambigious_id:
2509                     format['format_id'] = '%s-%d' % (format_id, i)
2510                 if format.get('ext') is None:
2511                     format['ext'] = determine_ext(format['url']).lower()
2512                 # Ensure there is no conflict between id and ext in format selection
2513                 # See https://github.com/yt-dlp/yt-dlp/issues/1282
2514                 if format['format_id'] != format['ext'] and format['format_id'] in common_exts:
2515                     format['format_id'] = 'f%s' % format['format_id']
2516
2517         for i, format in enumerate(formats):
2518             if format.get('format') is None:
2519                 format['format'] = '{id} - {res}{note}'.format(
2520                     id=format['format_id'],
2521                     res=self.format_resolution(format),
2522                     note=format_field(format, 'format_note', ' (%s)'),
2523                 )
2524             if format.get('protocol') is None:
2525                 format['protocol'] = determine_protocol(format)
2526             if format.get('resolution') is None:
2527                 format['resolution'] = self.format_resolution(format, default=None)
2528             if format.get('dynamic_range') is None and format.get('vcodec') != 'none':
2529                 format['dynamic_range'] = 'SDR'
2530             if (info_dict.get('duration') and format.get('tbr')
2531                     and not format.get('filesize') and not format.get('filesize_approx')):
2532                 format['filesize_approx'] = info_dict['duration'] * format['tbr'] * (1024 / 8)
2533
2534             # Add HTTP headers, so that external programs can use them from the
2535             # json output
2536             full_format_info = info_dict.copy()
2537             full_format_info.update(format)
2538             format['http_headers'] = self._calc_headers(full_format_info)
2539         # Remove private housekeeping stuff
2540         if '__x_forwarded_for_ip' in info_dict:
2541             del info_dict['__x_forwarded_for_ip']
2542
2543         if self.params.get('check_formats') is True:
2544             formats = LazyList(self._check_formats(formats[::-1]), reverse=True)
2545
2546         if not formats or formats[0] is not info_dict:
2547             # only set the 'formats' fields if the original info_dict list them
2548             # otherwise we end up with a circular reference, the first (and unique)
2549             # element in the 'formats' field in info_dict is info_dict itself,
2550             # which can't be exported to json
2551             info_dict['formats'] = formats
2552
2553         info_dict, _ = self.pre_process(info_dict)
2554
2555         if self._match_entry(info_dict, incomplete=self._format_fields) is not None:
2556             return info_dict
2557
2558         self.post_extract(info_dict)
2559         info_dict, _ = self.pre_process(info_dict, 'after_filter')
2560
2561         # The pre-processors may have modified the formats
2562         formats = info_dict.get('formats', [info_dict])
2563
2564         list_only = self.params.get('simulate') is None and (
2565             self.params.get('list_thumbnails') or self.params.get('listformats') or self.params.get('listsubtitles'))
2566         interactive_format_selection = not list_only and self.format_selector == '-'
2567         if self.params.get('list_thumbnails'):
2568             self.list_thumbnails(info_dict)
2569         if self.params.get('listsubtitles'):
2570             if 'automatic_captions' in info_dict:
2571                 self.list_subtitles(
2572                     info_dict['id'], automatic_captions, 'automatic captions')
2573             self.list_subtitles(info_dict['id'], subtitles, 'subtitles')
2574         if self.params.get('listformats') or interactive_format_selection:
2575             self.list_formats(info_dict)
2576         if list_only:
2577             # Without this printing, -F --print-json will not work
2578             self.__forced_printings(info_dict, self.prepare_filename(info_dict), incomplete=True)
2579             return
2580
2581         format_selector = self.format_selector
2582         if format_selector is None:
2583             req_format = self._default_format_spec(info_dict, download=download)
2584             self.write_debug('Default format spec: %s' % req_format)
2585             format_selector = self.build_format_selector(req_format)
2586
2587         while True:
2588             if interactive_format_selection:
2589                 req_format = input(
2590                     self._format_screen('\nEnter format selector: ', self.Styles.EMPHASIS))
2591                 try:
2592                     format_selector = self.build_format_selector(req_format)
2593                 except SyntaxError as err:
2594                     self.report_error(err, tb=False, is_error=False)
2595                     continue
2596
2597             # While in format selection we may need to have an access to the original
2598             # format set in order to calculate some metrics or do some processing.
2599             # For now we need to be able to guess whether original formats provided
2600             # by extractor are incomplete or not (i.e. whether extractor provides only
2601             # video-only or audio-only formats) for proper formats selection for
2602             # extractors with such incomplete formats (see
2603             # https://github.com/ytdl-org/youtube-dl/pull/5556).
2604             # Since formats may be filtered during format selection and may not match
2605             # the original formats the results may be incorrect. Thus original formats
2606             # or pre-calculated metrics should be passed to format selection routines
2607             # as well.
2608             # We will pass a context object containing all necessary additional data
2609             # instead of just formats.
2610             # This fixes incorrect format selection issue (see
2611             # https://github.com/ytdl-org/youtube-dl/issues/10083).
2612             incomplete_formats = (
2613                 # All formats are video-only or
2614                 all(f.get('vcodec') != 'none' and f.get('acodec') == 'none' for f in formats)
2615                 # all formats are audio-only
2616                 or all(f.get('vcodec') == 'none' and f.get('acodec') != 'none' for f in formats))
2617
2618             ctx = {
2619                 'formats': formats,
2620                 'incomplete_formats': incomplete_formats,
2621             }
2622
2623             formats_to_download = list(format_selector(ctx))
2624             if interactive_format_selection and not formats_to_download:
2625                 self.report_error('Requested format is not available', tb=False, is_error=False)
2626                 continue
2627             break
2628
2629         if not formats_to_download:
2630             if not self.params.get('ignore_no_formats_error'):
2631                 raise ExtractorError('Requested format is not available', expected=True,
2632                                      video_id=info_dict['id'], ie=info_dict['extractor'])
2633             self.report_warning('Requested format is not available')
2634             # Process what we can, even without any available formats.
2635             formats_to_download = [{}]
2636
2637         best_format = formats_to_download[-1]
2638         if download:
2639             if best_format:
2640                 self.to_screen(
2641                     f'[info] {info_dict["id"]}: Downloading {len(formats_to_download)} format(s): '
2642                     + ', '.join([f['format_id'] for f in formats_to_download]))
2643             max_downloads_reached = False
2644             for i, fmt in enumerate(formats_to_download):
2645                 formats_to_download[i] = new_info = self._copy_infodict(info_dict)
2646                 new_info.update(fmt)
2647                 try:
2648                     self.process_info(new_info)
2649                 except MaxDownloadsReached:
2650                     max_downloads_reached = True
2651                 # Remove copied info
2652                 for key, val in tuple(new_info.items()):
2653                     if info_dict.get(key) == val:
2654                         new_info.pop(key)
2655                 if max_downloads_reached:
2656                     break
2657
2658             write_archive = set(f.get('__write_download_archive', False) for f in formats_to_download)
2659             assert write_archive.issubset({True, False, 'ignore'})
2660             if True in write_archive and False not in write_archive:
2661                 self.record_download_archive(info_dict)
2662
2663             info_dict['requested_downloads'] = formats_to_download
2664             info_dict = self.run_all_pps('after_video', info_dict)
2665             if max_downloads_reached:
2666                 raise MaxDownloadsReached()
2667
2668         # We update the info dict with the selected best quality format (backwards compatibility)
2669         info_dict.update(best_format)
2670         return info_dict
2671
2672     def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
2673         """Select the requested subtitles and their format"""
2674         available_subs = {}
2675         if normal_subtitles and self.params.get('writesubtitles'):
2676             available_subs.update(normal_subtitles)
2677         if automatic_captions and self.params.get('writeautomaticsub'):
2678             for lang, cap_info in automatic_captions.items():
2679                 if lang not in available_subs:
2680                     available_subs[lang] = cap_info
2681
2682         if (not self.params.get('writesubtitles') and not
2683                 self.params.get('writeautomaticsub') or not
2684                 available_subs):
2685             return None
2686
2687         all_sub_langs = available_subs.keys()
2688         if self.params.get('allsubtitles', False):
2689             requested_langs = all_sub_langs
2690         elif self.params.get('subtitleslangs', False):
2691             # A list is used so that the order of languages will be the same as
2692             # given in subtitleslangs. See https://github.com/yt-dlp/yt-dlp/issues/1041
2693             requested_langs = []
2694             for lang_re in self.params.get('subtitleslangs'):
2695                 discard = lang_re[0] == '-'
2696                 if discard:
2697                     lang_re = lang_re[1:]
2698                 if lang_re == 'all':
2699                     if discard:
2700                         requested_langs = []
2701                     else:
2702                         requested_langs.extend(all_sub_langs)
2703                     continue
2704                 current_langs = filter(re.compile(lang_re + '$').match, all_sub_langs)
2705                 if discard:
2706                     for lang in current_langs:
2707                         while lang in requested_langs:
2708                             requested_langs.remove(lang)
2709                 else:
2710                     requested_langs.extend(current_langs)
2711             requested_langs = orderedSet(requested_langs)
2712         elif 'en' in available_subs:
2713             requested_langs = ['en']
2714         else:
2715             requested_langs = [list(all_sub_langs)[0]]
2716         if requested_langs:
2717             self.write_debug('Downloading subtitles: %s' % ', '.join(requested_langs))
2718
2719         formats_query = self.params.get('subtitlesformat', 'best')
2720         formats_preference = formats_query.split('/') if formats_query else []
2721         subs = {}
2722         for lang in requested_langs:
2723             formats = available_subs.get(lang)
2724             if formats is None:
2725                 self.report_warning('%s subtitles not available for %s' % (lang, video_id))
2726                 continue
2727             for ext in formats_preference:
2728                 if ext == 'best':
2729                     f = formats[-1]
2730                     break
2731                 matches = list(filter(lambda f: f['ext'] == ext, formats))
2732                 if matches:
2733                     f = matches[-1]
2734                     break
2735             else:
2736                 f = formats[-1]
2737                 self.report_warning(
2738                     'No subtitle format found matching "%s" for language %s, '
2739                     'using %s' % (formats_query, lang, f['ext']))
2740             subs[lang] = f
2741         return subs
2742
2743     def _forceprint(self, key, info_dict):
2744         if info_dict is None:
2745             return
2746         info_copy = info_dict.copy()
2747         info_copy['formats_table'] = self.render_formats_table(info_dict)
2748         info_copy['thumbnails_table'] = self.render_thumbnails_table(info_dict)
2749         info_copy['subtitles_table'] = self.render_subtitles_table(info_dict.get('id'), info_dict.get('subtitles'))
2750         info_copy['automatic_captions_table'] = self.render_subtitles_table(info_dict.get('id'), info_dict.get('automatic_captions'))
2751
2752         def format_tmpl(tmpl):
2753             mobj = re.match(r'\w+(=?)$', tmpl)
2754             if mobj and mobj.group(1):
2755                 return f'{tmpl[:-1]} = %({tmpl[:-1]})r'
2756             elif mobj:
2757                 return f'%({tmpl})s'
2758             return tmpl
2759
2760         for tmpl in self.params['forceprint'].get(key, []):
2761             self.to_stdout(self.evaluate_outtmpl(format_tmpl(tmpl), info_copy))
2762
2763         for tmpl, file_tmpl in self.params['print_to_file'].get(key, []):
2764             filename = self.evaluate_outtmpl(file_tmpl, info_dict)
2765             tmpl = format_tmpl(tmpl)
2766             self.to_screen(f'[info] Writing {tmpl!r} to: {filename}')
2767             if self._ensure_dir_exists(filename):
2768                 with io.open(filename, 'a', encoding='utf-8') as f:
2769                     f.write(self.evaluate_outtmpl(tmpl, info_copy) + '\n')
2770
2771     def __forced_printings(self, info_dict, filename, incomplete):
2772         def print_mandatory(field, actual_field=None):
2773             if actual_field is None:
2774                 actual_field = field
2775             if (self.params.get('force%s' % field, False)
2776                     and (not incomplete or info_dict.get(actual_field) is not None)):
2777                 self.to_stdout(info_dict[actual_field])
2778
2779         def print_optional(field):
2780             if (self.params.get('force%s' % field, False)
2781                     and info_dict.get(field) is not None):
2782                 self.to_stdout(info_dict[field])
2783
2784         info_dict = info_dict.copy()
2785         if filename is not None:
2786             info_dict['filename'] = filename
2787         if info_dict.get('requested_formats') is not None:
2788             # For RTMP URLs, also include the playpath
2789             info_dict['urls'] = '\n'.join(f['url'] + f.get('play_path', '') for f in info_dict['requested_formats'])
2790         elif info_dict.get('url'):
2791             info_dict['urls'] = info_dict['url'] + info_dict.get('play_path', '')
2792
2793         if (self.params.get('forcejson')
2794                 or self.params['forceprint'].get('video')
2795                 or self.params['print_to_file'].get('video')):
2796             self.post_extract(info_dict)
2797         self._forceprint('video', info_dict)
2798
2799         print_mandatory('title')
2800         print_mandatory('id')
2801         print_mandatory('url', 'urls')
2802         print_optional('thumbnail')
2803         print_optional('description')
2804         print_optional('filename')
2805         if self.params.get('forceduration') and info_dict.get('duration') is not None:
2806             self.to_stdout(formatSeconds(info_dict['duration']))
2807         print_mandatory('format')
2808
2809         if self.params.get('forcejson'):
2810             self.to_stdout(json.dumps(self.sanitize_info(info_dict)))
2811
2812     def dl(self, name, info, subtitle=False, test=False):
2813         if not info.get('url'):
2814             self.raise_no_formats(info, True)
2815
2816         if test:
2817             verbose = self.params.get('verbose')
2818             params = {
2819                 'test': True,
2820                 'quiet': self.params.get('quiet') or not verbose,
2821                 'verbose': verbose,
2822                 'noprogress': not verbose,
2823                 'nopart': True,
2824                 'skip_unavailable_fragments': False,
2825                 'keep_fragments': False,
2826                 'overwrites': True,
2827                 '_no_ytdl_file': True,
2828             }
2829         else:
2830             params = self.params
2831         fd = get_suitable_downloader(info, params, to_stdout=(name == '-'))(self, params)
2832         if not test:
2833             for ph in self._progress_hooks:
2834                 fd.add_progress_hook(ph)
2835             urls = '", "'.join(
2836                 (f['url'].split(',')[0] + ',<data>' if f['url'].startswith('data:') else f['url'])
2837                 for f in info.get('requested_formats', []) or [info])
2838             self.write_debug('Invoking downloader on "%s"' % urls)
2839
2840         # Note: Ideally info should be a deep-copied so that hooks cannot modify it.
2841         # But it may contain objects that are not deep-copyable
2842         new_info = self._copy_infodict(info)
2843         if new_info.get('http_headers') is None:
2844             new_info['http_headers'] = self._calc_headers(new_info)
2845         return fd.download(name, new_info, subtitle)
2846
2847     def existing_file(self, filepaths, *, default_overwrite=True):
2848         existing_files = list(filter(os.path.exists, orderedSet(filepaths)))
2849         if existing_files and not self.params.get('overwrites', default_overwrite):
2850             return existing_files[0]
2851
2852         for file in existing_files:
2853             self.report_file_delete(file)
2854             os.remove(file)
2855         return None
2856
2857     def process_info(self, info_dict):
2858         """Process a single resolved IE result. (Modifies it in-place)"""
2859
2860         assert info_dict.get('_type', 'video') == 'video'
2861         original_infodict = info_dict
2862
2863         if 'format' not in info_dict and 'ext' in info_dict:
2864             info_dict['format'] = info_dict['ext']
2865
2866         # This is mostly just for backward compatibility of process_info
2867         # As a side-effect, this allows for format-specific filters
2868         if self._match_entry(info_dict) is not None:
2869             info_dict['__write_download_archive'] = 'ignore'
2870             return
2871
2872         # Does nothing under normal operation - for backward compatibility of process_info
2873         self.post_extract(info_dict)
2874         self._num_downloads += 1
2875
2876         # info_dict['_filename'] needs to be set for backward compatibility
2877         info_dict['_filename'] = full_filename = self.prepare_filename(info_dict, warn=True)
2878         temp_filename = self.prepare_filename(info_dict, 'temp')
2879         files_to_move = {}
2880
2881         # Forced printings
2882         self.__forced_printings(info_dict, full_filename, incomplete=('format' not in info_dict))
2883
2884         if self.params.get('simulate'):
2885             info_dict['__write_download_archive'] = self.params.get('force_write_download_archive')
2886             return
2887
2888         if full_filename is None:
2889             return
2890         if not self._ensure_dir_exists(encodeFilename(full_filename)):
2891             return
2892         if not self._ensure_dir_exists(encodeFilename(temp_filename)):
2893             return
2894
2895         if self._write_description('video', info_dict,
2896                                    self.prepare_filename(info_dict, 'description')) is None:
2897             return
2898
2899         sub_files = self._write_subtitles(info_dict, temp_filename)
2900         if sub_files is None:
2901             return
2902         files_to_move.update(dict(sub_files))
2903
2904         thumb_files = self._write_thumbnails(
2905             'video', info_dict, temp_filename, self.prepare_filename(info_dict, 'thumbnail'))
2906         if thumb_files is None:
2907             return
2908         files_to_move.update(dict(thumb_files))
2909
2910         infofn = self.prepare_filename(info_dict, 'infojson')
2911         _infojson_written = self._write_info_json('video', info_dict, infofn)
2912         if _infojson_written:
2913             info_dict['infojson_filename'] = infofn
2914             # For backward compatibility, even though it was a private field
2915             info_dict['__infojson_filename'] = infofn
2916         elif _infojson_written is None:
2917             return
2918
2919         # Note: Annotations are deprecated
2920         annofn = None
2921         if self.params.get('writeannotations', False):
2922             annofn = self.prepare_filename(info_dict, 'annotation')
2923         if annofn:
2924             if not self._ensure_dir_exists(encodeFilename(annofn)):
2925                 return
2926             if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(annofn)):
2927                 self.to_screen('[info] Video annotations are already present')
2928             elif not info_dict.get('annotations'):
2929                 self.report_warning('There are no annotations to write.')
2930             else:
2931                 try:
2932                     self.to_screen('[info] Writing video annotations to: ' + annofn)
2933                     with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
2934                         annofile.write(info_dict['annotations'])
2935                 except (KeyError, TypeError):
2936                     self.report_warning('There are no annotations to write.')
2937                 except (OSError, IOError):
2938                     self.report_error('Cannot write annotations file: ' + annofn)
2939                     return
2940
2941         # Write internet shortcut files
2942         def _write_link_file(link_type):
2943             url = try_get(info_dict['webpage_url'], iri_to_uri)
2944             if not url:
2945                 self.report_warning(
2946                     f'Cannot write internet shortcut file because the actual URL of "{info_dict["webpage_url"]}" is unknown')
2947                 return True
2948             linkfn = replace_extension(self.prepare_filename(info_dict, 'link'), link_type, info_dict.get('ext'))
2949             if not self._ensure_dir_exists(encodeFilename(linkfn)):
2950                 return False
2951             if self.params.get('overwrites', True) and os.path.exists(encodeFilename(linkfn)):
2952                 self.to_screen(f'[info] Internet shortcut (.{link_type}) is already present')
2953                 return True
2954             try:
2955                 self.to_screen(f'[info] Writing internet shortcut (.{link_type}) to: {linkfn}')
2956                 with io.open(encodeFilename(to_high_limit_path(linkfn)), 'w', encoding='utf-8',
2957                              newline='\r\n' if link_type == 'url' else '\n') as linkfile:
2958                     template_vars = {'url': url}
2959                     if link_type == 'desktop':
2960                         template_vars['filename'] = linkfn[:-(len(link_type) + 1)]
2961                     linkfile.write(LINK_TEMPLATES[link_type] % template_vars)
2962             except (OSError, IOError):
2963                 self.report_error(f'Cannot write internet shortcut {linkfn}')
2964                 return False
2965             return True
2966
2967         write_links = {
2968             'url': self.params.get('writeurllink'),
2969             'webloc': self.params.get('writewebloclink'),
2970             'desktop': self.params.get('writedesktoplink'),
2971         }
2972         if self.params.get('writelink'):
2973             link_type = ('webloc' if sys.platform == 'darwin'
2974                          else 'desktop' if sys.platform.startswith('linux')
2975                          else 'url')
2976             write_links[link_type] = True
2977
2978         if any(should_write and not _write_link_file(link_type)
2979                for link_type, should_write in write_links.items()):
2980             return
2981
2982         def replace_info_dict(new_info):
2983             nonlocal info_dict
2984             if new_info == info_dict:
2985                 return
2986             info_dict.clear()
2987             info_dict.update(new_info)
2988
2989         try:
2990             new_info, files_to_move = self.pre_process(info_dict, 'before_dl', files_to_move)
2991             replace_info_dict(new_info)
2992         except PostProcessingError as err:
2993             self.report_error('Preprocessing: %s' % str(err))
2994             return
2995
2996         if self.params.get('skip_download'):
2997             info_dict['filepath'] = temp_filename
2998             info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))
2999             info_dict['__files_to_move'] = files_to_move
3000             replace_info_dict(self.run_pp(MoveFilesAfterDownloadPP(self, False), info_dict))
3001             info_dict['__write_download_archive'] = self.params.get('force_write_download_archive')
3002         else:
3003             # Download
3004             info_dict.setdefault('__postprocessors', [])
3005             try:
3006
3007                 def existing_video_file(*filepaths):
3008                     ext = info_dict.get('ext')
3009                     converted = lambda file: replace_extension(file, self.params.get('final_ext') or ext, ext)
3010                     file = self.existing_file(itertools.chain(*zip(map(converted, filepaths), filepaths)),
3011                                               default_overwrite=False)
3012                     if file:
3013                         info_dict['ext'] = os.path.splitext(file)[1][1:]
3014                     return file
3015
3016                 success = True
3017                 if info_dict.get('requested_formats') is not None:
3018
3019                     def compatible_formats(formats):
3020                         # TODO: some formats actually allow this (mkv, webm, ogg, mp4), but not all of them.
3021                         video_formats = [format for format in formats if format.get('vcodec') != 'none']
3022                         audio_formats = [format for format in formats if format.get('acodec') != 'none']
3023                         if len(video_formats) > 2 or len(audio_formats) > 2:
3024                             return False
3025
3026                         # Check extension
3027                         exts = set(format.get('ext') for format in formats)
3028                         COMPATIBLE_EXTS = (
3029                             set(('mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'ismv', 'isma')),
3030                             set(('webm',)),
3031                         )
3032                         for ext_sets in COMPATIBLE_EXTS:
3033                             if ext_sets.issuperset(exts):
3034                                 return True
3035                         # TODO: Check acodec/vcodec
3036                         return False
3037
3038                     requested_formats = info_dict['requested_formats']
3039                     old_ext = info_dict['ext']
3040                     if self.params.get('merge_output_format') is None:
3041                         if not compatible_formats(requested_formats):
3042                             info_dict['ext'] = 'mkv'
3043                             self.report_warning(
3044                                 'Requested formats are incompatible for merge and will be merged into mkv')
3045                         if (info_dict['ext'] == 'webm'
3046                                 and info_dict.get('thumbnails')
3047                                 # check with type instead of pp_key, __name__, or isinstance
3048                                 # since we dont want any custom PPs to trigger this
3049                                 and any(type(pp) == EmbedThumbnailPP for pp in self._pps['post_process'])):
3050                             info_dict['ext'] = 'mkv'
3051                             self.report_warning(
3052                                 'webm doesn\'t support embedding a thumbnail, mkv will be used')
3053                     new_ext = info_dict['ext']
3054
3055                     def correct_ext(filename, ext=new_ext):
3056                         if filename == '-':
3057                             return filename
3058                         filename_real_ext = os.path.splitext(filename)[1][1:]
3059                         filename_wo_ext = (
3060                             os.path.splitext(filename)[0]
3061                             if filename_real_ext in (old_ext, new_ext)
3062                             else filename)
3063                         return '%s.%s' % (filename_wo_ext, ext)
3064
3065                     # Ensure filename always has a correct extension for successful merge
3066                     full_filename = correct_ext(full_filename)
3067                     temp_filename = correct_ext(temp_filename)
3068                     dl_filename = existing_video_file(full_filename, temp_filename)
3069                     info_dict['__real_download'] = False
3070
3071                     downloaded = []
3072                     merger = FFmpegMergerPP(self)
3073
3074                     fd = get_suitable_downloader(info_dict, self.params, to_stdout=temp_filename == '-')
3075                     if dl_filename is not None:
3076                         self.report_file_already_downloaded(dl_filename)
3077                     elif fd:
3078                         for f in requested_formats if fd != FFmpegFD else []:
3079                             f['filepath'] = fname = prepend_extension(
3080                                 correct_ext(temp_filename, info_dict['ext']),
3081                                 'f%s' % f['format_id'], info_dict['ext'])
3082                             downloaded.append(fname)
3083                         info_dict['url'] = '\n'.join(f['url'] for f in requested_formats)
3084                         success, real_download = self.dl(temp_filename, info_dict)
3085                         info_dict['__real_download'] = real_download
3086                     else:
3087                         if self.params.get('allow_unplayable_formats'):
3088                             self.report_warning(
3089                                 'You have requested merging of multiple formats '
3090                                 'while also allowing unplayable formats to be downloaded. '
3091                                 'The formats won\'t be merged to prevent data corruption.')
3092                         elif not merger.available:
3093                             msg = 'You have requested merging of multiple formats but ffmpeg is not installed'
3094                             if not self.params.get('ignoreerrors'):
3095                                 self.report_error(f'{msg}. Aborting due to --abort-on-error')
3096                                 return
3097                             self.report_warning(f'{msg}. The formats won\'t be merged')
3098
3099                         if temp_filename == '-':
3100                             reason = ('using a downloader other than ffmpeg' if FFmpegFD.can_merge_formats(info_dict, self.params)
3101                                       else 'but the formats are incompatible for simultaneous download' if merger.available
3102                                       else 'but ffmpeg is not installed')
3103                             self.report_warning(
3104                                 f'You have requested downloading multiple formats to stdout {reason}. '
3105                                 'The formats will be streamed one after the other')
3106                             fname = temp_filename
3107                         for f in requested_formats:
3108                             new_info = dict(info_dict)
3109                             del new_info['requested_formats']
3110                             new_info.update(f)
3111                             if temp_filename != '-':
3112                                 fname = prepend_extension(
3113                                     correct_ext(temp_filename, new_info['ext']),
3114                                     'f%s' % f['format_id'], new_info['ext'])
3115                                 if not self._ensure_dir_exists(fname):
3116                                     return
3117                                 f['filepath'] = fname
3118                                 downloaded.append(fname)
3119                             partial_success, real_download = self.dl(fname, new_info)
3120                             info_dict['__real_download'] = info_dict['__real_download'] or real_download
3121                             success = success and partial_success
3122
3123                     if downloaded and merger.available and not self.params.get('allow_unplayable_formats'):
3124                         info_dict['__postprocessors'].append(merger)
3125                         info_dict['__files_to_merge'] = downloaded
3126                         # Even if there were no downloads, it is being merged only now
3127                         info_dict['__real_download'] = True
3128                     else:
3129                         for file in downloaded:
3130                             files_to_move[file] = None
3131                 else:
3132                     # Just a single file
3133                     dl_filename = existing_video_file(full_filename, temp_filename)
3134                     if dl_filename is None or dl_filename == temp_filename:
3135                         # dl_filename == temp_filename could mean that the file was partially downloaded with --no-part.
3136                         # So we should try to resume the download
3137                         success, real_download = self.dl(temp_filename, info_dict)
3138                         info_dict['__real_download'] = real_download
3139                     else:
3140                         self.report_file_already_downloaded(dl_filename)
3141
3142                 dl_filename = dl_filename or temp_filename
3143                 info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))
3144
3145             except network_exceptions as err:
3146                 self.report_error('unable to download video data: %s' % error_to_compat_str(err))
3147                 return
3148             except (OSError, IOError) as err:
3149                 raise UnavailableVideoError(err)
3150             except (ContentTooShortError, ) as err:
3151                 self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
3152                 return
3153
3154             if success and full_filename != '-':
3155
3156                 def fixup():
3157                     do_fixup = True
3158                     fixup_policy = self.params.get('fixup')
3159                     vid = info_dict['id']
3160
3161                     if fixup_policy in ('ignore', 'never'):
3162                         return
3163                     elif fixup_policy == 'warn':
3164                         do_fixup = False
3165                     elif fixup_policy != 'force':
3166                         assert fixup_policy in ('detect_or_warn', None)
3167                         if not info_dict.get('__real_download'):
3168                             do_fixup = False
3169
3170                     def ffmpeg_fixup(cndn, msg, cls):
3171                         if not cndn:
3172                             return
3173                         if not do_fixup:
3174                             self.report_warning(f'{vid}: {msg}')
3175                             return
3176                         pp = cls(self)
3177                         if pp.available:
3178                             info_dict['__postprocessors'].append(pp)
3179                         else:
3180                             self.report_warning(f'{vid}: {msg}. Install ffmpeg to fix this automatically')
3181
3182                     stretched_ratio = info_dict.get('stretched_ratio')
3183                     ffmpeg_fixup(
3184                         stretched_ratio not in (1, None),
3185                         f'Non-uniform pixel ratio {stretched_ratio}',
3186                         FFmpegFixupStretchedPP)
3187
3188                     ffmpeg_fixup(
3189                         (info_dict.get('requested_formats') is None
3190                          and info_dict.get('container') == 'm4a_dash'
3191                          and info_dict.get('ext') == 'm4a'),
3192                         'writing DASH m4a. Only some players support this container',
3193                         FFmpegFixupM4aPP)
3194
3195                     downloader = get_suitable_downloader(info_dict, self.params) if 'protocol' in info_dict else None
3196                     downloader = downloader.__name__ if downloader else None
3197
3198                     if info_dict.get('requested_formats') is None:  # Not necessary if doing merger
3199                         ffmpeg_fixup(downloader == 'HlsFD',
3200                                      'Possible MPEG-TS in MP4 container or malformed AAC timestamps',
3201                                      FFmpegFixupM3u8PP)
3202                         ffmpeg_fixup(info_dict.get('is_live') and downloader == 'DashSegmentsFD',
3203                                      'Possible duplicate MOOV atoms', FFmpegFixupDuplicateMoovPP)
3204
3205                     ffmpeg_fixup(downloader == 'WebSocketFragmentFD', 'Malformed timestamps detected', FFmpegFixupTimestampPP)
3206                     ffmpeg_fixup(downloader == 'WebSocketFragmentFD', 'Malformed duration detected', FFmpegFixupDurationPP)
3207
3208                 fixup()
3209                 try:
3210                     replace_info_dict(self.post_process(dl_filename, info_dict, files_to_move))
3211                 except PostProcessingError as err:
3212                     self.report_error('Postprocessing: %s' % str(err))
3213                     return
3214                 try:
3215                     for ph in self._post_hooks:
3216                         ph(info_dict['filepath'])
3217                 except Exception as err:
3218                     self.report_error('post hooks: %s' % str(err))
3219                     return
3220                 info_dict['__write_download_archive'] = True
3221
3222         if self.params.get('force_write_download_archive'):
3223             info_dict['__write_download_archive'] = True
3224
3225         # Make sure the info_dict was modified in-place
3226         assert info_dict is original_infodict
3227
3228         max_downloads = self.params.get('max_downloads')
3229         if max_downloads is not None and self._num_downloads >= int(max_downloads):
3230             raise MaxDownloadsReached()
3231
3232     def __download_wrapper(self, func):
3233         @functools.wraps(func)
3234         def wrapper(*args, **kwargs):
3235             try:
3236                 res = func(*args, **kwargs)
3237             except UnavailableVideoError as e:
3238                 self.report_error(e)
3239             except MaxDownloadsReached as e:
3240                 self.to_screen(f'[info] {e}')
3241                 raise
3242             except DownloadCancelled as e:
3243                 self.to_screen(f'[info] {e}')
3244                 if not self.params.get('break_per_url'):
3245                     raise
3246             else:
3247                 if self.params.get('dump_single_json', False):
3248                     self.post_extract(res)
3249                     self.to_stdout(json.dumps(self.sanitize_info(res)))
3250         return wrapper
3251
3252     def download(self, url_list):
3253         """Download a given list of URLs."""
3254         url_list = variadic(url_list)  # Passing a single URL is a common mistake
3255         outtmpl = self.outtmpl_dict['default']
3256         if (len(url_list) > 1
3257                 and outtmpl != '-'
3258                 and '%' not in outtmpl
3259                 and self.params.get('max_downloads') != 1):
3260             raise SameFileError(outtmpl)
3261
3262         for url in url_list:
3263             self.__download_wrapper(self.extract_info)(
3264                 url, force_generic_extractor=self.params.get('force_generic_extractor', False))
3265
3266         return self._download_retcode
3267
3268     def download_with_info_file(self, info_filename):
3269         with contextlib.closing(fileinput.FileInput(
3270                 [info_filename], mode='r',
3271                 openhook=fileinput.hook_encoded('utf-8'))) as f:
3272             # FileInput doesn't have a read method, we can't call json.load
3273             info = self.sanitize_info(json.loads('\n'.join(f)), self.params.get('clean_infojson', True))
3274         try:
3275             self.__download_wrapper(self.process_ie_result)(info, download=True)
3276         except (DownloadError, EntryNotInPlaylist, ReExtractInfo) as e:
3277             if not isinstance(e, EntryNotInPlaylist):
3278                 self.to_stderr('\r')
3279             webpage_url = info.get('webpage_url')
3280             if webpage_url is not None:
3281                 self.report_warning(f'The info failed to download: {e}; trying with URL {webpage_url}')
3282                 return self.download([webpage_url])
3283             else:
3284                 raise
3285         return self._download_retcode
3286
3287     @staticmethod
3288     def sanitize_info(info_dict, remove_private_keys=False):
3289         ''' Sanitize the infodict for converting to json '''
3290         if info_dict is None:
3291             return info_dict
3292         info_dict.setdefault('epoch', int(time.time()))
3293         info_dict.setdefault('_type', 'video')
3294
3295         if remove_private_keys:
3296             reject = lambda k, v: v is None or (k.startswith('_') and k != '_type') or k in {
3297                 'requested_downloads', 'requested_formats', 'requested_subtitles', 'requested_entries',
3298                 'entries', 'filepath', 'infojson_filename', 'original_url', 'playlist_autonumber',
3299             }
3300         else:
3301             reject = lambda k, v: False
3302
3303         def filter_fn(obj):
3304             if isinstance(obj, dict):
3305                 return {k: filter_fn(v) for k, v in obj.items() if not reject(k, v)}
3306             elif isinstance(obj, (list, tuple, set, LazyList)):
3307                 return list(map(filter_fn, obj))
3308             elif obj is None or isinstance(obj, (str, int, float, bool)):
3309                 return obj
3310             else:
3311                 return repr(obj)
3312
3313         return filter_fn(info_dict)
3314
3315     @staticmethod
3316     def filter_requested_info(info_dict, actually_filter=True):
3317         ''' Alias of sanitize_info for backward compatibility '''
3318         return YoutubeDL.sanitize_info(info_dict, actually_filter)
3319
3320     @staticmethod
3321     def post_extract(info_dict):
3322         def actual_post_extract(info_dict):
3323             if info_dict.get('_type') in ('playlist', 'multi_video'):
3324                 for video_dict in info_dict.get('entries', {}):
3325                     actual_post_extract(video_dict or {})
3326                 return
3327
3328             post_extractor = info_dict.pop('__post_extractor', None) or (lambda: {})
3329             info_dict.update(post_extractor())
3330
3331         actual_post_extract(info_dict or {})
3332
3333     def run_pp(self, pp, infodict):
3334         files_to_delete = []
3335         if '__files_to_move' not in infodict:
3336             infodict['__files_to_move'] = {}
3337         try:
3338             files_to_delete, infodict = pp.run(infodict)
3339         except PostProcessingError as e:
3340             # Must be True and not 'only_download'
3341             if self.params.get('ignoreerrors') is True:
3342                 self.report_error(e)
3343                 return infodict
3344             raise
3345
3346         if not files_to_delete:
3347             return infodict
3348         if self.params.get('keepvideo', False):
3349             for f in files_to_delete:
3350                 infodict['__files_to_move'].setdefault(f, '')
3351         else:
3352             for old_filename in set(files_to_delete):
3353                 self.to_screen('Deleting original file %s (pass -k to keep)' % old_filename)
3354                 try:
3355                     os.remove(encodeFilename(old_filename))
3356                 except (IOError, OSError):
3357                     self.report_warning('Unable to remove downloaded original file')
3358                 if old_filename in infodict['__files_to_move']:
3359                     del infodict['__files_to_move'][old_filename]
3360         return infodict
3361
3362     def run_all_pps(self, key, info, *, additional_pps=None):
3363         self._forceprint(key, info)
3364         for pp in (additional_pps or []) + self._pps[key]:
3365             info = self.run_pp(pp, info)
3366         return info
3367
3368     def pre_process(self, ie_info, key='pre_process', files_to_move=None):
3369         info = dict(ie_info)
3370         info['__files_to_move'] = files_to_move or {}
3371         info = self.run_all_pps(key, info)
3372         return info, info.pop('__files_to_move', None)
3373
3374     def post_process(self, filename, info, files_to_move=None):
3375         """Run all the postprocessors on the given file."""
3376         info['filepath'] = filename
3377         info['__files_to_move'] = files_to_move or {}
3378         info = self.run_all_pps('post_process', info, additional_pps=info.get('__postprocessors'))
3379         info = self.run_pp(MoveFilesAfterDownloadPP(self), info)
3380         del info['__files_to_move']
3381         return self.run_all_pps('after_move', info)
3382
3383     def _make_archive_id(self, info_dict):
3384         video_id = info_dict.get('id')
3385         if not video_id:
3386             return
3387         # Future-proof against any change in case
3388         # and backwards compatibility with prior versions
3389         extractor = info_dict.get('extractor_key') or info_dict.get('ie_key')  # key in a playlist
3390         if extractor is None:
3391             url = str_or_none(info_dict.get('url'))
3392             if not url:
3393                 return
3394             # Try to find matching extractor for the URL and take its ie_key
3395             for ie_key, ie in self._ies.items():
3396                 if ie.suitable(url):
3397                     extractor = ie_key
3398                     break
3399             else:
3400                 return
3401         return '%s %s' % (extractor.lower(), video_id)
3402
3403     def in_download_archive(self, info_dict):
3404         fn = self.params.get('download_archive')
3405         if fn is None:
3406             return False
3407
3408         vid_id = self._make_archive_id(info_dict)
3409         if not vid_id:
3410             return False  # Incomplete video information
3411
3412         return vid_id in self.archive
3413
3414     def record_download_archive(self, info_dict):
3415         fn = self.params.get('download_archive')
3416         if fn is None:
3417             return
3418         vid_id = self._make_archive_id(info_dict)
3419         assert vid_id
3420         self.write_debug(f'Adding to archive: {vid_id}')
3421         with locked_file(fn, 'a', encoding='utf-8') as archive_file:
3422             archive_file.write(vid_id + '\n')
3423         self.archive.add(vid_id)
3424
3425     @staticmethod
3426     def format_resolution(format, default='unknown'):
3427         if format.get('vcodec') == 'none' and format.get('acodec') != 'none':
3428             return 'audio only'
3429         if format.get('resolution') is not None:
3430             return format['resolution']
3431         if format.get('width') and format.get('height'):
3432             return '%dx%d' % (format['width'], format['height'])
3433         elif format.get('height'):
3434             return '%sp' % format['height']
3435         elif format.get('width'):
3436             return '%dx?' % format['width']
3437         return default
3438
3439     def _list_format_headers(self, *headers):
3440         if self.params.get('listformats_table', True) is not False:
3441             return [self._format_screen(header, self.Styles.HEADERS) for header in headers]
3442         return headers
3443
3444     def _format_note(self, fdict):
3445         res = ''
3446         if fdict.get('ext') in ['f4f', 'f4m']:
3447             res += '(unsupported)'
3448         if fdict.get('language'):
3449             if res:
3450                 res += ' '
3451             res += '[%s]' % fdict['language']
3452         if fdict.get('format_note') is not None:
3453             if res:
3454                 res += ' '
3455             res += fdict['format_note']
3456         if fdict.get('tbr') is not None:
3457             if res:
3458                 res += ', '
3459             res += '%4dk' % fdict['tbr']
3460         if fdict.get('container') is not None:
3461             if res:
3462                 res += ', '
3463             res += '%s container' % fdict['container']
3464         if (fdict.get('vcodec') is not None
3465                 and fdict.get('vcodec') != 'none'):
3466             if res:
3467                 res += ', '
3468             res += fdict['vcodec']
3469             if fdict.get('vbr') is not None:
3470                 res += '@'
3471         elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
3472             res += 'video@'
3473         if fdict.get('vbr') is not None:
3474             res += '%4dk' % fdict['vbr']
3475         if fdict.get('fps') is not None:
3476             if res:
3477                 res += ', '
3478             res += '%sfps' % fdict['fps']
3479         if fdict.get('acodec') is not None:
3480             if res:
3481                 res += ', '
3482             if fdict['acodec'] == 'none':
3483                 res += 'video only'
3484             else:
3485                 res += '%-5s' % fdict['acodec']
3486         elif fdict.get('abr') is not None:
3487             if res:
3488                 res += ', '
3489             res += 'audio'
3490         if fdict.get('abr') is not None:
3491             res += '@%3dk' % fdict['abr']
3492         if fdict.get('asr') is not None:
3493             res += ' (%5dHz)' % fdict['asr']
3494         if fdict.get('filesize') is not None:
3495             if res:
3496                 res += ', '
3497             res += format_bytes(fdict['filesize'])
3498         elif fdict.get('filesize_approx') is not None:
3499             if res:
3500                 res += ', '
3501             res += '~' + format_bytes(fdict['filesize_approx'])
3502         return res
3503
3504     def render_formats_table(self, info_dict):
3505         if not info_dict.get('formats') and not info_dict.get('url'):
3506             return None
3507
3508         formats = info_dict.get('formats', [info_dict])
3509         if not self.params.get('listformats_table', True) is not False:
3510             table = [
3511                 [
3512                     format_field(f, 'format_id'),
3513                     format_field(f, 'ext'),
3514                     self.format_resolution(f),
3515                     self._format_note(f)
3516                 ] for f in formats if f.get('preference') is None or f['preference'] >= -1000]
3517             return render_table(['format code', 'extension', 'resolution', 'note'], table, extra_gap=1)
3518
3519         delim = self._format_screen('\u2502', self.Styles.DELIM, '|', test_encoding=True)
3520         table = [
3521             [
3522                 self._format_screen(format_field(f, 'format_id'), self.Styles.ID),
3523                 format_field(f, 'ext'),
3524                 format_field(f, func=self.format_resolution, ignore=('audio only', 'images')),
3525                 format_field(f, 'fps', '\t%d'),
3526                 format_field(f, 'dynamic_range', '%s', ignore=(None, 'SDR')).replace('HDR', ''),
3527                 delim,
3528                 format_field(f, 'filesize', ' \t%s', func=format_bytes) + format_field(f, 'filesize_approx', '~\t%s', func=format_bytes),
3529                 format_field(f, 'tbr', '\t%dk'),
3530                 shorten_protocol_name(f.get('protocol', '')),
3531                 delim,
3532                 format_field(f, 'vcodec', default='unknown').replace(
3533                     'none', 'images' if f.get('acodec') == 'none'
3534                             else self._format_screen('audio only', self.Styles.SUPPRESS)),
3535                 format_field(f, 'vbr', '\t%dk'),
3536                 format_field(f, 'acodec', default='unknown').replace(
3537                     'none', '' if f.get('vcodec') == 'none'
3538                             else self._format_screen('video only', self.Styles.SUPPRESS)),
3539                 format_field(f, 'abr', '\t%dk'),
3540                 format_field(f, 'asr', '\t%dHz'),
3541                 join_nonempty(
3542                     self._format_screen('UNSUPPORTED', 'light red') if f.get('ext') in ('f4f', 'f4m') else None,
3543                     format_field(f, 'language', '[%s]'),
3544                     join_nonempty(format_field(f, 'format_note'),
3545                                   format_field(f, 'container', ignore=(None, f.get('ext'))),
3546                                   delim=', '),
3547                     delim=' '),
3548             ] for f in formats if f.get('preference') is None or f['preference'] >= -1000]
3549         header_line = self._list_format_headers(
3550             'ID', 'EXT', 'RESOLUTION', '\tFPS', 'HDR', delim, '\tFILESIZE', '\tTBR', 'PROTO',
3551             delim, 'VCODEC', '\tVBR', 'ACODEC', '\tABR', '\tASR', 'MORE INFO')
3552
3553         return render_table(
3554             header_line, table, hide_empty=True,
3555             delim=self._format_screen('\u2500', self.Styles.DELIM, '-', test_encoding=True))
3556
3557     def render_thumbnails_table(self, info_dict):
3558         thumbnails = list(info_dict.get('thumbnails') or [])
3559         if not thumbnails:
3560             return None
3561         return render_table(
3562             self._list_format_headers('ID', 'Width', 'Height', 'URL'),
3563             [[t.get('id'), t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']] for t in thumbnails])
3564
3565     def render_subtitles_table(self, video_id, subtitles):
3566         def _row(lang, formats):
3567             exts, names = zip(*((f['ext'], f.get('name') or 'unknown') for f in reversed(formats)))
3568             if len(set(names)) == 1:
3569                 names = [] if names[0] == 'unknown' else names[:1]
3570             return [lang, ', '.join(names), ', '.join(exts)]
3571
3572         if not subtitles:
3573             return None
3574         return render_table(
3575             self._list_format_headers('Language', 'Name', 'Formats'),
3576             [_row(lang, formats) for lang, formats in subtitles.items()],
3577             hide_empty=True)
3578
3579     def __list_table(self, video_id, name, func, *args):
3580         table = func(*args)
3581         if not table:
3582             self.to_screen(f'{video_id} has no {name}')
3583             return
3584         self.to_screen(f'[info] Available {name} for {video_id}:')
3585         self.to_stdout(table)
3586
3587     def list_formats(self, info_dict):
3588         self.__list_table(info_dict['id'], 'formats', self.render_formats_table, info_dict)
3589
3590     def list_thumbnails(self, info_dict):
3591         self.__list_table(info_dict['id'], 'thumbnails', self.render_thumbnails_table, info_dict)
3592
3593     def list_subtitles(self, video_id, subtitles, name='subtitles'):
3594         self.__list_table(video_id, name, self.render_subtitles_table, video_id, subtitles)
3595
3596     def urlopen(self, req):
3597         """ Start an HTTP download """
3598         if isinstance(req, compat_basestring):
3599             req = sanitized_Request(req)
3600         return self._opener.open(req, timeout=self._socket_timeout)
3601
    def print_debug_header(self):
        """Write the debug header shown in verbose mode.

        Does nothing unless the 'verbose' param is set. Otherwise it emits, in this
        order: the stream encodings, the yt-dlp version and variant, lazy-loader and
        plugin status, compatibility options, the git HEAD (only for the 'source'
        variant), the Python implementation and platform, the versions of external
        executables, the available optional libraries and the proxy map collected
        from the opener's handlers.
        """
        if not self.params.get('verbose'):
            return

        def get_encoding(stream):
            # Describe the stream's encoding; a stream without an `encoding`
            # attribute is reported as "missing (<type name>)"
            ret = str(getattr(stream, 'encoding', 'missing (%s)' % type(stream).__name__))
            if not supports_terminal_sequences(stream):
                from .compat import WINDOWS_VT_MODE
                ret += ' (No VT)' if WINDOWS_VT_MODE is False else ' (No ANSI)'
            return ret

        encoding_str = 'Encodings: locale %s, fs %s, out %s, err %s, pref %s' % (
            locale.getpreferredencoding(),
            sys.getfilesystemencoding(),
            get_encoding(self._out_files['screen']), get_encoding(self._out_files['error']),
            self.get_encoding())

        # With a user-supplied logger every debug line goes to logger.debug().
        # Without one, the encodings line is written with write_string(encoding=None)
        # and all later lines through self._write_string
        logger = self.params.get('logger')
        if logger:
            write_debug = lambda msg: logger.debug(f'[debug] {msg}')
            write_debug(encoding_str)
        else:
            write_string(f'[debug] {encoding_str}\n', encoding=None)
            write_debug = lambda msg: self._write_string(f'[debug] {msg}\n')

        source = detect_variant()
        write_debug(join_nonempty(
            'yt-dlp version', __version__,
            f'[{RELEASE_GIT_HEAD}]' if RELEASE_GIT_HEAD else '',
            '' if source == 'unknown' else f'({source})',
            delim=' '))
        if not _LAZY_LOADER:
            if os.environ.get('YTDLP_NO_LAZY_EXTRACTORS'):
                write_debug('Lazy loading extractors is forcibly disabled')
            else:
                write_debug('Lazy loading extractors is disabled')
        if plugin_extractors or plugin_postprocessors:
            # Each plugin is listed by class name, with " as <name>" appended when
            # it is registered under a different name
            write_debug('Plugins: %s' % [
                '%s%s' % (klass.__name__, '' if klass.__name__ == name else f' as {name}')
                for name, klass in itertools.chain(plugin_extractors.items(), plugin_postprocessors.items())])
        if self.params.get('compat_opts'):
            write_debug('Compatibility options: %s' % ', '.join(self.params.get('compat_opts')))

        if source == 'source':
            # Ask git (run in this module's directory) for the short HEAD hash.
            # Any failure is ignored: this is best-effort debug output
            try:
                sp = Popen(
                    ['git', 'rev-parse', '--short', 'HEAD'],
                    stdout=subprocess.PIPE, stderr=subprocess.PIPE,
                    cwd=os.path.dirname(os.path.abspath(__file__)))
                out, err = sp.communicate_or_kill()
                out = out.decode().strip()
                # re.match only anchors at the start, so just the beginning of the
                # output has to look like a hex hash
                if re.match('[0-9a-f]+', out):
                    write_debug('Git HEAD: %s' % out)
            except Exception:
                # NOTE(review): sys.exc_clear() is a Python 2 API - presumably a
                # leftover; on Python 3 the AttributeError is swallowed just below
                try:
                    sys.exc_clear()
                except Exception:
                    pass

        def python_implementation():
            # Implementation name, plus the PyPy version when running on PyPy
            impl_name = platform.python_implementation()
            if impl_name == 'PyPy' and hasattr(sys, 'pypy_version_info'):
                return impl_name + ' version %d.%d.%d' % sys.pypy_version_info[:3]
            return impl_name

        write_debug('Python version %s (%s %s) - %s' % (
            platform.python_version(),
            python_implementation(),
            platform.architecture()[0],
            platform_name()))

        exe_versions, ffmpeg_features = FFmpegPostProcessor.get_versions_and_features(self)
        # Keep only the names of the features whose value is truthy
        ffmpeg_features = {key for key, val in ffmpeg_features.items() if val}
        if ffmpeg_features:
            exe_versions['ffmpeg'] += ' (%s)' % ','.join(ffmpeg_features)

        exe_versions['rtmpdump'] = rtmpdump_version()
        exe_versions['phantomjs'] = PhantomJSwrapper._version()
        # Executables with a falsy version are left out; 'none' if nothing remains
        exe_str = ', '.join(
            f'{exe} {v}' for exe, v in sorted(exe_versions.items()) if v
        ) or 'none'
        write_debug('exe versions: %s' % exe_str)

        from .downloader.websocket import has_websockets
        from .postprocessor.embedthumbnail import has_mutagen
        from .cookies import SQLITE_AVAILABLE, SECRETSTORAGE_AVAILABLE

        # Each `flag and name` term is falsy for a missing library, so
        # join_nonempty presumably keeps only the available ones - confirm in utils
        lib_str = join_nonempty(
            compat_brotli and compat_brotli.__name__,
            has_certifi and 'certifi',
            compat_pycrypto_AES and compat_pycrypto_AES.__name__.split('.')[0],
            SECRETSTORAGE_AVAILABLE and 'secretstorage',
            has_mutagen and 'mutagen',
            SQLITE_AVAILABLE and 'sqlite',
            has_websockets and 'websockets',
            delim=', ') or 'none'
        write_debug('Optional libraries: %s' % lib_str)

        # Merge the `proxies` mapping of every opener handler that has one
        proxy_map = {}
        for handler in self._opener.handlers:
            if hasattr(handler, 'proxies'):
                proxy_map.update(handler.proxies)
        write_debug(f'Proxy map: {proxy_map}')

        # Not implemented
        # (the `False and` makes this branch unreachable)
        if False and self.params.get('call_home'):
            ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode('utf-8')
            write_debug('Public IP address: %s' % ipaddr)
            latest_version = self.urlopen(
                'https://yt-dl.org/latest/version').read().decode('utf-8')
            if version_tuple(latest_version) > version_tuple(__version__):
                self.report_warning(
                    'You are using an outdated version (newest version: %s)! '
                    'See https://yt-dl.org/update if you need help updating.' %
                    latest_version)
3717
3718     def _setup_opener(self):
3719         timeout_val = self.params.get('socket_timeout')
3720         self._socket_timeout = 20 if timeout_val is None else float(timeout_val)
3721
3722         opts_cookiesfrombrowser = self.params.get('cookiesfrombrowser')
3723         opts_cookiefile = self.params.get('cookiefile')
3724         opts_proxy = self.params.get('proxy')
3725
3726         self.cookiejar = load_cookies(opts_cookiefile, opts_cookiesfrombrowser, self)
3727
3728         cookie_processor = YoutubeDLCookieProcessor(self.cookiejar)
3729         if opts_proxy is not None:
3730             if opts_proxy == '':
3731                 proxies = {}
3732             else:
3733                 proxies = {'http': opts_proxy, 'https': opts_proxy}
3734         else:
3735             proxies = compat_urllib_request.getproxies()
3736             # Set HTTPS proxy to HTTP one if given (https://github.com/ytdl-org/youtube-dl/issues/805)
3737             if 'http' in proxies and 'https' not in proxies:
3738                 proxies['https'] = proxies['http']
3739         proxy_handler = PerRequestProxyHandler(proxies)
3740
3741         debuglevel = 1 if self.params.get('debug_printtraffic') else 0
3742         https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
3743         ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
3744         redirect_handler = YoutubeDLRedirectHandler()
3745         data_handler = compat_urllib_request_DataHandler()
3746
3747         # When passing our own FileHandler instance, build_opener won't add the
3748         # default FileHandler and allows us to disable the file protocol, which
3749         # can be used for malicious purposes (see
3750         # https://github.com/ytdl-org/youtube-dl/issues/8227)
3751         file_handler = compat_urllib_request.FileHandler()
3752
3753         def file_open(*args, **kwargs):
3754             raise compat_urllib_error.URLError('file:// scheme is explicitly disabled in yt-dlp for security reasons')
3755         file_handler.file_open = file_open
3756
3757         opener = compat_urllib_request.build_opener(
3758             proxy_handler, https_handler, cookie_processor, ydlh, redirect_handler, data_handler, file_handler)
3759
3760         # Delete the default user-agent header, which would otherwise apply in
3761         # cases where our custom HTTP handler doesn't come into play
3762         # (See https://github.com/ytdl-org/youtube-dl/issues/1309 for details)
3763         opener.addheaders = []
3764         self._opener = opener
3765
3766     def encode(self, s):
3767         if isinstance(s, bytes):
3768             return s  # Already encoded
3769
3770         try:
3771             return s.encode(self.get_encoding())
3772         except UnicodeEncodeError as err:
3773             err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
3774             raise
3775
3776     def get_encoding(self):
3777         encoding = self.params.get('encoding')
3778         if encoding is None:
3779             encoding = preferredencoding()
3780         return encoding
3781
3782     def _write_info_json(self, label, ie_result, infofn, overwrite=None):
3783         ''' Write infojson and returns True = written, False = skip, None = error '''
3784         if overwrite is None:
3785             overwrite = self.params.get('overwrites', True)
3786         if not self.params.get('writeinfojson'):
3787             return False
3788         elif not infofn:
3789             self.write_debug(f'Skipping writing {label} infojson')
3790             return False
3791         elif not self._ensure_dir_exists(infofn):
3792             return None
3793         elif not overwrite and os.path.exists(infofn):
3794             self.to_screen(f'[info] {label.title()} metadata is already present')
3795         else:
3796             self.to_screen(f'[info] Writing {label} metadata as JSON to: {infofn}')
3797             try:
3798                 write_json_file(self.sanitize_info(ie_result, self.params.get('clean_infojson', True)), infofn)
3799             except (OSError, IOError):
3800                 self.report_error(f'Cannot write {label} metadata to JSON file {infofn}')
3801                 return None
3802         return True
3803
3804     def _write_description(self, label, ie_result, descfn):
3805         ''' Write description and returns True = written, False = skip, None = error '''
3806         if not self.params.get('writedescription'):
3807             return False
3808         elif not descfn:
3809             self.write_debug(f'Skipping writing {label} description')
3810             return False
3811         elif not self._ensure_dir_exists(descfn):
3812             return None
3813         elif not self.params.get('overwrites', True) and os.path.exists(descfn):
3814             self.to_screen(f'[info] {label.title()} description is already present')
3815         elif ie_result.get('description') is None:
3816             self.report_warning(f'There\'s no {label} description to write')
3817             return False
3818         else:
3819             try:
3820                 self.to_screen(f'[info] Writing {label} description to: {descfn}')
3821                 with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
3822                     descfile.write(ie_result['description'])
3823             except (OSError, IOError):
3824                 self.report_error(f'Cannot write {label} description file {descfn}')
3825                 return None
3826         return True
3827
3828     def _write_subtitles(self, info_dict, filename):
3829         ''' Write subtitles to file and return list of (sub_filename, final_sub_filename); or None if error'''
3830         ret = []
3831         subtitles = info_dict.get('requested_subtitles')
3832         if not subtitles or not (self.params.get('writesubtitles') or self.params.get('writeautomaticsub')):
3833             # subtitles download errors are already managed as troubles in relevant IE
3834             # that way it will silently go on when used with unsupporting IE
3835             return ret
3836
3837         sub_filename_base = self.prepare_filename(info_dict, 'subtitle')
3838         if not sub_filename_base:
3839             self.to_screen('[info] Skipping writing video subtitles')
3840             return ret
3841         for sub_lang, sub_info in subtitles.items():
3842             sub_format = sub_info['ext']
3843             sub_filename = subtitles_filename(filename, sub_lang, sub_format, info_dict.get('ext'))
3844             sub_filename_final = subtitles_filename(sub_filename_base, sub_lang, sub_format, info_dict.get('ext'))
3845             existing_sub = self.existing_file((sub_filename_final, sub_filename))
3846             if existing_sub:
3847                 self.to_screen(f'[info] Video subtitle {sub_lang}.{sub_format} is already present')
3848                 sub_info['filepath'] = existing_sub
3849                 ret.append((existing_sub, sub_filename_final))
3850                 continue
3851
3852             self.to_screen(f'[info] Writing video subtitles to: {sub_filename}')
3853             if sub_info.get('data') is not None:
3854                 try:
3855                     # Use newline='' to prevent conversion of newline characters
3856                     # See https://github.com/ytdl-org/youtube-dl/issues/10268
3857                     with io.open(sub_filename, 'w', encoding='utf-8', newline='') as subfile:
3858                         subfile.write(sub_info['data'])
3859                     sub_info['filepath'] = sub_filename
3860                     ret.append((sub_filename, sub_filename_final))
3861                     continue
3862                 except (OSError, IOError):
3863                     self.report_error(f'Cannot write video subtitles file {sub_filename}')
3864                     return None
3865
3866             try:
3867                 sub_copy = sub_info.copy()
3868                 sub_copy.setdefault('http_headers', info_dict.get('http_headers'))
3869                 self.dl(sub_filename, sub_copy, subtitle=True)
3870                 sub_info['filepath'] = sub_filename
3871                 ret.append((sub_filename, sub_filename_final))
3872             except (DownloadError, ExtractorError, IOError, OSError, ValueError) + network_exceptions as err:
3873                 msg = f'Unable to download video subtitles for {sub_lang!r}: {err}'
3874                 if self.params.get('ignoreerrors') is not True:  # False or 'only_download'
3875                     if not self.params.get('ignoreerrors'):
3876                         self.report_error(msg)
3877                     raise DownloadError(msg)
3878                 self.report_warning(msg)
3879         return ret
3880
3881     def _write_thumbnails(self, label, info_dict, filename, thumb_filename_base=None):
3882         ''' Write thumbnails to file and return list of (thumb_filename, final_thumb_filename) '''
3883         write_all = self.params.get('write_all_thumbnails', False)
3884         thumbnails, ret = [], []
3885         if write_all or self.params.get('writethumbnail', False):
3886             thumbnails = info_dict.get('thumbnails') or []
3887         multiple = write_all and len(thumbnails) > 1
3888
3889         if thumb_filename_base is None:
3890             thumb_filename_base = filename
3891         if thumbnails and not thumb_filename_base:
3892             self.write_debug(f'Skipping writing {label} thumbnail')
3893             return ret
3894
3895         for idx, t in list(enumerate(thumbnails))[::-1]:
3896             thumb_ext = (f'{t["id"]}.' if multiple else '') + determine_ext(t['url'], 'jpg')
3897             thumb_display_id = f'{label} thumbnail {t["id"]}'
3898             thumb_filename = replace_extension(filename, thumb_ext, info_dict.get('ext'))
3899             thumb_filename_final = replace_extension(thumb_filename_base, thumb_ext, info_dict.get('ext'))
3900
3901             existing_thumb = self.existing_file((thumb_filename_final, thumb_filename))
3902             if existing_thumb:
3903                 self.to_screen('[info] %s is already present' % (
3904                     thumb_display_id if multiple else f'{label} thumbnail').capitalize())
3905                 t['filepath'] = existing_thumb
3906                 ret.append((existing_thumb, thumb_filename_final))
3907             else:
3908                 self.to_screen(f'[info] Downloading {thumb_display_id} ...')
3909                 try:
3910                     uf = self.urlopen(sanitized_Request(t['url'], headers=t.get('http_headers', {})))
3911                     self.to_screen(f'[info] Writing {thumb_display_id} to: {thumb_filename}')
3912                     with open(encodeFilename(thumb_filename), 'wb') as thumbf:
3913                         shutil.copyfileobj(uf, thumbf)
3914                     ret.append((thumb_filename, thumb_filename_final))
3915                     t['filepath'] = thumb_filename
3916                 except network_exceptions as err:
3917                     thumbnails.pop(idx)
3918                     self.report_warning(f'Unable to download {thumb_display_id}: {err}')
3919             if ret and not write_all:
3920                 break
3921         return ret