yt_dlp/YoutubeDL.py

   1 #!/usr/bin/env python3
   2 # coding: utf-8
   3
   4 from __future__ import absolute_import, unicode_literals
   5
   6 import collections
   7 import contextlib
   8 import datetime
   9 import errno
  10 import fileinput
  11 import functools
  12 import io
  13 import itertools
  14 import json
  15 import locale
  16 import operator
  17 import os
  18 import platform
  19 import re
  20 import shutil
  21 import subprocess
  22 import sys
  23 import tempfile
  24 import time
  25 import tokenize
  26 import traceback
  27 import random
  28 import unicodedata
  29
  30 from enum import Enum
  31 from string import ascii_letters
  32
  33 from .compat import (
  34     compat_basestring,
  35     compat_get_terminal_size,
  36     compat_kwargs,
  37     compat_numeric_types,
  38     compat_os_name,
  39     compat_pycrypto_AES,
  40     compat_shlex_quote,
  41     compat_str,
  42     compat_tokenize_tokenize,
  43     compat_urllib_error,
  44     compat_urllib_request,
  45     compat_urllib_request_DataHandler,
  46     windows_enable_vt_mode,
  47 )
  48 from .cookies import load_cookies
  49 from .utils import (
  50     age_restricted,
  51     args_to_str,
  52     ContentTooShortError,
  53     date_from_str,
  54     DateRange,
  55     DEFAULT_OUTTMPL,
  56     determine_ext,
  57     determine_protocol,
  58     DownloadCancelled,
  59     DownloadError,
  60     encode_compat_str,
  61     encodeFilename,
  62     EntryNotInPlaylist,
  63     error_to_compat_str,
  64     ExistingVideoReached,
  65     expand_path,
  66     ExtractorError,
  67     float_or_none,
  68     format_bytes,
  69     format_field,
  70     format_decimal_suffix,
  71     formatSeconds,
  72     GeoRestrictedError,
  73     get_domain,
  74     HEADRequest,
  75     int_or_none,
  76     iri_to_uri,
  77     ISO3166Utils,
  78     join_nonempty,
  79     LazyList,
  80     LINK_TEMPLATES,
  81     locked_file,
  82     make_dir,
  83     make_HTTPS_handler,
  84     MaxDownloadsReached,
  85     network_exceptions,
  86     number_of_digits,
  87     orderedSet,
  88     OUTTMPL_TYPES,
  89     PagedList,
  90     parse_filesize,
  91     PerRequestProxyHandler,
  92     platform_name,
  93     Popen,
  94     POSTPROCESS_WHEN,
  95     PostProcessingError,
  96     preferredencoding,
  97     prepend_extension,
  98     ReExtractInfo,
  99     register_socks_protocols,
 100     RejectedVideoReached,
 101     remove_terminal_sequences,
 102     render_table,
 103     replace_extension,
 104     SameFileError,
 105     sanitize_filename,
 106     sanitize_path,
 107     sanitize_url,
 108     sanitized_Request,
 109     std_headers,
 110     STR_FORMAT_RE_TMPL,
 111     STR_FORMAT_TYPES,
 112     str_or_none,
 113     strftime_or_none,
 114     subtitles_filename,
 115     supports_terminal_sequences,
 116     timetuple_from_msec,
 117     to_high_limit_path,
 118     traverse_obj,
 119     try_get,
 120     UnavailableVideoError,
 121     url_basename,
 122     variadic,
 123     version_tuple,
 124     write_json_file,
 125     write_string,
 126     YoutubeDLCookieProcessor,
 127     YoutubeDLHandler,
 128     YoutubeDLRedirectHandler,
 129 )
 130 from .cache import Cache
 131 from .minicurses import format_text
 132 from .extractor import (
 133     gen_extractor_classes,
 134     get_info_extractor,
 135     _LAZY_LOADER,
 136     _PLUGIN_CLASSES as plugin_extractors
 137 )
 138 from .extractor.openload import PhantomJSwrapper
 139 from .downloader import (
 140     FFmpegFD,
 141     get_suitable_downloader,
 142     shorten_protocol_name
 143 )
 144 from .downloader.rtmp import rtmpdump_version
 145 from .postprocessor import (
 146     get_postprocessor,
 147     EmbedThumbnailPP,
 148     FFmpegFixupDuplicateMoovPP,
 149     FFmpegFixupDurationPP,
 150     FFmpegFixupM3u8PP,
 151     FFmpegFixupM4aPP,
 152     FFmpegFixupStretchedPP,
 153     FFmpegFixupTimestampPP,
 154     FFmpegMergerPP,
 155     FFmpegPostProcessor,
 156     MoveFilesAfterDownloadPP,
 157     _PLUGIN_CLASSES as plugin_postprocessors
 158 )
 159 from .update import detect_variant
 160 from .version import __version__, RELEASE_GIT_HEAD
 161
 162 if compat_os_name == 'nt':
 163     import ctypes
 164
 165
 166 class YoutubeDL(object):
 167     """YoutubeDL class.
 168
 169     YoutubeDL objects are the ones responsible for downloading the
 170     actual video file and writing it to disk if the user has requested
 171     it, among some other tasks. In most cases there should be one per
 172     program. As, given a video URL, the downloader doesn't know how to
 173     extract all the needed information, task that InfoExtractors do, it
 174     has to pass the URL to one of them.
 175
 176     For this, YoutubeDL objects have a method that allows
 177     InfoExtractors to be registered in a given order. When it is passed
 178     a URL, the YoutubeDL object hands it to the first InfoExtractor it
 179     finds that reports being able to handle it. The InfoExtractor extracts
 180     all the information about the video or videos the URL refers to, and
 181     YoutubeDL processes the extracted information, possibly using a File
 182     Downloader to download the video.
 183
 184     YoutubeDL objects accept a lot of parameters. In order not to saturate
 185     the object constructor with arguments, it receives a dictionary of
 186     options instead. These options are available through the params
 187     attribute for the InfoExtractors to use. The YoutubeDL also
 188     registers itself as the downloader in charge for the InfoExtractors
 189     that are added to it, so this is a "mutual registration".
 190
 191     Available options:
 192
 193     username:          Username for authentication purposes.
 194     password:          Password for authentication purposes.
 195     videopassword:     Password for accessing a video.
 196     ap_mso:            Adobe Pass multiple-system operator identifier.
 197     ap_username:       Multiple-system operator account username.
 198     ap_password:       Multiple-system operator account password.
 199     usenetrc:          Use netrc for authentication instead.
 200     verbose:           Print additional info to stdout.
 201     quiet:             Do not print messages to stdout.
 202     no_warnings:       Do not print out anything for warnings.
 203     forceprint:        A dict with keys video/playlist mapped to
 204                        a list of templates to force print to stdout
 205                        For compatibility, a single list is also accepted
 206     forceurl:          Force printing final URL. (Deprecated)
 207     forcetitle:        Force printing title. (Deprecated)
 208     forceid:           Force printing ID. (Deprecated)
 209     forcethumbnail:    Force printing thumbnail URL. (Deprecated)
 210     forcedescription:  Force printing description. (Deprecated)
 211     forcefilename:     Force printing final filename. (Deprecated)
 212     forceduration:     Force printing duration. (Deprecated)
 213     forcejson:         Force printing info_dict as JSON.
 214     dump_single_json:  Force printing the info_dict of the whole playlist
 215                        (or video) as a single JSON line.
 216     force_write_download_archive: Force writing download archive regardless
 217                        of 'skip_download' or 'simulate'.
 218     simulate:          Do not download the video files. If unset (or None),
 219                        simulate only if listsubtitles, listformats or list_thumbnails is used
 220     format:            Video format code. See "FORMAT SELECTION" for more details.
 221                        You can also pass a function. The function takes 'ctx' as
 222                        argument and returns the formats to download.
 223                        See "build_format_selector" for an implementation
 224     allow_unplayable_formats:   Allow unplayable formats to be extracted and downloaded.
 225     ignore_no_formats_error: Ignore "No video formats" error. Useful for
 226                        extracting metadata even if the video is not actually
 227                        available for download (experimental)
 228     format_sort:       A list of fields by which to sort the video formats.
 229                        See "Sorting Formats" for more details.
 230     format_sort_force: Force the given format_sort. See "Sorting Formats"
 231                        for more details.
 232     allow_multiple_video_streams:   Allow multiple video streams to be merged
 233                        into a single file
 234     allow_multiple_audio_streams:   Allow multiple audio streams to be merged
 235                        into a single file
 236     check_formats:     Whether to test if the formats are downloadable.
 237                        Can be True (check all), False (check none),
 238                        'selected' (check selected formats),
 239                        or None (check only if requested by extractor)
 240     paths:             Dictionary of output paths. The allowed keys are 'home'
 241                        'temp' and the keys of OUTTMPL_TYPES (in utils.py)
 242     outtmpl:           Dictionary of templates for output names. Allowed keys
 243                        are 'default' and the keys of OUTTMPL_TYPES (in utils.py).
 244                        For compatibility with youtube-dl, a single string can also be used
 245     outtmpl_na_placeholder: Placeholder for unavailable meta fields.
 246     restrictfilenames: Do not allow "&" and spaces in file names
 247     trim_file_name:    Limit length of filename (extension excluded)
 248     windowsfilenames:  Force the filenames to be windows compatible
 249     ignoreerrors:      Do not stop on download/postprocessing errors.
 250                        Can be 'only_download' to ignore only download errors.
 251                        Default is 'only_download' for CLI, but False for API
 252     skip_playlist_after_errors: Number of allowed failures until the rest of
 253                        the playlist is skipped
 254     force_generic_extractor: Force downloader to use the generic extractor
 255     overwrites:        Overwrite all video and metadata files if True,
 256                        overwrite only non-video files if None
 257                        and don't overwrite any file if False
 258                        For compatibility with youtube-dl,
 259                        "nooverwrites" may also be used instead
 260     playliststart:     Playlist item to start at.
 261     playlistend:       Playlist item to end at.
 262     playlist_items:    Specific indices of playlist to download.
 263     playlistreverse:   Download playlist items in reverse order.
 264     playlistrandom:    Download playlist items in random order.
 265     matchtitle:        Download only matching titles.
 266     rejecttitle:       Reject downloads for matching titles.
 267     logger:            Log messages to a logging.Logger instance.
 268     logtostderr:       Log messages to stderr instead of stdout.
 269     consoletitle:       Display progress in console window's titlebar.
 270     writedescription:  Write the video description to a .description file
 271     writeinfojson:     Write the video metadata to a .info.json file
 272     clean_infojson:    Remove private fields from the infojson
 273     getcomments:       Extract video comments. This will not be written to disk
 274                        unless writeinfojson is also given
 275     writeannotations:  Write the video annotations to a .annotations.xml file
 276     writethumbnail:    Write the thumbnail image to a file
 277     allow_playlist_files: Whether to write playlists' description, infojson etc
 278                        also to disk when using the 'write*' options
 279     write_all_thumbnails:  Write all thumbnail formats to files
 280     writelink:         Write an internet shortcut file, depending on the
 281                        current platform (.url/.webloc/.desktop)
 282     writeurllink:      Write a Windows internet shortcut file (.url)
 283     writewebloclink:   Write a macOS internet shortcut file (.webloc)
 284     writedesktoplink:  Write a Linux internet shortcut file (.desktop)
 285     writesubtitles:    Write the video subtitles to a file
 286     writeautomaticsub: Write the automatically generated subtitles to a file
 287     allsubtitles:      Deprecated - Use subtitleslangs = ['all']
 288                        Downloads all the subtitles of the video
 289                        (requires writesubtitles or writeautomaticsub)
 290     listsubtitles:     Lists all available subtitles for the video
 291     subtitlesformat:   The format code for subtitles
 292     subtitleslangs:    List of languages of the subtitles to download (can be regex).
 293                        The list may contain "all" to refer to all the available
 294                        subtitles. The language can be prefixed with a "-" to
 295                        exclude it from the requested languages. Eg: ['all', '-live_chat']
 296     keepvideo:         Keep the video file after post-processing
 297     daterange:         A DateRange object, download only if the upload_date is in the range.
 298     skip_download:     Skip the actual download of the video file
 299     cachedir:          Location of the cache files in the filesystem.
 300                        False to disable filesystem cache.
 301     noplaylist:        Download single video instead of a playlist if in doubt.
 302     age_limit:         An integer representing the user's age in years.
 303                        Unsuitable videos for the given age are skipped.
 304     min_views:         An integer representing the minimum view count the video
 305                        must have in order to not be skipped.
 306                        Videos without view count information are always
 307                        downloaded. None for no limit.
 308     max_views:         An integer representing the maximum view count.
 309                        Videos that are more popular than that are not
 310                        downloaded.
 311                        Videos without view count information are always
 312                        downloaded. None for no limit.
 313     download_archive:  File name of a file where all downloads are recorded.
 314                        Videos already present in the file are not downloaded
 315                        again.
 316     break_on_existing: Stop the download process after attempting to download a
 317                        file that is in the archive.
 318     break_on_reject:   Stop the download process when encountering a video that
 319                        has been filtered out.
 320     break_per_url:     Whether break_on_reject and break_on_existing
 321                        should act on each input URL as opposed to for the entire queue
 322     cookiefile:        File name where cookies should be read from and dumped to
 323     cookiesfrombrowser:  A tuple containing the name of the browser, the profile
 324                        name/path from where cookies are loaded, and the name of the
 325                        keyring. Eg: ('chrome', ) or ('vivaldi', 'default', 'BASICTEXT')
 326     nocheckcertificate:  Do not verify SSL certificates
 327     prefer_insecure:   Use HTTP instead of HTTPS to retrieve information.
 328                        At the moment, this is only supported by YouTube.
 329     proxy:             URL of the proxy server to use
 330     geo_verification_proxy:  URL of the proxy to use for IP address verification
 331                        on geo-restricted sites.
 332     socket_timeout:    Time to wait for unresponsive hosts, in seconds
 333     bidi_workaround:   Work around buggy terminals without bidirectional text
 334                        support, using bidiv or fribidi
 335     debug_printtraffic:Print out sent and received HTTP traffic
 336     include_ads:       Download ads as well (deprecated)
 337     default_search:    Prepend this string if an input url is not valid.
 338                        'auto' for elaborate guessing
 339     encoding:          Use this encoding instead of the system-specified.
 340     extract_flat:      Do not resolve URLs, return the immediate result.
 341                        Pass in 'in_playlist' to only show this behavior for
 342                        playlist items.
 343     wait_for_video:    If given, wait for scheduled streams to become available.
 344                        The value should be a tuple containing the range
 345                        (min_secs, max_secs) to wait between retries
 346     postprocessors:    A list of dictionaries, each with an entry
 347                        * key:  The name of the postprocessor. See
 348                                yt_dlp/postprocessor/__init__.py for a list.
 349                        * when: When to run the postprocessor. Can be one of
 350                                pre_process|before_dl|post_process|after_move.
 351                                Assumed to be 'post_process' if not given
 352     post_hooks:        Deprecated - Register a custom postprocessor instead
 353                        A list of functions that get called as the final step
 354                        for each video file, after all postprocessors have been
 355                        called. The filename will be passed as the only argument.
 356     progress_hooks:    A list of functions that get called on download
 357                        progress, with a dictionary with the entries
 358                        * status: One of "downloading", "error", or "finished".
 359                                  Check this first and ignore unknown values.
 360                        * info_dict: The extracted info_dict
 361
 362                        If status is one of "downloading", or "finished", the
 363                        following properties may also be present:
 364                        * filename: The final filename (always present)
 365                        * tmpfilename: The filename we're currently writing to
 366                        * downloaded_bytes: Bytes on disk
 367                        * total_bytes: Size of the whole file, None if unknown
 368                        * total_bytes_estimate: Guess of the eventual file size,
 369                                                None if unavailable.
 370                        * elapsed: The number of seconds since download started.
 371                        * eta: The estimated time in seconds, None if unknown
 372                        * speed: The download speed in bytes/second, None if
 373                                 unknown
 374                        * fragment_index: The counter of the currently
 375                                          downloaded video fragment.
 376                        * fragment_count: The number of fragments (= individual
 377                                          files that will be merged)
 378
 379                        Progress hooks are guaranteed to be called at least once
 380                        (with status "finished") if the download is successful.
 381     postprocessor_hooks:  A list of functions that get called on postprocessing
 382                        progress, with a dictionary with the entries
 383                        * status: One of "started", "processing", or "finished".
 384                                  Check this first and ignore unknown values.
 385                        * postprocessor: Name of the postprocessor
 386                        * info_dict: The extracted info_dict
 387
 388                        Postprocessor hooks are guaranteed to be called at least twice
 389                        (with status "started" and "finished") if the processing is successful.
 390     merge_output_format: Extension to use when merging formats.
 391     final_ext:         Expected final extension; used to detect when the file was
 392                        already downloaded and converted
 393     fixup:             Automatically correct known faults of the file.
 394                        One of:
 395                        - "never": do nothing
 396                        - "warn": only emit a warning
 397                        - "detect_or_warn": check whether we can do anything
 398                                            about it, warn otherwise (default)
 399     source_address:    Client-side IP address to bind to.
 400     call_home:         Boolean, true iff we are allowed to contact the
 401                        yt-dlp servers for debugging. (BROKEN)
 402     sleep_interval_requests: Number of seconds to sleep between requests
 403                        during extraction
 404     sleep_interval:    Number of seconds to sleep before each download when
 405                        used alone or a lower bound of a range for randomized
 406                        sleep before each download (minimum possible number
 407                        of seconds to sleep) when used along with
 408                        max_sleep_interval.
 409     max_sleep_interval:Upper bound of a range for randomized sleep before each
 410                        download (maximum possible number of seconds to sleep).
 411                        Must only be used along with sleep_interval.
 412                        Actual sleep time will be a random float from range
 413                        [sleep_interval; max_sleep_interval].
 414     sleep_interval_subtitles: Number of seconds to sleep before each subtitle download
 415     listformats:       Print an overview of available video formats and exit.
 416     list_thumbnails:   Print a table of all thumbnails and exit.
 417     match_filter:      A function that gets called with the info_dict of
 418                        every video.
 419                        If it returns a message, the video is ignored.
 420                        If it returns None, the video is downloaded.
 421                        match_filter_func in utils.py is one example for this.
 422     no_color:          Do not emit color codes in output.
 423     geo_bypass:        Bypass geographic restriction via faking X-Forwarded-For
 424                        HTTP header
 425     geo_bypass_country:
 426                        Two-letter ISO 3166-1 alpha-2 country code that will be used for
 427                        explicit geographic restriction bypassing via faking
 428                        X-Forwarded-For HTTP header
 429     geo_bypass_ip_block:
 430                        IP range in CIDR notation that will be used similarly to
 431                        geo_bypass_country
 432
 433     The following options determine which downloader is picked:
 434     external_downloader: A dictionary of protocol keys and the executable of the
 435                        external downloader to use for it. The allowed protocols
 436                        are default|http|ftp|m3u8|dash|rtsp|rtmp|mms.
 437                        Set the value to 'native' to use the native downloader
 438     hls_prefer_native: Deprecated - Use external_downloader = {'m3u8': 'native'}
 439                        or {'m3u8': 'ffmpeg'} instead.
 440                        Use the native HLS downloader instead of ffmpeg/avconv
 441                        if True, otherwise use ffmpeg/avconv if False, otherwise
 442                        use downloader suggested by extractor if None.
 443     compat_opts:       Compatibility options. See "Differences in default behavior".
 444                        The following options do not work when used through the API:
 445                        filename, abort-on-error, multistreams, no-live-chat, format-sort
 446                        no-clean-infojson, no-playlist-metafiles, no-keep-subs, no-attach-info-json.
 447                        Refer to __init__.py for their implementation
 448     progress_template: Dictionary of templates for progress outputs.
 449                        Allowed keys are 'download', 'postprocess',
 450                        'download-title' (console title) and 'postprocess-title'.
 451                        The template is mapped on a dictionary with keys 'progress' and 'info'
 452
 453     The following parameters are not used by YoutubeDL itself, they are used by
 454     the downloader (see yt_dlp/downloader/common.py):
 455     nopart, updatetime, buffersize, ratelimit, throttledratelimit, min_filesize,
 456     max_filesize, test, noresizebuffer, retries, file_access_retries, fragment_retries,
 457     continuedl, noprogress, xattr_set_filesize, hls_use_mpegts, http_chunk_size,
 458     external_downloader_args, concurrent_fragment_downloads.
 459
 460     The following options are used by the post processors:
 461     prefer_ffmpeg:     If False, use avconv instead of ffmpeg if both are available,
 462                        otherwise prefer ffmpeg. (avconv support is deprecated)
 463     ffmpeg_location:   Location of the ffmpeg/avconv binary; either the path
 464                        to the binary or its containing directory.
 465     postprocessor_args: A dictionary of postprocessor/executable keys (in lower case)
 466                        and a list of additional command-line arguments for the
 467                        postprocessor/executable. The dict can also have "PP+EXE" keys
 468                        which are used when the given exe is used by the given PP.
 469                        Use 'default' as the name for arguments to be passed to all PP
 470                        For compatibility with youtube-dl, a single list of args
 471                        can also be used
 472
 473     The following options are used by the extractors:
 474     extractor_retries: Number of times to retry for known errors
 475     dynamic_mpd:       Whether to process dynamic DASH manifests (default: True)
 476     hls_split_discontinuity: Split HLS playlists to different formats at
 477                        discontinuities such as ad breaks (default: False)
 478     extractor_args:    A dictionary of arguments to be passed to the extractors.
 479                        See "EXTRACTOR ARGUMENTS" for details.
 480                        Eg: {'youtube': {'skip': ['dash', 'hls']}}
 481     youtube_include_dash_manifest: Deprecated - Use extractor_args instead.
 482                        If True (default), DASH manifests and related
 483                        data will be downloaded and processed by extractor.
 484                        You can reduce network I/O by disabling it if you don't
 485                        care about DASH. (only for youtube)
 486     youtube_include_hls_manifest: Deprecated - Use extractor_args instead.
 487                        If True (default), HLS manifests and related
 488                        data will be downloaded and processed by extractor.
 489                        You can reduce network I/O by disabling it if you don't
 490                        care about HLS. (only for youtube)
 491     """
 492
 493     _NUMERIC_FIELDS = set((
 494         'width', 'height', 'tbr', 'abr', 'asr', 'vbr', 'fps', 'filesize', 'filesize_approx',
 495         'timestamp', 'release_timestamp',
 496         'duration', 'view_count', 'like_count', 'dislike_count', 'repost_count',
 497         'average_rating', 'comment_count', 'age_limit',
 498         'start_time', 'end_time',
 499         'chapter_number', 'season_number', 'episode_number',
 500         'track_number', 'disc_number', 'release_year',
 501     ))
 502
 503     _format_selection_exts = {
 504         'audio': {'m4a', 'mp3', 'ogg', 'aac'},
 505         'video': {'mp4', 'flv', 'webm', '3gp'},
 506         'storyboards': {'mhtml'},
 507     }
 508
 509     params = None
 510     _ies = {}
 511     _pps = {k: [] for k in POSTPROCESS_WHEN}
 512     _printed_messages = set()
 513     _first_webpage_request = True
 514     _download_retcode = None
 515     _num_downloads = None
 516     _playlist_level = 0
 517     _playlist_urls = set()
 518     _screen_file = None
 519
 520     def __init__(self, params=None, auto_init=True):
 521         """Create a YoutubeDL object with the given options.
 522         @param auto_init    Whether to load the default extractors and print header (if verbose).
 523                             Set to 'no_verbose_header' to not print the header
 524         """
 525         if params is None:
 526             params = {}
 527         self._ies = {}
 528         self._ies_instances = {}
 529         self._pps = {k: [] for k in POSTPROCESS_WHEN}
 530         self._printed_messages = set()
 531         self._first_webpage_request = True
 532         self._post_hooks = []
 533         self._progress_hooks = []
 534         self._postprocessor_hooks = []
 535         self._download_retcode = 0
 536         self._num_downloads = 0
 537         self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
 538         self._err_file = sys.stderr
 539         self.params = params
 540         self.cache = Cache(self)
 541
 542         windows_enable_vt_mode()
 543         self._allow_colors = {
 544             'screen': not self.params.get('no_color') and supports_terminal_sequences(self._screen_file),
 545             'err': not self.params.get('no_color') and supports_terminal_sequences(self._err_file),
 546         }
 547
 548         if sys.version_info < (3, 6):
 549             self.report_warning(
 550                 'Python version %d.%d is not supported! Please update to Python 3.6 or above' % sys.version_info[:2])
 551
 552         if self.params.get('allow_unplayable_formats'):
 553             self.report_warning(
 554                 f'You have asked for {self._format_err("UNPLAYABLE", self.Styles.EMPHASIS)} formats to be listed/downloaded. '
 555                 'This is a developer option intended for debugging. \n'
 556                 '         If you experience any issues while using this option, '
 557                 f'{self._format_err("DO NOT", self.Styles.ERROR)} open a bug report')
 558
 559         def check_deprecated(param, option, suggestion):
 560             if self.params.get(param) is not None:
 561                 self.report_warning('%s is deprecated. Use %s instead' % (option, suggestion))
 562                 return True
 563             return False
 564
 565         if check_deprecated('cn_verification_proxy', '--cn-verification-proxy', '--geo-verification-proxy'):
 566             if self.params.get('geo_verification_proxy') is None:
 567                 self.params['geo_verification_proxy'] = self.params['cn_verification_proxy']
 568
 569         check_deprecated('autonumber', '--auto-number', '-o "%(autonumber)s-%(title)s.%(ext)s"')
 570         check_deprecated('usetitle', '--title', '-o "%(title)s-%(id)s.%(ext)s"')
 571         check_deprecated('useid', '--id', '-o "%(id)s.%(ext)s"')
 572
 573         for msg in self.params.get('_warnings', []):
 574             self.report_warning(msg)
 575         for msg in self.params.get('_deprecation_warnings', []):
 576             self.deprecation_warning(msg)
 577
 578         if 'list-formats' in self.params.get('compat_opts', []):
 579             self.params['listformats_table'] = False
 580
 581         if 'overwrites' not in self.params and self.params.get('nooverwrites') is not None:
 582             # nooverwrites was unnecessarily changed to overwrites
 583             # in 0c3d0f51778b153f65c21906031c2e091fcfb641
 584             # This ensures compatibility with both keys
 585             self.params['overwrites'] = not self.params['nooverwrites']
 586         elif self.params.get('overwrites') is None:
 587             self.params.pop('overwrites', None)
 588         else:
 589             self.params['nooverwrites'] = not self.params['overwrites']
 590
 591         # Compatibility with older syntax
 592         params.setdefault('forceprint', {})
 593         if not isinstance(params['forceprint'], dict):
 594             params['forceprint'] = {'video': params['forceprint']}
 595
 596         if params.get('bidi_workaround', False):
 597             try:
 598                 import pty
 599                 master, slave = pty.openpty()
 600                 width = compat_get_terminal_size().columns
 601                 if width is None:
 602                     width_args = []
 603                 else:
 604                     width_args = ['-w', str(width)]
 605                 sp_kwargs = dict(
 606                     stdin=subprocess.PIPE,
 607                     stdout=slave,
 608                     stderr=self._err_file)
 609                 try:
 610                     self._output_process = Popen(['bidiv'] + width_args, **sp_kwargs)
 611                 except OSError:
 612                     self._output_process = Popen(['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
 613                 self._output_channel = os.fdopen(master, 'rb')
 614             except OSError as ose:
 615                 if ose.errno == errno.ENOENT:
 616                     self.report_warning(
 617                         'Could not find fribidi executable, ignoring --bidi-workaround. '
 618                         'Make sure that  fribidi  is an executable file in one of the directories in your $PATH.')
 619                 else:
 620                     raise
 621
 622         if (sys.platform != 'win32'
 623                 and sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
 624                 and not params.get('restrictfilenames', False)):
 625             # Unicode filesystem API will throw errors (#1474, #13027)
 626             self.report_warning(
 627                 'Assuming --restrict-filenames since file system encoding '
 628                 'cannot encode all characters. '
 629                 'Set the LC_ALL environment variable to fix this.')
 630             self.params['restrictfilenames'] = True
 631
 632         self.outtmpl_dict = self.parse_outtmpl()
 633
 634         # Creating format selector here allows us to catch syntax errors before the extraction
 635         self.format_selector = (
 636             self.params.get('format') if self.params.get('format') in (None, '-')
 637             else self.params['format'] if callable(self.params['format'])
 638             else self.build_format_selector(self.params['format']))
 639
 640         self._setup_opener()
 641
 642         if auto_init:
 643             if auto_init != 'no_verbose_header':
 644                 self.print_debug_header()
 645             self.add_default_info_extractors()
 646
 647         hooks = {
 648             'post_hooks': self.add_post_hook,
 649             'progress_hooks': self.add_progress_hook,
 650             'postprocessor_hooks': self.add_postprocessor_hook,
 651         }
 652         for opt, fn in hooks.items():
 653             for ph in self.params.get(opt, []):
 654                 fn(ph)
 655
 656         for pp_def_raw in self.params.get('postprocessors', []):
 657             pp_def = dict(pp_def_raw)
 658             when = pp_def.pop('when', 'post_process')
 659             self.add_post_processor(
 660                 get_postprocessor(pp_def.pop('key'))(self, **compat_kwargs(pp_def)),
 661                 when=when)
 662
 663         register_socks_protocols()
 664
 665         def preload_download_archive(fn):
 666             """Preload the archive, if any is specified"""
 667             if fn is None:
 668                 return False
 669             self.write_debug(f'Loading archive file {fn!r}')
 670             try:
 671                 with locked_file(fn, 'r', encoding='utf-8') as archive_file:
 672                     for line in archive_file:
 673                         self.archive.add(line.strip())
 674             except IOError as ioe:
 675                 if ioe.errno != errno.ENOENT:
 676                     raise
 677                 return False
 678             return True
 679
 680         self.archive = set()
 681         preload_download_archive(self.params.get('download_archive'))
 682
 683     def warn_if_short_id(self, argv):
 684         # short YouTube ID starting with dash?
 685         idxs = [
 686             i for i, a in enumerate(argv)
 687             if re.match(r'^-[0-9A-Za-z_-]{10}$', a)]
 688         if idxs:
 689             correct_argv = (
 690                 ['yt-dlp']
 691                 + [a for i, a in enumerate(argv) if i not in idxs]
 692                 + ['--'] + [argv[i] for i in idxs]
 693             )
 694             self.report_warning(
 695                 'Long argument string detected. '
 696                 'Use -- to separate parameters and URLs, like this:\n%s' %
 697                 args_to_str(correct_argv))
 698
 699     def add_info_extractor(self, ie):
 700         """Add an InfoExtractor object to the end of the list."""
 701         ie_key = ie.ie_key()
 702         self._ies[ie_key] = ie
 703         if not isinstance(ie, type):
 704             self._ies_instances[ie_key] = ie
 705             ie.set_downloader(self)
 706
 707     def _get_info_extractor_class(self, ie_key):
 708         ie = self._ies.get(ie_key)
 709         if ie is None:
 710             ie = get_info_extractor(ie_key)
 711             self.add_info_extractor(ie)
 712         return ie
 713
 714     def get_info_extractor(self, ie_key):
 715         """
 716         Get an instance of an IE with name ie_key, it will try to get one from
 717         the _ies list, if there's no instance it will create a new one and add
 718         it to the extractor list.
 719         """
 720         ie = self._ies_instances.get(ie_key)
 721         if ie is None:
 722             ie = get_info_extractor(ie_key)()
 723             self.add_info_extractor(ie)
 724         return ie
 725
 726     def add_default_info_extractors(self):
 727         """
 728         Add the InfoExtractors returned by gen_extractors to the end of the list
 729         """
 730         for ie in gen_extractor_classes():
 731             self.add_info_extractor(ie)
 732
 733     def add_post_processor(self, pp, when='post_process'):
 734         """Add a PostProcessor object to the end of the chain."""
 735         self._pps[when].append(pp)
 736         pp.set_downloader(self)
 737
 738     def add_post_hook(self, ph):
 739         """Add the post hook"""
 740         self._post_hooks.append(ph)
 741
 742     def add_progress_hook(self, ph):
 743         """Add the download progress hook"""
 744         self._progress_hooks.append(ph)
 745
 746     def add_postprocessor_hook(self, ph):
 747         """Add the postprocessing progress hook"""
 748         self._postprocessor_hooks.append(ph)
 749         for pps in self._pps.values():
 750             for pp in pps:
 751                 pp.add_progress_hook(ph)
 752
 753     def _bidi_workaround(self, message):
 754         if not hasattr(self, '_output_channel'):
 755             return message
 756
 757         assert hasattr(self, '_output_process')
 758         assert isinstance(message, compat_str)
 759         line_count = message.count('\n') + 1
 760         self._output_process.stdin.write((message + '\n').encode('utf-8'))
 761         self._output_process.stdin.flush()
 762         res = ''.join(self._output_channel.readline().decode('utf-8')
 763                       for _ in range(line_count))
 764         return res[:-len('\n')]
 765
 766     def _write_string(self, message, out=None, only_once=False):
 767         if only_once:
 768             if message in self._printed_messages:
 769                 return
 770             self._printed_messages.add(message)
 771         write_string(message, out=out, encoding=self.params.get('encoding'))
 772
 773     def to_stdout(self, message, skip_eol=False, quiet=False):
 774         """Print message to stdout"""
 775         if self.params.get('logger'):
 776             self.params['logger'].debug(message)
 777         elif not quiet or self.params.get('verbose'):
 778             self._write_string(
 779                 '%s%s' % (self._bidi_workaround(message), ('' if skip_eol else '\n')),
 780                 self._err_file if quiet else self._screen_file)
 781
 782     def to_stderr(self, message, only_once=False):
 783         """Print message to stderr"""
 784         assert isinstance(message, compat_str)
 785         if self.params.get('logger'):
 786             self.params['logger'].error(message)
 787         else:
 788             self._write_string('%s\n' % self._bidi_workaround(message), self._err_file, only_once=only_once)
 789
 790     def to_console_title(self, message):
 791         if not self.params.get('consoletitle', False):
 792             return
 793         message = remove_terminal_sequences(message)
 794         if compat_os_name == 'nt':
 795             if ctypes.windll.kernel32.GetConsoleWindow():
 796                 # c_wchar_p() might not be necessary if `message` is
 797                 # already of type unicode()
 798                 ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
 799         elif 'TERM' in os.environ:
 800             self._write_string('\033]0;%s\007' % message, self._screen_file)
 801
 802     def save_console_title(self):
 803         if not self.params.get('consoletitle', False):
 804             return
 805         if self.params.get('simulate'):
 806             return
 807         if compat_os_name != 'nt' and 'TERM' in os.environ:
 808             # Save the title on stack
 809             self._write_string('\033[22;0t', self._screen_file)
 810
 811     def restore_console_title(self):
 812         if not self.params.get('consoletitle', False):
 813             return
 814         if self.params.get('simulate'):
 815             return
 816         if compat_os_name != 'nt' and 'TERM' in os.environ:
 817             # Restore the title from stack
 818             self._write_string('\033[23;0t', self._screen_file)
 819
 820     def __enter__(self):
 821         self.save_console_title()
 822         return self
 823
 824     def __exit__(self, *args):
 825         self.restore_console_title()
 826
 827         if self.params.get('cookiefile') is not None:
 828             self.cookiejar.save(ignore_discard=True, ignore_expires=True)
 829
 830     def trouble(self, message=None, tb=None, is_error=True):
 831         """Determine action to take when a download problem appears.
 832
 833         Depending on if the downloader has been configured to ignore
 834         download errors or not, this method may throw an exception or
 835         not when errors are found, after printing the message.
 836
 837         @param tb          If given, is additional traceback information
 838         @param is_error    Whether to raise error according to ignorerrors
 839         """
 840         if message is not None:
 841             self.to_stderr(message)
 842         if self.params.get('verbose'):
 843             if tb is None:
 844                 if sys.exc_info()[0]:  # if .trouble has been called from an except block
 845                     tb = ''
 846                     if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
 847                         tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
 848                     tb += encode_compat_str(traceback.format_exc())
 849                 else:
 850                     tb_data = traceback.format_list(traceback.extract_stack())
 851                     tb = ''.join(tb_data)
 852             if tb:
 853                 self.to_stderr(tb)
 854         if not is_error:
 855             return
 856         if not self.params.get('ignoreerrors'):
 857             if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
 858                 exc_info = sys.exc_info()[1].exc_info
 859             else:
 860                 exc_info = sys.exc_info()
 861             raise DownloadError(message, exc_info)
 862         self._download_retcode = 1
 863
 864     def to_screen(self, message, skip_eol=False):
 865         """Print message to stdout if not in quiet mode"""
 866         self.to_stdout(
 867             message, skip_eol, quiet=self.params.get('quiet', False))
 868
    class Styles(Enum):
        """Named text styles; _format_text() passes a member's value on to format_text()."""
        # NOTE: HEADERS and WARNING share the value 'yellow', so Enum makes
        # WARNING an alias of HEADERS (Styles.WARNING is Styles.HEADERS)
        HEADERS = 'yellow'
        EMPHASIS = 'light blue'
        ID = 'green'
        DELIM = 'blue'
        ERROR = 'red'
        WARNING = 'yellow'
        SUPPRESS = 'light black'
 877
 878     def _format_text(self, handle, allow_colors, text, f, fallback=None, *, test_encoding=False):
 879         if test_encoding:
 880             original_text = text
 881             encoding = self.params.get('encoding') or getattr(handle, 'encoding', 'ascii')
 882             text = text.encode(encoding, 'ignore').decode(encoding)
 883             if fallback is not None and text != original_text:
 884                 text = fallback
 885         if isinstance(f, self.Styles):
 886             f = f.value
 887         return format_text(text, f) if allow_colors else text if fallback is None else fallback
 888
 889     def _format_screen(self, *args, **kwargs):
 890         return self._format_text(
 891             self._screen_file, self._allow_colors['screen'], *args, **kwargs)
 892
 893     def _format_err(self, *args, **kwargs):
 894         return self._format_text(
 895             self._err_file, self._allow_colors['err'], *args, **kwargs)
 896
 897     def report_warning(self, message, only_once=False):
 898         '''
 899         Print the message to stderr, it will be prefixed with 'WARNING:'
 900         If stderr is a tty file the 'WARNING:' will be colored
 901         '''
 902         if self.params.get('logger') is not None:
 903             self.params['logger'].warning(message)
 904         else:
 905             if self.params.get('no_warnings'):
 906                 return
 907             self.to_stderr(f'{self._format_err("WARNING:", self.Styles.WARNING)} {message}', only_once)
 908
 909     def deprecation_warning(self, message):
 910         if self.params.get('logger') is not None:
 911             self.params['logger'].warning('DeprecationWarning: {message}')
 912         else:
 913             self.to_stderr(f'{self._format_err("DeprecationWarning:", self.Styles.ERROR)} {message}', True)
 914
 915     def report_error(self, message, *args, **kwargs):
 916         '''
 917         Do the same as trouble, but prefixes the message with 'ERROR:', colored
 918         in red if stderr is a tty file.
 919         '''
 920         self.trouble(f'{self._format_err("ERROR:", self.Styles.ERROR)} {message}', *args, **kwargs)
 921
 922     def write_debug(self, message, only_once=False):
 923         '''Log debug message or Print message to stderr'''
 924         if not self.params.get('verbose', False):
 925             return
 926         message = '[debug] %s' % message
 927         if self.params.get('logger'):
 928             self.params['logger'].debug(message)
 929         else:
 930             self.to_stderr(message, only_once)
 931
 932     def report_file_already_downloaded(self, file_name):
 933         """Report file has already been fully downloaded."""
 934         try:
 935             self.to_screen('[download] %s has already been downloaded' % file_name)
 936         except UnicodeEncodeError:
 937             self.to_screen('[download] The file has already been downloaded')
 938
 939     def report_file_delete(self, file_name):
 940         """Report that existing file will be deleted."""
 941         try:
 942             self.to_screen('Deleting existing file %s' % file_name)
 943         except UnicodeEncodeError:
 944             self.to_screen('Deleting existing file')
 945
 946     def raise_no_formats(self, info, forced=False):
 947         has_drm = info.get('__has_drm')
 948         msg = 'This video is DRM protected' if has_drm else 'No video formats found!'
 949         expected = self.params.get('ignore_no_formats_error')
 950         if forced or not expected:
 951             raise ExtractorError(msg, video_id=info['id'], ie=info['extractor'],
 952                                  expected=has_drm or expected)
 953         else:
 954             self.report_warning(msg)
 955
 956     def parse_outtmpl(self):
 957         outtmpl_dict = self.params.get('outtmpl', {})
 958         if not isinstance(outtmpl_dict, dict):
 959             outtmpl_dict = {'default': outtmpl_dict}
 960         # Remove spaces in the default template
 961         if self.params.get('restrictfilenames'):
 962             sanitize = lambda x: x.replace(' - ', ' ').replace(' ', '-')
 963         else:
 964             sanitize = lambda x: x
 965         outtmpl_dict.update({
 966             k: sanitize(v) for k, v in DEFAULT_OUTTMPL.items()
 967             if outtmpl_dict.get(k) is None})
 968         for key, val in outtmpl_dict.items():
 969             if isinstance(val, bytes):
 970                 self.report_warning(
 971                     'Parameter outtmpl is bytes, but should be a unicode string. '
 972                     'Put  from __future__ import unicode_literals  at the top of your code file or consider switching to Python 3.x.')
 973         return outtmpl_dict
 974
 975     def get_output_path(self, dir_type='', filename=None):
 976         paths = self.params.get('paths', {})
 977         assert isinstance(paths, dict)
 978         path = os.path.join(
 979             expand_path(paths.get('home', '').strip()),
 980             expand_path(paths.get(dir_type, '').strip()) if dir_type else '',
 981             filename or '')
 982
 983         # Temporary fix for #4787
 984         # 'Treat' all problem characters by passing filename through preferredencoding
 985         # to workaround encoding issues with subprocess on python2 @ Windows
 986         if sys.version_info < (3, 0) and sys.platform == 'win32':
 987             path = encodeFilename(path, True).decode(preferredencoding())
 988         return sanitize_path(path, force=self.params.get('windowsfilenames'))
 989
 990     @staticmethod
 991     def _outtmpl_expandpath(outtmpl):
 992         # expand_path translates '%%' into '%' and '$$' into '$'
 993         # correspondingly that is not what we want since we need to keep
 994         # '%%' intact for template dict substitution step. Working around
 995         # with boundary-alike separator hack.
 996         sep = ''.join([random.choice(ascii_letters) for _ in range(32)])
 997         outtmpl = outtmpl.replace('%%', '%{0}%'.format(sep)).replace('$$', '${0}$'.format(sep))
 998
 999         # outtmpl should be expand_path'ed before template dict substitution
1000         # because meta fields may contain env variables we don't want to
1001         # be expanded. For example, for outtmpl "%(title)s.%(ext)s" and
1002         # title "Hello $PATH", we don't want `$PATH` to be expanded.
1003         return expand_path(outtmpl).replace(sep, '')
1004
1005     @staticmethod
1006     def escape_outtmpl(outtmpl):
1007         ''' Escape any remaining strings like %s, %abc% etc. '''
1008         return re.sub(
1009             STR_FORMAT_RE_TMPL.format('', '(?![%(\0])'),
1010             lambda mobj: ('' if mobj.group('has_key') else '%') + mobj.group(0),
1011             outtmpl)
1012
1013     @classmethod
1014     def validate_outtmpl(cls, outtmpl):
1015         ''' @return None or Exception object '''
1016         outtmpl = re.sub(
1017             STR_FORMAT_RE_TMPL.format('[^)]*', '[ljqBUDS]'),
1018             lambda mobj: f'{mobj.group(0)[:-1]}s',
1019             cls._outtmpl_expandpath(outtmpl))
1020         try:
1021             cls.escape_outtmpl(outtmpl) % collections.defaultdict(int)
1022             return None
1023         except ValueError as err:
1024             return err
1025
1026     @staticmethod
1027     def _copy_infodict(info_dict):
1028         info_dict = dict(info_dict)
1029         for key in ('__original_infodict', '__postprocessors'):
1030             info_dict.pop(key, None)
1031         return info_dict
1032
    def prepare_outtmpl(self, outtmpl, info_dict, sanitize=False):
        """ Make the outtmpl and info_dict suitable for substitution: ydl.escape_outtmpl(outtmpl) % info_dict
        @param outtmpl     The output template to process
        @param info_dict   The info dict the field values are taken from.
                           'epoch' is set on it if missing; all other changes
                           are made on a copy
        @param sanitize    Whether to sanitize the output as a filename.
                           For backward compatibility, a function can also be passed
        @return            A tuple (outtmpl, TMPL_DICT): the rewritten template and a
                           dict holding the computed value of each key used in it
        """

        info_dict.setdefault('epoch', int(time.time()))  # keep epoch consistent once set

        # From here on, only a shallow copy is modified
        info_dict = self._copy_infodict(info_dict)
        info_dict['duration_string'] = (  # %(duration>%H-%M-%S)s is wrong if duration > 24hrs
            formatSeconds(info_dict['duration'], '-' if sanitize else ':')
            if info_dict.get('duration', None) is not None
            else None)
        info_dict['autonumber'] = self.params.get('autonumber_start', 1) - 1 + self._num_downloads
        if info_dict.get('resolution') is None:
            info_dict['resolution'] = self.format_resolution(info_dict, default=None)

        # For fields playlist_index, playlist_autonumber and autonumber convert all occurrences
        # of %(field)s to %(field)0Nd for backward compatibility
        field_size_compat_map = {
            'playlist_index': number_of_digits(info_dict.get('_last_playlist_index') or 0),
            'playlist_autonumber': number_of_digits(info_dict.get('n_entries') or 0),
            'autonumber': self.params.get('autonumber_size') or 5,
        }

        # Filled by create_key(); maps each generated template key to its value
        TMPL_DICT = {}
        EXTERNAL_FORMAT_RE = re.compile(STR_FORMAT_RE_TMPL.format('[^)]*', f'[{STR_FORMAT_TYPES}ljqBUDS]'))
        MATH_FUNCTIONS = {
            '+': float.__add__,
            '-': float.__sub__,
        }
        # Field is of the form key1.key2...
        # where keys (except first) can be string, int or slice
        FIELD_RE = r'\w*(?:\.(?:\w+|{num}|{num}?(?::{num}?){{1,2}}))*'.format(num=r'(?:-?\d+)')
        # NOTE(review): the '.' in the num pattern below is unescaped and so matches
        # any character, not just a decimal point - confirm whether r'\.' was intended
        MATH_FIELD_RE = r'''(?:{field}|{num})'''.format(field=FIELD_RE, num=r'-?\d+(?:.\d+)?')
        MATH_OPERATORS_RE = r'(?:%s)' % '|'.join(map(re.escape, MATH_FUNCTIONS.keys()))
        # Parses the text inside %(...): optional negation, the field, maths,
        # a strftime format after '>', a ','-separated alternate,
        # a replacement after '&' and a default after '|'
        INTERNAL_FORMAT_RE = re.compile(r'''(?x)
            (?P<negate>-)?
            (?P<fields>{field})
            (?P<maths>(?:{math_op}{math_field})*)
            (?:>(?P<strf_format>.+?))?
            (?P<alternate>(?<!\\),[^|&)]+)?
            (?:&(?P<replacement>.*?))?
            (?:\|(?P<default>.*?))?
            $'''.format(field=FIELD_RE, math_op=MATH_OPERATORS_RE, math_field=MATH_FIELD_RE))

        def _traverse_infodict(k):
            # Look up a dotted path in info_dict; an empty first component
            # (path starting with '.') is dropped
            k = k.split('.')
            if k[0] == '':
                k.pop(0)
            return traverse_obj(info_dict, k, is_user_input=True, traverse_string=True)

        def get_value(mdict):
            # Compute the value for one parsed field expression
            # (mdict is the groupdict of an INTERNAL_FORMAT_RE match)
            # Object traversal
            value = _traverse_infodict(mdict['fields'])
            # Negative
            if mdict['negate']:
                value = float_or_none(value)
                if value is not None:
                    value *= -1
            # Do maths
            offset_key = mdict['maths']
            if offset_key:
                value = float_or_none(value)
                # Holds the pending math function; note that this local name
                # shadows the imported `operator` module inside get_value
                operator = None
                # offset_key is consumed as alternating operator and operand tokens
                while offset_key:
                    item = re.match(
                        MATH_FIELD_RE if operator else MATH_OPERATORS_RE,
                        offset_key).group(0)
                    offset_key = offset_key[len(item):]
                    if operator is None:
                        operator = MATH_FUNCTIONS[item]
                        continue
                    item, multiplier = (item[1:], -1) if item[0] == '-' else (item, 1)
                    # The operand is either a number or the name of another field
                    offset = float_or_none(item)
                    if offset is None:
                        offset = float_or_none(_traverse_infodict(item))
                    try:
                        value = operator(value, multiplier * offset)
                    except (TypeError, ZeroDivisionError):
                        # E.g. value or offset is None; the whole expression has no value
                        return None
                    operator = None
            # Datetime formatting
            if mdict['strf_format']:
                value = strftime_or_none(value, mdict['strf_format'].replace('\\,', ','))

            return value

        # Placeholder used when a field has no value and no default is given
        na = self.params.get('outtmpl_na_placeholder', 'NA')

        def filename_sanitizer(key, value, restricted=self.params.get('restrictfilenames')):
            # is_id is set when the key is 'id' or has 'id' as a component
            # delimited by '_' or '.' on the left and '.' or the end on the right
            return sanitize_filename(str(value), restricted=restricted,
                                     is_id=re.search(r'(^|[_.])id(\.|$)', key))

        # `sanitize` may be a function (backward compatibility); after this,
        # `sanitizer` is the callable to use and `sanitize` is a plain flag
        sanitizer = sanitize if callable(sanitize) else filename_sanitizer
        sanitize = bool(sanitize)

        def _dumpjson_default(obj):
            # json.dumps fallback: sets and LazyLists become lists, anything else its repr
            if isinstance(obj, (set, LazyList)):
                return list(obj)
            return repr(obj)

        def create_key(outer_mobj):
            # Replacement callback for EXTERNAL_FORMAT_RE: computes the value of one
            # template field, stores it in TMPL_DICT and returns the rewritten field
            if not outer_mobj.group('has_key'):
                # No key was matched; keep the text as it is
                return outer_mobj.group(0)
            key = outer_mobj.group('key')
            mobj = re.match(INTERNAL_FORMAT_RE, key)
            initial_field = mobj.group('fields') if mobj else ''
            value, replacement, default = None, None, na
            # Walk through the ','-separated alternates until one gives a value
            while mobj:
                mobj = mobj.groupdict()
                default = mobj['default'] if mobj['default'] is not None else default
                value = get_value(mobj)
                replacement = mobj['replacement']
                if value is None and mobj['alternate']:
                    mobj = re.match(INTERNAL_FORMAT_RE, mobj['alternate'][1:])
                else:
                    break

            fmt = outer_mobj.group('format')
            if fmt == 's' and value is not None and key in field_size_compat_map.keys():
                fmt = '0{:d}d'.format(field_size_compat_map[key])

            # No value: use the default. Otherwise the replacement (if given) wins over the value
            value = default if value is None else value if replacement is None else replacement

            flags = outer_mobj.group('conversion') or ''
            # Same format with the conversion type swapped for 's'
            str_fmt = f'{fmt[:-1]}s'
            if fmt[-1] == 'l':  # list
                delim = '\n' if '#' in flags else ', '
                value, fmt = delim.join(variadic(value, allowed_types=(str, bytes))), str_fmt
            elif fmt[-1] == 'j':  # json
                value, fmt = json.dumps(value, default=_dumpjson_default, indent=4 if '#' in flags else None), str_fmt
            elif fmt[-1] == 'q':  # quoted
                value = map(str, variadic(value) if '#' in flags else [value])
                value, fmt = ' '.join(map(compat_shlex_quote, value)), str_fmt
            elif fmt[-1] == 'B':  # bytes
                # Apply the format on the UTF-8 bytes; undecodable remains are dropped
                value = f'%{str_fmt}'.encode('utf-8') % str(value).encode('utf-8')
                value, fmt = value.decode('utf-8', 'ignore'), 's'
            elif fmt[-1] == 'U':  # unicode normalized
                value, fmt = unicodedata.normalize(
                    # "+" = compatibility equivalence, "#" = NFD
                    'NF%s%s' % ('K' if '+' in flags else '', 'D' if '#' in flags else 'C'),
                    value), str_fmt
            elif fmt[-1] == 'D':  # decimal suffix
                num_fmt, fmt = fmt[:-1].replace('#', ''), 's'
                value = format_decimal_suffix(value, f'%{num_fmt}f%s' if num_fmt else '%d%s',
                                              factor=1024 if '#' in flags else 1000)
            elif fmt[-1] == 'S':  # filename sanitization
                value, fmt = filename_sanitizer(initial_field, value, restricted='#' in flags), str_fmt
            elif fmt[-1] == 'c':
                # First character of the value; a falsy value is formatted as a string instead
                if value:
                    value = str(value)[0]
                else:
                    fmt = str_fmt
            elif fmt[-1] not in 'rs':  # numeric
                value = float_or_none(value)
                if value is None:
                    value, fmt = default, 's'

            if sanitize:
                if fmt[-1] == 'r':
                    # If value is an object, sanitize might convert it to a string
                    # So we convert it to repr first
                    value, fmt = repr(value), str_fmt
                if fmt[-1] in 'csr':
                    value = sanitizer(initial_field, value)

            # The dict key combines the field expression (each '%' followed by NUL)
            # and the original format, separated by NUL
            key = '%s\0%s' % (key.replace('%', '%\0'), outer_mobj.group('format'))
            TMPL_DICT[key] = value
            return '{prefix}%({key}){fmt}'.format(key=key, fmt=fmt, prefix=outer_mobj.group('prefix'))

        return EXTERNAL_FORMAT_RE.sub(create_key, outtmpl), TMPL_DICT
1205
1206     def evaluate_outtmpl(self, outtmpl, info_dict, *args, **kwargs):
1207         outtmpl, info_dict = self.prepare_outtmpl(outtmpl, info_dict, *args, **kwargs)
1208         return self.escape_outtmpl(outtmpl) % info_dict
1209
1210     def _prepare_filename(self, info_dict, tmpl_type='default'):
1211         try:
1212             outtmpl = self._outtmpl_expandpath(self.outtmpl_dict.get(tmpl_type, self.outtmpl_dict['default']))
1213             filename = self.evaluate_outtmpl(outtmpl, info_dict, True)
1214
1215             force_ext = OUTTMPL_TYPES.get(tmpl_type)
1216             if filename and force_ext is not None:
1217                 filename = replace_extension(filename, force_ext, info_dict.get('ext'))
1218
1219             # https://github.com/blackjack4494/youtube-dlc/issues/85
1220             trim_file_name = self.params.get('trim_file_name', False)
1221             if trim_file_name:
1222                 no_ext, *ext = filename.rsplit('.', 2)
1223                 filename = join_nonempty(no_ext[:trim_file_name], *ext, delim='.')
1224
1225             return filename
1226         except ValueError as err:
1227             self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
1228             return None
1229
1230     def prepare_filename(self, info_dict, dir_type='', warn=False):
1231         """Generate the output filename."""
1232
1233         filename = self._prepare_filename(info_dict, dir_type or 'default')
1234         if not filename and dir_type not in ('', 'temp'):
1235             return ''
1236
1237         if warn:
1238             if not self.params.get('paths'):
1239                 pass
1240             elif filename == '-':
1241                 self.report_warning('--paths is ignored when an outputting to stdout', only_once=True)
1242             elif os.path.isabs(filename):
1243                 self.report_warning('--paths is ignored since an absolute path is given in output template', only_once=True)
1244         if filename == '-' or not filename:
1245             return filename
1246
1247         return self.get_output_path(dir_type, filename)
1248
1249     def _match_entry(self, info_dict, incomplete=False, silent=False):
1250         """ Returns None if the file should be downloaded """
1251
1252         video_title = info_dict.get('title', info_dict.get('id', 'video'))
1253
1254         def check_filter():
1255             if 'title' in info_dict:
1256                 # This can happen when we're just evaluating the playlist
1257                 title = info_dict['title']
1258                 matchtitle = self.params.get('matchtitle', False)
1259                 if matchtitle:
1260                     if not re.search(matchtitle, title, re.IGNORECASE):
1261                         return '"' + title + '" title did not match pattern "' + matchtitle + '"'
1262                 rejecttitle = self.params.get('rejecttitle', False)
1263                 if rejecttitle:
1264                     if re.search(rejecttitle, title, re.IGNORECASE):
1265                         return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
1266             date = info_dict.get('upload_date')
1267             if date is not None:
1268                 dateRange = self.params.get('daterange', DateRange())
1269                 if date not in dateRange:
1270                     return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
1271             view_count = info_dict.get('view_count')
1272             if view_count is not None:
1273                 min_views = self.params.get('min_views')
1274                 if min_views is not None and view_count < min_views:
1275                     return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
1276                 max_views = self.params.get('max_views')
1277                 if max_views is not None and view_count > max_views:
1278                     return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
1279             if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
1280                 return 'Skipping "%s" because it is age restricted' % video_title
1281
1282             match_filter = self.params.get('match_filter')
1283             if match_filter is not None:
1284                 try:
1285                     ret = match_filter(info_dict, incomplete=incomplete)
1286                 except TypeError:
1287                     # For backward compatibility
1288                     ret = None if incomplete else match_filter(info_dict)
1289                 if ret is not None:
1290                     return ret
1291             return None
1292
1293         if self.in_download_archive(info_dict):
1294             reason = '%s has already been recorded in the archive' % video_title
1295             break_opt, break_err = 'break_on_existing', ExistingVideoReached
1296         else:
1297             reason = check_filter()
1298             break_opt, break_err = 'break_on_reject', RejectedVideoReached
1299         if reason is not None:
1300             if not silent:
1301                 self.to_screen('[download] ' + reason)
1302             if self.params.get(break_opt, False):
1303                 raise break_err()
1304         return reason
1305
1306     @staticmethod
1307     def add_extra_info(info_dict, extra_info):
1308         '''Set the keys from extra_info in info dict if they are missing'''
1309         for key, value in extra_info.items():
1310             info_dict.setdefault(key, value)
1311
1312     def extract_info(self, url, download=True, ie_key=None, extra_info=None,
1313                      process=True, force_generic_extractor=False):
1314         """
1315         Return a list with a dictionary for each video extracted.
1316
1317         Arguments:
1318         url -- URL to extract
1319
1320         Keyword arguments:
1321         download -- whether to download videos during extraction
1322         ie_key -- extractor key hint
1323         extra_info -- dictionary containing the extra values to add to each result
1324         process -- whether to resolve all unresolved references (URLs, playlist items),
1325             must be True for download to work.
1326         force_generic_extractor -- force using the generic extractor
1327         """
1328
1329         if extra_info is None:
1330             extra_info = {}
1331
1332         if not ie_key and force_generic_extractor:
1333             ie_key = 'Generic'
1334
1335         if ie_key:
1336             ies = {ie_key: self._get_info_extractor_class(ie_key)}
1337         else:
1338             ies = self._ies
1339
1340         for ie_key, ie in ies.items():
1341             if not ie.suitable(url):
1342                 continue
1343
1344             if not ie.working():
1345                 self.report_warning('The program functionality for this site has been marked as broken, '
1346                                     'and will probably not work.')
1347
1348             temp_id = ie.get_temp_id(url)
1349             if temp_id is not None and self.in_download_archive({'id': temp_id, 'ie_key': ie_key}):
1350                 self.to_screen(f'[{ie_key}] {temp_id}: has already been recorded in the archive')
1351                 if self.params.get('break_on_existing', False):
1352                     raise ExistingVideoReached()
1353                 break
1354             return self.__extract_info(url, self.get_info_extractor(ie_key), download, extra_info, process)
1355         else:
1356             self.report_error('no suitable InfoExtractor for URL %s' % url)
1357
1358     def __handle_extraction_exceptions(func):
1359         @functools.wraps(func)
1360         def wrapper(self, *args, **kwargs):
1361             while True:
1362                 try:
1363                     return func(self, *args, **kwargs)
1364                 except (DownloadCancelled, LazyList.IndexError, PagedList.IndexError):
1365                     raise
1366                 except ReExtractInfo as e:
1367                     if e.expected:
1368                         self.to_screen(f'{e}; Re-extracting data')
1369                     else:
1370                         self.to_stderr('\r')
1371                         self.report_warning(f'{e}; Re-extracting data')
1372                     continue
1373                 except GeoRestrictedError as e:
1374                     msg = e.msg
1375                     if e.countries:
1376                         msg += '\nThis video is available in %s.' % ', '.join(
1377                             map(ISO3166Utils.short2full, e.countries))
1378                     msg += '\nYou might want to use a VPN or a proxy server (with --proxy) to workaround.'
1379                     self.report_error(msg)
1380                 except ExtractorError as e:  # An error we somewhat expected
1381                     self.report_error(str(e), e.format_traceback())
1382                 except Exception as e:
1383                     if self.params.get('ignoreerrors'):
1384                         self.report_error(str(e), tb=encode_compat_str(traceback.format_exc()))
1385                     else:
1386                         raise
1387                 break
1388         return wrapper
1389
1390     def _wait_for_video(self, ie_result):
1391         if (not self.params.get('wait_for_video')
1392                 or ie_result.get('_type', 'video') != 'video'
1393                 or ie_result.get('formats') or ie_result.get('url')):
1394             return
1395
1396         format_dur = lambda dur: '%02d:%02d:%02d' % timetuple_from_msec(dur * 1000)[:-1]
1397         last_msg = ''
1398
1399         def progress(msg):
1400             nonlocal last_msg
1401             self.to_screen(msg + ' ' * (len(last_msg) - len(msg)) + '\r', skip_eol=True)
1402             last_msg = msg
1403
1404         min_wait, max_wait = self.params.get('wait_for_video')
1405         diff = try_get(ie_result, lambda x: x['release_timestamp'] - time.time())
1406         if diff is None and ie_result.get('live_status') == 'is_upcoming':
1407             diff = random.randrange(min_wait, max_wait) if (max_wait and min_wait) else (max_wait or min_wait)
1408             self.report_warning('Release time of video is not known')
1409         elif (diff or 0) <= 0:
1410             self.report_warning('Video should already be available according to extracted info')
1411         diff = min(max(diff or 0, min_wait or 0), max_wait or float('inf'))
1412         self.to_screen(f'[wait] Waiting for {format_dur(diff)} - Press Ctrl+C to try now')
1413
1414         wait_till = time.time() + diff
1415         try:
1416             while True:
1417                 diff = wait_till - time.time()
1418                 if diff <= 0:
1419                     progress('')
1420                     raise ReExtractInfo('[wait] Wait period ended', expected=True)
1421                 progress(f'[wait] Remaining time until next attempt: {self._format_screen(format_dur(diff), self.Styles.EMPHASIS)}')
1422                 time.sleep(1)
1423         except KeyboardInterrupt:
1424             progress('')
1425             raise ReExtractInfo('[wait] Interrupted by user', expected=True)
1426         except BaseException as e:
1427             if not isinstance(e, ReExtractInfo):
1428                 self.to_screen('')
1429             raise
1430
1431     @__handle_extraction_exceptions
1432     def __extract_info(self, url, ie, download, extra_info, process):
1433         ie_result = ie.extract(url)
1434         if ie_result is None:  # Finished already (backwards compatibility; listformats and friends should be moved here)
1435             return
1436         if isinstance(ie_result, list):
1437             # Backwards compatibility: old IE result format
1438             ie_result = {
1439                 '_type': 'compat_list',
1440                 'entries': ie_result,
1441             }
1442         if extra_info.get('original_url'):
1443             ie_result.setdefault('original_url', extra_info['original_url'])
1444         self.add_default_extra_info(ie_result, ie, url)
1445         if process:
1446             self._wait_for_video(ie_result)
1447             return self.process_ie_result(ie_result, download, extra_info)
1448         else:
1449             return ie_result
1450
1451     def add_default_extra_info(self, ie_result, ie, url):
1452         if url is not None:
1453             self.add_extra_info(ie_result, {
1454                 'webpage_url': url,
1455                 'original_url': url,
1456                 'webpage_url_basename': url_basename(url),
1457                 'webpage_url_domain': get_domain(url),
1458             })
1459         if ie is not None:
1460             self.add_extra_info(ie_result, {
1461                 'extractor': ie.IE_NAME,
1462                 'extractor_key': ie.ie_key(),
1463             })
1464
1465     def process_ie_result(self, ie_result, download=True, extra_info=None):
1466         """
1467         Take the result of the ie(may be modified) and resolve all unresolved
1468         references (URLs, playlist items).
1469
1470         It will also download the videos if 'download'.
1471         Returns the resolved ie_result.
1472         """
1473         if extra_info is None:
1474             extra_info = {}
1475         result_type = ie_result.get('_type', 'video')
1476
1477         if result_type in ('url', 'url_transparent'):
1478             ie_result['url'] = sanitize_url(ie_result['url'])
1479             if ie_result.get('original_url'):
1480                 extra_info.setdefault('original_url', ie_result['original_url'])
1481
1482             extract_flat = self.params.get('extract_flat', False)
1483             if ((extract_flat == 'in_playlist' and 'playlist' in extra_info)
1484                     or extract_flat is True):
1485                 info_copy = ie_result.copy()
1486                 ie = try_get(ie_result.get('ie_key'), self.get_info_extractor)
1487                 if ie and not ie_result.get('id'):
1488                     info_copy['id'] = ie.get_temp_id(ie_result['url'])
1489                 self.add_default_extra_info(info_copy, ie, ie_result['url'])
1490                 self.add_extra_info(info_copy, extra_info)
1491                 info_copy, _ = self.pre_process(info_copy)
1492                 self.__forced_printings(info_copy, self.prepare_filename(info_copy), incomplete=True)
1493                 if self.params.get('force_write_download_archive', False):
1494                     self.record_download_archive(info_copy)
1495                 return ie_result
1496
1497         if result_type == 'video':
1498             self.add_extra_info(ie_result, extra_info)
1499             ie_result = self.process_video_result(ie_result, download=download)
1500             additional_urls = (ie_result or {}).get('additional_urls')
1501             if additional_urls:
1502                 # TODO: Improve MetadataParserPP to allow setting a list
1503                 if isinstance(additional_urls, compat_str):
1504                     additional_urls = [additional_urls]
1505                 self.to_screen(
1506                     '[info] %s: %d additional URL(s) requested' % (ie_result['id'], len(additional_urls)))
1507                 self.write_debug('Additional URLs: "%s"' % '", "'.join(additional_urls))
1508                 ie_result['additional_entries'] = [
1509                     self.extract_info(
1510                         url, download, extra_info=extra_info,
1511                         force_generic_extractor=self.params.get('force_generic_extractor'))
1512                     for url in additional_urls
1513                 ]
1514             return ie_result
1515         elif result_type == 'url':
1516             # We have to add extra_info to the results because it may be
1517             # contained in a playlist
1518             return self.extract_info(
1519                 ie_result['url'], download,
1520                 ie_key=ie_result.get('ie_key'),
1521                 extra_info=extra_info)
1522         elif result_type == 'url_transparent':
1523             # Use the information from the embedding page
1524             info = self.extract_info(
1525                 ie_result['url'], ie_key=ie_result.get('ie_key'),
1526                 extra_info=extra_info, download=False, process=False)
1527
1528             # extract_info may return None when ignoreerrors is enabled and
1529             # extraction failed with an error, don't crash and return early
1530             # in this case
1531             if not info:
1532                 return info
1533
1534             force_properties = dict(
1535                 (k, v) for k, v in ie_result.items() if v is not None)
1536             for f in ('_type', 'url', 'id', 'extractor', 'extractor_key', 'ie_key'):
1537                 if f in force_properties:
1538                     del force_properties[f]
1539             new_result = info.copy()
1540             new_result.update(force_properties)
1541
1542             # Extracted info may not be a video result (i.e.
1543             # info.get('_type', 'video') != video) but rather an url or
1544             # url_transparent. In such cases outer metadata (from ie_result)
1545             # should be propagated to inner one (info). For this to happen
1546             # _type of info should be overridden with url_transparent. This
1547             # fixes issue from https://github.com/ytdl-org/youtube-dl/pull/11163.
1548             if new_result.get('_type') == 'url':
1549                 new_result['_type'] = 'url_transparent'
1550
1551             return self.process_ie_result(
1552                 new_result, download=download, extra_info=extra_info)
1553         elif result_type in ('playlist', 'multi_video'):
1554             # Protect from infinite recursion due to recursively nested playlists
1555             # (see https://github.com/ytdl-org/youtube-dl/issues/27833)
1556             webpage_url = ie_result['webpage_url']
1557             if webpage_url in self._playlist_urls:
1558                 self.to_screen(
1559                     '[download] Skipping already downloaded playlist: %s'
1560                     % ie_result.get('title') or ie_result.get('id'))
1561                 return
1562
1563             self._playlist_level += 1
1564             self._playlist_urls.add(webpage_url)
1565             self._sanitize_thumbnails(ie_result)
1566             try:
1567                 return self.__process_playlist(ie_result, download)
1568             finally:
1569                 self._playlist_level -= 1
1570                 if not self._playlist_level:
1571                     self._playlist_urls.clear()
1572         elif result_type == 'compat_list':
1573             self.report_warning(
1574                 'Extractor %s returned a compat_list result. '
1575                 'It needs to be updated.' % ie_result.get('extractor'))
1576
1577             def _fixup(r):
1578                 self.add_extra_info(r, {
1579                     'extractor': ie_result['extractor'],
1580                     'webpage_url': ie_result['webpage_url'],
1581                     'webpage_url_basename': url_basename(ie_result['webpage_url']),
1582                     'webpage_url_domain': get_domain(ie_result['webpage_url']),
1583                     'extractor_key': ie_result['extractor_key'],
1584                 })
1585                 return r
1586             ie_result['entries'] = [
1587                 self.process_ie_result(_fixup(r), download, extra_info)
1588                 for r in ie_result['entries']
1589             ]
1590             return ie_result
1591         else:
1592             raise Exception('Invalid result type: %s' % result_type)
1593
1594     def _ensure_dir_exists(self, path):
1595         return make_dir(path, self.report_error)
1596
    def __process_playlist(self, ie_result, download):
        """Select, filter and process the entries of a playlist result.

        The entries are narrowed down with the 'playliststart', 'playlistend'
        and 'playlist_items' params, optionally reversed/shuffled, and each
        one is passed to __process_iterable_entry. Unless simulating, the
        playlist-level info json, description and thumbnails are written too.

        Returns ie_result with 'entries' replaced by the processed results,
        or None if writing the info json or the description returned None.
        Raises EntryNotInPlaylist when ie_result has no 'entries', or when a
        requested entry is missing from an incomplete entry list.
        """
        # We process each entry in the playlist
        playlist = ie_result.get('title') or ie_result.get('id')
        self.to_screen('[download] Downloading playlist: %s' % playlist)

        if 'entries' not in ie_result:
            raise EntryNotInPlaylist('There are no entries')

        # Sentinel marking the positions for which no entry is available
        MissingEntry = object()
        incomplete_entries = bool(ie_result.get('requested_entries'))
        if incomplete_entries:
            def fill_missing_entries(entries, indices):
                # Put each entry back at its (1-based) playlist index
                ret = [MissingEntry] * max(indices)
                for i, entry in zip(indices, entries):
                    ret[i - 1] = entry
                return ret
            ie_result['entries'] = fill_missing_entries(ie_result['entries'], ie_result['requested_entries'])

        playlist_results = []

        playliststart = self.params.get('playliststart', 1)
        playlistend = self.params.get('playlistend')
        # For backwards compatibility, interpret -1 as whole list
        if playlistend == -1:
            playlistend = None

        playlistitems_str = self.params.get('playlist_items')
        playlistitems = None
        if playlistitems_str is not None:
            def iter_playlistitems(format):
                # Expand a spec such as '1,3-5' into 1, 3, 4, 5
                for string_segment in format.split(','):
                    if '-' in string_segment:
                        start, end = string_segment.split('-')
                        for item in range(int(start), int(end) + 1):
                            yield int(item)
                    else:
                        yield int(string_segment)
            playlistitems = orderedSet(iter_playlistitems(playlistitems_str))

        ie_entries = ie_result['entries']
        # Template with one remaining %d, filled with n_entries further below
        msg = (
            'Downloading %d videos' if not isinstance(ie_entries, list)
            else 'Collected %d videos; downloading %%d of them' % len(ie_entries))

        if isinstance(ie_entries, list):
            def get_entry(i):
                return ie_entries[i - 1]
        else:
            if not isinstance(ie_entries, (PagedList, LazyList)):
                ie_entries = LazyList(ie_entries)

            def get_entry(i):
                # Indexing a lazy list may run extraction code, so its errors
                # are handled the same way as those of any other extraction
                return YoutubeDL.__handle_extraction_exceptions(
                    lambda self, i: ie_entries[i - 1]
                )(self, i)

        entries = []
        items = playlistitems if playlistitems is not None else itertools.count(playliststart)
        for i in items:
            if i == 0:
                continue
            if playlistitems is None and playlistend is not None and playlistend < i:
                break
            entry = None
            try:
                entry = get_entry(i)
                if entry is MissingEntry:
                    raise EntryNotInPlaylist()
            except (IndexError, EntryNotInPlaylist):
                if incomplete_entries:
                    raise EntryNotInPlaylist(f'Entry {i} cannot be found')
                elif not playlistitems:
                    break
            # With explicit playlist items, an unavailable index is kept as None
            entries.append(entry)
            try:
                if entry is not None:
                    # Called only so that a break-on-existing/reject condition stops the collection early
                    self._match_entry(entry, incomplete=True, silent=True)
            except (ExistingVideoReached, RejectedVideoReached):
                break
        ie_result['entries'] = entries

        # Save playlist_index before re-ordering
        entries = [
            ((playlistitems[i - 1] if playlistitems else i + playliststart - 1), entry)
            for i, entry in enumerate(entries, 1)
            if entry is not None]
        n_entries = len(entries)

        if not playlistitems and (playliststart != 1 or playlistend):
            playlistitems = list(range(playliststart, playliststart + n_entries))
        ie_result['requested_entries'] = playlistitems

        _infojson_written = False
        if not self.params.get('simulate') and self.params.get('allow_playlist_files', True):
            # Playlist-level fields used to evaluate the playlist file templates;
            # values already present in ie_result take precedence
            ie_copy = {
                'playlist': playlist,
                'playlist_id': ie_result.get('id'),
                'playlist_title': ie_result.get('title'),
                'playlist_uploader': ie_result.get('uploader'),
                'playlist_uploader_id': ie_result.get('uploader_id'),
                'playlist_index': 0,
                'n_entries': n_entries,
            }
            ie_copy.update(dict(ie_result))

            _infojson_written = self._write_info_json(
                'playlist', ie_result, self.prepare_filename(ie_copy, 'pl_infojson'))
            if _infojson_written is None:
                return
            if self._write_description('playlist', ie_result,
                                       self.prepare_filename(ie_copy, 'pl_description')) is None:
                return
            # TODO: This should be passed to ThumbnailsConvertor if necessary
            self._write_thumbnails('playlist', ie_copy, self.prepare_filename(ie_copy, 'pl_thumbnail'))

        if self.params.get('playlistreverse', False):
            entries = entries[::-1]
        if self.params.get('playlistrandom', False):
            random.shuffle(entries)

        x_forwarded_for = ie_result.get('__x_forwarded_for_ip')

        self.to_screen('[%s] playlist %s: %s' % (ie_result['extractor'], playlist, msg % n_entries))
        failures = 0
        # Without 'skip_playlist_after_errors' the failure limit is never reached
        max_failures = self.params.get('skip_playlist_after_errors') or float('inf')
        for i, entry_tuple in enumerate(entries, 1):
            playlist_index, entry = entry_tuple
            if 'playlist-index' in self.params.get('compat_opts', []):
                # Compat behaviour: derive the index from the position after re-ordering
                playlist_index = playlistitems[i - 1] if playlistitems else i + playliststart - 1
            self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
            # This __x_forwarded_for_ip thing is a bit ugly but requires
            # minimal changes
            if x_forwarded_for:
                entry['__x_forwarded_for_ip'] = x_forwarded_for
            extra = {
                'n_entries': n_entries,
                '_last_playlist_index': max(playlistitems) if playlistitems else (playlistend or n_entries),
                'playlist_index': playlist_index,
                'playlist_autonumber': i,
                'playlist': playlist,
                'playlist_id': ie_result.get('id'),
                'playlist_title': ie_result.get('title'),
                'playlist_uploader': ie_result.get('uploader'),
                'playlist_uploader_id': ie_result.get('uploader_id'),
                'extractor': ie_result['extractor'],
                'webpage_url': ie_result['webpage_url'],
                'webpage_url_basename': url_basename(ie_result['webpage_url']),
                'webpage_url_domain': get_domain(ie_result['webpage_url']),
                'extractor_key': ie_result['extractor_key'],
            }

            if self._match_entry(entry, incomplete=True) is not None:
                continue

            entry_result = self.__process_iterable_entry(entry, download, extra)
            # A falsy result counts as a failed entry
            if not entry_result:
                failures += 1
            if failures >= max_failures:
                self.report_error(
                    'Skipping the remaining entries in playlist "%s" since %d items failed extraction' % (playlist, failures))
                break
            playlist_results.append(entry_result)
        ie_result['entries'] = playlist_results

        # Write the updated info to json
        if _infojson_written and self._write_info_json(
                'updated playlist', ie_result,
                self.prepare_filename(ie_copy, 'pl_infojson'), overwrite=True) is None:
            return

        for tmpl in self.params['forceprint'].get('playlist', []):
            self._forceprint(tmpl, ie_result)
        self.to_screen('[download] Finished downloading playlist: %s' % playlist)
        return ie_result
1771
1772     @__handle_extraction_exceptions
1773     def __process_iterable_entry(self, entry, download, extra_info):
1774         return self.process_ie_result(
1775             entry, download=download, extra_info=extra_info)
1776
1777     def _build_format_filter(self, filter_spec):
1778         " Returns a function to filter the formats according to the filter_spec "
1779
1780         OPERATORS = {
1781             '<': operator.lt,
1782             '<=': operator.le,
1783             '>': operator.gt,
1784             '>=': operator.ge,
1785             '=': operator.eq,
1786             '!=': operator.ne,
1787         }
1788         operator_rex = re.compile(r'''(?x)\s*
1789             (?P<key>width|height|tbr|abr|vbr|asr|filesize|filesize_approx|fps)\s*
1790             (?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
1791             (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)\s*
1792             ''' % '|'.join(map(re.escape, OPERATORS.keys())))
1793         m = operator_rex.fullmatch(filter_spec)
1794         if m:
1795             try:
1796                 comparison_value = int(m.group('value'))
1797             except ValueError:
1798                 comparison_value = parse_filesize(m.group('value'))
1799                 if comparison_value is None:
1800                     comparison_value = parse_filesize(m.group('value') + 'B')
1801                 if comparison_value is None:
1802                     raise ValueError(
1803                         'Invalid value %r in format specification %r' % (
1804                             m.group('value'), filter_spec))
1805             op = OPERATORS[m.group('op')]
1806
1807         if not m:
1808             STR_OPERATORS = {
1809                 '=': operator.eq,
1810                 '^=': lambda attr, value: attr.startswith(value),
1811                 '$=': lambda attr, value: attr.endswith(value),
1812                 '*=': lambda attr, value: value in attr,
1813             }
1814             str_operator_rex = re.compile(r'''(?x)\s*
1815                 (?P<key>[a-zA-Z0-9._-]+)\s*
1816                 (?P<negation>!\s*)?(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
1817                 (?P<value>[a-zA-Z0-9._-]+)\s*
1818                 ''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))
1819             m = str_operator_rex.fullmatch(filter_spec)
1820             if m:
1821                 comparison_value = m.group('value')
1822                 str_op = STR_OPERATORS[m.group('op')]
1823                 if m.group('negation'):
1824                     op = lambda attr, value: not str_op(attr, value)
1825                 else:
1826                     op = str_op
1827
1828         if not m:
1829             raise SyntaxError('Invalid filter specification %r' % filter_spec)
1830
1831         def _filter(f):
1832             actual_value = f.get(m.group('key'))
1833             if actual_value is None:
1834                 return m.group('none_inclusive')
1835             return op(actual_value, comparison_value)
1836         return _filter
1837
1838     def _check_formats(self, formats):
1839         for f in formats:
1840             self.to_screen('[info] Testing format %s' % f['format_id'])
1841             path = self.get_output_path('temp')
1842             if not self._ensure_dir_exists(f'{path}/'):
1843                 continue
1844             temp_file = tempfile.NamedTemporaryFile(suffix='.tmp', delete=False, dir=path or None)
1845             temp_file.close()
1846             try:
1847                 success, _ = self.dl(temp_file.name, f, test=True)
1848             except (DownloadError, IOError, OSError, ValueError) + network_exceptions:
1849                 success = False
1850             finally:
1851                 if os.path.exists(temp_file.name):
1852                     try:
1853                         os.remove(temp_file.name)
1854                     except OSError:
1855                         self.report_warning('Unable to delete temporary file "%s"' % temp_file.name)
1856             if success:
1857                 yield f
1858             else:
1859                 self.to_screen('[info] Unable to download format %s. Skipping...' % f['format_id'])
1860
1861     def _default_format_spec(self, info_dict, download=True):
1862
1863         def can_merge():
1864             merger = FFmpegMergerPP(self)
1865             return merger.available and merger.can_merge()
1866
1867         prefer_best = (
1868             not self.params.get('simulate')
1869             and download
1870             and (
1871                 not can_merge()
1872                 or info_dict.get('is_live', False)
1873                 or self.outtmpl_dict['default'] == '-'))
1874         compat = (
1875             prefer_best
1876             or self.params.get('allow_multiple_audio_streams', False)
1877             or 'format-spec' in self.params.get('compat_opts', []))
1878
1879         return (
1880             'best/bestvideo+bestaudio' if prefer_best
1881             else 'bestvideo*+bestaudio/best' if not compat
1882             else 'bestvideo+bestaudio/best')
1883
1884     def build_format_selector(self, format_spec):
1885         def syntax_error(note, start):
1886             message = (
1887                 'Invalid format specification: '
1888                 '{0}\n\t{1}\n\t{2}^'.format(note, format_spec, ' ' * start[1]))
1889             return SyntaxError(message)
1890
1891         PICKFIRST = 'PICKFIRST'
1892         MERGE = 'MERGE'
1893         SINGLE = 'SINGLE'
1894         GROUP = 'GROUP'
1895         FormatSelector = collections.namedtuple('FormatSelector', ['type', 'selector', 'filters'])
1896
1897         allow_multiple_streams = {'audio': self.params.get('allow_multiple_audio_streams', False),
1898                                   'video': self.params.get('allow_multiple_video_streams', False)}
1899
1900         check_formats = self.params.get('check_formats') == 'selected'
1901
1902         def _parse_filter(tokens):
1903             filter_parts = []
1904             for type, string, start, _, _ in tokens:
1905                 if type == tokenize.OP and string == ']':
1906                     return ''.join(filter_parts)
1907                 else:
1908                     filter_parts.append(string)
1909
1910         def _remove_unused_ops(tokens):
1911             # Remove operators that we don't use and join them with the surrounding strings
1912             # for example: 'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9'
1913             ALLOWED_OPS = ('/', '+', ',', '(', ')')
1914             last_string, last_start, last_end, last_line = None, None, None, None
1915             for type, string, start, end, line in tokens:
1916                 if type == tokenize.OP and string == '[':
1917                     if last_string:
1918                         yield tokenize.NAME, last_string, last_start, last_end, last_line
1919                         last_string = None
1920                     yield type, string, start, end, line
1921                     # everything inside brackets will be handled by _parse_filter
1922                     for type, string, start, end, line in tokens:
1923                         yield type, string, start, end, line
1924                         if type == tokenize.OP and string == ']':
1925                             break
1926                 elif type == tokenize.OP and string in ALLOWED_OPS:
1927                     if last_string:
1928                         yield tokenize.NAME, last_string, last_start, last_end, last_line
1929                         last_string = None
1930                     yield type, string, start, end, line
1931                 elif type in [tokenize.NAME, tokenize.NUMBER, tokenize.OP]:
1932                     if not last_string:
1933                         last_string = string
1934                         last_start = start
1935                         last_end = end
1936                     else:
1937                         last_string += string
1938             if last_string:
1939                 yield tokenize.NAME, last_string, last_start, last_end, last_line
1940
1941         def _parse_format_selection(tokens, inside_merge=False, inside_choice=False, inside_group=False):
1942             selectors = []
1943             current_selector = None
1944             for type, string, start, _, _ in tokens:
1945                 # ENCODING is only defined in python 3.x
1946                 if type == getattr(tokenize, 'ENCODING', None):
1947                     continue
1948                 elif type in [tokenize.NAME, tokenize.NUMBER]:
1949                     current_selector = FormatSelector(SINGLE, string, [])
1950                 elif type == tokenize.OP:
1951                     if string == ')':
1952                         if not inside_group:
1953                             # ')' will be handled by the parentheses group
1954                             tokens.restore_last_token()
1955                         break
1956                     elif inside_merge and string in ['/', ',']:
1957                         tokens.restore_last_token()
1958                         break
1959                     elif inside_choice and string == ',':
1960                         tokens.restore_last_token()
1961                         break
1962                     elif string == ',':
1963                         if not current_selector:
1964                             raise syntax_error('"," must follow a format selector', start)
1965                         selectors.append(current_selector)
1966                         current_selector = None
1967                     elif string == '/':
1968                         if not current_selector:
1969                             raise syntax_error('"/" must follow a format selector', start)
1970                         first_choice = current_selector
1971                         second_choice = _parse_format_selection(tokens, inside_choice=True)
1972                         current_selector = FormatSelector(PICKFIRST, (first_choice, second_choice), [])
1973                     elif string == '[':
1974                         if not current_selector:
1975                             current_selector = FormatSelector(SINGLE, 'best', [])
1976                         format_filter = _parse_filter(tokens)
1977                         current_selector.filters.append(format_filter)
1978                     elif string == '(':
1979                         if current_selector:
1980                             raise syntax_error('Unexpected "("', start)
1981                         group = _parse_format_selection(tokens, inside_group=True)
1982                         current_selector = FormatSelector(GROUP, group, [])
1983                     elif string == '+':
1984                         if not current_selector:
1985                             raise syntax_error('Unexpected "+"', start)
1986                         selector_1 = current_selector
1987                         selector_2 = _parse_format_selection(tokens, inside_merge=True)
1988                         if not selector_2:
1989                             raise syntax_error('Expected a selector', start)
1990                         current_selector = FormatSelector(MERGE, (selector_1, selector_2), [])
1991                     else:
1992                         raise syntax_error('Operator not recognized: "{0}"'.format(string), start)
1993                 elif type == tokenize.ENDMARKER:
1994                     break
1995             if current_selector:
1996                 selectors.append(current_selector)
1997             return selectors
1998
1999         def _merge(formats_pair):
2000             format_1, format_2 = formats_pair
2001
2002             formats_info = []
2003             formats_info.extend(format_1.get('requested_formats', (format_1,)))
2004             formats_info.extend(format_2.get('requested_formats', (format_2,)))
2005
2006             if not allow_multiple_streams['video'] or not allow_multiple_streams['audio']:
2007                 get_no_more = {'video': False, 'audio': False}
2008                 for (i, fmt_info) in enumerate(formats_info):
2009                     if fmt_info.get('acodec') == fmt_info.get('vcodec') == 'none':
2010                         formats_info.pop(i)
2011                         continue
2012                     for aud_vid in ['audio', 'video']:
2013                         if not allow_multiple_streams[aud_vid] and fmt_info.get(aud_vid[0] + 'codec') != 'none':
2014                             if get_no_more[aud_vid]:
2015                                 formats_info.pop(i)
2016                                 break
2017                             get_no_more[aud_vid] = True
2018
2019             if len(formats_info) == 1:
2020                 return formats_info[0]
2021
2022             video_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('vcodec') != 'none']
2023             audio_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('acodec') != 'none']
2024
2025             the_only_video = video_fmts[0] if len(video_fmts) == 1 else None
2026             the_only_audio = audio_fmts[0] if len(audio_fmts) == 1 else None
2027
2028             output_ext = self.params.get('merge_output_format')
2029             if not output_ext:
2030                 if the_only_video:
2031                     output_ext = the_only_video['ext']
2032                 elif the_only_audio and not video_fmts:
2033                     output_ext = the_only_audio['ext']
2034                 else:
2035                     output_ext = 'mkv'
2036
2037             filtered = lambda *keys: filter(None, (traverse_obj(fmt, *keys) for fmt in formats_info))
2038
2039             new_dict = {
2040                 'requested_formats': formats_info,
2041                 'format': '+'.join(filtered('format')),
2042                 'format_id': '+'.join(filtered('format_id')),
2043                 'ext': output_ext,
2044                 'protocol': '+'.join(map(determine_protocol, formats_info)),
2045                 'language': '+'.join(orderedSet(filtered('language'))) or None,
2046                 'format_note': '+'.join(orderedSet(filtered('format_note'))) or None,
2047                 'filesize_approx': sum(filtered('filesize', 'filesize_approx')) or None,
2048                 'tbr': sum(filtered('tbr', 'vbr', 'abr')),
2049             }
2050
2051             if the_only_video:
2052                 new_dict.update({
2053                     'width': the_only_video.get('width'),
2054                     'height': the_only_video.get('height'),
2055                     'resolution': the_only_video.get('resolution') or self.format_resolution(the_only_video),
2056                     'fps': the_only_video.get('fps'),
2057                     'dynamic_range': the_only_video.get('dynamic_range'),
2058                     'vcodec': the_only_video.get('vcodec'),
2059                     'vbr': the_only_video.get('vbr'),
2060                     'stretched_ratio': the_only_video.get('stretched_ratio'),
2061                 })
2062
2063             if the_only_audio:
2064                 new_dict.update({
2065                     'acodec': the_only_audio.get('acodec'),
2066                     'abr': the_only_audio.get('abr'),
2067                     'asr': the_only_audio.get('asr'),
2068                 })
2069
2070             return new_dict
2071
2072         def _check_formats(formats):
2073             if not check_formats:
2074                 yield from formats
2075                 return
2076             yield from self._check_formats(formats)
2077
2078         def _build_selector_function(selector):
2079             if isinstance(selector, list):  # ,
2080                 fs = [_build_selector_function(s) for s in selector]
2081
2082                 def selector_function(ctx):
2083                     for f in fs:
2084                         yield from f(ctx)
2085                 return selector_function
2086
2087             elif selector.type == GROUP:  # ()
2088                 selector_function = _build_selector_function(selector.selector)
2089
2090             elif selector.type == PICKFIRST:  # /
2091                 fs = [_build_selector_function(s) for s in selector.selector]
2092
2093                 def selector_function(ctx):
2094                     for f in fs:
2095                         picked_formats = list(f(ctx))
2096                         if picked_formats:
2097                             return picked_formats
2098                     return []
2099
2100             elif selector.type == MERGE:  # +
2101                 selector_1, selector_2 = map(_build_selector_function, selector.selector)
2102
2103                 def selector_function(ctx):
2104                     for pair in itertools.product(selector_1(ctx), selector_2(ctx)):
2105                         yield _merge(pair)
2106
2107             elif selector.type == SINGLE:  # atom
2108                 format_spec = selector.selector or 'best'
2109
2110                 # TODO: Add allvideo, allaudio etc by generalizing the code with best/worst selector
2111                 if format_spec == 'all':
2112                     def selector_function(ctx):
2113                         yield from _check_formats(ctx['formats'][::-1])
2114                 elif format_spec == 'mergeall':
2115                     def selector_function(ctx):
2116                         formats = list(_check_formats(ctx['formats']))
2117                         if not formats:
2118                             return
2119                         merged_format = formats[-1]
2120                         for f in formats[-2::-1]:
2121                             merged_format = _merge((merged_format, f))
2122                         yield merged_format
2123
2124                 else:
2125                     format_fallback, format_reverse, format_idx = False, True, 1
2126                     mobj = re.match(
2127                         r'(?P<bw>best|worst|b|w)(?P<type>video|audio|v|a)?(?P<mod>\*)?(?:\.(?P<n>[1-9]\d*))?$',
2128                         format_spec)
2129                     if mobj is not None:
2130                         format_idx = int_or_none(mobj.group('n'), default=1)
2131                         format_reverse = mobj.group('bw')[0] == 'b'
2132                         format_type = (mobj.group('type') or [None])[0]
2133                         not_format_type = {'v': 'a', 'a': 'v'}.get(format_type)
2134                         format_modified = mobj.group('mod') is not None
2135
2136                         format_fallback = not format_type and not format_modified  # for b, w
2137                         _filter_f = (
2138                             (lambda f: f.get('%scodec' % format_type) != 'none')
2139                             if format_type and format_modified  # bv*, ba*, wv*, wa*
2140                             else (lambda f: f.get('%scodec' % not_format_type) == 'none')
2141                             if format_type  # bv, ba, wv, wa
2142                             else (lambda f: f.get('vcodec') != 'none' and f.get('acodec') != 'none')
2143                             if not format_modified  # b, w
2144                             else lambda f: True)  # b*, w*
2145                         filter_f = lambda f: _filter_f(f) and (
2146                             f.get('vcodec') != 'none' or f.get('acodec') != 'none')
2147                     else:
2148                         if format_spec in self._format_selection_exts['audio']:
2149                             filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') != 'none'
2150                         elif format_spec in self._format_selection_exts['video']:
2151                             filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') != 'none' and f.get('vcodec') != 'none'
2152                         elif format_spec in self._format_selection_exts['storyboards']:
2153                             filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') == 'none' and f.get('vcodec') == 'none'
2154                         else:
2155                             filter_f = lambda f: f.get('format_id') == format_spec  # id
2156
2157                     def selector_function(ctx):
2158                         formats = list(ctx['formats'])
2159                         matches = list(filter(filter_f, formats)) if filter_f is not None else formats
2160                         if format_fallback and ctx['incomplete_formats'] and not matches:
2161                             # for extractors with incomplete formats (audio only (soundcloud)
2162                             # or video only (imgur)) best/worst will fallback to
2163                             # best/worst {video,audio}-only format
2164                             matches = formats
2165                         matches = LazyList(_check_formats(matches[::-1 if format_reverse else 1]))
2166                         try:
2167                             yield matches[format_idx - 1]
2168                         except IndexError:
2169                             return
2170
2171             filters = [self._build_format_filter(f) for f in selector.filters]
2172
2173             def final_selector(ctx):
2174                 ctx_copy = dict(ctx)
2175                 for _filter in filters:
2176                     ctx_copy['formats'] = list(filter(_filter, ctx_copy['formats']))
2177                 return selector_function(ctx_copy)
2178             return final_selector
2179
2180         stream = io.BytesIO(format_spec.encode('utf-8'))
2181         try:
2182             tokens = list(_remove_unused_ops(compat_tokenize_tokenize(stream.readline)))
2183         except tokenize.TokenError:
2184             raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec)))
2185
2186         class TokenIterator(object):
2187             def __init__(self, tokens):
2188                 self.tokens = tokens
2189                 self.counter = 0
2190
2191             def __iter__(self):
2192                 return self
2193
2194             def __next__(self):
2195                 if self.counter >= len(self.tokens):
2196                     raise StopIteration()
2197                 value = self.tokens[self.counter]
2198                 self.counter += 1
2199                 return value
2200
2201             next = __next__
2202
2203             def restore_last_token(self):
2204                 self.counter -= 1
2205
2206         parsed_selector = _parse_format_selection(iter(TokenIterator(tokens)))
2207         return _build_selector_function(parsed_selector)
2208
2209     def _calc_headers(self, info_dict):
2210         res = std_headers.copy()
2211
2212         add_headers = info_dict.get('http_headers')
2213         if add_headers:
2214             res.update(add_headers)
2215
2216         cookies = self._calc_cookies(info_dict)
2217         if cookies:
2218             res['Cookie'] = cookies
2219
2220         if 'X-Forwarded-For' not in res:
2221             x_forwarded_for_ip = info_dict.get('__x_forwarded_for_ip')
2222             if x_forwarded_for_ip:
2223                 res['X-Forwarded-For'] = x_forwarded_for_ip
2224
2225         return res
2226
2227     def _calc_cookies(self, info_dict):
2228         pr = sanitized_Request(info_dict['url'])
2229         self.cookiejar.add_cookie_header(pr)
2230         return pr.get_header('Cookie')
2231
2232     def _sort_thumbnails(self, thumbnails):
2233         thumbnails.sort(key=lambda t: (
2234             t.get('preference') if t.get('preference') is not None else -1,
2235             t.get('width') if t.get('width') is not None else -1,
2236             t.get('height') if t.get('height') is not None else -1,
2237             t.get('id') if t.get('id') is not None else '',
2238             t.get('url')))
2239
2240     def _sanitize_thumbnails(self, info_dict):
2241         thumbnails = info_dict.get('thumbnails')
2242         if thumbnails is None:
2243             thumbnail = info_dict.get('thumbnail')
2244             if thumbnail:
2245                 info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}]
2246         if not thumbnails:
2247             return
2248
2249         def check_thumbnails(thumbnails):
2250             for t in thumbnails:
2251                 self.to_screen(f'[info] Testing thumbnail {t["id"]}')
2252                 try:
2253                     self.urlopen(HEADRequest(t['url']))
2254                 except network_exceptions as err:
2255                     self.to_screen(f'[info] Unable to connect to thumbnail {t["id"]} URL {t["url"]!r} - {err}. Skipping...')
2256                     continue
2257                 yield t
2258
2259         self._sort_thumbnails(thumbnails)
2260         for i, t in enumerate(thumbnails):
2261             if t.get('id') is None:
2262                 t['id'] = '%d' % i
2263             if t.get('width') and t.get('height'):
2264                 t['resolution'] = '%dx%d' % (t['width'], t['height'])
2265             t['url'] = sanitize_url(t['url'])
2266
2267         if self.params.get('check_formats') is True:
2268             info_dict['thumbnails'] = LazyList(check_thumbnails(thumbnails[::-1]), reverse=True)
2269         else:
2270             info_dict['thumbnails'] = thumbnails
2271
2272     def process_video_result(self, info_dict, download=True):
2273         assert info_dict.get('_type', 'video') == 'video'
2274
2275         if 'id' not in info_dict:
2276             raise ExtractorError('Missing "id" field in extractor result')
2277         if 'title' not in info_dict:
2278             raise ExtractorError('Missing "title" field in extractor result',
2279                                  video_id=info_dict['id'], ie=info_dict['extractor'])
2280
2281         def report_force_conversion(field, field_not, conversion):
2282             self.report_warning(
2283                 '"%s" field is not %s - forcing %s conversion, there is an error in extractor'
2284                 % (field, field_not, conversion))
2285
2286         def sanitize_string_field(info, string_field):
2287             field = info.get(string_field)
2288             if field is None or isinstance(field, compat_str):
2289                 return
2290             report_force_conversion(string_field, 'a string', 'string')
2291             info[string_field] = compat_str(field)
2292
2293         def sanitize_numeric_fields(info):
2294             for numeric_field in self._NUMERIC_FIELDS:
2295                 field = info.get(numeric_field)
2296                 if field is None or isinstance(field, compat_numeric_types):
2297                     continue
2298                 report_force_conversion(numeric_field, 'numeric', 'int')
2299                 info[numeric_field] = int_or_none(field)
2300
2301         sanitize_string_field(info_dict, 'id')
2302         sanitize_numeric_fields(info_dict)
2303
2304         if 'playlist' not in info_dict:
2305             # It isn't part of a playlist
2306             info_dict['playlist'] = None
2307             info_dict['playlist_index'] = None
2308
2309         self._sanitize_thumbnails(info_dict)
2310
2311         thumbnail = info_dict.get('thumbnail')
2312         thumbnails = info_dict.get('thumbnails')
2313         if thumbnail:
2314             info_dict['thumbnail'] = sanitize_url(thumbnail)
2315         elif thumbnails:
2316             info_dict['thumbnail'] = thumbnails[-1]['url']
2317
2318         if info_dict.get('display_id') is None and 'id' in info_dict:
2319             info_dict['display_id'] = info_dict['id']
2320
2321         if info_dict.get('duration') is not None:
2322             info_dict['duration_string'] = formatSeconds(info_dict['duration'])
2323
2324         for ts_key, date_key in (
2325                 ('timestamp', 'upload_date'),
2326                 ('release_timestamp', 'release_date'),
2327         ):
2328             if info_dict.get(date_key) is None and info_dict.get(ts_key) is not None:
2329                 # Working around out-of-range timestamp values (e.g. negative ones on Windows,
2330                 # see http://bugs.python.org/issue1646728)
2331                 try:
2332                     upload_date = datetime.datetime.utcfromtimestamp(info_dict[ts_key])
2333                     info_dict[date_key] = upload_date.strftime('%Y%m%d')
2334                 except (ValueError, OverflowError, OSError):
2335                     pass
2336
2337         live_keys = ('is_live', 'was_live')
2338         live_status = info_dict.get('live_status')
2339         if live_status is None:
2340             for key in live_keys:
2341                 if info_dict.get(key) is False:
2342                     continue
2343                 if info_dict.get(key):
2344                     live_status = key
2345                 break
2346             if all(info_dict.get(key) is False for key in live_keys):
2347                 live_status = 'not_live'
2348         if live_status:
2349             info_dict['live_status'] = live_status
2350             for key in live_keys:
2351                 if info_dict.get(key) is None:
2352                     info_dict[key] = (live_status == key)
2353
2354         # Auto generate title fields corresponding to the *_number fields when missing
2355         # in order to always have clean titles. This is very common for TV series.
2356         for field in ('chapter', 'season', 'episode'):
2357             if info_dict.get('%s_number' % field) is not None and not info_dict.get(field):
2358                 info_dict[field] = '%s %d' % (field.capitalize(), info_dict['%s_number' % field])
2359
2360         for cc_kind in ('subtitles', 'automatic_captions'):
2361             cc = info_dict.get(cc_kind)
2362             if cc:
2363                 for _, subtitle in cc.items():
2364                     for subtitle_format in subtitle:
2365                         if subtitle_format.get('url'):
2366                             subtitle_format['url'] = sanitize_url(subtitle_format['url'])
2367                         if subtitle_format.get('ext') is None:
2368                             subtitle_format['ext'] = determine_ext(subtitle_format['url']).lower()
2369
2370         automatic_captions = info_dict.get('automatic_captions')
2371         subtitles = info_dict.get('subtitles')
2372
2373         info_dict['requested_subtitles'] = self.process_subtitles(
2374             info_dict['id'], subtitles, automatic_captions)
2375
2376         if info_dict.get('formats') is None:
2377             # There's only one format available
2378             formats = [info_dict]
2379         else:
2380             formats = info_dict['formats']
2381
2382         info_dict['__has_drm'] = any(f.get('has_drm') for f in formats)
2383         if not self.params.get('allow_unplayable_formats'):
2384             formats = [f for f in formats if not f.get('has_drm')]
2385
2386         if info_dict.get('is_live'):
2387             get_from_start = bool(self.params.get('live_from_start'))
2388             formats = [f for f in formats if bool(f.get('is_from_start')) == get_from_start]
2389             if not get_from_start:
2390                 info_dict['title'] += ' ' + datetime.datetime.now().strftime('%Y-%m-%d %H:%M')
2391
2392         if not formats:
2393             self.raise_no_formats(info_dict)
2394
2395         def is_wellformed(f):
2396             url = f.get('url')
2397             if not url:
2398                 self.report_warning(
2399                     '"url" field is missing or empty - skipping format, '
2400                     'there is an error in extractor')
2401                 return False
2402             if isinstance(url, bytes):
2403                 sanitize_string_field(f, 'url')
2404             return True
2405
2406         # Filter out malformed formats for better extraction robustness
2407         formats = list(filter(is_wellformed, formats))
2408
2409         formats_dict = {}
2410
2411         # We check that all the formats have the format and format_id fields
2412         for i, format in enumerate(formats):
2413             sanitize_string_field(format, 'format_id')
2414             sanitize_numeric_fields(format)
2415             format['url'] = sanitize_url(format['url'])
2416             if not format.get('format_id'):
2417                 format['format_id'] = compat_str(i)
2418             else:
2419                 # Sanitize format_id from characters used in format selector expression
2420                 format['format_id'] = re.sub(r'[\s,/+\[\]()]', '_', format['format_id'])
2421             format_id = format['format_id']
2422             if format_id not in formats_dict:
2423                 formats_dict[format_id] = []
2424             formats_dict[format_id].append(format)
2425
2426         # Make sure all formats have unique format_id
2427         common_exts = set(itertools.chain(*self._format_selection_exts.values()))
2428         for format_id, ambiguous_formats in formats_dict.items():
2429             ambigious_id = len(ambiguous_formats) > 1
2430             for i, format in enumerate(ambiguous_formats):
2431                 if ambigious_id:
2432                     format['format_id'] = '%s-%d' % (format_id, i)
2433                 if format.get('ext') is None:
2434                     format['ext'] = determine_ext(format['url']).lower()
2435                 # Ensure there is no conflict between id and ext in format selection
2436                 # See https://github.com/yt-dlp/yt-dlp/issues/1282
2437                 if format['format_id'] != format['ext'] and format['format_id'] in common_exts:
2438                     format['format_id'] = 'f%s' % format['format_id']
2439
2440         for i, format in enumerate(formats):
2441             if format.get('format') is None:
2442                 format['format'] = '{id} - {res}{note}'.format(
2443                     id=format['format_id'],
2444                     res=self.format_resolution(format),
2445                     note=format_field(format, 'format_note', ' (%s)'),
2446                 )
2447             if format.get('protocol') is None:
2448                 format['protocol'] = determine_protocol(format)
2449             if format.get('resolution') is None:
2450                 format['resolution'] = self.format_resolution(format, default=None)
2451             if format.get('dynamic_range') is None and format.get('vcodec') != 'none':
2452                 format['dynamic_range'] = 'SDR'
2453             if (info_dict.get('duration') and format.get('tbr')
2454                     and not format.get('filesize') and not format.get('filesize_approx')):
2455                 format['filesize_approx'] = info_dict['duration'] * format['tbr'] * (1024 / 8)
2456
2457             # Add HTTP headers, so that external programs can use them from the
2458             # json output
2459             full_format_info = info_dict.copy()
2460             full_format_info.update(format)
2461             format['http_headers'] = self._calc_headers(full_format_info)
2462         # Remove private housekeeping stuff
2463         if '__x_forwarded_for_ip' in info_dict:
2464             del info_dict['__x_forwarded_for_ip']
2465
2466         # TODO Central sorting goes here
2467
2468         if self.params.get('check_formats') is True:
2469             formats = LazyList(self._check_formats(formats[::-1]), reverse=True)
2470
2471         if not formats or formats[0] is not info_dict:
2472             # only set the 'formats' fields if the original info_dict list them
2473             # otherwise we end up with a circular reference, the first (and unique)
2474             # element in the 'formats' field in info_dict is info_dict itself,
2475             # which can't be exported to json
2476             info_dict['formats'] = formats
2477
2478         info_dict, _ = self.pre_process(info_dict)
2479
2480         # The pre-processors may have modified the formats
2481         formats = info_dict.get('formats', [info_dict])
2482
2483         list_only = self.params.get('simulate') is None and (
2484             self.params.get('list_thumbnails') or self.params.get('listformats') or self.params.get('listsubtitles'))
2485         interactive_format_selection = not list_only and self.format_selector == '-'
2486         if self.params.get('list_thumbnails'):
2487             self.list_thumbnails(info_dict)
2488         if self.params.get('listsubtitles'):
2489             if 'automatic_captions' in info_dict:
2490                 self.list_subtitles(
2491                     info_dict['id'], automatic_captions, 'automatic captions')
2492             self.list_subtitles(info_dict['id'], subtitles, 'subtitles')
2493         if self.params.get('listformats') or interactive_format_selection:
2494             self.list_formats(info_dict)
2495         if list_only:
2496             # Without this printing, -F --print-json will not work
2497             self.__forced_printings(info_dict, self.prepare_filename(info_dict), incomplete=True)
2498             return
2499
2500         format_selector = self.format_selector
2501         if format_selector is None:
2502             req_format = self._default_format_spec(info_dict, download=download)
2503             self.write_debug('Default format spec: %s' % req_format)
2504             format_selector = self.build_format_selector(req_format)
2505
2506         while True:
2507             if interactive_format_selection:
2508                 req_format = input(
2509                     self._format_screen('\nEnter format selector: ', self.Styles.EMPHASIS))
2510                 try:
2511                     format_selector = self.build_format_selector(req_format)
2512                 except SyntaxError as err:
2513                     self.report_error(err, tb=False, is_error=False)
2514                     continue
2515
2516             # While in format selection we may need to have an access to the original
2517             # format set in order to calculate some metrics or do some processing.
2518             # For now we need to be able to guess whether original formats provided
2519             # by extractor are incomplete or not (i.e. whether extractor provides only
2520             # video-only or audio-only formats) for proper formats selection for
2521             # extractors with such incomplete formats (see
2522             # https://github.com/ytdl-org/youtube-dl/pull/5556).
2523             # Since formats may be filtered during format selection and may not match
2524             # the original formats the results may be incorrect. Thus original formats
2525             # or pre-calculated metrics should be passed to format selection routines
2526             # as well.
2527             # We will pass a context object containing all necessary additional data
2528             # instead of just formats.
2529             # This fixes incorrect format selection issue (see
2530             # https://github.com/ytdl-org/youtube-dl/issues/10083).
2531             incomplete_formats = (
2532                 # All formats are video-only or
2533                 all(f.get('vcodec') != 'none' and f.get('acodec') == 'none' for f in formats)
2534                 # all formats are audio-only
2535                 or all(f.get('vcodec') == 'none' and f.get('acodec') != 'none' for f in formats))
2536
2537             ctx = {
2538                 'formats': formats,
2539                 'incomplete_formats': incomplete_formats,
2540             }
2541
2542             formats_to_download = list(format_selector(ctx))
2543             if interactive_format_selection and not formats_to_download:
2544                 self.report_error('Requested format is not available', tb=False, is_error=False)
2545                 continue
2546             break
2547
2548         if not formats_to_download:
2549             if not self.params.get('ignore_no_formats_error'):
2550                 raise ExtractorError('Requested format is not available', expected=True,
2551                                      video_id=info_dict['id'], ie=info_dict['extractor'])
2552             else:
2553                 self.report_warning('Requested format is not available')
2554                 # Process what we can, even without any available formats.
2555                 self.process_info(dict(info_dict))
2556         elif download:
2557             self.to_screen(
2558                 '[info] %s: Downloading %d format(s): %s' % (
2559                     info_dict['id'], len(formats_to_download),
2560                     ", ".join([f['format_id'] for f in formats_to_download])))
2561             for fmt in formats_to_download:
2562                 new_info = dict(info_dict)
2563                 # Save a reference to the original info_dict so that it can be modified in process_info if needed
2564                 new_info['__original_infodict'] = info_dict
2565                 new_info.update(fmt)
2566                 self.process_info(new_info)
2567         # We update the info dict with the selected best quality format (backwards compatibility)
2568         if formats_to_download:
2569             info_dict.update(formats_to_download[-1])
2570         return info_dict
2571
2572     def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
2573         """Select the requested subtitles and their format"""
2574         available_subs = {}
2575         if normal_subtitles and self.params.get('writesubtitles'):
2576             available_subs.update(normal_subtitles)
2577         if automatic_captions and self.params.get('writeautomaticsub'):
2578             for lang, cap_info in automatic_captions.items():
2579                 if lang not in available_subs:
2580                     available_subs[lang] = cap_info
2581
2582         if (not self.params.get('writesubtitles') and not
2583                 self.params.get('writeautomaticsub') or not
2584                 available_subs):
2585             return None
2586
2587         all_sub_langs = available_subs.keys()
2588         if self.params.get('allsubtitles', False):
2589             requested_langs = all_sub_langs
2590         elif self.params.get('subtitleslangs', False):
2591             # A list is used so that the order of languages will be the same as
2592             # given in subtitleslangs. See https://github.com/yt-dlp/yt-dlp/issues/1041
2593             requested_langs = []
2594             for lang_re in self.params.get('subtitleslangs'):
2595                 if lang_re == 'all':
2596                     requested_langs.extend(all_sub_langs)
2597                     continue
2598                 discard = lang_re[0] == '-'
2599                 if discard:
2600                     lang_re = lang_re[1:]
2601                 current_langs = filter(re.compile(lang_re + '$').match, all_sub_langs)
2602                 if discard:
2603                     for lang in current_langs:
2604                         while lang in requested_langs:
2605                             requested_langs.remove(lang)
2606                 else:
2607                     requested_langs.extend(current_langs)
2608             requested_langs = orderedSet(requested_langs)
2609         elif 'en' in available_subs:
2610             requested_langs = ['en']
2611         else:
2612             requested_langs = [list(all_sub_langs)[0]]
2613         if requested_langs:
2614             self.write_debug('Downloading subtitles: %s' % ', '.join(requested_langs))
2615
2616         formats_query = self.params.get('subtitlesformat', 'best')
2617         formats_preference = formats_query.split('/') if formats_query else []
2618         subs = {}
2619         for lang in requested_langs:
2620             formats = available_subs.get(lang)
2621             if formats is None:
2622                 self.report_warning('%s subtitles not available for %s' % (lang, video_id))
2623                 continue
2624             for ext in formats_preference:
2625                 if ext == 'best':
2626                     f = formats[-1]
2627                     break
2628                 matches = list(filter(lambda f: f['ext'] == ext, formats))
2629                 if matches:
2630                     f = matches[-1]
2631                     break
2632             else:
2633                 f = formats[-1]
2634                 self.report_warning(
2635                     'No subtitle format found matching "%s" for language %s, '
2636                     'using %s' % (formats_query, lang, f['ext']))
2637             subs[lang] = f
2638         return subs
2639
2640     def _forceprint(self, tmpl, info_dict):
2641         mobj = re.match(r'\w+(=?)$', tmpl)
2642         if mobj and mobj.group(1):
2643             tmpl = f'{tmpl[:-1]} = %({tmpl[:-1]})s'
2644         elif mobj:
2645             tmpl = '%({})s'.format(tmpl)
2646         self.to_stdout(self.evaluate_outtmpl(tmpl, info_dict))
2647
2648     def __forced_printings(self, info_dict, filename, incomplete):
2649         def print_mandatory(field, actual_field=None):
2650             if actual_field is None:
2651                 actual_field = field
2652             if (self.params.get('force%s' % field, False)
2653                     and (not incomplete or info_dict.get(actual_field) is not None)):
2654                 self.to_stdout(info_dict[actual_field])
2655
2656         def print_optional(field):
2657             if (self.params.get('force%s' % field, False)
2658                     and info_dict.get(field) is not None):
2659                 self.to_stdout(info_dict[field])
2660
2661         info_dict = info_dict.copy()
2662         if filename is not None:
2663             info_dict['filename'] = filename
2664         if info_dict.get('requested_formats') is not None:
2665             # For RTMP URLs, also include the playpath
2666             info_dict['urls'] = '\n'.join(f['url'] + f.get('play_path', '') for f in info_dict['requested_formats'])
2667         elif 'url' in info_dict:
2668             info_dict['urls'] = info_dict['url'] + info_dict.get('play_path', '')
2669
2670         if self.params['forceprint'].get('video') or self.params.get('forcejson'):
2671             self.post_extract(info_dict)
2672         for tmpl in self.params['forceprint'].get('video', []):
2673             self._forceprint(tmpl, info_dict)
2674
2675         print_mandatory('title')
2676         print_mandatory('id')
2677         print_mandatory('url', 'urls')
2678         print_optional('thumbnail')
2679         print_optional('description')
2680         print_optional('filename')
2681         if self.params.get('forceduration') and info_dict.get('duration') is not None:
2682             self.to_stdout(formatSeconds(info_dict['duration']))
2683         print_mandatory('format')
2684
2685         if self.params.get('forcejson'):
2686             self.to_stdout(json.dumps(self.sanitize_info(info_dict)))
2687
2688     def dl(self, name, info, subtitle=False, test=False):
2689         if not info.get('url'):
2690             self.raise_no_formats(info, True)
2691
2692         if test:
2693             verbose = self.params.get('verbose')
2694             params = {
2695                 'test': True,
2696                 'quiet': self.params.get('quiet') or not verbose,
2697                 'verbose': verbose,
2698                 'noprogress': not verbose,
2699                 'nopart': True,
2700                 'skip_unavailable_fragments': False,
2701                 'keep_fragments': False,
2702                 'overwrites': True,
2703                 '_no_ytdl_file': True,
2704             }
2705         else:
2706             params = self.params
2707         fd = get_suitable_downloader(info, params, to_stdout=(name == '-'))(self, params)
2708         if not test:
2709             for ph in self._progress_hooks:
2710                 fd.add_progress_hook(ph)
2711             urls = '", "'.join([f['url'] for f in info.get('requested_formats', [])] or [info['url']])
2712             self.write_debug('Invoking downloader on "%s"' % urls)
2713
2714         # Note: Ideally info should be a deep-copied so that hooks cannot modify it.
2715         # But it may contain objects that are not deep-copyable
2716         new_info = self._copy_infodict(info)
2717         if new_info.get('http_headers') is None:
2718             new_info['http_headers'] = self._calc_headers(new_info)
2719         return fd.download(name, new_info, subtitle)
2720
2721     def process_info(self, info_dict):
2722         """Process a single resolved IE result."""
2723
2724         assert info_dict.get('_type', 'video') == 'video'
2725
2726         max_downloads = self.params.get('max_downloads')
2727         if max_downloads is not None:
2728             if self._num_downloads >= int(max_downloads):
2729                 raise MaxDownloadsReached()
2730
2731         # TODO: backward compatibility, to be removed
2732         info_dict['fulltitle'] = info_dict['title']
2733
2734         if 'format' not in info_dict and 'ext' in info_dict:
2735             info_dict['format'] = info_dict['ext']
2736
2737         if self._match_entry(info_dict) is not None:
2738             return
2739
2740         self.post_extract(info_dict)
2741         self._num_downloads += 1
2742
2743         # info_dict['_filename'] needs to be set for backward compatibility
2744         info_dict['_filename'] = full_filename = self.prepare_filename(info_dict, warn=True)
2745         temp_filename = self.prepare_filename(info_dict, 'temp')
2746         files_to_move = {}
2747
2748         # Forced printings
2749         self.__forced_printings(info_dict, full_filename, incomplete=('format' not in info_dict))
2750
2751         if self.params.get('simulate'):
2752             if self.params.get('force_write_download_archive', False):
2753                 self.record_download_archive(info_dict)
2754             # Do nothing else if in simulate mode
2755             return
2756
2757         if full_filename is None:
2758             return
2759         if not self._ensure_dir_exists(encodeFilename(full_filename)):
2760             return
2761         if not self._ensure_dir_exists(encodeFilename(temp_filename)):
2762             return
2763
2764         if self._write_description('video', info_dict,
2765                                    self.prepare_filename(info_dict, 'description')) is None:
2766             return
2767
2768         sub_files = self._write_subtitles(info_dict, temp_filename)
2769         if sub_files is None:
2770             return
2771         files_to_move.update(dict(sub_files))
2772
2773         thumb_files = self._write_thumbnails(
2774             'video', info_dict, temp_filename, self.prepare_filename(info_dict, 'thumbnail'))
2775         if thumb_files is None:
2776             return
2777         files_to_move.update(dict(thumb_files))
2778
2779         infofn = self.prepare_filename(info_dict, 'infojson')
2780         _infojson_written = self._write_info_json('video', info_dict, infofn)
2781         if _infojson_written:
2782             info_dict['infojson_filename'] = infofn
2783             # For backward compatibility, even though it was a private field
2784             info_dict['__infojson_filename'] = infofn
2785         elif _infojson_written is None:
2786             return
2787
2788         # Note: Annotations are deprecated
2789         annofn = None
2790         if self.params.get('writeannotations', False):
2791             annofn = self.prepare_filename(info_dict, 'annotation')
2792         if annofn:
2793             if not self._ensure_dir_exists(encodeFilename(annofn)):
2794                 return
2795             if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(annofn)):
2796                 self.to_screen('[info] Video annotations are already present')
2797             elif not info_dict.get('annotations'):
2798                 self.report_warning('There are no annotations to write.')
2799             else:
2800                 try:
2801                     self.to_screen('[info] Writing video annotations to: ' + annofn)
2802                     with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
2803                         annofile.write(info_dict['annotations'])
2804                 except (KeyError, TypeError):
2805                     self.report_warning('There are no annotations to write.')
2806                 except (OSError, IOError):
2807                     self.report_error('Cannot write annotations file: ' + annofn)
2808                     return
2809
2810         # Write internet shortcut files
2811         def _write_link_file(link_type):
2812             if 'webpage_url' not in info_dict:
2813                 self.report_error('Cannot write internet shortcut file because the "webpage_url" field is missing in the media information')
2814                 return False
2815             linkfn = replace_extension(self.prepare_filename(info_dict, 'link'), link_type, info_dict.get('ext'))
2816             if not self._ensure_dir_exists(encodeFilename(linkfn)):
2817                 return False
2818             if self.params.get('overwrites', True) and os.path.exists(encodeFilename(linkfn)):
2819                 self.to_screen(f'[info] Internet shortcut (.{link_type}) is already present')
2820                 return True
2821             try:
2822                 self.to_screen(f'[info] Writing internet shortcut (.{link_type}) to: {linkfn}')
2823                 with io.open(encodeFilename(to_high_limit_path(linkfn)), 'w', encoding='utf-8',
2824                              newline='\r\n' if link_type == 'url' else '\n') as linkfile:
2825                     template_vars = {'url': iri_to_uri(info_dict['webpage_url'])}
2826                     if link_type == 'desktop':
2827                         template_vars['filename'] = linkfn[:-(len(link_type) + 1)]
2828                     linkfile.write(LINK_TEMPLATES[link_type] % template_vars)
2829             except (OSError, IOError):
2830                 self.report_error(f'Cannot write internet shortcut {linkfn}')
2831                 return False
2832             return True
2833
2834         write_links = {
2835             'url': self.params.get('writeurllink'),
2836             'webloc': self.params.get('writewebloclink'),
2837             'desktop': self.params.get('writedesktoplink'),
2838         }
2839         if self.params.get('writelink'):
2840             link_type = ('webloc' if sys.platform == 'darwin'
2841                          else 'desktop' if sys.platform.startswith('linux')
2842                          else 'url')
2843             write_links[link_type] = True
2844
2845         if any(should_write and not _write_link_file(link_type)
2846                for link_type, should_write in write_links.items()):
2847             return
2848
2849         try:
2850             info_dict, files_to_move = self.pre_process(info_dict, 'before_dl', files_to_move)
2851         except PostProcessingError as err:
2852             self.report_error('Preprocessing: %s' % str(err))
2853             return
2854
2855         must_record_download_archive = False
2856         if self.params.get('skip_download', False):
2857             info_dict['filepath'] = temp_filename
2858             info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))
2859             info_dict['__files_to_move'] = files_to_move
2860             info_dict = self.run_pp(MoveFilesAfterDownloadPP(self, False), info_dict)
2861         else:
2862             # Download
2863             info_dict.setdefault('__postprocessors', [])
2864             try:
2865
2866                 def existing_file(*filepaths):
2867                     ext = info_dict.get('ext')
2868                     final_ext = self.params.get('final_ext', ext)
2869                     existing_files = []
2870                     for file in orderedSet(filepaths):
2871                         if final_ext != ext:
2872                             converted = replace_extension(file, final_ext, ext)
2873                             if os.path.exists(encodeFilename(converted)):
2874                                 existing_files.append(converted)
2875                         if os.path.exists(encodeFilename(file)):
2876                             existing_files.append(file)
2877
2878                     if not existing_files or self.params.get('overwrites', False):
2879                         for file in orderedSet(existing_files):
2880                             self.report_file_delete(file)
2881                             os.remove(encodeFilename(file))
2882                         return None
2883
2884                     info_dict['ext'] = os.path.splitext(existing_files[0])[1][1:]
2885                     return existing_files[0]
2886
2887                 success = True
2888                 if info_dict.get('requested_formats') is not None:
2889
2890                     def compatible_formats(formats):
2891                         # TODO: some formats actually allow this (mkv, webm, ogg, mp4), but not all of them.
2892                         video_formats = [format for format in formats if format.get('vcodec') != 'none']
2893                         audio_formats = [format for format in formats if format.get('acodec') != 'none']
2894                         if len(video_formats) > 2 or len(audio_formats) > 2:
2895                             return False
2896
2897                         # Check extension
2898                         exts = set(format.get('ext') for format in formats)
2899                         COMPATIBLE_EXTS = (
2900                             set(('mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'ismv', 'isma')),
2901                             set(('webm',)),
2902                         )
2903                         for ext_sets in COMPATIBLE_EXTS:
2904                             if ext_sets.issuperset(exts):
2905                                 return True
2906                         # TODO: Check acodec/vcodec
2907                         return False
2908
2909                     requested_formats = info_dict['requested_formats']
2910                     old_ext = info_dict['ext']
2911                     if self.params.get('merge_output_format') is None:
2912                         if not compatible_formats(requested_formats):
2913                             info_dict['ext'] = 'mkv'
2914                             self.report_warning(
2915                                 'Requested formats are incompatible for merge and will be merged into mkv')
2916                         if (info_dict['ext'] == 'webm'
2917                                 and info_dict.get('thumbnails')
2918                                 # check with type instead of pp_key, __name__, or isinstance
2919                                 # since we dont want any custom PPs to trigger this
2920                                 and any(type(pp) == EmbedThumbnailPP for pp in self._pps['post_process'])):
2921                             info_dict['ext'] = 'mkv'
2922                             self.report_warning(
2923                                 'webm doesn\'t support embedding a thumbnail, mkv will be used')
2924                     new_ext = info_dict['ext']
2925
2926                     def correct_ext(filename, ext=new_ext):
2927                         if filename == '-':
2928                             return filename
2929                         filename_real_ext = os.path.splitext(filename)[1][1:]
2930                         filename_wo_ext = (
2931                             os.path.splitext(filename)[0]
2932                             if filename_real_ext in (old_ext, new_ext)
2933                             else filename)
2934                         return '%s.%s' % (filename_wo_ext, ext)
2935
2936                     # Ensure filename always has a correct extension for successful merge
2937                     full_filename = correct_ext(full_filename)
2938                     temp_filename = correct_ext(temp_filename)
2939                     dl_filename = existing_file(full_filename, temp_filename)
2940                     info_dict['__real_download'] = False
2941
2942                     downloaded = []
2943                     merger = FFmpegMergerPP(self)
2944
2945                     fd = get_suitable_downloader(info_dict, self.params, to_stdout=temp_filename == '-')
2946                     if dl_filename is not None:
2947                         self.report_file_already_downloaded(dl_filename)
2948                     elif fd:
2949                         for f in requested_formats if fd != FFmpegFD else []:
2950                             f['filepath'] = fname = prepend_extension(
2951                                 correct_ext(temp_filename, info_dict['ext']),
2952                                 'f%s' % f['format_id'], info_dict['ext'])
2953                             downloaded.append(fname)
2954                         info_dict['url'] = '\n'.join(f['url'] for f in requested_formats)
2955                         success, real_download = self.dl(temp_filename, info_dict)
2956                         info_dict['__real_download'] = real_download
2957                     else:
2958                         if self.params.get('allow_unplayable_formats'):
2959                             self.report_warning(
2960                                 'You have requested merging of multiple formats '
2961                                 'while also allowing unplayable formats to be downloaded. '
2962                                 'The formats won\'t be merged to prevent data corruption.')
2963                         elif not merger.available:
2964                             self.report_warning(
2965                                 'You have requested merging of multiple formats but ffmpeg is not installed. '
2966                                 'The formats won\'t be merged.')
2967
2968                         if temp_filename == '-':
2969                             reason = ('using a downloader other than ffmpeg' if FFmpegFD.can_merge_formats(info_dict, self.params)
2970                                       else 'but the formats are incompatible for simultaneous download' if merger.available
2971                                       else 'but ffmpeg is not installed')
2972                             self.report_warning(
2973                                 f'You have requested downloading multiple formats to stdout {reason}. '
2974                                 'The formats will be streamed one after the other')
2975                             fname = temp_filename
2976                         for f in requested_formats:
2977                             new_info = dict(info_dict)
2978                             del new_info['requested_formats']
2979                             new_info.update(f)
2980                             if temp_filename != '-':
2981                                 fname = prepend_extension(
2982                                     correct_ext(temp_filename, new_info['ext']),
2983                                     'f%s' % f['format_id'], new_info['ext'])
2984                                 if not self._ensure_dir_exists(fname):
2985                                     return
2986                                 f['filepath'] = fname
2987                                 downloaded.append(fname)
2988                             partial_success, real_download = self.dl(fname, new_info)
2989                             info_dict['__real_download'] = info_dict['__real_download'] or real_download
2990                             success = success and partial_success
2991
2992                     if downloaded and merger.available and not self.params.get('allow_unplayable_formats'):
2993                         info_dict['__postprocessors'].append(merger)
2994                         info_dict['__files_to_merge'] = downloaded
2995                         # Even if there were no downloads, it is being merged only now
2996                         info_dict['__real_download'] = True
2997                     else:
2998                         for file in downloaded:
2999                             files_to_move[file] = None
3000                 else:
3001                     # Just a single file
3002                     dl_filename = existing_file(full_filename, temp_filename)
3003                     if dl_filename is None or dl_filename == temp_filename:
3004                         # dl_filename == temp_filename could mean that the file was partially downloaded with --no-part.
3005                         # So we should try to resume the download
3006                         success, real_download = self.dl(temp_filename, info_dict)
3007                         info_dict['__real_download'] = real_download
3008                     else:
3009                         self.report_file_already_downloaded(dl_filename)
3010
3011                 dl_filename = dl_filename or temp_filename
3012                 info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))
3013
3014             except network_exceptions as err:
3015                 self.report_error('unable to download video data: %s' % error_to_compat_str(err))
3016                 return
3017             except (OSError, IOError) as err:
3018                 raise UnavailableVideoError(err)
3019             except (ContentTooShortError, ) as err:
3020                 self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
3021                 return
3022
3023             if success and full_filename != '-':
3024
3025                 def fixup():
3026                     do_fixup = True
3027                     fixup_policy = self.params.get('fixup')
3028                     vid = info_dict['id']
3029
3030                     if fixup_policy in ('ignore', 'never'):
3031                         return
3032                     elif fixup_policy == 'warn':
3033                         do_fixup = False
3034                     elif fixup_policy != 'force':
3035                         assert fixup_policy in ('detect_or_warn', None)
3036                         if not info_dict.get('__real_download'):
3037                             do_fixup = False
3038
3039                     def ffmpeg_fixup(cndn, msg, cls):
3040                         if not cndn:
3041                             return
3042                         if not do_fixup:
3043                             self.report_warning(f'{vid}: {msg}')
3044                             return
3045                         pp = cls(self)
3046                         if pp.available:
3047                             info_dict['__postprocessors'].append(pp)
3048                         else:
3049                             self.report_warning(f'{vid}: {msg}. Install ffmpeg to fix this automatically')
3050
3051                     stretched_ratio = info_dict.get('stretched_ratio')
3052                     ffmpeg_fixup(
3053                         stretched_ratio not in (1, None),
3054                         f'Non-uniform pixel ratio {stretched_ratio}',
3055                         FFmpegFixupStretchedPP)
3056
3057                     ffmpeg_fixup(
3058                         (info_dict.get('requested_formats') is None
3059                          and info_dict.get('container') == 'm4a_dash'
3060                          and info_dict.get('ext') == 'm4a'),
3061                         'writing DASH m4a. Only some players support this container',
3062                         FFmpegFixupM4aPP)
3063
3064                     downloader = get_suitable_downloader(info_dict, self.params) if 'protocol' in info_dict else None
3065                     downloader = downloader.__name__ if downloader else None
3066
3067                     if info_dict.get('requested_formats') is None:  # Not necessary if doing merger
3068                         ffmpeg_fixup(downloader == 'HlsFD',
3069                                      'Possible MPEG-TS in MP4 container or malformed AAC timestamps',
3070                                      FFmpegFixupM3u8PP)
3071                         ffmpeg_fixup(info_dict.get('is_live') and downloader == 'DashSegmentsFD',
3072                                      'Possible duplicate MOOV atoms', FFmpegFixupDuplicateMoovPP)
3073
3074                     ffmpeg_fixup(downloader == 'WebSocketFragmentFD', 'Malformed timestamps detected', FFmpegFixupTimestampPP)
3075                     ffmpeg_fixup(downloader == 'WebSocketFragmentFD', 'Malformed duration detected', FFmpegFixupDurationPP)
3076
3077                 fixup()
3078                 try:
3079                     info_dict = self.post_process(dl_filename, info_dict, files_to_move)
3080                 except PostProcessingError as err:
3081                     self.report_error('Postprocessing: %s' % str(err))
3082                     return
3083                 try:
3084                     for ph in self._post_hooks:
3085                         ph(info_dict['filepath'])
3086                 except Exception as err:
3087                     self.report_error('post hooks: %s' % str(err))
3088                     return
3089                 must_record_download_archive = True
3090
3091         if must_record_download_archive or self.params.get('force_write_download_archive', False):
3092             self.record_download_archive(info_dict)
3093         max_downloads = self.params.get('max_downloads')
3094         if max_downloads is not None and self._num_downloads >= int(max_downloads):
3095             raise MaxDownloadsReached()
3096
3097     def __download_wrapper(self, func):
3098         @functools.wraps(func)
3099         def wrapper(*args, **kwargs):
3100             try:
3101                 res = func(*args, **kwargs)
3102             except UnavailableVideoError as e:
3103                 self.report_error(e)
3104             except MaxDownloadsReached as e:
3105                 self.to_screen(f'[info] {e}')
3106                 raise
3107             except DownloadCancelled as e:
3108                 self.to_screen(f'[info] {e}')
3109                 if not self.params.get('break_per_url'):
3110                     raise
3111             else:
3112                 if self.params.get('dump_single_json', False):
3113                     self.post_extract(res)
3114                     self.to_stdout(json.dumps(self.sanitize_info(res)))
3115         return wrapper
3116
3117     def download(self, url_list):
3118         """Download a given list of URLs."""
3119         url_list = variadic(url_list)  # Passing a single URL is a common mistake
3120         outtmpl = self.outtmpl_dict['default']
3121         if (len(url_list) > 1
3122                 and outtmpl != '-'
3123                 and '%' not in outtmpl
3124                 and self.params.get('max_downloads') != 1):
3125             raise SameFileError(outtmpl)
3126
3127         for url in url_list:
3128             self.__download_wrapper(self.extract_info)(
3129                 url, force_generic_extractor=self.params.get('force_generic_extractor', False))
3130
3131         return self._download_retcode
3132
3133     def download_with_info_file(self, info_filename):
3134         with contextlib.closing(fileinput.FileInput(
3135                 [info_filename], mode='r',
3136                 openhook=fileinput.hook_encoded('utf-8'))) as f:
3137             # FileInput doesn't have a read method, we can't call json.load
3138             info = self.sanitize_info(json.loads('\n'.join(f)), self.params.get('clean_infojson', True))
3139         try:
3140             self.__download_wrapper(self.process_ie_result)(info, download=True)
3141         except (DownloadError, EntryNotInPlaylist, ReExtractInfo) as e:
3142             if not isinstance(e, EntryNotInPlaylist):
3143                 self.to_stderr('\r')
3144             webpage_url = info.get('webpage_url')
3145             if webpage_url is not None:
3146                 self.report_warning(f'The info failed to download: {e}; trying with URL {webpage_url}')
3147                 return self.download([webpage_url])
3148             else:
3149                 raise
3150         return self._download_retcode
3151
3152     @staticmethod
3153     def sanitize_info(info_dict, remove_private_keys=False):
3154         ''' Sanitize the infodict for converting to json '''
3155         if info_dict is None:
3156             return info_dict
3157         info_dict.setdefault('epoch', int(time.time()))
3158         remove_keys = {'__original_infodict'}  # Always remove this since this may contain a copy of the entire dict
3159         keep_keys = ['_type']  # Always keep this to facilitate load-info-json
3160         if remove_private_keys:
3161             remove_keys |= {
3162                 'requested_formats', 'requested_subtitles', 'requested_entries', 'entries',
3163                 'filepath', 'infojson_filename', 'original_url', 'playlist_autonumber',
3164             }
3165             reject = lambda k, v: k not in keep_keys and (
3166                 k.startswith('_') or k in remove_keys or v is None)
3167         else:
3168             reject = lambda k, v: k in remove_keys
3169
3170         def filter_fn(obj):
3171             if isinstance(obj, dict):
3172                 return {k: filter_fn(v) for k, v in obj.items() if not reject(k, v)}
3173             elif isinstance(obj, (list, tuple, set, LazyList)):
3174                 return list(map(filter_fn, obj))
3175             elif obj is None or isinstance(obj, (str, int, float, bool)):
3176                 return obj
3177             else:
3178                 return repr(obj)
3179
3180         return filter_fn(info_dict)
3181
3182     @staticmethod
3183     def filter_requested_info(info_dict, actually_filter=True):
3184         ''' Alias of sanitize_info for backward compatibility '''
3185         return YoutubeDL.sanitize_info(info_dict, actually_filter)
3186
3187     def run_pp(self, pp, infodict):
3188         files_to_delete = []
3189         if '__files_to_move' not in infodict:
3190             infodict['__files_to_move'] = {}
3191         try:
3192             files_to_delete, infodict = pp.run(infodict)
3193         except PostProcessingError as e:
3194             # Must be True and not 'only_download'
3195             if self.params.get('ignoreerrors') is True:
3196                 self.report_error(e)
3197                 return infodict
3198             raise
3199
3200         if not files_to_delete:
3201             return infodict
3202         if self.params.get('keepvideo', False):
3203             for f in files_to_delete:
3204                 infodict['__files_to_move'].setdefault(f, '')
3205         else:
3206             for old_filename in set(files_to_delete):
3207                 self.to_screen('Deleting original file %s (pass -k to keep)' % old_filename)
3208                 try:
3209                     os.remove(encodeFilename(old_filename))
3210                 except (IOError, OSError):
3211                     self.report_warning('Unable to remove downloaded original file')
3212                 if old_filename in infodict['__files_to_move']:
3213                     del infodict['__files_to_move'][old_filename]
3214         return infodict
3215
3216     @staticmethod
3217     def post_extract(info_dict):
3218         def actual_post_extract(info_dict):
3219             if info_dict.get('_type') in ('playlist', 'multi_video'):
3220                 for video_dict in info_dict.get('entries', {}):
3221                     actual_post_extract(video_dict or {})
3222                 return
3223
3224             post_extractor = info_dict.get('__post_extractor') or (lambda: {})
3225             extra = post_extractor().items()
3226             info_dict.update(extra)
3227             info_dict.pop('__post_extractor', None)
3228
3229             original_infodict = info_dict.get('__original_infodict') or {}
3230             original_infodict.update(extra)
3231             original_infodict.pop('__post_extractor', None)
3232
3233         actual_post_extract(info_dict or {})
3234
3235     def pre_process(self, ie_info, key='pre_process', files_to_move=None):
3236         info = dict(ie_info)
3237         info['__files_to_move'] = files_to_move or {}
3238         for pp in self._pps[key]:
3239             info = self.run_pp(pp, info)
3240         return info, info.pop('__files_to_move', None)
3241
3242     def post_process(self, filename, ie_info, files_to_move=None):
3243         """Run all the postprocessors on the given file."""
3244         info = dict(ie_info)
3245         info['filepath'] = filename
3246         info['__files_to_move'] = files_to_move or {}
3247
3248         for pp in ie_info.get('__postprocessors', []) + self._pps['post_process']:
3249             info = self.run_pp(pp, info)
3250         info = self.run_pp(MoveFilesAfterDownloadPP(self), info)
3251         del info['__files_to_move']
3252         for pp in self._pps['after_move']:
3253             info = self.run_pp(pp, info)
3254         return info
3255
3256     def _make_archive_id(self, info_dict):
3257         video_id = info_dict.get('id')
3258         if not video_id:
3259             return
3260         # Future-proof against any change in case
3261         # and backwards compatibility with prior versions
3262         extractor = info_dict.get('extractor_key') or info_dict.get('ie_key')  # key in a playlist
3263         if extractor is None:
3264             url = str_or_none(info_dict.get('url'))
3265             if not url:
3266                 return
3267             # Try to find matching extractor for the URL and take its ie_key
3268             for ie_key, ie in self._ies.items():
3269                 if ie.suitable(url):
3270                     extractor = ie_key
3271                     break
3272             else:
3273                 return
3274         return '%s %s' % (extractor.lower(), video_id)
3275
3276     def in_download_archive(self, info_dict):
3277         fn = self.params.get('download_archive')
3278         if fn is None:
3279             return False
3280
3281         vid_id = self._make_archive_id(info_dict)
3282         if not vid_id:
3283             return False  # Incomplete video information
3284
3285         return vid_id in self.archive
3286
3287     def record_download_archive(self, info_dict):
3288         fn = self.params.get('download_archive')
3289         if fn is None:
3290             return
3291         vid_id = self._make_archive_id(info_dict)
3292         assert vid_id
3293         with locked_file(fn, 'a', encoding='utf-8') as archive_file:
3294             archive_file.write(vid_id + '\n')
3295         self.archive.add(vid_id)
3296
3297     @staticmethod
3298     def format_resolution(format, default='unknown'):
3299         if format.get('vcodec') == 'none' and format.get('acodec') != 'none':
3300             return 'audio only'
3301         if format.get('resolution') is not None:
3302             return format['resolution']
3303         if format.get('width') and format.get('height'):
3304             return '%dx%d' % (format['width'], format['height'])
3305         elif format.get('height'):
3306             return '%sp' % format['height']
3307         elif format.get('width'):
3308             return '%dx?' % format['width']
3309         return default
3310
3311     def _format_note(self, fdict):
3312         res = ''
3313         if fdict.get('ext') in ['f4f', 'f4m']:
3314             res += '(unsupported)'
3315         if fdict.get('language'):
3316             if res:
3317                 res += ' '
3318             res += '[%s]' % fdict['language']
3319         if fdict.get('format_note') is not None:
3320             if res:
3321                 res += ' '
3322             res += fdict['format_note']
3323         if fdict.get('tbr') is not None:
3324             if res:
3325                 res += ', '
3326             res += '%4dk' % fdict['tbr']
3327         if fdict.get('container') is not None:
3328             if res:
3329                 res += ', '
3330             res += '%s container' % fdict['container']
3331         if (fdict.get('vcodec') is not None
3332                 and fdict.get('vcodec') != 'none'):
3333             if res:
3334                 res += ', '
3335             res += fdict['vcodec']
3336             if fdict.get('vbr') is not None:
3337                 res += '@'
3338         elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
3339             res += 'video@'
3340         if fdict.get('vbr') is not None:
3341             res += '%4dk' % fdict['vbr']
3342         if fdict.get('fps') is not None:
3343             if res:
3344                 res += ', '
3345             res += '%sfps' % fdict['fps']
3346         if fdict.get('acodec') is not None:
3347             if res:
3348                 res += ', '
3349             if fdict['acodec'] == 'none':
3350                 res += 'video only'
3351             else:
3352                 res += '%-5s' % fdict['acodec']
3353         elif fdict.get('abr') is not None:
3354             if res:
3355                 res += ', '
3356             res += 'audio'
3357         if fdict.get('abr') is not None:
3358             res += '@%3dk' % fdict['abr']
3359         if fdict.get('asr') is not None:
3360             res += ' (%5dHz)' % fdict['asr']
3361         if fdict.get('filesize') is not None:
3362             if res:
3363                 res += ', '
3364             res += format_bytes(fdict['filesize'])
3365         elif fdict.get('filesize_approx') is not None:
3366             if res:
3367                 res += ', '
3368             res += '~' + format_bytes(fdict['filesize_approx'])
3369         return res
3370
3371     def _list_format_headers(self, *headers):
3372         if self.params.get('listformats_table', True) is not False:
3373             return [self._format_screen(header, self.Styles.HEADERS) for header in headers]
3374         return headers
3375
3376     def list_formats(self, info_dict):
3377         if not info_dict.get('formats') and not info_dict.get('url'):
3378             self.to_screen('%s has no formats' % info_dict['id'])
3379             return
3380         self.to_screen('[info] Available formats for %s:' % info_dict['id'])
3381
3382         formats = info_dict.get('formats', [info_dict])
3383         new_format = self.params.get('listformats_table', True) is not False
3384         if new_format:
3385             delim = self._format_screen('\u2502', self.Styles.DELIM, '|', test_encoding=True)
3386             table = [
3387                 [
3388                     self._format_screen(format_field(f, 'format_id'), self.Styles.ID),
3389                     format_field(f, 'ext'),
3390                     format_field(f, func=self.format_resolution, ignore=('audio only', 'images')),
3391                     format_field(f, 'fps', '\t%d'),
3392                     format_field(f, 'dynamic_range', '%s', ignore=(None, 'SDR')).replace('HDR', ''),
3393                     delim,
3394                     format_field(f, 'filesize', ' \t%s', func=format_bytes) + format_field(f, 'filesize_approx', '~\t%s', func=format_bytes),
3395                     format_field(f, 'tbr', '\t%dk'),
3396                     shorten_protocol_name(f.get('protocol', '')),
3397                     delim,
3398                     format_field(f, 'vcodec', default='unknown').replace(
3399                         'none',
3400                         'images' if f.get('acodec') == 'none'
3401                         else self._format_screen('audio only', self.Styles.SUPPRESS)),
3402                     format_field(f, 'vbr', '\t%dk'),
3403                     format_field(f, 'acodec', default='unknown').replace(
3404                         'none',
3405                         '' if f.get('vcodec') == 'none'
3406                         else self._format_screen('video only', self.Styles.SUPPRESS)),
3407                     format_field(f, 'abr', '\t%dk'),
3408                     format_field(f, 'asr', '\t%dHz'),
3409                     join_nonempty(
3410                         self._format_screen('UNSUPPORTED', 'light red') if f.get('ext') in ('f4f', 'f4m') else None,
3411                         format_field(f, 'language', '[%s]'),
3412                         join_nonempty(
3413                             format_field(f, 'format_note'),
3414                             format_field(f, 'container', ignore=(None, f.get('ext'))),
3415                             delim=', '),
3416                         delim=' '),
3417                 ] for f in formats if f.get('preference') is None or f['preference'] >= -1000]
3418             header_line = self._list_format_headers(
3419                 'ID', 'EXT', 'RESOLUTION', '\tFPS', 'HDR', delim, '\tFILESIZE', '\tTBR', 'PROTO',
3420                 delim, 'VCODEC', '\tVBR', 'ACODEC', '\tABR', '\tASR', 'MORE INFO')
3421         else:
3422             table = [
3423                 [
3424                     format_field(f, 'format_id'),
3425                     format_field(f, 'ext'),
3426                     self.format_resolution(f),
3427                     self._format_note(f)]
3428                 for f in formats
3429                 if f.get('preference') is None or f['preference'] >= -1000]
3430             header_line = ['format code', 'extension', 'resolution', 'note']
3431
3432         self.to_stdout(render_table(
3433             header_line, table,
3434             extra_gap=(0 if new_format else 1),
3435             hide_empty=new_format,
3436             delim=new_format and self._format_screen('\u2500', self.Styles.DELIM, '-', test_encoding=True)))
3437
3438     def list_thumbnails(self, info_dict):
3439         thumbnails = list(info_dict.get('thumbnails'))
3440         if not thumbnails:
3441             self.to_screen('[info] No thumbnails present for %s' % info_dict['id'])
3442             return
3443
3444         self.to_screen(
3445             '[info] Thumbnails for %s:' % info_dict['id'])
3446         self.to_stdout(render_table(
3447             self._list_format_headers('ID', 'Width', 'Height', 'URL'),
3448             [[t['id'], t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']] for t in thumbnails]))
3449
3450     def list_subtitles(self, video_id, subtitles, name='subtitles'):
3451         if not subtitles:
3452             self.to_screen('%s has no %s' % (video_id, name))
3453             return
3454         self.to_screen(
3455             'Available %s for %s:' % (name, video_id))
3456
3457         def _row(lang, formats):
3458             exts, names = zip(*((f['ext'], f.get('name') or 'unknown') for f in reversed(formats)))
3459             if len(set(names)) == 1:
3460                 names = [] if names[0] == 'unknown' else names[:1]
3461             return [lang, ', '.join(names), ', '.join(exts)]
3462
3463         self.to_stdout(render_table(
3464             self._list_format_headers('Language', 'Name', 'Formats'),
3465             [_row(lang, formats) for lang, formats in subtitles.items()],
3466             hide_empty=True))
3467
3468     def urlopen(self, req):
3469         """ Start an HTTP download """
3470         if isinstance(req, compat_basestring):
3471             req = sanitized_Request(req)
3472         return self._opener.open(req, timeout=self._socket_timeout)
3473
3474     def print_debug_header(self):
3475         if not self.params.get('verbose'):
3476             return
3477
3478         def get_encoding(stream):
3479             ret = getattr(stream, 'encoding', 'missing (%s)' % type(stream).__name__)
3480             if not supports_terminal_sequences(stream):
3481                 from .compat import WINDOWS_VT_MODE
3482                 ret += ' (No VT)' if WINDOWS_VT_MODE is False else ' (No ANSI)'
3483             return ret
3484
3485         encoding_str = 'Encodings: locale %s, fs %s, out %s, err %s, pref %s' % (
3486             locale.getpreferredencoding(),
3487             sys.getfilesystemencoding(),
3488             get_encoding(self._screen_file), get_encoding(self._err_file),
3489             self.get_encoding())
3490
3491         logger = self.params.get('logger')
3492         if logger:
3493             write_debug = lambda msg: logger.debug(f'[debug] {msg}')
3494             write_debug(encoding_str)
3495         else:
3496             write_string(f'[debug] {encoding_str}\n', encoding=None)
3497             write_debug = lambda msg: self._write_string(f'[debug] {msg}\n')
3498
3499         source = detect_variant()
3500         write_debug(join_nonempty(
3501             'yt-dlp version', __version__,
3502             f'[{RELEASE_GIT_HEAD}]' if RELEASE_GIT_HEAD else '',
3503             '' if source == 'unknown' else f'({source})',
3504             delim=' '))
3505         if not _LAZY_LOADER:
3506             if os.environ.get('YTDLP_NO_LAZY_EXTRACTORS'):
3507                 write_debug('Lazy loading extractors is forcibly disabled')
3508             else:
3509                 write_debug('Lazy loading extractors is disabled')
3510         if plugin_extractors or plugin_postprocessors:
3511             write_debug('Plugins: %s' % [
3512                 '%s%s' % (klass.__name__, '' if klass.__name__ == name else f' as {name}')
3513                 for name, klass in itertools.chain(plugin_extractors.items(), plugin_postprocessors.items())])
3514         if self.params.get('compat_opts'):
3515             write_debug('Compatibility options: %s' % ', '.join(self.params.get('compat_opts')))
3516
3517         if source == 'source':
3518             try:
3519                 sp = Popen(
3520                     ['git', 'rev-parse', '--short', 'HEAD'],
3521                     stdout=subprocess.PIPE, stderr=subprocess.PIPE,
3522                     cwd=os.path.dirname(os.path.abspath(__file__)))
3523                 out, err = sp.communicate_or_kill()
3524                 out = out.decode().strip()
3525                 if re.match('[0-9a-f]+', out):
3526                     write_debug('Git HEAD: %s' % out)
3527             except Exception:
3528                 try:
3529                     sys.exc_clear()
3530                 except Exception:
3531                     pass
3532
3533         def python_implementation():
3534             impl_name = platform.python_implementation()
3535             if impl_name == 'PyPy' and hasattr(sys, 'pypy_version_info'):
3536                 return impl_name + ' version %d.%d.%d' % sys.pypy_version_info[:3]
3537             return impl_name
3538
3539         write_debug('Python version %s (%s %s) - %s' % (
3540             platform.python_version(),
3541             python_implementation(),
3542             platform.architecture()[0],
3543             platform_name()))
3544
3545         exe_versions, ffmpeg_features = FFmpegPostProcessor.get_versions_and_features(self)
3546         ffmpeg_features = {key for key, val in ffmpeg_features.items() if val}
3547         if ffmpeg_features:
3548             exe_versions['ffmpeg'] += ' (%s)' % ','.join(ffmpeg_features)
3549
3550         exe_versions['rtmpdump'] = rtmpdump_version()
3551         exe_versions['phantomjs'] = PhantomJSwrapper._version()
3552         exe_str = ', '.join(
3553             f'{exe} {v}' for exe, v in sorted(exe_versions.items()) if v
3554         ) or 'none'
3555         write_debug('exe versions: %s' % exe_str)
3556
3557         from .downloader.websocket import has_websockets
3558         from .postprocessor.embedthumbnail import has_mutagen
3559         from .cookies import SQLITE_AVAILABLE, SECRETSTORAGE_AVAILABLE
3560
3561         lib_str = join_nonempty(
3562             compat_pycrypto_AES and compat_pycrypto_AES.__name__.split('.')[0],
3563             SECRETSTORAGE_AVAILABLE and 'secretstorage',
3564             has_mutagen and 'mutagen',
3565             SQLITE_AVAILABLE and 'sqlite',
3566             has_websockets and 'websockets',
3567             delim=', ') or 'none'
3568         write_debug('Optional libraries: %s' % lib_str)
3569
3570         proxy_map = {}
3571         for handler in self._opener.handlers:
3572             if hasattr(handler, 'proxies'):
3573                 proxy_map.update(handler.proxies)
3574         write_debug(f'Proxy map: {proxy_map}')
3575
3576         # Not implemented
3577         if False and self.params.get('call_home'):
3578             ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode('utf-8')
3579             write_debug('Public IP address: %s' % ipaddr)
3580             latest_version = self.urlopen(
3581                 'https://yt-dl.org/latest/version').read().decode('utf-8')
3582             if version_tuple(latest_version) > version_tuple(__version__):
3583                 self.report_warning(
3584                     'You are using an outdated version (newest version: %s)! '
3585                     'See https://yt-dl.org/update if you need help updating.' %
3586                     latest_version)
3587
3588     def _setup_opener(self):
3589         timeout_val = self.params.get('socket_timeout')
3590         self._socket_timeout = 20 if timeout_val is None else float(timeout_val)
3591
3592         opts_cookiesfrombrowser = self.params.get('cookiesfrombrowser')
3593         opts_cookiefile = self.params.get('cookiefile')
3594         opts_proxy = self.params.get('proxy')
3595
3596         self.cookiejar = load_cookies(opts_cookiefile, opts_cookiesfrombrowser, self)
3597
3598         cookie_processor = YoutubeDLCookieProcessor(self.cookiejar)
3599         if opts_proxy is not None:
3600             if opts_proxy == '':
3601                 proxies = {}
3602             else:
3603                 proxies = {'http': opts_proxy, 'https': opts_proxy}
3604         else:
3605             proxies = compat_urllib_request.getproxies()
3606             # Set HTTPS proxy to HTTP one if given (https://github.com/ytdl-org/youtube-dl/issues/805)
3607             if 'http' in proxies and 'https' not in proxies:
3608                 proxies['https'] = proxies['http']
3609         proxy_handler = PerRequestProxyHandler(proxies)
3610
3611         debuglevel = 1 if self.params.get('debug_printtraffic') else 0
3612         https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
3613         ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
3614         redirect_handler = YoutubeDLRedirectHandler()
3615         data_handler = compat_urllib_request_DataHandler()
3616
3617         # When passing our own FileHandler instance, build_opener won't add the
3618         # default FileHandler and allows us to disable the file protocol, which
3619         # can be used for malicious purposes (see
3620         # https://github.com/ytdl-org/youtube-dl/issues/8227)
3621         file_handler = compat_urllib_request.FileHandler()
3622
3623         def file_open(*args, **kwargs):
3624             raise compat_urllib_error.URLError('file:// scheme is explicitly disabled in yt-dlp for security reasons')
3625         file_handler.file_open = file_open
3626
3627         opener = compat_urllib_request.build_opener(
3628             proxy_handler, https_handler, cookie_processor, ydlh, redirect_handler, data_handler, file_handler)
3629
3630         # Delete the default user-agent header, which would otherwise apply in
3631         # cases where our custom HTTP handler doesn't come into play
3632         # (See https://github.com/ytdl-org/youtube-dl/issues/1309 for details)
3633         opener.addheaders = []
3634         self._opener = opener
3635
3636     def encode(self, s):
3637         if isinstance(s, bytes):
3638             return s  # Already encoded
3639
3640         try:
3641             return s.encode(self.get_encoding())
3642         except UnicodeEncodeError as err:
3643             err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
3644             raise
3645
3646     def get_encoding(self):
3647         encoding = self.params.get('encoding')
3648         if encoding is None:
3649             encoding = preferredencoding()
3650         return encoding
3651
3652     def _write_info_json(self, label, ie_result, infofn, overwrite=None):
3653         ''' Write infojson and returns True = written, False = skip, None = error '''
3654         if overwrite is None:
3655             overwrite = self.params.get('overwrites', True)
3656         if not self.params.get('writeinfojson'):
3657             return False
3658         elif not infofn:
3659             self.write_debug(f'Skipping writing {label} infojson')
3660             return False
3661         elif not self._ensure_dir_exists(infofn):
3662             return None
3663         elif not overwrite and os.path.exists(infofn):
3664             self.to_screen(f'[info] {label.title()} metadata is already present')
3665         else:
3666             self.to_screen(f'[info] Writing {label} metadata as JSON to: {infofn}')
3667             try:
3668                 write_json_file(self.sanitize_info(ie_result, self.params.get('clean_infojson', True)), infofn)
3669             except (OSError, IOError):
3670                 self.report_error(f'Cannot write {label} metadata to JSON file {infofn}')
3671                 return None
3672         return True
3673
3674     def _write_description(self, label, ie_result, descfn):
3675         ''' Write description and returns True = written, False = skip, None = error '''
3676         if not self.params.get('writedescription'):
3677             return False
3678         elif not descfn:
3679             self.write_debug(f'Skipping writing {label} description')
3680             return False
3681         elif not self._ensure_dir_exists(descfn):
3682             return None
3683         elif not self.params.get('overwrites', True) and os.path.exists(descfn):
3684             self.to_screen(f'[info] {label.title()} description is already present')
3685         elif ie_result.get('description') is None:
3686             self.report_warning(f'There\'s no {label} description to write')
3687             return False
3688         else:
3689             try:
3690                 self.to_screen(f'[info] Writing {label} description to: {descfn}')
3691                 with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
3692                     descfile.write(ie_result['description'])
3693             except (OSError, IOError):
3694                 self.report_error(f'Cannot write {label} description file {descfn}')
3695                 return None
3696         return True
3697
    def _write_subtitles(self, info_dict, filename):
        ''' Write subtitles to file and return list of (sub_filename, final_sub_filename); or None if error

        Only `info_dict['requested_subtitles']` are handled, and only when the
        `writesubtitles` or `writeautomaticsub` param is set. For every
        subtitle that is written (or already present) its 'filepath' is
        recorded in the subtitle's own dict. A failed download of a single
        subtitle is only warned about; a failure to write inline subtitle data
        aborts with None.
        '''
        ret = []
        subtitles = info_dict.get('requested_subtitles')
        if not subtitles or not (self.params.get('writesubtitles') or self.params.get('writeautomaticsub')):
            # subtitles download errors are already managed as troubles in relevant IE
            # that way it will silently go on when used with unsupporting IE
            return ret

        sub_filename_base = self.prepare_filename(info_dict, 'subtitle')
        if not sub_filename_base:
            self.to_screen('[info] Skipping writing video subtitles')
            return ret
        for sub_lang, sub_info in subtitles.items():
            sub_format = sub_info['ext']
            # sub_filename is where the subtitle is written now (derived from
            # `filename`); sub_filename_final is derived from the 'subtitle'
            # output template and returned alongside it
            sub_filename = subtitles_filename(filename, sub_lang, sub_format, info_dict.get('ext'))
            sub_filename_final = subtitles_filename(sub_filename_base, sub_lang, sub_format, info_dict.get('ext'))
            if not self.params.get('overwrites', True) and os.path.exists(sub_filename):
                # An existing file is reused and still reported to the caller
                self.to_screen(f'[info] Video subtitle {sub_lang}.{sub_format} is already present')
                sub_info['filepath'] = sub_filename
                ret.append((sub_filename, sub_filename_final))
                continue

            self.to_screen(f'[info] Writing video subtitles to: {sub_filename}')
            # Subtitle content supplied inline in 'data' is written directly
            # instead of being downloaded
            if sub_info.get('data') is not None:
                try:
                    # Use newline='' to prevent conversion of newline characters
                    # See https://github.com/ytdl-org/youtube-dl/issues/10268
                    with io.open(sub_filename, 'w', encoding='utf-8', newline='') as subfile:
                        subfile.write(sub_info['data'])
                    sub_info['filepath'] = sub_filename
                    ret.append((sub_filename, sub_filename_final))
                    continue
                except (OSError, IOError):
                    self.report_error(f'Cannot write video subtitles file {sub_filename}')
                    return None

            try:
                # Download using a copy so that the http_headers default does
                # not leak into the caller's subtitle dict
                sub_copy = sub_info.copy()
                sub_copy.setdefault('http_headers', info_dict.get('http_headers'))
                self.dl(sub_filename, sub_copy, subtitle=True)
                sub_info['filepath'] = sub_filename
                ret.append((sub_filename, sub_filename_final))
            except (ExtractorError, IOError, OSError, ValueError) + network_exceptions as err:
                # A failed download only skips this language
                self.report_warning(f'Unable to download video subtitles for {sub_lang!r}: {err}')
                continue
        return ret
3745
3746     def _write_thumbnails(self, label, info_dict, filename, thumb_filename_base=None):
3747         ''' Write thumbnails to file and return list of (thumb_filename, final_thumb_filename) '''
3748         write_all = self.params.get('write_all_thumbnails', False)
3749         thumbnails, ret = [], []
3750         if write_all or self.params.get('writethumbnail', False):
3751             thumbnails = info_dict.get('thumbnails') or []
3752         multiple = write_all and len(thumbnails) > 1
3753
3754         if thumb_filename_base is None:
3755             thumb_filename_base = filename
3756         if thumbnails and not thumb_filename_base:
3757             self.write_debug(f'Skipping writing {label} thumbnail')
3758             return ret
3759
3760         for idx, t in list(enumerate(thumbnails))[::-1]:
3761             thumb_ext = (f'{t["id"]}.' if multiple else '') + determine_ext(t['url'], 'jpg')
3762             thumb_display_id = f'{label} thumbnail {t["id"]}'
3763             thumb_filename = replace_extension(filename, thumb_ext, info_dict.get('ext'))
3764             thumb_filename_final = replace_extension(thumb_filename_base, thumb_ext, info_dict.get('ext'))
3765
3766             if not self.params.get('overwrites', True) and os.path.exists(thumb_filename):
3767                 ret.append((thumb_filename, thumb_filename_final))
3768                 t['filepath'] = thumb_filename
3769                 self.to_screen('[info] %s is already present' % (
3770                     thumb_display_id if multiple else f'{label} thumbnail').capitalize())
3771             else:
3772                 self.to_screen(f'[info] Downloading {thumb_display_id} ...')
3773                 try:
3774                     uf = self.urlopen(t['url'])
3775                     self.to_screen(f'[info] Writing {thumb_display_id} to: {thumb_filename}')
3776                     with open(encodeFilename(thumb_filename), 'wb') as thumbf:
3777                         shutil.copyfileobj(uf, thumbf)
3778                     ret.append((thumb_filename, thumb_filename_final))
3779                     t['filepath'] = thumb_filename
3780                 except network_exceptions as err:
3781                     thumbnails.pop(idx)
3782                     self.report_warning(f'Unable to download {thumb_display_id}: {err}')
3783             if ret and not write_all:
3784                 break
3785         return ret