yt_dlp/YoutubeDL.py

   1 #!/usr/bin/env python3
   2 # coding: utf-8
   3
   4 from __future__ import absolute_import, unicode_literals
   5
   6 import collections
   7 import contextlib
   8 import copy
   9 import datetime
  10 import errno
  11 import fileinput
  12 import functools
  13 import io
  14 import itertools
  15 import json
  16 import locale
  17 import operator
  18 import os
  19 import platform
  20 import re
  21 import shutil
  22 import subprocess
  23 import sys
  24 import tempfile
  25 import time
  26 import tokenize
  27 import traceback
  28 import random
  29 import unicodedata
  30
  31 from enum import Enum
  32 from string import ascii_letters
  33
  34 from .compat import (
  35     compat_basestring,
  36     compat_get_terminal_size,
  37     compat_kwargs,
  38     compat_numeric_types,
  39     compat_os_name,
  40     compat_pycrypto_AES,
  41     compat_shlex_quote,
  42     compat_str,
  43     compat_tokenize_tokenize,
  44     compat_urllib_error,
  45     compat_urllib_request,
  46     compat_urllib_request_DataHandler,
  47     windows_enable_vt_mode,
  48 )
  49 from .cookies import load_cookies
  50 from .utils import (
  51     age_restricted,
  52     args_to_str,
  53     ContentTooShortError,
  54     date_from_str,
  55     DateRange,
  56     DEFAULT_OUTTMPL,
  57     determine_ext,
  58     determine_protocol,
  59     DownloadCancelled,
  60     DownloadError,
  61     encode_compat_str,
  62     encodeFilename,
  63     EntryNotInPlaylist,
  64     error_to_compat_str,
  65     ExistingVideoReached,
  66     expand_path,
  67     ExtractorError,
  68     float_or_none,
  69     format_bytes,
  70     format_field,
  71     formatSeconds,
  72     GeoRestrictedError,
  73     HEADRequest,
  74     int_or_none,
  75     iri_to_uri,
  76     ISO3166Utils,
  77     join_nonempty,
  78     LazyList,
  79     LINK_TEMPLATES,
  80     locked_file,
  81     make_dir,
  82     make_HTTPS_handler,
  83     MaxDownloadsReached,
  84     network_exceptions,
  85     number_of_digits,
  86     orderedSet,
  87     OUTTMPL_TYPES,
  88     PagedList,
  89     parse_filesize,
  90     PerRequestProxyHandler,
  91     platform_name,
  92     Popen,
  93     PostProcessingError,
  94     preferredencoding,
  95     prepend_extension,
  96     ReExtractInfo,
  97     register_socks_protocols,
  98     RejectedVideoReached,
  99     render_table,
 100     replace_extension,
 101     SameFileError,
 102     sanitize_filename,
 103     sanitize_path,
 104     sanitize_url,
 105     sanitized_Request,
 106     std_headers,
 107     STR_FORMAT_RE_TMPL,
 108     STR_FORMAT_TYPES,
 109     str_or_none,
 110     strftime_or_none,
 111     subtitles_filename,
 112     supports_terminal_sequences,
 113     timetuple_from_msec,
 114     to_high_limit_path,
 115     traverse_obj,
 116     try_get,
 117     UnavailableVideoError,
 118     url_basename,
 119     variadic,
 120     version_tuple,
 121     write_json_file,
 122     write_string,
 123     YoutubeDLCookieProcessor,
 124     YoutubeDLHandler,
 125     YoutubeDLRedirectHandler,
 126 )
 127 from .cache import Cache
 128 from .minicurses import format_text
 129 from .extractor import (
 130     gen_extractor_classes,
 131     get_info_extractor,
 132     _LAZY_LOADER,
 133     _PLUGIN_CLASSES as plugin_extractors
 134 )
 135 from .extractor.openload import PhantomJSwrapper
 136 from .downloader import (
 137     FFmpegFD,
 138     get_suitable_downloader,
 139     shorten_protocol_name
 140 )
 141 from .downloader.rtmp import rtmpdump_version
 142 from .postprocessor import (
 143     get_postprocessor,
 144     EmbedThumbnailPP,
 145     FFmpegFixupDurationPP,
 146     FFmpegFixupM3u8PP,
 147     FFmpegFixupM4aPP,
 148     FFmpegFixupStretchedPP,
 149     FFmpegFixupTimestampPP,
 150     FFmpegMergerPP,
 151     FFmpegPostProcessor,
 152     MoveFilesAfterDownloadPP,
 153     _PLUGIN_CLASSES as plugin_postprocessors
 154 )
 155 from .update import detect_variant
 156 from .version import __version__, RELEASE_GIT_HEAD
 157
 158 if compat_os_name == 'nt':
 159     import ctypes
 160
 161
 162 class YoutubeDL(object):
 163     """YoutubeDL class.
 164
 165     YoutubeDL objects are the ones responsible for downloading the
 166     actual video file and writing it to disk if the user has requested
 167     it, among some other tasks. In most cases there should be one per
 168     program. Given a video URL, the downloader does not itself know how
 169     to extract all the needed information (that is the job of the
 170     InfoExtractors), so it has to pass the URL to one of them.
 171
 172     For this, YoutubeDL objects have a method that allows
 173     InfoExtractors to be registered in a given order. When it is passed
 174     a URL, the YoutubeDL object hands it to the first InfoExtractor it
 175     finds that reports being able to handle it. The InfoExtractor extracts
 176     all the information about the video or videos the URL refers to, and
 177     YoutubeDL processes the extracted information, possibly using a File
 178     Downloader to download the video.
 179
 180     YoutubeDL objects accept a lot of parameters. In order not to saturate
 181     the object constructor with arguments, it receives a dictionary of
 182     options instead. These options are available through the params
 183     attribute for the InfoExtractors to use. The YoutubeDL also
 184     registers itself as the downloader in charge for the InfoExtractors
 185     that are added to it, so this is a "mutual registration".
 186
 187     Available options:
 188
 189     username:          Username for authentication purposes.
 190     password:          Password for authentication purposes.
 191     videopassword:     Password for accessing a video.
 192     ap_mso:            Adobe Pass multiple-system operator identifier.
 193     ap_username:       Multiple-system operator account username.
 194     ap_password:       Multiple-system operator account password.
 195     usenetrc:          Use netrc for authentication instead.
 196     verbose:           Print additional info to stdout.
 197     quiet:             Do not print messages to stdout.
 198     no_warnings:       Do not print out anything for warnings.
 199     forceprint:        A list of templates to force print
 200     forceurl:          Force printing final URL. (Deprecated)
 201     forcetitle:        Force printing title. (Deprecated)
 202     forceid:           Force printing ID. (Deprecated)
 203     forcethumbnail:    Force printing thumbnail URL. (Deprecated)
 204     forcedescription:  Force printing description. (Deprecated)
 205     forcefilename:     Force printing final filename. (Deprecated)
 206     forceduration:     Force printing duration. (Deprecated)
 207     forcejson:         Force printing info_dict as JSON.
 208     dump_single_json:  Force printing the info_dict of the whole playlist
 209                        (or video) as a single JSON line.
 210     force_write_download_archive: Force writing download archive regardless
 211                        of 'skip_download' or 'simulate'.
 212     simulate:          Do not download the video files. If unset (or None),
 213                        simulate only if listsubtitles, listformats or list_thumbnails is used
 214     format:            Video format code. See "FORMAT SELECTION" for more details.
 215                        You can also pass a function. The function takes 'ctx' as
 216                        argument and returns the formats to download.
 217                        See "build_format_selector" for an implementation
 218     allow_unplayable_formats:   Allow unplayable formats to be extracted and downloaded.
 219     ignore_no_formats_error: Ignore "No video formats" error. Useful for
 220                        extracting metadata even if the video is not actually
 221                        available for download (experimental)
 222     format_sort:       A list of fields by which to sort the video formats.
 223                        See "Sorting Formats" for more details.
 224     format_sort_force: Force the given format_sort. See "Sorting Formats"
 225                        for more details.
 226     allow_multiple_video_streams:   Allow multiple video streams to be merged
 227                        into a single file
 228     allow_multiple_audio_streams:   Allow multiple audio streams to be merged
 229                        into a single file
 230     check_formats:     Whether to test if the formats are downloadable.
 231                        Can be True (check all), False (check none),
 232                        'selected' (check selected formats),
 233                        or None (check only if requested by extractor)
 234     paths:             Dictionary of output paths. The allowed keys are 'home',
 235                        'temp' and the keys of OUTTMPL_TYPES (in utils.py)
 236     outtmpl:           Dictionary of templates for output names. Allowed keys
 237                        are 'default' and the keys of OUTTMPL_TYPES (in utils.py).
 238                        For compatibility with youtube-dl, a single string can also be used
 239     outtmpl_na_placeholder: Placeholder for unavailable meta fields.
 240     restrictfilenames: Do not allow "&" and spaces in file names
 241     trim_file_name:    Limit length of filename (extension excluded)
 242     windowsfilenames:  Force the filenames to be windows compatible
 243     ignoreerrors:      Do not stop on download/postprocessing errors.
 244                        Can be 'only_download' to ignore only download errors.
 245                        Default is 'only_download' for CLI, but False for API
 246     skip_playlist_after_errors: Number of allowed failures until the rest of
 247                        the playlist is skipped
 248     force_generic_extractor: Force downloader to use the generic extractor
 249     overwrites:        Overwrite all video and metadata files if True,
 250                        overwrite only non-video files if None
 251                        and don't overwrite any file if False
 252                        For compatibility with youtube-dl,
 253                        "nooverwrites" may also be used instead
 254     playliststart:     Playlist item to start at.
 255     playlistend:       Playlist item to end at.
 256     playlist_items:    Specific indices of playlist to download.
 257     playlistreverse:   Download playlist items in reverse order.
 258     playlistrandom:    Download playlist items in random order.
 259     matchtitle:        Download only matching titles.
 260     rejecttitle:       Reject downloads for matching titles.
 261     logger:            Log messages to a logging.Logger instance.
 262     logtostderr:       Log messages to stderr instead of stdout.
 263     consoletitle:      Display progress in console window's titlebar.
 264     writedescription:  Write the video description to a .description file
 265     writeinfojson:     Write the video metadata to a .info.json file
 266     clean_infojson:    Remove private fields from the infojson
 267     getcomments:       Extract video comments. This will not be written to disk
 268                        unless writeinfojson is also given
 269     writeannotations:  Write the video annotations to a .annotations.xml file
 270     writethumbnail:    Write the thumbnail image to a file
 271     allow_playlist_files: Whether to write playlists' description, infojson etc
 272                        also to disk when using the 'write*' options
 273     write_all_thumbnails:  Write all thumbnail formats to files
 274     writelink:         Write an internet shortcut file, depending on the
 275                        current platform (.url/.webloc/.desktop)
 276     writeurllink:      Write a Windows internet shortcut file (.url)
 277     writewebloclink:   Write a macOS internet shortcut file (.webloc)
 278     writedesktoplink:  Write a Linux internet shortcut file (.desktop)
 279     writesubtitles:    Write the video subtitles to a file
 280     writeautomaticsub: Write the automatically generated subtitles to a file
 281     allsubtitles:      Deprecated - Use subtitleslangs = ['all']
 282                        Downloads all the subtitles of the video
 283                        (requires writesubtitles or writeautomaticsub)
 284     listsubtitles:     Lists all available subtitles for the video
 285     subtitlesformat:   The format code for subtitles
 286     subtitleslangs:    List of languages of the subtitles to download (can be regex).
 287                        The list may contain "all" to refer to all the available
 288                        subtitles. The language can be prefixed with a "-" to
 289                        exclude it from the requested languages. Eg: ['all', '-live_chat']
 290     keepvideo:         Keep the video file after post-processing
 291     daterange:         A DateRange object, download only if the upload_date is in the range.
 292     skip_download:     Skip the actual download of the video file
 293     cachedir:          Location of the cache files in the filesystem.
 294                        False to disable filesystem cache.
 295     noplaylist:        Download single video instead of a playlist if in doubt.
 296     age_limit:         An integer representing the user's age in years.
 297                        Unsuitable videos for the given age are skipped.
 298     min_views:         An integer representing the minimum view count the video
 299                        must have in order to not be skipped.
 300                        Videos without view count information are always
 301                        downloaded. None for no limit.
 302     max_views:         An integer representing the maximum view count.
 303                        Videos that are more popular than that are not
 304                        downloaded.
 305                        Videos without view count information are always
 306                        downloaded. None for no limit.
 307     download_archive:  File name of a file where all downloads are recorded.
 308                        Videos already present in the file are not downloaded
 309                        again.
 310     break_on_existing: Stop the download process after attempting to download a
 311                        file that is in the archive.
 312     break_on_reject:   Stop the download process when encountering a video that
 313                        has been filtered out.
 314     break_per_url:     Whether break_on_reject and break_on_existing
 315                        should act on each input URL as opposed to for the entire queue
 316     cookiefile:        File name where cookies should be read from and dumped to
 317     cookiesfrombrowser: A tuple containing the name of the browser and the profile
 318                        name/path from where cookies are loaded.
 319                        Eg: ('chrome', ) or ('vivaldi', 'default')
 320     nocheckcertificate:Do not verify SSL certificates
 321     prefer_insecure:   Use HTTP instead of HTTPS to retrieve information.
 322                        At the moment, this is only supported by YouTube.
 323     proxy:             URL of the proxy server to use
 324     geo_verification_proxy:  URL of the proxy to use for IP address verification
 325                        on geo-restricted sites.
 326     socket_timeout:    Time to wait for unresponsive hosts, in seconds
 327     bidi_workaround:   Work around buggy terminals without bidirectional text
 328                        support, using fribidi
 329     debug_printtraffic:Print out sent and received HTTP traffic
 330     include_ads:       Download ads as well
 331     default_search:    Prepend this string if an input url is not valid.
 332                        'auto' for elaborate guessing
 333     encoding:          Use this encoding instead of the system-specified.
 334     extract_flat:      Do not resolve URLs, return the immediate result.
 335                        Pass in 'in_playlist' to only show this behavior for
 336                        playlist items.
 337     wait_for_video:    If given, wait for scheduled streams to become available.
 338                        The value should be a tuple containing the range
 339                        (min_secs, max_secs) to wait between retries
 340     postprocessors:    A list of dictionaries, each with an entry
 341                        * key:  The name of the postprocessor. See
 342                                yt_dlp/postprocessor/__init__.py for a list.
 343                        * when: When to run the postprocessor. Can be one of
 344                                pre_process|before_dl|post_process|after_move.
 345                                Assumed to be 'post_process' if not given
 346     post_hooks:        Deprecated - Register a custom postprocessor instead
 347                        A list of functions that get called as the final step
 348                        for each video file, after all postprocessors have been
 349                        called. The filename will be passed as the only argument.
 350     progress_hooks:    A list of functions that get called on download
 351                        progress, with a dictionary with the entries
 352                        * status: One of "downloading", "error", or "finished".
 353                                  Check this first and ignore unknown values.
 354                        * info_dict: The extracted info_dict
 355
 356                        If status is one of "downloading" or "finished", the
 357                        following properties may also be present:
 358                        * filename: The final filename (always present)
 359                        * tmpfilename: The filename we're currently writing to
 360                        * downloaded_bytes: Bytes on disk
 361                        * total_bytes: Size of the whole file, None if unknown
 362                        * total_bytes_estimate: Guess of the eventual file size,
 363                                                None if unavailable.
 364                        * elapsed: The number of seconds since download started.
 365                        * eta: The estimated time in seconds, None if unknown
 366                        * speed: The download speed in bytes/second, None if
 367                                 unknown
 368                        * fragment_index: The counter of the currently
 369                                          downloaded video fragment.
 370                        * fragment_count: The number of fragments (= individual
 371                                          files that will be merged)
 372
 373                        Progress hooks are guaranteed to be called at least once
 374                        (with status "finished") if the download is successful.
 375     postprocessor_hooks:  A list of functions that get called on postprocessing
 376                        progress, with a dictionary with the entries
 377                        * status: One of "started", "processing", or "finished".
 378                                  Check this first and ignore unknown values.
 379                        * postprocessor: Name of the postprocessor
 380                        * info_dict: The extracted info_dict
 381
 382                        Postprocessor hooks are guaranteed to be called at least twice
 383                        (with status "started" and "finished") if the processing is successful.
 384     merge_output_format: Extension to use when merging formats.
 385     final_ext:         Expected final extension; used to detect when the file was
 386                        already downloaded and converted
 387     fixup:             Automatically correct known faults of the file.
 388                        One of:
 389                        - "never": do nothing
 390                        - "warn": only emit a warning
 391                        - "detect_or_warn": check whether we can do anything
 392                                            about it, warn otherwise (default)
 393     source_address:    Client-side IP address to bind to.
 394     call_home:         Boolean, true iff we are allowed to contact the
 395                        yt-dlp servers for debugging. (BROKEN)
 396     sleep_interval_requests: Number of seconds to sleep between requests
 397                        during extraction
 398     sleep_interval:    Number of seconds to sleep before each download when
 399                        used alone or a lower bound of a range for randomized
 400                        sleep before each download (minimum possible number
 401                        of seconds to sleep) when used along with
 402                        max_sleep_interval.
 403     max_sleep_interval:Upper bound of a range for randomized sleep before each
 404                        download (maximum possible number of seconds to sleep).
 405                        Must only be used along with sleep_interval.
 406                        Actual sleep time will be a random float from range
 407                        [sleep_interval; max_sleep_interval].
 408     sleep_interval_subtitles: Number of seconds to sleep before each subtitle download
 409     listformats:       Print an overview of available video formats and exit.
 410     list_thumbnails:   Print a table of all thumbnails and exit.
 411     match_filter:      A function that gets called with the info_dict of
 412                        every video.
 413                        If it returns a message, the video is ignored.
 414                        If it returns None, the video is downloaded.
 415                        match_filter_func in utils.py is one example for this.
 416     no_color:          Do not emit color codes in output.
 417     geo_bypass:        Bypass geographic restriction via faking X-Forwarded-For
 418                        HTTP header
 419     geo_bypass_country:
 420                        ISO 3166-1 alpha-2 (two-letter) country code that will be used for
 421                        explicit geographic restriction bypassing via faking
 422                        X-Forwarded-For HTTP header
 423     geo_bypass_ip_block:
 424                        IP range in CIDR notation that will be used similarly to
 425                        geo_bypass_country
 426
 427     The following options determine which downloader is picked:
 428     external_downloader: A dictionary of protocol keys and the executable of the
 429                        external downloader to use for it. The allowed protocols
 430                        are default|http|ftp|m3u8|dash|rtsp|rtmp|mms.
 431                        Set the value to 'native' to use the native downloader
 432     hls_prefer_native: Deprecated - Use external_downloader = {'m3u8': 'native'}
 433                        or {'m3u8': 'ffmpeg'} instead.
 434                        Use the native HLS downloader instead of ffmpeg/avconv
 435                        if True, otherwise use ffmpeg/avconv if False, otherwise
 436                        use downloader suggested by extractor if None.
 437     compat_opts:       Compatibility options. See "Differences in default behavior".
 438                        The following options do not work when used through the API:
 439                        filename, abort-on-error, multistreams, no-live-chat, format-sort,
 440                        no-clean-infojson, no-playlist-metafiles, no-keep-subs, no-attach-info-json.
 441                        Refer to __init__.py for their implementation
 442     progress_template: Dictionary of templates for progress outputs.
 443                        Allowed keys are 'download', 'postprocess',
 444                        'download-title' (console title) and 'postprocess-title'.
 445                        The template is mapped on a dictionary with keys 'progress' and 'info'
 446
 447     The following parameters are not used by YoutubeDL itself, they are used by
 448     the downloader (see yt_dlp/downloader/common.py):
 449     nopart, updatetime, buffersize, ratelimit, throttledratelimit, min_filesize,
 450     max_filesize, test, noresizebuffer, retries, fragment_retries, continuedl,
 451     noprogress, xattr_set_filesize, hls_use_mpegts, http_chunk_size,
 452     external_downloader_args, concurrent_fragment_downloads.
 453
 454     The following options are used by the post processors:
 455     prefer_ffmpeg:     If False, use avconv instead of ffmpeg if both are available,
 456                        otherwise prefer ffmpeg. (avconv support is deprecated)
 457     ffmpeg_location:   Location of the ffmpeg/avconv binary; either the path
 458                        to the binary or its containing directory.
 459     postprocessor_args: A dictionary of postprocessor/executable keys (in lower case)
 460                        and a list of additional command-line arguments for the
 461                        postprocessor/executable. The dict can also have "PP+EXE" keys
 462                        which are used when the given exe is used by the given PP.
 463                        Use 'default' as the name for arguments to be passed to all PP
 464                        For compatibility with youtube-dl, a single list of args
 465                        can also be used
 466
 467     The following options are used by the extractors:
 468     extractor_retries: Number of times to retry for known errors
 469     dynamic_mpd:       Whether to process dynamic DASH manifests (default: True)
 470     hls_split_discontinuity: Split HLS playlists to different formats at
 471                        discontinuities such as ad breaks (default: False)
 472     extractor_args:    A dictionary of arguments to be passed to the extractors.
 473                        See "EXTRACTOR ARGUMENTS" for details.
 474                        Eg: {'youtube': {'skip': ['dash', 'hls']}}
 475     youtube_include_dash_manifest: Deprecated - Use extractor_args instead.
 476                        If True (default), DASH manifests and related
 477                        data will be downloaded and processed by extractor.
 478                        You can reduce network I/O by disabling it if you don't
 479                        care about DASH. (only for youtube)
 480     youtube_include_hls_manifest: Deprecated - Use extractor_args instead.
 481                        If True (default), HLS manifests and related
 482                        data will be downloaded and processed by extractor.
 483                        You can reduce network I/O by disabling it if you don't
 484                        care about HLS. (only for youtube)
 485     """
 486
 487     _NUMERIC_FIELDS = set((
 488         'width', 'height', 'tbr', 'abr', 'asr', 'vbr', 'fps', 'filesize', 'filesize_approx',
 489         'timestamp', 'release_timestamp',
 490         'duration', 'view_count', 'like_count', 'dislike_count', 'repost_count',
 491         'average_rating', 'comment_count', 'age_limit',
 492         'start_time', 'end_time',
 493         'chapter_number', 'season_number', 'episode_number',
 494         'track_number', 'disc_number', 'release_year',
 495     ))
 496
 497     _format_selection_exts = {
 498         'audio': {'m4a', 'mp3', 'ogg', 'aac'},
 499         'video': {'mp4', 'flv', 'webm', '3gp'},
 500         'storyboards': {'mhtml'},
 501     }
 502
 503     params = None
 504     _ies = {}
 505     _pps = {'pre_process': [], 'before_dl': [], 'after_move': [], 'post_process': []}
 506     _printed_messages = set()
 507     _first_webpage_request = True
 508     _download_retcode = None
 509     _num_downloads = None
 510     _playlist_level = 0
 511     _playlist_urls = set()
 512     _screen_file = None
 513
 514     def __init__(self, params=None, auto_init=True):
 515         """Create a YoutubeDL object with the given options.
 516         @param auto_init    Whether to load the default extractors and print header (if verbose).
 517                             Set to 'no_verbose_header' to not print the header
 518         """
 519         if params is None:
 520             params = {}
 521         self._ies = {}
 522         self._ies_instances = {}
 523         self._pps = {'pre_process': [], 'before_dl': [], 'after_move': [], 'post_process': []}
 524         self._printed_messages = set()
 525         self._first_webpage_request = True
 526         self._post_hooks = []
 527         self._progress_hooks = []
 528         self._postprocessor_hooks = []
 529         self._download_retcode = 0
 530         self._num_downloads = 0
 531         self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
 532         self._err_file = sys.stderr
 533         self.params = params
 534         self.cache = Cache(self)
 535
 536         windows_enable_vt_mode()
 537         self._allow_colors = {
 538             'screen': not self.params.get('no_color') and supports_terminal_sequences(self._screen_file),
 539             'err': not self.params.get('no_color') and supports_terminal_sequences(self._err_file),
 540         }
 541
 542         if sys.version_info < (3, 6):
 543             self.report_warning(
 544                 'Python version %d.%d is not supported! Please update to Python 3.6 or above' % sys.version_info[:2])
 545
 546         if self.params.get('allow_unplayable_formats'):
 547             self.report_warning(
 548                 f'You have asked for {self._format_err("UNPLAYABLE", self.Styles.EMPHASIS)} formats to be listed/downloaded. '
 549                 'This is a developer option intended for debugging. \n'
 550                 '         If you experience any issues while using this option, '
 551                 f'{self._format_err("DO NOT", self.Styles.ERROR)} open a bug report')
 552
 553         def check_deprecated(param, option, suggestion):
 554             if self.params.get(param) is not None:
 555                 self.report_warning('%s is deprecated. Use %s instead' % (option, suggestion))
 556                 return True
 557             return False
 558
 559         if check_deprecated('cn_verification_proxy', '--cn-verification-proxy', '--geo-verification-proxy'):
 560             if self.params.get('geo_verification_proxy') is None:
 561                 self.params['geo_verification_proxy'] = self.params['cn_verification_proxy']
 562
 563         check_deprecated('autonumber', '--auto-number', '-o "%(autonumber)s-%(title)s.%(ext)s"')
 564         check_deprecated('usetitle', '--title', '-o "%(title)s-%(id)s.%(ext)s"')
 565         check_deprecated('useid', '--id', '-o "%(id)s.%(ext)s"')
 566
 567         for msg in self.params.get('_warnings', []):
 568             self.report_warning(msg)
 569         for msg in self.params.get('_deprecation_warnings', []):
 570             self.deprecation_warning(msg)
 571
 572         if 'list-formats' in self.params.get('compat_opts', []):
 573             self.params['listformats_table'] = False
 574
 575         if 'overwrites' not in self.params and self.params.get('nooverwrites') is not None:
 576             # nooverwrites was unnecessarily changed to overwrites
 577             # in 0c3d0f51778b153f65c21906031c2e091fcfb641
 578             # This ensures compatibility with both keys
 579             self.params['overwrites'] = not self.params['nooverwrites']
 580         elif self.params.get('overwrites') is None:
 581             self.params.pop('overwrites', None)
 582         else:
 583             self.params['nooverwrites'] = not self.params['overwrites']
 584
 585         if params.get('bidi_workaround', False):
 586             try:
 587                 import pty
 588                 master, slave = pty.openpty()
 589                 width = compat_get_terminal_size().columns
 590                 if width is None:
 591                     width_args = []
 592                 else:
 593                     width_args = ['-w', str(width)]
 594                 sp_kwargs = dict(
 595                     stdin=subprocess.PIPE,
 596                     stdout=slave,
 597                     stderr=self._err_file)
 598                 try:
 599                     self._output_process = Popen(['bidiv'] + width_args, **sp_kwargs)
 600                 except OSError:
 601                     self._output_process = Popen(['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
 602                 self._output_channel = os.fdopen(master, 'rb')
 603             except OSError as ose:
 604                 if ose.errno == errno.ENOENT:
 605                     self.report_warning(
 606                         'Could not find fribidi executable, ignoring --bidi-workaround. '
 607                         'Make sure that  fribidi  is an executable file in one of the directories in your $PATH.')
 608                 else:
 609                     raise
 610
 611         if (sys.platform != 'win32'
 612                 and sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
 613                 and not params.get('restrictfilenames', False)):
 614             # Unicode filesystem API will throw errors (#1474, #13027)
 615             self.report_warning(
 616                 'Assuming --restrict-filenames since file system encoding '
 617                 'cannot encode all characters. '
 618                 'Set the LC_ALL environment variable to fix this.')
 619             self.params['restrictfilenames'] = True
 620
 621         self.outtmpl_dict = self.parse_outtmpl()
 622
 623         # Creating format selector here allows us to catch syntax errors before the extraction
 624         self.format_selector = (
 625             None if self.params.get('format') is None
 626             else self.params['format'] if callable(self.params['format'])
 627             else self.build_format_selector(self.params['format']))
 628
 629         self._setup_opener()
 630
 631         if auto_init:
 632             if auto_init != 'no_verbose_header':
 633                 self.print_debug_header()
 634             self.add_default_info_extractors()
 635
 636         for pp_def_raw in self.params.get('postprocessors', []):
 637             pp_def = dict(pp_def_raw)
 638             when = pp_def.pop('when', 'post_process')
 639             pp_class = get_postprocessor(pp_def.pop('key'))
 640             pp = pp_class(self, **compat_kwargs(pp_def))
 641             self.add_post_processor(pp, when=when)
 642
 643         hooks = {
 644             'post_hooks': self.add_post_hook,
 645             'progress_hooks': self.add_progress_hook,
 646             'postprocessor_hooks': self.add_postprocessor_hook,
 647         }
 648         for opt, fn in hooks.items():
 649             for ph in self.params.get(opt, []):
 650                 fn(ph)
 651
 652         register_socks_protocols()
 653
 654         def preload_download_archive(fn):
 655             """Preload the archive, if any is specified"""
 656             if fn is None:
 657                 return False
 658             self.write_debug(f'Loading archive file {fn!r}')
 659             try:
 660                 with locked_file(fn, 'r', encoding='utf-8') as archive_file:
 661                     for line in archive_file:
 662                         self.archive.add(line.strip())
 663             except IOError as ioe:
 664                 if ioe.errno != errno.ENOENT:
 665                     raise
 666                 return False
 667             return True
 668
 669         self.archive = set()
 670         preload_download_archive(self.params.get('download_archive'))
 671
 672     def warn_if_short_id(self, argv):
 673         # short YouTube ID starting with dash?
 674         idxs = [
 675             i for i, a in enumerate(argv)
 676             if re.match(r'^-[0-9A-Za-z_-]{10}$', a)]
 677         if idxs:
 678             correct_argv = (
 679                 ['yt-dlp']
 680                 + [a for i, a in enumerate(argv) if i not in idxs]
 681                 + ['--'] + [argv[i] for i in idxs]
 682             )
 683             self.report_warning(
 684                 'Long argument string detected. '
 685                 'Use -- to separate parameters and URLs, like this:\n%s' %
 686                 args_to_str(correct_argv))
 687
 688     def add_info_extractor(self, ie):
 689         """Add an InfoExtractor object to the end of the list."""
 690         ie_key = ie.ie_key()
 691         self._ies[ie_key] = ie
 692         if not isinstance(ie, type):
 693             self._ies_instances[ie_key] = ie
 694             ie.set_downloader(self)
 695
 696     def _get_info_extractor_class(self, ie_key):
 697         ie = self._ies.get(ie_key)
 698         if ie is None:
 699             ie = get_info_extractor(ie_key)
 700             self.add_info_extractor(ie)
 701         return ie
 702
 703     def get_info_extractor(self, ie_key):
 704         """
 705         Get an instance of an IE with name ie_key, it will try to get one from
 706         the _ies list, if there's no instance it will create a new one and add
 707         it to the extractor list.
 708         """
 709         ie = self._ies_instances.get(ie_key)
 710         if ie is None:
 711             ie = get_info_extractor(ie_key)()
 712             self.add_info_extractor(ie)
 713         return ie
 714
 715     def add_default_info_extractors(self):
 716         """
 717         Add the InfoExtractors returned by gen_extractors to the end of the list
 718         """
 719         for ie in gen_extractor_classes():
 720             self.add_info_extractor(ie)
 721
 722     def add_post_processor(self, pp, when='post_process'):
 723         """Add a PostProcessor object to the end of the chain."""
 724         self._pps[when].append(pp)
 725         pp.set_downloader(self)
 726
 727     def add_post_hook(self, ph):
 728         """Add the post hook"""
 729         self._post_hooks.append(ph)
 730
 731     def add_progress_hook(self, ph):
 732         """Add the download progress hook"""
 733         self._progress_hooks.append(ph)
 734
 735     def add_postprocessor_hook(self, ph):
 736         """Add the postprocessing progress hook"""
 737         self._postprocessor_hooks.append(ph)
 738
 739     def _bidi_workaround(self, message):
 740         if not hasattr(self, '_output_channel'):
 741             return message
 742
 743         assert hasattr(self, '_output_process')
 744         assert isinstance(message, compat_str)
 745         line_count = message.count('\n') + 1
 746         self._output_process.stdin.write((message + '\n').encode('utf-8'))
 747         self._output_process.stdin.flush()
 748         res = ''.join(self._output_channel.readline().decode('utf-8')
 749                       for _ in range(line_count))
 750         return res[:-len('\n')]
 751
 752     def _write_string(self, message, out=None, only_once=False):
 753         if only_once:
 754             if message in self._printed_messages:
 755                 return
 756             self._printed_messages.add(message)
 757         write_string(message, out=out, encoding=self.params.get('encoding'))
 758
 759     def to_stdout(self, message, skip_eol=False, quiet=False):
 760         """Print message to stdout"""
 761         if self.params.get('logger'):
 762             self.params['logger'].debug(message)
 763         elif not quiet or self.params.get('verbose'):
 764             self._write_string(
 765                 '%s%s' % (self._bidi_workaround(message), ('' if skip_eol else '\n')),
 766                 self._err_file if quiet else self._screen_file)
 767
 768     def to_stderr(self, message, only_once=False):
 769         """Print message to stderr"""
 770         assert isinstance(message, compat_str)
 771         if self.params.get('logger'):
 772             self.params['logger'].error(message)
 773         else:
 774             self._write_string('%s\n' % self._bidi_workaround(message), self._err_file, only_once=only_once)
 775
 776     def to_console_title(self, message):
 777         if not self.params.get('consoletitle', False):
 778             return
 779         if compat_os_name == 'nt':
 780             if ctypes.windll.kernel32.GetConsoleWindow():
 781                 # c_wchar_p() might not be necessary if `message` is
 782                 # already of type unicode()
 783                 ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
 784         elif 'TERM' in os.environ:
 785             self._write_string('\033]0;%s\007' % message, self._screen_file)
 786
 787     def save_console_title(self):
 788         if not self.params.get('consoletitle', False):
 789             return
 790         if self.params.get('simulate'):
 791             return
 792         if compat_os_name != 'nt' and 'TERM' in os.environ:
 793             # Save the title on stack
 794             self._write_string('\033[22;0t', self._screen_file)
 795
 796     def restore_console_title(self):
 797         if not self.params.get('consoletitle', False):
 798             return
 799         if self.params.get('simulate'):
 800             return
 801         if compat_os_name != 'nt' and 'TERM' in os.environ:
 802             # Restore the title from stack
 803             self._write_string('\033[23;0t', self._screen_file)
 804
    def __enter__(self):
        """Enter the context: save the console title and return this instance."""
        self.save_console_title()
        return self
 808
 809     def __exit__(self, *args):
 810         self.restore_console_title()
 811
 812         if self.params.get('cookiefile') is not None:
 813             self.cookiejar.save(ignore_discard=True, ignore_expires=True)
 814
 815     def trouble(self, message=None, tb=None):
 816         """Determine action to take when a download problem appears.
 817
 818         Depending on if the downloader has been configured to ignore
 819         download errors or not, this method may throw an exception or
 820         not when errors are found, after printing the message.
 821
 822         tb, if given, is additional traceback information.
 823         """
 824         if message is not None:
 825             self.to_stderr(message)
 826         if self.params.get('verbose'):
 827             if tb is None:
 828                 if sys.exc_info()[0]:  # if .trouble has been called from an except block
 829                     tb = ''
 830                     if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
 831                         tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
 832                     tb += encode_compat_str(traceback.format_exc())
 833                 else:
 834                     tb_data = traceback.format_list(traceback.extract_stack())
 835                     tb = ''.join(tb_data)
 836             if tb:
 837                 self.to_stderr(tb)
 838         if not self.params.get('ignoreerrors'):
 839             if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
 840                 exc_info = sys.exc_info()[1].exc_info
 841             else:
 842                 exc_info = sys.exc_info()
 843             raise DownloadError(message, exc_info)
 844         self._download_retcode = 1
 845
 846     def to_screen(self, message, skip_eol=False):
 847         """Print message to stdout if not in quiet mode"""
 848         self.to_stdout(
 849             message, skip_eol, quiet=self.params.get('quiet', False))
 850
    class Styles(Enum):
        # Named text styles. Each member's value is the style string that
        # _format_text hands to format_text when colors are allowed.
        HEADERS = 'yellow'
        EMPHASIS = 'light blue'
        ID = 'green'
        DELIM = 'blue'
        ERROR = 'red'
        WARNING = 'yellow'
        SUPPRESS = 'light black'
 859
 860     def _format_text(self, handle, allow_colors, text, f, fallback=None, *, test_encoding=False):
 861         if test_encoding:
 862             original_text = text
 863             encoding = self.params.get('encoding') or getattr(handle, 'encoding', 'ascii')
 864             text = text.encode(encoding, 'ignore').decode(encoding)
 865             if fallback is not None and text != original_text:
 866                 text = fallback
 867         if isinstance(f, self.Styles):
 868             f = f.value
 869         return format_text(text, f) if allow_colors else text if fallback is None else fallback
 870
 871     def _format_screen(self, *args, **kwargs):
 872         return self._format_text(
 873             self._screen_file, self._allow_colors['screen'], *args, **kwargs)
 874
 875     def _format_err(self, *args, **kwargs):
 876         return self._format_text(
 877             self._err_file, self._allow_colors['err'], *args, **kwargs)
 878
 879     def report_warning(self, message, only_once=False):
 880         '''
 881         Print the message to stderr, it will be prefixed with 'WARNING:'
 882         If stderr is a tty file the 'WARNING:' will be colored
 883         '''
 884         if self.params.get('logger') is not None:
 885             self.params['logger'].warning(message)
 886         else:
 887             if self.params.get('no_warnings'):
 888                 return
 889             self.to_stderr(f'{self._format_err("WARNING:", self.Styles.WARNING)} {message}', only_once)
 890
 891     def deprecation_warning(self, message):
 892         if self.params.get('logger') is not None:
 893             self.params['logger'].warning('DeprecationWarning: {message}')
 894         else:
 895             self.to_stderr(f'{self._format_err("DeprecationWarning:", self.Styles.ERROR)} {message}', True)
 896
 897     def report_error(self, message, tb=None):
 898         '''
 899         Do the same as trouble, but prefixes the message with 'ERROR:', colored
 900         in red if stderr is a tty file.
 901         '''
 902         self.trouble(f'{self._format_err("ERROR:", self.Styles.ERROR)} {message}', tb)
 903
 904     def write_debug(self, message, only_once=False):
 905         '''Log debug message or Print message to stderr'''
 906         if not self.params.get('verbose', False):
 907             return
 908         message = '[debug] %s' % message
 909         if self.params.get('logger'):
 910             self.params['logger'].debug(message)
 911         else:
 912             self.to_stderr(message, only_once)
 913
 914     def report_file_already_downloaded(self, file_name):
 915         """Report file has already been fully downloaded."""
 916         try:
 917             self.to_screen('[download] %s has already been downloaded' % file_name)
 918         except UnicodeEncodeError:
 919             self.to_screen('[download] The file has already been downloaded')
 920
 921     def report_file_delete(self, file_name):
 922         """Report that existing file will be deleted."""
 923         try:
 924             self.to_screen('Deleting existing file %s' % file_name)
 925         except UnicodeEncodeError:
 926             self.to_screen('Deleting existing file')
 927
 928     def raise_no_formats(self, info, forced=False):
 929         has_drm = info.get('__has_drm')
 930         msg = 'This video is DRM protected' if has_drm else 'No video formats found!'
 931         expected = self.params.get('ignore_no_formats_error')
 932         if forced or not expected:
 933             raise ExtractorError(msg, video_id=info['id'], ie=info['extractor'],
 934                                  expected=has_drm or expected)
 935         else:
 936             self.report_warning(msg)
 937
 938     def parse_outtmpl(self):
 939         outtmpl_dict = self.params.get('outtmpl', {})
 940         if not isinstance(outtmpl_dict, dict):
 941             outtmpl_dict = {'default': outtmpl_dict}
 942         # Remove spaces in the default template
 943         if self.params.get('restrictfilenames'):
 944             sanitize = lambda x: x.replace(' - ', ' ').replace(' ', '-')
 945         else:
 946             sanitize = lambda x: x
 947         outtmpl_dict.update({
 948             k: sanitize(v) for k, v in DEFAULT_OUTTMPL.items()
 949             if outtmpl_dict.get(k) is None})
 950         for key, val in outtmpl_dict.items():
 951             if isinstance(val, bytes):
 952                 self.report_warning(
 953                     'Parameter outtmpl is bytes, but should be a unicode string. '
 954                     'Put  from __future__ import unicode_literals  at the top of your code file or consider switching to Python 3.x.')
 955         return outtmpl_dict
 956
 957     def get_output_path(self, dir_type='', filename=None):
 958         paths = self.params.get('paths', {})
 959         assert isinstance(paths, dict)
 960         path = os.path.join(
 961             expand_path(paths.get('home', '').strip()),
 962             expand_path(paths.get(dir_type, '').strip()) if dir_type else '',
 963             filename or '')
 964
 965         # Temporary fix for #4787
 966         # 'Treat' all problem characters by passing filename through preferredencoding
 967         # to workaround encoding issues with subprocess on python2 @ Windows
 968         if sys.version_info < (3, 0) and sys.platform == 'win32':
 969             path = encodeFilename(path, True).decode(preferredencoding())
 970         return sanitize_path(path, force=self.params.get('windowsfilenames'))
 971
 972     @staticmethod
 973     def _outtmpl_expandpath(outtmpl):
 974         # expand_path translates '%%' into '%' and '$$' into '$'
 975         # correspondingly that is not what we want since we need to keep
 976         # '%%' intact for template dict substitution step. Working around
 977         # with boundary-alike separator hack.
 978         sep = ''.join([random.choice(ascii_letters) for _ in range(32)])
 979         outtmpl = outtmpl.replace('%%', '%{0}%'.format(sep)).replace('$$', '${0}$'.format(sep))
 980
 981         # outtmpl should be expand_path'ed before template dict substitution
 982         # because meta fields may contain env variables we don't want to
 983         # be expanded. For example, for outtmpl "%(title)s.%(ext)s" and
 984         # title "Hello $PATH", we don't want `$PATH` to be expanded.
 985         return expand_path(outtmpl).replace(sep, '')
 986
 987     @staticmethod
 988     def escape_outtmpl(outtmpl):
 989         ''' Escape any remaining strings like %s, %abc% etc. '''
 990         return re.sub(
 991             STR_FORMAT_RE_TMPL.format('', '(?![%(\0])'),
 992             lambda mobj: ('' if mobj.group('has_key') else '%') + mobj.group(0),
 993             outtmpl)
 994
 995     @classmethod
 996     def validate_outtmpl(cls, outtmpl):
 997         ''' @return None or Exception object '''
 998         outtmpl = re.sub(
 999             STR_FORMAT_RE_TMPL.format('[^)]*', '[ljqBU]'),
1000             lambda mobj: f'{mobj.group(0)[:-1]}s',
1001             cls._outtmpl_expandpath(outtmpl))
1002         try:
1003             cls.escape_outtmpl(outtmpl) % collections.defaultdict(int)
1004             return None
1005         except ValueError as err:
1006             return err
1007
1008     @staticmethod
1009     def _copy_infodict(info_dict):
1010         info_dict = dict(info_dict)
1011         for key in ('__original_infodict', '__postprocessors'):
1012             info_dict.pop(key, None)
1013         return info_dict
1014
    def prepare_outtmpl(self, outtmpl, info_dict, sanitize=None):
        """ Make the outtmpl and info_dict suitable for substitution: ydl.escape_outtmpl(outtmpl) % info_dict

        Every "%(key)X" occurrence in outtmpl is evaluated against a copy of
        info_dict and rewritten so that it refers to an entry of a freshly
        built dict of computed values.

        @param outtmpl    The output template string
        @param info_dict  The info dict. Only 'epoch' may be added to the
                          dict passed in; all other changes happen on a copy
        @param sanitize   Optional callable (field_name, value) applied to
                          values formatted with the c, s or r conversions
        @return           Tuple (rewritten outtmpl, dict of computed values)
        """
        info_dict.setdefault('epoch', int(time.time()))  # keep epoch consistent once set

        info_dict = self._copy_infodict(info_dict)
        info_dict['duration_string'] = (  # %(duration>%H-%M-%S)s is wrong if duration > 24hrs
            formatSeconds(info_dict['duration'], '-' if sanitize else ':')
            if info_dict.get('duration', None) is not None
            else None)
        info_dict['autonumber'] = self.params.get('autonumber_start', 1) - 1 + self._num_downloads
        if info_dict.get('resolution') is None:
            info_dict['resolution'] = self.format_resolution(info_dict, default=None)

        # For fields playlist_index, playlist_autonumber and autonumber convert all occurrences
        # of %(field)s to %(field)0Nd for backward compatibility
        field_size_compat_map = {
            'playlist_index': number_of_digits(info_dict.get('_last_playlist_index') or 0),
            'playlist_autonumber': number_of_digits(info_dict.get('n_entries') or 0),
            'autonumber': self.params.get('autonumber_size') or 5,
        }

        # Computed values, keyed by the rewritten template keys built in create_key
        TMPL_DICT = {}
        EXTERNAL_FORMAT_RE = re.compile(STR_FORMAT_RE_TMPL.format('[^)]*', f'[{STR_FORMAT_TYPES}ljqBU]'))
        # Operators accepted in the maths part of a key; operands are floats
        MATH_FUNCTIONS = {
            '+': float.__add__,
            '-': float.__sub__,
        }
        # Field is of the form key1.key2...
        # where keys (except first) can be string, int or slice
        FIELD_RE = r'\w*(?:\.(?:\w+|{num}|{num}?(?::{num}?){{1,2}}))*'.format(num=r'(?:-?\d+)')
        # NOTE(review): the '.' in the num pattern below is unescaped, so it matches
        # any character - presumably a literal decimal point was intended; confirm
        MATH_FIELD_RE = r'''{field}|{num}'''.format(field=FIELD_RE, num=r'-?\d+(?:.\d+)?')
        MATH_OPERATORS_RE = r'(?:%s)' % '|'.join(map(re.escape, MATH_FUNCTIONS.keys()))
        # Grammar of a single key:
        # [-]fields[maths][>strf_format][,alternate][|default]
        INTERNAL_FORMAT_RE = re.compile(r'''(?x)
            (?P<negate>-)?
            (?P<fields>{field})
            (?P<maths>(?:{math_op}{math_field})*)
            (?:>(?P<strf_format>.+?))?
            (?P<alternate>(?<!\\),[^|)]+)?
            (?:\|(?P<default>.*?))?
            $'''.format(field=FIELD_RE, math_op=MATH_OPERATORS_RE, math_field=MATH_FIELD_RE))

        # Look up a dotted path in the copied info_dict; a leading '.' is ignored
        def _traverse_infodict(k):
            k = k.split('.')
            if k[0] == '':
                k.pop(0)
            return traverse_obj(info_dict, k, is_user_input=True, traverse_string=True)

        # Evaluate one parsed key (groupdict of INTERNAL_FORMAT_RE): traverse,
        # negate, apply the maths, then the strftime-style formatting
        def get_value(mdict):
            # Object traversal
            value = _traverse_infodict(mdict['fields'])
            # Negative
            if mdict['negate']:
                value = float_or_none(value)
                if value is not None:
                    value *= -1
            # Do maths
            offset_key = mdict['maths']
            if offset_key:
                value = float_or_none(value)
                operator = None
                # Consume offset_key piece by piece, alternating operator and operand
                while offset_key:
                    item = re.match(
                        MATH_FIELD_RE if operator else MATH_OPERATORS_RE,
                        offset_key).group(0)
                    offset_key = offset_key[len(item):]
                    if operator is None:
                        operator = MATH_FUNCTIONS[item]
                        continue
                    item, multiplier = (item[1:], -1) if item[0] == '-' else (item, 1)
                    # The operand is a number if it parses as one, else a field lookup
                    offset = float_or_none(item)
                    if offset is None:
                        offset = float_or_none(_traverse_infodict(item))
                    try:
                        value = operator(value, multiplier * offset)
                    except (TypeError, ZeroDivisionError):
                        # e.g. value or offset is None
                        return None
                    operator = None
            # Datetime formatting
            if mdict['strf_format']:
                value = strftime_or_none(value, mdict['strf_format'].replace('\\,', ','))

            return value

        # Placeholder used when neither a value nor an explicit default is available
        na = self.params.get('outtmpl_na_placeholder', 'NA')

        # json.dumps fallback: sets and LazyList are serialized as lists
        def _dumpjson_default(obj):
            if isinstance(obj, (set, LazyList)):
                return list(obj)
            raise TypeError(f'Object of type {type(obj).__name__} is not JSON serializable')

        # Substitution callback for EXTERNAL_FORMAT_RE: computes the value for one
        # template field, stores it in TMPL_DICT and returns the rewritten field
        def create_key(outer_mobj):
            if not outer_mobj.group('has_key'):
                return outer_mobj.group(0)
            key = outer_mobj.group('key')
            mobj = re.match(INTERNAL_FORMAT_RE, key)
            # Last path component of the first field; passed to sanitize below
            initial_field = mobj.group('fields').split('.')[-1] if mobj else ''
            value, default = None, na
            # Walk the chain of ','-separated alternates until one yields a value
            while mobj:
                mobj = mobj.groupdict()
                default = mobj['default'] if mobj['default'] is not None else default
                value = get_value(mobj)
                if value is None and mobj['alternate']:
                    mobj = re.match(INTERNAL_FORMAT_RE, mobj['alternate'][1:])
                else:
                    break

            fmt = outer_mobj.group('format')
            if fmt == 's' and value is not None and key in field_size_compat_map.keys():
                fmt = '0{:d}d'.format(field_size_compat_map[key])

            value = default if value is None else value

            flags = outer_mobj.group('conversion') or ''
            # Same format spec with the conversion character replaced by 's'
            str_fmt = f'{fmt[:-1]}s'
            if fmt[-1] == 'l':  # list
                delim = '\n' if '#' in flags else ', '
                value, fmt = delim.join(variadic(value)), str_fmt
            elif fmt[-1] == 'j':  # json
                value, fmt = json.dumps(value, default=_dumpjson_default, indent=4 if '#' in flags else None), str_fmt
            elif fmt[-1] == 'q':  # quoted
                value = map(str, variadic(value) if '#' in flags else [value])
                value, fmt = ' '.join(map(compat_shlex_quote, value)), str_fmt
            elif fmt[-1] == 'B':  # bytes
                value = f'%{str_fmt}'.encode('utf-8') % str(value).encode('utf-8')
                value, fmt = value.decode('utf-8', 'ignore'), 's'
            elif fmt[-1] == 'U':  # unicode normalized
                value, fmt = unicodedata.normalize(
                    # "+" = compatibility equivalence, "#" = NFD
                    'NF%s%s' % ('K' if '+' in flags else '', 'D' if '#' in flags else 'C'),
                    value), str_fmt
            elif fmt[-1] == 'c':
                if value:
                    value = str(value)[0]
                else:
                    fmt = str_fmt
            elif fmt[-1] not in 'rs':  # numeric
                value = float_or_none(value)
                if value is None:
                    value, fmt = default, 's'

            if sanitize:
                if fmt[-1] == 'r':
                    # If value is an object, sanitize might convert it to a string
                    # So we convert it to repr first
                    value, fmt = repr(value), str_fmt
                if fmt[-1] in 'csr':
                    value = sanitize(initial_field, value)

            # The dict key combines the original key (with each '%' followed by a NUL)
            # and the original format, joined by a NUL
            key = '%s\0%s' % (key.replace('%', '%\0'), outer_mobj.group('format'))
            TMPL_DICT[key] = value
            return '{prefix}%({key}){fmt}'.format(key=key, fmt=fmt, prefix=outer_mobj.group('prefix'))

        return EXTERNAL_FORMAT_RE.sub(create_key, outtmpl), TMPL_DICT
1168
1169     def evaluate_outtmpl(self, outtmpl, info_dict, *args, **kwargs):
1170         outtmpl, info_dict = self.prepare_outtmpl(outtmpl, info_dict, *args, **kwargs)
1171         return self.escape_outtmpl(outtmpl) % info_dict
1172
1173     def _prepare_filename(self, info_dict, tmpl_type='default'):
1174         try:
1175             sanitize = lambda k, v: sanitize_filename(
1176                 compat_str(v),
1177                 restricted=self.params.get('restrictfilenames'),
1178                 is_id=(k == 'id' or k.endswith('_id')))
1179             outtmpl = self._outtmpl_expandpath(self.outtmpl_dict.get(tmpl_type, self.outtmpl_dict['default']))
1180             filename = self.evaluate_outtmpl(outtmpl, info_dict, sanitize)
1181
1182             force_ext = OUTTMPL_TYPES.get(tmpl_type)
1183             if filename and force_ext is not None:
1184                 filename = replace_extension(filename, force_ext, info_dict.get('ext'))
1185
1186             # https://github.com/blackjack4494/youtube-dlc/issues/85
1187             trim_file_name = self.params.get('trim_file_name', False)
1188             if trim_file_name:
1189                 no_ext, *ext = filename.rsplit('.', 2)
1190                 filename = join_nonempty(no_ext[:trim_file_name], *ext, delim='.')
1191
1192             return filename
1193         except ValueError as err:
1194             self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
1195             return None
1196
1197     def prepare_filename(self, info_dict, dir_type='', warn=False):
1198         """Generate the output filename."""
1199
1200         filename = self._prepare_filename(info_dict, dir_type or 'default')
1201         if not filename and dir_type not in ('', 'temp'):
1202             return ''
1203
1204         if warn:
1205             if not self.params.get('paths'):
1206                 pass
1207             elif filename == '-':
1208                 self.report_warning('--paths is ignored when an outputting to stdout', only_once=True)
1209             elif os.path.isabs(filename):
1210                 self.report_warning('--paths is ignored since an absolute path is given in output template', only_once=True)
1211         if filename == '-' or not filename:
1212             return filename
1213
1214         return self.get_output_path(dir_type, filename)
1215
1216     def _match_entry(self, info_dict, incomplete=False, silent=False):
1217         """ Returns None if the file should be downloaded """
1218
1219         video_title = info_dict.get('title', info_dict.get('id', 'video'))
1220
1221         def check_filter():
1222             if 'title' in info_dict:
1223                 # This can happen when we're just evaluating the playlist
1224                 title = info_dict['title']
1225                 matchtitle = self.params.get('matchtitle', False)
1226                 if matchtitle:
1227                     if not re.search(matchtitle, title, re.IGNORECASE):
1228                         return '"' + title + '" title did not match pattern "' + matchtitle + '"'
1229                 rejecttitle = self.params.get('rejecttitle', False)
1230                 if rejecttitle:
1231                     if re.search(rejecttitle, title, re.IGNORECASE):
1232                         return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
1233             date = info_dict.get('upload_date')
1234             if date is not None:
1235                 dateRange = self.params.get('daterange', DateRange())
1236                 if date not in dateRange:
1237                     return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
1238             view_count = info_dict.get('view_count')
1239             if view_count is not None:
1240                 min_views = self.params.get('min_views')
1241                 if min_views is not None and view_count < min_views:
1242                     return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
1243                 max_views = self.params.get('max_views')
1244                 if max_views is not None and view_count > max_views:
1245                     return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
1246             if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
1247                 return 'Skipping "%s" because it is age restricted' % video_title
1248
1249             match_filter = self.params.get('match_filter')
1250             if match_filter is not None:
1251                 try:
1252                     ret = match_filter(info_dict, incomplete=incomplete)
1253                 except TypeError:
1254                     # For backward compatibility
1255                     ret = None if incomplete else match_filter(info_dict)
1256                 if ret is not None:
1257                     return ret
1258             return None
1259
1260         if self.in_download_archive(info_dict):
1261             reason = '%s has already been recorded in the archive' % video_title
1262             break_opt, break_err = 'break_on_existing', ExistingVideoReached
1263         else:
1264             reason = check_filter()
1265             break_opt, break_err = 'break_on_reject', RejectedVideoReached
1266         if reason is not None:
1267             if not silent:
1268                 self.to_screen('[download] ' + reason)
1269             if self.params.get(break_opt, False):
1270                 raise break_err()
1271         return reason
1272
1273     @staticmethod
1274     def add_extra_info(info_dict, extra_info):
1275         '''Set the keys from extra_info in info dict if they are missing'''
1276         for key, value in extra_info.items():
1277             info_dict.setdefault(key, value)
1278
1279     def extract_info(self, url, download=True, ie_key=None, extra_info=None,
1280                      process=True, force_generic_extractor=False):
1281         """
1282         Return a list with a dictionary for each video extracted.
1283
1284         Arguments:
1285         url -- URL to extract
1286
1287         Keyword arguments:
1288         download -- whether to download videos during extraction
1289         ie_key -- extractor key hint
1290         extra_info -- dictionary containing the extra values to add to each result
1291         process -- whether to resolve all unresolved references (URLs, playlist items),
1292             must be True for download to work.
1293         force_generic_extractor -- force using the generic extractor
1294         """
1295
1296         if extra_info is None:
1297             extra_info = {}
1298
1299         if not ie_key and force_generic_extractor:
1300             ie_key = 'Generic'
1301
1302         if ie_key:
1303             ies = {ie_key: self._get_info_extractor_class(ie_key)}
1304         else:
1305             ies = self._ies
1306
1307         for ie_key, ie in ies.items():
1308             if not ie.suitable(url):
1309                 continue
1310
1311             if not ie.working():
1312                 self.report_warning('The program functionality for this site has been marked as broken, '
1313                                     'and will probably not work.')
1314
1315             temp_id = ie.get_temp_id(url)
1316             if temp_id is not None and self.in_download_archive({'id': temp_id, 'ie_key': ie_key}):
1317                 self.to_screen(f'[{ie_key}] {temp_id}: has already been recorded in the archive')
1318                 if self.params.get('break_on_existing', False):
1319                     raise ExistingVideoReached()
1320                 break
1321             return self.__extract_info(url, self.get_info_extractor(ie_key), download, extra_info, process)
1322         else:
1323             self.report_error('no suitable InfoExtractor for URL %s' % url)
1324
1325     def __handle_extraction_exceptions(func):
1326         @functools.wraps(func)
1327         def wrapper(self, *args, **kwargs):
1328             try:
1329                 return func(self, *args, **kwargs)
1330             except GeoRestrictedError as e:
1331                 msg = e.msg
1332                 if e.countries:
1333                     msg += '\nThis video is available in %s.' % ', '.join(
1334                         map(ISO3166Utils.short2full, e.countries))
1335                 msg += '\nYou might want to use a VPN or a proxy server (with --proxy) to workaround.'
1336                 self.report_error(msg)
1337             except ExtractorError as e:  # An error we somewhat expected
1338                 self.report_error(compat_str(e), e.format_traceback())
1339             except ReExtractInfo as e:
1340                 if e.expected:
1341                     self.to_screen(f'{e}; Re-extracting data')
1342                 else:
1343                     self.to_stderr('\r')
1344                     self.report_warning(f'{e}; Re-extracting data')
1345                 return wrapper(self, *args, **kwargs)
1346             except (DownloadCancelled, LazyList.IndexError, PagedList.IndexError):
1347                 raise
1348             except Exception as e:
1349                 if self.params.get('ignoreerrors'):
1350                     self.report_error(error_to_compat_str(e), tb=encode_compat_str(traceback.format_exc()))
1351                 else:
1352                     raise
1353         return wrapper
1354
1355     def _wait_for_video(self, ie_result):
1356         if (not self.params.get('wait_for_video')
1357                 or ie_result.get('_type', 'video') != 'video'
1358                 or ie_result.get('formats') or ie_result.get('url')):
1359             return
1360
1361         format_dur = lambda dur: '%02d:%02d:%02d' % timetuple_from_msec(dur * 1000)[:-1]
1362         last_msg = ''
1363
1364         def progress(msg):
1365             nonlocal last_msg
1366             self.to_screen(msg + ' ' * (len(last_msg) - len(msg)) + '\r', skip_eol=True)
1367             last_msg = msg
1368
1369         min_wait, max_wait = self.params.get('wait_for_video')
1370         diff = try_get(ie_result, lambda x: x['release_timestamp'] - time.time())
1371         if diff is None and ie_result.get('live_status') == 'is_upcoming':
1372             diff = random.randrange(min_wait or 0, max_wait) if max_wait else min_wait
1373             self.report_warning('Release time of video is not known')
1374         elif (diff or 0) <= 0:
1375             self.report_warning('Video should already be available according to extracted info')
1376         diff = min(max(diff, min_wait or 0), max_wait or float('inf'))
1377         self.to_screen(f'[wait] Waiting for {format_dur(diff)} - Press Ctrl+C to try now')
1378
1379         wait_till = time.time() + diff
1380         try:
1381             while True:
1382                 diff = wait_till - time.time()
1383                 if diff <= 0:
1384                     progress('')
1385                     raise ReExtractInfo('[wait] Wait period ended', expected=True)
1386                 progress(f'[wait] Remaining time until next attempt: {self._format_screen(format_dur(diff), self.Styles.EMPHASIS)}')
1387                 time.sleep(1)
1388         except KeyboardInterrupt:
1389             progress('')
1390             raise ReExtractInfo('[wait] Interrupted by user', expected=True)
1391         except BaseException as e:
1392             if not isinstance(e, ReExtractInfo):
1393                 self.to_screen('')
1394             raise
1395
1396     @__handle_extraction_exceptions
1397     def __extract_info(self, url, ie, download, extra_info, process):
1398         ie_result = ie.extract(url)
1399         if ie_result is None:  # Finished already (backwards compatibility; listformats and friends should be moved here)
1400             return
1401         if isinstance(ie_result, list):
1402             # Backwards compatibility: old IE result format
1403             ie_result = {
1404                 '_type': 'compat_list',
1405                 'entries': ie_result,
1406             }
1407         if extra_info.get('original_url'):
1408             ie_result.setdefault('original_url', extra_info['original_url'])
1409         self.add_default_extra_info(ie_result, ie, url)
1410         if process:
1411             self._wait_for_video(ie_result)
1412             return self.process_ie_result(ie_result, download, extra_info)
1413         else:
1414             return ie_result
1415
1416     def add_default_extra_info(self, ie_result, ie, url):
1417         if url is not None:
1418             self.add_extra_info(ie_result, {
1419                 'webpage_url': url,
1420                 'original_url': url,
1421                 'webpage_url_basename': url_basename(url),
1422             })
1423         if ie is not None:
1424             self.add_extra_info(ie_result, {
1425                 'extractor': ie.IE_NAME,
1426                 'extractor_key': ie.ie_key(),
1427             })
1428
1429     def process_ie_result(self, ie_result, download=True, extra_info=None):
1430         """
1431         Take the result of the ie(may be modified) and resolve all unresolved
1432         references (URLs, playlist items).
1433
1434         It will also download the videos if 'download'.
1435         Returns the resolved ie_result.
1436         """
1437         if extra_info is None:
1438             extra_info = {}
1439         result_type = ie_result.get('_type', 'video')
1440
1441         if result_type in ('url', 'url_transparent'):
1442             ie_result['url'] = sanitize_url(ie_result['url'])
1443             if ie_result.get('original_url'):
1444                 extra_info.setdefault('original_url', ie_result['original_url'])
1445
1446             extract_flat = self.params.get('extract_flat', False)
1447             if ((extract_flat == 'in_playlist' and 'playlist' in extra_info)
1448                     or extract_flat is True):
1449                 info_copy = ie_result.copy()
1450                 ie = try_get(ie_result.get('ie_key'), self.get_info_extractor)
1451                 if ie and not ie_result.get('id'):
1452                     info_copy['id'] = ie.get_temp_id(ie_result['url'])
1453                 self.add_default_extra_info(info_copy, ie, ie_result['url'])
1454                 self.add_extra_info(info_copy, extra_info)
1455                 self.__forced_printings(info_copy, self.prepare_filename(info_copy), incomplete=True)
1456                 if self.params.get('force_write_download_archive', False):
1457                     self.record_download_archive(info_copy)
1458                 return ie_result
1459
1460         if result_type == 'video':
1461             self.add_extra_info(ie_result, extra_info)
1462             ie_result = self.process_video_result(ie_result, download=download)
1463             additional_urls = (ie_result or {}).get('additional_urls')
1464             if additional_urls:
1465                 # TODO: Improve MetadataParserPP to allow setting a list
1466                 if isinstance(additional_urls, compat_str):
1467                     additional_urls = [additional_urls]
1468                 self.to_screen(
1469                     '[info] %s: %d additional URL(s) requested' % (ie_result['id'], len(additional_urls)))
1470                 self.write_debug('Additional URLs: "%s"' % '", "'.join(additional_urls))
1471                 ie_result['additional_entries'] = [
1472                     self.extract_info(
1473                         url, download, extra_info,
1474                         force_generic_extractor=self.params.get('force_generic_extractor'))
1475                     for url in additional_urls
1476                 ]
1477             return ie_result
1478         elif result_type == 'url':
1479             # We have to add extra_info to the results because it may be
1480             # contained in a playlist
1481             return self.extract_info(
1482                 ie_result['url'], download,
1483                 ie_key=ie_result.get('ie_key'),
1484                 extra_info=extra_info)
1485         elif result_type == 'url_transparent':
1486             # Use the information from the embedding page
1487             info = self.extract_info(
1488                 ie_result['url'], ie_key=ie_result.get('ie_key'),
1489                 extra_info=extra_info, download=False, process=False)
1490
1491             # extract_info may return None when ignoreerrors is enabled and
1492             # extraction failed with an error, don't crash and return early
1493             # in this case
1494             if not info:
1495                 return info
1496
1497             force_properties = dict(
1498                 (k, v) for k, v in ie_result.items() if v is not None)
1499             for f in ('_type', 'url', 'id', 'extractor', 'extractor_key', 'ie_key'):
1500                 if f in force_properties:
1501                     del force_properties[f]
1502             new_result = info.copy()
1503             new_result.update(force_properties)
1504
1505             # Extracted info may not be a video result (i.e.
1506             # info.get('_type', 'video') != video) but rather an url or
1507             # url_transparent. In such cases outer metadata (from ie_result)
1508             # should be propagated to inner one (info). For this to happen
1509             # _type of info should be overridden with url_transparent. This
1510             # fixes issue from https://github.com/ytdl-org/youtube-dl/pull/11163.
1511             if new_result.get('_type') == 'url':
1512                 new_result['_type'] = 'url_transparent'
1513
1514             return self.process_ie_result(
1515                 new_result, download=download, extra_info=extra_info)
1516         elif result_type in ('playlist', 'multi_video'):
1517             # Protect from infinite recursion due to recursively nested playlists
1518             # (see https://github.com/ytdl-org/youtube-dl/issues/27833)
1519             webpage_url = ie_result['webpage_url']
1520             if webpage_url in self._playlist_urls:
1521                 self.to_screen(
1522                     '[download] Skipping already downloaded playlist: %s'
1523                     % ie_result.get('title') or ie_result.get('id'))
1524                 return
1525
1526             self._playlist_level += 1
1527             self._playlist_urls.add(webpage_url)
1528             self._sanitize_thumbnails(ie_result)
1529             try:
1530                 return self.__process_playlist(ie_result, download)
1531             finally:
1532                 self._playlist_level -= 1
1533                 if not self._playlist_level:
1534                     self._playlist_urls.clear()
1535         elif result_type == 'compat_list':
1536             self.report_warning(
1537                 'Extractor %s returned a compat_list result. '
1538                 'It needs to be updated.' % ie_result.get('extractor'))
1539
1540             def _fixup(r):
1541                 self.add_extra_info(r, {
1542                     'extractor': ie_result['extractor'],
1543                     'webpage_url': ie_result['webpage_url'],
1544                     'webpage_url_basename': url_basename(ie_result['webpage_url']),
1545                     'extractor_key': ie_result['extractor_key'],
1546                 })
1547                 return r
1548             ie_result['entries'] = [
1549                 self.process_ie_result(_fixup(r), download, extra_info)
1550                 for r in ie_result['entries']
1551             ]
1552             return ie_result
1553         else:
1554             raise Exception('Invalid result type: %s' % result_type)
1555
1556     def _ensure_dir_exists(self, path):
1557         return make_dir(path, self.report_error)
1558
    def __process_playlist(self, ie_result, download):
        """
        Process the entries of the playlist result ie_result.

        The entries to process are selected with the 'playlist_items' param or,
        without it, the 'playliststart'/'playlistend' params. They are reordered
        according to 'playlistreverse'/'playlistrandom', and each one that
        _match_entry does not reject is processed with its playlist related
        extra info.

        ie_result['entries'] is replaced by the list of processed results and
        ie_result['requested_entries'] by the selected indices.
        Unless simulating or 'allow_playlist_files' is disabled, the playlist
        infojson, description and thumbnails are written too.

        Returns ie_result, or None if writing the infojson or the description
        returned None.
        Raises EntryNotInPlaylist if there are no entries, or if an entry of an
        incomplete playlist (one with 'requested_entries') cannot be found.
        """
        # We process each entry in the playlist
        playlist = ie_result.get('title') or ie_result.get('id')
        self.to_screen('[download] Downloading playlist: %s' % playlist)

        if 'entries' not in ie_result:
            raise EntryNotInPlaylist('There are no entries')

        # Sentinel for the positions that have no entry in an incomplete playlist
        MissingEntry = object()
        incomplete_entries = bool(ie_result.get('requested_entries'))
        if incomplete_entries:
            def fill_missing_entries(entries, indices):
                # Put each entry at the (1-based) position given by its index
                ret = [MissingEntry] * max(indices)
                for i, entry in zip(indices, entries):
                    ret[i - 1] = entry
                return ret
            ie_result['entries'] = fill_missing_entries(ie_result['entries'], ie_result['requested_entries'])

        playlist_results = []

        playliststart = self.params.get('playliststart', 1)
        playlistend = self.params.get('playlistend')
        # For backwards compatibility, interpret -1 as whole list
        if playlistend == -1:
            playlistend = None

        playlistitems_str = self.params.get('playlist_items')
        playlistitems = None
        if playlistitems_str is not None:
            def iter_playlistitems(format):
                # Comma separated items; 'START-END' is an inclusive range
                for string_segment in format.split(','):
                    if '-' in string_segment:
                        start, end = string_segment.split('-')
                        for item in range(int(start), int(end) + 1):
                            yield int(item)
                    else:
                        yield int(string_segment)
            playlistitems = orderedSet(iter_playlistitems(playlistitems_str))

        ie_entries = ie_result['entries']
        # The remaining placeholder is filled with the number of selected entries below
        msg = (
            'Downloading %d videos' if not isinstance(ie_entries, list)
            else 'Collected %d videos; downloading %%d of them' % len(ie_entries))

        if isinstance(ie_entries, list):
            def get_entry(i):
                return ie_entries[i - 1]
        else:
            if not isinstance(ie_entries, (PagedList, LazyList)):
                ie_entries = LazyList(ie_entries)

            def get_entry(i):
                # Fetching an entry of a non-list may raise extraction errors;
                # let the exception handling decorator deal with them
                return YoutubeDL.__handle_extraction_exceptions(
                    lambda self, i: ie_entries[i - 1]
                )(self, i)

        # Collect the selected entries (None for those that could not be fetched)
        entries = []
        items = playlistitems if playlistitems is not None else itertools.count(playliststart)
        for i in items:
            if i == 0:
                continue
            if playlistitems is None and playlistend is not None and playlistend < i:
                break
            entry = None
            try:
                entry = get_entry(i)
                if entry is MissingEntry:
                    raise EntryNotInPlaylist()
            except (IndexError, EntryNotInPlaylist):
                if incomplete_entries:
                    raise EntryNotInPlaylist(f'Entry {i} cannot be found')
                elif not playlistitems:
                    # Reached the end of the playlist
                    break
            entries.append(entry)
            try:
                # Called silently: only its break_on_* exceptions matter here,
                # they stop the collection of further entries
                if entry is not None:
                    self._match_entry(entry, incomplete=True, silent=True)
            except (ExistingVideoReached, RejectedVideoReached):
                break
        ie_result['entries'] = entries

        # Save playlist_index before re-ordering
        entries = [
            ((playlistitems[i - 1] if playlistitems else i + playliststart - 1), entry)
            for i, entry in enumerate(entries, 1)
            if entry is not None]
        n_entries = len(entries)

        if not playlistitems and (playliststart != 1 or playlistend):
            playlistitems = list(range(playliststart, playliststart + n_entries))
        ie_result['requested_entries'] = playlistitems

        _infojson_written = False
        if not self.params.get('simulate') and self.params.get('allow_playlist_files', True):
            # Playlist level fields used to build the filenames; the fields of
            # ie_result take precedence over them
            ie_copy = {
                'playlist': playlist,
                'playlist_id': ie_result.get('id'),
                'playlist_title': ie_result.get('title'),
                'playlist_uploader': ie_result.get('uploader'),
                'playlist_uploader_id': ie_result.get('uploader_id'),
                'playlist_index': 0,
                'n_entries': n_entries,
            }
            ie_copy.update(dict(ie_result))

            _infojson_written = self._write_info_json(
                'playlist', ie_result, self.prepare_filename(ie_copy, 'pl_infojson'))
            if _infojson_written is None:
                return
            if self._write_description('playlist', ie_result,
                                       self.prepare_filename(ie_copy, 'pl_description')) is None:
                return
            # TODO: This should be passed to ThumbnailsConvertor if necessary
            self._write_thumbnails('playlist', ie_copy, self.prepare_filename(ie_copy, 'pl_thumbnail'))

        if self.params.get('playlistreverse', False):
            entries = entries[::-1]
        if self.params.get('playlistrandom', False):
            random.shuffle(entries)

        x_forwarded_for = ie_result.get('__x_forwarded_for_ip')

        self.to_screen('[%s] playlist %s: %s' % (ie_result['extractor'], playlist, msg % n_entries))
        failures = 0
        # No limit unless 'skip_playlist_after_errors' is set
        max_failures = self.params.get('skip_playlist_after_errors') or float('inf')
        for i, entry_tuple in enumerate(entries, 1):
            playlist_index, entry = entry_tuple
            if 'playlist-index' in self.params.get('compat_opts', []):
                # With this compat option the index follows the processing order
                # instead of the one saved before re-ordering
                playlist_index = playlistitems[i - 1] if playlistitems else i + playliststart - 1
            self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
            # This __x_forwarded_for_ip thing is a bit ugly but requires
            # minimal changes
            if x_forwarded_for:
                entry['__x_forwarded_for_ip'] = x_forwarded_for
            extra = {
                'n_entries': n_entries,
                '_last_playlist_index': max(playlistitems) if playlistitems else (playlistend or n_entries),
                'playlist_index': playlist_index,
                'playlist_autonumber': i,
                'playlist': playlist,
                'playlist_id': ie_result.get('id'),
                'playlist_title': ie_result.get('title'),
                'playlist_uploader': ie_result.get('uploader'),
                'playlist_uploader_id': ie_result.get('uploader_id'),
                'extractor': ie_result['extractor'],
                'webpage_url': ie_result['webpage_url'],
                'webpage_url_basename': url_basename(ie_result['webpage_url']),
                'extractor_key': ie_result['extractor_key'],
            }

            if self._match_entry(entry, incomplete=True) is not None:
                continue

            entry_result = self.__process_iterable_entry(entry, download, extra)
            # A falsy result is counted as a failed entry
            if not entry_result:
                failures += 1
            if failures >= max_failures:
                self.report_error(
                    'Skipping the remaining entries in playlist "%s" since %d items failed extraction' % (playlist, failures))
                break
            playlist_results.append(entry_result)
        ie_result['entries'] = playlist_results

        # Write the updated info to json
        # (ie_copy is always defined here: _infojson_written is only set where ie_copy is built)
        if _infojson_written and self._write_info_json(
                'updated playlist', ie_result,
                self.prepare_filename(ie_copy, 'pl_infojson'), overwrite=True) is None:
            return
        self.to_screen('[download] Finished downloading playlist: %s' % playlist)
        return ie_result
1729
1730     @__handle_extraction_exceptions
1731     def __process_iterable_entry(self, entry, download, extra_info):
1732         return self.process_ie_result(
1733             entry, download=download, extra_info=extra_info)
1734
1735     def _build_format_filter(self, filter_spec):
1736         " Returns a function to filter the formats according to the filter_spec "
1737
1738         OPERATORS = {
1739             '<': operator.lt,
1740             '<=': operator.le,
1741             '>': operator.gt,
1742             '>=': operator.ge,
1743             '=': operator.eq,
1744             '!=': operator.ne,
1745         }
1746         operator_rex = re.compile(r'''(?x)\s*
1747             (?P<key>width|height|tbr|abr|vbr|asr|filesize|filesize_approx|fps)\s*
1748             (?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
1749             (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)\s*
1750             ''' % '|'.join(map(re.escape, OPERATORS.keys())))
1751         m = operator_rex.fullmatch(filter_spec)
1752         if m:
1753             try:
1754                 comparison_value = int(m.group('value'))
1755             except ValueError:
1756                 comparison_value = parse_filesize(m.group('value'))
1757                 if comparison_value is None:
1758                     comparison_value = parse_filesize(m.group('value') + 'B')
1759                 if comparison_value is None:
1760                     raise ValueError(
1761                         'Invalid value %r in format specification %r' % (
1762                             m.group('value'), filter_spec))
1763             op = OPERATORS[m.group('op')]
1764
1765         if not m:
1766             STR_OPERATORS = {
1767                 '=': operator.eq,
1768                 '^=': lambda attr, value: attr.startswith(value),
1769                 '$=': lambda attr, value: attr.endswith(value),
1770                 '*=': lambda attr, value: value in attr,
1771             }
1772             str_operator_rex = re.compile(r'''(?x)\s*
1773                 (?P<key>[a-zA-Z0-9._-]+)\s*
1774                 (?P<negation>!\s*)?(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
1775                 (?P<value>[a-zA-Z0-9._-]+)\s*
1776                 ''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))
1777             m = str_operator_rex.fullmatch(filter_spec)
1778             if m:
1779                 comparison_value = m.group('value')
1780                 str_op = STR_OPERATORS[m.group('op')]
1781                 if m.group('negation'):
1782                     op = lambda attr, value: not str_op(attr, value)
1783                 else:
1784                     op = str_op
1785
1786         if not m:
1787             raise SyntaxError('Invalid filter specification %r' % filter_spec)
1788
1789         def _filter(f):
1790             actual_value = f.get(m.group('key'))
1791             if actual_value is None:
1792                 return m.group('none_inclusive')
1793             return op(actual_value, comparison_value)
1794         return _filter
1795
1796     def _check_formats(self, formats):
1797         for f in formats:
1798             self.to_screen('[info] Testing format %s' % f['format_id'])
1799             path = self.get_output_path('temp')
1800             if not self._ensure_dir_exists(f'{path}/'):
1801                 continue
1802             temp_file = tempfile.NamedTemporaryFile(suffix='.tmp', delete=False, dir=path or None)
1803             temp_file.close()
1804             try:
1805                 success, _ = self.dl(temp_file.name, f, test=True)
1806             except (DownloadError, IOError, OSError, ValueError) + network_exceptions:
1807                 success = False
1808             finally:
1809                 if os.path.exists(temp_file.name):
1810                     try:
1811                         os.remove(temp_file.name)
1812                     except OSError:
1813                         self.report_warning('Unable to delete temporary file "%s"' % temp_file.name)
1814             if success:
1815                 yield f
1816             else:
1817                 self.to_screen('[info] Unable to download format %s. Skipping...' % f['format_id'])
1818
1819     def _default_format_spec(self, info_dict, download=True):
1820
1821         def can_merge():
1822             merger = FFmpegMergerPP(self)
1823             return merger.available and merger.can_merge()
1824
1825         prefer_best = (
1826             not self.params.get('simulate')
1827             and download
1828             and (
1829                 not can_merge()
1830                 or info_dict.get('is_live', False)
1831                 or self.outtmpl_dict['default'] == '-'))
1832         compat = (
1833             prefer_best
1834             or self.params.get('allow_multiple_audio_streams', False)
1835             or 'format-spec' in self.params.get('compat_opts', []))
1836
1837         return (
1838             'best/bestvideo+bestaudio' if prefer_best
1839             else 'bestvideo*+bestaudio/best' if not compat
1840             else 'bestvideo+bestaudio/best')
1841
1842     def build_format_selector(self, format_spec):
1843         def syntax_error(note, start):
1844             message = (
1845                 'Invalid format specification: '
1846                 '{0}\n\t{1}\n\t{2}^'.format(note, format_spec, ' ' * start[1]))
1847             return SyntaxError(message)
1848
1849         PICKFIRST = 'PICKFIRST'
1850         MERGE = 'MERGE'
1851         SINGLE = 'SINGLE'
1852         GROUP = 'GROUP'
1853         FormatSelector = collections.namedtuple('FormatSelector', ['type', 'selector', 'filters'])
1854
1855         allow_multiple_streams = {'audio': self.params.get('allow_multiple_audio_streams', False),
1856                                   'video': self.params.get('allow_multiple_video_streams', False)}
1857
1858         check_formats = self.params.get('check_formats') == 'selected'
1859
1860         def _parse_filter(tokens):
1861             filter_parts = []
1862             for type, string, start, _, _ in tokens:
1863                 if type == tokenize.OP and string == ']':
1864                     return ''.join(filter_parts)
1865                 else:
1866                     filter_parts.append(string)
1867
1868         def _remove_unused_ops(tokens):
1869             # Remove operators that we don't use and join them with the surrounding strings
1870             # for example: 'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9'
1871             ALLOWED_OPS = ('/', '+', ',', '(', ')')
1872             last_string, last_start, last_end, last_line = None, None, None, None
1873             for type, string, start, end, line in tokens:
1874                 if type == tokenize.OP and string == '[':
1875                     if last_string:
1876                         yield tokenize.NAME, last_string, last_start, last_end, last_line
1877                         last_string = None
1878                     yield type, string, start, end, line
1879                     # everything inside brackets will be handled by _parse_filter
1880                     for type, string, start, end, line in tokens:
1881                         yield type, string, start, end, line
1882                         if type == tokenize.OP and string == ']':
1883                             break
1884                 elif type == tokenize.OP and string in ALLOWED_OPS:
1885                     if last_string:
1886                         yield tokenize.NAME, last_string, last_start, last_end, last_line
1887                         last_string = None
1888                     yield type, string, start, end, line
1889                 elif type in [tokenize.NAME, tokenize.NUMBER, tokenize.OP]:
1890                     if not last_string:
1891                         last_string = string
1892                         last_start = start
1893                         last_end = end
1894                     else:
1895                         last_string += string
1896             if last_string:
1897                 yield tokenize.NAME, last_string, last_start, last_end, last_line
1898
        def _parse_format_selection(tokens, inside_merge=False, inside_choice=False, inside_group=False):
            """Parse a token stream into a list of FormatSelector objects.

            The function calls itself for the right-hand side of '/', '+' and
            for the contents of '(...)'; the inside_* flags tell such a nested
            call which operators belong to its caller and must therefore stop
            the nested parse.

            tokens must provide restore_last_token() in addition to being
            iterable, so that a terminating operator can be handed back to
            the caller.

            Returns the list of comma-separated selectors parsed so far (possibly
            empty). Raises the error built by syntax_error() on malformed input.
            """
            selectors = []
            current_selector = None
            for type, string, start, _, _ in tokens:
                # ENCODING is only defined in python 3.x
                if type == getattr(tokenize, 'ENCODING', None):
                    continue
                elif type in [tokenize.NAME, tokenize.NUMBER]:
                    # A bare name/number is an atom (format id, extension, "best", ...)
                    current_selector = FormatSelector(SINGLE, string, [])
                elif type == tokenize.OP:
                    if string == ')':
                        if not inside_group:
                            # ')' will be handled by the parentheses group
                            tokens.restore_last_token()
                        break
                    elif inside_merge and string in ['/', ',']:
                        # The operand of '+' ends here; the operator belongs to the caller
                        tokens.restore_last_token()
                        break
                    elif inside_choice and string == ',':
                        # The alternative after '/' ends here; ',' belongs to the caller
                        tokens.restore_last_token()
                        break
                    elif string == ',':
                        if not current_selector:
                            raise syntax_error('"," must follow a format selector', start)
                        selectors.append(current_selector)
                        current_selector = None
                    elif string == '/':
                        if not current_selector:
                            raise syntax_error('"/" must follow a format selector', start)
                        first_choice = current_selector
                        # NOTE: the recursive call returns a list of selectors
                        second_choice = _parse_format_selection(tokens, inside_choice=True)
                        current_selector = FormatSelector(PICKFIRST, (first_choice, second_choice), [])
                    elif string == '[':
                        # A filter without a preceding selector applies to "best"
                        if not current_selector:
                            current_selector = FormatSelector(SINGLE, 'best', [])
                        format_filter = _parse_filter(tokens)
                        current_selector.filters.append(format_filter)
                    elif string == '(':
                        if current_selector:
                            raise syntax_error('Unexpected "("', start)
                        group = _parse_format_selection(tokens, inside_group=True)
                        current_selector = FormatSelector(GROUP, group, [])
                    elif string == '+':
                        if not current_selector:
                            raise syntax_error('Unexpected "+"', start)
                        selector_1 = current_selector
                        selector_2 = _parse_format_selection(tokens, inside_merge=True)
                        # An empty list means nothing followed the '+'
                        if not selector_2:
                            raise syntax_error('Expected a selector', start)
                        current_selector = FormatSelector(MERGE, (selector_1, selector_2), [])
                    else:
                        raise syntax_error('Operator not recognized: "{0}"'.format(string), start)
                elif type == tokenize.ENDMARKER:
                    break
            # Flush the selector that was still being built when parsing stopped
            if current_selector:
                selectors.append(current_selector)
            return selectors
1956
1957         def _merge(formats_pair):
1958             format_1, format_2 = formats_pair
1959
1960             formats_info = []
1961             formats_info.extend(format_1.get('requested_formats', (format_1,)))
1962             formats_info.extend(format_2.get('requested_formats', (format_2,)))
1963
1964             if not allow_multiple_streams['video'] or not allow_multiple_streams['audio']:
1965                 get_no_more = {'video': False, 'audio': False}
1966                 for (i, fmt_info) in enumerate(formats_info):
1967                     if fmt_info.get('acodec') == fmt_info.get('vcodec') == 'none':
1968                         formats_info.pop(i)
1969                         continue
1970                     for aud_vid in ['audio', 'video']:
1971                         if not allow_multiple_streams[aud_vid] and fmt_info.get(aud_vid[0] + 'codec') != 'none':
1972                             if get_no_more[aud_vid]:
1973                                 formats_info.pop(i)
1974                                 break
1975                             get_no_more[aud_vid] = True
1976
1977             if len(formats_info) == 1:
1978                 return formats_info[0]
1979
1980             video_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('vcodec') != 'none']
1981             audio_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('acodec') != 'none']
1982
1983             the_only_video = video_fmts[0] if len(video_fmts) == 1 else None
1984             the_only_audio = audio_fmts[0] if len(audio_fmts) == 1 else None
1985
1986             output_ext = self.params.get('merge_output_format')
1987             if not output_ext:
1988                 if the_only_video:
1989                     output_ext = the_only_video['ext']
1990                 elif the_only_audio and not video_fmts:
1991                     output_ext = the_only_audio['ext']
1992                 else:
1993                     output_ext = 'mkv'
1994
1995             filtered = lambda *keys: filter(None, (traverse_obj(fmt, *keys) for fmt in formats_info))
1996
1997             new_dict = {
1998                 'requested_formats': formats_info,
1999                 'format': '+'.join(filtered('format')),
2000                 'format_id': '+'.join(filtered('format_id')),
2001                 'ext': output_ext,
2002                 'protocol': '+'.join(map(determine_protocol, formats_info)),
2003                 'language': '+'.join(orderedSet(filtered('language'))) or None,
2004                 'format_note': '+'.join(orderedSet(filtered('format_note'))) or None,
2005                 'filesize_approx': sum(filtered('filesize', 'filesize_approx')) or None,
2006                 'tbr': sum(filtered('tbr', 'vbr', 'abr')),
2007             }
2008
2009             if the_only_video:
2010                 new_dict.update({
2011                     'width': the_only_video.get('width'),
2012                     'height': the_only_video.get('height'),
2013                     'resolution': the_only_video.get('resolution') or self.format_resolution(the_only_video),
2014                     'fps': the_only_video.get('fps'),
2015                     'dynamic_range': the_only_video.get('dynamic_range'),
2016                     'vcodec': the_only_video.get('vcodec'),
2017                     'vbr': the_only_video.get('vbr'),
2018                     'stretched_ratio': the_only_video.get('stretched_ratio'),
2019                 })
2020
2021             if the_only_audio:
2022                 new_dict.update({
2023                     'acodec': the_only_audio.get('acodec'),
2024                     'abr': the_only_audio.get('abr'),
2025                     'asr': the_only_audio.get('asr'),
2026                 })
2027
2028             return new_dict
2029
2030         def _check_formats(formats):
2031             if not check_formats:
2032                 yield from formats
2033                 return
2034             yield from self._check_formats(formats)
2035
        def _build_selector_function(selector):
            """Turn a parsed selector into a callable that picks formats.

            selector is either a list of FormatSelector objects (a comma
            separated sequence) or a single FormatSelector. The returned
            function takes a context dict with the keys 'formats' and
            'incomplete_formats' and returns an iterable of format dicts.

            For a single FormatSelector the returned function first applies the
            selector's filters to a deep copy of the context.
            """
            if isinstance(selector, list):  # "," - results of all parts, in order
                fs = [_build_selector_function(s) for s in selector]

                def selector_function(ctx):
                    for f in fs:
                        yield from f(ctx)
                # A plain list carries no filters, so no wrapper is needed
                return selector_function

            elif selector.type == GROUP:  # "()" - delegate to the grouped selectors
                selector_function = _build_selector_function(selector.selector)

            elif selector.type == PICKFIRST:  # "/" - first alternative that yields anything
                fs = [_build_selector_function(s) for s in selector.selector]

                def selector_function(ctx):
                    for f in fs:
                        picked_formats = list(f(ctx))
                        if picked_formats:
                            return picked_formats
                    return []

            elif selector.type == MERGE:  # "+" - merge every combination of both sides
                selector_1, selector_2 = map(_build_selector_function, selector.selector)

                def selector_function(ctx):
                    # Each side gets its own copy of the context
                    for pair in itertools.product(
                            selector_1(copy.deepcopy(ctx)), selector_2(copy.deepcopy(ctx))):
                        yield _merge(pair)

            elif selector.type == SINGLE:  # atom
                format_spec = selector.selector or 'best'

                # TODO: Add allvideo, allaudio etc by generalizing the code with best/worst selector
                if format_spec == 'all':
                    def selector_function(ctx):
                        # All formats, starting from the end of the list
                        yield from _check_formats(ctx['formats'][::-1])
                elif format_spec == 'mergeall':
                    def selector_function(ctx):
                        formats = list(_check_formats(ctx['formats']))
                        if not formats:
                            return
                        # Fold all formats into one merge, starting from the last one
                        merged_format = formats[-1]
                        for f in formats[-2::-1]:
                            merged_format = _merge((merged_format, f))
                        yield merged_format

                else:
                    # Defaults used when the spec is not a best/worst expression
                    format_fallback, format_reverse, format_idx = False, True, 1
                    # (best|worst)[video|audio][*][.N], also in the short forms b, w, v, a
                    mobj = re.match(
                        r'(?P<bw>best|worst|b|w)(?P<type>video|audio|v|a)?(?P<mod>\*)?(?:\.(?P<n>[1-9]\d*))?$',
                        format_spec)
                    if mobj is not None:
                        format_idx = int_or_none(mobj.group('n'), default=1)
                        # "best" picks from the end of the formats list, "worst" from the start
                        format_reverse = mobj.group('bw')[0] == 'b'
                        # 'v', 'a' or None
                        format_type = (mobj.group('type') or [None])[0]
                        not_format_type = {'v': 'a', 'a': 'v'}.get(format_type)
                        format_modified = mobj.group('mod') is not None

                        format_fallback = not format_type and not format_modified  # for b, w
                        _filter_f = (
                            (lambda f: f.get('%scodec' % format_type) != 'none')
                            if format_type and format_modified  # bv*, ba*, wv*, wa*
                            else (lambda f: f.get('%scodec' % not_format_type) == 'none')
                            if format_type  # bv, ba, wv, wa
                            else (lambda f: f.get('vcodec') != 'none' and f.get('acodec') != 'none')
                            if not format_modified  # b, w
                            else lambda f: True)  # b*, w*
                        # Additionally reject formats that have neither video nor audio
                        filter_f = lambda f: _filter_f(f) and (
                            f.get('vcodec') != 'none' or f.get('acodec') != 'none')
                    else:
                        # Not best/worst: the spec is a known extension or else a format id
                        if format_spec in self._format_selection_exts['audio']:
                            filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') != 'none'
                        elif format_spec in self._format_selection_exts['video']:
                            filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') != 'none' and f.get('vcodec') != 'none'
                        elif format_spec in self._format_selection_exts['storyboards']:
                            filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') == 'none' and f.get('vcodec') == 'none'
                        else:
                            filter_f = lambda f: f.get('format_id') == format_spec  # id

                    def selector_function(ctx):
                        formats = list(ctx['formats'])
                        matches = list(filter(filter_f, formats)) if filter_f is not None else formats
                        if format_fallback and ctx['incomplete_formats'] and not matches:
                            # for extractors with incomplete formats (audio only (soundcloud)
                            # or video only (imgur)) best/worst will fallback to
                            # best/worst {video,audio}-only format
                            matches = formats
                        # Lazy, so that only as many formats as needed are checked
                        matches = LazyList(_check_formats(matches[::-1 if format_reverse else 1]))
                        try:
                            # format_idx is 1-based (the ".N" suffix of the spec)
                            yield matches[format_idx - 1]
                        except IndexError:
                            return

            # Compile the "[...]" filters attached to this selector
            filters = [self._build_format_filter(f) for f in selector.filters]

            def final_selector(ctx):
                # Filter on a copy so that the caller's context is left untouched
                ctx_copy = copy.deepcopy(ctx)
                for _filter in filters:
                    ctx_copy['formats'] = list(filter(_filter, ctx_copy['formats']))
                return selector_function(ctx_copy)
            return final_selector
2138
2139         stream = io.BytesIO(format_spec.encode('utf-8'))
2140         try:
2141             tokens = list(_remove_unused_ops(compat_tokenize_tokenize(stream.readline)))
2142         except tokenize.TokenError:
2143             raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec)))
2144
2145         class TokenIterator(object):
2146             def __init__(self, tokens):
2147                 self.tokens = tokens
2148                 self.counter = 0
2149
2150             def __iter__(self):
2151                 return self
2152
2153             def __next__(self):
2154                 if self.counter >= len(self.tokens):
2155                     raise StopIteration()
2156                 value = self.tokens[self.counter]
2157                 self.counter += 1
2158                 return value
2159
2160             next = __next__
2161
2162             def restore_last_token(self):
2163                 self.counter -= 1
2164
2165         parsed_selector = _parse_format_selection(iter(TokenIterator(tokens)))
2166         return _build_selector_function(parsed_selector)
2167
2168     def _calc_headers(self, info_dict):
2169         res = std_headers.copy()
2170
2171         add_headers = info_dict.get('http_headers')
2172         if add_headers:
2173             res.update(add_headers)
2174
2175         cookies = self._calc_cookies(info_dict)
2176         if cookies:
2177             res['Cookie'] = cookies
2178
2179         if 'X-Forwarded-For' not in res:
2180             x_forwarded_for_ip = info_dict.get('__x_forwarded_for_ip')
2181             if x_forwarded_for_ip:
2182                 res['X-Forwarded-For'] = x_forwarded_for_ip
2183
2184         return res
2185
2186     def _calc_cookies(self, info_dict):
2187         pr = sanitized_Request(info_dict['url'])
2188         self.cookiejar.add_cookie_header(pr)
2189         return pr.get_header('Cookie')
2190
2191     def _sort_thumbnails(self, thumbnails):
2192         thumbnails.sort(key=lambda t: (
2193             t.get('preference') if t.get('preference') is not None else -1,
2194             t.get('width') if t.get('width') is not None else -1,
2195             t.get('height') if t.get('height') is not None else -1,
2196             t.get('id') if t.get('id') is not None else '',
2197             t.get('url')))
2198
2199     def _sanitize_thumbnails(self, info_dict):
2200         thumbnails = info_dict.get('thumbnails')
2201         if thumbnails is None:
2202             thumbnail = info_dict.get('thumbnail')
2203             if thumbnail:
2204                 info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}]
2205         if not thumbnails:
2206             return
2207
2208         def check_thumbnails(thumbnails):
2209             for t in thumbnails:
2210                 self.to_screen(f'[info] Testing thumbnail {t["id"]}')
2211                 try:
2212                     self.urlopen(HEADRequest(t['url']))
2213                 except network_exceptions as err:
2214                     self.to_screen(f'[info] Unable to connect to thumbnail {t["id"]} URL {t["url"]!r} - {err}. Skipping...')
2215                     continue
2216                 yield t
2217
2218         self._sort_thumbnails(thumbnails)
2219         for i, t in enumerate(thumbnails):
2220             if t.get('id') is None:
2221                 t['id'] = '%d' % i
2222             if t.get('width') and t.get('height'):
2223                 t['resolution'] = '%dx%d' % (t['width'], t['height'])
2224             t['url'] = sanitize_url(t['url'])
2225
2226         if self.params.get('check_formats') is True:
2227             info_dict['thumbnails'] = LazyList(check_thumbnails(thumbnails[::-1]), reverse=True)
2228         else:
2229             info_dict['thumbnails'] = thumbnails
2230
    def process_video_result(self, info_dict, download=True):
        """Sanitize the result for a single video, select its formats and process them.

        info_dict (whose '_type' must be 'video' or unset) is normalised in
        place: id and numeric fields, thumbnails, dates, live status, chapter/
        season/episode titles, subtitles and formats. After that the
        pre-processors are run, the list_thumbnails/listformats/listsubtitles
        options are honoured and the format selector is applied.

        If download is true, process_info() is called once for every selected
        format. If no format matches, ExtractorError is raised unless the
        'ignore_no_formats_error' option is set, in which case process_info()
        is called with the info_dict as it is.

        Raises ExtractorError if 'id' or 'title' is missing from info_dict.

        Returns the info_dict updated with the last selected format, or None
        when only listing was requested (a list option is set and 'simulate'
        is None).
        """
        assert info_dict.get('_type', 'video') == 'video'

        if 'id' not in info_dict:
            raise ExtractorError('Missing "id" field in extractor result')
        if 'title' not in info_dict:
            raise ExtractorError('Missing "title" field in extractor result',
                                 video_id=info_dict['id'], ie=info_dict['extractor'])

        def report_force_conversion(field, field_not, conversion):
            self.report_warning(
                '"%s" field is not %s - forcing %s conversion, there is an error in extractor'
                % (field, field_not, conversion))

        def sanitize_string_field(info, string_field):
            # Convert a non-string value of info[string_field] to a string, with a warning
            field = info.get(string_field)
            if field is None or isinstance(field, compat_str):
                return
            report_force_conversion(string_field, 'a string', 'string')
            info[string_field] = compat_str(field)

        def sanitize_numeric_fields(info):
            # Convert non-numeric values of the known numeric fields, with a warning
            for numeric_field in self._NUMERIC_FIELDS:
                field = info.get(numeric_field)
                if field is None or isinstance(field, compat_numeric_types):
                    continue
                report_force_conversion(numeric_field, 'numeric', 'int')
                info[numeric_field] = int_or_none(field)

        sanitize_string_field(info_dict, 'id')
        sanitize_numeric_fields(info_dict)

        if 'playlist' not in info_dict:
            # It isn't part of a playlist
            info_dict['playlist'] = None
            info_dict['playlist_index'] = None

        self._sanitize_thumbnails(info_dict)

        # Keep 'thumbnail' consistent: sanitize a given one, else use the last of 'thumbnails'
        thumbnail = info_dict.get('thumbnail')
        thumbnails = info_dict.get('thumbnails')
        if thumbnail:
            info_dict['thumbnail'] = sanitize_url(thumbnail)
        elif thumbnails:
            info_dict['thumbnail'] = thumbnails[-1]['url']

        if info_dict.get('display_id') is None and 'id' in info_dict:
            info_dict['display_id'] = info_dict['id']

        if info_dict.get('duration') is not None:
            info_dict['duration_string'] = formatSeconds(info_dict['duration'])

        # Derive missing YYYYMMDD dates from the corresponding timestamps
        for ts_key, date_key in (
                ('timestamp', 'upload_date'),
                ('release_timestamp', 'release_date'),
        ):
            if info_dict.get(date_key) is None and info_dict.get(ts_key) is not None:
                # Working around out-of-range timestamp values (e.g. negative ones on Windows,
                # see http://bugs.python.org/issue1646728)
                try:
                    upload_date = datetime.datetime.utcfromtimestamp(info_dict[ts_key])
                    info_dict[date_key] = upload_date.strftime('%Y%m%d')
                except (ValueError, OverflowError, OSError):
                    pass

        # Reconcile 'live_status' with the 'is_live'/'was_live' booleans
        live_keys = ('is_live', 'was_live')
        live_status = info_dict.get('live_status')
        if live_status is None:
            for key in live_keys:
                # Only the first key that is not explicitly False is looked at:
                # if it is truthy it becomes the status, and the search stops either way
                if info_dict.get(key) is False:
                    continue
                if info_dict.get(key):
                    live_status = key
                break
            if all(info_dict.get(key) is False for key in live_keys):
                live_status = 'not_live'
        if live_status:
            info_dict['live_status'] = live_status
            # Fill in the booleans that the extractor left unset
            for key in live_keys:
                if info_dict.get(key) is None:
                    info_dict[key] = (live_status == key)

        # Auto generate title fields corresponding to the *_number fields when missing
        # in order to always have clean titles. This is very common for TV series.
        for field in ('chapter', 'season', 'episode'):
            if info_dict.get('%s_number' % field) is not None and not info_dict.get(field):
                info_dict[field] = '%s %d' % (field.capitalize(), info_dict['%s_number' % field])

        # Sanitize subtitle URLs and fill in missing extensions from the URL
        for cc_kind in ('subtitles', 'automatic_captions'):
            cc = info_dict.get(cc_kind)
            if cc:
                for _, subtitle in cc.items():
                    for subtitle_format in subtitle:
                        if subtitle_format.get('url'):
                            subtitle_format['url'] = sanitize_url(subtitle_format['url'])
                        # NOTE(review): this needs 'url' to be present whenever 'ext' is missing
                        if subtitle_format.get('ext') is None:
                            subtitle_format['ext'] = determine_ext(subtitle_format['url']).lower()

        automatic_captions = info_dict.get('automatic_captions')
        subtitles = info_dict.get('subtitles')

        info_dict['requested_subtitles'] = self.process_subtitles(
            info_dict['id'], subtitles, automatic_captions)

        if info_dict.get('formats') is None:
            # There's only one format available
            formats = [info_dict]
        else:
            formats = info_dict['formats']

        # Remember whether any format has DRM before such formats are possibly dropped
        info_dict['__has_drm'] = any(f.get('has_drm') for f in formats)
        if not self.params.get('allow_unplayable_formats'):
            formats = [f for f in formats if not f.get('has_drm')]

        if not formats:
            self.raise_no_formats(info_dict)

        def is_wellformed(f):
            # A format is usable only if it has a non-empty 'url'
            url = f.get('url')
            if not url:
                self.report_warning(
                    '"url" field is missing or empty - skipping format, '
                    'there is an error in extractor')
                return False
            if isinstance(url, bytes):
                sanitize_string_field(f, 'url')
            return True

        # Filter out malformed formats for better extraction robustness
        formats = list(filter(is_wellformed, formats))

        # format_id -> list of the formats carrying that id
        formats_dict = {}

        # We check that all the formats have the format and format_id fields
        for i, format in enumerate(formats):
            sanitize_string_field(format, 'format_id')
            sanitize_numeric_fields(format)
            format['url'] = sanitize_url(format['url'])
            if not format.get('format_id'):
                # Fall back to the position in the list
                format['format_id'] = compat_str(i)
            else:
                # Sanitize format_id from characters used in format selector expression
                format['format_id'] = re.sub(r'[\s,/+\[\]()]', '_', format['format_id'])
            format_id = format['format_id']
            if format_id not in formats_dict:
                formats_dict[format_id] = []
            formats_dict[format_id].append(format)

        # Make sure all formats have unique format_id
        common_exts = set(itertools.chain(*self._format_selection_exts.values()))
        for format_id, ambiguous_formats in formats_dict.items():
            ambigious_id = len(ambiguous_formats) > 1
            for i, format in enumerate(ambiguous_formats):
                if ambigious_id:
                    # Disambiguate by appending the index within the group
                    format['format_id'] = '%s-%d' % (format_id, i)
                if format.get('ext') is None:
                    format['ext'] = determine_ext(format['url']).lower()
                # Ensure there is no conflict between id and ext in format selection
                # See https://github.com/yt-dlp/yt-dlp/issues/1282
                if format['format_id'] != format['ext'] and format['format_id'] in common_exts:
                    format['format_id'] = 'f%s' % format['format_id']

        # Fill in the derived fields of every format
        for i, format in enumerate(formats):
            if format.get('format') is None:
                format['format'] = '{id} - {res}{note}'.format(
                    id=format['format_id'],
                    res=self.format_resolution(format),
                    note=format_field(format, 'format_note', ' (%s)'),
                )
            if format.get('protocol') is None:
                format['protocol'] = determine_protocol(format)
            if format.get('resolution') is None:
                format['resolution'] = self.format_resolution(format, default=None)
            if format.get('dynamic_range') is None and format.get('vcodec') != 'none':
                format['dynamic_range'] = 'SDR'
            # Estimate the size from duration and bitrate when no size is known
            # NOTE(review): the 1024 / 8 factor presumes tbr is in KBit/s - confirm
            if (info_dict.get('duration') and format.get('tbr')
                    and not format.get('filesize') and not format.get('filesize_approx')):
                format['filesize_approx'] = info_dict['duration'] * format['tbr'] * (1024 / 8)

            # Add HTTP headers, so that external programs can use them from the
            # json output
            full_format_info = info_dict.copy()
            full_format_info.update(format)
            format['http_headers'] = self._calc_headers(full_format_info)
        # Remove private housekeeping stuff
        if '__x_forwarded_for_ip' in info_dict:
            del info_dict['__x_forwarded_for_ip']

        # TODO Central sorting goes here

        if self.params.get('check_formats') is True:
            # Checked lazily, starting from the end of the list
            formats = LazyList(self._check_formats(formats[::-1]), reverse=True)

        if not formats or formats[0] is not info_dict:
            # only set the 'formats' fields if the original info_dict list them
            # otherwise we end up with a circular reference, the first (and unique)
            # element in the 'formats' field in info_dict is info_dict itself,
            # which can't be exported to json
            info_dict['formats'] = formats

        info_dict, _ = self.pre_process(info_dict)

        # The pre-processors may have modified the formats
        formats = info_dict.get('formats', [info_dict])

        if self.params.get('list_thumbnails'):
            self.list_thumbnails(info_dict)
        if self.params.get('listformats'):
            if not info_dict.get('formats') and not info_dict.get('url'):
                self.to_screen('%s has no formats' % info_dict['id'])
            else:
                self.list_formats(info_dict)
        if self.params.get('listsubtitles'):
            if 'automatic_captions' in info_dict:
                self.list_subtitles(
                    info_dict['id'], automatic_captions, 'automatic captions')
            self.list_subtitles(info_dict['id'], subtitles, 'subtitles')
        # Listing alone ends the processing only if 'simulate' was not set explicitly
        list_only = self.params.get('simulate') is None and (
            self.params.get('list_thumbnails') or self.params.get('listformats') or self.params.get('listsubtitles'))
        if list_only:
            # Without this printing, -F --print-json will not work
            self.__forced_printings(info_dict, self.prepare_filename(info_dict), incomplete=True)
            return

        format_selector = self.format_selector
        if format_selector is None:
            req_format = self._default_format_spec(info_dict, download=download)
            self.write_debug('Default format spec: %s' % req_format)
            format_selector = self.build_format_selector(req_format)

        # While in format selection we may need to have an access to the original
        # format set in order to calculate some metrics or do some processing.
        # For now we need to be able to guess whether original formats provided
        # by extractor are incomplete or not (i.e. whether extractor provides only
        # video-only or audio-only formats) for proper formats selection for
        # extractors with such incomplete formats (see
        # https://github.com/ytdl-org/youtube-dl/pull/5556).
        # Since formats may be filtered during format selection and may not match
        # the original formats the results may be incorrect. Thus original formats
        # or pre-calculated metrics should be passed to format selection routines
        # as well.
        # We will pass a context object containing all necessary additional data
        # instead of just formats.
        # This fixes incorrect format selection issue (see
        # https://github.com/ytdl-org/youtube-dl/issues/10083).
        incomplete_formats = (
            # All formats are video-only or
            all(f.get('vcodec') != 'none' and f.get('acodec') == 'none' for f in formats)
            # all formats are audio-only
            or all(f.get('vcodec') == 'none' and f.get('acodec') != 'none' for f in formats))

        ctx = {
            'formats': formats,
            'incomplete_formats': incomplete_formats,
        }

        formats_to_download = list(format_selector(ctx))
        if not formats_to_download:
            if not self.params.get('ignore_no_formats_error'):
                raise ExtractorError('Requested format is not available', expected=True,
                                     video_id=info_dict['id'], ie=info_dict['extractor'])
            else:
                self.report_warning('Requested format is not available')
                # Process what we can, even without any available formats.
                self.process_info(dict(info_dict))
        elif download:
            self.to_screen(
                '[info] %s: Downloading %d format(s): %s' % (
                    info_dict['id'], len(formats_to_download),
                    ", ".join([f['format_id'] for f in formats_to_download])))
            for fmt in formats_to_download:
                # Each format is processed with its own shallow copy of the info_dict
                new_info = dict(info_dict)
                # Save a reference to the original info_dict so that it can be modified in process_info if needed
                new_info['__original_infodict'] = info_dict
                new_info.update(fmt)
                self.process_info(new_info)
        # We update the info dict with the selected best quality format (backwards compatibility)
        if formats_to_download:
            info_dict.update(formats_to_download[-1])
        return info_dict
2511
2512     def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
2513         """Select the requested subtitles and their format"""
2514         available_subs = {}
2515         if normal_subtitles and self.params.get('writesubtitles'):
2516             available_subs.update(normal_subtitles)
2517         if automatic_captions and self.params.get('writeautomaticsub'):
2518             for lang, cap_info in automatic_captions.items():
2519                 if lang not in available_subs:
2520                     available_subs[lang] = cap_info
2521
2522         if (not self.params.get('writesubtitles') and not
2523                 self.params.get('writeautomaticsub') or not
2524                 available_subs):
2525             return None
2526
2527         all_sub_langs = available_subs.keys()
2528         if self.params.get('allsubtitles', False):
2529             requested_langs = all_sub_langs
2530         elif self.params.get('subtitleslangs', False):
2531             # A list is used so that the order of languages will be the same as
2532             # given in subtitleslangs. See https://github.com/yt-dlp/yt-dlp/issues/1041
2533             requested_langs = []
2534             for lang_re in self.params.get('subtitleslangs'):
2535                 if lang_re == 'all':
2536                     requested_langs.extend(all_sub_langs)
2537                     continue
2538                 discard = lang_re[0] == '-'
2539                 if discard:
2540                     lang_re = lang_re[1:]
2541                 current_langs = filter(re.compile(lang_re + '$').match, all_sub_langs)
2542                 if discard:
2543                     for lang in current_langs:
2544                         while lang in requested_langs:
2545                             requested_langs.remove(lang)
2546                 else:
2547                     requested_langs.extend(current_langs)
2548             requested_langs = orderedSet(requested_langs)
2549         elif 'en' in available_subs:
2550             requested_langs = ['en']
2551         else:
2552             requested_langs = [list(all_sub_langs)[0]]
2553         if requested_langs:
2554             self.write_debug('Downloading subtitles: %s' % ', '.join(requested_langs))
2555
2556         formats_query = self.params.get('subtitlesformat', 'best')
2557         formats_preference = formats_query.split('/') if formats_query else []
2558         subs = {}
2559         for lang in requested_langs:
2560             formats = available_subs.get(lang)
2561             if formats is None:
2562                 self.report_warning('%s subtitles not available for %s' % (lang, video_id))
2563                 continue
2564             for ext in formats_preference:
2565                 if ext == 'best':
2566                     f = formats[-1]
2567                     break
2568                 matches = list(filter(lambda f: f['ext'] == ext, formats))
2569                 if matches:
2570                     f = matches[-1]
2571                     break
2572             else:
2573                 f = formats[-1]
2574                 self.report_warning(
2575                     'No subtitle format found matching "%s" for language %s, '
2576                     'using %s' % (formats_query, lang, f['ext']))
2577             subs[lang] = f
2578         return subs
2579
2580     def __forced_printings(self, info_dict, filename, incomplete):
2581         def print_mandatory(field, actual_field=None):
2582             if actual_field is None:
2583                 actual_field = field
2584             if (self.params.get('force%s' % field, False)
2585                     and (not incomplete or info_dict.get(actual_field) is not None)):
2586                 self.to_stdout(info_dict[actual_field])
2587
2588         def print_optional(field):
2589             if (self.params.get('force%s' % field, False)
2590                     and info_dict.get(field) is not None):
2591                 self.to_stdout(info_dict[field])
2592
2593         info_dict = info_dict.copy()
2594         if filename is not None:
2595             info_dict['filename'] = filename
2596         if info_dict.get('requested_formats') is not None:
2597             # For RTMP URLs, also include the playpath
2598             info_dict['urls'] = '\n'.join(f['url'] + f.get('play_path', '') for f in info_dict['requested_formats'])
2599         elif 'url' in info_dict:
2600             info_dict['urls'] = info_dict['url'] + info_dict.get('play_path', '')
2601
2602         if self.params.get('forceprint') or self.params.get('forcejson'):
2603             self.post_extract(info_dict)
2604         for tmpl in self.params.get('forceprint', []):
2605             mobj = re.match(r'\w+(=?)$', tmpl)
2606             if mobj and mobj.group(1):
2607                 tmpl = f'{tmpl[:-1]} = %({tmpl[:-1]})s'
2608             elif mobj:
2609                 tmpl = '%({})s'.format(tmpl)
2610             self.to_stdout(self.evaluate_outtmpl(tmpl, info_dict))
2611
2612         print_mandatory('title')
2613         print_mandatory('id')
2614         print_mandatory('url', 'urls')
2615         print_optional('thumbnail')
2616         print_optional('description')
2617         print_optional('filename')
2618         if self.params.get('forceduration') and info_dict.get('duration') is not None:
2619             self.to_stdout(formatSeconds(info_dict['duration']))
2620         print_mandatory('format')
2621
2622         if self.params.get('forcejson'):
2623             self.to_stdout(json.dumps(self.sanitize_info(info_dict)))
2624
2625     def dl(self, name, info, subtitle=False, test=False):
2626         if not info.get('url'):
2627             self.raise_no_formats(info, True)
2628
2629         if test:
2630             verbose = self.params.get('verbose')
2631             params = {
2632                 'test': True,
2633                 'quiet': self.params.get('quiet') or not verbose,
2634                 'verbose': verbose,
2635                 'noprogress': not verbose,
2636                 'nopart': True,
2637                 'skip_unavailable_fragments': False,
2638                 'keep_fragments': False,
2639                 'overwrites': True,
2640                 '_no_ytdl_file': True,
2641             }
2642         else:
2643             params = self.params
2644         fd = get_suitable_downloader(info, params, to_stdout=(name == '-'))(self, params)
2645         if not test:
2646             for ph in self._progress_hooks:
2647                 fd.add_progress_hook(ph)
2648             urls = '", "'.join([f['url'] for f in info.get('requested_formats', [])] or [info['url']])
2649             self.write_debug('Invoking downloader on "%s"' % urls)
2650
2651         new_info = copy.deepcopy(self._copy_infodict(info))
2652         if new_info.get('http_headers') is None:
2653             new_info['http_headers'] = self._calc_headers(new_info)
2654         return fd.download(name, new_info, subtitle)
2655
2656     def process_info(self, info_dict):
2657         """Process a single resolved IE result."""
2658
2659         assert info_dict.get('_type', 'video') == 'video'
2660
2661         max_downloads = self.params.get('max_downloads')
2662         if max_downloads is not None:
2663             if self._num_downloads >= int(max_downloads):
2664                 raise MaxDownloadsReached()
2665
2666         # TODO: backward compatibility, to be removed
2667         info_dict['fulltitle'] = info_dict['title']
2668
2669         if 'format' not in info_dict and 'ext' in info_dict:
2670             info_dict['format'] = info_dict['ext']
2671
2672         if self._match_entry(info_dict) is not None:
2673             return
2674
2675         self.post_extract(info_dict)
2676         self._num_downloads += 1
2677
2678         # info_dict['_filename'] needs to be set for backward compatibility
2679         info_dict['_filename'] = full_filename = self.prepare_filename(info_dict, warn=True)
2680         temp_filename = self.prepare_filename(info_dict, 'temp')
2681         files_to_move = {}
2682
2683         # Forced printings
2684         self.__forced_printings(info_dict, full_filename, incomplete=('format' not in info_dict))
2685
2686         if self.params.get('simulate'):
2687             if self.params.get('force_write_download_archive', False):
2688                 self.record_download_archive(info_dict)
2689             # Do nothing else if in simulate mode
2690             return
2691
2692         if full_filename is None:
2693             return
2694         if not self._ensure_dir_exists(encodeFilename(full_filename)):
2695             return
2696         if not self._ensure_dir_exists(encodeFilename(temp_filename)):
2697             return
2698
2699         if self._write_description('video', info_dict,
2700                                    self.prepare_filename(info_dict, 'description')) is None:
2701             return
2702
2703         sub_files = self._write_subtitles(info_dict, temp_filename)
2704         if sub_files is None:
2705             return
2706         files_to_move.update(dict(sub_files))
2707
2708         thumb_files = self._write_thumbnails(
2709             'video', info_dict, temp_filename, self.prepare_filename(info_dict, 'thumbnail'))
2710         if thumb_files is None:
2711             return
2712         files_to_move.update(dict(thumb_files))
2713
2714         infofn = self.prepare_filename(info_dict, 'infojson')
2715         _infojson_written = self._write_info_json('video', info_dict, infofn)
2716         if _infojson_written:
2717             info_dict['infojson_filename'] = infofn
2718             # For backward compatability, even though it was a private field
2719             info_dict['__infojson_filename'] = infofn
2720         elif _infojson_written is None:
2721             return
2722
2723         # Note: Annotations are deprecated
2724         annofn = None
2725         if self.params.get('writeannotations', False):
2726             annofn = self.prepare_filename(info_dict, 'annotation')
2727         if annofn:
2728             if not self._ensure_dir_exists(encodeFilename(annofn)):
2729                 return
2730             if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(annofn)):
2731                 self.to_screen('[info] Video annotations are already present')
2732             elif not info_dict.get('annotations'):
2733                 self.report_warning('There are no annotations to write.')
2734             else:
2735                 try:
2736                     self.to_screen('[info] Writing video annotations to: ' + annofn)
2737                     with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
2738                         annofile.write(info_dict['annotations'])
2739                 except (KeyError, TypeError):
2740                     self.report_warning('There are no annotations to write.')
2741                 except (OSError, IOError):
2742                     self.report_error('Cannot write annotations file: ' + annofn)
2743                     return
2744
2745         # Write internet shortcut files
2746         def _write_link_file(link_type):
2747             if 'webpage_url' not in info_dict:
2748                 self.report_error('Cannot write internet shortcut file because the "webpage_url" field is missing in the media information')
2749                 return False
2750             linkfn = replace_extension(self.prepare_filename(info_dict, 'link'), link_type, info_dict.get('ext'))
2751             if not self._ensure_dir_exists(encodeFilename(linkfn)):
2752                 return False
2753             if self.params.get('overwrites', True) and os.path.exists(encodeFilename(linkfn)):
2754                 self.to_screen(f'[info] Internet shortcut (.{link_type}) is already present')
2755                 return True
2756             try:
2757                 self.to_screen(f'[info] Writing internet shortcut (.{link_type}) to: {linkfn}')
2758                 with io.open(encodeFilename(to_high_limit_path(linkfn)), 'w', encoding='utf-8',
2759                              newline='\r\n' if link_type == 'url' else '\n') as linkfile:
2760                     template_vars = {'url': iri_to_uri(info_dict['webpage_url'])}
2761                     if link_type == 'desktop':
2762                         template_vars['filename'] = linkfn[:-(len(link_type) + 1)]
2763                     linkfile.write(LINK_TEMPLATES[link_type] % template_vars)
2764             except (OSError, IOError):
2765                 self.report_error(f'Cannot write internet shortcut {linkfn}')
2766                 return False
2767             return True
2768
2769         write_links = {
2770             'url': self.params.get('writeurllink'),
2771             'webloc': self.params.get('writewebloclink'),
2772             'desktop': self.params.get('writedesktoplink'),
2773         }
2774         if self.params.get('writelink'):
2775             link_type = ('webloc' if sys.platform == 'darwin'
2776                          else 'desktop' if sys.platform.startswith('linux')
2777                          else 'url')
2778             write_links[link_type] = True
2779
2780         if any(should_write and not _write_link_file(link_type)
2781                for link_type, should_write in write_links.items()):
2782             return
2783
2784         try:
2785             info_dict, files_to_move = self.pre_process(info_dict, 'before_dl', files_to_move)
2786         except PostProcessingError as err:
2787             self.report_error('Preprocessing: %s' % str(err))
2788             return
2789
2790         must_record_download_archive = False
2791         if self.params.get('skip_download', False):
2792             info_dict['filepath'] = temp_filename
2793             info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))
2794             info_dict['__files_to_move'] = files_to_move
2795             info_dict = self.run_pp(MoveFilesAfterDownloadPP(self, False), info_dict)
2796         else:
2797             # Download
2798             info_dict.setdefault('__postprocessors', [])
2799             try:
2800
2801                 def existing_file(*filepaths):
2802                     ext = info_dict.get('ext')
2803                     final_ext = self.params.get('final_ext', ext)
2804                     existing_files = []
2805                     for file in orderedSet(filepaths):
2806                         if final_ext != ext:
2807                             converted = replace_extension(file, final_ext, ext)
2808                             if os.path.exists(encodeFilename(converted)):
2809                                 existing_files.append(converted)
2810                         if os.path.exists(encodeFilename(file)):
2811                             existing_files.append(file)
2812
2813                     if not existing_files or self.params.get('overwrites', False):
2814                         for file in orderedSet(existing_files):
2815                             self.report_file_delete(file)
2816                             os.remove(encodeFilename(file))
2817                         return None
2818
2819                     info_dict['ext'] = os.path.splitext(existing_files[0])[1][1:]
2820                     return existing_files[0]
2821
2822                 success = True
2823                 if info_dict.get('requested_formats') is not None:
2824
2825                     def compatible_formats(formats):
2826                         # TODO: some formats actually allow this (mkv, webm, ogg, mp4), but not all of them.
2827                         video_formats = [format for format in formats if format.get('vcodec') != 'none']
2828                         audio_formats = [format for format in formats if format.get('acodec') != 'none']
2829                         if len(video_formats) > 2 or len(audio_formats) > 2:
2830                             return False
2831
2832                         # Check extension
2833                         exts = set(format.get('ext') for format in formats)
2834                         COMPATIBLE_EXTS = (
2835                             set(('mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'ismv', 'isma')),
2836                             set(('webm',)),
2837                         )
2838                         for ext_sets in COMPATIBLE_EXTS:
2839                             if ext_sets.issuperset(exts):
2840                                 return True
2841                         # TODO: Check acodec/vcodec
2842                         return False
2843
2844                     requested_formats = info_dict['requested_formats']
2845                     old_ext = info_dict['ext']
2846                     if self.params.get('merge_output_format') is None:
2847                         if not compatible_formats(requested_formats):
2848                             info_dict['ext'] = 'mkv'
2849                             self.report_warning(
2850                                 'Requested formats are incompatible for merge and will be merged into mkv')
2851                         if (info_dict['ext'] == 'webm'
2852                                 and info_dict.get('thumbnails')
2853                                 # check with type instead of pp_key, __name__, or isinstance
2854                                 # since we dont want any custom PPs to trigger this
2855                                 and any(type(pp) == EmbedThumbnailPP for pp in self._pps['post_process'])):
2856                             info_dict['ext'] = 'mkv'
2857                             self.report_warning(
2858                                 'webm doesn\'t support embedding a thumbnail, mkv will be used')
2859                     new_ext = info_dict['ext']
2860
2861                     def correct_ext(filename, ext=new_ext):
2862                         if filename == '-':
2863                             return filename
2864                         filename_real_ext = os.path.splitext(filename)[1][1:]
2865                         filename_wo_ext = (
2866                             os.path.splitext(filename)[0]
2867                             if filename_real_ext in (old_ext, new_ext)
2868                             else filename)
2869                         return '%s.%s' % (filename_wo_ext, ext)
2870
2871                     # Ensure filename always has a correct extension for successful merge
2872                     full_filename = correct_ext(full_filename)
2873                     temp_filename = correct_ext(temp_filename)
2874                     dl_filename = existing_file(full_filename, temp_filename)
2875                     info_dict['__real_download'] = False
2876
2877                     if dl_filename is not None:
2878                         self.report_file_already_downloaded(dl_filename)
2879                     elif get_suitable_downloader(info_dict, self.params, to_stdout=temp_filename == '-'):
2880                         info_dict['url'] = '\n'.join(f['url'] for f in requested_formats)
2881                         success, real_download = self.dl(temp_filename, info_dict)
2882                         info_dict['__real_download'] = real_download
2883                     else:
2884                         downloaded = []
2885                         merger = FFmpegMergerPP(self)
2886                         if self.params.get('allow_unplayable_formats'):
2887                             self.report_warning(
2888                                 'You have requested merging of multiple formats '
2889                                 'while also allowing unplayable formats to be downloaded. '
2890                                 'The formats won\'t be merged to prevent data corruption.')
2891                         elif not merger.available:
2892                             self.report_warning(
2893                                 'You have requested merging of multiple formats but ffmpeg is not installed. '
2894                                 'The formats won\'t be merged.')
2895
2896                         if temp_filename == '-':
2897                             reason = ('using a downloader other than ffmpeg' if FFmpegFD.can_merge_formats(info_dict)
2898                                       else 'but the formats are incompatible for simultaneous download' if merger.available
2899                                       else 'but ffmpeg is not installed')
2900                             self.report_warning(
2901                                 f'You have requested downloading multiple formats to stdout {reason}. '
2902                                 'The formats will be streamed one after the other')
2903                             fname = temp_filename
2904                         for f in requested_formats:
2905                             new_info = dict(info_dict)
2906                             del new_info['requested_formats']
2907                             new_info.update(f)
2908                             if temp_filename != '-':
2909                                 fname = prepend_extension(
2910                                     correct_ext(temp_filename, new_info['ext']),
2911                                     'f%s' % f['format_id'], new_info['ext'])
2912                                 if not self._ensure_dir_exists(fname):
2913                                     return
2914                                 f['filepath'] = fname
2915                                 downloaded.append(fname)
2916                             partial_success, real_download = self.dl(fname, new_info)
2917                             info_dict['__real_download'] = info_dict['__real_download'] or real_download
2918                             success = success and partial_success
2919                         if merger.available and not self.params.get('allow_unplayable_formats'):
2920                             info_dict['__postprocessors'].append(merger)
2921                             info_dict['__files_to_merge'] = downloaded
2922                             # Even if there were no downloads, it is being merged only now
2923                             info_dict['__real_download'] = True
2924                         else:
2925                             for file in downloaded:
2926                                 files_to_move[file] = None
2927                 else:
2928                     # Just a single file
2929                     dl_filename = existing_file(full_filename, temp_filename)
2930                     if dl_filename is None or dl_filename == temp_filename:
2931                         # dl_filename == temp_filename could mean that the file was partially downloaded with --no-part.
2932                         # So we should try to resume the download
2933                         success, real_download = self.dl(temp_filename, info_dict)
2934                         info_dict['__real_download'] = real_download
2935                     else:
2936                         self.report_file_already_downloaded(dl_filename)
2937
2938                 dl_filename = dl_filename or temp_filename
2939                 info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))
2940
2941             except network_exceptions as err:
2942                 self.report_error('unable to download video data: %s' % error_to_compat_str(err))
2943                 return
2944             except (OSError, IOError) as err:
2945                 raise UnavailableVideoError(err)
2946             except (ContentTooShortError, ) as err:
2947                 self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
2948                 return
2949
2950             if success and full_filename != '-':
2951
2952                 def fixup():
2953                     do_fixup = True
2954                     fixup_policy = self.params.get('fixup')
2955                     vid = info_dict['id']
2956
2957                     if fixup_policy in ('ignore', 'never'):
2958                         return
2959                     elif fixup_policy == 'warn':
2960                         do_fixup = False
2961                     elif fixup_policy != 'force':
2962                         assert fixup_policy in ('detect_or_warn', None)
2963                         if not info_dict.get('__real_download'):
2964                             do_fixup = False
2965
2966                     def ffmpeg_fixup(cndn, msg, cls):
2967                         if not cndn:
2968                             return
2969                         if not do_fixup:
2970                             self.report_warning(f'{vid}: {msg}')
2971                             return
2972                         pp = cls(self)
2973                         if pp.available:
2974                             info_dict['__postprocessors'].append(pp)
2975                         else:
2976                             self.report_warning(f'{vid}: {msg}. Install ffmpeg to fix this automatically')
2977
2978                     stretched_ratio = info_dict.get('stretched_ratio')
2979                     ffmpeg_fixup(
2980                         stretched_ratio not in (1, None),
2981                         f'Non-uniform pixel ratio {stretched_ratio}',
2982                         FFmpegFixupStretchedPP)
2983
2984                     ffmpeg_fixup(
2985                         (info_dict.get('requested_formats') is None
2986                          and info_dict.get('container') == 'm4a_dash'
2987                          and info_dict.get('ext') == 'm4a'),
2988                         'writing DASH m4a. Only some players support this container',
2989                         FFmpegFixupM4aPP)
2990
2991                     downloader = get_suitable_downloader(info_dict, self.params) if 'protocol' in info_dict else None
2992                     downloader = downloader.__name__ if downloader else None
2993                     ffmpeg_fixup(info_dict.get('requested_formats') is None and downloader == 'HlsFD',
2994                                  'Possible MPEG-TS in MP4 container or malformed AAC timestamps',
2995                                  FFmpegFixupM3u8PP)
2996                     ffmpeg_fixup(downloader == 'WebSocketFragmentFD', 'Malformed timestamps detected', FFmpegFixupTimestampPP)
2997                     ffmpeg_fixup(downloader == 'WebSocketFragmentFD', 'Malformed duration detected', FFmpegFixupDurationPP)
2998
2999                 fixup()
3000                 try:
3001                     info_dict = self.post_process(dl_filename, info_dict, files_to_move)
3002                 except PostProcessingError as err:
3003                     self.report_error('Postprocessing: %s' % str(err))
3004                     return
3005                 try:
3006                     for ph in self._post_hooks:
3007                         ph(info_dict['filepath'])
3008                 except Exception as err:
3009                     self.report_error('post hooks: %s' % str(err))
3010                     return
3011                 must_record_download_archive = True
3012
3013         if must_record_download_archive or self.params.get('force_write_download_archive', False):
3014             self.record_download_archive(info_dict)
3015         max_downloads = self.params.get('max_downloads')
3016         if max_downloads is not None and self._num_downloads >= int(max_downloads):
3017             raise MaxDownloadsReached()
3018
3019     def __download_wrapper(self, func):
3020         @functools.wraps(func)
3021         def wrapper(*args, **kwargs):
3022             try:
3023                 res = func(*args, **kwargs)
3024             except UnavailableVideoError as e:
3025                 self.report_error(e)
3026             except MaxDownloadsReached as e:
3027                 self.to_screen(f'[info] {e}')
3028                 raise
3029             except DownloadCancelled as e:
3030                 self.to_screen(f'[info] {e}')
3031                 if not self.params.get('break_per_url'):
3032                     raise
3033             else:
3034                 if self.params.get('dump_single_json', False):
3035                     self.post_extract(res)
3036                     self.to_stdout(json.dumps(self.sanitize_info(res)))
3037         return wrapper
3038
3039     def download(self, url_list):
3040         """Download a given list of URLs."""
3041         url_list = variadic(url_list)  # Passing a single URL is a common mistake
3042         outtmpl = self.outtmpl_dict['default']
3043         if (len(url_list) > 1
3044                 and outtmpl != '-'
3045                 and '%' not in outtmpl
3046                 and self.params.get('max_downloads') != 1):
3047             raise SameFileError(outtmpl)
3048
3049         for url in url_list:
3050             self.__download_wrapper(self.extract_info)(
3051                 url, force_generic_extractor=self.params.get('force_generic_extractor', False))
3052
3053         return self._download_retcode
3054
3055     def download_with_info_file(self, info_filename):
3056         with contextlib.closing(fileinput.FileInput(
3057                 [info_filename], mode='r',
3058                 openhook=fileinput.hook_encoded('utf-8'))) as f:
3059             # FileInput doesn't have a read method, we can't call json.load
3060             info = self.sanitize_info(json.loads('\n'.join(f)), self.params.get('clean_infojson', True))
3061         try:
3062             self.__download_wrapper(self.process_ie_result)(info, download=True)
3063         except (DownloadError, EntryNotInPlaylist, ReExtractInfo) as e:
3064             if not isinstance(e, EntryNotInPlaylist):
3065                 self.to_stderr('\r')
3066             webpage_url = info.get('webpage_url')
3067             if webpage_url is not None:
3068                 self.report_warning(f'The info failed to download: {e}; trying with URL {webpage_url}')
3069                 return self.download([webpage_url])
3070             else:
3071                 raise
3072         return self._download_retcode
3073
3074     @staticmethod
3075     def sanitize_info(info_dict, remove_private_keys=False):
3076         ''' Sanitize the infodict for converting to json '''
3077         if info_dict is None:
3078             return info_dict
3079         info_dict.setdefault('epoch', int(time.time()))
3080         remove_keys = {'__original_infodict'}  # Always remove this since this may contain a copy of the entire dict
3081         keep_keys = ['_type']  # Always keep this to facilitate load-info-json
3082         if remove_private_keys:
3083             remove_keys |= {
3084                 'requested_formats', 'requested_subtitles', 'requested_entries', 'entries',
3085                 'filepath', 'infojson_filename', 'original_url', 'playlist_autonumber',
3086             }
3087             empty_values = (None, {}, [], set(), tuple())
3088             reject = lambda k, v: k not in keep_keys and (
3089                 k.startswith('_') or k in remove_keys or v in empty_values)
3090         else:
3091             reject = lambda k, v: k in remove_keys
3092         filter_fn = lambda obj: (
3093             list(map(filter_fn, obj)) if isinstance(obj, (LazyList, list, tuple, set))
3094             else obj if not isinstance(obj, dict)
3095             else dict((k, filter_fn(v)) for k, v in obj.items() if not reject(k, v)))
3096         return filter_fn(info_dict)
3097
3098     @staticmethod
3099     def filter_requested_info(info_dict, actually_filter=True):
3100         ''' Alias of sanitize_info for backward compatibility '''
3101         return YoutubeDL.sanitize_info(info_dict, actually_filter)
3102
3103     def run_pp(self, pp, infodict):
3104         files_to_delete = []
3105         if '__files_to_move' not in infodict:
3106             infodict['__files_to_move'] = {}
3107         try:
3108             files_to_delete, infodict = pp.run(infodict)
3109         except PostProcessingError as e:
3110             # Must be True and not 'only_download'
3111             if self.params.get('ignoreerrors') is True:
3112                 self.report_error(e)
3113                 return infodict
3114             raise
3115
3116         if not files_to_delete:
3117             return infodict
3118         if self.params.get('keepvideo', False):
3119             for f in files_to_delete:
3120                 infodict['__files_to_move'].setdefault(f, '')
3121         else:
3122             for old_filename in set(files_to_delete):
3123                 self.to_screen('Deleting original file %s (pass -k to keep)' % old_filename)
3124                 try:
3125                     os.remove(encodeFilename(old_filename))
3126                 except (IOError, OSError):
3127                     self.report_warning('Unable to remove downloaded original file')
3128                 if old_filename in infodict['__files_to_move']:
3129                     del infodict['__files_to_move'][old_filename]
3130         return infodict
3131
3132     @staticmethod
3133     def post_extract(info_dict):
3134         def actual_post_extract(info_dict):
3135             if info_dict.get('_type') in ('playlist', 'multi_video'):
3136                 for video_dict in info_dict.get('entries', {}):
3137                     actual_post_extract(video_dict or {})
3138                 return
3139
3140             post_extractor = info_dict.get('__post_extractor') or (lambda: {})
3141             extra = post_extractor().items()
3142             info_dict.update(extra)
3143             info_dict.pop('__post_extractor', None)
3144
3145             original_infodict = info_dict.get('__original_infodict') or {}
3146             original_infodict.update(extra)
3147             original_infodict.pop('__post_extractor', None)
3148
3149         actual_post_extract(info_dict or {})
3150
3151     def pre_process(self, ie_info, key='pre_process', files_to_move=None):
3152         info = dict(ie_info)
3153         info['__files_to_move'] = files_to_move or {}
3154         for pp in self._pps[key]:
3155             info = self.run_pp(pp, info)
3156         return info, info.pop('__files_to_move', None)
3157
3158     def post_process(self, filename, ie_info, files_to_move=None):
3159         """Run all the postprocessors on the given file."""
3160         info = dict(ie_info)
3161         info['filepath'] = filename
3162         info['__files_to_move'] = files_to_move or {}
3163
3164         for pp in ie_info.get('__postprocessors', []) + self._pps['post_process']:
3165             info = self.run_pp(pp, info)
3166         info = self.run_pp(MoveFilesAfterDownloadPP(self), info)
3167         del info['__files_to_move']
3168         for pp in self._pps['after_move']:
3169             info = self.run_pp(pp, info)
3170         return info
3171
3172     def _make_archive_id(self, info_dict):
3173         video_id = info_dict.get('id')
3174         if not video_id:
3175             return
3176         # Future-proof against any change in case
3177         # and backwards compatibility with prior versions
3178         extractor = info_dict.get('extractor_key') or info_dict.get('ie_key')  # key in a playlist
3179         if extractor is None:
3180             url = str_or_none(info_dict.get('url'))
3181             if not url:
3182                 return
3183             # Try to find matching extractor for the URL and take its ie_key
3184             for ie_key, ie in self._ies.items():
3185                 if ie.suitable(url):
3186                     extractor = ie_key
3187                     break
3188             else:
3189                 return
3190         return '%s %s' % (extractor.lower(), video_id)
3191
3192     def in_download_archive(self, info_dict):
3193         fn = self.params.get('download_archive')
3194         if fn is None:
3195             return False
3196
3197         vid_id = self._make_archive_id(info_dict)
3198         if not vid_id:
3199             return False  # Incomplete video information
3200
3201         return vid_id in self.archive
3202
3203     def record_download_archive(self, info_dict):
3204         fn = self.params.get('download_archive')
3205         if fn is None:
3206             return
3207         vid_id = self._make_archive_id(info_dict)
3208         assert vid_id
3209         with locked_file(fn, 'a', encoding='utf-8') as archive_file:
3210             archive_file.write(vid_id + '\n')
3211         self.archive.add(vid_id)
3212
3213     @staticmethod
3214     def format_resolution(format, default='unknown'):
3215         if format.get('vcodec') == 'none' and format.get('acodec') != 'none':
3216             return 'audio only'
3217         if format.get('resolution') is not None:
3218             return format['resolution']
3219         if format.get('width') and format.get('height'):
3220             return '%dx%d' % (format['width'], format['height'])
3221         elif format.get('height'):
3222             return '%sp' % format['height']
3223         elif format.get('width'):
3224             return '%dx?' % format['width']
3225         return default
3226
3227     def _format_note(self, fdict):
3228         res = ''
3229         if fdict.get('ext') in ['f4f', 'f4m']:
3230             res += '(unsupported)'
3231         if fdict.get('language'):
3232             if res:
3233                 res += ' '
3234             res += '[%s]' % fdict['language']
3235         if fdict.get('format_note') is not None:
3236             if res:
3237                 res += ' '
3238             res += fdict['format_note']
3239         if fdict.get('tbr') is not None:
3240             if res:
3241                 res += ', '
3242             res += '%4dk' % fdict['tbr']
3243         if fdict.get('container') is not None:
3244             if res:
3245                 res += ', '
3246             res += '%s container' % fdict['container']
3247         if (fdict.get('vcodec') is not None
3248                 and fdict.get('vcodec') != 'none'):
3249             if res:
3250                 res += ', '
3251             res += fdict['vcodec']
3252             if fdict.get('vbr') is not None:
3253                 res += '@'
3254         elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
3255             res += 'video@'
3256         if fdict.get('vbr') is not None:
3257             res += '%4dk' % fdict['vbr']
3258         if fdict.get('fps') is not None:
3259             if res:
3260                 res += ', '
3261             res += '%sfps' % fdict['fps']
3262         if fdict.get('acodec') is not None:
3263             if res:
3264                 res += ', '
3265             if fdict['acodec'] == 'none':
3266                 res += 'video only'
3267             else:
3268                 res += '%-5s' % fdict['acodec']
3269         elif fdict.get('abr') is not None:
3270             if res:
3271                 res += ', '
3272             res += 'audio'
3273         if fdict.get('abr') is not None:
3274             res += '@%3dk' % fdict['abr']
3275         if fdict.get('asr') is not None:
3276             res += ' (%5dHz)' % fdict['asr']
3277         if fdict.get('filesize') is not None:
3278             if res:
3279                 res += ', '
3280             res += format_bytes(fdict['filesize'])
3281         elif fdict.get('filesize_approx') is not None:
3282             if res:
3283                 res += ', '
3284             res += '~' + format_bytes(fdict['filesize_approx'])
3285         return res
3286
3287     def _list_format_headers(self, *headers):
3288         if self.params.get('listformats_table', True) is not False:
3289             return [self._format_screen(header, self.Styles.HEADERS) for header in headers]
3290         return headers
3291
3292     def list_formats(self, info_dict):
3293         formats = info_dict.get('formats', [info_dict])
3294         new_format = self.params.get('listformats_table', True) is not False
3295         if new_format:
3296             delim = self._format_screen('\u2502', self.Styles.DELIM, '|', test_encoding=True)
3297             table = [
3298                 [
3299                     self._format_screen(format_field(f, 'format_id'), self.Styles.ID),
3300                     format_field(f, 'ext'),
3301                     format_field(f, func=self.format_resolution, ignore=('audio only', 'images')),
3302                     format_field(f, 'fps', '\t%d'),
3303                     format_field(f, 'dynamic_range', '%s', ignore=(None, 'SDR')).replace('HDR', ''),
3304                     delim,
3305                     format_field(f, 'filesize', ' \t%s', func=format_bytes) + format_field(f, 'filesize_approx', '~\t%s', func=format_bytes),
3306                     format_field(f, 'tbr', '\t%dk'),
3307                     shorten_protocol_name(f.get('protocol', '').replace('native', 'n')),
3308                     delim,
3309                     format_field(f, 'vcodec', default='unknown').replace(
3310                         'none',
3311                         'images' if f.get('acodec') == 'none'
3312                         else self._format_screen('audio only', self.Styles.SUPPRESS)),
3313                     format_field(f, 'vbr', '\t%dk'),
3314                     format_field(f, 'acodec', default='unknown').replace(
3315                         'none',
3316                         '' if f.get('vcodec') == 'none'
3317                         else self._format_screen('video only', self.Styles.SUPPRESS)),
3318                     format_field(f, 'abr', '\t%dk'),
3319                     format_field(f, 'asr', '\t%dHz'),
3320                     join_nonempty(
3321                         self._format_screen('UNSUPPORTED', 'light red') if f.get('ext') in ('f4f', 'f4m') else None,
3322                         format_field(f, 'language', '[%s]'),
3323                         join_nonempty(
3324                             format_field(f, 'format_note'),
3325                             format_field(f, 'container', ignore=(None, f.get('ext'))),
3326                             delim=', '),
3327                         delim=' '),
3328                 ] for f in formats if f.get('preference') is None or f['preference'] >= -1000]
3329             header_line = self._list_format_headers(
3330                 'ID', 'EXT', 'RESOLUTION', '\tFPS', 'HDR', delim, '\tFILESIZE', '\tTBR', 'PROTO',
3331                 delim, 'VCODEC', '\tVBR', 'ACODEC', '\tABR', '\tASR', 'MORE INFO')
3332         else:
3333             table = [
3334                 [
3335                     format_field(f, 'format_id'),
3336                     format_field(f, 'ext'),
3337                     self.format_resolution(f),
3338                     self._format_note(f)]
3339                 for f in formats
3340                 if f.get('preference') is None or f['preference'] >= -1000]
3341             header_line = ['format code', 'extension', 'resolution', 'note']
3342
3343         self.to_screen(
3344             '[info] Available formats for %s:' % info_dict['id'])
3345         self.to_stdout(render_table(
3346             header_line, table,
3347             extra_gap=(0 if new_format else 1),
3348             hide_empty=new_format,
3349             delim=new_format and self._format_screen('\u2500', self.Styles.DELIM, '-', test_encoding=True)))
3350
3351     def list_thumbnails(self, info_dict):
3352         thumbnails = list(info_dict.get('thumbnails'))
3353         if not thumbnails:
3354             self.to_screen('[info] No thumbnails present for %s' % info_dict['id'])
3355             return
3356
3357         self.to_screen(
3358             '[info] Thumbnails for %s:' % info_dict['id'])
3359         self.to_stdout(render_table(
3360             self._list_format_headers('ID', 'Width', 'Height', 'URL'),
3361             [[t['id'], t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']] for t in thumbnails]))
3362
3363     def list_subtitles(self, video_id, subtitles, name='subtitles'):
3364         if not subtitles:
3365             self.to_screen('%s has no %s' % (video_id, name))
3366             return
3367         self.to_screen(
3368             'Available %s for %s:' % (name, video_id))
3369
3370         def _row(lang, formats):
3371             exts, names = zip(*((f['ext'], f.get('name') or 'unknown') for f in reversed(formats)))
3372             if len(set(names)) == 1:
3373                 names = [] if names[0] == 'unknown' else names[:1]
3374             return [lang, ', '.join(names), ', '.join(exts)]
3375
3376         self.to_stdout(render_table(
3377             self._list_format_headers('Language', 'Name', 'Formats'),
3378             [_row(lang, formats) for lang, formats in subtitles.items()],
3379             hide_empty=True))
3380
3381     def urlopen(self, req):
3382         """ Start an HTTP download """
3383         if isinstance(req, compat_basestring):
3384             req = sanitized_Request(req)
3385         return self._opener.open(req, timeout=self._socket_timeout)
3386
    def print_debug_header(self):
        """Write a block of diagnostic information when the 'verbose' param is set.

        In order, the header reports: the encodings in use, the yt-dlp version
        and variant, lazy-extractor and plugin status, compatibility options,
        the git HEAD (only for the 'source' variant), the Python version and
        platform, the versions of external executables, the available optional
        libraries and the proxy map of the opener's handlers.

        Lines go to the 'logger' param via logger.debug() when one is set,
        otherwise they are written with write_string / self._write_string.
        """
        if not self.params.get('verbose'):
            return

        def get_encoding(stream):
            # Describe the stream's encoding, noting when the attribute is
            # missing and when terminal sequences are not supported
            ret = getattr(stream, 'encoding', 'missing (%s)' % type(stream).__name__)
            if not supports_terminal_sequences(stream):
                ret += ' (No ANSI)'
            return ret

        encoding_str = 'Encodings: locale %s, fs %s, out %s, err %s, pref %s' % (
            locale.getpreferredencoding(),
            sys.getfilesystemencoding(),
            get_encoding(self._screen_file), get_encoding(self._err_file),
            self.get_encoding())

        logger = self.params.get('logger')
        if logger:
            write_debug = lambda msg: logger.debug(f'[debug] {msg}')
            write_debug(encoding_str)
        else:
            # The encodings line is written with encoding=None, unlike the later
            # lines which go through self._write_string.
            # NOTE(review): presumably so this line stays readable even when the
            # configured encoding is wrong - confirm
            write_string(f'[debug] {encoding_str}\n', encoding=None)
            write_debug = lambda msg: self._write_string(f'[debug] {msg}\n')

        source = detect_variant()
        write_debug(join_nonempty(
            'yt-dlp version', __version__,
            f'[{RELEASE_GIT_HEAD}]' if RELEASE_GIT_HEAD else '',
            '' if source == 'unknown' else f'({source})',
            delim=' '))
        if not _LAZY_LOADER:
            if os.environ.get('YTDLP_NO_LAZY_EXTRACTORS'):
                write_debug('Lazy loading extractors is forcibly disabled')
            else:
                write_debug('Lazy loading extractors is disabled')
        if plugin_extractors or plugin_postprocessors:
            # Plugins are listed by class name, with the registered name
            # appended when the two differ
            write_debug('Plugins: %s' % [
                '%s%s' % (klass.__name__, '' if klass.__name__ == name else f' as {name}')
                for name, klass in itertools.chain(plugin_extractors.items(), plugin_postprocessors.items())])
        if self.params.get('compat_opts'):
            write_debug('Compatibility options: %s' % ', '.join(self.params.get('compat_opts')))

        if source == 'source':
            # Best effort: ask git for the current commit; any failure is ignored
            try:
                sp = Popen(
                    ['git', 'rev-parse', '--short', 'HEAD'],
                    stdout=subprocess.PIPE, stderr=subprocess.PIPE,
                    cwd=os.path.dirname(os.path.abspath(__file__)))
                out, err = sp.communicate_or_kill()
                out = out.decode().strip()
                if re.match('[0-9a-f]+', out):
                    write_debug('Git HEAD: %s' % out)
            except Exception:
                # NOTE(review): sys.exc_clear looks like a Python 2 leftover; if it
                # is unavailable the resulting error is swallowed just below
                try:
                    sys.exc_clear()
                except Exception:
                    pass

        def python_implementation():
            # Implementation name, with the PyPy version appended when available
            impl_name = platform.python_implementation()
            if impl_name == 'PyPy' and hasattr(sys, 'pypy_version_info'):
                return impl_name + ' version %d.%d.%d' % sys.pypy_version_info[:3]
            return impl_name

        write_debug('Python version %s (%s %s) - %s' % (
            platform.python_version(),
            python_implementation(),
            platform.architecture()[0],
            platform_name()))

        exe_versions, ffmpeg_features = FFmpegPostProcessor.get_versions_and_features(self)
        # Keep only the names of the features whose value is truthy
        ffmpeg_features = {key for key, val in ffmpeg_features.items() if val}
        if ffmpeg_features:
            exe_versions['ffmpeg'] += ' (%s)' % ','.join(ffmpeg_features)

        exe_versions['rtmpdump'] = rtmpdump_version()
        exe_versions['phantomjs'] = PhantomJSwrapper._version()
        # Executables with a falsy version are left out of the listing
        exe_str = ', '.join(
            f'{exe} {v}' for exe, v in sorted(exe_versions.items()) if v
        ) or 'none'
        write_debug('exe versions: %s' % exe_str)

        # Imported here rather than at module level.
        # NOTE(review): presumably to avoid import cycles or import cost - confirm
        from .downloader.websocket import has_websockets
        from .postprocessor.embedthumbnail import has_mutagen
        from .cookies import SQLITE_AVAILABLE, KEYRING_AVAILABLE

        lib_str = join_nonempty(
            compat_pycrypto_AES and compat_pycrypto_AES.__name__.split('.')[0],
            KEYRING_AVAILABLE and 'keyring',
            has_mutagen and 'mutagen',
            SQLITE_AVAILABLE and 'sqlite',
            has_websockets and 'websockets',
            delim=', ') or 'none'
        write_debug('Optional libraries: %s' % lib_str)

        # Merge the proxies of every opener handler that exposes a 'proxies' attribute
        proxy_map = {}
        for handler in self._opener.handlers:
            if hasattr(handler, 'proxies'):
                proxy_map.update(handler.proxies)
        write_debug(f'Proxy map: {proxy_map}')

        # Not implemented: the `False and` guard keeps this branch from ever running
        if False and self.params.get('call_home'):
            ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode('utf-8')
            write_debug('Public IP address: %s' % ipaddr)
            latest_version = self.urlopen(
                'https://yt-dl.org/latest/version').read().decode('utf-8')
            if version_tuple(latest_version) > version_tuple(__version__):
                self.report_warning(
                    'You are using an outdated version (newest version: %s)! '
                    'See https://yt-dl.org/update if you need help updating.' %
                    latest_version)
3499
3500     def _setup_opener(self):
3501         timeout_val = self.params.get('socket_timeout')
3502         self._socket_timeout = 20 if timeout_val is None else float(timeout_val)
3503
3504         opts_cookiesfrombrowser = self.params.get('cookiesfrombrowser')
3505         opts_cookiefile = self.params.get('cookiefile')
3506         opts_proxy = self.params.get('proxy')
3507
3508         self.cookiejar = load_cookies(opts_cookiefile, opts_cookiesfrombrowser, self)
3509
3510         cookie_processor = YoutubeDLCookieProcessor(self.cookiejar)
3511         if opts_proxy is not None:
3512             if opts_proxy == '':
3513                 proxies = {}
3514             else:
3515                 proxies = {'http': opts_proxy, 'https': opts_proxy}
3516         else:
3517             proxies = compat_urllib_request.getproxies()
3518             # Set HTTPS proxy to HTTP one if given (https://github.com/ytdl-org/youtube-dl/issues/805)
3519             if 'http' in proxies and 'https' not in proxies:
3520                 proxies['https'] = proxies['http']
3521         proxy_handler = PerRequestProxyHandler(proxies)
3522
3523         debuglevel = 1 if self.params.get('debug_printtraffic') else 0
3524         https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
3525         ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
3526         redirect_handler = YoutubeDLRedirectHandler()
3527         data_handler = compat_urllib_request_DataHandler()
3528
3529         # When passing our own FileHandler instance, build_opener won't add the
3530         # default FileHandler and allows us to disable the file protocol, which
3531         # can be used for malicious purposes (see
3532         # https://github.com/ytdl-org/youtube-dl/issues/8227)
3533         file_handler = compat_urllib_request.FileHandler()
3534
3535         def file_open(*args, **kwargs):
3536             raise compat_urllib_error.URLError('file:// scheme is explicitly disabled in yt-dlp for security reasons')
3537         file_handler.file_open = file_open
3538
3539         opener = compat_urllib_request.build_opener(
3540             proxy_handler, https_handler, cookie_processor, ydlh, redirect_handler, data_handler, file_handler)
3541
3542         # Delete the default user-agent header, which would otherwise apply in
3543         # cases where our custom HTTP handler doesn't come into play
3544         # (See https://github.com/ytdl-org/youtube-dl/issues/1309 for details)
3545         opener.addheaders = []
3546         self._opener = opener
3547
3548     def encode(self, s):
3549         if isinstance(s, bytes):
3550             return s  # Already encoded
3551
3552         try:
3553             return s.encode(self.get_encoding())
3554         except UnicodeEncodeError as err:
3555             err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
3556             raise
3557
3558     def get_encoding(self):
3559         encoding = self.params.get('encoding')
3560         if encoding is None:
3561             encoding = preferredencoding()
3562         return encoding
3563
3564     def _write_info_json(self, label, ie_result, infofn, overwrite=None):
3565         ''' Write infojson and returns True = written, False = skip, None = error '''
3566         if overwrite is None:
3567             overwrite = self.params.get('overwrites', True)
3568         if not self.params.get('writeinfojson'):
3569             return False
3570         elif not infofn:
3571             self.write_debug(f'Skipping writing {label} infojson')
3572             return False
3573         elif not self._ensure_dir_exists(infofn):
3574             return None
3575         elif not overwrite and os.path.exists(infofn):
3576             self.to_screen(f'[info] {label.title()} metadata is already present')
3577         else:
3578             self.to_screen(f'[info] Writing {label} metadata as JSON to: {infofn}')
3579             try:
3580                 write_json_file(self.sanitize_info(ie_result, self.params.get('clean_infojson', True)), infofn)
3581             except (OSError, IOError):
3582                 self.report_error(f'Cannot write {label} metadata to JSON file {infofn}')
3583                 return None
3584         return True
3585
3586     def _write_description(self, label, ie_result, descfn):
3587         ''' Write description and returns True = written, False = skip, None = error '''
3588         if not self.params.get('writedescription'):
3589             return False
3590         elif not descfn:
3591             self.write_debug(f'Skipping writing {label} description')
3592             return False
3593         elif not self._ensure_dir_exists(descfn):
3594             return None
3595         elif not self.params.get('overwrites', True) and os.path.exists(descfn):
3596             self.to_screen(f'[info] {label.title()} description is already present')
3597         elif ie_result.get('description') is None:
3598             self.report_warning(f'There\'s no {label} description to write')
3599             return False
3600         else:
3601             try:
3602                 self.to_screen(f'[info] Writing {label} description to: {descfn}')
3603                 with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
3604                     descfile.write(ie_result['description'])
3605             except (OSError, IOError):
3606                 self.report_error(f'Cannot write {label} description file {descfn}')
3607                 return None
3608         return True
3609
3610     def _write_subtitles(self, info_dict, filename):
3611         ''' Write subtitles to file and return list of (sub_filename, final_sub_filename); or None if error'''
3612         ret = []
3613         subtitles = info_dict.get('requested_subtitles')
3614         if not subtitles or not (self.params.get('writesubtitles') or self.params.get('writeautomaticsub')):
3615             # subtitles download errors are already managed as troubles in relevant IE
3616             # that way it will silently go on when used with unsupporting IE
3617             return ret
3618
3619         sub_filename_base = self.prepare_filename(info_dict, 'subtitle')
3620         if not sub_filename_base:
3621             self.to_screen('[info] Skipping writing video subtitles')
3622             return ret
3623         for sub_lang, sub_info in subtitles.items():
3624             sub_format = sub_info['ext']
3625             sub_filename = subtitles_filename(filename, sub_lang, sub_format, info_dict.get('ext'))
3626             sub_filename_final = subtitles_filename(sub_filename_base, sub_lang, sub_format, info_dict.get('ext'))
3627             if not self.params.get('overwrites', True) and os.path.exists(sub_filename):
3628                 self.to_screen(f'[info] Video subtitle {sub_lang}.{sub_format} is already present')
3629                 sub_info['filepath'] = sub_filename
3630                 ret.append((sub_filename, sub_filename_final))
3631                 continue
3632
3633             self.to_screen(f'[info] Writing video subtitles to: {sub_filename}')
3634             if sub_info.get('data') is not None:
3635                 try:
3636                     # Use newline='' to prevent conversion of newline characters
3637                     # See https://github.com/ytdl-org/youtube-dl/issues/10268
3638                     with io.open(sub_filename, 'w', encoding='utf-8', newline='') as subfile:
3639                         subfile.write(sub_info['data'])
3640                     sub_info['filepath'] = sub_filename
3641                     ret.append((sub_filename, sub_filename_final))
3642                     continue
3643                 except (OSError, IOError):
3644                     self.report_error(f'Cannot write video subtitles file {sub_filename}')
3645                     return None
3646
3647             try:
3648                 sub_copy = sub_info.copy()
3649                 sub_copy.setdefault('http_headers', info_dict.get('http_headers'))
3650                 self.dl(sub_filename, sub_copy, subtitle=True)
3651                 sub_info['filepath'] = sub_filename
3652                 ret.append((sub_filename, sub_filename_final))
3653             except (ExtractorError, IOError, OSError, ValueError) + network_exceptions as err:
3654                 self.report_warning(f'Unable to download video subtitles for {sub_lang!r}: {err}')
3655                 continue
3656         return ret
3657
3658     def _write_thumbnails(self, label, info_dict, filename, thumb_filename_base=None):
3659         ''' Write thumbnails to file and return list of (thumb_filename, final_thumb_filename) '''
3660         write_all = self.params.get('write_all_thumbnails', False)
3661         thumbnails, ret = [], []
3662         if write_all or self.params.get('writethumbnail', False):
3663             thumbnails = info_dict.get('thumbnails') or []
3664         multiple = write_all and len(thumbnails) > 1
3665
3666         if thumb_filename_base is None:
3667             thumb_filename_base = filename
3668         if thumbnails and not thumb_filename_base:
3669             self.write_debug(f'Skipping writing {label} thumbnail')
3670             return ret
3671
3672         for t in thumbnails[::-1]:
3673             thumb_ext = (f'{t["id"]}.' if multiple else '') + determine_ext(t['url'], 'jpg')
3674             thumb_display_id = f'{label} thumbnail {t["id"]}'
3675             thumb_filename = replace_extension(filename, thumb_ext, info_dict.get('ext'))
3676             thumb_filename_final = replace_extension(thumb_filename_base, thumb_ext, info_dict.get('ext'))
3677
3678             if not self.params.get('overwrites', True) and os.path.exists(thumb_filename):
3679                 ret.append((thumb_filename, thumb_filename_final))
3680                 t['filepath'] = thumb_filename
3681                 self.to_screen('[info] %s is already present' % (
3682                     thumb_display_id if multiple else f'{label} thumbnail').capitalize())
3683             else:
3684                 self.to_screen(f'[info] Downloading {thumb_display_id} ...')
3685                 try:
3686                     uf = self.urlopen(t['url'])
3687                     self.to_screen(f'[info] Writing {thumb_display_id} to: {thumb_filename}')
3688                     with open(encodeFilename(thumb_filename), 'wb') as thumbf:
3689                         shutil.copyfileobj(uf, thumbf)
3690                     ret.append((thumb_filename, thumb_filename_final))
3691                     t['filepath'] = thumb_filename
3692                 except network_exceptions as err:
3693                     self.report_warning(f'Unable to download {thumb_display_id}: {err}')
3694             if ret and not write_all:
3695                 break
3696         return ret