yt_dlp/YoutubeDL.py

   1 #!/usr/bin/env python3
   2 # coding: utf-8
   3
   4 from __future__ import absolute_import, unicode_literals
   5
   6 import collections
   7 import contextlib
   8 import copy
   9 import datetime
  10 import errno
  11 import fileinput
  12 import functools
  13 import io
  14 import itertools
  15 import json
  16 import locale
  17 import operator
  18 import os
  19 import platform
  20 import re
  21 import shutil
  22 import subprocess
  23 import sys
  24 import tempfile
  25 import time
  26 import tokenize
  27 import traceback
  28 import random
  29 import unicodedata
  30
  31 from enum import Enum
  32 from string import ascii_letters
  33
  34 from .compat import (
  35     compat_basestring,
  36     compat_get_terminal_size,
  37     compat_kwargs,
  38     compat_numeric_types,
  39     compat_os_name,
  40     compat_pycrypto_AES,
  41     compat_shlex_quote,
  42     compat_str,
  43     compat_tokenize_tokenize,
  44     compat_urllib_error,
  45     compat_urllib_request,
  46     compat_urllib_request_DataHandler,
  47     windows_enable_vt_mode,
  48 )
  49 from .cookies import load_cookies
  50 from .utils import (
  51     age_restricted,
  52     args_to_str,
  53     ContentTooShortError,
  54     date_from_str,
  55     DateRange,
  56     DEFAULT_OUTTMPL,
  57     determine_ext,
  58     determine_protocol,
  59     DownloadCancelled,
  60     DownloadError,
  61     encode_compat_str,
  62     encodeFilename,
  63     EntryNotInPlaylist,
  64     error_to_compat_str,
  65     ExistingVideoReached,
  66     expand_path,
  67     ExtractorError,
  68     float_or_none,
  69     format_bytes,
  70     format_field,
  71     formatSeconds,
  72     GeoRestrictedError,
  73     get_domain,
  74     HEADRequest,
  75     int_or_none,
  76     iri_to_uri,
  77     ISO3166Utils,
  78     join_nonempty,
  79     LazyList,
  80     LINK_TEMPLATES,
  81     locked_file,
  82     make_dir,
  83     make_HTTPS_handler,
  84     MaxDownloadsReached,
  85     network_exceptions,
  86     number_of_digits,
  87     orderedSet,
  88     OUTTMPL_TYPES,
  89     PagedList,
  90     parse_filesize,
  91     PerRequestProxyHandler,
  92     platform_name,
  93     Popen,
  94     PostProcessingError,
  95     preferredencoding,
  96     prepend_extension,
  97     ReExtractInfo,
  98     register_socks_protocols,
  99     RejectedVideoReached,
 100     remove_terminal_sequences,
 101     render_table,
 102     replace_extension,
 103     SameFileError,
 104     sanitize_filename,
 105     sanitize_path,
 106     sanitize_url,
 107     sanitized_Request,
 108     std_headers,
 109     STR_FORMAT_RE_TMPL,
 110     STR_FORMAT_TYPES,
 111     str_or_none,
 112     strftime_or_none,
 113     subtitles_filename,
 114     supports_terminal_sequences,
 115     timetuple_from_msec,
 116     to_high_limit_path,
 117     traverse_obj,
 118     try_get,
 119     UnavailableVideoError,
 120     url_basename,
 121     variadic,
 122     version_tuple,
 123     write_json_file,
 124     write_string,
 125     YoutubeDLCookieProcessor,
 126     YoutubeDLHandler,
 127     YoutubeDLRedirectHandler,
 128 )
 129 from .cache import Cache
 130 from .minicurses import format_text
 131 from .extractor import (
 132     gen_extractor_classes,
 133     get_info_extractor,
 134     _LAZY_LOADER,
 135     _PLUGIN_CLASSES as plugin_extractors
 136 )
 137 from .extractor.openload import PhantomJSwrapper
 138 from .downloader import (
 139     FFmpegFD,
 140     get_suitable_downloader,
 141     shorten_protocol_name
 142 )
 143 from .downloader.rtmp import rtmpdump_version
 144 from .postprocessor import (
 145     get_postprocessor,
 146     EmbedThumbnailPP,
 147     FFmpegFixupDurationPP,
 148     FFmpegFixupM3u8PP,
 149     FFmpegFixupM4aPP,
 150     FFmpegFixupStretchedPP,
 151     FFmpegFixupTimestampPP,
 152     FFmpegMergerPP,
 153     FFmpegPostProcessor,
 154     MoveFilesAfterDownloadPP,
 155     _PLUGIN_CLASSES as plugin_postprocessors
 156 )
 157 from .update import detect_variant
 158 from .version import __version__, RELEASE_GIT_HEAD
 159
 160 if compat_os_name == 'nt':
 161     import ctypes
 162
 163
 164 class YoutubeDL(object):
 165     """YoutubeDL class.
 166
 167     YoutubeDL objects are the ones responsible for downloading the
 168     actual video file and writing it to disk if the user has requested
 169     it, among some other tasks. In most cases there should be one per
 170     program. As, given a video URL, the downloader doesn't know how to
 171     extract all the needed information, task that InfoExtractors do, it
 172     has to pass the URL to one of them.
 173
 174     For this, YoutubeDL objects have a method that allows
 175     InfoExtractors to be registered in a given order. When it is passed
 176     a URL, the YoutubeDL object hands it to the first InfoExtractor it
 177     finds that reports being able to handle it. The InfoExtractor extracts
 178     all the information about the video or videos the URL refers to, and
 179     YoutubeDL processes the extracted information, possibly using a File
 180     Downloader to download the video.
 181
 182     YoutubeDL objects accept a lot of parameters. In order not to saturate
 183     the object constructor with arguments, it receives a dictionary of
 184     options instead. These options are available through the params
 185     attribute for the InfoExtractors to use. The YoutubeDL also
 186     registers itself as the downloader in charge for the InfoExtractors
 187     that are added to it, so this is a "mutual registration".
 188
 189     Available options:
 190
 191     username:          Username for authentication purposes.
 192     password:          Password for authentication purposes.
 193     videopassword:     Password for accessing a video.
 194     ap_mso:            Adobe Pass multiple-system operator identifier.
 195     ap_username:       Multiple-system operator account username.
 196     ap_password:       Multiple-system operator account password.
 197     usenetrc:          Use netrc for authentication instead.
 198     verbose:           Print additional info to stdout.
 199     quiet:             Do not print messages to stdout.
 200     no_warnings:       Do not print out anything for warnings.
 201     forceprint:        A list of templates to force print
 202     forceurl:          Force printing final URL. (Deprecated)
 203     forcetitle:        Force printing title. (Deprecated)
 204     forceid:           Force printing ID. (Deprecated)
 205     forcethumbnail:    Force printing thumbnail URL. (Deprecated)
 206     forcedescription:  Force printing description. (Deprecated)
 207     forcefilename:     Force printing final filename. (Deprecated)
 208     forceduration:     Force printing duration. (Deprecated)
 209     forcejson:         Force printing info_dict as JSON.
 210     dump_single_json:  Force printing the info_dict of the whole playlist
 211                        (or video) as a single JSON line.
 212     force_write_download_archive: Force writing download archive regardless
 213                        of 'skip_download' or 'simulate'.
 214     simulate:          Do not download the video files. If unset (or None),
 215                        simulate only if listsubtitles, listformats or list_thumbnails is used
 216     format:            Video format code. see "FORMAT SELECTION" for more details.
 217                        You can also pass a function. The function takes 'ctx' as
 218                        argument and returns the formats to download.
 219                        See "build_format_selector" for an implementation
 220     allow_unplayable_formats:   Allow unplayable formats to be extracted and downloaded.
 221     ignore_no_formats_error: Ignore "No video formats" error. Useful for
 222                        extracting metadata even if the video is not actually
 223                        available for download (experimental)
 224     format_sort:       A list of fields by which to sort the video formats.
 225                        See "Sorting Formats" for more details.
 226     format_sort_force: Force the given format_sort. see "Sorting Formats"
 227                        for more details.
 228     allow_multiple_video_streams:   Allow multiple video streams to be merged
 229                        into a single file
 230     allow_multiple_audio_streams:   Allow multiple audio streams to be merged
 231                        into a single file
 232     check_formats:     Whether to test if the formats are downloadable.
 233                        Can be True (check all), False (check none),
 234                        'selected' (check selected formats),
 235                        or None (check only if requested by extractor)
 236     paths:             Dictionary of output paths. The allowed keys are 'home'
 237                        'temp' and the keys of OUTTMPL_TYPES (in utils.py)
 238     outtmpl:           Dictionary of templates for output names. Allowed keys
 239                        are 'default' and the keys of OUTTMPL_TYPES (in utils.py).
 240                        For compatibility with youtube-dl, a single string can also be used
 241     outtmpl_na_placeholder: Placeholder for unavailable meta fields.
 242     restrictfilenames: Do not allow "&" and spaces in file names
 243     trim_file_name:    Limit length of filename (extension excluded)
 244     windowsfilenames:  Force the filenames to be windows compatible
 245     ignoreerrors:      Do not stop on download/postprocessing errors.
 246                        Can be 'only_download' to ignore only download errors.
 247                        Default is 'only_download' for CLI, but False for API
 248     skip_playlist_after_errors: Number of allowed failures until the rest of
 249                        the playlist is skipped
 250     force_generic_extractor: Force downloader to use the generic extractor
 251     overwrites:        Overwrite all video and metadata files if True,
 252                        overwrite only non-video files if None
 253                        and don't overwrite any file if False
 254                        For compatibility with youtube-dl,
 255                        "nooverwrites" may also be used instead
 256     playliststart:     Playlist item to start at.
 257     playlistend:       Playlist item to end at.
 258     playlist_items:    Specific indices of playlist to download.
 259     playlistreverse:   Download playlist items in reverse order.
 260     playlistrandom:    Download playlist items in random order.
 261     matchtitle:        Download only matching titles.
 262     rejecttitle:       Reject downloads for matching titles.
 263     logger:            Log messages to a logging.Logger instance.
 264     logtostderr:       Log messages to stderr instead of stdout.
 265     consoletitle:      Display progress in console window's titlebar.
 266     writedescription:  Write the video description to a .description file
 267     writeinfojson:     Write the video metadata to a .info.json file
 268     clean_infojson:    Remove private fields from the infojson
 269     getcomments:       Extract video comments. This will not be written to disk
 270                        unless writeinfojson is also given
 271     writeannotations:  Write the video annotations to a .annotations.xml file
 272     writethumbnail:    Write the thumbnail image to a file
 273     allow_playlist_files: Whether to write playlists' description, infojson etc
 274                        also to disk when using the 'write*' options
 275     write_all_thumbnails:  Write all thumbnail formats to files
 276     writelink:         Write an internet shortcut file, depending on the
 277                        current platform (.url/.webloc/.desktop)
 278     writeurllink:      Write a Windows internet shortcut file (.url)
 279     writewebloclink:   Write a macOS internet shortcut file (.webloc)
 280     writedesktoplink:  Write a Linux internet shortcut file (.desktop)
 281     writesubtitles:    Write the video subtitles to a file
 282     writeautomaticsub: Write the automatically generated subtitles to a file
 283     allsubtitles:      Deprecated - Use subtitleslangs = ['all']
 284                        Downloads all the subtitles of the video
 285                        (requires writesubtitles or writeautomaticsub)
 286     listsubtitles:     Lists all available subtitles for the video
 287     subtitlesformat:   The format code for subtitles
 288     subtitleslangs:    List of languages of the subtitles to download (can be regex).
 289                        The list may contain "all" to refer to all the available
 290                        subtitles. The language can be prefixed with a "-" to
 291                        exclude it from the requested languages. Eg: ['all', '-live_chat']
 292     keepvideo:         Keep the video file after post-processing
 293     daterange:         A DateRange object, download only if the upload_date is in the range.
 294     skip_download:     Skip the actual download of the video file
 295     cachedir:          Location of the cache files in the filesystem.
 296                        False to disable filesystem cache.
 297     noplaylist:        Download single video instead of a playlist if in doubt.
 298     age_limit:         An integer representing the user's age in years.
 299                        Unsuitable videos for the given age are skipped.
 300     min_views:         An integer representing the minimum view count the video
 301                        must have in order to not be skipped.
 302                        Videos without view count information are always
 303                        downloaded. None for no limit.
 304     max_views:         An integer representing the maximum view count.
 305                        Videos that are more popular than that are not
 306                        downloaded.
 307                        Videos without view count information are always
 308                        downloaded. None for no limit.
 309     download_archive:  File name of a file where all downloads are recorded.
 310                        Videos already present in the file are not downloaded
 311                        again.
 312     break_on_existing: Stop the download process after attempting to download a
 313                        file that is in the archive.
 314     break_on_reject:   Stop the download process when encountering a video that
 315                        has been filtered out.
 316     break_per_url:     Whether break_on_reject and break_on_existing
 317                        should act on each input URL as opposed to for the entire queue
 318     cookiefile:        File name where cookies should be read from and dumped to
 319     cookiesfrombrowser: A tuple containing the name of the browser and the profile
 320                        name/path from where cookies are loaded.
 321                        Eg: ('chrome', ) or ('vivaldi', 'default')
 322     nocheckcertificate:Do not verify SSL certificates
 323     prefer_insecure:   Use HTTP instead of HTTPS to retrieve information.
 324                        At the moment, this is only supported by YouTube.
 325     proxy:             URL of the proxy server to use
 326     geo_verification_proxy:  URL of the proxy to use for IP address verification
 327                        on geo-restricted sites.
 328     socket_timeout:    Time to wait for unresponsive hosts, in seconds
 329     bidi_workaround:   Work around buggy terminals without bidirectional text
 330                        support, using fribidi
 331     debug_printtraffic:Print out sent and received HTTP traffic
 332     include_ads:       Download ads as well (deprecated)
 333     default_search:    Prepend this string if an input url is not valid.
 334                        'auto' for elaborate guessing
 335     encoding:          Use this encoding instead of the system-specified.
 336     extract_flat:      Do not resolve URLs, return the immediate result.
 337                        Pass in 'in_playlist' to only show this behavior for
 338                        playlist items.
 339     wait_for_video:    If given, wait for scheduled streams to become available.
 340                        The value should be a tuple containing the range
 341                        (min_secs, max_secs) to wait between retries
 342     postprocessors:    A list of dictionaries, each with an entry
 343                        * key:  The name of the postprocessor. See
 344                                yt_dlp/postprocessor/__init__.py for a list.
 345                        * when: When to run the postprocessor. Can be one of
 346                                pre_process|before_dl|post_process|after_move.
 347                                Assumed to be 'post_process' if not given
 348     post_hooks:        Deprecated - Register a custom postprocessor instead
 349                        A list of functions that get called as the final step
 350                        for each video file, after all postprocessors have been
 351                        called. The filename will be passed as the only argument.
 352     progress_hooks:    A list of functions that get called on download
 353                        progress, with a dictionary with the entries
 354                        * status: One of "downloading", "error", or "finished".
 355                                  Check this first and ignore unknown values.
 356                        * info_dict: The extracted info_dict
 357
 358                        If status is one of "downloading", or "finished", the
 359                        following properties may also be present:
 360                        * filename: The final filename (always present)
 361                        * tmpfilename: The filename we're currently writing to
 362                        * downloaded_bytes: Bytes on disk
 363                        * total_bytes: Size of the whole file, None if unknown
 364                        * total_bytes_estimate: Guess of the eventual file size,
 365                                                None if unavailable.
 366                        * elapsed: The number of seconds since download started.
 367                        * eta: The estimated time in seconds, None if unknown
 368                        * speed: The download speed in bytes/second, None if
 369                                 unknown
 370                        * fragment_index: The counter of the currently
 371                                          downloaded video fragment.
 372                        * fragment_count: The number of fragments (= individual
 373                                          files that will be merged)
 374
 375                        Progress hooks are guaranteed to be called at least once
 376                        (with status "finished") if the download is successful.
 377     postprocessor_hooks:  A list of functions that get called on postprocessing
 378                        progress, with a dictionary with the entries
 379                        * status: One of "started", "processing", or "finished".
 380                                  Check this first and ignore unknown values.
 381                        * postprocessor: Name of the postprocessor
 382                        * info_dict: The extracted info_dict
 383
 384                        Postprocessor hooks are guaranteed to be called at least twice
 385                        (with status "started" and "finished") if the processing is successful.
 386     merge_output_format: Extension to use when merging formats.
 387     final_ext:         Expected final extension; used to detect when the file was
 388                        already downloaded and converted
 389     fixup:             Automatically correct known faults of the file.
 390                        One of:
 391                        - "never": do nothing
 392                        - "warn": only emit a warning
 393                        - "detect_or_warn": check whether we can do anything
 394                                            about it, warn otherwise (default)
 395     source_address:    Client-side IP address to bind to.
 396     call_home:         Boolean, true iff we are allowed to contact the
 397                        yt-dlp servers for debugging. (BROKEN)
 398     sleep_interval_requests: Number of seconds to sleep between requests
 399                        during extraction
 400     sleep_interval:    Number of seconds to sleep before each download when
 401                        used alone or a lower bound of a range for randomized
 402                        sleep before each download (minimum possible number
 403                        of seconds to sleep) when used along with
 404                        max_sleep_interval.
 405     max_sleep_interval:Upper bound of a range for randomized sleep before each
 406                        download (maximum possible number of seconds to sleep).
 407                        Must only be used along with sleep_interval.
 408                        Actual sleep time will be a random float from range
 409                        [sleep_interval; max_sleep_interval].
 410     sleep_interval_subtitles: Number of seconds to sleep before each subtitle download
 411     listformats:       Print an overview of available video formats and exit.
 412     list_thumbnails:   Print a table of all thumbnails and exit.
 413     match_filter:      A function that gets called with the info_dict of
 414                        every video.
 415                        If it returns a message, the video is ignored.
 416                        If it returns None, the video is downloaded.
 417                        match_filter_func in utils.py is one example for this.
 418     no_color:          Do not emit color codes in output.
 419     geo_bypass:        Bypass geographic restriction via faking X-Forwarded-For
 420                        HTTP header
 421     geo_bypass_country:
 422                        Two-letter ISO 3166-1 alpha-2 country code that will be used for
 423                        explicit geographic restriction bypassing via faking
 424                        X-Forwarded-For HTTP header
 425     geo_bypass_ip_block:
 426                        IP range in CIDR notation that will be used similarly to
 427                        geo_bypass_country
 428
 429     The following options determine which downloader is picked:
 430     external_downloader: A dictionary of protocol keys and the executable of the
 431                        external downloader to use for it. The allowed protocols
 432                        are default|http|ftp|m3u8|dash|rtsp|rtmp|mms.
 433                        Set the value to 'native' to use the native downloader
 434     hls_prefer_native: Deprecated - Use external_downloader = {'m3u8': 'native'}
 435                        or {'m3u8': 'ffmpeg'} instead.
 436                        Use the native HLS downloader instead of ffmpeg/avconv
 437                        if True, otherwise use ffmpeg/avconv if False, otherwise
 438                        use downloader suggested by extractor if None.
 439     compat_opts:       Compatibility options. See "Differences in default behavior".
 440                        The following options do not work when used through the API:
 441                        filename, abort-on-error, multistreams, no-live-chat, format-sort,
 442                        no-clean-infojson, no-playlist-metafiles, no-keep-subs, no-attach-info-json.
 443                        Refer to __init__.py for their implementation
 444     progress_template: Dictionary of templates for progress outputs.
 445                        Allowed keys are 'download', 'postprocess',
 446                        'download-title' (console title) and 'postprocess-title'.
 447                        The template is mapped on a dictionary with keys 'progress' and 'info'
 448
 449     The following parameters are not used by YoutubeDL itself, they are used by
 450     the downloader (see yt_dlp/downloader/common.py):
 451     nopart, updatetime, buffersize, ratelimit, throttledratelimit, min_filesize,
 452     max_filesize, test, noresizebuffer, retries, fragment_retries, continuedl,
 453     noprogress, xattr_set_filesize, hls_use_mpegts, http_chunk_size,
 454     external_downloader_args, concurrent_fragment_downloads.
 455
 456     The following options are used by the post processors:
 457     prefer_ffmpeg:     If False, use avconv instead of ffmpeg if both are available,
 458                        otherwise prefer ffmpeg. (avconv support is deprecated)
 459     ffmpeg_location:   Location of the ffmpeg/avconv binary; either the path
 460                        to the binary or its containing directory.
 461     postprocessor_args: A dictionary of postprocessor/executable keys (in lower case)
 462                        and a list of additional command-line arguments for the
 463                        postprocessor/executable. The dict can also have "PP+EXE" keys
 464                        which are used when the given exe is used by the given PP.
 465                        Use 'default' as the name for arguments to be passed to all PP
 466                        For compatibility with youtube-dl, a single list of args
 467                        can also be used
 468
 469     The following options are used by the extractors:
 470     extractor_retries: Number of times to retry for known errors
 471     dynamic_mpd:       Whether to process dynamic DASH manifests (default: True)
 472     hls_split_discontinuity: Split HLS playlists to different formats at
 473                        discontinuities such as ad breaks (default: False)
 474     extractor_args:    A dictionary of arguments to be passed to the extractors.
 475                        See "EXTRACTOR ARGUMENTS" for details.
 476                        Eg: {'youtube': {'skip': ['dash', 'hls']}}
 477     youtube_include_dash_manifest: Deprecated - Use extractor_args instead.
 478                        If True (default), DASH manifests and related
 479                        data will be downloaded and processed by extractor.
 480                        You can reduce network I/O by disabling it if you don't
 481                        care about DASH. (only for youtube)
 482     youtube_include_hls_manifest: Deprecated - Use extractor_args instead.
 483                        If True (default), HLS manifests and related
 484                        data will be downloaded and processed by extractor.
 485                        You can reduce network I/O by disabling it if you don't
 486                        care about HLS. (only for youtube)
 487     """
 488
 489     _NUMERIC_FIELDS = set((
 490         'width', 'height', 'tbr', 'abr', 'asr', 'vbr', 'fps', 'filesize', 'filesize_approx',
 491         'timestamp', 'release_timestamp',
 492         'duration', 'view_count', 'like_count', 'dislike_count', 'repost_count',
 493         'average_rating', 'comment_count', 'age_limit',
 494         'start_time', 'end_time',
 495         'chapter_number', 'season_number', 'episode_number',
 496         'track_number', 'disc_number', 'release_year',
 497     ))
 498
 499     _format_selection_exts = {
 500         'audio': {'m4a', 'mp3', 'ogg', 'aac'},
 501         'video': {'mp4', 'flv', 'webm', '3gp'},
 502         'storyboards': {'mhtml'},
 503     }
 504
 505     params = None
 506     _ies = {}
 507     _pps = {'pre_process': [], 'before_dl': [], 'after_move': [], 'post_process': []}
 508     _printed_messages = set()
 509     _first_webpage_request = True
 510     _download_retcode = None
 511     _num_downloads = None
 512     _playlist_level = 0
 513     _playlist_urls = set()
 514     _screen_file = None
 515
 516     def __init__(self, params=None, auto_init=True):
 517         """Create a YoutubeDL object with the given options.
 518         @param auto_init    Whether to load the default extractors and print header (if verbose).
 519                             Set to 'no_verbose_header' to not print the header
 520         """
 521         if params is None:
 522             params = {}
 523         self._ies = {}
 524         self._ies_instances = {}
 525         self._pps = {'pre_process': [], 'before_dl': [], 'after_move': [], 'post_process': []}
 526         self._printed_messages = set()
 527         self._first_webpage_request = True
 528         self._post_hooks = []
 529         self._progress_hooks = []
 530         self._postprocessor_hooks = []
 531         self._download_retcode = 0
 532         self._num_downloads = 0
 533         self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
 534         self._err_file = sys.stderr
 535         self.params = params
 536         self.cache = Cache(self)
 537
 538         windows_enable_vt_mode()
 539         self._allow_colors = {
 540             'screen': not self.params.get('no_color') and supports_terminal_sequences(self._screen_file),
 541             'err': not self.params.get('no_color') and supports_terminal_sequences(self._err_file),
 542         }
 543
 544         if sys.version_info < (3, 6):
 545             self.report_warning(
 546                 'Python version %d.%d is not supported! Please update to Python 3.6 or above' % sys.version_info[:2])
 547
 548         if self.params.get('allow_unplayable_formats'):
 549             self.report_warning(
 550                 f'You have asked for {self._format_err("UNPLAYABLE", self.Styles.EMPHASIS)} formats to be listed/downloaded. '
 551                 'This is a developer option intended for debugging. \n'
 552                 '         If you experience any issues while using this option, '
 553                 f'{self._format_err("DO NOT", self.Styles.ERROR)} open a bug report')
 554
 555         def check_deprecated(param, option, suggestion):
 556             if self.params.get(param) is not None:
 557                 self.report_warning('%s is deprecated. Use %s instead' % (option, suggestion))
 558                 return True
 559             return False
 560
 561         if check_deprecated('cn_verification_proxy', '--cn-verification-proxy', '--geo-verification-proxy'):
 562             if self.params.get('geo_verification_proxy') is None:
 563                 self.params['geo_verification_proxy'] = self.params['cn_verification_proxy']
 564
 565         check_deprecated('autonumber', '--auto-number', '-o "%(autonumber)s-%(title)s.%(ext)s"')
 566         check_deprecated('usetitle', '--title', '-o "%(title)s-%(id)s.%(ext)s"')
 567         check_deprecated('useid', '--id', '-o "%(id)s.%(ext)s"')
 568
 569         for msg in self.params.get('_warnings', []):
 570             self.report_warning(msg)
 571         for msg in self.params.get('_deprecation_warnings', []):
 572             self.deprecation_warning(msg)
 573
 574         if 'list-formats' in self.params.get('compat_opts', []):
 575             self.params['listformats_table'] = False
 576
 577         if 'overwrites' not in self.params and self.params.get('nooverwrites') is not None:
 578             # nooverwrites was unnecessarily changed to overwrites
 579             # in 0c3d0f51778b153f65c21906031c2e091fcfb641
 580             # This ensures compatibility with both keys
 581             self.params['overwrites'] = not self.params['nooverwrites']
 582         elif self.params.get('overwrites') is None:
 583             self.params.pop('overwrites', None)
 584         else:
 585             self.params['nooverwrites'] = not self.params['overwrites']
 586
 587         if params.get('bidi_workaround', False):
 588             try:
 589                 import pty
 590                 master, slave = pty.openpty()
 591                 width = compat_get_terminal_size().columns
 592                 if width is None:
 593                     width_args = []
 594                 else:
 595                     width_args = ['-w', str(width)]
 596                 sp_kwargs = dict(
 597                     stdin=subprocess.PIPE,
 598                     stdout=slave,
 599                     stderr=self._err_file)
 600                 try:
 601                     self._output_process = Popen(['bidiv'] + width_args, **sp_kwargs)
 602                 except OSError:
 603                     self._output_process = Popen(['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
 604                 self._output_channel = os.fdopen(master, 'rb')
 605             except OSError as ose:
 606                 if ose.errno == errno.ENOENT:
 607                     self.report_warning(
 608                         'Could not find fribidi executable, ignoring --bidi-workaround. '
 609                         'Make sure that  fribidi  is an executable file in one of the directories in your $PATH.')
 610                 else:
 611                     raise
 612
 613         if (sys.platform != 'win32'
 614                 and sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
 615                 and not params.get('restrictfilenames', False)):
 616             # Unicode filesystem API will throw errors (#1474, #13027)
 617             self.report_warning(
 618                 'Assuming --restrict-filenames since file system encoding '
 619                 'cannot encode all characters. '
 620                 'Set the LC_ALL environment variable to fix this.')
 621             self.params['restrictfilenames'] = True
 622
 623         self.outtmpl_dict = self.parse_outtmpl()
 624
 625         # Creating format selector here allows us to catch syntax errors before the extraction
 626         self.format_selector = (
 627             None if self.params.get('format') is None
 628             else self.params['format'] if callable(self.params['format'])
 629             else self.build_format_selector(self.params['format']))
 630
 631         self._setup_opener()
 632
 633         if auto_init:
 634             if auto_init != 'no_verbose_header':
 635                 self.print_debug_header()
 636             self.add_default_info_extractors()
 637
 638         hooks = {
 639             'post_hooks': self.add_post_hook,
 640             'progress_hooks': self.add_progress_hook,
 641             'postprocessor_hooks': self.add_postprocessor_hook,
 642         }
 643         for opt, fn in hooks.items():
 644             for ph in self.params.get(opt, []):
 645                 fn(ph)
 646
 647         for pp_def_raw in self.params.get('postprocessors', []):
 648             pp_def = dict(pp_def_raw)
 649             when = pp_def.pop('when', 'post_process')
 650             self.add_post_processor(
 651                 get_postprocessor(pp_def.pop('key'))(self, **compat_kwargs(pp_def)),
 652                 when=when)
 653
 654         register_socks_protocols()
 655
 656         def preload_download_archive(fn):
 657             """Preload the archive, if any is specified"""
 658             if fn is None:
 659                 return False
 660             self.write_debug(f'Loading archive file {fn!r}')
 661             try:
 662                 with locked_file(fn, 'r', encoding='utf-8') as archive_file:
 663                     for line in archive_file:
 664                         self.archive.add(line.strip())
 665             except IOError as ioe:
 666                 if ioe.errno != errno.ENOENT:
 667                     raise
 668                 return False
 669             return True
 670
 671         self.archive = set()
 672         preload_download_archive(self.params.get('download_archive'))
 673
 674     def warn_if_short_id(self, argv):
 675         # short YouTube ID starting with dash?
 676         idxs = [
 677             i for i, a in enumerate(argv)
 678             if re.match(r'^-[0-9A-Za-z_-]{10}$', a)]
 679         if idxs:
 680             correct_argv = (
 681                 ['yt-dlp']
 682                 + [a for i, a in enumerate(argv) if i not in idxs]
 683                 + ['--'] + [argv[i] for i in idxs]
 684             )
 685             self.report_warning(
 686                 'Long argument string detected. '
 687                 'Use -- to separate parameters and URLs, like this:\n%s' %
 688                 args_to_str(correct_argv))
 689
 690     def add_info_extractor(self, ie):
 691         """Add an InfoExtractor object to the end of the list."""
 692         ie_key = ie.ie_key()
 693         self._ies[ie_key] = ie
 694         if not isinstance(ie, type):
 695             self._ies_instances[ie_key] = ie
 696             ie.set_downloader(self)
 697
 698     def _get_info_extractor_class(self, ie_key):
 699         ie = self._ies.get(ie_key)
 700         if ie is None:
 701             ie = get_info_extractor(ie_key)
 702             self.add_info_extractor(ie)
 703         return ie
 704
 705     def get_info_extractor(self, ie_key):
 706         """
 707         Get an instance of an IE with name ie_key, it will try to get one from
 708         the _ies list, if there's no instance it will create a new one and add
 709         it to the extractor list.
 710         """
 711         ie = self._ies_instances.get(ie_key)
 712         if ie is None:
 713             ie = get_info_extractor(ie_key)()
 714             self.add_info_extractor(ie)
 715         return ie
 716
 717     def add_default_info_extractors(self):
 718         """
 719         Add the InfoExtractors returned by gen_extractors to the end of the list
 720         """
 721         for ie in gen_extractor_classes():
 722             self.add_info_extractor(ie)
 723
 724     def add_post_processor(self, pp, when='post_process'):
 725         """Add a PostProcessor object to the end of the chain."""
 726         self._pps[when].append(pp)
 727         pp.set_downloader(self)
 728
 729     def add_post_hook(self, ph):
 730         """Add the post hook"""
 731         self._post_hooks.append(ph)
 732
 733     def add_progress_hook(self, ph):
 734         """Add the download progress hook"""
 735         self._progress_hooks.append(ph)
 736
 737     def add_postprocessor_hook(self, ph):
 738         """Add the postprocessing progress hook"""
 739         self._postprocessor_hooks.append(ph)
 740         for pps in self._pps.values():
 741             for pp in pps:
 742                 pp.add_progress_hook(ph)
 743
 744     def _bidi_workaround(self, message):
 745         if not hasattr(self, '_output_channel'):
 746             return message
 747
 748         assert hasattr(self, '_output_process')
 749         assert isinstance(message, compat_str)
 750         line_count = message.count('\n') + 1
 751         self._output_process.stdin.write((message + '\n').encode('utf-8'))
 752         self._output_process.stdin.flush()
 753         res = ''.join(self._output_channel.readline().decode('utf-8')
 754                       for _ in range(line_count))
 755         return res[:-len('\n')]
 756
 757     def _write_string(self, message, out=None, only_once=False):
 758         if only_once:
 759             if message in self._printed_messages:
 760                 return
 761             self._printed_messages.add(message)
 762         write_string(message, out=out, encoding=self.params.get('encoding'))
 763
 764     def to_stdout(self, message, skip_eol=False, quiet=False):
 765         """Print message to stdout"""
 766         if self.params.get('logger'):
 767             self.params['logger'].debug(message)
 768         elif not quiet or self.params.get('verbose'):
 769             self._write_string(
 770                 '%s%s' % (self._bidi_workaround(message), ('' if skip_eol else '\n')),
 771                 self._err_file if quiet else self._screen_file)
 772
 773     def to_stderr(self, message, only_once=False):
 774         """Print message to stderr"""
 775         assert isinstance(message, compat_str)
 776         if self.params.get('logger'):
 777             self.params['logger'].error(message)
 778         else:
 779             self._write_string('%s\n' % self._bidi_workaround(message), self._err_file, only_once=only_once)
 780
 781     def to_console_title(self, message):
 782         if not self.params.get('consoletitle', False):
 783             return
 784         message = remove_terminal_sequences(message)
 785         if compat_os_name == 'nt':
 786             if ctypes.windll.kernel32.GetConsoleWindow():
 787                 # c_wchar_p() might not be necessary if `message` is
 788                 # already of type unicode()
 789                 ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
 790         elif 'TERM' in os.environ:
 791             self._write_string('\033]0;%s\007' % message, self._screen_file)
 792
 793     def save_console_title(self):
 794         if not self.params.get('consoletitle', False):
 795             return
 796         if self.params.get('simulate'):
 797             return
 798         if compat_os_name != 'nt' and 'TERM' in os.environ:
 799             # Save the title on stack
 800             self._write_string('\033[22;0t', self._screen_file)
 801
 802     def restore_console_title(self):
 803         if not self.params.get('consoletitle', False):
 804             return
 805         if self.params.get('simulate'):
 806             return
 807         if compat_os_name != 'nt' and 'TERM' in os.environ:
 808             # Restore the title from stack
 809             self._write_string('\033[23;0t', self._screen_file)
 810
 811     def __enter__(self):
 812         self.save_console_title()
 813         return self
 814
 815     def __exit__(self, *args):
 816         self.restore_console_title()
 817
 818         if self.params.get('cookiefile') is not None:
 819             self.cookiejar.save(ignore_discard=True, ignore_expires=True)
 820
 821     def trouble(self, message=None, tb=None):
 822         """Determine action to take when a download problem appears.
 823
 824         Depending on if the downloader has been configured to ignore
 825         download errors or not, this method may throw an exception or
 826         not when errors are found, after printing the message.
 827
 828         tb, if given, is additional traceback information.
 829         """
 830         if message is not None:
 831             self.to_stderr(message)
 832         if self.params.get('verbose'):
 833             if tb is None:
 834                 if sys.exc_info()[0]:  # if .trouble has been called from an except block
 835                     tb = ''
 836                     if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
 837                         tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
 838                     tb += encode_compat_str(traceback.format_exc())
 839                 else:
 840                     tb_data = traceback.format_list(traceback.extract_stack())
 841                     tb = ''.join(tb_data)
 842             if tb:
 843                 self.to_stderr(tb)
 844         if not self.params.get('ignoreerrors'):
 845             if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
 846                 exc_info = sys.exc_info()[1].exc_info
 847             else:
 848                 exc_info = sys.exc_info()
 849             raise DownloadError(message, exc_info)
 850         self._download_retcode = 1
 851
 852     def to_screen(self, message, skip_eol=False):
 853         """Print message to stdout if not in quiet mode"""
 854         self.to_stdout(
 855             message, skip_eol, quiet=self.params.get('quiet', False))
 856
    class Styles(Enum):
        """Named text styles; _format_text passes a member's value on as the format."""
        HEADERS = 'yellow'
        EMPHASIS = 'light blue'
        ID = 'green'
        DELIM = 'blue'
        ERROR = 'red'
        # Same value as HEADERS, so Enum treats WARNING as an alias of HEADERS
        # (Styles.WARNING is Styles.HEADERS). NOTE(review): presumably harmless
        # since only .value is used here - confirm no caller relies on the name
        WARNING = 'yellow'
        SUPPRESS = 'light black'
 865
 866     def _format_text(self, handle, allow_colors, text, f, fallback=None, *, test_encoding=False):
 867         if test_encoding:
 868             original_text = text
 869             encoding = self.params.get('encoding') or getattr(handle, 'encoding', 'ascii')
 870             text = text.encode(encoding, 'ignore').decode(encoding)
 871             if fallback is not None and text != original_text:
 872                 text = fallback
 873         if isinstance(f, self.Styles):
 874             f = f.value
 875         return format_text(text, f) if allow_colors else text if fallback is None else fallback
 876
 877     def _format_screen(self, *args, **kwargs):
 878         return self._format_text(
 879             self._screen_file, self._allow_colors['screen'], *args, **kwargs)
 880
 881     def _format_err(self, *args, **kwargs):
 882         return self._format_text(
 883             self._err_file, self._allow_colors['err'], *args, **kwargs)
 884
 885     def report_warning(self, message, only_once=False):
 886         '''
 887         Print the message to stderr, it will be prefixed with 'WARNING:'
 888         If stderr is a tty file the 'WARNING:' will be colored
 889         '''
 890         if self.params.get('logger') is not None:
 891             self.params['logger'].warning(message)
 892         else:
 893             if self.params.get('no_warnings'):
 894                 return
 895             self.to_stderr(f'{self._format_err("WARNING:", self.Styles.WARNING)} {message}', only_once)
 896
 897     def deprecation_warning(self, message):
 898         if self.params.get('logger') is not None:
 899             self.params['logger'].warning('DeprecationWarning: {message}')
 900         else:
 901             self.to_stderr(f'{self._format_err("DeprecationWarning:", self.Styles.ERROR)} {message}', True)
 902
 903     def report_error(self, message, tb=None):
 904         '''
 905         Do the same as trouble, but prefixes the message with 'ERROR:', colored
 906         in red if stderr is a tty file.
 907         '''
 908         self.trouble(f'{self._format_err("ERROR:", self.Styles.ERROR)} {message}', tb)
 909
 910     def write_debug(self, message, only_once=False):
 911         '''Log debug message or Print message to stderr'''
 912         if not self.params.get('verbose', False):
 913             return
 914         message = '[debug] %s' % message
 915         if self.params.get('logger'):
 916             self.params['logger'].debug(message)
 917         else:
 918             self.to_stderr(message, only_once)
 919
 920     def report_file_already_downloaded(self, file_name):
 921         """Report file has already been fully downloaded."""
 922         try:
 923             self.to_screen('[download] %s has already been downloaded' % file_name)
 924         except UnicodeEncodeError:
 925             self.to_screen('[download] The file has already been downloaded')
 926
 927     def report_file_delete(self, file_name):
 928         """Report that existing file will be deleted."""
 929         try:
 930             self.to_screen('Deleting existing file %s' % file_name)
 931         except UnicodeEncodeError:
 932             self.to_screen('Deleting existing file')
 933
 934     def raise_no_formats(self, info, forced=False):
 935         has_drm = info.get('__has_drm')
 936         msg = 'This video is DRM protected' if has_drm else 'No video formats found!'
 937         expected = self.params.get('ignore_no_formats_error')
 938         if forced or not expected:
 939             raise ExtractorError(msg, video_id=info['id'], ie=info['extractor'],
 940                                  expected=has_drm or expected)
 941         else:
 942             self.report_warning(msg)
 943
 944     def parse_outtmpl(self):
 945         outtmpl_dict = self.params.get('outtmpl', {})
 946         if not isinstance(outtmpl_dict, dict):
 947             outtmpl_dict = {'default': outtmpl_dict}
 948         # Remove spaces in the default template
 949         if self.params.get('restrictfilenames'):
 950             sanitize = lambda x: x.replace(' - ', ' ').replace(' ', '-')
 951         else:
 952             sanitize = lambda x: x
 953         outtmpl_dict.update({
 954             k: sanitize(v) for k, v in DEFAULT_OUTTMPL.items()
 955             if outtmpl_dict.get(k) is None})
 956         for key, val in outtmpl_dict.items():
 957             if isinstance(val, bytes):
 958                 self.report_warning(
 959                     'Parameter outtmpl is bytes, but should be a unicode string. '
 960                     'Put  from __future__ import unicode_literals  at the top of your code file or consider switching to Python 3.x.')
 961         return outtmpl_dict
 962
 963     def get_output_path(self, dir_type='', filename=None):
 964         paths = self.params.get('paths', {})
 965         assert isinstance(paths, dict)
 966         path = os.path.join(
 967             expand_path(paths.get('home', '').strip()),
 968             expand_path(paths.get(dir_type, '').strip()) if dir_type else '',
 969             filename or '')
 970
 971         # Temporary fix for #4787
 972         # 'Treat' all problem characters by passing filename through preferredencoding
 973         # to workaround encoding issues with subprocess on python2 @ Windows
 974         if sys.version_info < (3, 0) and sys.platform == 'win32':
 975             path = encodeFilename(path, True).decode(preferredencoding())
 976         return sanitize_path(path, force=self.params.get('windowsfilenames'))
 977
 978     @staticmethod
 979     def _outtmpl_expandpath(outtmpl):
 980         # expand_path translates '%%' into '%' and '$$' into '$'
 981         # correspondingly that is not what we want since we need to keep
 982         # '%%' intact for template dict substitution step. Working around
 983         # with boundary-alike separator hack.
 984         sep = ''.join([random.choice(ascii_letters) for _ in range(32)])
 985         outtmpl = outtmpl.replace('%%', '%{0}%'.format(sep)).replace('$$', '${0}$'.format(sep))
 986
 987         # outtmpl should be expand_path'ed before template dict substitution
 988         # because meta fields may contain env variables we don't want to
 989         # be expanded. For example, for outtmpl "%(title)s.%(ext)s" and
 990         # title "Hello $PATH", we don't want `$PATH` to be expanded.
 991         return expand_path(outtmpl).replace(sep, '')
 992
 993     @staticmethod
 994     def escape_outtmpl(outtmpl):
 995         ''' Escape any remaining strings like %s, %abc% etc. '''
 996         return re.sub(
 997             STR_FORMAT_RE_TMPL.format('', '(?![%(\0])'),
 998             lambda mobj: ('' if mobj.group('has_key') else '%') + mobj.group(0),
 999             outtmpl)
1000
1001     @classmethod
1002     def validate_outtmpl(cls, outtmpl):
1003         ''' @return None or Exception object '''
1004         outtmpl = re.sub(
1005             STR_FORMAT_RE_TMPL.format('[^)]*', '[ljqBU]'),
1006             lambda mobj: f'{mobj.group(0)[:-1]}s',
1007             cls._outtmpl_expandpath(outtmpl))
1008         try:
1009             cls.escape_outtmpl(outtmpl) % collections.defaultdict(int)
1010             return None
1011         except ValueError as err:
1012             return err
1013
1014     @staticmethod
1015     def _copy_infodict(info_dict):
1016         info_dict = dict(info_dict)
1017         for key in ('__original_infodict', '__postprocessors'):
1018             info_dict.pop(key, None)
1019         return info_dict
1020
    def prepare_outtmpl(self, outtmpl, info_dict, sanitize=None):
        """ Make the outtmpl and info_dict suitable for substitution: ydl.escape_outtmpl(outtmpl) % info_dict

        @param outtmpl    Output template string
        @param info_dict  Info dict to take the values from. 'epoch' is added to
                          it if not yet set; everything else is done on a copy
        @param sanitize   Optional callable taking (field name, value). When
                          given, it is applied to the values that end up being
                          formatted with the c, s or r conversions
        @return           Tuple of (rewritten template, dict holding the value
                          of each rewritten key)
        """
        info_dict.setdefault('epoch', int(time.time()))  # keep epoch consistent once set

        # Work on a copy from here on so that the derived fields below
        # do not leak into the caller's dict
        info_dict = self._copy_infodict(info_dict)
        info_dict['duration_string'] = (  # %(duration>%H-%M-%S)s is wrong if duration > 24hrs
            formatSeconds(info_dict['duration'], '-' if sanitize else ':')
            if info_dict.get('duration', None) is not None
            else None)
        info_dict['autonumber'] = self.params.get('autonumber_start', 1) - 1 + self._num_downloads
        if info_dict.get('resolution') is None:
            info_dict['resolution'] = self.format_resolution(info_dict, default=None)

        # For fields playlist_index, playlist_autonumber and autonumber convert all occurrences
        # of %(field)s to %(field)0Nd for backward compatibility
        field_size_compat_map = {
            'playlist_index': number_of_digits(info_dict.get('_last_playlist_index') or 0),
            'playlist_autonumber': number_of_digits(info_dict.get('n_entries') or 0),
            'autonumber': self.params.get('autonumber_size') or 5,
        }

        # Filled by create_key: rewritten key -> value to substitute
        TMPL_DICT = {}
        # Matches the standard conversion types plus the custom ones (l, j, q, B, U)
        EXTERNAL_FORMAT_RE = re.compile(STR_FORMAT_RE_TMPL.format('[^)]*', f'[{STR_FORMAT_TYPES}ljqBU]'))
        MATH_FUNCTIONS = {
            '+': float.__add__,
            '-': float.__sub__,
        }
        # Field is of the form key1.key2...
        # where keys (except first) can be string, int or slice
        FIELD_RE = r'\w*(?:\.(?:\w+|{num}|{num}?(?::{num}?){{1,2}}))*'.format(num=r'(?:-?\d+)')
        # NOTE(review): the '.' in the number pattern below is unescaped and so
        # matches any character, not just a decimal point - confirm whether
        # r'\.' was intended
        MATH_FIELD_RE = r'''{field}|{num}'''.format(field=FIELD_RE, num=r'-?\d+(?:.\d+)?')
        MATH_OPERATORS_RE = r'(?:%s)' % '|'.join(map(re.escape, MATH_FUNCTIONS.keys()))
        # Splits the text inside %(...) into: optional negation, the field path,
        # a chain of maths operations, a strftime format (after '>'),
        # alternate fields (after ','), a replacement (after '&')
        # and a default (after '|')
        INTERNAL_FORMAT_RE = re.compile(r'''(?x)
            (?P<negate>-)?
            (?P<fields>{field})
            (?P<maths>(?:{math_op}{math_field})*)
            (?:>(?P<strf_format>.+?))?
            (?P<alternate>(?<!\\),[^|&)]+)?
            (?:&(?P<replacement>.*?))?
            (?:\|(?P<default>.*?))?
            $'''.format(field=FIELD_RE, math_op=MATH_OPERATORS_RE, math_field=MATH_FIELD_RE))

        # Look up a dotted field path in the (copied) info dict
        def _traverse_infodict(k):
            k = k.split('.')
            if k[0] == '':
                # A leading dot leaves an empty first key; drop it
                k.pop(0)
            return traverse_obj(info_dict, k, is_user_input=True, traverse_string=True)

        # Compute the value for one parsed key (a groupdict of INTERNAL_FORMAT_RE)
        def get_value(mdict):
            # Object traversal
            value = _traverse_infodict(mdict['fields'])
            # Negative
            if mdict['negate']:
                value = float_or_none(value)
                if value is not None:
                    value *= -1
            # Do maths
            offset_key = mdict['maths']
            if offset_key:
                value = float_or_none(value)
                # Alternates between reading an operator (operator is None)
                # and reading the operand that it applies to
                operator = None
                while offset_key:
                    item = re.match(
                        MATH_FIELD_RE if operator else MATH_OPERATORS_RE,
                        offset_key).group(0)
                    offset_key = offset_key[len(item):]
                    if operator is None:
                        operator = MATH_FUNCTIONS[item]
                        continue
                    item, multiplier = (item[1:], -1) if item[0] == '-' else (item, 1)
                    offset = float_or_none(item)
                    if offset is None:
                        # Not a number literal, so treat the operand as a field
                        offset = float_or_none(_traverse_infodict(item))
                    try:
                        value = operator(value, multiplier * offset)
                    except (TypeError, ZeroDivisionError):
                        return None
                    operator = None
            # Datetime formatting
            if mdict['strf_format']:
                value = strftime_or_none(value, mdict['strf_format'].replace('\\,', ','))

            return value

        # Placeholder used when a field has no value and no default is given
        na = self.params.get('outtmpl_na_placeholder', 'NA')

        # json.dumps fallback: serialize sets and LazyLists as lists
        def _dumpjson_default(obj):
            if isinstance(obj, (set, LazyList)):
                return list(obj)
            raise TypeError(f'Object of type {type(obj).__name__} is not JSON serializable')

        # Substitution callback for EXTERNAL_FORMAT_RE: computes the value of
        # one template field, stores it in TMPL_DICT under a rewritten key
        # and returns the format spec that refers to that key
        def create_key(outer_mobj):
            if not outer_mobj.group('has_key'):
                # No key, nothing to look up; leave the match untouched
                return outer_mobj.group(0)
            key = outer_mobj.group('key')
            mobj = re.match(INTERNAL_FORMAT_RE, key)
            # Last component of the first field; passed to sanitize as the field name
            initial_field = mobj.group('fields').split('.')[-1] if mobj else ''
            value, replacement, default = None, None, na
            # Try the field and then each alternate in turn
            # until one of them yields a value
            while mobj:
                mobj = mobj.groupdict()
                default = mobj['default'] if mobj['default'] is not None else default
                value = get_value(mobj)
                replacement = mobj['replacement']
                if value is None and mobj['alternate']:
                    mobj = re.match(INTERNAL_FORMAT_RE, mobj['alternate'][1:])
                else:
                    break

            fmt = outer_mobj.group('format')
            if fmt == 's' and value is not None and key in field_size_compat_map.keys():
                fmt = '0{:d}d'.format(field_size_compat_map[key])

            # The replacement is used only when the field has a value
            value = default if value is None else value if replacement is None else replacement

            flags = outer_mobj.group('conversion') or ''
            # Same format spec, but with the conversion type replaced by 's'
            str_fmt = f'{fmt[:-1]}s'
            if fmt[-1] == 'l':  # list
                delim = '\n' if '#' in flags else ', '
                value, fmt = delim.join(variadic(value)), str_fmt
            elif fmt[-1] == 'j':  # json
                value, fmt = json.dumps(value, default=_dumpjson_default, indent=4 if '#' in flags else None), str_fmt
            elif fmt[-1] == 'q':  # quoted
                value = map(str, variadic(value) if '#' in flags else [value])
                value, fmt = ' '.join(map(compat_shlex_quote, value)), str_fmt
            elif fmt[-1] == 'B':  # bytes
                # Format as UTF-8 bytes so that width/precision count bytes
                value = f'%{str_fmt}'.encode('utf-8') % str(value).encode('utf-8')
                value, fmt = value.decode('utf-8', 'ignore'), 's'
            elif fmt[-1] == 'U':  # unicode normalized
                value, fmt = unicodedata.normalize(
                    # "+" = compatibility equivalence, "#" = NFD
                    'NF%s%s' % ('K' if '+' in flags else '', 'D' if '#' in flags else 'C'),
                    value), str_fmt
            elif fmt[-1] == 'c':
                if value:
                    value = str(value)[0]
                else:
                    fmt = str_fmt
            elif fmt[-1] not in 'rs':  # numeric
                value = float_or_none(value)
                if value is None:
                    # Not a number; fall back to printing the default as a string
                    value, fmt = default, 's'

            if sanitize:
                if fmt[-1] == 'r':
                    # If value is an object, sanitize might convert it to a string
                    # So we convert it to repr first
                    value, fmt = repr(value), str_fmt
                if fmt[-1] in 'csr':
                    value = sanitize(initial_field, value)

            # The rewritten key is the original key (with a NUL inserted after
            # each '%') joined to the original format by a NUL
            key = '%s\0%s' % (key.replace('%', '%\0'), outer_mobj.group('format'))
            TMPL_DICT[key] = value
            return '{prefix}%({key}){fmt}'.format(key=key, fmt=fmt, prefix=outer_mobj.group('prefix'))

        return EXTERNAL_FORMAT_RE.sub(create_key, outtmpl), TMPL_DICT
1176
1177     def evaluate_outtmpl(self, outtmpl, info_dict, *args, **kwargs):
1178         outtmpl, info_dict = self.prepare_outtmpl(outtmpl, info_dict, *args, **kwargs)
1179         return self.escape_outtmpl(outtmpl) % info_dict
1180
1181     def _prepare_filename(self, info_dict, tmpl_type='default'):
1182         try:
1183             sanitize = lambda k, v: sanitize_filename(
1184                 compat_str(v),
1185                 restricted=self.params.get('restrictfilenames'),
1186                 is_id=(k == 'id' or k.endswith('_id')))
1187             outtmpl = self._outtmpl_expandpath(self.outtmpl_dict.get(tmpl_type, self.outtmpl_dict['default']))
1188             filename = self.evaluate_outtmpl(outtmpl, info_dict, sanitize)
1189
1190             force_ext = OUTTMPL_TYPES.get(tmpl_type)
1191             if filename and force_ext is not None:
1192                 filename = replace_extension(filename, force_ext, info_dict.get('ext'))
1193
1194             # https://github.com/blackjack4494/youtube-dlc/issues/85
1195             trim_file_name = self.params.get('trim_file_name', False)
1196             if trim_file_name:
1197                 no_ext, *ext = filename.rsplit('.', 2)
1198                 filename = join_nonempty(no_ext[:trim_file_name], *ext, delim='.')
1199
1200             return filename
1201         except ValueError as err:
1202             self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
1203             return None
1204
1205     def prepare_filename(self, info_dict, dir_type='', warn=False):
1206         """Generate the output filename."""
1207
1208         filename = self._prepare_filename(info_dict, dir_type or 'default')
1209         if not filename and dir_type not in ('', 'temp'):
1210             return ''
1211
1212         if warn:
1213             if not self.params.get('paths'):
1214                 pass
1215             elif filename == '-':
1216                 self.report_warning('--paths is ignored when an outputting to stdout', only_once=True)
1217             elif os.path.isabs(filename):
1218                 self.report_warning('--paths is ignored since an absolute path is given in output template', only_once=True)
1219         if filename == '-' or not filename:
1220             return filename
1221
1222         return self.get_output_path(dir_type, filename)
1223
1224     def _match_entry(self, info_dict, incomplete=False, silent=False):
1225         """ Returns None if the file should be downloaded """
1226
1227         video_title = info_dict.get('title', info_dict.get('id', 'video'))
1228
1229         def check_filter():
1230             if 'title' in info_dict:
1231                 # This can happen when we're just evaluating the playlist
1232                 title = info_dict['title']
1233                 matchtitle = self.params.get('matchtitle', False)
1234                 if matchtitle:
1235                     if not re.search(matchtitle, title, re.IGNORECASE):
1236                         return '"' + title + '" title did not match pattern "' + matchtitle + '"'
1237                 rejecttitle = self.params.get('rejecttitle', False)
1238                 if rejecttitle:
1239                     if re.search(rejecttitle, title, re.IGNORECASE):
1240                         return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
1241             date = info_dict.get('upload_date')
1242             if date is not None:
1243                 dateRange = self.params.get('daterange', DateRange())
1244                 if date not in dateRange:
1245                     return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
1246             view_count = info_dict.get('view_count')
1247             if view_count is not None:
1248                 min_views = self.params.get('min_views')
1249                 if min_views is not None and view_count < min_views:
1250                     return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
1251                 max_views = self.params.get('max_views')
1252                 if max_views is not None and view_count > max_views:
1253                     return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
1254             if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
1255                 return 'Skipping "%s" because it is age restricted' % video_title
1256
1257             match_filter = self.params.get('match_filter')
1258             if match_filter is not None:
1259                 try:
1260                     ret = match_filter(info_dict, incomplete=incomplete)
1261                 except TypeError:
1262                     # For backward compatibility
1263                     ret = None if incomplete else match_filter(info_dict)
1264                 if ret is not None:
1265                     return ret
1266             return None
1267
1268         if self.in_download_archive(info_dict):
1269             reason = '%s has already been recorded in the archive' % video_title
1270             break_opt, break_err = 'break_on_existing', ExistingVideoReached
1271         else:
1272             reason = check_filter()
1273             break_opt, break_err = 'break_on_reject', RejectedVideoReached
1274         if reason is not None:
1275             if not silent:
1276                 self.to_screen('[download] ' + reason)
1277             if self.params.get(break_opt, False):
1278                 raise break_err()
1279         return reason
1280
1281     @staticmethod
1282     def add_extra_info(info_dict, extra_info):
1283         '''Set the keys from extra_info in info dict if they are missing'''
1284         for key, value in extra_info.items():
1285             info_dict.setdefault(key, value)
1286
1287     def extract_info(self, url, download=True, ie_key=None, extra_info=None,
1288                      process=True, force_generic_extractor=False):
1289         """
1290         Return a list with a dictionary for each video extracted.
1291
1292         Arguments:
1293         url -- URL to extract
1294
1295         Keyword arguments:
1296         download -- whether to download videos during extraction
1297         ie_key -- extractor key hint
1298         extra_info -- dictionary containing the extra values to add to each result
1299         process -- whether to resolve all unresolved references (URLs, playlist items),
1300             must be True for download to work.
1301         force_generic_extractor -- force using the generic extractor
1302         """
1303
1304         if extra_info is None:
1305             extra_info = {}
1306
1307         if not ie_key and force_generic_extractor:
1308             ie_key = 'Generic'
1309
1310         if ie_key:
1311             ies = {ie_key: self._get_info_extractor_class(ie_key)}
1312         else:
1313             ies = self._ies
1314
1315         for ie_key, ie in ies.items():
1316             if not ie.suitable(url):
1317                 continue
1318
1319             if not ie.working():
1320                 self.report_warning('The program functionality for this site has been marked as broken, '
1321                                     'and will probably not work.')
1322
1323             temp_id = ie.get_temp_id(url)
1324             if temp_id is not None and self.in_download_archive({'id': temp_id, 'ie_key': ie_key}):
1325                 self.to_screen(f'[{ie_key}] {temp_id}: has already been recorded in the archive')
1326                 if self.params.get('break_on_existing', False):
1327                     raise ExistingVideoReached()
1328                 break
1329             return self.__extract_info(url, self.get_info_extractor(ie_key), download, extra_info, process)
1330         else:
1331             self.report_error('no suitable InfoExtractor for URL %s' % url)
1332
1333     def __handle_extraction_exceptions(func):
1334         @functools.wraps(func)
1335         def wrapper(self, *args, **kwargs):
1336             try:
1337                 return func(self, *args, **kwargs)
1338             except GeoRestrictedError as e:
1339                 msg = e.msg
1340                 if e.countries:
1341                     msg += '\nThis video is available in %s.' % ', '.join(
1342                         map(ISO3166Utils.short2full, e.countries))
1343                 msg += '\nYou might want to use a VPN or a proxy server (with --proxy) to workaround.'
1344                 self.report_error(msg)
1345             except ExtractorError as e:  # An error we somewhat expected
1346                 self.report_error(compat_str(e), e.format_traceback())
1347             except ReExtractInfo as e:
1348                 if e.expected:
1349                     self.to_screen(f'{e}; Re-extracting data')
1350                 else:
1351                     self.to_stderr('\r')
1352                     self.report_warning(f'{e}; Re-extracting data')
1353                 return wrapper(self, *args, **kwargs)
1354             except (DownloadCancelled, LazyList.IndexError, PagedList.IndexError):
1355                 raise
1356             except Exception as e:
1357                 if self.params.get('ignoreerrors'):
1358                     self.report_error(error_to_compat_str(e), tb=encode_compat_str(traceback.format_exc()))
1359                 else:
1360                     raise
1361         return wrapper
1362
1363     def _wait_for_video(self, ie_result):
1364         if (not self.params.get('wait_for_video')
1365                 or ie_result.get('_type', 'video') != 'video'
1366                 or ie_result.get('formats') or ie_result.get('url')):
1367             return
1368
1369         format_dur = lambda dur: '%02d:%02d:%02d' % timetuple_from_msec(dur * 1000)[:-1]
1370         last_msg = ''
1371
1372         def progress(msg):
1373             nonlocal last_msg
1374             self.to_screen(msg + ' ' * (len(last_msg) - len(msg)) + '\r', skip_eol=True)
1375             last_msg = msg
1376
1377         min_wait, max_wait = self.params.get('wait_for_video')
1378         diff = try_get(ie_result, lambda x: x['release_timestamp'] - time.time())
1379         if diff is None and ie_result.get('live_status') == 'is_upcoming':
1380             diff = random.randrange(min_wait, max_wait) if (max_wait and min_wait) else (max_wait or min_wait)
1381             self.report_warning('Release time of video is not known')
1382         elif (diff or 0) <= 0:
1383             self.report_warning('Video should already be available according to extracted info')
1384         diff = min(max(diff or 0, min_wait or 0), max_wait or float('inf'))
1385         self.to_screen(f'[wait] Waiting for {format_dur(diff)} - Press Ctrl+C to try now')
1386
1387         wait_till = time.time() + diff
1388         try:
1389             while True:
1390                 diff = wait_till - time.time()
1391                 if diff <= 0:
1392                     progress('')
1393                     raise ReExtractInfo('[wait] Wait period ended', expected=True)
1394                 progress(f'[wait] Remaining time until next attempt: {self._format_screen(format_dur(diff), self.Styles.EMPHASIS)}')
1395                 time.sleep(1)
1396         except KeyboardInterrupt:
1397             progress('')
1398             raise ReExtractInfo('[wait] Interrupted by user', expected=True)
1399         except BaseException as e:
1400             if not isinstance(e, ReExtractInfo):
1401                 self.to_screen('')
1402             raise
1403
1404     @__handle_extraction_exceptions
1405     def __extract_info(self, url, ie, download, extra_info, process):
1406         ie_result = ie.extract(url)
1407         if ie_result is None:  # Finished already (backwards compatibility; listformats and friends should be moved here)
1408             return
1409         if isinstance(ie_result, list):
1410             # Backwards compatibility: old IE result format
1411             ie_result = {
1412                 '_type': 'compat_list',
1413                 'entries': ie_result,
1414             }
1415         if extra_info.get('original_url'):
1416             ie_result.setdefault('original_url', extra_info['original_url'])
1417         self.add_default_extra_info(ie_result, ie, url)
1418         if process:
1419             self._wait_for_video(ie_result)
1420             return self.process_ie_result(ie_result, download, extra_info)
1421         else:
1422             return ie_result
1423
1424     def add_default_extra_info(self, ie_result, ie, url):
1425         if url is not None:
1426             self.add_extra_info(ie_result, {
1427                 'webpage_url': url,
1428                 'original_url': url,
1429                 'webpage_url_basename': url_basename(url),
1430                 'webpage_url_domain': get_domain(url),
1431             })
1432         if ie is not None:
1433             self.add_extra_info(ie_result, {
1434                 'extractor': ie.IE_NAME,
1435                 'extractor_key': ie.ie_key(),
1436             })
1437
1438     def process_ie_result(self, ie_result, download=True, extra_info=None):
1439         """
1440         Take the result of the ie(may be modified) and resolve all unresolved
1441         references (URLs, playlist items).
1442
1443         It will also download the videos if 'download'.
1444         Returns the resolved ie_result.
1445         """
1446         if extra_info is None:
1447             extra_info = {}
1448         result_type = ie_result.get('_type', 'video')
1449
1450         if result_type in ('url', 'url_transparent'):
1451             ie_result['url'] = sanitize_url(ie_result['url'])
1452             if ie_result.get('original_url'):
1453                 extra_info.setdefault('original_url', ie_result['original_url'])
1454
1455             extract_flat = self.params.get('extract_flat', False)
1456             if ((extract_flat == 'in_playlist' and 'playlist' in extra_info)
1457                     or extract_flat is True):
1458                 info_copy = ie_result.copy()
1459                 ie = try_get(ie_result.get('ie_key'), self.get_info_extractor)
1460                 if ie and not ie_result.get('id'):
1461                     info_copy['id'] = ie.get_temp_id(ie_result['url'])
1462                 self.add_default_extra_info(info_copy, ie, ie_result['url'])
1463                 self.add_extra_info(info_copy, extra_info)
1464                 info_copy, _ = self.pre_process(info_copy)
1465                 self.__forced_printings(info_copy, self.prepare_filename(info_copy), incomplete=True)
1466                 if self.params.get('force_write_download_archive', False):
1467                     self.record_download_archive(info_copy)
1468                 return ie_result
1469
1470         if result_type == 'video':
1471             self.add_extra_info(ie_result, extra_info)
1472             ie_result = self.process_video_result(ie_result, download=download)
1473             additional_urls = (ie_result or {}).get('additional_urls')
1474             if additional_urls:
1475                 # TODO: Improve MetadataParserPP to allow setting a list
1476                 if isinstance(additional_urls, compat_str):
1477                     additional_urls = [additional_urls]
1478                 self.to_screen(
1479                     '[info] %s: %d additional URL(s) requested' % (ie_result['id'], len(additional_urls)))
1480                 self.write_debug('Additional URLs: "%s"' % '", "'.join(additional_urls))
1481                 ie_result['additional_entries'] = [
1482                     self.extract_info(
1483                         url, download, extra_info,
1484                         force_generic_extractor=self.params.get('force_generic_extractor'))
1485                     for url in additional_urls
1486                 ]
1487             return ie_result
1488         elif result_type == 'url':
1489             # We have to add extra_info to the results because it may be
1490             # contained in a playlist
1491             return self.extract_info(
1492                 ie_result['url'], download,
1493                 ie_key=ie_result.get('ie_key'),
1494                 extra_info=extra_info)
1495         elif result_type == 'url_transparent':
1496             # Use the information from the embedding page
1497             info = self.extract_info(
1498                 ie_result['url'], ie_key=ie_result.get('ie_key'),
1499                 extra_info=extra_info, download=False, process=False)
1500
1501             # extract_info may return None when ignoreerrors is enabled and
1502             # extraction failed with an error, don't crash and return early
1503             # in this case
1504             if not info:
1505                 return info
1506
1507             force_properties = dict(
1508                 (k, v) for k, v in ie_result.items() if v is not None)
1509             for f in ('_type', 'url', 'id', 'extractor', 'extractor_key', 'ie_key'):
1510                 if f in force_properties:
1511                     del force_properties[f]
1512             new_result = info.copy()
1513             new_result.update(force_properties)
1514
1515             # Extracted info may not be a video result (i.e.
1516             # info.get('_type', 'video') != video) but rather an url or
1517             # url_transparent. In such cases outer metadata (from ie_result)
1518             # should be propagated to inner one (info). For this to happen
1519             # _type of info should be overridden with url_transparent. This
1520             # fixes issue from https://github.com/ytdl-org/youtube-dl/pull/11163.
1521             if new_result.get('_type') == 'url':
1522                 new_result['_type'] = 'url_transparent'
1523
1524             return self.process_ie_result(
1525                 new_result, download=download, extra_info=extra_info)
1526         elif result_type in ('playlist', 'multi_video'):
1527             # Protect from infinite recursion due to recursively nested playlists
1528             # (see https://github.com/ytdl-org/youtube-dl/issues/27833)
1529             webpage_url = ie_result['webpage_url']
1530             if webpage_url in self._playlist_urls:
1531                 self.to_screen(
1532                     '[download] Skipping already downloaded playlist: %s'
1533                     % ie_result.get('title') or ie_result.get('id'))
1534                 return
1535
1536             self._playlist_level += 1
1537             self._playlist_urls.add(webpage_url)
1538             self._sanitize_thumbnails(ie_result)
1539             try:
1540                 return self.__process_playlist(ie_result, download)
1541             finally:
1542                 self._playlist_level -= 1
1543                 if not self._playlist_level:
1544                     self._playlist_urls.clear()
1545         elif result_type == 'compat_list':
1546             self.report_warning(
1547                 'Extractor %s returned a compat_list result. '
1548                 'It needs to be updated.' % ie_result.get('extractor'))
1549
1550             def _fixup(r):
1551                 self.add_extra_info(r, {
1552                     'extractor': ie_result['extractor'],
1553                     'webpage_url': ie_result['webpage_url'],
1554                     'webpage_url_basename': url_basename(ie_result['webpage_url']),
1555                     'webpage_url_domain': get_domain(ie_result['webpage_url']),
1556                     'extractor_key': ie_result['extractor_key'],
1557                 })
1558                 return r
1559             ie_result['entries'] = [
1560                 self.process_ie_result(_fixup(r), download, extra_info)
1561                 for r in ie_result['entries']
1562             ]
1563             return ie_result
1564         else:
1565             raise Exception('Invalid result type: %s' % result_type)
1566
1567     def _ensure_dir_exists(self, path):
1568         return make_dir(path, self.report_error)
1569
    def __process_playlist(self, ie_result, download):
        """
        Select and process the entries of a playlist result.

        The entries are selected according to the 'playlist_items' param or,
        without it, 'playliststart' and 'playlistend'. Unless simulating,
        the playlist files (infojson, description, thumbnails) are written
        first. Every selected entry that is not rejected by _match_entry is
        then processed, with the playlist fields passed as its extra info.

        Returns ie_result with 'entries' replaced by the processed results
        and 'requested_entries' set, or None if writing the playlist
        infojson or description returned None.

        Raises EntryNotInPlaylist if there are no entries, or if a requested
        entry is missing from an incomplete playlist.
        """
        # We process each entry in the playlist
        playlist = ie_result.get('title') or ie_result.get('id')
        self.to_screen('[download] Downloading playlist: %s' % playlist)

        if 'entries' not in ie_result:
            raise EntryNotInPlaylist('There are no entries')

        # Placeholder for the positions that are absent from an incomplete playlist
        MissingEntry = object()
        incomplete_entries = bool(ie_result.get('requested_entries'))
        if incomplete_entries:
            def fill_missing_entries(entries, indices):
                # Put each entry at its 1-based index given in `indices`
                ret = [MissingEntry] * max(indices)
                for i, entry in zip(indices, entries):
                    ret[i - 1] = entry
                return ret
            ie_result['entries'] = fill_missing_entries(ie_result['entries'], ie_result['requested_entries'])

        playlist_results = []

        playliststart = self.params.get('playliststart', 1)
        playlistend = self.params.get('playlistend')
        # For backwards compatibility, interpret -1 as whole list
        if playlistend == -1:
            playlistend = None

        playlistitems_str = self.params.get('playlist_items')
        playlistitems = None
        if playlistitems_str is not None:
            def iter_playlistitems(format):
                # Comma separated list of indices and inclusive 'start-end' ranges
                for string_segment in format.split(','):
                    if '-' in string_segment:
                        start, end = string_segment.split('-')
                        for item in range(int(start), int(end) + 1):
                            yield int(item)
                    else:
                        yield int(string_segment)
            playlistitems = orderedSet(iter_playlistitems(playlistitems_str))

        ie_entries = ie_result['entries']
        # For a list the total is filled in right away, leaving a single
        # '%d' placeholder in both cases for the number of selected entries
        msg = (
            'Downloading %d videos' if not isinstance(ie_entries, list)
            else 'Collected %d videos; downloading %%d of them' % len(ie_entries))

        # get_entry takes a 1-based index
        if isinstance(ie_entries, list):
            def get_entry(i):
                return ie_entries[i - 1]
        else:
            if not isinstance(ie_entries, (PagedList, LazyList)):
                ie_entries = LazyList(ie_entries)

            def get_entry(i):
                # Apply the usual extraction error handling to the lookup
                return YoutubeDL.__handle_extraction_exceptions(
                    lambda self, i: ie_entries[i - 1]
                )(self, i)

        entries = []
        items = playlistitems if playlistitems is not None else itertools.count(playliststart)
        for i in items:
            if i == 0:
                continue
            if playlistitems is None and playlistend is not None and playlistend < i:
                break
            entry = None
            try:
                entry = get_entry(i)
                if entry is MissingEntry:
                    raise EntryNotInPlaylist()
            except (IndexError, EntryNotInPlaylist):
                if incomplete_entries:
                    raise EntryNotInPlaylist(f'Entry {i} cannot be found')
                elif not playlistitems:
                    break
            # With explicit playlist items, an index that could not be
            # fetched is kept as None so that the positions still line up
            entries.append(entry)
            try:
                if entry is not None:
                    # Only the break exceptions matter here; the returned
                    # reason is checked again when processing the entries
                    self._match_entry(entry, incomplete=True, silent=True)
            except (ExistingVideoReached, RejectedVideoReached):
                break
        ie_result['entries'] = entries

        # Save playlist_index before re-ordering
        entries = [
            ((playlistitems[i - 1] if playlistitems else i + playliststart - 1), entry)
            for i, entry in enumerate(entries, 1)
            if entry is not None]
        n_entries = len(entries)

        if not playlistitems and (playliststart != 1 or playlistend):
            playlistitems = list(range(playliststart, playliststart + n_entries))
        ie_result['requested_entries'] = playlistitems

        _infojson_written = False
        if not self.params.get('simulate') and self.params.get('allow_playlist_files', True):
            # Fields used for the playlist file names; values from ie_result
            # take precedence over these defaults
            ie_copy = {
                'playlist': playlist,
                'playlist_id': ie_result.get('id'),
                'playlist_title': ie_result.get('title'),
                'playlist_uploader': ie_result.get('uploader'),
                'playlist_uploader_id': ie_result.get('uploader_id'),
                'playlist_index': 0,
                'n_entries': n_entries,
            }
            ie_copy.update(dict(ie_result))

            # A None result from either writer aborts the playlist
            # (presumably it signals a write failure - confirm in the writers)
            _infojson_written = self._write_info_json(
                'playlist', ie_result, self.prepare_filename(ie_copy, 'pl_infojson'))
            if _infojson_written is None:
                return
            if self._write_description('playlist', ie_result,
                                       self.prepare_filename(ie_copy, 'pl_description')) is None:
                return
            # TODO: This should be passed to ThumbnailsConvertor if necessary
            self._write_thumbnails('playlist', ie_copy, self.prepare_filename(ie_copy, 'pl_thumbnail'))

        if self.params.get('playlistreverse', False):
            entries = entries[::-1]
        if self.params.get('playlistrandom', False):
            random.shuffle(entries)

        x_forwarded_for = ie_result.get('__x_forwarded_for_ip')

        self.to_screen('[%s] playlist %s: %s' % (ie_result['extractor'], playlist, msg % n_entries))
        failures = 0
        # No limit unless 'skip_playlist_after_errors' is set
        max_failures = self.params.get('skip_playlist_after_errors') or float('inf')
        for i, entry_tuple in enumerate(entries, 1):
            playlist_index, entry = entry_tuple
            if 'playlist-index' in self.params.get('compat_opts', []):
                # Compat behavior: derive the index from the position after re-ordering
                playlist_index = playlistitems[i - 1] if playlistitems else i + playliststart - 1
            self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
            # This __x_forwarded_for_ip thing is a bit ugly but requires
            # minimal changes
            if x_forwarded_for:
                entry['__x_forwarded_for_ip'] = x_forwarded_for
            extra = {
                'n_entries': n_entries,
                '_last_playlist_index': max(playlistitems) if playlistitems else (playlistend or n_entries),
                'playlist_index': playlist_index,
                'playlist_autonumber': i,
                'playlist': playlist,
                'playlist_id': ie_result.get('id'),
                'playlist_title': ie_result.get('title'),
                'playlist_uploader': ie_result.get('uploader'),
                'playlist_uploader_id': ie_result.get('uploader_id'),
                'extractor': ie_result['extractor'],
                'webpage_url': ie_result['webpage_url'],
                'webpage_url_basename': url_basename(ie_result['webpage_url']),
                'webpage_url_domain': get_domain(ie_result['webpage_url']),
                'extractor_key': ie_result['extractor_key'],
            }

            # Rejected entries are skipped and not included in the results
            if self._match_entry(entry, incomplete=True) is not None:
                continue

            entry_result = self.__process_iterable_entry(entry, download, extra)
            # A falsy result counts as a failed entry
            if not entry_result:
                failures += 1
            if failures >= max_failures:
                self.report_error(
                    'Skipping the remaining entries in playlist "%s" since %d items failed extraction' % (playlist, failures))
                break
            playlist_results.append(entry_result)
        ie_result['entries'] = playlist_results

        # Write the updated info to json
        if _infojson_written and self._write_info_json(
                'updated playlist', ie_result,
                self.prepare_filename(ie_copy, 'pl_infojson'), overwrite=True) is None:
            return
        self.to_screen('[download] Finished downloading playlist: %s' % playlist)
        return ie_result
1741
1742     @__handle_extraction_exceptions
1743     def __process_iterable_entry(self, entry, download, extra_info):
1744         return self.process_ie_result(
1745             entry, download=download, extra_info=extra_info)
1746
1747     def _build_format_filter(self, filter_spec):
1748         " Returns a function to filter the formats according to the filter_spec "
1749
1750         OPERATORS = {
1751             '<': operator.lt,
1752             '<=': operator.le,
1753             '>': operator.gt,
1754             '>=': operator.ge,
1755             '=': operator.eq,
1756             '!=': operator.ne,
1757         }
1758         operator_rex = re.compile(r'''(?x)\s*
1759             (?P<key>width|height|tbr|abr|vbr|asr|filesize|filesize_approx|fps)\s*
1760             (?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
1761             (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)\s*
1762             ''' % '|'.join(map(re.escape, OPERATORS.keys())))
1763         m = operator_rex.fullmatch(filter_spec)
1764         if m:
1765             try:
1766                 comparison_value = int(m.group('value'))
1767             except ValueError:
1768                 comparison_value = parse_filesize(m.group('value'))
1769                 if comparison_value is None:
1770                     comparison_value = parse_filesize(m.group('value') + 'B')
1771                 if comparison_value is None:
1772                     raise ValueError(
1773                         'Invalid value %r in format specification %r' % (
1774                             m.group('value'), filter_spec))
1775             op = OPERATORS[m.group('op')]
1776
1777         if not m:
1778             STR_OPERATORS = {
1779                 '=': operator.eq,
1780                 '^=': lambda attr, value: attr.startswith(value),
1781                 '$=': lambda attr, value: attr.endswith(value),
1782                 '*=': lambda attr, value: value in attr,
1783             }
1784             str_operator_rex = re.compile(r'''(?x)\s*
1785                 (?P<key>[a-zA-Z0-9._-]+)\s*
1786                 (?P<negation>!\s*)?(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
1787                 (?P<value>[a-zA-Z0-9._-]+)\s*
1788                 ''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))
1789             m = str_operator_rex.fullmatch(filter_spec)
1790             if m:
1791                 comparison_value = m.group('value')
1792                 str_op = STR_OPERATORS[m.group('op')]
1793                 if m.group('negation'):
1794                     op = lambda attr, value: not str_op(attr, value)
1795                 else:
1796                     op = str_op
1797
1798         if not m:
1799             raise SyntaxError('Invalid filter specification %r' % filter_spec)
1800
1801         def _filter(f):
1802             actual_value = f.get(m.group('key'))
1803             if actual_value is None:
1804                 return m.group('none_inclusive')
1805             return op(actual_value, comparison_value)
1806         return _filter
1807
1808     def _check_formats(self, formats):
1809         for f in formats:
1810             self.to_screen('[info] Testing format %s' % f['format_id'])
1811             path = self.get_output_path('temp')
1812             if not self._ensure_dir_exists(f'{path}/'):
1813                 continue
1814             temp_file = tempfile.NamedTemporaryFile(suffix='.tmp', delete=False, dir=path or None)
1815             temp_file.close()
1816             try:
1817                 success, _ = self.dl(temp_file.name, f, test=True)
1818             except (DownloadError, IOError, OSError, ValueError) + network_exceptions:
1819                 success = False
1820             finally:
1821                 if os.path.exists(temp_file.name):
1822                     try:
1823                         os.remove(temp_file.name)
1824                     except OSError:
1825                         self.report_warning('Unable to delete temporary file "%s"' % temp_file.name)
1826             if success:
1827                 yield f
1828             else:
1829                 self.to_screen('[info] Unable to download format %s. Skipping...' % f['format_id'])
1830
1831     def _default_format_spec(self, info_dict, download=True):
1832
1833         def can_merge():
1834             merger = FFmpegMergerPP(self)
1835             return merger.available and merger.can_merge()
1836
1837         prefer_best = (
1838             not self.params.get('simulate')
1839             and download
1840             and (
1841                 not can_merge()
1842                 or info_dict.get('is_live', False)
1843                 or self.outtmpl_dict['default'] == '-'))
1844         compat = (
1845             prefer_best
1846             or self.params.get('allow_multiple_audio_streams', False)
1847             or 'format-spec' in self.params.get('compat_opts', []))
1848
1849         return (
1850             'best/bestvideo+bestaudio' if prefer_best
1851             else 'bestvideo*+bestaudio/best' if not compat
1852             else 'bestvideo+bestaudio/best')
1853
1854     def build_format_selector(self, format_spec):
1855         def syntax_error(note, start):
1856             message = (
1857                 'Invalid format specification: '
1858                 '{0}\n\t{1}\n\t{2}^'.format(note, format_spec, ' ' * start[1]))
1859             return SyntaxError(message)
1860
1861         PICKFIRST = 'PICKFIRST'
1862         MERGE = 'MERGE'
1863         SINGLE = 'SINGLE'
1864         GROUP = 'GROUP'
1865         FormatSelector = collections.namedtuple('FormatSelector', ['type', 'selector', 'filters'])
1866
1867         allow_multiple_streams = {'audio': self.params.get('allow_multiple_audio_streams', False),
1868                                   'video': self.params.get('allow_multiple_video_streams', False)}
1869
1870         check_formats = self.params.get('check_formats') == 'selected'
1871
1872         def _parse_filter(tokens):
1873             filter_parts = []
1874             for type, string, start, _, _ in tokens:
1875                 if type == tokenize.OP and string == ']':
1876                     return ''.join(filter_parts)
1877                 else:
1878                     filter_parts.append(string)
1879
1880         def _remove_unused_ops(tokens):
1881             # Remove operators that we don't use and join them with the surrounding strings
1882             # for example: 'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9'
1883             ALLOWED_OPS = ('/', '+', ',', '(', ')')
1884             last_string, last_start, last_end, last_line = None, None, None, None
1885             for type, string, start, end, line in tokens:
1886                 if type == tokenize.OP and string == '[':
1887                     if last_string:
1888                         yield tokenize.NAME, last_string, last_start, last_end, last_line
1889                         last_string = None
1890                     yield type, string, start, end, line
1891                     # everything inside brackets will be handled by _parse_filter
1892                     for type, string, start, end, line in tokens:
1893                         yield type, string, start, end, line
1894                         if type == tokenize.OP and string == ']':
1895                             break
1896                 elif type == tokenize.OP and string in ALLOWED_OPS:
1897                     if last_string:
1898                         yield tokenize.NAME, last_string, last_start, last_end, last_line
1899                         last_string = None
1900                     yield type, string, start, end, line
1901                 elif type in [tokenize.NAME, tokenize.NUMBER, tokenize.OP]:
1902                     if not last_string:
1903                         last_string = string
1904                         last_start = start
1905                         last_end = end
1906                     else:
1907                         last_string += string
1908             if last_string:
1909                 yield tokenize.NAME, last_string, last_start, last_end, last_line
1910
1911         def _parse_format_selection(tokens, inside_merge=False, inside_choice=False, inside_group=False):
1912             selectors = []
1913             current_selector = None
1914             for type, string, start, _, _ in tokens:
1915                 # ENCODING is only defined in python 3.x
1916                 if type == getattr(tokenize, 'ENCODING', None):
1917                     continue
1918                 elif type in [tokenize.NAME, tokenize.NUMBER]:
1919                     current_selector = FormatSelector(SINGLE, string, [])
1920                 elif type == tokenize.OP:
1921                     if string == ')':
1922                         if not inside_group:
1923                             # ')' will be handled by the parentheses group
1924                             tokens.restore_last_token()
1925                         break
1926                     elif inside_merge and string in ['/', ',']:
1927                         tokens.restore_last_token()
1928                         break
1929                     elif inside_choice and string == ',':
1930                         tokens.restore_last_token()
1931                         break
1932                     elif string == ',':
1933                         if not current_selector:
1934                             raise syntax_error('"," must follow a format selector', start)
1935                         selectors.append(current_selector)
1936                         current_selector = None
1937                     elif string == '/':
1938                         if not current_selector:
1939                             raise syntax_error('"/" must follow a format selector', start)
1940                         first_choice = current_selector
1941                         second_choice = _parse_format_selection(tokens, inside_choice=True)
1942                         current_selector = FormatSelector(PICKFIRST, (first_choice, second_choice), [])
1943                     elif string == '[':
1944                         if not current_selector:
1945                             current_selector = FormatSelector(SINGLE, 'best', [])
1946                         format_filter = _parse_filter(tokens)
1947                         current_selector.filters.append(format_filter)
1948                     elif string == '(':
1949                         if current_selector:
1950                             raise syntax_error('Unexpected "("', start)
1951                         group = _parse_format_selection(tokens, inside_group=True)
1952                         current_selector = FormatSelector(GROUP, group, [])
1953                     elif string == '+':
1954                         if not current_selector:
1955                             raise syntax_error('Unexpected "+"', start)
1956                         selector_1 = current_selector
1957                         selector_2 = _parse_format_selection(tokens, inside_merge=True)
1958                         if not selector_2:
1959                             raise syntax_error('Expected a selector', start)
1960                         current_selector = FormatSelector(MERGE, (selector_1, selector_2), [])
1961                     else:
1962                         raise syntax_error('Operator not recognized: "{0}"'.format(string), start)
1963                 elif type == tokenize.ENDMARKER:
1964                     break
1965             if current_selector:
1966                 selectors.append(current_selector)
1967             return selectors
1968
1969         def _merge(formats_pair):
1970             format_1, format_2 = formats_pair
1971
1972             formats_info = []
1973             formats_info.extend(format_1.get('requested_formats', (format_1,)))
1974             formats_info.extend(format_2.get('requested_formats', (format_2,)))
1975
1976             if not allow_multiple_streams['video'] or not allow_multiple_streams['audio']:
1977                 get_no_more = {'video': False, 'audio': False}
1978                 for (i, fmt_info) in enumerate(formats_info):
1979                     if fmt_info.get('acodec') == fmt_info.get('vcodec') == 'none':
1980                         formats_info.pop(i)
1981                         continue
1982                     for aud_vid in ['audio', 'video']:
1983                         if not allow_multiple_streams[aud_vid] and fmt_info.get(aud_vid[0] + 'codec') != 'none':
1984                             if get_no_more[aud_vid]:
1985                                 formats_info.pop(i)
1986                                 break
1987                             get_no_more[aud_vid] = True
1988
1989             if len(formats_info) == 1:
1990                 return formats_info[0]
1991
1992             video_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('vcodec') != 'none']
1993             audio_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('acodec') != 'none']
1994
1995             the_only_video = video_fmts[0] if len(video_fmts) == 1 else None
1996             the_only_audio = audio_fmts[0] if len(audio_fmts) == 1 else None
1997
1998             output_ext = self.params.get('merge_output_format')
1999             if not output_ext:
2000                 if the_only_video:
2001                     output_ext = the_only_video['ext']
2002                 elif the_only_audio and not video_fmts:
2003                     output_ext = the_only_audio['ext']
2004                 else:
2005                     output_ext = 'mkv'
2006
2007             filtered = lambda *keys: filter(None, (traverse_obj(fmt, *keys) for fmt in formats_info))
2008
2009             new_dict = {
2010                 'requested_formats': formats_info,
2011                 'format': '+'.join(filtered('format')),
2012                 'format_id': '+'.join(filtered('format_id')),
2013                 'ext': output_ext,
2014                 'protocol': '+'.join(map(determine_protocol, formats_info)),
2015                 'language': '+'.join(orderedSet(filtered('language'))) or None,
2016                 'format_note': '+'.join(orderedSet(filtered('format_note'))) or None,
2017                 'filesize_approx': sum(filtered('filesize', 'filesize_approx')) or None,
2018                 'tbr': sum(filtered('tbr', 'vbr', 'abr')),
2019             }
2020
2021             if the_only_video:
2022                 new_dict.update({
2023                     'width': the_only_video.get('width'),
2024                     'height': the_only_video.get('height'),
2025                     'resolution': the_only_video.get('resolution') or self.format_resolution(the_only_video),
2026                     'fps': the_only_video.get('fps'),
2027                     'dynamic_range': the_only_video.get('dynamic_range'),
2028                     'vcodec': the_only_video.get('vcodec'),
2029                     'vbr': the_only_video.get('vbr'),
2030                     'stretched_ratio': the_only_video.get('stretched_ratio'),
2031                 })
2032
2033             if the_only_audio:
2034                 new_dict.update({
2035                     'acodec': the_only_audio.get('acodec'),
2036                     'abr': the_only_audio.get('abr'),
2037                     'asr': the_only_audio.get('asr'),
2038                 })
2039
2040             return new_dict
2041
2042         def _check_formats(formats):
2043             if not check_formats:
2044                 yield from formats
2045                 return
2046             yield from self._check_formats(formats)
2047
2048         def _build_selector_function(selector):
2049             if isinstance(selector, list):  # ,
2050                 fs = [_build_selector_function(s) for s in selector]
2051
2052                 def selector_function(ctx):
2053                     for f in fs:
2054                         yield from f(ctx)
2055                 return selector_function
2056
2057             elif selector.type == GROUP:  # ()
2058                 selector_function = _build_selector_function(selector.selector)
2059
2060             elif selector.type == PICKFIRST:  # /
2061                 fs = [_build_selector_function(s) for s in selector.selector]
2062
2063                 def selector_function(ctx):
2064                     for f in fs:
2065                         picked_formats = list(f(ctx))
2066                         if picked_formats:
2067                             return picked_formats
2068                     return []
2069
2070             elif selector.type == MERGE:  # +
2071                 selector_1, selector_2 = map(_build_selector_function, selector.selector)
2072
2073                 def selector_function(ctx):
2074                     for pair in itertools.product(
2075                             selector_1(copy.deepcopy(ctx)), selector_2(copy.deepcopy(ctx))):
2076                         yield _merge(pair)
2077
2078             elif selector.type == SINGLE:  # atom
2079                 format_spec = selector.selector or 'best'
2080
2081                 # TODO: Add allvideo, allaudio etc by generalizing the code with best/worst selector
2082                 if format_spec == 'all':
2083                     def selector_function(ctx):
2084                         yield from _check_formats(ctx['formats'][::-1])
2085                 elif format_spec == 'mergeall':
2086                     def selector_function(ctx):
2087                         formats = list(_check_formats(ctx['formats']))
2088                         if not formats:
2089                             return
2090                         merged_format = formats[-1]
2091                         for f in formats[-2::-1]:
2092                             merged_format = _merge((merged_format, f))
2093                         yield merged_format
2094
2095                 else:
2096                     format_fallback, format_reverse, format_idx = False, True, 1
2097                     mobj = re.match(
2098                         r'(?P<bw>best|worst|b|w)(?P<type>video|audio|v|a)?(?P<mod>\*)?(?:\.(?P<n>[1-9]\d*))?$',
2099                         format_spec)
2100                     if mobj is not None:
2101                         format_idx = int_or_none(mobj.group('n'), default=1)
2102                         format_reverse = mobj.group('bw')[0] == 'b'
2103                         format_type = (mobj.group('type') or [None])[0]
2104                         not_format_type = {'v': 'a', 'a': 'v'}.get(format_type)
2105                         format_modified = mobj.group('mod') is not None
2106
2107                         format_fallback = not format_type and not format_modified  # for b, w
2108                         _filter_f = (
2109                             (lambda f: f.get('%scodec' % format_type) != 'none')
2110                             if format_type and format_modified  # bv*, ba*, wv*, wa*
2111                             else (lambda f: f.get('%scodec' % not_format_type) == 'none')
2112                             if format_type  # bv, ba, wv, wa
2113                             else (lambda f: f.get('vcodec') != 'none' and f.get('acodec') != 'none')
2114                             if not format_modified  # b, w
2115                             else lambda f: True)  # b*, w*
2116                         filter_f = lambda f: _filter_f(f) and (
2117                             f.get('vcodec') != 'none' or f.get('acodec') != 'none')
2118                     else:
2119                         if format_spec in self._format_selection_exts['audio']:
2120                             filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') != 'none'
2121                         elif format_spec in self._format_selection_exts['video']:
2122                             filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') != 'none' and f.get('vcodec') != 'none'
2123                         elif format_spec in self._format_selection_exts['storyboards']:
2124                             filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') == 'none' and f.get('vcodec') == 'none'
2125                         else:
2126                             filter_f = lambda f: f.get('format_id') == format_spec  # id
2127
2128                     def selector_function(ctx):
2129                         formats = list(ctx['formats'])
2130                         matches = list(filter(filter_f, formats)) if filter_f is not None else formats
2131                         if format_fallback and ctx['incomplete_formats'] and not matches:
2132                             # for extractors with incomplete formats (audio only (soundcloud)
2133                             # or video only (imgur)) best/worst will fallback to
2134                             # best/worst {video,audio}-only format
2135                             matches = formats
2136                         matches = LazyList(_check_formats(matches[::-1 if format_reverse else 1]))
2137                         try:
2138                             yield matches[format_idx - 1]
2139                         except IndexError:
2140                             return
2141
2142             filters = [self._build_format_filter(f) for f in selector.filters]
2143
2144             def final_selector(ctx):
2145                 ctx_copy = copy.deepcopy(ctx)
2146                 for _filter in filters:
2147                     ctx_copy['formats'] = list(filter(_filter, ctx_copy['formats']))
2148                 return selector_function(ctx_copy)
2149             return final_selector
2150
2151         stream = io.BytesIO(format_spec.encode('utf-8'))
2152         try:
2153             tokens = list(_remove_unused_ops(compat_tokenize_tokenize(stream.readline)))
2154         except tokenize.TokenError:
2155             raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec)))
2156
2157         class TokenIterator(object):
2158             def __init__(self, tokens):
2159                 self.tokens = tokens
2160                 self.counter = 0
2161
2162             def __iter__(self):
2163                 return self
2164
2165             def __next__(self):
2166                 if self.counter >= len(self.tokens):
2167                     raise StopIteration()
2168                 value = self.tokens[self.counter]
2169                 self.counter += 1
2170                 return value
2171
2172             next = __next__
2173
2174             def restore_last_token(self):
2175                 self.counter -= 1
2176
2177         parsed_selector = _parse_format_selection(iter(TokenIterator(tokens)))
2178         return _build_selector_function(parsed_selector)
2179
2180     def _calc_headers(self, info_dict):
2181         res = std_headers.copy()
2182
2183         add_headers = info_dict.get('http_headers')
2184         if add_headers:
2185             res.update(add_headers)
2186
2187         cookies = self._calc_cookies(info_dict)
2188         if cookies:
2189             res['Cookie'] = cookies
2190
2191         if 'X-Forwarded-For' not in res:
2192             x_forwarded_for_ip = info_dict.get('__x_forwarded_for_ip')
2193             if x_forwarded_for_ip:
2194                 res['X-Forwarded-For'] = x_forwarded_for_ip
2195
2196         return res
2197
2198     def _calc_cookies(self, info_dict):
2199         pr = sanitized_Request(info_dict['url'])
2200         self.cookiejar.add_cookie_header(pr)
2201         return pr.get_header('Cookie')
2202
2203     def _sort_thumbnails(self, thumbnails):
2204         thumbnails.sort(key=lambda t: (
2205             t.get('preference') if t.get('preference') is not None else -1,
2206             t.get('width') if t.get('width') is not None else -1,
2207             t.get('height') if t.get('height') is not None else -1,
2208             t.get('id') if t.get('id') is not None else '',
2209             t.get('url')))
2210
2211     def _sanitize_thumbnails(self, info_dict):
2212         thumbnails = info_dict.get('thumbnails')
2213         if thumbnails is None:
2214             thumbnail = info_dict.get('thumbnail')
2215             if thumbnail:
2216                 info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}]
2217         if not thumbnails:
2218             return
2219
2220         def check_thumbnails(thumbnails):
2221             for t in thumbnails:
2222                 self.to_screen(f'[info] Testing thumbnail {t["id"]}')
2223                 try:
2224                     self.urlopen(HEADRequest(t['url']))
2225                 except network_exceptions as err:
2226                     self.to_screen(f'[info] Unable to connect to thumbnail {t["id"]} URL {t["url"]!r} - {err}. Skipping...')
2227                     continue
2228                 yield t
2229
2230         self._sort_thumbnails(thumbnails)
2231         for i, t in enumerate(thumbnails):
2232             if t.get('id') is None:
2233                 t['id'] = '%d' % i
2234             if t.get('width') and t.get('height'):
2235                 t['resolution'] = '%dx%d' % (t['width'], t['height'])
2236             t['url'] = sanitize_url(t['url'])
2237
2238         if self.params.get('check_formats') is True:
2239             info_dict['thumbnails'] = LazyList(check_thumbnails(thumbnails[::-1]), reverse=True)
2240         else:
2241             info_dict['thumbnails'] = thumbnails
2242
    def process_video_result(self, info_dict, download=True):
        """Normalize a single video's info dict and process the selected formats

        The info dict is sanitized in place (ids, numeric fields, thumbnails,
        dates, live status, subtitles and formats), the requested formats are
        chosen with the format selector and, if download is true, each of them
        is passed to process_info.

        Returns the info_dict, updated with the last selected format, or None
        when only listing (thumbnails/formats/subtitles) was requested.

        Raises ExtractorError if "id" or "title" is missing, or if no format
        matches the request and 'ignore_no_formats_error' is not set.
        """
        assert info_dict.get('_type', 'video') == 'video'

        if 'id' not in info_dict:
            raise ExtractorError('Missing "id" field in extractor result')
        if 'title' not in info_dict:
            raise ExtractorError('Missing "title" field in extractor result',
                                 video_id=info_dict['id'], ie=info_dict['extractor'])

        # Helpers that coerce wrongly-typed fields and warn about the extractor error
        def report_force_conversion(field, field_not, conversion):
            self.report_warning(
                '"%s" field is not %s - forcing %s conversion, there is an error in extractor'
                % (field, field_not, conversion))

        def sanitize_string_field(info, string_field):
            # Convert a non-string value of info[string_field] to a string, in place
            field = info.get(string_field)
            if field is None or isinstance(field, compat_str):
                return
            report_force_conversion(string_field, 'a string', 'string')
            info[string_field] = compat_str(field)

        def sanitize_numeric_fields(info):
            # Convert non-numeric values of the known numeric fields with int_or_none, in place
            for numeric_field in self._NUMERIC_FIELDS:
                field = info.get(numeric_field)
                if field is None or isinstance(field, compat_numeric_types):
                    continue
                report_force_conversion(numeric_field, 'numeric', 'int')
                info[numeric_field] = int_or_none(field)

        sanitize_string_field(info_dict, 'id')
        sanitize_numeric_fields(info_dict)

        if 'playlist' not in info_dict:
            # It isn't part of a playlist
            info_dict['playlist'] = None
            info_dict['playlist_index'] = None

        self._sanitize_thumbnails(info_dict)

        # Prefer an explicit 'thumbnail'; otherwise fall back to the last entry of 'thumbnails'
        thumbnail = info_dict.get('thumbnail')
        thumbnails = info_dict.get('thumbnails')
        if thumbnail:
            info_dict['thumbnail'] = sanitize_url(thumbnail)
        elif thumbnails:
            info_dict['thumbnail'] = thumbnails[-1]['url']

        if info_dict.get('display_id') is None and 'id' in info_dict:
            info_dict['display_id'] = info_dict['id']

        if info_dict.get('duration') is not None:
            info_dict['duration_string'] = formatSeconds(info_dict['duration'])

        # Derive missing date fields (YYYYMMDD, UTC) from the corresponding timestamps
        for ts_key, date_key in (
                ('timestamp', 'upload_date'),
                ('release_timestamp', 'release_date'),
        ):
            if info_dict.get(date_key) is None and info_dict.get(ts_key) is not None:
                # Working around out-of-range timestamp values (e.g. negative ones on Windows,
                # see http://bugs.python.org/issue1646728)
                try:
                    upload_date = datetime.datetime.utcfromtimestamp(info_dict[ts_key])
                    info_dict[date_key] = upload_date.strftime('%Y%m%d')
                except (ValueError, OverflowError, OSError):
                    pass

        # Infer 'live_status' from the is_live/was_live flags when it is not given
        live_keys = ('is_live', 'was_live')
        live_status = info_dict.get('live_status')
        if live_status is None:
            for key in live_keys:
                # Skip flags that are explicitly False; stop at the first flag
                # that is anything else, using it only if it is truthy
                if info_dict.get(key) is False:
                    continue
                if info_dict.get(key):
                    live_status = key
                break
            if all(info_dict.get(key) is False for key in live_keys):
                live_status = 'not_live'
        if live_status:
            info_dict['live_status'] = live_status
            # Back-fill the flags that are still None from the status
            for key in live_keys:
                if info_dict.get(key) is None:
                    info_dict[key] = (live_status == key)

        # Auto generate title fields corresponding to the *_number fields when missing
        # in order to always have clean titles. This is very common for TV series.
        for field in ('chapter', 'season', 'episode'):
            if info_dict.get('%s_number' % field) is not None and not info_dict.get(field):
                info_dict[field] = '%s %d' % (field.capitalize(), info_dict['%s_number' % field])

        # Sanitize subtitle URLs and fill in missing extensions from the URL
        for cc_kind in ('subtitles', 'automatic_captions'):
            cc = info_dict.get(cc_kind)
            if cc:
                for _, subtitle in cc.items():
                    for subtitle_format in subtitle:
                        if subtitle_format.get('url'):
                            subtitle_format['url'] = sanitize_url(subtitle_format['url'])
                        if subtitle_format.get('ext') is None:
                            subtitle_format['ext'] = determine_ext(subtitle_format['url']).lower()

        automatic_captions = info_dict.get('automatic_captions')
        subtitles = info_dict.get('subtitles')

        info_dict['requested_subtitles'] = self.process_subtitles(
            info_dict['id'], subtitles, automatic_captions)

        if info_dict.get('formats') is None:
            # There's only one format available
            formats = [info_dict]
        else:
            formats = info_dict['formats']

        # Remember whether any format has DRM, then drop those formats unless
        # unplayable formats are explicitly allowed
        info_dict['__has_drm'] = any(f.get('has_drm') for f in formats)
        if not self.params.get('allow_unplayable_formats'):
            formats = [f for f in formats if not f.get('has_drm')]

        if not formats:
            self.raise_no_formats(info_dict)

        def is_wellformed(f):
            # A format is usable only if it has a non-empty 'url'
            url = f.get('url')
            if not url:
                self.report_warning(
                    '"url" field is missing or empty - skipping format, '
                    'there is an error in extractor')
                return False
            if isinstance(url, bytes):
                sanitize_string_field(f, 'url')
            return True

        # Filter out malformed formats for better extraction robustness
        formats = list(filter(is_wellformed, formats))

        # Maps each format_id to the list of formats that share it
        formats_dict = {}

        # We check that all the formats have the format and format_id fields
        for i, format in enumerate(formats):
            sanitize_string_field(format, 'format_id')
            sanitize_numeric_fields(format)
            format['url'] = sanitize_url(format['url'])
            if not format.get('format_id'):
                format['format_id'] = compat_str(i)
            else:
                # Sanitize format_id from characters used in format selector expression
                format['format_id'] = re.sub(r'[\s,/+\[\]()]', '_', format['format_id'])
            format_id = format['format_id']
            if format_id not in formats_dict:
                formats_dict[format_id] = []
            formats_dict[format_id].append(format)

        # Make sure all formats have unique format_id
        common_exts = set(itertools.chain(*self._format_selection_exts.values()))
        for format_id, ambiguous_formats in formats_dict.items():
            ambigious_id = len(ambiguous_formats) > 1
            for i, format in enumerate(ambiguous_formats):
                if ambigious_id:
                    # Disambiguate duplicates by appending their index
                    format['format_id'] = '%s-%d' % (format_id, i)
                if format.get('ext') is None:
                    format['ext'] = determine_ext(format['url']).lower()
                # Ensure there is no conflict between id and ext in format selection
                # See https://github.com/yt-dlp/yt-dlp/issues/1282
                if format['format_id'] != format['ext'] and format['format_id'] in common_exts:
                    format['format_id'] = 'f%s' % format['format_id']

        # Fill in the derived fields that the extractor did not provide
        for i, format in enumerate(formats):
            if format.get('format') is None:
                format['format'] = '{id} - {res}{note}'.format(
                    id=format['format_id'],
                    res=self.format_resolution(format),
                    note=format_field(format, 'format_note', ' (%s)'),
                )
            if format.get('protocol') is None:
                format['protocol'] = determine_protocol(format)
            if format.get('resolution') is None:
                format['resolution'] = self.format_resolution(format, default=None)
            if format.get('dynamic_range') is None and format.get('vcodec') != 'none':
                format['dynamic_range'] = 'SDR'
            if (info_dict.get('duration') and format.get('tbr')
                    and not format.get('filesize') and not format.get('filesize_approx')):
                # Estimate the size from duration and total bitrate
                # (presumably seconds and KBit/s, giving bytes - TODO confirm)
                format['filesize_approx'] = info_dict['duration'] * format['tbr'] * (1024 / 8)

            # Add HTTP headers, so that external programs can use them from the
            # json output
            full_format_info = info_dict.copy()
            full_format_info.update(format)
            format['http_headers'] = self._calc_headers(full_format_info)
        # Remove private housekeeping stuff
        if '__x_forwarded_for_ip' in info_dict:
            del info_dict['__x_forwarded_for_ip']

        # TODO Central sorting goes here

        # With check_formats=True, wrap the formats in a LazyList that is fed by
        # _check_formats over the reversed list
        if self.params.get('check_formats') is True:
            formats = LazyList(self._check_formats(formats[::-1]), reverse=True)

        if not formats or formats[0] is not info_dict:
            # only set the 'formats' fields if the original info_dict list them
            # otherwise we end up with a circular reference, the first (and unique)
            # element in the 'formats' field in info_dict is info_dict itself,
            # which can't be exported to json
            info_dict['formats'] = formats

        info_dict, _ = self.pre_process(info_dict)

        # The pre-processors may have modified the formats
        formats = info_dict.get('formats', [info_dict])

        if self.params.get('list_thumbnails'):
            self.list_thumbnails(info_dict)
        if self.params.get('listformats'):
            if not info_dict.get('formats') and not info_dict.get('url'):
                self.to_screen('%s has no formats' % info_dict['id'])
            else:
                self.list_formats(info_dict)
        if self.params.get('listsubtitles'):
            if 'automatic_captions' in info_dict:
                self.list_subtitles(
                    info_dict['id'], automatic_captions, 'automatic captions')
            self.list_subtitles(info_dict['id'], subtitles, 'subtitles')
        # Stop after listing only when 'simulate' was left unset (None)
        list_only = self.params.get('simulate') is None and (
            self.params.get('list_thumbnails') or self.params.get('listformats') or self.params.get('listsubtitles'))
        if list_only:
            # Without this printing, -F --print-json will not work
            self.__forced_printings(info_dict, self.prepare_filename(info_dict), incomplete=True)
            return

        # Use the configured selector, or build one from the default format spec
        format_selector = self.format_selector
        if format_selector is None:
            req_format = self._default_format_spec(info_dict, download=download)
            self.write_debug('Default format spec: %s' % req_format)
            format_selector = self.build_format_selector(req_format)

        # While in format selection we may need to have an access to the original
        # format set in order to calculate some metrics or do some processing.
        # For now we need to be able to guess whether original formats provided
        # by extractor are incomplete or not (i.e. whether extractor provides only
        # video-only or audio-only formats) for proper formats selection for
        # extractors with such incomplete formats (see
        # https://github.com/ytdl-org/youtube-dl/pull/5556).
        # Since formats may be filtered during format selection and may not match
        # the original formats the results may be incorrect. Thus original formats
        # or pre-calculated metrics should be passed to format selection routines
        # as well.
        # We will pass a context object containing all necessary additional data
        # instead of just formats.
        # This fixes incorrect format selection issue (see
        # https://github.com/ytdl-org/youtube-dl/issues/10083).
        incomplete_formats = (
            # All formats are video-only or
            all(f.get('vcodec') != 'none' and f.get('acodec') == 'none' for f in formats)
            # all formats are audio-only
            or all(f.get('vcodec') == 'none' and f.get('acodec') != 'none' for f in formats))

        ctx = {
            'formats': formats,
            'incomplete_formats': incomplete_formats,
        }

        formats_to_download = list(format_selector(ctx))
        if not formats_to_download:
            if not self.params.get('ignore_no_formats_error'):
                raise ExtractorError('Requested format is not available', expected=True,
                                     video_id=info_dict['id'], ie=info_dict['extractor'])
            else:
                self.report_warning('Requested format is not available')
                # Process what we can, even without any available formats.
                # NOTE: this branch does not check the `download` argument
                self.process_info(dict(info_dict))
        elif download:
            self.to_screen(
                '[info] %s: Downloading %d format(s): %s' % (
                    info_dict['id'], len(formats_to_download),
                    ", ".join([f['format_id'] for f in formats_to_download])))
            for fmt in formats_to_download:
                # Each selected format is processed with its own copy of the info dict
                new_info = dict(info_dict)
                # Save a reference to the original info_dict so that it can be modified in process_info if needed
                new_info['__original_infodict'] = info_dict
                new_info.update(fmt)
                self.process_info(new_info)
        # We update the info dict with the selected best quality format (backwards compatibility)
        if formats_to_download:
            info_dict.update(formats_to_download[-1])
        return info_dict
2523
2524     def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
2525         """Select the requested subtitles and their format"""
2526         available_subs = {}
2527         if normal_subtitles and self.params.get('writesubtitles'):
2528             available_subs.update(normal_subtitles)
2529         if automatic_captions and self.params.get('writeautomaticsub'):
2530             for lang, cap_info in automatic_captions.items():
2531                 if lang not in available_subs:
2532                     available_subs[lang] = cap_info
2533
2534         if (not self.params.get('writesubtitles') and not
2535                 self.params.get('writeautomaticsub') or not
2536                 available_subs):
2537             return None
2538
2539         all_sub_langs = available_subs.keys()
2540         if self.params.get('allsubtitles', False):
2541             requested_langs = all_sub_langs
2542         elif self.params.get('subtitleslangs', False):
2543             # A list is used so that the order of languages will be the same as
2544             # given in subtitleslangs. See https://github.com/yt-dlp/yt-dlp/issues/1041
2545             requested_langs = []
2546             for lang_re in self.params.get('subtitleslangs'):
2547                 if lang_re == 'all':
2548                     requested_langs.extend(all_sub_langs)
2549                     continue
2550                 discard = lang_re[0] == '-'
2551                 if discard:
2552                     lang_re = lang_re[1:]
2553                 current_langs = filter(re.compile(lang_re + '$').match, all_sub_langs)
2554                 if discard:
2555                     for lang in current_langs:
2556                         while lang in requested_langs:
2557                             requested_langs.remove(lang)
2558                 else:
2559                     requested_langs.extend(current_langs)
2560             requested_langs = orderedSet(requested_langs)
2561         elif 'en' in available_subs:
2562             requested_langs = ['en']
2563         else:
2564             requested_langs = [list(all_sub_langs)[0]]
2565         if requested_langs:
2566             self.write_debug('Downloading subtitles: %s' % ', '.join(requested_langs))
2567
2568         formats_query = self.params.get('subtitlesformat', 'best')
2569         formats_preference = formats_query.split('/') if formats_query else []
2570         subs = {}
2571         for lang in requested_langs:
2572             formats = available_subs.get(lang)
2573             if formats is None:
2574                 self.report_warning('%s subtitles not available for %s' % (lang, video_id))
2575                 continue
2576             for ext in formats_preference:
2577                 if ext == 'best':
2578                     f = formats[-1]
2579                     break
2580                 matches = list(filter(lambda f: f['ext'] == ext, formats))
2581                 if matches:
2582                     f = matches[-1]
2583                     break
2584             else:
2585                 f = formats[-1]
2586                 self.report_warning(
2587                     'No subtitle format found matching "%s" for language %s, '
2588                     'using %s' % (formats_query, lang, f['ext']))
2589             subs[lang] = f
2590         return subs
2591
2592     def __forced_printings(self, info_dict, filename, incomplete):
2593         def print_mandatory(field, actual_field=None):
2594             if actual_field is None:
2595                 actual_field = field
2596             if (self.params.get('force%s' % field, False)
2597                     and (not incomplete or info_dict.get(actual_field) is not None)):
2598                 self.to_stdout(info_dict[actual_field])
2599
2600         def print_optional(field):
2601             if (self.params.get('force%s' % field, False)
2602                     and info_dict.get(field) is not None):
2603                 self.to_stdout(info_dict[field])
2604
2605         info_dict = info_dict.copy()
2606         if filename is not None:
2607             info_dict['filename'] = filename
2608         if info_dict.get('requested_formats') is not None:
2609             # For RTMP URLs, also include the playpath
2610             info_dict['urls'] = '\n'.join(f['url'] + f.get('play_path', '') for f in info_dict['requested_formats'])
2611         elif 'url' in info_dict:
2612             info_dict['urls'] = info_dict['url'] + info_dict.get('play_path', '')
2613
2614         if self.params.get('forceprint') or self.params.get('forcejson'):
2615             self.post_extract(info_dict)
2616         for tmpl in self.params.get('forceprint', []):
2617             mobj = re.match(r'\w+(=?)$', tmpl)
2618             if mobj and mobj.group(1):
2619                 tmpl = f'{tmpl[:-1]} = %({tmpl[:-1]})s'
2620             elif mobj:
2621                 tmpl = '%({})s'.format(tmpl)
2622             self.to_stdout(self.evaluate_outtmpl(tmpl, info_dict))
2623
2624         print_mandatory('title')
2625         print_mandatory('id')
2626         print_mandatory('url', 'urls')
2627         print_optional('thumbnail')
2628         print_optional('description')
2629         print_optional('filename')
2630         if self.params.get('forceduration') and info_dict.get('duration') is not None:
2631             self.to_stdout(formatSeconds(info_dict['duration']))
2632         print_mandatory('format')
2633
2634         if self.params.get('forcejson'):
2635             self.to_stdout(json.dumps(self.sanitize_info(info_dict)))
2636
2637     def dl(self, name, info, subtitle=False, test=False):
2638         if not info.get('url'):
2639             self.raise_no_formats(info, True)
2640
2641         if test:
2642             verbose = self.params.get('verbose')
2643             params = {
2644                 'test': True,
2645                 'quiet': self.params.get('quiet') or not verbose,
2646                 'verbose': verbose,
2647                 'noprogress': not verbose,
2648                 'nopart': True,
2649                 'skip_unavailable_fragments': False,
2650                 'keep_fragments': False,
2651                 'overwrites': True,
2652                 '_no_ytdl_file': True,
2653             }
2654         else:
2655             params = self.params
2656         fd = get_suitable_downloader(info, params, to_stdout=(name == '-'))(self, params)
2657         if not test:
2658             for ph in self._progress_hooks:
2659                 fd.add_progress_hook(ph)
2660             urls = '", "'.join([f['url'] for f in info.get('requested_formats', [])] or [info['url']])
2661             self.write_debug('Invoking downloader on "%s"' % urls)
2662
2663         new_info = copy.deepcopy(self._copy_infodict(info))
2664         if new_info.get('http_headers') is None:
2665             new_info['http_headers'] = self._calc_headers(new_info)
2666         return fd.download(name, new_info, subtitle)
2667
2668     def process_info(self, info_dict):
2669         """Process a single resolved IE result."""
2670
2671         assert info_dict.get('_type', 'video') == 'video'
2672
2673         max_downloads = self.params.get('max_downloads')
2674         if max_downloads is not None:
2675             if self._num_downloads >= int(max_downloads):
2676                 raise MaxDownloadsReached()
2677
2678         if info_dict.get('is_live'):
2679             info_dict['title'] += ' ' + datetime.datetime.now().strftime('%Y-%m-%d %H:%M')
2680
2681         # TODO: backward compatibility, to be removed
2682         info_dict['fulltitle'] = info_dict['title']
2683
2684         if 'format' not in info_dict and 'ext' in info_dict:
2685             info_dict['format'] = info_dict['ext']
2686
2687         if self._match_entry(info_dict) is not None:
2688             return
2689
2690         self.post_extract(info_dict)
2691         self._num_downloads += 1
2692
2693         # info_dict['_filename'] needs to be set for backward compatibility
2694         info_dict['_filename'] = full_filename = self.prepare_filename(info_dict, warn=True)
2695         temp_filename = self.prepare_filename(info_dict, 'temp')
2696         files_to_move = {}
2697
2698         # Forced printings
2699         self.__forced_printings(info_dict, full_filename, incomplete=('format' not in info_dict))
2700
2701         if self.params.get('simulate'):
2702             if self.params.get('force_write_download_archive', False):
2703                 self.record_download_archive(info_dict)
2704             # Do nothing else if in simulate mode
2705             return
2706
2707         if full_filename is None:
2708             return
2709         if not self._ensure_dir_exists(encodeFilename(full_filename)):
2710             return
2711         if not self._ensure_dir_exists(encodeFilename(temp_filename)):
2712             return
2713
2714         if self._write_description('video', info_dict,
2715                                    self.prepare_filename(info_dict, 'description')) is None:
2716             return
2717
2718         sub_files = self._write_subtitles(info_dict, temp_filename)
2719         if sub_files is None:
2720             return
2721         files_to_move.update(dict(sub_files))
2722
2723         thumb_files = self._write_thumbnails(
2724             'video', info_dict, temp_filename, self.prepare_filename(info_dict, 'thumbnail'))
2725         if thumb_files is None:
2726             return
2727         files_to_move.update(dict(thumb_files))
2728
2729         infofn = self.prepare_filename(info_dict, 'infojson')
2730         _infojson_written = self._write_info_json('video', info_dict, infofn)
2731         if _infojson_written:
2732             info_dict['infojson_filename'] = infofn
2733             # For backward compatibility, even though it was a private field
2734             info_dict['__infojson_filename'] = infofn
2735         elif _infojson_written is None:
2736             return
2737
2738         # Note: Annotations are deprecated
2739         annofn = None
2740         if self.params.get('writeannotations', False):
2741             annofn = self.prepare_filename(info_dict, 'annotation')
2742         if annofn:
2743             if not self._ensure_dir_exists(encodeFilename(annofn)):
2744                 return
2745             if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(annofn)):
2746                 self.to_screen('[info] Video annotations are already present')
2747             elif not info_dict.get('annotations'):
2748                 self.report_warning('There are no annotations to write.')
2749             else:
2750                 try:
2751                     self.to_screen('[info] Writing video annotations to: ' + annofn)
2752                     with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
2753                         annofile.write(info_dict['annotations'])
2754                 except (KeyError, TypeError):
2755                     self.report_warning('There are no annotations to write.')
2756                 except (OSError, IOError):
2757                     self.report_error('Cannot write annotations file: ' + annofn)
2758                     return
2759
2760         # Write internet shortcut files
2761         def _write_link_file(link_type):
2762             if 'webpage_url' not in info_dict:
2763                 self.report_error('Cannot write internet shortcut file because the "webpage_url" field is missing in the media information')
2764                 return False
2765             linkfn = replace_extension(self.prepare_filename(info_dict, 'link'), link_type, info_dict.get('ext'))
2766             if not self._ensure_dir_exists(encodeFilename(linkfn)):
2767                 return False
2768             if self.params.get('overwrites', True) and os.path.exists(encodeFilename(linkfn)):
2769                 self.to_screen(f'[info] Internet shortcut (.{link_type}) is already present')
2770                 return True
2771             try:
2772                 self.to_screen(f'[info] Writing internet shortcut (.{link_type}) to: {linkfn}')
2773                 with io.open(encodeFilename(to_high_limit_path(linkfn)), 'w', encoding='utf-8',
2774                              newline='\r\n' if link_type == 'url' else '\n') as linkfile:
2775                     template_vars = {'url': iri_to_uri(info_dict['webpage_url'])}
2776                     if link_type == 'desktop':
2777                         template_vars['filename'] = linkfn[:-(len(link_type) + 1)]
2778                     linkfile.write(LINK_TEMPLATES[link_type] % template_vars)
2779             except (OSError, IOError):
2780                 self.report_error(f'Cannot write internet shortcut {linkfn}')
2781                 return False
2782             return True
2783
2784         write_links = {
2785             'url': self.params.get('writeurllink'),
2786             'webloc': self.params.get('writewebloclink'),
2787             'desktop': self.params.get('writedesktoplink'),
2788         }
2789         if self.params.get('writelink'):
2790             link_type = ('webloc' if sys.platform == 'darwin'
2791                          else 'desktop' if sys.platform.startswith('linux')
2792                          else 'url')
2793             write_links[link_type] = True
2794
2795         if any(should_write and not _write_link_file(link_type)
2796                for link_type, should_write in write_links.items()):
2797             return
2798
2799         try:
2800             info_dict, files_to_move = self.pre_process(info_dict, 'before_dl', files_to_move)
2801         except PostProcessingError as err:
2802             self.report_error('Preprocessing: %s' % str(err))
2803             return
2804
2805         must_record_download_archive = False
2806         if self.params.get('skip_download', False):
2807             info_dict['filepath'] = temp_filename
2808             info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))
2809             info_dict['__files_to_move'] = files_to_move
2810             info_dict = self.run_pp(MoveFilesAfterDownloadPP(self, False), info_dict)
2811         else:
2812             # Download
2813             info_dict.setdefault('__postprocessors', [])
2814             try:
2815
2816                 def existing_file(*filepaths):
2817                     ext = info_dict.get('ext')
2818                     final_ext = self.params.get('final_ext', ext)
2819                     existing_files = []
2820                     for file in orderedSet(filepaths):
2821                         if final_ext != ext:
2822                             converted = replace_extension(file, final_ext, ext)
2823                             if os.path.exists(encodeFilename(converted)):
2824                                 existing_files.append(converted)
2825                         if os.path.exists(encodeFilename(file)):
2826                             existing_files.append(file)
2827
2828                     if not existing_files or self.params.get('overwrites', False):
2829                         for file in orderedSet(existing_files):
2830                             self.report_file_delete(file)
2831                             os.remove(encodeFilename(file))
2832                         return None
2833
2834                     info_dict['ext'] = os.path.splitext(existing_files[0])[1][1:]
2835                     return existing_files[0]
2836
2837                 success = True
2838                 if info_dict.get('requested_formats') is not None:
2839
2840                     def compatible_formats(formats):
2841                         # TODO: some formats actually allow this (mkv, webm, ogg, mp4), but not all of them.
2842                         video_formats = [format for format in formats if format.get('vcodec') != 'none']
2843                         audio_formats = [format for format in formats if format.get('acodec') != 'none']
2844                         if len(video_formats) > 2 or len(audio_formats) > 2:
2845                             return False
2846
2847                         # Check extension
2848                         exts = set(format.get('ext') for format in formats)
2849                         COMPATIBLE_EXTS = (
2850                             set(('mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'ismv', 'isma')),
2851                             set(('webm',)),
2852                         )
2853                         for ext_sets in COMPATIBLE_EXTS:
2854                             if ext_sets.issuperset(exts):
2855                                 return True
2856                         # TODO: Check acodec/vcodec
2857                         return False
2858
2859                     requested_formats = info_dict['requested_formats']
2860                     old_ext = info_dict['ext']
2861                     if self.params.get('merge_output_format') is None:
2862                         if not compatible_formats(requested_formats):
2863                             info_dict['ext'] = 'mkv'
2864                             self.report_warning(
2865                                 'Requested formats are incompatible for merge and will be merged into mkv')
2866                         if (info_dict['ext'] == 'webm'
2867                                 and info_dict.get('thumbnails')
2868                                 # check with type instead of pp_key, __name__, or isinstance
2869                                 # since we dont want any custom PPs to trigger this
2870                                 and any(type(pp) == EmbedThumbnailPP for pp in self._pps['post_process'])):
2871                             info_dict['ext'] = 'mkv'
2872                             self.report_warning(
2873                                 'webm doesn\'t support embedding a thumbnail, mkv will be used')
2874                     new_ext = info_dict['ext']
2875
2876                     def correct_ext(filename, ext=new_ext):
2877                         if filename == '-':
2878                             return filename
2879                         filename_real_ext = os.path.splitext(filename)[1][1:]
2880                         filename_wo_ext = (
2881                             os.path.splitext(filename)[0]
2882                             if filename_real_ext in (old_ext, new_ext)
2883                             else filename)
2884                         return '%s.%s' % (filename_wo_ext, ext)
2885
2886                     # Ensure filename always has a correct extension for successful merge
2887                     full_filename = correct_ext(full_filename)
2888                     temp_filename = correct_ext(temp_filename)
2889                     dl_filename = existing_file(full_filename, temp_filename)
2890                     info_dict['__real_download'] = False
2891
2892                     if dl_filename is not None:
2893                         self.report_file_already_downloaded(dl_filename)
2894                     elif get_suitable_downloader(info_dict, self.params, to_stdout=temp_filename == '-'):
2895                         info_dict['url'] = '\n'.join(f['url'] for f in requested_formats)
2896                         success, real_download = self.dl(temp_filename, info_dict)
2897                         info_dict['__real_download'] = real_download
2898                     else:
2899                         downloaded = []
2900                         merger = FFmpegMergerPP(self)
2901                         if self.params.get('allow_unplayable_formats'):
2902                             self.report_warning(
2903                                 'You have requested merging of multiple formats '
2904                                 'while also allowing unplayable formats to be downloaded. '
2905                                 'The formats won\'t be merged to prevent data corruption.')
2906                         elif not merger.available:
2907                             self.report_warning(
2908                                 'You have requested merging of multiple formats but ffmpeg is not installed. '
2909                                 'The formats won\'t be merged.')
2910
2911                         if temp_filename == '-':
2912                             reason = ('using a downloader other than ffmpeg' if FFmpegFD.can_merge_formats(info_dict)
2913                                       else 'but the formats are incompatible for simultaneous download' if merger.available
2914                                       else 'but ffmpeg is not installed')
2915                             self.report_warning(
2916                                 f'You have requested downloading multiple formats to stdout {reason}. '
2917                                 'The formats will be streamed one after the other')
2918                             fname = temp_filename
2919                         for f in requested_formats:
2920                             new_info = dict(info_dict)
2921                             del new_info['requested_formats']
2922                             new_info.update(f)
2923                             if temp_filename != '-':
2924                                 fname = prepend_extension(
2925                                     correct_ext(temp_filename, new_info['ext']),
2926                                     'f%s' % f['format_id'], new_info['ext'])
2927                                 if not self._ensure_dir_exists(fname):
2928                                     return
2929                                 f['filepath'] = fname
2930                                 downloaded.append(fname)
2931                             partial_success, real_download = self.dl(fname, new_info)
2932                             info_dict['__real_download'] = info_dict['__real_download'] or real_download
2933                             success = success and partial_success
2934                         if merger.available and not self.params.get('allow_unplayable_formats'):
2935                             info_dict['__postprocessors'].append(merger)
2936                             info_dict['__files_to_merge'] = downloaded
2937                             # Even if there were no downloads, it is being merged only now
2938                             info_dict['__real_download'] = True
2939                         else:
2940                             for file in downloaded:
2941                                 files_to_move[file] = None
2942                 else:
2943                     # Just a single file
2944                     dl_filename = existing_file(full_filename, temp_filename)
2945                     if dl_filename is None or dl_filename == temp_filename:
2946                         # dl_filename == temp_filename could mean that the file was partially downloaded with --no-part.
2947                         # So we should try to resume the download
2948                         success, real_download = self.dl(temp_filename, info_dict)
2949                         info_dict['__real_download'] = real_download
2950                     else:
2951                         self.report_file_already_downloaded(dl_filename)
2952
2953                 dl_filename = dl_filename or temp_filename
2954                 info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))
2955
2956             except network_exceptions as err:
2957                 self.report_error('unable to download video data: %s' % error_to_compat_str(err))
2958                 return
2959             except (OSError, IOError) as err:
2960                 raise UnavailableVideoError(err)
2961             except (ContentTooShortError, ) as err:
2962                 self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
2963                 return
2964
2965             if success and full_filename != '-':
2966
2967                 def fixup():
2968                     do_fixup = True
2969                     fixup_policy = self.params.get('fixup')
2970                     vid = info_dict['id']
2971
2972                     if fixup_policy in ('ignore', 'never'):
2973                         return
2974                     elif fixup_policy == 'warn':
2975                         do_fixup = False
2976                     elif fixup_policy != 'force':
2977                         assert fixup_policy in ('detect_or_warn', None)
2978                         if not info_dict.get('__real_download'):
2979                             do_fixup = False
2980
2981                     def ffmpeg_fixup(cndn, msg, cls):
2982                         if not cndn:
2983                             return
2984                         if not do_fixup:
2985                             self.report_warning(f'{vid}: {msg}')
2986                             return
2987                         pp = cls(self)
2988                         if pp.available:
2989                             info_dict['__postprocessors'].append(pp)
2990                         else:
2991                             self.report_warning(f'{vid}: {msg}. Install ffmpeg to fix this automatically')
2992
2993                     stretched_ratio = info_dict.get('stretched_ratio')
2994                     ffmpeg_fixup(
2995                         stretched_ratio not in (1, None),
2996                         f'Non-uniform pixel ratio {stretched_ratio}',
2997                         FFmpegFixupStretchedPP)
2998
2999                     ffmpeg_fixup(
3000                         (info_dict.get('requested_formats') is None
3001                          and info_dict.get('container') == 'm4a_dash'
3002                          and info_dict.get('ext') == 'm4a'),
3003                         'writing DASH m4a. Only some players support this container',
3004                         FFmpegFixupM4aPP)
3005
3006                     downloader = get_suitable_downloader(info_dict, self.params) if 'protocol' in info_dict else None
3007                     downloader = downloader.__name__ if downloader else None
3008                     ffmpeg_fixup(info_dict.get('requested_formats') is None and downloader == 'HlsFD',
3009                                  'Possible MPEG-TS in MP4 container or malformed AAC timestamps',
3010                                  FFmpegFixupM3u8PP)
3011                     ffmpeg_fixup(downloader == 'WebSocketFragmentFD', 'Malformed timestamps detected', FFmpegFixupTimestampPP)
3012                     ffmpeg_fixup(downloader == 'WebSocketFragmentFD', 'Malformed duration detected', FFmpegFixupDurationPP)
3013
3014                 fixup()
3015                 try:
3016                     info_dict = self.post_process(dl_filename, info_dict, files_to_move)
3017                 except PostProcessingError as err:
3018                     self.report_error('Postprocessing: %s' % str(err))
3019                     return
3020                 try:
3021                     for ph in self._post_hooks:
3022                         ph(info_dict['filepath'])
3023                 except Exception as err:
3024                     self.report_error('post hooks: %s' % str(err))
3025                     return
3026                 must_record_download_archive = True
3027
3028         if must_record_download_archive or self.params.get('force_write_download_archive', False):
3029             self.record_download_archive(info_dict)
3030         max_downloads = self.params.get('max_downloads')
3031         if max_downloads is not None and self._num_downloads >= int(max_downloads):
3032             raise MaxDownloadsReached()
3033
3034     def __download_wrapper(self, func):
3035         @functools.wraps(func)
3036         def wrapper(*args, **kwargs):
3037             try:
3038                 res = func(*args, **kwargs)
3039             except UnavailableVideoError as e:
3040                 self.report_error(e)
3041             except MaxDownloadsReached as e:
3042                 self.to_screen(f'[info] {e}')
3043                 raise
3044             except DownloadCancelled as e:
3045                 self.to_screen(f'[info] {e}')
3046                 if not self.params.get('break_per_url'):
3047                     raise
3048             else:
3049                 if self.params.get('dump_single_json', False):
3050                     self.post_extract(res)
3051                     self.to_stdout(json.dumps(self.sanitize_info(res)))
3052         return wrapper
3053
3054     def download(self, url_list):
3055         """Download a given list of URLs."""
3056         url_list = variadic(url_list)  # Passing a single URL is a common mistake
3057         outtmpl = self.outtmpl_dict['default']
3058         if (len(url_list) > 1
3059                 and outtmpl != '-'
3060                 and '%' not in outtmpl
3061                 and self.params.get('max_downloads') != 1):
3062             raise SameFileError(outtmpl)
3063
3064         for url in url_list:
3065             self.__download_wrapper(self.extract_info)(
3066                 url, force_generic_extractor=self.params.get('force_generic_extractor', False))
3067
3068         return self._download_retcode
3069
3070     def download_with_info_file(self, info_filename):
3071         with contextlib.closing(fileinput.FileInput(
3072                 [info_filename], mode='r',
3073                 openhook=fileinput.hook_encoded('utf-8'))) as f:
3074             # FileInput doesn't have a read method, we can't call json.load
3075             info = self.sanitize_info(json.loads('\n'.join(f)), self.params.get('clean_infojson', True))
3076         try:
3077             self.__download_wrapper(self.process_ie_result)(info, download=True)
3078         except (DownloadError, EntryNotInPlaylist, ReExtractInfo) as e:
3079             if not isinstance(e, EntryNotInPlaylist):
3080                 self.to_stderr('\r')
3081             webpage_url = info.get('webpage_url')
3082             if webpage_url is not None:
3083                 self.report_warning(f'The info failed to download: {e}; trying with URL {webpage_url}')
3084                 return self.download([webpage_url])
3085             else:
3086                 raise
3087         return self._download_retcode
3088
3089     @staticmethod
3090     def sanitize_info(info_dict, remove_private_keys=False):
3091         ''' Sanitize the infodict for converting to json '''
3092         if info_dict is None:
3093             return info_dict
3094         info_dict.setdefault('epoch', int(time.time()))
3095         remove_keys = {'__original_infodict'}  # Always remove this since this may contain a copy of the entire dict
3096         keep_keys = ['_type']  # Always keep this to facilitate load-info-json
3097         if remove_private_keys:
3098             remove_keys |= {
3099                 'requested_formats', 'requested_subtitles', 'requested_entries', 'entries',
3100                 'filepath', 'infojson_filename', 'original_url', 'playlist_autonumber',
3101             }
3102             empty_values = (None, {}, [], set(), tuple())
3103             reject = lambda k, v: k not in keep_keys and (
3104                 k.startswith('_') or k in remove_keys or v in empty_values)
3105         else:
3106             reject = lambda k, v: k in remove_keys
3107         filter_fn = lambda obj: (
3108             list(map(filter_fn, obj)) if isinstance(obj, (LazyList, list, tuple, set))
3109             else obj if not isinstance(obj, dict)
3110             else dict((k, filter_fn(v)) for k, v in obj.items() if not reject(k, v)))
3111         return filter_fn(info_dict)
3112
3113     @staticmethod
3114     def filter_requested_info(info_dict, actually_filter=True):
3115         ''' Alias of sanitize_info for backward compatibility '''
3116         return YoutubeDL.sanitize_info(info_dict, actually_filter)
3117
3118     def run_pp(self, pp, infodict):
3119         files_to_delete = []
3120         if '__files_to_move' not in infodict:
3121             infodict['__files_to_move'] = {}
3122         try:
3123             files_to_delete, infodict = pp.run(infodict)
3124         except PostProcessingError as e:
3125             # Must be True and not 'only_download'
3126             if self.params.get('ignoreerrors') is True:
3127                 self.report_error(e)
3128                 return infodict
3129             raise
3130
3131         if not files_to_delete:
3132             return infodict
3133         if self.params.get('keepvideo', False):
3134             for f in files_to_delete:
3135                 infodict['__files_to_move'].setdefault(f, '')
3136         else:
3137             for old_filename in set(files_to_delete):
3138                 self.to_screen('Deleting original file %s (pass -k to keep)' % old_filename)
3139                 try:
3140                     os.remove(encodeFilename(old_filename))
3141                 except (IOError, OSError):
3142                     self.report_warning('Unable to remove downloaded original file')
3143                 if old_filename in infodict['__files_to_move']:
3144                     del infodict['__files_to_move'][old_filename]
3145         return infodict
3146
3147     @staticmethod
3148     def post_extract(info_dict):
3149         def actual_post_extract(info_dict):
3150             if info_dict.get('_type') in ('playlist', 'multi_video'):
3151                 for video_dict in info_dict.get('entries', {}):
3152                     actual_post_extract(video_dict or {})
3153                 return
3154
3155             post_extractor = info_dict.get('__post_extractor') or (lambda: {})
3156             extra = post_extractor().items()
3157             info_dict.update(extra)
3158             info_dict.pop('__post_extractor', None)
3159
3160             original_infodict = info_dict.get('__original_infodict') or {}
3161             original_infodict.update(extra)
3162             original_infodict.pop('__post_extractor', None)
3163
3164         actual_post_extract(info_dict or {})
3165
3166     def pre_process(self, ie_info, key='pre_process', files_to_move=None):
3167         info = dict(ie_info)
3168         info['__files_to_move'] = files_to_move or {}
3169         for pp in self._pps[key]:
3170             info = self.run_pp(pp, info)
3171         return info, info.pop('__files_to_move', None)
3172
3173     def post_process(self, filename, ie_info, files_to_move=None):
3174         """Run all the postprocessors on the given file."""
3175         info = dict(ie_info)
3176         info['filepath'] = filename
3177         info['__files_to_move'] = files_to_move or {}
3178
3179         for pp in ie_info.get('__postprocessors', []) + self._pps['post_process']:
3180             info = self.run_pp(pp, info)
3181         info = self.run_pp(MoveFilesAfterDownloadPP(self), info)
3182         del info['__files_to_move']
3183         for pp in self._pps['after_move']:
3184             info = self.run_pp(pp, info)
3185         return info
3186
3187     def _make_archive_id(self, info_dict):
3188         video_id = info_dict.get('id')
3189         if not video_id:
3190             return
3191         # Future-proof against any change in case
3192         # and backwards compatibility with prior versions
3193         extractor = info_dict.get('extractor_key') or info_dict.get('ie_key')  # key in a playlist
3194         if extractor is None:
3195             url = str_or_none(info_dict.get('url'))
3196             if not url:
3197                 return
3198             # Try to find matching extractor for the URL and take its ie_key
3199             for ie_key, ie in self._ies.items():
3200                 if ie.suitable(url):
3201                     extractor = ie_key
3202                     break
3203             else:
3204                 return
3205         return '%s %s' % (extractor.lower(), video_id)
3206
3207     def in_download_archive(self, info_dict):
3208         fn = self.params.get('download_archive')
3209         if fn is None:
3210             return False
3211
3212         vid_id = self._make_archive_id(info_dict)
3213         if not vid_id:
3214             return False  # Incomplete video information
3215
3216         return vid_id in self.archive
3217
3218     def record_download_archive(self, info_dict):
3219         fn = self.params.get('download_archive')
3220         if fn is None:
3221             return
3222         vid_id = self._make_archive_id(info_dict)
3223         assert vid_id
3224         with locked_file(fn, 'a', encoding='utf-8') as archive_file:
3225             archive_file.write(vid_id + '\n')
3226         self.archive.add(vid_id)
3227
3228     @staticmethod
3229     def format_resolution(format, default='unknown'):
3230         if format.get('vcodec') == 'none' and format.get('acodec') != 'none':
3231             return 'audio only'
3232         if format.get('resolution') is not None:
3233             return format['resolution']
3234         if format.get('width') and format.get('height'):
3235             return '%dx%d' % (format['width'], format['height'])
3236         elif format.get('height'):
3237             return '%sp' % format['height']
3238         elif format.get('width'):
3239             return '%dx?' % format['width']
3240         return default
3241
3242     def _format_note(self, fdict):
3243         res = ''
3244         if fdict.get('ext') in ['f4f', 'f4m']:
3245             res += '(unsupported)'
3246         if fdict.get('language'):
3247             if res:
3248                 res += ' '
3249             res += '[%s]' % fdict['language']
3250         if fdict.get('format_note') is not None:
3251             if res:
3252                 res += ' '
3253             res += fdict['format_note']
3254         if fdict.get('tbr') is not None:
3255             if res:
3256                 res += ', '
3257             res += '%4dk' % fdict['tbr']
3258         if fdict.get('container') is not None:
3259             if res:
3260                 res += ', '
3261             res += '%s container' % fdict['container']
3262         if (fdict.get('vcodec') is not None
3263                 and fdict.get('vcodec') != 'none'):
3264             if res:
3265                 res += ', '
3266             res += fdict['vcodec']
3267             if fdict.get('vbr') is not None:
3268                 res += '@'
3269         elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
3270             res += 'video@'
3271         if fdict.get('vbr') is not None:
3272             res += '%4dk' % fdict['vbr']
3273         if fdict.get('fps') is not None:
3274             if res:
3275                 res += ', '
3276             res += '%sfps' % fdict['fps']
3277         if fdict.get('acodec') is not None:
3278             if res:
3279                 res += ', '
3280             if fdict['acodec'] == 'none':
3281                 res += 'video only'
3282             else:
3283                 res += '%-5s' % fdict['acodec']
3284         elif fdict.get('abr') is not None:
3285             if res:
3286                 res += ', '
3287             res += 'audio'
3288         if fdict.get('abr') is not None:
3289             res += '@%3dk' % fdict['abr']
3290         if fdict.get('asr') is not None:
3291             res += ' (%5dHz)' % fdict['asr']
3292         if fdict.get('filesize') is not None:
3293             if res:
3294                 res += ', '
3295             res += format_bytes(fdict['filesize'])
3296         elif fdict.get('filesize_approx') is not None:
3297             if res:
3298                 res += ', '
3299             res += '~' + format_bytes(fdict['filesize_approx'])
3300         return res
3301
3302     def _list_format_headers(self, *headers):
3303         if self.params.get('listformats_table', True) is not False:
3304             return [self._format_screen(header, self.Styles.HEADERS) for header in headers]
3305         return headers
3306
    def list_formats(self, info_dict):
        """Print a table of the formats available for a video.

        Uses info_dict['formats'] when present; otherwise info_dict itself is
        treated as the only format. The multi-column layout is used unless the
        'listformats_table' param is exactly False, in which case a compact
        four-column table (id, extension, resolution, note) is printed.
        Formats whose 'preference' is below -1000 are left out of either table.
        """
        formats = info_dict.get('formats', [info_dict])
        new_format = self.params.get('listformats_table', True) is not False
        if new_format:
            # Column separator; '|' is presumably the fallback used when the
            # box-drawing character cannot be encoded - confirm in _format_screen
            delim = self._format_screen('\u2502', self.Styles.DELIM, '|', test_encoding=True)
            # One row per format; the cell order must match header_line below
            table = [
                [
                    self._format_screen(format_field(f, 'format_id'), self.Styles.ID),
                    format_field(f, 'ext'),
                    format_field(f, func=self.format_resolution, ignore=('audio only', 'images')),
                    format_field(f, 'fps', '\t%d'),
                    format_field(f, 'dynamic_range', '%s', ignore=(None, 'SDR')).replace('HDR', ''),
                    delim,
                    format_field(f, 'filesize', ' \t%s', func=format_bytes) + format_field(f, 'filesize_approx', '~\t%s', func=format_bytes),
                    format_field(f, 'tbr', '\t%dk'),
                    shorten_protocol_name(f.get('protocol', '').replace('native', 'n')),
                    delim,
                    # A vcodec of 'none' is shown as 'images' when there is no audio either
                    format_field(f, 'vcodec', default='unknown').replace(
                        'none',
                        'images' if f.get('acodec') == 'none'
                        else self._format_screen('audio only', self.Styles.SUPPRESS)),
                    format_field(f, 'vbr', '\t%dk'),
                    # An acodec of 'none' is left blank when there is no video either
                    format_field(f, 'acodec', default='unknown').replace(
                        'none',
                        '' if f.get('vcodec') == 'none'
                        else self._format_screen('video only', self.Styles.SUPPRESS)),
                    format_field(f, 'abr', '\t%dk'),
                    format_field(f, 'asr', '\t%dHz'),
                    # MORE INFO column: unsupported marker, language, note and container
                    join_nonempty(
                        self._format_screen('UNSUPPORTED', 'light red') if f.get('ext') in ('f4f', 'f4m') else None,
                        format_field(f, 'language', '[%s]'),
                        join_nonempty(
                            format_field(f, 'format_note'),
                            format_field(f, 'container', ignore=(None, f.get('ext'))),
                            delim=', '),
                        delim=' '),
                ] for f in formats if f.get('preference') is None or f['preference'] >= -1000]
            header_line = self._list_format_headers(
                'ID', 'EXT', 'RESOLUTION', '\tFPS', 'HDR', delim, '\tFILESIZE', '\tTBR', 'PROTO',
                delim, 'VCODEC', '\tVBR', 'ACODEC', '\tABR', '\tASR', 'MORE INFO')
        else:
            # Compact layout: everything beyond id/ext/resolution goes into the note
            table = [
                [
                    format_field(f, 'format_id'),
                    format_field(f, 'ext'),
                    self.format_resolution(f),
                    self._format_note(f)]
                for f in formats
                if f.get('preference') is None or f['preference'] >= -1000]
            header_line = ['format code', 'extension', 'resolution', 'note']

        self.to_screen(
            '[info] Available formats for %s:' % info_dict['id'])
        # delim evaluates to False for the compact layout (new_format is False)
        self.to_stdout(render_table(
            header_line, table,
            extra_gap=(0 if new_format else 1),
            hide_empty=new_format,
            delim=new_format and self._format_screen('\u2500', self.Styles.DELIM, '-', test_encoding=True)))
3365
3366     def list_thumbnails(self, info_dict):
3367         thumbnails = list(info_dict.get('thumbnails'))
3368         if not thumbnails:
3369             self.to_screen('[info] No thumbnails present for %s' % info_dict['id'])
3370             return
3371
3372         self.to_screen(
3373             '[info] Thumbnails for %s:' % info_dict['id'])
3374         self.to_stdout(render_table(
3375             self._list_format_headers('ID', 'Width', 'Height', 'URL'),
3376             [[t['id'], t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']] for t in thumbnails]))
3377
3378     def list_subtitles(self, video_id, subtitles, name='subtitles'):
3379         if not subtitles:
3380             self.to_screen('%s has no %s' % (video_id, name))
3381             return
3382         self.to_screen(
3383             'Available %s for %s:' % (name, video_id))
3384
3385         def _row(lang, formats):
3386             exts, names = zip(*((f['ext'], f.get('name') or 'unknown') for f in reversed(formats)))
3387             if len(set(names)) == 1:
3388                 names = [] if names[0] == 'unknown' else names[:1]
3389             return [lang, ', '.join(names), ', '.join(exts)]
3390
3391         self.to_stdout(render_table(
3392             self._list_format_headers('Language', 'Name', 'Formats'),
3393             [_row(lang, formats) for lang, formats in subtitles.items()],
3394             hide_empty=True))
3395
3396     def urlopen(self, req):
3397         """ Start an HTTP download """
3398         if isinstance(req, compat_basestring):
3399             req = sanitized_Request(req)
3400         return self._opener.open(req, timeout=self._socket_timeout)
3401
3402     def print_debug_header(self):
3403         if not self.params.get('verbose'):
3404             return
3405
3406         def get_encoding(stream):
3407             ret = getattr(stream, 'encoding', 'missing (%s)' % type(stream).__name__)
3408             if not supports_terminal_sequences(stream):
3409                 from .compat import WINDOWS_VT_MODE
3410                 ret += ' (No VT)' if WINDOWS_VT_MODE is False else ' (No ANSI)'
3411             return ret
3412
3413         encoding_str = 'Encodings: locale %s, fs %s, out %s, err %s, pref %s' % (
3414             locale.getpreferredencoding(),
3415             sys.getfilesystemencoding(),
3416             get_encoding(self._screen_file), get_encoding(self._err_file),
3417             self.get_encoding())
3418
3419         logger = self.params.get('logger')
3420         if logger:
3421             write_debug = lambda msg: logger.debug(f'[debug] {msg}')
3422             write_debug(encoding_str)
3423         else:
3424             write_string(f'[debug] {encoding_str}\n', encoding=None)
3425             write_debug = lambda msg: self._write_string(f'[debug] {msg}\n')
3426
3427         source = detect_variant()
3428         write_debug(join_nonempty(
3429             'yt-dlp version', __version__,
3430             f'[{RELEASE_GIT_HEAD}]' if RELEASE_GIT_HEAD else '',
3431             '' if source == 'unknown' else f'({source})',
3432             delim=' '))
3433         if not _LAZY_LOADER:
3434             if os.environ.get('YTDLP_NO_LAZY_EXTRACTORS'):
3435                 write_debug('Lazy loading extractors is forcibly disabled')
3436             else:
3437                 write_debug('Lazy loading extractors is disabled')
3438         if plugin_extractors or plugin_postprocessors:
3439             write_debug('Plugins: %s' % [
3440                 '%s%s' % (klass.__name__, '' if klass.__name__ == name else f' as {name}')
3441                 for name, klass in itertools.chain(plugin_extractors.items(), plugin_postprocessors.items())])
3442         if self.params.get('compat_opts'):
3443             write_debug('Compatibility options: %s' % ', '.join(self.params.get('compat_opts')))
3444
3445         if source == 'source':
3446             try:
3447                 sp = Popen(
3448                     ['git', 'rev-parse', '--short', 'HEAD'],
3449                     stdout=subprocess.PIPE, stderr=subprocess.PIPE,
3450                     cwd=os.path.dirname(os.path.abspath(__file__)))
3451                 out, err = sp.communicate_or_kill()
3452                 out = out.decode().strip()
3453                 if re.match('[0-9a-f]+', out):
3454                     write_debug('Git HEAD: %s' % out)
3455             except Exception:
3456                 try:
3457                     sys.exc_clear()
3458                 except Exception:
3459                     pass
3460
3461         def python_implementation():
3462             impl_name = platform.python_implementation()
3463             if impl_name == 'PyPy' and hasattr(sys, 'pypy_version_info'):
3464                 return impl_name + ' version %d.%d.%d' % sys.pypy_version_info[:3]
3465             return impl_name
3466
3467         write_debug('Python version %s (%s %s) - %s' % (
3468             platform.python_version(),
3469             python_implementation(),
3470             platform.architecture()[0],
3471             platform_name()))
3472
3473         exe_versions, ffmpeg_features = FFmpegPostProcessor.get_versions_and_features(self)
3474         ffmpeg_features = {key for key, val in ffmpeg_features.items() if val}
3475         if ffmpeg_features:
3476             exe_versions['ffmpeg'] += ' (%s)' % ','.join(ffmpeg_features)
3477
3478         exe_versions['rtmpdump'] = rtmpdump_version()
3479         exe_versions['phantomjs'] = PhantomJSwrapper._version()
3480         exe_str = ', '.join(
3481             f'{exe} {v}' for exe, v in sorted(exe_versions.items()) if v
3482         ) or 'none'
3483         write_debug('exe versions: %s' % exe_str)
3484
3485         from .downloader.websocket import has_websockets
3486         from .postprocessor.embedthumbnail import has_mutagen
3487         from .cookies import SQLITE_AVAILABLE, KEYRING_AVAILABLE
3488
3489         lib_str = join_nonempty(
3490             compat_pycrypto_AES and compat_pycrypto_AES.__name__.split('.')[0],
3491             KEYRING_AVAILABLE and 'keyring',
3492             has_mutagen and 'mutagen',
3493             SQLITE_AVAILABLE and 'sqlite',
3494             has_websockets and 'websockets',
3495             delim=', ') or 'none'
3496         write_debug('Optional libraries: %s' % lib_str)
3497
3498         proxy_map = {}
3499         for handler in self._opener.handlers:
3500             if hasattr(handler, 'proxies'):
3501                 proxy_map.update(handler.proxies)
3502         write_debug(f'Proxy map: {proxy_map}')
3503
3504         # Not implemented
3505         if False and self.params.get('call_home'):
3506             ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode('utf-8')
3507             write_debug('Public IP address: %s' % ipaddr)
3508             latest_version = self.urlopen(
3509                 'https://yt-dl.org/latest/version').read().decode('utf-8')
3510             if version_tuple(latest_version) > version_tuple(__version__):
3511                 self.report_warning(
3512                     'You are using an outdated version (newest version: %s)! '
3513                     'See https://yt-dl.org/update if you need help updating.' %
3514                     latest_version)
3515
3516     def _setup_opener(self):
3517         timeout_val = self.params.get('socket_timeout')
3518         self._socket_timeout = 20 if timeout_val is None else float(timeout_val)
3519
3520         opts_cookiesfrombrowser = self.params.get('cookiesfrombrowser')
3521         opts_cookiefile = self.params.get('cookiefile')
3522         opts_proxy = self.params.get('proxy')
3523
3524         self.cookiejar = load_cookies(opts_cookiefile, opts_cookiesfrombrowser, self)
3525
3526         cookie_processor = YoutubeDLCookieProcessor(self.cookiejar)
3527         if opts_proxy is not None:
3528             if opts_proxy == '':
3529                 proxies = {}
3530             else:
3531                 proxies = {'http': opts_proxy, 'https': opts_proxy}
3532         else:
3533             proxies = compat_urllib_request.getproxies()
3534             # Set HTTPS proxy to HTTP one if given (https://github.com/ytdl-org/youtube-dl/issues/805)
3535             if 'http' in proxies and 'https' not in proxies:
3536                 proxies['https'] = proxies['http']
3537         proxy_handler = PerRequestProxyHandler(proxies)
3538
3539         debuglevel = 1 if self.params.get('debug_printtraffic') else 0
3540         https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
3541         ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
3542         redirect_handler = YoutubeDLRedirectHandler()
3543         data_handler = compat_urllib_request_DataHandler()
3544
3545         # When passing our own FileHandler instance, build_opener won't add the
3546         # default FileHandler and allows us to disable the file protocol, which
3547         # can be used for malicious purposes (see
3548         # https://github.com/ytdl-org/youtube-dl/issues/8227)
3549         file_handler = compat_urllib_request.FileHandler()
3550
3551         def file_open(*args, **kwargs):
3552             raise compat_urllib_error.URLError('file:// scheme is explicitly disabled in yt-dlp for security reasons')
3553         file_handler.file_open = file_open
3554
3555         opener = compat_urllib_request.build_opener(
3556             proxy_handler, https_handler, cookie_processor, ydlh, redirect_handler, data_handler, file_handler)
3557
3558         # Delete the default user-agent header, which would otherwise apply in
3559         # cases where our custom HTTP handler doesn't come into play
3560         # (See https://github.com/ytdl-org/youtube-dl/issues/1309 for details)
3561         opener.addheaders = []
3562         self._opener = opener
3563
3564     def encode(self, s):
3565         if isinstance(s, bytes):
3566             return s  # Already encoded
3567
3568         try:
3569             return s.encode(self.get_encoding())
3570         except UnicodeEncodeError as err:
3571             err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
3572             raise
3573
3574     def get_encoding(self):
3575         encoding = self.params.get('encoding')
3576         if encoding is None:
3577             encoding = preferredencoding()
3578         return encoding
3579
3580     def _write_info_json(self, label, ie_result, infofn, overwrite=None):
3581         ''' Write infojson and returns True = written, False = skip, None = error '''
3582         if overwrite is None:
3583             overwrite = self.params.get('overwrites', True)
3584         if not self.params.get('writeinfojson'):
3585             return False
3586         elif not infofn:
3587             self.write_debug(f'Skipping writing {label} infojson')
3588             return False
3589         elif not self._ensure_dir_exists(infofn):
3590             return None
3591         elif not overwrite and os.path.exists(infofn):
3592             self.to_screen(f'[info] {label.title()} metadata is already present')
3593         else:
3594             self.to_screen(f'[info] Writing {label} metadata as JSON to: {infofn}')
3595             try:
3596                 write_json_file(self.sanitize_info(ie_result, self.params.get('clean_infojson', True)), infofn)
3597             except (OSError, IOError):
3598                 self.report_error(f'Cannot write {label} metadata to JSON file {infofn}')
3599                 return None
3600         return True
3601
3602     def _write_description(self, label, ie_result, descfn):
3603         ''' Write description and returns True = written, False = skip, None = error '''
3604         if not self.params.get('writedescription'):
3605             return False
3606         elif not descfn:
3607             self.write_debug(f'Skipping writing {label} description')
3608             return False
3609         elif not self._ensure_dir_exists(descfn):
3610             return None
3611         elif not self.params.get('overwrites', True) and os.path.exists(descfn):
3612             self.to_screen(f'[info] {label.title()} description is already present')
3613         elif ie_result.get('description') is None:
3614             self.report_warning(f'There\'s no {label} description to write')
3615             return False
3616         else:
3617             try:
3618                 self.to_screen(f'[info] Writing {label} description to: {descfn}')
3619                 with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
3620                     descfile.write(ie_result['description'])
3621             except (OSError, IOError):
3622                 self.report_error(f'Cannot write {label} description file {descfn}')
3623                 return None
3624         return True
3625
3626     def _write_subtitles(self, info_dict, filename):
3627         ''' Write subtitles to file and return list of (sub_filename, final_sub_filename); or None if error'''
3628         ret = []
3629         subtitles = info_dict.get('requested_subtitles')
3630         if not subtitles or not (self.params.get('writesubtitles') or self.params.get('writeautomaticsub')):
3631             # subtitles download errors are already managed as troubles in relevant IE
3632             # that way it will silently go on when used with unsupporting IE
3633             return ret
3634
3635         sub_filename_base = self.prepare_filename(info_dict, 'subtitle')
3636         if not sub_filename_base:
3637             self.to_screen('[info] Skipping writing video subtitles')
3638             return ret
3639         for sub_lang, sub_info in subtitles.items():
3640             sub_format = sub_info['ext']
3641             sub_filename = subtitles_filename(filename, sub_lang, sub_format, info_dict.get('ext'))
3642             sub_filename_final = subtitles_filename(sub_filename_base, sub_lang, sub_format, info_dict.get('ext'))
3643             if not self.params.get('overwrites', True) and os.path.exists(sub_filename):
3644                 self.to_screen(f'[info] Video subtitle {sub_lang}.{sub_format} is already present')
3645                 sub_info['filepath'] = sub_filename
3646                 ret.append((sub_filename, sub_filename_final))
3647                 continue
3648
3649             self.to_screen(f'[info] Writing video subtitles to: {sub_filename}')
3650             if sub_info.get('data') is not None:
3651                 try:
3652                     # Use newline='' to prevent conversion of newline characters
3653                     # See https://github.com/ytdl-org/youtube-dl/issues/10268
3654                     with io.open(sub_filename, 'w', encoding='utf-8', newline='') as subfile:
3655                         subfile.write(sub_info['data'])
3656                     sub_info['filepath'] = sub_filename
3657                     ret.append((sub_filename, sub_filename_final))
3658                     continue
3659                 except (OSError, IOError):
3660                     self.report_error(f'Cannot write video subtitles file {sub_filename}')
3661                     return None
3662
3663             try:
3664                 sub_copy = sub_info.copy()
3665                 sub_copy.setdefault('http_headers', info_dict.get('http_headers'))
3666                 self.dl(sub_filename, sub_copy, subtitle=True)
3667                 sub_info['filepath'] = sub_filename
3668                 ret.append((sub_filename, sub_filename_final))
3669             except (ExtractorError, IOError, OSError, ValueError) + network_exceptions as err:
3670                 self.report_warning(f'Unable to download video subtitles for {sub_lang!r}: {err}')
3671                 continue
3672         return ret
3673
3674     def _write_thumbnails(self, label, info_dict, filename, thumb_filename_base=None):
3675         ''' Write thumbnails to file and return list of (thumb_filename, final_thumb_filename) '''
3676         write_all = self.params.get('write_all_thumbnails', False)
3677         thumbnails, ret = [], []
3678         if write_all or self.params.get('writethumbnail', False):
3679             thumbnails = info_dict.get('thumbnails') or []
3680         multiple = write_all and len(thumbnails) > 1
3681
3682         if thumb_filename_base is None:
3683             thumb_filename_base = filename
3684         if thumbnails and not thumb_filename_base:
3685             self.write_debug(f'Skipping writing {label} thumbnail')
3686             return ret
3687
3688         for idx, t in list(enumerate(thumbnails))[::-1]:
3689             thumb_ext = (f'{t["id"]}.' if multiple else '') + determine_ext(t['url'], 'jpg')
3690             thumb_display_id = f'{label} thumbnail {t["id"]}'
3691             thumb_filename = replace_extension(filename, thumb_ext, info_dict.get('ext'))
3692             thumb_filename_final = replace_extension(thumb_filename_base, thumb_ext, info_dict.get('ext'))
3693
3694             if not self.params.get('overwrites', True) and os.path.exists(thumb_filename):
3695                 ret.append((thumb_filename, thumb_filename_final))
3696                 t['filepath'] = thumb_filename
3697                 self.to_screen('[info] %s is already present' % (
3698                     thumb_display_id if multiple else f'{label} thumbnail').capitalize())
3699             else:
3700                 self.to_screen(f'[info] Downloading {thumb_display_id} ...')
3701                 try:
3702                     uf = self.urlopen(t['url'])
3703                     self.to_screen(f'[info] Writing {thumb_display_id} to: {thumb_filename}')
3704                     with open(encodeFilename(thumb_filename), 'wb') as thumbf:
3705                         shutil.copyfileobj(uf, thumbf)
3706                     ret.append((thumb_filename, thumb_filename_final))
3707                     t['filepath'] = thumb_filename
3708                 except network_exceptions as err:
3709                     thumbnails.pop(idx)
3710                     self.report_warning(f'Unable to download {thumb_display_id}: {err}')
3711             if ret and not write_all:
3712                 break
3713         return ret