yt_dlp/YoutubeDL.py

   1 #!/usr/bin/env python3
   2 # coding: utf-8
   3
   4 from __future__ import absolute_import, unicode_literals
   5
   6 import collections
   7 import contextlib
   8 import copy
   9 import datetime
  10 import errno
  11 import fileinput
  12 import functools
  13 import io
  14 import itertools
  15 import json
  16 import locale
  17 import operator
  18 import os
  19 import platform
  20 import re
  21 import shutil
  22 import subprocess
  23 import sys
  24 import tempfile
  25 import time
  26 import tokenize
  27 import traceback
  28 import random
  29 import unicodedata
  30
  31 from enum import Enum
  32 from string import ascii_letters
  33
  34 from .compat import (
  35     compat_basestring,
  36     compat_get_terminal_size,
  37     compat_kwargs,
  38     compat_numeric_types,
  39     compat_os_name,
  40     compat_pycrypto_AES,
  41     compat_shlex_quote,
  42     compat_str,
  43     compat_tokenize_tokenize,
  44     compat_urllib_error,
  45     compat_urllib_request,
  46     compat_urllib_request_DataHandler,
  47     windows_enable_vt_mode,
  48 )
  49 from .cookies import load_cookies
  50 from .utils import (
  51     age_restricted,
  52     args_to_str,
  53     ContentTooShortError,
  54     date_from_str,
  55     DateRange,
  56     DEFAULT_OUTTMPL,
  57     determine_ext,
  58     determine_protocol,
  59     DownloadCancelled,
  60     DownloadError,
  61     encode_compat_str,
  62     encodeFilename,
  63     EntryNotInPlaylist,
  64     error_to_compat_str,
  65     ExistingVideoReached,
  66     expand_path,
  67     ExtractorError,
  68     float_or_none,
  69     format_bytes,
  70     format_field,
  71     formatSeconds,
  72     GeoRestrictedError,
  73     get_domain,
  74     HEADRequest,
  75     int_or_none,
  76     iri_to_uri,
  77     ISO3166Utils,
  78     join_nonempty,
  79     LazyList,
  80     LINK_TEMPLATES,
  81     locked_file,
  82     make_dir,
  83     make_HTTPS_handler,
  84     MaxDownloadsReached,
  85     network_exceptions,
  86     number_of_digits,
  87     orderedSet,
  88     OUTTMPL_TYPES,
  89     PagedList,
  90     parse_filesize,
  91     PerRequestProxyHandler,
  92     platform_name,
  93     Popen,
  94     PostProcessingError,
  95     preferredencoding,
  96     prepend_extension,
  97     ReExtractInfo,
  98     register_socks_protocols,
  99     RejectedVideoReached,
 100     remove_terminal_sequences,
 101     render_table,
 102     replace_extension,
 103     SameFileError,
 104     sanitize_filename,
 105     sanitize_path,
 106     sanitize_url,
 107     sanitized_Request,
 108     std_headers,
 109     STR_FORMAT_RE_TMPL,
 110     STR_FORMAT_TYPES,
 111     str_or_none,
 112     strftime_or_none,
 113     subtitles_filename,
 114     supports_terminal_sequences,
 115     timetuple_from_msec,
 116     to_high_limit_path,
 117     traverse_obj,
 118     try_get,
 119     UnavailableVideoError,
 120     url_basename,
 121     variadic,
 122     version_tuple,
 123     write_json_file,
 124     write_string,
 125     YoutubeDLCookieProcessor,
 126     YoutubeDLHandler,
 127     YoutubeDLRedirectHandler,
 128 )
 129 from .cache import Cache
 130 from .minicurses import format_text
 131 from .extractor import (
 132     gen_extractor_classes,
 133     get_info_extractor,
 134     _LAZY_LOADER,
 135     _PLUGIN_CLASSES as plugin_extractors
 136 )
 137 from .extractor.openload import PhantomJSwrapper
 138 from .downloader import (
 139     FFmpegFD,
 140     get_suitable_downloader,
 141     shorten_protocol_name
 142 )
 143 from .downloader.rtmp import rtmpdump_version
 144 from .postprocessor import (
 145     get_postprocessor,
 146     EmbedThumbnailPP,
 147     FFmpegFixupDurationPP,
 148     FFmpegFixupM3u8PP,
 149     FFmpegFixupM4aPP,
 150     FFmpegFixupStretchedPP,
 151     FFmpegFixupTimestampPP,
 152     FFmpegMergerPP,
 153     FFmpegPostProcessor,
 154     MoveFilesAfterDownloadPP,
 155     _PLUGIN_CLASSES as plugin_postprocessors
 156 )
 157 from .update import detect_variant
 158 from .version import __version__, RELEASE_GIT_HEAD
 159
 160 if compat_os_name == 'nt':
 161     import ctypes
 162
 163
 164 class YoutubeDL(object):
 165     """YoutubeDL class.
 166
 167     YoutubeDL objects are the ones responsible for downloading the
 168     actual video file and writing it to disk if the user has requested
 169     it, among some other tasks. In most cases there should be one per
 170     program. As, given a video URL, the downloader doesn't know how to
 171     extract all the needed information, task that InfoExtractors do, it
 172     has to pass the URL to one of them.
 173
 174     For this, YoutubeDL objects have a method that allows
 175     InfoExtractors to be registered in a given order. When it is passed
 176     a URL, the YoutubeDL object hands it to the first InfoExtractor it
 177     finds that reports being able to handle it. The InfoExtractor extracts
 178     all the information about the video or videos the URL refers to, and
 179     YoutubeDL processes the extracted information, possibly using a File
 180     Downloader to download the video.
 181
 182     YoutubeDL objects accept a lot of parameters. In order not to saturate
 183     the object constructor with arguments, it receives a dictionary of
 184     options instead. These options are available through the params
 185     attribute for the InfoExtractors to use. The YoutubeDL also
 186     registers itself as the downloader in charge for the InfoExtractors
 187     that are added to it, so this is a "mutual registration".
 188
 189     Available options:
 190
 191     username:          Username for authentication purposes.
 192     password:          Password for authentication purposes.
 193     videopassword:     Password for accessing a video.
 194     ap_mso:            Adobe Pass multiple-system operator identifier.
 195     ap_username:       Multiple-system operator account username.
 196     ap_password:       Multiple-system operator account password.
 197     usenetrc:          Use netrc for authentication instead.
 198     verbose:           Print additional info to stdout.
 199     quiet:             Do not print messages to stdout.
 200     no_warnings:       Do not print out anything for warnings.
 201     forceprint:        A list of templates to force print
 202     forceurl:          Force printing final URL. (Deprecated)
 203     forcetitle:        Force printing title. (Deprecated)
 204     forceid:           Force printing ID. (Deprecated)
 205     forcethumbnail:    Force printing thumbnail URL. (Deprecated)
 206     forcedescription:  Force printing description. (Deprecated)
 207     forcefilename:     Force printing final filename. (Deprecated)
 208     forceduration:     Force printing duration. (Deprecated)
 209     forcejson:         Force printing info_dict as JSON.
 210     dump_single_json:  Force printing the info_dict of the whole playlist
 211                        (or video) as a single JSON line.
 212     force_write_download_archive: Force writing download archive regardless
 213                        of 'skip_download' or 'simulate'.
 214     simulate:          Do not download the video files. If unset (or None),
 215                        simulate only if listsubtitles, listformats or list_thumbnails is used
 216     format:            Video format code. See "FORMAT SELECTION" for more details.
 217                        You can also pass a function. The function takes 'ctx' as
 218                        argument and returns the formats to download.
 219                        See "build_format_selector" for an implementation
 220     allow_unplayable_formats:   Allow unplayable formats to be extracted and downloaded.
 221     ignore_no_formats_error: Ignore "No video formats" error. Useful for
 222                        extracting metadata even if the video is not actually
 223                        available for download (experimental)
 224     format_sort:       A list of fields by which to sort the video formats.
 225                        See "Sorting Formats" for more details.
 226     format_sort_force: Force the given format_sort. See "Sorting Formats"
 227                        for more details.
 228     allow_multiple_video_streams:   Allow multiple video streams to be merged
 229                        into a single file
 230     allow_multiple_audio_streams:   Allow multiple audio streams to be merged
 231                        into a single file
 232     check_formats:     Whether to test if the formats are downloadable.
 233                        Can be True (check all), False (check none),
 234                        'selected' (check selected formats),
 235                        or None (check only if requested by extractor)
 236     paths:             Dictionary of output paths. The allowed keys are 'home',
 237                        'temp' and the keys of OUTTMPL_TYPES (in utils.py)
 238     outtmpl:           Dictionary of templates for output names. Allowed keys
 239                        are 'default' and the keys of OUTTMPL_TYPES (in utils.py).
 240                        For compatibility with youtube-dl, a single string can also be used
 241     outtmpl_na_placeholder: Placeholder for unavailable meta fields.
 242     restrictfilenames: Do not allow "&" and spaces in file names
 243     trim_file_name:    Limit length of filename (extension excluded)
 244     windowsfilenames:  Force the filenames to be windows compatible
 245     ignoreerrors:      Do not stop on download/postprocessing errors.
 246                        Can be 'only_download' to ignore only download errors.
 247                        Default is 'only_download' for CLI, but False for API
 248     skip_playlist_after_errors: Number of allowed failures until the rest of
 249                        the playlist is skipped
 250     force_generic_extractor: Force downloader to use the generic extractor
 251     overwrites:        Overwrite all video and metadata files if True,
 252                        overwrite only non-video files if None
 253                        and don't overwrite any file if False
 254                        For compatibility with youtube-dl,
 255                        "nooverwrites" may also be used instead
 256     playliststart:     Playlist item to start at.
 257     playlistend:       Playlist item to end at.
 258     playlist_items:    Specific indices of playlist to download.
 259     playlistreverse:   Download playlist items in reverse order.
 260     playlistrandom:    Download playlist items in random order.
 261     matchtitle:        Download only matching titles.
 262     rejecttitle:       Reject downloads for matching titles.
 263     logger:            Log messages to a logging.Logger instance.
 264     logtostderr:       Log messages to stderr instead of stdout.
 265     consoletitle:      Display progress in console window's titlebar.
 266     writedescription:  Write the video description to a .description file
 267     writeinfojson:     Write the video metadata to a .info.json file
 268     clean_infojson:    Remove private fields from the infojson
 269     getcomments:       Extract video comments. This will not be written to disk
 270                        unless writeinfojson is also given
 271     writeannotations:  Write the video annotations to a .annotations.xml file
 272     writethumbnail:    Write the thumbnail image to a file
 273     allow_playlist_files: Whether to write playlists' description, infojson etc
 274                        also to disk when using the 'write*' options
 275     write_all_thumbnails:  Write all thumbnail formats to files
 276     writelink:         Write an internet shortcut file, depending on the
 277                        current platform (.url/.webloc/.desktop)
 278     writeurllink:      Write a Windows internet shortcut file (.url)
 279     writewebloclink:   Write a macOS internet shortcut file (.webloc)
 280     writedesktoplink:  Write a Linux internet shortcut file (.desktop)
 281     writesubtitles:    Write the video subtitles to a file
 282     writeautomaticsub: Write the automatically generated subtitles to a file
 283     allsubtitles:      Deprecated - Use subtitleslangs = ['all']
 284                        Downloads all the subtitles of the video
 285                        (requires writesubtitles or writeautomaticsub)
 286     listsubtitles:     Lists all available subtitles for the video
 287     subtitlesformat:   The format code for subtitles
 288     subtitleslangs:    List of languages of the subtitles to download (can be regex).
 289                        The list may contain "all" to refer to all the available
 290                        subtitles. The language can be prefixed with a "-" to
 291                        exclude it from the requested languages. Eg: ['all', '-live_chat']
 292     keepvideo:         Keep the video file after post-processing
 293     daterange:         A DateRange object, download only if the upload_date is in the range.
 294     skip_download:     Skip the actual download of the video file
 295     cachedir:          Location of the cache files in the filesystem.
 296                        False to disable filesystem cache.
 297     noplaylist:        Download single video instead of a playlist if in doubt.
 298     age_limit:         An integer representing the user's age in years.
 299                        Unsuitable videos for the given age are skipped.
 300     min_views:         An integer representing the minimum view count the video
 301                        must have in order to not be skipped.
 302                        Videos without view count information are always
 303                        downloaded. None for no limit.
 304     max_views:         An integer representing the maximum view count.
 305                        Videos that are more popular than that are not
 306                        downloaded.
 307                        Videos without view count information are always
 308                        downloaded. None for no limit.
 309     download_archive:  File name of a file where all downloads are recorded.
 310                        Videos already present in the file are not downloaded
 311                        again.
 312     break_on_existing: Stop the download process after attempting to download a
 313                        file that is in the archive.
 314     break_on_reject:   Stop the download process when encountering a video that
 315                        has been filtered out.
 316     break_per_url:     Whether break_on_reject and break_on_existing
 317                        should act on each input URL as opposed to for the entire queue
 318     cookiefile:        File name where cookies should be read from and dumped to
 319     cookiesfrombrowser: A tuple containing the name of the browser and the profile
 320                        name/path from where cookies are loaded.
 321                        Eg: ('chrome', ) or ('vivaldi', 'default')
 322     nocheckcertificate:Do not verify SSL certificates
 323     prefer_insecure:   Use HTTP instead of HTTPS to retrieve information.
 324                        At the moment, this is only supported by YouTube.
 325     proxy:             URL of the proxy server to use
 326     geo_verification_proxy:  URL of the proxy to use for IP address verification
 327                        on geo-restricted sites.
 328     socket_timeout:    Time to wait for unresponsive hosts, in seconds
 329     bidi_workaround:   Work around buggy terminals without bidirectional text
 330                        support, using fribidi
 331     debug_printtraffic:Print out sent and received HTTP traffic
 332     include_ads:       Download ads as well (deprecated)
 333     default_search:    Prepend this string if an input url is not valid.
 334                        'auto' for elaborate guessing
 335     encoding:          Use this encoding instead of the system-specified.
 336     extract_flat:      Do not resolve URLs, return the immediate result.
 337                        Pass in 'in_playlist' to only show this behavior for
 338                        playlist items.
 339     wait_for_video:    If given, wait for scheduled streams to become available.
 340                        The value should be a tuple containing the range
 341                        (min_secs, max_secs) to wait between retries
 342     postprocessors:    A list of dictionaries, each with an entry
 343                        * key:  The name of the postprocessor. See
 344                                yt_dlp/postprocessor/__init__.py for a list.
 345                        * when: When to run the postprocessor. Can be one of
 346                                pre_process|before_dl|post_process|after_move.
 347                                Assumed to be 'post_process' if not given
 348     post_hooks:        Deprecated - Register a custom postprocessor instead
 349                        A list of functions that get called as the final step
 350                        for each video file, after all postprocessors have been
 351                        called. The filename will be passed as the only argument.
 352     progress_hooks:    A list of functions that get called on download
 353                        progress, with a dictionary with the entries
 354                        * status: One of "downloading", "error", or "finished".
 355                                  Check this first and ignore unknown values.
 356                        * info_dict: The extracted info_dict
 357
 358                        If status is one of "downloading", or "finished", the
 359                        following properties may also be present:
 360                        * filename: The final filename (always present)
 361                        * tmpfilename: The filename we're currently writing to
 362                        * downloaded_bytes: Bytes on disk
 363                        * total_bytes: Size of the whole file, None if unknown
 364                        * total_bytes_estimate: Guess of the eventual file size,
 365                                                None if unavailable.
 366                        * elapsed: The number of seconds since download started.
 367                        * eta: The estimated time in seconds, None if unknown
 368                        * speed: The download speed in bytes/second, None if
 369                                 unknown
 370                        * fragment_index: The counter of the currently
 371                                          downloaded video fragment.
 372                        * fragment_count: The number of fragments (= individual
 373                                          files that will be merged)
 374
 375                        Progress hooks are guaranteed to be called at least once
 376                        (with status "finished") if the download is successful.
 377     postprocessor_hooks:  A list of functions that get called on postprocessing
 378                        progress, with a dictionary with the entries
 379                        * status: One of "started", "processing", or "finished".
 380                                  Check this first and ignore unknown values.
 381                        * postprocessor: Name of the postprocessor
 382                        * info_dict: The extracted info_dict
 383
 384                        Progress hooks are guaranteed to be called at least twice
 385                        (with status "started" and "finished") if the processing is successful.
 386     merge_output_format: Extension to use when merging formats.
 387     final_ext:         Expected final extension; used to detect when the file was
 388                        already downloaded and converted
 389     fixup:             Automatically correct known faults of the file.
 390                        One of:
 391                        - "never": do nothing
 392                        - "warn": only emit a warning
 393                        - "detect_or_warn": check whether we can do anything
 394                                            about it, warn otherwise (default)
 395     source_address:    Client-side IP address to bind to.
 396     call_home:         Boolean, true iff we are allowed to contact the
 397                        yt-dlp servers for debugging. (BROKEN)
 398     sleep_interval_requests: Number of seconds to sleep between requests
 399                        during extraction
 400     sleep_interval:    Number of seconds to sleep before each download when
 401                        used alone or a lower bound of a range for randomized
 402                        sleep before each download (minimum possible number
 403                        of seconds to sleep) when used along with
 404                        max_sleep_interval.
 405     max_sleep_interval:Upper bound of a range for randomized sleep before each
 406                        download (maximum possible number of seconds to sleep).
 407                        Must only be used along with sleep_interval.
 408                        Actual sleep time will be a random float from range
 409                        [sleep_interval; max_sleep_interval].
 410     sleep_interval_subtitles: Number of seconds to sleep before each subtitle download
 411     listformats:       Print an overview of available video formats and exit.
 412     list_thumbnails:   Print a table of all thumbnails and exit.
 413     match_filter:      A function that gets called with the info_dict of
 414                        every video.
 415                        If it returns a message, the video is ignored.
 416                        If it returns None, the video is downloaded.
 417                        match_filter_func in utils.py is one example for this.
 418     no_color:          Do not emit color codes in output.
 419     geo_bypass:        Bypass geographic restriction via faking X-Forwarded-For
 420                        HTTP header
 421     geo_bypass_country:
 422                        Two-letter ISO 3166-1 alpha-2 country code that will be used for
 423                        explicit geographic restriction bypassing via faking
 424                        X-Forwarded-For HTTP header
 425     geo_bypass_ip_block:
 426                        IP range in CIDR notation that will be used similarly to
 427                        geo_bypass_country
 428
 429     The following options determine which downloader is picked:
 430     external_downloader: A dictionary of protocol keys and the executable of the
 431                        external downloader to use for it. The allowed protocols
 432                        are default|http|ftp|m3u8|dash|rtsp|rtmp|mms.
 433                        Set the value to 'native' to use the native downloader
 434     hls_prefer_native: Deprecated - Use external_downloader = {'m3u8': 'native'}
 435                        or {'m3u8': 'ffmpeg'} instead.
 436                        Use the native HLS downloader instead of ffmpeg/avconv
 437                        if True, otherwise use ffmpeg/avconv if False, otherwise
 438                        use downloader suggested by extractor if None.
 439     compat_opts:       Compatibility options. See "Differences in default behavior".
 440                        The following options do not work when used through the API:
 441                        filename, abort-on-error, multistreams, no-live-chat, format-sort
 442                        no-clean-infojson, no-playlist-metafiles, no-keep-subs, no-attach-info-json.
 443                        Refer to __init__.py for their implementation
 444     progress_template: Dictionary of templates for progress outputs.
 445                        Allowed keys are 'download', 'postprocess',
 446                        'download-title' (console title) and 'postprocess-title'.
 447                        The template is mapped on a dictionary with keys 'progress' and 'info'
 448
 449     The following parameters are not used by YoutubeDL itself, they are used by
 450     the downloader (see yt_dlp/downloader/common.py):
 451     nopart, updatetime, buffersize, ratelimit, throttledratelimit, min_filesize,
 452     max_filesize, test, noresizebuffer, retries, fragment_retries, continuedl,
 453     noprogress, xattr_set_filesize, hls_use_mpegts, http_chunk_size,
 454     external_downloader_args, concurrent_fragment_downloads.
 455
 456     The following options are used by the post processors:
 457     prefer_ffmpeg:     If False, use avconv instead of ffmpeg if both are available,
 458                        otherwise prefer ffmpeg. (avconv support is deprecated)
 459     ffmpeg_location:   Location of the ffmpeg/avconv binary; either the path
 460                        to the binary or its containing directory.
 461     postprocessor_args: A dictionary of postprocessor/executable keys (in lower case)
 462                        and a list of additional command-line arguments for the
 463                        postprocessor/executable. The dict can also have "PP+EXE" keys
 464                        which are used when the given exe is used by the given PP.
 465                        Use 'default' as the name for arguments to be passed to all PP
 466                        For compatibility with youtube-dl, a single list of args
 467                        can also be used
 468
 469     The following options are used by the extractors:
 470     extractor_retries: Number of times to retry for known errors
 471     dynamic_mpd:       Whether to process dynamic DASH manifests (default: True)
 472     hls_split_discontinuity: Split HLS playlists to different formats at
 473                        discontinuities such as ad breaks (default: False)
 474     extractor_args:    A dictionary of arguments to be passed to the extractors.
 475                        See "EXTRACTOR ARGUMENTS" for details.
 476                        Eg: {'youtube': {'skip': ['dash', 'hls']}}
 477     youtube_include_dash_manifest: Deprecated - Use extractor_args instead.
 478                        If True (default), DASH manifests and related
 479                        data will be downloaded and processed by extractor.
 480                        You can reduce network I/O by disabling it if you don't
 481                        care about DASH. (only for youtube)
 482     youtube_include_hls_manifest: Deprecated - Use extractor_args instead.
 483                        If True (default), HLS manifests and related
 484                        data will be downloaded and processed by extractor.
 485                        You can reduce network I/O by disabling it if you don't
 486                        care about HLS. (only for youtube)
 487     """
 488
 489     _NUMERIC_FIELDS = set((
 490         'width', 'height', 'tbr', 'abr', 'asr', 'vbr', 'fps', 'filesize', 'filesize_approx',
 491         'timestamp', 'release_timestamp',
 492         'duration', 'view_count', 'like_count', 'dislike_count', 'repost_count',
 493         'average_rating', 'comment_count', 'age_limit',
 494         'start_time', 'end_time',
 495         'chapter_number', 'season_number', 'episode_number',
 496         'track_number', 'disc_number', 'release_year',
 497     ))
 498
 499     _format_selection_exts = {
 500         'audio': {'m4a', 'mp3', 'ogg', 'aac'},
 501         'video': {'mp4', 'flv', 'webm', '3gp'},
 502         'storyboards': {'mhtml'},
 503     }
 504
 505     params = None
 506     _ies = {}
 507     _pps = {'pre_process': [], 'before_dl': [], 'after_move': [], 'post_process': []}
 508     _printed_messages = set()
 509     _first_webpage_request = True
 510     _download_retcode = None
 511     _num_downloads = None
 512     _playlist_level = 0
 513     _playlist_urls = set()
 514     _screen_file = None
 515
 516     def __init__(self, params=None, auto_init=True):
 517         """Create a YoutubeDL object with the given options.
 518         @param auto_init    Whether to load the default extractors and print header (if verbose).
 519                             Set to 'no_verbose_header' to not print the header
 520         """
 521         if params is None:
 522             params = {}
 523         self._ies = {}
 524         self._ies_instances = {}
 525         self._pps = {'pre_process': [], 'before_dl': [], 'after_move': [], 'post_process': []}
 526         self._printed_messages = set()
 527         self._first_webpage_request = True
 528         self._post_hooks = []
 529         self._progress_hooks = []
 530         self._postprocessor_hooks = []
 531         self._download_retcode = 0
 532         self._num_downloads = 0
 533         self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
 534         self._err_file = sys.stderr
 535         self.params = params
 536         self.cache = Cache(self)
 537
 538         windows_enable_vt_mode()
 539         self._allow_colors = {
 540             'screen': not self.params.get('no_color') and supports_terminal_sequences(self._screen_file),
 541             'err': not self.params.get('no_color') and supports_terminal_sequences(self._err_file),
 542         }
 543
 544         if sys.version_info < (3, 6):
 545             self.report_warning(
 546                 'Python version %d.%d is not supported! Please update to Python 3.6 or above' % sys.version_info[:2])
 547
 548         if self.params.get('allow_unplayable_formats'):
 549             self.report_warning(
 550                 f'You have asked for {self._format_err("UNPLAYABLE", self.Styles.EMPHASIS)} formats to be listed/downloaded. '
 551                 'This is a developer option intended for debugging. \n'
 552                 '         If you experience any issues while using this option, '
 553                 f'{self._format_err("DO NOT", self.Styles.ERROR)} open a bug report')
 554
 555         def check_deprecated(param, option, suggestion):
 556             if self.params.get(param) is not None:
 557                 self.report_warning('%s is deprecated. Use %s instead' % (option, suggestion))
 558                 return True
 559             return False
 560
 561         if check_deprecated('cn_verification_proxy', '--cn-verification-proxy', '--geo-verification-proxy'):
 562             if self.params.get('geo_verification_proxy') is None:
 563                 self.params['geo_verification_proxy'] = self.params['cn_verification_proxy']
 564
 565         check_deprecated('autonumber', '--auto-number', '-o "%(autonumber)s-%(title)s.%(ext)s"')
 566         check_deprecated('usetitle', '--title', '-o "%(title)s-%(id)s.%(ext)s"')
 567         check_deprecated('useid', '--id', '-o "%(id)s.%(ext)s"')
 568
 569         for msg in self.params.get('_warnings', []):
 570             self.report_warning(msg)
 571         for msg in self.params.get('_deprecation_warnings', []):
 572             self.deprecation_warning(msg)
 573
 574         if 'list-formats' in self.params.get('compat_opts', []):
 575             self.params['listformats_table'] = False
 576
 577         if 'overwrites' not in self.params and self.params.get('nooverwrites') is not None:
 578             # nooverwrites was unnecessarily changed to overwrites
 579             # in 0c3d0f51778b153f65c21906031c2e091fcfb641
 580             # This ensures compatibility with both keys
 581             self.params['overwrites'] = not self.params['nooverwrites']
 582         elif self.params.get('overwrites') is None:
 583             self.params.pop('overwrites', None)
 584         else:
 585             self.params['nooverwrites'] = not self.params['overwrites']
 586
 587         if params.get('bidi_workaround', False):
 588             try:
 589                 import pty
 590                 master, slave = pty.openpty()
 591                 width = compat_get_terminal_size().columns
 592                 if width is None:
 593                     width_args = []
 594                 else:
 595                     width_args = ['-w', str(width)]
 596                 sp_kwargs = dict(
 597                     stdin=subprocess.PIPE,
 598                     stdout=slave,
 599                     stderr=self._err_file)
 600                 try:
 601                     self._output_process = Popen(['bidiv'] + width_args, **sp_kwargs)
 602                 except OSError:
 603                     self._output_process = Popen(['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
 604                 self._output_channel = os.fdopen(master, 'rb')
 605             except OSError as ose:
 606                 if ose.errno == errno.ENOENT:
 607                     self.report_warning(
 608                         'Could not find fribidi executable, ignoring --bidi-workaround. '
 609                         'Make sure that  fribidi  is an executable file in one of the directories in your $PATH.')
 610                 else:
 611                     raise
 612
 613         if (sys.platform != 'win32'
 614                 and sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
 615                 and not params.get('restrictfilenames', False)):
 616             # Unicode filesystem API will throw errors (#1474, #13027)
 617             self.report_warning(
 618                 'Assuming --restrict-filenames since file system encoding '
 619                 'cannot encode all characters. '
 620                 'Set the LC_ALL environment variable to fix this.')
 621             self.params['restrictfilenames'] = True
 622
 623         self.outtmpl_dict = self.parse_outtmpl()
 624
 625         # Creating format selector here allows us to catch syntax errors before the extraction
 626         self.format_selector = (
 627             None if self.params.get('format') is None
 628             else self.params['format'] if callable(self.params['format'])
 629             else self.build_format_selector(self.params['format']))
 630
 631         self._setup_opener()
 632
 633         if auto_init:
 634             if auto_init != 'no_verbose_header':
 635                 self.print_debug_header()
 636             self.add_default_info_extractors()
 637
 638         hooks = {
 639             'post_hooks': self.add_post_hook,
 640             'progress_hooks': self.add_progress_hook,
 641             'postprocessor_hooks': self.add_postprocessor_hook,
 642         }
 643         for opt, fn in hooks.items():
 644             for ph in self.params.get(opt, []):
 645                 fn(ph)
 646
 647         for pp_def_raw in self.params.get('postprocessors', []):
 648             pp_def = dict(pp_def_raw)
 649             when = pp_def.pop('when', 'post_process')
 650             self.add_post_processor(
 651                 get_postprocessor(pp_def.pop('key'))(self, **compat_kwargs(pp_def)),
 652                 when=when)
 653
 654         register_socks_protocols()
 655
 656         def preload_download_archive(fn):
 657             """Preload the archive, if any is specified"""
 658             if fn is None:
 659                 return False
 660             self.write_debug(f'Loading archive file {fn!r}')
 661             try:
 662                 with locked_file(fn, 'r', encoding='utf-8') as archive_file:
 663                     for line in archive_file:
 664                         self.archive.add(line.strip())
 665             except IOError as ioe:
 666                 if ioe.errno != errno.ENOENT:
 667                     raise
 668                 return False
 669             return True
 670
 671         self.archive = set()
 672         preload_download_archive(self.params.get('download_archive'))
 673
 674     def warn_if_short_id(self, argv):
 675         # short YouTube ID starting with dash?
 676         idxs = [
 677             i for i, a in enumerate(argv)
 678             if re.match(r'^-[0-9A-Za-z_-]{10}$', a)]
 679         if idxs:
 680             correct_argv = (
 681                 ['yt-dlp']
 682                 + [a for i, a in enumerate(argv) if i not in idxs]
 683                 + ['--'] + [argv[i] for i in idxs]
 684             )
 685             self.report_warning(
 686                 'Long argument string detected. '
 687                 'Use -- to separate parameters and URLs, like this:\n%s' %
 688                 args_to_str(correct_argv))
 689
 690     def add_info_extractor(self, ie):
 691         """Add an InfoExtractor object to the end of the list."""
 692         ie_key = ie.ie_key()
 693         self._ies[ie_key] = ie
 694         if not isinstance(ie, type):
 695             self._ies_instances[ie_key] = ie
 696             ie.set_downloader(self)
 697
 698     def _get_info_extractor_class(self, ie_key):
 699         ie = self._ies.get(ie_key)
 700         if ie is None:
 701             ie = get_info_extractor(ie_key)
 702             self.add_info_extractor(ie)
 703         return ie
 704
 705     def get_info_extractor(self, ie_key):
 706         """
 707         Get an instance of an IE with name ie_key, it will try to get one from
 708         the _ies list, if there's no instance it will create a new one and add
 709         it to the extractor list.
 710         """
 711         ie = self._ies_instances.get(ie_key)
 712         if ie is None:
 713             ie = get_info_extractor(ie_key)()
 714             self.add_info_extractor(ie)
 715         return ie
 716
 717     def add_default_info_extractors(self):
 718         """
 719         Add the InfoExtractors returned by gen_extractors to the end of the list
 720         """
 721         for ie in gen_extractor_classes():
 722             self.add_info_extractor(ie)
 723
 724     def add_post_processor(self, pp, when='post_process'):
 725         """Add a PostProcessor object to the end of the chain."""
 726         self._pps[when].append(pp)
 727         pp.set_downloader(self)
 728
 729     def add_post_hook(self, ph):
 730         """Add the post hook"""
 731         self._post_hooks.append(ph)
 732
 733     def add_progress_hook(self, ph):
 734         """Add the download progress hook"""
 735         self._progress_hooks.append(ph)
 736
 737     def add_postprocessor_hook(self, ph):
 738         """Add the postprocessing progress hook"""
 739         self._postprocessor_hooks.append(ph)
 740         for pps in self._pps.values():
 741             for pp in pps:
 742                 pp.add_progress_hook(ph)
 743
 744     def _bidi_workaround(self, message):
 745         if not hasattr(self, '_output_channel'):
 746             return message
 747
 748         assert hasattr(self, '_output_process')
 749         assert isinstance(message, compat_str)
 750         line_count = message.count('\n') + 1
 751         self._output_process.stdin.write((message + '\n').encode('utf-8'))
 752         self._output_process.stdin.flush()
 753         res = ''.join(self._output_channel.readline().decode('utf-8')
 754                       for _ in range(line_count))
 755         return res[:-len('\n')]
 756
 757     def _write_string(self, message, out=None, only_once=False):
 758         if only_once:
 759             if message in self._printed_messages:
 760                 return
 761             self._printed_messages.add(message)
 762         write_string(message, out=out, encoding=self.params.get('encoding'))
 763
 764     def to_stdout(self, message, skip_eol=False, quiet=False):
 765         """Print message to stdout"""
 766         if self.params.get('logger'):
 767             self.params['logger'].debug(message)
 768         elif not quiet or self.params.get('verbose'):
 769             self._write_string(
 770                 '%s%s' % (self._bidi_workaround(message), ('' if skip_eol else '\n')),
 771                 self._err_file if quiet else self._screen_file)
 772
 773     def to_stderr(self, message, only_once=False):
 774         """Print message to stderr"""
 775         assert isinstance(message, compat_str)
 776         if self.params.get('logger'):
 777             self.params['logger'].error(message)
 778         else:
 779             self._write_string('%s\n' % self._bidi_workaround(message), self._err_file, only_once=only_once)
 780
 781     def to_console_title(self, message):
 782         if not self.params.get('consoletitle', False):
 783             return
 784         message = remove_terminal_sequences(message)
 785         if compat_os_name == 'nt':
 786             if ctypes.windll.kernel32.GetConsoleWindow():
 787                 # c_wchar_p() might not be necessary if `message` is
 788                 # already of type unicode()
 789                 ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
 790         elif 'TERM' in os.environ:
 791             self._write_string('\033]0;%s\007' % message, self._screen_file)
 792
 793     def save_console_title(self):
 794         if not self.params.get('consoletitle', False):
 795             return
 796         if self.params.get('simulate'):
 797             return
 798         if compat_os_name != 'nt' and 'TERM' in os.environ:
 799             # Save the title on stack
 800             self._write_string('\033[22;0t', self._screen_file)
 801
 802     def restore_console_title(self):
 803         if not self.params.get('consoletitle', False):
 804             return
 805         if self.params.get('simulate'):
 806             return
 807         if compat_os_name != 'nt' and 'TERM' in os.environ:
 808             # Restore the title from stack
 809             self._write_string('\033[23;0t', self._screen_file)
 810
 811     def __enter__(self):
 812         self.save_console_title()
 813         return self
 814
 815     def __exit__(self, *args):
 816         self.restore_console_title()
 817
 818         if self.params.get('cookiefile') is not None:
 819             self.cookiejar.save(ignore_discard=True, ignore_expires=True)
 820
 821     def trouble(self, message=None, tb=None):
 822         """Determine action to take when a download problem appears.
 823
 824         Depending on if the downloader has been configured to ignore
 825         download errors or not, this method may throw an exception or
 826         not when errors are found, after printing the message.
 827
 828         tb, if given, is additional traceback information.
 829         """
 830         if message is not None:
 831             self.to_stderr(message)
 832         if self.params.get('verbose'):
 833             if tb is None:
 834                 if sys.exc_info()[0]:  # if .trouble has been called from an except block
 835                     tb = ''
 836                     if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
 837                         tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
 838                     tb += encode_compat_str(traceback.format_exc())
 839                 else:
 840                     tb_data = traceback.format_list(traceback.extract_stack())
 841                     tb = ''.join(tb_data)
 842             if tb:
 843                 self.to_stderr(tb)
 844         if not self.params.get('ignoreerrors'):
 845             if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
 846                 exc_info = sys.exc_info()[1].exc_info
 847             else:
 848                 exc_info = sys.exc_info()
 849             raise DownloadError(message, exc_info)
 850         self._download_retcode = 1
 851
 852     def to_screen(self, message, skip_eol=False):
 853         """Print message to stdout if not in quiet mode"""
 854         self.to_stdout(
 855             message, skip_eol, quiet=self.params.get('quiet', False))
 856
    class Styles(Enum):
        """Named text styles; a member's value is the style string that _format_text passes to format_text."""
        HEADERS = 'yellow'
        EMPHASIS = 'light blue'
        ID = 'green'
        DELIM = 'blue'
        ERROR = 'red'
        # Same value as HEADERS, so Enum makes WARNING an alias of HEADERS
        WARNING = 'yellow'
        SUPPRESS = 'light black'
 865
 866     def _format_text(self, handle, allow_colors, text, f, fallback=None, *, test_encoding=False):
 867         if test_encoding:
 868             original_text = text
 869             encoding = self.params.get('encoding') or getattr(handle, 'encoding', 'ascii')
 870             text = text.encode(encoding, 'ignore').decode(encoding)
 871             if fallback is not None and text != original_text:
 872                 text = fallback
 873         if isinstance(f, self.Styles):
 874             f = f.value
 875         return format_text(text, f) if allow_colors else text if fallback is None else fallback
 876
 877     def _format_screen(self, *args, **kwargs):
 878         return self._format_text(
 879             self._screen_file, self._allow_colors['screen'], *args, **kwargs)
 880
 881     def _format_err(self, *args, **kwargs):
 882         return self._format_text(
 883             self._err_file, self._allow_colors['err'], *args, **kwargs)
 884
 885     def report_warning(self, message, only_once=False):
 886         '''
 887         Print the message to stderr, it will be prefixed with 'WARNING:'
 888         If stderr is a tty file the 'WARNING:' will be colored
 889         '''
 890         if self.params.get('logger') is not None:
 891             self.params['logger'].warning(message)
 892         else:
 893             if self.params.get('no_warnings'):
 894                 return
 895             self.to_stderr(f'{self._format_err("WARNING:", self.Styles.WARNING)} {message}', only_once)
 896
 897     def deprecation_warning(self, message):
 898         if self.params.get('logger') is not None:
 899             self.params['logger'].warning('DeprecationWarning: {message}')
 900         else:
 901             self.to_stderr(f'{self._format_err("DeprecationWarning:", self.Styles.ERROR)} {message}', True)
 902
 903     def report_error(self, message, tb=None):
 904         '''
 905         Do the same as trouble, but prefixes the message with 'ERROR:', colored
 906         in red if stderr is a tty file.
 907         '''
 908         self.trouble(f'{self._format_err("ERROR:", self.Styles.ERROR)} {message}', tb)
 909
 910     def write_debug(self, message, only_once=False):
 911         '''Log debug message or Print message to stderr'''
 912         if not self.params.get('verbose', False):
 913             return
 914         message = '[debug] %s' % message
 915         if self.params.get('logger'):
 916             self.params['logger'].debug(message)
 917         else:
 918             self.to_stderr(message, only_once)
 919
 920     def report_file_already_downloaded(self, file_name):
 921         """Report file has already been fully downloaded."""
 922         try:
 923             self.to_screen('[download] %s has already been downloaded' % file_name)
 924         except UnicodeEncodeError:
 925             self.to_screen('[download] The file has already been downloaded')
 926
 927     def report_file_delete(self, file_name):
 928         """Report that existing file will be deleted."""
 929         try:
 930             self.to_screen('Deleting existing file %s' % file_name)
 931         except UnicodeEncodeError:
 932             self.to_screen('Deleting existing file')
 933
 934     def raise_no_formats(self, info, forced=False):
 935         has_drm = info.get('__has_drm')
 936         msg = 'This video is DRM protected' if has_drm else 'No video formats found!'
 937         expected = self.params.get('ignore_no_formats_error')
 938         if forced or not expected:
 939             raise ExtractorError(msg, video_id=info['id'], ie=info['extractor'],
 940                                  expected=has_drm or expected)
 941         else:
 942             self.report_warning(msg)
 943
 944     def parse_outtmpl(self):
 945         outtmpl_dict = self.params.get('outtmpl', {})
 946         if not isinstance(outtmpl_dict, dict):
 947             outtmpl_dict = {'default': outtmpl_dict}
 948         # Remove spaces in the default template
 949         if self.params.get('restrictfilenames'):
 950             sanitize = lambda x: x.replace(' - ', ' ').replace(' ', '-')
 951         else:
 952             sanitize = lambda x: x
 953         outtmpl_dict.update({
 954             k: sanitize(v) for k, v in DEFAULT_OUTTMPL.items()
 955             if outtmpl_dict.get(k) is None})
 956         for key, val in outtmpl_dict.items():
 957             if isinstance(val, bytes):
 958                 self.report_warning(
 959                     'Parameter outtmpl is bytes, but should be a unicode string. '
 960                     'Put  from __future__ import unicode_literals  at the top of your code file or consider switching to Python 3.x.')
 961         return outtmpl_dict
 962
 963     def get_output_path(self, dir_type='', filename=None):
 964         paths = self.params.get('paths', {})
 965         assert isinstance(paths, dict)
 966         path = os.path.join(
 967             expand_path(paths.get('home', '').strip()),
 968             expand_path(paths.get(dir_type, '').strip()) if dir_type else '',
 969             filename or '')
 970
 971         # Temporary fix for #4787
 972         # 'Treat' all problem characters by passing filename through preferredencoding
 973         # to workaround encoding issues with subprocess on python2 @ Windows
 974         if sys.version_info < (3, 0) and sys.platform == 'win32':
 975             path = encodeFilename(path, True).decode(preferredencoding())
 976         return sanitize_path(path, force=self.params.get('windowsfilenames'))
 977
 978     @staticmethod
 979     def _outtmpl_expandpath(outtmpl):
 980         # expand_path translates '%%' into '%' and '$$' into '$'
 981         # correspondingly that is not what we want since we need to keep
 982         # '%%' intact for template dict substitution step. Working around
 983         # with boundary-alike separator hack.
 984         sep = ''.join([random.choice(ascii_letters) for _ in range(32)])
 985         outtmpl = outtmpl.replace('%%', '%{0}%'.format(sep)).replace('$$', '${0}$'.format(sep))
 986
 987         # outtmpl should be expand_path'ed before template dict substitution
 988         # because meta fields may contain env variables we don't want to
 989         # be expanded. For example, for outtmpl "%(title)s.%(ext)s" and
 990         # title "Hello $PATH", we don't want `$PATH` to be expanded.
 991         return expand_path(outtmpl).replace(sep, '')
 992
 993     @staticmethod
 994     def escape_outtmpl(outtmpl):
 995         ''' Escape any remaining strings like %s, %abc% etc. '''
 996         return re.sub(
 997             STR_FORMAT_RE_TMPL.format('', '(?![%(\0])'),
 998             lambda mobj: ('' if mobj.group('has_key') else '%') + mobj.group(0),
 999             outtmpl)
1000
1001     @classmethod
1002     def validate_outtmpl(cls, outtmpl):
1003         ''' @return None or Exception object '''
1004         outtmpl = re.sub(
1005             STR_FORMAT_RE_TMPL.format('[^)]*', '[ljqBU]'),
1006             lambda mobj: f'{mobj.group(0)[:-1]}s',
1007             cls._outtmpl_expandpath(outtmpl))
1008         try:
1009             cls.escape_outtmpl(outtmpl) % collections.defaultdict(int)
1010             return None
1011         except ValueError as err:
1012             return err
1013
1014     @staticmethod
1015     def _copy_infodict(info_dict):
1016         info_dict = dict(info_dict)
1017         for key in ('__original_infodict', '__postprocessors'):
1018             info_dict.pop(key, None)
1019         return info_dict
1020
    def prepare_outtmpl(self, outtmpl, info_dict, sanitize=None):
        """ Make the outtmpl and info_dict suitable for substitution: ydl.escape_outtmpl(outtmpl) % info_dict

        @param outtmpl    The output template string
        @param info_dict  The info dict to take values from. 'epoch' is set on
                          it if missing; all other changes are made to a copy
        @param sanitize   Optional callable (field_name, value) -> value that is
                          applied to values substituted with c, s or r conversions
        @return           Tuple (rewritten template, dict of values) where the
                          template only refers to keys present in the dict
        """
        info_dict.setdefault('epoch', int(time.time()))  # keep epoch consistent once set

        # Everything below works on a copy, so the caller's dict is not modified further
        info_dict = self._copy_infodict(info_dict)
        info_dict['duration_string'] = (  # %(duration>%H-%M-%S)s is wrong if duration > 24hrs
            formatSeconds(info_dict['duration'], '-' if sanitize else ':')
            if info_dict.get('duration', None) is not None
            else None)
        info_dict['autonumber'] = self.params.get('autonumber_start', 1) - 1 + self._num_downloads
        if info_dict.get('resolution') is None:
            info_dict['resolution'] = self.format_resolution(info_dict, default=None)

        # For fields playlist_index, playlist_autonumber and autonumber convert all occurrences
        # of %(field)s to %(field)0Nd for backward compatibility
        field_size_compat_map = {
            'playlist_index': number_of_digits(info_dict.get('_last_playlist_index') or 0),
            'playlist_autonumber': number_of_digits(info_dict.get('n_entries') or 0),
            'autonumber': self.params.get('autonumber_size') or 5,
        }

        # Filled by create_key: generated key -> value to substitute
        TMPL_DICT = {}
        # Matches the %(...)X placeholders of the template, including the custom l, j, q, B, U conversions
        EXTERNAL_FORMAT_RE = re.compile(STR_FORMAT_RE_TMPL.format('[^)]*', f'[{STR_FORMAT_TYPES}ljqBU]'))
        MATH_FUNCTIONS = {
            '+': float.__add__,
            '-': float.__sub__,
        }
        # Field is of the form key1.key2...
        # where keys (except first) can be string, int or slice
        FIELD_RE = r'\w*(?:\.(?:\w+|{num}|{num}?(?::{num}?){{1,2}}))*'.format(num=r'(?:-?\d+)')
        # NOTE(review): the '.' in the number pattern is unescaped (matches any character) and the
        # alternation is not wrapped in a group before being embedded below - TODO confirm both are intended
        MATH_FIELD_RE = r'''{field}|{num}'''.format(field=FIELD_RE, num=r'-?\d+(?:.\d+)?')
        MATH_OPERATORS_RE = r'(?:%s)' % '|'.join(map(re.escape, MATH_FUNCTIONS.keys()))
        # Parses the text between the parentheses of a placeholder:
        # [-]fields[maths][>strf_format][,alternate][|default]
        INTERNAL_FORMAT_RE = re.compile(r'''(?x)
            (?P<negate>-)?
            (?P<fields>{field})
            (?P<maths>(?:{math_op}{math_field})*)
            (?:>(?P<strf_format>.+?))?
            (?P<alternate>(?<!\\),[^|)]+)?
            (?:\|(?P<default>.*?))?
            $'''.format(field=FIELD_RE, math_op=MATH_OPERATORS_RE, math_field=MATH_FIELD_RE))

        def _traverse_infodict(k):
            """Look up the dot-separated key path `k` in info_dict; a leading '.' is ignored"""
            k = k.split('.')
            if k[0] == '':
                k.pop(0)
            return traverse_obj(info_dict, k, is_user_input=True, traverse_string=True)

        def get_value(mdict):
            """Compute the value for one parsed field expression (a groupdict of INTERNAL_FORMAT_RE)"""
            # Object traversal
            value = _traverse_infodict(mdict['fields'])
            # Negative
            if mdict['negate']:
                value = float_or_none(value)
                if value is not None:
                    value *= -1
            # Do maths
            offset_key = mdict['maths']
            if offset_key:
                value = float_or_none(value)
                operator = None
                # Consume the maths string piece by piece, alternating between
                # an operator (operator is None) and its operand
                while offset_key:
                    item = re.match(
                        MATH_FIELD_RE if operator else MATH_OPERATORS_RE,
                        offset_key).group(0)
                    offset_key = offset_key[len(item):]
                    if operator is None:
                        operator = MATH_FUNCTIONS[item]
                        continue
                    item, multiplier = (item[1:], -1) if item[0] == '-' else (item, 1)
                    # The operand is either a number literal or another field of info_dict
                    offset = float_or_none(item)
                    if offset is None:
                        offset = float_or_none(_traverse_infodict(item))
                    try:
                        value = operator(value, multiplier * offset)
                    except (TypeError, ZeroDivisionError):
                        return None
                    operator = None
            # Datetime formatting
            if mdict['strf_format']:
                value = strftime_or_none(value, mdict['strf_format'].replace('\\,', ','))

            return value

        # Placeholder used when a field has no value and no default is given
        na = self.params.get('outtmpl_na_placeholder', 'NA')

        def _dumpjson_default(obj):
            """json.dumps fallback: serialize sets and LazyLists as lists"""
            if isinstance(obj, (set, LazyList)):
                return list(obj)
            raise TypeError(f'Object of type {type(obj).__name__} is not JSON serializable')

        def create_key(outer_mobj):
            """Replacement callback for EXTERNAL_FORMAT_RE.

            Evaluates one placeholder, stores the resulting value in TMPL_DICT
            under a generated key and returns the placeholder rewritten to
            refer to that key. Matches without a key are returned unchanged.
            """
            if not outer_mobj.group('has_key'):
                return outer_mobj.group(0)
            key = outer_mobj.group('key')
            mobj = re.match(INTERNAL_FORMAT_RE, key)
            # Last component of the first field; passed to sanitize as the field name
            initial_field = mobj.group('fields').split('.')[-1] if mobj else ''
            value, default = None, na
            # Try the field, then each alternate in turn, until one yields a value
            while mobj:
                mobj = mobj.groupdict()
                default = mobj['default'] if mobj['default'] is not None else default
                value = get_value(mobj)
                if value is None and mobj['alternate']:
                    mobj = re.match(INTERNAL_FORMAT_RE, mobj['alternate'][1:])
                else:
                    break

            fmt = outer_mobj.group('format')
            if fmt == 's' and value is not None and key in field_size_compat_map.keys():
                fmt = '0{:d}d'.format(field_size_compat_map[key])

            value = default if value is None else value

            flags = outer_mobj.group('conversion') or ''
            # The same format with its conversion character replaced by 's'
            str_fmt = f'{fmt[:-1]}s'
            if fmt[-1] == 'l':  # list
                delim = '\n' if '#' in flags else ', '
                value, fmt = delim.join(variadic(value)), str_fmt
            elif fmt[-1] == 'j':  # json
                value, fmt = json.dumps(value, default=_dumpjson_default, indent=4 if '#' in flags else None), str_fmt
            elif fmt[-1] == 'q':  # quoted
                value = map(str, variadic(value) if '#' in flags else [value])
                value, fmt = ' '.join(map(compat_shlex_quote, value)), str_fmt
            elif fmt[-1] == 'B':  # bytes
                # The format is applied to the UTF-8 encoded value, so width/precision count bytes
                value = f'%{str_fmt}'.encode('utf-8') % str(value).encode('utf-8')
                value, fmt = value.decode('utf-8', 'ignore'), 's'
            elif fmt[-1] == 'U':  # unicode normalized
                value, fmt = unicodedata.normalize(
                    # "+" = compatibility equivalence, "#" = NFD
                    'NF%s%s' % ('K' if '+' in flags else '', 'D' if '#' in flags else 'C'),
                    value), str_fmt
            elif fmt[-1] == 'c':
                # A truthy value is reduced to its first character; otherwise it is formatted as a string
                if value:
                    value = str(value)[0]
                else:
                    fmt = str_fmt
            elif fmt[-1] not in 'rs':  # numeric
                value = float_or_none(value)
                if value is None:
                    value, fmt = default, 's'

            if sanitize:
                if fmt[-1] == 'r':
                    # If value is an object, sanitize might convert it to a string
                    # So we convert it to repr first
                    value, fmt = repr(value), str_fmt
                if fmt[-1] in 'csr':
                    value = sanitize(initial_field, value)

            # The generated key is the original key (with a NUL inserted after every '%')
            # followed by a NUL and the original format
            key = '%s\0%s' % (key.replace('%', '%\0'), outer_mobj.group('format'))
            TMPL_DICT[key] = value
            return '{prefix}%({key}){fmt}'.format(key=key, fmt=fmt, prefix=outer_mobj.group('prefix'))

        return EXTERNAL_FORMAT_RE.sub(create_key, outtmpl), TMPL_DICT
1174
1175     def evaluate_outtmpl(self, outtmpl, info_dict, *args, **kwargs):
1176         outtmpl, info_dict = self.prepare_outtmpl(outtmpl, info_dict, *args, **kwargs)
1177         return self.escape_outtmpl(outtmpl) % info_dict
1178
1179     def _prepare_filename(self, info_dict, tmpl_type='default'):
1180         try:
1181             sanitize = lambda k, v: sanitize_filename(
1182                 compat_str(v),
1183                 restricted=self.params.get('restrictfilenames'),
1184                 is_id=(k == 'id' or k.endswith('_id')))
1185             outtmpl = self._outtmpl_expandpath(self.outtmpl_dict.get(tmpl_type, self.outtmpl_dict['default']))
1186             filename = self.evaluate_outtmpl(outtmpl, info_dict, sanitize)
1187
1188             force_ext = OUTTMPL_TYPES.get(tmpl_type)
1189             if filename and force_ext is not None:
1190                 filename = replace_extension(filename, force_ext, info_dict.get('ext'))
1191
1192             # https://github.com/blackjack4494/youtube-dlc/issues/85
1193             trim_file_name = self.params.get('trim_file_name', False)
1194             if trim_file_name:
1195                 no_ext, *ext = filename.rsplit('.', 2)
1196                 filename = join_nonempty(no_ext[:trim_file_name], *ext, delim='.')
1197
1198             return filename
1199         except ValueError as err:
1200             self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
1201             return None
1202
1203     def prepare_filename(self, info_dict, dir_type='', warn=False):
1204         """Generate the output filename."""
1205
1206         filename = self._prepare_filename(info_dict, dir_type or 'default')
1207         if not filename and dir_type not in ('', 'temp'):
1208             return ''
1209
1210         if warn:
1211             if not self.params.get('paths'):
1212                 pass
1213             elif filename == '-':
1214                 self.report_warning('--paths is ignored when an outputting to stdout', only_once=True)
1215             elif os.path.isabs(filename):
1216                 self.report_warning('--paths is ignored since an absolute path is given in output template', only_once=True)
1217         if filename == '-' or not filename:
1218             return filename
1219
1220         return self.get_output_path(dir_type, filename)
1221
1222     def _match_entry(self, info_dict, incomplete=False, silent=False):
1223         """ Returns None if the file should be downloaded """
1224
1225         video_title = info_dict.get('title', info_dict.get('id', 'video'))
1226
1227         def check_filter():
1228             if 'title' in info_dict:
1229                 # This can happen when we're just evaluating the playlist
1230                 title = info_dict['title']
1231                 matchtitle = self.params.get('matchtitle', False)
1232                 if matchtitle:
1233                     if not re.search(matchtitle, title, re.IGNORECASE):
1234                         return '"' + title + '" title did not match pattern "' + matchtitle + '"'
1235                 rejecttitle = self.params.get('rejecttitle', False)
1236                 if rejecttitle:
1237                     if re.search(rejecttitle, title, re.IGNORECASE):
1238                         return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
1239             date = info_dict.get('upload_date')
1240             if date is not None:
1241                 dateRange = self.params.get('daterange', DateRange())
1242                 if date not in dateRange:
1243                     return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
1244             view_count = info_dict.get('view_count')
1245             if view_count is not None:
1246                 min_views = self.params.get('min_views')
1247                 if min_views is not None and view_count < min_views:
1248                     return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
1249                 max_views = self.params.get('max_views')
1250                 if max_views is not None and view_count > max_views:
1251                     return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
1252             if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
1253                 return 'Skipping "%s" because it is age restricted' % video_title
1254
1255             match_filter = self.params.get('match_filter')
1256             if match_filter is not None:
1257                 try:
1258                     ret = match_filter(info_dict, incomplete=incomplete)
1259                 except TypeError:
1260                     # For backward compatibility
1261                     ret = None if incomplete else match_filter(info_dict)
1262                 if ret is not None:
1263                     return ret
1264             return None
1265
1266         if self.in_download_archive(info_dict):
1267             reason = '%s has already been recorded in the archive' % video_title
1268             break_opt, break_err = 'break_on_existing', ExistingVideoReached
1269         else:
1270             reason = check_filter()
1271             break_opt, break_err = 'break_on_reject', RejectedVideoReached
1272         if reason is not None:
1273             if not silent:
1274                 self.to_screen('[download] ' + reason)
1275             if self.params.get(break_opt, False):
1276                 raise break_err()
1277         return reason
1278
1279     @staticmethod
1280     def add_extra_info(info_dict, extra_info):
1281         '''Set the keys from extra_info in info dict if they are missing'''
1282         for key, value in extra_info.items():
1283             info_dict.setdefault(key, value)
1284
1285     def extract_info(self, url, download=True, ie_key=None, extra_info=None,
1286                      process=True, force_generic_extractor=False):
1287         """
1288         Return a list with a dictionary for each video extracted.
1289
1290         Arguments:
1291         url -- URL to extract
1292
1293         Keyword arguments:
1294         download -- whether to download videos during extraction
1295         ie_key -- extractor key hint
1296         extra_info -- dictionary containing the extra values to add to each result
1297         process -- whether to resolve all unresolved references (URLs, playlist items),
1298             must be True for download to work.
1299         force_generic_extractor -- force using the generic extractor
1300         """
1301
1302         if extra_info is None:
1303             extra_info = {}
1304
1305         if not ie_key and force_generic_extractor:
1306             ie_key = 'Generic'
1307
1308         if ie_key:
1309             ies = {ie_key: self._get_info_extractor_class(ie_key)}
1310         else:
1311             ies = self._ies
1312
1313         for ie_key, ie in ies.items():
1314             if not ie.suitable(url):
1315                 continue
1316
1317             if not ie.working():
1318                 self.report_warning('The program functionality for this site has been marked as broken, '
1319                                     'and will probably not work.')
1320
1321             temp_id = ie.get_temp_id(url)
1322             if temp_id is not None and self.in_download_archive({'id': temp_id, 'ie_key': ie_key}):
1323                 self.to_screen(f'[{ie_key}] {temp_id}: has already been recorded in the archive')
1324                 if self.params.get('break_on_existing', False):
1325                     raise ExistingVideoReached()
1326                 break
1327             return self.__extract_info(url, self.get_info_extractor(ie_key), download, extra_info, process)
1328         else:
1329             self.report_error('no suitable InfoExtractor for URL %s' % url)
1330
1331     def __handle_extraction_exceptions(func):
1332         @functools.wraps(func)
1333         def wrapper(self, *args, **kwargs):
1334             try:
1335                 return func(self, *args, **kwargs)
1336             except GeoRestrictedError as e:
1337                 msg = e.msg
1338                 if e.countries:
1339                     msg += '\nThis video is available in %s.' % ', '.join(
1340                         map(ISO3166Utils.short2full, e.countries))
1341                 msg += '\nYou might want to use a VPN or a proxy server (with --proxy) to workaround.'
1342                 self.report_error(msg)
1343             except ExtractorError as e:  # An error we somewhat expected
1344                 self.report_error(compat_str(e), e.format_traceback())
1345             except ReExtractInfo as e:
1346                 if e.expected:
1347                     self.to_screen(f'{e}; Re-extracting data')
1348                 else:
1349                     self.to_stderr('\r')
1350                     self.report_warning(f'{e}; Re-extracting data')
1351                 return wrapper(self, *args, **kwargs)
1352             except (DownloadCancelled, LazyList.IndexError, PagedList.IndexError):
1353                 raise
1354             except Exception as e:
1355                 if self.params.get('ignoreerrors'):
1356                     self.report_error(error_to_compat_str(e), tb=encode_compat_str(traceback.format_exc()))
1357                 else:
1358                     raise
1359         return wrapper
1360
1361     def _wait_for_video(self, ie_result):
1362         if (not self.params.get('wait_for_video')
1363                 or ie_result.get('_type', 'video') != 'video'
1364                 or ie_result.get('formats') or ie_result.get('url')):
1365             return
1366
1367         format_dur = lambda dur: '%02d:%02d:%02d' % timetuple_from_msec(dur * 1000)[:-1]
1368         last_msg = ''
1369
1370         def progress(msg):
1371             nonlocal last_msg
1372             self.to_screen(msg + ' ' * (len(last_msg) - len(msg)) + '\r', skip_eol=True)
1373             last_msg = msg
1374
1375         min_wait, max_wait = self.params.get('wait_for_video')
1376         diff = try_get(ie_result, lambda x: x['release_timestamp'] - time.time())
1377         if diff is None and ie_result.get('live_status') == 'is_upcoming':
1378             diff = random.randrange(min_wait, max_wait) if (max_wait and min_wait) else (max_wait or min_wait)
1379             self.report_warning('Release time of video is not known')
1380         elif (diff or 0) <= 0:
1381             self.report_warning('Video should already be available according to extracted info')
1382         diff = min(max(diff or 0, min_wait or 0), max_wait or float('inf'))
1383         self.to_screen(f'[wait] Waiting for {format_dur(diff)} - Press Ctrl+C to try now')
1384
1385         wait_till = time.time() + diff
1386         try:
1387             while True:
1388                 diff = wait_till - time.time()
1389                 if diff <= 0:
1390                     progress('')
1391                     raise ReExtractInfo('[wait] Wait period ended', expected=True)
1392                 progress(f'[wait] Remaining time until next attempt: {self._format_screen(format_dur(diff), self.Styles.EMPHASIS)}')
1393                 time.sleep(1)
1394         except KeyboardInterrupt:
1395             progress('')
1396             raise ReExtractInfo('[wait] Interrupted by user', expected=True)
1397         except BaseException as e:
1398             if not isinstance(e, ReExtractInfo):
1399                 self.to_screen('')
1400             raise
1401
1402     @__handle_extraction_exceptions
1403     def __extract_info(self, url, ie, download, extra_info, process):
1404         ie_result = ie.extract(url)
1405         if ie_result is None:  # Finished already (backwards compatibility; listformats and friends should be moved here)
1406             return
1407         if isinstance(ie_result, list):
1408             # Backwards compatibility: old IE result format
1409             ie_result = {
1410                 '_type': 'compat_list',
1411                 'entries': ie_result,
1412             }
1413         if extra_info.get('original_url'):
1414             ie_result.setdefault('original_url', extra_info['original_url'])
1415         self.add_default_extra_info(ie_result, ie, url)
1416         if process:
1417             self._wait_for_video(ie_result)
1418             return self.process_ie_result(ie_result, download, extra_info)
1419         else:
1420             return ie_result
1421
1422     def add_default_extra_info(self, ie_result, ie, url):
1423         if url is not None:
1424             self.add_extra_info(ie_result, {
1425                 'webpage_url': url,
1426                 'original_url': url,
1427                 'webpage_url_basename': url_basename(url),
1428                 'webpage_url_domain': get_domain(url),
1429             })
1430         if ie is not None:
1431             self.add_extra_info(ie_result, {
1432                 'extractor': ie.IE_NAME,
1433                 'extractor_key': ie.ie_key(),
1434             })
1435
1436     def process_ie_result(self, ie_result, download=True, extra_info=None):
1437         """
1438         Take the result of the ie(may be modified) and resolve all unresolved
1439         references (URLs, playlist items).
1440
1441         It will also download the videos if 'download'.
1442         Returns the resolved ie_result.
1443         """
1444         if extra_info is None:
1445             extra_info = {}
1446         result_type = ie_result.get('_type', 'video')
1447
1448         if result_type in ('url', 'url_transparent'):
1449             ie_result['url'] = sanitize_url(ie_result['url'])
1450             if ie_result.get('original_url'):
1451                 extra_info.setdefault('original_url', ie_result['original_url'])
1452
1453             extract_flat = self.params.get('extract_flat', False)
1454             if ((extract_flat == 'in_playlist' and 'playlist' in extra_info)
1455                     or extract_flat is True):
1456                 info_copy = ie_result.copy()
1457                 ie = try_get(ie_result.get('ie_key'), self.get_info_extractor)
1458                 if ie and not ie_result.get('id'):
1459                     info_copy['id'] = ie.get_temp_id(ie_result['url'])
1460                 self.add_default_extra_info(info_copy, ie, ie_result['url'])
1461                 self.add_extra_info(info_copy, extra_info)
1462                 info_copy, _ = self.pre_process(info_copy)
1463                 self.__forced_printings(info_copy, self.prepare_filename(info_copy), incomplete=True)
1464                 if self.params.get('force_write_download_archive', False):
1465                     self.record_download_archive(info_copy)
1466                 return ie_result
1467
1468         if result_type == 'video':
1469             self.add_extra_info(ie_result, extra_info)
1470             ie_result = self.process_video_result(ie_result, download=download)
1471             additional_urls = (ie_result or {}).get('additional_urls')
1472             if additional_urls:
1473                 # TODO: Improve MetadataParserPP to allow setting a list
1474                 if isinstance(additional_urls, compat_str):
1475                     additional_urls = [additional_urls]
1476                 self.to_screen(
1477                     '[info] %s: %d additional URL(s) requested' % (ie_result['id'], len(additional_urls)))
1478                 self.write_debug('Additional URLs: "%s"' % '", "'.join(additional_urls))
1479                 ie_result['additional_entries'] = [
1480                     self.extract_info(
1481                         url, download, extra_info,
1482                         force_generic_extractor=self.params.get('force_generic_extractor'))
1483                     for url in additional_urls
1484                 ]
1485             return ie_result
1486         elif result_type == 'url':
1487             # We have to add extra_info to the results because it may be
1488             # contained in a playlist
1489             return self.extract_info(
1490                 ie_result['url'], download,
1491                 ie_key=ie_result.get('ie_key'),
1492                 extra_info=extra_info)
1493         elif result_type == 'url_transparent':
1494             # Use the information from the embedding page
1495             info = self.extract_info(
1496                 ie_result['url'], ie_key=ie_result.get('ie_key'),
1497                 extra_info=extra_info, download=False, process=False)
1498
1499             # extract_info may return None when ignoreerrors is enabled and
1500             # extraction failed with an error, don't crash and return early
1501             # in this case
1502             if not info:
1503                 return info
1504
1505             force_properties = dict(
1506                 (k, v) for k, v in ie_result.items() if v is not None)
1507             for f in ('_type', 'url', 'id', 'extractor', 'extractor_key', 'ie_key'):
1508                 if f in force_properties:
1509                     del force_properties[f]
1510             new_result = info.copy()
1511             new_result.update(force_properties)
1512
1513             # Extracted info may not be a video result (i.e.
1514             # info.get('_type', 'video') != video) but rather an url or
1515             # url_transparent. In such cases outer metadata (from ie_result)
1516             # should be propagated to inner one (info). For this to happen
1517             # _type of info should be overridden with url_transparent. This
1518             # fixes issue from https://github.com/ytdl-org/youtube-dl/pull/11163.
1519             if new_result.get('_type') == 'url':
1520                 new_result['_type'] = 'url_transparent'
1521
1522             return self.process_ie_result(
1523                 new_result, download=download, extra_info=extra_info)
1524         elif result_type in ('playlist', 'multi_video'):
1525             # Protect from infinite recursion due to recursively nested playlists
1526             # (see https://github.com/ytdl-org/youtube-dl/issues/27833)
1527             webpage_url = ie_result['webpage_url']
1528             if webpage_url in self._playlist_urls:
1529                 self.to_screen(
1530                     '[download] Skipping already downloaded playlist: %s'
1531                     % ie_result.get('title') or ie_result.get('id'))
1532                 return
1533
1534             self._playlist_level += 1
1535             self._playlist_urls.add(webpage_url)
1536             self._sanitize_thumbnails(ie_result)
1537             try:
1538                 return self.__process_playlist(ie_result, download)
1539             finally:
1540                 self._playlist_level -= 1
1541                 if not self._playlist_level:
1542                     self._playlist_urls.clear()
1543         elif result_type == 'compat_list':
1544             self.report_warning(
1545                 'Extractor %s returned a compat_list result. '
1546                 'It needs to be updated.' % ie_result.get('extractor'))
1547
1548             def _fixup(r):
1549                 self.add_extra_info(r, {
1550                     'extractor': ie_result['extractor'],
1551                     'webpage_url': ie_result['webpage_url'],
1552                     'webpage_url_basename': url_basename(ie_result['webpage_url']),
1553                     'webpage_url_domain': get_domain(ie_result['webpage_url']),
1554                     'extractor_key': ie_result['extractor_key'],
1555                 })
1556                 return r
1557             ie_result['entries'] = [
1558                 self.process_ie_result(_fixup(r), download, extra_info)
1559                 for r in ie_result['entries']
1560             ]
1561             return ie_result
1562         else:
1563             raise Exception('Invalid result type: %s' % result_type)
1564
    def _ensure_dir_exists(self, path):
        """Call make_dir for `path`, passing self.report_error as its second argument, and return its result"""
        return make_dir(path, self.report_error)
1567
    def __process_playlist(self, ie_result, download):
        """
        Select the requested entries of a playlist result and process them.

        The selection is controlled by the 'playliststart', 'playlistend' and
        'playlist_items' parameters; 'playlistreverse' and 'playlistrandom'
        change the processing order. Unless simulating, playlist-level files
        (infojson, description, thumbnails) are written first.

        Returns ie_result with 'entries' replaced by the processed results,
        or None when writing the playlist infojson/description returns None.
        Raises EntryNotInPlaylist when ie_result has no 'entries' or when an
        entry of a partially extracted playlist cannot be found.
        """
        # We process each entry in the playlist
        playlist = ie_result.get('title') or ie_result.get('id')
        self.to_screen('[download] Downloading playlist: %s' % playlist)

        if 'entries' not in ie_result:
            raise EntryNotInPlaylist('There are no entries')

        # Unique placeholder marking positions for which no entry is available
        MissingEntry = object()
        incomplete_entries = bool(ie_result.get('requested_entries'))
        if incomplete_entries:
            def fill_missing_entries(entries, indices):
                # Put each entry at its 1-based playlist position; the gaps keep MissingEntry
                ret = [MissingEntry] * max(indices)
                for i, entry in zip(indices, entries):
                    ret[i - 1] = entry
                return ret
            ie_result['entries'] = fill_missing_entries(ie_result['entries'], ie_result['requested_entries'])

        playlist_results = []

        playliststart = self.params.get('playliststart', 1)
        playlistend = self.params.get('playlistend')
        # For backwards compatibility, interpret -1 as whole list
        if playlistend == -1:
            playlistend = None

        playlistitems_str = self.params.get('playlist_items')
        playlistitems = None
        if playlistitems_str is not None:
            def iter_playlistitems(format):
                # Expand a spec such as "1,3-5" into 1, 3, 4, 5
                for string_segment in format.split(','):
                    if '-' in string_segment:
                        start, end = string_segment.split('-')
                        for item in range(int(start), int(end) + 1):
                            yield int(item)
                    else:
                        yield int(string_segment)
            playlistitems = orderedSet(iter_playlistitems(playlistitems_str))

        ie_entries = ie_result['entries']
        # For a list the total is filled in now and '%%d' leaves a placeholder
        # for the number of entries actually processed
        msg = (
            'Downloading %d videos' if not isinstance(ie_entries, list)
            else 'Collected %d videos; downloading %%d of them' % len(ie_entries))

        if isinstance(ie_entries, list):
            def get_entry(i):
                return ie_entries[i - 1]
        else:
            if not isinstance(ie_entries, (PagedList, LazyList)):
                ie_entries = LazyList(ie_entries)

            def get_entry(i):
                # Fetching an entry from a non-list container goes through
                # the shared extraction error handling
                return YoutubeDL.__handle_extraction_exceptions(
                    lambda self, i: ie_entries[i - 1]
                )(self, i)

        entries = []
        # Without explicit items, count upwards from playliststart until the
        # entries run out or playlistend is passed
        items = playlistitems if playlistitems is not None else itertools.count(playliststart)
        for i in items:
            if i == 0:
                continue
            if playlistitems is None and playlistend is not None and playlistend < i:
                break
            entry = None
            try:
                entry = get_entry(i)
                if entry is MissingEntry:
                    raise EntryNotInPlaylist()
            except (IndexError, EntryNotInPlaylist):
                if incomplete_entries:
                    raise EntryNotInPlaylist(f'Entry {i} cannot be found')
                elif not playlistitems:
                    break
                # With explicit playlist items, an unavailable index is kept as None
            entries.append(entry)
            try:
                # Silent pre-check; only used to stop collecting entries early
                # when a break_on_* exception is raised
                if entry is not None:
                    self._match_entry(entry, incomplete=True, silent=True)
            except (ExistingVideoReached, RejectedVideoReached):
                break
        ie_result['entries'] = entries

        # Save playlist_index before re-ordering
        entries = [
            ((playlistitems[i - 1] if playlistitems else i + playliststart - 1), entry)
            for i, entry in enumerate(entries, 1)
            if entry is not None]
        n_entries = len(entries)

        if not playlistitems and (playliststart != 1 or playlistend):
            playlistitems = list(range(playliststart, playliststart + n_entries))
        ie_result['requested_entries'] = playlistitems

        _infojson_written = False
        if not self.params.get('simulate') and self.params.get('allow_playlist_files', True):
            # Dict used to build the filenames of the playlist-level files;
            # values already present in ie_result take precedence
            ie_copy = {
                'playlist': playlist,
                'playlist_id': ie_result.get('id'),
                'playlist_title': ie_result.get('title'),
                'playlist_uploader': ie_result.get('uploader'),
                'playlist_uploader_id': ie_result.get('uploader_id'),
                'playlist_index': 0,
                'n_entries': n_entries,
            }
            ie_copy.update(dict(ie_result))

            _infojson_written = self._write_info_json(
                'playlist', ie_result, self.prepare_filename(ie_copy, 'pl_infojson'))
            if _infojson_written is None:
                return
            if self._write_description('playlist', ie_result,
                                       self.prepare_filename(ie_copy, 'pl_description')) is None:
                return
            # TODO: This should be passed to ThumbnailsConvertor if necessary
            self._write_thumbnails('playlist', ie_copy, self.prepare_filename(ie_copy, 'pl_thumbnail'))

        if self.params.get('playlistreverse', False):
            entries = entries[::-1]
        if self.params.get('playlistrandom', False):
            random.shuffle(entries)

        x_forwarded_for = ie_result.get('__x_forwarded_for_ip')

        self.to_screen('[%s] playlist %s: %s' % (ie_result['extractor'], playlist, msg % n_entries))
        failures = 0
        # No limit on failures unless 'skip_playlist_after_errors' is set
        max_failures = self.params.get('skip_playlist_after_errors') or float('inf')
        for i, entry_tuple in enumerate(entries, 1):
            playlist_index, entry = entry_tuple
            if 'playlist-index' in self.params.get('compat_opts', []):
                # Compat mode: derive the index from the position after re-ordering
                playlist_index = playlistitems[i - 1] if playlistitems else i + playliststart - 1
            self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
            # This __x_forwarded_for_ip thing is a bit ugly but requires
            # minimal changes
            if x_forwarded_for:
                entry['__x_forwarded_for_ip'] = x_forwarded_for
            extra = {
                'n_entries': n_entries,
                '_last_playlist_index': max(playlistitems) if playlistitems else (playlistend or n_entries),
                'playlist_index': playlist_index,
                'playlist_autonumber': i,
                'playlist': playlist,
                'playlist_id': ie_result.get('id'),
                'playlist_title': ie_result.get('title'),
                'playlist_uploader': ie_result.get('uploader'),
                'playlist_uploader_id': ie_result.get('uploader_id'),
                'extractor': ie_result['extractor'],
                'webpage_url': ie_result['webpage_url'],
                'webpage_url_basename': url_basename(ie_result['webpage_url']),
                'webpage_url_domain': get_domain(ie_result['webpage_url']),
                'extractor_key': ie_result['extractor_key'],
            }

            if self._match_entry(entry, incomplete=True) is not None:
                continue

            entry_result = self.__process_iterable_entry(entry, download, extra)
            # A falsy result counts as a failed entry
            if not entry_result:
                failures += 1
            if failures >= max_failures:
                self.report_error(
                    'Skipping the remaining entries in playlist "%s" since %d items failed extraction' % (playlist, failures))
                break
            playlist_results.append(entry_result)
        ie_result['entries'] = playlist_results

        # Write the updated info to json
        # (ie_copy is only defined when _infojson_written was set above)
        if _infojson_written and self._write_info_json(
                'updated playlist', ie_result,
                self.prepare_filename(ie_copy, 'pl_infojson'), overwrite=True) is None:
            return
        self.to_screen('[download] Finished downloading playlist: %s' % playlist)
        return ie_result
1739
    @__handle_extraction_exceptions
    def __process_iterable_entry(self, entry, download, extra_info):
        """
        Process one playlist entry with process_ie_result and return its result.

        The decorator applies the common extraction error handling, so None is
        returned when an error is reported instead of being raised.
        """
        return self.process_ie_result(
            entry, download=download, extra_info=extra_info)
1744
1745     def _build_format_filter(self, filter_spec):
1746         " Returns a function to filter the formats according to the filter_spec "
1747
1748         OPERATORS = {
1749             '<': operator.lt,
1750             '<=': operator.le,
1751             '>': operator.gt,
1752             '>=': operator.ge,
1753             '=': operator.eq,
1754             '!=': operator.ne,
1755         }
1756         operator_rex = re.compile(r'''(?x)\s*
1757             (?P<key>width|height|tbr|abr|vbr|asr|filesize|filesize_approx|fps)\s*
1758             (?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
1759             (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)\s*
1760             ''' % '|'.join(map(re.escape, OPERATORS.keys())))
1761         m = operator_rex.fullmatch(filter_spec)
1762         if m:
1763             try:
1764                 comparison_value = int(m.group('value'))
1765             except ValueError:
1766                 comparison_value = parse_filesize(m.group('value'))
1767                 if comparison_value is None:
1768                     comparison_value = parse_filesize(m.group('value') + 'B')
1769                 if comparison_value is None:
1770                     raise ValueError(
1771                         'Invalid value %r in format specification %r' % (
1772                             m.group('value'), filter_spec))
1773             op = OPERATORS[m.group('op')]
1774
1775         if not m:
1776             STR_OPERATORS = {
1777                 '=': operator.eq,
1778                 '^=': lambda attr, value: attr.startswith(value),
1779                 '$=': lambda attr, value: attr.endswith(value),
1780                 '*=': lambda attr, value: value in attr,
1781             }
1782             str_operator_rex = re.compile(r'''(?x)\s*
1783                 (?P<key>[a-zA-Z0-9._-]+)\s*
1784                 (?P<negation>!\s*)?(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
1785                 (?P<value>[a-zA-Z0-9._-]+)\s*
1786                 ''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))
1787             m = str_operator_rex.fullmatch(filter_spec)
1788             if m:
1789                 comparison_value = m.group('value')
1790                 str_op = STR_OPERATORS[m.group('op')]
1791                 if m.group('negation'):
1792                     op = lambda attr, value: not str_op(attr, value)
1793                 else:
1794                     op = str_op
1795
1796         if not m:
1797             raise SyntaxError('Invalid filter specification %r' % filter_spec)
1798
1799         def _filter(f):
1800             actual_value = f.get(m.group('key'))
1801             if actual_value is None:
1802                 return m.group('none_inclusive')
1803             return op(actual_value, comparison_value)
1804         return _filter
1805
1806     def _check_formats(self, formats):
1807         for f in formats:
1808             self.to_screen('[info] Testing format %s' % f['format_id'])
1809             path = self.get_output_path('temp')
1810             if not self._ensure_dir_exists(f'{path}/'):
1811                 continue
1812             temp_file = tempfile.NamedTemporaryFile(suffix='.tmp', delete=False, dir=path or None)
1813             temp_file.close()
1814             try:
1815                 success, _ = self.dl(temp_file.name, f, test=True)
1816             except (DownloadError, IOError, OSError, ValueError) + network_exceptions:
1817                 success = False
1818             finally:
1819                 if os.path.exists(temp_file.name):
1820                     try:
1821                         os.remove(temp_file.name)
1822                     except OSError:
1823                         self.report_warning('Unable to delete temporary file "%s"' % temp_file.name)
1824             if success:
1825                 yield f
1826             else:
1827                 self.to_screen('[info] Unable to download format %s. Skipping...' % f['format_id'])
1828
1829     def _default_format_spec(self, info_dict, download=True):
1830
1831         def can_merge():
1832             merger = FFmpegMergerPP(self)
1833             return merger.available and merger.can_merge()
1834
1835         prefer_best = (
1836             not self.params.get('simulate')
1837             and download
1838             and (
1839                 not can_merge()
1840                 or info_dict.get('is_live', False)
1841                 or self.outtmpl_dict['default'] == '-'))
1842         compat = (
1843             prefer_best
1844             or self.params.get('allow_multiple_audio_streams', False)
1845             or 'format-spec' in self.params.get('compat_opts', []))
1846
1847         return (
1848             'best/bestvideo+bestaudio' if prefer_best
1849             else 'bestvideo*+bestaudio/best' if not compat
1850             else 'bestvideo+bestaudio/best')
1851
1852     def build_format_selector(self, format_spec):
1853         def syntax_error(note, start):
1854             message = (
1855                 'Invalid format specification: '
1856                 '{0}\n\t{1}\n\t{2}^'.format(note, format_spec, ' ' * start[1]))
1857             return SyntaxError(message)
1858
1859         PICKFIRST = 'PICKFIRST'
1860         MERGE = 'MERGE'
1861         SINGLE = 'SINGLE'
1862         GROUP = 'GROUP'
1863         FormatSelector = collections.namedtuple('FormatSelector', ['type', 'selector', 'filters'])
1864
1865         allow_multiple_streams = {'audio': self.params.get('allow_multiple_audio_streams', False),
1866                                   'video': self.params.get('allow_multiple_video_streams', False)}
1867
1868         check_formats = self.params.get('check_formats') == 'selected'
1869
1870         def _parse_filter(tokens):
1871             filter_parts = []
1872             for type, string, start, _, _ in tokens:
1873                 if type == tokenize.OP and string == ']':
1874                     return ''.join(filter_parts)
1875                 else:
1876                     filter_parts.append(string)
1877
1878         def _remove_unused_ops(tokens):
1879             # Remove operators that we don't use and join them with the surrounding strings
1880             # for example: 'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9'
1881             ALLOWED_OPS = ('/', '+', ',', '(', ')')
1882             last_string, last_start, last_end, last_line = None, None, None, None
1883             for type, string, start, end, line in tokens:
1884                 if type == tokenize.OP and string == '[':
1885                     if last_string:
1886                         yield tokenize.NAME, last_string, last_start, last_end, last_line
1887                         last_string = None
1888                     yield type, string, start, end, line
1889                     # everything inside brackets will be handled by _parse_filter
1890                     for type, string, start, end, line in tokens:
1891                         yield type, string, start, end, line
1892                         if type == tokenize.OP and string == ']':
1893                             break
1894                 elif type == tokenize.OP and string in ALLOWED_OPS:
1895                     if last_string:
1896                         yield tokenize.NAME, last_string, last_start, last_end, last_line
1897                         last_string = None
1898                     yield type, string, start, end, line
1899                 elif type in [tokenize.NAME, tokenize.NUMBER, tokenize.OP]:
1900                     if not last_string:
1901                         last_string = string
1902                         last_start = start
1903                         last_end = end
1904                     else:
1905                         last_string += string
1906             if last_string:
1907                 yield tokenize.NAME, last_string, last_start, last_end, last_line
1908
        def _parse_format_selection(tokens, inside_merge=False, inside_choice=False, inside_group=False):
            """Parse tokens into a list of FormatSelector tuples.

            tokens must be an iterator that also offers restore_last_token(),
            which is used to push a terminating operator back so that the
            calling level can handle it.

            inside_merge: parsing the right-hand side of "+"; stops (and
                restores the token) at "/" or ",".
            inside_choice: parsing the right-hand side of "/"; stops (and
                restores the token) at ",".
            inside_group: parsing the contents of "(...)"; the closing ")"
                is consumed here instead of being restored.

            Returns the list of selectors collected at this level.
            Raises the SyntaxError built by syntax_error() on a misplaced or
            unrecognized operator.
            """
            selectors = []
            current_selector = None
            for type, string, start, _, _ in tokens:
                # ENCODING is only defined in python 3.x
                if type == getattr(tokenize, 'ENCODING', None):
                    continue
                elif type in [tokenize.NAME, tokenize.NUMBER]:
                    # A bare name/number is an atom; its filters list is filled by a following "[...]"
                    current_selector = FormatSelector(SINGLE, string, [])
                elif type == tokenize.OP:
                    if string == ')':
                        if not inside_group:
                            # ')' will be handled by the parentheses group
                            tokens.restore_last_token()
                        break
                    elif inside_merge and string in ['/', ',']:
                        # "/" and "," end the right operand of "+"; leave them for the caller
                        tokens.restore_last_token()
                        break
                    elif inside_choice and string == ',':
                        # "," ends the right operand of "/"; leave it for the caller
                        tokens.restore_last_token()
                        break
                    elif string == ',':
                        if not current_selector:
                            raise syntax_error('"," must follow a format selector', start)
                        selectors.append(current_selector)
                        current_selector = None
                    elif string == '/':
                        if not current_selector:
                            raise syntax_error('"/" must follow a format selector', start)
                        first_choice = current_selector
                        # The recursive call returns a list, which becomes the second alternative
                        second_choice = _parse_format_selection(tokens, inside_choice=True)
                        current_selector = FormatSelector(PICKFIRST, (first_choice, second_choice), [])
                    elif string == '[':
                        if not current_selector:
                            # A filter without a preceding selector applies to 'best'
                            current_selector = FormatSelector(SINGLE, 'best', [])
                        format_filter = _parse_filter(tokens)
                        current_selector.filters.append(format_filter)
                    elif string == '(':
                        if current_selector:
                            raise syntax_error('Unexpected "("', start)
                        group = _parse_format_selection(tokens, inside_group=True)
                        current_selector = FormatSelector(GROUP, group, [])
                    elif string == '+':
                        if not current_selector:
                            raise syntax_error('Unexpected "+"', start)
                        selector_1 = current_selector
                        selector_2 = _parse_format_selection(tokens, inside_merge=True)
                        if not selector_2:
                            raise syntax_error('Expected a selector', start)
                        current_selector = FormatSelector(MERGE, (selector_1, selector_2), [])
                    else:
                        raise syntax_error('Operator not recognized: "{0}"'.format(string), start)
                elif type == tokenize.ENDMARKER:
                    break
            # Flush the selector that was still being built when the tokens ended
            if current_selector:
                selectors.append(current_selector)
            return selectors
1966
1967         def _merge(formats_pair):
1968             format_1, format_2 = formats_pair
1969
1970             formats_info = []
1971             formats_info.extend(format_1.get('requested_formats', (format_1,)))
1972             formats_info.extend(format_2.get('requested_formats', (format_2,)))
1973
1974             if not allow_multiple_streams['video'] or not allow_multiple_streams['audio']:
1975                 get_no_more = {'video': False, 'audio': False}
1976                 for (i, fmt_info) in enumerate(formats_info):
1977                     if fmt_info.get('acodec') == fmt_info.get('vcodec') == 'none':
1978                         formats_info.pop(i)
1979                         continue
1980                     for aud_vid in ['audio', 'video']:
1981                         if not allow_multiple_streams[aud_vid] and fmt_info.get(aud_vid[0] + 'codec') != 'none':
1982                             if get_no_more[aud_vid]:
1983                                 formats_info.pop(i)
1984                                 break
1985                             get_no_more[aud_vid] = True
1986
1987             if len(formats_info) == 1:
1988                 return formats_info[0]
1989
1990             video_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('vcodec') != 'none']
1991             audio_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('acodec') != 'none']
1992
1993             the_only_video = video_fmts[0] if len(video_fmts) == 1 else None
1994             the_only_audio = audio_fmts[0] if len(audio_fmts) == 1 else None
1995
1996             output_ext = self.params.get('merge_output_format')
1997             if not output_ext:
1998                 if the_only_video:
1999                     output_ext = the_only_video['ext']
2000                 elif the_only_audio and not video_fmts:
2001                     output_ext = the_only_audio['ext']
2002                 else:
2003                     output_ext = 'mkv'
2004
2005             filtered = lambda *keys: filter(None, (traverse_obj(fmt, *keys) for fmt in formats_info))
2006
2007             new_dict = {
2008                 'requested_formats': formats_info,
2009                 'format': '+'.join(filtered('format')),
2010                 'format_id': '+'.join(filtered('format_id')),
2011                 'ext': output_ext,
2012                 'protocol': '+'.join(map(determine_protocol, formats_info)),
2013                 'language': '+'.join(orderedSet(filtered('language'))) or None,
2014                 'format_note': '+'.join(orderedSet(filtered('format_note'))) or None,
2015                 'filesize_approx': sum(filtered('filesize', 'filesize_approx')) or None,
2016                 'tbr': sum(filtered('tbr', 'vbr', 'abr')),
2017             }
2018
2019             if the_only_video:
2020                 new_dict.update({
2021                     'width': the_only_video.get('width'),
2022                     'height': the_only_video.get('height'),
2023                     'resolution': the_only_video.get('resolution') or self.format_resolution(the_only_video),
2024                     'fps': the_only_video.get('fps'),
2025                     'dynamic_range': the_only_video.get('dynamic_range'),
2026                     'vcodec': the_only_video.get('vcodec'),
2027                     'vbr': the_only_video.get('vbr'),
2028                     'stretched_ratio': the_only_video.get('stretched_ratio'),
2029                 })
2030
2031             if the_only_audio:
2032                 new_dict.update({
2033                     'acodec': the_only_audio.get('acodec'),
2034                     'abr': the_only_audio.get('abr'),
2035                     'asr': the_only_audio.get('asr'),
2036                 })
2037
2038             return new_dict
2039
2040         def _check_formats(formats):
2041             if not check_formats:
2042                 yield from formats
2043                 return
2044             yield from self._check_formats(formats)
2045
        def _build_selector_function(selector):
            """Compile a parsed selector into a callable.

            selector is either a list of FormatSelector tuples (the "," form)
            or a single FormatSelector. The returned function takes a ctx dict
            (its 'formats' and 'incomplete_formats' keys are read here) and
            returns an iterable of the chosen formats.

            For a single FormatSelector the returned function first narrows a
            deep copy of ctx['formats'] with the selector's "[...]" filters.
            The list form is returned directly, without that filtering step.
            """
            if isinstance(selector, list):  # ,
                fs = [_build_selector_function(s) for s in selector]

                def selector_function(ctx):
                    # Concatenate the picks of every comma-separated selector
                    for f in fs:
                        yield from f(ctx)
                return selector_function

            elif selector.type == GROUP:  # ()
                selector_function = _build_selector_function(selector.selector)

            elif selector.type == PICKFIRST:  # /
                fs = [_build_selector_function(s) for s in selector.selector]

                def selector_function(ctx):
                    # The first alternative that yields anything wins
                    for f in fs:
                        picked_formats = list(f(ctx))
                        if picked_formats:
                            return picked_formats
                    return []

            elif selector.type == MERGE:  # +
                selector_1, selector_2 = map(_build_selector_function, selector.selector)

                def selector_function(ctx):
                    # Every combination of a left pick with a right pick is merged;
                    # each side works on its own deep copy of ctx
                    for pair in itertools.product(
                            selector_1(copy.deepcopy(ctx)), selector_2(copy.deepcopy(ctx))):
                        yield _merge(pair)

            elif selector.type == SINGLE:  # atom
                format_spec = selector.selector or 'best'

                # TODO: Add allvideo, allaudio etc by generalizing the code with best/worst selector
                if format_spec == 'all':
                    def selector_function(ctx):
                        yield from _check_formats(ctx['formats'][::-1])
                elif format_spec == 'mergeall':
                    def selector_function(ctx):
                        formats = list(_check_formats(ctx['formats']))
                        if not formats:
                            return
                        # Start from the last format and fold the others in, going backwards
                        merged_format = formats[-1]
                        for f in formats[-2::-1]:
                            merged_format = _merge((merged_format, f))
                        yield merged_format

                else:
                    format_fallback, format_reverse, format_idx = False, True, 1
                    # best/worst (b/w), optional video/audio (v/a), optional "*", optional ".N" index
                    mobj = re.match(
                        r'(?P<bw>best|worst|b|w)(?P<type>video|audio|v|a)?(?P<mod>\*)?(?:\.(?P<n>[1-9]\d*))?$',
                        format_spec)
                    if mobj is not None:
                        format_idx = int_or_none(mobj.group('n'), default=1)
                        format_reverse = mobj.group('bw')[0] == 'b'
                        format_type = (mobj.group('type') or [None])[0]
                        not_format_type = {'v': 'a', 'a': 'v'}.get(format_type)
                        format_modified = mobj.group('mod') is not None

                        format_fallback = not format_type and not format_modified  # for b, w
                        _filter_f = (
                            (lambda f: f.get('%scodec' % format_type) != 'none')
                            if format_type and format_modified  # bv*, ba*, wv*, wa*
                            else (lambda f: f.get('%scodec' % not_format_type) == 'none')
                            if format_type  # bv, ba, wv, wa
                            else (lambda f: f.get('vcodec') != 'none' and f.get('acodec') != 'none')
                            if not format_modified  # b, w
                            else lambda f: True)  # b*, w*
                        # Whatever the variant, a format with neither codec is never picked
                        filter_f = lambda f: _filter_f(f) and (
                            f.get('vcodec') != 'none' or f.get('acodec') != 'none')
                    else:
                        # Not a best/worst keyword: a known extension, otherwise a format_id
                        if format_spec in self._format_selection_exts['audio']:
                            filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') != 'none'
                        elif format_spec in self._format_selection_exts['video']:
                            filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') != 'none' and f.get('vcodec') != 'none'
                        elif format_spec in self._format_selection_exts['storyboards']:
                            filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') == 'none' and f.get('vcodec') == 'none'
                        else:
                            filter_f = lambda f: f.get('format_id') == format_spec  # id

                    def selector_function(ctx):
                        formats = list(ctx['formats'])
                        matches = list(filter(filter_f, formats)) if filter_f is not None else formats
                        if format_fallback and ctx['incomplete_formats'] and not matches:
                            # for extractors with incomplete formats (audio only (soundcloud)
                            # or video only (imgur)) best/worst will fallback to
                            # best/worst {video,audio}-only format
                            matches = formats
                        # Reversed for "best" so that index 0 is the last entry of the list
                        matches = LazyList(_check_formats(matches[::-1 if format_reverse else 1]))
                        try:
                            yield matches[format_idx - 1]
                        except IndexError:
                            return

            filters = [self._build_format_filter(f) for f in selector.filters]

            def final_selector(ctx):
                # Filter a copy so the caller's ctx is left untouched
                ctx_copy = copy.deepcopy(ctx)
                for _filter in filters:
                    ctx_copy['formats'] = list(filter(_filter, ctx_copy['formats']))
                return selector_function(ctx_copy)
            return final_selector
2148
2149         stream = io.BytesIO(format_spec.encode('utf-8'))
2150         try:
2151             tokens = list(_remove_unused_ops(compat_tokenize_tokenize(stream.readline)))
2152         except tokenize.TokenError:
2153             raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec)))
2154
2155         class TokenIterator(object):
2156             def __init__(self, tokens):
2157                 self.tokens = tokens
2158                 self.counter = 0
2159
2160             def __iter__(self):
2161                 return self
2162
2163             def __next__(self):
2164                 if self.counter >= len(self.tokens):
2165                     raise StopIteration()
2166                 value = self.tokens[self.counter]
2167                 self.counter += 1
2168                 return value
2169
2170             next = __next__
2171
2172             def restore_last_token(self):
2173                 self.counter -= 1
2174
2175         parsed_selector = _parse_format_selection(iter(TokenIterator(tokens)))
2176         return _build_selector_function(parsed_selector)
2177
2178     def _calc_headers(self, info_dict):
2179         res = std_headers.copy()
2180
2181         add_headers = info_dict.get('http_headers')
2182         if add_headers:
2183             res.update(add_headers)
2184
2185         cookies = self._calc_cookies(info_dict)
2186         if cookies:
2187             res['Cookie'] = cookies
2188
2189         if 'X-Forwarded-For' not in res:
2190             x_forwarded_for_ip = info_dict.get('__x_forwarded_for_ip')
2191             if x_forwarded_for_ip:
2192                 res['X-Forwarded-For'] = x_forwarded_for_ip
2193
2194         return res
2195
2196     def _calc_cookies(self, info_dict):
2197         pr = sanitized_Request(info_dict['url'])
2198         self.cookiejar.add_cookie_header(pr)
2199         return pr.get_header('Cookie')
2200
2201     def _sort_thumbnails(self, thumbnails):
2202         thumbnails.sort(key=lambda t: (
2203             t.get('preference') if t.get('preference') is not None else -1,
2204             t.get('width') if t.get('width') is not None else -1,
2205             t.get('height') if t.get('height') is not None else -1,
2206             t.get('id') if t.get('id') is not None else '',
2207             t.get('url')))
2208
2209     def _sanitize_thumbnails(self, info_dict):
2210         thumbnails = info_dict.get('thumbnails')
2211         if thumbnails is None:
2212             thumbnail = info_dict.get('thumbnail')
2213             if thumbnail:
2214                 info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}]
2215         if not thumbnails:
2216             return
2217
2218         def check_thumbnails(thumbnails):
2219             for t in thumbnails:
2220                 self.to_screen(f'[info] Testing thumbnail {t["id"]}')
2221                 try:
2222                     self.urlopen(HEADRequest(t['url']))
2223                 except network_exceptions as err:
2224                     self.to_screen(f'[info] Unable to connect to thumbnail {t["id"]} URL {t["url"]!r} - {err}. Skipping...')
2225                     continue
2226                 yield t
2227
2228         self._sort_thumbnails(thumbnails)
2229         for i, t in enumerate(thumbnails):
2230             if t.get('id') is None:
2231                 t['id'] = '%d' % i
2232             if t.get('width') and t.get('height'):
2233                 t['resolution'] = '%dx%d' % (t['width'], t['height'])
2234             t['url'] = sanitize_url(t['url'])
2235
2236         if self.params.get('check_formats') is True:
2237             info_dict['thumbnails'] = LazyList(check_thumbnails(thumbnails[::-1]), reverse=True)
2238         else:
2239             info_dict['thumbnails'] = thumbnails
2240
    def process_video_result(self, info_dict, download=True):
        """Sanitize a single-video result, select its formats and process them.

        info_dict is modified in place: fields are type-checked and
        normalized, derived fields (display_id, duration_string, dates,
        live_status, thumbnails, requested_subtitles, per-format defaults and
        http_headers) are filled in, and finally the last selected format is
        merged into it.

        When download is true, process_info() is called once per selected
        format with a copy of info_dict updated with that format.

        Returns info_dict, or None when only a listing (thumbnails, formats
        or subtitles) was requested and 'simulate' is unset.

        Raises ExtractorError when 'id' or 'title' is missing, or when no
        format matches the selection and 'ignore_no_formats_error' is not set.
        """
        assert info_dict.get('_type', 'video') == 'video'

        if 'id' not in info_dict:
            raise ExtractorError('Missing "id" field in extractor result')
        if 'title' not in info_dict:
            raise ExtractorError('Missing "title" field in extractor result',
                                 video_id=info_dict['id'], ie=info_dict['extractor'])

        def report_force_conversion(field, field_not, conversion):
            self.report_warning(
                '"%s" field is not %s - forcing %s conversion, there is an error in extractor'
                % (field, field_not, conversion))

        def sanitize_string_field(info, string_field):
            # Coerce a non-string value to a string, warning about the extractor error
            field = info.get(string_field)
            if field is None or isinstance(field, compat_str):
                return
            report_force_conversion(string_field, 'a string', 'string')
            info[string_field] = compat_str(field)

        def sanitize_numeric_fields(info):
            # Coerce every non-numeric value among self._NUMERIC_FIELDS via int_or_none, with a warning
            for numeric_field in self._NUMERIC_FIELDS:
                field = info.get(numeric_field)
                if field is None or isinstance(field, compat_numeric_types):
                    continue
                report_force_conversion(numeric_field, 'numeric', 'int')
                info[numeric_field] = int_or_none(field)

        sanitize_string_field(info_dict, 'id')
        sanitize_numeric_fields(info_dict)

        if 'playlist' not in info_dict:
            # It isn't part of a playlist
            info_dict['playlist'] = None
            info_dict['playlist_index'] = None

        self._sanitize_thumbnails(info_dict)

        # Prefer an explicit thumbnail; otherwise take the last entry of the sorted list
        thumbnail = info_dict.get('thumbnail')
        thumbnails = info_dict.get('thumbnails')
        if thumbnail:
            info_dict['thumbnail'] = sanitize_url(thumbnail)
        elif thumbnails:
            info_dict['thumbnail'] = thumbnails[-1]['url']

        if info_dict.get('display_id') is None and 'id' in info_dict:
            info_dict['display_id'] = info_dict['id']

        if info_dict.get('duration') is not None:
            info_dict['duration_string'] = formatSeconds(info_dict['duration'])

        # Derive the YYYYMMDD date fields from their timestamps when missing
        for ts_key, date_key in (
                ('timestamp', 'upload_date'),
                ('release_timestamp', 'release_date'),
        ):
            if info_dict.get(date_key) is None and info_dict.get(ts_key) is not None:
                # Working around out-of-range timestamp values (e.g. negative ones on Windows,
                # see http://bugs.python.org/issue1646728)
                try:
                    upload_date = datetime.datetime.utcfromtimestamp(info_dict[ts_key])
                    info_dict[date_key] = upload_date.strftime('%Y%m%d')
                except (ValueError, OverflowError, OSError):
                    pass

        # Infer live_status from is_live/was_live when it was not given, then
        # back-fill whichever of the two flags is still None
        live_keys = ('is_live', 'was_live')
        live_status = info_dict.get('live_status')
        if live_status is None:
            for key in live_keys:
                if info_dict.get(key) is False:
                    continue
                if info_dict.get(key):
                    live_status = key
                # Stop at the first flag that is not explicitly False
                break
            if all(info_dict.get(key) is False for key in live_keys):
                live_status = 'not_live'
        if live_status:
            info_dict['live_status'] = live_status
            for key in live_keys:
                if info_dict.get(key) is None:
                    info_dict[key] = (live_status == key)

        # Auto generate title fields corresponding to the *_number fields when missing
        # in order to always have clean titles. This is very common for TV series.
        for field in ('chapter', 'season', 'episode'):
            if info_dict.get('%s_number' % field) is not None and not info_dict.get(field):
                info_dict[field] = '%s %d' % (field.capitalize(), info_dict['%s_number' % field])

        # Sanitize subtitle URLs and fill in a missing 'ext' from the URL
        for cc_kind in ('subtitles', 'automatic_captions'):
            cc = info_dict.get(cc_kind)
            if cc:
                for _, subtitle in cc.items():
                    for subtitle_format in subtitle:
                        if subtitle_format.get('url'):
                            subtitle_format['url'] = sanitize_url(subtitle_format['url'])
                        if subtitle_format.get('ext') is None:
                            subtitle_format['ext'] = determine_ext(subtitle_format['url']).lower()

        automatic_captions = info_dict.get('automatic_captions')
        subtitles = info_dict.get('subtitles')

        info_dict['requested_subtitles'] = self.process_subtitles(
            info_dict['id'], subtitles, automatic_captions)

        if info_dict.get('formats') is None:
            # There's only one format available
            formats = [info_dict]
        else:
            formats = info_dict['formats']

        # Record DRM presence before DRM formats are (optionally) filtered out
        info_dict['__has_drm'] = any(f.get('has_drm') for f in formats)
        if not self.params.get('allow_unplayable_formats'):
            formats = [f for f in formats if not f.get('has_drm')]

        if not formats:
            self.raise_no_formats(info_dict)

        def is_wellformed(f):
            url = f.get('url')
            if not url:
                self.report_warning(
                    '"url" field is missing or empty - skipping format, '
                    'there is an error in extractor')
                return False
            if isinstance(url, bytes):
                sanitize_string_field(f, 'url')
            return True

        # Filter out malformed formats for better extraction robustness
        formats = list(filter(is_wellformed, formats))

        # Maps each format_id to the list of formats carrying it
        formats_dict = {}

        # We check that all the formats have the format and format_id fields
        for i, format in enumerate(formats):
            sanitize_string_field(format, 'format_id')
            sanitize_numeric_fields(format)
            format['url'] = sanitize_url(format['url'])
            if not format.get('format_id'):
                format['format_id'] = compat_str(i)
            else:
                # Sanitize format_id from characters used in format selector expression
                format['format_id'] = re.sub(r'[\s,/+\[\]()]', '_', format['format_id'])
            format_id = format['format_id']
            if format_id not in formats_dict:
                formats_dict[format_id] = []
            formats_dict[format_id].append(format)

        # Make sure all formats have unique format_id
        common_exts = set(itertools.chain(*self._format_selection_exts.values()))
        for format_id, ambiguous_formats in formats_dict.items():
            ambigious_id = len(ambiguous_formats) > 1
            for i, format in enumerate(ambiguous_formats):
                if ambigious_id:
                    format['format_id'] = '%s-%d' % (format_id, i)
                if format.get('ext') is None:
                    format['ext'] = determine_ext(format['url']).lower()
                # Ensure there is no conflict between id and ext in format selection
                # See https://github.com/yt-dlp/yt-dlp/issues/1282
                if format['format_id'] != format['ext'] and format['format_id'] in common_exts:
                    format['format_id'] = 'f%s' % format['format_id']

        # Fill in per-format defaults that can be derived from other fields
        for i, format in enumerate(formats):
            if format.get('format') is None:
                format['format'] = '{id} - {res}{note}'.format(
                    id=format['format_id'],
                    res=self.format_resolution(format),
                    note=format_field(format, 'format_note', ' (%s)'),
                )
            if format.get('protocol') is None:
                format['protocol'] = determine_protocol(format)
            if format.get('resolution') is None:
                format['resolution'] = self.format_resolution(format, default=None)
            if format.get('dynamic_range') is None and format.get('vcodec') != 'none':
                format['dynamic_range'] = 'SDR'
            if (info_dict.get('duration') and format.get('tbr')
                    and not format.get('filesize') and not format.get('filesize_approx')):
                format['filesize_approx'] = info_dict['duration'] * format['tbr'] * (1024 / 8)

            # Add HTTP headers, so that external programs can use them from the
            # json output
            full_format_info = info_dict.copy()
            full_format_info.update(format)
            format['http_headers'] = self._calc_headers(full_format_info)
        # Remove private housekeeping stuff
        if '__x_forwarded_for_ip' in info_dict:
            del info_dict['__x_forwarded_for_ip']

        # TODO Central sorting goes here

        if self.params.get('check_formats') is True:
            formats = LazyList(self._check_formats(formats[::-1]), reverse=True)

        if not formats or formats[0] is not info_dict:
            # only set the 'formats' fields if the original info_dict list them
            # otherwise we end up with a circular reference, the first (and unique)
            # element in the 'formats' field in info_dict is info_dict itself,
            # which can't be exported to json
            info_dict['formats'] = formats

        info_dict, _ = self.pre_process(info_dict)

        # The pre-processors may have modified the formats
        formats = info_dict.get('formats', [info_dict])

        if self.params.get('list_thumbnails'):
            self.list_thumbnails(info_dict)
        if self.params.get('listformats'):
            if not info_dict.get('formats') and not info_dict.get('url'):
                self.to_screen('%s has no formats' % info_dict['id'])
            else:
                self.list_formats(info_dict)
        if self.params.get('listsubtitles'):
            if 'automatic_captions' in info_dict:
                self.list_subtitles(
                    info_dict['id'], automatic_captions, 'automatic captions')
            self.list_subtitles(info_dict['id'], subtitles, 'subtitles')
        # A listing request ends processing here only when 'simulate' was left unset (None)
        list_only = self.params.get('simulate') is None and (
            self.params.get('list_thumbnails') or self.params.get('listformats') or self.params.get('listsubtitles'))
        if list_only:
            # Without this printing, -F --print-json will not work
            self.__forced_printings(info_dict, self.prepare_filename(info_dict), incomplete=True)
            return

        format_selector = self.format_selector
        if format_selector is None:
            req_format = self._default_format_spec(info_dict, download=download)
            self.write_debug('Default format spec: %s' % req_format)
            format_selector = self.build_format_selector(req_format)

        # While in format selection we may need to have an access to the original
        # format set in order to calculate some metrics or do some processing.
        # For now we need to be able to guess whether original formats provided
        # by extractor are incomplete or not (i.e. whether extractor provides only
        # video-only or audio-only formats) for proper formats selection for
        # extractors with such incomplete formats (see
        # https://github.com/ytdl-org/youtube-dl/pull/5556).
        # Since formats may be filtered during format selection and may not match
        # the original formats the results may be incorrect. Thus original formats
        # or pre-calculated metrics should be passed to format selection routines
        # as well.
        # We will pass a context object containing all necessary additional data
        # instead of just formats.
        # This fixes incorrect format selection issue (see
        # https://github.com/ytdl-org/youtube-dl/issues/10083).
        incomplete_formats = (
            # All formats are video-only or
            all(f.get('vcodec') != 'none' and f.get('acodec') == 'none' for f in formats)
            # all formats are audio-only
            or all(f.get('vcodec') == 'none' and f.get('acodec') != 'none' for f in formats))

        ctx = {
            'formats': formats,
            'incomplete_formats': incomplete_formats,
        }

        formats_to_download = list(format_selector(ctx))
        if not formats_to_download:
            if not self.params.get('ignore_no_formats_error'):
                raise ExtractorError('Requested format is not available', expected=True,
                                     video_id=info_dict['id'], ie=info_dict['extractor'])
            else:
                self.report_warning('Requested format is not available')
                # Process what we can, even without any available formats.
                self.process_info(dict(info_dict))
        elif download:
            self.to_screen(
                '[info] %s: Downloading %d format(s): %s' % (
                    info_dict['id'], len(formats_to_download),
                    ", ".join([f['format_id'] for f in formats_to_download])))
            for fmt in formats_to_download:
                new_info = dict(info_dict)
                # Save a reference to the original info_dict so that it can be modified in process_info if needed
                new_info['__original_infodict'] = info_dict
                new_info.update(fmt)
                self.process_info(new_info)
        # We update the info dict with the selected best quality format (backwards compatibility)
        if formats_to_download:
            info_dict.update(formats_to_download[-1])
        return info_dict
2521
2522     def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
2523         """Select the requested subtitles and their format"""
2524         available_subs = {}
2525         if normal_subtitles and self.params.get('writesubtitles'):
2526             available_subs.update(normal_subtitles)
2527         if automatic_captions and self.params.get('writeautomaticsub'):
2528             for lang, cap_info in automatic_captions.items():
2529                 if lang not in available_subs:
2530                     available_subs[lang] = cap_info
2531
2532         if (not self.params.get('writesubtitles') and not
2533                 self.params.get('writeautomaticsub') or not
2534                 available_subs):
2535             return None
2536
2537         all_sub_langs = available_subs.keys()
2538         if self.params.get('allsubtitles', False):
2539             requested_langs = all_sub_langs
2540         elif self.params.get('subtitleslangs', False):
2541             # A list is used so that the order of languages will be the same as
2542             # given in subtitleslangs. See https://github.com/yt-dlp/yt-dlp/issues/1041
2543             requested_langs = []
2544             for lang_re in self.params.get('subtitleslangs'):
2545                 if lang_re == 'all':
2546                     requested_langs.extend(all_sub_langs)
2547                     continue
2548                 discard = lang_re[0] == '-'
2549                 if discard:
2550                     lang_re = lang_re[1:]
2551                 current_langs = filter(re.compile(lang_re + '$').match, all_sub_langs)
2552                 if discard:
2553                     for lang in current_langs:
2554                         while lang in requested_langs:
2555                             requested_langs.remove(lang)
2556                 else:
2557                     requested_langs.extend(current_langs)
2558             requested_langs = orderedSet(requested_langs)
2559         elif 'en' in available_subs:
2560             requested_langs = ['en']
2561         else:
2562             requested_langs = [list(all_sub_langs)[0]]
2563         if requested_langs:
2564             self.write_debug('Downloading subtitles: %s' % ', '.join(requested_langs))
2565
2566         formats_query = self.params.get('subtitlesformat', 'best')
2567         formats_preference = formats_query.split('/') if formats_query else []
2568         subs = {}
2569         for lang in requested_langs:
2570             formats = available_subs.get(lang)
2571             if formats is None:
2572                 self.report_warning('%s subtitles not available for %s' % (lang, video_id))
2573                 continue
2574             for ext in formats_preference:
2575                 if ext == 'best':
2576                     f = formats[-1]
2577                     break
2578                 matches = list(filter(lambda f: f['ext'] == ext, formats))
2579                 if matches:
2580                     f = matches[-1]
2581                     break
2582             else:
2583                 f = formats[-1]
2584                 self.report_warning(
2585                     'No subtitle format found matching "%s" for language %s, '
2586                     'using %s' % (formats_query, lang, f['ext']))
2587             subs[lang] = f
2588         return subs
2589
2590     def __forced_printings(self, info_dict, filename, incomplete):
2591         def print_mandatory(field, actual_field=None):
2592             if actual_field is None:
2593                 actual_field = field
2594             if (self.params.get('force%s' % field, False)
2595                     and (not incomplete or info_dict.get(actual_field) is not None)):
2596                 self.to_stdout(info_dict[actual_field])
2597
2598         def print_optional(field):
2599             if (self.params.get('force%s' % field, False)
2600                     and info_dict.get(field) is not None):
2601                 self.to_stdout(info_dict[field])
2602
2603         info_dict = info_dict.copy()
2604         if filename is not None:
2605             info_dict['filename'] = filename
2606         if info_dict.get('requested_formats') is not None:
2607             # For RTMP URLs, also include the playpath
2608             info_dict['urls'] = '\n'.join(f['url'] + f.get('play_path', '') for f in info_dict['requested_formats'])
2609         elif 'url' in info_dict:
2610             info_dict['urls'] = info_dict['url'] + info_dict.get('play_path', '')
2611
2612         if self.params.get('forceprint') or self.params.get('forcejson'):
2613             self.post_extract(info_dict)
2614         for tmpl in self.params.get('forceprint', []):
2615             mobj = re.match(r'\w+(=?)$', tmpl)
2616             if mobj and mobj.group(1):
2617                 tmpl = f'{tmpl[:-1]} = %({tmpl[:-1]})s'
2618             elif mobj:
2619                 tmpl = '%({})s'.format(tmpl)
2620             self.to_stdout(self.evaluate_outtmpl(tmpl, info_dict))
2621
2622         print_mandatory('title')
2623         print_mandatory('id')
2624         print_mandatory('url', 'urls')
2625         print_optional('thumbnail')
2626         print_optional('description')
2627         print_optional('filename')
2628         if self.params.get('forceduration') and info_dict.get('duration') is not None:
2629             self.to_stdout(formatSeconds(info_dict['duration']))
2630         print_mandatory('format')
2631
2632         if self.params.get('forcejson'):
2633             self.to_stdout(json.dumps(self.sanitize_info(info_dict)))
2634
2635     def dl(self, name, info, subtitle=False, test=False):
2636         if not info.get('url'):
2637             self.raise_no_formats(info, True)
2638
2639         if test:
2640             verbose = self.params.get('verbose')
2641             params = {
2642                 'test': True,
2643                 'quiet': self.params.get('quiet') or not verbose,
2644                 'verbose': verbose,
2645                 'noprogress': not verbose,
2646                 'nopart': True,
2647                 'skip_unavailable_fragments': False,
2648                 'keep_fragments': False,
2649                 'overwrites': True,
2650                 '_no_ytdl_file': True,
2651             }
2652         else:
2653             params = self.params
2654         fd = get_suitable_downloader(info, params, to_stdout=(name == '-'))(self, params)
2655         if not test:
2656             for ph in self._progress_hooks:
2657                 fd.add_progress_hook(ph)
2658             urls = '", "'.join([f['url'] for f in info.get('requested_formats', [])] or [info['url']])
2659             self.write_debug('Invoking downloader on "%s"' % urls)
2660
2661         new_info = copy.deepcopy(self._copy_infodict(info))
2662         if new_info.get('http_headers') is None:
2663             new_info['http_headers'] = self._calc_headers(new_info)
2664         return fd.download(name, new_info, subtitle)
2665
2666     def process_info(self, info_dict):
2667         """Process a single resolved IE result."""
2668
2669         assert info_dict.get('_type', 'video') == 'video'
2670
2671         max_downloads = self.params.get('max_downloads')
2672         if max_downloads is not None:
2673             if self._num_downloads >= int(max_downloads):
2674                 raise MaxDownloadsReached()
2675
2676         if info_dict.get('is_live'):
2677             info_dict['title'] += ' ' + datetime.datetime.now().strftime('%Y-%m-%d %H:%M')
2678
2679         # TODO: backward compatibility, to be removed
2680         info_dict['fulltitle'] = info_dict['title']
2681
2682         if 'format' not in info_dict and 'ext' in info_dict:
2683             info_dict['format'] = info_dict['ext']
2684
2685         if self._match_entry(info_dict) is not None:
2686             return
2687
2688         self.post_extract(info_dict)
2689         self._num_downloads += 1
2690
2691         # info_dict['_filename'] needs to be set for backward compatibility
2692         info_dict['_filename'] = full_filename = self.prepare_filename(info_dict, warn=True)
2693         temp_filename = self.prepare_filename(info_dict, 'temp')
2694         files_to_move = {}
2695
2696         # Forced printings
2697         self.__forced_printings(info_dict, full_filename, incomplete=('format' not in info_dict))
2698
2699         if self.params.get('simulate'):
2700             if self.params.get('force_write_download_archive', False):
2701                 self.record_download_archive(info_dict)
2702             # Do nothing else if in simulate mode
2703             return
2704
2705         if full_filename is None:
2706             return
2707         if not self._ensure_dir_exists(encodeFilename(full_filename)):
2708             return
2709         if not self._ensure_dir_exists(encodeFilename(temp_filename)):
2710             return
2711
2712         if self._write_description('video', info_dict,
2713                                    self.prepare_filename(info_dict, 'description')) is None:
2714             return
2715
2716         sub_files = self._write_subtitles(info_dict, temp_filename)
2717         if sub_files is None:
2718             return
2719         files_to_move.update(dict(sub_files))
2720
2721         thumb_files = self._write_thumbnails(
2722             'video', info_dict, temp_filename, self.prepare_filename(info_dict, 'thumbnail'))
2723         if thumb_files is None:
2724             return
2725         files_to_move.update(dict(thumb_files))
2726
2727         infofn = self.prepare_filename(info_dict, 'infojson')
2728         _infojson_written = self._write_info_json('video', info_dict, infofn)
2729         if _infojson_written:
2730             info_dict['infojson_filename'] = infofn
2731             # For backward compatability, even though it was a private field
2732             info_dict['__infojson_filename'] = infofn
2733         elif _infojson_written is None:
2734             return
2735
2736         # Note: Annotations are deprecated
2737         annofn = None
2738         if self.params.get('writeannotations', False):
2739             annofn = self.prepare_filename(info_dict, 'annotation')
2740         if annofn:
2741             if not self._ensure_dir_exists(encodeFilename(annofn)):
2742                 return
2743             if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(annofn)):
2744                 self.to_screen('[info] Video annotations are already present')
2745             elif not info_dict.get('annotations'):
2746                 self.report_warning('There are no annotations to write.')
2747             else:
2748                 try:
2749                     self.to_screen('[info] Writing video annotations to: ' + annofn)
2750                     with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
2751                         annofile.write(info_dict['annotations'])
2752                 except (KeyError, TypeError):
2753                     self.report_warning('There are no annotations to write.')
2754                 except (OSError, IOError):
2755                     self.report_error('Cannot write annotations file: ' + annofn)
2756                     return
2757
2758         # Write internet shortcut files
2759         def _write_link_file(link_type):
2760             if 'webpage_url' not in info_dict:
2761                 self.report_error('Cannot write internet shortcut file because the "webpage_url" field is missing in the media information')
2762                 return False
2763             linkfn = replace_extension(self.prepare_filename(info_dict, 'link'), link_type, info_dict.get('ext'))
2764             if not self._ensure_dir_exists(encodeFilename(linkfn)):
2765                 return False
2766             if self.params.get('overwrites', True) and os.path.exists(encodeFilename(linkfn)):
2767                 self.to_screen(f'[info] Internet shortcut (.{link_type}) is already present')
2768                 return True
2769             try:
2770                 self.to_screen(f'[info] Writing internet shortcut (.{link_type}) to: {linkfn}')
2771                 with io.open(encodeFilename(to_high_limit_path(linkfn)), 'w', encoding='utf-8',
2772                              newline='\r\n' if link_type == 'url' else '\n') as linkfile:
2773                     template_vars = {'url': iri_to_uri(info_dict['webpage_url'])}
2774                     if link_type == 'desktop':
2775                         template_vars['filename'] = linkfn[:-(len(link_type) + 1)]
2776                     linkfile.write(LINK_TEMPLATES[link_type] % template_vars)
2777             except (OSError, IOError):
2778                 self.report_error(f'Cannot write internet shortcut {linkfn}')
2779                 return False
2780             return True
2781
2782         write_links = {
2783             'url': self.params.get('writeurllink'),
2784             'webloc': self.params.get('writewebloclink'),
2785             'desktop': self.params.get('writedesktoplink'),
2786         }
2787         if self.params.get('writelink'):
2788             link_type = ('webloc' if sys.platform == 'darwin'
2789                          else 'desktop' if sys.platform.startswith('linux')
2790                          else 'url')
2791             write_links[link_type] = True
2792
2793         if any(should_write and not _write_link_file(link_type)
2794                for link_type, should_write in write_links.items()):
2795             return
2796
2797         try:
2798             info_dict, files_to_move = self.pre_process(info_dict, 'before_dl', files_to_move)
2799         except PostProcessingError as err:
2800             self.report_error('Preprocessing: %s' % str(err))
2801             return
2802
2803         must_record_download_archive = False
2804         if self.params.get('skip_download', False):
2805             info_dict['filepath'] = temp_filename
2806             info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))
2807             info_dict['__files_to_move'] = files_to_move
2808             info_dict = self.run_pp(MoveFilesAfterDownloadPP(self, False), info_dict)
2809         else:
2810             # Download
2811             info_dict.setdefault('__postprocessors', [])
2812             try:
2813
2814                 def existing_file(*filepaths):
2815                     ext = info_dict.get('ext')
2816                     final_ext = self.params.get('final_ext', ext)
2817                     existing_files = []
2818                     for file in orderedSet(filepaths):
2819                         if final_ext != ext:
2820                             converted = replace_extension(file, final_ext, ext)
2821                             if os.path.exists(encodeFilename(converted)):
2822                                 existing_files.append(converted)
2823                         if os.path.exists(encodeFilename(file)):
2824                             existing_files.append(file)
2825
2826                     if not existing_files or self.params.get('overwrites', False):
2827                         for file in orderedSet(existing_files):
2828                             self.report_file_delete(file)
2829                             os.remove(encodeFilename(file))
2830                         return None
2831
2832                     info_dict['ext'] = os.path.splitext(existing_files[0])[1][1:]
2833                     return existing_files[0]
2834
2835                 success = True
2836                 if info_dict.get('requested_formats') is not None:
2837
2838                     def compatible_formats(formats):
2839                         # TODO: some formats actually allow this (mkv, webm, ogg, mp4), but not all of them.
2840                         video_formats = [format for format in formats if format.get('vcodec') != 'none']
2841                         audio_formats = [format for format in formats if format.get('acodec') != 'none']
2842                         if len(video_formats) > 2 or len(audio_formats) > 2:
2843                             return False
2844
2845                         # Check extension
2846                         exts = set(format.get('ext') for format in formats)
2847                         COMPATIBLE_EXTS = (
2848                             set(('mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'ismv', 'isma')),
2849                             set(('webm',)),
2850                         )
2851                         for ext_sets in COMPATIBLE_EXTS:
2852                             if ext_sets.issuperset(exts):
2853                                 return True
2854                         # TODO: Check acodec/vcodec
2855                         return False
2856
2857                     requested_formats = info_dict['requested_formats']
2858                     old_ext = info_dict['ext']
2859                     if self.params.get('merge_output_format') is None:
2860                         if not compatible_formats(requested_formats):
2861                             info_dict['ext'] = 'mkv'
2862                             self.report_warning(
2863                                 'Requested formats are incompatible for merge and will be merged into mkv')
2864                         if (info_dict['ext'] == 'webm'
2865                                 and info_dict.get('thumbnails')
2866                                 # check with type instead of pp_key, __name__, or isinstance
2867                                 # since we dont want any custom PPs to trigger this
2868                                 and any(type(pp) == EmbedThumbnailPP for pp in self._pps['post_process'])):
2869                             info_dict['ext'] = 'mkv'
2870                             self.report_warning(
2871                                 'webm doesn\'t support embedding a thumbnail, mkv will be used')
2872                     new_ext = info_dict['ext']
2873
2874                     def correct_ext(filename, ext=new_ext):
2875                         if filename == '-':
2876                             return filename
2877                         filename_real_ext = os.path.splitext(filename)[1][1:]
2878                         filename_wo_ext = (
2879                             os.path.splitext(filename)[0]
2880                             if filename_real_ext in (old_ext, new_ext)
2881                             else filename)
2882                         return '%s.%s' % (filename_wo_ext, ext)
2883
2884                     # Ensure filename always has a correct extension for successful merge
2885                     full_filename = correct_ext(full_filename)
2886                     temp_filename = correct_ext(temp_filename)
2887                     dl_filename = existing_file(full_filename, temp_filename)
2888                     info_dict['__real_download'] = False
2889
2890                     if dl_filename is not None:
2891                         self.report_file_already_downloaded(dl_filename)
2892                     elif get_suitable_downloader(info_dict, self.params, to_stdout=temp_filename == '-'):
2893                         info_dict['url'] = '\n'.join(f['url'] for f in requested_formats)
2894                         success, real_download = self.dl(temp_filename, info_dict)
2895                         info_dict['__real_download'] = real_download
2896                     else:
2897                         downloaded = []
2898                         merger = FFmpegMergerPP(self)
2899                         if self.params.get('allow_unplayable_formats'):
2900                             self.report_warning(
2901                                 'You have requested merging of multiple formats '
2902                                 'while also allowing unplayable formats to be downloaded. '
2903                                 'The formats won\'t be merged to prevent data corruption.')
2904                         elif not merger.available:
2905                             self.report_warning(
2906                                 'You have requested merging of multiple formats but ffmpeg is not installed. '
2907                                 'The formats won\'t be merged.')
2908
2909                         if temp_filename == '-':
2910                             reason = ('using a downloader other than ffmpeg' if FFmpegFD.can_merge_formats(info_dict)
2911                                       else 'but the formats are incompatible for simultaneous download' if merger.available
2912                                       else 'but ffmpeg is not installed')
2913                             self.report_warning(
2914                                 f'You have requested downloading multiple formats to stdout {reason}. '
2915                                 'The formats will be streamed one after the other')
2916                             fname = temp_filename
2917                         for f in requested_formats:
2918                             new_info = dict(info_dict)
2919                             del new_info['requested_formats']
2920                             new_info.update(f)
2921                             if temp_filename != '-':
2922                                 fname = prepend_extension(
2923                                     correct_ext(temp_filename, new_info['ext']),
2924                                     'f%s' % f['format_id'], new_info['ext'])
2925                                 if not self._ensure_dir_exists(fname):
2926                                     return
2927                                 f['filepath'] = fname
2928                                 downloaded.append(fname)
2929                             partial_success, real_download = self.dl(fname, new_info)
2930                             info_dict['__real_download'] = info_dict['__real_download'] or real_download
2931                             success = success and partial_success
2932                         if merger.available and not self.params.get('allow_unplayable_formats'):
2933                             info_dict['__postprocessors'].append(merger)
2934                             info_dict['__files_to_merge'] = downloaded
2935                             # Even if there were no downloads, it is being merged only now
2936                             info_dict['__real_download'] = True
2937                         else:
2938                             for file in downloaded:
2939                                 files_to_move[file] = None
2940                 else:
2941                     # Just a single file
2942                     dl_filename = existing_file(full_filename, temp_filename)
2943                     if dl_filename is None or dl_filename == temp_filename:
2944                         # dl_filename == temp_filename could mean that the file was partially downloaded with --no-part.
2945                         # So we should try to resume the download
2946                         success, real_download = self.dl(temp_filename, info_dict)
2947                         info_dict['__real_download'] = real_download
2948                     else:
2949                         self.report_file_already_downloaded(dl_filename)
2950
2951                 dl_filename = dl_filename or temp_filename
2952                 info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))
2953
2954             except network_exceptions as err:
2955                 self.report_error('unable to download video data: %s' % error_to_compat_str(err))
2956                 return
2957             except (OSError, IOError) as err:
2958                 raise UnavailableVideoError(err)
2959             except (ContentTooShortError, ) as err:
2960                 self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
2961                 return
2962
2963             if success and full_filename != '-':
2964
2965                 def fixup():
2966                     do_fixup = True
2967                     fixup_policy = self.params.get('fixup')
2968                     vid = info_dict['id']
2969
2970                     if fixup_policy in ('ignore', 'never'):
2971                         return
2972                     elif fixup_policy == 'warn':
2973                         do_fixup = False
2974                     elif fixup_policy != 'force':
2975                         assert fixup_policy in ('detect_or_warn', None)
2976                         if not info_dict.get('__real_download'):
2977                             do_fixup = False
2978
2979                     def ffmpeg_fixup(cndn, msg, cls):
2980                         if not cndn:
2981                             return
2982                         if not do_fixup:
2983                             self.report_warning(f'{vid}: {msg}')
2984                             return
2985                         pp = cls(self)
2986                         if pp.available:
2987                             info_dict['__postprocessors'].append(pp)
2988                         else:
2989                             self.report_warning(f'{vid}: {msg}. Install ffmpeg to fix this automatically')
2990
2991                     stretched_ratio = info_dict.get('stretched_ratio')
2992                     ffmpeg_fixup(
2993                         stretched_ratio not in (1, None),
2994                         f'Non-uniform pixel ratio {stretched_ratio}',
2995                         FFmpegFixupStretchedPP)
2996
2997                     ffmpeg_fixup(
2998                         (info_dict.get('requested_formats') is None
2999                          and info_dict.get('container') == 'm4a_dash'
3000                          and info_dict.get('ext') == 'm4a'),
3001                         'writing DASH m4a. Only some players support this container',
3002                         FFmpegFixupM4aPP)
3003
3004                     downloader = get_suitable_downloader(info_dict, self.params) if 'protocol' in info_dict else None
3005                     downloader = downloader.__name__ if downloader else None
3006                     ffmpeg_fixup(info_dict.get('requested_formats') is None and downloader == 'HlsFD',
3007                                  'Possible MPEG-TS in MP4 container or malformed AAC timestamps',
3008                                  FFmpegFixupM3u8PP)
3009                     ffmpeg_fixup(downloader == 'WebSocketFragmentFD', 'Malformed timestamps detected', FFmpegFixupTimestampPP)
3010                     ffmpeg_fixup(downloader == 'WebSocketFragmentFD', 'Malformed duration detected', FFmpegFixupDurationPP)
3011
3012                 fixup()
3013                 try:
3014                     info_dict = self.post_process(dl_filename, info_dict, files_to_move)
3015                 except PostProcessingError as err:
3016                     self.report_error('Postprocessing: %s' % str(err))
3017                     return
3018                 try:
3019                     for ph in self._post_hooks:
3020                         ph(info_dict['filepath'])
3021                 except Exception as err:
3022                     self.report_error('post hooks: %s' % str(err))
3023                     return
3024                 must_record_download_archive = True
3025
3026         if must_record_download_archive or self.params.get('force_write_download_archive', False):
3027             self.record_download_archive(info_dict)
3028         max_downloads = self.params.get('max_downloads')
3029         if max_downloads is not None and self._num_downloads >= int(max_downloads):
3030             raise MaxDownloadsReached()
3031
3032     def __download_wrapper(self, func):
3033         @functools.wraps(func)
3034         def wrapper(*args, **kwargs):
3035             try:
3036                 res = func(*args, **kwargs)
3037             except UnavailableVideoError as e:
3038                 self.report_error(e)
3039             except MaxDownloadsReached as e:
3040                 self.to_screen(f'[info] {e}')
3041                 raise
3042             except DownloadCancelled as e:
3043                 self.to_screen(f'[info] {e}')
3044                 if not self.params.get('break_per_url'):
3045                     raise
3046             else:
3047                 if self.params.get('dump_single_json', False):
3048                     self.post_extract(res)
3049                     self.to_stdout(json.dumps(self.sanitize_info(res)))
3050         return wrapper
3051
3052     def download(self, url_list):
3053         """Download a given list of URLs."""
3054         url_list = variadic(url_list)  # Passing a single URL is a common mistake
3055         outtmpl = self.outtmpl_dict['default']
3056         if (len(url_list) > 1
3057                 and outtmpl != '-'
3058                 and '%' not in outtmpl
3059                 and self.params.get('max_downloads') != 1):
3060             raise SameFileError(outtmpl)
3061
3062         for url in url_list:
3063             self.__download_wrapper(self.extract_info)(
3064                 url, force_generic_extractor=self.params.get('force_generic_extractor', False))
3065
3066         return self._download_retcode
3067
3068     def download_with_info_file(self, info_filename):
3069         with contextlib.closing(fileinput.FileInput(
3070                 [info_filename], mode='r',
3071                 openhook=fileinput.hook_encoded('utf-8'))) as f:
3072             # FileInput doesn't have a read method, we can't call json.load
3073             info = self.sanitize_info(json.loads('\n'.join(f)), self.params.get('clean_infojson', True))
3074         try:
3075             self.__download_wrapper(self.process_ie_result)(info, download=True)
3076         except (DownloadError, EntryNotInPlaylist, ReExtractInfo) as e:
3077             if not isinstance(e, EntryNotInPlaylist):
3078                 self.to_stderr('\r')
3079             webpage_url = info.get('webpage_url')
3080             if webpage_url is not None:
3081                 self.report_warning(f'The info failed to download: {e}; trying with URL {webpage_url}')
3082                 return self.download([webpage_url])
3083             else:
3084                 raise
3085         return self._download_retcode
3086
3087     @staticmethod
3088     def sanitize_info(info_dict, remove_private_keys=False):
3089         ''' Sanitize the infodict for converting to json '''
3090         if info_dict is None:
3091             return info_dict
3092         info_dict.setdefault('epoch', int(time.time()))
3093         remove_keys = {'__original_infodict'}  # Always remove this since this may contain a copy of the entire dict
3094         keep_keys = ['_type']  # Always keep this to facilitate load-info-json
3095         if remove_private_keys:
3096             remove_keys |= {
3097                 'requested_formats', 'requested_subtitles', 'requested_entries', 'entries',
3098                 'filepath', 'infojson_filename', 'original_url', 'playlist_autonumber',
3099             }
3100             empty_values = (None, {}, [], set(), tuple())
3101             reject = lambda k, v: k not in keep_keys and (
3102                 k.startswith('_') or k in remove_keys or v in empty_values)
3103         else:
3104             reject = lambda k, v: k in remove_keys
3105         filter_fn = lambda obj: (
3106             list(map(filter_fn, obj)) if isinstance(obj, (LazyList, list, tuple, set))
3107             else obj if not isinstance(obj, dict)
3108             else dict((k, filter_fn(v)) for k, v in obj.items() if not reject(k, v)))
3109         return filter_fn(info_dict)
3110
3111     @staticmethod
3112     def filter_requested_info(info_dict, actually_filter=True):
3113         ''' Alias of sanitize_info for backward compatibility '''
3114         return YoutubeDL.sanitize_info(info_dict, actually_filter)
3115
3116     def run_pp(self, pp, infodict):
3117         files_to_delete = []
3118         if '__files_to_move' not in infodict:
3119             infodict['__files_to_move'] = {}
3120         try:
3121             files_to_delete, infodict = pp.run(infodict)
3122         except PostProcessingError as e:
3123             # Must be True and not 'only_download'
3124             if self.params.get('ignoreerrors') is True:
3125                 self.report_error(e)
3126                 return infodict
3127             raise
3128
3129         if not files_to_delete:
3130             return infodict
3131         if self.params.get('keepvideo', False):
3132             for f in files_to_delete:
3133                 infodict['__files_to_move'].setdefault(f, '')
3134         else:
3135             for old_filename in set(files_to_delete):
3136                 self.to_screen('Deleting original file %s (pass -k to keep)' % old_filename)
3137                 try:
3138                     os.remove(encodeFilename(old_filename))
3139                 except (IOError, OSError):
3140                     self.report_warning('Unable to remove downloaded original file')
3141                 if old_filename in infodict['__files_to_move']:
3142                     del infodict['__files_to_move'][old_filename]
3143         return infodict
3144
3145     @staticmethod
3146     def post_extract(info_dict):
3147         def actual_post_extract(info_dict):
3148             if info_dict.get('_type') in ('playlist', 'multi_video'):
3149                 for video_dict in info_dict.get('entries', {}):
3150                     actual_post_extract(video_dict or {})
3151                 return
3152
3153             post_extractor = info_dict.get('__post_extractor') or (lambda: {})
3154             extra = post_extractor().items()
3155             info_dict.update(extra)
3156             info_dict.pop('__post_extractor', None)
3157
3158             original_infodict = info_dict.get('__original_infodict') or {}
3159             original_infodict.update(extra)
3160             original_infodict.pop('__post_extractor', None)
3161
3162         actual_post_extract(info_dict or {})
3163
3164     def pre_process(self, ie_info, key='pre_process', files_to_move=None):
3165         info = dict(ie_info)
3166         info['__files_to_move'] = files_to_move or {}
3167         for pp in self._pps[key]:
3168             info = self.run_pp(pp, info)
3169         return info, info.pop('__files_to_move', None)
3170
3171     def post_process(self, filename, ie_info, files_to_move=None):
3172         """Run all the postprocessors on the given file."""
3173         info = dict(ie_info)
3174         info['filepath'] = filename
3175         info['__files_to_move'] = files_to_move or {}
3176
3177         for pp in ie_info.get('__postprocessors', []) + self._pps['post_process']:
3178             info = self.run_pp(pp, info)
3179         info = self.run_pp(MoveFilesAfterDownloadPP(self), info)
3180         del info['__files_to_move']
3181         for pp in self._pps['after_move']:
3182             info = self.run_pp(pp, info)
3183         return info
3184
3185     def _make_archive_id(self, info_dict):
3186         video_id = info_dict.get('id')
3187         if not video_id:
3188             return
3189         # Future-proof against any change in case
3190         # and backwards compatibility with prior versions
3191         extractor = info_dict.get('extractor_key') or info_dict.get('ie_key')  # key in a playlist
3192         if extractor is None:
3193             url = str_or_none(info_dict.get('url'))
3194             if not url:
3195                 return
3196             # Try to find matching extractor for the URL and take its ie_key
3197             for ie_key, ie in self._ies.items():
3198                 if ie.suitable(url):
3199                     extractor = ie_key
3200                     break
3201             else:
3202                 return
3203         return '%s %s' % (extractor.lower(), video_id)
3204
3205     def in_download_archive(self, info_dict):
3206         fn = self.params.get('download_archive')
3207         if fn is None:
3208             return False
3209
3210         vid_id = self._make_archive_id(info_dict)
3211         if not vid_id:
3212             return False  # Incomplete video information
3213
3214         return vid_id in self.archive
3215
3216     def record_download_archive(self, info_dict):
3217         fn = self.params.get('download_archive')
3218         if fn is None:
3219             return
3220         vid_id = self._make_archive_id(info_dict)
3221         assert vid_id
3222         with locked_file(fn, 'a', encoding='utf-8') as archive_file:
3223             archive_file.write(vid_id + '\n')
3224         self.archive.add(vid_id)
3225
3226     @staticmethod
3227     def format_resolution(format, default='unknown'):
3228         if format.get('vcodec') == 'none' and format.get('acodec') != 'none':
3229             return 'audio only'
3230         if format.get('resolution') is not None:
3231             return format['resolution']
3232         if format.get('width') and format.get('height'):
3233             return '%dx%d' % (format['width'], format['height'])
3234         elif format.get('height'):
3235             return '%sp' % format['height']
3236         elif format.get('width'):
3237             return '%dx?' % format['width']
3238         return default
3239
3240     def _format_note(self, fdict):
3241         res = ''
3242         if fdict.get('ext') in ['f4f', 'f4m']:
3243             res += '(unsupported)'
3244         if fdict.get('language'):
3245             if res:
3246                 res += ' '
3247             res += '[%s]' % fdict['language']
3248         if fdict.get('format_note') is not None:
3249             if res:
3250                 res += ' '
3251             res += fdict['format_note']
3252         if fdict.get('tbr') is not None:
3253             if res:
3254                 res += ', '
3255             res += '%4dk' % fdict['tbr']
3256         if fdict.get('container') is not None:
3257             if res:
3258                 res += ', '
3259             res += '%s container' % fdict['container']
3260         if (fdict.get('vcodec') is not None
3261                 and fdict.get('vcodec') != 'none'):
3262             if res:
3263                 res += ', '
3264             res += fdict['vcodec']
3265             if fdict.get('vbr') is not None:
3266                 res += '@'
3267         elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
3268             res += 'video@'
3269         if fdict.get('vbr') is not None:
3270             res += '%4dk' % fdict['vbr']
3271         if fdict.get('fps') is not None:
3272             if res:
3273                 res += ', '
3274             res += '%sfps' % fdict['fps']
3275         if fdict.get('acodec') is not None:
3276             if res:
3277                 res += ', '
3278             if fdict['acodec'] == 'none':
3279                 res += 'video only'
3280             else:
3281                 res += '%-5s' % fdict['acodec']
3282         elif fdict.get('abr') is not None:
3283             if res:
3284                 res += ', '
3285             res += 'audio'
3286         if fdict.get('abr') is not None:
3287             res += '@%3dk' % fdict['abr']
3288         if fdict.get('asr') is not None:
3289             res += ' (%5dHz)' % fdict['asr']
3290         if fdict.get('filesize') is not None:
3291             if res:
3292                 res += ', '
3293             res += format_bytes(fdict['filesize'])
3294         elif fdict.get('filesize_approx') is not None:
3295             if res:
3296                 res += ', '
3297             res += '~' + format_bytes(fdict['filesize_approx'])
3298         return res
3299
3300     def _list_format_headers(self, *headers):
3301         if self.params.get('listformats_table', True) is not False:
3302             return [self._format_screen(header, self.Styles.HEADERS) for header in headers]
3303         return headers
3304
3305     def list_formats(self, info_dict):
3306         formats = info_dict.get('formats', [info_dict])
3307         new_format = self.params.get('listformats_table', True) is not False
3308         if new_format:
3309             delim = self._format_screen('\u2502', self.Styles.DELIM, '|', test_encoding=True)
3310             table = [
3311                 [
3312                     self._format_screen(format_field(f, 'format_id'), self.Styles.ID),
3313                     format_field(f, 'ext'),
3314                     format_field(f, func=self.format_resolution, ignore=('audio only', 'images')),
3315                     format_field(f, 'fps', '\t%d'),
3316                     format_field(f, 'dynamic_range', '%s', ignore=(None, 'SDR')).replace('HDR', ''),
3317                     delim,
3318                     format_field(f, 'filesize', ' \t%s', func=format_bytes) + format_field(f, 'filesize_approx', '~\t%s', func=format_bytes),
3319                     format_field(f, 'tbr', '\t%dk'),
3320                     shorten_protocol_name(f.get('protocol', '').replace('native', 'n')),
3321                     delim,
3322                     format_field(f, 'vcodec', default='unknown').replace(
3323                         'none',
3324                         'images' if f.get('acodec') == 'none'
3325                         else self._format_screen('audio only', self.Styles.SUPPRESS)),
3326                     format_field(f, 'vbr', '\t%dk'),
3327                     format_field(f, 'acodec', default='unknown').replace(
3328                         'none',
3329                         '' if f.get('vcodec') == 'none'
3330                         else self._format_screen('video only', self.Styles.SUPPRESS)),
3331                     format_field(f, 'abr', '\t%dk'),
3332                     format_field(f, 'asr', '\t%dHz'),
3333                     join_nonempty(
3334                         self._format_screen('UNSUPPORTED', 'light red') if f.get('ext') in ('f4f', 'f4m') else None,
3335                         format_field(f, 'language', '[%s]'),
3336                         join_nonempty(
3337                             format_field(f, 'format_note'),
3338                             format_field(f, 'container', ignore=(None, f.get('ext'))),
3339                             delim=', '),
3340                         delim=' '),
3341                 ] for f in formats if f.get('preference') is None or f['preference'] >= -1000]
3342             header_line = self._list_format_headers(
3343                 'ID', 'EXT', 'RESOLUTION', '\tFPS', 'HDR', delim, '\tFILESIZE', '\tTBR', 'PROTO',
3344                 delim, 'VCODEC', '\tVBR', 'ACODEC', '\tABR', '\tASR', 'MORE INFO')
3345         else:
3346             table = [
3347                 [
3348                     format_field(f, 'format_id'),
3349                     format_field(f, 'ext'),
3350                     self.format_resolution(f),
3351                     self._format_note(f)]
3352                 for f in formats
3353                 if f.get('preference') is None or f['preference'] >= -1000]
3354             header_line = ['format code', 'extension', 'resolution', 'note']
3355
3356         self.to_screen(
3357             '[info] Available formats for %s:' % info_dict['id'])
3358         self.to_stdout(render_table(
3359             header_line, table,
3360             extra_gap=(0 if new_format else 1),
3361             hide_empty=new_format,
3362             delim=new_format and self._format_screen('\u2500', self.Styles.DELIM, '-', test_encoding=True)))
3363
3364     def list_thumbnails(self, info_dict):
3365         thumbnails = list(info_dict.get('thumbnails'))
3366         if not thumbnails:
3367             self.to_screen('[info] No thumbnails present for %s' % info_dict['id'])
3368             return
3369
3370         self.to_screen(
3371             '[info] Thumbnails for %s:' % info_dict['id'])
3372         self.to_stdout(render_table(
3373             self._list_format_headers('ID', 'Width', 'Height', 'URL'),
3374             [[t['id'], t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']] for t in thumbnails]))
3375
3376     def list_subtitles(self, video_id, subtitles, name='subtitles'):
3377         if not subtitles:
3378             self.to_screen('%s has no %s' % (video_id, name))
3379             return
3380         self.to_screen(
3381             'Available %s for %s:' % (name, video_id))
3382
3383         def _row(lang, formats):
3384             exts, names = zip(*((f['ext'], f.get('name') or 'unknown') for f in reversed(formats)))
3385             if len(set(names)) == 1:
3386                 names = [] if names[0] == 'unknown' else names[:1]
3387             return [lang, ', '.join(names), ', '.join(exts)]
3388
3389         self.to_stdout(render_table(
3390             self._list_format_headers('Language', 'Name', 'Formats'),
3391             [_row(lang, formats) for lang, formats in subtitles.items()],
3392             hide_empty=True))
3393
3394     def urlopen(self, req):
3395         """ Start an HTTP download """
3396         if isinstance(req, compat_basestring):
3397             req = sanitized_Request(req)
3398         return self._opener.open(req, timeout=self._socket_timeout)
3399
    def print_debug_header(self):
        """Write the verbose-mode debug header.

        Does nothing unless the 'verbose' param is set. Otherwise a series of
        '[debug] ...' lines is emitted: encodings, yt-dlp version and variant,
        lazy-loader and plugin state, compatibility options, git HEAD (for
        source checkouts), Python version, external program versions, optional
        libraries and the proxy map. The lines go to the 'logger' param when one
        is set, and are written directly otherwise.
        """
        if not self.params.get('verbose'):
            return

        def get_encoding(stream):
            # Describe the stream's encoding, flagging streams that do not
            # support terminal sequences
            ret = getattr(stream, 'encoding', 'missing (%s)' % type(stream).__name__)
            if not supports_terminal_sequences(stream):
                from .compat import WINDOWS_VT_MODE
                ret += ' (No VT)' if WINDOWS_VT_MODE is False else ' (No ANSI)'
            return ret

        encoding_str = 'Encodings: locale %s, fs %s, out %s, err %s, pref %s' % (
            locale.getpreferredencoding(),
            sys.getfilesystemencoding(),
            get_encoding(self._screen_file), get_encoding(self._err_file),
            self.get_encoding())

        # Pick the sink for all following debug lines. Without a logger the
        # encoding line itself is written with write_string(encoding=None)
        # rather than through self._write_string
        logger = self.params.get('logger')
        if logger:
            write_debug = lambda msg: logger.debug(f'[debug] {msg}')
            write_debug(encoding_str)
        else:
            write_string(f'[debug] {encoding_str}\n', encoding=None)
            write_debug = lambda msg: self._write_string(f'[debug] {msg}\n')

        source = detect_variant()
        write_debug(join_nonempty(
            'yt-dlp version', __version__,
            f'[{RELEASE_GIT_HEAD}]' if RELEASE_GIT_HEAD else '',
            '' if source == 'unknown' else f'({source})',
            delim=' '))
        if not _LAZY_LOADER:
            if os.environ.get('YTDLP_NO_LAZY_EXTRACTORS'):
                write_debug('Lazy loading extractors is forcibly disabled')
            else:
                write_debug('Lazy loading extractors is disabled')
        if plugin_extractors or plugin_postprocessors:
            # Each plugin is listed by class name, plus the name it is
            # registered under when that differs
            write_debug('Plugins: %s' % [
                '%s%s' % (klass.__name__, '' if klass.__name__ == name else f' as {name}')
                for name, klass in itertools.chain(plugin_extractors.items(), plugin_postprocessors.items())])
        if self.params.get('compat_opts'):
            write_debug('Compatibility options: %s' % ', '.join(self.params.get('compat_opts')))

        # When running from a source checkout, report the current git commit.
        # This is best-effort: any failure is silently ignored
        if source == 'source':
            try:
                sp = Popen(
                    ['git', 'rev-parse', '--short', 'HEAD'],
                    stdout=subprocess.PIPE, stderr=subprocess.PIPE,
                    cwd=os.path.dirname(os.path.abspath(__file__)))
                out, err = sp.communicate_or_kill()
                out = out.decode().strip()
                if re.match('[0-9a-f]+', out):
                    write_debug('Git HEAD: %s' % out)
            except Exception:
                # NOTE(review): sys.exc_clear() looks like a Python 2 leftover;
                # the inner try/except keeps its absence harmless
                try:
                    sys.exc_clear()
                except Exception:
                    pass

        def python_implementation():
            # Name of the Python implementation, with the PyPy version appended
            # when available
            impl_name = platform.python_implementation()
            if impl_name == 'PyPy' and hasattr(sys, 'pypy_version_info'):
                return impl_name + ' version %d.%d.%d' % sys.pypy_version_info[:3]
            return impl_name

        write_debug('Python version %s (%s %s) - %s' % (
            platform.python_version(),
            python_implementation(),
            platform.architecture()[0],
            platform_name()))

        # Versions of the external programs; enabled ffmpeg features are
        # appended to the ffmpeg version string
        exe_versions, ffmpeg_features = FFmpegPostProcessor.get_versions_and_features(self)
        ffmpeg_features = {key for key, val in ffmpeg_features.items() if val}
        if ffmpeg_features:
            exe_versions['ffmpeg'] += ' (%s)' % ','.join(ffmpeg_features)

        exe_versions['rtmpdump'] = rtmpdump_version()
        exe_versions['phantomjs'] = PhantomJSwrapper._version()
        # Only programs with a truthy version are listed
        exe_str = ', '.join(
            f'{exe} {v}' for exe, v in sorted(exe_versions.items()) if v
        ) or 'none'
        write_debug('exe versions: %s' % exe_str)

        from .downloader.websocket import has_websockets
        from .postprocessor.embedthumbnail import has_mutagen
        from .cookies import SQLITE_AVAILABLE, KEYRING_AVAILABLE

        # Each entry is a falsy value or the library's display name;
        # join_nonempty is relied on to drop the falsy ones
        lib_str = join_nonempty(
            compat_pycrypto_AES and compat_pycrypto_AES.__name__.split('.')[0],
            KEYRING_AVAILABLE and 'keyring',
            has_mutagen and 'mutagen',
            SQLITE_AVAILABLE and 'sqlite',
            has_websockets and 'websockets',
            delim=', ') or 'none'
        write_debug('Optional libraries: %s' % lib_str)

        # Merge the proxies of every opener handler that exposes them
        proxy_map = {}
        for handler in self._opener.handlers:
            if hasattr(handler, 'proxies'):
                proxy_map.update(handler.proxies)
        write_debug(f'Proxy map: {proxy_map}')

        # Not implemented
        # (the `False and` guard below keeps this block from ever running)
        if False and self.params.get('call_home'):
            ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode('utf-8')
            write_debug('Public IP address: %s' % ipaddr)
            latest_version = self.urlopen(
                'https://yt-dl.org/latest/version').read().decode('utf-8')
            if version_tuple(latest_version) > version_tuple(__version__):
                self.report_warning(
                    'You are using an outdated version (newest version: %s)! '
                    'See https://yt-dl.org/update if you need help updating.' %
                    latest_version)
3513
3514     def _setup_opener(self):
3515         timeout_val = self.params.get('socket_timeout')
3516         self._socket_timeout = 20 if timeout_val is None else float(timeout_val)
3517
3518         opts_cookiesfrombrowser = self.params.get('cookiesfrombrowser')
3519         opts_cookiefile = self.params.get('cookiefile')
3520         opts_proxy = self.params.get('proxy')
3521
3522         self.cookiejar = load_cookies(opts_cookiefile, opts_cookiesfrombrowser, self)
3523
3524         cookie_processor = YoutubeDLCookieProcessor(self.cookiejar)
3525         if opts_proxy is not None:
3526             if opts_proxy == '':
3527                 proxies = {}
3528             else:
3529                 proxies = {'http': opts_proxy, 'https': opts_proxy}
3530         else:
3531             proxies = compat_urllib_request.getproxies()
3532             # Set HTTPS proxy to HTTP one if given (https://github.com/ytdl-org/youtube-dl/issues/805)
3533             if 'http' in proxies and 'https' not in proxies:
3534                 proxies['https'] = proxies['http']
3535         proxy_handler = PerRequestProxyHandler(proxies)
3536
3537         debuglevel = 1 if self.params.get('debug_printtraffic') else 0
3538         https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
3539         ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
3540         redirect_handler = YoutubeDLRedirectHandler()
3541         data_handler = compat_urllib_request_DataHandler()
3542
3543         # When passing our own FileHandler instance, build_opener won't add the
3544         # default FileHandler and allows us to disable the file protocol, which
3545         # can be used for malicious purposes (see
3546         # https://github.com/ytdl-org/youtube-dl/issues/8227)
3547         file_handler = compat_urllib_request.FileHandler()
3548
3549         def file_open(*args, **kwargs):
3550             raise compat_urllib_error.URLError('file:// scheme is explicitly disabled in yt-dlp for security reasons')
3551         file_handler.file_open = file_open
3552
3553         opener = compat_urllib_request.build_opener(
3554             proxy_handler, https_handler, cookie_processor, ydlh, redirect_handler, data_handler, file_handler)
3555
3556         # Delete the default user-agent header, which would otherwise apply in
3557         # cases where our custom HTTP handler doesn't come into play
3558         # (See https://github.com/ytdl-org/youtube-dl/issues/1309 for details)
3559         opener.addheaders = []
3560         self._opener = opener
3561
3562     def encode(self, s):
3563         if isinstance(s, bytes):
3564             return s  # Already encoded
3565
3566         try:
3567             return s.encode(self.get_encoding())
3568         except UnicodeEncodeError as err:
3569             err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
3570             raise
3571
3572     def get_encoding(self):
3573         encoding = self.params.get('encoding')
3574         if encoding is None:
3575             encoding = preferredencoding()
3576         return encoding
3577
3578     def _write_info_json(self, label, ie_result, infofn, overwrite=None):
3579         ''' Write infojson and returns True = written, False = skip, None = error '''
3580         if overwrite is None:
3581             overwrite = self.params.get('overwrites', True)
3582         if not self.params.get('writeinfojson'):
3583             return False
3584         elif not infofn:
3585             self.write_debug(f'Skipping writing {label} infojson')
3586             return False
3587         elif not self._ensure_dir_exists(infofn):
3588             return None
3589         elif not overwrite and os.path.exists(infofn):
3590             self.to_screen(f'[info] {label.title()} metadata is already present')
3591         else:
3592             self.to_screen(f'[info] Writing {label} metadata as JSON to: {infofn}')
3593             try:
3594                 write_json_file(self.sanitize_info(ie_result, self.params.get('clean_infojson', True)), infofn)
3595             except (OSError, IOError):
3596                 self.report_error(f'Cannot write {label} metadata to JSON file {infofn}')
3597                 return None
3598         return True
3599
3600     def _write_description(self, label, ie_result, descfn):
3601         ''' Write description and returns True = written, False = skip, None = error '''
3602         if not self.params.get('writedescription'):
3603             return False
3604         elif not descfn:
3605             self.write_debug(f'Skipping writing {label} description')
3606             return False
3607         elif not self._ensure_dir_exists(descfn):
3608             return None
3609         elif not self.params.get('overwrites', True) and os.path.exists(descfn):
3610             self.to_screen(f'[info] {label.title()} description is already present')
3611         elif ie_result.get('description') is None:
3612             self.report_warning(f'There\'s no {label} description to write')
3613             return False
3614         else:
3615             try:
3616                 self.to_screen(f'[info] Writing {label} description to: {descfn}')
3617                 with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
3618                     descfile.write(ie_result['description'])
3619             except (OSError, IOError):
3620                 self.report_error(f'Cannot write {label} description file {descfn}')
3621                 return None
3622         return True
3623
3624     def _write_subtitles(self, info_dict, filename):
3625         ''' Write subtitles to file and return list of (sub_filename, final_sub_filename); or None if error'''
3626         ret = []
3627         subtitles = info_dict.get('requested_subtitles')
3628         if not subtitles or not (self.params.get('writesubtitles') or self.params.get('writeautomaticsub')):
3629             # subtitles download errors are already managed as troubles in relevant IE
3630             # that way it will silently go on when used with unsupporting IE
3631             return ret
3632
3633         sub_filename_base = self.prepare_filename(info_dict, 'subtitle')
3634         if not sub_filename_base:
3635             self.to_screen('[info] Skipping writing video subtitles')
3636             return ret
3637         for sub_lang, sub_info in subtitles.items():
3638             sub_format = sub_info['ext']
3639             sub_filename = subtitles_filename(filename, sub_lang, sub_format, info_dict.get('ext'))
3640             sub_filename_final = subtitles_filename(sub_filename_base, sub_lang, sub_format, info_dict.get('ext'))
3641             if not self.params.get('overwrites', True) and os.path.exists(sub_filename):
3642                 self.to_screen(f'[info] Video subtitle {sub_lang}.{sub_format} is already present')
3643                 sub_info['filepath'] = sub_filename
3644                 ret.append((sub_filename, sub_filename_final))
3645                 continue
3646
3647             self.to_screen(f'[info] Writing video subtitles to: {sub_filename}')
3648             if sub_info.get('data') is not None:
3649                 try:
3650                     # Use newline='' to prevent conversion of newline characters
3651                     # See https://github.com/ytdl-org/youtube-dl/issues/10268
3652                     with io.open(sub_filename, 'w', encoding='utf-8', newline='') as subfile:
3653                         subfile.write(sub_info['data'])
3654                     sub_info['filepath'] = sub_filename
3655                     ret.append((sub_filename, sub_filename_final))
3656                     continue
3657                 except (OSError, IOError):
3658                     self.report_error(f'Cannot write video subtitles file {sub_filename}')
3659                     return None
3660
3661             try:
3662                 sub_copy = sub_info.copy()
3663                 sub_copy.setdefault('http_headers', info_dict.get('http_headers'))
3664                 self.dl(sub_filename, sub_copy, subtitle=True)
3665                 sub_info['filepath'] = sub_filename
3666                 ret.append((sub_filename, sub_filename_final))
3667             except (ExtractorError, IOError, OSError, ValueError) + network_exceptions as err:
3668                 self.report_warning(f'Unable to download video subtitles for {sub_lang!r}: {err}')
3669                 continue
3670         return ret
3671
3672     def _write_thumbnails(self, label, info_dict, filename, thumb_filename_base=None):
3673         ''' Write thumbnails to file and return list of (thumb_filename, final_thumb_filename) '''
3674         write_all = self.params.get('write_all_thumbnails', False)
3675         thumbnails, ret = [], []
3676         if write_all or self.params.get('writethumbnail', False):
3677             thumbnails = info_dict.get('thumbnails') or []
3678         multiple = write_all and len(thumbnails) > 1
3679
3680         if thumb_filename_base is None:
3681             thumb_filename_base = filename
3682         if thumbnails and not thumb_filename_base:
3683             self.write_debug(f'Skipping writing {label} thumbnail')
3684             return ret
3685
3686         for t in thumbnails[::-1]:
3687             thumb_ext = (f'{t["id"]}.' if multiple else '') + determine_ext(t['url'], 'jpg')
3688             thumb_display_id = f'{label} thumbnail {t["id"]}'
3689             thumb_filename = replace_extension(filename, thumb_ext, info_dict.get('ext'))
3690             thumb_filename_final = replace_extension(thumb_filename_base, thumb_ext, info_dict.get('ext'))
3691
3692             if not self.params.get('overwrites', True) and os.path.exists(thumb_filename):
3693                 ret.append((thumb_filename, thumb_filename_final))
3694                 t['filepath'] = thumb_filename
3695                 self.to_screen('[info] %s is already present' % (
3696                     thumb_display_id if multiple else f'{label} thumbnail').capitalize())
3697             else:
3698                 self.to_screen(f'[info] Downloading {thumb_display_id} ...')
3699                 try:
3700                     uf = self.urlopen(t['url'])
3701                     self.to_screen(f'[info] Writing {thumb_display_id} to: {thumb_filename}')
3702                     with open(encodeFilename(thumb_filename), 'wb') as thumbf:
3703                         shutil.copyfileobj(uf, thumbf)
3704                     ret.append((thumb_filename, thumb_filename_final))
3705                     t['filepath'] = thumb_filename
3706                 except network_exceptions as err:
3707                     self.report_warning(f'Unable to download {thumb_display_id}: {err}')
3708             if ret and not write_all:
3709                 break
3710         return ret