yt_dlp/YoutubeDL.py

   1 #!/usr/bin/env python3
   2 # coding: utf-8
   3
   4 from __future__ import absolute_import, unicode_literals
   5
   6 import collections
   7 import contextlib
   8 import copy
   9 import datetime
  10 import errno
  11 import fileinput
  12 import functools
  13 import io
  14 import itertools
  15 import json
  16 import locale
  17 import operator
  18 import os
  19 import platform
  20 import re
  21 import shutil
  22 import subprocess
  23 import sys
  24 import tempfile
  25 import time
  26 import tokenize
  27 import traceback
  28 import random
  29 import unicodedata
  30
  31 from enum import Enum
  32 from string import ascii_letters
  33
  34 from .compat import (
  35     compat_basestring,
  36     compat_get_terminal_size,
  37     compat_kwargs,
  38     compat_numeric_types,
  39     compat_os_name,
  40     compat_pycrypto_AES,
  41     compat_shlex_quote,
  42     compat_str,
  43     compat_tokenize_tokenize,
  44     compat_urllib_error,
  45     compat_urllib_request,
  46     compat_urllib_request_DataHandler,
  47     windows_enable_vt_mode,
  48 )
  49 from .cookies import load_cookies
  50 from .utils import (
  51     age_restricted,
  52     args_to_str,
  53     ContentTooShortError,
  54     date_from_str,
  55     DateRange,
  56     DEFAULT_OUTTMPL,
  57     determine_ext,
  58     determine_protocol,
  59     DownloadCancelled,
  60     DownloadError,
  61     encode_compat_str,
  62     encodeFilename,
  63     EntryNotInPlaylist,
  64     error_to_compat_str,
  65     ExistingVideoReached,
  66     expand_path,
  67     ExtractorError,
  68     float_or_none,
  69     format_bytes,
  70     format_field,
  71     formatSeconds,
  72     GeoRestrictedError,
  73     get_domain,
  74     HEADRequest,
  75     int_or_none,
  76     iri_to_uri,
  77     ISO3166Utils,
  78     join_nonempty,
  79     LazyList,
  80     LINK_TEMPLATES,
  81     locked_file,
  82     make_dir,
  83     make_HTTPS_handler,
  84     MaxDownloadsReached,
  85     network_exceptions,
  86     number_of_digits,
  87     orderedSet,
  88     OUTTMPL_TYPES,
  89     PagedList,
  90     parse_filesize,
  91     PerRequestProxyHandler,
  92     platform_name,
  93     Popen,
  94     PostProcessingError,
  95     preferredencoding,
  96     prepend_extension,
  97     ReExtractInfo,
  98     register_socks_protocols,
  99     RejectedVideoReached,
 100     remove_terminal_sequences,
 101     render_table,
 102     replace_extension,
 103     SameFileError,
 104     sanitize_filename,
 105     sanitize_path,
 106     sanitize_url,
 107     sanitized_Request,
 108     std_headers,
 109     STR_FORMAT_RE_TMPL,
 110     STR_FORMAT_TYPES,
 111     str_or_none,
 112     strftime_or_none,
 113     subtitles_filename,
 114     supports_terminal_sequences,
 115     timetuple_from_msec,
 116     to_high_limit_path,
 117     traverse_obj,
 118     try_get,
 119     UnavailableVideoError,
 120     url_basename,
 121     variadic,
 122     version_tuple,
 123     write_json_file,
 124     write_string,
 125     YoutubeDLCookieProcessor,
 126     YoutubeDLHandler,
 127     YoutubeDLRedirectHandler,
 128 )
 129 from .cache import Cache
 130 from .minicurses import format_text
 131 from .extractor import (
 132     gen_extractor_classes,
 133     get_info_extractor,
 134     _LAZY_LOADER,
 135     _PLUGIN_CLASSES as plugin_extractors
 136 )
 137 from .extractor.openload import PhantomJSwrapper
 138 from .downloader import (
 139     FFmpegFD,
 140     get_suitable_downloader,
 141     shorten_protocol_name
 142 )
 143 from .downloader.rtmp import rtmpdump_version
 144 from .postprocessor import (
 145     get_postprocessor,
 146     EmbedThumbnailPP,
 147     FFmpegFixupDurationPP,
 148     FFmpegFixupM3u8PP,
 149     FFmpegFixupM4aPP,
 150     FFmpegFixupStretchedPP,
 151     FFmpegFixupTimestampPP,
 152     FFmpegMergerPP,
 153     FFmpegPostProcessor,
 154     MoveFilesAfterDownloadPP,
 155     _PLUGIN_CLASSES as plugin_postprocessors
 156 )
 157 from .update import detect_variant
 158 from .version import __version__, RELEASE_GIT_HEAD
 159
 160 if compat_os_name == 'nt':
 161     import ctypes
 162
 163
 164 class YoutubeDL(object):
 165     """YoutubeDL class.
 166
 167     YoutubeDL objects are the ones responsible for downloading the
 168     actual video file and writing it to disk if the user has requested
 169     it, among some other tasks. In most cases there should be one per
 170     program. As, given a video URL, the downloader doesn't know how to
 171     extract all the needed information, a task that InfoExtractors do, it
 172     has to pass the URL to one of them.
 173
 174     For this, YoutubeDL objects have a method that allows
 175     InfoExtractors to be registered in a given order. When it is passed
 176     a URL, the YoutubeDL object hands it to the first InfoExtractor it
 177     finds that reports being able to handle it. The InfoExtractor extracts
 178     all the information about the video or videos the URL refers to, and
 179     YoutubeDL processes the extracted information, possibly using a File
 180     Downloader to download the video.
 181
 182     YoutubeDL objects accept a lot of parameters. In order not to saturate
 183     the object constructor with arguments, it receives a dictionary of
 184     options instead. These options are available through the params
 185     attribute for the InfoExtractors to use. The YoutubeDL also
 186     registers itself as the downloader in charge for the InfoExtractors
 187     that are added to it, so this is a "mutual registration".
 188
 189     Available options:
 190
 191     username:          Username for authentication purposes.
 192     password:          Password for authentication purposes.
 193     videopassword:     Password for accessing a video.
 194     ap_mso:            Adobe Pass multiple-system operator identifier.
 195     ap_username:       Multiple-system operator account username.
 196     ap_password:       Multiple-system operator account password.
 197     usenetrc:          Use netrc for authentication instead.
 198     verbose:           Print additional info to stdout.
 199     quiet:             Do not print messages to stdout.
 200     no_warnings:       Do not print out anything for warnings.
 201     forceprint:        A list of templates to force print
 202     forceurl:          Force printing final URL. (Deprecated)
 203     forcetitle:        Force printing title. (Deprecated)
 204     forceid:           Force printing ID. (Deprecated)
 205     forcethumbnail:    Force printing thumbnail URL. (Deprecated)
 206     forcedescription:  Force printing description. (Deprecated)
 207     forcefilename:     Force printing final filename. (Deprecated)
 208     forceduration:     Force printing duration. (Deprecated)
 209     forcejson:         Force printing info_dict as JSON.
 210     dump_single_json:  Force printing the info_dict of the whole playlist
 211                        (or video) as a single JSON line.
 212     force_write_download_archive: Force writing download archive regardless
 213                        of 'skip_download' or 'simulate'.
 214     simulate:          Do not download the video files. If unset (or None),
 215                        simulate only if listsubtitles, listformats or list_thumbnails is used
 216     format:            Video format code. see "FORMAT SELECTION" for more details.
 217                        You can also pass a function. The function takes 'ctx' as
 218                        argument and returns the formats to download.
 219                        See "build_format_selector" for an implementation
 220     allow_unplayable_formats:   Allow unplayable formats to be extracted and downloaded.
 221     ignore_no_formats_error: Ignore "No video formats" error. Useful for
 222                        extracting metadata even if the video is not actually
 223                        available for download (experimental)
 224     format_sort:       A list of fields by which to sort the video formats.
 225                        See "Sorting Formats" for more details.
 226     format_sort_force: Force the given format_sort. see "Sorting Formats"
 227                        for more details.
 228     allow_multiple_video_streams:   Allow multiple video streams to be merged
 229                        into a single file
 230     allow_multiple_audio_streams:   Allow multiple audio streams to be merged
 231                        into a single file
 232     check_formats:     Whether to test if the formats are downloadable.
 233                        Can be True (check all), False (check none),
 234                        'selected' (check selected formats),
 235                        or None (check only if requested by extractor)
 236     paths:             Dictionary of output paths. The allowed keys are 'home',
 237                        'temp' and the keys of OUTTMPL_TYPES (in utils.py)
 238     outtmpl:           Dictionary of templates for output names. Allowed keys
 239                        are 'default' and the keys of OUTTMPL_TYPES (in utils.py).
 240                        For compatibility with youtube-dl, a single string can also be used
 241     outtmpl_na_placeholder: Placeholder for unavailable meta fields.
 242     restrictfilenames: Do not allow "&" and spaces in file names
 243     trim_file_name:    Limit length of filename (extension excluded)
 244     windowsfilenames:  Force the filenames to be windows compatible
 245     ignoreerrors:      Do not stop on download/postprocessing errors.
 246                        Can be 'only_download' to ignore only download errors.
 247                        Default is 'only_download' for CLI, but False for API
 248     skip_playlist_after_errors: Number of allowed failures until the rest of
 249                        the playlist is skipped
 250     force_generic_extractor: Force downloader to use the generic extractor
 251     overwrites:        Overwrite all video and metadata files if True,
 252                        overwrite only non-video files if None
 253                        and don't overwrite any file if False
 254                        For compatibility with youtube-dl,
 255                        "nooverwrites" may also be used instead
 256     playliststart:     Playlist item to start at.
 257     playlistend:       Playlist item to end at.
 258     playlist_items:    Specific indices of playlist to download.
 259     playlistreverse:   Download playlist items in reverse order.
 260     playlistrandom:    Download playlist items in random order.
 261     matchtitle:        Download only matching titles.
 262     rejecttitle:       Reject downloads for matching titles.
 263     logger:            Log messages to a logging.Logger instance.
 264     logtostderr:       Log messages to stderr instead of stdout.
 265     consoletitle:      Display progress in console window's titlebar.
 266     writedescription:  Write the video description to a .description file
 267     writeinfojson:     Write the video metadata to a .info.json file
 268     clean_infojson:    Remove private fields from the infojson
 269     getcomments:       Extract video comments. This will not be written to disk
 270                        unless writeinfojson is also given
 271     writeannotations:  Write the video annotations to a .annotations.xml file
 272     writethumbnail:    Write the thumbnail image to a file
 273     allow_playlist_files: Whether to write playlists' description, infojson etc
 274                        also to disk when using the 'write*' options
 275     write_all_thumbnails:  Write all thumbnail formats to files
 276     writelink:         Write an internet shortcut file, depending on the
 277                        current platform (.url/.webloc/.desktop)
 278     writeurllink:      Write a Windows internet shortcut file (.url)
 279     writewebloclink:   Write a macOS internet shortcut file (.webloc)
 280     writedesktoplink:  Write a Linux internet shortcut file (.desktop)
 281     writesubtitles:    Write the video subtitles to a file
 282     writeautomaticsub: Write the automatically generated subtitles to a file
 283     allsubtitles:      Deprecated - Use subtitleslangs = ['all']
 284                        Downloads all the subtitles of the video
 285                        (requires writesubtitles or writeautomaticsub)
 286     listsubtitles:     Lists all available subtitles for the video
 287     subtitlesformat:   The format code for subtitles
 288     subtitleslangs:    List of languages of the subtitles to download (can be regex).
 289                        The list may contain "all" to refer to all the available
 290                        subtitles. The language can be prefixed with a "-" to
 291                        exclude it from the requested languages. Eg: ['all', '-live_chat']
 292     keepvideo:         Keep the video file after post-processing
 293     daterange:         A DateRange object, download only if the upload_date is in the range.
 294     skip_download:     Skip the actual download of the video file
 295     cachedir:          Location of the cache files in the filesystem.
 296                        False to disable filesystem cache.
 297     noplaylist:        Download single video instead of a playlist if in doubt.
 298     age_limit:         An integer representing the user's age in years.
 299                        Unsuitable videos for the given age are skipped.
 300     min_views:         An integer representing the minimum view count the video
 301                        must have in order to not be skipped.
 302                        Videos without view count information are always
 303                        downloaded. None for no limit.
 304     max_views:         An integer representing the maximum view count.
 305                        Videos that are more popular than that are not
 306                        downloaded.
 307                        Videos without view count information are always
 308                        downloaded. None for no limit.
 309     download_archive:  File name of a file where all downloads are recorded.
 310                        Videos already present in the file are not downloaded
 311                        again.
 312     break_on_existing: Stop the download process after attempting to download a
 313                        file that is in the archive.
 314     break_on_reject:   Stop the download process when encountering a video that
 315                        has been filtered out.
 316     break_per_url:     Whether break_on_reject and break_on_existing
 317                        should act on each input URL as opposed to for the entire queue
 318     cookiefile:        File name where cookies should be read from and dumped to
 319     cookiesfrombrowser: A tuple containing the name of the browser and the profile
 320                        name/path from where cookies are loaded.
 321                        Eg: ('chrome', ) or ('vivaldi', 'default')
 322     nocheckcertificate:Do not verify SSL certificates
 323     prefer_insecure:   Use HTTP instead of HTTPS to retrieve information.
 324                        At the moment, this is only supported by YouTube.
 325     proxy:             URL of the proxy server to use
 326     geo_verification_proxy:  URL of the proxy to use for IP address verification
 327                        on geo-restricted sites.
 328     socket_timeout:    Time to wait for unresponsive hosts, in seconds
 329     bidi_workaround:   Work around buggy terminals without bidirectional text
 330                        support, using fribidi
 331     debug_printtraffic:Print out sent and received HTTP traffic
 332     include_ads:       Download ads as well (deprecated)
 333     default_search:    Prepend this string if an input url is not valid.
 334                        'auto' for elaborate guessing
 335     encoding:          Use this encoding instead of the system-specified.
 336     extract_flat:      Do not resolve URLs, return the immediate result.
 337                        Pass in 'in_playlist' to only show this behavior for
 338                        playlist items.
 339     wait_for_video:    If given, wait for scheduled streams to become available.
 340                        The value should be a tuple containing the range
 341                        (min_secs, max_secs) to wait between retries
 342     postprocessors:    A list of dictionaries, each with an entry
 343                        * key:  The name of the postprocessor. See
 344                                yt_dlp/postprocessor/__init__.py for a list.
 345                        * when: When to run the postprocessor. Can be one of
 346                                pre_process|before_dl|post_process|after_move.
 347                                Assumed to be 'post_process' if not given
 348     post_hooks:        Deprecated - Register a custom postprocessor instead
 349                        A list of functions that get called as the final step
 350                        for each video file, after all postprocessors have been
 351                        called. The filename will be passed as the only argument.
 352     progress_hooks:    A list of functions that get called on download
 353                        progress, with a dictionary with the entries
 354                        * status: One of "downloading", "error", or "finished".
 355                                  Check this first and ignore unknown values.
 356                        * info_dict: The extracted info_dict
 357
 358                        If status is one of "downloading", or "finished", the
 359                        following properties may also be present:
 360                        * filename: The final filename (always present)
 361                        * tmpfilename: The filename we're currently writing to
 362                        * downloaded_bytes: Bytes on disk
 363                        * total_bytes: Size of the whole file, None if unknown
 364                        * total_bytes_estimate: Guess of the eventual file size,
 365                                                None if unavailable.
 366                        * elapsed: The number of seconds since download started.
 367                        * eta: The estimated time in seconds, None if unknown
 368                        * speed: The download speed in bytes/second, None if
 369                                 unknown
 370                        * fragment_index: The counter of the currently
 371                                          downloaded video fragment.
 372                        * fragment_count: The number of fragments (= individual
 373                                          files that will be merged)
 374
 375                        Progress hooks are guaranteed to be called at least once
 376                        (with status "finished") if the download is successful.
 377     postprocessor_hooks:  A list of functions that get called on postprocessing
 378                        progress, with a dictionary with the entries
 379                        * status: One of "started", "processing", or "finished".
 380                                  Check this first and ignore unknown values.
 381                        * postprocessor: Name of the postprocessor
 382                        * info_dict: The extracted info_dict
 383
 384                        Postprocessor hooks are guaranteed to be called at least twice
 385                        (with status "started" and "finished") if the processing is successful.
 386     merge_output_format: Extension to use when merging formats.
 387     final_ext:         Expected final extension; used to detect when the file was
 388                        already downloaded and converted
 389     fixup:             Automatically correct known faults of the file.
 390                        One of:
 391                        - "never": do nothing
 392                        - "warn": only emit a warning
 393                        - "detect_or_warn": check whether we can do anything
 394                                            about it, warn otherwise (default)
 395     source_address:    Client-side IP address to bind to.
 396     call_home:         Boolean, true iff we are allowed to contact the
 397                        yt-dlp servers for debugging. (BROKEN)
 398     sleep_interval_requests: Number of seconds to sleep between requests
 399                        during extraction
 400     sleep_interval:    Number of seconds to sleep before each download when
 401                        used alone or a lower bound of a range for randomized
 402                        sleep before each download (minimum possible number
 403                        of seconds to sleep) when used along with
 404                        max_sleep_interval.
 405     max_sleep_interval:Upper bound of a range for randomized sleep before each
 406                        download (maximum possible number of seconds to sleep).
 407                        Must only be used along with sleep_interval.
 408                        Actual sleep time will be a random float from range
 409                        [sleep_interval; max_sleep_interval].
 410     sleep_interval_subtitles: Number of seconds to sleep before each subtitle download
 411     listformats:       Print an overview of available video formats and exit.
 412     list_thumbnails:   Print a table of all thumbnails and exit.
 413     match_filter:      A function that gets called with the info_dict of
 414                        every video.
 415                        If it returns a message, the video is ignored.
 416                        If it returns None, the video is downloaded.
 417                        match_filter_func in utils.py is one example for this.
 418     no_color:          Do not emit color codes in output.
 419     geo_bypass:        Bypass geographic restriction via faking X-Forwarded-For
 420                        HTTP header
 421     geo_bypass_country:
 422                        Two-letter ISO 3166-1 alpha-2 country code that will be used for
 423                        explicit geographic restriction bypassing via faking
 424                        X-Forwarded-For HTTP header
 425     geo_bypass_ip_block:
 426                        IP range in CIDR notation that will be used similarly to
 427                        geo_bypass_country
 428
 429     The following options determine which downloader is picked:
 430     external_downloader: A dictionary of protocol keys and the executable of the
 431                        external downloader to use for it. The allowed protocols
 432                        are default|http|ftp|m3u8|dash|rtsp|rtmp|mms.
 433                        Set the value to 'native' to use the native downloader
 434     hls_prefer_native: Deprecated - Use external_downloader = {'m3u8': 'native'}
 435                        or {'m3u8': 'ffmpeg'} instead.
 436                        Use the native HLS downloader instead of ffmpeg/avconv
 437                        if True, otherwise use ffmpeg/avconv if False, otherwise
 438                        use downloader suggested by extractor if None.
 439     compat_opts:       Compatibility options. See "Differences in default behavior".
 440                        The following options do not work when used through the API:
 441                        filename, abort-on-error, multistreams, no-live-chat, format-sort,
 442                        no-clean-infojson, no-playlist-metafiles, no-keep-subs, no-attach-info-json.
 443                        Refer to __init__.py for their implementation
 444     progress_template: Dictionary of templates for progress outputs.
 445                        Allowed keys are 'download', 'postprocess',
 446                        'download-title' (console title) and 'postprocess-title'.
 447                        The template is mapped on a dictionary with keys 'progress' and 'info'
 448
 449     The following parameters are not used by YoutubeDL itself, they are used by
 450     the downloader (see yt_dlp/downloader/common.py):
 451     nopart, updatetime, buffersize, ratelimit, throttledratelimit, min_filesize,
 452     max_filesize, test, noresizebuffer, retries, fragment_retries, continuedl,
 453     noprogress, xattr_set_filesize, hls_use_mpegts, http_chunk_size,
 454     external_downloader_args, concurrent_fragment_downloads.
 455
 456     The following options are used by the post processors:
 457     prefer_ffmpeg:     If False, use avconv instead of ffmpeg if both are available,
 458                        otherwise prefer ffmpeg. (avconv support is deprecated)
 459     ffmpeg_location:   Location of the ffmpeg/avconv binary; either the path
 460                        to the binary or its containing directory.
 461     postprocessor_args: A dictionary of postprocessor/executable keys (in lower case)
 462                        and a list of additional command-line arguments for the
 463                        postprocessor/executable. The dict can also have "PP+EXE" keys
 464                        which are used when the given exe is used by the given PP.
 465                        Use 'default' as the name for arguments to be passed to all PP
 466                        For compatibility with youtube-dl, a single list of args
 467                        can also be used
 468
 469     The following options are used by the extractors:
 470     extractor_retries: Number of times to retry for known errors
 471     dynamic_mpd:       Whether to process dynamic DASH manifests (default: True)
 472     hls_split_discontinuity: Split HLS playlists to different formats at
 473                        discontinuities such as ad breaks (default: False)
 474     extractor_args:    A dictionary of arguments to be passed to the extractors.
 475                        See "EXTRACTOR ARGUMENTS" for details.
 476                        Eg: {'youtube': {'skip': ['dash', 'hls']}}
 477     youtube_include_dash_manifest: Deprecated - Use extractor_args instead.
 478                        If True (default), DASH manifests and related
 479                        data will be downloaded and processed by extractor.
 480                        You can reduce network I/O by disabling it if you don't
 481                        care about DASH. (only for youtube)
 482     youtube_include_hls_manifest: Deprecated - Use extractor_args instead.
 483                        If True (default), HLS manifests and related
 484                        data will be downloaded and processed by extractor.
 485                        You can reduce network I/O by disabling it if you don't
 486                        care about HLS. (only for youtube)
 487     """
 488
 489     _NUMERIC_FIELDS = set((
 490         'width', 'height', 'tbr', 'abr', 'asr', 'vbr', 'fps', 'filesize', 'filesize_approx',
 491         'timestamp', 'release_timestamp',
 492         'duration', 'view_count', 'like_count', 'dislike_count', 'repost_count',
 493         'average_rating', 'comment_count', 'age_limit',
 494         'start_time', 'end_time',
 495         'chapter_number', 'season_number', 'episode_number',
 496         'track_number', 'disc_number', 'release_year',
 497     ))
 498
 499     _format_selection_exts = {
 500         'audio': {'m4a', 'mp3', 'ogg', 'aac'},
 501         'video': {'mp4', 'flv', 'webm', '3gp'},
 502         'storyboards': {'mhtml'},
 503     }
 504
 505     params = None
 506     _ies = {}
 507     _pps = {'pre_process': [], 'before_dl': [], 'after_move': [], 'post_process': []}
 508     _printed_messages = set()
 509     _first_webpage_request = True
 510     _download_retcode = None
 511     _num_downloads = None
 512     _playlist_level = 0
 513     _playlist_urls = set()
 514     _screen_file = None
 515
 516     def __init__(self, params=None, auto_init=True):
 517         """Create a YoutubeDL object with the given options.
 518         @param auto_init    Whether to load the default extractors and print header (if verbose).
 519                             Set to 'no_verbose_header' to not print the header
 520         """
 521         if params is None:
 522             params = {}
 523         self._ies = {}
 524         self._ies_instances = {}
 525         self._pps = {'pre_process': [], 'before_dl': [], 'after_move': [], 'post_process': []}
 526         self._printed_messages = set()
 527         self._first_webpage_request = True
 528         self._post_hooks = []
 529         self._progress_hooks = []
 530         self._postprocessor_hooks = []
 531         self._download_retcode = 0
 532         self._num_downloads = 0
 533         self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
 534         self._err_file = sys.stderr
 535         self.params = params
 536         self.cache = Cache(self)
 537
 538         windows_enable_vt_mode()
 539         self._allow_colors = {
 540             'screen': not self.params.get('no_color') and supports_terminal_sequences(self._screen_file),
 541             'err': not self.params.get('no_color') and supports_terminal_sequences(self._err_file),
 542         }
 543
 544         if sys.version_info < (3, 6):
 545             self.report_warning(
 546                 'Python version %d.%d is not supported! Please update to Python 3.6 or above' % sys.version_info[:2])
 547
 548         if self.params.get('allow_unplayable_formats'):
 549             self.report_warning(
 550                 f'You have asked for {self._format_err("UNPLAYABLE", self.Styles.EMPHASIS)} formats to be listed/downloaded. '
 551                 'This is a developer option intended for debugging. \n'
 552                 '         If you experience any issues while using this option, '
 553                 f'{self._format_err("DO NOT", self.Styles.ERROR)} open a bug report')
 554
 555         def check_deprecated(param, option, suggestion):
 556             if self.params.get(param) is not None:
 557                 self.report_warning('%s is deprecated. Use %s instead' % (option, suggestion))
 558                 return True
 559             return False
 560
 561         if check_deprecated('cn_verification_proxy', '--cn-verification-proxy', '--geo-verification-proxy'):
 562             if self.params.get('geo_verification_proxy') is None:
 563                 self.params['geo_verification_proxy'] = self.params['cn_verification_proxy']
 564
 565         check_deprecated('autonumber', '--auto-number', '-o "%(autonumber)s-%(title)s.%(ext)s"')
 566         check_deprecated('usetitle', '--title', '-o "%(title)s-%(id)s.%(ext)s"')
 567         check_deprecated('useid', '--id', '-o "%(id)s.%(ext)s"')
 568
 569         for msg in self.params.get('_warnings', []):
 570             self.report_warning(msg)
 571         for msg in self.params.get('_deprecation_warnings', []):
 572             self.deprecation_warning(msg)
 573
 574         if 'list-formats' in self.params.get('compat_opts', []):
 575             self.params['listformats_table'] = False
 576
 577         if 'overwrites' not in self.params and self.params.get('nooverwrites') is not None:
 578             # nooverwrites was unnecessarily changed to overwrites
 579             # in 0c3d0f51778b153f65c21906031c2e091fcfb641
 580             # This ensures compatibility with both keys
 581             self.params['overwrites'] = not self.params['nooverwrites']
 582         elif self.params.get('overwrites') is None:
 583             self.params.pop('overwrites', None)
 584         else:
 585             self.params['nooverwrites'] = not self.params['overwrites']
 586
 587         if params.get('bidi_workaround', False):
 588             try:
 589                 import pty
 590                 master, slave = pty.openpty()
 591                 width = compat_get_terminal_size().columns
 592                 if width is None:
 593                     width_args = []
 594                 else:
 595                     width_args = ['-w', str(width)]
 596                 sp_kwargs = dict(
 597                     stdin=subprocess.PIPE,
 598                     stdout=slave,
 599                     stderr=self._err_file)
 600                 try:
 601                     self._output_process = Popen(['bidiv'] + width_args, **sp_kwargs)
 602                 except OSError:
 603                     self._output_process = Popen(['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
 604                 self._output_channel = os.fdopen(master, 'rb')
 605             except OSError as ose:
 606                 if ose.errno == errno.ENOENT:
 607                     self.report_warning(
 608                         'Could not find fribidi executable, ignoring --bidi-workaround. '
 609                         'Make sure that  fribidi  is an executable file in one of the directories in your $PATH.')
 610                 else:
 611                     raise
 612
 613         if (sys.platform != 'win32'
 614                 and sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
 615                 and not params.get('restrictfilenames', False)):
 616             # Unicode filesystem API will throw errors (#1474, #13027)
 617             self.report_warning(
 618                 'Assuming --restrict-filenames since file system encoding '
 619                 'cannot encode all characters. '
 620                 'Set the LC_ALL environment variable to fix this.')
 621             self.params['restrictfilenames'] = True
 622
 623         self.outtmpl_dict = self.parse_outtmpl()
 624
 625         # Creating format selector here allows us to catch syntax errors before the extraction
 626         self.format_selector = (
 627             None if self.params.get('format') is None
 628             else self.params['format'] if callable(self.params['format'])
 629             else self.build_format_selector(self.params['format']))
 630
 631         self._setup_opener()
 632
 633         if auto_init:
 634             if auto_init != 'no_verbose_header':
 635                 self.print_debug_header()
 636             self.add_default_info_extractors()
 637
 638         hooks = {
 639             'post_hooks': self.add_post_hook,
 640             'progress_hooks': self.add_progress_hook,
 641             'postprocessor_hooks': self.add_postprocessor_hook,
 642         }
 643         for opt, fn in hooks.items():
 644             for ph in self.params.get(opt, []):
 645                 fn(ph)
 646
 647         for pp_def_raw in self.params.get('postprocessors', []):
 648             pp_def = dict(pp_def_raw)
 649             when = pp_def.pop('when', 'post_process')
 650             self.add_post_processor(
 651                 get_postprocessor(pp_def.pop('key'))(self, **compat_kwargs(pp_def)),
 652                 when=when)
 653
 654         register_socks_protocols()
 655
 656         def preload_download_archive(fn):
 657             """Preload the archive, if any is specified"""
 658             if fn is None:
 659                 return False
 660             self.write_debug(f'Loading archive file {fn!r}')
 661             try:
 662                 with locked_file(fn, 'r', encoding='utf-8') as archive_file:
 663                     for line in archive_file:
 664                         self.archive.add(line.strip())
 665             except IOError as ioe:
 666                 if ioe.errno != errno.ENOENT:
 667                     raise
 668                 return False
 669             return True
 670
 671         self.archive = set()
 672         preload_download_archive(self.params.get('download_archive'))
 673
 674     def warn_if_short_id(self, argv):
 675         # short YouTube ID starting with dash?
 676         idxs = [
 677             i for i, a in enumerate(argv)
 678             if re.match(r'^-[0-9A-Za-z_-]{10}$', a)]
 679         if idxs:
 680             correct_argv = (
 681                 ['yt-dlp']
 682                 + [a for i, a in enumerate(argv) if i not in idxs]
 683                 + ['--'] + [argv[i] for i in idxs]
 684             )
 685             self.report_warning(
 686                 'Long argument string detected. '
 687                 'Use -- to separate parameters and URLs, like this:\n%s' %
 688                 args_to_str(correct_argv))
 689
 690     def add_info_extractor(self, ie):
 691         """Add an InfoExtractor object to the end of the list."""
 692         ie_key = ie.ie_key()
 693         self._ies[ie_key] = ie
 694         if not isinstance(ie, type):
 695             self._ies_instances[ie_key] = ie
 696             ie.set_downloader(self)
 697
 698     def _get_info_extractor_class(self, ie_key):
 699         ie = self._ies.get(ie_key)
 700         if ie is None:
 701             ie = get_info_extractor(ie_key)
 702             self.add_info_extractor(ie)
 703         return ie
 704
 705     def get_info_extractor(self, ie_key):
 706         """
 707         Get an instance of an IE with name ie_key, it will try to get one from
 708         the _ies list, if there's no instance it will create a new one and add
 709         it to the extractor list.
 710         """
 711         ie = self._ies_instances.get(ie_key)
 712         if ie is None:
 713             ie = get_info_extractor(ie_key)()
 714             self.add_info_extractor(ie)
 715         return ie
 716
 717     def add_default_info_extractors(self):
 718         """
 719         Add the InfoExtractors returned by gen_extractors to the end of the list
 720         """
 721         for ie in gen_extractor_classes():
 722             self.add_info_extractor(ie)
 723
 724     def add_post_processor(self, pp, when='post_process'):
 725         """Add a PostProcessor object to the end of the chain."""
 726         self._pps[when].append(pp)
 727         pp.set_downloader(self)
 728
 729     def add_post_hook(self, ph):
 730         """Add the post hook"""
 731         self._post_hooks.append(ph)
 732
 733     def add_progress_hook(self, ph):
 734         """Add the download progress hook"""
 735         self._progress_hooks.append(ph)
 736
 737     def add_postprocessor_hook(self, ph):
 738         """Add the postprocessing progress hook"""
 739         self._postprocessor_hooks.append(ph)
 740         for pps in self._pps.values():
 741             for pp in pps:
 742                 pp.add_progress_hook(ph)
 743
 744     def _bidi_workaround(self, message):
 745         if not hasattr(self, '_output_channel'):
 746             return message
 747
 748         assert hasattr(self, '_output_process')
 749         assert isinstance(message, compat_str)
 750         line_count = message.count('\n') + 1
 751         self._output_process.stdin.write((message + '\n').encode('utf-8'))
 752         self._output_process.stdin.flush()
 753         res = ''.join(self._output_channel.readline().decode('utf-8')
 754                       for _ in range(line_count))
 755         return res[:-len('\n')]
 756
 757     def _write_string(self, message, out=None, only_once=False):
 758         if only_once:
 759             if message in self._printed_messages:
 760                 return
 761             self._printed_messages.add(message)
 762         write_string(message, out=out, encoding=self.params.get('encoding'))
 763
 764     def to_stdout(self, message, skip_eol=False, quiet=False):
 765         """Print message to stdout"""
 766         if self.params.get('logger'):
 767             self.params['logger'].debug(message)
 768         elif not quiet or self.params.get('verbose'):
 769             self._write_string(
 770                 '%s%s' % (self._bidi_workaround(message), ('' if skip_eol else '\n')),
 771                 self._err_file if quiet else self._screen_file)
 772
 773     def to_stderr(self, message, only_once=False):
 774         """Print message to stderr"""
 775         assert isinstance(message, compat_str)
 776         if self.params.get('logger'):
 777             self.params['logger'].error(message)
 778         else:
 779             self._write_string('%s\n' % self._bidi_workaround(message), self._err_file, only_once=only_once)
 780
 781     def to_console_title(self, message):
 782         if not self.params.get('consoletitle', False):
 783             return
 784         message = remove_terminal_sequences(message)
 785         if compat_os_name == 'nt':
 786             if ctypes.windll.kernel32.GetConsoleWindow():
 787                 # c_wchar_p() might not be necessary if `message` is
 788                 # already of type unicode()
 789                 ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
 790         elif 'TERM' in os.environ:
 791             self._write_string('\033]0;%s\007' % message, self._screen_file)
 792
 793     def save_console_title(self):
 794         if not self.params.get('consoletitle', False):
 795             return
 796         if self.params.get('simulate'):
 797             return
 798         if compat_os_name != 'nt' and 'TERM' in os.environ:
 799             # Save the title on stack
 800             self._write_string('\033[22;0t', self._screen_file)
 801
 802     def restore_console_title(self):
 803         if not self.params.get('consoletitle', False):
 804             return
 805         if self.params.get('simulate'):
 806             return
 807         if compat_os_name != 'nt' and 'TERM' in os.environ:
 808             # Restore the title from stack
 809             self._write_string('\033[23;0t', self._screen_file)
 810
 811     def __enter__(self):
 812         self.save_console_title()
 813         return self
 814
 815     def __exit__(self, *args):
 816         self.restore_console_title()
 817
 818         if self.params.get('cookiefile') is not None:
 819             self.cookiejar.save(ignore_discard=True, ignore_expires=True)
 820
 821     def trouble(self, message=None, tb=None):
 822         """Determine action to take when a download problem appears.
 823
 824         Depending on if the downloader has been configured to ignore
 825         download errors or not, this method may throw an exception or
 826         not when errors are found, after printing the message.
 827
 828         tb, if given, is additional traceback information.
 829         """
 830         if message is not None:
 831             self.to_stderr(message)
 832         if self.params.get('verbose'):
 833             if tb is None:
 834                 if sys.exc_info()[0]:  # if .trouble has been called from an except block
 835                     tb = ''
 836                     if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
 837                         tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
 838                     tb += encode_compat_str(traceback.format_exc())
 839                 else:
 840                     tb_data = traceback.format_list(traceback.extract_stack())
 841                     tb = ''.join(tb_data)
 842             if tb:
 843                 self.to_stderr(tb)
 844         if not self.params.get('ignoreerrors'):
 845             if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
 846                 exc_info = sys.exc_info()[1].exc_info
 847             else:
 848                 exc_info = sys.exc_info()
 849             raise DownloadError(message, exc_info)
 850         self._download_retcode = 1
 851
 852     def to_screen(self, message, skip_eol=False):
 853         """Print message to stdout if not in quiet mode"""
 854         self.to_stdout(
 855             message, skip_eol, quiet=self.params.get('quiet', False))
 856
    class Styles(Enum):
        """
        Named text styles for console output.

        _format_text unwraps a member to its value (a style string such
        as 'red') before handing it to format_text.
        """
        HEADERS = 'yellow'
        EMPHASIS = 'light blue'
        ID = 'green'
        DELIM = 'blue'
        ERROR = 'red'
        # Same value as HEADERS, so Enum treats WARNING as an alias of HEADERS
        WARNING = 'yellow'
        SUPPRESS = 'light black'
 865
 866     def _format_text(self, handle, allow_colors, text, f, fallback=None, *, test_encoding=False):
 867         if test_encoding:
 868             original_text = text
 869             encoding = self.params.get('encoding') or getattr(handle, 'encoding', 'ascii')
 870             text = text.encode(encoding, 'ignore').decode(encoding)
 871             if fallback is not None and text != original_text:
 872                 text = fallback
 873         if isinstance(f, self.Styles):
 874             f = f.value
 875         return format_text(text, f) if allow_colors else text if fallback is None else fallback
 876
 877     def _format_screen(self, *args, **kwargs):
 878         return self._format_text(
 879             self._screen_file, self._allow_colors['screen'], *args, **kwargs)
 880
 881     def _format_err(self, *args, **kwargs):
 882         return self._format_text(
 883             self._err_file, self._allow_colors['err'], *args, **kwargs)
 884
 885     def report_warning(self, message, only_once=False):
 886         '''
 887         Print the message to stderr, it will be prefixed with 'WARNING:'
 888         If stderr is a tty file the 'WARNING:' will be colored
 889         '''
 890         if self.params.get('logger') is not None:
 891             self.params['logger'].warning(message)
 892         else:
 893             if self.params.get('no_warnings'):
 894                 return
 895             self.to_stderr(f'{self._format_err("WARNING:", self.Styles.WARNING)} {message}', only_once)
 896
 897     def deprecation_warning(self, message):
 898         if self.params.get('logger') is not None:
 899             self.params['logger'].warning('DeprecationWarning: {message}')
 900         else:
 901             self.to_stderr(f'{self._format_err("DeprecationWarning:", self.Styles.ERROR)} {message}', True)
 902
 903     def report_error(self, message, tb=None):
 904         '''
 905         Do the same as trouble, but prefixes the message with 'ERROR:', colored
 906         in red if stderr is a tty file.
 907         '''
 908         self.trouble(f'{self._format_err("ERROR:", self.Styles.ERROR)} {message}', tb)
 909
 910     def write_debug(self, message, only_once=False):
 911         '''Log debug message or Print message to stderr'''
 912         if not self.params.get('verbose', False):
 913             return
 914         message = '[debug] %s' % message
 915         if self.params.get('logger'):
 916             self.params['logger'].debug(message)
 917         else:
 918             self.to_stderr(message, only_once)
 919
 920     def report_file_already_downloaded(self, file_name):
 921         """Report file has already been fully downloaded."""
 922         try:
 923             self.to_screen('[download] %s has already been downloaded' % file_name)
 924         except UnicodeEncodeError:
 925             self.to_screen('[download] The file has already been downloaded')
 926
 927     def report_file_delete(self, file_name):
 928         """Report that existing file will be deleted."""
 929         try:
 930             self.to_screen('Deleting existing file %s' % file_name)
 931         except UnicodeEncodeError:
 932             self.to_screen('Deleting existing file')
 933
 934     def raise_no_formats(self, info, forced=False):
 935         has_drm = info.get('__has_drm')
 936         msg = 'This video is DRM protected' if has_drm else 'No video formats found!'
 937         expected = self.params.get('ignore_no_formats_error')
 938         if forced or not expected:
 939             raise ExtractorError(msg, video_id=info['id'], ie=info['extractor'],
 940                                  expected=has_drm or expected)
 941         else:
 942             self.report_warning(msg)
 943
 944     def parse_outtmpl(self):
 945         outtmpl_dict = self.params.get('outtmpl', {})
 946         if not isinstance(outtmpl_dict, dict):
 947             outtmpl_dict = {'default': outtmpl_dict}
 948         # Remove spaces in the default template
 949         if self.params.get('restrictfilenames'):
 950             sanitize = lambda x: x.replace(' - ', ' ').replace(' ', '-')
 951         else:
 952             sanitize = lambda x: x
 953         outtmpl_dict.update({
 954             k: sanitize(v) for k, v in DEFAULT_OUTTMPL.items()
 955             if outtmpl_dict.get(k) is None})
 956         for key, val in outtmpl_dict.items():
 957             if isinstance(val, bytes):
 958                 self.report_warning(
 959                     'Parameter outtmpl is bytes, but should be a unicode string. '
 960                     'Put  from __future__ import unicode_literals  at the top of your code file or consider switching to Python 3.x.')
 961         return outtmpl_dict
 962
 963     def get_output_path(self, dir_type='', filename=None):
 964         paths = self.params.get('paths', {})
 965         assert isinstance(paths, dict)
 966         path = os.path.join(
 967             expand_path(paths.get('home', '').strip()),
 968             expand_path(paths.get(dir_type, '').strip()) if dir_type else '',
 969             filename or '')
 970
 971         # Temporary fix for #4787
 972         # 'Treat' all problem characters by passing filename through preferredencoding
 973         # to workaround encoding issues with subprocess on python2 @ Windows
 974         if sys.version_info < (3, 0) and sys.platform == 'win32':
 975             path = encodeFilename(path, True).decode(preferredencoding())
 976         return sanitize_path(path, force=self.params.get('windowsfilenames'))
 977
 978     @staticmethod
 979     def _outtmpl_expandpath(outtmpl):
 980         # expand_path translates '%%' into '%' and '$$' into '$'
 981         # correspondingly that is not what we want since we need to keep
 982         # '%%' intact for template dict substitution step. Working around
 983         # with boundary-alike separator hack.
 984         sep = ''.join([random.choice(ascii_letters) for _ in range(32)])
 985         outtmpl = outtmpl.replace('%%', '%{0}%'.format(sep)).replace('$$', '${0}$'.format(sep))
 986
 987         # outtmpl should be expand_path'ed before template dict substitution
 988         # because meta fields may contain env variables we don't want to
 989         # be expanded. For example, for outtmpl "%(title)s.%(ext)s" and
 990         # title "Hello $PATH", we don't want `$PATH` to be expanded.
 991         return expand_path(outtmpl).replace(sep, '')
 992
 993     @staticmethod
 994     def escape_outtmpl(outtmpl):
 995         ''' Escape any remaining strings like %s, %abc% etc. '''
 996         return re.sub(
 997             STR_FORMAT_RE_TMPL.format('', '(?![%(\0])'),
 998             lambda mobj: ('' if mobj.group('has_key') else '%') + mobj.group(0),
 999             outtmpl)
1000
1001     @classmethod
1002     def validate_outtmpl(cls, outtmpl):
1003         ''' @return None or Exception object '''
1004         outtmpl = re.sub(
1005             STR_FORMAT_RE_TMPL.format('[^)]*', '[ljqBU]'),
1006             lambda mobj: f'{mobj.group(0)[:-1]}s',
1007             cls._outtmpl_expandpath(outtmpl))
1008         try:
1009             cls.escape_outtmpl(outtmpl) % collections.defaultdict(int)
1010             return None
1011         except ValueError as err:
1012             return err
1013
1014     @staticmethod
1015     def _copy_infodict(info_dict):
1016         info_dict = dict(info_dict)
1017         for key in ('__original_infodict', '__postprocessors'):
1018             info_dict.pop(key, None)
1019         return info_dict
1020
    def prepare_outtmpl(self, outtmpl, info_dict, sanitize=None):
        """
        Make the outtmpl and info_dict suitable for substitution: ydl.escape_outtmpl(outtmpl) % info_dict

        @param outtmpl    The output template string
        @param info_dict  The info dict. 'epoch' is set on it if missing;
                          everything else is done on a copy
        @param sanitize   Optional callable (field_name, value) applied to
                          string-like values before substitution
        @return           Tuple (rewritten template, dict of values), where
                          each %(...) key of the rewritten template is a key
                          of the returned dict
        """
        info_dict.setdefault('epoch', int(time.time()))  # keep epoch consistent once set

        # From here on, work on a copy so that the caller's dict is left alone
        info_dict = self._copy_infodict(info_dict)
        info_dict['duration_string'] = (  # %(duration>%H-%M-%S)s is wrong if duration > 24hrs
            formatSeconds(info_dict['duration'], '-' if sanitize else ':')
            if info_dict.get('duration', None) is not None
            else None)
        info_dict['autonumber'] = self.params.get('autonumber_start', 1) - 1 + self._num_downloads
        if info_dict.get('resolution') is None:
            info_dict['resolution'] = self.format_resolution(info_dict, default=None)

        # For fields playlist_index, playlist_autonumber and autonumber convert all occurrences
        # of %(field)s to %(field)0Nd for backward compatibility
        field_size_compat_map = {
            'playlist_index': number_of_digits(info_dict.get('_last_playlist_index') or 0),
            'playlist_autonumber': number_of_digits(info_dict.get('n_entries') or 0),
            'autonumber': self.params.get('autonumber_size') or 5,
        }

        # Values for the rewritten template, filled in by create_key
        TMPL_DICT = {}
        EXTERNAL_FORMAT_RE = re.compile(STR_FORMAT_RE_TMPL.format('[^)]*', f'[{STR_FORMAT_TYPES}ljqBU]'))
        MATH_FUNCTIONS = {
            '+': float.__add__,
            '-': float.__sub__,
        }
        # Field is of the form key1.key2...
        # where keys (except first) can be string, int or slice
        FIELD_RE = r'\w*(?:\.(?:\w+|{num}|{num}?(?::{num}?){{1,2}}))*'.format(num=r'(?:-?\d+)')
        # NOTE(review): the '.' in `(?:.\d+)?` is unescaped and so matches any
        # character; presumably a literal decimal point was intended - confirm
        MATH_FIELD_RE = r'''{field}|{num}'''.format(field=FIELD_RE, num=r'-?\d+(?:.\d+)?')
        MATH_OPERATORS_RE = r'(?:%s)' % '|'.join(map(re.escape, MATH_FUNCTIONS.keys()))
        # Parses the part between the parentheses of %(...)X:
        # [-]fields[maths][>strf_format][,alternate][|default]
        INTERNAL_FORMAT_RE = re.compile(r'''(?x)
            (?P<negate>-)?
            (?P<fields>{field})
            (?P<maths>(?:{math_op}{math_field})*)
            (?:>(?P<strf_format>.+?))?
            (?P<alternate>(?<!\\),[^|)]+)?
            (?:\|(?P<default>.*?))?
            $'''.format(field=FIELD_RE, math_op=MATH_OPERATORS_RE, math_field=MATH_FIELD_RE))

        def _traverse_infodict(k):
            # Look up a dotted path in info_dict; a leading '.' is ignored
            k = k.split('.')
            if k[0] == '':
                k.pop(0)
            return traverse_obj(info_dict, k, is_user_input=True, traverse_string=True)

        def get_value(mdict):
            # Compute the value for one parsed key (groupdict of INTERNAL_FORMAT_RE)
            # Object traversal
            value = _traverse_infodict(mdict['fields'])
            # Negative
            if mdict['negate']:
                value = float_or_none(value)
                if value is not None:
                    value *= -1
            # Do maths
            offset_key = mdict['maths']
            if offset_key:
                value = float_or_none(value)
                # NOTE: this local shadows the imported `operator` module inside get_value
                operator = None
                # Consume offset_key alternately as an operator and an operand
                while offset_key:
                    item = re.match(
                        MATH_FIELD_RE if operator else MATH_OPERATORS_RE,
                        offset_key).group(0)
                    offset_key = offset_key[len(item):]
                    if operator is None:
                        operator = MATH_FUNCTIONS[item]
                        continue
                    item, multiplier = (item[1:], -1) if item[0] == '-' else (item, 1)
                    # The operand is either a number or the name of another field
                    offset = float_or_none(item)
                    if offset is None:
                        offset = float_or_none(_traverse_infodict(item))
                    try:
                        value = operator(value, multiplier * offset)
                    except (TypeError, ZeroDivisionError):
                        return None
                    operator = None
            # Datetime formatting
            if mdict['strf_format']:
                value = strftime_or_none(value, mdict['strf_format'].replace('\\,', ','))

            return value

        # Placeholder used when a field has no value and no default is given
        na = self.params.get('outtmpl_na_placeholder', 'NA')

        def _dumpjson_default(obj):
            # json.dumps fallback: serialize set and LazyList as list
            if isinstance(obj, (set, LazyList)):
                return list(obj)
            raise TypeError(f'Object of type {type(obj).__name__} is not JSON serializable')

        def create_key(outer_mobj):
            # Replacement callback for EXTERNAL_FORMAT_RE: computes the value for
            # one %(key)format, stores it in TMPL_DICT and returns the rewritten
            # format specifier
            if not outer_mobj.group('has_key'):
                return outer_mobj.group(0)
            key = outer_mobj.group('key')
            mobj = re.match(INTERNAL_FORMAT_RE, key)
            initial_field = mobj.group('fields').split('.')[-1] if mobj else ''
            value, default = None, na
            # Try the key, then each comma separated alternate, until a value is found
            while mobj:
                mobj = mobj.groupdict()
                default = mobj['default'] if mobj['default'] is not None else default
                value = get_value(mobj)
                if value is None and mobj['alternate']:
                    mobj = re.match(INTERNAL_FORMAT_RE, mobj['alternate'][1:])
                else:
                    break

            fmt = outer_mobj.group('format')
            if fmt == 's' and value is not None and key in field_size_compat_map.keys():
                fmt = '0{:d}d'.format(field_size_compat_map[key])

            value = default if value is None else value

            flags = outer_mobj.group('conversion') or ''
            # The same format, but with the conversion type replaced by 's'
            str_fmt = f'{fmt[:-1]}s'
            if fmt[-1] == 'l':  # list
                delim = '\n' if '#' in flags else ', '
                value, fmt = delim.join(variadic(value)), str_fmt
            elif fmt[-1] == 'j':  # json
                value, fmt = json.dumps(value, default=_dumpjson_default, indent=4 if '#' in flags else None), str_fmt
            elif fmt[-1] == 'q':  # quoted
                value = map(str, variadic(value) if '#' in flags else [value])
                value, fmt = ' '.join(map(compat_shlex_quote, value)), str_fmt
            elif fmt[-1] == 'B':  # bytes
                value = f'%{str_fmt}'.encode('utf-8') % str(value).encode('utf-8')
                value, fmt = value.decode('utf-8', 'ignore'), 's'
            elif fmt[-1] == 'U':  # unicode normalized
                value, fmt = unicodedata.normalize(
                    # "+" = compatibility equivalence, "#" = NFD
                    'NF%s%s' % ('K' if '+' in flags else '', 'D' if '#' in flags else 'C'),
                    value), str_fmt
            elif fmt[-1] == 'c':
                if value:
                    value = str(value)[0]
                else:
                    fmt = str_fmt
            elif fmt[-1] not in 'rs':  # numeric
                value = float_or_none(value)
                if value is None:
                    value, fmt = default, 's'

            if sanitize:
                if fmt[-1] == 'r':
                    # If value is an object, sanitize might convert it to a string
                    # So we convert it to repr first
                    value, fmt = repr(value), str_fmt
                if fmt[-1] in 'csr':
                    value = sanitize(initial_field, value)

            # The dict key combines the original key and format
            key = '%s\0%s' % (key.replace('%', '%\0'), outer_mobj.group('format'))
            TMPL_DICT[key] = value
            return '{prefix}%({key}){fmt}'.format(key=key, fmt=fmt, prefix=outer_mobj.group('prefix'))

        return EXTERNAL_FORMAT_RE.sub(create_key, outtmpl), TMPL_DICT
1174
1175     def evaluate_outtmpl(self, outtmpl, info_dict, *args, **kwargs):
1176         outtmpl, info_dict = self.prepare_outtmpl(outtmpl, info_dict, *args, **kwargs)
1177         return self.escape_outtmpl(outtmpl) % info_dict
1178
1179     def _prepare_filename(self, info_dict, tmpl_type='default'):
1180         try:
1181             sanitize = lambda k, v: sanitize_filename(
1182                 compat_str(v),
1183                 restricted=self.params.get('restrictfilenames'),
1184                 is_id=(k == 'id' or k.endswith('_id')))
1185             outtmpl = self._outtmpl_expandpath(self.outtmpl_dict.get(tmpl_type, self.outtmpl_dict['default']))
1186             filename = self.evaluate_outtmpl(outtmpl, info_dict, sanitize)
1187
1188             force_ext = OUTTMPL_TYPES.get(tmpl_type)
1189             if filename and force_ext is not None:
1190                 filename = replace_extension(filename, force_ext, info_dict.get('ext'))
1191
1192             # https://github.com/blackjack4494/youtube-dlc/issues/85
1193             trim_file_name = self.params.get('trim_file_name', False)
1194             if trim_file_name:
1195                 no_ext, *ext = filename.rsplit('.', 2)
1196                 filename = join_nonempty(no_ext[:trim_file_name], *ext, delim='.')
1197
1198             return filename
1199         except ValueError as err:
1200             self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
1201             return None
1202
1203     def prepare_filename(self, info_dict, dir_type='', warn=False):
1204         """Generate the output filename."""
1205
1206         filename = self._prepare_filename(info_dict, dir_type or 'default')
1207         if not filename and dir_type not in ('', 'temp'):
1208             return ''
1209
1210         if warn:
1211             if not self.params.get('paths'):
1212                 pass
1213             elif filename == '-':
1214                 self.report_warning('--paths is ignored when an outputting to stdout', only_once=True)
1215             elif os.path.isabs(filename):
1216                 self.report_warning('--paths is ignored since an absolute path is given in output template', only_once=True)
1217         if filename == '-' or not filename:
1218             return filename
1219
1220         return self.get_output_path(dir_type, filename)
1221
1222     def _match_entry(self, info_dict, incomplete=False, silent=False):
1223         """ Returns None if the file should be downloaded """
1224
1225         video_title = info_dict.get('title', info_dict.get('id', 'video'))
1226
1227         def check_filter():
1228             if 'title' in info_dict:
1229                 # This can happen when we're just evaluating the playlist
1230                 title = info_dict['title']
1231                 matchtitle = self.params.get('matchtitle', False)
1232                 if matchtitle:
1233                     if not re.search(matchtitle, title, re.IGNORECASE):
1234                         return '"' + title + '" title did not match pattern "' + matchtitle + '"'
1235                 rejecttitle = self.params.get('rejecttitle', False)
1236                 if rejecttitle:
1237                     if re.search(rejecttitle, title, re.IGNORECASE):
1238                         return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
1239             date = info_dict.get('upload_date')
1240             if date is not None:
1241                 dateRange = self.params.get('daterange', DateRange())
1242                 if date not in dateRange:
1243                     return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
1244             view_count = info_dict.get('view_count')
1245             if view_count is not None:
1246                 min_views = self.params.get('min_views')
1247                 if min_views is not None and view_count < min_views:
1248                     return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
1249                 max_views = self.params.get('max_views')
1250                 if max_views is not None and view_count > max_views:
1251                     return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
1252             if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
1253                 return 'Skipping "%s" because it is age restricted' % video_title
1254
1255             match_filter = self.params.get('match_filter')
1256             if match_filter is not None:
1257                 try:
1258                     ret = match_filter(info_dict, incomplete=incomplete)
1259                 except TypeError:
1260                     # For backward compatibility
1261                     ret = None if incomplete else match_filter(info_dict)
1262                 if ret is not None:
1263                     return ret
1264             return None
1265
1266         if self.in_download_archive(info_dict):
1267             reason = '%s has already been recorded in the archive' % video_title
1268             break_opt, break_err = 'break_on_existing', ExistingVideoReached
1269         else:
1270             reason = check_filter()
1271             break_opt, break_err = 'break_on_reject', RejectedVideoReached
1272         if reason is not None:
1273             if not silent:
1274                 self.to_screen('[download] ' + reason)
1275             if self.params.get(break_opt, False):
1276                 raise break_err()
1277         return reason
1278
1279     @staticmethod
1280     def add_extra_info(info_dict, extra_info):
1281         '''Set the keys from extra_info in info dict if they are missing'''
1282         for key, value in extra_info.items():
1283             info_dict.setdefault(key, value)
1284
1285     def extract_info(self, url, download=True, ie_key=None, extra_info=None,
1286                      process=True, force_generic_extractor=False):
1287         """
1288         Return a list with a dictionary for each video extracted.
1289
1290         Arguments:
1291         url -- URL to extract
1292
1293         Keyword arguments:
1294         download -- whether to download videos during extraction
1295         ie_key -- extractor key hint
1296         extra_info -- dictionary containing the extra values to add to each result
1297         process -- whether to resolve all unresolved references (URLs, playlist items),
1298             must be True for download to work.
1299         force_generic_extractor -- force using the generic extractor
1300         """
1301
1302         if extra_info is None:
1303             extra_info = {}
1304
1305         if not ie_key and force_generic_extractor:
1306             ie_key = 'Generic'
1307
1308         if ie_key:
1309             ies = {ie_key: self._get_info_extractor_class(ie_key)}
1310         else:
1311             ies = self._ies
1312
1313         for ie_key, ie in ies.items():
1314             if not ie.suitable(url):
1315                 continue
1316
1317             if not ie.working():
1318                 self.report_warning('The program functionality for this site has been marked as broken, '
1319                                     'and will probably not work.')
1320
1321             temp_id = ie.get_temp_id(url)
1322             if temp_id is not None and self.in_download_archive({'id': temp_id, 'ie_key': ie_key}):
1323                 self.to_screen(f'[{ie_key}] {temp_id}: has already been recorded in the archive')
1324                 if self.params.get('break_on_existing', False):
1325                     raise ExistingVideoReached()
1326                 break
1327             return self.__extract_info(url, self.get_info_extractor(ie_key), download, extra_info, process)
1328         else:
1329             self.report_error('no suitable InfoExtractor for URL %s' % url)
1330
1331     def __handle_extraction_exceptions(func):
1332         @functools.wraps(func)
1333         def wrapper(self, *args, **kwargs):
1334             try:
1335                 return func(self, *args, **kwargs)
1336             except GeoRestrictedError as e:
1337                 msg = e.msg
1338                 if e.countries:
1339                     msg += '\nThis video is available in %s.' % ', '.join(
1340                         map(ISO3166Utils.short2full, e.countries))
1341                 msg += '\nYou might want to use a VPN or a proxy server (with --proxy) to workaround.'
1342                 self.report_error(msg)
1343             except ExtractorError as e:  # An error we somewhat expected
1344                 self.report_error(compat_str(e), e.format_traceback())
1345             except ReExtractInfo as e:
1346                 if e.expected:
1347                     self.to_screen(f'{e}; Re-extracting data')
1348                 else:
1349                     self.to_stderr('\r')
1350                     self.report_warning(f'{e}; Re-extracting data')
1351                 return wrapper(self, *args, **kwargs)
1352             except (DownloadCancelled, LazyList.IndexError, PagedList.IndexError):
1353                 raise
1354             except Exception as e:
1355                 if self.params.get('ignoreerrors'):
1356                     self.report_error(error_to_compat_str(e), tb=encode_compat_str(traceback.format_exc()))
1357                 else:
1358                     raise
1359         return wrapper
1360
1361     def _wait_for_video(self, ie_result):
1362         if (not self.params.get('wait_for_video')
1363                 or ie_result.get('_type', 'video') != 'video'
1364                 or ie_result.get('formats') or ie_result.get('url')):
1365             return
1366
1367         format_dur = lambda dur: '%02d:%02d:%02d' % timetuple_from_msec(dur * 1000)[:-1]
1368         last_msg = ''
1369
1370         def progress(msg):
1371             nonlocal last_msg
1372             self.to_screen(msg + ' ' * (len(last_msg) - len(msg)) + '\r', skip_eol=True)
1373             last_msg = msg
1374
1375         min_wait, max_wait = self.params.get('wait_for_video')
1376         diff = try_get(ie_result, lambda x: x['release_timestamp'] - time.time())
1377         if diff is None and ie_result.get('live_status') == 'is_upcoming':
1378             diff = random.randrange(min_wait, max_wait) if (max_wait and min_wait) else (max_wait or min_wait)
1379             self.report_warning('Release time of video is not known')
1380         elif (diff or 0) <= 0:
1381             self.report_warning('Video should already be available according to extracted info')
1382         diff = min(max(diff or 0, min_wait or 0), max_wait or float('inf'))
1383         self.to_screen(f'[wait] Waiting for {format_dur(diff)} - Press Ctrl+C to try now')
1384
1385         wait_till = time.time() + diff
1386         try:
1387             while True:
1388                 diff = wait_till - time.time()
1389                 if diff <= 0:
1390                     progress('')
1391                     raise ReExtractInfo('[wait] Wait period ended', expected=True)
1392                 progress(f'[wait] Remaining time until next attempt: {self._format_screen(format_dur(diff), self.Styles.EMPHASIS)}')
1393                 time.sleep(1)
1394         except KeyboardInterrupt:
1395             progress('')
1396             raise ReExtractInfo('[wait] Interrupted by user', expected=True)
1397         except BaseException as e:
1398             if not isinstance(e, ReExtractInfo):
1399                 self.to_screen('')
1400             raise
1401
1402     @__handle_extraction_exceptions
1403     def __extract_info(self, url, ie, download, extra_info, process):
1404         ie_result = ie.extract(url)
1405         if ie_result is None:  # Finished already (backwards compatibility; listformats and friends should be moved here)
1406             return
1407         if isinstance(ie_result, list):
1408             # Backwards compatibility: old IE result format
1409             ie_result = {
1410                 '_type': 'compat_list',
1411                 'entries': ie_result,
1412             }
1413         if extra_info.get('original_url'):
1414             ie_result.setdefault('original_url', extra_info['original_url'])
1415         self.add_default_extra_info(ie_result, ie, url)
1416         if process:
1417             self._wait_for_video(ie_result)
1418             return self.process_ie_result(ie_result, download, extra_info)
1419         else:
1420             return ie_result
1421
1422     def add_default_extra_info(self, ie_result, ie, url):
1423         if url is not None:
1424             self.add_extra_info(ie_result, {
1425                 'webpage_url': url,
1426                 'original_url': url,
1427                 'webpage_url_basename': url_basename(url),
1428                 'webpage_url_domain': get_domain(url),
1429             })
1430         if ie is not None:
1431             self.add_extra_info(ie_result, {
1432                 'extractor': ie.IE_NAME,
1433                 'extractor_key': ie.ie_key(),
1434             })
1435
1436     def process_ie_result(self, ie_result, download=True, extra_info=None):
1437         """
1438         Take the result of the ie(may be modified) and resolve all unresolved
1439         references (URLs, playlist items).
1440
1441         It will also download the videos if 'download'.
1442         Returns the resolved ie_result.
1443         """
1444         if extra_info is None:
1445             extra_info = {}
1446         result_type = ie_result.get('_type', 'video')
1447
1448         if result_type in ('url', 'url_transparent'):
1449             ie_result['url'] = sanitize_url(ie_result['url'])
1450             if ie_result.get('original_url'):
1451                 extra_info.setdefault('original_url', ie_result['original_url'])
1452
1453             extract_flat = self.params.get('extract_flat', False)
1454             if ((extract_flat == 'in_playlist' and 'playlist' in extra_info)
1455                     or extract_flat is True):
1456                 info_copy = ie_result.copy()
1457                 ie = try_get(ie_result.get('ie_key'), self.get_info_extractor)
1458                 if ie and not ie_result.get('id'):
1459                     info_copy['id'] = ie.get_temp_id(ie_result['url'])
1460                 self.add_default_extra_info(info_copy, ie, ie_result['url'])
1461                 self.add_extra_info(info_copy, extra_info)
1462                 info_copy, _ = self.pre_process(info_copy)
1463                 self.__forced_printings(info_copy, self.prepare_filename(info_copy), incomplete=True)
1464                 if self.params.get('force_write_download_archive', False):
1465                     self.record_download_archive(info_copy)
1466                 return ie_result
1467
1468         if result_type == 'video':
1469             self.add_extra_info(ie_result, extra_info)
1470             ie_result = self.process_video_result(ie_result, download=download)
1471             additional_urls = (ie_result or {}).get('additional_urls')
1472             if additional_urls:
1473                 # TODO: Improve MetadataParserPP to allow setting a list
1474                 if isinstance(additional_urls, compat_str):
1475                     additional_urls = [additional_urls]
1476                 self.to_screen(
1477                     '[info] %s: %d additional URL(s) requested' % (ie_result['id'], len(additional_urls)))
1478                 self.write_debug('Additional URLs: "%s"' % '", "'.join(additional_urls))
1479                 ie_result['additional_entries'] = [
1480                     self.extract_info(
1481                         url, download, extra_info,
1482                         force_generic_extractor=self.params.get('force_generic_extractor'))
1483                     for url in additional_urls
1484                 ]
1485             return ie_result
1486         elif result_type == 'url':
1487             # We have to add extra_info to the results because it may be
1488             # contained in a playlist
1489             return self.extract_info(
1490                 ie_result['url'], download,
1491                 ie_key=ie_result.get('ie_key'),
1492                 extra_info=extra_info)
1493         elif result_type == 'url_transparent':
1494             # Use the information from the embedding page
1495             info = self.extract_info(
1496                 ie_result['url'], ie_key=ie_result.get('ie_key'),
1497                 extra_info=extra_info, download=False, process=False)
1498
1499             # extract_info may return None when ignoreerrors is enabled and
1500             # extraction failed with an error, don't crash and return early
1501             # in this case
1502             if not info:
1503                 return info
1504
1505             force_properties = dict(
1506                 (k, v) for k, v in ie_result.items() if v is not None)
1507             for f in ('_type', 'url', 'id', 'extractor', 'extractor_key', 'ie_key'):
1508                 if f in force_properties:
1509                     del force_properties[f]
1510             new_result = info.copy()
1511             new_result.update(force_properties)
1512
1513             # Extracted info may not be a video result (i.e.
1514             # info.get('_type', 'video') != video) but rather an url or
1515             # url_transparent. In such cases outer metadata (from ie_result)
1516             # should be propagated to inner one (info). For this to happen
1517             # _type of info should be overridden with url_transparent. This
1518             # fixes issue from https://github.com/ytdl-org/youtube-dl/pull/11163.
1519             if new_result.get('_type') == 'url':
1520                 new_result['_type'] = 'url_transparent'
1521
1522             return self.process_ie_result(
1523                 new_result, download=download, extra_info=extra_info)
1524         elif result_type in ('playlist', 'multi_video'):
1525             # Protect from infinite recursion due to recursively nested playlists
1526             # (see https://github.com/ytdl-org/youtube-dl/issues/27833)
1527             webpage_url = ie_result['webpage_url']
1528             if webpage_url in self._playlist_urls:
1529                 self.to_screen(
1530                     '[download] Skipping already downloaded playlist: %s'
1531                     % ie_result.get('title') or ie_result.get('id'))
1532                 return
1533
1534             self._playlist_level += 1
1535             self._playlist_urls.add(webpage_url)
1536             self._sanitize_thumbnails(ie_result)
1537             try:
1538                 return self.__process_playlist(ie_result, download)
1539             finally:
1540                 self._playlist_level -= 1
1541                 if not self._playlist_level:
1542                     self._playlist_urls.clear()
1543         elif result_type == 'compat_list':
1544             self.report_warning(
1545                 'Extractor %s returned a compat_list result. '
1546                 'It needs to be updated.' % ie_result.get('extractor'))
1547
1548             def _fixup(r):
1549                 self.add_extra_info(r, {
1550                     'extractor': ie_result['extractor'],
1551                     'webpage_url': ie_result['webpage_url'],
1552                     'webpage_url_basename': url_basename(ie_result['webpage_url']),
1553                     'webpage_url_domain': get_domain(ie_result['webpage_url']),
1554                     'extractor_key': ie_result['extractor_key'],
1555                 })
1556                 return r
1557             ie_result['entries'] = [
1558                 self.process_ie_result(_fixup(r), download, extra_info)
1559                 for r in ie_result['entries']
1560             ]
1561             return ie_result
1562         else:
1563             raise Exception('Invalid result type: %s' % result_type)
1564
1565     def _ensure_dir_exists(self, path):
1566         return make_dir(path, self.report_error)
1567
    def __process_playlist(self, ie_result, download):
        """
        Select the requested entries of a playlist result and process each of them.

        Arguments:
        ie_result -- the playlist result; must contain 'entries' and is
            modified in place ('entries' and 'requested_entries' are rewritten)
        download -- forwarded to the processing of every entry

        Returns ie_result with 'entries' replaced by the per-entry results, or
        None when writing one of the playlist files (info json, description)
        returned None.
        Raises EntryNotInPlaylist when there is no 'entries' key, or when
        'requested_entries' was given and a requested entry cannot be found.
        """
        # We process each entry in the playlist
        playlist = ie_result.get('title') or ie_result.get('id')
        self.to_screen('[download] Downloading playlist: %s' % playlist)

        if 'entries' not in ie_result:
            raise EntryNotInPlaylist('There are no entries')

        # Unique sentinel for playlist positions that have no entry
        MissingEntry = object()
        incomplete_entries = bool(ie_result.get('requested_entries'))
        if incomplete_entries:
            def fill_missing_entries(entries, indices):
                # Put each entry at its 1-based index; all other positions
                # up to the largest index hold the MissingEntry sentinel
                ret = [MissingEntry] * max(indices)
                for i, entry in zip(indices, entries):
                    ret[i - 1] = entry
                return ret
            ie_result['entries'] = fill_missing_entries(ie_result['entries'], ie_result['requested_entries'])

        playlist_results = []

        playliststart = self.params.get('playliststart', 1)
        playlistend = self.params.get('playlistend')
        # For backwards compatibility, interpret -1 as whole list
        if playlistend == -1:
            playlistend = None

        playlistitems_str = self.params.get('playlist_items')
        playlistitems = None
        if playlistitems_str is not None:
            def iter_playlistitems(format):
                # Expand a comma-separated spec such as '1,3-5' into 1, 3, 4, 5
                for string_segment in format.split(','):
                    if '-' in string_segment:
                        start, end = string_segment.split('-')
                        for item in range(int(start), int(end) + 1):
                            yield int(item)
                    else:
                        yield int(string_segment)
            playlistitems = orderedSet(iter_playlistitems(playlistitems_str))

        ie_entries = ie_result['entries']
        # For a list the total is filled in right away, leaving one '%d'
        # placeholder; in both cases msg is later formatted with n_entries
        msg = (
            'Downloading %d videos' if not isinstance(ie_entries, list)
            else 'Collected %d videos; downloading %%d of them' % len(ie_entries))

        # get_entry(i) fetches the entry at 1-based position i
        if isinstance(ie_entries, list):
            def get_entry(i):
                return ie_entries[i - 1]
        else:
            if not isinstance(ie_entries, (PagedList, LazyList)):
                ie_entries = LazyList(ie_entries)

            def get_entry(i):
                # Non-list entries are accessed through the extraction
                # exception handler
                return YoutubeDL.__handle_extraction_exceptions(
                    lambda self, i: ie_entries[i - 1]
                )(self, i)

        entries = []
        # Without explicit items, count upwards from playliststart until
        # playlistend or the end of the entries is reached
        items = playlistitems if playlistitems is not None else itertools.count(playliststart)
        for i in items:
            if i == 0:
                continue
            if playlistitems is None and playlistend is not None and playlistend < i:
                break
            entry = None
            try:
                entry = get_entry(i)
                if entry is MissingEntry:
                    raise EntryNotInPlaylist()
            except (IndexError, EntryNotInPlaylist):
                if incomplete_entries:
                    raise EntryNotInPlaylist(f'Entry {i} cannot be found')
                elif not playlistitems:
                    break
                # Otherwise entry stays None and is appended as a placeholder
            entries.append(entry)
            try:
                # The return value is ignored here; this call only serves to
                # stop collecting entries when a break exception is raised
                if entry is not None:
                    self._match_entry(entry, incomplete=True, silent=True)
            except (ExistingVideoReached, RejectedVideoReached):
                break
        ie_result['entries'] = entries

        # Save playlist_index before re-ordering
        # (each element becomes a (playlist_index, entry) pair; None placeholders are dropped)
        entries = [
            ((playlistitems[i - 1] if playlistitems else i + playliststart - 1), entry)
            for i, entry in enumerate(entries, 1)
            if entry is not None]
        n_entries = len(entries)

        if not playlistitems and (playliststart != 1 or playlistend):
            playlistitems = list(range(playliststart, playliststart + n_entries))
        ie_result['requested_entries'] = playlistitems

        _infojson_written = False
        if not self.params.get('simulate') and self.params.get('allow_playlist_files', True):
            # Playlist-level fields used for building the playlist file names;
            # values of ie_result take precedence over these defaults
            ie_copy = {
                'playlist': playlist,
                'playlist_id': ie_result.get('id'),
                'playlist_title': ie_result.get('title'),
                'playlist_uploader': ie_result.get('uploader'),
                'playlist_uploader_id': ie_result.get('uploader_id'),
                'playlist_index': 0,
                'n_entries': n_entries,
            }
            ie_copy.update(dict(ie_result))

            _infojson_written = self._write_info_json(
                'playlist', ie_result, self.prepare_filename(ie_copy, 'pl_infojson'))
            if _infojson_written is None:
                return
            if self._write_description('playlist', ie_result,
                                       self.prepare_filename(ie_copy, 'pl_description')) is None:
                return
            # TODO: This should be passed to ThumbnailsConvertor if necessary
            self._write_thumbnails('playlist', ie_copy, self.prepare_filename(ie_copy, 'pl_thumbnail'))

        if self.params.get('playlistreverse', False):
            entries = entries[::-1]
        if self.params.get('playlistrandom', False):
            random.shuffle(entries)

        x_forwarded_for = ie_result.get('__x_forwarded_for_ip')

        self.to_screen('[%s] playlist %s: %s' % (ie_result['extractor'], playlist, msg % n_entries))
        failures = 0
        # No limit on failed entries unless 'skip_playlist_after_errors' is set
        max_failures = self.params.get('skip_playlist_after_errors') or float('inf')
        for i, entry_tuple in enumerate(entries, 1):
            playlist_index, entry = entry_tuple
            # With the 'playlist-index' compat option the index follows the
            # (possibly re-ordered) processing position instead of the saved one
            if 'playlist-index' in self.params.get('compat_opts', []):
                playlist_index = playlistitems[i - 1] if playlistitems else i + playliststart - 1
            self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
            # This __x_forwarded_for_ip thing is a bit ugly but requires
            # minimal changes
            if x_forwarded_for:
                entry['__x_forwarded_for_ip'] = x_forwarded_for
            # Playlist context handed to the entry as extra_info
            extra = {
                'n_entries': n_entries,
                '_last_playlist_index': max(playlistitems) if playlistitems else (playlistend or n_entries),
                'playlist_index': playlist_index,
                'playlist_autonumber': i,
                'playlist': playlist,
                'playlist_id': ie_result.get('id'),
                'playlist_title': ie_result.get('title'),
                'playlist_uploader': ie_result.get('uploader'),
                'playlist_uploader_id': ie_result.get('uploader_id'),
                'extractor': ie_result['extractor'],
                'webpage_url': ie_result['webpage_url'],
                'webpage_url_basename': url_basename(ie_result['webpage_url']),
                'webpage_url_domain': get_domain(ie_result['webpage_url']),
                'extractor_key': ie_result['extractor_key'],
            }

            # A non-None return value is a reason to skip this entry
            if self._match_entry(entry, incomplete=True) is not None:
                continue

            entry_result = self.__process_iterable_entry(entry, download, extra)
            # A falsy result counts as a failed entry
            if not entry_result:
                failures += 1
            if failures >= max_failures:
                self.report_error(
                    'Skipping the remaining entries in playlist "%s" since %d items failed extraction' % (playlist, failures))
                break
            playlist_results.append(entry_result)
        ie_result['entries'] = playlist_results

        # Write the updated info to json
        # (ie_copy exists here because _infojson_written can only be truthy
        # when the playlist-file branch above was taken)
        if _infojson_written and self._write_info_json(
                'updated playlist', ie_result,
                self.prepare_filename(ie_copy, 'pl_infojson'), overwrite=True) is None:
            return
        self.to_screen('[download] Finished downloading playlist: %s' % playlist)
        return ie_result
1739
1740     @__handle_extraction_exceptions
1741     def __process_iterable_entry(self, entry, download, extra_info):
1742         return self.process_ie_result(
1743             entry, download=download, extra_info=extra_info)
1744
1745     def _build_format_filter(self, filter_spec):
1746         " Returns a function to filter the formats according to the filter_spec "
1747
1748         OPERATORS = {
1749             '<': operator.lt,
1750             '<=': operator.le,
1751             '>': operator.gt,
1752             '>=': operator.ge,
1753             '=': operator.eq,
1754             '!=': operator.ne,
1755         }
1756         operator_rex = re.compile(r'''(?x)\s*
1757             (?P<key>width|height|tbr|abr|vbr|asr|filesize|filesize_approx|fps)\s*
1758             (?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
1759             (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)\s*
1760             ''' % '|'.join(map(re.escape, OPERATORS.keys())))
1761         m = operator_rex.fullmatch(filter_spec)
1762         if m:
1763             try:
1764                 comparison_value = int(m.group('value'))
1765             except ValueError:
1766                 comparison_value = parse_filesize(m.group('value'))
1767                 if comparison_value is None:
1768                     comparison_value = parse_filesize(m.group('value') + 'B')
1769                 if comparison_value is None:
1770                     raise ValueError(
1771                         'Invalid value %r in format specification %r' % (
1772                             m.group('value'), filter_spec))
1773             op = OPERATORS[m.group('op')]
1774
1775         if not m:
1776             STR_OPERATORS = {
1777                 '=': operator.eq,
1778                 '^=': lambda attr, value: attr.startswith(value),
1779                 '$=': lambda attr, value: attr.endswith(value),
1780                 '*=': lambda attr, value: value in attr,
1781             }
1782             str_operator_rex = re.compile(r'''(?x)\s*
1783                 (?P<key>[a-zA-Z0-9._-]+)\s*
1784                 (?P<negation>!\s*)?(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
1785                 (?P<value>[a-zA-Z0-9._-]+)\s*
1786                 ''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))
1787             m = str_operator_rex.fullmatch(filter_spec)
1788             if m:
1789                 comparison_value = m.group('value')
1790                 str_op = STR_OPERATORS[m.group('op')]
1791                 if m.group('negation'):
1792                     op = lambda attr, value: not str_op(attr, value)
1793                 else:
1794                     op = str_op
1795
1796         if not m:
1797             raise SyntaxError('Invalid filter specification %r' % filter_spec)
1798
1799         def _filter(f):
1800             actual_value = f.get(m.group('key'))
1801             if actual_value is None:
1802                 return m.group('none_inclusive')
1803             return op(actual_value, comparison_value)
1804         return _filter
1805
1806     def _check_formats(self, formats):
1807         for f in formats:
1808             self.to_screen('[info] Testing format %s' % f['format_id'])
1809             path = self.get_output_path('temp')
1810             if not self._ensure_dir_exists(f'{path}/'):
1811                 continue
1812             temp_file = tempfile.NamedTemporaryFile(suffix='.tmp', delete=False, dir=path or None)
1813             temp_file.close()
1814             try:
1815                 success, _ = self.dl(temp_file.name, f, test=True)
1816             except (DownloadError, IOError, OSError, ValueError) + network_exceptions:
1817                 success = False
1818             finally:
1819                 if os.path.exists(temp_file.name):
1820                     try:
1821                         os.remove(temp_file.name)
1822                     except OSError:
1823                         self.report_warning('Unable to delete temporary file "%s"' % temp_file.name)
1824             if success:
1825                 yield f
1826             else:
1827                 self.to_screen('[info] Unable to download format %s. Skipping...' % f['format_id'])
1828
1829     def _default_format_spec(self, info_dict, download=True):
1830
1831         def can_merge():
1832             merger = FFmpegMergerPP(self)
1833             return merger.available and merger.can_merge()
1834
1835         prefer_best = (
1836             not self.params.get('simulate')
1837             and download
1838             and (
1839                 not can_merge()
1840                 or info_dict.get('is_live', False)
1841                 or self.outtmpl_dict['default'] == '-'))
1842         compat = (
1843             prefer_best
1844             or self.params.get('allow_multiple_audio_streams', False)
1845             or 'format-spec' in self.params.get('compat_opts', []))
1846
1847         return (
1848             'best/bestvideo+bestaudio' if prefer_best
1849             else 'bestvideo*+bestaudio/best' if not compat
1850             else 'bestvideo+bestaudio/best')
1851
1852     def build_format_selector(self, format_spec):
1853         def syntax_error(note, start):
1854             message = (
1855                 'Invalid format specification: '
1856                 '{0}\n\t{1}\n\t{2}^'.format(note, format_spec, ' ' * start[1]))
1857             return SyntaxError(message)
1858
1859         PICKFIRST = 'PICKFIRST'
1860         MERGE = 'MERGE'
1861         SINGLE = 'SINGLE'
1862         GROUP = 'GROUP'
1863         FormatSelector = collections.namedtuple('FormatSelector', ['type', 'selector', 'filters'])
1864
1865         allow_multiple_streams = {'audio': self.params.get('allow_multiple_audio_streams', False),
1866                                   'video': self.params.get('allow_multiple_video_streams', False)}
1867
1868         check_formats = self.params.get('check_formats') == 'selected'
1869
1870         def _parse_filter(tokens):
1871             filter_parts = []
1872             for type, string, start, _, _ in tokens:
1873                 if type == tokenize.OP and string == ']':
1874                     return ''.join(filter_parts)
1875                 else:
1876                     filter_parts.append(string)
1877
1878         def _remove_unused_ops(tokens):
1879             # Remove operators that we don't use and join them with the surrounding strings
1880             # for example: 'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9'
1881             ALLOWED_OPS = ('/', '+', ',', '(', ')')
1882             last_string, last_start, last_end, last_line = None, None, None, None
1883             for type, string, start, end, line in tokens:
1884                 if type == tokenize.OP and string == '[':
1885                     if last_string:
1886                         yield tokenize.NAME, last_string, last_start, last_end, last_line
1887                         last_string = None
1888                     yield type, string, start, end, line
1889                     # everything inside brackets will be handled by _parse_filter
1890                     for type, string, start, end, line in tokens:
1891                         yield type, string, start, end, line
1892                         if type == tokenize.OP and string == ']':
1893                             break
1894                 elif type == tokenize.OP and string in ALLOWED_OPS:
1895                     if last_string:
1896                         yield tokenize.NAME, last_string, last_start, last_end, last_line
1897                         last_string = None
1898                     yield type, string, start, end, line
1899                 elif type in [tokenize.NAME, tokenize.NUMBER, tokenize.OP]:
1900                     if not last_string:
1901                         last_string = string
1902                         last_start = start
1903                         last_end = end
1904                     else:
1905                         last_string += string
1906             if last_string:
1907                 yield tokenize.NAME, last_string, last_start, last_end, last_line
1908
        def _parse_format_selection(tokens, inside_merge=False, inside_choice=False, inside_group=False):
            """Parse *tokens* into a list of FormatSelector tuples.

            The function calls itself for the right-hand side of '/', for the
            right-hand side of '+' and for the contents of '(...)'.  The flags
            tell a nested call which operator ends it:

            inside_merge  -- stop at '/' or ',' and push that token back
            inside_choice -- stop at ',' and push that token back
            inside_group  -- the closing ')' belongs to this call and is
                             consumed; in every other call ')' is pushed back

            *tokens* must provide restore_last_token() in addition to being
            iterable.

            Returns the list of selectors parsed so far.  Raises the error built
            by syntax_error for a misplaced or unknown operator.
            """
            selectors = []
            current_selector = None
            for type, string, start, _, _ in tokens:
                # ENCODING is only defined in python 3.x
                if type == getattr(tokenize, 'ENCODING', None):
                    continue
                elif type in [tokenize.NAME, tokenize.NUMBER]:
                    current_selector = FormatSelector(SINGLE, string, [])
                elif type == tokenize.OP:
                    if string == ')':
                        if not inside_group:
                            # ')' will be handled by the parentheses group
                            tokens.restore_last_token()
                        break
                    elif inside_merge and string in ['/', ',']:
                        # Leave the operator for the call that started the merge
                        tokens.restore_last_token()
                        break
                    elif inside_choice and string == ',':
                        # Leave the ',' for the call that started the choice
                        tokens.restore_last_token()
                        break
                    elif string == ',':
                        if not current_selector:
                            raise syntax_error('"," must follow a format selector', start)
                        selectors.append(current_selector)
                        current_selector = None
                    elif string == '/':
                        if not current_selector:
                            raise syntax_error('"/" must follow a format selector', start)
                        first_choice = current_selector
                        # second_choice is a list (the return value of the nested call)
                        second_choice = _parse_format_selection(tokens, inside_choice=True)
                        current_selector = FormatSelector(PICKFIRST, (first_choice, second_choice), [])
                    elif string == '[':
                        if not current_selector:
                            # A filter without a preceding name applies to 'best'
                            current_selector = FormatSelector(SINGLE, 'best', [])
                        format_filter = _parse_filter(tokens)
                        current_selector.filters.append(format_filter)
                    elif string == '(':
                        if current_selector:
                            raise syntax_error('Unexpected "("', start)
                        group = _parse_format_selection(tokens, inside_group=True)
                        current_selector = FormatSelector(GROUP, group, [])
                    elif string == '+':
                        if not current_selector:
                            raise syntax_error('Unexpected "+"', start)
                        selector_1 = current_selector
                        selector_2 = _parse_format_selection(tokens, inside_merge=True)
                        if not selector_2:
                            raise syntax_error('Expected a selector', start)
                        current_selector = FormatSelector(MERGE, (selector_1, selector_2), [])
                    else:
                        raise syntax_error('Operator not recognized: "{0}"'.format(string), start)
                elif type == tokenize.ENDMARKER:
                    break
            # Flush the selector that was still being built when the loop ended
            if current_selector:
                selectors.append(current_selector)
            return selectors
1966
1967         def _merge(formats_pair):
1968             format_1, format_2 = formats_pair
1969
1970             formats_info = []
1971             formats_info.extend(format_1.get('requested_formats', (format_1,)))
1972             formats_info.extend(format_2.get('requested_formats', (format_2,)))
1973
1974             if not allow_multiple_streams['video'] or not allow_multiple_streams['audio']:
1975                 get_no_more = {'video': False, 'audio': False}
1976                 for (i, fmt_info) in enumerate(formats_info):
1977                     if fmt_info.get('acodec') == fmt_info.get('vcodec') == 'none':
1978                         formats_info.pop(i)
1979                         continue
1980                     for aud_vid in ['audio', 'video']:
1981                         if not allow_multiple_streams[aud_vid] and fmt_info.get(aud_vid[0] + 'codec') != 'none':
1982                             if get_no_more[aud_vid]:
1983                                 formats_info.pop(i)
1984                                 break
1985                             get_no_more[aud_vid] = True
1986
1987             if len(formats_info) == 1:
1988                 return formats_info[0]
1989
1990             video_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('vcodec') != 'none']
1991             audio_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('acodec') != 'none']
1992
1993             the_only_video = video_fmts[0] if len(video_fmts) == 1 else None
1994             the_only_audio = audio_fmts[0] if len(audio_fmts) == 1 else None
1995
1996             output_ext = self.params.get('merge_output_format')
1997             if not output_ext:
1998                 if the_only_video:
1999                     output_ext = the_only_video['ext']
2000                 elif the_only_audio and not video_fmts:
2001                     output_ext = the_only_audio['ext']
2002                 else:
2003                     output_ext = 'mkv'
2004
2005             filtered = lambda *keys: filter(None, (traverse_obj(fmt, *keys) for fmt in formats_info))
2006
2007             new_dict = {
2008                 'requested_formats': formats_info,
2009                 'format': '+'.join(filtered('format')),
2010                 'format_id': '+'.join(filtered('format_id')),
2011                 'ext': output_ext,
2012                 'protocol': '+'.join(map(determine_protocol, formats_info)),
2013                 'language': '+'.join(orderedSet(filtered('language'))) or None,
2014                 'format_note': '+'.join(orderedSet(filtered('format_note'))) or None,
2015                 'filesize_approx': sum(filtered('filesize', 'filesize_approx')) or None,
2016                 'tbr': sum(filtered('tbr', 'vbr', 'abr')),
2017             }
2018
2019             if the_only_video:
2020                 new_dict.update({
2021                     'width': the_only_video.get('width'),
2022                     'height': the_only_video.get('height'),
2023                     'resolution': the_only_video.get('resolution') or self.format_resolution(the_only_video),
2024                     'fps': the_only_video.get('fps'),
2025                     'dynamic_range': the_only_video.get('dynamic_range'),
2026                     'vcodec': the_only_video.get('vcodec'),
2027                     'vbr': the_only_video.get('vbr'),
2028                     'stretched_ratio': the_only_video.get('stretched_ratio'),
2029                 })
2030
2031             if the_only_audio:
2032                 new_dict.update({
2033                     'acodec': the_only_audio.get('acodec'),
2034                     'abr': the_only_audio.get('abr'),
2035                     'asr': the_only_audio.get('asr'),
2036                 })
2037
2038             return new_dict
2039
2040         def _check_formats(formats):
2041             if not check_formats:
2042                 yield from formats
2043                 return
2044             yield from self._check_formats(formats)
2045
        def _build_selector_function(selector):
            """Turn a parsed selector into a function that picks formats.

            *selector* is either a list of FormatSelector tuples or a single
            FormatSelector.  The returned callable takes the context dict (it
            reads ctx['formats'] and ctx['incomplete_formats']) and returns an
            iterable of the chosen formats.

            For a list the inner function is returned directly.  For a single
            FormatSelector the result is wrapped in final_selector, which first
            applies the selector's filters to a deep copy of the context.
            """
            if isinstance(selector, list):  # ,
                fs = [_build_selector_function(s) for s in selector]

                def selector_function(ctx):
                    # Concatenate the results of every comma-separated selector
                    for f in fs:
                        yield from f(ctx)
                return selector_function

            elif selector.type == GROUP:  # ()
                selector_function = _build_selector_function(selector.selector)

            elif selector.type == PICKFIRST:  # /
                fs = [_build_selector_function(s) for s in selector.selector]

                def selector_function(ctx):
                    # Return the results of the first alternative that yields anything
                    for f in fs:
                        picked_formats = list(f(ctx))
                        if picked_formats:
                            return picked_formats
                    return []

            elif selector.type == MERGE:  # +
                selector_1, selector_2 = map(_build_selector_function, selector.selector)

                def selector_function(ctx):
                    # Merge every combination of a left-hand and a right-hand result
                    for pair in itertools.product(
                            selector_1(copy.deepcopy(ctx)), selector_2(copy.deepcopy(ctx))):
                        yield _merge(pair)

            elif selector.type == SINGLE:  # atom
                format_spec = selector.selector or 'best'

                # TODO: Add allvideo, allaudio etc by generalizing the code with best/worst selector
                if format_spec == 'all':
                    def selector_function(ctx):
                        yield from _check_formats(ctx['formats'][::-1])
                elif format_spec == 'mergeall':
                    def selector_function(ctx):
                        formats = list(_check_formats(ctx['formats']))
                        if not formats:
                            return
                        # Start from the last format and merge the others into it,
                        # walking the list backwards
                        merged_format = formats[-1]
                        for f in formats[-2::-1]:
                            merged_format = _merge((merged_format, f))
                        yield merged_format

                else:
                    format_fallback, format_reverse, format_idx = False, True, 1
                    # best/worst (or b/w), optional video/audio (or v/a), optional '*',
                    # optional '.N' index
                    mobj = re.match(
                        r'(?P<bw>best|worst|b|w)(?P<type>video|audio|v|a)?(?P<mod>\*)?(?:\.(?P<n>[1-9]\d*))?$',
                        format_spec)
                    if mobj is not None:
                        format_idx = int_or_none(mobj.group('n'), default=1)
                        format_reverse = mobj.group('bw')[0] == 'b'
                        format_type = (mobj.group('type') or [None])[0]
                        not_format_type = {'v': 'a', 'a': 'v'}.get(format_type)
                        format_modified = mobj.group('mod') is not None

                        format_fallback = not format_type and not format_modified  # for b, w
                        _filter_f = (
                            (lambda f: f.get('%scodec' % format_type) != 'none')
                            if format_type and format_modified  # bv*, ba*, wv*, wa*
                            else (lambda f: f.get('%scodec' % not_format_type) == 'none')
                            if format_type  # bv, ba, wv, wa
                            else (lambda f: f.get('vcodec') != 'none' and f.get('acodec') != 'none')
                            if not format_modified  # b, w
                            else lambda f: True)  # b*, w*
                        # In every case, also reject formats where both codecs are 'none'
                        filter_f = lambda f: _filter_f(f) and (
                            f.get('vcodec') != 'none' or f.get('acodec') != 'none')
                    else:
                        # Not a best/worst expression: a known extension or a format id
                        if format_spec in self._format_selection_exts['audio']:
                            filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') != 'none'
                        elif format_spec in self._format_selection_exts['video']:
                            filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') != 'none' and f.get('vcodec') != 'none'
                        elif format_spec in self._format_selection_exts['storyboards']:
                            filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') == 'none' and f.get('vcodec') == 'none'
                        else:
                            filter_f = lambda f: f.get('format_id') == format_spec  # id

                    def selector_function(ctx):
                        formats = list(ctx['formats'])
                        # NOTE(review): filter_f is assigned in every branch above, so the
                        # "is not None" alternative looks unreachable here
                        matches = list(filter(filter_f, formats)) if filter_f is not None else formats
                        if format_fallback and ctx['incomplete_formats'] and not matches:
                            # for extractors with incomplete formats (audio only (soundcloud)
                            # or video only (imgur)) best/worst will fallback to
                            # best/worst {video,audio}-only format
                            matches = formats
                        # Reverse the list for "best" so that format_idx counts from its end
                        matches = LazyList(_check_formats(matches[::-1 if format_reverse else 1]))
                        try:
                            yield matches[format_idx - 1]
                        except IndexError:
                            return

            filters = [self._build_format_filter(f) for f in selector.filters]

            def final_selector(ctx):
                # Work on a copy so that filtering does not alter the caller's context
                ctx_copy = copy.deepcopy(ctx)
                for _filter in filters:
                    ctx_copy['formats'] = list(filter(_filter, ctx_copy['formats']))
                return selector_function(ctx_copy)
            return final_selector
2148
2149         stream = io.BytesIO(format_spec.encode('utf-8'))
2150         try:
2151             tokens = list(_remove_unused_ops(compat_tokenize_tokenize(stream.readline)))
2152         except tokenize.TokenError:
2153             raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec)))
2154
2155         class TokenIterator(object):
2156             def __init__(self, tokens):
2157                 self.tokens = tokens
2158                 self.counter = 0
2159
2160             def __iter__(self):
2161                 return self
2162
2163             def __next__(self):
2164                 if self.counter >= len(self.tokens):
2165                     raise StopIteration()
2166                 value = self.tokens[self.counter]
2167                 self.counter += 1
2168                 return value
2169
2170             next = __next__
2171
2172             def restore_last_token(self):
2173                 self.counter -= 1
2174
2175         parsed_selector = _parse_format_selection(iter(TokenIterator(tokens)))
2176         return _build_selector_function(parsed_selector)
2177
2178     def _calc_headers(self, info_dict):
2179         res = std_headers.copy()
2180
2181         add_headers = info_dict.get('http_headers')
2182         if add_headers:
2183             res.update(add_headers)
2184
2185         cookies = self._calc_cookies(info_dict)
2186         if cookies:
2187             res['Cookie'] = cookies
2188
2189         if 'X-Forwarded-For' not in res:
2190             x_forwarded_for_ip = info_dict.get('__x_forwarded_for_ip')
2191             if x_forwarded_for_ip:
2192                 res['X-Forwarded-For'] = x_forwarded_for_ip
2193
2194         return res
2195
2196     def _calc_cookies(self, info_dict):
2197         pr = sanitized_Request(info_dict['url'])
2198         self.cookiejar.add_cookie_header(pr)
2199         return pr.get_header('Cookie')
2200
2201     def _sort_thumbnails(self, thumbnails):
2202         thumbnails.sort(key=lambda t: (
2203             t.get('preference') if t.get('preference') is not None else -1,
2204             t.get('width') if t.get('width') is not None else -1,
2205             t.get('height') if t.get('height') is not None else -1,
2206             t.get('id') if t.get('id') is not None else '',
2207             t.get('url')))
2208
2209     def _sanitize_thumbnails(self, info_dict):
2210         thumbnails = info_dict.get('thumbnails')
2211         if thumbnails is None:
2212             thumbnail = info_dict.get('thumbnail')
2213             if thumbnail:
2214                 info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}]
2215         if not thumbnails:
2216             return
2217
2218         def check_thumbnails(thumbnails):
2219             for t in thumbnails:
2220                 self.to_screen(f'[info] Testing thumbnail {t["id"]}')
2221                 try:
2222                     self.urlopen(HEADRequest(t['url']))
2223                 except network_exceptions as err:
2224                     self.to_screen(f'[info] Unable to connect to thumbnail {t["id"]} URL {t["url"]!r} - {err}. Skipping...')
2225                     continue
2226                 yield t
2227
2228         self._sort_thumbnails(thumbnails)
2229         for i, t in enumerate(thumbnails):
2230             if t.get('id') is None:
2231                 t['id'] = '%d' % i
2232             if t.get('width') and t.get('height'):
2233                 t['resolution'] = '%dx%d' % (t['width'], t['height'])
2234             t['url'] = sanitize_url(t['url'])
2235
2236         if self.params.get('check_formats') is True:
2237             info_dict['thumbnails'] = LazyList(check_thumbnails(thumbnails[::-1]), reverse=True)
2238         else:
2239             info_dict['thumbnails'] = thumbnails
2240
    def process_video_result(self, info_dict, download=True):
        """Sanitize a single video result, select its formats and process them.

        *info_dict* is modified in place: field types are normalized, derived
        fields (thumbnail, display_id, duration_string, dates, live status,
        chapter/season/episode titles, subtitle extensions, per-format
        defaults and HTTP headers) are filled in, and finally the last
        selected format is merged into it.

        When *download* is true, process_info() is called once per selected
        format.

        Returns the updated info_dict, or None when only listing was requested
        (a list_* param is set and the 'simulate' param is None).

        Raises ExtractorError when 'id' or 'title' is missing, or when no
        format matches and the 'ignore_no_formats_error' param is not set.
        """
        assert info_dict.get('_type', 'video') == 'video'

        if 'id' not in info_dict:
            raise ExtractorError('Missing "id" field in extractor result')
        if 'title' not in info_dict:
            # NOTE(review): info_dict['extractor'] is read unconditionally here
            # and further below - presumably always set by the caller; confirm
            raise ExtractorError('Missing "title" field in extractor result',
                                 video_id=info_dict['id'], ie=info_dict['extractor'])

        def report_force_conversion(field, field_not, conversion):
            self.report_warning(
                '"%s" field is not %s - forcing %s conversion, there is an error in extractor'
                % (field, field_not, conversion))

        def sanitize_string_field(info, string_field):
            # Convert a non-string value of info[string_field] to a string, with a warning
            field = info.get(string_field)
            if field is None or isinstance(field, compat_str):
                return
            report_force_conversion(string_field, 'a string', 'string')
            info[string_field] = compat_str(field)

        def sanitize_numeric_fields(info):
            # Convert non-numeric values of the known numeric fields, with a warning
            for numeric_field in self._NUMERIC_FIELDS:
                field = info.get(numeric_field)
                if field is None or isinstance(field, compat_numeric_types):
                    continue
                report_force_conversion(numeric_field, 'numeric', 'int')
                info[numeric_field] = int_or_none(field)

        sanitize_string_field(info_dict, 'id')
        sanitize_numeric_fields(info_dict)

        if 'playlist' not in info_dict:
            # It isn't part of a playlist
            info_dict['playlist'] = None
            info_dict['playlist_index'] = None

        self._sanitize_thumbnails(info_dict)

        # An explicit 'thumbnail' is kept (sanitized); otherwise the last entry
        # of the sorted thumbnails list is used
        thumbnail = info_dict.get('thumbnail')
        thumbnails = info_dict.get('thumbnails')
        if thumbnail:
            info_dict['thumbnail'] = sanitize_url(thumbnail)
        elif thumbnails:
            info_dict['thumbnail'] = thumbnails[-1]['url']

        if info_dict.get('display_id') is None and 'id' in info_dict:
            info_dict['display_id'] = info_dict['id']

        if info_dict.get('duration') is not None:
            info_dict['duration_string'] = formatSeconds(info_dict['duration'])

        # Derive the missing date fields from their timestamps
        for ts_key, date_key in (
                ('timestamp', 'upload_date'),
                ('release_timestamp', 'release_date'),
        ):
            if info_dict.get(date_key) is None and info_dict.get(ts_key) is not None:
                # Working around out-of-range timestamp values (e.g. negative ones on Windows,
                # see http://bugs.python.org/issue1646728)
                try:
                    upload_date = datetime.datetime.utcfromtimestamp(info_dict[ts_key])
                    info_dict[date_key] = upload_date.strftime('%Y%m%d')
                except (ValueError, OverflowError, OSError):
                    pass

        # Infer 'live_status' from is_live/was_live when it is not given, then
        # fill in whichever of those two flags is still None
        live_keys = ('is_live', 'was_live')
        live_status = info_dict.get('live_status')
        if live_status is None:
            for key in live_keys:
                if info_dict.get(key) is False:
                    continue
                if info_dict.get(key):
                    live_status = key
                break
            if all(info_dict.get(key) is False for key in live_keys):
                live_status = 'not_live'
        if live_status:
            info_dict['live_status'] = live_status
            for key in live_keys:
                if info_dict.get(key) is None:
                    info_dict[key] = (live_status == key)

        # Auto generate title fields corresponding to the *_number fields when missing
        # in order to always have clean titles. This is very common for TV series.
        for field in ('chapter', 'season', 'episode'):
            if info_dict.get('%s_number' % field) is not None and not info_dict.get(field):
                info_dict[field] = '%s %d' % (field.capitalize(), info_dict['%s_number' % field])

        for cc_kind in ('subtitles', 'automatic_captions'):
            cc = info_dict.get(cc_kind)
            if cc:
                for _, subtitle in cc.items():
                    for subtitle_format in subtitle:
                        if subtitle_format.get('url'):
                            subtitle_format['url'] = sanitize_url(subtitle_format['url'])
                        if subtitle_format.get('ext') is None:
                            # NOTE(review): this indexes 'url' directly, so an entry with
                            # neither 'ext' nor 'url' would raise KeyError - confirm that
                            # extractors always provide one of them
                            subtitle_format['ext'] = determine_ext(subtitle_format['url']).lower()

        automatic_captions = info_dict.get('automatic_captions')
        subtitles = info_dict.get('subtitles')

        info_dict['requested_subtitles'] = self.process_subtitles(
            info_dict['id'], subtitles, automatic_captions)

        if info_dict.get('formats') is None:
            # There's only one format available
            formats = [info_dict]
        else:
            formats = info_dict['formats']

        # Remember whether any format has DRM before such formats are removed
        info_dict['__has_drm'] = any(f.get('has_drm') for f in formats)
        if not self.params.get('allow_unplayable_formats'):
            formats = [f for f in formats if not f.get('has_drm')]

        if not formats:
            self.raise_no_formats(info_dict)

        def is_wellformed(f):
            url = f.get('url')
            if not url:
                self.report_warning(
                    '"url" field is missing or empty - skipping format, '
                    'there is an error in extractor')
                return False
            if isinstance(url, bytes):
                sanitize_string_field(f, 'url')
            return True

        # Filter out malformed formats for better extraction robustness
        formats = list(filter(is_wellformed, formats))

        # Maps each format_id to the list of formats carrying it
        formats_dict = {}

        # We check that all the formats have the format and format_id fields
        for i, format in enumerate(formats):
            sanitize_string_field(format, 'format_id')
            sanitize_numeric_fields(format)
            format['url'] = sanitize_url(format['url'])
            if not format.get('format_id'):
                format['format_id'] = compat_str(i)
            else:
                # Sanitize format_id from characters used in format selector expression
                format['format_id'] = re.sub(r'[\s,/+\[\]()]', '_', format['format_id'])
            format_id = format['format_id']
            if format_id not in formats_dict:
                formats_dict[format_id] = []
            formats_dict[format_id].append(format)

        # Make sure all formats have unique format_id
        common_exts = set(itertools.chain(*self._format_selection_exts.values()))
        for format_id, ambiguous_formats in formats_dict.items():
            ambigious_id = len(ambiguous_formats) > 1
            for i, format in enumerate(ambiguous_formats):
                if ambigious_id:
                    format['format_id'] = '%s-%d' % (format_id, i)
                if format.get('ext') is None:
                    format['ext'] = determine_ext(format['url']).lower()
                # Ensure there is no conflict between id and ext in format selection
                # See https://github.com/yt-dlp/yt-dlp/issues/1282
                if format['format_id'] != format['ext'] and format['format_id'] in common_exts:
                    format['format_id'] = 'f%s' % format['format_id']

        # Fill in the per-format fields that the extractor left unset
        for i, format in enumerate(formats):
            if format.get('format') is None:
                format['format'] = '{id} - {res}{note}'.format(
                    id=format['format_id'],
                    res=self.format_resolution(format),
                    note=format_field(format, 'format_note', ' (%s)'),
                )
            if format.get('protocol') is None:
                format['protocol'] = determine_protocol(format)
            if format.get('resolution') is None:
                format['resolution'] = self.format_resolution(format, default=None)
            if format.get('dynamic_range') is None and format.get('vcodec') != 'none':
                format['dynamic_range'] = 'SDR'
            if (info_dict.get('duration') and format.get('tbr')
                    and not format.get('filesize') and not format.get('filesize_approx')):
                format['filesize_approx'] = info_dict['duration'] * format['tbr'] * (1024 / 8)

            # Add HTTP headers, so that external programs can use them from the
            # json output
            full_format_info = info_dict.copy()
            full_format_info.update(format)
            format['http_headers'] = self._calc_headers(full_format_info)
        # Remove private housekeeping stuff
        if '__x_forwarded_for_ip' in info_dict:
            del info_dict['__x_forwarded_for_ip']

        # TODO Central sorting goes here

        if self.params.get('check_formats') is True:
            formats = LazyList(self._check_formats(formats[::-1]), reverse=True)

        if not formats or formats[0] is not info_dict:
            # only set the 'formats' fields if the original info_dict list them
            # otherwise we end up with a circular reference, the first (and unique)
            # element in the 'formats' field in info_dict is info_dict itself,
            # which can't be exported to json
            info_dict['formats'] = formats

        info_dict, _ = self.pre_process(info_dict)

        # The pre-processors may have modified the formats
        formats = info_dict.get('formats', [info_dict])

        if self.params.get('list_thumbnails'):
            self.list_thumbnails(info_dict)
        if self.params.get('listformats'):
            if not info_dict.get('formats') and not info_dict.get('url'):
                self.to_screen('%s has no formats' % info_dict['id'])
            else:
                self.list_formats(info_dict)
        if self.params.get('listsubtitles'):
            if 'automatic_captions' in info_dict:
                self.list_subtitles(
                    info_dict['id'], automatic_captions, 'automatic captions')
            self.list_subtitles(info_dict['id'], subtitles, 'subtitles')
        # Stop after listing unless the 'simulate' param was set explicitly
        list_only = self.params.get('simulate') is None and (
            self.params.get('list_thumbnails') or self.params.get('listformats') or self.params.get('listsubtitles'))
        if list_only:
            # Without this printing, -F --print-json will not work
            self.__forced_printings(info_dict, self.prepare_filename(info_dict), incomplete=True)
            return

        format_selector = self.format_selector
        if format_selector is None:
            req_format = self._default_format_spec(info_dict, download=download)
            self.write_debug('Default format spec: %s' % req_format)
            format_selector = self.build_format_selector(req_format)

        # While in format selection we may need to have an access to the original
        # format set in order to calculate some metrics or do some processing.
        # For now we need to be able to guess whether original formats provided
        # by extractor are incomplete or not (i.e. whether extractor provides only
        # video-only or audio-only formats) for proper formats selection for
        # extractors with such incomplete formats (see
        # https://github.com/ytdl-org/youtube-dl/pull/5556).
        # Since formats may be filtered during format selection and may not match
        # the original formats the results may be incorrect. Thus original formats
        # or pre-calculated metrics should be passed to format selection routines
        # as well.
        # We will pass a context object containing all necessary additional data
        # instead of just formats.
        # This fixes incorrect format selection issue (see
        # https://github.com/ytdl-org/youtube-dl/issues/10083).
        incomplete_formats = (
            # All formats are video-only or
            all(f.get('vcodec') != 'none' and f.get('acodec') == 'none' for f in formats)
            # all formats are audio-only
            or all(f.get('vcodec') == 'none' and f.get('acodec') != 'none' for f in formats))

        ctx = {
            'formats': formats,
            'incomplete_formats': incomplete_formats,
        }

        formats_to_download = list(format_selector(ctx))
        if not formats_to_download:
            if not self.params.get('ignore_no_formats_error'):
                raise ExtractorError('Requested format is not available', expected=True,
                                     video_id=info_dict['id'], ie=info_dict['extractor'])
            else:
                self.report_warning('Requested format is not available')
                # Process what we can, even without any available formats.
                self.process_info(dict(info_dict))
        elif download:
            self.to_screen(
                '[info] %s: Downloading %d format(s): %s' % (
                    info_dict['id'], len(formats_to_download),
                    ", ".join([f['format_id'] for f in formats_to_download])))
            for fmt in formats_to_download:
                # Each selected format is processed on its own copy of the info dict
                new_info = dict(info_dict)
                # Save a reference to the original info_dict so that it can be modified in process_info if needed
                new_info['__original_infodict'] = info_dict
                new_info.update(fmt)
                self.process_info(new_info)
        # We update the info dict with the selected best quality format (backwards compatibility)
        if formats_to_download:
            info_dict.update(formats_to_download[-1])
        return info_dict
2521
2522     def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
2523         """Select the requested subtitles and their format"""
2524         available_subs = {}
2525         if normal_subtitles and self.params.get('writesubtitles'):
2526             available_subs.update(normal_subtitles)
2527         if automatic_captions and self.params.get('writeautomaticsub'):
2528             for lang, cap_info in automatic_captions.items():
2529                 if lang not in available_subs:
2530                     available_subs[lang] = cap_info
2531
2532         if (not self.params.get('writesubtitles') and not
2533                 self.params.get('writeautomaticsub') or not
2534                 available_subs):
2535             return None
2536
2537         all_sub_langs = available_subs.keys()
2538         if self.params.get('allsubtitles', False):
2539             requested_langs = all_sub_langs
2540         elif self.params.get('subtitleslangs', False):
2541             # A list is used so that the order of languages will be the same as
2542             # given in subtitleslangs. See https://github.com/yt-dlp/yt-dlp/issues/1041
2543             requested_langs = []
2544             for lang_re in self.params.get('subtitleslangs'):
2545                 if lang_re == 'all':
2546                     requested_langs.extend(all_sub_langs)
2547                     continue
2548                 discard = lang_re[0] == '-'
2549                 if discard:
2550                     lang_re = lang_re[1:]
2551                 current_langs = filter(re.compile(lang_re + '$').match, all_sub_langs)
2552                 if discard:
2553                     for lang in current_langs:
2554                         while lang in requested_langs:
2555                             requested_langs.remove(lang)
2556                 else:
2557                     requested_langs.extend(current_langs)
2558             requested_langs = orderedSet(requested_langs)
2559         elif 'en' in available_subs:
2560             requested_langs = ['en']
2561         else:
2562             requested_langs = [list(all_sub_langs)[0]]
2563         if requested_langs:
2564             self.write_debug('Downloading subtitles: %s' % ', '.join(requested_langs))
2565
2566         formats_query = self.params.get('subtitlesformat', 'best')
2567         formats_preference = formats_query.split('/') if formats_query else []
2568         subs = {}
2569         for lang in requested_langs:
2570             formats = available_subs.get(lang)
2571             if formats is None:
2572                 self.report_warning('%s subtitles not available for %s' % (lang, video_id))
2573                 continue
2574             for ext in formats_preference:
2575                 if ext == 'best':
2576                     f = formats[-1]
2577                     break
2578                 matches = list(filter(lambda f: f['ext'] == ext, formats))
2579                 if matches:
2580                     f = matches[-1]
2581                     break
2582             else:
2583                 f = formats[-1]
2584                 self.report_warning(
2585                     'No subtitle format found matching "%s" for language %s, '
2586                     'using %s' % (formats_query, lang, f['ext']))
2587             subs[lang] = f
2588         return subs
2589
2590     def __forced_printings(self, info_dict, filename, incomplete):
2591         def print_mandatory(field, actual_field=None):
2592             if actual_field is None:
2593                 actual_field = field
2594             if (self.params.get('force%s' % field, False)
2595                     and (not incomplete or info_dict.get(actual_field) is not None)):
2596                 self.to_stdout(info_dict[actual_field])
2597
2598         def print_optional(field):
2599             if (self.params.get('force%s' % field, False)
2600                     and info_dict.get(field) is not None):
2601                 self.to_stdout(info_dict[field])
2602
2603         info_dict = info_dict.copy()
2604         if filename is not None:
2605             info_dict['filename'] = filename
2606         if info_dict.get('requested_formats') is not None:
2607             # For RTMP URLs, also include the playpath
2608             info_dict['urls'] = '\n'.join(f['url'] + f.get('play_path', '') for f in info_dict['requested_formats'])
2609         elif 'url' in info_dict:
2610             info_dict['urls'] = info_dict['url'] + info_dict.get('play_path', '')
2611
2612         if self.params.get('forceprint') or self.params.get('forcejson'):
2613             self.post_extract(info_dict)
2614         for tmpl in self.params.get('forceprint', []):
2615             mobj = re.match(r'\w+(=?)$', tmpl)
2616             if mobj and mobj.group(1):
2617                 tmpl = f'{tmpl[:-1]} = %({tmpl[:-1]})s'
2618             elif mobj:
2619                 tmpl = '%({})s'.format(tmpl)
2620             self.to_stdout(self.evaluate_outtmpl(tmpl, info_dict))
2621
2622         print_mandatory('title')
2623         print_mandatory('id')
2624         print_mandatory('url', 'urls')
2625         print_optional('thumbnail')
2626         print_optional('description')
2627         print_optional('filename')
2628         if self.params.get('forceduration') and info_dict.get('duration') is not None:
2629             self.to_stdout(formatSeconds(info_dict['duration']))
2630         print_mandatory('format')
2631
2632         if self.params.get('forcejson'):
2633             self.to_stdout(json.dumps(self.sanitize_info(info_dict)))
2634
2635     def dl(self, name, info, subtitle=False, test=False):
2636         if not info.get('url'):
2637             self.raise_no_formats(info, True)
2638
2639         if test:
2640             verbose = self.params.get('verbose')
2641             params = {
2642                 'test': True,
2643                 'quiet': self.params.get('quiet') or not verbose,
2644                 'verbose': verbose,
2645                 'noprogress': not verbose,
2646                 'nopart': True,
2647                 'skip_unavailable_fragments': False,
2648                 'keep_fragments': False,
2649                 'overwrites': True,
2650                 '_no_ytdl_file': True,
2651             }
2652         else:
2653             params = self.params
2654         fd = get_suitable_downloader(info, params, to_stdout=(name == '-'))(self, params)
2655         if not test:
2656             for ph in self._progress_hooks:
2657                 fd.add_progress_hook(ph)
2658             urls = '", "'.join([f['url'] for f in info.get('requested_formats', [])] or [info['url']])
2659             self.write_debug('Invoking downloader on "%s"' % urls)
2660
2661         new_info = copy.deepcopy(self._copy_infodict(info))
2662         if new_info.get('http_headers') is None:
2663             new_info['http_headers'] = self._calc_headers(new_info)
2664         return fd.download(name, new_info, subtitle)
2665
2666     def process_info(self, info_dict):
2667         """Process a single resolved IE result."""
2668
2669         assert info_dict.get('_type', 'video') == 'video'
2670
2671         max_downloads = self.params.get('max_downloads')
2672         if max_downloads is not None:
2673             if self._num_downloads >= int(max_downloads):
2674                 raise MaxDownloadsReached()
2675
2676         # TODO: backward compatibility, to be removed
2677         info_dict['fulltitle'] = info_dict['title']
2678
2679         if 'format' not in info_dict and 'ext' in info_dict:
2680             info_dict['format'] = info_dict['ext']
2681
2682         if self._match_entry(info_dict) is not None:
2683             return
2684
2685         self.post_extract(info_dict)
2686         self._num_downloads += 1
2687
2688         # info_dict['_filename'] needs to be set for backward compatibility
2689         info_dict['_filename'] = full_filename = self.prepare_filename(info_dict, warn=True)
2690         temp_filename = self.prepare_filename(info_dict, 'temp')
2691         files_to_move = {}
2692
2693         # Forced printings
2694         self.__forced_printings(info_dict, full_filename, incomplete=('format' not in info_dict))
2695
2696         if self.params.get('simulate'):
2697             if self.params.get('force_write_download_archive', False):
2698                 self.record_download_archive(info_dict)
2699             # Do nothing else if in simulate mode
2700             return
2701
2702         if full_filename is None:
2703             return
2704         if not self._ensure_dir_exists(encodeFilename(full_filename)):
2705             return
2706         if not self._ensure_dir_exists(encodeFilename(temp_filename)):
2707             return
2708
2709         if self._write_description('video', info_dict,
2710                                    self.prepare_filename(info_dict, 'description')) is None:
2711             return
2712
2713         sub_files = self._write_subtitles(info_dict, temp_filename)
2714         if sub_files is None:
2715             return
2716         files_to_move.update(dict(sub_files))
2717
2718         thumb_files = self._write_thumbnails(
2719             'video', info_dict, temp_filename, self.prepare_filename(info_dict, 'thumbnail'))
2720         if thumb_files is None:
2721             return
2722         files_to_move.update(dict(thumb_files))
2723
2724         infofn = self.prepare_filename(info_dict, 'infojson')
2725         _infojson_written = self._write_info_json('video', info_dict, infofn)
2726         if _infojson_written:
2727             info_dict['infojson_filename'] = infofn
2728             # For backward compatability, even though it was a private field
2729             info_dict['__infojson_filename'] = infofn
2730         elif _infojson_written is None:
2731             return
2732
2733         # Note: Annotations are deprecated
2734         annofn = None
2735         if self.params.get('writeannotations', False):
2736             annofn = self.prepare_filename(info_dict, 'annotation')
2737         if annofn:
2738             if not self._ensure_dir_exists(encodeFilename(annofn)):
2739                 return
2740             if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(annofn)):
2741                 self.to_screen('[info] Video annotations are already present')
2742             elif not info_dict.get('annotations'):
2743                 self.report_warning('There are no annotations to write.')
2744             else:
2745                 try:
2746                     self.to_screen('[info] Writing video annotations to: ' + annofn)
2747                     with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
2748                         annofile.write(info_dict['annotations'])
2749                 except (KeyError, TypeError):
2750                     self.report_warning('There are no annotations to write.')
2751                 except (OSError, IOError):
2752                     self.report_error('Cannot write annotations file: ' + annofn)
2753                     return
2754
2755         # Write internet shortcut files
2756         def _write_link_file(link_type):
2757             if 'webpage_url' not in info_dict:
2758                 self.report_error('Cannot write internet shortcut file because the "webpage_url" field is missing in the media information')
2759                 return False
2760             linkfn = replace_extension(self.prepare_filename(info_dict, 'link'), link_type, info_dict.get('ext'))
2761             if not self._ensure_dir_exists(encodeFilename(linkfn)):
2762                 return False
2763             if self.params.get('overwrites', True) and os.path.exists(encodeFilename(linkfn)):
2764                 self.to_screen(f'[info] Internet shortcut (.{link_type}) is already present')
2765                 return True
2766             try:
2767                 self.to_screen(f'[info] Writing internet shortcut (.{link_type}) to: {linkfn}')
2768                 with io.open(encodeFilename(to_high_limit_path(linkfn)), 'w', encoding='utf-8',
2769                              newline='\r\n' if link_type == 'url' else '\n') as linkfile:
2770                     template_vars = {'url': iri_to_uri(info_dict['webpage_url'])}
2771                     if link_type == 'desktop':
2772                         template_vars['filename'] = linkfn[:-(len(link_type) + 1)]
2773                     linkfile.write(LINK_TEMPLATES[link_type] % template_vars)
2774             except (OSError, IOError):
2775                 self.report_error(f'Cannot write internet shortcut {linkfn}')
2776                 return False
2777             return True
2778
2779         write_links = {
2780             'url': self.params.get('writeurllink'),
2781             'webloc': self.params.get('writewebloclink'),
2782             'desktop': self.params.get('writedesktoplink'),
2783         }
2784         if self.params.get('writelink'):
2785             link_type = ('webloc' if sys.platform == 'darwin'
2786                          else 'desktop' if sys.platform.startswith('linux')
2787                          else 'url')
2788             write_links[link_type] = True
2789
2790         if any(should_write and not _write_link_file(link_type)
2791                for link_type, should_write in write_links.items()):
2792             return
2793
2794         try:
2795             info_dict, files_to_move = self.pre_process(info_dict, 'before_dl', files_to_move)
2796         except PostProcessingError as err:
2797             self.report_error('Preprocessing: %s' % str(err))
2798             return
2799
2800         must_record_download_archive = False
2801         if self.params.get('skip_download', False):
2802             info_dict['filepath'] = temp_filename
2803             info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))
2804             info_dict['__files_to_move'] = files_to_move
2805             info_dict = self.run_pp(MoveFilesAfterDownloadPP(self, False), info_dict)
2806         else:
2807             # Download
2808             info_dict.setdefault('__postprocessors', [])
2809             try:
2810
2811                 def existing_file(*filepaths):
2812                     ext = info_dict.get('ext')
2813                     final_ext = self.params.get('final_ext', ext)
2814                     existing_files = []
2815                     for file in orderedSet(filepaths):
2816                         if final_ext != ext:
2817                             converted = replace_extension(file, final_ext, ext)
2818                             if os.path.exists(encodeFilename(converted)):
2819                                 existing_files.append(converted)
2820                         if os.path.exists(encodeFilename(file)):
2821                             existing_files.append(file)
2822
2823                     if not existing_files or self.params.get('overwrites', False):
2824                         for file in orderedSet(existing_files):
2825                             self.report_file_delete(file)
2826                             os.remove(encodeFilename(file))
2827                         return None
2828
2829                     info_dict['ext'] = os.path.splitext(existing_files[0])[1][1:]
2830                     return existing_files[0]
2831
2832                 success = True
2833                 if info_dict.get('requested_formats') is not None:
2834
2835                     def compatible_formats(formats):
2836                         # TODO: some formats actually allow this (mkv, webm, ogg, mp4), but not all of them.
2837                         video_formats = [format for format in formats if format.get('vcodec') != 'none']
2838                         audio_formats = [format for format in formats if format.get('acodec') != 'none']
2839                         if len(video_formats) > 2 or len(audio_formats) > 2:
2840                             return False
2841
2842                         # Check extension
2843                         exts = set(format.get('ext') for format in formats)
2844                         COMPATIBLE_EXTS = (
2845                             set(('mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'ismv', 'isma')),
2846                             set(('webm',)),
2847                         )
2848                         for ext_sets in COMPATIBLE_EXTS:
2849                             if ext_sets.issuperset(exts):
2850                                 return True
2851                         # TODO: Check acodec/vcodec
2852                         return False
2853
2854                     requested_formats = info_dict['requested_formats']
2855                     old_ext = info_dict['ext']
2856                     if self.params.get('merge_output_format') is None:
2857                         if not compatible_formats(requested_formats):
2858                             info_dict['ext'] = 'mkv'
2859                             self.report_warning(
2860                                 'Requested formats are incompatible for merge and will be merged into mkv')
2861                         if (info_dict['ext'] == 'webm'
2862                                 and info_dict.get('thumbnails')
2863                                 # check with type instead of pp_key, __name__, or isinstance
2864                                 # since we dont want any custom PPs to trigger this
2865                                 and any(type(pp) == EmbedThumbnailPP for pp in self._pps['post_process'])):
2866                             info_dict['ext'] = 'mkv'
2867                             self.report_warning(
2868                                 'webm doesn\'t support embedding a thumbnail, mkv will be used')
2869                     new_ext = info_dict['ext']
2870
2871                     def correct_ext(filename, ext=new_ext):
2872                         if filename == '-':
2873                             return filename
2874                         filename_real_ext = os.path.splitext(filename)[1][1:]
2875                         filename_wo_ext = (
2876                             os.path.splitext(filename)[0]
2877                             if filename_real_ext in (old_ext, new_ext)
2878                             else filename)
2879                         return '%s.%s' % (filename_wo_ext, ext)
2880
2881                     # Ensure filename always has a correct extension for successful merge
2882                     full_filename = correct_ext(full_filename)
2883                     temp_filename = correct_ext(temp_filename)
2884                     dl_filename = existing_file(full_filename, temp_filename)
2885                     info_dict['__real_download'] = False
2886
2887                     if dl_filename is not None:
2888                         self.report_file_already_downloaded(dl_filename)
2889                     elif get_suitable_downloader(info_dict, self.params, to_stdout=temp_filename == '-'):
2890                         info_dict['url'] = '\n'.join(f['url'] for f in requested_formats)
2891                         success, real_download = self.dl(temp_filename, info_dict)
2892                         info_dict['__real_download'] = real_download
2893                     else:
2894                         downloaded = []
2895                         merger = FFmpegMergerPP(self)
2896                         if self.params.get('allow_unplayable_formats'):
2897                             self.report_warning(
2898                                 'You have requested merging of multiple formats '
2899                                 'while also allowing unplayable formats to be downloaded. '
2900                                 'The formats won\'t be merged to prevent data corruption.')
2901                         elif not merger.available:
2902                             self.report_warning(
2903                                 'You have requested merging of multiple formats but ffmpeg is not installed. '
2904                                 'The formats won\'t be merged.')
2905
2906                         if temp_filename == '-':
2907                             reason = ('using a downloader other than ffmpeg' if FFmpegFD.can_merge_formats(info_dict)
2908                                       else 'but the formats are incompatible for simultaneous download' if merger.available
2909                                       else 'but ffmpeg is not installed')
2910                             self.report_warning(
2911                                 f'You have requested downloading multiple formats to stdout {reason}. '
2912                                 'The formats will be streamed one after the other')
2913                             fname = temp_filename
2914                         for f in requested_formats:
2915                             new_info = dict(info_dict)
2916                             del new_info['requested_formats']
2917                             new_info.update(f)
2918                             if temp_filename != '-':
2919                                 fname = prepend_extension(
2920                                     correct_ext(temp_filename, new_info['ext']),
2921                                     'f%s' % f['format_id'], new_info['ext'])
2922                                 if not self._ensure_dir_exists(fname):
2923                                     return
2924                                 f['filepath'] = fname
2925                                 downloaded.append(fname)
2926                             partial_success, real_download = self.dl(fname, new_info)
2927                             info_dict['__real_download'] = info_dict['__real_download'] or real_download
2928                             success = success and partial_success
2929                         if merger.available and not self.params.get('allow_unplayable_formats'):
2930                             info_dict['__postprocessors'].append(merger)
2931                             info_dict['__files_to_merge'] = downloaded
2932                             # Even if there were no downloads, it is being merged only now
2933                             info_dict['__real_download'] = True
2934                         else:
2935                             for file in downloaded:
2936                                 files_to_move[file] = None
2937                 else:
2938                     # Just a single file
2939                     dl_filename = existing_file(full_filename, temp_filename)
2940                     if dl_filename is None or dl_filename == temp_filename:
2941                         # dl_filename == temp_filename could mean that the file was partially downloaded with --no-part.
2942                         # So we should try to resume the download
2943                         success, real_download = self.dl(temp_filename, info_dict)
2944                         info_dict['__real_download'] = real_download
2945                     else:
2946                         self.report_file_already_downloaded(dl_filename)
2947
2948                 dl_filename = dl_filename or temp_filename
2949                 info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))
2950
2951             except network_exceptions as err:
2952                 self.report_error('unable to download video data: %s' % error_to_compat_str(err))
2953                 return
2954             except (OSError, IOError) as err:
2955                 raise UnavailableVideoError(err)
2956             except (ContentTooShortError, ) as err:
2957                 self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
2958                 return
2959
2960             if success and full_filename != '-':
2961
2962                 def fixup():
2963                     do_fixup = True
2964                     fixup_policy = self.params.get('fixup')
2965                     vid = info_dict['id']
2966
2967                     if fixup_policy in ('ignore', 'never'):
2968                         return
2969                     elif fixup_policy == 'warn':
2970                         do_fixup = False
2971                     elif fixup_policy != 'force':
2972                         assert fixup_policy in ('detect_or_warn', None)
2973                         if not info_dict.get('__real_download'):
2974                             do_fixup = False
2975
2976                     def ffmpeg_fixup(cndn, msg, cls):
2977                         if not cndn:
2978                             return
2979                         if not do_fixup:
2980                             self.report_warning(f'{vid}: {msg}')
2981                             return
2982                         pp = cls(self)
2983                         if pp.available:
2984                             info_dict['__postprocessors'].append(pp)
2985                         else:
2986                             self.report_warning(f'{vid}: {msg}. Install ffmpeg to fix this automatically')
2987
2988                     stretched_ratio = info_dict.get('stretched_ratio')
2989                     ffmpeg_fixup(
2990                         stretched_ratio not in (1, None),
2991                         f'Non-uniform pixel ratio {stretched_ratio}',
2992                         FFmpegFixupStretchedPP)
2993
2994                     ffmpeg_fixup(
2995                         (info_dict.get('requested_formats') is None
2996                          and info_dict.get('container') == 'm4a_dash'
2997                          and info_dict.get('ext') == 'm4a'),
2998                         'writing DASH m4a. Only some players support this container',
2999                         FFmpegFixupM4aPP)
3000
3001                     downloader = get_suitable_downloader(info_dict, self.params) if 'protocol' in info_dict else None
3002                     downloader = downloader.__name__ if downloader else None
3003                     ffmpeg_fixup(info_dict.get('requested_formats') is None and downloader == 'HlsFD',
3004                                  'Possible MPEG-TS in MP4 container or malformed AAC timestamps',
3005                                  FFmpegFixupM3u8PP)
3006                     ffmpeg_fixup(downloader == 'WebSocketFragmentFD', 'Malformed timestamps detected', FFmpegFixupTimestampPP)
3007                     ffmpeg_fixup(downloader == 'WebSocketFragmentFD', 'Malformed duration detected', FFmpegFixupDurationPP)
3008
3009                 fixup()
3010                 try:
3011                     info_dict = self.post_process(dl_filename, info_dict, files_to_move)
3012                 except PostProcessingError as err:
3013                     self.report_error('Postprocessing: %s' % str(err))
3014                     return
3015                 try:
3016                     for ph in self._post_hooks:
3017                         ph(info_dict['filepath'])
3018                 except Exception as err:
3019                     self.report_error('post hooks: %s' % str(err))
3020                     return
3021                 must_record_download_archive = True
3022
3023         if must_record_download_archive or self.params.get('force_write_download_archive', False):
3024             self.record_download_archive(info_dict)
3025         max_downloads = self.params.get('max_downloads')
3026         if max_downloads is not None and self._num_downloads >= int(max_downloads):
3027             raise MaxDownloadsReached()
3028
3029     def __download_wrapper(self, func):
3030         @functools.wraps(func)
3031         def wrapper(*args, **kwargs):
3032             try:
3033                 res = func(*args, **kwargs)
3034             except UnavailableVideoError as e:
3035                 self.report_error(e)
3036             except MaxDownloadsReached as e:
3037                 self.to_screen(f'[info] {e}')
3038                 raise
3039             except DownloadCancelled as e:
3040                 self.to_screen(f'[info] {e}')
3041                 if not self.params.get('break_per_url'):
3042                     raise
3043             else:
3044                 if self.params.get('dump_single_json', False):
3045                     self.post_extract(res)
3046                     self.to_stdout(json.dumps(self.sanitize_info(res)))
3047         return wrapper
3048
3049     def download(self, url_list):
3050         """Download a given list of URLs."""
3051         url_list = variadic(url_list)  # Passing a single URL is a common mistake
3052         outtmpl = self.outtmpl_dict['default']
3053         if (len(url_list) > 1
3054                 and outtmpl != '-'
3055                 and '%' not in outtmpl
3056                 and self.params.get('max_downloads') != 1):
3057             raise SameFileError(outtmpl)
3058
3059         for url in url_list:
3060             self.__download_wrapper(self.extract_info)(
3061                 url, force_generic_extractor=self.params.get('force_generic_extractor', False))
3062
3063         return self._download_retcode
3064
3065     def download_with_info_file(self, info_filename):
3066         with contextlib.closing(fileinput.FileInput(
3067                 [info_filename], mode='r',
3068                 openhook=fileinput.hook_encoded('utf-8'))) as f:
3069             # FileInput doesn't have a read method, we can't call json.load
3070             info = self.sanitize_info(json.loads('\n'.join(f)), self.params.get('clean_infojson', True))
3071         try:
3072             self.__download_wrapper(self.process_ie_result)(info, download=True)
3073         except (DownloadError, EntryNotInPlaylist, ReExtractInfo) as e:
3074             if not isinstance(e, EntryNotInPlaylist):
3075                 self.to_stderr('\r')
3076             webpage_url = info.get('webpage_url')
3077             if webpage_url is not None:
3078                 self.report_warning(f'The info failed to download: {e}; trying with URL {webpage_url}')
3079                 return self.download([webpage_url])
3080             else:
3081                 raise
3082         return self._download_retcode
3083
3084     @staticmethod
3085     def sanitize_info(info_dict, remove_private_keys=False):
3086         ''' Sanitize the infodict for converting to json '''
3087         if info_dict is None:
3088             return info_dict
3089         info_dict.setdefault('epoch', int(time.time()))
3090         remove_keys = {'__original_infodict'}  # Always remove this since this may contain a copy of the entire dict
3091         keep_keys = ['_type']  # Always keep this to facilitate load-info-json
3092         if remove_private_keys:
3093             remove_keys |= {
3094                 'requested_formats', 'requested_subtitles', 'requested_entries', 'entries',
3095                 'filepath', 'infojson_filename', 'original_url', 'playlist_autonumber',
3096             }
3097             empty_values = (None, {}, [], set(), tuple())
3098             reject = lambda k, v: k not in keep_keys and (
3099                 k.startswith('_') or k in remove_keys or v in empty_values)
3100         else:
3101             reject = lambda k, v: k in remove_keys
3102         filter_fn = lambda obj: (
3103             list(map(filter_fn, obj)) if isinstance(obj, (LazyList, list, tuple, set))
3104             else obj if not isinstance(obj, dict)
3105             else dict((k, filter_fn(v)) for k, v in obj.items() if not reject(k, v)))
3106         return filter_fn(info_dict)
3107
3108     @staticmethod
3109     def filter_requested_info(info_dict, actually_filter=True):
3110         ''' Alias of sanitize_info for backward compatibility '''
3111         return YoutubeDL.sanitize_info(info_dict, actually_filter)
3112
3113     def run_pp(self, pp, infodict):
3114         files_to_delete = []
3115         if '__files_to_move' not in infodict:
3116             infodict['__files_to_move'] = {}
3117         try:
3118             files_to_delete, infodict = pp.run(infodict)
3119         except PostProcessingError as e:
3120             # Must be True and not 'only_download'
3121             if self.params.get('ignoreerrors') is True:
3122                 self.report_error(e)
3123                 return infodict
3124             raise
3125
3126         if not files_to_delete:
3127             return infodict
3128         if self.params.get('keepvideo', False):
3129             for f in files_to_delete:
3130                 infodict['__files_to_move'].setdefault(f, '')
3131         else:
3132             for old_filename in set(files_to_delete):
3133                 self.to_screen('Deleting original file %s (pass -k to keep)' % old_filename)
3134                 try:
3135                     os.remove(encodeFilename(old_filename))
3136                 except (IOError, OSError):
3137                     self.report_warning('Unable to remove downloaded original file')
3138                 if old_filename in infodict['__files_to_move']:
3139                     del infodict['__files_to_move'][old_filename]
3140         return infodict
3141
3142     @staticmethod
3143     def post_extract(info_dict):
3144         def actual_post_extract(info_dict):
3145             if info_dict.get('_type') in ('playlist', 'multi_video'):
3146                 for video_dict in info_dict.get('entries', {}):
3147                     actual_post_extract(video_dict or {})
3148                 return
3149
3150             post_extractor = info_dict.get('__post_extractor') or (lambda: {})
3151             extra = post_extractor().items()
3152             info_dict.update(extra)
3153             info_dict.pop('__post_extractor', None)
3154
3155             original_infodict = info_dict.get('__original_infodict') or {}
3156             original_infodict.update(extra)
3157             original_infodict.pop('__post_extractor', None)
3158
3159         actual_post_extract(info_dict or {})
3160
3161     def pre_process(self, ie_info, key='pre_process', files_to_move=None):
3162         info = dict(ie_info)
3163         info['__files_to_move'] = files_to_move or {}
3164         for pp in self._pps[key]:
3165             info = self.run_pp(pp, info)
3166         return info, info.pop('__files_to_move', None)
3167
3168     def post_process(self, filename, ie_info, files_to_move=None):
3169         """Run all the postprocessors on the given file."""
3170         info = dict(ie_info)
3171         info['filepath'] = filename
3172         info['__files_to_move'] = files_to_move or {}
3173
3174         for pp in ie_info.get('__postprocessors', []) + self._pps['post_process']:
3175             info = self.run_pp(pp, info)
3176         info = self.run_pp(MoveFilesAfterDownloadPP(self), info)
3177         del info['__files_to_move']
3178         for pp in self._pps['after_move']:
3179             info = self.run_pp(pp, info)
3180         return info
3181
3182     def _make_archive_id(self, info_dict):
3183         video_id = info_dict.get('id')
3184         if not video_id:
3185             return
3186         # Future-proof against any change in case
3187         # and backwards compatibility with prior versions
3188         extractor = info_dict.get('extractor_key') or info_dict.get('ie_key')  # key in a playlist
3189         if extractor is None:
3190             url = str_or_none(info_dict.get('url'))
3191             if not url:
3192                 return
3193             # Try to find matching extractor for the URL and take its ie_key
3194             for ie_key, ie in self._ies.items():
3195                 if ie.suitable(url):
3196                     extractor = ie_key
3197                     break
3198             else:
3199                 return
3200         return '%s %s' % (extractor.lower(), video_id)
3201
3202     def in_download_archive(self, info_dict):
3203         fn = self.params.get('download_archive')
3204         if fn is None:
3205             return False
3206
3207         vid_id = self._make_archive_id(info_dict)
3208         if not vid_id:
3209             return False  # Incomplete video information
3210
3211         return vid_id in self.archive
3212
3213     def record_download_archive(self, info_dict):
3214         fn = self.params.get('download_archive')
3215         if fn is None:
3216             return
3217         vid_id = self._make_archive_id(info_dict)
3218         assert vid_id
3219         with locked_file(fn, 'a', encoding='utf-8') as archive_file:
3220             archive_file.write(vid_id + '\n')
3221         self.archive.add(vid_id)
3222
3223     @staticmethod
3224     def format_resolution(format, default='unknown'):
3225         if format.get('vcodec') == 'none' and format.get('acodec') != 'none':
3226             return 'audio only'
3227         if format.get('resolution') is not None:
3228             return format['resolution']
3229         if format.get('width') and format.get('height'):
3230             return '%dx%d' % (format['width'], format['height'])
3231         elif format.get('height'):
3232             return '%sp' % format['height']
3233         elif format.get('width'):
3234             return '%dx?' % format['width']
3235         return default
3236
3237     def _format_note(self, fdict):
3238         res = ''
3239         if fdict.get('ext') in ['f4f', 'f4m']:
3240             res += '(unsupported)'
3241         if fdict.get('language'):
3242             if res:
3243                 res += ' '
3244             res += '[%s]' % fdict['language']
3245         if fdict.get('format_note') is not None:
3246             if res:
3247                 res += ' '
3248             res += fdict['format_note']
3249         if fdict.get('tbr') is not None:
3250             if res:
3251                 res += ', '
3252             res += '%4dk' % fdict['tbr']
3253         if fdict.get('container') is not None:
3254             if res:
3255                 res += ', '
3256             res += '%s container' % fdict['container']
3257         if (fdict.get('vcodec') is not None
3258                 and fdict.get('vcodec') != 'none'):
3259             if res:
3260                 res += ', '
3261             res += fdict['vcodec']
3262             if fdict.get('vbr') is not None:
3263                 res += '@'
3264         elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
3265             res += 'video@'
3266         if fdict.get('vbr') is not None:
3267             res += '%4dk' % fdict['vbr']
3268         if fdict.get('fps') is not None:
3269             if res:
3270                 res += ', '
3271             res += '%sfps' % fdict['fps']
3272         if fdict.get('acodec') is not None:
3273             if res:
3274                 res += ', '
3275             if fdict['acodec'] == 'none':
3276                 res += 'video only'
3277             else:
3278                 res += '%-5s' % fdict['acodec']
3279         elif fdict.get('abr') is not None:
3280             if res:
3281                 res += ', '
3282             res += 'audio'
3283         if fdict.get('abr') is not None:
3284             res += '@%3dk' % fdict['abr']
3285         if fdict.get('asr') is not None:
3286             res += ' (%5dHz)' % fdict['asr']
3287         if fdict.get('filesize') is not None:
3288             if res:
3289                 res += ', '
3290             res += format_bytes(fdict['filesize'])
3291         elif fdict.get('filesize_approx') is not None:
3292             if res:
3293                 res += ', '
3294             res += '~' + format_bytes(fdict['filesize_approx'])
3295         return res
3296
3297     def _list_format_headers(self, *headers):
3298         if self.params.get('listformats_table', True) is not False:
3299             return [self._format_screen(header, self.Styles.HEADERS) for header in headers]
3300         return headers
3301
3302     def list_formats(self, info_dict):
3303         formats = info_dict.get('formats', [info_dict])
3304         new_format = self.params.get('listformats_table', True) is not False
3305         if new_format:
3306             delim = self._format_screen('\u2502', self.Styles.DELIM, '|', test_encoding=True)
3307             table = [
3308                 [
3309                     self._format_screen(format_field(f, 'format_id'), self.Styles.ID),
3310                     format_field(f, 'ext'),
3311                     format_field(f, func=self.format_resolution, ignore=('audio only', 'images')),
3312                     format_field(f, 'fps', '\t%d'),
3313                     format_field(f, 'dynamic_range', '%s', ignore=(None, 'SDR')).replace('HDR', ''),
3314                     delim,
3315                     format_field(f, 'filesize', ' \t%s', func=format_bytes) + format_field(f, 'filesize_approx', '~\t%s', func=format_bytes),
3316                     format_field(f, 'tbr', '\t%dk'),
3317                     shorten_protocol_name(f.get('protocol', '').replace('native', 'n')),
3318                     delim,
3319                     format_field(f, 'vcodec', default='unknown').replace(
3320                         'none',
3321                         'images' if f.get('acodec') == 'none'
3322                         else self._format_screen('audio only', self.Styles.SUPPRESS)),
3323                     format_field(f, 'vbr', '\t%dk'),
3324                     format_field(f, 'acodec', default='unknown').replace(
3325                         'none',
3326                         '' if f.get('vcodec') == 'none'
3327                         else self._format_screen('video only', self.Styles.SUPPRESS)),
3328                     format_field(f, 'abr', '\t%dk'),
3329                     format_field(f, 'asr', '\t%dHz'),
3330                     join_nonempty(
3331                         self._format_screen('UNSUPPORTED', 'light red') if f.get('ext') in ('f4f', 'f4m') else None,
3332                         format_field(f, 'language', '[%s]'),
3333                         join_nonempty(
3334                             format_field(f, 'format_note'),
3335                             format_field(f, 'container', ignore=(None, f.get('ext'))),
3336                             delim=', '),
3337                         delim=' '),
3338                 ] for f in formats if f.get('preference') is None or f['preference'] >= -1000]
3339             header_line = self._list_format_headers(
3340                 'ID', 'EXT', 'RESOLUTION', '\tFPS', 'HDR', delim, '\tFILESIZE', '\tTBR', 'PROTO',
3341                 delim, 'VCODEC', '\tVBR', 'ACODEC', '\tABR', '\tASR', 'MORE INFO')
3342         else:
3343             table = [
3344                 [
3345                     format_field(f, 'format_id'),
3346                     format_field(f, 'ext'),
3347                     self.format_resolution(f),
3348                     self._format_note(f)]
3349                 for f in formats
3350                 if f.get('preference') is None or f['preference'] >= -1000]
3351             header_line = ['format code', 'extension', 'resolution', 'note']
3352
3353         self.to_screen(
3354             '[info] Available formats for %s:' % info_dict['id'])
3355         self.to_stdout(render_table(
3356             header_line, table,
3357             extra_gap=(0 if new_format else 1),
3358             hide_empty=new_format,
3359             delim=new_format and self._format_screen('\u2500', self.Styles.DELIM, '-', test_encoding=True)))
3360
3361     def list_thumbnails(self, info_dict):
3362         thumbnails = list(info_dict.get('thumbnails'))
3363         if not thumbnails:
3364             self.to_screen('[info] No thumbnails present for %s' % info_dict['id'])
3365             return
3366
3367         self.to_screen(
3368             '[info] Thumbnails for %s:' % info_dict['id'])
3369         self.to_stdout(render_table(
3370             self._list_format_headers('ID', 'Width', 'Height', 'URL'),
3371             [[t['id'], t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']] for t in thumbnails]))
3372
3373     def list_subtitles(self, video_id, subtitles, name='subtitles'):
3374         if not subtitles:
3375             self.to_screen('%s has no %s' % (video_id, name))
3376             return
3377         self.to_screen(
3378             'Available %s for %s:' % (name, video_id))
3379
3380         def _row(lang, formats):
3381             exts, names = zip(*((f['ext'], f.get('name') or 'unknown') for f in reversed(formats)))
3382             if len(set(names)) == 1:
3383                 names = [] if names[0] == 'unknown' else names[:1]
3384             return [lang, ', '.join(names), ', '.join(exts)]
3385
3386         self.to_stdout(render_table(
3387             self._list_format_headers('Language', 'Name', 'Formats'),
3388             [_row(lang, formats) for lang, formats in subtitles.items()],
3389             hide_empty=True))
3390
3391     def urlopen(self, req):
3392         """ Start an HTTP download """
3393         if isinstance(req, compat_basestring):
3394             req = sanitized_Request(req)
3395         return self._opener.open(req, timeout=self._socket_timeout)
3396
3397     def print_debug_header(self):
3398         if not self.params.get('verbose'):
3399             return
3400
3401         def get_encoding(stream):
3402             ret = getattr(stream, 'encoding', 'missing (%s)' % type(stream).__name__)
3403             if not supports_terminal_sequences(stream):
3404                 from .compat import WINDOWS_VT_MODE
3405                 ret += ' (No VT)' if WINDOWS_VT_MODE is False else ' (No ANSI)'
3406             return ret
3407
3408         encoding_str = 'Encodings: locale %s, fs %s, out %s, err %s, pref %s' % (
3409             locale.getpreferredencoding(),
3410             sys.getfilesystemencoding(),
3411             get_encoding(self._screen_file), get_encoding(self._err_file),
3412             self.get_encoding())
3413
3414         logger = self.params.get('logger')
3415         if logger:
3416             write_debug = lambda msg: logger.debug(f'[debug] {msg}')
3417             write_debug(encoding_str)
3418         else:
3419             write_string(f'[debug] {encoding_str}\n', encoding=None)
3420             write_debug = lambda msg: self._write_string(f'[debug] {msg}\n')
3421
3422         source = detect_variant()
3423         write_debug(join_nonempty(
3424             'yt-dlp version', __version__,
3425             f'[{RELEASE_GIT_HEAD}]' if RELEASE_GIT_HEAD else '',
3426             '' if source == 'unknown' else f'({source})',
3427             delim=' '))
3428         if not _LAZY_LOADER:
3429             if os.environ.get('YTDLP_NO_LAZY_EXTRACTORS'):
3430                 write_debug('Lazy loading extractors is forcibly disabled')
3431             else:
3432                 write_debug('Lazy loading extractors is disabled')
3433         if plugin_extractors or plugin_postprocessors:
3434             write_debug('Plugins: %s' % [
3435                 '%s%s' % (klass.__name__, '' if klass.__name__ == name else f' as {name}')
3436                 for name, klass in itertools.chain(plugin_extractors.items(), plugin_postprocessors.items())])
3437         if self.params.get('compat_opts'):
3438             write_debug('Compatibility options: %s' % ', '.join(self.params.get('compat_opts')))
3439
3440         if source == 'source':
3441             try:
3442                 sp = Popen(
3443                     ['git', 'rev-parse', '--short', 'HEAD'],
3444                     stdout=subprocess.PIPE, stderr=subprocess.PIPE,
3445                     cwd=os.path.dirname(os.path.abspath(__file__)))
3446                 out, err = sp.communicate_or_kill()
3447                 out = out.decode().strip()
3448                 if re.match('[0-9a-f]+', out):
3449                     write_debug('Git HEAD: %s' % out)
3450             except Exception:
3451                 try:
3452                     sys.exc_clear()
3453                 except Exception:
3454                     pass
3455
3456         def python_implementation():
3457             impl_name = platform.python_implementation()
3458             if impl_name == 'PyPy' and hasattr(sys, 'pypy_version_info'):
3459                 return impl_name + ' version %d.%d.%d' % sys.pypy_version_info[:3]
3460             return impl_name
3461
3462         write_debug('Python version %s (%s %s) - %s' % (
3463             platform.python_version(),
3464             python_implementation(),
3465             platform.architecture()[0],
3466             platform_name()))
3467
3468         exe_versions, ffmpeg_features = FFmpegPostProcessor.get_versions_and_features(self)
3469         ffmpeg_features = {key for key, val in ffmpeg_features.items() if val}
3470         if ffmpeg_features:
3471             exe_versions['ffmpeg'] += ' (%s)' % ','.join(ffmpeg_features)
3472
3473         exe_versions['rtmpdump'] = rtmpdump_version()
3474         exe_versions['phantomjs'] = PhantomJSwrapper._version()
3475         exe_str = ', '.join(
3476             f'{exe} {v}' for exe, v in sorted(exe_versions.items()) if v
3477         ) or 'none'
3478         write_debug('exe versions: %s' % exe_str)
3479
3480         from .downloader.websocket import has_websockets
3481         from .postprocessor.embedthumbnail import has_mutagen
3482         from .cookies import SQLITE_AVAILABLE, KEYRING_AVAILABLE
3483
3484         lib_str = join_nonempty(
3485             compat_pycrypto_AES and compat_pycrypto_AES.__name__.split('.')[0],
3486             KEYRING_AVAILABLE and 'keyring',
3487             has_mutagen and 'mutagen',
3488             SQLITE_AVAILABLE and 'sqlite',
3489             has_websockets and 'websockets',
3490             delim=', ') or 'none'
3491         write_debug('Optional libraries: %s' % lib_str)
3492
3493         proxy_map = {}
3494         for handler in self._opener.handlers:
3495             if hasattr(handler, 'proxies'):
3496                 proxy_map.update(handler.proxies)
3497         write_debug(f'Proxy map: {proxy_map}')
3498
3499         # Not implemented
3500         if False and self.params.get('call_home'):
3501             ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode('utf-8')
3502             write_debug('Public IP address: %s' % ipaddr)
3503             latest_version = self.urlopen(
3504                 'https://yt-dl.org/latest/version').read().decode('utf-8')
3505             if version_tuple(latest_version) > version_tuple(__version__):
3506                 self.report_warning(
3507                     'You are using an outdated version (newest version: %s)! '
3508                     'See https://yt-dl.org/update if you need help updating.' %
3509                     latest_version)
3510
3511     def _setup_opener(self):
3512         timeout_val = self.params.get('socket_timeout')
3513         self._socket_timeout = 20 if timeout_val is None else float(timeout_val)
3514
3515         opts_cookiesfrombrowser = self.params.get('cookiesfrombrowser')
3516         opts_cookiefile = self.params.get('cookiefile')
3517         opts_proxy = self.params.get('proxy')
3518
3519         self.cookiejar = load_cookies(opts_cookiefile, opts_cookiesfrombrowser, self)
3520
3521         cookie_processor = YoutubeDLCookieProcessor(self.cookiejar)
3522         if opts_proxy is not None:
3523             if opts_proxy == '':
3524                 proxies = {}
3525             else:
3526                 proxies = {'http': opts_proxy, 'https': opts_proxy}
3527         else:
3528             proxies = compat_urllib_request.getproxies()
3529             # Set HTTPS proxy to HTTP one if given (https://github.com/ytdl-org/youtube-dl/issues/805)
3530             if 'http' in proxies and 'https' not in proxies:
3531                 proxies['https'] = proxies['http']
3532         proxy_handler = PerRequestProxyHandler(proxies)
3533
3534         debuglevel = 1 if self.params.get('debug_printtraffic') else 0
3535         https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
3536         ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
3537         redirect_handler = YoutubeDLRedirectHandler()
3538         data_handler = compat_urllib_request_DataHandler()
3539
3540         # When passing our own FileHandler instance, build_opener won't add the
3541         # default FileHandler and allows us to disable the file protocol, which
3542         # can be used for malicious purposes (see
3543         # https://github.com/ytdl-org/youtube-dl/issues/8227)
3544         file_handler = compat_urllib_request.FileHandler()
3545
3546         def file_open(*args, **kwargs):
3547             raise compat_urllib_error.URLError('file:// scheme is explicitly disabled in yt-dlp for security reasons')
3548         file_handler.file_open = file_open
3549
3550         opener = compat_urllib_request.build_opener(
3551             proxy_handler, https_handler, cookie_processor, ydlh, redirect_handler, data_handler, file_handler)
3552
3553         # Delete the default user-agent header, which would otherwise apply in
3554         # cases where our custom HTTP handler doesn't come into play
3555         # (See https://github.com/ytdl-org/youtube-dl/issues/1309 for details)
3556         opener.addheaders = []
3557         self._opener = opener
3558
3559     def encode(self, s):
3560         if isinstance(s, bytes):
3561             return s  # Already encoded
3562
3563         try:
3564             return s.encode(self.get_encoding())
3565         except UnicodeEncodeError as err:
3566             err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
3567             raise
3568
3569     def get_encoding(self):
3570         encoding = self.params.get('encoding')
3571         if encoding is None:
3572             encoding = preferredencoding()
3573         return encoding
3574
3575     def _write_info_json(self, label, ie_result, infofn, overwrite=None):
3576         ''' Write infojson and returns True = written, False = skip, None = error '''
3577         if overwrite is None:
3578             overwrite = self.params.get('overwrites', True)
3579         if not self.params.get('writeinfojson'):
3580             return False
3581         elif not infofn:
3582             self.write_debug(f'Skipping writing {label} infojson')
3583             return False
3584         elif not self._ensure_dir_exists(infofn):
3585             return None
3586         elif not overwrite and os.path.exists(infofn):
3587             self.to_screen(f'[info] {label.title()} metadata is already present')
3588         else:
3589             self.to_screen(f'[info] Writing {label} metadata as JSON to: {infofn}')
3590             try:
3591                 write_json_file(self.sanitize_info(ie_result, self.params.get('clean_infojson', True)), infofn)
3592             except (OSError, IOError):
3593                 self.report_error(f'Cannot write {label} metadata to JSON file {infofn}')
3594                 return None
3595         return True
3596
3597     def _write_description(self, label, ie_result, descfn):
3598         ''' Write description and returns True = written, False = skip, None = error '''
3599         if not self.params.get('writedescription'):
3600             return False
3601         elif not descfn:
3602             self.write_debug(f'Skipping writing {label} description')
3603             return False
3604         elif not self._ensure_dir_exists(descfn):
3605             return None
3606         elif not self.params.get('overwrites', True) and os.path.exists(descfn):
3607             self.to_screen(f'[info] {label.title()} description is already present')
3608         elif ie_result.get('description') is None:
3609             self.report_warning(f'There\'s no {label} description to write')
3610             return False
3611         else:
3612             try:
3613                 self.to_screen(f'[info] Writing {label} description to: {descfn}')
3614                 with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
3615                     descfile.write(ie_result['description'])
3616             except (OSError, IOError):
3617                 self.report_error(f'Cannot write {label} description file {descfn}')
3618                 return None
3619         return True
3620
3621     def _write_subtitles(self, info_dict, filename):
3622         ''' Write subtitles to file and return list of (sub_filename, final_sub_filename); or None if error'''
3623         ret = []
3624         subtitles = info_dict.get('requested_subtitles')
3625         if not subtitles or not (self.params.get('writesubtitles') or self.params.get('writeautomaticsub')):
3626             # subtitles download errors are already managed as troubles in relevant IE
3627             # that way it will silently go on when used with unsupporting IE
3628             return ret
3629
3630         sub_filename_base = self.prepare_filename(info_dict, 'subtitle')
3631         if not sub_filename_base:
3632             self.to_screen('[info] Skipping writing video subtitles')
3633             return ret
3634         for sub_lang, sub_info in subtitles.items():
3635             sub_format = sub_info['ext']
3636             sub_filename = subtitles_filename(filename, sub_lang, sub_format, info_dict.get('ext'))
3637             sub_filename_final = subtitles_filename(sub_filename_base, sub_lang, sub_format, info_dict.get('ext'))
3638             if not self.params.get('overwrites', True) and os.path.exists(sub_filename):
3639                 self.to_screen(f'[info] Video subtitle {sub_lang}.{sub_format} is already present')
3640                 sub_info['filepath'] = sub_filename
3641                 ret.append((sub_filename, sub_filename_final))
3642                 continue
3643
3644             self.to_screen(f'[info] Writing video subtitles to: {sub_filename}')
3645             if sub_info.get('data') is not None:
3646                 try:
3647                     # Use newline='' to prevent conversion of newline characters
3648                     # See https://github.com/ytdl-org/youtube-dl/issues/10268
3649                     with io.open(sub_filename, 'w', encoding='utf-8', newline='') as subfile:
3650                         subfile.write(sub_info['data'])
3651                     sub_info['filepath'] = sub_filename
3652                     ret.append((sub_filename, sub_filename_final))
3653                     continue
3654                 except (OSError, IOError):
3655                     self.report_error(f'Cannot write video subtitles file {sub_filename}')
3656                     return None
3657
3658             try:
3659                 sub_copy = sub_info.copy()
3660                 sub_copy.setdefault('http_headers', info_dict.get('http_headers'))
3661                 self.dl(sub_filename, sub_copy, subtitle=True)
3662                 sub_info['filepath'] = sub_filename
3663                 ret.append((sub_filename, sub_filename_final))
3664             except (ExtractorError, IOError, OSError, ValueError) + network_exceptions as err:
3665                 self.report_warning(f'Unable to download video subtitles for {sub_lang!r}: {err}')
3666                 continue
3667         return ret
3668
3669     def _write_thumbnails(self, label, info_dict, filename, thumb_filename_base=None):
3670         ''' Write thumbnails to file and return list of (thumb_filename, final_thumb_filename) '''
3671         write_all = self.params.get('write_all_thumbnails', False)
3672         thumbnails, ret = [], []
3673         if write_all or self.params.get('writethumbnail', False):
3674             thumbnails = info_dict.get('thumbnails') or []
3675         multiple = write_all and len(thumbnails) > 1
3676
3677         if thumb_filename_base is None:
3678             thumb_filename_base = filename
3679         if thumbnails and not thumb_filename_base:
3680             self.write_debug(f'Skipping writing {label} thumbnail')
3681             return ret
3682
3683         for t in thumbnails[::-1]:
3684             thumb_ext = (f'{t["id"]}.' if multiple else '') + determine_ext(t['url'], 'jpg')
3685             thumb_display_id = f'{label} thumbnail {t["id"]}'
3686             thumb_filename = replace_extension(filename, thumb_ext, info_dict.get('ext'))
3687             thumb_filename_final = replace_extension(thumb_filename_base, thumb_ext, info_dict.get('ext'))
3688
3689             if not self.params.get('overwrites', True) and os.path.exists(thumb_filename):
3690                 ret.append((thumb_filename, thumb_filename_final))
3691                 t['filepath'] = thumb_filename
3692                 self.to_screen('[info] %s is already present' % (
3693                     thumb_display_id if multiple else f'{label} thumbnail').capitalize())
3694             else:
3695                 self.to_screen(f'[info] Downloading {thumb_display_id} ...')
3696                 try:
3697                     uf = self.urlopen(t['url'])
3698                     self.to_screen(f'[info] Writing {thumb_display_id} to: {thumb_filename}')
3699                     with open(encodeFilename(thumb_filename), 'wb') as thumbf:
3700                         shutil.copyfileobj(uf, thumbf)
3701                     ret.append((thumb_filename, thumb_filename_final))
3702                     t['filepath'] = thumb_filename
3703                 except network_exceptions as err:
3704                     self.report_warning(f'Unable to download {thumb_display_id}: {err}')
3705             if ret and not write_all:
3706                 break
3707         return ret