yt_dlp/YoutubeDL.py

   1 #!/usr/bin/env python3
   2 # coding: utf-8
   3
   4 from __future__ import absolute_import, unicode_literals
   5
   6 import collections
   7 import contextlib
   8 import datetime
   9 import errno
  10 import fileinput
  11 import functools
  12 import io
  13 import itertools
  14 import json
  15 import locale
  16 import operator
  17 import os
  18 import platform
  19 import re
  20 import shutil
  21 import subprocess
  22 import sys
  23 import tempfile
  24 import time
  25 import tokenize
  26 import traceback
  27 import random
  28 import unicodedata
  29
  30 from enum import Enum
  31 from string import ascii_letters
  32
  33 from .compat import (
  34     compat_basestring,
  35     compat_get_terminal_size,
  36     compat_kwargs,
  37     compat_numeric_types,
  38     compat_os_name,
  39     compat_pycrypto_AES,
  40     compat_shlex_quote,
  41     compat_str,
  42     compat_tokenize_tokenize,
  43     compat_urllib_error,
  44     compat_urllib_request,
  45     compat_urllib_request_DataHandler,
  46     windows_enable_vt_mode,
  47 )
  48 from .cookies import load_cookies
  49 from .utils import (
  50     age_restricted,
  51     args_to_str,
  52     ContentTooShortError,
  53     date_from_str,
  54     DateRange,
  55     DEFAULT_OUTTMPL,
  56     determine_ext,
  57     determine_protocol,
  58     DownloadCancelled,
  59     DownloadError,
  60     encode_compat_str,
  61     encodeFilename,
  62     EntryNotInPlaylist,
  63     error_to_compat_str,
  64     ExistingVideoReached,
  65     expand_path,
  66     ExtractorError,
  67     float_or_none,
  68     format_bytes,
  69     format_field,
  70     formatSeconds,
  71     GeoRestrictedError,
  72     get_domain,
  73     HEADRequest,
  74     int_or_none,
  75     iri_to_uri,
  76     ISO3166Utils,
  77     join_nonempty,
  78     LazyList,
  79     LINK_TEMPLATES,
  80     locked_file,
  81     make_dir,
  82     make_HTTPS_handler,
  83     MaxDownloadsReached,
  84     network_exceptions,
  85     number_of_digits,
  86     orderedSet,
  87     OUTTMPL_TYPES,
  88     PagedList,
  89     parse_filesize,
  90     PerRequestProxyHandler,
  91     platform_name,
  92     Popen,
  93     PostProcessingError,
  94     preferredencoding,
  95     prepend_extension,
  96     ReExtractInfo,
  97     register_socks_protocols,
  98     RejectedVideoReached,
  99     remove_terminal_sequences,
 100     render_table,
 101     replace_extension,
 102     SameFileError,
 103     sanitize_filename,
 104     sanitize_path,
 105     sanitize_url,
 106     sanitized_Request,
 107     std_headers,
 108     STR_FORMAT_RE_TMPL,
 109     STR_FORMAT_TYPES,
 110     str_or_none,
 111     strftime_or_none,
 112     subtitles_filename,
 113     supports_terminal_sequences,
 114     timetuple_from_msec,
 115     to_high_limit_path,
 116     traverse_obj,
 117     try_get,
 118     UnavailableVideoError,
 119     url_basename,
 120     variadic,
 121     version_tuple,
 122     write_json_file,
 123     write_string,
 124     YoutubeDLCookieProcessor,
 125     YoutubeDLHandler,
 126     YoutubeDLRedirectHandler,
 127 )
 128 from .cache import Cache
 129 from .minicurses import format_text
 130 from .extractor import (
 131     gen_extractor_classes,
 132     get_info_extractor,
 133     _LAZY_LOADER,
 134     _PLUGIN_CLASSES as plugin_extractors
 135 )
 136 from .extractor.openload import PhantomJSwrapper
 137 from .downloader import (
 138     FFmpegFD,
 139     get_suitable_downloader,
 140     shorten_protocol_name
 141 )
 142 from .downloader.rtmp import rtmpdump_version
 143 from .postprocessor import (
 144     get_postprocessor,
 145     EmbedThumbnailPP,
 146     FFmpegFixupDuplicateMoovPP,
 147     FFmpegFixupDurationPP,
 148     FFmpegFixupM3u8PP,
 149     FFmpegFixupM4aPP,
 150     FFmpegFixupStretchedPP,
 151     FFmpegFixupTimestampPP,
 152     FFmpegMergerPP,
 153     FFmpegPostProcessor,
 154     MoveFilesAfterDownloadPP,
 155     _PLUGIN_CLASSES as plugin_postprocessors
 156 )
 157 from .update import detect_variant
 158 from .version import __version__, RELEASE_GIT_HEAD
 159
 160 if compat_os_name == 'nt':
 161     import ctypes
 162
 163
 164 class YoutubeDL(object):
 165     """YoutubeDL class.
 166
 167     YoutubeDL objects are the ones responsible for downloading the
 168     actual video file and writing it to disk if the user has requested
 169     it, among some other tasks. In most cases there should be one per
 170     program. Since the downloader does not itself know how to extract
 171     all the needed information from a video URL (that is the task of
 172     InfoExtractors), it has to pass the URL to one of them.
 173
 174     For this, YoutubeDL objects have a method that allows
 175     InfoExtractors to be registered in a given order. When it is passed
 176     a URL, the YoutubeDL object hands it to the first InfoExtractor it
 177     finds that reports being able to handle it. The InfoExtractor extracts
 178     all the information about the video or videos the URL refers to, and
 179     YoutubeDL processes the extracted information, possibly using a File
 180     Downloader to download the video.
 181
 182     YoutubeDL objects accept a lot of parameters. In order not to saturate
 183     the object constructor with arguments, it receives a dictionary of
 184     options instead. These options are available through the params
 185     attribute for the InfoExtractors to use. The YoutubeDL also
 186     registers itself as the downloader in charge for the InfoExtractors
 187     that are added to it, so this is a "mutual registration".
 188
 189     Available options:
 190
 191     username:          Username for authentication purposes.
 192     password:          Password for authentication purposes.
 193     videopassword:     Password for accessing a video.
 194     ap_mso:            Adobe Pass multiple-system operator identifier.
 195     ap_username:       Multiple-system operator account username.
 196     ap_password:       Multiple-system operator account password.
 197     usenetrc:          Use netrc for authentication instead.
 198     verbose:           Print additional info to stdout.
 199     quiet:             Do not print messages to stdout.
 200     no_warnings:       Do not print out anything for warnings.
 201     forceprint:        A list of templates to force print
 202     forceurl:          Force printing final URL. (Deprecated)
 203     forcetitle:        Force printing title. (Deprecated)
 204     forceid:           Force printing ID. (Deprecated)
 205     forcethumbnail:    Force printing thumbnail URL. (Deprecated)
 206     forcedescription:  Force printing description. (Deprecated)
 207     forcefilename:     Force printing final filename. (Deprecated)
 208     forceduration:     Force printing duration. (Deprecated)
 209     forcejson:         Force printing info_dict as JSON.
 210     dump_single_json:  Force printing the info_dict of the whole playlist
 211                        (or video) as a single JSON line.
 212     force_write_download_archive: Force writing download archive regardless
 213                        of 'skip_download' or 'simulate'.
 214     simulate:          Do not download the video files. If unset (or None),
 215                        simulate only if listsubtitles, listformats or list_thumbnails is used
 216     format:            Video format code. see "FORMAT SELECTION" for more details.
 217                        You can also pass a function. The function takes 'ctx' as
 218                        argument and returns the formats to download.
 219                        See "build_format_selector" for an implementation
 220     allow_unplayable_formats:   Allow unplayable formats to be extracted and downloaded.
 221     ignore_no_formats_error: Ignore "No video formats" error. Useful for
 222                        extracting metadata even if the video is not actually
 223                        available for download (experimental)
 224     format_sort:       A list of fields by which to sort the video formats.
 225                        See "Sorting Formats" for more details.
 226     format_sort_force: Force the given format_sort. see "Sorting Formats"
 227                        for more details.
 228     allow_multiple_video_streams:   Allow multiple video streams to be merged
 229                        into a single file
 230     allow_multiple_audio_streams:   Allow multiple audio streams to be merged
 231                        into a single file
 232     check_formats:     Whether to test if the formats are downloadable.
 233                        Can be True (check all), False (check none),
 234                        'selected' (check selected formats),
 235                        or None (check only if requested by extractor)
 236     paths:             Dictionary of output paths. The allowed keys are 'home',
 237                        'temp' and the keys of OUTTMPL_TYPES (in utils.py)
 238     outtmpl:           Dictionary of templates for output names. Allowed keys
 239                        are 'default' and the keys of OUTTMPL_TYPES (in utils.py).
 240                        For compatibility with youtube-dl, a single string can also be used
 241     outtmpl_na_placeholder: Placeholder for unavailable meta fields.
 242     restrictfilenames: Do not allow "&" and spaces in file names
 243     trim_file_name:    Limit length of filename (extension excluded)
 244     windowsfilenames:  Force the filenames to be windows compatible
 245     ignoreerrors:      Do not stop on download/postprocessing errors.
 246                        Can be 'only_download' to ignore only download errors.
 247                        Default is 'only_download' for CLI, but False for API
 248     skip_playlist_after_errors: Number of allowed failures until the rest of
 249                        the playlist is skipped
 250     force_generic_extractor: Force downloader to use the generic extractor
 251     overwrites:        Overwrite all video and metadata files if True,
 252                        overwrite only non-video files if None
 253                        and don't overwrite any file if False
 254                        For compatibility with youtube-dl,
 255                        "nooverwrites" may also be used instead
 256     playliststart:     Playlist item to start at.
 257     playlistend:       Playlist item to end at.
 258     playlist_items:    Specific indices of playlist to download.
 259     playlistreverse:   Download playlist items in reverse order.
 260     playlistrandom:    Download playlist items in random order.
 261     matchtitle:        Download only matching titles.
 262     rejecttitle:       Reject downloads for matching titles.
 263     logger:            Log messages to a logging.Logger instance.
 264     logtostderr:       Log messages to stderr instead of stdout.
 265     consoletitle:      Display progress in console window's titlebar.
 266     writedescription:  Write the video description to a .description file
 267     writeinfojson:     Write the video metadata to a .info.json file
 268     clean_infojson:    Remove private fields from the infojson
 269     getcomments:       Extract video comments. This will not be written to disk
 270                        unless writeinfojson is also given
 271     writeannotations:  Write the video annotations to a .annotations.xml file
 272     writethumbnail:    Write the thumbnail image to a file
 273     allow_playlist_files: Whether to write playlists' description, infojson etc
 274                        also to disk when using the 'write*' options
 275     write_all_thumbnails:  Write all thumbnail formats to files
 276     writelink:         Write an internet shortcut file, depending on the
 277                        current platform (.url/.webloc/.desktop)
 278     writeurllink:      Write a Windows internet shortcut file (.url)
 279     writewebloclink:   Write a macOS internet shortcut file (.webloc)
 280     writedesktoplink:  Write a Linux internet shortcut file (.desktop)
 281     writesubtitles:    Write the video subtitles to a file
 282     writeautomaticsub: Write the automatically generated subtitles to a file
 283     allsubtitles:      Deprecated - Use subtitleslangs = ['all']
 284                        Downloads all the subtitles of the video
 285                        (requires writesubtitles or writeautomaticsub)
 286     listsubtitles:     Lists all available subtitles for the video
 287     subtitlesformat:   The format code for subtitles
 288     subtitleslangs:    List of languages of the subtitles to download (can be regex).
 289                        The list may contain "all" to refer to all the available
 290                        subtitles. The language can be prefixed with a "-" to
 291                        exclude it from the requested languages. Eg: ['all', '-live_chat']
 292     keepvideo:         Keep the video file after post-processing
 293     daterange:         A DateRange object, download only if the upload_date is in the range.
 294     skip_download:     Skip the actual download of the video file
 295     cachedir:          Location of the cache files in the filesystem.
 296                        False to disable filesystem cache.
 297     noplaylist:        Download single video instead of a playlist if in doubt.
 298     age_limit:         An integer representing the user's age in years.
 299                        Unsuitable videos for the given age are skipped.
 300     min_views:         An integer representing the minimum view count the video
 301                        must have in order to not be skipped.
 302                        Videos without view count information are always
 303                        downloaded. None for no limit.
 304     max_views:         An integer representing the maximum view count.
 305                        Videos that are more popular than that are not
 306                        downloaded.
 307                        Videos without view count information are always
 308                        downloaded. None for no limit.
 309     download_archive:  File name of a file where all downloads are recorded.
 310                        Videos already present in the file are not downloaded
 311                        again.
 312     break_on_existing: Stop the download process after attempting to download a
 313                        file that is in the archive.
 314     break_on_reject:   Stop the download process when encountering a video that
 315                        has been filtered out.
 316     break_per_url:     Whether break_on_reject and break_on_existing
 317                        should act on each input URL as opposed to for the entire queue
 318     cookiefile:        File name where cookies should be read from and dumped to
 319     cookiesfrombrowser: A tuple containing the name of the browser and the profile
 320                        name/path from where cookies are loaded.
 321                        Eg: ('chrome', ) or ('vivaldi', 'default')
 322     nocheckcertificate:Do not verify SSL certificates
 323     prefer_insecure:   Use HTTP instead of HTTPS to retrieve information.
 324                        At the moment, this is only supported by YouTube.
 325     proxy:             URL of the proxy server to use
 326     geo_verification_proxy:  URL of the proxy to use for IP address verification
 327                        on geo-restricted sites.
 328     socket_timeout:    Time to wait for unresponsive hosts, in seconds
 329     bidi_workaround:   Work around buggy terminals without bidirectional text
 330                        support, using fribidi
 331     debug_printtraffic:Print out sent and received HTTP traffic
 332     include_ads:       Download ads as well (deprecated)
 333     default_search:    Prepend this string if an input url is not valid.
 334                        'auto' for elaborate guessing
 335     encoding:          Use this encoding instead of the system-specified.
 336     extract_flat:      Do not resolve URLs, return the immediate result.
 337                        Pass in 'in_playlist' to only show this behavior for
 338                        playlist items.
 339     wait_for_video:    If given, wait for scheduled streams to become available.
 340                        The value should be a tuple containing the range
 341                        (min_secs, max_secs) to wait between retries
 342     postprocessors:    A list of dictionaries, each with an entry
 343                        * key:  The name of the postprocessor. See
 344                                yt_dlp/postprocessor/__init__.py for a list.
 345                        * when: When to run the postprocessor. Can be one of
 346                                pre_process|before_dl|post_process|after_move.
 347                                Assumed to be 'post_process' if not given
 348     post_hooks:        Deprecated - Register a custom postprocessor instead
 349                        A list of functions that get called as the final step
 350                        for each video file, after all postprocessors have been
 351                        called. The filename will be passed as the only argument.
 352     progress_hooks:    A list of functions that get called on download
 353                        progress, with a dictionary with the entries
 354                        * status: One of "downloading", "error", or "finished".
 355                                  Check this first and ignore unknown values.
 356                        * info_dict: The extracted info_dict
 357
 358                        If status is one of "downloading" or "finished", the
 359                        following properties may also be present:
 360                        * filename: The final filename (always present)
 361                        * tmpfilename: The filename we're currently writing to
 362                        * downloaded_bytes: Bytes on disk
 363                        * total_bytes: Size of the whole file, None if unknown
 364                        * total_bytes_estimate: Guess of the eventual file size,
 365                                                None if unavailable.
 366                        * elapsed: The number of seconds since download started.
 367                        * eta: The estimated time in seconds, None if unknown
 368                        * speed: The download speed in bytes/second, None if
 369                                 unknown
 370                        * fragment_index: The counter of the currently
 371                                          downloaded video fragment.
 372                        * fragment_count: The number of fragments (= individual
 373                                          files that will be merged)
 374
 375                        Progress hooks are guaranteed to be called at least once
 376                        (with status "finished") if the download is successful.
 377     postprocessor_hooks:  A list of functions that get called on postprocessing
 378                        progress, with a dictionary with the entries
 379                        * status: One of "started", "processing", or "finished".
 380                                  Check this first and ignore unknown values.
 381                        * postprocessor: Name of the postprocessor
 382                        * info_dict: The extracted info_dict
 383
 384                        Postprocessor hooks are guaranteed to be called at least twice
 385                        (with status "started" and "finished") if the processing is successful.
 386     merge_output_format: Extension to use when merging formats.
 387     final_ext:         Expected final extension; used to detect when the file was
 388                        already downloaded and converted
 389     fixup:             Automatically correct known faults of the file.
 390                        One of:
 391                        - "never": do nothing
 392                        - "warn": only emit a warning
 393                        - "detect_or_warn": check whether we can do anything
 394                                            about it, warn otherwise (default)
 395     source_address:    Client-side IP address to bind to.
 396     call_home:         Boolean, true iff we are allowed to contact the
 397                        yt-dlp servers for debugging. (BROKEN)
 398     sleep_interval_requests: Number of seconds to sleep between requests
 399                        during extraction
 400     sleep_interval:    Number of seconds to sleep before each download when
 401                        used alone or a lower bound of a range for randomized
 402                        sleep before each download (minimum possible number
 403                        of seconds to sleep) when used along with
 404                        max_sleep_interval.
 405     max_sleep_interval:Upper bound of a range for randomized sleep before each
 406                        download (maximum possible number of seconds to sleep).
 407                        Must only be used along with sleep_interval.
 408                        Actual sleep time will be a random float from range
 409                        [sleep_interval; max_sleep_interval].
 410     sleep_interval_subtitles: Number of seconds to sleep before each subtitle download
 411     listformats:       Print an overview of available video formats and exit.
 412     list_thumbnails:   Print a table of all thumbnails and exit.
 413     match_filter:      A function that gets called with the info_dict of
 414                        every video.
 415                        If it returns a message, the video is ignored.
 416                        If it returns None, the video is downloaded.
 417                        match_filter_func in utils.py is one example for this.
 418     no_color:          Do not emit color codes in output.
 419     geo_bypass:        Bypass geographic restriction via faking X-Forwarded-For
 420                        HTTP header
 421     geo_bypass_country:
 422                        Two-letter ISO 3166-1 alpha-2 country code that will be used for
 423                        explicit geographic restriction bypassing via faking
 424                        X-Forwarded-For HTTP header
 425     geo_bypass_ip_block:
 426                        IP range in CIDR notation that will be used similarly to
 427                        geo_bypass_country
 428
 429     The following options determine which downloader is picked:
 430     external_downloader: A dictionary of protocol keys and the executable of the
 431                        external downloader to use for it. The allowed protocols
 432                        are default|http|ftp|m3u8|dash|rtsp|rtmp|mms.
 433                        Set the value to 'native' to use the native downloader
 434     hls_prefer_native: Deprecated - Use external_downloader = {'m3u8': 'native'}
 435                        or {'m3u8': 'ffmpeg'} instead.
 436                        Use the native HLS downloader instead of ffmpeg/avconv
 437                        if True, otherwise use ffmpeg/avconv if False, otherwise
 438                        use downloader suggested by extractor if None.
 439     compat_opts:       Compatibility options. See "Differences in default behavior".
 440                        The following options do not work when used through the API:
 441                        filename, abort-on-error, multistreams, no-live-chat, format-sort,
 442                        no-clean-infojson, no-playlist-metafiles, no-keep-subs, no-attach-info-json.
 443                        Refer to __init__.py for their implementation
 444     progress_template: Dictionary of templates for progress outputs.
 445                        Allowed keys are 'download', 'postprocess',
 446                        'download-title' (console title) and 'postprocess-title'.
 447                        The template is mapped on a dictionary with keys 'progress' and 'info'
 448
 449     The following parameters are not used by YoutubeDL itself, they are used by
 450     the downloader (see yt_dlp/downloader/common.py):
 451     nopart, updatetime, buffersize, ratelimit, throttledratelimit, min_filesize,
 452     max_filesize, test, noresizebuffer, retries, fragment_retries, continuedl,
 453     noprogress, xattr_set_filesize, hls_use_mpegts, http_chunk_size,
 454     external_downloader_args, concurrent_fragment_downloads.
 455
 456     The following options are used by the post processors:
 457     prefer_ffmpeg:     If False, use avconv instead of ffmpeg if both are available,
 458                        otherwise prefer ffmpeg. (avconv support is deprecated)
 459     ffmpeg_location:   Location of the ffmpeg/avconv binary; either the path
 460                        to the binary or its containing directory.
 461     postprocessor_args: A dictionary of postprocessor/executable keys (in lower case)
 462                        and a list of additional command-line arguments for the
 463                        postprocessor/executable. The dict can also have "PP+EXE" keys
 464                        which are used when the given exe is used by the given PP.
 465                        Use 'default' as the name for arguments to be passed to all PP
 466                        For compatibility with youtube-dl, a single list of args
 467                        can also be used
 468
 469     The following options are used by the extractors:
 470     extractor_retries: Number of times to retry for known errors
 471     dynamic_mpd:       Whether to process dynamic DASH manifests (default: True)
 472     hls_split_discontinuity: Split HLS playlists to different formats at
 473                        discontinuities such as ad breaks (default: False)
 474     extractor_args:    A dictionary of arguments to be passed to the extractors.
 475                        See "EXTRACTOR ARGUMENTS" for details.
 476                        Eg: {'youtube': {'skip': ['dash', 'hls']}}
 477     youtube_include_dash_manifest: Deprecated - Use extractor_args instead.
 478                        If True (default), DASH manifests and related
 479                        data will be downloaded and processed by extractor.
 480                        You can reduce network I/O by disabling it if you don't
 481                        care about DASH. (only for youtube)
 482     youtube_include_hls_manifest: Deprecated - Use extractor_args instead.
 483                        If True (default), HLS manifests and related
 484                        data will be downloaded and processed by extractor.
 485                        You can reduce network I/O by disabling it if you don't
 486                        care about HLS. (only for youtube)
 487     """
 488
 489     _NUMERIC_FIELDS = set((
 490         'width', 'height', 'tbr', 'abr', 'asr', 'vbr', 'fps', 'filesize', 'filesize_approx',
 491         'timestamp', 'release_timestamp',
 492         'duration', 'view_count', 'like_count', 'dislike_count', 'repost_count',
 493         'average_rating', 'comment_count', 'age_limit',
 494         'start_time', 'end_time',
 495         'chapter_number', 'season_number', 'episode_number',
 496         'track_number', 'disc_number', 'release_year',
 497     ))
 498
 499     _format_selection_exts = {
 500         'audio': {'m4a', 'mp3', 'ogg', 'aac'},
 501         'video': {'mp4', 'flv', 'webm', '3gp'},
 502         'storyboards': {'mhtml'},
 503     }
 504
 505     params = None
 506     _ies = {}
 507     _pps = {'pre_process': [], 'before_dl': [], 'after_move': [], 'post_process': []}
 508     _printed_messages = set()
 509     _first_webpage_request = True
 510     _download_retcode = None
 511     _num_downloads = None
 512     _playlist_level = 0
 513     _playlist_urls = set()
 514     _screen_file = None
 515
 516     def __init__(self, params=None, auto_init=True):
 517         """Create a YoutubeDL object with the given options.
 518         @param auto_init    Whether to load the default extractors and print header (if verbose).
 519                             Set to 'no_verbose_header' to not print the header
 520         """
 521         if params is None:
 522             params = {}
 523         self._ies = {}
 524         self._ies_instances = {}
 525         self._pps = {'pre_process': [], 'before_dl': [], 'after_move': [], 'post_process': []}
 526         self._printed_messages = set()
 527         self._first_webpage_request = True
 528         self._post_hooks = []
 529         self._progress_hooks = []
 530         self._postprocessor_hooks = []
 531         self._download_retcode = 0
 532         self._num_downloads = 0
 533         self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
 534         self._err_file = sys.stderr
 535         self.params = params
 536         self.cache = Cache(self)
 537
 538         windows_enable_vt_mode()
 539         self._allow_colors = {
 540             'screen': not self.params.get('no_color') and supports_terminal_sequences(self._screen_file),
 541             'err': not self.params.get('no_color') and supports_terminal_sequences(self._err_file),
 542         }
 543
 544         if sys.version_info < (3, 6):
 545             self.report_warning(
 546                 'Python version %d.%d is not supported! Please update to Python 3.6 or above' % sys.version_info[:2])
 547
 548         if self.params.get('allow_unplayable_formats'):
 549             self.report_warning(
 550                 f'You have asked for {self._format_err("UNPLAYABLE", self.Styles.EMPHASIS)} formats to be listed/downloaded. '
 551                 'This is a developer option intended for debugging. \n'
 552                 '         If you experience any issues while using this option, '
 553                 f'{self._format_err("DO NOT", self.Styles.ERROR)} open a bug report')
 554
 555         def check_deprecated(param, option, suggestion):
 556             if self.params.get(param) is not None:
 557                 self.report_warning('%s is deprecated. Use %s instead' % (option, suggestion))
 558                 return True
 559             return False
 560
 561         if check_deprecated('cn_verification_proxy', '--cn-verification-proxy', '--geo-verification-proxy'):
 562             if self.params.get('geo_verification_proxy') is None:
 563                 self.params['geo_verification_proxy'] = self.params['cn_verification_proxy']
 564
 565         check_deprecated('autonumber', '--auto-number', '-o "%(autonumber)s-%(title)s.%(ext)s"')
 566         check_deprecated('usetitle', '--title', '-o "%(title)s-%(id)s.%(ext)s"')
 567         check_deprecated('useid', '--id', '-o "%(id)s.%(ext)s"')
 568
 569         for msg in self.params.get('_warnings', []):
 570             self.report_warning(msg)
 571         for msg in self.params.get('_deprecation_warnings', []):
 572             self.deprecation_warning(msg)
 573
 574         if 'list-formats' in self.params.get('compat_opts', []):
 575             self.params['listformats_table'] = False
 576
 577         if 'overwrites' not in self.params and self.params.get('nooverwrites') is not None:
 578             # nooverwrites was unnecessarily changed to overwrites
 579             # in 0c3d0f51778b153f65c21906031c2e091fcfb641
 580             # This ensures compatibility with both keys
 581             self.params['overwrites'] = not self.params['nooverwrites']
 582         elif self.params.get('overwrites') is None:
 583             self.params.pop('overwrites', None)
 584         else:
 585             self.params['nooverwrites'] = not self.params['overwrites']
 586
 587         if params.get('bidi_workaround', False):
 588             try:
 589                 import pty
 590                 master, slave = pty.openpty()
 591                 width = compat_get_terminal_size().columns
 592                 if width is None:
 593                     width_args = []
 594                 else:
 595                     width_args = ['-w', str(width)]
 596                 sp_kwargs = dict(
 597                     stdin=subprocess.PIPE,
 598                     stdout=slave,
 599                     stderr=self._err_file)
 600                 try:
 601                     self._output_process = Popen(['bidiv'] + width_args, **sp_kwargs)
 602                 except OSError:
 603                     self._output_process = Popen(['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
 604                 self._output_channel = os.fdopen(master, 'rb')
 605             except OSError as ose:
 606                 if ose.errno == errno.ENOENT:
 607                     self.report_warning(
 608                         'Could not find fribidi executable, ignoring --bidi-workaround. '
 609                         'Make sure that  fribidi  is an executable file in one of the directories in your $PATH.')
 610                 else:
 611                     raise
 612
 613         if (sys.platform != 'win32'
 614                 and sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
 615                 and not params.get('restrictfilenames', False)):
 616             # Unicode filesystem API will throw errors (#1474, #13027)
 617             self.report_warning(
 618                 'Assuming --restrict-filenames since file system encoding '
 619                 'cannot encode all characters. '
 620                 'Set the LC_ALL environment variable to fix this.')
 621             self.params['restrictfilenames'] = True
 622
 623         self.outtmpl_dict = self.parse_outtmpl()
 624
 625         # Creating format selector here allows us to catch syntax errors before the extraction
 626         self.format_selector = (
 627             self.params.get('format') if self.params.get('format') in (None, '-')
 628             else self.params['format'] if callable(self.params['format'])
 629             else self.build_format_selector(self.params['format']))
 630
 631         self._setup_opener()
 632
 633         if auto_init:
 634             if auto_init != 'no_verbose_header':
 635                 self.print_debug_header()
 636             self.add_default_info_extractors()
 637
 638         hooks = {
 639             'post_hooks': self.add_post_hook,
 640             'progress_hooks': self.add_progress_hook,
 641             'postprocessor_hooks': self.add_postprocessor_hook,
 642         }
 643         for opt, fn in hooks.items():
 644             for ph in self.params.get(opt, []):
 645                 fn(ph)
 646
 647         for pp_def_raw in self.params.get('postprocessors', []):
 648             pp_def = dict(pp_def_raw)
 649             when = pp_def.pop('when', 'post_process')
 650             self.add_post_processor(
 651                 get_postprocessor(pp_def.pop('key'))(self, **compat_kwargs(pp_def)),
 652                 when=when)
 653
 654         register_socks_protocols()
 655
 656         def preload_download_archive(fn):
 657             """Preload the archive, if any is specified"""
 658             if fn is None:
 659                 return False
 660             self.write_debug(f'Loading archive file {fn!r}')
 661             try:
 662                 with locked_file(fn, 'r', encoding='utf-8') as archive_file:
 663                     for line in archive_file:
 664                         self.archive.add(line.strip())
 665             except IOError as ioe:
 666                 if ioe.errno != errno.ENOENT:
 667                     raise
 668                 return False
 669             return True
 670
 671         self.archive = set()
 672         preload_download_archive(self.params.get('download_archive'))
 673
 674     def warn_if_short_id(self, argv):
 675         # short YouTube ID starting with dash?
 676         idxs = [
 677             i for i, a in enumerate(argv)
 678             if re.match(r'^-[0-9A-Za-z_-]{10}$', a)]
 679         if idxs:
 680             correct_argv = (
 681                 ['yt-dlp']
 682                 + [a for i, a in enumerate(argv) if i not in idxs]
 683                 + ['--'] + [argv[i] for i in idxs]
 684             )
 685             self.report_warning(
 686                 'Long argument string detected. '
 687                 'Use -- to separate parameters and URLs, like this:\n%s' %
 688                 args_to_str(correct_argv))
 689
 690     def add_info_extractor(self, ie):
 691         """Add an InfoExtractor object to the end of the list."""
 692         ie_key = ie.ie_key()
 693         self._ies[ie_key] = ie
 694         if not isinstance(ie, type):
 695             self._ies_instances[ie_key] = ie
 696             ie.set_downloader(self)
 697
 698     def _get_info_extractor_class(self, ie_key):
 699         ie = self._ies.get(ie_key)
 700         if ie is None:
 701             ie = get_info_extractor(ie_key)
 702             self.add_info_extractor(ie)
 703         return ie
 704
 705     def get_info_extractor(self, ie_key):
 706         """
 707         Get an instance of an IE with name ie_key, it will try to get one from
 708         the _ies list, if there's no instance it will create a new one and add
 709         it to the extractor list.
 710         """
 711         ie = self._ies_instances.get(ie_key)
 712         if ie is None:
 713             ie = get_info_extractor(ie_key)()
 714             self.add_info_extractor(ie)
 715         return ie
 716
 717     def add_default_info_extractors(self):
 718         """
 719         Add the InfoExtractors returned by gen_extractors to the end of the list
 720         """
 721         for ie in gen_extractor_classes():
 722             self.add_info_extractor(ie)
 723
 724     def add_post_processor(self, pp, when='post_process'):
 725         """Add a PostProcessor object to the end of the chain."""
 726         self._pps[when].append(pp)
 727         pp.set_downloader(self)
 728
 729     def add_post_hook(self, ph):
 730         """Add the post hook"""
 731         self._post_hooks.append(ph)
 732
 733     def add_progress_hook(self, ph):
 734         """Add the download progress hook"""
 735         self._progress_hooks.append(ph)
 736
 737     def add_postprocessor_hook(self, ph):
 738         """Add the postprocessing progress hook"""
 739         self._postprocessor_hooks.append(ph)
 740         for pps in self._pps.values():
 741             for pp in pps:
 742                 pp.add_progress_hook(ph)
 743
 744     def _bidi_workaround(self, message):
 745         if not hasattr(self, '_output_channel'):
 746             return message
 747
 748         assert hasattr(self, '_output_process')
 749         assert isinstance(message, compat_str)
 750         line_count = message.count('\n') + 1
 751         self._output_process.stdin.write((message + '\n').encode('utf-8'))
 752         self._output_process.stdin.flush()
 753         res = ''.join(self._output_channel.readline().decode('utf-8')
 754                       for _ in range(line_count))
 755         return res[:-len('\n')]
 756
 757     def _write_string(self, message, out=None, only_once=False):
 758         if only_once:
 759             if message in self._printed_messages:
 760                 return
 761             self._printed_messages.add(message)
 762         write_string(message, out=out, encoding=self.params.get('encoding'))
 763
 764     def to_stdout(self, message, skip_eol=False, quiet=False):
 765         """Print message to stdout"""
 766         if self.params.get('logger'):
 767             self.params['logger'].debug(message)
 768         elif not quiet or self.params.get('verbose'):
 769             self._write_string(
 770                 '%s%s' % (self._bidi_workaround(message), ('' if skip_eol else '\n')),
 771                 self._err_file if quiet else self._screen_file)
 772
 773     def to_stderr(self, message, only_once=False):
 774         """Print message to stderr"""
 775         assert isinstance(message, compat_str)
 776         if self.params.get('logger'):
 777             self.params['logger'].error(message)
 778         else:
 779             self._write_string('%s\n' % self._bidi_workaround(message), self._err_file, only_once=only_once)
 780
 781     def to_console_title(self, message):
 782         if not self.params.get('consoletitle', False):
 783             return
 784         message = remove_terminal_sequences(message)
 785         if compat_os_name == 'nt':
 786             if ctypes.windll.kernel32.GetConsoleWindow():
 787                 # c_wchar_p() might not be necessary if `message` is
 788                 # already of type unicode()
 789                 ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
 790         elif 'TERM' in os.environ:
 791             self._write_string('\033]0;%s\007' % message, self._screen_file)
 792
 793     def save_console_title(self):
 794         if not self.params.get('consoletitle', False):
 795             return
 796         if self.params.get('simulate'):
 797             return
 798         if compat_os_name != 'nt' and 'TERM' in os.environ:
 799             # Save the title on stack
 800             self._write_string('\033[22;0t', self._screen_file)
 801
 802     def restore_console_title(self):
 803         if not self.params.get('consoletitle', False):
 804             return
 805         if self.params.get('simulate'):
 806             return
 807         if compat_os_name != 'nt' and 'TERM' in os.environ:
 808             # Restore the title from stack
 809             self._write_string('\033[23;0t', self._screen_file)
 810
 811     def __enter__(self):
 812         self.save_console_title()
 813         return self
 814
 815     def __exit__(self, *args):
 816         self.restore_console_title()
 817
 818         if self.params.get('cookiefile') is not None:
 819             self.cookiejar.save(ignore_discard=True, ignore_expires=True)
 820
 821     def trouble(self, message=None, tb=None, is_error=True):
 822         """Determine action to take when a download problem appears.
 823
 824         Depending on if the downloader has been configured to ignore
 825         download errors or not, this method may throw an exception or
 826         not when errors are found, after printing the message.
 827
 828         @param tb          If given, is additional traceback information
 829         @param is_error    Whether to raise error according to ignorerrors
 830         """
 831         if message is not None:
 832             self.to_stderr(message)
 833         if self.params.get('verbose'):
 834             if tb is None:
 835                 if sys.exc_info()[0]:  # if .trouble has been called from an except block
 836                     tb = ''
 837                     if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
 838                         tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
 839                     tb += encode_compat_str(traceback.format_exc())
 840                 else:
 841                     tb_data = traceback.format_list(traceback.extract_stack())
 842                     tb = ''.join(tb_data)
 843             if tb:
 844                 self.to_stderr(tb)
 845         if not is_error:
 846             return
 847         if not self.params.get('ignoreerrors'):
 848             if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
 849                 exc_info = sys.exc_info()[1].exc_info
 850             else:
 851                 exc_info = sys.exc_info()
 852             raise DownloadError(message, exc_info)
 853         self._download_retcode = 1
 854
 855     def to_screen(self, message, skip_eol=False):
 856         """Print message to stdout if not in quiet mode"""
 857         self.to_stdout(
 858             message, skip_eol, quiet=self.params.get('quiet', False))
 859
    class Styles(Enum):
        """Named text styles; each member's value is the style string passed on to format_text."""
        HEADERS = 'yellow'
        EMPHASIS = 'light blue'
        ID = 'green'
        DELIM = 'blue'
        ERROR = 'red'
        # NOTE: same value as HEADERS, so Enum treats WARNING as an alias of HEADERS
        WARNING = 'yellow'
        SUPPRESS = 'light black'
 868
 869     def _format_text(self, handle, allow_colors, text, f, fallback=None, *, test_encoding=False):
 870         if test_encoding:
 871             original_text = text
 872             encoding = self.params.get('encoding') or getattr(handle, 'encoding', 'ascii')
 873             text = text.encode(encoding, 'ignore').decode(encoding)
 874             if fallback is not None and text != original_text:
 875                 text = fallback
 876         if isinstance(f, self.Styles):
 877             f = f.value
 878         return format_text(text, f) if allow_colors else text if fallback is None else fallback
 879
 880     def _format_screen(self, *args, **kwargs):
 881         return self._format_text(
 882             self._screen_file, self._allow_colors['screen'], *args, **kwargs)
 883
 884     def _format_err(self, *args, **kwargs):
 885         return self._format_text(
 886             self._err_file, self._allow_colors['err'], *args, **kwargs)
 887
 888     def report_warning(self, message, only_once=False):
 889         '''
 890         Print the message to stderr, it will be prefixed with 'WARNING:'
 891         If stderr is a tty file the 'WARNING:' will be colored
 892         '''
 893         if self.params.get('logger') is not None:
 894             self.params['logger'].warning(message)
 895         else:
 896             if self.params.get('no_warnings'):
 897                 return
 898             self.to_stderr(f'{self._format_err("WARNING:", self.Styles.WARNING)} {message}', only_once)
 899
 900     def deprecation_warning(self, message):
 901         if self.params.get('logger') is not None:
 902             self.params['logger'].warning('DeprecationWarning: {message}')
 903         else:
 904             self.to_stderr(f'{self._format_err("DeprecationWarning:", self.Styles.ERROR)} {message}', True)
 905
 906     def report_error(self, message, *args, **kwargs):
 907         '''
 908         Do the same as trouble, but prefixes the message with 'ERROR:', colored
 909         in red if stderr is a tty file.
 910         '''
 911         self.trouble(f'{self._format_err("ERROR:", self.Styles.ERROR)} {message}', *args, **kwargs)
 912
 913     def write_debug(self, message, only_once=False):
 914         '''Log debug message or Print message to stderr'''
 915         if not self.params.get('verbose', False):
 916             return
 917         message = '[debug] %s' % message
 918         if self.params.get('logger'):
 919             self.params['logger'].debug(message)
 920         else:
 921             self.to_stderr(message, only_once)
 922
 923     def report_file_already_downloaded(self, file_name):
 924         """Report file has already been fully downloaded."""
 925         try:
 926             self.to_screen('[download] %s has already been downloaded' % file_name)
 927         except UnicodeEncodeError:
 928             self.to_screen('[download] The file has already been downloaded')
 929
 930     def report_file_delete(self, file_name):
 931         """Report that existing file will be deleted."""
 932         try:
 933             self.to_screen('Deleting existing file %s' % file_name)
 934         except UnicodeEncodeError:
 935             self.to_screen('Deleting existing file')
 936
 937     def raise_no_formats(self, info, forced=False):
 938         has_drm = info.get('__has_drm')
 939         msg = 'This video is DRM protected' if has_drm else 'No video formats found!'
 940         expected = self.params.get('ignore_no_formats_error')
 941         if forced or not expected:
 942             raise ExtractorError(msg, video_id=info['id'], ie=info['extractor'],
 943                                  expected=has_drm or expected)
 944         else:
 945             self.report_warning(msg)
 946
 947     def parse_outtmpl(self):
 948         outtmpl_dict = self.params.get('outtmpl', {})
 949         if not isinstance(outtmpl_dict, dict):
 950             outtmpl_dict = {'default': outtmpl_dict}
 951         # Remove spaces in the default template
 952         if self.params.get('restrictfilenames'):
 953             sanitize = lambda x: x.replace(' - ', ' ').replace(' ', '-')
 954         else:
 955             sanitize = lambda x: x
 956         outtmpl_dict.update({
 957             k: sanitize(v) for k, v in DEFAULT_OUTTMPL.items()
 958             if outtmpl_dict.get(k) is None})
 959         for key, val in outtmpl_dict.items():
 960             if isinstance(val, bytes):
 961                 self.report_warning(
 962                     'Parameter outtmpl is bytes, but should be a unicode string. '
 963                     'Put  from __future__ import unicode_literals  at the top of your code file or consider switching to Python 3.x.')
 964         return outtmpl_dict
 965
 966     def get_output_path(self, dir_type='', filename=None):
 967         paths = self.params.get('paths', {})
 968         assert isinstance(paths, dict)
 969         path = os.path.join(
 970             expand_path(paths.get('home', '').strip()),
 971             expand_path(paths.get(dir_type, '').strip()) if dir_type else '',
 972             filename or '')
 973
 974         # Temporary fix for #4787
 975         # 'Treat' all problem characters by passing filename through preferredencoding
 976         # to workaround encoding issues with subprocess on python2 @ Windows
 977         if sys.version_info < (3, 0) and sys.platform == 'win32':
 978             path = encodeFilename(path, True).decode(preferredencoding())
 979         return sanitize_path(path, force=self.params.get('windowsfilenames'))
 980
 981     @staticmethod
 982     def _outtmpl_expandpath(outtmpl):
 983         # expand_path translates '%%' into '%' and '$$' into '$'
 984         # correspondingly that is not what we want since we need to keep
 985         # '%%' intact for template dict substitution step. Working around
 986         # with boundary-alike separator hack.
 987         sep = ''.join([random.choice(ascii_letters) for _ in range(32)])
 988         outtmpl = outtmpl.replace('%%', '%{0}%'.format(sep)).replace('$$', '${0}$'.format(sep))
 989
 990         # outtmpl should be expand_path'ed before template dict substitution
 991         # because meta fields may contain env variables we don't want to
 992         # be expanded. For example, for outtmpl "%(title)s.%(ext)s" and
 993         # title "Hello $PATH", we don't want `$PATH` to be expanded.
 994         return expand_path(outtmpl).replace(sep, '')
 995
 996     @staticmethod
 997     def escape_outtmpl(outtmpl):
 998         ''' Escape any remaining strings like %s, %abc% etc. '''
 999         return re.sub(
1000             STR_FORMAT_RE_TMPL.format('', '(?![%(\0])'),
1001             lambda mobj: ('' if mobj.group('has_key') else '%') + mobj.group(0),
1002             outtmpl)
1003
1004     @classmethod
1005     def validate_outtmpl(cls, outtmpl):
1006         ''' @return None or Exception object '''
1007         outtmpl = re.sub(
1008             STR_FORMAT_RE_TMPL.format('[^)]*', '[ljqBU]'),
1009             lambda mobj: f'{mobj.group(0)[:-1]}s',
1010             cls._outtmpl_expandpath(outtmpl))
1011         try:
1012             cls.escape_outtmpl(outtmpl) % collections.defaultdict(int)
1013             return None
1014         except ValueError as err:
1015             return err
1016
1017     @staticmethod
1018     def _copy_infodict(info_dict):
1019         info_dict = dict(info_dict)
1020         for key in ('__original_infodict', '__postprocessors'):
1021             info_dict.pop(key, None)
1022         return info_dict
1023
    def prepare_outtmpl(self, outtmpl, info_dict, sanitize=None):
        """ Make the outtmpl and info_dict suitable for substitution: ydl.escape_outtmpl(outtmpl) % info_dict

        @param outtmpl    The output template string
        @param info_dict  The info dict; 'epoch' is set on it if missing,
                          everything else is computed on a copy
        @param sanitize   Optional callable (field_name, value) applied to values
                          formatted with the c, s or r conversions
        @return           (outtmpl, TMPL_DICT): the template with every keyed field
                          rewritten to a generated key, and the dict mapping those
                          keys to the computed values
        """
        info_dict.setdefault('epoch', int(time.time()))  # keep epoch consistent once set

        info_dict = self._copy_infodict(info_dict)
        info_dict['duration_string'] = (  # %(duration>%H-%M-%S)s is wrong if duration > 24hrs
            formatSeconds(info_dict['duration'], '-' if sanitize else ':')
            if info_dict.get('duration', None) is not None
            else None)
        info_dict['autonumber'] = self.params.get('autonumber_start', 1) - 1 + self._num_downloads
        if info_dict.get('resolution') is None:
            info_dict['resolution'] = self.format_resolution(info_dict, default=None)

        # For fields playlist_index, playlist_autonumber and autonumber convert all occurrences
        # of %(field)s to %(field)0Nd for backward compatibility
        field_size_compat_map = {
            'playlist_index': number_of_digits(info_dict.get('_last_playlist_index') or 0),
            'playlist_autonumber': number_of_digits(info_dict.get('n_entries') or 0),
            'autonumber': self.params.get('autonumber_size') or 5,
        }

        # Filled by create_key() below: generated template key -> value to substitute
        TMPL_DICT = {}
        EXTERNAL_FORMAT_RE = re.compile(STR_FORMAT_RE_TMPL.format('[^)]*', f'[{STR_FORMAT_TYPES}ljqBU]'))
        MATH_FUNCTIONS = {
            '+': float.__add__,
            '-': float.__sub__,
        }
        # Field is of the form key1.key2...
        # where keys (except first) can be string, int or slice
        FIELD_RE = r'\w*(?:\.(?:\w+|{num}|{num}?(?::{num}?){{1,2}}))*'.format(num=r'(?:-?\d+)')
        # NOTE(review): the '.' in the number pattern is unescaped, so it matches
        # any character rather than only a decimal point - confirm whether '\.' was intended
        MATH_FIELD_RE = r'''{field}|{num}'''.format(field=FIELD_RE, num=r'-?\d+(?:.\d+)?')
        MATH_OPERATORS_RE = r'(?:%s)' % '|'.join(map(re.escape, MATH_FUNCTIONS.keys()))
        # Parses the part between the parentheses of a template field:
        # [-]fields[maths][>strf_format][,alternate][&replacement][|default]
        INTERNAL_FORMAT_RE = re.compile(r'''(?x)
            (?P<negate>-)?
            (?P<fields>{field})
            (?P<maths>(?:{math_op}{math_field})*)
            (?:>(?P<strf_format>.+?))?
            (?P<alternate>(?<!\\),[^|&)]+)?
            (?:&(?P<replacement>.*?))?
            (?:\|(?P<default>.*?))?
            $'''.format(field=FIELD_RE, math_op=MATH_OPERATORS_RE, math_field=MATH_FIELD_RE))

        def _traverse_infodict(k):
            # Split the dotted path; a leading '.' gives an empty first key, which is dropped
            k = k.split('.')
            if k[0] == '':
                k.pop(0)
            return traverse_obj(info_dict, k, is_user_input=True, traverse_string=True)

        def get_value(mdict):
            # Compute the value for one parsed field: traversal, negation, maths, date formatting
            # Object traversal
            value = _traverse_infodict(mdict['fields'])
            # Negative
            if mdict['negate']:
                value = float_or_none(value)
                if value is not None:
                    value *= -1
            # Do maths
            offset_key = mdict['maths']
            if offset_key:
                value = float_or_none(value)
                operator = None
                # Consume the maths string alternately as operator, operand, operator, ...
                while offset_key:
                    item = re.match(
                        MATH_FIELD_RE if operator else MATH_OPERATORS_RE,
                        offset_key).group(0)
                    offset_key = offset_key[len(item):]
                    if operator is None:
                        operator = MATH_FUNCTIONS[item]
                        continue
                    item, multiplier = (item[1:], -1) if item[0] == '-' else (item, 1)
                    offset = float_or_none(item)
                    if offset is None:
                        # Not a number literal, so look the operand up as a field
                        offset = float_or_none(_traverse_infodict(item))
                    try:
                        value = operator(value, multiplier * offset)
                    except (TypeError, ZeroDivisionError):
                        return None
                    operator = None
            # Datetime formatting
            if mdict['strf_format']:
                value = strftime_or_none(value, mdict['strf_format'].replace('\\,', ','))

            return value

        # Placeholder substituted for fields that have no value and no explicit default
        na = self.params.get('outtmpl_na_placeholder', 'NA')

        def _dumpjson_default(obj):
            # json.dumps fallback: sets and LazyLists become lists, anything else its repr
            if isinstance(obj, (set, LazyList)):
                return list(obj)
            return repr(obj)

        def create_key(outer_mobj):
            # Substitution callback for EXTERNAL_FORMAT_RE: computes the field's value,
            # stores it in TMPL_DICT and returns the rewritten %(key)fmt placeholder
            if not outer_mobj.group('has_key'):
                return outer_mobj.group(0)
            key = outer_mobj.group('key')
            mobj = re.match(INTERNAL_FORMAT_RE, key)
            initial_field = mobj.group('fields').split('.')[-1] if mobj else ''
            value, replacement, default = None, None, na
            # Walk through the comma-separated alternates until one yields a value
            while mobj:
                mobj = mobj.groupdict()
                default = mobj['default'] if mobj['default'] is not None else default
                value = get_value(mobj)
                replacement = mobj['replacement']
                if value is None and mobj['alternate']:
                    mobj = re.match(INTERNAL_FORMAT_RE, mobj['alternate'][1:])
                else:
                    break

            fmt = outer_mobj.group('format')
            if fmt == 's' and value is not None and key in field_size_compat_map.keys():
                fmt = '0{:d}d'.format(field_size_compat_map[key])

            value = default if value is None else value if replacement is None else replacement

            flags = outer_mobj.group('conversion') or ''
            # Same format spec with the conversion type swapped for 's'
            str_fmt = f'{fmt[:-1]}s'
            if fmt[-1] == 'l':  # list
                delim = '\n' if '#' in flags else ', '
                value, fmt = delim.join(variadic(value)), str_fmt
            elif fmt[-1] == 'j':  # json
                value, fmt = json.dumps(value, default=_dumpjson_default, indent=4 if '#' in flags else None), str_fmt
            elif fmt[-1] == 'q':  # quoted
                value = map(str, variadic(value) if '#' in flags else [value])
                value, fmt = ' '.join(map(compat_shlex_quote, value)), str_fmt
            elif fmt[-1] == 'B':  # bytes
                value = f'%{str_fmt}'.encode('utf-8') % str(value).encode('utf-8')
                value, fmt = value.decode('utf-8', 'ignore'), 's'
            elif fmt[-1] == 'U':  # unicode normalized
                value, fmt = unicodedata.normalize(
                    # "+" = compatibility equivalence, "#" = NFD
                    'NF%s%s' % ('K' if '+' in flags else '', 'D' if '#' in flags else 'C'),
                    value), str_fmt
            elif fmt[-1] == 'c':
                if value:
                    value = str(value)[0]
                else:
                    fmt = str_fmt
            elif fmt[-1] not in 'rs':  # numeric
                value = float_or_none(value)
                if value is None:
                    value, fmt = default, 's'

            if sanitize:
                if fmt[-1] == 'r':
                    # If value is an object, sanitize might convert it to a string
                    # So we convert it to repr first
                    value, fmt = repr(value), str_fmt
                if fmt[-1] in 'csr':
                    value = sanitize(initial_field, value)

            # The generated key combines the field expression and the original format,
            # with NUL characters inserted after '%' and between the two parts
            key = '%s\0%s' % (key.replace('%', '%\0'), outer_mobj.group('format'))
            TMPL_DICT[key] = value
            return '{prefix}%({key}){fmt}'.format(key=key, fmt=fmt, prefix=outer_mobj.group('prefix'))

        return EXTERNAL_FORMAT_RE.sub(create_key, outtmpl), TMPL_DICT
1179
1180     def evaluate_outtmpl(self, outtmpl, info_dict, *args, **kwargs):
1181         outtmpl, info_dict = self.prepare_outtmpl(outtmpl, info_dict, *args, **kwargs)
1182         return self.escape_outtmpl(outtmpl) % info_dict
1183
1184     def _prepare_filename(self, info_dict, tmpl_type='default'):
1185         try:
1186             sanitize = lambda k, v: sanitize_filename(
1187                 compat_str(v),
1188                 restricted=self.params.get('restrictfilenames'),
1189                 is_id=(k == 'id' or k.endswith('_id')))
1190             outtmpl = self._outtmpl_expandpath(self.outtmpl_dict.get(tmpl_type, self.outtmpl_dict['default']))
1191             filename = self.evaluate_outtmpl(outtmpl, info_dict, sanitize)
1192
1193             force_ext = OUTTMPL_TYPES.get(tmpl_type)
1194             if filename and force_ext is not None:
1195                 filename = replace_extension(filename, force_ext, info_dict.get('ext'))
1196
1197             # https://github.com/blackjack4494/youtube-dlc/issues/85
1198             trim_file_name = self.params.get('trim_file_name', False)
1199             if trim_file_name:
1200                 no_ext, *ext = filename.rsplit('.', 2)
1201                 filename = join_nonempty(no_ext[:trim_file_name], *ext, delim='.')
1202
1203             return filename
1204         except ValueError as err:
1205             self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
1206             return None
1207
1208     def prepare_filename(self, info_dict, dir_type='', warn=False):
1209         """Generate the output filename."""
1210
1211         filename = self._prepare_filename(info_dict, dir_type or 'default')
1212         if not filename and dir_type not in ('', 'temp'):
1213             return ''
1214
1215         if warn:
1216             if not self.params.get('paths'):
1217                 pass
1218             elif filename == '-':
1219                 self.report_warning('--paths is ignored when an outputting to stdout', only_once=True)
1220             elif os.path.isabs(filename):
1221                 self.report_warning('--paths is ignored since an absolute path is given in output template', only_once=True)
1222         if filename == '-' or not filename:
1223             return filename
1224
1225         return self.get_output_path(dir_type, filename)
1226
1227     def _match_entry(self, info_dict, incomplete=False, silent=False):
1228         """ Returns None if the file should be downloaded """
1229
1230         video_title = info_dict.get('title', info_dict.get('id', 'video'))
1231
1232         def check_filter():
1233             if 'title' in info_dict:
1234                 # This can happen when we're just evaluating the playlist
1235                 title = info_dict['title']
1236                 matchtitle = self.params.get('matchtitle', False)
1237                 if matchtitle:
1238                     if not re.search(matchtitle, title, re.IGNORECASE):
1239                         return '"' + title + '" title did not match pattern "' + matchtitle + '"'
1240                 rejecttitle = self.params.get('rejecttitle', False)
1241                 if rejecttitle:
1242                     if re.search(rejecttitle, title, re.IGNORECASE):
1243                         return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
1244             date = info_dict.get('upload_date')
1245             if date is not None:
1246                 dateRange = self.params.get('daterange', DateRange())
1247                 if date not in dateRange:
1248                     return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
1249             view_count = info_dict.get('view_count')
1250             if view_count is not None:
1251                 min_views = self.params.get('min_views')
1252                 if min_views is not None and view_count < min_views:
1253                     return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
1254                 max_views = self.params.get('max_views')
1255                 if max_views is not None and view_count > max_views:
1256                     return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
1257             if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
1258                 return 'Skipping "%s" because it is age restricted' % video_title
1259
1260             match_filter = self.params.get('match_filter')
1261             if match_filter is not None:
1262                 try:
1263                     ret = match_filter(info_dict, incomplete=incomplete)
1264                 except TypeError:
1265                     # For backward compatibility
1266                     ret = None if incomplete else match_filter(info_dict)
1267                 if ret is not None:
1268                     return ret
1269             return None
1270
1271         if self.in_download_archive(info_dict):
1272             reason = '%s has already been recorded in the archive' % video_title
1273             break_opt, break_err = 'break_on_existing', ExistingVideoReached
1274         else:
1275             reason = check_filter()
1276             break_opt, break_err = 'break_on_reject', RejectedVideoReached
1277         if reason is not None:
1278             if not silent:
1279                 self.to_screen('[download] ' + reason)
1280             if self.params.get(break_opt, False):
1281                 raise break_err()
1282         return reason
1283
1284     @staticmethod
1285     def add_extra_info(info_dict, extra_info):
1286         '''Set the keys from extra_info in info dict if they are missing'''
1287         for key, value in extra_info.items():
1288             info_dict.setdefault(key, value)
1289
1290     def extract_info(self, url, download=True, ie_key=None, extra_info=None,
1291                      process=True, force_generic_extractor=False):
1292         """
1293         Return a list with a dictionary for each video extracted.
1294
1295         Arguments:
1296         url -- URL to extract
1297
1298         Keyword arguments:
1299         download -- whether to download videos during extraction
1300         ie_key -- extractor key hint
1301         extra_info -- dictionary containing the extra values to add to each result
1302         process -- whether to resolve all unresolved references (URLs, playlist items),
1303             must be True for download to work.
1304         force_generic_extractor -- force using the generic extractor
1305         """
1306
1307         if extra_info is None:
1308             extra_info = {}
1309
1310         if not ie_key and force_generic_extractor:
1311             ie_key = 'Generic'
1312
1313         if ie_key:
1314             ies = {ie_key: self._get_info_extractor_class(ie_key)}
1315         else:
1316             ies = self._ies
1317
1318         for ie_key, ie in ies.items():
1319             if not ie.suitable(url):
1320                 continue
1321
1322             if not ie.working():
1323                 self.report_warning('The program functionality for this site has been marked as broken, '
1324                                     'and will probably not work.')
1325
1326             temp_id = ie.get_temp_id(url)
1327             if temp_id is not None and self.in_download_archive({'id': temp_id, 'ie_key': ie_key}):
1328                 self.to_screen(f'[{ie_key}] {temp_id}: has already been recorded in the archive')
1329                 if self.params.get('break_on_existing', False):
1330                     raise ExistingVideoReached()
1331                 break
1332             return self.__extract_info(url, self.get_info_extractor(ie_key), download, extra_info, process)
1333         else:
1334             self.report_error('no suitable InfoExtractor for URL %s' % url)
1335
1336     def __handle_extraction_exceptions(func):
1337         @functools.wraps(func)
1338         def wrapper(self, *args, **kwargs):
1339             try:
1340                 return func(self, *args, **kwargs)
1341             except GeoRestrictedError as e:
1342                 msg = e.msg
1343                 if e.countries:
1344                     msg += '\nThis video is available in %s.' % ', '.join(
1345                         map(ISO3166Utils.short2full, e.countries))
1346                 msg += '\nYou might want to use a VPN or a proxy server (with --proxy) to workaround.'
1347                 self.report_error(msg)
1348             except ExtractorError as e:  # An error we somewhat expected
1349                 self.report_error(compat_str(e), e.format_traceback())
1350             except ReExtractInfo as e:
1351                 if e.expected:
1352                     self.to_screen(f'{e}; Re-extracting data')
1353                 else:
1354                     self.to_stderr('\r')
1355                     self.report_warning(f'{e}; Re-extracting data')
1356                 return wrapper(self, *args, **kwargs)
1357             except (DownloadCancelled, LazyList.IndexError, PagedList.IndexError):
1358                 raise
1359             except Exception as e:
1360                 if self.params.get('ignoreerrors'):
1361                     self.report_error(error_to_compat_str(e), tb=encode_compat_str(traceback.format_exc()))
1362                 else:
1363                     raise
1364         return wrapper
1365
1366     def _wait_for_video(self, ie_result):
1367         if (not self.params.get('wait_for_video')
1368                 or ie_result.get('_type', 'video') != 'video'
1369                 or ie_result.get('formats') or ie_result.get('url')):
1370             return
1371
1372         format_dur = lambda dur: '%02d:%02d:%02d' % timetuple_from_msec(dur * 1000)[:-1]
1373         last_msg = ''
1374
1375         def progress(msg):
1376             nonlocal last_msg
1377             self.to_screen(msg + ' ' * (len(last_msg) - len(msg)) + '\r', skip_eol=True)
1378             last_msg = msg
1379
1380         min_wait, max_wait = self.params.get('wait_for_video')
1381         diff = try_get(ie_result, lambda x: x['release_timestamp'] - time.time())
1382         if diff is None and ie_result.get('live_status') == 'is_upcoming':
1383             diff = random.randrange(min_wait, max_wait) if (max_wait and min_wait) else (max_wait or min_wait)
1384             self.report_warning('Release time of video is not known')
1385         elif (diff or 0) <= 0:
1386             self.report_warning('Video should already be available according to extracted info')
1387         diff = min(max(diff or 0, min_wait or 0), max_wait or float('inf'))
1388         self.to_screen(f'[wait] Waiting for {format_dur(diff)} - Press Ctrl+C to try now')
1389
1390         wait_till = time.time() + diff
1391         try:
1392             while True:
1393                 diff = wait_till - time.time()
1394                 if diff <= 0:
1395                     progress('')
1396                     raise ReExtractInfo('[wait] Wait period ended', expected=True)
1397                 progress(f'[wait] Remaining time until next attempt: {self._format_screen(format_dur(diff), self.Styles.EMPHASIS)}')
1398                 time.sleep(1)
1399         except KeyboardInterrupt:
1400             progress('')
1401             raise ReExtractInfo('[wait] Interrupted by user', expected=True)
1402         except BaseException as e:
1403             if not isinstance(e, ReExtractInfo):
1404                 self.to_screen('')
1405             raise
1406
1407     @__handle_extraction_exceptions
1408     def __extract_info(self, url, ie, download, extra_info, process):
1409         ie_result = ie.extract(url)
1410         if ie_result is None:  # Finished already (backwards compatibility; listformats and friends should be moved here)
1411             return
1412         if isinstance(ie_result, list):
1413             # Backwards compatibility: old IE result format
1414             ie_result = {
1415                 '_type': 'compat_list',
1416                 'entries': ie_result,
1417             }
1418         if extra_info.get('original_url'):
1419             ie_result.setdefault('original_url', extra_info['original_url'])
1420         self.add_default_extra_info(ie_result, ie, url)
1421         if process:
1422             self._wait_for_video(ie_result)
1423             return self.process_ie_result(ie_result, download, extra_info)
1424         else:
1425             return ie_result
1426
1427     def add_default_extra_info(self, ie_result, ie, url):
1428         if url is not None:
1429             self.add_extra_info(ie_result, {
1430                 'webpage_url': url,
1431                 'original_url': url,
1432                 'webpage_url_basename': url_basename(url),
1433                 'webpage_url_domain': get_domain(url),
1434             })
1435         if ie is not None:
1436             self.add_extra_info(ie_result, {
1437                 'extractor': ie.IE_NAME,
1438                 'extractor_key': ie.ie_key(),
1439             })
1440
1441     def process_ie_result(self, ie_result, download=True, extra_info=None):
1442         """
1443         Take the result of the ie(may be modified) and resolve all unresolved
1444         references (URLs, playlist items).
1445
1446         It will also download the videos if 'download'.
1447         Returns the resolved ie_result.
1448         """
1449         if extra_info is None:
1450             extra_info = {}
1451         result_type = ie_result.get('_type', 'video')
1452
1453         if result_type in ('url', 'url_transparent'):
1454             ie_result['url'] = sanitize_url(ie_result['url'])
1455             if ie_result.get('original_url'):
1456                 extra_info.setdefault('original_url', ie_result['original_url'])
1457
1458             extract_flat = self.params.get('extract_flat', False)
1459             if ((extract_flat == 'in_playlist' and 'playlist' in extra_info)
1460                     or extract_flat is True):
1461                 info_copy = ie_result.copy()
1462                 ie = try_get(ie_result.get('ie_key'), self.get_info_extractor)
1463                 if ie and not ie_result.get('id'):
1464                     info_copy['id'] = ie.get_temp_id(ie_result['url'])
1465                 self.add_default_extra_info(info_copy, ie, ie_result['url'])
1466                 self.add_extra_info(info_copy, extra_info)
1467                 info_copy, _ = self.pre_process(info_copy)
1468                 self.__forced_printings(info_copy, self.prepare_filename(info_copy), incomplete=True)
1469                 if self.params.get('force_write_download_archive', False):
1470                     self.record_download_archive(info_copy)
1471                 return ie_result
1472
1473         if result_type == 'video':
1474             self.add_extra_info(ie_result, extra_info)
1475             ie_result = self.process_video_result(ie_result, download=download)
1476             additional_urls = (ie_result or {}).get('additional_urls')
1477             if additional_urls:
1478                 # TODO: Improve MetadataParserPP to allow setting a list
1479                 if isinstance(additional_urls, compat_str):
1480                     additional_urls = [additional_urls]
1481                 self.to_screen(
1482                     '[info] %s: %d additional URL(s) requested' % (ie_result['id'], len(additional_urls)))
1483                 self.write_debug('Additional URLs: "%s"' % '", "'.join(additional_urls))
1484                 ie_result['additional_entries'] = [
1485                     self.extract_info(
1486                         url, download, extra_info,
1487                         force_generic_extractor=self.params.get('force_generic_extractor'))
1488                     for url in additional_urls
1489                 ]
1490             return ie_result
1491         elif result_type == 'url':
1492             # We have to add extra_info to the results because it may be
1493             # contained in a playlist
1494             return self.extract_info(
1495                 ie_result['url'], download,
1496                 ie_key=ie_result.get('ie_key'),
1497                 extra_info=extra_info)
1498         elif result_type == 'url_transparent':
1499             # Use the information from the embedding page
1500             info = self.extract_info(
1501                 ie_result['url'], ie_key=ie_result.get('ie_key'),
1502                 extra_info=extra_info, download=False, process=False)
1503
1504             # extract_info may return None when ignoreerrors is enabled and
1505             # extraction failed with an error, don't crash and return early
1506             # in this case
1507             if not info:
1508                 return info
1509
1510             force_properties = dict(
1511                 (k, v) for k, v in ie_result.items() if v is not None)
1512             for f in ('_type', 'url', 'id', 'extractor', 'extractor_key', 'ie_key'):
1513                 if f in force_properties:
1514                     del force_properties[f]
1515             new_result = info.copy()
1516             new_result.update(force_properties)
1517
1518             # Extracted info may not be a video result (i.e.
1519             # info.get('_type', 'video') != video) but rather an url or
1520             # url_transparent. In such cases outer metadata (from ie_result)
1521             # should be propagated to inner one (info). For this to happen
1522             # _type of info should be overridden with url_transparent. This
1523             # fixes issue from https://github.com/ytdl-org/youtube-dl/pull/11163.
1524             if new_result.get('_type') == 'url':
1525                 new_result['_type'] = 'url_transparent'
1526
1527             return self.process_ie_result(
1528                 new_result, download=download, extra_info=extra_info)
1529         elif result_type in ('playlist', 'multi_video'):
1530             # Protect from infinite recursion due to recursively nested playlists
1531             # (see https://github.com/ytdl-org/youtube-dl/issues/27833)
1532             webpage_url = ie_result['webpage_url']
1533             if webpage_url in self._playlist_urls:
1534                 self.to_screen(
1535                     '[download] Skipping already downloaded playlist: %s'
1536                     % ie_result.get('title') or ie_result.get('id'))
1537                 return
1538
1539             self._playlist_level += 1
1540             self._playlist_urls.add(webpage_url)
1541             self._sanitize_thumbnails(ie_result)
1542             try:
1543                 return self.__process_playlist(ie_result, download)
1544             finally:
1545                 self._playlist_level -= 1
1546                 if not self._playlist_level:
1547                     self._playlist_urls.clear()
1548         elif result_type == 'compat_list':
1549             self.report_warning(
1550                 'Extractor %s returned a compat_list result. '
1551                 'It needs to be updated.' % ie_result.get('extractor'))
1552
1553             def _fixup(r):
1554                 self.add_extra_info(r, {
1555                     'extractor': ie_result['extractor'],
1556                     'webpage_url': ie_result['webpage_url'],
1557                     'webpage_url_basename': url_basename(ie_result['webpage_url']),
1558                     'webpage_url_domain': get_domain(ie_result['webpage_url']),
1559                     'extractor_key': ie_result['extractor_key'],
1560                 })
1561                 return r
1562             ie_result['entries'] = [
1563                 self.process_ie_result(_fixup(r), download, extra_info)
1564                 for r in ie_result['entries']
1565             ]
1566             return ie_result
1567         else:
1568             raise Exception('Invalid result type: %s' % result_type)
1569
1570     def _ensure_dir_exists(self, path):
1571         return make_dir(path, self.report_error)
1572
    def __process_playlist(self, ie_result, download):
        """
        Select, filter and process the entries of a playlist result.

        The entries are narrowed down using the 'playlist_items' param or the
        'playliststart' / 'playlistend' params, optionally reversed and/or
        shuffled, and then each one is run through process_ie_result.
        Playlist-level files (info json, description, thumbnails) are written
        unless 'simulate' is set or 'allow_playlist_files' is disabled.

        Returns ie_result with 'entries' replaced by the processed results,
        or None if writing one of the playlist files reported failure (None).
        Raises EntryNotInPlaylist if there are no entries, or if a requested
        entry of a partial playlist cannot be found.
        """
        # We process each entry in the playlist
        playlist = ie_result.get('title') or ie_result.get('id')
        self.to_screen('[download] Downloading playlist: %s' % playlist)

        if 'entries' not in ie_result:
            raise EntryNotInPlaylist('There are no entries')

        # Unique sentinel marking positions for which no entry is available
        MissingEntry = object()
        incomplete_entries = bool(ie_result.get('requested_entries'))
        if incomplete_entries:
            # 'entries' only holds the items listed in 'requested_entries' (1-based
            # indices); rebuild a full-length list with placeholders in the gaps
            def fill_missing_entries(entries, indices):
                ret = [MissingEntry] * max(indices)
                for i, entry in zip(indices, entries):
                    ret[i - 1] = entry
                return ret
            ie_result['entries'] = fill_missing_entries(ie_result['entries'], ie_result['requested_entries'])

        playlist_results = []

        playliststart = self.params.get('playliststart', 1)
        playlistend = self.params.get('playlistend')
        # For backwards compatibility, interpret -1 as whole list
        if playlistend == -1:
            playlistend = None

        playlistitems_str = self.params.get('playlist_items')
        playlistitems = None
        if playlistitems_str is not None:
            # Parse a comma separated list of indices and inclusive 'start-end' ranges
            def iter_playlistitems(format):
                for string_segment in format.split(','):
                    if '-' in string_segment:
                        start, end = string_segment.split('-')
                        for item in range(int(start), int(end) + 1):
                            yield int(item)
                    else:
                        yield int(string_segment)
            # presumably orderedSet drops duplicates while keeping first-seen order - confirm in utils
            playlistitems = orderedSet(iter_playlistitems(playlistitems_str))

        ie_entries = ie_result['entries']
        # For a list the total is already known and filled in here, leaving one
        # '%d' (escaped as '%%d') for the number of selected entries
        msg = (
            'Downloading %d videos' if not isinstance(ie_entries, list)
            else 'Collected %d videos; downloading %%d of them' % len(ie_entries))

        # get_entry(i) fetches the entry at 1-based position i
        if isinstance(ie_entries, list):
            def get_entry(i):
                return ie_entries[i - 1]
        else:
            if not isinstance(ie_entries, (PagedList, LazyList)):
                ie_entries = LazyList(ie_entries)

            # Indexing a lazy container may trigger extraction, so route it
            # through the common extraction error handling
            def get_entry(i):
                return YoutubeDL.__handle_extraction_exceptions(
                    lambda self, i: ie_entries[i - 1]
                )(self, i)

        entries = []
        items = playlistitems if playlistitems is not None else itertools.count(playliststart)
        for i in items:
            if i == 0:
                continue
            if playlistitems is None and playlistend is not None and playlistend < i:
                break
            entry = None
            try:
                entry = get_entry(i)
                if entry is MissingEntry:
                    raise EntryNotInPlaylist()
            except (IndexError, EntryNotInPlaylist):
                if incomplete_entries:
                    raise EntryNotInPlaylist(f'Entry {i} cannot be found')
                elif not playlistitems:
                    break
            # With explicit 'playlist_items', an out-of-range index falls through
            # to here and is recorded as None so that positions stay aligned
            entries.append(entry)
            try:
                if entry is not None:
                    # Only evaluated for its break_on_existing/break_on_reject
                    # exceptions, so that no further entries are collected
                    self._match_entry(entry, incomplete=True, silent=True)
            except (ExistingVideoReached, RejectedVideoReached):
                break
        ie_result['entries'] = entries

        # Save playlist_index before re-ordering
        entries = [
            ((playlistitems[i - 1] if playlistitems else i + playliststart - 1), entry)
            for i, entry in enumerate(entries, 1)
            if entry is not None]
        n_entries = len(entries)

        if not playlistitems and (playliststart != 1 or playlistend):
            playlistitems = list(range(playliststart, playliststart + n_entries))
        ie_result['requested_entries'] = playlistitems

        _infojson_written = False
        if not self.params.get('simulate') and self.params.get('allow_playlist_files', True):
            # Playlist-level fields used for the output templates of the playlist
            # files; values present in ie_result take precedence
            ie_copy = {
                'playlist': playlist,
                'playlist_id': ie_result.get('id'),
                'playlist_title': ie_result.get('title'),
                'playlist_uploader': ie_result.get('uploader'),
                'playlist_uploader_id': ie_result.get('uploader_id'),
                'playlist_index': 0,
                'n_entries': n_entries,
            }
            ie_copy.update(dict(ie_result))

            # A None result from the writers aborts processing of this playlist
            _infojson_written = self._write_info_json(
                'playlist', ie_result, self.prepare_filename(ie_copy, 'pl_infojson'))
            if _infojson_written is None:
                return
            if self._write_description('playlist', ie_result,
                                       self.prepare_filename(ie_copy, 'pl_description')) is None:
                return
            # TODO: This should be passed to ThumbnailsConvertor if necessary
            self._write_thumbnails('playlist', ie_copy, self.prepare_filename(ie_copy, 'pl_thumbnail'))

        if self.params.get('playlistreverse', False):
            entries = entries[::-1]
        if self.params.get('playlistrandom', False):
            random.shuffle(entries)

        x_forwarded_for = ie_result.get('__x_forwarded_for_ip')

        self.to_screen('[%s] playlist %s: %s' % (ie_result['extractor'], playlist, msg % n_entries))
        failures = 0
        # No limit unless 'skip_playlist_after_errors' is set to a truthy value
        max_failures = self.params.get('skip_playlist_after_errors') or float('inf')
        for i, entry_tuple in enumerate(entries, 1):
            playlist_index, entry = entry_tuple
            if 'playlist-index' in self.params.get('compat_opts', []):
                # Compat behaviour: derive the index from the position after re-ordering
                playlist_index = playlistitems[i - 1] if playlistitems else i + playliststart - 1
            self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
            # This __x_forwarded_for_ip thing is a bit ugly but requires
            # minimal changes
            if x_forwarded_for:
                entry['__x_forwarded_for_ip'] = x_forwarded_for
            extra = {
                'n_entries': n_entries,
                '_last_playlist_index': max(playlistitems) if playlistitems else (playlistend or n_entries),
                'playlist_index': playlist_index,
                'playlist_autonumber': i,
                'playlist': playlist,
                'playlist_id': ie_result.get('id'),
                'playlist_title': ie_result.get('title'),
                'playlist_uploader': ie_result.get('uploader'),
                'playlist_uploader_id': ie_result.get('uploader_id'),
                'extractor': ie_result['extractor'],
                'webpage_url': ie_result['webpage_url'],
                'webpage_url_basename': url_basename(ie_result['webpage_url']),
                'webpage_url_domain': get_domain(ie_result['webpage_url']),
                'extractor_key': ie_result['extractor_key'],
            }

            # Entries rejected by the filters are skipped and not counted as failures
            if self._match_entry(entry, incomplete=True) is not None:
                continue

            entry_result = self.__process_iterable_entry(entry, download, extra)
            if not entry_result:
                failures += 1
            if failures >= max_failures:
                self.report_error(
                    'Skipping the remaining entries in playlist "%s" since %d items failed extraction' % (playlist, failures))
                break
            playlist_results.append(entry_result)
        ie_result['entries'] = playlist_results

        # Write the updated info to json
        # (ie_copy is always defined here because _infojson_written can only be
        # truthy if the playlist-files branch above was taken)
        if _infojson_written and self._write_info_json(
                'updated playlist', ie_result,
                self.prepare_filename(ie_copy, 'pl_infojson'), overwrite=True) is None:
            return
        self.to_screen('[download] Finished downloading playlist: %s' % playlist)
        return ie_result
1744
1745     @__handle_extraction_exceptions
1746     def __process_iterable_entry(self, entry, download, extra_info):
1747         return self.process_ie_result(
1748             entry, download=download, extra_info=extra_info)
1749
1750     def _build_format_filter(self, filter_spec):
1751         " Returns a function to filter the formats according to the filter_spec "
1752
1753         OPERATORS = {
1754             '<': operator.lt,
1755             '<=': operator.le,
1756             '>': operator.gt,
1757             '>=': operator.ge,
1758             '=': operator.eq,
1759             '!=': operator.ne,
1760         }
1761         operator_rex = re.compile(r'''(?x)\s*
1762             (?P<key>width|height|tbr|abr|vbr|asr|filesize|filesize_approx|fps)\s*
1763             (?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
1764             (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)\s*
1765             ''' % '|'.join(map(re.escape, OPERATORS.keys())))
1766         m = operator_rex.fullmatch(filter_spec)
1767         if m:
1768             try:
1769                 comparison_value = int(m.group('value'))
1770             except ValueError:
1771                 comparison_value = parse_filesize(m.group('value'))
1772                 if comparison_value is None:
1773                     comparison_value = parse_filesize(m.group('value') + 'B')
1774                 if comparison_value is None:
1775                     raise ValueError(
1776                         'Invalid value %r in format specification %r' % (
1777                             m.group('value'), filter_spec))
1778             op = OPERATORS[m.group('op')]
1779
1780         if not m:
1781             STR_OPERATORS = {
1782                 '=': operator.eq,
1783                 '^=': lambda attr, value: attr.startswith(value),
1784                 '$=': lambda attr, value: attr.endswith(value),
1785                 '*=': lambda attr, value: value in attr,
1786             }
1787             str_operator_rex = re.compile(r'''(?x)\s*
1788                 (?P<key>[a-zA-Z0-9._-]+)\s*
1789                 (?P<negation>!\s*)?(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
1790                 (?P<value>[a-zA-Z0-9._-]+)\s*
1791                 ''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))
1792             m = str_operator_rex.fullmatch(filter_spec)
1793             if m:
1794                 comparison_value = m.group('value')
1795                 str_op = STR_OPERATORS[m.group('op')]
1796                 if m.group('negation'):
1797                     op = lambda attr, value: not str_op(attr, value)
1798                 else:
1799                     op = str_op
1800
1801         if not m:
1802             raise SyntaxError('Invalid filter specification %r' % filter_spec)
1803
1804         def _filter(f):
1805             actual_value = f.get(m.group('key'))
1806             if actual_value is None:
1807                 return m.group('none_inclusive')
1808             return op(actual_value, comparison_value)
1809         return _filter
1810
1811     def _check_formats(self, formats):
1812         for f in formats:
1813             self.to_screen('[info] Testing format %s' % f['format_id'])
1814             path = self.get_output_path('temp')
1815             if not self._ensure_dir_exists(f'{path}/'):
1816                 continue
1817             temp_file = tempfile.NamedTemporaryFile(suffix='.tmp', delete=False, dir=path or None)
1818             temp_file.close()
1819             try:
1820                 success, _ = self.dl(temp_file.name, f, test=True)
1821             except (DownloadError, IOError, OSError, ValueError) + network_exceptions:
1822                 success = False
1823             finally:
1824                 if os.path.exists(temp_file.name):
1825                     try:
1826                         os.remove(temp_file.name)
1827                     except OSError:
1828                         self.report_warning('Unable to delete temporary file "%s"' % temp_file.name)
1829             if success:
1830                 yield f
1831             else:
1832                 self.to_screen('[info] Unable to download format %s. Skipping...' % f['format_id'])
1833
1834     def _default_format_spec(self, info_dict, download=True):
1835
1836         def can_merge():
1837             merger = FFmpegMergerPP(self)
1838             return merger.available and merger.can_merge()
1839
1840         prefer_best = (
1841             not self.params.get('simulate')
1842             and download
1843             and (
1844                 not can_merge()
1845                 or info_dict.get('is_live', False)
1846                 or self.outtmpl_dict['default'] == '-'))
1847         compat = (
1848             prefer_best
1849             or self.params.get('allow_multiple_audio_streams', False)
1850             or 'format-spec' in self.params.get('compat_opts', []))
1851
1852         return (
1853             'best/bestvideo+bestaudio' if prefer_best
1854             else 'bestvideo*+bestaudio/best' if not compat
1855             else 'bestvideo+bestaudio/best')
1856
1857     def build_format_selector(self, format_spec):
1858         def syntax_error(note, start):
1859             message = (
1860                 'Invalid format specification: '
1861                 '{0}\n\t{1}\n\t{2}^'.format(note, format_spec, ' ' * start[1]))
1862             return SyntaxError(message)
1863
1864         PICKFIRST = 'PICKFIRST'
1865         MERGE = 'MERGE'
1866         SINGLE = 'SINGLE'
1867         GROUP = 'GROUP'
1868         FormatSelector = collections.namedtuple('FormatSelector', ['type', 'selector', 'filters'])
1869
1870         allow_multiple_streams = {'audio': self.params.get('allow_multiple_audio_streams', False),
1871                                   'video': self.params.get('allow_multiple_video_streams', False)}
1872
1873         check_formats = self.params.get('check_formats') == 'selected'
1874
1875         def _parse_filter(tokens):
1876             filter_parts = []
1877             for type, string, start, _, _ in tokens:
1878                 if type == tokenize.OP and string == ']':
1879                     return ''.join(filter_parts)
1880                 else:
1881                     filter_parts.append(string)
1882
1883         def _remove_unused_ops(tokens):
1884             # Remove operators that we don't use and join them with the surrounding strings
1885             # for example: 'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9'
1886             ALLOWED_OPS = ('/', '+', ',', '(', ')')
1887             last_string, last_start, last_end, last_line = None, None, None, None
1888             for type, string, start, end, line in tokens:
1889                 if type == tokenize.OP and string == '[':
1890                     if last_string:
1891                         yield tokenize.NAME, last_string, last_start, last_end, last_line
1892                         last_string = None
1893                     yield type, string, start, end, line
1894                     # everything inside brackets will be handled by _parse_filter
1895                     for type, string, start, end, line in tokens:
1896                         yield type, string, start, end, line
1897                         if type == tokenize.OP and string == ']':
1898                             break
1899                 elif type == tokenize.OP and string in ALLOWED_OPS:
1900                     if last_string:
1901                         yield tokenize.NAME, last_string, last_start, last_end, last_line
1902                         last_string = None
1903                     yield type, string, start, end, line
1904                 elif type in [tokenize.NAME, tokenize.NUMBER, tokenize.OP]:
1905                     if not last_string:
1906                         last_string = string
1907                         last_start = start
1908                         last_end = end
1909                     else:
1910                         last_string += string
1911             if last_string:
1912                 yield tokenize.NAME, last_string, last_start, last_end, last_line
1913
1914         def _parse_format_selection(tokens, inside_merge=False, inside_choice=False, inside_group=False):
1915             selectors = []
1916             current_selector = None
1917             for type, string, start, _, _ in tokens:
1918                 # ENCODING is only defined in python 3.x
1919                 if type == getattr(tokenize, 'ENCODING', None):
1920                     continue
1921                 elif type in [tokenize.NAME, tokenize.NUMBER]:
1922                     current_selector = FormatSelector(SINGLE, string, [])
1923                 elif type == tokenize.OP:
1924                     if string == ')':
1925                         if not inside_group:
1926                             # ')' will be handled by the parentheses group
1927                             tokens.restore_last_token()
1928                         break
1929                     elif inside_merge and string in ['/', ',']:
1930                         tokens.restore_last_token()
1931                         break
1932                     elif inside_choice and string == ',':
1933                         tokens.restore_last_token()
1934                         break
1935                     elif string == ',':
1936                         if not current_selector:
1937                             raise syntax_error('"," must follow a format selector', start)
1938                         selectors.append(current_selector)
1939                         current_selector = None
1940                     elif string == '/':
1941                         if not current_selector:
1942                             raise syntax_error('"/" must follow a format selector', start)
1943                         first_choice = current_selector
1944                         second_choice = _parse_format_selection(tokens, inside_choice=True)
1945                         current_selector = FormatSelector(PICKFIRST, (first_choice, second_choice), [])
1946                     elif string == '[':
1947                         if not current_selector:
1948                             current_selector = FormatSelector(SINGLE, 'best', [])
1949                         format_filter = _parse_filter(tokens)
1950                         current_selector.filters.append(format_filter)
1951                     elif string == '(':
1952                         if current_selector:
1953                             raise syntax_error('Unexpected "("', start)
1954                         group = _parse_format_selection(tokens, inside_group=True)
1955                         current_selector = FormatSelector(GROUP, group, [])
1956                     elif string == '+':
1957                         if not current_selector:
1958                             raise syntax_error('Unexpected "+"', start)
1959                         selector_1 = current_selector
1960                         selector_2 = _parse_format_selection(tokens, inside_merge=True)
1961                         if not selector_2:
1962                             raise syntax_error('Expected a selector', start)
1963                         current_selector = FormatSelector(MERGE, (selector_1, selector_2), [])
1964                     else:
1965                         raise syntax_error('Operator not recognized: "{0}"'.format(string), start)
1966                 elif type == tokenize.ENDMARKER:
1967                     break
1968             if current_selector:
1969                 selectors.append(current_selector)
1970             return selectors
1971
1972         def _merge(formats_pair):
1973             format_1, format_2 = formats_pair
1974
1975             formats_info = []
1976             formats_info.extend(format_1.get('requested_formats', (format_1,)))
1977             formats_info.extend(format_2.get('requested_formats', (format_2,)))
1978
1979             if not allow_multiple_streams['video'] or not allow_multiple_streams['audio']:
1980                 get_no_more = {'video': False, 'audio': False}
1981                 for (i, fmt_info) in enumerate(formats_info):
1982                     if fmt_info.get('acodec') == fmt_info.get('vcodec') == 'none':
1983                         formats_info.pop(i)
1984                         continue
1985                     for aud_vid in ['audio', 'video']:
1986                         if not allow_multiple_streams[aud_vid] and fmt_info.get(aud_vid[0] + 'codec') != 'none':
1987                             if get_no_more[aud_vid]:
1988                                 formats_info.pop(i)
1989                                 break
1990                             get_no_more[aud_vid] = True
1991
1992             if len(formats_info) == 1:
1993                 return formats_info[0]
1994
1995             video_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('vcodec') != 'none']
1996             audio_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('acodec') != 'none']
1997
1998             the_only_video = video_fmts[0] if len(video_fmts) == 1 else None
1999             the_only_audio = audio_fmts[0] if len(audio_fmts) == 1 else None
2000
2001             output_ext = self.params.get('merge_output_format')
2002             if not output_ext:
2003                 if the_only_video:
2004                     output_ext = the_only_video['ext']
2005                 elif the_only_audio and not video_fmts:
2006                     output_ext = the_only_audio['ext']
2007                 else:
2008                     output_ext = 'mkv'
2009
2010             filtered = lambda *keys: filter(None, (traverse_obj(fmt, *keys) for fmt in formats_info))
2011
2012             new_dict = {
2013                 'requested_formats': formats_info,
2014                 'format': '+'.join(filtered('format')),
2015                 'format_id': '+'.join(filtered('format_id')),
2016                 'ext': output_ext,
2017                 'protocol': '+'.join(map(determine_protocol, formats_info)),
2018                 'language': '+'.join(orderedSet(filtered('language'))) or None,
2019                 'format_note': '+'.join(orderedSet(filtered('format_note'))) or None,
2020                 'filesize_approx': sum(filtered('filesize', 'filesize_approx')) or None,
2021                 'tbr': sum(filtered('tbr', 'vbr', 'abr')),
2022             }
2023
2024             if the_only_video:
2025                 new_dict.update({
2026                     'width': the_only_video.get('width'),
2027                     'height': the_only_video.get('height'),
2028                     'resolution': the_only_video.get('resolution') or self.format_resolution(the_only_video),
2029                     'fps': the_only_video.get('fps'),
2030                     'dynamic_range': the_only_video.get('dynamic_range'),
2031                     'vcodec': the_only_video.get('vcodec'),
2032                     'vbr': the_only_video.get('vbr'),
2033                     'stretched_ratio': the_only_video.get('stretched_ratio'),
2034                 })
2035
2036             if the_only_audio:
2037                 new_dict.update({
2038                     'acodec': the_only_audio.get('acodec'),
2039                     'abr': the_only_audio.get('abr'),
2040                     'asr': the_only_audio.get('asr'),
2041                 })
2042
2043             return new_dict
2044
2045         def _check_formats(formats):
2046             if not check_formats:
2047                 yield from formats
2048                 return
2049             yield from self._check_formats(formats)
2050
2051         def _build_selector_function(selector):
2052             if isinstance(selector, list):  # ,
2053                 fs = [_build_selector_function(s) for s in selector]
2054
2055                 def selector_function(ctx):
2056                     for f in fs:
2057                         yield from f(ctx)
2058                 return selector_function
2059
2060             elif selector.type == GROUP:  # ()
2061                 selector_function = _build_selector_function(selector.selector)
2062
2063             elif selector.type == PICKFIRST:  # /
2064                 fs = [_build_selector_function(s) for s in selector.selector]
2065
2066                 def selector_function(ctx):
2067                     for f in fs:
2068                         picked_formats = list(f(ctx))
2069                         if picked_formats:
2070                             return picked_formats
2071                     return []
2072
2073             elif selector.type == MERGE:  # +
2074                 selector_1, selector_2 = map(_build_selector_function, selector.selector)
2075
2076                 def selector_function(ctx):
2077                     for pair in itertools.product(selector_1(ctx), selector_2(ctx)):
2078                         yield _merge(pair)
2079
2080             elif selector.type == SINGLE:  # atom
2081                 format_spec = selector.selector or 'best'
2082
2083                 # TODO: Add allvideo, allaudio etc by generalizing the code with best/worst selector
2084                 if format_spec == 'all':
2085                     def selector_function(ctx):
2086                         yield from _check_formats(ctx['formats'][::-1])
2087                 elif format_spec == 'mergeall':
2088                     def selector_function(ctx):
2089                         formats = list(_check_formats(ctx['formats']))
2090                         if not formats:
2091                             return
2092                         merged_format = formats[-1]
2093                         for f in formats[-2::-1]:
2094                             merged_format = _merge((merged_format, f))
2095                         yield merged_format
2096
2097                 else:
2098                     format_fallback, format_reverse, format_idx = False, True, 1
2099                     mobj = re.match(
2100                         r'(?P<bw>best|worst|b|w)(?P<type>video|audio|v|a)?(?P<mod>\*)?(?:\.(?P<n>[1-9]\d*))?$',
2101                         format_spec)
2102                     if mobj is not None:
2103                         format_idx = int_or_none(mobj.group('n'), default=1)
2104                         format_reverse = mobj.group('bw')[0] == 'b'
2105                         format_type = (mobj.group('type') or [None])[0]
2106                         not_format_type = {'v': 'a', 'a': 'v'}.get(format_type)
2107                         format_modified = mobj.group('mod') is not None
2108
2109                         format_fallback = not format_type and not format_modified  # for b, w
2110                         _filter_f = (
2111                             (lambda f: f.get('%scodec' % format_type) != 'none')
2112                             if format_type and format_modified  # bv*, ba*, wv*, wa*
2113                             else (lambda f: f.get('%scodec' % not_format_type) == 'none')
2114                             if format_type  # bv, ba, wv, wa
2115                             else (lambda f: f.get('vcodec') != 'none' and f.get('acodec') != 'none')
2116                             if not format_modified  # b, w
2117                             else lambda f: True)  # b*, w*
2118                         filter_f = lambda f: _filter_f(f) and (
2119                             f.get('vcodec') != 'none' or f.get('acodec') != 'none')
2120                     else:
2121                         if format_spec in self._format_selection_exts['audio']:
2122                             filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') != 'none'
2123                         elif format_spec in self._format_selection_exts['video']:
2124                             filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') != 'none' and f.get('vcodec') != 'none'
2125                         elif format_spec in self._format_selection_exts['storyboards']:
2126                             filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') == 'none' and f.get('vcodec') == 'none'
2127                         else:
2128                             filter_f = lambda f: f.get('format_id') == format_spec  # id
2129
2130                     def selector_function(ctx):
2131                         formats = list(ctx['formats'])
2132                         matches = list(filter(filter_f, formats)) if filter_f is not None else formats
2133                         if format_fallback and ctx['incomplete_formats'] and not matches:
2134                             # for extractors with incomplete formats (audio only (soundcloud)
2135                             # or video only (imgur)) best/worst will fallback to
2136                             # best/worst {video,audio}-only format
2137                             matches = formats
2138                         matches = LazyList(_check_formats(matches[::-1 if format_reverse else 1]))
2139                         try:
2140                             yield matches[format_idx - 1]
2141                         except IndexError:
2142                             return
2143
2144             filters = [self._build_format_filter(f) for f in selector.filters]
2145
2146             def final_selector(ctx):
2147                 ctx_copy = dict(ctx)
2148                 for _filter in filters:
2149                     ctx_copy['formats'] = list(filter(_filter, ctx_copy['formats']))
2150                 return selector_function(ctx_copy)
2151             return final_selector
2152
2153         stream = io.BytesIO(format_spec.encode('utf-8'))
2154         try:
2155             tokens = list(_remove_unused_ops(compat_tokenize_tokenize(stream.readline)))
2156         except tokenize.TokenError:
2157             raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec)))
2158
2159         class TokenIterator(object):
2160             def __init__(self, tokens):
2161                 self.tokens = tokens
2162                 self.counter = 0
2163
2164             def __iter__(self):
2165                 return self
2166
2167             def __next__(self):
2168                 if self.counter >= len(self.tokens):
2169                     raise StopIteration()
2170                 value = self.tokens[self.counter]
2171                 self.counter += 1
2172                 return value
2173
2174             next = __next__
2175
2176             def restore_last_token(self):
2177                 self.counter -= 1
2178
2179         parsed_selector = _parse_format_selection(iter(TokenIterator(tokens)))
2180         return _build_selector_function(parsed_selector)
2181
2182     def _calc_headers(self, info_dict):
2183         res = std_headers.copy()
2184
2185         add_headers = info_dict.get('http_headers')
2186         if add_headers:
2187             res.update(add_headers)
2188
2189         cookies = self._calc_cookies(info_dict)
2190         if cookies:
2191             res['Cookie'] = cookies
2192
2193         if 'X-Forwarded-For' not in res:
2194             x_forwarded_for_ip = info_dict.get('__x_forwarded_for_ip')
2195             if x_forwarded_for_ip:
2196                 res['X-Forwarded-For'] = x_forwarded_for_ip
2197
2198         return res
2199
2200     def _calc_cookies(self, info_dict):
2201         pr = sanitized_Request(info_dict['url'])
2202         self.cookiejar.add_cookie_header(pr)
2203         return pr.get_header('Cookie')
2204
2205     def _sort_thumbnails(self, thumbnails):
2206         thumbnails.sort(key=lambda t: (
2207             t.get('preference') if t.get('preference') is not None else -1,
2208             t.get('width') if t.get('width') is not None else -1,
2209             t.get('height') if t.get('height') is not None else -1,
2210             t.get('id') if t.get('id') is not None else '',
2211             t.get('url')))
2212
2213     def _sanitize_thumbnails(self, info_dict):
2214         thumbnails = info_dict.get('thumbnails')
2215         if thumbnails is None:
2216             thumbnail = info_dict.get('thumbnail')
2217             if thumbnail:
2218                 info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}]
2219         if not thumbnails:
2220             return
2221
2222         def check_thumbnails(thumbnails):
2223             for t in thumbnails:
2224                 self.to_screen(f'[info] Testing thumbnail {t["id"]}')
2225                 try:
2226                     self.urlopen(HEADRequest(t['url']))
2227                 except network_exceptions as err:
2228                     self.to_screen(f'[info] Unable to connect to thumbnail {t["id"]} URL {t["url"]!r} - {err}. Skipping...')
2229                     continue
2230                 yield t
2231
2232         self._sort_thumbnails(thumbnails)
2233         for i, t in enumerate(thumbnails):
2234             if t.get('id') is None:
2235                 t['id'] = '%d' % i
2236             if t.get('width') and t.get('height'):
2237                 t['resolution'] = '%dx%d' % (t['width'], t['height'])
2238             t['url'] = sanitize_url(t['url'])
2239
2240         if self.params.get('check_formats') is True:
2241             info_dict['thumbnails'] = LazyList(check_thumbnails(thumbnails[::-1]), reverse=True)
2242         else:
2243             info_dict['thumbnails'] = thumbnails
2244
2245     def process_video_result(self, info_dict, download=True):
2246         assert info_dict.get('_type', 'video') == 'video'
2247
2248         if 'id' not in info_dict:
2249             raise ExtractorError('Missing "id" field in extractor result')
2250         if 'title' not in info_dict:
2251             raise ExtractorError('Missing "title" field in extractor result',
2252                                  video_id=info_dict['id'], ie=info_dict['extractor'])
2253
2254         def report_force_conversion(field, field_not, conversion):
2255             self.report_warning(
2256                 '"%s" field is not %s - forcing %s conversion, there is an error in extractor'
2257                 % (field, field_not, conversion))
2258
2259         def sanitize_string_field(info, string_field):
2260             field = info.get(string_field)
2261             if field is None or isinstance(field, compat_str):
2262                 return
2263             report_force_conversion(string_field, 'a string', 'string')
2264             info[string_field] = compat_str(field)
2265
2266         def sanitize_numeric_fields(info):
2267             for numeric_field in self._NUMERIC_FIELDS:
2268                 field = info.get(numeric_field)
2269                 if field is None or isinstance(field, compat_numeric_types):
2270                     continue
2271                 report_force_conversion(numeric_field, 'numeric', 'int')
2272                 info[numeric_field] = int_or_none(field)
2273
2274         sanitize_string_field(info_dict, 'id')
2275         sanitize_numeric_fields(info_dict)
2276
2277         if 'playlist' not in info_dict:
2278             # It isn't part of a playlist
2279             info_dict['playlist'] = None
2280             info_dict['playlist_index'] = None
2281
2282         self._sanitize_thumbnails(info_dict)
2283
2284         thumbnail = info_dict.get('thumbnail')
2285         thumbnails = info_dict.get('thumbnails')
2286         if thumbnail:
2287             info_dict['thumbnail'] = sanitize_url(thumbnail)
2288         elif thumbnails:
2289             info_dict['thumbnail'] = thumbnails[-1]['url']
2290
2291         if info_dict.get('display_id') is None and 'id' in info_dict:
2292             info_dict['display_id'] = info_dict['id']
2293
2294         if info_dict.get('duration') is not None:
2295             info_dict['duration_string'] = formatSeconds(info_dict['duration'])
2296
2297         for ts_key, date_key in (
2298                 ('timestamp', 'upload_date'),
2299                 ('release_timestamp', 'release_date'),
2300         ):
2301             if info_dict.get(date_key) is None and info_dict.get(ts_key) is not None:
2302                 # Working around out-of-range timestamp values (e.g. negative ones on Windows,
2303                 # see http://bugs.python.org/issue1646728)
2304                 try:
2305                     upload_date = datetime.datetime.utcfromtimestamp(info_dict[ts_key])
2306                     info_dict[date_key] = upload_date.strftime('%Y%m%d')
2307                 except (ValueError, OverflowError, OSError):
2308                     pass
2309
2310         live_keys = ('is_live', 'was_live')
2311         live_status = info_dict.get('live_status')
2312         if live_status is None:
2313             for key in live_keys:
2314                 if info_dict.get(key) is False:
2315                     continue
2316                 if info_dict.get(key):
2317                     live_status = key
2318                 break
2319             if all(info_dict.get(key) is False for key in live_keys):
2320                 live_status = 'not_live'
2321         if live_status:
2322             info_dict['live_status'] = live_status
2323             for key in live_keys:
2324                 if info_dict.get(key) is None:
2325                     info_dict[key] = (live_status == key)
2326
2327         # Auto generate title fields corresponding to the *_number fields when missing
2328         # in order to always have clean titles. This is very common for TV series.
2329         for field in ('chapter', 'season', 'episode'):
2330             if info_dict.get('%s_number' % field) is not None and not info_dict.get(field):
2331                 info_dict[field] = '%s %d' % (field.capitalize(), info_dict['%s_number' % field])
2332
2333         for cc_kind in ('subtitles', 'automatic_captions'):
2334             cc = info_dict.get(cc_kind)
2335             if cc:
2336                 for _, subtitle in cc.items():
2337                     for subtitle_format in subtitle:
2338                         if subtitle_format.get('url'):
2339                             subtitle_format['url'] = sanitize_url(subtitle_format['url'])
2340                         if subtitle_format.get('ext') is None:
2341                             subtitle_format['ext'] = determine_ext(subtitle_format['url']).lower()
2342
2343         automatic_captions = info_dict.get('automatic_captions')
2344         subtitles = info_dict.get('subtitles')
2345
2346         info_dict['requested_subtitles'] = self.process_subtitles(
2347             info_dict['id'], subtitles, automatic_captions)
2348
2349         if info_dict.get('formats') is None:
2350             # There's only one format available
2351             formats = [info_dict]
2352         else:
2353             formats = info_dict['formats']
2354
2355         info_dict['__has_drm'] = any(f.get('has_drm') for f in formats)
2356         if not self.params.get('allow_unplayable_formats'):
2357             formats = [f for f in formats if not f.get('has_drm')]
2358
2359         if info_dict.get('is_live'):
2360             get_from_start = bool(self.params.get('live_from_start'))
2361             formats = [f for f in formats if bool(f.get('is_from_start')) == get_from_start]
2362
2363         if not formats:
2364             self.raise_no_formats(info_dict)
2365
2366         def is_wellformed(f):
2367             url = f.get('url')
2368             if not url:
2369                 self.report_warning(
2370                     '"url" field is missing or empty - skipping format, '
2371                     'there is an error in extractor')
2372                 return False
2373             if isinstance(url, bytes):
2374                 sanitize_string_field(f, 'url')
2375             return True
2376
2377         # Filter out malformed formats for better extraction robustness
2378         formats = list(filter(is_wellformed, formats))
2379
2380         formats_dict = {}
2381
2382         # We check that all the formats have the format and format_id fields
2383         for i, format in enumerate(formats):
2384             sanitize_string_field(format, 'format_id')
2385             sanitize_numeric_fields(format)
2386             format['url'] = sanitize_url(format['url'])
2387             if not format.get('format_id'):
2388                 format['format_id'] = compat_str(i)
2389             else:
2390                 # Sanitize format_id from characters used in format selector expression
2391                 format['format_id'] = re.sub(r'[\s,/+\[\]()]', '_', format['format_id'])
2392             format_id = format['format_id']
2393             if format_id not in formats_dict:
2394                 formats_dict[format_id] = []
2395             formats_dict[format_id].append(format)
2396
2397         # Make sure all formats have unique format_id
2398         common_exts = set(itertools.chain(*self._format_selection_exts.values()))
2399         for format_id, ambiguous_formats in formats_dict.items():
2400             ambigious_id = len(ambiguous_formats) > 1
2401             for i, format in enumerate(ambiguous_formats):
2402                 if ambigious_id:
2403                     format['format_id'] = '%s-%d' % (format_id, i)
2404                 if format.get('ext') is None:
2405                     format['ext'] = determine_ext(format['url']).lower()
2406                 # Ensure there is no conflict between id and ext in format selection
2407                 # See https://github.com/yt-dlp/yt-dlp/issues/1282
2408                 if format['format_id'] != format['ext'] and format['format_id'] in common_exts:
2409                     format['format_id'] = 'f%s' % format['format_id']
2410
2411         for i, format in enumerate(formats):
2412             if format.get('format') is None:
2413                 format['format'] = '{id} - {res}{note}'.format(
2414                     id=format['format_id'],
2415                     res=self.format_resolution(format),
2416                     note=format_field(format, 'format_note', ' (%s)'),
2417                 )
2418             if format.get('protocol') is None:
2419                 format['protocol'] = determine_protocol(format)
2420             if format.get('resolution') is None:
2421                 format['resolution'] = self.format_resolution(format, default=None)
2422             if format.get('dynamic_range') is None and format.get('vcodec') != 'none':
2423                 format['dynamic_range'] = 'SDR'
2424             if (info_dict.get('duration') and format.get('tbr')
2425                     and not format.get('filesize') and not format.get('filesize_approx')):
2426                 format['filesize_approx'] = info_dict['duration'] * format['tbr'] * (1024 / 8)
2427
2428             # Add HTTP headers, so that external programs can use them from the
2429             # json output
2430             full_format_info = info_dict.copy()
2431             full_format_info.update(format)
2432             format['http_headers'] = self._calc_headers(full_format_info)
2433         # Remove private housekeeping stuff
2434         if '__x_forwarded_for_ip' in info_dict:
2435             del info_dict['__x_forwarded_for_ip']
2436
2437         # TODO Central sorting goes here
2438
2439         if self.params.get('check_formats') is True:
2440             formats = LazyList(self._check_formats(formats[::-1]), reverse=True)
2441
2442         if not formats or formats[0] is not info_dict:
2443             # only set the 'formats' fields if the original info_dict list them
2444             # otherwise we end up with a circular reference, the first (and unique)
2445             # element in the 'formats' field in info_dict is info_dict itself,
2446             # which can't be exported to json
2447             info_dict['formats'] = formats
2448
2449         info_dict, _ = self.pre_process(info_dict)
2450
2451         # The pre-processors may have modified the formats
2452         formats = info_dict.get('formats', [info_dict])
2453
2454         list_only = self.params.get('simulate') is None and (
2455             self.params.get('list_thumbnails') or self.params.get('listformats') or self.params.get('listsubtitles'))
2456         interactive_format_selection = not list_only and self.format_selector == '-'
2457         if self.params.get('list_thumbnails'):
2458             self.list_thumbnails(info_dict)
2459         if self.params.get('listsubtitles'):
2460             if 'automatic_captions' in info_dict:
2461                 self.list_subtitles(
2462                     info_dict['id'], automatic_captions, 'automatic captions')
2463             self.list_subtitles(info_dict['id'], subtitles, 'subtitles')
2464         if self.params.get('listformats') or interactive_format_selection:
2465             if not info_dict.get('formats') and not info_dict.get('url'):
2466                 self.to_screen('%s has no formats' % info_dict['id'])
2467             else:
2468                 self.list_formats(info_dict)
2469         if list_only:
2470             # Without this printing, -F --print-json will not work
2471             self.__forced_printings(info_dict, self.prepare_filename(info_dict), incomplete=True)
2472             return
2473
2474         format_selector = self.format_selector
2475         if format_selector is None:
2476             req_format = self._default_format_spec(info_dict, download=download)
2477             self.write_debug('Default format spec: %s' % req_format)
2478             format_selector = self.build_format_selector(req_format)
2479
2480         while True:
2481             if interactive_format_selection:
2482                 req_format = input(
2483                     self._format_screen('\nEnter format selector: ', self.Styles.EMPHASIS))
2484                 try:
2485                     format_selector = self.build_format_selector(req_format)
2486                 except SyntaxError as err:
2487                     self.report_error(err, tb=False, is_error=False)
2488                     continue
2489
2490             # While in format selection we may need to have an access to the original
2491             # format set in order to calculate some metrics or do some processing.
2492             # For now we need to be able to guess whether original formats provided
2493             # by extractor are incomplete or not (i.e. whether extractor provides only
2494             # video-only or audio-only formats) for proper formats selection for
2495             # extractors with such incomplete formats (see
2496             # https://github.com/ytdl-org/youtube-dl/pull/5556).
2497             # Since formats may be filtered during format selection and may not match
2498             # the original formats the results may be incorrect. Thus original formats
2499             # or pre-calculated metrics should be passed to format selection routines
2500             # as well.
2501             # We will pass a context object containing all necessary additional data
2502             # instead of just formats.
2503             # This fixes incorrect format selection issue (see
2504             # https://github.com/ytdl-org/youtube-dl/issues/10083).
2505             incomplete_formats = (
2506                 # All formats are video-only or
2507                 all(f.get('vcodec') != 'none' and f.get('acodec') == 'none' for f in formats)
2508                 # all formats are audio-only
2509                 or all(f.get('vcodec') == 'none' and f.get('acodec') != 'none' for f in formats))
2510
2511             ctx = {
2512                 'formats': formats,
2513                 'incomplete_formats': incomplete_formats,
2514             }
2515
2516             formats_to_download = list(format_selector(ctx))
2517             if interactive_format_selection and not formats_to_download:
2518                 self.report_error('Requested format is not available', tb=False, is_error=False)
2519                 continue
2520             break
2521
2522         if not formats_to_download:
2523             if not self.params.get('ignore_no_formats_error'):
2524                 raise ExtractorError('Requested format is not available', expected=True,
2525                                      video_id=info_dict['id'], ie=info_dict['extractor'])
2526             else:
2527                 self.report_warning('Requested format is not available')
2528                 # Process what we can, even without any available formats.
2529                 self.process_info(dict(info_dict))
2530         elif download:
2531             self.to_screen(
2532                 '[info] %s: Downloading %d format(s): %s' % (
2533                     info_dict['id'], len(formats_to_download),
2534                     ", ".join([f['format_id'] for f in formats_to_download])))
2535             for fmt in formats_to_download:
2536                 new_info = dict(info_dict)
2537                 # Save a reference to the original info_dict so that it can be modified in process_info if needed
2538                 new_info['__original_infodict'] = info_dict
2539                 new_info.update(fmt)
2540                 self.process_info(new_info)
2541         # We update the info dict with the selected best quality format (backwards compatibility)
2542         if formats_to_download:
2543             info_dict.update(formats_to_download[-1])
2544         return info_dict
2545
2546     def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
2547         """Select the requested subtitles and their format"""
2548         available_subs = {}
2549         if normal_subtitles and self.params.get('writesubtitles'):
2550             available_subs.update(normal_subtitles)
2551         if automatic_captions and self.params.get('writeautomaticsub'):
2552             for lang, cap_info in automatic_captions.items():
2553                 if lang not in available_subs:
2554                     available_subs[lang] = cap_info
2555
2556         if (not self.params.get('writesubtitles') and not
2557                 self.params.get('writeautomaticsub') or not
2558                 available_subs):
2559             return None
2560
2561         all_sub_langs = available_subs.keys()
2562         if self.params.get('allsubtitles', False):
2563             requested_langs = all_sub_langs
2564         elif self.params.get('subtitleslangs', False):
2565             # A list is used so that the order of languages will be the same as
2566             # given in subtitleslangs. See https://github.com/yt-dlp/yt-dlp/issues/1041
2567             requested_langs = []
2568             for lang_re in self.params.get('subtitleslangs'):
2569                 if lang_re == 'all':
2570                     requested_langs.extend(all_sub_langs)
2571                     continue
2572                 discard = lang_re[0] == '-'
2573                 if discard:
2574                     lang_re = lang_re[1:]
2575                 current_langs = filter(re.compile(lang_re + '$').match, all_sub_langs)
2576                 if discard:
2577                     for lang in current_langs:
2578                         while lang in requested_langs:
2579                             requested_langs.remove(lang)
2580                 else:
2581                     requested_langs.extend(current_langs)
2582             requested_langs = orderedSet(requested_langs)
2583         elif 'en' in available_subs:
2584             requested_langs = ['en']
2585         else:
2586             requested_langs = [list(all_sub_langs)[0]]
2587         if requested_langs:
2588             self.write_debug('Downloading subtitles: %s' % ', '.join(requested_langs))
2589
2590         formats_query = self.params.get('subtitlesformat', 'best')
2591         formats_preference = formats_query.split('/') if formats_query else []
2592         subs = {}
2593         for lang in requested_langs:
2594             formats = available_subs.get(lang)
2595             if formats is None:
2596                 self.report_warning('%s subtitles not available for %s' % (lang, video_id))
2597                 continue
2598             for ext in formats_preference:
2599                 if ext == 'best':
2600                     f = formats[-1]
2601                     break
2602                 matches = list(filter(lambda f: f['ext'] == ext, formats))
2603                 if matches:
2604                     f = matches[-1]
2605                     break
2606             else:
2607                 f = formats[-1]
2608                 self.report_warning(
2609                     'No subtitle format found matching "%s" for language %s, '
2610                     'using %s' % (formats_query, lang, f['ext']))
2611             subs[lang] = f
2612         return subs
2613
2614     def __forced_printings(self, info_dict, filename, incomplete):
2615         def print_mandatory(field, actual_field=None):
2616             if actual_field is None:
2617                 actual_field = field
2618             if (self.params.get('force%s' % field, False)
2619                     and (not incomplete or info_dict.get(actual_field) is not None)):
2620                 self.to_stdout(info_dict[actual_field])
2621
2622         def print_optional(field):
2623             if (self.params.get('force%s' % field, False)
2624                     and info_dict.get(field) is not None):
2625                 self.to_stdout(info_dict[field])
2626
2627         info_dict = info_dict.copy()
2628         if filename is not None:
2629             info_dict['filename'] = filename
2630         if info_dict.get('requested_formats') is not None:
2631             # For RTMP URLs, also include the playpath
2632             info_dict['urls'] = '\n'.join(f['url'] + f.get('play_path', '') for f in info_dict['requested_formats'])
2633         elif 'url' in info_dict:
2634             info_dict['urls'] = info_dict['url'] + info_dict.get('play_path', '')
2635
2636         if self.params.get('forceprint') or self.params.get('forcejson'):
2637             self.post_extract(info_dict)
2638         for tmpl in self.params.get('forceprint', []):
2639             mobj = re.match(r'\w+(=?)$', tmpl)
2640             if mobj and mobj.group(1):
2641                 tmpl = f'{tmpl[:-1]} = %({tmpl[:-1]})s'
2642             elif mobj:
2643                 tmpl = '%({})s'.format(tmpl)
2644             self.to_stdout(self.evaluate_outtmpl(tmpl, info_dict))
2645
2646         print_mandatory('title')
2647         print_mandatory('id')
2648         print_mandatory('url', 'urls')
2649         print_optional('thumbnail')
2650         print_optional('description')
2651         print_optional('filename')
2652         if self.params.get('forceduration') and info_dict.get('duration') is not None:
2653             self.to_stdout(formatSeconds(info_dict['duration']))
2654         print_mandatory('format')
2655
2656         if self.params.get('forcejson'):
2657             self.to_stdout(json.dumps(self.sanitize_info(info_dict)))
2658
2659     def dl(self, name, info, subtitle=False, test=False):
2660         if not info.get('url'):
2661             self.raise_no_formats(info, True)
2662
2663         if test:
2664             verbose = self.params.get('verbose')
2665             params = {
2666                 'test': True,
2667                 'quiet': self.params.get('quiet') or not verbose,
2668                 'verbose': verbose,
2669                 'noprogress': not verbose,
2670                 'nopart': True,
2671                 'skip_unavailable_fragments': False,
2672                 'keep_fragments': False,
2673                 'overwrites': True,
2674                 '_no_ytdl_file': True,
2675             }
2676         else:
2677             params = self.params
2678         fd = get_suitable_downloader(info, params, to_stdout=(name == '-'))(self, params)
2679         if not test:
2680             for ph in self._progress_hooks:
2681                 fd.add_progress_hook(ph)
2682             urls = '", "'.join([f['url'] for f in info.get('requested_formats', [])] or [info['url']])
2683             self.write_debug('Invoking downloader on "%s"' % urls)
2684
2685         # Note: Ideally info should be a deep-copied so that hooks cannot modify it.
2686         # But it may contain objects that are not deep-copyable
2687         new_info = self._copy_infodict(info)
2688         if new_info.get('http_headers') is None:
2689             new_info['http_headers'] = self._calc_headers(new_info)
2690         return fd.download(name, new_info, subtitle)
2691
2692     def process_info(self, info_dict):
2693         """Process a single resolved IE result."""
2694
2695         assert info_dict.get('_type', 'video') == 'video'
2696
2697         max_downloads = self.params.get('max_downloads')
2698         if max_downloads is not None:
2699             if self._num_downloads >= int(max_downloads):
2700                 raise MaxDownloadsReached()
2701
2702         if info_dict.get('is_live') and not self.params.get('live_from_start'):
2703             info_dict['title'] += ' ' + datetime.datetime.now().strftime('%Y-%m-%d %H:%M')
2704
2705         # TODO: backward compatibility, to be removed
2706         info_dict['fulltitle'] = info_dict['title']
2707
2708         if 'format' not in info_dict and 'ext' in info_dict:
2709             info_dict['format'] = info_dict['ext']
2710
2711         if self._match_entry(info_dict) is not None:
2712             return
2713
2714         self.post_extract(info_dict)
2715         self._num_downloads += 1
2716
2717         # info_dict['_filename'] needs to be set for backward compatibility
2718         info_dict['_filename'] = full_filename = self.prepare_filename(info_dict, warn=True)
2719         temp_filename = self.prepare_filename(info_dict, 'temp')
2720         files_to_move = {}
2721
2722         # Forced printings
2723         self.__forced_printings(info_dict, full_filename, incomplete=('format' not in info_dict))
2724
2725         if self.params.get('simulate'):
2726             if self.params.get('force_write_download_archive', False):
2727                 self.record_download_archive(info_dict)
2728             # Do nothing else if in simulate mode
2729             return
2730
2731         if full_filename is None:
2732             return
2733         if not self._ensure_dir_exists(encodeFilename(full_filename)):
2734             return
2735         if not self._ensure_dir_exists(encodeFilename(temp_filename)):
2736             return
2737
2738         if self._write_description('video', info_dict,
2739                                    self.prepare_filename(info_dict, 'description')) is None:
2740             return
2741
2742         sub_files = self._write_subtitles(info_dict, temp_filename)
2743         if sub_files is None:
2744             return
2745         files_to_move.update(dict(sub_files))
2746
2747         thumb_files = self._write_thumbnails(
2748             'video', info_dict, temp_filename, self.prepare_filename(info_dict, 'thumbnail'))
2749         if thumb_files is None:
2750             return
2751         files_to_move.update(dict(thumb_files))
2752
2753         infofn = self.prepare_filename(info_dict, 'infojson')
2754         _infojson_written = self._write_info_json('video', info_dict, infofn)
2755         if _infojson_written:
2756             info_dict['infojson_filename'] = infofn
2757             # For backward compatibility, even though it was a private field
2758             info_dict['__infojson_filename'] = infofn
2759         elif _infojson_written is None:
2760             return
2761
2762         # Note: Annotations are deprecated
2763         annofn = None
2764         if self.params.get('writeannotations', False):
2765             annofn = self.prepare_filename(info_dict, 'annotation')
2766         if annofn:
2767             if not self._ensure_dir_exists(encodeFilename(annofn)):
2768                 return
2769             if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(annofn)):
2770                 self.to_screen('[info] Video annotations are already present')
2771             elif not info_dict.get('annotations'):
2772                 self.report_warning('There are no annotations to write.')
2773             else:
2774                 try:
2775                     self.to_screen('[info] Writing video annotations to: ' + annofn)
2776                     with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
2777                         annofile.write(info_dict['annotations'])
2778                 except (KeyError, TypeError):
2779                     self.report_warning('There are no annotations to write.')
2780                 except (OSError, IOError):
2781                     self.report_error('Cannot write annotations file: ' + annofn)
2782                     return
2783
2784         # Write internet shortcut files
2785         def _write_link_file(link_type):
2786             if 'webpage_url' not in info_dict:
2787                 self.report_error('Cannot write internet shortcut file because the "webpage_url" field is missing in the media information')
2788                 return False
2789             linkfn = replace_extension(self.prepare_filename(info_dict, 'link'), link_type, info_dict.get('ext'))
2790             if not self._ensure_dir_exists(encodeFilename(linkfn)):
2791                 return False
2792             if self.params.get('overwrites', True) and os.path.exists(encodeFilename(linkfn)):
2793                 self.to_screen(f'[info] Internet shortcut (.{link_type}) is already present')
2794                 return True
2795             try:
2796                 self.to_screen(f'[info] Writing internet shortcut (.{link_type}) to: {linkfn}')
2797                 with io.open(encodeFilename(to_high_limit_path(linkfn)), 'w', encoding='utf-8',
2798                              newline='\r\n' if link_type == 'url' else '\n') as linkfile:
2799                     template_vars = {'url': iri_to_uri(info_dict['webpage_url'])}
2800                     if link_type == 'desktop':
2801                         template_vars['filename'] = linkfn[:-(len(link_type) + 1)]
2802                     linkfile.write(LINK_TEMPLATES[link_type] % template_vars)
2803             except (OSError, IOError):
2804                 self.report_error(f'Cannot write internet shortcut {linkfn}')
2805                 return False
2806             return True
2807
2808         write_links = {
2809             'url': self.params.get('writeurllink'),
2810             'webloc': self.params.get('writewebloclink'),
2811             'desktop': self.params.get('writedesktoplink'),
2812         }
2813         if self.params.get('writelink'):
2814             link_type = ('webloc' if sys.platform == 'darwin'
2815                          else 'desktop' if sys.platform.startswith('linux')
2816                          else 'url')
2817             write_links[link_type] = True
2818
2819         if any(should_write and not _write_link_file(link_type)
2820                for link_type, should_write in write_links.items()):
2821             return
2822
2823         try:
2824             info_dict, files_to_move = self.pre_process(info_dict, 'before_dl', files_to_move)
2825         except PostProcessingError as err:
2826             self.report_error('Preprocessing: %s' % str(err))
2827             return
2828
2829         must_record_download_archive = False
2830         if self.params.get('skip_download', False):
2831             info_dict['filepath'] = temp_filename
2832             info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))
2833             info_dict['__files_to_move'] = files_to_move
2834             info_dict = self.run_pp(MoveFilesAfterDownloadPP(self, False), info_dict)
2835         else:
2836             # Download
2837             info_dict.setdefault('__postprocessors', [])
2838             try:
2839
2840                 def existing_file(*filepaths):
2841                     ext = info_dict.get('ext')
2842                     final_ext = self.params.get('final_ext', ext)
2843                     existing_files = []
2844                     for file in orderedSet(filepaths):
2845                         if final_ext != ext:
2846                             converted = replace_extension(file, final_ext, ext)
2847                             if os.path.exists(encodeFilename(converted)):
2848                                 existing_files.append(converted)
2849                         if os.path.exists(encodeFilename(file)):
2850                             existing_files.append(file)
2851
2852                     if not existing_files or self.params.get('overwrites', False):
2853                         for file in orderedSet(existing_files):
2854                             self.report_file_delete(file)
2855                             os.remove(encodeFilename(file))
2856                         return None
2857
2858                     info_dict['ext'] = os.path.splitext(existing_files[0])[1][1:]
2859                     return existing_files[0]
2860
2861                 success = True
2862                 if info_dict.get('requested_formats') is not None:
2863
2864                     def compatible_formats(formats):
2865                         # TODO: some formats actually allow this (mkv, webm, ogg, mp4), but not all of them.
2866                         video_formats = [format for format in formats if format.get('vcodec') != 'none']
2867                         audio_formats = [format for format in formats if format.get('acodec') != 'none']
2868                         if len(video_formats) > 2 or len(audio_formats) > 2:
2869                             return False
2870
2871                         # Check extension
2872                         exts = set(format.get('ext') for format in formats)
2873                         COMPATIBLE_EXTS = (
2874                             set(('mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'ismv', 'isma')),
2875                             set(('webm',)),
2876                         )
2877                         for ext_sets in COMPATIBLE_EXTS:
2878                             if ext_sets.issuperset(exts):
2879                                 return True
2880                         # TODO: Check acodec/vcodec
2881                         return False
2882
2883                     requested_formats = info_dict['requested_formats']
2884                     old_ext = info_dict['ext']
2885                     if self.params.get('merge_output_format') is None:
2886                         if not compatible_formats(requested_formats):
2887                             info_dict['ext'] = 'mkv'
2888                             self.report_warning(
2889                                 'Requested formats are incompatible for merge and will be merged into mkv')
2890                         if (info_dict['ext'] == 'webm'
2891                                 and info_dict.get('thumbnails')
2892                                 # check with type instead of pp_key, __name__, or isinstance
2893                                 # since we dont want any custom PPs to trigger this
2894                                 and any(type(pp) == EmbedThumbnailPP for pp in self._pps['post_process'])):
2895                             info_dict['ext'] = 'mkv'
2896                             self.report_warning(
2897                                 'webm doesn\'t support embedding a thumbnail, mkv will be used')
2898                     new_ext = info_dict['ext']
2899
2900                     def correct_ext(filename, ext=new_ext):
2901                         if filename == '-':
2902                             return filename
2903                         filename_real_ext = os.path.splitext(filename)[1][1:]
2904                         filename_wo_ext = (
2905                             os.path.splitext(filename)[0]
2906                             if filename_real_ext in (old_ext, new_ext)
2907                             else filename)
2908                         return '%s.%s' % (filename_wo_ext, ext)
2909
2910                     # Ensure filename always has a correct extension for successful merge
2911                     full_filename = correct_ext(full_filename)
2912                     temp_filename = correct_ext(temp_filename)
2913                     dl_filename = existing_file(full_filename, temp_filename)
2914                     info_dict['__real_download'] = False
2915
2916                     downloaded = []
2917                     merger = FFmpegMergerPP(self)
2918
2919                     fd = get_suitable_downloader(info_dict, self.params, to_stdout=temp_filename == '-')
2920                     if dl_filename is not None:
2921                         self.report_file_already_downloaded(dl_filename)
2922                     elif fd:
2923                         for f in requested_formats if fd != FFmpegFD else []:
2924                             f['filepath'] = fname = prepend_extension(
2925                                 correct_ext(temp_filename, info_dict['ext']),
2926                                 'f%s' % f['format_id'], info_dict['ext'])
2927                             downloaded.append(fname)
2928                         info_dict['url'] = '\n'.join(f['url'] for f in requested_formats)
2929                         success, real_download = self.dl(temp_filename, info_dict)
2930                         info_dict['__real_download'] = real_download
2931                     else:
2932                         if self.params.get('allow_unplayable_formats'):
2933                             self.report_warning(
2934                                 'You have requested merging of multiple formats '
2935                                 'while also allowing unplayable formats to be downloaded. '
2936                                 'The formats won\'t be merged to prevent data corruption.')
2937                         elif not merger.available:
2938                             self.report_warning(
2939                                 'You have requested merging of multiple formats but ffmpeg is not installed. '
2940                                 'The formats won\'t be merged.')
2941
2942                         if temp_filename == '-':
2943                             reason = ('using a downloader other than ffmpeg' if FFmpegFD.can_merge_formats(info_dict, self.params)
2944                                       else 'but the formats are incompatible for simultaneous download' if merger.available
2945                                       else 'but ffmpeg is not installed')
2946                             self.report_warning(
2947                                 f'You have requested downloading multiple formats to stdout {reason}. '
2948                                 'The formats will be streamed one after the other')
2949                             fname = temp_filename
2950                         for f in requested_formats:
2951                             new_info = dict(info_dict)
2952                             del new_info['requested_formats']
2953                             new_info.update(f)
2954                             if temp_filename != '-':
2955                                 fname = prepend_extension(
2956                                     correct_ext(temp_filename, new_info['ext']),
2957                                     'f%s' % f['format_id'], new_info['ext'])
2958                                 if not self._ensure_dir_exists(fname):
2959                                     return
2960                                 f['filepath'] = fname
2961                                 downloaded.append(fname)
2962                             partial_success, real_download = self.dl(fname, new_info)
2963                             info_dict['__real_download'] = info_dict['__real_download'] or real_download
2964                             success = success and partial_success
2965
2966                     if downloaded and merger.available and not self.params.get('allow_unplayable_formats'):
2967                         info_dict['__postprocessors'].append(merger)
2968                         info_dict['__files_to_merge'] = downloaded
2969                         # Even if there were no downloads, it is being merged only now
2970                         info_dict['__real_download'] = True
2971                     else:
2972                         for file in downloaded:
2973                             files_to_move[file] = None
2974                 else:
2975                     # Just a single file
2976                     dl_filename = existing_file(full_filename, temp_filename)
2977                     if dl_filename is None or dl_filename == temp_filename:
2978                         # dl_filename == temp_filename could mean that the file was partially downloaded with --no-part.
2979                         # So we should try to resume the download
2980                         success, real_download = self.dl(temp_filename, info_dict)
2981                         info_dict['__real_download'] = real_download
2982                     else:
2983                         self.report_file_already_downloaded(dl_filename)
2984
2985                 dl_filename = dl_filename or temp_filename
2986                 info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))
2987
2988             except network_exceptions as err:
2989                 self.report_error('unable to download video data: %s' % error_to_compat_str(err))
2990                 return
2991             except (OSError, IOError) as err:
2992                 raise UnavailableVideoError(err)
2993             except (ContentTooShortError, ) as err:
2994                 self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
2995                 return
2996
2997             if success and full_filename != '-':
2998
2999                 def fixup():
3000                     do_fixup = True
3001                     fixup_policy = self.params.get('fixup')
3002                     vid = info_dict['id']
3003
3004                     if fixup_policy in ('ignore', 'never'):
3005                         return
3006                     elif fixup_policy == 'warn':
3007                         do_fixup = False
3008                     elif fixup_policy != 'force':
3009                         assert fixup_policy in ('detect_or_warn', None)
3010                         if not info_dict.get('__real_download'):
3011                             do_fixup = False
3012
3013                     def ffmpeg_fixup(cndn, msg, cls):
3014                         if not cndn:
3015                             return
3016                         if not do_fixup:
3017                             self.report_warning(f'{vid}: {msg}')
3018                             return
3019                         pp = cls(self)
3020                         if pp.available:
3021                             info_dict['__postprocessors'].append(pp)
3022                         else:
3023                             self.report_warning(f'{vid}: {msg}. Install ffmpeg to fix this automatically')
3024
3025                     stretched_ratio = info_dict.get('stretched_ratio')
3026                     ffmpeg_fixup(
3027                         stretched_ratio not in (1, None),
3028                         f'Non-uniform pixel ratio {stretched_ratio}',
3029                         FFmpegFixupStretchedPP)
3030
3031                     ffmpeg_fixup(
3032                         (info_dict.get('requested_formats') is None
3033                          and info_dict.get('container') == 'm4a_dash'
3034                          and info_dict.get('ext') == 'm4a'),
3035                         'writing DASH m4a. Only some players support this container',
3036                         FFmpegFixupM4aPP)
3037
3038                     downloader = get_suitable_downloader(info_dict, self.params) if 'protocol' in info_dict else None
3039                     downloader = downloader.__name__ if downloader else None
3040
3041                     if info_dict.get('requested_formats') is None:  # Not necessary if doing merger
3042                         ffmpeg_fixup(downloader == 'HlsFD',
3043                                      'Possible MPEG-TS in MP4 container or malformed AAC timestamps',
3044                                      FFmpegFixupM3u8PP)
3045                         ffmpeg_fixup(info_dict.get('is_live') and downloader == 'DashSegmentsFD',
3046                                      'Possible duplicate MOOV atoms', FFmpegFixupDuplicateMoovPP)
3047
3048                     ffmpeg_fixup(downloader == 'WebSocketFragmentFD', 'Malformed timestamps detected', FFmpegFixupTimestampPP)
3049                     ffmpeg_fixup(downloader == 'WebSocketFragmentFD', 'Malformed duration detected', FFmpegFixupDurationPP)
3050
3051                 fixup()
3052                 try:
3053                     info_dict = self.post_process(dl_filename, info_dict, files_to_move)
3054                 except PostProcessingError as err:
3055                     self.report_error('Postprocessing: %s' % str(err))
3056                     return
3057                 try:
3058                     for ph in self._post_hooks:
3059                         ph(info_dict['filepath'])
3060                 except Exception as err:
3061                     self.report_error('post hooks: %s' % str(err))
3062                     return
3063                 must_record_download_archive = True
3064
3065         if must_record_download_archive or self.params.get('force_write_download_archive', False):
3066             self.record_download_archive(info_dict)
3067         max_downloads = self.params.get('max_downloads')
3068         if max_downloads is not None and self._num_downloads >= int(max_downloads):
3069             raise MaxDownloadsReached()
3070
3071     def __download_wrapper(self, func):
3072         @functools.wraps(func)
3073         def wrapper(*args, **kwargs):
3074             try:
3075                 res = func(*args, **kwargs)
3076             except UnavailableVideoError as e:
3077                 self.report_error(e)
3078             except MaxDownloadsReached as e:
3079                 self.to_screen(f'[info] {e}')
3080                 raise
3081             except DownloadCancelled as e:
3082                 self.to_screen(f'[info] {e}')
3083                 if not self.params.get('break_per_url'):
3084                     raise
3085             else:
3086                 if self.params.get('dump_single_json', False):
3087                     self.post_extract(res)
3088                     self.to_stdout(json.dumps(self.sanitize_info(res)))
3089         return wrapper
3090
3091     def download(self, url_list):
3092         """Download a given list of URLs."""
3093         url_list = variadic(url_list)  # Passing a single URL is a common mistake
3094         outtmpl = self.outtmpl_dict['default']
3095         if (len(url_list) > 1
3096                 and outtmpl != '-'
3097                 and '%' not in outtmpl
3098                 and self.params.get('max_downloads') != 1):
3099             raise SameFileError(outtmpl)
3100
3101         for url in url_list:
3102             self.__download_wrapper(self.extract_info)(
3103                 url, force_generic_extractor=self.params.get('force_generic_extractor', False))
3104
3105         return self._download_retcode
3106
3107     def download_with_info_file(self, info_filename):
3108         with contextlib.closing(fileinput.FileInput(
3109                 [info_filename], mode='r',
3110                 openhook=fileinput.hook_encoded('utf-8'))) as f:
3111             # FileInput doesn't have a read method, we can't call json.load
3112             info = self.sanitize_info(json.loads('\n'.join(f)), self.params.get('clean_infojson', True))
3113         try:
3114             self.__download_wrapper(self.process_ie_result)(info, download=True)
3115         except (DownloadError, EntryNotInPlaylist, ReExtractInfo) as e:
3116             if not isinstance(e, EntryNotInPlaylist):
3117                 self.to_stderr('\r')
3118             webpage_url = info.get('webpage_url')
3119             if webpage_url is not None:
3120                 self.report_warning(f'The info failed to download: {e}; trying with URL {webpage_url}')
3121                 return self.download([webpage_url])
3122             else:
3123                 raise
3124         return self._download_retcode
3125
3126     @staticmethod
3127     def sanitize_info(info_dict, remove_private_keys=False):
3128         ''' Sanitize the infodict for converting to json '''
3129         if info_dict is None:
3130             return info_dict
3131         info_dict.setdefault('epoch', int(time.time()))
3132         remove_keys = {'__original_infodict'}  # Always remove this since this may contain a copy of the entire dict
3133         keep_keys = ['_type']  # Always keep this to facilitate load-info-json
3134         if remove_private_keys:
3135             remove_keys |= {
3136                 'requested_formats', 'requested_subtitles', 'requested_entries', 'entries',
3137                 'filepath', 'infojson_filename', 'original_url', 'playlist_autonumber',
3138             }
3139             empty_values = (None, {}, [], set(), tuple())
3140             reject = lambda k, v: k not in keep_keys and (
3141                 k.startswith('_') or k in remove_keys or v in empty_values)
3142         else:
3143             reject = lambda k, v: k in remove_keys
3144
3145         def filter_fn(obj):
3146             if isinstance(obj, dict):
3147                 return {k: filter_fn(v) for k, v in obj.items() if not reject(k, v)}
3148             elif isinstance(obj, (list, tuple, set, LazyList)):
3149                 return list(map(filter_fn, obj))
3150             elif obj is None or isinstance(obj, (str, int, float, bool)):
3151                 return obj
3152             else:
3153                 return repr(obj)
3154
3155         return filter_fn(info_dict)
3156
3157     @staticmethod
3158     def filter_requested_info(info_dict, actually_filter=True):
3159         ''' Alias of sanitize_info for backward compatibility '''
3160         return YoutubeDL.sanitize_info(info_dict, actually_filter)
3161
3162     def run_pp(self, pp, infodict):
3163         files_to_delete = []
3164         if '__files_to_move' not in infodict:
3165             infodict['__files_to_move'] = {}
3166         try:
3167             files_to_delete, infodict = pp.run(infodict)
3168         except PostProcessingError as e:
3169             # Must be True and not 'only_download'
3170             if self.params.get('ignoreerrors') is True:
3171                 self.report_error(e)
3172                 return infodict
3173             raise
3174
3175         if not files_to_delete:
3176             return infodict
3177         if self.params.get('keepvideo', False):
3178             for f in files_to_delete:
3179                 infodict['__files_to_move'].setdefault(f, '')
3180         else:
3181             for old_filename in set(files_to_delete):
3182                 self.to_screen('Deleting original file %s (pass -k to keep)' % old_filename)
3183                 try:
3184                     os.remove(encodeFilename(old_filename))
3185                 except (IOError, OSError):
3186                     self.report_warning('Unable to remove downloaded original file')
3187                 if old_filename in infodict['__files_to_move']:
3188                     del infodict['__files_to_move'][old_filename]
3189         return infodict
3190
3191     @staticmethod
3192     def post_extract(info_dict):
3193         def actual_post_extract(info_dict):
3194             if info_dict.get('_type') in ('playlist', 'multi_video'):
3195                 for video_dict in info_dict.get('entries', {}):
3196                     actual_post_extract(video_dict or {})
3197                 return
3198
3199             post_extractor = info_dict.get('__post_extractor') or (lambda: {})
3200             extra = post_extractor().items()
3201             info_dict.update(extra)
3202             info_dict.pop('__post_extractor', None)
3203
3204             original_infodict = info_dict.get('__original_infodict') or {}
3205             original_infodict.update(extra)
3206             original_infodict.pop('__post_extractor', None)
3207
3208         actual_post_extract(info_dict or {})
3209
3210     def pre_process(self, ie_info, key='pre_process', files_to_move=None):
3211         info = dict(ie_info)
3212         info['__files_to_move'] = files_to_move or {}
3213         for pp in self._pps[key]:
3214             info = self.run_pp(pp, info)
3215         return info, info.pop('__files_to_move', None)
3216
3217     def post_process(self, filename, ie_info, files_to_move=None):
3218         """Run all the postprocessors on the given file."""
3219         info = dict(ie_info)
3220         info['filepath'] = filename
3221         info['__files_to_move'] = files_to_move or {}
3222
3223         for pp in ie_info.get('__postprocessors', []) + self._pps['post_process']:
3224             info = self.run_pp(pp, info)
3225         info = self.run_pp(MoveFilesAfterDownloadPP(self), info)
3226         del info['__files_to_move']
3227         for pp in self._pps['after_move']:
3228             info = self.run_pp(pp, info)
3229         return info
3230
3231     def _make_archive_id(self, info_dict):
3232         video_id = info_dict.get('id')
3233         if not video_id:
3234             return
3235         # Future-proof against any change in case
3236         # and backwards compatibility with prior versions
3237         extractor = info_dict.get('extractor_key') or info_dict.get('ie_key')  # key in a playlist
3238         if extractor is None:
3239             url = str_or_none(info_dict.get('url'))
3240             if not url:
3241                 return
3242             # Try to find matching extractor for the URL and take its ie_key
3243             for ie_key, ie in self._ies.items():
3244                 if ie.suitable(url):
3245                     extractor = ie_key
3246                     break
3247             else:
3248                 return
3249         return '%s %s' % (extractor.lower(), video_id)
3250
3251     def in_download_archive(self, info_dict):
3252         fn = self.params.get('download_archive')
3253         if fn is None:
3254             return False
3255
3256         vid_id = self._make_archive_id(info_dict)
3257         if not vid_id:
3258             return False  # Incomplete video information
3259
3260         return vid_id in self.archive
3261
3262     def record_download_archive(self, info_dict):
3263         fn = self.params.get('download_archive')
3264         if fn is None:
3265             return
3266         vid_id = self._make_archive_id(info_dict)
3267         assert vid_id
3268         with locked_file(fn, 'a', encoding='utf-8') as archive_file:
3269             archive_file.write(vid_id + '\n')
3270         self.archive.add(vid_id)
3271
3272     @staticmethod
3273     def format_resolution(format, default='unknown'):
3274         if format.get('vcodec') == 'none' and format.get('acodec') != 'none':
3275             return 'audio only'
3276         if format.get('resolution') is not None:
3277             return format['resolution']
3278         if format.get('width') and format.get('height'):
3279             return '%dx%d' % (format['width'], format['height'])
3280         elif format.get('height'):
3281             return '%sp' % format['height']
3282         elif format.get('width'):
3283             return '%dx?' % format['width']
3284         return default
3285
3286     def _format_note(self, fdict):
3287         res = ''
3288         if fdict.get('ext') in ['f4f', 'f4m']:
3289             res += '(unsupported)'
3290         if fdict.get('language'):
3291             if res:
3292                 res += ' '
3293             res += '[%s]' % fdict['language']
3294         if fdict.get('format_note') is not None:
3295             if res:
3296                 res += ' '
3297             res += fdict['format_note']
3298         if fdict.get('tbr') is not None:
3299             if res:
3300                 res += ', '
3301             res += '%4dk' % fdict['tbr']
3302         if fdict.get('container') is not None:
3303             if res:
3304                 res += ', '
3305             res += '%s container' % fdict['container']
3306         if (fdict.get('vcodec') is not None
3307                 and fdict.get('vcodec') != 'none'):
3308             if res:
3309                 res += ', '
3310             res += fdict['vcodec']
3311             if fdict.get('vbr') is not None:
3312                 res += '@'
3313         elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
3314             res += 'video@'
3315         if fdict.get('vbr') is not None:
3316             res += '%4dk' % fdict['vbr']
3317         if fdict.get('fps') is not None:
3318             if res:
3319                 res += ', '
3320             res += '%sfps' % fdict['fps']
3321         if fdict.get('acodec') is not None:
3322             if res:
3323                 res += ', '
3324             if fdict['acodec'] == 'none':
3325                 res += 'video only'
3326             else:
3327                 res += '%-5s' % fdict['acodec']
3328         elif fdict.get('abr') is not None:
3329             if res:
3330                 res += ', '
3331             res += 'audio'
3332         if fdict.get('abr') is not None:
3333             res += '@%3dk' % fdict['abr']
3334         if fdict.get('asr') is not None:
3335             res += ' (%5dHz)' % fdict['asr']
3336         if fdict.get('filesize') is not None:
3337             if res:
3338                 res += ', '
3339             res += format_bytes(fdict['filesize'])
3340         elif fdict.get('filesize_approx') is not None:
3341             if res:
3342                 res += ', '
3343             res += '~' + format_bytes(fdict['filesize_approx'])
3344         return res
3345
3346     def _list_format_headers(self, *headers):
3347         if self.params.get('listformats_table', True) is not False:
3348             return [self._format_screen(header, self.Styles.HEADERS) for header in headers]
3349         return headers
3350
3351     def list_formats(self, info_dict):
3352         formats = info_dict.get('formats', [info_dict])
3353         new_format = self.params.get('listformats_table', True) is not False
3354         if new_format:
3355             delim = self._format_screen('\u2502', self.Styles.DELIM, '|', test_encoding=True)
3356             table = [
3357                 [
3358                     self._format_screen(format_field(f, 'format_id'), self.Styles.ID),
3359                     format_field(f, 'ext'),
3360                     format_field(f, func=self.format_resolution, ignore=('audio only', 'images')),
3361                     format_field(f, 'fps', '\t%d'),
3362                     format_field(f, 'dynamic_range', '%s', ignore=(None, 'SDR')).replace('HDR', ''),
3363                     delim,
3364                     format_field(f, 'filesize', ' \t%s', func=format_bytes) + format_field(f, 'filesize_approx', '~\t%s', func=format_bytes),
3365                     format_field(f, 'tbr', '\t%dk'),
3366                     shorten_protocol_name(f.get('protocol', '').replace('native', 'n')),
3367                     delim,
3368                     format_field(f, 'vcodec', default='unknown').replace(
3369                         'none',
3370                         'images' if f.get('acodec') == 'none'
3371                         else self._format_screen('audio only', self.Styles.SUPPRESS)),
3372                     format_field(f, 'vbr', '\t%dk'),
3373                     format_field(f, 'acodec', default='unknown').replace(
3374                         'none',
3375                         '' if f.get('vcodec') == 'none'
3376                         else self._format_screen('video only', self.Styles.SUPPRESS)),
3377                     format_field(f, 'abr', '\t%dk'),
3378                     format_field(f, 'asr', '\t%dHz'),
3379                     join_nonempty(
3380                         self._format_screen('UNSUPPORTED', 'light red') if f.get('ext') in ('f4f', 'f4m') else None,
3381                         format_field(f, 'language', '[%s]'),
3382                         join_nonempty(
3383                             format_field(f, 'format_note'),
3384                             format_field(f, 'container', ignore=(None, f.get('ext'))),
3385                             delim=', '),
3386                         delim=' '),
3387                 ] for f in formats if f.get('preference') is None or f['preference'] >= -1000]
3388             header_line = self._list_format_headers(
3389                 'ID', 'EXT', 'RESOLUTION', '\tFPS', 'HDR', delim, '\tFILESIZE', '\tTBR', 'PROTO',
3390                 delim, 'VCODEC', '\tVBR', 'ACODEC', '\tABR', '\tASR', 'MORE INFO')
3391         else:
3392             table = [
3393                 [
3394                     format_field(f, 'format_id'),
3395                     format_field(f, 'ext'),
3396                     self.format_resolution(f),
3397                     self._format_note(f)]
3398                 for f in formats
3399                 if f.get('preference') is None or f['preference'] >= -1000]
3400             header_line = ['format code', 'extension', 'resolution', 'note']
3401
3402         self.to_screen(
3403             '[info] Available formats for %s:' % info_dict['id'])
3404         self.to_stdout(render_table(
3405             header_line, table,
3406             extra_gap=(0 if new_format else 1),
3407             hide_empty=new_format,
3408             delim=new_format and self._format_screen('\u2500', self.Styles.DELIM, '-', test_encoding=True)))
3409
3410     def list_thumbnails(self, info_dict):
3411         thumbnails = list(info_dict.get('thumbnails'))
3412         if not thumbnails:
3413             self.to_screen('[info] No thumbnails present for %s' % info_dict['id'])
3414             return
3415
3416         self.to_screen(
3417             '[info] Thumbnails for %s:' % info_dict['id'])
3418         self.to_stdout(render_table(
3419             self._list_format_headers('ID', 'Width', 'Height', 'URL'),
3420             [[t['id'], t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']] for t in thumbnails]))
3421
3422     def list_subtitles(self, video_id, subtitles, name='subtitles'):
3423         if not subtitles:
3424             self.to_screen('%s has no %s' % (video_id, name))
3425             return
3426         self.to_screen(
3427             'Available %s for %s:' % (name, video_id))
3428
3429         def _row(lang, formats):
3430             exts, names = zip(*((f['ext'], f.get('name') or 'unknown') for f in reversed(formats)))
3431             if len(set(names)) == 1:
3432                 names = [] if names[0] == 'unknown' else names[:1]
3433             return [lang, ', '.join(names), ', '.join(exts)]
3434
3435         self.to_stdout(render_table(
3436             self._list_format_headers('Language', 'Name', 'Formats'),
3437             [_row(lang, formats) for lang, formats in subtitles.items()],
3438             hide_empty=True))
3439
3440     def urlopen(self, req):
3441         """ Start an HTTP download """
3442         if isinstance(req, compat_basestring):
3443             req = sanitized_Request(req)
3444         return self._opener.open(req, timeout=self._socket_timeout)
3445
    def print_debug_header(self):
        """Write a series of '[debug]' lines describing the runtime environment.

        Does nothing unless the 'verbose' param is truthy. The lines go to
        the 'logger' param's debug() method when a logger is configured, and
        are written with write_string/self._write_string otherwise.
        In order, the header reports: encodings, yt-dlp version and variant,
        lazy-extractor status, plugins, compatibility options, git HEAD (for
        the 'source' variant), Python version, external executable versions,
        optional libraries and the proxy map.
        """
        if not self.params.get('verbose'):
            return

        def get_encoding(stream):
            # Describe the stream's encoding, with a note appended when the
            # stream does not support terminal sequences
            ret = getattr(stream, 'encoding', 'missing (%s)' % type(stream).__name__)
            if not supports_terminal_sequences(stream):
                from .compat import WINDOWS_VT_MODE
                ret += ' (No VT)' if WINDOWS_VT_MODE is False else ' (No ANSI)'
            return ret

        encoding_str = 'Encodings: locale %s, fs %s, out %s, err %s, pref %s' % (
            locale.getpreferredencoding(),
            sys.getfilesystemencoding(),
            get_encoding(self._screen_file), get_encoding(self._err_file),
            self.get_encoding())

        # Pick the sink for all the following debug lines
        logger = self.params.get('logger')
        if logger:
            write_debug = lambda msg: logger.debug(f'[debug] {msg}')
            write_debug(encoding_str)
        else:
            # The encoding line itself is written with encoding=None
            write_string(f'[debug] {encoding_str}\n', encoding=None)
            write_debug = lambda msg: self._write_string(f'[debug] {msg}\n')

        source = detect_variant()
        write_debug(join_nonempty(
            'yt-dlp version', __version__,
            f'[{RELEASE_GIT_HEAD}]' if RELEASE_GIT_HEAD else '',
            '' if source == 'unknown' else f'({source})',
            delim=' '))
        if not _LAZY_LOADER:
            if os.environ.get('YTDLP_NO_LAZY_EXTRACTORS'):
                write_debug('Lazy loading extractors is forcibly disabled')
            else:
                write_debug('Lazy loading extractors is disabled')
        if plugin_extractors or plugin_postprocessors:
            write_debug('Plugins: %s' % [
                '%s%s' % (klass.__name__, '' if klass.__name__ == name else f' as {name}')
                for name, klass in itertools.chain(plugin_extractors.items(), plugin_postprocessors.items())])
        if self.params.get('compat_opts'):
            write_debug('Compatibility options: %s' % ', '.join(self.params.get('compat_opts')))

        if source == 'source':
            # Best effort: any failure to query git is silently ignored
            try:
                sp = Popen(
                    ['git', 'rev-parse', '--short', 'HEAD'],
                    stdout=subprocess.PIPE, stderr=subprocess.PIPE,
                    cwd=os.path.dirname(os.path.abspath(__file__)))
                out, err = sp.communicate_or_kill()
                out = out.decode().strip()
                if re.match('[0-9a-f]+', out):
                    write_debug('Git HEAD: %s' % out)
            except Exception:
                # NOTE(review): sys.exc_clear() only exists on Python 2; on
                # Python 3 the resulting AttributeError is swallowed below
                try:
                    sys.exc_clear()
                except Exception:
                    pass

        def python_implementation():
            # Implementation name, with the PyPy version appended on PyPy
            impl_name = platform.python_implementation()
            if impl_name == 'PyPy' and hasattr(sys, 'pypy_version_info'):
                return impl_name + ' version %d.%d.%d' % sys.pypy_version_info[:3]
            return impl_name

        write_debug('Python version %s (%s %s) - %s' % (
            platform.python_version(),
            python_implementation(),
            platform.architecture()[0],
            platform_name()))

        exe_versions, ffmpeg_features = FFmpegPostProcessor.get_versions_and_features(self)
        # Keep only the names of the features whose value is truthy
        ffmpeg_features = {key for key, val in ffmpeg_features.items() if val}
        if ffmpeg_features:
            exe_versions['ffmpeg'] += ' (%s)' % ','.join(ffmpeg_features)

        exe_versions['rtmpdump'] = rtmpdump_version()
        exe_versions['phantomjs'] = PhantomJSwrapper._version()
        # Executables with a falsy version are left out of the listing
        exe_str = ', '.join(
            f'{exe} {v}' for exe, v in sorted(exe_versions.items()) if v
        ) or 'none'
        write_debug('exe versions: %s' % exe_str)

        from .downloader.websocket import has_websockets
        from .postprocessor.embedthumbnail import has_mutagen
        from .cookies import SQLITE_AVAILABLE, KEYRING_AVAILABLE

        lib_str = join_nonempty(
            compat_pycrypto_AES and compat_pycrypto_AES.__name__.split('.')[0],
            KEYRING_AVAILABLE and 'keyring',
            has_mutagen and 'mutagen',
            SQLITE_AVAILABLE and 'sqlite',
            has_websockets and 'websockets',
            delim=', ') or 'none'
        write_debug('Optional libraries: %s' % lib_str)

        # Merge the proxies of every handler of the opener that has any
        proxy_map = {}
        for handler in self._opener.handlers:
            if hasattr(handler, 'proxies'):
                proxy_map.update(handler.proxies)
        write_debug(f'Proxy map: {proxy_map}')

        # Not implemented
        # (the "False and" below makes this block unreachable)
        if False and self.params.get('call_home'):
            ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode('utf-8')
            write_debug('Public IP address: %s' % ipaddr)
            latest_version = self.urlopen(
                'https://yt-dl.org/latest/version').read().decode('utf-8')
            if version_tuple(latest_version) > version_tuple(__version__):
                self.report_warning(
                    'You are using an outdated version (newest version: %s)! '
                    'See https://yt-dl.org/update if you need help updating.' %
                    latest_version)
3559
3560     def _setup_opener(self):
3561         timeout_val = self.params.get('socket_timeout')
3562         self._socket_timeout = 20 if timeout_val is None else float(timeout_val)
3563
3564         opts_cookiesfrombrowser = self.params.get('cookiesfrombrowser')
3565         opts_cookiefile = self.params.get('cookiefile')
3566         opts_proxy = self.params.get('proxy')
3567
3568         self.cookiejar = load_cookies(opts_cookiefile, opts_cookiesfrombrowser, self)
3569
3570         cookie_processor = YoutubeDLCookieProcessor(self.cookiejar)
3571         if opts_proxy is not None:
3572             if opts_proxy == '':
3573                 proxies = {}
3574             else:
3575                 proxies = {'http': opts_proxy, 'https': opts_proxy}
3576         else:
3577             proxies = compat_urllib_request.getproxies()
3578             # Set HTTPS proxy to HTTP one if given (https://github.com/ytdl-org/youtube-dl/issues/805)
3579             if 'http' in proxies and 'https' not in proxies:
3580                 proxies['https'] = proxies['http']
3581         proxy_handler = PerRequestProxyHandler(proxies)
3582
3583         debuglevel = 1 if self.params.get('debug_printtraffic') else 0
3584         https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
3585         ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
3586         redirect_handler = YoutubeDLRedirectHandler()
3587         data_handler = compat_urllib_request_DataHandler()
3588
3589         # When passing our own FileHandler instance, build_opener won't add the
3590         # default FileHandler and allows us to disable the file protocol, which
3591         # can be used for malicious purposes (see
3592         # https://github.com/ytdl-org/youtube-dl/issues/8227)
3593         file_handler = compat_urllib_request.FileHandler()
3594
3595         def file_open(*args, **kwargs):
3596             raise compat_urllib_error.URLError('file:// scheme is explicitly disabled in yt-dlp for security reasons')
3597         file_handler.file_open = file_open
3598
3599         opener = compat_urllib_request.build_opener(
3600             proxy_handler, https_handler, cookie_processor, ydlh, redirect_handler, data_handler, file_handler)
3601
3602         # Delete the default user-agent header, which would otherwise apply in
3603         # cases where our custom HTTP handler doesn't come into play
3604         # (See https://github.com/ytdl-org/youtube-dl/issues/1309 for details)
3605         opener.addheaders = []
3606         self._opener = opener
3607
3608     def encode(self, s):
3609         if isinstance(s, bytes):
3610             return s  # Already encoded
3611
3612         try:
3613             return s.encode(self.get_encoding())
3614         except UnicodeEncodeError as err:
3615             err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
3616             raise
3617
3618     def get_encoding(self):
3619         encoding = self.params.get('encoding')
3620         if encoding is None:
3621             encoding = preferredencoding()
3622         return encoding
3623
3624     def _write_info_json(self, label, ie_result, infofn, overwrite=None):
3625         ''' Write infojson and returns True = written, False = skip, None = error '''
3626         if overwrite is None:
3627             overwrite = self.params.get('overwrites', True)
3628         if not self.params.get('writeinfojson'):
3629             return False
3630         elif not infofn:
3631             self.write_debug(f'Skipping writing {label} infojson')
3632             return False
3633         elif not self._ensure_dir_exists(infofn):
3634             return None
3635         elif not overwrite and os.path.exists(infofn):
3636             self.to_screen(f'[info] {label.title()} metadata is already present')
3637         else:
3638             self.to_screen(f'[info] Writing {label} metadata as JSON to: {infofn}')
3639             try:
3640                 write_json_file(self.sanitize_info(ie_result, self.params.get('clean_infojson', True)), infofn)
3641             except (OSError, IOError):
3642                 self.report_error(f'Cannot write {label} metadata to JSON file {infofn}')
3643                 return None
3644         return True
3645
3646     def _write_description(self, label, ie_result, descfn):
3647         ''' Write description and returns True = written, False = skip, None = error '''
3648         if not self.params.get('writedescription'):
3649             return False
3650         elif not descfn:
3651             self.write_debug(f'Skipping writing {label} description')
3652             return False
3653         elif not self._ensure_dir_exists(descfn):
3654             return None
3655         elif not self.params.get('overwrites', True) and os.path.exists(descfn):
3656             self.to_screen(f'[info] {label.title()} description is already present')
3657         elif ie_result.get('description') is None:
3658             self.report_warning(f'There\'s no {label} description to write')
3659             return False
3660         else:
3661             try:
3662                 self.to_screen(f'[info] Writing {label} description to: {descfn}')
3663                 with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
3664                     descfile.write(ie_result['description'])
3665             except (OSError, IOError):
3666                 self.report_error(f'Cannot write {label} description file {descfn}')
3667                 return None
3668         return True
3669
3670     def _write_subtitles(self, info_dict, filename):
3671         ''' Write subtitles to file and return list of (sub_filename, final_sub_filename); or None if error'''
3672         ret = []
3673         subtitles = info_dict.get('requested_subtitles')
3674         if not subtitles or not (self.params.get('writesubtitles') or self.params.get('writeautomaticsub')):
3675             # subtitles download errors are already managed as troubles in relevant IE
3676             # that way it will silently go on when used with unsupporting IE
3677             return ret
3678
3679         sub_filename_base = self.prepare_filename(info_dict, 'subtitle')
3680         if not sub_filename_base:
3681             self.to_screen('[info] Skipping writing video subtitles')
3682             return ret
3683         for sub_lang, sub_info in subtitles.items():
3684             sub_format = sub_info['ext']
3685             sub_filename = subtitles_filename(filename, sub_lang, sub_format, info_dict.get('ext'))
3686             sub_filename_final = subtitles_filename(sub_filename_base, sub_lang, sub_format, info_dict.get('ext'))
3687             if not self.params.get('overwrites', True) and os.path.exists(sub_filename):
3688                 self.to_screen(f'[info] Video subtitle {sub_lang}.{sub_format} is already present')
3689                 sub_info['filepath'] = sub_filename
3690                 ret.append((sub_filename, sub_filename_final))
3691                 continue
3692
3693             self.to_screen(f'[info] Writing video subtitles to: {sub_filename}')
3694             if sub_info.get('data') is not None:
3695                 try:
3696                     # Use newline='' to prevent conversion of newline characters
3697                     # See https://github.com/ytdl-org/youtube-dl/issues/10268
3698                     with io.open(sub_filename, 'w', encoding='utf-8', newline='') as subfile:
3699                         subfile.write(sub_info['data'])
3700                     sub_info['filepath'] = sub_filename
3701                     ret.append((sub_filename, sub_filename_final))
3702                     continue
3703                 except (OSError, IOError):
3704                     self.report_error(f'Cannot write video subtitles file {sub_filename}')
3705                     return None
3706
3707             try:
3708                 sub_copy = sub_info.copy()
3709                 sub_copy.setdefault('http_headers', info_dict.get('http_headers'))
3710                 self.dl(sub_filename, sub_copy, subtitle=True)
3711                 sub_info['filepath'] = sub_filename
3712                 ret.append((sub_filename, sub_filename_final))
3713             except (ExtractorError, IOError, OSError, ValueError) + network_exceptions as err:
3714                 self.report_warning(f'Unable to download video subtitles for {sub_lang!r}: {err}')
3715                 continue
3716         return ret
3717
3718     def _write_thumbnails(self, label, info_dict, filename, thumb_filename_base=None):
3719         ''' Write thumbnails to file and return list of (thumb_filename, final_thumb_filename) '''
3720         write_all = self.params.get('write_all_thumbnails', False)
3721         thumbnails, ret = [], []
3722         if write_all or self.params.get('writethumbnail', False):
3723             thumbnails = info_dict.get('thumbnails') or []
3724         multiple = write_all and len(thumbnails) > 1
3725
3726         if thumb_filename_base is None:
3727             thumb_filename_base = filename
3728         if thumbnails and not thumb_filename_base:
3729             self.write_debug(f'Skipping writing {label} thumbnail')
3730             return ret
3731
3732         for idx, t in list(enumerate(thumbnails))[::-1]:
3733             thumb_ext = (f'{t["id"]}.' if multiple else '') + determine_ext(t['url'], 'jpg')
3734             thumb_display_id = f'{label} thumbnail {t["id"]}'
3735             thumb_filename = replace_extension(filename, thumb_ext, info_dict.get('ext'))
3736             thumb_filename_final = replace_extension(thumb_filename_base, thumb_ext, info_dict.get('ext'))
3737
3738             if not self.params.get('overwrites', True) and os.path.exists(thumb_filename):
3739                 ret.append((thumb_filename, thumb_filename_final))
3740                 t['filepath'] = thumb_filename
3741                 self.to_screen('[info] %s is already present' % (
3742                     thumb_display_id if multiple else f'{label} thumbnail').capitalize())
3743             else:
3744                 self.to_screen(f'[info] Downloading {thumb_display_id} ...')
3745                 try:
3746                     uf = self.urlopen(t['url'])
3747                     self.to_screen(f'[info] Writing {thumb_display_id} to: {thumb_filename}')
3748                     with open(encodeFilename(thumb_filename), 'wb') as thumbf:
3749                         shutil.copyfileobj(uf, thumbf)
3750                     ret.append((thumb_filename, thumb_filename_final))
3751                     t['filepath'] = thumb_filename
3752                 except network_exceptions as err:
3753                     thumbnails.pop(idx)
3754                     self.report_warning(f'Unable to download {thumb_display_id}: {err}')
3755             if ret and not write_all:
3756                 break
3757         return ret