yt_dlp/YoutubeDL.py

   1 #!/usr/bin/env python3
   2 # coding: utf-8
   3
   4 from __future__ import absolute_import, unicode_literals
   5
   6 import collections
   7 import contextlib
   8 import copy
   9 import datetime
  10 import errno
  11 import fileinput
  12 import functools
  13 import io
  14 import itertools
  15 import json
  16 import locale
  17 import operator
  18 import os
  19 import platform
  20 import re
  21 import shutil
  22 import subprocess
  23 import sys
  24 import tempfile
  25 import time
  26 import tokenize
  27 import traceback
  28 import random
  29 import unicodedata
  30
  31 from enum import Enum
  32 from string import ascii_letters
  33
  34 from .compat import (
  35     compat_basestring,
  36     compat_get_terminal_size,
  37     compat_kwargs,
  38     compat_numeric_types,
  39     compat_os_name,
  40     compat_pycrypto_AES,
  41     compat_shlex_quote,
  42     compat_str,
  43     compat_tokenize_tokenize,
  44     compat_urllib_error,
  45     compat_urllib_request,
  46     compat_urllib_request_DataHandler,
  47     windows_enable_vt_mode,
  48 )
  49 from .cookies import load_cookies
  50 from .utils import (
  51     age_restricted,
  52     args_to_str,
  53     ContentTooShortError,
  54     date_from_str,
  55     DateRange,
  56     DEFAULT_OUTTMPL,
  57     determine_ext,
  58     determine_protocol,
  59     DownloadCancelled,
  60     DownloadError,
  61     encode_compat_str,
  62     encodeFilename,
  63     EntryNotInPlaylist,
  64     error_to_compat_str,
  65     ExistingVideoReached,
  66     expand_path,
  67     ExtractorError,
  68     float_or_none,
  69     format_bytes,
  70     format_field,
  71     formatSeconds,
  72     GeoRestrictedError,
  73     HEADRequest,
  74     int_or_none,
  75     iri_to_uri,
  76     ISO3166Utils,
  77     join_nonempty,
  78     LazyList,
  79     LINK_TEMPLATES,
  80     locked_file,
  81     make_dir,
  82     make_HTTPS_handler,
  83     MaxDownloadsReached,
  84     network_exceptions,
  85     number_of_digits,
  86     orderedSet,
  87     OUTTMPL_TYPES,
  88     PagedList,
  89     parse_filesize,
  90     PerRequestProxyHandler,
  91     platform_name,
  92     Popen,
  93     PostProcessingError,
  94     preferredencoding,
  95     prepend_extension,
  96     ReExtractInfo,
  97     register_socks_protocols,
  98     RejectedVideoReached,
  99     render_table,
 100     replace_extension,
 101     SameFileError,
 102     sanitize_filename,
 103     sanitize_path,
 104     sanitize_url,
 105     sanitized_Request,
 106     std_headers,
 107     STR_FORMAT_RE_TMPL,
 108     STR_FORMAT_TYPES,
 109     str_or_none,
 110     strftime_or_none,
 111     subtitles_filename,
 112     supports_terminal_sequences,
 113     timetuple_from_msec,
 114     to_high_limit_path,
 115     traverse_obj,
 116     try_get,
 117     UnavailableVideoError,
 118     url_basename,
 119     variadic,
 120     version_tuple,
 121     write_json_file,
 122     write_string,
 123     YoutubeDLCookieProcessor,
 124     YoutubeDLHandler,
 125     YoutubeDLRedirectHandler,
 126 )
 127 from .cache import Cache
 128 from .minicurses import format_text
 129 from .extractor import (
 130     gen_extractor_classes,
 131     get_info_extractor,
 132     _LAZY_LOADER,
 133     _PLUGIN_CLASSES as plugin_extractors
 134 )
 135 from .extractor.openload import PhantomJSwrapper
 136 from .downloader import (
 137     FFmpegFD,
 138     get_suitable_downloader,
 139     shorten_protocol_name
 140 )
 141 from .downloader.rtmp import rtmpdump_version
 142 from .postprocessor import (
 143     get_postprocessor,
 144     EmbedThumbnailPP,
 145     FFmpegFixupDurationPP,
 146     FFmpegFixupM3u8PP,
 147     FFmpegFixupM4aPP,
 148     FFmpegFixupStretchedPP,
 149     FFmpegFixupTimestampPP,
 150     FFmpegMergerPP,
 151     FFmpegPostProcessor,
 152     MoveFilesAfterDownloadPP,
 153     _PLUGIN_CLASSES as plugin_postprocessors
 154 )
 155 from .update import detect_variant
 156 from .version import __version__, RELEASE_GIT_HEAD
 157
 158 if compat_os_name == 'nt':
 159     import ctypes
 160
 161
 162 class YoutubeDL(object):
 163     """YoutubeDL class.
 164
 165     YoutubeDL objects are the ones responsible for downloading the
 166     actual video file and writing it to disk if the user has requested
 167     it, among some other tasks. In most cases there should be one per
 168     program. As, given a video URL, the downloader doesn't know how to
 169     extract all the needed information, task that InfoExtractors do, it
 170     has to pass the URL to one of them.
 171
 172     For this, YoutubeDL objects have a method that allows
 173     InfoExtractors to be registered in a given order. When it is passed
 174     a URL, the YoutubeDL object hands it to the first InfoExtractor it
 175     finds that reports being able to handle it. The InfoExtractor extracts
 176     all the information about the video or videos the URL refers to, and
 177     YoutubeDL processes the extracted information, possibly using a File
 178     Downloader to download the video.
 179
 180     YoutubeDL objects accept a lot of parameters. In order not to saturate
 181     the object constructor with arguments, it receives a dictionary of
 182     options instead. These options are available through the params
 183     attribute for the InfoExtractors to use. The YoutubeDL also
 184     registers itself as the downloader in charge for the InfoExtractors
 185     that are added to it, so this is a "mutual registration".
 186
 187     Available options:
 188
 189     username:          Username for authentication purposes.
 190     password:          Password for authentication purposes.
 191     videopassword:     Password for accessing a video.
 192     ap_mso:            Adobe Pass multiple-system operator identifier.
 193     ap_username:       Multiple-system operator account username.
 194     ap_password:       Multiple-system operator account password.
 195     usenetrc:          Use netrc for authentication instead.
 196     verbose:           Print additional info to stdout.
 197     quiet:             Do not print messages to stdout.
 198     no_warnings:       Do not print out anything for warnings.
 199     forceprint:        A list of templates to force print
 200     forceurl:          Force printing final URL. (Deprecated)
 201     forcetitle:        Force printing title. (Deprecated)
 202     forceid:           Force printing ID. (Deprecated)
 203     forcethumbnail:    Force printing thumbnail URL. (Deprecated)
 204     forcedescription:  Force printing description. (Deprecated)
 205     forcefilename:     Force printing final filename. (Deprecated)
 206     forceduration:     Force printing duration. (Deprecated)
 207     forcejson:         Force printing info_dict as JSON.
 208     dump_single_json:  Force printing the info_dict of the whole playlist
 209                        (or video) as a single JSON line.
 210     force_write_download_archive: Force writing download archive regardless
 211                        of 'skip_download' or 'simulate'.
 212     simulate:          Do not download the video files. If unset (or None),
 213                        simulate only if listsubtitles, listformats or list_thumbnails is used
 214     format:            Video format code. see "FORMAT SELECTION" for more details.
 215                        You can also pass a function. The function takes 'ctx' as
 216                        argument and returns the formats to download.
 217                        See "build_format_selector" for an implementation
 218     allow_unplayable_formats:   Allow unplayable formats to be extracted and downloaded.
 219     ignore_no_formats_error: Ignore "No video formats" error. Useful for
 220                        extracting metadata even if the video is not actually
 221                        available for download (experimental)
 222     format_sort:       A list of fields by which to sort the video formats.
 223                        See "Sorting Formats" for more details.
 224     format_sort_force: Force the given format_sort. see "Sorting Formats"
 225                        for more details.
 226     allow_multiple_video_streams:   Allow multiple video streams to be merged
 227                        into a single file
 228     allow_multiple_audio_streams:   Allow multiple audio streams to be merged
 229                        into a single file
 230     check_formats:     Whether to test if the formats are downloadable.
 231                        Can be True (check all), False (check none),
 232                        'selected' (check selected formats),
 233                        or None (check only if requested by extractor)
 234     paths:             Dictionary of output paths. The allowed keys are 'home'
 235                        'temp' and the keys of OUTTMPL_TYPES (in utils.py)
 236     outtmpl:           Dictionary of templates for output names. Allowed keys
 237                        are 'default' and the keys of OUTTMPL_TYPES (in utils.py).
 238                        For compatibility with youtube-dl, a single string can also be used
 239     outtmpl_na_placeholder: Placeholder for unavailable meta fields.
 240     restrictfilenames: Do not allow "&" and spaces in file names
 241     trim_file_name:    Limit length of filename (extension excluded)
 242     windowsfilenames:  Force the filenames to be windows compatible
 243     ignoreerrors:      Do not stop on download/postprocessing errors.
 244                        Can be 'only_download' to ignore only download errors.
 245                        Default is 'only_download' for CLI, but False for API
 246     skip_playlist_after_errors: Number of allowed failures until the rest of
 247                        the playlist is skipped
 248     force_generic_extractor: Force downloader to use the generic extractor
 249     overwrites:        Overwrite all video and metadata files if True,
 250                        overwrite only non-video files if None
 251                        and don't overwrite any file if False
 252                        For compatibility with youtube-dl,
 253                        "nooverwrites" may also be used instead
 254     playliststart:     Playlist item to start at.
 255     playlistend:       Playlist item to end at.
 256     playlist_items:    Specific indices of playlist to download.
 257     playlistreverse:   Download playlist items in reverse order.
 258     playlistrandom:    Download playlist items in random order.
 259     matchtitle:        Download only matching titles.
 260     rejecttitle:       Reject downloads for matching titles.
 261     logger:            Log messages to a logging.Logger instance.
 262     logtostderr:       Log messages to stderr instead of stdout.
 263     consoletitle:       Display progress in console window's titlebar.
 264     writedescription:  Write the video description to a .description file
 265     writeinfojson:     Write the video description to a .info.json file
 266     clean_infojson:    Remove private fields from the infojson
 267     getcomments:       Extract video comments. This will not be written to disk
 268                        unless writeinfojson is also given
 269     writeannotations:  Write the video annotations to a .annotations.xml file
 270     writethumbnail:    Write the thumbnail image to a file
 271     allow_playlist_files: Whether to write playlists' description, infojson etc
 272                        also to disk when using the 'write*' options
 273     write_all_thumbnails:  Write all thumbnail formats to files
 274     writelink:         Write an internet shortcut file, depending on the
 275                        current platform (.url/.webloc/.desktop)
 276     writeurllink:      Write a Windows internet shortcut file (.url)
 277     writewebloclink:   Write a macOS internet shortcut file (.webloc)
 278     writedesktoplink:  Write a Linux internet shortcut file (.desktop)
 279     writesubtitles:    Write the video subtitles to a file
 280     writeautomaticsub: Write the automatically generated subtitles to a file
 281     allsubtitles:      Deprecated - Use subtitleslangs = ['all']
 282                        Downloads all the subtitles of the video
 283                        (requires writesubtitles or writeautomaticsub)
 284     listsubtitles:     Lists all available subtitles for the video
 285     subtitlesformat:   The format code for subtitles
 286     subtitleslangs:    List of languages of the subtitles to download (can be regex).
 287                        The list may contain "all" to refer to all the available
 288                        subtitles. The language can be prefixed with a "-" to
 289                        exclude it from the requested languages. Eg: ['all', '-live_chat']
 290     keepvideo:         Keep the video file after post-processing
 291     daterange:         A DateRange object, download only if the upload_date is in the range.
 292     skip_download:     Skip the actual download of the video file
 293     cachedir:          Location of the cache files in the filesystem.
 294                        False to disable filesystem cache.
 295     noplaylist:        Download single video instead of a playlist if in doubt.
 296     age_limit:         An integer representing the user's age in years.
 297                        Unsuitable videos for the given age are skipped.
 298     min_views:         An integer representing the minimum view count the video
 299                        must have in order to not be skipped.
 300                        Videos without view count information are always
 301                        downloaded. None for no limit.
 302     max_views:         An integer representing the maximum view count.
 303                        Videos that are more popular than that are not
 304                        downloaded.
 305                        Videos without view count information are always
 306                        downloaded. None for no limit.
 307     download_archive:  File name of a file where all downloads are recorded.
 308                        Videos already present in the file are not downloaded
 309                        again.
 310     break_on_existing: Stop the download process after attempting to download a
 311                        file that is in the archive.
 312     break_on_reject:   Stop the download process when encountering a video that
 313                        has been filtered out.
 314     break_per_url:     Whether break_on_reject and break_on_existing
 315                        should act on each input URL as opposed to for the entire queue
 316     cookiefile:        File name where cookies should be read from and dumped to
 317     cookiesfrombrowser: A tuple containing the name of the browser and the profile
 318                        name/path from where cookies are loaded.
 319                        Eg: ('chrome', ) or ('vivaldi', 'default')
 320     nocheckcertificate:Do not verify SSL certificates
 321     prefer_insecure:   Use HTTP instead of HTTPS to retrieve information.
 322                        At the moment, this is only supported by YouTube.
 323     proxy:             URL of the proxy server to use
 324     geo_verification_proxy:  URL of the proxy to use for IP address verification
 325                        on geo-restricted sites.
 326     socket_timeout:    Time to wait for unresponsive hosts, in seconds
 327     bidi_workaround:   Work around buggy terminals without bidirectional text
 328                        support, using fribidi
 329     debug_printtraffic:Print out sent and received HTTP traffic
 330     include_ads:       Download ads as well
 331     default_search:    Prepend this string if an input url is not valid.
 332                        'auto' for elaborate guessing
 333     encoding:          Use this encoding instead of the system-specified.
 334     extract_flat:      Do not resolve URLs, return the immediate result.
 335                        Pass in 'in_playlist' to only show this behavior for
 336                        playlist items.
 337     wait_for_video:    If given, wait for scheduled streams to become available.
 338                        The value should be a tuple containing the range
 339                        (min_secs, max_secs) to wait between retries
 340     postprocessors:    A list of dictionaries, each with an entry
 341                        * key:  The name of the postprocessor. See
 342                                yt_dlp/postprocessor/__init__.py for a list.
 343                        * when: When to run the postprocessor. Can be one of
 344                                pre_process|before_dl|post_process|after_move.
 345                                Assumed to be 'post_process' if not given
 346     post_hooks:        Deprecated - Register a custom postprocessor instead
 347                        A list of functions that get called as the final step
 348                        for each video file, after all postprocessors have been
 349                        called. The filename will be passed as the only argument.
 350     progress_hooks:    A list of functions that get called on download
 351                        progress, with a dictionary with the entries
 352                        * status: One of "downloading", "error", or "finished".
 353                                  Check this first and ignore unknown values.
 354                        * info_dict: The extracted info_dict
 355
 356                        If status is one of "downloading", or "finished", the
 357                        following properties may also be present:
 358                        * filename: The final filename (always present)
 359                        * tmpfilename: The filename we're currently writing to
 360                        * downloaded_bytes: Bytes on disk
 361                        * total_bytes: Size of the whole file, None if unknown
 362                        * total_bytes_estimate: Guess of the eventual file size,
 363                                                None if unavailable.
 364                        * elapsed: The number of seconds since download started.
 365                        * eta: The estimated time in seconds, None if unknown
 366                        * speed: The download speed in bytes/second, None if
 367                                 unknown
 368                        * fragment_index: The counter of the currently
 369                                          downloaded video fragment.
 370                        * fragment_count: The number of fragments (= individual
 371                                          files that will be merged)
 372
 373                        Progress hooks are guaranteed to be called at least once
 374                        (with status "finished") if the download is successful.
 375     postprocessor_hooks:  A list of functions that get called on postprocessing
 376                        progress, with a dictionary with the entries
 377                        * status: One of "started", "processing", or "finished".
 378                                  Check this first and ignore unknown values.
 379                        * postprocessor: Name of the postprocessor
 380                        * info_dict: The extracted info_dict
 381
 382                        Postprocessor hooks are guaranteed to be called at least twice
 383                        (with status "started" and "finished") if the processing is successful.
 384     merge_output_format: Extension to use when merging formats.
 385     final_ext:         Expected final extension; used to detect when the file was
 386                        already downloaded and converted
 387     fixup:             Automatically correct known faults of the file.
 388                        One of:
 389                        - "never": do nothing
 390                        - "warn": only emit a warning
 391                        - "detect_or_warn": check whether we can do anything
 392                                            about it, warn otherwise (default)
 393     source_address:    Client-side IP address to bind to.
 394     call_home:         Boolean, true iff we are allowed to contact the
 395                        yt-dlp servers for debugging. (BROKEN)
 396     sleep_interval_requests: Number of seconds to sleep between requests
 397                        during extraction
 398     sleep_interval:    Number of seconds to sleep before each download when
 399                        used alone or a lower bound of a range for randomized
 400                        sleep before each download (minimum possible number
 401                        of seconds to sleep) when used along with
 402                        max_sleep_interval.
 403     max_sleep_interval:Upper bound of a range for randomized sleep before each
 404                        download (maximum possible number of seconds to sleep).
 405                        Must only be used along with sleep_interval.
 406                        Actual sleep time will be a random float from range
 407                        [sleep_interval; max_sleep_interval].
 408     sleep_interval_subtitles: Number of seconds to sleep before each subtitle download
 409     listformats:       Print an overview of available video formats and exit.
 410     list_thumbnails:   Print a table of all thumbnails and exit.
 411     match_filter:      A function that gets called with the info_dict of
 412                        every video.
 413                        If it returns a message, the video is ignored.
 414                        If it returns None, the video is downloaded.
 415                        match_filter_func in utils.py is one example for this.
 416     no_color:          Do not emit color codes in output.
 417     geo_bypass:        Bypass geographic restriction via faking X-Forwarded-For
 418                        HTTP header
 419     geo_bypass_country:
 420                        Two-letter ISO 3166-1 alpha-2 country code that will be used for
 421                        explicit geographic restriction bypassing via faking
 422                        X-Forwarded-For HTTP header
 423     geo_bypass_ip_block:
 424                        IP range in CIDR notation that will be used similarly to
 425                        geo_bypass_country
 426
 427     The following options determine which downloader is picked:
 428     external_downloader: A dictionary of protocol keys and the executable of the
 429                        external downloader to use for it. The allowed protocols
 430                        are default|http|ftp|m3u8|dash|rtsp|rtmp|mms.
 431                        Set the value to 'native' to use the native downloader
 432     hls_prefer_native: Deprecated - Use external_downloader = {'m3u8': 'native'}
 433                        or {'m3u8': 'ffmpeg'} instead.
 434                        Use the native HLS downloader instead of ffmpeg/avconv
 435                        if True, otherwise use ffmpeg/avconv if False, otherwise
 436                        use downloader suggested by extractor if None.
 437     compat_opts:       Compatibility options. See "Differences in default behavior".
 438                        The following options do not work when used through the API:
 439                        filename, abort-on-error, multistreams, no-live-chat, format-sort,
 440                        no-clean-infojson, no-playlist-metafiles, no-keep-subs, no-attach-info-json.
 441                        Refer to __init__.py for their implementation
 442     progress_template: Dictionary of templates for progress outputs.
 443                        Allowed keys are 'download', 'postprocess',
 444                        'download-title' (console title) and 'postprocess-title'.
 445                        The template is mapped on a dictionary with keys 'progress' and 'info'
 446
 447     The following parameters are not used by YoutubeDL itself, they are used by
 448     the downloader (see yt_dlp/downloader/common.py):
 449     nopart, updatetime, buffersize, ratelimit, throttledratelimit, min_filesize,
 450     max_filesize, test, noresizebuffer, retries, fragment_retries, continuedl,
 451     noprogress, xattr_set_filesize, hls_use_mpegts, http_chunk_size,
 452     external_downloader_args, concurrent_fragment_downloads.
 453
 454     The following options are used by the post processors:
 455     prefer_ffmpeg:     If False, use avconv instead of ffmpeg if both are available,
 456                        otherwise prefer ffmpeg. (avconv support is deprecated)
 457     ffmpeg_location:   Location of the ffmpeg/avconv binary; either the path
 458                        to the binary or its containing directory.
 459     postprocessor_args: A dictionary of postprocessor/executable keys (in lower case)
 460                        and a list of additional command-line arguments for the
 461                        postprocessor/executable. The dict can also have "PP+EXE" keys
 462                        which are used when the given exe is used by the given PP.
 463                        Use 'default' as the name for arguments to be passed to all PP
 464                        For compatibility with youtube-dl, a single list of args
 465                        can also be used
 466
 467     The following options are used by the extractors:
 468     extractor_retries: Number of times to retry for known errors
 469     dynamic_mpd:       Whether to process dynamic DASH manifests (default: True)
 470     hls_split_discontinuity: Split HLS playlists to different formats at
 471                        discontinuities such as ad breaks (default: False)
 472     extractor_args:    A dictionary of arguments to be passed to the extractors.
 473                        See "EXTRACTOR ARGUMENTS" for details.
 474                        Eg: {'youtube': {'skip': ['dash', 'hls']}}
 475     youtube_include_dash_manifest: Deprecated - Use extractor_args instead.
 476                        If True (default), DASH manifests and related
 477                        data will be downloaded and processed by extractor.
 478                        You can reduce network I/O by disabling it if you don't
 479                        care about DASH. (only for youtube)
 480     youtube_include_hls_manifest: Deprecated - Use extractor_args instead.
 481                        If True (default), HLS manifests and related
 482                        data will be downloaded and processed by extractor.
 483                        You can reduce network I/O by disabling it if you don't
 484                        care about HLS. (only for youtube)
 485     """
 486
 487     _NUMERIC_FIELDS = set((  # Names of info-dict fields grouped as numeric. NOTE(review): the consumer is outside this chunk - presumably output-template formatting; confirm
 488         'width', 'height', 'tbr', 'abr', 'asr', 'vbr', 'fps', 'filesize', 'filesize_approx',
 489         'timestamp', 'release_timestamp',
 490         'duration', 'view_count', 'like_count', 'dislike_count', 'repost_count',
 491         'average_rating', 'comment_count', 'age_limit',
 492         'start_time', 'end_time',
 493         'chapter_number', 'season_number', 'episode_number',
 494         'track_number', 'disc_number', 'release_year',
 495     ))
 496
 497     _format_selection_exts = {  # File extensions grouped by media kind. NOTE(review): presumably consulted by the format selector; confirm
 498         'audio': {'m4a', 'mp3', 'ogg', 'aac'},
 499         'video': {'mp4', 'flv', 'webm', '3gp'},
 500         'storyboards': {'mhtml'},
 501     }
 502
 503     params = None  # Class-level defaults start here; __init__ rebinds params, _ies, _pps, _printed_messages, _first_webpage_request, _download_retcode, _num_downloads and _screen_file on every instance
 504     _ies = {}
 505     _pps = {'pre_process': [], 'before_dl': [], 'after_move': [], 'post_process': []}  # Same four stage keys that __init__ recreates per instance
 506     _printed_messages = set()
 507     _first_webpage_request = True
 508     _download_retcode = None  # __init__ sets this to 0
 509     _num_downloads = None  # __init__ sets this to 0
 510     _playlist_level = 0  # Not rebound in the visible part of __init__
 511     _playlist_urls = set()  # Mutable and not rebound in the visible part of __init__. NOTE(review): shared across instances unless reassigned later; confirm
 512     _screen_file = None  # __init__ points this at sys.stdout, or sys.stderr when params['logtostderr'] is truthy
 513
 514     def __init__(self, params=None, auto_init=True):
 515         """Create a YoutubeDL object with the given options.
 516         @param auto_init    Whether to load the default extractors and print header (if verbose).
 517                             Set to 'no_verbose_header' to not print the header
 518         """
 519         if params is None:
 520             params = {}
 521         self._ies = {}
 522         self._ies_instances = {}
 523         self._pps = {'pre_process': [], 'before_dl': [], 'after_move': [], 'post_process': []}
 524         self._printed_messages = set()
 525         self._first_webpage_request = True
 526         self._post_hooks = []
 527         self._progress_hooks = []
 528         self._postprocessor_hooks = []
 529         self._download_retcode = 0
 530         self._num_downloads = 0
 531         self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
 532         self._err_file = sys.stderr
 533         self.params = params
 534         self.cache = Cache(self)
 535
 536         windows_enable_vt_mode()
 537         self._allow_colors = {
 538             'screen': not self.params.get('no_color') and supports_terminal_sequences(self._screen_file),
 539             'err': not self.params.get('no_color') and supports_terminal_sequences(self._err_file),
 540         }
 541
 542         if sys.version_info < (3, 6):
 543             self.report_warning(
 544                 'Python version %d.%d is not supported! Please update to Python 3.6 or above' % sys.version_info[:2])
 545
 546         if self.params.get('allow_unplayable_formats'):
 547             self.report_warning(
 548                 f'You have asked for {self._format_err("UNPLAYABLE", self.Styles.EMPHASIS)} formats to be listed/downloaded. '
 549                 'This is a developer option intended for debugging. \n'
 550                 '         If you experience any issues while using this option, '
 551                 f'{self._format_err("DO NOT", self.Styles.ERROR)} open a bug report')
 552
 553         def check_deprecated(param, option, suggestion):  # Warn when the deprecated key `param` is set (not None) in self.params; returns True if it was set, else False
 554             if self.params.get(param) is not None:  # A key that is absent or explicitly None counts as "not used"
 555                 self.report_warning('%s is deprecated. Use %s instead' % (option, suggestion))  # `option` and `suggestion` only feed the warning text
 556                 return True  # Lets the caller migrate the old value, as done for cn_verification_proxy
 557             return False
 558
 559         if check_deprecated('cn_verification_proxy', '--cn-verification-proxy', '--geo-verification-proxy'):
 560             if self.params.get('geo_verification_proxy') is None:
 561                 self.params['geo_verification_proxy'] = self.params['cn_verification_proxy']
 562
 563         check_deprecated('autonumber', '--auto-number', '-o "%(autonumber)s-%(title)s.%(ext)s"')
 564         check_deprecated('usetitle', '--title', '-o "%(title)s-%(id)s.%(ext)s"')
 565         check_deprecated('useid', '--id', '-o "%(id)s.%(ext)s"')
 566
 567         for msg in self.params.get('_warnings', []):
 568             self.report_warning(msg)
 569
 570         if 'list-formats' in self.params.get('compat_opts', []):
 571             self.params['listformats_table'] = False
 572
 573         if 'overwrites' not in self.params and self.params.get('nooverwrites') is not None:
 574             # nooverwrites was unnecessarily changed to overwrites
 575             # in 0c3d0f51778b153f65c21906031c2e091fcfb641
 576             # This ensures compatibility with both keys
 577             self.params['overwrites'] = not self.params['nooverwrites']
 578         elif self.params.get('overwrites') is None:
 579             self.params.pop('overwrites', None)
 580         else:
 581             self.params['nooverwrites'] = not self.params['overwrites']
 582
 583         if params.get('bidi_workaround', False):
 584             try:
 585                 import pty
 586                 master, slave = pty.openpty()
 587                 width = compat_get_terminal_size().columns
 588                 if width is None:
 589                     width_args = []
 590                 else:
 591                     width_args = ['-w', str(width)]
 592                 sp_kwargs = dict(
 593                     stdin=subprocess.PIPE,
 594                     stdout=slave,
 595                     stderr=self._err_file)
 596                 try:
 597                     self._output_process = Popen(['bidiv'] + width_args, **sp_kwargs)
 598                 except OSError:
 599                     self._output_process = Popen(['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
 600                 self._output_channel = os.fdopen(master, 'rb')
 601             except OSError as ose:
 602                 if ose.errno == errno.ENOENT:
 603                     self.report_warning(
 604                         'Could not find fribidi executable, ignoring --bidi-workaround. '
 605                         'Make sure that  fribidi  is an executable file in one of the directories in your $PATH.')
 606                 else:
 607                     raise
 608
 609         if (sys.platform != 'win32'
 610                 and sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
 611                 and not params.get('restrictfilenames', False)):
 612             # Unicode filesystem API will throw errors (#1474, #13027)
 613             self.report_warning(
 614                 'Assuming --restrict-filenames since file system encoding '
 615                 'cannot encode all characters. '
 616                 'Set the LC_ALL environment variable to fix this.')
 617             self.params['restrictfilenames'] = True
 618
 619         self.outtmpl_dict = self.parse_outtmpl()
 620
 621         # Creating format selector here allows us to catch syntax errors before the extraction
 622         self.format_selector = (
 623             None if self.params.get('format') is None
 624             else self.params['format'] if callable(self.params['format'])
 625             else self.build_format_selector(self.params['format']))
 626
 627         self._setup_opener()
 628
 629         if auto_init:
 630             if auto_init != 'no_verbose_header':
 631                 self.print_debug_header()
 632             self.add_default_info_extractors()
 633
 634         for pp_def_raw in self.params.get('postprocessors', []):
 635             pp_def = dict(pp_def_raw)
 636             when = pp_def.pop('when', 'post_process')
 637             pp_class = get_postprocessor(pp_def.pop('key'))
 638             pp = pp_class(self, **compat_kwargs(pp_def))
 639             self.add_post_processor(pp, when=when)
 640
 641         hooks = {
 642             'post_hooks': self.add_post_hook,
 643             'progress_hooks': self.add_progress_hook,
 644             'postprocessor_hooks': self.add_postprocessor_hook,
 645         }
 646         for opt, fn in hooks.items():
 647             for ph in self.params.get(opt, []):
 648                 fn(ph)
 649
 650         register_socks_protocols()
 651
 652         def preload_download_archive(fn):
 653             """Preload the archive, if any is specified"""
 654             if fn is None:
 655                 return False
 656             self.write_debug(f'Loading archive file {fn!r}')
 657             try:
 658                 with locked_file(fn, 'r', encoding='utf-8') as archive_file:
 659                     for line in archive_file:
 660                         self.archive.add(line.strip())
 661             except IOError as ioe:
 662                 if ioe.errno != errno.ENOENT:
 663                     raise
 664                 return False
 665             return True
 666
 667         self.archive = set()
 668         preload_download_archive(self.params.get('download_archive'))
 669
 670     def warn_if_short_id(self, argv):
 671         # short YouTube ID starting with dash?
 672         idxs = [
 673             i for i, a in enumerate(argv)
 674             if re.match(r'^-[0-9A-Za-z_-]{10}$', a)]
 675         if idxs:
 676             correct_argv = (
 677                 ['yt-dlp']
 678                 + [a for i, a in enumerate(argv) if i not in idxs]
 679                 + ['--'] + [argv[i] for i in idxs]
 680             )
 681             self.report_warning(
 682                 'Long argument string detected. '
 683                 'Use -- to separate parameters and URLs, like this:\n%s' %
 684                 args_to_str(correct_argv))
 685
 686     def add_info_extractor(self, ie):
 687         """Add an InfoExtractor object to the end of the list."""
 688         ie_key = ie.ie_key()
 689         self._ies[ie_key] = ie
 690         if not isinstance(ie, type):
 691             self._ies_instances[ie_key] = ie
 692             ie.set_downloader(self)
 693
 694     def _get_info_extractor_class(self, ie_key):
 695         ie = self._ies.get(ie_key)
 696         if ie is None:
 697             ie = get_info_extractor(ie_key)
 698             self.add_info_extractor(ie)
 699         return ie
 700
 701     def get_info_extractor(self, ie_key):
 702         """
 703         Get an instance of an IE with name ie_key, it will try to get one from
 704         the _ies list, if there's no instance it will create a new one and add
 705         it to the extractor list.
 706         """
 707         ie = self._ies_instances.get(ie_key)
 708         if ie is None:
 709             ie = get_info_extractor(ie_key)()
 710             self.add_info_extractor(ie)
 711         return ie
 712
 713     def add_default_info_extractors(self):
 714         """
 715         Add the InfoExtractors returned by gen_extractors to the end of the list
 716         """
 717         for ie in gen_extractor_classes():
 718             self.add_info_extractor(ie)
 719
 720     def add_post_processor(self, pp, when='post_process'):
 721         """Add a PostProcessor object to the end of the chain."""
 722         self._pps[when].append(pp)
 723         pp.set_downloader(self)
 724
 725     def add_post_hook(self, ph):
 726         """Add the post hook"""
 727         self._post_hooks.append(ph)
 728
 729     def add_progress_hook(self, ph):
 730         """Add the download progress hook"""
 731         self._progress_hooks.append(ph)
 732
 733     def add_postprocessor_hook(self, ph):
 734         """Add the postprocessing progress hook"""
 735         self._postprocessor_hooks.append(ph)
 736
 737     def _bidi_workaround(self, message):
 738         if not hasattr(self, '_output_channel'):
 739             return message
 740
 741         assert hasattr(self, '_output_process')
 742         assert isinstance(message, compat_str)
 743         line_count = message.count('\n') + 1
 744         self._output_process.stdin.write((message + '\n').encode('utf-8'))
 745         self._output_process.stdin.flush()
 746         res = ''.join(self._output_channel.readline().decode('utf-8')
 747                       for _ in range(line_count))
 748         return res[:-len('\n')]
 749
 750     def _write_string(self, message, out=None, only_once=False):
 751         if only_once:
 752             if message in self._printed_messages:
 753                 return
 754             self._printed_messages.add(message)
 755         write_string(message, out=out, encoding=self.params.get('encoding'))
 756
 757     def to_stdout(self, message, skip_eol=False, quiet=False):
 758         """Print message to stdout"""
 759         if self.params.get('logger'):
 760             self.params['logger'].debug(message)
 761         elif not quiet or self.params.get('verbose'):
 762             self._write_string(
 763                 '%s%s' % (self._bidi_workaround(message), ('' if skip_eol else '\n')),
 764                 self._err_file if quiet else self._screen_file)
 765
 766     def to_stderr(self, message, only_once=False):
 767         """Print message to stderr"""
 768         assert isinstance(message, compat_str)
 769         if self.params.get('logger'):
 770             self.params['logger'].error(message)
 771         else:
 772             self._write_string('%s\n' % self._bidi_workaround(message), self._err_file, only_once=only_once)
 773
 774     def to_console_title(self, message):
 775         if not self.params.get('consoletitle', False):
 776             return
 777         if compat_os_name == 'nt':
 778             if ctypes.windll.kernel32.GetConsoleWindow():
 779                 # c_wchar_p() might not be necessary if `message` is
 780                 # already of type unicode()
 781                 ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
 782         elif 'TERM' in os.environ:
 783             self._write_string('\033]0;%s\007' % message, self._screen_file)
 784
 785     def save_console_title(self):
 786         if not self.params.get('consoletitle', False):
 787             return
 788         if self.params.get('simulate'):
 789             return
 790         if compat_os_name != 'nt' and 'TERM' in os.environ:
 791             # Save the title on stack
 792             self._write_string('\033[22;0t', self._screen_file)
 793
 794     def restore_console_title(self):
 795         if not self.params.get('consoletitle', False):
 796             return
 797         if self.params.get('simulate'):
 798             return
 799         if compat_os_name != 'nt' and 'TERM' in os.environ:
 800             # Restore the title from stack
 801             self._write_string('\033[23;0t', self._screen_file)
 802
 803     def __enter__(self):
 804         self.save_console_title()
 805         return self
 806
 807     def __exit__(self, *args):
 808         self.restore_console_title()
 809
 810         if self.params.get('cookiefile') is not None:
 811             self.cookiejar.save(ignore_discard=True, ignore_expires=True)
 812
 813     def trouble(self, message=None, tb=None):
 814         """Determine action to take when a download problem appears.
 815
 816         Depending on if the downloader has been configured to ignore
 817         download errors or not, this method may throw an exception or
 818         not when errors are found, after printing the message.
 819
 820         tb, if given, is additional traceback information.
 821         """
 822         if message is not None:
 823             self.to_stderr(message)
 824         if self.params.get('verbose'):
 825             if tb is None:
 826                 if sys.exc_info()[0]:  # if .trouble has been called from an except block
 827                     tb = ''
 828                     if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
 829                         tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
 830                     tb += encode_compat_str(traceback.format_exc())
 831                 else:
 832                     tb_data = traceback.format_list(traceback.extract_stack())
 833                     tb = ''.join(tb_data)
 834             if tb:
 835                 self.to_stderr(tb)
 836         if not self.params.get('ignoreerrors'):
 837             if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
 838                 exc_info = sys.exc_info()[1].exc_info
 839             else:
 840                 exc_info = sys.exc_info()
 841             raise DownloadError(message, exc_info)
 842         self._download_retcode = 1
 843
 844     def to_screen(self, message, skip_eol=False):
 845         """Print message to stdout if not in quiet mode"""
 846         self.to_stdout(
 847             message, skip_eol, quiet=self.params.get('quiet', False))
 848
    class Styles(Enum):
        """Named text styles.

        _format_text accepts a member of this enum in place of a raw format
        string and passes the member's value on to format_text.
        """
        HEADERS = 'yellow'
        EMPHASIS = 'light blue'
        ID = 'green'
        DELIM = 'blue'
        ERROR = 'red'
        # Same value as HEADERS, so Enum treats WARNING as an alias of HEADERS
        WARNING = 'yellow'
        SUPPRESS = 'light black'
 857
 858     def _format_text(self, handle, allow_colors, text, f, fallback=None, *, test_encoding=False):
 859         if test_encoding:
 860             original_text = text
 861             encoding = self.params.get('encoding') or getattr(handle, 'encoding', 'ascii')
 862             text = text.encode(encoding, 'ignore').decode(encoding)
 863             if fallback is not None and text != original_text:
 864                 text = fallback
 865         if isinstance(f, self.Styles):
 866             f = f.value
 867         return format_text(text, f) if allow_colors else text if fallback is None else fallback
 868
 869     def _format_screen(self, *args, **kwargs):
 870         return self._format_text(
 871             self._screen_file, self._allow_colors['screen'], *args, **kwargs)
 872
 873     def _format_err(self, *args, **kwargs):
 874         return self._format_text(
 875             self._err_file, self._allow_colors['err'], *args, **kwargs)
 876
 877     def report_warning(self, message, only_once=False):
 878         '''
 879         Print the message to stderr, it will be prefixed with 'WARNING:'
 880         If stderr is a tty file the 'WARNING:' will be colored
 881         '''
 882         if self.params.get('logger') is not None:
 883             self.params['logger'].warning(message)
 884         else:
 885             if self.params.get('no_warnings'):
 886                 return
 887             self.to_stderr(f'{self._format_err("WARNING:", self.Styles.WARNING)} {message}', only_once)
 888
 889     def report_error(self, message, tb=None):
 890         '''
 891         Do the same as trouble, but prefixes the message with 'ERROR:', colored
 892         in red if stderr is a tty file.
 893         '''
 894         self.trouble(f'{self._format_err("ERROR:", self.Styles.ERROR)} {message}', tb)
 895
 896     def write_debug(self, message, only_once=False):
 897         '''Log debug message or Print message to stderr'''
 898         if not self.params.get('verbose', False):
 899             return
 900         message = '[debug] %s' % message
 901         if self.params.get('logger'):
 902             self.params['logger'].debug(message)
 903         else:
 904             self.to_stderr(message, only_once)
 905
 906     def report_file_already_downloaded(self, file_name):
 907         """Report file has already been fully downloaded."""
 908         try:
 909             self.to_screen('[download] %s has already been downloaded' % file_name)
 910         except UnicodeEncodeError:
 911             self.to_screen('[download] The file has already been downloaded')
 912
 913     def report_file_delete(self, file_name):
 914         """Report that existing file will be deleted."""
 915         try:
 916             self.to_screen('Deleting existing file %s' % file_name)
 917         except UnicodeEncodeError:
 918             self.to_screen('Deleting existing file')
 919
 920     def raise_no_formats(self, info, forced=False):
 921         has_drm = info.get('__has_drm')
 922         msg = 'This video is DRM protected' if has_drm else 'No video formats found!'
 923         expected = self.params.get('ignore_no_formats_error')
 924         if forced or not expected:
 925             raise ExtractorError(msg, video_id=info['id'], ie=info['extractor'],
 926                                  expected=has_drm or expected)
 927         else:
 928             self.report_warning(msg)
 929
 930     def parse_outtmpl(self):
 931         outtmpl_dict = self.params.get('outtmpl', {})
 932         if not isinstance(outtmpl_dict, dict):
 933             outtmpl_dict = {'default': outtmpl_dict}
 934         # Remove spaces in the default template
 935         if self.params.get('restrictfilenames'):
 936             sanitize = lambda x: x.replace(' - ', ' ').replace(' ', '-')
 937         else:
 938             sanitize = lambda x: x
 939         outtmpl_dict.update({
 940             k: sanitize(v) for k, v in DEFAULT_OUTTMPL.items()
 941             if outtmpl_dict.get(k) is None})
 942         for key, val in outtmpl_dict.items():
 943             if isinstance(val, bytes):
 944                 self.report_warning(
 945                     'Parameter outtmpl is bytes, but should be a unicode string. '
 946                     'Put  from __future__ import unicode_literals  at the top of your code file or consider switching to Python 3.x.')
 947         return outtmpl_dict
 948
 949     def get_output_path(self, dir_type='', filename=None):
 950         paths = self.params.get('paths', {})
 951         assert isinstance(paths, dict)
 952         path = os.path.join(
 953             expand_path(paths.get('home', '').strip()),
 954             expand_path(paths.get(dir_type, '').strip()) if dir_type else '',
 955             filename or '')
 956
 957         # Temporary fix for #4787
 958         # 'Treat' all problem characters by passing filename through preferredencoding
 959         # to workaround encoding issues with subprocess on python2 @ Windows
 960         if sys.version_info < (3, 0) and sys.platform == 'win32':
 961             path = encodeFilename(path, True).decode(preferredencoding())
 962         return sanitize_path(path, force=self.params.get('windowsfilenames'))
 963
 964     @staticmethod
 965     def _outtmpl_expandpath(outtmpl):
 966         # expand_path translates '%%' into '%' and '$$' into '$'
 967         # correspondingly that is not what we want since we need to keep
 968         # '%%' intact for template dict substitution step. Working around
 969         # with boundary-alike separator hack.
 970         sep = ''.join([random.choice(ascii_letters) for _ in range(32)])
 971         outtmpl = outtmpl.replace('%%', '%{0}%'.format(sep)).replace('$$', '${0}$'.format(sep))
 972
 973         # outtmpl should be expand_path'ed before template dict substitution
 974         # because meta fields may contain env variables we don't want to
 975         # be expanded. For example, for outtmpl "%(title)s.%(ext)s" and
 976         # title "Hello $PATH", we don't want `$PATH` to be expanded.
 977         return expand_path(outtmpl).replace(sep, '')
 978
 979     @staticmethod
 980     def escape_outtmpl(outtmpl):
 981         ''' Escape any remaining strings like %s, %abc% etc. '''
 982         return re.sub(
 983             STR_FORMAT_RE_TMPL.format('', '(?![%(\0])'),
 984             lambda mobj: ('' if mobj.group('has_key') else '%') + mobj.group(0),
 985             outtmpl)
 986
 987     @classmethod
 988     def validate_outtmpl(cls, outtmpl):
 989         ''' @return None or Exception object '''
 990         outtmpl = re.sub(
 991             STR_FORMAT_RE_TMPL.format('[^)]*', '[ljqBU]'),
 992             lambda mobj: f'{mobj.group(0)[:-1]}s',
 993             cls._outtmpl_expandpath(outtmpl))
 994         try:
 995             cls.escape_outtmpl(outtmpl) % collections.defaultdict(int)
 996             return None
 997         except ValueError as err:
 998             return err
 999
1000     @staticmethod
1001     def _copy_infodict(info_dict):
1002         info_dict = dict(info_dict)
1003         for key in ('__original_infodict', '__postprocessors'):
1004             info_dict.pop(key, None)
1005         return info_dict
1006
    def prepare_outtmpl(self, outtmpl, info_dict, sanitize=None):
        """ Make the outtmpl and info_dict suitable for substitution: ydl.escape_outtmpl(outtmpl) % info_dict

        outtmpl -- the output template string
        info_dict -- the info dict; an 'epoch' key is added to it in place if
                     missing, everything else works on a copy
        sanitize -- optional callable taking (field name, value); when given it
                    is applied to values formatted with the c, s or r types

        Returns a tuple (outtmpl, TMPL_DICT): the template with every keyed
        field rewritten to a plain %-format spec, and the dict holding the
        computed value for each rewritten key.

        Inside a %(...) key the following is understood (see
        INTERNAL_FORMAT_RE): a leading '-' to negate, dotted traversal of the
        info dict, '+'/'-' arithmetic, '>' followed by a strftime format,
        ','-separated alternate fields and '|' followed by a default value.
        """
        info_dict.setdefault('epoch', int(time.time()))  # keep epoch consistent once set

        # From here on only a copy is modified
        info_dict = self._copy_infodict(info_dict)
        info_dict['duration_string'] = (  # %(duration>%H-%M-%S)s is wrong if duration > 24hrs
            formatSeconds(info_dict['duration'], '-' if sanitize else ':')
            if info_dict.get('duration', None) is not None
            else None)
        info_dict['autonumber'] = self.params.get('autonumber_start', 1) - 1 + self._num_downloads
        if info_dict.get('resolution') is None:
            info_dict['resolution'] = self.format_resolution(info_dict, default=None)

        # For fields playlist_index, playlist_autonumber and autonumber convert all occurrences
        # of %(field)s to %(field)0Nd for backward compatibility
        field_size_compat_map = {
            'playlist_index': number_of_digits(info_dict.get('_last_playlist_index') or 0),
            'playlist_autonumber': number_of_digits(info_dict.get('n_entries') or 0),
            'autonumber': self.params.get('autonumber_size') or 5,
        }

        # Filled in by create_key: rewritten key -> value to substitute
        TMPL_DICT = {}
        # Matches a whole format spec in the template, including the custom types l, j, q, B, U
        EXTERNAL_FORMAT_RE = re.compile(STR_FORMAT_RE_TMPL.format('[^)]*', f'[{STR_FORMAT_TYPES}ljqBU]'))
        MATH_FUNCTIONS = {
            '+': float.__add__,
            '-': float.__sub__,
        }
        # Field is of the form key1.key2...
        # where keys (except first) can be string, int or slice
        FIELD_RE = r'\w*(?:\.(?:\w+|{num}|{num}?(?::{num}?){{1,2}}))*'.format(num=r'(?:-?\d+)')
        # NOTE(review): the '.' in the numeric alternative below is unescaped, so it
        # matches any character rather than only a decimal point - confirm whether intended
        MATH_FIELD_RE = r'''{field}|{num}'''.format(field=FIELD_RE, num=r'-?\d+(?:.\d+)?')
        MATH_OPERATORS_RE = r'(?:%s)' % '|'.join(map(re.escape, MATH_FUNCTIONS.keys()))
        # Parses the text inside %(...): negate, fields, maths, strf_format, alternate, default
        INTERNAL_FORMAT_RE = re.compile(r'''(?x)
            (?P<negate>-)?
            (?P<fields>{field})
            (?P<maths>(?:{math_op}{math_field})*)
            (?:>(?P<strf_format>.+?))?
            (?P<alternate>(?<!\\),[^|)]+)?
            (?:\|(?P<default>.*?))?
            $'''.format(field=FIELD_RE, math_op=MATH_OPERATORS_RE, math_field=MATH_FIELD_RE))

        def _traverse_infodict(k):
            # Look up a dotted path in the (copied) info dict; a leading '.' is ignored
            k = k.split('.')
            if k[0] == '':
                k.pop(0)
            return traverse_obj(info_dict, k, is_user_input=True, traverse_string=True)

        def get_value(mdict):
            # Compute the value for one parsed key (the groupdict of INTERNAL_FORMAT_RE)
            # Object traversal
            value = _traverse_infodict(mdict['fields'])
            # Negative
            if mdict['negate']:
                value = float_or_none(value)
                if value is not None:
                    value *= -1
            # Do maths
            offset_key = mdict['maths']
            if offset_key:
                value = float_or_none(value)
                operator = None
                # Consume the maths string alternately as operator, operand, operator, ...
                while offset_key:
                    item = re.match(
                        MATH_FIELD_RE if operator else MATH_OPERATORS_RE,
                        offset_key).group(0)
                    offset_key = offset_key[len(item):]
                    if operator is None:
                        operator = MATH_FUNCTIONS[item]
                        continue
                    item, multiplier = (item[1:], -1) if item[0] == '-' else (item, 1)
                    # The operand is either a number or the name of another field
                    offset = float_or_none(item)
                    if offset is None:
                        offset = float_or_none(_traverse_infodict(item))
                    try:
                        value = operator(value, multiplier * offset)
                    except (TypeError, ZeroDivisionError):
                        # e.g. value or offset is None: the whole expression has no value
                        return None
                    operator = None
            # Datetime formatting
            if mdict['strf_format']:
                value = strftime_or_none(value, mdict['strf_format'].replace('\\,', ','))

            return value

        # Placeholder substituted when a field has no value and no default is given
        na = self.params.get('outtmpl_na_placeholder', 'NA')

        def _dumpjson_default(obj):
            # json.dumps fallback: serialize sets and LazyLists as lists
            if isinstance(obj, (set, LazyList)):
                return list(obj)
            raise TypeError(f'Object of type {type(obj).__name__} is not JSON serializable')

        def create_key(outer_mobj):
            # Replacement callback for EXTERNAL_FORMAT_RE: evaluates one format spec,
            # records its value in TMPL_DICT and returns the rewritten spec
            if not outer_mobj.group('has_key'):
                return outer_mobj.group(0)
            key = outer_mobj.group('key')
            mobj = re.match(INTERNAL_FORMAT_RE, key)
            # Last path component of the first field; passed to sanitize as the field name
            initial_field = mobj.group('fields').split('.')[-1] if mobj else ''
            value, default = None, na
            # Try the field, then each ','-separated alternate, until one yields a value
            while mobj:
                mobj = mobj.groupdict()
                default = mobj['default'] if mobj['default'] is not None else default
                value = get_value(mobj)
                if value is None and mobj['alternate']:
                    mobj = re.match(INTERNAL_FORMAT_RE, mobj['alternate'][1:])
                else:
                    break

            fmt = outer_mobj.group('format')
            if fmt == 's' and value is not None and key in field_size_compat_map.keys():
                fmt = '0{:d}d'.format(field_size_compat_map[key])

            value = default if value is None else value

            flags = outer_mobj.group('conversion') or ''
            # Same spec with the type character replaced by 's'
            str_fmt = f'{fmt[:-1]}s'
            if fmt[-1] == 'l':  # list
                delim = '\n' if '#' in flags else ', '
                value, fmt = delim.join(variadic(value)), str_fmt
            elif fmt[-1] == 'j':  # json
                value, fmt = json.dumps(value, default=_dumpjson_default, indent=4 if '#' in flags else None), str_fmt
            elif fmt[-1] == 'q':  # quoted
                value = map(str, variadic(value) if '#' in flags else [value])
                value, fmt = ' '.join(map(compat_shlex_quote, value)), str_fmt
            elif fmt[-1] == 'B':  # bytes
                # The format spec is applied to the UTF-8 bytes, then decoded back
                value = f'%{str_fmt}'.encode('utf-8') % str(value).encode('utf-8')
                value, fmt = value.decode('utf-8', 'ignore'), 's'
            elif fmt[-1] == 'U':  # unicode normalized
                value, fmt = unicodedata.normalize(
                    # "+" = compatibility equivalence, "#" = NFD
                    'NF%s%s' % ('K' if '+' in flags else '', 'D' if '#' in flags else 'C'),
                    value), str_fmt
            elif fmt[-1] == 'c':
                # First character of the value; an empty value is formatted as a string
                if value:
                    value = str(value)[0]
                else:
                    fmt = str_fmt
            elif fmt[-1] not in 'rs':  # numeric
                value = float_or_none(value)
                if value is None:
                    value, fmt = default, 's'

            if sanitize:
                if fmt[-1] == 'r':
                    # If value is an object, sanitize might convert it to a string
                    # So we convert it to repr first
                    value, fmt = repr(value), str_fmt
                if fmt[-1] in 'csr':
                    value = sanitize(initial_field, value)

            # The TMPL_DICT key combines the original key and format spec, separated by NUL.
            # NOTE(review): the NUL inserted after each '%' presumably keeps
            # escape_outtmpl from treating a '%' inside the key as a format spec - confirm
            key = '%s\0%s' % (key.replace('%', '%\0'), outer_mobj.group('format'))
            TMPL_DICT[key] = value
            return '{prefix}%({key}){fmt}'.format(key=key, fmt=fmt, prefix=outer_mobj.group('prefix'))

        return EXTERNAL_FORMAT_RE.sub(create_key, outtmpl), TMPL_DICT
1160
1161     def evaluate_outtmpl(self, outtmpl, info_dict, *args, **kwargs):
1162         outtmpl, info_dict = self.prepare_outtmpl(outtmpl, info_dict, *args, **kwargs)
1163         return self.escape_outtmpl(outtmpl) % info_dict
1164
1165     def _prepare_filename(self, info_dict, tmpl_type='default'):
1166         try:
1167             sanitize = lambda k, v: sanitize_filename(
1168                 compat_str(v),
1169                 restricted=self.params.get('restrictfilenames'),
1170                 is_id=(k == 'id' or k.endswith('_id')))
1171             outtmpl = self._outtmpl_expandpath(self.outtmpl_dict.get(tmpl_type, self.outtmpl_dict['default']))
1172             filename = self.evaluate_outtmpl(outtmpl, info_dict, sanitize)
1173
1174             force_ext = OUTTMPL_TYPES.get(tmpl_type)
1175             if filename and force_ext is not None:
1176                 filename = replace_extension(filename, force_ext, info_dict.get('ext'))
1177
1178             # https://github.com/blackjack4494/youtube-dlc/issues/85
1179             trim_file_name = self.params.get('trim_file_name', False)
1180             if trim_file_name:
1181                 fn_groups = filename.rsplit('.')
1182                 ext = fn_groups[-1]
1183                 sub_ext = ''
1184                 if len(fn_groups) > 2:
1185                     sub_ext = fn_groups[-2]
1186                 filename = join_nonempty(fn_groups[0][:trim_file_name], sub_ext, ext, delim='.')
1187
1188             return filename
1189         except ValueError as err:
1190             self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
1191             return None
1192
1193     def prepare_filename(self, info_dict, dir_type='', warn=False):
1194         """Generate the output filename."""
1195
1196         filename = self._prepare_filename(info_dict, dir_type or 'default')
1197         if not filename and dir_type not in ('', 'temp'):
1198             return ''
1199
1200         if warn:
1201             if not self.params.get('paths'):
1202                 pass
1203             elif filename == '-':
1204                 self.report_warning('--paths is ignored when an outputting to stdout', only_once=True)
1205             elif os.path.isabs(filename):
1206                 self.report_warning('--paths is ignored since an absolute path is given in output template', only_once=True)
1207         if filename == '-' or not filename:
1208             return filename
1209
1210         return self.get_output_path(dir_type, filename)
1211
1212     def _match_entry(self, info_dict, incomplete=False, silent=False):
1213         """ Returns None if the file should be downloaded """
1214
1215         video_title = info_dict.get('title', info_dict.get('id', 'video'))
1216
1217         def check_filter():
1218             if 'title' in info_dict:
1219                 # This can happen when we're just evaluating the playlist
1220                 title = info_dict['title']
1221                 matchtitle = self.params.get('matchtitle', False)
1222                 if matchtitle:
1223                     if not re.search(matchtitle, title, re.IGNORECASE):
1224                         return '"' + title + '" title did not match pattern "' + matchtitle + '"'
1225                 rejecttitle = self.params.get('rejecttitle', False)
1226                 if rejecttitle:
1227                     if re.search(rejecttitle, title, re.IGNORECASE):
1228                         return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
1229             date = info_dict.get('upload_date')
1230             if date is not None:
1231                 dateRange = self.params.get('daterange', DateRange())
1232                 if date not in dateRange:
1233                     return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
1234             view_count = info_dict.get('view_count')
1235             if view_count is not None:
1236                 min_views = self.params.get('min_views')
1237                 if min_views is not None and view_count < min_views:
1238                     return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
1239                 max_views = self.params.get('max_views')
1240                 if max_views is not None and view_count > max_views:
1241                     return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
1242             if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
1243                 return 'Skipping "%s" because it is age restricted' % video_title
1244
1245             match_filter = self.params.get('match_filter')
1246             if match_filter is not None:
1247                 try:
1248                     ret = match_filter(info_dict, incomplete=incomplete)
1249                 except TypeError:
1250                     # For backward compatibility
1251                     ret = None if incomplete else match_filter(info_dict)
1252                 if ret is not None:
1253                     return ret
1254             return None
1255
1256         if self.in_download_archive(info_dict):
1257             reason = '%s has already been recorded in the archive' % video_title
1258             break_opt, break_err = 'break_on_existing', ExistingVideoReached
1259         else:
1260             reason = check_filter()
1261             break_opt, break_err = 'break_on_reject', RejectedVideoReached
1262         if reason is not None:
1263             if not silent:
1264                 self.to_screen('[download] ' + reason)
1265             if self.params.get(break_opt, False):
1266                 raise break_err()
1267         return reason
1268
1269     @staticmethod
1270     def add_extra_info(info_dict, extra_info):
1271         '''Set the keys from extra_info in info dict if they are missing'''
1272         for key, value in extra_info.items():
1273             info_dict.setdefault(key, value)
1274
1275     def extract_info(self, url, download=True, ie_key=None, extra_info=None,
1276                      process=True, force_generic_extractor=False):
1277         """
1278         Return a list with a dictionary for each video extracted.
1279
1280         Arguments:
1281         url -- URL to extract
1282
1283         Keyword arguments:
1284         download -- whether to download videos during extraction
1285         ie_key -- extractor key hint
1286         extra_info -- dictionary containing the extra values to add to each result
1287         process -- whether to resolve all unresolved references (URLs, playlist items),
1288             must be True for download to work.
1289         force_generic_extractor -- force using the generic extractor
1290         """
1291
1292         if extra_info is None:
1293             extra_info = {}
1294
1295         if not ie_key and force_generic_extractor:
1296             ie_key = 'Generic'
1297
1298         if ie_key:
1299             ies = {ie_key: self._get_info_extractor_class(ie_key)}
1300         else:
1301             ies = self._ies
1302
1303         for ie_key, ie in ies.items():
1304             if not ie.suitable(url):
1305                 continue
1306
1307             if not ie.working():
1308                 self.report_warning('The program functionality for this site has been marked as broken, '
1309                                     'and will probably not work.')
1310
1311             temp_id = ie.get_temp_id(url)
1312             if temp_id is not None and self.in_download_archive({'id': temp_id, 'ie_key': ie_key}):
1313                 self.to_screen(f'[{ie_key}] {temp_id}: has already been recorded in the archive')
1314                 if self.params.get('break_on_existing', False):
1315                     raise ExistingVideoReached()
1316                 break
1317             return self.__extract_info(url, self.get_info_extractor(ie_key), download, extra_info, process)
1318         else:
1319             self.report_error('no suitable InfoExtractor for URL %s' % url)
1320
1321     def __handle_extraction_exceptions(func):
1322         @functools.wraps(func)
1323         def wrapper(self, *args, **kwargs):
1324             try:
1325                 return func(self, *args, **kwargs)
1326             except GeoRestrictedError as e:
1327                 msg = e.msg
1328                 if e.countries:
1329                     msg += '\nThis video is available in %s.' % ', '.join(
1330                         map(ISO3166Utils.short2full, e.countries))
1331                 msg += '\nYou might want to use a VPN or a proxy server (with --proxy) to workaround.'
1332                 self.report_error(msg)
1333             except ExtractorError as e:  # An error we somewhat expected
1334                 self.report_error(compat_str(e), e.format_traceback())
1335             except ReExtractInfo as e:
1336                 if e.expected:
1337                     self.to_screen(f'{e}; Re-extracting data')
1338                 else:
1339                     self.to_stderr('\r')
1340                     self.report_warning(f'{e}; Re-extracting data')
1341                 return wrapper(self, *args, **kwargs)
1342             except (DownloadCancelled, LazyList.IndexError, PagedList.IndexError):
1343                 raise
1344             except Exception as e:
1345                 if self.params.get('ignoreerrors'):
1346                     self.report_error(error_to_compat_str(e), tb=encode_compat_str(traceback.format_exc()))
1347                 else:
1348                     raise
1349         return wrapper
1350
1351     def _wait_for_video(self, ie_result):
1352         if (not self.params.get('wait_for_video')
1353                 or ie_result.get('_type', 'video') != 'video'
1354                 or ie_result.get('formats') or ie_result.get('url')):
1355             return
1356
1357         format_dur = lambda dur: '%02d:%02d:%02d' % timetuple_from_msec(dur * 1000)[:-1]
1358         last_msg = ''
1359
1360         def progress(msg):
1361             nonlocal last_msg
1362             self.to_screen(msg + ' ' * (len(last_msg) - len(msg)) + '\r', skip_eol=True)
1363             last_msg = msg
1364
1365         min_wait, max_wait = self.params.get('wait_for_video')
1366         diff = try_get(ie_result, lambda x: x['release_timestamp'] - time.time())
1367         if diff is None and ie_result.get('live_status') == 'is_upcoming':
1368             diff = random.randrange(min_wait or 0, max_wait) if max_wait else min_wait
1369             self.report_warning('Release time of video is not known')
1370         elif (diff or 0) <= 0:
1371             self.report_warning('Video should already be available according to extracted info')
1372         diff = min(max(diff, min_wait or 0), max_wait or float('inf'))
1373         self.to_screen(f'[wait] Waiting for {format_dur(diff)} - Press Ctrl+C to try now')
1374
1375         wait_till = time.time() + diff
1376         try:
1377             while True:
1378                 diff = wait_till - time.time()
1379                 if diff <= 0:
1380                     progress('')
1381                     raise ReExtractInfo('[wait] Wait period ended', expected=True)
1382                 progress(f'[wait] Remaining time until next attempt: {self._format_screen(format_dur(diff), self.Styles.EMPHASIS)}')
1383                 time.sleep(1)
1384         except KeyboardInterrupt:
1385             progress('')
1386             raise ReExtractInfo('[wait] Interrupted by user', expected=True)
1387         except BaseException as e:
1388             if not isinstance(e, ReExtractInfo):
1389                 self.to_screen('')
1390             raise
1391
1392     @__handle_extraction_exceptions
1393     def __extract_info(self, url, ie, download, extra_info, process):
1394         ie_result = ie.extract(url)
1395         if ie_result is None:  # Finished already (backwards compatibility; listformats and friends should be moved here)
1396             return
1397         if isinstance(ie_result, list):
1398             # Backwards compatibility: old IE result format
1399             ie_result = {
1400                 '_type': 'compat_list',
1401                 'entries': ie_result,
1402             }
1403         if extra_info.get('original_url'):
1404             ie_result.setdefault('original_url', extra_info['original_url'])
1405         self.add_default_extra_info(ie_result, ie, url)
1406         if process:
1407             self._wait_for_video(ie_result)
1408             return self.process_ie_result(ie_result, download, extra_info)
1409         else:
1410             return ie_result
1411
1412     def add_default_extra_info(self, ie_result, ie, url):
1413         if url is not None:
1414             self.add_extra_info(ie_result, {
1415                 'webpage_url': url,
1416                 'original_url': url,
1417                 'webpage_url_basename': url_basename(url),
1418             })
1419         if ie is not None:
1420             self.add_extra_info(ie_result, {
1421                 'extractor': ie.IE_NAME,
1422                 'extractor_key': ie.ie_key(),
1423             })
1424
1425     def process_ie_result(self, ie_result, download=True, extra_info=None):
1426         """
1427         Take the result of the ie(may be modified) and resolve all unresolved
1428         references (URLs, playlist items).
1429
1430         It will also download the videos if 'download'.
1431         Returns the resolved ie_result.
1432         """
1433         if extra_info is None:
1434             extra_info = {}
1435         result_type = ie_result.get('_type', 'video')
1436
1437         if result_type in ('url', 'url_transparent'):
1438             ie_result['url'] = sanitize_url(ie_result['url'])
1439             if ie_result.get('original_url'):
1440                 extra_info.setdefault('original_url', ie_result['original_url'])
1441
1442             extract_flat = self.params.get('extract_flat', False)
1443             if ((extract_flat == 'in_playlist' and 'playlist' in extra_info)
1444                     or extract_flat is True):
1445                 info_copy = ie_result.copy()
1446                 ie = try_get(ie_result.get('ie_key'), self.get_info_extractor)
1447                 if ie and not ie_result.get('id'):
1448                     info_copy['id'] = ie.get_temp_id(ie_result['url'])
1449                 self.add_default_extra_info(info_copy, ie, ie_result['url'])
1450                 self.add_extra_info(info_copy, extra_info)
1451                 self.__forced_printings(info_copy, self.prepare_filename(info_copy), incomplete=True)
1452                 if self.params.get('force_write_download_archive', False):
1453                     self.record_download_archive(info_copy)
1454                 return ie_result
1455
1456         if result_type == 'video':
1457             self.add_extra_info(ie_result, extra_info)
1458             ie_result = self.process_video_result(ie_result, download=download)
1459             additional_urls = (ie_result or {}).get('additional_urls')
1460             if additional_urls:
1461                 # TODO: Improve MetadataParserPP to allow setting a list
1462                 if isinstance(additional_urls, compat_str):
1463                     additional_urls = [additional_urls]
1464                 self.to_screen(
1465                     '[info] %s: %d additional URL(s) requested' % (ie_result['id'], len(additional_urls)))
1466                 self.write_debug('Additional URLs: "%s"' % '", "'.join(additional_urls))
1467                 ie_result['additional_entries'] = [
1468                     self.extract_info(
1469                         url, download, extra_info,
1470                         force_generic_extractor=self.params.get('force_generic_extractor'))
1471                     for url in additional_urls
1472                 ]
1473             return ie_result
1474         elif result_type == 'url':
1475             # We have to add extra_info to the results because it may be
1476             # contained in a playlist
1477             return self.extract_info(
1478                 ie_result['url'], download,
1479                 ie_key=ie_result.get('ie_key'),
1480                 extra_info=extra_info)
1481         elif result_type == 'url_transparent':
1482             # Use the information from the embedding page
1483             info = self.extract_info(
1484                 ie_result['url'], ie_key=ie_result.get('ie_key'),
1485                 extra_info=extra_info, download=False, process=False)
1486
1487             # extract_info may return None when ignoreerrors is enabled and
1488             # extraction failed with an error, don't crash and return early
1489             # in this case
1490             if not info:
1491                 return info
1492
1493             force_properties = dict(
1494                 (k, v) for k, v in ie_result.items() if v is not None)
1495             for f in ('_type', 'url', 'id', 'extractor', 'extractor_key', 'ie_key'):
1496                 if f in force_properties:
1497                     del force_properties[f]
1498             new_result = info.copy()
1499             new_result.update(force_properties)
1500
1501             # Extracted info may not be a video result (i.e.
1502             # info.get('_type', 'video') != video) but rather an url or
1503             # url_transparent. In such cases outer metadata (from ie_result)
1504             # should be propagated to inner one (info). For this to happen
1505             # _type of info should be overridden with url_transparent. This
1506             # fixes issue from https://github.com/ytdl-org/youtube-dl/pull/11163.
1507             if new_result.get('_type') == 'url':
1508                 new_result['_type'] = 'url_transparent'
1509
1510             return self.process_ie_result(
1511                 new_result, download=download, extra_info=extra_info)
1512         elif result_type in ('playlist', 'multi_video'):
1513             # Protect from infinite recursion due to recursively nested playlists
1514             # (see https://github.com/ytdl-org/youtube-dl/issues/27833)
1515             webpage_url = ie_result['webpage_url']
1516             if webpage_url in self._playlist_urls:
1517                 self.to_screen(
1518                     '[download] Skipping already downloaded playlist: %s'
1519                     % ie_result.get('title') or ie_result.get('id'))
1520                 return
1521
1522             self._playlist_level += 1
1523             self._playlist_urls.add(webpage_url)
1524             self._sanitize_thumbnails(ie_result)
1525             try:
1526                 return self.__process_playlist(ie_result, download)
1527             finally:
1528                 self._playlist_level -= 1
1529                 if not self._playlist_level:
1530                     self._playlist_urls.clear()
1531         elif result_type == 'compat_list':
1532             self.report_warning(
1533                 'Extractor %s returned a compat_list result. '
1534                 'It needs to be updated.' % ie_result.get('extractor'))
1535
1536             def _fixup(r):
1537                 self.add_extra_info(r, {
1538                     'extractor': ie_result['extractor'],
1539                     'webpage_url': ie_result['webpage_url'],
1540                     'webpage_url_basename': url_basename(ie_result['webpage_url']),
1541                     'extractor_key': ie_result['extractor_key'],
1542                 })
1543                 return r
1544             ie_result['entries'] = [
1545                 self.process_ie_result(_fixup(r), download, extra_info)
1546                 for r in ie_result['entries']
1547             ]
1548             return ie_result
1549         else:
1550             raise Exception('Invalid result type: %s' % result_type)
1551
1552     def _ensure_dir_exists(self, path):
1553         return make_dir(path, self.report_error)
1554
    def __process_playlist(self, ie_result, download):
        """Select, filter and process the entries of a playlist result.

        The entries to handle are chosen with the 'playlist_items',
        'playliststart' and 'playlistend' params. Unless 'simulate' is set or
        'allow_playlist_files' is disabled, the playlist info json, description
        and thumbnails are written first. Every selected entry that is not
        rejected by _match_entry is then passed to __process_iterable_entry.

        Returns ie_result with 'entries' replaced by the processed results, or
        None if one of the playlist file writers returned None.
        Raises EntryNotInPlaylist if ie_result has no 'entries' key, or if
        'requested_entries' is set and a requested entry cannot be found.
        """
        # We process each entry in the playlist
        playlist = ie_result.get('title') or ie_result.get('id')
        self.to_screen('[download] Downloading playlist: %s' % playlist)

        if 'entries' not in ie_result:
            raise EntryNotInPlaylist('There are no entries')

        # Unique placeholder for positions that 'requested_entries' does not cover
        MissingEntry = object()
        incomplete_entries = bool(ie_result.get('requested_entries'))
        if incomplete_entries:
            def fill_missing_entries(entries, indices):
                # Place each entry at its 1-based index; the gaps keep MissingEntry
                ret = [MissingEntry] * max(indices)
                for i, entry in zip(indices, entries):
                    ret[i - 1] = entry
                return ret
            ie_result['entries'] = fill_missing_entries(ie_result['entries'], ie_result['requested_entries'])

        playlist_results = []

        playliststart = self.params.get('playliststart', 1)
        playlistend = self.params.get('playlistend')
        # For backwards compatibility, interpret -1 as whole list
        if playlistend == -1:
            playlistend = None

        playlistitems_str = self.params.get('playlist_items')
        playlistitems = None
        if playlistitems_str is not None:
            def iter_playlistitems(format):
                # Comma-separated items; 'a-b' expands to every index from a to b inclusive
                for string_segment in format.split(','):
                    if '-' in string_segment:
                        start, end = string_segment.split('-')
                        for item in range(int(start), int(end) + 1):
                            yield int(item)
                    else:
                        yield int(string_segment)
            playlistitems = orderedSet(iter_playlistitems(playlistitems_str))

        ie_entries = ie_result['entries']
        # For a list the total is known now, so only the second placeholder
        # ('%%d') is left to be filled with the number of selected entries
        msg = (
            'Downloading %d videos' if not isinstance(ie_entries, list)
            else 'Collected %d videos; downloading %%d of them' % len(ie_entries))

        if isinstance(ie_entries, list):
            def get_entry(i):
                return ie_entries[i - 1]
        else:
            if not isinstance(ie_entries, (PagedList, LazyList)):
                ie_entries = LazyList(ie_entries)

            def get_entry(i):
                # Indexing a lazy container can run extractor code, so it goes
                # through the same exception handling as extraction itself
                return YoutubeDL.__handle_extraction_exceptions(
                    lambda self, i: ie_entries[i - 1]
                )(self, i)

        entries = []
        items = playlistitems if playlistitems is not None else itertools.count(playliststart)
        for i in items:
            if i == 0:
                continue
            if playlistitems is None and playlistend is not None and playlistend < i:
                break
            entry = None
            try:
                entry = get_entry(i)
                if entry is MissingEntry:
                    raise EntryNotInPlaylist()
            except (IndexError, EntryNotInPlaylist):
                if incomplete_entries:
                    raise EntryNotInPlaylist(f'Entry {i} cannot be found')
                elif not playlistitems:
                    break
            # entry may still be None here; it is appended so that positions
            # stay aligned with the requested items and dropped further below
            entries.append(entry)
            try:
                if entry is not None:
                    # Called only so that break_on_existing/break_on_reject can
                    # stop the collection early; the returned reason is ignored
                    self._match_entry(entry, incomplete=True, silent=True)
            except (ExistingVideoReached, RejectedVideoReached):
                break
        ie_result['entries'] = entries

        # Save playlist_index before re-ordering
        entries = [
            ((playlistitems[i - 1] if playlistitems else i + playliststart - 1), entry)
            for i, entry in enumerate(entries, 1)
            if entry is not None]
        n_entries = len(entries)

        if not playlistitems and (playliststart != 1 or playlistend):
            playlistitems = list(range(playliststart, playliststart + n_entries))
        ie_result['requested_entries'] = playlistitems

        _infojson_written = False
        if not self.params.get('simulate') and self.params.get('allow_playlist_files', True):
            # Playlist-level fields used for building the playlist file names;
            # values already present in ie_result take precedence
            ie_copy = {
                'playlist': playlist,
                'playlist_id': ie_result.get('id'),
                'playlist_title': ie_result.get('title'),
                'playlist_uploader': ie_result.get('uploader'),
                'playlist_uploader_id': ie_result.get('uploader_id'),
                'playlist_index': 0,
                'n_entries': n_entries,
            }
            ie_copy.update(dict(ie_result))

            _infojson_written = self._write_info_json(
                'playlist', ie_result, self.prepare_filename(ie_copy, 'pl_infojson'))
            if _infojson_written is None:
                return
            if self._write_description('playlist', ie_result,
                                       self.prepare_filename(ie_copy, 'pl_description')) is None:
                return
            # TODO: This should be passed to ThumbnailsConvertor if necessary
            self._write_thumbnails('playlist', ie_copy, self.prepare_filename(ie_copy, 'pl_thumbnail'))

        if self.params.get('playlistreverse', False):
            entries = entries[::-1]
        if self.params.get('playlistrandom', False):
            random.shuffle(entries)

        x_forwarded_for = ie_result.get('__x_forwarded_for_ip')

        self.to_screen('[%s] playlist %s: %s' % (ie_result['extractor'], playlist, msg % n_entries))
        failures = 0
        # A falsy 'skip_playlist_after_errors' means there is no failure limit
        max_failures = self.params.get('skip_playlist_after_errors') or float('inf')
        for i, entry_tuple in enumerate(entries, 1):
            playlist_index, entry = entry_tuple
            if 'playlist-index' in self.params.get('compat_opts', []):
                # With this compat option the index follows the position after
                # re-ordering instead of the index saved before it
                playlist_index = playlistitems[i - 1] if playlistitems else i + playliststart - 1
            self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
            # This __x_forwarded_for_ip thing is a bit ugly but requires
            # minimal changes
            if x_forwarded_for:
                entry['__x_forwarded_for_ip'] = x_forwarded_for
            extra = {
                'n_entries': n_entries,
                '_last_playlist_index': max(playlistitems) if playlistitems else (playlistend or n_entries),
                'playlist_index': playlist_index,
                'playlist_autonumber': i,
                'playlist': playlist,
                'playlist_id': ie_result.get('id'),
                'playlist_title': ie_result.get('title'),
                'playlist_uploader': ie_result.get('uploader'),
                'playlist_uploader_id': ie_result.get('uploader_id'),
                'extractor': ie_result['extractor'],
                'webpage_url': ie_result['webpage_url'],
                'webpage_url_basename': url_basename(ie_result['webpage_url']),
                'extractor_key': ie_result['extractor_key'],
            }

            # A non-None return value is the reason for skipping this entry
            if self._match_entry(entry, incomplete=True) is not None:
                continue

            entry_result = self.__process_iterable_entry(entry, download, extra)
            if not entry_result:
                failures += 1
            if failures >= max_failures:
                self.report_error(
                    'Skipping the remaining entries in playlist "%s" since %d items failed extraction' % (playlist, failures))
                break
            playlist_results.append(entry_result)
        ie_result['entries'] = playlist_results

        # Write the updated info to json
        if _infojson_written and self._write_info_json(
                'updated playlist', ie_result,
                self.prepare_filename(ie_copy, 'pl_infojson'), overwrite=True) is None:
            return
        self.to_screen('[download] Finished downloading playlist: %s' % playlist)
        return ie_result
1725
1726     @__handle_extraction_exceptions
1727     def __process_iterable_entry(self, entry, download, extra_info):
1728         return self.process_ie_result(
1729             entry, download=download, extra_info=extra_info)
1730
1731     def _build_format_filter(self, filter_spec):
1732         " Returns a function to filter the formats according to the filter_spec "
1733
1734         OPERATORS = {
1735             '<': operator.lt,
1736             '<=': operator.le,
1737             '>': operator.gt,
1738             '>=': operator.ge,
1739             '=': operator.eq,
1740             '!=': operator.ne,
1741         }
1742         operator_rex = re.compile(r'''(?x)\s*
1743             (?P<key>width|height|tbr|abr|vbr|asr|filesize|filesize_approx|fps)\s*
1744             (?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
1745             (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)\s*
1746             ''' % '|'.join(map(re.escape, OPERATORS.keys())))
1747         m = operator_rex.fullmatch(filter_spec)
1748         if m:
1749             try:
1750                 comparison_value = int(m.group('value'))
1751             except ValueError:
1752                 comparison_value = parse_filesize(m.group('value'))
1753                 if comparison_value is None:
1754                     comparison_value = parse_filesize(m.group('value') + 'B')
1755                 if comparison_value is None:
1756                     raise ValueError(
1757                         'Invalid value %r in format specification %r' % (
1758                             m.group('value'), filter_spec))
1759             op = OPERATORS[m.group('op')]
1760
1761         if not m:
1762             STR_OPERATORS = {
1763                 '=': operator.eq,
1764                 '^=': lambda attr, value: attr.startswith(value),
1765                 '$=': lambda attr, value: attr.endswith(value),
1766                 '*=': lambda attr, value: value in attr,
1767             }
1768             str_operator_rex = re.compile(r'''(?x)\s*
1769                 (?P<key>[a-zA-Z0-9._-]+)\s*
1770                 (?P<negation>!\s*)?(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
1771                 (?P<value>[a-zA-Z0-9._-]+)\s*
1772                 ''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))
1773             m = str_operator_rex.fullmatch(filter_spec)
1774             if m:
1775                 comparison_value = m.group('value')
1776                 str_op = STR_OPERATORS[m.group('op')]
1777                 if m.group('negation'):
1778                     op = lambda attr, value: not str_op(attr, value)
1779                 else:
1780                     op = str_op
1781
1782         if not m:
1783             raise SyntaxError('Invalid filter specification %r' % filter_spec)
1784
1785         def _filter(f):
1786             actual_value = f.get(m.group('key'))
1787             if actual_value is None:
1788                 return m.group('none_inclusive')
1789             return op(actual_value, comparison_value)
1790         return _filter
1791
1792     def _check_formats(self, formats):
1793         for f in formats:
1794             self.to_screen('[info] Testing format %s' % f['format_id'])
1795             path = self.get_output_path('temp')
1796             if not self._ensure_dir_exists(f'{path}/'):
1797                 continue
1798             temp_file = tempfile.NamedTemporaryFile(suffix='.tmp', delete=False, dir=path or None)
1799             temp_file.close()
1800             try:
1801                 success, _ = self.dl(temp_file.name, f, test=True)
1802             except (DownloadError, IOError, OSError, ValueError) + network_exceptions:
1803                 success = False
1804             finally:
1805                 if os.path.exists(temp_file.name):
1806                     try:
1807                         os.remove(temp_file.name)
1808                     except OSError:
1809                         self.report_warning('Unable to delete temporary file "%s"' % temp_file.name)
1810             if success:
1811                 yield f
1812             else:
1813                 self.to_screen('[info] Unable to download format %s. Skipping...' % f['format_id'])
1814
1815     def _default_format_spec(self, info_dict, download=True):
1816
1817         def can_merge():
1818             merger = FFmpegMergerPP(self)
1819             return merger.available and merger.can_merge()
1820
1821         prefer_best = (
1822             not self.params.get('simulate')
1823             and download
1824             and (
1825                 not can_merge()
1826                 or info_dict.get('is_live', False)
1827                 or self.outtmpl_dict['default'] == '-'))
1828         compat = (
1829             prefer_best
1830             or self.params.get('allow_multiple_audio_streams', False)
1831             or 'format-spec' in self.params.get('compat_opts', []))
1832
1833         return (
1834             'best/bestvideo+bestaudio' if prefer_best
1835             else 'bestvideo*+bestaudio/best' if not compat
1836             else 'bestvideo+bestaudio/best')
1837
1838     def build_format_selector(self, format_spec):
1839         def syntax_error(note, start):
1840             message = (
1841                 'Invalid format specification: '
1842                 '{0}\n\t{1}\n\t{2}^'.format(note, format_spec, ' ' * start[1]))
1843             return SyntaxError(message)
1844
1845         PICKFIRST = 'PICKFIRST'
1846         MERGE = 'MERGE'
1847         SINGLE = 'SINGLE'
1848         GROUP = 'GROUP'
1849         FormatSelector = collections.namedtuple('FormatSelector', ['type', 'selector', 'filters'])
1850
1851         allow_multiple_streams = {'audio': self.params.get('allow_multiple_audio_streams', False),
1852                                   'video': self.params.get('allow_multiple_video_streams', False)}
1853
1854         check_formats = self.params.get('check_formats') == 'selected'
1855
1856         def _parse_filter(tokens):
1857             filter_parts = []
1858             for type, string, start, _, _ in tokens:
1859                 if type == tokenize.OP and string == ']':
1860                     return ''.join(filter_parts)
1861                 else:
1862                     filter_parts.append(string)
1863
1864         def _remove_unused_ops(tokens):
1865             # Remove operators that we don't use and join them with the surrounding strings
1866             # for example: 'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9'
1867             ALLOWED_OPS = ('/', '+', ',', '(', ')')
1868             last_string, last_start, last_end, last_line = None, None, None, None
1869             for type, string, start, end, line in tokens:
1870                 if type == tokenize.OP and string == '[':
1871                     if last_string:
1872                         yield tokenize.NAME, last_string, last_start, last_end, last_line
1873                         last_string = None
1874                     yield type, string, start, end, line
1875                     # everything inside brackets will be handled by _parse_filter
1876                     for type, string, start, end, line in tokens:
1877                         yield type, string, start, end, line
1878                         if type == tokenize.OP and string == ']':
1879                             break
1880                 elif type == tokenize.OP and string in ALLOWED_OPS:
1881                     if last_string:
1882                         yield tokenize.NAME, last_string, last_start, last_end, last_line
1883                         last_string = None
1884                     yield type, string, start, end, line
1885                 elif type in [tokenize.NAME, tokenize.NUMBER, tokenize.OP]:
1886                     if not last_string:
1887                         last_string = string
1888                         last_start = start
1889                         last_end = end
1890                     else:
1891                         last_string += string
1892             if last_string:
1893                 yield tokenize.NAME, last_string, last_start, last_end, last_line
1894
1895         def _parse_format_selection(tokens, inside_merge=False, inside_choice=False, inside_group=False):
1896             selectors = []
1897             current_selector = None
1898             for type, string, start, _, _ in tokens:
1899                 # ENCODING is only defined in python 3.x
1900                 if type == getattr(tokenize, 'ENCODING', None):
1901                     continue
1902                 elif type in [tokenize.NAME, tokenize.NUMBER]:
1903                     current_selector = FormatSelector(SINGLE, string, [])
1904                 elif type == tokenize.OP:
1905                     if string == ')':
1906                         if not inside_group:
1907                             # ')' will be handled by the parentheses group
1908                             tokens.restore_last_token()
1909                         break
1910                     elif inside_merge and string in ['/', ',']:
1911                         tokens.restore_last_token()
1912                         break
1913                     elif inside_choice and string == ',':
1914                         tokens.restore_last_token()
1915                         break
1916                     elif string == ',':
1917                         if not current_selector:
1918                             raise syntax_error('"," must follow a format selector', start)
1919                         selectors.append(current_selector)
1920                         current_selector = None
1921                     elif string == '/':
1922                         if not current_selector:
1923                             raise syntax_error('"/" must follow a format selector', start)
1924                         first_choice = current_selector
1925                         second_choice = _parse_format_selection(tokens, inside_choice=True)
1926                         current_selector = FormatSelector(PICKFIRST, (first_choice, second_choice), [])
1927                     elif string == '[':
1928                         if not current_selector:
1929                             current_selector = FormatSelector(SINGLE, 'best', [])
1930                         format_filter = _parse_filter(tokens)
1931                         current_selector.filters.append(format_filter)
1932                     elif string == '(':
1933                         if current_selector:
1934                             raise syntax_error('Unexpected "("', start)
1935                         group = _parse_format_selection(tokens, inside_group=True)
1936                         current_selector = FormatSelector(GROUP, group, [])
1937                     elif string == '+':
1938                         if not current_selector:
1939                             raise syntax_error('Unexpected "+"', start)
1940                         selector_1 = current_selector
1941                         selector_2 = _parse_format_selection(tokens, inside_merge=True)
1942                         if not selector_2:
1943                             raise syntax_error('Expected a selector', start)
1944                         current_selector = FormatSelector(MERGE, (selector_1, selector_2), [])
1945                     else:
1946                         raise syntax_error('Operator not recognized: "{0}"'.format(string), start)
1947                 elif type == tokenize.ENDMARKER:
1948                     break
1949             if current_selector:
1950                 selectors.append(current_selector)
1951             return selectors
1952
1953         def _merge(formats_pair):
1954             format_1, format_2 = formats_pair
1955
1956             formats_info = []
1957             formats_info.extend(format_1.get('requested_formats', (format_1,)))
1958             formats_info.extend(format_2.get('requested_formats', (format_2,)))
1959
1960             if not allow_multiple_streams['video'] or not allow_multiple_streams['audio']:
1961                 get_no_more = {'video': False, 'audio': False}
1962                 for (i, fmt_info) in enumerate(formats_info):
1963                     if fmt_info.get('acodec') == fmt_info.get('vcodec') == 'none':
1964                         formats_info.pop(i)
1965                         continue
1966                     for aud_vid in ['audio', 'video']:
1967                         if not allow_multiple_streams[aud_vid] and fmt_info.get(aud_vid[0] + 'codec') != 'none':
1968                             if get_no_more[aud_vid]:
1969                                 formats_info.pop(i)
1970                                 break
1971                             get_no_more[aud_vid] = True
1972
1973             if len(formats_info) == 1:
1974                 return formats_info[0]
1975
1976             video_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('vcodec') != 'none']
1977             audio_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('acodec') != 'none']
1978
1979             the_only_video = video_fmts[0] if len(video_fmts) == 1 else None
1980             the_only_audio = audio_fmts[0] if len(audio_fmts) == 1 else None
1981
1982             output_ext = self.params.get('merge_output_format')
1983             if not output_ext:
1984                 if the_only_video:
1985                     output_ext = the_only_video['ext']
1986                 elif the_only_audio and not video_fmts:
1987                     output_ext = the_only_audio['ext']
1988                 else:
1989                     output_ext = 'mkv'
1990
1991             filtered = lambda *keys: filter(None, (traverse_obj(fmt, *keys) for fmt in formats_info))
1992
1993             new_dict = {
1994                 'requested_formats': formats_info,
1995                 'format': '+'.join(filtered('format')),
1996                 'format_id': '+'.join(filtered('format_id')),
1997                 'ext': output_ext,
1998                 'protocol': '+'.join(map(determine_protocol, formats_info)),
1999                 'language': '+'.join(orderedSet(filtered('language'))) or None,
2000                 'format_note': '+'.join(orderedSet(filtered('format_note'))) or None,
2001                 'filesize_approx': sum(filtered('filesize', 'filesize_approx')) or None,
2002                 'tbr': sum(filtered('tbr', 'vbr', 'abr')),
2003             }
2004
2005             if the_only_video:
2006                 new_dict.update({
2007                     'width': the_only_video.get('width'),
2008                     'height': the_only_video.get('height'),
2009                     'resolution': the_only_video.get('resolution') or self.format_resolution(the_only_video),
2010                     'fps': the_only_video.get('fps'),
2011                     'dynamic_range': the_only_video.get('dynamic_range'),
2012                     'vcodec': the_only_video.get('vcodec'),
2013                     'vbr': the_only_video.get('vbr'),
2014                     'stretched_ratio': the_only_video.get('stretched_ratio'),
2015                 })
2016
2017             if the_only_audio:
2018                 new_dict.update({
2019                     'acodec': the_only_audio.get('acodec'),
2020                     'abr': the_only_audio.get('abr'),
2021                     'asr': the_only_audio.get('asr'),
2022                 })
2023
2024             return new_dict
2025
2026         def _check_formats(formats):
2027             if not check_formats:
2028                 yield from formats
2029                 return
2030             yield from self._check_formats(formats)
2031
        def _build_selector_function(selector):
            """Turn a parsed selector into a callable that picks formats from a context.

            ``selector`` is either a list of selectors (the "," operator) or a
            single selector with ``type``, ``selector`` and ``filters`` attributes.
            The returned callable takes a ``ctx`` dict (it reads ctx['formats'] and
            ctx['incomplete_formats']) and returns an iterable of format dicts.
            """
            if isinstance(selector, list):  # ,
                fs = [_build_selector_function(s) for s in selector]

                def selector_function(ctx):
                    # Concatenate the results of every comma-separated selector
                    for f in fs:
                        yield from f(ctx)
                # A list carries no filters of its own, so return without the filter wrapper below
                return selector_function

            elif selector.type == GROUP:  # ()
                selector_function = _build_selector_function(selector.selector)

            elif selector.type == PICKFIRST:  # /
                fs = [_build_selector_function(s) for s in selector.selector]

                def selector_function(ctx):
                    # The first alternative that produces any format wins.
                    # Note: this variant returns a list rather than a generator
                    for f in fs:
                        picked_formats = list(f(ctx))
                        if picked_formats:
                            return picked_formats
                    return []

            elif selector.type == MERGE:  # +
                selector_1, selector_2 = map(_build_selector_function, selector.selector)

                def selector_function(ctx):
                    # Merge every combination of the two sides' results; each side
                    # works on its own deep copy of the context
                    for pair in itertools.product(
                            selector_1(copy.deepcopy(ctx)), selector_2(copy.deepcopy(ctx))):
                        yield _merge(pair)

            elif selector.type == SINGLE:  # atom
                format_spec = selector.selector or 'best'

                # TODO: Add allvideo, allaudio etc by generalizing the code with best/worst selector
                if format_spec == 'all':
                    def selector_function(ctx):
                        yield from _check_formats(ctx['formats'][::-1])
                elif format_spec == 'mergeall':
                    def selector_function(ctx):
                        formats = list(_check_formats(ctx['formats']))
                        if not formats:
                            return
                        # Fold all formats into one merged format, starting from the last one
                        merged_format = formats[-1]
                        for f in formats[-2::-1]:
                            merged_format = _merge((merged_format, f))
                        yield merged_format

                else:
                    format_fallback, format_reverse, format_idx = False, True, 1
                    # bw: best/worst, type: video/audio, mod: "*" modifier,
                    # n: 1-based index of the match to pick (".2" = second match)
                    mobj = re.match(
                        r'(?P<bw>best|worst|b|w)(?P<type>video|audio|v|a)?(?P<mod>\*)?(?:\.(?P<n>[1-9]\d*))?$',
                        format_spec)
                    if mobj is not None:
                        format_idx = int_or_none(mobj.group('n'), default=1)
                        # "best" walks the format list backwards, "worst" forwards
                        # NOTE(review): this implies ctx['formats'] is ordered worst-to-best - confirm
                        format_reverse = mobj.group('bw')[0] == 'b'
                        format_type = (mobj.group('type') or [None])[0]
                        not_format_type = {'v': 'a', 'a': 'v'}.get(format_type)
                        format_modified = mobj.group('mod') is not None

                        format_fallback = not format_type and not format_modified  # for b, w
                        _filter_f = (
                            (lambda f: f.get('%scodec' % format_type) != 'none')
                            if format_type and format_modified  # bv*, ba*, wv*, wa*
                            else (lambda f: f.get('%scodec' % not_format_type) == 'none')
                            if format_type  # bv, ba, wv, wa
                            else (lambda f: f.get('vcodec') != 'none' and f.get('acodec') != 'none')
                            if not format_modified  # b, w
                            else lambda f: True)  # b*, w*
                        # In every case, never match formats that have neither audio nor video
                        filter_f = lambda f: _filter_f(f) and (
                            f.get('vcodec') != 'none' or f.get('acodec') != 'none')
                    else:
                        # Not a best/worst spec: treat it as a known extension, else as a format id
                        if format_spec in self._format_selection_exts['audio']:
                            filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') != 'none'
                        elif format_spec in self._format_selection_exts['video']:
                            filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') != 'none' and f.get('vcodec') != 'none'
                        elif format_spec in self._format_selection_exts['storyboards']:
                            filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') == 'none' and f.get('vcodec') == 'none'
                        else:
                            filter_f = lambda f: f.get('format_id') == format_spec  # id

                    def selector_function(ctx):
                        formats = list(ctx['formats'])
                        matches = list(filter(filter_f, formats)) if filter_f is not None else formats
                        if format_fallback and ctx['incomplete_formats'] and not matches:
                            # for extractors with incomplete formats (audio only (soundcloud)
                            # or video only (imgur)) best/worst will fallback to
                            # best/worst {video,audio}-only format
                            matches = formats
                        # Lazy, so formats are only checked until the requested index is reached
                        matches = LazyList(_check_formats(matches[::-1 if format_reverse else 1]))
                        try:
                            yield matches[format_idx - 1]
                        except IndexError:
                            # Fewer matches than the requested index: select nothing
                            return

            filters = [self._build_format_filter(f) for f in selector.filters]

            def final_selector(ctx):
                # Apply this selector's [filters] to a private copy of the context
                # before delegating, so the caller's format list is left untouched
                ctx_copy = copy.deepcopy(ctx)
                for _filter in filters:
                    ctx_copy['formats'] = list(filter(_filter, ctx_copy['formats']))
                return selector_function(ctx_copy)
            return final_selector
2134
2135         stream = io.BytesIO(format_spec.encode('utf-8'))
2136         try:
2137             tokens = list(_remove_unused_ops(compat_tokenize_tokenize(stream.readline)))
2138         except tokenize.TokenError:
2139             raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec)))
2140
2141         class TokenIterator(object):
2142             def __init__(self, tokens):
2143                 self.tokens = tokens
2144                 self.counter = 0
2145
2146             def __iter__(self):
2147                 return self
2148
2149             def __next__(self):
2150                 if self.counter >= len(self.tokens):
2151                     raise StopIteration()
2152                 value = self.tokens[self.counter]
2153                 self.counter += 1
2154                 return value
2155
2156             next = __next__
2157
2158             def restore_last_token(self):
2159                 self.counter -= 1
2160
2161         parsed_selector = _parse_format_selection(iter(TokenIterator(tokens)))
2162         return _build_selector_function(parsed_selector)
2163
2164     def _calc_headers(self, info_dict):
2165         res = std_headers.copy()
2166
2167         add_headers = info_dict.get('http_headers')
2168         if add_headers:
2169             res.update(add_headers)
2170
2171         cookies = self._calc_cookies(info_dict)
2172         if cookies:
2173             res['Cookie'] = cookies
2174
2175         if 'X-Forwarded-For' not in res:
2176             x_forwarded_for_ip = info_dict.get('__x_forwarded_for_ip')
2177             if x_forwarded_for_ip:
2178                 res['X-Forwarded-For'] = x_forwarded_for_ip
2179
2180         return res
2181
2182     def _calc_cookies(self, info_dict):
2183         pr = sanitized_Request(info_dict['url'])
2184         self.cookiejar.add_cookie_header(pr)
2185         return pr.get_header('Cookie')
2186
2187     def _sort_thumbnails(self, thumbnails):
2188         thumbnails.sort(key=lambda t: (
2189             t.get('preference') if t.get('preference') is not None else -1,
2190             t.get('width') if t.get('width') is not None else -1,
2191             t.get('height') if t.get('height') is not None else -1,
2192             t.get('id') if t.get('id') is not None else '',
2193             t.get('url')))
2194
2195     def _sanitize_thumbnails(self, info_dict):
2196         thumbnails = info_dict.get('thumbnails')
2197         if thumbnails is None:
2198             thumbnail = info_dict.get('thumbnail')
2199             if thumbnail:
2200                 info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}]
2201         if not thumbnails:
2202             return
2203
2204         def check_thumbnails(thumbnails):
2205             for t in thumbnails:
2206                 self.to_screen(f'[info] Testing thumbnail {t["id"]}')
2207                 try:
2208                     self.urlopen(HEADRequest(t['url']))
2209                 except network_exceptions as err:
2210                     self.to_screen(f'[info] Unable to connect to thumbnail {t["id"]} URL {t["url"]!r} - {err}. Skipping...')
2211                     continue
2212                 yield t
2213
2214         self._sort_thumbnails(thumbnails)
2215         for i, t in enumerate(thumbnails):
2216             if t.get('id') is None:
2217                 t['id'] = '%d' % i
2218             if t.get('width') and t.get('height'):
2219                 t['resolution'] = '%dx%d' % (t['width'], t['height'])
2220             t['url'] = sanitize_url(t['url'])
2221
2222         if self.params.get('check_formats') is True:
2223             info_dict['thumbnails'] = LazyList(check_thumbnails(thumbnails[::-1]), reverse=True)
2224         else:
2225             info_dict['thumbnails'] = thumbnails
2226
    def process_video_result(self, info_dict, download=True):
        """Normalize a single-video info dict, select its formats and process them.

        The info dict is validated and completed in place (thumbnails,
        display_id, duration_string, dates, live status, chapter/season/episode
        titles, requested subtitles), its formats are sanitized, the listing
        options are honoured, and finally the format selector is run; when
        ``download`` is true each selected format is handed to process_info.

        Returns the info dict updated with the last selected format, or None
        when only a listing was requested.
        Raises ExtractorError when "id" or "title" is missing, or when no format
        matches the request and 'ignore_no_formats_error' is not set.
        """
        assert info_dict.get('_type', 'video') == 'video'

        if 'id' not in info_dict:
            raise ExtractorError('Missing "id" field in extractor result')
        if 'title' not in info_dict:
            raise ExtractorError('Missing "title" field in extractor result',
                                 video_id=info_dict['id'], ie=info_dict['extractor'])

        def report_force_conversion(field, field_not, conversion):
            # Warn that an extractor returned a field of the wrong type
            self.report_warning(
                '"%s" field is not %s - forcing %s conversion, there is an error in extractor'
                % (field, field_not, conversion))

        def sanitize_string_field(info, string_field):
            # Coerce a non-string value to a string, with a warning
            field = info.get(string_field)
            if field is None or isinstance(field, compat_str):
                return
            report_force_conversion(string_field, 'a string', 'string')
            info[string_field] = compat_str(field)

        def sanitize_numeric_fields(info):
            # Coerce every non-numeric value of the known numeric fields with int_or_none, with a warning
            for numeric_field in self._NUMERIC_FIELDS:
                field = info.get(numeric_field)
                if field is None or isinstance(field, compat_numeric_types):
                    continue
                report_force_conversion(numeric_field, 'numeric', 'int')
                info[numeric_field] = int_or_none(field)

        sanitize_string_field(info_dict, 'id')
        sanitize_numeric_fields(info_dict)

        if 'playlist' not in info_dict:
            # It isn't part of a playlist
            info_dict['playlist'] = None
            info_dict['playlist_index'] = None

        self._sanitize_thumbnails(info_dict)

        # Keep 'thumbnail' in sync: sanitize an explicit one, otherwise use the
        # last entry of the 'thumbnails' list
        thumbnail = info_dict.get('thumbnail')
        thumbnails = info_dict.get('thumbnails')
        if thumbnail:
            info_dict['thumbnail'] = sanitize_url(thumbnail)
        elif thumbnails:
            info_dict['thumbnail'] = thumbnails[-1]['url']

        if info_dict.get('display_id') is None and 'id' in info_dict:
            info_dict['display_id'] = info_dict['id']

        if info_dict.get('duration') is not None:
            info_dict['duration_string'] = formatSeconds(info_dict['duration'])

        # Derive a missing YYYYMMDD date (UTC) from the corresponding timestamp
        for ts_key, date_key in (
                ('timestamp', 'upload_date'),
                ('release_timestamp', 'release_date'),
        ):
            if info_dict.get(date_key) is None and info_dict.get(ts_key) is not None:
                # Working around out-of-range timestamp values (e.g. negative ones on Windows,
                # see http://bugs.python.org/issue1646728)
                try:
                    upload_date = datetime.datetime.utcfromtimestamp(info_dict[ts_key])
                    info_dict[date_key] = upload_date.strftime('%Y%m%d')
                except (ValueError, OverflowError, OSError):
                    pass

        # Reconcile 'live_status' with the boolean 'is_live' / 'was_live' fields
        live_keys = ('is_live', 'was_live')
        live_status = info_dict.get('live_status')
        if live_status is None:
            # Keys that are explicitly False are skipped; the first other key
            # decides: its name becomes the status if truthy, and the search
            # stops either way
            for key in live_keys:
                if info_dict.get(key) is False:
                    continue
                if info_dict.get(key):
                    live_status = key
                break
            if all(info_dict.get(key) is False for key in live_keys):
                live_status = 'not_live'
        if live_status:
            info_dict['live_status'] = live_status
            # Fill in the booleans that were left unset
            for key in live_keys:
                if info_dict.get(key) is None:
                    info_dict[key] = (live_status == key)

        # Auto generate title fields corresponding to the *_number fields when missing
        # in order to always have clean titles. This is very common for TV series.
        for field in ('chapter', 'season', 'episode'):
            if info_dict.get('%s_number' % field) is not None and not info_dict.get(field):
                info_dict[field] = '%s %d' % (field.capitalize(), info_dict['%s_number' % field])

        # Sanitize subtitle URLs and fill in a missing 'ext' from the URL
        for cc_kind in ('subtitles', 'automatic_captions'):
            cc = info_dict.get(cc_kind)
            if cc:
                for _, subtitle in cc.items():
                    for subtitle_format in subtitle:
                        if subtitle_format.get('url'):
                            subtitle_format['url'] = sanitize_url(subtitle_format['url'])
                        if subtitle_format.get('ext') is None:
                            # NOTE(review): indexes 'url' directly, so an entry with neither
                            # 'ext' nor 'url' raises KeyError here - confirm extractors
                            # always provide one of them
                            subtitle_format['ext'] = determine_ext(subtitle_format['url']).lower()

        automatic_captions = info_dict.get('automatic_captions')
        subtitles = info_dict.get('subtitles')

        info_dict['requested_subtitles'] = self.process_subtitles(
            info_dict['id'], subtitles, automatic_captions)

        if info_dict.get('formats') is None:
            # There's only one format available
            formats = [info_dict]
        else:
            formats = info_dict['formats']

        # Remember whether any format had DRM, then drop those formats unless
        # unplayable formats are explicitly allowed
        info_dict['__has_drm'] = any(f.get('has_drm') for f in formats)
        if not self.params.get('allow_unplayable_formats'):
            formats = [f for f in formats if not f.get('has_drm')]

        if not formats:
            self.raise_no_formats(info_dict)

        def is_wellformed(f):
            # A format is usable only if it has a non-empty URL; bytes URLs are
            # converted to strings (with a warning)
            url = f.get('url')
            if not url:
                self.report_warning(
                    '"url" field is missing or empty - skipping format, '
                    'there is an error in extractor')
                return False
            if isinstance(url, bytes):
                sanitize_string_field(f, 'url')
            return True

        # Filter out malformed formats for better extraction robustness
        formats = list(filter(is_wellformed, formats))

        # Maps each (sanitized) format_id to the list of formats carrying it
        formats_dict = {}

        # We check that all the formats have the format and format_id fields
        for i, format in enumerate(formats):
            sanitize_string_field(format, 'format_id')
            sanitize_numeric_fields(format)
            format['url'] = sanitize_url(format['url'])
            if not format.get('format_id'):
                format['format_id'] = compat_str(i)
            else:
                # Sanitize format_id from characters used in format selector expression
                format['format_id'] = re.sub(r'[\s,/+\[\]()]', '_', format['format_id'])
            format_id = format['format_id']
            if format_id not in formats_dict:
                formats_dict[format_id] = []
            formats_dict[format_id].append(format)

        # Make sure all formats have unique format_id
        common_exts = set(itertools.chain(*self._format_selection_exts.values()))
        for format_id, ambiguous_formats in formats_dict.items():
            ambigious_id = len(ambiguous_formats) > 1
            for i, format in enumerate(ambiguous_formats):
                if ambigious_id:
                    # Disambiguate duplicates by appending their index
                    format['format_id'] = '%s-%d' % (format_id, i)
                if format.get('ext') is None:
                    format['ext'] = determine_ext(format['url']).lower()
                # Ensure there is no conflict between id and ext in format selection
                # See https://github.com/yt-dlp/yt-dlp/issues/1282
                if format['format_id'] != format['ext'] and format['format_id'] in common_exts:
                    format['format_id'] = 'f%s' % format['format_id']

        # Fill in the derived per-format fields that are still missing
        for i, format in enumerate(formats):
            if format.get('format') is None:
                format['format'] = '{id} - {res}{note}'.format(
                    id=format['format_id'],
                    res=self.format_resolution(format),
                    note=format_field(format, 'format_note', ' (%s)'),
                )
            if format.get('protocol') is None:
                format['protocol'] = determine_protocol(format)
            if format.get('resolution') is None:
                format['resolution'] = self.format_resolution(format, default=None)
            if format.get('dynamic_range') is None and format.get('vcodec') != 'none':
                format['dynamic_range'] = 'SDR'
            # Estimate the size from duration and bitrate when no size is known
            # (the 1024 / 8 factor suggests tbr is in kbit/s - TODO confirm)
            if (info_dict.get('duration') and format.get('tbr')
                    and not format.get('filesize') and not format.get('filesize_approx')):
                format['filesize_approx'] = info_dict['duration'] * format['tbr'] * (1024 / 8)

            # Add HTTP headers, so that external programs can use them from the
            # json output
            full_format_info = info_dict.copy()
            full_format_info.update(format)
            format['http_headers'] = self._calc_headers(full_format_info)
        # Remove private housekeeping stuff
        if '__x_forwarded_for_ip' in info_dict:
            del info_dict['__x_forwarded_for_ip']

        # TODO Central sorting goes here

        if self.params.get('check_formats') is True:
            # Check lazily, starting from the end of the list, while keeping the original order
            formats = LazyList(self._check_formats(formats[::-1]), reverse=True)

        if not formats or formats[0] is not info_dict:
            # only set the 'formats' fields if the original info_dict list them
            # otherwise we end up with a circular reference, the first (and unique)
            # element in the 'formats' field in info_dict is info_dict itself,
            # which can't be exported to json
            info_dict['formats'] = formats

        info_dict, _ = self.pre_process(info_dict)

        # The pre-processors may have modified the formats
        formats = info_dict.get('formats', [info_dict])

        if self.params.get('list_thumbnails'):
            self.list_thumbnails(info_dict)
        if self.params.get('listformats'):
            if not info_dict.get('formats') and not info_dict.get('url'):
                self.to_screen('%s has no formats' % info_dict['id'])
            else:
                self.list_formats(info_dict)
        if self.params.get('listsubtitles'):
            if 'automatic_captions' in info_dict:
                self.list_subtitles(
                    info_dict['id'], automatic_captions, 'automatic captions')
            self.list_subtitles(info_dict['id'], subtitles, 'subtitles')
        # A listing option stops processing here only when 'simulate' was left unset (None)
        list_only = self.params.get('simulate') is None and (
            self.params.get('list_thumbnails') or self.params.get('listformats') or self.params.get('listsubtitles'))
        if list_only:
            # Without this printing, -F --print-json will not work
            self.__forced_printings(info_dict, self.prepare_filename(info_dict), incomplete=True)
            return

        format_selector = self.format_selector
        if format_selector is None:
            req_format = self._default_format_spec(info_dict, download=download)
            self.write_debug('Default format spec: %s' % req_format)
            format_selector = self.build_format_selector(req_format)

        # While in format selection we may need to have an access to the original
        # format set in order to calculate some metrics or do some processing.
        # For now we need to be able to guess whether original formats provided
        # by extractor are incomplete or not (i.e. whether extractor provides only
        # video-only or audio-only formats) for proper formats selection for
        # extractors with such incomplete formats (see
        # https://github.com/ytdl-org/youtube-dl/pull/5556).
        # Since formats may be filtered during format selection and may not match
        # the original formats the results may be incorrect. Thus original formats
        # or pre-calculated metrics should be passed to format selection routines
        # as well.
        # We will pass a context object containing all necessary additional data
        # instead of just formats.
        # This fixes incorrect format selection issue (see
        # https://github.com/ytdl-org/youtube-dl/issues/10083).
        incomplete_formats = (
            # All formats are video-only or
            all(f.get('vcodec') != 'none' and f.get('acodec') == 'none' for f in formats)
            # all formats are audio-only
            or all(f.get('vcodec') == 'none' and f.get('acodec') != 'none' for f in formats))

        ctx = {
            'formats': formats,
            'incomplete_formats': incomplete_formats,
        }

        formats_to_download = list(format_selector(ctx))
        if not formats_to_download:
            if not self.params.get('ignore_no_formats_error'):
                raise ExtractorError('Requested format is not available', expected=True,
                                     video_id=info_dict['id'], ie=info_dict['extractor'])
            else:
                self.report_warning('Requested format is not available')
                # Process what we can, even without any available formats.
                self.process_info(dict(info_dict))
        elif download:
            self.to_screen(
                '[info] %s: Downloading %d format(s): %s' % (
                    info_dict['id'], len(formats_to_download),
                    ", ".join([f['format_id'] for f in formats_to_download])))
            for fmt in formats_to_download:
                # Each selected format is processed as its own shallow copy of
                # the info dict overlaid with the format's fields
                new_info = dict(info_dict)
                # Save a reference to the original info_dict so that it can be modified in process_info if needed
                new_info['__original_infodict'] = info_dict
                new_info.update(fmt)
                self.process_info(new_info)
        # We update the info dict with the selected best quality format (backwards compatibility)
        if formats_to_download:
            info_dict.update(formats_to_download[-1])
        return info_dict
2507
2508     def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
2509         """Select the requested subtitles and their format"""
2510         available_subs = {}
2511         if normal_subtitles and self.params.get('writesubtitles'):
2512             available_subs.update(normal_subtitles)
2513         if automatic_captions and self.params.get('writeautomaticsub'):
2514             for lang, cap_info in automatic_captions.items():
2515                 if lang not in available_subs:
2516                     available_subs[lang] = cap_info
2517
2518         if (not self.params.get('writesubtitles') and not
2519                 self.params.get('writeautomaticsub') or not
2520                 available_subs):
2521             return None
2522
2523         all_sub_langs = available_subs.keys()
2524         if self.params.get('allsubtitles', False):
2525             requested_langs = all_sub_langs
2526         elif self.params.get('subtitleslangs', False):
2527             # A list is used so that the order of languages will be the same as
2528             # given in subtitleslangs. See https://github.com/yt-dlp/yt-dlp/issues/1041
2529             requested_langs = []
2530             for lang_re in self.params.get('subtitleslangs'):
2531                 if lang_re == 'all':
2532                     requested_langs.extend(all_sub_langs)
2533                     continue
2534                 discard = lang_re[0] == '-'
2535                 if discard:
2536                     lang_re = lang_re[1:]
2537                 current_langs = filter(re.compile(lang_re + '$').match, all_sub_langs)
2538                 if discard:
2539                     for lang in current_langs:
2540                         while lang in requested_langs:
2541                             requested_langs.remove(lang)
2542                 else:
2543                     requested_langs.extend(current_langs)
2544             requested_langs = orderedSet(requested_langs)
2545         elif 'en' in available_subs:
2546             requested_langs = ['en']
2547         else:
2548             requested_langs = [list(all_sub_langs)[0]]
2549         if requested_langs:
2550             self.write_debug('Downloading subtitles: %s' % ', '.join(requested_langs))
2551
2552         formats_query = self.params.get('subtitlesformat', 'best')
2553         formats_preference = formats_query.split('/') if formats_query else []
2554         subs = {}
2555         for lang in requested_langs:
2556             formats = available_subs.get(lang)
2557             if formats is None:
2558                 self.report_warning('%s subtitles not available for %s' % (lang, video_id))
2559                 continue
2560             for ext in formats_preference:
2561                 if ext == 'best':
2562                     f = formats[-1]
2563                     break
2564                 matches = list(filter(lambda f: f['ext'] == ext, formats))
2565                 if matches:
2566                     f = matches[-1]
2567                     break
2568             else:
2569                 f = formats[-1]
2570                 self.report_warning(
2571                     'No subtitle format found matching "%s" for language %s, '
2572                     'using %s' % (formats_query, lang, f['ext']))
2573             subs[lang] = f
2574         return subs
2575
2576     def __forced_printings(self, info_dict, filename, incomplete):
2577         def print_mandatory(field, actual_field=None):
2578             if actual_field is None:
2579                 actual_field = field
2580             if (self.params.get('force%s' % field, False)
2581                     and (not incomplete or info_dict.get(actual_field) is not None)):
2582                 self.to_stdout(info_dict[actual_field])
2583
2584         def print_optional(field):
2585             if (self.params.get('force%s' % field, False)
2586                     and info_dict.get(field) is not None):
2587                 self.to_stdout(info_dict[field])
2588
2589         info_dict = info_dict.copy()
2590         if filename is not None:
2591             info_dict['filename'] = filename
2592         if info_dict.get('requested_formats') is not None:
2593             # For RTMP URLs, also include the playpath
2594             info_dict['urls'] = '\n'.join(f['url'] + f.get('play_path', '') for f in info_dict['requested_formats'])
2595         elif 'url' in info_dict:
2596             info_dict['urls'] = info_dict['url'] + info_dict.get('play_path', '')
2597
2598         if self.params.get('forceprint') or self.params.get('forcejson'):
2599             self.post_extract(info_dict)
2600         for tmpl in self.params.get('forceprint', []):
2601             mobj = re.match(r'\w+(=?)$', tmpl)
2602             if mobj and mobj.group(1):
2603                 tmpl = f'{tmpl[:-1]} = %({tmpl[:-1]})s'
2604             elif mobj:
2605                 tmpl = '%({})s'.format(tmpl)
2606             self.to_stdout(self.evaluate_outtmpl(tmpl, info_dict))
2607
2608         print_mandatory('title')
2609         print_mandatory('id')
2610         print_mandatory('url', 'urls')
2611         print_optional('thumbnail')
2612         print_optional('description')
2613         print_optional('filename')
2614         if self.params.get('forceduration') and info_dict.get('duration') is not None:
2615             self.to_stdout(formatSeconds(info_dict['duration']))
2616         print_mandatory('format')
2617
2618         if self.params.get('forcejson'):
2619             self.to_stdout(json.dumps(self.sanitize_info(info_dict)))
2620
2621     def dl(self, name, info, subtitle=False, test=False):
2622         if not info.get('url'):
2623             self.raise_no_formats(info, True)
2624
2625         if test:
2626             verbose = self.params.get('verbose')
2627             params = {
2628                 'test': True,
2629                 'quiet': self.params.get('quiet') or not verbose,
2630                 'verbose': verbose,
2631                 'noprogress': not verbose,
2632                 'nopart': True,
2633                 'skip_unavailable_fragments': False,
2634                 'keep_fragments': False,
2635                 'overwrites': True,
2636                 '_no_ytdl_file': True,
2637             }
2638         else:
2639             params = self.params
2640         fd = get_suitable_downloader(info, params, to_stdout=(name == '-'))(self, params)
2641         if not test:
2642             for ph in self._progress_hooks:
2643                 fd.add_progress_hook(ph)
2644             urls = '", "'.join([f['url'] for f in info.get('requested_formats', [])] or [info['url']])
2645             self.write_debug('Invoking downloader on "%s"' % urls)
2646
2647         new_info = copy.deepcopy(self._copy_infodict(info))
2648         if new_info.get('http_headers') is None:
2649             new_info['http_headers'] = self._calc_headers(new_info)
2650         return fd.download(name, new_info, subtitle)
2651
2652     def process_info(self, info_dict):
2653         """Process a single resolved IE result."""
2654
2655         assert info_dict.get('_type', 'video') == 'video'
2656
2657         max_downloads = self.params.get('max_downloads')
2658         if max_downloads is not None:
2659             if self._num_downloads >= int(max_downloads):
2660                 raise MaxDownloadsReached()
2661
2662         # TODO: backward compatibility, to be removed
2663         info_dict['fulltitle'] = info_dict['title']
2664
2665         if 'format' not in info_dict and 'ext' in info_dict:
2666             info_dict['format'] = info_dict['ext']
2667
2668         if self._match_entry(info_dict) is not None:
2669             return
2670
2671         self.post_extract(info_dict)
2672         self._num_downloads += 1
2673
2674         # info_dict['_filename'] needs to be set for backward compatibility
2675         info_dict['_filename'] = full_filename = self.prepare_filename(info_dict, warn=True)
2676         temp_filename = self.prepare_filename(info_dict, 'temp')
2677         files_to_move = {}
2678
2679         # Forced printings
2680         self.__forced_printings(info_dict, full_filename, incomplete=('format' not in info_dict))
2681
2682         if self.params.get('simulate'):
2683             if self.params.get('force_write_download_archive', False):
2684                 self.record_download_archive(info_dict)
2685             # Do nothing else if in simulate mode
2686             return
2687
2688         if full_filename is None:
2689             return
2690         if not self._ensure_dir_exists(encodeFilename(full_filename)):
2691             return
2692         if not self._ensure_dir_exists(encodeFilename(temp_filename)):
2693             return
2694
2695         if self._write_description('video', info_dict,
2696                                    self.prepare_filename(info_dict, 'description')) is None:
2697             return
2698
2699         sub_files = self._write_subtitles(info_dict, temp_filename)
2700         if sub_files is None:
2701             return
2702         files_to_move.update(dict(sub_files))
2703
2704         thumb_files = self._write_thumbnails(
2705             'video', info_dict, temp_filename, self.prepare_filename(info_dict, 'thumbnail'))
2706         if thumb_files is None:
2707             return
2708         files_to_move.update(dict(thumb_files))
2709
2710         infofn = self.prepare_filename(info_dict, 'infojson')
2711         _infojson_written = self._write_info_json('video', info_dict, infofn)
2712         if _infojson_written:
2713             info_dict['infojson_filename'] = infofn
2714             # For backward compatability, even though it was a private field
2715             info_dict['__infojson_filename'] = infofn
2716         elif _infojson_written is None:
2717             return
2718
2719         # Note: Annotations are deprecated
2720         annofn = None
2721         if self.params.get('writeannotations', False):
2722             annofn = self.prepare_filename(info_dict, 'annotation')
2723         if annofn:
2724             if not self._ensure_dir_exists(encodeFilename(annofn)):
2725                 return
2726             if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(annofn)):
2727                 self.to_screen('[info] Video annotations are already present')
2728             elif not info_dict.get('annotations'):
2729                 self.report_warning('There are no annotations to write.')
2730             else:
2731                 try:
2732                     self.to_screen('[info] Writing video annotations to: ' + annofn)
2733                     with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
2734                         annofile.write(info_dict['annotations'])
2735                 except (KeyError, TypeError):
2736                     self.report_warning('There are no annotations to write.')
2737                 except (OSError, IOError):
2738                     self.report_error('Cannot write annotations file: ' + annofn)
2739                     return
2740
2741         # Write internet shortcut files
2742         def _write_link_file(link_type):
2743             if 'webpage_url' not in info_dict:
2744                 self.report_error('Cannot write internet shortcut file because the "webpage_url" field is missing in the media information')
2745                 return False
2746             linkfn = replace_extension(self.prepare_filename(info_dict, 'link'), link_type, info_dict.get('ext'))
2747             if not self._ensure_dir_exists(encodeFilename(linkfn)):
2748                 return False
2749             if self.params.get('overwrites', True) and os.path.exists(encodeFilename(linkfn)):
2750                 self.to_screen(f'[info] Internet shortcut (.{link_type}) is already present')
2751                 return True
2752             try:
2753                 self.to_screen(f'[info] Writing internet shortcut (.{link_type}) to: {linkfn}')
2754                 with io.open(encodeFilename(to_high_limit_path(linkfn)), 'w', encoding='utf-8',
2755                              newline='\r\n' if link_type == 'url' else '\n') as linkfile:
2756                     template_vars = {'url': iri_to_uri(info_dict['webpage_url'])}
2757                     if link_type == 'desktop':
2758                         template_vars['filename'] = linkfn[:-(len(link_type) + 1)]
2759                     linkfile.write(LINK_TEMPLATES[link_type] % template_vars)
2760             except (OSError, IOError):
2761                 self.report_error(f'Cannot write internet shortcut {linkfn}')
2762                 return False
2763             return True
2764
2765         write_links = {
2766             'url': self.params.get('writeurllink'),
2767             'webloc': self.params.get('writewebloclink'),
2768             'desktop': self.params.get('writedesktoplink'),
2769         }
2770         if self.params.get('writelink'):
2771             link_type = ('webloc' if sys.platform == 'darwin'
2772                          else 'desktop' if sys.platform.startswith('linux')
2773                          else 'url')
2774             write_links[link_type] = True
2775
2776         if any(should_write and not _write_link_file(link_type)
2777                for link_type, should_write in write_links.items()):
2778             return
2779
2780         try:
2781             info_dict, files_to_move = self.pre_process(info_dict, 'before_dl', files_to_move)
2782         except PostProcessingError as err:
2783             self.report_error('Preprocessing: %s' % str(err))
2784             return
2785
2786         must_record_download_archive = False
2787         if self.params.get('skip_download', False):
2788             info_dict['filepath'] = temp_filename
2789             info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))
2790             info_dict['__files_to_move'] = files_to_move
2791             info_dict = self.run_pp(MoveFilesAfterDownloadPP(self, False), info_dict)
2792         else:
2793             # Download
2794             info_dict.setdefault('__postprocessors', [])
2795             try:
2796
2797                 def existing_file(*filepaths):
2798                     ext = info_dict.get('ext')
2799                     final_ext = self.params.get('final_ext', ext)
2800                     existing_files = []
2801                     for file in orderedSet(filepaths):
2802                         if final_ext != ext:
2803                             converted = replace_extension(file, final_ext, ext)
2804                             if os.path.exists(encodeFilename(converted)):
2805                                 existing_files.append(converted)
2806                         if os.path.exists(encodeFilename(file)):
2807                             existing_files.append(file)
2808
2809                     if not existing_files or self.params.get('overwrites', False):
2810                         for file in orderedSet(existing_files):
2811                             self.report_file_delete(file)
2812                             os.remove(encodeFilename(file))
2813                         return None
2814
2815                     info_dict['ext'] = os.path.splitext(existing_files[0])[1][1:]
2816                     return existing_files[0]
2817
2818                 success = True
2819                 if info_dict.get('requested_formats') is not None:
2820
2821                     def compatible_formats(formats):
2822                         # TODO: some formats actually allow this (mkv, webm, ogg, mp4), but not all of them.
2823                         video_formats = [format for format in formats if format.get('vcodec') != 'none']
2824                         audio_formats = [format for format in formats if format.get('acodec') != 'none']
2825                         if len(video_formats) > 2 or len(audio_formats) > 2:
2826                             return False
2827
2828                         # Check extension
2829                         exts = set(format.get('ext') for format in formats)
2830                         COMPATIBLE_EXTS = (
2831                             set(('mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'ismv', 'isma')),
2832                             set(('webm',)),
2833                         )
2834                         for ext_sets in COMPATIBLE_EXTS:
2835                             if ext_sets.issuperset(exts):
2836                                 return True
2837                         # TODO: Check acodec/vcodec
2838                         return False
2839
2840                     requested_formats = info_dict['requested_formats']
2841                     old_ext = info_dict['ext']
2842                     if self.params.get('merge_output_format') is None:
2843                         if not compatible_formats(requested_formats):
2844                             info_dict['ext'] = 'mkv'
2845                             self.report_warning(
2846                                 'Requested formats are incompatible for merge and will be merged into mkv')
2847                         if (info_dict['ext'] == 'webm'
2848                                 and info_dict.get('thumbnails')
2849                                 # check with type instead of pp_key, __name__, or isinstance
2850                                 # since we dont want any custom PPs to trigger this
2851                                 and any(type(pp) == EmbedThumbnailPP for pp in self._pps['post_process'])):
2852                             info_dict['ext'] = 'mkv'
2853                             self.report_warning(
2854                                 'webm doesn\'t support embedding a thumbnail, mkv will be used')
2855                     new_ext = info_dict['ext']
2856
2857                     def correct_ext(filename, ext=new_ext):
2858                         if filename == '-':
2859                             return filename
2860                         filename_real_ext = os.path.splitext(filename)[1][1:]
2861                         filename_wo_ext = (
2862                             os.path.splitext(filename)[0]
2863                             if filename_real_ext in (old_ext, new_ext)
2864                             else filename)
2865                         return '%s.%s' % (filename_wo_ext, ext)
2866
2867                     # Ensure filename always has a correct extension for successful merge
2868                     full_filename = correct_ext(full_filename)
2869                     temp_filename = correct_ext(temp_filename)
2870                     dl_filename = existing_file(full_filename, temp_filename)
2871                     info_dict['__real_download'] = False
2872
2873                     if dl_filename is not None:
2874                         self.report_file_already_downloaded(dl_filename)
2875                     elif get_suitable_downloader(info_dict, self.params, to_stdout=temp_filename == '-'):
2876                         info_dict['url'] = '\n'.join(f['url'] for f in requested_formats)
2877                         success, real_download = self.dl(temp_filename, info_dict)
2878                         info_dict['__real_download'] = real_download
2879                     else:
2880                         downloaded = []
2881                         merger = FFmpegMergerPP(self)
2882                         if self.params.get('allow_unplayable_formats'):
2883                             self.report_warning(
2884                                 'You have requested merging of multiple formats '
2885                                 'while also allowing unplayable formats to be downloaded. '
2886                                 'The formats won\'t be merged to prevent data corruption.')
2887                         elif not merger.available:
2888                             self.report_warning(
2889                                 'You have requested merging of multiple formats but ffmpeg is not installed. '
2890                                 'The formats won\'t be merged.')
2891
2892                         if temp_filename == '-':
2893                             reason = ('using a downloader other than ffmpeg' if FFmpegFD.can_merge_formats(info_dict)
2894                                       else 'but the formats are incompatible for simultaneous download' if merger.available
2895                                       else 'but ffmpeg is not installed')
2896                             self.report_warning(
2897                                 f'You have requested downloading multiple formats to stdout {reason}. '
2898                                 'The formats will be streamed one after the other')
2899                             fname = temp_filename
2900                         for f in requested_formats:
2901                             new_info = dict(info_dict)
2902                             del new_info['requested_formats']
2903                             new_info.update(f)
2904                             if temp_filename != '-':
2905                                 fname = prepend_extension(
2906                                     correct_ext(temp_filename, new_info['ext']),
2907                                     'f%s' % f['format_id'], new_info['ext'])
2908                                 if not self._ensure_dir_exists(fname):
2909                                     return
2910                                 f['filepath'] = fname
2911                                 downloaded.append(fname)
2912                             partial_success, real_download = self.dl(fname, new_info)
2913                             info_dict['__real_download'] = info_dict['__real_download'] or real_download
2914                             success = success and partial_success
2915                         if merger.available and not self.params.get('allow_unplayable_formats'):
2916                             info_dict['__postprocessors'].append(merger)
2917                             info_dict['__files_to_merge'] = downloaded
2918                             # Even if there were no downloads, it is being merged only now
2919                             info_dict['__real_download'] = True
2920                         else:
2921                             for file in downloaded:
2922                                 files_to_move[file] = None
2923                 else:
2924                     # Just a single file
2925                     dl_filename = existing_file(full_filename, temp_filename)
2926                     if dl_filename is None or dl_filename == temp_filename:
2927                         # dl_filename == temp_filename could mean that the file was partially downloaded with --no-part.
2928                         # So we should try to resume the download
2929                         success, real_download = self.dl(temp_filename, info_dict)
2930                         info_dict['__real_download'] = real_download
2931                     else:
2932                         self.report_file_already_downloaded(dl_filename)
2933
2934                 dl_filename = dl_filename or temp_filename
2935                 info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))
2936
2937             except network_exceptions as err:
2938                 self.report_error('unable to download video data: %s' % error_to_compat_str(err))
2939                 return
2940             except (OSError, IOError) as err:
2941                 raise UnavailableVideoError(err)
2942             except (ContentTooShortError, ) as err:
2943                 self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
2944                 return
2945
2946             if success and full_filename != '-':
2947
2948                 def fixup():
2949                     do_fixup = True
2950                     fixup_policy = self.params.get('fixup')
2951                     vid = info_dict['id']
2952
2953                     if fixup_policy in ('ignore', 'never'):
2954                         return
2955                     elif fixup_policy == 'warn':
2956                         do_fixup = False
2957                     elif fixup_policy != 'force':
2958                         assert fixup_policy in ('detect_or_warn', None)
2959                         if not info_dict.get('__real_download'):
2960                             do_fixup = False
2961
2962                     def ffmpeg_fixup(cndn, msg, cls):
2963                         if not cndn:
2964                             return
2965                         if not do_fixup:
2966                             self.report_warning(f'{vid}: {msg}')
2967                             return
2968                         pp = cls(self)
2969                         if pp.available:
2970                             info_dict['__postprocessors'].append(pp)
2971                         else:
2972                             self.report_warning(f'{vid}: {msg}. Install ffmpeg to fix this automatically')
2973
2974                     stretched_ratio = info_dict.get('stretched_ratio')
2975                     ffmpeg_fixup(
2976                         stretched_ratio not in (1, None),
2977                         f'Non-uniform pixel ratio {stretched_ratio}',
2978                         FFmpegFixupStretchedPP)
2979
2980                     ffmpeg_fixup(
2981                         (info_dict.get('requested_formats') is None
2982                          and info_dict.get('container') == 'm4a_dash'
2983                          and info_dict.get('ext') == 'm4a'),
2984                         'writing DASH m4a. Only some players support this container',
2985                         FFmpegFixupM4aPP)
2986
2987                     downloader = get_suitable_downloader(info_dict, self.params) if 'protocol' in info_dict else None
2988                     downloader = downloader.__name__ if downloader else None
2989                     ffmpeg_fixup(info_dict.get('requested_formats') is None and downloader == 'HlsFD',
2990                                  'Possible MPEG-TS in MP4 container or malformed AAC timestamps',
2991                                  FFmpegFixupM3u8PP)
2992                     ffmpeg_fixup(downloader == 'WebSocketFragmentFD', 'Malformed timestamps detected', FFmpegFixupTimestampPP)
2993                     ffmpeg_fixup(downloader == 'WebSocketFragmentFD', 'Malformed duration detected', FFmpegFixupDurationPP)
2994
2995                 fixup()
2996                 try:
2997                     info_dict = self.post_process(dl_filename, info_dict, files_to_move)
2998                 except PostProcessingError as err:
2999                     self.report_error('Postprocessing: %s' % str(err))
3000                     return
3001                 try:
3002                     for ph in self._post_hooks:
3003                         ph(info_dict['filepath'])
3004                 except Exception as err:
3005                     self.report_error('post hooks: %s' % str(err))
3006                     return
3007                 must_record_download_archive = True
3008
3009         if must_record_download_archive or self.params.get('force_write_download_archive', False):
3010             self.record_download_archive(info_dict)
3011         max_downloads = self.params.get('max_downloads')
3012         if max_downloads is not None and self._num_downloads >= int(max_downloads):
3013             raise MaxDownloadsReached()
3014
3015     def __download_wrapper(self, func):
3016         @functools.wraps(func)
3017         def wrapper(*args, **kwargs):
3018             try:
3019                 res = func(*args, **kwargs)
3020             except UnavailableVideoError as e:
3021                 self.report_error(e)
3022             except MaxDownloadsReached as e:
3023                 self.to_screen(f'[info] {e}')
3024                 raise
3025             except DownloadCancelled as e:
3026                 self.to_screen(f'[info] {e}')
3027                 if not self.params.get('break_per_url'):
3028                     raise
3029             else:
3030                 if self.params.get('dump_single_json', False):
3031                     self.post_extract(res)
3032                     self.to_stdout(json.dumps(self.sanitize_info(res)))
3033         return wrapper
3034
3035     def download(self, url_list):
3036         """Download a given list of URLs."""
3037         url_list = variadic(url_list)  # Passing a single URL is a common mistake
3038         outtmpl = self.outtmpl_dict['default']
3039         if (len(url_list) > 1
3040                 and outtmpl != '-'
3041                 and '%' not in outtmpl
3042                 and self.params.get('max_downloads') != 1):
3043             raise SameFileError(outtmpl)
3044
3045         for url in url_list:
3046             self.__download_wrapper(self.extract_info)(
3047                 url, force_generic_extractor=self.params.get('force_generic_extractor', False))
3048
3049         return self._download_retcode
3050
3051     def download_with_info_file(self, info_filename):
3052         with contextlib.closing(fileinput.FileInput(
3053                 [info_filename], mode='r',
3054                 openhook=fileinput.hook_encoded('utf-8'))) as f:
3055             # FileInput doesn't have a read method, we can't call json.load
3056             info = self.sanitize_info(json.loads('\n'.join(f)), self.params.get('clean_infojson', True))
3057         try:
3058             self.__download_wrapper(self.process_ie_result)(info, download=True)
3059         except (DownloadError, EntryNotInPlaylist, ReExtractInfo) as e:
3060             if not isinstance(e, EntryNotInPlaylist):
3061                 self.to_stderr('\r')
3062             webpage_url = info.get('webpage_url')
3063             if webpage_url is not None:
3064                 self.report_warning(f'The info failed to download: {e}; trying with URL {webpage_url}')
3065                 return self.download([webpage_url])
3066             else:
3067                 raise
3068         return self._download_retcode
3069
3070     @staticmethod
3071     def sanitize_info(info_dict, remove_private_keys=False):
3072         ''' Sanitize the infodict for converting to json '''
3073         if info_dict is None:
3074             return info_dict
3075         info_dict.setdefault('epoch', int(time.time()))
3076         remove_keys = {'__original_infodict'}  # Always remove this since this may contain a copy of the entire dict
3077         keep_keys = ['_type']  # Always keep this to facilitate load-info-json
3078         if remove_private_keys:
3079             remove_keys |= {
3080                 'requested_formats', 'requested_subtitles', 'requested_entries', 'entries',
3081                 'filepath', 'infojson_filename', 'original_url', 'playlist_autonumber',
3082             }
3083             empty_values = (None, {}, [], set(), tuple())
3084             reject = lambda k, v: k not in keep_keys and (
3085                 k.startswith('_') or k in remove_keys or v in empty_values)
3086         else:
3087             reject = lambda k, v: k in remove_keys
3088         filter_fn = lambda obj: (
3089             list(map(filter_fn, obj)) if isinstance(obj, (LazyList, list, tuple, set))
3090             else obj if not isinstance(obj, dict)
3091             else dict((k, filter_fn(v)) for k, v in obj.items() if not reject(k, v)))
3092         return filter_fn(info_dict)
3093
3094     @staticmethod
3095     def filter_requested_info(info_dict, actually_filter=True):
3096         ''' Alias of sanitize_info for backward compatibility '''
3097         return YoutubeDL.sanitize_info(info_dict, actually_filter)
3098
3099     def run_pp(self, pp, infodict):
3100         files_to_delete = []
3101         if '__files_to_move' not in infodict:
3102             infodict['__files_to_move'] = {}
3103         try:
3104             files_to_delete, infodict = pp.run(infodict)
3105         except PostProcessingError as e:
3106             # Must be True and not 'only_download'
3107             if self.params.get('ignoreerrors') is True:
3108                 self.report_error(e)
3109                 return infodict
3110             raise
3111
3112         if not files_to_delete:
3113             return infodict
3114         if self.params.get('keepvideo', False):
3115             for f in files_to_delete:
3116                 infodict['__files_to_move'].setdefault(f, '')
3117         else:
3118             for old_filename in set(files_to_delete):
3119                 self.to_screen('Deleting original file %s (pass -k to keep)' % old_filename)
3120                 try:
3121                     os.remove(encodeFilename(old_filename))
3122                 except (IOError, OSError):
3123                     self.report_warning('Unable to remove downloaded original file')
3124                 if old_filename in infodict['__files_to_move']:
3125                     del infodict['__files_to_move'][old_filename]
3126         return infodict
3127
3128     @staticmethod
3129     def post_extract(info_dict):
3130         def actual_post_extract(info_dict):
3131             if info_dict.get('_type') in ('playlist', 'multi_video'):
3132                 for video_dict in info_dict.get('entries', {}):
3133                     actual_post_extract(video_dict or {})
3134                 return
3135
3136             post_extractor = info_dict.get('__post_extractor') or (lambda: {})
3137             extra = post_extractor().items()
3138             info_dict.update(extra)
3139             info_dict.pop('__post_extractor', None)
3140
3141             original_infodict = info_dict.get('__original_infodict') or {}
3142             original_infodict.update(extra)
3143             original_infodict.pop('__post_extractor', None)
3144
3145         actual_post_extract(info_dict or {})
3146
3147     def pre_process(self, ie_info, key='pre_process', files_to_move=None):
3148         info = dict(ie_info)
3149         info['__files_to_move'] = files_to_move or {}
3150         for pp in self._pps[key]:
3151             info = self.run_pp(pp, info)
3152         return info, info.pop('__files_to_move', None)
3153
3154     def post_process(self, filename, ie_info, files_to_move=None):
3155         """Run all the postprocessors on the given file."""
3156         info = dict(ie_info)
3157         info['filepath'] = filename
3158         info['__files_to_move'] = files_to_move or {}
3159
3160         for pp in ie_info.get('__postprocessors', []) + self._pps['post_process']:
3161             info = self.run_pp(pp, info)
3162         info = self.run_pp(MoveFilesAfterDownloadPP(self), info)
3163         del info['__files_to_move']
3164         for pp in self._pps['after_move']:
3165             info = self.run_pp(pp, info)
3166         return info
3167
3168     def _make_archive_id(self, info_dict):
3169         video_id = info_dict.get('id')
3170         if not video_id:
3171             return
3172         # Future-proof against any change in case
3173         # and backwards compatibility with prior versions
3174         extractor = info_dict.get('extractor_key') or info_dict.get('ie_key')  # key in a playlist
3175         if extractor is None:
3176             url = str_or_none(info_dict.get('url'))
3177             if not url:
3178                 return
3179             # Try to find matching extractor for the URL and take its ie_key
3180             for ie_key, ie in self._ies.items():
3181                 if ie.suitable(url):
3182                     extractor = ie_key
3183                     break
3184             else:
3185                 return
3186         return '%s %s' % (extractor.lower(), video_id)
3187
3188     def in_download_archive(self, info_dict):
3189         fn = self.params.get('download_archive')
3190         if fn is None:
3191             return False
3192
3193         vid_id = self._make_archive_id(info_dict)
3194         if not vid_id:
3195             return False  # Incomplete video information
3196
3197         return vid_id in self.archive
3198
3199     def record_download_archive(self, info_dict):
3200         fn = self.params.get('download_archive')
3201         if fn is None:
3202             return
3203         vid_id = self._make_archive_id(info_dict)
3204         assert vid_id
3205         with locked_file(fn, 'a', encoding='utf-8') as archive_file:
3206             archive_file.write(vid_id + '\n')
3207         self.archive.add(vid_id)
3208
3209     @staticmethod
3210     def format_resolution(format, default='unknown'):
3211         if format.get('vcodec') == 'none' and format.get('acodec') != 'none':
3212             return 'audio only'
3213         if format.get('resolution') is not None:
3214             return format['resolution']
3215         if format.get('width') and format.get('height'):
3216             return '%dx%d' % (format['width'], format['height'])
3217         elif format.get('height'):
3218             return '%sp' % format['height']
3219         elif format.get('width'):
3220             return '%dx?' % format['width']
3221         return default
3222
3223     def _format_note(self, fdict):
3224         res = ''
3225         if fdict.get('ext') in ['f4f', 'f4m']:
3226             res += '(unsupported)'
3227         if fdict.get('language'):
3228             if res:
3229                 res += ' '
3230             res += '[%s]' % fdict['language']
3231         if fdict.get('format_note') is not None:
3232             if res:
3233                 res += ' '
3234             res += fdict['format_note']
3235         if fdict.get('tbr') is not None:
3236             if res:
3237                 res += ', '
3238             res += '%4dk' % fdict['tbr']
3239         if fdict.get('container') is not None:
3240             if res:
3241                 res += ', '
3242             res += '%s container' % fdict['container']
3243         if (fdict.get('vcodec') is not None
3244                 and fdict.get('vcodec') != 'none'):
3245             if res:
3246                 res += ', '
3247             res += fdict['vcodec']
3248             if fdict.get('vbr') is not None:
3249                 res += '@'
3250         elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
3251             res += 'video@'
3252         if fdict.get('vbr') is not None:
3253             res += '%4dk' % fdict['vbr']
3254         if fdict.get('fps') is not None:
3255             if res:
3256                 res += ', '
3257             res += '%sfps' % fdict['fps']
3258         if fdict.get('acodec') is not None:
3259             if res:
3260                 res += ', '
3261             if fdict['acodec'] == 'none':
3262                 res += 'video only'
3263             else:
3264                 res += '%-5s' % fdict['acodec']
3265         elif fdict.get('abr') is not None:
3266             if res:
3267                 res += ', '
3268             res += 'audio'
3269         if fdict.get('abr') is not None:
3270             res += '@%3dk' % fdict['abr']
3271         if fdict.get('asr') is not None:
3272             res += ' (%5dHz)' % fdict['asr']
3273         if fdict.get('filesize') is not None:
3274             if res:
3275                 res += ', '
3276             res += format_bytes(fdict['filesize'])
3277         elif fdict.get('filesize_approx') is not None:
3278             if res:
3279                 res += ', '
3280             res += '~' + format_bytes(fdict['filesize_approx'])
3281         return res
3282
3283     def _list_format_headers(self, *headers):
3284         if self.params.get('listformats_table', True) is not False:
3285             return [self._format_screen(header, self.Styles.HEADERS) for header in headers]
3286         return headers
3287
    def list_formats(self, info_dict):
        """Print a table of the formats available for `info_dict`.

        Rows are built from info_dict['formats']; when that key is absent,
        `info_dict` itself is treated as the only format. Formats whose
        'preference' is below -1000 are left out. The heading goes through
        to_screen() and the rendered table through to_stdout().
        """
        formats = info_dict.get('formats', [info_dict])
        # Anything other than an explicit False selects the wide, styled table
        new_format = self.params.get('listformats_table', True) is not False
        if new_format:
            # Column separator; '|' is passed as the third argument alongside test_encoding=True
            delim = self._format_screen('\u2502', self.Styles.DELIM, '|', test_encoding=True)
            table = [
                [
                    self._format_screen(format_field(f, 'format_id'), self.Styles.ID),
                    format_field(f, 'ext'),
                    format_field(f, func=self.format_resolution, ignore=('audio only', 'images')),
                    format_field(f, 'fps', '\t%d'),
                    format_field(f, 'dynamic_range', '%s', ignore=(None, 'SDR')).replace('HDR', ''),
                    delim,
                    format_field(f, 'filesize', ' \t%s', func=format_bytes) + format_field(f, 'filesize_approx', '~\t%s', func=format_bytes),
                    format_field(f, 'tbr', '\t%dk'),
                    shorten_protocol_name(f.get('protocol', '').replace('native', 'n')),
                    delim,
                    # A vcodec of 'none' is shown as 'images' (no audio either) or 'audio only'
                    format_field(f, 'vcodec', default='unknown').replace(
                        'none',
                        'images' if f.get('acodec') == 'none'
                        else self._format_screen('audio only', self.Styles.SUPPRESS)),
                    format_field(f, 'vbr', '\t%dk'),
                    # An acodec of 'none' is shown as empty (no video either) or 'video only'
                    format_field(f, 'acodec', default='unknown').replace(
                        'none',
                        '' if f.get('vcodec') == 'none'
                        else self._format_screen('video only', self.Styles.SUPPRESS)),
                    format_field(f, 'abr', '\t%dk'),
                    format_field(f, 'asr', '\t%dHz'),
                    # "MORE INFO" column: unsupported marker, language, then note/container
                    join_nonempty(
                        self._format_screen('UNSUPPORTED', 'light red') if f.get('ext') in ('f4f', 'f4m') else None,
                        format_field(f, 'language', '[%s]'),
                        join_nonempty(
                            format_field(f, 'format_note'),
                            format_field(f, 'container', ignore=(None, f.get('ext'))),
                            delim=', '),
                        delim=' '),
                ] for f in formats if f.get('preference') is None or f['preference'] >= -1000]
            header_line = self._list_format_headers(
                'ID', 'EXT', 'RESOLUTION', '\tFPS', 'HDR', delim, '\tFILESIZE', '\tTBR', 'PROTO',
                delim, 'VCODEC', '\tVBR', 'ACODEC', '\tABR', '\tASR', 'MORE INFO')
        else:
            # Four-column table; all codec/bitrate details go into the note column
            table = [
                [
                    format_field(f, 'format_id'),
                    format_field(f, 'ext'),
                    self.format_resolution(f),
                    self._format_note(f)]
                for f in formats
                if f.get('preference') is None or f['preference'] >= -1000]
            header_line = ['format code', 'extension', 'resolution', 'note']

        self.to_screen(
            '[info] Available formats for %s:' % info_dict['id'])
        # `delim` is False for the four-column table and the styled rule character otherwise
        self.to_stdout(render_table(
            header_line, table,
            extra_gap=(0 if new_format else 1),
            hide_empty=new_format,
            delim=new_format and self._format_screen('\u2500', self.Styles.DELIM, '-', test_encoding=True)))
3346
3347     def list_thumbnails(self, info_dict):
3348         thumbnails = list(info_dict.get('thumbnails'))
3349         if not thumbnails:
3350             self.to_screen('[info] No thumbnails present for %s' % info_dict['id'])
3351             return
3352
3353         self.to_screen(
3354             '[info] Thumbnails for %s:' % info_dict['id'])
3355         self.to_stdout(render_table(
3356             self._list_format_headers('ID', 'Width', 'Height', 'URL'),
3357             [[t['id'], t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']] for t in thumbnails]))
3358
3359     def list_subtitles(self, video_id, subtitles, name='subtitles'):
3360         if not subtitles:
3361             self.to_screen('%s has no %s' % (video_id, name))
3362             return
3363         self.to_screen(
3364             'Available %s for %s:' % (name, video_id))
3365
3366         def _row(lang, formats):
3367             exts, names = zip(*((f['ext'], f.get('name') or 'unknown') for f in reversed(formats)))
3368             if len(set(names)) == 1:
3369                 names = [] if names[0] == 'unknown' else names[:1]
3370             return [lang, ', '.join(names), ', '.join(exts)]
3371
3372         self.to_stdout(render_table(
3373             self._list_format_headers('Language', 'Name', 'Formats'),
3374             [_row(lang, formats) for lang, formats in subtitles.items()],
3375             hide_empty=True))
3376
3377     def urlopen(self, req):
3378         """ Start an HTTP download """
3379         if isinstance(req, compat_basestring):
3380             req = sanitized_Request(req)
3381         return self._opener.open(req, timeout=self._socket_timeout)
3382
    def print_debug_header(self):
        """Write the "[debug]" header lines; does nothing unless the 'verbose' param is set.

        Reports, in order: stream encodings, yt-dlp version and variant,
        lazy-extractor and plugin status, compatibility options, the git HEAD
        (for the 'source' variant), Python version, external program versions,
        optional libraries and the proxy map.
        """
        if not self.params.get('verbose'):
            return

        def get_encoding(stream):
            # Encoding of the stream, or a "missing (<type>)" placeholder if it has none
            ret = getattr(stream, 'encoding', 'missing (%s)' % type(stream).__name__)
            if not supports_terminal_sequences(stream):
                ret += ' (No ANSI)'
            return ret

        encoding_str = 'Encodings: locale %s, fs %s, out %s, err %s, pref %s' % (
            locale.getpreferredencoding(),
            sys.getfilesystemencoding(),
            get_encoding(self._screen_file), get_encoding(self._err_file),
            self.get_encoding())

        # All lines go to the 'logger' param if one is given; otherwise the
        # encodings line is written with write_string(..., encoding=None) and
        # every later line with self._write_string
        logger = self.params.get('logger')
        if logger:
            write_debug = lambda msg: logger.debug(f'[debug] {msg}')
            write_debug(encoding_str)
        else:
            write_string(f'[debug] {encoding_str}\n', encoding=None)
            write_debug = lambda msg: self._write_string(f'[debug] {msg}\n')

        source = detect_variant()
        write_debug(join_nonempty(
            'yt-dlp version', __version__,
            f'[{RELEASE_GIT_HEAD}]' if RELEASE_GIT_HEAD else '',
            '' if source == 'unknown' else f'({source})',
            delim=' '))
        if not _LAZY_LOADER:
            if os.environ.get('YTDLP_NO_LAZY_EXTRACTORS'):
                write_debug('Lazy loading extractors is forcibly disabled')
            else:
                write_debug('Lazy loading extractors is disabled')
        if plugin_extractors or plugin_postprocessors:
            # Each plugin is shown by class name, plus " as <name>" when registered under another name
            write_debug('Plugins: %s' % [
                '%s%s' % (klass.__name__, '' if klass.__name__ == name else f' as {name}')
                for name, klass in itertools.chain(plugin_extractors.items(), plugin_postprocessors.items())])
        if self.params.get('compat_opts'):
            write_debug('Compatibility options: %s' % ', '.join(self.params.get('compat_opts')))

        if source == 'source':
            # Best effort: any failure while querying git is silently ignored
            try:
                sp = Popen(
                    ['git', 'rev-parse', '--short', 'HEAD'],
                    stdout=subprocess.PIPE, stderr=subprocess.PIPE,
                    cwd=os.path.dirname(os.path.abspath(__file__)))
                out, err = sp.communicate_or_kill()
                out = out.decode().strip()
                if re.match('[0-9a-f]+', out):
                    write_debug('Git HEAD: %s' % out)
            except Exception:
                # sys.exc_clear only exists on Python 2; the resulting error is ignored too
                try:
                    sys.exc_clear()
                except Exception:
                    pass

        def python_implementation():
            # Implementation name, with the PyPy version appended when available
            impl_name = platform.python_implementation()
            if impl_name == 'PyPy' and hasattr(sys, 'pypy_version_info'):
                return impl_name + ' version %d.%d.%d' % sys.pypy_version_info[:3]
            return impl_name

        write_debug('Python version %s (%s %s) - %s' % (
            platform.python_version(),
            python_implementation(),
            platform.architecture()[0],
            platform_name()))

        exe_versions, ffmpeg_features = FFmpegPostProcessor.get_versions_and_features(self)
        # Keep only the names of the features whose value is truthy
        ffmpeg_features = {key for key, val in ffmpeg_features.items() if val}
        if ffmpeg_features:
            exe_versions['ffmpeg'] += ' (%s)' % ','.join(ffmpeg_features)

        exe_versions['rtmpdump'] = rtmpdump_version()
        exe_versions['phantomjs'] = PhantomJSwrapper._version()
        # Programs with a falsy version are omitted from the list
        exe_str = ', '.join(
            f'{exe} {v}' for exe, v in sorted(exe_versions.items()) if v
        ) or 'none'
        write_debug('exe versions: %s' % exe_str)

        from .downloader.websocket import has_websockets
        from .postprocessor.embedthumbnail import has_mutagen
        from .cookies import SQLITE_AVAILABLE, KEYRING_AVAILABLE

        lib_str = join_nonempty(
            compat_pycrypto_AES and compat_pycrypto_AES.__name__.split('.')[0],
            KEYRING_AVAILABLE and 'keyring',
            has_mutagen and 'mutagen',
            SQLITE_AVAILABLE and 'sqlite',
            has_websockets and 'websockets',
            delim=', ') or 'none'
        write_debug('Optional libraries: %s' % lib_str)

        # Merge the proxies of every opener handler that has a `proxies` attribute
        proxy_map = {}
        for handler in self._opener.handlers:
            if hasattr(handler, 'proxies'):
                proxy_map.update(handler.proxies)
        write_debug(f'Proxy map: {proxy_map}')

        # Not implemented
        # (the `False and` guard makes this branch unreachable)
        if False and self.params.get('call_home'):
            ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode('utf-8')
            write_debug('Public IP address: %s' % ipaddr)
            latest_version = self.urlopen(
                'https://yt-dl.org/latest/version').read().decode('utf-8')
            if version_tuple(latest_version) > version_tuple(__version__):
                self.report_warning(
                    'You are using an outdated version (newest version: %s)! '
                    'See https://yt-dl.org/update if you need help updating.' %
                    latest_version)
3495
3496     def _setup_opener(self):
3497         timeout_val = self.params.get('socket_timeout')
3498         self._socket_timeout = 20 if timeout_val is None else float(timeout_val)
3499
3500         opts_cookiesfrombrowser = self.params.get('cookiesfrombrowser')
3501         opts_cookiefile = self.params.get('cookiefile')
3502         opts_proxy = self.params.get('proxy')
3503
3504         self.cookiejar = load_cookies(opts_cookiefile, opts_cookiesfrombrowser, self)
3505
3506         cookie_processor = YoutubeDLCookieProcessor(self.cookiejar)
3507         if opts_proxy is not None:
3508             if opts_proxy == '':
3509                 proxies = {}
3510             else:
3511                 proxies = {'http': opts_proxy, 'https': opts_proxy}
3512         else:
3513             proxies = compat_urllib_request.getproxies()
3514             # Set HTTPS proxy to HTTP one if given (https://github.com/ytdl-org/youtube-dl/issues/805)
3515             if 'http' in proxies and 'https' not in proxies:
3516                 proxies['https'] = proxies['http']
3517         proxy_handler = PerRequestProxyHandler(proxies)
3518
3519         debuglevel = 1 if self.params.get('debug_printtraffic') else 0
3520         https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
3521         ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
3522         redirect_handler = YoutubeDLRedirectHandler()
3523         data_handler = compat_urllib_request_DataHandler()
3524
3525         # When passing our own FileHandler instance, build_opener won't add the
3526         # default FileHandler and allows us to disable the file protocol, which
3527         # can be used for malicious purposes (see
3528         # https://github.com/ytdl-org/youtube-dl/issues/8227)
3529         file_handler = compat_urllib_request.FileHandler()
3530
3531         def file_open(*args, **kwargs):
3532             raise compat_urllib_error.URLError('file:// scheme is explicitly disabled in yt-dlp for security reasons')
3533         file_handler.file_open = file_open
3534
3535         opener = compat_urllib_request.build_opener(
3536             proxy_handler, https_handler, cookie_processor, ydlh, redirect_handler, data_handler, file_handler)
3537
3538         # Delete the default user-agent header, which would otherwise apply in
3539         # cases where our custom HTTP handler doesn't come into play
3540         # (See https://github.com/ytdl-org/youtube-dl/issues/1309 for details)
3541         opener.addheaders = []
3542         self._opener = opener
3543
3544     def encode(self, s):
3545         if isinstance(s, bytes):
3546             return s  # Already encoded
3547
3548         try:
3549             return s.encode(self.get_encoding())
3550         except UnicodeEncodeError as err:
3551             err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
3552             raise
3553
3554     def get_encoding(self):
3555         encoding = self.params.get('encoding')
3556         if encoding is None:
3557             encoding = preferredencoding()
3558         return encoding
3559
3560     def _write_info_json(self, label, ie_result, infofn, overwrite=None):
3561         ''' Write infojson and returns True = written, False = skip, None = error '''
3562         if overwrite is None:
3563             overwrite = self.params.get('overwrites', True)
3564         if not self.params.get('writeinfojson'):
3565             return False
3566         elif not infofn:
3567             self.write_debug(f'Skipping writing {label} infojson')
3568             return False
3569         elif not self._ensure_dir_exists(infofn):
3570             return None
3571         elif not overwrite and os.path.exists(infofn):
3572             self.to_screen(f'[info] {label.title()} metadata is already present')
3573         else:
3574             self.to_screen(f'[info] Writing {label} metadata as JSON to: {infofn}')
3575             try:
3576                 write_json_file(self.sanitize_info(ie_result, self.params.get('clean_infojson', True)), infofn)
3577             except (OSError, IOError):
3578                 self.report_error(f'Cannot write {label} metadata to JSON file {infofn}')
3579                 return None
3580         return True
3581
3582     def _write_description(self, label, ie_result, descfn):
3583         ''' Write description and returns True = written, False = skip, None = error '''
3584         if not self.params.get('writedescription'):
3585             return False
3586         elif not descfn:
3587             self.write_debug(f'Skipping writing {label} description')
3588             return False
3589         elif not self._ensure_dir_exists(descfn):
3590             return None
3591         elif not self.params.get('overwrites', True) and os.path.exists(descfn):
3592             self.to_screen(f'[info] {label.title()} description is already present')
3593         elif ie_result.get('description') is None:
3594             self.report_warning(f'There\'s no {label} description to write')
3595             return False
3596         else:
3597             try:
3598                 self.to_screen(f'[info] Writing {label} description to: {descfn}')
3599                 with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
3600                     descfile.write(ie_result['description'])
3601             except (OSError, IOError):
3602                 self.report_error(f'Cannot write {label} description file {descfn}')
3603                 return None
3604         return True
3605
3606     def _write_subtitles(self, info_dict, filename):
3607         ''' Write subtitles to file and return list of (sub_filename, final_sub_filename); or None if error'''
3608         ret = []
3609         subtitles = info_dict.get('requested_subtitles')
3610         if not subtitles or not (self.params.get('writesubtitles') or self.params.get('writeautomaticsub')):
3611             # subtitles download errors are already managed as troubles in relevant IE
3612             # that way it will silently go on when used with unsupporting IE
3613             return ret
3614
3615         sub_filename_base = self.prepare_filename(info_dict, 'subtitle')
3616         if not sub_filename_base:
3617             self.to_screen('[info] Skipping writing video subtitles')
3618             return ret
3619         for sub_lang, sub_info in subtitles.items():
3620             sub_format = sub_info['ext']
3621             sub_filename = subtitles_filename(filename, sub_lang, sub_format, info_dict.get('ext'))
3622             sub_filename_final = subtitles_filename(sub_filename_base, sub_lang, sub_format, info_dict.get('ext'))
3623             if not self.params.get('overwrites', True) and os.path.exists(sub_filename):
3624                 self.to_screen(f'[info] Video subtitle {sub_lang}.{sub_format} is already present')
3625                 sub_info['filepath'] = sub_filename
3626                 ret.append((sub_filename, sub_filename_final))
3627                 continue
3628
3629             self.to_screen(f'[info] Writing video subtitles to: {sub_filename}')
3630             if sub_info.get('data') is not None:
3631                 try:
3632                     # Use newline='' to prevent conversion of newline characters
3633                     # See https://github.com/ytdl-org/youtube-dl/issues/10268
3634                     with io.open(sub_filename, 'w', encoding='utf-8', newline='') as subfile:
3635                         subfile.write(sub_info['data'])
3636                     sub_info['filepath'] = sub_filename
3637                     ret.append((sub_filename, sub_filename_final))
3638                     continue
3639                 except (OSError, IOError):
3640                     self.report_error(f'Cannot write video subtitles file {sub_filename}')
3641                     return None
3642
3643             try:
3644                 sub_copy = sub_info.copy()
3645                 sub_copy.setdefault('http_headers', info_dict.get('http_headers'))
3646                 self.dl(sub_filename, sub_copy, subtitle=True)
3647                 sub_info['filepath'] = sub_filename
3648                 ret.append((sub_filename, sub_filename_final))
3649             except (ExtractorError, IOError, OSError, ValueError) + network_exceptions as err:
3650                 self.report_warning(f'Unable to download video subtitles for {sub_lang!r}: {err}')
3651                 continue
3652         return ret
3653
3654     def _write_thumbnails(self, label, info_dict, filename, thumb_filename_base=None):
3655         ''' Write thumbnails to file and return list of (thumb_filename, final_thumb_filename) '''
3656         write_all = self.params.get('write_all_thumbnails', False)
3657         thumbnails, ret = [], []
3658         if write_all or self.params.get('writethumbnail', False):
3659             thumbnails = info_dict.get('thumbnails') or []
3660         multiple = write_all and len(thumbnails) > 1
3661
3662         if thumb_filename_base is None:
3663             thumb_filename_base = filename
3664         if thumbnails and not thumb_filename_base:
3665             self.write_debug(f'Skipping writing {label} thumbnail')
3666             return ret
3667
3668         for t in thumbnails[::-1]:
3669             thumb_ext = (f'{t["id"]}.' if multiple else '') + determine_ext(t['url'], 'jpg')
3670             thumb_display_id = f'{label} thumbnail {t["id"]}'
3671             thumb_filename = replace_extension(filename, thumb_ext, info_dict.get('ext'))
3672             thumb_filename_final = replace_extension(thumb_filename_base, thumb_ext, info_dict.get('ext'))
3673
3674             if not self.params.get('overwrites', True) and os.path.exists(thumb_filename):
3675                 ret.append((thumb_filename, thumb_filename_final))
3676                 t['filepath'] = thumb_filename
3677                 self.to_screen('[info] %s is already present' % (
3678                     thumb_display_id if multiple else f'{label} thumbnail').capitalize())
3679             else:
3680                 self.to_screen(f'[info] Downloading {thumb_display_id} ...')
3681                 try:
3682                     uf = self.urlopen(t['url'])
3683                     self.to_screen(f'[info] Writing {thumb_display_id} to: {thumb_filename}')
3684                     with open(encodeFilename(thumb_filename), 'wb') as thumbf:
3685                         shutil.copyfileobj(uf, thumbf)
3686                     ret.append((thumb_filename, thumb_filename_final))
3687                     t['filepath'] = thumb_filename
3688                 except network_exceptions as err:
3689                     self.report_warning(f'Unable to download {thumb_display_id}: {err}')
3690             if ret and not write_all:
3691                 break
3692         return ret