yt_dlp/YoutubeDL.py

   1 #!/usr/bin/env python3
   2 # coding: utf-8
   3
   4 from __future__ import absolute_import, unicode_literals
   5
   6 import collections
   7 import contextlib
   8 import datetime
   9 import errno
  10 import fileinput
  11 import functools
  12 import io
  13 import itertools
  14 import json
  15 import locale
  16 import operator
  17 import os
  18 import platform
  19 import re
  20 import shutil
  21 import subprocess
  22 import sys
  23 import tempfile
  24 import time
  25 import tokenize
  26 import traceback
  27 import random
  28 import unicodedata
  29
  30 from enum import Enum
  31 from string import ascii_letters
  32
  33 from .compat import (
  34     compat_basestring,
  35     compat_get_terminal_size,
  36     compat_kwargs,
  37     compat_numeric_types,
  38     compat_os_name,
  39     compat_pycrypto_AES,
  40     compat_shlex_quote,
  41     compat_str,
  42     compat_tokenize_tokenize,
  43     compat_urllib_error,
  44     compat_urllib_request,
  45     compat_urllib_request_DataHandler,
  46     windows_enable_vt_mode,
  47 )
  48 from .cookies import load_cookies
  49 from .utils import (
  50     age_restricted,
  51     args_to_str,
  52     ContentTooShortError,
  53     date_from_str,
  54     DateRange,
  55     DEFAULT_OUTTMPL,
  56     determine_ext,
  57     determine_protocol,
  58     DownloadCancelled,
  59     DownloadError,
  60     encode_compat_str,
  61     encodeFilename,
  62     EntryNotInPlaylist,
  63     error_to_compat_str,
  64     ExistingVideoReached,
  65     expand_path,
  66     ExtractorError,
  67     float_or_none,
  68     format_bytes,
  69     format_field,
  70     formatSeconds,
  71     GeoRestrictedError,
  72     get_domain,
  73     HEADRequest,
  74     int_or_none,
  75     iri_to_uri,
  76     ISO3166Utils,
  77     join_nonempty,
  78     LazyList,
  79     LINK_TEMPLATES,
  80     locked_file,
  81     make_dir,
  82     make_HTTPS_handler,
  83     MaxDownloadsReached,
  84     network_exceptions,
  85     number_of_digits,
  86     orderedSet,
  87     OUTTMPL_TYPES,
  88     PagedList,
  89     parse_filesize,
  90     PerRequestProxyHandler,
  91     platform_name,
  92     Popen,
  93     PostProcessingError,
  94     preferredencoding,
  95     prepend_extension,
  96     ReExtractInfo,
  97     register_socks_protocols,
  98     RejectedVideoReached,
  99     remove_terminal_sequences,
 100     render_table,
 101     replace_extension,
 102     SameFileError,
 103     sanitize_filename,
 104     sanitize_path,
 105     sanitize_url,
 106     sanitized_Request,
 107     std_headers,
 108     STR_FORMAT_RE_TMPL,
 109     STR_FORMAT_TYPES,
 110     str_or_none,
 111     strftime_or_none,
 112     subtitles_filename,
 113     supports_terminal_sequences,
 114     timetuple_from_msec,
 115     to_high_limit_path,
 116     traverse_obj,
 117     try_get,
 118     UnavailableVideoError,
 119     url_basename,
 120     variadic,
 121     version_tuple,
 122     write_json_file,
 123     write_string,
 124     YoutubeDLCookieProcessor,
 125     YoutubeDLHandler,
 126     YoutubeDLRedirectHandler,
 127 )
 128 from .cache import Cache
 129 from .minicurses import format_text
 130 from .extractor import (
 131     gen_extractor_classes,
 132     get_info_extractor,
 133     _LAZY_LOADER,
 134     _PLUGIN_CLASSES as plugin_extractors
 135 )
 136 from .extractor.openload import PhantomJSwrapper
 137 from .downloader import (
 138     FFmpegFD,
 139     get_suitable_downloader,
 140     shorten_protocol_name
 141 )
 142 from .downloader.rtmp import rtmpdump_version
 143 from .postprocessor import (
 144     get_postprocessor,
 145     EmbedThumbnailPP,
 146     FFmpegFixupDuplicateMoovPP,
 147     FFmpegFixupDurationPP,
 148     FFmpegFixupM3u8PP,
 149     FFmpegFixupM4aPP,
 150     FFmpegFixupStretchedPP,
 151     FFmpegFixupTimestampPP,
 152     FFmpegMergerPP,
 153     FFmpegPostProcessor,
 154     MoveFilesAfterDownloadPP,
 155     _PLUGIN_CLASSES as plugin_postprocessors
 156 )
 157 from .update import detect_variant
 158 from .version import __version__, RELEASE_GIT_HEAD
 159
 160 if compat_os_name == 'nt':
 161     import ctypes
 162
 163
 164 class YoutubeDL(object):
 165     """YoutubeDL class.
 166
  167     YoutubeDL objects are the ones responsible for downloading the
 168     actual video file and writing it to disk if the user has requested
 169     it, among some other tasks. In most cases there should be one per
 170     program. As, given a video URL, the downloader doesn't know how to
 171     extract all the needed information, task that InfoExtractors do, it
 172     has to pass the URL to one of them.
 173
 174     For this, YoutubeDL objects have a method that allows
 175     InfoExtractors to be registered in a given order. When it is passed
  176     a URL, the YoutubeDL object hands it to the first InfoExtractor it
  177     finds that reports being able to handle it. The InfoExtractor extracts
  178     all the information about the video or videos the URL refers to, and
  179     YoutubeDL processes the extracted information, possibly using a File
 180     Downloader to download the video.
 181
 182     YoutubeDL objects accept a lot of parameters. In order not to saturate
 183     the object constructor with arguments, it receives a dictionary of
 184     options instead. These options are available through the params
 185     attribute for the InfoExtractors to use. The YoutubeDL also
 186     registers itself as the downloader in charge for the InfoExtractors
 187     that are added to it, so this is a "mutual registration".
 188
 189     Available options:
 190
 191     username:          Username for authentication purposes.
 192     password:          Password for authentication purposes.
 193     videopassword:     Password for accessing a video.
 194     ap_mso:            Adobe Pass multiple-system operator identifier.
 195     ap_username:       Multiple-system operator account username.
 196     ap_password:       Multiple-system operator account password.
 197     usenetrc:          Use netrc for authentication instead.
 198     verbose:           Print additional info to stdout.
 199     quiet:             Do not print messages to stdout.
 200     no_warnings:       Do not print out anything for warnings.
 201     forceprint:        A list of templates to force print
 202     forceurl:          Force printing final URL. (Deprecated)
 203     forcetitle:        Force printing title. (Deprecated)
 204     forceid:           Force printing ID. (Deprecated)
 205     forcethumbnail:    Force printing thumbnail URL. (Deprecated)
 206     forcedescription:  Force printing description. (Deprecated)
 207     forcefilename:     Force printing final filename. (Deprecated)
 208     forceduration:     Force printing duration. (Deprecated)
 209     forcejson:         Force printing info_dict as JSON.
 210     dump_single_json:  Force printing the info_dict of the whole playlist
 211                        (or video) as a single JSON line.
 212     force_write_download_archive: Force writing download archive regardless
 213                        of 'skip_download' or 'simulate'.
 214     simulate:          Do not download the video files. If unset (or None),
 215                        simulate only if listsubtitles, listformats or list_thumbnails is used
  216     format:            Video format code. See "FORMAT SELECTION" for more details.
 217                        You can also pass a function. The function takes 'ctx' as
 218                        argument and returns the formats to download.
 219                        See "build_format_selector" for an implementation
 220     allow_unplayable_formats:   Allow unplayable formats to be extracted and downloaded.
  221     ignore_no_formats_error: Ignore "No video formats" error. Useful for
 222                        extracting metadata even if the video is not actually
 223                        available for download (experimental)
 224     format_sort:       A list of fields by which to sort the video formats.
 225                        See "Sorting Formats" for more details.
  226     format_sort_force: Force the given format_sort. See "Sorting Formats"
 227                        for more details.
 228     allow_multiple_video_streams:   Allow multiple video streams to be merged
 229                        into a single file
 230     allow_multiple_audio_streams:   Allow multiple audio streams to be merged
 231                        into a single file
  232     check_formats:     Whether to test if the formats are downloadable.
 233                        Can be True (check all), False (check none),
 234                        'selected' (check selected formats),
 235                        or None (check only if requested by extractor)
 236     paths:             Dictionary of output paths. The allowed keys are 'home'
 237                        'temp' and the keys of OUTTMPL_TYPES (in utils.py)
 238     outtmpl:           Dictionary of templates for output names. Allowed keys
 239                        are 'default' and the keys of OUTTMPL_TYPES (in utils.py).
 240                        For compatibility with youtube-dl, a single string can also be used
 241     outtmpl_na_placeholder: Placeholder for unavailable meta fields.
 242     restrictfilenames: Do not allow "&" and spaces in file names
 243     trim_file_name:    Limit length of filename (extension excluded)
 244     windowsfilenames:  Force the filenames to be windows compatible
 245     ignoreerrors:      Do not stop on download/postprocessing errors.
 246                        Can be 'only_download' to ignore only download errors.
 247                        Default is 'only_download' for CLI, but False for API
 248     skip_playlist_after_errors: Number of allowed failures until the rest of
 249                        the playlist is skipped
 250     force_generic_extractor: Force downloader to use the generic extractor
 251     overwrites:        Overwrite all video and metadata files if True,
 252                        overwrite only non-video files if None
 253                        and don't overwrite any file if False
 254                        For compatibility with youtube-dl,
 255                        "nooverwrites" may also be used instead
 256     playliststart:     Playlist item to start at.
 257     playlistend:       Playlist item to end at.
 258     playlist_items:    Specific indices of playlist to download.
 259     playlistreverse:   Download playlist items in reverse order.
 260     playlistrandom:    Download playlist items in random order.
 261     matchtitle:        Download only matching titles.
 262     rejecttitle:       Reject downloads for matching titles.
 263     logger:            Log messages to a logging.Logger instance.
 264     logtostderr:       Log messages to stderr instead of stdout.
  265     consoletitle:      Display progress in console window's titlebar.
 266     writedescription:  Write the video description to a .description file
  267     writeinfojson:     Write the video metadata to a .info.json file
 268     clean_infojson:    Remove private fields from the infojson
 269     getcomments:       Extract video comments. This will not be written to disk
 270                        unless writeinfojson is also given
 271     writeannotations:  Write the video annotations to a .annotations.xml file
 272     writethumbnail:    Write the thumbnail image to a file
 273     allow_playlist_files: Whether to write playlists' description, infojson etc
 274                        also to disk when using the 'write*' options
 275     write_all_thumbnails:  Write all thumbnail formats to files
 276     writelink:         Write an internet shortcut file, depending on the
 277                        current platform (.url/.webloc/.desktop)
 278     writeurllink:      Write a Windows internet shortcut file (.url)
 279     writewebloclink:   Write a macOS internet shortcut file (.webloc)
 280     writedesktoplink:  Write a Linux internet shortcut file (.desktop)
 281     writesubtitles:    Write the video subtitles to a file
 282     writeautomaticsub: Write the automatically generated subtitles to a file
 283     allsubtitles:      Deprecated - Use subtitleslangs = ['all']
 284                        Downloads all the subtitles of the video
 285                        (requires writesubtitles or writeautomaticsub)
 286     listsubtitles:     Lists all available subtitles for the video
 287     subtitlesformat:   The format code for subtitles
 288     subtitleslangs:    List of languages of the subtitles to download (can be regex).
 289                        The list may contain "all" to refer to all the available
 290                        subtitles. The language can be prefixed with a "-" to
 291                        exclude it from the requested languages. Eg: ['all', '-live_chat']
 292     keepvideo:         Keep the video file after post-processing
 293     daterange:         A DateRange object, download only if the upload_date is in the range.
 294     skip_download:     Skip the actual download of the video file
 295     cachedir:          Location of the cache files in the filesystem.
 296                        False to disable filesystem cache.
 297     noplaylist:        Download single video instead of a playlist if in doubt.
 298     age_limit:         An integer representing the user's age in years.
 299                        Unsuitable videos for the given age are skipped.
 300     min_views:         An integer representing the minimum view count the video
 301                        must have in order to not be skipped.
 302                        Videos without view count information are always
 303                        downloaded. None for no limit.
 304     max_views:         An integer representing the maximum view count.
 305                        Videos that are more popular than that are not
 306                        downloaded.
 307                        Videos without view count information are always
 308                        downloaded. None for no limit.
 309     download_archive:  File name of a file where all downloads are recorded.
 310                        Videos already present in the file are not downloaded
 311                        again.
 312     break_on_existing: Stop the download process after attempting to download a
 313                        file that is in the archive.
 314     break_on_reject:   Stop the download process when encountering a video that
 315                        has been filtered out.
 316     break_per_url:     Whether break_on_reject and break_on_existing
 317                        should act on each input URL as opposed to for the entire queue
 318     cookiefile:        File name where cookies should be read from and dumped to
 319     cookiesfrombrowser: A tuple containing the name of the browser and the profile
 320                        name/path from where cookies are loaded.
 321                        Eg: ('chrome', ) or ('vivaldi', 'default')
 322     nocheckcertificate:Do not verify SSL certificates
 323     prefer_insecure:   Use HTTP instead of HTTPS to retrieve information.
 324                        At the moment, this is only supported by YouTube.
 325     proxy:             URL of the proxy server to use
 326     geo_verification_proxy:  URL of the proxy to use for IP address verification
 327                        on geo-restricted sites.
 328     socket_timeout:    Time to wait for unresponsive hosts, in seconds
 329     bidi_workaround:   Work around buggy terminals without bidirectional text
  330                        support, using fribidi
 331     debug_printtraffic:Print out sent and received HTTP traffic
 332     include_ads:       Download ads as well (deprecated)
 333     default_search:    Prepend this string if an input url is not valid.
 334                        'auto' for elaborate guessing
 335     encoding:          Use this encoding instead of the system-specified.
 336     extract_flat:      Do not resolve URLs, return the immediate result.
 337                        Pass in 'in_playlist' to only show this behavior for
 338                        playlist items.
 339     wait_for_video:    If given, wait for scheduled streams to become available.
 340                        The value should be a tuple containing the range
 341                        (min_secs, max_secs) to wait between retries
 342     postprocessors:    A list of dictionaries, each with an entry
 343                        * key:  The name of the postprocessor. See
 344                                yt_dlp/postprocessor/__init__.py for a list.
 345                        * when: When to run the postprocessor. Can be one of
 346                                pre_process|before_dl|post_process|after_move.
 347                                Assumed to be 'post_process' if not given
 348     post_hooks:        Deprecated - Register a custom postprocessor instead
 349                        A list of functions that get called as the final step
 350                        for each video file, after all postprocessors have been
 351                        called. The filename will be passed as the only argument.
 352     progress_hooks:    A list of functions that get called on download
 353                        progress, with a dictionary with the entries
 354                        * status: One of "downloading", "error", or "finished".
 355                                  Check this first and ignore unknown values.
 356                        * info_dict: The extracted info_dict
 357
 358                        If status is one of "downloading", or "finished", the
 359                        following properties may also be present:
 360                        * filename: The final filename (always present)
 361                        * tmpfilename: The filename we're currently writing to
 362                        * downloaded_bytes: Bytes on disk
 363                        * total_bytes: Size of the whole file, None if unknown
 364                        * total_bytes_estimate: Guess of the eventual file size,
 365                                                None if unavailable.
 366                        * elapsed: The number of seconds since download started.
 367                        * eta: The estimated time in seconds, None if unknown
 368                        * speed: The download speed in bytes/second, None if
 369                                 unknown
 370                        * fragment_index: The counter of the currently
 371                                          downloaded video fragment.
 372                        * fragment_count: The number of fragments (= individual
 373                                          files that will be merged)
 374
 375                        Progress hooks are guaranteed to be called at least once
 376                        (with status "finished") if the download is successful.
 377     postprocessor_hooks:  A list of functions that get called on postprocessing
 378                        progress, with a dictionary with the entries
 379                        * status: One of "started", "processing", or "finished".
 380                                  Check this first and ignore unknown values.
 381                        * postprocessor: Name of the postprocessor
 382                        * info_dict: The extracted info_dict
 383
  384                        Postprocessor hooks are guaranteed to be called at least twice
 385                        (with status "started" and "finished") if the processing is successful.
 386     merge_output_format: Extension to use when merging formats.
 387     final_ext:         Expected final extension; used to detect when the file was
 388                        already downloaded and converted
 389     fixup:             Automatically correct known faults of the file.
 390                        One of:
 391                        - "never": do nothing
 392                        - "warn": only emit a warning
 393                        - "detect_or_warn": check whether we can do anything
 394                                            about it, warn otherwise (default)
 395     source_address:    Client-side IP address to bind to.
 396     call_home:         Boolean, true iff we are allowed to contact the
 397                        yt-dlp servers for debugging. (BROKEN)
 398     sleep_interval_requests: Number of seconds to sleep between requests
 399                        during extraction
 400     sleep_interval:    Number of seconds to sleep before each download when
 401                        used alone or a lower bound of a range for randomized
 402                        sleep before each download (minimum possible number
 403                        of seconds to sleep) when used along with
 404                        max_sleep_interval.
 405     max_sleep_interval:Upper bound of a range for randomized sleep before each
 406                        download (maximum possible number of seconds to sleep).
 407                        Must only be used along with sleep_interval.
 408                        Actual sleep time will be a random float from range
 409                        [sleep_interval; max_sleep_interval].
 410     sleep_interval_subtitles: Number of seconds to sleep before each subtitle download
 411     listformats:       Print an overview of available video formats and exit.
 412     list_thumbnails:   Print a table of all thumbnails and exit.
 413     match_filter:      A function that gets called with the info_dict of
 414                        every video.
 415                        If it returns a message, the video is ignored.
 416                        If it returns None, the video is downloaded.
 417                        match_filter_func in utils.py is one example for this.
 418     no_color:          Do not emit color codes in output.
 419     geo_bypass:        Bypass geographic restriction via faking X-Forwarded-For
 420                        HTTP header
 421     geo_bypass_country:
  422                        Two-letter ISO 3166-1 alpha-2 country code that will be used for
 423                        explicit geographic restriction bypassing via faking
 424                        X-Forwarded-For HTTP header
 425     geo_bypass_ip_block:
 426                        IP range in CIDR notation that will be used similarly to
 427                        geo_bypass_country
 428
 429     The following options determine which downloader is picked:
 430     external_downloader: A dictionary of protocol keys and the executable of the
 431                        external downloader to use for it. The allowed protocols
 432                        are default|http|ftp|m3u8|dash|rtsp|rtmp|mms.
 433                        Set the value to 'native' to use the native downloader
 434     hls_prefer_native: Deprecated - Use external_downloader = {'m3u8': 'native'}
 435                        or {'m3u8': 'ffmpeg'} instead.
 436                        Use the native HLS downloader instead of ffmpeg/avconv
 437                        if True, otherwise use ffmpeg/avconv if False, otherwise
 438                        use downloader suggested by extractor if None.
 439     compat_opts:       Compatibility options. See "Differences in default behavior".
 440                        The following options do not work when used through the API:
 441                        filename, abort-on-error, multistreams, no-live-chat, format-sort
 442                        no-clean-infojson, no-playlist-metafiles, no-keep-subs, no-attach-info-json.
  443                        Refer to __init__.py for their implementation
 444     progress_template: Dictionary of templates for progress outputs.
 445                        Allowed keys are 'download', 'postprocess',
 446                        'download-title' (console title) and 'postprocess-title'.
 447                        The template is mapped on a dictionary with keys 'progress' and 'info'
 448
 449     The following parameters are not used by YoutubeDL itself, they are used by
 450     the downloader (see yt_dlp/downloader/common.py):
 451     nopart, updatetime, buffersize, ratelimit, throttledratelimit, min_filesize,
 452     max_filesize, test, noresizebuffer, retries, fragment_retries, continuedl,
 453     noprogress, xattr_set_filesize, hls_use_mpegts, http_chunk_size,
 454     external_downloader_args, concurrent_fragment_downloads.
 455
 456     The following options are used by the post processors:
 457     prefer_ffmpeg:     If False, use avconv instead of ffmpeg if both are available,
 458                        otherwise prefer ffmpeg. (avconv support is deprecated)
 459     ffmpeg_location:   Location of the ffmpeg/avconv binary; either the path
 460                        to the binary or its containing directory.
 461     postprocessor_args: A dictionary of postprocessor/executable keys (in lower case)
 462                        and a list of additional command-line arguments for the
 463                        postprocessor/executable. The dict can also have "PP+EXE" keys
 464                        which are used when the given exe is used by the given PP.
  465                        Use 'default' as the name for arguments to be passed to all PP
 466                        For compatibility with youtube-dl, a single list of args
 467                        can also be used
 468
 469     The following options are used by the extractors:
 470     extractor_retries: Number of times to retry for known errors
 471     dynamic_mpd:       Whether to process dynamic DASH manifests (default: True)
 472     hls_split_discontinuity: Split HLS playlists to different formats at
 473                        discontinuities such as ad breaks (default: False)
 474     extractor_args:    A dictionary of arguments to be passed to the extractors.
 475                        See "EXTRACTOR ARGUMENTS" for details.
 476                        Eg: {'youtube': {'skip': ['dash', 'hls']}}
 477     youtube_include_dash_manifest: Deprecated - Use extractor_args instead.
 478                        If True (default), DASH manifests and related
 479                        data will be downloaded and processed by extractor.
 480                        You can reduce network I/O by disabling it if you don't
 481                        care about DASH. (only for youtube)
 482     youtube_include_hls_manifest: Deprecated - Use extractor_args instead.
 483                        If True (default), HLS manifests and related
 484                        data will be downloaded and processed by extractor.
 485                        You can reduce network I/O by disabling it if you don't
 486                        care about HLS. (only for youtube)
 487     """
 488
 489     _NUMERIC_FIELDS = set((
 490         'width', 'height', 'tbr', 'abr', 'asr', 'vbr', 'fps', 'filesize', 'filesize_approx',
 491         'timestamp', 'release_timestamp',
 492         'duration', 'view_count', 'like_count', 'dislike_count', 'repost_count',
 493         'average_rating', 'comment_count', 'age_limit',
 494         'start_time', 'end_time',
 495         'chapter_number', 'season_number', 'episode_number',
 496         'track_number', 'disc_number', 'release_year',
 497     ))
 498
 499     _format_selection_exts = {
 500         'audio': {'m4a', 'mp3', 'ogg', 'aac'},
 501         'video': {'mp4', 'flv', 'webm', '3gp'},
 502         'storyboards': {'mhtml'},
 503     }
 504
 505     params = None
 506     _ies = {}
 507     _pps = {'pre_process': [], 'before_dl': [], 'after_move': [], 'post_process': []}
 508     _printed_messages = set()
 509     _first_webpage_request = True
 510     _download_retcode = None
 511     _num_downloads = None
 512     _playlist_level = 0
 513     _playlist_urls = set()
 514     _screen_file = None
 515
 516     def __init__(self, params=None, auto_init=True):
  517         """Create a YoutubeDL object with the given options.
 518         @param auto_init    Whether to load the default extractors and print header (if verbose).
 519                             Set to 'no_verbose_header' to not print the header
 520         """
 521         if params is None:
 522             params = {}
 523         self._ies = {}
 524         self._ies_instances = {}
 525         self._pps = {'pre_process': [], 'before_dl': [], 'after_move': [], 'post_process': []}
 526         self._printed_messages = set()
 527         self._first_webpage_request = True
 528         self._post_hooks = []
 529         self._progress_hooks = []
 530         self._postprocessor_hooks = []
 531         self._download_retcode = 0
 532         self._num_downloads = 0
 533         self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
 534         self._err_file = sys.stderr
 535         self.params = params
 536         self.cache = Cache(self)
 537
 538         windows_enable_vt_mode()
 539         self._allow_colors = {
 540             'screen': not self.params.get('no_color') and supports_terminal_sequences(self._screen_file),
 541             'err': not self.params.get('no_color') and supports_terminal_sequences(self._err_file),
 542         }
 543
 544         if sys.version_info < (3, 6):
 545             self.report_warning(
 546                 'Python version %d.%d is not supported! Please update to Python 3.6 or above' % sys.version_info[:2])
 547
 548         if self.params.get('allow_unplayable_formats'):
 549             self.report_warning(
 550                 f'You have asked for {self._format_err("UNPLAYABLE", self.Styles.EMPHASIS)} formats to be listed/downloaded. '
 551                 'This is a developer option intended for debugging. \n'
 552                 '         If you experience any issues while using this option, '
 553                 f'{self._format_err("DO NOT", self.Styles.ERROR)} open a bug report')
 554
 555         def check_deprecated(param, option, suggestion):
 556             if self.params.get(param) is not None:
 557                 self.report_warning('%s is deprecated. Use %s instead' % (option, suggestion))
 558                 return True
 559             return False
 560
 561         if check_deprecated('cn_verification_proxy', '--cn-verification-proxy', '--geo-verification-proxy'):
 562             if self.params.get('geo_verification_proxy') is None:
 563                 self.params['geo_verification_proxy'] = self.params['cn_verification_proxy']
 564
 565         check_deprecated('autonumber', '--auto-number', '-o "%(autonumber)s-%(title)s.%(ext)s"')
 566         check_deprecated('usetitle', '--title', '-o "%(title)s-%(id)s.%(ext)s"')
 567         check_deprecated('useid', '--id', '-o "%(id)s.%(ext)s"')
 568
 569         for msg in self.params.get('_warnings', []):
 570             self.report_warning(msg)
 571         for msg in self.params.get('_deprecation_warnings', []):
 572             self.deprecation_warning(msg)
 573
 574         if 'list-formats' in self.params.get('compat_opts', []):
 575             self.params['listformats_table'] = False
 576
 577         if 'overwrites' not in self.params and self.params.get('nooverwrites') is not None:
 578             # nooverwrites was unnecessarily changed to overwrites
 579             # in 0c3d0f51778b153f65c21906031c2e091fcfb641
 580             # This ensures compatibility with both keys
 581             self.params['overwrites'] = not self.params['nooverwrites']
 582         elif self.params.get('overwrites') is None:
 583             self.params.pop('overwrites', None)
 584         else:
 585             self.params['nooverwrites'] = not self.params['overwrites']
 586
 587         if params.get('bidi_workaround', False):
 588             try:
 589                 import pty
 590                 master, slave = pty.openpty()
 591                 width = compat_get_terminal_size().columns
 592                 if width is None:
 593                     width_args = []
 594                 else:
 595                     width_args = ['-w', str(width)]
 596                 sp_kwargs = dict(
 597                     stdin=subprocess.PIPE,
 598                     stdout=slave,
 599                     stderr=self._err_file)
 600                 try:
 601                     self._output_process = Popen(['bidiv'] + width_args, **sp_kwargs)
 602                 except OSError:
 603                     self._output_process = Popen(['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
 604                 self._output_channel = os.fdopen(master, 'rb')
 605             except OSError as ose:
 606                 if ose.errno == errno.ENOENT:
 607                     self.report_warning(
 608                         'Could not find fribidi executable, ignoring --bidi-workaround. '
 609                         'Make sure that  fribidi  is an executable file in one of the directories in your $PATH.')
 610                 else:
 611                     raise
 612
 613         if (sys.platform != 'win32'
 614                 and sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
 615                 and not params.get('restrictfilenames', False)):
 616             # Unicode filesystem API will throw errors (#1474, #13027)
 617             self.report_warning(
 618                 'Assuming --restrict-filenames since file system encoding '
 619                 'cannot encode all characters. '
 620                 'Set the LC_ALL environment variable to fix this.')
 621             self.params['restrictfilenames'] = True
 622
 623         self.outtmpl_dict = self.parse_outtmpl()
 624
 625         # Creating format selector here allows us to catch syntax errors before the extraction
 626         self.format_selector = (
 627             None if self.params.get('format') is None
 628             else self.params['format'] if callable(self.params['format'])
 629             else self.build_format_selector(self.params['format']))
 630
 631         self._setup_opener()
 632
 633         if auto_init:
 634             if auto_init != 'no_verbose_header':
 635                 self.print_debug_header()
 636             self.add_default_info_extractors()
 637
 638         hooks = {
 639             'post_hooks': self.add_post_hook,
 640             'progress_hooks': self.add_progress_hook,
 641             'postprocessor_hooks': self.add_postprocessor_hook,
 642         }
 643         for opt, fn in hooks.items():
 644             for ph in self.params.get(opt, []):
 645                 fn(ph)
 646
 647         for pp_def_raw in self.params.get('postprocessors', []):
 648             pp_def = dict(pp_def_raw)
 649             when = pp_def.pop('when', 'post_process')
 650             self.add_post_processor(
 651                 get_postprocessor(pp_def.pop('key'))(self, **compat_kwargs(pp_def)),
 652                 when=when)
 653
 654         register_socks_protocols()
 655
 656         def preload_download_archive(fn):
 657             """Preload the archive, if any is specified"""
 658             if fn is None:
 659                 return False
 660             self.write_debug(f'Loading archive file {fn!r}')
 661             try:
 662                 with locked_file(fn, 'r', encoding='utf-8') as archive_file:
 663                     for line in archive_file:
 664                         self.archive.add(line.strip())
 665             except IOError as ioe:
 666                 if ioe.errno != errno.ENOENT:
 667                     raise
 668                 return False
 669             return True
 670
 671         self.archive = set()
 672         preload_download_archive(self.params.get('download_archive'))
 673
 674     def warn_if_short_id(self, argv):
 675         # short YouTube ID starting with dash?
 676         idxs = [
 677             i for i, a in enumerate(argv)
 678             if re.match(r'^-[0-9A-Za-z_-]{10}$', a)]
 679         if idxs:
 680             correct_argv = (
 681                 ['yt-dlp']
 682                 + [a for i, a in enumerate(argv) if i not in idxs]
 683                 + ['--'] + [argv[i] for i in idxs]
 684             )
 685             self.report_warning(
 686                 'Long argument string detected. '
 687                 'Use -- to separate parameters and URLs, like this:\n%s' %
 688                 args_to_str(correct_argv))
 689
 690     def add_info_extractor(self, ie):
 691         """Add an InfoExtractor object to the end of the list."""
 692         ie_key = ie.ie_key()
 693         self._ies[ie_key] = ie
 694         if not isinstance(ie, type):
 695             self._ies_instances[ie_key] = ie
 696             ie.set_downloader(self)
 697
 698     def _get_info_extractor_class(self, ie_key):
 699         ie = self._ies.get(ie_key)
 700         if ie is None:
 701             ie = get_info_extractor(ie_key)
 702             self.add_info_extractor(ie)
 703         return ie
 704
 705     def get_info_extractor(self, ie_key):
 706         """
 707         Get an instance of an IE with name ie_key, it will try to get one from
 708         the _ies list, if there's no instance it will create a new one and add
 709         it to the extractor list.
 710         """
 711         ie = self._ies_instances.get(ie_key)
 712         if ie is None:
 713             ie = get_info_extractor(ie_key)()
 714             self.add_info_extractor(ie)
 715         return ie
 716
 717     def add_default_info_extractors(self):
 718         """
 719         Add the InfoExtractors returned by gen_extractors to the end of the list
 720         """
 721         for ie in gen_extractor_classes():
 722             self.add_info_extractor(ie)
 723
 724     def add_post_processor(self, pp, when='post_process'):
 725         """Add a PostProcessor object to the end of the chain."""
 726         self._pps[when].append(pp)
 727         pp.set_downloader(self)
 728
 729     def add_post_hook(self, ph):
 730         """Add the post hook"""
 731         self._post_hooks.append(ph)
 732
 733     def add_progress_hook(self, ph):
 734         """Add the download progress hook"""
 735         self._progress_hooks.append(ph)
 736
 737     def add_postprocessor_hook(self, ph):
 738         """Add the postprocessing progress hook"""
 739         self._postprocessor_hooks.append(ph)
 740         for pps in self._pps.values():
 741             for pp in pps:
 742                 pp.add_progress_hook(ph)
 743
 744     def _bidi_workaround(self, message):
 745         if not hasattr(self, '_output_channel'):
 746             return message
 747
 748         assert hasattr(self, '_output_process')
 749         assert isinstance(message, compat_str)
 750         line_count = message.count('\n') + 1
 751         self._output_process.stdin.write((message + '\n').encode('utf-8'))
 752         self._output_process.stdin.flush()
 753         res = ''.join(self._output_channel.readline().decode('utf-8')
 754                       for _ in range(line_count))
 755         return res[:-len('\n')]
 756
 757     def _write_string(self, message, out=None, only_once=False):
 758         if only_once:
 759             if message in self._printed_messages:
 760                 return
 761             self._printed_messages.add(message)
 762         write_string(message, out=out, encoding=self.params.get('encoding'))
 763
 764     def to_stdout(self, message, skip_eol=False, quiet=False):
 765         """Print message to stdout"""
 766         if self.params.get('logger'):
 767             self.params['logger'].debug(message)
 768         elif not quiet or self.params.get('verbose'):
 769             self._write_string(
 770                 '%s%s' % (self._bidi_workaround(message), ('' if skip_eol else '\n')),
 771                 self._err_file if quiet else self._screen_file)
 772
 773     def to_stderr(self, message, only_once=False):
 774         """Print message to stderr"""
 775         assert isinstance(message, compat_str)
 776         if self.params.get('logger'):
 777             self.params['logger'].error(message)
 778         else:
 779             self._write_string('%s\n' % self._bidi_workaround(message), self._err_file, only_once=only_once)
 780
 781     def to_console_title(self, message):
 782         if not self.params.get('consoletitle', False):
 783             return
 784         message = remove_terminal_sequences(message)
 785         if compat_os_name == 'nt':
 786             if ctypes.windll.kernel32.GetConsoleWindow():
 787                 # c_wchar_p() might not be necessary if `message` is
 788                 # already of type unicode()
 789                 ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
 790         elif 'TERM' in os.environ:
 791             self._write_string('\033]0;%s\007' % message, self._screen_file)
 792
 793     def save_console_title(self):
 794         if not self.params.get('consoletitle', False):
 795             return
 796         if self.params.get('simulate'):
 797             return
 798         if compat_os_name != 'nt' and 'TERM' in os.environ:
 799             # Save the title on stack
 800             self._write_string('\033[22;0t', self._screen_file)
 801
 802     def restore_console_title(self):
 803         if not self.params.get('consoletitle', False):
 804             return
 805         if self.params.get('simulate'):
 806             return
 807         if compat_os_name != 'nt' and 'TERM' in os.environ:
 808             # Restore the title from stack
 809             self._write_string('\033[23;0t', self._screen_file)
 810
 811     def __enter__(self):
 812         self.save_console_title()
 813         return self
 814
 815     def __exit__(self, *args):
 816         self.restore_console_title()
 817
 818         if self.params.get('cookiefile') is not None:
 819             self.cookiejar.save(ignore_discard=True, ignore_expires=True)
 820
 821     def trouble(self, message=None, tb=None):
 822         """Determine action to take when a download problem appears.
 823
 824         Depending on if the downloader has been configured to ignore
 825         download errors or not, this method may throw an exception or
 826         not when errors are found, after printing the message.
 827
 828         tb, if given, is additional traceback information.
 829         """
 830         if message is not None:
 831             self.to_stderr(message)
 832         if self.params.get('verbose'):
 833             if tb is None:
 834                 if sys.exc_info()[0]:  # if .trouble has been called from an except block
 835                     tb = ''
 836                     if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
 837                         tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
 838                     tb += encode_compat_str(traceback.format_exc())
 839                 else:
 840                     tb_data = traceback.format_list(traceback.extract_stack())
 841                     tb = ''.join(tb_data)
 842             if tb:
 843                 self.to_stderr(tb)
 844         if not self.params.get('ignoreerrors'):
 845             if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
 846                 exc_info = sys.exc_info()[1].exc_info
 847             else:
 848                 exc_info = sys.exc_info()
 849             raise DownloadError(message, exc_info)
 850         self._download_retcode = 1
 851
 852     def to_screen(self, message, skip_eol=False):
 853         """Print message to stdout if not in quiet mode"""
 854         self.to_stdout(
 855             message, skip_eol, quiet=self.params.get('quiet', False))
 856
    class Styles(Enum):
        # Named style roles for terminal output. _format_text() replaces a member
        # with its string value before handing it to format_text().
        HEADERS = 'yellow'
        EMPHASIS = 'light blue'
        ID = 'green'
        DELIM = 'blue'
        ERROR = 'red'
        # Same value as HEADERS, so Enum makes WARNING an alias of HEADERS
        WARNING = 'yellow'
        SUPPRESS = 'light black'
 865
 866     def _format_text(self, handle, allow_colors, text, f, fallback=None, *, test_encoding=False):
 867         if test_encoding:
 868             original_text = text
 869             encoding = self.params.get('encoding') or getattr(handle, 'encoding', 'ascii')
 870             text = text.encode(encoding, 'ignore').decode(encoding)
 871             if fallback is not None and text != original_text:
 872                 text = fallback
 873         if isinstance(f, self.Styles):
 874             f = f.value
 875         return format_text(text, f) if allow_colors else text if fallback is None else fallback
 876
 877     def _format_screen(self, *args, **kwargs):
 878         return self._format_text(
 879             self._screen_file, self._allow_colors['screen'], *args, **kwargs)
 880
 881     def _format_err(self, *args, **kwargs):
 882         return self._format_text(
 883             self._err_file, self._allow_colors['err'], *args, **kwargs)
 884
 885     def report_warning(self, message, only_once=False):
 886         '''
 887         Print the message to stderr, it will be prefixed with 'WARNING:'
 888         If stderr is a tty file the 'WARNING:' will be colored
 889         '''
 890         if self.params.get('logger') is not None:
 891             self.params['logger'].warning(message)
 892         else:
 893             if self.params.get('no_warnings'):
 894                 return
 895             self.to_stderr(f'{self._format_err("WARNING:", self.Styles.WARNING)} {message}', only_once)
 896
 897     def deprecation_warning(self, message):
 898         if self.params.get('logger') is not None:
 899             self.params['logger'].warning('DeprecationWarning: {message}')
 900         else:
 901             self.to_stderr(f'{self._format_err("DeprecationWarning:", self.Styles.ERROR)} {message}', True)
 902
 903     def report_error(self, message, tb=None):
 904         '''
 905         Do the same as trouble, but prefixes the message with 'ERROR:', colored
 906         in red if stderr is a tty file.
 907         '''
 908         self.trouble(f'{self._format_err("ERROR:", self.Styles.ERROR)} {message}', tb)
 909
 910     def write_debug(self, message, only_once=False):
 911         '''Log debug message or Print message to stderr'''
 912         if not self.params.get('verbose', False):
 913             return
 914         message = '[debug] %s' % message
 915         if self.params.get('logger'):
 916             self.params['logger'].debug(message)
 917         else:
 918             self.to_stderr(message, only_once)
 919
 920     def report_file_already_downloaded(self, file_name):
 921         """Report file has already been fully downloaded."""
 922         try:
 923             self.to_screen('[download] %s has already been downloaded' % file_name)
 924         except UnicodeEncodeError:
 925             self.to_screen('[download] The file has already been downloaded')
 926
 927     def report_file_delete(self, file_name):
 928         """Report that existing file will be deleted."""
 929         try:
 930             self.to_screen('Deleting existing file %s' % file_name)
 931         except UnicodeEncodeError:
 932             self.to_screen('Deleting existing file')
 933
 934     def raise_no_formats(self, info, forced=False):
 935         has_drm = info.get('__has_drm')
 936         msg = 'This video is DRM protected' if has_drm else 'No video formats found!'
 937         expected = self.params.get('ignore_no_formats_error')
 938         if forced or not expected:
 939             raise ExtractorError(msg, video_id=info['id'], ie=info['extractor'],
 940                                  expected=has_drm or expected)
 941         else:
 942             self.report_warning(msg)
 943
 944     def parse_outtmpl(self):
 945         outtmpl_dict = self.params.get('outtmpl', {})
 946         if not isinstance(outtmpl_dict, dict):
 947             outtmpl_dict = {'default': outtmpl_dict}
 948         # Remove spaces in the default template
 949         if self.params.get('restrictfilenames'):
 950             sanitize = lambda x: x.replace(' - ', ' ').replace(' ', '-')
 951         else:
 952             sanitize = lambda x: x
 953         outtmpl_dict.update({
 954             k: sanitize(v) for k, v in DEFAULT_OUTTMPL.items()
 955             if outtmpl_dict.get(k) is None})
 956         for key, val in outtmpl_dict.items():
 957             if isinstance(val, bytes):
 958                 self.report_warning(
 959                     'Parameter outtmpl is bytes, but should be a unicode string. '
 960                     'Put  from __future__ import unicode_literals  at the top of your code file or consider switching to Python 3.x.')
 961         return outtmpl_dict
 962
 963     def get_output_path(self, dir_type='', filename=None):
 964         paths = self.params.get('paths', {})
 965         assert isinstance(paths, dict)
 966         path = os.path.join(
 967             expand_path(paths.get('home', '').strip()),
 968             expand_path(paths.get(dir_type, '').strip()) if dir_type else '',
 969             filename or '')
 970
 971         # Temporary fix for #4787
 972         # 'Treat' all problem characters by passing filename through preferredencoding
 973         # to workaround encoding issues with subprocess on python2 @ Windows
 974         if sys.version_info < (3, 0) and sys.platform == 'win32':
 975             path = encodeFilename(path, True).decode(preferredencoding())
 976         return sanitize_path(path, force=self.params.get('windowsfilenames'))
 977
 978     @staticmethod
 979     def _outtmpl_expandpath(outtmpl):
 980         # expand_path translates '%%' into '%' and '$$' into '$'
 981         # correspondingly that is not what we want since we need to keep
 982         # '%%' intact for template dict substitution step. Working around
 983         # with boundary-alike separator hack.
 984         sep = ''.join([random.choice(ascii_letters) for _ in range(32)])
 985         outtmpl = outtmpl.replace('%%', '%{0}%'.format(sep)).replace('$$', '${0}$'.format(sep))
 986
 987         # outtmpl should be expand_path'ed before template dict substitution
 988         # because meta fields may contain env variables we don't want to
 989         # be expanded. For example, for outtmpl "%(title)s.%(ext)s" and
 990         # title "Hello $PATH", we don't want `$PATH` to be expanded.
 991         return expand_path(outtmpl).replace(sep, '')
 992
 993     @staticmethod
 994     def escape_outtmpl(outtmpl):
 995         ''' Escape any remaining strings like %s, %abc% etc. '''
 996         return re.sub(
 997             STR_FORMAT_RE_TMPL.format('', '(?![%(\0])'),
 998             lambda mobj: ('' if mobj.group('has_key') else '%') + mobj.group(0),
 999             outtmpl)
1000
1001     @classmethod
1002     def validate_outtmpl(cls, outtmpl):
1003         ''' @return None or Exception object '''
1004         outtmpl = re.sub(
1005             STR_FORMAT_RE_TMPL.format('[^)]*', '[ljqBU]'),
1006             lambda mobj: f'{mobj.group(0)[:-1]}s',
1007             cls._outtmpl_expandpath(outtmpl))
1008         try:
1009             cls.escape_outtmpl(outtmpl) % collections.defaultdict(int)
1010             return None
1011         except ValueError as err:
1012             return err
1013
1014     @staticmethod
1015     def _copy_infodict(info_dict):
1016         info_dict = dict(info_dict)
1017         for key in ('__original_infodict', '__postprocessors'):
1018             info_dict.pop(key, None)
1019         return info_dict
1020
    def prepare_outtmpl(self, outtmpl, info_dict, sanitize=None):
        """ Make the outtmpl and info_dict suitable for substitution: ydl.escape_outtmpl(outtmpl) % info_dict

        @param outtmpl    The output template string
        @param info_dict  The info dict. 'epoch' is set on the passed dict itself if
                          missing; everything else is done on a copy
        @param sanitize   Optional callable invoked as sanitize(field_name, value) on
                          values that end up formatted with 'c', 's' or 'r'
        @return           (template, values): the template with each recognised field
                          replaced by a generated key, and the dict mapping those keys
                          to the computed values
        """
        info_dict.setdefault('epoch', int(time.time()))  # keep epoch consistent once set

        info_dict = self._copy_infodict(info_dict)
        info_dict['duration_string'] = (  # %(duration>%H-%M-%S)s is wrong if duration > 24hrs
            formatSeconds(info_dict['duration'], '-' if sanitize else ':')
            if info_dict.get('duration', None) is not None
            else None)
        info_dict['autonumber'] = self.params.get('autonumber_start', 1) - 1 + self._num_downloads
        if info_dict.get('resolution') is None:
            info_dict['resolution'] = self.format_resolution(info_dict, default=None)

        # For fields playlist_index, playlist_autonumber and autonumber convert all occurrences
        # of %(field)s to %(field)0Nd for backward compatibility
        field_size_compat_map = {
            'playlist_index': number_of_digits(info_dict.get('_last_playlist_index') or 0),
            'playlist_autonumber': number_of_digits(info_dict.get('n_entries') or 0),
            'autonumber': self.params.get('autonumber_size') or 5,
        }

        # Filled by create_key(): generated template key -> value to substitute
        TMPL_DICT = {}
        # Matches a format spec in the template, including the custom conversion types l, j, q, B, U
        EXTERNAL_FORMAT_RE = re.compile(STR_FORMAT_RE_TMPL.format('[^)]*', f'[{STR_FORMAT_TYPES}ljqBU]'))
        MATH_FUNCTIONS = {
            '+': float.__add__,
            '-': float.__sub__,
        }
        # Field is of the form key1.key2...
        # where keys (except first) can be string, int or slice
        FIELD_RE = r'\w*(?:\.(?:\w+|{num}|{num}?(?::{num}?){{1,2}}))*'.format(num=r'(?:-?\d+)')
        # NOTE(review): the '.' in the numeric alternative is unescaped, so it matches any
        # character, and the alternation is not wrapped in a group of its own before being
        # interpolated into INTERNAL_FORMAT_RE below - confirm both are intended
        MATH_FIELD_RE = r'''{field}|{num}'''.format(field=FIELD_RE, num=r'-?\d+(?:.\d+)?')
        MATH_OPERATORS_RE = r'(?:%s)' % '|'.join(map(re.escape, MATH_FUNCTIONS.keys()))
        # Splits the text inside %(...) into: optional negation, field path, maths,
        # ">strftime format", ",alternate", "&replacement" and "|default"
        INTERNAL_FORMAT_RE = re.compile(r'''(?x)
            (?P<negate>-)?
            (?P<fields>{field})
            (?P<maths>(?:{math_op}{math_field})*)
            (?:>(?P<strf_format>.+?))?
            (?P<alternate>(?<!\\),[^|&)]+)?
            (?:&(?P<replacement>.*?))?
            (?:\|(?P<default>.*?))?
            $'''.format(field=FIELD_RE, math_op=MATH_OPERATORS_RE, math_field=MATH_FIELD_RE))

        def _traverse_infodict(k):
            # Look up a dotted path in the (copied) info dict; a leading '.' is tolerated
            k = k.split('.')
            if k[0] == '':
                k.pop(0)
            return traverse_obj(info_dict, k, is_user_input=True, traverse_string=True)

        def get_value(mdict):
            # Compute the value for one parsed field expression (groupdict of INTERNAL_FORMAT_RE)
            # Object traversal
            value = _traverse_infodict(mdict['fields'])
            # Negative
            if mdict['negate']:
                value = float_or_none(value)
                if value is not None:
                    value *= -1
            # Do maths
            offset_key = mdict['maths']
            if offset_key:
                value = float_or_none(value)
                # Local name; shadows the imported `operator` module inside this function
                operator = None
                # Consume the maths string piece by piece, alternating operator and operand
                while offset_key:
                    item = re.match(
                        MATH_FIELD_RE if operator else MATH_OPERATORS_RE,
                        offset_key).group(0)
                    offset_key = offset_key[len(item):]
                    if operator is None:
                        operator = MATH_FUNCTIONS[item]
                        continue
                    item, multiplier = (item[1:], -1) if item[0] == '-' else (item, 1)
                    # The operand is either a number literal or another field to look up
                    offset = float_or_none(item)
                    if offset is None:
                        offset = float_or_none(_traverse_infodict(item))
                    try:
                        value = operator(value, multiplier * offset)
                    except (TypeError, ZeroDivisionError):
                        # e.g. one of the operands is None; the whole expression has no value
                        return None
                    operator = None
            # Datetime formatting
            if mdict['strf_format']:
                value = strftime_or_none(value, mdict['strf_format'].replace('\\,', ','))

            return value

        # Default text used when a field has no value and no explicit default
        na = self.params.get('outtmpl_na_placeholder', 'NA')

        def _dumpjson_default(obj):
            # json.dumps fallback: sets and LazyLists become lists, anything else its repr
            if isinstance(obj, (set, LazyList)):
                return list(obj)
            return repr(obj)

        def create_key(outer_mobj):
            # Substitution callback for EXTERNAL_FORMAT_RE: computes the value for one
            # format spec, stores it in TMPL_DICT and returns the rewritten spec
            if not outer_mobj.group('has_key'):
                return outer_mobj.group(0)
            key = outer_mobj.group('key')
            mobj = re.match(INTERNAL_FORMAT_RE, key)
            initial_field = mobj.group('fields').split('.')[-1] if mobj else ''
            value, replacement, default = None, None, na
            # Walk through the comma separated alternates until one yields a value
            while mobj:
                mobj = mobj.groupdict()
                default = mobj['default'] if mobj['default'] is not None else default
                value = get_value(mobj)
                replacement = mobj['replacement']
                if value is None and mobj['alternate']:
                    mobj = re.match(INTERNAL_FORMAT_RE, mobj['alternate'][1:])
                else:
                    break

            fmt = outer_mobj.group('format')
            if fmt == 's' and value is not None and key in field_size_compat_map.keys():
                fmt = '0{:d}d'.format(field_size_compat_map[key])

            # No value -> default; value present -> the replacement text if one was given
            value = default if value is None else value if replacement is None else replacement

            flags = outer_mobj.group('conversion') or ''
            # The same format spec with its conversion type swapped for 's'
            str_fmt = f'{fmt[:-1]}s'
            if fmt[-1] == 'l':  # list
                delim = '\n' if '#' in flags else ', '
                value, fmt = delim.join(variadic(value)), str_fmt
            elif fmt[-1] == 'j':  # json
                value, fmt = json.dumps(value, default=_dumpjson_default, indent=4 if '#' in flags else None), str_fmt
            elif fmt[-1] == 'q':  # quoted
                value = map(str, variadic(value) if '#' in flags else [value])
                value, fmt = ' '.join(map(compat_shlex_quote, value)), str_fmt
            elif fmt[-1] == 'B':  # bytes
                # The format spec is applied to the UTF-8 encoded value; bytes that no
                # longer decode afterwards are dropped
                value = f'%{str_fmt}'.encode('utf-8') % str(value).encode('utf-8')
                value, fmt = value.decode('utf-8', 'ignore'), 's'
            elif fmt[-1] == 'U':  # unicode normalized
                value, fmt = unicodedata.normalize(
                    # "+" = compatibility equivalence, "#" = NFD
                    'NF%s%s' % ('K' if '+' in flags else '', 'D' if '#' in flags else 'C'),
                    value), str_fmt
            elif fmt[-1] == 'c':
                # First character of the value; a falsy value is formatted as a string instead
                if value:
                    value = str(value)[0]
                else:
                    fmt = str_fmt
            elif fmt[-1] not in 'rs':  # numeric
                value = float_or_none(value)
                if value is None:
                    value, fmt = default, 's'

            if sanitize:
                if fmt[-1] == 'r':
                    # If value is an object, sanitize might convert it to a string
                    # So we convert it to repr first
                    value, fmt = repr(value), str_fmt
                if fmt[-1] in 'csr':
                    value = sanitize(initial_field, value)

            # The generated key embeds the original format, so the same field used with
            # different formats gets separate entries; '%' inside the key is followed by '\0'
            key = '%s\0%s' % (key.replace('%', '%\0'), outer_mobj.group('format'))
            TMPL_DICT[key] = value
            return '{prefix}%({key}){fmt}'.format(key=key, fmt=fmt, prefix=outer_mobj.group('prefix'))

        return EXTERNAL_FORMAT_RE.sub(create_key, outtmpl), TMPL_DICT
1176
1177     def evaluate_outtmpl(self, outtmpl, info_dict, *args, **kwargs):
1178         outtmpl, info_dict = self.prepare_outtmpl(outtmpl, info_dict, *args, **kwargs)
1179         return self.escape_outtmpl(outtmpl) % info_dict
1180
1181     def _prepare_filename(self, info_dict, tmpl_type='default'):
1182         try:
1183             sanitize = lambda k, v: sanitize_filename(
1184                 compat_str(v),
1185                 restricted=self.params.get('restrictfilenames'),
1186                 is_id=(k == 'id' or k.endswith('_id')))
1187             outtmpl = self._outtmpl_expandpath(self.outtmpl_dict.get(tmpl_type, self.outtmpl_dict['default']))
1188             filename = self.evaluate_outtmpl(outtmpl, info_dict, sanitize)
1189
1190             force_ext = OUTTMPL_TYPES.get(tmpl_type)
1191             if filename and force_ext is not None:
1192                 filename = replace_extension(filename, force_ext, info_dict.get('ext'))
1193
1194             # https://github.com/blackjack4494/youtube-dlc/issues/85
1195             trim_file_name = self.params.get('trim_file_name', False)
1196             if trim_file_name:
1197                 no_ext, *ext = filename.rsplit('.', 2)
1198                 filename = join_nonempty(no_ext[:trim_file_name], *ext, delim='.')
1199
1200             return filename
1201         except ValueError as err:
1202             self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
1203             return None
1204
1205     def prepare_filename(self, info_dict, dir_type='', warn=False):
1206         """Generate the output filename."""
1207
1208         filename = self._prepare_filename(info_dict, dir_type or 'default')
1209         if not filename and dir_type not in ('', 'temp'):
1210             return ''
1211
1212         if warn:
1213             if not self.params.get('paths'):
1214                 pass
1215             elif filename == '-':
1216                 self.report_warning('--paths is ignored when an outputting to stdout', only_once=True)
1217             elif os.path.isabs(filename):
1218                 self.report_warning('--paths is ignored since an absolute path is given in output template', only_once=True)
1219         if filename == '-' or not filename:
1220             return filename
1221
1222         return self.get_output_path(dir_type, filename)
1223
1224     def _match_entry(self, info_dict, incomplete=False, silent=False):
1225         """ Returns None if the file should be downloaded """
1226
1227         video_title = info_dict.get('title', info_dict.get('id', 'video'))
1228
1229         def check_filter():
1230             if 'title' in info_dict:
1231                 # This can happen when we're just evaluating the playlist
1232                 title = info_dict['title']
1233                 matchtitle = self.params.get('matchtitle', False)
1234                 if matchtitle:
1235                     if not re.search(matchtitle, title, re.IGNORECASE):
1236                         return '"' + title + '" title did not match pattern "' + matchtitle + '"'
1237                 rejecttitle = self.params.get('rejecttitle', False)
1238                 if rejecttitle:
1239                     if re.search(rejecttitle, title, re.IGNORECASE):
1240                         return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
1241             date = info_dict.get('upload_date')
1242             if date is not None:
1243                 dateRange = self.params.get('daterange', DateRange())
1244                 if date not in dateRange:
1245                     return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
1246             view_count = info_dict.get('view_count')
1247             if view_count is not None:
1248                 min_views = self.params.get('min_views')
1249                 if min_views is not None and view_count < min_views:
1250                     return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
1251                 max_views = self.params.get('max_views')
1252                 if max_views is not None and view_count > max_views:
1253                     return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
1254             if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
1255                 return 'Skipping "%s" because it is age restricted' % video_title
1256
1257             match_filter = self.params.get('match_filter')
1258             if match_filter is not None:
1259                 try:
1260                     ret = match_filter(info_dict, incomplete=incomplete)
1261                 except TypeError:
1262                     # For backward compatibility
1263                     ret = None if incomplete else match_filter(info_dict)
1264                 if ret is not None:
1265                     return ret
1266             return None
1267
1268         if self.in_download_archive(info_dict):
1269             reason = '%s has already been recorded in the archive' % video_title
1270             break_opt, break_err = 'break_on_existing', ExistingVideoReached
1271         else:
1272             reason = check_filter()
1273             break_opt, break_err = 'break_on_reject', RejectedVideoReached
1274         if reason is not None:
1275             if not silent:
1276                 self.to_screen('[download] ' + reason)
1277             if self.params.get(break_opt, False):
1278                 raise break_err()
1279         return reason
1280
1281     @staticmethod
1282     def add_extra_info(info_dict, extra_info):
1283         '''Set the keys from extra_info in info dict if they are missing'''
1284         for key, value in extra_info.items():
1285             info_dict.setdefault(key, value)
1286
1287     def extract_info(self, url, download=True, ie_key=None, extra_info=None,
1288                      process=True, force_generic_extractor=False):
1289         """
1290         Return a list with a dictionary for each video extracted.
1291
1292         Arguments:
1293         url -- URL to extract
1294
1295         Keyword arguments:
1296         download -- whether to download videos during extraction
1297         ie_key -- extractor key hint
1298         extra_info -- dictionary containing the extra values to add to each result
1299         process -- whether to resolve all unresolved references (URLs, playlist items),
1300             must be True for download to work.
1301         force_generic_extractor -- force using the generic extractor
1302         """
1303
1304         if extra_info is None:
1305             extra_info = {}
1306
1307         if not ie_key and force_generic_extractor:
1308             ie_key = 'Generic'
1309
1310         if ie_key:
1311             ies = {ie_key: self._get_info_extractor_class(ie_key)}
1312         else:
1313             ies = self._ies
1314
1315         for ie_key, ie in ies.items():
1316             if not ie.suitable(url):
1317                 continue
1318
1319             if not ie.working():
1320                 self.report_warning('The program functionality for this site has been marked as broken, '
1321                                     'and will probably not work.')
1322
1323             temp_id = ie.get_temp_id(url)
1324             if temp_id is not None and self.in_download_archive({'id': temp_id, 'ie_key': ie_key}):
1325                 self.to_screen(f'[{ie_key}] {temp_id}: has already been recorded in the archive')
1326                 if self.params.get('break_on_existing', False):
1327                     raise ExistingVideoReached()
1328                 break
1329             return self.__extract_info(url, self.get_info_extractor(ie_key), download, extra_info, process)
1330         else:
1331             self.report_error('no suitable InfoExtractor for URL %s' % url)
1332
1333     def __handle_extraction_exceptions(func):
1334         @functools.wraps(func)
1335         def wrapper(self, *args, **kwargs):
1336             try:
1337                 return func(self, *args, **kwargs)
1338             except GeoRestrictedError as e:
1339                 msg = e.msg
1340                 if e.countries:
1341                     msg += '\nThis video is available in %s.' % ', '.join(
1342                         map(ISO3166Utils.short2full, e.countries))
1343                 msg += '\nYou might want to use a VPN or a proxy server (with --proxy) to workaround.'
1344                 self.report_error(msg)
1345             except ExtractorError as e:  # An error we somewhat expected
1346                 self.report_error(compat_str(e), e.format_traceback())
1347             except ReExtractInfo as e:
1348                 if e.expected:
1349                     self.to_screen(f'{e}; Re-extracting data')
1350                 else:
1351                     self.to_stderr('\r')
1352                     self.report_warning(f'{e}; Re-extracting data')
1353                 return wrapper(self, *args, **kwargs)
1354             except (DownloadCancelled, LazyList.IndexError, PagedList.IndexError):
1355                 raise
1356             except Exception as e:
1357                 if self.params.get('ignoreerrors'):
1358                     self.report_error(error_to_compat_str(e), tb=encode_compat_str(traceback.format_exc()))
1359                 else:
1360                     raise
1361         return wrapper
1362
1363     def _wait_for_video(self, ie_result):
1364         if (not self.params.get('wait_for_video')
1365                 or ie_result.get('_type', 'video') != 'video'
1366                 or ie_result.get('formats') or ie_result.get('url')):
1367             return
1368
1369         format_dur = lambda dur: '%02d:%02d:%02d' % timetuple_from_msec(dur * 1000)[:-1]
1370         last_msg = ''
1371
1372         def progress(msg):
1373             nonlocal last_msg
1374             self.to_screen(msg + ' ' * (len(last_msg) - len(msg)) + '\r', skip_eol=True)
1375             last_msg = msg
1376
1377         min_wait, max_wait = self.params.get('wait_for_video')
1378         diff = try_get(ie_result, lambda x: x['release_timestamp'] - time.time())
1379         if diff is None and ie_result.get('live_status') == 'is_upcoming':
1380             diff = random.randrange(min_wait, max_wait) if (max_wait and min_wait) else (max_wait or min_wait)
1381             self.report_warning('Release time of video is not known')
1382         elif (diff or 0) <= 0:
1383             self.report_warning('Video should already be available according to extracted info')
1384         diff = min(max(diff or 0, min_wait or 0), max_wait or float('inf'))
1385         self.to_screen(f'[wait] Waiting for {format_dur(diff)} - Press Ctrl+C to try now')
1386
1387         wait_till = time.time() + diff
1388         try:
1389             while True:
1390                 diff = wait_till - time.time()
1391                 if diff <= 0:
1392                     progress('')
1393                     raise ReExtractInfo('[wait] Wait period ended', expected=True)
1394                 progress(f'[wait] Remaining time until next attempt: {self._format_screen(format_dur(diff), self.Styles.EMPHASIS)}')
1395                 time.sleep(1)
1396         except KeyboardInterrupt:
1397             progress('')
1398             raise ReExtractInfo('[wait] Interrupted by user', expected=True)
1399         except BaseException as e:
1400             if not isinstance(e, ReExtractInfo):
1401                 self.to_screen('')
1402             raise
1403
1404     @__handle_extraction_exceptions
1405     def __extract_info(self, url, ie, download, extra_info, process):
1406         ie_result = ie.extract(url)
1407         if ie_result is None:  # Finished already (backwards compatibility; listformats and friends should be moved here)
1408             return
1409         if isinstance(ie_result, list):
1410             # Backwards compatibility: old IE result format
1411             ie_result = {
1412                 '_type': 'compat_list',
1413                 'entries': ie_result,
1414             }
1415         if extra_info.get('original_url'):
1416             ie_result.setdefault('original_url', extra_info['original_url'])
1417         self.add_default_extra_info(ie_result, ie, url)
1418         if process:
1419             self._wait_for_video(ie_result)
1420             return self.process_ie_result(ie_result, download, extra_info)
1421         else:
1422             return ie_result
1423
1424     def add_default_extra_info(self, ie_result, ie, url):
1425         if url is not None:
1426             self.add_extra_info(ie_result, {
1427                 'webpage_url': url,
1428                 'original_url': url,
1429                 'webpage_url_basename': url_basename(url),
1430                 'webpage_url_domain': get_domain(url),
1431             })
1432         if ie is not None:
1433             self.add_extra_info(ie_result, {
1434                 'extractor': ie.IE_NAME,
1435                 'extractor_key': ie.ie_key(),
1436             })
1437
1438     def process_ie_result(self, ie_result, download=True, extra_info=None):
1439         """
1440         Take the result of the ie(may be modified) and resolve all unresolved
1441         references (URLs, playlist items).
1442
1443         It will also download the videos if 'download'.
1444         Returns the resolved ie_result.
1445         """
1446         if extra_info is None:
1447             extra_info = {}
1448         result_type = ie_result.get('_type', 'video')
1449
1450         if result_type in ('url', 'url_transparent'):
1451             ie_result['url'] = sanitize_url(ie_result['url'])
1452             if ie_result.get('original_url'):
1453                 extra_info.setdefault('original_url', ie_result['original_url'])
1454
1455             extract_flat = self.params.get('extract_flat', False)
1456             if ((extract_flat == 'in_playlist' and 'playlist' in extra_info)
1457                     or extract_flat is True):
1458                 info_copy = ie_result.copy()
1459                 ie = try_get(ie_result.get('ie_key'), self.get_info_extractor)
1460                 if ie and not ie_result.get('id'):
1461                     info_copy['id'] = ie.get_temp_id(ie_result['url'])
1462                 self.add_default_extra_info(info_copy, ie, ie_result['url'])
1463                 self.add_extra_info(info_copy, extra_info)
1464                 info_copy, _ = self.pre_process(info_copy)
1465                 self.__forced_printings(info_copy, self.prepare_filename(info_copy), incomplete=True)
1466                 if self.params.get('force_write_download_archive', False):
1467                     self.record_download_archive(info_copy)
1468                 return ie_result
1469
1470         if result_type == 'video':
1471             self.add_extra_info(ie_result, extra_info)
1472             ie_result = self.process_video_result(ie_result, download=download)
1473             additional_urls = (ie_result or {}).get('additional_urls')
1474             if additional_urls:
1475                 # TODO: Improve MetadataParserPP to allow setting a list
1476                 if isinstance(additional_urls, compat_str):
1477                     additional_urls = [additional_urls]
1478                 self.to_screen(
1479                     '[info] %s: %d additional URL(s) requested' % (ie_result['id'], len(additional_urls)))
1480                 self.write_debug('Additional URLs: "%s"' % '", "'.join(additional_urls))
1481                 ie_result['additional_entries'] = [
1482                     self.extract_info(
1483                         url, download, extra_info,
1484                         force_generic_extractor=self.params.get('force_generic_extractor'))
1485                     for url in additional_urls
1486                 ]
1487             return ie_result
1488         elif result_type == 'url':
1489             # We have to add extra_info to the results because it may be
1490             # contained in a playlist
1491             return self.extract_info(
1492                 ie_result['url'], download,
1493                 ie_key=ie_result.get('ie_key'),
1494                 extra_info=extra_info)
1495         elif result_type == 'url_transparent':
1496             # Use the information from the embedding page
1497             info = self.extract_info(
1498                 ie_result['url'], ie_key=ie_result.get('ie_key'),
1499                 extra_info=extra_info, download=False, process=False)
1500
1501             # extract_info may return None when ignoreerrors is enabled and
1502             # extraction failed with an error, don't crash and return early
1503             # in this case
1504             if not info:
1505                 return info
1506
1507             force_properties = dict(
1508                 (k, v) for k, v in ie_result.items() if v is not None)
1509             for f in ('_type', 'url', 'id', 'extractor', 'extractor_key', 'ie_key'):
1510                 if f in force_properties:
1511                     del force_properties[f]
1512             new_result = info.copy()
1513             new_result.update(force_properties)
1514
1515             # Extracted info may not be a video result (i.e.
1516             # info.get('_type', 'video') != video) but rather an url or
1517             # url_transparent. In such cases outer metadata (from ie_result)
1518             # should be propagated to inner one (info). For this to happen
1519             # _type of info should be overridden with url_transparent. This
1520             # fixes issue from https://github.com/ytdl-org/youtube-dl/pull/11163.
1521             if new_result.get('_type') == 'url':
1522                 new_result['_type'] = 'url_transparent'
1523
1524             return self.process_ie_result(
1525                 new_result, download=download, extra_info=extra_info)
1526         elif result_type in ('playlist', 'multi_video'):
1527             # Protect from infinite recursion due to recursively nested playlists
1528             # (see https://github.com/ytdl-org/youtube-dl/issues/27833)
1529             webpage_url = ie_result['webpage_url']
1530             if webpage_url in self._playlist_urls:
1531                 self.to_screen(
1532                     '[download] Skipping already downloaded playlist: %s'
1533                     % ie_result.get('title') or ie_result.get('id'))
1534                 return
1535
1536             self._playlist_level += 1
1537             self._playlist_urls.add(webpage_url)
1538             self._sanitize_thumbnails(ie_result)
1539             try:
1540                 return self.__process_playlist(ie_result, download)
1541             finally:
1542                 self._playlist_level -= 1
1543                 if not self._playlist_level:
1544                     self._playlist_urls.clear()
1545         elif result_type == 'compat_list':
1546             self.report_warning(
1547                 'Extractor %s returned a compat_list result. '
1548                 'It needs to be updated.' % ie_result.get('extractor'))
1549
1550             def _fixup(r):
1551                 self.add_extra_info(r, {
1552                     'extractor': ie_result['extractor'],
1553                     'webpage_url': ie_result['webpage_url'],
1554                     'webpage_url_basename': url_basename(ie_result['webpage_url']),
1555                     'webpage_url_domain': get_domain(ie_result['webpage_url']),
1556                     'extractor_key': ie_result['extractor_key'],
1557                 })
1558                 return r
1559             ie_result['entries'] = [
1560                 self.process_ie_result(_fixup(r), download, extra_info)
1561                 for r in ie_result['entries']
1562             ]
1563             return ie_result
1564         else:
1565             raise Exception('Invalid result type: %s' % result_type)
1566
1567     def _ensure_dir_exists(self, path):
1568         return make_dir(path, self.report_error)
1569
    def __process_playlist(self, ie_result, download):
        """
        Select and process the entries of a playlist result.

        The entries are chosen according to the 'playliststart', 'playlistend'
        and 'playlist_items' params. Unless simulating, the playlist level
        files (info json, description, thumbnails) are written first. Each
        chosen entry is then filtered with _match_entry and processed via
        process_ie_result, with the playlist fields passed as extra info.

        Returns ie_result with 'entries' replaced by the processed results and
        'requested_entries' set, or None if writing one of the playlist
        info json / description files returned None.

        Raises EntryNotInPlaylist if ie_result has no 'entries', or if an
        entry cannot be found while ie_result['requested_entries'] is set.
        """
        # We process each entry in the playlist
        playlist = ie_result.get('title') or ie_result.get('id')
        self.to_screen('[download] Downloading playlist: %s' % playlist)

        if 'entries' not in ie_result:
            raise EntryNotInPlaylist('There are no entries')

        # Sentinel marking positions for which no entry is available
        MissingEntry = object()
        incomplete_entries = bool(ie_result.get('requested_entries'))
        if incomplete_entries:
            # 'entries' only holds the requested items: spread them out to
            # their 1-based playlist positions and mark the gaps as missing
            def fill_missing_entries(entries, indices):
                ret = [MissingEntry] * max(indices)
                for i, entry in zip(indices, entries):
                    ret[i - 1] = entry
                return ret
            ie_result['entries'] = fill_missing_entries(ie_result['entries'], ie_result['requested_entries'])

        playlist_results = []

        playliststart = self.params.get('playliststart', 1)
        playlistend = self.params.get('playlistend')
        # For backwards compatibility, interpret -1 as whole list
        if playlistend == -1:
            playlistend = None

        playlistitems_str = self.params.get('playlist_items')
        playlistitems = None
        if playlistitems_str is not None:
            # Expand a spec such as "1,3-5" into the numbers 1, 3, 4, 5
            def iter_playlistitems(format):
                for string_segment in format.split(','):
                    if '-' in string_segment:
                        start, end = string_segment.split('-')
                        for item in range(int(start), int(end) + 1):
                            yield int(item)
                    else:
                        yield int(string_segment)
            # presumably orderedSet removes duplicates while keeping order - verify against the helper
            playlistitems = orderedSet(iter_playlistitems(playlistitems_str))

        ie_entries = ie_result['entries']
        # For a list the total is known now, so it is filled in here and the
        # escaped %%d is left for the number of selected entries
        msg = (
            'Downloading %d videos' if not isinstance(ie_entries, list)
            else 'Collected %d videos; downloading %%d of them' % len(ie_entries))

        # get_entry(i) returns the entry at the 1-based position i
        if isinstance(ie_entries, list):
            def get_entry(i):
                return ie_entries[i - 1]
        else:
            if not isinstance(ie_entries, (PagedList, LazyList)):
                ie_entries = LazyList(ie_entries)

            # The lookup goes through the common extraction error handling
            def get_entry(i):
                return YoutubeDL.__handle_extraction_exceptions(
                    lambda self, i: ie_entries[i - 1]
                )(self, i)

        entries = []
        # Either the explicitly requested positions or an open-ended count
        # starting at playliststart
        items = playlistitems if playlistitems is not None else itertools.count(playliststart)
        for i in items:
            if i == 0:
                continue
            if playlistitems is None and playlistend is not None and playlistend < i:
                break
            entry = None
            try:
                entry = get_entry(i)
                if entry is MissingEntry:
                    raise EntryNotInPlaylist()
            except (IndexError, EntryNotInPlaylist):
                if incomplete_entries:
                    raise EntryNotInPlaylist(f'Entry {i} cannot be found')
                elif not playlistitems:
                    break
                # With explicit playlist items, a position that does not exist
                # is kept as None and dropped further below
            entries.append(entry)
            try:
                # Only used to stop collecting early: the break_on_* params
                # make _match_entry raise these exceptions
                if entry is not None:
                    self._match_entry(entry, incomplete=True, silent=True)
            except (ExistingVideoReached, RejectedVideoReached):
                break
        ie_result['entries'] = entries

        # Save playlist_index before re-ordering
        entries = [
            ((playlistitems[i - 1] if playlistitems else i + playliststart - 1), entry)
            for i, entry in enumerate(entries, 1)
            if entry is not None]
        n_entries = len(entries)

        if not playlistitems and (playliststart != 1 or playlistend):
            playlistitems = list(range(playliststart, playliststart + n_entries))
        ie_result['requested_entries'] = playlistitems

        _infojson_written = False
        if not self.params.get('simulate') and self.params.get('allow_playlist_files', True):
            # Fields used to build the filenames of the playlist level files;
            # values present in ie_result take precedence
            ie_copy = {
                'playlist': playlist,
                'playlist_id': ie_result.get('id'),
                'playlist_title': ie_result.get('title'),
                'playlist_uploader': ie_result.get('uploader'),
                'playlist_uploader_id': ie_result.get('uploader_id'),
                'playlist_index': 0,
                'n_entries': n_entries,
            }
            ie_copy.update(dict(ie_result))

            _infojson_written = self._write_info_json(
                'playlist', ie_result, self.prepare_filename(ie_copy, 'pl_infojson'))
            if _infojson_written is None:
                return
            if self._write_description('playlist', ie_result,
                                       self.prepare_filename(ie_copy, 'pl_description')) is None:
                return
            # TODO: This should be passed to ThumbnailsConvertor if necessary
            self._write_thumbnails('playlist', ie_copy, self.prepare_filename(ie_copy, 'pl_thumbnail'))

        if self.params.get('playlistreverse', False):
            entries = entries[::-1]
        if self.params.get('playlistrandom', False):
            random.shuffle(entries)

        x_forwarded_for = ie_result.get('__x_forwarded_for_ip')

        self.to_screen('[%s] playlist %s: %s' % (ie_result['extractor'], playlist, msg % n_entries))
        failures = 0
        # Without the 'skip_playlist_after_errors' param the limit is never reached
        max_failures = self.params.get('skip_playlist_after_errors') or float('inf')
        for i, entry_tuple in enumerate(entries, 1):
            playlist_index, entry = entry_tuple
            if 'playlist-index' in self.params.get('compat_opts', []):
                # Compatibility mode: derive the index from the position in
                # the (possibly re-ordered) list instead of the saved one
                playlist_index = playlistitems[i - 1] if playlistitems else i + playliststart - 1
            self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
            # This __x_forwarded_for_ip thing is a bit ugly but requires
            # minimal changes
            if x_forwarded_for:
                entry['__x_forwarded_for_ip'] = x_forwarded_for
            # Playlist level information handed to the entry as extra info
            extra = {
                'n_entries': n_entries,
                '_last_playlist_index': max(playlistitems) if playlistitems else (playlistend or n_entries),
                'playlist_index': playlist_index,
                'playlist_autonumber': i,
                'playlist': playlist,
                'playlist_id': ie_result.get('id'),
                'playlist_title': ie_result.get('title'),
                'playlist_uploader': ie_result.get('uploader'),
                'playlist_uploader_id': ie_result.get('uploader_id'),
                'extractor': ie_result['extractor'],
                'webpage_url': ie_result['webpage_url'],
                'webpage_url_basename': url_basename(ie_result['webpage_url']),
                'webpage_url_domain': get_domain(ie_result['webpage_url']),
                'extractor_key': ie_result['extractor_key'],
            }

            if self._match_entry(entry, incomplete=True) is not None:
                continue

            entry_result = self.__process_iterable_entry(entry, download, extra)
            # A falsy result counts as a failed entry
            if not entry_result:
                failures += 1
            if failures >= max_failures:
                self.report_error(
                    'Skipping the remaining entries in playlist "%s" since %d items failed extraction' % (playlist, failures))
                break
            playlist_results.append(entry_result)
        ie_result['entries'] = playlist_results

        # Write the updated info to json
        # (ie_copy is only defined when the first info json was written, which
        # is the only case in which _infojson_written can be truthy)
        if _infojson_written and self._write_info_json(
                'updated playlist', ie_result,
                self.prepare_filename(ie_copy, 'pl_infojson'), overwrite=True) is None:
            return
        self.to_screen('[download] Finished downloading playlist: %s' % playlist)
        return ie_result
1741
1742     @__handle_extraction_exceptions
1743     def __process_iterable_entry(self, entry, download, extra_info):
1744         return self.process_ie_result(
1745             entry, download=download, extra_info=extra_info)
1746
1747     def _build_format_filter(self, filter_spec):
1748         " Returns a function to filter the formats according to the filter_spec "
1749
1750         OPERATORS = {
1751             '<': operator.lt,
1752             '<=': operator.le,
1753             '>': operator.gt,
1754             '>=': operator.ge,
1755             '=': operator.eq,
1756             '!=': operator.ne,
1757         }
1758         operator_rex = re.compile(r'''(?x)\s*
1759             (?P<key>width|height|tbr|abr|vbr|asr|filesize|filesize_approx|fps)\s*
1760             (?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
1761             (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)\s*
1762             ''' % '|'.join(map(re.escape, OPERATORS.keys())))
1763         m = operator_rex.fullmatch(filter_spec)
1764         if m:
1765             try:
1766                 comparison_value = int(m.group('value'))
1767             except ValueError:
1768                 comparison_value = parse_filesize(m.group('value'))
1769                 if comparison_value is None:
1770                     comparison_value = parse_filesize(m.group('value') + 'B')
1771                 if comparison_value is None:
1772                     raise ValueError(
1773                         'Invalid value %r in format specification %r' % (
1774                             m.group('value'), filter_spec))
1775             op = OPERATORS[m.group('op')]
1776
1777         if not m:
1778             STR_OPERATORS = {
1779                 '=': operator.eq,
1780                 '^=': lambda attr, value: attr.startswith(value),
1781                 '$=': lambda attr, value: attr.endswith(value),
1782                 '*=': lambda attr, value: value in attr,
1783             }
1784             str_operator_rex = re.compile(r'''(?x)\s*
1785                 (?P<key>[a-zA-Z0-9._-]+)\s*
1786                 (?P<negation>!\s*)?(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
1787                 (?P<value>[a-zA-Z0-9._-]+)\s*
1788                 ''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))
1789             m = str_operator_rex.fullmatch(filter_spec)
1790             if m:
1791                 comparison_value = m.group('value')
1792                 str_op = STR_OPERATORS[m.group('op')]
1793                 if m.group('negation'):
1794                     op = lambda attr, value: not str_op(attr, value)
1795                 else:
1796                     op = str_op
1797
1798         if not m:
1799             raise SyntaxError('Invalid filter specification %r' % filter_spec)
1800
1801         def _filter(f):
1802             actual_value = f.get(m.group('key'))
1803             if actual_value is None:
1804                 return m.group('none_inclusive')
1805             return op(actual_value, comparison_value)
1806         return _filter
1807
1808     def _check_formats(self, formats):
1809         for f in formats:
1810             self.to_screen('[info] Testing format %s' % f['format_id'])
1811             path = self.get_output_path('temp')
1812             if not self._ensure_dir_exists(f'{path}/'):
1813                 continue
1814             temp_file = tempfile.NamedTemporaryFile(suffix='.tmp', delete=False, dir=path or None)
1815             temp_file.close()
1816             try:
1817                 success, _ = self.dl(temp_file.name, f, test=True)
1818             except (DownloadError, IOError, OSError, ValueError) + network_exceptions:
1819                 success = False
1820             finally:
1821                 if os.path.exists(temp_file.name):
1822                     try:
1823                         os.remove(temp_file.name)
1824                     except OSError:
1825                         self.report_warning('Unable to delete temporary file "%s"' % temp_file.name)
1826             if success:
1827                 yield f
1828             else:
1829                 self.to_screen('[info] Unable to download format %s. Skipping...' % f['format_id'])
1830
1831     def _default_format_spec(self, info_dict, download=True):
1832
1833         def can_merge():
1834             merger = FFmpegMergerPP(self)
1835             return merger.available and merger.can_merge()
1836
1837         prefer_best = (
1838             not self.params.get('simulate')
1839             and download
1840             and (
1841                 not can_merge()
1842                 or info_dict.get('is_live', False)
1843                 or self.outtmpl_dict['default'] == '-'))
1844         compat = (
1845             prefer_best
1846             or self.params.get('allow_multiple_audio_streams', False)
1847             or 'format-spec' in self.params.get('compat_opts', []))
1848
1849         return (
1850             'best/bestvideo+bestaudio' if prefer_best
1851             else 'bestvideo*+bestaudio/best' if not compat
1852             else 'bestvideo+bestaudio/best')
1853
1854     def build_format_selector(self, format_spec):
1855         def syntax_error(note, start):
1856             message = (
1857                 'Invalid format specification: '
1858                 '{0}\n\t{1}\n\t{2}^'.format(note, format_spec, ' ' * start[1]))
1859             return SyntaxError(message)
1860
1861         PICKFIRST = 'PICKFIRST'
1862         MERGE = 'MERGE'
1863         SINGLE = 'SINGLE'
1864         GROUP = 'GROUP'
1865         FormatSelector = collections.namedtuple('FormatSelector', ['type', 'selector', 'filters'])
1866
1867         allow_multiple_streams = {'audio': self.params.get('allow_multiple_audio_streams', False),
1868                                   'video': self.params.get('allow_multiple_video_streams', False)}
1869
1870         check_formats = self.params.get('check_formats') == 'selected'
1871
1872         def _parse_filter(tokens):
1873             filter_parts = []
1874             for type, string, start, _, _ in tokens:
1875                 if type == tokenize.OP and string == ']':
1876                     return ''.join(filter_parts)
1877                 else:
1878                     filter_parts.append(string)
1879
1880         def _remove_unused_ops(tokens):
1881             # Remove operators that we don't use and join them with the surrounding strings
1882             # for example: 'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9'
1883             ALLOWED_OPS = ('/', '+', ',', '(', ')')
1884             last_string, last_start, last_end, last_line = None, None, None, None
1885             for type, string, start, end, line in tokens:
1886                 if type == tokenize.OP and string == '[':
1887                     if last_string:
1888                         yield tokenize.NAME, last_string, last_start, last_end, last_line
1889                         last_string = None
1890                     yield type, string, start, end, line
1891                     # everything inside brackets will be handled by _parse_filter
1892                     for type, string, start, end, line in tokens:
1893                         yield type, string, start, end, line
1894                         if type == tokenize.OP and string == ']':
1895                             break
1896                 elif type == tokenize.OP and string in ALLOWED_OPS:
1897                     if last_string:
1898                         yield tokenize.NAME, last_string, last_start, last_end, last_line
1899                         last_string = None
1900                     yield type, string, start, end, line
1901                 elif type in [tokenize.NAME, tokenize.NUMBER, tokenize.OP]:
1902                     if not last_string:
1903                         last_string = string
1904                         last_start = start
1905                         last_end = end
1906                     else:
1907                         last_string += string
1908             if last_string:
1909                 yield tokenize.NAME, last_string, last_start, last_end, last_line
1910
1911         def _parse_format_selection(tokens, inside_merge=False, inside_choice=False, inside_group=False):
1912             selectors = []
1913             current_selector = None
1914             for type, string, start, _, _ in tokens:
1915                 # ENCODING is only defined in python 3.x
1916                 if type == getattr(tokenize, 'ENCODING', None):
1917                     continue
1918                 elif type in [tokenize.NAME, tokenize.NUMBER]:
1919                     current_selector = FormatSelector(SINGLE, string, [])
1920                 elif type == tokenize.OP:
1921                     if string == ')':
1922                         if not inside_group:
1923                             # ')' will be handled by the parentheses group
1924                             tokens.restore_last_token()
1925                         break
1926                     elif inside_merge and string in ['/', ',']:
1927                         tokens.restore_last_token()
1928                         break
1929                     elif inside_choice and string == ',':
1930                         tokens.restore_last_token()
1931                         break
1932                     elif string == ',':
1933                         if not current_selector:
1934                             raise syntax_error('"," must follow a format selector', start)
1935                         selectors.append(current_selector)
1936                         current_selector = None
1937                     elif string == '/':
1938                         if not current_selector:
1939                             raise syntax_error('"/" must follow a format selector', start)
1940                         first_choice = current_selector
1941                         second_choice = _parse_format_selection(tokens, inside_choice=True)
1942                         current_selector = FormatSelector(PICKFIRST, (first_choice, second_choice), [])
1943                     elif string == '[':
1944                         if not current_selector:
1945                             current_selector = FormatSelector(SINGLE, 'best', [])
1946                         format_filter = _parse_filter(tokens)
1947                         current_selector.filters.append(format_filter)
1948                     elif string == '(':
1949                         if current_selector:
1950                             raise syntax_error('Unexpected "("', start)
1951                         group = _parse_format_selection(tokens, inside_group=True)
1952                         current_selector = FormatSelector(GROUP, group, [])
1953                     elif string == '+':
1954                         if not current_selector:
1955                             raise syntax_error('Unexpected "+"', start)
1956                         selector_1 = current_selector
1957                         selector_2 = _parse_format_selection(tokens, inside_merge=True)
1958                         if not selector_2:
1959                             raise syntax_error('Expected a selector', start)
1960                         current_selector = FormatSelector(MERGE, (selector_1, selector_2), [])
1961                     else:
1962                         raise syntax_error('Operator not recognized: "{0}"'.format(string), start)
1963                 elif type == tokenize.ENDMARKER:
1964                     break
1965             if current_selector:
1966                 selectors.append(current_selector)
1967             return selectors
1968
1969         def _merge(formats_pair):
1970             format_1, format_2 = formats_pair
1971
1972             formats_info = []
1973             formats_info.extend(format_1.get('requested_formats', (format_1,)))
1974             formats_info.extend(format_2.get('requested_formats', (format_2,)))
1975
1976             if not allow_multiple_streams['video'] or not allow_multiple_streams['audio']:
1977                 get_no_more = {'video': False, 'audio': False}
1978                 for (i, fmt_info) in enumerate(formats_info):
1979                     if fmt_info.get('acodec') == fmt_info.get('vcodec') == 'none':
1980                         formats_info.pop(i)
1981                         continue
1982                     for aud_vid in ['audio', 'video']:
1983                         if not allow_multiple_streams[aud_vid] and fmt_info.get(aud_vid[0] + 'codec') != 'none':
1984                             if get_no_more[aud_vid]:
1985                                 formats_info.pop(i)
1986                                 break
1987                             get_no_more[aud_vid] = True
1988
1989             if len(formats_info) == 1:
1990                 return formats_info[0]
1991
1992             video_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('vcodec') != 'none']
1993             audio_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('acodec') != 'none']
1994
1995             the_only_video = video_fmts[0] if len(video_fmts) == 1 else None
1996             the_only_audio = audio_fmts[0] if len(audio_fmts) == 1 else None
1997
1998             output_ext = self.params.get('merge_output_format')
1999             if not output_ext:
2000                 if the_only_video:
2001                     output_ext = the_only_video['ext']
2002                 elif the_only_audio and not video_fmts:
2003                     output_ext = the_only_audio['ext']
2004                 else:
2005                     output_ext = 'mkv'
2006
2007             filtered = lambda *keys: filter(None, (traverse_obj(fmt, *keys) for fmt in formats_info))
2008
2009             new_dict = {
2010                 'requested_formats': formats_info,
2011                 'format': '+'.join(filtered('format')),
2012                 'format_id': '+'.join(filtered('format_id')),
2013                 'ext': output_ext,
2014                 'protocol': '+'.join(map(determine_protocol, formats_info)),
2015                 'language': '+'.join(orderedSet(filtered('language'))) or None,
2016                 'format_note': '+'.join(orderedSet(filtered('format_note'))) or None,
2017                 'filesize_approx': sum(filtered('filesize', 'filesize_approx')) or None,
2018                 'tbr': sum(filtered('tbr', 'vbr', 'abr')),
2019             }
2020
2021             if the_only_video:
2022                 new_dict.update({
2023                     'width': the_only_video.get('width'),
2024                     'height': the_only_video.get('height'),
2025                     'resolution': the_only_video.get('resolution') or self.format_resolution(the_only_video),
2026                     'fps': the_only_video.get('fps'),
2027                     'dynamic_range': the_only_video.get('dynamic_range'),
2028                     'vcodec': the_only_video.get('vcodec'),
2029                     'vbr': the_only_video.get('vbr'),
2030                     'stretched_ratio': the_only_video.get('stretched_ratio'),
2031                 })
2032
2033             if the_only_audio:
2034                 new_dict.update({
2035                     'acodec': the_only_audio.get('acodec'),
2036                     'abr': the_only_audio.get('abr'),
2037                     'asr': the_only_audio.get('asr'),
2038                 })
2039
2040             return new_dict
2041
2042         def _check_formats(formats):
2043             if not check_formats:
2044                 yield from formats
2045                 return
2046             yield from self._check_formats(formats)
2047
2048         def _build_selector_function(selector):
2049             if isinstance(selector, list):  # ,
2050                 fs = [_build_selector_function(s) for s in selector]
2051
2052                 def selector_function(ctx):
2053                     for f in fs:
2054                         yield from f(ctx)
2055                 return selector_function
2056
2057             elif selector.type == GROUP:  # ()
2058                 selector_function = _build_selector_function(selector.selector)
2059
2060             elif selector.type == PICKFIRST:  # /
2061                 fs = [_build_selector_function(s) for s in selector.selector]
2062
2063                 def selector_function(ctx):
2064                     for f in fs:
2065                         picked_formats = list(f(ctx))
2066                         if picked_formats:
2067                             return picked_formats
2068                     return []
2069
2070             elif selector.type == MERGE:  # +
2071                 selector_1, selector_2 = map(_build_selector_function, selector.selector)
2072
2073                 def selector_function(ctx):
2074                     for pair in itertools.product(selector_1(ctx), selector_2(ctx)):
2075                         yield _merge(pair)
2076
2077             elif selector.type == SINGLE:  # atom
2078                 format_spec = selector.selector or 'best'
2079
2080                 # TODO: Add allvideo, allaudio etc by generalizing the code with best/worst selector
2081                 if format_spec == 'all':
2082                     def selector_function(ctx):
2083                         yield from _check_formats(ctx['formats'][::-1])
2084                 elif format_spec == 'mergeall':
2085                     def selector_function(ctx):
2086                         formats = list(_check_formats(ctx['formats']))
2087                         if not formats:
2088                             return
2089                         merged_format = formats[-1]
2090                         for f in formats[-2::-1]:
2091                             merged_format = _merge((merged_format, f))
2092                         yield merged_format
2093
2094                 else:
2095                     format_fallback, format_reverse, format_idx = False, True, 1
2096                     mobj = re.match(
2097                         r'(?P<bw>best|worst|b|w)(?P<type>video|audio|v|a)?(?P<mod>\*)?(?:\.(?P<n>[1-9]\d*))?$',
2098                         format_spec)
2099                     if mobj is not None:
2100                         format_idx = int_or_none(mobj.group('n'), default=1)
2101                         format_reverse = mobj.group('bw')[0] == 'b'
2102                         format_type = (mobj.group('type') or [None])[0]
2103                         not_format_type = {'v': 'a', 'a': 'v'}.get(format_type)
2104                         format_modified = mobj.group('mod') is not None
2105
2106                         format_fallback = not format_type and not format_modified  # for b, w
2107                         _filter_f = (
2108                             (lambda f: f.get('%scodec' % format_type) != 'none')
2109                             if format_type and format_modified  # bv*, ba*, wv*, wa*
2110                             else (lambda f: f.get('%scodec' % not_format_type) == 'none')
2111                             if format_type  # bv, ba, wv, wa
2112                             else (lambda f: f.get('vcodec') != 'none' and f.get('acodec') != 'none')
2113                             if not format_modified  # b, w
2114                             else lambda f: True)  # b*, w*
2115                         filter_f = lambda f: _filter_f(f) and (
2116                             f.get('vcodec') != 'none' or f.get('acodec') != 'none')
2117                     else:
2118                         if format_spec in self._format_selection_exts['audio']:
2119                             filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') != 'none'
2120                         elif format_spec in self._format_selection_exts['video']:
2121                             filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') != 'none' and f.get('vcodec') != 'none'
2122                         elif format_spec in self._format_selection_exts['storyboards']:
2123                             filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') == 'none' and f.get('vcodec') == 'none'
2124                         else:
2125                             filter_f = lambda f: f.get('format_id') == format_spec  # id
2126
2127                     def selector_function(ctx):
2128                         formats = list(ctx['formats'])
2129                         matches = list(filter(filter_f, formats)) if filter_f is not None else formats
2130                         if format_fallback and ctx['incomplete_formats'] and not matches:
2131                             # for extractors with incomplete formats (audio only (soundcloud)
2132                             # or video only (imgur)) best/worst will fallback to
2133                             # best/worst {video,audio}-only format
2134                             matches = formats
2135                         matches = LazyList(_check_formats(matches[::-1 if format_reverse else 1]))
2136                         try:
2137                             yield matches[format_idx - 1]
2138                         except IndexError:
2139                             return
2140
2141             filters = [self._build_format_filter(f) for f in selector.filters]
2142
2143             def final_selector(ctx):
2144                 ctx_copy = dict(ctx)
2145                 for _filter in filters:
2146                     ctx_copy['formats'] = list(filter(_filter, ctx_copy['formats']))
2147                 return selector_function(ctx_copy)
2148             return final_selector
2149
2150         stream = io.BytesIO(format_spec.encode('utf-8'))
2151         try:
2152             tokens = list(_remove_unused_ops(compat_tokenize_tokenize(stream.readline)))
2153         except tokenize.TokenError:
2154             raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec)))
2155
2156         class TokenIterator(object):
2157             def __init__(self, tokens):
2158                 self.tokens = tokens
2159                 self.counter = 0
2160
2161             def __iter__(self):
2162                 return self
2163
2164             def __next__(self):
2165                 if self.counter >= len(self.tokens):
2166                     raise StopIteration()
2167                 value = self.tokens[self.counter]
2168                 self.counter += 1
2169                 return value
2170
2171             next = __next__
2172
2173             def restore_last_token(self):
2174                 self.counter -= 1
2175
2176         parsed_selector = _parse_format_selection(iter(TokenIterator(tokens)))
2177         return _build_selector_function(parsed_selector)
2178
2179     def _calc_headers(self, info_dict):
2180         res = std_headers.copy()
2181
2182         add_headers = info_dict.get('http_headers')
2183         if add_headers:
2184             res.update(add_headers)
2185
2186         cookies = self._calc_cookies(info_dict)
2187         if cookies:
2188             res['Cookie'] = cookies
2189
2190         if 'X-Forwarded-For' not in res:
2191             x_forwarded_for_ip = info_dict.get('__x_forwarded_for_ip')
2192             if x_forwarded_for_ip:
2193                 res['X-Forwarded-For'] = x_forwarded_for_ip
2194
2195         return res
2196
2197     def _calc_cookies(self, info_dict):
2198         pr = sanitized_Request(info_dict['url'])
2199         self.cookiejar.add_cookie_header(pr)
2200         return pr.get_header('Cookie')
2201
2202     def _sort_thumbnails(self, thumbnails):
2203         thumbnails.sort(key=lambda t: (
2204             t.get('preference') if t.get('preference') is not None else -1,
2205             t.get('width') if t.get('width') is not None else -1,
2206             t.get('height') if t.get('height') is not None else -1,
2207             t.get('id') if t.get('id') is not None else '',
2208             t.get('url')))
2209
2210     def _sanitize_thumbnails(self, info_dict):
2211         thumbnails = info_dict.get('thumbnails')
2212         if thumbnails is None:
2213             thumbnail = info_dict.get('thumbnail')
2214             if thumbnail:
2215                 info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}]
2216         if not thumbnails:
2217             return
2218
2219         def check_thumbnails(thumbnails):
2220             for t in thumbnails:
2221                 self.to_screen(f'[info] Testing thumbnail {t["id"]}')
2222                 try:
2223                     self.urlopen(HEADRequest(t['url']))
2224                 except network_exceptions as err:
2225                     self.to_screen(f'[info] Unable to connect to thumbnail {t["id"]} URL {t["url"]!r} - {err}. Skipping...')
2226                     continue
2227                 yield t
2228
2229         self._sort_thumbnails(thumbnails)
2230         for i, t in enumerate(thumbnails):
2231             if t.get('id') is None:
2232                 t['id'] = '%d' % i
2233             if t.get('width') and t.get('height'):
2234                 t['resolution'] = '%dx%d' % (t['width'], t['height'])
2235             t['url'] = sanitize_url(t['url'])
2236
2237         if self.params.get('check_formats') is True:
2238             info_dict['thumbnails'] = LazyList(check_thumbnails(thumbnails[::-1]), reverse=True)
2239         else:
2240             info_dict['thumbnails'] = thumbnails
2241
    def process_video_result(self, info_dict, download=True):
        """Sanitize a video result, select the formats and process them.

        The info_dict is modified in place: fields are sanitized, derived
        fields (thumbnail, display_id, duration_string, dates, live status,
        chapter/season/episode titles, requested_subtitles, ...) are filled in
        and every format gets its missing fields and HTTP headers.
        The requested formats are then chosen with the format selector and,
        if `download` is true, each of them is passed to process_info.

        @param info_dict  The extractor result; its '_type' must be 'video' (or absent)
        @param download   Passed on to _default_format_spec; when false, the
                          selected formats are not passed to process_info
        @returns          The info_dict updated with the last selected format,
                          or None when only listing was requested
        @raises ExtractorError if the 'id' or 'title' field is missing, or if no
                          format matches and 'ignore_no_formats_error' is not set
        """
        assert info_dict.get('_type', 'video') == 'video'

        if 'id' not in info_dict:
            raise ExtractorError('Missing "id" field in extractor result')
        if 'title' not in info_dict:
            raise ExtractorError('Missing "title" field in extractor result',
                                 video_id=info_dict['id'], ie=info_dict['extractor'])

        def report_force_conversion(field, field_not, conversion):
            self.report_warning(
                '"%s" field is not %s - forcing %s conversion, there is an error in extractor'
                % (field, field_not, conversion))

        def sanitize_string_field(info, string_field):
            # Convert the field to a string (with a warning) unless it is None or already one
            field = info.get(string_field)
            if field is None or isinstance(field, compat_str):
                return
            report_force_conversion(string_field, 'a string', 'string')
            info[string_field] = compat_str(field)

        def sanitize_numeric_fields(info):
            # Convert every non-numeric value of the known numeric fields with int_or_none (with a warning)
            for numeric_field in self._NUMERIC_FIELDS:
                field = info.get(numeric_field)
                if field is None or isinstance(field, compat_numeric_types):
                    continue
                report_force_conversion(numeric_field, 'numeric', 'int')
                info[numeric_field] = int_or_none(field)

        sanitize_string_field(info_dict, 'id')
        sanitize_numeric_fields(info_dict)

        if 'playlist' not in info_dict:
            # It isn't part of a playlist
            info_dict['playlist'] = None
            info_dict['playlist_index'] = None

        self._sanitize_thumbnails(info_dict)

        # Prefer an explicit 'thumbnail'; otherwise use the last entry of the sorted thumbnails
        thumbnail = info_dict.get('thumbnail')
        thumbnails = info_dict.get('thumbnails')
        if thumbnail:
            info_dict['thumbnail'] = sanitize_url(thumbnail)
        elif thumbnails:
            info_dict['thumbnail'] = thumbnails[-1]['url']

        if info_dict.get('display_id') is None and 'id' in info_dict:
            info_dict['display_id'] = info_dict['id']

        if info_dict.get('duration') is not None:
            info_dict['duration_string'] = formatSeconds(info_dict['duration'])

        # Derive the missing *_date fields (YYYYMMDD, in UTC) from the corresponding timestamps
        for ts_key, date_key in (
                ('timestamp', 'upload_date'),
                ('release_timestamp', 'release_date'),
        ):
            if info_dict.get(date_key) is None and info_dict.get(ts_key) is not None:
                # Working around out-of-range timestamp values (e.g. negative ones on Windows,
                # see http://bugs.python.org/issue1646728)
                try:
                    upload_date = datetime.datetime.utcfromtimestamp(info_dict[ts_key])
                    info_dict[date_key] = upload_date.strftime('%Y%m%d')
                except (ValueError, OverflowError, OSError):
                    pass

        # Infer 'live_status' from 'is_live'/'was_live' when it is not given,
        # and then fill in whichever of those two fields is still missing
        live_keys = ('is_live', 'was_live')
        live_status = info_dict.get('live_status')
        if live_status is None:
            for key in live_keys:
                # Only an explicit False lets the next key be considered
                if info_dict.get(key) is False:
                    continue
                if info_dict.get(key):
                    live_status = key
                break
            if all(info_dict.get(key) is False for key in live_keys):
                live_status = 'not_live'
        if live_status:
            info_dict['live_status'] = live_status
            for key in live_keys:
                if info_dict.get(key) is None:
                    info_dict[key] = (live_status == key)

        # Auto generate title fields corresponding to the *_number fields when missing
        # in order to always have clean titles. This is very common for TV series.
        for field in ('chapter', 'season', 'episode'):
            if info_dict.get('%s_number' % field) is not None and not info_dict.get(field):
                info_dict[field] = '%s %d' % (field.capitalize(), info_dict['%s_number' % field])

        # Sanitize the subtitle URLs and determine missing extensions from the URL
        for cc_kind in ('subtitles', 'automatic_captions'):
            cc = info_dict.get(cc_kind)
            if cc:
                for _, subtitle in cc.items():
                    for subtitle_format in subtitle:
                        if subtitle_format.get('url'):
                            subtitle_format['url'] = sanitize_url(subtitle_format['url'])
                        if subtitle_format.get('ext') is None:
                            subtitle_format['ext'] = determine_ext(subtitle_format['url']).lower()

        automatic_captions = info_dict.get('automatic_captions')
        subtitles = info_dict.get('subtitles')

        info_dict['requested_subtitles'] = self.process_subtitles(
            info_dict['id'], subtitles, automatic_captions)

        if info_dict.get('formats') is None:
            # There's only one format available
            formats = [info_dict]
        else:
            formats = info_dict['formats']

        # Remember whether any format had DRM before such formats are filtered out
        info_dict['__has_drm'] = any(f.get('has_drm') for f in formats)
        if not self.params.get('allow_unplayable_formats'):
            formats = [f for f in formats if not f.get('has_drm')]

        # For live videos, keep only the formats whose 'is_from_start' matches the 'live_from_start' param
        if info_dict.get('is_live'):
            get_from_start = bool(self.params.get('live_from_start'))
            formats = [f for f in formats if bool(f.get('is_from_start')) == get_from_start]

        if not formats:
            self.raise_no_formats(info_dict)

        def is_wellformed(f):
            # A format is usable only if it has a non-empty 'url'; bytes URLs are converted to strings
            url = f.get('url')
            if not url:
                self.report_warning(
                    '"url" field is missing or empty - skipping format, '
                    'there is an error in extractor')
                return False
            if isinstance(url, bytes):
                sanitize_string_field(f, 'url')
            return True

        # Filter out malformed formats for better extraction robustness
        formats = list(filter(is_wellformed, formats))

        # Maps each format_id to the list of formats sharing it
        formats_dict = {}

        # We check that all the formats have the format and format_id fields
        for i, format in enumerate(formats):
            sanitize_string_field(format, 'format_id')
            sanitize_numeric_fields(format)
            format['url'] = sanitize_url(format['url'])
            if not format.get('format_id'):
                # Fall back to the index of the format in the list
                format['format_id'] = compat_str(i)
            else:
                # Sanitize format_id from characters used in format selector expression
                format['format_id'] = re.sub(r'[\s,/+\[\]()]', '_', format['format_id'])
            format_id = format['format_id']
            if format_id not in formats_dict:
                formats_dict[format_id] = []
            formats_dict[format_id].append(format)

        # Make sure all formats have unique format_id
        common_exts = set(itertools.chain(*self._format_selection_exts.values()))
        for format_id, ambiguous_formats in formats_dict.items():
            ambigious_id = len(ambiguous_formats) > 1
            for i, format in enumerate(ambiguous_formats):
                if ambigious_id:
                    # Disambiguate duplicated ids by appending their index
                    format['format_id'] = '%s-%d' % (format_id, i)
                if format.get('ext') is None:
                    format['ext'] = determine_ext(format['url']).lower()
                # Ensure there is no conflict between id and ext in format selection
                # See https://github.com/yt-dlp/yt-dlp/issues/1282
                if format['format_id'] != format['ext'] and format['format_id'] in common_exts:
                    format['format_id'] = 'f%s' % format['format_id']

        # Fill in the fields that can be derived from the other fields of the format
        for i, format in enumerate(formats):
            if format.get('format') is None:
                format['format'] = '{id} - {res}{note}'.format(
                    id=format['format_id'],
                    res=self.format_resolution(format),
                    note=format_field(format, 'format_note', ' (%s)'),
                )
            if format.get('protocol') is None:
                format['protocol'] = determine_protocol(format)
            if format.get('resolution') is None:
                format['resolution'] = self.format_resolution(format, default=None)
            if format.get('dynamic_range') is None and format.get('vcodec') != 'none':
                format['dynamic_range'] = 'SDR'
            # Estimate the size from duration and bitrate when no size is known
            if (info_dict.get('duration') and format.get('tbr')
                    and not format.get('filesize') and not format.get('filesize_approx')):
                format['filesize_approx'] = info_dict['duration'] * format['tbr'] * (1024 / 8)

            # Add HTTP headers, so that external programs can use them from the
            # json output
            full_format_info = info_dict.copy()
            full_format_info.update(format)
            format['http_headers'] = self._calc_headers(full_format_info)
        # Remove private housekeeping stuff
        if '__x_forwarded_for_ip' in info_dict:
            del info_dict['__x_forwarded_for_ip']

        # TODO Central sorting goes here

        # The formats are passed in reverse order; reverse=True restores the original order
        if self.params.get('check_formats') is True:
            formats = LazyList(self._check_formats(formats[::-1]), reverse=True)

        if not formats or formats[0] is not info_dict:
            # only set the 'formats' fields if the original info_dict list them
            # otherwise we end up with a circular reference, the first (and unique)
            # element in the 'formats' field in info_dict is info_dict itself,
            # which can't be exported to json
            info_dict['formats'] = formats

        info_dict, _ = self.pre_process(info_dict)

        # The pre-processors may have modified the formats
        formats = info_dict.get('formats', [info_dict])

        if self.params.get('list_thumbnails'):
            self.list_thumbnails(info_dict)
        if self.params.get('listformats'):
            if not info_dict.get('formats') and not info_dict.get('url'):
                self.to_screen('%s has no formats' % info_dict['id'])
            else:
                self.list_formats(info_dict)
        if self.params.get('listsubtitles'):
            if 'automatic_captions' in info_dict:
                self.list_subtitles(
                    info_dict['id'], automatic_captions, 'automatic captions')
            self.list_subtitles(info_dict['id'], subtitles, 'subtitles')
        # Stop after listing, unless the 'simulate' param was set explicitly (i.e. is not None)
        list_only = self.params.get('simulate') is None and (
            self.params.get('list_thumbnails') or self.params.get('listformats') or self.params.get('listsubtitles'))
        if list_only:
            # Without this printing, -F --print-json will not work
            self.__forced_printings(info_dict, self.prepare_filename(info_dict), incomplete=True)
            return

        # Without a configured selector, build one from the default format specification
        format_selector = self.format_selector
        if format_selector is None:
            req_format = self._default_format_spec(info_dict, download=download)
            self.write_debug('Default format spec: %s' % req_format)
            format_selector = self.build_format_selector(req_format)

        # While in format selection we may need to have an access to the original
        # format set in order to calculate some metrics or do some processing.
        # For now we need to be able to guess whether original formats provided
        # by extractor are incomplete or not (i.e. whether extractor provides only
        # video-only or audio-only formats) for proper formats selection for
        # extractors with such incomplete formats (see
        # https://github.com/ytdl-org/youtube-dl/pull/5556).
        # Since formats may be filtered during format selection and may not match
        # the original formats the results may be incorrect. Thus original formats
        # or pre-calculated metrics should be passed to format selection routines
        # as well.
        # We will pass a context object containing all necessary additional data
        # instead of just formats.
        # This fixes incorrect format selection issue (see
        # https://github.com/ytdl-org/youtube-dl/issues/10083).
        incomplete_formats = (
            # All formats are video-only or
            all(f.get('vcodec') != 'none' and f.get('acodec') == 'none' for f in formats)
            # all formats are audio-only
            or all(f.get('vcodec') == 'none' and f.get('acodec') != 'none' for f in formats))

        ctx = {
            'formats': formats,
            'incomplete_formats': incomplete_formats,
        }

        formats_to_download = list(format_selector(ctx))
        if not formats_to_download:
            if not self.params.get('ignore_no_formats_error'):
                raise ExtractorError('Requested format is not available', expected=True,
                                     video_id=info_dict['id'], ie=info_dict['extractor'])
            else:
                self.report_warning('Requested format is not available')
                # Process what we can, even without any available formats.
                self.process_info(dict(info_dict))
        elif download:
            self.to_screen(
                '[info] %s: Downloading %d format(s): %s' % (
                    info_dict['id'], len(formats_to_download),
                    ", ".join([f['format_id'] for f in formats_to_download])))
            for fmt in formats_to_download:
                # Each selected format is processed with its own copy of the info dict
                new_info = dict(info_dict)
                # Save a reference to the original info_dict so that it can be modified in process_info if needed
                new_info['__original_infodict'] = info_dict
                new_info.update(fmt)
                self.process_info(new_info)
        # We update the info dict with the selected best quality format (backwards compatibility)
        if formats_to_download:
            info_dict.update(formats_to_download[-1])
        return info_dict
2526
2527     def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
2528         """Select the requested subtitles and their format"""
2529         available_subs = {}
2530         if normal_subtitles and self.params.get('writesubtitles'):
2531             available_subs.update(normal_subtitles)
2532         if automatic_captions and self.params.get('writeautomaticsub'):
2533             for lang, cap_info in automatic_captions.items():
2534                 if lang not in available_subs:
2535                     available_subs[lang] = cap_info
2536
2537         if (not self.params.get('writesubtitles') and not
2538                 self.params.get('writeautomaticsub') or not
2539                 available_subs):
2540             return None
2541
2542         all_sub_langs = available_subs.keys()
2543         if self.params.get('allsubtitles', False):
2544             requested_langs = all_sub_langs
2545         elif self.params.get('subtitleslangs', False):
2546             # A list is used so that the order of languages will be the same as
2547             # given in subtitleslangs. See https://github.com/yt-dlp/yt-dlp/issues/1041
2548             requested_langs = []
2549             for lang_re in self.params.get('subtitleslangs'):
2550                 if lang_re == 'all':
2551                     requested_langs.extend(all_sub_langs)
2552                     continue
2553                 discard = lang_re[0] == '-'
2554                 if discard:
2555                     lang_re = lang_re[1:]
2556                 current_langs = filter(re.compile(lang_re + '$').match, all_sub_langs)
2557                 if discard:
2558                     for lang in current_langs:
2559                         while lang in requested_langs:
2560                             requested_langs.remove(lang)
2561                 else:
2562                     requested_langs.extend(current_langs)
2563             requested_langs = orderedSet(requested_langs)
2564         elif 'en' in available_subs:
2565             requested_langs = ['en']
2566         else:
2567             requested_langs = [list(all_sub_langs)[0]]
2568         if requested_langs:
2569             self.write_debug('Downloading subtitles: %s' % ', '.join(requested_langs))
2570
2571         formats_query = self.params.get('subtitlesformat', 'best')
2572         formats_preference = formats_query.split('/') if formats_query else []
2573         subs = {}
2574         for lang in requested_langs:
2575             formats = available_subs.get(lang)
2576             if formats is None:
2577                 self.report_warning('%s subtitles not available for %s' % (lang, video_id))
2578                 continue
2579             for ext in formats_preference:
2580                 if ext == 'best':
2581                     f = formats[-1]
2582                     break
2583                 matches = list(filter(lambda f: f['ext'] == ext, formats))
2584                 if matches:
2585                     f = matches[-1]
2586                     break
2587             else:
2588                 f = formats[-1]
2589                 self.report_warning(
2590                     'No subtitle format found matching "%s" for language %s, '
2591                     'using %s' % (formats_query, lang, f['ext']))
2592             subs[lang] = f
2593         return subs
2594
2595     def __forced_printings(self, info_dict, filename, incomplete):
2596         def print_mandatory(field, actual_field=None):
2597             if actual_field is None:
2598                 actual_field = field
2599             if (self.params.get('force%s' % field, False)
2600                     and (not incomplete or info_dict.get(actual_field) is not None)):
2601                 self.to_stdout(info_dict[actual_field])
2602
2603         def print_optional(field):
2604             if (self.params.get('force%s' % field, False)
2605                     and info_dict.get(field) is not None):
2606                 self.to_stdout(info_dict[field])
2607
2608         info_dict = info_dict.copy()
2609         if filename is not None:
2610             info_dict['filename'] = filename
2611         if info_dict.get('requested_formats') is not None:
2612             # For RTMP URLs, also include the playpath
2613             info_dict['urls'] = '\n'.join(f['url'] + f.get('play_path', '') for f in info_dict['requested_formats'])
2614         elif 'url' in info_dict:
2615             info_dict['urls'] = info_dict['url'] + info_dict.get('play_path', '')
2616
2617         if self.params.get('forceprint') or self.params.get('forcejson'):
2618             self.post_extract(info_dict)
2619         for tmpl in self.params.get('forceprint', []):
2620             mobj = re.match(r'\w+(=?)$', tmpl)
2621             if mobj and mobj.group(1):
2622                 tmpl = f'{tmpl[:-1]} = %({tmpl[:-1]})s'
2623             elif mobj:
2624                 tmpl = '%({})s'.format(tmpl)
2625             self.to_stdout(self.evaluate_outtmpl(tmpl, info_dict))
2626
2627         print_mandatory('title')
2628         print_mandatory('id')
2629         print_mandatory('url', 'urls')
2630         print_optional('thumbnail')
2631         print_optional('description')
2632         print_optional('filename')
2633         if self.params.get('forceduration') and info_dict.get('duration') is not None:
2634             self.to_stdout(formatSeconds(info_dict['duration']))
2635         print_mandatory('format')
2636
2637         if self.params.get('forcejson'):
2638             self.to_stdout(json.dumps(self.sanitize_info(info_dict)))
2639
2640     def dl(self, name, info, subtitle=False, test=False):
2641         if not info.get('url'):
2642             self.raise_no_formats(info, True)
2643
2644         if test:
2645             verbose = self.params.get('verbose')
2646             params = {
2647                 'test': True,
2648                 'quiet': self.params.get('quiet') or not verbose,
2649                 'verbose': verbose,
2650                 'noprogress': not verbose,
2651                 'nopart': True,
2652                 'skip_unavailable_fragments': False,
2653                 'keep_fragments': False,
2654                 'overwrites': True,
2655                 '_no_ytdl_file': True,
2656             }
2657         else:
2658             params = self.params
2659         fd = get_suitable_downloader(info, params, to_stdout=(name == '-'))(self, params)
2660         if not test:
2661             for ph in self._progress_hooks:
2662                 fd.add_progress_hook(ph)
2663             urls = '", "'.join([f['url'] for f in info.get('requested_formats', [])] or [info['url']])
2664             self.write_debug('Invoking downloader on "%s"' % urls)
2665
2666         # Note: Ideally info should be a deep-copied so that hooks cannot modify it.
2667         # But it may contain objects that are not deep-copyable
2668         new_info = self._copy_infodict(info)
2669         if new_info.get('http_headers') is None:
2670             new_info['http_headers'] = self._calc_headers(new_info)
2671         return fd.download(name, new_info, subtitle)
2672
2673     def process_info(self, info_dict):
2674         """Process a single resolved IE result."""
2675
2676         assert info_dict.get('_type', 'video') == 'video'
2677
2678         max_downloads = self.params.get('max_downloads')
2679         if max_downloads is not None:
2680             if self._num_downloads >= int(max_downloads):
2681                 raise MaxDownloadsReached()
2682
2683         if info_dict.get('is_live') and not self.params.get('live_from_start'):
2684             info_dict['title'] += ' ' + datetime.datetime.now().strftime('%Y-%m-%d %H:%M')
2685
2686         # TODO: backward compatibility, to be removed
2687         info_dict['fulltitle'] = info_dict['title']
2688
2689         if 'format' not in info_dict and 'ext' in info_dict:
2690             info_dict['format'] = info_dict['ext']
2691
2692         if self._match_entry(info_dict) is not None:
2693             return
2694
2695         self.post_extract(info_dict)
2696         self._num_downloads += 1
2697
2698         # info_dict['_filename'] needs to be set for backward compatibility
2699         info_dict['_filename'] = full_filename = self.prepare_filename(info_dict, warn=True)
2700         temp_filename = self.prepare_filename(info_dict, 'temp')
2701         files_to_move = {}
2702
2703         # Forced printings
2704         self.__forced_printings(info_dict, full_filename, incomplete=('format' not in info_dict))
2705
2706         if self.params.get('simulate'):
2707             if self.params.get('force_write_download_archive', False):
2708                 self.record_download_archive(info_dict)
2709             # Do nothing else if in simulate mode
2710             return
2711
2712         if full_filename is None:
2713             return
2714         if not self._ensure_dir_exists(encodeFilename(full_filename)):
2715             return
2716         if not self._ensure_dir_exists(encodeFilename(temp_filename)):
2717             return
2718
2719         if self._write_description('video', info_dict,
2720                                    self.prepare_filename(info_dict, 'description')) is None:
2721             return
2722
2723         sub_files = self._write_subtitles(info_dict, temp_filename)
2724         if sub_files is None:
2725             return
2726         files_to_move.update(dict(sub_files))
2727
2728         thumb_files = self._write_thumbnails(
2729             'video', info_dict, temp_filename, self.prepare_filename(info_dict, 'thumbnail'))
2730         if thumb_files is None:
2731             return
2732         files_to_move.update(dict(thumb_files))
2733
2734         infofn = self.prepare_filename(info_dict, 'infojson')
2735         _infojson_written = self._write_info_json('video', info_dict, infofn)
2736         if _infojson_written:
2737             info_dict['infojson_filename'] = infofn
2738             # For backward compatibility, even though it was a private field
2739             info_dict['__infojson_filename'] = infofn
2740         elif _infojson_written is None:
2741             return
2742
2743         # Note: Annotations are deprecated
2744         annofn = None
2745         if self.params.get('writeannotations', False):
2746             annofn = self.prepare_filename(info_dict, 'annotation')
2747         if annofn:
2748             if not self._ensure_dir_exists(encodeFilename(annofn)):
2749                 return
2750             if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(annofn)):
2751                 self.to_screen('[info] Video annotations are already present')
2752             elif not info_dict.get('annotations'):
2753                 self.report_warning('There are no annotations to write.')
2754             else:
2755                 try:
2756                     self.to_screen('[info] Writing video annotations to: ' + annofn)
2757                     with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
2758                         annofile.write(info_dict['annotations'])
2759                 except (KeyError, TypeError):
2760                     self.report_warning('There are no annotations to write.')
2761                 except (OSError, IOError):
2762                     self.report_error('Cannot write annotations file: ' + annofn)
2763                     return
2764
2765         # Write internet shortcut files
2766         def _write_link_file(link_type):
2767             if 'webpage_url' not in info_dict:
2768                 self.report_error('Cannot write internet shortcut file because the "webpage_url" field is missing in the media information')
2769                 return False
2770             linkfn = replace_extension(self.prepare_filename(info_dict, 'link'), link_type, info_dict.get('ext'))
2771             if not self._ensure_dir_exists(encodeFilename(linkfn)):
2772                 return False
2773             if self.params.get('overwrites', True) and os.path.exists(encodeFilename(linkfn)):
2774                 self.to_screen(f'[info] Internet shortcut (.{link_type}) is already present')
2775                 return True
2776             try:
2777                 self.to_screen(f'[info] Writing internet shortcut (.{link_type}) to: {linkfn}')
2778                 with io.open(encodeFilename(to_high_limit_path(linkfn)), 'w', encoding='utf-8',
2779                              newline='\r\n' if link_type == 'url' else '\n') as linkfile:
2780                     template_vars = {'url': iri_to_uri(info_dict['webpage_url'])}
2781                     if link_type == 'desktop':
2782                         template_vars['filename'] = linkfn[:-(len(link_type) + 1)]
2783                     linkfile.write(LINK_TEMPLATES[link_type] % template_vars)
2784             except (OSError, IOError):
2785                 self.report_error(f'Cannot write internet shortcut {linkfn}')
2786                 return False
2787             return True
2788
2789         write_links = {
2790             'url': self.params.get('writeurllink'),
2791             'webloc': self.params.get('writewebloclink'),
2792             'desktop': self.params.get('writedesktoplink'),
2793         }
2794         if self.params.get('writelink'):
2795             link_type = ('webloc' if sys.platform == 'darwin'
2796                          else 'desktop' if sys.platform.startswith('linux')
2797                          else 'url')
2798             write_links[link_type] = True
2799
2800         if any(should_write and not _write_link_file(link_type)
2801                for link_type, should_write in write_links.items()):
2802             return
2803
2804         try:
2805             info_dict, files_to_move = self.pre_process(info_dict, 'before_dl', files_to_move)
2806         except PostProcessingError as err:
2807             self.report_error('Preprocessing: %s' % str(err))
2808             return
2809
2810         must_record_download_archive = False
2811         if self.params.get('skip_download', False):
2812             info_dict['filepath'] = temp_filename
2813             info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))
2814             info_dict['__files_to_move'] = files_to_move
2815             info_dict = self.run_pp(MoveFilesAfterDownloadPP(self, False), info_dict)
2816         else:
2817             # Download
2818             info_dict.setdefault('__postprocessors', [])
2819             try:
2820
2821                 def existing_file(*filepaths):
2822                     ext = info_dict.get('ext')
2823                     final_ext = self.params.get('final_ext', ext)
2824                     existing_files = []
2825                     for file in orderedSet(filepaths):
2826                         if final_ext != ext:
2827                             converted = replace_extension(file, final_ext, ext)
2828                             if os.path.exists(encodeFilename(converted)):
2829                                 existing_files.append(converted)
2830                         if os.path.exists(encodeFilename(file)):
2831                             existing_files.append(file)
2832
2833                     if not existing_files or self.params.get('overwrites', False):
2834                         for file in orderedSet(existing_files):
2835                             self.report_file_delete(file)
2836                             os.remove(encodeFilename(file))
2837                         return None
2838
2839                     info_dict['ext'] = os.path.splitext(existing_files[0])[1][1:]
2840                     return existing_files[0]
2841
2842                 success = True
2843                 if info_dict.get('requested_formats') is not None:
2844
2845                     def compatible_formats(formats):
2846                         # TODO: some formats actually allow this (mkv, webm, ogg, mp4), but not all of them.
2847                         video_formats = [format for format in formats if format.get('vcodec') != 'none']
2848                         audio_formats = [format for format in formats if format.get('acodec') != 'none']
2849                         if len(video_formats) > 2 or len(audio_formats) > 2:
2850                             return False
2851
2852                         # Check extension
2853                         exts = set(format.get('ext') for format in formats)
2854                         COMPATIBLE_EXTS = (
2855                             set(('mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'ismv', 'isma')),
2856                             set(('webm',)),
2857                         )
2858                         for ext_sets in COMPATIBLE_EXTS:
2859                             if ext_sets.issuperset(exts):
2860                                 return True
2861                         # TODO: Check acodec/vcodec
2862                         return False
2863
2864                     requested_formats = info_dict['requested_formats']
2865                     old_ext = info_dict['ext']
2866                     if self.params.get('merge_output_format') is None:
2867                         if not compatible_formats(requested_formats):
2868                             info_dict['ext'] = 'mkv'
2869                             self.report_warning(
2870                                 'Requested formats are incompatible for merge and will be merged into mkv')
2871                         if (info_dict['ext'] == 'webm'
2872                                 and info_dict.get('thumbnails')
2873                                 # check with type instead of pp_key, __name__, or isinstance
2874                                 # since we dont want any custom PPs to trigger this
2875                                 and any(type(pp) == EmbedThumbnailPP for pp in self._pps['post_process'])):
2876                             info_dict['ext'] = 'mkv'
2877                             self.report_warning(
2878                                 'webm doesn\'t support embedding a thumbnail, mkv will be used')
2879                     new_ext = info_dict['ext']
2880
2881                     def correct_ext(filename, ext=new_ext):
2882                         if filename == '-':
2883                             return filename
2884                         filename_real_ext = os.path.splitext(filename)[1][1:]
2885                         filename_wo_ext = (
2886                             os.path.splitext(filename)[0]
2887                             if filename_real_ext in (old_ext, new_ext)
2888                             else filename)
2889                         return '%s.%s' % (filename_wo_ext, ext)
2890
2891                     # Ensure filename always has a correct extension for successful merge
2892                     full_filename = correct_ext(full_filename)
2893                     temp_filename = correct_ext(temp_filename)
2894                     dl_filename = existing_file(full_filename, temp_filename)
2895                     info_dict['__real_download'] = False
2896
2897                     downloaded = []
2898                     merger = FFmpegMergerPP(self)
2899
2900                     fd = get_suitable_downloader(info_dict, self.params, to_stdout=temp_filename == '-')
2901                     if dl_filename is not None:
2902                         self.report_file_already_downloaded(dl_filename)
2903                     elif fd:
2904                         for f in requested_formats if fd != FFmpegFD else []:
2905                             f['filepath'] = fname = prepend_extension(
2906                                 correct_ext(temp_filename, info_dict['ext']),
2907                                 'f%s' % f['format_id'], info_dict['ext'])
2908                             downloaded.append(fname)
2909                         info_dict['url'] = '\n'.join(f['url'] for f in requested_formats)
2910                         success, real_download = self.dl(temp_filename, info_dict)
2911                         info_dict['__real_download'] = real_download
2912                     else:
2913                         if self.params.get('allow_unplayable_formats'):
2914                             self.report_warning(
2915                                 'You have requested merging of multiple formats '
2916                                 'while also allowing unplayable formats to be downloaded. '
2917                                 'The formats won\'t be merged to prevent data corruption.')
2918                         elif not merger.available:
2919                             self.report_warning(
2920                                 'You have requested merging of multiple formats but ffmpeg is not installed. '
2921                                 'The formats won\'t be merged.')
2922
2923                         if temp_filename == '-':
2924                             reason = ('using a downloader other than ffmpeg' if FFmpegFD.can_merge_formats(info_dict, self.params)
2925                                       else 'but the formats are incompatible for simultaneous download' if merger.available
2926                                       else 'but ffmpeg is not installed')
2927                             self.report_warning(
2928                                 f'You have requested downloading multiple formats to stdout {reason}. '
2929                                 'The formats will be streamed one after the other')
2930                             fname = temp_filename
2931                         for f in requested_formats:
2932                             new_info = dict(info_dict)
2933                             del new_info['requested_formats']
2934                             new_info.update(f)
2935                             if temp_filename != '-':
2936                                 fname = prepend_extension(
2937                                     correct_ext(temp_filename, new_info['ext']),
2938                                     'f%s' % f['format_id'], new_info['ext'])
2939                                 if not self._ensure_dir_exists(fname):
2940                                     return
2941                                 f['filepath'] = fname
2942                                 downloaded.append(fname)
2943                             partial_success, real_download = self.dl(fname, new_info)
2944                             info_dict['__real_download'] = info_dict['__real_download'] or real_download
2945                             success = success and partial_success
2946
2947                     if downloaded and merger.available and not self.params.get('allow_unplayable_formats'):
2948                         info_dict['__postprocessors'].append(merger)
2949                         info_dict['__files_to_merge'] = downloaded
2950                         # Even if there were no downloads, it is being merged only now
2951                         info_dict['__real_download'] = True
2952                     else:
2953                         for file in downloaded:
2954                             files_to_move[file] = None
2955                 else:
2956                     # Just a single file
2957                     dl_filename = existing_file(full_filename, temp_filename)
2958                     if dl_filename is None or dl_filename == temp_filename:
2959                         # dl_filename == temp_filename could mean that the file was partially downloaded with --no-part.
2960                         # So we should try to resume the download
2961                         success, real_download = self.dl(temp_filename, info_dict)
2962                         info_dict['__real_download'] = real_download
2963                     else:
2964                         self.report_file_already_downloaded(dl_filename)
2965
2966                 dl_filename = dl_filename or temp_filename
2967                 info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))
2968
2969             except network_exceptions as err:
2970                 self.report_error('unable to download video data: %s' % error_to_compat_str(err))
2971                 return
2972             except (OSError, IOError) as err:
2973                 raise UnavailableVideoError(err)
2974             except (ContentTooShortError, ) as err:
2975                 self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
2976                 return
2977
2978             if success and full_filename != '-':
2979
2980                 def fixup():
2981                     do_fixup = True
2982                     fixup_policy = self.params.get('fixup')
2983                     vid = info_dict['id']
2984
2985                     if fixup_policy in ('ignore', 'never'):
2986                         return
2987                     elif fixup_policy == 'warn':
2988                         do_fixup = False
2989                     elif fixup_policy != 'force':
2990                         assert fixup_policy in ('detect_or_warn', None)
2991                         if not info_dict.get('__real_download'):
2992                             do_fixup = False
2993
2994                     def ffmpeg_fixup(cndn, msg, cls):
2995                         if not cndn:
2996                             return
2997                         if not do_fixup:
2998                             self.report_warning(f'{vid}: {msg}')
2999                             return
3000                         pp = cls(self)
3001                         if pp.available:
3002                             info_dict['__postprocessors'].append(pp)
3003                         else:
3004                             self.report_warning(f'{vid}: {msg}. Install ffmpeg to fix this automatically')
3005
3006                     stretched_ratio = info_dict.get('stretched_ratio')
3007                     ffmpeg_fixup(
3008                         stretched_ratio not in (1, None),
3009                         f'Non-uniform pixel ratio {stretched_ratio}',
3010                         FFmpegFixupStretchedPP)
3011
3012                     ffmpeg_fixup(
3013                         (info_dict.get('requested_formats') is None
3014                          and info_dict.get('container') == 'm4a_dash'
3015                          and info_dict.get('ext') == 'm4a'),
3016                         'writing DASH m4a. Only some players support this container',
3017                         FFmpegFixupM4aPP)
3018
3019                     downloader = get_suitable_downloader(info_dict, self.params) if 'protocol' in info_dict else None
3020                     downloader = downloader.__name__ if downloader else None
3021
3022                     if info_dict.get('requested_formats') is None:  # Not necessary if doing merger
3023                         ffmpeg_fixup(downloader == 'HlsFD',
3024                                      'Possible MPEG-TS in MP4 container or malformed AAC timestamps',
3025                                      FFmpegFixupM3u8PP)
3026                         ffmpeg_fixup(info_dict.get('is_live') and downloader == 'DashSegmentsFD',
3027                                      'Possible duplicate MOOV atoms', FFmpegFixupDuplicateMoovPP)
3028
3029                     ffmpeg_fixup(downloader == 'WebSocketFragmentFD', 'Malformed timestamps detected', FFmpegFixupTimestampPP)
3030                     ffmpeg_fixup(downloader == 'WebSocketFragmentFD', 'Malformed duration detected', FFmpegFixupDurationPP)
3031
3032                 fixup()
3033                 try:
3034                     info_dict = self.post_process(dl_filename, info_dict, files_to_move)
3035                 except PostProcessingError as err:
3036                     self.report_error('Postprocessing: %s' % str(err))
3037                     return
3038                 try:
3039                     for ph in self._post_hooks:
3040                         ph(info_dict['filepath'])
3041                 except Exception as err:
3042                     self.report_error('post hooks: %s' % str(err))
3043                     return
3044                 must_record_download_archive = True
3045
3046         if must_record_download_archive or self.params.get('force_write_download_archive', False):
3047             self.record_download_archive(info_dict)
3048         max_downloads = self.params.get('max_downloads')
3049         if max_downloads is not None and self._num_downloads >= int(max_downloads):
3050             raise MaxDownloadsReached()
3051
3052     def __download_wrapper(self, func):
3053         @functools.wraps(func)
3054         def wrapper(*args, **kwargs):
3055             try:
3056                 res = func(*args, **kwargs)
3057             except UnavailableVideoError as e:
3058                 self.report_error(e)
3059             except MaxDownloadsReached as e:
3060                 self.to_screen(f'[info] {e}')
3061                 raise
3062             except DownloadCancelled as e:
3063                 self.to_screen(f'[info] {e}')
3064                 if not self.params.get('break_per_url'):
3065                     raise
3066             else:
3067                 if self.params.get('dump_single_json', False):
3068                     self.post_extract(res)
3069                     self.to_stdout(json.dumps(self.sanitize_info(res)))
3070         return wrapper
3071
3072     def download(self, url_list):
3073         """Download a given list of URLs."""
3074         url_list = variadic(url_list)  # Passing a single URL is a common mistake
3075         outtmpl = self.outtmpl_dict['default']
3076         if (len(url_list) > 1
3077                 and outtmpl != '-'
3078                 and '%' not in outtmpl
3079                 and self.params.get('max_downloads') != 1):
3080             raise SameFileError(outtmpl)
3081
3082         for url in url_list:
3083             self.__download_wrapper(self.extract_info)(
3084                 url, force_generic_extractor=self.params.get('force_generic_extractor', False))
3085
3086         return self._download_retcode
3087
3088     def download_with_info_file(self, info_filename):
3089         with contextlib.closing(fileinput.FileInput(
3090                 [info_filename], mode='r',
3091                 openhook=fileinput.hook_encoded('utf-8'))) as f:
3092             # FileInput doesn't have a read method, we can't call json.load
3093             info = self.sanitize_info(json.loads('\n'.join(f)), self.params.get('clean_infojson', True))
3094         try:
3095             self.__download_wrapper(self.process_ie_result)(info, download=True)
3096         except (DownloadError, EntryNotInPlaylist, ReExtractInfo) as e:
3097             if not isinstance(e, EntryNotInPlaylist):
3098                 self.to_stderr('\r')
3099             webpage_url = info.get('webpage_url')
3100             if webpage_url is not None:
3101                 self.report_warning(f'The info failed to download: {e}; trying with URL {webpage_url}')
3102                 return self.download([webpage_url])
3103             else:
3104                 raise
3105         return self._download_retcode
3106
3107     @staticmethod
3108     def sanitize_info(info_dict, remove_private_keys=False):
3109         ''' Sanitize the infodict for converting to json '''
3110         if info_dict is None:
3111             return info_dict
3112         info_dict.setdefault('epoch', int(time.time()))
3113         remove_keys = {'__original_infodict'}  # Always remove this since this may contain a copy of the entire dict
3114         keep_keys = ['_type']  # Always keep this to facilitate load-info-json
3115         if remove_private_keys:
3116             remove_keys |= {
3117                 'requested_formats', 'requested_subtitles', 'requested_entries', 'entries',
3118                 'filepath', 'infojson_filename', 'original_url', 'playlist_autonumber',
3119             }
3120             empty_values = (None, {}, [], set(), tuple())
3121             reject = lambda k, v: k not in keep_keys and (
3122                 k.startswith('_') or k in remove_keys or v in empty_values)
3123         else:
3124             reject = lambda k, v: k in remove_keys
3125
3126         def filter_fn(obj):
3127             if isinstance(obj, dict):
3128                 return {k: filter_fn(v) for k, v in obj.items() if not reject(k, v)}
3129             elif isinstance(obj, (list, tuple, set, LazyList)):
3130                 return list(map(filter_fn, obj))
3131             elif obj is None or isinstance(obj, (str, int, float, bool)):
3132                 return obj
3133             else:
3134                 return repr(obj)
3135
3136         return filter_fn(info_dict)
3137
3138     @staticmethod
3139     def filter_requested_info(info_dict, actually_filter=True):
3140         ''' Alias of sanitize_info for backward compatibility '''
3141         return YoutubeDL.sanitize_info(info_dict, actually_filter)
3142
3143     def run_pp(self, pp, infodict):
3144         files_to_delete = []
3145         if '__files_to_move' not in infodict:
3146             infodict['__files_to_move'] = {}
3147         try:
3148             files_to_delete, infodict = pp.run(infodict)
3149         except PostProcessingError as e:
3150             # Must be True and not 'only_download'
3151             if self.params.get('ignoreerrors') is True:
3152                 self.report_error(e)
3153                 return infodict
3154             raise
3155
3156         if not files_to_delete:
3157             return infodict
3158         if self.params.get('keepvideo', False):
3159             for f in files_to_delete:
3160                 infodict['__files_to_move'].setdefault(f, '')
3161         else:
3162             for old_filename in set(files_to_delete):
3163                 self.to_screen('Deleting original file %s (pass -k to keep)' % old_filename)
3164                 try:
3165                     os.remove(encodeFilename(old_filename))
3166                 except (IOError, OSError):
3167                     self.report_warning('Unable to remove downloaded original file')
3168                 if old_filename in infodict['__files_to_move']:
3169                     del infodict['__files_to_move'][old_filename]
3170         return infodict
3171
3172     @staticmethod
3173     def post_extract(info_dict):
3174         def actual_post_extract(info_dict):
3175             if info_dict.get('_type') in ('playlist', 'multi_video'):
3176                 for video_dict in info_dict.get('entries', {}):
3177                     actual_post_extract(video_dict or {})
3178                 return
3179
3180             post_extractor = info_dict.get('__post_extractor') or (lambda: {})
3181             extra = post_extractor().items()
3182             info_dict.update(extra)
3183             info_dict.pop('__post_extractor', None)
3184
3185             original_infodict = info_dict.get('__original_infodict') or {}
3186             original_infodict.update(extra)
3187             original_infodict.pop('__post_extractor', None)
3188
3189         actual_post_extract(info_dict or {})
3190
3191     def pre_process(self, ie_info, key='pre_process', files_to_move=None):
3192         info = dict(ie_info)
3193         info['__files_to_move'] = files_to_move or {}
3194         for pp in self._pps[key]:
3195             info = self.run_pp(pp, info)
3196         return info, info.pop('__files_to_move', None)
3197
3198     def post_process(self, filename, ie_info, files_to_move=None):
3199         """Run all the postprocessors on the given file."""
3200         info = dict(ie_info)
3201         info['filepath'] = filename
3202         info['__files_to_move'] = files_to_move or {}
3203
3204         for pp in ie_info.get('__postprocessors', []) + self._pps['post_process']:
3205             info = self.run_pp(pp, info)
3206         info = self.run_pp(MoveFilesAfterDownloadPP(self), info)
3207         del info['__files_to_move']
3208         for pp in self._pps['after_move']:
3209             info = self.run_pp(pp, info)
3210         return info
3211
3212     def _make_archive_id(self, info_dict):
3213         video_id = info_dict.get('id')
3214         if not video_id:
3215             return
3216         # Future-proof against any change in case
3217         # and backwards compatibility with prior versions
3218         extractor = info_dict.get('extractor_key') or info_dict.get('ie_key')  # key in a playlist
3219         if extractor is None:
3220             url = str_or_none(info_dict.get('url'))
3221             if not url:
3222                 return
3223             # Try to find matching extractor for the URL and take its ie_key
3224             for ie_key, ie in self._ies.items():
3225                 if ie.suitable(url):
3226                     extractor = ie_key
3227                     break
3228             else:
3229                 return
3230         return '%s %s' % (extractor.lower(), video_id)
3231
3232     def in_download_archive(self, info_dict):
3233         fn = self.params.get('download_archive')
3234         if fn is None:
3235             return False
3236
3237         vid_id = self._make_archive_id(info_dict)
3238         if not vid_id:
3239             return False  # Incomplete video information
3240
3241         return vid_id in self.archive
3242
3243     def record_download_archive(self, info_dict):
3244         fn = self.params.get('download_archive')
3245         if fn is None:
3246             return
3247         vid_id = self._make_archive_id(info_dict)
3248         assert vid_id
3249         with locked_file(fn, 'a', encoding='utf-8') as archive_file:
3250             archive_file.write(vid_id + '\n')
3251         self.archive.add(vid_id)
3252
3253     @staticmethod
3254     def format_resolution(format, default='unknown'):
3255         if format.get('vcodec') == 'none' and format.get('acodec') != 'none':
3256             return 'audio only'
3257         if format.get('resolution') is not None:
3258             return format['resolution']
3259         if format.get('width') and format.get('height'):
3260             return '%dx%d' % (format['width'], format['height'])
3261         elif format.get('height'):
3262             return '%sp' % format['height']
3263         elif format.get('width'):
3264             return '%dx?' % format['width']
3265         return default
3266
3267     def _format_note(self, fdict):
3268         res = ''
3269         if fdict.get('ext') in ['f4f', 'f4m']:
3270             res += '(unsupported)'
3271         if fdict.get('language'):
3272             if res:
3273                 res += ' '
3274             res += '[%s]' % fdict['language']
3275         if fdict.get('format_note') is not None:
3276             if res:
3277                 res += ' '
3278             res += fdict['format_note']
3279         if fdict.get('tbr') is not None:
3280             if res:
3281                 res += ', '
3282             res += '%4dk' % fdict['tbr']
3283         if fdict.get('container') is not None:
3284             if res:
3285                 res += ', '
3286             res += '%s container' % fdict['container']
3287         if (fdict.get('vcodec') is not None
3288                 and fdict.get('vcodec') != 'none'):
3289             if res:
3290                 res += ', '
3291             res += fdict['vcodec']
3292             if fdict.get('vbr') is not None:
3293                 res += '@'
3294         elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
3295             res += 'video@'
3296         if fdict.get('vbr') is not None:
3297             res += '%4dk' % fdict['vbr']
3298         if fdict.get('fps') is not None:
3299             if res:
3300                 res += ', '
3301             res += '%sfps' % fdict['fps']
3302         if fdict.get('acodec') is not None:
3303             if res:
3304                 res += ', '
3305             if fdict['acodec'] == 'none':
3306                 res += 'video only'
3307             else:
3308                 res += '%-5s' % fdict['acodec']
3309         elif fdict.get('abr') is not None:
3310             if res:
3311                 res += ', '
3312             res += 'audio'
3313         if fdict.get('abr') is not None:
3314             res += '@%3dk' % fdict['abr']
3315         if fdict.get('asr') is not None:
3316             res += ' (%5dHz)' % fdict['asr']
3317         if fdict.get('filesize') is not None:
3318             if res:
3319                 res += ', '
3320             res += format_bytes(fdict['filesize'])
3321         elif fdict.get('filesize_approx') is not None:
3322             if res:
3323                 res += ', '
3324             res += '~' + format_bytes(fdict['filesize_approx'])
3325         return res
3326
3327     def _list_format_headers(self, *headers):
3328         if self.params.get('listformats_table', True) is not False:
3329             return [self._format_screen(header, self.Styles.HEADERS) for header in headers]
3330         return headers
3331
3332     def list_formats(self, info_dict):
3333         formats = info_dict.get('formats', [info_dict])
3334         new_format = self.params.get('listformats_table', True) is not False
3335         if new_format:
3336             delim = self._format_screen('\u2502', self.Styles.DELIM, '|', test_encoding=True)
3337             table = [
3338                 [
3339                     self._format_screen(format_field(f, 'format_id'), self.Styles.ID),
3340                     format_field(f, 'ext'),
3341                     format_field(f, func=self.format_resolution, ignore=('audio only', 'images')),
3342                     format_field(f, 'fps', '\t%d'),
3343                     format_field(f, 'dynamic_range', '%s', ignore=(None, 'SDR')).replace('HDR', ''),
3344                     delim,
3345                     format_field(f, 'filesize', ' \t%s', func=format_bytes) + format_field(f, 'filesize_approx', '~\t%s', func=format_bytes),
3346                     format_field(f, 'tbr', '\t%dk'),
3347                     shorten_protocol_name(f.get('protocol', '').replace('native', 'n')),
3348                     delim,
3349                     format_field(f, 'vcodec', default='unknown').replace(
3350                         'none',
3351                         'images' if f.get('acodec') == 'none'
3352                         else self._format_screen('audio only', self.Styles.SUPPRESS)),
3353                     format_field(f, 'vbr', '\t%dk'),
3354                     format_field(f, 'acodec', default='unknown').replace(
3355                         'none',
3356                         '' if f.get('vcodec') == 'none'
3357                         else self._format_screen('video only', self.Styles.SUPPRESS)),
3358                     format_field(f, 'abr', '\t%dk'),
3359                     format_field(f, 'asr', '\t%dHz'),
3360                     join_nonempty(
3361                         self._format_screen('UNSUPPORTED', 'light red') if f.get('ext') in ('f4f', 'f4m') else None,
3362                         format_field(f, 'language', '[%s]'),
3363                         join_nonempty(
3364                             format_field(f, 'format_note'),
3365                             format_field(f, 'container', ignore=(None, f.get('ext'))),
3366                             delim=', '),
3367                         delim=' '),
3368                 ] for f in formats if f.get('preference') is None or f['preference'] >= -1000]
3369             header_line = self._list_format_headers(
3370                 'ID', 'EXT', 'RESOLUTION', '\tFPS', 'HDR', delim, '\tFILESIZE', '\tTBR', 'PROTO',
3371                 delim, 'VCODEC', '\tVBR', 'ACODEC', '\tABR', '\tASR', 'MORE INFO')
3372         else:
3373             table = [
3374                 [
3375                     format_field(f, 'format_id'),
3376                     format_field(f, 'ext'),
3377                     self.format_resolution(f),
3378                     self._format_note(f)]
3379                 for f in formats
3380                 if f.get('preference') is None or f['preference'] >= -1000]
3381             header_line = ['format code', 'extension', 'resolution', 'note']
3382
3383         self.to_screen(
3384             '[info] Available formats for %s:' % info_dict['id'])
3385         self.to_stdout(render_table(
3386             header_line, table,
3387             extra_gap=(0 if new_format else 1),
3388             hide_empty=new_format,
3389             delim=new_format and self._format_screen('\u2500', self.Styles.DELIM, '-', test_encoding=True)))
3390
3391     def list_thumbnails(self, info_dict):
3392         thumbnails = list(info_dict.get('thumbnails'))
3393         if not thumbnails:
3394             self.to_screen('[info] No thumbnails present for %s' % info_dict['id'])
3395             return
3396
3397         self.to_screen(
3398             '[info] Thumbnails for %s:' % info_dict['id'])
3399         self.to_stdout(render_table(
3400             self._list_format_headers('ID', 'Width', 'Height', 'URL'),
3401             [[t['id'], t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']] for t in thumbnails]))
3402
3403     def list_subtitles(self, video_id, subtitles, name='subtitles'):
3404         if not subtitles:
3405             self.to_screen('%s has no %s' % (video_id, name))
3406             return
3407         self.to_screen(
3408             'Available %s for %s:' % (name, video_id))
3409
3410         def _row(lang, formats):
3411             exts, names = zip(*((f['ext'], f.get('name') or 'unknown') for f in reversed(formats)))
3412             if len(set(names)) == 1:
3413                 names = [] if names[0] == 'unknown' else names[:1]
3414             return [lang, ', '.join(names), ', '.join(exts)]
3415
3416         self.to_stdout(render_table(
3417             self._list_format_headers('Language', 'Name', 'Formats'),
3418             [_row(lang, formats) for lang, formats in subtitles.items()],
3419             hide_empty=True))
3420
3421     def urlopen(self, req):
3422         """ Start an HTTP download """
3423         if isinstance(req, compat_basestring):
3424             req = sanitized_Request(req)
3425         return self._opener.open(req, timeout=self._socket_timeout)
3426
3427     def print_debug_header(self):
3428         if not self.params.get('verbose'):
3429             return
3430
3431         def get_encoding(stream):
3432             ret = getattr(stream, 'encoding', 'missing (%s)' % type(stream).__name__)
3433             if not supports_terminal_sequences(stream):
3434                 from .compat import WINDOWS_VT_MODE
3435                 ret += ' (No VT)' if WINDOWS_VT_MODE is False else ' (No ANSI)'
3436             return ret
3437
3438         encoding_str = 'Encodings: locale %s, fs %s, out %s, err %s, pref %s' % (
3439             locale.getpreferredencoding(),
3440             sys.getfilesystemencoding(),
3441             get_encoding(self._screen_file), get_encoding(self._err_file),
3442             self.get_encoding())
3443
3444         logger = self.params.get('logger')
3445         if logger:
3446             write_debug = lambda msg: logger.debug(f'[debug] {msg}')
3447             write_debug(encoding_str)
3448         else:
3449             write_string(f'[debug] {encoding_str}\n', encoding=None)
3450             write_debug = lambda msg: self._write_string(f'[debug] {msg}\n')
3451
3452         source = detect_variant()
3453         write_debug(join_nonempty(
3454             'yt-dlp version', __version__,
3455             f'[{RELEASE_GIT_HEAD}]' if RELEASE_GIT_HEAD else '',
3456             '' if source == 'unknown' else f'({source})',
3457             delim=' '))
3458         if not _LAZY_LOADER:
3459             if os.environ.get('YTDLP_NO_LAZY_EXTRACTORS'):
3460                 write_debug('Lazy loading extractors is forcibly disabled')
3461             else:
3462                 write_debug('Lazy loading extractors is disabled')
3463         if plugin_extractors or plugin_postprocessors:
3464             write_debug('Plugins: %s' % [
3465                 '%s%s' % (klass.__name__, '' if klass.__name__ == name else f' as {name}')
3466                 for name, klass in itertools.chain(plugin_extractors.items(), plugin_postprocessors.items())])
3467         if self.params.get('compat_opts'):
3468             write_debug('Compatibility options: %s' % ', '.join(self.params.get('compat_opts')))
3469
3470         if source == 'source':
3471             try:
3472                 sp = Popen(
3473                     ['git', 'rev-parse', '--short', 'HEAD'],
3474                     stdout=subprocess.PIPE, stderr=subprocess.PIPE,
3475                     cwd=os.path.dirname(os.path.abspath(__file__)))
3476                 out, err = sp.communicate_or_kill()
3477                 out = out.decode().strip()
3478                 if re.match('[0-9a-f]+', out):
3479                     write_debug('Git HEAD: %s' % out)
3480             except Exception:
3481                 try:
3482                     sys.exc_clear()
3483                 except Exception:
3484                     pass
3485
3486         def python_implementation():
3487             impl_name = platform.python_implementation()
3488             if impl_name == 'PyPy' and hasattr(sys, 'pypy_version_info'):
3489                 return impl_name + ' version %d.%d.%d' % sys.pypy_version_info[:3]
3490             return impl_name
3491
3492         write_debug('Python version %s (%s %s) - %s' % (
3493             platform.python_version(),
3494             python_implementation(),
3495             platform.architecture()[0],
3496             platform_name()))
3497
3498         exe_versions, ffmpeg_features = FFmpegPostProcessor.get_versions_and_features(self)
3499         ffmpeg_features = {key for key, val in ffmpeg_features.items() if val}
3500         if ffmpeg_features:
3501             exe_versions['ffmpeg'] += ' (%s)' % ','.join(ffmpeg_features)
3502
3503         exe_versions['rtmpdump'] = rtmpdump_version()
3504         exe_versions['phantomjs'] = PhantomJSwrapper._version()
3505         exe_str = ', '.join(
3506             f'{exe} {v}' for exe, v in sorted(exe_versions.items()) if v
3507         ) or 'none'
3508         write_debug('exe versions: %s' % exe_str)
3509
3510         from .downloader.websocket import has_websockets
3511         from .postprocessor.embedthumbnail import has_mutagen
3512         from .cookies import SQLITE_AVAILABLE, KEYRING_AVAILABLE
3513
3514         lib_str = join_nonempty(
3515             compat_pycrypto_AES and compat_pycrypto_AES.__name__.split('.')[0],
3516             KEYRING_AVAILABLE and 'keyring',
3517             has_mutagen and 'mutagen',
3518             SQLITE_AVAILABLE and 'sqlite',
3519             has_websockets and 'websockets',
3520             delim=', ') or 'none'
3521         write_debug('Optional libraries: %s' % lib_str)
3522
3523         proxy_map = {}
3524         for handler in self._opener.handlers:
3525             if hasattr(handler, 'proxies'):
3526                 proxy_map.update(handler.proxies)
3527         write_debug(f'Proxy map: {proxy_map}')
3528
3529         # Not implemented
3530         if False and self.params.get('call_home'):
3531             ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode('utf-8')
3532             write_debug('Public IP address: %s' % ipaddr)
3533             latest_version = self.urlopen(
3534                 'https://yt-dl.org/latest/version').read().decode('utf-8')
3535             if version_tuple(latest_version) > version_tuple(__version__):
3536                 self.report_warning(
3537                     'You are using an outdated version (newest version: %s)! '
3538                     'See https://yt-dl.org/update if you need help updating.' %
3539                     latest_version)
3540
3541     def _setup_opener(self):
3542         timeout_val = self.params.get('socket_timeout')
3543         self._socket_timeout = 20 if timeout_val is None else float(timeout_val)
3544
3545         opts_cookiesfrombrowser = self.params.get('cookiesfrombrowser')
3546         opts_cookiefile = self.params.get('cookiefile')
3547         opts_proxy = self.params.get('proxy')
3548
3549         self.cookiejar = load_cookies(opts_cookiefile, opts_cookiesfrombrowser, self)
3550
3551         cookie_processor = YoutubeDLCookieProcessor(self.cookiejar)
3552         if opts_proxy is not None:
3553             if opts_proxy == '':
3554                 proxies = {}
3555             else:
3556                 proxies = {'http': opts_proxy, 'https': opts_proxy}
3557         else:
3558             proxies = compat_urllib_request.getproxies()
3559             # Set HTTPS proxy to HTTP one if given (https://github.com/ytdl-org/youtube-dl/issues/805)
3560             if 'http' in proxies and 'https' not in proxies:
3561                 proxies['https'] = proxies['http']
3562         proxy_handler = PerRequestProxyHandler(proxies)
3563
3564         debuglevel = 1 if self.params.get('debug_printtraffic') else 0
3565         https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
3566         ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
3567         redirect_handler = YoutubeDLRedirectHandler()
3568         data_handler = compat_urllib_request_DataHandler()
3569
3570         # When passing our own FileHandler instance, build_opener won't add the
3571         # default FileHandler and allows us to disable the file protocol, which
3572         # can be used for malicious purposes (see
3573         # https://github.com/ytdl-org/youtube-dl/issues/8227)
3574         file_handler = compat_urllib_request.FileHandler()
3575
3576         def file_open(*args, **kwargs):
3577             raise compat_urllib_error.URLError('file:// scheme is explicitly disabled in yt-dlp for security reasons')
3578         file_handler.file_open = file_open
3579
3580         opener = compat_urllib_request.build_opener(
3581             proxy_handler, https_handler, cookie_processor, ydlh, redirect_handler, data_handler, file_handler)
3582
3583         # Delete the default user-agent header, which would otherwise apply in
3584         # cases where our custom HTTP handler doesn't come into play
3585         # (See https://github.com/ytdl-org/youtube-dl/issues/1309 for details)
3586         opener.addheaders = []
3587         self._opener = opener
3588
3589     def encode(self, s):
3590         if isinstance(s, bytes):
3591             return s  # Already encoded
3592
3593         try:
3594             return s.encode(self.get_encoding())
3595         except UnicodeEncodeError as err:
3596             err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
3597             raise
3598
3599     def get_encoding(self):
3600         encoding = self.params.get('encoding')
3601         if encoding is None:
3602             encoding = preferredencoding()
3603         return encoding
3604
3605     def _write_info_json(self, label, ie_result, infofn, overwrite=None):
3606         ''' Write infojson and returns True = written, False = skip, None = error '''
3607         if overwrite is None:
3608             overwrite = self.params.get('overwrites', True)
3609         if not self.params.get('writeinfojson'):
3610             return False
3611         elif not infofn:
3612             self.write_debug(f'Skipping writing {label} infojson')
3613             return False
3614         elif not self._ensure_dir_exists(infofn):
3615             return None
3616         elif not overwrite and os.path.exists(infofn):
3617             self.to_screen(f'[info] {label.title()} metadata is already present')
3618         else:
3619             self.to_screen(f'[info] Writing {label} metadata as JSON to: {infofn}')
3620             try:
3621                 write_json_file(self.sanitize_info(ie_result, self.params.get('clean_infojson', True)), infofn)
3622             except (OSError, IOError):
3623                 self.report_error(f'Cannot write {label} metadata to JSON file {infofn}')
3624                 return None
3625         return True
3626
3627     def _write_description(self, label, ie_result, descfn):
3628         ''' Write description and returns True = written, False = skip, None = error '''
3629         if not self.params.get('writedescription'):
3630             return False
3631         elif not descfn:
3632             self.write_debug(f'Skipping writing {label} description')
3633             return False
3634         elif not self._ensure_dir_exists(descfn):
3635             return None
3636         elif not self.params.get('overwrites', True) and os.path.exists(descfn):
3637             self.to_screen(f'[info] {label.title()} description is already present')
3638         elif ie_result.get('description') is None:
3639             self.report_warning(f'There\'s no {label} description to write')
3640             return False
3641         else:
3642             try:
3643                 self.to_screen(f'[info] Writing {label} description to: {descfn}')
3644                 with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
3645                     descfile.write(ie_result['description'])
3646             except (OSError, IOError):
3647                 self.report_error(f'Cannot write {label} description file {descfn}')
3648                 return None
3649         return True
3650
3651     def _write_subtitles(self, info_dict, filename):
3652         ''' Write subtitles to file and return list of (sub_filename, final_sub_filename); or None if error'''
3653         ret = []
3654         subtitles = info_dict.get('requested_subtitles')
3655         if not subtitles or not (self.params.get('writesubtitles') or self.params.get('writeautomaticsub')):
3656             # subtitles download errors are already managed as troubles in relevant IE
3657             # that way it will silently go on when used with unsupporting IE
3658             return ret
3659
3660         sub_filename_base = self.prepare_filename(info_dict, 'subtitle')
3661         if not sub_filename_base:
3662             self.to_screen('[info] Skipping writing video subtitles')
3663             return ret
3664         for sub_lang, sub_info in subtitles.items():
3665             sub_format = sub_info['ext']
3666             sub_filename = subtitles_filename(filename, sub_lang, sub_format, info_dict.get('ext'))
3667             sub_filename_final = subtitles_filename(sub_filename_base, sub_lang, sub_format, info_dict.get('ext'))
3668             if not self.params.get('overwrites', True) and os.path.exists(sub_filename):
3669                 self.to_screen(f'[info] Video subtitle {sub_lang}.{sub_format} is already present')
3670                 sub_info['filepath'] = sub_filename
3671                 ret.append((sub_filename, sub_filename_final))
3672                 continue
3673
3674             self.to_screen(f'[info] Writing video subtitles to: {sub_filename}')
3675             if sub_info.get('data') is not None:
3676                 try:
3677                     # Use newline='' to prevent conversion of newline characters
3678                     # See https://github.com/ytdl-org/youtube-dl/issues/10268
3679                     with io.open(sub_filename, 'w', encoding='utf-8', newline='') as subfile:
3680                         subfile.write(sub_info['data'])
3681                     sub_info['filepath'] = sub_filename
3682                     ret.append((sub_filename, sub_filename_final))
3683                     continue
3684                 except (OSError, IOError):
3685                     self.report_error(f'Cannot write video subtitles file {sub_filename}')
3686                     return None
3687
3688             try:
3689                 sub_copy = sub_info.copy()
3690                 sub_copy.setdefault('http_headers', info_dict.get('http_headers'))
3691                 self.dl(sub_filename, sub_copy, subtitle=True)
3692                 sub_info['filepath'] = sub_filename
3693                 ret.append((sub_filename, sub_filename_final))
3694             except (ExtractorError, IOError, OSError, ValueError) + network_exceptions as err:
3695                 self.report_warning(f'Unable to download video subtitles for {sub_lang!r}: {err}')
3696                 continue
3697         return ret
3698
3699     def _write_thumbnails(self, label, info_dict, filename, thumb_filename_base=None):
3700         ''' Write thumbnails to file and return list of (thumb_filename, final_thumb_filename) '''
3701         write_all = self.params.get('write_all_thumbnails', False)
3702         thumbnails, ret = [], []
3703         if write_all or self.params.get('writethumbnail', False):
3704             thumbnails = info_dict.get('thumbnails') or []
3705         multiple = write_all and len(thumbnails) > 1
3706
3707         if thumb_filename_base is None:
3708             thumb_filename_base = filename
3709         if thumbnails and not thumb_filename_base:
3710             self.write_debug(f'Skipping writing {label} thumbnail')
3711             return ret
3712
3713         for idx, t in list(enumerate(thumbnails))[::-1]:
3714             thumb_ext = (f'{t["id"]}.' if multiple else '') + determine_ext(t['url'], 'jpg')
3715             thumb_display_id = f'{label} thumbnail {t["id"]}'
3716             thumb_filename = replace_extension(filename, thumb_ext, info_dict.get('ext'))
3717             thumb_filename_final = replace_extension(thumb_filename_base, thumb_ext, info_dict.get('ext'))
3718
3719             if not self.params.get('overwrites', True) and os.path.exists(thumb_filename):
3720                 ret.append((thumb_filename, thumb_filename_final))
3721                 t['filepath'] = thumb_filename
3722                 self.to_screen('[info] %s is already present' % (
3723                     thumb_display_id if multiple else f'{label} thumbnail').capitalize())
3724             else:
3725                 self.to_screen(f'[info] Downloading {thumb_display_id} ...')
3726                 try:
3727                     uf = self.urlopen(t['url'])
3728                     self.to_screen(f'[info] Writing {thumb_display_id} to: {thumb_filename}')
3729                     with open(encodeFilename(thumb_filename), 'wb') as thumbf:
3730                         shutil.copyfileobj(uf, thumbf)
3731                     ret.append((thumb_filename, thumb_filename_final))
3732                     t['filepath'] = thumb_filename
3733                 except network_exceptions as err:
3734                     thumbnails.pop(idx)
3735                     self.report_warning(f'Unable to download {thumb_display_id}: {err}')
3736             if ret and not write_all:
3737                 break
3738         return ret