yt_dlp/YoutubeDL.py

   1 #!/usr/bin/env python3
   2 # coding: utf-8
   3
   4 from __future__ import absolute_import, unicode_literals
   5
   6 import collections
   7 import contextlib
   8 import copy
   9 import datetime
  10 import errno
  11 import fileinput
  12 import functools
  13 import io
  14 import itertools
  15 import json
  16 import locale
  17 import operator
  18 import os
  19 import platform
  20 import re
  21 import shutil
  22 import subprocess
  23 import sys
  24 import tempfile
  25 import time
  26 import tokenize
  27 import traceback
  28 import random
  29 import unicodedata
  30
  31 from enum import Enum
  32 from string import ascii_letters
  33
  34 from .compat import (
  35     compat_basestring,
  36     compat_get_terminal_size,
  37     compat_kwargs,
  38     compat_numeric_types,
  39     compat_os_name,
  40     compat_pycrypto_AES,
  41     compat_shlex_quote,
  42     compat_str,
  43     compat_tokenize_tokenize,
  44     compat_urllib_error,
  45     compat_urllib_request,
  46     compat_urllib_request_DataHandler,
  47     windows_enable_vt_mode,
  48 )
  49 from .cookies import load_cookies
  50 from .utils import (
  51     age_restricted,
  52     args_to_str,
  53     ContentTooShortError,
  54     date_from_str,
  55     DateRange,
  56     DEFAULT_OUTTMPL,
  57     determine_ext,
  58     determine_protocol,
  59     DownloadCancelled,
  60     DownloadError,
  61     encode_compat_str,
  62     encodeFilename,
  63     EntryNotInPlaylist,
  64     error_to_compat_str,
  65     ExistingVideoReached,
  66     expand_path,
  67     ExtractorError,
  68     float_or_none,
  69     format_bytes,
  70     format_field,
  71     formatSeconds,
  72     GeoRestrictedError,
  73     HEADRequest,
  74     int_or_none,
  75     iri_to_uri,
  76     ISO3166Utils,
  77     join_nonempty,
  78     LazyList,
  79     LINK_TEMPLATES,
  80     locked_file,
  81     make_dir,
  82     make_HTTPS_handler,
  83     MaxDownloadsReached,
  84     network_exceptions,
  85     number_of_digits,
  86     orderedSet,
  87     OUTTMPL_TYPES,
  88     PagedList,
  89     parse_filesize,
  90     PerRequestProxyHandler,
  91     platform_name,
  92     Popen,
  93     PostProcessingError,
  94     preferredencoding,
  95     prepend_extension,
  96     register_socks_protocols,
  97     RejectedVideoReached,
  98     render_table,
  99     replace_extension,
 100     SameFileError,
 101     sanitize_filename,
 102     sanitize_path,
 103     sanitize_url,
 104     sanitized_Request,
 105     std_headers,
 106     STR_FORMAT_RE_TMPL,
 107     STR_FORMAT_TYPES,
 108     str_or_none,
 109     strftime_or_none,
 110     subtitles_filename,
 111     supports_terminal_sequences,
 112     ThrottledDownload,
 113     to_high_limit_path,
 114     traverse_obj,
 115     try_get,
 116     UnavailableVideoError,
 117     url_basename,
 118     variadic,
 119     version_tuple,
 120     write_json_file,
 121     write_string,
 122     YoutubeDLCookieProcessor,
 123     YoutubeDLHandler,
 124     YoutubeDLRedirectHandler,
 125 )
 126 from .cache import Cache
 127 from .minicurses import format_text
 128 from .extractor import (
 129     gen_extractor_classes,
 130     get_info_extractor,
 131     _LAZY_LOADER,
 132     _PLUGIN_CLASSES as plugin_extractors
 133 )
 134 from .extractor.openload import PhantomJSwrapper
 135 from .downloader import (
 136     FFmpegFD,
 137     get_suitable_downloader,
 138     shorten_protocol_name
 139 )
 140 from .downloader.rtmp import rtmpdump_version
 141 from .postprocessor import (
 142     get_postprocessor,
 143     EmbedThumbnailPP,
 144     FFmpegFixupDurationPP,
 145     FFmpegFixupM3u8PP,
 146     FFmpegFixupM4aPP,
 147     FFmpegFixupStretchedPP,
 148     FFmpegFixupTimestampPP,
 149     FFmpegMergerPP,
 150     FFmpegPostProcessor,
 151     MoveFilesAfterDownloadPP,
 152     _PLUGIN_CLASSES as plugin_postprocessors
 153 )
 154 from .update import detect_variant
 155 from .version import __version__
 156
 157 if compat_os_name == 'nt':
 158     import ctypes
 159
 160
 161 class YoutubeDL(object):
 162     """YoutubeDL class.
 163
 164     YoutubeDL objects are the ones responsible for downloading the
 165     actual video file and writing it to disk if the user has requested
 166     it, among some other tasks. In most cases there should be one per
 167     program. As, given a video URL, the downloader doesn't know how to
 168     extract all the needed information, task that InfoExtractors do, it
 169     has to pass the URL to one of them.
 170
 171     For this, YoutubeDL objects have a method that allows
 172     InfoExtractors to be registered in a given order. When it is passed
 173     a URL, the YoutubeDL object hands it to the first InfoExtractor it
 174     finds that reports being able to handle it. The InfoExtractor extracts
 175     all the information about the video or videos the URL refers to, and
 176     YoutubeDL processes the extracted information, possibly using a File
 177     Downloader to download the video.
 178
 179     YoutubeDL objects accept a lot of parameters. In order not to saturate
 180     the object constructor with arguments, it receives a dictionary of
 181     options instead. These options are available through the params
 182     attribute for the InfoExtractors to use. The YoutubeDL also
 183     registers itself as the downloader in charge for the InfoExtractors
 184     that are added to it, so this is a "mutual registration".
 185
 186     Available options:
 187
 188     username:          Username for authentication purposes.
 189     password:          Password for authentication purposes.
 190     videopassword:     Password for accessing a video.
 191     ap_mso:            Adobe Pass multiple-system operator identifier.
 192     ap_username:       Multiple-system operator account username.
 193     ap_password:       Multiple-system operator account password.
 194     usenetrc:          Use netrc for authentication instead.
 195     verbose:           Print additional info to stdout.
 196     quiet:             Do not print messages to stdout.
 197     no_warnings:       Do not print out anything for warnings.
 198     forceprint:        A list of templates to force print
 199     forceurl:          Force printing final URL. (Deprecated)
 200     forcetitle:        Force printing title. (Deprecated)
 201     forceid:           Force printing ID. (Deprecated)
 202     forcethumbnail:    Force printing thumbnail URL. (Deprecated)
 203     forcedescription:  Force printing description. (Deprecated)
 204     forcefilename:     Force printing final filename. (Deprecated)
 205     forceduration:     Force printing duration. (Deprecated)
 206     forcejson:         Force printing info_dict as JSON.
 207     dump_single_json:  Force printing the info_dict of the whole playlist
 208                        (or video) as a single JSON line.
 209     force_write_download_archive: Force writing download archive regardless
 210                        of 'skip_download' or 'simulate'.
 211     simulate:          Do not download the video files. If unset (or None),
 212                        simulate only if listsubtitles, listformats or list_thumbnails is used
 213     format:            Video format code. See "FORMAT SELECTION" for more details.
 214     allow_unplayable_formats:   Allow unplayable formats to be extracted and downloaded.
 215     ignore_no_formats_error: Ignore "No video formats" error. Useful for
 216                        extracting metadata even if the video is not actually
 217                        available for download (experimental)
 218     format_sort:       A list of fields by which to sort the video formats.
 219                        See "Sorting Formats" for more details.
 220     format_sort_force: Force the given format_sort. See "Sorting Formats"
 221                        for more details.
 222     allow_multiple_video_streams:   Allow multiple video streams to be merged
 223                        into a single file
 224     allow_multiple_audio_streams:   Allow multiple audio streams to be merged
 225                        into a single file
 226     check_formats:     Whether to test if the formats are downloadable.
 227                        Can be True (check all), False (check none),
 228                        'selected' (check selected formats),
 229                        or None (check only if requested by extractor)
 230     paths:             Dictionary of output paths. The allowed keys are 'home',
 231                        'temp' and the keys of OUTTMPL_TYPES (in utils.py)
 232     outtmpl:           Dictionary of templates for output names. Allowed keys
 233                        are 'default' and the keys of OUTTMPL_TYPES (in utils.py).
 234                        For compatibility with youtube-dl, a single string can also be used
 235     outtmpl_na_placeholder: Placeholder for unavailable meta fields.
 236     restrictfilenames: Do not allow "&" and spaces in file names
 237     trim_file_name:    Limit length of filename (extension excluded)
 238     windowsfilenames:  Force the filenames to be windows compatible
 239     ignoreerrors:      Do not stop on download/postprocessing errors.
 240                        Can be 'only_download' to ignore only download errors.
 241                        Default is 'only_download' for CLI, but False for API
 242     skip_playlist_after_errors: Number of allowed failures until the rest of
 243                        the playlist is skipped
 244     force_generic_extractor: Force downloader to use the generic extractor
 245     overwrites:        Overwrite all video and metadata files if True,
 246                        overwrite only non-video files if None
 247                        and don't overwrite any file if False
 248                        For compatibility with youtube-dl,
 249                        "nooverwrites" may also be used instead
 250     playliststart:     Playlist item to start at.
 251     playlistend:       Playlist item to end at.
 252     playlist_items:    Specific indices of playlist to download.
 253     playlistreverse:   Download playlist items in reverse order.
 254     playlistrandom:    Download playlist items in random order.
 255     matchtitle:        Download only matching titles.
 256     rejecttitle:       Reject downloads for matching titles.
 257     logger:            Log messages to a logging.Logger instance.
 258     logtostderr:       Log messages to stderr instead of stdout.
 259     consoletitle:       Display progress in console window's titlebar.
 260     writedescription:  Write the video description to a .description file
 261     writeinfojson:     Write the video metadata to a .info.json file
 262     clean_infojson:    Remove private fields from the infojson
 263     getcomments:       Extract video comments. This will not be written to disk
 264                        unless writeinfojson is also given
 265     writeannotations:  Write the video annotations to a .annotations.xml file
 266     writethumbnail:    Write the thumbnail image to a file
 267     allow_playlist_files: Whether to write playlists' description, infojson etc
 268                        also to disk when using the 'write*' options
 269     write_all_thumbnails:  Write all thumbnail formats to files
 270     writelink:         Write an internet shortcut file, depending on the
 271                        current platform (.url/.webloc/.desktop)
 272     writeurllink:      Write a Windows internet shortcut file (.url)
 273     writewebloclink:   Write a macOS internet shortcut file (.webloc)
 274     writedesktoplink:  Write a Linux internet shortcut file (.desktop)
 275     writesubtitles:    Write the video subtitles to a file
 276     writeautomaticsub: Write the automatically generated subtitles to a file
 277     allsubtitles:      Deprecated - Use subtitleslangs = ['all']
 278                        Downloads all the subtitles of the video
 279                        (requires writesubtitles or writeautomaticsub)
 280     listsubtitles:     Lists all available subtitles for the video
 281     subtitlesformat:   The format code for subtitles
 282     subtitleslangs:    List of languages of the subtitles to download (can be regex).
 283                        The list may contain "all" to refer to all the available
 284                        subtitles. The language can be prefixed with a "-" to
 285                        exclude it from the requested languages. Eg: ['all', '-live_chat']
 286     keepvideo:         Keep the video file after post-processing
 287     daterange:         A DateRange object, download only if the upload_date is in the range.
 288     skip_download:     Skip the actual download of the video file
 289     cachedir:          Location of the cache files in the filesystem.
 290                        False to disable filesystem cache.
 291     noplaylist:        Download single video instead of a playlist if in doubt.
 292     age_limit:         An integer representing the user's age in years.
 293                        Unsuitable videos for the given age are skipped.
 294     min_views:         An integer representing the minimum view count the video
 295                        must have in order to not be skipped.
 296                        Videos without view count information are always
 297                        downloaded. None for no limit.
 298     max_views:         An integer representing the maximum view count.
 299                        Videos that are more popular than that are not
 300                        downloaded.
 301                        Videos without view count information are always
 302                        downloaded. None for no limit.
 303     download_archive:  File name of a file where all downloads are recorded.
 304                        Videos already present in the file are not downloaded
 305                        again.
 306     break_on_existing: Stop the download process after attempting to download a
 307                        file that is in the archive.
 308     break_on_reject:   Stop the download process when encountering a video that
 309                        has been filtered out.
 310     cookiefile:        File name where cookies should be read from and dumped to
 311     cookiesfrombrowser: A tuple containing the name of the browser and the profile
 312                        name/path from where cookies are loaded.
 313                        Eg: ('chrome', ) or ('vivaldi', 'default')
 314     nocheckcertificate:Do not verify SSL certificates
 315     prefer_insecure:   Use HTTP instead of HTTPS to retrieve information.
 316                        At the moment, this is only supported by YouTube.
 317     proxy:             URL of the proxy server to use
 318     geo_verification_proxy:  URL of the proxy to use for IP address verification
 319                        on geo-restricted sites.
 320     socket_timeout:    Time to wait for unresponsive hosts, in seconds
 321     bidi_workaround:   Work around buggy terminals without bidirectional text
 322                        support, using fribidi
 323     debug_printtraffic:Print out sent and received HTTP traffic
 324     include_ads:       Download ads as well
 325     default_search:    Prepend this string if an input url is not valid.
 326                        'auto' for elaborate guessing
 327     encoding:          Use this encoding instead of the system-specified.
 328     extract_flat:      Do not resolve URLs, return the immediate result.
 329                        Pass in 'in_playlist' to only show this behavior for
 330                        playlist items.
 331     postprocessors:    A list of dictionaries, each with an entry
 332                        * key:  The name of the postprocessor. See
 333                                yt_dlp/postprocessor/__init__.py for a list.
 334                        * when: When to run the postprocessor. Can be one of
 335                                pre_process|before_dl|post_process|after_move.
 336                                Assumed to be 'post_process' if not given
 337     post_hooks:        Deprecated - Register a custom postprocessor instead
 338                        A list of functions that get called as the final step
 339                        for each video file, after all postprocessors have been
 340                        called. The filename will be passed as the only argument.
 341     progress_hooks:    A list of functions that get called on download
 342                        progress, with a dictionary with the entries
 343                        * status: One of "downloading", "error", or "finished".
 344                                  Check this first and ignore unknown values.
 345                        * info_dict: The extracted info_dict
 346
 347                        If status is one of "downloading", or "finished", the
 348                        following properties may also be present:
 349                        * filename: The final filename (always present)
 350                        * tmpfilename: The filename we're currently writing to
 351                        * downloaded_bytes: Bytes on disk
 352                        * total_bytes: Size of the whole file, None if unknown
 353                        * total_bytes_estimate: Guess of the eventual file size,
 354                                                None if unavailable.
 355                        * elapsed: The number of seconds since download started.
 356                        * eta: The estimated time in seconds, None if unknown
 357                        * speed: The download speed in bytes/second, None if
 358                                 unknown
 359                        * fragment_index: The counter of the currently
 360                                          downloaded video fragment.
 361                        * fragment_count: The number of fragments (= individual
 362                                          files that will be merged)
 363
 364                        Progress hooks are guaranteed to be called at least once
 365                        (with status "finished") if the download is successful.
 366     postprocessor_hooks:  A list of functions that get called on postprocessing
 367                        progress, with a dictionary with the entries
 368                        * status: One of "started", "processing", or "finished".
 369                                  Check this first and ignore unknown values.
 370                        * postprocessor: Name of the postprocessor
 371                        * info_dict: The extracted info_dict
 372
 373                        Postprocessor hooks are guaranteed to be called at least twice
 374                        (with status "started" and "finished") if the processing is successful.
 375     merge_output_format: Extension to use when merging formats.
 376     final_ext:         Expected final extension; used to detect when the file was
 377                        already downloaded and converted
 378     fixup:             Automatically correct known faults of the file.
 379                        One of:
 380                        - "never": do nothing
 381                        - "warn": only emit a warning
 382                        - "detect_or_warn": check whether we can do anything
 383                                            about it, warn otherwise (default)
 384     source_address:    Client-side IP address to bind to.
 385     call_home:         Boolean, true iff we are allowed to contact the
 386                        yt-dlp servers for debugging. (BROKEN)
 387     sleep_interval_requests: Number of seconds to sleep between requests
 388                        during extraction
 389     sleep_interval:    Number of seconds to sleep before each download when
 390                        used alone or a lower bound of a range for randomized
 391                        sleep before each download (minimum possible number
 392                        of seconds to sleep) when used along with
 393                        max_sleep_interval.
 394     max_sleep_interval:Upper bound of a range for randomized sleep before each
 395                        download (maximum possible number of seconds to sleep).
 396                        Must only be used along with sleep_interval.
 397                        Actual sleep time will be a random float from range
 398                        [sleep_interval; max_sleep_interval].
 399     sleep_interval_subtitles: Number of seconds to sleep before each subtitle download
 400     listformats:       Print an overview of available video formats and exit.
 401     list_thumbnails:   Print a table of all thumbnails and exit.
 402     match_filter:      A function that gets called with the info_dict of
 403                        every video.
 404                        If it returns a message, the video is ignored.
 405                        If it returns None, the video is downloaded.
 406                        match_filter_func in utils.py is one example for this.
 407     no_color:          Do not emit color codes in output.
 408     geo_bypass:        Bypass geographic restriction via faking X-Forwarded-For
 409                        HTTP header
 410     geo_bypass_country:
 411                        Two-letter ISO 3166-1 alpha-2 country code that will be used for
 412                        explicit geographic restriction bypassing via faking
 413                        X-Forwarded-For HTTP header
 414     geo_bypass_ip_block:
 415                        IP range in CIDR notation that will be used similarly to
 416                        geo_bypass_country
 417
 418     The following options determine which downloader is picked:
 419     external_downloader: A dictionary of protocol keys and the executable of the
 420                        external downloader to use for it. The allowed protocols
 421                        are default|http|ftp|m3u8|dash|rtsp|rtmp|mms.
 422                        Set the value to 'native' to use the native downloader
 423     hls_prefer_native: Deprecated - Use external_downloader = {'m3u8': 'native'}
 424                        or {'m3u8': 'ffmpeg'} instead.
 425                        Use the native HLS downloader instead of ffmpeg/avconv
 426                        if True, otherwise use ffmpeg/avconv if False, otherwise
 427                        use downloader suggested by extractor if None.
 428     compat_opts:       Compatibility options. See "Differences in default behavior".
 429                        The following options do not work when used through the API:
 430                        filename, abort-on-error, multistreams, no-live-chat, format-sort,
 431                        no-clean-infojson, no-playlist-metafiles, no-keep-subs.
 432                        Refer to __init__.py for their implementation
 433     progress_template: Dictionary of templates for progress outputs.
 434                        Allowed keys are 'download', 'postprocess',
 435                        'download-title' (console title) and 'postprocess-title'.
 436                        The template is mapped on a dictionary with keys 'progress' and 'info'
 437
 438     The following parameters are not used by YoutubeDL itself, they are used by
 439     the downloader (see yt_dlp/downloader/common.py):
 440     nopart, updatetime, buffersize, ratelimit, throttledratelimit, min_filesize,
 441     max_filesize, test, noresizebuffer, retries, fragment_retries, continuedl,
 442     noprogress, xattr_set_filesize, hls_use_mpegts, http_chunk_size,
 443     external_downloader_args, concurrent_fragment_downloads.
 444
 445     The following options are used by the post processors:
 446     prefer_ffmpeg:     If False, use avconv instead of ffmpeg if both are available,
 447                        otherwise prefer ffmpeg. (avconv support is deprecated)
 448     ffmpeg_location:   Location of the ffmpeg/avconv binary; either the path
 449                        to the binary or its containing directory.
 450     postprocessor_args: A dictionary of postprocessor/executable keys (in lower case)
 451                        and a list of additional command-line arguments for the
 452                        postprocessor/executable. The dict can also have "PP+EXE" keys
 453                        which are used when the given exe is used by the given PP.
 454                        Use 'default' as the name for arguments to be passed to all PP
 455                        For compatibility with youtube-dl, a single list of args
 456                        can also be used
 457
 458     The following options are used by the extractors:
 459     extractor_retries: Number of times to retry for known errors
 460     dynamic_mpd:       Whether to process dynamic DASH manifests (default: True)
 461     hls_split_discontinuity: Split HLS playlists to different formats at
 462                        discontinuities such as ad breaks (default: False)
 463     extractor_args:    A dictionary of arguments to be passed to the extractors.
 464                        See "EXTRACTOR ARGUMENTS" for details.
 465                        Eg: {'youtube': {'skip': ['dash', 'hls']}}
 466     youtube_include_dash_manifest: Deprecated - Use extractor_args instead.
 467                        If True (default), DASH manifests and related
 468                        data will be downloaded and processed by extractor.
 469                        You can reduce network I/O by disabling it if you don't
 470                        care about DASH. (only for youtube)
 471     youtube_include_hls_manifest: Deprecated - Use extractor_args instead.
 472                        If True (default), HLS manifests and related
 473                        data will be downloaded and processed by extractor.
 474                        You can reduce network I/O by disabling it if you don't
 475                        care about HLS. (only for youtube)
 476     """
 477
 478     _NUMERIC_FIELDS = set((
 479         'width', 'height', 'tbr', 'abr', 'asr', 'vbr', 'fps', 'filesize', 'filesize_approx',
 480         'timestamp', 'release_timestamp',
 481         'duration', 'view_count', 'like_count', 'dislike_count', 'repost_count',
 482         'average_rating', 'comment_count', 'age_limit',
 483         'start_time', 'end_time',
 484         'chapter_number', 'season_number', 'episode_number',
 485         'track_number', 'disc_number', 'release_year',
 486     ))
 487
 488     _format_selection_exts = {
 489         'audio': {'m4a', 'mp3', 'ogg', 'aac'},
 490         'video': {'mp4', 'flv', 'webm', '3gp'},
 491         'storyboards': {'mhtml'},
 492     }
 493
 494     params = None
 495     _ies = {}
 496     _pps = {'pre_process': [], 'before_dl': [], 'after_move': [], 'post_process': []}
 497     _printed_messages = set()
 498     _first_webpage_request = True
 499     _download_retcode = None
 500     _num_downloads = None
 501     _playlist_level = 0
 502     _playlist_urls = set()
 503     _screen_file = None
 504
 505     def __init__(self, params=None, auto_init=True):
 506         """Create a YoutubeDL object with the given options.
 507         @param auto_init    Whether to load the default extractors and print header (if verbose).
 508                             Set to 'no_verbose_header' to not print the header
 509         """
 510         if params is None:
 511             params = {}
 512         self._ies = {}
 513         self._ies_instances = {}
 514         self._pps = {'pre_process': [], 'before_dl': [], 'after_move': [], 'post_process': []}
 515         self._printed_messages = set()
 516         self._first_webpage_request = True
 517         self._post_hooks = []
 518         self._progress_hooks = []
 519         self._postprocessor_hooks = []
 520         self._download_retcode = 0
 521         self._num_downloads = 0
 522         self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
 523         self._err_file = sys.stderr
 524         self.params = params
 525         self.cache = Cache(self)
 526
 527         windows_enable_vt_mode()
 528         # FIXME: This will break if we ever print color to stdout
 529         self._allow_colors = {
 530             'screen': not self.params.get('no_color') and supports_terminal_sequences(self._screen_file),
 531             'err': not self.params.get('no_color') and supports_terminal_sequences(self._err_file),
 532         }
 533
 534         if sys.version_info < (3, 6):
 535             self.report_warning(
 536                 'Python version %d.%d is not supported! Please update to Python 3.6 or above' % sys.version_info[:2])
 537
 538         if self.params.get('allow_unplayable_formats'):
 539             self.report_warning(
 540                 f'You have asked for {self._format_err("UNPLAYABLE", self.Styles.EMPHASIS)} formats to be listed/downloaded. '
 541                 'This is a developer option intended for debugging. \n'
 542                 '         If you experience any issues while using this option, '
 543                 f'{self._format_err("DO NOT", self.Styles.ERROR)} open a bug report')
 544
 545         def check_deprecated(param, option, suggestion):
 546             if self.params.get(param) is not None:
 547                 self.report_warning('%s is deprecated. Use %s instead' % (option, suggestion))
 548                 return True
 549             return False
 550
 551         if check_deprecated('cn_verification_proxy', '--cn-verification-proxy', '--geo-verification-proxy'):
 552             if self.params.get('geo_verification_proxy') is None:
 553                 self.params['geo_verification_proxy'] = self.params['cn_verification_proxy']
 554
 555         check_deprecated('autonumber', '--auto-number', '-o "%(autonumber)s-%(title)s.%(ext)s"')
 556         check_deprecated('usetitle', '--title', '-o "%(title)s-%(id)s.%(ext)s"')
 557         check_deprecated('useid', '--id', '-o "%(id)s.%(ext)s"')
 558
 559         for msg in self.params.get('_warnings', []):
 560             self.report_warning(msg)
 561
 562         if 'list-formats' in self.params.get('compat_opts', []):
 563             self.params['listformats_table'] = False
 564
 565         if 'overwrites' not in self.params and self.params.get('nooverwrites') is not None:
 566             # nooverwrites was unnecessarily changed to overwrites
 567             # in 0c3d0f51778b153f65c21906031c2e091fcfb641
 568             # This ensures compatibility with both keys
 569             self.params['overwrites'] = not self.params['nooverwrites']
 570         elif self.params.get('overwrites') is None:
 571             self.params.pop('overwrites', None)
 572         else:
 573             self.params['nooverwrites'] = not self.params['overwrites']
 574
 575         if params.get('bidi_workaround', False):
 576             try:
 577                 import pty
 578                 master, slave = pty.openpty()
 579                 width = compat_get_terminal_size().columns
 580                 if width is None:
 581                     width_args = []
 582                 else:
 583                     width_args = ['-w', str(width)]
 584                 sp_kwargs = dict(
 585                     stdin=subprocess.PIPE,
 586                     stdout=slave,
 587                     stderr=self._err_file)
 588                 try:
 589                     self._output_process = Popen(['bidiv'] + width_args, **sp_kwargs)
 590                 except OSError:
 591                     self._output_process = Popen(['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
 592                 self._output_channel = os.fdopen(master, 'rb')
 593             except OSError as ose:
 594                 if ose.errno == errno.ENOENT:
 595                     self.report_warning(
 596                         'Could not find fribidi executable, ignoring --bidi-workaround. '
 597                         'Make sure that  fribidi  is an executable file in one of the directories in your $PATH.')
 598                 else:
 599                     raise
 600
 601         if (sys.platform != 'win32'
 602                 and sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
 603                 and not params.get('restrictfilenames', False)):
 604             # Unicode filesystem API will throw errors (#1474, #13027)
 605             self.report_warning(
 606                 'Assuming --restrict-filenames since file system encoding '
 607                 'cannot encode all characters. '
 608                 'Set the LC_ALL environment variable to fix this.')
 609             self.params['restrictfilenames'] = True
 610
 611         self.outtmpl_dict = self.parse_outtmpl()
 612
 613         # Creating format selector here allows us to catch syntax errors before the extraction
 614         self.format_selector = (
 615             None if self.params.get('format') is None
 616             else self.build_format_selector(self.params['format']))
 617
 618         self._setup_opener()
 619
 620         if auto_init:
 621             if auto_init != 'no_verbose_header':
 622                 self.print_debug_header()
 623             self.add_default_info_extractors()
 624
 625         for pp_def_raw in self.params.get('postprocessors', []):
 626             pp_def = dict(pp_def_raw)
 627             when = pp_def.pop('when', 'post_process')
 628             pp_class = get_postprocessor(pp_def.pop('key'))
 629             pp = pp_class(self, **compat_kwargs(pp_def))
 630             self.add_post_processor(pp, when=when)
 631
 632         for ph in self.params.get('post_hooks', []):
 633             self.add_post_hook(ph)
 634
 635         for ph in self.params.get('progress_hooks', []):
 636             self.add_progress_hook(ph)
 637
 638         register_socks_protocols()
 639
 640         def preload_download_archive(fn):
 641             """Preload the archive, if any is specified"""
 642             if fn is None:
 643                 return False
 644             self.write_debug(f'Loading archive file {fn!r}')
 645             try:
 646                 with locked_file(fn, 'r', encoding='utf-8') as archive_file:
 647                     for line in archive_file:
 648                         self.archive.add(line.strip())
 649             except IOError as ioe:
 650                 if ioe.errno != errno.ENOENT:
 651                     raise
 652                 return False
 653             return True
 654
 655         self.archive = set()
 656         preload_download_archive(self.params.get('download_archive'))
 657
 658     def warn_if_short_id(self, argv):
 659         # short YouTube ID starting with dash?
 660         idxs = [
 661             i for i, a in enumerate(argv)
 662             if re.match(r'^-[0-9A-Za-z_-]{10}$', a)]
 663         if idxs:
 664             correct_argv = (
 665                 ['yt-dlp']
 666                 + [a for i, a in enumerate(argv) if i not in idxs]
 667                 + ['--'] + [argv[i] for i in idxs]
 668             )
 669             self.report_warning(
 670                 'Long argument string detected. '
 671                 'Use -- to separate parameters and URLs, like this:\n%s' %
 672                 args_to_str(correct_argv))
 673
 674     def add_info_extractor(self, ie):
 675         """Add an InfoExtractor object to the end of the list."""
 676         ie_key = ie.ie_key()
 677         self._ies[ie_key] = ie
 678         if not isinstance(ie, type):
 679             self._ies_instances[ie_key] = ie
 680             ie.set_downloader(self)
 681
 682     def _get_info_extractor_class(self, ie_key):
 683         ie = self._ies.get(ie_key)
 684         if ie is None:
 685             ie = get_info_extractor(ie_key)
 686             self.add_info_extractor(ie)
 687         return ie
 688
 689     def get_info_extractor(self, ie_key):
 690         """
 691         Get an instance of an IE with name ie_key, it will try to get one from
 692         the _ies list, if there's no instance it will create a new one and add
 693         it to the extractor list.
 694         """
 695         ie = self._ies_instances.get(ie_key)
 696         if ie is None:
 697             ie = get_info_extractor(ie_key)()
 698             self.add_info_extractor(ie)
 699         return ie
 700
 701     def add_default_info_extractors(self):
 702         """
 703         Add the InfoExtractors returned by gen_extractors to the end of the list
 704         """
 705         for ie in gen_extractor_classes():
 706             self.add_info_extractor(ie)
 707
 708     def add_post_processor(self, pp, when='post_process'):
 709         """Add a PostProcessor object to the end of the chain."""
 710         self._pps[when].append(pp)
 711         pp.set_downloader(self)
 712
 713     def add_post_hook(self, ph):
 714         """Add the post hook"""
 715         self._post_hooks.append(ph)
 716
 717     def add_progress_hook(self, ph):
 718         """Add the download progress hook"""
 719         self._progress_hooks.append(ph)
 720
 721     def add_postprocessor_hook(self, ph):
 722         """Add the postprocessing progress hook"""
 723         self._postprocessor_hooks.append(ph)
 724
 725     def _bidi_workaround(self, message):
 726         if not hasattr(self, '_output_channel'):
 727             return message
 728
 729         assert hasattr(self, '_output_process')
 730         assert isinstance(message, compat_str)
 731         line_count = message.count('\n') + 1
 732         self._output_process.stdin.write((message + '\n').encode('utf-8'))
 733         self._output_process.stdin.flush()
 734         res = ''.join(self._output_channel.readline().decode('utf-8')
 735                       for _ in range(line_count))
 736         return res[:-len('\n')]
 737
 738     def _write_string(self, message, out=None, only_once=False):
 739         if only_once:
 740             if message in self._printed_messages:
 741                 return
 742             self._printed_messages.add(message)
 743         write_string(message, out=out, encoding=self.params.get('encoding'))
 744
 745     def to_stdout(self, message, skip_eol=False, quiet=False):
 746         """Print message to stdout"""
 747         if self.params.get('logger'):
 748             self.params['logger'].debug(message)
 749         elif not quiet or self.params.get('verbose'):
 750             self._write_string(
 751                 '%s%s' % (self._bidi_workaround(message), ('' if skip_eol else '\n')),
 752                 self._err_file if quiet else self._screen_file)
 753
 754     def to_stderr(self, message, only_once=False):
 755         """Print message to stderr"""
 756         assert isinstance(message, compat_str)
 757         if self.params.get('logger'):
 758             self.params['logger'].error(message)
 759         else:
 760             self._write_string('%s\n' % self._bidi_workaround(message), self._err_file, only_once=only_once)
 761
 762     def to_console_title(self, message):
 763         if not self.params.get('consoletitle', False):
 764             return
 765         if compat_os_name == 'nt':
 766             if ctypes.windll.kernel32.GetConsoleWindow():
 767                 # c_wchar_p() might not be necessary if `message` is
 768                 # already of type unicode()
 769                 ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
 770         elif 'TERM' in os.environ:
 771             self._write_string('\033]0;%s\007' % message, self._screen_file)
 772
 773     def save_console_title(self):
 774         if not self.params.get('consoletitle', False):
 775             return
 776         if self.params.get('simulate'):
 777             return
 778         if compat_os_name != 'nt' and 'TERM' in os.environ:
 779             # Save the title on stack
 780             self._write_string('\033[22;0t', self._screen_file)
 781
 782     def restore_console_title(self):
 783         if not self.params.get('consoletitle', False):
 784             return
 785         if self.params.get('simulate'):
 786             return
 787         if compat_os_name != 'nt' and 'TERM' in os.environ:
 788             # Restore the title from stack
 789             self._write_string('\033[23;0t', self._screen_file)
 790
 791     def __enter__(self):
 792         self.save_console_title()
 793         return self
 794
 795     def __exit__(self, *args):
 796         self.restore_console_title()
 797
 798         if self.params.get('cookiefile') is not None:
 799             self.cookiejar.save(ignore_discard=True, ignore_expires=True)
 800
 801     def trouble(self, message=None, tb=None):
 802         """Determine action to take when a download problem appears.
 803
 804         Depending on if the downloader has been configured to ignore
 805         download errors or not, this method may throw an exception or
 806         not when errors are found, after printing the message.
 807
 808         tb, if given, is additional traceback information.
 809         """
 810         if message is not None:
 811             self.to_stderr(message)
 812         if self.params.get('verbose'):
 813             if tb is None:
 814                 if sys.exc_info()[0]:  # if .trouble has been called from an except block
 815                     tb = ''
 816                     if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
 817                         tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
 818                     tb += encode_compat_str(traceback.format_exc())
 819                 else:
 820                     tb_data = traceback.format_list(traceback.extract_stack())
 821                     tb = ''.join(tb_data)
 822             if tb:
 823                 self.to_stderr(tb)
 824         if not self.params.get('ignoreerrors'):
 825             if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
 826                 exc_info = sys.exc_info()[1].exc_info
 827             else:
 828                 exc_info = sys.exc_info()
 829             raise DownloadError(message, exc_info)
 830         self._download_retcode = 1
 831
 832     def to_screen(self, message, skip_eol=False):
 833         """Print message to stdout if not in quiet mode"""
 834         self.to_stdout(
 835             message, skip_eol, quiet=self.params.get('quiet', False))
 836
    class Styles(Enum):
        # Named text styles. __format_text() unwraps a member to its value
        # before handing it to format_text().
        # NOTE: Enum members with equal values are aliases, so WARNING is an
        # alias of HEADERS and DELIM is an alias of EMPHASIS.
        HEADERS = 'yellow'
        EMPHASIS = 'blue'
        ID = 'green'
        DELIM = 'blue'
        ERROR = 'red'
        WARNING = 'yellow'
 844
 845     def __format_text(self, out, text, f, fallback=None, *, test_encoding=False):
 846         assert out in ('screen', 'err')
 847         if test_encoding:
 848             original_text = text
 849             handle = self._screen_file if out == 'screen' else self._err_file
 850             encoding = self.params.get('encoding') or getattr(handle, 'encoding', 'ascii')
 851             text = text.encode(encoding, 'ignore').decode(encoding)
 852             if fallback is not None and text != original_text:
 853                 text = fallback
 854         if isinstance(f, self.Styles):
 855             f = f._value_
 856         return format_text(text, f) if self._allow_colors[out] else text if fallback is None else fallback
 857
    def _format_screen(self, *args, **kwargs):
        """Format text for the 'screen' output; all arguments are forwarded to __format_text()."""
        return self.__format_text('screen', *args, **kwargs)
 860
    def _format_err(self, *args, **kwargs):
        """Format text for the 'err' output; all arguments are forwarded to __format_text()."""
        return self.__format_text('err', *args, **kwargs)
 863
 864     def report_warning(self, message, only_once=False):
 865         '''
 866         Print the message to stderr, it will be prefixed with 'WARNING:'
 867         If stderr is a tty file the 'WARNING:' will be colored
 868         '''
 869         if self.params.get('logger') is not None:
 870             self.params['logger'].warning(message)
 871         else:
 872             if self.params.get('no_warnings'):
 873                 return
 874             self.to_stderr(f'{self._format_err("WARNING:", self.Styles.WARNING)} {message}', only_once)
 875
 876     def report_error(self, message, tb=None):
 877         '''
 878         Do the same as trouble, but prefixes the message with 'ERROR:', colored
 879         in red if stderr is a tty file.
 880         '''
 881         self.trouble(f'{self._format_err("ERROR:", self.Styles.ERROR)} {message}', tb)
 882
 883     def write_debug(self, message, only_once=False):
 884         '''Log debug message or Print message to stderr'''
 885         if not self.params.get('verbose', False):
 886             return
 887         message = '[debug] %s' % message
 888         if self.params.get('logger'):
 889             self.params['logger'].debug(message)
 890         else:
 891             self.to_stderr(message, only_once)
 892
 893     def report_file_already_downloaded(self, file_name):
 894         """Report file has already been fully downloaded."""
 895         try:
 896             self.to_screen('[download] %s has already been downloaded' % file_name)
 897         except UnicodeEncodeError:
 898             self.to_screen('[download] The file has already been downloaded')
 899
 900     def report_file_delete(self, file_name):
 901         """Report that existing file will be deleted."""
 902         try:
 903             self.to_screen('Deleting existing file %s' % file_name)
 904         except UnicodeEncodeError:
 905             self.to_screen('Deleting existing file')
 906
 907     def raise_no_formats(self, info, forced=False):
 908         has_drm = info.get('__has_drm')
 909         msg = 'This video is DRM protected' if has_drm else 'No video formats found!'
 910         expected = self.params.get('ignore_no_formats_error')
 911         if forced or not expected:
 912             raise ExtractorError(msg, video_id=info['id'], ie=info['extractor'],
 913                                  expected=has_drm or expected)
 914         else:
 915             self.report_warning(msg)
 916
 917     def parse_outtmpl(self):
 918         outtmpl_dict = self.params.get('outtmpl', {})
 919         if not isinstance(outtmpl_dict, dict):
 920             outtmpl_dict = {'default': outtmpl_dict}
 921         # Remove spaces in the default template
 922         if self.params.get('restrictfilenames'):
 923             sanitize = lambda x: x.replace(' - ', ' ').replace(' ', '-')
 924         else:
 925             sanitize = lambda x: x
 926         outtmpl_dict.update({
 927             k: sanitize(v) for k, v in DEFAULT_OUTTMPL.items()
 928             if outtmpl_dict.get(k) is None})
 929         for key, val in outtmpl_dict.items():
 930             if isinstance(val, bytes):
 931                 self.report_warning(
 932                     'Parameter outtmpl is bytes, but should be a unicode string. '
 933                     'Put  from __future__ import unicode_literals  at the top of your code file or consider switching to Python 3.x.')
 934         return outtmpl_dict
 935
 936     def get_output_path(self, dir_type='', filename=None):
 937         paths = self.params.get('paths', {})
 938         assert isinstance(paths, dict)
 939         path = os.path.join(
 940             expand_path(paths.get('home', '').strip()),
 941             expand_path(paths.get(dir_type, '').strip()) if dir_type else '',
 942             filename or '')
 943
 944         # Temporary fix for #4787
 945         # 'Treat' all problem characters by passing filename through preferredencoding
 946         # to workaround encoding issues with subprocess on python2 @ Windows
 947         if sys.version_info < (3, 0) and sys.platform == 'win32':
 948             path = encodeFilename(path, True).decode(preferredencoding())
 949         return sanitize_path(path, force=self.params.get('windowsfilenames'))
 950
 951     @staticmethod
 952     def _outtmpl_expandpath(outtmpl):
 953         # expand_path translates '%%' into '%' and '$$' into '$'
 954         # correspondingly that is not what we want since we need to keep
 955         # '%%' intact for template dict substitution step. Working around
 956         # with boundary-alike separator hack.
 957         sep = ''.join([random.choice(ascii_letters) for _ in range(32)])
 958         outtmpl = outtmpl.replace('%%', '%{0}%'.format(sep)).replace('$$', '${0}$'.format(sep))
 959
 960         # outtmpl should be expand_path'ed before template dict substitution
 961         # because meta fields may contain env variables we don't want to
 962         # be expanded. For example, for outtmpl "%(title)s.%(ext)s" and
 963         # title "Hello $PATH", we don't want `$PATH` to be expanded.
 964         return expand_path(outtmpl).replace(sep, '')
 965
 966     @staticmethod
 967     def escape_outtmpl(outtmpl):
 968         ''' Escape any remaining strings like %s, %abc% etc. '''
 969         return re.sub(
 970             STR_FORMAT_RE_TMPL.format('', '(?![%(\0])'),
 971             lambda mobj: ('' if mobj.group('has_key') else '%') + mobj.group(0),
 972             outtmpl)
 973
 974     @classmethod
 975     def validate_outtmpl(cls, outtmpl):
 976         ''' @return None or Exception object '''
 977         outtmpl = re.sub(
 978             STR_FORMAT_RE_TMPL.format('[^)]*', '[ljqBU]'),
 979             lambda mobj: f'{mobj.group(0)[:-1]}s',
 980             cls._outtmpl_expandpath(outtmpl))
 981         try:
 982             cls.escape_outtmpl(outtmpl) % collections.defaultdict(int)
 983             return None
 984         except ValueError as err:
 985             return err
 986
 987     @staticmethod
 988     def _copy_infodict(info_dict):
 989         info_dict = dict(info_dict)
 990         for key in ('__original_infodict', '__postprocessors'):
 991             info_dict.pop(key, None)
 992         return info_dict
 993
    def prepare_outtmpl(self, outtmpl, info_dict, sanitize=None):
        """ Make the outtmpl and info_dict suitable for substitution: ydl.escape_outtmpl(outtmpl) % info_dict

        @param outtmpl    the output template string
        @param info_dict  the info dict; 'epoch' is set on it if missing, every
                          other change is made on a copy
        @param sanitize   optional callable(field_name, value) applied to values
                          that end up formatted with a 'c', 's' or 'r' conversion
        @return           tuple (rewritten template, dict of the values it refers to)
        """
        info_dict.setdefault('epoch', int(time.time()))  # keep epoch consistent once set

        # From here on only a copy is modified
        info_dict = self._copy_infodict(info_dict)
        info_dict['duration_string'] = (  # %(duration>%H-%M-%S)s is wrong if duration > 24hrs
            formatSeconds(info_dict['duration'], '-' if sanitize else ':')
            if info_dict.get('duration', None) is not None
            else None)
        info_dict['autonumber'] = self.params.get('autonumber_start', 1) - 1 + self._num_downloads
        if info_dict.get('resolution') is None:
            info_dict['resolution'] = self.format_resolution(info_dict, default=None)

        # For fields playlist_index, playlist_autonumber and autonumber convert all occurrences
        # of %(field)s to %(field)0Nd for backward compatibility
        field_size_compat_map = {
            'playlist_index': number_of_digits(info_dict.get('_last_playlist_index') or 0),
            'playlist_autonumber': number_of_digits(info_dict.get('n_entries') or 0),
            'autonumber': self.params.get('autonumber_size') or 5,
        }

        # Filled by create_key(): maps each generated template key to its value
        TMPL_DICT = {}
        EXTERNAL_FORMAT_RE = re.compile(STR_FORMAT_RE_TMPL.format('[^)]*', f'[{STR_FORMAT_TYPES}ljqBU]'))
        # Arithmetic available inside a field expression; get_value() converts
        # the operands with float_or_none() before applying these
        MATH_FUNCTIONS = {
            '+': float.__add__,
            '-': float.__sub__,
        }
        # Field is of the form key1.key2...
        # where keys (except first) can be string, int or slice
        FIELD_RE = r'\w*(?:\.(?:\w+|{num}|{num}?(?::{num}?){{1,2}}))*'.format(num=r'(?:-?\d+)')
        # NOTE(review): the '.' in the number pattern below is unescaped and so
        # matches any character; presumably r'\.' was intended - confirm.
        # NOTE(review): this alternation is not wrapped in a group, so when it is
        # interpolated after {math_op} in INTERNAL_FORMAT_RE the operator binds
        # to the first alternative only - confirm this is intended.
        MATH_FIELD_RE = r'''{field}|{num}'''.format(field=FIELD_RE, num=r'-?\d+(?:.\d+)?')
        MATH_OPERATORS_RE = r'(?:%s)' % '|'.join(map(re.escape, MATH_FUNCTIONS.keys()))
        # Splits the text between the parentheses of a template key into
        # negate / fields / maths / strf_format / alternate / default
        INTERNAL_FORMAT_RE = re.compile(r'''(?x)
            (?P<negate>-)?
            (?P<fields>{field})
            (?P<maths>(?:{math_op}{math_field})*)
            (?:>(?P<strf_format>.+?))?
            (?P<alternate>(?<!\\),[^|)]+)?
            (?:\|(?P<default>.*?))?
            $'''.format(field=FIELD_RE, math_op=MATH_OPERATORS_RE, math_field=MATH_FIELD_RE))

        def _traverse_infodict(k):
            # Look up a dotted key path in the (copied) info dict;
            # an empty first component is dropped
            k = k.split('.')
            if k[0] == '':
                k.pop(0)
            return traverse_obj(info_dict, k, is_user_input=True, traverse_string=True)

        def get_value(mdict):
            # Evaluate one parsed key (the groupdict of an INTERNAL_FORMAT_RE match).
            # Returns None when a math step cannot be applied
            # Object traversal
            value = _traverse_infodict(mdict['fields'])
            # Negative
            if mdict['negate']:
                value = float_or_none(value)
                if value is not None:
                    value *= -1
            # Do maths
            offset_key = mdict['maths']
            if offset_key:
                value = float_or_none(value)
                operator = None
                # Consume the maths string piece by piece, alternating between
                # an operator and the operand it applies to
                while offset_key:
                    item = re.match(
                        MATH_FIELD_RE if operator else MATH_OPERATORS_RE,
                        offset_key).group(0)
                    offset_key = offset_key[len(item):]
                    if operator is None:
                        operator = MATH_FUNCTIONS[item]
                        continue
                    item, multiplier = (item[1:], -1) if item[0] == '-' else (item, 1)
                    # The operand is either a number literal or another field
                    offset = float_or_none(item)
                    if offset is None:
                        offset = float_or_none(_traverse_infodict(item))
                    try:
                        value = operator(value, multiplier * offset)
                    except (TypeError, ZeroDivisionError):
                        return None
                    operator = None
            # Datetime formatting
            if mdict['strf_format']:
                value = strftime_or_none(value, mdict['strf_format'].replace('\\,', ','))

            return value

        # Placeholder used for fields that have no value and no explicit default
        na = self.params.get('outtmpl_na_placeholder', 'NA')

        def _dumpjson_default(obj):
            # json.dumps fallback: sets and LazyLists are serialized as lists
            if isinstance(obj, (set, LazyList)):
                return list(obj)
            raise TypeError(f'Object of type {type(obj).__name__} is not JSON serializable')

        def create_key(outer_mobj):
            # Replacement callback for EXTERNAL_FORMAT_RE: computes the value of one
            # template field, stores it in TMPL_DICT and returns the rewritten field
            if not outer_mobj.group('has_key'):
                return outer_mobj.group(0)
            key = outer_mobj.group('key')
            mobj = re.match(INTERNAL_FORMAT_RE, key)
            # Last component of the first field; passed to sanitize() as the field name
            initial_field = mobj.group('fields').split('.')[-1] if mobj else ''
            value, default = None, na
            # Try the key and then each ','-separated alternate until one gives a value
            while mobj:
                mobj = mobj.groupdict()
                default = mobj['default'] if mobj['default'] is not None else default
                value = get_value(mobj)
                if value is None and mobj['alternate']:
                    mobj = re.match(INTERNAL_FORMAT_RE, mobj['alternate'][1:])
                else:
                    break

            fmt = outer_mobj.group('format')
            # Backward compatibility: zero-pad the counter fields (see field_size_compat_map)
            if fmt == 's' and value is not None and key in field_size_compat_map.keys():
                fmt = '0{:d}d'.format(field_size_compat_map[key])

            value = default if value is None else value

            flags = outer_mobj.group('conversion') or ''
            # Same format spec, but with a plain 's' conversion
            str_fmt = f'{fmt[:-1]}s'
            if fmt[-1] == 'l':  # list
                delim = '\n' if '#' in flags else ', '
                value, fmt = delim.join(variadic(value)), str_fmt
            elif fmt[-1] == 'j':  # json
                value, fmt = json.dumps(value, default=_dumpjson_default, indent=4 if '#' in flags else None), str_fmt
            elif fmt[-1] == 'q':  # quoted
                value = map(str, variadic(value) if '#' in flags else [value])
                value, fmt = ' '.join(map(compat_shlex_quote, value)), str_fmt
            elif fmt[-1] == 'B':  # bytes
                # The format spec is applied to the UTF-8 encoded value
                value = f'%{str_fmt}'.encode('utf-8') % str(value).encode('utf-8')
                value, fmt = value.decode('utf-8', 'ignore'), 's'
            elif fmt[-1] == 'U':  # unicode normalized
                value, fmt = unicodedata.normalize(
                    # "+" = compatibility equivalence, "#" = NFD
                    'NF%s%s' % ('K' if '+' in flags else '', 'D' if '#' in flags else 'C'),
                    value), str_fmt
            elif fmt[-1] == 'c':
                # Only the first character of the value is used
                if value:
                    value = str(value)[0]
                else:
                    fmt = str_fmt
            elif fmt[-1] not in 'rs':  # numeric
                value = float_or_none(value)
                if value is None:
                    value, fmt = default, 's'

            if sanitize:
                if fmt[-1] == 'r':
                    # If value is an object, sanitize might convert it to a string
                    # So we convert it to repr first
                    value, fmt = repr(value), str_fmt
                if fmt[-1] in 'csr':
                    value = sanitize(initial_field, value)

            # Key under which the value is stored: every '%' of the user key is
            # followed by NUL and the original format is appended after a NUL.
            # NOTE(review): presumably this relies on the NUL lookahead in
            # escape_outtmpl() to keep these '%' from being escaped - confirm.
            key = '%s\0%s' % (key.replace('%', '%\0'), outer_mobj.group('format'))
            TMPL_DICT[key] = value
            return '{prefix}%({key}){fmt}'.format(key=key, fmt=fmt, prefix=outer_mobj.group('prefix'))

        return EXTERNAL_FORMAT_RE.sub(create_key, outtmpl), TMPL_DICT
1147
1148     def evaluate_outtmpl(self, outtmpl, info_dict, *args, **kwargs):
1149         outtmpl, info_dict = self.prepare_outtmpl(outtmpl, info_dict, *args, **kwargs)
1150         return self.escape_outtmpl(outtmpl) % info_dict
1151
1152     def _prepare_filename(self, info_dict, tmpl_type='default'):
1153         try:
1154             sanitize = lambda k, v: sanitize_filename(
1155                 compat_str(v),
1156                 restricted=self.params.get('restrictfilenames'),
1157                 is_id=(k == 'id' or k.endswith('_id')))
1158             outtmpl = self._outtmpl_expandpath(self.outtmpl_dict.get(tmpl_type, self.outtmpl_dict['default']))
1159             filename = self.evaluate_outtmpl(outtmpl, info_dict, sanitize)
1160
1161             force_ext = OUTTMPL_TYPES.get(tmpl_type)
1162             if filename and force_ext is not None:
1163                 filename = replace_extension(filename, force_ext, info_dict.get('ext'))
1164
1165             # https://github.com/blackjack4494/youtube-dlc/issues/85
1166             trim_file_name = self.params.get('trim_file_name', False)
1167             if trim_file_name:
1168                 fn_groups = filename.rsplit('.')
1169                 ext = fn_groups[-1]
1170                 sub_ext = ''
1171                 if len(fn_groups) > 2:
1172                     sub_ext = fn_groups[-2]
1173                 filename = join_nonempty(fn_groups[0][:trim_file_name], sub_ext, ext, delim='.')
1174
1175             return filename
1176         except ValueError as err:
1177             self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
1178             return None
1179
1180     def prepare_filename(self, info_dict, dir_type='', warn=False):
1181         """Generate the output filename."""
1182
1183         filename = self._prepare_filename(info_dict, dir_type or 'default')
1184         if not filename and dir_type not in ('', 'temp'):
1185             return ''
1186
1187         if warn:
1188             if not self.params.get('paths'):
1189                 pass
1190             elif filename == '-':
1191                 self.report_warning('--paths is ignored when an outputting to stdout', only_once=True)
1192             elif os.path.isabs(filename):
1193                 self.report_warning('--paths is ignored since an absolute path is given in output template', only_once=True)
1194         if filename == '-' or not filename:
1195             return filename
1196
1197         return self.get_output_path(dir_type, filename)
1198
1199     def _match_entry(self, info_dict, incomplete=False, silent=False):
1200         """ Returns None if the file should be downloaded """
1201
1202         video_title = info_dict.get('title', info_dict.get('id', 'video'))
1203
1204         def check_filter():
1205             if 'title' in info_dict:
1206                 # This can happen when we're just evaluating the playlist
1207                 title = info_dict['title']
1208                 matchtitle = self.params.get('matchtitle', False)
1209                 if matchtitle:
1210                     if not re.search(matchtitle, title, re.IGNORECASE):
1211                         return '"' + title + '" title did not match pattern "' + matchtitle + '"'
1212                 rejecttitle = self.params.get('rejecttitle', False)
1213                 if rejecttitle:
1214                     if re.search(rejecttitle, title, re.IGNORECASE):
1215                         return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
1216             date = info_dict.get('upload_date')
1217             if date is not None:
1218                 dateRange = self.params.get('daterange', DateRange())
1219                 if date not in dateRange:
1220                     return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
1221             view_count = info_dict.get('view_count')
1222             if view_count is not None:
1223                 min_views = self.params.get('min_views')
1224                 if min_views is not None and view_count < min_views:
1225                     return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
1226                 max_views = self.params.get('max_views')
1227                 if max_views is not None and view_count > max_views:
1228                     return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
1229             if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
1230                 return 'Skipping "%s" because it is age restricted' % video_title
1231
1232             match_filter = self.params.get('match_filter')
1233             if match_filter is not None:
1234                 try:
1235                     ret = match_filter(info_dict, incomplete=incomplete)
1236                 except TypeError:
1237                     # For backward compatibility
1238                     ret = None if incomplete else match_filter(info_dict)
1239                 if ret is not None:
1240                     return ret
1241             return None
1242
1243         if self.in_download_archive(info_dict):
1244             reason = '%s has already been recorded in the archive' % video_title
1245             break_opt, break_err = 'break_on_existing', ExistingVideoReached
1246         else:
1247             reason = check_filter()
1248             break_opt, break_err = 'break_on_reject', RejectedVideoReached
1249         if reason is not None:
1250             if not silent:
1251                 self.to_screen('[download] ' + reason)
1252             if self.params.get(break_opt, False):
1253                 raise break_err()
1254         return reason
1255
1256     @staticmethod
1257     def add_extra_info(info_dict, extra_info):
1258         '''Set the keys from extra_info in info dict if they are missing'''
1259         for key, value in extra_info.items():
1260             info_dict.setdefault(key, value)
1261
1262     def extract_info(self, url, download=True, ie_key=None, extra_info=None,
1263                      process=True, force_generic_extractor=False):
1264         """
1265         Return a list with a dictionary for each video extracted.
1266
1267         Arguments:
1268         url -- URL to extract
1269
1270         Keyword arguments:
1271         download -- whether to download videos during extraction
1272         ie_key -- extractor key hint
1273         extra_info -- dictionary containing the extra values to add to each result
1274         process -- whether to resolve all unresolved references (URLs, playlist items),
1275             must be True for download to work.
1276         force_generic_extractor -- force using the generic extractor
1277         """
1278
1279         if extra_info is None:
1280             extra_info = {}
1281
1282         if not ie_key and force_generic_extractor:
1283             ie_key = 'Generic'
1284
1285         if ie_key:
1286             ies = {ie_key: self._get_info_extractor_class(ie_key)}
1287         else:
1288             ies = self._ies
1289
1290         for ie_key, ie in ies.items():
1291             if not ie.suitable(url):
1292                 continue
1293
1294             if not ie.working():
1295                 self.report_warning('The program functionality for this site has been marked as broken, '
1296                                     'and will probably not work.')
1297
1298             temp_id = ie.get_temp_id(url)
1299             if temp_id is not None and self.in_download_archive({'id': temp_id, 'ie_key': ie_key}):
1300                 self.to_screen("[%s] %s: has already been recorded in archive" % (
1301                                ie_key, temp_id))
1302                 break
1303             return self.__extract_info(url, self.get_info_extractor(ie_key), download, extra_info, process)
1304         else:
1305             self.report_error('no suitable InfoExtractor for URL %s' % url)
1306
1307     def __handle_extraction_exceptions(func):
1308         @functools.wraps(func)
1309         def wrapper(self, *args, **kwargs):
1310             try:
1311                 return func(self, *args, **kwargs)
1312             except GeoRestrictedError as e:
1313                 msg = e.msg
1314                 if e.countries:
1315                     msg += '\nThis video is available in %s.' % ', '.join(
1316                         map(ISO3166Utils.short2full, e.countries))
1317                 msg += '\nYou might want to use a VPN or a proxy server (with --proxy) to workaround.'
1318                 self.report_error(msg)
1319             except ExtractorError as e:  # An error we somewhat expected
1320                 self.report_error(compat_str(e), e.format_traceback())
1321             except ThrottledDownload as e:
1322                 self.to_stderr('\r')
1323                 self.report_warning(f'{e}; Re-extracting data')
1324                 return wrapper(self, *args, **kwargs)
1325             except (DownloadCancelled, LazyList.IndexError):
1326                 raise
1327             except Exception as e:
1328                 if self.params.get('ignoreerrors'):
1329                     self.report_error(error_to_compat_str(e), tb=encode_compat_str(traceback.format_exc()))
1330                 else:
1331                     raise
1332         return wrapper
1333
1334     @__handle_extraction_exceptions
1335     def __extract_info(self, url, ie, download, extra_info, process):
1336         ie_result = ie.extract(url)
1337         if ie_result is None:  # Finished already (backwards compatibility; listformats and friends should be moved here)
1338             return
1339         if isinstance(ie_result, list):
1340             # Backwards compatibility: old IE result format
1341             ie_result = {
1342                 '_type': 'compat_list',
1343                 'entries': ie_result,
1344             }
1345         if extra_info.get('original_url'):
1346             ie_result.setdefault('original_url', extra_info['original_url'])
1347         self.add_default_extra_info(ie_result, ie, url)
1348         if process:
1349             return self.process_ie_result(ie_result, download, extra_info)
1350         else:
1351             return ie_result
1352
1353     def add_default_extra_info(self, ie_result, ie, url):
1354         if url is not None:
1355             self.add_extra_info(ie_result, {
1356                 'webpage_url': url,
1357                 'original_url': url,
1358                 'webpage_url_basename': url_basename(url),
1359             })
1360         if ie is not None:
1361             self.add_extra_info(ie_result, {
1362                 'extractor': ie.IE_NAME,
1363                 'extractor_key': ie.ie_key(),
1364             })
1365
1366     def process_ie_result(self, ie_result, download=True, extra_info=None):
1367         """
1368         Take the result of the ie(may be modified) and resolve all unresolved
1369         references (URLs, playlist items).
1370
1371         It will also download the videos if 'download'.
1372         Returns the resolved ie_result.
1373         """
1374         if extra_info is None:
1375             extra_info = {}
1376         result_type = ie_result.get('_type', 'video')
1377
1378         if result_type in ('url', 'url_transparent'):
1379             ie_result['url'] = sanitize_url(ie_result['url'])
1380             if ie_result.get('original_url'):
1381                 extra_info.setdefault('original_url', ie_result['original_url'])
1382
1383             extract_flat = self.params.get('extract_flat', False)
1384             if ((extract_flat == 'in_playlist' and 'playlist' in extra_info)
1385                     or extract_flat is True):
1386                 info_copy = ie_result.copy()
1387                 ie = try_get(ie_result.get('ie_key'), self.get_info_extractor)
1388                 if ie and not ie_result.get('id'):
1389                     info_copy['id'] = ie.get_temp_id(ie_result['url'])
1390                 self.add_default_extra_info(info_copy, ie, ie_result['url'])
1391                 self.add_extra_info(info_copy, extra_info)
1392                 self.__forced_printings(info_copy, self.prepare_filename(info_copy), incomplete=True)
1393                 if self.params.get('force_write_download_archive', False):
1394                     self.record_download_archive(info_copy)
1395                 return ie_result
1396
1397         if result_type == 'video':
1398             self.add_extra_info(ie_result, extra_info)
1399             ie_result = self.process_video_result(ie_result, download=download)
1400             additional_urls = (ie_result or {}).get('additional_urls')
1401             if additional_urls:
1402                 # TODO: Improve MetadataParserPP to allow setting a list
1403                 if isinstance(additional_urls, compat_str):
1404                     additional_urls = [additional_urls]
1405                 self.to_screen(
1406                     '[info] %s: %d additional URL(s) requested' % (ie_result['id'], len(additional_urls)))
1407                 self.write_debug('Additional URLs: "%s"' % '", "'.join(additional_urls))
1408                 ie_result['additional_entries'] = [
1409                     self.extract_info(
1410                         url, download, extra_info,
1411                         force_generic_extractor=self.params.get('force_generic_extractor'))
1412                     for url in additional_urls
1413                 ]
1414             return ie_result
1415         elif result_type == 'url':
1416             # We have to add extra_info to the results because it may be
1417             # contained in a playlist
1418             return self.extract_info(
1419                 ie_result['url'], download,
1420                 ie_key=ie_result.get('ie_key'),
1421                 extra_info=extra_info)
1422         elif result_type == 'url_transparent':
1423             # Use the information from the embedding page
1424             info = self.extract_info(
1425                 ie_result['url'], ie_key=ie_result.get('ie_key'),
1426                 extra_info=extra_info, download=False, process=False)
1427
1428             # extract_info may return None when ignoreerrors is enabled and
1429             # extraction failed with an error, don't crash and return early
1430             # in this case
1431             if not info:
1432                 return info
1433
1434             force_properties = dict(
1435                 (k, v) for k, v in ie_result.items() if v is not None)
1436             for f in ('_type', 'url', 'id', 'extractor', 'extractor_key', 'ie_key'):
1437                 if f in force_properties:
1438                     del force_properties[f]
1439             new_result = info.copy()
1440             new_result.update(force_properties)
1441
1442             # Extracted info may not be a video result (i.e.
1443             # info.get('_type', 'video') != video) but rather an url or
1444             # url_transparent. In such cases outer metadata (from ie_result)
1445             # should be propagated to inner one (info). For this to happen
1446             # _type of info should be overridden with url_transparent. This
1447             # fixes issue from https://github.com/ytdl-org/youtube-dl/pull/11163.
1448             if new_result.get('_type') == 'url':
1449                 new_result['_type'] = 'url_transparent'
1450
1451             return self.process_ie_result(
1452                 new_result, download=download, extra_info=extra_info)
1453         elif result_type in ('playlist', 'multi_video'):
1454             # Protect from infinite recursion due to recursively nested playlists
1455             # (see https://github.com/ytdl-org/youtube-dl/issues/27833)
1456             webpage_url = ie_result['webpage_url']
1457             if webpage_url in self._playlist_urls:
1458                 self.to_screen(
1459                     '[download] Skipping already downloaded playlist: %s'
1460                     % ie_result.get('title') or ie_result.get('id'))
1461                 return
1462
1463             self._playlist_level += 1
1464             self._playlist_urls.add(webpage_url)
1465             self._sanitize_thumbnails(ie_result)
1466             try:
1467                 return self.__process_playlist(ie_result, download)
1468             finally:
1469                 self._playlist_level -= 1
1470                 if not self._playlist_level:
1471                     self._playlist_urls.clear()
1472         elif result_type == 'compat_list':
1473             self.report_warning(
1474                 'Extractor %s returned a compat_list result. '
1475                 'It needs to be updated.' % ie_result.get('extractor'))
1476
1477             def _fixup(r):
1478                 self.add_extra_info(r, {
1479                     'extractor': ie_result['extractor'],
1480                     'webpage_url': ie_result['webpage_url'],
1481                     'webpage_url_basename': url_basename(ie_result['webpage_url']),
1482                     'extractor_key': ie_result['extractor_key'],
1483                 })
1484                 return r
1485             ie_result['entries'] = [
1486                 self.process_ie_result(_fixup(r), download, extra_info)
1487                 for r in ie_result['entries']
1488             ]
1489             return ie_result
1490         else:
1491             raise Exception('Invalid result type: %s' % result_type)
1492
1493     def _ensure_dir_exists(self, path):
1494         return make_dir(path, self.report_error)
1495
1496     def __process_playlist(self, ie_result, download):
1497         # We process each entry in the playlist
1498         playlist = ie_result.get('title') or ie_result.get('id')
1499         self.to_screen('[download] Downloading playlist: %s' % playlist)
1500
1501         if 'entries' not in ie_result:
1502             raise EntryNotInPlaylist('There are no entries')
1503         incomplete_entries = bool(ie_result.get('requested_entries'))
1504         if incomplete_entries:
1505             def fill_missing_entries(entries, indexes):
1506                 ret = [None] * max(*indexes)
1507                 for i, entry in zip(indexes, entries):
1508                     ret[i - 1] = entry
1509                 return ret
1510             ie_result['entries'] = fill_missing_entries(ie_result['entries'], ie_result['requested_entries'])
1511
1512         playlist_results = []
1513
1514         playliststart = self.params.get('playliststart', 1)
1515         playlistend = self.params.get('playlistend')
1516         # For backwards compatibility, interpret -1 as whole list
1517         if playlistend == -1:
1518             playlistend = None
1519
1520         playlistitems_str = self.params.get('playlist_items')
1521         playlistitems = None
1522         if playlistitems_str is not None:
1523             def iter_playlistitems(format):
1524                 for string_segment in format.split(','):
1525                     if '-' in string_segment:
1526                         start, end = string_segment.split('-')
1527                         for item in range(int(start), int(end) + 1):
1528                             yield int(item)
1529                     else:
1530                         yield int(string_segment)
1531             playlistitems = orderedSet(iter_playlistitems(playlistitems_str))
1532
1533         ie_entries = ie_result['entries']
1534         msg = (
1535             'Downloading %d videos' if not isinstance(ie_entries, list)
1536             else 'Collected %d videos; downloading %%d of them' % len(ie_entries))
1537
1538         if isinstance(ie_entries, list):
1539             def get_entry(i):
1540                 return ie_entries[i - 1]
1541         else:
1542             if not isinstance(ie_entries, (PagedList, LazyList)):
1543                 ie_entries = LazyList(ie_entries)
1544
1545             def get_entry(i):
1546                 return YoutubeDL.__handle_extraction_exceptions(
1547                     lambda self, i: ie_entries[i - 1]
1548                 )(self, i)
1549
1550         entries = []
1551         items = playlistitems if playlistitems is not None else itertools.count(playliststart)
1552         for i in items:
1553             if i == 0:
1554                 continue
1555             if playlistitems is None and playlistend is not None and playlistend < i:
1556                 break
1557             entry = None
1558             try:
1559                 entry = get_entry(i)
1560                 if entry is None:
1561                     raise EntryNotInPlaylist()
1562             except (IndexError, EntryNotInPlaylist):
1563                 if incomplete_entries:
1564                     raise EntryNotInPlaylist(f'Entry {i} cannot be found')
1565                 elif not playlistitems:
1566                     break
1567             entries.append(entry)
1568             try:
1569                 if entry is not None:
1570                     self._match_entry(entry, incomplete=True, silent=True)
1571             except (ExistingVideoReached, RejectedVideoReached):
1572                 break
1573         ie_result['entries'] = entries
1574
1575         # Save playlist_index before re-ordering
1576         entries = [
1577             ((playlistitems[i - 1] if playlistitems else i + playliststart - 1), entry)
1578             for i, entry in enumerate(entries, 1)
1579             if entry is not None]
1580         n_entries = len(entries)
1581
1582         if not playlistitems and (playliststart or playlistend):
1583             playlistitems = list(range(playliststart, playliststart + n_entries))
1584         ie_result['requested_entries'] = playlistitems
1585
1586         if not self.params.get('simulate') and self.params.get('allow_playlist_files', True):
1587             ie_copy = {
1588                 'playlist': playlist,
1589                 'playlist_id': ie_result.get('id'),
1590                 'playlist_title': ie_result.get('title'),
1591                 'playlist_uploader': ie_result.get('uploader'),
1592                 'playlist_uploader_id': ie_result.get('uploader_id'),
1593                 'playlist_index': 0,
1594                 'n_entries': n_entries,
1595             }
1596             ie_copy.update(dict(ie_result))
1597
1598             if self._write_info_json('playlist', ie_result,
1599                                      self.prepare_filename(ie_copy, 'pl_infojson')) is None:
1600                 return
1601             if self._write_description('playlist', ie_result,
1602                                        self.prepare_filename(ie_copy, 'pl_description')) is None:
1603                 return
1604             # TODO: This should be passed to ThumbnailsConvertor if necessary
1605             self._write_thumbnails('playlist', ie_copy, self.prepare_filename(ie_copy, 'pl_thumbnail'))
1606
1607         if self.params.get('playlistreverse', False):
1608             entries = entries[::-1]
1609         if self.params.get('playlistrandom', False):
1610             random.shuffle(entries)
1611
1612         x_forwarded_for = ie_result.get('__x_forwarded_for_ip')
1613
1614         self.to_screen('[%s] playlist %s: %s' % (ie_result['extractor'], playlist, msg % n_entries))
1615         failures = 0
1616         max_failures = self.params.get('skip_playlist_after_errors') or float('inf')
1617         for i, entry_tuple in enumerate(entries, 1):
1618             playlist_index, entry = entry_tuple
1619             if 'playlist-index' in self.params.get('compat_opts', []):
1620                 playlist_index = playlistitems[i - 1] if playlistitems else i + playliststart - 1
1621             self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
1622             # This __x_forwarded_for_ip thing is a bit ugly but requires
1623             # minimal changes
1624             if x_forwarded_for:
1625                 entry['__x_forwarded_for_ip'] = x_forwarded_for
1626             extra = {
1627                 'n_entries': n_entries,
1628                 '_last_playlist_index': max(playlistitems) if playlistitems else (playlistend or n_entries),
1629                 'playlist_index': playlist_index,
1630                 'playlist_autonumber': i,
1631                 'playlist': playlist,
1632                 'playlist_id': ie_result.get('id'),
1633                 'playlist_title': ie_result.get('title'),
1634                 'playlist_uploader': ie_result.get('uploader'),
1635                 'playlist_uploader_id': ie_result.get('uploader_id'),
1636                 'extractor': ie_result['extractor'],
1637                 'webpage_url': ie_result['webpage_url'],
1638                 'webpage_url_basename': url_basename(ie_result['webpage_url']),
1639                 'extractor_key': ie_result['extractor_key'],
1640             }
1641
1642             if self._match_entry(entry, incomplete=True) is not None:
1643                 continue
1644
1645             entry_result = self.__process_iterable_entry(entry, download, extra)
1646             if not entry_result:
1647                 failures += 1
1648             if failures >= max_failures:
1649                 self.report_error(
1650                     'Skipping the remaining entries in playlist "%s" since %d items failed extraction' % (playlist, failures))
1651                 break
1652             # TODO: skip failed (empty) entries?
1653             playlist_results.append(entry_result)
1654         ie_result['entries'] = playlist_results
1655         self.to_screen('[download] Finished downloading playlist: %s' % playlist)
1656         return ie_result
1657
1658     @__handle_extraction_exceptions
1659     def __process_iterable_entry(self, entry, download, extra_info):
1660         return self.process_ie_result(
1661             entry, download=download, extra_info=extra_info)
1662
1663     def _build_format_filter(self, filter_spec):
1664         " Returns a function to filter the formats according to the filter_spec "
1665
1666         OPERATORS = {
1667             '<': operator.lt,
1668             '<=': operator.le,
1669             '>': operator.gt,
1670             '>=': operator.ge,
1671             '=': operator.eq,
1672             '!=': operator.ne,
1673         }
1674         operator_rex = re.compile(r'''(?x)\s*
1675             (?P<key>width|height|tbr|abr|vbr|asr|filesize|filesize_approx|fps)\s*
1676             (?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
1677             (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)\s*
1678             ''' % '|'.join(map(re.escape, OPERATORS.keys())))
1679         m = operator_rex.fullmatch(filter_spec)
1680         if m:
1681             try:
1682                 comparison_value = int(m.group('value'))
1683             except ValueError:
1684                 comparison_value = parse_filesize(m.group('value'))
1685                 if comparison_value is None:
1686                     comparison_value = parse_filesize(m.group('value') + 'B')
1687                 if comparison_value is None:
1688                     raise ValueError(
1689                         'Invalid value %r in format specification %r' % (
1690                             m.group('value'), filter_spec))
1691             op = OPERATORS[m.group('op')]
1692
1693         if not m:
1694             STR_OPERATORS = {
1695                 '=': operator.eq,
1696                 '^=': lambda attr, value: attr.startswith(value),
1697                 '$=': lambda attr, value: attr.endswith(value),
1698                 '*=': lambda attr, value: value in attr,
1699             }
1700             str_operator_rex = re.compile(r'''(?x)\s*
1701                 (?P<key>[a-zA-Z0-9._-]+)\s*
1702                 (?P<negation>!\s*)?(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
1703                 (?P<value>[a-zA-Z0-9._-]+)\s*
1704                 ''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))
1705             m = str_operator_rex.fullmatch(filter_spec)
1706             if m:
1707                 comparison_value = m.group('value')
1708                 str_op = STR_OPERATORS[m.group('op')]
1709                 if m.group('negation'):
1710                     op = lambda attr, value: not str_op(attr, value)
1711                 else:
1712                     op = str_op
1713
1714         if not m:
1715             raise SyntaxError('Invalid filter specification %r' % filter_spec)
1716
1717         def _filter(f):
1718             actual_value = f.get(m.group('key'))
1719             if actual_value is None:
1720                 return m.group('none_inclusive')
1721             return op(actual_value, comparison_value)
1722         return _filter
1723
1724     def _check_formats(self, formats):
1725         for f in formats:
1726             self.to_screen('[info] Testing format %s' % f['format_id'])
1727             temp_file = tempfile.NamedTemporaryFile(
1728                 suffix='.tmp', delete=False,
1729                 dir=self.get_output_path('temp') or None)
1730             temp_file.close()
1731             try:
1732                 success, _ = self.dl(temp_file.name, f, test=True)
1733             except (DownloadError, IOError, OSError, ValueError) + network_exceptions:
1734                 success = False
1735             finally:
1736                 if os.path.exists(temp_file.name):
1737                     try:
1738                         os.remove(temp_file.name)
1739                     except OSError:
1740                         self.report_warning('Unable to delete temporary file "%s"' % temp_file.name)
1741             if success:
1742                 yield f
1743             else:
1744                 self.to_screen('[info] Unable to download format %s. Skipping...' % f['format_id'])
1745
1746     def _default_format_spec(self, info_dict, download=True):
1747
1748         def can_merge():
1749             merger = FFmpegMergerPP(self)
1750             return merger.available and merger.can_merge()
1751
1752         prefer_best = (
1753             not self.params.get('simulate')
1754             and download
1755             and (
1756                 not can_merge()
1757                 or info_dict.get('is_live', False)
1758                 or self.outtmpl_dict['default'] == '-'))
1759         compat = (
1760             prefer_best
1761             or self.params.get('allow_multiple_audio_streams', False)
1762             or 'format-spec' in self.params.get('compat_opts', []))
1763
1764         return (
1765             'best/bestvideo+bestaudio' if prefer_best
1766             else 'bestvideo*+bestaudio/best' if not compat
1767             else 'bestvideo+bestaudio/best')
1768
1769     def build_format_selector(self, format_spec):
1770         def syntax_error(note, start):
1771             message = (
1772                 'Invalid format specification: '
1773                 '{0}\n\t{1}\n\t{2}^'.format(note, format_spec, ' ' * start[1]))
1774             return SyntaxError(message)
1775
1776         PICKFIRST = 'PICKFIRST'
1777         MERGE = 'MERGE'
1778         SINGLE = 'SINGLE'
1779         GROUP = 'GROUP'
1780         FormatSelector = collections.namedtuple('FormatSelector', ['type', 'selector', 'filters'])
1781
1782         allow_multiple_streams = {'audio': self.params.get('allow_multiple_audio_streams', False),
1783                                   'video': self.params.get('allow_multiple_video_streams', False)}
1784
1785         check_formats = self.params.get('check_formats') == 'selected'
1786
1787         def _parse_filter(tokens):
1788             filter_parts = []
1789             for type, string, start, _, _ in tokens:
1790                 if type == tokenize.OP and string == ']':
1791                     return ''.join(filter_parts)
1792                 else:
1793                     filter_parts.append(string)
1794
1795         def _remove_unused_ops(tokens):
1796             # Remove operators that we don't use and join them with the surrounding strings
1797             # for example: 'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9'
1798             ALLOWED_OPS = ('/', '+', ',', '(', ')')
1799             last_string, last_start, last_end, last_line = None, None, None, None
1800             for type, string, start, end, line in tokens:
1801                 if type == tokenize.OP and string == '[':
1802                     if last_string:
1803                         yield tokenize.NAME, last_string, last_start, last_end, last_line
1804                         last_string = None
1805                     yield type, string, start, end, line
1806                     # everything inside brackets will be handled by _parse_filter
1807                     for type, string, start, end, line in tokens:
1808                         yield type, string, start, end, line
1809                         if type == tokenize.OP and string == ']':
1810                             break
1811                 elif type == tokenize.OP and string in ALLOWED_OPS:
1812                     if last_string:
1813                         yield tokenize.NAME, last_string, last_start, last_end, last_line
1814                         last_string = None
1815                     yield type, string, start, end, line
1816                 elif type in [tokenize.NAME, tokenize.NUMBER, tokenize.OP]:
1817                     if not last_string:
1818                         last_string = string
1819                         last_start = start
1820                         last_end = end
1821                     else:
1822                         last_string += string
1823             if last_string:
1824                 yield tokenize.NAME, last_string, last_start, last_end, last_line
1825
1826         def _parse_format_selection(tokens, inside_merge=False, inside_choice=False, inside_group=False):
1827             selectors = []
1828             current_selector = None
1829             for type, string, start, _, _ in tokens:
1830                 # ENCODING is only defined in python 3.x
1831                 if type == getattr(tokenize, 'ENCODING', None):
1832                     continue
1833                 elif type in [tokenize.NAME, tokenize.NUMBER]:
1834                     current_selector = FormatSelector(SINGLE, string, [])
1835                 elif type == tokenize.OP:
1836                     if string == ')':
1837                         if not inside_group:
1838                             # ')' will be handled by the parentheses group
1839                             tokens.restore_last_token()
1840                         break
1841                     elif inside_merge and string in ['/', ',']:
1842                         tokens.restore_last_token()
1843                         break
1844                     elif inside_choice and string == ',':
1845                         tokens.restore_last_token()
1846                         break
1847                     elif string == ',':
1848                         if not current_selector:
1849                             raise syntax_error('"," must follow a format selector', start)
1850                         selectors.append(current_selector)
1851                         current_selector = None
1852                     elif string == '/':
1853                         if not current_selector:
1854                             raise syntax_error('"/" must follow a format selector', start)
1855                         first_choice = current_selector
1856                         second_choice = _parse_format_selection(tokens, inside_choice=True)
1857                         current_selector = FormatSelector(PICKFIRST, (first_choice, second_choice), [])
1858                     elif string == '[':
1859                         if not current_selector:
1860                             current_selector = FormatSelector(SINGLE, 'best', [])
1861                         format_filter = _parse_filter(tokens)
1862                         current_selector.filters.append(format_filter)
1863                     elif string == '(':
1864                         if current_selector:
1865                             raise syntax_error('Unexpected "("', start)
1866                         group = _parse_format_selection(tokens, inside_group=True)
1867                         current_selector = FormatSelector(GROUP, group, [])
1868                     elif string == '+':
1869                         if not current_selector:
1870                             raise syntax_error('Unexpected "+"', start)
1871                         selector_1 = current_selector
1872                         selector_2 = _parse_format_selection(tokens, inside_merge=True)
1873                         if not selector_2:
1874                             raise syntax_error('Expected a selector', start)
1875                         current_selector = FormatSelector(MERGE, (selector_1, selector_2), [])
1876                     else:
1877                         raise syntax_error('Operator not recognized: "{0}"'.format(string), start)
1878                 elif type == tokenize.ENDMARKER:
1879                     break
1880             if current_selector:
1881                 selectors.append(current_selector)
1882             return selectors
1883
1884         def _merge(formats_pair):
1885             format_1, format_2 = formats_pair
1886
1887             formats_info = []
1888             formats_info.extend(format_1.get('requested_formats', (format_1,)))
1889             formats_info.extend(format_2.get('requested_formats', (format_2,)))
1890
1891             if not allow_multiple_streams['video'] or not allow_multiple_streams['audio']:
1892                 get_no_more = {'video': False, 'audio': False}
1893                 for (i, fmt_info) in enumerate(formats_info):
1894                     if fmt_info.get('acodec') == fmt_info.get('vcodec') == 'none':
1895                         formats_info.pop(i)
1896                         continue
1897                     for aud_vid in ['audio', 'video']:
1898                         if not allow_multiple_streams[aud_vid] and fmt_info.get(aud_vid[0] + 'codec') != 'none':
1899                             if get_no_more[aud_vid]:
1900                                 formats_info.pop(i)
1901                                 break
1902                             get_no_more[aud_vid] = True
1903
1904             if len(formats_info) == 1:
1905                 return formats_info[0]
1906
1907             video_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('vcodec') != 'none']
1908             audio_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('acodec') != 'none']
1909
1910             the_only_video = video_fmts[0] if len(video_fmts) == 1 else None
1911             the_only_audio = audio_fmts[0] if len(audio_fmts) == 1 else None
1912
1913             output_ext = self.params.get('merge_output_format')
1914             if not output_ext:
1915                 if the_only_video:
1916                     output_ext = the_only_video['ext']
1917                 elif the_only_audio and not video_fmts:
1918                     output_ext = the_only_audio['ext']
1919                 else:
1920                     output_ext = 'mkv'
1921
1922             filtered = lambda *keys: filter(None, (traverse_obj(fmt, *keys) for fmt in formats_info))
1923
1924             new_dict = {
1925                 'requested_formats': formats_info,
1926                 'format': '+'.join(filtered('format')),
1927                 'format_id': '+'.join(filtered('format_id')),
1928                 'ext': output_ext,
1929                 'protocol': '+'.join(map(determine_protocol, formats_info)),
1930                 'language': '+'.join(orderedSet(filtered('language'))),
1931                 'format_note': '+'.join(orderedSet(filtered('format_note'))),
1932                 'filesize_approx': sum(filtered('filesize', 'filesize_approx')),
1933                 'tbr': sum(filtered('tbr', 'vbr', 'abr')),
1934             }
1935
1936             if the_only_video:
1937                 new_dict.update({
1938                     'width': the_only_video.get('width'),
1939                     'height': the_only_video.get('height'),
1940                     'resolution': the_only_video.get('resolution') or self.format_resolution(the_only_video),
1941                     'fps': the_only_video.get('fps'),
1942                     'dynamic_range': the_only_video.get('dynamic_range'),
1943                     'vcodec': the_only_video.get('vcodec'),
1944                     'vbr': the_only_video.get('vbr'),
1945                     'stretched_ratio': the_only_video.get('stretched_ratio'),
1946                 })
1947
1948             if the_only_audio:
1949                 new_dict.update({
1950                     'acodec': the_only_audio.get('acodec'),
1951                     'abr': the_only_audio.get('abr'),
1952                     'asr': the_only_audio.get('asr'),
1953                 })
1954
1955             return new_dict
1956
1957         def _check_formats(formats):
1958             if not check_formats:
1959                 yield from formats
1960                 return
1961             yield from self._check_formats(formats)
1962
        def _build_selector_function(selector):
            """Turn a parsed selector into a function mapping a context to formats.

            selector is either a list of selectors (comma-separated alternatives
            that are all downloaded) or a single FormatSelector.  The returned
            function takes a ctx dict (with at least 'formats' and
            'incomplete_formats') and produces the selected formats.
            """
            if isinstance(selector, list):  # ,
                fs = [_build_selector_function(s) for s in selector]

                def selector_function(ctx):
                    # Emit the picks of every sub-selector, in order
                    for f in fs:
                        yield from f(ctx)
                # A plain list carries no filters, so return right away
                return selector_function

            elif selector.type == GROUP:  # ()
                selector_function = _build_selector_function(selector.selector)

            elif selector.type == PICKFIRST:  # /
                fs = [_build_selector_function(s) for s in selector.selector]

                def selector_function(ctx):
                    # Return the result of the first alternative that selects anything
                    for f in fs:
                        picked_formats = list(f(ctx))
                        if picked_formats:
                            return picked_formats
                    return []

            elif selector.type == MERGE:  # +
                selector_1, selector_2 = map(_build_selector_function, selector.selector)

                def selector_function(ctx):
                    # Merge every combination of the two sides; each side gets
                    # its own deep copy of the context
                    for pair in itertools.product(
                            selector_1(copy.deepcopy(ctx)), selector_2(copy.deepcopy(ctx))):
                        yield _merge(pair)

            elif selector.type == SINGLE:  # atom
                format_spec = selector.selector or 'best'

                # TODO: Add allvideo, allaudio etc by generalizing the code with best/worst selector
                if format_spec == 'all':
                    def selector_function(ctx):
                        yield from _check_formats(ctx['formats'])
                elif format_spec == 'mergeall':
                    def selector_function(ctx):
                        formats = list(_check_formats(ctx['formats']))
                        if not formats:
                            return
                        # Start from the last format and fold the others in,
                        # walking the list backwards
                        merged_format = formats[-1]
                        for f in formats[-2::-1]:
                            merged_format = _merge((merged_format, f))
                        yield merged_format

                else:
                    # Defaults used when format_spec is not a best/worst expression
                    format_fallback, format_reverse, format_idx = False, True, 1
                    mobj = re.match(
                        r'(?P<bw>best|worst|b|w)(?P<type>video|audio|v|a)?(?P<mod>\*)?(?:\.(?P<n>[1-9]\d*))?$',
                        format_spec)
                    if mobj is not None:
                        # 1-based rank requested with the ".n" suffix
                        format_idx = int_or_none(mobj.group('n'), default=1)
                        # True for best (search from the end of the list), False for worst
                        format_reverse = mobj.group('bw')[0] == 'b'
                        # 'v', 'a' or None
                        format_type = (mobj.group('type') or [None])[0]
                        not_format_type = {'v': 'a', 'a': 'v'}.get(format_type)
                        format_modified = mobj.group('mod') is not None

                        format_fallback = not format_type and not format_modified  # for b, w
                        _filter_f = (
                            (lambda f: f.get('%scodec' % format_type) != 'none')
                            if format_type and format_modified  # bv*, ba*, wv*, wa*
                            else (lambda f: f.get('%scodec' % not_format_type) == 'none')
                            if format_type  # bv, ba, wv, wa
                            else (lambda f: f.get('vcodec') != 'none' and f.get('acodec') != 'none')
                            if not format_modified  # b, w
                            else lambda f: True)  # b*, w*
                        # Additionally reject formats that have neither audio nor video
                        filter_f = lambda f: _filter_f(f) and (
                            f.get('vcodec') != 'none' or f.get('acodec') != 'none')
                    else:
                        # Not a best/worst expression: match by extension or by format id
                        if format_spec in self._format_selection_exts['audio']:
                            filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') != 'none'
                        elif format_spec in self._format_selection_exts['video']:
                            filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') != 'none' and f.get('vcodec') != 'none'
                        elif format_spec in self._format_selection_exts['storyboards']:
                            filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') == 'none' and f.get('vcodec') == 'none'
                        else:
                            filter_f = lambda f: f.get('format_id') == format_spec  # id

                    def selector_function(ctx):
                        formats = list(ctx['formats'])
                        matches = list(filter(filter_f, formats)) if filter_f is not None else formats
                        if format_fallback and ctx['incomplete_formats'] and not matches:
                            # for extractors with incomplete formats (audio only (soundcloud)
                            # or video only (imgur)) best/worst will fallback to
                            # best/worst {video,audio}-only format
                            matches = formats
                        # Wrapped in LazyList so the check generator is consumed
                        # through indexing rather than materialized up front
                        matches = LazyList(_check_formats(matches[::-1 if format_reverse else 1]))
                        try:
                            yield matches[format_idx - 1]
                        except IndexError:
                            # Fewer matching formats than the requested rank
                            return

            # Filters given in [...] after the selector
            filters = [self._build_format_filter(f) for f in selector.filters]

            def final_selector(ctx):
                # Apply the filters to a deep copy so the caller's context is untouched
                ctx_copy = copy.deepcopy(ctx)
                for _filter in filters:
                    ctx_copy['formats'] = list(filter(_filter, ctx_copy['formats']))
                return selector_function(ctx_copy)
            return final_selector
2065
2066         stream = io.BytesIO(format_spec.encode('utf-8'))
2067         try:
2068             tokens = list(_remove_unused_ops(compat_tokenize_tokenize(stream.readline)))
2069         except tokenize.TokenError:
2070             raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec)))
2071
2072         class TokenIterator(object):
2073             def __init__(self, tokens):
2074                 self.tokens = tokens
2075                 self.counter = 0
2076
2077             def __iter__(self):
2078                 return self
2079
2080             def __next__(self):
2081                 if self.counter >= len(self.tokens):
2082                     raise StopIteration()
2083                 value = self.tokens[self.counter]
2084                 self.counter += 1
2085                 return value
2086
2087             next = __next__
2088
2089             def restore_last_token(self):
2090                 self.counter -= 1
2091
2092         parsed_selector = _parse_format_selection(iter(TokenIterator(tokens)))
2093         return _build_selector_function(parsed_selector)
2094
2095     def _calc_headers(self, info_dict):
2096         res = std_headers.copy()
2097
2098         add_headers = info_dict.get('http_headers')
2099         if add_headers:
2100             res.update(add_headers)
2101
2102         cookies = self._calc_cookies(info_dict)
2103         if cookies:
2104             res['Cookie'] = cookies
2105
2106         if 'X-Forwarded-For' not in res:
2107             x_forwarded_for_ip = info_dict.get('__x_forwarded_for_ip')
2108             if x_forwarded_for_ip:
2109                 res['X-Forwarded-For'] = x_forwarded_for_ip
2110
2111         return res
2112
2113     def _calc_cookies(self, info_dict):
2114         pr = sanitized_Request(info_dict['url'])
2115         self.cookiejar.add_cookie_header(pr)
2116         return pr.get_header('Cookie')
2117
2118     def _sort_thumbnails(self, thumbnails):
2119         thumbnails.sort(key=lambda t: (
2120             t.get('preference') if t.get('preference') is not None else -1,
2121             t.get('width') if t.get('width') is not None else -1,
2122             t.get('height') if t.get('height') is not None else -1,
2123             t.get('id') if t.get('id') is not None else '',
2124             t.get('url')))
2125
2126     def _sanitize_thumbnails(self, info_dict):
2127         thumbnails = info_dict.get('thumbnails')
2128         if thumbnails is None:
2129             thumbnail = info_dict.get('thumbnail')
2130             if thumbnail:
2131                 info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}]
2132         if not thumbnails:
2133             return
2134
2135         def check_thumbnails(thumbnails):
2136             for t in thumbnails:
2137                 self.to_screen(f'[info] Testing thumbnail {t["id"]}')
2138                 try:
2139                     self.urlopen(HEADRequest(t['url']))
2140                 except network_exceptions as err:
2141                     self.to_screen(f'[info] Unable to connect to thumbnail {t["id"]} URL {t["url"]!r} - {err}. Skipping...')
2142                     continue
2143                 yield t
2144
2145         self._sort_thumbnails(thumbnails)
2146         for i, t in enumerate(thumbnails):
2147             if t.get('id') is None:
2148                 t['id'] = '%d' % i
2149             if t.get('width') and t.get('height'):
2150                 t['resolution'] = '%dx%d' % (t['width'], t['height'])
2151             t['url'] = sanitize_url(t['url'])
2152
2153         if self.params.get('check_formats') is True:
2154             info_dict['thumbnails'] = LazyList(check_thumbnails(thumbnails[::-1])).reverse()
2155         else:
2156             info_dict['thumbnails'] = thumbnails
2157
    def process_video_result(self, info_dict, download=True):
        """Sanitize a single-video info dict, select its formats and process them.

        info_dict is modified in place: missing derived fields are filled in,
        malformed values are coerced, and every format is normalized.  When
        download is true, each selected format is merged into a copy of
        info_dict and handed to process_info.

        Returns info_dict updated with the last selected format, or None when
        only a listing (thumbnails/formats/subtitles) was requested and the
        'simulate' param is None.

        Raises ExtractorError if 'id' or 'title' is missing, or if no format
        matches the request and 'ignore_no_formats_error' is not set.
        """
        assert info_dict.get('_type', 'video') == 'video'

        if 'id' not in info_dict:
            raise ExtractorError('Missing "id" field in extractor result')
        if 'title' not in info_dict:
            # NOTE(review): assumes 'extractor' is always present in info_dict - confirm
            raise ExtractorError('Missing "title" field in extractor result',
                                 video_id=info_dict['id'], ie=info_dict['extractor'])

        def report_force_conversion(field, field_not, conversion):
            self.report_warning(
                '"%s" field is not %s - forcing %s conversion, there is an error in extractor'
                % (field, field_not, conversion))

        def sanitize_string_field(info, string_field):
            # Coerce a non-string value to a string, warning about the extractor bug
            field = info.get(string_field)
            if field is None or isinstance(field, compat_str):
                return
            report_force_conversion(string_field, 'a string', 'string')
            info[string_field] = compat_str(field)

        def sanitize_numeric_fields(info):
            # Coerce every non-numeric value of the known numeric fields with int_or_none
            for numeric_field in self._NUMERIC_FIELDS:
                field = info.get(numeric_field)
                if field is None or isinstance(field, compat_numeric_types):
                    continue
                report_force_conversion(numeric_field, 'numeric', 'int')
                info[numeric_field] = int_or_none(field)

        sanitize_string_field(info_dict, 'id')
        sanitize_numeric_fields(info_dict)

        if 'playlist' not in info_dict:
            # It isn't part of a playlist
            info_dict['playlist'] = None
            info_dict['playlist_index'] = None

        self._sanitize_thumbnails(info_dict)

        # Keep 'thumbnail' in sync: sanitize an explicit one, otherwise use the
        # last entry of the sorted thumbnails list
        thumbnail = info_dict.get('thumbnail')
        thumbnails = info_dict.get('thumbnails')
        if thumbnail:
            info_dict['thumbnail'] = sanitize_url(thumbnail)
        elif thumbnails:
            info_dict['thumbnail'] = thumbnails[-1]['url']

        if info_dict.get('display_id') is None and 'id' in info_dict:
            info_dict['display_id'] = info_dict['id']

        if info_dict.get('duration') is not None:
            info_dict['duration_string'] = formatSeconds(info_dict['duration'])

        # Derive the YYYYMMDD date fields from their timestamps when missing
        for ts_key, date_key in (
                ('timestamp', 'upload_date'),
                ('release_timestamp', 'release_date'),
        ):
            if info_dict.get(date_key) is None and info_dict.get(ts_key) is not None:
                # Working around out-of-range timestamp values (e.g. negative ones on Windows,
                # see http://bugs.python.org/issue1646728)
                try:
                    upload_date = datetime.datetime.utcfromtimestamp(info_dict[ts_key])
                    info_dict[date_key] = upload_date.strftime('%Y%m%d')
                except (ValueError, OverflowError, OSError):
                    pass

        # Reconcile 'live_status' with the boolean 'is_live'/'was_live' fields
        live_keys = ('is_live', 'was_live')
        live_status = info_dict.get('live_status')
        if live_status is None:
            for key in live_keys:
                # Skip keys explicitly set to False; stop at the first key that
                # is anything else and use it as the status if it is truthy
                if info_dict.get(key) is False:
                    continue
                if info_dict.get(key):
                    live_status = key
                break
            if all(info_dict.get(key) is False for key in live_keys):
                live_status = 'not_live'
        if live_status:
            info_dict['live_status'] = live_status
            # Fill in the booleans that the extractor left unset
            for key in live_keys:
                if info_dict.get(key) is None:
                    info_dict[key] = (live_status == key)

        # Auto generate title fields corresponding to the *_number fields when missing
        # in order to always have clean titles. This is very common for TV series.
        for field in ('chapter', 'season', 'episode'):
            if info_dict.get('%s_number' % field) is not None and not info_dict.get(field):
                info_dict[field] = '%s %d' % (field.capitalize(), info_dict['%s_number' % field])

        # Sanitize subtitle URLs and make sure every subtitle format has an 'ext'
        for cc_kind in ('subtitles', 'automatic_captions'):
            cc = info_dict.get(cc_kind)
            if cc:
                for _, subtitle in cc.items():
                    for subtitle_format in subtitle:
                        if subtitle_format.get('url'):
                            subtitle_format['url'] = sanitize_url(subtitle_format['url'])
                        if subtitle_format.get('ext') is None:
                            # NOTE(review): indexes 'url' directly, so an entry with
                            # neither 'ext' nor 'url' would raise KeyError - confirm
                            subtitle_format['ext'] = determine_ext(subtitle_format['url']).lower()

        automatic_captions = info_dict.get('automatic_captions')
        subtitles = info_dict.get('subtitles')

        info_dict['requested_subtitles'] = self.process_subtitles(
            info_dict['id'], subtitles, automatic_captions)

        if info_dict.get('formats') is None:
            # There's only one format available
            formats = [info_dict]
        else:
            formats = info_dict['formats']

        # Record whether any format has DRM before such formats are dropped
        info_dict['__has_drm'] = any(f.get('has_drm') for f in formats)
        if not self.params.get('allow_unplayable_formats'):
            formats = [f for f in formats if not f.get('has_drm')]

        if not formats:
            self.raise_no_formats(info_dict)

        def is_wellformed(f):
            url = f.get('url')
            if not url:
                self.report_warning(
                    '"url" field is missing or empty - skipping format, '
                    'there is an error in extractor')
                return False
            if isinstance(url, bytes):
                sanitize_string_field(f, 'url')
            return True

        # Filter out malformed formats for better extraction robustness
        formats = list(filter(is_wellformed, formats))

        # Maps each format_id to the list of formats sharing it
        formats_dict = {}

        # We check that all the formats have the format and format_id fields
        for i, format in enumerate(formats):
            sanitize_string_field(format, 'format_id')
            sanitize_numeric_fields(format)
            format['url'] = sanitize_url(format['url'])
            if not format.get('format_id'):
                # Fall back to the position in the list as the id
                format['format_id'] = compat_str(i)
            else:
                # Sanitize format_id from characters used in format selector expression
                format['format_id'] = re.sub(r'[\s,/+\[\]()]', '_', format['format_id'])
            format_id = format['format_id']
            if format_id not in formats_dict:
                formats_dict[format_id] = []
            formats_dict[format_id].append(format)

        # Make sure all formats have unique format_id
        common_exts = set(itertools.chain(*self._format_selection_exts.values()))
        for format_id, ambiguous_formats in formats_dict.items():
            ambigious_id = len(ambiguous_formats) > 1
            for i, format in enumerate(ambiguous_formats):
                if ambigious_id:
                    # Disambiguate duplicates with a numeric suffix
                    format['format_id'] = '%s-%d' % (format_id, i)
                if format.get('ext') is None:
                    format['ext'] = determine_ext(format['url']).lower()
                # Ensure there is no conflict between id and ext in format selection
                # See https://github.com/yt-dlp/yt-dlp/issues/1282
                if format['format_id'] != format['ext'] and format['format_id'] in common_exts:
                    format['format_id'] = 'f%s' % format['format_id']

        # Fill in the derived per-format fields that the extractor left unset
        for i, format in enumerate(formats):
            if format.get('format') is None:
                format['format'] = '{id} - {res}{note}'.format(
                    id=format['format_id'],
                    res=self.format_resolution(format),
                    note=format_field(format, 'format_note', ' (%s)'),
                )
            if format.get('protocol') is None:
                format['protocol'] = determine_protocol(format)
            if format.get('resolution') is None:
                format['resolution'] = self.format_resolution(format, default=None)
            if format.get('dynamic_range') is None and format.get('vcodec') != 'none':
                format['dynamic_range'] = 'SDR'
            # Estimate the size from duration and total bitrate when no size is known
            if (info_dict.get('duration') and format.get('tbr')
                    and not format.get('filesize') and not format.get('filesize_approx')):
                format['filesize_approx'] = info_dict['duration'] * format['tbr'] * (1024 / 8)

            # Add HTTP headers, so that external programs can use them from the
            # json output
            full_format_info = info_dict.copy()
            full_format_info.update(format)
            format['http_headers'] = self._calc_headers(full_format_info)
        # Remove private housekeeping stuff
        if '__x_forwarded_for_ip' in info_dict:
            del info_dict['__x_forwarded_for_ip']

        # TODO Central sorting goes here

        if self.params.get('check_formats') is True:
            # Check the formats lazily, starting from the end of the list, then
            # restore the original order
            formats = LazyList(self._check_formats(formats[::-1])).reverse()

        if not formats or formats[0] is not info_dict:
            # only set the 'formats' fields if the original info_dict list them
            # otherwise we end up with a circular reference, the first (and unique)
            # element in the 'formats' field in info_dict is info_dict itself,
            # which can't be exported to json
            info_dict['formats'] = formats

        info_dict, _ = self.pre_process(info_dict)

        if self.params.get('list_thumbnails'):
            self.list_thumbnails(info_dict)
        if self.params.get('listformats'):
            if not info_dict.get('formats') and not info_dict.get('url'):
                self.to_screen('%s has no formats' % info_dict['id'])
            else:
                self.list_formats(info_dict)
        if self.params.get('listsubtitles'):
            if 'automatic_captions' in info_dict:
                self.list_subtitles(
                    info_dict['id'], automatic_captions, 'automatic captions')
            self.list_subtitles(info_dict['id'], subtitles, 'subtitles')
        # A listing option stops processing here only when 'simulate' is None
        list_only = self.params.get('simulate') is None and (
            self.params.get('list_thumbnails') or self.params.get('listformats') or self.params.get('listsubtitles'))
        if list_only:
            # Without this printing, -F --print-json will not work
            self.__forced_printings(info_dict, self.prepare_filename(info_dict), incomplete=True)
            return

        format_selector = self.format_selector
        if format_selector is None:
            # No selector configured: build one from the default format spec
            req_format = self._default_format_spec(info_dict, download=download)
            self.write_debug('Default format spec: %s' % req_format)
            format_selector = self.build_format_selector(req_format)

        # While in format selection we may need to have an access to the original
        # format set in order to calculate some metrics or do some processing.
        # For now we need to be able to guess whether original formats provided
        # by extractor are incomplete or not (i.e. whether extractor provides only
        # video-only or audio-only formats) for proper formats selection for
        # extractors with such incomplete formats (see
        # https://github.com/ytdl-org/youtube-dl/pull/5556).
        # Since formats may be filtered during format selection and may not match
        # the original formats the results may be incorrect. Thus original formats
        # or pre-calculated metrics should be passed to format selection routines
        # as well.
        # We will pass a context object containing all necessary additional data
        # instead of just formats.
        # This fixes incorrect format selection issue (see
        # https://github.com/ytdl-org/youtube-dl/issues/10083).
        incomplete_formats = (
            # All formats are video-only or
            all(f.get('vcodec') != 'none' and f.get('acodec') == 'none' for f in formats)
            # all formats are audio-only
            or all(f.get('vcodec') == 'none' and f.get('acodec') != 'none' for f in formats))

        ctx = {
            'formats': formats,
            'incomplete_formats': incomplete_formats,
        }

        formats_to_download = list(format_selector(ctx))
        if not formats_to_download:
            if not self.params.get('ignore_no_formats_error'):
                raise ExtractorError('Requested format is not available', expected=True,
                                     video_id=info_dict['id'], ie=info_dict['extractor'])
            else:
                self.report_warning('Requested format is not available')
                # Process what we can, even without any available formats.
                self.process_info(dict(info_dict))
        elif download:
            self.to_screen(
                '[info] %s: Downloading %d format(s): %s' % (
                    info_dict['id'], len(formats_to_download),
                    ", ".join([f['format_id'] for f in formats_to_download])))
            # Each selected format is processed on its own copy of info_dict
            for fmt in formats_to_download:
                new_info = dict(info_dict)
                # Save a reference to the original info_dict so that it can be modified in process_info if needed
                new_info['__original_infodict'] = info_dict
                new_info.update(fmt)
                self.process_info(new_info)
        # We update the info dict with the selected best quality format (backwards compatibility)
        if formats_to_download:
            info_dict.update(formats_to_download[-1])
        return info_dict
2435
2436     def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
2437         """Select the requested subtitles and their format"""
2438         available_subs = {}
2439         if normal_subtitles and self.params.get('writesubtitles'):
2440             available_subs.update(normal_subtitles)
2441         if automatic_captions and self.params.get('writeautomaticsub'):
2442             for lang, cap_info in automatic_captions.items():
2443                 if lang not in available_subs:
2444                     available_subs[lang] = cap_info
2445
2446         if (not self.params.get('writesubtitles') and not
2447                 self.params.get('writeautomaticsub') or not
2448                 available_subs):
2449             return None
2450
2451         all_sub_langs = available_subs.keys()
2452         if self.params.get('allsubtitles', False):
2453             requested_langs = all_sub_langs
2454         elif self.params.get('subtitleslangs', False):
2455             # A list is used so that the order of languages will be the same as
2456             # given in subtitleslangs. See https://github.com/yt-dlp/yt-dlp/issues/1041
2457             requested_langs = []
2458             for lang_re in self.params.get('subtitleslangs'):
2459                 if lang_re == 'all':
2460                     requested_langs.extend(all_sub_langs)
2461                     continue
2462                 discard = lang_re[0] == '-'
2463                 if discard:
2464                     lang_re = lang_re[1:]
2465                 current_langs = filter(re.compile(lang_re + '$').match, all_sub_langs)
2466                 if discard:
2467                     for lang in current_langs:
2468                         while lang in requested_langs:
2469                             requested_langs.remove(lang)
2470                 else:
2471                     requested_langs.extend(current_langs)
2472             requested_langs = orderedSet(requested_langs)
2473         elif 'en' in available_subs:
2474             requested_langs = ['en']
2475         else:
2476             requested_langs = [list(all_sub_langs)[0]]
2477         if requested_langs:
2478             self.write_debug('Downloading subtitles: %s' % ', '.join(requested_langs))
2479
2480         formats_query = self.params.get('subtitlesformat', 'best')
2481         formats_preference = formats_query.split('/') if formats_query else []
2482         subs = {}
2483         for lang in requested_langs:
2484             formats = available_subs.get(lang)
2485             if formats is None:
2486                 self.report_warning('%s subtitles not available for %s' % (lang, video_id))
2487                 continue
2488             for ext in formats_preference:
2489                 if ext == 'best':
2490                     f = formats[-1]
2491                     break
2492                 matches = list(filter(lambda f: f['ext'] == ext, formats))
2493                 if matches:
2494                     f = matches[-1]
2495                     break
2496             else:
2497                 f = formats[-1]
2498                 self.report_warning(
2499                     'No subtitle format found matching "%s" for language %s, '
2500                     'using %s' % (formats_query, lang, f['ext']))
2501             subs[lang] = f
2502         return subs
2503
2504     def __forced_printings(self, info_dict, filename, incomplete):
2505         def print_mandatory(field, actual_field=None):
2506             if actual_field is None:
2507                 actual_field = field
2508             if (self.params.get('force%s' % field, False)
2509                     and (not incomplete or info_dict.get(actual_field) is not None)):
2510                 self.to_stdout(info_dict[actual_field])
2511
2512         def print_optional(field):
2513             if (self.params.get('force%s' % field, False)
2514                     and info_dict.get(field) is not None):
2515                 self.to_stdout(info_dict[field])
2516
2517         info_dict = info_dict.copy()
2518         if filename is not None:
2519             info_dict['filename'] = filename
2520         if info_dict.get('requested_formats') is not None:
2521             # For RTMP URLs, also include the playpath
2522             info_dict['urls'] = '\n'.join(f['url'] + f.get('play_path', '') for f in info_dict['requested_formats'])
2523         elif 'url' in info_dict:
2524             info_dict['urls'] = info_dict['url'] + info_dict.get('play_path', '')
2525
2526         if self.params.get('forceprint') or self.params.get('forcejson'):
2527             self.post_extract(info_dict)
2528         for tmpl in self.params.get('forceprint', []):
2529             mobj = re.match(r'\w+(=?)$', tmpl)
2530             if mobj and mobj.group(1):
2531                 tmpl = f'{tmpl[:-1]} = %({tmpl[:-1]})s'
2532             elif mobj:
2533                 tmpl = '%({})s'.format(tmpl)
2534             self.to_stdout(self.evaluate_outtmpl(tmpl, info_dict))
2535
2536         print_mandatory('title')
2537         print_mandatory('id')
2538         print_mandatory('url', 'urls')
2539         print_optional('thumbnail')
2540         print_optional('description')
2541         print_optional('filename')
2542         if self.params.get('forceduration') and info_dict.get('duration') is not None:
2543             self.to_stdout(formatSeconds(info_dict['duration']))
2544         print_mandatory('format')
2545
2546         if self.params.get('forcejson'):
2547             self.to_stdout(json.dumps(self.sanitize_info(info_dict)))
2548
2549     def dl(self, name, info, subtitle=False, test=False):
2550         if not info.get('url'):
2551             self.raise_no_formats(info, True)
2552
2553         if test:
2554             verbose = self.params.get('verbose')
2555             params = {
2556                 'test': True,
2557                 'quiet': self.params.get('quiet') or not verbose,
2558                 'verbose': verbose,
2559                 'noprogress': not verbose,
2560                 'nopart': True,
2561                 'skip_unavailable_fragments': False,
2562                 'keep_fragments': False,
2563                 'overwrites': True,
2564                 '_no_ytdl_file': True,
2565             }
2566         else:
2567             params = self.params
2568         fd = get_suitable_downloader(info, params, to_stdout=(name == '-'))(self, params)
2569         if not test:
2570             for ph in self._progress_hooks:
2571                 fd.add_progress_hook(ph)
2572             urls = '", "'.join([f['url'] for f in info.get('requested_formats', [])] or [info['url']])
2573             self.write_debug('Invoking downloader on "%s"' % urls)
2574
2575         new_info = copy.deepcopy(self._copy_infodict(info))
2576         if new_info.get('http_headers') is None:
2577             new_info['http_headers'] = self._calc_headers(new_info)
2578         return fd.download(name, new_info, subtitle)
2579
    def process_info(self, info_dict):
        """Process a single resolved IE result.

        In order: checks the download limit and the entry filters, prints the
        forced fields, writes the side files through the _write_* helpers
        (description, subtitles, thumbnails, info json), annotations and
        internet shortcuts, runs the 'before_dl' preprocessors, downloads the
        media unless 'skip_download' is set, runs the postprocessors and the
        post hooks and finally records the video in the download archive.
        With 'simulate' set, everything after the forced printings is skipped
        (apart from an optional archive entry).

        info_dict must describe a single video ('_type', if present, must be
        'video') and must contain 'title'. It is updated in place with
        bookkeeping keys such as 'fulltitle' and '_filename'.

        Returns None. Most failures are reported through report_error and end
        the processing early. Raises MaxDownloadsReached when the
        'max_downloads' limit has been reached, and UnavailableVideoError when
        an OSError/IOError occurs during the download step.
        """

        assert info_dict.get('_type', 'video') == 'video'

        # Refuse to start another download once the limit has been reached
        max_downloads = self.params.get('max_downloads')
        if max_downloads is not None:
            if self._num_downloads >= int(max_downloads):
                raise MaxDownloadsReached()

        # TODO: backward compatibility, to be removed
        info_dict['fulltitle'] = info_dict['title']

        if 'format' not in info_dict and 'ext' in info_dict:
            info_dict['format'] = info_dict['ext']

        # Any non-None result from _match_entry means the entry is to be skipped
        if self._match_entry(info_dict) is not None:
            return

        self.post_extract(info_dict)
        self._num_downloads += 1

        # info_dict['_filename'] needs to be set for backward compatibility
        info_dict['_filename'] = full_filename = self.prepare_filename(info_dict, warn=True)
        temp_filename = self.prepare_filename(info_dict, 'temp')
        # Maps files written along the way to their final name (None/'' when no
        # explicit target is known); handed to the postprocessors further down
        files_to_move = {}

        # Forced printings
        self.__forced_printings(info_dict, full_filename, incomplete=('format' not in info_dict))

        if self.params.get('simulate'):
            if self.params.get('force_write_download_archive', False):
                self.record_download_archive(info_dict)
            # Do nothing else if in simulate mode
            return

        if full_filename is None:
            return
        if not self._ensure_dir_exists(encodeFilename(full_filename)):
            return
        if not self._ensure_dir_exists(encodeFilename(temp_filename)):
            return

        # Each of the _write_* helpers below signals a fatal error by returning None
        if self._write_description('video', info_dict,
                                   self.prepare_filename(info_dict, 'description')) is None:
            return

        sub_files = self._write_subtitles(info_dict, temp_filename)
        if sub_files is None:
            return
        files_to_move.update(dict(sub_files))

        thumb_files = self._write_thumbnails(
            'video', info_dict, temp_filename, self.prepare_filename(info_dict, 'thumbnail'))
        if thumb_files is None:
            return
        files_to_move.update(dict(thumb_files))

        infofn = self.prepare_filename(info_dict, 'infojson')
        _infojson_written = self._write_info_json('video', info_dict, infofn)
        if _infojson_written:
            info_dict['__infojson_filename'] = infofn
        elif _infojson_written is None:
            return

        # Note: Annotations are deprecated
        annofn = None
        if self.params.get('writeannotations', False):
            annofn = self.prepare_filename(info_dict, 'annotation')
        if annofn:
            if not self._ensure_dir_exists(encodeFilename(annofn)):
                return
            if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(annofn)):
                self.to_screen('[info] Video annotations are already present')
            elif not info_dict.get('annotations'):
                self.report_warning('There are no annotations to write.')
            else:
                try:
                    self.to_screen('[info] Writing video annotations to: ' + annofn)
                    with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
                        annofile.write(info_dict['annotations'])
                except (KeyError, TypeError):
                    self.report_warning('There are no annotations to write.')
                except (OSError, IOError):
                    self.report_error('Cannot write annotations file: ' + annofn)
                    return

        # Write internet shortcut files
        def _write_link_file(link_type):
            # Returns True when the shortcut exists or was written, False on error
            if 'webpage_url' not in info_dict:
                self.report_error('Cannot write internet shortcut file because the "webpage_url" field is missing in the media information')
                return False
            linkfn = replace_extension(self.prepare_filename(info_dict, 'link'), link_type, info_dict.get('ext'))
            # NOTE(review): an existing shortcut is kept when 'overwrites' is
            # enabled, whereas the annotations branch above keeps the existing
            # file when 'overwrites' is disabled - confirm that this condition
            # is not inverted
            if self.params.get('overwrites', True) and os.path.exists(encodeFilename(linkfn)):
                self.to_screen(f'[info] Internet shortcut (.{link_type}) is already present')
                return True
            try:
                self.to_screen(f'[info] Writing internet shortcut (.{link_type}) to: {linkfn}')
                # '.url' files are written with CRLF line endings, the other types with LF
                with io.open(encodeFilename(to_high_limit_path(linkfn)), 'w', encoding='utf-8',
                             newline='\r\n' if link_type == 'url' else '\n') as linkfile:
                    template_vars = {'url': iri_to_uri(info_dict['webpage_url'])}
                    if link_type == 'desktop':
                        # The link filename without its '.desktop' suffix
                        template_vars['filename'] = linkfn[:-(len(link_type) + 1)]
                    linkfile.write(LINK_TEMPLATES[link_type] % template_vars)
            except (OSError, IOError):
                self.report_error(f'Cannot write internet shortcut {linkfn}')
                return False
            return True

        # Shortcut types that were requested explicitly
        write_links = {
            'url': self.params.get('writeurllink'),
            'webloc': self.params.get('writewebloclink'),
            'desktop': self.params.get('writedesktoplink'),
        }
        # 'writelink' additionally enables the type chosen from sys.platform
        if self.params.get('writelink'):
            link_type = ('webloc' if sys.platform == 'darwin'
                         else 'desktop' if sys.platform.startswith('linux')
                         else 'url')
            write_links[link_type] = True

        # any() stops at the first requested shortcut that could not be written
        if any(should_write and not _write_link_file(link_type)
               for link_type, should_write in write_links.items()):
            return

        try:
            info_dict, files_to_move = self.pre_process(info_dict, 'before_dl', files_to_move)
        except PostProcessingError as err:
            self.report_error('Preprocessing: %s' % str(err))
            return

        must_record_download_archive = False
        if self.params.get('skip_download', False):
            # No media is downloaded; only MoveFilesAfterDownloadPP is run on
            # the files collected so far
            info_dict['filepath'] = temp_filename
            info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))
            info_dict['__files_to_move'] = files_to_move
            info_dict = self.run_pp(MoveFilesAfterDownloadPP(self, False), info_dict)
        else:
            # Download
            info_dict.setdefault('__postprocessors', [])
            try:

                def existing_file(*filepaths):
                    # Returns the first of the given files (or of their
                    # 'final_ext' variants) that already exists, or None when
                    # the download has to be performed
                    ext = info_dict.get('ext')
                    final_ext = self.params.get('final_ext', ext)
                    existing_files = []
                    for file in orderedSet(filepaths):
                        if final_ext != ext:
                            converted = replace_extension(file, final_ext, ext)
                            if os.path.exists(encodeFilename(converted)):
                                existing_files.append(converted)
                        if os.path.exists(encodeFilename(file)):
                            existing_files.append(file)

                    # Nothing on disk, or overwriting was requested: remove
                    # whatever exists and report that there is no usable file
                    if not existing_files or self.params.get('overwrites', False):
                        for file in orderedSet(existing_files):
                            self.report_file_delete(file)
                            os.remove(encodeFilename(file))
                        return None

                    # Adopt the extension of the file that was found
                    info_dict['ext'] = os.path.splitext(existing_files[0])[1][1:]
                    return existing_files[0]

                success = True
                if info_dict.get('requested_formats') is not None:
                    # Several formats were requested and have to be combined

                    def compatible_formats(formats):
                        # Whether the formats can be merged without switching to mkv
                        # TODO: some formats actually allow this (mkv, webm, ogg, mp4), but not all of them.
                        video_formats = [format for format in formats if format.get('vcodec') != 'none']
                        audio_formats = [format for format in formats if format.get('acodec') != 'none']
                        if len(video_formats) > 2 or len(audio_formats) > 2:
                            return False

                        # Check extension
                        exts = set(format.get('ext') for format in formats)
                        COMPATIBLE_EXTS = (
                            set(('mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'ismv', 'isma')),
                            set(('webm',)),
                        )
                        for ext_sets in COMPATIBLE_EXTS:
                            if ext_sets.issuperset(exts):
                                return True
                        # TODO: Check acodec/vcodec
                        return False

                    requested_formats = info_dict['requested_formats']
                    old_ext = info_dict['ext']
                    # The container is only adjusted when the user did not choose one
                    if self.params.get('merge_output_format') is None:
                        if not compatible_formats(requested_formats):
                            info_dict['ext'] = 'mkv'
                            self.report_warning(
                                'Requested formats are incompatible for merge and will be merged into mkv')
                        if (info_dict['ext'] == 'webm'
                                and info_dict.get('thumbnails')
                                # check with type instead of pp_key, __name__, or isinstance
                                # since we dont want any custom PPs to trigger this
                                and any(type(pp) == EmbedThumbnailPP for pp in self._pps['post_process'])):
                            info_dict['ext'] = 'mkv'
                            self.report_warning(
                                'webm doesn\'t support embedding a thumbnail, mkv will be used')
                    new_ext = info_dict['ext']

                    def correct_ext(filename, ext=new_ext):
                        # Gives filename the extension ext; an existing
                        # old_ext/new_ext suffix is replaced, anything else is
                        # kept and ext is appended. '-' (stdout) is left alone
                        if filename == '-':
                            return filename
                        filename_real_ext = os.path.splitext(filename)[1][1:]
                        filename_wo_ext = (
                            os.path.splitext(filename)[0]
                            if filename_real_ext in (old_ext, new_ext)
                            else filename)
                        return '%s.%s' % (filename_wo_ext, ext)

                    # Ensure filename always has a correct extension for successful merge
                    full_filename = correct_ext(full_filename)
                    temp_filename = correct_ext(temp_filename)
                    dl_filename = existing_file(full_filename, temp_filename)
                    info_dict['__real_download'] = False

                    if dl_filename is not None:
                        self.report_file_already_downloaded(dl_filename)
                    elif get_suitable_downloader(info_dict, self.params, to_stdout=temp_filename == '-'):
                        # A single downloader accepts all formats at once;
                        # it is given their URLs joined by newlines
                        info_dict['url'] = '\n'.join(f['url'] for f in requested_formats)
                        success, real_download = self.dl(temp_filename, info_dict)
                        info_dict['__real_download'] = real_download
                    else:
                        # Download every format on its own and merge afterwards, if possible
                        downloaded = []
                        merger = FFmpegMergerPP(self)
                        if self.params.get('allow_unplayable_formats'):
                            self.report_warning(
                                'You have requested merging of multiple formats '
                                'while also allowing unplayable formats to be downloaded. '
                                'The formats won\'t be merged to prevent data corruption.')
                        elif not merger.available:
                            self.report_warning(
                                'You have requested merging of multiple formats but ffmpeg is not installed. '
                                'The formats won\'t be merged.')

                        if temp_filename == '-':
                            reason = ('using a downloader other than ffmpeg' if FFmpegFD.can_merge_formats(info_dict)
                                      else 'but the formats are incompatible for simultaneous download' if merger.available
                                      else 'but ffmpeg is not installed')
                            self.report_warning(
                                f'You have requested downloading multiple formats to stdout {reason}. '
                                'The formats will be streamed one after the other')
                            fname = temp_filename
                        for f in requested_formats:
                            # Per-format info: the common fields overridden by the format's own
                            new_info = dict(info_dict)
                            del new_info['requested_formats']
                            new_info.update(f)
                            if temp_filename != '-':
                                # Each format gets its own file, tagged with 'f<format_id>'
                                fname = prepend_extension(
                                    correct_ext(temp_filename, new_info['ext']),
                                    'f%s' % f['format_id'], new_info['ext'])
                                # NOTE(review): the other call sites pass
                                # encodeFilename(...) to _ensure_dir_exists;
                                # confirm that the raw name is intended here
                                if not self._ensure_dir_exists(fname):
                                    return
                                f['filepath'] = fname
                                downloaded.append(fname)
                            partial_success, real_download = self.dl(fname, new_info)
                            info_dict['__real_download'] = info_dict['__real_download'] or real_download
                            success = success and partial_success
                        if merger.available and not self.params.get('allow_unplayable_formats'):
                            info_dict['__postprocessors'].append(merger)
                            info_dict['__files_to_merge'] = downloaded
                            # Even if there were no downloads, it is being merged only now
                            info_dict['__real_download'] = True
                        else:
                            # Without a merge the individual files are the final result
                            for file in downloaded:
                                files_to_move[file] = None
                else:
                    # Just a single file
                    dl_filename = existing_file(full_filename, temp_filename)
                    if dl_filename is None or dl_filename == temp_filename:
                        # dl_filename == temp_filename could mean that the file was partially downloaded with --no-part.
                        # So we should try to resume the download
                        success, real_download = self.dl(temp_filename, info_dict)
                        info_dict['__real_download'] = real_download
                    else:
                        self.report_file_already_downloaded(dl_filename)

                dl_filename = dl_filename or temp_filename
                info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))

            except network_exceptions as err:
                self.report_error('unable to download video data: %s' % error_to_compat_str(err))
                return
            except (OSError, IOError) as err:
                raise UnavailableVideoError(err)
            except (ContentTooShortError, ) as err:
                self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
                return

            # No fixups or postprocessing for failed downloads or output to stdout
            if success and full_filename != '-':

                def fixup():
                    # Queues the FFmpegFixup* postprocessors that apply to this
                    # download, or only warns, depending on the 'fixup' param
                    do_fixup = True
                    fixup_policy = self.params.get('fixup')
                    vid = info_dict['id']

                    if fixup_policy in ('ignore', 'never'):
                        return
                    elif fixup_policy == 'warn':
                        do_fixup = False
                    elif fixup_policy != 'force':
                        assert fixup_policy in ('detect_or_warn', None)
                        # Only fix files that were actually downloaded in this run
                        if not info_dict.get('__real_download'):
                            do_fixup = False

                    def ffmpeg_fixup(cndn, msg, cls):
                        # When cndn holds, queue the postprocessor cls, or warn
                        # with msg if fixing is disabled or cls is unavailable
                        if not cndn:
                            return
                        if not do_fixup:
                            self.report_warning(f'{vid}: {msg}')
                            return
                        pp = cls(self)
                        if pp.available:
                            info_dict['__postprocessors'].append(pp)
                        else:
                            self.report_warning(f'{vid}: {msg}. Install ffmpeg to fix this automatically')

                    stretched_ratio = info_dict.get('stretched_ratio')
                    ffmpeg_fixup(
                        stretched_ratio not in (1, None),
                        f'Non-uniform pixel ratio {stretched_ratio}',
                        FFmpegFixupStretchedPP)

                    ffmpeg_fixup(
                        (info_dict.get('requested_formats') is None
                         and info_dict.get('container') == 'm4a_dash'
                         and info_dict.get('ext') == 'm4a'),
                        'writing DASH m4a. Only some players support this container',
                        FFmpegFixupM4aPP)

                    # The remaining fixups depend on which downloader class handles the file
                    downloader = get_suitable_downloader(info_dict, self.params) if 'protocol' in info_dict else None
                    downloader = downloader.__name__ if downloader else None
                    ffmpeg_fixup(info_dict.get('requested_formats') is None and downloader == 'HlsFD',
                                 'malformed AAC bitstream detected', FFmpegFixupM3u8PP)
                    ffmpeg_fixup(downloader == 'WebSocketFragmentFD', 'malformed timestamps detected', FFmpegFixupTimestampPP)
                    ffmpeg_fixup(downloader == 'WebSocketFragmentFD', 'malformed duration detected', FFmpegFixupDurationPP)

                fixup()
                try:
                    info_dict = self.post_process(dl_filename, info_dict, files_to_move)
                except PostProcessingError as err:
                    self.report_error('Postprocessing: %s' % str(err))
                    return
                try:
                    for ph in self._post_hooks:
                        ph(info_dict['filepath'])
                except Exception as err:
                    self.report_error('post hooks: %s' % str(err))
                    return
                # Only fully processed downloads are recorded unconditionally
                must_record_download_archive = True

        if must_record_download_archive or self.params.get('force_write_download_archive', False):
            self.record_download_archive(info_dict)
        # Signal the caller as soon as this download has used up the limit
        max_downloads = self.params.get('max_downloads')
        if max_downloads is not None and self._num_downloads >= int(max_downloads):
            raise MaxDownloadsReached()
2937
2938     def __download_wrapper(self, func):
2939         @functools.wraps(func)
2940         def wrapper(*args, **kwargs):
2941             try:
2942                 res = func(*args, **kwargs)
2943             except UnavailableVideoError as e:
2944                 self.report_error(e)
2945             except DownloadCancelled as e:
2946                 self.to_screen(f'[info] {e}')
2947                 raise
2948             else:
2949                 if self.params.get('dump_single_json', False):
2950                     self.post_extract(res)
2951                     self.to_stdout(json.dumps(self.sanitize_info(res)))
2952         return wrapper
2953
2954     def download(self, url_list):
2955         """Download a given list of URLs."""
2956         url_list = variadic(url_list)  # Passing a single URL is a common mistake
2957         outtmpl = self.outtmpl_dict['default']
2958         if (len(url_list) > 1
2959                 and outtmpl != '-'
2960                 and '%' not in outtmpl
2961                 and self.params.get('max_downloads') != 1):
2962             raise SameFileError(outtmpl)
2963
2964         for url in url_list:
2965             self.__download_wrapper(self.extract_info)(
2966                 url, force_generic_extractor=self.params.get('force_generic_extractor', False))
2967
2968         return self._download_retcode
2969
2970     def download_with_info_file(self, info_filename):
2971         with contextlib.closing(fileinput.FileInput(
2972                 [info_filename], mode='r',
2973                 openhook=fileinput.hook_encoded('utf-8'))) as f:
2974             # FileInput doesn't have a read method, we can't call json.load
2975             info = self.sanitize_info(json.loads('\n'.join(f)), self.params.get('clean_infojson', True))
2976         try:
2977             self.__download_wrapper(self.process_ie_result)(info, download=True)
2978         except (DownloadError, EntryNotInPlaylist, ThrottledDownload) as e:
2979             self.to_stderr('\r')
2980             webpage_url = info.get('webpage_url')
2981             if webpage_url is not None:
2982                 self.report_warning(f'The info failed to download: {e}; trying with URL {webpage_url}')
2983                 return self.download([webpage_url])
2984             else:
2985                 raise
2986         return self._download_retcode
2987
2988     @staticmethod
2989     def sanitize_info(info_dict, remove_private_keys=False):
2990         ''' Sanitize the infodict for converting to json '''
2991         if info_dict is None:
2992             return info_dict
2993         info_dict.setdefault('epoch', int(time.time()))
2994         remove_keys = {'__original_infodict'}  # Always remove this since this may contain a copy of the entire dict
2995         keep_keys = ['_type']  # Always keep this to facilitate load-info-json
2996         if remove_private_keys:
2997             remove_keys |= {
2998                 'requested_formats', 'requested_subtitles', 'requested_entries',
2999                 'filepath', 'entries', 'original_url', 'playlist_autonumber',
3000             }
3001             empty_values = (None, {}, [], set(), tuple())
3002             reject = lambda k, v: k not in keep_keys and (
3003                 k.startswith('_') or k in remove_keys or v in empty_values)
3004         else:
3005             reject = lambda k, v: k in remove_keys
3006         filter_fn = lambda obj: (
3007             list(map(filter_fn, obj)) if isinstance(obj, (LazyList, list, tuple, set))
3008             else obj if not isinstance(obj, dict)
3009             else dict((k, filter_fn(v)) for k, v in obj.items() if not reject(k, v)))
3010         return filter_fn(info_dict)
3011
3012     @staticmethod
3013     def filter_requested_info(info_dict, actually_filter=True):
3014         ''' Alias of sanitize_info for backward compatibility '''
3015         return YoutubeDL.sanitize_info(info_dict, actually_filter)
3016
3017     def run_pp(self, pp, infodict):
3018         files_to_delete = []
3019         if '__files_to_move' not in infodict:
3020             infodict['__files_to_move'] = {}
3021         try:
3022             files_to_delete, infodict = pp.run(infodict)
3023         except PostProcessingError as e:
3024             # Must be True and not 'only_download'
3025             if self.params.get('ignoreerrors') is True:
3026                 self.report_error(e)
3027                 return infodict
3028             raise
3029
3030         if not files_to_delete:
3031             return infodict
3032         if self.params.get('keepvideo', False):
3033             for f in files_to_delete:
3034                 infodict['__files_to_move'].setdefault(f, '')
3035         else:
3036             for old_filename in set(files_to_delete):
3037                 self.to_screen('Deleting original file %s (pass -k to keep)' % old_filename)
3038                 try:
3039                     os.remove(encodeFilename(old_filename))
3040                 except (IOError, OSError):
3041                     self.report_warning('Unable to remove downloaded original file')
3042                 if old_filename in infodict['__files_to_move']:
3043                     del infodict['__files_to_move'][old_filename]
3044         return infodict
3045
3046     @staticmethod
3047     def post_extract(info_dict):
3048         def actual_post_extract(info_dict):
3049             if info_dict.get('_type') in ('playlist', 'multi_video'):
3050                 for video_dict in info_dict.get('entries', {}):
3051                     actual_post_extract(video_dict or {})
3052                 return
3053
3054             post_extractor = info_dict.get('__post_extractor') or (lambda: {})
3055             extra = post_extractor().items()
3056             info_dict.update(extra)
3057             info_dict.pop('__post_extractor', None)
3058
3059             original_infodict = info_dict.get('__original_infodict') or {}
3060             original_infodict.update(extra)
3061             original_infodict.pop('__post_extractor', None)
3062
3063         actual_post_extract(info_dict or {})
3064
3065     def pre_process(self, ie_info, key='pre_process', files_to_move=None):
3066         info = dict(ie_info)
3067         info['__files_to_move'] = files_to_move or {}
3068         for pp in self._pps[key]:
3069             info = self.run_pp(pp, info)
3070         return info, info.pop('__files_to_move', None)
3071
3072     def post_process(self, filename, ie_info, files_to_move=None):
3073         """Run all the postprocessors on the given file."""
3074         info = dict(ie_info)
3075         info['filepath'] = filename
3076         info['__files_to_move'] = files_to_move or {}
3077
3078         for pp in ie_info.get('__postprocessors', []) + self._pps['post_process']:
3079             info = self.run_pp(pp, info)
3080         info = self.run_pp(MoveFilesAfterDownloadPP(self), info)
3081         del info['__files_to_move']
3082         for pp in self._pps['after_move']:
3083             info = self.run_pp(pp, info)
3084         return info
3085
3086     def _make_archive_id(self, info_dict):
3087         video_id = info_dict.get('id')
3088         if not video_id:
3089             return
3090         # Future-proof against any change in case
3091         # and backwards compatibility with prior versions
3092         extractor = info_dict.get('extractor_key') or info_dict.get('ie_key')  # key in a playlist
3093         if extractor is None:
3094             url = str_or_none(info_dict.get('url'))
3095             if not url:
3096                 return
3097             # Try to find matching extractor for the URL and take its ie_key
3098             for ie_key, ie in self._ies.items():
3099                 if ie.suitable(url):
3100                     extractor = ie_key
3101                     break
3102             else:
3103                 return
3104         return '%s %s' % (extractor.lower(), video_id)
3105
3106     def in_download_archive(self, info_dict):
3107         fn = self.params.get('download_archive')
3108         if fn is None:
3109             return False
3110
3111         vid_id = self._make_archive_id(info_dict)
3112         if not vid_id:
3113             return False  # Incomplete video information
3114
3115         return vid_id in self.archive
3116
3117     def record_download_archive(self, info_dict):
3118         fn = self.params.get('download_archive')
3119         if fn is None:
3120             return
3121         vid_id = self._make_archive_id(info_dict)
3122         assert vid_id
3123         with locked_file(fn, 'a', encoding='utf-8') as archive_file:
3124             archive_file.write(vid_id + '\n')
3125         self.archive.add(vid_id)
3126
3127     @staticmethod
3128     def format_resolution(format, default='unknown'):
3129         is_images = format.get('vcodec') == 'none' and format.get('acodec') == 'none'
3130         if format.get('vcodec') == 'none' and format.get('acodec') != 'none':
3131             return 'audio only'
3132         if format.get('resolution') is not None:
3133             return format['resolution']
3134         if format.get('width') and format.get('height'):
3135             res = '%dx%d' % (format['width'], format['height'])
3136         elif format.get('height'):
3137             res = '%sp' % format['height']
3138         elif format.get('width'):
3139             res = '%dx?' % format['width']
3140         elif is_images:
3141             return 'images'
3142         else:
3143             return default
3144         return f'{res} images' if is_images else res
3145
3146     def _format_note(self, fdict):
3147         res = ''
3148         if fdict.get('ext') in ['f4f', 'f4m']:
3149             res += '(unsupported) '
3150         if fdict.get('language'):
3151             if res:
3152                 res += ' '
3153             res += '[%s] ' % fdict['language']
3154         if fdict.get('format_note') is not None:
3155             res += fdict['format_note'] + ' '
3156         if fdict.get('tbr') is not None:
3157             res += '%4dk ' % fdict['tbr']
3158         if fdict.get('container') is not None:
3159             if res:
3160                 res += ', '
3161             res += '%s container' % fdict['container']
3162         if (fdict.get('vcodec') is not None
3163                 and fdict.get('vcodec') != 'none'):
3164             if res:
3165                 res += ', '
3166             res += fdict['vcodec']
3167             if fdict.get('vbr') is not None:
3168                 res += '@'
3169         elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
3170             res += 'video@'
3171         if fdict.get('vbr') is not None:
3172             res += '%4dk' % fdict['vbr']
3173         if fdict.get('fps') is not None:
3174             if res:
3175                 res += ', '
3176             res += '%sfps' % fdict['fps']
3177         if fdict.get('acodec') is not None:
3178             if res:
3179                 res += ', '
3180             if fdict['acodec'] == 'none':
3181                 res += 'video only'
3182             else:
3183                 res += '%-5s' % fdict['acodec']
3184         elif fdict.get('abr') is not None:
3185             if res:
3186                 res += ', '
3187             res += 'audio'
3188         if fdict.get('abr') is not None:
3189             res += '@%3dk' % fdict['abr']
3190         if fdict.get('asr') is not None:
3191             res += ' (%5dHz)' % fdict['asr']
3192         if fdict.get('filesize') is not None:
3193             if res:
3194                 res += ', '
3195             res += format_bytes(fdict['filesize'])
3196         elif fdict.get('filesize_approx') is not None:
3197             if res:
3198                 res += ', '
3199             res += '~' + format_bytes(fdict['filesize_approx'])
3200         return res
3201
3202     def _list_format_headers(self, *headers):
3203         if self.params.get('listformats_table', True) is not False:
3204             return [self._format_screen(header, self.Styles.HEADERS) for header in headers]
3205         return headers
3206
3207     def list_formats(self, info_dict):
3208         formats = info_dict.get('formats', [info_dict])
3209         new_format = self.params.get('listformats_table', True) is not False
3210         if new_format:
3211             tbr_digits = number_of_digits(max(f.get('tbr') or 0 for f in formats))
3212             vbr_digits = number_of_digits(max(f.get('vbr') or 0 for f in formats))
3213             abr_digits = number_of_digits(max(f.get('abr') or 0 for f in formats))
3214             delim = self._format_screen('\u2502', self.Styles.DELIM, '|', test_encoding=True)
3215             table = [
3216                 [
3217                     self._format_screen(format_field(f, 'format_id'), self.Styles.ID),
3218                     format_field(f, 'ext'),
3219                     self.format_resolution(f),
3220                     format_field(f, 'fps', '%3d'),
3221                     format_field(f, 'dynamic_range', '%s', ignore=(None, 'SDR')).replace('HDR', ''),
3222                     delim,
3223                     format_field(f, 'filesize', ' %s', func=format_bytes) + format_field(f, 'filesize_approx', '~%s', func=format_bytes),
3224                     format_field(f, 'tbr', f'%{tbr_digits}dk'),
3225                     shorten_protocol_name(f.get('protocol', '').replace("native", "n")),
3226                     delim,
3227                     format_field(f, 'vcodec', default='unknown').replace('none', ''),
3228                     format_field(f, 'vbr', f'%{vbr_digits}dk'),
3229                     format_field(f, 'acodec', default='unknown').replace('none', ''),
3230                     format_field(f, 'abr', f'%{abr_digits}dk'),
3231                     format_field(f, 'asr', '%5dHz'),
3232                     join_nonempty(
3233                         self._format_screen('UNSUPPORTED', 'light red') if f.get('ext') in ('f4f', 'f4m') else None,
3234                         format_field(f, 'language', '[%s]'),
3235                         format_field(f, 'format_note'),
3236                         format_field(f, 'container', ignore=(None, f.get('ext'))),
3237                         delim=', '),
3238                 ] for f in formats if f.get('preference') is None or f['preference'] >= -1000]
3239             header_line = self._list_format_headers(
3240                 'ID', 'EXT', 'RESOLUTION', 'FPS', 'HDR', delim, ' FILESIZE', '  TBR', 'PROTO',
3241                 delim, 'VCODEC', '  VBR', 'ACODEC', ' ABR', ' ASR', 'MORE INFO')
3242         else:
3243             table = [
3244                 [
3245                     format_field(f, 'format_id'),
3246                     format_field(f, 'ext'),
3247                     self.format_resolution(f),
3248                     self._format_note(f)]
3249                 for f in formats
3250                 if f.get('preference') is None or f['preference'] >= -1000]
3251             header_line = ['format code', 'extension', 'resolution', 'note']
3252
3253         self.to_screen(
3254             '[info] Available formats for %s:' % info_dict['id'])
3255         self.to_stdout(render_table(
3256             header_line, table,
3257             extraGap=(0 if new_format else 1),
3258             hideEmpty=new_format,
3259             delim=new_format and self._format_screen('\u2500', self.Styles.DELIM, '-', test_encoding=True)))
3260
3261     def list_thumbnails(self, info_dict):
3262         thumbnails = list(info_dict.get('thumbnails'))
3263         if not thumbnails:
3264             self.to_screen('[info] No thumbnails present for %s' % info_dict['id'])
3265             return
3266
3267         self.to_screen(
3268             '[info] Thumbnails for %s:' % info_dict['id'])
3269         self.to_stdout(render_table(
3270             self._list_format_headers('ID', 'Width', 'Height', 'URL'),
3271             [[t['id'], t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']] for t in thumbnails]))
3272
3273     def list_subtitles(self, video_id, subtitles, name='subtitles'):
3274         if not subtitles:
3275             self.to_screen('%s has no %s' % (video_id, name))
3276             return
3277         self.to_screen(
3278             'Available %s for %s:' % (name, video_id))
3279
3280         def _row(lang, formats):
3281             exts, names = zip(*((f['ext'], f.get('name') or 'unknown') for f in reversed(formats)))
3282             if len(set(names)) == 1:
3283                 names = [] if names[0] == 'unknown' else names[:1]
3284             return [lang, ', '.join(names), ', '.join(exts)]
3285
3286         self.to_stdout(render_table(
3287             self._list_format_headers('Language', 'Name', 'Formats'),
3288             [_row(lang, formats) for lang, formats in subtitles.items()],
3289             hideEmpty=True))
3290
3291     def urlopen(self, req):
3292         """ Start an HTTP download """
3293         if isinstance(req, compat_basestring):
3294             req = sanitized_Request(req)
3295         return self._opener.open(req, timeout=self._socket_timeout)
3296
3297     def print_debug_header(self):
3298         if not self.params.get('verbose'):
3299             return
3300
3301         def get_encoding(stream):
3302             ret = getattr(stream, 'encoding', 'missing (%s)' % type(stream).__name__)
3303             if not supports_terminal_sequences(stream):
3304                 ret += ' (No ANSI)'
3305             return ret
3306
3307         encoding_str = 'Encodings: locale %s, fs %s, out %s, err %s, pref %s' % (
3308             locale.getpreferredencoding(),
3309             sys.getfilesystemencoding(),
3310             get_encoding(self._screen_file), get_encoding(self._err_file),
3311             self.get_encoding())
3312
3313         logger = self.params.get('logger')
3314         if logger:
3315             write_debug = lambda msg: logger.debug(f'[debug] {msg}')
3316             write_debug(encoding_str)
3317         else:
3318             write_string(f'[debug] {encoding_str}\n', encoding=None)
3319             write_debug = lambda msg: self._write_string(f'[debug] {msg}\n')
3320
3321         source = detect_variant()
3322         write_debug('yt-dlp version %s%s' % (__version__, '' if source == 'unknown' else f' ({source})'))
3323         if not _LAZY_LOADER:
3324             if os.environ.get('YTDLP_NO_LAZY_EXTRACTORS'):
3325                 write_debug('Lazy loading extractors is forcibly disabled')
3326             else:
3327                 write_debug('Lazy loading extractors is disabled')
3328         if plugin_extractors or plugin_postprocessors:
3329             write_debug('Plugins: %s' % [
3330                 '%s%s' % (klass.__name__, '' if klass.__name__ == name else f' as {name}')
3331                 for name, klass in itertools.chain(plugin_extractors.items(), plugin_postprocessors.items())])
3332         if self.params.get('compat_opts'):
3333             write_debug('Compatibility options: %s' % ', '.join(self.params.get('compat_opts')))
3334         try:
3335             sp = Popen(
3336                 ['git', 'rev-parse', '--short', 'HEAD'],
3337                 stdout=subprocess.PIPE, stderr=subprocess.PIPE,
3338                 cwd=os.path.dirname(os.path.abspath(__file__)))
3339             out, err = sp.communicate_or_kill()
3340             out = out.decode().strip()
3341             if re.match('[0-9a-f]+', out):
3342                 write_debug('Git HEAD: %s' % out)
3343         except Exception:
3344             try:
3345                 sys.exc_clear()
3346             except Exception:
3347                 pass
3348
3349         def python_implementation():
3350             impl_name = platform.python_implementation()
3351             if impl_name == 'PyPy' and hasattr(sys, 'pypy_version_info'):
3352                 return impl_name + ' version %d.%d.%d' % sys.pypy_version_info[:3]
3353             return impl_name
3354
3355         write_debug('Python version %s (%s %s) - %s' % (
3356             platform.python_version(),
3357             python_implementation(),
3358             platform.architecture()[0],
3359             platform_name()))
3360
3361         exe_versions, ffmpeg_features = FFmpegPostProcessor.get_versions_and_features(self)
3362         ffmpeg_features = {key for key, val in ffmpeg_features.items() if val}
3363         if ffmpeg_features:
3364             exe_versions['ffmpeg'] += ' (%s)' % ','.join(ffmpeg_features)
3365
3366         exe_versions['rtmpdump'] = rtmpdump_version()
3367         exe_versions['phantomjs'] = PhantomJSwrapper._version()
3368         exe_str = ', '.join(
3369             f'{exe} {v}' for exe, v in sorted(exe_versions.items()) if v
3370         ) or 'none'
3371         write_debug('exe versions: %s' % exe_str)
3372
3373         from .downloader.websocket import has_websockets
3374         from .postprocessor.embedthumbnail import has_mutagen
3375         from .cookies import SQLITE_AVAILABLE, KEYRING_AVAILABLE
3376
3377         lib_str = join_nonempty(
3378             compat_pycrypto_AES and compat_pycrypto_AES.__name__.split('.')[0],
3379             KEYRING_AVAILABLE and 'keyring',
3380             has_mutagen and 'mutagen',
3381             SQLITE_AVAILABLE and 'sqlite',
3382             has_websockets and 'websockets',
3383             delim=', ') or 'none'
3384         write_debug('Optional libraries: %s' % lib_str)
3385
3386         proxy_map = {}
3387         for handler in self._opener.handlers:
3388             if hasattr(handler, 'proxies'):
3389                 proxy_map.update(handler.proxies)
3390         write_debug(f'Proxy map: {proxy_map}')
3391
3392         # Not implemented
3393         if False and self.params.get('call_home'):
3394             ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode('utf-8')
3395             write_debug('Public IP address: %s' % ipaddr)
3396             latest_version = self.urlopen(
3397                 'https://yt-dl.org/latest/version').read().decode('utf-8')
3398             if version_tuple(latest_version) > version_tuple(__version__):
3399                 self.report_warning(
3400                     'You are using an outdated version (newest version: %s)! '
3401                     'See https://yt-dl.org/update if you need help updating.' %
3402                     latest_version)
3403
3404     def _setup_opener(self):
3405         timeout_val = self.params.get('socket_timeout')
3406         self._socket_timeout = 20 if timeout_val is None else float(timeout_val)
3407
3408         opts_cookiesfrombrowser = self.params.get('cookiesfrombrowser')
3409         opts_cookiefile = self.params.get('cookiefile')
3410         opts_proxy = self.params.get('proxy')
3411
3412         self.cookiejar = load_cookies(opts_cookiefile, opts_cookiesfrombrowser, self)
3413
3414         cookie_processor = YoutubeDLCookieProcessor(self.cookiejar)
3415         if opts_proxy is not None:
3416             if opts_proxy == '':
3417                 proxies = {}
3418             else:
3419                 proxies = {'http': opts_proxy, 'https': opts_proxy}
3420         else:
3421             proxies = compat_urllib_request.getproxies()
3422             # Set HTTPS proxy to HTTP one if given (https://github.com/ytdl-org/youtube-dl/issues/805)
3423             if 'http' in proxies and 'https' not in proxies:
3424                 proxies['https'] = proxies['http']
3425         proxy_handler = PerRequestProxyHandler(proxies)
3426
3427         debuglevel = 1 if self.params.get('debug_printtraffic') else 0
3428         https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
3429         ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
3430         redirect_handler = YoutubeDLRedirectHandler()
3431         data_handler = compat_urllib_request_DataHandler()
3432
3433         # When passing our own FileHandler instance, build_opener won't add the
3434         # default FileHandler and allows us to disable the file protocol, which
3435         # can be used for malicious purposes (see
3436         # https://github.com/ytdl-org/youtube-dl/issues/8227)
3437         file_handler = compat_urllib_request.FileHandler()
3438
3439         def file_open(*args, **kwargs):
3440             raise compat_urllib_error.URLError('file:// scheme is explicitly disabled in yt-dlp for security reasons')
3441         file_handler.file_open = file_open
3442
3443         opener = compat_urllib_request.build_opener(
3444             proxy_handler, https_handler, cookie_processor, ydlh, redirect_handler, data_handler, file_handler)
3445
3446         # Delete the default user-agent header, which would otherwise apply in
3447         # cases where our custom HTTP handler doesn't come into play
3448         # (See https://github.com/ytdl-org/youtube-dl/issues/1309 for details)
3449         opener.addheaders = []
3450         self._opener = opener
3451
3452     def encode(self, s):
3453         if isinstance(s, bytes):
3454             return s  # Already encoded
3455
3456         try:
3457             return s.encode(self.get_encoding())
3458         except UnicodeEncodeError as err:
3459             err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
3460             raise
3461
3462     def get_encoding(self):
3463         encoding = self.params.get('encoding')
3464         if encoding is None:
3465             encoding = preferredencoding()
3466         return encoding
3467
3468     def _write_info_json(self, label, ie_result, infofn):
3469         ''' Write infojson and returns True = written, False = skip, None = error '''
3470         if not self.params.get('writeinfojson'):
3471             return False
3472         elif not infofn:
3473             self.write_debug(f'Skipping writing {label} infojson')
3474             return False
3475         elif not self._ensure_dir_exists(infofn):
3476             return None
3477         elif not self.params.get('overwrites', True) and os.path.exists(infofn):
3478             self.to_screen(f'[info] {label.title()} metadata is already present')
3479         else:
3480             self.to_screen(f'[info] Writing {label} metadata as JSON to: {infofn}')
3481             try:
3482                 write_json_file(self.sanitize_info(ie_result, self.params.get('clean_infojson', True)), infofn)
3483             except (OSError, IOError):
3484                 self.report_error(f'Cannot write {label} metadata to JSON file {infofn}')
3485                 return None
3486         return True
3487
3488     def _write_description(self, label, ie_result, descfn):
3489         ''' Write description and returns True = written, False = skip, None = error '''
3490         if not self.params.get('writedescription'):
3491             return False
3492         elif not descfn:
3493             self.write_debug(f'Skipping writing {label} description')
3494             return False
3495         elif not self._ensure_dir_exists(descfn):
3496             return None
3497         elif not self.params.get('overwrites', True) and os.path.exists(descfn):
3498             self.to_screen(f'[info] {label.title()} description is already present')
3499         elif ie_result.get('description') is None:
3500             self.report_warning(f'There\'s no {label} description to write')
3501             return False
3502         else:
3503             try:
3504                 self.to_screen(f'[info] Writing {label} description to: {descfn}')
3505                 with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
3506                     descfile.write(ie_result['description'])
3507             except (OSError, IOError):
3508                 self.report_error(f'Cannot write {label} description file {descfn}')
3509                 return None
3510         return True
3511
3512     def _write_subtitles(self, info_dict, filename):
3513         ''' Write subtitles to file and return list of (sub_filename, final_sub_filename); or None if error'''
3514         ret = []
3515         subtitles = info_dict.get('requested_subtitles')
3516         if not subtitles or not (self.params.get('writesubtitles') or self.params.get('writeautomaticsub')):
3517             # subtitles download errors are already managed as troubles in relevant IE
3518             # that way it will silently go on when used with unsupporting IE
3519             return ret
3520
3521         sub_filename_base = self.prepare_filename(info_dict, 'subtitle')
3522         if not sub_filename_base:
3523             self.to_screen('[info] Skipping writing video subtitles')
3524             return ret
3525         for sub_lang, sub_info in subtitles.items():
3526             sub_format = sub_info['ext']
3527             sub_filename = subtitles_filename(filename, sub_lang, sub_format, info_dict.get('ext'))
3528             sub_filename_final = subtitles_filename(sub_filename_base, sub_lang, sub_format, info_dict.get('ext'))
3529             if not self.params.get('overwrites', True) and os.path.exists(sub_filename):
3530                 self.to_screen(f'[info] Video subtitle {sub_lang}.{sub_format} is already present')
3531                 sub_info['filepath'] = sub_filename
3532                 ret.append((sub_filename, sub_filename_final))
3533                 continue
3534
3535             self.to_screen(f'[info] Writing video subtitles to: {sub_filename}')
3536             if sub_info.get('data') is not None:
3537                 try:
3538                     # Use newline='' to prevent conversion of newline characters
3539                     # See https://github.com/ytdl-org/youtube-dl/issues/10268
3540                     with io.open(sub_filename, 'w', encoding='utf-8', newline='') as subfile:
3541                         subfile.write(sub_info['data'])
3542                     sub_info['filepath'] = sub_filename
3543                     ret.append((sub_filename, sub_filename_final))
3544                     continue
3545                 except (OSError, IOError):
3546                     self.report_error(f'Cannot write video subtitles file {sub_filename}')
3547                     return None
3548
3549             try:
3550                 sub_copy = sub_info.copy()
3551                 sub_copy.setdefault('http_headers', info_dict.get('http_headers'))
3552                 self.dl(sub_filename, sub_copy, subtitle=True)
3553                 sub_info['filepath'] = sub_filename
3554                 ret.append((sub_filename, sub_filename_final))
3555             except (ExtractorError, IOError, OSError, ValueError) + network_exceptions as err:
3556                 self.report_warning(f'Unable to download video subtitles for {sub_lang!r}: {err}')
3557                 continue
3558         return ret
3559
3560     def _write_thumbnails(self, label, info_dict, filename, thumb_filename_base=None):
3561         ''' Write thumbnails to file and return list of (thumb_filename, final_thumb_filename) '''
3562         write_all = self.params.get('write_all_thumbnails', False)
3563         thumbnails, ret = [], []
3564         if write_all or self.params.get('writethumbnail', False):
3565             thumbnails = info_dict.get('thumbnails') or []
3566         multiple = write_all and len(thumbnails) > 1
3567
3568         if thumb_filename_base is None:
3569             thumb_filename_base = filename
3570         if thumbnails and not thumb_filename_base:
3571             self.write_debug(f'Skipping writing {label} thumbnail')
3572             return ret
3573
3574         for t in thumbnails[::-1]:
3575             thumb_ext = (f'{t["id"]}.' if multiple else '') + determine_ext(t['url'], 'jpg')
3576             thumb_display_id = f'{label} thumbnail {t["id"]}'
3577             thumb_filename = replace_extension(filename, thumb_ext, info_dict.get('ext'))
3578             thumb_filename_final = replace_extension(thumb_filename_base, thumb_ext, info_dict.get('ext'))
3579
3580             if not self.params.get('overwrites', True) and os.path.exists(thumb_filename):
3581                 ret.append((thumb_filename, thumb_filename_final))
3582                 t['filepath'] = thumb_filename
3583                 self.to_screen('[info] %s is already present' % (
3584                     thumb_display_id if multiple else f'{label} thumbnail').capitalize())
3585             else:
3586                 self.to_screen(f'[info] Downloading {thumb_display_id} ...')
3587                 try:
3588                     uf = self.urlopen(t['url'])
3589                     self.to_screen(f'[info] Writing {thumb_display_id} to: {thumb_filename}')
3590                     with open(encodeFilename(thumb_filename), 'wb') as thumbf:
3591                         shutil.copyfileobj(uf, thumbf)
3592                     ret.append((thumb_filename, thumb_filename_final))
3593                     t['filepath'] = thumb_filename
3594                 except network_exceptions as err:
3595                     self.report_warning(f'Unable to download {thumb_display_id}: {err}')
3596             if ret and not write_all:
3597                 break
3598         return ret