venv/lib/python3.11/site-packages/pip/_vendor/pygments/util.py

   1 """
   2     pygments.util
   3     ~~~~~~~~~~~~~
   4
   5     Utility functions.
   6
   7     :copyright: Copyright 2006-2023 by the Pygments team, see AUTHORS.
   8     :license: BSD, see LICENSE for details.
   9 """
  10
  11 import re
  12 from io import TextIOWrapper
  13
  14
# Separators used to break a shebang line into pieces: forward slash,
# backslash, or a space.
split_path_re = re.compile(r'[/\\ ]')
# Finds a ``<!DOCTYPE ...>`` declaration.  Group 1 captures the first name
# and, when present, the following keyword plus its double-quoted string.
doctype_lookup_re = re.compile(r'''
    <!DOCTYPE\s+(
     [a-zA-Z_][a-zA-Z0-9]*
     (?: \s+      # optional in HTML5
     [a-zA-Z_][a-zA-Z0-9]*\s+
     "[^"]*")?
     )
     [^>]*>
''', re.DOTALL | re.MULTILINE | re.VERBOSE)
# Loose "opening tag ... closing tag" detector.  The closing tag name is not
# required to equal the opening one.
tag_re = re.compile(r'<(.+?)(\s.*?)?>.*?</.+?>',
                    re.IGNORECASE | re.DOTALL | re.MULTILINE)
# An ``<?xml ... ?>`` declaration, optionally preceded by whitespace
# (case-insensitive).
xml_decl_re = re.compile(r'\s*<\?xml[^>]*\?>', re.I)
  28
  29
  30 class ClassNotFound(ValueError):
  31     """Raised if one of the lookup functions didn't find a matching class."""
  32
  33
  34 class OptionError(Exception):
  35     """
  36     This exception will be raised by all option processing functions if
  37     the type or value of the argument is not correct.
  38     """
  39
  40 def get_choice_opt(options, optname, allowed, default=None, normcase=False):
  41     """
  42     If the key `optname` from the dictionary is not in the sequence
  43     `allowed`, raise an error, otherwise return it.
  44     """
  45     string = options.get(optname, default)
  46     if normcase:
  47         string = string.lower()
  48     if string not in allowed:
  49         raise OptionError('Value for option %s must be one of %s' %
  50                           (optname, ', '.join(map(str, allowed))))
  51     return string
  52
  53
  54 def get_bool_opt(options, optname, default=None):
  55     """
  56     Intuitively, this is `options.get(optname, default)`, but restricted to
  57     Boolean value. The Booleans can be represented as string, in order to accept
  58     Boolean value from the command line arguments. If the key `optname` is
  59     present in the dictionary `options` and is not associated with a Boolean,
  60     raise an `OptionError`. If it is absent, `default` is returned instead.
  61
  62     The valid string values for ``True`` are ``1``, ``yes``, ``true`` and
  63     ``on``, the ones for ``False`` are ``0``, ``no``, ``false`` and ``off``
  64     (matched case-insensitively).
  65     """
  66     string = options.get(optname, default)
  67     if isinstance(string, bool):
  68         return string
  69     elif isinstance(string, int):
  70         return bool(string)
  71     elif not isinstance(string, str):
  72         raise OptionError('Invalid type %r for option %s; use '
  73                           '1/0, yes/no, true/false, on/off' % (
  74                               string, optname))
  75     elif string.lower() in ('1', 'yes', 'true', 'on'):
  76         return True
  77     elif string.lower() in ('0', 'no', 'false', 'off'):
  78         return False
  79     else:
  80         raise OptionError('Invalid value %r for option %s; use '
  81                           '1/0, yes/no, true/false, on/off' % (
  82                               string, optname))
  83
  84
  85 def get_int_opt(options, optname, default=None):
  86     """As :func:`get_bool_opt`, but interpret the value as an integer."""
  87     string = options.get(optname, default)
  88     try:
  89         return int(string)
  90     except TypeError:
  91         raise OptionError('Invalid type %r for option %s; you '
  92                           'must give an integer value' % (
  93                               string, optname))
  94     except ValueError:
  95         raise OptionError('Invalid value %r for option %s; you '
  96                           'must give an integer value' % (
  97                               string, optname))
  98
  99 def get_list_opt(options, optname, default=None):
 100     """
 101     If the key `optname` from the dictionary `options` is a string,
 102     split it at whitespace and return it. If it is already a list
 103     or a tuple, it is returned as a list.
 104     """
 105     val = options.get(optname, default)
 106     if isinstance(val, str):
 107         return val.split()
 108     elif isinstance(val, (list, tuple)):
 109         return list(val)
 110     else:
 111         raise OptionError('Invalid type %r for option %s; you '
 112                           'must give a list value' % (
 113                               val, optname))
 114
 115
 116 def docstring_headline(obj):
 117     if not obj.__doc__:
 118         return ''
 119     res = []
 120     for line in obj.__doc__.strip().splitlines():
 121         if line.strip():
 122             res.append(" " + line.strip())
 123         else:
 124             break
 125     return ''.join(res).lstrip()
 126
 127
 128 def make_analysator(f):
 129     """Return a static text analyser function that returns float values."""
 130     def text_analyse(text):
 131         try:
 132             rv = f(text)
 133         except Exception:
 134             return 0.0
 135         if not rv:
 136             return 0.0
 137         try:
 138             return min(1.0, max(0.0, float(rv)))
 139         except (ValueError, TypeError):
 140             return 0.0
 141     text_analyse.__doc__ = f.__doc__
 142     return staticmethod(text_analyse)
 143
 144
 145 def shebang_matches(text, regex):
 146     r"""Check if the given regular expression matches the last part of the
 147     shebang if one exists.
 148
 149         >>> from pygments.util import shebang_matches
 150         >>> shebang_matches('#!/usr/bin/env python', r'python(2\.\d)?')
 151         True
 152         >>> shebang_matches('#!/usr/bin/python2.4', r'python(2\.\d)?')
 153         True
 154         >>> shebang_matches('#!/usr/bin/python-ruby', r'python(2\.\d)?')
 155         False
 156         >>> shebang_matches('#!/usr/bin/python/ruby', r'python(2\.\d)?')
 157         False
 158         >>> shebang_matches('#!/usr/bin/startsomethingwith python',
 159         ...                 r'python(2\.\d)?')
 160         True
 161
 162     It also checks for common windows executable file extensions::
 163
 164         >>> shebang_matches('#!C:\\Python2.4\\Python.exe', r'python(2\.\d)?')
 165         True
 166
 167     Parameters (``'-f'`` or ``'--foo'`` are ignored so ``'perl'`` does
 168     the same as ``'perl -e'``)
 169
 170     Note that this method automatically searches the whole string (eg:
 171     the regular expression is wrapped in ``'^$'``)
 172     """
 173     index = text.find('\n')
 174     if index >= 0:
 175         first_line = text[:index].lower()
 176     else:
 177         first_line = text.lower()
 178     if first_line.startswith('#!'):
 179         try:
 180             found = [x for x in split_path_re.split(first_line[2:].strip())
 181                      if x and not x.startswith('-')][-1]
 182         except IndexError:
 183             return False
 184         regex = re.compile(r'^%s(\.(exe|cmd|bat|bin))?$' % regex, re.IGNORECASE)
 185         if regex.search(found) is not None:
 186             return True
 187     return False
 188
 189
 190 def doctype_matches(text, regex):
 191     """Check if the doctype matches a regular expression (if present).
 192
 193     Note that this method only checks the first part of a DOCTYPE.
 194     eg: 'html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"'
 195     """
 196     m = doctype_lookup_re.search(text)
 197     if m is None:
 198         return False
 199     doctype = m.group(1)
 200     return re.compile(regex, re.I).match(doctype.strip()) is not None
 201
 202
 203 def html_doctype_matches(text):
 204     """Check if the file looks like it has a html doctype."""
 205     return doctype_matches(text, r'html')
 206
 207
 208 _looks_like_xml_cache = {}
 209
 210
 211 def looks_like_xml(text):
 212     """Check if a doctype exists or if we have some tags."""
 213     if xml_decl_re.match(text):
 214         return True
 215     key = hash(text)
 216     try:
 217         return _looks_like_xml_cache[key]
 218     except KeyError:
 219         m = doctype_lookup_re.search(text)
 220         if m is not None:
 221             return True
 222         rv = tag_re.search(text[:1000]) is not None
 223         _looks_like_xml_cache[key] = rv
 224         return rv
 225
 226
 227 def surrogatepair(c):
 228     """Given a unicode character code with length greater than 16 bits,
 229     return the two 16 bit surrogate pair.
 230     """
 231     # From example D28 of:
 232     # http://www.unicode.org/book/ch03.pdf
 233     return (0xd7c0 + (c >> 10), (0xdc00 + (c & 0x3ff)))
 234
 235
 236 def format_lines(var_name, seq, raw=False, indent_level=0):
 237     """Formats a sequence of strings for output."""
 238     lines = []
 239     base_indent = ' ' * indent_level * 4
 240     inner_indent = ' ' * (indent_level + 1) * 4
 241     lines.append(base_indent + var_name + ' = (')
 242     if raw:
 243         # These should be preformatted reprs of, say, tuples.
 244         for i in seq:
 245             lines.append(inner_indent + i + ',')
 246     else:
 247         for i in seq:
 248             # Force use of single quotes
 249             r = repr(i + '"')
 250             lines.append(inner_indent + r[:-2] + r[-1] + ',')
 251     lines.append(base_indent + ')')
 252     return '\n'.join(lines)
 253
 254
 255 def duplicates_removed(it, already_seen=()):
 256     """
 257     Returns a list with duplicates removed from the iterable `it`.
 258
 259     Order is preserved.
 260     """
 261     lst = []
 262     seen = set()
 263     for i in it:
 264         if i in seen or i in already_seen:
 265             continue
 266         lst.append(i)
 267         seen.add(i)
 268     return lst
 269
 270
 271 class Future:
 272     """Generic class to defer some work.
 273
 274     Handled specially in RegexLexerMeta, to support regex string construction at
 275     first use.
 276     """
 277     def get(self):
 278         raise NotImplementedError
 279
 280
 281 def guess_decode(text):
 282     """Decode *text* with guessed encoding.
 283
 284     First try UTF-8; this should fail for non-UTF-8 encodings.
 285     Then try the preferred locale encoding.
 286     Fall back to latin-1, which always works.
 287     """
 288     try:
 289         text = text.decode('utf-8')
 290         return text, 'utf-8'
 291     except UnicodeDecodeError:
 292         try:
 293             import locale
 294             prefencoding = locale.getpreferredencoding()
 295             text = text.decode()
 296             return text, prefencoding
 297         except (UnicodeDecodeError, LookupError):
 298             text = text.decode('latin1')
 299             return text, 'latin1'
 300
 301
 302 def guess_decode_from_terminal(text, term):
 303     """Decode *text* coming from terminal *term*.
 304
 305     First try the terminal encoding, if given.
 306     Then try UTF-8.  Then try the preferred locale encoding.
 307     Fall back to latin-1, which always works.
 308     """
 309     if getattr(term, 'encoding', None):
 310         try:
 311             text = text.decode(term.encoding)
 312         except UnicodeDecodeError:
 313             pass
 314         else:
 315             return text, term.encoding
 316     return guess_decode(text)
 317
 318
 319 def terminal_encoding(term):
 320     """Return our best guess of encoding for the given *term*."""
 321     if getattr(term, 'encoding', None):
 322         return term.encoding
 323     import locale
 324     return locale.getpreferredencoding()
 325
 326
 327 class UnclosingTextIOWrapper(TextIOWrapper):
 328     # Don't close underlying buffer on destruction.
 329     def close(self):
 330         self.flush()