]>
jfr.im git - dlqueue.git/blob - venv/lib/python3.11/site-packages/pip/_vendor/pygments/util.py
7 :copyright: Copyright 2006-2023 by the Pygments team, see AUTHORS.
8 :license: BSD, see LICENSE for details.
12 from io
import TextIOWrapper
# Splits a path-like string on any single '/', '\' or space character.
# Used by shebang_matches() to break the interpreter line into components.
split_path_re = re.compile(r'[/\\ ]')
16 doctype_lookup_re
= re
.compile(r
'''
19 (?: \s+ # optional in HTML5
20 [a-zA-Z_][a-zA-Z0-9]*\s+
24 ''', re
.DOTALL | re
.MULTILINE | re
.VERBOSE
)
# Matches an opening tag (group 1: tag name, group 2: optional attribute
# text starting with whitespace), followed by any content and some closing
# tag. DOTALL lets the content span lines; matching is case-insensitive.
tag_re = re.compile(r'<(.+?)(\s.*?)?>.*?</.+?>',
                    re.IGNORECASE | re.DOTALL | re.MULTILINE)
# Matches an XML declaration (``<?xml ... ?>``), allowing leading whitespace
# and any letter case.
xml_decl_re = re.compile(r'\s*<\?xml[^>]*\?>', re.I)
class ClassNotFound(ValueError):
    """Raised if one of the lookup functions didn't find a matching class."""
class OptionError(Exception):
    """
    This exception will be raised by all option processing functions if
    the type or value of the argument is not correct.
    """
40 def get_choice_opt(options
, optname
, allowed
, default
=None, normcase
=False):
42 If the key `optname` from the dictionary is not in the sequence
43 `allowed`, raise an error, otherwise return it.
45 string
= options
.get(optname
, default
)
47 string
= string
.lower()
48 if string
not in allowed
:
49 raise OptionError('Value for option %s must be one of %s' %
50 (optname
, ', '.join(map(str, allowed
))))
54 def get_bool_opt(options
, optname
, default
=None):
56 Intuitively, this is `options.get(optname, default)`, but restricted to
57 Boolean value. The Booleans can be represented as string, in order to accept
58 Boolean value from the command line arguments. If the key `optname` is
59 present in the dictionary `options` and is not associated with a Boolean,
60 raise an `OptionError`. If it is absent, `default` is returned instead.
62 The valid string values for ``True`` are ``1``, ``yes``, ``true`` and
63 ``on``, the ones for ``False`` are ``0``, ``no``, ``false`` and ``off``
64 (matched case-insensitively).
66 string
= options
.get(optname
, default
)
67 if isinstance(string
, bool):
69 elif isinstance(string
, int):
71 elif not isinstance(string
, str):
72 raise OptionError('Invalid type %r for option %s; use '
73 '1/0, yes/no, true/false, on/off' % (
75 elif string
.lower() in ('1', 'yes', 'true', 'on'):
77 elif string
.lower() in ('0', 'no', 'false', 'off'):
80 raise OptionError('Invalid value %r for option %s; use '
81 '1/0, yes/no, true/false, on/off' % (
85 def get_int_opt(options
, optname
, default
=None):
86 """As :func:`get_bool_opt`, but interpret the value as an integer."""
87 string
= options
.get(optname
, default
)
91 raise OptionError('Invalid type %r for option %s; you '
92 'must give an integer value' % (
95 raise OptionError('Invalid value %r for option %s; you '
96 'must give an integer value' % (
99 def get_list_opt(options
, optname
, default
=None):
101 If the key `optname` from the dictionary `options` is a string,
102 split it at whitespace and return it. If it is already a list
103 or a tuple, it is returned as a list.
105 val
= options
.get(optname
, default
)
106 if isinstance(val
, str):
108 elif isinstance(val
, (list, tuple)):
111 raise OptionError('Invalid type %r for option %s; you '
112 'must give a list value' % (
116 def docstring_headline(obj
):
120 for line
in obj
.__doc
__.strip().splitlines():
122 res
.append(" " + line
.strip())
125 return ''.join(res
).lstrip()
128 def make_analysator(f
):
129 """Return a static text analyser function that returns float values."""
130 def text_analyse(text
):
138 return min(1.0, max(0.0, float(rv
)))
139 except (ValueError, TypeError):
141 text_analyse
.__doc
__ = f
.__doc
__
142 return staticmethod(text_analyse
)
145 def shebang_matches(text
, regex
):
146 r
"""Check if the given regular expression matches the last part of the
147 shebang if one exists.
149 >>> from pygments.util import shebang_matches
150 >>> shebang_matches('#!/usr/bin/env python', r'python(2\.\d)?')
152 >>> shebang_matches('#!/usr/bin/python2.4', r'python(2\.\d)?')
154 >>> shebang_matches('#!/usr/bin/python-ruby', r'python(2\.\d)?')
156 >>> shebang_matches('#!/usr/bin/python/ruby', r'python(2\.\d)?')
158 >>> shebang_matches('#!/usr/bin/startsomethingwith python',
159 ... r'python(2\.\d)?')
162 It also checks for common windows executable file extensions::
164 >>> shebang_matches('#!C:\\Python2.4\\Python.exe', r'python(2\.\d)?')
167 Parameters (``'-f'`` or ``'--foo'`` are ignored so ``'perl'`` does
168 the same as ``'perl -e'``)
170 Note that this method automatically searches the whole string (eg:
171 the regular expression is wrapped in ``'^$'``)
173 index
= text
.find('\n')
175 first_line
= text
[:index
].lower()
177 first_line
= text
.lower()
178 if first_line
.startswith('#!'):
180 found
= [x
for x
in split_path_re
.split(first_line
[2:].strip())
181 if x
and not x
.startswith('-')][-1]
184 regex
= re
.compile(r
'^%s(\.(exe|cmd|bat|bin))?$' % regex
, re
.IGNORECASE
)
185 if regex
.search(found
) is not None:
190 def doctype_matches(text
, regex
):
191 """Check if the doctype matches a regular expression (if present).
193 Note that this method only checks the first part of a DOCTYPE.
194 eg: 'html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"'
196 m
= doctype_lookup_re
.search(text
)
200 return re
.compile(regex
, re
.I
).match(doctype
.strip()) is not None
def html_doctype_matches(text):
    """Check if the file looks like it has a html doctype.

    Delegates to :func:`doctype_matches` with the pattern ``html`` and
    returns its result unchanged.
    """
    return doctype_matches(text, r'html')
# Module-level memo used by looks_like_xml(), which stores its tag-search
# result here and returns a stored value when one exists for the same key.
_looks_like_xml_cache = {}
211 def looks_like_xml(text
):
212 """Check if a doctype exists or if we have some tags."""
213 if xml_decl_re
.match(text
):
217 return _looks_like_xml_cache
[key
]
219 m
= doctype_lookup_re
.search(text
)
222 rv
= tag_re
.search(text
[:1000]) is not None
223 _looks_like_xml_cache
[key
] = rv
def surrogatepair(c):
    """Given a unicode character code with length greater than 16 bits,
    return the two 16 bit surrogate pair.

    The result is a ``(high, low)`` tuple of integers.
    """
    # From example D28 of:
    # http://www.unicode.org/book/ch03.pdf
    # 0xd7c0 equals 0xd800 - (0x10000 >> 10), so adding (c >> 10) both
    # subtracts the 0x10000 offset and adds the high-surrogate base; the
    # low surrogate carries the bottom 10 bits of the code point.
    return (0xd7c0 + (c >> 10), (0xdc00 + (c & 0x3ff)))
236 def format_lines(var_name
, seq
, raw
=False, indent_level
=0):
237 """Formats a sequence of strings for output."""
239 base_indent
= ' ' * indent_level
* 4
240 inner_indent
= ' ' * (indent_level
+ 1) * 4
241 lines
.append(base_indent
+ var_name
+ ' = (')
243 # These should be preformatted reprs of, say, tuples.
245 lines
.append(inner_indent
+ i
+ ',')
248 # Force use of single quotes
250 lines
.append(inner_indent
+ r
[:-2] + r
[-1] + ',')
251 lines
.append(base_indent
+ ')')
252 return '\n'.join(lines
)
255 def duplicates_removed(it
, already_seen
=()):
257 Returns a list with duplicates removed from the iterable `it`.
264 if i
in seen
or i
in already_seen
:
272 """Generic class to defer some work.
274 Handled specially in RegexLexerMeta, to support regex string construction at
278 raise NotImplementedError
281 def guess_decode(text
):
282 """Decode *text* with guessed encoding.
284 First try UTF-8; this should fail for non-UTF-8 encodings.
285 Then try the preferred locale encoding.
286 Fall back to latin-1, which always works.
289 text
= text
.decode('utf-8')
291 except UnicodeDecodeError:
294 prefencoding
= locale
.getpreferredencoding()
296 return text
, prefencoding
297 except (UnicodeDecodeError, LookupError):
298 text
= text
.decode('latin1')
299 return text
, 'latin1'
302 def guess_decode_from_terminal(text
, term
):
303 """Decode *text* coming from terminal *term*.
305 First try the terminal encoding, if given.
306 Then try UTF-8. Then try the preferred locale encoding.
307 Fall back to latin-1, which always works.
309 if getattr(term
, 'encoding', None):
311 text
= text
.decode(term
.encoding
)
312 except UnicodeDecodeError:
315 return text
, term
.encoding
316 return guess_decode(text
)
319 def terminal_encoding(term
):
320 """Return our best guess of encoding for the given *term*."""
321 if getattr(term
, 'encoding', None):
324 return locale
.getpreferredencoding()
327 class UnclosingTextIOWrapper(TextIOWrapper
):
328 # Don't close underlying buffer on destruction.