]> jfr.im git - dlqueue.git/blob - venv/lib/python3.11/site-packages/pip/_vendor/pygments/regexopt.py
init: venv aand flask
[dlqueue.git] / venv / lib / python3.11 / site-packages / pip / _vendor / pygments / regexopt.py
1 """
2 pygments.regexopt
3 ~~~~~~~~~~~~~~~~~
4
5 An algorithm that generates optimized regexes for matching long lists of
6 literal strings.
7
8 :copyright: Copyright 2006-2023 by the Pygments team, see AUTHORS.
9 :license: BSD, see LICENSE for details.
10 """
11
12 import re
13 from re import escape
14 from os.path import commonprefix
15 from itertools import groupby
16 from operator import itemgetter
17
18 CS_ESCAPE = re.compile(r'[\[\^\\\-\]]')
19 FIRST_ELEMENT = itemgetter(0)
20
21
22 def make_charset(letters):
23 return '[' + CS_ESCAPE.sub(lambda m: '\\' + m.group(), ''.join(letters)) + ']'
24
25
26 def regex_opt_inner(strings, open_paren):
27 """Return a regex that matches any string in the sorted list of strings."""
28 close_paren = open_paren and ')' or ''
29 # print strings, repr(open_paren)
30 if not strings:
31 # print '-> nothing left'
32 return ''
33 first = strings[0]
34 if len(strings) == 1:
35 # print '-> only 1 string'
36 return open_paren + escape(first) + close_paren
37 if not first:
38 # print '-> first string empty'
39 return open_paren + regex_opt_inner(strings[1:], '(?:') \
40 + '?' + close_paren
41 if len(first) == 1:
42 # multiple one-char strings? make a charset
43 oneletter = []
44 rest = []
45 for s in strings:
46 if len(s) == 1:
47 oneletter.append(s)
48 else:
49 rest.append(s)
50 if len(oneletter) > 1: # do we have more than one oneletter string?
51 if rest:
52 # print '-> 1-character + rest'
53 return open_paren + regex_opt_inner(rest, '') + '|' \
54 + make_charset(oneletter) + close_paren
55 # print '-> only 1-character'
56 return open_paren + make_charset(oneletter) + close_paren
57 prefix = commonprefix(strings)
58 if prefix:
59 plen = len(prefix)
60 # we have a prefix for all strings
61 # print '-> prefix:', prefix
62 return open_paren + escape(prefix) \
63 + regex_opt_inner([s[plen:] for s in strings], '(?:') \
64 + close_paren
65 # is there a suffix?
66 strings_rev = [s[::-1] for s in strings]
67 suffix = commonprefix(strings_rev)
68 if suffix:
69 slen = len(suffix)
70 # print '-> suffix:', suffix[::-1]
71 return open_paren \
72 + regex_opt_inner(sorted(s[:-slen] for s in strings), '(?:') \
73 + escape(suffix[::-1]) + close_paren
74 # recurse on common 1-string prefixes
75 # print '-> last resort'
76 return open_paren + \
77 '|'.join(regex_opt_inner(list(group[1]), '')
78 for group in groupby(strings, lambda s: s[0] == first[0])) \
79 + close_paren
80
81
82 def regex_opt(strings, prefix='', suffix=''):
83 """Return a compiled regex that matches any string in the given list.
84
85 The strings to match must be literal strings, not regexes. They will be
86 regex-escaped.
87
88 *prefix* and *suffix* are pre- and appended to the final regex.
89 """
90 strings = sorted(strings)
91 return prefix + regex_opt_inner(strings, '(') + suffix