]>
jfr.im git - dlqueue.git/blob - venv/lib/python3.11/site-packages/pip/_vendor/pygments/regexopt.py
"""
    An algorithm that generates optimized regexes for matching long lists of
    literal strings.

    :copyright: Copyright 2006-2023 by the Pygments team, see AUTHORS.
    :license: BSD, see LICENSE for details.
"""
14 from os
.path
import commonprefix
15 from itertools
import groupby
16 from operator
import itemgetter
# Matches every character that must be backslash-escaped inside a regex
# character class: '[', '^', '\', '-' and ']'.
CS_ESCAPE = re.compile(r'[\[\^\\\-\]]')
# Key function returning the first item of a sequence.
FIRST_ELEMENT = itemgetter(0)
def make_charset(letters):
    """Return a regex character class matching any one of *letters*.

    *letters* is an iterable of strings; they are joined together and every
    character matched by ``CS_ESCAPE`` is prefixed with a backslash before
    the result is wrapped in ``[...]``.
    """
    escaped = CS_ESCAPE.sub(lambda m: '\\' + m.group(), ''.join(letters))
    return '[' + escaped + ']'
def regex_opt_inner(strings, open_paren):
    """Return a regex that matches any string in the sorted list of strings.

    *strings* must already be sorted.  *open_paren* is the group opener to
    wrap the result in (``'('``, ``'(?:'`` or ``''`` for no group); the
    matching ``')'`` is appended only when an opener was given.
    """
    close_paren = ')' if open_paren else ''

    # Nothing left to match.
    if not strings:
        return ''

    first = strings[0]

    # A single string: just escape it.
    if len(strings) == 1:
        return open_paren + escape(first) + close_paren

    # The first string is empty: everything that follows is optional.
    if not first:
        return open_paren + regex_opt_inner(strings[1:], '(?:') \
            + '?' + close_paren

    if len(first) == 1:
        # Multiple one-char strings?  Collapse them into a charset.
        oneletter = []
        rest = []
        for s in strings:
            if len(s) == 1:
                oneletter.append(s)
            else:
                rest.append(s)
        if len(oneletter) > 1:  # do we have more than one oneletter string?
            if rest:
                # One-character strings plus longer alternatives.
                return open_paren + regex_opt_inner(rest, '') + '|' \
                    + make_charset(oneletter) + close_paren
            # Only one-character strings.
            return open_paren + make_charset(oneletter) + close_paren

    prefix = commonprefix(strings)
    if prefix:
        # All strings share a prefix: factor it out and recurse on the tails.
        plen = len(prefix)
        return open_paren + escape(prefix) \
            + regex_opt_inner([s[plen:] for s in strings], '(?:') \
            + close_paren

    # Is there a common suffix?  Find it as the common prefix of the
    # reversed strings.
    strings_rev = [s[::-1] for s in strings]
    suffix = commonprefix(strings_rev)
    if suffix:
        slen = len(suffix)
        # Stripping the suffix can break the ordering, so re-sort the heads.
        return open_paren \
            + regex_opt_inner(sorted(s[:-slen] for s in strings), '(?:') \
            + escape(suffix[::-1]) + close_paren

    # Last resort: split into the strings sharing the first string's initial
    # character and the remainder, and recurse on each group.
    return open_paren + \
        '|'.join(regex_opt_inner(list(group[1]), '')
                 for group in groupby(strings, lambda s: s[0] == first[0])) \
        + close_paren
def regex_opt(strings, prefix='', suffix=''):
    """Return a regex pattern string that matches any string in the given list.

    The strings to match must be literal strings, not regexes.  They will be
    regex-escaped.  The result is an uncompiled pattern string whose
    alternatives are wrapped in a single capturing group.

    *prefix* and *suffix* are pre- and appended to the final regex.
    """
    # regex_opt_inner requires its input to be sorted.
    strings = sorted(strings)
    return prefix + regex_opt_inner(strings, '(') + suffix