]>
Commit | Line | Data |
---|---|---|
4d256d41 CP |
1 | """ |
2 | Iterator-based sre token scanner | |
3 | """ | |
4 | import re | |
5 | from re import VERBOSE, MULTILINE, DOTALL | |
6 | import sre_parse | |
7 | import sre_compile | |
8 | import sre_constants | |
9 | from sre_constants import BRANCH, SUBPATTERN | |
10 | ||
11 | __all__ = ['Scanner', 'pattern'] | |
12 | ||
# Default regex flags for this module: verbose pattern syntax, multi-line
# anchors, and "." matching newlines.
FLAGS = VERBOSE | MULTILINE | DOTALL
14 | ||
class Scanner(object):
    """Token scanner that tries every lexicon pattern through one compiled regex.

    Attributes set by ``__init__``:
        actions: list indexed by regex group number. Slot 0 is ``None``;
            slot ``i + 1`` holds the i-th lexicon entry.
        scanner: the compiled compound pattern, a single alternation with
            one numbered group per lexicon entry.

    NOTE(review): construction depends on undocumented ``sre_parse`` /
    ``sre_compile`` internals (e.g. ``sre_parse.Pattern``) that differ
    between Python versions -- confirm against the target interpreter.
    """

    def __init__(self, lexicon, flags=FLAGS):
        """Combine the lexicon's phrases into one compound pattern.

        Args:
            lexicon: iterable of token handlers. Each must expose a
                ``pattern`` attribute holding its regex source; the handler
                itself is stored and later called by ``iterscan``.
            flags: regex flags used both for the shared parser state and
                for parsing each individual phrase.

        Raises:
            Whatever ``sre_parse.parse`` raises for an invalid phrase; the
            error propagates unchanged.
        """
        self.actions = [None]
        state = sre_parse.Pattern()
        state.flags = flags
        branches = []
        for idx, token in enumerate(lexicon):
            # Group numbers are 1-based so that ``m.lastindex`` indexes
            # straight into ``self.actions``.
            parsed = sre_parse.parse(token.pattern, flags)
            branches.append(
                sre_parse.SubPattern(state, [(SUBPATTERN, (idx + 1, parsed))]))
            self.actions.append(token)

        # NOTE(guido): Added to make SRE validation work
        state.groups = len(branches) + 1
        compound = sre_parse.SubPattern(state, [(BRANCH, (None, branches))])
        self.scanner = sre_compile.compile(compound)

    def iterscan(self, string, idx=0, context=None):
        """Yield ``(rval, end_idx)`` for each token matched in *string*.

        Scanning starts at *idx* and proceeds match by match. For every
        match whose group has a non-``None`` action, the action is called
        as ``action(match, context)`` and must return ``(rval, next_pos)``.
        When ``next_pos`` is not ``None`` and differs from the match end,
        scanning resumes from ``next_pos`` instead. Matches whose action is
        ``None`` are consumed without yielding anything.

        Iteration stops when nothing matches at the current position or
        when a match ends where the previous one ended (no progress).
        """
        match = self.scanner.scanner(string, idx).match
        actions = self.actions
        lastend = idx
        while True:
            m = match()
            if m is None:
                break
            matchend = m.end()
            if lastend == matchend:
                # The match consumed nothing; stop instead of spinning.
                break
            action = actions[m.lastindex]
            if action is not None:
                rval, next_pos = action(m, context)
                if next_pos is not None and next_pos != matchend:
                    # "fast forward" the scanner
                    matchend = next_pos
                    match = self.scanner.scanner(string, matchend).match
                yield rval, matchend
            lastend = matchend
60 | ||
61 | ||
def pattern(pattern, flags=FLAGS):
    """Return a decorator that tags a function with a regex.

    The decorated function is returned unchanged apart from two new
    attributes: ``pattern`` (the raw regex source passed here) and
    ``regex`` (that source compiled with *flags*).
    """
    def attach(func):
        # Store the raw source first, then the compiled form.
        func.pattern = pattern
        compiled = re.compile(pattern, flags)
        func.regex = compiled
        return func

    return attach