]>
Commit | Line | Data |
---|---|---|
d76ed9a9 AS |
1 | /* classes: h_files */ |
2 | ||
3 | #ifndef RXNFAH | |
4 | #define RXNFAH | |
5 | /* Copyright (C) 1995, 1996 Tom Lord | |
6 | * | |
7 | * This program is free software; you can redistribute it and/or modify | |
8 | * it under the terms of the GNU Library General Public License as published by | |
9 | * the Free Software Foundation; either version 2, or (at your option) | |
10 | * any later version. | |
11 | * | |
12 | * This program is distributed in the hope that it will be useful, | |
13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
15 | * GNU Library General Public License for more details. | |
16 | * | |
17 | * You should have received a copy of the GNU Library General Public License | |
18 | * along with this software; see the file COPYING. If not, write to | |
19 | * the Free Software Foundation, 59 Temple Place - Suite 330, | |
20 | * Boston, MA 02111-1307, USA. | |
21 | */ | |
22 | ||
23 | ||
24 | \f | |
25 | /* | |
26 | * Tom Lord (lord@cygnus.com, lord@gnu.ai.mit.edu) | |
27 | */ | |
28 | \f | |
29 | #include "_rx.h" | |
30 | #include "rxnode.h" | |
31 | \f | |
32 | ||
33 | /* NFA | |
34 | * | |
35 | * A syntax tree is compiled into an NFA. This page defines the structure | |
36 | * of that NFA. | |
37 | */ | |
38 | ||
39 | struct rx_nfa_state | |
40 | { | |
41 | /* These are kept in a list as the NFA is being built. | |
42 | * Here is the link. | |
43 | */ | |
44 | struct rx_nfa_state *next; | |
45 | ||
46 | /* After the NFA is built, states are given integer id's. | |
47 | * States whose outgoing transitions are all either epsilon or | |
48 | * side effect edges are given ids less than 0. Other states | |
49 | * are given successive non-negative ids starting from 0. | |
50 | * | |
51 | * Here is the id for this state: | |
52 | */ | |
53 | int id; | |
54 | ||
55 | /* The list of NFA edges that go from this state to some other. */ | |
56 | struct rx_nfa_edge *edges; | |
57 | ||
58 | /* If you land in this state, then you implicitly land | |
59 | * in all other states reachable by only epsilon translations. | |
60 | * Call the set of maximal loop-less paths to such states the | |
61 | * epsilon closure of this state. | |
62 | * | |
63 | * There may be other states that are reachable by a mixture of | |
64 | * epsilon and side effect edges. Consider the set of maximal loop-less | |
65 | * paths of that sort from this state. Call it the epsilon-side-effect | |
66 | * closure of the state. | |
67 | * | |
68 | * The epsilon closure of the state is a subset of the epsilon-side- | |
69 | * effect closure. It consists of all the paths that contain | |
70 | * no side effects -- only epsilon edges. | |
71 | * | |
72 | * The paths in the epsilon-side-effect closure can be partitioned | |
73 | * into equivalance sets. Two paths are equivalant if they have the | |
74 | * same set of side effects, in the same order. The epsilon-closure | |
75 | * is one of these equivalance sets. Let's call these equivalance | |
76 | * sets: observably equivalant path sets. That name is chosen | |
77 | * because equivalance of two paths means they cause the same side | |
78 | * effects -- so they lead to the same subsequent observations other | |
79 | * than that they may wind up in different target states. | |
80 | * | |
81 | * The superstate nfa, which is derived from this nfa, is based on | |
82 | * the observation that all of the paths in an observably equivalant | |
83 | * path set can be explored at the same time, provided that the | |
84 | * matcher keeps track not of a single nfa state, but of a set of | |
85 | * states. In particular, after following all the paths in an | |
86 | * observably equivalant set, you wind up at a set of target states. | |
87 | * That set of target states corresponds to one state in the | |
88 | * superstate NFA. | |
89 | * | |
90 | * Staticly, before matching begins, it is convenient to analyze the | |
91 | * nfa. Each state is labeled with a list of the observably | |
92 | * equivalant path sets who's union covers all the | |
93 | * epsilon-side-effect paths beginning in this state. This list is | |
94 | * called the possible futures of the state. | |
95 | * | |
96 | * A trivial example is this NFA: | |
97 | * s1 | |
98 | * A ---> B | |
99 | * | |
100 | * s2 | |
101 | * ---> C | |
102 | * | |
103 | * epsilon s1 | |
104 | * ---------> D ------> E | |
105 | * | |
106 | * | |
107 | * In this example, A has two possible futures. | |
108 | * One invokes the side effect `s1' and contains two paths, | |
109 | * one ending in state B, the other in state E. | |
110 | * The other invokes the side effect `s2' and contains only | |
111 | * one path, landing in state C. | |
112 | * | |
113 | * Here is a list of the possible futures of this state: | |
114 | */ | |
115 | struct rx_possible_future *futures; | |
116 | int futures_computed:1; | |
117 | ||
118 | ||
119 | /* There is exactly one distinguished starting state in every NFA: */ | |
120 | unsigned int is_start:1; | |
121 | ||
122 | int has_cset_edges:1; | |
123 | ||
124 | /* There may be many final states if the "cut" operator was used. | |
125 | * each will have a different non-0 value for this field: | |
126 | */ | |
127 | int is_final; | |
128 | ||
129 | ||
130 | /* These are used internally during NFA construction... */ | |
131 | unsigned int eclosure_needed:1; | |
132 | unsigned int mark:1; | |
133 | }; | |
134 | ||
135 | ||
136 | /* An edge in an NFA is typed: | |
137 | */ | |
138 | enum rx_nfa_etype | |
139 | { | |
140 | /* A cset edge is labled with a set of characters one of which | |
141 | * must be matched for the edge to be taken. | |
142 | */ | |
143 | ne_cset, | |
144 | ||
145 | /* An epsilon edge is taken whenever its starting state is | |
146 | * reached. | |
147 | */ | |
148 | ne_epsilon, | |
149 | ||
150 | /* A side effect edge is taken whenever its starting state is | |
151 | * reached. Side effects may cause the match to fail or the | |
152 | * position of the matcher to advance. | |
153 | */ | |
154 | ne_side_effect | |
155 | }; | |
156 | ||
157 | struct rx_nfa_edge | |
158 | { | |
159 | struct rx_nfa_edge *next; | |
160 | enum rx_nfa_etype type; | |
161 | struct rx_nfa_state *dest; | |
162 | union | |
163 | { | |
164 | rx_Bitset cset; | |
165 | void * side_effect; | |
166 | } params; | |
167 | }; | |
168 | ||
169 | ||
170 | ||
171 | /* A possible future consists of a list of side effects | |
172 | * and a set of destination states. Below are their | |
173 | * representations. These structures are hash-consed so | |
174 | * that lists with the same elements share a representation | |
175 | * (their addresses are ==). | |
176 | */ | |
177 | ||
178 | struct rx_nfa_state_set | |
179 | { | |
180 | struct rx_nfa_state * car; | |
181 | struct rx_nfa_state_set * cdr; | |
182 | }; | |
183 | ||
184 | struct rx_se_list | |
185 | { | |
186 | void * car; | |
187 | struct rx_se_list * cdr; | |
188 | }; | |
189 | ||
190 | struct rx_possible_future | |
191 | { | |
192 | struct rx_possible_future *next; | |
193 | struct rx_se_list * effects; | |
194 | struct rx_nfa_state_set * destset; | |
195 | }; | |
196 | ||
197 | ||
198 | ||
199 | \f | |
200 | #ifdef __STDC__ | |
201 | extern struct rx_nfa_state * rx_nfa_state (struct rx *rx); | |
202 | extern struct rx_nfa_edge * rx_nfa_edge (struct rx *rx, | |
203 | enum rx_nfa_etype type, | |
204 | struct rx_nfa_state *start, | |
205 | struct rx_nfa_state *dest); | |
206 | extern int rx_build_nfa (struct rx *rx, | |
207 | struct rexp_node *rexp, | |
208 | struct rx_nfa_state **start, | |
209 | struct rx_nfa_state **end); | |
210 | extern struct rx_possible_future * rx_state_possible_futures (struct rx * rx, struct rx_nfa_state * n); | |
211 | extern void rx_free_nfa (struct rx *rx); | |
212 | ||
213 | #else /* STDC */ | |
214 | extern struct rx_nfa_state * rx_nfa_state (); | |
215 | extern struct rx_nfa_edge * rx_nfa_edge (); | |
216 | extern int rx_build_nfa (); | |
217 | extern struct rx_possible_future * rx_state_possible_futures (); | |
218 | extern void rx_free_nfa (); | |
219 | ||
220 | #endif /* STDC */ | |
221 | ||
222 | ||
223 | ||
224 | ||
225 | ||
226 | ||
227 | ||
228 | ||
229 | ||
230 | ||
231 | ||
232 | #endif /* RXNFAH */ |