]>
jfr.im git - irc/evilnet/x3.git/blob - rx/rxnfa.h
5 /* Copyright (C) 1995, 1996 Tom Lord
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU Library General Public License as published by
9 * the Free Software Foundation; either version 2, or (at your option)
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU Library General Public License for more details.
17 * You should have received a copy of the GNU Library General Public License
18 * along with this software; see the file COPYING. If not, write to
19 * the Free Software Foundation, 59 Temple Place - Suite 330,
20 * Boston, MA 02111-1307, USA.
26 * Tom Lord (lord@cygnus.com, lord@gnu.ai.mit.edu)
35 * A syntax tree is compiled into an NFA. This page defines the structure
41 /* These are kept in a list as the NFA is being built.
44 struct rx_nfa_state
*next
;
46 /* After the NFA is built, states are given integer id's.
47 * States whose outgoing transitions are all either epsilon or
48 * side effect edges are given ids less than 0. Other states
49 * are given successive non-negative ids starting from 0.
51 * Here is the id for this state:
55 /* The list of NFA edges that go from this state to some other. */
56 struct rx_nfa_edge
*edges
;
58 /* If you land in this state, then you implicitly land
59 * in all other states reachable by only epsilon translations.
60 * Call the set of maximal loop-less paths to such states the
61 * epsilon closure of this state.
63 * There may be other states that are reachable by a mixture of
64 * epsilon and side effect edges. Consider the set of maximal loop-less
65 * paths of that sort from this state. Call it the epsilon-side-effect
66 * closure of the state.
68 * The epsilon closure of the state is a subset of the epsilon-side-
69 * effect closure. It consists of all the paths that contain
70 * no side effects -- only epsilon edges.
72 * The paths in the epsilon-side-effect closure can be partitioned
73 * into equivalance sets. Two paths are equivalant if they have the
74 * same set of side effects, in the same order. The epsilon-closure
75 * is one of these equivalance sets. Let's call these equivalance
76 * sets: observably equivalant path sets. That name is chosen
77 * because equivalance of two paths means they cause the same side
78 * effects -- so they lead to the same subsequent observations other
79 * than that they may wind up in different target states.
81 * The superstate nfa, which is derived from this nfa, is based on
82 * the observation that all of the paths in an observably equivalant
83 * path set can be explored at the same time, provided that the
84 * matcher keeps track not of a single nfa state, but of a set of
85 * states. In particular, after following all the paths in an
86 * observably equivalant set, you wind up at a set of target states.
87 * That set of target states corresponds to one state in the
90 * Staticly, before matching begins, it is convenient to analyze the
91 * nfa. Each state is labeled with a list of the observably
92 * equivalant path sets who's union covers all the
93 * epsilon-side-effect paths beginning in this state. This list is
94 * called the possible futures of the state.
96 * A trivial example is this NFA:
104 * ---------> D ------> E
107 * In this example, A has two possible futures.
108 * One invokes the side effect `s1' and contains two paths,
109 * one ending in state B, the other in state E.
110 * The other invokes the side effect `s2' and contains only
111 * one path, landing in state C.
113 * Here is a list of the possible futures of this state:
115 struct rx_possible_future
*futures
;
116 int futures_computed
:1;
119 /* There is exactly one distinguished starting state in every NFA: */
120 unsigned int is_start
:1;
122 int has_cset_edges
:1;
124 /* There may be many final states if the "cut" operator was used.
125 * each will have a different non-0 value for this field:
130 /* These are used internally during NFA construction... */
131 unsigned int eclosure_needed
:1;
136 /* An edge in an NFA is typed:
140 /* A cset edge is labled with a set of characters one of which
141 * must be matched for the edge to be taken.
145 /* An epsilon edge is taken whenever its starting state is
150 /* A side effect edge is taken whenever its starting state is
151 * reached. Side effects may cause the match to fail or the
152 * position of the matcher to advance.
159 struct rx_nfa_edge
*next
;
160 enum rx_nfa_etype type
;
161 struct rx_nfa_state
*dest
;
171 /* A possible future consists of a list of side effects
172 * and a set of destination states. Below are their
173 * representations. These structures are hash-consed so
174 * that lists with the same elements share a representation
175 * (their addresses are ==).
178 struct rx_nfa_state_set
180 struct rx_nfa_state
* car
;
181 struct rx_nfa_state_set
* cdr
;
187 struct rx_se_list
* cdr
;
190 struct rx_possible_future
192 struct rx_possible_future
*next
;
193 struct rx_se_list
* effects
;
194 struct rx_nfa_state_set
* destset
;
201 extern struct rx_nfa_state
* rx_nfa_state (struct rx
*rx
);
202 extern struct rx_nfa_edge
* rx_nfa_edge (struct rx
*rx
,
203 enum rx_nfa_etype type
,
204 struct rx_nfa_state
*start
,
205 struct rx_nfa_state
*dest
);
206 extern int rx_build_nfa (struct rx
*rx
,
207 struct rexp_node
*rexp
,
208 struct rx_nfa_state
**start
,
209 struct rx_nfa_state
**end
);
210 extern struct rx_possible_future
* rx_state_possible_futures (struct rx
* rx
, struct rx_nfa_state
* n
);
211 extern void rx_free_nfa (struct rx
*rx
);
214 extern struct rx_nfa_state
* rx_nfa_state ();
215 extern struct rx_nfa_edge
* rx_nfa_edge ();
216 extern int rx_build_nfa ();
217 extern struct rx_possible_future
* rx_state_possible_futures ();
218 extern void rx_free_nfa ();