]> jfr.im git - irc/evilnet/x3.git/blame - rx/rxnfa.h
Couple of srvx updates.
[irc/evilnet/x3.git] / rx / rxnfa.h
CommitLineData
d76ed9a9 1/* classes: h_files */
2
3#ifndef RXNFAH
4#define RXNFAH
5/* Copyright (C) 1995, 1996 Tom Lord
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU Library General Public License as published by
9 * the Free Software Foundation; either version 2, or (at your option)
10 * any later version.
11 *
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU Library General Public License for more details.
16 *
17 * You should have received a copy of the GNU Library General Public License
18 * along with this software; see the file COPYING. If not, write to
19 * the Free Software Foundation, 59 Temple Place - Suite 330,
20 * Boston, MA 02111-1307, USA.
21 */
22
23
24\f
25/*
26 * Tom Lord (lord@cygnus.com, lord@gnu.ai.mit.edu)
27 */
28\f
29#include "_rx.h"
30#include "rxnode.h"
31\f
32
33/* NFA
34 *
35 * A syntax tree is compiled into an NFA. This page defines the structure
36 * of that NFA.
37 */
38
39struct rx_nfa_state
40{
41 /* These are kept in a list as the NFA is being built.
42 * Here is the link.
43 */
44 struct rx_nfa_state *next;
45
46 /* After the NFA is built, states are given integer id's.
47 * States whose outgoing transitions are all either epsilon or
48 * side effect edges are given ids less than 0. Other states
49 * are given successive non-negative ids starting from 0.
50 *
51 * Here is the id for this state:
52 */
53 int id;
54
55 /* The list of NFA edges that go from this state to some other. */
56 struct rx_nfa_edge *edges;
57
58 /* If you land in this state, then you implicitly land
59 * in all other states reachable by only epsilon translations.
60 * Call the set of maximal loop-less paths to such states the
61 * epsilon closure of this state.
62 *
63 * There may be other states that are reachable by a mixture of
64 * epsilon and side effect edges. Consider the set of maximal loop-less
65 * paths of that sort from this state. Call it the epsilon-side-effect
66 * closure of the state.
67 *
68 * The epsilon closure of the state is a subset of the epsilon-side-
69 * effect closure. It consists of all the paths that contain
70 * no side effects -- only epsilon edges.
71 *
72 * The paths in the epsilon-side-effect closure can be partitioned
73 * into equivalance sets. Two paths are equivalant if they have the
74 * same set of side effects, in the same order. The epsilon-closure
75 * is one of these equivalance sets. Let's call these equivalance
76 * sets: observably equivalant path sets. That name is chosen
77 * because equivalance of two paths means they cause the same side
78 * effects -- so they lead to the same subsequent observations other
79 * than that they may wind up in different target states.
80 *
81 * The superstate nfa, which is derived from this nfa, is based on
82 * the observation that all of the paths in an observably equivalant
83 * path set can be explored at the same time, provided that the
84 * matcher keeps track not of a single nfa state, but of a set of
85 * states. In particular, after following all the paths in an
86 * observably equivalant set, you wind up at a set of target states.
87 * That set of target states corresponds to one state in the
88 * superstate NFA.
89 *
90 * Staticly, before matching begins, it is convenient to analyze the
91 * nfa. Each state is labeled with a list of the observably
92 * equivalant path sets who's union covers all the
93 * epsilon-side-effect paths beginning in this state. This list is
94 * called the possible futures of the state.
95 *
96 * A trivial example is this NFA:
97 * s1
98 * A ---> B
99 *
100 * s2
101 * ---> C
102 *
103 * epsilon s1
104 * ---------> D ------> E
105 *
106 *
107 * In this example, A has two possible futures.
108 * One invokes the side effect `s1' and contains two paths,
109 * one ending in state B, the other in state E.
110 * The other invokes the side effect `s2' and contains only
111 * one path, landing in state C.
112 *
113 * Here is a list of the possible futures of this state:
114 */
115 struct rx_possible_future *futures;
116 int futures_computed:1;
117
118
119 /* There is exactly one distinguished starting state in every NFA: */
120 unsigned int is_start:1;
121
122 int has_cset_edges:1;
123
124 /* There may be many final states if the "cut" operator was used.
125 * each will have a different non-0 value for this field:
126 */
127 int is_final;
128
129
130 /* These are used internally during NFA construction... */
131 unsigned int eclosure_needed:1;
132 unsigned int mark:1;
133};
134
135
136/* An edge in an NFA is typed:
137 */
138enum rx_nfa_etype
139{
140 /* A cset edge is labled with a set of characters one of which
141 * must be matched for the edge to be taken.
142 */
143 ne_cset,
144
145 /* An epsilon edge is taken whenever its starting state is
146 * reached.
147 */
148 ne_epsilon,
149
150 /* A side effect edge is taken whenever its starting state is
151 * reached. Side effects may cause the match to fail or the
152 * position of the matcher to advance.
153 */
154 ne_side_effect
155};
156
157struct rx_nfa_edge
158{
159 struct rx_nfa_edge *next;
160 enum rx_nfa_etype type;
161 struct rx_nfa_state *dest;
162 union
163 {
164 rx_Bitset cset;
165 void * side_effect;
166 } params;
167};
168
169
170
171/* A possible future consists of a list of side effects
172 * and a set of destination states. Below are their
173 * representations. These structures are hash-consed so
174 * that lists with the same elements share a representation
175 * (their addresses are ==).
176 */
177
178struct rx_nfa_state_set
179{
180 struct rx_nfa_state * car;
181 struct rx_nfa_state_set * cdr;
182};
183
184struct rx_se_list
185{
186 void * car;
187 struct rx_se_list * cdr;
188};
189
190struct rx_possible_future
191{
192 struct rx_possible_future *next;
193 struct rx_se_list * effects;
194 struct rx_nfa_state_set * destset;
195};
196
197
198
199\f
200#ifdef __STDC__
201extern struct rx_nfa_state * rx_nfa_state (struct rx *rx);
202extern struct rx_nfa_edge * rx_nfa_edge (struct rx *rx,
203 enum rx_nfa_etype type,
204 struct rx_nfa_state *start,
205 struct rx_nfa_state *dest);
206extern int rx_build_nfa (struct rx *rx,
207 struct rexp_node *rexp,
208 struct rx_nfa_state **start,
209 struct rx_nfa_state **end);
210extern struct rx_possible_future * rx_state_possible_futures (struct rx * rx, struct rx_nfa_state * n);
211extern void rx_free_nfa (struct rx *rx);
212
213#else /* STDC */
214extern struct rx_nfa_state * rx_nfa_state ();
215extern struct rx_nfa_edge * rx_nfa_edge ();
216extern int rx_build_nfa ();
217extern struct rx_possible_future * rx_state_possible_futures ();
218extern void rx_free_nfa ();
219
220#endif /* STDC */
221
222
223
224
225
226
227
228
229
230
231
232#endif /* RXNFAH */