]>
Commit | Line | Data |
---|---|---|
d76ed9a9 AS |
1 | /* Copyright (C) 1995, 1996 Tom Lord |
2 | * | |
3 | * This program is free software; you can redistribute it and/or modify | |
4 | * it under the terms of the GNU Library General Public License as published by | |
5 | * the Free Software Foundation; either version 2, or (at your option) | |
6 | * any later version. | |
7 | * | |
8 | * This program is distributed in the hope that it will be useful, | |
9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
11 | * GNU Library General Public License for more details. | |
12 | * | |
13 | * You should have received a copy of the GNU Library General Public License | |
14 | * along with this software; see the file COPYING. If not, write to | |
15 | * the Free Software Foundation, 59 Temple Place - Suite 330, | |
16 | * Boston, MA 02111-1307, USA. | |
17 | */ | |
18 | ||
19 | \f | |
20 | /* | |
21 | * Tom Lord (lord@cygnus.com, lord@gnu.ai.mit.edu) | |
22 | */ | |
23 | \f | |
24 | ||
25 | #include "rxall.h" | |
26 | #include "rxnfa.h" | |
27 | ||
28 | \f | |
29 | ||
30 | #define remalloc(M, S) (M ? realloc (M, S) : malloc (S)) | |
31 | ||
32 | \f | |
33 | /* {Low Level Data Structure Code} | |
34 | */ | |
35 | ||
36 | /* Constructs a new nfa node. */ | |
37 | #ifdef __STDC__ | |
38 | struct rx_nfa_state * | |
39 | rx_nfa_state (struct rx *rx) | |
40 | #else | |
41 | struct rx_nfa_state * | |
42 | rx_nfa_state (rx) | |
43 | struct rx *rx; | |
44 | #endif | |
45 | { | |
46 | struct rx_nfa_state * n = (struct rx_nfa_state *)malloc (sizeof (*n)); | |
47 | if (!n) | |
48 | return 0; | |
49 | rx_bzero ((char *)n, sizeof (*n)); | |
50 | n->next = rx->nfa_states; | |
51 | rx->nfa_states = n; | |
52 | return n; | |
53 | } | |
54 | ||
55 | ||
56 | #ifdef __STDC__ | |
57 | static void | |
58 | rx_free_nfa_state (struct rx_nfa_state * n) | |
59 | #else | |
60 | static void | |
61 | rx_free_nfa_state (n) | |
62 | struct rx_nfa_state * n; | |
63 | #endif | |
64 | { | |
65 | free ((char *)n); | |
66 | } | |
67 | ||
68 | ||
69 | /* This adds an edge between two nodes, but doesn't initialize the | |
70 | * edge label. | |
71 | */ | |
72 | ||
73 | #ifdef __STDC__ | |
74 | struct rx_nfa_edge * | |
75 | rx_nfa_edge (struct rx *rx, | |
76 | enum rx_nfa_etype type, | |
77 | struct rx_nfa_state *start, | |
78 | struct rx_nfa_state *dest) | |
79 | #else | |
80 | struct rx_nfa_edge * | |
81 | rx_nfa_edge (rx, type, start, dest) | |
82 | struct rx *rx; | |
83 | enum rx_nfa_etype type; | |
84 | struct rx_nfa_state *start; | |
85 | struct rx_nfa_state *dest; | |
86 | #endif | |
87 | { | |
88 | struct rx_nfa_edge *e; | |
89 | e = (struct rx_nfa_edge *)malloc (sizeof (*e)); | |
90 | if (!e) | |
91 | return 0; | |
92 | e->next = start->edges; | |
93 | start->edges = e; | |
94 | e->type = type; | |
95 | e->dest = dest; | |
96 | return e; | |
97 | } | |
98 | ||
99 | ||
100 | #ifdef __STDC__ | |
101 | static void | |
102 | rx_free_nfa_edge (struct rx_nfa_edge * e) | |
103 | #else | |
104 | static void | |
105 | rx_free_nfa_edge (e) | |
106 | struct rx_nfa_edge * e; | |
107 | #endif | |
108 | { | |
109 | free ((char *)e); | |
110 | } | |
111 | ||
112 | ||
113 | /* This constructs a POSSIBLE_FUTURE, which is a kind epsilon-closure | |
114 | * of an NFA. These are added to an nfa automaticly by eclose_nfa. | |
115 | */ | |
116 | ||
117 | #ifdef __STDC__ | |
118 | static struct rx_possible_future * | |
119 | rx_possible_future (struct rx * rx, | |
120 | struct rx_se_list * effects) | |
121 | #else | |
122 | static struct rx_possible_future * | |
123 | rx_possible_future (rx, effects) | |
124 | struct rx * rx; | |
125 | struct rx_se_list * effects; | |
126 | #endif | |
127 | { | |
128 | struct rx_possible_future *ec; | |
129 | ec = (struct rx_possible_future *) malloc (sizeof (*ec)); | |
130 | if (!ec) | |
131 | return 0; | |
132 | ec->destset = 0; | |
133 | ec->next = 0; | |
134 | ec->effects = effects; | |
135 | return ec; | |
136 | } | |
137 | ||
138 | ||
139 | #ifdef __STDC__ | |
140 | static void | |
141 | rx_free_possible_future (struct rx_possible_future * pf) | |
142 | #else | |
143 | static void | |
144 | rx_free_possible_future (pf) | |
145 | struct rx_possible_future * pf; | |
146 | #endif | |
147 | { | |
148 | free ((char *)pf); | |
149 | } | |
150 | ||
151 | ||
152 | #ifdef __STDC__ | |
153 | static void | |
154 | rx_free_nfa_graph (struct rx *rx) | |
155 | #else | |
156 | static void | |
157 | rx_free_nfa_graph (rx) | |
158 | struct rx *rx; | |
159 | #endif | |
160 | { | |
161 | while (rx->nfa_states) | |
162 | { | |
163 | while (rx->nfa_states->edges) | |
164 | { | |
165 | switch (rx->nfa_states->edges->type) | |
166 | { | |
167 | case ne_cset: | |
168 | rx_free_cset (rx->nfa_states->edges->params.cset); | |
169 | break; | |
170 | default: | |
171 | break; | |
172 | } | |
173 | { | |
174 | struct rx_nfa_edge * e; | |
175 | e = rx->nfa_states->edges; | |
176 | rx->nfa_states->edges = rx->nfa_states->edges->next; | |
177 | rx_free_nfa_edge (e); | |
178 | } | |
179 | } /* while (rx->nfa_states->edges) */ | |
180 | { | |
181 | /* Iterate over the partial epsilon closures of rx->nfa_states */ | |
182 | struct rx_possible_future * pf = rx->nfa_states->futures; | |
183 | while (pf) | |
184 | { | |
185 | struct rx_possible_future * pft = pf; | |
186 | pf = pf->next; | |
187 | rx_free_possible_future (pft); | |
188 | } | |
189 | } | |
190 | { | |
191 | struct rx_nfa_state *n; | |
192 | n = rx->nfa_states; | |
193 | rx->nfa_states = rx->nfa_states->next; | |
194 | rx_free_nfa_state (n); | |
195 | } | |
196 | } | |
197 | } | |
198 | ||
199 | ||
200 | \f | |
201 | /* {Translating a Syntax Tree into an NFA} | |
202 | * | |
203 | */ | |
204 | ||
205 | ||
206 | /* This is the Thompson regexp->nfa algorithm. | |
207 | * It is modified to allow for `side-effect epsilons.' Those are | |
208 | * edges that are taken whenever a similarly placed epsilon edge | |
209 | * would be, but which also imply that some side effect occurs | |
210 | * when the edge is taken. | |
211 | * | |
212 | * Side effects are used to model parts of the pattern langauge | |
213 | * that are not regular. | |
214 | */ | |
215 | ||
216 | #ifdef __STDC__ | |
217 | int | |
218 | rx_build_nfa (struct rx *rx, | |
219 | struct rexp_node *rexp, | |
220 | struct rx_nfa_state **start, | |
221 | struct rx_nfa_state **end) | |
222 | #else | |
223 | int | |
224 | rx_build_nfa (rx, rexp, start, end) | |
225 | struct rx *rx; | |
226 | struct rexp_node *rexp; | |
227 | struct rx_nfa_state **start; | |
228 | struct rx_nfa_state **end; | |
229 | #endif | |
230 | { | |
231 | struct rx_nfa_edge *edge; | |
232 | ||
233 | /* Start & end nodes may have been allocated by the caller. */ | |
234 | *start = *start ? *start : rx_nfa_state (rx); | |
235 | ||
236 | if (!*start) | |
237 | return 0; | |
238 | ||
239 | if (!rexp) | |
240 | { | |
241 | *end = *start; | |
242 | return 1; | |
243 | } | |
244 | ||
245 | *end = *end ? *end : rx_nfa_state (rx); | |
246 | ||
247 | if (!*end) | |
248 | { | |
249 | rx_free_nfa_state (*start); | |
250 | return 0; | |
251 | } | |
252 | ||
253 | switch (rexp->type) | |
254 | { | |
255 | case r_cset: | |
256 | edge = rx_nfa_edge (rx, ne_cset, *start, *end); | |
257 | (*start)->has_cset_edges = 1; | |
258 | if (!edge) | |
259 | return 0; | |
260 | edge->params.cset = rx_copy_cset (rx->local_cset_size, | |
261 | rexp->params.cset); | |
262 | if (!edge->params.cset) | |
263 | { | |
264 | rx_free_nfa_edge (edge); | |
265 | return 0; | |
266 | } | |
267 | return 1; | |
268 | ||
269 | case r_string: | |
270 | { | |
271 | if (rexp->params.cstr.len == 1) | |
272 | { | |
273 | edge = rx_nfa_edge (rx, ne_cset, *start, *end); | |
274 | (*start)->has_cset_edges = 1; | |
275 | if (!edge) | |
276 | return 0; | |
277 | edge->params.cset = rx_cset (rx->local_cset_size); | |
278 | if (!edge->params.cset) | |
279 | { | |
280 | rx_free_nfa_edge (edge); | |
281 | return 0; | |
282 | } | |
283 | RX_bitset_enjoin (edge->params.cset, rexp->params.cstr.contents[0]); | |
284 | return 1; | |
285 | } | |
286 | else | |
287 | { | |
288 | struct rexp_node copied; | |
289 | struct rx_nfa_state * shared; | |
290 | ||
291 | copied = *rexp; | |
292 | shared = 0; | |
293 | copied.params.cstr.len--; | |
294 | copied.params.cstr.contents++; | |
295 | if (!rx_build_nfa (rx, &copied, &shared, end)) | |
296 | return 0; | |
297 | copied.params.cstr.len = 1; | |
298 | copied.params.cstr.contents--; | |
299 | return rx_build_nfa (rx, &copied, start, &shared); | |
300 | } | |
301 | } | |
302 | ||
303 | case r_opt: | |
304 | return (rx_build_nfa (rx, rexp->params.pair.left, start, end) | |
305 | && rx_nfa_edge (rx, ne_epsilon, *start, *end)); | |
306 | ||
307 | case r_plus: | |
308 | { | |
309 | struct rx_nfa_state * star_start = 0; | |
310 | struct rx_nfa_state * star_end = 0; | |
311 | struct rx_nfa_state * shared; | |
312 | ||
313 | shared = 0; | |
314 | if (!rx_build_nfa (rx, rexp->params.pair.left, start, &shared)) | |
315 | return 0; | |
316 | return (rx_build_nfa (rx, rexp->params.pair.left, | |
317 | &star_start, &star_end) | |
318 | && star_start | |
319 | && star_end | |
320 | && rx_nfa_edge (rx, ne_epsilon, star_start, star_end) | |
321 | && rx_nfa_edge (rx, ne_epsilon, shared, star_start) | |
322 | && rx_nfa_edge (rx, ne_epsilon, star_end, *end) | |
323 | && rx_nfa_edge (rx, ne_epsilon, star_end, star_start)); | |
324 | } | |
325 | ||
326 | case r_interval: | |
327 | case r_star: | |
328 | { | |
329 | struct rx_nfa_state * star_start = 0; | |
330 | struct rx_nfa_state * star_end = 0; | |
331 | return (rx_build_nfa (rx, rexp->params.pair.left, | |
332 | &star_start, &star_end) | |
333 | && star_start | |
334 | && star_end | |
335 | && rx_nfa_edge (rx, ne_epsilon, star_start, star_end) | |
336 | && rx_nfa_edge (rx, ne_epsilon, *start, star_start) | |
337 | && rx_nfa_edge (rx, ne_epsilon, star_end, *end) | |
338 | ||
339 | && rx_nfa_edge (rx, ne_epsilon, star_end, star_start)); | |
340 | } | |
341 | ||
342 | case r_cut: | |
343 | { | |
344 | struct rx_nfa_state * cut_end = 0; | |
345 | ||
346 | cut_end = rx_nfa_state (rx); | |
347 | if (!(cut_end && rx_nfa_edge (rx, ne_epsilon, *start, cut_end))) | |
348 | { | |
349 | rx_free_nfa_state (*start); | |
350 | rx_free_nfa_state (*end); | |
351 | if (cut_end) | |
352 | rx_free_nfa_state (cut_end); | |
353 | return 0; | |
354 | } | |
355 | cut_end->is_final = rexp->params.intval; | |
356 | return 1; | |
357 | } | |
358 | ||
359 | case r_parens: | |
360 | return rx_build_nfa (rx, rexp->params.pair.left, start, end); | |
361 | ||
362 | case r_concat: | |
363 | { | |
364 | struct rx_nfa_state *shared = 0; | |
365 | return | |
366 | (rx_build_nfa (rx, rexp->params.pair.left, start, &shared) | |
367 | && rx_build_nfa (rx, rexp->params.pair.right, &shared, end)); | |
368 | } | |
369 | ||
370 | case r_alternate: | |
371 | { | |
372 | struct rx_nfa_state *ls = 0; | |
373 | struct rx_nfa_state *le = 0; | |
374 | struct rx_nfa_state *rs = 0; | |
375 | struct rx_nfa_state *re = 0; | |
376 | return (rx_build_nfa (rx, rexp->params.pair.left, &ls, &le) | |
377 | && rx_build_nfa (rx, rexp->params.pair.right, &rs, &re) | |
378 | && rx_nfa_edge (rx, ne_epsilon, *start, ls) | |
379 | && rx_nfa_edge (rx, ne_epsilon, *start, rs) | |
380 | && rx_nfa_edge (rx, ne_epsilon, le, *end) | |
381 | && rx_nfa_edge (rx, ne_epsilon, re, *end)); | |
382 | } | |
383 | ||
384 | case r_context: | |
385 | edge = rx_nfa_edge (rx, ne_side_effect, *start, *end); | |
386 | if (!edge) | |
387 | return 0; | |
388 | edge->params.side_effect = (void *)rexp->params.intval; | |
389 | return 1; | |
390 | } | |
391 | ||
392 | /* this should never happen */ | |
393 | return 0; | |
394 | } | |
395 | ||
396 | \f | |
397 | /* {Low Level Data Structures for the Static NFA->SuperNFA Analysis} | |
398 | * | |
399 | * There are side effect lists -- lists of side effects occuring | |
400 | * along an uninterrupted, acyclic path of side-effect epsilon edges. | |
401 | * Such paths are collapsed to single edges in the course of computing | |
402 | * epsilon closures. The resulting single edges are labled with a list | |
403 | * of all the side effects from the original multi-edge path. Equivalent | |
404 | * lists of side effects are made == by the constructors below. | |
405 | * | |
406 | * There are also nfa state sets. These are used to hold a list of all | |
407 | * states reachable from a starting state for a given type of transition | |
408 | * and side effect list. These are also hash-consed. | |
409 | */ | |
410 | ||
411 | ||
412 | ||
413 | /* The next several functions compare, construct, etc. lists of side | |
414 | * effects. See ECLOSE_NFA (below) for details. | |
415 | */ | |
416 | ||
417 | /* Ordering of rx_se_list | |
418 | * (-1, 0, 1 return value convention). | |
419 | */ | |
420 | ||
421 | #ifdef __STDC__ | |
422 | static int | |
423 | se_list_cmp (void * va, void * vb) | |
424 | #else | |
425 | static int | |
426 | se_list_cmp (va, vb) | |
427 | void * va; | |
428 | void * vb; | |
429 | #endif | |
430 | { | |
431 | struct rx_se_list * a = (struct rx_se_list *)va; | |
432 | struct rx_se_list * b = (struct rx_se_list *)vb; | |
433 | ||
434 | return ((va == vb) | |
435 | ? 0 | |
436 | : (!va | |
437 | ? -1 | |
438 | : (!vb | |
439 | ? 1 | |
440 | : ((long)a->car < (long)b->car | |
441 | ? 1 | |
442 | : ((long)a->car > (long)b->car | |
443 | ? -1 | |
444 | : se_list_cmp ((void *)a->cdr, (void *)b->cdr)))))); | |
445 | } | |
446 | ||
447 | ||
448 | #ifdef __STDC__ | |
449 | static int | |
450 | se_list_equal (void * va, void * vb) | |
451 | #else | |
452 | static int | |
453 | se_list_equal (va, vb) | |
454 | void * va; | |
455 | void * vb; | |
456 | #endif | |
457 | { | |
458 | return !(se_list_cmp (va, vb)); | |
459 | } | |
460 | ||
461 | static struct rx_hash_rules se_list_hash_rules = { se_list_equal, 0, 0, 0, 0 }; | |
462 | ||
463 | ||
464 | #ifdef __STDC__ | |
465 | static struct rx_se_list * | |
466 | side_effect_cons (struct rx * rx, | |
467 | void * se, struct rx_se_list * list) | |
468 | #else | |
469 | static struct rx_se_list * | |
470 | side_effect_cons (rx, se, list) | |
471 | struct rx * rx; | |
472 | void * se; | |
473 | struct rx_se_list * list; | |
474 | #endif | |
475 | { | |
476 | struct rx_se_list * l; | |
477 | l = ((struct rx_se_list *) malloc (sizeof (*l))); | |
478 | if (!l) | |
479 | return 0; | |
480 | l->car = se; | |
481 | l->cdr = list; | |
482 | return l; | |
483 | } | |
484 | ||
485 | ||
486 | #ifdef __STDC__ | |
487 | static struct rx_se_list * | |
488 | hash_cons_se_prog (struct rx * rx, | |
489 | struct rx_hash * memo, | |
490 | void * car, struct rx_se_list * cdr) | |
491 | #else | |
492 | static struct rx_se_list * | |
493 | hash_cons_se_prog (rx, memo, car, cdr) | |
494 | struct rx * rx; | |
495 | struct rx_hash * memo; | |
496 | void * car; | |
497 | struct rx_se_list * cdr; | |
498 | #endif | |
499 | { | |
500 | long hash = (long)car ^ (long)cdr; | |
501 | struct rx_se_list template; | |
502 | ||
503 | template.car = car; | |
504 | template.cdr = cdr; | |
505 | { | |
506 | struct rx_hash_item * it = rx_hash_store (memo, hash, | |
507 | (void *)&template, | |
508 | &se_list_hash_rules); | |
509 | if (!it) | |
510 | return 0; | |
511 | if (it->data == (void *)&template) | |
512 | { | |
513 | struct rx_se_list * consed; | |
514 | consed = (struct rx_se_list *) malloc (sizeof (*consed)); | |
515 | *consed = template; | |
516 | it->data = (void *)consed; | |
517 | } | |
518 | return (struct rx_se_list *)it->data; | |
519 | } | |
520 | } | |
521 | ||
522 | ||
523 | #ifdef __STDC__ | |
524 | static struct rx_se_list * | |
525 | hash_se_prog (struct rx * rx, struct rx_hash * memo, struct rx_se_list * prog) | |
526 | #else | |
527 | static struct rx_se_list * | |
528 | hash_se_prog (rx, memo, prog) | |
529 | struct rx * rx; | |
530 | struct rx_hash * memo; | |
531 | struct rx_se_list * prog; | |
532 | #endif | |
533 | { | |
534 | struct rx_se_list * answer = 0; | |
535 | while (prog) | |
536 | { | |
537 | answer = hash_cons_se_prog (rx, memo, prog->car, answer); | |
538 | if (!answer) | |
539 | return 0; | |
540 | prog = prog->cdr; | |
541 | } | |
542 | return answer; | |
543 | } | |
544 | ||
545 | ||
546 | ||
547 | /* {Constructors, etc. for NFA State Sets} | |
548 | */ | |
549 | ||
550 | #ifdef __STDC__ | |
551 | static int | |
552 | nfa_set_cmp (void * va, void * vb) | |
553 | #else | |
554 | static int | |
555 | nfa_set_cmp (va, vb) | |
556 | void * va; | |
557 | void * vb; | |
558 | #endif | |
559 | { | |
560 | struct rx_nfa_state_set * a = (struct rx_nfa_state_set *)va; | |
561 | struct rx_nfa_state_set * b = (struct rx_nfa_state_set *)vb; | |
562 | ||
563 | return ((va == vb) | |
564 | ? 0 | |
565 | : (!va | |
566 | ? -1 | |
567 | : (!vb | |
568 | ? 1 | |
569 | : (a->car->id < b->car->id | |
570 | ? 1 | |
571 | : (a->car->id > b->car->id | |
572 | ? -1 | |
573 | : nfa_set_cmp ((void *)a->cdr, (void *)b->cdr)))))); | |
574 | } | |
575 | ||
576 | #ifdef __STDC__ | |
577 | static int | |
578 | nfa_set_equal (void * va, void * vb) | |
579 | #else | |
580 | static int | |
581 | nfa_set_equal (va, vb) | |
582 | void * va; | |
583 | void * vb; | |
584 | #endif | |
585 | { | |
586 | return !nfa_set_cmp (va, vb); | |
587 | } | |
588 | ||
589 | static struct rx_hash_rules nfa_set_hash_rules = { nfa_set_equal, 0, 0, 0, 0 }; | |
590 | ||
591 | ||
592 | #ifdef __STDC__ | |
593 | static struct rx_nfa_state_set * | |
594 | nfa_set_cons (struct rx * rx, | |
595 | struct rx_hash * memo, struct rx_nfa_state * state, | |
596 | struct rx_nfa_state_set * set) | |
597 | #else | |
598 | static struct rx_nfa_state_set * | |
599 | nfa_set_cons (rx, memo, state, set) | |
600 | struct rx * rx; | |
601 | struct rx_hash * memo; | |
602 | struct rx_nfa_state * state; | |
603 | struct rx_nfa_state_set * set; | |
604 | #endif | |
605 | { | |
606 | struct rx_nfa_state_set template; | |
607 | struct rx_hash_item * node; | |
608 | template.car = state; | |
609 | template.cdr = set; | |
610 | node = rx_hash_store (memo, | |
611 | (((long)state) >> 8) ^ (long)set, | |
612 | &template, &nfa_set_hash_rules); | |
613 | if (!node) | |
614 | return 0; | |
615 | if (node->data == &template) | |
616 | { | |
617 | struct rx_nfa_state_set * l; | |
618 | l = (struct rx_nfa_state_set *) malloc (sizeof (*l)); | |
619 | node->data = (void *) l; | |
620 | if (!l) | |
621 | return 0; | |
622 | *l = template; | |
623 | } | |
624 | return (struct rx_nfa_state_set *)node->data; | |
625 | } | |
626 | ||
627 | ||
628 | #ifdef __STDC__ | |
629 | static struct rx_nfa_state_set * | |
630 | nfa_set_enjoin (struct rx * rx, | |
631 | struct rx_hash * memo, struct rx_nfa_state * state, | |
632 | struct rx_nfa_state_set * set) | |
633 | #else | |
634 | static struct rx_nfa_state_set * | |
635 | nfa_set_enjoin (rx, memo, state, set) | |
636 | struct rx * rx; | |
637 | struct rx_hash * memo; | |
638 | struct rx_nfa_state * state; | |
639 | struct rx_nfa_state_set * set; | |
640 | #endif | |
641 | { | |
642 | if (!set || (state->id < set->car->id)) | |
643 | return nfa_set_cons (rx, memo, state, set); | |
644 | if (state->id == set->car->id) | |
645 | return set; | |
646 | else | |
647 | { | |
648 | struct rx_nfa_state_set * newcdr | |
649 | = nfa_set_enjoin (rx, memo, state, set->cdr); | |
650 | if (newcdr != set->cdr) | |
651 | set = nfa_set_cons (rx, memo, set->car, newcdr); | |
652 | return set; | |
653 | } | |
654 | } | |
655 | ||
656 | \f | |
657 | /* {Computing Epsilon/Side-effect Closures.} | |
658 | */ | |
659 | ||
660 | struct eclose_frame | |
661 | { | |
662 | struct rx_se_list *prog_backwards; | |
663 | }; | |
664 | ||
665 | ||
666 | /* This is called while computing closures for "outnode". | |
667 | * The current node in the traversal is "node". | |
668 | * "frame" contains a list of a all side effects between | |
669 | * "outnode" and "node" from most to least recent. | |
670 | * | |
671 | * Explores forward from "node", adding new possible | |
672 | * futures to outnode. | |
673 | * | |
674 | * Returns 0 on allocation failure. | |
675 | */ | |
676 | ||
677 | #ifdef __STDC__ | |
678 | static int | |
679 | eclose_node (struct rx *rx, struct rx_nfa_state *outnode, | |
680 | struct rx_nfa_state *node, struct eclose_frame *frame) | |
681 | #else | |
682 | static int | |
683 | eclose_node (rx, outnode, node, frame) | |
684 | struct rx *rx; | |
685 | struct rx_nfa_state *outnode; | |
686 | struct rx_nfa_state *node; | |
687 | struct eclose_frame *frame; | |
688 | #endif | |
689 | { | |
690 | struct rx_nfa_edge *e = node->edges; | |
691 | ||
692 | /* For each node, we follow all epsilon paths to build the closure. | |
693 | * The closure omits nodes that have only epsilon edges. | |
694 | * The closure is split into partial closures -- all the states in | |
695 | * a partial closure are reached by crossing the same list of | |
696 | * of side effects (though not necessarily the same path). | |
697 | */ | |
698 | if (node->mark) | |
699 | return 1; | |
700 | node->mark = 1; | |
701 | ||
702 | /* If "node" has more than just epsilon and | |
703 | * and side-effect transitions (id >= 0), or is final, | |
704 | * then it has to be added to the possible futures | |
705 | * of "outnode". | |
706 | */ | |
707 | if (node->id >= 0 || node->is_final) | |
708 | { | |
709 | struct rx_possible_future **ec; | |
710 | struct rx_se_list * prog_in_order; | |
711 | int cmp; | |
712 | ||
713 | prog_in_order = ((struct rx_se_list *)hash_se_prog (rx, | |
714 | &rx->se_list_memo, | |
715 | frame->prog_backwards)); | |
716 | ||
717 | ec = &outnode->futures; | |
718 | ||
719 | while (*ec) | |
720 | { | |
721 | cmp = se_list_cmp ((void *)(*ec)->effects, (void *)prog_in_order); | |
722 | if (cmp <= 0) | |
723 | break; | |
724 | ec = &(*ec)->next; | |
725 | } | |
726 | ||
727 | if (!*ec || (cmp < 0)) | |
728 | { | |
729 | struct rx_possible_future * pf; | |
730 | pf = rx_possible_future (rx, prog_in_order); | |
731 | if (!pf) | |
732 | return 0; | |
733 | pf->next = *ec; | |
734 | *ec = pf; | |
735 | } | |
736 | if (node->id >= 0) | |
737 | { | |
738 | (*ec)->destset = nfa_set_enjoin (rx, &rx->set_list_memo, | |
739 | node, (*ec)->destset); | |
740 | if (!(*ec)->destset) | |
741 | return 0; | |
742 | } | |
743 | } | |
744 | ||
745 | /* Recurse on outgoing epsilon and side effect nodes. | |
746 | */ | |
747 | while (e) | |
748 | { | |
749 | switch (e->type) | |
750 | { | |
751 | case ne_epsilon: | |
752 | if (!eclose_node (rx, outnode, e->dest, frame)) | |
753 | return 0; | |
754 | break; | |
755 | case ne_side_effect: | |
756 | { | |
757 | frame->prog_backwards = side_effect_cons (rx, | |
758 | e->params.side_effect, | |
759 | frame->prog_backwards); | |
760 | if (!frame->prog_backwards) | |
761 | return 0; | |
762 | if (!eclose_node (rx, outnode, e->dest, frame)) | |
763 | return 0; | |
764 | { | |
765 | struct rx_se_list * dying = frame->prog_backwards; | |
766 | frame->prog_backwards = frame->prog_backwards->cdr; | |
767 | free ((char *)dying); | |
768 | } | |
769 | break; | |
770 | } | |
771 | default: | |
772 | break; | |
773 | } | |
774 | e = e->next; | |
775 | } | |
776 | node->mark = 0; | |
777 | return 1; | |
778 | } | |
779 | ||
780 | ||
781 | #ifdef __STDC__ | |
782 | struct rx_possible_future * | |
783 | rx_state_possible_futures (struct rx * rx, struct rx_nfa_state * n) | |
784 | #else | |
785 | struct rx_possible_future * | |
786 | rx_state_possible_futures (rx, n) | |
787 | struct rx * rx; | |
788 | struct rx_nfa_state * n; | |
789 | #endif | |
790 | { | |
791 | if (n->futures_computed) | |
792 | return n->futures; | |
793 | else | |
794 | { | |
795 | struct eclose_frame frame; | |
796 | frame.prog_backwards = 0; | |
797 | if (!eclose_node (rx, n, n, &frame)) | |
798 | return 0; | |
799 | else | |
800 | { | |
801 | n->futures_computed = 1; | |
802 | return n->futures; | |
803 | } | |
804 | } | |
805 | } | |
806 | ||
807 | ||
808 | \f | |
809 | /* {Storing the NFA in a Contiguous Region of Memory} | |
810 | */ | |
811 | ||
812 | ||
813 | ||
814 | #ifdef __STDC__ | |
815 | static void | |
816 | se_memo_freer (struct rx_hash_item * node) | |
817 | #else | |
818 | static void | |
819 | se_memo_freer (node) | |
820 | struct rx_hash_item * node; | |
821 | #endif | |
822 | { | |
823 | free ((char *)node->data); | |
824 | } | |
825 | ||
826 | ||
827 | #ifdef __STDC__ | |
828 | static void | |
829 | nfa_set_freer (struct rx_hash_item * node) | |
830 | #else | |
831 | static void | |
832 | nfa_set_freer (node) | |
833 | struct rx_hash_item * node; | |
834 | #endif | |
835 | { | |
836 | free ((char *)node->data); | |
837 | } | |
838 | ||
839 | #ifdef __STDC__ | |
840 | void | |
841 | rx_free_nfa (struct rx *rx) | |
842 | #else | |
843 | void | |
844 | rx_free_nfa (rx) | |
845 | struct rx *rx; | |
846 | #endif | |
847 | { | |
848 | rx_free_hash_table (&rx->se_list_memo, se_memo_freer, &se_list_hash_rules); | |
849 | rx_bzero ((char *)&rx->se_list_memo, sizeof (rx->se_list_memo)); | |
850 | rx_free_hash_table (&rx->set_list_memo, nfa_set_freer, &nfa_set_hash_rules); | |
851 | rx_bzero ((char *)&rx->set_list_memo, sizeof (rx->set_list_memo)); | |
852 | rx_free_nfa_graph (rx); | |
853 | } |