/* Copyright (C) 1995, 1996 Tom Lord
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU Library General Public License as published by
 * the Free Software Foundation; either version 2, or (at your option)
 * any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Library General Public License for more details.
 *
 * You should have received a copy of the GNU Library General Public License
 * along with this software; see the file COPYING. If not, write to
 * the Free Software Foundation, 59 Temple Place - Suite 330,
 * Boston, MA 02111-1307, USA.
 */

\f
/*
 * Tom Lord (lord@cygnus.com, lord@gnu.ai.mit.edu)
 */
\f

#include "rxall.h"
#include "rxnfa.h"

\f

#define remalloc(M, S) (M ? realloc (M, S) : malloc (S))

\f
/* {Low Level Data Structure Code}
 */

/* Constructs a new nfa node. */
#ifdef __STDC__
struct rx_nfa_state *
rx_nfa_state (struct rx *rx)
#else
struct rx_nfa_state *
rx_nfa_state (rx)
     struct rx *rx;
#endif
{
  struct rx_nfa_state * n = (struct rx_nfa_state *)malloc (sizeof (*n));
  if (!n)
    return 0;
  rx_bzero ((char *)n, sizeof (*n));
  n->next = rx->nfa_states;
  rx->nfa_states = n;
  return n;
}


#ifdef __STDC__
static void
rx_free_nfa_state (struct rx_nfa_state * n)
#else
static void
rx_free_nfa_state (n)
     struct rx_nfa_state * n;
#endif
{
  free ((char *)n);
}


/* This adds an edge between two nodes, but doesn't initialize the
 * edge label.
 */
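/* Note that the caller is expected to fill in the edge label afterwards:
 * rx_build_nfa (below) sets edge->params.cset for ne_cset edges and
 * edge->params.side_effect for ne_side_effect edges.
 */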

#ifdef __STDC__
struct rx_nfa_edge *
rx_nfa_edge (struct rx *rx,
             enum rx_nfa_etype type,
             struct rx_nfa_state *start,
             struct rx_nfa_state *dest)
#else
struct rx_nfa_edge *
rx_nfa_edge (rx, type, start, dest)
     struct rx *rx;
     enum rx_nfa_etype type;
     struct rx_nfa_state *start;
     struct rx_nfa_state *dest;
#endif
{
  struct rx_nfa_edge *e;
  e = (struct rx_nfa_edge *)malloc (sizeof (*e));
  if (!e)
    return 0;
  e->next = start->edges;
  start->edges = e;
  e->type = type;
  e->dest = dest;
  return e;
}


#ifdef __STDC__
static void
rx_free_nfa_edge (struct rx_nfa_edge * e)
#else
static void
rx_free_nfa_edge (e)
     struct rx_nfa_edge * e;
#endif
{
  free ((char *)e);
}


/* This constructs a POSSIBLE_FUTURE, which is a kind of epsilon-closure
 * of an NFA. These are added to an NFA automatically by eclose_nfa.
 */
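/* Concretely, a possible future pairs a (hash-consed) list of side
 * effects with the set of interesting states reachable by crossing
 * exactly those side effects. For example, if state S can reach states
 * A and B across side effect s1, and state C across no side effects at
 * all (A, B, and C having cset edges, i.e. id >= 0), then S ends up
 * with two possible futures: one with effects (s1) and destset {A, B},
 * and one with an empty effects list and destset {C}.
 */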

#ifdef __STDC__
static struct rx_possible_future *
rx_possible_future (struct rx * rx,
                    struct rx_se_list * effects)
#else
static struct rx_possible_future *
rx_possible_future (rx, effects)
     struct rx * rx;
     struct rx_se_list * effects;
#endif
{
  struct rx_possible_future *ec;
  ec = (struct rx_possible_future *) malloc (sizeof (*ec));
  if (!ec)
    return 0;
  ec->destset = 0;
  ec->next = 0;
  ec->effects = effects;
  return ec;
}


#ifdef __STDC__
static void
rx_free_possible_future (struct rx_possible_future * pf)
#else
static void
rx_free_possible_future (pf)
     struct rx_possible_future * pf;
#endif
{
  free ((char *)pf);
}


#ifdef __STDC__
static void
rx_free_nfa_graph (struct rx *rx)
#else
static void
rx_free_nfa_graph (rx)
     struct rx *rx;
#endif
{
  while (rx->nfa_states)
    {
      while (rx->nfa_states->edges)
        {
          switch (rx->nfa_states->edges->type)
            {
            case ne_cset:
              rx_free_cset (rx->nfa_states->edges->params.cset);
              break;
            default:
              break;
            }
          {
            struct rx_nfa_edge * e;
            e = rx->nfa_states->edges;
            rx->nfa_states->edges = rx->nfa_states->edges->next;
            rx_free_nfa_edge (e);
          }
        } /* while (rx->nfa_states->edges) */
      {
        /* Iterate over the partial epsilon closures of rx->nfa_states */
        struct rx_possible_future * pf = rx->nfa_states->futures;
        while (pf)
          {
            struct rx_possible_future * pft = pf;
            pf = pf->next;
            rx_free_possible_future (pft);
          }
      }
      {
        struct rx_nfa_state *n;
        n = rx->nfa_states;
        rx->nfa_states = rx->nfa_states->next;
        rx_free_nfa_state (n);
      }
    }
}


\f
/* {Translating a Syntax Tree into an NFA}
 *
 */


/* This is the Thompson regexp->nfa algorithm.
 * It is modified to allow for `side-effect epsilons.' Those are
 * edges that are taken whenever a similarly placed epsilon edge
 * would be, but which also imply that some side effect occurs
 * when the edge is taken.
 *
 * Side effects are used to model parts of the pattern language
 * that are not regular.
 */
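/* By way of example, the r_star case below gives the pattern `a*' this
 * shape: the sub-NFA for `a' is built between two fresh states
 * (star_start --a--> star_end) and then four epsilon edges are added:
 *
 *   *start     --eps--> star_start   (enter the loop)
 *   star_start --eps--> star_end     (match zero occurrences)
 *   star_end   --eps--> star_start   (repeat)
 *   star_end   --eps--> *end         (leave the loop)
 *
 * The other cases compose sub-NFAs in the same spirit: r_concat chains
 * two sub-NFAs through a shared middle state, r_alternate joins two
 * sub-NFAs to *start and *end with epsilon edges, and r_context emits a
 * single ne_side_effect edge, which is treated like an epsilon edge
 * that also records a side effect.
 */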

#ifdef __STDC__
int
rx_build_nfa (struct rx *rx,
              struct rexp_node *rexp,
              struct rx_nfa_state **start,
              struct rx_nfa_state **end)
#else
int
rx_build_nfa (rx, rexp, start, end)
     struct rx *rx;
     struct rexp_node *rexp;
     struct rx_nfa_state **start;
     struct rx_nfa_state **end;
#endif
{
  struct rx_nfa_edge *edge;

  /* Start & end nodes may have been allocated by the caller. */
  *start = *start ? *start : rx_nfa_state (rx);

  if (!*start)
    return 0;

  if (!rexp)
    {
      *end = *start;
      return 1;
    }

  *end = *end ? *end : rx_nfa_state (rx);

  if (!*end)
    {
      rx_free_nfa_state (*start);
      return 0;
    }

  switch (rexp->type)
    {
    case r_cset:
      edge = rx_nfa_edge (rx, ne_cset, *start, *end);
      (*start)->has_cset_edges = 1;
      if (!edge)
        return 0;
      edge->params.cset = rx_copy_cset (rx->local_cset_size,
                                        rexp->params.cset);
      if (!edge->params.cset)
        {
          rx_free_nfa_edge (edge);
          return 0;
        }
      return 1;

    case r_string:
      {
        if (rexp->params.cstr.len == 1)
          {
            edge = rx_nfa_edge (rx, ne_cset, *start, *end);
            (*start)->has_cset_edges = 1;
            if (!edge)
              return 0;
            edge->params.cset = rx_cset (rx->local_cset_size);
            if (!edge->params.cset)
              {
                rx_free_nfa_edge (edge);
                return 0;
              }
            RX_bitset_enjoin (edge->params.cset, rexp->params.cstr.contents[0]);
            return 1;
          }
        else
          {
            struct rexp_node copied;
            struct rx_nfa_state * shared;

            copied = *rexp;
            shared = 0;
            copied.params.cstr.len--;
            copied.params.cstr.contents++;
            if (!rx_build_nfa (rx, &copied, &shared, end))
              return 0;
            copied.params.cstr.len = 1;
            copied.params.cstr.contents--;
            return rx_build_nfa (rx, &copied, start, &shared);
          }
      }

    case r_opt:
      return (rx_build_nfa (rx, rexp->params.pair.left, start, end)
              && rx_nfa_edge (rx, ne_epsilon, *start, *end));

    case r_plus:
      {
        struct rx_nfa_state * star_start = 0;
        struct rx_nfa_state * star_end = 0;
        struct rx_nfa_state * shared;

        shared = 0;
        if (!rx_build_nfa (rx, rexp->params.pair.left, start, &shared))
          return 0;
        return (rx_build_nfa (rx, rexp->params.pair.left,
                              &star_start, &star_end)
                && star_start
                && star_end
                && rx_nfa_edge (rx, ne_epsilon, star_start, star_end)
                && rx_nfa_edge (rx, ne_epsilon, shared, star_start)
                && rx_nfa_edge (rx, ne_epsilon, star_end, *end)
                && rx_nfa_edge (rx, ne_epsilon, star_end, star_start));
      }

    case r_interval:
    case r_star:
      {
        struct rx_nfa_state * star_start = 0;
        struct rx_nfa_state * star_end = 0;
        return (rx_build_nfa (rx, rexp->params.pair.left,
                              &star_start, &star_end)
                && star_start
                && star_end
                && rx_nfa_edge (rx, ne_epsilon, star_start, star_end)
                && rx_nfa_edge (rx, ne_epsilon, *start, star_start)
                && rx_nfa_edge (rx, ne_epsilon, star_end, *end)
                && rx_nfa_edge (rx, ne_epsilon, star_end, star_start));
      }

    case r_cut:
      {
        struct rx_nfa_state * cut_end = 0;

        cut_end = rx_nfa_state (rx);
        if (!(cut_end && rx_nfa_edge (rx, ne_epsilon, *start, cut_end)))
          {
            rx_free_nfa_state (*start);
            rx_free_nfa_state (*end);
            if (cut_end)
              rx_free_nfa_state (cut_end);
            return 0;
          }
        cut_end->is_final = rexp->params.intval;
        return 1;
      }

    case r_parens:
      return rx_build_nfa (rx, rexp->params.pair.left, start, end);

    case r_concat:
      {
        struct rx_nfa_state *shared = 0;
        return
          (rx_build_nfa (rx, rexp->params.pair.left, start, &shared)
           && rx_build_nfa (rx, rexp->params.pair.right, &shared, end));
      }

    case r_alternate:
      {
        struct rx_nfa_state *ls = 0;
        struct rx_nfa_state *le = 0;
        struct rx_nfa_state *rs = 0;
        struct rx_nfa_state *re = 0;
        return (rx_build_nfa (rx, rexp->params.pair.left, &ls, &le)
                && rx_build_nfa (rx, rexp->params.pair.right, &rs, &re)
                && rx_nfa_edge (rx, ne_epsilon, *start, ls)
                && rx_nfa_edge (rx, ne_epsilon, *start, rs)
                && rx_nfa_edge (rx, ne_epsilon, le, *end)
                && rx_nfa_edge (rx, ne_epsilon, re, *end));
      }

    case r_context:
      edge = rx_nfa_edge (rx, ne_side_effect, *start, *end);
      if (!edge)
        return 0;
      edge->params.side_effect = (void *)rexp->params.intval;
      return 1;
    }

  /* this should never happen */
  return 0;
}

\f
/* {Low Level Data Structures for the Static NFA->SuperNFA Analysis}
 *
 * There are side effect lists -- lists of side effects occurring
 * along an uninterrupted, acyclic path of side-effect epsilon edges.
 * Such paths are collapsed to single edges in the course of computing
 * epsilon closures. The resulting single edges are labeled with a list
 * of all the side effects from the original multi-edge path. Equivalent
 * lists of side effects are made pointer-identical (==) by the
 * constructors below.
 *
 * There are also NFA state sets. These are used to hold a list of all
 * states reachable from a starting state for a given type of transition
 * and side effect list. These are also hash-consed.
 */
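/* Hash-consing means that each constructor first looks its (car, cdr)
 * pair up in a memo table (rx->se_list_memo or rx->set_list_memo) and
 * returns the previously built cell if one exists. For example, if two
 * different epsilon paths both cross side effects s1 then s2, both
 * calls to hash_se_prog below return the very same rx_se_list pointer,
 * so the resulting lists can later be compared with == or with the
 * cheap pointer comparison inside se_list_cmp.
 */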



/* The next several functions compare, construct, etc. lists of side
 * effects. See ECLOSE_NFA (below) for details.
 */

/* Ordering of rx_se_list
 * (-1, 0, 1 return value convention).
 */

#ifdef __STDC__
static int
se_list_cmp (void * va, void * vb)
#else
static int
se_list_cmp (va, vb)
     void * va;
     void * vb;
#endif
{
  struct rx_se_list * a = (struct rx_se_list *)va;
  struct rx_se_list * b = (struct rx_se_list *)vb;

  return ((va == vb)
          ? 0
          : (!va
             ? -1
             : (!vb
                ? 1
                : ((long)a->car < (long)b->car
                   ? 1
                   : ((long)a->car > (long)b->car
                      ? -1
                      : se_list_cmp ((void *)a->cdr, (void *)b->cdr))))));
}


#ifdef __STDC__
static int
se_list_equal (void * va, void * vb)
#else
static int
se_list_equal (va, vb)
     void * va;
     void * vb;
#endif
{
  return !(se_list_cmp (va, vb));
}

static struct rx_hash_rules se_list_hash_rules = { se_list_equal, 0, 0, 0, 0 };


#ifdef __STDC__
static struct rx_se_list *
side_effect_cons (struct rx * rx,
                  void * se, struct rx_se_list * list)
#else
static struct rx_se_list *
side_effect_cons (rx, se, list)
     struct rx * rx;
     void * se;
     struct rx_se_list * list;
#endif
{
  struct rx_se_list * l;
  l = ((struct rx_se_list *) malloc (sizeof (*l)));
  if (!l)
    return 0;
  l->car = se;
  l->cdr = list;
  return l;
}


#ifdef __STDC__
static struct rx_se_list *
hash_cons_se_prog (struct rx * rx,
                   struct rx_hash * memo,
                   void * car, struct rx_se_list * cdr)
#else
static struct rx_se_list *
hash_cons_se_prog (rx, memo, car, cdr)
     struct rx * rx;
     struct rx_hash * memo;
     void * car;
     struct rx_se_list * cdr;
#endif
{
  long hash = (long)car ^ (long)cdr;
  struct rx_se_list template;

  template.car = car;
  template.cdr = cdr;
  {
    struct rx_hash_item * it = rx_hash_store (memo, hash,
                                              (void *)&template,
                                              &se_list_hash_rules);
    if (!it)
      return 0;
    if (it->data == (void *)&template)
      {
        struct rx_se_list * consed;
        consed = (struct rx_se_list *) malloc (sizeof (*consed));
        if (!consed)            /* check malloc like the other constructors here */
          return 0;
        *consed = template;
        it->data = (void *)consed;
      }
    return (struct rx_se_list *)it->data;
  }
}


#ifdef __STDC__
static struct rx_se_list *
hash_se_prog (struct rx * rx, struct rx_hash * memo, struct rx_se_list * prog)
#else
static struct rx_se_list *
hash_se_prog (rx, memo, prog)
     struct rx * rx;
     struct rx_hash * memo;
     struct rx_se_list * prog;
#endif
{
  struct rx_se_list * answer = 0;
  while (prog)
    {
      answer = hash_cons_se_prog (rx, memo, prog->car, answer);
      if (!answer)
        return 0;
      prog = prog->cdr;
    }
  return answer;
}



/* {Constructors, etc. for NFA State Sets}
 */

#ifdef __STDC__
static int
nfa_set_cmp (void * va, void * vb)
#else
static int
nfa_set_cmp (va, vb)
     void * va;
     void * vb;
#endif
{
  struct rx_nfa_state_set * a = (struct rx_nfa_state_set *)va;
  struct rx_nfa_state_set * b = (struct rx_nfa_state_set *)vb;

  return ((va == vb)
          ? 0
          : (!va
             ? -1
             : (!vb
                ? 1
                : (a->car->id < b->car->id
                   ? 1
                   : (a->car->id > b->car->id
                      ? -1
                      : nfa_set_cmp ((void *)a->cdr, (void *)b->cdr))))));
}

#ifdef __STDC__
static int
nfa_set_equal (void * va, void * vb)
#else
static int
nfa_set_equal (va, vb)
     void * va;
     void * vb;
#endif
{
  return !nfa_set_cmp (va, vb);
}

static struct rx_hash_rules nfa_set_hash_rules = { nfa_set_equal, 0, 0, 0, 0 };


#ifdef __STDC__
static struct rx_nfa_state_set *
nfa_set_cons (struct rx * rx,
              struct rx_hash * memo, struct rx_nfa_state * state,
              struct rx_nfa_state_set * set)
#else
static struct rx_nfa_state_set *
nfa_set_cons (rx, memo, state, set)
     struct rx * rx;
     struct rx_hash * memo;
     struct rx_nfa_state * state;
     struct rx_nfa_state_set * set;
#endif
{
  struct rx_nfa_state_set template;
  struct rx_hash_item * node;
  template.car = state;
  template.cdr = set;
  node = rx_hash_store (memo,
                        (((long)state) >> 8) ^ (long)set,
                        &template, &nfa_set_hash_rules);
  if (!node)
    return 0;
  if (node->data == &template)
    {
      struct rx_nfa_state_set * l;
      l = (struct rx_nfa_state_set *) malloc (sizeof (*l));
      node->data = (void *) l;
      if (!l)
        return 0;
      *l = template;
    }
  return (struct rx_nfa_state_set *)node->data;
}


#ifdef __STDC__
static struct rx_nfa_state_set *
nfa_set_enjoin (struct rx * rx,
                struct rx_hash * memo, struct rx_nfa_state * state,
                struct rx_nfa_state_set * set)
#else
static struct rx_nfa_state_set *
nfa_set_enjoin (rx, memo, state, set)
     struct rx * rx;
     struct rx_hash * memo;
     struct rx_nfa_state * state;
     struct rx_nfa_state_set * set;
#endif
{
  if (!set || (state->id < set->car->id))
    return nfa_set_cons (rx, memo, state, set);
  if (state->id == set->car->id)
    return set;
  else
    {
      struct rx_nfa_state_set * newcdr
        = nfa_set_enjoin (rx, memo, state, set->cdr);
      if (newcdr != set->cdr)
        set = nfa_set_cons (rx, memo, set->car, newcdr);
      return set;
    }
}

\f
/* {Computing Epsilon/Side-effect Closures.}
 */

struct eclose_frame
{
  struct rx_se_list *prog_backwards;
};


/* This is called while computing closures for "outnode".
 * The current node in the traversal is "node".
 * "frame" contains a list of all side effects between
 * "outnode" and "node", from most to least recent.
 *
 * Explores forward from "node", adding new possible
 * futures to "outnode".
 *
 * Returns 0 on allocation failure.
 */
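/* For example: if "outnode" reaches X by an epsilon edge, and X reaches
 * Y by a side-effect edge carrying s, and Y has a cset edge (so
 * Y->id >= 0), then after eclose_node (rx, outnode, outnode, frame) the
 * futures of "outnode" include one whose effects list is (s) and whose
 * destset contains Y. The node->mark flag keeps the depth-first walk
 * from looping on cycles of epsilon edges.
 */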

#ifdef __STDC__
static int
eclose_node (struct rx *rx, struct rx_nfa_state *outnode,
             struct rx_nfa_state *node, struct eclose_frame *frame)
#else
static int
eclose_node (rx, outnode, node, frame)
     struct rx *rx;
     struct rx_nfa_state *outnode;
     struct rx_nfa_state *node;
     struct eclose_frame *frame;
#endif
{
  struct rx_nfa_edge *e = node->edges;

  /* For each node, we follow all epsilon paths to build the closure.
   * The closure omits nodes that have only epsilon edges.
   * The closure is split into partial closures -- all the states in
   * a partial closure are reached by crossing the same list of
   * side effects (though not necessarily the same path).
   */
  if (node->mark)
    return 1;
  node->mark = 1;

  /* If "node" has more than just epsilon and
   * side-effect transitions (id >= 0), or is final,
   * then it has to be added to the possible futures
   * of "outnode".
   */
  if (node->id >= 0 || node->is_final)
    {
      struct rx_possible_future **ec;
      struct rx_se_list * prog_in_order;
      int cmp;

      prog_in_order = ((struct rx_se_list *)hash_se_prog (rx,
                                                          &rx->se_list_memo,
                                                          frame->prog_backwards));

      ec = &outnode->futures;

      while (*ec)
        {
          cmp = se_list_cmp ((void *)(*ec)->effects, (void *)prog_in_order);
          if (cmp <= 0)
            break;
          ec = &(*ec)->next;
        }

      if (!*ec || (cmp < 0))
        {
          struct rx_possible_future * pf;
          pf = rx_possible_future (rx, prog_in_order);
          if (!pf)
            return 0;
          pf->next = *ec;
          *ec = pf;
        }
      if (node->id >= 0)
        {
          (*ec)->destset = nfa_set_enjoin (rx, &rx->set_list_memo,
                                           node, (*ec)->destset);
          if (!(*ec)->destset)
            return 0;
        }
    }

  /* Recurse on outgoing epsilon and side effect edges.
   */
  while (e)
    {
      switch (e->type)
        {
        case ne_epsilon:
          if (!eclose_node (rx, outnode, e->dest, frame))
            return 0;
          break;
        case ne_side_effect:
          {
            frame->prog_backwards = side_effect_cons (rx,
                                                      e->params.side_effect,
                                                      frame->prog_backwards);
            if (!frame->prog_backwards)
              return 0;
            if (!eclose_node (rx, outnode, e->dest, frame))
              return 0;
            {
              struct rx_se_list * dying = frame->prog_backwards;
              frame->prog_backwards = frame->prog_backwards->cdr;
              free ((char *)dying);
            }
            break;
          }
        default:
          break;
        }
      e = e->next;
    }
  node->mark = 0;
  return 1;
}


#ifdef __STDC__
struct rx_possible_future *
rx_state_possible_futures (struct rx * rx, struct rx_nfa_state * n)
#else
struct rx_possible_future *
rx_state_possible_futures (rx, n)
     struct rx * rx;
     struct rx_nfa_state * n;
#endif
{
  if (n->futures_computed)
    return n->futures;
  else
    {
      struct eclose_frame frame;
      frame.prog_backwards = 0;
      if (!eclose_node (rx, n, n, &frame))
        return 0;
      else
        {
          n->futures_computed = 1;
          return n->futures;
        }
    }
}


\f
/* {Storing the NFA in a Contiguous Region of Memory}
 */



#ifdef __STDC__
static void
se_memo_freer (struct rx_hash_item * node)
#else
static void
se_memo_freer (node)
     struct rx_hash_item * node;
#endif
{
  free ((char *)node->data);
}


#ifdef __STDC__
static void
nfa_set_freer (struct rx_hash_item * node)
#else
static void
nfa_set_freer (node)
     struct rx_hash_item * node;
#endif
{
  free ((char *)node->data);
}

#ifdef __STDC__
void
rx_free_nfa (struct rx *rx)
#else
void
rx_free_nfa (rx)
     struct rx *rx;
#endif
{
  rx_free_hash_table (&rx->se_list_memo, se_memo_freer, &se_list_hash_rules);
  rx_bzero ((char *)&rx->se_list_memo, sizeof (rx->se_list_memo));
  rx_free_hash_table (&rx->set_list_memo, nfa_set_freer, &nfa_set_hash_rules);
  rx_bzero ((char *)&rx->set_list_memo, sizeof (rx->set_list_memo));
  rx_free_nfa_graph (rx);
}
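
/* A rough sketch of how the entry points in this file fit together (the
 * rx structure and the rexp_node tree come from elsewhere in the
 * library; the names below are placeholders, not a verbatim caller):
 *
 *   struct rx_nfa_state * start = 0;
 *   struct rx_nfa_state * end = 0;
 *
 *   if (rx_build_nfa (rx, exp, &start, &end))
 *     {
 *       struct rx_possible_future * pf;
 *       pf = rx_state_possible_futures (rx, start);
 *       ... use the futures to drive the NFA->SuperNFA construction ...
 *       rx_free_nfa (rx);
 *     }
 */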