1 /* Copyright (C) 1995, 1996 Tom Lord
3 * This program is free software; you can redistribute it and/or modify
4 * it under the terms of the GNU Library General Public License as published by
5 * the Free Software Foundation; either version 2, or (at your option)
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 * GNU Library General Public License for more details.
13 * You should have received a copy of the GNU Library General Public License
14 * along with this software; see the file COPYING. If not, write to
15 * the Free Software Foundation, 59 Temple Place - Suite 330,
16 * Boston, MA 02111-1307, USA.
21 * Tom Lord (lord@cygnus.com, lord@gnu.ai.mit.edu)
30 #define remalloc(M, S) (M ? realloc (M, S) : malloc (S))
33 /* {Low Level Data Structure Code}
36 /* Constructs a new nfa node. */
39 rx_nfa_state (struct rx
*rx
)
46 struct rx_nfa_state
* n
= (struct rx_nfa_state
*)malloc (sizeof (*n
));
49 rx_bzero ((char *)n
, sizeof (*n
));
50 n
->next
= rx
->nfa_states
;
58 rx_free_nfa_state (struct rx_nfa_state
* n
)
62 struct rx_nfa_state
* n
;
69 /* This adds an edge between two nodes, but doesn't initialize the
75 rx_nfa_edge (struct rx
*rx
,
76 enum rx_nfa_etype type
,
77 struct rx_nfa_state
*start
,
78 struct rx_nfa_state
*dest
)
81 rx_nfa_edge (rx
, type
, start
, dest
)
83 enum rx_nfa_etype type
;
84 struct rx_nfa_state
*start
;
85 struct rx_nfa_state
*dest
;
88 struct rx_nfa_edge
*e
;
89 e
= (struct rx_nfa_edge
*)malloc (sizeof (*e
));
92 e
->next
= start
->edges
;
102 rx_free_nfa_edge (struct rx_nfa_edge
* e
)
106 struct rx_nfa_edge
* e
;
113 /* This constructs a POSSIBLE_FUTURE, which is a kind of epsilon-closure
114 * of an NFA.  These are added to an NFA automatically by eclose_nfa.
118 static struct rx_possible_future
*
119 rx_possible_future (struct rx
* rx
,
120 struct rx_se_list
* effects
)
122 static struct rx_possible_future
*
123 rx_possible_future (rx
, effects
)
125 struct rx_se_list
* effects
;
128 struct rx_possible_future
*ec
;
129 ec
= (struct rx_possible_future
*) malloc (sizeof (*ec
));
134 ec
->effects
= effects
;
141 rx_free_possible_future (struct rx_possible_future
* pf
)
144 rx_free_possible_future (pf
)
145 struct rx_possible_future
* pf
;
154 rx_free_nfa_graph (struct rx
*rx
)
157 rx_free_nfa_graph (rx
)
161 while (rx
->nfa_states
)
163 while (rx
->nfa_states
->edges
)
165 switch (rx
->nfa_states
->edges
->type
)
168 rx_free_cset (rx
->nfa_states
->edges
->params
.cset
);
174 struct rx_nfa_edge
* e
;
175 e
= rx
->nfa_states
->edges
;
176 rx
->nfa_states
->edges
= rx
->nfa_states
->edges
->next
;
177 rx_free_nfa_edge (e
);
179 } /* while (rx->nfa_states->edges) */
181 /* Iterate over the partial epsilon closures of rx->nfa_states */
182 struct rx_possible_future
* pf
= rx
->nfa_states
->futures
;
185 struct rx_possible_future
* pft
= pf
;
187 rx_free_possible_future (pft
);
191 struct rx_nfa_state
*n
;
193 rx
->nfa_states
= rx
->nfa_states
->next
;
194 rx_free_nfa_state (n
);
201 /* {Translating a Syntax Tree into an NFA}
206 /* This is the Thompson regexp->nfa algorithm.
207 * It is modified to allow for `side-effect epsilons.' Those are
208 * edges that are taken whenever a similarly placed epsilon edge
209 * would be, but which also imply that some side effect occurs
210 * when the edge is taken.
212 * Side effects are used to model parts of the pattern language
213 * that are not regular.
218 rx_build_nfa (struct rx
*rx
,
219 struct rexp_node
*rexp
,
220 struct rx_nfa_state
**start
,
221 struct rx_nfa_state
**end
)
224 rx_build_nfa (rx
, rexp
, start
, end
)
226 struct rexp_node
*rexp
;
227 struct rx_nfa_state
**start
;
228 struct rx_nfa_state
**end
;
231 struct rx_nfa_edge
*edge
;
233 /* Start & end nodes may have been allocated by the caller. */
234 *start
= *start
? *start
: rx_nfa_state (rx
);
245 *end
= *end
? *end
: rx_nfa_state (rx
);
249 rx_free_nfa_state (*start
);
256 edge
= rx_nfa_edge (rx
, ne_cset
, *start
, *end
);
257 (*start
)->has_cset_edges
= 1;
260 edge
->params
.cset
= rx_copy_cset (rx
->local_cset_size
,
262 if (!edge
->params
.cset
)
264 rx_free_nfa_edge (edge
);
271 if (rexp
->params
.cstr
.len
== 1)
273 edge
= rx_nfa_edge (rx
, ne_cset
, *start
, *end
);
274 (*start
)->has_cset_edges
= 1;
277 edge
->params
.cset
= rx_cset (rx
->local_cset_size
);
278 if (!edge
->params
.cset
)
280 rx_free_nfa_edge (edge
);
283 RX_bitset_enjoin (edge
->params
.cset
, rexp
->params
.cstr
.contents
[0]);
288 struct rexp_node copied
;
289 struct rx_nfa_state
* shared
;
293 copied
.params
.cstr
.len
--;
294 copied
.params
.cstr
.contents
++;
295 if (!rx_build_nfa (rx
, &copied
, &shared
, end
))
297 copied
.params
.cstr
.len
= 1;
298 copied
.params
.cstr
.contents
--;
299 return rx_build_nfa (rx
, &copied
, start
, &shared
);
304 return (rx_build_nfa (rx
, rexp
->params
.pair
.left
, start
, end
)
305 && rx_nfa_edge (rx
, ne_epsilon
, *start
, *end
));
309 struct rx_nfa_state
* star_start
= 0;
310 struct rx_nfa_state
* star_end
= 0;
311 struct rx_nfa_state
* shared
;
314 if (!rx_build_nfa (rx
, rexp
->params
.pair
.left
, start
, &shared
))
316 return (rx_build_nfa (rx
, rexp
->params
.pair
.left
,
317 &star_start
, &star_end
)
320 && rx_nfa_edge (rx
, ne_epsilon
, star_start
, star_end
)
321 && rx_nfa_edge (rx
, ne_epsilon
, shared
, star_start
)
322 && rx_nfa_edge (rx
, ne_epsilon
, star_end
, *end
)
323 && rx_nfa_edge (rx
, ne_epsilon
, star_end
, star_start
));
329 struct rx_nfa_state
* star_start
= 0;
330 struct rx_nfa_state
* star_end
= 0;
331 return (rx_build_nfa (rx
, rexp
->params
.pair
.left
,
332 &star_start
, &star_end
)
335 && rx_nfa_edge (rx
, ne_epsilon
, star_start
, star_end
)
336 && rx_nfa_edge (rx
, ne_epsilon
, *start
, star_start
)
337 && rx_nfa_edge (rx
, ne_epsilon
, star_end
, *end
)
339 && rx_nfa_edge (rx
, ne_epsilon
, star_end
, star_start
));
344 struct rx_nfa_state
* cut_end
= 0;
346 cut_end
= rx_nfa_state (rx
);
347 if (!(cut_end
&& rx_nfa_edge (rx
, ne_epsilon
, *start
, cut_end
)))
349 rx_free_nfa_state (*start
);
350 rx_free_nfa_state (*end
);
352 rx_free_nfa_state (cut_end
);
355 cut_end
->is_final
= rexp
->params
.intval
;
360 return rx_build_nfa (rx
, rexp
->params
.pair
.left
, start
, end
);
364 struct rx_nfa_state
*shared
= 0;
366 (rx_build_nfa (rx
, rexp
->params
.pair
.left
, start
, &shared
)
367 && rx_build_nfa (rx
, rexp
->params
.pair
.right
, &shared
, end
));
372 struct rx_nfa_state
*ls
= 0;
373 struct rx_nfa_state
*le
= 0;
374 struct rx_nfa_state
*rs
= 0;
375 struct rx_nfa_state
*re
= 0;
376 return (rx_build_nfa (rx
, rexp
->params
.pair
.left
, &ls
, &le
)
377 && rx_build_nfa (rx
, rexp
->params
.pair
.right
, &rs
, &re
)
378 && rx_nfa_edge (rx
, ne_epsilon
, *start
, ls
)
379 && rx_nfa_edge (rx
, ne_epsilon
, *start
, rs
)
380 && rx_nfa_edge (rx
, ne_epsilon
, le
, *end
)
381 && rx_nfa_edge (rx
, ne_epsilon
, re
, *end
));
385 edge
= rx_nfa_edge (rx
, ne_side_effect
, *start
, *end
);
388 edge
->params
.side_effect
= (void *)rexp
->params
.intval
;
392 /* this should never happen */
397 /* {Low Level Data Structures for the Static NFA->SuperNFA Analysis}
399 * There are side effect lists -- lists of side effects occurring
400 * along an uninterrupted, acyclic path of side-effect epsilon edges.
401 * Such paths are collapsed to single edges in the course of computing
402 * epsilon closures.  The resulting single edges are labeled with a list
403 * of all the side effects from the original multi-edge path.  Equivalent
404 * lists of side effects are made == by the constructors below.
406 * There are also nfa state sets. These are used to hold a list of all
407 * states reachable from a starting state for a given type of transition
408 * and side effect list. These are also hash-consed.
413 /* The next several functions compare, construct, etc. lists of side
414 * effects. See ECLOSE_NFA (below) for details.
417 /* Ordering of rx_se_list
418 * (-1, 0, 1 return value convention).
423 se_list_cmp (void * va
, void * vb
)
431 struct rx_se_list
* a
= (struct rx_se_list
*)va
;
432 struct rx_se_list
* b
= (struct rx_se_list
*)vb
;
440 : ((long)a
->car
< (long)b
->car
442 : ((long)a
->car
> (long)b
->car
444 : se_list_cmp ((void *)a
->cdr
, (void *)b
->cdr
))))));
450 se_list_equal (void * va
, void * vb
)
453 se_list_equal (va
, vb
)
458 return !(se_list_cmp (va
, vb
));
461 static struct rx_hash_rules se_list_hash_rules
= { se_list_equal
, 0, 0, 0, 0 };
465 static struct rx_se_list
*
466 side_effect_cons (struct rx
* rx
,
467 void * se
, struct rx_se_list
* list
)
469 static struct rx_se_list
*
470 side_effect_cons (rx
, se
, list
)
473 struct rx_se_list
* list
;
476 struct rx_se_list
* l
;
477 l
= ((struct rx_se_list
*) malloc (sizeof (*l
)));
487 static struct rx_se_list
*
488 hash_cons_se_prog (struct rx
* rx
,
489 struct rx_hash
* memo
,
490 void * car
, struct rx_se_list
* cdr
)
492 static struct rx_se_list
*
493 hash_cons_se_prog (rx
, memo
, car
, cdr
)
495 struct rx_hash
* memo
;
497 struct rx_se_list
* cdr
;
500 long hash
= (long)car
^ (long)cdr
;
501 struct rx_se_list
template;
506 struct rx_hash_item
* it
= rx_hash_store (memo
, hash
,
508 &se_list_hash_rules
);
511 if (it
->data
== (void *)&template)
513 struct rx_se_list
* consed
;
514 consed
= (struct rx_se_list
*) malloc (sizeof (*consed
));
516 it
->data
= (void *)consed
;
518 return (struct rx_se_list
*)it
->data
;
524 static struct rx_se_list
*
525 hash_se_prog (struct rx
* rx
, struct rx_hash
* memo
, struct rx_se_list
* prog
)
527 static struct rx_se_list
*
528 hash_se_prog (rx
, memo
, prog
)
530 struct rx_hash
* memo
;
531 struct rx_se_list
* prog
;
534 struct rx_se_list
* answer
= 0;
537 answer
= hash_cons_se_prog (rx
, memo
, prog
->car
, answer
);
547 /* {Constructors, etc. for NFA State Sets}
552 nfa_set_cmp (void * va
, void * vb
)
560 struct rx_nfa_state_set
* a
= (struct rx_nfa_state_set
*)va
;
561 struct rx_nfa_state_set
* b
= (struct rx_nfa_state_set
*)vb
;
569 : (a
->car
->id
< b
->car
->id
571 : (a
->car
->id
> b
->car
->id
573 : nfa_set_cmp ((void *)a
->cdr
, (void *)b
->cdr
))))));
578 nfa_set_equal (void * va
, void * vb
)
581 nfa_set_equal (va
, vb
)
586 return !nfa_set_cmp (va
, vb
);
589 static struct rx_hash_rules nfa_set_hash_rules
= { nfa_set_equal
, 0, 0, 0, 0 };
593 static struct rx_nfa_state_set
*
594 nfa_set_cons (struct rx
* rx
,
595 struct rx_hash
* memo
, struct rx_nfa_state
* state
,
596 struct rx_nfa_state_set
* set
)
598 static struct rx_nfa_state_set
*
599 nfa_set_cons (rx
, memo
, state
, set
)
601 struct rx_hash
* memo
;
602 struct rx_nfa_state
* state
;
603 struct rx_nfa_state_set
* set
;
606 struct rx_nfa_state_set
template;
607 struct rx_hash_item
* node
;
608 template.car
= state
;
610 node
= rx_hash_store (memo
,
611 (((long)state
) >> 8) ^ (long)set
,
612 &template, &nfa_set_hash_rules
);
615 if (node
->data
== &template)
617 struct rx_nfa_state_set
* l
;
618 l
= (struct rx_nfa_state_set
*) malloc (sizeof (*l
));
619 node
->data
= (void *) l
;
624 return (struct rx_nfa_state_set
*)node
->data
;
629 static struct rx_nfa_state_set
*
630 nfa_set_enjoin (struct rx
* rx
,
631 struct rx_hash
* memo
, struct rx_nfa_state
* state
,
632 struct rx_nfa_state_set
* set
)
634 static struct rx_nfa_state_set
*
635 nfa_set_enjoin (rx
, memo
, state
, set
)
637 struct rx_hash
* memo
;
638 struct rx_nfa_state
* state
;
639 struct rx_nfa_state_set
* set
;
642 if (!set
|| (state
->id
< set
->car
->id
))
643 return nfa_set_cons (rx
, memo
, state
, set
);
644 if (state
->id
== set
->car
->id
)
648 struct rx_nfa_state_set
* newcdr
649 = nfa_set_enjoin (rx
, memo
, state
, set
->cdr
);
650 if (newcdr
!= set
->cdr
)
651 set
= nfa_set_cons (rx
, memo
, set
->car
, newcdr
);
657 /* {Computing Epsilon/Side-effect Closures.}
662 struct rx_se_list
*prog_backwards
;
666 /* This is called while computing closures for "outnode".
667 * The current node in the traversal is "node".
668 * "frame" contains a list of a all side effects between
669 * "outnode" and "node" from most to least recent.
671 * Explores forward from "node", adding new possible
672 * futures to outnode.
674 * Returns 0 on allocation failure.
679 eclose_node (struct rx
*rx
, struct rx_nfa_state
*outnode
,
680 struct rx_nfa_state
*node
, struct eclose_frame
*frame
)
683 eclose_node (rx
, outnode
, node
, frame
)
685 struct rx_nfa_state
*outnode
;
686 struct rx_nfa_state
*node
;
687 struct eclose_frame
*frame
;
690 struct rx_nfa_edge
*e
= node
->edges
;
692 /* For each node, we follow all epsilon paths to build the closure.
693 * The closure omits nodes that have only epsilon edges.
694 * The closure is split into partial closures -- all the states in
695 * a partial closure are reached by crossing the same list of
696 * of side effects (though not necessarily the same path).
702 /* If "node" has more than just epsilon and
703 * and side-effect transitions (id >= 0), or is final,
704 * then it has to be added to the possible futures
707 if (node
->id
>= 0 || node
->is_final
)
709 struct rx_possible_future
**ec
;
710 struct rx_se_list
* prog_in_order
;
713 prog_in_order
= ((struct rx_se_list
*)hash_se_prog (rx
,
715 frame
->prog_backwards
));
717 ec
= &outnode
->futures
;
721 cmp
= se_list_cmp ((void *)(*ec
)->effects
, (void *)prog_in_order
);
727 if (!*ec
|| (cmp
< 0))
729 struct rx_possible_future
* pf
;
730 pf
= rx_possible_future (rx
, prog_in_order
);
738 (*ec
)->destset
= nfa_set_enjoin (rx
, &rx
->set_list_memo
,
739 node
, (*ec
)->destset
);
745 /* Recurse on outgoing epsilon and side effect nodes.
752 if (!eclose_node (rx
, outnode
, e
->dest
, frame
))
757 frame
->prog_backwards
= side_effect_cons (rx
,
758 e
->params
.side_effect
,
759 frame
->prog_backwards
);
760 if (!frame
->prog_backwards
)
762 if (!eclose_node (rx
, outnode
, e
->dest
, frame
))
765 struct rx_se_list
* dying
= frame
->prog_backwards
;
766 frame
->prog_backwards
= frame
->prog_backwards
->cdr
;
767 free ((char *)dying
);
782 struct rx_possible_future
*
783 rx_state_possible_futures (struct rx
* rx
, struct rx_nfa_state
* n
)
785 struct rx_possible_future
*
786 rx_state_possible_futures (rx
, n
)
788 struct rx_nfa_state
* n
;
791 if (n
->futures_computed
)
795 struct eclose_frame frame
;
796 frame
.prog_backwards
= 0;
797 if (!eclose_node (rx
, n
, n
, &frame
))
801 n
->futures_computed
= 1;
809 /* {Storing the NFA in a Contiguous Region of Memory}
816 se_memo_freer (struct rx_hash_item
* node
)
820 struct rx_hash_item
* node
;
823 free ((char *)node
->data
);
829 nfa_set_freer (struct rx_hash_item
* node
)
833 struct rx_hash_item
* node
;
836 free ((char *)node
->data
);
841 rx_free_nfa (struct rx
*rx
)
848 rx_free_hash_table (&rx
->se_list_memo
, se_memo_freer
, &se_list_hash_rules
);
849 rx_bzero ((char *)&rx
->se_list_memo
, sizeof (rx
->se_list_memo
));
850 rx_free_hash_table (&rx
->set_list_memo
, nfa_set_freer
, &nfa_set_hash_rules
);
851 rx_bzero ((char *)&rx
->set_list_memo
, sizeof (rx
->set_list_memo
));
852 rx_free_nfa_graph (rx
);