3 /* Copyright (C) 1995, 1996 Tom Lord
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU Library General Public License as published by
7 * the Free Software Foundation; either version 2, or (at your option)
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU Library General Public License for more details.
15 * You should have received a copy of the GNU Library General Public License
16 * along with this software; see the file COPYING. If not, write to
17 * the Free Software Foundation, 59 Temple Place - Suite 330,
18 * Boston, MA 02111-1307, USA.
27 /* The functions in the next several pages define the lazy-NFA-conversion used
28 * by matchers. The input to this construction is an NFA such as
29 * is built by compactify_nfa (rx.c). The output is the superNFA.
32 /* Match engines can use arbitrary values for opcodes. So, the parse tree
33 * is built using instructions names (enum rx_opcode), but the superstate
34 * nfa is populated with mystery opcodes (void *).
36 * For convenience, here is an id table. The opcodes are == to their inxs
38 * The lables in re_search_2 would make good values for instructions.
41 void * rx_id_instruction_table
[rx_num_instructions
] =
43 (void *) rx_backtrack_point
,
44 (void *) rx_do_side_effects
,
45 (void *) rx_cache_miss
,
46 (void *) rx_next_char
,
47 (void *) rx_backtrack
,
54 /* The partially instantiated superstate graph has a transition
55 * table at every node. There is one entry for every character.
56 * This fills in the transition for a set.
60 install_transition (struct rx_superstate
*super
,
61 struct rx_inx
*answer
, rx_Bitset trcset
)
64 install_transition (super
, answer
, trcset
)
65 struct rx_superstate
*super
;
66 struct rx_inx
*answer
;
70 struct rx_inx
* transitions
= super
->transitions
;
72 for (chr
= 0; chr
< 256; )
80 RX_subset sub
= *trcset
;
86 transitions
[chr
] = *answer
;
97 qlen (struct rx_superstate
* q
)
101 struct rx_superstate
* q
;
105 struct rx_superstate
* it
;
108 for (it
= q
->next_recyclable
; it
!= q
; it
= it
->next_recyclable
)
115 check_cache (struct rx_cache
* cache
)
119 struct rx_cache
* cache
;
122 struct rx_cache
* you_fucked_up
= 0;
123 int total
= cache
->superstates
;
124 int semi
= cache
->semifree_superstates
;
125 if (semi
!= qlen (cache
->semifree_superstate
))
126 check_cache (you_fucked_up
);
127 if ((total
- semi
) != qlen (cache
->lru_superstate
))
128 check_cache (you_fucked_up
);
132 rx_cache_malloc (struct rx_cache
* cache
, int size
)
135 rx_cache_malloc (cache
, size
)
136 struct rx_cache
* cache
;
141 answer
= (char *)malloc (size
);
143 cache
->bytes_used
+= size
;
150 rx_cache_free (struct rx_cache
* cache
, int size
, char * mem
)
153 rx_cache_free (cache
, size
, mem
)
154 struct rx_cache
* cache
;
160 cache
->bytes_used
-= size
;
165 /* When a superstate is old and neglected, it can enter a
166 * semi-free state. A semi-free state is slated to die.
167 * Incoming transitions to a semi-free state are re-written
168 * to cause an (interpreted) fault when they are taken.
169 * The fault handler revives the semi-free state, patches
170 * incoming transitions back to normal, and continues.
172 * The idea is basicly to free in two stages, aborting
173 * between the two if the state turns out to be useful again.
174 * When a free is aborted, the rescued superstate is placed
175 * in the most-favored slot to maximize the time until it
176 * is next semi-freed.
178 * Overall, this approximates truly freeing superstates in
179 * lru order, but does not involve the cost of updating perfectly
180 * accurate timestamps every time a superstate is touched.
185 semifree_superstate (struct rx_cache
* cache
)
188 semifree_superstate (cache
)
189 struct rx_cache
* cache
;
192 int disqualified
= cache
->semifree_superstates
;
193 if (disqualified
== cache
->superstates
)
195 while (cache
->lru_superstate
->locks
)
197 cache
->lru_superstate
= cache
->lru_superstate
->next_recyclable
;
199 if (disqualified
== cache
->superstates
)
203 struct rx_superstate
* it
= cache
->lru_superstate
;
204 it
->next_recyclable
->prev_recyclable
= it
->prev_recyclable
;
205 it
->prev_recyclable
->next_recyclable
= it
->next_recyclable
;
206 cache
->lru_superstate
= (it
== it
->next_recyclable
208 : it
->next_recyclable
);
209 if (!cache
->semifree_superstate
)
211 cache
->semifree_superstate
= it
;
212 it
->next_recyclable
= it
;
213 it
->prev_recyclable
= it
;
217 it
->prev_recyclable
= cache
->semifree_superstate
->prev_recyclable
;
218 it
->next_recyclable
= cache
->semifree_superstate
;
219 it
->prev_recyclable
->next_recyclable
= it
;
220 it
->next_recyclable
->prev_recyclable
= it
;
223 struct rx_distinct_future
*df
;
225 ++cache
->semifree_superstates
;
226 df
= it
->transition_refs
;
229 df
->prev_same_dest
->next_same_dest
= 0;
230 for (df
= it
->transition_refs
; df
; df
= df
->next_same_dest
)
232 df
->future_frame
.inx
= cache
->instruction_table
[rx_cache_miss
];
233 df
->future_frame
.data
= 0;
234 df
->future_frame
.data_2
= (void *) df
;
235 /* If there are any NEXT-CHAR instruction frames that
236 * refer to this state, we convert them to CACHE-MISS frames.
239 && (df
->edge
->options
->next_same_super_edge
[0]
240 == df
->edge
->options
))
241 install_transition (df
->present
, &df
->future_frame
,
244 df
= it
->transition_refs
;
245 df
->prev_same_dest
->next_same_dest
= df
;
254 refresh_semifree_superstate (struct rx_cache
* cache
,
255 struct rx_superstate
* super
)
258 refresh_semifree_superstate (cache
, super
)
259 struct rx_cache
* cache
;
260 struct rx_superstate
* super
;
263 struct rx_distinct_future
*df
;
265 if (super
->transition_refs
)
267 super
->transition_refs
->prev_same_dest
->next_same_dest
= 0;
268 for (df
= super
->transition_refs
; df
; df
= df
->next_same_dest
)
270 df
->future_frame
.inx
= cache
->instruction_table
[rx_next_char
];
271 df
->future_frame
.data
= (void *) super
->transitions
;
272 df
->future_frame
.data_2
= (void *)(super
->contents
->is_final
);
273 /* CACHE-MISS instruction frames that refer to this state,
274 * must be converted to NEXT-CHAR frames.
277 && (df
->edge
->options
->next_same_super_edge
[0]
278 == df
->edge
->options
))
279 install_transition (df
->present
, &df
->future_frame
,
282 super
->transition_refs
->prev_same_dest
->next_same_dest
283 = super
->transition_refs
;
285 if (cache
->semifree_superstate
== super
)
286 cache
->semifree_superstate
= (super
->prev_recyclable
== super
288 : super
->prev_recyclable
);
289 super
->next_recyclable
->prev_recyclable
= super
->prev_recyclable
;
290 super
->prev_recyclable
->next_recyclable
= super
->next_recyclable
;
292 if (!cache
->lru_superstate
)
293 (cache
->lru_superstate
294 = super
->next_recyclable
295 = super
->prev_recyclable
299 super
->next_recyclable
= cache
->lru_superstate
;
300 super
->prev_recyclable
= cache
->lru_superstate
->prev_recyclable
;
301 super
->next_recyclable
->prev_recyclable
= super
;
302 super
->prev_recyclable
->next_recyclable
= super
;
304 super
->is_semifree
= 0;
305 --cache
->semifree_superstates
;
310 rx_refresh_this_superstate (struct rx_cache
* cache
,
311 struct rx_superstate
* superstate
)
314 rx_refresh_this_superstate (cache
, superstate
)
315 struct rx_cache
* cache
;
316 struct rx_superstate
* superstate
;
319 if (superstate
->is_semifree
)
320 refresh_semifree_superstate (cache
, superstate
);
321 else if (cache
->lru_superstate
== superstate
)
322 cache
->lru_superstate
= superstate
->next_recyclable
;
323 else if (superstate
!= cache
->lru_superstate
->prev_recyclable
)
325 superstate
->next_recyclable
->prev_recyclable
326 = superstate
->prev_recyclable
;
327 superstate
->prev_recyclable
->next_recyclable
328 = superstate
->next_recyclable
;
329 superstate
->next_recyclable
= cache
->lru_superstate
;
330 superstate
->prev_recyclable
= cache
->lru_superstate
->prev_recyclable
;
331 superstate
->next_recyclable
->prev_recyclable
= superstate
;
332 superstate
->prev_recyclable
->next_recyclable
= superstate
;
338 release_superset_low (struct rx_cache
* cache
,
339 struct rx_superset
*set
)
342 release_superset_low (cache
, set
)
343 struct rx_cache
* cache
;
344 struct rx_superset
*set
;
350 set
->starts_for
->start_set
= 0;
353 release_superset_low (cache
, set
->cdr
);
355 rx_hash_free (&set
->hash_item
, &cache
->superset_hash_rules
);
356 rx_cache_free (cache
,
357 sizeof (struct rx_superset
),
364 rx_release_superset (struct rx
*rx
,
365 struct rx_superset
*set
)
368 rx_release_superset (rx
, set
)
370 struct rx_superset
*set
;
373 release_superset_low (rx
->cache
, set
);
376 /* This tries to add a new superstate to the superstate freelist.
377 * It might, as a result, free some edge pieces or hash tables.
378 * If nothing can be freed because too many locks are being held, fail.
383 rx_really_free_superstate (struct rx_cache
* cache
)
386 rx_really_free_superstate (cache
)
387 struct rx_cache
* cache
;
390 int locked_superstates
= 0;
391 struct rx_superstate
* it
;
393 if (!cache
->superstates
)
397 while ((cache
->hits
+ cache
->misses
) > cache
->superstates
)
403 /* semifree superstates faster than they are freed
404 * so popular states can be rescued.
406 semifree_superstate (cache
);
407 semifree_superstate (cache
);
408 semifree_superstate (cache
);
411 Redundant because semifree_superstate already
415 while (cache
->semifree_superstate
&& cache
->semifree_superstate
->locks
)
417 refresh_semifree_superstate (cache
, cache
->semifree_superstate
);
418 ++locked_superstates
;
419 if (locked_superstates
== cache
->superstates
)
424 if (cache
->semifree_superstate
)
425 insert the
"it =" block which follows
this "if 0" code
;
428 while (cache
->lru_superstate
->locks
)
430 cache
->lru_superstate
= cache
->lru_superstate
->next_recyclable
;
431 ++locked_superstates
;
432 if (locked_superstates
== cache
->superstates
)
435 it
= cache
->lru_superstate
;
436 it
->next_recyclable
->prev_recyclable
= it
->prev_recyclable
;
437 it
->prev_recyclable
->next_recyclable
= it
->next_recyclable
;
438 cache
->lru_superstate
= ((it
== it
->next_recyclable
)
440 : it
->next_recyclable
);
444 if (!cache
->semifree_superstate
)
448 it
= cache
->semifree_superstate
;
449 it
->next_recyclable
->prev_recyclable
= it
->prev_recyclable
;
450 it
->prev_recyclable
->next_recyclable
= it
->next_recyclable
;
451 cache
->semifree_superstate
= ((it
== it
->next_recyclable
)
453 : it
->next_recyclable
);
454 --cache
->semifree_superstates
;
458 if (it
->transition_refs
)
460 struct rx_distinct_future
*df
;
461 for (df
= it
->transition_refs
,
462 df
->prev_same_dest
->next_same_dest
= 0;
464 df
= df
->next_same_dest
)
466 df
->future_frame
.inx
= cache
->instruction_table
[rx_cache_miss
];
467 df
->future_frame
.data
= 0;
468 df
->future_frame
.data_2
= (void *) df
;
471 it
->transition_refs
->prev_same_dest
->next_same_dest
=
475 struct rx_super_edge
*tc
= it
->edges
;
478 struct rx_distinct_future
* df
;
479 struct rx_super_edge
*tct
= tc
->next
;
481 df
->next_same_super_edge
[1]->next_same_super_edge
[0] = 0;
484 struct rx_distinct_future
*dft
= df
;
485 df
= df
->next_same_super_edge
[0];
488 if (dft
->future
&& dft
->future
->transition_refs
== dft
)
490 dft
->future
->transition_refs
= dft
->next_same_dest
;
491 if (dft
->future
->transition_refs
== dft
)
492 dft
->future
->transition_refs
= 0;
494 dft
->next_same_dest
->prev_same_dest
= dft
->prev_same_dest
;
495 dft
->prev_same_dest
->next_same_dest
= dft
->next_same_dest
;
496 rx_cache_free (cache
,
497 sizeof (struct rx_distinct_future
),
500 rx_cache_free (cache
,
501 sizeof (struct rx_super_edge
),
507 if (it
->contents
->superstate
== it
)
508 it
->contents
->superstate
= 0;
509 release_superset_low (cache
, it
->contents
);
510 rx_cache_free (cache
,
511 (sizeof (struct rx_superstate
)
512 + cache
->local_cset_size
* sizeof (struct rx_inx
)),
514 --cache
->superstates
;
521 rx_cache_malloc_or_get (struct rx_cache
* cache
, int size
)
524 rx_cache_malloc_or_get (cache
, size
)
525 struct rx_cache
* cache
;
529 while ( (cache
->bytes_used
+ size
> cache
->bytes_allowed
)
530 && rx_really_free_superstate (cache
))
533 return rx_cache_malloc (cache
, size
);
540 supersetcmp (void * va
, void * vb
)
548 struct rx_superset
* a
= (struct rx_superset
*)va
;
549 struct rx_superset
* b
= (struct rx_superset
*)vb
;
551 || (a
&& b
&& (a
->id
== b
->id
) && (a
->car
== b
->car
) && (a
->cdr
== b
->cdr
)));
554 #define rx_abs(A) (((A) > 0) ? (A) : -(A))
558 static struct rx_hash_item
*
559 superset_allocator (struct rx_hash_rules
* rules
, void * val
)
561 static struct rx_hash_item
*
562 superset_allocator (rules
, val
)
563 struct rx_hash_rules
* rules
;
567 struct rx_cache
* cache
;
568 struct rx_superset
* template;
569 struct rx_superset
* newset
;
571 cache
= ((struct rx_cache
*)
573 - (unsigned long)(&((struct rx_cache
*)0)->superset_hash_rules
)));
574 template = (struct rx_superset
*)val
;
575 newset
= ((struct rx_superset
*)
576 rx_cache_malloc (cache
, sizeof (*template)));
583 cdrfinal
= (template->cdr
584 ? template->cdr
->is_final
586 cdredges
= (template->cdr
587 ? template->cdr
->has_cset_edges
590 newset
->is_final
= (rx_abs (template->car
->is_final
) > rx_abs(cdrfinal
)
591 ? template->car
->is_final
592 : template->cdr
->is_final
);
593 newset
->has_cset_edges
= (template->car
->has_cset_edges
|| cdredges
);
596 newset
->id
= template->id
;
597 newset
->car
= template->car
;
598 newset
->cdr
= template->cdr
;
599 rx_protect_superset (rx
, template->cdr
);
600 newset
->superstate
= 0;
601 newset
->starts_for
= 0;
602 newset
->hash_item
.data
= (void *)newset
;
603 newset
->hash_item
.binding
= 0;
604 return &newset
->hash_item
;
608 static struct rx_hash
*
609 super_hash_allocator (struct rx_hash_rules
* rules
)
611 static struct rx_hash
*
612 super_hash_allocator (rules
)
613 struct rx_hash_rules
* rules
;
616 struct rx_cache
* cache
;
617 cache
= ((struct rx_cache
*)
619 - (unsigned long)(&((struct rx_cache
*)0)->superset_hash_rules
)));
620 return ((struct rx_hash
*)
621 rx_cache_malloc (cache
, sizeof (struct rx_hash
)));
627 super_hash_liberator (struct rx_hash
* hash
, struct rx_hash_rules
* rules
)
630 super_hash_liberator (hash
, rules
)
631 struct rx_hash
* hash
;
632 struct rx_hash_rules
* rules
;
635 struct rx_cache
* cache
636 = ((struct rx_cache
*)
637 (char *)rules
- (long)(&((struct rx_cache
*)0)->superset_hash_rules
));
638 rx_cache_free (cache
, sizeof (struct rx_hash
), (char *)hash
);
643 superset_hash_item_liberator (struct rx_hash_item
* it
,
644 struct rx_hash_rules
* rules
)
647 superset_hash_item_liberator (it
, rules
)
648 struct rx_hash_item
* it
;
649 struct rx_hash_rules
* rules
;
654 int rx_cache_bound
= 3;
655 static int rx_default_cache_got
= 0;
657 static struct rx_cache default_cache
=
661 super_hash_allocator
,
662 super_hash_liberator
,
664 superset_hash_item_liberator
,
673 RX_DEFAULT_DFA_CACHE_SIZE
,
676 rx_id_instruction_table
,
685 struct rx_cache
* rx_default_cache
= &default_cache
;
687 /* This adds an element to a superstate set. These sets are lists, such
688 * that lists with == elements are ==. The empty set is returned by
689 * superset_cons (rx, 0, 0) and is NOT equivelent to
690 * (struct rx_superset)0.
695 rx_superset_cons (struct rx
* rx
,
696 struct rx_nfa_state
*car
, struct rx_superset
*cdr
)
699 rx_superset_cons (rx
, car
, cdr
)
701 struct rx_nfa_state
*car
;
702 struct rx_superset
*cdr
;
705 struct rx_cache
* cache
= rx
->cache
;
708 if (!cache
->empty_superset
)
710 cache
->empty_superset
711 = ((struct rx_superset
*)
712 rx_cache_malloc (cache
, sizeof (struct rx_superset
)));
713 if (!cache
->empty_superset
)
715 rx_bzero ((char *)cache
->empty_superset
, sizeof (struct rx_superset
));
716 cache
->empty_superset
->refs
= 1000;
718 return cache
->empty_superset
;
721 struct rx_superset
template;
722 struct rx_hash_item
* hit
;
725 template.id
= rx
->rx_id
;
726 rx_protect_superset (rx
, template.cdr
);
727 hit
= rx_hash_store (&cache
->superset_table
,
728 (unsigned long)car
^ car
->id
^ (unsigned long)cdr
,
730 &cache
->superset_hash_rules
);
731 rx_protect_superset (rx
, template.cdr
);
733 ? (struct rx_superset
*)hit
->data
738 /* This computes a union of two NFA state sets. The sets do not have the
739 * same representation though. One is a RX_SUPERSET structure (part
740 * of the superstate NFA) and the other is an NFA_STATE_SET (part of the NFA).
745 rx_superstate_eclosure_union (struct rx
* rx
, struct rx_superset
*set
, struct rx_nfa_state_set
*ecl
)
748 rx_superstate_eclosure_union (rx
, set
, ecl
)
750 struct rx_superset
*set
;
751 struct rx_nfa_state_set
*ecl
;
758 return rx_superset_cons (rx
, ecl
->car
,
759 rx_superstate_eclosure_union (rx
, set
, ecl
->cdr
));
760 if (set
->car
== ecl
->car
)
761 return rx_superstate_eclosure_union (rx
, set
, ecl
->cdr
);
764 struct rx_superset
* tail
;
765 struct rx_nfa_state
* first
;
767 if (set
->car
->id
< ecl
->car
->id
)
769 tail
= rx_superstate_eclosure_union (rx
, set
->cdr
, ecl
);
774 tail
= rx_superstate_eclosure_union (rx
, set
, ecl
->cdr
);
781 struct rx_superset
* answer
;
782 answer
= rx_superset_cons (rx
, first
, tail
);
785 rx_protect_superset (rx
, tail
);
786 rx_release_superset (rx
, tail
);
799 * This makes sure that a list of rx_distinct_futures contains
800 * a future for each possible set of side effects in the eclosure
801 * of a given state. This is some of the work of filling in a
802 * superstate transition.
806 static struct rx_distinct_future
*
807 include_futures (struct rx
*rx
,
808 struct rx_distinct_future
*df
, struct rx_nfa_state
809 *state
, struct rx_superstate
*superstate
)
811 static struct rx_distinct_future
*
812 include_futures (rx
, df
, state
, superstate
)
814 struct rx_distinct_future
*df
;
815 struct rx_nfa_state
*state
;
816 struct rx_superstate
*superstate
;
819 struct rx_possible_future
*future
;
820 struct rx_cache
* cache
= rx
->cache
;
821 for (future
= rx_state_possible_futures (rx
, state
); future
; future
= future
->next
)
823 struct rx_distinct_future
*dfp
;
824 struct rx_distinct_future
*insert_before
= 0;
826 df
->next_same_super_edge
[1]->next_same_super_edge
[0] = 0;
827 for (dfp
= df
; dfp
; dfp
= dfp
->next_same_super_edge
[0])
828 if (dfp
->effects
== future
->effects
)
832 int order
= rx
->se_list_cmp (rx
, dfp
->effects
, future
->effects
);
841 df
->next_same_super_edge
[1]->next_same_super_edge
[0] = df
;
845 = ((struct rx_distinct_future
*)
846 rx_cache_malloc (cache
,
847 sizeof (struct rx_distinct_future
)));
852 df
= insert_before
= dfp
;
853 df
->next_same_super_edge
[0] = df
->next_same_super_edge
[1] = df
;
855 else if (!insert_before
)
857 else if (insert_before
== df
)
860 dfp
->next_same_super_edge
[0] = insert_before
;
861 dfp
->next_same_super_edge
[1]
862 = insert_before
->next_same_super_edge
[1];
863 dfp
->next_same_super_edge
[1]->next_same_super_edge
[0] = dfp
;
864 dfp
->next_same_super_edge
[0]->next_same_super_edge
[1] = dfp
;
865 dfp
->next_same_dest
= dfp
->prev_same_dest
= dfp
;
867 dfp
->present
= superstate
;
868 dfp
->future_frame
.inx
= rx
->instruction_table
[rx_cache_miss
];
869 dfp
->future_frame
.data
= 0;
870 dfp
->future_frame
.data_2
= (void *) dfp
;
871 dfp
->side_effects_frame
.inx
872 = rx
->instruction_table
[rx_do_side_effects
];
873 dfp
->side_effects_frame
.data
= 0;
874 dfp
->side_effects_frame
.data_2
= (void *) dfp
;
875 dfp
->effects
= future
->effects
;
883 /* This constructs a new superstate from its state set. The only
884 * complexity here is memory management.
887 struct rx_superstate
*
888 rx_superstate (struct rx
*rx
,
889 struct rx_superset
*set
)
891 struct rx_superstate
*
892 rx_superstate (rx
, set
)
894 struct rx_superset
*set
;
897 struct rx_cache
* cache
= rx
->cache
;
898 struct rx_superstate
* superstate
= 0;
900 /* Does the superstate already exist in the cache? */
903 if (set
->superstate
->rx_id
!= rx
->rx_id
)
905 /* Aha. It is in the cache, but belongs to a superstate
906 * that refers to an NFA that no longer exists.
907 * (We know it no longer exists because it was evidently
908 * stored in the same region of memory as the current nfa
909 * yet it has a different id.)
911 superstate
= set
->superstate
;
912 if (!superstate
->is_semifree
)
914 if (cache
->lru_superstate
== superstate
)
916 cache
->lru_superstate
= superstate
->next_recyclable
;
917 if (cache
->lru_superstate
== superstate
)
918 cache
->lru_superstate
= 0;
921 superstate
->next_recyclable
->prev_recyclable
922 = superstate
->prev_recyclable
;
923 superstate
->prev_recyclable
->next_recyclable
924 = superstate
->next_recyclable
;
925 if (!cache
->semifree_superstate
)
927 (cache
->semifree_superstate
928 = superstate
->next_recyclable
929 = superstate
->prev_recyclable
934 superstate
->next_recyclable
= cache
->semifree_superstate
;
935 superstate
->prev_recyclable
936 = cache
->semifree_superstate
->prev_recyclable
;
937 superstate
->next_recyclable
->prev_recyclable
939 superstate
->prev_recyclable
->next_recyclable
941 cache
->semifree_superstate
= superstate
;
943 ++cache
->semifree_superstates
;
947 goto handle_cache_miss
;
950 superstate
= set
->superstate
;
952 rx_refresh_this_superstate (cache
, superstate
);
958 /* This point reached only for cache misses. */
961 if (rx_debug_trace
> 1)
963 struct rx_superset
* setp
= set
;
964 fprintf (stderr
, "Building a superstet %d(%d): ", rx
->rx_id
, set
);
967 fprintf (stderr
, "%d ", setp
->id
);
970 fprintf (stderr
, "(%d)\n", set
);
976 superstate_size
= ( sizeof (*superstate
)
977 + ( sizeof (struct rx_inx
)
978 * rx
->local_cset_size
));
979 superstate
= ((struct rx_superstate
*)
980 rx_cache_malloc_or_get (cache
, superstate_size
));
981 ++cache
->superstates
;
987 if (!cache
->lru_superstate
)
988 (cache
->lru_superstate
989 = superstate
->next_recyclable
990 = superstate
->prev_recyclable
994 superstate
->next_recyclable
= cache
->lru_superstate
;
995 superstate
->prev_recyclable
= cache
->lru_superstate
->prev_recyclable
;
996 ( superstate
->prev_recyclable
->next_recyclable
997 = superstate
->next_recyclable
->prev_recyclable
1000 superstate
->rx_id
= rx
->rx_id
;
1001 superstate
->transition_refs
= 0;
1002 superstate
->locks
= 0;
1003 superstate
->is_semifree
= 0;
1004 set
->superstate
= superstate
;
1005 superstate
->contents
= set
;
1006 rx_protect_superset (rx
, set
);
1007 superstate
->edges
= 0;
1010 /* None of the transitions from this superstate are known yet. */
1011 for (x
= 0; x
< rx
->local_cset_size
; ++x
)
1013 struct rx_inx
* ifr
= &superstate
->transitions
[x
];
1014 ifr
->inx
= rx
->instruction_table
[rx_cache_miss
];
1015 ifr
->data
= ifr
->data_2
= 0;
1022 /* This computes the destination set of one edge of the superstate NFA.
1023 * Note that a RX_DISTINCT_FUTURE is a superstate edge.
1024 * Returns 0 on an allocation failure.
1029 solve_destination (struct rx
*rx
, struct rx_distinct_future
*df
)
1032 solve_destination (rx
, df
)
1034 struct rx_distinct_future
*df
;
1037 struct rx_super_edge
*tc
= df
->edge
;
1038 struct rx_superset
*nfa_state
;
1039 struct rx_superset
*nil_set
= rx_superset_cons (rx
, 0, 0);
1040 struct rx_superset
*solution
= nil_set
;
1041 struct rx_superstate
*dest
;
1043 rx_protect_superset (rx
, solution
);
1044 /* Iterate over all NFA states in the state set of this superstate. */
1045 for (nfa_state
= df
->present
->contents
;
1047 nfa_state
= nfa_state
->cdr
)
1049 struct rx_nfa_edge
*e
;
1050 /* Iterate over all edges of each NFA state. */
1051 for (e
= nfa_state
->car
->edges
; e
; e
= e
->next
)
1052 /* If we find an edge that is labeled with
1053 * the characters we are solving for.....
1055 if ((e
->type
== ne_cset
)
1056 && rx_bitset_is_subset (rx
->local_cset_size
,
1060 struct rx_nfa_state
*n
= e
->dest
;
1061 struct rx_possible_future
*pf
;
1062 /* ....search the partial epsilon closures of the destination
1063 * of that edge for a path that involves the same set of
1064 * side effects we are solving for.
1065 * If we find such a RX_POSSIBLE_FUTURE, we add members to the
1066 * stateset we are computing.
1068 for (pf
= rx_state_possible_futures (rx
, n
); pf
; pf
= pf
->next
)
1069 if (pf
->effects
== df
->effects
)
1071 struct rx_superset
* old_sol
;
1073 solution
= rx_superstate_eclosure_union (rx
, solution
,
1077 rx_protect_superset (rx
, solution
);
1078 rx_release_superset (rx
, old_sol
);
1082 /* It is possible that the RX_DISTINCT_FUTURE we are working on has
1083 * the empty set of NFA states as its definition. In that case, this
1084 * is a failure point.
1086 if (solution
== nil_set
)
1088 df
->future_frame
.inx
= (void *) rx_backtrack
;
1089 df
->future_frame
.data
= 0;
1090 df
->future_frame
.data_2
= 0;
1093 dest
= rx_superstate (rx
, solution
);
1094 rx_release_superset (rx
, solution
);
1099 struct rx_distinct_future
*dft
;
1101 df
->prev_same_dest
->next_same_dest
= 0;
1105 dft
->future_frame
.inx
= rx
->instruction_table
[rx_next_char
];
1106 dft
->future_frame
.data
= (void *) dest
->transitions
;
1107 dft
->future_frame
.data_2
= (void *) dest
->contents
->is_final
;
1108 dft
= dft
->next_same_dest
;
1110 df
->prev_same_dest
->next_same_dest
= df
;
1112 if (!dest
->transition_refs
)
1113 dest
->transition_refs
= df
;
1116 struct rx_distinct_future
*dft
;
1117 dft
= dest
->transition_refs
->next_same_dest
;
1118 dest
->transition_refs
->next_same_dest
= df
->next_same_dest
;
1119 df
->next_same_dest
->prev_same_dest
= dest
->transition_refs
;
1120 df
->next_same_dest
= dft
;
1121 dft
->prev_same_dest
= df
;
1127 /* This takes a superstate and a character, and computes some edges
1128 * from the superstate NFA. In particular, this computes all edges
1129 * that lead from SUPERSTATE given CHR. This function also
1130 * computes the set of characters that share this edge set.
1131 * This returns 0 on allocation error.
1132 * The character set and list of edges are returned through
1133 * the paramters CSETOUT and DFOUT.
1138 compute_super_edge (struct rx
*rx
, struct rx_distinct_future
**dfout
,
1139 rx_Bitset csetout
, struct rx_superstate
*superstate
,
1143 compute_super_edge (rx
, dfout
, csetout
, superstate
, chr
)
1145 struct rx_distinct_future
**dfout
;
1147 struct rx_superstate
*superstate
;
1151 struct rx_superset
*stateset
= superstate
->contents
;
1153 /* To compute the set of characters that share edges with CHR,
1154 * we start with the full character set, and subtract.
1156 rx_bitset_universe (rx
->local_cset_size
, csetout
);
1159 /* Iterate over the NFA states in the superstate state-set. */
1160 while (stateset
->car
)
1162 struct rx_nfa_edge
*e
;
1163 for (e
= stateset
->car
->edges
; e
; e
= e
->next
)
1164 if (e
->type
== ne_cset
)
1166 if (!RX_bitset_member (e
->params
.cset
, chr
))
1167 /* An edge that doesn't apply at least tells us some characters
1168 * that don't share the same edge set as CHR.
1170 rx_bitset_difference (rx
->local_cset_size
, csetout
, e
->params
.cset
);
1173 /* If we find an NFA edge that applies, we make sure there
1174 * are corresponding edges in the superstate NFA.
1177 struct rx_distinct_future
* saved
;
1179 *dfout
= include_futures (rx
, *dfout
, e
->dest
, superstate
);
1182 struct rx_distinct_future
* df
;
1185 df
->next_same_super_edge
[1]->next_same_super_edge
[0] = 0;
1188 struct rx_distinct_future
*dft
;
1190 df
= df
->next_same_super_edge
[0];
1192 if (dft
->future
&& dft
->future
->transition_refs
== dft
)
1194 dft
->future
->transition_refs
= dft
->next_same_dest
;
1195 if (dft
->future
->transition_refs
== dft
)
1196 dft
->future
->transition_refs
= 0;
1198 dft
->next_same_dest
->prev_same_dest
= dft
->prev_same_dest
;
1199 dft
->prev_same_dest
->next_same_dest
= dft
->next_same_dest
;
1200 rx_cache_free (rx
->cache
, sizeof (*dft
), (char *)dft
);
1205 /* We also trim the character set a bit. */
1206 rx_bitset_intersection (rx
->local_cset_size
,
1207 csetout
, e
->params
.cset
);
1210 stateset
= stateset
->cdr
;
1216 /* This is a constructor for RX_SUPER_EDGE structures. These are
1217 * wrappers for lists of superstate NFA edges that share character sets labels.
1218 * If a transition class contains more than one rx_distinct_future (superstate
1219 * edge), then it represents a non-determinism in the superstate NFA.
1223 static struct rx_super_edge
*
1224 rx_super_edge (struct rx
*rx
,
1225 struct rx_superstate
*super
, rx_Bitset cset
,
1226 struct rx_distinct_future
*df
)
1228 static struct rx_super_edge
*
1229 rx_super_edge (rx
, super
, cset
, df
)
1231 struct rx_superstate
*super
;
1233 struct rx_distinct_future
*df
;
1236 struct rx_super_edge
*tc
;
1239 tc_size
= ( sizeof (struct rx_super_edge
)
1240 + rx_sizeof_bitset (rx
->local_cset_size
));
1242 tc
= ((struct rx_super_edge
*)
1243 rx_cache_malloc (rx
->cache
, tc_size
));
1248 tc
->next
= super
->edges
;
1250 tc
->rx_backtrack_frame
.inx
= rx
->instruction_table
[rx_backtrack_point
];
1251 tc
->rx_backtrack_frame
.data
= 0;
1252 tc
->rx_backtrack_frame
.data_2
= (void *) tc
;
1254 tc
->cset
= (rx_Bitset
) ((char *) tc
+ sizeof (*tc
));
1255 rx_bitset_assign (rx
->local_cset_size
, tc
->cset
, cset
);
1258 struct rx_distinct_future
* dfp
= df
;
1259 df
->next_same_super_edge
[1]->next_same_super_edge
[0] = 0;
1263 dfp
= dfp
->next_same_super_edge
[0];
1265 df
->next_same_super_edge
[1]->next_same_super_edge
[0] = df
;
1271 /* There are three kinds of cache miss. The first occurs when a
1272 * transition is taken that has never been computed during the
1273 * lifetime of the source superstate. That cache miss is handled by
1274 * calling COMPUTE_SUPER_EDGE. The second kind of cache miss
1275 * occurs when the destination superstate of a transition doesn't
1276 * exist. SOLVE_DESTINATION is used to construct the destination superstate.
1277 * Finally, the third kind of cache miss occurs when the destination
1278 * superstate of a transition is in a `semi-free state'. That case is
1279 * handled by UNFREE_SUPERSTATE.
1281 * The function of HANDLE_CACHE_MISS is to figure out which of these
1287 install_partial_transition (struct rx_superstate
*super
,
1288 struct rx_inx
*answer
,
1289 RX_subset set
, int offset
)
1292 install_partial_transition (super
, answer
, set
, offset
)
1293 struct rx_superstate
*super
;
1294 struct rx_inx
*answer
;
1300 int end
= start
+ 32;
1302 struct rx_inx
* transitions
= super
->transitions
;
1307 transitions
[start
] = *answer
;
1316 rx_handle_cache_miss (struct rx
*rx
, struct rx_superstate
*super
, unsigned char chr
, void *data
)
1319 rx_handle_cache_miss (rx
, super
, chr
, data
)
1321 struct rx_superstate
*super
;
1326 int offset
= chr
/ RX_subset_bits
;
1327 struct rx_distinct_future
*df
= data
;
1329 if (!df
) /* must be the shared_cache_miss_frame */
1331 /* Perhaps this is just a transition waiting to be filled. */
1332 struct rx_super_edge
*tc
;
1333 RX_subset mask
= rx_subset_singletons
[chr
% RX_subset_bits
];
1335 for (tc
= super
->edges
; tc
; tc
= tc
->next
)
1336 if (tc
->cset
[offset
] & mask
)
1338 struct rx_inx
* answer
;
1340 answer
= ((tc
->options
->next_same_super_edge
[0] != tc
->options
)
1341 ? &tc
->rx_backtrack_frame
1343 ? &df
->side_effects_frame
1344 : &df
->future_frame
));
1345 install_partial_transition (super
, answer
,
1346 tc
->cset
[offset
], offset
* 32);
1349 /* Otherwise, it's a flushed or newly encountered edge. */
1351 char cset_space
[1024]; /* this limit is far from unreasonable */
1353 struct rx_inx
*answer
;
1355 if (rx_sizeof_bitset (rx
->local_cset_size
) > sizeof (cset_space
))
1356 return 0; /* If the arbitrary limit is hit, always fail */
1358 trcset
= (rx_Bitset
)cset_space
;
1359 rx_lock_superstate (rx
, super
);
1360 if (!compute_super_edge (rx
, &df
, trcset
, super
, chr
))
1362 rx_unlock_superstate (rx
, super
);
1365 if (!df
) /* We just computed the fail transition. */
1367 static struct rx_inx
1368 shared_fail_frame
= { 0, 0, (void *)rx_backtrack
, 0 };
1369 answer
= &shared_fail_frame
;
1373 tc
= rx_super_edge (rx
, super
, trcset
, df
);
1376 rx_unlock_superstate (rx
, super
);
1379 answer
= ((tc
->options
->next_same_super_edge
[0] != tc
->options
)
1380 ? &tc
->rx_backtrack_frame
1382 ? &df
->side_effects_frame
1383 : &df
->future_frame
));
1385 install_partial_transition (super
, answer
,
1386 trcset
[offset
], offset
* 32);
1387 rx_unlock_superstate (rx
, super
);
1391 else if (df
->future
) /* A cache miss on an edge with a future? Must be
1392 * a semi-free destination. */
1394 if (df
->future
->is_semifree
)
1395 refresh_semifree_superstate (rx
->cache
, df
->future
);
1396 return &df
->future_frame
;
1399 /* no future superstate on an existing edge */
1401 rx_lock_superstate (rx
, super
);
1402 if (!solve_destination (rx
, df
))
1404 rx_unlock_superstate (rx
, super
);
1408 && (df
->edge
->options
->next_same_super_edge
[0] == df
->edge
->options
))
1409 install_partial_transition (super
, &df
->future_frame
,
1410 df
->edge
->cset
[offset
], offset
* 32);
1411 rx_unlock_superstate (rx
, super
);
1412 return &df
->future_frame
;