]> jfr.im git - irc/evilnet/x3.git/blame - rx/rxsuper.c
Added tempshun support to OpServ trace and addalert
[irc/evilnet/x3.git] / rx / rxsuper.c
CommitLineData
d76ed9a9 1
2
3/* Copyright (C) 1995, 1996 Tom Lord
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU Library General Public License as published by
7 * the Free Software Foundation; either version 2, or (at your option)
8 * any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU Library General Public License for more details.
14 *
15 * You should have received a copy of the GNU Library General Public License
16 * along with this software; see the file COPYING. If not, write to
17 * the Free Software Foundation, 59 Temple Place - Suite 330,
18 * Boston, MA 02111-1307, USA.
19 */
20
21\f
22
23#include "rxall.h"
24#include "rxsuper.h"
25
26\f
27/* The functions in the next several pages define the lazy-NFA-conversion used
28 * by matchers. The input to this construction is an NFA such as
29 * is built by compactify_nfa (rx.c). The output is the superNFA.
30 */
31
32/* Match engines can use arbitrary values for opcodes. So, the parse tree
33 * is built using instructions names (enum rx_opcode), but the superstate
34 * nfa is populated with mystery opcodes (void *).
35 *
36 * For convenience, here is an id table. The opcodes are == to their inxs
37 *
38 * The lables in re_search_2 would make good values for instructions.
39 */
40
41void * rx_id_instruction_table[rx_num_instructions] =
42{
43 (void *) rx_backtrack_point,
44 (void *) rx_do_side_effects,
45 (void *) rx_cache_miss,
46 (void *) rx_next_char,
47 (void *) rx_backtrack,
48 (void *) rx_error_inx
49};
50
51\f
52
53
54/* The partially instantiated superstate graph has a transition
55 * table at every node. There is one entry for every character.
56 * This fills in the transition for a set.
57 */
58#ifdef __STDC__
59static void
60install_transition (struct rx_superstate *super,
61 struct rx_inx *answer, rx_Bitset trcset)
62#else
63static void
64install_transition (super, answer, trcset)
65 struct rx_superstate *super;
66 struct rx_inx *answer;
67 rx_Bitset trcset;
68#endif
69{
70 struct rx_inx * transitions = super->transitions;
71 int chr;
72 for (chr = 0; chr < 256; )
73 if (!*trcset)
74 {
75 ++trcset;
76 chr += 32;
77 }
78 else
79 {
80 RX_subset sub = *trcset;
81 RX_subset mask = 1;
82 int bound = chr + 32;
83 while (chr < bound)
84 {
85 if (sub & mask)
86 transitions [chr] = *answer;
87 ++chr;
88 mask <<= 1;
89 }
90 ++trcset;
91 }
92}
93
94
95#ifdef __STDC__
96static int
97qlen (struct rx_superstate * q)
98#else
99static int
100qlen (q)
101 struct rx_superstate * q;
102#endif
103{
104 int count = 1;
105 struct rx_superstate * it;
106 if (!q)
107 return 0;
108 for (it = q->next_recyclable; it != q; it = it->next_recyclable)
109 ++count;
110 return count;
111}
112
113#ifdef __STDC__
114static void
115check_cache (struct rx_cache * cache)
116#else
117static void
118check_cache (cache)
119 struct rx_cache * cache;
120#endif
121{
122 struct rx_cache * you_fucked_up = 0;
123 int total = cache->superstates;
124 int semi = cache->semifree_superstates;
125 if (semi != qlen (cache->semifree_superstate))
126 check_cache (you_fucked_up);
127 if ((total - semi) != qlen (cache->lru_superstate))
128 check_cache (you_fucked_up);
129}
130#ifdef __STDC__
131char *
132rx_cache_malloc (struct rx_cache * cache, int size)
133#else
134char *
135rx_cache_malloc (cache, size)
136 struct rx_cache * cache;
137 int size;
138#endif
139{
140 char * answer;
141 answer = (char *)malloc (size);
142 if (answer)
143 cache->bytes_used += size;
144 return answer;
145}
146
147
148#ifdef __STDC__
149void
150rx_cache_free (struct rx_cache * cache, int size, char * mem)
151#else
152void
153rx_cache_free (cache, size, mem)
154 struct rx_cache * cache;
155 int size;
156 char * mem;
157#endif
158{
159 free (mem);
160 cache->bytes_used -= size;
161}
162
163
164
165/* When a superstate is old and neglected, it can enter a
166 * semi-free state. A semi-free state is slated to die.
167 * Incoming transitions to a semi-free state are re-written
168 * to cause an (interpreted) fault when they are taken.
169 * The fault handler revives the semi-free state, patches
170 * incoming transitions back to normal, and continues.
171 *
172 * The idea is basicly to free in two stages, aborting
173 * between the two if the state turns out to be useful again.
174 * When a free is aborted, the rescued superstate is placed
175 * in the most-favored slot to maximize the time until it
176 * is next semi-freed.
177 *
178 * Overall, this approximates truly freeing superstates in
179 * lru order, but does not involve the cost of updating perfectly
180 * accurate timestamps every time a superstate is touched.
181 */
182
183#ifdef __STDC__
184static void
185semifree_superstate (struct rx_cache * cache)
186#else
187static void
188semifree_superstate (cache)
189 struct rx_cache * cache;
190#endif
191{
192 int disqualified = cache->semifree_superstates;
193 if (disqualified == cache->superstates)
194 return;
195 while (cache->lru_superstate->locks)
196 {
197 cache->lru_superstate = cache->lru_superstate->next_recyclable;
198 ++disqualified;
199 if (disqualified == cache->superstates)
200 return;
201 }
202 {
203 struct rx_superstate * it = cache->lru_superstate;
204 it->next_recyclable->prev_recyclable = it->prev_recyclable;
205 it->prev_recyclable->next_recyclable = it->next_recyclable;
206 cache->lru_superstate = (it == it->next_recyclable
207 ? 0
208 : it->next_recyclable);
209 if (!cache->semifree_superstate)
210 {
211 cache->semifree_superstate = it;
212 it->next_recyclable = it;
213 it->prev_recyclable = it;
214 }
215 else
216 {
217 it->prev_recyclable = cache->semifree_superstate->prev_recyclable;
218 it->next_recyclable = cache->semifree_superstate;
219 it->prev_recyclable->next_recyclable = it;
220 it->next_recyclable->prev_recyclable = it;
221 }
222 {
223 struct rx_distinct_future *df;
224 it->is_semifree = 1;
225 ++cache->semifree_superstates;
226 df = it->transition_refs;
227 if (df)
228 {
229 df->prev_same_dest->next_same_dest = 0;
230 for (df = it->transition_refs; df; df = df->next_same_dest)
231 {
232 df->future_frame.inx = cache->instruction_table[rx_cache_miss];
233 df->future_frame.data = 0;
234 df->future_frame.data_2 = (void *) df;
235 /* If there are any NEXT-CHAR instruction frames that
236 * refer to this state, we convert them to CACHE-MISS frames.
237 */
238 if (!df->effects
239 && (df->edge->options->next_same_super_edge[0]
240 == df->edge->options))
241 install_transition (df->present, &df->future_frame,
242 df->edge->cset);
243 }
244 df = it->transition_refs;
245 df->prev_same_dest->next_same_dest = df;
246 }
247 }
248 }
249}
250
251
252#ifdef __STDC__
253static void
254refresh_semifree_superstate (struct rx_cache * cache,
255 struct rx_superstate * super)
256#else
257static void
258refresh_semifree_superstate (cache, super)
259 struct rx_cache * cache;
260 struct rx_superstate * super;
261#endif
262{
263 struct rx_distinct_future *df;
264
265 if (super->transition_refs)
266 {
267 super->transition_refs->prev_same_dest->next_same_dest = 0;
268 for (df = super->transition_refs; df; df = df->next_same_dest)
269 {
270 df->future_frame.inx = cache->instruction_table[rx_next_char];
271 df->future_frame.data = (void *) super->transitions;
272 df->future_frame.data_2 = (void *)(super->contents->is_final);
273 /* CACHE-MISS instruction frames that refer to this state,
274 * must be converted to NEXT-CHAR frames.
275 */
276 if (!df->effects
277 && (df->edge->options->next_same_super_edge[0]
278 == df->edge->options))
279 install_transition (df->present, &df->future_frame,
280 df->edge->cset);
281 }
282 super->transition_refs->prev_same_dest->next_same_dest
283 = super->transition_refs;
284 }
285 if (cache->semifree_superstate == super)
286 cache->semifree_superstate = (super->prev_recyclable == super
287 ? 0
288 : super->prev_recyclable);
289 super->next_recyclable->prev_recyclable = super->prev_recyclable;
290 super->prev_recyclable->next_recyclable = super->next_recyclable;
291
292 if (!cache->lru_superstate)
293 (cache->lru_superstate
294 = super->next_recyclable
295 = super->prev_recyclable
296 = super);
297 else
298 {
299 super->next_recyclable = cache->lru_superstate;
300 super->prev_recyclable = cache->lru_superstate->prev_recyclable;
301 super->next_recyclable->prev_recyclable = super;
302 super->prev_recyclable->next_recyclable = super;
303 }
304 super->is_semifree = 0;
305 --cache->semifree_superstates;
306}
307
308#ifdef __STDC__
309void
310rx_refresh_this_superstate (struct rx_cache * cache,
311 struct rx_superstate * superstate)
312#else
313void
314rx_refresh_this_superstate (cache, superstate)
315 struct rx_cache * cache;
316 struct rx_superstate * superstate;
317#endif
318{
319 if (superstate->is_semifree)
320 refresh_semifree_superstate (cache, superstate);
321 else if (cache->lru_superstate == superstate)
322 cache->lru_superstate = superstate->next_recyclable;
323 else if (superstate != cache->lru_superstate->prev_recyclable)
324 {
325 superstate->next_recyclable->prev_recyclable
326 = superstate->prev_recyclable;
327 superstate->prev_recyclable->next_recyclable
328 = superstate->next_recyclable;
329 superstate->next_recyclable = cache->lru_superstate;
330 superstate->prev_recyclable = cache->lru_superstate->prev_recyclable;
331 superstate->next_recyclable->prev_recyclable = superstate;
332 superstate->prev_recyclable->next_recyclable = superstate;
333 }
334}
335
336#ifdef __STDC__
337static void
338release_superset_low (struct rx_cache * cache,
339 struct rx_superset *set)
340#else
341static void
342release_superset_low (cache, set)
343 struct rx_cache * cache;
344 struct rx_superset *set;
345#endif
346{
347 if (!--set->refs)
348 {
349 if (set->starts_for)
350 set->starts_for->start_set = 0;
351
352 if (set->cdr)
353 release_superset_low (cache, set->cdr);
354
355 rx_hash_free (&set->hash_item, &cache->superset_hash_rules);
356 rx_cache_free (cache,
357 sizeof (struct rx_superset),
358 (char *)set);
359 }
360}
361
362#ifdef __STDC__
363void
364rx_release_superset (struct rx *rx,
365 struct rx_superset *set)
366#else
367void
368rx_release_superset (rx, set)
369 struct rx *rx;
370 struct rx_superset *set;
371#endif
372{
373 release_superset_low (rx->cache, set);
374}
375
376/* This tries to add a new superstate to the superstate freelist.
377 * It might, as a result, free some edge pieces or hash tables.
378 * If nothing can be freed because too many locks are being held, fail.
379 */
380
381#ifdef __STDC__
382static int
383rx_really_free_superstate (struct rx_cache * cache)
384#else
385static int
386rx_really_free_superstate (cache)
387 struct rx_cache * cache;
388#endif
389{
390 int locked_superstates = 0;
391 struct rx_superstate * it;
392
393 if (!cache->superstates)
394 return 0;
395
396 /* scale these */
397 while ((cache->hits + cache->misses) > cache->superstates)
398 {
399 cache->hits >>= 1;
400 cache->misses >>= 1;
401 }
402
403 /* semifree superstates faster than they are freed
404 * so popular states can be rescued.
405 */
406 semifree_superstate (cache);
407 semifree_superstate (cache);
408 semifree_superstate (cache);
409
410#if 0
411 Redundant because semifree_superstate already
412 makes this check;
413
414
415 while (cache->semifree_superstate && cache->semifree_superstate->locks)
416 {
417 refresh_semifree_superstate (cache, cache->semifree_superstate);
418 ++locked_superstates;
419 if (locked_superstates == cache->superstates)
420 return 0;
421 }
422
423
424 if (cache->semifree_superstate)
425 insert the "it =" block which follows this "if 0" code;
426 else
427 {
428 while (cache->lru_superstate->locks)
429 {
430 cache->lru_superstate = cache->lru_superstate->next_recyclable;
431 ++locked_superstates;
432 if (locked_superstates == cache->superstates)
433 return 0;
434 }
435 it = cache->lru_superstate;
436 it->next_recyclable->prev_recyclable = it->prev_recyclable;
437 it->prev_recyclable->next_recyclable = it->next_recyclable;
438 cache->lru_superstate = ((it == it->next_recyclable)
439 ? 0
440 : it->next_recyclable);
441 }
442#endif
443
444 if (!cache->semifree_superstate)
445 return 0;
446
447 {
448 it = cache->semifree_superstate;
449 it->next_recyclable->prev_recyclable = it->prev_recyclable;
450 it->prev_recyclable->next_recyclable = it->next_recyclable;
451 cache->semifree_superstate = ((it == it->next_recyclable)
452 ? 0
453 : it->next_recyclable);
454 --cache->semifree_superstates;
455 }
456
457
458 if (it->transition_refs)
459 {
460 struct rx_distinct_future *df;
461 for (df = it->transition_refs,
462 df->prev_same_dest->next_same_dest = 0;
463 df;
464 df = df->next_same_dest)
465 {
466 df->future_frame.inx = cache->instruction_table[rx_cache_miss];
467 df->future_frame.data = 0;
468 df->future_frame.data_2 = (void *) df;
469 df->future = 0;
470 }
471 it->transition_refs->prev_same_dest->next_same_dest =
472 it->transition_refs;
473 }
474 {
475 struct rx_super_edge *tc = it->edges;
476 while (tc)
477 {
478 struct rx_distinct_future * df;
479 struct rx_super_edge *tct = tc->next;
480 df = tc->options;
481 df->next_same_super_edge[1]->next_same_super_edge[0] = 0;
482 while (df)
483 {
484 struct rx_distinct_future *dft = df;
485 df = df->next_same_super_edge[0];
486
487
488 if (dft->future && dft->future->transition_refs == dft)
489 {
490 dft->future->transition_refs = dft->next_same_dest;
491 if (dft->future->transition_refs == dft)
492 dft->future->transition_refs = 0;
493 }
494 dft->next_same_dest->prev_same_dest = dft->prev_same_dest;
495 dft->prev_same_dest->next_same_dest = dft->next_same_dest;
496 rx_cache_free (cache,
497 sizeof (struct rx_distinct_future),
498 (char *)dft);
499 }
500 rx_cache_free (cache,
501 sizeof (struct rx_super_edge),
502 (char *)tc);
503 tc = tct;
504 }
505 }
506
507 if (it->contents->superstate == it)
508 it->contents->superstate = 0;
509 release_superset_low (cache, it->contents);
510 rx_cache_free (cache,
511 (sizeof (struct rx_superstate)
512 + cache->local_cset_size * sizeof (struct rx_inx)),
513 (char *)it);
514 --cache->superstates;
515 return 1;
516}
517
518
519#ifdef __STDC__
520static char *
521rx_cache_malloc_or_get (struct rx_cache * cache, int size)
522#else
523static char *
524rx_cache_malloc_or_get (cache, size)
525 struct rx_cache * cache;
526 int size;
527#endif
528{
529 while ( (cache->bytes_used + size > cache->bytes_allowed)
530 && rx_really_free_superstate (cache))
531 ;
532
533 return rx_cache_malloc (cache, size);
534}
535
536\f
537
538#ifdef __STDC__
539static int
540supersetcmp (void * va, void * vb)
541#else
542static int
543supersetcmp (va, vb)
544 void * va;
545 void * vb;
546#endif
547{
548 struct rx_superset * a = (struct rx_superset *)va;
549 struct rx_superset * b = (struct rx_superset *)vb;
550 return ( (a == b)
551 || (a && b && (a->id == b->id) && (a->car == b->car) && (a->cdr == b->cdr)));
552}
553
554#define rx_abs(A) (((A) > 0) ? (A) : -(A))
555
556
557#ifdef __STDC__
558static struct rx_hash_item *
559superset_allocator (struct rx_hash_rules * rules, void * val)
560#else
561static struct rx_hash_item *
562superset_allocator (rules, val)
563 struct rx_hash_rules * rules;
564 void * val;
565#endif
566{
567 struct rx_cache * cache;
568 struct rx_superset * template;
569 struct rx_superset * newset;
570
571 cache = ((struct rx_cache *)
572 ((char *)rules
573 - (unsigned long)(&((struct rx_cache *)0)->superset_hash_rules)));
574 template = (struct rx_superset *)val;
575 newset = ((struct rx_superset *)
576 rx_cache_malloc (cache, sizeof (*template)));
577 if (!newset)
578 return 0;
579 {
580 int cdrfinal;
581 int cdredges;
582
583 cdrfinal = (template->cdr
584 ? template->cdr->is_final
585 : 0);
586 cdredges = (template->cdr
587 ? template->cdr->has_cset_edges
588 : 0);
589
590 newset->is_final = (rx_abs (template->car->is_final) > rx_abs(cdrfinal)
591 ? template->car->is_final
592 : template->cdr->is_final);
593 newset->has_cset_edges = (template->car->has_cset_edges || cdredges);
594 }
595 newset->refs = 0;
596 newset->id = template->id;
597 newset->car = template->car;
598 newset->cdr = template->cdr;
599 rx_protect_superset (rx, template->cdr);
600 newset->superstate = 0;
601 newset->starts_for = 0;
602 newset->hash_item.data = (void *)newset;
603 newset->hash_item.binding = 0;
604 return &newset->hash_item;
605}
606
607#ifdef __STDC__
608static struct rx_hash *
609super_hash_allocator (struct rx_hash_rules * rules)
610#else
611static struct rx_hash *
612super_hash_allocator (rules)
613 struct rx_hash_rules * rules;
614#endif
615{
616 struct rx_cache * cache;
617 cache = ((struct rx_cache *)
618 ((char *)rules
619 - (unsigned long)(&((struct rx_cache *)0)->superset_hash_rules)));
620 return ((struct rx_hash *)
621 rx_cache_malloc (cache, sizeof (struct rx_hash)));
622}
623
624
625#ifdef __STDC__
626static void
627super_hash_liberator (struct rx_hash * hash, struct rx_hash_rules * rules)
628#else
629static void
630super_hash_liberator (hash, rules)
631 struct rx_hash * hash;
632 struct rx_hash_rules * rules;
633#endif
634{
635 struct rx_cache * cache
636 = ((struct rx_cache *)
637 (char *)rules - (long)(&((struct rx_cache *)0)->superset_hash_rules));
638 rx_cache_free (cache, sizeof (struct rx_hash), (char *)hash);
639}
640
641#ifdef __STDC__
642static void
643superset_hash_item_liberator (struct rx_hash_item * it,
644 struct rx_hash_rules * rules)
645#else
646static void
647superset_hash_item_liberator (it, rules)
648 struct rx_hash_item * it;
649 struct rx_hash_rules * rules;
650#endif
651{
652}
653
654int rx_cache_bound = 3;
655static int rx_default_cache_got = 0;
656
657static struct rx_cache default_cache =
658{
659 {
660 supersetcmp,
661 super_hash_allocator,
662 super_hash_liberator,
663 superset_allocator,
664 superset_hash_item_liberator,
665 },
666 0,
667 0,
668 0,
669 0,
670 0,
671 0,
672 0,
673 RX_DEFAULT_DFA_CACHE_SIZE,
674 0,
675 256,
676 rx_id_instruction_table,
677 {
678 0,
679 0,
680 0,
681 0,
682 0
683 }
684};
685struct rx_cache * rx_default_cache = &default_cache;
686
687/* This adds an element to a superstate set. These sets are lists, such
688 * that lists with == elements are ==. The empty set is returned by
689 * superset_cons (rx, 0, 0) and is NOT equivelent to
690 * (struct rx_superset)0.
691 */
692
693#ifdef __STDC__
694struct rx_superset *
695rx_superset_cons (struct rx * rx,
696 struct rx_nfa_state *car, struct rx_superset *cdr)
697#else
698struct rx_superset *
699rx_superset_cons (rx, car, cdr)
700 struct rx * rx;
701 struct rx_nfa_state *car;
702 struct rx_superset *cdr;
703#endif
704{
705 struct rx_cache * cache = rx->cache;
706 if (!car && !cdr)
707 {
708 if (!cache->empty_superset)
709 {
710 cache->empty_superset
711 = ((struct rx_superset *)
712 rx_cache_malloc (cache, sizeof (struct rx_superset)));
713 if (!cache->empty_superset)
714 return 0;
715 rx_bzero ((char *)cache->empty_superset, sizeof (struct rx_superset));
716 cache->empty_superset->refs = 1000;
717 }
718 return cache->empty_superset;
719 }
720 {
721 struct rx_superset template;
722 struct rx_hash_item * hit;
723 template.car = car;
724 template.cdr = cdr;
725 template.id = rx->rx_id;
726 rx_protect_superset (rx, template.cdr);
727 hit = rx_hash_store (&cache->superset_table,
728 (unsigned long)car ^ car->id ^ (unsigned long)cdr,
729 (void *)&template,
730 &cache->superset_hash_rules);
731 rx_protect_superset (rx, template.cdr);
732 return (hit
733 ? (struct rx_superset *)hit->data
734 : 0);
735 }
736}
737
738/* This computes a union of two NFA state sets. The sets do not have the
739 * same representation though. One is a RX_SUPERSET structure (part
740 * of the superstate NFA) and the other is an NFA_STATE_SET (part of the NFA).
741 */
742
743#ifdef __STDC__
744struct rx_superset *
745rx_superstate_eclosure_union (struct rx * rx, struct rx_superset *set, struct rx_nfa_state_set *ecl)
746#else
747struct rx_superset *
748rx_superstate_eclosure_union (rx, set, ecl)
749 struct rx * rx;
750 struct rx_superset *set;
751 struct rx_nfa_state_set *ecl;
752#endif
753{
754 if (!ecl)
755 return set;
756
757 if (!set->car)
758 return rx_superset_cons (rx, ecl->car,
759 rx_superstate_eclosure_union (rx, set, ecl->cdr));
760 if (set->car == ecl->car)
761 return rx_superstate_eclosure_union (rx, set, ecl->cdr);
762
763 {
764 struct rx_superset * tail;
765 struct rx_nfa_state * first;
766
767 if (set->car->id < ecl->car->id)
768 {
769 tail = rx_superstate_eclosure_union (rx, set->cdr, ecl);
770 first = set->car;
771 }
772 else
773 {
774 tail = rx_superstate_eclosure_union (rx, set, ecl->cdr);
775 first = ecl->car;
776 }
777 if (!tail)
778 return 0;
779 else
780 {
781 struct rx_superset * answer;
782 answer = rx_superset_cons (rx, first, tail);
783 if (!answer)
784 {
785 rx_protect_superset (rx, tail);
786 rx_release_superset (rx, tail);
787 return 0;
788 }
789 else
790 return answer;
791 }
792 }
793}
794
795
796\f
797
798/*
799 * This makes sure that a list of rx_distinct_futures contains
800 * a future for each possible set of side effects in the eclosure
801 * of a given state. This is some of the work of filling in a
802 * superstate transition.
803 */
804
805#ifdef __STDC__
806static struct rx_distinct_future *
807include_futures (struct rx *rx,
808 struct rx_distinct_future *df, struct rx_nfa_state
809 *state, struct rx_superstate *superstate)
810#else
811static struct rx_distinct_future *
812include_futures (rx, df, state, superstate)
813 struct rx *rx;
814 struct rx_distinct_future *df;
815 struct rx_nfa_state *state;
816 struct rx_superstate *superstate;
817#endif
818{
819 struct rx_possible_future *future;
820 struct rx_cache * cache = rx->cache;
821 for (future = rx_state_possible_futures (rx, state); future; future = future->next)
822 {
823 struct rx_distinct_future *dfp;
824 struct rx_distinct_future *insert_before = 0;
825 if (df)
826 df->next_same_super_edge[1]->next_same_super_edge[0] = 0;
827 for (dfp = df; dfp; dfp = dfp->next_same_super_edge[0])
828 if (dfp->effects == future->effects)
829 break;
830 else
831 {
832 int order = rx->se_list_cmp (rx, dfp->effects, future->effects);
833 if (order > 0)
834 {
835 insert_before = dfp;
836 dfp = 0;
837 break;
838 }
839 }
840 if (df)
841 df->next_same_super_edge[1]->next_same_super_edge[0] = df;
842 if (!dfp)
843 {
844 dfp
845 = ((struct rx_distinct_future *)
846 rx_cache_malloc (cache,
847 sizeof (struct rx_distinct_future)));
848 if (!dfp)
849 return 0;
850 if (!df)
851 {
852 df = insert_before = dfp;
853 df->next_same_super_edge[0] = df->next_same_super_edge[1] = df;
854 }
855 else if (!insert_before)
856 insert_before = df;
857 else if (insert_before == df)
858 df = dfp;
859
860 dfp->next_same_super_edge[0] = insert_before;
861 dfp->next_same_super_edge[1]
862 = insert_before->next_same_super_edge[1];
863 dfp->next_same_super_edge[1]->next_same_super_edge[0] = dfp;
864 dfp->next_same_super_edge[0]->next_same_super_edge[1] = dfp;
865 dfp->next_same_dest = dfp->prev_same_dest = dfp;
866 dfp->future = 0;
867 dfp->present = superstate;
868 dfp->future_frame.inx = rx->instruction_table[rx_cache_miss];
869 dfp->future_frame.data = 0;
870 dfp->future_frame.data_2 = (void *) dfp;
871 dfp->side_effects_frame.inx
872 = rx->instruction_table[rx_do_side_effects];
873 dfp->side_effects_frame.data = 0;
874 dfp->side_effects_frame.data_2 = (void *) dfp;
875 dfp->effects = future->effects;
876 }
877 }
878 return df;
879}
880\f
881
882
883/* This constructs a new superstate from its state set. The only
884 * complexity here is memory management.
885 */
886#ifdef __STDC__
887struct rx_superstate *
888rx_superstate (struct rx *rx,
889 struct rx_superset *set)
890#else
891struct rx_superstate *
892rx_superstate (rx, set)
893 struct rx *rx;
894 struct rx_superset *set;
895#endif
896{
897 struct rx_cache * cache = rx->cache;
898 struct rx_superstate * superstate = 0;
899
900 /* Does the superstate already exist in the cache? */
901 if (set->superstate)
902 {
903 if (set->superstate->rx_id != rx->rx_id)
904 {
905 /* Aha. It is in the cache, but belongs to a superstate
906 * that refers to an NFA that no longer exists.
907 * (We know it no longer exists because it was evidently
908 * stored in the same region of memory as the current nfa
909 * yet it has a different id.)
910 */
911 superstate = set->superstate;
912 if (!superstate->is_semifree)
913 {
914 if (cache->lru_superstate == superstate)
915 {
916 cache->lru_superstate = superstate->next_recyclable;
917 if (cache->lru_superstate == superstate)
918 cache->lru_superstate = 0;
919 }
920 {
921 superstate->next_recyclable->prev_recyclable
922 = superstate->prev_recyclable;
923 superstate->prev_recyclable->next_recyclable
924 = superstate->next_recyclable;
925 if (!cache->semifree_superstate)
926 {
927 (cache->semifree_superstate
928 = superstate->next_recyclable
929 = superstate->prev_recyclable
930 = superstate);
931 }
932 else
933 {
934 superstate->next_recyclable = cache->semifree_superstate;
935 superstate->prev_recyclable
936 = cache->semifree_superstate->prev_recyclable;
937 superstate->next_recyclable->prev_recyclable
938 = superstate;
939 superstate->prev_recyclable->next_recyclable
940 = superstate;
941 cache->semifree_superstate = superstate;
942 }
943 ++cache->semifree_superstates;
944 }
945 }
946 set->superstate = 0;
947 goto handle_cache_miss;
948 }
949 ++cache->hits;
950 superstate = set->superstate;
951
952 rx_refresh_this_superstate (cache, superstate);
953 return superstate;
954 }
955
956 handle_cache_miss:
957
958 /* This point reached only for cache misses. */
959 ++cache->misses;
960#if RX_DEBUG
961 if (rx_debug_trace > 1)
962 {
963 struct rx_superset * setp = set;
964 fprintf (stderr, "Building a superstet %d(%d): ", rx->rx_id, set);
965 while (setp)
966 {
967 fprintf (stderr, "%d ", setp->id);
968 setp = setp->cdr;
969 }
970 fprintf (stderr, "(%d)\n", set);
971 }
972#endif
973
974 {
975 int superstate_size;
976 superstate_size = ( sizeof (*superstate)
977 + ( sizeof (struct rx_inx)
978 * rx->local_cset_size));
979 superstate = ((struct rx_superstate *)
980 rx_cache_malloc_or_get (cache, superstate_size));
981 ++cache->superstates;
982 }
983
984 if (!superstate)
985 return 0;
986
987 if (!cache->lru_superstate)
988 (cache->lru_superstate
989 = superstate->next_recyclable
990 = superstate->prev_recyclable
991 = superstate);
992 else
993 {
994 superstate->next_recyclable = cache->lru_superstate;
995 superstate->prev_recyclable = cache->lru_superstate->prev_recyclable;
996 ( superstate->prev_recyclable->next_recyclable
997 = superstate->next_recyclable->prev_recyclable
998 = superstate);
999 }
1000 superstate->rx_id = rx->rx_id;
1001 superstate->transition_refs = 0;
1002 superstate->locks = 0;
1003 superstate->is_semifree = 0;
1004 set->superstate = superstate;
1005 superstate->contents = set;
1006 rx_protect_superset (rx, set);
1007 superstate->edges = 0;
1008 {
1009 int x;
1010 /* None of the transitions from this superstate are known yet. */
1011 for (x = 0; x < rx->local_cset_size; ++x)
1012 {
1013 struct rx_inx * ifr = &superstate->transitions[x];
1014 ifr->inx = rx->instruction_table [rx_cache_miss];
1015 ifr->data = ifr->data_2 = 0;
1016 }
1017 }
1018 return superstate;
1019}
1020\f
1021
1022/* This computes the destination set of one edge of the superstate NFA.
1023 * Note that a RX_DISTINCT_FUTURE is a superstate edge.
1024 * Returns 0 on an allocation failure.
1025 */
1026
1027#ifdef __STDC__
1028static int
1029solve_destination (struct rx *rx, struct rx_distinct_future *df)
1030#else
1031static int
1032solve_destination (rx, df)
1033 struct rx *rx;
1034 struct rx_distinct_future *df;
1035#endif
1036{
1037 struct rx_super_edge *tc = df->edge;
1038 struct rx_superset *nfa_state;
1039 struct rx_superset *nil_set = rx_superset_cons (rx, 0, 0);
1040 struct rx_superset *solution = nil_set;
1041 struct rx_superstate *dest;
1042
1043 rx_protect_superset (rx, solution);
1044 /* Iterate over all NFA states in the state set of this superstate. */
1045 for (nfa_state = df->present->contents;
1046 nfa_state->car;
1047 nfa_state = nfa_state->cdr)
1048 {
1049 struct rx_nfa_edge *e;
1050 /* Iterate over all edges of each NFA state. */
1051 for (e = nfa_state->car->edges; e; e = e->next)
1052 /* If we find an edge that is labeled with
1053 * the characters we are solving for.....
1054 */
1055 if ((e->type == ne_cset)
1056 && rx_bitset_is_subset (rx->local_cset_size,
1057 tc->cset,
1058 e->params.cset))
1059 {
1060 struct rx_nfa_state *n = e->dest;
1061 struct rx_possible_future *pf;
1062 /* ....search the partial epsilon closures of the destination
1063 * of that edge for a path that involves the same set of
1064 * side effects we are solving for.
1065 * If we find such a RX_POSSIBLE_FUTURE, we add members to the
1066 * stateset we are computing.
1067 */
1068 for (pf = rx_state_possible_futures (rx, n); pf; pf = pf->next)
1069 if (pf->effects == df->effects)
1070 {
1071 struct rx_superset * old_sol;
1072 old_sol = solution;
1073 solution = rx_superstate_eclosure_union (rx, solution,
1074 pf->destset);
1075 if (!solution)
1076 return 0;
1077 rx_protect_superset (rx, solution);
1078 rx_release_superset (rx, old_sol);
1079 }
1080 }
1081 }
1082 /* It is possible that the RX_DISTINCT_FUTURE we are working on has
1083 * the empty set of NFA states as its definition. In that case, this
1084 * is a failure point.
1085 */
1086 if (solution == nil_set)
1087 {
1088 df->future_frame.inx = (void *) rx_backtrack;
1089 df->future_frame.data = 0;
1090 df->future_frame.data_2 = 0;
1091 return 1;
1092 }
1093 dest = rx_superstate (rx, solution);
1094 rx_release_superset (rx, solution);
1095 if (!dest)
1096 return 0;
1097
1098 {
1099 struct rx_distinct_future *dft;
1100 dft = df;
1101 df->prev_same_dest->next_same_dest = 0;
1102 while (dft)
1103 {
1104 dft->future = dest;
1105 dft->future_frame.inx = rx->instruction_table[rx_next_char];
1106 dft->future_frame.data = (void *) dest->transitions;
1107 dft->future_frame.data_2 = (void *) dest->contents->is_final;
1108 dft = dft->next_same_dest;
1109 }
1110 df->prev_same_dest->next_same_dest = df;
1111 }
1112 if (!dest->transition_refs)
1113 dest->transition_refs = df;
1114 else
1115 {
1116 struct rx_distinct_future *dft;
1117 dft = dest->transition_refs->next_same_dest;
1118 dest->transition_refs->next_same_dest = df->next_same_dest;
1119 df->next_same_dest->prev_same_dest = dest->transition_refs;
1120 df->next_same_dest = dft;
1121 dft->prev_same_dest = df;
1122 }
1123 return 1;
1124}
1125
1126
1127/* This takes a superstate and a character, and computes some edges
1128 * from the superstate NFA. In particular, this computes all edges
1129 * that lead from SUPERSTATE given CHR. This function also
1130 * computes the set of characters that share this edge set.
1131 * This returns 0 on allocation error.
1132 * The character set and list of edges are returned through
1133 * the paramters CSETOUT and DFOUT.
1134 */
1135
1136#ifdef __STDC__
1137static int
1138compute_super_edge (struct rx *rx, struct rx_distinct_future **dfout,
1139 rx_Bitset csetout, struct rx_superstate *superstate,
1140 unsigned char chr)
1141#else
1142static int
1143compute_super_edge (rx, dfout, csetout, superstate, chr)
1144 struct rx *rx;
1145 struct rx_distinct_future **dfout;
1146 rx_Bitset csetout;
1147 struct rx_superstate *superstate;
1148 unsigned char chr;
1149#endif
1150{
1151 struct rx_superset *stateset = superstate->contents;
1152
1153 /* To compute the set of characters that share edges with CHR,
1154 * we start with the full character set, and subtract.
1155 */
1156 rx_bitset_universe (rx->local_cset_size, csetout);
1157 *dfout = 0;
1158
1159 /* Iterate over the NFA states in the superstate state-set. */
1160 while (stateset->car)
1161 {
1162 struct rx_nfa_edge *e;
1163 for (e = stateset->car->edges; e; e = e->next)
1164 if (e->type == ne_cset)
1165 {
1166 if (!RX_bitset_member (e->params.cset, chr))
1167 /* An edge that doesn't apply at least tells us some characters
1168 * that don't share the same edge set as CHR.
1169 */
1170 rx_bitset_difference (rx->local_cset_size, csetout, e->params.cset);
1171 else
1172 {
1173 /* If we find an NFA edge that applies, we make sure there
1174 * are corresponding edges in the superstate NFA.
1175 */
1176 {
1177 struct rx_distinct_future * saved;
1178 saved = *dfout;
1179 *dfout = include_futures (rx, *dfout, e->dest, superstate);
1180 if (!*dfout)
1181 {
1182 struct rx_distinct_future * df;
1183 df = saved;
1184 if (df)
1185 df->next_same_super_edge[1]->next_same_super_edge[0] = 0;
1186 while (df)
1187 {
1188 struct rx_distinct_future *dft;
1189 dft = df;
1190 df = df->next_same_super_edge[0];
1191
1192 if (dft->future && dft->future->transition_refs == dft)
1193 {
1194 dft->future->transition_refs = dft->next_same_dest;
1195 if (dft->future->transition_refs == dft)
1196 dft->future->transition_refs = 0;
1197 }
1198 dft->next_same_dest->prev_same_dest = dft->prev_same_dest;
1199 dft->prev_same_dest->next_same_dest = dft->next_same_dest;
1200 rx_cache_free (rx->cache, sizeof (*dft), (char *)dft);
1201 }
1202 return 0;
1203 }
1204 }
1205 /* We also trim the character set a bit. */
1206 rx_bitset_intersection (rx->local_cset_size,
1207 csetout, e->params.cset);
1208 }
1209 }
1210 stateset = stateset->cdr;
1211 }
1212 return 1;
1213}
1214\f
1215
1216/* This is a constructor for RX_SUPER_EDGE structures. These are
1217 * wrappers for lists of superstate NFA edges that share character sets labels.
1218 * If a transition class contains more than one rx_distinct_future (superstate
1219 * edge), then it represents a non-determinism in the superstate NFA.
1220 */
1221
1222#ifdef __STDC__
1223static struct rx_super_edge *
1224rx_super_edge (struct rx *rx,
1225 struct rx_superstate *super, rx_Bitset cset,
1226 struct rx_distinct_future *df)
1227#else
1228static struct rx_super_edge *
1229rx_super_edge (rx, super, cset, df)
1230 struct rx *rx;
1231 struct rx_superstate *super;
1232 rx_Bitset cset;
1233 struct rx_distinct_future *df;
1234#endif
1235{
1236 struct rx_super_edge *tc;
1237 int tc_size;
1238
1239 tc_size = ( sizeof (struct rx_super_edge)
1240 + rx_sizeof_bitset (rx->local_cset_size));
1241
1242 tc = ((struct rx_super_edge *)
1243 rx_cache_malloc (rx->cache, tc_size));
1244
1245 if (!tc)
1246 return 0;
1247
1248 tc->next = super->edges;
1249 super->edges = tc;
1250 tc->rx_backtrack_frame.inx = rx->instruction_table[rx_backtrack_point];
1251 tc->rx_backtrack_frame.data = 0;
1252 tc->rx_backtrack_frame.data_2 = (void *) tc;
1253 tc->options = df;
1254 tc->cset = (rx_Bitset) ((char *) tc + sizeof (*tc));
1255 rx_bitset_assign (rx->local_cset_size, tc->cset, cset);
1256 if (df)
1257 {
1258 struct rx_distinct_future * dfp = df;
1259 df->next_same_super_edge[1]->next_same_super_edge[0] = 0;
1260 while (dfp)
1261 {
1262 dfp->edge = tc;
1263 dfp = dfp->next_same_super_edge[0];
1264 }
1265 df->next_same_super_edge[1]->next_same_super_edge[0] = df;
1266 }
1267 return tc;
1268}
1269
1270
1271/* There are three kinds of cache miss. The first occurs when a
1272 * transition is taken that has never been computed during the
1273 * lifetime of the source superstate. That cache miss is handled by
1274 * calling COMPUTE_SUPER_EDGE. The second kind of cache miss
1275 * occurs when the destination superstate of a transition doesn't
1276 * exist. SOLVE_DESTINATION is used to construct the destination superstate.
1277 * Finally, the third kind of cache miss occurs when the destination
1278 * superstate of a transition is in a `semi-free state'. That case is
1279 * handled by UNFREE_SUPERSTATE.
1280 *
1281 * The function of HANDLE_CACHE_MISS is to figure out which of these
1282 * cases applies.
1283 */
1284
1285#ifdef __STDC__
1286static void
1287install_partial_transition (struct rx_superstate *super,
1288 struct rx_inx *answer,
1289 RX_subset set, int offset)
1290#else
1291static void
1292install_partial_transition (super, answer, set, offset)
1293 struct rx_superstate *super;
1294 struct rx_inx *answer;
1295 RX_subset set;
1296 int offset;
1297#endif
1298{
1299 int start = offset;
1300 int end = start + 32;
1301 RX_subset pos = 1;
1302 struct rx_inx * transitions = super->transitions;
1303
1304 while (start < end)
1305 {
1306 if (set & pos)
1307 transitions[start] = *answer;
1308 pos <<= 1;
1309 ++start;
1310 }
1311}
1312
1313
1314#ifdef __STDC__
1315struct rx_inx *
1316rx_handle_cache_miss (struct rx *rx, struct rx_superstate *super, unsigned char chr, void *data)
1317#else
1318struct rx_inx *
1319rx_handle_cache_miss (rx, super, chr, data)
1320 struct rx *rx;
1321 struct rx_superstate *super;
1322 unsigned char chr;
1323 void *data;
1324#endif
1325{
1326 int offset = chr / RX_subset_bits;
1327 struct rx_distinct_future *df = data;
1328
1329 if (!df) /* must be the shared_cache_miss_frame */
1330 {
1331 /* Perhaps this is just a transition waiting to be filled. */
1332 struct rx_super_edge *tc;
1333 RX_subset mask = rx_subset_singletons [chr % RX_subset_bits];
1334
1335 for (tc = super->edges; tc; tc = tc->next)
1336 if (tc->cset[offset] & mask)
1337 {
1338 struct rx_inx * answer;
1339 df = tc->options;
1340 answer = ((tc->options->next_same_super_edge[0] != tc->options)
1341 ? &tc->rx_backtrack_frame
1342 : (df->effects
1343 ? &df->side_effects_frame
1344 : &df->future_frame));
1345 install_partial_transition (super, answer,
1346 tc->cset [offset], offset * 32);
1347 return answer;
1348 }
1349 /* Otherwise, it's a flushed or newly encountered edge. */
1350 {
1351 char cset_space[1024]; /* this limit is far from unreasonable */
1352 rx_Bitset trcset;
1353 struct rx_inx *answer;
1354
1355 if (rx_sizeof_bitset (rx->local_cset_size) > sizeof (cset_space))
1356 return 0; /* If the arbitrary limit is hit, always fail */
1357 /* cleanly. */
1358 trcset = (rx_Bitset)cset_space;
1359 rx_lock_superstate (rx, super);
1360 if (!compute_super_edge (rx, &df, trcset, super, chr))
1361 {
1362 rx_unlock_superstate (rx, super);
1363 return 0;
1364 }
1365 if (!df) /* We just computed the fail transition. */
1366 {
1367 static struct rx_inx
1368 shared_fail_frame = { 0, 0, (void *)rx_backtrack, 0 };
1369 answer = &shared_fail_frame;
1370 }
1371 else
1372 {
1373 tc = rx_super_edge (rx, super, trcset, df);
1374 if (!tc)
1375 {
1376 rx_unlock_superstate (rx, super);
1377 return 0;
1378 }
1379 answer = ((tc->options->next_same_super_edge[0] != tc->options)
1380 ? &tc->rx_backtrack_frame
1381 : (df->effects
1382 ? &df->side_effects_frame
1383 : &df->future_frame));
1384 }
1385 install_partial_transition (super, answer,
1386 trcset[offset], offset * 32);
1387 rx_unlock_superstate (rx, super);
1388 return answer;
1389 }
1390 }
1391 else if (df->future) /* A cache miss on an edge with a future? Must be
1392 * a semi-free destination. */
1393 {
1394 if (df->future->is_semifree)
1395 refresh_semifree_superstate (rx->cache, df->future);
1396 return &df->future_frame;
1397 }
1398 else
1399 /* no future superstate on an existing edge */
1400 {
1401 rx_lock_superstate (rx, super);
1402 if (!solve_destination (rx, df))
1403 {
1404 rx_unlock_superstate (rx, super);
1405 return 0;
1406 }
1407 if (!df->effects
1408 && (df->edge->options->next_same_super_edge[0] == df->edge->options))
1409 install_partial_transition (super, &df->future_frame,
1410 df->edge->cset[offset], offset * 32);
1411 rx_unlock_superstate (rx, super);
1412 return &df->future_frame;
1413 }
1414}
1415
1416