]>
Commit | Line | Data |
---|---|---|
1 | ||
2 | ||
3 | /* Copyright (C) 1995, 1996 Tom Lord | |
4 | * | |
5 | * This program is free software; you can redistribute it and/or modify | |
6 | * it under the terms of the GNU Library General Public License as published by | |
7 | * the Free Software Foundation; either version 2, or (at your option) | |
8 | * any later version. | |
9 | * | |
10 | * This program is distributed in the hope that it will be useful, | |
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
13 | * GNU Library General Public License for more details. | |
14 | * | |
15 | * You should have received a copy of the GNU Library General Public License | |
16 | * along with this software; see the file COPYING. If not, write to | |
17 | * the Free Software Foundation, 59 Temple Place - Suite 330, | |
18 | * Boston, MA 02111-1307, USA. | |
19 | */ | |
20 | ||
21 | \f | |
22 | ||
23 | #include "rxall.h" | |
24 | #include "rxsuper.h" | |
25 | ||
26 | \f | |
27 | /* The functions in the next several pages define the lazy-NFA-conversion used | |
28 | * by matchers. The input to this construction is an NFA such as | |
29 | * is built by compactify_nfa (rx.c). The output is the superNFA. | |
30 | */ | |
31 | ||
32 | /* Match engines can use arbitrary values for opcodes. So, the parse tree | |
33 | * is built using instructions names (enum rx_opcode), but the superstate | |
34 | * nfa is populated with mystery opcodes (void *). | |
35 | * | |
36 | * For convenience, here is an id table. The opcodes are == to their inxs | |
37 | * | |
38 | * The lables in re_search_2 would make good values for instructions. | |
39 | */ | |
40 | ||
41 | void * rx_id_instruction_table[rx_num_instructions] = | |
42 | { | |
43 | (void *) rx_backtrack_point, | |
44 | (void *) rx_do_side_effects, | |
45 | (void *) rx_cache_miss, | |
46 | (void *) rx_next_char, | |
47 | (void *) rx_backtrack, | |
48 | (void *) rx_error_inx | |
49 | }; | |
50 | ||
51 | \f | |
52 | ||
53 | ||
54 | /* The partially instantiated superstate graph has a transition | |
55 | * table at every node. There is one entry for every character. | |
56 | * This fills in the transition for a set. | |
57 | */ | |
58 | #ifdef __STDC__ | |
59 | static void | |
60 | install_transition (struct rx_superstate *super, | |
61 | struct rx_inx *answer, rx_Bitset trcset) | |
62 | #else | |
63 | static void | |
64 | install_transition (super, answer, trcset) | |
65 | struct rx_superstate *super; | |
66 | struct rx_inx *answer; | |
67 | rx_Bitset trcset; | |
68 | #endif | |
69 | { | |
70 | struct rx_inx * transitions = super->transitions; | |
71 | int chr; | |
72 | for (chr = 0; chr < 256; ) | |
73 | if (!*trcset) | |
74 | { | |
75 | ++trcset; | |
76 | chr += 32; | |
77 | } | |
78 | else | |
79 | { | |
80 | RX_subset sub = *trcset; | |
81 | RX_subset mask = 1; | |
82 | int bound = chr + 32; | |
83 | while (chr < bound) | |
84 | { | |
85 | if (sub & mask) | |
86 | transitions [chr] = *answer; | |
87 | ++chr; | |
88 | mask <<= 1; | |
89 | } | |
90 | ++trcset; | |
91 | } | |
92 | } | |
93 | ||
94 | ||
95 | #ifdef __STDC__ | |
96 | static int | |
97 | qlen (struct rx_superstate * q) | |
98 | #else | |
99 | static int | |
100 | qlen (q) | |
101 | struct rx_superstate * q; | |
102 | #endif | |
103 | { | |
104 | int count = 1; | |
105 | struct rx_superstate * it; | |
106 | if (!q) | |
107 | return 0; | |
108 | for (it = q->next_recyclable; it != q; it = it->next_recyclable) | |
109 | ++count; | |
110 | return count; | |
111 | } | |
112 | ||
113 | #ifdef __STDC__ | |
114 | static void | |
115 | check_cache (struct rx_cache * cache) | |
116 | #else | |
117 | static void | |
118 | check_cache (cache) | |
119 | struct rx_cache * cache; | |
120 | #endif | |
121 | { | |
122 | struct rx_cache * you_fucked_up = 0; | |
123 | int total = cache->superstates; | |
124 | int semi = cache->semifree_superstates; | |
125 | if (semi != qlen (cache->semifree_superstate)) | |
126 | check_cache (you_fucked_up); | |
127 | if ((total - semi) != qlen (cache->lru_superstate)) | |
128 | check_cache (you_fucked_up); | |
129 | } | |
130 | #ifdef __STDC__ | |
131 | char * | |
132 | rx_cache_malloc (struct rx_cache * cache, int size) | |
133 | #else | |
134 | char * | |
135 | rx_cache_malloc (cache, size) | |
136 | struct rx_cache * cache; | |
137 | int size; | |
138 | #endif | |
139 | { | |
140 | char * answer; | |
141 | answer = (char *)malloc (size); | |
142 | if (answer) | |
143 | cache->bytes_used += size; | |
144 | return answer; | |
145 | } | |
146 | ||
147 | ||
148 | #ifdef __STDC__ | |
149 | void | |
150 | rx_cache_free (struct rx_cache * cache, int size, char * mem) | |
151 | #else | |
152 | void | |
153 | rx_cache_free (cache, size, mem) | |
154 | struct rx_cache * cache; | |
155 | int size; | |
156 | char * mem; | |
157 | #endif | |
158 | { | |
159 | free (mem); | |
160 | cache->bytes_used -= size; | |
161 | } | |
162 | ||
163 | ||
164 | ||
165 | /* When a superstate is old and neglected, it can enter a | |
166 | * semi-free state. A semi-free state is slated to die. | |
167 | * Incoming transitions to a semi-free state are re-written | |
168 | * to cause an (interpreted) fault when they are taken. | |
169 | * The fault handler revives the semi-free state, patches | |
170 | * incoming transitions back to normal, and continues. | |
171 | * | |
172 | * The idea is basicly to free in two stages, aborting | |
173 | * between the two if the state turns out to be useful again. | |
174 | * When a free is aborted, the rescued superstate is placed | |
175 | * in the most-favored slot to maximize the time until it | |
176 | * is next semi-freed. | |
177 | * | |
178 | * Overall, this approximates truly freeing superstates in | |
179 | * lru order, but does not involve the cost of updating perfectly | |
180 | * accurate timestamps every time a superstate is touched. | |
181 | */ | |
182 | ||
183 | #ifdef __STDC__ | |
184 | static void | |
185 | semifree_superstate (struct rx_cache * cache) | |
186 | #else | |
187 | static void | |
188 | semifree_superstate (cache) | |
189 | struct rx_cache * cache; | |
190 | #endif | |
191 | { | |
192 | int disqualified = cache->semifree_superstates; | |
193 | if (disqualified == cache->superstates) | |
194 | return; | |
195 | while (cache->lru_superstate->locks) | |
196 | { | |
197 | cache->lru_superstate = cache->lru_superstate->next_recyclable; | |
198 | ++disqualified; | |
199 | if (disqualified == cache->superstates) | |
200 | return; | |
201 | } | |
202 | { | |
203 | struct rx_superstate * it = cache->lru_superstate; | |
204 | it->next_recyclable->prev_recyclable = it->prev_recyclable; | |
205 | it->prev_recyclable->next_recyclable = it->next_recyclable; | |
206 | cache->lru_superstate = (it == it->next_recyclable | |
207 | ? 0 | |
208 | : it->next_recyclable); | |
209 | if (!cache->semifree_superstate) | |
210 | { | |
211 | cache->semifree_superstate = it; | |
212 | it->next_recyclable = it; | |
213 | it->prev_recyclable = it; | |
214 | } | |
215 | else | |
216 | { | |
217 | it->prev_recyclable = cache->semifree_superstate->prev_recyclable; | |
218 | it->next_recyclable = cache->semifree_superstate; | |
219 | it->prev_recyclable->next_recyclable = it; | |
220 | it->next_recyclable->prev_recyclable = it; | |
221 | } | |
222 | { | |
223 | struct rx_distinct_future *df; | |
224 | it->is_semifree = 1; | |
225 | ++cache->semifree_superstates; | |
226 | df = it->transition_refs; | |
227 | if (df) | |
228 | { | |
229 | df->prev_same_dest->next_same_dest = 0; | |
230 | for (df = it->transition_refs; df; df = df->next_same_dest) | |
231 | { | |
232 | df->future_frame.inx = cache->instruction_table[rx_cache_miss]; | |
233 | df->future_frame.data = 0; | |
234 | df->future_frame.data_2 = (void *) df; | |
235 | /* If there are any NEXT-CHAR instruction frames that | |
236 | * refer to this state, we convert them to CACHE-MISS frames. | |
237 | */ | |
238 | if (!df->effects | |
239 | && (df->edge->options->next_same_super_edge[0] | |
240 | == df->edge->options)) | |
241 | install_transition (df->present, &df->future_frame, | |
242 | df->edge->cset); | |
243 | } | |
244 | df = it->transition_refs; | |
245 | df->prev_same_dest->next_same_dest = df; | |
246 | } | |
247 | } | |
248 | } | |
249 | } | |
250 | ||
251 | ||
252 | #ifdef __STDC__ | |
253 | static void | |
254 | refresh_semifree_superstate (struct rx_cache * cache, | |
255 | struct rx_superstate * super) | |
256 | #else | |
257 | static void | |
258 | refresh_semifree_superstate (cache, super) | |
259 | struct rx_cache * cache; | |
260 | struct rx_superstate * super; | |
261 | #endif | |
262 | { | |
263 | struct rx_distinct_future *df; | |
264 | ||
265 | if (super->transition_refs) | |
266 | { | |
267 | super->transition_refs->prev_same_dest->next_same_dest = 0; | |
268 | for (df = super->transition_refs; df; df = df->next_same_dest) | |
269 | { | |
270 | df->future_frame.inx = cache->instruction_table[rx_next_char]; | |
271 | df->future_frame.data = (void *) super->transitions; | |
272 | df->future_frame.data_2 = (void *)(super->contents->is_final); | |
273 | /* CACHE-MISS instruction frames that refer to this state, | |
274 | * must be converted to NEXT-CHAR frames. | |
275 | */ | |
276 | if (!df->effects | |
277 | && (df->edge->options->next_same_super_edge[0] | |
278 | == df->edge->options)) | |
279 | install_transition (df->present, &df->future_frame, | |
280 | df->edge->cset); | |
281 | } | |
282 | super->transition_refs->prev_same_dest->next_same_dest | |
283 | = super->transition_refs; | |
284 | } | |
285 | if (cache->semifree_superstate == super) | |
286 | cache->semifree_superstate = (super->prev_recyclable == super | |
287 | ? 0 | |
288 | : super->prev_recyclable); | |
289 | super->next_recyclable->prev_recyclable = super->prev_recyclable; | |
290 | super->prev_recyclable->next_recyclable = super->next_recyclable; | |
291 | ||
292 | if (!cache->lru_superstate) | |
293 | (cache->lru_superstate | |
294 | = super->next_recyclable | |
295 | = super->prev_recyclable | |
296 | = super); | |
297 | else | |
298 | { | |
299 | super->next_recyclable = cache->lru_superstate; | |
300 | super->prev_recyclable = cache->lru_superstate->prev_recyclable; | |
301 | super->next_recyclable->prev_recyclable = super; | |
302 | super->prev_recyclable->next_recyclable = super; | |
303 | } | |
304 | super->is_semifree = 0; | |
305 | --cache->semifree_superstates; | |
306 | } | |
307 | ||
308 | #ifdef __STDC__ | |
309 | void | |
310 | rx_refresh_this_superstate (struct rx_cache * cache, | |
311 | struct rx_superstate * superstate) | |
312 | #else | |
313 | void | |
314 | rx_refresh_this_superstate (cache, superstate) | |
315 | struct rx_cache * cache; | |
316 | struct rx_superstate * superstate; | |
317 | #endif | |
318 | { | |
319 | if (superstate->is_semifree) | |
320 | refresh_semifree_superstate (cache, superstate); | |
321 | else if (cache->lru_superstate == superstate) | |
322 | cache->lru_superstate = superstate->next_recyclable; | |
323 | else if (superstate != cache->lru_superstate->prev_recyclable) | |
324 | { | |
325 | superstate->next_recyclable->prev_recyclable | |
326 | = superstate->prev_recyclable; | |
327 | superstate->prev_recyclable->next_recyclable | |
328 | = superstate->next_recyclable; | |
329 | superstate->next_recyclable = cache->lru_superstate; | |
330 | superstate->prev_recyclable = cache->lru_superstate->prev_recyclable; | |
331 | superstate->next_recyclable->prev_recyclable = superstate; | |
332 | superstate->prev_recyclable->next_recyclable = superstate; | |
333 | } | |
334 | } | |
335 | ||
336 | #ifdef __STDC__ | |
337 | static void | |
338 | release_superset_low (struct rx_cache * cache, | |
339 | struct rx_superset *set) | |
340 | #else | |
341 | static void | |
342 | release_superset_low (cache, set) | |
343 | struct rx_cache * cache; | |
344 | struct rx_superset *set; | |
345 | #endif | |
346 | { | |
347 | if (!--set->refs) | |
348 | { | |
349 | if (set->starts_for) | |
350 | set->starts_for->start_set = 0; | |
351 | ||
352 | if (set->cdr) | |
353 | release_superset_low (cache, set->cdr); | |
354 | ||
355 | rx_hash_free (&set->hash_item, &cache->superset_hash_rules); | |
356 | rx_cache_free (cache, | |
357 | sizeof (struct rx_superset), | |
358 | (char *)set); | |
359 | } | |
360 | } | |
361 | ||
362 | #ifdef __STDC__ | |
363 | void | |
364 | rx_release_superset (struct rx *rx, | |
365 | struct rx_superset *set) | |
366 | #else | |
367 | void | |
368 | rx_release_superset (rx, set) | |
369 | struct rx *rx; | |
370 | struct rx_superset *set; | |
371 | #endif | |
372 | { | |
373 | release_superset_low (rx->cache, set); | |
374 | } | |
375 | ||
376 | /* This tries to add a new superstate to the superstate freelist. | |
377 | * It might, as a result, free some edge pieces or hash tables. | |
378 | * If nothing can be freed because too many locks are being held, fail. | |
379 | */ | |
380 | ||
381 | #ifdef __STDC__ | |
382 | static int | |
383 | rx_really_free_superstate (struct rx_cache * cache) | |
384 | #else | |
385 | static int | |
386 | rx_really_free_superstate (cache) | |
387 | struct rx_cache * cache; | |
388 | #endif | |
389 | { | |
390 | int locked_superstates = 0; | |
391 | struct rx_superstate * it; | |
392 | ||
393 | if (!cache->superstates) | |
394 | return 0; | |
395 | ||
396 | /* scale these */ | |
397 | while ((cache->hits + cache->misses) > cache->superstates) | |
398 | { | |
399 | cache->hits >>= 1; | |
400 | cache->misses >>= 1; | |
401 | } | |
402 | ||
403 | /* semifree superstates faster than they are freed | |
404 | * so popular states can be rescued. | |
405 | */ | |
406 | semifree_superstate (cache); | |
407 | semifree_superstate (cache); | |
408 | semifree_superstate (cache); | |
409 | ||
410 | #if 0 | |
411 | Redundant because semifree_superstate already | |
412 | makes this check; | |
413 | ||
414 | ||
415 | while (cache->semifree_superstate && cache->semifree_superstate->locks) | |
416 | { | |
417 | refresh_semifree_superstate (cache, cache->semifree_superstate); | |
418 | ++locked_superstates; | |
419 | if (locked_superstates == cache->superstates) | |
420 | return 0; | |
421 | } | |
422 | ||
423 | ||
424 | if (cache->semifree_superstate) | |
425 | insert the "it =" block which follows this "if 0" code; | |
426 | else | |
427 | { | |
428 | while (cache->lru_superstate->locks) | |
429 | { | |
430 | cache->lru_superstate = cache->lru_superstate->next_recyclable; | |
431 | ++locked_superstates; | |
432 | if (locked_superstates == cache->superstates) | |
433 | return 0; | |
434 | } | |
435 | it = cache->lru_superstate; | |
436 | it->next_recyclable->prev_recyclable = it->prev_recyclable; | |
437 | it->prev_recyclable->next_recyclable = it->next_recyclable; | |
438 | cache->lru_superstate = ((it == it->next_recyclable) | |
439 | ? 0 | |
440 | : it->next_recyclable); | |
441 | } | |
442 | #endif | |
443 | ||
444 | if (!cache->semifree_superstate) | |
445 | return 0; | |
446 | ||
447 | { | |
448 | it = cache->semifree_superstate; | |
449 | it->next_recyclable->prev_recyclable = it->prev_recyclable; | |
450 | it->prev_recyclable->next_recyclable = it->next_recyclable; | |
451 | cache->semifree_superstate = ((it == it->next_recyclable) | |
452 | ? 0 | |
453 | : it->next_recyclable); | |
454 | --cache->semifree_superstates; | |
455 | } | |
456 | ||
457 | ||
458 | if (it->transition_refs) | |
459 | { | |
460 | struct rx_distinct_future *df; | |
461 | for (df = it->transition_refs, | |
462 | df->prev_same_dest->next_same_dest = 0; | |
463 | df; | |
464 | df = df->next_same_dest) | |
465 | { | |
466 | df->future_frame.inx = cache->instruction_table[rx_cache_miss]; | |
467 | df->future_frame.data = 0; | |
468 | df->future_frame.data_2 = (void *) df; | |
469 | df->future = 0; | |
470 | } | |
471 | it->transition_refs->prev_same_dest->next_same_dest = | |
472 | it->transition_refs; | |
473 | } | |
474 | { | |
475 | struct rx_super_edge *tc = it->edges; | |
476 | while (tc) | |
477 | { | |
478 | struct rx_distinct_future * df; | |
479 | struct rx_super_edge *tct = tc->next; | |
480 | df = tc->options; | |
481 | df->next_same_super_edge[1]->next_same_super_edge[0] = 0; | |
482 | while (df) | |
483 | { | |
484 | struct rx_distinct_future *dft = df; | |
485 | df = df->next_same_super_edge[0]; | |
486 | ||
487 | ||
488 | if (dft->future && dft->future->transition_refs == dft) | |
489 | { | |
490 | dft->future->transition_refs = dft->next_same_dest; | |
491 | if (dft->future->transition_refs == dft) | |
492 | dft->future->transition_refs = 0; | |
493 | } | |
494 | dft->next_same_dest->prev_same_dest = dft->prev_same_dest; | |
495 | dft->prev_same_dest->next_same_dest = dft->next_same_dest; | |
496 | rx_cache_free (cache, | |
497 | sizeof (struct rx_distinct_future), | |
498 | (char *)dft); | |
499 | } | |
500 | rx_cache_free (cache, | |
501 | sizeof (struct rx_super_edge), | |
502 | (char *)tc); | |
503 | tc = tct; | |
504 | } | |
505 | } | |
506 | ||
507 | if (it->contents->superstate == it) | |
508 | it->contents->superstate = 0; | |
509 | release_superset_low (cache, it->contents); | |
510 | rx_cache_free (cache, | |
511 | (sizeof (struct rx_superstate) | |
512 | + cache->local_cset_size * sizeof (struct rx_inx)), | |
513 | (char *)it); | |
514 | --cache->superstates; | |
515 | return 1; | |
516 | } | |
517 | ||
518 | ||
519 | #ifdef __STDC__ | |
520 | static char * | |
521 | rx_cache_malloc_or_get (struct rx_cache * cache, int size) | |
522 | #else | |
523 | static char * | |
524 | rx_cache_malloc_or_get (cache, size) | |
525 | struct rx_cache * cache; | |
526 | int size; | |
527 | #endif | |
528 | { | |
529 | while ( (cache->bytes_used + size > cache->bytes_allowed) | |
530 | && rx_really_free_superstate (cache)) | |
531 | ; | |
532 | ||
533 | return rx_cache_malloc (cache, size); | |
534 | } | |
535 | ||
536 | \f | |
537 | ||
538 | #ifdef __STDC__ | |
539 | static int | |
540 | supersetcmp (void * va, void * vb) | |
541 | #else | |
542 | static int | |
543 | supersetcmp (va, vb) | |
544 | void * va; | |
545 | void * vb; | |
546 | #endif | |
547 | { | |
548 | struct rx_superset * a = (struct rx_superset *)va; | |
549 | struct rx_superset * b = (struct rx_superset *)vb; | |
550 | return ( (a == b) | |
551 | || (a && b && (a->id == b->id) && (a->car == b->car) && (a->cdr == b->cdr))); | |
552 | } | |
553 | ||
554 | #define rx_abs(A) (((A) > 0) ? (A) : -(A)) | |
555 | ||
556 | ||
557 | #ifdef __STDC__ | |
558 | static struct rx_hash_item * | |
559 | superset_allocator (struct rx_hash_rules * rules, void * val) | |
560 | #else | |
561 | static struct rx_hash_item * | |
562 | superset_allocator (rules, val) | |
563 | struct rx_hash_rules * rules; | |
564 | void * val; | |
565 | #endif | |
566 | { | |
567 | struct rx_cache * cache; | |
568 | struct rx_superset * template; | |
569 | struct rx_superset * newset; | |
570 | ||
571 | cache = ((struct rx_cache *) | |
572 | ((char *)rules | |
573 | - (unsigned long)(&((struct rx_cache *)0)->superset_hash_rules))); | |
574 | template = (struct rx_superset *)val; | |
575 | newset = ((struct rx_superset *) | |
576 | rx_cache_malloc (cache, sizeof (*template))); | |
577 | if (!newset) | |
578 | return 0; | |
579 | { | |
580 | int cdrfinal; | |
581 | int cdredges; | |
582 | ||
583 | cdrfinal = (template->cdr | |
584 | ? template->cdr->is_final | |
585 | : 0); | |
586 | cdredges = (template->cdr | |
587 | ? template->cdr->has_cset_edges | |
588 | : 0); | |
589 | ||
590 | newset->is_final = (rx_abs (template->car->is_final) > rx_abs(cdrfinal) | |
591 | ? template->car->is_final | |
592 | : template->cdr->is_final); | |
593 | newset->has_cset_edges = (template->car->has_cset_edges || cdredges); | |
594 | } | |
595 | newset->refs = 0; | |
596 | newset->id = template->id; | |
597 | newset->car = template->car; | |
598 | newset->cdr = template->cdr; | |
599 | rx_protect_superset (rx, template->cdr); | |
600 | newset->superstate = 0; | |
601 | newset->starts_for = 0; | |
602 | newset->hash_item.data = (void *)newset; | |
603 | newset->hash_item.binding = 0; | |
604 | return &newset->hash_item; | |
605 | } | |
606 | ||
607 | #ifdef __STDC__ | |
608 | static struct rx_hash * | |
609 | super_hash_allocator (struct rx_hash_rules * rules) | |
610 | #else | |
611 | static struct rx_hash * | |
612 | super_hash_allocator (rules) | |
613 | struct rx_hash_rules * rules; | |
614 | #endif | |
615 | { | |
616 | struct rx_cache * cache; | |
617 | cache = ((struct rx_cache *) | |
618 | ((char *)rules | |
619 | - (unsigned long)(&((struct rx_cache *)0)->superset_hash_rules))); | |
620 | return ((struct rx_hash *) | |
621 | rx_cache_malloc (cache, sizeof (struct rx_hash))); | |
622 | } | |
623 | ||
624 | ||
625 | #ifdef __STDC__ | |
626 | static void | |
627 | super_hash_liberator (struct rx_hash * hash, struct rx_hash_rules * rules) | |
628 | #else | |
629 | static void | |
630 | super_hash_liberator (hash, rules) | |
631 | struct rx_hash * hash; | |
632 | struct rx_hash_rules * rules; | |
633 | #endif | |
634 | { | |
635 | struct rx_cache * cache | |
636 | = ((struct rx_cache *) | |
637 | (char *)rules - (long)(&((struct rx_cache *)0)->superset_hash_rules)); | |
638 | rx_cache_free (cache, sizeof (struct rx_hash), (char *)hash); | |
639 | } | |
640 | ||
641 | #ifdef __STDC__ | |
642 | static void | |
643 | superset_hash_item_liberator (struct rx_hash_item * it, | |
644 | struct rx_hash_rules * rules) | |
645 | #else | |
646 | static void | |
647 | superset_hash_item_liberator (it, rules) | |
648 | struct rx_hash_item * it; | |
649 | struct rx_hash_rules * rules; | |
650 | #endif | |
651 | { | |
652 | } | |
653 | ||
654 | int rx_cache_bound = 3; | |
655 | static int rx_default_cache_got = 0; | |
656 | ||
657 | static struct rx_cache default_cache = | |
658 | { | |
659 | { | |
660 | supersetcmp, | |
661 | super_hash_allocator, | |
662 | super_hash_liberator, | |
663 | superset_allocator, | |
664 | superset_hash_item_liberator, | |
665 | }, | |
666 | 0, | |
667 | 0, | |
668 | 0, | |
669 | 0, | |
670 | 0, | |
671 | 0, | |
672 | 0, | |
673 | RX_DEFAULT_DFA_CACHE_SIZE, | |
674 | 0, | |
675 | 256, | |
676 | rx_id_instruction_table, | |
677 | { | |
678 | 0, | |
679 | 0, | |
680 | 0, | |
681 | 0, | |
682 | 0 | |
683 | } | |
684 | }; | |
685 | struct rx_cache * rx_default_cache = &default_cache; | |
686 | ||
687 | /* This adds an element to a superstate set. These sets are lists, such | |
688 | * that lists with == elements are ==. The empty set is returned by | |
689 | * superset_cons (rx, 0, 0) and is NOT equivelent to | |
690 | * (struct rx_superset)0. | |
691 | */ | |
692 | ||
693 | #ifdef __STDC__ | |
694 | struct rx_superset * | |
695 | rx_superset_cons (struct rx * rx, | |
696 | struct rx_nfa_state *car, struct rx_superset *cdr) | |
697 | #else | |
698 | struct rx_superset * | |
699 | rx_superset_cons (rx, car, cdr) | |
700 | struct rx * rx; | |
701 | struct rx_nfa_state *car; | |
702 | struct rx_superset *cdr; | |
703 | #endif | |
704 | { | |
705 | struct rx_cache * cache = rx->cache; | |
706 | if (!car && !cdr) | |
707 | { | |
708 | if (!cache->empty_superset) | |
709 | { | |
710 | cache->empty_superset | |
711 | = ((struct rx_superset *) | |
712 | rx_cache_malloc (cache, sizeof (struct rx_superset))); | |
713 | if (!cache->empty_superset) | |
714 | return 0; | |
715 | rx_bzero ((char *)cache->empty_superset, sizeof (struct rx_superset)); | |
716 | cache->empty_superset->refs = 1000; | |
717 | } | |
718 | return cache->empty_superset; | |
719 | } | |
720 | { | |
721 | struct rx_superset template; | |
722 | struct rx_hash_item * hit; | |
723 | template.car = car; | |
724 | template.cdr = cdr; | |
725 | template.id = rx->rx_id; | |
726 | rx_protect_superset (rx, template.cdr); | |
727 | hit = rx_hash_store (&cache->superset_table, | |
728 | (unsigned long)car ^ car->id ^ (unsigned long)cdr, | |
729 | (void *)&template, | |
730 | &cache->superset_hash_rules); | |
731 | rx_protect_superset (rx, template.cdr); | |
732 | return (hit | |
733 | ? (struct rx_superset *)hit->data | |
734 | : 0); | |
735 | } | |
736 | } | |
737 | ||
738 | /* This computes a union of two NFA state sets. The sets do not have the | |
739 | * same representation though. One is a RX_SUPERSET structure (part | |
740 | * of the superstate NFA) and the other is an NFA_STATE_SET (part of the NFA). | |
741 | */ | |
742 | ||
743 | #ifdef __STDC__ | |
744 | struct rx_superset * | |
745 | rx_superstate_eclosure_union (struct rx * rx, struct rx_superset *set, struct rx_nfa_state_set *ecl) | |
746 | #else | |
747 | struct rx_superset * | |
748 | rx_superstate_eclosure_union (rx, set, ecl) | |
749 | struct rx * rx; | |
750 | struct rx_superset *set; | |
751 | struct rx_nfa_state_set *ecl; | |
752 | #endif | |
753 | { | |
754 | if (!ecl) | |
755 | return set; | |
756 | ||
757 | if (!set->car) | |
758 | return rx_superset_cons (rx, ecl->car, | |
759 | rx_superstate_eclosure_union (rx, set, ecl->cdr)); | |
760 | if (set->car == ecl->car) | |
761 | return rx_superstate_eclosure_union (rx, set, ecl->cdr); | |
762 | ||
763 | { | |
764 | struct rx_superset * tail; | |
765 | struct rx_nfa_state * first; | |
766 | ||
767 | if (set->car->id < ecl->car->id) | |
768 | { | |
769 | tail = rx_superstate_eclosure_union (rx, set->cdr, ecl); | |
770 | first = set->car; | |
771 | } | |
772 | else | |
773 | { | |
774 | tail = rx_superstate_eclosure_union (rx, set, ecl->cdr); | |
775 | first = ecl->car; | |
776 | } | |
777 | if (!tail) | |
778 | return 0; | |
779 | else | |
780 | { | |
781 | struct rx_superset * answer; | |
782 | answer = rx_superset_cons (rx, first, tail); | |
783 | if (!answer) | |
784 | { | |
785 | rx_protect_superset (rx, tail); | |
786 | rx_release_superset (rx, tail); | |
787 | return 0; | |
788 | } | |
789 | else | |
790 | return answer; | |
791 | } | |
792 | } | |
793 | } | |
794 | ||
795 | ||
796 | \f | |
797 | ||
798 | /* | |
799 | * This makes sure that a list of rx_distinct_futures contains | |
800 | * a future for each possible set of side effects in the eclosure | |
801 | * of a given state. This is some of the work of filling in a | |
802 | * superstate transition. | |
803 | */ | |
804 | ||
805 | #ifdef __STDC__ | |
806 | static struct rx_distinct_future * | |
807 | include_futures (struct rx *rx, | |
808 | struct rx_distinct_future *df, struct rx_nfa_state | |
809 | *state, struct rx_superstate *superstate) | |
810 | #else | |
811 | static struct rx_distinct_future * | |
812 | include_futures (rx, df, state, superstate) | |
813 | struct rx *rx; | |
814 | struct rx_distinct_future *df; | |
815 | struct rx_nfa_state *state; | |
816 | struct rx_superstate *superstate; | |
817 | #endif | |
818 | { | |
819 | struct rx_possible_future *future; | |
820 | struct rx_cache * cache = rx->cache; | |
821 | for (future = rx_state_possible_futures (rx, state); future; future = future->next) | |
822 | { | |
823 | struct rx_distinct_future *dfp; | |
824 | struct rx_distinct_future *insert_before = 0; | |
825 | if (df) | |
826 | df->next_same_super_edge[1]->next_same_super_edge[0] = 0; | |
827 | for (dfp = df; dfp; dfp = dfp->next_same_super_edge[0]) | |
828 | if (dfp->effects == future->effects) | |
829 | break; | |
830 | else | |
831 | { | |
832 | int order = rx->se_list_cmp (rx, dfp->effects, future->effects); | |
833 | if (order > 0) | |
834 | { | |
835 | insert_before = dfp; | |
836 | dfp = 0; | |
837 | break; | |
838 | } | |
839 | } | |
840 | if (df) | |
841 | df->next_same_super_edge[1]->next_same_super_edge[0] = df; | |
842 | if (!dfp) | |
843 | { | |
844 | dfp | |
845 | = ((struct rx_distinct_future *) | |
846 | rx_cache_malloc (cache, | |
847 | sizeof (struct rx_distinct_future))); | |
848 | if (!dfp) | |
849 | return 0; | |
850 | if (!df) | |
851 | { | |
852 | df = insert_before = dfp; | |
853 | df->next_same_super_edge[0] = df->next_same_super_edge[1] = df; | |
854 | } | |
855 | else if (!insert_before) | |
856 | insert_before = df; | |
857 | else if (insert_before == df) | |
858 | df = dfp; | |
859 | ||
860 | dfp->next_same_super_edge[0] = insert_before; | |
861 | dfp->next_same_super_edge[1] | |
862 | = insert_before->next_same_super_edge[1]; | |
863 | dfp->next_same_super_edge[1]->next_same_super_edge[0] = dfp; | |
864 | dfp->next_same_super_edge[0]->next_same_super_edge[1] = dfp; | |
865 | dfp->next_same_dest = dfp->prev_same_dest = dfp; | |
866 | dfp->future = 0; | |
867 | dfp->present = superstate; | |
868 | dfp->future_frame.inx = rx->instruction_table[rx_cache_miss]; | |
869 | dfp->future_frame.data = 0; | |
870 | dfp->future_frame.data_2 = (void *) dfp; | |
871 | dfp->side_effects_frame.inx | |
872 | = rx->instruction_table[rx_do_side_effects]; | |
873 | dfp->side_effects_frame.data = 0; | |
874 | dfp->side_effects_frame.data_2 = (void *) dfp; | |
875 | dfp->effects = future->effects; | |
876 | } | |
877 | } | |
878 | return df; | |
879 | } | |
880 | \f | |
881 | ||
882 | ||
883 | /* This constructs a new superstate from its state set. The only | |
884 | * complexity here is memory management. | |
885 | */ | |
886 | #ifdef __STDC__ | |
887 | struct rx_superstate * | |
888 | rx_superstate (struct rx *rx, | |
889 | struct rx_superset *set) | |
890 | #else | |
891 | struct rx_superstate * | |
892 | rx_superstate (rx, set) | |
893 | struct rx *rx; | |
894 | struct rx_superset *set; | |
895 | #endif | |
896 | { | |
897 | struct rx_cache * cache = rx->cache; | |
898 | struct rx_superstate * superstate = 0; | |
899 | ||
900 | /* Does the superstate already exist in the cache? */ | |
901 | if (set->superstate) | |
902 | { | |
903 | if (set->superstate->rx_id != rx->rx_id) | |
904 | { | |
905 | /* Aha. It is in the cache, but belongs to a superstate | |
906 | * that refers to an NFA that no longer exists. | |
907 | * (We know it no longer exists because it was evidently | |
908 | * stored in the same region of memory as the current nfa | |
909 | * yet it has a different id.) | |
910 | */ | |
911 | superstate = set->superstate; | |
912 | if (!superstate->is_semifree) | |
913 | { | |
914 | if (cache->lru_superstate == superstate) | |
915 | { | |
916 | cache->lru_superstate = superstate->next_recyclable; | |
917 | if (cache->lru_superstate == superstate) | |
918 | cache->lru_superstate = 0; | |
919 | } | |
920 | { | |
921 | superstate->next_recyclable->prev_recyclable | |
922 | = superstate->prev_recyclable; | |
923 | superstate->prev_recyclable->next_recyclable | |
924 | = superstate->next_recyclable; | |
925 | if (!cache->semifree_superstate) | |
926 | { | |
927 | (cache->semifree_superstate | |
928 | = superstate->next_recyclable | |
929 | = superstate->prev_recyclable | |
930 | = superstate); | |
931 | } | |
932 | else | |
933 | { | |
934 | superstate->next_recyclable = cache->semifree_superstate; | |
935 | superstate->prev_recyclable | |
936 | = cache->semifree_superstate->prev_recyclable; | |
937 | superstate->next_recyclable->prev_recyclable | |
938 | = superstate; | |
939 | superstate->prev_recyclable->next_recyclable | |
940 | = superstate; | |
941 | cache->semifree_superstate = superstate; | |
942 | } | |
943 | ++cache->semifree_superstates; | |
944 | } | |
945 | } | |
946 | set->superstate = 0; | |
947 | goto handle_cache_miss; | |
948 | } | |
949 | ++cache->hits; | |
950 | superstate = set->superstate; | |
951 | ||
952 | rx_refresh_this_superstate (cache, superstate); | |
953 | return superstate; | |
954 | } | |
955 | ||
956 | handle_cache_miss: | |
957 | ||
958 | /* This point reached only for cache misses. */ | |
959 | ++cache->misses; | |
960 | #if RX_DEBUG | |
961 | if (rx_debug_trace > 1) | |
962 | { | |
963 | struct rx_superset * setp = set; | |
964 | fprintf (stderr, "Building a superstet %d(%d): ", rx->rx_id, set); | |
965 | while (setp) | |
966 | { | |
967 | fprintf (stderr, "%d ", setp->id); | |
968 | setp = setp->cdr; | |
969 | } | |
970 | fprintf (stderr, "(%d)\n", set); | |
971 | } | |
972 | #endif | |
973 | ||
974 | { | |
975 | int superstate_size; | |
976 | superstate_size = ( sizeof (*superstate) | |
977 | + ( sizeof (struct rx_inx) | |
978 | * rx->local_cset_size)); | |
979 | superstate = ((struct rx_superstate *) | |
980 | rx_cache_malloc_or_get (cache, superstate_size)); | |
981 | ++cache->superstates; | |
982 | } | |
983 | ||
984 | if (!superstate) | |
985 | return 0; | |
986 | ||
987 | if (!cache->lru_superstate) | |
988 | (cache->lru_superstate | |
989 | = superstate->next_recyclable | |
990 | = superstate->prev_recyclable | |
991 | = superstate); | |
992 | else | |
993 | { | |
994 | superstate->next_recyclable = cache->lru_superstate; | |
995 | superstate->prev_recyclable = cache->lru_superstate->prev_recyclable; | |
996 | ( superstate->prev_recyclable->next_recyclable | |
997 | = superstate->next_recyclable->prev_recyclable | |
998 | = superstate); | |
999 | } | |
1000 | superstate->rx_id = rx->rx_id; | |
1001 | superstate->transition_refs = 0; | |
1002 | superstate->locks = 0; | |
1003 | superstate->is_semifree = 0; | |
1004 | set->superstate = superstate; | |
1005 | superstate->contents = set; | |
1006 | rx_protect_superset (rx, set); | |
1007 | superstate->edges = 0; | |
1008 | { | |
1009 | int x; | |
1010 | /* None of the transitions from this superstate are known yet. */ | |
1011 | for (x = 0; x < rx->local_cset_size; ++x) | |
1012 | { | |
1013 | struct rx_inx * ifr = &superstate->transitions[x]; | |
1014 | ifr->inx = rx->instruction_table [rx_cache_miss]; | |
1015 | ifr->data = ifr->data_2 = 0; | |
1016 | } | |
1017 | } | |
1018 | return superstate; | |
1019 | } | |
1020 | \f | |
1021 | ||
1022 | /* This computes the destination set of one edge of the superstate NFA. | |
1023 | * Note that a RX_DISTINCT_FUTURE is a superstate edge. | |
1024 | * Returns 0 on an allocation failure. | |
1025 | */ | |
1026 | ||
1027 | #ifdef __STDC__ | |
1028 | static int | |
1029 | solve_destination (struct rx *rx, struct rx_distinct_future *df) | |
1030 | #else | |
1031 | static int | |
1032 | solve_destination (rx, df) | |
1033 | struct rx *rx; | |
1034 | struct rx_distinct_future *df; | |
1035 | #endif | |
1036 | { | |
1037 | struct rx_super_edge *tc = df->edge; | |
1038 | struct rx_superset *nfa_state; | |
1039 | struct rx_superset *nil_set = rx_superset_cons (rx, 0, 0); | |
1040 | struct rx_superset *solution = nil_set; | |
1041 | struct rx_superstate *dest; | |
1042 | ||
1043 | rx_protect_superset (rx, solution); | |
1044 | /* Iterate over all NFA states in the state set of this superstate. */ | |
1045 | for (nfa_state = df->present->contents; | |
1046 | nfa_state->car; | |
1047 | nfa_state = nfa_state->cdr) | |
1048 | { | |
1049 | struct rx_nfa_edge *e; | |
1050 | /* Iterate over all edges of each NFA state. */ | |
1051 | for (e = nfa_state->car->edges; e; e = e->next) | |
1052 | /* If we find an edge that is labeled with | |
1053 | * the characters we are solving for..... | |
1054 | */ | |
1055 | if ((e->type == ne_cset) | |
1056 | && rx_bitset_is_subset (rx->local_cset_size, | |
1057 | tc->cset, | |
1058 | e->params.cset)) | |
1059 | { | |
1060 | struct rx_nfa_state *n = e->dest; | |
1061 | struct rx_possible_future *pf; | |
1062 | /* ....search the partial epsilon closures of the destination | |
1063 | * of that edge for a path that involves the same set of | |
1064 | * side effects we are solving for. | |
1065 | * If we find such a RX_POSSIBLE_FUTURE, we add members to the | |
1066 | * stateset we are computing. | |
1067 | */ | |
1068 | for (pf = rx_state_possible_futures (rx, n); pf; pf = pf->next) | |
1069 | if (pf->effects == df->effects) | |
1070 | { | |
1071 | struct rx_superset * old_sol; | |
1072 | old_sol = solution; | |
1073 | solution = rx_superstate_eclosure_union (rx, solution, | |
1074 | pf->destset); | |
1075 | if (!solution) | |
1076 | return 0; | |
1077 | rx_protect_superset (rx, solution); | |
1078 | rx_release_superset (rx, old_sol); | |
1079 | } | |
1080 | } | |
1081 | } | |
1082 | /* It is possible that the RX_DISTINCT_FUTURE we are working on has | |
1083 | * the empty set of NFA states as its definition. In that case, this | |
1084 | * is a failure point. | |
1085 | */ | |
1086 | if (solution == nil_set) | |
1087 | { | |
1088 | df->future_frame.inx = (void *) rx_backtrack; | |
1089 | df->future_frame.data = 0; | |
1090 | df->future_frame.data_2 = 0; | |
1091 | return 1; | |
1092 | } | |
1093 | dest = rx_superstate (rx, solution); | |
1094 | rx_release_superset (rx, solution); | |
1095 | if (!dest) | |
1096 | return 0; | |
1097 | ||
1098 | { | |
1099 | struct rx_distinct_future *dft; | |
1100 | dft = df; | |
1101 | df->prev_same_dest->next_same_dest = 0; | |
1102 | while (dft) | |
1103 | { | |
1104 | dft->future = dest; | |
1105 | dft->future_frame.inx = rx->instruction_table[rx_next_char]; | |
1106 | dft->future_frame.data = (void *) dest->transitions; | |
1107 | dft->future_frame.data_2 = (void *) dest->contents->is_final; | |
1108 | dft = dft->next_same_dest; | |
1109 | } | |
1110 | df->prev_same_dest->next_same_dest = df; | |
1111 | } | |
1112 | if (!dest->transition_refs) | |
1113 | dest->transition_refs = df; | |
1114 | else | |
1115 | { | |
1116 | struct rx_distinct_future *dft; | |
1117 | dft = dest->transition_refs->next_same_dest; | |
1118 | dest->transition_refs->next_same_dest = df->next_same_dest; | |
1119 | df->next_same_dest->prev_same_dest = dest->transition_refs; | |
1120 | df->next_same_dest = dft; | |
1121 | dft->prev_same_dest = df; | |
1122 | } | |
1123 | return 1; | |
1124 | } | |
1125 | ||
1126 | ||
1127 | /* This takes a superstate and a character, and computes some edges | |
1128 | * from the superstate NFA. In particular, this computes all edges | |
1129 | * that lead from SUPERSTATE given CHR. This function also | |
1130 | * computes the set of characters that share this edge set. | |
1131 | * This returns 0 on allocation error. | |
1132 | * The character set and list of edges are returned through | |
1133 | * the paramters CSETOUT and DFOUT. | |
1134 | */ | |
1135 | ||
1136 | #ifdef __STDC__ | |
1137 | static int | |
1138 | compute_super_edge (struct rx *rx, struct rx_distinct_future **dfout, | |
1139 | rx_Bitset csetout, struct rx_superstate *superstate, | |
1140 | unsigned char chr) | |
1141 | #else | |
1142 | static int | |
1143 | compute_super_edge (rx, dfout, csetout, superstate, chr) | |
1144 | struct rx *rx; | |
1145 | struct rx_distinct_future **dfout; | |
1146 | rx_Bitset csetout; | |
1147 | struct rx_superstate *superstate; | |
1148 | unsigned char chr; | |
1149 | #endif | |
1150 | { | |
1151 | struct rx_superset *stateset = superstate->contents; | |
1152 | ||
1153 | /* To compute the set of characters that share edges with CHR, | |
1154 | * we start with the full character set, and subtract. | |
1155 | */ | |
1156 | rx_bitset_universe (rx->local_cset_size, csetout); | |
1157 | *dfout = 0; | |
1158 | ||
1159 | /* Iterate over the NFA states in the superstate state-set. */ | |
1160 | while (stateset->car) | |
1161 | { | |
1162 | struct rx_nfa_edge *e; | |
1163 | for (e = stateset->car->edges; e; e = e->next) | |
1164 | if (e->type == ne_cset) | |
1165 | { | |
1166 | if (!RX_bitset_member (e->params.cset, chr)) | |
1167 | /* An edge that doesn't apply at least tells us some characters | |
1168 | * that don't share the same edge set as CHR. | |
1169 | */ | |
1170 | rx_bitset_difference (rx->local_cset_size, csetout, e->params.cset); | |
1171 | else | |
1172 | { | |
1173 | /* If we find an NFA edge that applies, we make sure there | |
1174 | * are corresponding edges in the superstate NFA. | |
1175 | */ | |
1176 | { | |
1177 | struct rx_distinct_future * saved; | |
1178 | saved = *dfout; | |
1179 | *dfout = include_futures (rx, *dfout, e->dest, superstate); | |
1180 | if (!*dfout) | |
1181 | { | |
1182 | struct rx_distinct_future * df; | |
1183 | df = saved; | |
1184 | if (df) | |
1185 | df->next_same_super_edge[1]->next_same_super_edge[0] = 0; | |
1186 | while (df) | |
1187 | { | |
1188 | struct rx_distinct_future *dft; | |
1189 | dft = df; | |
1190 | df = df->next_same_super_edge[0]; | |
1191 | ||
1192 | if (dft->future && dft->future->transition_refs == dft) | |
1193 | { | |
1194 | dft->future->transition_refs = dft->next_same_dest; | |
1195 | if (dft->future->transition_refs == dft) | |
1196 | dft->future->transition_refs = 0; | |
1197 | } | |
1198 | dft->next_same_dest->prev_same_dest = dft->prev_same_dest; | |
1199 | dft->prev_same_dest->next_same_dest = dft->next_same_dest; | |
1200 | rx_cache_free (rx->cache, sizeof (*dft), (char *)dft); | |
1201 | } | |
1202 | return 0; | |
1203 | } | |
1204 | } | |
1205 | /* We also trim the character set a bit. */ | |
1206 | rx_bitset_intersection (rx->local_cset_size, | |
1207 | csetout, e->params.cset); | |
1208 | } | |
1209 | } | |
1210 | stateset = stateset->cdr; | |
1211 | } | |
1212 | return 1; | |
1213 | } | |
1214 | \f | |
1215 | ||
1216 | /* This is a constructor for RX_SUPER_EDGE structures. These are | |
1217 | * wrappers for lists of superstate NFA edges that share character sets labels. | |
1218 | * If a transition class contains more than one rx_distinct_future (superstate | |
1219 | * edge), then it represents a non-determinism in the superstate NFA. | |
1220 | */ | |
1221 | ||
1222 | #ifdef __STDC__ | |
1223 | static struct rx_super_edge * | |
1224 | rx_super_edge (struct rx *rx, | |
1225 | struct rx_superstate *super, rx_Bitset cset, | |
1226 | struct rx_distinct_future *df) | |
1227 | #else | |
1228 | static struct rx_super_edge * | |
1229 | rx_super_edge (rx, super, cset, df) | |
1230 | struct rx *rx; | |
1231 | struct rx_superstate *super; | |
1232 | rx_Bitset cset; | |
1233 | struct rx_distinct_future *df; | |
1234 | #endif | |
1235 | { | |
1236 | struct rx_super_edge *tc; | |
1237 | int tc_size; | |
1238 | ||
1239 | tc_size = ( sizeof (struct rx_super_edge) | |
1240 | + rx_sizeof_bitset (rx->local_cset_size)); | |
1241 | ||
1242 | tc = ((struct rx_super_edge *) | |
1243 | rx_cache_malloc (rx->cache, tc_size)); | |
1244 | ||
1245 | if (!tc) | |
1246 | return 0; | |
1247 | ||
1248 | tc->next = super->edges; | |
1249 | super->edges = tc; | |
1250 | tc->rx_backtrack_frame.inx = rx->instruction_table[rx_backtrack_point]; | |
1251 | tc->rx_backtrack_frame.data = 0; | |
1252 | tc->rx_backtrack_frame.data_2 = (void *) tc; | |
1253 | tc->options = df; | |
1254 | tc->cset = (rx_Bitset) ((char *) tc + sizeof (*tc)); | |
1255 | rx_bitset_assign (rx->local_cset_size, tc->cset, cset); | |
1256 | if (df) | |
1257 | { | |
1258 | struct rx_distinct_future * dfp = df; | |
1259 | df->next_same_super_edge[1]->next_same_super_edge[0] = 0; | |
1260 | while (dfp) | |
1261 | { | |
1262 | dfp->edge = tc; | |
1263 | dfp = dfp->next_same_super_edge[0]; | |
1264 | } | |
1265 | df->next_same_super_edge[1]->next_same_super_edge[0] = df; | |
1266 | } | |
1267 | return tc; | |
1268 | } | |
1269 | ||
1270 | ||
1271 | /* There are three kinds of cache miss. The first occurs when a | |
1272 | * transition is taken that has never been computed during the | |
1273 | * lifetime of the source superstate. That cache miss is handled by | |
1274 | * calling COMPUTE_SUPER_EDGE. The second kind of cache miss | |
1275 | * occurs when the destination superstate of a transition doesn't | |
1276 | * exist. SOLVE_DESTINATION is used to construct the destination superstate. | |
1277 | * Finally, the third kind of cache miss occurs when the destination | |
1278 | * superstate of a transition is in a `semi-free state'. That case is | |
1279 | * handled by UNFREE_SUPERSTATE. | |
1280 | * | |
1281 | * The function of HANDLE_CACHE_MISS is to figure out which of these | |
1282 | * cases applies. | |
1283 | */ | |
1284 | ||
1285 | #ifdef __STDC__ | |
1286 | static void | |
1287 | install_partial_transition (struct rx_superstate *super, | |
1288 | struct rx_inx *answer, | |
1289 | RX_subset set, int offset) | |
1290 | #else | |
1291 | static void | |
1292 | install_partial_transition (super, answer, set, offset) | |
1293 | struct rx_superstate *super; | |
1294 | struct rx_inx *answer; | |
1295 | RX_subset set; | |
1296 | int offset; | |
1297 | #endif | |
1298 | { | |
1299 | int start = offset; | |
1300 | int end = start + 32; | |
1301 | RX_subset pos = 1; | |
1302 | struct rx_inx * transitions = super->transitions; | |
1303 | ||
1304 | while (start < end) | |
1305 | { | |
1306 | if (set & pos) | |
1307 | transitions[start] = *answer; | |
1308 | pos <<= 1; | |
1309 | ++start; | |
1310 | } | |
1311 | } | |
1312 | ||
1313 | ||
1314 | #ifdef __STDC__ | |
1315 | struct rx_inx * | |
1316 | rx_handle_cache_miss (struct rx *rx, struct rx_superstate *super, unsigned char chr, void *data) | |
1317 | #else | |
1318 | struct rx_inx * | |
1319 | rx_handle_cache_miss (rx, super, chr, data) | |
1320 | struct rx *rx; | |
1321 | struct rx_superstate *super; | |
1322 | unsigned char chr; | |
1323 | void *data; | |
1324 | #endif | |
1325 | { | |
1326 | int offset = chr / RX_subset_bits; | |
1327 | struct rx_distinct_future *df = data; | |
1328 | ||
1329 | if (!df) /* must be the shared_cache_miss_frame */ | |
1330 | { | |
1331 | /* Perhaps this is just a transition waiting to be filled. */ | |
1332 | struct rx_super_edge *tc; | |
1333 | RX_subset mask = rx_subset_singletons [chr % RX_subset_bits]; | |
1334 | ||
1335 | for (tc = super->edges; tc; tc = tc->next) | |
1336 | if (tc->cset[offset] & mask) | |
1337 | { | |
1338 | struct rx_inx * answer; | |
1339 | df = tc->options; | |
1340 | answer = ((tc->options->next_same_super_edge[0] != tc->options) | |
1341 | ? &tc->rx_backtrack_frame | |
1342 | : (df->effects | |
1343 | ? &df->side_effects_frame | |
1344 | : &df->future_frame)); | |
1345 | install_partial_transition (super, answer, | |
1346 | tc->cset [offset], offset * 32); | |
1347 | return answer; | |
1348 | } | |
1349 | /* Otherwise, it's a flushed or newly encountered edge. */ | |
1350 | { | |
1351 | char cset_space[1024]; /* this limit is far from unreasonable */ | |
1352 | rx_Bitset trcset; | |
1353 | struct rx_inx *answer; | |
1354 | ||
1355 | if (rx_sizeof_bitset (rx->local_cset_size) > sizeof (cset_space)) | |
1356 | return 0; /* If the arbitrary limit is hit, always fail */ | |
1357 | /* cleanly. */ | |
1358 | trcset = (rx_Bitset)cset_space; | |
1359 | rx_lock_superstate (rx, super); | |
1360 | if (!compute_super_edge (rx, &df, trcset, super, chr)) | |
1361 | { | |
1362 | rx_unlock_superstate (rx, super); | |
1363 | return 0; | |
1364 | } | |
1365 | if (!df) /* We just computed the fail transition. */ | |
1366 | { | |
1367 | static struct rx_inx | |
1368 | shared_fail_frame = { 0, 0, (void *)rx_backtrack, 0 }; | |
1369 | answer = &shared_fail_frame; | |
1370 | } | |
1371 | else | |
1372 | { | |
1373 | tc = rx_super_edge (rx, super, trcset, df); | |
1374 | if (!tc) | |
1375 | { | |
1376 | rx_unlock_superstate (rx, super); | |
1377 | return 0; | |
1378 | } | |
1379 | answer = ((tc->options->next_same_super_edge[0] != tc->options) | |
1380 | ? &tc->rx_backtrack_frame | |
1381 | : (df->effects | |
1382 | ? &df->side_effects_frame | |
1383 | : &df->future_frame)); | |
1384 | } | |
1385 | install_partial_transition (super, answer, | |
1386 | trcset[offset], offset * 32); | |
1387 | rx_unlock_superstate (rx, super); | |
1388 | return answer; | |
1389 | } | |
1390 | } | |
1391 | else if (df->future) /* A cache miss on an edge with a future? Must be | |
1392 | * a semi-free destination. */ | |
1393 | { | |
1394 | if (df->future->is_semifree) | |
1395 | refresh_semifree_superstate (rx->cache, df->future); | |
1396 | return &df->future_frame; | |
1397 | } | |
1398 | else | |
1399 | /* no future superstate on an existing edge */ | |
1400 | { | |
1401 | rx_lock_superstate (rx, super); | |
1402 | if (!solve_destination (rx, df)) | |
1403 | { | |
1404 | rx_unlock_superstate (rx, super); | |
1405 | return 0; | |
1406 | } | |
1407 | if (!df->effects | |
1408 | && (df->edge->options->next_same_super_edge[0] == df->edge->options)) | |
1409 | install_partial_transition (super, &df->future_frame, | |
1410 | df->edge->cset[offset], offset * 32); | |
1411 | rx_unlock_superstate (rx, super); | |
1412 | return &df->future_frame; | |
1413 | } | |
1414 | } | |
1415 | ||
1416 |