]> jfr.im git - irc/evilnet/x3.git/blob - rx/rxanal.c
Fixed irc_topic() to honour server/hidden_host_type and associated config settings
[irc/evilnet/x3.git] / rx / rxanal.c
1 /* Copyright (C) 1995, 1996 Tom Lord
2 *
3 * This program is free software; you can redistribute it and/or modify
4 * it under the terms of the GNU Library General Public License as published by
5 * the Free Software Foundation; either version 2, or (at your option)
6 * any later version.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 * GNU Library General Public License for more details.
12 *
13 * You should have received a copy of the GNU Library General Public License
14 * along with this software; see the file COPYING. If not, write to
15 * the Free Software Foundation, 59 Temple Place - Suite 330,
16 * Boston, MA 02111-1307, USA.
17 */
18
19 \f
20
21 #include "rxall.h"
22 #include "rxanal.h"
23 #include "rxbitset.h"
24 #include "rxsuper.h"
25
26 \f
27
28
29 #ifdef __STDC__
30 int
31 rx_posix_analyze_rexp (struct rexp_node *** subexps,
32 size_t * re_nsub,
33 struct rexp_node * node,
34 int id)
35 #else
36 int
37 rx_posix_analyze_rexp (subexps, re_nsub, node, id)
38 struct rexp_node *** subexps;
39 size_t * re_nsub;
40 struct rexp_node * node;
41 int id;
42 #endif
43 {
44 if (node)
45 {
46 size_t this_subexp;
47 if (node->type == r_parens)
48 {
49 if (node->params.intval >= 0)
50 {
51 this_subexp = *re_nsub;
52 ++*re_nsub;
53 if (!*subexps)
54 *subexps = (struct rexp_node **)malloc (sizeof (struct rexp_node *) * *re_nsub);
55 else
56 *subexps = (struct rexp_node **)realloc (*subexps,
57 sizeof (struct rexp_node *) * *re_nsub);
58 }
59 }
60 if (node->params.pair.left)
61 id = rx_posix_analyze_rexp (subexps, re_nsub, node->params.pair.left, id);
62 if (node->params.pair.right)
63 id = rx_posix_analyze_rexp (subexps, re_nsub, node->params.pair.right, id);
64 switch (node->type)
65 {
66 case r_cset:
67 node->len = 1;
68 node->observed = 0;
69 break;
70 case r_string:
71 node->len = node->params.cstr.len;
72 node->observed = 0;
73 break;
74 case r_cut:
75 node->len = 0;
76 node->observed = 0;
77 break;
78 case r_concat:
79 case r_alternate:
80 {
81 int lob, rob;
82 int llen, rlen;
83 lob = (!node->params.pair.left ? 0 : node->params.pair.left->observed);
84 rob = (!node->params.pair.right ? 0 : node->params.pair.right->observed);
85 llen = (!node->params.pair.left ? 0 : node->params.pair.left->len);
86 rlen = (!node->params.pair.right ? 0 : node->params.pair.right->len);
87 node->len = ((llen >= 0) && (rlen >= 0)
88 ? ((node->type == r_concat)
89 ? llen + rlen
90 : ((llen == rlen) ? llen : -1))
91 : -1);
92 node->observed = lob || rob;
93 break;
94 }
95 case r_opt:
96 case r_star:
97 case r_plus:
98 node->len = -1;
99 node->observed = (node->params.pair.left
100 ? node->params.pair.left->observed
101 : 0);
102 break;
103
104 case r_interval:
105 node->len = -1;
106 node->observed = 1;
107 break;
108
109 case r_parens:
110 if (node->params.intval >= 0)
111 {
112 node->observed = 1;
113 (*subexps)[this_subexp] = node;
114 }
115 else
116 node->observed = (node->params.pair.left
117 ? node->params.pair.left->observed
118 : 0);
119 node->len = (node->params.pair.left
120 ? node->params.pair.left->len
121 : 0);
122 break;
123 case r_context:
124 switch (node->params.intval)
125 {
126 default:
127 node->observed = 1;
128 node->len = -1;
129 break;
130 case '^':
131 case '$':
132 case '=':
133 case '<':
134 case '>':
135 case 'b':
136 case 'B':
137 case '`':
138 case '\'':
139 node->observed = 1;
140 node->len = 0;
141 break;
142 }
143 break;
144 }
145 if (node->observed)
146 node->id = id++;
147 return id;
148 }
149 return id;
150 }
151 \f
152 /* Returns 0 unless the pattern can match the empty string. */
153 #ifdef __STDC__
154 int
155 rx_fill_in_fastmap (int cset_size, unsigned char * map, struct rexp_node * exp)
156 #else
157 int
158 rx_fill_in_fastmap (cset_size, map, exp)
159 int cset_size;
160 unsigned char * map;
161 struct rexp_node * exp;
162 #endif
163 {
164 if (!exp)
165 {
166 can_match_empty:
167 {
168 int x;
169 for (x = 0; x < cset_size; ++x)
170 map[x] = 1;
171 }
172 return 1;
173 }
174
175 switch (exp->type)
176 {
177 case r_cset:
178 {
179 int x;
180 int most;
181
182 most = exp->params.cset_size;
183 for (x = 0; x < most; ++x)
184 if (RX_bitset_member (exp->params.cset, x))
185 map[x] = 1;
186 }
187 return 0;
188
189 case r_string:
190 if (exp->params.cstr.len)
191 {
192 map[exp->params.cstr.contents[0]] = 1;
193 return 0;
194 }
195 else
196 return 1;
197
198 case r_cut:
199 return 1;
200
201
202 case r_concat:
203 return rx_fill_in_fastmap (cset_size, map, exp->params.pair.left);
204
205 /* Why not the right branch? If the left branch
206 * can't be empty it doesn't matter. If it can, then
207 * the fastmap is already saturated, and again, the
208 * right branch doesn't matter.
209 */
210
211
212 case r_alternate:
213 return ( rx_fill_in_fastmap (cset_size, map, exp->params.pair.left)
214 | rx_fill_in_fastmap (cset_size, map, exp->params.pair.right));
215
216 case r_parens:
217 case r_plus:
218 return rx_fill_in_fastmap (cset_size, map, exp->params.pair.left);
219
220 case r_opt:
221 case r_star:
222 goto can_match_empty;
223 /* Why not the subtree? These operators already saturate
224 * the fastmap.
225 */
226
227 case r_interval:
228 if (exp->params.intval == 0)
229 goto can_match_empty;
230 else
231 return rx_fill_in_fastmap (cset_size, map, exp->params.pair.left);
232
233 case r_context:
234 goto can_match_empty;
235 }
236
237 /* this should never happen but gcc seems to like it */
238 return 0;
239
240 }
241 \f
242
243 #ifdef __STDC__
244 int
245 rx_is_anchored_p (struct rexp_node * exp)
246 #else
247 int
248 rx_is_anchored_p (exp)
249 struct rexp_node * exp;
250 #endif
251 {
252 if (!exp)
253 return 0;
254
255 switch (exp->type)
256 {
257 case r_opt:
258 case r_star:
259 case r_cset:
260 case r_string:
261 case r_cut:
262 return 0;
263
264 case r_parens:
265 case r_plus:
266 case r_concat:
267 return rx_is_anchored_p (exp->params.pair.left);
268
269 case r_alternate:
270 return ( rx_is_anchored_p (exp->params.pair.left)
271 && rx_is_anchored_p (exp->params.pair.right));
272
273
274 case r_interval:
275 if (exp->params.intval == 0)
276 return 0;
277 else
278 return rx_is_anchored_p (exp->params.pair.left);
279
280 case r_context:
281 return (exp->params.intval == '^');
282 }
283
284 /* this should never happen but gcc seems to like it */
285 return 0;
286 }
287
288 \f
289
290 #ifdef __STDC__
291 enum rx_answers
292 rx_start_superstate (struct rx_classical_system * frame)
293 #else
294 enum rx_answers
295 rx_start_superstate (frame)
296 struct rx_classical_system * frame;
297 #endif
298 {
299 struct rx_superset * start_contents;
300 struct rx_nfa_state_set * start_nfa_set;
301
302 if (frame->rx->start_set)
303 start_contents = frame->rx->start_set;
304 else
305 {
306 {
307 struct rx_possible_future * futures;
308 futures = rx_state_possible_futures (frame->rx, frame->rx->start_nfa_states);
309
310 if (!futures)
311 return rx_bogus;
312
313 if (futures->next)
314 return rx_start_state_with_too_many_futures;
315
316 start_nfa_set = futures->destset;
317 }
318
319 start_contents =
320 rx_superstate_eclosure_union (frame->rx,
321 rx_superset_cons (frame->rx, 0, 0),
322 start_nfa_set);
323
324 if (!start_contents)
325 return rx_bogus;
326
327 start_contents->starts_for = frame->rx;
328 frame->rx->start_set = start_contents;
329 }
330
331 if ( start_contents->superstate
332 && (start_contents->superstate->rx_id == frame->rx->rx_id))
333 {
334 if (frame->state)
335 {
336 rx_unlock_superstate (frame->rx, frame->state);
337 }
338 frame->state = start_contents->superstate;
339 /* The cached superstate may be in a semifree state.
340 * We need to lock it and preserve the invariant
341 * that a locked superstate is never semifree.
342 * So refresh it.
343 */
344 rx_refresh_this_superstate (frame->rx->cache, frame->state);
345 rx_lock_superstate (frame->rx, frame->state);
346 return rx_yes;
347 }
348 else
349 {
350 struct rx_superstate * state;
351
352 rx_protect_superset (frame->rx, start_contents);
353 state = rx_superstate (frame->rx, start_contents);
354 rx_release_superset (frame->rx, start_contents);
355 if (!state)
356 return rx_bogus;
357 if (frame->state)
358 {
359 rx_unlock_superstate (frame->rx, frame->state);
360 }
361 frame->state = state;
362 rx_lock_superstate (frame->rx, frame->state);
363 return rx_yes;
364 }
365 }
366
367 \f
368
369
370 #ifdef __STDC__
371 enum rx_answers
372 rx_fit_p (struct rx_classical_system * frame, unsigned const char * burst, int len)
373 #else
374 enum rx_answers
375 rx_fit_p (frame, burst, len)
376 struct rx_classical_system * frame;
377 unsigned const char * burst;
378 int len;
379 #endif
380 {
381 struct rx_inx * inx_table;
382 struct rx_inx * inx;
383
384 if (!frame->state)
385 return rx_bogus;
386
387 if (!len)
388 {
389 frame->final_tag = frame->state->contents->is_final;
390 return (frame->state->contents->is_final
391 ? rx_yes
392 : rx_no);
393 }
394
395 inx_table = frame->state->transitions;
396 rx_unlock_superstate (frame->rx, frame->state);
397
398 while (len--)
399 {
400 struct rx_inx * next_table;
401
402 inx = inx_table + *burst;
403 next_table = (struct rx_inx *)inx->data;
404 while (!next_table)
405 {
406 struct rx_superstate * state;
407 state = ((struct rx_superstate *)
408 ((char *)inx_table
409 - ((unsigned long)
410 ((struct rx_superstate *)0)->transitions)));
411
412 switch ((int)inx->inx)
413 {
414 case rx_backtrack:
415 /* RX_BACKTRACK means that we've reached the empty
416 * superstate, indicating that match can't succeed
417 * from this point.
418 */
419 frame->state = 0;
420 return rx_no;
421
422 case rx_cache_miss:
423 /* Because the superstate NFA is lazily constructed,
424 * and in fact may erode from underneath us, we sometimes
425 * have to construct the next instruction from the hard way.
426 * This invokes one step in the lazy-conversion.
427 */
428 inx =
429 rx_handle_cache_miss
430 (frame->rx, state, *burst, inx->data_2);
431
432 if (!inx)
433 {
434 frame->state = 0;
435 return rx_bogus;
436 }
437 next_table = (struct rx_inx *)inx->data;
438 continue;
439
440
441 /* No other instructions are legal here.
442 * (In particular, this function does not handle backtracking
443 * or the related instructions.)
444 */
445 default:
446 frame->state = 0;
447 return rx_bogus;
448 }
449 }
450 inx_table = next_table;
451 ++burst;
452 }
453
454 if (inx->data_2) /* indicates a final superstate */
455 {
456 frame->final_tag = (int)inx->data_2;
457 frame->state = ((struct rx_superstate *)
458 ((char *)inx_table
459 - ((unsigned long)
460 ((struct rx_superstate *)0)->transitions)));
461 rx_lock_superstate (frame->rx, frame->state);
462 return rx_yes;
463 }
464 frame->state = ((struct rx_superstate *)
465 ((char *)inx_table
466 - ((unsigned long)
467 ((struct rx_superstate *)0)->transitions)));
468 rx_lock_superstate (frame->rx, frame->state);
469 return rx_no;
470 }
471
472 \f
473
474
475 #ifdef __STDC__
476 enum rx_answers
477 rx_advance (struct rx_classical_system * frame, unsigned const char * burst, int len)
478 #else
479 enum rx_answers
480 rx_advance (frame, burst, len)
481 struct rx_classical_system * frame;
482 unsigned const char * burst;
483 int len;
484 #endif
485 {
486 struct rx_inx * inx_table;
487
488 if (!frame->state)
489 return rx_bogus;
490
491 if (!len)
492 return rx_yes;
493
494 inx_table = frame->state->transitions;
495 rx_unlock_superstate (frame->rx, frame->state);
496
497 while (len--)
498 {
499 struct rx_inx * inx;
500 struct rx_inx * next_table;
501
502 inx = inx_table + *burst;
503 next_table = (struct rx_inx *)inx->data;
504 while (!next_table)
505 {
506 struct rx_superstate * state;
507 state = ((struct rx_superstate *)
508 ((char *)inx_table
509 - ((unsigned long)
510 ((struct rx_superstate *)0)->transitions)));
511
512 switch ((int)inx->inx)
513 {
514 case rx_backtrack:
515 /* RX_BACKTRACK means that we've reached the empty
516 * superstate, indicating that match can't succeed
517 * from this point.
518 */
519 frame->state = 0;
520 return rx_no;
521
522 case rx_cache_miss:
523 /* Because the superstate NFA is lazily constructed,
524 * and in fact may erode from underneath us, we sometimes
525 * have to construct the next instruction from the hard way.
526 * This invokes one step in the lazy-conversion.
527 */
528 inx =
529 rx_handle_cache_miss
530 (frame->rx, state, *burst, inx->data_2);
531
532 if (!inx)
533 {
534 frame->state = 0;
535 return rx_bogus;
536 }
537 next_table = (struct rx_inx *)inx->data;
538 continue;
539
540
541 /* No other instructions are legal here.
542 * (In particular, this function does not handle backtracking
543 * or the related instructions.)
544 */
545 default:
546 frame->state = 0;
547 return rx_bogus;
548 }
549 }
550 inx_table = next_table;
551 ++burst;
552 }
553
554 frame->state = ((struct rx_superstate *)
555 ((char *)inx_table
556 - ((unsigned long)
557 ((struct rx_superstate *)0)->transitions)));
558 rx_lock_superstate (frame->rx, frame->state);
559 return rx_yes;
560 }
561
562 \f
563
564 #ifdef __STDC__
565 int
566 rx_advance_to_final (struct rx_classical_system * frame, unsigned const char * burst, int len)
567 #else
568 int
569 rx_advance_to_final (frame, burst, len)
570 struct rx_classical_system * frame;
571 unsigned const char * burst;
572 int len;
573 #endif
574 {
575 int initial_len;
576 struct rx_inx * inx_table;
577 struct rx_superstate * this_state;
578
579 if (!frame->state)
580 return 0;
581
582 if (!len)
583 {
584 frame->final_tag = frame->state->contents->is_final;
585 return 0;
586 }
587
588 inx_table = frame->state->transitions;
589
590 initial_len = len;
591
592 this_state = frame->state;
593
594 while (len--)
595 {
596 struct rx_inx * inx;
597 struct rx_inx * next_table;
598
599 /* this_state holds the state for the position we're
600 * leaving. this_state is locked.
601 */
602 inx = inx_table + *burst;
603 next_table = (struct rx_inx *)inx->data;
604
605 while (!next_table)
606 {
607 struct rx_superstate * state;
608
609 state = ((struct rx_superstate *)
610 ((char *)inx_table
611 - ((unsigned long)
612 ((struct rx_superstate *)0)->transitions)));
613
614 switch ((int)inx->inx)
615 {
616 case rx_backtrack:
617 /* RX_BACKTRACK means that we've reached the empty
618 * superstate, indicating that match can't succeed
619 * from this point.
620 *
621 * Return to the state for the position prior to what
622 * we failed at, and return that position.
623 */
624 frame->state = this_state;
625 frame->final_tag = this_state->contents->is_final;
626 return (initial_len - len) - 1;
627
628 case rx_cache_miss:
629 /* Because the superstate NFA is lazily constructed,
630 * and in fact may erode from underneath us, we sometimes
631 * have to construct the next instruction from the hard way.
632 * This invokes one step in the lazy-conversion.
633 */
634 inx = rx_handle_cache_miss
635 (frame->rx, state, *burst, inx->data_2);
636
637 if (!inx)
638 {
639 rx_unlock_superstate (frame->rx, this_state);
640 frame->state = 0;
641 return -1;
642 }
643 next_table = (struct rx_inx *)inx->data;
644 continue;
645
646
647 /* No other instructions are legal here.
648 * (In particular, this function does not handle backtracking
649 * or the related instructions.)
650 */
651 default:
652 rx_unlock_superstate (frame->rx, this_state);
653 frame->state = 0;
654 return -1;
655 }
656 }
657
658 /* Release the superstate for the preceeding position: */
659 rx_unlock_superstate (frame->rx, this_state);
660
661 /* Compute the superstate for the new position: */
662 inx_table = next_table;
663 this_state = ((struct rx_superstate *)
664 ((char *)inx_table
665 - ((unsigned long)
666 ((struct rx_superstate *)0)->transitions)));
667
668 /* Lock it (see top-of-loop invariant): */
669 rx_lock_superstate (frame->rx, this_state);
670
671 /* Check to see if we should stop: */
672 if (this_state->contents->is_final)
673 {
674 frame->final_tag = this_state->contents->is_final;
675 frame->state = this_state;
676 return (initial_len - len);
677 }
678
679 ++burst;
680 }
681
682 /* Consumed all of the characters. */
683 frame->state = this_state;
684 frame->final_tag = this_state->contents->is_final;
685
686 /* state already locked (see top-of-loop invariant) */
687 return initial_len;
688 }
689
690
691 \f
692
693 #ifdef __STDC__
694 void
695 rx_terminate_system (struct rx_classical_system * frame)
696 #else
697 void
698 rx_terminate_system (frame)
699 struct rx_classical_system * frame;
700 #endif
701 {
702 if (frame->state)
703 {
704 rx_unlock_superstate (frame->rx, frame->state);
705 frame->state = 0;
706 }
707 }
708
709 \f
710
711
712
713
714
715
716
717 #ifdef __STDC__
718 void
719 rx_init_system (struct rx_classical_system * frame, struct rx * rx)
720 #else
721 void
722 rx_init_system (frame, rx)
723 struct rx_classical_system * frame;
724 struct rx * rx;
725 #endif
726 {
727 frame->rx = rx;
728 frame->state = 0;
729 }
730
731 \f
732