1 /* Copyright (C) 1995, 1996 Tom Lord
3 * This program is free software; you can redistribute it and/or modify
4 * it under the terms of the GNU Library General Public License as published by
5 * the Free Software Foundation; either version 2, or (at your option)
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 * GNU Library General Public License for more details.
13 * You should have received a copy of the GNU Library General Public License
14 * along with this software; see the file COPYING. If not, write to
15 * the Free Software Foundation, 59 Temple Place - Suite 330,
16 * Boston, MA 02111-1307, USA.
23 #include "rxspencer.h"
28 static char * silly_hack_2
= 0;
30 struct rx_solutions rx_no_solutions
;
34 rx_make_solutions (struct rx_registers
* regs
, struct rx_unfaniverse
* verse
, struct rexp_node
* expression
, struct rexp_node
** subexps
, int cset_size
, int start
, int end
, rx_vmfn vmfn
, rx_contextfn contextfn
, void * closure
)
37 rx_make_solutions (regs
, verse
, expression
, subexps
, cset_size
,
38 start
, end
, vmfn
, contextfn
, closure
)
39 struct rx_registers
* regs
;
40 struct rx_unfaniverse
* verse
;
41 struct rexp_node
* expression
;
42 struct rexp_node
** subexps
;
47 rx_contextfn contextfn
;
51 struct rx_solutions
* solns
;
54 && (expression
->len
>= 0)
55 && (expression
->len
!= (end
- start
)))
56 return &rx_no_solutions
;
60 solns
= (struct rx_solutions
*)silly_hack_2
;
64 solns
= (struct rx_solutions
*)malloc (sizeof (*solns
));
67 rx_bzero ((char *)solns
, sizeof (*solns
));
70 solns
->cset_size
= cset_size
;
71 solns
->subexps
= subexps
;
72 solns
->exp
= expression
;
73 rx_save_rexp (expression
);
79 solns
->contextfn
= contextfn
;
80 solns
->closure
= closure
;
82 if (!solns
->exp
|| !solns
->exp
->observed
)
84 solns
->dfa
= rx_unfa (verse
, expression
, cset_size
);
87 rx_init_system (&solns
->match_engine
, solns
->dfa
->nfa
);
89 if (rx_yes
!= rx_start_superstate (&solns
->match_engine
))
94 struct rexp_node
* simplified
;
96 status
= rx_simple_rexp (&simplified
, cset_size
, solns
->exp
, subexps
);
99 solns
->dfa
= rx_unfa (verse
, simplified
, cset_size
);
102 rx_free_rexp (simplified
);
105 rx_init_system (&solns
->match_engine
, solns
->dfa
->nfa
);
106 if (rx_yes
!= rx_start_superstate (&solns
->match_engine
))
108 rx_free_rexp (simplified
);
111 if (expression
&& ( (expression
->type
== r_concat
)
112 || (expression
->type
== r_plus
)
113 || (expression
->type
== r_star
)
114 || (expression
->type
== r_interval
)))
116 struct rexp_node
* subexp
;
118 subexp
= solns
->exp
->params
.pair
.left
;
120 if (!subexp
|| !subexp
->observed
)
122 solns
->left_dfa
= rx_unfa (solns
->verse
, subexp
, solns
->cset_size
);
126 struct rexp_node
* simplified
;
128 status
= rx_simple_rexp (&simplified
, solns
->cset_size
, subexp
, solns
->subexps
);
131 solns
->left_dfa
= rx_unfa (solns
->verse
, simplified
, solns
->cset_size
);
132 rx_free_rexp (simplified
);
135 if (!solns
->left_dfa
)
137 rx_bzero ((char *)&solns
->left_match_engine
, sizeof (solns
->left_match_engine
));
138 rx_init_system (&solns
->left_match_engine
, solns
->left_dfa
->nfa
);
144 rx_free_rexp (solns
->exp
);
153 rx_free_solutions (struct rx_solutions
* solns
)
156 rx_free_solutions (solns
)
157 struct rx_solutions
* solns
;
163 if (solns
== &rx_no_solutions
)
168 rx_free_solutions (solns
->left
);
174 rx_free_solutions (solns
->right
);
180 rx_free_unfa (solns
->dfa
);
185 rx_terminate_system (&solns
->left_match_engine
);
186 rx_free_unfa (solns
->left_dfa
);
190 rx_terminate_system (&solns
->match_engine
);
194 rx_free_rexp (solns
->exp
);
199 silly_hack_2
= (char *)solns
;
206 static enum rx_answers
207 rx_solution_fit_p (struct rx_solutions
* solns
)
209 static enum rx_answers
210 rx_solution_fit_p (solns
)
211 struct rx_solutions
* solns
;
214 unsigned const char * burst
;
218 int rel_pos_in_burst
;
219 enum rx_answers vmstat
;
222 current_pos
= solns
->start
;
224 vmstat
= solns
->vmfn (solns
->closure
,
225 &burst
, &burst_len
, &burst_addr
,
226 current_pos
, solns
->end
,
229 if (vmstat
!= rx_yes
)
232 rel_pos_in_burst
= current_pos
- burst_addr
;
233 burst_end_addr
= burst_addr
+ burst_len
;
235 if (burst_end_addr
>= solns
->end
)
237 enum rx_answers fit_status
;
238 fit_status
= rx_fit_p (&solns
->match_engine
,
239 burst
+ rel_pos_in_burst
,
240 solns
->end
- current_pos
);
245 enum rx_answers fit_status
;
246 fit_status
= rx_advance (&solns
->match_engine
,
247 burst
+ rel_pos_in_burst
,
248 burst_len
- rel_pos_in_burst
);
249 if (fit_status
!= rx_yes
)
255 current_pos
+= burst_len
- rel_pos_in_burst
;
263 static enum rx_answers
264 rx_solution_fit_str_p (struct rx_solutions
* solns
)
266 static enum rx_answers
267 rx_solution_fit_str_p (solns
)
268 struct rx_solutions
* solns
;
272 unsigned const char * burst
;
276 int rel_pos_in_burst
;
277 enum rx_answers vmstat
;
282 current_pos
= solns
->start
;
283 count
= solns
->exp
->params
.cstr
.len
;
284 key
= (unsigned char *)solns
->exp
->params
.cstr
.contents
;
287 vmstat
= solns
->vmfn (solns
->closure
,
288 &burst
, &burst_len
, &burst_addr
,
289 current_pos
, solns
->end
,
292 if (vmstat
!= rx_yes
)
295 rel_pos_in_burst
= current_pos
- burst_addr
;
296 burst_end_addr
= burst_addr
+ burst_len
;
299 unsigned const char * pos
;
301 pos
= burst
+ rel_pos_in_burst
;
303 if (burst_end_addr
>= solns
->end
)
320 part_count_init
= burst_len
- rel_pos_in_burst
;
321 part_count
= part_count_init
;
330 count
-= part_count_init
;
331 current_pos
+= burst_len
- rel_pos_in_burst
;
343 rx_best_end_guess (struct rx_solutions
* solns
, struct rexp_node
* exp
, int bound
)
346 rx_best_end_guess (solns
, exp
, bound
)
347 struct rx_solutions
* solns
;
348 struct rexp_node
* exp
;
353 unsigned const char * burst
;
357 int rel_pos_in_burst
;
359 enum rx_answers vmstat
;
362 unparse_print_rexp (256, solns
->exp
);
364 unparse_print_rexp (256, exp
);
365 printf ("\nbound %d \n", bound
);
368 if (rx_yes
!= rx_start_superstate (&solns
->left_match_engine
))
372 best_guess
= current_pos
= solns
->start
;
376 printf (" best_guess %d\n", best_guess
);
378 vmstat
= solns
->vmfn (solns
->closure
,
379 &burst
, &burst_len
, &burst_addr
,
383 printf (" str>%s\n", burst
);
385 if (vmstat
!= rx_yes
)
390 rel_pos_in_burst
= current_pos
- burst_addr
;
391 burst_end_addr
= burst_addr
+ burst_len
;
393 if (burst_end_addr
> bound
)
395 burst_end_addr
= bound
;
396 burst_len
= bound
- burst_addr
;
403 printf (" rel_pos_in_burst %d burst_len %d\n", rel_pos_in_burst
, burst_len
);
405 while (rel_pos_in_burst
< burst_len
)
407 amt_advanced
= rx_advance_to_final (&solns
->left_match_engine
,
408 burst
+ rel_pos_in_burst
,
409 burst_len
- rel_pos_in_burst
);
411 printf (" amt_advanced %d", amt_advanced
);
413 if (amt_advanced
< 0)
417 current_pos
+= amt_advanced
;
418 rel_pos_in_burst
+= amt_advanced
;
419 if (solns
->left_match_engine
.final_tag
)
420 best_guess
= current_pos
;
422 printf (" best_guess %d\n", best_guess
);
423 printf (" current_pos %d\n", current_pos
);
425 if (amt_advanced
== 0)
430 if (current_pos
== bound
)
443 rx_next_solution (struct rx_solutions
* solns
)
446 rx_next_solution (solns
)
447 struct rx_solutions
* solns
;
453 if (solns
== &rx_no_solutions
)
460 if (solns
->step
!= 0)
467 solns
->final_tag
= 1;
468 return (solns
->start
== solns
->end
473 else if ( (solns
->exp
->len
>= 0)
474 && (solns
->exp
->len
!= (solns
->end
- solns
->start
)))
478 else if (!solns
->exp
->observed
)
480 if (solns
->step
!= 0)
484 else if (solns
->exp
->type
== r_string
)
487 ans
= rx_solution_fit_str_p (solns
);
488 solns
->final_tag
= 1;
495 ans
= rx_solution_fit_p (solns
);
496 solns
->final_tag
= solns
->match_engine
.final_tag
;
501 else if (solns
->exp
->observed
)
503 enum rx_answers fit_p
;
507 if (solns
->exp
->params
.intval
)
509 solns
->regs
[solns
->exp
->params
.intval
].rm_so
= solns
->saved_rm_so
;
510 solns
->regs
[solns
->exp
->params
.intval
].rm_eo
= solns
->saved_rm_eo
;
518 fit_p
= rx_solution_fit_p (solns
);
519 /* Set final_tag here because this rough fit test
520 * may be all the matching that gets done.
521 * For example, consider a paren node containing
522 * a true regular expression ending with a cut
525 solns
->final_tag
= solns
->match_engine
.final_tag
;
542 switch (solns
->exp
->type
)
552 enum rx_answers paren_stat
;
556 if (solns
->exp
->params
.intval
)
558 solns
->saved_rm_so
= solns
->regs
[solns
->exp
->params
.intval
].rm_so
;
559 solns
->saved_rm_eo
= solns
->regs
[solns
->exp
->params
.intval
].rm_eo
;
562 if ( !solns
->exp
->params
.pair
.left
563 || !solns
->exp
->params
.pair
.left
->observed
)
565 if (solns
->exp
->params
.intval
)
567 solns
->regs
[solns
->exp
->params
.intval
].rm_so
= solns
->start
;
568 solns
->regs
[solns
->exp
->params
.intval
].rm_eo
= solns
->end
;
571 /* Keep the final_tag from the fit_p test. */
576 solns
->left
= rx_make_solutions (solns
->regs
,
578 solns
->exp
->params
.pair
.left
,
596 if (solns
->exp
->params
.intval
)
598 solns
->regs
[solns
->exp
->params
.intval
].rm_so
= solns
->saved_rm_so
;
599 solns
->regs
[solns
->exp
->params
.intval
].rm_eo
= solns
->saved_rm_eo
;
602 paren_stat
= rx_next_solution (solns
->left
);
603 if (paren_stat
== rx_yes
)
605 if (solns
->exp
->params
.intval
)
607 solns
->regs
[solns
->exp
->params
.intval
].rm_so
= solns
->start
;
608 solns
->regs
[solns
->exp
->params
.intval
].rm_eo
= solns
->end
;
610 solns
->final_tag
= solns
->left
->final_tag
;
616 rx_free_solutions (solns
->left
);
618 if (solns
->exp
->params
.intval
)
620 solns
->regs
[solns
->exp
->params
.intval
].rm_so
= solns
->saved_rm_so
;
621 solns
->regs
[solns
->exp
->params
.intval
].rm_eo
= solns
->saved_rm_eo
;
631 enum rx_answers opt_stat
;
635 solns
->left
= rx_make_solutions (solns
->regs
,
637 solns
->exp
->params
.pair
.left
,
654 opt_stat
= rx_next_solution (solns
->left
);
655 if (opt_stat
== rx_yes
)
657 solns
->final_tag
= solns
->left
->final_tag
;
663 rx_free_solutions (solns
->left
);
665 return ((solns
->start
== solns
->end
)
675 enum rx_answers alt_stat
;
679 solns
->left
= rx_make_solutions (solns
->regs
,
681 solns
->exp
->params
.pair
.left
,
698 alt_stat
= rx_next_solution (solns
->left
);
700 if (alt_stat
== rx_yes
)
702 solns
->final_tag
= solns
->left
->final_tag
;
708 rx_free_solutions (solns
->left
);
714 solns
->right
= rx_make_solutions (solns
->regs
,
716 solns
->exp
->params
.pair
.right
,
733 alt_stat
= rx_next_solution (solns
->right
);
735 if (alt_stat
== rx_yes
)
737 solns
->final_tag
= solns
->right
->final_tag
;
743 rx_free_solutions (solns
->right
);
754 enum rx_answers concat_stat
;
756 solns
->split_guess
= solns
->end
;
758 solns
->split_guess
= ((solns
->end
- solns
->start
) > RX_MANY_CASES
759 ? rx_best_end_guess (solns
,
760 solns
->exp
->params
.pair
.left
, solns
->end
)
764 concat_split_guess_loop
:
765 solns
->left
= rx_make_solutions (solns
->regs
,
767 solns
->exp
->params
.pair
.left
,
783 concat_try_next_left_match
:
785 concat_stat
= rx_next_solution (solns
->left
);
786 if (concat_stat
!= rx_yes
)
788 rx_free_solutions (solns
->left
);
789 rx_free_solutions (solns
->right
);
790 solns
->left
= solns
->right
= 0;
791 solns
->split_guess
= solns
->split_guess
- 1;
793 solns
->split_guess
= ((solns
->split_guess
- solns
->start
) > RX_MANY_CASES
794 ? rx_best_end_guess (solns
,
795 solns
->exp
->params
.pair
.left
,
796 solns
->split_guess
- 1)
797 : solns
->split_guess
- 1);
799 if (solns
->split_guess
>= solns
->start
)
800 goto concat_split_guess_loop
;
814 solns
->right
= rx_make_solutions (solns
->regs
,
816 solns
->exp
->params
.pair
.right
,
826 rx_free_solutions (solns
->left
);
836 /* concat_try_next_right_match: */
838 concat_stat
= rx_next_solution (solns
->right
);
839 if (concat_stat
== rx_yes
)
841 solns
->final_tag
= solns
->right
->final_tag
;
844 else if (concat_stat
== rx_no
)
846 rx_free_solutions (solns
->right
);
849 goto concat_try_next_left_match
;
851 else /* concat_stat == rx_bogus */
853 rx_free_solutions (solns
->left
);
855 rx_free_solutions (solns
->right
);
869 enum rx_answers star_stat
;
871 solns
->split_guess
= solns
->end
;
873 solns
->split_guess
= ((solns
->end
- solns
->start
) > RX_MANY_CASES
874 ? rx_best_end_guess (solns
,
875 solns
->exp
->params
.pair
.left
, solns
->end
)
879 star_split_guess_loop
:
880 solns
->left
= rx_make_solutions (solns
->regs
,
882 solns
->exp
->params
.pair
.left
,
898 star_try_next_left_match
:
900 star_stat
= rx_next_solution (solns
->left
);
901 if (star_stat
!= rx_yes
)
903 rx_free_solutions (solns
->left
);
904 rx_free_solutions (solns
->right
);
905 solns
->left
= solns
->right
= 0;
906 solns
->split_guess
= solns
->split_guess
- 1;
908 solns
->split_guess
= ((solns
->split_guess
- solns
->start
) > RX_MANY_CASES
909 ? rx_best_end_guess (solns
,
910 solns
->exp
->params
.pair
.left
,
911 solns
->split_guess
- 1)
912 : solns
->split_guess
- 1);
914 if (solns
->split_guess
>= solns
->start
)
915 goto star_split_guess_loop
;
920 if ( (solns
->exp
->type
== r_star
)
921 && (solns
->start
== solns
->end
)
922 && (star_stat
== rx_no
))
924 solns
->final_tag
= 1;
938 if (solns
->split_guess
== solns
->end
)
940 solns
->final_tag
= solns
->left
->final_tag
;
945 solns
->right
= rx_make_solutions (solns
->regs
,
957 rx_free_solutions (solns
->left
);
967 /* star_try_next_right_match: */
969 star_stat
= rx_next_solution (solns
->right
);
970 if (star_stat
== rx_yes
)
972 solns
->final_tag
= solns
->right
->final_tag
;
975 else if (star_stat
== rx_no
)
977 rx_free_solutions (solns
->right
);
980 goto star_try_next_left_match
;
982 else /* star_stat == rx_bogus */
984 rx_free_solutions (solns
->left
);
986 rx_free_solutions (solns
->right
);
998 enum rx_answers interval_stat
;
1001 /* If the interval permits nothing,
1002 * return immediately.
1004 if (solns
->exp
->params
.intval2
< solns
->interval_x
)
1010 /* If the interval permits only 0 iterations,
1011 * return immediately. Success depends on the
1012 * emptiness of the match.
1014 if ( (solns
->exp
->params
.intval2
== solns
->interval_x
)
1015 && (solns
->exp
->params
.intval
<= solns
->interval_x
))
1018 solns
->final_tag
= 1;
1019 return ((solns
->start
== solns
->end
)
1024 /* The interval permits at most 0 iterations,
1025 * but also requires more. A bug.
1027 if (solns
->exp
->params
.intval2
== solns
->interval_x
)
1029 /* indicates a regexp compilation error, actually */
1034 solns
->split_guess
= solns
->end
;
1036 solns
->split_guess
= ((solns
->end
- solns
->start
) > RX_MANY_CASES
1037 ? rx_best_end_guess (solns
,
1038 solns
->exp
->params
.pair
.left
, solns
->end
)
1042 /* The interval permits more than 0 iterations.
1043 * If it permits 0 and the match is to be empty,
1044 * the trivial match is the most preferred answer.
1046 if (solns
->exp
->params
.intval
<= solns
->interval_x
)
1049 if (solns
->start
== solns
->end
)
1051 solns
->final_tag
= 1;
1054 /* If this isn't a trivial match, or if the trivial match
1055 * is rejected, look harder.
1060 interval_split_guess_loop
:
1061 /* The match requires at least one iteration, either because
1062 * there are characters to match, or because the interval starts
1065 * Look for the first iteration:
1067 solns
->left
= rx_make_solutions (solns
->regs
,
1069 solns
->exp
->params
.pair
.left
,
1085 interval_try_next_left_match
:
1087 interval_stat
= rx_next_solution (solns
->left
);
1088 if (interval_stat
!= rx_yes
)
1090 rx_free_solutions (solns
->left
);
1091 rx_free_solutions (solns
->right
);
1092 solns
->left
= solns
->right
= 0;
1093 solns
->split_guess
= solns
->split_guess
- 1;
1095 solns
->split_guess
= ((solns
->split_guess
- solns
->start
) > RX_MANY_CASES
1096 ? rx_best_end_guess (solns
,
1097 solns
->exp
->params
.pair
.left
,
1098 solns
->split_guess
- 1)
1099 : solns
->split_guess
- 1);
1101 if (solns
->split_guess
>= solns
->start
)
1102 goto interval_split_guess_loop
;
1106 return interval_stat
;
1117 /* After matching one required iteration, construct a smaller
1118 * interval and try to match that against the rest.
1120 * To avoid thwarting unfa caching, instead of building a new
1121 * rexp node with different interval extents, we keep interval_x
1122 * in each solns structure to keep track of the number of
1123 * iterations matched so far.
1125 solns
->right
= rx_make_solutions (solns
->regs
,
1135 solns
->right
->interval_x
= solns
->interval_x
+ 1;
1139 rx_free_solutions (solns
->left
);
1149 /* interval_try_next_right_match: */
1151 interval_stat
= rx_next_solution (solns
->right
);
1152 if (interval_stat
== rx_yes
)
1154 solns
->final_tag
= solns
->right
->final_tag
;
1155 return interval_stat
;
1157 else if (interval_stat
== rx_no
)
1159 rx_free_solutions (solns
->right
);
1162 goto interval_try_next_left_match
;
1164 else /* interval_stat == rx_bogus */
1166 rx_free_solutions (solns
->left
);
1168 rx_free_solutions (solns
->right
);
1171 return interval_stat
;
1179 solns
->final_tag
= 1;
1180 return solns
->contextfn (solns
->closure
,
1182 solns
->start
, solns
->end
,