1 /* Copyright (C) 1995, 1996 Tom Lord
3 * This program is free software; you can redistribute it and/or modify
4 * it under the terms of the GNU Library General Public License as published by
5 * the Free Software Foundation; either version 2, or (at your option)
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 * GNU Library General Public License for more details.
13 * You should have received a copy of the GNU Library General Public License
14 * along with this software; see the file COPYING. If not, write to
15 * the Free Software Foundation, 59 Temple Place - Suite 330,
16 * Boston, MA 02111-1307, USA.
23 #include "rxgnucomp.h"
27 /* regcomp takes a regular expression as a string and compiles it.
29 * PATTERN is the address of the pattern string.
31 * CFLAGS is a series of bits which affect compilation.
33 * If REG_EXTENDED is set, we use POSIX extended syntax; otherwise, we
34 * use POSIX basic syntax.
36 * If REG_NEWLINE is set, then . and [^...] don't match newline.
37 * Also, regexec will try a match beginning after every newline.
39 * If REG_ICASE is set, then we consider upper- and lowercase
40 * versions of letters to be equivalent when matching.
42 * If REG_NOSUB is set, then when PREG is passed to regexec, that routine
43 * will report only success or failure, and nothing about match offsets.
46 * It returns 0 if it succeeds, nonzero if it doesn't. (See regex.h for
47 * the return codes and their meanings.)
/* regncomp: compile the LEN-byte pattern at PATTERN into PREG under CFLAGS.
 *
 * NOTE(review): this listing has gaps -- declarations, braces and some
 * statements of the original are not visible -- so the notes here cover
 * only the statements that can actually be seen.
 *
 * Visible steps, in order: zero *PREG; pick the extended or basic POSIX
 * syntax from REG_EXTENDED; handle REG_ICASE (translate table); set
 * newline_anchor and the newline syntax bits from REG_NEWLINE; set
 * no_sub from REG_NOSUB; call rx_parse; then fill in subexps,
 * is_nullable and is_anchored.
 */
53 regncomp (regex_t
* preg
, const char * pattern
, int len
, int cflags
)
/* Second spelling of the same parameter list using bare names; presumably
 * the pre-ANSI form of the definition.  The lines that select between the
 * two forms are not visible here.
 */
56 regncomp (preg
, pattern
, len
, cflags
)
/* Start from an all-zero regex_t. */
66 rx_bzero ((char *)preg
, sizeof (*preg
));
/* REG_EXTENDED selects the extended syntax, otherwise the basic one. */
67 syntax
= ((cflags
& REG_EXTENDED
)
68 ? RE_SYNTAX_POSIX_EXTENDED
69 : RE_SYNTAX_POSIX_BASIC
);
/* Tests for REG_ICASE being clear; the statement it guards is not visible. */
71 if (!(cflags
& REG_ICASE
))
/* Case-folding table.  NOTE(review): allocated with a literal 256 while
 * the fill loop below is bounded by CHAR_SET_SIZE -- confirm they agree.
 */
77 preg
->translate
= (unsigned char *) malloc (256);
/* Presumably taken when the malloc above fails (the test is not visible). */
79 return (int) REG_ESPACE
;
81 /* Map uppercase characters to corresponding lowercase ones. */
82 for (i
= 0; i
< CHAR_SET_SIZE
; i
++)
83 preg
->translate
[i
] = isupper (i
) ? tolower (i
) : i
;
87 /* If REG_NEWLINE is set, newlines are treated differently. */
88 if (!(cflags
& REG_NEWLINE
))
89 preg
->newline_anchor
= 0;
92 /* REG_NEWLINE implies neither . nor [^...] match newline. */
93 syntax
&= ~RE_DOT_NEWLINE
;
94 syntax
|= RE_HAT_LISTS_NOT_NEWLINE
;
95 /* It also changes the matching behavior. */
96 preg
->newline_anchor
= 1;
/* Normalize the REG_NOSUB bit to 0 or 1. */
99 preg
->no_sub
= !!(cflags
& REG_NOSUB
);
/* Parse into preg->pattern; the remaining rx_parse arguments are not
 * visible in this listing.
 */
101 ret
= rx_parse (&preg
->pattern
,
107 /* POSIX doesn't distinguish between an unmatched open-group and an
108 * unmatched close-group: both are REG_EPAREN.
110 if (ret
== REG_ERPAREN
)
117 rx_posix_analyze_rexp (&preg
->subexps
,
121 preg
->is_nullable
= rx_fill_in_fastmap (256,
125 preg
->is_anchored
= rx_is_anchored_p (preg
->pattern
);
/* regcomp: compile the 0-terminated string PATTERN into PREG.
 *
 * Thin wrapper: measures PATTERN with strlen and forwards PREG, PATTERN,
 * that length and CFLAGS to regncomp, returning its result unchanged.
 * PATTERN must therefore be a valid 0-terminated string.
 */
134 regcomp (regex_t
* preg
, const char * pattern
, int cflags
)
/* Bare-name parameter list; presumably the pre-ANSI form of the same
 * definition (the lines selecting between the two are not visible).
 */
137 regcomp (preg
, pattern
, cflags
)
139 const char * pattern
;
143 /* POSIX says a null character in the pattern terminates it, so we
144 * can use strlen here in compiling the pattern.
147 return regncomp (preg
, pattern
, strlen (pattern
), cflags
);
153 /* Returns a message corresponding to an error code, ERRCODE, returned
154 from either regcomp or regexec. */
/* regerror: copy the message for ERRCODE into ERRBUF, truncating to fit.
 *
 * The message is looked up in rx_error_msg; a zero entry is reported as
 * "Success".  When ERRBUF_SIZE is nonzero the text is copied into ERRBUF;
 * if it does not fit, only ERRBUF_SIZE - 1 bytes are copied and a
 * terminating 0 is stored in the last byte.
 *
 * NOTE(review): no range check on ERRCODE is visible before it is used as
 * an index into rx_error_msg, and the return statement is not visible in
 * this listing.  PREG appears only in the parameter lists shown here.
 */
158 regerror (int errcode
, const regex_t
*preg
,
159 char *errbuf
, size_t errbuf_size
)
/* Bare-name parameter list; presumably the pre-ANSI form of the same
 * definition.
 */
162 regerror (errcode
, preg
, errbuf
, errbuf_size
)
/* A zero table entry stands for "no error". */
172 msg
= rx_error_msg
[errcode
] == 0 ? "Success" : rx_error_msg
[errcode
];
173 msg_size
= strlen (msg
) + 1; /* Includes the 0. */
/* With a zero-sized buffer nothing is written to ERRBUF. */
174 if (errbuf_size
!= 0)
/* Message longer than the buffer: copy what fits and terminate by hand,
 * since strncpy does not add a terminator when it truncates.
 */
176 if (msg_size
> errbuf_size
)
178 strncpy (errbuf
, msg
, errbuf_size
- 1);
179 errbuf
[errbuf_size
- 1] = 0;
/* Otherwise the whole message, terminator included, fits. */
182 strcpy (errbuf
, msg
);
/* rx_regmatch: look for a match of PREG in STRING that begins at START,
 * trying candidate end positions from the largest down to the smallest.
 *
 * NOTE(review): this listing has gaps (declarations, braces, the first
 * branch condition, the loop step and the return statements are not
 * visible), so only the visible statements are described.
 *
 * On an rx_yes answer from rx_next_solution, pmatch[0] receives START as
 * rm_so, the current END as rm_eo, and the final_tag of the solution.
 * The solutions object is released with rx_basic_free_solutions on both
 * of the visible paths.
 */
191 rx_regmatch (regmatch_t pmatch
[], const regex_t
*preg
, struct rx_context_rules
* rules
, int start
, int end
, const char *string
)
/* Bare-name parameter list; presumably the pre-ANSI form of the same
 * definition.
 */
194 rx_regmatch (pmatch
, preg
, rules
, start
, end
, string
)
197 struct rx_context_rules
* rules
;
203 struct rx_solutions
* solutions
;
204 enum rx_answers answer
;
205 struct rx_context_rules local_rules
;
/* Work on a private copy so the not_eol field can be changed per
 * candidate end without modifying the rules passed in.
 */
210 local_rules
= *rules
;
/* First case (its condition is not visible): only END == START is tried. */
215 end_lower_bound
= start
;
216 end_upper_bound
= start
;
/* A non-negative pattern->len pins the end to START + len; presumably
 * len is the fixed match length of the pattern -- confirm.
 */
218 else if (preg
->pattern
->len
>= 0)
220 end_lower_bound
= start
+ preg
->pattern
->len
;
221 end_upper_bound
= start
+ preg
->pattern
->len
;
/* Otherwise every end position from START through END is a candidate. */
225 end_lower_bound
= start
;
226 end_upper_bound
= end
;
/* Begin with the largest candidate end.  The statement that steps END
 * toward end_lower_bound is not visible in this listing.
 */
228 end
= end_upper_bound
;
229 while (end
>= end_lower_bound
)
/* Recompute not_eol for this candidate end.
 * If the caller set not_eol: true when END is the original end, or
 * newline anchoring is off, or string[END] is not a newline.
 * Otherwise: true when END is not the original end and (newline
 * anchoring is off or string[END] is not a newline).
 * The assignment of orig_end is not visible here.
 */
231 local_rules
.not_eol
= (rules
->not_eol
232 ? ( (end
== orig_end
)
233 || !local_rules
.newline_anchor
234 || (string
[end
] != '\n'))
235 : ( (end
!= orig_end
)
236 && (!local_rules
.newline_anchor
237 || (string
[end
] != '\n'))));
/* Build a solution stream for exactly the span START..END. */
238 solutions
= rx_basic_make_solutions (pmatch
, preg
->pattern
, preg
->subexps
,
239 start
, end
, &local_rules
, string
);
243 answer
= rx_next_solution (solutions
);
/* Match found: record the overall span and tag in pmatch[0]. */
245 if (answer
== rx_yes
)
249 pmatch
[0].rm_so
= start
;
250 pmatch
[0].rm_eo
= end
;
251 pmatch
[0].final_tag
= solutions
->final_tag
;
253 rx_basic_free_solutions (solutions
);
/* Second visible release; presumably the path for any other answer. */
257 rx_basic_free_solutions (solutions
);
/* rx_regexec: search STRING for a match of PREG, trying each start
 * position X from START through END and calling rx_regmatch there.
 *
 * NOTE(review): this listing has gaps (declarations, braces, several
 * conditions and all return statements are not visible), so only the
 * visible statements are described.
 *
 * When the span END - START exceeds RX_MANY_CASES, a simplified form of
 * the pattern is built with rx_simple_rexp, turned into an automaton with
 * rx_unfa, and loaded into MACHINE with rx_init_system; that machine is
 * then run with rx_advance_to_final before rx_regmatch is called.
 */
276 rx_regexec (regmatch_t pmatch
[], const regex_t
*preg
, struct rx_context_rules
* rules
, int start
, int end
, const char *string
)
/* Bare-name parameter list; presumably the pre-ANSI form of the same
 * definition.
 */
279 rx_regexec (pmatch
, preg
, rules
, start
, end
, string
)
282 struct rx_context_rules
* rules
;
291 struct rexp_node
* simplified
;
292 struct rx_unfa
* unfa
;
293 struct rx_classical_system machine
;
295 anchored
= preg
->is_anchored
;
/* Large inputs only: set up the automaton used as a filter below. */
298 if ((end
- start
) > RX_MANY_CASES
)
/* A negative result from rx_simple_rexp is tested here; what happens on
 * that path is not visible.
 */
300 if (0 > rx_simple_rexp (&simplified
, 256, preg
->pattern
, preg
->subexps
))
302 unfa
= rx_unfa (rx_basic_unfaniverse (), simplified
, 256);
/* Two releases of SIMPLIFIED are visible; presumably they sit on
 * different paths (the braces are missing) -- confirm it is freed once.
 */
305 rx_free_rexp (simplified
);
308 rx_init_system (&machine
, unfa
->nfa
);
309 rx_free_rexp (simplified
);
/* Candidate start positions, END included. */
312 for (x
= start
; x
<= end
; ++x
)
/* Filter on is_nullable and on the fastmap entry for the byte at X.
 * Part of this condition is missing from the listing.
 */
314 if (preg
->is_nullable
316 && (preg
->fastmap
[((unsigned char *)string
)[x
]])))
318 if ((end
- start
) > RX_MANY_CASES
)
321 if (rx_start_superstate (&machine
) != rx_yes
)
/* NOTE(review): the length passed is END - START - X although X already
 * starts at START; this equals the remaining length only when START is
 * 0 -- confirm against callers.
 */
326 amt
= rx_advance_to_final (&machine
, string
+ x
, end
- start
- x
);
/* No final tag and the machine stopped before consuming that length. */
327 if (!machine
.final_tag
&& (amt
< (end
- start
- x
)))
/* Attempt a full match starting at X. */
330 stat
= rx_regmatch (pmatch
, preg
, rules
, x
, end
, string
);
/* True for success and for any status other than REG_NOMATCH.
 * NOTE(review): if REG_NOMATCH is nonzero the !stat test is redundant.
 */
331 if (!stat
|| (stat
!= REG_NOMATCH
))
339 if (!preg
->newline_anchor
)
346 if (string
[x
] == '\n')
357 /* regexec searches for a given pattern, specified by PREG, in the string STRING.
360 * If NMATCH is zero or REG_NOSUB was set in the cflags argument to
361 * `regcomp', we ignore PMATCH. Otherwise, we assume PMATCH has at
362 * least NMATCH elements, and we set them to the offsets of the
363 * corresponding matched substrings.
365 * EFLAGS specifies `execution flags' which affect matching: if
366 * REG_NOTBOL is set, then ^ does not match at the beginning of the
367 * string; if REG_NOTEOL is set, then $ does not match at the end.
369 * We return 0 if we find a match and REG_NOMATCH if not.
/* regnexec: match PREG against the LEN bytes at STRING and report
 * subexpression offsets through *PMATCH.
 *
 * NOTE(review): this listing has gaps (declarations, braces, several
 * branch bodies and the return statement are not visible), so only the
 * visible statements are described.
 *
 * Offsets are wanted only when PREG was not compiled with no_sub and
 * NMATCH is greater than 0.  A context-rules structure is filled from
 * PREG and EFLAGS, a register array REGS is chosen or allocated, every
 * register is preset to -1, and rx_regexec is run over 0..LEN.  On
 * success, when REGS is not the same array as *PMATCH, the first NMATCH
 * registers are copied into *PMATCH.
 */
374 regnexec (const regex_t
*preg
, const char *string
, int len
, size_t nmatch
, regmatch_t
**pmatch
, int eflags
)
/* Bare-name parameter list; presumably the pre-ANSI form of the same
 * definition.
 */
377 regnexec (preg
, string
, len
, nmatch
, pmatch
, eflags
)
387 struct rx_context_rules rules
;
392 want_reg_info
= (!preg
->no_sub
&& (nmatch
> 0));
394 rules
.newline_anchor
= preg
->newline_anchor
;
395 rules
.not_bol
= !!(eflags
& REG_NOTBOL
);
396 rules
.not_eol
= !!(eflags
& REG_NOTEOL
);
/* NOTE(review): REG_ICASE is described above as a compile flag, yet it
 * is tested in EFLAGS here -- confirm this is intended.
 */
397 rules
.case_indep
= !!(eflags
& REG_ICASE
);
/* Caller array has room for every subexpression; the body of this branch
 * is not visible (presumably it uses *PMATCH directly -- confirm).
 */
399 if (nmatch
>= preg
->re_nsub
)
/* Otherwise allocate a scratch array with one slot per subexpression.
 * NOTE(review): no check of the malloc result is visible.
 */
406 regs
= (regmatch_t
*)malloc (preg
->re_nsub
* sizeof (*regs
));
409 nregs
= preg
->re_nsub
;
/* Mark every register as unset. */
414 for (x
= 0; x
< nregs
; ++x
)
415 regs
[x
].rm_so
= regs
[x
].rm_eo
= -1;
/* Search the whole buffer, offsets 0 through LEN. */
419 stat
= rx_regexec (regs
, preg
, &rules
, 0, len
, string
);
/* Success with a separate scratch array: copy results to the caller. */
421 if (!stat
&& want_reg_info
&& pmatch
&& (regs
!= *pmatch
))
424 for (x
= 0; x
< nmatch
; ++x
)
425 (*pmatch
)[x
] = regs
[x
];
/* The bodies of the next two branches are not visible.  Presumably the
 * first hands REGS to the caller and the second frees a scratch array
 * that is not the caller's own -- confirm.
 */
428 if (!stat
&& (eflags
& REG_ALLOC_REGS
))
430 else if (regs
&& (!pmatch
|| (regs
!= *pmatch
)))
/* regexec: entry point taking PMATCH as a plain array.
 *
 * Forwards to regnexec and returns its result, with the REG_ALLOC_REGS
 * bit cleared from EFLAGS.  The other arguments passed to regnexec are
 * not visible in this listing.
 */
438 regexec (const regex_t
*preg
, const char *string
, size_t nmatch
, regmatch_t pmatch
[], int eflags
)
/* Bare-name parameter list; presumably the pre-ANSI form of the same
 * definition.
 */
441 regexec (preg
, string
, nmatch
, pmatch
, eflags
)
449 return regnexec (preg
,
/* Mask out REG_ALLOC_REGS so it never reaches regnexec from here. */
454 (eflags
& ~REG_ALLOC_REGS
));
458 /* Free dynamically allocated space used by PREG. */
/* regfree: release the pattern tree, the subexps array and the translate
 * table held by PREG.  PREG itself is not freed by any visible statement.
 *
 * NOTE(review): this listing has gaps; guards around the first two
 * releases, and any resetting of the fields afterwards, are not visible.
 */
462 regfree (regex_t
*preg
)
/* The parsed pattern is released with the rexp-specific routine. */
471 rx_free_rexp (preg
->pattern
);
476 free (preg
->subexps
);
/* free accepts a null pointer, so this test is not strictly needed. */
479 if (preg
->translate
!= 0)
481 free (preg
->translate
);