]> jfr.im git - irc/evilnet/x3.git/blame - rx/rxposix.c
Couple of srvx updates.
[irc/evilnet/x3.git] / rx / rxposix.c
CommitLineData
d76ed9a9 1/* Copyright (C) 1995, 1996 Tom Lord
2 *
3 * This program is free software; you can redistribute it and/or modify
4 * it under the terms of the GNU Library General Public License as published by
5 * the Free Software Foundation; either version 2, or (at your option)
6 * any later version.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 * GNU Library General Public License for more details.
12 *
13 * You should have received a copy of the GNU Library General Public License
14 * along with this software; see the file COPYING. If not, write to
15 * the Free Software Foundation, 59 Temple Place - Suite 330,
16 * Boston, MA 02111-1307, USA.
17 */
18
19
20\f
21#include "rxall.h"
22#include "rxposix.h"
23#include "rxgnucomp.h"
24#include "rxbasic.h"
25#include "rxsimp.h"
26\f
27/* regcomp takes a regular expression as a string and compiles it.
28 *
29 * PATTERN is the address of the pattern string.
30 *
31 * CFLAGS is a series of bits which affect compilation.
32 *
33 * If REG_EXTENDED is set, we use POSIX extended syntax; otherwise, we
34 * use POSIX basic syntax.
35 *
36 * If REG_NEWLINE is set, then . and [^...] don't match newline.
37 * Also, regexec will try a match beginning after every newline.
38 *
39 * If REG_ICASE is set, then we considers upper- and lowercase
40 * versions of letters to be equivalent when matching.
41 *
42 * If REG_NOSUB is set, then when PREG is passed to regexec, that
43 * routine will report only success or failure, and nothing about the
44 * registers.
45 *
46 * It returns 0 if it succeeds, nonzero if it doesn't. (See regex.h for
47 * the return codes and their meanings.)
48 */
49
50
51#ifdef __STDC__
52int
53regncomp (regex_t * preg, const char * pattern, int len, int cflags)
54#else
55int
56regncomp (preg, pattern, len, cflags)
57 regex_t * preg;
58 const char * pattern;
59 int len;
60 int cflags;
61#endif
62{
63 int ret;
64 unsigned int syntax;
65
66 rx_bzero ((char *)preg, sizeof (*preg));
67 syntax = ((cflags & REG_EXTENDED)
68 ? RE_SYNTAX_POSIX_EXTENDED
69 : RE_SYNTAX_POSIX_BASIC);
70
71 if (!(cflags & REG_ICASE))
72 preg->translate = 0;
73 else
74 {
75 unsigned i;
76
77 preg->translate = (unsigned char *) malloc (256);
78 if (!preg->translate)
79 return (int) REG_ESPACE;
80
81 /* Map uppercase characters to corresponding lowercase ones. */
82 for (i = 0; i < CHAR_SET_SIZE; i++)
83 preg->translate[i] = isupper (i) ? tolower (i) : i;
84 }
85
86
87 /* If REG_NEWLINE is set, newlines are treated differently. */
88 if (!(cflags & REG_NEWLINE))
89 preg->newline_anchor = 0;
90 else
91 {
92 /* REG_NEWLINE implies neither . nor [^...] match newline. */
93 syntax &= ~RE_DOT_NEWLINE;
94 syntax |= RE_HAT_LISTS_NOT_NEWLINE;
95 /* It also changes the matching behavior. */
96 preg->newline_anchor = 1;
97 }
98
99 preg->no_sub = !!(cflags & REG_NOSUB);
100
101 ret = rx_parse (&preg->pattern,
102 pattern, len,
103 syntax,
104 256,
105 preg->translate);
106
107 /* POSIX doesn't distinguish between an unmatched open-group and an
108 * unmatched close-group: both are REG_EPAREN.
109 */
110 if (ret == REG_ERPAREN)
111 ret = REG_EPAREN;
112
113 if (!ret)
114 {
115 preg->re_nsub = 1;
116 preg->subexps = 0;
117 rx_posix_analyze_rexp (&preg->subexps,
118 &preg->re_nsub,
119 preg->pattern,
120 0);
121 preg->is_nullable = rx_fill_in_fastmap (256,
122 preg->fastmap,
123 preg->pattern);
124
125 preg->is_anchored = rx_is_anchored_p (preg->pattern);
126 }
127
128 return (int) ret;
129}
130
131
132#ifdef __STDC__
133int
134regcomp (regex_t * preg, const char * pattern, int cflags)
135#else
136int
137regcomp (preg, pattern, cflags)
138 regex_t * preg;
139 const char * pattern;
140 int cflags;
141#endif
142{
143 /* POSIX says a null character in the pattern terminates it, so we
144 * can use strlen here in compiling the pattern.
145 */
146
147 return regncomp (preg, pattern, strlen (pattern), cflags);
148}
149
150
151\f
152
153/* Returns a message corresponding to an error code, ERRCODE, returned
154 from either regcomp or regexec. */
155
156#ifdef __STDC__
157size_t
158regerror (int errcode, const regex_t *preg,
159 char *errbuf, size_t errbuf_size)
160#else
161size_t
162regerror (errcode, preg, errbuf, errbuf_size)
163 int errcode;
164 const regex_t *preg;
165 char *errbuf;
166 size_t errbuf_size;
167#endif
168{
169 const char *msg;
170 size_t msg_size;
171
172 msg = rx_error_msg[errcode] == 0 ? "Success" : rx_error_msg[errcode];
173 msg_size = strlen (msg) + 1; /* Includes the 0. */
174 if (errbuf_size != 0)
175 {
176 if (msg_size > errbuf_size)
177 {
178 strncpy (errbuf, msg, errbuf_size - 1);
179 errbuf[errbuf_size - 1] = 0;
180 }
181 else
182 strcpy (errbuf, msg);
183 }
184 return msg_size;
185}
186\f
187
188
189#ifdef __STDC__
190int
191rx_regmatch (regmatch_t pmatch[], const regex_t *preg, struct rx_context_rules * rules, int start, int end, const char *string)
192#else
193int
194rx_regmatch (pmatch, preg, rules, start, end, string)
195 regmatch_t pmatch[];
196 const regex_t *preg;
197 struct rx_context_rules * rules;
198 int start;
199 int end;
200 const char *string;
201#endif
202{
203 struct rx_solutions * solutions;
204 enum rx_answers answer;
205 struct rx_context_rules local_rules;
206 int orig_end;
207 int end_lower_bound;
208 int end_upper_bound;
209
210 local_rules = *rules;
211 orig_end = end;
212
213 if (!preg->pattern)
214 {
215 end_lower_bound = start;
216 end_upper_bound = start;
217 }
218 else if (preg->pattern->len >= 0)
219 {
220 end_lower_bound = start + preg->pattern->len;
221 end_upper_bound = start + preg->pattern->len;
222 }
223 else
224 {
225 end_lower_bound = start;
226 end_upper_bound = end;
227 }
228 end = end_upper_bound;
229 while (end >= end_lower_bound)
230 {
231 local_rules.not_eol = (rules->not_eol
232 ? ( (end == orig_end)
233 || !local_rules.newline_anchor
234 || (string[end] != '\n'))
235 : ( (end != orig_end)
236 && (!local_rules.newline_anchor
237 || (string[end] != '\n'))));
238 solutions = rx_basic_make_solutions (pmatch, preg->pattern, preg->subexps,
239 start, end, &local_rules, string);
240 if (!solutions)
241 return REG_ESPACE;
242
243 answer = rx_next_solution (solutions);
244
245 if (answer == rx_yes)
246 {
247 if (pmatch)
248 {
249 pmatch[0].rm_so = start;
250 pmatch[0].rm_eo = end;
251 pmatch[0].final_tag = solutions->final_tag;
252 }
253 rx_basic_free_solutions (solutions);
254 return 0;
255 }
256 else
257 rx_basic_free_solutions (solutions);
258
259 --end;
260 }
261
262 switch (answer)
263 {
264 default:
265 case rx_bogus:
266 return REG_ESPACE;
267
268 case rx_no:
269 return REG_NOMATCH;
270 }
271}
272
273
274#ifdef __STDC__
275int
276rx_regexec (regmatch_t pmatch[], const regex_t *preg, struct rx_context_rules * rules, int start, int end, const char *string)
277#else
278int
279rx_regexec (pmatch, preg, rules, start, end, string)
280 regmatch_t pmatch[];
281 const regex_t *preg;
282 struct rx_context_rules * rules;
283 int start;
284 int end;
285 const char *string;
286#endif
287{
288 int x;
289 int stat;
290 int anchored;
291 struct rexp_node * simplified;
292 struct rx_unfa * unfa;
293 struct rx_classical_system machine;
294
295 anchored = preg->is_anchored;
296
297 unfa = 0;
298 if ((end - start) > RX_MANY_CASES)
299 {
300 if (0 > rx_simple_rexp (&simplified, 256, preg->pattern, preg->subexps))
301 return REG_ESPACE;
302 unfa = rx_unfa (rx_basic_unfaniverse (), simplified, 256);
303 if (!unfa)
304 {
305 rx_free_rexp (simplified);
306 return REG_ESPACE;
307 }
308 rx_init_system (&machine, unfa->nfa);
309 rx_free_rexp (simplified);
310 }
311
312 for (x = start; x <= end; ++x)
313 {
314 if (preg->is_nullable
315 || ((x < end)
316 && (preg->fastmap[((unsigned char *)string)[x]])))
317 {
318 if ((end - start) > RX_MANY_CASES)
319 {
320 int amt;
321 if (rx_start_superstate (&machine) != rx_yes)
322 {
323 rx_free_unfa (unfa);
324 return REG_ESPACE;
325 }
326 amt = rx_advance_to_final (&machine, string + x, end - start - x);
327 if (!machine.final_tag && (amt < (end - start - x)))
328 goto nomatch;
329 }
330 stat = rx_regmatch (pmatch, preg, rules, x, end, string);
331 if (!stat || (stat != REG_NOMATCH))
332 {
333 rx_free_unfa (unfa);
334 return stat;
335 }
336 }
337 nomatch:
338 if (anchored)
339 if (!preg->newline_anchor)
340 {
341 rx_free_unfa (unfa);
342 return REG_NOMATCH;
343 }
344 else
345 while (x < end)
346 if (string[x] == '\n')
347 break;
348 else
349 ++x;
350 }
351 rx_free_unfa (unfa);
352 return REG_NOMATCH;
353}
354
355\f
356
357/* regexec searches for a given pattern, specified by PREG, in the
358 * string STRING.
359 *
360 * If NMATCH is zero or REG_NOSUB was set in the cflags argument to
361 * `regcomp', we ignore PMATCH. Otherwise, we assume PMATCH has at
362 * least NMATCH elements, and we set them to the offsets of the
363 * corresponding matched substrings.
364 *
365 * EFLAGS specifies `execution flags' which affect matching: if
366 * REG_NOTBOL is set, then ^ does not match at the beginning of the
367 * string; if REG_NOTEOL is set, then $ does not match at the end.
368 *
369 * We return 0 if we find a match and REG_NOMATCH if not.
370 */
371
372#ifdef __STDC__
373int
374regnexec (const regex_t *preg, const char *string, int len, size_t nmatch, regmatch_t **pmatch, int eflags)
375#else
376int
377regnexec (preg, string, len, nmatch, pmatch, eflags)
378 const regex_t *preg;
379 const char *string;
380 int len;
381 size_t nmatch;
382 regmatch_t **pmatch;
383 int eflags;
384#endif
385{
386 int want_reg_info;
387 struct rx_context_rules rules;
388 regmatch_t * regs;
389 size_t nregs;
390 int stat;
391
392 want_reg_info = (!preg->no_sub && (nmatch > 0));
393
394 rules.newline_anchor = preg->newline_anchor;
395 rules.not_bol = !!(eflags & REG_NOTBOL);
396 rules.not_eol = !!(eflags & REG_NOTEOL);
397 rules.case_indep = !!(eflags & REG_ICASE);
398
399 if (nmatch >= preg->re_nsub)
400 {
401 regs = *pmatch;
402 nregs = nmatch;
403 }
404 else
405 {
406 regs = (regmatch_t *)malloc (preg->re_nsub * sizeof (*regs));
407 if (!regs)
408 return REG_ESPACE;
409 nregs = preg->re_nsub;
410 }
411
412 {
413 int x;
414 for (x = 0; x < nregs; ++x)
415 regs[x].rm_so = regs[x].rm_eo = -1;
416 }
417
418
419 stat = rx_regexec (regs, preg, &rules, 0, len, string);
420
421 if (!stat && want_reg_info && pmatch && (regs != *pmatch))
422 {
423 size_t x;
424 for (x = 0; x < nmatch; ++x)
425 (*pmatch)[x] = regs[x];
426 }
427
428 if (!stat && (eflags & REG_ALLOC_REGS))
429 *pmatch = regs;
430 else if (regs && (!pmatch || (regs != *pmatch)))
431 free (regs);
432
433 return stat;
434}
435
436#ifdef __STDC__
437int
438regexec (const regex_t *preg, const char *string, size_t nmatch, regmatch_t pmatch[], int eflags)
439#else
440int
441regexec (preg, string, nmatch, pmatch, eflags)
442 const regex_t *preg;
443 const char *string;
444 size_t nmatch;
445 regmatch_t pmatch[];
446 int eflags;
447#endif
448{
449 return regnexec (preg,
450 string,
451 strlen (string),
452 nmatch,
453 &pmatch,
454 (eflags & ~REG_ALLOC_REGS));
455}
456
457
458/* Free dynamically allocated space used by PREG. */
459
460#ifdef __STDC__
461void
462regfree (regex_t *preg)
463#else
464void
465regfree (preg)
466 regex_t *preg;
467#endif
468{
469 if (preg->pattern)
470 {
471 rx_free_rexp (preg->pattern);
472 preg->pattern = 0;
473 }
474 if (preg->subexps)
475 {
476 free (preg->subexps);
477 preg->subexps = 0;
478 }
479 if (preg->translate != 0)
480 {
481 free (preg->translate);
482 preg->translate = 0;
483 }
484}