jfr.im git - irc/evilnet/x3.git/blame_incremental

... / ...

Commit	Line	Data
	1	/* Copyright (C) 1995, 1996 Tom Lord
	2	*
	3	* This program is free software; you can redistribute it and/or modify
	4	* it under the terms of the GNU Library General Public License as published by
	5	* the Free Software Foundation; either version 2, or (at your option)
	6	* any later version.
	7	*
	8	* This program is distributed in the hope that it will be useful,
	9	* but WITHOUT ANY WARRANTY; without even the implied warranty of
	10	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	11	* GNU Library General Public License for more details.
	12	*
	13	* You should have received a copy of the GNU Library General Public License
	14	* along with this software; see the file COPYING. If not, write to
	15	* the Free Software Foundation, 59 Temple Place - Suite 330,
	16	* Boston, MA 02111-1307, USA.
	17	*/
	18
	19
	20	\f
	21	#include "rxall.h"
	22	#include "rxposix.h"
	23	#include "rxgnucomp.h"
	24	#include "rxbasic.h"
	25	#include "rxsimp.h"
	26	\f
	27	/* regcomp takes a regular expression as a string and compiles it.
	28	*
	29	* PATTERN is the address of the pattern string.
	30	*
	31	* CFLAGS is a series of bits which affect compilation.
	32	*
	33	* If REG_EXTENDED is set, we use POSIX extended syntax; otherwise, we
	34	* use POSIX basic syntax.
	35	*
	36	* If REG_NEWLINE is set, then . and [^...] don't match newline.
	37	* Also, regexec will try a match beginning after every newline.
	38	*
	39	* If REG_ICASE is set, then we consider upper- and lowercase
	40	* versions of letters to be equivalent when matching.
	41	*
	42	* If REG_NOSUB is set, then when PREG is passed to regexec, that
	43	* routine will report only success or failure, and nothing about the
	44	* registers.
	45	*
	46	* It returns 0 if it succeeds, nonzero if it doesn't. (See regex.h for
	47	* the return codes and their meanings.)
	48	*/
	49
	50
	51	#ifdef __STDC__
	52	int
	53	regncomp (regex_t * preg, const char * pattern, int len, int cflags)
	54	#else
	55	int
	56	regncomp (preg, pattern, len, cflags)
	57	regex_t * preg;
	58	const char * pattern;
	59	int len;
	60	int cflags;
	61	#endif
	62	{
	63	int ret;
	64	unsigned int syntax;
	65
	66	rx_bzero ((char )preg, sizeof (preg));
	67	syntax = ((cflags & REG_EXTENDED)
	68	? RE_SYNTAX_POSIX_EXTENDED
	69	: RE_SYNTAX_POSIX_BASIC);
	70
	71	if (!(cflags & REG_ICASE))
	72	preg->translate = 0;
	73	else
	74	{
	75	unsigned i;
	76
	77	preg->translate = (unsigned char *) malloc (256);
	78	if (!preg->translate)
	79	return (int) REG_ESPACE;
	80
	81	/* Map uppercase characters to corresponding lowercase ones. */
	82	for (i = 0; i < CHAR_SET_SIZE; i++)
	83	preg->translate[i] = isupper (i) ? tolower (i) : i;
	84	}
	85
	86
	87	/* If REG_NEWLINE is set, newlines are treated differently. */
	88	if (!(cflags & REG_NEWLINE))
	89	preg->newline_anchor = 0;
	90	else
	91	{
	92	/* REG_NEWLINE implies neither . nor [^...] match newline. */
	93	syntax &= ~RE_DOT_NEWLINE;
	94	syntax \|= RE_HAT_LISTS_NOT_NEWLINE;
	95	/* It also changes the matching behavior. */
	96	preg->newline_anchor = 1;
	97	}
	98
	99	preg->no_sub = !!(cflags & REG_NOSUB);
	100
	101	ret = rx_parse (&preg->pattern,
	102	pattern, len,
	103	syntax,
	104	256,
	105	preg->translate);
	106
	107	/* POSIX doesn't distinguish between an unmatched open-group and an
	108	* unmatched close-group: both are REG_EPAREN.
	109	*/
	110	if (ret == REG_ERPAREN)
	111	ret = REG_EPAREN;
	112
	113	if (!ret)
	114	{
	115	preg->re_nsub = 1;
	116	preg->subexps = 0;
	117	rx_posix_analyze_rexp (&preg->subexps,
	118	&preg->re_nsub,
	119	preg->pattern,
	120	0);
	121	preg->is_nullable = rx_fill_in_fastmap (256,
	122	preg->fastmap,
	123	preg->pattern);
	124
	125	preg->is_anchored = rx_is_anchored_p (preg->pattern);
	126	}
	127
	128	return (int) ret;
	129	}
	130
	131
	132	#ifdef __STDC__
	133	int
	134	regcomp (regex_t * preg, const char * pattern, int cflags)
	135	#else
	136	int
	137	regcomp (preg, pattern, cflags)
	138	regex_t * preg;
	139	const char * pattern;
	140	int cflags;
	141	#endif
	142	{
	143	/* POSIX says a null character in the pattern terminates it, so we
	144	* can use strlen here in compiling the pattern.
	145	*/
	146
	147	return regncomp (preg, pattern, strlen (pattern), cflags);
	148	}
	149
	150
	151	\f
	152
	153	/* Returns a message corresponding to an error code, ERRCODE, returned
	154	from either regcomp or regexec. */
	155
	156	#ifdef __STDC__
	157	size_t
	158	regerror (int errcode, const regex_t *preg,
	159	char *errbuf, size_t errbuf_size)
	160	#else
	161	size_t
	162	regerror (errcode, preg, errbuf, errbuf_size)
	163	int errcode;
	164	const regex_t *preg;
	165	char *errbuf;
	166	size_t errbuf_size;
	167	#endif
	168	{
	169	const char *msg;
	170	size_t msg_size;
	171
	172	msg = rx_error_msg[errcode] == 0 ? "Success" : rx_error_msg[errcode];
	173	msg_size = strlen (msg) + 1; /* Includes the 0. */
	174	if (errbuf_size != 0)
	175	{
	176	if (msg_size > errbuf_size)
	177	{
	178	strncpy (errbuf, msg, errbuf_size - 1);
	179	errbuf[errbuf_size - 1] = 0;
	180	}
	181	else
	182	strcpy (errbuf, msg);
	183	}
	184	return msg_size;
	185	}
	186	\f
	187
	188
	189	#ifdef __STDC__
	190	int
	191	rx_regmatch (regmatch_t pmatch[], const regex_t preg, struct rx_context_rules rules, int start, int end, const char *string)
	192	#else
	193	int
	194	rx_regmatch (pmatch, preg, rules, start, end, string)
	195	regmatch_t pmatch[];
	196	const regex_t *preg;
	197	struct rx_context_rules * rules;
	198	int start;
	199	int end;
	200	const char *string;
	201	#endif
	202	{
	203	struct rx_solutions * solutions;
	204	enum rx_answers answer;
	205	struct rx_context_rules local_rules;
	206	int orig_end;
	207	int end_lower_bound;
	208	int end_upper_bound;
	209
	210	local_rules = *rules;
	211	orig_end = end;
	212
	213	if (!preg->pattern)
	214	{
	215	end_lower_bound = start;
	216	end_upper_bound = start;
	217	}
	218	else if (preg->pattern->len >= 0)
	219	{
	220	end_lower_bound = start + preg->pattern->len;
	221	end_upper_bound = start + preg->pattern->len;
	222	}
	223	else
	224	{
	225	end_lower_bound = start;
	226	end_upper_bound = end;
	227	}
	228	end = end_upper_bound;
	229	while (end >= end_lower_bound)
	230	{
	231	local_rules.not_eol = (rules->not_eol
	232	? ( (end == orig_end)
	233	\|\| !local_rules.newline_anchor
	234	\|\| (string[end] != '\n'))
	235	: ( (end != orig_end)
	236	&& (!local_rules.newline_anchor
	237	\|\| (string[end] != '\n'))));
	238	solutions = rx_basic_make_solutions (pmatch, preg->pattern, preg->subexps,
	239	start, end, &local_rules, string);
	240	if (!solutions)
	241	return REG_ESPACE;
	242
	243	answer = rx_next_solution (solutions);
	244
	245	if (answer == rx_yes)
	246	{
	247	if (pmatch)
	248	{
	249	pmatch[0].rm_so = start;
	250	pmatch[0].rm_eo = end;
	251	pmatch[0].final_tag = solutions->final_tag;
	252	}
	253	rx_basic_free_solutions (solutions);
	254	return 0;
	255	}
	256	else
	257	rx_basic_free_solutions (solutions);
	258
	259	--end;
	260	}
	261
	262	switch (answer)
	263	{
	264	default:
	265	case rx_bogus:
	266	return REG_ESPACE;
	267
	268	case rx_no:
	269	return REG_NOMATCH;
	270	}
	271	}
	272
	273
	274	#ifdef __STDC__
	275	int
	276	rx_regexec (regmatch_t pmatch[], const regex_t preg, struct rx_context_rules rules, int start, int end, const char *string)
	277	#else
	278	int
	279	rx_regexec (pmatch, preg, rules, start, end, string)
	280	regmatch_t pmatch[];
	281	const regex_t *preg;
	282	struct rx_context_rules * rules;
	283	int start;
	284	int end;
	285	const char *string;
	286	#endif
	287	{
	288	int x;
	289	int stat;
	290	int anchored;
	291	struct rexp_node * simplified;
	292	struct rx_unfa * unfa;
	293	struct rx_classical_system machine;
	294
	295	anchored = preg->is_anchored;
	296
	297	unfa = 0;
	298	if ((end - start) > RX_MANY_CASES)
	299	{
	300	if (0 > rx_simple_rexp (&simplified, 256, preg->pattern, preg->subexps))
	301	return REG_ESPACE;
	302	unfa = rx_unfa (rx_basic_unfaniverse (), simplified, 256);
	303	if (!unfa)
	304	{
	305	rx_free_rexp (simplified);
	306	return REG_ESPACE;
	307	}
	308	rx_init_system (&machine, unfa->nfa);
	309	rx_free_rexp (simplified);
	310	}
	311
	312	for (x = start; x <= end; ++x)
	313	{
	314	if (preg->is_nullable
	315	\|\| ((x < end)
	316	&& (preg->fastmap[((unsigned char *)string)[x]])))
	317	{
	318	if ((end - start) > RX_MANY_CASES)
	319	{
	320	int amt;
	321	if (rx_start_superstate (&machine) != rx_yes)
	322	{
	323	rx_free_unfa (unfa);
	324	return REG_ESPACE;
	325	}
	326	amt = rx_advance_to_final (&machine, string + x, end - start - x);
	327	if (!machine.final_tag && (amt < (end - start - x)))
	328	goto nomatch;
	329	}
	330	stat = rx_regmatch (pmatch, preg, rules, x, end, string);
	331	if (!stat \|\| (stat != REG_NOMATCH))
	332	{
	333	rx_free_unfa (unfa);
	334	return stat;
	335	}
	336	}
	337	nomatch:
	338	if (anchored)
	339	if (!preg->newline_anchor)
	340	{
	341	rx_free_unfa (unfa);
	342	return REG_NOMATCH;
	343	}
	344	else
	345	while (x < end)
	346	if (string[x] == '\n')
	347	break;
	348	else
	349	++x;
	350	}
	351	rx_free_unfa (unfa);
	352	return REG_NOMATCH;
	353	}
	354
	355	\f
	356
	357	/* regexec searches for a given pattern, specified by PREG, in the
	358	* string STRING.
	359	*
	360	* If NMATCH is zero or REG_NOSUB was set in the cflags argument to
	361	* `regcomp', we ignore PMATCH. Otherwise, we assume PMATCH has at
	362	* least NMATCH elements, and we set them to the offsets of the
	363	* corresponding matched substrings.
	364	*
	365	* EFLAGS specifies `execution flags' which affect matching: if
	366	* REG_NOTBOL is set, then ^ does not match at the beginning of the
	367	* string; if REG_NOTEOL is set, then $ does not match at the end.
	368	*
	369	* We return 0 if we find a match, REG_NOMATCH if not, and REG_ESPACE if memory runs out.
	370	*/
	371
	372	#ifdef __STDC__
	373	int
	374	regnexec (const regex_t preg, const char string, int len, size_t nmatch, regmatch_t **pmatch, int eflags)
	375	#else
	376	int
	377	regnexec (preg, string, len, nmatch, pmatch, eflags)
	378	const regex_t *preg;
	379	const char *string;
	380	int len;
	381	size_t nmatch;
	382	regmatch_t **pmatch;
	383	int eflags;
	384	#endif
	385	{
	386	int want_reg_info;
	387	struct rx_context_rules rules;
	388	regmatch_t * regs;
	389	size_t nregs;
	390	int stat;
	391
	392	want_reg_info = (!preg->no_sub && (nmatch > 0));
	393
	394	rules.newline_anchor = preg->newline_anchor;
	395	rules.not_bol = !!(eflags & REG_NOTBOL);
	396	rules.not_eol = !!(eflags & REG_NOTEOL);
	397	rules.case_indep = !!(eflags & REG_ICASE);
	398
	399	if (nmatch >= preg->re_nsub)
	400	{
	401	regs = *pmatch;
	402	nregs = nmatch;
	403	}
	404	else
	405	{
	406	regs = (regmatch_t )malloc (preg->re_nsub sizeof (*regs));
	407	if (!regs)
	408	return REG_ESPACE;
	409	nregs = preg->re_nsub;
	410	}
	411
	412	{
	413	int x;
	414	for (x = 0; x < nregs; ++x)
	415	regs[x].rm_so = regs[x].rm_eo = -1;
	416	}
	417
	418
	419	stat = rx_regexec (regs, preg, &rules, 0, len, string);
	420
	421	if (!stat && want_reg_info && pmatch && (regs != *pmatch))
	422	{
	423	size_t x;
	424	for (x = 0; x < nmatch; ++x)
	425	(*pmatch)[x] = regs[x];
	426	}
	427
	428	if (!stat && (eflags & REG_ALLOC_REGS))
	429	*pmatch = regs;
	430	else if (regs && (!pmatch \|\| (regs != *pmatch)))
	431	free (regs);
	432
	433	return stat;
	434	}
	435
	436	#ifdef __STDC__
	437	int
	438	regexec (const regex_t preg, const char string, size_t nmatch, regmatch_t pmatch[], int eflags)
	439	#else
	440	int
	441	regexec (preg, string, nmatch, pmatch, eflags)
	442	const regex_t *preg;
	443	const char *string;
	444	size_t nmatch;
	445	regmatch_t pmatch[];
	446	int eflags;
	447	#endif
	448	{
	449	return regnexec (preg,
	450	string,
	451	strlen (string),
	452	nmatch,
	453	&pmatch,
	454	(eflags & ~REG_ALLOC_REGS));
	455	}
	456
	457
	458	/* Free dynamically allocated space used by PREG. */
	459
	460	#ifdef __STDC__
	461	void
	462	regfree (regex_t *preg)
	463	#else
	464	void
	465	regfree (preg)
	466	regex_t *preg;
	467	#endif
	468	{
	469	if (preg->pattern)
	470	{
	471	rx_free_rexp (preg->pattern);
	472	preg->pattern = 0;
	473	}
	474	if (preg->subexps)
	475	{
	476	free (preg->subexps);
	477	preg->subexps = 0;
	478	}
	479	if (preg->translate != 0)
	480	{
	481	free (preg->translate);
	482	preg->translate = 0;
	483	}
	484	}