jfr.im git - irc/evilnet/x3.git/blame_incremental

... / ...

Commit	Line	Data
	1	/* Copyright (C) 1992, 1993, 1994, 1995 Free Software Foundation, Inc.
	2	*
	3	* This program is free software; you can redistribute it and/or modify
	4	* it under the terms of the GNU Library General Public License as published by
	5	* the Free Software Foundation; either version 2, or (at your option)
	6	* any later version.
	7	*
	8	* This program is distributed in the hope that it will be useful,
	9	* but WITHOUT ANY WARRANTY; without even the implied warranty of
	10	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	11	* GNU Library General Public License for more details.
	12	*
	13	* You should have received a copy of the GNU Library General Public License
	14	* along with this software; see the file COPYING. If not, write to
	15	* the Free Software Foundation, 59 Temple Place - Suite 330,
	16	* Boston, MA 02111-1307, USA.
	17	*/
	18
	19	\f
	20	#include <sys/types.h>
	21	#include "rxall.h"
	22	#include "rxgnucomp.h"
	23	#include "inst-rxposix.h"
	24
	25	\f
	26	/* {A Syntax Table}
	27	*/
	28
	29	/* Define the syntax basics for \<, \>, etc.
	30	*/
	31
	32	#ifndef emacs
	33	#define CHARBITS 8
	34	#define CHAR_SET_SIZE (1 << CHARBITS)
	35	#define Sword 1
	36	#define SYNTAX(c) re_syntax_table[c]
	37	char re_syntax_table[CHAR_SET_SIZE];
	38
	39	#ifdef __STDC__
	40	static void
	41	init_syntax_once (void)
	42	#else
	43	static void
	44	init_syntax_once ()
	45	#endif
	46	{
	47	register int c;
	48	static int done = 0;
	49
	50	if (done)
	51	return;
	52
	53	rx_bzero ((char *)re_syntax_table, sizeof re_syntax_table);
	54
	55	for (c = 'a'; c <= 'z'; c++)
	56	re_syntax_table[c] = Sword;
	57
	58	for (c = 'A'; c <= 'Z'; c++)
	59	re_syntax_table[c] = Sword;
	60
	61	for (c = '0'; c <= '9'; c++)
	62	re_syntax_table[c] = Sword;
	63
	64	re_syntax_table['_'] = Sword;
	65
	66	done = 1;
	67	}
	68	#endif /* not emacs */
	69
	70	\f
	71
	72
	73
	74	const char *rx_error_msg[] =
	75	{
	76	0, /* REG_NOUT */
	77	"No match", /* REG_NOMATCH */
	78	"Invalid regular expression", /* REG_BADPAT */
	79	"Invalid collation character", /* REG_ECOLLATE */
	80	"Invalid character class name", /* REG_ECTYPE */
	81	"Trailing backslash", /* REG_EESCAPE */
	82	"Invalid back reference", /* REG_ESUBREG */
	83	"Unmatched [ or [^", /* REG_EBRACK */
	84	"Unmatched ( or \\(", /* REG_EPAREN */
	85	"Unmatched \\{", /* REG_EBRACE */
	86	"Invalid content of \\{\\}", /* REG_BADBR */
	87	"Invalid range end", /* REG_ERANGE */
	88	"Memory exhausted", /* REG_ESPACE */
	89	"Invalid preceding regular expression", /* REG_BADRPT */
	90	"Premature end of regular expression", /* REG_EEND */
	91	"Regular expression too big", /* REG_ESIZE */
	92	"Unmatched ) or \\)", /* REG_ERPAREN */
	93	};
	94
	95
	96	\f
	97	/*
	98	* Macros used while compiling patterns.
	99	*
	100	* By convention, PEND points just past the end of the uncompiled pattern,
	101	* P points to the read position in the pattern. `translate' is the name
	102	* of the translation table (`TRANSLATE' is the name of a macro that looks
	103	* things up in `translate').
	104	*/
	105
	106
	107	/*
	108	* Fetch the next character in the uncompiled pattern---translating it
	109	* if necessary. *Also cast from a signed character in the constant
	110	* string passed to us by the user to an unsigned char that we can use
	111	* as an array index (in, e.g., `translate').
	112	*/
	113	#define PATFETCH(c) \
	114	do {if (p == pend) return REG_EEND; \
	115	c = (unsigned char) *p++; \
	116	c = translate[c]; \
	117	} while (0)
	118
	119	/*
	120	* Fetch the next character in the uncompiled pattern, with no
	121	* translation.
	122	*/
	123	#define PATFETCH_RAW(c) \
	124	do {if (p == pend) return REG_EEND; \
	125	c = (unsigned char) *p++; \
	126	} while (0)
	127
	128	/* Go backwards one character in the pattern. */
	129	#define PATUNFETCH p--
	130
	131
	132	#define TRANSLATE(d) translate[(unsigned char) (d)]
	133
	134	typedef int regnum_t;
	135
	136	/* Since offsets can go either forwards or backwards, this type needs to
	137	* be able to hold values from -(MAX_BUF_SIZE - 1) to MAX_BUF_SIZE - 1.
	138	*/
	139	typedef int pattern_offset_t;
	140
	141	typedef struct
	142	{
	143	struct rexp_node ** top_expression;
	144	struct rexp_node ** last_expression;
	145	struct rexp_node ** last_non_regular_expression;
	146	pattern_offset_t inner_group_offset;
	147	regnum_t regnum;
	148	} compile_stack_elt_t;
	149
	150	typedef struct
	151	{
	152	compile_stack_elt_t *stack;
	153	unsigned size;
	154	unsigned avail; /* Offset of next open position. */
	155	} compile_stack_type;
	156
	157
	158	#define INIT_COMPILE_STACK_SIZE 32
	159
	160	#define COMPILE_STACK_EMPTY (compile_stack.avail == 0)
	161	#define COMPILE_STACK_FULL (compile_stack.avail == compile_stack.size)
	162
	163	/* The next available element. */
	164	#define COMPILE_STACK_TOP (compile_stack.stack[compile_stack.avail])
	165
	166
	167	/* Set the bit for character C in a list. */
	168	#define SET_LIST_BIT(c) \
	169	(b[((unsigned char) (c)) / CHARBITS] \
	170	\|= 1 << (((unsigned char) c) % CHARBITS))
	171
	172	/* Get the next unsigned number in the uncompiled pattern. */
	173	#define GET_UNSIGNED_NUMBER(num) \
	174	{ if (p != pend) \
	175	{ \
	176	PATFETCH (c); \
	177	while (isdigit (c)) \
	178	{ \
	179	if (num < 0) \
	180	num = 0; \
	181	num = num * 10 + c - '0'; \
	182	if (p == pend) \
	183	break; \
	184	PATFETCH (c); \
	185	} \
	186	} \
	187	}
	188
	189	#define CHAR_CLASS_MAX_LENGTH 64
	190
	191	#define IS_CHAR_CLASS(string) \
	192	(!strcmp (string, "alpha") \|\| !strcmp (string, "upper") \
	193	\|\| !strcmp (string, "lower") \|\| !strcmp (string, "digit") \
	194	\|\| !strcmp (string, "alnum") \|\| !strcmp (string, "xdigit") \
	195	\|\| !strcmp (string, "space") \|\| !strcmp (string, "print") \
	196	\|\| !strcmp (string, "punct") \|\| !strcmp (string, "graph") \
	197	\|\| !strcmp (string, "cntrl") \|\| !strcmp (string, "blank"))
	198
	199	\f
	200	/* These predicates are used in regex_compile. */
	201
	202	/* P points to just after a ^ in PATTERN. Return true if that ^ comes
	203	* after an alternative or a begin-subexpression. We assume there is at
	204	* least one character before the ^.
	205	*/
	206
	207	#ifdef __STDC__
	208	static int
	209	at_begline_loc_p (const char pattern, const char p, unsigned long syntax)
	210	#else
	211	static int
	212	at_begline_loc_p (pattern, p, syntax)
	213	const char *pattern;
	214	const char * p;
	215	unsigned long syntax;
	216	#endif
	217	{
	218	const char *prev = p - 2;
	219	int prev_prev_backslash = ((prev > pattern) && (prev[-1] == '\\'));
	220
	221	return
	222
	223	(/* After a subexpression? */
	224	((*prev == '(') && ((syntax & RE_NO_BK_PARENS) \|\| prev_prev_backslash))
	225	\|\|
	226	/* After an alternative? */
	227	((*prev == '\|') && ((syntax & RE_NO_BK_VBAR) \|\| prev_prev_backslash))
	228	);
	229	}
	230
	231	/* The dual of at_begline_loc_p. This one is for $. We assume there is
	232	* at least one character after the $, i.e., `P < PEND'.
	233	*/
	234
	235	#ifdef __STDC__
	236	static int
	237	at_endline_loc_p (const char p, const char pend, int syntax)
	238	#else
	239	static int
	240	at_endline_loc_p (p, pend, syntax)
	241	const char *p;
	242	const char *pend;
	243	int syntax;
	244	#endif
	245	{
	246	const char *next = p;
	247	int next_backslash = (*next == '\\');
	248	const char *next_next = (p + 1 < pend) ? (p + 1) : 0;
	249
	250	return
	251	(
	252	/* Before a subexpression? */
	253	((syntax & RE_NO_BK_PARENS)
	254	? (*next == ')')
	255	: (next_backslash && next_next && (*next_next == ')')))
	256	\|\|
	257	/* Before an alternative? */
	258	((syntax & RE_NO_BK_VBAR)
	259	? (*next == '\|')
	260	: (next_backslash && next_next && (*next_next == '\|')))
	261	);
	262	}
	263	\f
	264
	265	unsigned char rx_id_translation[256] =
	266	{
	267	0, 1, 2, 3, 4, 5, 6, 7, 8, 9,
	268	10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
	269	20, 21, 22, 23, 24, 25, 26, 27, 28, 29,
	270	30, 31, 32, 33, 34, 35, 36, 37, 38, 39,
	271	40, 41, 42, 43, 44, 45, 46, 47, 48, 49,
	272	50, 51, 52, 53, 54, 55, 56, 57, 58, 59,
	273	60, 61, 62, 63, 64, 65, 66, 67, 68, 69,
	274	70, 71, 72, 73, 74, 75, 76, 77, 78, 79,
	275	80, 81, 82, 83, 84, 85, 86, 87, 88, 89,
	276	90, 91, 92, 93, 94, 95, 96, 97, 98, 99,
	277
	278	100, 101, 102, 103, 104, 105, 106, 107, 108, 109,
	279	110, 111, 112, 113, 114, 115, 116, 117, 118, 119,
	280	120, 121, 122, 123, 124, 125, 126, 127, 128, 129,
	281	130, 131, 132, 133, 134, 135, 136, 137, 138, 139,
	282	140, 141, 142, 143, 144, 145, 146, 147, 148, 149,
	283	150, 151, 152, 153, 154, 155, 156, 157, 158, 159,
	284	160, 161, 162, 163, 164, 165, 166, 167, 168, 169,
	285	170, 171, 172, 173, 174, 175, 176, 177, 178, 179,
	286	180, 181, 182, 183, 184, 185, 186, 187, 188, 189,
	287	190, 191, 192, 193, 194, 195, 196, 197, 198, 199,
	288
	289	200, 201, 202, 203, 204, 205, 206, 207, 208, 209,
	290	210, 211, 212, 213, 214, 215, 216, 217, 218, 219,
	291	220, 221, 222, 223, 224, 225, 226, 227, 228, 229,
	292	230, 231, 232, 233, 234, 235, 236, 237, 238, 239,
	293	240, 241, 242, 243, 244, 245, 246, 247, 248, 249,
	294	250, 251, 252, 253, 254, 255
	295	};
	296
	297	/* The compiler keeps an inverted translation table.
	298	* This looks up/inititalize elements.
	299	* VALID is an array of booleans that validate CACHE.
	300	*/
	301
	302	#ifdef __STDC__
	303	static rx_Bitset
	304	inverse_translation (int * n_members, int cset_size, char * valid, rx_Bitset cache, unsigned char * translate, int c)
	305	#else
	306	static rx_Bitset
	307	inverse_translation (n_members, cset_size, valid, cache, translate, c)
	308	int * n_members;
	309	int cset_size;
	310	char * valid;
	311	rx_Bitset cache;
	312	unsigned char * translate;
	313	int c;
	314	#endif
	315	{
	316	rx_Bitset cs;
	317	cs = cache + c * rx_bitset_numb_subsets (cset_size);
	318
	319	if (!valid[c])
	320	{
	321	int x;
	322	int c_tr;
	323	int membs;
	324
	325	c_tr = TRANSLATE(c);
	326	rx_bitset_null (cset_size, cs);
	327	membs = 0;
	328	for (x = 0; x < 256; ++x)
	329	if (TRANSLATE(x) == c_tr)
	330	{
	331	RX_bitset_enjoin (cs, x);
	332	membs++;
	333	}
	334	valid[c] = 1;
	335	n_members[c] = membs;
	336	}
	337	return cs;
	338	}
	339
	340	\f
	341
	342
	343	/* More subroutine declarations and macros for regex_compile. */
	344
	345	/* Returns true if REGNUM is in one of COMPILE_STACK's elements and
	346	* false if it's not.
	347	*/
	348
	349	#ifdef __STDC__
	350	static int
	351	group_in_compile_stack (compile_stack_type compile_stack, regnum_t regnum)
	352	#else
	353	static int
	354	group_in_compile_stack (compile_stack, regnum)
	355	compile_stack_type compile_stack;
	356	regnum_t regnum;
	357	#endif
	358	{
	359	int this_element;
	360
	361	for (this_element = compile_stack.avail - 1;
	362	this_element >= 0;
	363	this_element--)
	364	if (compile_stack.stack[this_element].regnum == regnum)
	365	return 1;
	366
	367	return 0;
	368	}
	369
	370
	371	/*
	372	* Read the ending character of a range (in a bracket expression) from the
	373	* uncompiled pattern *P_PTR (which ends at PEND). We assume the
	374	* starting character is in `P[-2]'. (`P[-1]' is the character `-'.)
	375	* Then we set the translation of all bits between the starting and
	376	* ending characters (inclusive) in the compiled pattern B.
	377	*
	378	* Return an error code.
	379	*
	380	* We use these short variable names so we can use the same macros as
	381	* `regex_compile' itself.
	382	*/
	383
	384	#ifdef __STDC__
	385	static int
	386	compile_range (int * n_members, int cset_size, rx_Bitset cs, const char ** p_ptr, const char * pend, unsigned char * translate, unsigned long syntax, rx_Bitset inv_tr, char * valid_inv_tr)
	387	#else
	388	static int
	389	compile_range (n_members, cset_size, cs, p_ptr, pend, translate, syntax, inv_tr, valid_inv_tr)
	390	int * n_members;
	391	int cset_size;
	392	rx_Bitset cs;
	393	const char ** p_ptr;
	394	const char * pend;
	395	unsigned char * translate;
	396	unsigned long syntax;
	397	rx_Bitset inv_tr;
	398	char * valid_inv_tr;
	399	#endif
	400	{
	401	unsigned this_char;
	402
	403	const char p = p_ptr;
	404
	405	unsigned char range_end;
	406	unsigned char range_start = TRANSLATE(p[-2]);
	407
	408	if (p == pend)
	409	return REG_ERANGE;
	410
	411	PATFETCH (range_end);
	412
	413	(*p_ptr)++;
	414
	415	if (range_start > range_end)
	416	return syntax & RE_NO_EMPTY_RANGES ? REG_ERANGE : REG_NOERROR;
	417
	418	for (this_char = range_start; this_char <= range_end; this_char++)
	419	{
	420	rx_Bitset it =
	421	inverse_translation (n_members, cset_size, valid_inv_tr, inv_tr, translate, this_char);
	422	rx_bitset_union (cset_size, cs, it);
	423	}
	424
	425	return REG_NOERROR;
	426	}
	427	\f
	428
	429	#ifdef __STDC__
	430	static int
	431	pointless_if_repeated (struct rexp_node * node)
	432	#else
	433	static int
	434	pointless_if_repeated (node)
	435	struct rexp_node * node;
	436	#endif
	437	{
	438	if (!node)
	439	return 1;
	440	switch (node->type)
	441	{
	442	case r_cset:
	443	case r_string:
	444	case r_cut:
	445	return 0;
	446	case r_concat:
	447	case r_alternate:
	448	return (pointless_if_repeated (node->params.pair.left)
	449	&& pointless_if_repeated (node->params.pair.right));
	450	case r_opt:
	451	case r_star:
	452	case r_interval:
	453	case r_parens:
	454	return pointless_if_repeated (node->params.pair.left);
	455	case r_context:
	456	switch (node->params.intval)
	457	{
	458	case '=':
	459	case '<':
	460	case '^':
	461	case 'b':
	462	case 'B':
	463	case '`':
	464	case '\'':
	465	return 1;
	466	default:
	467	return 0;
	468	}
	469	default:
	470	return 0;
	471	}
	472	}
	473
	474	\f
	475
	476	#ifdef __STDC__
	477	static int
	478	factor_string (struct rexp_node *** lastp, int cset_size)
	479	#else
	480	static int
	481	factor_string (lastp, cset_size)
	482	struct rexp_node *** lastp;
	483	int cset_size;
	484	#endif
	485	{
	486	struct rexp_node ** expp;
	487	struct rexp_node * exp;
	488	rx_Bitset cs;
	489	struct rexp_node * cset_node;
	490
	491	expp = *lastp;
	492	exp = expp; / presumed r_string */
	493
	494	cs = rx_cset (cset_size);
	495	if (!cs)
	496	return -1;
	497	RX_bitset_enjoin (cs, exp->params.cstr.contents[exp->params.cstr.len - 1]);
	498	cset_node = rx_mk_r_cset (r_cset, cset_size, cs);
	499	if (!cset_node)
	500	{
	501	rx_free_cset (cs);
	502	return -1;
	503	}
	504	if (exp->params.cstr.len == 1)
	505	{
	506	rx_free_rexp (exp);
	507	*expp = cset_node;
	508	/* lastp remains the same */
	509	return 0;
	510	}
	511	else
	512	{
	513	struct rexp_node * concat_node;
	514	exp->params.cstr.len--;
	515	concat_node = rx_mk_r_binop (r_concat, exp, cset_node);
	516	if (!concat_node)
	517	{
	518	rx_free_rexp (cset_node);
	519	return -1;
	520	}
	521	*expp = concat_node;
	522	*lastp = &concat_node->params.pair.right;
	523	return 0;
	524	}
	525	}
	526
	527	\f
	528
	529	#define isa_blank(C) (((C) == ' ') \|\| ((C) == '\t'))
	530
	531	#ifdef __STDC__
	532	int
	533	rx_parse (struct rexp_node ** rexp_p,
	534	const char *pattern,
	535	int size,
	536	unsigned long syntax,
	537	int cset_size,
	538	unsigned char *translate)
	539	#else
	540	int
	541	rx_parse (rexp_p, pattern, size, syntax, cset_size, translate)
	542	struct rexp_node ** rexp_p;
	543	const char *pattern;
	544	int size;
	545	unsigned long syntax;
	546	int cset_size;
	547	unsigned char *translate;
	548	#endif
	549	{
	550	int compile_error;
	551	RX_subset
	552	inverse_translate [CHAR_SET_SIZE * rx_bitset_numb_subsets(CHAR_SET_SIZE)];
	553	char validate_inv_tr [CHAR_SET_SIZE];
	554	int n_members [CHAR_SET_SIZE];
	555
	556	/* We fetch characters from PATTERN here. Even though PATTERN is
	557	* `char *' (i.e., signed), we declare these variables as unsigned, so
	558	* they can be reliably used as array indices.
	559	*/
	560	register unsigned char c;
	561	register unsigned char c1;
	562
	563	/* A random tempory spot in PATTERN. */
	564	const char *p1;
	565
	566	/* Keeps track of unclosed groups. */
	567	compile_stack_type compile_stack;
	568
	569	/* Points to the current (ending) position in the pattern. */
	570	const char *p;
	571	const char *pend;
	572
	573	/* When parsing is done, this will hold the expression tree. */
	574	struct rexp_node * rexp;
	575
	576	/* This and top_expression are saved on the compile stack. */
	577	struct rexp_node ** top_expression;
	578	struct rexp_node ** last_non_regular_expression;
	579	struct rexp_node ** last_expression;
	580
	581	/* Parameter to `goto append_node' */
	582	struct rexp_node * append;
	583
	584	/* Counts open-groups as they are encountered. This is the index of the
	585	* innermost group being compiled.
	586	*/
	587	regnum_t regnum;
	588
	589	/* True iff the sub-expression just started
	590	* is purely syntactic. Otherwise, a regmatch data
	591	* slot is allocated for the subexpression.
	592	*/
	593	int syntax_only_parens;
	594
	595	/* Place in the uncompiled pattern (i.e., the {) to
	596	* which to go back if the interval is invalid.
	597	*/
	598	const char *beg_interval;
	599
	600	int side;
	601
	602	\f
	603
	604	if (!translate)
	605	translate = rx_id_translation;
	606
	607	/* Points to the current (ending) position in the pattern. */
	608	p = pattern;
	609	pend = pattern + size;
	610
	611	/* When parsing is done, this will hold the expression tree. */
	612	rexp = 0;
	613
	614	/* In the midst of compilation, this holds onto the regexp
	615	* first parst while rexp goes on to aquire additional constructs.
	616	*/
	617	top_expression = &rexp;
	618	last_non_regular_expression = top_expression;
	619	last_expression = top_expression;
	620
	621	/* Counts open-groups as they are encountered. This is the index of the
	622	* innermost group being compiled.
	623	*/
	624	regnum = 0;
	625
	626	rx_bzero ((char *)validate_inv_tr, sizeof (validate_inv_tr));
	627
	628
	629	/* Initialize the compile stack. */
	630	compile_stack.stack = (( compile_stack_elt_t ) malloc ((INIT_COMPILE_STACK_SIZE) sizeof ( compile_stack_elt_t)));
	631	if (compile_stack.stack == 0)
	632	return REG_ESPACE;
	633
	634	compile_stack.size = INIT_COMPILE_STACK_SIZE;
	635	compile_stack.avail = 0;
	636
	637	#if !defined (emacs) && !defined (SYNTAX_TABLE)
	638	/* Initialize the syntax table. */
	639	init_syntax_once ();
	640	#endif
	641
	642	/* Loop through the uncompiled pattern until we're at the end. */
	643	while (p != pend)
	644	{
	645	PATFETCH (c);
	646
	647	switch (c)
	648	{
	649	case '^':
	650	{
	651	if ( /* If at start of pattern, it's an operator. */
	652	p == pattern + 1
	653	/* If context independent, it's an operator. */
	654	\|\| syntax & RE_CONTEXT_INDEP_ANCHORS
	655	/* Otherwise, depends on what's come before. */
	656	\|\| at_begline_loc_p (pattern, p, syntax))
	657	{
	658	struct rexp_node * n
	659	= rx_mk_r_int (r_context, '^');
	660	if (!n)
	661	goto space_error;
	662	append = n;
	663	goto append_node;
	664	}
	665	else
	666	goto normal_char;
	667	}
	668	break;
	669
	670
	671	case '$':
	672	{
	673	if ( /* If at end of pattern, it's an operator. */
	674	p == pend
	675	/* If context independent, it's an operator. */
	676	\|\| syntax & RE_CONTEXT_INDEP_ANCHORS
	677	/* Otherwise, depends on what's next. */
	678	\|\| at_endline_loc_p (p, pend, syntax))
	679	{
	680	struct rexp_node * n
	681	= rx_mk_r_int (r_context, '$');
	682	if (!n)
	683	goto space_error;
	684	append = n;
	685	goto append_node;
	686	}
	687	else
	688	goto normal_char;
	689	}
	690	break;
	691
	692
	693	case '+':
	694	case '?':
	695	if ((syntax & RE_BK_PLUS_QM)
	696	\|\| (syntax & RE_LIMITED_OPS))
	697	goto normal_char;
	698
	699	handle_plus:
	700	case '*':
	701	/* If there is no previous pattern... */
	702	if (pointless_if_repeated (*last_expression))
	703	{
	704	if (syntax & RE_CONTEXT_INVALID_OPS)
	705	{
	706	compile_error = REG_BADRPT;
	707	goto error_return;
	708	}
	709	else if (!(syntax & RE_CONTEXT_INDEP_OPS))
	710	goto normal_char;
	711	}
	712
	713	{
	714	/* 1 means zero (many) matches is allowed. */
	715	char zero_times_ok = 0, many_times_ok = 0;
	716
	717	/* If there is a sequence of repetition chars, collapse it
	718	down to just one (the right one). We can't combine
	719	interval operators with these because of, e.g., `a{2}*',
	720	which should only match an even number of `a's. */
	721
	722	for (;;)
	723	{
	724	zero_times_ok \|= c != '+';
	725	many_times_ok \|= c != '?';
	726
	727	if (p == pend)
	728	break;
	729
	730	PATFETCH (c);
	731
	732	if (c == '*'
	733	\|\| (!(syntax & RE_BK_PLUS_QM) && (c == '+' \|\| c == '?')))
	734	;
	735
	736	else if (syntax & RE_BK_PLUS_QM && c == '\\')
	737	{
	738	if (p == pend)
	739	{
	740	compile_error = REG_EESCAPE;
	741	goto error_return;
	742	}
	743
	744	PATFETCH (c1);
	745	if (!(c1 == '+' \|\| c1 == '?'))
	746	{
	747	PATUNFETCH;
	748	PATUNFETCH;
	749	break;
	750	}
	751
	752	c = c1;
	753	}
	754	else
	755	{
	756	PATUNFETCH;
	757	break;
	758	}
	759
	760	/* If we get here, we found another repeat character. */
	761	}
	762
	763	/* Now we know whether or not zero matches is allowed
	764	* and also whether or not two or more matches is allowed.
	765	*/
	766
	767	{
	768	struct rexp_node * inner_exp;
	769	struct rexp_node * star;
	770
	771	if (last_expression && ((last_expression)->type == r_string))
	772	if (factor_string (&last_expression, cset_size))
	773	goto space_error;
	774	inner_exp = *last_expression;
	775	star = rx_mk_r_monop ((many_times_ok
	776	? (zero_times_ok ? r_star : r_plus)
	777	: r_opt),
	778	inner_exp);
	779	if (!star)
	780	goto space_error;
	781	*last_expression = star;
	782	}
	783	}
	784	break;
	785
	786
	787	case '.':
	788	{
	789	rx_Bitset cs;
	790	struct rexp_node * n;
	791	cs = rx_cset (cset_size);
	792	if (!cs)
	793	goto space_error;
	794	n = rx_mk_r_cset (r_cset, cset_size, cs);
	795	if (!n)
	796	{
	797	rx_free_cset (cs);
	798	goto space_error;
	799	}
	800	rx_bitset_universe (cset_size, cs);
	801	if (!(syntax & RE_DOT_NEWLINE))
	802	RX_bitset_remove (cs, '\n');
	803	if (syntax & RE_DOT_NOT_NULL)
	804	RX_bitset_remove (cs, 0);
	805
	806	append = n;
	807	goto append_node;
	808	break;
	809	}
	810
	811
	812	case '[':
	813	if (p == pend)
	814	{
	815	compile_error = REG_EBRACK;
	816	goto error_return;
	817	}
	818	{
	819	int had_char_class;
	820	rx_Bitset cs;
	821	struct rexp_node * node;
	822	int is_inverted;
	823
	824	had_char_class = 0;
	825	is_inverted = *p == '^';
	826	cs = rx_cset (cset_size);
	827	if (!cs)
	828	goto space_error;
	829	node = rx_mk_r_cset (r_cset, cset_size ,cs);
	830	if (!node)
	831	{
	832	rx_free_cset (cs);
	833	goto space_error;
	834	}
	835
	836	/* This branch of the switch is normally exited with
	837	*`goto append_node'
	838	*/
	839	append = node;
	840
	841	if (is_inverted)
	842	p++;
	843
	844	/* Remember the first position in the bracket expression. */
	845	p1 = p;
	846
	847	/* Read in characters and ranges, setting map bits. */
	848	for (;;)
	849	{
	850	if (p == pend)
	851	{
	852	compile_error = REG_EBRACK;
	853	goto error_return;
	854	}
	855
	856	PATFETCH (c);
	857
	858	/* \ might escape characters inside [...] and [^...]. */
	859	if ((syntax & RE_BACKSLASH_ESCAPE_IN_LISTS) && c == '\\')
	860	{
	861	if (p == pend)
	862	{
	863	compile_error = REG_EESCAPE;
	864	goto error_return;
	865	}
	866
	867	PATFETCH (c1);
	868	{
	869	rx_Bitset it = inverse_translation (n_members,
	870	cset_size,
	871	validate_inv_tr,
	872	inverse_translate,
	873	translate,
	874	c1);
	875	rx_bitset_union (cset_size, cs, it);
	876	}
	877	continue;
	878	}
	879
	880	/* Could be the end of the bracket expression. If it's
	881	not (i.e., when the bracket expression is `[]' so
	882	far), the ']' character bit gets set way below. */
	883	if (c == ']' && p != p1 + 1)
	884	goto finalize_class_and_append;
	885
	886	/* Look ahead to see if it's a range when the last thing
	887	was a character class. */
	888	if (had_char_class && c == '-' && *p != ']')
	889	{
	890	compile_error = REG_ERANGE;
	891	goto error_return;
	892	}
	893
	894	/* Look ahead to see if it's a range when the last thing
	895	was a character: if this is a hyphen not at the
	896	beginning or the end of a list, then it's the range
	897	operator. */
	898	if (c == '-'
	899	&& !(p - 2 >= pattern && p[-2] == '[')
	900	&& !(p - 3 >= pattern && p[-3] == '[' && p[-2] == '^')
	901	&& *p != ']')
	902	{
	903	int ret
	904	= compile_range (n_members, cset_size, cs, &p, pend, translate, syntax,
	905	inverse_translate, validate_inv_tr);
	906	if (ret != REG_NOERROR)
	907	{
	908	compile_error = ret;
	909	goto error_return;
	910	}
	911	}
	912
	913	else if (p[0] == '-' && p[1] != ']')
	914	{ /* This handles ranges made up of characters only. */
	915	int ret;
	916
	917	/* Move past the `-'. */
	918	PATFETCH (c1);
	919
	920	ret = compile_range (n_members, cset_size, cs, &p, pend, translate, syntax,
	921	inverse_translate, validate_inv_tr);
	922	if (ret != REG_NOERROR)
	923	{
	924	compile_error = ret;
	925	goto error_return;
	926	}
	927	}
	928
	929	/* See if we're at the beginning of a possible character
	930	class. */
	931
	932	else if ((syntax & RE_CHAR_CLASSES)
	933	&& (c == '[') && (*p == ':'))
	934	{
	935	char str[CHAR_CLASS_MAX_LENGTH + 1];
	936
	937	PATFETCH (c);
	938	c1 = 0;
	939
	940	/* If pattern is `[[:'. */
	941	if (p == pend)
	942	{
	943	compile_error = REG_EBRACK;
	944	goto error_return;
	945	}
	946
	947	for (;;)
	948	{
	949	PATFETCH (c);
	950	if (c == ':' \|\| c == ']' \|\| p == pend
	951	\|\| c1 == CHAR_CLASS_MAX_LENGTH)
	952	break;
	953	str[c1++] = c;
	954	}
	955	str[c1] = '\0';
	956
	957	/* If isn't a word bracketed by `[:' and:`]':
	958	undo the ending character, the letters, and leave
	959	the leading `:' and `[' (but set bits for them). */
	960	if (c == ':' && *p == ']')
	961	{
	962	if (!strncmp (str, "cut", 3))
	963	{
	964	int val;
	965	if (1 != sscanf (str + 3, " %d", &val))
	966	{
	967	compile_error = REG_ECTYPE;
	968	goto error_return;
	969	}
	970	/* Throw away the ]] */
	971	PATFETCH (c);
	972	PATFETCH (c);
	973	{
	974	struct rexp_node * cut;
	975	cut = rx_mk_r_int (r_cut, val);
	976	append = cut;
	977	goto append_node;
	978	}
	979	}
	980	else if (!strncmp (str, "(", 1))
	981	{
	982	/* Throw away the ]] */
	983	PATFETCH (c);
	984	PATFETCH (c);
	985	syntax_only_parens = 1;
	986	goto handle_open;
	987	}
	988	else if (!strncmp (str, ")", 1))
	989	{
	990	/* Throw away the ]] */
	991	PATFETCH (c);
	992	PATFETCH (c);
	993	syntax_only_parens = 1;
	994	goto handle_close;
	995	}
	996	else
	997	{
	998	int ch;
	999	int is_alnum = !strcmp (str, "alnum");
	1000	int is_alpha = !strcmp (str, "alpha");
	1001	int is_blank = !strcmp (str, "blank");
	1002	int is_cntrl = !strcmp (str, "cntrl");
	1003	int is_digit = !strcmp (str, "digit");
	1004	int is_graph = !strcmp (str, "graph");
	1005	int is_lower = !strcmp (str, "lower");
	1006	int is_print = !strcmp (str, "print");
	1007	int is_punct = !strcmp (str, "punct");
	1008	int is_space = !strcmp (str, "space");
	1009	int is_upper = !strcmp (str, "upper");
	1010	int is_xdigit = !strcmp (str, "xdigit");
	1011
	1012	if (!IS_CHAR_CLASS (str))
	1013	{
	1014	compile_error = REG_ECTYPE;
	1015	goto error_return;
	1016	}
	1017
	1018	/* Throw away the ] at the end of the character
	1019	class. */
	1020	PATFETCH (c);
	1021
	1022	if (p == pend) { compile_error = REG_EBRACK; goto error_return; }
	1023
	1024	for (ch = 0; ch < 1 << CHARBITS; ch++)
	1025	{
	1026	if ( (is_alnum && isalnum (ch))
	1027	\|\| (is_alpha && isalpha (ch))
	1028	\|\| (is_blank && isa_blank (ch))
	1029	\|\| (is_cntrl && iscntrl (ch))
	1030	\|\| (is_digit && isdigit (ch))
	1031	\|\| (is_graph && isgraph (ch))
	1032	\|\| (is_lower && islower (ch))
	1033	\|\| (is_print && isprint (ch))
	1034	\|\| (is_punct && ispunct (ch))
	1035	\|\| (is_space && isspace (ch))
	1036	\|\| (is_upper && isupper (ch))
	1037	\|\| (is_xdigit && isxdigit (ch)))
	1038	{
	1039	rx_Bitset it =
	1040	inverse_translation (n_members,
	1041	cset_size,
	1042	validate_inv_tr,
	1043	inverse_translate,
	1044	translate,
	1045	ch);
	1046	rx_bitset_union (cset_size,
	1047	cs, it);
	1048	}
	1049	}
	1050	had_char_class = 1;
	1051	}
	1052	}
	1053	else
	1054	{
	1055	c1++;
	1056	while (c1--)
	1057	PATUNFETCH;
	1058	{
	1059	rx_Bitset it =
	1060	inverse_translation (n_members,
	1061	cset_size,
	1062	validate_inv_tr,
	1063	inverse_translate,
	1064	translate,
	1065	'[');
	1066	rx_bitset_union (cset_size,
	1067	cs, it);
	1068	}
	1069	{
	1070	rx_Bitset it =
	1071	inverse_translation (n_members,
	1072	cset_size,
	1073	validate_inv_tr,
	1074	inverse_translate,
	1075	translate,
	1076	':');
	1077	rx_bitset_union (cset_size,
	1078	cs, it);
	1079	}
	1080	had_char_class = 0;
	1081	}
	1082	}
	1083	else
	1084	{
	1085	had_char_class = 0;
	1086	{
	1087	rx_Bitset it = inverse_translation (n_members,
	1088	cset_size,
	1089	validate_inv_tr,
	1090	inverse_translate,
	1091	translate,
	1092	c);
	1093	rx_bitset_union (cset_size, cs, it);
	1094	}
	1095	}
	1096	}
	1097
	1098	finalize_class_and_append:
	1099	if (is_inverted)
	1100	{
	1101	rx_bitset_complement (cset_size, cs);
	1102	if (syntax & RE_HAT_LISTS_NOT_NEWLINE)
	1103	RX_bitset_remove (cs, '\n');
	1104	}
	1105	goto append_node;
	1106	}
	1107	break;
	1108
	1109
	1110	case '(':
	1111	if (syntax & RE_NO_BK_PARENS)
	1112	{
	1113	syntax_only_parens = 0;
	1114	goto handle_open;
	1115	}
	1116	else
	1117	goto normal_char;
	1118
	1119
	1120	case ')':
	1121	if (syntax & RE_NO_BK_PARENS)
	1122	{
	1123	syntax_only_parens = 0;
	1124	goto handle_close;
	1125	}
	1126	else
	1127	goto normal_char;
	1128
	1129
	1130	case '\n':
	1131	if (syntax & RE_NEWLINE_ALT)
	1132	goto handle_alt;
	1133	else
	1134	goto normal_char;
	1135
	1136
	1137	case '\|':
	1138	if (syntax & RE_NO_BK_VBAR)
	1139	goto handle_alt;
	1140	else
	1141	goto normal_char;
	1142
	1143
	1144	case '{':
	1145	if ((syntax & RE_INTERVALS) && (syntax & RE_NO_BK_BRACES))
	1146	goto handle_interval;
	1147	else
	1148	goto normal_char;
	1149
	1150
	1151	case '\\':
	1152	if (p == pend) { compile_error = REG_EESCAPE; goto error_return; }
	1153
	1154	/* Do not translate the character after the \, so that we can
	1155	distinguish, e.g., \B from \b, even if we normally would
	1156	translate, e.g., B to b. */
	1157	PATFETCH_RAW (c);
	1158
	1159	switch (c)
	1160	{
	1161	case '(':
	1162	if (syntax & RE_NO_BK_PARENS)
	1163	goto normal_backslash;
	1164
	1165	syntax_only_parens = 0;
	1166
	1167	handle_open:
	1168	if (!syntax_only_parens)
	1169	regnum++;
	1170
	1171	if (COMPILE_STACK_FULL)
	1172	{
	1173	compile_stack.stack
	1174	= ((compile_stack_elt_t *)
	1175	realloc (compile_stack.stack,
	1176	(compile_stack.size << 1) * sizeof (compile_stack_elt_t)));
	1177	if (compile_stack.stack == 0)
	1178	goto space_error;
	1179	compile_stack.size <<= 1;
	1180	}
	1181
	1182	if (*last_non_regular_expression)
	1183	{
	1184	struct rexp_node * concat;
	1185	concat = rx_mk_r_binop (r_concat, *last_non_regular_expression, 0);
	1186	if (!concat)
	1187	goto space_error;
	1188	*last_non_regular_expression = concat;
	1189	last_non_regular_expression = &concat->params.pair.right;
	1190	last_expression = last_non_regular_expression;
	1191	}
	1192
	1193	/*
	1194	* These are the values to restore when we hit end of this
	1195	* group.
	1196	*/
	1197	COMPILE_STACK_TOP.top_expression = top_expression;
	1198	COMPILE_STACK_TOP.last_expression = last_expression;
	1199	COMPILE_STACK_TOP.last_non_regular_expression = last_non_regular_expression;
	1200
	1201	if (syntax_only_parens)
	1202	COMPILE_STACK_TOP.regnum = -1;
	1203	else
	1204	COMPILE_STACK_TOP.regnum = regnum;
	1205
	1206	compile_stack.avail++;
	1207
	1208	top_expression = last_non_regular_expression;
	1209	break;
	1210
	1211
	1212	case ')':
	1213	if (syntax & RE_NO_BK_PARENS) goto normal_backslash;
	1214	syntax_only_parens = 0;
	1215
	1216	handle_close:
	1217	/* See similar code for backslashed left paren above. */
	1218	if (COMPILE_STACK_EMPTY)
	1219	if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD)
	1220	goto normal_char;
	1221	else
	1222	{ compile_error = REG_ERPAREN; goto error_return; }
	1223
	1224	/* Since we just checked for an empty stack above, this
	1225	* ``can't happen''.
	1226	*/
	1227
	1228	{
	1229	/* We don't just want to restore into `regnum', because
	1230	* later groups should continue to be numbered higher,
	1231	* as in `(ab)c(de)' -- the second group is #2.
	1232	*/
	1233	regnum_t this_group_regnum;
	1234	struct rexp_node ** inner;
	1235	struct rexp_node * parens;
	1236
	1237	inner = top_expression;
	1238	compile_stack.avail--;
	1239
	1240	if (!!syntax_only_parens != (COMPILE_STACK_TOP.regnum == -1))
	1241	{ compile_error = REG_ERPAREN; goto error_return; }
	1242
	1243	top_expression = COMPILE_STACK_TOP.top_expression;
	1244	last_expression = COMPILE_STACK_TOP.last_expression;
	1245	last_non_regular_expression = COMPILE_STACK_TOP.last_non_regular_expression;
	1246	this_group_regnum = COMPILE_STACK_TOP.regnum;
	1247
	1248	{
	1249	parens = rx_mk_r_monop (r_parens, *inner);
	1250	if (!parens)
	1251	goto space_error;
	1252	parens->params.intval = this_group_regnum;
	1253	*inner = parens;
	1254	break;
	1255	}
	1256	}
	1257
	1258	case '\|': /* `\\|'. */
	1259	if ((syntax & RE_LIMITED_OPS) \|\| (syntax & RE_NO_BK_VBAR))
	1260	goto normal_backslash;
	1261	handle_alt:
	1262	if (syntax & RE_LIMITED_OPS)
	1263	goto normal_char;
	1264
	1265	{
	1266	struct rexp_node * alt;
	1267
	1268	alt = rx_mk_r_binop (r_alternate, *top_expression, 0);
	1269	if (!alt)
	1270	goto space_error;
	1271	*top_expression = alt;
	1272	last_expression = &alt->params.pair.right;
	1273	last_non_regular_expression = &alt->params.pair.right;
	1274	}
	1275	break;
	1276
	1277
	1278	case '{':
	1279	/* If \{ is a literal. */
	1280	if (!(syntax & RE_INTERVALS)
	1281	/* If we're at `\{' and it's not the open-interval
	1282	operator. */
	1283	\|\| ((syntax & RE_INTERVALS) && (syntax & RE_NO_BK_BRACES))
	1284	\|\| (p - 2 == pattern && p == pend))
	1285	goto normal_backslash;
	1286
	1287	handle_interval:
	1288	{
	1289	/* If got here, then the syntax allows intervals.
	1290	*/
	1291
	1292	/* At least (most) this many matches must be made.
	1293	*/
	1294	int lower_bound;
	1295	int upper_bound;
	1296
	1297	lower_bound = -1;
	1298	upper_bound = -1;
	1299
	1300	/* We're about to parse the bounds of the interval.
	1301	* It may turn out that this isn't an interval after
	1302	* all, in which case these same characters will have
	1303	* to be reparsed as literals. This remembers
	1304	* the backtrack point in the parse:
	1305	*/
	1306	beg_interval = p - 1;
	1307
	1308	if (p == pend)
	1309	{
	1310	if (syntax & RE_NO_BK_BRACES)
	1311	goto unfetch_interval;
	1312	else
	1313	{ compile_error = REG_EBRACE; goto error_return; }
	1314	}
	1315
	1316	GET_UNSIGNED_NUMBER (lower_bound);
	1317
	1318	if (c == ',')
	1319	{
	1320	GET_UNSIGNED_NUMBER (upper_bound);
	1321	if (upper_bound < 0) upper_bound = RE_DUP_MAX;
	1322	}
	1323	else
	1324	/* Interval such as `{n}' => match exactly n times.
	1325	*/
	1326	upper_bound = lower_bound;
	1327
	1328	if (lower_bound < 0
	1329	\|\| upper_bound > RE_DUP_MAX
	1330	\|\| lower_bound > upper_bound)
	1331	{
	1332	if (syntax & RE_NO_BK_BRACES)
	1333	goto unfetch_interval;
	1334	else
	1335	{ compile_error = REG_BADBR; goto error_return; }
	1336	}
	1337
	1338	if (!(syntax & RE_NO_BK_BRACES))
	1339	{
	1340	if (c != '\\') { compile_error = REG_EBRACE; goto error_return; }
	1341	PATFETCH (c);
	1342	}
	1343
	1344	if (c != '}')
	1345	{
	1346	if (syntax & RE_NO_BK_BRACES)
	1347	goto unfetch_interval;
	1348	else
	1349	{ compile_error = REG_BADBR; goto error_return; }
	1350	}
	1351
	1352	/* We just parsed a valid interval.
	1353	* lower_bound and upper_bound are set.
	1354	*/
	1355
	1356	/* If it's invalid to have no preceding re.
	1357	*/
	1358	if (pointless_if_repeated (*last_expression))
	1359	{
	1360	if (syntax & RE_CONTEXT_INVALID_OPS)
	1361	{ compile_error = REG_BADRPT; goto error_return; }
	1362	else if (!(syntax & RE_CONTEXT_INDEP_OPS))
	1363	/* treat the interval spec as literal chars. */
	1364	goto unfetch_interval;
	1365	}
	1366
	1367	{
	1368	struct rexp_node * interval;
	1369
	1370	if (last_expression && ((last_expression)->type == r_string))
	1371	if (factor_string (&last_expression, cset_size))
	1372	goto space_error;
	1373	interval = rx_mk_r_monop (r_interval, *last_expression);
	1374	if (!interval)
	1375	goto space_error;
	1376	interval->params.intval = lower_bound;
	1377	interval->params.intval2 = upper_bound;
	1378	*last_expression = interval;
	1379	last_non_regular_expression = last_expression;
	1380	}
	1381	beg_interval = 0;
	1382	}
	1383	break;
	1384
	1385	unfetch_interval:
	1386	/* If an invalid interval, match the characters as literals. */
	1387	p = beg_interval;
	1388	beg_interval = 0;
	1389
	1390	/* normal_char and normal_backslash need `c'. */
	1391	PATFETCH (c);
	1392
	1393	if (!(syntax & RE_NO_BK_BRACES))
	1394	{
	1395	if ((p > pattern) && (p[-1] == '\\'))
	1396	goto normal_backslash;
	1397	}
	1398	goto normal_char;
	1399
	1400	#ifdef emacs
	1401	/* There is no way to specify the before_dot and after_dot
	1402	* operators. rms says this is ok. --karl
	1403	*/
	1404	case '=':
	1405	side = '=';
	1406	goto add_side_effect;
	1407	break;
	1408
	1409	case 's':
	1410	case 'S':
	1411	{
	1412	rx_Bitset cs;
	1413	struct rexp_node * set;
	1414
	1415	cs = rx_cset (&cset_size);
	1416	if (!cs)
	1417	goto space_error;
	1418	set = rx_mk_r_cset (r_cset, cset_size, cs);
	1419	if (!set)
	1420	{
	1421	rx_free_cset (cs);
	1422	goto space_error;
	1423	}
	1424	if (c == 'S')
	1425	rx_bitset_universe (cset_size, cs);
	1426
	1427	PATFETCH (c);
	1428	{
	1429	int x;
	1430	enum syntaxcode code = syntax_spec_code [c];
	1431	for (x = 0; x < 256; ++x)
	1432	{
	1433
	1434	if (SYNTAX (x) == code)
	1435	{
	1436	rx_Bitset it =
	1437	inverse_translation (n_members,
	1438	cset_size, validate_inv_tr,
	1439	inverse_translate,
	1440	translate, x);
	1441	rx_bitset_xor (cset_size, cs, it);
	1442	}
	1443	}
	1444	}
	1445	append = set;
	1446	goto append_node;
	1447	}
	1448	break;
	1449	#endif /* emacs */
	1450
	1451
	1452	case 'w':
	1453	case 'W':
	1454	{
	1455	rx_Bitset cs;
	1456	struct rexp_node * n;
	1457
	1458	cs = rx_cset (cset_size);
	1459	n = (cs ? rx_mk_r_cset (r_cset, cset_size, cs) : 0);
	1460	if (!(cs && n))
	1461	{
	1462	if (cs)
	1463	rx_free_cset (cs);
	1464	goto space_error;
	1465	}
	1466	if (c == 'W')
	1467	rx_bitset_universe (cset_size ,cs);
	1468	{
	1469	int x;
	1470	for (x = cset_size - 1; x > 0; --x)
	1471	if (SYNTAX(x) & Sword)
	1472	RX_bitset_toggle (cs, x);
	1473	}
	1474	append = n;
	1475	goto append_node;
	1476	}
	1477	break;
	1478
	1479	case '<':
	1480	side = '<';
	1481	goto add_side_effect;
	1482	break;
	1483
	1484	case '>':
	1485	side = '>';
	1486	goto add_side_effect;
	1487	break;
	1488
	1489	case 'b':
	1490	side = 'b';
	1491	goto add_side_effect;
	1492	break;
	1493
	1494	case 'B':
	1495	side = 'B';
	1496	goto add_side_effect;
	1497	break;
	1498
	1499	case '`':
	1500	side = '`';
	1501	goto add_side_effect;
	1502	break;
	1503
	1504	case '\'':
	1505	side = '\'';
	1506	goto add_side_effect;
	1507	break;
	1508
	1509	add_side_effect:
	1510	{
	1511	struct rexp_node * se;
	1512	se = rx_mk_r_int (r_context, side);
	1513	if (!se)
	1514	goto space_error;
	1515	append = se;
	1516	goto append_node;
	1517	}
	1518	break;
	1519
	1520	case '1': case '2': case '3': case '4': case '5':
	1521	case '6': case '7': case '8': case '9':
	1522	if (syntax & RE_NO_BK_REFS)
	1523	goto normal_char;
	1524
	1525	c1 = c - '0';
	1526
	1527	/* Can't back reference to a subexpression if inside of it. */
	1528	if (group_in_compile_stack (compile_stack, c1))
	1529	goto normal_char;
	1530
	1531	if (c1 > regnum)
	1532	{ compile_error = REG_ESUBREG; goto error_return; }
	1533
	1534	side = c;
	1535	goto add_side_effect;
	1536	break;
	1537
	1538	case '+':
	1539	case '?':
	1540	if (syntax & RE_BK_PLUS_QM)
	1541	goto handle_plus;
	1542	else
	1543	goto normal_backslash;
	1544
	1545	default:
	1546	normal_backslash:
	1547	/* You might think it would be useful for \ to mean
	1548	* not to translate; but if we don't translate it
	1549	* it will never match anything.
	1550	*/
	1551	c = TRANSLATE (c);
	1552	goto normal_char;
	1553	}
	1554	break;
	1555
	1556
	1557	default:
	1558	/* Expects the character in `c'. */
	1559	normal_char:
	1560	{
	1561	rx_Bitset cs;
	1562	struct rexp_node * match;
	1563	rx_Bitset it;
	1564
	1565	it = inverse_translation (n_members,
	1566	cset_size, validate_inv_tr,
	1567	inverse_translate, translate, c);
	1568
	1569	if (1 != n_members[c])
	1570	{
	1571	cs = rx_cset (cset_size);
	1572	match = (cs ? rx_mk_r_cset (r_cset, cset_size, cs) : 0);
	1573	if (!(cs && match))
	1574	{
	1575	if (cs)
	1576	rx_free_cset (cs);
	1577	goto space_error;
	1578	}
	1579	rx_bitset_union (CHAR_SET_SIZE, cs, it);
	1580	append = match;
	1581	goto append_node;
	1582	}
	1583	else
	1584	{
	1585	if (last_expression && (last_expression)->type == r_string)
	1586	{
	1587	if (rx_adjoin_string (&((*last_expression)->params.cstr), c))
	1588	goto space_error;
	1589	break;
	1590	}
	1591	else
	1592	{
	1593	append = rx_mk_r_str (r_string, c);
	1594	if(!append)
	1595	goto space_error;
	1596	goto append_node;
	1597	}
	1598	}
	1599	break;
	1600
	1601	append_node:
	1602	/* This genericly appends the rexp APPEND to *LAST_EXPRESSION
	1603	* and then parses the next character normally.
	1604	*/
	1605	if (RX_regular_node_type (append->type))
	1606	{
	1607	if (!*last_expression)
	1608	*last_expression = append;
	1609	else
	1610	{
	1611	struct rexp_node * concat;
	1612	concat = rx_mk_r_binop (r_concat,
	1613	*last_expression, append);
	1614	if (!concat)
	1615	goto space_error;
	1616	*last_expression = concat;
	1617	last_expression = &concat->params.pair.right;
	1618	}
	1619	}
	1620	else
	1621	{
	1622	if (!*last_non_regular_expression)
	1623	{
	1624	*last_non_regular_expression = append;
	1625	last_expression = last_non_regular_expression;
	1626	}
	1627	else
	1628	{
	1629	struct rexp_node * concat;
	1630	concat = rx_mk_r_binop (r_concat,
	1631	*last_non_regular_expression, append);
	1632	if (!concat)
	1633	goto space_error;
	1634	*last_non_regular_expression = concat;
	1635	last_non_regular_expression = &concat->params.pair.right;
	1636	last_expression = last_non_regular_expression;
	1637	}
	1638	}
	1639	}
	1640	} /* switch (c) */
	1641	} /* while p != pend */
	1642
	1643
	1644	/* Through the pattern now. */
	1645
	1646	if (!COMPILE_STACK_EMPTY)
	1647	{ compile_error = REG_EPAREN; goto error_return; }
	1648	free (compile_stack.stack);
	1649
	1650
	1651	*rexp_p = rexp;
	1652	return REG_NOERROR;
	1653
	1654	space_error:
	1655	compile_error = REG_ESPACE;
	1656
	1657	error_return:
	1658	free (compile_stack.stack);
	1659	/* Free expressions pushed onto the compile stack! */
	1660	if (rexp)
	1661	rx_free_rexp (rexp);
	1662	return compile_error;
	1663	}
	1664
	1665