/* Copyright (C) 1992, 1993, 1994, 1995 Free Software Foundation, Inc.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU Library General Public License as published by
 * the Free Software Foundation; either version 2, or (at your option)
 * any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Library General Public License for more details.
 *
 * You should have received a copy of the GNU Library General Public License
 * along with this software; see the file COPYING. If not, write to
 * the Free Software Foundation, 59 Temple Place - Suite 330,
 * Boston, MA 02111-1307, USA.
 */
/* This is an array of error messages corresponding to the error codes.
   Only declared here, with unspecified length; the definition is not
   part of this header.  */
extern const char *rx_error_msg[];
40 /* The following bits are used to determine the regexp syntax we
41 recognize. The set/not-set meanings are chosen so that Emacs syntax
42 remains the value 0. The bits are given in alphabetical order, and
43 the definitions shifted by one from the previous bit; thus, when we
44 add or remove a bit, only one other definition need change. */
48 /* If this bit is not set, then \ inside a bracket expression is literal.
49 If set, then such a \ quotes the following character. */
50 RE_BACKSLASH_ESCAPE_IN_LISTS
= (1),
/* If this bit is not set, then + and ? are operators, and \+ and \? are
   literals.
   If set, then \+ and \? are operators and + and ? are literals. */
55 RE_BK_PLUS_QM
= (RE_BACKSLASH_ESCAPE_IN_LISTS
<< 1),
57 /* If this bit is set, then character classes are supported. They are:
58 [:alpha:], [:upper:], [:lower:], [:digit:], [:alnum:], [:xdigit:],
59 [:space:], [:print:], [:punct:], [:graph:], and [:cntrl:].
60 If not set, then character classes are not supported. */
61 RE_CHAR_CLASSES
= (RE_BK_PLUS_QM
<< 1),
63 /* If this bit is set, then ^ and $ are always anchors (outside bracket
64 expressions, of course).
65 If this bit is not set, then it depends:
66 ^ is an anchor if it is at the beginning of a regular
67 expression or after an open-group or an alternation operator;
68 $ is an anchor if it is at the end of a regular expression, or
69 before a close-group or an alternation operator.
71 This bit could be (re)combined with RE_CONTEXT_INDEP_OPS, because
72 POSIX draft 11.2 says that * etc. in leading positions is undefined.
73 We already implemented a previous draft which made those constructs
74 invalid, though, so we haven't changed the code back. */
75 RE_CONTEXT_INDEP_ANCHORS
= (RE_CHAR_CLASSES
<< 1),
77 /* If this bit is set, then special characters are always special
78 regardless of where they are in the pattern.
79 If this bit is not set, then special characters are special only in
80 some contexts; otherwise they are ordinary. Specifically,
81 * + ? and intervals are only special when not after the beginning,
82 open-group, or alternation operator. */
83 RE_CONTEXT_INDEP_OPS
= (RE_CONTEXT_INDEP_ANCHORS
<< 1),
85 /* If this bit is set, then *, +, ?, and { cannot be first in an re or
86 immediately after an alternation or begin-group operator. */
87 RE_CONTEXT_INVALID_OPS
= (RE_CONTEXT_INDEP_OPS
<< 1),
89 /* If this bit is set, then . matches newline.
90 If not set, then it doesn't. */
91 RE_DOT_NEWLINE
= (RE_CONTEXT_INVALID_OPS
<< 1),
93 /* If this bit is set, then . doesn't match NUL.
94 If not set, then it does. */
95 RE_DOT_NOT_NULL
= (RE_DOT_NEWLINE
<< 1),
97 /* If this bit is set, nonmatching lists [^...] do not match newline.
98 If not set, they do. */
99 RE_HAT_LISTS_NOT_NEWLINE
= (RE_DOT_NOT_NULL
<< 1),
101 /* If this bit is set, either \{...\} or {...} defines an
102 interval, depending on RE_NO_BK_BRACES.
103 If not set, \{, \}, {, and } are literals. */
104 RE_INTERVALS
= (RE_HAT_LISTS_NOT_NEWLINE
<< 1),
106 /* If this bit is set, +, ? and | aren't recognized as operators.
107 If not set, they are. */
108 RE_LIMITED_OPS
= (RE_INTERVALS
<< 1),
110 /* If this bit is set, newline is an alternation operator.
111 If not set, newline is literal. */
112 RE_NEWLINE_ALT
= (RE_LIMITED_OPS
<< 1),
/* If this bit is set, then `{...}' defines an interval, and \{ and \}
   are literals.
   If not set, then `\{...\}' defines an interval. */
117 RE_NO_BK_BRACES
= (RE_NEWLINE_ALT
<< 1),
119 /* If this bit is set, (...) defines a group, and \( and \) are literals.
120 If not set, \(...\) defines a group, and ( and ) are literals. */
121 RE_NO_BK_PARENS
= (RE_NO_BK_BRACES
<< 1),
123 /* If this bit is set, then \<digit> matches <digit>.
124 If not set, then \<digit> is a back-reference. */
125 RE_NO_BK_REFS
= (RE_NO_BK_PARENS
<< 1),
127 /* If this bit is set, then | is an alternation operator, and \| is literal.
128 If not set, then \| is an alternation operator, and | is literal. */
129 RE_NO_BK_VBAR
= (RE_NO_BK_REFS
<< 1),
/* If this bit is set, then a starting range point collating higher
   than the ending range point, as in [z-a], is invalid.
   If not set, then when the starting range point collates higher than
   the ending range point, the range is ignored. */
135 RE_NO_EMPTY_RANGES
= (RE_NO_BK_VBAR
<< 1),
137 /* If this bit is set, then an unmatched ) is ordinary.
138 If not set, then an unmatched ) is invalid. */
139 RE_UNMATCHED_RIGHT_PAREN_ORD
= (RE_NO_EMPTY_RANGES
<< 1),
143 RE_SYNTAX_AWK
= (RE_BACKSLASH_ESCAPE_IN_LISTS
| RE_DOT_NOT_NULL
144 | RE_NO_BK_PARENS
| RE_NO_BK_REFS
145 | RE_NO_BK_VBAR
| RE_NO_EMPTY_RANGES
146 | RE_UNMATCHED_RIGHT_PAREN_ORD
),
148 RE_SYNTAX_GREP
= (RE_BK_PLUS_QM
| RE_CHAR_CLASSES
149 | RE_HAT_LISTS_NOT_NEWLINE
| RE_INTERVALS
152 RE_SYNTAX_EGREP
= (RE_CHAR_CLASSES
| RE_CONTEXT_INDEP_ANCHORS
153 | RE_CONTEXT_INDEP_OPS
| RE_HAT_LISTS_NOT_NEWLINE
154 | RE_NEWLINE_ALT
| RE_NO_BK_PARENS
157 RE_SYNTAX_POSIX_EGREP
= (RE_SYNTAX_EGREP
| RE_INTERVALS
| RE_NO_BK_BRACES
),
159 /* Syntax bits common to both basic and extended POSIX regex syntax. */
160 _RE_SYNTAX_POSIX_COMMON
= (RE_CHAR_CLASSES
| RE_DOT_NEWLINE
| RE_DOT_NOT_NULL
161 | RE_INTERVALS
| RE_NO_EMPTY_RANGES
),
163 RE_SYNTAX_POSIX_BASIC
= (_RE_SYNTAX_POSIX_COMMON
| RE_BK_PLUS_QM
),
165 /* Differs from ..._POSIX_BASIC only in that RE_BK_PLUS_QM becomes
166 RE_LIMITED_OPS, i.e., \? \+ \| are not recognized. */
168 RE_SYNTAX_POSIX_MINIMAL_BASIC
= (_RE_SYNTAX_POSIX_COMMON
| RE_LIMITED_OPS
),
170 RE_SYNTAX_POSIX_EXTENDED
= (_RE_SYNTAX_POSIX_COMMON
| RE_CONTEXT_INDEP_ANCHORS
171 | RE_CONTEXT_INDEP_OPS
| RE_NO_BK_BRACES
172 | RE_NO_BK_PARENS
| RE_NO_BK_VBAR
173 | RE_UNMATCHED_RIGHT_PAREN_ORD
),
175 /* Differs from ..._POSIX_EXTENDED in that RE_CONTEXT_INVALID_OPS
176 replaces RE_CONTEXT_INDEP_OPS and RE_NO_BK_REFS is added. */
177 RE_SYNTAX_POSIX_MINIMAL_EXTENDED
= (_RE_SYNTAX_POSIX_COMMON
| RE_CONTEXT_INDEP_ANCHORS
178 | RE_CONTEXT_INVALID_OPS
| RE_NO_BK_BRACES
179 | RE_NO_BK_PARENS
| RE_NO_BK_REFS
180 | RE_NO_BK_VBAR
| RE_UNMATCHED_RIGHT_PAREN_ORD
),
182 RE_SYNTAX_SED
= RE_SYNTAX_POSIX_BASIC
,
184 RE_SYNTAX_POSIX_AWK
= (RE_SYNTAX_POSIX_EXTENDED
| RE_BACKSLASH_ESCAPE_IN_LISTS
)
/* Maximum number of duplicates an interval can allow.  Some systems
   (erroneously) define this in other header files, but we want our
   value, so remove any previous define.  */
#ifdef RE_DUP_MAX
#undef RE_DUP_MAX
#endif
/* Same value as (1 << 15) - 1, written as a literal so that it does not
   depend on shifting into the sign bit where int is only 16 bits wide.  */
#define RE_DUP_MAX (0x7fff)
197 extern int rx_parse (struct rexp_node
** rexp_p
,
200 unsigned long syntax
,
202 unsigned char *translate
);
205 extern int rx_parse ();
210 #endif /* RXGNUCOMPH */