]>
Commit | Line | Data |
---|---|---|
1 | /* classes: h_files */ | |
2 | ||
3 | #ifndef RXGNUCOMPH | |
4 | #define RXGNUCOMPH | |
5 | /* Copyright (C) 1992, 1993, 1994, 1995 Free Software Foundation, Inc. | |
6 | * | |
7 | * This program is free software; you can redistribute it and/or modify | |
8 | * it under the terms of the GNU Library General Public License as published by | |
9 | * the Free Software Foundation; either version 2, or (at your option) | |
10 | * any later version. | |
11 | * | |
12 | * This program is distributed in the hope that it will be useful, | |
13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
15 | * GNU Library General Public License for more details. | |
16 | * | |
17 | * You should have received a copy of the GNU Library General Public License | |
18 | * along with this software; see the file COPYING. If not, write to | |
19 | * the Free Software Foundation, 59 Temple Place - Suite 330, | |
20 | * Boston, MA 02111-1307, USA. | |
21 | */ | |
22 | ||
23 | \f | |
24 | ||
25 | #include "rxcset.h" | |
26 | #include "rxnode.h" | |
27 | ||
28 | ||
29 | \f | |
30 | ||
31 | /* Error message strings corresponding to the rx error codes. This header | |
32 | only declares the array; its definition is not visible here. */ | |
33 | extern const char *rx_error_msg[]; | |
34 | ||
35 | ||
36 | \f | |
37 | /* {Syntax Bits} | |
38 | */ | |
39 | ||
40 | /* The following bits are used to determine the regexp syntax we | |
41 | recognize. The set/not-set meanings are chosen so that Emacs syntax | |
42 | remains the value 0. The bits are given in alphabetical order, and | |
43 | the definitions shifted by one from the previous bit; thus, when we | |
44 | add or remove a bit, only one other definition need change. */ | |
45 | ||
46 | enum RE_SYNTAX_BITS /* Single-bit syntax options first, then named combinations of those bits. */ | |
47 | { | |
48 | /* If this bit is not set, then \ inside a bracket expression is literal. | |
49 | If set, then such a \ quotes the following character. */ | |
50 | RE_BACKSLASH_ESCAPE_IN_LISTS = (1), | |
51 | ||
52 | /* If this bit is not set, then + and ? are operators, and \+ and \? are | |
53 | literals. | |
54 | If set, then \+ and \? are operators and + and ? are literals. */ | |
55 | RE_BK_PLUS_QM = (RE_BACKSLASH_ESCAPE_IN_LISTS << 1), | |
56 | ||
57 | /* If this bit is set, then character classes are supported. They are: | |
58 | [:alpha:], [:upper:], [:lower:], [:digit:], [:alnum:], [:xdigit:], | |
59 | [:space:], [:print:], [:punct:], [:graph:], and [:cntrl:]. | |
60 | If not set, then character classes are not supported. */ | |
61 | RE_CHAR_CLASSES = (RE_BK_PLUS_QM << 1), | |
62 | ||
63 | /* If this bit is set, then ^ and $ are always anchors (outside bracket | |
64 | expressions, of course). | |
65 | If this bit is not set, then it depends: | |
66 | ^ is an anchor if it is at the beginning of a regular | |
67 | expression or after an open-group or an alternation operator; | |
68 | $ is an anchor if it is at the end of a regular expression, or | |
69 | before a close-group or an alternation operator. | |
70 | ||
71 | This bit could be (re)combined with RE_CONTEXT_INDEP_OPS, because | |
72 | POSIX draft 11.2 says that * etc. in leading positions is undefined. | |
73 | We already implemented a previous draft which made those constructs | |
74 | invalid, though, so we haven't changed the code back. */ | |
75 | RE_CONTEXT_INDEP_ANCHORS = (RE_CHAR_CLASSES << 1), | |
76 | ||
77 | /* If this bit is set, then special characters are always special | |
78 | regardless of where they are in the pattern. | |
79 | If this bit is not set, then special characters are special only in | |
80 | some contexts; otherwise they are ordinary. Specifically, | |
81 | * + ? and intervals are only special when not after the beginning, | |
82 | open-group, or alternation operator. */ | |
83 | RE_CONTEXT_INDEP_OPS = (RE_CONTEXT_INDEP_ANCHORS << 1), | |
84 | ||
85 | /* If this bit is set, then *, +, ?, and { cannot be first in an re or | |
86 | immediately after an alternation or begin-group operator. */ | |
87 | RE_CONTEXT_INVALID_OPS = (RE_CONTEXT_INDEP_OPS << 1), | |
88 | ||
89 | /* If this bit is set, then . matches newline. | |
90 | If not set, then it doesn't. */ | |
91 | RE_DOT_NEWLINE = (RE_CONTEXT_INVALID_OPS << 1), | |
92 | ||
93 | /* If this bit is set, then . doesn't match NUL. | |
94 | If not set, then it does. */ | |
95 | RE_DOT_NOT_NULL = (RE_DOT_NEWLINE << 1), | |
96 | ||
97 | /* If this bit is set, nonmatching lists [^...] do not match newline. | |
98 | If not set, they do. */ | |
99 | RE_HAT_LISTS_NOT_NEWLINE = (RE_DOT_NOT_NULL << 1), | |
100 | ||
101 | /* If this bit is set, either \{...\} or {...} defines an | |
102 | interval, depending on RE_NO_BK_BRACES. | |
103 | If not set, \{, \}, {, and } are literals. */ | |
104 | RE_INTERVALS = (RE_HAT_LISTS_NOT_NEWLINE << 1), | |
105 | ||
106 | /* If this bit is set, +, ? and | aren't recognized as operators. | |
107 | If not set, they are. */ | |
108 | RE_LIMITED_OPS = (RE_INTERVALS << 1), | |
109 | ||
110 | /* If this bit is set, newline is an alternation operator. | |
111 | If not set, newline is literal. */ | |
112 | RE_NEWLINE_ALT = (RE_LIMITED_OPS << 1), | |
113 | ||
114 | /* If this bit is set, then `{...}' defines an interval, and \{ and \} | |
115 | are literals. | |
116 | If not set, then `\{...\}' defines an interval. */ | |
117 | RE_NO_BK_BRACES = (RE_NEWLINE_ALT << 1), | |
118 | ||
119 | /* If this bit is set, (...) defines a group, and \( and \) are literals. | |
120 | If not set, \(...\) defines a group, and ( and ) are literals. */ | |
121 | RE_NO_BK_PARENS = (RE_NO_BK_BRACES << 1), | |
122 | ||
123 | /* If this bit is set, then \<digit> matches <digit>. | |
124 | If not set, then \<digit> is a back-reference. */ | |
125 | RE_NO_BK_REFS = (RE_NO_BK_PARENS << 1), | |
126 | ||
127 | /* If this bit is set, then | is an alternation operator, and \| is literal. | |
128 | If not set, then \| is an alternation operator, and | is literal. */ | |
129 | RE_NO_BK_VBAR = (RE_NO_BK_REFS << 1), | |
130 | ||
131 | /* If this bit is set, then an ending range point collating lower | |
132 | than the starting range point, as in [z-a], is invalid. | |
133 | If not set, then when the ending range point collates lower than the | |
134 | starting range point, the range is ignored. */ | |
135 | RE_NO_EMPTY_RANGES = (RE_NO_BK_VBAR << 1), | |
136 | ||
137 | /* If this bit is set, then an unmatched ) is ordinary. | |
138 | If not set, then an unmatched ) is invalid. */ | |
139 | RE_UNMATCHED_RIGHT_PAREN_ORD = (RE_NO_EMPTY_RANGES << 1), | |
140 | ||
141 | RE_SYNTAX_EMACS = 0, /* No bits set, so every option has its not-set meaning. */ | |
142 | ||
143 | RE_SYNTAX_AWK = (RE_BACKSLASH_ESCAPE_IN_LISTS | RE_DOT_NOT_NULL | |
144 | | RE_NO_BK_PARENS | RE_NO_BK_REFS | |
145 | | RE_NO_BK_VBAR | RE_NO_EMPTY_RANGES | |
146 | | RE_UNMATCHED_RIGHT_PAREN_ORD), | |
147 | ||
148 | RE_SYNTAX_GREP = (RE_BK_PLUS_QM | RE_CHAR_CLASSES | |
149 | | RE_HAT_LISTS_NOT_NEWLINE | RE_INTERVALS | |
150 | | RE_NEWLINE_ALT), | |
151 | ||
152 | RE_SYNTAX_EGREP = (RE_CHAR_CLASSES | RE_CONTEXT_INDEP_ANCHORS | |
153 | | RE_CONTEXT_INDEP_OPS | RE_HAT_LISTS_NOT_NEWLINE | |
154 | | RE_NEWLINE_ALT | RE_NO_BK_PARENS | |
155 | | RE_NO_BK_VBAR), | |
156 | ||
157 | RE_SYNTAX_POSIX_EGREP = (RE_SYNTAX_EGREP | RE_INTERVALS | RE_NO_BK_BRACES), /* egrep bits plus intervals written as {...} */ | |
158 | ||
159 | /* Syntax bits common to both basic and extended POSIX regex syntax. */ | |
160 | _RE_SYNTAX_POSIX_COMMON = (RE_CHAR_CLASSES | RE_DOT_NEWLINE | RE_DOT_NOT_NULL | |
161 | | RE_INTERVALS | RE_NO_EMPTY_RANGES), | |
162 | ||
163 | RE_SYNTAX_POSIX_BASIC = (_RE_SYNTAX_POSIX_COMMON | RE_BK_PLUS_QM), | |
164 | ||
165 | /* Differs from ..._POSIX_BASIC only in that RE_BK_PLUS_QM becomes | |
166 | RE_LIMITED_OPS, i.e., \? \+ \| are not recognized. */ | |
167 | ||
168 | RE_SYNTAX_POSIX_MINIMAL_BASIC = (_RE_SYNTAX_POSIX_COMMON | RE_LIMITED_OPS), | |
169 | ||
170 | RE_SYNTAX_POSIX_EXTENDED = (_RE_SYNTAX_POSIX_COMMON | RE_CONTEXT_INDEP_ANCHORS | |
171 | | RE_CONTEXT_INDEP_OPS | RE_NO_BK_BRACES | |
172 | | RE_NO_BK_PARENS | RE_NO_BK_VBAR | |
173 | | RE_UNMATCHED_RIGHT_PAREN_ORD), | |
174 | ||
175 | /* Differs from ..._POSIX_EXTENDED in that RE_CONTEXT_INVALID_OPS | |
176 | replaces RE_CONTEXT_INDEP_OPS and RE_NO_BK_REFS is added. */ | |
177 | RE_SYNTAX_POSIX_MINIMAL_EXTENDED = (_RE_SYNTAX_POSIX_COMMON | RE_CONTEXT_INDEP_ANCHORS | |
178 | | RE_CONTEXT_INVALID_OPS | RE_NO_BK_BRACES | |
179 | | RE_NO_BK_PARENS | RE_NO_BK_REFS | |
180 | | RE_NO_BK_VBAR | RE_UNMATCHED_RIGHT_PAREN_ORD), | |
181 | ||
182 | RE_SYNTAX_SED = RE_SYNTAX_POSIX_BASIC, /* Alias: exactly the POSIX basic bits. */ | |
183 | ||
184 | RE_SYNTAX_POSIX_AWK = (RE_SYNTAX_POSIX_EXTENDED | RE_BACKSLASH_ESCAPE_IN_LISTS) /* POSIX extended, plus \ quoting inside bracket expressions */ | |
185 | }; | |
186 | \f | |
187 | ||
188 | /* Maximum repetition count an interval can allow: (1 << 15) - 1, i.e. 32767. Some systems | |
189 | (erroneously) define this in other header files, but we want our | |
190 | value, so remove any previous definition before defining it here. */ | |
191 | #undef RE_DUP_MAX | |
192 | #define RE_DUP_MAX ((1 << 15) - 1) | |
193 | ||
194 | ||
195 | \f | |
196 | #ifdef __STDC__ /* Full prototype when __STDC__ is defined; unprototyped declaration otherwise. */ | |
197 | extern int rx_parse (struct rexp_node ** rexp_p, /* NOTE(review): presumably receives the parsed expression tree -- confirm in the definition */ | |
198 | const char *pattern, /* pattern text; not modified through this pointer */ | |
199 | int size, /* NOTE(review): presumably the length of pattern -- confirm */ | |
200 | unsigned long syntax, /* NOTE(review): presumably a combination of RE_SYNTAX_BITS values -- confirm */ | |
201 | int cset_size, /* NOTE(review): meaning not visible in this header -- see the definition */ | |
202 | unsigned char *translate); /* NOTE(review): presumably a character translation table -- confirm */ | |
203 | ||
204 | #else /* not __STDC__ */ | |
205 | extern int rx_parse (); | |
206 | ||
207 | #endif /* __STDC__ */ | |
208 | ||
209 | ||
210 | #endif /* RXGNUCOMPH */ |