]> jfr.im git - irc/evilnet/x3.git/blame - rx/rxgnucomp.h
Rewrote PHP X3 DB parser function sample code as a class and faster code
[irc/evilnet/x3.git] / rx / rxgnucomp.h
CommitLineData
d76ed9a9 1/* classes: h_files */
2
3#ifndef RXGNUCOMPH
4#define RXGNUCOMPH
5/* Copyright (C) 1992, 1993, 1994, 1995 Free Software Foundation, Inc.
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU Library General Public License as published by
9 * the Free Software Foundation; either version 2, or (at your option)
10 * any later version.
11 *
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU Library General Public License for more details.
16 *
17 * You should have received a copy of the GNU Library General Public License
18 * along with this software; see the file COPYING. If not, write to
19 * the Free Software Foundation, 59 Temple Place - Suite 330,
20 * Boston, MA 02111-1307, USA.
21 */
22
23\f
24
25#include "rxcset.h"
26#include "rxnode.h"
27
28
29\f
30
/* Error-message strings corresponding to the error codes.
 *
 * Only declared here, with an incomplete array type: the definition,
 * and therefore the number of entries, lives in another translation
 * unit.
 */
extern char const *rx_error_msg[];
34
35
36\f
37/* {Syntax Bits}
38 */
39
40/* The following bits are used to determine the regexp syntax we
41 recognize. The set/not-set meanings are chosen so that Emacs syntax
42 remains the value 0. The bits are given in alphabetical order, and
43 the definitions shifted by one from the previous bit; thus, when we
44 add or remove a bit, only one other definition need change. */
45
/* Bits selecting which regexp syntax is recognized, followed by the
   predefined combinations of those bits.

   Every option is a distinct single bit: the first is 1 and each later
   one is its predecessor shifted left by one, so adding or removing an
   option only requires editing the definition that follows it.  A
   value with no bits set (RE_SYNTAX_EMACS) is Emacs syntax.  */
enum RE_SYNTAX_BITS
{
  /* If this bit is not set, then \ inside a bracket expression is literal.
     If set, then such a \ quotes the following character.  */
  RE_BACKSLASH_ESCAPE_IN_LISTS = (1),

  /* If this bit is not set, then + and ? are operators, and \+ and \? are
     literals.
     If set, then \+ and \? are operators and + and ? are literals.  */
  RE_BK_PLUS_QM = (RE_BACKSLASH_ESCAPE_IN_LISTS << 1),

  /* If this bit is set, then character classes are supported.  They are:
     [:alpha:], [:upper:], [:lower:], [:digit:], [:alnum:], [:xdigit:],
     [:space:], [:print:], [:punct:], [:graph:], and [:cntrl:].
     If not set, then character classes are not supported.  */
  RE_CHAR_CLASSES = (RE_BK_PLUS_QM << 1),

  /* If this bit is set, then ^ and $ are always anchors (outside bracket
     expressions, of course).
     If this bit is not set, then it depends:
       ^ is an anchor if it is at the beginning of a regular
         expression or after an open-group or an alternation operator;
       $ is an anchor if it is at the end of a regular expression, or
         before a close-group or an alternation operator.

     This bit could be (re)combined with RE_CONTEXT_INDEP_OPS, because
     POSIX draft 11.2 says that * etc. in leading positions is undefined.
     We already implemented a previous draft which made those constructs
     invalid, though, so we haven't changed the code back.  */
  RE_CONTEXT_INDEP_ANCHORS = (RE_CHAR_CLASSES << 1),

  /* If this bit is set, then special characters are always special
     regardless of where they are in the pattern.
     If this bit is not set, then special characters are special only in
     some contexts; otherwise they are ordinary.  Specifically,
     * + ? and intervals are only special when not after the beginning,
     open-group, or alternation operator.  */
  RE_CONTEXT_INDEP_OPS = (RE_CONTEXT_INDEP_ANCHORS << 1),

  /* If this bit is set, then *, +, ?, and { cannot be first in an re or
     immediately after an alternation or begin-group operator.  */
  RE_CONTEXT_INVALID_OPS = (RE_CONTEXT_INDEP_OPS << 1),

  /* If this bit is set, then . matches newline.
     If not set, then it doesn't.  */
  RE_DOT_NEWLINE = (RE_CONTEXT_INVALID_OPS << 1),

  /* If this bit is set, then . doesn't match NUL.
     If not set, then it does.  */
  RE_DOT_NOT_NULL = (RE_DOT_NEWLINE << 1),

  /* If this bit is set, nonmatching lists [^...] do not match newline.
     If not set, they do.  */
  RE_HAT_LISTS_NOT_NEWLINE = (RE_DOT_NOT_NULL << 1),

  /* If this bit is set, either \{...\} or {...} defines an
     interval, depending on RE_NO_BK_BRACES.
     If not set, \{, \}, {, and } are literals.  */
  RE_INTERVALS = (RE_HAT_LISTS_NOT_NEWLINE << 1),

  /* If this bit is set, +, ? and | aren't recognized as operators.
     If not set, they are.  */
  RE_LIMITED_OPS = (RE_INTERVALS << 1),

  /* If this bit is set, newline is an alternation operator.
     If not set, newline is literal.  */
  RE_NEWLINE_ALT = (RE_LIMITED_OPS << 1),

  /* If this bit is set, then {...} defines an interval, and \{ and \}
     are literals.
     If not set, then \{...\} defines an interval.  */
  RE_NO_BK_BRACES = (RE_NEWLINE_ALT << 1),

  /* If this bit is set, (...) defines a group, and \( and \) are literals.
     If not set, \(...\) defines a group, and ( and ) are literals.  */
  RE_NO_BK_PARENS = (RE_NO_BK_BRACES << 1),

  /* If this bit is set, then \<digit> matches <digit>.
     If not set, then \<digit> is a back-reference.  */
  RE_NO_BK_REFS = (RE_NO_BK_PARENS << 1),

  /* If this bit is set, then | is an alternation operator, and \| is literal.
     If not set, then \| is an alternation operator, and | is literal.  */
  RE_NO_BK_VBAR = (RE_NO_BK_REFS << 1),

  /* If this bit is set, then a range whose starting point collates
     higher than its ending point, as in [z-a], is invalid.
     If not set, then such a range is ignored.  */
  RE_NO_EMPTY_RANGES = (RE_NO_BK_VBAR << 1),

  /* If this bit is set, then an unmatched ) is ordinary.
     If not set, then an unmatched ) is invalid.  */
  RE_UNMATCHED_RIGHT_PAREN_ORD = (RE_NO_EMPTY_RANGES << 1),

  /* Predefined syntaxes.  Each is an OR of the option bits above and is
     named for the tool or standard it is associated with.  */

  /* No option bits set.  */
  RE_SYNTAX_EMACS = 0,

  RE_SYNTAX_AWK = (RE_BACKSLASH_ESCAPE_IN_LISTS | RE_DOT_NOT_NULL
                   | RE_NO_BK_PARENS | RE_NO_BK_REFS
                   | RE_NO_BK_VBAR | RE_NO_EMPTY_RANGES
                   | RE_UNMATCHED_RIGHT_PAREN_ORD),

  RE_SYNTAX_GREP = (RE_BK_PLUS_QM | RE_CHAR_CLASSES
                    | RE_HAT_LISTS_NOT_NEWLINE | RE_INTERVALS
                    | RE_NEWLINE_ALT),

  RE_SYNTAX_EGREP = (RE_CHAR_CLASSES | RE_CONTEXT_INDEP_ANCHORS
                     | RE_CONTEXT_INDEP_OPS | RE_HAT_LISTS_NOT_NEWLINE
                     | RE_NEWLINE_ALT | RE_NO_BK_PARENS
                     | RE_NO_BK_VBAR),

  /* RE_SYNTAX_EGREP plus brace intervals written without backslashes.  */
  RE_SYNTAX_POSIX_EGREP = (RE_SYNTAX_EGREP | RE_INTERVALS | RE_NO_BK_BRACES),

  /* Syntax bits common to both basic and extended POSIX regex syntax.  */
  _RE_SYNTAX_POSIX_COMMON = (RE_CHAR_CLASSES | RE_DOT_NEWLINE | RE_DOT_NOT_NULL
                             | RE_INTERVALS | RE_NO_EMPTY_RANGES),

  RE_SYNTAX_POSIX_BASIC = (_RE_SYNTAX_POSIX_COMMON | RE_BK_PLUS_QM),

  /* Differs from ..._POSIX_BASIC only in that RE_BK_PLUS_QM becomes
     RE_LIMITED_OPS, i.e., \? \+ \| are not recognized.  */
  RE_SYNTAX_POSIX_MINIMAL_BASIC = (_RE_SYNTAX_POSIX_COMMON | RE_LIMITED_OPS),

  RE_SYNTAX_POSIX_EXTENDED = (_RE_SYNTAX_POSIX_COMMON | RE_CONTEXT_INDEP_ANCHORS
                              | RE_CONTEXT_INDEP_OPS | RE_NO_BK_BRACES
                              | RE_NO_BK_PARENS | RE_NO_BK_VBAR
                              | RE_UNMATCHED_RIGHT_PAREN_ORD),

  /* Differs from ..._POSIX_EXTENDED in that RE_CONTEXT_INVALID_OPS
     replaces RE_CONTEXT_INDEP_OPS and RE_NO_BK_REFS is added.  */
  RE_SYNTAX_POSIX_MINIMAL_EXTENDED = (_RE_SYNTAX_POSIX_COMMON | RE_CONTEXT_INDEP_ANCHORS
                                      | RE_CONTEXT_INVALID_OPS | RE_NO_BK_BRACES
                                      | RE_NO_BK_PARENS | RE_NO_BK_REFS
                                      | RE_NO_BK_VBAR | RE_UNMATCHED_RIGHT_PAREN_ORD),

  /* Same bits as RE_SYNTAX_POSIX_BASIC.  */
  RE_SYNTAX_SED = RE_SYNTAX_POSIX_BASIC,

  /* RE_SYNTAX_POSIX_EXTENDED plus backslash escapes inside bracket
     expressions.  */
  RE_SYNTAX_POSIX_AWK = (RE_SYNTAX_POSIX_EXTENDED | RE_BACKSLASH_ESCAPE_IN_LISTS)
};
186\f
187
/* Upper bound on the number of duplicates an interval can allow.
   Other system headers sometimes define RE_DUP_MAX themselves (they
   should not); discard any such definition so that the value below is
   the one in effect.  */
#ifdef RE_DUP_MAX
# undef RE_DUP_MAX
#endif
#define RE_DUP_MAX (0x7fff)
193
194
195\f
/* Introduce the tag at file scope so that the prototype below refers to
   the ordinary `struct rexp_node' rather than a prototype-scoped one,
   whichever headers happen to precede this declaration.  */
struct rexp_node;

/* Prototype for rx_parse; the definition is in another translation unit.

   The prototype is declared unconditionally.  It used to be guarded by
   `#ifdef __STDC__' with an unprototyped `rx_parse ()' fallback, but:
     - this header already uses `const' outside any such guard, so a
       pre-ANSI compiler could not parse it anyway;
     - a compiler that accepts prototypes without defining __STDC__ got
       the fallback and silently lost argument checking and conversion;
     - C23 reads `()' as `(void)', which would contradict the real
       signature.

   NOTE(review): judging by the parameter names only, this parses the
   SIZE-byte regexp PATTERN under the RE_SYNTAX_BITS mask SYNTAX and
   stores the resulting tree through REXP_P, with CSET_SIZE and
   TRANSLATE describing the character set and an optional translation
   table.  The meaning of the int result is not visible here.  Confirm
   all of this against the definition.  */
extern int rx_parse (struct rexp_node **rexp_p,
                     const char *pattern,
                     int size,
                     unsigned long syntax,
                     int cset_size,
                     unsigned char *translate);
208
209
210#endif /* RXGNUCOMPH */