]> jfr.im git - irc/quakenet/snircd-patchqueue.git/blob - match.patch
merge
[irc/quakenet/snircd-patchqueue.git] / match.patch
1 diff -r 2b442008e1b7 ircd/match.c
2 --- a/ircd/match.c Sun Jan 25 21:58:36 2009 +0000
3 +++ b/ircd/match.c Sun Jan 25 21:59:17 2009 +0000
4 @@ -27,6 +27,9 @@
5 #include "ircd_string.h"
6 #include "ircd_snprintf.h"
7
8 +#define likely(x) __builtin_expect((x),1)
9 +#define unlikely(x) __builtin_expect((x),0)
10 +
11 /*
12 * mmatch()
13 *
14 @@ -62,53 +65,79 @@
15 *
16 * @param[in] old_mask One wildcard mask.
17 * @param[in] new_mask Another wildcard mask.
18 + * @param[in] rfcmatch Non-zero: a backslash escapes only '*' and '?'; zero: a backslash escapes any character.
19 * @return Zero if \a old_mask is a superset of \a new_mask, non-zero otherwise.
20 */
21 -int mmatch(const char *old_mask, const char *new_mask)
22 +int _mmatch(const char *old_mask, const char *new_mask, int rfcmatch);
23 +
24 +int mmatch(const char *old_mask, const char *new_mask) {
25 + return _mmatch(old_mask, new_mask, 1);
26 +}
27 +
28 +__inline__ int _mmatch(const char *old_mask, const char *new_mask, int rfcmatch)
29 {
30 const char *m = old_mask;
31 const char *n = new_mask;
32 - const char *ma = m;
33 - const char *na = n;
34 - int wild = 0;
35 - int mq = 0, nq = 0;
36 + /* Note that ma / na never point to a character escaped by a backslash. */
37 + const char *ma = NULL; // Remembered m for backtracking.
38 + const char *na = NULL;
39 + int mq = 0, nq = 0; // Is *m / *n escaped?
40 + int match;
41 +
42 + if ( m[0] == '*' && m[1] == '\0' ) {
43 + return 0;
44 + } else if ( n[0] == '*' && n[1] == '\0' ) {
45 + return 1;
46 + }
47
48 while (1)
49 {
50 - if (*m == '*')
51 + if (unlikely(*m == '*'))
52 {
53 + /* Optimization: Skip redundant *'s */
54 while (*m == '*')
55 m++;
56 - wild = 1;
57 + /* And remember this position for backtracking. */
58 ma = m;
59 na = n;
60 }
61
62 - if (!*m)
63 + if (unlikely(!*m))
64 {
65 if (!*n)
66 return 0;
67 + /* This construct speeds up matches of patterns ending with a *
68 + * followed by any number of ?. The tricky part is figuring
69 + * out whether or not that * was escaped. */
70 for (m--; (m > old_mask) && (*m == '?'); m--)
71 - ;
72 - if ((*m == '*') && (m > old_mask) && (m[-1] != '\\'))
73 - return 0;
74 - if (!wild)
75 + ; /* Skip trailing ?'s */
76 + if (*m == '*') {
77 + if ((--m >= old_mask) && (*m != '\\'))
78 + return 0;
79 + /* Now if there's an odd number of backslashes, the for loop
80 + * breaks out and we backtrack. */
81 + if (!rfcmatch) /* In rfc, backslashes can't be escaped. */
82 + for(--m; (m >= old_mask) && (*m == '\\'); m--)
83 + if ((--m >= old_mask) && (*m != '\\'))
84 + return 0;
85 + }
86 + if (!ma)
87 return 1;
88 m = ma;
89 -
90 - /* Added to `mmatch' : Because '\?' and '\*' now is one character: */
91 - if ((*na == '\\') && ((na[1] == '*') || (na[1] == '?')))
92 + /* skip one escaped character */
93 + if (*na == '\\' && (!rfcmatch || na[1] == '*' || na[1] == '?'))
94 ++na;
95 -
96 n = ++na;
97 }
98 - else if (!*n)
99 +
100 + if (unlikely(!*n))
101 {
102 - while (*m == '*')
103 + while (unlikely(*m == '*'))
104 m++;
105 return (*m != 0);
106 }
107 - if ((*m == '\\') && ((m[1] == '*') || (m[1] == '?')))
108 +
109 + if (unlikely(*m == '\\' && (!rfcmatch || m[1] == '*' || m[1] == '?')))
110 {
111 m++;
112 mq = 1;
113 @@ -116,8 +145,7 @@
114 else
115 mq = 0;
116
117 - /* Added to `mmatch' : Because '\?' and '\*' now is one character: */
118 - if ((*n == '\\') && ((n[1] == '*') || (n[1] == '?')))
119 + if (unlikely(*n == '\\' && (!rfcmatch || n[1] == '*' || n[1] == '?')))
120 {
121 n++;
122 nq = 1;
123 @@ -126,24 +154,24 @@
124 nq = 0;
125
126 /*
127 - * This `if' has been changed compared to match() to do the following:
128 - * Match when:
129 - * old (m) new (n) boolean expression
130 - * * any (*m == '*' && !mq) ||
131 - * ? any except '*' (*m == '?' && !mq && (*n != '*' || nq)) ||
132 - * any except * or ? same as m (!((*m == '*' || *m == '?') && !mq) &&
133 - * ToLower(*m) == ToLower(*n) &&
134 - * !((mq && !nq) || (!mq && nq)))
135 - *
136 - * Here `any' also includes \* and \? !
137 - *
138 - * After reworking the boolean expressions, we get:
139 - * (Optimized to use boolean short-circuits, with most frequently occurring
140 - * cases upfront (which took 2 hours!)).
141 + * The hand-optimized short-circuit expression (the one that took 2 hours)
142 + * was replaced by the explicit branches below. It was probably slower than
143 + * these branches anyway, and nobody will notice the difference. -- BP
144 */
145 - if ((*m == '*' && !mq) ||
146 - ((!mq || nq) && ToLower(*m) == ToLower(*n)) ||
147 - (*m == '?' && !mq && (*n != '*' || nq)))
148 + if (unlikely(mq)) { /* m is quoted, match the exact same, or the
149 + * same character if quoting is irrelevant. */
150 + match = (*m == *n && (nq ||
151 + (*n != '*' && *n != '?' && ToUpper(*n) == ToLower(*n))));
152 + } else if (unlikely(*m == '?')) { /* m is '?', match anything but unquoted '*' */
153 + match = (*n != '*' || nq);
154 + } else if (unlikely(*m == '*')) { /* m is '*', match. */
155 + match=1;
156 + } else /* m is neither quoted nor special */
157 + {
158 + match = (ToLower(*m) == ToLower(*n));
159 + }
160 +
161 + if (unlikely(match))
162 {
163 if (*m)
164 m++;
165 @@ -151,15 +179,12 @@
166 n++;
167 }
168 else
169 - {
170 - if (!wild)
171 + if (unlikely(!ma))
172 return 1;
173 m = ma;
174 -
175 - /* Added to `mmatch' : Because '\?' and '\*' now is one character: */
176 - if ((*na == '\\') && ((na[1] == '*') || (na[1] == '?')))
177 + /* skip one escaped character */
178 + if (unlikely(*na == '\\' && (!rfcmatch || na[1] == '*' || na[1] == '?')))
179 ++na;
180 -
181 n = ++na;
182 }
183 }
184 @@ -186,8 +211,15 @@
185 * @param[in] name String to check against \a mask.
186 * @return Zero if \a mask matches \a name, non-zero if no match.
187 */
188 +int _match(const char *mask, const char *name, int rfcmatch);
189 +
190 int match(const char *mask, const char *name)
191 {
192 + return _match(mask, name, 1);
193 +}
194 +
195 +int _match(const char *mask, const char *name, int rfcmatch)
196 +{
197 const char *m = mask, *n = name;
198 const char *m_tmp = mask, *n_tmp = name;
199 int star_p;
200 @@ -205,10 +237,14 @@
201 return 1;
202 break;
203 case '\\':
204 - m++;
205 /* allow escaping to force capitalization */
206 - if (*m++ != *n++)
207 - goto backtrack;
208 + if (!rfcmatch) {
209 + m++;
210 + if (*m++ != *n++)
211 + goto backtrack;
212 + } else {
213 + goto fallthrough;
214 + }
215 break;
216 case '*': case '?':
217 for (star_p = 0; ; m++) {
218 @@ -234,6 +270,7 @@
219 }
220 /* and fall through */
221 default:
222 + fallthrough:
223 if (!*n)
224 return *m != '\0';
225 if (ToLower(*m) != ToLower(*n))