]>
jfr.im git - irc/quakenet/snircd.git/blob - ircd/match.c
2 * IRC - Internet Relay Chat, common/match.c
3 * Copyright (C) 1990 Jarkko Oikarinen
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 1, or (at your option)
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
20 * @brief Functions to match strings against IRC mask strings.
21 * @version $Id: match.c,v 1.20 2005/09/12 03:40:17 entrope Exp $
26 #include "ircd_chattr.h"
27 #include "ircd_string.h"
28 #include "ircd_snprintf.h"
33 * Written by Run (carlo@runaway.xs4all.nl), 25-10-96
36 * From: Carlo Wood <carlo@runaway.xs4all.nl>
37 * Message-Id: <199609021026.MAA02393@runaway.xs4all.nl>
38 * Subject: [C-Com] Analysis for `mmatch' (was: gline4 problem)
39 * To: coder-com@mail.undernet.org (coder committee)
40 * Date: Mon, 2 Sep 1996 12:26:01 +0200 (MET DST)
42 * We need a new function `mmatch(const char *old_mask, const char *new_mask)'
43 * which returns `true' likewise the current `match' (start with copying it),
44 * but which treats '*' and '?' in `new_mask' differently (not "\*" and "\?" !)
45 * as follows: a '*' in `new_mask' does not match a '?' in `old_mask' and
46 * a '?' in `new_mask' does not match a '\?' in `old_mask'.
47 * And of course... a '*' in `new_mask' does not match a '\*' in `old_mask'...
48 * And last but not least, '\?' and '\*' in `new_mask' now become one character.
51 /** Compares one mask against another.
52 * One wildcard mask may be said to be a superset of another if the
53 * set of strings matched by the first is a proper superset of the set
54 * of strings matched by the second. In practical terms, this means
55 * that the second is made redundant by the first.
57 * The logic for this test is similar to that in match(), but a
58 * backslash in old_mask only matches a backslash in new_mask (and
59 * requires the next character to match exactly), and -- after
60 * contiguous runs of wildcards are logically collapsed -- a '?' in
61 * old_mask does not match a '*' in new_mask.
63 * @param[in] old_mask One wildcard mask.
64 * @param[in] new_mask Another wildcard mask.
65 * @return Zero if \a old_mask is a superset of \a new_mask, non-zero otherwise.
67 int mmatch(const char *old_mask
, const char *new_mask
)
69 const char *m
= old_mask
;
70 const char *n
= new_mask
;
91 for (m
--; (m
> old_mask
) && (*m
== '?'); m
--)
93 if ((*m
== '*') && (m
> old_mask
) && (m
[-1] != '\\'))
99 /* Added to `mmatch' : Because '\?' and '\*' are now one character: */
100 if ((*na
== '\\') && ((na
[1] == '*') || (na
[1] == '?')))
111 if ((*m
== '\\') && ((m
[1] == '*') || (m
[1] == '?')))
119 /* Added to `mmatch' : Because '\?' and '\*' are now one character: */
120 if ((*n
== '\\') && ((n
[1] == '*') || (n
[1] == '?')))
129 * This `if' has been changed compared to match() to do the following:
131 * old (m) new (n) boolean expression
132 * * any (*m == '*' && !mq) ||
133 * ? any except '*' (*m == '?' && !mq && (*n != '*' || nq)) ||
134 * any except * or ? same as m (!((*m == '*' || *m == '?') && !mq) &&
135 * ToLower(*m) == ToLower(*n) &&
136 * !((mq && !nq) || (!mq && nq)))
138 * Here `any' also includes \* and \? !
140 * After reworking the boolean expressions, we get:
141 * (Optimized to use boolean short-circuits, with most frequently occurring
142 * cases upfront (which took 2 hours!)).
144 if ((*m
== '*' && !mq
) ||
145 ((!mq
|| nq
) && ToLower(*m
) == ToLower(*n
)) ||
146 (*m
== '?' && !mq
&& (*n
!= '*' || nq
)))
159 /* Added to `mmatch' : Because '\?' and '\*' are now one character: */
160 if ((*na
== '\\') && ((na
[1] == '*') || (na
[1] == '?')))
169 * Compare if a given string (name) matches the given
170 * mask (which can contain wild cards: '*' - match any
171 * number of chars, '?' - match any single character.
176 * Originally by Douglas A Lewis (dalewis@acsu.buffalo.edu)
177 * Rewritten by Timothy Vogelsang (netski), net@astrolink.org
180 /** Check a string against a mask.
181 * This test checks using traditional IRC wildcards only: '*' means
182 * match zero or more characters of any type; '?' means match exactly
183 * one character of any type. A backslash escapes the next character
184 * so that a wildcard may be matched exactly.
185 * @param[in] mask Wildcard-containing mask.
186 * @param[in] name String to check against \a mask.
187 * @return Zero if \a mask matches \a name, non-zero if no match.
189 int match(const char *mask
, const char *name
)
191 const char *m
= mask
, *n
= name
;
192 const char *m_tmp
= mask
, *n_tmp
= name
;
195 for (;;) switch (*m
) {
207 /* allow escaping to force capitalization */
212 for (star_p
= 0; ; m
++) {
215 else if (*m
== '?') {
223 else if (*m
== '\\') {
227 for (n_tmp
= n
; *n
&& *n
!= *m
; n
++) ;
230 for (n_tmp
= n
; *n
&& ToLower(*n
) != ToLower(*m
); n
++) ;
233 /* and fall through */
237 if (ToLower(*m
) != ToLower(*n
))
247 * Collapse a pattern string into minimal components.
248 * This particular version is "in place", so that it changes the pattern
249 * which is to be reduced to a "minimal" size.
251 * (C) Carlo Wood - 6 Oct 1998
252 * Speedup rewrite by Andrea Cocito, December 1998.
253 * Note that this new optimized algorithm can *only* work in place.
256 /** Collapse a mask string to remove redundancies.
257 * Specifically, it replaces a sequence of '*' followed by additional
258 * '*' or '?' with the same number of '?'s as the input, followed by
259 * one '*'. This minimizes useless backtracking when matching later.
260 * @param[in,out] mask Mask string to collapse.
261 * @return Pointer to the start of the string.
263 char *collapse(char *mask
)
273 if ((*m
== '*') && ((m
[1] == '*') || (m
[1] == '?')))
282 if (star
&& (*m
!= '?'))
288 if ((*m
== '\\') && ((m
[1] == '*') || (m
[1] == '?')))
297 if ((*m
== '\\') && ((m
[1] == '*') || (m
[1] == '?')))
307 ***************** Nemesi's matchcomp() / matchexec() **************
310 /** @page compiledmasks Compiled Masks
311 * These functions allow the use of "compiled" masks, you compile a mask
312 * by means of matchcomp() that gets the plain text mask as input and writes
313 * its result in the memory locations addressed by the 3 parameters:
314 * - *cmask will contain the text of the compiled mask
315 * - *minlen will contain the length of the shortest string that can match
317 * - *charset will contain the minimal set of chars needed to match the mask
318 * You can pass NULL as *charset and it will be simply not returned, but you
319 * MUST pass valid pointers for *minlen and *cmask (which must be big enough
320 * to contain the compiled mask text that is in the worst case as long as the
321 * text of the mask itself in plaintext format) and the return value of
322 * matchcomp() will be the number of chars actually written there (excluded
323 * the trailing zero). cmask can be == mask, matchcomp() can work in place.
324 * The {cmask, minlen} couple of values make the real compiled mask and
325 * need to be passed to the functions that use the compiled mask, if you pass
326 * the wrong minlen or something wrong in cmask to one of these expect a
327 * coredump. This means that when you record a compiled mask you must store
328 * *both* these values.
329 * Once compiled the mask can be used to match a string by means of
330 * matchexec(), it can be printed back to human-readable format by means
331 * of sprintmatch() or it can be compared to another compiled mask by means
332 * of mmexec() that will tell if it completely overrides that mask (a lot like
333 * what mmatch() does for plain text masks).
334 * You can gain a lot of speed in many situations avoiding to matchexec() when:
335 * - The maximum length of the field you are about to match() the mask to is
336 * shorter than minlen, for example when matching abc*def*ghil with a nick:
337 * It just cannot match since a nick is at most 9 chars long and the mask
338 * needs at least 10 chars (10 will be the value returned in minlen).
339 * - The charset allowed for the field you are about to match to doesn't
340 * "contain" the charset returned by matchcomp(), for example when you
341 * have *.* as mask it makes no sense to try to match it against a nick
342 * because, again, a nick can't contain a '.', you can check this with
343 * a simple (charset & NTL_IRCNK) in this case.
344 * - As a special case, since compiled masks are forced to lowercase,
345 * it would make no sense to use the NTL_LOWER and NTL_UPPER on a compiled
346 * mask, thus they are reused as follows: if the NTL_LOWER bit of charset
347 * is set it means that the mask contains only non-wilds chars (i.e. you can
348 * use strCasecmp() to match it or a direct hash lookup), if the NTL_UPPER
349 * bit is set it means that it contains only wild chars (and you can
350 * match it with strlen(field)>=minlen).
351 * Do these optimizations ONLY when the data you are about to pass to
352 * matchexec() are *known* to be invalid in advance, using strChattr()
353 * or strlen() on the text would be slower than calling matchexec() directly
355 * Internally a compiled mask contains in the *cmask area the text of
356 * the plain text form of the mask itself with the following hacks applied:
357 * - All characters are forced to lowercase (so that uppercase letters and
358 * specifically the symbols 'A' and 'Z' are reserved for special use)
359 * - All non-escaped stars '*' are replaced by the letter 'Z'
360 * - All non-escaped question marks '?' are replaced by the letter 'A'
361 * - All escape characters are removed, the wilds escaped by them are
362 * then passed by without the escape since they don't collide anymore
363 * with the real wilds (encoded as A/Z)
364 * - Finally the part of the mask that follows the last asterisk is
365 * reversed (byte order mirroring) and moved right after the first
367 * After all this a mask like: Head*CHUNK1*chu\*nK2*ch??k3*TaIl
368 * .... becomes: headZliatZchunk1Zchu*nk2ZchAAk3
369 * This can still be printed on a console, more or less understood by a
370 * human and handled with the usual str*() library functions.
371 * When you store somewhere the compiled mask you can avoid storing the
372 * textform of it since it can be "decompiled" by means of sprintmatch(),
373 * but at that time the following things are changed in the mask:
374 * - All chars have been forced to lowercase.
375 * - The mask is collapsed.
376 * The balance point of using compiled masks in terms of CPU is when you expect
377 * to use matchexec() instead of match() at least 20 times on the same mask
378 * or when you expect to use mmexec() instead of mmatch() 3 times.
381 /** Compile a mask for faster matching.
382 * See also @ref compiledmasks.
383 * @param[out] cmask Output buffer for compiled mask.
384 * @param[out] minlen Minimum length of matching strings.
385 * @param[out] charset Character attributes used in compiled mask.
386 * @param[in] mask Input mask.
387 * @return Length of compiled mask, not including NUL terminator.
389 int matchcomp(char *cmask
, int *minlen
, int *charset
, const char *mask
)
391 const char *m
= mask
;
396 int l1
, l2
, lmin
, loop
, sign
;
401 int chset2
= (NTL_LOWER
| NTL_UPPER
);
413 chset2
&= ~NTL_LOWER
;
416 if ((*m
== '?') || (*m
== '*'))
424 chset2
&= ~NTL_LOWER
;
429 chset
&= IRCD_CharAttrTab
[*b
++ - CHAR_MIN
];
430 chset2
&= ~NTL_UPPER
;
434 *charset
= (chset
| chset2
);
445 for (x1
= ls
+ 1, x2
= (b
- 1); x1
< x2
; x1
++, x2
--)
454 while ((lmin
= (l1
< l2
) ? l1
: l2
))
457 for (loop
= 0; loop
< lmin
; loop
++)
465 l1
-= (sign
< 0) ? 0 : lmin
;
466 l2
-= (sign
> 0) ? 0 : lmin
;
476 /** Compare a string to a compiled mask.
477 * If \a cmask is not from matchcomp(), or if \a minlen is not the value
478 * passed out of matchcomp(), this may core.
479 * See also @ref compiledmasks.
480 * @param[in] string String to test.
481 * @param[in] cmask Compiled mask string.
482 * @param[in] minlen Minimum length of strings that match \a cmask.
483 * @return Zero if the string matches, non-zero otherwise.
485 int matchexec(const char *string
, const char *cmask
, int minlen
)
487 const char *s
= string
- 1;
488 const char *b
= cmask
- 1;
494 while ((ToLower(*++s
) == *++b
) && *s
);
496 return ((*b
!= '\0') && ((*b
++ != 'Z') || (*b
!= '\0')));
507 if ((trash
= (s
- string
- minlen
)) < 0)
511 while ((ToLower(*--s
) == *++b
) && *b
&& (ToLower(*--s
) == *++b
) && *b
512 && (ToLower(*--s
) == *++b
) && *b
&& (ToLower(*--s
) == *++b
) && *b
);
525 while ((ToLower(*++s
) != ch
))
531 while ((ToLower(*++s
) == *++b
) && *b
);
554 * Prints the human readable version of *cmask into *mask, (decompiles
556 * The area pointed by *mask MUST be big enough (the mask might be up to
557 * twice the size of its compiled form if it's made all of \? or \*, and
558 * this function can NOT work in place since it might inflate the mask)
559 * The printed mask is not identical to the one that was compiled to cmask,
560 * in fact it is 1) forced to all lowercase, 2) collapsed, both things
561 * are supposed to NOT change its meaning.
562 * It returns the number of chars actually written to *mask;
565 /** Decompile a compiled mask into printable form.
566 * See also @ref compiledmasks.
567 * @param[out] mask Output mask buffer.
568 * @param[in] cmask Compiled mask.
569 * @return Number of characters written to \a mask.
571 int matchdecomp(char *mask
, const char *cmask
)
574 const char *rcm
= cmask
;
575 const char *begtail
, *endtail
;
583 for (; (*rcm
!= 'Z'); rcm
++, rtb
++)
585 if ((*rcm
== '?') || (*rcm
== '*'))
587 if (!((*rtb
= ((*rcm
== 'A') ? '?' : *rcm
))))
594 while (*rcm
&& (*rcm
!= 'Z'))
619 for (rcm
= endtail
; (--rcm
) > begtail
; *rtb
++ = ((*rcm
== 'A') ? '?' : *rcm
))
620 if ((*rcm
== '?') || (*rcm
== '*'))
629 * Checks if a wider compiled mask (wcm/wminlen) completely overrides
630 * a more restrictive one (rcm/rminlen), basically what mmatch() does for
631 * non-compiled masks, returns 0 if the override is true (like mmatch()).
632 * "the wider overrides the restrict" means that any string that matches
633 * the restrict one _will_ also match the wider one, always.
634 * In this we behave differently from mmatch() because for example we return
635 * true for " a?*cd overrides a*bcd " for which the override happens for how
636 * we literally defined it, here mmatch() would have returned false.
637 * The original concepts and the base algorithm are copied from mmatch()
638 * written by Run (Carlo Wood), this function is written by
639 * Nemesi (Andrea Cocito)
641 /** Tests for a superset relationship between compiled masks. This
642 * function does for compiled masks what mmatch() does for normal
644 * See also @ref compiledmasks.
645 * @param[in] wcm Compiled mask believed to be wider.
646 * @param[in] wminlen Minimum match length for \a wcm.
647 * @param[in] rcm Compiled mask believed to be restricted.
648 * @param[in] rminlen Minimum match length for \a rcm.
649 * @return Zero if \a wcm is a superset of \a rcm, non-zero if not.
651 int mmexec(const char *wcm
, int wminlen
, const char *rcm
, int rminlen
)
653 const char *w
, *r
, *br
, *bw
, *rx
, *rz
;
656 /* First of all rm must have enough non-stars to 'contain' wm */
657 if ((trash
= rminlen
- wminlen
) < 0)
663 /* Let's start the game, remember that '*' is mapped to 'Z', '?'
664 is mapped to 'A' and that head?*??*?chunk*???*tail becomes
665 headAAAAZliatAAAZchunk for compiled masks */
667 /* Match the head of wm with the head of rm */
668 for (; (*r
) && (*r
!= 'Z') && ((*w
== *r
) || (*w
== 'A')); r
++, w
++);
670 while (*w
== 'A') /* Eat extra '?' before '*' in wm if got '*' in rm */
672 if (*w
!= 'Z') /* head1<any>.. can't match head2<any>.. */
673 return ((*w
) || (*r
)) ? 1 : 0; /* and head<nul> matches only head<nul> */
675 return 0; /* headZ<nul> matches head<anything> */
677 /* Does rm have any stars in it ? let's check */
678 for (rx
= r
; *r
&& (*r
!= 'Z'); r
++);
681 /* rm has no stars and thus isn't a mask but it's just a flat
682 string: special handling occurs here, note that eat must be 0 here */
687 for (; r
--, (*w
) && ((*w
== *r
) || (*w
== 'A')); w
++);
688 if (*w
!= 'Z') /* headZliat1<any> fails on head<any>2tail */
689 return (*w
) ? 1 : 0; /* but headZliat<nul> matches head<any>tail */
692 /* match the chunks */
694 { /* This loop can't break but only return */
696 for (bw
= w
++; (*w
!= *rx
); rx
++) /* Seek the 1st char of the chunk */
697 if (--trash
< 0) /* See if we can trash one more char of rm */
698 return 1; /* If not we can only fail of course */
699 for (r
= ++rx
, w
++; (*w
) && ((*w
== *r
) || (*w
== 'A')); r
++, w
++);
700 if (!*w
) /* Did last loop match the rest of chunk ? */
701 return 0; /* ... Yes, end of wm, matched ! */
703 { /* ... No, hit non-star */
704 w
= bw
; /* Rollback at beginning of chunk */
705 if (--trash
< 0) /* Trashed the char where this try started */
706 return 1; /* if we can't trash more chars fail */
710 rx
= r
; /* Successfully matched a chunk, move rx */
711 } /* and go on with the next one */
715 /* rm has at least one '*' and thus is a 'real' mask */
716 rz
= r
++; /* rx = unused of head, rz = beg-tail */
718 /* Match the tail of wm (if any) against the tail of rm */
721 for (; (*w
) && (*r
!= 'Z') && ((*w
== *r
) || (*w
== 'A')); w
++, r
++);
722 if (*r
== 'Z') /* extra '?' before tail are fluff, just flush 'em */
725 if (*w
!= 'Z') /* We aren't matching a chunk, can't rollback */
729 /* Match the chunks of wm against what remains of the head of rm */
733 for (bw
++; (rx
< rz
) && (*bw
!= *rx
); rx
++) /* Seek the first */
734 if (--trash
< 0) /* waste some trash reserve */
736 if (!(rx
< rz
)) /* head finished */
738 for (bw
++, (br
= ++rx
);
739 (br
< rz
) && (*bw
) && ((*bw
== *br
) || (*bw
== 'A')); br
++, bw
++);
740 if (!(br
< rz
)) /* Note that we didn't use any 'eat' char yet, if */
741 while (*bw
== 'A') /* there were eat-en chars the head would be over */
742 bw
++, eat
++; /* Happens only at end of head, and eat is still 0 */
749 { /* If we failed because we got the end of head */
750 trash
-= (br
- rx
); /* it makes no sense to rollback, just trash */
751 if (--trash
< 0) /* all the rest of the head which isn't long */
752 return 1; /* enough for this chunk and go out of this */
753 break; /* loop, then we try with the chunks of rm */
765 /* Match the unused chunks of wm against the chunks of rm */
767 for (; *r
&& (*r
!= 'Z'); r
++);
774 while (eat
&& *r
) /* the '?' we ate makes us skip as many chars */
775 if (*r
++ != 'Z') /* here, but can't skip stars or trailing zero */
777 for (bw
++; (*r
) && (*bw
!= *r
); r
++)
778 if ((*r
!= 'Z') && (--trash
< 0))
782 for ((br
= ++r
), bw
++;
783 (*br
) && (*br
!= 'Z') && ((*bw
== *br
) || (*bw
== 'A')); br
++, bw
++);
792 if ((!*br
) || (*r
== 'Z'))
793 { /* If we hit the end of rm or a star in it */
794 trash
-= (br
- r
); /* makes no sense to rollback within this */
795 if (trash
< 0) /* same chunk of br, skip it all and then */
796 return 1; /* either rollback or break this loop if */
797 if (!*br
) /* it was the end of rm */
812 /* match the remaining chunks of wm against what remains of the tail of rm */
813 r
= rz
- eat
- 1; /* can't have <nul> or 'Z' within the tail, so just move r */
817 for (bw
++; (*bw
!= *r
); r
--)
822 for ((br
= --r
), bw
++;
823 (*bw
) && (br
>= rx
) && ((*bw
== *br
) || (*bw
== 'A')); br
--, bw
++);
839 return 1; /* Auch... something left out ? Fail */
842 /** Test whether an address matches the most significant bits of a mask.
843 * @param[in] addr Address to test.
844 * @param[in] mask Address to test against.
845 * @param[in] bits Number of bits to test.
846 * @return 0 on mismatch, 1 if bits < 128 and all bits match; -1 if
847 * bits == 128 and all bits match.
849 int ipmask_check(const struct irc_in_addr
*addr
, const struct irc_in_addr
*mask
, unsigned char bits
)
853 for (k
= 0; k
< 8; k
++) {
855 return !(htons(addr
->in6_16
[k
] ^ mask
->in6_16
[k
]) >> (16-bits
));
856 if (addr
->in6_16
[k
] != mask
->in6_16
[k
])