[irc/quakenet/snircd-patchqueue.git] / match.patch

diff -r 19854df2b7c2 ircd/match.c
--- a/ircd/match.c	Mon Jan 26 18:42:26 2009 +0000
+++ b/ircd/match.c	Mon Jan 26 18:48:52 2009 +0000
@@ -27,6 +27,9 @@
 #include "ircd_string.h"
 #include "ircd_snprintf.h"
 
+#define likely(x) __builtin_expect((x),1)   /* branch hint: x is expected to equal 1 (true) */
+#define unlikely(x) __builtin_expect((x),0) /* branch hint: x is expected to equal 0 (false) */
+
 /*
  * mmatch()
  *
@@ -62,53 +65,73 @@
  *
  * @param[in] old_mask One wildcard mask.
  * @param[in] new_mask Another wildcard mask.
+ * @param[in] rfcmatch Non-zero if a backslash escapes only '*' and '?'; zero if it escapes any following character.
  * @return Zero if \a old_mask is a superset of \a new_mask, non-zero otherwise.
  */
-int mmatch(const char *old_mask, const char *new_mask)
+__inline__ int _mmatch(const char *old_mask, const char *new_mask, int rfcmatch)
 {
   const char *m = old_mask;
   const char *n = new_mask;
-  const char *ma = m;
-  const char *na = n;
-  int wild = 0;
-  int mq = 0, nq = 0;
+  /* Note that ma / na never point to a character escaped by a backslash. */
+  const char *ma = NULL; /* Remembered m for backtracking. */
+  const char *na = NULL;
+  int mq = 0, nq = 0; /* Is *m / *n escaped? */
+  int match;
+
+  if ( m[0] == '*' && m[1] == '\0' ) {
+    return 0;
+  } else if ( n[0] == '*' && n[1] == '\0' ) {
+    return 1;
+  }
 
   while (1)
   {
-    if (*m == '*')
+    if (unlikely(*m == '*'))
     {
+      /* Optimization: Skip redundant *'s */
       while (*m == '*')
         m++;
-      wild = 1;
+      /* And remember this position for backtracking. */
       ma = m;
       na = n;
     }
 
-    if (!*m)
+    if (unlikely(!*m))
     {
       if (!*n)
         return 0;
+      /* This construct speeds up matches of patterns ending with a *
+       * followed by any number of ?. The tricky part is figuring
+       * out whether or not that * was escaped. */
       for (m--; (m > old_mask) && (*m == '?'); m--)
-        ;
-      if ((*m == '*') && (m > old_mask) && (m[-1] != '\\'))
-        return 0;
-      if (!wild)
+        ; /* Skip trailing ?'s */
+      if (*m == '*') {
+        if ((--m >= old_mask) && (*m != '\\'))
+          return 0;
+        /* Now if there's an odd number of backslashes, the for loop
+         * breaks out and we backtrack. */
+        if (!rfcmatch) /* In rfc, backslashes can't be escaped. */
+          for(--m; (m >= old_mask) && (*m == '\\'); m--)
+            if ((--m >= old_mask) && (*m != '\\'))
+              return 0;
+      }
+      if (!ma)
         return 1;
       m = ma;
-
-      /* Added to `mmatch' : Because '\?' and '\*' now is one character: */
-      if ((*na == '\\') && ((na[1] == '*') || (na[1] == '?')))
+      /* skip one escaped character */
+      if (*na == '\\' && (!rfcmatch || na[1] == '*' || na[1] == '?'))
         ++na;
-
       n = ++na;
     }
-    else if (!*n)
+
+    if (unlikely(!*n))
     {
-      while (*m == '*')
+      while (unlikely(*m == '*'))
         m++;
       return (*m != 0);
     }
-    if ((*m == '\\') && ((m[1] == '*') || (m[1] == '?')))
+
+    if (unlikely(*m == '\\' && (!rfcmatch || m[1] == '*' || m[1] == '?')))
     {
       m++;
       mq = 1;
@@ -116,8 +139,7 @@
     else
       mq = 0;
 
-    /* Added to `mmatch' : Because '\?' and '\*' now is one character: */
-    if ((*n == '\\') && ((n[1] == '*') || (n[1] == '?')))
+    if (unlikely(*n == '\\' && (!rfcmatch || n[1] == '*' || n[1] == '?')))
     {
       n++;
       nq = 1;
@@ -126,45 +148,47 @@
       nq = 0;
 
 /*
- * This `if' has been changed compared to match() to do the following:
- * Match when:
- *   old (m)         new (n)         boolean expression
- *    *               any             (*m == '*' && !mq) ||
- *    ?               any except '*'  (*m == '?' && !mq && (*n != '*' || nq)) ||
- * any except * or ?  same as m       (!((*m == '*' || *m == '?') && !mq) &&
- *                                      ToLower(*m) == ToLower(*n) &&
- *                                        !((mq && !nq) || (!mq && nq)))
- *
- * Here `any' also includes \* and \? !
- *
- * After reworking the boolean expressions, we get:
- * (Optimized to use boolean short-circuits, with most frequently occurring
- *  cases upfront (which took 2 hours!)).
+ * The hand-optimized short-circuit expression that used to be here was
+ * replaced by the explicit branches below. It was probably slower than
+ * these branches anyway; nobody will notice the difference.    -- BP
  */
-    if ((*m == '*' && !mq) ||
-        ((!mq || nq) && ToLower(*m) == ToLower(*n)) ||
-        (*m == '?' && !mq && (*n != '*' || nq)))
+    if (unlikely(mq))   {          /* m is quoted, match the exact same, or the
+                                    * same character if quoting is irrelevant.  */
+      match = (*m == *n && (nq ||
+                (*n != '*' && *n != '?' && ToUpper(*n) == ToLower(*n))));
+    } else if (unlikely(*m == '?')) { /* m is '?', match anything but unquoted '*' */
+      match = (*n != '*' || nq);
+    } else if (unlikely(*m == '*')) { /* m is '*', match.                          */
+      match=1;
+    } else                /* m is neither quoted nor special           */
+    {
+      match = (ToLower(*m) == ToLower(*n));
+    }
+
+    if (unlikely(match))
     {
       if (*m)
         m++;
       if (*n)
         n++;
     }
-    else
+    else 
     {
-      if (!wild)
+      if (unlikely(!ma))
         return 1;
       m = ma;
-
-      /* Added to `mmatch' : Because '\?' and '\*' now is one character: */
-      if ((*na == '\\') && ((na[1] == '*') || (na[1] == '?')))
+      /* skip one escaped character */
+      if (unlikely(*na == '\\' && (!rfcmatch || na[1] == '*' || na[1] == '?')))
         ++na;
-
       n = ++na;
     }
   }
 }
 
+int mmatch(const char *old_mask, const char *new_mask) {
+  return _mmatch(old_mask, new_mask, 1); /* rfcmatch = 1: a backslash escapes only a following * or ? */
+}
+
 /*
  * Compare if a given string (name) matches the given
  * mask (which can contain wild cards: '*' - match any
@@ -186,7 +210,7 @@
  * @param[in] name String to check against \a mask.
  * @return Zero if \a mask matches \a name, non-zero if no match.
  */
-int match(const char *mask, const char *name)
+__inline__ int _match(const char *mask, const char *name, int rfcmatch)
 {
   const char *m = mask, *n = name;
   const char *m_tmp = mask, *n_tmp = name;
@@ -205,10 +229,14 @@
       return 1;
     break;
   case '\\':
-    m++;
     /* allow escaping to force capitalization */
-    if (*m++ != *n++)
-      goto backtrack;
+    if (!rfcmatch) {
+      m++;
+      if (*m++ != *n++)
+        goto backtrack;
+    } else {
+      goto fallthrough;
+    }
     break;
   case '*': case '?':
     for (star_p = 0; ; m++) {
@@ -234,6 +262,7 @@
     }
     /* and fall through */
   default:
+  fallthrough:
     if (!*n)
       return *m != '\0';
     if (ToLower(*m) != ToLower(*n))
@@ -244,6 +273,11 @@
   }
 }
 
+int match(const char *mask, const char *name)
+{
+  return _match(mask, name, 1); /* rfcmatch = 1: a backslash in mask jumps to the default-case comparison */
+}
+
 /*
  * collapse()
  * Collapse a pattern string into minimal components.
Commit	Line	Data
c2ba8a45 P	1	diff -r 19854df2b7c2 ircd/match.c
	2	--- a/ircd/match.c Mon Jan 26 18:42:26 2009 +0000
	3	+++ b/ircd/match.c Mon Jan 26 18:48:52 2009 +0000
c7b68605 P	4	@@ -27,6 +27,9 @@
	5	#include "ircd_string.h"
	6	#include "ircd_snprintf.h"
	7
	8	+#define likely(x) __builtin_expect((x),1)
	9	+#define unlikely(x) __builtin_expect((x),0)
	10	+
	11	/*
	12	* mmatch()
	13	*
aaeaaa08	14	@@ -62,53 +65,73 @@
c7b68605 P	15	*
	16	* @param[in] old_mask One wildcard mask.
	17	* @param[in] new_mask Another wildcard mask.
	18	+ * @param[in] case_sensitive Indicate case sensitivity
	19	* @return Zero if \a old_mask is a superset of \a new_mask, non-zero otherwise.
	20	*/
	21	-int mmatch(const char *old_mask, const char *new_mask)
c7b68605 P	22	+__inline__ int _mmatch(const char *old_mask, const char *new_mask, int rfcmatch)
	23	{
	24	const char *m = old_mask;
	25	const char *n = new_mask;
	26	- const char *ma = m;
	27	- const char *na = n;
	28	- int wild = 0;
	29	- int mq = 0, nq = 0;
	30	+ /* Note that ma / na never point to a character escaped by a backslash. */
aaeaaa08	31	+ const char *ma = NULL; /* Remembered m for backtracking. */
c7b68605	32	+ const char *na = NULL;
c2ba8a45	33	+ int mq = 0, nq = 0; /* Is *m / *n escaped? */
c7b68605 P	34	+ int match;
	35	+
	36	+ if ( m[0] == '*' && m[1] == '\0' ) {
	37	+ return 0;
	38	+ } else if ( n[0] == '*' && n[1] == '\0' ) {
	39	+ return 1;
	40	+ }
	41
	42	while (1)
	43	{
	44	- if (*m == '*')
	45	+ if (unlikely(*m == '*'))
	46	{
	47	+ /* Optimization: Skip redundant *'s */
	48	while (*m == '*')
	49	m++;
	50	- wild = 1;
	51	+ /* And remember this position for backtracking. */
	52	ma = m;
	53	na = n;
	54	}
	55
	56	- if (!*m)
	57	+ if (unlikely(!*m))
	58	{
	59	if (!*n)
	60	return 0;
	61	+ /* This construct speeds up matches of patterns ending with a *
	62	+ * followed by any number of ?. The tricky part is figuring
	63	+ * out whether or not that * was escaped. */
	64	for (m--; (m > old_mask) && (*m == '?'); m--)
	65	- ;
	66	- if ((*m == '*') && (m > old_mask) && (m[-1] != '\\'))
	67	- return 0;
	68	- if (!wild)
	69	+ ; /* Skip trailing ?'s */
	70	+ if (*m == '*') {
	71	+ if ((--m >= old_mask) && (*m != '\\'))
	72	+ return 0;
	73	+ /* Now if there's an odd number of backslashes, the for loop
	74	+ * breaks out and we backtrack. */
	75	+ if (!rfcmatch) /* In rfc, backslashes can't be escaped. */
	76	+ for(--m; (m >= old_mask) && (*m == '\\'); m--)
	77	+ if ((--m >= old_mask) && (*m != '\\'))
	78	+ return 0;
	79	+ }
	80	+ if (!ma)
	81	return 1;
	82	m = ma;
	83	-
	84	- /* Added to `mmatch' : Because '\?' and '\*' now is one character: */
	85	- if ((*na == '\\') && ((na[1] == '*') || (na[1] == '?')))
	86	+ /* skip one escaped character */
	87	+ if (*na == '\\' && (!rfcmatch || na[1] == '*' || na[1] == '?'))
	88	++na;
	89	-
	90	n = ++na;
	91	}
	92	- else if (!*n)
	93	+
	94	+ if (unlikely(!*n))
	95	{
	96	- while (*m == '*')
	97	+ while (unlikely(*m == '*'))
98	m++;
99	return (*m != 0);
100	}
	101	- if ((*m == '\\') && ((m[1] == '*') || (m[1] == '?')))
	102	+
	103	+ if (unlikely(*m == '\\' && (!rfcmatch || m[1] == '*' || m[1] == '?')))
104	{
105	m++;
106	mq = 1;
aaeaaa08	107	@@ -116,8 +139,7 @@
c7b68605 P	108	else
	109	mq = 0;
	110
	111	- /* Added to `mmatch' : Because '\?' and '\*' now is one character: */
	112	- if ((*n == '\\') && ((n[1] == '*') || (n[1] == '?')))
	113	+ if (unlikely(*n == '\\' && (!rfcmatch || n[1] == '*' || n[1] == '?')))
	114	{
	115	n++;
	116	nq = 1;
aaeaaa08	117	@@ -126,45 +148,47 @@
c7b68605 P	118	nq = 0;
	119
	120	/*
	121	- * This `if' has been changed compared to match() to do the following:
	122	- * Match when:
	123	- * old (m) new (n) boolean expression
	124	- * * any (*m == '*' && !mq) ||
	125	- * ? any except '*' (*m == '?' && !mq && (*n != '*' || nq)) ||
	126	- * any except * or ? same as m (!((*m == '*' || *m == '?') && !mq) &&
	127	- * ToLower(*m) == ToLower(*n) &&
	128	- * !((mq && !nq) || (!mq && nq)))
	129	- *
	130	- * Here `any' also includes \* and \? !
	131	- *
	132	- * After reworking the boolean expressions, we get:
	133	- * (Optimized to use boolean short-circuits, with most frequently occurring
	134	- * cases upfront (which took 2 hours!)).
	135	+ * The hand-optimized short-circuit expression that used to be here was
	136	+ * replaced by the explicit branches below. It was probably slower than
	137	+ * these branches anyway; nobody will notice the difference. -- BP
	138	*/
	139	- if ((*m == '*' && !mq) ||
	140	- ((!mq || nq) && ToLower(*m) == ToLower(*n)) ||
	141	- (*m == '?' && !mq && (*n != '*' || nq)))
	142	+ if (unlikely(mq)) { /* m is quoted, match the exact same, or the
aaeaaa08	143	+ * same character if quoting is irrelevant. */
c7b68605 P	144	+ match = (*m == *n && (nq ||
	145	+ (*n != '*' && *n != '?' && ToUpper(*n) == ToLower(*n))));
	146	+ } else if (unlikely(*m == '?')) { /* m is '?', match anything but unquoted '*' */
	147	+ match = (*n != '*' || nq);
	148	+ } else if (unlikely(*m == '*')) { /* m is '*', match. */
	149	+ match=1;
	150	+ } else /* m is neither quoted nor special */
	151	+ {
	152	+ match = (ToLower(*m) == ToLower(*n));
	153	+ }
	154	+
	155	+ if (unlikely(match))
	156	{
	157	if (*m)
	158	m++;
aaeaaa08	159	if (*n)
c7b68605 P	160	n++;
c7b68605 P	161	}
aaeaaa08 P	162	- else
	163	+ else
	164	{
c7b68605 P	165	- if (!wild)
	166	+ if (unlikely(!ma))
	167	return 1;
	168	m = ma;
	169	-
	170	- /* Added to `mmatch' : Because '\?' and '\*' now is one character: */
	171	- if ((*na == '\\') && ((na[1] == '*') || (na[1] == '?')))
	172	+ /* skip one escaped character */
	173	+ if (unlikely(*na == '\\' && (!rfcmatch || na[1] == '*' || na[1] == '?')))
	174	++na;
	175	-
	176	n = ++na;
	177	}
	178	}
aaeaaa08 P	179	}
	180
	181	+int mmatch(const char *old_mask, const char *new_mask) {
	182	+ return _mmatch(old_mask, new_mask, 1);
	183	+}
	184	+
	185	/*
	186	* Compare if a given string (name) matches the given
	187	* mask (which can contain wild cards: '*' - match any
	188	@@ -186,7 +210,7 @@
c7b68605 P	189	* @param[in] name String to check against \a mask.
	190	* @return Zero if \a mask matches \a name, non-zero if no match.
	191	*/
aaeaaa08 P	192	-int match(const char *mask, const char *name)
aaeaaa08 P	193	+__inline__ int _match(const char *mask, const char *name, int rfcmatch)
c7b68605	194	{
c7b68605 P	195	const char *m = mask, *n = name;
c7b68605 P	196	const char *m_tmp = mask, *n_tmp = name;
aaeaaa08	197	@@ -205,10 +229,14 @@
c7b68605 P	198	return 1;
	199	break;
	200	case '\\':
	201	- m++;
	202	/* allow escaping to force capitalization */
	203	- if (*m++ != *n++)
	204	- goto backtrack;
	205	+ if (!rfcmatch) {
	206	+ m++;
	207	+ if (*m++ != *n++)
	208	+ goto backtrack;
	209	+ } else {
	210	+ goto fallthrough;
	211	+ }
	212	break;
	213	case '*': case '?':
	214	for (star_p = 0; ; m++) {
aaeaaa08	215	@@ -234,6 +262,7 @@
c7b68605 P	216	}
	217	/* and fall through */
	218	default:
	219	+ fallthrough:
	220	if (!*n)
	221	return *m != '\0';
	222	if (ToLower(*m) != ToLower(*n))
aaeaaa08 P	223	@@ -244,6 +273,11 @@
	224	}
	225	}
	226
	227	+int match(const char *mask, const char *name)
	228	+{
	229	+ return _match(mask, name, 1);
	230	+}
	231	+
	232	/*
	233	* collapse()
	234	* Collapse a pattern string into minimal components.