match.patch

   1 diff -r 19854df2b7c2 ircd/match.c
   2 --- a/ircd/match.c      Mon Jan 26 18:42:26 2009 +0000
   3 +++ b/ircd/match.c      Mon Jan 26 18:48:52 2009 +0000
   4 @@ -27,6 +27,9 @@
   5  #include "ircd_string.h"
   6  #include "ircd_snprintf.h"
   7
   8 +#define likely(x) __builtin_expect((x),1)
   9 +#define unlikely(x) __builtin_expect((x),0)
  10 +
  11  /*
  12   * mmatch()
  13   *
  14 @@ -62,53 +65,73 @@
  15   *
  16   * @param[in] old_mask One wildcard mask.
  17   * @param[in] new_mask Another wildcard mask.
  18 + * @param[in] rfcmatch If non-zero, a backslash escapes only '*' and '?'; if zero, it escapes any character.
  19   * @return Zero if \a old_mask is a superset of \a new_mask, non-zero otherwise.
  20   */
  21 -int mmatch(const char *old_mask, const char *new_mask)
  22 +__inline__ int _mmatch(const char *old_mask, const char *new_mask, int rfcmatch)
  23  {
  24    const char *m = old_mask;
  25    const char *n = new_mask;
  26 -  const char *ma = m;
  27 -  const char *na = n;
  28 -  int wild = 0;
  29 -  int mq = 0, nq = 0;
  30 +  /* Note that ma / na never point to a character escaped by a backslash. */
  31 +  const char *ma = NULL; /* Remembered m for backtracking. */
  32 +  const char *na = NULL;
  33 +  int mq = 0, nq = 0; /* Is *m / *n escaped? */
  34 +  int match;
  35 +
  36 +  if ( m[0] == '*' && m[1] == '\0' ) {
  37 +    return 0;
  38 +  } else if ( n[0] == '*' && n[1] == '\0' ) {
  39 +    return 1;
  40 +  }
  41
  42    while (1)
  43    {
  44 -    if (*m == '*')
  45 +    if (unlikely(*m == '*'))
  46      {
  47 +      /* Optimization: Skip redundant *'s */
  48        while (*m == '*')
  49          m++;
  50 -      wild = 1;
  51 +      /* And remember this position for backtracking. */
  52        ma = m;
  53        na = n;
  54      }
  55
  56 -    if (!*m)
  57 +    if (unlikely(!*m))
  58      {
  59        if (!*n)
  60          return 0;
  61 +      /* This construct speeds up matches of patterns ending with a *
  62 +       * followed by any number of ?. The tricky part is figuring
  63 +       * out whether or not that * was escaped. */
  64        for (m--; (m > old_mask) && (*m == '?'); m--)
  65 -        ;
  66 -      if ((*m == '*') && (m > old_mask) && (m[-1] != '\\'))
  67 -        return 0;
  68 -      if (!wild)
  69 +        ; /* Skip trailing ?'s */
  70 +      if (*m == '*') {
  71 +        if ((--m >= old_mask) && (*m != '\\'))
  72 +          return 0;
  73 +        /* Now if there's an odd number of backslashes, the for loop
  74 +         * breaks out and we backtrack. */
  75 +        if (!rfcmatch) /* In rfc, backslashes can't be escaped. */
  76 +          for(--m; (m >= old_mask) && (*m == '\\'); m--)
  77 +            if ((--m >= old_mask) && (*m != '\\'))
  78 +              return 0;
  79 +      }
  80 +      if (!ma)
  81          return 1;
  82        m = ma;
  83 -
  84 -      /* Added to `mmatch' : Because '\?' and '\*' now is one character: */
  85 -      if ((*na == '\\') && ((na[1] == '*') || (na[1] == '?')))
  86 +      /* skip one escaped character */
  87 +      if (*na == '\\' && (!rfcmatch || na[1] == '*' || na[1] == '?'))
  88          ++na;
  89 -
  90        n = ++na;
  91      }
  92 -    else if (!*n)
  93 +
  94 +    if (unlikely(!*n))
  95      {
  96 -      while (*m == '*')
  97 +      while (unlikely(*m == '*'))
  98          m++;
  99        return (*m != 0);
 100      }
 101 -    if ((*m == '\\') && ((m[1] == '*') || (m[1] == '?')))
 102 +
 103 +    if (unlikely(*m == '\\' && (!rfcmatch || m[1] == '*' || m[1] == '?')))
 104      {
 105        m++;
 106        mq = 1;
 107 @@ -116,8 +139,7 @@
 108      else
 109        mq = 0;
 110
 111 -    /* Added to `mmatch' : Because '\?' and '\*' now is one character: */
 112 -    if ((*n == '\\') && ((n[1] == '*') || (n[1] == '?')))
 113 +    if (unlikely(*n == '\\' && (!rfcmatch || n[1] == '*' || n[1] == '?')))
 114      {
 115        n++;
 116        nq = 1;
 117 @@ -126,45 +148,47 @@
 118        nq = 0;
 119
 120  /*
 121 - * This `if' has been changed compared to match() to do the following:
 122 - * Match when:
 123 - *   old (m)         new (n)         boolean expression
 124 - *    *               any             (*m == '*' && !mq) ||
 125 - *    ?               any except '*'  (*m == '?' && !mq && (*n != '*' || nq)) ||
 126 - * any except * or ?  same as m       (!((*m == '*' || *m == '?') && !mq) &&
 127 - *                                      ToLower(*m) == ToLower(*n) &&
 128 - *                                        !((mq && !nq) || (!mq && nq)))
 129 - *
 130 - * Here `any' also includes \* and \? !
 131 - *
 132 - * After reworking the boolean expressions, we get:
 133 - * (Optimized to use boolean short-circuits, with most frequently occurring
 134 - *  cases upfront (which took 2 hours!)).
 135 + * The hand-tuned short-circuit expression that used to be here (the one
 136 + * that took 2 hours) was replaced by the explicit branches below. It was
 137 + * probably slower than they are, and nobody will notice anyway.    -- BP
 138   */
 139 -    if ((*m == '*' && !mq) ||
 140 -        ((!mq || nq) && ToLower(*m) == ToLower(*n)) ||
 141 -        (*m == '?' && !mq && (*n != '*' || nq)))
 142 +    if (unlikely(mq))   {          /* m is quoted, match the exact same, or the
 143 +                                    * same character if quoting is irrelevant.  */
 144 +      match = (*m == *n && (nq ||
 145 +                (*n != '*' && *n != '?' && ToUpper(*n) == ToLower(*n))));
 146 +    } else if (unlikely(*m == '?')) { /* m is '?', match anything but unquoted '*' */
 147 +      match = (*n != '*' || nq);
 148 +    } else if (unlikely(*m == '*')) { /* m is '*', match.                          */
 149 +      match=1;
 150 +    } else                /* m is neither quoted nor special           */
 151 +    {
 152 +      match = (ToLower(*m) == ToLower(*n));
 153 +    }
 154 +
 155 +    if (unlikely(match))
 156      {
 157        if (*m)
 158          m++;
 159        if (*n)
 160          n++;
 161      }
 162 -    else
 163 +    else
 164      {
 165 -      if (!wild)
 166 +      if (unlikely(!ma))
 167          return 1;
 168        m = ma;
 169 -
 170 -      /* Added to `mmatch' : Because '\?' and '\*' now is one character: */
 171 -      if ((*na == '\\') && ((na[1] == '*') || (na[1] == '?')))
 172 +      /* skip one escaped character */
 173 +      if (unlikely(*na == '\\' && (!rfcmatch || na[1] == '*' || na[1] == '?')))
 174          ++na;
 175 -
 176        n = ++na;
 177      }
 178    }
 179  }
 180
 181 +int mmatch(const char *old_mask, const char *new_mask) {
 182 +  return _mmatch(old_mask, new_mask, 1);
 183 +}
 184 +
 185  /*
 186   * Compare if a given string (name) matches the given
 187   * mask (which can contain wild cards: '*' - match any
 188 @@ -186,7 +210,7 @@
 189   * @param[in] name String to check against \a mask.
 190   * @return Zero if \a mask matches \a name, non-zero if no match.
 191   */
 192 -int match(const char *mask, const char *name)
 193 +__inline__ int _match(const char *mask, const char *name, int rfcmatch)
 194  {
 195    const char *m = mask, *n = name;
 196    const char *m_tmp = mask, *n_tmp = name;
 197 @@ -205,10 +229,14 @@
 198        return 1;
 199      break;
 200    case '\\':
 201 -    m++;
 202      /* allow escaping to force capitalization */
 203 -    if (*m++ != *n++)
 204 -      goto backtrack;
 205 +    if (!rfcmatch) {
 206 +      m++;
 207 +      if (*m++ != *n++)
 208 +        goto backtrack;
 209 +    } else {
 210 +      goto fallthrough;
 211 +    }
 212      break;
 213    case '*': case '?':
 214      for (star_p = 0; ; m++) {
 215 @@ -234,6 +262,7 @@
 216      }
 217      /* and fall through */
 218    default:
 219 +  fallthrough:
 220      if (!*n)
 221        return *m != '\0';
 222      if (ToLower(*m) != ToLower(*n))
 223 @@ -244,6 +273,11 @@
 224    }
 225  }
 226
 227 +int match(const char *mask, const char *name)
 228 +{
 229 +  return _match(mask, name, 1);
 230 +}
 231 +
 232  /*
 233   * collapse()
 234   * Collapse a pattern string into minimal components.