| 1 | /*****************************************************************************/ |
| 2 | /* */ |
| 3 | /* matchpat.c */ |
| 4 | /* */ |
| 5 | /* Unix shell like pattern matching */ |
| 6 | /* */ |
| 7 | /* */ |
| 8 | /* */ |
| 9 | /* (C) 2002 Ullrich von Bassewitz */ |
| 10 | /* Wacholderweg 14 */ |
| 11 | /* D-70597 Stuttgart */ |
| 12 | /* EMail: uz@musoftware.de */ |
| 13 | /* */ |
| 14 | /* */ |
| 15 | /* This software is provided 'as-is', without any expressed or implied */ |
| 16 | /* warranty. In no event will the authors be held liable for any damages */ |
| 17 | /* arising from the use of this software. */ |
| 18 | /* */ |
| 19 | /* Permission is granted to anyone to use this software for any purpose, */ |
| 20 | /* including commercial applications, and to alter it and redistribute it */ |
| 21 | /* freely, subject to the following restrictions: */ |
| 22 | /* */ |
| 23 | /* 1. The origin of this software must not be misrepresented; you must not */ |
| 24 | /* claim that you wrote the original software. If you use this software */ |
| 25 | /* in a product, an acknowledgment in the product documentation would be */ |
| 26 | /* appreciated but is not required. */ |
| 27 | /* 2. Altered source versions must be plainly marked as such, and must not */ |
| 28 | /* be misrepresented as being the original software. */ |
| 29 | /* 3. This notice may not be removed or altered from any source */ |
| 30 | /* distribution. */ |
| 31 | /* */ |
| 32 | /*****************************************************************************/ |
| 33 | |
| 34 | |
| 35 | |
| 36 | #include <string.h> |
| 37 | |
| 38 | /* common */ |
| 39 | #include "matchpat.h" |
| 40 | |
| 41 | |
| 42 | |
| 43 | /*****************************************************************************/ |
| 44 | /* Character bit set implementation */ |
| 45 | /*****************************************************************************/ |
| 46 | |
| 47 | |
| 48 | |
| 49 | typedef unsigned char CharSet[32]; /* 256 bits */ |
| 50 | |
| 51 | |
| 52 | |
| 53 | /* Clear a character set */ |
| 54 | #define CS_CLEAR(CS) memset (CS, 0, sizeof (CharSet)) |
| 55 | |
| 56 | /* Set all characters in the set */ |
| 57 | #define CS_SETALL(CS) memset (CS, 0xFF, sizeof (CharSet)) |
| 58 | |
| 59 | /* Add one char to the set */ |
| 60 | #define CS_ADD(CS, C) ((CS)[(C) >> 3] |= (0x01 << ((C) & 0x07))) |
| 61 | |
| 62 | /* Check if a character is a member of the set */ |
| 63 | #define CS_CONTAINS(CS, C) ((CS)[(C) >> 3] & (0x01 << ((C) & 0x07))) |
| 64 | |
| 65 | /* Invert a character set */ |
| 66 | #define CS_INVERT(CS) \ |
| 67 | do { \ |
| 68 | unsigned I; \ |
| 69 | for (I = 0; I < sizeof (CharSet); ++I) { \ |
| 70 | CS[I] ^= 0xFF; \ |
| 71 | } \ |
| 72 | } while (0) |
| 73 | |
| 74 | |
| 75 | |
| 76 | |
| 77 | |
| 78 | /*****************************************************************************/ |
| 79 | /* Code */ |
| 80 | /*****************************************************************************/ |
| 81 | |
| 82 | |
| 83 | |
| 84 | /* Escape character */ |
| 85 | #define ESCAPE_CHAR '\\' |
| 86 | |
| 87 | /* Utility macro used in RecursiveMatch */ |
| 88 | #define IncPattern() Pattern++; \ |
| 89 | if (*Pattern == '\0') { \ |
| 90 | return 0; \ |
| 91 | } |
| 92 | |
| 93 | |
| 94 | |
| 95 | static int RealChar (const unsigned char* Pattern) |
| 96 | /* Return the next character from Pattern. If the next character is the |
| 97 | ** escape character, skip it and return the following. |
| 98 | */ |
| 99 | { |
| 100 | if (*Pattern == ESCAPE_CHAR) { |
| 101 | Pattern++; |
| 102 | return (*Pattern == '\0') ? -1 : *Pattern; |
| 103 | } else { |
| 104 | return *Pattern; |
| 105 | } |
| 106 | } |
| 107 | |
| 108 | |
| 109 | |
| 110 | static int RecursiveMatch (const unsigned char* Source, const unsigned char* Pattern) |
| 111 | /* A recursive pattern matcher */ |
| 112 | { |
| 113 | |
| 114 | CharSet CS; |
| 115 | |
| 116 | while (1) { |
| 117 | |
| 118 | if (*Pattern == '\0') { |
| 119 | |
| 120 | /* Reached the end of Pattern, what about Source? */ |
| 121 | return (*Source == '\0') ? 1 : 0; |
| 122 | |
| 123 | } else if (*Pattern == '*') { |
| 124 | |
| 125 | if (*++Pattern == '\0') { |
| 126 | /* A trailing '*' is always a match */ |
| 127 | return 1; |
| 128 | } |
| 129 | |
| 130 | /* Check the rest of the string */ |
| 131 | while (*Source) { |
| 132 | if (RecursiveMatch (Source++, Pattern)) { |
| 133 | /* Match! */ |
| 134 | return 1; |
| 135 | } |
| 136 | } |
| 137 | |
| 138 | /* No match... */ |
| 139 | return 0; |
| 140 | |
| 141 | } else if (*Source == '\0') { |
| 142 | |
| 143 | /* End of Source reached, no match */ |
| 144 | return 0; |
| 145 | |
| 146 | } else { |
| 147 | |
| 148 | /* Check a single char. Build a set of all possible characters in |
| 149 | ** CS, then check if the current char of Source is contained in |
| 150 | ** there. |
| 151 | */ |
| 152 | CS_CLEAR (CS); /* Clear the character set */ |
| 153 | |
| 154 | if (*Pattern == '?') { |
| 155 | |
| 156 | /* All chars are allowed */ |
| 157 | CS_SETALL (CS); |
| 158 | ++Pattern; /* Skip '?' */ |
| 159 | |
| 160 | } else if (*Pattern == ESCAPE_CHAR) { |
| 161 | |
| 162 | /* Use the next char as is */ |
| 163 | IncPattern (); |
| 164 | CS_ADD (CS, *Pattern); |
| 165 | ++Pattern; /* Skip the character */ |
| 166 | |
| 167 | } else if (*Pattern == '[') { |
| 168 | |
| 169 | /* A set follows */ |
| 170 | int Invert = 0; |
| 171 | IncPattern (); |
| 172 | if (*Pattern == '!') { |
| 173 | IncPattern (); |
| 174 | Invert = 1; |
| 175 | } |
| 176 | while (*Pattern != ']') { |
| 177 | |
| 178 | int C1; |
| 179 | if ((C1 = RealChar (Pattern)) == -1) { |
| 180 | return 0; |
| 181 | } |
| 182 | IncPattern (); |
| 183 | if (*Pattern != '-') { |
| 184 | CS_ADD (CS, C1); |
| 185 | } else { |
| 186 | int C2; |
| 187 | unsigned char C; |
| 188 | IncPattern (); |
| 189 | if ((C2 = RealChar (Pattern)) == -1) { |
| 190 | return 0; |
| 191 | } |
| 192 | IncPattern (); |
| 193 | for (C = C1; C <= C2; C++) { |
| 194 | CS_ADD (CS, C); |
| 195 | } |
| 196 | } |
| 197 | } |
| 198 | /* Skip ']' */ |
| 199 | ++Pattern; |
| 200 | if (Invert) { |
| 201 | /* Reverse all bits in the set */ |
| 202 | CS_INVERT (CS); |
| 203 | } |
| 204 | |
| 205 | } else { |
| 206 | |
| 207 | /* Include the char in the charset, then skip it */ |
| 208 | CS_ADD (CS, *Pattern); |
| 209 | ++Pattern; |
| 210 | |
| 211 | } |
| 212 | |
| 213 | if (!CS_CONTAINS (CS, *Source)) { |
| 214 | /* No match */ |
| 215 | return 0; |
| 216 | } |
| 217 | ++Source; |
| 218 | } |
| 219 | } |
| 220 | } |
| 221 | |
| 222 | |
| 223 | |
| 224 | |
| 225 | int MatchPattern (const char* Source, const char* Pattern) |
| 226 | /* Match the string in Source against Pattern. Pattern may contain the |
| 227 | ** wildcards '*', '?', '[abcd]' '[ab-d]', '[!abcd]', '[!ab-d]'. The |
| 228 | ** function returns a value of zero if Source does not match Pattern, |
| 229 | ** otherwise a non zero value is returned. If Pattern contains an invalid |
| 230 | ** wildcard pattern (e.g. 'A[x'), the function returns zero. |
| 231 | */ |
| 232 | { |
| 233 | /* Handle the trivial cases */ |
| 234 | if (Pattern == 0 || *Pattern == '\0') { |
| 235 | return (Source == 0 || *Source == '\0'); |
| 236 | } |
| 237 | |
| 238 | /* Do the real thing */ |
| 239 | return RecursiveMatch ((const unsigned char*) Source, (const unsigned char*) Pattern); |
| 240 | } |
| 241 | |