1/*****************************************************************************/
2/* */
3/* matchpat.c */
4/* */
5/* Unix shell like pattern matching */
6/* */
7/* */
8/* */
9/* (C) 2002 Ullrich von Bassewitz */
10/* Wacholderweg 14 */
11/* D-70597 Stuttgart */
12/* EMail: uz@musoftware.de */
13/* */
14/* */
15/* This software is provided 'as-is', without any expressed or implied */
16/* warranty. In no event will the authors be held liable for any damages */
17/* arising from the use of this software. */
18/* */
19/* Permission is granted to anyone to use this software for any purpose, */
20/* including commercial applications, and to alter it and redistribute it */
21/* freely, subject to the following restrictions: */
22/* */
23/* 1. The origin of this software must not be misrepresented; you must not */
24/* claim that you wrote the original software. If you use this software */
25/* in a product, an acknowledgment in the product documentation would be */
26/* appreciated but is not required. */
27/* 2. Altered source versions must be plainly marked as such, and must not */
28/* be misrepresented as being the original software. */
29/* 3. This notice may not be removed or altered from any source */
30/* distribution. */
31/* */
32/*****************************************************************************/
33
34
35
36#include <string.h>
37
38/* common */
39#include "matchpat.h"
40
41
42
43/*****************************************************************************/
44/* Character bit set implementation */
45/*****************************************************************************/
46
47
48
49typedef unsigned char CharSet[32]; /* 256 bits */
50
51
52
53/* Clear a character set */
54#define CS_CLEAR(CS) memset (CS, 0, sizeof (CharSet))
55
56/* Set all characters in the set */
57#define CS_SETALL(CS) memset (CS, 0xFF, sizeof (CharSet))
58
59/* Add one char to the set */
60#define CS_ADD(CS, C) ((CS)[(C) >> 3] |= (0x01 << ((C) & 0x07)))
61
62/* Check if a character is a member of the set */
63#define CS_CONTAINS(CS, C) ((CS)[(C) >> 3] & (0x01 << ((C) & 0x07)))
64
65/* Invert a character set */
66#define CS_INVERT(CS) \
67 do { \
68 unsigned I; \
69 for (I = 0; I < sizeof (CharSet); ++I) { \
70 CS[I] ^= 0xFF; \
71 } \
72 } while (0)
73
74
75
76
77
78/*****************************************************************************/
79/* Code */
80/*****************************************************************************/
81
82
83
84/* Escape character */
85#define ESCAPE_CHAR '\\'
86
87/* Utility macro used in RecursiveMatch */
88#define IncPattern() Pattern++; \
89 if (*Pattern == '\0') { \
90 return 0; \
91 }
92
93
94
95static int RealChar (const unsigned char* Pattern)
96/* Return the next character from Pattern. If the next character is the
97** escape character, skip it and return the following.
98*/
99{
100 if (*Pattern == ESCAPE_CHAR) {
101 Pattern++;
102 return (*Pattern == '\0') ? -1 : *Pattern;
103 } else {
104 return *Pattern;
105 }
106}
107
108
109
110static int RecursiveMatch (const unsigned char* Source, const unsigned char* Pattern)
111/* A recursive pattern matcher */
112{
113
114 CharSet CS;
115
116 while (1) {
117
118 if (*Pattern == '\0') {
119
120 /* Reached the end of Pattern, what about Source? */
121 return (*Source == '\0') ? 1 : 0;
122
123 } else if (*Pattern == '*') {
124
125 if (*++Pattern == '\0') {
126 /* A trailing '*' is always a match */
127 return 1;
128 }
129
130 /* Check the rest of the string */
131 while (*Source) {
132 if (RecursiveMatch (Source++, Pattern)) {
133 /* Match! */
134 return 1;
135 }
136 }
137
138 /* No match... */
139 return 0;
140
141 } else if (*Source == '\0') {
142
143 /* End of Source reached, no match */
144 return 0;
145
146 } else {
147
148 /* Check a single char. Build a set of all possible characters in
149 ** CS, then check if the current char of Source is contained in
150 ** there.
151 */
152 CS_CLEAR (CS); /* Clear the character set */
153
154 if (*Pattern == '?') {
155
156 /* All chars are allowed */
157 CS_SETALL (CS);
158 ++Pattern; /* Skip '?' */
159
160 } else if (*Pattern == ESCAPE_CHAR) {
161
162 /* Use the next char as is */
163 IncPattern ();
164 CS_ADD (CS, *Pattern);
165 ++Pattern; /* Skip the character */
166
167 } else if (*Pattern == '[') {
168
169 /* A set follows */
170 int Invert = 0;
171 IncPattern ();
172 if (*Pattern == '!') {
173 IncPattern ();
174 Invert = 1;
175 }
176 while (*Pattern != ']') {
177
178 int C1;
179 if ((C1 = RealChar (Pattern)) == -1) {
180 return 0;
181 }
182 IncPattern ();
183 if (*Pattern != '-') {
184 CS_ADD (CS, C1);
185 } else {
186 int C2;
187 unsigned char C;
188 IncPattern ();
189 if ((C2 = RealChar (Pattern)) == -1) {
190 return 0;
191 }
192 IncPattern ();
193 for (C = C1; C <= C2; C++) {
194 CS_ADD (CS, C);
195 }
196 }
197 }
198 /* Skip ']' */
199 ++Pattern;
200 if (Invert) {
201 /* Reverse all bits in the set */
202 CS_INVERT (CS);
203 }
204
205 } else {
206
207 /* Include the char in the charset, then skip it */
208 CS_ADD (CS, *Pattern);
209 ++Pattern;
210
211 }
212
213 if (!CS_CONTAINS (CS, *Source)) {
214 /* No match */
215 return 0;
216 }
217 ++Source;
218 }
219 }
220}
221
222
223
224
225int MatchPattern (const char* Source, const char* Pattern)
226/* Match the string in Source against Pattern. Pattern may contain the
227** wildcards '*', '?', '[abcd]' '[ab-d]', '[!abcd]', '[!ab-d]'. The
228** function returns a value of zero if Source does not match Pattern,
229** otherwise a non zero value is returned. If Pattern contains an invalid
230** wildcard pattern (e.g. 'A[x'), the function returns zero.
231*/
232{
233 /* Handle the trivial cases */
234 if (Pattern == 0 || *Pattern == '\0') {
235 return (Source == 0 || *Source == '\0');
236 }
237
238 /* Do the real thing */
239 return RecursiveMatch ((const unsigned char*) Source, (const unsigned char*) Pattern);
240}
241