1 | /*****************************************************************************/ |
2 | /* */ |
3 | /* matchpat.c */ |
4 | /* */ |
5 | /* Unix shell like pattern matching */ |
6 | /* */ |
7 | /* */ |
8 | /* */ |
9 | /* (C) 2002 Ullrich von Bassewitz */ |
10 | /* Wacholderweg 14 */ |
11 | /* D-70597 Stuttgart */ |
12 | /* EMail: uz@musoftware.de */ |
13 | /* */ |
14 | /* */ |
15 | /* This software is provided 'as-is', without any expressed or implied */ |
16 | /* warranty. In no event will the authors be held liable for any damages */ |
17 | /* arising from the use of this software. */ |
18 | /* */ |
19 | /* Permission is granted to anyone to use this software for any purpose, */ |
20 | /* including commercial applications, and to alter it and redistribute it */ |
21 | /* freely, subject to the following restrictions: */ |
22 | /* */ |
23 | /* 1. The origin of this software must not be misrepresented; you must not */ |
24 | /* claim that you wrote the original software. If you use this software */ |
25 | /* in a product, an acknowledgment in the product documentation would be */ |
26 | /* appreciated but is not required. */ |
27 | /* 2. Altered source versions must be plainly marked as such, and must not */ |
28 | /* be misrepresented as being the original software. */ |
29 | /* 3. This notice may not be removed or altered from any source */ |
30 | /* distribution. */ |
31 | /* */ |
32 | /*****************************************************************************/ |
33 | |
34 | |
35 | |
36 | #include <string.h> |
37 | |
38 | /* common */ |
39 | #include "matchpat.h" |
40 | |
41 | |
42 | |
43 | /*****************************************************************************/ |
44 | /* Character bit set implementation */ |
45 | /*****************************************************************************/ |
46 | |
47 | |
48 | |
49 | typedef unsigned char CharSet[32]; /* 256 bits */ |
50 | |
51 | |
52 | |
53 | /* Clear a character set */ |
54 | #define CS_CLEAR(CS) memset (CS, 0, sizeof (CharSet)) |
55 | |
56 | /* Set all characters in the set */ |
57 | #define CS_SETALL(CS) memset (CS, 0xFF, sizeof (CharSet)) |
58 | |
59 | /* Add one char to the set */ |
60 | #define CS_ADD(CS, C) ((CS)[(C) >> 3] |= (0x01 << ((C) & 0x07))) |
61 | |
62 | /* Check if a character is a member of the set */ |
63 | #define CS_CONTAINS(CS, C) ((CS)[(C) >> 3] & (0x01 << ((C) & 0x07))) |
64 | |
65 | /* Invert a character set */ |
66 | #define CS_INVERT(CS) \ |
67 | do { \ |
68 | unsigned I; \ |
69 | for (I = 0; I < sizeof (CharSet); ++I) { \ |
70 | CS[I] ^= 0xFF; \ |
71 | } \ |
72 | } while (0) |
73 | |
74 | |
75 | |
76 | |
77 | |
78 | /*****************************************************************************/ |
79 | /* Code */ |
80 | /*****************************************************************************/ |
81 | |
82 | |
83 | |
84 | /* Escape character */ |
85 | #define ESCAPE_CHAR '\\' |
86 | |
87 | /* Utility macro used in RecursiveMatch */ |
88 | #define IncPattern() Pattern++; \ |
89 | if (*Pattern == '\0') { \ |
90 | return 0; \ |
91 | } |
92 | |
93 | |
94 | |
95 | static int RealChar (const unsigned char* Pattern) |
96 | /* Return the next character from Pattern. If the next character is the |
97 | ** escape character, skip it and return the following. |
98 | */ |
99 | { |
100 | if (*Pattern == ESCAPE_CHAR) { |
101 | Pattern++; |
102 | return (*Pattern == '\0') ? -1 : *Pattern; |
103 | } else { |
104 | return *Pattern; |
105 | } |
106 | } |
107 | |
108 | |
109 | |
110 | static int RecursiveMatch (const unsigned char* Source, const unsigned char* Pattern) |
111 | /* A recursive pattern matcher */ |
112 | { |
113 | |
114 | CharSet CS; |
115 | |
116 | while (1) { |
117 | |
118 | if (*Pattern == '\0') { |
119 | |
120 | /* Reached the end of Pattern, what about Source? */ |
121 | return (*Source == '\0') ? 1 : 0; |
122 | |
123 | } else if (*Pattern == '*') { |
124 | |
125 | if (*++Pattern == '\0') { |
126 | /* A trailing '*' is always a match */ |
127 | return 1; |
128 | } |
129 | |
130 | /* Check the rest of the string */ |
131 | while (*Source) { |
132 | if (RecursiveMatch (Source++, Pattern)) { |
133 | /* Match! */ |
134 | return 1; |
135 | } |
136 | } |
137 | |
138 | /* No match... */ |
139 | return 0; |
140 | |
141 | } else if (*Source == '\0') { |
142 | |
143 | /* End of Source reached, no match */ |
144 | return 0; |
145 | |
146 | } else { |
147 | |
148 | /* Check a single char. Build a set of all possible characters in |
149 | ** CS, then check if the current char of Source is contained in |
150 | ** there. |
151 | */ |
152 | CS_CLEAR (CS); /* Clear the character set */ |
153 | |
154 | if (*Pattern == '?') { |
155 | |
156 | /* All chars are allowed */ |
157 | CS_SETALL (CS); |
158 | ++Pattern; /* Skip '?' */ |
159 | |
160 | } else if (*Pattern == ESCAPE_CHAR) { |
161 | |
162 | /* Use the next char as is */ |
163 | IncPattern (); |
164 | CS_ADD (CS, *Pattern); |
165 | ++Pattern; /* Skip the character */ |
166 | |
167 | } else if (*Pattern == '[') { |
168 | |
169 | /* A set follows */ |
170 | int Invert = 0; |
171 | IncPattern (); |
172 | if (*Pattern == '!') { |
173 | IncPattern (); |
174 | Invert = 1; |
175 | } |
176 | while (*Pattern != ']') { |
177 | |
178 | int C1; |
179 | if ((C1 = RealChar (Pattern)) == -1) { |
180 | return 0; |
181 | } |
182 | IncPattern (); |
183 | if (*Pattern != '-') { |
184 | CS_ADD (CS, C1); |
185 | } else { |
186 | int C2; |
187 | unsigned char C; |
188 | IncPattern (); |
189 | if ((C2 = RealChar (Pattern)) == -1) { |
190 | return 0; |
191 | } |
192 | IncPattern (); |
193 | for (C = C1; C <= C2; C++) { |
194 | CS_ADD (CS, C); |
195 | } |
196 | } |
197 | } |
198 | /* Skip ']' */ |
199 | ++Pattern; |
200 | if (Invert) { |
201 | /* Reverse all bits in the set */ |
202 | CS_INVERT (CS); |
203 | } |
204 | |
205 | } else { |
206 | |
207 | /* Include the char in the charset, then skip it */ |
208 | CS_ADD (CS, *Pattern); |
209 | ++Pattern; |
210 | |
211 | } |
212 | |
213 | if (!CS_CONTAINS (CS, *Source)) { |
214 | /* No match */ |
215 | return 0; |
216 | } |
217 | ++Source; |
218 | } |
219 | } |
220 | } |
221 | |
222 | |
223 | |
224 | |
225 | int MatchPattern (const char* Source, const char* Pattern) |
226 | /* Match the string in Source against Pattern. Pattern may contain the |
227 | ** wildcards '*', '?', '[abcd]' '[ab-d]', '[!abcd]', '[!ab-d]'. The |
228 | ** function returns a value of zero if Source does not match Pattern, |
229 | ** otherwise a non zero value is returned. If Pattern contains an invalid |
230 | ** wildcard pattern (e.g. 'A[x'), the function returns zero. |
231 | */ |
232 | { |
233 | /* Handle the trivial cases */ |
234 | if (Pattern == 0 || *Pattern == '\0') { |
235 | return (Source == 0 || *Source == '\0'); |
236 | } |
237 | |
238 | /* Do the real thing */ |
239 | return RecursiveMatch ((const unsigned char*) Source, (const unsigned char*) Pattern); |
240 | } |
241 | |