1 | /* |
2 | * Copyright (c) 2015-2017, Intel Corporation |
3 | * |
4 | * Redistribution and use in source and binary forms, with or without |
5 | * modification, are permitted provided that the following conditions are met: |
6 | * |
7 | * * Redistributions of source code must retain the above copyright notice, |
8 | * this list of conditions and the following disclaimer. |
9 | * * Redistributions in binary form must reproduce the above copyright |
10 | * notice, this list of conditions and the following disclaimer in the |
11 | * documentation and/or other materials provided with the distribution. |
12 | * * Neither the name of Intel Corporation nor the names of its contributors |
13 | * may be used to endorse or promote products derived from this software |
14 | * without specific prior written permission. |
15 | * |
16 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" |
17 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
18 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
19 | * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE |
20 | * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR |
21 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF |
22 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS |
23 | * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN |
24 | * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
25 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
26 | * POSSIBILITY OF SUCH DAMAGE. |
27 | */ |
28 | |
29 | #include "config.h" |
30 | #include "expressions.h" |
31 | |
32 | #include "hs.h" |
33 | #include "string_util.h" |
34 | |
35 | #include <algorithm> |
36 | #include <fstream> |
37 | #include <iostream> |
38 | #include <stdexcept> |
39 | #include <string> |
40 | |
41 | #include <sys/types.h> |
42 | #include <sys/stat.h> |
43 | #if !defined(_WIN32) |
44 | #include <dirent.h> |
45 | #include <fcntl.h> |
46 | #include <unistd.h> |
47 | #else |
48 | // Windows support is probably very fragile |
49 | #include <windows.h> |
50 | #endif |
51 | |
52 | #include <boost/algorithm/string/trim.hpp> |
53 | |
54 | using namespace std; |
55 | |
// Report a parse failure on stderr and terminate the process.
//
// The message names the file and line number, gives the error description,
// and echoes the text of the offending line. The process then exits with
// status 1, so this function never returns to its caller.
static
void failLine(unsigned lineNum, const std::string &file,
              const std::string &line, const std::string &error) {
    std::cerr << "Parse error in file " << file << " on line " << lineNum
              << ": " << error << std::endl;
    std::cerr << "Line is: '" << line << "'" << std::endl;
    exit(1);
}
64 | |
65 | static |
66 | void processLine(string &line, unsigned lineNum, |
67 | const string &file, ExpressionMap &exprMap) { |
68 | // if line is empty, or a comment, we can skip it |
69 | if (line.empty() || line[0] == '#') { |
70 | return; |
71 | } |
72 | |
73 | // cull any whitespace |
74 | boost::trim(line); |
75 | |
76 | // otherwise, it should be ID:PCRE, e.g. |
77 | // 10001:/foobar/is |
78 | |
79 | size_t colonIdx = line.find_first_of(':'); |
80 | if (colonIdx == string::npos) { |
81 | failLine(lineNum, file, line, "Could not parse line." ); |
82 | } |
83 | |
84 | // we should have an unsigned int as an ID, before the colon |
85 | unsigned id; |
86 | if (!fromString(line.substr(0, colonIdx), id)) { |
87 | failLine(lineNum, file, line, "Unable to parse ID." ); |
88 | } |
89 | |
90 | // rest of the expression is the PCRE |
91 | const string pcre_str(line.substr(colonIdx + 1)); |
92 | |
93 | //cout << "Inserting expr: id=" << id << ", pcre=" << pcre_str << endl; |
94 | |
95 | bool ins = exprMap.emplace(id, pcre_str).second; |
96 | if (!ins) { |
97 | failLine(lineNum, file, line, "Duplicate ID found." ); |
98 | } |
99 | } |
100 | |
#if defined(_WIN32)
// On Windows, map the POSIX-style names used by the code below onto the
// underscore-prefixed equivalents: stat becomes _stat, and the S_ISDIR /
// S_ISREG file-type tests are expressed as a bitwise AND of the mode with
// _S_IFDIR / _S_IFREG.
#define stat _stat
#define S_ISDIR(st_m) (_S_IFDIR & (st_m))
#define S_ISREG(st_m) (_S_IFREG & (st_m))
#endif
106 | void HS_CDECL loadExpressionsFromFile(const string &fname, ExpressionMap &exprMap) { |
107 | struct stat st; |
108 | if (stat(fname.c_str(), &st) != 0) { |
109 | return; |
110 | } |
111 | if (!S_ISREG(st.st_mode)) { |
112 | return; |
113 | } |
114 | ifstream f(fname.c_str()); |
115 | if (!f.good()) { |
116 | throw runtime_error("Can't open file" ); |
117 | } |
118 | |
119 | unsigned lineNum = 0; |
120 | string line; |
121 | while (getline(f, line)) { |
122 | lineNum++; |
123 | processLine(line, lineNum, fname, exprMap); |
124 | } |
125 | } |
126 | |
// Decide whether a file name should be skipped when scanning a directory.
//
// A name is ignorable when it is empty, ends in '~' (editor backup files)
// or starts with '.' (dotfiles).
static
bool isIgnorable(const std::string &f) {
    if (f.empty()) {
        return true;
    }

    const char first = f.front();
    const char last = f.back();
    return last == '~' || first == '.';
}
145 | |
146 | #ifndef _WIN32 |
147 | void loadExpressions(const string &inPath, ExpressionMap &exprMap) { |
148 | // Is our input path a file or a directory? |
149 | int fd = open(inPath.c_str(), O_RDONLY); |
150 | struct stat st; |
151 | if (fstat(fd, &st) != 0) { |
152 | cerr << "Can't stat path: '" << inPath << "'" << endl; |
153 | exit(1); |
154 | } |
155 | if (S_ISREG(st.st_mode)) { |
156 | // process file |
157 | try { |
158 | loadExpressionsFromFile(inPath, exprMap); |
159 | } catch (runtime_error &e) { |
160 | cerr << e.what() << ": '" << inPath << "'" << endl; |
161 | exit(1); |
162 | } |
163 | } else if (S_ISDIR(st.st_mode)) { |
164 | DIR *d = fdopendir(fd); |
165 | if (d == nullptr) { |
166 | cerr << "Can't open directory: '" << inPath << "'" << endl; |
167 | exit(1); |
168 | } |
169 | for (struct dirent *ent = readdir(d); ent; ent = readdir(d)) { |
170 | string basename(ent->d_name); |
171 | string fname(inPath); |
172 | fname.push_back('/'); |
173 | fname.append(basename); |
174 | |
175 | // Ignore '.' and '..' |
176 | if (basename == "." || basename == ".." ) { |
177 | continue; |
178 | } |
179 | |
180 | // Skip emacs backup files, dotfiles (such as VIM swap). |
181 | if (isIgnorable(basename)) { |
182 | cerr << "Ignoring signature file " << fname << endl; |
183 | continue; |
184 | } |
185 | |
186 | try { |
187 | loadExpressionsFromFile(fname, exprMap); |
188 | } catch (runtime_error &e) { |
189 | cerr << e.what() << ": '" << fname << "'" << endl; |
190 | exit(1); |
191 | } |
192 | } |
193 | (void)closedir(d); |
194 | } else { |
195 | cerr << "Can't stat path: '" << inPath << "'" << endl; |
196 | exit(1); |
197 | } |
198 | (void)close(fd); |
199 | } |
200 | #else // windows TODO: improve |
201 | void HS_CDECL loadExpressions(const string &inPath, ExpressionMap &exprMap) { |
202 | // Is our input path a file or a directory? |
203 | struct stat st; |
204 | if (stat(inPath.c_str(), &st) != 0) { |
205 | cerr << "Can't stat path: '" << inPath << "'" << endl; |
206 | exit(1); |
207 | } |
208 | if (S_ISREG(st.st_mode)) { |
209 | // process file |
210 | try { |
211 | loadExpressionsFromFile(inPath, exprMap); |
212 | } catch (runtime_error &e) { |
213 | cerr << e.what() << ": '" << inPath << "'" << endl; |
214 | exit(1); |
215 | } |
216 | } else if (S_ISDIR(st.st_mode)) { |
217 | WIN32_FIND_DATA ffd; |
218 | HANDLE hFind = INVALID_HANDLE_VALUE; |
219 | string glob = inPath + "/*" ; |
220 | hFind = FindFirstFile(glob.c_str(), &ffd); |
221 | if (hFind == INVALID_HANDLE_VALUE) { |
222 | cerr << "Can't open directory: '" << inPath << "'" << endl; |
223 | exit(1); |
224 | } |
225 | do { |
226 | string basename(ffd.cFileName); |
227 | string fname(inPath); |
228 | fname.push_back('/'); |
229 | fname.append(basename); |
230 | |
231 | // Ignore '.' and '..' |
232 | if (basename == "." || basename == ".." ) { |
233 | continue; |
234 | } |
235 | |
236 | // Skip emacs backup files, dotfiles (such as VIM swap). |
237 | if (isIgnorable(basename)) { |
238 | cerr << "Ignoring signature file " << fname << endl; |
239 | continue; |
240 | } |
241 | |
242 | try { |
243 | loadExpressionsFromFile(fname, exprMap); |
244 | } catch (runtime_error &e) { |
245 | cerr << e.what() << ": '" << fname << "'" << endl; |
246 | exit(1); |
247 | } |
248 | } while (FindNextFile(hFind, &ffd) != 0); |
249 | FindClose(hFind); |
250 | } else { |
251 | cerr << "Can't stat path: '" << inPath << "'" << endl; |
252 | exit(1); |
253 | } |
254 | } |
255 | #endif |
256 | |
257 | void HS_CDECL loadSignatureList(const string &inFile, |
258 | SignatureSet &signatures) { |
259 | ifstream f(inFile.c_str()); |
260 | if (!f.good()) { |
261 | cerr << "Can't open file: '" << inFile << "'" << endl; |
262 | exit(1); |
263 | } |
264 | |
265 | unsigned lineNum = 0; |
266 | string line; |
267 | while (getline(f, line)) { |
268 | lineNum++; |
269 | |
270 | // if line is empty, or a comment, we can skip it |
271 | if (line.empty() || line[0] == '#') { |
272 | continue; |
273 | } |
274 | |
275 | unsigned id; |
276 | if (fromString(line, id)) { |
277 | signatures.push_back(id); |
278 | } else { |
279 | // Parse error occurred |
280 | failLine(lineNum, inFile, line, "Unable to parse ID." ); |
281 | } |
282 | } |
283 | } |
284 | |
285 | ExpressionMap limitToSignatures(const ExpressionMap &exprMap, |
286 | const SignatureSet &signatures) { |
287 | ExpressionMap keepers; |
288 | |
289 | for (auto id : signatures) { |
290 | auto match = exprMap.find(id); |
291 | if (match == exprMap.end()) { |
292 | cerr << "Unable to find signature " << id |
293 | << " in expression set!" << endl; |
294 | exit(1); |
295 | } |
296 | keepers.insert(*match); |
297 | } |
298 | |
299 | return keepers; |
300 | } |
301 | |