1/*
2 * Copyright (c) 2015-2017, Intel Corporation
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions are met:
6 *
7 * * Redistributions of source code must retain the above copyright notice,
8 * this list of conditions and the following disclaimer.
9 * * Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
12 * * Neither the name of Intel Corporation nor the names of its contributors
13 * may be used to endorse or promote products derived from this software
14 * without specific prior written permission.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
20 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26 * POSSIBILITY OF SUCH DAMAGE.
27 */
28
29#include "config.h"
30#include "expressions.h"
31
32#include "hs.h"
33#include "string_util.h"
34
35#include <algorithm>
36#include <fstream>
37#include <iostream>
38#include <stdexcept>
39#include <string>
40
41#include <sys/types.h>
42#include <sys/stat.h>
43#if !defined(_WIN32)
44#include <dirent.h>
45#include <fcntl.h>
46#include <unistd.h>
47#else
48// Windows support is probably very fragile
49#include <windows.h>
50#endif
51
52#include <boost/algorithm/string/trim.hpp>
53
54using namespace std;
55
/**
 * Report a parse failure to stderr and terminate the process.
 *
 * Two lines are written: the first names the file, the line number and the
 * error description; the second echoes the offending line in single quotes.
 * The process then exits with status 1, so this function never returns.
 *
 * \param lineNum Line number reported in the message.
 * \param file    Name of the file being parsed.
 * \param line    Text of the line that could not be parsed.
 * \param error   Human-readable description of the problem.
 */
static
void failLine(unsigned lineNum, const string &file,
              const string &line, const string &error) {
    // Where the problem is and what it is.
    cerr << "Parse error in file " << file;
    cerr << " on line " << lineNum << ": " << error << endl;

    // Echo the raw text so the user can spot the mistake.
    cerr << "Line is: '" << line << "'" << endl;

    exit(1);
}
64
65static
66void processLine(string &line, unsigned lineNum,
67 const string &file, ExpressionMap &exprMap) {
68 // if line is empty, or a comment, we can skip it
69 if (line.empty() || line[0] == '#') {
70 return;
71 }
72
73 // cull any whitespace
74 boost::trim(line);
75
76 // otherwise, it should be ID:PCRE, e.g.
77 // 10001:/foobar/is
78
79 size_t colonIdx = line.find_first_of(':');
80 if (colonIdx == string::npos) {
81 failLine(lineNum, file, line, "Could not parse line.");
82 }
83
84 // we should have an unsigned int as an ID, before the colon
85 unsigned id;
86 if (!fromString(line.substr(0, colonIdx), id)) {
87 failLine(lineNum, file, line, "Unable to parse ID.");
88 }
89
90 // rest of the expression is the PCRE
91 const string pcre_str(line.substr(colonIdx + 1));
92
93 //cout << "Inserting expr: id=" << id << ", pcre=" << pcre_str << endl;
94
95 bool ins = exprMap.emplace(id, pcre_str).second;
96 if (!ins) {
97 failLine(lineNum, file, line, "Duplicate ID found.");
98 }
99}
100
101#if defined(_WIN32)
102#define stat _stat
103#define S_ISDIR(st_m) (_S_IFDIR & (st_m))
104#define S_ISREG(st_m) (_S_IFREG & (st_m))
105#endif
106void HS_CDECL loadExpressionsFromFile(const string &fname, ExpressionMap &exprMap) {
107 struct stat st;
108 if (stat(fname.c_str(), &st) != 0) {
109 return;
110 }
111 if (!S_ISREG(st.st_mode)) {
112 return;
113 }
114 ifstream f(fname.c_str());
115 if (!f.good()) {
116 throw runtime_error("Can't open file");
117 }
118
119 unsigned lineNum = 0;
120 string line;
121 while (getline(f, line)) {
122 lineNum++;
123 processLine(line, lineNum, fname, exprMap);
124 }
125}
126
/**
 * Decide whether a directory entry name should be skipped when loading
 * signature files.
 *
 * A name is ignorable when it is empty, ends in '~' (editor backup files)
 * or starts with '.' (dotfiles).
 *
 * \param f Entry name to test.
 * \return true if the entry should be skipped, false otherwise.
 */
static
bool isIgnorable(const std::string &f) {
    // The empty() test comes first so back()/front() are never evaluated on
    // an empty string.
    return f.empty()
        || f.back() == '~'    // editor backup files
        || f.front() == '.';  // dotfiles
}
145
146#ifndef _WIN32
147void loadExpressions(const string &inPath, ExpressionMap &exprMap) {
148 // Is our input path a file or a directory?
149 int fd = open(inPath.c_str(), O_RDONLY);
150 struct stat st;
151 if (fstat(fd, &st) != 0) {
152 cerr << "Can't stat path: '" << inPath << "'" << endl;
153 exit(1);
154 }
155 if (S_ISREG(st.st_mode)) {
156 // process file
157 try {
158 loadExpressionsFromFile(inPath, exprMap);
159 } catch (runtime_error &e) {
160 cerr << e.what() << ": '" << inPath << "'" << endl;
161 exit(1);
162 }
163 } else if (S_ISDIR(st.st_mode)) {
164 DIR *d = fdopendir(fd);
165 if (d == nullptr) {
166 cerr << "Can't open directory: '" << inPath << "'" << endl;
167 exit(1);
168 }
169 for (struct dirent *ent = readdir(d); ent; ent = readdir(d)) {
170 string basename(ent->d_name);
171 string fname(inPath);
172 fname.push_back('/');
173 fname.append(basename);
174
175 // Ignore '.' and '..'
176 if (basename == "." || basename == "..") {
177 continue;
178 }
179
180 // Skip emacs backup files, dotfiles (such as VIM swap).
181 if (isIgnorable(basename)) {
182 cerr << "Ignoring signature file " << fname << endl;
183 continue;
184 }
185
186 try {
187 loadExpressionsFromFile(fname, exprMap);
188 } catch (runtime_error &e) {
189 cerr << e.what() << ": '" << fname << "'" << endl;
190 exit(1);
191 }
192 }
193 (void)closedir(d);
194 } else {
195 cerr << "Can't stat path: '" << inPath << "'" << endl;
196 exit(1);
197 }
198 (void)close(fd);
199}
200#else // windows TODO: improve
201void HS_CDECL loadExpressions(const string &inPath, ExpressionMap &exprMap) {
202 // Is our input path a file or a directory?
203 struct stat st;
204 if (stat(inPath.c_str(), &st) != 0) {
205 cerr << "Can't stat path: '" << inPath << "'" << endl;
206 exit(1);
207 }
208 if (S_ISREG(st.st_mode)) {
209 // process file
210 try {
211 loadExpressionsFromFile(inPath, exprMap);
212 } catch (runtime_error &e) {
213 cerr << e.what() << ": '" << inPath << "'" << endl;
214 exit(1);
215 }
216 } else if (S_ISDIR(st.st_mode)) {
217 WIN32_FIND_DATA ffd;
218 HANDLE hFind = INVALID_HANDLE_VALUE;
219 string glob = inPath + "/*";
220 hFind = FindFirstFile(glob.c_str(), &ffd);
221 if (hFind == INVALID_HANDLE_VALUE) {
222 cerr << "Can't open directory: '" << inPath << "'" << endl;
223 exit(1);
224 }
225 do {
226 string basename(ffd.cFileName);
227 string fname(inPath);
228 fname.push_back('/');
229 fname.append(basename);
230
231 // Ignore '.' and '..'
232 if (basename == "." || basename == "..") {
233 continue;
234 }
235
236 // Skip emacs backup files, dotfiles (such as VIM swap).
237 if (isIgnorable(basename)) {
238 cerr << "Ignoring signature file " << fname << endl;
239 continue;
240 }
241
242 try {
243 loadExpressionsFromFile(fname, exprMap);
244 } catch (runtime_error &e) {
245 cerr << e.what() << ": '" << fname << "'" << endl;
246 exit(1);
247 }
248 } while (FindNextFile(hFind, &ffd) != 0);
249 FindClose(hFind);
250 } else {
251 cerr << "Can't stat path: '" << inPath << "'" << endl;
252 exit(1);
253 }
254}
255#endif
256
257void HS_CDECL loadSignatureList(const string &inFile,
258 SignatureSet &signatures) {
259 ifstream f(inFile.c_str());
260 if (!f.good()) {
261 cerr << "Can't open file: '" << inFile << "'" << endl;
262 exit(1);
263 }
264
265 unsigned lineNum = 0;
266 string line;
267 while (getline(f, line)) {
268 lineNum++;
269
270 // if line is empty, or a comment, we can skip it
271 if (line.empty() || line[0] == '#') {
272 continue;
273 }
274
275 unsigned id;
276 if (fromString(line, id)) {
277 signatures.push_back(id);
278 } else {
279 // Parse error occurred
280 failLine(lineNum, inFile, line, "Unable to parse ID.");
281 }
282 }
283}
284
285ExpressionMap limitToSignatures(const ExpressionMap &exprMap,
286 const SignatureSet &signatures) {
287 ExpressionMap keepers;
288
289 for (auto id : signatures) {
290 auto match = exprMap.find(id);
291 if (match == exprMap.end()) {
292 cerr << "Unable to find signature " << id
293 << " in expression set!" << endl;
294 exit(1);
295 }
296 keepers.insert(*match);
297 }
298
299 return keepers;
300}
301