1/*
2 * Copyright (c) 2015, Intel Corporation
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions are met:
6 *
7 * * Redistributions of source code must retain the above copyright notice,
8 * this list of conditions and the following disclaimer.
9 * * Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
12 * * Neither the name of Intel Corporation nor the names of its contributors
13 * may be used to endorse or promote products derived from this software
14 * without specific prior written permission.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
20 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26 * POSSIBILITY OF SUCH DAMAGE.
27 */
28
29/*
30 * Hyperscan example program 1: simplegrep
31 *
32 * This is a simple example of Hyperscan's most basic functionality: it will
33 * search a given input file for a pattern supplied as a command-line argument.
34 * It is intended to demonstrate correct usage of the hs_compile and hs_scan
35 * functions of Hyperscan.
36 *
37 * Patterns are scanned in 'DOTALL' mode, which is equivalent to PCRE's '/s'
38 * modifier. This behaviour can be changed by modifying the "flags" argument to
39 * hs_compile.
40 *
41 * Build instructions:
42 *
43 * gcc -o simplegrep simplegrep.c $(pkg-config --cflags --libs libhs)
44 *
45 * Usage:
46 *
47 * ./simplegrep <pattern> <input file>
48 *
49 * Example:
50 *
51 * ./simplegrep int simplegrep.c
52 *
53 */
54
55#include <errno.h>
56#include <limits.h>
57#include <stdio.h>
58#include <stdlib.h>
59#include <string.h>
60
61#include <hs.h>
62
/**
 * Match callback handed to hs_scan; it runs once for every match reported.
 *
 * @a ctx is an opaque, application-owned pointer that is passed through to
 * each invocation. This example supplies the pattern string there, so the
 * handler can print the pattern together with the @a to offset of the match.
 * The @a id, @a from and @a flags arguments are not used here.
 *
 * Always returns 0.
 */
static int eventHandler(unsigned int id, unsigned long long from,
                        unsigned long long to, unsigned int flags, void *ctx) {
    const char *matchedPattern = ctx;

    /* Part of the callback signature, but irrelevant to this example. */
    (void)id;
    (void)from;
    (void)flags;

    printf("Match for pattern \"%s\" at offset %llu\n", matchedPattern, to);
    return 0;
}
74
/**
 * Read the file named @a inputFN into a newly allocated buffer.
 *
 * On success the buffer is returned and @a length receives the number of bytes
 * stored in it. The caller owns the buffer and must release it with free().
 * Inputs larger than UINT_MAX bytes are clipped to UINT_MAX bytes, with a
 * warning, because the length is reported as an unsigned int.
 *
 * On failure (the file cannot be opened, measured or read, it is empty, or
 * memory cannot be allocated) a message is written to stderr, NULL is returned
 * and @a length is left unmodified.
 */
static char *readInputData(const char *inputFN, unsigned int *length) {
    FILE *f = fopen(inputFN, "rb");
    if (!f) {
        fprintf(stderr, "ERROR: unable to open file \"%s\": %s\n", inputFN,
                strerror(errno));
        return NULL;
    }

    /* We use fseek/ftell to get our data length, in order to keep this example
     * code as portable as possible. */
    if (fseek(f, 0, SEEK_END) != 0) {
        fprintf(stderr, "ERROR: unable to seek file \"%s\": %s\n", inputFN,
                strerror(errno));
        fclose(f);
        return NULL;
    }
    long dataLen = ftell(f);
    if (dataLen < 0) {
        fprintf(stderr, "ERROR: ftell() failed: %s\n", strerror(errno));
        fclose(f);
        return NULL;
    }
    if (fseek(f, 0, SEEK_SET) != 0) {
        fprintf(stderr, "ERROR: unable to seek file \"%s\": %s\n", inputFN,
                strerror(errno));
        fclose(f);
        return NULL;
    }

    /* Hyperscan's hs_scan function accepts length as an unsigned int, so we
     * limit the size of our buffer appropriately. */
    if ((unsigned long)dataLen > UINT_MAX) {
        dataLen = UINT_MAX;
        printf("WARNING: clipping data to %ld bytes\n", dataLen);
    } else if (dataLen == 0) {
        fprintf(stderr, "ERROR: input file \"%s\" is empty\n", inputFN);
        fclose(f);
        return NULL;
    }

    char *inputData = malloc((size_t)dataLen);
    if (!inputData) {
        fprintf(stderr, "ERROR: unable to malloc %ld bytes\n", dataLen);
        fclose(f);
        return NULL;
    }

    char *p = inputData;
    size_t bytesLeft = (size_t)dataLen;
    while (bytesLeft) {
        size_t bytesRead = fread(p, 1, bytesLeft, f);
        bytesLeft -= bytesRead;
        p += bytesRead;
        if (ferror(f) != 0) {
            fprintf(stderr, "ERROR: fread() failed\n");
            free(inputData);
            fclose(f);
            return NULL;
        }
        if (bytesRead == 0) {
            /* End of file before the size ftell() reported (the file shrank,
             * or it is a pseudo-file that over-reports its size). fread()
             * would keep returning 0 without setting the error indicator, so
             * stop here instead of spinning forever. */
            break;
        }
    }

    fclose(f);

    size_t bytesStored = (size_t)dataLen - bytesLeft;
    if (bytesStored == 0) {
        fprintf(stderr, "ERROR: input file \"%s\" is empty\n", inputFN);
        free(inputData);
        return NULL;
    }
    if (bytesLeft != 0) {
        fprintf(stderr,
                "WARNING: expected %ld bytes from \"%s\" but only read %zu\n",
                dataLen, inputFN, bytesStored);
    }

    /* bytesStored never exceeds dataLen, which was clipped to UINT_MAX above,
     * so this narrowing cannot lose data. */
    *length = (unsigned int)bytesStored;
    return inputData;
}
145
146int main(int argc, char *argv[]) {
147 if (argc != 3) {
148 fprintf(stderr, "Usage: %s <pattern> <input file>\n", argv[0]);
149 return -1;
150 }
151
152 char *pattern = argv[1];
153 char *inputFN = argv[2];
154
155 /* First, we attempt to compile the pattern provided on the command line.
156 * We assume 'DOTALL' semantics, meaning that the '.' meta-character will
157 * match newline characters. The compiler will analyse the given pattern and
158 * either return a compiled Hyperscan database, or an error message
159 * explaining why the pattern didn't compile.
160 */
161 hs_database_t *database;
162 hs_compile_error_t *compile_err;
163 if (hs_compile(pattern, HS_FLAG_DOTALL, HS_MODE_BLOCK, NULL, &database,
164 &compile_err) != HS_SUCCESS) {
165 fprintf(stderr, "ERROR: Unable to compile pattern \"%s\": %s\n",
166 pattern, compile_err->message);
167 hs_free_compile_error(compile_err);
168 return -1;
169 }
170
171 /* Next, we read the input data file into a buffer. */
172 unsigned int length;
173 char *inputData = readInputData(inputFN, &length);
174 if (!inputData) {
175 hs_free_database(database);
176 return -1;
177 }
178
179 /* Finally, we issue a call to hs_scan, which will search the input buffer
180 * for the pattern represented in the bytecode. Note that in order to do
181 * this, scratch space needs to be allocated with the hs_alloc_scratch
182 * function. In typical usage, you would reuse this scratch space for many
183 * calls to hs_scan, but as we're only doing one, we'll be allocating it
184 * and deallocating it as soon as our matching is done.
185 *
186 * When matches occur, the specified callback function (eventHandler in
187 * this file) will be called. Note that although it is reminiscent of
188 * asynchronous APIs, Hyperscan operates synchronously: all matches will be
189 * found, and all callbacks issued, *before* hs_scan returns.
190 *
191 * In this example, we provide the input pattern as the context pointer so
192 * that the callback is able to print out the pattern that matched on each
193 * match event.
194 */
195 hs_scratch_t *scratch = NULL;
196 if (hs_alloc_scratch(database, &scratch) != HS_SUCCESS) {
197 fprintf(stderr, "ERROR: Unable to allocate scratch space. Exiting.\n");
198 free(inputData);
199 hs_free_database(database);
200 return -1;
201 }
202
203 printf("Scanning %u bytes with Hyperscan\n", length);
204
205 if (hs_scan(database, inputData, length, 0, scratch, eventHandler,
206 pattern) != HS_SUCCESS) {
207 fprintf(stderr, "ERROR: Unable to scan input buffer. Exiting.\n");
208 hs_free_scratch(scratch);
209 free(inputData);
210 hs_free_database(database);
211 return -1;
212 }
213
214 /* Scanning is complete, any matches have been handled, so now we just
215 * clean up and exit.
216 */
217 hs_free_scratch(scratch);
218 free(inputData);
219 hs_free_database(database);
220 return 0;
221}
222