1/*
2 * Copyright (c) 2015-2018, Intel Corporation
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions are met:
6 *
7 * * Redistributions of source code must retain the above copyright notice,
8 * this list of conditions and the following disclaimer.
9 * * Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
12 * * Neither the name of Intel Corporation nor the names of its contributors
13 * may be used to endorse or promote products derived from this software
14 * without specific prior written permission.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
20 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26 * POSSIBILITY OF SUCH DAMAGE.
27 */
28
29#ifndef HS_RUNTIME_H_
30#define HS_RUNTIME_H_
31
32#include <stdlib.h>
33
34/**
35 * @file
36 * @brief The Hyperscan runtime API definition.
37 *
38 * Hyperscan is a high speed regular expression engine.
39 *
40 * This header contains functions for using compiled Hyperscan databases for
41 * scanning data at runtime.
42 */
43
44#include "hs_common.h"
45
46#ifdef __cplusplus
47extern "C"
48{
49#endif
50
51/**
52 * Forward declaration of the stream type; its layout is not defined here.
53 */
54struct hs_stream;
55
56/**
57 * The stream identifier type, as produced by @ref hs_open_stream().
58 */
59typedef struct hs_stream hs_stream_t;
60
61struct hs_scratch; /* forward declaration only; layout is not defined here */
62
63/**
64 * A Hyperscan scratch space (see @ref hs_alloc_scratch()).
65 */
66typedef struct hs_scratch hs_scratch_t;
67
68/**
69 * Definition of the match event callback function type.
70 *
71 * A callback function matching the defined type is supplied by the
72 * application when calling the @ref hs_scan(), @ref hs_scan_vector() or @ref
73 * hs_scan_stream() functions (or other streaming calls which can produce
74 * matches).
75 *
76 * This callback function will be invoked whenever a match is located in the
77 * target data during the execution of a scan. The details of the match are
78 * passed in as parameters to the callback function, and the callback function
79 * should return a value indicating whether or not matching should continue on
80 * the target data. If no callbacks are desired from a scan call, NULL may be
81 * provided in order to suppress match production.
82 *
83 * This callback function should not attempt to call Hyperscan API functions on
84 * the same stream nor should it attempt to reuse the scratch space allocated
85 * for the API calls that caused it to be triggered. Making another call to the
86 * Hyperscan library with completely independent parameters should work (for
87 * example, scanning a different database in a new stream and with new scratch
88 * space), but reusing data structures like stream state and/or scratch space
89 * will produce undefined behaviour.
90 *
91 * @param id
92 * The ID number of the expression that matched. If the expression was a
93 * single expression compiled with @ref hs_compile(), this value will be
94 * zero.
95 *
96 * @param from
97 * - If a start of match flag is enabled for the current pattern, this
98 * argument will be set to the start of match for the pattern, provided
99 * that the start of match value lies within the current 'start of match
100 * horizon' chosen by one of the SOM_HORIZON mode flags.
101 *
102 * - If the start of match value lies outside this horizon (possible only
103 * when the SOM_HORIZON value is not @ref HS_MODE_SOM_HORIZON_LARGE),
104 * the @p from value will be set to @ref HS_OFFSET_PAST_HORIZON.
105 *
106 * - This argument will be set to zero if the Start of Match flag is not
107 * enabled for the given pattern.
108 *
109 * @param to
110 * The offset after the last byte that matches the expression.
111 *
112 * @param flags
113 * This is provided for future use and is unused at present.
114 *
115 * @param context
116 * The user-defined pointer passed to the API call that produced this match
117 * (for example @ref hs_scan(), @ref hs_scan_vector(), @ref hs_scan_stream()).
118 *
119 * @return
120 * Non-zero if the matching should cease, else zero. If scanning is
121 * performed in streaming mode and a non-zero value is returned, any
122 * subsequent calls to @ref hs_scan_stream() for that stream will
123 * immediately return with @ref HS_SCAN_TERMINATED.
124 */
125typedef int (HS_CDECL *match_event_handler)(unsigned int id,
126 unsigned long long from,
127 unsigned long long to,
128 unsigned int flags,
129 void *context);
130
131/**
132 * Open and initialise a stream (release it with @ref hs_close_stream()).
133 *
134 * @param db
135 * A compiled pattern database.
136 *
137 * @param flags
138 * Flags modifying the behaviour of the stream. This parameter is provided
139 * for future use and is unused at present.
140 *
141 * @param stream
142 * Output parameter: on success, receives a pointer to the generated
143 * @ref hs_stream_t; NULL on failure.
144 *
145 * @return
146 * @ref HS_SUCCESS on success, other values on failure.
147 */
148hs_error_t HS_CDECL hs_open_stream(const hs_database_t *db, unsigned int flags,
149 hs_stream_t **stream);
150
151/**
152 * Write data to be scanned to the opened stream.
153 *
154 * This is the function call in which the actual pattern matching takes place
155 * as data is written to the stream. Matches will be returned via the @ref
156 * match_event_handler callback supplied.
157 *
158 * @param id
159 * The stream ID (returned by @ref hs_open_stream()) to which the data
160 * will be written.
161 *
162 * @param data
163 * Pointer to the data to be scanned.
164 *
165 * @param length
166 * The number of bytes to scan, starting at @p data.
167 *
168 * @param flags
169 * Flags modifying the behaviour of the stream. This parameter is provided
170 * for future use and is unused at present.
171 *
172 * @param scratch
173 * A per-thread scratch space allocated by @ref hs_alloc_scratch().
174 *
175 * @param onEvent
176 * Pointer to a match event callback function. If a NULL pointer is given,
177 * no matches will be returned.
178 *
179 * @param ctxt
180 * The user-defined pointer which will be passed to the callback function
181 * when a match occurs.
182 *
183 * @return
184 * Returns @ref HS_SUCCESS on success; @ref HS_SCAN_TERMINATED if the
185 * match callback indicated, in this or an earlier call on this stream,
186 * that scanning should stop; other values on error.
187 */
188hs_error_t HS_CDECL hs_scan_stream(hs_stream_t *id, const char *data,
189 unsigned int length, unsigned int flags,
190 hs_scratch_t *scratch,
191 match_event_handler onEvent, void *ctxt);
192
193/**
194 * Close a stream.
195 *
196 * This function completes matching on the given stream and frees the memory
197 * associated with the stream state. After this call, the stream pointed to by
198 * @p id is invalid and can no longer be used. To reuse the stream state after
199 * completion, rather than closing it, the @ref hs_reset_stream() function can
200 * be used.
201 *
202 * This function must be called for any stream created with @ref
203 * hs_open_stream(), even if scanning has been terminated by a non-zero return
204 * from the match callback function.
205 *
206 * Note: This operation may result in matches being returned (via calls to the
207 * match event callback) for expressions anchored to the end of the data stream
208 * (for example, via the use of the `$` meta-character). If these matches are
209 * not desired, NULL may be provided as the @ref match_event_handler callback.
210 *
211 * If NULL is provided as the @ref match_event_handler callback, it is
212 * permissible to provide a NULL scratch.
213 *
214 * @param id
215 * The stream ID returned by @ref hs_open_stream().
216 *
217 * @param scratch
218 * A per-thread scratch space allocated by @ref hs_alloc_scratch(). This is
219 * allowed to be NULL only if the @p onEvent callback is also NULL.
220 *
221 * @param onEvent
222 * Pointer to a match event callback function. If a NULL pointer is given,
223 * no matches will be returned.
224 *
225 * @param ctxt
226 * The user-defined pointer which will be passed to the callback function
227 * when a match occurs.
228 *
229 * @return
230 * Returns @ref HS_SUCCESS on success, other values on failure.
231 */
232hs_error_t HS_CDECL hs_close_stream(hs_stream_t *id, hs_scratch_t *scratch,
233 match_event_handler onEvent, void *ctxt);
234
235/**
236 * Reset a stream to an initial state.
237 *
238 * Conceptually, this is equivalent to performing @ref hs_close_stream() on the
239 * given stream, followed by a @ref hs_open_stream(). This new stream replaces
240 * the original stream in memory, avoiding the overhead of freeing the old
241 * stream and allocating the new one.
242 *
243 * Note: This operation may result in matches being returned (via calls to the
244 * match event callback) for expressions anchored to the end of the original
245 * data stream (for example, via the use of the `$` meta-character). If these
246 * matches are not desired, NULL may be provided as the @ref match_event_handler
247 * callback.
248 *
249 * Note: the reset stream remains tied to the same database as the original.
250 *
251 * @param id
252 * The stream (as created by @ref hs_open_stream()) to be replaced.
253 *
254 * @param flags
255 * Flags modifying the behaviour of the stream. This parameter is provided
256 * for future use and is unused at present.
257 *
258 * @param scratch
259 * A per-thread scratch space allocated by @ref hs_alloc_scratch(). This is
260 * allowed to be NULL only if the @p onEvent callback is also NULL.
261 *
262 * @param onEvent
263 * Pointer to a match event callback function. If a NULL pointer is given,
264 * no matches will be returned.
265 *
266 * @param context
267 * The user-defined pointer which will be passed to the callback function
268 * when a match occurs.
269 *
270 * @return
271 * @ref HS_SUCCESS on success, other values on failure.
272 */
273hs_error_t HS_CDECL hs_reset_stream(hs_stream_t *id, unsigned int flags,
274 hs_scratch_t *scratch,
275 match_event_handler onEvent, void *context);
276
277/**
278 * Duplicate the given stream. The new stream will have the same state as the
279 * original including the current stream offset.
280 *
281 * @param to_id
282 * Output parameter: on success, receives a pointer to the new, copied
283 * @ref hs_stream_t; NULL on failure.
284 *
285 * @param from_id
286 * The stream (as created by @ref hs_open_stream()) to be copied.
287 *
288 * @return
289 * @ref HS_SUCCESS on success, other values on failure.
290 */
291hs_error_t HS_CDECL hs_copy_stream(hs_stream_t **to_id,
292 const hs_stream_t *from_id);
293
294/**
295 * Duplicate the given 'from' stream state onto the 'to' stream. The 'to' stream
296 * will first be reset (reporting any EOD matches if a non-NULL @p onEvent
297 * callback handler is provided).
298 *
299 * Note: the 'to' stream and the 'from' stream must be open against the same
300 * database.
301 *
302 * @param to_id
303 * The existing 'to' stream, which will be reset and then overwritten with
304 * a copy of the state of @p from_id.
305 *
306 * @param from_id
307 * The stream (as created by @ref hs_open_stream()) to be copied.
308 *
309 * @param scratch
310 * A per-thread scratch space allocated by @ref hs_alloc_scratch(). This is
311 * allowed to be NULL only if the @p onEvent callback is also NULL.
312 *
313 * @param onEvent
314 * Pointer to a match event callback function. If a NULL pointer is given,
315 * no matches will be returned.
316 *
317 * @param context
318 * The user-defined pointer which will be passed to the callback function
319 * when a match occurs.
320 *
321 * @return
322 * @ref HS_SUCCESS on success, other values on failure.
323 */
324hs_error_t HS_CDECL hs_reset_and_copy_stream(hs_stream_t *to_id,
325 const hs_stream_t *from_id,
326 hs_scratch_t *scratch,
327 match_event_handler onEvent,
328 void *context);
329
330/**
331 * Creates a compressed representation of the provided stream in the buffer
332 * provided. This compressed representation can be converted back into a stream
333 * state by using @ref hs_expand_stream() or @ref hs_reset_and_expand_stream().
334 * The size of the compressed representation will be placed into @p used_space.
335 *
336 * If there is not sufficient space in the buffer to hold the compressed
337 * representation, @ref HS_INSUFFICIENT_SPACE will be returned and @p used_space
338 * will be populated with the amount of space required.
339 *
340 * Note: this function does not close the provided stream; you may continue to
341 * use the stream or free it with @ref hs_close_stream().
342 *
343 * @param stream
344 * The stream (as created by @ref hs_open_stream()) to be compressed.
345 *
346 * @param buf
347 * Buffer to write the compressed representation into. Note: if the call is
348 * just being used to determine the amount of space required, it is allowed
349 * to pass NULL here and @p buf_space as 0.
350 *
351 * @param buf_space
352 * The number of bytes in @p buf. If @p buf_space is too small, the call
353 * will fail with @ref HS_INSUFFICIENT_SPACE.
354 *
355 * @param used_space
356 * Pointer to where the amount of used space will be written. On success,
357 * the used buffer space is always less than or equal to @p buf_space. If
358 * the call fails with @ref HS_INSUFFICIENT_SPACE, the amount of buffer
359 * space required is written here instead.
360 *
361 * @return
362 * @ref HS_SUCCESS on success, @ref HS_INSUFFICIENT_SPACE if the provided
363 * buffer is too small.
364 */
365hs_error_t HS_CDECL hs_compress_stream(const hs_stream_t *stream, char *buf,
366 size_t buf_space, size_t *used_space);
367
368/**
369 * Decompresses a compressed representation created by @ref hs_compress_stream()
370 * into a new stream.
371 *
372 * Note: @p buf must correspond to a complete compressed representation created
373 * by @ref hs_compress_stream() of a stream that was opened against @p db. It is
374 * not always possible to detect misuse of this API and behaviour is undefined
375 * if these properties are not satisfied.
376 *
377 * @param db
378 * The compiled pattern database that the compressed stream was opened
379 * against.
380 *
381 * @param stream
382 * Output parameter: on success, receives a pointer to the expanded
383 * @ref hs_stream_t; NULL on failure.
384 *
385 * @param buf
386 * A compressed representation of a stream. These compressed forms are
387 * created by @ref hs_compress_stream().
388 *
389 * @param buf_size
390 * The size in bytes of the compressed representation.
391 *
392 * @return
393 * @ref HS_SUCCESS on success, other values on failure.
394 */
395hs_error_t HS_CDECL hs_expand_stream(const hs_database_t *db,
396 hs_stream_t **stream, const char *buf,
397 size_t buf_size);
398
399/**
400 * Decompresses a compressed representation created by @ref hs_compress_stream()
401 * on top of the 'to' stream. The 'to' stream will first be reset (reporting
402 * any EOD matches if a non-NULL @p onEvent callback handler is provided).
403 *
404 * Note: the 'to' stream must be opened against the same database as the
405 * compressed stream.
406 *
407 * Note: @p buf must correspond to a complete compressed representation created
408 * by @ref hs_compress_stream() of a stream that was opened against the same
409 * database as the 'to' stream. It is not always possible to detect misuse of
410 * this API and behaviour is undefined if these properties are not satisfied.
411 *
412 * @param to_stream
413 * A pointer to a valid stream state, which will be reset and then
414 * overwritten with the stream expanded from @p buf.
415 *
416 * @param buf
417 * A compressed representation of a stream. These compressed forms are
418 * created by @ref hs_compress_stream().
419 *
420 * @param buf_size
421 * The size in bytes of the compressed representation.
422 *
423 * @param scratch
424 * A per-thread scratch space allocated by @ref hs_alloc_scratch(). This is
425 * allowed to be NULL only if the @p onEvent callback is also NULL.
426 *
427 * @param onEvent
428 * Pointer to a match event callback function. If a NULL pointer is given,
429 * no matches will be returned.
430 *
431 * @param context
432 * The user-defined pointer which will be passed to the callback function
433 * when a match occurs.
434 *
435 * @return
436 * @ref HS_SUCCESS on success, other values on failure.
437 */
438hs_error_t HS_CDECL hs_reset_and_expand_stream(hs_stream_t *to_stream,
439 const char *buf, size_t buf_size,
440 hs_scratch_t *scratch,
441 match_event_handler onEvent,
442 void *context);
443
444/**
445 * The block (non-streaming) regular expression scanner.
446 *
447 * This is the function call in which the actual pattern matching takes place
448 * for block-mode pattern databases.
449 *
450 * @param db
451 * A compiled pattern database.
452 *
453 * @param data
454 * Pointer to the data to be scanned.
455 *
456 * @param length
457 * The number of bytes to scan, starting at @p data.
458 *
459 * @param flags
460 * Flags modifying the behaviour of this function. This parameter is
461 * provided for future use and is unused at present.
462 *
463 * @param scratch
464 * A per-thread scratch space allocated by @ref hs_alloc_scratch() for this
465 * database.
466 *
467 * @param onEvent
468 * Pointer to a match event callback function. If a NULL pointer is given,
469 * no matches will be returned.
470 *
471 * @param context
472 * The user-defined pointer which will be passed to the callback function.
473 *
474 * @return
475 * Returns @ref HS_SUCCESS on success; @ref HS_SCAN_TERMINATED if the
476 * match callback indicated that scanning should stop; other values on
477 * error.
478 */
479hs_error_t HS_CDECL hs_scan(const hs_database_t *db, const char *data,
480 unsigned int length, unsigned int flags,
481 hs_scratch_t *scratch, match_event_handler onEvent,
482 void *context);
483
484/**
485 * The vectored regular expression scanner.
486 *
487 * This is the function call in which the actual pattern matching takes place
488 * for vectoring-mode pattern databases.
489 *
490 * @param db
491 * A compiled pattern database.
492 *
493 * @param data
494 * An array of pointers to the data blocks to be scanned.
495 *
496 * @param length
497 * An array of lengths (in bytes) of each data block to scan.
498 *
499 * @param count
500 * Number of data blocks to scan. This should correspond to the size of
501 * the @p data and @p length arrays.
502 *
503 * @param flags
504 * Flags modifying the behaviour of this function. This parameter is
505 * provided for future use and is unused at present.
506 *
507 * @param scratch
508 * A per-thread scratch space allocated by @ref hs_alloc_scratch() for
509 * this database.
510 *
511 * @param onEvent
512 * Pointer to a match event callback function. If a NULL pointer is given,
513 * no matches will be returned.
514 *
515 * @param context
516 * The user-defined pointer which will be passed to the callback function.
517 *
518 * @return
519 * Returns @ref HS_SUCCESS on success; @ref HS_SCAN_TERMINATED if the match
520 * callback indicated that scanning should stop; other values on error.
521 */
522hs_error_t HS_CDECL hs_scan_vector(const hs_database_t *db,
523 const char *const *data,
524 const unsigned int *length,
525 unsigned int count, unsigned int flags,
526 hs_scratch_t *scratch,
527 match_event_handler onEvent, void *context);
528
529/**
530 * Allocate a "scratch" space for use by Hyperscan.
531 *
532 * This is required for runtime use, and one scratch space per thread, or
533 * concurrent caller, is required. Any allocator callback set by @ref
534 * hs_set_scratch_allocator() or @ref hs_set_allocator() will be used by this
535 * function.
536 *
537 * @param db
538 * The database, as produced by @ref hs_compile().
539 *
540 * @param scratch
541 * On first allocation, a pointer to NULL should be provided so a new
542 * scratch can be allocated. If a scratch block has been previously
543 * allocated, then a pointer to it should be passed back in to see if it
544 * is valid for this database block. If a new scratch block is required,
545 * the original will be freed and the new one returned, otherwise the
546 * previous scratch block will be returned. On success, the scratch block
547 * will be suitable for use with the provided database in addition to any
548 * databases that the original scratch space was suitable for.
549 *
550 * @return
551 * @ref HS_SUCCESS on successful allocation; @ref HS_NOMEM if the
552 * allocation fails. Other errors may be returned if invalid parameters
553 * are specified.
554 */
555hs_error_t HS_CDECL hs_alloc_scratch(const hs_database_t *db,
556 hs_scratch_t **scratch);
557
558/**
559 * Allocate a scratch space that is a clone of an existing scratch space.
560 *
561 * This is useful when multiple concurrent threads will be using the same set
562 * of compiled databases, and another scratch space is required. Any allocator
563 * callback set by @ref hs_set_scratch_allocator() or @ref hs_set_allocator()
564 * will be used by this function.
565 *
566 * @param src
567 * The existing @ref hs_scratch_t to be cloned.
568 *
569 * @param dest
570 * Output parameter: a pointer to the new scratch space is returned here.
571 *
572 * @return
573 * @ref HS_SUCCESS on success; @ref HS_NOMEM if the allocation fails.
574 * Other errors may be returned if invalid parameters are specified.
575 */
576hs_error_t HS_CDECL hs_clone_scratch(const hs_scratch_t *src,
577 hs_scratch_t **dest);
578
579/**
580 * Provides the size of the given scratch space.
581 *
582 * @param scratch
583 * A per-thread scratch space allocated by @ref hs_alloc_scratch() or @ref
584 * hs_clone_scratch().
585 *
586 * @param scratch_size
587 * Output parameter: on success, the size of the scratch space in bytes is
588 * placed here.
589 *
590 * @return
591 * @ref HS_SUCCESS on success, other values on failure.
592 */
593hs_error_t HS_CDECL hs_scratch_size(const hs_scratch_t *scratch,
594 size_t *scratch_size);
595
596/**
597 * Free a scratch block previously allocated by @ref hs_alloc_scratch() or @ref
598 * hs_clone_scratch().
599 *
600 * The free callback set by @ref hs_set_scratch_allocator() or @ref
601 * hs_set_allocator() will be used by this function.
602 *
603 * @param scratch
604 * The scratch block to be freed. Passing NULL is also safe.
605 *
606 * @return
607 * @ref HS_SUCCESS on success, other values on failure.
608 */
609hs_error_t HS_CDECL hs_free_scratch(hs_scratch_t *scratch);
610
611/**
612 * Match callback @p from value, indicating that the start of this match was
613 * too early to be tracked with the requested SOM_HORIZON precision.
614 */
615#define HS_OFFSET_PAST_HORIZON (~0ULL)
616
617#ifdef __cplusplus
618} /* extern "C" */
619#endif
620
621#endif /* HS_RUNTIME_H_ */
622