1/*
2 * Copyright (c) 2015-2019, Intel Corporation
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions are met:
6 *
7 * * Redistributions of source code must retain the above copyright notice,
8 * this list of conditions and the following disclaimer.
9 * * Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
12 * * Neither the name of Intel Corporation nor the names of its contributors
13 * may be used to endorse or promote products derived from this software
14 * without specific prior written permission.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
20 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26 * POSSIBILITY OF SUCH DAMAGE.
27 */
28
29/** \file
30 * \brief Hamster Wheel Literal Matcher: literal representation at build time.
31 */
32
33#ifndef HWLM_LITERAL_H
34#define HWLM_LITERAL_H
35
36#include "hwlm.h"
37#include "ue2common.h"
38
39#include <string>
40#include <tuple>
41#include <vector>
42
43namespace ue2 {
44
/**
 * \brief Upper bound on the number of entries in hwlmLiteral::msk and
 * hwlmLiteral::cmp.
 */
#define HWLM_MASKLEN 8

/**
 * \brief Sentinel literal ID (all bits set).
 *
 * Used as the default value of hwlmLiteral::included_id. The replacement list
 * is parenthesised so the macro always expands as a single operand.
 */
#define INVALID_LIT_ID (~0U)
49
50/** \brief Class representing a literal, fed to \ref hwlmBuild. */
51struct hwlmLiteral {
52 std::string s; //!< \brief The literal itself.
53
54 /** \brief The ID to pass to the callback if this literal matches.
55 *
56 * Note that the special value 0xFFFFFFFF is reserved for internal use and
57 * should not be used. */
58 u32 id;
59
60 bool nocase; //!< \brief True if literal is case-insensitive.
61
62 /** \brief Matches for runs of this literal can be quashed.
63 *
64 * Advisory flag meaning that there is no value in returning runs of
65 * additional matches for a literal after the first one, so such matches
66 * can be quashed by the literal matcher. */
67 bool noruns;
68
69 /** \brief included literal id. */
70 u32 included_id = INVALID_LIT_ID;
71
72 /** \brief Squash mask for FDR's confirm mask for included literals.
73 *
74 * In FDR confirm, if we have included literal in another bucket,
75 * we can use this mask to squash the bit for the bucket in FDR confirm
76 * mask and then run programs of included literal directly and avoid
77 * confirm work.
78 *
79 * This value is calculated in FDR compile code once bucket assignment is
80 * completed
81 */
82 u8 squash = 0;
83
84 /** \brief Set of groups that literal belongs to.
85 *
86 * Use \ref HWLM_ALL_GROUPS for a literal that could match regardless of
87 * the groups that are switched on. */
88 hwlm_group_t groups;
89
90 /** \brief Supplementary comparison mask.
91 *
92 * These two values add a supplementary comparison that is done over the
93 * final 8 bytes of the string -- if v is those bytes, then the string must
94 * match as well as (v & msk) == cmp.
95 *
96 * An empty msk is the safe way of not adding any comparison to the string
97 * unnecessarily filling in msk may turn off optimizations.
98 *
99 * The msk/cmp mechanism must NOT place a value into the literal that
100 * conflicts with the contents of the string, but can be allowed to add
101 * additional power within the string -- for example, to allow some case
102 * sensitivity within a case-insensitive string.
103
104 * Values are stored in memory order -- i.e. the last byte of the mask
105 * corresponds to the last byte of the string. Both vectors must be the
106 * same size, and must not exceed \ref HWLM_MASKLEN in length.
107 */
108 std::vector<u8> msk;
109
110 /** \brief Supplementary comparison value.
111 *
112 * See documentation for \ref msk.
113 */
114 std::vector<u8> cmp;
115
116 bool pure; //!< \brief The pass-on of pure flag from LitFragment.
117
118 /** \brief Complete constructor, takes group information and msk/cmp.
119 *
120 * This constructor takes a msk/cmp pair. Both must be vectors of length <=
121 * \ref HWLM_MASKLEN. */
122 hwlmLiteral(const std::string &s_in, bool nocase_in, bool noruns_in,
123 u32 id_in, hwlm_group_t groups_in,
124 const std::vector<u8> &msk_in, const std::vector<u8> &cmp_in,
125 bool pure_in = false);
126
127 /** \brief Simple constructor: no group information, no msk/cmp.
128 *
129 * This constructor is only used in internal unit test. */
130 hwlmLiteral(const std::string &s_in, bool nocase_in, u32 id_in)
131 : hwlmLiteral(s_in, nocase_in, false, id_in, HWLM_ALL_GROUPS, {}, {}) {}
132};
133
134inline
135bool operator<(const hwlmLiteral &a, const hwlmLiteral &b) {
136 return std::tie(a.id, a.s, a.nocase, a.noruns, a.groups, a.msk, a.cmp) <
137 std::tie(b.id, b.s, b.nocase, b.noruns, b.groups, b.msk, b.cmp);
138}
139
140inline
141bool operator==(const hwlmLiteral &a, const hwlmLiteral &b) {
142 return a.id == b.id && a.s == b.s && a.nocase == b.nocase &&
143 a.noruns == b.noruns && a.groups == b.groups && a.msk == b.msk &&
144 a.cmp == b.cmp;
145}
146
147/**
148 * Consistency test; returns false if the given msk/cmp test can never match
149 * the literal string s.
150 */
151bool maskIsConsistent(const std::string &s, bool nocase,
152 const std::vector<u8> &msk, const std::vector<u8> &cmp);
153
154} // namespace ue2
155
156#endif // HWLM_LITERAL_H
157