1 | /* |
2 | * Copyright (c) 2015-2017, Intel Corporation |
3 | * |
4 | * Redistribution and use in source and binary forms, with or without |
5 | * modification, are permitted provided that the following conditions are met: |
6 | * |
7 | * * Redistributions of source code must retain the above copyright notice, |
8 | * this list of conditions and the following disclaimer. |
9 | * * Redistributions in binary form must reproduce the above copyright |
10 | * notice, this list of conditions and the following disclaimer in the |
11 | * documentation and/or other materials provided with the distribution. |
12 | * * Neither the name of Intel Corporation nor the names of its contributors |
13 | * may be used to endorse or promote products derived from this software |
14 | * without specific prior written permission. |
15 | * |
16 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" |
17 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
18 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
19 | * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE |
20 | * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR |
21 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF |
22 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS |
23 | * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN |
24 | * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
25 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
26 | * POSSIBILITY OF SUCH DAMAGE. |
27 | */ |
28 | |
29 | #include "fdr_compile_internal.h" |
30 | #include "fdr_engine_description.h" |
31 | #include "hs_compile.h" |
32 | #include "util/target_info.h" |
33 | #include "util/compare.h" // for ourisalpha() |
34 | #include "util/make_unique.h" |
35 | |
36 | #include <cassert> |
37 | #include <cstdlib> |
38 | #include <map> |
39 | #include <string> |
40 | |
41 | using namespace std; |
42 | |
43 | namespace ue2 { |
44 | |
45 | FDREngineDescription::FDREngineDescription(const FDREngineDef &def) |
46 | : EngineDescription(def.id, targetByArchFeatures(def.cpu_features), |
47 | def.numBuckets), |
48 | schemeWidth(def.schemeWidth), stride(0), bits(0) {} |
49 | |
50 | u32 FDREngineDescription::getDefaultFloodSuffixLength() const { |
51 | // rounding up, so that scheme width 32 and 6 buckets is 6 not 5! |
52 | // the +1 avoids pain due to various reach choices |
53 | return ((getSchemeWidth() + getNumBuckets() - 1) / getNumBuckets()) + 1; |
54 | } |
55 | |
56 | void getFdrDescriptions(vector<FDREngineDescription> *out) { |
57 | static const FDREngineDef def = {0, 64, 8, 0}; |
58 | out->clear(); |
59 | out->emplace_back(def); |
60 | } |
61 | |
62 | static |
63 | u32 findDesiredStride(size_t num_lits, size_t min_len, size_t min_len_count) { |
64 | u32 desiredStride = 1; // always our safe fallback |
65 | if (min_len > 1) { |
66 | if (num_lits < 250) { |
67 | // small cases we just go for it |
68 | desiredStride = min_len; |
69 | } else if (num_lits < 800) { |
70 | // intermediate cases |
71 | desiredStride = min_len - 1; |
72 | } else if (num_lits < 5000) { |
73 | // for larger but not huge sizes, go to stride 2 only if we have at |
74 | // least minlen 3 |
75 | desiredStride = MIN(min_len - 1, 2); |
76 | } |
77 | } |
78 | |
79 | // patch if count is quite large - a ton of length 2 literals can |
80 | // break things |
81 | #ifdef TRY_THIS_LATER |
82 | if ((min_len == 2) && (desiredStride == 2) && (min_len_count > 20)) { |
83 | desiredStride = 1; |
84 | } |
85 | #endif |
86 | |
87 | // patch stuff just for the stride 4 case; don't let min_len=4, |
88 | // desiredStride=4 through as even a few length 4 literals can break things |
89 | // (far more fragile) |
90 | if ((min_len == 4) && (desiredStride == 4) && (min_len_count > 2)) { |
91 | desiredStride = 2; |
92 | } |
93 | |
94 | return desiredStride; |
95 | } |
96 | |
97 | unique_ptr<FDREngineDescription> chooseEngine(const target_t &target, |
98 | const vector<hwlmLiteral> &vl, |
99 | bool make_small) { |
100 | vector<FDREngineDescription> allDescs; |
101 | getFdrDescriptions(&allDescs); |
102 | |
103 | // find desired stride |
104 | size_t count; |
105 | size_t msl = minLenCount(vl, &count); |
106 | u32 desiredStride = findDesiredStride(vl.size(), msl, count); |
107 | |
108 | DEBUG_PRINTF("%zu lits, msl=%zu, desiredStride=%u\n" , vl.size(), msl, |
109 | desiredStride); |
110 | |
111 | FDREngineDescription *best = nullptr; |
112 | u32 best_score = 0; |
113 | |
114 | FDREngineDescription &eng = allDescs[0]; |
115 | |
116 | for (u32 domain = 9; domain <= 15; domain++) { |
117 | for (size_t stride = 1; stride <= 4; stride *= 2) { |
118 | // to make sure that domains >=14 have stride 1 according to origin |
119 | if (domain > 13 && stride > 1) { |
120 | continue; |
121 | } |
122 | if (!eng.isValidOnTarget(target)) { |
123 | continue; |
124 | } |
125 | if (msl < stride) { |
126 | continue; |
127 | } |
128 | |
129 | u32 score = 100; |
130 | |
131 | score -= absdiff(desiredStride, stride); |
132 | |
133 | if (stride <= desiredStride) { |
134 | score += stride; |
135 | } |
136 | |
137 | u32 effLits = vl.size(); /* * desiredStride;*/ |
138 | u32 ideal; |
139 | if (effLits < eng.getNumBuckets()) { |
140 | if (stride == 1) { |
141 | ideal = 8; |
142 | } else { |
143 | ideal = 10; |
144 | } |
145 | } else if (effLits < 20) { |
146 | ideal = 10; |
147 | } else if (effLits < 100) { |
148 | ideal = 11; |
149 | } else if (effLits < 1000) { |
150 | ideal = 12; |
151 | } else if (effLits < 10000) { |
152 | ideal = 13; |
153 | } else { |
154 | ideal = 15; |
155 | } |
156 | |
157 | if (ideal != 8 && eng.schemeWidth == 32) { |
158 | ideal += 1; |
159 | } |
160 | |
161 | if (make_small) { |
162 | ideal -= 2; |
163 | } |
164 | |
165 | if (stride > 1) { |
166 | ideal++; |
167 | } |
168 | |
169 | DEBUG_PRINTF("effLits %u\n" , effLits); |
170 | |
171 | if (target.is_atom_class() && !make_small && effLits < 4000) { |
172 | /* Unless it is a very heavy case, we want to build smaller |
173 | * tables on lightweight machines due to their small caches. */ |
174 | ideal -= 2; |
175 | } |
176 | |
177 | score -= absdiff(ideal, domain); |
178 | |
179 | DEBUG_PRINTF("fdr %u: width=%u, domain=%u, buckets=%u, stride=%zu " |
180 | "-> score=%u\n" , |
181 | eng.getID(), eng.schemeWidth, domain, |
182 | eng.getNumBuckets(), stride, score); |
183 | |
184 | if (!best || score > best_score) { |
185 | eng.bits = domain; |
186 | eng.stride = stride; |
187 | best = ŋ |
188 | best_score = score; |
189 | } |
190 | } |
191 | } |
192 | |
193 | if (!best) { |
194 | DEBUG_PRINTF("failed to find engine\n" ); |
195 | return nullptr; |
196 | } |
197 | |
198 | DEBUG_PRINTF("using engine %u\n" , best->getID()); |
199 | return ue2::make_unique<FDREngineDescription>(*best); |
200 | } |
201 | |
202 | SchemeBitIndex FDREngineDescription::getSchemeBit(BucketIndex b, |
203 | PositionInBucket p) const { |
204 | assert(p < getBucketWidth(b)); |
205 | SchemeBitIndex sbi = p * getNumBuckets() + b; |
206 | assert(sbi < getSchemeWidth()); |
207 | return sbi; |
208 | } |
209 | |
210 | u32 FDREngineDescription::getBucketWidth(BucketIndex) const { |
211 | u32 sw = getSchemeWidth(); |
212 | u32 nm = getNumBuckets(); |
213 | assert(sw % nm == 0); |
214 | return sw/nm; |
215 | } |
216 | |
217 | unique_ptr<FDREngineDescription> getFdrDescription(u32 engineID) { |
218 | vector<FDREngineDescription> allDescs; |
219 | getFdrDescriptions(&allDescs); |
220 | |
221 | if (engineID >= allDescs.size()) { |
222 | return nullptr; |
223 | } |
224 | |
225 | return ue2::make_unique<FDREngineDescription>(allDescs[engineID]); |
226 | } |
227 | |
228 | } // namespace ue2 |
229 | |