| 1 | /* |
| 2 | * Copyright (c) 2015-2018, Intel Corporation |
| 3 | * |
| 4 | * Redistribution and use in source and binary forms, with or without |
| 5 | * modification, are permitted provided that the following conditions are met: |
| 6 | * |
| 7 | * * Redistributions of source code must retain the above copyright notice, |
| 8 | * this list of conditions and the following disclaimer. |
| 9 | * * Redistributions in binary form must reproduce the above copyright |
| 10 | * notice, this list of conditions and the following disclaimer in the |
| 11 | * documentation and/or other materials provided with the distribution. |
| 12 | * * Neither the name of Intel Corporation nor the names of its contributors |
| 13 | * may be used to endorse or promote products derived from this software |
| 14 | * without specific prior written permission. |
| 15 | * |
| 16 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" |
| 17 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
| 18 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
| 19 | * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE |
| 20 | * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR |
| 21 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF |
| 22 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS |
| 23 | * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN |
| 24 | * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
| 25 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
| 26 | * POSSIBILITY OF SUCH DAMAGE. |
| 27 | */ |
| 28 | |
| 29 | #include "goughcompile.h" |
| 30 | |
| 31 | #include "accel.h" |
| 32 | #include "goughcompile_dump.h" |
| 33 | #include "goughcompile_internal.h" |
| 34 | #include "gough_internal.h" |
| 35 | #include "grey.h" |
| 36 | #include "mcclellancompile.h" |
| 37 | #include "nfa_internal.h" |
| 38 | #include "util/compile_context.h" |
| 39 | #include "util/container.h" |
| 40 | #include "util/flat_containers.h" |
| 41 | #include "util/graph_range.h" |
| 42 | #include "util/make_unique.h" |
| 43 | #include "util/order_check.h" |
| 44 | #include "util/report_manager.h" |
| 45 | #include "util/verify_types.h" |
| 46 | |
| 47 | #include "ue2common.h" |
| 48 | |
| 49 | #include <algorithm> |
| 50 | #include <boost/dynamic_bitset.hpp> |
| 51 | #include <boost/range/adaptor/map.hpp> |
| 52 | |
| 53 | using namespace std; |
| 54 | using boost::adaptors::map_keys; |
| 55 | using boost::adaptors::map_values; |
| 56 | using boost::vertex_index; |
| 57 | |
| 58 | namespace ue2 { |
| 59 | |
| 60 | void raw_som_dfa::(void) { |
| 61 | /* if a state generates a given report as a normal accept - then it does |
| 62 | * not also need to generate an eod report for it */ |
| 63 | for (vector<dstate_som>::iterator it = state_som.begin(); |
| 64 | it != state_som.end(); ++it) { |
| 65 | for (const som_report &sr : it->reports) { |
| 66 | it->reports_eod.erase(sr); |
| 67 | } |
| 68 | dstate &norm = states[it - state_som.begin()]; |
| 69 | norm.reports_eod.clear(); |
| 70 | for (const som_report &sr : it->reports_eod) { |
| 71 | norm.reports_eod.insert(sr.report); |
| 72 | } |
| 73 | } |
| 74 | } |
| 75 | |
| 76 | namespace { |
| 77 | |
| 78 | class gough_build_strat : public mcclellan_build_strat { |
| 79 | public: |
| 80 | gough_build_strat( |
| 81 | raw_som_dfa &r, const GoughGraph &g, const ReportManager &rm_in, |
| 82 | const map<dstate_id_t, gough_accel_state_info> &accel_info) |
| 83 | : mcclellan_build_strat(r, rm_in, false), rdfa(r), gg(g), |
| 84 | accel_gough_info(accel_info) {} |
| 85 | unique_ptr<raw_report_info> gatherReports(vector<u32> &reports /* out */, |
| 86 | vector<u32> &reports_eod /* out */, |
| 87 | u8 *isSingleReport /* out */, |
| 88 | ReportID *arbReport /* out */) const override; |
| 89 | AccelScheme find_escape_strings(dstate_id_t this_idx) const override; |
| 90 | size_t accelSize(void) const override { return sizeof(gough_accel); } |
| 91 | void buildAccel(dstate_id_t this_idx, const AccelScheme &info, |
| 92 | void *accel_out) override; |
| 93 | u32 max_allowed_offset_accel() const override { return 0; } |
| 94 | DfaType getType() const override { return Gough; } |
| 95 | |
| 96 | raw_som_dfa &rdfa; |
| 97 | const GoughGraph ≫ |
| 98 | map<dstate_id_t, gough_accel_state_info> accel_gough_info; |
| 99 | map<gough_accel *, dstate_id_t> built_accel; |
| 100 | }; |
| 101 | |
| 102 | } |
| 103 | |
| 104 | GoughSSAVar::~GoughSSAVar() { |
| 105 | } |
| 106 | |
| 107 | void GoughSSAVar::clear_outputs() { |
| 108 | for (GoughSSAVarWithInputs *var : outputs) { |
| 109 | var->remove_input_raw(this); |
| 110 | } |
| 111 | outputs.clear(); |
| 112 | } |
| 113 | |
| 114 | void GoughSSAVarWithInputs::clear_all() { |
| 115 | clear_inputs(); |
| 116 | clear_outputs(); |
| 117 | } |
| 118 | |
| 119 | void GoughSSAVarMin::clear_inputs() { |
| 120 | for (GoughSSAVar *var : inputs) { |
| 121 | assert(contains(var->outputs, this)); |
| 122 | var->outputs.erase(this); |
| 123 | } |
| 124 | inputs.clear(); |
| 125 | } |
| 126 | |
| 127 | void GoughSSAVarMin::replace_input(GoughSSAVar *old_v, GoughSSAVar *new_v) { |
| 128 | assert(contains(inputs, old_v)); |
| 129 | inputs.erase(old_v); |
| 130 | old_v->outputs.erase(this); |
| 131 | inputs.insert(new_v); |
| 132 | new_v->outputs.insert(this); |
| 133 | } |
| 134 | |
/* Convert the raw som reports (reports_in) attached to dfa state s into cfg
 * report entries, resolving each report's som slot to the JOIN variable live
 * at s. A report whose slot is CREATE_NEW_SOM has no feeding variable, so its
 * entry carries a null variable pointer. */
static
void translateRawReports(UNUSED GoughGraph &cfg, UNUSED const raw_som_dfa &raw,
                         const flat_map<u32, GoughSSAVarJoin *> &joins_at_s,
                         UNUSED GoughVertex s,
                         const set<som_report> &reports_in,
                         vector<pair<ReportID, GoughSSAVar *> > *reports_out) {
    for (const som_report &sr : reports_in) {
        DEBUG_PRINTF("state %u: report %u slot %d\n" , cfg[s].state_id,
                     sr.report, sr.slot);
        GoughSSAVar *var = nullptr;
        if (sr.slot == CREATE_NEW_SOM) {
            /* som is created at report time; no variable supplies it */
            assert(!generates_callbacks(raw.kind));
        } else {
            var = joins_at_s.at(sr.slot);
        }
        reports_out->push_back(make_pair(sr.report, var));
    }
}
| 153 | |
/* Populate the normal and eod report lists of every cfg vertex from the raw
 * dfa's som-aware state info. State 0 (the dead state) is skipped, hence the
 * loop starting at 1; vertices[] is indexed by raw dfa state id. */
static
void makeCFG_reports(GoughGraph &cfg, const raw_som_dfa &raw,
                     const vector<flat_map<u32, GoughSSAVarJoin *> > &joins,
                     const vector<GoughVertex> &vertices) {
    for (u32 i = 1; i < raw.states.size(); ++i) {
        GoughVertex s = vertices[i];
        const flat_map<u32, GoughSSAVarJoin *> &joins_at_s
            = joins[get(vertex_index, cfg, s)];
        translateRawReports(cfg, raw, joins_at_s, s,
                            raw.state_som[i].reports, &cfg[s].reports);
        translateRawReports(cfg, raw, joins_at_s, s,
                            raw.state_som[i].reports_eod, &cfg[s].reports_eod);
    }
}
| 168 | |
/* Wire up the som slots along a top (trigger) edge i -> n. The trigger slot
 * receives a fresh som value (adjust 0); every other slot required by the
 * target is fed either by that new value alone, by the corresponding join at
 * the source alone, or by a min of both. */
static never_inline
void makeCFG_top_edge(GoughGraph &cfg, const vector<GoughVertex> &vertices,
                      const vector<flat_map<u32, GoughSSAVarJoin *> > &joins,
                      u32 trigger_slot, const som_tran_info &src_slots,
                      const som_tran_info &dest_slot_pred,
                      dstate_id_t i, dstate_id_t n, const GoughEdge &e) {
    GoughVertex s = vertices[i];
    GoughVertex t = vertices[n];
    const flat_map<u32, GoughSSAVarJoin *> &joins_at_s
        = joins[get(vertex_index, cfg, s)];
    const flat_map<u32, GoughSSAVarJoin *> &joins_at_t
        = joins[get(vertex_index, cfg, t)];

    DEBUG_PRINTF("top for %u -> %u\n" , i, n);

    for (som_tran_info::const_iterator it = dest_slot_pred.begin();
         it != dest_slot_pred.end(); ++it) {
        /* for ordering, need to ensure that new values feeding directly
         * into mins come first */
        u32 slot_id = it->first;

        shared_ptr<GoughSSAVarNew> vnew;
        if (slot_id == trigger_slot) {
            /* the top creates a fresh som value at the current offset */
            vnew = make_shared<GoughSSAVarNew>(0U);
            cfg[e].vars.push_back(vnew);
        } else {
            assert(contains(src_slots, slot_id));
        }

        GoughSSAVar *final_var;
        if (vnew && !contains(src_slots, slot_id)) {
            /* only the freshly created value feeds this slot */
            final_var = vnew.get();
            DEBUG_PRINTF("bypassing min on join %u\n" , slot_id);
        } else if (!vnew) {
            /* only the existing value at the source feeds this slot */
            final_var = joins_at_s.at(slot_id);
            DEBUG_PRINTF("bypassing min on join %u\n" , slot_id);
        } else {
            assert(vnew);
            assert(contains(src_slots, slot_id));

            /* both a fresh value and an existing value compete: take min */
            shared_ptr<GoughSSAVarMin> vmin = make_shared<GoughSSAVarMin>();
            cfg[e].vars.push_back(vmin);
            final_var = vmin.get();

            DEBUG_PRINTF("slot %u gets a new value\n" , slot_id);
            vmin->add_input(vnew.get());

            DEBUG_PRINTF("slot %u is constant\n" , slot_id);
            vmin->add_input(joins_at_s.at(slot_id));
        }

        /* wire to destination target */
        GoughSSAVarJoin *vk = joins_at_t.at(slot_id);
        vk->add_input(final_var, e);
    }
}
| 225 | |
/* Wire up the som slots along an ordinary (symbol) edge i -> n. For each slot
 * the destination requires, gather the useful inputs available at the source
 * (existing joins and/or a freshly created som value for creator slots) and
 * feed the target's join either directly (single useful input) or through a
 * min variable. som_creators maps slot id -> som adjust for slots whose value
 * is (re)created by nfa states. */
static never_inline
void makeCFG_edge(GoughGraph &cfg, const map<u32, u32> &som_creators,
                  const vector<GoughVertex> &vertices,
                  const vector<flat_map<u32, GoughSSAVarJoin *> > &joins,
                  const som_tran_info &src_slots,
                  const som_tran_info &dest_slot_pred, dstate_id_t i,
                  dstate_id_t n, const GoughEdge &e) {
    GoughVertex s = vertices[i];
    GoughVertex t = vertices[n];
    const flat_map<u32, GoughSSAVarJoin *> &joins_at_s
        = joins[get(vertex_index, cfg, s)];
    const flat_map<u32, GoughSSAVarJoin *> &joins_at_t
        = joins[get(vertex_index, cfg, t)];

    /* new som values are shared per adjust value, so each edge creates at
     * most one GoughSSAVarNew for a given adjust */
    map<u32, shared_ptr<GoughSSAVarNew> > vnew_by_adj;
    for (som_tran_info::const_iterator it = dest_slot_pred.begin();
         it != dest_slot_pred.end(); ++it) {
        /* for ordering, need to ensure that new values feeding directly
         * into mins come first */
        u32 slot_id = it->first;

        /* slot is a pure creator with adjust 0 handled elsewhere */
        if (contains(som_creators, slot_id) && !som_creators.at(slot_id)) {
            continue;
        }

        shared_ptr<GoughSSAVarNew> vnew;
        const vector<u32> &inputs = it->second;
        u32 useful_input_count = 0;
        u32 first_useful_input = ~0U;

        for (const u32 &input_slot : inputs) {
            /* inputs not live at the source contribute nothing */
            if (!contains(src_slots, input_slot)) {
                continue;
            }
            DEBUG_PRINTF("%u is useful\n" , input_slot);

            if (!vnew || !contains(som_creators, input_slot)) {
                useful_input_count++;
                if (useful_input_count == 1) {
                    first_useful_input = input_slot;
                }
            }

            if (contains(som_creators, input_slot)) {
                u32 adjust = som_creators.at(input_slot);

                if (vnew && vnew->adjust >= adjust) {
                    DEBUG_PRINTF("skipping %u as domininated by adj%u\n" ,
                                 adjust, vnew->adjust);
                    continue; /* deeper starts can be seen to statically
                                 dominate */
                }

                if (contains(vnew_by_adj, adjust)) {
                    vnew = vnew_by_adj[adjust];
                } else {
                    vnew = make_shared<GoughSSAVarNew>(adjust);
                    cfg[e].vars.push_back(vnew);
                    vnew_by_adj[adjust] = vnew;
                }
                assert(vnew);
            }
        }

        /* If we have a new start of match (with no offset or 1 byte offset) and
         * other variables coming in, the new will always be dominated by the
         * existing variables (as they must be at least one byte into the match)
         * -- and so can be dropped. */
        if (vnew && vnew->adjust < 2 && useful_input_count > 1) {
            useful_input_count--;
            vnew.reset();

            /* need to reestablish the first useful input */
            for (const u32 &input_slot : inputs) {
                if (!contains(src_slots, input_slot)) {
                    continue;
                }
                if (!contains(som_creators, input_slot)) {
                    first_useful_input = input_slot;
                }
            }

        }

        GoughSSAVar *final_var;
        if (useful_input_count == 1) {
            /* single feeder: connect it straight through, no min needed */
            if (vnew) {
                final_var = vnew.get();
            } else {
                assert(first_useful_input != ~0U);
                final_var = joins_at_s.at(first_useful_input);
            }
            DEBUG_PRINTF("bypassing min on join %u\n" , slot_id);
        } else {
            /* multiple feeders: min them together */
            shared_ptr<GoughSSAVarMin> vmin = make_shared<GoughSSAVarMin>();
            cfg[e].vars.push_back(vmin);
            final_var = vmin.get();

            if (vnew) {
                vmin->add_input(vnew.get());
            }

            /* wire the normal inputs to the min */
            for (const u32 &input_slot : inputs) {
                if (!contains(src_slots, input_slot)) {
                    continue;
                }
                if (!contains(som_creators, input_slot)) {
                    vmin->add_input(joins_at_s.at(input_slot));
                }
            }
            assert(vmin->get_inputs().size() > 1);
            DEBUG_PRINTF("wire min to join %u\n" , slot_id);
        }

        GoughSSAVarJoin *vk = joins_at_t.at(slot_id);
        assert(final_var);
        vk->add_input(final_var, e);
    }
}
| 346 | |
/* Build the Gough control-flow graph from the raw som dfa: one vertex per
 * live dfa state (with JOIN variables for each live som slot), and edges
 * carrying the NEW/MIN variables created by makeCFG_edge / makeCFG_top_edge.
 * Symbol edges to the same target are merged with a combined reach set. */
static never_inline
unique_ptr<GoughGraph> makeCFG(const raw_som_dfa &raw) {
    vector<GoughVertex> vertices;
    vertices.reserve(raw.states.size());
    unique_ptr<GoughGraph> cfg = ue2::make_unique<GoughGraph>();
    /* untriggered dfas never enter the dead state, so skip state 0 */
    u32 min_state = !is_triggered(raw.kind);

    if (min_state) {
        vertices.push_back(GoughGraph::null_vertex()); /* skip dead state */
    }

    vector<flat_map<u32, GoughSSAVarJoin *> > joins(raw.states.size());
    for (u32 i = min_state; i < raw.states.size(); ++i) {
        GoughVertex v = add_vertex(GoughVertexProps(i), *cfg);
        vertices.push_back(v);

        /* create JOIN variables */
        for (som_tran_info::const_iterator it = raw.state_som[i].preds.begin();
             it != raw.state_som[i].preds.end(); ++it) {
            u32 slot_id = it->first;
            if (!contains(raw.new_som_nfa_states, slot_id)
                || raw.new_som_nfa_states.at(slot_id)) {
                (*cfg)[v].vars.push_back(make_shared<GoughSSAVarJoin>());
                joins[get(vertex_index, *cfg, v)][slot_id]
                    = (*cfg)[v].vars.back().get();
                DEBUG_PRINTF("dfa %u:: slot %u\n" , i, slot_id);
            }
        }
    }

    u16 top_sym = raw.alpha_remap[TOP];
    DEBUG_PRINTF("top: %hu, kind %s\n" , top_sym, to_string(raw.kind).c_str());

    /* create edges, JOIN variables (on edge targets) */
    map<dstate_id_t, GoughEdge> seen;
    for (u32 i = min_state; i < raw.states.size(); ++i) {
        seen.clear(); /* seen is really local to each state */

        DEBUG_PRINTF("creating edges out of %u/%zu\n" , i, raw.states.size());
        GoughVertex s = vertices[i];
        const vector<dstate_id_t> &next = raw.states[i].next;
        for (u32 j = 0; j < next.size(); ++j) {
            /* untriggered dfas take no top transitions */
            if (!is_triggered(raw.kind) && j == top_sym) {
                continue;
            }

            dstate_id_t n = next[j];
            DEBUG_PRINTF(" edge to %hu out on %u\n" , n, j);
            assert(n < raw.states.size());
            GoughVertex t = vertices[n];

            if (j == top_sym) {
                GoughEdge e = add_edge(s, t, *cfg).first;
                (*cfg)[e].top = true;
                makeCFG_top_edge(*cfg, vertices, joins, raw.trigger_nfa_state,
                                 raw.state_som[i].preds, raw.state_som[n].preds,
                                 i, n, e);
            } else {
                if (contains(seen, n)) {
                    /* already have an edge to n; just widen its reach */
                    const GoughEdge &e = seen[n];
                    (*cfg)[e].reach.set(j);
                    continue;
                }

                GoughEdge e = add_edge(s, t, *cfg).first;
                (*cfg)[e].reach.set(j);

                seen[n] = e;

                makeCFG_edge(*cfg, raw.new_som_nfa_states, vertices, joins,
                             raw.state_som[i].preds, raw.state_som[n].preds,
                             i, n, e);
            }
        }
    }

    /* populate reports */
    makeCFG_reports(*cfg, raw, joins, vertices);

    using boost::graph_bundle;
    if (is_triggered(raw.kind)) {
        (*cfg)[graph_bundle].initial_vertex = vertices[DEAD_STATE];
    } else {
        (*cfg)[graph_bundle].initial_vertex = vertices[raw.start_anchored];
    }

    return cfg;
}
| 435 | |
/* For each report fed by a variable with exactly one input, retarget the
 * report at that input instead. Note that 'it' is deliberately NOT advanced
 * after a replacement: the same entry is re-examined so that chains of
 * single-input variables are followed until they bottom out (null var or a
 * variable with multiple inputs). */
static
void copy_propagate_report_set(vector<pair<ReportID, GoughSSAVar *> > &rep) {
    vector<pair<ReportID, GoughSSAVar *> >::iterator it = rep.begin();
    while (it != rep.end()) {
        GoughSSAVar *var = it->second;
        if (!var) {
            ++it;
            continue;
        }
        const flat_set<GoughSSAVar *> &inputs = var->get_inputs();
        if (inputs.size() != 1) {
            ++it;
            continue;
        }
        it->second = *inputs.begin(); /* note may result in dupes,
                                         filter later */
    }
}
| 454 | |
/* One pass of copy propagation over a list of variables (vertex or edge
 * vars). A variable with a single effective input (either exactly one input,
 * or two inputs where one is a self loop) is bypassed: everything that read
 * this variable is rewired to read its input directly. Sets *changes if any
 * rewiring occurred. */
template<typename VarP>
void copy_propagate_update_vars(vector<VarP> &vars, bool *changes) {
    for (u32 i = 0; i < vars.size(); i++) {
        GoughSSAVar *vp = vars[i].get();
        const flat_set<GoughSSAVar *> &inputs = vp->get_inputs();

        /* no need to worry about data coming from self; ignore self loops */
        GoughSSAVar *new_input = nullptr;

        if (inputs.size() == 1) {
            new_input = *inputs.begin();
        } else if (inputs.size() == 2) {
            /* two inputs: usable only if one of them is vp itself */
            flat_set<GoughSSAVar *>::const_iterator jt = inputs.begin();
            GoughSSAVar *i_0 = *jt;
            GoughSSAVar *i_1 = *++jt;

            if (i_0 == vp) {
                new_input = i_1;
            } else if (i_1 == vp) {
                new_input = i_0;
            }
        }

        if (!new_input) {
            continue;
        }

        assert(new_input != vp);

        /* copy set as it will be modified by iteration */
        const flat_set<GoughSSAVarWithInputs *> outputs = vp->get_outputs();

        for (GoughSSAVar *curr : outputs) {
            curr->replace_input(vp, new_input);
            *changes = true;
        }
    }
}
| 493 | |
| 494 | static |
| 495 | void copy_propagation(GoughGraph &g, const Grey &grey) { |
| 496 | if (!grey.goughCopyPropagate) { |
| 497 | return; |
| 498 | } |
| 499 | /* TODO order visit of variables sensibly */ |
| 500 | bool changes = false; |
| 501 | do { |
| 502 | DEBUG_PRINTF("new iteration\n" ); |
| 503 | changes = false; |
| 504 | for (auto v : vertices_range(g)) { |
| 505 | copy_propagate_update_vars(g[v].vars, &changes); |
| 506 | } |
| 507 | for (const auto &e : edges_range(g)) { |
| 508 | copy_propagate_update_vars(g[e].vars, &changes); |
| 509 | } |
| 510 | } while(changes); |
| 511 | |
| 512 | /* see if any reports can also be moved along */ |
| 513 | for (auto v : vertices_range(g)) { |
| 514 | copy_propagate_report_set(g[v].reports); |
| 515 | copy_propagate_report_set(g[v].reports_eod); |
| 516 | } |
| 517 | } |
| 518 | |
| 519 | static |
| 520 | void mark_live_reports(const vector<pair<ReportID, GoughSSAVar *> > &reps, |
| 521 | vector<GoughSSAVar *> *queue) { |
| 522 | for (const auto &r : reps) { |
| 523 | GoughSSAVar *var = r.second; |
| 524 | if (!var || var->seen) { |
| 525 | continue; |
| 526 | } |
| 527 | var->seen = true; |
| 528 | queue->push_back(var); |
| 529 | } |
| 530 | } |
| 531 | |
/* Dead variable elimination: mark everything transitively reachable from a
 * report as live (backwards through inputs), then disconnect and erase all
 * unmarked variables from both vertices and edges. */
static
void remove_dead(GoughGraph &g) {
    vector<GoughSSAVar *> queue;

    /* roots: variables directly feeding reports */
    for (auto v : vertices_range(g)) {
        mark_live_reports(g[v].reports, &queue);
        mark_live_reports(g[v].reports_eod, &queue);
    }

    /* flood backwards through input edges */
    while (!queue.empty()) {
        GoughSSAVar *v = queue.back();
        queue.pop_back();
        for (GoughSSAVar *var : v->get_inputs()) {
            if (var->seen) {
                continue;
            }
            var->seen = true;
            queue.push_back(var);
        }
    }

    /* remove unused variables */
    for (auto v : vertices_range(g)) {
        for (u32 i = 0; i < g[v].vars.size(); i++) {
            GoughSSAVar *var = g[v].vars[i].get();
            if (var->seen) {
                continue;
            }
            var->clear_all();
            /* erase shifts later elements down; step back so none is skipped */
            g[v].vars.erase(g[v].vars.begin() + i);
            i--;
        }
    }
    for (const auto &e : edges_range(g)) {
        for (u32 i = 0; i < g[e].vars.size(); i++) {
            GoughSSAVar *var = g[e].vars[i].get();
            if (var->seen) {
                continue;
            }
            var->clear_all();
            /* erase shifts later elements down; step back so none is skipped */
            g[e].vars.erase(g[e].vars.begin() + i);
            i--;
        }
    }
}
| 577 | |
| 578 | static |
| 579 | gough_ins make_gough_ins(u8 op, u32 dest = INVALID_SLOT, |
| 580 | u32 src = INVALID_SLOT) { |
| 581 | assert(dest != INVALID_SLOT || op == GOUGH_INS_END); |
| 582 | assert(src != INVALID_SLOT || op == GOUGH_INS_END || op == GOUGH_INS_NEW); |
| 583 | gough_ins rv; |
| 584 | rv.op = op; |
| 585 | rv.dest = dest; |
| 586 | rv.src = src; |
| 587 | return rv; |
| 588 | } |
| 589 | |
/* Emit the instruction creating a fresh som value in this variable's slot;
 * adjust is passed in the src operand of the NEW instruction. */
void GoughSSAVarNew::generate(vector<gough_ins> *out) const {
    assert(slot != INVALID_SLOT);
    out->push_back(make_gough_ins(GOUGH_INS_NEW, slot, adjust));
}
| 594 | |
#ifndef NDEBUG
/* Debug-only helper: linear membership test usable with any iterable
 * container, regardless of whether it offers its own find(). */
template<typename C, typename K>
bool contains_loose(const C &container, const K &key) {
    return std::find(container.begin(), container.end(), key)
           != container.end();
}
#endif
| 606 | |
/* Emit instructions computing the min of all inputs into this variable's
 * slot: a MOV of the first input followed by MINs of the rest. If one input
 * already lives in the destination slot, the initial MOV is skipped (first is
 * cleared) and every remaining input is MIN'd straight into it. Input slots
 * are sorted for deterministic output. */
void GoughSSAVarMin::generate(vector<gough_ins> *out) const {
    assert(slot != INVALID_SLOT);
    assert(!inputs.empty());
    //    assert(inputs.size() > 1);
    vector<u32> input_slots; /* for determinism */
    bool first = true;
    for (const GoughSSAVar *var : inputs) {
        assert(contains_loose(var->outputs, this));
        if (var->slot == slot) {
            /* if the destination is one of the sources, no need to move it */
            first = false;
        } else {
            input_slots.push_back(var->slot);
        }
    }

    sort(input_slots.begin(), input_slots.end());

    for (const u32 &input_slot : input_slots) {
        if (first) {
            out->push_back(make_gough_ins(GOUGH_INS_MOV, slot, input_slot));
            first = false;
        } else {
            out->push_back(make_gough_ins(GOUGH_INS_MIN, slot, input_slot));
        }
    }
}
| 634 | |
/* Drop v from this min's input set only; v's own output set is not touched
 * (the caller is responsible for the other side of the link). */
void GoughSSAVarMin::remove_input_raw(GoughSSAVar *v) {
    assert(contains(inputs, v));
    inputs.erase(v);
}
| 639 | |
/* Joins never emit code themselves; their effect is realised by the per-edge
 * join-resolution machinery (see add_joins_to_block), so reaching here is a
 * programming error. */
void GoughSSAVarJoin::generate(UNUSED vector<gough_ins> *out) const {
    assert(0);
}
| 643 | |
/* Return the variable which feeds this join when control arrives over edge
 * prev. Callers must only ask about edges which actually feed the join;
 * otherwise we assert and return null. */
GoughSSAVar *GoughSSAVarJoin::get_input(const GoughEdge &prev) const {
    for (const auto &var_edge : input_map) {
        if (contains(var_edge.second, prev)) {
            return var_edge.first;
        }
    }
    assert(0);
    return nullptr;
}
| 653 | |
/* Return the set of incoming edges over which 'input' supplies this join;
 * throws (via at()) if 'input' does not feed this join. */
const flat_set<GoughEdge> &GoughSSAVarJoin::get_edges_for_input(
                                                 GoughSSAVar *input) const {
    return input_map.at(input);
}
| 658 | |
/* Expose the full input-variable -> incoming-edges mapping for this join. */
const map<GoughSSAVar *, flat_set<GoughEdge> > &GoughSSAVarJoin::get_input_map()
    const {
    return input_map;
}
| 663 | |
| 664 | void GoughSSAVarJoin::clear_inputs() { |
| 665 | for (GoughSSAVar *var : input_map | map_keys) { |
| 666 | assert(contains(var->outputs, this)); |
| 667 | var->outputs.erase(this); |
| 668 | } |
| 669 | input_map.clear(); |
| 670 | inputs.clear(); |
| 671 | } |
| 672 | |
/* Replace old_v with new_v as an input to this join: the incoming edges
 * previously attributed to old_v are merged into new_v's edge set, and both
 * producers' output sets are fixed up. old_v == new_v is a caller error. */
void GoughSSAVarJoin::replace_input(GoughSSAVar *old_v, GoughSSAVar *new_v) {
    assert(contains(input_map, old_v));
    assert(contains(inputs, old_v));
    if (old_v == new_v) {
        assert(0);
        return;
    }
    /* merge old_v's edges into new_v's entry before erasing old_v */
    insert(&input_map[new_v], input_map[old_v]);
    input_map.erase(old_v);
    inputs.erase(old_v);
    inputs.insert(new_v);
    old_v->outputs.erase(this);
    new_v->outputs.insert(this);
}
| 687 | |
/* Record that v feeds this join when arriving over edge prev, and register
 * this join as an output of v. */
void GoughSSAVarJoin::add_input(GoughSSAVar *v, GoughEdge prev) {
    input_map[v].insert(prev);
    inputs.insert(v);
    v->outputs.insert(this);
}
| 693 | |
/* Drop v from this join's input structures only; v's output set is left to
 * the caller (raw counterpart of the managed removal paths). */
void GoughSSAVarJoin::remove_input_raw(GoughSSAVar *v) {
    assert(contains(inputs, v));
    assert(contains(input_map, v));
    input_map.erase(v);
    inputs.erase(v);
}
| 700 | |
| 701 | static |
| 702 | u32 highest_slot_used(const vector<gough_ins> &program) { |
| 703 | u32 rv = INVALID_SLOT; |
| 704 | for (const gough_ins &ins : program) { |
| 705 | if (rv == INVALID_SLOT) { |
| 706 | rv = ins.dest; |
| 707 | } else if (ins.dest != INVALID_SLOT) { |
| 708 | ENSURE_AT_LEAST(&rv, ins.dest); |
| 709 | } |
| 710 | if (rv == INVALID_SLOT) { |
| 711 | rv = ins.src; |
| 712 | } else if (ins.src != INVALID_SLOT) { |
| 713 | ENSURE_AT_LEAST(&rv, ins.src); |
| 714 | } |
| 715 | } |
| 716 | assert(rv != INVALID_SLOT); |
| 717 | return rv; |
| 718 | } |
| 719 | |
| 720 | static |
| 721 | u32 highest_slot_used(const map<gough_edge_id, vector<gough_ins> > &blocks) { |
| 722 | u32 rv = INVALID_SLOT; |
| 723 | for (const vector<gough_ins> &ins_list : blocks | map_values) { |
| 724 | u32 used = highest_slot_used(ins_list); |
| 725 | if (rv == INVALID_SLOT) { |
| 726 | rv = used; |
| 727 | } else if (used != INVALID_SLOT) { |
| 728 | ENSURE_AT_LEAST(&rv, used); |
| 729 | } |
| 730 | } |
| 731 | return rv; |
| 732 | } |
| 733 | |
/* Append the code generated by each variable, in list order, to the block
 * under construction. */
static
void add_to_block(const vector<shared_ptr<GoughSSAVar> > &vars,
                  vector<gough_ins> *out) {
    for (const auto &var : vars) {
        var->generate(out);
    }
}
| 741 | |
namespace {
/* Bookkeeping for the slot-to-slot moves required to resolve the joins along
 * one incoming edge. Maintains the dest -> src map together with its inverse
 * (src -> set of dests) so that conflicts (a slot that is both a pending
 * source and a pending destination) can be detected cheaply. Each destination
 * has at most one source. */
struct edge_join_info {
    bool empty() const { return dest_to_src.empty(); }

    /* record a pending move dest <- src (dest must not already be pending) */
    void insert(u32 src, u32 dest) {
        assert(!contains(dest_to_src, dest));
        assert(src != dest);
        dest_to_src[dest] = src;
        src_to_dest[src].insert(dest);
    }

    /* retire the pending move dest <- src, pruning empty source entries */
    void erase(u32 src, u32 dest) {
        assert(dest_to_src.at(dest) == src);
        dest_to_src.erase(dest);
        src_to_dest[src].erase(dest);

        if (src_to_dest[src].empty()) {
            src_to_dest.erase(src);
        }
    }

    /* is slot v still needed as an input by some pending move? */
    bool is_src(u32 v) const {
        bool rv = contains(src_to_dest, v);
        assert(!rv || !src_to_dest.at(v).empty());
        return rv;
    }

    /* is slot v still waiting to be written by some pending move? */
    bool is_dest(u32 v) const {
        return contains(dest_to_src, v);
    }

    /* redirect every pending move reading old_src to read new_src instead
     * (used after old_src's value has been copied to new_src) */
    void remap_src(u32 old_src, u32 new_src) {
        assert(is_src(old_src));
        assert(!is_src(new_src));

        for (const u32 &e : src_to_dest[old_src]) {
            assert(e != new_src);
            dest_to_src[e] = new_src;
        }
        src_to_dest[new_src].swap(src_to_dest[old_src]);
        src_to_dest.erase(old_src);

        assert(!is_src(old_src));
        assert(is_src(new_src));
    }

    /* returns an arbitrary unresolved entry */
    void get_pending(u32 *src, u32 *dest) {
        assert(!empty());
        *dest = dest_to_src.begin()->first;
        *src = dest_to_src.begin()->second;
    }

    const map<u32, u32> &get_dest_mapping() const { return dest_to_src; }

private:
    map<u32, set<u32> > src_to_dest; /* inverse index of dest_to_src */
    map<u32, u32> dest_to_src;       /* pending moves: dest slot <- src slot */
};

}
| 803 | |
/* For vertex v, collect per incoming edge the slot moves needed to realise
 * its join variables: for each join whose input (over a given edge) lives in
 * a different slot than the join itself, record input-slot -> join-slot in
 * that edge's edge_join_info. Moves where source and destination slots
 * already coincide are skipped. */
static
void prep_joins_for_generation(const GoughGraph &g, GoughVertex v,
                               map<GoughEdge, edge_join_info> *edge_info) {
    DEBUG_PRINTF("writing out joins for %u\n" , g[v].state_id);
    for (const auto &var : g[v].vars) {
        u32 dest_slot = var->slot;
        for (const auto &var_edges : var->get_input_map()) {
            u32 input = var_edges.first->slot;
            if (dest_slot == input) {
                continue;
            }

            for (const GoughEdge &incoming_edge : var_edges.second) {
                (*edge_info)[incoming_edge].insert(input, dest_slot);
                DEBUG_PRINTF("need %u<-%u\n" , dest_slot, input);
            }
        }
    }
}
| 823 | |
/* Emit all join moves whose destination slot is not itself needed as a source
 * by another pending move; repeat until no more can be emitted. After each
 * emission, if the consumed source slot is both a pending source and a
 * pending destination, remap its readers onto the just-written destination
 * (which now holds the same value), potentially unblocking further moves. */
static
void add_simple_joins(edge_join_info &eji, vector<gough_ins> *out) {
    /* any slot whose value we don't need can be written to immediately */
    const map<u32, u32> &dest_to_src = eji.get_dest_mapping();

    bool changed;
    do {
        changed = false;
        for (map<u32, u32>::const_iterator it = dest_to_src.begin();
             it != dest_to_src.end();) {
            u32 src = it->second;
            u32 dest = it->first;
            ++it; /* avoid iterator being invalidated */

            if (eji.is_src(dest)) {
                continue; /* conflict; not simple (yet) */
            }

            /* value of destination slot is not used by any remaining joins;
             * we can output this join immediately */
            DEBUG_PRINTF("out %u<-%u\n" , dest, src);
            out->push_back(make_gough_ins(GOUGH_INS_MOV, dest, src));

            eji.erase(src, dest);

            if (eji.is_dest(src) && eji.is_src(src)) {
                /* we can unblock src being used as an output by shifting
                 * across everybody using src as input to using dest (as == src
                 * now) */
                eji.remap_src(src, dest);
            }
            changed = true;
        }
    } while (changed);
}
| 859 | |
/* Emit the full set of join moves for one edge. Simple (conflict-free) moves
 * go out first; any remaining cycles are broken by copying one blocking slot
 * into a fresh temporary (slots from base_temp_slot upwards), remapping its
 * readers onto the temp, and retrying the simple pass. */
static
void add_joins_to_block(edge_join_info &eji, vector<gough_ins> *out,
                        u32 base_temp_slot) {
    /* joins happen concurrently: none of them should see the outputs of another
     * join happening due to the same entry of the vertex. If there are
     * conflicts we may have to handle things by using a temp output slot for
     * each join and then copying into the final slot.
     */

    add_simple_joins(eji, out);
    while (!eji.empty()) {
        u32 split;
        u32 input_for_split;
        eji.get_pending(&input_for_split, &split);

        assert(eji.is_src(split)); /* otherwise should be handled by simple */

        /* stash the initial value of the split register in a temp register */
        u32 temp = base_temp_slot++;
        DEBUG_PRINTF("out %u<-%u\n" , temp, split);
        out->push_back(make_gough_ins(GOUGH_INS_MOV, temp, split));
        eji.remap_src(split, temp); /* update maps */

        /* split can now be safely written out to as all the uses of it as an
         * input now refer to temp instead */

        DEBUG_PRINTF("out %u<-%u\n" , split, input_for_split);
        out->push_back(make_gough_ins(GOUGH_INS_MOV, split, input_for_split));
        eji.erase(input_for_split, split);

        /* handle any uncovered simple cases */
        add_simple_joins(eji, out);
    }
}
| 894 | |
| 895 | static |
| 896 | void build_blocks(const GoughGraph &g, |
| 897 | map<gough_edge_id, vector<gough_ins> > *blocks, |
| 898 | u32 base_temp_slot) { |
| 899 | for (const auto &e : edges_range(g)) { |
| 900 | if (g[e].vars.empty()) { |
| 901 | continue; |
| 902 | } |
| 903 | |
| 904 | vector<gough_ins> &block = (*blocks)[gough_edge_id(g, e)]; |
| 905 | add_to_block(g[e].vars, &block); |
| 906 | assert(!block.empty()); |
| 907 | } |
| 908 | |
| 909 | for (const auto t : vertices_range(g)) { |
| 910 | if (g[t].vars.empty()) { |
| 911 | continue; |
| 912 | } |
| 913 | |
| 914 | map<GoughEdge, edge_join_info> eji; |
| 915 | prep_joins_for_generation(g, t, &eji); |
| 916 | |
| 917 | for (auto &m : eji) { |
| 918 | vector<gough_ins> &block = (*blocks)[gough_edge_id(g, m.first)]; |
| 919 | u32 cur_base = base_temp_slot; |
| 920 | if (!block.empty()) { |
| 921 | /* some temp slots may already be in use by short-lived vars */ |
| 922 | ENSURE_AT_LEAST(&cur_base, highest_slot_used(block) + 1); |
| 923 | } |
| 924 | |
| 925 | add_joins_to_block(m.second, &block, cur_base); |
| 926 | if (block.empty()) { |
| 927 | blocks->erase(gough_edge_id(g, m.first)); |
| 928 | } |
| 929 | } |
| 930 | } |
| 931 | |
| 932 | for (vector<gough_ins> &ins_list : *blocks | map_values) { |
| 933 | assert(!ins_list.empty()); |
| 934 | ins_list.push_back(make_gough_ins(GOUGH_INS_END)); |
| 935 | } |
| 936 | } |
| 937 | |
/* Appends every distinct instruction block to *out (deduplicating identical
 * programs) and fills in the per-edge and per-top program offset tables used
 * by the runtime. prog_offsets records the final offset of each distinct
 * block for later use (e.g. accel fixups). */
static
void copy_in_blocks(raw_som_dfa &raw, u8 alphaShift, const GoughGraph &cfg,
                    const map<gough_edge_id, vector<gough_ins> > &blocks,
                    u32 *edge_blocks, u32 *top_blocks, u32 base_offset,
                    map<vector<gough_ins>, u32> *prog_offsets,
                    vector<gough_ins> *out) {
    u32 impl_alpha_size = 1U << alphaShift;
    UNUSED u32 top_sym = raw.alpha_remap[TOP];
    assert(top_sym == raw.alpha_size - 1U);
    map<vector<gough_ins>, u32> &processed = *prog_offsets;

    for (const auto &e : edges_range(cfg)) {
        if (!contains(blocks, gough_edge_id(cfg, e))) {
            continue; /* no som program needed for this edge */
        }
        const vector<gough_ins> &block = blocks.at(gough_edge_id(cfg, e));
        u32 prog_offset;
        if (!contains(processed, block)) {
            /* first occurrence of this exact program: append it to out */
            prog_offset = base_offset + byte_length(*out);
            insert(out, out->end(), block);
            processed[block] = prog_offset;
        } else {
            /* identical program already emitted: just reuse its offset */
            prog_offset = processed[block];
        }

        /* update edges */
        u32 s_id = cfg[source(e, cfg)].state_id;
        UNUSED u32 t_id = cfg[target(e, cfg)].state_id;
        u32 impl_src_id = raw.states[s_id].impl_id;
        DEBUG_PRINTF("%u: writing out block for edge_%u_%u at %u:\n",
                     impl_src_id, s_id, t_id, prog_offset);

        /* point every symbol covered by this edge's reach at the program */
        for (u32 j = cfg[e].reach.find_first(); j != CharReach::npos;
             j = cfg[e].reach.find_next(j)) {
            assert(raw.states[s_id].next[j] == t_id);
            u32 edge_index = impl_src_id * impl_alpha_size + j;
            DEBUG_PRINTF("\tsetting on %u, %u\n", j, edge_index);
            edge_blocks[edge_index] = prog_offset;
        }

        if (cfg[e].top) {
            /* this edge also handles the TOP event for the source state */
            assert(raw.states[s_id].next[top_sym] == t_id);
            DEBUG_PRINTF("\tsetting top on %u to block at %u\n", impl_src_id,
                         prog_offset);
            top_blocks[impl_src_id] = prog_offset;
        }
    }
}
| 986 | |
| 987 | bool find_normal_self_loop(GoughVertex v, const GoughGraph &g, GoughEdge *out) { |
| 988 | for (const auto &e : out_edges_range(v, g)) { |
| 989 | if (target(e, g) != v) { |
| 990 | continue; |
| 991 | } |
| 992 | if (g[e].top) { |
| 993 | assert(g[e].reach.find_first() == CharReach::npos); |
| 994 | continue; /* corresponds to a top, not a normal transition */ |
| 995 | } |
| 996 | |
| 997 | *out = e; |
| 998 | return true; |
| 999 | } |
| 1000 | |
| 1001 | return false; |
| 1002 | } |
| 1003 | |
| 1004 | static never_inline |
| 1005 | void update_accel_prog_offset(const gough_build_strat &gbs, |
| 1006 | const map<gough_edge_id, vector<gough_ins> > &blocks, |
| 1007 | const map<vector<gough_ins>, u32> &prog_offsets) { |
| 1008 | map<dstate_id_t, GoughVertex> verts; |
| 1009 | for (auto v : vertices_range(gbs.gg)) { |
| 1010 | verts[gbs.gg[v].state_id] = v; |
| 1011 | } |
| 1012 | |
| 1013 | for (auto &m : gbs.built_accel) { |
| 1014 | gough_accel *ga = m.first; |
| 1015 | assert(!ga->prog_offset); |
| 1016 | GoughVertex v = verts[m.second]; |
| 1017 | GoughEdge e; |
| 1018 | UNUSED bool rv = find_normal_self_loop(v, gbs.gg, &e); |
| 1019 | assert(rv); |
| 1020 | |
| 1021 | if (!rv) { |
| 1022 | continue; |
| 1023 | } |
| 1024 | |
| 1025 | DEBUG_PRINTF("updating state %u accel with margin %hhu\n" , |
| 1026 | gbs.gg[v].state_id, ga->margin_dist); |
| 1027 | if (contains(blocks, gough_edge_id(gbs.gg, e))) { |
| 1028 | const vector<gough_ins> &block |
| 1029 | = blocks.at(gough_edge_id(gbs.gg, e)); |
| 1030 | ga->prog_offset = prog_offsets.at(block); |
| 1031 | DEBUG_PRINTF("prog offset %u\n" , ga->prog_offset); |
| 1032 | } else { |
| 1033 | ga->margin_dist = 0; |
| 1034 | DEBUG_PRINTF("removing margin as no som\n" ); |
| 1035 | } |
| 1036 | } |
| 1037 | } |
| 1038 | |
/* Compiles a som dfa into a Gough engine: builds and optimises the som
 * variable CFG, generates the per-edge instruction programs, compiles the
 * underlying mcclellan dfa and then lays the gough-specific structures
 * (gough_info, edge/top program tables, program bytes) out after it in one
 * bytecode image. Returns nullptr if gough is disallowed or the base dfa
 * cannot be built. */
bytecode_ptr<NFA> goughCompile(raw_som_dfa &raw, u8 somPrecision,
                               const CompileContext &cc,
                               const ReportManager &rm) {
    /* streaming mode requires a supported som location width */
    assert(somPrecision == 2 || somPrecision == 4 || somPrecision == 8
           || !cc.streaming);

    if (!cc.grey.allowGough) {
        return nullptr;
    }

    DEBUG_PRINTF("hello world\n");
    /* build and optimise the som variable CFG */
    unique_ptr<GoughGraph> cfg = makeCFG(raw);
    dump(*cfg, "init", cc.grey);
    copy_propagation(*cfg, cc.grey);
    remove_dead(*cfg);
    dump(*cfg, "prop", cc.grey);
    u32 slot_count = assign_slots(*cfg, cc.grey);
    dump(*cfg, "slots", cc.grey);

    map<gough_edge_id, vector<gough_ins> > blocks;
    build_blocks(*cfg, &blocks, slot_count);
    DEBUG_PRINTF("%u slots\n", highest_slot_used(blocks) + 1);

    /* join generation may have consumed extra scratch slots beyond those
     * assigned to variables */
    u32 scratch_slot_count = highest_slot_used(blocks) + 1;
    assert(slot_count <= scratch_slot_count);

    dump(*cfg, "final", cc.grey);
    dump_blocks(blocks, "final", cc.grey);

    gough_info gi;
    memset(&gi, 0, sizeof(gi));

    /* build the underlying mcclellan dfa; acceleration is restricted to
     * states where it is som-safe */
    map<dstate_id_t, gough_accel_state_info> accel_allowed;
    find_allowed_accel_states(*cfg, blocks, &accel_allowed);
    gough_build_strat gbs(raw, *cfg, rm, accel_allowed);
    auto basic_dfa = mcclellanCompile_i(raw, gbs, cc);
    assert(basic_dfa);
    if (!basic_dfa) {
        return nullptr;
    }

    u8 alphaShift
        = ((const mcclellan *)getImplNfa(basic_dfa.get()))->alphaShift;
    u32 edge_count = (1U << alphaShift) * raw.states.size();

    /* lay out the gough structures directly after the mcclellan body */
    u32 curr_offset = ROUNDUP_N(basic_dfa->length, 4);

    u32 haig_offset = curr_offset;
    curr_offset += sizeof(gi);
    /* reserve space for edge->program mapping */
    u32 edge_prog_offset = curr_offset;
    curr_offset += sizeof(u32) * edge_count;
    vector<u32> edge_blocks(edge_count);

    u32 top_prog_offset = 0;
    if (is_triggered(raw.kind)) {
        /* reserve space for edge->program mapping */
        top_prog_offset = curr_offset;
        curr_offset += sizeof(u32) * raw.states.size();
    }
    gi.top_prog_offset = top_prog_offset;
    vector<u32> top_blocks(raw.states.size());

    /* reserve space for blocks */
    u32 prog_base_offset = curr_offset;
    gi.prog_base_offset = prog_base_offset;

    vector<gough_ins> temp_blocks;
    map<vector<gough_ins>, u32> prog_offsets;
    copy_in_blocks(raw, alphaShift, *cfg, blocks, &edge_blocks[0],
                   &top_blocks[0], prog_base_offset, &prog_offsets,
                   &temp_blocks);
    update_accel_prog_offset(gbs, blocks, prog_offsets);

    u32 total_prog_size = byte_length(temp_blocks);
    curr_offset += total_prog_size;

    gi.stream_som_loc_count = slot_count;
    gi.stream_som_loc_width = somPrecision;

    u32 gough_size = ROUNDUP_N(curr_offset, 16);
    auto gough_dfa = make_zeroed_bytecode_ptr<NFA>(gough_size);

    /* start from the mcclellan image, then write the gough header */
    memcpy(gough_dfa.get(), basic_dfa.get(), basic_dfa->length);
    memcpy((char *)gough_dfa.get() + haig_offset, &gi, sizeof(gi));
    if (gough_dfa->type == MCCLELLAN_NFA_16) {
        gough_dfa->type = GOUGH_NFA_16;
    } else {
        assert(gough_dfa->type == MCCLELLAN_NFA_8);
        gough_dfa->type = GOUGH_NFA_8;
    }

    /* update stream state requirements */
    u32 base_state_size = gough_dfa->type == GOUGH_NFA_8 ? 1 : 2;
    gough_dfa->streamStateSize = base_state_size + slot_count * somPrecision;
    gough_dfa->scratchStateSize = (u32)(16 + scratch_slot_count * sizeof(u64a));

    mcclellan *m = (mcclellan *)getMutableImplNfa(gough_dfa.get());
    m->haig_offset = haig_offset;

    /* update nfa length, haig_info offset (leave mcclellan length alone) */
    gough_dfa->length = gough_size;

    /* copy in blocks */
    copy_bytes((u8 *)gough_dfa.get() + edge_prog_offset, edge_blocks);
    if (top_prog_offset) {
        copy_bytes((u8 *)gough_dfa.get() + top_prog_offset, top_blocks);
    }
    copy_bytes((u8 *)gough_dfa.get() + prog_base_offset, temp_blocks);

    return gough_dfa;
}
| 1151 | |
| 1152 | AccelScheme gough_build_strat::find_escape_strings(dstate_id_t this_idx) const { |
| 1153 | AccelScheme rv; |
| 1154 | if (!contains(accel_gough_info, this_idx)) { |
| 1155 | rv.cr = CharReach::dot(); |
| 1156 | rv.double_byte.clear(); |
| 1157 | return rv; |
| 1158 | } |
| 1159 | |
| 1160 | rv = mcclellan_build_strat::find_escape_strings(this_idx); |
| 1161 | |
| 1162 | assert(!rv.offset || rv.cr.all()); /* should have been limited by strat */ |
| 1163 | if (rv.offset) { |
| 1164 | rv.cr = CharReach::dot(); |
| 1165 | rv.double_byte.clear(); |
| 1166 | return rv; |
| 1167 | } |
| 1168 | |
| 1169 | if (rv.double_offset |
| 1170 | || !accel_gough_info.at(this_idx).two_byte) { |
| 1171 | rv.double_byte.clear(); |
| 1172 | } |
| 1173 | |
| 1174 | return rv; |
| 1175 | } |
| 1176 | |
| 1177 | void gough_build_strat::buildAccel(dstate_id_t this_idx, const AccelScheme &info, |
| 1178 | void *accel_out) { |
| 1179 | assert(mcclellan_build_strat::accelSize() == sizeof(AccelAux)); |
| 1180 | gough_accel *accel = (gough_accel *)accel_out; |
| 1181 | /* build a plain accelaux so we can work out where we can get to */ |
| 1182 | mcclellan_build_strat::buildAccel(this_idx, info, &accel->accel); |
| 1183 | DEBUG_PRINTF("state %hu is accel with type %hhu\n" , this_idx, |
| 1184 | accel->accel.accel_type); |
| 1185 | if (accel->accel.accel_type == ACCEL_NONE) { |
| 1186 | return; |
| 1187 | } |
| 1188 | |
| 1189 | assert(!accel->accel.generic.offset); |
| 1190 | assert(contains(accel_gough_info, this_idx)); |
| 1191 | accel->margin_dist = verify_u8(accel_gough_info.at(this_idx).margin); |
| 1192 | built_accel[accel] = this_idx; |
| 1193 | DEBUG_PRINTF("state %hu is accel with margin %hhu\n" , this_idx, |
| 1194 | accel->margin_dist); |
| 1195 | } |
| 1196 | |
| 1197 | namespace { |
| 1198 | struct raw_gough_report_list { |
| 1199 | set<som_report> reports; |
| 1200 | |
| 1201 | raw_gough_report_list( |
| 1202 | const vector<pair<ReportID, GoughSSAVar *>> &raw_reports, |
| 1203 | const ReportManager &rm, bool do_remap) { |
| 1204 | for (const auto &m : raw_reports) { |
| 1205 | ReportID r = do_remap ? rm.getProgramOffset(m.first) : m.first; |
| 1206 | u32 impl_slot = INVALID_SLOT; |
| 1207 | if (m.second) { |
| 1208 | impl_slot = m.second->slot; |
| 1209 | assert(impl_slot != INVALID_SLOT); |
| 1210 | } |
| 1211 | reports.emplace(r, impl_slot); |
| 1212 | } |
| 1213 | } |
| 1214 | |
| 1215 | bool operator<(const raw_gough_report_list &b) const { |
| 1216 | return reports < b.reports; |
| 1217 | } |
| 1218 | }; |
| 1219 | |
| 1220 | struct raw_gough_report_info_impl : public raw_report_info { |
| 1221 | vector<raw_gough_report_list> rl; |
| 1222 | u32 getReportListSize() const override; |
| 1223 | size_t size() const override; |
| 1224 | void fillReportLists(NFA *n, size_t base_offset, |
| 1225 | vector<u32> &ro /* out */) const override; |
| 1226 | }; |
| 1227 | } |
| 1228 | |
| 1229 | unique_ptr<raw_report_info> gough_build_strat::gatherReports( |
| 1230 | vector<u32> &reports, |
| 1231 | vector<u32> &reports_eod, |
| 1232 | u8 *isSingleReport, |
| 1233 | ReportID *arbReport) const { |
| 1234 | DEBUG_PRINTF("gathering reports\n" ); |
| 1235 | |
| 1236 | const bool remap_reports = has_managed_reports(rdfa.kind); |
| 1237 | |
| 1238 | auto ri = ue2::make_unique<raw_gough_report_info_impl>(); |
| 1239 | map<raw_gough_report_list, u32> rev; |
| 1240 | |
| 1241 | assert(!rdfa.states.empty()); |
| 1242 | |
| 1243 | vector<GoughVertex> verts(rdfa.states.size()); |
| 1244 | for (auto v : vertices_range(gg)) { |
| 1245 | verts[gg[v].state_id] = v; |
| 1246 | } |
| 1247 | |
| 1248 | for (u32 state_id = 0; state_id < verts.size(); state_id++) { |
| 1249 | assert(state_id < rdfa.states.size()); |
| 1250 | GoughVertex v = verts[state_id]; |
| 1251 | assert(v != GoughGraph::null_vertex() || !state_id); |
| 1252 | |
| 1253 | DEBUG_PRINTF("i = %zu [%zu]\n" , reports.size(), gg[v].reports.size()); |
| 1254 | if (v == GoughGraph::null_vertex() || gg[v].reports.empty()) { |
| 1255 | reports.push_back(MO_INVALID_IDX); |
| 1256 | continue; |
| 1257 | } |
| 1258 | |
| 1259 | raw_gough_report_list rrl(gg[v].reports, rm, remap_reports); |
| 1260 | DEBUG_PRINTF("non empty r %zu\n" , reports.size()); |
| 1261 | if (rev.find(rrl) != rev.end()) { |
| 1262 | reports.push_back(rev[rrl]); |
| 1263 | } else { |
| 1264 | DEBUG_PRINTF("adding to rl\n" ); |
| 1265 | rev[rrl] = ri->size(); |
| 1266 | reports.push_back(ri->size()); |
| 1267 | ri->rl.push_back(rrl); |
| 1268 | } |
| 1269 | } |
| 1270 | |
| 1271 | for (auto v : verts) { |
| 1272 | if (v == GoughGraph::null_vertex() || gg[v].reports_eod.empty()) { |
| 1273 | reports_eod.push_back(MO_INVALID_IDX); |
| 1274 | continue; |
| 1275 | } |
| 1276 | |
| 1277 | DEBUG_PRINTF("non empty r eod\n" ); |
| 1278 | raw_gough_report_list rrl(gg[v].reports_eod, rm, remap_reports); |
| 1279 | if (rev.find(rrl) != rev.end()) { |
| 1280 | reports_eod.push_back(rev[rrl]); |
| 1281 | continue; |
| 1282 | } |
| 1283 | |
| 1284 | DEBUG_PRINTF("adding to rl eod %zu\n" , gg[v].reports_eod.size()); |
| 1285 | rev[rrl] = ri->size(); |
| 1286 | reports_eod.push_back(ri->size()); |
| 1287 | ri->rl.push_back(rrl); |
| 1288 | } |
| 1289 | |
| 1290 | /* TODO: support single report in gough */ |
| 1291 | *isSingleReport = 0; |
| 1292 | *arbReport = MO_INVALID_IDX; |
| 1293 | assert(!ri->rl.empty()); /* all components should be able to generate |
| 1294 | reports */ |
| 1295 | return ri; |
| 1296 | } |
| 1297 | |
| 1298 | u32 raw_gough_report_info_impl::getReportListSize() const { |
| 1299 | u32 sz = 0; |
| 1300 | |
| 1301 | for (const raw_gough_report_list &r : rl) { |
| 1302 | sz += sizeof(gough_report_list); |
| 1303 | sz += sizeof(gough_report) * r.reports.size(); |
| 1304 | } |
| 1305 | |
| 1306 | return sz; |
| 1307 | } |
| 1308 | |
/* Number of distinct report lists gathered so far. */
size_t raw_gough_report_info_impl::size() const {
    return rl.size();
}
| 1312 | |
| 1313 | void raw_gough_report_info_impl::fillReportLists(NFA *n, size_t base_offset, |
| 1314 | vector<u32> &ro) const { |
| 1315 | for (const raw_gough_report_list &r : rl) { |
| 1316 | ro.push_back(base_offset); |
| 1317 | |
| 1318 | gough_report_list *p = (gough_report_list *)((char *)n + base_offset); |
| 1319 | u32 i = 0; |
| 1320 | |
| 1321 | for (const som_report &sr : r.reports) { |
| 1322 | p->report[i].r = sr.report; |
| 1323 | p->report[i].som = sr.slot; |
| 1324 | i++; |
| 1325 | } |
| 1326 | |
| 1327 | p->count = verify_u32(r.reports.size()); |
| 1328 | |
| 1329 | base_offset += sizeof(gough_report_list); |
| 1330 | base_offset += sizeof(gough_report) * r.reports.size(); |
| 1331 | } |
| 1332 | } |
| 1333 | |
| 1334 | } // namespace ue2 |
| 1335 | |