| 1 | /* |
| 2 | * Copyright (c) 2015-2017, Intel Corporation |
| 3 | * |
| 4 | * Redistribution and use in source and binary forms, with or without |
| 5 | * modification, are permitted provided that the following conditions are met: |
| 6 | * |
| 7 | * * Redistributions of source code must retain the above copyright notice, |
| 8 | * this list of conditions and the following disclaimer. |
| 9 | * * Redistributions in binary form must reproduce the above copyright |
| 10 | * notice, this list of conditions and the following disclaimer in the |
| 11 | * documentation and/or other materials provided with the distribution. |
| 12 | * * Neither the name of Intel Corporation nor the names of its contributors |
| 13 | * may be used to endorse or promote products derived from this software |
| 14 | * without specific prior written permission. |
| 15 | * |
| 16 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" |
| 17 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
| 18 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
| 19 | * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE |
| 20 | * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR |
| 21 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF |
| 22 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS |
| 23 | * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN |
| 24 | * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
| 25 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
| 26 | * POSSIBILITY OF SUCH DAMAGE. |
| 27 | */ |
| 28 | |
| 29 | /** \file |
| 30 | * \brief Literal analysis and scoring. |
| 31 | */ |
| 32 | |
| 33 | #ifndef NG_LITERAL_ANALYSIS_H |
| 34 | #define NG_LITERAL_ANALYSIS_H |
| 35 | |
| 36 | #include <set> |
| 37 | #include <vector> |
| 38 | |
| 39 | #include "ng_holder.h" |
| 40 | #include "util/ue2string.h" |
| 41 | |
| 42 | namespace ue2 { |
| 43 | |
| 44 | #define NO_LITERAL_AT_EDGE_SCORE 10000000ULL /* 10^7; NOTE(review): presumably the score given to an edge with no usable literal -- confirm against the implementation */ |
| 45 | #define INVALID_EDGE_CAP 100000000ULL /* 10^8; special-to-special score */ |
| 46 | |
| 47 | class NGHolder; /* forward declaration of the graph type taken by reference/pointer in the declarations below */ |
| 48 | |
| 49 | /** |
| 50 | * Fetch the literal set for vertex \p v of graph \p g and return it by value. |
| 51 | * Note: does NOT take into account any constraints due to streaming mode |
| 52 | * requirements. If \p only_first_encounter is set (the default), the output |
| 53 | * set may drop literals generated by revisiting the destination vertex. |
| 54 | * The second overload takes an edge \p e instead of a vertex. |
| 55 | */ |
| 56 | std::set<ue2_literal> getLiteralSet(const NGHolder &g, const NFAVertex &v, |
| 57 | bool only_first_encounter = true); |
| 58 | std::set<ue2_literal> getLiteralSet(const NGHolder &g, const NFAEdge &e); |
| 59 | |
| 60 | /** |
| 61 | * Returns true if we are unable to use the mixed sensitivity literal \p s in |
| 62 | * rose (as our literal matchers are generally either case sensitive or not). |
| 63 | * |
| 64 | * Shortish mixed sensitivity literals can be handled by confirm checks in rose |
| 65 | * and are not flagged as bad, so false is returned for them. |
| 66 | */ |
| 67 | bool bad_mixed_sensitivity(const ue2_literal &s); |
| 68 | |
| 69 | /** |
| 70 | * Score all the edges in the given graph \p h, returning the scores by value |
| 71 | * as a vector indexed by edge_index. \p known_bad defaults to an empty set. */ |
| 72 | std::vector<u64a> scoreEdges(const NGHolder &h, |
| 73 | const flat_set<NFAEdge> &known_bad = {}); |
| 74 | |
| 75 | /** Returns a score (u64a) for the literal set \p s. Lower scores are better. */ |
| 76 | u64a scoreSet(const std::set<ue2_literal> &s); |
| 77 | |
| 78 | /** Compress the literal set \p s to fewer literals. NOTE(review): the returned u64a is presumably the score of the compressed set -- confirm. */ |
| 79 | u64a compressAndScore(std::set<ue2_literal> &s); |
| 80 | |
| 81 | /** |
| 82 | * Compress the literal set \p s to fewer literals and replace any long mixed |
| 83 | * sensitivity literals with supported literals. NOTE(review): the returned u64a is presumably the score of the resulting set -- confirm. |
| 84 | */ |
| 85 | u64a sanitizeAndCompressAndScore(std::set<ue2_literal> &s); |
| 86 | /** NOTE(review): undocumented; presumably returns true on success, writing the leading literal of \p g to \p lit_out and the remainder of the graph to \p rhs -- confirm. */ |
| 87 | bool splitOffLeadingLiteral(const NGHolder &g, ue2_literal *lit_out, |
| 88 | NGHolder *rhs); |
| 89 | /** NOTE(review): undocumented; presumably returns true on success, writing the trailing literal of \p g to \p lit_out -- confirm. */ |
| 90 | bool getTrailingLiteral(const NGHolder &g, ue2_literal *lit_out); |
| 91 | |
| 92 | /** \brief Returns true if the given literal \p lit is the only thing in the |
| 93 | * graph \p g, from (start or startDs) to accept. */ |
| 94 | bool literalIsWholeGraph(const NGHolder &g, const ue2_literal &lit); |
| 95 | |
| 96 | } // namespace ue2 |
| 97 | |
| 98 | #endif |
| 99 | |