| 1 | #include <Common/Exception.h> |
| 2 | #include <Common/SensitiveDataMasker.h> |
| 3 | #include <Poco/AutoPtr.h> |
| 4 | #include <Poco/Util/XMLConfiguration.h> |
| 5 | #include <Poco/XML/XMLException.h> |
| 6 | |
| 7 | #pragma GCC diagnostic ignored "-Wsign-compare" |
| 8 | #ifdef __clang__ |
| 9 | # pragma clang diagnostic ignored "-Wzero-as-null-pointer-constant" |
| 10 | # pragma clang diagnostic ignored "-Wundef" |
| 11 | #endif |
| 12 | |
| 13 | #include <gtest/gtest.h> |
| 14 | #include <chrono> |
| 15 | |
| 16 | |
/// Forward declarations of the error codes this test compares against.
/// They are declared `extern`, so their definitions live in another translation unit.
namespace DB
{
namespace ErrorCodes
{
    extern const int CANNOT_COMPILE_REGEXP;
    extern const int NO_ELEMENTS_IN_CONFIG;
    extern const int INVALID_CONFIG_PARAMETER;
}
}
| 26 | |
| 27 | |
| 28 | TEST(Common, SensitiveDataMasker) |
| 29 | { |
| 30 | |
| 31 | Poco::AutoPtr<Poco::Util::XMLConfiguration> empty_xml_config = new Poco::Util::XMLConfiguration(); |
| 32 | DB::SensitiveDataMasker masker(*empty_xml_config , "" ); |
| 33 | masker.addMaskingRule("all a letters" , "a+" , "--a--" ); |
| 34 | masker.addMaskingRule("all b letters" , "b+" , "--b--" ); |
| 35 | masker.addMaskingRule("all d letters" , "d+" , "--d--" ); |
| 36 | masker.addMaskingRule("all x letters" , "x+" , "--x--" ); |
| 37 | masker.addMaskingRule("rule \"d\" result" , "--d--" , "*****" ); // RE2 regexps are applied one-by-one in order |
| 38 | std::string x = "aaaaaaaaaaaaa bbbbbbbbbb cccc aaaaaaaaaaaa d " ; |
| 39 | EXPECT_EQ(masker.wipeSensitiveData(x), 5); |
| 40 | EXPECT_EQ(x, "--a-- --b-- cccc --a-- ***** " ); |
| 41 | #ifndef NDEBUG |
| 42 | masker.printStats(); |
| 43 | #endif |
| 44 | EXPECT_EQ(masker.wipeSensitiveData(x), 3); |
| 45 | EXPECT_EQ(x, "----a---- ----b---- cccc ----a---- ***** " ); |
| 46 | #ifndef NDEBUG |
| 47 | masker.printStats(); |
| 48 | #endif |
| 49 | |
| 50 | DB::SensitiveDataMasker masker2(*empty_xml_config , "" ); |
| 51 | masker2.addMaskingRule("hide root password" , "qwerty123" , "******" ); |
| 52 | masker2.addMaskingRule("hide SSN" , "[0-9]{3}-[0-9]{2}-[0-9]{4}" , "000-00-0000" ); |
| 53 | masker2.addMaskingRule("hide email" , "[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\\.[A-Za-z]{2,4}" , "hidden@hidden.test" ); |
| 54 | |
| 55 | std::string query = "SELECT id FROM mysql('localhost:3308', 'database', 'table', 'root', 'qwerty123') WHERE ssn='123-45-6789' or " |
| 56 | "email='JonhSmith@secret.domain.test'" ; |
| 57 | EXPECT_EQ(masker2.wipeSensitiveData(query), 3); |
| 58 | EXPECT_EQ( |
| 59 | query, |
| 60 | "SELECT id FROM mysql('localhost:3308', 'database', 'table', 'root', '******') WHERE " |
| 61 | "ssn='000-00-0000' or email='hidden@hidden.test'" ); |
| 62 | |
| 63 | #ifndef NDEBUG |
| 64 | // simple benchmark |
| 65 | auto start = std::chrono::high_resolution_clock::now(); |
| 66 | constexpr unsigned long int iterations = 200000; |
| 67 | for (int i = 0; i < iterations; ++i) |
| 68 | { |
| 69 | std::string query2 = "SELECT id FROM mysql('localhost:3308', 'database', 'table', 'root', 'qwerty123') WHERE ssn='123-45-6789' or " |
| 70 | "email='JonhSmith@secret.domain.test'" ; |
| 71 | masker2.wipeSensitiveData(query2); |
| 72 | } |
| 73 | auto finish = std::chrono::high_resolution_clock::now(); |
| 74 | std::chrono::duration<double> elapsed = finish - start; |
| 75 | std::cout << "Elapsed time: " << elapsed.count() << "s per " << iterations <<" calls (" << elapsed.count() * 1000000 / iterations << "µs per call)" |
| 76 | << std::endl; |
| 77 | // I have: "Elapsed time: 3.44022s per 200000 calls (17.2011µs per call)" |
| 78 | masker2.printStats(); |
| 79 | #endif |
| 80 | |
| 81 | DB::SensitiveDataMasker maskerbad(*empty_xml_config , "" ); |
| 82 | |
| 83 | // gtest has not good way to check exception content, so just do it manually (see https://github.com/google/googletest/issues/952 ) |
| 84 | try |
| 85 | { |
| 86 | maskerbad.addMaskingRule("bad regexp" , "**" , "" ); |
| 87 | ADD_FAILURE() << "addMaskingRule() should throw an error" << std::endl; |
| 88 | } |
| 89 | catch (const DB::Exception & e) |
| 90 | { |
| 91 | EXPECT_EQ( |
| 92 | std::string(e.what()), |
| 93 | "SensitiveDataMasker: cannot compile re2: **, error: no argument for repetition operator: *. Look at " |
| 94 | "https://github.com/google/re2/wiki/Syntax for reference." ); |
| 95 | EXPECT_EQ(e.code(), DB::ErrorCodes::CANNOT_COMPILE_REGEXP); |
| 96 | } |
| 97 | /* catch (...) { // not needed, gtest will react unhandled exception |
| 98 | FAIL() << "ERROR: Unexpected exception thrown: " << std::current_exception << std::endl; // std::current_exception is part of C++11x |
| 99 | } */ |
| 100 | |
| 101 | EXPECT_EQ(maskerbad.rulesCount(), 0); |
| 102 | EXPECT_EQ(maskerbad.wipeSensitiveData(x), 0); |
| 103 | |
| 104 | { |
| 105 | std::istringstream xml_isteam(R"END(<?xml version="1.0"?> |
| 106 | <clickhouse> |
| 107 | <query_masking_rules> |
| 108 | <rule> |
| 109 | <name>hide SSN</name><!-- by default: it will use xml path, like query_masking_rules.rule[1] --> |
| 110 | <regexp>[0-9]{3}-[0-9]{2}-[0-9]{4}</regexp><!-- mandatory --> |
| 111 | <replace>000-00-0000</replace><!-- by default - six asterisks (******) --> |
| 112 | </rule> |
| 113 | <rule> |
| 114 | <name>hide root password</name> |
| 115 | <regexp>qwerty123</regexp> |
| 116 | </rule> |
| 117 | <rule> |
| 118 | <regexp>(?i)Ivan</regexp> |
| 119 | <replace>John</replace> |
| 120 | </rule> |
| 121 | <rule> |
| 122 | <regexp>(?i)Petrov</regexp> |
| 123 | <replace>Doe</replace> |
| 124 | </rule> |
| 125 | <rule> |
| 126 | <name>hide email</name> |
| 127 | <regexp>(?i)[A-Z0-9._%+-]+@[A-Z0-9.-]+\.[A-Z]{2,4}</regexp> |
| 128 | <replace>hidden@hidden.test</replace> |
| 129 | </rule> |
| 130 | <rule> |
| 131 | <name>remove selects to bad_words table</name> |
| 132 | <regexp>^.*bad_words.*$</regexp> |
| 133 | <replace>[QUERY IS CENSORED]</replace> |
| 134 | </rule> |
| 135 | </query_masking_rules> |
| 136 | </clickhouse>)END" ); |
| 137 | |
| 138 | Poco::AutoPtr<Poco::Util::XMLConfiguration> xml_config = new Poco::Util::XMLConfiguration(xml_isteam); |
| 139 | DB::SensitiveDataMasker masker_xml_based(*xml_config, "query_masking_rules" ); |
| 140 | std::string top_secret = "The e-mail of IVAN PETROV is kotik1902@sdsdf.test, and the password is qwerty123" ; |
| 141 | EXPECT_EQ(masker_xml_based.wipeSensitiveData(top_secret), 4); |
| 142 | EXPECT_EQ(top_secret, "The e-mail of John Doe is hidden@hidden.test, and the password is ******" ); |
| 143 | |
| 144 | top_secret = "SELECT * FROM bad_words" ; |
| 145 | EXPECT_EQ(masker_xml_based.wipeSensitiveData(top_secret), 1); |
| 146 | EXPECT_EQ(top_secret, "[QUERY IS CENSORED]" ); |
| 147 | |
| 148 | #ifndef NDEBUG |
| 149 | masker_xml_based.printStats(); |
| 150 | #endif |
| 151 | } |
| 152 | |
| 153 | try |
| 154 | { |
| 155 | std::istringstream xml_isteam_bad(R"END(<?xml version="1.0"?> |
| 156 | <clickhouse> |
| 157 | <query_masking_rules> |
| 158 | <rule> |
| 159 | <name>test</name> |
| 160 | <regexp>abc</regexp> |
| 161 | </rule> |
| 162 | <rule> |
| 163 | <name>test</name> |
| 164 | <regexp>abc</regexp> |
| 165 | </rule> |
| 166 | </query_masking_rules> |
| 167 | </clickhouse>)END" ); |
| 168 | Poco::AutoPtr<Poco::Util::XMLConfiguration> xml_config = new Poco::Util::XMLConfiguration(xml_isteam_bad); |
| 169 | DB::SensitiveDataMasker masker_xml_based_exception_check(*xml_config, "query_masking_rules" ); |
| 170 | |
| 171 | ADD_FAILURE() << "XML should throw an error on bad XML" << std::endl; |
| 172 | } |
| 173 | catch (const DB::Exception & e) |
| 174 | { |
| 175 | EXPECT_EQ( |
| 176 | std::string(e.what()), |
| 177 | "query_masking_rules configuration contains more than one rule named 'test'." ); |
| 178 | EXPECT_EQ(e.code(), DB::ErrorCodes::INVALID_CONFIG_PARAMETER); |
| 179 | } |
| 180 | |
| 181 | try |
| 182 | { |
| 183 | std::istringstream xml_isteam_bad(R"END(<?xml version="1.0"?> |
| 184 | <clickhouse> |
| 185 | <query_masking_rules> |
| 186 | <rule><name>test</name></rule> |
| 187 | </query_masking_rules> |
| 188 | </clickhouse>)END" ); |
| 189 | |
| 190 | Poco::AutoPtr<Poco::Util::XMLConfiguration> xml_config = new Poco::Util::XMLConfiguration(xml_isteam_bad); |
| 191 | DB::SensitiveDataMasker masker_xml_based_exception_check(*xml_config, "query_masking_rules" ); |
| 192 | |
| 193 | ADD_FAILURE() << "XML should throw an error on bad XML" << std::endl; |
| 194 | } |
| 195 | catch (const DB::Exception & e) |
| 196 | { |
| 197 | EXPECT_EQ( |
| 198 | std::string(e.what()), |
| 199 | "query_masking_rules configuration, rule 'test' has no <regexp> node or <regexp> is empty." ); |
| 200 | EXPECT_EQ(e.code(), DB::ErrorCodes::NO_ELEMENTS_IN_CONFIG); |
| 201 | } |
| 202 | |
| 203 | try |
| 204 | { |
| 205 | std::istringstream xml_isteam_bad(R"END(<?xml version="1.0"?> |
| 206 | <clickhouse> |
| 207 | <query_masking_rules> |
| 208 | <rule><name>test</name><regexp>())(</regexp></rule> |
| 209 | </query_masking_rules> |
| 210 | </clickhouse>)END" ); |
| 211 | |
| 212 | Poco::AutoPtr<Poco::Util::XMLConfiguration> xml_config = new Poco::Util::XMLConfiguration(xml_isteam_bad); |
| 213 | DB::SensitiveDataMasker masker_xml_based_exception_check(*xml_config, "query_masking_rules" ); |
| 214 | |
| 215 | ADD_FAILURE() << "XML should throw an error on bad XML" << std::endl; |
| 216 | } |
| 217 | catch (const DB::Exception & e) |
| 218 | { |
| 219 | EXPECT_EQ( |
| 220 | std::string(e.message()), |
| 221 | "SensitiveDataMasker: cannot compile re2: ())(, error: missing ): ())(. Look at https://github.com/google/re2/wiki/Syntax for reference.: while adding query masking rule 'test'." |
| 222 | ); |
| 223 | EXPECT_EQ(e.code(), DB::ErrorCodes::CANNOT_COMPILE_REGEXP); |
| 224 | } |
| 225 | |
| 226 | } |
| 227 | |