1#include <Common/Exception.h>
2#include <Common/SensitiveDataMasker.h>
3#include <Poco/AutoPtr.h>
4#include <Poco/Util/XMLConfiguration.h>
5#include <Poco/XML/XMLException.h>
6
7#pragma GCC diagnostic ignored "-Wsign-compare"
8#ifdef __clang__
9# pragma clang diagnostic ignored "-Wzero-as-null-pointer-constant"
10# pragma clang diagnostic ignored "-Wundef"
11#endif
12
13#include <gtest/gtest.h>
14#include <chrono>
15
16
/// Forward declarations of the error codes this test compares against.
/// They are only declared here (`extern`); the definitions live elsewhere in the project.
namespace DB
{
namespace ErrorCodes
{
    extern const int CANNOT_COMPILE_REGEXP;
    extern const int NO_ELEMENTS_IN_CONFIG;
    extern const int INVALID_CONFIG_PARAMETER;
}
}
26
27
28TEST(Common, SensitiveDataMasker)
29{
30
31 Poco::AutoPtr<Poco::Util::XMLConfiguration> empty_xml_config = new Poco::Util::XMLConfiguration();
32 DB::SensitiveDataMasker masker(*empty_xml_config , "");
33 masker.addMaskingRule("all a letters", "a+", "--a--");
34 masker.addMaskingRule("all b letters", "b+", "--b--");
35 masker.addMaskingRule("all d letters", "d+", "--d--");
36 masker.addMaskingRule("all x letters", "x+", "--x--");
37 masker.addMaskingRule("rule \"d\" result", "--d--", "*****"); // RE2 regexps are applied one-by-one in order
38 std::string x = "aaaaaaaaaaaaa bbbbbbbbbb cccc aaaaaaaaaaaa d ";
39 EXPECT_EQ(masker.wipeSensitiveData(x), 5);
40 EXPECT_EQ(x, "--a-- --b-- cccc --a-- ***** ");
41#ifndef NDEBUG
42 masker.printStats();
43#endif
44 EXPECT_EQ(masker.wipeSensitiveData(x), 3);
45 EXPECT_EQ(x, "----a---- ----b---- cccc ----a---- ***** ");
46#ifndef NDEBUG
47 masker.printStats();
48#endif
49
50 DB::SensitiveDataMasker masker2(*empty_xml_config , "");
51 masker2.addMaskingRule("hide root password", "qwerty123", "******");
52 masker2.addMaskingRule("hide SSN", "[0-9]{3}-[0-9]{2}-[0-9]{4}", "000-00-0000");
53 masker2.addMaskingRule("hide email", "[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\\.[A-Za-z]{2,4}", "hidden@hidden.test");
54
55 std::string query = "SELECT id FROM mysql('localhost:3308', 'database', 'table', 'root', 'qwerty123') WHERE ssn='123-45-6789' or "
56 "email='JonhSmith@secret.domain.test'";
57 EXPECT_EQ(masker2.wipeSensitiveData(query), 3);
58 EXPECT_EQ(
59 query,
60 "SELECT id FROM mysql('localhost:3308', 'database', 'table', 'root', '******') WHERE "
61 "ssn='000-00-0000' or email='hidden@hidden.test'");
62
63#ifndef NDEBUG
64 // simple benchmark
65 auto start = std::chrono::high_resolution_clock::now();
66 constexpr unsigned long int iterations = 200000;
67 for (int i = 0; i < iterations; ++i)
68 {
69 std::string query2 = "SELECT id FROM mysql('localhost:3308', 'database', 'table', 'root', 'qwerty123') WHERE ssn='123-45-6789' or "
70 "email='JonhSmith@secret.domain.test'";
71 masker2.wipeSensitiveData(query2);
72 }
73 auto finish = std::chrono::high_resolution_clock::now();
74 std::chrono::duration<double> elapsed = finish - start;
75 std::cout << "Elapsed time: " << elapsed.count() << "s per " << iterations <<" calls (" << elapsed.count() * 1000000 / iterations << "µs per call)"
76 << std::endl;
77 // I have: "Elapsed time: 3.44022s per 200000 calls (17.2011µs per call)"
78 masker2.printStats();
79#endif
80
81 DB::SensitiveDataMasker maskerbad(*empty_xml_config , "");
82
83 // gtest has not good way to check exception content, so just do it manually (see https://github.com/google/googletest/issues/952 )
84 try
85 {
86 maskerbad.addMaskingRule("bad regexp", "**", "");
87 ADD_FAILURE() << "addMaskingRule() should throw an error" << std::endl;
88 }
89 catch (const DB::Exception & e)
90 {
91 EXPECT_EQ(
92 std::string(e.what()),
93 "SensitiveDataMasker: cannot compile re2: **, error: no argument for repetition operator: *. Look at "
94 "https://github.com/google/re2/wiki/Syntax for reference.");
95 EXPECT_EQ(e.code(), DB::ErrorCodes::CANNOT_COMPILE_REGEXP);
96 }
97 /* catch (...) { // not needed, gtest will react unhandled exception
98 FAIL() << "ERROR: Unexpected exception thrown: " << std::current_exception << std::endl; // std::current_exception is part of C++11x
99 } */
100
101 EXPECT_EQ(maskerbad.rulesCount(), 0);
102 EXPECT_EQ(maskerbad.wipeSensitiveData(x), 0);
103
104 {
105 std::istringstream xml_isteam(R"END(<?xml version="1.0"?>
106<clickhouse>
107 <query_masking_rules>
108 <rule>
109 <name>hide SSN</name><!-- by default: it will use xml path, like query_masking_rules.rule[1] -->
110 <regexp>[0-9]{3}-[0-9]{2}-[0-9]{4}</regexp><!-- mandatory -->
111 <replace>000-00-0000</replace><!-- by default - six asterisks (******) -->
112 </rule>
113 <rule>
114 <name>hide root password</name>
115 <regexp>qwerty123</regexp>
116 </rule>
117 <rule>
118 <regexp>(?i)Ivan</regexp>
119 <replace>John</replace>
120 </rule>
121 <rule>
122 <regexp>(?i)Petrov</regexp>
123 <replace>Doe</replace>
124 </rule>
125 <rule>
126 <name>hide email</name>
127 <regexp>(?i)[A-Z0-9._%+-]+@[A-Z0-9.-]+\.[A-Z]{2,4}</regexp>
128 <replace>hidden@hidden.test</replace>
129 </rule>
130 <rule>
131 <name>remove selects to bad_words table</name>
132 <regexp>^.*bad_words.*$</regexp>
133 <replace>[QUERY IS CENSORED]</replace>
134 </rule>
135 </query_masking_rules>
136</clickhouse>)END");
137
138 Poco::AutoPtr<Poco::Util::XMLConfiguration> xml_config = new Poco::Util::XMLConfiguration(xml_isteam);
139 DB::SensitiveDataMasker masker_xml_based(*xml_config, "query_masking_rules");
140 std::string top_secret = "The e-mail of IVAN PETROV is kotik1902@sdsdf.test, and the password is qwerty123";
141 EXPECT_EQ(masker_xml_based.wipeSensitiveData(top_secret), 4);
142 EXPECT_EQ(top_secret, "The e-mail of John Doe is hidden@hidden.test, and the password is ******");
143
144 top_secret = "SELECT * FROM bad_words";
145 EXPECT_EQ(masker_xml_based.wipeSensitiveData(top_secret), 1);
146 EXPECT_EQ(top_secret, "[QUERY IS CENSORED]");
147
148#ifndef NDEBUG
149 masker_xml_based.printStats();
150#endif
151 }
152
153 try
154 {
155 std::istringstream xml_isteam_bad(R"END(<?xml version="1.0"?>
156<clickhouse>
157 <query_masking_rules>
158 <rule>
159 <name>test</name>
160 <regexp>abc</regexp>
161 </rule>
162 <rule>
163 <name>test</name>
164 <regexp>abc</regexp>
165 </rule>
166 </query_masking_rules>
167</clickhouse>)END");
168 Poco::AutoPtr<Poco::Util::XMLConfiguration> xml_config = new Poco::Util::XMLConfiguration(xml_isteam_bad);
169 DB::SensitiveDataMasker masker_xml_based_exception_check(*xml_config, "query_masking_rules");
170
171 ADD_FAILURE() << "XML should throw an error on bad XML" << std::endl;
172 }
173 catch (const DB::Exception & e)
174 {
175 EXPECT_EQ(
176 std::string(e.what()),
177 "query_masking_rules configuration contains more than one rule named 'test'.");
178 EXPECT_EQ(e.code(), DB::ErrorCodes::INVALID_CONFIG_PARAMETER);
179 }
180
181 try
182 {
183 std::istringstream xml_isteam_bad(R"END(<?xml version="1.0"?>
184<clickhouse>
185 <query_masking_rules>
186 <rule><name>test</name></rule>
187 </query_masking_rules>
188</clickhouse>)END");
189
190 Poco::AutoPtr<Poco::Util::XMLConfiguration> xml_config = new Poco::Util::XMLConfiguration(xml_isteam_bad);
191 DB::SensitiveDataMasker masker_xml_based_exception_check(*xml_config, "query_masking_rules");
192
193 ADD_FAILURE() << "XML should throw an error on bad XML" << std::endl;
194 }
195 catch (const DB::Exception & e)
196 {
197 EXPECT_EQ(
198 std::string(e.what()),
199 "query_masking_rules configuration, rule 'test' has no <regexp> node or <regexp> is empty.");
200 EXPECT_EQ(e.code(), DB::ErrorCodes::NO_ELEMENTS_IN_CONFIG);
201 }
202
203 try
204 {
205 std::istringstream xml_isteam_bad(R"END(<?xml version="1.0"?>
206<clickhouse>
207 <query_masking_rules>
208 <rule><name>test</name><regexp>())(</regexp></rule>
209 </query_masking_rules>
210</clickhouse>)END");
211
212 Poco::AutoPtr<Poco::Util::XMLConfiguration> xml_config = new Poco::Util::XMLConfiguration(xml_isteam_bad);
213 DB::SensitiveDataMasker masker_xml_based_exception_check(*xml_config, "query_masking_rules");
214
215 ADD_FAILURE() << "XML should throw an error on bad XML" << std::endl;
216 }
217 catch (const DB::Exception & e)
218 {
219 EXPECT_EQ(
220 std::string(e.message()),
221 "SensitiveDataMasker: cannot compile re2: ())(, error: missing ): ())(. Look at https://github.com/google/re2/wiki/Syntax for reference.: while adding query masking rule 'test'."
222 );
223 EXPECT_EQ(e.code(), DB::ErrorCodes::CANNOT_COMPILE_REGEXP);
224 }
225
226}
227