1 | #pragma once |
2 | |
#include <cstddef>
#include <cstdint>
#include <memory>
#include <string>
#include <vector>
6 | |
/// Forward declaration only: names Poco::Util::AbstractConfiguration as an incomplete type
/// without providing its definition.
namespace Poco { namespace Util { class AbstractConfiguration; } }
14 | |
/// SensitiveDataMasker allows removing sensitive data from queries using a set of regexp-based rules.
16 | |
/// It's used as a singleton via the getInstance method.
18 | |
19 | /// Initially it's empty (nullptr) and after manual initialization |
20 | /// (one-time, done by setInstance call) it takes the proper value which |
21 | /// is stored in unique_ptr. |
22 | |
/// It looks like the singleton is the best option here, as
/// two users of that object (OwnSplitChannel & Interpreters/executeQuery)
/// can't own/share that Masker properly without synchronization & locks,
/// and we can't afford setting global locks for each logged line.
27 | |
/// I've considered singleton alternatives, but it's unclear who should own the object,
/// and it introduces unnecessary complexity in implementation (passing references back and forth):
30 | /// |
31 | /// context can't own, as Context is destroyed before logger, |
32 | /// and logger lives longer and logging can still happen after Context destruction. |
33 | /// resetting masker in the logger at the moment of |
34 | /// context destruction can't be done w/o synchronization / locks in a safe manner. |
35 | /// |
/// logger is Poco derived and I didn't want to break its interface,
/// also logger can be dynamically reconfigured without server restart,
/// and it actually recreates OwnSplitChannel when reconfiguration happens,
/// so that makes it quite tricky. So it is a bad candidate for owning the masker too.
40 | |
41 | namespace DB |
42 | { |
43 | class SensitiveDataMasker |
44 | { |
45 | private: |
46 | class MaskingRule; |
47 | std::vector<std::unique_ptr<MaskingRule>> all_masking_rules; |
48 | static std::unique_ptr<SensitiveDataMasker> sensitive_data_masker; |
49 | |
50 | public: |
51 | SensitiveDataMasker(const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix); |
52 | ~SensitiveDataMasker(); |
53 | |
54 | /// Returns the number of matched rules. |
55 | size_t wipeSensitiveData(std::string & data) const; |
56 | |
57 | /// setInstance is not thread-safe and should be called once in single-thread mode. |
58 | /// https://github.com/ClickHouse/ClickHouse/pull/6810#discussion_r321183367 |
59 | static void setInstance(std::unique_ptr<SensitiveDataMasker> sensitive_data_masker_); |
60 | static SensitiveDataMasker * getInstance(); |
61 | |
62 | /// Used in tests. |
63 | void addMaskingRule(const std::string & name, const std::string & regexp_string, const std::string & replacement_string); |
64 | |
65 | #ifndef NDEBUG |
66 | void printStats(); |
67 | #endif |
68 | |
69 | size_t rulesCount() const; |
70 | }; |
71 | |
72 | }; |
73 | |