| 1 | #pragma once |
| 2 | |
#include <cstddef>
#include <cstdint>
#include <memory>
#include <string>
#include <vector>
| 6 | |
/// Forward declaration only: this header mentions Poco's configuration class
/// solely by const reference, so the full Poco header is not needed here.
namespace Poco { namespace Util { class AbstractConfiguration; } }
| 14 | |
| 15 | /// SensitiveDataMasker removes sensitive data from queries using a set of regexp-based rules. |
| 16 | |
| 17 | /// It's used as a singleton via the getInstance method. |
| 18 | |
| 19 | /// Initially it's empty (nullptr) and after manual initialization |
| 20 | /// (one-time, done by setInstance call) it takes the proper value which |
| 21 | /// is stored in unique_ptr. |
| 22 | |
| 23 | /// It looks like the singleton is the best option here, as |
| 24 | /// two users of that object (OwnSplitChannel & Interpreters/executeQuery) |
| 25 | /// can't own/share that Masker properly without synchronization & locks, |
| 26 | /// and we can't afford setting global locks for each logged line. |
| 27 | |
| 28 | /// I've considered singleton alternatives, but it's unclear who should own the object, |
| 29 | /// and they introduce unnecessary complexity in the implementation (passing references back and forth): |
| 30 | /// |
| 31 | /// context can't own, as Context is destroyed before logger, |
| 32 | /// and logger lives longer and logging can still happen after Context destruction. |
| 33 | /// resetting masker in the logger at the moment of |
| 34 | /// context destruction can't be done w/o synchronization / locks in a safe manner. |
| 35 | /// |
| 36 | /// The logger is Poco-derived and I didn't want to break its interface; |
| 37 | /// also, the logger can be dynamically reconfigured without a server restart, |
| 38 | /// and it actually recreates OwnSplitChannel when reconfiguration happens, |
| 39 | /// so that makes it quite tricky. So it is a bad candidate for owning the masker too. |
| 40 | |
| 41 | namespace DB |
| 42 | { |
| 43 | class SensitiveDataMasker |
| 44 | { |
| 45 | private: |
| 46 | class MaskingRule; |
| 47 | std::vector<std::unique_ptr<MaskingRule>> all_masking_rules; |
| 48 | static std::unique_ptr<SensitiveDataMasker> sensitive_data_masker; |
| 49 | |
| 50 | public: |
| 51 | SensitiveDataMasker(const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix); |
| 52 | ~SensitiveDataMasker(); |
| 53 | |
| 54 | /// Returns the number of matched rules. |
| 55 | size_t wipeSensitiveData(std::string & data) const; |
| 56 | |
| 57 | /// setInstance is not thread-safe and should be called once in single-thread mode. |
| 58 | /// https://github.com/ClickHouse/ClickHouse/pull/6810#discussion_r321183367 |
| 59 | static void setInstance(std::unique_ptr<SensitiveDataMasker> sensitive_data_masker_); |
| 60 | static SensitiveDataMasker * getInstance(); |
| 61 | |
| 62 | /// Used in tests. |
| 63 | void addMaskingRule(const std::string & name, const std::string & regexp_string, const std::string & replacement_string); |
| 64 | |
| 65 | #ifndef NDEBUG |
| 66 | void printStats(); |
| 67 | #endif |
| 68 | |
| 69 | size_t rulesCount() const; |
| 70 | }; |
| 71 | |
| 72 | }; |
| 73 | |