1 | #include "LibraryDictionarySource.h" |
2 | #include <DataStreams/OneBlockInputStream.h> |
3 | #include <Interpreters/Context.h> |
4 | #include <Poco/File.h> |
5 | #include <common/logger_useful.h> |
6 | #include <ext/bit_cast.h> |
7 | #include <ext/range.h> |
8 | #include <ext/scope_guard.h> |
9 | #include <Common/StringUtils/StringUtils.h> |
10 | #include "DictionarySourceFactory.h" |
11 | #include "DictionaryStructure.h" |
12 | #include "LibraryDictionarySourceExternal.h" |
13 | #include "registerDictionaries.h" |
14 | |
15 | namespace DB |
16 | { |
/// Error codes referenced in this file. They are only declared here (`extern`);
/// their definitions are not part of this translation unit.
namespace ErrorCodes
{
extern const int NOT_IMPLEMENTED;
extern const int SIZES_OF_COLUMNS_DOESNT_MATCH;
extern const int FILE_DOESNT_EXIST;
extern const int EXTERNAL_LIBRARY_ERROR;
extern const int PATH_ACCESS_DENIED;
}
25 | |
26 | |
27 | class CStringsHolder |
28 | { |
29 | public: |
30 | using Container = std::vector<std::string>; |
31 | explicit CStringsHolder(const Container & strings_pass) |
32 | { |
33 | strings_holder = strings_pass; |
34 | strings.size = strings_holder.size(); |
35 | ptr_holder = std::make_unique<ClickHouseLibrary::CString[]>(strings.size); |
36 | strings.data = ptr_holder.get(); |
37 | size_t i = 0; |
38 | for (auto & str : strings_holder) |
39 | { |
40 | strings.data[i] = str.c_str(); |
41 | ++i; |
42 | } |
43 | } |
44 | |
45 | ClickHouseLibrary::CStrings strings; // will pass pointer to lib |
46 | |
47 | private: |
48 | std::unique_ptr<ClickHouseLibrary::CString[]> ptr_holder = nullptr; |
49 | Container strings_holder; |
50 | }; |
51 | |
52 | |
53 | namespace |
54 | { |
/// Suffix appended to the source's config prefix to form the config root that is passed to getLibSettings.
constexpr auto lib_config_settings = ".settings" ;
56 | |
57 | |
58 | CStringsHolder getLibSettings(const Poco::Util::AbstractConfiguration & config, const std::string & config_root) |
59 | { |
60 | Poco::Util::AbstractConfiguration::Keys config_keys; |
61 | config.keys(config_root, config_keys); |
62 | CStringsHolder::Container strings; |
63 | for (const auto & key : config_keys) |
64 | { |
65 | std::string key_name = key; |
66 | auto bracket_pos = key.find('['); |
67 | if (bracket_pos != std::string::npos && bracket_pos > 0) |
68 | key_name = key.substr(0, bracket_pos); |
69 | strings.emplace_back(key_name); |
70 | strings.emplace_back(config.getString(config_root + "." + key)); |
71 | } |
72 | return CStringsHolder(strings); |
73 | } |
74 | |
75 | |
76 | Block dataToBlock(const Block & sample_block, const void * data) |
77 | { |
78 | if (!data) |
79 | throw Exception("LibraryDictionarySource: No data returned" , ErrorCodes::EXTERNAL_LIBRARY_ERROR); |
80 | |
81 | auto columns_received = static_cast<const ClickHouseLibrary::Table *>(data); |
82 | if (columns_received->error_code) |
83 | throw Exception( |
84 | "LibraryDictionarySource: Returned error: " + std::to_string(columns_received->error_code) + " " |
85 | + (columns_received->error_string ? columns_received->error_string : "" ), |
86 | ErrorCodes::EXTERNAL_LIBRARY_ERROR); |
87 | |
88 | MutableColumns columns(sample_block.columns()); |
89 | for (const auto i : ext::range(0, columns.size())) |
90 | columns[i] = sample_block.getByPosition(i).column->cloneEmpty(); |
91 | |
92 | for (size_t col_n = 0; col_n < columns_received->size; ++col_n) |
93 | { |
94 | if (columns.size() != columns_received->data[col_n].size) |
95 | throw Exception( |
96 | "LibraryDictionarySource: Returned unexpected number of columns: " + std::to_string(columns_received->data[col_n].size) |
97 | + ", must be " + std::to_string(columns.size()), |
98 | ErrorCodes::SIZES_OF_COLUMNS_DOESNT_MATCH); |
99 | |
100 | for (size_t row_n = 0; row_n < columns_received->data[col_n].size; ++row_n) |
101 | { |
102 | const auto & field = columns_received->data[col_n].data[row_n]; |
103 | if (!field.data) |
104 | { |
105 | /// sample_block contains null_value (from config) inside corresponding column |
106 | const auto & col = sample_block.getByPosition(row_n); |
107 | columns[row_n]->insertFrom(*(col.column), 0); |
108 | } |
109 | else |
110 | { |
111 | const auto & size = field.size; |
112 | columns[row_n]->insertData(static_cast<const char *>(field.data), size); |
113 | } |
114 | } |
115 | } |
116 | |
117 | return sample_block.cloneWithColumns(std::move(columns)); |
118 | } |
119 | } |
120 | |
121 | |
122 | LibraryDictionarySource::LibraryDictionarySource( |
123 | const DictionaryStructure & dict_struct_, |
124 | const Poco::Util::AbstractConfiguration & config, |
125 | const std::string & config_prefix_, |
126 | Block & sample_block_, |
127 | const Context & context, |
128 | bool check_config) |
129 | : log(&Logger::get("LibraryDictionarySource" )) |
130 | , dict_struct{dict_struct_} |
131 | , config_prefix{config_prefix_} |
132 | , path{config.getString(config_prefix + ".path" , "" )} |
133 | , sample_block{sample_block_} |
134 | { |
135 | |
136 | if (check_config) |
137 | { |
138 | const String dictionaries_lib_path = context.getDictionariesLibPath(); |
139 | if (!startsWith(path, dictionaries_lib_path)) |
140 | throw Exception("LibraryDictionarySource: Library path " + dictionaries_lib_path + " is not inside " + dictionaries_lib_path, ErrorCodes::PATH_ACCESS_DENIED); |
141 | } |
142 | |
143 | if (!Poco::File(path).exists()) |
144 | throw Exception( |
145 | "LibraryDictionarySource: Can't load library " + Poco::File(path).path() + ": file doesn't exist" , |
146 | ErrorCodes::FILE_DOESNT_EXIST); |
147 | |
148 | description.init(sample_block); |
149 | library = std::make_shared<SharedLibrary>(path, RTLD_LAZY |
150 | #if defined(RTLD_DEEPBIND) && !defined(ADDRESS_SANITIZER) // Does not exists in FreeBSD. Cannot work with Address Sanitizer. |
151 | | RTLD_DEEPBIND |
152 | #endif |
153 | ); |
154 | settings = std::make_shared<CStringsHolder>(getLibSettings(config, config_prefix + lib_config_settings)); |
155 | if (auto libNew = library->tryGet<decltype(lib_data) (*)(decltype(&settings->strings), decltype(&ClickHouseLibrary::log))>( |
156 | "ClickHouseDictionary_v3_libNew" )) |
157 | lib_data = libNew(&settings->strings, ClickHouseLibrary::log); |
158 | } |
159 | |
160 | LibraryDictionarySource::LibraryDictionarySource(const LibraryDictionarySource & other) |
161 | : log(&Logger::get("LibraryDictionarySource" )) |
162 | , dict_struct{other.dict_struct} |
163 | , config_prefix{other.config_prefix} |
164 | , path{other.path} |
165 | , sample_block{other.sample_block} |
166 | , library{other.library} |
167 | , description{other.description} |
168 | , settings{other.settings} |
169 | { |
170 | if (auto libClone = library->tryGet<decltype(lib_data) (*)(decltype(other.lib_data))>("ClickHouseDictionary_v3_libClone" )) |
171 | lib_data = libClone(other.lib_data); |
172 | else if ( |
173 | auto libNew = library->tryGet<decltype(lib_data) (*)(decltype(&settings->strings), decltype(&ClickHouseLibrary::log))>( |
174 | "ClickHouseDictionary_v3_libNew" )) |
175 | lib_data = libNew(&settings->strings, ClickHouseLibrary::log); |
176 | } |
177 | |
178 | LibraryDictionarySource::~LibraryDictionarySource() |
179 | { |
180 | if (auto libDelete = library->tryGet<void (*)(decltype(lib_data))>("ClickHouseDictionary_v3_libDelete" )) |
181 | libDelete(lib_data); |
182 | } |
183 | |
184 | BlockInputStreamPtr LibraryDictionarySource::loadAll() |
185 | { |
186 | LOG_TRACE(log, "loadAll " + toString()); |
187 | |
188 | auto columns_holder = std::make_unique<ClickHouseLibrary::CString[]>(dict_struct.attributes.size()); |
189 | ClickHouseLibrary::CStrings columns{static_cast<decltype(ClickHouseLibrary::CStrings::data)>(columns_holder.get()), |
190 | dict_struct.attributes.size()}; |
191 | size_t i = 0; |
192 | for (auto & a : dict_struct.attributes) |
193 | { |
194 | columns.data[i] = a.name.c_str(); |
195 | ++i; |
196 | } |
197 | void * data_ptr = nullptr; |
198 | |
199 | /// Get function pointer before dataNew call because library->get may throw. |
200 | auto func_loadAll |
201 | = library->get<void * (*)(decltype(data_ptr), decltype(&settings->strings), decltype(&columns))>("ClickHouseDictionary_v3_loadAll" ); |
202 | data_ptr = library->get<decltype(data_ptr) (*)(decltype(lib_data))>("ClickHouseDictionary_v3_dataNew" )(lib_data); |
203 | auto data = func_loadAll(data_ptr, &settings->strings, &columns); |
204 | auto block = dataToBlock(description.sample_block, data); |
205 | SCOPE_EXIT(library->get<void (*)(decltype(lib_data), decltype(data_ptr))>("ClickHouseDictionary_v3_dataDelete" )(lib_data, data_ptr)); |
206 | return std::make_shared<OneBlockInputStream>(block); |
207 | } |
208 | |
209 | BlockInputStreamPtr LibraryDictionarySource::loadIds(const std::vector<UInt64> & ids) |
210 | { |
211 | LOG_TRACE(log, "loadIds " << toString() << " size = " << ids.size()); |
212 | |
213 | const ClickHouseLibrary::VectorUInt64 ids_data{ext::bit_cast<decltype(ClickHouseLibrary::VectorUInt64::data)>(ids.data()), ids.size()}; |
214 | auto columns_holder = std::make_unique<ClickHouseLibrary::CString[]>(dict_struct.attributes.size()); |
215 | ClickHouseLibrary::CStrings columns_pass{static_cast<decltype(ClickHouseLibrary::CStrings::data)>(columns_holder.get()), |
216 | dict_struct.attributes.size()}; |
217 | size_t i = 0; |
218 | for (auto & a : dict_struct.attributes) |
219 | { |
220 | columns_pass.data[i] = a.name.c_str(); |
221 | ++i; |
222 | } |
223 | void * data_ptr = nullptr; |
224 | |
225 | /// Get function pointer before dataNew call because library->get may throw. |
226 | auto func_loadIds |
227 | = library->get<void * (*)(decltype(data_ptr), decltype(&settings->strings), decltype(&columns_pass), decltype(&ids_data))>( |
228 | "ClickHouseDictionary_v3_loadIds" ); |
229 | data_ptr = library->get<decltype(data_ptr) (*)(decltype(lib_data))>("ClickHouseDictionary_v3_dataNew" )(lib_data); |
230 | auto data = func_loadIds(data_ptr, &settings->strings, &columns_pass, &ids_data); |
231 | auto block = dataToBlock(description.sample_block, data); |
232 | SCOPE_EXIT(library->get<void (*)(decltype(lib_data), decltype(data_ptr))>("ClickHouseDictionary_v3_dataDelete" )(lib_data, data_ptr)); |
233 | return std::make_shared<OneBlockInputStream>(block); |
234 | } |
235 | |
236 | BlockInputStreamPtr LibraryDictionarySource::loadKeys(const Columns & key_columns, const std::vector<std::size_t> & requested_rows) |
237 | { |
238 | LOG_TRACE(log, "loadKeys " << toString() << " size = " << requested_rows.size()); |
239 | |
240 | auto holder = std::make_unique<ClickHouseLibrary::Row[]>(key_columns.size()); |
241 | std::vector<std::unique_ptr<ClickHouseLibrary::Field[]>> column_data_holders; |
242 | for (size_t i = 0; i < key_columns.size(); ++i) |
243 | { |
244 | auto cell_holder = std::make_unique<ClickHouseLibrary::Field[]>(requested_rows.size()); |
245 | for (size_t j = 0; j < requested_rows.size(); ++j) |
246 | { |
247 | auto data_ref = key_columns[i]->getDataAt(requested_rows[j]); |
248 | cell_holder[j] = ClickHouseLibrary::Field{.data = static_cast<const void *>(data_ref.data), .size = data_ref.size}; |
249 | } |
250 | holder[i] |
251 | = ClickHouseLibrary::Row{.data = static_cast<ClickHouseLibrary::Field *>(cell_holder.get()), .size = requested_rows.size()}; |
252 | |
253 | column_data_holders.push_back(std::move(cell_holder)); |
254 | } |
255 | |
256 | ClickHouseLibrary::Table request_cols{.data = static_cast<ClickHouseLibrary::Row *>(holder.get()), .size = key_columns.size()}; |
257 | |
258 | void * data_ptr = nullptr; |
259 | /// Get function pointer before dataNew call because library->get may throw. |
260 | auto func_loadKeys = library->get<void * (*)(decltype(data_ptr), decltype(&settings->strings), decltype(&request_cols))>( |
261 | "ClickHouseDictionary_v3_loadKeys" ); |
262 | data_ptr = library->get<decltype(data_ptr) (*)(decltype(lib_data))>("ClickHouseDictionary_v3_dataNew" )(lib_data); |
263 | auto data = func_loadKeys(data_ptr, &settings->strings, &request_cols); |
264 | auto block = dataToBlock(description.sample_block, data); |
265 | SCOPE_EXIT(library->get<void (*)(decltype(lib_data), decltype(data_ptr))>("ClickHouseDictionary_v3_dataDelete" )(lib_data, data_ptr)); |
266 | return std::make_shared<OneBlockInputStream>(block); |
267 | } |
268 | |
269 | bool LibraryDictionarySource::isModified() const |
270 | { |
271 | if (auto func_isModified |
272 | = library->tryGet<bool (*)(decltype(lib_data), decltype(&settings->strings))>("ClickHouseDictionary_v3_isModified" )) |
273 | return func_isModified(lib_data, &settings->strings); |
274 | return true; |
275 | } |
276 | |
277 | bool LibraryDictionarySource::supportsSelectiveLoad() const |
278 | { |
279 | if (auto func_supportsSelectiveLoad |
280 | = library->tryGet<bool (*)(decltype(lib_data), decltype(&settings->strings))>("ClickHouseDictionary_v3_supportsSelectiveLoad" )) |
281 | return func_supportsSelectiveLoad(lib_data, &settings->strings); |
282 | return true; |
283 | } |
284 | |
285 | DictionarySourcePtr LibraryDictionarySource::clone() const |
286 | { |
287 | return std::make_unique<LibraryDictionarySource>(*this); |
288 | } |
289 | |
290 | std::string LibraryDictionarySource::toString() const |
291 | { |
292 | return path; |
293 | } |
294 | |
295 | void registerDictionarySourceLibrary(DictionarySourceFactory & factory) |
296 | { |
297 | auto createTableSource = [=](const DictionaryStructure & dict_struct, |
298 | const Poco::Util::AbstractConfiguration & config, |
299 | const std::string & config_prefix, |
300 | Block & sample_block, |
301 | const Context & context, |
302 | bool check_config) -> DictionarySourcePtr |
303 | { |
304 | return std::make_unique<LibraryDictionarySource>(dict_struct, config, config_prefix + ".library" , sample_block, context, check_config); |
305 | }; |
306 | factory.registerSource("library" , createTableSource); |
307 | } |
308 | |
309 | } |
310 | |