1#include "LibraryDictionarySource.h"
2#include <DataStreams/OneBlockInputStream.h>
3#include <Interpreters/Context.h>
4#include <Poco/File.h>
5#include <common/logger_useful.h>
6#include <ext/bit_cast.h>
7#include <ext/range.h>
8#include <ext/scope_guard.h>
9#include <Common/StringUtils/StringUtils.h>
10#include "DictionarySourceFactory.h"
11#include "DictionaryStructure.h"
12#include "LibraryDictionarySourceExternal.h"
13#include "registerDictionaries.h"
14
15namespace DB
16{
/// Error codes referenced by this translation unit; only declared here.
namespace ErrorCodes
{
    extern const int EXTERNAL_LIBRARY_ERROR;
    extern const int FILE_DOESNT_EXIST;
    extern const int NOT_IMPLEMENTED;
    extern const int PATH_ACCESS_DENIED;
    extern const int SIZES_OF_COLUMNS_DOESNT_MATCH;
}
25
26
27class CStringsHolder
28{
29public:
30 using Container = std::vector<std::string>;
31 explicit CStringsHolder(const Container & strings_pass)
32 {
33 strings_holder = strings_pass;
34 strings.size = strings_holder.size();
35 ptr_holder = std::make_unique<ClickHouseLibrary::CString[]>(strings.size);
36 strings.data = ptr_holder.get();
37 size_t i = 0;
38 for (auto & str : strings_holder)
39 {
40 strings.data[i] = str.c_str();
41 ++i;
42 }
43 }
44
45 ClickHouseLibrary::CStrings strings; // will pass pointer to lib
46
47private:
48 std::unique_ptr<ClickHouseLibrary::CString[]> ptr_holder = nullptr;
49 Container strings_holder;
50};
51
52
53namespace
54{
55 constexpr auto lib_config_settings = ".settings";
56
57
58 CStringsHolder getLibSettings(const Poco::Util::AbstractConfiguration & config, const std::string & config_root)
59 {
60 Poco::Util::AbstractConfiguration::Keys config_keys;
61 config.keys(config_root, config_keys);
62 CStringsHolder::Container strings;
63 for (const auto & key : config_keys)
64 {
65 std::string key_name = key;
66 auto bracket_pos = key.find('[');
67 if (bracket_pos != std::string::npos && bracket_pos > 0)
68 key_name = key.substr(0, bracket_pos);
69 strings.emplace_back(key_name);
70 strings.emplace_back(config.getString(config_root + "." + key));
71 }
72 return CStringsHolder(strings);
73 }
74
75
76 Block dataToBlock(const Block & sample_block, const void * data)
77 {
78 if (!data)
79 throw Exception("LibraryDictionarySource: No data returned", ErrorCodes::EXTERNAL_LIBRARY_ERROR);
80
81 auto columns_received = static_cast<const ClickHouseLibrary::Table *>(data);
82 if (columns_received->error_code)
83 throw Exception(
84 "LibraryDictionarySource: Returned error: " + std::to_string(columns_received->error_code) + " "
85 + (columns_received->error_string ? columns_received->error_string : ""),
86 ErrorCodes::EXTERNAL_LIBRARY_ERROR);
87
88 MutableColumns columns(sample_block.columns());
89 for (const auto i : ext::range(0, columns.size()))
90 columns[i] = sample_block.getByPosition(i).column->cloneEmpty();
91
92 for (size_t col_n = 0; col_n < columns_received->size; ++col_n)
93 {
94 if (columns.size() != columns_received->data[col_n].size)
95 throw Exception(
96 "LibraryDictionarySource: Returned unexpected number of columns: " + std::to_string(columns_received->data[col_n].size)
97 + ", must be " + std::to_string(columns.size()),
98 ErrorCodes::SIZES_OF_COLUMNS_DOESNT_MATCH);
99
100 for (size_t row_n = 0; row_n < columns_received->data[col_n].size; ++row_n)
101 {
102 const auto & field = columns_received->data[col_n].data[row_n];
103 if (!field.data)
104 {
105 /// sample_block contains null_value (from config) inside corresponding column
106 const auto & col = sample_block.getByPosition(row_n);
107 columns[row_n]->insertFrom(*(col.column), 0);
108 }
109 else
110 {
111 const auto & size = field.size;
112 columns[row_n]->insertData(static_cast<const char *>(field.data), size);
113 }
114 }
115 }
116
117 return sample_block.cloneWithColumns(std::move(columns));
118 }
119}
120
121
122LibraryDictionarySource::LibraryDictionarySource(
123 const DictionaryStructure & dict_struct_,
124 const Poco::Util::AbstractConfiguration & config,
125 const std::string & config_prefix_,
126 Block & sample_block_,
127 const Context & context,
128 bool check_config)
129 : log(&Logger::get("LibraryDictionarySource"))
130 , dict_struct{dict_struct_}
131 , config_prefix{config_prefix_}
132 , path{config.getString(config_prefix + ".path", "")}
133 , sample_block{sample_block_}
134{
135
136 if (check_config)
137 {
138 const String dictionaries_lib_path = context.getDictionariesLibPath();
139 if (!startsWith(path, dictionaries_lib_path))
140 throw Exception("LibraryDictionarySource: Library path " + dictionaries_lib_path + " is not inside " + dictionaries_lib_path, ErrorCodes::PATH_ACCESS_DENIED);
141 }
142
143 if (!Poco::File(path).exists())
144 throw Exception(
145 "LibraryDictionarySource: Can't load library " + Poco::File(path).path() + ": file doesn't exist",
146 ErrorCodes::FILE_DOESNT_EXIST);
147
148 description.init(sample_block);
149 library = std::make_shared<SharedLibrary>(path, RTLD_LAZY
150#if defined(RTLD_DEEPBIND) && !defined(ADDRESS_SANITIZER) // Does not exists in FreeBSD. Cannot work with Address Sanitizer.
151 | RTLD_DEEPBIND
152#endif
153 );
154 settings = std::make_shared<CStringsHolder>(getLibSettings(config, config_prefix + lib_config_settings));
155 if (auto libNew = library->tryGet<decltype(lib_data) (*)(decltype(&settings->strings), decltype(&ClickHouseLibrary::log))>(
156 "ClickHouseDictionary_v3_libNew"))
157 lib_data = libNew(&settings->strings, ClickHouseLibrary::log);
158}
159
160LibraryDictionarySource::LibraryDictionarySource(const LibraryDictionarySource & other)
161 : log(&Logger::get("LibraryDictionarySource"))
162 , dict_struct{other.dict_struct}
163 , config_prefix{other.config_prefix}
164 , path{other.path}
165 , sample_block{other.sample_block}
166 , library{other.library}
167 , description{other.description}
168 , settings{other.settings}
169{
170 if (auto libClone = library->tryGet<decltype(lib_data) (*)(decltype(other.lib_data))>("ClickHouseDictionary_v3_libClone"))
171 lib_data = libClone(other.lib_data);
172 else if (
173 auto libNew = library->tryGet<decltype(lib_data) (*)(decltype(&settings->strings), decltype(&ClickHouseLibrary::log))>(
174 "ClickHouseDictionary_v3_libNew"))
175 lib_data = libNew(&settings->strings, ClickHouseLibrary::log);
176}
177
178LibraryDictionarySource::~LibraryDictionarySource()
179{
180 if (auto libDelete = library->tryGet<void (*)(decltype(lib_data))>("ClickHouseDictionary_v3_libDelete"))
181 libDelete(lib_data);
182}
183
184BlockInputStreamPtr LibraryDictionarySource::loadAll()
185{
186 LOG_TRACE(log, "loadAll " + toString());
187
188 auto columns_holder = std::make_unique<ClickHouseLibrary::CString[]>(dict_struct.attributes.size());
189 ClickHouseLibrary::CStrings columns{static_cast<decltype(ClickHouseLibrary::CStrings::data)>(columns_holder.get()),
190 dict_struct.attributes.size()};
191 size_t i = 0;
192 for (auto & a : dict_struct.attributes)
193 {
194 columns.data[i] = a.name.c_str();
195 ++i;
196 }
197 void * data_ptr = nullptr;
198
199 /// Get function pointer before dataNew call because library->get may throw.
200 auto func_loadAll
201 = library->get<void * (*)(decltype(data_ptr), decltype(&settings->strings), decltype(&columns))>("ClickHouseDictionary_v3_loadAll");
202 data_ptr = library->get<decltype(data_ptr) (*)(decltype(lib_data))>("ClickHouseDictionary_v3_dataNew")(lib_data);
203 auto data = func_loadAll(data_ptr, &settings->strings, &columns);
204 auto block = dataToBlock(description.sample_block, data);
205 SCOPE_EXIT(library->get<void (*)(decltype(lib_data), decltype(data_ptr))>("ClickHouseDictionary_v3_dataDelete")(lib_data, data_ptr));
206 return std::make_shared<OneBlockInputStream>(block);
207}
208
209BlockInputStreamPtr LibraryDictionarySource::loadIds(const std::vector<UInt64> & ids)
210{
211 LOG_TRACE(log, "loadIds " << toString() << " size = " << ids.size());
212
213 const ClickHouseLibrary::VectorUInt64 ids_data{ext::bit_cast<decltype(ClickHouseLibrary::VectorUInt64::data)>(ids.data()), ids.size()};
214 auto columns_holder = std::make_unique<ClickHouseLibrary::CString[]>(dict_struct.attributes.size());
215 ClickHouseLibrary::CStrings columns_pass{static_cast<decltype(ClickHouseLibrary::CStrings::data)>(columns_holder.get()),
216 dict_struct.attributes.size()};
217 size_t i = 0;
218 for (auto & a : dict_struct.attributes)
219 {
220 columns_pass.data[i] = a.name.c_str();
221 ++i;
222 }
223 void * data_ptr = nullptr;
224
225 /// Get function pointer before dataNew call because library->get may throw.
226 auto func_loadIds
227 = library->get<void * (*)(decltype(data_ptr), decltype(&settings->strings), decltype(&columns_pass), decltype(&ids_data))>(
228 "ClickHouseDictionary_v3_loadIds");
229 data_ptr = library->get<decltype(data_ptr) (*)(decltype(lib_data))>("ClickHouseDictionary_v3_dataNew")(lib_data);
230 auto data = func_loadIds(data_ptr, &settings->strings, &columns_pass, &ids_data);
231 auto block = dataToBlock(description.sample_block, data);
232 SCOPE_EXIT(library->get<void (*)(decltype(lib_data), decltype(data_ptr))>("ClickHouseDictionary_v3_dataDelete")(lib_data, data_ptr));
233 return std::make_shared<OneBlockInputStream>(block);
234}
235
236BlockInputStreamPtr LibraryDictionarySource::loadKeys(const Columns & key_columns, const std::vector<std::size_t> & requested_rows)
237{
238 LOG_TRACE(log, "loadKeys " << toString() << " size = " << requested_rows.size());
239
240 auto holder = std::make_unique<ClickHouseLibrary::Row[]>(key_columns.size());
241 std::vector<std::unique_ptr<ClickHouseLibrary::Field[]>> column_data_holders;
242 for (size_t i = 0; i < key_columns.size(); ++i)
243 {
244 auto cell_holder = std::make_unique<ClickHouseLibrary::Field[]>(requested_rows.size());
245 for (size_t j = 0; j < requested_rows.size(); ++j)
246 {
247 auto data_ref = key_columns[i]->getDataAt(requested_rows[j]);
248 cell_holder[j] = ClickHouseLibrary::Field{.data = static_cast<const void *>(data_ref.data), .size = data_ref.size};
249 }
250 holder[i]
251 = ClickHouseLibrary::Row{.data = static_cast<ClickHouseLibrary::Field *>(cell_holder.get()), .size = requested_rows.size()};
252
253 column_data_holders.push_back(std::move(cell_holder));
254 }
255
256 ClickHouseLibrary::Table request_cols{.data = static_cast<ClickHouseLibrary::Row *>(holder.get()), .size = key_columns.size()};
257
258 void * data_ptr = nullptr;
259 /// Get function pointer before dataNew call because library->get may throw.
260 auto func_loadKeys = library->get<void * (*)(decltype(data_ptr), decltype(&settings->strings), decltype(&request_cols))>(
261 "ClickHouseDictionary_v3_loadKeys");
262 data_ptr = library->get<decltype(data_ptr) (*)(decltype(lib_data))>("ClickHouseDictionary_v3_dataNew")(lib_data);
263 auto data = func_loadKeys(data_ptr, &settings->strings, &request_cols);
264 auto block = dataToBlock(description.sample_block, data);
265 SCOPE_EXIT(library->get<void (*)(decltype(lib_data), decltype(data_ptr))>("ClickHouseDictionary_v3_dataDelete")(lib_data, data_ptr));
266 return std::make_shared<OneBlockInputStream>(block);
267}
268
269bool LibraryDictionarySource::isModified() const
270{
271 if (auto func_isModified
272 = library->tryGet<bool (*)(decltype(lib_data), decltype(&settings->strings))>("ClickHouseDictionary_v3_isModified"))
273 return func_isModified(lib_data, &settings->strings);
274 return true;
275}
276
277bool LibraryDictionarySource::supportsSelectiveLoad() const
278{
279 if (auto func_supportsSelectiveLoad
280 = library->tryGet<bool (*)(decltype(lib_data), decltype(&settings->strings))>("ClickHouseDictionary_v3_supportsSelectiveLoad"))
281 return func_supportsSelectiveLoad(lib_data, &settings->strings);
282 return true;
283}
284
285DictionarySourcePtr LibraryDictionarySource::clone() const
286{
287 return std::make_unique<LibraryDictionarySource>(*this);
288}
289
290std::string LibraryDictionarySource::toString() const
291{
292 return path;
293}
294
295void registerDictionarySourceLibrary(DictionarySourceFactory & factory)
296{
297 auto createTableSource = [=](const DictionaryStructure & dict_struct,
298 const Poco::Util::AbstractConfiguration & config,
299 const std::string & config_prefix,
300 Block & sample_block,
301 const Context & context,
302 bool check_config) -> DictionarySourcePtr
303 {
304 return std::make_unique<LibraryDictionarySource>(dict_struct, config, config_prefix + ".library", sample_block, context, check_config);
305 };
306 factory.registerSource("library", createTableSource);
307}
308
309}
310