| 1 | #include "LibraryDictionarySource.h" |
| 2 | #include <DataStreams/OneBlockInputStream.h> |
| 3 | #include <Interpreters/Context.h> |
| 4 | #include <Poco/File.h> |
| 5 | #include <common/logger_useful.h> |
| 6 | #include <ext/bit_cast.h> |
| 7 | #include <ext/range.h> |
| 8 | #include <ext/scope_guard.h> |
| 9 | #include <Common/StringUtils/StringUtils.h> |
| 10 | #include "DictionarySourceFactory.h" |
| 11 | #include "DictionaryStructure.h" |
| 12 | #include "LibraryDictionarySourceExternal.h" |
| 13 | #include "registerDictionaries.h" |
| 14 | |
| 15 | namespace DB |
| 16 | { |
| 17 | namespace ErrorCodes |
| 18 | { |
| 19 | extern const int NOT_IMPLEMENTED; |
| 20 | extern const int SIZES_OF_COLUMNS_DOESNT_MATCH; |
| 21 | extern const int FILE_DOESNT_EXIST; |
| 22 | extern const int EXTERNAL_LIBRARY_ERROR; |
| 23 | extern const int PATH_ACCESS_DENIED; |
| 24 | } |
| 25 | |
| 26 | |
| 27 | class CStringsHolder |
| 28 | { |
| 29 | public: |
| 30 | using Container = std::vector<std::string>; |
| 31 | explicit CStringsHolder(const Container & strings_pass) |
| 32 | { |
| 33 | strings_holder = strings_pass; |
| 34 | strings.size = strings_holder.size(); |
| 35 | ptr_holder = std::make_unique<ClickHouseLibrary::CString[]>(strings.size); |
| 36 | strings.data = ptr_holder.get(); |
| 37 | size_t i = 0; |
| 38 | for (auto & str : strings_holder) |
| 39 | { |
| 40 | strings.data[i] = str.c_str(); |
| 41 | ++i; |
| 42 | } |
| 43 | } |
| 44 | |
| 45 | ClickHouseLibrary::CStrings strings; // will pass pointer to lib |
| 46 | |
| 47 | private: |
| 48 | std::unique_ptr<ClickHouseLibrary::CString[]> ptr_holder = nullptr; |
| 49 | Container strings_holder; |
| 50 | }; |
| 51 | |
| 52 | |
| 53 | namespace |
| 54 | { |
/// Suffix appended to the source's config prefix to locate the library settings subtree.
constexpr const char * lib_config_settings = ".settings";
| 56 | |
| 57 | |
| 58 | CStringsHolder getLibSettings(const Poco::Util::AbstractConfiguration & config, const std::string & config_root) |
| 59 | { |
| 60 | Poco::Util::AbstractConfiguration::Keys config_keys; |
| 61 | config.keys(config_root, config_keys); |
| 62 | CStringsHolder::Container strings; |
| 63 | for (const auto & key : config_keys) |
| 64 | { |
| 65 | std::string key_name = key; |
| 66 | auto bracket_pos = key.find('['); |
| 67 | if (bracket_pos != std::string::npos && bracket_pos > 0) |
| 68 | key_name = key.substr(0, bracket_pos); |
| 69 | strings.emplace_back(key_name); |
| 70 | strings.emplace_back(config.getString(config_root + "." + key)); |
| 71 | } |
| 72 | return CStringsHolder(strings); |
| 73 | } |
| 74 | |
| 75 | |
| 76 | Block dataToBlock(const Block & sample_block, const void * data) |
| 77 | { |
| 78 | if (!data) |
| 79 | throw Exception("LibraryDictionarySource: No data returned" , ErrorCodes::EXTERNAL_LIBRARY_ERROR); |
| 80 | |
| 81 | auto columns_received = static_cast<const ClickHouseLibrary::Table *>(data); |
| 82 | if (columns_received->error_code) |
| 83 | throw Exception( |
| 84 | "LibraryDictionarySource: Returned error: " + std::to_string(columns_received->error_code) + " " |
| 85 | + (columns_received->error_string ? columns_received->error_string : "" ), |
| 86 | ErrorCodes::EXTERNAL_LIBRARY_ERROR); |
| 87 | |
| 88 | MutableColumns columns(sample_block.columns()); |
| 89 | for (const auto i : ext::range(0, columns.size())) |
| 90 | columns[i] = sample_block.getByPosition(i).column->cloneEmpty(); |
| 91 | |
| 92 | for (size_t col_n = 0; col_n < columns_received->size; ++col_n) |
| 93 | { |
| 94 | if (columns.size() != columns_received->data[col_n].size) |
| 95 | throw Exception( |
| 96 | "LibraryDictionarySource: Returned unexpected number of columns: " + std::to_string(columns_received->data[col_n].size) |
| 97 | + ", must be " + std::to_string(columns.size()), |
| 98 | ErrorCodes::SIZES_OF_COLUMNS_DOESNT_MATCH); |
| 99 | |
| 100 | for (size_t row_n = 0; row_n < columns_received->data[col_n].size; ++row_n) |
| 101 | { |
| 102 | const auto & field = columns_received->data[col_n].data[row_n]; |
| 103 | if (!field.data) |
| 104 | { |
| 105 | /// sample_block contains null_value (from config) inside corresponding column |
| 106 | const auto & col = sample_block.getByPosition(row_n); |
| 107 | columns[row_n]->insertFrom(*(col.column), 0); |
| 108 | } |
| 109 | else |
| 110 | { |
| 111 | const auto & size = field.size; |
| 112 | columns[row_n]->insertData(static_cast<const char *>(field.data), size); |
| 113 | } |
| 114 | } |
| 115 | } |
| 116 | |
| 117 | return sample_block.cloneWithColumns(std::move(columns)); |
| 118 | } |
| 119 | } |
| 120 | |
| 121 | |
| 122 | LibraryDictionarySource::LibraryDictionarySource( |
| 123 | const DictionaryStructure & dict_struct_, |
| 124 | const Poco::Util::AbstractConfiguration & config, |
| 125 | const std::string & config_prefix_, |
| 126 | Block & sample_block_, |
| 127 | const Context & context, |
| 128 | bool check_config) |
| 129 | : log(&Logger::get("LibraryDictionarySource" )) |
| 130 | , dict_struct{dict_struct_} |
| 131 | , config_prefix{config_prefix_} |
| 132 | , path{config.getString(config_prefix + ".path" , "" )} |
| 133 | , sample_block{sample_block_} |
| 134 | { |
| 135 | |
| 136 | if (check_config) |
| 137 | { |
| 138 | const String dictionaries_lib_path = context.getDictionariesLibPath(); |
| 139 | if (!startsWith(path, dictionaries_lib_path)) |
| 140 | throw Exception("LibraryDictionarySource: Library path " + dictionaries_lib_path + " is not inside " + dictionaries_lib_path, ErrorCodes::PATH_ACCESS_DENIED); |
| 141 | } |
| 142 | |
| 143 | if (!Poco::File(path).exists()) |
| 144 | throw Exception( |
| 145 | "LibraryDictionarySource: Can't load library " + Poco::File(path).path() + ": file doesn't exist" , |
| 146 | ErrorCodes::FILE_DOESNT_EXIST); |
| 147 | |
| 148 | description.init(sample_block); |
| 149 | library = std::make_shared<SharedLibrary>(path, RTLD_LAZY |
| 150 | #if defined(RTLD_DEEPBIND) && !defined(ADDRESS_SANITIZER) // Does not exists in FreeBSD. Cannot work with Address Sanitizer. |
| 151 | | RTLD_DEEPBIND |
| 152 | #endif |
| 153 | ); |
| 154 | settings = std::make_shared<CStringsHolder>(getLibSettings(config, config_prefix + lib_config_settings)); |
| 155 | if (auto libNew = library->tryGet<decltype(lib_data) (*)(decltype(&settings->strings), decltype(&ClickHouseLibrary::log))>( |
| 156 | "ClickHouseDictionary_v3_libNew" )) |
| 157 | lib_data = libNew(&settings->strings, ClickHouseLibrary::log); |
| 158 | } |
| 159 | |
| 160 | LibraryDictionarySource::LibraryDictionarySource(const LibraryDictionarySource & other) |
| 161 | : log(&Logger::get("LibraryDictionarySource" )) |
| 162 | , dict_struct{other.dict_struct} |
| 163 | , config_prefix{other.config_prefix} |
| 164 | , path{other.path} |
| 165 | , sample_block{other.sample_block} |
| 166 | , library{other.library} |
| 167 | , description{other.description} |
| 168 | , settings{other.settings} |
| 169 | { |
| 170 | if (auto libClone = library->tryGet<decltype(lib_data) (*)(decltype(other.lib_data))>("ClickHouseDictionary_v3_libClone" )) |
| 171 | lib_data = libClone(other.lib_data); |
| 172 | else if ( |
| 173 | auto libNew = library->tryGet<decltype(lib_data) (*)(decltype(&settings->strings), decltype(&ClickHouseLibrary::log))>( |
| 174 | "ClickHouseDictionary_v3_libNew" )) |
| 175 | lib_data = libNew(&settings->strings, ClickHouseLibrary::log); |
| 176 | } |
| 177 | |
| 178 | LibraryDictionarySource::~LibraryDictionarySource() |
| 179 | { |
| 180 | if (auto libDelete = library->tryGet<void (*)(decltype(lib_data))>("ClickHouseDictionary_v3_libDelete" )) |
| 181 | libDelete(lib_data); |
| 182 | } |
| 183 | |
| 184 | BlockInputStreamPtr LibraryDictionarySource::loadAll() |
| 185 | { |
| 186 | LOG_TRACE(log, "loadAll " + toString()); |
| 187 | |
| 188 | auto columns_holder = std::make_unique<ClickHouseLibrary::CString[]>(dict_struct.attributes.size()); |
| 189 | ClickHouseLibrary::CStrings columns{static_cast<decltype(ClickHouseLibrary::CStrings::data)>(columns_holder.get()), |
| 190 | dict_struct.attributes.size()}; |
| 191 | size_t i = 0; |
| 192 | for (auto & a : dict_struct.attributes) |
| 193 | { |
| 194 | columns.data[i] = a.name.c_str(); |
| 195 | ++i; |
| 196 | } |
| 197 | void * data_ptr = nullptr; |
| 198 | |
| 199 | /// Get function pointer before dataNew call because library->get may throw. |
| 200 | auto func_loadAll |
| 201 | = library->get<void * (*)(decltype(data_ptr), decltype(&settings->strings), decltype(&columns))>("ClickHouseDictionary_v3_loadAll" ); |
| 202 | data_ptr = library->get<decltype(data_ptr) (*)(decltype(lib_data))>("ClickHouseDictionary_v3_dataNew" )(lib_data); |
| 203 | auto data = func_loadAll(data_ptr, &settings->strings, &columns); |
| 204 | auto block = dataToBlock(description.sample_block, data); |
| 205 | SCOPE_EXIT(library->get<void (*)(decltype(lib_data), decltype(data_ptr))>("ClickHouseDictionary_v3_dataDelete" )(lib_data, data_ptr)); |
| 206 | return std::make_shared<OneBlockInputStream>(block); |
| 207 | } |
| 208 | |
| 209 | BlockInputStreamPtr LibraryDictionarySource::loadIds(const std::vector<UInt64> & ids) |
| 210 | { |
| 211 | LOG_TRACE(log, "loadIds " << toString() << " size = " << ids.size()); |
| 212 | |
| 213 | const ClickHouseLibrary::VectorUInt64 ids_data{ext::bit_cast<decltype(ClickHouseLibrary::VectorUInt64::data)>(ids.data()), ids.size()}; |
| 214 | auto columns_holder = std::make_unique<ClickHouseLibrary::CString[]>(dict_struct.attributes.size()); |
| 215 | ClickHouseLibrary::CStrings columns_pass{static_cast<decltype(ClickHouseLibrary::CStrings::data)>(columns_holder.get()), |
| 216 | dict_struct.attributes.size()}; |
| 217 | size_t i = 0; |
| 218 | for (auto & a : dict_struct.attributes) |
| 219 | { |
| 220 | columns_pass.data[i] = a.name.c_str(); |
| 221 | ++i; |
| 222 | } |
| 223 | void * data_ptr = nullptr; |
| 224 | |
| 225 | /// Get function pointer before dataNew call because library->get may throw. |
| 226 | auto func_loadIds |
| 227 | = library->get<void * (*)(decltype(data_ptr), decltype(&settings->strings), decltype(&columns_pass), decltype(&ids_data))>( |
| 228 | "ClickHouseDictionary_v3_loadIds" ); |
| 229 | data_ptr = library->get<decltype(data_ptr) (*)(decltype(lib_data))>("ClickHouseDictionary_v3_dataNew" )(lib_data); |
| 230 | auto data = func_loadIds(data_ptr, &settings->strings, &columns_pass, &ids_data); |
| 231 | auto block = dataToBlock(description.sample_block, data); |
| 232 | SCOPE_EXIT(library->get<void (*)(decltype(lib_data), decltype(data_ptr))>("ClickHouseDictionary_v3_dataDelete" )(lib_data, data_ptr)); |
| 233 | return std::make_shared<OneBlockInputStream>(block); |
| 234 | } |
| 235 | |
| 236 | BlockInputStreamPtr LibraryDictionarySource::loadKeys(const Columns & key_columns, const std::vector<std::size_t> & requested_rows) |
| 237 | { |
| 238 | LOG_TRACE(log, "loadKeys " << toString() << " size = " << requested_rows.size()); |
| 239 | |
| 240 | auto holder = std::make_unique<ClickHouseLibrary::Row[]>(key_columns.size()); |
| 241 | std::vector<std::unique_ptr<ClickHouseLibrary::Field[]>> column_data_holders; |
| 242 | for (size_t i = 0; i < key_columns.size(); ++i) |
| 243 | { |
| 244 | auto cell_holder = std::make_unique<ClickHouseLibrary::Field[]>(requested_rows.size()); |
| 245 | for (size_t j = 0; j < requested_rows.size(); ++j) |
| 246 | { |
| 247 | auto data_ref = key_columns[i]->getDataAt(requested_rows[j]); |
| 248 | cell_holder[j] = ClickHouseLibrary::Field{.data = static_cast<const void *>(data_ref.data), .size = data_ref.size}; |
| 249 | } |
| 250 | holder[i] |
| 251 | = ClickHouseLibrary::Row{.data = static_cast<ClickHouseLibrary::Field *>(cell_holder.get()), .size = requested_rows.size()}; |
| 252 | |
| 253 | column_data_holders.push_back(std::move(cell_holder)); |
| 254 | } |
| 255 | |
| 256 | ClickHouseLibrary::Table request_cols{.data = static_cast<ClickHouseLibrary::Row *>(holder.get()), .size = key_columns.size()}; |
| 257 | |
| 258 | void * data_ptr = nullptr; |
| 259 | /// Get function pointer before dataNew call because library->get may throw. |
| 260 | auto func_loadKeys = library->get<void * (*)(decltype(data_ptr), decltype(&settings->strings), decltype(&request_cols))>( |
| 261 | "ClickHouseDictionary_v3_loadKeys" ); |
| 262 | data_ptr = library->get<decltype(data_ptr) (*)(decltype(lib_data))>("ClickHouseDictionary_v3_dataNew" )(lib_data); |
| 263 | auto data = func_loadKeys(data_ptr, &settings->strings, &request_cols); |
| 264 | auto block = dataToBlock(description.sample_block, data); |
| 265 | SCOPE_EXIT(library->get<void (*)(decltype(lib_data), decltype(data_ptr))>("ClickHouseDictionary_v3_dataDelete" )(lib_data, data_ptr)); |
| 266 | return std::make_shared<OneBlockInputStream>(block); |
| 267 | } |
| 268 | |
| 269 | bool LibraryDictionarySource::isModified() const |
| 270 | { |
| 271 | if (auto func_isModified |
| 272 | = library->tryGet<bool (*)(decltype(lib_data), decltype(&settings->strings))>("ClickHouseDictionary_v3_isModified" )) |
| 273 | return func_isModified(lib_data, &settings->strings); |
| 274 | return true; |
| 275 | } |
| 276 | |
| 277 | bool LibraryDictionarySource::supportsSelectiveLoad() const |
| 278 | { |
| 279 | if (auto func_supportsSelectiveLoad |
| 280 | = library->tryGet<bool (*)(decltype(lib_data), decltype(&settings->strings))>("ClickHouseDictionary_v3_supportsSelectiveLoad" )) |
| 281 | return func_supportsSelectiveLoad(lib_data, &settings->strings); |
| 282 | return true; |
| 283 | } |
| 284 | |
| 285 | DictionarySourcePtr LibraryDictionarySource::clone() const |
| 286 | { |
| 287 | return std::make_unique<LibraryDictionarySource>(*this); |
| 288 | } |
| 289 | |
| 290 | std::string LibraryDictionarySource::toString() const |
| 291 | { |
| 292 | return path; |
| 293 | } |
| 294 | |
| 295 | void registerDictionarySourceLibrary(DictionarySourceFactory & factory) |
| 296 | { |
| 297 | auto createTableSource = [=](const DictionaryStructure & dict_struct, |
| 298 | const Poco::Util::AbstractConfiguration & config, |
| 299 | const std::string & config_prefix, |
| 300 | Block & sample_block, |
| 301 | const Context & context, |
| 302 | bool check_config) -> DictionarySourcePtr |
| 303 | { |
| 304 | return std::make_unique<LibraryDictionarySource>(dict_struct, config, config_prefix + ".library" , sample_block, context, check_config); |
| 305 | }; |
| 306 | factory.registerSource("library" , createTableSource); |
| 307 | } |
| 308 | |
| 309 | } |
| 310 | |