#include "AggregateFunctionMLMethod.h"

#include <IO/ReadHelpers.h>
#include <IO/WriteHelpers.h>
#include <Interpreters/castColumn.h>
#include <Columns/ColumnArray.h>
#include <Columns/ColumnTuple.h>
#include <Common/FieldVisitors.h>
#include <Common/typeid_cast.h>
#include <Common/assert_cast.h>
#include "AggregateFunctionFactory.h"
#include "FactoryHelpers.h"
#include "Helpers.h"
#include "registerAggregateFunctions.h"


namespace DB
{
namespace
{
using FuncLinearRegression = AggregateFunctionMLMethod<LinearModelData, NameLinearRegression>;
using FuncLogisticRegression = AggregateFunctionMLMethod<LinearModelData, NameLogisticRegression>;

template <class Method>
AggregateFunctionPtr
createAggregateFunctionMLMethod(const std::string & name, const DataTypes & argument_types, const Array & parameters)
{
    if (parameters.size() > 4)
        throw Exception(
            "Aggregate function " + name
                + " requires at most four parameters: learning_rate, l2_regularization_coef, mini-batch size and weights_updater method",
            ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);

    if (argument_types.size() < 2)
        throw Exception(
            "Aggregate function " + name + " requires at least two arguments: target and model's parameters",
            ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);

    for (size_t i = 0; i < argument_types.size(); ++i)
    {
        if (!isNativeNumber(argument_types[i]))
            throw Exception(
                "Argument " + std::to_string(i) + " of type " + argument_types[i]->getName()
                    + " must be numeric for aggregate function " + name,
                ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
    }

    /// These default parameters were picked because they performed well in some tests,
    /// though tuning them is still required to achieve better results.
    auto learning_rate = Float64(1.0);
    auto l2_reg_coef = Float64(0.5);
    UInt64 batch_size = 15;

    std::string weights_updater_name = "Adam";
    std::unique_ptr<IGradientComputer> gradient_computer;

    if (!parameters.empty())
    {
        learning_rate = applyVisitor(FieldVisitorConvertToNumber<Float64>(), parameters[0]);
    }
    if (parameters.size() > 1)
    {
        l2_reg_coef = applyVisitor(FieldVisitorConvertToNumber<Float64>(), parameters[1]);
    }
    if (parameters.size() > 2)
    {
        batch_size = applyVisitor(FieldVisitorConvertToNumber<UInt64>(), parameters[2]);
    }
    if (parameters.size() > 3)
    {
        weights_updater_name = parameters[3].safeGet<String>();
        if (weights_updater_name != "SGD" && weights_updater_name != "Momentum"
            && weights_updater_name != "Nesterov" && weights_updater_name != "Adam")
            throw Exception(
                "Invalid parameter for weights updater. The only supported are 'SGD', 'Momentum', 'Nesterov' and 'Adam'",
                ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
    }

    if (std::is_same_v<Method, FuncLinearRegression>)
    {
        gradient_computer = std::make_unique<LinearRegression>();
    }
    else if (std::is_same_v<Method, FuncLogisticRegression>)
    {
        gradient_computer = std::make_unique<LogisticRegression>();
    }
    else
    {
        throw Exception("Such gradient computer is not implemented yet", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
    }

    return std::make_shared<Method>(
        argument_types.size() - 1,
        std::move(gradient_computer),
        weights_updater_name,
        learning_rate,
        l2_reg_coef,
        batch_size,
        argument_types,
        parameters);
}
}

void registerAggregateFunctionMLMethod(AggregateFunctionFactory & factory)
{
    factory.registerFunction("stochasticLinearRegression", createAggregateFunctionMLMethod<FuncLinearRegression>);
    factory.registerFunction("stochasticLogisticRegression", createAggregateFunctionMLMethod<FuncLogisticRegression>);
}

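/** Illustrative SQL usage (a sketch; the table and column names here are hypothetical).
  * Parameters follow the parsing order above (learning_rate, l2_regularization_coef,
  * mini-batch size, weights updater); the first argument is the target, the rest are features:
  *
  *   SELECT stochasticLinearRegression(0.1, 0.0, 15, 'SGD')(target, f1, f2) FROM train_data;
  */
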
LinearModelData::LinearModelData(
    Float64 learning_rate_,
    Float64 l2_reg_coef_,
    UInt64 param_num_,
    UInt64 batch_capacity_,
    std::shared_ptr<DB::IGradientComputer> gradient_computer_,
    std::shared_ptr<DB::IWeightsUpdater> weights_updater_)
    : learning_rate(learning_rate_)
    , l2_reg_coef(l2_reg_coef_)
    , batch_capacity(batch_capacity_)
    , batch_size(0)
    , gradient_computer(std::move(gradient_computer_))
    , weights_updater(std::move(weights_updater_))
{
    weights.resize(param_num_, Float64{0.0});
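    /// One extra slot: the last element of gradient_batch accumulates the bias gradient.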
    gradient_batch.resize(param_num_ + 1, Float64{0.0});
}

void LinearModelData::update_state()
{
    if (batch_size == 0)
        return;

    weights_updater->update(batch_size, weights, bias, learning_rate, gradient_batch);
    batch_size = 0;
    ++iter_num;
    gradient_batch.assign(gradient_batch.size(), Float64{0.0});
}

void LinearModelData::predict(
    ColumnVector<Float64>::Container & container,
    Block & block,
    size_t offset,
    size_t limit,
    const ColumnNumbers & arguments,
    const Context & context) const
{
    gradient_computer->predict(container, block, offset, limit, arguments, weights, bias, context);
}

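/// The result is written as an Array(Float64) laid out as [w_0, ..., w_{n-1}, bias], bias last.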
void LinearModelData::returnWeights(IColumn & to) const
{
    size_t size = weights.size() + 1;

    ColumnArray & arr_to = assert_cast<ColumnArray &>(to);
    ColumnArray::Offsets & offsets_to = arr_to.getOffsets();

    size_t old_size = offsets_to.back();
    offsets_to.push_back(old_size + size);

    ColumnFloat64::Container & val_to = assert_cast<ColumnFloat64 &>(arr_to.getData()).getData();

    val_to.reserve(old_size + size);
    for (size_t i = 0; i + 1 < size; ++i)
        val_to.push_back(weights[i]);

    val_to.push_back(bias);
}

void LinearModelData::read(ReadBuffer & buf)
{
    readBinary(bias, buf);
    readBinary(weights, buf);
    readBinary(iter_num, buf);
    readBinary(gradient_batch, buf);
    readBinary(batch_size, buf);
    weights_updater->read(buf);
}

void LinearModelData::write(WriteBuffer & buf) const
{
    writeBinary(bias, buf);
    writeBinary(weights, buf);
    writeBinary(iter_num, buf);
    writeBinary(gradient_batch, buf);
    writeBinary(batch_size, buf);
    weights_updater->write(buf);
}

void LinearModelData::merge(const DB::LinearModelData & rhs)
{
    if (iter_num == 0 && rhs.iter_num == 0)
        return;

    update_state();
    /// can't update rhs state because it's constant

    /// Weighting by squared iteration counts is more stable (in the sense of prediction quality)
    /// when two states with quite different numbers of learning steps are merged.
    Float64 frac = (static_cast<Float64>(iter_num) * iter_num) / (iter_num * iter_num + rhs.iter_num * rhs.iter_num);
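    /// E.g. merging a state with iter_num = 10 into one with rhs.iter_num = 30 gives
    /// frac = 100 / (100 + 900) = 0.1, so the longer-trained state dominates the average below.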

    for (size_t i = 0; i < weights.size(); ++i)
    {
        weights[i] = weights[i] * frac + rhs.weights[i] * (1 - frac);
    }
    bias = bias * frac + rhs.bias * (1 - frac);

    iter_num += rhs.iter_num;
    weights_updater->merge(*rhs.weights_updater, frac, 1 - frac);
}

void LinearModelData::add(const IColumn ** columns, size_t row_num)
{
    /// The first column stores the target; feature columns start at (columns + 1).
    Float64 target = (*columns[0]).getFloat64(row_num);

    weights_updater->add_to_batch(
        gradient_batch, *gradient_computer, weights, bias, l2_reg_coef, target, columns + 1, row_num);

    ++batch_size;
    if (batch_size == batch_capacity)
    {
        update_state();
    }
}

/// Weights updaters

void Adam::write(WriteBuffer & buf) const
{
    writeBinary(average_gradient, buf);
    writeBinary(average_squared_gradient, buf);
}

void Adam::read(ReadBuffer & buf)
{
    readBinary(average_gradient, buf);
    readBinary(average_squared_gradient, buf);
}

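/// Merging Adam states averages the moment vectors with the given fractions and multiplies the
/// beta powers: beta^{t1} * beta^{t2} = beta^{t1 + t2}, consistent with iter_num += rhs.iter_num
/// in LinearModelData::merge, so bias correction keeps tracking the total number of steps.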
void Adam::merge(const IWeightsUpdater & rhs, Float64 frac, Float64 rhs_frac)
{
    const auto & adam_rhs = static_cast<const Adam &>(rhs);

    if (adam_rhs.average_gradient.empty())
        return;

    if (average_gradient.empty())
    {
        if (!average_squared_gradient.empty()
            || adam_rhs.average_gradient.size() != adam_rhs.average_squared_gradient.size())
            throw Exception("average_gradient and average_squared_gradient must have the same size", ErrorCodes::LOGICAL_ERROR);

        average_gradient.resize(adam_rhs.average_gradient.size(), Float64{0.0});
        average_squared_gradient.resize(adam_rhs.average_squared_gradient.size(), Float64{0.0});
    }

    for (size_t i = 0; i < average_gradient.size(); ++i)
    {
        average_gradient[i] = average_gradient[i] * frac + adam_rhs.average_gradient[i] * rhs_frac;
        average_squared_gradient[i] = average_squared_gradient[i] * frac + adam_rhs.average_squared_gradient[i] * rhs_frac;
    }
    beta1_powered_ *= adam_rhs.beta1_powered_;
    beta2_powered_ *= adam_rhs.beta2_powered_;
}

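/** The update below is the standard Adam rule (Kingma & Ba), with the bias-correction factor
  * folded into the denominator. With g the batch-averaged gradient, m = average_gradient,
  * v = average_squared_gradient, and t the number of steps taken so far:
  *
  *   m <- beta1 * m + (1 - beta1) * g
  *   v <- beta2 * v + (1 - beta2) * g^2
  *   w <- w + learning_rate * (m / (1 - beta1^t)) / (sqrt(v / (1 - beta2^t)) + eps)
  *
  * The step is added rather than subtracted because the gradient computers in this file
  * accumulate the descent direction.
  */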
void Adam::update(UInt64 batch_size, std::vector<Float64> & weights, Float64 & bias, Float64 learning_rate, const std::vector<Float64> & batch_gradient)
{
    if (average_gradient.empty())
    {
        if (!average_squared_gradient.empty())
            throw Exception("average_gradient and average_squared_gradient must have the same size", ErrorCodes::LOGICAL_ERROR);

        average_gradient.resize(batch_gradient.size(), Float64{0.0});
        average_squared_gradient.resize(batch_gradient.size(), Float64{0.0});
    }

    for (size_t i = 0; i != average_gradient.size(); ++i)
    {
        Float64 normed_gradient = batch_gradient[i] / batch_size;
        average_gradient[i] = beta1_ * average_gradient[i] + (1 - beta1_) * normed_gradient;
        average_squared_gradient[i] = beta2_ * average_squared_gradient[i] +
            (1 - beta2_) * normed_gradient * normed_gradient;
    }

    for (size_t i = 0; i < weights.size(); ++i)
    {
        weights[i] += (learning_rate * average_gradient[i]) /
            ((1 - beta1_powered_) * (sqrt(average_squared_gradient[i] / (1 - beta2_powered_)) + eps_));
    }
    bias += (learning_rate * average_gradient[weights.size()]) /
        ((1 - beta1_powered_) * (sqrt(average_squared_gradient[weights.size()] / (1 - beta2_powered_)) + eps_));

    beta1_powered_ *= beta1_;
    beta2_powered_ *= beta2_;
}

void Adam::add_to_batch(
    std::vector<Float64> & batch_gradient,
    IGradientComputer & gradient_computer,
    const std::vector<Float64> & weights,
    Float64 bias,
    Float64 l2_reg_coef,
    Float64 target,
    const IColumn ** columns,
    size_t row_num)
{
    if (average_gradient.empty())
    {
        average_gradient.resize(batch_gradient.size(), Float64{0.0});
        average_squared_gradient.resize(batch_gradient.size(), Float64{0.0});
    }
    gradient_computer.compute(batch_gradient, weights, bias, l2_reg_coef, target, columns, row_num);
}

void Nesterov::read(ReadBuffer & buf)
{
    readBinary(accumulated_gradient, buf);
}

void Nesterov::write(WriteBuffer & buf) const
{
    writeBinary(accumulated_gradient, buf);
}

void Nesterov::merge(const IWeightsUpdater & rhs, Float64 frac, Float64 rhs_frac)
{
    const auto & nesterov_rhs = static_cast<const Nesterov &>(rhs);
    if (accumulated_gradient.empty())
        accumulated_gradient.resize(nesterov_rhs.accumulated_gradient.size(), Float64{0.0});

    for (size_t i = 0; i < accumulated_gradient.size(); ++i)
    {
        accumulated_gradient[i] = accumulated_gradient[i] * frac + nesterov_rhs.accumulated_gradient[i] * rhs_frac;
    }
}

void Nesterov::update(UInt64 batch_size, std::vector<Float64> & weights, Float64 & bias, Float64 learning_rate, const std::vector<Float64> & batch_gradient)
{
    if (accumulated_gradient.empty())
    {
        accumulated_gradient.resize(batch_gradient.size(), Float64{0.0});
    }

    for (size_t i = 0; i < batch_gradient.size(); ++i)
    {
        accumulated_gradient[i] = accumulated_gradient[i] * alpha_ + (learning_rate * batch_gradient[i]) / batch_size;
    }
    for (size_t i = 0; i < weights.size(); ++i)
    {
        weights[i] += accumulated_gradient[i];
    }
    bias += accumulated_gradient[weights.size()];
}

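/** Nesterov's accelerated gradient evaluates the gradient at the look-ahead point
  * w + alpha * v rather than at the current weights; the shifted_weights / shifted_bias
  * below implement exactly that:
  *
  *   v <- alpha * v + (learning_rate / batch_size) * grad(w + alpha * v)
  *   w <- w + v
  */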
void Nesterov::add_to_batch(
    std::vector<Float64> & batch_gradient,
    IGradientComputer & gradient_computer,
    const std::vector<Float64> & weights,
    Float64 bias,
    Float64 l2_reg_coef,
    Float64 target,
    const IColumn ** columns,
    size_t row_num)
{
    if (accumulated_gradient.empty())
    {
        accumulated_gradient.resize(batch_gradient.size(), Float64{0.0});
    }

    std::vector<Float64> shifted_weights(weights.size());
    for (size_t i = 0; i != shifted_weights.size(); ++i)
    {
        shifted_weights[i] = weights[i] + accumulated_gradient[i] * alpha_;
    }
    auto shifted_bias = bias + accumulated_gradient[weights.size()] * alpha_;

    gradient_computer.compute(batch_gradient, shifted_weights, shifted_bias, l2_reg_coef, target, columns, row_num);
}

void Momentum::read(ReadBuffer & buf)
{
    readBinary(accumulated_gradient, buf);
}

void Momentum::write(WriteBuffer & buf) const
{
    writeBinary(accumulated_gradient, buf);
}

void Momentum::merge(const IWeightsUpdater & rhs, Float64 frac, Float64 rhs_frac)
{
    const auto & momentum_rhs = static_cast<const Momentum &>(rhs);
    for (size_t i = 0; i < accumulated_gradient.size(); ++i)
    {
        accumulated_gradient[i] = accumulated_gradient[i] * frac + momentum_rhs.accumulated_gradient[i] * rhs_frac;
    }
}

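/// Classic momentum: v <- alpha * v + (learning_rate / batch_size) * g, then w <- w + v.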
void Momentum::update(UInt64 batch_size, std::vector<Float64> & weights, Float64 & bias, Float64 learning_rate, const std::vector<Float64> & batch_gradient)
{
    /// batch_size is already checked to be greater than 0
    if (accumulated_gradient.empty())
    {
        accumulated_gradient.resize(batch_gradient.size(), Float64{0.0});
    }

    for (size_t i = 0; i < batch_gradient.size(); ++i)
    {
        accumulated_gradient[i] = accumulated_gradient[i] * alpha_ + (learning_rate * batch_gradient[i]) / batch_size;
    }
    for (size_t i = 0; i < weights.size(); ++i)
    {
        weights[i] += accumulated_gradient[i];
    }
    bias += accumulated_gradient[weights.size()];
}

void StochasticGradientDescent::update(
    UInt64 batch_size, std::vector<Float64> & weights, Float64 & bias, Float64 learning_rate, const std::vector<Float64> & batch_gradient)
{
    /// batch_size is already checked to be greater than 0
    for (size_t i = 0; i < weights.size(); ++i)
    {
        weights[i] += (learning_rate * batch_gradient[i]) / batch_size;
    }
    bias += (learning_rate * batch_gradient[weights.size()]) / batch_size;
}

void IWeightsUpdater::add_to_batch(
    std::vector<Float64> & batch_gradient,
    IGradientComputer & gradient_computer,
    const std::vector<Float64> & weights,
    Float64 bias,
    Float64 l2_reg_coef,
    Float64 target,
    const IColumn ** columns,
    size_t row_num)
{
    gradient_computer.compute(batch_gradient, weights, bias, l2_reg_coef, target, columns, row_num);
}

/// Gradient computers

void LogisticRegression::predict(
    ColumnVector<Float64>::Container & container,
    Block & block,
    size_t offset,
    size_t limit,
    const ColumnNumbers & arguments,
    const std::vector<Float64> & weights,
    Float64 bias,
    const Context & /*context*/) const
{
    size_t rows_num = block.rows();

    if (offset > rows_num || offset + limit > rows_num)
        throw Exception("Invalid offset and limit for LogisticRegression::predict. "
            "Block has " + toString(rows_num) + " rows, but offset is " + toString(offset) +
            " and limit is " + toString(limit), ErrorCodes::LOGICAL_ERROR);

    std::vector<Float64> results(limit, bias);

    for (size_t i = 1; i < arguments.size(); ++i)
    {
        const ColumnWithTypeAndName & cur_col = block.getByPosition(arguments[i]);

        if (!isNativeNumber(cur_col.type))
            throw Exception("Prediction arguments must have numeric type", ErrorCodes::BAD_ARGUMENTS);

        const auto & features_column = cur_col.column;

        for (size_t row_num = 0; row_num < limit; ++row_num)
            results[row_num] += weights[i - 1] * features_column->getFloat64(offset + row_num);
    }

    container.reserve(container.size() + limit);
    for (size_t row_num = 0; row_num < limit; ++row_num)
        container.emplace_back(1 / (1 + exp(-results[row_num])));
}

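/** A sketch of the gradient below, assuming targets are encoded as -1 / +1. For the regularized
  * log-loss L = log(1 + exp(-target * z)) + l2_reg_coef * |w|^2, with z = w.x + bias:
  *
  *   -dL/d(bias) = target / (exp(target * z) + 1)
  *   -dL/dw_i    = target * x_i / (exp(target * z) + 1) - 2 * l2_reg_coef * w_i
  *
  * compute() accumulates exactly these values, i.e. the descent direction, which update()
  * then adds to the weights.
  */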
void LogisticRegression::compute(
    std::vector<Float64> & batch_gradient,
    const std::vector<Float64> & weights,
    Float64 bias,
    Float64 l2_reg_coef,
    Float64 target,
    const IColumn ** columns,
    size_t row_num)
{
    Float64 derivative = bias;
    for (size_t i = 0; i < weights.size(); ++i)
    {
        auto value = (*columns[i]).getFloat64(row_num);
        derivative += weights[i] * value;
    }
    derivative *= target;
    derivative = exp(derivative);

    batch_gradient[weights.size()] += target / (derivative + 1);
    for (size_t i = 0; i < weights.size(); ++i)
    {
        auto value = (*columns[i]).getFloat64(row_num);
        batch_gradient[i] += target * value / (derivative + 1) - 2 * l2_reg_coef * weights[i];
    }
}

void LinearRegression::predict(
    ColumnVector<Float64>::Container & container,
    Block & block,
    size_t offset,
    size_t limit,
    const ColumnNumbers & arguments,
    const std::vector<Float64> & weights,
    Float64 bias,
    const Context & /*context*/) const
{
    if (weights.size() + 1 != arguments.size())
    {
        throw Exception("In predict function number of arguments differs from the size of weights vector", ErrorCodes::LOGICAL_ERROR);
    }

    size_t rows_num = block.rows();

    if (offset > rows_num || offset + limit > rows_num)
        throw Exception("Invalid offset and limit for LinearRegression::predict. "
            "Block has " + toString(rows_num) + " rows, but offset is " + toString(offset) +
            " and limit is " + toString(limit), ErrorCodes::LOGICAL_ERROR);

    std::vector<Float64> results(limit, bias);

    for (size_t i = 1; i < arguments.size(); ++i)
    {
        const ColumnWithTypeAndName & cur_col = block.getByPosition(arguments[i]);

        if (!isNativeNumber(cur_col.type))
            throw Exception("Prediction arguments must have numeric type", ErrorCodes::BAD_ARGUMENTS);

        auto features_column = cur_col.column;

        if (!features_column)
            throw Exception("Unexpectedly cannot dynamically cast features column " + std::to_string(i), ErrorCodes::LOGICAL_ERROR);

        for (size_t row_num = 0; row_num < limit; ++row_num)
            results[row_num] += weights[i - 1] * features_column->getFloat64(row_num + offset);
    }

    container.reserve(container.size() + limit);
    for (size_t row_num = 0; row_num < limit; ++row_num)
        container.emplace_back(results[row_num]);
}

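/** A sketch of the gradient below. For the regularized squared error
  * L = (target - z)^2 + l2_reg_coef * |w|^2, with z = w.x + bias:
  *
  *   -dL/d(bias) = 2 * (target - z)            (the `derivative` variable)
  *   -dL/dw_i    = 2 * (target - z) * x_i - 2 * l2_reg_coef * w_i
  *
  * As in LogisticRegression::compute, the accumulated values point in the descent direction.
  */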
void LinearRegression::compute(
    std::vector<Float64> & batch_gradient,
    const std::vector<Float64> & weights,
    Float64 bias,
    Float64 l2_reg_coef,
    Float64 target,
    const IColumn ** columns,
    size_t row_num)
{
    Float64 derivative = (target - bias);
    for (size_t i = 0; i < weights.size(); ++i)
    {
        auto value = (*columns[i]).getFloat64(row_num);
        derivative -= weights[i] * value;
    }
    derivative *= 2;

    batch_gradient[weights.size()] += derivative;
    for (size_t i = 0; i < weights.size(); ++i)
    {
        auto value = (*columns[i]).getFloat64(row_num);
        batch_gradient[i] += derivative * value - 2 * l2_reg_coef * weights[i];
    }
}

}