| 1 | #include "duckdb/function/aggregate/algebraic_functions.hpp" |
| 2 | #include "duckdb/common/exception.hpp" |
| 3 | #include "duckdb/common/types/null_value.hpp" |
| 4 | #include "duckdb/common/vector_operations/vector_operations.hpp" |
| 5 | #include "duckdb/function/function_set.hpp" |
| 6 | #include <cmath> |
| 7 | |
| 8 | using namespace duckdb; |
| 9 | using namespace std; |
| 10 | |
| 11 | struct covar_state_t { |
| 12 | uint64_t count; |
| 13 | double meanx; |
| 14 | double meany; |
| 15 | double co_moment; |
| 16 | }; |
| 17 | |
| 18 | struct CovarOperation { |
| 19 | template <class STATE> static void Initialize(STATE *state) { |
| 20 | state->count = 0; |
| 21 | state->meanx = 0; |
| 22 | state->meany = 0; |
| 23 | state->co_moment = 0; |
| 24 | } |
| 25 | |
| 26 | template <class A_TYPE, class B_TYPE, class STATE, class OP> |
| 27 | static void Operation(STATE *state, A_TYPE *x_data, B_TYPE *y_data, nullmask_t &anullmask, nullmask_t &bnullmask, |
| 28 | idx_t xidx, idx_t yidx) { |
| 29 | // update running mean and d^2 |
| 30 | const uint64_t n = ++(state->count); |
| 31 | |
| 32 | const auto x = x_data[xidx]; |
| 33 | const double dx = (x - state->meanx); |
| 34 | const double meanx = state->meanx + dx / n; |
| 35 | |
| 36 | const auto y = y_data[yidx]; |
| 37 | const double dy = (y - state->meany); |
| 38 | const double meany = state->meany + dy / n; |
| 39 | |
| 40 | const double C = state->co_moment + dx * (y - meany); |
| 41 | |
| 42 | state->meanx = meanx; |
| 43 | state->meany = meany; |
| 44 | state->co_moment = C; |
| 45 | } |
| 46 | |
| 47 | template <class STATE, class OP> static void Combine(STATE source, STATE *target) { |
| 48 | if (target->count == 0) { |
| 49 | *target = source; |
| 50 | } else if (source.count > 0) { |
| 51 | const auto count = target->count + source.count; |
| 52 | const auto meanx = (source.count * source.meanx + target->count * target->meanx) / count; |
| 53 | const auto meany = (source.count * source.meany + target->count * target->meany) / count; |
| 54 | |
| 55 | // Schubert and Gertz SSDBM 2018, equation 21 |
| 56 | const auto deltax = target->meanx - source.meanx; |
| 57 | const auto deltay = target->meany - source.meany; |
| 58 | target->co_moment = |
| 59 | source.co_moment + target->co_moment + deltax * deltay * source.count * target->count / count; |
| 60 | target->meanx = meanx; |
| 61 | target->meany = meany; |
| 62 | target->count = count; |
| 63 | } |
| 64 | } |
| 65 | |
| 66 | static bool IgnoreNull() { |
| 67 | return true; |
| 68 | } |
| 69 | }; |
| 70 | |
| 71 | struct CovarPopOperation : public CovarOperation { |
| 72 | template <class T, class STATE> |
| 73 | static void Finalize(Vector &result, STATE *state, T *target, nullmask_t &nullmask, idx_t idx) { |
| 74 | if (state->count == 0) { |
| 75 | nullmask[idx] = true; |
| 76 | } else { |
| 77 | target[idx] = state->co_moment / state->count; |
| 78 | } |
| 79 | } |
| 80 | }; |
| 81 | |
| 82 | struct CovarSampOperation : public CovarOperation { |
| 83 | template <class T, class STATE> |
| 84 | static void Finalize(Vector &result, STATE *state, T *target, nullmask_t &nullmask, idx_t idx) { |
| 85 | if ((state->count) < 2) { |
| 86 | nullmask[idx] = true; |
| 87 | } else { |
| 88 | target[idx] = state->co_moment / (state->count - 1); |
| 89 | } |
| 90 | } |
| 91 | }; |
| 92 | |
| 93 | void CovarPopFun::RegisterFunction(BuiltinFunctions &set) { |
| 94 | AggregateFunctionSet covar_pop("covar_pop" ); |
| 95 | covar_pop.AddFunction(AggregateFunction::BinaryAggregate<covar_state_t, double, double, double, CovarPopOperation>( |
| 96 | SQLType::DOUBLE, SQLType::DOUBLE, SQLType::DOUBLE)); |
| 97 | set.AddFunction(covar_pop); |
| 98 | } |
| 99 | |
| 100 | void CovarSampFun::RegisterFunction(BuiltinFunctions &set) { |
| 101 | AggregateFunctionSet covar_samp("covar_samp" ); |
| 102 | covar_samp.AddFunction( |
| 103 | AggregateFunction::BinaryAggregate<covar_state_t, double, double, double, CovarSampOperation>( |
| 104 | SQLType::DOUBLE, SQLType::DOUBLE, SQLType::DOUBLE)); |
| 105 | set.AddFunction(covar_samp); |
| 106 | } |
| 107 | |