| 1 | /*------------------------------------------------------------------------- |
| 2 | * |
| 3 | * nodeAgg.h |
| 4 | * prototypes for nodeAgg.c |
| 5 | * |
| 6 | * |
| 7 | * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group |
| 8 | * Portions Copyright (c) 1994, Regents of the University of California |
| 9 | * |
| 10 | * src/include/executor/nodeAgg.h |
| 11 | * |
| 12 | *------------------------------------------------------------------------- |
| 13 | */ |
| 14 | #ifndef NODEAGG_H |
| 15 | #define NODEAGG_H |
| 16 | |
| 17 | #include "nodes/execnodes.h" |
| 18 | |
| 19 | |
| 20 | /* |
| 21 | * AggStatePerTransData - per aggregate state value information |
| 22 | * |
| 23 | * Working state for updating the aggregate's state value, by calling the |
| 24 | * transition function with an input row. This struct does not store the |
| 25 | * information needed to produce the final aggregate result from the transition |
| 26 | * state, that's stored in AggStatePerAggData instead. This separation allows |
| 27 | * multiple aggregate results to be produced from a single state value. |
| 28 | */ |
| 29 | typedef struct AggStatePerTransData |
| 30 | { |
| 31 | /* |
| 32 | * These values are set up during ExecInitAgg() and do not change |
| 33 | * thereafter: |
| 34 | */ |
| 35 | |
| 36 | /* |
| 37 | * Link to an Aggref expr this state value is for. |
| 38 | * |
| 39 | * There can be multiple Aggref's sharing the same state value, so long as |
| 40 | * the inputs and transition functions are identical and the final |
| 41 | * functions are not read-write. This points to the first one of them. |
| 42 | */ |
| 43 | Aggref *aggref; |
| 44 | |
| 45 | /* |
| 46 | * Is this state value actually being shared by more than one Aggref? |
| 47 | */ |
| 48 | bool aggshared; |
| 49 | |
| 50 | /* |
| 51 | * Number of aggregated input columns. This includes ORDER BY expressions |
| 52 | * in both the plain-agg and ordered-set cases. Ordered-set direct args |
| 53 | * are not counted, though. |
| 54 | */ |
| 55 | int numInputs; |
| 56 | |
| 57 | /* |
| 58 | * Number of aggregated input columns to pass to the transfn. This |
| 59 | * includes the ORDER BY columns for ordered-set aggs, but not for plain |
| 60 | * aggs. (This doesn't count the transition state value!) |
| 61 | */ |
| 62 | int numTransInputs; |
| 63 | |
| 64 | /* Oid of the state transition or combine function */ |
| 65 | Oid transfn_oid; |
| 66 | |
| 67 | /* Oid of the serialization function or InvalidOid */ |
| 68 | Oid serialfn_oid; |
| 69 | |
| 70 | /* Oid of the deserialization function or InvalidOid */ |
| 71 | Oid deserialfn_oid; |
| 72 | |
| 73 | /* Oid of state value's datatype */ |
| 74 | Oid aggtranstype; |
| 75 | |
| 76 | /* |
| 77 | * fmgr lookup data for transition function or combine function. Note in |
| 78 | * particular that the fn_strict flag is kept here. |
| 79 | */ |
| 80 | FmgrInfo transfn; |
| 81 | |
| 82 | /* fmgr lookup data for serialization function */ |
| 83 | FmgrInfo serialfn; |
| 84 | |
| 85 | /* fmgr lookup data for deserialization function */ |
| 86 | FmgrInfo deserialfn; |
| 87 | |
| 88 | /* Input collation derived for aggregate */ |
| 89 | Oid aggCollation; |
| 90 | |
| 91 | /* number of sorting columns */ |
| 92 | int numSortCols; |
| 93 | |
| 94 | /* number of sorting columns to consider in DISTINCT comparisons */ |
| 95 | /* (this is either zero or the same as numSortCols) */ |
| 96 | int numDistinctCols; |
| 97 | |
| 98 | /* deconstructed sorting information (arrays of length numSortCols) */ |
| 99 | AttrNumber *sortColIdx; |
| 100 | Oid *sortOperators; |
| 101 | Oid *sortCollations; |
| 102 | bool *sortNullsFirst; |
| 103 | |
| 104 | /* |
| 105 | * Comparators for input columns --- only set/used when aggregate has |
| 106 | * DISTINCT flag. equalfnOne version is used for single-column |
| 107 | * comparisons, equalfnMulti for the case of multiple columns. |
| 108 | */ |
| 109 | FmgrInfo equalfnOne; |
| 110 | ExprState *equalfnMulti; |
| 111 | |
| 112 | /* |
| 113 | * initial value from pg_aggregate entry |
| 114 | */ |
| 115 | Datum initValue; |
| 116 | bool initValueIsNull; |
| 117 | |
| 118 | /* |
| 119 | * We need the len and byval info for the agg's input and transition data |
| 120 | * types in order to know how to copy/delete values. |
| 121 | * |
| 122 | * Note that the info for the input type is used only when handling |
| 123 | * DISTINCT aggs with just one argument, so there is only one input type. |
| 124 | */ |
| 125 | int16 inputtypeLen, |
| 126 | transtypeLen; |
| 127 | bool inputtypeByVal, |
| 128 | transtypeByVal; |
| 129 | |
| 130 | /* |
| 131 | * Slots for holding the evaluated input arguments. These are set up |
| 132 | * during ExecInitAgg() and then used for each input row requiring either |
| 133 | * FILTER or ORDER BY/DISTINCT processing. |
| 134 | */ |
| 135 | TupleTableSlot *sortslot; /* current input tuple */ |
| 136 | TupleTableSlot *uniqslot; /* used for multi-column DISTINCT */ |
| 137 | TupleDesc sortdesc; /* descriptor of input tuples */ |
| 138 | |
| 139 | /* |
| 140 | * These values are working state that is initialized at the start of an |
| 141 | * input tuple group and updated for each input tuple. |
| 142 | * |
| 143 | * For a simple (non DISTINCT/ORDER BY) aggregate, we just feed the input |
| 144 | * values straight to the transition function. If it's DISTINCT or |
| 145 | * requires ORDER BY, we pass the input values into a Tuplesort object; |
| 146 | * then at completion of the input tuple group, we scan the sorted values, |
| 147 | * eliminate duplicates if needed, and run the transition function on the |
| 148 | * rest. |
| 149 | * |
| 150 | * We need a separate tuplesort for each grouping set. |
| 151 | */ |
| 152 | |
| 153 | Tuplesortstate **sortstates; /* sort objects, if DISTINCT or ORDER BY */ |
| 154 | |
| 155 | /* |
| 156 | * This field is a pre-initialized FunctionCallInfo struct used for |
| 157 | * calling this aggregate's transfn. We save a few cycles per row by not |
| 158 | * re-initializing the unchanging fields; which isn't much, but it seems |
| 159 | * worth the extra space consumption. |
| 160 | */ |
| 161 | FunctionCallInfo transfn_fcinfo; |
| 162 | |
| 163 | /* Likewise for serialization and deserialization functions */ |
| 164 | FunctionCallInfo serialfn_fcinfo; |
| 165 | |
| 166 | FunctionCallInfo deserialfn_fcinfo; |
| 167 | } AggStatePerTransData; |
| 168 | |
| 169 | /* |
| 170 | * AggStatePerAggData - per-aggregate information |
| 171 | * |
| 172 | * This contains the information needed to call the final function, to produce |
| 173 | * a final aggregate result from the state value. If there are multiple |
| 174 | * identical Aggrefs in the query, they can all share the same per-agg data. |
| 175 | * |
| 176 | * These values are set up during ExecInitAgg() and do not change thereafter. |
| 177 | */ |
| 178 | typedef struct AggStatePerAggData |
| 179 | { |
| 180 | /* |
| 181 | * Link to an Aggref expr this state value is for. |
| 182 | * |
| 183 | * There can be multiple identical Aggref's sharing the same per-agg. This |
| 184 | * points to the first one of them. |
| 185 | */ |
| 186 | Aggref *aggref; |
| 187 | |
| 188 | /* index to the state value which this agg should use */ |
| 189 | int transno; |
| 190 | |
| 191 | /* Optional Oid of final function (may be InvalidOid) */ |
| 192 | Oid finalfn_oid; |
| 193 | |
| 194 | /* |
| 195 | * fmgr lookup data for final function --- only valid when finalfn_oid is |
| 196 | * not InvalidOid. |
| 197 | */ |
| 198 | FmgrInfo finalfn; |
| 199 | |
| 200 | /* |
| 201 | * Number of arguments to pass to the finalfn. This is always at least 1 |
| 202 | * (the transition state value) plus any ordered-set direct args. If the |
| 203 | * finalfn wants extra args then we pass nulls corresponding to the |
| 204 | * aggregated input columns. |
| 205 | */ |
| 206 | int numFinalArgs; |
| 207 | |
| 208 | /* ExprStates for any direct-argument expressions */ |
| 209 | List *aggdirectargs; |
| 210 | |
| 211 | /* |
| 212 | * We need the len and byval info for the agg's result data type in order |
| 213 | * to know how to copy/delete values. |
| 214 | */ |
| 215 | int16 resulttypeLen; |
| 216 | bool resulttypeByVal; |
| 217 | |
| 218 | /* |
| 219 | * "shareable" is false if this agg cannot share state values with other |
| 220 | * aggregates because the final function is read-write. |
| 221 | */ |
| 222 | bool shareable; |
| 223 | } AggStatePerAggData; |
| 224 | |
| 225 | /* |
| 226 | * AggStatePerGroupData - per-aggregate-per-group working state |
| 227 | * |
| 228 | * These values are working state that is initialized at the start of |
| 229 | * an input tuple group and updated for each input tuple. |
| 230 | * |
| 231 | * In AGG_PLAIN and AGG_SORTED modes, we have a single array of these |
| 232 | * structs (pointed to by aggstate->pergroup); we re-use the array for |
| 233 | * each input group, if it's AGG_SORTED mode. In AGG_HASHED mode, the |
| 234 | * hash table contains an array of these structs for each tuple group. |
| 235 | * |
| 236 | * Logically, the sortstate field belongs in this struct, but we do not |
| 237 | * keep it here for space reasons: we don't support DISTINCT aggregates |
| 238 | * in AGG_HASHED mode, so there's no reason to use up a pointer field |
| 239 | * in every entry of the hashtable. |
| 240 | */ |
| 241 | typedef struct AggStatePerGroupData |
| 242 | { |
| 243 | #define FIELDNO_AGGSTATEPERGROUPDATA_TRANSVALUE 0 |
| 244 | Datum transValue; /* current transition value */ |
| 245 | #define FIELDNO_AGGSTATEPERGROUPDATA_TRANSVALUEISNULL 1 |
| 246 | bool transValueIsNull; |
| 247 | |
| 248 | #define FIELDNO_AGGSTATEPERGROUPDATA_NOTRANSVALUE 2 |
| 249 | bool noTransValue; /* true if transValue not set yet */ |
| 250 | |
| 251 | /* |
| 252 | * Note: noTransValue initially has the same value as transValueIsNull, |
| 253 | * and if true both are cleared to false at the same time. They are not |
| 254 | * the same though: if transfn later returns a NULL, we want to keep that |
| 255 | * NULL and not auto-replace it with a later input value. Only the first |
| 256 | * non-NULL input will be auto-substituted. |
| 257 | */ |
| 258 | } AggStatePerGroupData; |
| 259 | |
| 260 | /* |
| 261 | * AggStatePerPhaseData - per-grouping-set-phase state |
| 262 | * |
| 263 | * Grouping sets are divided into "phases", where a single phase can be |
| 264 | * processed in one pass over the input. If there is more than one phase, then |
| 265 | * at the end of input from the current phase, state is reset and another pass |
| 266 | * taken over the data which has been re-sorted in the mean time. |
| 267 | * |
| 268 | * Accordingly, each phase specifies a list of grouping sets and group clause |
| 269 | * information, plus each phase after the first also has a sort order. |
| 270 | */ |
| 271 | typedef struct AggStatePerPhaseData |
| 272 | { |
| 273 | AggStrategy aggstrategy; /* strategy for this phase */ |
| 274 | int numsets; /* number of grouping sets (or 0) */ |
| 275 | int *gset_lengths; /* lengths of grouping sets */ |
| 276 | Bitmapset **grouped_cols; /* column groupings for rollup */ |
| 277 | ExprState **eqfunctions; /* expression returning equality, indexed by |
| 278 | * nr of cols to compare */ |
| 279 | Agg *aggnode; /* Agg node for phase data */ |
| 280 | Sort *sortnode; /* Sort node for input ordering for phase */ |
| 281 | |
| 282 | ExprState *evaltrans; /* evaluation of transition functions */ |
| 283 | } AggStatePerPhaseData; |
| 284 | |
| 285 | /* |
| 286 | * AggStatePerHashData - per-hashtable state |
| 287 | * |
| 288 | * When doing grouping sets with hashing, we have one of these for each |
| 289 | * grouping set. (When doing hashing without grouping sets, we have just one of |
| 290 | * them.) |
| 291 | */ |
| 292 | typedef struct AggStatePerHashData |
| 293 | { |
| 294 | TupleHashTable hashtable; /* hash table with one entry per group */ |
| 295 | TupleHashIterator hashiter; /* for iterating through hash table */ |
| 296 | TupleTableSlot *hashslot; /* slot for loading hash table */ |
| 297 | FmgrInfo *hashfunctions; /* per-grouping-field hash fns */ |
| 298 | Oid *eqfuncoids; /* per-grouping-field equality fns */ |
| 299 | int numCols; /* number of hash key columns */ |
| 300 | int numhashGrpCols; /* number of columns in hash table */ |
| 301 | int largestGrpColIdx; /* largest col required for hashing */ |
| 302 | AttrNumber *hashGrpColIdxInput; /* hash col indices in input slot */ |
| 303 | AttrNumber *hashGrpColIdxHash; /* indices in hashtbl tuples */ |
| 304 | Agg *aggnode; /* original Agg node, for numGroups etc. */ |
| 305 | } AggStatePerHashData; |
| 306 | |
| 307 | |
| 308 | extern AggState *ExecInitAgg(Agg *node, EState *estate, int eflags); |
| 309 | extern void ExecEndAgg(AggState *node); |
| 310 | extern void ExecReScanAgg(AggState *node); |
| 311 | |
| 312 | extern Size hash_agg_entry_size(int numAggs); |
| 313 | |
| 314 | extern Datum aggregate_dummy(PG_FUNCTION_ARGS); |
| 315 | |
| 316 | #endif /* NODEAGG_H */ |
| 317 | |