compression_function.hpp source code [Velox/build/_deps/duckdb-src/src/include/duckdb/function/compression_function.hpp]

1	//===----------------------------------------------------------------------===//
2	// DuckDB
3	//
4	// duckdb/function/compression_function.hpp
5	//
6	//
7	//===----------------------------------------------------------------------===//
8
9	#pragma once
10
11	#include "duckdb/common/common.hpp"
12	#include "duckdb/function/function.hpp"
13	#include "duckdb/common/enums/compression_type.hpp"
14	#include "duckdb/common/map.hpp"
15	#include "duckdb/storage/storage_info.hpp"
16	#include "duckdb/common/mutex.hpp"
17
18	namespace duckdb {
19	class DatabaseInstance;
20	class ColumnData;
21	class ColumnDataCheckpointer;
22	class ColumnSegment;
23	class SegmentStatistics;
24
25	struct ColumnFetchState;
26	struct ColumnScanState;
27	struct SegmentScanState;
28
29	struct AnalyzeState {
30	virtual ~AnalyzeState() {
31	}
32
33	template <class TARGET>
34	TARGET &Cast() {
35	D_ASSERT(dynamic_cast<TARGET >(this*));
36	return reinterpret_cast<TARGET &>(*this);
37	}
38	template <class TARGET>
39	const TARGET &Cast() const {
40	D_ASSERT(dynamic_cast<const TARGET >(this*));
41	return reinterpret_cast<const TARGET &>(*this);
42	}
43	};
44
45	struct CompressionState {
46	virtual ~CompressionState() {
47	}
48
49	template <class TARGET>
50	TARGET &Cast() {
51	D_ASSERT(dynamic_cast<TARGET >(this*));
52	return reinterpret_cast<TARGET &>(*this);
53	}
54	template <class TARGET>
55	const TARGET &Cast() const {
56	D_ASSERT(dynamic_cast<const TARGET >(this*));
57	return reinterpret_cast<const TARGET &>(*this);
58	}
59	};
60
61	struct CompressedSegmentState {
62	virtual ~CompressedSegmentState() {
63	}
64
65	template <class TARGET>
66	TARGET &Cast() {
67	D_ASSERT(dynamic_cast<TARGET >(this*));
68	return reinterpret_cast<TARGET &>(*this);
69	}
70	template <class TARGET>
71	const TARGET &Cast() const {
72	D_ASSERT(dynamic_cast<const TARGET >(this*));
73	return reinterpret_cast<const TARGET &>(*this);
74	}
75	};
76
77	struct CompressionAppendState {
78	CompressionAppendState(BufferHandle handle_p) : handle (std::move(handle_p)) {
79	}
80	virtual ~CompressionAppendState() {
81	}
82
83	BufferHandle handle;
84
85	template <class TARGET>
86	TARGET &Cast() {
87	D_ASSERT(dynamic_cast<TARGET >(this*));
88	return reinterpret_cast<TARGET &>(*this);
89	}
90	template <class TARGET>
91	const TARGET &Cast() const {
92	D_ASSERT(dynamic_cast<const TARGET >(this*));
93	return reinterpret_cast<const TARGET &>(*this);
94	}
95	};
96
97	//===--------------------------------------------------------------------===//
98	// Analyze
99	//===--------------------------------------------------------------------===//
100	//! The analyze functions are used to determine whether or not to use this compression method
101	//! The system first determines the potential compression methods to use based on the physical type of the column
102	//! After that the following steps are taken:
103	//! 1. The init_analyze is called to initialize the analyze state of every candidate compression method
104	//! 2. The analyze method is called with all of the input data in the order in which it must be stored.
105	//! analyze can return "false". In that case, the compression method is taken out of consideration early.
106	//! 3. The final_analyze method is called, which should return a score for the compression method
107
108	//! The system then decides which compression function to use based on the analyzed score (returned from final_analyze)
109	typedef unique_ptr<AnalyzeState> (*compression_init_analyze_t)(ColumnData &col_data, PhysicalType type);
110	typedef bool (*compression_analyze_t)(AnalyzeState &state, Vector &input, idx_t count);
111	typedef idx_t (*compression_final_analyze_t)(AnalyzeState &state);
112
113	//===--------------------------------------------------------------------===//
114	// Compress
115	//===--------------------------------------------------------------------===//
116	typedef unique_ptr<CompressionState> (*compression_init_compression_t)(ColumnDataCheckpointer &checkpointer,
117	unique_ptr<AnalyzeState> state);
118	typedef void (*compression_compress_data_t)(CompressionState &state, Vector &scan_vector, idx_t count);
119	typedef void (*compression_compress_finalize_t)(CompressionState &state);
120
121	//===--------------------------------------------------------------------===//
122	// Uncompress / Scan
123	//===--------------------------------------------------------------------===//
124	typedef unique_ptr<SegmentScanState> (*compression_init_segment_scan_t)(ColumnSegment &segment);
125
126	//! Function prototype used for reading an entire vector (STANDARD_VECTOR_SIZE)
127	typedef void (*compression_scan_vector_t)(ColumnSegment &segment, ColumnScanState &state, idx_t scan_count,
128	Vector &result);
129	//! Function prototype used for reading an arbitrary ('scan_count') number of values
130	typedef void (*compression_scan_partial_t)(ColumnSegment &segment, ColumnScanState &state, idx_t scan_count,
131	Vector &result, idx_t result_offset);
132	//! Function prototype used for reading a single value
133	typedef void (*compression_fetch_row_t)(ColumnSegment &segment, ColumnFetchState &state, row_t row_id, Vector &result,
134	idx_t result_idx);
135	//! Function prototype used for skipping 'skip_count' values, non-trivial if random-access is not supported for the
136	//! compressed data.
137	typedef void (*compression_skip_t)(ColumnSegment &segment, ColumnScanState &state, idx_t skip_count);
138
139	//===--------------------------------------------------------------------===//
140	// Append (optional)
141	//===--------------------------------------------------------------------===//
142	typedef unique_ptr<CompressedSegmentState> (*compression_init_segment_t)(ColumnSegment &segment, block_id_t block_id);
143	typedef unique_ptr<CompressionAppendState> (*compression_init_append_t)(ColumnSegment &segment);
144	typedef idx_t (*compression_append_t)(CompressionAppendState &append_state, ColumnSegment &segment,
145	SegmentStatistics &stats, UnifiedVectorFormat &data, idx_t offset, idx_t count);
146	typedef idx_t (*compression_finalize_append_t)(ColumnSegment &segment, SegmentStatistics &stats);
147	typedef void (*compression_revert_append_t)(ColumnSegment &segment, idx_t start_row);
148
149	class CompressionFunction {
150	public:
151	CompressionFunction(CompressionType type, PhysicalType data_type, compression_init_analyze_t init_analyze,
152	compression_analyze_t analyze, compression_final_analyze_t final_analyze,
153	compression_init_compression_t init_compression, compression_compress_data_t compress,
154	compression_compress_finalize_t compress_finalize, compression_init_segment_scan_t init_scan,
155	compression_scan_vector_t scan_vector, compression_scan_partial_t scan_partial,
156	compression_fetch_row_t fetch_row, compression_skip_t skip,
157	compression_init_segment_t init_segment = nullptr,
158	compression_init_append_t init_append = nullptr, compression_append_t append = nullptr,
159	compression_finalize_append_t finalize_append = nullptr,
160	compression_revert_append_t revert_append = nullptr)
161	: type(type), data_type(data_type), init_analyze(init_analyze), analyze(analyze), final_analyze(final_analyze),
162	init_compression(init_compression), compress(compress), compress_finalize(compress_finalize),
163	init_scan(init_scan), scan_vector(scan_vector), scan_partial(scan_partial), fetch_row(fetch_row), skip(skip),
164	init_segment(init_segment), init_append(init_append), append(append), finalize_append(finalize_append),
165	revert_append(revert_append) {
166	}
167
168	//! Compression type
169	CompressionType type;
170	//! The data type this function can compress
171	PhysicalType data_type;
172
173	//! Analyze step: determine which compression function is the most effective
174	//! init_analyze is called once to set up the analyze state
175	compression_init_analyze_t init_analyze;
176	//! analyze is called several times (once per vector in the row group)
177	//! analyze should return true, unless compression is no longer possible with this compression method
178	//! in that case false should be returned
179	compression_analyze_t analyze;
180	//! final_analyze should return the score of the compression function
181	//! ideally this is the exact number of bytes required to store the data
182	//! this is not required/enforced: it can be an estimate as well
183	//! also this function can return DConstants::INVALID_INDEX to skip this compression method
184	compression_final_analyze_t final_analyze;
185
186	//! Compression step: actually compress the data
187	//! init_compression is called once to set up the comperssion state
188	compression_init_compression_t init_compression;
189	//! compress is called several times (once per vector in the row group)
190	compression_compress_data_t compress;
191	//! compress_finalize is called after
192	compression_compress_finalize_t compress_finalize;
193
194	//! init_scan is called to set up the scan state
195	compression_init_segment_scan_t init_scan;
196	//! scan_vector scans an entire vector using the scan state
197	compression_scan_vector_t scan_vector;
198	//! scan_partial scans a subset of a vector
199	//! this can request > vector_size as well
200	//! this is used if a vector crosses segment boundaries, or for child columns of lists
201	compression_scan_partial_t scan_partial;
202	//! fetch an individual row from the compressed vector
203	//! used for index lookups
204	compression_fetch_row_t fetch_row;
205	//! Skip forward in the compressed segment
206	compression_skip_t skip;
207
208	// Append functions
209	//! This only really needs to be defined for uncompressed segments
210
211	//! Initialize a compressed segment (optional)
212	compression_init_segment_t init_segment;
213	//! Initialize the append state (optional)
214	compression_init_append_t init_append;
215	//! Append to the compressed segment (optional)
216	compression_append_t append;
217	//! Finalize an append to the segment
218	compression_finalize_append_t finalize_append;
219	//! Revert append (optional)
220	compression_revert_append_t revert_append;
221	};
222
223	//! The set of compression functions
224	struct CompressionFunctionSet {
225	mutex lock;
226	map<CompressionType, map<PhysicalType, CompressionFunction>> functions;
227	};
228
229	} // namespace duckdb
230

Browse the source code of Velox/build/_deps/duckdb-src/src/include/duckdb/function/compression_function.hpp