MergeTreeReader.h source code [ClickHouse/dbms/src/Storages/MergeTree/MergeTreeReader.h]

1	#pragma once
2
3	#include <Core/NamesAndTypes.h>
4	#include <Storages/MergeTree/MergeTreeReaderStream.h>
5	#include <port/clock.h>
6
7
8	namespace DB
9	{
10
11	class IDataType;
12
13	/// Reads the data between pairs of marks within a single data part. When consecutive ranges are read, unnecessary seeks are avoided.
14	/// When ranges are almost consecutive, seeks are fast because they are performed inside the buffer.
15	/// Avoids loading the marks file if it is not needed (e.g. when reading the whole part). Non-copyable (privately inherits boost::noncopyable).
16	class MergeTreeReader : private boost::noncopyable
17	{
18	public:
19	using ValueSizeMap = std::map<std::string, double>; /// String-keyed map of doubles; the type of avg_value_size_hints.
20	using DeserializeBinaryBulkStateMap = std::map<std::string, IDataType::DeserializeBinaryBulkStatePtr>; /// String-keyed map of states; the type of deserialize_binary_bulk_state_map.
21	/// Constructor (defined out of line). Every argument except profile_callback_ and clock_type_ has a same-named member (minus the trailing underscore) and presumably initializes it - TODO confirm in the definition.
22	MergeTreeReader(String path_, /// Path to the directory containing the part
23	MergeTreeData::DataPartPtr data_part_,
24	NamesAndTypesList columns_,
25	UncompressedCache * uncompressed_cache_,
26	MarkCache * mark_cache_,
27	bool save_marks_in_cache_,
28	const MergeTreeData & storage_,
29	MarkRanges all_mark_ranges_,
30	size_t aio_threshold_,
31	size_t max_read_buffer_size_,
32	ValueSizeMap avg_value_size_hints_ = ValueSizeMap {}, /// Defaults to an empty map.
33	const ReadBufferFromFileBase::ProfileCallback & profile_callback_ = ReadBufferFromFileBase::ProfileCallback {}, /// Defaults to a value-initialized callback; same type as addStreams' profile_callback.
34	clockid_t clock_type_ = CLOCK_MONOTONIC_COARSE); /// Defaults to CLOCK_MONOTONIC_COARSE; same type as addStreams' clock_type.
35	/// Destructor; user-declared here and defined out of line.
36	~MergeTreeReader();
37	/// Read-only access to a ValueSizeMap; presumably the avg_value_size_hints member (definition not shown here).
38	const ValueSizeMap & getAvgValueSizeHints() const;
39	/// Adds the columns that are missing from the result. NOTE(review): the wording below mentions "ordered_names" and "the block",
40	/// neither of which is a parameter of the current signature - confirm against the definition.
41	/// Missing columns are added in the order specified by ordered_names. should_evaluate_missing_defaults is a non-const reference, presumably an output flag - TODO confirm.
42	/// num_rows is needed in case all res_columns are nullptr.
43	void fillMissingColumns(Columns & res_columns, bool & should_evaluate_missing_defaults, size_t num_rows);
44	/// Evaluate defaulted columns if necessary. additional_columns is taken by value; res_columns is a non-const reference, so it is presumably modified in place.
45	void evaluateMissingDefaults(Block additional_columns, Columns & res_columns);
46	/// Inline accessors: the list of columns that are read, and the number of entries in that list.
47	const NamesAndTypesList & getColumns() const { return columns; }
48	size_t numColumnsInResult() const { return columns.size(); }
49
50	/// Returns the number of rows that have been read, or zero if there are no columns to read.
51	/// If continue_reading is true, continues reading from the last state; otherwise seeks to from_mark.
52	/// Fills res_columns in the order specified by the getColumns() list. If a column was not read, its entry will be nullptr.
53	size_t readRows(size_t from_mark, bool continue_reading, size_t max_rows_to_read, Columns & res_columns);
54	/// Public data member. Presumably set from the constructor's data_part_ argument (definition not shown here).
55	MergeTreeData::DataPartPtr data_part;
56	/// Returns the `begin` field of the LAST element of all_mark_ranges. NOTE(review): back() is called without an emptiness check, and the name says "first" - presumably the ranges are kept in reverse order; confirm with callers.
57	size_t getFirstMarkToRead() const
58	{
59	return all_mark_ranges.back().begin;
60	}
61	private:
62	using FileStreams = std::map<std::string, std::unique_ptr<MergeTreeReaderStream>>; /// String-keyed map that owns its MergeTreeReaderStream objects through unique_ptr.
63
64	/// avg_value_size_hints are used to reduce the number of reallocations when creating columns of variable size.
65	ValueSizeMap avg_value_size_hints;
66	/// Stores states for IDataType::deserializeBinaryBulk
67	DeserializeBinaryBulkStateMap deserialize_binary_bulk_state_map;
68	/// Path to the directory containing the part
69	String path;
70	/// Streams owned by this reader, keyed by string; presumably populated by addStreams - TODO confirm.
71	FileStreams streams;
72
73	/// Columns that are read.
74	NamesAndTypesList columns;
75	/// Raw pointers to the caches; ownership and nullability are not visible in this header.
76	UncompressedCache * uncompressed_cache;
77	MarkCache * mark_cache;
78	/// If save_marks_in_cache is false, then, if marks are not in cache, we will load them but won't save in the cache, to avoid evicting other data.
79	bool save_marks_in_cache;
80	/// The members below share their names (minus the trailing underscore) with constructor arguments; how they are used is in the out-of-line definitions.
81	const MergeTreeData & storage;
82	MarkRanges all_mark_ranges;
83	size_t aio_threshold;
84	size_t max_read_buffer_size;
85	/// Declared here, defined out of line. Takes a name and a data type plus a profile callback and clock type (same types as the last two constructor arguments).
86	void addStreams(const String & name, const IDataType & type,
87	const ReadBufferFromFileBase::ProfileCallback & profile_callback, clockid_t clock_type);
88	/// Declared here, defined out of line. Takes the same from_mark / continue_reading / max_rows_to_read trio as readRows, for a single column; read_offsets defaults to true.
89	void readData(
90	const String & name, const IDataType & type, IColumn & column,
91	size_t from_mark, bool continue_reading, size_t max_rows_to_read,
92	bool read_offsets = true);
93
94	/// Grants MergeTreeRangeReader::DelayedStream access to the private members of this class.
95	friend class MergeTreeRangeReader::DelayedStream;
96	};
97
98	}
99

Browse the source code of ClickHouse/dbms/src/Storages/MergeTree/MergeTreeReader.h