| 1 | #pragma once |
| 2 | |
| 3 | #include <Core/NamesAndTypes.h> |
| 4 | #include <Storages/MergeTree/MergeTreeReaderStream.h> |
| 5 | #include <port/clock.h> |
| 6 | |
| 7 | |
| 8 | namespace DB |
| 9 | { |
| 10 | |
| 11 | class IDataType; |
| 12 | |
| 13 | /// Reads the data between pairs of marks in the same part. When reading consecutive ranges, avoids unnecessary seeks. |
| 14 | /// When ranges are almost consecutive, seeks are fast because they are performed inside the buffer. |
| 15 | /// Avoids loading the marks file if it is not needed (e.g. when reading the whole part). |
| 16 | class MergeTreeReader : private boost::noncopyable |
| 17 | { |
| 18 | public: |
| 19 | using ValueSizeMap = std::map<std::string, double>; |
| 20 | using DeserializeBinaryBulkStateMap = std::map<std::string, IDataType::DeserializeBinaryBulkStatePtr>; |
| 21 | |
| 22 | MergeTreeReader(String path_, /// Path to the directory containing the part |
| 23 | MergeTreeData::DataPartPtr data_part_, |
| 24 | NamesAndTypesList columns_, |
| 25 | UncompressedCache * uncompressed_cache_, |
| 26 | MarkCache * mark_cache_, |
| 27 | bool save_marks_in_cache_, |
| 28 | const MergeTreeData & storage_, |
| 29 | MarkRanges all_mark_ranges_, |
| 30 | size_t aio_threshold_, |
| 31 | size_t max_read_buffer_size_, |
| 32 | ValueSizeMap avg_value_size_hints_ = ValueSizeMap{}, |
| 33 | const ReadBufferFromFileBase::ProfileCallback & profile_callback_ = ReadBufferFromFileBase::ProfileCallback{}, |
| 34 | clockid_t clock_type_ = CLOCK_MONOTONIC_COARSE); |
| 35 | |
| 36 | ~MergeTreeReader(); |
| 37 | |
| 38 | const ValueSizeMap & getAvgValueSizeHints() const; |
| 39 | |
| 40 | /// Fill in the entries of res_columns that are missing (an entry that was not read is nullptr). |
| 41 | /// NOTE(review): earlier wording referred to `ordered_names` and a block, neither of which is a parameter here; presumably the order is that of getColumns() - confirm in the .cpp. |
| 42 | /// num_rows is needed in case all res_columns are nullptr. should_evaluate_missing_defaults is an output flag - presumably a request to call evaluateMissingDefaults() afterwards; confirm. |
| 43 | void fillMissingColumns(Columns & res_columns, bool & should_evaluate_missing_defaults, size_t num_rows); |
| 44 | /// Evaluate defaulted columns if necessary. additional_columns is taken by value; presumably it carries extra columns that the default expressions depend on - confirm against callers. |
| 45 | void evaluateMissingDefaults(Block additional_columns, Columns & res_columns); |
| 46 | |
| 47 | const NamesAndTypesList & getColumns() const { return columns; } |
| 48 | size_t numColumnsInResult() const { return columns.size(); } |
| 49 | |
| 50 | /// Returns the number of rows that have been read, or zero if there are no columns to read. |
| 51 | /// If continue_reading is true, continues reading from the last state; otherwise seeks to from_mark. |
| 52 | /// Fills res_columns in the order specified by the getColumns() list. If a column was not read, its entry will be nullptr. |
| 53 | size_t readRows(size_t from_mark, bool continue_reading, size_t max_rows_to_read, Columns & res_columns); |
| 54 | |
| 55 | MergeTreeData::DataPartPtr data_part; |
| 56 | |
| 57 | size_t getFirstMarkToRead() const |
| 58 | { |
| 59 | return all_mark_ranges.back().begin; |
| 60 | } |
| 61 | private: |
| 62 | using FileStreams = std::map<std::string, std::unique_ptr<MergeTreeReaderStream>>; |
| 63 | |
| 64 | /// Map from a string name to an average value size hint; the hints are used to reduce the number of reallocations when creating columns of variable size. |
| 65 | ValueSizeMap avg_value_size_hints; |
| 66 | /// Stores states for IDataType::deserializeBinaryBulk, keyed by a string name. |
| 67 | DeserializeBinaryBulkStateMap deserialize_binary_bulk_state_map; |
| 68 | /// Path to the directory containing the part. |
| 69 | String path; |
| 70 | |
| 71 | FileStreams streams; |
| 72 | |
| 73 | /// Columns that are read; exposed through getColumns() and counted by numColumnsInResult(). |
| 74 | NamesAndTypesList columns; |
| 75 | |
| 76 | UncompressedCache * uncompressed_cache; |
| 77 | MarkCache * mark_cache; |
| 78 | /// If save_marks_in_cache is false and the marks are not in the cache, they are loaded but not saved in the cache, to avoid evicting other data. |
| 79 | bool save_marks_in_cache; |
| 80 | |
| 81 | const MergeTreeData & storage; |
| 82 | MarkRanges all_mark_ranges; |
| 83 | size_t aio_threshold; |
| 84 | size_t max_read_buffer_size; |
| 85 | |
| 86 | void addStreams(const String & name, const IDataType & type, |
| 87 | const ReadBufferFromFileBase::ProfileCallback & profile_callback, clockid_t clock_type); |
| 88 | |
| 89 | void readData( |
| 90 | const String & name, const IDataType & type, IColumn & column, |
| 91 | size_t from_mark, bool continue_reading, size_t max_rows_to_read, |
| 92 | bool read_offsets = true); |
| 93 | |
| 94 | |
| 95 | friend class MergeTreeRangeReader::DelayedStream; |
| 96 | }; |
| 97 | |
| 98 | } |
| 99 | |