1#include <DataStreams/AddingDefaultBlockOutputStream.h>
2#include <DataStreams/ConvertingBlockInputStream.h>
3#include <DataStreams/PushingToViewsBlockOutputStream.h>
4#include <DataStreams/SquashingBlockInputStream.h>
5#include <DataTypes/NestedUtils.h>
6#include <Interpreters/InterpreterSelectQuery.h>
7#include <Interpreters/InterpreterInsertQuery.h>
8#include <Parsers/ASTInsertQuery.h>
9#include <Common/CurrentThread.h>
10#include <Common/setThreadName.h>
11#include <Common/getNumberOfPhysicalCPUCores.h>
12#include <Common/ThreadPool.h>
13#include <Storages/MergeTree/ReplicatedMergeTreeBlockOutputStream.h>
14#include <Storages/StorageValues.h>
15#include <Storages/LiveView/StorageLiveView.h>
16
17namespace DB
18{
19
20PushingToViewsBlockOutputStream::PushingToViewsBlockOutputStream(
21 const String & database, const String & table, const StoragePtr & storage_,
22 const Context & context_, const ASTPtr & query_ptr_, bool no_destination)
23 : storage(storage_), context(context_), query_ptr(query_ptr_)
24{
25 /** TODO This is a very important line. At any insertion into the table one of streams should own lock.
26 * Although now any insertion into the table is done via PushingToViewsBlockOutputStream,
27 * but it's clear that here is not the best place for this functionality.
28 */
29 addTableLock(storage->lockStructureForShare(true, context.getInitialQueryId()));
30
31 /// If the "root" table deduplactes blocks, there are no need to make deduplication for children
32 /// Moreover, deduplication for AggregatingMergeTree children could produce false positives due to low size of inserting blocks
33 bool disable_deduplication_for_children = !no_destination && storage->supportsDeduplication();
34
35 if (!table.empty())
36 {
37 Dependencies dependencies = context.getDependencies(database, table);
38
39 /// We need special context for materialized views insertions
40 if (!dependencies.empty())
41 {
42 views_context = std::make_unique<Context>(context);
43 // Do not deduplicate insertions into MV if the main insertion is Ok
44 if (disable_deduplication_for_children)
45 views_context->getSettingsRef().insert_deduplicate = false;
46 }
47
48 for (const auto & database_table : dependencies)
49 {
50 auto dependent_table = context.getTable(database_table.first, database_table.second);
51
52 ASTPtr query;
53 BlockOutputStreamPtr out;
54
55 if (auto * materialized_view = dynamic_cast<const StorageMaterializedView *>(dependent_table.get()))
56 {
57 StoragePtr inner_table = materialized_view->getTargetTable();
58 query = materialized_view->getInnerQuery();
59 std::unique_ptr<ASTInsertQuery> insert = std::make_unique<ASTInsertQuery>();
60 insert->database = inner_table->getDatabaseName();
61 insert->table = inner_table->getTableName();
62 ASTPtr insert_query_ptr(insert.release());
63 InterpreterInsertQuery interpreter(insert_query_ptr, *views_context);
64 BlockIO io = interpreter.execute();
65 out = io.out;
66 }
67 else if (dynamic_cast<const StorageLiveView *>(dependent_table.get()))
68 out = std::make_shared<PushingToViewsBlockOutputStream>(
69 database_table.first, database_table.second, dependent_table, *views_context, ASTPtr(), true);
70 else
71 out = std::make_shared<PushingToViewsBlockOutputStream>(
72 database_table.first, database_table.second, dependent_table, *views_context, ASTPtr());
73
74 views.emplace_back(ViewInfo{std::move(query), database_table.first, database_table.second, std::move(out)});
75 }
76 }
77
78 /* Do not push to destination table if the flag is set */
79 if (!no_destination)
80 {
81 output = storage->write(query_ptr, context);
82 replicated_output = dynamic_cast<ReplicatedMergeTreeBlockOutputStream *>(output.get());
83 }
84}
85
86
87Block PushingToViewsBlockOutputStream::getHeader() const
88{
89 /// If we don't write directly to the destination
90 /// then expect that we're inserting with precalculated virtual columns
91 if (output)
92 return storage->getSampleBlock();
93 else
94 return storage->getSampleBlockWithVirtuals();
95}
96
97
98void PushingToViewsBlockOutputStream::write(const Block & block)
99{
100 /** Throw an exception if the sizes of arrays - elements of nested data structures doesn't match.
101 * We have to make this assertion before writing to table, because storage engine may assume that they have equal sizes.
102 * NOTE It'd better to do this check in serialization of nested structures (in place when this assumption is required),
103 * but currently we don't have methods for serialization of nested structures "as a whole".
104 */
105 Nested::validateArraySizes(block);
106
107 if (auto * live_view = dynamic_cast<StorageLiveView *>(storage.get()))
108 {
109 StorageLiveView::writeIntoLiveView(*live_view, block, context);
110 }
111 else
112 {
113 if (output)
114 /// TODO: to support virtual and alias columns inside MVs, we should return here the inserted block extended
115 /// with additional columns directly from storage and pass it to MVs instead of raw block.
116 output->write(block);
117 }
118
119 /// Don't process materialized views if this block is duplicate
120 if (replicated_output && replicated_output->lastBlockIsDuplicate())
121 return;
122
123 // Insert data into materialized views only after successful insert into main table
124 const Settings & settings = context.getSettingsRef();
125 if (settings.parallel_view_processing && views.size() > 1)
126 {
127 // Push to views concurrently if enabled, and more than one view is attached
128 ThreadPool pool(std::min(size_t(settings.max_threads), views.size()));
129 for (size_t view_num = 0; view_num < views.size(); ++view_num)
130 {
131 auto thread_group = CurrentThread::getGroup();
132 pool.scheduleOrThrowOnError([=, this]
133 {
134 setThreadName("PushingToViews");
135 if (thread_group)
136 CurrentThread::attachToIfDetached(thread_group);
137 process(block, view_num);
138 });
139 }
140 // Wait for concurrent view processing
141 pool.wait();
142 }
143 else
144 {
145 // Process sequentially
146 for (size_t view_num = 0; view_num < views.size(); ++view_num)
147 process(block, view_num);
148 }
149}
150
151void PushingToViewsBlockOutputStream::writePrefix()
152{
153 if (output)
154 output->writePrefix();
155
156 for (auto & view : views)
157 {
158 try
159 {
160 view.out->writePrefix();
161 }
162 catch (Exception & ex)
163 {
164 ex.addMessage("while write prefix to view " + view.database + "." + view.table);
165 throw;
166 }
167 }
168}
169
170void PushingToViewsBlockOutputStream::writeSuffix()
171{
172 if (output)
173 output->writeSuffix();
174
175 for (auto & view : views)
176 {
177 try
178 {
179 view.out->writeSuffix();
180 }
181 catch (Exception & ex)
182 {
183 ex.addMessage("while write prefix to view " + view.database + "." + view.table);
184 throw;
185 }
186 }
187}
188
189void PushingToViewsBlockOutputStream::flush()
190{
191 if (output)
192 output->flush();
193
194 for (auto & view : views)
195 view.out->flush();
196}
197
198void PushingToViewsBlockOutputStream::process(const Block & block, size_t view_num)
199{
200 auto & view = views[view_num];
201
202 try
203 {
204 BlockInputStreamPtr in;
205
206 /// We need keep InterpreterSelectQuery, until the processing will be finished, since:
207 ///
208 /// - We copy Context inside InterpreterSelectQuery to support
209 /// modification of context (Settings) for subqueries
210 /// - InterpreterSelectQuery lives shorter than query pipeline.
211 /// It's used just to build the query pipeline and no longer needed
212 /// - ExpressionAnalyzer and then, Functions, that created in InterpreterSelectQuery,
213 /// **can** take a reference to Context from InterpreterSelectQuery
214 /// (the problem raises only when function uses context from the
215 /// execute*() method, like FunctionDictGet do)
216 /// - These objects live inside query pipeline (DataStreams) and the reference become dangling.
217 std::optional<InterpreterSelectQuery> select;
218
219 if (view.query)
220 {
221 /// We create a table with the same name as original table and the same alias columns,
222 /// but it will contain single block (that is INSERT-ed into main table).
223 /// InterpreterSelectQuery will do processing of alias columns.
224 Context local_context = *views_context;
225 local_context.addViewSource(
226 StorageValues::create(storage->getDatabaseName(), storage->getTableName(), storage->getColumns(),
227 block));
228 select.emplace(view.query, local_context, SelectQueryOptions());
229 in = std::make_shared<MaterializingBlockInputStream>(select->execute().in);
230
231 /// Squashing is needed here because the materialized view query can generate a lot of blocks
232 /// even when only one block is inserted into the parent table (e.g. if the query is a GROUP BY
233 /// and two-level aggregation is triggered).
234 in = std::make_shared<SquashingBlockInputStream>(
235 in, context.getSettingsRef().min_insert_block_size_rows, context.getSettingsRef().min_insert_block_size_bytes);
236 in = std::make_shared<ConvertingBlockInputStream>(context, in, view.out->getHeader(), ConvertingBlockInputStream::MatchColumnsMode::Name);
237 }
238 else
239 in = std::make_shared<OneBlockInputStream>(block);
240
241 in->readPrefix();
242
243 while (Block result_block = in->read())
244 {
245 Nested::validateArraySizes(result_block);
246 view.out->write(result_block);
247 }
248
249 in->readSuffix();
250 }
251 catch (Exception & ex)
252 {
253 ex.addMessage("while pushing to view " + backQuoteIfNeed(view.database) + "." + backQuoteIfNeed(view.table));
254 throw;
255 }
256}
257
258}
259