network.cpp source code [Godot/thirdparty/oidn/core/network.cpp]

1	// ======================================================================== //
2	// Copyright 2009-2019 Intel Corporation //
3	// //
4	// Licensed under the Apache License, Version 2.0 (the "License"); //
5	// you may not use this file except in compliance with the License. //
6	// You may obtain a copy of the License at //
7	// //
8	// http://www.apache.org/licenses/LICENSE-2.0 //
9	// //
10	// Unless required by applicable law or agreed to in writing, software //
11	// distributed under the License is distributed on an "AS IS" BASIS, //
12	// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. //
13	// See the License for the specific language governing permissions and //
14	// limitations under the License. //
15	// ======================================================================== //
16
17	#include "upsample.h"
18	#include "weights_reorder.h"
19	#include "network.h"
20	// -- GODOT start --
21	#include <cstring>
22	// -- GODOT end --
23
24	namespace oidn {
25
26	template<int K>
27	Network<K>::Network(const Ref<Device>& device, const std::map<std::string, Tensor>& weightMap)
28	: device (device),
29	eng (engine::cpu, `0`),
30	sm (eng),
31	weightMap (weightMap)
32	{
33	}
34
35	template<int K>
36	void Network<K>::execute(const Progress& progress, int taskIndex)
37	{
38	if (progress.func)
39	{
40	const double value = double(taskIndex) / double(progress.taskCount);
41	if (!progress.func(progress.userPtr, value))
42	throw Exception (Error::Cancelled, "execution was cancelled");
43	}
44
45	for (size_t i = `0`; i < nodes.size(); ++i)
46	{
47	nodes [i]->execute(sm);
48
49	if (progress.func)
50	{
51	const double value = (double(taskIndex) + double(i+`1`) / double(nodes.size())) / double(progress.taskCount);
52	if (!progress.func(progress.userPtr, value))
53	throw Exception (Error::Cancelled, "execution was cancelled");
54	}
55	}
56	}
57
58	template<int K>
59	std::shared_ptr<memory> Network<K>::allocTensor(const memory::dims& dims,
60	memory::format_tag format,
61	void* data)
62	{
63	if (format == memory::format_tag::any)
64	{
65	if (dims.size() == `4`)
66	format = BlockedFormat<K>::nChwKc;
67	else if (dims.size() == `1`)
68	format = memory::format_tag::x;
69	else
70	assert(`0`);
71	}
72	memory::desc desc(dims, memory::data_type::f32, format);
73	if (data == nullptr)
74	{
75	const size_t bytes = getTensorSize(dims) * sizeof(float);
76	if (format == BlockedFormat<K>::nChwKc)
77	activationAllocBytes += bytes;
78	totalAllocBytes += bytes;
79
80	return std::make_shared<memory>(desc, eng);
81	}
82	else
83	{
84	return std::make_shared<memory>(desc, eng, data);
85	}
86	}
87
88	template<int K>
89	std::shared_ptr<memory> Network<K>::castTensor(const memory::dims& dims,
90	const std::shared_ptr<memory>& src,
91	size_t srcOffset,
92	memory::format_tag format)
93	{
94	const mkldnn_memory_desc_t& srcDesc = src ->get_desc().data;
95	MAYBE_UNUSED(srcDesc);
96	assert(srcDesc.data_type == memory::data_type::f32);
97	assert(getTensorSize(src) >= srcOffset + getTensorSize(dims));
98
99	if (format == memory::format_tag::any)
100	{
101	if (dims.size() == `4`)
102	format = BlockedFormat<K>::nChwKc;
103	else if (dims.size() == `1`)
104	format = memory::format_tag::x;
105	else
106	assert(`0`);
107	}
108	memory::desc desc(dims, memory::data_type::f32, format);
109	float* srcPtr = (float*)src ->get_data_handle() + srcOffset;
110	return std::make_shared<memory>(desc, eng, srcPtr);
111	}
112
113	template<int K>
114	std::shared_ptr<memory> Network<K>::castTensor(const memory::dims& dims,
115	const std::shared_ptr<memory>& src,
116	const memory::dims& srcOffset)
117	{
118	return castTensor(dims, src, getTensorSize(srcOffset));
119	}
120
121	template<int K>
122	void Network<K>::zeroTensor(const std::shared_ptr<memory>& dst)
123	{
124	assert(getTensorType(dst) == memory::data_type::f32);
125	memset(dst ->get_data_handle(), `0`, getTensorSize(dst)*sizeof(float));
126	}
127
128	template<int K>
129	memory::dims Network<K>::getInputReorderDims(const memory::dims& srcDims, int alignment)
130	{
131	memory::dims dstDims = srcDims;
132	dstDims [`1`] = getPadded<K>(srcDims [`1`]); // round up C
133	dstDims [`2`] = roundUp(srcDims [`2`], memory::dim(alignment)); // round up H
134	dstDims [`3`] = roundUp(srcDims [`3`], memory::dim(alignment)); // round up W
135	return dstDims;
136	}
137
138	template<int K>
139	std::shared_ptr<Node> Network<K>::addInputReorder(const Image& color,
140	const Image& albedo,
141	const Image& normal,
142	const std::shared_ptr<TransferFunction>& transferFunc,
143	int alignment,
144	const std::shared_ptr<memory>& userDst)
145	{
146	assert(color);
147	int inputC = `3`;
148	if (albedo) inputC += `3`;
149	if (normal) inputC += `3`;
150
151	memory::dims srcDims = {`1`, inputC, color.height, color.width};
152	memory::dims dstDims = getInputReorderDims(srcDims, alignment);
153
154	// Allocate padded memory
155	auto dst = userDst;
156	if (!dst)
157	dst = allocTensor(dstDims);
158
159	// Push node
160	std::shared_ptr<Node> node;
161
162	if (auto tf = std::dynamic_pointer_cast<LinearTransferFunction>(transferFunc))
163	node = std::make_shared<InputReorderNode<K, LinearTransferFunction>>(color, albedo, normal, dst, tf);
164	else if (auto tf = std::dynamic_pointer_cast<GammaTransferFunction>(transferFunc))
165	node = std::make_shared<InputReorderNode<K, GammaTransferFunction>>(color, albedo, normal, dst, tf);
166	else if (auto tf = std::dynamic_pointer_cast<LogTransferFunction>(transferFunc))
167	node = std::make_shared<InputReorderNode<K, LogTransferFunction>>(color, albedo, normal, dst, tf);
168	else if (auto tf = std::dynamic_pointer_cast<PQXTransferFunction>(transferFunc))
169	node = std::make_shared<InputReorderNode<K, PQXTransferFunction>>(color, albedo, normal, dst, tf);
170	else
171	assert(`0`);
172
173	nodes.push_back(node);
174	return node;
175	}
176
177	template<int K>
178	std::shared_ptr<Node> Network<K>::addOutputReorder(const std::shared_ptr<memory>& src,
179	const std::shared_ptr<TransferFunction>& transferFunc,
180	const Image& output)
181	{
182	memory::dims srcDims = getTensorDims(src);
183	assert(srcDims[`1`] == K);
184
185	// Push node
186	std::shared_ptr<Node> node;
187
188	if (auto tf = std::dynamic_pointer_cast<LinearTransferFunction>(transferFunc))
189	node = std::make_shared<OutputReorderNode<K, LinearTransferFunction>>(src, output, tf);
190	else if (auto tf = std::dynamic_pointer_cast<GammaTransferFunction>(transferFunc))
191	node = std::make_shared<OutputReorderNode<K, GammaTransferFunction>>(src, output, tf);
192	else if (auto tf = std::dynamic_pointer_cast<LogTransferFunction>(transferFunc))
193	node = std::make_shared<OutputReorderNode<K, LogTransferFunction>>(src, output, tf);
194	else if (auto tf = std::dynamic_pointer_cast<PQXTransferFunction>(transferFunc))
195	node = std::make_shared<OutputReorderNode<K, PQXTransferFunction>>(src, output, tf);
196	else
197	assert(`0`);
198
199	nodes.push_back(node);
200	return node;
201	}
202
203	template<int K>
204	memory::dims Network<K>::getConvDims(const std::string& name, const memory::dims& srcDims)
205	{
206	auto b = weightMap [name + "/b"];
207	memory::dims dstDims = srcDims;
208	dstDims [`1`] = getPadded<K>(b.dims [`0`]); // dstDims[C] = getPadded(OC)
209	return dstDims;
210	}
211
212	template<int K>
213	std::shared_ptr<Node> Network<K>::addConv(const std::string& name,
214	const std::shared_ptr<memory>& src,
215	const std::shared_ptr<memory>& userDst,
216	bool relu)
217	{
218	const memory::dims strides = {`1`, `1`};
219	const memory::dims padding = {`1`, `1`};
220
221	memory::dims srcDims = getTensorDims(src);
222
223	// Get the weights
224	const auto& W = weightMap [name + "/W"];
225	if (W.ndims() != `4` \|\| W.format != "oihw")
226	throw Exception (Error::InvalidOperation, "invalid convolution weights");
227	memory::dims weightsDims = W.dims;
228	auto userWeights = allocTensor(weightsDims, memory::format_tag::oihw, W.data);
229
230	// Pad the weights
231	memory::dims weightsPadDims = weightsDims;
232	weightsPadDims [`1`] = getPadded<K>(weightsDims [`1`]); // IC
233	weightsPadDims [`0`] = getPadded<K>(weightsDims [`0`]); // OC
234	assert(srcDims[`1`] == weightsPadDims[`1`]); // srcDims[C] == weightsPadDims[IC]
235	auto weightsPad = allocTensor(weightsPadDims, memory::format_tag::oihw);
236	WeightsReorderNode<K>(userWeights, weightsPad).execute(sm);
237
238	// Get the biases
239	const auto& b = weightMap [name + "/b"];
240	if (b.ndims() != `1`)
241	throw Exception (Error::InvalidOperation, "invalid convolution biases");
242	memory::dims biasDims = b.dims;
243
244	// Copy/pad the biases
245	memory::dims biasPadDims = {getPadded<K>(biasDims [`0`])};
246	auto bias = allocTensor(biasPadDims);
247	if (biasDims [`0`] != biasPadDims [`0`])
248	memset(bias->get_data_handle(), `0`, biasPadDims [`0`]*sizeof(float));
249	memcpy(bias->get_data_handle(), b.data, biasDims [`0`]*sizeof(float));
250
251	// Allocate memory for destination
252	memory::dims dstDims = srcDims;
253	dstDims [`1`] = weightsPadDims [`0`]; // dstDims[C] = weightsPadDims[OC]
254
255	std::shared_ptr<memory> dst;
256	if (!userDst)
257	dst = allocTensor(dstDims);
258	else if (getTensorDims(userDst) == dstDims)
259	dst = userDst;
260	else
261	dst = castTensor(dstDims, userDst);
262
263	// Create a convolution
264	// Let the convolution primitive choose the weights format
265	auto weightsDesc = memory::desc ({ weightsPadDims }, memory::data_type::f32, memory::format_tag::any);
266
267	auto convAlgo = (K == `16`) ? convolution_winograd : convolution_direct;
268	auto convDesc = convolution_forward::desc(
269	prop_kind::forward_inference, convAlgo,
270	src ->get_desc(),
271	weightsDesc,
272	bias->get_desc(),
273	dst ->get_desc(),
274	strides, padding, padding, padding_kind::zero);
275
276	// Incorporate relu
277	mkldnn::primitive_attr convAttr;
278	if (relu)
279	{
280	mkldnn::post_ops ops;
281	ops.append_eltwise(
282	`1.f`, // scale factor, not used
283	algorithm::eltwise_relu,
284	`0.f`, // max with
285	`0.f` // unused
286	);
287	convAttr.set_post_ops(ops);
288	}
289	convAttr.set_scratchpad_mode(scratchpad_mode_user);
290
291	auto convPrimDesc = convolution_forward::primitive_desc (convDesc, convAttr, eng);
292
293	// Reorder the weights to the final format, if necessary
294	auto weights = weightsPad;
295	if (convPrimDesc.weights_desc() != weightsPad->get_desc())
296	{
297	weights = std::make_shared<memory>(convPrimDesc.weights_desc(), eng);
298	ReorderNode(weightsPad, weights).execute(sm);
299	}
300
301	// Create convolution node and add it to the net
302	auto node = std::make_shared<ConvNode>(convPrimDesc, src, weights, bias, dst);
303	nodes.push_back(node);
304	return node;
305	}
306
307	template<int K>
308	memory::dims Network<K>::getPoolDims(const memory::dims& srcDims)
309	{
310	memory::dims dstDims = srcDims;
311	dstDims [`2`] /= `2`; // H/2
312	dstDims [`3`] /= `2`; // W/2
313	return dstDims;
314	}
315
316	template<int K>
317	std::shared_ptr<Node> Network<K>::addPool(const std::shared_ptr<memory>& src,
318	const std::shared_ptr<memory>& userDst)
319	{
320	const memory::dims kernel = {`2`, `2`};
321	const memory::dims strides = {`2`, `2`};
322	const memory::dims padding = {`0`, `0`};
323
324	memory::dims srcDims = getTensorDims(src);
325	memory::dims dstDims = getPoolDims(srcDims);
326
327	std::shared_ptr<memory> dst;
328	if (!userDst)
329	dst = allocTensor(dstDims);
330	else if (getTensorDims(userDst) == dstDims)
331	dst = userDst;
332	else
333	dst = castTensor(dstDims, userDst);
334
335	auto poolDesc = pooling_forward::desc (
336	prop_kind::forward_inference, pooling_max,
337	src ->get_desc(),
338	dst ->get_desc(),
339	strides, kernel, padding, padding, padding_kind::zero);
340
341	mkldnn::primitive_attr poolAttr;
342	poolAttr.set_scratchpad_mode(scratchpad_mode_user);
343
344	auto poolPrimDesc = pooling_forward::primitive_desc (poolDesc, poolAttr, eng);
345
346	auto node = std::make_shared<PoolNode>(poolPrimDesc, src, dst);
347	nodes.push_back(node);
348	return node;
349	}
350
351	template<int K>
352	memory::dims Network<K>::getUpsampleDims(const memory::dims& srcDims)
353	{
354	memory::dims dstDims = srcDims;
355	dstDims [`2`] = `2`; // H2
356	dstDims [`3`] = `2`; // W2
357	return dstDims;
358	}
359
360	template<int K>
361	std::shared_ptr<Node> Network<K>::addUpsample(const std::shared_ptr<memory>& src,
362	const std::shared_ptr<memory>& userDst)
363	{
364	memory::dims srcDims = getTensorDims(src);
365	memory::dims dstDims = getUpsampleDims(srcDims);
366
367	std::shared_ptr<memory> dst;
368	if (!userDst)
369	dst = allocTensor(dstDims);
370	else if (getTensorDims(userDst) == dstDims)
371	dst = userDst;
372	else
373	dst = castTensor(dstDims, userDst);
374
375	// Create upsampling node and add it to net
376	auto node = std::make_shared<UpsampleNode<K>>(src, dst);
377	nodes.push_back(node);
378	return node;
379	}
380
381	template<int K>
382	memory::dims Network<K>::getConcatDims(const memory::dims& src1Dims, const memory::dims& src2Dims)
383	{
384	assert(src1Dims[`0`] == src2Dims[`0`]); // N
385	assert(src1Dims[`2`] == src2Dims[`2`]); // H
386	assert(src1Dims[`3`] == src2Dims[`3`]); // W
387
388	memory::dims dstDims = src1Dims;
389	dstDims [`1`] += src2Dims [`1`]; // C
390	return dstDims;
391	}
392
393	template<int K>
394	std::shared_ptr<Node> Network<K>::addAutoexposure(const Image& color,
395	const std::shared_ptr<HDRTransferFunction>& transferFunc)
396	{
397	auto node = std::make_shared<AutoexposureNode>(color, transferFunc);
398	nodes.push_back(node);
399	return node;
400	}
401
402	template <int K>
403	void Network<K>::finalize()
404	{
405	// Compute the size of the scratchpad
406	size_t scratchpadSize = `0`;
407	for (const auto& node : nodes)
408	scratchpadSize = max(scratchpadSize, node ->getScratchpadSize());
409
410	// Allocate the scratchpad
411	memory::dims scratchpadDims = { memory::dim(scratchpadSize) };
412	memory::desc scratchpadDesc(scratchpadDims, memory::data_type::u8, memory::format_tag::x);
413	auto scratchpad = std::make_shared<memory>(scratchpadDesc, eng);
414	activationAllocBytes += scratchpadSize;
415	totalAllocBytes += scratchpadSize;
416
417	// Set the scratchpad for the nodes
418	for (auto& node : nodes)
419	node ->setScratchpad(scratchpad);
420
421	// Free the weights
422	weightMap.clear();
423
424	// Print statistics
425	if (device ->isVerbose(`2`))
426	{
427	std::cout << "Activation bytes: " << activationAllocBytes << std::endl;
428	std::cout << "Scratchpad bytes: " << scratchpadSize << std::endl;
429	std::cout << "Total bytes : " << totalAllocBytes << std::endl;
430	}
431	}
432
433	template class Network<`8`>;
434	template class Network<`16`>;
435
436	} // namespace oidn
437

Browse the source code of Godot/thirdparty/oidn/core/network.cpp