memory_tracking.hpp source code [Godot/thirdparty/oidn/mkl-dnn/src/common/memory_tracking.hpp]

1	/*******************************************************************************
2	* Copyright 2018 Intel Corporation
3	*
4	* Licensed under the Apache License, Version 2.0 (the "License");
5	* you may not use this file except in compliance with the License.
6	* You may obtain a copy of the License at
7	*
8	* http://www.apache.org/licenses/LICENSE-2.0
9	*
10	* Unless required by applicable law or agreed to in writing, software
11	* distributed under the License is distributed on an "AS IS" BASIS,
12	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13	* See the License for the specific language governing permissions and
14	* limitations under the License.
15	*******************************************************************************/
16
17	#ifndef MEMORY_TRACKING_HPP
18	#define MEMORY_TRACKING_HPP
19
20	#include <assert.h>
21	#include <unordered_map>
22
23	#include "nstl.hpp"
24	#include "utils.hpp"
25
26	namespace mkldnn {
27	namespace impl {
28	namespace memory_tracking {
29
30	/** Memory tracking capabilities
31	*
32	* The main purpose of this header file is to provide uniform way to register
33	* required memory for a scratchpad at a primitive descriptor creation time
34	* and then easily access it having only the base address of the scratchpad.
35	*
36	* Primitives might contain multiple disjoint parts that require temporary
37	* buffers (known as scratchpad) during their execution. A primitive descriptor
38	* should summarize all the needs into one single number -- the buffer size
39	* that would be requested from a user. At execution time, the corresponding
40	* primitive will receive a base pointer to a scratchpad. It then needs to
41	* provide each part of algorithm the corresponding piece of memory. Three main
42	* challenges here are:
43	* 1. Track correct offset (from the base scratchpad address) for each piece
44	* 2. Algorithm might require different memory pieces to be aligned, so
45	* the scratchpad size is no longer just the sum of the sizes of the
46	* corresponding subparts.
47	* 3. While a primitive is responsible for its scratchpad, the implementation
48	* might use some other basic blocks (e.g. cpu_reducer) that also require
49	* scratchpad memory. So there should be a simple way of passing the
50	* information back and forth between the main algorithm (a primitive) and
51	* auxiliary stuff that lives completely separately from it (e.g. reducer).
52	*
53	* To address these challenges this header file provides 3 structures:
54	* 1. registry_t -- the class that stores the information about requested
55	* memory. The information includes required size and desired
56	* alignment for each piece. This class is also responsible
57	* for computing the right offset to a given piece using the
58	* base pointer.
59	* This class is basically a ledger with all entries.
60	* Lives in primitive descriptors.
61	*
62	* 2. registrar_t -- the interface to a registry_t to book memory. Used at
63	* primitive descriptor creation time only. Contains a
64	* reference to the corresponding mutable registry.
65	* Always modifiable.
66	* Allows chaining (using prefixes).
67	*
68	* 3. grantor_t -- the interface to a registry_t to access memory. Used at
69	* primitive execution time only. Contains a reference to
70	* the corresponding constant registry and base pointer.
71	* Always constant.
72	* Allows chaining (using prefixes).
73	*
74	* Both registrar_t and grantor_t allow chaining with extra prefix provided.
75	* The feature is useful when a primitive offloads a part of computations to
76	* some other primitives which require their own scratchpad space
77	* (e.g. reducer). Prefixes are used to avoid key collision in cases when
78	* multiple sub-primitives (e.g. multiple reducers) are used.
79	*
80	* A short example below demonstrates how to use aforementioned classes. In it
81	* the main primitive is convolution that uses scratchpad for keeping padded
82	* bias. It also needs a reducer, that needs its own space as well.
83	*
84	* ``` c++
85	* struct reducer_t {
86	* static void init(registrar_t &scratchpad) {
87	* // preserve space for the reduction (one page aligned)
88	* scratchpad.book(key_reducer_space, sizeof(float) * 980 * 1024, 4096);
89	* }
90	*
91	* void exec(const grantor_t &scratchpad) {
92	* // get the pointer to preserved space. scratchpad came from
93	* // upper primitive (convolution in this example)
94	* auto space = scratchpad.get<float>(key_reducer_space);
95	*
96	* space[:] += ...;
97	* }
98	* };
99	*
100	* struct conv_t {
101	* struct pd_t {
102	* void init() {
103	* registrar_t scratchpad(scratchpad_registry_);
104	*
105	* // preserve a space for padded bias (using default alignment)
106	* scratchpad.book(key_conv_padded_bias, 128);
107	*
108	* // create a proxy registrar for the reducer. All entries made
109	* // by reducer would live in convolution's registry, but would
110	* // have their own `prefix`, so no interference with conv's
111	* // buffers.
112	* registrar_t reducer_scratchpad(scratchpad, prefix_reducer);
113	*
114	* reducer_t::init(reducer_scratchpad);
115	* }
116	*
117	* registry_t scratchpad_registry_;
118	* }
119	*
120	* void exec() {
121	* // get the base pointer to a scratchpad memory from a user
122	* void *scratchpad_ptr = this->input(MKLDNN_MEM_SCRATCHPAD);
123	*
124	* // create a grantor to the scratchpad (and provide the base
125	* // pointer).
126	* grantor_t scratchpad(pd()->scratchpad_registry_, scratchpad_ptr);
127	*
128	* // access the padded_bias (need only key name and the grantor)
129	* auto padded_bias = scratchpad.get<float>(key_conv_padded_bias);
130	*
131	* // to give the `right` grantor to reducer we need to add the
132	* // corresponding prefix, so that reducer would be able to access
133	* // its keys. The call is very similar to the one in pd_t::init
134	* // with only difference in types: grantor_t vs registrar_t.
135	* grantor_t reducer_scratchpad(scratchpad, prefix_reducer);
136	* reducer->exec(reducer_scratchpad);
137	* }
138	* };
139	* ```
140	*/
141
142
/* namespace with common keys and prefixes */
namespace names {
/// Local keys identifying individual scratchpad entries. The enumerators are
/// numbered consecutively starting from key_none == 0, so inserting a new
/// entry in the middle shifts the values of all following keys.
enum {
    key_none = 0,
    key_bnorm_tmp_mean,
    key_bnorm_tmp_var,
    key_bnorm_tmp_diff_ss,
    key_bnorm_tmp_stats,
    key_bnorm_reduction,
    key_concat_iptrs,
    key_concat_istrides,
    key_concat_nelems,
    key_concat_optrs,
    key_conv_adjusted_scales,
    key_conv_bia_reduction,
    key_conv_gemm_col,
    key_conv_gemm_imtr,
    key_conv_int_dat_in_acc_dt,
    key_conv_padded_bias,
    key_conv_rtus_space,
    key_conv_tr_diff_dst,
    key_conv_tr_diff_dst_bctx,
    key_conv_tr_src,
    key_conv_tr_src_bctx,
    key_conv_wei_reduction,
    key_conv_wei_bia_reduction,
    key_conv_wei_bia_reduction_bctx,
    key_iprod_int_dat_in_acc_dt,
    key_reducer_space,
    key_reducer_space_bctx,
    key_reorder_wino_plain,
    key_reorder_wino_transform_space,
    key_reorder_rnn_weights_quantization,
    key_reorder_rnn_weights_reduction,
    key_rnn_space,
    key_rnn_ptrs_bia,
    key_rnn_ptrs_wei_layer,
    key_rnn_ptrs_wei_iter,
    key_softmax_reduction,
    key_wino_U,
    key_wino_V,
    key_wino_M,
    key_barrier,
};

/// Local prefixes used when chaining registrars / grantors. prefix_none == 0
/// corresponds to "no extra prefix".
enum {
    prefix_none = 0,
    prefix_reducer_bia,
    prefix_reducer_wei,
};
}
194
// Bit layout of a global key (most significant part first):
//   level 0: 00 00 00 xxx
//   level 1: 00 00 aa xxx
//   level 2: 00 aa bb xxx
//   level 3: aa bb cc xxx
// max # of levels: 3 + 1 (base_level)
// here:
//   xxx        : [1 .. MAX_KEY)    : key
//   aa, bb, cc : [1 .. MAX_PREFIX) : prefixes for levels 1, 2, and 3

using key_t = uint32_t;
/// MAX_KEY is the exclusive upper bound for a local key (10 bits),
/// MAX_PREFIX is the exclusive upper bound for a local prefix (7 bits).
enum { MAX_KEY = (1u << 10), MAX_PREFIX = (1u << 7), };

/// generates global key based on a prefix and a local key
///
/// @param prefix global prefix, as produced by make_prefix() (0 for level 0)
/// @param key local key; must be below MAX_KEY
/// @return sum of @p prefix and @p key
inline key_t make_key(key_t prefix, key_t key) {
    // a local key that spills out of its 10-bit field would silently alias
    // an entry that belongs to another prefix
    assert(key < static_cast<key_t>(MAX_KEY));
    return prefix + key;
}

/// generates global prefix based on the global parent and the local ones
///
/// @param parent_prefix global prefix of the parent (0 for level 0)
/// @param prefix local prefix; must be below MAX_PREFIX
/// @return parent prefix shifted up by one prefix field, with @p prefix
///         placed right above the key field
inline key_t make_prefix(key_t parent_prefix, key_t prefix) {
    // a local prefix wider than 7 bits would overlap the parent's field
    assert(prefix < static_cast<key_t>(MAX_PREFIX));
    return MAX_PREFIX * parent_prefix + MAX_KEY * prefix;
}
213
214	struct registrar_t;
215	struct grantor_t;
216
217	struct registry_t {
218	void book(const key_t &key, size_t size, size_t alignment) {
219	if (size == `0`) return;
220	assert(offset_map_.count(key) == `0`);
221
222	size = utils::rnd_up(size, minimal_alignment);
223	alignment = nstl::max<size_t>(alignment, minimal_alignment);
224	offset_map_[key] = entry_t{size_, size, alignment};
225
226	size_ += size + alignment - minimal_alignment;
227	}
228
229	void get(const* key_t &key, void base_ptr) const* {
230	if (base_ptr == nullptr) { assert(size() == `0`); return nullptr; }
231	if (offset_map_.count(key) != `1`) return nullptr;
232
233	const auto &e = offset_map_.at(key);
234	base_ptr = utils::align_ptr<void>(base_ptr, minimal_alignment);
235	char ptr = (char* *)base_ptr + e.offset;
236	return utils::align_ptr<void>(ptr, e.alignment);
237	}
238
239	size_t size() const
240	{ return size_ > `0` ? size_ + minimal_alignment - `1` : `0`; }
241
242	registrar_t registrar();
243	grantor_t grantor(void base_ptr) const*;
244
245	protected:
246	enum { minimal_alignment = `64` };
247	struct entry_t { size_t offset, size, alignment; };
248
249	std::unordered_map<key_t, entry_t> offset_map_;
250	size_t size_ = `0`;
251	};
252
253	struct registrar_t {
254	enum { default_alignment = `64` };
255
256	registrar_t(registry_t &registry): registry_(registry), prefix_(`0`) {}
257	registrar_t(registrar_t &parent, const key_t &prefix)
258	: registry_(parent.registry_)
259	, prefix_(make_prefix(parent.prefix_, prefix)) {}
260
261	void book(const key_t &key, size_t size,
262	size_t alignment = default_alignment)
263	{ registry_.book(make_key(prefix_, key), size, alignment); }
264
265	protected:
266	registry_t &registry_;
267	const key_t prefix_;
268	};
269
270	struct grantor_t {
271	grantor_t(const registry_t &registry, void *base_ptr)
272	: registry_(registry), prefix_(`0`), base_ptr_(base_ptr) {}
273	grantor_t(const grantor_t &parent, const key_t &prefix)
274	: registry_(parent.registry_)
275	, prefix_(make_prefix(parent.prefix_, prefix))
276	, base_ptr_(parent.base_ptr_) {}
277
278	template <typename T = void> T get(const* key_t &key) const
279	{ return (T *)registry_.get(make_key(prefix_, key), base_ptr_); }
280
281	protected:
282	const registry_t &registry_;
283	const key_t prefix_;
284	void *base_ptr_;
285	};
286
287	inline registrar_t registry_t::registrar() { return registrar_t (*this); }
288	inline grantor_t registry_t::grantor(void base_ptr) const*
289	{ return grantor_t (*this, base_ptr); }
290
291	}
292	}
293	}
294
295	#endif
296

Browse the source code of Godot/thirdparty/oidn/mkl-dnn/src/common/memory_tracking.hpp