1/*******************************************************************************
2* Copyright 2016-2018 Intel Corporation
3*
4* Licensed under the Apache License, Version 2.0 (the "License");
5* you may not use this file except in compliance with the License.
6* You may obtain a copy of the License at
7*
8* http://www.apache.org/licenses/LICENSE-2.0
9*
10* Unless required by applicable law or agreed to in writing, software
11* distributed under the License is distributed on an "AS IS" BASIS,
12* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13* See the License for the specific language governing permissions and
14* limitations under the License.
15*******************************************************************************/
16
17#ifndef MKLDNN_H
18#define MKLDNN_H
19
20#ifndef DOXYGEN_SHOULD_SKIP_THIS
21
22/* All symbols shall be internal unless marked as MKLDNN_API */
23#if defined _WIN32 || defined __CYGWIN__
24# define MKLDNN_HELPER_DLL_IMPORT __declspec(dllimport)
25# define MKLDNN_HELPER_DLL_EXPORT __declspec(dllexport)
26#else
27# if __GNUC__ >= 4
28# define MKLDNN_HELPER_DLL_IMPORT __attribute__ ((visibility ("default")))
29# define MKLDNN_HELPER_DLL_EXPORT __attribute__ ((visibility ("default")))
30# else
31# define MKLDNN_HELPER_DLL_IMPORT
32# define MKLDNN_HELPER_DLL_EXPORT
33# endif
34#endif
35
36#ifdef MKLDNN_DLL
37# ifdef MKLDNN_DLL_EXPORTS
38# define MKLDNN_API MKLDNN_HELPER_DLL_EXPORT
39# else
40# define MKLDNN_API MKLDNN_HELPER_DLL_IMPORT
41# endif
42#else
43# define MKLDNN_API
44#endif
45
46#if defined (__GNUC__)
47# define MKLDNN_DEPRECATED __attribute__((deprecated))
48#elif defined(_MSC_VER)
49# define MKLDNN_DEPRECATED __declspec(deprecated)
50#else
51# define MKLDNN_DEPRECATED
52#endif
53
54#include "mkldnn_types.h"
55#include "mkldnn_version.h"
56#endif /* DOXYGEN_SHOULD_SKIP_THIS */
57
58#ifdef __cplusplus
59extern "C" {
60#endif
61
62/** @addtogroup c_api C API
63 * @{ */
64
65/** @addtogroup c_api_primitive Primitive operations
66 * @{ */
67
68/** @addtogroup c_api_primitive_common Common primitive operations
69 * @{ */
70
71/** Creates a primitive descriptor @p iterator for given @p op_desc, @p attr,
72 * @p engine, and optionally a hint primitive descriptor from forward
73 * propagation (required for backward propagation). Pass @c NULL for forward
74 * propagation.
75 */
76mkldnn_status_t MKLDNN_API mkldnn_primitive_desc_iterator_create(
77 mkldnn_primitive_desc_iterator_t *iterator,
78 const_mkldnn_op_desc_t op_desc, const_mkldnn_primitive_attr_t attr,
79 mkldnn_engine_t engine,
80 const_mkldnn_primitive_desc_t hint_forward_primitive_desc);
81
82/** Iterates over primitive descriptors. Returns #mkldnn_iterator_ends if no
83 * more primitive descriptors are available. */
84mkldnn_status_t MKLDNN_API mkldnn_primitive_desc_iterator_next(
85 mkldnn_primitive_desc_iterator_t iterator);
86
87/** Fetches the current primitive descriptor.
88 *
89 * @note
90 * The user should delete the fetched primitive descriptor using
91 * mkldnn_primitive_desc_destroy() once it is no longer needed. */
92mkldnn_primitive_desc_t MKLDNN_API mkldnn_primitive_desc_iterator_fetch(
93 const_mkldnn_primitive_desc_iterator_t iterator);
94
95/** Deletes a primitive descriptor @p iterator */
96mkldnn_status_t MKLDNN_API mkldnn_primitive_desc_iterator_destroy(
97 mkldnn_primitive_desc_iterator_t iterator);
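
/* Illustrative sketch (not part of the API): a typical way to walk over all
 * available implementations for an operation descriptor. Assumes `op_desc`,
 * `attr`, and `engine` were created earlier; error handling is omitted.
 *
 * @code
 * mkldnn_primitive_desc_iterator_t it;
 * mkldnn_primitive_desc_iterator_create(&it, op_desc, attr, engine, NULL);
 * do {
 *     mkldnn_primitive_desc_t pd = mkldnn_primitive_desc_iterator_fetch(it);
 *     // inspect pd here (e.g. query #mkldnn_query_impl_info_str);
 *     // this sketch simply releases it and moves on
 *     mkldnn_primitive_desc_destroy(pd);
 * } while (mkldnn_primitive_desc_iterator_next(it) != mkldnn_iterator_ends);
 * mkldnn_primitive_desc_iterator_destroy(it);
 * @endcode
 */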
98
99/** Creates a @p primitive_desc using @p op_desc, @p attr, @p engine, and
100 * optionally a hint primitive descriptor from forward propagation. The call is
101 * equivalent to creating a primitive descriptor iterator, immediately fetching
102 * a primitive descriptor, and then destroying the iterator. */
103mkldnn_status_t MKLDNN_API mkldnn_primitive_desc_create(
104 mkldnn_primitive_desc_t *primitive_desc,
105 const_mkldnn_op_desc_t op_desc, const_mkldnn_primitive_attr_t attr,
106 mkldnn_engine_t engine,
107 const_mkldnn_primitive_desc_t hint_forward_primitive_desc);
108
109/** Makes a copy of a @p primitive_desc. */
110mkldnn_status_t MKLDNN_API mkldnn_primitive_desc_clone(
111 mkldnn_primitive_desc_t *primitive_desc,
112 const_mkldnn_primitive_desc_t existing_primitive_desc);
113
114/** Returns a constant reference to the attribute of a @p primitive_desc.
115 *
116 * @warning
117 * The user should not destroy the obtained @p attr.
118 *
119 * @warning
120 * The lifetime of an @p attr is the same as that of a @p primitive_desc,
121 * so it is illegal to use the @p attr once @p primitive_desc has been
122 * destroyed. */
123mkldnn_status_t MKLDNN_API mkldnn_primitive_desc_get_attr(
124 const_mkldnn_primitive_desc_t primitive_desc,
125 const_mkldnn_primitive_attr_t *attr);
126
127/** Deletes a @p primitive_desc. */
128mkldnn_status_t MKLDNN_API mkldnn_primitive_desc_destroy(
129 mkldnn_primitive_desc_t primitive_desc);
130
131/** Queries primitive descriptor
132 *
133 * One of the most typical use cases is to query a convolution primitive
134 * descriptor created with source, weights, and destination formats equal
135 * to #mkldnn_format_tag_any about the corresponding memory descriptors
136 * (@p what equals #mkldnn_query_src_md, #mkldnn_query_weights_md, and
137 * #mkldnn_query_dst_md respectively) to be able to prepare memory and
138 * create reorders if required.
139 *
140 * Another quite typical use case is to query an operation primitive
141 * descriptor for a workspace (@p what equals #mkldnn_query_workspace_md).
142 * The returned status #mkldnn_not_required indicates that a workspace is
143 * not required.
144 *
145 * A few other possibilities:
146 * - query an operation primitive descriptor for the underlying operation
147 * descriptor (#mkldnn_query_convolution_d, #mkldnn_query_eltwise_d,
148 * #mkldnn_query_rnn_d, etc.)
149 * - query an operation primitive descriptor for the implementation
150 * information string (#mkldnn_query_impl_info_str)
151 * - query an operation primitive descriptor for the number of inputs and
152 * outputs (#mkldnn_query_num_of_inputs_s32 and
153 * #mkldnn_query_num_of_outputs_s32 respectively)
154 *
155 * @sa mkldnn_query_t for more options
156 */
157mkldnn_status_t MKLDNN_API mkldnn_primitive_desc_query(
158 const_mkldnn_primitive_desc_t primitive_desc, mkldnn_query_t what,
159 int index, void *result);
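
/* Illustrative sketch (not part of the API): querying a primitive descriptor
 * for the memory descriptors it expects and for an optional workspace.
 * Assumes `pd` is an already created primitive descriptor.
 *
 * @code
 * const mkldnn_memory_desc_t *src_md
 *         = mkldnn_primitive_desc_query_md(pd, mkldnn_query_src_md, 0);
 * const mkldnn_memory_desc_t *dst_md
 *         = mkldnn_primitive_desc_query_md(pd, mkldnn_query_dst_md, 0);
 *
 * // a NULL result means the primitive does not require a workspace
 * const mkldnn_memory_desc_t *ws_md
 *         = mkldnn_primitive_desc_query_md(pd, mkldnn_query_workspace_md, 0);
 * @endcode
 */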
160
161/** Queries primitive descriptor for memory descriptor
162 *
163 * @returns NULL in case of any error.
164 *
165 * This is just a specialized version of mkldnn_primitive_desc_query
166 * used for convenience.
167 */
168const mkldnn_memory_desc_t MKLDNN_API *mkldnn_primitive_desc_query_md(
169 const_mkldnn_primitive_desc_t primitive_desc, mkldnn_query_t what,
170 int index);
171
172/** Queries primitive descriptor for signed 32bit int
173 *
174 * @returns 0 in case of any error (in particular if the queried entity is
175 * not of type int32_t). Note that 0 might also be the actual returned
176 * value.
177 *
178 * This is just a specialized version of mkldnn_primitive_desc_query
179 * used for convenience.
180 */
181int MKLDNN_API mkldnn_primitive_desc_query_s32(
182 const_mkldnn_primitive_desc_t primitive_desc, mkldnn_query_t what,
183 int index);
184
185/** Creates a @p primitive using a @p primitive_desc descriptor. */
186mkldnn_status_t MKLDNN_API mkldnn_primitive_create(
187 mkldnn_primitive_t *primitive,
188 const_mkldnn_primitive_desc_t primitive_desc);
189
190/** Executes a @p primitive using a @p stream, and @p nargs arguments
191 * @p args. */
192mkldnn_status_t MKLDNN_API mkldnn_primitive_execute(
193 const_mkldnn_primitive_t primitive, mkldnn_stream_t stream,
194 int nargs, const mkldnn_exec_arg_t *args);
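
/* Illustrative sketch (not part of the API): executing a primitive with an
 * argument list. Assumes `prim`, `stream`, and the memories were created
 * earlier, and that the MKLDNN_ARG_* indices and the mkldnn_exec_arg_t layout
 * from mkldnn_types.h are as shown; error handling is omitted.
 *
 * @code
 * mkldnn_exec_arg_t args[] = {
 *     {MKLDNN_ARG_SRC, src_memory},
 *     {MKLDNN_ARG_WEIGHTS, weights_memory},
 *     {MKLDNN_ARG_DST, dst_memory},
 * };
 * mkldnn_primitive_execute(prim, stream, 3, args);
 * @endcode
 */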
195
196/** Retrieves a reference to the @p primitive_desc descriptor of given @p
197 * primitive.
198 *
199 * @warning
200 * The returned object must not be destroyed by the user. The @c const
201 * qualifier of the returned object prevents such attempts. */
202mkldnn_status_t MKLDNN_API mkldnn_primitive_get_primitive_desc(
203 const_mkldnn_primitive_t primitive,
204 const_mkldnn_primitive_desc_t *primitive_desc);
205
206/** Deletes a @p primitive. */
207mkldnn_status_t MKLDNN_API mkldnn_primitive_destroy(
208 mkldnn_primitive_t primitive);
209
210/** @} */
211
212/** @addtogroup c_api_attributes Attributes
213 * An extension for controlling primitive behavior.
214 * @{ */
215
216/** Creates an empty (default) @p attr attribute. All the parameters are set to
217 * default values.
218 *
219 * An empty attribute is used in primitive descriptor creation whenever it
220 * is not passed explicitly, e.g. in mkldnn_primitive_desc_create.
221 */
222mkldnn_status_t MKLDNN_API mkldnn_primitive_attr_create(
223 mkldnn_primitive_attr_t *attr);
224
225/** Makes a copy of an @p existing_attr. */
226mkldnn_status_t MKLDNN_API mkldnn_primitive_attr_clone(
227 mkldnn_primitive_attr_t *attr,
228 const_mkldnn_primitive_attr_t existing_attr);
229
230/** Deletes an @p attr. */
231mkldnn_status_t MKLDNN_API mkldnn_primitive_attr_destroy(
232 mkldnn_primitive_attr_t attr);
233
234/** Returns the scratchpad @p mode set in the attribute @p attr */
235mkldnn_status_t MKLDNN_API mkldnn_primitive_attr_get_scratchpad_mode(
236 const_mkldnn_primitive_attr_t attr, mkldnn_scratchpad_mode_t *mode);
237
238/** Sets scratchpad @p mode.
239 *
240 * The possible values are: #mkldnn_scratchpad_mode_library (default) and
241 * #mkldnn_scratchpad_mode_user. */
242mkldnn_status_t MKLDNN_API mkldnn_primitive_attr_set_scratchpad_mode(
243 mkldnn_primitive_attr_t attr, mkldnn_scratchpad_mode_t mode);
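
/* Illustrative sketch (not part of the API): requesting user-managed
 * scratchpad memory. With #mkldnn_scratchpad_mode_user the user is expected to
 * query the created primitive descriptor for the scratchpad memory descriptor
 * and provide a corresponding memory at execution time; the
 * MKLDNN_ARG_SCRATCHPAD index and the #mkldnn_query_scratchpad_md query from
 * mkldnn_types.h are assumed here. Error handling is omitted.
 *
 * @code
 * mkldnn_primitive_attr_t attr;
 * mkldnn_primitive_attr_create(&attr);
 * mkldnn_primitive_attr_set_scratchpad_mode(attr, mkldnn_scratchpad_mode_user);
 * // ... create a primitive descriptor with this attr, then query it with
 * // mkldnn_query_scratchpad_md to learn how much memory to provide
 * @endcode
 */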
244
245/** Returns @p count, correspondence scale @p mask, and a pointer to a constant
246 * floating point array of output @p scales for given @p attr, previously set
247 * by mkldnn_primitive_attr_set_output_scales.
248 *
249 * @warning
250 * The @p scales array points to the internal @p attr field, so the user
251 * should not modify or destroy @p scales.
252 *
253 * @warning
254 * The lifetime of @p scales is the same as that of the @p attr to which it
255 * belongs, so it is illegal to use @p scales after @p attr is destroyed.
256 */
257mkldnn_status_t MKLDNN_API mkldnn_primitive_attr_get_output_scales(
258 const_mkldnn_primitive_attr_t attr, mkldnn_dim_t *count, int *mask,
259 const float **scales);
260
261/** Sets output @p scales for primitive operations. The number of elements @p
262 * count and correspondence scale @p mask are stored for future use.
263 *
264 * The @p mask argument defines the correspondence between the output tensor
265 * dimensions and the @p scales array. Set the i-th bit of @p mask to 1 to use a
266 * dedicated scaling factor for each slice of the output tensor over the i-th
267 * dimension. Set @p mask to 0 to use a common scaling factor for the whole
268 * output tensor.
269 *
270 * @note
271 * The dimension order is always native and does not depend on the actual
272 * layout used. Examples:
 *       - for 2D data the order of dimensions is always: (n, c)
 *       - for 4D data the order is always: (n, c, h, w)
 *       - for 5D weights the order is always: (g, oc, ic, kh, kw)

276 *
277 * Example usage:
 * @code
 *      int mb = 32, oc = 32, oh = 14, ow = 14; // convolution output params
 *      float scales[32] = { ... }; // unique output scales per output channel
 *      int oc_dim = 1; // mb_dim = 0, channel_dim = 1, height_dim = 2, ...
 *
 *      mkldnn_convolution_desc_t cd; // create & configure convolution op_desc
 *
 *      mkldnn_primitive_attr_t attr;
 *      mkldnn_primitive_attr_create(&attr); // create default attributes
 *      mkldnn_primitive_attr_set_output_scales(attr, oc, 1 << oc_dim, scales);
 *
 *      mkldnn_primitive_desc_t cpd;
 *      mkldnn_primitive_desc_create(&cpd, &cd, attr, engine, NULL); // engine created earlier
 * @endcode
292 *
293 * @note
294 * There is no way to check that @p count corresponds to @p mask until an
295 * actual primitive descriptor is created, so it is the user's
296 * responsibility to set proper values. The following formula must hold:
297 *
298 * \f[count = \prod\limits_{d \in mask} output.dims[d]\f]
299 */
300mkldnn_status_t MKLDNN_API mkldnn_primitive_attr_set_output_scales(
301 mkldnn_primitive_attr_t attr, mkldnn_dim_t count, int mask,
302 const float *scales);
303
304/** Returns @p post_ops for given @p attr.
305 *
306 * @warning
307 * @p post_ops points to the internal @p attr field, so the user should not
308 * modify or destroy @p post_ops. Also, the lifetime of @p post_ops is the
309 * same as that of the @p attr it belongs to, so it is illegal to use @p
310 * post_ops after @p attr has been destroyed.
311 */
312mkldnn_status_t MKLDNN_API mkldnn_primitive_attr_get_post_ops(
313 const_mkldnn_primitive_attr_t attr, const_mkldnn_post_ops_t *post_ops);
314
315/** Sets configured @p post_ops to an attribute @p attr for future use (when
316 * primitive descriptor is being created).
317 *
318 * @note
319 * At this point in time, there is no way to check whether the primitive
320 * descriptor does or does not support a given sequence of post operations.
321 * Therefore the user should handle an error that might occur at the
322 * mkldnn_primitive_desc_create call.
323 */
324mkldnn_status_t MKLDNN_API mkldnn_primitive_attr_set_post_ops(
325 mkldnn_primitive_attr_t attr, const_mkldnn_post_ops_t post_ops);
326
327/** @addtogroup c_api_attributes_post_ops Sequence of post operations
328 * An extension for performing extra operations after a base operation.
329 * @{ */
330
331/** Creates an empty sequence of post operations @p post_ops. */
332mkldnn_status_t MKLDNN_API mkldnn_post_ops_create(mkldnn_post_ops_t *post_ops);
333
334/** Deletes a @p post_ops sequence. */
335mkldnn_status_t MKLDNN_API mkldnn_post_ops_destroy(mkldnn_post_ops_t post_ops);
336
337/** Returns the @p length of post operations for given @p post_ops. */
338int MKLDNN_API mkldnn_post_ops_len(const_mkldnn_post_ops_t post_ops);
339
340/** Returns the type of post operation with index @p index in given
341 * @p post_ops. In case of error, returns #mkldnn_undefined_primitive. */
342mkldnn_primitive_kind_t MKLDNN_API mkldnn_post_ops_get_kind(
343 const_mkldnn_post_ops_t post_ops, int index);
344
345/** Appends accumulation (sum) post operation to the @p post_ops. Prior to
346 * accumulating the result, the previous value would be multiplied by @p scale.
347 *
348 * The kind of this post operation is #mkldnn_sum.
349 *
350 * This feature might improve performance for cases like residual learning
351 * blocks, where the result of convolution is accumulated to the previously
 * computed activations. The parameter @p scale can be particularly useful for the
353 * integer-based computations when the result and previous activations have
354 * different logical scaling factors.
355 *
356 * In the simplest case when the accumulation is the only post operation, the
357 * computations would be:
358 * dst[] <- scale * dst[] + op(...) // instead of dst[] <- op(...)
359 *
360 * @note
361 * This post operation (as well as all the others) disregards the original
362 * layout of the destination; that is, the layout of the original
363 * destination is expected to be the same as the layout of the stored
364 * destination.
365 */
366mkldnn_status_t MKLDNN_API mkldnn_post_ops_append_sum(
367 mkldnn_post_ops_t post_ops, float scale);
368
369/** Gets the parameters of the accumulation (sum) post operation with index
370 * @p index in the sequence of @p post_ops.
371 *
372 * @note
373 * If index @p index would not correspond to the accumulation post
374 * operation, the function returns #mkldnn_invalid_arguments.
375 */
376mkldnn_status_t MKLDNN_API mkldnn_post_ops_get_params_sum(
377 const_mkldnn_post_ops_t post_ops, int index, float *scale);
378
379/** Appends eltwise post operation to the @p post_ops with given parameters
380 * @p kind, @p alpha, and @p beta (@sa mkldnn_eltwise_forward_desc_init and
381 * mkldnn_eltwise_desc_t).
382 *
383 * The kind of this post operation is #mkldnn_eltwise.
384 *
385 * In the simplest case when the eltwise is the only post operation, the
386 * computations would be:
387 * dst[] <- scale * eltwise_op ( op(...) ) // instead of dst[] <- op(...)
388 * where eltwise_op is configured with the given parameters.
389 */
390mkldnn_status_t MKLDNN_API mkldnn_post_ops_append_eltwise(
391 mkldnn_post_ops_t post_ops, float scale, mkldnn_alg_kind_t alg,
392 float alpha, float beta);
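
/* Illustrative sketch (not part of the API): building a post-operation chain
 * that accumulates into the destination and then applies ReLU, and attaching
 * it to an attribute. The eltwise algorithm name #mkldnn_eltwise_relu comes
 * from mkldnn_types.h and is assumed here; error handling is omitted.
 *
 * @code
 * mkldnn_post_ops_t ops;
 * mkldnn_post_ops_create(&ops);
 * mkldnn_post_ops_append_sum(ops, 1.0f); // dst[] <- dst[] + op(...)
 * mkldnn_post_ops_append_eltwise(ops, 1.0f, mkldnn_eltwise_relu, 0.f, 0.f);
 *
 * mkldnn_primitive_attr_t attr;
 * mkldnn_primitive_attr_create(&attr);
 * mkldnn_primitive_attr_set_post_ops(attr, ops);
 * // the attr can now be passed to mkldnn_primitive_desc_create()
 * @endcode
 */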
393
394/** Gets the eltwise parameters of the post operation with index @p index in
395 * the sequence of @p post_ops.
396 */
397mkldnn_status_t MKLDNN_API mkldnn_post_ops_get_params_eltwise(
398 const_mkldnn_post_ops_t post_ops, int index, float *scale,
399 mkldnn_alg_kind_t *alg, float *alpha, float *beta);
400
401/** @} */
402
403/** @} */
404
405/** @addtogroup c_api_memory Memory
406 * A primitive to describe and store data.
407 *
408 * The library supports various data types and formats. Memory hierarchy
409 * consists of three levels of abstraction:
410 * 1. **Memory descriptor** -- engine agnostic logical description of data
411 * (number of dimensions, dimensions themselves, and data type), and
412 * optionally the format/layout that describes the physical representation
413 * of data in memory. If the format is not known yet, one can pass
414 * #mkldnn_format_tag_any. This approach is used to allow compute-intensive
415 * primitives to specify the most appropriate format on their own with
416 * users required to reorder the data if the incoming format doesn't match
417 * the primitive's selection. Memory descriptor can be initialized with
418 * mkldnn_memory_desc_init_by_tag() or mkldnn_memory_desc_init_by_strides()
419 * functions, or by directly filling the mkldnn_memory_desc_t structure.
420 * The latter requires deep knowledge of how the physical data
421 * representation is mapped to the structure.
422 * The @ref understanding_memory_formats topic should shed some light on
423 * that.
424 * For the fully defined memory descriptors (i.e. where the format kind is
 *      not equal to #mkldnn_format_kind_any) a user can get the size using the
426 * mkldnn_memory_desc_get_size() function. As described in
427 * @ref understanding_memory_formats, the size of data sometimes cannot
428 * be computed as the product of dimensions times the size of the data
429 * type. So users are encouraged to use this function for better code
430 * portability.
431 * Two memory descriptors can be compared with mkldnn_memory_desc_equal().
432 * The comparison is especially useful when checking whether a primitive
433 * requires reorder from the user's data format to the primitive's format.
434 * 2. **Memory** -- an engine-specific object that handles the data and its
 *      description (a memory descriptor). For the CPU engine, the data handle is
436 * simply a pointer to @c void. The data handle can be queried using
437 * mkldnn_memory_get_data_handle() and set using
438 * mkldnn_memory_set_data_handle(). The latter function always sets the
439 * memory in the padding region to zero, which is the invariant maintained
440 * by all the primitives in Intel MKL-DNN.
441 * See @ref understanding_memory_formats for more details.
442 * A memory can be created using mkldnn_memory_create() function.
443 * A memory can also be queried for the underlying memory descriptor and
444 * engine using mkldnn_memory_get_memory_desc() and
445 * mkldnn_memory_get_engine() functions.
446 *
447 * Along with ordinary memory with all dimensions being positive, Intel
448 * MKL-DNN supports *zero-volume* memory with one or more dimensions set to
449 * zero. This is to support the NumPy\* convention.
450 * If a *zero-volume* memory is passed to a primitive, the primitive does
451 * not perform any computations on this memory. For example:
452 * - Convolution with `(0 batch, 3 input channels, 13 height, 13 width)`
 *        source and `(16 output channels, 3 input channels, 3 height, 3 width)`
454 * weights would produce `(0 batch, 16 output channels, 11 height, 11 width)`
455 * destination (assuming strides are `1` and paddings are zero) and perform
456 * zero multiply-add operations.
457 * - Concatenation of three memories of shapes `(3, 4, 13, 13)`,
458 * `(3, 0, 13, 13)`, and `(3, 1, 13, 13)` along the second axis would produce
459 * the output of the shape `(3, 5, 13, 13)`, effectively ignoring the second
460 * input (however, if the user created a concatenation primitive descriptor
461 * with three inputs they should also provide all three memories to the
462 * concatenation primitive, including the one with zero second dimension).
463 * - However, Intel MKL-DNN would return an error when attempting to create a
464 * convolution with *zero-volume* memory passed for weights because such a
465 * convolution is not well-defined:
466 * ~~~
467 * dst(1, 16, 11, 11) <-- src(1, 0, 13, 13) (*) wei(16, 0, 3, 3)
468 * ~~~
469 * Should the values in the destination be zeroes or just not accessed at
470 * all? Moreover, backward pass w.r.t. weights in such cases is also not
471 * well-defined.
472 *
473 * Data handle of *zero-volume* memory is never accessed and hence can be
474 * unset (NULL in case of CPU engine).
475 *
476 * @sa @ref understanding_memory_formats
477 * @{ */
478
479/** Initializes a @p memory_desc memory descriptor using @p ndims, @p dims, @p
480 * data_type, and @p strides.
481 *
482 * The @p strides might be NULL, which means the order of physical dimensions
483 * is the same as the order of logical ones.
484 *
485 * @note The logical order of dimensions is defined by a primitive that
486 * consumes the memory.
487 */
488mkldnn_status_t MKLDNN_API mkldnn_memory_desc_init_by_strides(
489 mkldnn_memory_desc_t *memory_desc, int ndims, const mkldnn_dims_t dims,
490 mkldnn_data_type_t data_type, const mkldnn_dims_t strides);
491
492/** Initializes a @p memory_desc memory descriptor using @p ndims, @p dims, @p
493 * data_type, and format @p tag.
494 *
495 * @p tag can be #mkldnn_format_tag_any, which allows a primitive to define
496 * the appropriate memory format. In this case, the @p format_kind would be set
497 * to #mkldnn_format_kind_any */
498mkldnn_status_t MKLDNN_API mkldnn_memory_desc_init_by_tag(
499 mkldnn_memory_desc_t *memory_desc, int ndims, const mkldnn_dims_t dims,
500 mkldnn_data_type_t data_type, mkldnn_format_tag_t tag);
501
502/** Initializes a @p memory_desc for a given @p parent_memory_desc, with
 * @p dims sizes and @p offsets. May fail if the layout used does not allow
 * obtaining the desired submemory. In this case, consider using the `extract`
 * or `insert` primitive. */
506mkldnn_status_t MKLDNN_API mkldnn_memory_desc_init_submemory(
507 mkldnn_memory_desc_t *memory_desc,
508 const mkldnn_memory_desc_t *parent_memory_desc,
509 const mkldnn_dims_t dims, const mkldnn_dims_t offsets);
510
511/** Compares two memory descriptors.
512 * @return 1 if the descriptors are the same.
513 * @return 0 if the descriptors are different.
514 *
515 * Use this function to identify whether a reorder is required between the
516 * two memories */
517int MKLDNN_API mkldnn_memory_desc_equal(
518 const mkldnn_memory_desc_t *lhs,
519 const mkldnn_memory_desc_t *rhs);
520
521/** Returns the size (in bytes) that is required for given @p memory_desc */
522size_t MKLDNN_API mkldnn_memory_desc_get_size(
523 const mkldnn_memory_desc_t *memory_desc);
524
525/** Creates a memory for given @p memory_desc and @p engine. Also sets handle
526 * to @p native_handle.
527 * The @p native_handle can:
528 * - point to the user allocated memory, i.e. valid handle. In this case the
529 * library doesn't own allocated memory.
530 * - be MKLDNN_NATIVE_HANDLE_ALLOCATE to ask the library to allocate and
531 * attach memory. In this case the library owns allocated memory.
532 * - be MKLDNN_NATIVE_HANDLE_NONE to create mkldnn_memory w/o attached memory.
533 */
534mkldnn_status_t MKLDNN_API mkldnn_memory_create(mkldnn_memory_t *memory,
535 const mkldnn_memory_desc_t *memory_desc, mkldnn_engine_t engine,
536 void *native_handle);
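
/* Illustrative sketch (not part of the API): describing a 4D f32 tensor and
 * wrapping user-allocated data in a memory object. The names mkldnn_f32,
 * mkldnn_nchw, and mkldnn_dims_t come from mkldnn_types.h and are assumed
 * here; `engine` was created earlier and error handling is omitted.
 *
 * @code
 * mkldnn_dims_t dims = {2, 16, 7, 7}; // (n, c, h, w)
 * mkldnn_memory_desc_t md;
 * mkldnn_memory_desc_init_by_tag(&md, 4, dims, mkldnn_f32, mkldnn_nchw);
 *
 * // the size may include padding, so do not assume it equals 2*16*7*7*4 bytes
 * void *buf = malloc(mkldnn_memory_desc_get_size(&md));
 *
 * mkldnn_memory_t mem;
 * mkldnn_memory_create(&mem, &md, engine, buf);
 * // ... use the memory, then clean up
 * mkldnn_memory_destroy(mem); // the user still owns and must free `buf`
 * free(buf);
 * @endcode
 */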
537
538/** Returns a @p memory_desc associated with @p memory. */
539mkldnn_status_t MKLDNN_API mkldnn_memory_get_memory_desc(
540 const_mkldnn_memory_t memory,
541 const mkldnn_memory_desc_t **memory_desc);
542
543/** Returns an @p engine associated with @p memory. */
544mkldnn_status_t MKLDNN_API mkldnn_memory_get_engine(
545 const_mkldnn_memory_t memory, mkldnn_engine_t *engine);
546
547/** For a @p memory, returns the data @p handle.
548 *
549 * For the CPU engine, the data handle is a pointer to the actual data. */
550mkldnn_status_t MKLDNN_API mkldnn_memory_get_data_handle(
551 const_mkldnn_memory_t memory, void **handle);
552
553/** For a @p memory, sets the data @p handle. */
554mkldnn_status_t MKLDNN_API mkldnn_memory_set_data_handle(
555 mkldnn_memory_t memory, void *handle);
556
557/** Deletes a @p memory. */
558mkldnn_status_t MKLDNN_API mkldnn_memory_destroy(mkldnn_memory_t memory);
559
560/** @} */
561
562/** @addtogroup c_api_reorder Reorder
563 * A primitive to copy data between memory formats.
564 * @{ */
565
566/** Initializes a @p reorder_primitive_desc using the description of the source
567 * (@p src_engine and @p src_md) and destination (@p dst_engine and @p dst_md)
568 * memory, and an @p attr attribute.
569 *
570 * Inputs:
571 * - input (#mkldnn_query_src_md, 0)
572 *
573 * Outputs:
574 * - output (#mkldnn_query_dst_md, 0)
575 */
576mkldnn_status_t MKLDNN_API mkldnn_reorder_primitive_desc_create(
577 mkldnn_primitive_desc_t *reorder_primitive_desc,
578 mkldnn_engine_t src_engine, const mkldnn_memory_desc_t *src_md,
579 mkldnn_engine_t dst_engine, const mkldnn_memory_desc_t *dst_md,
580 const_mkldnn_primitive_attr_t attr);
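
/* Illustrative sketch (not part of the API): reordering user data into the
 * format preferred by a primitive. Assumes `user_md`/`prim_md` are pointers to
 * memory descriptors that compare unequal, `user_mem`/`prim_mem` are the
 * corresponding memories, and `engine`/`stream` exist; the MKLDNN_ARG_FROM and
 * MKLDNN_ARG_TO indices from mkldnn_types.h are assumed. Error handling is
 * omitted.
 *
 * @code
 * if (!mkldnn_memory_desc_equal(user_md, prim_md)) {
 *     mkldnn_primitive_desc_t rpd;
 *     mkldnn_reorder_primitive_desc_create(
 *             &rpd, engine, user_md, engine, prim_md, NULL);
 *
 *     mkldnn_primitive_t reorder;
 *     mkldnn_primitive_create(&reorder, rpd);
 *
 *     mkldnn_exec_arg_t args[] = {
 *         {MKLDNN_ARG_FROM, user_mem}, {MKLDNN_ARG_TO, prim_mem}};
 *     mkldnn_primitive_execute(reorder, stream, 2, args);
 *
 *     mkldnn_primitive_destroy(reorder);
 *     mkldnn_primitive_desc_destroy(rpd);
 * }
 * @endcode
 */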
581
582/** @} */
583
584/** @addtogroup c_api_concat Concat
585 * A primitive to concatenate data by arbitrary dimension.
586 * @{ */
587
588/** Creates out-of-place @p concat_primitive_desc for concatenation of @p n
 * inputs by @p concat_dimension with resulting @p dst_md memory
 * descriptor. @p dst_md can be NULL or specified with the
591 * #mkldnn_format_kind_any format kind -- in this case, the appropriate memory
592 * format would be chosen automatically.
593 *
594 * Inputs:
595 * - input 0 (#mkldnn_query_src_md, 0)
596 * - input 1 (#mkldnn_query_src_md, 1)
597 * - ...
598 * - input @p n - 1 (#mkldnn_query_src_md, @p n - 1)
599 *
600 * Outputs:
601 * - output (#mkldnn_query_dst_md, 0)
602 */
603mkldnn_status_t MKLDNN_API mkldnn_concat_primitive_desc_create(
604 mkldnn_primitive_desc_t *concat_primitive_desc,
605 const mkldnn_memory_desc_t *dst_md,
606 int n, int concat_dimension,
607 const mkldnn_memory_desc_t *src_mds,
608 const_mkldnn_primitive_attr_t attr,
609 mkldnn_engine_t engine);
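
/* Illustrative sketch (not part of the API): concatenating two 4D tensors
 * along the channel axis and letting the library pick the destination format.
 * Assumes `src_mds[0]` and `src_mds[1]` are fully initialized memory
 * descriptors that differ only in the channel dimension and that `engine`
 * exists; error handling is omitted.
 *
 * @code
 * mkldnn_primitive_desc_t concat_pd;
 * mkldnn_concat_primitive_desc_create(&concat_pd,
 *         NULL,      // let the library choose the destination descriptor
 *         2,         // number of inputs
 *         1,         // concat dimension (channels)
 *         src_mds, NULL, engine);
 *
 * // the chosen destination descriptor can be queried back:
 * const mkldnn_memory_desc_t *dst_md
 *         = mkldnn_primitive_desc_query_md(concat_pd, mkldnn_query_dst_md, 0);
 * @endcode
 */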
610
611/** @} */
612
613/** @addtogroup c_api_sum Sum
614 * A primitive to sum data.
615 * @{ */
616
617/** Creates out-of-place @p sum_primitive_desc for sum of @p n
 * inputs multiplied by the corresponding @p scales with resulting @p dst_md
 * memory descriptor. @p dst_md can be NULL or specified with the
620 * #mkldnn_format_kind_any format kind -- in this case, the appropriate memory
621 * format would be chosen automatically.
622 *
623 * Inputs:
624 * - src 0 (#mkldnn_query_src_md, 0)
625 * - src 1 (#mkldnn_query_src_md, 1)
626 * - ...
627 * - src @p n - 1 (#mkldnn_query_src_md, @p n - 1)
628 *
629 * Outputs:
630 * - output (#mkldnn_query_dst_md, 0)
631 */
632mkldnn_status_t MKLDNN_API mkldnn_sum_primitive_desc_create(
633 mkldnn_primitive_desc_t *sum_primitive_desc,
        const mkldnn_memory_desc_t *dst_md,
635 int n, const float *scales,
636 const mkldnn_memory_desc_t *src_mds,
637 const_mkldnn_primitive_attr_t attr,
638 mkldnn_engine_t engine);
639
640/** @} */
641
642/** @addtogroup c_api_convolution Convolution
643 * A primitive to compute convolution using different algorithms.
644 *
645 * \f[dst[n][oc][oh][ow] =
646 * \sum_{kw=0}^{KW}\sum_{kh=0}^{KH}\sum_{ic=0}^{IC}
 *     src[n][ic][oh \cdot s_h - p_l[0] + kh][ow \cdot s_w - p_l[1] + kw]
648 * \cdot weights[g][oc][ic][kh][kw]
649 * + bias[g][oc],\f]
650 *
651 * where size of output spatial domain is given by
652 * \f$ OH = \left\lfloor{\frac{IH - KH + p_l[0] + p_r[0]}{s_h}}
653 * \right\rfloor + 1\f$,
654 * \f$ OW = \left\lfloor{\frac{IW - KW + p_l[1] + p_r[1]}{s_w}}
655 * \right\rfloor + 1\f$,
656 *
657 * and summation is carried over input channels \f$ic\f$ in
658 * group \f$g\f$, and \f$s_h, s_w\f$ are @p strides and
659 * \f$p_l, p_r\f$ are @p padding_l and @p padding_r.
660 * @{ */
661
662/** Initializes a convolution descriptor @p conv_desc for forward propagation
663 * using @p prop_kind (possible values are #mkldnn_forward_training and
664 * #mkldnn_forward_inference), @p alg_kind, memory descriptors, @p strides, @p
665 * padding_l, @p padding_r, and @p padding_kind. In order to create a
666 * convolution without bias, @p bias_desc should either be @c NULL or point to
667 * a descriptor with memory format kind equal to #mkldnn_format_kind_undef.
668 *
669 * @note If @p padding_r is @c NULL, the padding is supposed to be symmetric.
670 *
671 * @note Memory descriptors are allowed to be initialized with
672 * #mkldnn_format_kind_any value of @p format_kind.
673 *
674 * Inputs:
675 * - src (#mkldnn_query_src_md, 0)
676 * - weights (#mkldnn_query_weights_md, 0)
677 * - bias (#mkldnn_query_weights_md, 1), if created with bias
678 *
679 * Outputs:
680 * - dst (#mkldnn_query_dst_md, 0)
681 */
682mkldnn_status_t MKLDNN_API mkldnn_convolution_forward_desc_init(
683 mkldnn_convolution_desc_t *conv_desc, mkldnn_prop_kind_t prop_kind,
684 mkldnn_alg_kind_t alg_kind, const mkldnn_memory_desc_t *src_desc,
685 const mkldnn_memory_desc_t *weights_desc,
686 const mkldnn_memory_desc_t *bias_desc,
687 const mkldnn_memory_desc_t *dst_desc, const mkldnn_dims_t strides,
688 const mkldnn_dims_t padding_l, const mkldnn_dims_t padding_r,
689 mkldnn_padding_kind_t padding_kind);
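
/* Illustrative sketch (not part of the API): initializing a forward
 * convolution with #mkldnn_format_tag_any so that the implementation chooses
 * the memory formats, then creating the primitive descriptor. The names
 * mkldnn_f32, mkldnn_format_tag_any, mkldnn_convolution_direct, and
 * mkldnn_padding_zero come from mkldnn_types.h and are assumed here; `engine`
 * exists and error handling is omitted.
 *
 * @code
 * mkldnn_dims_t src_dims = {1, 3, 227, 227}, wei_dims = {96, 3, 11, 11};
 * mkldnn_dims_t bia_dims = {96}, dst_dims = {1, 96, 55, 55};
 * mkldnn_dims_t strides = {4, 4}, padding = {0, 0};
 *
 * mkldnn_memory_desc_t src_md, wei_md, bia_md, dst_md;
 * mkldnn_memory_desc_init_by_tag(&src_md, 4, src_dims, mkldnn_f32,
 *         mkldnn_format_tag_any);
 * mkldnn_memory_desc_init_by_tag(&wei_md, 4, wei_dims, mkldnn_f32,
 *         mkldnn_format_tag_any);
 * mkldnn_memory_desc_init_by_tag(&bia_md, 1, bia_dims, mkldnn_f32,
 *         mkldnn_format_tag_any);
 * mkldnn_memory_desc_init_by_tag(&dst_md, 4, dst_dims, mkldnn_f32,
 *         mkldnn_format_tag_any);
 *
 * mkldnn_convolution_desc_t conv_desc;
 * mkldnn_convolution_forward_desc_init(&conv_desc, mkldnn_forward_inference,
 *         mkldnn_convolution_direct, &src_md, &wei_md, &bia_md, &dst_md,
 *         strides, padding, padding, mkldnn_padding_zero);
 *
 * mkldnn_primitive_desc_t conv_pd;
 * mkldnn_primitive_desc_create(&conv_pd, &conv_desc, NULL, engine, NULL);
 * // query conv_pd (e.g. #mkldnn_query_src_md) to learn the chosen formats
 * // and reorder user data if necessary
 * @endcode
 */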
690
691/** Initializes a dilated convolution descriptor @p conv_desc for forward
692 * propagation using @p prop_kind (possible values are #mkldnn_forward_training
693 * and #mkldnn_forward_inference), @p alg_kind, memory descriptors, @p strides,
694 * @p dilates, @p padding_l, @p padding_r, and @p padding_kind.
695 * In order to create a dilated convolution without bias, @p bias_desc
696 * should either be @c NULL or point to a descriptor with memory format kind
 * equal to #mkldnn_format_kind_undef.
698 *
699 * @note If @p padding_r is @c NULL, the padding is supposed to be symmetric.
700 *
701 * @note Memory descriptors are allowed to be initialized with
702 * #mkldnn_format_kind_any value of @p format_kind.
703 *
704 * Inputs:
705 * - src (#mkldnn_query_src_md, 0)
706 * - weights (#mkldnn_query_weights_md, 0)
707 * - bias (#mkldnn_query_weights_md, 1), if created with bias
708 *
709 * Outputs:
710 * - dst (#mkldnn_query_dst_md, 0)
711 */
712mkldnn_status_t MKLDNN_API mkldnn_dilated_convolution_forward_desc_init(
713 mkldnn_convolution_desc_t *conv_desc, mkldnn_prop_kind_t prop_kind,
714 mkldnn_alg_kind_t alg_kind, const mkldnn_memory_desc_t *src_desc,
715 const mkldnn_memory_desc_t *weights_desc,
716 const mkldnn_memory_desc_t *bias_desc,
717 const mkldnn_memory_desc_t *dst_desc, const mkldnn_dims_t strides,
718 const mkldnn_dims_t dilates, const mkldnn_dims_t padding_l,
719 const mkldnn_dims_t padding_r, mkldnn_padding_kind_t padding_kind);
720
721/** Initializes a convolution descriptor @p conv_desc for backward propagation
722 * with respect to data using @p alg_kind, memory descriptors, @p strides, @p
723 * padding_l, @p padding_r, and @p padding_kind.
724 *
725 * @note Memory descriptors are allowed to be initialized with
726 * #mkldnn_format_kind_any value of @p format_kind.
727 *
728 * Inputs:
729 * - diff_dst (#mkldnn_query_diff_dst_md, 0)
730 * - weights (#mkldnn_query_weights_md, 0)
731 *
732 * Outputs:
733 * - diff_src (#mkldnn_query_diff_src_md, 0)
734 */
735mkldnn_status_t MKLDNN_API mkldnn_convolution_backward_data_desc_init(
736 mkldnn_convolution_desc_t *conv_desc, mkldnn_alg_kind_t alg_kind,
737 const mkldnn_memory_desc_t *diff_src_desc,
738 const mkldnn_memory_desc_t *weights_desc,
739 const mkldnn_memory_desc_t *diff_dst_desc, const mkldnn_dims_t strides,
740 const mkldnn_dims_t padding_l, const mkldnn_dims_t padding_r,
741 mkldnn_padding_kind_t padding_kind);
742
743/** Initializes a dilated convolution descriptor @p conv_desc for backward
744 * propagation with respect to data using @p alg_kind, memory descriptors, @p
 * strides, @p dilates, @p padding_l, @p padding_r, and @p padding_kind.
746 *
747 * @note Memory descriptors are allowed to be initialized with
748 * #mkldnn_format_kind_any value of @p format_kind.
749 *
750 * Inputs:
751 * - diff_dst (#mkldnn_query_diff_dst_md, 0)
752 * - weights (#mkldnn_query_weights_md, 0)
753 *
754 * Outputs:
755 * - diff_src (#mkldnn_query_diff_src_md, 0)
756 */
757mkldnn_status_t MKLDNN_API mkldnn_dilated_convolution_backward_data_desc_init(
758 mkldnn_convolution_desc_t *conv_desc, mkldnn_alg_kind_t alg_kind,
759 const mkldnn_memory_desc_t *diff_src_desc,
760 const mkldnn_memory_desc_t *weights_desc,
761 const mkldnn_memory_desc_t *diff_dst_desc, const mkldnn_dims_t strides,
762 const mkldnn_dims_t dilates, const mkldnn_dims_t padding_l,
763 const mkldnn_dims_t padding_r, mkldnn_padding_kind_t padding_kind);
764
765/** Initializes a convolution descriptor @p conv_desc for backward propagation
766 * with respect to weights using @p alg_kind, memory descriptors, @p strides,
767 * @p padding_l, @p padding_r, and @p padding_kind.
768 *
769 * @note Memory descriptors are allowed to be initialized with
770 * #mkldnn_format_kind_any value of @p format_kind.
771 *
772 * Inputs:
773 * - src (#mkldnn_query_src_md, 0)
774 * - diff_dst (#mkldnn_query_diff_dst_md, 0)
775 *
776 * Outputs:
777 * - diff_weights (#mkldnn_query_diff_weights_md, 0)
778 * - diff_bias (#mkldnn_query_diff_weights_md, 1), if created with bias
779 */
780mkldnn_status_t MKLDNN_API mkldnn_convolution_backward_weights_desc_init(
781 mkldnn_convolution_desc_t *conv_desc, mkldnn_alg_kind_t alg_kind,
782 const mkldnn_memory_desc_t *src_desc,
783 const mkldnn_memory_desc_t *diff_weights_desc,
784 const mkldnn_memory_desc_t *diff_bias_desc,
785 const mkldnn_memory_desc_t *diff_dst_desc, const mkldnn_dims_t strides,
786 const mkldnn_dims_t padding_l, const mkldnn_dims_t padding_r,
787 mkldnn_padding_kind_t padding_kind);
788
/** Initializes a dilated convolution descriptor @p conv_desc for backward
 * propagation with respect to weights using @p alg_kind, memory descriptors,
 * @p strides, @p dilates, @p padding_l, @p padding_r, and @p padding_kind.
792 *
793 * @note Memory descriptors are allowed to be initialized with
794 * #mkldnn_format_kind_any value of @p format_kind.
795 *
796 * Inputs:
797 * - src (#mkldnn_query_src_md, 0)
798 * - diff_dst (#mkldnn_query_diff_dst_md, 0)
799 *
800 * Outputs:
801 * - diff_weights (#mkldnn_query_diff_weights_md, 0)
802 * - diff_bias (#mkldnn_query_diff_weights_md, 1), if created with bias
803 */
804mkldnn_status_t MKLDNN_API
805mkldnn_dilated_convolution_backward_weights_desc_init(
806 mkldnn_convolution_desc_t *conv_desc, mkldnn_alg_kind_t alg_kind,
807 const mkldnn_memory_desc_t *src_desc,
808 const mkldnn_memory_desc_t *diff_weights_desc,
809 const mkldnn_memory_desc_t *diff_bias_desc,
810 const mkldnn_memory_desc_t *diff_dst_desc, const mkldnn_dims_t strides,
811 const mkldnn_dims_t dilates, const mkldnn_dims_t padding_l,
812 const mkldnn_dims_t padding_r, mkldnn_padding_kind_t padding_kind);
813
814/** @} */
815
816/** @addtogroup c_api_deconvolution Deconvolution
817 * A primitive to compute deconvolution using different algorithms.
818 *
819 * @{ */
820
821
822/** Initializes a deconvolution descriptor @p deconv_desc for forward
823 * propagation using @p prop_kind (possible values are #mkldnn_forward_training
824 * and #mkldnn_forward_inference), @p alg_kind, memory descriptors, @p strides,
825 * @p padding_l, @p padding_r, and @p padding_kind. In order to create a
826 * deconvolution without bias, @p bias_desc should either be @c NULL or point to
 * a descriptor with memory format kind equal to #mkldnn_format_kind_undef.
828 *
829 * @note If @p padding_r is @c NULL, the padding is supposed to be symmetric.
830 *
831 * @note Memory descriptors are allowed to be initialized with
832 * #mkldnn_format_kind_any value of @p format_kind.
833 *
834 * Inputs:
835 * - src (#mkldnn_query_src_md, 0)
836 * - weights (#mkldnn_query_weights_md, 0)
837 * - bias (#mkldnn_query_weights_md, 1), if created with bias
838 *
839 * Outputs:
840 * - dst (#mkldnn_query_dst_md, 0)
841 */
842mkldnn_status_t MKLDNN_API mkldnn_deconvolution_forward_desc_init(
843 mkldnn_deconvolution_desc_t *conv_desc, mkldnn_prop_kind_t prop_kind,
844 mkldnn_alg_kind_t alg_kind, const mkldnn_memory_desc_t *src_desc,
845 const mkldnn_memory_desc_t *weights_desc,
846 const mkldnn_memory_desc_t *bias_desc,
847 const mkldnn_memory_desc_t *dst_desc, const mkldnn_dims_t strides,
848 const mkldnn_dims_t padding_l, const mkldnn_dims_t padding_r,
849 mkldnn_padding_kind_t padding_kind);
850
851/** Initializes a dilated deconvolution descriptor @p deconv_desc for forward
852 * propagation using @p prop_kind (possible values are #mkldnn_forward_training
853 * and #mkldnn_forward_inference), @p alg_kind, memory descriptors, @p strides,
854 * @p dilates, @p padding_l, @p padding_r, and @p padding_kind. In order to
855 * create a dilated deconvolution without bias, @p bias_desc should either be
 * @c NULL or point to a descriptor with memory format kind equal to
857 * #mkldnn_format_kind_undef.
858 *
859 * @note If @p padding_r is @c NULL, the padding is supposed to be symmetric.
860 *
861 * @note Memory descriptors are allowed to be initialized with
862 * #mkldnn_format_kind_any value of @p format_kind.
863 *
864 * Inputs:
865 * - src (#mkldnn_query_src_md, 0)
866 * - weights (#mkldnn_query_weights_md, 0)
867 * - bias (#mkldnn_query_weights_md, 1), if created with bias
868 *
869 * Outputs:
870 * - dst (#mkldnn_query_dst_md, 0)
871 */
872mkldnn_status_t MKLDNN_API mkldnn_dilated_deconvolution_forward_desc_init(
873 mkldnn_deconvolution_desc_t *conv_desc, mkldnn_prop_kind_t prop_kind,
874 mkldnn_alg_kind_t alg_kind, const mkldnn_memory_desc_t *src_desc,
875 const mkldnn_memory_desc_t *weights_desc,
876 const mkldnn_memory_desc_t *bias_desc,
877 const mkldnn_memory_desc_t *dst_desc, const mkldnn_dims_t strides,
878 const mkldnn_dims_t dilates, const mkldnn_dims_t padding_l,
879 const mkldnn_dims_t padding_r, mkldnn_padding_kind_t padding_kind);
880
881/** Initializes a deconvolution descriptor @p conv_desc for backward propagation
882 * with respect to data using @p alg_kind, memory descriptors, @p strides, @p
883 * padding_l, @p padding_r, and @p padding_kind.
884 *
885 * @note Memory descriptors are allowed to be initialized with
886 * #mkldnn_format_kind_any value of @p format_kind.
887 *
888 * Inputs:
889 * - diff_dst (#mkldnn_query_diff_dst_md, 0)
890 * - weights (#mkldnn_query_weights_md, 0)
891 *
892 * Outputs:
893 * - diff_src (#mkldnn_query_diff_src_md, 0)
894 */
895mkldnn_status_t MKLDNN_API mkldnn_deconvolution_backward_data_desc_init(
896 mkldnn_deconvolution_desc_t *conv_desc, mkldnn_alg_kind_t alg_kind,
897 const mkldnn_memory_desc_t *diff_src_desc,
898 const mkldnn_memory_desc_t *weights_desc,
899 const mkldnn_memory_desc_t *diff_dst_desc, const mkldnn_dims_t strides,
900 const mkldnn_dims_t padding_l, const mkldnn_dims_t padding_r,
901 mkldnn_padding_kind_t padding_kind);
902
903/** Initializes a dilated deconvolution descriptor @p conv_desc for backward
904 * propagation with respect to data using @p alg_kind, memory descriptors, @p
905 * strides, @p dilates, @p padding_l, @p padding_r, and @p padding_kind.
906 *
907 * @note Memory descriptors are allowed to be initialized with
908 * #mkldnn_format_kind_any value of @p format_kind.
909 *
910 * Inputs:
911 * - diff_dst (#mkldnn_query_diff_dst_md, 0)
912 * - weights (#mkldnn_query_weights_md, 0)
913 *
914 * Outputs:
915 * - diff_src (#mkldnn_query_diff_src_md, 0)
916 */
917mkldnn_status_t MKLDNN_API mkldnn_dilated_deconvolution_backward_data_desc_init(
918 mkldnn_deconvolution_desc_t *conv_desc, mkldnn_alg_kind_t alg_kind,
919 const mkldnn_memory_desc_t *diff_src_desc,
920 const mkldnn_memory_desc_t *weights_desc,
921 const mkldnn_memory_desc_t *diff_dst_desc, const mkldnn_dims_t strides,
922 const mkldnn_dims_t dilates, const mkldnn_dims_t padding_l,
923 const mkldnn_dims_t padding_r, mkldnn_padding_kind_t padding_kind);
924
925/** Initializes a deconvolution descriptor @p conv_desc for backward propagation
926 * with respect to weights using @p alg_kind, memory descriptors, @p strides,
927 * @p padding_l, @p padding_r, and @p padding_kind.
928 *
929 * @note Memory descriptors are allowed to be initialized with
930 * #mkldnn_format_kind_any value of @p format_kind.
931 *
932 * Inputs:
933 * - src (#mkldnn_query_src_md, 0)
934 * - diff_dst (#mkldnn_query_diff_dst_md, 0)
935 *
936 * Outputs:
937 * - diff_weights (#mkldnn_query_diff_weights_md, 0)
938 * - diff_bias (#mkldnn_query_diff_weights_md, 1), if created with bias
939 */
940mkldnn_status_t MKLDNN_API mkldnn_deconvolution_backward_weights_desc_init(
941 mkldnn_deconvolution_desc_t *conv_desc, mkldnn_alg_kind_t alg_kind,
942 const mkldnn_memory_desc_t *src_desc,
943 const mkldnn_memory_desc_t *diff_weights_desc,
944 const mkldnn_memory_desc_t *diff_bias_desc,
945 const mkldnn_memory_desc_t *diff_dst_desc, const mkldnn_dims_t strides,
946 const mkldnn_dims_t padding_l, const mkldnn_dims_t padding_r,
947 mkldnn_padding_kind_t padding_kind);
948
949/** Initializes a dilated deconvolution descriptor @p conv_desc for backward
950 * propagation with respect to weights using @p alg_kind, memory descriptors,
951 * @p strides, @p dilates, @p padding_l, @p padding_r, and @p padding_kind.
952 *
953 * @note Memory descriptors are allowed to be initialized with
954 * #mkldnn_format_kind_any value of @p format_kind.
955 *
956 * Inputs:
957 * - src (#mkldnn_query_src_md, 0)
958 * - diff_dst (#mkldnn_query_diff_dst_md, 0)
959 *
960 * Outputs:
961 * - diff_weights (#mkldnn_query_diff_weights_md, 0)
962 * - diff_bias (#mkldnn_query_diff_weights_md, 1), if created with bias
963 */
964mkldnn_status_t MKLDNN_API mkldnn_dilated_deconvolution_backward_weights_desc_init(
965 mkldnn_deconvolution_desc_t *conv_desc, mkldnn_alg_kind_t alg_kind,
966 const mkldnn_memory_desc_t *src_desc,
967 const mkldnn_memory_desc_t *diff_weights_desc,
968 const mkldnn_memory_desc_t *diff_bias_desc,
969 const mkldnn_memory_desc_t *diff_dst_desc, const mkldnn_dims_t strides,
970 const mkldnn_dims_t dilates, const mkldnn_dims_t padding_l,
971 const mkldnn_dims_t padding_r, mkldnn_padding_kind_t padding_kind);
972
973/** @} */
974
975/** @addtogroup c_api_shuffle Shuffle
976 * A primitive to shuffle data along the axis.
977 * @{ */
978
979/** Initializes a @p shuffle_desc for forward propagation using @p prop_kind,
980 * memory descriptor @p data_desc, @p axis, and @p group_size.
981 *
982 * Inputs:
983 * - src (#mkldnn_query_src_md, 0)
984 *
985 * Outputs:
986 * - dst (#mkldnn_query_dst_md, 0)
987 *
988 */
989mkldnn_status_t MKLDNN_API mkldnn_shuffle_forward_desc_init(
990 mkldnn_shuffle_desc_t *shuffle_desc, mkldnn_prop_kind_t prop_kind,
991 const mkldnn_memory_desc_t *data_desc, int axis,
992 mkldnn_dim_t group_size);
993
994/** Initializes a @p shuffle_desc for backward propagation using memory
995 * descriptor @p diff_data_desc, @p axis, and @p group_size.
996 *
997 *
998 * Inputs:
999 * - diff_dst (#mkldnn_query_diff_dst_md, 0)
1000 *
1001 * Outputs:
1002 * - diff_src (#mkldnn_query_diff_src_md, 0)
1003 *
1004 */
1005mkldnn_status_t MKLDNN_API mkldnn_shuffle_backward_desc_init(
1006 mkldnn_shuffle_desc_t *shuffle_desc,
1007 const mkldnn_memory_desc_t *diff_data_desc, int axis,
1008 mkldnn_dim_t group_size);
1009
1010/** @} */
1011
1012/** @addtogroup c_api_eltwise Eltwise
1013 * A primitive to compute element-wise operations like parametric rectifier
1014 * linear unit (ReLU).
1015 *
1016 * Both forward and backward passes support in-place operation; that is, src
1017 * and dst point to the same memory for forward pass, and diff_dst and diff_src
1018 * point to the same memory for backward pass.
1019 *
1020 * @warning Because the original src is required for backward pass, in-place
1021 * forward pass in general cannot be applied during training. However, for some
 * kinds of element-wise operations (namely ReLU with the alpha parameter equal to 0),
1023 * dst and src can be interchangeable for the backward pass, which enables
1024 * performing in-place forward even for training.
1025 *
1026 * @{ */
1027
1028/** Initializes an @p eltwise_desc for forward propagation using @p prop_kind
1029 * (possible values are #mkldnn_forward_training and #mkldnn_forward_inference),
1030 * @p alg_kind algorithm, memory descriptor @p data_desc, @p alpha, and
1031 * @p beta parameters.
1032 *
1033 * @sa mkldnn_eltwise_desc_t for details.
1034 *
1035 * Inputs:
1036 * - src (#mkldnn_query_src_md, 0)
1037 *
1038 * Outputs:
1039 * - dst (#mkldnn_query_dst_md, 0)
1040 */
1041mkldnn_status_t MKLDNN_API mkldnn_eltwise_forward_desc_init(
1042 mkldnn_eltwise_desc_t *eltwise_desc, mkldnn_prop_kind_t prop_kind,
1043 mkldnn_alg_kind_t alg_kind, const mkldnn_memory_desc_t *data_desc,
1044 float alpha, float beta);
1045
1046/** Initializes an @p eltwise_desc for backward propagation using @p alg_kind
1047 * algorithm memory descriptors @p diff_data_desc and @p data_desc, and the
1048 * @p alpha and @p beta parameters.
1049 *
1050 * @sa mkldnn_eltwise_desc_t for details.
1051 *
1052 * Inputs:
1053 * - src (#mkldnn_query_src_md, 0)
1054 * - diff_dst (#mkldnn_query_diff_dst_md, 0)
1055 *
1056 * Outputs:
1057 * - diff_src (#mkldnn_query_diff_src_md, 0)
1058 */
1059mkldnn_status_t MKLDNN_API mkldnn_eltwise_backward_desc_init(
1060 mkldnn_eltwise_desc_t *eltwise_desc, mkldnn_alg_kind_t alg_kind,
1061 const mkldnn_memory_desc_t *diff_data_desc,
1062 const mkldnn_memory_desc_t *data_desc, float alpha, float beta);
1063
1064/** @} */
1065
1066/** @addtogroup c_api_softmax Softmax
1067 * A primitive to perform softmax.
1068 *
 * \f[dst[ou][c][in] =
 *    \frac{\exp\left(src[ou][c][in] - \max\limits_{c}(src[ou][c][in])\right)}
 *         {\sum\limits_{c}\exp\left(src[ou][c][in]
 *         - \max\limits_{c}(src[ou][c][in])\right)},\f]
 *
 * where \f$ou, in\f$ are the outer and inner sizes respectively, defined
1075 * by @p data_desc.dims and @p softmax_axis.
1076 * @{ */
1077
1078/** Initializes a @p softmax_desc for forward propagation using @p prop_kind
1079 * (possible values are #mkldnn_forward_training and #mkldnn_forward_inference)
1080 * and memory descriptor @p data_desc.
1081 *
1082 * Inputs:
1083 * - src (#mkldnn_query_src_md, 0)
1084 *
1085 * Outputs:
1086 * - dst (#mkldnn_query_dst_md, 0)
1087 */
1088mkldnn_status_t MKLDNN_API mkldnn_softmax_forward_desc_init(
1089 mkldnn_softmax_desc_t *softmax_desc, mkldnn_prop_kind_t prop_kind,
1090 const mkldnn_memory_desc_t *data_desc, int softmax_axis);
1091
1092/** Initializes a @p softmax_desc for backward propagation using memory
1093 * descriptors @p diff_desc and @p data_desc.
1094 *
1095 * Inputs:
1096 * - dst (#mkldnn_query_dst_md, 0)
1097 * - diff_dst (#mkldnn_query_diff_dst_md, 0)
1098 *
1099 * Outputs:
1100 * - diff_src (#mkldnn_query_diff_src_md, 0)
1101 */
1102mkldnn_status_t MKLDNN_API mkldnn_softmax_backward_desc_init(
1103 mkldnn_softmax_desc_t *softmax_desc,
1104 const mkldnn_memory_desc_t *diff_desc,
1105 const mkldnn_memory_desc_t *data_desc, int softmax_axis);
1106
1107/** @} */
1108
1109/** @addtogroup c_api_pooling Pooling
1110 * A primitive to perform max or average pooling.
1111 *
1112 * Max pooling:
1113 * \f[dst[n][oc][oh][ow] =
1114 * \max\limits_{kw,kh}
 *        (src[n][ic][oh \cdot s_h - p_l[0] + kh][ow \cdot s_w - p_l[1] + kw]),\f]
1116 *
1117 * Average pooling:
1118 * \f[dst[n][oc][oh][ow] =
1119 * \frac{1}{KW \cdot KH}\sum\limits_{kw,kh}
 *        src[n][ic][oh \cdot s_h - p_l[0] + kh][ow \cdot s_w - p_l[1] + kw],\f]
1121 *
1122 * where \f$p_l, p_r\f$ are @p padding_l and @p padding_r respectively, and
1123 * output spatial dimensions are calculated similarly to how they are done in
1124 * convolution.
1125 *
1126 * During training, max pooling requires a workspace on forward
1127 * (#mkldnn_forward_training) and backward (#mkldnn_backward) passes to
1128 * save indices where maximum was found. The workspace layout is opaque, and
1129 * the indices cannot be restored from it. However, one can use backward
1130 * pooling to perform up-sampling (used in some detection topologies).
1131 *
1132 * @{ */
1133
1134/** Initializes a pooling descriptor @p pool_desc for forward propagation using
1135 * @p prop_kind (possible values are #mkldnn_forward_training and
1136 * #mkldnn_forward_inference), @p alg_kind, memory descriptors, and pooling
1137 * parameters in the spatial domain: @p strides, @p kernel sizes, @p padding_l,
1138 * @p padding_r, and @p padding_kind.
1139 *
1140 * @note If @p padding_r is @c NULL, the padding is supposed to be symmetric.
1141 *
1142 * Inputs:
1143 * - src (#mkldnn_query_src_md, 0)
1144 *
1145 * Outputs:
1146 * - dst (#mkldnn_query_dst_md, 0)
1147 * - workspace (#mkldnn_query_workspace_md, 0),
1148 * if @p alg_kind = #mkldnn_pooling_max and
1149 * @p prop_kind = #mkldnn_forward_training
1150 */
1151mkldnn_status_t MKLDNN_API mkldnn_pooling_forward_desc_init(
1152 mkldnn_pooling_desc_t *pool_desc, mkldnn_prop_kind_t prop_kind,
1153 mkldnn_alg_kind_t alg_kind, const mkldnn_memory_desc_t *src_desc,
1154 const mkldnn_memory_desc_t *dst_desc, const mkldnn_dims_t strides,
1155 const mkldnn_dims_t kernel, const mkldnn_dims_t padding_l,
1156 const mkldnn_dims_t padding_r, mkldnn_padding_kind_t padding_kind);
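
/* Illustrative sketch (not part of the API): a 2x2 max-pooling forward
 * (training) descriptor. A workspace must then be queried from the resulting
 * primitive descriptor and passed to both forward and backward executions.
 * The names mkldnn_pooling_max and mkldnn_padding_zero come from
 * mkldnn_types.h and are assumed here; `src_md`/`dst_md` are fully initialized
 * memory descriptors, `engine` exists, and error handling is omitted.
 *
 * @code
 * mkldnn_dims_t strides = {2, 2}, kernel = {2, 2}, padding = {0, 0};
 *
 * mkldnn_pooling_desc_t pool_desc;
 * mkldnn_pooling_forward_desc_init(&pool_desc, mkldnn_forward_training,
 *         mkldnn_pooling_max, &src_md, &dst_md, strides, kernel,
 *         padding, padding, mkldnn_padding_zero);
 *
 * mkldnn_primitive_desc_t pool_pd;
 * mkldnn_primitive_desc_create(&pool_pd, &pool_desc, NULL, engine, NULL);
 * const mkldnn_memory_desc_t *ws_md
 *         = mkldnn_primitive_desc_query_md(pool_pd, mkldnn_query_workspace_md, 0);
 * @endcode
 */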
1157
1158/** Initializes a pooling descriptor @p pool_desc for backward propagation
1159 * using @p alg_kind, memory descriptors, and pooling parameters in the spatial
1160 * domain: @p strides, @p kernel sizes, @p padding_l, @p padding_r, and @p
1161 * padding_kind.
1162 *
1163 * @note If @p padding_r is @c NULL, the padding is supposed to be symmetric.
1164 *
1165 * Inputs:
1166 * - diff_dst (#mkldnn_query_diff_dst_md, 0)
1167 * - workspace (#mkldnn_query_workspace_md, 0),
1168 * if @p alg_kind = #mkldnn_pooling_max
1169 *
1170 * Outputs:
1171 * - diff_src (#mkldnn_query_diff_src_md, 0)
1172 */
1173mkldnn_status_t MKLDNN_API mkldnn_pooling_backward_desc_init(
1174 mkldnn_pooling_desc_t *pool_desc, mkldnn_alg_kind_t alg_kind,
1175 const mkldnn_memory_desc_t *diff_src_desc,
1176 const mkldnn_memory_desc_t *diff_dst_desc, const mkldnn_dims_t strides,
1177 const mkldnn_dims_t kernel, const mkldnn_dims_t padding_l,
1178 const mkldnn_dims_t padding_r, mkldnn_padding_kind_t padding_kind);
1179
1180/** @} */
1181
1182/** @addtogroup c_api_lrn LRN
1183 * A primitive to perform local response normalization (LRN) across or within
1184 * channels.
1185 *
 * LRN across channels:
1187 * \f[dst[n][c][h][w] = \left\{k + \frac{\alpha}{n_{l}}
1188 * \sum\limits_{i=-(n_{l}-1)/2}^{(n_{l}+1)/2}
1189 * (src[n][c+i][h][w])^2\right\}^{-\beta}
1190 * src[n][c][h][w],\f]
1191 *
1192 * LRN within channels:
1193 * \f[dst[n][c][h][w] = \left\{k + \frac{\alpha}{n_{l}}
1194 * \sum\limits_{i=-(n_{l}-1)/2}^{(n_{l}+1)/2}
1195 * (src[n][c][h+i][w+i])^2\right\}^{-\beta}
1196 * src[n][c][h][w],\f]
1197 *
1198 * where \f$n_{l}\f$ is the @p local_size.
1199 *
1200 * During training, LRN might or might not require a workspace on forward
1201 * (#mkldnn_forward_training) and backward (#mkldnn_backward) passes. The
1202 * behavior is implementation specific. Optimized implementations typically
1203 * require a workspace and use it to save some intermediate results from the
1204 * forward pass that accelerate computations on the backward pass.
1205 *
1206 * To check whether a workspace is required, query the LRN primitive descriptor
1207 * for the workspace (#mkldnn_query_workspace_md). Success indicates that the
1208 * workspace is required and its description will be returned.
 * @sa mkldnn_primitive_desc_query and mkldnn_primitive_desc_query_md
1210 *
1211 * @{ */
1212
1213/** Initializes an @p lrn_desc for forward propagation using @p prop_kind
1214 * (possible values are #mkldnn_forward_training and #mkldnn_forward_inference),
1215 * @p alg_kind, memory descriptor @p data_desc, and regularization
1216 * parameters @p local_size, @p alpha, @p beta, and @p k.
1217 *
1218 * Inputs:
1219 * - src (#mkldnn_query_src_md, 0)
1220 *
1221 * Outputs:
1222 * - dst (#mkldnn_query_dst_md, 0)
1223 * - workspace (#mkldnn_query_workspace_md, 0),
1224 * if the underlying implementation requires
1225 */
1226mkldnn_status_t MKLDNN_API mkldnn_lrn_forward_desc_init(
1227 mkldnn_lrn_desc_t *lrn_desc, mkldnn_prop_kind_t prop_kind,
1228 mkldnn_alg_kind_t alg_kind, const mkldnn_memory_desc_t *data_desc,
1229 mkldnn_dim_t local_size, float alpha, float beta, float k);
1230
1231/** Initializes an @p lrn_desc for backward propagation using @p alg_kind,
1232 * memory descriptors @p data_desc and @p diff_data_desc, and regularization
1233 * parameters @p local_size, @p alpha, @p beta, and @p k.
1234 *
1235 * Inputs:
1236 * - src (#mkldnn_query_src_md, 0)
1237 * - diff_dst (#mkldnn_query_diff_dst_md, 0)
1238 * - workspace (#mkldnn_query_workspace_md, 0),
1239 * if the underlying implementation requires
1240 *
1241 * Outputs:
1242 * - diff_src (#mkldnn_query_diff_src_md, 0)
1243 */
1244mkldnn_status_t MKLDNN_API mkldnn_lrn_backward_desc_init(
1245 mkldnn_lrn_desc_t *lrn_desc, mkldnn_alg_kind_t alg_kind,
1246 const mkldnn_memory_desc_t *diff_data_desc,
1247 const mkldnn_memory_desc_t *data_desc, mkldnn_dim_t local_size,
1248 float alpha, float beta, float k);
1249
1250/** @} */
1251
1252/** @addtogroup c_api_batch_normalization Batch Normalization
1253 * A primitive to perform batch normalization.
1254 *
1255 * \f[dst[n][c][h][w] = \gamma[c] \frac{src[n][c][h][w] - \mu[c]}
1256 * {\sqrt{\sigma[c] + eps}} + \beta[c],\f]
1257 *
1258 * where \f$\gamma[c], \beta[c]\f$ are weights and bias for a channel and,
1259 *
1260 * \f$\mu[c] = \frac{1}{NHW} \sum\limits_{whn} src[n][c][h][w]\f$,
1261 * \f$\sigma[c] = \frac{1}{NHW} \sum\limits_{whn}
1262 * (src[n][c][h][w] - \mu[c])^2\f$,
1263 *
1264 * and @c eps is a constant to improve numerical stability.
1265 *
1266 * Both forward and backward passes support in-place operation; that is, src
1267 * and dst point to the same memory for forward pass, and diff_dst and diff_src
1268 * point to the same memory for backward pass.
1269 *
1270 * Batch normalization supports different flavors controlled by
1271 * mkldnn_batch_normalization_desc_t. For example, batch normalization can
1272 * compute the mean and variance on its own or take them as inputs. It can
1273 * either perform scaling and shifting using gamma and beta parameters or not.
1274 * Optionally it can also perform a fused ReLU, which in case of training would
1275 * also require a workspace.
1276 *
1277 * @sa mkldnn_batch_normalization_desc_t
1278 * @{ */
1279
1280/** Initializes a batch normalization descriptor @p bnrm_desc for forward
1281 * propagation using @p prop_kind (possible values are
1282 * #mkldnn_forward_training and #mkldnn_forward_inference), memory descriptor
1283 * @p data_desc, normalization parameter @p epsilon, and @p flags set using bit
1284 * flags of type mkldnn_batch_normalization_desc_t.
1285 *
1286 * Inputs:
1287 * - src (#mkldnn_query_src_md, 0)
1288 * - mean (#mkldnn_query_src_md, 1),
1289 * if #mkldnn_use_global_stats bit-flags is set in @p flags
1290 * - variance (#mkldnn_query_src_md, 2),
1291 * if #mkldnn_use_global_stats bit-flags is set in @p flags
1292 * - scale_and_shift (#mkldnn_query_weights_md, 0),
1293 * if #mkldnn_use_scaleshift bit-flags is set in @p flags
1294 *
1295 * Outputs:
1296 * - dst (#mkldnn_query_dst_md, 0)
1297 * - mean (#mkldnn_query_dst_md, 1),
 *      if #mkldnn_use_global_stats bit-flags is not set in @p flags
 *      and @p prop_kind = #mkldnn_forward_training
1300 * - variance (#mkldnn_query_dst_md, 2),
1301 * if #mkldnn_use_global_stats bit-flags is not set in @p flags
1302 * and @p prop_kind = #mkldnn_forward_training
1303 * - workspace (#mkldnn_query_workspace_md, 0),
1304 * if #mkldnn_fuse_bn_relu bit-flags is set in @p flags
1305 * and @p prop_kind = #mkldnn_forward_training
1306 *
1307 * @note In-place operation is supported; that is, dst points to the same memory
1308 * as src.
1309 *
1310 * @sa mkldnn_batch_normalization_desc_t
1311 */
1312mkldnn_status_t MKLDNN_API mkldnn_batch_normalization_forward_desc_init(
1313 mkldnn_batch_normalization_desc_t *bnrm_desc,
1314 mkldnn_prop_kind_t prop_kind, const mkldnn_memory_desc_t *data_desc,
1315 float epsilon, unsigned flags);
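
/* Illustrative sketch (not part of the API): an inference-time batch
 * normalization that takes precomputed mean/variance and applies scale and
 * shift. The flag names come from mkldnn_types.h; `data_md` is a fully
 * initialized memory descriptor, `engine` exists, and error handling is
 * omitted.
 *
 * @code
 * mkldnn_batch_normalization_desc_t bnrm_desc;
 * mkldnn_batch_normalization_forward_desc_init(&bnrm_desc,
 *         mkldnn_forward_inference, &data_md, 1e-5f,
 *         mkldnn_use_global_stats | mkldnn_use_scaleshift);
 *
 * mkldnn_primitive_desc_t bnrm_pd;
 * mkldnn_primitive_desc_create(&bnrm_pd, &bnrm_desc, NULL, engine, NULL);
 * // mean, variance, and scale_and_shift must then be passed as inputs at
 * // execution time (see the Inputs list above)
 * @endcode
 */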
1316
1317/** Initializes a batch normalization descriptor @p bnrm_desc for backward
1318 * propagation with respect to data and scale-shift parameters using memory
1319 * descriptors @p data_desc and @p diff_data_desc, normalization parameter
 * @p epsilon, and @p flags set using the batch normalization bit flags
 * (#mkldnn_use_global_stats, #mkldnn_use_scaleshift, #mkldnn_fuse_bn_relu).
1322 *
1323 * Inputs:
1324 * - src (#mkldnn_query_src_md, 0)
1325 * - mean (#mkldnn_query_src_md, 1)
1326 * - variance (#mkldnn_query_src_md, 2)
1327 * - diff_dst (#mkldnn_query_diff_dst_md, 0)
 * - scale_and_shift (#mkldnn_query_weights_md, 0),
 *      if the #mkldnn_use_scaleshift bit flag is set in @p flags
 * - workspace (#mkldnn_query_workspace_md, 0),
 *      if the #mkldnn_fuse_bn_relu bit flag is set in @p flags
1332 *
1333 * Outputs:
1334 * - diff_src (#mkldnn_query_diff_src_md, 0)
 * - diff_scale_and_shift (#mkldnn_query_diff_weights_md, 0),
 *      if the #mkldnn_use_scaleshift bit flag is set in @p flags
 *      and @p prop_kind = #mkldnn_backward
1338 *
 * @note In-place operation is supported; that is, diff_src can point to the
 *     same memory as diff_dst.
1341 *
1342 * @sa mkldnn_batch_normalization_desc_t
1343 */
1344mkldnn_status_t MKLDNN_API mkldnn_batch_normalization_backward_desc_init(
1345 mkldnn_batch_normalization_desc_t *bnrm_desc,
1346 mkldnn_prop_kind_t prop_kind,
1347 const mkldnn_memory_desc_t *diff_data_desc,
1348 const mkldnn_memory_desc_t *data_desc,
1349 float epsilon, unsigned flags);
1350
1351/** @} */
1352
1353/** @addtogroup c_api_inner_product Inner product
1354 * A primitive to compute an inner product.
1355 *
 * The inner product layer is also known as a fully connected layer.
 * With spatial dimensions, the operation is defined as:
1358 *
1359 * \f[dst[n][oc] = \sum\limits_{ic, kh, kw}
1360 * src[n][ic][kh][kw] \cdot weights[oc][ic][kh][kw]
1361 * + bias[oc]\f]
1362 * @{ */
1363
1364/** Initializes an inner product descriptor @p ip_desc for forward propagation
1365 * using @p prop_kind (possible values are #mkldnn_forward_training and
1366 * #mkldnn_forward_inference) and memory descriptors. In order to create an
1367 * inner product without bias, @p bias_desc should be either @c NULL or a
 * pointer to a descriptor with memory format kind equal to
1369 * #mkldnn_format_kind_undef.
1370 *
1371 * @note Memory descriptors are allowed to be initialized with
1372 * #mkldnn_format_kind_any value of @p format_kind.
1373 *
1374 * Inputs:
1375 * - src (#mkldnn_query_src_md, 0)
1376 * - weights (#mkldnn_query_weights_md, 0)
1377 * - bias (#mkldnn_query_weights_md, 1), if created with bias
1378 *
1379 * Outputs:
1380 * - dst (#mkldnn_query_dst_md, 0)
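 *
 * A minimal usage sketch (an added illustration, not part of the original
 * documentation): a fully connected layer without bias. The memory
 * descriptors are assumed to be initialized elsewhere, possibly with
 * #mkldnn_format_kind_any so that the library can choose the layouts:
 * @code
 * mkldnn_memory_desc_t src_md, weights_md, dst_md; // assumed initialized
 * mkldnn_inner_product_desc_t ip_d;
 * // pass NULL for bias_desc to create an inner product without bias
 * mkldnn_status_t s = mkldnn_inner_product_forward_desc_init(&ip_d,
 *         mkldnn_forward_inference, &src_md, &weights_md, NULL, &dst_md);
 * @endcode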
1381 */
1382mkldnn_status_t MKLDNN_API mkldnn_inner_product_forward_desc_init(
1383 mkldnn_inner_product_desc_t *ip_desc, mkldnn_prop_kind_t prop_kind,
1384 const mkldnn_memory_desc_t *src_desc,
1385 const mkldnn_memory_desc_t *weights_desc,
1386 const mkldnn_memory_desc_t *bias_desc,
1387 const mkldnn_memory_desc_t *dst_desc);
1388
1389/** Initializes an inner product descriptor @p ip_desc for backward propagation
1390 * with respect to data using memory descriptors.
1391 *
1392 * @note Memory descriptors are allowed to be initialized with
1393 * #mkldnn_format_kind_any value of @p format_kind.
1394 *
1395 * Inputs:
1396 * - diff_dst (#mkldnn_query_diff_dst_md, 0)
1397 * - weights (#mkldnn_query_weights_md, 0)
1398 *
1399 * Outputs:
1400 * - diff_src (#mkldnn_query_diff_src_md, 0)
1401 */
1402mkldnn_status_t MKLDNN_API mkldnn_inner_product_backward_data_desc_init(
1403 mkldnn_inner_product_desc_t *ip_desc,
1404 const mkldnn_memory_desc_t *diff_src_desc,
1405 const mkldnn_memory_desc_t *weights_desc,
1406 const mkldnn_memory_desc_t *diff_dst_desc);
1407
1408/** Initializes an inner product descriptor @p ip_desc for backward propagation
1409 * with respect to weights using memory descriptors.
1410 *
1411 * @note Memory descriptors are allowed to be initialized with
1412 * #mkldnn_format_kind_any value of @p format_kind.
1413 *
1414 * Inputs:
1415 * - src (#mkldnn_query_src_md, 0)
1416 * - diff_dst (#mkldnn_query_diff_dst_md, 0)
1417 *
1418 * Outputs:
1419 * - diff_weights (#mkldnn_query_diff_weights_md, 0)
1420 * - diff_bias (#mkldnn_query_diff_weights_md, 1), if created with bias
1421 */
1422mkldnn_status_t MKLDNN_API mkldnn_inner_product_backward_weights_desc_init(
1423 mkldnn_inner_product_desc_t *ip_desc,
1424 const mkldnn_memory_desc_t *src_desc,
1425 const mkldnn_memory_desc_t *diff_weights_desc,
1426 const mkldnn_memory_desc_t *diff_bias_desc,
1427 const mkldnn_memory_desc_t *diff_dst_desc);
1428
1429/** @} */
1430
1431/** @addtogroup c_api_rnn RNN
1432 * A primitive to compute the common recurrent layer.
1433 * @todo add additional description for the group
1434 * @{ */
1435
1436/**
 * Initializes a recurrent cell descriptor @p rnn_cell_desc
 * using @p kind (possible values are
 * #mkldnn_vanilla_rnn, #mkldnn_vanilla_lstm, #mkldnn_vanilla_gru, and
 * #mkldnn_gru_linear_before_reset),
 * @p f (possible values are #mkldnn_eltwise_relu and
 * #mkldnn_eltwise_tanh), @p flags, @p alpha, and @p clipping.
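 *
 * A minimal usage sketch (an added illustration, not part of the original
 * documentation): a vanilla RNN cell with tanh activation, no flags, and no
 * clipping:
 * @code
 * mkldnn_rnn_cell_desc_t cell_d;
 * mkldnn_status_t s = mkldnn_rnn_cell_desc_init(&cell_d,
 *         mkldnn_vanilla_rnn, mkldnn_eltwise_tanh, 0u, 0.f, 0.f);
 * // the number of gates and states of the cell can then be queried with
 * // mkldnn_rnn_cell_get_gates_count() and mkldnn_rnn_cell_get_states_count()
 * @endcode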
1443 */
1444mkldnn_status_t MKLDNN_API mkldnn_rnn_cell_desc_init(
1445 mkldnn_rnn_cell_desc_t *rnn_cell_desc,
1446 mkldnn_alg_kind_t kind, mkldnn_alg_kind_t f,
1447 unsigned int flags, float alpha, float clipping);
1448
1449/** Returns the number of gates of a particular @p rnn_cell_desc. */
1450int MKLDNN_API mkldnn_rnn_cell_get_gates_count(
1451 const mkldnn_rnn_cell_desc_t *rnn_cell_desc);
1452
1453/** Returns the number of states of a particular @p rnn_cell_desc. */
1454int MKLDNN_API mkldnn_rnn_cell_get_states_count(
1455 const mkldnn_rnn_cell_desc_t *rnn_cell_desc);
1456
1457/** Sets quantization @p scale and @p shift for RNN data tensors.
 * For performance reasons, the low-precision configuration of the RNN
 * primitive expects input activations to have the unsigned int8 data type.
 * The scale and shift used to quantize floating-point data to unsigned
 * integers must be passed to the RNN primitive using attributes.
1462 * Example usage:
1463 * @code
1464 * // rnn parameters
1465 * int l = 2, t = 2, mb = 32, sic = 32, slc = 32, dic = 32, dlc = 32;
1466 * // activations quantization parameters
 * float scale = ..., shift = ...;
1468 *
1469 * mkldnn_primitive_attr_t rnn_attr;
1470 * // create default attributes
1471 * mkldnn_primitive_attr_create(&rnn_attr);
1472 *
1473 * // set scale and shift for int8 quantization of activation
1474 * mkldnn_primitive_attr_set_rnn_data_qparams(rnn_attr, scale, shift);
1475 *
 * // create and configure an rnn op_desc (configuration omitted here)
 * mkldnn_rnn_desc_t rnn_d;
 * mkldnn_primitive_desc_t rnn_pd;
 * mkldnn_primitive_desc_create(&rnn_pd, &rnn_d, rnn_attr, engine, NULL);
1480 * @endcode
1481 * @note
1482 * Quantization scale and shift are common for src_layer, src_iter,
1483 * dst_iter and dst_layer.
1484 */
1485mkldnn_status_t MKLDNN_API mkldnn_primitive_attr_set_rnn_data_qparams(
1486 mkldnn_primitive_attr_t attr, const float scale, const float shift);
1487
1488/** Sets quantization scales @p weights_scales for RNN weights tensors.
 * The low-precision configuration of the RNN primitive expects input weights
 * to have the signed int8 data type. The scales used to quantize
 * floating-point data to signed integers must be passed to the RNN primitive
 * using attributes.
 * The @p mask argument defines the correspondence between the output tensor
 * dimensions and the @p weights_scales array. Set the i-th bit of @p mask to 1
 * to use a dedicated scaling factor for each slice of the output tensor over
 * the i-th dimension. Set @p mask to 0 to use a common scaling factor for the
 * whole output tensor. Example usage:
1497 * @code
1498 * // rnn parameters
1499 * int l = 2, t = 2, mb = 32, sic = 32, slc = 32, dic = 32, dlc = 32;
 * int n_gates = 4; // e.g., an LSTM cell has four gates
 * // unique output scales per output channel and per gate
 * float weights_scales[dic * n_gates];
 * // ... fill weights_scales with quantization scales ...
 * // mask that selects the last two (g and o) dimensions of the ldigo format
 * int mask = 0x18;
1504 *
1505 * mkldnn_primitive_attr_t attr;
1506 * // create default attributes
1507 * mkldnn_primitive_attr_create(&attr);
1508 *
1509 * // set output channel-wise weights scales
1510 * mkldnn_primitive_attr_set_rnn_weights_qparams(attr, dic * n_gates, mask,
1511 * weights_scales);
1512 *
1513 * // create & configure rnn op_desc
1514 * mkldnn_rnn_desc_t rnn_d;
1515 * mkldnn_primitive_desc_t rnn_pd;
1516 * mkldnn_primitive_desc_create(&rnn_pd, &rnn_d, attr, engine, NULL);
1517 * @endcode
1518 * @note
1519 * The dimension order is always native and does not depend on the actual
1520 * layout used. For example, 5 dimensional weights always have
1521 * (l, d, i, g, o) logical dimension ordering.
1522 * @note
 *      Quantization scales are common for weights_layer and weights_iteration.
1524 * @note
 *      There is no way to check that @p count corresponds to @p mask until an
 *      actual primitive descriptor is created, so it is the user's
 *      responsibility to set proper values. The following formula must hold:
1528 *
1529 * \f[count = \prod\limits_{d \in mask} output.dims[d]\f]
1530 */
mkldnn_status_t MKLDNN_API mkldnn_primitive_attr_set_rnn_weights_qparams(
1532 mkldnn_primitive_attr_t attr, mkldnn_dim_t count, int mask,
1533 const float *weights_scales);
1534
/** Initializes an RNN descriptor @p rnn_desc for forward propagation
1536 * using @p prop_kind, @p rnn_cell_desc, @p direction, and memory descriptors.
1537 * @note If @p prop_kind equals #mkldnn_forward_training, you must query a
1538 * workspace memory descriptor before creating the primitive.
1539 *
1540 * @p src_iter_desc, @p bias_desc, and @p dst_iter_desc are allowed to either be
1541 * @c NULL or point to a zero memory descriptor, which would indicate that the
1542 * RNN primitive should not use them.
1543 *
1544 * @note All memory descriptors except @p src_iter_desc are allowed to be
1545 * initialized with #mkldnn_format_kind_any value of @p format_kind.
1546 *
1547 * Inputs:
1548 * - src_layer (#mkldnn_query_src_md, 0)
1549 * - src_iter (#mkldnn_query_src_md, 1), if used
1550 * - weights_layer (#mkldnn_query_weights_md, 0)
1551 * - weights_iter (#mkldnn_query_weights_md, 1)
1552 * - bias (#mkldnn_query_weights_md, 2), if used
1553 *
1554 * Outputs:
1555 * - dst_layer (#mkldnn_query_dst_md, 0)
1556 * - dst_iter (#mkldnn_query_dst_md, 1), if used
1557 * - workspace (#mkldnn_query_workspace_md, 0),
1558 * if @p prop_kind equals #mkldnn_forward_training
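 *
 * A minimal usage sketch (an added illustration, not part of the original
 * documentation): a unidirectional left-to-right RNN for inference, without
 * the optional src_iter, bias, and dst_iter tensors. The cell descriptor and
 * the layer/weights memory descriptors are assumed to be initialized
 * elsewhere; the direction value comes from mkldnn_rnn_direction_t in
 * mkldnn_types.h:
 * @code
 * mkldnn_rnn_cell_desc_t cell_d;      // e.g., from mkldnn_rnn_cell_desc_init()
 * mkldnn_memory_desc_t src_layer_md, weights_layer_md, weights_iter_md,
 *         dst_layer_md;               // assumed initialized elsewhere
 * mkldnn_rnn_desc_t rnn_d;
 * mkldnn_status_t s = mkldnn_rnn_forward_desc_init(&rnn_d,
 *         mkldnn_forward_inference, &cell_d,
 *         mkldnn_unidirectional_left2right,
 *         &src_layer_md, NULL, &weights_layer_md, &weights_iter_md,
 *         NULL, &dst_layer_md, NULL);
 * @endcode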
1559 */
1560mkldnn_status_t MKLDNN_API mkldnn_rnn_forward_desc_init(
1561 mkldnn_rnn_desc_t *rnn_desc, mkldnn_prop_kind_t prop_kind,
1562 const mkldnn_rnn_cell_desc_t *rnn_cell_desc,
1563 const mkldnn_rnn_direction_t direction,
1564 const mkldnn_memory_desc_t *src_layer_desc,
1565 const mkldnn_memory_desc_t *src_iter_desc,
1566 const mkldnn_memory_desc_t *weights_layer_desc,
1567 const mkldnn_memory_desc_t *weights_iter_desc,
1568 const mkldnn_memory_desc_t *bias_desc,
1569 const mkldnn_memory_desc_t *dst_layer_desc,
1570 const mkldnn_memory_desc_t *dst_iter_desc);
1571
/** Initializes an RNN descriptor @p rnn_desc for backward propagation
1573 * using @p prop_kind, @p rnn_cell_desc, @p direction, and memory descriptors.
1574 *
1575 * @note All memory descriptors are allowed to be initialized with
1576 * #mkldnn_format_kind_any value of @p format_kind.
1577 *
1578 * @p src_iter_desc (simultaneously with @p diff_src_iter_desc),
1579 * @p bias_desc (simultaneously with @p diff_bias_desc), and
 * @p dst_iter_desc (simultaneously with @p diff_dst_iter_desc) are allowed to
1581 * either be @c NULL or point to a zero memory descriptor, which would indicate
1582 * that the RNN primitive should not use them.
1583 *
1584 * Inputs:
1585 * - src_layer (#mkldnn_query_src_md, 0)
1586 * - src_iter (#mkldnn_query_src_md, 1), if used
1587 * - weights_layer (#mkldnn_query_weights_md, 0)
1588 * - weights_iter (#mkldnn_query_weights_md, 1)
1589 * - bias (#mkldnn_query_weights_md, 2), if used
1590 * - dst_layer (#mkldnn_query_dst_md, 0)
1591 * - dst_iter (#mkldnn_query_dst_md, 1), if used
1592 * - diff_dst_layer (#mkldnn_query_diff_dst_md, 0)
1593 * - diff_dst_iter (#mkldnn_query_diff_dst_md, 1), if used
1594 * - workspace (#mkldnn_query_workspace_md, 0)
1595 *
1596 * Outputs:
1597 * - diff_src_layer (#mkldnn_query_diff_src_md, 0)
1598 * - diff_src_iter (#mkldnn_query_diff_src_md, 1), if used
1599 * - diff_weights_layer (#mkldnn_query_diff_weights_md, 0)
1600 * - diff_weights_iter (#mkldnn_query_diff_weights_md, 1)
1601 * - diff_bias (#mkldnn_query_diff_weights_md, 2), if used
1602 */
1603mkldnn_status_t MKLDNN_API mkldnn_rnn_backward_desc_init(
1604 mkldnn_rnn_desc_t *rnn_desc, mkldnn_prop_kind_t prop_kind,
1605 const mkldnn_rnn_cell_desc_t *rnn_cell_desc,
1606 const mkldnn_rnn_direction_t direction,
1607 const mkldnn_memory_desc_t *src_layer_desc,
1608 const mkldnn_memory_desc_t *src_iter_desc,
1609 const mkldnn_memory_desc_t *weights_layer_desc,
1610 const mkldnn_memory_desc_t *weights_iter_desc,
1611 const mkldnn_memory_desc_t *bias_desc,
1612 const mkldnn_memory_desc_t *dst_layer_desc,
1613 const mkldnn_memory_desc_t *dst_iter_desc,
1614 const mkldnn_memory_desc_t *diff_src_layer_desc,
1615 const mkldnn_memory_desc_t *diff_src_iter_desc,
1616 const mkldnn_memory_desc_t *diff_weights_layer_desc,
1617 const mkldnn_memory_desc_t *diff_weights_iter_desc,
1618 const mkldnn_memory_desc_t *diff_bias_desc,
        const mkldnn_memory_desc_t *diff_dst_layer_desc,
1620 const mkldnn_memory_desc_t *diff_dst_iter_desc);
1621
1622/** @} */
1623
1624/** @} */
1625
1626/** @addtogroup c_api_engine Engine operations
1627 * @{ */
1628
1629/** Returns the number of engines of a particular @p kind. */
1630size_t MKLDNN_API mkldnn_engine_get_count(mkldnn_engine_kind_t kind);
1631
/** Creates an @p engine of a particular @p kind and @p index. */
1633mkldnn_status_t MKLDNN_API mkldnn_engine_create(mkldnn_engine_t *engine,
1634 mkldnn_engine_kind_t kind, size_t index);
1635
1636/** Returns the kind of an @p engine. */
1637mkldnn_status_t MKLDNN_API mkldnn_engine_get_kind(mkldnn_engine_t engine,
1638 mkldnn_engine_kind_t *kind);
1639
1640/** Destroys an @p engine. */
1641mkldnn_status_t MKLDNN_API mkldnn_engine_destroy(mkldnn_engine_t engine);
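
/* A minimal usage sketch (an added illustration, not part of the original
 * documentation): enumerating CPU engines and creating the first one. The
 * engine kind value mkldnn_cpu comes from mkldnn_engine_kind_t in
 * mkldnn_types.h.
 *
 *     mkldnn_engine_t engine;
 *     if (mkldnn_engine_get_count(mkldnn_cpu) > 0
 *             && mkldnn_engine_create(&engine, mkldnn_cpu, 0)
 *                     == mkldnn_success) {
 *         // ... create primitives and streams on this engine ...
 *         mkldnn_engine_destroy(engine);
 *     }
 */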
1642
1643/** @} */
1644
1645/** @addtogroup c_api_stream Execution stream operations
1646 * @{ */
1647
/** Creates an execution @p stream for @p engine with the given @p flags. */
1649mkldnn_status_t MKLDNN_API mkldnn_stream_create(mkldnn_stream_t *stream,
1650 mkldnn_engine_t engine, unsigned flags);
1651
1652/** Destroys an execution @p stream. */
1653mkldnn_status_t MKLDNN_API mkldnn_stream_destroy(mkldnn_stream_t stream);
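
/* A minimal usage sketch (an added illustration, not part of the original
 * documentation): creating and destroying a stream on an existing engine.
 * The mkldnn_stream_default_flags value is assumed to be available from
 * mkldnn_types.h.
 *
 *     mkldnn_engine_t engine;  // assumed created with mkldnn_engine_create()
 *     mkldnn_stream_t stream;
 *     if (mkldnn_stream_create(&stream, engine, mkldnn_stream_default_flags)
 *             == mkldnn_success) {
 *         // ... execute primitives on the stream ...
 *         mkldnn_stream_destroy(stream);
 *     }
 */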
1654
1655/** @} */
1656
1657/** @addtogroup c_api_service Service functions
1658 * @{ */
1659
1660/** Sets verbosity level (print information to stdout).
1661 * Possible levels are:
1662 * - 0 -- no verbose output (default)
1663 * - 1 -- primitive information at execution
1664 * - 2 -- primitive information at creation and execution
1665 *
1666 * @note
1667 * Dumping information might affect performance.
1668 * This setting overrides the MKLDNN_VERBOSE environment variable. */
1669mkldnn_status_t MKLDNN_API mkldnn_set_verbose(int level);
1670
1671/** Enables or disables dumping of JIT-generated code.
1672 * The enable parameter can be:
1673 * - 0 -- disable
1674 * - any other value -- enable
1675 *
1676 * @note
1677 * This setting overrides the MKLDNN_JIT_DUMP environment variable. */
1678mkldnn_status_t MKLDNN_API mkldnn_set_jit_dump(int enable);
1679
1680/** Gets library version information.
1681 * Version information includes:
1682 * - major -- major version number
1683 * - minor -- minor version number
1684 * - patch -- patch release number
1685 * - hash -- git commit hash */
1686const mkldnn_version_t MKLDNN_API *mkldnn_version();
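
/* A minimal usage sketch (an added illustration, not part of the original
 * documentation): enabling verbose output and printing the library version.
 * The mkldnn_version_t field names follow the list above (major, minor,
 * patch, hash); <stdio.h> provides printf.
 *
 *     mkldnn_set_verbose(1); // print primitive information at execution
 *     const mkldnn_version_t *v = mkldnn_version();
 *     printf("MKL-DNN v%d.%d.%d (commit %s)\n",
 *             v->major, v->minor, v->patch, v->hash);
 */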
1687
1688/** @} */
1689
1690/** @addtogroup c_api_blas BLAS functions
 * A subset of Basic Linear Algebra Subprograms (BLAS) functions to perform
1692 * matrix-matrix multiplication.
1693 * @{ */
1694
1695/** SGEMM performs a matrix-matrix multiplication operation defined as
1696 *
1697 * C := alpha*op( A )*op( B ) + beta*C
1698 *
1699 * where
1700 * - op( X ) is one of op( X ) = X or op( X ) = X**T,
1701 * - alpha and beta are scalars,
 * - A, B, and C are matrices, with op( A ) an m-by-k matrix, op( B ) a
 *   k-by-n matrix, and C an m-by-n matrix.
1704 *
 * The matrices are assumed to be stored in column-major order (the elements
 * of a matrix column are contiguous in memory).
1707 *
1708 * @note
1709 * The API is different from the standard BLAS routine
1710 * because it returns mkldnn_status_t for error handling.
1711 * XERBLA is not supported: no error message will be printed
1712 * in case of incorrect parameters. */
1713mkldnn_status_t MKLDNN_API mkldnn_sgemm(
1714 const char *transa, const char *transb,
1715 const mkldnn_dim_t *M, const mkldnn_dim_t *N, const mkldnn_dim_t *K,
1716 const float *alpha, const float *A, const mkldnn_dim_t *lda,
1717 const float *B, const mkldnn_dim_t *ldb,
1718 const float *beta, float *C, const mkldnn_dim_t *ldc);
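
/* A minimal usage sketch (an added illustration, not part of the original
 * documentation): computing C := alpha * A * B + beta * C for 2x2
 * column-major matrices with alpha = 1 and beta = 0.
 *
 *     const mkldnn_dim_t m = 2, n = 2, k = 2;
 *     const float A[] = {1, 3, 2, 4};  // column-major A = [1 2; 3 4]
 *     const float B[] = {5, 7, 6, 8};  // column-major B = [5 6; 7 8]
 *     float C[4] = {0};
 *     const float alpha = 1.0f, beta = 0.0f;
 *     mkldnn_status_t s = mkldnn_sgemm("N", "N", &m, &n, &k,
 *             &alpha, A, &m, B, &k, &beta, C, &m);
 *     // on success, C == {19, 43, 22, 50}, that is, C = [19 22; 43 50]
 */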
1719
1720/** gemm_s8u8s32 and gemm_s8s8s32 perform a matrix-matrix multiplication
1721 * operation and add the result to a scalar-matrix product. For the final
1722 * result, a vector is added to each row or column of the output matrix.
1723 * The operation is defined as:
1724 *
1725 * C := alpha*(op(A) + A_offset) * (op(B) + B_offset) + beta*C + C_offset
1726 *
1727 * where
1728 * - op( X ) = X or op( X ) = X**T,
 * - A_offset is an m-by-k matrix with every element equal to the value @p ao,
 * - B_offset is a k-by-n matrix with every element equal to the value @p bo,
 * - C_offset is an m-by-n matrix defined by the @p co array of length len:
1732 * - if offsetc = F: len must be at least 1
1733 * - if offsetc = C: len must be at least max(1, m)
1734 * - if offsetc = R: len must be at least max(1, n)
1735 * - alpha and beta are scalars, and A, B and C are matrices, with op( A )
1736 * an m-by-k matrix, op( B ) a k-by-n matrix and C an m-by-n matrix.
1737 *
 * The matrices are assumed to be stored in column-major order (the elements
 * of a matrix column are contiguous in memory).
1740 *
1741 * @note
 *      The API differs from the standard BLAS routine
1743 * because it returns mkldnn_status_t for error handling.
1744 * XERBLA is not supported: no error message will be printed
1745 * in case of incorrect parameters. */
1746mkldnn_status_t MKLDNN_API mkldnn_gemm_s8u8s32(
1747 const char *transa, const char *transb, const char *offsetc,
1748 const mkldnn_dim_t *M, const mkldnn_dim_t *N, const mkldnn_dim_t *K,
1749 const float *alpha,
1750 const int8_t *A, const mkldnn_dim_t *lda, const int8_t *ao,
1751 const uint8_t *B, const mkldnn_dim_t *ldb, const int8_t *bo,
1752 const float *beta,
1753 int32_t *c, const mkldnn_dim_t *ldc, const int32_t *co);
1754
1755mkldnn_status_t MKLDNN_API mkldnn_gemm_s8s8s32(
1756 const char *transa, const char *transb, const char *offsetc,
1757 const mkldnn_dim_t *M, const mkldnn_dim_t *N, const mkldnn_dim_t *K,
1758 const float *alpha,
1759 const int8_t *A, const mkldnn_dim_t *lda, const int8_t *ao,
1760 const int8_t *B, const mkldnn_dim_t *ldb, const int8_t *bo,
1761 const float *beta,
1762 int32_t *c, const mkldnn_dim_t *ldc, const int32_t *co);
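
/* A minimal usage sketch (an added illustration, not part of the original
 * documentation): an int8 matrix multiplication with zero A/B offsets and a
 * single fixed output offset (offsetc = "F"), reproducing the sgemm example
 * above in integer arithmetic.
 *
 *     const mkldnn_dim_t m = 2, n = 2, k = 2;
 *     const int8_t A[] = {1, 3, 2, 4};   // column-major, signed int8
 *     const uint8_t B[] = {5, 7, 6, 8};  // column-major, unsigned int8
 *     int32_t C[4] = {0};
 *     const int8_t ao = 0, bo = 0;
 *     const int32_t co = 0;              // len = 1 because offsetc = "F"
 *     const float alpha = 1.0f, beta = 0.0f;
 *     mkldnn_status_t s = mkldnn_gemm_s8u8s32("N", "N", "F", &m, &n, &k,
 *             &alpha, A, &m, &ao, B, &k, &bo, &beta, C, &m, &co);
 *     // on success, C == {19, 43, 22, 50} in column-major order
 */
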
1763/** @} */
1764
1765/** @} */
1766
1767#ifdef __cplusplus
1768}
1769#endif
1770
1771#endif
1772