A container that describes and stores data. More...

Classes
struct	dnnl::memory
	Memory object. More...

struct	dnnl_blocking_desc_t
	Generic description of blocked data layout for most memory formats. More...

struct	dnnl_wino_desc_t
	Description of tensor of weights for winograd 2x3 convolution. More...

struct	dnnl_rnn_packed_desc_t
	Description of tensor of packed weights for rnn. More...

struct	dnnl_memory_extra_desc_t
	Description of extra information stored in memory. More...

struct	dnnl_memory_desc_t
	Memory descriptor. More...

struct	dnnl_memory
	An opaque structure to describe a memory. More...

Macros
#define	DNNL_MAX_NDIMS 12
	Maximum number of dimensions a tensor can have. More...

#define	DNNL_RUNTIME_DIM_VAL INT64_MIN
	A wildcard value for dimensions that are unknown at a primitive creation time. More...

#define	DNNL_RUNTIME_SIZE_VAL ((size_t)DNNL_RUNTIME_DIM_VAL)
	A `size_t` counterpart of the DNNL_RUNTIME_DIM_VAL. More...

#define	DNNL_RUNTIME_F32_VAL (DNNL_RUNTIME_F32_VAL_REP.f)
	A wildcard value for floating point values that are unknown at a primitive creation time. More...

#define	DNNL_RUNTIME_S32_VAL DNNL_RUNTIME_S32_VAL_REP
	A wildcard value for int32_t values that are unknown at a primitive creation time. More...

#define	DNNL_RNN_MAX_N_PARTS 4
	Maximum number of parts of RNN weights tensor that require separate computation. More...

#define	DNNL_MEMORY_NONE (NULL)
	Special pointer value that indicates that a memory object should not have an underlying buffer. More...

#define	DNNL_MEMORY_ALLOCATE ((void *)(size_t)-1)
	Special pointer value that indicates that the library needs to allocate an underlying buffer for a memory object. More...

Typedefs
typedef int64_t	dnnl_dim_t
	A type to describe tensor dimension.

typedef dnnl_dim_t	dnnl_dims_t[DNNL_MAX_NDIMS]
	A type to describe tensor dimensions.

typedef struct dnnl_memory *	dnnl_memory_t
	A memory handle.

typedef const struct dnnl_memory *	const_dnnl_memory_t
	A constant memory handle.

Enumerations
enum	dnnl_data_type_t
	Data type specification. More...

enum	dnnl_format_kind_t
	Memory format kind. More...

enum	dnnl_format_tag_t
	Memory format tag specification. More...

enum	dnnl_wino_memory_format_t
	Winograd-specific formats. More...

enum	dnnl_memory_extra_flags_t
	Flags for memory special features. More...

Functions
dnnl_status_t DNNL_API	dnnl_memory_desc_init_by_strides (dnnl_memory_desc_t *memory_desc, int ndims, const dnnl_dims_t dims, dnnl_data_type_t data_type, const dnnl_dims_t strides)
	Initializes a memory descriptor using dimensions and strides. More...

dnnl_status_t DNNL_API	dnnl_memory_desc_init_by_tag (dnnl_memory_desc_t *memory_desc, int ndims, const dnnl_dims_t dims, dnnl_data_type_t data_type, dnnl_format_tag_t tag)
	Initializes a memory descriptor using dimensions and memory format tag. More...

dnnl_status_t DNNL_API	dnnl_memory_desc_init_submemory (dnnl_memory_desc_t *memory_desc, const dnnl_memory_desc_t *parent_memory_desc, const dnnl_dims_t dims, const dnnl_dims_t offsets)
	Initializes a memory descriptor for a region inside an area described by an existing memory descriptor. More...

dnnl_status_t DNNL_API	dnnl_memory_desc_reshape (dnnl_memory_desc_t *out_memory_desc, const dnnl_memory_desc_t *in_memory_desc, int ndims, const dnnl_dims_t dims)
	Initializes a memory descriptor by reshaping an existing one. More...

dnnl_status_t DNNL_API	dnnl_memory_desc_permute_axes (dnnl_memory_desc_t *out_memory_desc, const dnnl_memory_desc_t *in_memory_desc, const int *permutation)
	Initializes a memory descriptor by permuting axes in an existing one. More...

int DNNL_API	dnnl_memory_desc_equal (const dnnl_memory_desc_t *lhs, const dnnl_memory_desc_t *rhs)
	Compares two memory descriptors. More...

size_t DNNL_API	dnnl_memory_desc_get_size (const dnnl_memory_desc_t *memory_desc)
	Returns the size of a memory descriptor. More...

dnnl_status_t DNNL_API	dnnl_memory_create (dnnl_memory_t *memory, const dnnl_memory_desc_t *memory_desc, dnnl_engine_t engine, void *handle)
	Creates a memory object. More...

dnnl_status_t DNNL_API	dnnl_memory_get_memory_desc (const_dnnl_memory_t memory, const dnnl_memory_desc_t **memory_desc)
	Returns the memory descriptor for a memory object. More...

dnnl_status_t DNNL_API	dnnl_memory_get_engine (const_dnnl_memory_t memory, dnnl_engine_t *engine)
	Returns the engine of a memory object. More...

dnnl_status_t DNNL_API	dnnl_memory_map_data (const_dnnl_memory_t memory, void **mapped_ptr)
	Maps a memory object and returns a host-side pointer to a memory buffer with a copy of its contents. More...

dnnl_status_t DNNL_API	dnnl_memory_unmap_data (const_dnnl_memory_t memory, void *mapped_ptr)
	Unmaps a memory object and writes back any changes made to the previously mapped memory buffer. More...

dnnl_status_t DNNL_API	dnnl_memory_get_data_handle (const_dnnl_memory_t memory, void **handle)
	Returns memory object's data handle. More...

dnnl_status_t DNNL_API	dnnl_memory_set_data_handle (dnnl_memory_t memory, void *handle)
	Sets the underlying memory buffer. More...

dnnl_status_t DNNL_API	dnnl_memory_set_data_handle_v2 (dnnl_memory_t memory, void *handle, dnnl_stream_t stream)
	Sets the underlying memory buffer. More...

dnnl_status_t DNNL_API	dnnl_memory_destroy (dnnl_memory_t memory)
	Destroys a memory object. More...

Detailed Description

A container that describes and stores data.

Memory objects can contain data of various types and formats. There are two levels of abstraction:

Memory descriptor – engine-agnostic logical description of data (number of dimensions, dimension sizes, and data type), and, optionally, the information about the physical format of data in memory. If this information is not known yet, a memory descriptor can be created with dnnl::memory::format_tag::any. This allows compute-intensive primitives to choose the best format for computation. The user is responsible for reordering the data into the chosen format when formats do not match.

A memory descriptor can be initialized either by specifying dimensions and a memory format tag or strides for each of them, or by manipulating the dnnl_memory_desc_t structure directly.

Warning
The latter approach requires understanding how the physical data representation is mapped to the structure and is discouraged. This topic is discussed in Understanding Memory Formats.

The user can query the amount of memory required by a memory descriptor using the dnnl::memory::desc::get_size() function. The size of data in general cannot be computed as the product of dimensions multiplied by the size of the data type. So users are required to use this function for better code portability.

Two memory descriptors can be compared using the equality and inequality operators. The comparison is especially useful when checking whether it is necessary to reorder data from the user's data format to a primitive's format.
Memory object – an engine-specific object that handles the memory buffer and its description (a memory descriptor). For the CPU engine or with USM, the memory buffer handle is simply a pointer to void. The memory buffer can be queried using dnnl::memory::get_data_handle() and set using dnnl::memory::set_data_handle(). The underlying SYCL buffer, when used, can be queried using dnnl::sycl_interop::get_buffer and set using dnnl::sycl_interop::set_buffer. A memory object can also be queried for the underlying memory descriptor and for its engine using dnnl::memory::get_desc() and dnnl::memory::get_engine().

Along with ordinary memory descriptors with all dimensions being positive, the library supports zero-volume memory descriptors with one or more dimensions set to zero. This is used to support the NumPy* convention. If a zero-volume memory is passed to a primitive, the primitive typically does not perform any computations with this memory. For example:

A concatenation primitive would ignore all memory objects with zeroes in the concat dimension / axis.
A forward convolution with a source memory object with zero in the minibatch dimension would always produce a destination memory object with a zero in the minibatch dimension and perform no computations.
However, a forward convolution with a zero in one of the weights dimensions is ill-defined and is considered to be an error by the library because there is no clear definition of what the output values should be.

Memory buffer of a zero-volume memory is never accessed.

Macro Definition Documentation

◆ DNNL_MAX_NDIMS

#define DNNL_MAX_NDIMS 12

Maximum number of dimensions a tensor can have.

Only restricts the amount of space used for the tensor description. Individual computational primitives may support only tensors of certain dimensions.

◆ DNNL_RUNTIME_DIM_VAL

#define DNNL_RUNTIME_DIM_VAL INT64_MIN

A wildcard value for dimensions that are unknown at a primitive creation time.

Examples: cpu_sgemm_and_matmul.cpp and inference_int8_matmul.cpp.

◆ DNNL_RUNTIME_SIZE_VAL

#define DNNL_RUNTIME_SIZE_VAL ((size_t)DNNL_RUNTIME_DIM_VAL)

A size_t counterpart of the DNNL_RUNTIME_DIM_VAL.

For instance, this value is returned by dnnl_memory_desc_get_size() if any of the dimensions or strides is equal to DNNL_RUNTIME_DIM_VAL.

◆ DNNL_RUNTIME_F32_VAL

#define DNNL_RUNTIME_F32_VAL (DNNL_RUNTIME_F32_VAL_REP.f)

A wildcard value for floating point values that are unknown at a primitive creation time.

Examples: cpu_matmul_quantization.cpp, cpu_sgemm_and_matmul.cpp, and inference_int8_matmul.cpp.

◆ DNNL_RUNTIME_S32_VAL

#define DNNL_RUNTIME_S32_VAL DNNL_RUNTIME_S32_VAL_REP

A wildcard value for int32_t values that are unknown at a primitive creation time.

Examples: cpu_matmul_quantization.cpp and inference_int8_matmul.cpp.

◆ DNNL_RNN_MAX_N_PARTS

#define DNNL_RNN_MAX_N_PARTS 4

Maximum number of parts of RNN weights tensor that require separate computation.

◆ DNNL_MEMORY_NONE

#define DNNL_MEMORY_NONE (NULL)

Special pointer value that indicates that a memory object should not have an underlying buffer.

◆ DNNL_MEMORY_ALLOCATE

#define DNNL_MEMORY_ALLOCATE ((void *)(size_t)-1)

Special pointer value that indicates that the library needs to allocate an underlying buffer for a memory object.

Examples: cnn_inference_f32.c, cpu_cnn_training_f32.c, and cross_engine_reorder.c.

Enumeration Type Documentation

◆ dnnl_data_type_t

enum dnnl_data_type_t

Data type specification.

Enumerator
dnnl_data_type_undef	Undefined data type, used for empty memory descriptors.
dnnl_f16	16-bit/half-precision floating point.
dnnl_bf16	non-standard 16-bit (bfloat16 w/ 7 bit mantissa) floating point.
dnnl_f32	32-bit/single-precision floating point.
dnnl_s32	32-bit signed integer.
dnnl_s8	8-bit signed integer.
dnnl_u8	8-bit unsigned integer.

◆ dnnl_format_kind_t

enum dnnl_format_kind_t

Memory format kind.

Enumerator
dnnl_format_kind_undef	Undefined memory format kind, used for empty memory descriptors.
dnnl_format_kind_any	Unspecified format kind. The primitive selects a format automatically.
dnnl_blocked	A tensor in a generic format described by the stride and blocking values in each dimension. See dnnl_blocking_desc_t for more information.
dnnl_format_kind_wino	Weights format used in 8bit Winograd convolution.
dnnl_format_kind_rnn_packed	Packed weights format used in RNN.

◆ dnnl_format_tag_t

enum dnnl_format_tag_t

Memory format tag specification.

oneDNN formats describe physical data layout. The physical layout is described as a sequence of the dimensions as they are laid out in the memory (from the outer-most to the inner-most). Note that this order doesn't affect the logical order of the dimensions that is kept in the dims field of the dnnl_memory_desc_t structure. The logical order of the dimensions is specified by the primitive that uses the tensor.

For example, CNN 5D tensor always has its logical dimensions in the order (batch, channels, depth, height, width), while the physical layout might be NCDHW (corresponds to dnnl_ncdhw format tag) or NDHWC (corresponds to dnnl_ndhwc format tag).

int batch = 2, channels = 16, depth = 13, height = 13, width = 13;
int ndims = 5; // 5D tensor
dnnl_dims_t dims = {batch, channels, depth, height, width};
dnnl_memory_desc_t data_in_ncdhw;
dnnl_memory_desc_init_by_tag(
     &data_in_ncdhw, 5, dims, dnnl_f32, dnnl_ncdhw);
// note that in both cases dims passed are the same
dnnl_memory_desc_t data_in_ndhwc;
dnnl_memory_desc_init_by_tag(
     &data_in_ndhwc, 5, dims, dnnl_f32, dnnl_ndhwc);

Memory format tags can be further divided into two categories:

Domain-agnostic names, i.e. names that do not depend on the tensor usage in the specific primitive. These names use letters from a to l to denote logical dimensions from 1 to 12, and form the order in which the dimensions are laid out in memory. For instance, dnnl_ab is used to denote a 2D tensor where the second logical dimension (aka b) is the innermost, i.e. has stride = 1, and the first logical dimension (a) is laid out in memory with a stride equal to the size of the second dimension. On the other hand, dnnl_ba is just a transposed version of the same tensor: the first dimension (a) becomes the innermost one.
Domain-specific names, i.e. names that make sense only in the context of a certain domain, such as CNN. These names are just aliases for the corresponding domain-agnostic tags and are used mostly for convenience. For example, dnnl_nc is used to denote the 2D CNN activations tensor memory format, where channels are the innermost dimension and batch is the outermost one. Moreover, dnnl_nc is just an alias to dnnl_ab, since for oneDNN CNN primitives the logical dimensions of activations tensors come in the order: batch, channels, spatial. In other words, batch corresponds to the first logical dimension (a), and channels correspond to the second one (b).

The following domain-specific notation applies to memory format tags:

'n' denotes the mini-batch dimension
'c' denotes a channels dimension
When there are multiple channel dimensions (for example, in convolution weights tensor), 'i' and 'o' denote dimensions of input and output channels
'd', 'h', and 'w' denote spatial depth, height, and width respectively

Upper-case letters indicate that the data is laid out in blocks for a particular dimension. In such cases, the format name contains both upper- and lower-case letters for that dimension with a lower-case letter preceded by the block size. For example: dnnl_nChw8c describes a format where the outermost dimension is mini-batch, followed by the channel block number, followed by the spatial height and width, and finally followed by 8-element channel blocks.

See also: Understanding Memory Formats

Enumerator
dnnl_format_tag_undef	Undefined memory format tag.
dnnl_format_tag_any	Undefined memory format tag. The primitive selects a format automatically.
dnnl_a	plain 1D tensor
dnnl_ab	plain 2D tensor
dnnl_abc	plain 3D tensor
dnnl_abcd	plain 4D tensor
dnnl_acbd	plain 4D tensor
dnnl_abcde	plain 5D tensor
dnnl_abcdef	plain 6D tensor
dnnl_abcdefg	plain 7D tensor
dnnl_abcdefgh	plain 8D tensor
dnnl_abcdefghi	plain 9D tensor
dnnl_abcdefghij	plain 10D tensor
dnnl_abcdefghijk	plain 11D tensor
dnnl_abcdefghijkl	plain 12D tensor
dnnl_abdc	permuted 4D tensor
dnnl_abdec	permuted 5D tensor
dnnl_abdfce	permuted 6D tensor
dnnl_acb	permuted 3D tensor
dnnl_acbde	permuted 5D tensor
dnnl_acbdef	permuted 6D tensor
dnnl_abdefc	permuted 6D tensor
dnnl_acdb	permuted 4D tensor
dnnl_acdeb	permuted 5D tensor
dnnl_ba	permuted 2D tensor
dnnl_bac	permuted 3D tensor
dnnl_bacd	permuted 4D tensor
dnnl_bacde	permuted 5D tensor
dnnl_bca	permuted 3D tensor
dnnl_bcda	permuted 4D tensor
dnnl_bcdea	permuted 5D tensor
dnnl_cba	permuted 3D tensor
dnnl_cdba	permuted 4D tensor
dnnl_dcab	permuted 4D tensor
dnnl_cdeba	permuted 5D tensor
dnnl_decab	permuted 5D tensor
dnnl_defcab	permuted 6D tensor
dnnl_abced	permuted 5D tensor
dnnl_abcdfe	permuted 6D tensor
dnnl_abcdegf	permuted 7D tensor
dnnl_abcdefhg	permuted 8D tensor
dnnl_abcdefgih	permuted 9D tensor
dnnl_abcdefghji	permuted 10D tensor
dnnl_abcdefghikj	permuted 11D tensor
dnnl_abcdefghijlk	permuted 12D tensor
dnnl_aBc16b	3D tensor blocked by 2nd dimension with block size 16
dnnl_aBc32b	3D tensor blocked by 2nd dimension with block size 32
dnnl_aBc4b	3D tensor blocked by 2nd dimension with block size 4
dnnl_aBc8b	3D tensor blocked by 2nd dimension with block size 8
dnnl_aBcd16b	4D tensor blocked by 2nd dimension with block size 16
dnnl_aBcd32b	4D tensor blocked by 2nd dimension with block size 32
dnnl_aBcd4b	4D tensor blocked by 2nd dimension with block size 4
dnnl_aBcd8b	4D tensor blocked by 2nd dimension with block size 8
dnnl_ABcd8b8a	4D tensor blocked by 1st and 2nd dimension with block size 8
dnnl_aBCd2b4c2b	4D tensor blocked by 3rd dimension with block size 4
dnnl_ABcde4b16a4b	5D tensor blocked by 1st dimension with block size 16
dnnl_ABcde2b8a4b	5D tensor blocked by 1st dimension with block size 8
dnnl_aBcde16b	5D tensor blocked by 2nd dimension with block size 16
dnnl_aBcde32b	5D tensor blocked by 2nd dimension with block size 32
dnnl_aBcde4b	5D tensor blocked by 2nd dimension with block size 4
dnnl_aBcde8b	5D tensor blocked by 2nd dimension with block size 8
dnnl_aBCde2b4c2b	5D tensor blocked by 3rd dimension with block size 4
dnnl_aBcdef16b	6D tensor blocked by 2nd dimension with block size 16
dnnl_aBCdef2c8b4c	6D tensor blocked by 2nd dimension with block size 8
dnnl_aBCdef2b4c2b	6D tensor blocked by 3rd dimension with block size 4
dnnl_aBcdef4b	6D tensor blocked by 2nd dimension with block size 4
dnnl_format_tag_last	Just a sentinel, not real memory format tag. Must be changed after new format tag is added.
dnnl_x	1D tensor, an alias to dnnl_a
dnnl_nc	2D CNN activations tensor, an alias to dnnl_ab
dnnl_cn	2D CNN activations tensor, an alias to dnnl_ba
dnnl_tn	2D RNN statistics tensor, an alias to dnnl_ab
dnnl_nt	2D RNN statistics tensor, an alias to dnnl_ba
dnnl_ncw	3D CNN activations tensor, an alias to dnnl_abc
dnnl_nwc	3D CNN activations tensor, an alias to dnnl_acb
dnnl_nchw	4D CNN activations tensor, an alias to dnnl_abcd
dnnl_nhwc	4D CNN activations tensor, an alias to dnnl_acdb
dnnl_chwn	4D CNN activations tensor, an alias to dnnl_bcda
dnnl_ncdhw	5D CNN activations tensor, an alias to dnnl_abcde
dnnl_ndhwc	5D CNN activations tensor, an alias to dnnl_acdeb
dnnl_oi	2D CNN weights tensor, an alias to dnnl_ab
dnnl_io	2D CNN weights tensor, an alias to dnnl_ba
dnnl_oiw	3D CNN weights tensor, an alias to dnnl_abc
dnnl_owi	3D CNN weights tensor, an alias to dnnl_acb
dnnl_wio	3D CNN weights tensor, an alias to dnnl_cba
dnnl_iwo	3D CNN weights tensor, an alias to dnnl_bca
dnnl_oihw	4D CNN weights tensor, an alias to dnnl_abcd
dnnl_hwio	4D CNN weights tensor, an alias to dnnl_cdba
dnnl_ohwi	4D CNN weights tensor, an alias to dnnl_acdb
dnnl_ihwo	4D CNN weights tensor, an alias to dnnl_bcda
dnnl_iohw	4D CNN weights tensor, an alias to dnnl_bacd
dnnl_oidhw	5D CNN weights tensor, an alias to dnnl_abcde
dnnl_iodhw	5D CNN weights tensor, an alias to dnnl_bacde
dnnl_dhwio	5D CNN weights tensor, an alias to dnnl_cdeba
dnnl_odhwi	5D CNN weights tensor, an alias to dnnl_acdeb
dnnl_idhwo	5D CNN weights tensor, an alias to dnnl_bcdea
dnnl_goiw	4D CNN weights tensor (incl. groups), an alias to dnnl_abcd
dnnl_gowi	4D CNN weights tensor (incl. groups), an alias to dnnl_abdc
dnnl_wigo	4D CNN weights tensor (incl. groups), an alias to dnnl_dcab
dnnl_goihw	5D CNN weights tensor (incl. groups), an alias to dnnl_abcde
dnnl_gohwi	5D CNN weights tensor (incl. groups), an alias to dnnl_abdec
dnnl_hwigo	5D CNN weights tensor (incl. groups), an alias to dnnl_decab
dnnl_giohw	5D CNN weights tensor (incl. groups), an alias to dnnl_acbde
dnnl_goidhw	6D CNN weights tensor (incl. groups), an alias to dnnl_abcdef
dnnl_godhwi	6D CNN weights tensor (incl. groups), an alias to dnnl_abdefc
dnnl_giodhw	6D CNN weights tensor (incl. groups), an alias to dnnl_acbdef
dnnl_dhwigo	6D CNN weights tensor (incl. groups), an alias to dnnl_defcab
dnnl_tnc	3D RNN data tensor in the format (seq_length, batch, input channels).
dnnl_ntc	3D RNN data tensor in the format (batch, seq_length, input channels).
dnnl_ldnc	4D RNN states tensor in the format (num_layers, num_directions, batch, state channels).
dnnl_ldigo	5D RNN weights tensor in the format (num_layers, num_directions, input_channels, num_gates, output_channels). For LSTM cells, the gates order is input, forget, candidate and output gate. For GRU cells, the gates order is update, reset and output gate.
dnnl_ldgoi	5D RNN weights tensor in the format (num_layers, num_directions, num_gates, output_channels, input_channels). For LSTM cells, the gates order is input, forget, candidate and output gate. For GRU cells, the gates order is update, reset and output gate.
dnnl_ldio	4D LSTM projection tensor in the format (num_layers, num_directions, num_channels_in_hidden_state, num_channels_in_recurrent_projection).
dnnl_ldoi	4D LSTM projection tensor in the format (num_layers, num_directions, num_channels_in_recurrent_projection, num_channels_in_hidden_state).
dnnl_ldgo	4D RNN bias tensor in the format (num_layers, num_directions, num_gates, output_channels). For LSTM cells, the gates order is input, forget, candidate and output gate. For GRU cells, the gates order is update, reset and output gate.
dnnl_ldOi32o	5D LSTM projection tensor
dnnl_ldgOi32o	6D RNN weights tensor
dnnl_nCdhw32c	5D CNN activations tensor blocked by channels with block size 32, an alias to dnnl_aBcde32b
dnnl_nCdhw16c	5D CNN activations tensor blocked by channels with block size 16, an alias to dnnl_aBcde16b
dnnl_nCdhw4c	5D CNN activations tensor blocked by channels with block size 4, an alias to dnnl_aBcde4b
dnnl_nCdhw8c	5D CNN activations tensor blocked by channels with block size 8, an alias to dnnl_aBcde8b
dnnl_nChw32c	4D CNN activations tensor blocked by channels with block size 32, an alias to dnnl_aBcd32b
dnnl_nChw16c	4D CNN activations tensor blocked by channels with block size 16, an alias to dnnl_aBcd16b
dnnl_nChw4c	4D CNN activations tensor blocked by channels with block size 4, an alias to dnnl_aBcd4b
dnnl_nChw8c	4D CNN activations tensor blocked by channels with block size 8, an alias to dnnl_aBcd8b
dnnl_nCw32c	3D CNN activations tensor blocked by channels with block size 32, an alias to dnnl_aBc32b
dnnl_nCw16c	3D CNN activations tensor blocked by channels with block size 16, an alias to dnnl_aBc16b
dnnl_nCw4c	3D CNN activations tensor blocked by channels with block size 4, an alias to dnnl_aBc4b
dnnl_nCw8c	3D CNN activations tensor blocked by channels with block size 8, an alias to dnnl_aBc8b

◆ dnnl_wino_memory_format_t

enum dnnl_wino_memory_format_t

Winograd-specific formats.

Enumerator
dnnl_wino_undef	Undefined memory format, used for empty memory descriptors.
dnnl_wino_wei_aaOIoi	Internal weights format for 2x3 Winograd.
dnnl_wino_wei_aaOio	Internal weights format for 2x3 Winograd.
dnnl_wino_wei_aaOBiOo	Internal weights format for 2x3 Winograd.
dnnl_wino_wei_OBaaIBOIio	Internal weights format for 4x3 Winograd.

◆ dnnl_memory_extra_flags_t

enum dnnl_memory_extra_flags_t

Flags for memory special features.

Enumerator

dnnl_memory_extra_flag_compensation_conv_s8s8

Indicates the weights have an additional buffer, that depends on the compensation_mask.

For instance, in the 4D case with the compensation mask equal to (1 << 0), the additional buffer would consist of OC values: O[oc : 0,OC] = -128 * SUM(ic : 0,IC; kh : 0,KH; kw : 0,KW){ weights(oc, ic, kh, kw) }

Function Documentation

◆ dnnl_memory_desc_init_by_strides()

dnnl_status_t DNNL_API dnnl_memory_desc_init_by_strides	(	dnnl_memory_desc_t *	memory_desc,
		int	ndims,
		const dnnl_dims_t	dims,
		dnnl_data_type_t	data_type,
		const dnnl_dims_t	strides
	)

Initializes a memory descriptor using dimensions and strides.

Note: As always, the logical order of dimensions corresponds to the abc... format tag, and the physical meaning of the dimensions depends on both the primitive that consumes the memory and the context of that consumption.

Parameters

memory_desc	Output memory descriptor.
ndims	Number of dimensions
dims	Array of dimensions.
data_type	Elements data type.
strides	Strides in each dimension.

Returns: dnnl_success on success and a status describing the error otherwise.

◆ dnnl_memory_desc_init_by_tag()

dnnl_status_t DNNL_API dnnl_memory_desc_init_by_tag	(	dnnl_memory_desc_t *	memory_desc,
		int	ndims,
		const dnnl_dims_t	dims,
		dnnl_data_type_t	data_type,
		dnnl_format_tag_t	tag
	)

Initializes a memory descriptor using dimensions and memory format tag.

Note: As always, the logical order of dimensions corresponds to the abc... format tag, and the physical meaning of the dimensions depends on both the primitive that consumes the memory and the context of that consumption.

Parameters

memory_desc	Output memory descriptor.
ndims	Number of dimensions
dims	Array of dimensions.
data_type	Elements data type.
tag	Memory format tag. Can be dnnl_format_tag_any, which would allow a primitive to choose the final memory format. In this case the format_kind field of the memory descriptor would be set to dnnl_format_kind_any.

Returns: dnnl_success on success and a status describing the error otherwise.

Examples: cnn_inference_f32.c, cpu_cnn_training_f32.c, and cross_engine_reorder.c.

◆ dnnl_memory_desc_init_submemory()

dnnl_status_t DNNL_API dnnl_memory_desc_init_submemory	(	dnnl_memory_desc_t *	memory_desc,
		const dnnl_memory_desc_t *	parent_memory_desc,
		const dnnl_dims_t	dims,
		const dnnl_dims_t	offsets
	)

Initializes a memory descriptor for a region inside an area described by an existing memory descriptor.

Warning: Some combinations of physical memory layout and/or offsets or dims may result in a failure to create a submemory.

Parameters

memory_desc	Output memory descriptor.
parent_memory_desc	An existing memory descriptor.
dims	Sizes of the region.
offsets	Offsets to the region from the encompassing memory object in each dimension

Returns: dnnl_success on success and a status describing the error otherwise.

◆ dnnl_memory_desc_reshape()

dnnl_status_t DNNL_API dnnl_memory_desc_reshape	(	dnnl_memory_desc_t *	out_memory_desc,
		const dnnl_memory_desc_t *	in_memory_desc,
		int	ndims,
		const dnnl_dims_t	dims
	)

Initializes a memory descriptor by reshaping an existing one.

The new memory descriptor inherits the data type. This operation is valid only for memory descriptors that have format_kind set to dnnl_blocked or dnnl_format_kind_any.

The operation ensures the transformation of the physical memory format corresponds to the transformation of the logical dimensions. If such transformation is impossible, the function returns dnnl_invalid_arguments.

The reshape operation can be described as a combination of the following basic operations:

Add a dimension of size 1. This is always possible.
Remove a dimension of size 1. This is possible only if the dimension has no padding (i.e. padded_dims[dim] == dims[dim] && dims[dim] == 1).
Split a dimension into multiple ones. This is possible only if the size of the dimension is exactly equal to the product of the split ones and the dimension does not have padding (i.e. padded_dims[dim] == dims[dim]).
Joining multiple consecutive dimensions into a single one. As in the cases above, this requires that the dimensions do not have padding and that the memory format is such that in physical memory these dimensions are dense and have the same order as their logical counterparts. This also assumes that these dimensions are not blocked.
- Here, dense means: stride for dim[i] == (stride for dim[i + 1]) * dim[i + 1];
- And same order means: i < j if and only if stride for dim[j] <= stride for dim[i].

Warning: Some combinations of physical memory layout and/or offsets or dimensions may result in a failure to make a reshape.

Parameters

out_memory_desc	Output memory descriptor.
in_memory_desc	An existing memory descriptor. Must have format_kind set to dnnl_blocked or dnnl_format_kind_any.
ndims	Number of dimensions for the output memory descriptor.
dims	Dimensions for the output memory descriptor.

Returns: dnnl_success on success and a status describing the error otherwise.

◆ dnnl_memory_desc_permute_axes()

dnnl_status_t DNNL_API dnnl_memory_desc_permute_axes	(	dnnl_memory_desc_t *	out_memory_desc,
		const dnnl_memory_desc_t *	in_memory_desc,
		const int *	permutation
	)

Initializes a memory descriptor by permuting axes in an existing one.

The physical memory layout representation is adjusted accordingly to maintain the consistency between the logical and physical parts of the memory descriptor.

The new memory descriptor inherits the data type. This operation is valid only for memory descriptors that have format_kind set to dnnl_blocked or dnnl_format_kind_any.

The logical axes will be permuted in the following manner:

for (i: 0 .. in_memory_desc->ndims)

out_memory_desc->dims[permutation[i]] = in_memory_desc->dims[i];

Example:

dnnl_memory_desc_t in_md, out_md, expect_out_md;
const int permutation[] = {1, 0}; // swap the first and the second axes
dnnl_dims_t in_dims = {2, 3}, out_dims = {3, 2};
dnnl_format_tag_t in_tag = dnnl_ab, out_tag = dnnl_ba;
dnnl_memory_desc_init_by_tag(
        &in_md, 2, in_dims, data_type, in_tag);
dnnl_memory_desc_init_by_tag(
        &expect_out_md, 2, out_dims, data_type, out_tag);
dnnl_memory_desc_permute_axes(&out_md, &in_md, permutation);
assert(dnnl_memory_desc_equal(&out_md, &expect_out_md));

Parameters

out_memory_desc	Output memory descriptor.
in_memory_desc	An existing memory descriptor. Must have format_kind set to dnnl_blocked or dnnl_format_kind_any.
permutation	Axes permutation (of size `in_memory_desc->ndims`).

Returns: dnnl_success on success and a status describing the error otherwise.

◆ dnnl_memory_desc_equal()

int DNNL_API dnnl_memory_desc_equal	(	const dnnl_memory_desc_t *	lhs,
		const dnnl_memory_desc_t *	rhs
	)

Compares two memory descriptors.

Use this function to identify whether a reorder is required between the two memories.

Parameters

lhs	Left-hand side of the comparison.
rhs	Right-hand side of the comparison.

Returns: 1 if the descriptors are the same; 0 if the descriptors are different.

Examples: cnn_inference_f32.c and cpu_cnn_training_f32.c.

◆ dnnl_memory_desc_get_size()

size_t DNNL_API dnnl_memory_desc_get_size ( const dnnl_memory_desc_t * memory_desc )

Returns the size of a memory descriptor.

Parameters

memory_desc Memory descriptor.

Returns: The number of bytes required for memory described by a memory descriptor.

◆ dnnl_memory_create()

dnnl_status_t DNNL_API dnnl_memory_create	(	dnnl_memory_t *	memory,
		const dnnl_memory_desc_t *	memory_desc,
		dnnl_engine_t	engine,
		void *	handle
	)

Creates a memory object.

Unless handle is equal to DNNL_MEMORY_NONE, the constructed memory object will have the underlying buffer set. In this case, the buffer will be initialized as if dnnl_memory_set_data_handle() had been called.

See also: dnnl_memory_set_data_handle()

Parameters

memory	Output memory object.
memory_desc	Memory descriptor.
engine	Engine to use.
handle	Handle of the memory buffer to use as an underlying storage. Must be one of the following: (1) a pointer to a user-allocated buffer, in which case the library doesn't own the buffer; (2) the DNNL_MEMORY_ALLOCATE special value, which instructs the library to allocate the buffer for the memory object, in which case the library owns the buffer; (3) DNNL_MEMORY_NONE, to create dnnl_memory without an underlying buffer.

Returns: dnnl_success on success and a status describing the error otherwise.

Examples: cnn_inference_f32.c, cpu_cnn_training_f32.c, and cross_engine_reorder.c.

◆ dnnl_memory_get_memory_desc()

dnnl_status_t DNNL_API dnnl_memory_get_memory_desc	(	const_dnnl_memory_t	memory,
		const dnnl_memory_desc_t **	memory_desc
	)

Returns the memory descriptor for a memory object.

Parameters

memory	Memory object.
memory_desc	Output memory descriptor (a copy).

Returns: dnnl_success on success and a status describing the error otherwise.

Examples: cnn_inference_f32.c and cpu_cnn_training_f32.c.

◆ dnnl_memory_get_engine()

dnnl_status_t DNNL_API dnnl_memory_get_engine	(	const_dnnl_memory_t	memory,
		dnnl_engine_t *	engine
	)

Returns the engine of a memory object.

Parameters

memory	Memory object.
engine	Output engine on which the memory is located.

Returns: dnnl_success on success and a status describing the error otherwise.

Examples: cnn_inference_f32.c and cpu_cnn_training_f32.c.

◆ dnnl_memory_map_data()

dnnl_status_t DNNL_API dnnl_memory_map_data	(	const_dnnl_memory_t	memory,
		void **	mapped_ptr
	)

Maps a memory object and returns a host-side pointer to a memory buffer with a copy of its contents.

Mapping enables explicit direct access to memory contents for the engines that do not support it implicitly.

Mapping is an exclusive operation - a memory object cannot be used in other operations until this memory object is unmapped.

Note: Any primitives working with memory should be completed before the memory is mapped. Use dnnl_stream_wait() to synchronize the corresponding execution stream. Note also that the dnnl_memory_map_data() and dnnl_memory_unmap_data() functions are mainly provided for debug and testing purposes, and their performance may be suboptimal.

Parameters

memory	Memory object.
mapped_ptr	Output pointer to the mapped buffer.

Returns: dnnl_success on success and a status describing the error otherwise.

◆ dnnl_memory_unmap_data()

dnnl_status_t DNNL_API dnnl_memory_unmap_data	(	const_dnnl_memory_t	memory,
		void *	mapped_ptr
	)

Unmaps a memory object and writes back any changes made to the previously mapped memory buffer.

The pointer to the mapped buffer must be obtained via the dnnl_memory_map_data() call.

Note: The dnnl_memory_map_data() and dnnl_memory_unmap_data() functions are mainly provided for debug and testing purposes, and their performance may be suboptimal.

Parameters

memory	Memory object.
mapped_ptr	Pointer to the mapped buffer that must have been obtained using the dnnl_memory_map_data() function.

Returns: dnnl_success on success and a status describing the error otherwise.

◆ dnnl_memory_get_data_handle()

dnnl_status_t DNNL_API dnnl_memory_get_data_handle	(	const_dnnl_memory_t	memory,
		void **	handle
	)

Returns memory object's data handle.

Parameters

memory	Memory object.
handle	Output data handle. For the CPU engine, the data handle is a pointer to the actual data. For OpenCL it is a cl_mem.

Returns: dnnl_success on success and a status describing the error otherwise.

Examples: cpu_cnn_training_f32.c.

◆ dnnl_memory_set_data_handle()

dnnl_status_t DNNL_API dnnl_memory_set_data_handle	(	dnnl_memory_t	memory,
		void *	handle
	)

Sets the underlying memory buffer.

See the description of dnnl_memory_set_data_handle_v2() for more details.

Parameters

memory	Memory object.
handle	Data handle. For the CPU engine, the data handle is a pointer to the actual data. For OpenCL it is a `cl_mem`.

Returns: dnnl_success on success and a status describing the error otherwise.

Examples: cpu_cnn_training_f32.c.

◆ dnnl_memory_set_data_handle_v2()

dnnl_status_t DNNL_API dnnl_memory_set_data_handle_v2	(	dnnl_memory_t	memory,
		void *	handle,
		dnnl_stream_t	stream
	)

Sets the underlying memory buffer.

This function may write zero values to the memory specified by the handle if the memory object has a zero padding area. This may be time consuming and happens each time this function is called. The operation is always blocking and the stream parameter is a hint.

Note: The zero padding is required by memory objects created with blocked memory format tags like dnnl_aBcd8b when any of the dimensions is not a multiple of the corresponding block size. For "plain" formats like dnnl_nchw or dnnl_nhwc, the zero padding area needs to be set up explicitly when creating the corresponding memory descriptors. See Understanding Memory Formats for more details. Note also that even when the memory object is used to hold values that stay constant during the execution of the program (pre-packed weights during inference, for example), the function will still write zeroes to the padding area if it exists. Hence, the handle parameter cannot and does not have a const qualifier.

Parameters

memory	Memory object.
handle	Data handle. For the CPU engine, the data handle is a pointer to the actual data. For OpenCL it is a `cl_mem`.
stream	Stream to use to execute padding in.

Returns: dnnl_success on success and a status describing the error otherwise.

◆ dnnl_memory_destroy()

dnnl_status_t DNNL_API dnnl_memory_destroy ( dnnl_memory_t memory )

Destroys a memory object.

Parameters

memory Memory object to destroy.

Returns: dnnl_success on success and a status describing the error otherwise.

Examples: cnn_inference_f32.c, cpu_cnn_training_f32.c, and cross_engine_reorder.c.

Classes

Macros

Typedefs

Enumerations

Functions

Detailed Description

Macro Definition Documentation

◆ DNNL_MAX_NDIMS

◆ DNNL_RUNTIME_DIM_VAL

◆ DNNL_RUNTIME_SIZE_VAL

◆ DNNL_RUNTIME_F32_VAL

◆ DNNL_RUNTIME_S32_VAL

◆ DNNL_RNN_MAX_N_PARTS

◆ DNNL_MEMORY_NONE

◆ DNNL_MEMORY_ALLOCATE

Enumeration Type Documentation

◆ dnnl_data_type_t

◆ dnnl_format_kind_t

◆ dnnl_format_tag_t

◆ dnnl_wino_memory_format_t

◆ dnnl_memory_extra_flags_t

Function Documentation

◆ dnnl_memory_desc_init_by_strides()

◆ dnnl_memory_desc_init_by_tag()

◆ dnnl_memory_desc_init_submemory()

◆ dnnl_memory_desc_reshape()

◆ dnnl_memory_desc_permute_axes()

◆ dnnl_memory_desc_equal()

◆ dnnl_memory_desc_get_size()

◆ dnnl_memory_create()

◆ dnnl_memory_get_memory_desc()

◆ dnnl_memory_get_engine()

◆ dnnl_memory_map_data()

◆ dnnl_memory_unmap_data()

◆ dnnl_memory_get_data_handle()

◆ dnnl_memory_set_data_handle()

◆ dnnl_memory_set_data_handle_v2()

◆ dnnl_memory_destroy()