BRGeMM ukernel

Overview

BRGeMM ukernel routines. More…

// typedefs

typedef struct dnnl_brgemm* dnnl_brgemm_t;
typedef const struct dnnl_brgemm* const_dnnl_brgemm_t;
typedef struct dnnl_brgemm_pack_b* dnnl_brgemm_pack_b_t;
typedef const struct dnnl_brgemm_pack_b* const_dnnl_brgemm_pack_b_t;

// structs

struct dnnl::ukernel::brgemm;
struct dnnl::ukernel::brgemm_pack_b;
struct dnnl_brgemm;
struct dnnl_brgemm_pack_b;

// global functions

dnnl_status_t DNNL_API dnnl_brgemm_create(
    dnnl_brgemm_t* brgemm,
    dnnl_dim_t M,
    dnnl_dim_t N,
    dnnl_dim_t K,
    dnnl_dim_t batch_size,
    dnnl_dim_t lda,
    dnnl_dim_t ldb,
    dnnl_dim_t ldc,
    dnnl_dim_t ldd,
    dnnl_data_type_t a_dt,
    dnnl_data_type_t b_dt,
    dnnl_data_type_t c_dt,
    dnnl_data_type_t d_dt,
    float alpha,
    float beta,
    const_dnnl_primitive_attr_t attr
    );

dnnl_status_t DNNL_API dnnl_brgemm_get_scratchpad_size(
    const_dnnl_brgemm_t brgemm,
    size_t* size
    );

dnnl_status_t DNNL_API dnnl_brgemm_set_hw_context(const_dnnl_brgemm_t brgemm);
dnnl_status_t DNNL_API dnnl_brgemm_release_hw_context();
dnnl_status_t DNNL_API dnnl_brgemm_generate(dnnl_brgemm_t brgemm);

dnnl_status_t DNNL_API dnnl_brgemm_execute(
    const_dnnl_brgemm_t brgemm,
    const void* A_ptr,
    const void* B_ptr,
    const dnnl_dim_t* A_B_offsets,
    void* C_ptr,
    void* scratchpad_ptr
    );

dnnl_status_t DNNL_API dnnl_brgemm_execute_postops(
    const_dnnl_brgemm_t brgemm,
    const void* A,
    const void* B,
    const dnnl_dim_t* A_B_offsets,
    const void* C_ptr,
    void* D_ptr,
    void* scratchpad_ptr,
    const void* binary_po_ptr
    );

dnnl_status_t DNNL_API dnnl_brgemm_destroy(dnnl_brgemm_t brgemm);

dnnl_status_t DNNL_API dnnl_brgemm_pack_b_create(
    dnnl_brgemm_pack_b_t* brgemm_pack_b,
    dnnl_dim_t K,
    dnnl_dim_t N,
    dnnl_dim_t in_ld,
    dnnl_dim_t out_ld,
    dnnl_data_type_t in_dt,
    dnnl_data_type_t out_dt
    );

dnnl_status_t DNNL_API dnnl_brgemm_pack_b_need_pack(
    const_dnnl_brgemm_pack_b_t brgemm_pack_b,
    int* need_pack
    );

dnnl_status_t DNNL_API dnnl_brgemm_pack_b_generate(dnnl_brgemm_pack_b_t brgemm_pack_b);

dnnl_status_t DNNL_API dnnl_brgemm_pack_b_execute(
    const_dnnl_brgemm_pack_b_t brgemm_pack_b,
    const void* in_ptr,
    void* out_ptr
    );

dnnl_status_t DNNL_API dnnl_brgemm_pack_b_destroy(dnnl_brgemm_pack_b_t brgemm_pack_b);

Detailed Documentation

BRGeMM ukernel routines.

Typedefs

typedef struct dnnl_brgemm* dnnl_brgemm_t

A brgemm ukernel handle.

typedef const struct dnnl_brgemm* const_dnnl_brgemm_t

A constant brgemm ukernel handle.

typedef struct dnnl_brgemm_pack_b* dnnl_brgemm_pack_b_t

A brgemm ukernel packing B routine handle.

typedef const struct dnnl_brgemm_pack_b* const_dnnl_brgemm_pack_b_t

A constant brgemm ukernel packing B routine handle.

Global Functions

dnnl_status_t DNNL_API dnnl_brgemm_create(
    dnnl_brgemm_t* brgemm,
    dnnl_dim_t M,
    dnnl_dim_t N,
    dnnl_dim_t K,
    dnnl_dim_t batch_size,
    dnnl_dim_t lda,
    dnnl_dim_t ldb,
    dnnl_dim_t ldc,
    dnnl_dim_t ldd,
    dnnl_data_type_t a_dt,
    dnnl_data_type_t b_dt,
    dnnl_data_type_t c_dt,
    dnnl_data_type_t d_dt,
    float alpha,
    float beta,
    const_dnnl_primitive_attr_t attr
    )

Creates a BRGeMM ukernel object.

Operates by the following formula: C = alpha * [A x B] + beta * C. D = post-operations(C).

Post-operations apply if one of the following holds:

  • Non-empty attributes are specified.

  • Output data type d_dt is different from accumulation data type c_dt.

If any of these conditions holds, the final call of the accumulation chain must be dnnl_brgemm_execute_postops; otherwise, it must be dnnl_brgemm_execute.

Parameters:

brgemm

Output BRGeMM ukernel object.

M

Dimension M of tensor A.

N

Dimension N of tensor B.

K

Dimension K of tensors A and B.

batch_size

Number of batches to process.

lda

Leading dimension of tensor A.

ldb

Leading dimension of tensor B.

ldc

Leading dimension of tensor C.

ldd

Leading dimension of tensor D.

a_dt

Data type of tensor A.

b_dt

Data type of tensor B.

c_dt

Data type of tensor C. Must be dnnl_f32.

d_dt

Data type of tensor D.

alpha

Scale for an accumulation output.

beta

Scale for tensor C, which is added to the accumulation output.

attr

Primitive attributes to extend the kernel operations.

Returns:

dnnl_success on success and a status describing the error otherwise.

dnnl_status_t DNNL_API dnnl_brgemm_get_scratchpad_size(
    const_dnnl_brgemm_t brgemm,
    size_t* size
    )

Returns the size of a scratchpad memory needed for the BRGeMM ukernel object.

Parameters:

brgemm

BRGeMM ukernel object.

size

Output size of a buffer required for the BRGeMM ukernel object.

Returns:

dnnl_success on success and a status describing the error otherwise.

dnnl_status_t DNNL_API dnnl_brgemm_set_hw_context(const_dnnl_brgemm_t brgemm)

Initializes the hardware-specific context.

If no initialization is required, returns the success status.

Parameters:

brgemm

BRGeMM ukernel object.

Returns:

dnnl_success on success and a status describing the error otherwise.

dnnl_status_t DNNL_API dnnl_brgemm_release_hw_context()

Releases the hardware-specific context.

Must be used after all the execution calls to BRGeMM ukernel objects.

Returns:

dnnl_success on success and a status describing the error otherwise.

dnnl_status_t DNNL_API dnnl_brgemm_generate(dnnl_brgemm_t brgemm)

Generates an executable part of the BRGeMM ukernel object.

Parameters:

brgemm

BRGeMM ukernel object.

Returns:

dnnl_success on success and a status describing the error otherwise.

dnnl_status_t DNNL_API dnnl_brgemm_execute(
    const_dnnl_brgemm_t brgemm,
    const void* A_ptr,
    const void* B_ptr,
    const dnnl_dim_t* A_B_offsets,
    void* C_ptr,
    void* scratchpad_ptr
    )

Executes a BRGeMM ukernel object.

Parameters:

brgemm

BRGeMM ukernel object.

A_ptr

Base pointer to a tensor A.

B_ptr

Base pointer to a tensor B.

A_B_offsets

Pointer to the set of tensor A and tensor B offsets for each batch; the set must be contiguous in memory. A single batch should supply offsets for both tensors A and B simultaneously. The number of batches must coincide with the batch_size value passed at the creation stage.

C_ptr

Pointer to a tensor C (accumulation buffer).

scratchpad_ptr

Pointer to a scratchpad buffer.

Returns:

dnnl_success on success and a status describing the error otherwise.

dnnl_status_t DNNL_API dnnl_brgemm_execute_postops(
    const_dnnl_brgemm_t brgemm,
    const void* A,
    const void* B,
    const dnnl_dim_t* A_B_offsets,
    const void* C_ptr,
    void* D_ptr,
    void* scratchpad_ptr,
    const void* binary_po_ptr
    )

Executes a BRGeMM ukernel object with post operations.

Parameters:

brgemm

BRGeMM ukernel object.

A

Base pointer to a tensor A.

B

Base pointer to a tensor B.

A_B_offsets

Pointer to a set of tensor A and tensor B offsets for each batch. A set must be contiguous in memory. A single batch should supply offsets for both tensors A and B simultaneously. The number of batches must coincide with the batch_size value passed at the creation stage.

C_ptr

Pointer to a tensor C (accumulation buffer).

D_ptr

Pointer to a tensor D (output buffer).

scratchpad_ptr

Pointer to a scratchpad buffer.

binary_po_ptr

Pointer to binary post-op data.

Returns:

dnnl_success on success and a status describing the error otherwise.

dnnl_status_t DNNL_API dnnl_brgemm_destroy(dnnl_brgemm_t brgemm)

Destroys a BRGeMM ukernel object.

Parameters:

brgemm

BRGeMM ukernel object to destroy.

Returns:

dnnl_success on success and a status describing the error otherwise.

dnnl_status_t DNNL_API dnnl_brgemm_pack_b_create(
    dnnl_brgemm_pack_b_t* brgemm_pack_b,
    dnnl_dim_t K,
    dnnl_dim_t N,
    dnnl_dim_t in_ld,
    dnnl_dim_t out_ld,
    dnnl_data_type_t in_dt,
    dnnl_data_type_t out_dt
    )

Creates a BRGeMM ukernel packing tensor B object.

Parameters:

brgemm_pack_b

Output BRGeMM ukernel packing B object.

K

Dimension K.

N

Dimension N.

in_ld

Input leading dimension.

out_ld

Output leading dimension. Specifies a block along the N dimension during data packing.

in_dt

Input data type.

out_dt

Output data type.

Returns:

dnnl_success on success and a status describing the error otherwise.

dnnl_status_t DNNL_API dnnl_brgemm_pack_b_need_pack(
    const_dnnl_brgemm_pack_b_t brgemm_pack_b,
    int* need_pack
    )

Returns a flag indicating whether packing is expected by the BRGeMM ukernel.

Parameters:

brgemm_pack_b

BRGeMM ukernel packing B object.

need_pack

Output flag specifying if packing is needed. Possible values are 0 (not needed) and 1 (needed).

Returns:

dnnl_success on success and a status describing the error otherwise.

dnnl_status_t DNNL_API dnnl_brgemm_pack_b_generate(dnnl_brgemm_pack_b_t brgemm_pack_b)

Generates an executable part of the BRGeMM ukernel packing B object.

Parameters:

brgemm_pack_b

BRGeMM ukernel packing B object.

Returns:

dnnl_success on success and a status describing the error otherwise.

dnnl_status_t DNNL_API dnnl_brgemm_pack_b_execute(
    const_dnnl_brgemm_pack_b_t brgemm_pack_b,
    const void* in_ptr,
    void* out_ptr
    )

Executes a BRGeMM ukernel packing tensor B object.

Parameters:

brgemm_pack_b

BRGeMM ukernel packing B object.

in_ptr

Pointer to an input buffer.

out_ptr

Pointer to an output buffer.

Returns:

dnnl_success on success and a status describing the error otherwise.

dnnl_status_t DNNL_API dnnl_brgemm_pack_b_destroy(dnnl_brgemm_pack_b_t brgemm_pack_b)

Destroys a BRGeMM ukernel packing tensor B object.

Parameters:

brgemm_pack_b

BRGeMM ukernel packing B object to destroy.

Returns:

dnnl_success on success and a status describing the error otherwise.