struct dnnl::ukernel::brgemm

Overview

struct brgemm: public dnnl::handle
{
    // construction

    brgemm();

    brgemm(
        memory::dim M,
        memory::dim N,
        memory::dim K,
        memory::dim batch_size,
        memory::dim lda,
        memory::dim ldb,
        memory::dim ldc,
        memory::data_type a_dt,
        memory::data_type b_dt,
        memory::data_type c_dt,
        float alpha,
        float beta,
        bool allow_empty = false
        );

    brgemm(
        memory::dim M,
        memory::dim N,
        memory::dim K,
        memory::dim batch_size,
        memory::dim lda,
        memory::dim ldb,
        memory::dim ldc,
        memory::dim ldd,
        memory::data_type a_dt,
        memory::data_type b_dt,
        memory::data_type c_dt,
        memory::data_type d_dt,
        float alpha,
        float beta,
        const primitive_attr& attr,
        bool allow_empty = false
        );

    // methods

    size_t get_scratchpad_size() const;
    void set_hw_context() const;
    void generate();

    void execute(
        const void* A,
        const void* B,
        const std::vector<std::pair<memory::dim, memory::dim>>& A_B_offsets,
        void* C,
        void* scratchpad
        ) const;

    void execute(
        const void* A,
        const void* B,
        const std::vector<std::pair<memory::dim, memory::dim>>& A_B_offsets,
        void* C,
        void* D,
        void* scratchpad,
        const void* binary_po = nullptr
        ) const;

    static void release_hw_context();
};

Inherited Members

public:
    // methods

    handle<T, traits>& operator = (const handle<T, traits>&);
    handle<T, traits>& operator = (handle<T, traits>&&);
    void reset(T t, bool weak = false);
    T get(bool allow_empty = false) const;
    operator T () const;
    operator bool () const;
    bool operator == (const handle<T, traits>& other) const;
    bool operator != (const handle& other) const;

Detailed Documentation

Construction

brgemm()

Default constructor. Produces an empty object.

brgemm(
    memory::dim M,
    memory::dim N,
    memory::dim K,
    memory::dim batch_size,
    memory::dim lda,
    memory::dim ldb,
    memory::dim ldc,
    memory::data_type a_dt,
    memory::data_type b_dt,
    memory::data_type c_dt,
    float alpha,
    float beta,
    bool allow_empty = false
    )

Constructs a BRGeMM ukernel object.

Operates by the following formula: C = alpha * [A x B] + beta * C.

Parameters:

M

Dimension M of tensor A.

N

Dimension N of tensor B.

K

Dimension K of tensors A and B.

batch_size

Number of batches to process.

lda

Leading dimension of tensor A.

ldb

Leading dimension of tensor B.

ldc

Leading dimension of tensor C.

a_dt

Data type of tensor A.

b_dt

Data type of tensor B.

c_dt

Data type of tensor C.

alpha

Scale applied to the accumulated product [A x B].

beta

Scale applied to tensor C before it is added to the accumulated output.

allow_empty

A flag signifying whether construction is allowed to fail without throwing an exception. In this case an empty object will be produced. This flag is optional and defaults to false.

brgemm(
    memory::dim M,
    memory::dim N,
    memory::dim K,
    memory::dim batch_size,
    memory::dim lda,
    memory::dim ldb,
    memory::dim ldc,
    memory::dim ldd,
    memory::data_type a_dt,
    memory::data_type b_dt,
    memory::data_type c_dt,
    memory::data_type d_dt,
    float alpha,
    float beta,
    const primitive_attr& attr,
    bool allow_empty = false
    )

Constructs a BRGeMM ukernel object.

Operates by the following formula: C = alpha * [A x B] + beta * C; D = post-operations(C).

Parameters:

M

Dimension M of tensor A.

N

Dimension N of tensor B.

K

Dimension K of tensors A and B.

batch_size

Number of batches to process.

lda

Leading dimension of tensor A.

ldb

Leading dimension of tensor B.

ldc

Leading dimension of tensor C.

ldd

Leading dimension of tensor D.

a_dt

Data type of tensor A.

b_dt

Data type of tensor B.

c_dt

Data type of tensor C. Must be data_type::f32.

d_dt

Data type of tensor D.

alpha

Scale applied to the accumulated product [A x B].

beta

Scale applied to tensor C before it is added to the accumulated output.

attr

Primitive attributes to extend the kernel operations.

allow_empty

A flag signifying whether construction is allowed to fail without throwing an exception. In this case an empty object will be produced. This flag is optional and defaults to false.

Methods

size_t get_scratchpad_size() const

Returns the size of the scratchpad memory needed by the BRGeMM ukernel object.

void set_hw_context() const

Initializes the hardware-specific context.

Affects the global state for all BRGeMM ukernel objects. If no initialization is required, returns immediately.

void generate()

Generates the executable part of the BRGeMM ukernel object.

void execute(
    const void* A,
    const void* B,
    const std::vector<std::pair<memory::dim, memory::dim>>& A_B_offsets,
    void* C,
    void* scratchpad
    ) const

Executes a BRGeMM ukernel object.

Parameters:

A

Base pointer to a tensor A.

B

Base pointer to a tensor B.

A_B_offsets

Vector of (A offset, B offset) pairs, one pair per batch. The number of pairs must match the batch_size value passed when the object was constructed.

C

Pointer to a tensor C (accumulation buffer).

scratchpad

Pointer to a scratchpad buffer.

void execute(
    const void* A,
    const void* B,
    const std::vector<std::pair<memory::dim, memory::dim>>& A_B_offsets,
    void* C,
    void* D,
    void* scratchpad,
    const void* binary_po = nullptr
    ) const

Executes a BRGeMM ukernel object with post operations.

Parameters:

A

Base pointer to a tensor A.

B

Base pointer to a tensor B.

A_B_offsets

Vector of (A offset, B offset) pairs, one pair per batch. The number of pairs must match the batch_size value passed when the object was constructed.

C

Pointer to a tensor C (accumulation buffer).

D

Pointer to a tensor D (output buffer).

scratchpad

Pointer to a scratchpad buffer.

binary_po

Binary post-op memory buffer. Must be passed if a binary post-op was specified at construction.

static void release_hw_context()

Releases the hardware-specific context.

Affects the global state for all BRGeMM ukernel objects. Must be called after all execution calls on BRGeMM ukernel objects.