csr_table.cpp

/*******************************************************************************
* Copyright 2023 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*******************************************************************************/

#ifndef ONEDAL_DATA_PARALLEL
#define ONEDAL_DATA_PARALLEL
#endif

#include <memory>
#include <numeric>
#include <iostream>
#include <algorithm>

#include "example_util/utils.hpp"

#include "oneapi/dal/table/csr.hpp"
#include "oneapi/dal/table/common.hpp"

#include "oneapi/dal/table/csr_accessor.hpp"

// Generate offsets expecting the table
// to have various amounts of data
// in each row
template <typename Index = std::int64_t>
dal::array<Index> generate_offsets(sycl::queue& queue,
                                   std::int64_t row_count,
                                   std::int64_t column_count) {
    const std::int64_t offset_count = row_count + 1l;
    Index* const raw_data = new Index[offset_count];

    for (std::int64_t row = 0l; row < offset_count; ++row) {
        raw_data[row] = static_cast<Index>(row);
    }

    // Offsets are a cumulative sum of element counts
    std::partial_sum(raw_data, raw_data + offset_count, raw_data);

    auto on_host = dal::array<Index>(raw_data,
                                     offset_count, //
                                     [](Index* const ptr) -> void {
                                         delete[] ptr;
                                     });

    return to_device(queue, on_host);
}

// Generate indices knowing, from the `offsets` array,
// how many elements should be in each row
template <typename Index = std::int64_t>
dal::array<Index> generate_indices(sycl::queue& queue,
                                   std::int64_t column_count,
                                   const dal::array<Index>& offsets_on_device) {
    dal::array<Index> offsets = to_host(offsets_on_device);
    const std::int64_t offset_count = offsets.get_count();
    const std::int64_t row_count = offset_count - 1l;
    const std::int64_t element_count = offsets[row_count];

    Index* const raw_data = new Index[element_count];

    for (std::int64_t row = 0l; row < row_count; ++row) {
        const std::int64_t first = offsets[row];
        const std::int64_t last = offsets[row + 1l];
        const std::int64_t count = last - first;

        // Fill the lower right triangular
        // part of the table (which is non-trivial)
        for (std::int64_t col = 0l; col < count; ++col) {
            const std::int64_t column = column_count - col - 1l;
            raw_data[first + col] = static_cast<Index>(column);
        }

        std::sort(raw_data + first, raw_data + last);
    }

    auto on_host = dal::array<Index>(raw_data,
                                     element_count, //
                                     [](Index* const ptr) -> void {
                                         delete[] ptr;
                                     });

    return to_device(queue, on_host);
}

// Generate data for the table in a deterministic way
template <typename Type = float, typename Index = std::int64_t>
dal::array<Type> generate_data(sycl::queue& queue, const dal::array<Index>& offsets) {
    const std::int64_t offset_count = offsets.get_count();
    const std::int64_t element_count = offsets[offset_count - 1];

    Type* const raw_data = new Type[element_count];

    for (std::int64_t i = 0l; i < element_count; ++i) {
        raw_data[i] = static_cast<Type>(element_count - i);
    }

    auto on_host = dal::array<Type>(raw_data,
                                    element_count, //
                                    [](Type* const ptr) -> void {
                                        delete[] ptr;
                                    });

    return to_device(queue, on_host);
}

void run(sycl::queue& queue) {
    constexpr std::int64_t row_count = 4;
    constexpr std::int64_t column_count = 10;

    // Generate data on a device
    auto offsets = generate_offsets(queue, row_count, column_count);
    auto indices = generate_indices(queue, column_count, offsets);
    auto data = generate_data(queue, offsets);

    // Wrap previously generated pieces of data
    dal::csr_table test_table = dal::csr_table::wrap(data, //
                                                     indices,
                                                     offsets,
                                                     column_count,
                                                     dal::sparse_indexing::zero_based);

    // Sanity checks for the table shape
    std::cout << "Number of rows in table: " << test_table.get_row_count() << '\n';
    std::cout << "Number of columns in table: " << test_table.get_column_count() << '\n';

    // Check the type of abstract table
    const bool is_csr = test_table.get_kind() == dal::csr_table::kind();
    std::cout << "Is CSR table: " << is_csr << '\n';

    // Extract CSR slice of data on the device
    dal::csr_accessor<const float> accessor{ test_table };
    auto [slice_data, slice_indices, slice_offsets] = accessor.pull(queue, { 1, 3 });

    // Move data to be readable on CPU
    dal::array<float> data_on_host = to_host(slice_data);
    std::cout << "Slice of elements (compressed): " << data_on_host << '\n';

    // Move indices to be readable on CPU
    dal::array<std::int64_t> indices_on_host = to_host(slice_indices);
    std::cout << "Slice of indices (compressed): " << indices_on_host << '\n';

    // Move offsets to be readable on CPU
    dal::array<std::int64_t> offsets_on_host = to_host(slice_offsets);
    std::cout << "Slice of offsets: " << offsets_on_host << std::endl;
}

int main(int argc, char** argv) {
    // Go through different devices
    for (auto d : list_devices()) {
        std::cout << "Running on " << d.get_platform().get_info<sycl::info::platform::name>()
                  << ", " << d.get_info<sycl::info::device::name>() << "\n"
                  << std::endl;
        auto q = sycl::queue{ d };
        run(q);
    }

    return 0;
}