DPCT1065
Message
Consider replacing sycl::<...>::barrier()
with sycl::<...>::barrier(sycl::access::fence_space::local_space)
for better performance if there is no access to global memory.
Detailed Help
The function sycl::<...>::barrier() ensures correct memory access ordering in
both the global and local address spaces. If the kernel function does not
access global memory, it is safe to replace sycl::<...>::barrier() with
sycl::<...>::barrier(sycl::access::fence_space::local_space) for better
performance.
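Suggestions to Fix
If the kernel does not access global memory, or if its global memory accesses
after the barrier do not depend on accesses to the same global memory before
the barrier by work-items in the same work-group, replace sycl::<...>::barrier()
with sycl::<...>::barrier(sycl::access::fence_space::local_space).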
For example, this original CUDA* code:
1struct Data_t {
2 float *host_data;
3 float *device_data;
4};
5
6__global__ void k(Data_t *data) {
7 auto tid = threadIdx.x + blockDim.x * blockIdx.x;
8 only_read_data(data[tid].device_data);
9 __syncthreads();
10 only_read_data(data[tid].device_data);
11}
results in the following migrated SYCL* code:
1struct Data_t {
2 float *host_data;
3 float *device_data;
4};
5
6void k(Data_t *data, const sycl::nd_item<3> &item_ct1) {
7 auto tid = item_ct1.get_local_id(2) +
8 item_ct1.get_local_range(2) * item_ct1.get_group(2);
9 only_read_data(data[tid].device_data);
10 /*
11 DPCT1065:0: Consider replacing sycl::nd_item::barrier() with
12 sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better
13 performance if there is no access to global memory.
14 */
15 item_ct1.barrier();
16 only_read_data(data[tid].device_data);
17}
which is rewritten to:
1struct Data_t {
2 float *host_data;
3 float *device_data;
4};
5
6void k(Data_t *data, const sycl::nd_item<3> &item_ct1) {
7 auto tid = item_ct1.get_local_id(2) +
8 item_ct1.get_local_range(2) * item_ct1.get_group(2);
9 only_read_data(data[tid].device_data);
10 // local_space can be used here instead of global_local_space because the access
11 // of the global memory after the barrier does not depend on (read-after-write,
12 // write-after-read, or write-after-write) the access of the same global memory
13 // before the barrier among work-items in the current work-group.
14 item_ct1.barrier(sycl::access::fence_space::local_space);
15 only_read_data(data[tid].device_data);
16}