

__activemask() is migrated to 0xffffffff. You may need to adjust the code.

Detailed Help

SYCL* currently has no functional equivalent of __activemask(). If flow control in your code can make some threads inactive before the mask is used, you need to rewrite the thread logic so that every work-item in the sub-group still reaches the sub-group operation.

For example, this original CUDA* code:

 1  __device__ inline int SHFL_SYNC(unsigned mask, int val, unsigned offset,
 2                                  unsigned w = warpSize) {
 3    return __shfl_down_sync(mask, val, offset, w);
 4  }
 6  __global__ void kernel(int *array) {
 7    unsigned int tid = threadIdx.x;
 8    if (tid >= 8)
 9      return;
10    unsigned mask = __activemask();
11    array[tid] = SHFL_SYNC(mask, array[tid], 4);
12  }

results in the following migrated SYCL code:

 1  inline int SHFL_SYNC(unsigned mask, int val, unsigned offset,
 2                       const sycl::nd_item<3> &item_ct1, unsigned w = 0) {
 3    /*
 4    DPCT1023:0: The SYCL sub-group does not support mask options for
 5    dpct::shift_sub_group_left. You can specify
 6    "--use-experimental-features=masked-sub-group-operation" to use the
 7    experimental helper function to migrate __shfl_down_sync.
 8    */
 9    if (!w) w = item_ct1.get_sub_group().get_local_range().get(0);
10    // This call will wait for all work-items to arrive which will never happen since only work-items with tid < 8 will encounter this call.
11    return dpct::shift_sub_group_left(item_ct1.get_sub_group(), val, offset, w);
12  }
14  void kernel(int *array, const sycl::nd_item<3> &item_ct1) {
15    unsigned int tid = item_ct1.get_local_id(2);
16    if (tid >= 8)
17      return;
18    /*
19    DPCT1086:1: __activemask() is migrated to 0xffffffff. You may need to adjust
20    the code.
21    */
22    unsigned mask = 0xffffffff;
23    array[tid] = SHFL_SYNC(mask, array[tid], 4, item_ct1);
24  }

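which is manually rewritten as follows, so that every work-item executes the shift and only work-items with tid < 8 use its result: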

 1  // remove mask parameter, as it is not used
 2  inline int SHFL_SYNC(int val, unsigned offset,
 3                       const sycl::nd_item<3> &item_ct1, unsigned w = 0) {
 4    if (!w) w = item_ct1.get_sub_group().get_local_range().get(0);
 5    unsigned int tid = item_ct1.get_local_id(2);
 6    // Use a temporary variable to save the result of sycl::shift_group_left() to make sure all work-items can encounter this call.
 7    int v_tmp = sycl::shift_group_left(item_ct1.get_sub_group(), val, offset);
 8    return (tid < 8) ? v_tmp : val;
 9  }
11  void kernel(int *array, const sycl::nd_item<3> &item_ct1) {
12    unsigned int tid = item_ct1.get_local_id(2);
13    // remove mask parameter, as it is not used
14    array[tid] = SHFL_SYNC(array[tid], 4, item_ct1);
15  }

Suggestions to Fix

Check if 0xffffffff can be used instead of __activemask(). If it cannot be used, redesign the thread logic.