DPCT1042#

Message#

The size of the arguments passed to the SYCL kernel exceeds the minimum size limit (1024) for a non-custom SYCL device. You can get the hardware argument size limit by querying info::device::max_parameter_size. You may need to rewrite this code if the size of the arguments exceeds the hardware limit.

Detailed Help#

The size of the arguments passed to the SYCL* kernel for non-custom SYCL device has a limit (see SYCL 2020 standard, 4.6.4.2 Device information descriptors).

In cases where this warning occurs, you need to adjust the code manually to decrease the number of accessors or other arguments that are captured by the SYCL kernel lambda.

The example in the next section shows how you can remove one accessor by merging two buffers with the same type.

Suggestions to Fix#

Review the code and adjust it.

For example, this original CUDA* code:

 1 #define ARRAY_SIZE 2
 2
 3 __constant__ int device0[ARRAY_SIZE];
 4 __constant__ int device1[ARRAY_SIZE];
 5 __constant__ int device2[ARRAY_SIZE];
 6 ...
 7 __constant__ int device30[ARRAY_SIZE];
 8 __constant__ int device31[ARRAY_SIZE];
 9
10 // kernel function declaration
11 __global__ void kernel(int *out) {
12   int i = blockDim.x * blockIdx.x + threadIdx.x;
13   out[i] = device0[i] + device1[i] + device2[i] + ... +
14            device30[i] + device31[i];
15 }
16
17 void test_function(int *out) {
18   kernel<<<1, 1>>>(out);
19 }

results in the following migrated SYCL code:

 1 #define ARRAY_SIZE 2
 2
 3 static dpct::constant_memory<int, 1> device0(ARRAY_SIZE);
 4 static dpct::constant_memory<int, 1> device1(ARRAY_SIZE);
 5 static dpct::constant_memory<int, 1> device2(ARRAY_SIZE);
 6 ...
 7 static dpct::constant_memory<int, 1> device30(ARRAY_SIZE);
 8 static dpct::constant_memory<int, 1> device31(ARRAY_SIZE);
 9
10 // kernel function declaration
11 void kernel(int *out, const sycl::nd_item<3> &item_ct1, int const *device0,
12             int const *device1, int const *device2, ..., int const *device30,
13             int const *device31) {
14   int i = item_ct1.get_local_range(2) * item_ct1.get_group(2) +
15           item_ct1.get_local_id(2);
16   out[i] = device0[i] + device1[i] + device2[i] +
17            ... +
18            device30[i] + device31[i];
19 }
20
21 void test_function(int *out) {
22   device0.init();
23   device1.init();
24   device2.init();
25   ...
26   device30.init();
27   device31.init();
28
29   /*
30   DPCT1042:0: The size of the arguments passed to the SYCL kernel exceeds the
31   minimum size limit (1024) for a non-custom SYCL device. You can get the
32   hardware argument size limit by querying info::device::max_parameter_size. You
33   may need to rewrite this code if the size of the arguments exceeds the
34   hardware limit.
35   */
36   dpct::get_out_of_order_queue().submit([&](sycl::handler &cgh) {
37     auto device0_acc_ct1 = device0.get_access(cgh);
38     auto device1_acc_ct1 = device1.get_access(cgh);
39     auto device2_acc_ct1 = device2.get_access(cgh);
40     ...
41     auto device30_acc_ct1 = device30.get_access(cgh);
42     auto device31_acc_ct1 = device31.get_access(cgh);
43     dpct::access_wrapper<int *> out_acc_ct0(out, cgh);
44
45     cgh.parallel_for(
46         sycl::nd_range<3>(sycl::range<3>(1, 1, 1), sycl::range<3>(1, 1, 1)),
47         [=](sycl::nd_item<3> item_ct1) {
48           kernel(out_acc_ct0.get_raw_pointer(), item_ct1,
49                  device0_acc_ct1.get_pointer(), device1_acc_ct1.get_pointer(),
50                  device2_acc_ct1.get_pointer(), ...,
51                  device30_acc_ct1.get_pointer(), device31_acc_ct1.get_pointer());
52         });
53   });
54 }

which is rewritten to:

 1 #define ARRAY_SIZE 2
 2
 3 static dpct::constant_memory<int, 1> device0(ARRAY_SIZE * 32);
 4
 5 // kernel function declaration
 6 void kernel(int *out, const sycl::nd_item<3> &item_ct1, int const *device0) {
 7   int i = item_ct1.get_local_range(2) * item_ct1.get_group(2) +
 8           item_ct1.get_local_id(2);
 9   for (int j = 0; j < 32; j++) {
10     out[i] += device0[ARRAY_SIZE * j + i];
11   }
12 }
13
14 void test_function(int *out) {
15   device0.init();
16
17   dpct::get_out_of_order_queue().submit([&](sycl::handler &cgh) {
18     auto device0_acc_ct1 = device0.get_access(cgh);
19     dpct::access_wrapper<int *> out_acc_ct0(out, cgh);
20
21     cgh.parallel_for(
22         sycl::nd_range<3>(sycl::range<3>(1, 1, 1), sycl::range<3>(1, 1, 1)),
23         [=](sycl::nd_item<3> item_ct1) {
24           kernel(out_acc_ct0.get_raw_pointer(), item_ct1,
25                  device0_acc_ct1.get_pointer());
26         });
27   });
28 }