

Use sycl::stream instead of printf if your code is used on the device.

Detailed Help

If a printf statement in your original code is reachable from both the host and the device, the statement is left unchanged during migration. To create output in DPC++, device code must use sycl::stream, while host code continues to use printf.

Suggestions to Fix

  • If the printf statement is only used from the host, do not change your code.

  • If the printf statement is only used from the device, use sycl::stream instead of printf.

For example, this original CUDA* code:

 1 __host__ __device__ void hd() {
 2   printf("Hello!\n");
 3 }
 5 __global__ void k() {
 6   hd();
 7 }
 9 void foo() {
10   hd();
11   k<<<1, 1>>>();
12 }

results in the following migrated SYCL* code:

 1 void hd() {
 2   /*
 3   DPCT1040:0: Use sycl::stream instead of printf if your code is used on the
 4   device.
 5   */
 6   printf("Hello!\n");
 7 }
 9 void k() {
10   hd();
11 }
13 void foo() {
14   hd();
15   dpct::get_default_queue().parallel_for(
16       sycl::nd_range<3>(sycl::range<3>(1, 1, 1), sycl::range<3>(1, 1, 1)),
17       [=](sycl::nd_item<3> item_ct1) {
18         k();
19       });
20 }

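which is manually rewritten to split hd() into a host version that keeps printf and a device version that writes to a sycl::stream passed in from the kernel: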

 1 void hd_host() {
 2   printf("Hello!\n");
 3 }
 5 void hd_device(const sycl::stream &stream) {
 6   stream << "Hello!\n";
 7 }
 9 void k(const sycl::stream &stream) {
10   hd_device(stream);
11 }
13 void foo() {
14   hd_host();
15   dpct::get_default_queue().submit([&](sycl::handler &cgh) {
16     sycl::stream stream(64 * 1024 /*totalBufferSize*/, 80 /*workItemBufferSize*/, cgh);
18     cgh.parallel_for(
19         sycl::nd_range<3>(sycl::range<3>(1, 1, 1), sycl::range<3>(1, 1, 1)),
20         [=](sycl::nd_item<3> item_ct1) {
21           k(stream);
22         });
23   });
24 }