DPCT1012
Message
Detected kernel execution time measurement pattern and generated an initial code for time measurements in SYCL. You can change the way time is measured depending on your goals.
Detailed Help
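The generated code uses CPU (host) time, read with `std::chrono::steady_clock`, to measure the kernel execution time, so the measured interval also includes the host-side time spent submitting the kernel and waiting for it to complete. You can change the way time is measured depending on your requirements.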
Suggestions to Fix
Review the logic and adjust it as needed.
For example, this original CUDA* code:
1__global__ void kernel() {
2 ...
3}
4
5void foo() {
6 cudaEvent_t start;
7 cudaEvent_t end;
8 cudaEventCreate(&start);
9 cudaEventCreate(&end);
10 cudaEventRecord(start);
11 kernel<<<1, 1>>>();
12 cudaEventRecord(end, 0);
13 cudaEventSynchronize(end);
14 float time;
15 cudaEventElapsedTime(&time, start, end);
16}
results in the following migrated SYCL* code:
1void kernel() {
2 ...
3}
4
5void foo() {
6 dpct::device_ext &dev_ct1 = dpct::get_current_device();
7 sycl::queue &q_ct1 = dev_ct1.default_queue();
8 dpct::event_ptr start;
9 std::chrono::time_point<std::chrono::steady_clock> start_ct1;
10 dpct::event_ptr end;
11 std::chrono::time_point<std::chrono::steady_clock> end_ct1;
12 start = new sycl::event();
13 end = new sycl::event();
14 /*
15 DPCT1012:0: Detected kernel execution time measurement pattern and generated
16 an initial code for time measurements in SYCL. You can change the way time is
17 measured depending on your goals.
18 */
19 start_ct1 = std::chrono::steady_clock::now();
20 *end = q_ct1.parallel_for(
21 sycl::nd_range<3>(sycl::range<3>(1, 1, 1), sycl::range<3>(1, 1, 1)),
22 [=](sycl::nd_item<3> item_ct1) {
23 kernel();
24 });
25 /*
26 DPCT1012:1: Detected kernel execution time measurement pattern and generated
27 an initial code for time measurements in SYCL. You can change the way time is
28 measured depending on your goals.
29 */
30 end->wait();
31 end_ct1 = std::chrono::steady_clock::now();
32 float time;
33 time = std::chrono::duration<float, std::milli>(end_ct1 - start_ct1).count();
34}
which is rewritten to use SYCL event profiling instead of the host clock:
1// Users can add the `--enable-profiling` option when migrating the code
2void kernel() {
3 ...
4}
5
6void foo() {
7 dpct::device_ext &dev_ct1 = dpct::get_current_device();
8 sycl::queue &q_ct1 = dev_ct1.default_queue();
9 dpct::event_ptr start;
10 dpct::event_ptr end;
11 start = new sycl::event();
12 end = new sycl::event();
13 *start = q_ct1.ext_oneapi_submit_barrier();
14 q_ct1.parallel_for(
15 sycl::nd_range<3>(sycl::range<3>(1, 1, 1), sycl::range<3>(1, 1, 1)),
16 [=](sycl::nd_item<3> item_ct1) {
17 kernel();
18 });
19 *end = q_ct1.ext_oneapi_submit_barrier();
20 end->wait_and_throw();
21 float time;
22 time =
23 (end->get_profiling_info<sycl::info::event_profiling::command_end>() -
24 start
25 ->get_profiling_info<sycl::info::event_profiling::command_start>()) /
26 1000000.0f;
27}