diff --git a/behavior_tests/behavior_tests.xml b/behavior_tests/behavior_tests.xml
index 39e46053e..363162a07 100644
--- a/behavior_tests/behavior_tests.xml
+++ b/behavior_tests/behavior_tests.xml
@@ -167,6 +167,7 @@
+
diff --git a/behavior_tests/src/cmake_dpct_helper_add_mkl_to_target/CMakeLists.txt b/behavior_tests/src/cmake_dpct_helper_add_mkl_to_target/CMakeLists.txt
new file mode 100644
index 000000000..bdcabdc14
--- /dev/null
+++ b/behavior_tests/src/cmake_dpct_helper_add_mkl_to_target/CMakeLists.txt
@@ -0,0 +1,31 @@
+#==---- CMakeLists.txt ---------------------------- cmake script file ----==//
+#
+# Copyright (C) Intel Corporation
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+# See https://llvm.org/LICENSE.txt for license information.
+#
+#===----------------------------------------------------------------------===//
+
+cmake_minimum_required(VERSION 3.10)
+
+project(cmake_add_mkl LANGUAGES CXX)
+set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsycl")
+
+# Locate the dpct executable and load dpct.cmake from <its bin dir>/../cmake.
+find_program(dpct_bin_path NAMES dpct PATHS)
+get_filename_component(bin_path_of_dpct ${dpct_bin_path} DIRECTORY)
+set(dpct_cmake_file_path "${bin_path_of_dpct}/../cmake/dpct.cmake")
+include(${dpct_cmake_file_path})
+
+find_package(IntelSYCL REQUIRED)
+
+set(CUDA_SOURCES
+  fft.dp.cpp
+)
+
+# link_directories() only affects targets created after it is called, so the
+# TBB library paths have to be registered before add_executable().
+link_directories($ENV{TBBROOT}/$ENV{TBBVER}/tbb/$ENV{TBB_MAJORVER}/lib)
+link_directories(${TBBROOT}/lib)
+add_executable(app.run ${CUDA_SOURCES})
+dpct_helper_add_mkl_to_target(app.run)
diff --git a/behavior_tests/src/cmake_dpct_helper_add_mkl_to_target/do_test.py b/behavior_tests/src/cmake_dpct_helper_add_mkl_to_target/do_test.py
new file mode 100644
index 000000000..bb973befd
--- /dev/null
+++ b/behavior_tests/src/cmake_dpct_helper_add_mkl_to_target/do_test.py
@@ -0,0 +1,48 @@
+# ====------ do_test.py---------- *- Python -* ----===##
+#
+# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+# See https://llvm.org/LICENSE.txt for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+#
+#
+# ===----------------------------------------------------------------------===#
+import subprocess
+import platform
+import os
+import sys
+from test_config import CT_TOOL
+
+from test_utils import *
+
+def setup_test():
+ change_dir(test_config.current_test)  # enter test_config.current_test (presumably this test's directory)
+ return True  # always reports success; the change_dir result is not checked
+
+def migrate_test():
+ # clean previous migration output
+ if (os.path.exists("build")):
+ shutil.rmtree("build")
+
+ ret = call_subprocess("mkdir build")
+ if not ret:
+ print("Error to create build folder:", test_config.command_output)
+
+ ret = change_dir("build")
+ if not ret:
+ print("Error to go to build folder:", test_config.command_output)
+
+ CXX_COMP = "icx" if (platform.system() == 'Windows') else "icpx"
+ ret = call_subprocess("cmake -G \"Unix Makefiles\" -DCMAKE_CXX_COMPILER=" + CXX_COMP + " ../")
+ if not ret:
+ print("Error to run cmake configure:", test_config.command_output)
+
+ ret = call_subprocess("make")
+ if not ret:
+ print("Error to run build process:", test_config.command_output)
+
+ return os.path.exists("app.run")
+def build_test():
+ return True  # no separate build work here; migrate_test() above already runs cmake and make
+def run_test():
+ return call_subprocess("./app.run")  # run ./app.run and return whatever call_subprocess reports
+
diff --git a/behavior_tests/src/cmake_dpct_helper_add_mkl_to_target/fft.dp.cpp b/behavior_tests/src/cmake_dpct_helper_add_mkl_to_target/fft.dp.cpp
new file mode 100644
index 000000000..a08f7d327
--- /dev/null
+++ b/behavior_tests/src/cmake_dpct_helper_add_mkl_to_target/fft.dp.cpp
@@ -0,0 +1,101 @@
+#include
+#include
+#include
+#include
+
+#include
+
+// Returns 1 when a and b differ by strictly less than tolerance, otherwise 0.
+int almostEqual(float a, float b, float tolerance) {
+  return (fabs(a - b) < tolerance) ? 1 : 0;
+}
+
+// Returns nonzero only when both the x() and y() parts of a and b agree within tolerance.
+int compareCufftComplex(sycl::float2 a, sycl::float2 b, float tolerance) {
+  if (!almostEqual(a.x(), b.x(), tolerance)) return 0;
+  return almostEqual(a.y(), b.y(), tolerance);
+}
+
+std::string strigifyCufftComplex(sycl::float2 a) {
+  // Formats the value as "(x, y)", applying std::to_string to each part.
+  return "(" + std::to_string(a.x()) + ", " + std::to_string(a.y()) + ")";
+}
+
+int main() {
+  // Runs an 8-point forward complex FFT through dpct on the current device and
+  // compares it with precomputed values. Returns 0 on success, 1 on mismatch.
+  dpct::device_ext &dev_ct1 = dpct::get_current_device();
+  sycl::queue &q_ct1 = dev_ct1.in_order_queue();
+  const int n = 8; // Size of the input array
+  const auto bytes = sizeof(sycl::float2) * n; // Size of each buffer in bytes
+
+  // Allocate memory on the host for the input, output and reference arrays
+  sycl::float2 *h_input = (sycl::float2 *)malloc(bytes);
+  sycl::float2 *h_output = (sycl::float2 *)malloc(bytes);
+  sycl::float2 *h_ref_output = (sycl::float2 *)malloc(bytes);
+
+  // Initialize the input array from rand(). No srand() call is made here, so
+  // the values follow the C library's default sequence, which the reference
+  // output below was presumably recorded against.
+  for (int i = 0; i < n; ++i) {
+    h_input[i].x() = static_cast<float>(rand()) / RAND_MAX;
+    h_input[i].y() = static_cast<float>(rand()) / RAND_MAX;
+  }
+
+  // Initialize h_ref_output with expected output
+  h_ref_output[0] = sycl::float2(4.94234f, 4.77104f);
+  h_ref_output[1] = sycl::float2(0.914293f, -0.261793f);
+  h_ref_output[2] = sycl::float2(-0.415583f, 0.264357f);
+  h_ref_output[3] = sycl::float2(0.241085f, 0.382871f);
+  h_ref_output[4] = sycl::float2(-0.153554f, -1.45243f);
+  h_ref_output[5] = sycl::float2(-0.421167f, -1.15111f);
+  h_ref_output[6] = sycl::float2(0.0986443f, 0.210444f);
+  h_ref_output[7] = sycl::float2(1.51544f, 0.391681f);
+
+  // Allocate memory on the device
+  sycl::float2 *d_input = sycl::malloc_device<sycl::float2>(n, q_ct1);
+  sycl::float2 *d_output = sycl::malloc_device<sycl::float2>(n, q_ct1);
+
+  // Copy input data from host to device
+  q_ct1.memcpy(d_input, h_input, bytes).wait();
+
+  // Create the FFT plan for n complex-float points
+  dpct::fft::fft_engine_ptr plan = dpct::fft::fft_engine::create(
+      &q_ct1, n, dpct::fft::fft_type::complex_float_to_complex_float, 1);
+
+  // Perform forward FFT
+  plan->compute(d_input, d_output, dpct::fft::fft_direction::forward);
+
+  // Copy output data from device to host
+  q_ct1.memcpy(h_output, d_output, bytes).wait();
+
+  // Verify the result; only the first mismatching element is reported
+  bool failed = false;
+  const float tolerance = 5e-6f;
+  for (int i = 0; i < n; ++i) {
+    if (!compareCufftComplex(h_output[i], h_ref_output[i], tolerance)) {
+      std::cout << "Failed: at index - " << i;
+      std::cout << ": " << strigifyCufftComplex(h_output[i]);
+      std::cout << " != " << strigifyCufftComplex(h_ref_output[i]);
+      std::cout << std::endl;
+      failed = true;
+      break;
+    }
+  }
+
+  if (!failed)
+    std::cout << "Verification successful" << std::endl;
+
+  // Destroy the FFT plan and free allocated memory
+  dpct::fft::fft_engine::destroy(plan);
+  sycl::free(d_input, q_ct1);
+  sycl::free(d_output, q_ct1);
+  free(h_input);
+  free(h_output);
+  free(h_ref_output); // the reference buffer is heap-allocated as well
+
+  if (failed)
+    return 1;
+
+  return 0;
+}