diff --git a/behavior_tests/behavior_tests.xml b/behavior_tests/behavior_tests.xml
index ffcd87aaa..8c2f0071e 100644
--- a/behavior_tests/behavior_tests.xml
+++ b/behavior_tests/behavior_tests.xml
@@ -165,6 +165,7 @@
+
diff --git a/behavior_tests/src/cmake_target_link_libraries/CMakeLists.txt b/behavior_tests/src/cmake_target_link_libraries/CMakeLists.txt
new file mode 100644
index 000000000..45ad19957
--- /dev/null
+++ b/behavior_tests/src/cmake_target_link_libraries/CMakeLists.txt
@@ -0,0 +1,17 @@
+cmake_minimum_required(VERSION 3.10)
+project(foo)
+set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsycl")
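+# Locate the dpct binary and include the dpct.cmake helper installed next to
+# it (assumed to define the wrappers and variables, such as DNN_LIB, used below).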
+find_program(dpct_bin_path NAMES dpct)
+get_filename_component(bin_path_of_dpct ${dpct_bin_path} DIRECTORY)
+set(dpct_cmake_file_path "${bin_path_of_dpct}/../cmake/dpct.cmake")
+include(${dpct_cmake_file_path})
+
+set(SOURCES
+ ${CMAKE_SOURCE_DIR}/main.dp.cpp
+)
+add_executable(foo-bar ${SOURCES})
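+# -qmkl makes the Intel compiler driver link oneMKL; DNN_LIB is assumed to be
+# set by the included dpct.cmake.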
+target_link_libraries(foo-bar PUBLIC -qmkl ${DNN_LIB})
diff --git a/behavior_tests/src/cmake_target_link_libraries/do_test.py b/behavior_tests/src/cmake_target_link_libraries/do_test.py
new file mode 100644
index 000000000..6e63cc74e
--- /dev/null
+++ b/behavior_tests/src/cmake_target_link_libraries/do_test.py
@@ -0,0 +1,56 @@
+# ====------ do_test.py---------- *- Python -* ----===##
+#
+# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+# See https://llvm.org/LICENSE.txt for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+#
+#
+# ===----------------------------------------------------------------------===#
+import subprocess
+import platform
+import os
+import sys
+import shutil
+from test_config import CT_TOOL
+
+from test_utils import *
+
+def setup_test():
+ change_dir(test_config.current_test)
+ return True
+
+def migrate_test():
+ # clean previous migration output
+    if os.path.exists("build"):
+ shutil.rmtree("build")
+
+ ret = call_subprocess("mkdir build")
+ if not ret:
+        print("Failed to create the build folder:", test_config.command_output)
+
+ ret = change_dir("build")
+ if not ret:
+        print("Failed to enter the build folder:", test_config.command_output)
+
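+    # Configure with the Intel oneAPI C++ compiler: icx on Windows, icpx elsewhere.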
+    if sys.platform.startswith('win'):
+ ret = call_subprocess("cmake -G \"Unix Makefiles\" -DCMAKE_CXX_COMPILER=icx ../")
+ else:
+ ret = call_subprocess("cmake -G \"Unix Makefiles\" -DCMAKE_CXX_COMPILER=icpx ../")
+
+ if not ret:
+        print("Failed to run the cmake configuration step:", test_config.command_output)
+
+ ret = call_subprocess("make")
+ if not ret:
+        print("Failed to run the build step:", test_config.command_output)
+
+    exe_name = "foo-bar.exe" if sys.platform.startswith('win') else "foo-bar"
+    return os.path.exists(exe_name)
+
+def build_test():
+    return True
+
+def run_test():
+    exe_name = "foo-bar.exe" if sys.platform.startswith('win') else "./foo-bar"
+    return call_subprocess(exe_name)
diff --git a/behavior_tests/src/cmake_target_link_libraries/main.dp.cpp b/behavior_tests/src/cmake_target_link_libraries/main.dp.cpp
new file mode 100644
index 000000000..4a8bc62de
--- /dev/null
+++ b/behavior_tests/src/cmake_target_link_libraries/main.dp.cpp
@@ -0,0 +1,278 @@
+#include <sycl/sycl.hpp>
+#include <dpct/dpct.hpp>
+#include <dpct/blas_utils.hpp>
+#include <dpct/dnnl_utils.hpp>
+#include <oneapi/mkl.hpp>
+#include <vector>
+
+#include <cmath>
+#include <cstdio>
+#include <cstdlib>
+#include <iostream>
+
+using data_type = double;
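+// Compare expect and actual element-wise within the given tolerance.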
+template <typename T>
+bool check(std::vector<T> &expect, std::vector<T> &actual, int num,
+           float precision) {
+ for (int i = 0; i < num; i++) {
+ if (std::abs(expect[i] - actual[i]) > precision) {
+ std::cout << "test failed" << std::endl;
+ std::cout << "expect:" << expect[i] << std::endl;
+ std::cout << "actual:" << actual[i] << std::endl;
+ return false;
+ }
+ }
+ return true;
+}
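+// Exercises the migrated cuBLAS path: oneMKL iamax locates the index of the
+// largest-magnitude element of A.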
+bool cublasCheck() {
+ dpct::device_ext &dev_ct1 = dpct::get_current_device();
+ sycl::queue &q_ct1 = dev_ct1.in_order_queue();
+ dpct::queue_ptr handle = NULL;
+ dpct::queue_ptr stream = &q_ct1;
+
+  const std::vector<data_type> A = {1.0, 2.0, 3.0, 4.0};
+ const int incx = 1;
+
+  int result = 0;
+
+ data_type *d_A = nullptr;
+
+ handle = &q_ct1;
+
+ /*
+ DPCT1025:0: The SYCL queue is created ignoring the flag and priority options.
+ */
+ stream = dev_ct1.create_queue();
+ handle = stream;
+
+ d_A = (data_type *)sycl::malloc_device(sizeof(data_type) * A.size(), q_ct1);
+
+ stream->memcpy(d_A, A.data(), sizeof(data_type) * A.size());
+
+  int64_t *res_temp_ptr_ct1 = sycl::malloc_shared<int64_t>(1, q_ct1);
+ oneapi::mkl::blas::column_major::iamax(*handle, A.size(), d_A, incx,
+ res_temp_ptr_ct1,
+ oneapi::mkl::index_base::one)
+ .wait();
+ int res_temp_host_ct2 = (int)*res_temp_ptr_ct1;
+ dpct::dpct_memcpy(&result, &res_temp_host_ct2, sizeof(int));
+ sycl::free(res_temp_ptr_ct1, q_ct1);
+
+ stream->wait();
+
+ sycl::free(d_A, q_ct1);
+
+ handle = nullptr;
+
+ dev_ct1.destroy_queue(stream);
+
+ dev_ct1.reset();
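+  // index_base::one makes iamax report one-based indices, so the largest
+  // element (4.0, the fourth) yields 4.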
+ if (result == 4) {
+ return true;
+ }
+ return false;
+}
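+// Naive 2D convolution kernel carried over from the migrated source; it is
+// not launched by this test.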
+template <typename T>
+void conv2d(int batch, int color, int rows, int cols, int kCols,
+ int kRows, T *matrix, float *kernel, T *result,
+ const sycl::nd_item<3> &item_ct1) {
+ int tid = item_ct1.get_group(2) * item_ct1.get_local_range(2) +
+ item_ct1.get_local_id(2);
+ int kCenterX = kCols / 2;
+ int kCenterY = kRows / 2;
+
+ for (int b = 0; b < batch; b++) {
+ for (int c = 0; c < color; c++) {
+ for (int i = 0; i < rows; i++) {
+ for (int j = 0; j < cols; j++) {
+ for (int m = 0; m < kRows; m++) {
+ int mm = kRows - 1 - m;
+ for (int n = 0; n < kCols; n++) {
+ int nn = kCols - 1 - n;
+
+ int ii = i + (kCenterY - mm);
+ int jj = j + (kCenterX - nn);
+
+ if (ii >= 0 && ii < rows && jj >= 0 && jj < cols) {
+ result[b * color * rows * cols + c * rows * cols + i * cols +
+ j] +=
+ matrix[b * c * ii * jj + c * ii * jj + ii * kRows + jj] *
+ kernel[mm * kRows + nn];
+ result[tid] = result[b * color * rows * cols + c * rows * cols +
+ i * cols + j];
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+}
+
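+// Exercises the migrated cuDNN path: oneDNN batch-normalization forward
+// inference, checked against precomputed reference values.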
+bool cudnnCheck() {
+ dpct::device_ext &dev_ct1 = dpct::get_current_device();
+ sycl::queue &q_ct1 = dev_ct1.in_order_queue();
+ dpct::dnnl::engine_ext handle;
+ dpct::dnnl::memory_desc_ext dataTensor, outTensor, scalebiasTensor;
+ handle.create_engine();
+
+ /*
+ DPCT1026:1: The call to cudnnCreateTensorDescriptor was removed because this
+ call is redundant in SYCL.
+ */
+ /*
+ DPCT1026:2: The call to cudnnCreateTensorDescriptor was removed because this
+ call is redundant in SYCL.
+ */
+ /*
+ DPCT1026:3: The call to cudnnCreateTensorDescriptor was removed because this
+ call is redundant in SYCL.
+ */
+
+ int in = 2, ic = 4, ih = 5, iw = 5;
+ int on = 2, oc = 4, oh = 5, ow = 5;
+ int sbn = 1, sbc = 4, sbh = 5, sbw = 5;
+ int ele_num = in * ic * ih * iw;
+ int oele_num = on * oc * oh * ow;
+ int sele_num = sbn * sbc * sbh * sbw;
+ dataTensor.set(dpct::dnnl::memory_format_tag::nchw,
+ dpct::library_data_t::real_float, in, ic, ih, iw);
+ outTensor.set(dpct::dnnl::memory_format_tag::nchw,
+ dpct::library_data_t::real_float, on, oc, oh, ow);
+ scalebiasTensor.set(dpct::dnnl::memory_format_tag::nchw,
+ dpct::library_data_t::real_float, sbn, sbc, sbh, sbw);
+
+ int save = 1;
+ float *data, *out, *scale, *bias, *rmean, *rvar, *smean, *svar, *z;
+  std::vector<float> host_data(ele_num, 1.0f);
+  std::vector<float> host_z(oele_num, 1.0f);
+  std::vector<float> host_out(oele_num, 0.0f);
+  std::vector<float> host_scale(sele_num, 1.0f);
+  std::vector<float> host_bias(sele_num, 0.0f);
+  std::vector<float> host_rmean(sele_num, 0.0f);
+  std::vector<float> host_rvar(sele_num, 0.0f);
+  std::vector<float> host_smean(save * sele_num, 0.0f);
+  std::vector<float> host_svar(save * sele_num, 0.0f);
+
+ for (int i = 0; i < ele_num; i++) {
+ host_data[i] = i + 4.f;
+ host_out[i] = 1.f;
+ host_z[i] = 10;
+ }
+ for (int i = 0; i < sele_num; i++) {
+ host_scale[i] = i;
+ host_bias[i] = i;
+ host_rmean[i] = i;
+ host_rvar[i] = i;
+ host_smean[i] = i;
+ host_svar[i] = i;
+ }
+
+  data = sycl::malloc_device<float>(ele_num, q_ct1);
+  z = sycl::malloc_device<float>(oele_num, q_ct1);
+  out = sycl::malloc_device<float>(oele_num, q_ct1);
+  scale = sycl::malloc_device<float>(sele_num, q_ct1);
+  bias = sycl::malloc_device<float>(sele_num, q_ct1);
+  rmean = sycl::malloc_device<float>(sele_num, q_ct1);
+  rvar = sycl::malloc_device<float>(sele_num, q_ct1);
+ smean = (float *)sycl::malloc_device(sizeof(float) * save * sele_num, q_ct1);
+ svar = (float *)sycl::malloc_device(sizeof(float) * save * sele_num, q_ct1);
+
+ q_ct1.memcpy(data, host_data.data(), sizeof(float) * ele_num);
+ q_ct1.memcpy(z, host_z.data(), sizeof(float) * oele_num);
+ q_ct1.memcpy(out, host_out.data(), sizeof(float) * oele_num);
+ q_ct1.memcpy(scale, host_scale.data(), sizeof(float) * sele_num);
+ q_ct1.memcpy(bias, host_bias.data(), sizeof(float) * sele_num);
+ q_ct1.memcpy(rmean, host_rmean.data(), sizeof(float) * sele_num);
+ q_ct1.memcpy(rvar, host_rvar.data(), sizeof(float) * sele_num);
+ q_ct1.memcpy(smean, host_smean.data(), sizeof(float) * save * sele_num);
+ q_ct1.memcpy(svar, host_svar.data(), sizeof(float) * save * sele_num).wait();
+
+ float alpha = 2.5f, beta = 1.5f, eps = 1.f;
+ double factor = 0.5f;
+ dpct::dnnl::activation_desc ActivationDesc;
+ /*
+ DPCT1026:4: The call to cudnnCreateActivationDescriptor was removed because
+ this call is redundant in SYCL.
+ */
+ /*
+ DPCT1007:5: Migration of Nan numbers propagation option is not supported.
+ */
+ ActivationDesc.set(dnnl::algorithm::eltwise_relu_use_dst_for_bwd, 0.0f);
+
+ auto status =
+ DPCT_CHECK_ERROR(handle.async_batch_normalization_forward_inference(
+ dpct::dnnl::batch_normalization_mode::per_activation,
+ dpct::dnnl::batch_normalization_ops::none, ActivationDesc, eps, alpha,
+ dataTensor, data, beta, outTensor, out, dataTensor, z,
+ scalebiasTensor, scale, bias, scalebiasTensor, smean, svar));
+
+ dev_ct1.queues_wait_and_throw();
+ q_ct1.memcpy(host_out.data(), out, sizeof(float) * oele_num).wait();
+  std::vector<float> expect = {
+ 1.5, 11.0711, 18.047, 24, 29.3885, 34.4124, 39.1779,
+ 43.7487, 48.1667, 52.4605, 56.6511, 60.7543, 64.782, 68.744,
+ 72.6478, 76.5, 80.3057, 84.0694, 87.7948, 91.4853, 95.1436,
+ 98.7721, 102.373, 105.949, 109.5,
+
+ 113.029, 116.537, 120.025, 123.495, 126.947, 130.382, 133.801,
+ 137.205, 140.595, 143.97, 147.333, 150.684, 154.022, 157.349,
+ 160.664, 163.969, 167.264, 170.549, 173.825, 177.091, 180.349,
+ 183.598, 186.839, 190.071, 193.296,
+
+ 196.514, 199.724, 202.927, 206.124, 209.314, 212.497, 215.674,
+ 218.845, 222.01, 225.169, 228.322, 231.47, 234.613, 237.75,
+ 240.882, 244.009, 247.132, 250.249, 253.362, 256.471, 259.575,
+ 262.674, 265.77, 268.861, 271.948,
+
+ 275.031, 278.11, 281.185, 284.257, 287.325, 290.389, 293.45,
+ 296.507, 299.56, 302.611, 305.658, 308.702, 311.742, 314.78,
+ 317.814, 320.846, 323.874, 326.9, 329.922, 332.942, 335.959,
+ 338.973, 341.985, 344.994, 348,
+
+ 1.5, 187.848, 306.722, 399, 476.602, 544.723, 606.125,
+ 662.467, 714.833, 763.973, 810.43, 854.611, 896.832, 937.343,
+ 976.344, 1014, 1050.45, 1085.8, 1120.17, 1153.62, 1186.23,
+ 1218.08, 1249.2, 1279.66, 1309.5,
+
+ 1338.75, 1367.46, 1395.66, 1423.36, 1450.61, 1477.42, 1503.82,
+ 1529.83, 1555.46, 1580.73, 1605.67, 1630.27, 1654.57, 1678.57,
+ 1702.27, 1725.71, 1748.87, 1771.78, 1794.45, 1816.87, 1839.07,
+ 1861.05, 1882.81, 1904.36, 1925.71,
+
+ 1946.86, 1967.83, 1988.61, 2009.22, 2029.65, 2049.92, 2070.02,
+ 2089.96, 2109.75, 2129.39, 2148.88, 2168.22, 2187.43, 2206.5,
+ 2225.44, 2244.25, 2262.93, 2281.49, 2299.92, 2318.24, 2336.44,
+ 2354.53, 2372.51, 2390.38, 2408.14,
+
+ 2425.8, 2443.36, 2460.82, 2478.18, 2495.44, 2512.61, 2529.69,
+ 2546.67, 2563.57, 2580.38, 2597.1, 2613.74, 2630.3, 2646.78,
+ 2663.17, 2679.49, 2695.73, 2711.89, 2727.98, 2743.99, 2759.93,
+ 2775.8, 2791.6, 2807.34, 2823,
+ };
+ /*
+ DPCT1026:6: The call to cudnnDestroy was removed because this call is
+ redundant in SYCL.
+ */
+  // Release every device allocation, not just data and out.
+  for (float *ptr : {data, z, out, scale, bias, rmean, rvar, smean, svar})
+    sycl::free(ptr, q_ct1);
+ return check(expect, host_out, expect.size(), 1e-1);
+}
+
+int main(int argc, char *argv[]) {
+  if (cublasCheck() && cudnnCheck()) {
+    printf("Both cases passed\n");
+    return 0;
+  }
+  printf("Tests failed\n");
+  return -1;
+}