From 1b957687ad0a2f03dd31fb470f054ea20c138ef4 Mon Sep 17 00:00:00 2001 From: Alejandro Acosta Date: Mon, 20 May 2024 10:10:34 +0100 Subject: [PATCH] Fix issues in PVC example --- .../sycl/pvc/pvc_bfloat_dpas_gemm_cute.cpp | 19 +++++++++++-------- include/cutlass/relatively_equal.h | 2 +- 2 files changed, 12 insertions(+), 9 deletions(-) diff --git a/examples/sycl/pvc/pvc_bfloat_dpas_gemm_cute.cpp b/examples/sycl/pvc/pvc_bfloat_dpas_gemm_cute.cpp index 024df458f2..610c006cc9 100644 --- a/examples/sycl/pvc/pvc_bfloat_dpas_gemm_cute.cpp +++ b/examples/sycl/pvc/pvc_bfloat_dpas_gemm_cute.cpp @@ -48,11 +48,8 @@ template static void fill_matrix(std::vector &M) { - std::random_device dev; - std::mt19937 rng(dev()); - std::uniform_real_distribution dist(1.0, 2.0); std::generate(std::begin(M), std::end(M), [&] - { return static_cast(dist(rng)); }); + { return static_cast( 2*(rand() / double(RAND_MAX)) - 1 ); }); } template @@ -208,7 +205,12 @@ struct ExampleRunner { // Check if output from CUTLASS kernel and reference kernel are relatively equal or not // need to set a larger error margin for comparison to succeed - bool passed = cutlass::reference::device::BlockCompareRelativelyEqual(block_ref_D.get(), block_D.get(), block_D.size(), 0.5f, 0.5f); + auto epsilon = static_cast(0.1f); + auto nonzero_floor = static_cast(0.1f); + + bool passed = cutlass::reference::device::BlockCompareRelativelyEqual( + block_ref_D.get(), block_D.get(), block_D.size(), + epsilon, nonzero_floor); return passed; } @@ -219,7 +221,7 @@ struct ExampleRunner { auto [M, N, K, L] = problem_shape_MNKL; stride_A = cutlass::make_cute_packed_stride(StrideA{}, cute::make_shape(M, K, L)); - stride_B = cutlass::make_cute_packed_stride(StrideB{}, cute::make_shape(K, N, L)); + stride_B = cutlass::make_cute_packed_stride(StrideB{}, cute::make_shape(N, K, L)); stride_C = cutlass::make_cute_packed_stride(StrideC{}, cute::make_shape(M, N, L)); stride_D = cutlass::make_cute_packed_stride(StrideD{}, cute::make_shape(M, N, L)); @@ -279,7 +281,7 @@ struct ExampleRunner { // Verify that the result is correct bool passed = verify(problem_size, options.alpha, options.beta); - std::cout << "PVC GEMM Example : " << (passed ? "Passed" : "Failed") << std::endl; + std::cout << "Disposition: " << (passed ? "Passed" : "Failed") << std::endl; if (passed && options.iterations > 0) { GPU_Clock timer; @@ -291,7 +293,8 @@ struct ExampleRunner { float cute_time = timer.seconds() / options.iterations; double tflops = (2.0 * options.m * options.n * options.k * options.l) * 1e-12; - printf("PVC GEMM Performance: [%4.3f]TFlop/s (%6.4f)ms\n", tflops / cute_time, cute_time*1000); + std::cout << "Problem Size: " << options.m << 'x' << options.n << 'x' << options.k << 'x' << options.l << std::endl; + printf("Cutlass GEMM Performance: [%4.3f]TFlop/s (%6.4f)ms\n", tflops / cute_time, cute_time*1000); } return; diff --git a/include/cutlass/relatively_equal.h b/include/cutlass/relatively_equal.h index a2817d776d..f8b8c55a38 100644 --- a/include/cutlass/relatively_equal.h +++ b/include/cutlass/relatively_equal.h @@ -57,7 +57,7 @@ CUTLASS_HOST_DEVICE bool relatively_equal_float(T a, T b, T epsilon, T nonzero_floor) { #if defined (CUTLASS_ENABLE_SYCL) - using cutlass::abs; + using sycl::fabs; #elif defined(__CUDACC_RTC__) using cuda::std::abs; #else