diff --git a/Common/MathUtils/include/MathUtils/SMatrixGPU.h b/Common/MathUtils/include/MathUtils/SMatrixGPU.h index 2faaf118d1daa..d0820778550dd 100644 --- a/Common/MathUtils/include/MathUtils/SMatrixGPU.h +++ b/Common/MathUtils/include/MathUtils/SMatrixGPU.h @@ -140,7 +140,7 @@ template GPUd() SVectorGPU::SVectorGPU() { for (unsigned int i = 0; i < N; ++i) { - mArray[i] = 7; + mArray[i] = 0; } } @@ -1067,10 +1067,10 @@ GPUdi() void Inverter::InvertBunchKaufman(MatRepSymGPU& rhs, int& if } *mjj -= static_cast(temp2); } - } else //2x2 pivot, compute columns j and j-1 of the inverse + } else // 2x2 pivot, compute columns j and j-1 of the inverse { if (piv[j - 1] != 0) { - printf("error in piv %lf \n", piv[j - 1]); + printf("error in piv %lf \n", static_cast(piv[j - 1])); } s = 2; if (j < nrow) { @@ -1344,7 +1344,7 @@ GPUdi() int Inverter::DfinvMatrix(MatRepStdGPU& rhs, unsigned int for (unsigned int i = 1; i < n; i++) { unsigned int ni = n - i; mIter mij = mi; - //int j; + // int j; for (unsigned j = 1; j <= i; j++) { s33 = *mij; mIter mikj = mi + n + j - 1; diff --git a/GPU/Common/CMakeLists.txt b/GPU/Common/CMakeLists.txt index d22366b406a16..c37ec3f5f5a13 100644 --- a/GPU/Common/CMakeLists.txt +++ b/GPU/Common/CMakeLists.txt @@ -38,7 +38,7 @@ if(ALIGPU_BUILD_TYPE STREQUAL "O2") target_compile_definitions(${targetName} PRIVATE GPUCA_O2_LIB GPUCA_TPC_GEOMETRY_O2 GPUCA_HAVE_O2HEADERS) - + # cuda test, only compile if CUDA if(CUDA_ENABLED) o2_add_test(GPUsortCUDA NAME test_GPUsortCUDA @@ -46,8 +46,24 @@ if(ALIGPU_BUILD_TYPE STREQUAL "O2") PUBLIC_LINK_LIBRARIES O2::${MODULE} COMPONENT_NAME GPU LABELS gpu) + o2_add_test(SMatrixImpCUDA NAME test_SMatrixImpCUDA + SOURCES test/testSMatrixImp.cu + PUBLIC_LINK_LIBRARIES O2::${MODULE} + O2::MathUtils + ROOT::Core + COMPONENT_NAME GPU + LABELS gpu) + endif() + if (HIP_ENABLED) + o2_add_test(SMatrixImpHIP NAME test_SMatrixImpHIP + SOURCES test/testSMatrixImp.cu + HIPIFIED test + PUBLIC_LINK_LIBRARIES O2::${MODULE} + O2::MathUtils + ROOT::Core + COMPONENT_NAME GPU + LABELS gpu) endif() - install(FILES ${HDRS_INSTALL} DESTINATION include/GPU) endif() diff --git a/GPU/Common/test/.gitignore b/GPU/Common/test/.gitignore new file mode 100644 index 0000000000000..43fd5862f17c1 --- /dev/null +++ b/GPU/Common/test/.gitignore @@ -0,0 +1 @@ +*.hip \ No newline at end of file diff --git a/GPU/Common/test/testSMatrixImp.cu b/GPU/Common/test/testSMatrixImp.cu new file mode 100644 index 0000000000000..08ca0b823488d --- /dev/null +++ b/GPU/Common/test/testSMatrixImp.cu @@ -0,0 +1,131 @@ +// Copyright 2019-2020 CERN and copyright holders of ALICE O2. +// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. +// All rights not expressly granted are reserved. +// +// This software is distributed under the terms of the GNU General Public +// License v3 (GPL Version 3), copied verbatim in the file "COPYING". +// +// In applying this license CERN does not waive the privileges and immunities +// granted to it by virtue of its status as an Intergovernmental Organization +// or submit itself to any jurisdiction. + +/// \file testGPUSMatrixImp.cu +/// \author Matteo Concas + +#define BOOST_TEST_MODULE Test GPUSMatrixImpl +#ifdef __HIPCC__ +#define GPUPLATFORM "HIP" +#include "hip/hip_runtime.h" +#else +#define GPUPLATFORM "CUDA" +#include +#endif + +#include +#include + +#include +#include + +template +void discardResult(const T&) +{ +} + +void prologue() +{ + int deviceCount; + discardResult(cudaGetDeviceCount(&deviceCount)); + if (!deviceCount) { + std::cerr << "No " << GPUPLATFORM << " devices found" << std::endl; + } + for (int iDevice = 0; iDevice < deviceCount; ++iDevice) { + cudaDeviceProp deviceProp; + discardResult(cudaGetDeviceProperties(&deviceProp, iDevice)); + std::cout << GPUPLATFORM << " Device " << iDevice << ": " << deviceProp.name << std::endl; + } +} + +using MatSym3DGPU = o2::math_utils::SMatrixGPU>; +using MatSym3D = ROOT::Math::SMatrix>; + +template +__global__ void invertSymMatrixKernel(o2::math_utils::SMatrixGPU>* matrix) +{ + MatSym3DGPU smat2 = *matrix; + + printf("A(0,0) = %f, A(0,1) = %f, A(0,2) = %f\n", (*matrix)(0, 0), (*matrix)(0, 1), (*matrix)(0, 2)); + printf("A(1,0) = %f, A(1,1) = %f, A(1,2) = %f\n", (*matrix)(1, 0), (*matrix)(1, 1), (*matrix)(1, 2)); + printf("A(2,0) = %f, A(2,1) = %f, A(2,2) = %f\n", (*matrix)(2, 0), (*matrix)(2, 1), (*matrix)(2, 2)); + + printf("B(0,0) = %f, B(0,1) = %f, B(0,2) = %f\n", smat2(0, 0), smat2(0, 1), smat2(0, 2)); + printf("B(1,0) = %f, B(1,1) = %f, B(1,2) = %f\n", smat2(1, 0), smat2(1, 1), smat2(1, 2)); + printf("B(2,0) = %f, B(2,1) = %f, B(2,2) = %f\n", smat2(2, 0), smat2(2, 1), smat2(2, 2)); + + printf("\nInverting A...\n"); + matrix->Invert(); + + printf("A(0,0) = %f, A(0,1) = %f, A(0,2) = %f\n", (*matrix)(0, 0), (*matrix)(0, 1), (*matrix)(0, 2)); + printf("A(1,0) = %f, A(1,1) = %f, A(1,2) = %f\n", (*matrix)(1, 0), (*matrix)(1, 1), (*matrix)(1, 2)); + printf("A(2,0) = %f, A(2,1) = %f, A(2,2) = %f\n", (*matrix)(2, 0), (*matrix)(2, 1), (*matrix)(2, 2)); + + printf("\nC = (A^-1) * B...\n"); + auto smat3 = (*matrix) * smat2; + + printf("C(0,0) = %f, C(0,1) = %f, C(0,2) = %f\n", smat3(0, 0), smat3(0, 1), smat3(0, 2)); + printf("C(1,0) = %f, C(1,1) = %f, C(1,2) = %f\n", smat3(1, 0), smat3(1, 1), smat3(1, 2)); + printf("C(2,0) = %f, C(2,1) = %f, C(2,2) = %f\n", smat3(2, 0), smat3(2, 1), smat3(2, 2)); + + printf("\nEvaluating...\n"); + MatSym3DGPU tmp; + o2::math_utils::AssignSym::Evaluate(tmp, smat3); + + printf("A(0,0) = %f, A(0,1) = %f, A(0,2) = %f\n", tmp(0, 0), tmp(0, 1), tmp(0, 2)); + printf("A(1,0) = %f, A(1,1) = %f, A(1,2) = %f\n", tmp(1, 0), tmp(1, 1), tmp(1, 2)); + printf("A(2,0) = %f, A(2,1) = %f, A(2,2) = %f\n", tmp(2, 0), tmp(2, 1), tmp(2, 2)); + (*matrix) = tmp; +} + +struct GPUSMatrixImplFixture { + GPUSMatrixImplFixture() : SMatrix3D_d(nullptr) + { + prologue(); + + SMatrix3D_h(0, 0) = 1; + SMatrix3D_h(1, 1) = 2; + SMatrix3D_h(2, 2) = 3; + SMatrix3D_h(0, 1) = 4; + SMatrix3D_h(0, 2) = 5; + SMatrix3D_h(1, 2) = 6; + + discardResult(cudaMalloc(&SMatrix3D_d, sizeof(MatSym3DGPU))); + discardResult(cudaMemcpy(SMatrix3D_d, &SMatrix3D_h, sizeof(MatSym3DGPU), cudaMemcpyHostToDevice)); + + std::cout << "sizeof(MatSym3DGPU) = " << sizeof(MatSym3DGPU) << std::endl; + std::cout << "sizeof(MatSym3D) = " << sizeof(MatSym3D) << std::endl; + i = 3; + } + + ~GPUSMatrixImplFixture() + { + discardResult(cudaFree(SMatrix3D_d)); + } + + int i; + MatSym3DGPU* SMatrix3D_d; // device ptr + MatSym3D SMatrix3D_h; +}; + +BOOST_FIXTURE_TEST_CASE(DummyFixtureUsage, GPUSMatrixImplFixture) +{ + invertSymMatrixKernel<<<1, 1>>>(SMatrix3D_d); + discardResult(cudaDeviceSynchronize()); + + discardResult(cudaMemcpy(&SMatrix3D_h, SMatrix3D_d, sizeof(MatSym3DGPU), cudaMemcpyDeviceToHost)); + + MatSym3D identity; + identity(0, 0) = 1; + identity(1, 1) = 1; + identity(2, 2) = 1; + BOOST_TEST(SMatrix3D_h == identity); +} \ No newline at end of file diff --git a/cmake/O2AddHipifiedExecutable.cmake b/cmake/O2AddHipifiedExecutable.cmake index 6f25e3b061cf5..1152273b1256f 100644 --- a/cmake/O2AddHipifiedExecutable.cmake +++ b/cmake/O2AddHipifiedExecutable.cmake @@ -15,11 +15,12 @@ include(O2AddExecutable) function(o2_add_hipified_executable baseTargetName) # Parse arguments in the same way o2_add_executable does + # DEST_SRC_REL_PATH is the relative destination path for converted src files cmake_parse_arguments(PARSE_ARGV 1 A "IS_TEST;NO_INSTALL;IS_BENCHMARK" - "COMPONENT_NAME;TARGETVARNAME" + "COMPONENT_NAME;TARGETVARNAME;DEST_SRC_REL_PATH" "SOURCES;PUBLIC_LINK_LIBRARIES;JOB_POOL") # Process each .cu file to generate a .hip file @@ -32,7 +33,7 @@ function(o2_add_hipified_executable baseTargetName) set_property(DIRECTORY APPEND PROPERTY CMAKE_CONFIGURE_DEPENDS ${file}) get_filename_component(CUDA_SOURCE ${file} NAME) string(REPLACE ".cu" ".hip" HIP_SOURCE ${CUDA_SOURCE}) - set(OUTPUT_HIP_FILE "${CMAKE_CURRENT_SOURCE_DIR}/${HIP_SOURCE}") + set(OUTPUT_HIP_FILE "${CMAKE_CURRENT_SOURCE_DIR}/${A_DEST_SRC_REL_PATH}/${HIP_SOURCE}") list(APPEND HIP_SOURCES ${OUTPUT_HIP_FILE}) add_custom_command( diff --git a/cmake/O2AddTest.cmake b/cmake/O2AddTest.cmake index b42c67cfb0821..5687b01724106 100644 --- a/cmake/O2AddTest.cmake +++ b/cmake/O2AddTest.cmake @@ -72,7 +72,7 @@ function(o2_add_test) 1 A "INSTALL;NO_BOOST_TEST" - "COMPONENT_NAME;TIMEOUT;WORKING_DIRECTORY;NAME;TARGETVARNAME" + "COMPONENT_NAME;TIMEOUT;WORKING_DIRECTORY;NAME;TARGETVARNAME;HIPIFIED" "SOURCES;PUBLIC_LINK_LIBRARIES;COMMAND_LINE_ARGS;LABELS;CONFIGURATIONS;ENVIRONMENT" ) @@ -103,12 +103,20 @@ function(o2_add_test) endif() # create the executable - o2_add_executable(${testName} - SOURCES ${A_SOURCES} - PUBLIC_LINK_LIBRARIES ${linkLibraries} - COMPONENT_NAME ${A_COMPONENT_NAME} - IS_TEST ${noInstall} TARGETVARNAME targetName) - + if (NOT A_HIPIFIED) + o2_add_executable(${testName} + SOURCES ${A_SOURCES} + PUBLIC_LINK_LIBRARIES ${linkLibraries} + COMPONENT_NAME ${A_COMPONENT_NAME} + IS_TEST ${noInstall} TARGETVARNAME targetName) + else() + o2_add_hipified_executable(${testName} + SOURCES ${A_SOURCES} + DEST_SRC_REL_PATH ${A_HIPIFIED} + PUBLIC_LINK_LIBRARIES ${linkLibraries} + COMPONENT_NAME ${A_COMPONENT_NAME} + IS_TEST ${noInstall} TARGETVARNAME targetName) + endif() if(A_TARGETVARNAME) set(${A_TARGETVARNAME} ${targetName} PARENT_SCOPE)