Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

GPU: Add prototype for SMatrixGPU testing #12693

Merged
merged 2 commits into from
Feb 13, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions Common/MathUtils/include/MathUtils/SMatrixGPU.h
Original file line number Diff line number Diff line change
Expand Up @@ -140,7 +140,7 @@ template <class T, unsigned int N>
GPUd() SVectorGPU<T, N>::SVectorGPU()
{
for (unsigned int i = 0; i < N; ++i) {
mArray[i] = 7;
mArray[i] = 0;
}
}

Expand Down Expand Up @@ -1067,10 +1067,10 @@ GPUdi() void Inverter<D, N>::InvertBunchKaufman(MatRepSymGPU<T, D>& rhs, int& if
}
*mjj -= static_cast<T>(temp2);
}
} else //2x2 pivot, compute columns j and j-1 of the inverse
} else // 2x2 pivot, compute columns j and j-1 of the inverse
{
if (piv[j - 1] != 0) {
printf("error in piv %lf \n", piv[j - 1]);
printf("error in piv %lf \n", static_cast<T>(piv[j - 1]));
}
s = 2;
if (j < nrow) {
Expand Down Expand Up @@ -1344,7 +1344,7 @@ GPUdi() int Inverter<D, n>::DfinvMatrix(MatRepStdGPU<T, D, n>& rhs, unsigned int
for (unsigned int i = 1; i < n; i++) {
unsigned int ni = n - i;
mIter mij = mi;
//int j;
// int j;
for (unsigned j = 1; j <= i; j++) {
s33 = *mij;
mIter mikj = mi + n + j - 1;
Expand Down
20 changes: 18 additions & 2 deletions GPU/Common/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -38,16 +38,32 @@ if(ALIGPU_BUILD_TYPE STREQUAL "O2")

target_compile_definitions(${targetName} PRIVATE GPUCA_O2_LIB
GPUCA_TPC_GEOMETRY_O2 GPUCA_HAVE_O2HEADERS)

# cuda test, only compile if CUDA
if(CUDA_ENABLED)
o2_add_test(GPUsortCUDA NAME test_GPUsortCUDA
SOURCES test/testGPUsortCUDA.cu
PUBLIC_LINK_LIBRARIES O2::${MODULE}
COMPONENT_NAME GPU
LABELS gpu)
o2_add_test(SMatrixImpCUDA NAME test_SMatrixImpCUDA
SOURCES test/testSMatrixImp.cu
PUBLIC_LINK_LIBRARIES O2::${MODULE}
O2::MathUtils
ROOT::Core
COMPONENT_NAME GPU
LABELS gpu)
endif()
if (HIP_ENABLED)
o2_add_test(SMatrixImpHIP NAME test_SMatrixImpHIP
SOURCES test/testSMatrixImp.cu
HIPIFIED test
PUBLIC_LINK_LIBRARIES O2::${MODULE}
O2::MathUtils
ROOT::Core
COMPONENT_NAME GPU
LABELS gpu)
endif()

install(FILES ${HDRS_INSTALL} DESTINATION include/GPU)
endif()

Expand Down
1 change: 1 addition & 0 deletions GPU/Common/test/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
*.hip
131 changes: 131 additions & 0 deletions GPU/Common/test/testSMatrixImp.cu
Original file line number Diff line number Diff line change
@@ -0,0 +1,131 @@
// Copyright 2019-2020 CERN and copyright holders of ALICE O2.
// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders.
// All rights not expressly granted are reserved.
//
// This software is distributed under the terms of the GNU General Public
// License v3 (GPL Version 3), copied verbatim in the file "COPYING".
//
// In applying this license CERN does not waive the privileges and immunities
// granted to it by virtue of its status as an Intergovernmental Organization
// or submit itself to any jurisdiction.

/// \file testGPUSMatrixImp.cu
/// \author Matteo Concas

#define BOOST_TEST_MODULE Test GPUSMatrixImpl
#ifdef __HIPCC__
#define GPUPLATFORM "HIP"
#include "hip/hip_runtime.h"
#else
#define GPUPLATFORM "CUDA"
#include <cuda.h>
#endif

#include <boost/test/unit_test.hpp>
#include <iostream>

#include <MathUtils/SMatrixGPU.h>
#include <Math/SMatrix.h>

template <typename T>
void discardResult(const T&)
{
}

void prologue()
{
int deviceCount;
discardResult(cudaGetDeviceCount(&deviceCount));
if (!deviceCount) {
std::cerr << "No " << GPUPLATFORM << " devices found" << std::endl;
}
for (int iDevice = 0; iDevice < deviceCount; ++iDevice) {
cudaDeviceProp deviceProp;
discardResult(cudaGetDeviceProperties(&deviceProp, iDevice));
std::cout << GPUPLATFORM << " Device " << iDevice << ": " << deviceProp.name << std::endl;
}
}

using MatSym3DGPU = o2::math_utils::SMatrixGPU<float, 3, 3, o2::math_utils::MatRepSymGPU<float, 3>>;
using MatSym3D = ROOT::Math::SMatrix<float, 3, 3, ROOT::Math::MatRepSym<float, 3>>;

template <typename T, int D>
__global__ void invertSymMatrixKernel(o2::math_utils::SMatrixGPU<float, 3, 3, o2::math_utils::MatRepSymGPU<float, 3>>* matrix)
{
MatSym3DGPU smat2 = *matrix;

printf("A(0,0) = %f, A(0,1) = %f, A(0,2) = %f\n", (*matrix)(0, 0), (*matrix)(0, 1), (*matrix)(0, 2));
printf("A(1,0) = %f, A(1,1) = %f, A(1,2) = %f\n", (*matrix)(1, 0), (*matrix)(1, 1), (*matrix)(1, 2));
printf("A(2,0) = %f, A(2,1) = %f, A(2,2) = %f\n", (*matrix)(2, 0), (*matrix)(2, 1), (*matrix)(2, 2));

printf("B(0,0) = %f, B(0,1) = %f, B(0,2) = %f\n", smat2(0, 0), smat2(0, 1), smat2(0, 2));
printf("B(1,0) = %f, B(1,1) = %f, B(1,2) = %f\n", smat2(1, 0), smat2(1, 1), smat2(1, 2));
printf("B(2,0) = %f, B(2,1) = %f, B(2,2) = %f\n", smat2(2, 0), smat2(2, 1), smat2(2, 2));

printf("\nInverting A...\n");
matrix->Invert();

printf("A(0,0) = %f, A(0,1) = %f, A(0,2) = %f\n", (*matrix)(0, 0), (*matrix)(0, 1), (*matrix)(0, 2));
printf("A(1,0) = %f, A(1,1) = %f, A(1,2) = %f\n", (*matrix)(1, 0), (*matrix)(1, 1), (*matrix)(1, 2));
printf("A(2,0) = %f, A(2,1) = %f, A(2,2) = %f\n", (*matrix)(2, 0), (*matrix)(2, 1), (*matrix)(2, 2));

printf("\nC = (A^-1) * B...\n");
auto smat3 = (*matrix) * smat2;

printf("C(0,0) = %f, C(0,1) = %f, C(0,2) = %f\n", smat3(0, 0), smat3(0, 1), smat3(0, 2));
printf("C(1,0) = %f, C(1,1) = %f, C(1,2) = %f\n", smat3(1, 0), smat3(1, 1), smat3(1, 2));
printf("C(2,0) = %f, C(2,1) = %f, C(2,2) = %f\n", smat3(2, 0), smat3(2, 1), smat3(2, 2));

printf("\nEvaluating...\n");
MatSym3DGPU tmp;
o2::math_utils::AssignSym::Evaluate(tmp, smat3);

printf("A(0,0) = %f, A(0,1) = %f, A(0,2) = %f\n", tmp(0, 0), tmp(0, 1), tmp(0, 2));
printf("A(1,0) = %f, A(1,1) = %f, A(1,2) = %f\n", tmp(1, 0), tmp(1, 1), tmp(1, 2));
printf("A(2,0) = %f, A(2,1) = %f, A(2,2) = %f\n", tmp(2, 0), tmp(2, 1), tmp(2, 2));
(*matrix) = tmp;
}

struct GPUSMatrixImplFixture {
GPUSMatrixImplFixture() : SMatrix3D_d(nullptr)
{
prologue();

SMatrix3D_h(0, 0) = 1;
SMatrix3D_h(1, 1) = 2;
SMatrix3D_h(2, 2) = 3;
SMatrix3D_h(0, 1) = 4;
SMatrix3D_h(0, 2) = 5;
SMatrix3D_h(1, 2) = 6;

discardResult(cudaMalloc(&SMatrix3D_d, sizeof(MatSym3DGPU)));
discardResult(cudaMemcpy(SMatrix3D_d, &SMatrix3D_h, sizeof(MatSym3DGPU), cudaMemcpyHostToDevice));

std::cout << "sizeof(MatSym3DGPU) = " << sizeof(MatSym3DGPU) << std::endl;
std::cout << "sizeof(MatSym3D) = " << sizeof(MatSym3D) << std::endl;
i = 3;
}

~GPUSMatrixImplFixture()
{
discardResult(cudaFree(SMatrix3D_d));
}

int i;
MatSym3DGPU* SMatrix3D_d; // device ptr
MatSym3D SMatrix3D_h;
};

BOOST_FIXTURE_TEST_CASE(DummyFixtureUsage, GPUSMatrixImplFixture)
{
invertSymMatrixKernel<float, 3><<<1, 1>>>(SMatrix3D_d);
discardResult(cudaDeviceSynchronize());

discardResult(cudaMemcpy(&SMatrix3D_h, SMatrix3D_d, sizeof(MatSym3DGPU), cudaMemcpyDeviceToHost));

MatSym3D identity;
identity(0, 0) = 1;
identity(1, 1) = 1;
identity(2, 2) = 1;
BOOST_TEST(SMatrix3D_h == identity);
}
5 changes: 3 additions & 2 deletions cmake/O2AddHipifiedExecutable.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -15,11 +15,12 @@ include(O2AddExecutable)

function(o2_add_hipified_executable baseTargetName)
# Parse arguments in the same way o2_add_executable does
# DEST_SRC_REL_PATH is the relative destination path for converted src files
cmake_parse_arguments(PARSE_ARGV
1
A
"IS_TEST;NO_INSTALL;IS_BENCHMARK"
"COMPONENT_NAME;TARGETVARNAME"
"COMPONENT_NAME;TARGETVARNAME;DEST_SRC_REL_PATH"
"SOURCES;PUBLIC_LINK_LIBRARIES;JOB_POOL")

# Process each .cu file to generate a .hip file
Expand All @@ -32,7 +33,7 @@ function(o2_add_hipified_executable baseTargetName)
set_property(DIRECTORY APPEND PROPERTY CMAKE_CONFIGURE_DEPENDS ${file})
get_filename_component(CUDA_SOURCE ${file} NAME)
string(REPLACE ".cu" ".hip" HIP_SOURCE ${CUDA_SOURCE})
set(OUTPUT_HIP_FILE "${CMAKE_CURRENT_SOURCE_DIR}/${HIP_SOURCE}")
set(OUTPUT_HIP_FILE "${CMAKE_CURRENT_SOURCE_DIR}/${A_DEST_SRC_REL_PATH}/${HIP_SOURCE}")
list(APPEND HIP_SOURCES ${OUTPUT_HIP_FILE})

add_custom_command(
Expand Down
22 changes: 15 additions & 7 deletions cmake/O2AddTest.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ function(o2_add_test)
1
A
"INSTALL;NO_BOOST_TEST"
"COMPONENT_NAME;TIMEOUT;WORKING_DIRECTORY;NAME;TARGETVARNAME"
"COMPONENT_NAME;TIMEOUT;WORKING_DIRECTORY;NAME;TARGETVARNAME;HIPIFIED"
"SOURCES;PUBLIC_LINK_LIBRARIES;COMMAND_LINE_ARGS;LABELS;CONFIGURATIONS;ENVIRONMENT"
)

Expand Down Expand Up @@ -103,12 +103,20 @@ function(o2_add_test)
endif()

# create the executable
o2_add_executable(${testName}
SOURCES ${A_SOURCES}
PUBLIC_LINK_LIBRARIES ${linkLibraries}
COMPONENT_NAME ${A_COMPONENT_NAME}
IS_TEST ${noInstall} TARGETVARNAME targetName)

if (NOT A_HIPIFIED)
o2_add_executable(${testName}
SOURCES ${A_SOURCES}
PUBLIC_LINK_LIBRARIES ${linkLibraries}
COMPONENT_NAME ${A_COMPONENT_NAME}
IS_TEST ${noInstall} TARGETVARNAME targetName)
else()
o2_add_hipified_executable(${testName}
SOURCES ${A_SOURCES}
DEST_SRC_REL_PATH ${A_HIPIFIED}
PUBLIC_LINK_LIBRARIES ${linkLibraries}
COMPONENT_NAME ${A_COMPONENT_NAME}
IS_TEST ${noInstall} TARGETVARNAME targetName)
endif()

if(A_TARGETVARNAME)
set(${A_TARGETVARNAME} ${targetName} PARENT_SCOPE)
Expand Down
Loading