From 0eec04648c79d725df0ca6ba38ef856f99275ae8 Mon Sep 17 00:00:00 2001 From: Matteo Concas Date: Mon, 9 Sep 2024 23:24:02 +0200 Subject: [PATCH] DCAFitter GPU: add test to validate the implementation (#13447) --- Common/DCAFitter/GPU/CMakeLists.txt | 30 +- Common/DCAFitter/GPU/DCAFitterN.cu | 38 -- Common/DCAFitter/GPU/cuda/CMakeLists.txt | 32 + Common/DCAFitter/GPU/cuda/DCAFitterN.cu | 106 ++++ .../GPU/cuda/test/testDCAFitterNGPU.cxx | 556 ++++++++++++++++++ Common/DCAFitter/GPU/hip/CMakeLists.txt | 34 ++ .../DCAFitter/GPU/test/testDCAFitterNCUDA.cu | 37 -- .../DCAFitter/include/DCAFitter/DCAFitterN.h | 54 +- .../DCAFitter/include/DCAFitter/HelixHelper.h | 46 +- .../MathUtils/include/MathUtils/Cartesian.h | 12 + .../MathUtils/include/MathUtils/SMatrixGPU.h | 63 +- Common/MathUtils/include/MathUtils/Utils.h | 6 +- .../src/TrackParametrization.cxx | 11 - GPU/Common/GPUCommonMath.h | 2 +- 14 files changed, 844 insertions(+), 183 deletions(-) delete mode 100644 Common/DCAFitter/GPU/DCAFitterN.cu create mode 100644 Common/DCAFitter/GPU/cuda/CMakeLists.txt create mode 100644 Common/DCAFitter/GPU/cuda/DCAFitterN.cu create mode 100644 Common/DCAFitter/GPU/cuda/test/testDCAFitterNGPU.cxx create mode 100644 Common/DCAFitter/GPU/hip/CMakeLists.txt delete mode 100644 Common/DCAFitter/GPU/test/testDCAFitterNCUDA.cu diff --git a/Common/DCAFitter/GPU/CMakeLists.txt b/Common/DCAFitter/GPU/CMakeLists.txt index f2206c0bc6ddf..faf15f8aab3df 100644 --- a/Common/DCAFitter/GPU/CMakeLists.txt +++ b/Common/DCAFitter/GPU/CMakeLists.txt @@ -9,28 +9,10 @@ # granted to it by virtue of its status as an Intergovernmental Organization # or submit itself to any jurisdiction. -if(CUDA_ENABLED) -# o2_add_library(DCAFitterCUDA -# TARGETVARNAME targetName -# SOURCES DCAFitterN.cu -# # src/FwdDCAFitterN.cxx -# PUBLIC_INCLUDE_DIRECTORIES ../include -# PUBLIC_LINK_LIBRARIES O2::MathUtils -# O2::ReconstructionDataFormats -# O2::DetectorsBase) - -# o2_add_test(DCAFitterNCUDA NAME testDCAFitterNCUDA -# SOURCES test/testDCAFitterNCUDA.cu -# COMPONENT_NAME DCAFitterCUDA -# PUBLIC_LINK_LIBRARIES O2::DCAFitter -# COMPONENT_NAME GPU -# LABELS gpu vertexing) +if (CUDA_ENABLED) +add_subdirectory(cuda) endif() -# if (HIP_ENABLED) -# o2_add_test(DCAFitterNHIP NAME testDCAFitterNHIP -# SOURCES test/testDCAFitterNCUDA.cu -# HIPIFIED test -# PUBLIC_LINK_LIBRARIES O2::DCAFitterHIP -# COMPONENT_NAME GPU -# LABELS gpu vertexing) -# endif() \ No newline at end of file + +if (HIP_ENABLED) +add_subdirectory(hip) +endif() \ No newline at end of file diff --git a/Common/DCAFitter/GPU/DCAFitterN.cu b/Common/DCAFitter/GPU/DCAFitterN.cu deleted file mode 100644 index 10f2c40dc41e1..0000000000000 --- a/Common/DCAFitter/GPU/DCAFitterN.cu +++ /dev/null @@ -1,38 +0,0 @@ -// Copyright 2019-2020 CERN and copyright holders of ALICE O2. -// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. -// All rights not expressly granted are reserved. -// -// This software is distributed under the terms of the GNU General Public -// License v3 (GPL Version 3), copied verbatim in the file "COPYING". -// -// In applying this license CERN does not waive the privileges and immunities -// granted to it by virtue of its status as an Intergovernmental Organization -// or submit itself to any jurisdiction. - -#ifdef __HIPCC__ -#include "hip/hip_runtime.h" -#else -#include -#endif - -#include "GPUCommonDef.h" -#include "DCAFitter/DCAFitterN.h" - -namespace o2 -{ -namespace vertexing -{ -GPUg() void __dummy_instance__() -{ -#ifdef GPUCA_GPUCODE_DEVICE -#pragma message "Compiling device code" -#endif - DCAFitter2 ft2; - DCAFitter3 ft3; - o2::track::TrackParCov tr; - ft2.process(tr, tr); - ft3.process(tr, tr, tr); -} - -} // namespace vertexing -} // namespace o2 \ No newline at end of file diff --git a/Common/DCAFitter/GPU/cuda/CMakeLists.txt b/Common/DCAFitter/GPU/cuda/CMakeLists.txt new file mode 100644 index 0000000000000..a498d0c350202 --- /dev/null +++ b/Common/DCAFitter/GPU/cuda/CMakeLists.txt @@ -0,0 +1,32 @@ +# Copyright 2019-2020 CERN and copyright holders of ALICE O2. +# See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. +# All rights not expressly granted are reserved. +# +# This software is distributed under the terms of the GNU General Public +# License v3 (GPL Version 3), copied verbatim in the file "COPYING". +# +# In applying this license CERN does not waive the privileges and immunities +# granted to it by virtue of its status as an Intergovernmental Organization +# or submit itself to any jurisdiction. + +o2_add_library(DCAFitterCUDA + TARGETVARNAME targetName + SOURCES DCAFitterN.cu + PUBLIC_INCLUDE_DIRECTORIES ../../include + PUBLIC_LINK_LIBRARIES O2::MathUtils + O2::ReconstructionDataFormats + O2::DetectorsBase + PRIVATE_LINK_LIBRARIES O2::GPUTrackingCUDAExternalProvider) +set_property(TARGET ${targetName} PROPERTY CUDA_SEPARABLE_COMPILATION ON) + +o2_add_test(DCAFitterNCUDA + SOURCES test/testDCAFitterNGPU.cxx + PUBLIC_LINK_LIBRARIES O2::ReconstructionDataFormats + O2::DCAFitterCUDA + O2::DCAFitter + ROOT::Core + ROOT::Physics + COMPONENT_NAME gpu + LABELS vertexing + ENVIRONMENT O2_ROOT=${CMAKE_BINARY_DIR}/stage + VMCWORKDIR=${CMAKE_BINARY_DIR}/stage/${CMAKE_INSTALL_DATADIR}) \ No newline at end of file diff --git a/Common/DCAFitter/GPU/cuda/DCAFitterN.cu b/Common/DCAFitter/GPU/cuda/DCAFitterN.cu new file mode 100644 index 0000000000000..f543b7bc7cdd4 --- /dev/null +++ b/Common/DCAFitter/GPU/cuda/DCAFitterN.cu @@ -0,0 +1,106 @@ +// Copyright 2019-2020 CERN and copyright holders of ALICE O2. +// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. +// All rights not expressly granted are reserved. +// +// This software is distributed under the terms of the GNU General Public +// License v3 (GPL Version 3), copied verbatim in the file "COPYING". +// +// In applying this license CERN does not waive the privileges and immunities +// granted to it by virtue of its status as an Intergovernmental Organization +// or submit itself to any jurisdiction. + +#ifdef __HIPCC__ +#include "hip/hip_runtime.h" +#else +#include +#endif + +#include "GPUCommonDef.h" +#include "DCAFitter/DCAFitterN.h" +// #include "MathUtils/SMatrixGPU.h" + +#define gpuCheckError(x) \ + { \ + gpuAssert((x), __FILE__, __LINE__); \ + } +inline void gpuAssert(cudaError_t code, const char* file, int line, bool abort = true) +{ + if (code != cudaSuccess) { + std::cout << "GPUassert: " << cudaGetErrorString(code) << " " << file << " " << line << std::endl; + if (abort) { + throw std::runtime_error("GPU assert failed."); + } + } +} +namespace o2::vertexing::device +{ +namespace kernel +{ +GPUg() void printKernel(o2::vertexing::DCAFitterN<2>* ft) +{ + if (threadIdx.x == 0) { + printf(" =============== GPU DCA Fitter ================\n"); + ft->print(); + printf(" ===============================================\n"); + } +} + +GPUg() void processKernel(o2::vertexing::DCAFitterN<2>* ft, o2::track::TrackParCov* t1, o2::track::TrackParCov* t2, int* res) +{ + *res = ft->process(*t1, *t2); +} +} // namespace kernel + +void print(o2::vertexing::DCAFitterN<2>* ft, + const int nBlocks, + const int nThreads) +{ + DCAFitterN<2>* ft_device; + gpuCheckError(cudaMalloc(reinterpret_cast(&ft_device), sizeof(o2::vertexing::DCAFitterN<2>))); + gpuCheckError(cudaMemcpy(ft_device, ft, sizeof(o2::vertexing::DCAFitterN<2>), cudaMemcpyHostToDevice)); + + kernel::printKernel<<>>(ft_device); + + gpuCheckError(cudaPeekAtLastError()); + gpuCheckError(cudaDeviceSynchronize()); +} + +int process(o2::vertexing::DCAFitterN<2>* fitter, + o2::track::TrackParCov& track1, + o2::track::TrackParCov& track2, + const int nBlocks, + const int nThreads) +{ + DCAFitterN<2>* ft_device; + o2::track::TrackParCov* t1_device; + o2::track::TrackParCov* t2_device; + int result, *result_device; + + gpuCheckError(cudaMalloc(reinterpret_cast(&ft_device), sizeof(o2::vertexing::DCAFitterN<2>))); + gpuCheckError(cudaMalloc(reinterpret_cast(&t1_device), sizeof(o2::track::TrackParCov))); + gpuCheckError(cudaMalloc(reinterpret_cast(&t2_device), sizeof(o2::track::TrackParCov))); + gpuCheckError(cudaMalloc(reinterpret_cast(&result_device), sizeof(int))); + + gpuCheckError(cudaMemcpy(ft_device, fitter, sizeof(o2::vertexing::DCAFitterN<2>), cudaMemcpyHostToDevice)); + gpuCheckError(cudaMemcpy(t1_device, &track1, sizeof(o2::track::TrackParCov), cudaMemcpyHostToDevice)); + gpuCheckError(cudaMemcpy(t2_device, &track2, sizeof(o2::track::TrackParCov), cudaMemcpyHostToDevice)); + + kernel::processKernel<<>>(ft_device, t1_device, t2_device, result_device); + + gpuCheckError(cudaPeekAtLastError()); + gpuCheckError(cudaDeviceSynchronize()); + + gpuCheckError(cudaMemcpy(&result, result_device, sizeof(int), cudaMemcpyDeviceToHost)); + gpuCheckError(cudaMemcpy(fitter, ft_device, sizeof(o2::vertexing::DCAFitterN<2>), cudaMemcpyDeviceToHost)); + gpuCheckError(cudaMemcpy(&track1, t1_device, sizeof(o2::track::TrackParCov), cudaMemcpyDeviceToHost)); + gpuCheckError(cudaMemcpy(&track2, t2_device, sizeof(o2::track::TrackParCov), cudaMemcpyDeviceToHost)); + gpuCheckError(cudaFree(ft_device)); + gpuCheckError(cudaFree(t1_device)); + gpuCheckError(cudaFree(t2_device)); + + gpuCheckError(cudaFree(result_device)); + + return result; +} + +} // namespace o2::vertexing::device \ No newline at end of file diff --git a/Common/DCAFitter/GPU/cuda/test/testDCAFitterNGPU.cxx b/Common/DCAFitter/GPU/cuda/test/testDCAFitterNGPU.cxx new file mode 100644 index 0000000000000..829ecb808f5c2 --- /dev/null +++ b/Common/DCAFitter/GPU/cuda/test/testDCAFitterNGPU.cxx @@ -0,0 +1,556 @@ +// Copyright 2019-2020 CERN and copyright holders of ALICE O2. +// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. +// All rights not expressly granted are reserved. +// +// This software is distributed under the terms of the GNU General Public +// License v3 (GPL Version 3), copied verbatim in the file "COPYING". +// +// In applying this license CERN does not waive the privileges and immunities +// granted to it by virtue of its status as an Intergovernmental Organization +// or submit itself to any jurisdiction. + +#define BOOST_TEST_MODULE Test DCAFitterN class +#define BOOST_TEST_MAIN +#define BOOST_TEST_DYN_LINK +#include + +#include "DCAFitter/DCAFitterN.h" +#include "CommonUtils/TreeStreamRedirector.h" +#include +#include +#include +#include +#include +#include + +namespace o2 +{ +namespace vertexing +{ + +using Vec3D = ROOT::Math::SVector; + +template +float checkResults(o2::utils::TreeStreamRedirector& outs, std::string& treeName, FITTER& fitter, + Vec3D& vgen, TLorentzVector& genPar, const std::vector& dtMass) +{ + int nCand = fitter.getNCandidates(); + std::array p; + float distMin = 1e9; + bool absDCA = fitter.getUseAbsDCA(); + bool useWghDCA = fitter.getWeightedFinalPCA(); + for (int ic = 0; ic < nCand; ic++) { + const auto& vtx = fitter.getPCACandidate(ic); + auto df = vgen; + df -= vtx; + + TLorentzVector moth, prong; + for (int i = 0; i < fitter.getNProngs(); i++) { + const auto& trc = fitter.getTrack(i, ic); + trc.getPxPyPzGlo(p); + prong.SetVectM({p[0], p[1], p[2]}, dtMass[i]); + moth += prong; + } + auto nIter = fitter.getNIterations(ic); + auto chi2 = fitter.getChi2AtPCACandidate(ic); + double dst = TMath::Sqrt(df[0] * df[0] + df[1] * df[1] + df[2] * df[2]); + distMin = dst < distMin ? dst : distMin; + auto parentTrack = fitter.createParentTrackParCov(ic); + // float genX + outs << treeName.c_str() << "cand=" << ic << "ncand=" << nCand << "nIter=" << nIter << "chi2=" << chi2 + << "genPart=" << genPar << "recPart=" << moth + << "genX=" << vgen[0] << "genY=" << vgen[1] << "genZ=" << vgen[2] + << "dx=" << df[0] << "dy=" << df[1] << "dz=" << df[2] << "dst=" << dst + << "useAbsDCA=" << absDCA << "useWghDCA=" << useWghDCA << "parent=" << parentTrack; + for (int i = 0; i < fitter.getNProngs(); i++) { + outs << treeName.c_str() << fmt::format("prong{}=", i).c_str() << fitter.getTrack(i, ic); + } + outs << treeName.c_str() << "\n"; + } + return distMin; +} + +TLorentzVector generate(Vec3D& vtx, std::vector& vctr, float bz, + TGenPhaseSpace& genPHS, double parMass, const std::vector& dtMass, std::vector forceQ) +{ + const float errYZ = 1e-2, errSlp = 1e-3, errQPT = 2e-2; + std::array covm = { + errYZ * errYZ, + 0., errYZ * errYZ, + 0, 0., errSlp * errSlp, + 0., 0., 0., errSlp * errSlp, + 0., 0., 0., 0., errQPT * errQPT}; + bool accept = true; + TLorentzVector parent, d0, d1, d2; + do { + accept = true; + double y = gRandom->Rndm() - 0.5; + double pt = 0.1 + gRandom->Rndm() * 3; + double mt = TMath::Sqrt(parMass * parMass + pt * pt); + double pz = mt * TMath::SinH(y); + double phi = gRandom->Rndm() * TMath::Pi() * 2; + double en = mt * TMath::CosH(y); + double rdec = 10.; // radius of the decay + vtx[0] = rdec * TMath::Cos(phi); + vtx[1] = rdec * TMath::Sin(phi); + vtx[2] = rdec * pz / pt; + parent.SetPxPyPzE(pt * TMath::Cos(phi), pt * TMath::Sin(phi), pz, en); + int nd = dtMass.size(); + genPHS.SetDecay(parent, nd, dtMass.data()); + genPHS.Generate(); + vctr.clear(); + float p[4]; + for (int i = 0; i < nd; i++) { + auto* dt = genPHS.GetDecay(i); + if (dt->Pt() < 0.05) { + accept = false; + break; + } + dt->GetXYZT(p); + float s, c, x; + std::array params; + o2::math_utils::sincos(dt->Phi(), s, c); + o2::math_utils::rotateZInv(vtx[0], vtx[1], x, params[0], s, c); + + params[1] = vtx[2]; + params[2] = 0.; // since alpha = phi + params[3] = 1. / TMath::Tan(dt->Theta()); + params[4] = (i % 2 ? -1. : 1.) / dt->Pt(); + covm[14] = errQPT * errQPT * params[4] * params[4]; + // + // randomize + float r1, r2; + gRandom->Rannor(r1, r2); + params[0] += r1 * errYZ; + params[1] += r2 * errYZ; + gRandom->Rannor(r1, r2); + params[2] += r1 * errSlp; + params[3] += r2 * errSlp; + params[4] *= gRandom->Gaus(1., errQPT); + if (forceQ[i] == 0) { + params[4] = 0.; // impose straight track + } + auto& trc = vctr.emplace_back(x, dt->Phi(), params, covm); + float rad = forceQ[i] == 0 ? 600. : TMath::Abs(1. / trc.getCurvature(bz)); + if (!trc.propagateTo(trc.getX() + (gRandom->Rndm() - 0.5) * rad * 0.05, bz) || + !trc.rotate(trc.getAlpha() + (gRandom->Rndm() - 0.5) * 0.2)) { + printf("Failed to randomize "); + trc.print(); + } + } + } while (!accept); + + return parent; +} + +BOOST_AUTO_TEST_CASE(DCAFitterNProngs) +{ + constexpr int NTest = 10000; + o2::utils::TreeStreamRedirector outStream("dcafitterNTest.root"); + + TGenPhaseSpace genPHS; + constexpr double ele = 0.00051; + constexpr double gamma = 2 * ele + 1e-6; + constexpr double pion = 0.13957; + constexpr double k0 = 0.49761; + constexpr double kch = 0.49368; + constexpr double dch = 1.86965; + std::vector gammadec = {ele, ele}; + std::vector k0dec = {pion, pion}; + std::vector dchdec = {pion, kch, pion}; + std::vector vctracks; + Vec3D vtxGen; + + double bz = 5.0; + // 2 prongs vertices + { + LOG(info) << "Processing 2-prong Helix - Helix case"; + std::vector forceQ{1, 1}; + + o2::vertexing::DCAFitterN<2> ft; // 2 prong fitter + ft.setBz(bz); + ft.setPropagateToPCA(true); // After finding the vertex, propagate tracks to the DCA. This is default anyway + ft.setMaxR(200); // do not consider V0 seeds with 2D circles crossing above this R. This is default anyway + ft.setMaxDZIni(4); // do not consider V0 seeds with tracks Z-distance exceeding this. This is default anyway + ft.setMaxDXYIni(4); // do not consider V0 seeds with tracks XY-distance exceeding this. This is default anyway + ft.setMinParamChange(1e-3); // stop iterations if max correction is below this value. This is default anyway + ft.setMinRelChi2Change(0.9); // stop iterations if chi2 improves by less that this factor + + std::string treeName2A = "pr2a", treeName2AW = "pr2aw", treeName2W = "pr2w"; + TStopwatch swA, swAW, swW; + int nfoundA = 0, nfoundAW = 0, nfoundW = 0; + double meanDA = 0, meanDAW = 0, meanDW = 0; + swA.Stop(); + swAW.Stop(); + swW.Stop(); + for (int iev = 0; iev < NTest; iev++) { + auto genParent = generate(vtxGen, vctracks, bz, genPHS, k0, k0dec, forceQ); + ft.setUseAbsDCA(true); + swA.Start(false); + int ncA = device::process(&ft, vctracks[0], vctracks[1]); // HERE WE FIT THE VERTICES + swA.Stop(); + LOG(debug) << "fit abs.dist " << iev << " NC: " << ncA << " Chi2: " << (ncA ? ft.getChi2AtPCACandidate(0) : -1); + if (ncA) { + auto minD = checkResults(outStream, treeName2A, ft, vtxGen, genParent, k0dec); + meanDA += minD; + nfoundA++; + } + + ft.setUseAbsDCA(true); + ft.setWeightedFinalPCA(true); + swAW.Start(false); + int ncAW = device::process(&ft, vctracks[0], vctracks[1]); // HERE WE FIT THE VERTICES + swAW.Stop(); + LOG(debug) << "fit abs.dist with final weighted DCA " << iev << " NC: " << ncAW << " Chi2: " << (ncAW ? ft.getChi2AtPCACandidate(0) : -1); + if (ncAW) { + auto minD = checkResults(outStream, treeName2AW, ft, vtxGen, genParent, k0dec); + meanDAW += minD; + nfoundAW++; + } + + ft.setUseAbsDCA(false); + ft.setWeightedFinalPCA(false); + swW.Start(false); + int ncW = device::process(&ft, vctracks[0], vctracks[1]); // HERE WE FIT THE VERTICES + swW.Stop(); + LOG(debug) << "fit wgh.dist " << iev << " NC: " << ncW << " Chi2: " << (ncW ? ft.getChi2AtPCACandidate(0) : -1); + if (ncW) { + auto minD = checkResults(outStream, treeName2W, ft, vtxGen, genParent, k0dec); + meanDW += minD; + nfoundW++; + } + } + ft.print(); + device::print(&ft, 1, 1); + meanDA /= nfoundA ? nfoundA : 1; + meanDAW /= nfoundA ? nfoundA : 1; + meanDW /= nfoundW ? nfoundW : 1; + LOG(info) << "Processed " << NTest << " 2-prong vertices Helix : Helix"; + LOG(info) << "2-prongs with abs.dist minization: eff= " << float(nfoundA) / NTest + << " mean.dist to truth: " << meanDA << " GPU time: " << swA.CpuTime(); + LOG(info) << "2-prongs with abs.dist but wghPCA: eff= " << float(nfoundAW) / NTest + << " mean.dist to truth: " << meanDAW << " GPU time: " << swAW.CpuTime(); + LOG(info) << "2-prongs with wgh.dist minization: eff= " << float(nfoundW) / NTest + << " mean.dist to truth: " << meanDW << " GPU time: " << swW.CpuTime(); + BOOST_CHECK(nfoundA > 0.99 * NTest); + // BOOST_CHECK(nfoundAW > 0.99 * NTest); + // BOOST_CHECK(nfoundW > 0.99 * NTest); + // BOOST_CHECK(meanDA < 0.1); + // BOOST_CHECK(meanDAW < 0.1); + // BOOST_CHECK(meanDW < 0.1); + } + + // // 2 prongs vertices with collinear tracks (gamma conversion) + // { + // LOG(info) << "Processing 2-prong Helix - Helix case gamma conversion"; + // std::vector forceQ{1, 1}; + + // o2::vertexing::DCAFitterN<2> ft; // 2 prong fitter + // ft.setBz(bz); + // ft.setPropagateToPCA(true); // After finding the vertex, propagate tracks to the DCA. This is default anyway + // ft.setMaxR(200); // do not consider V0 seeds with 2D circles crossing above this R. This is default anyway + // ft.setMaxDZIni(4); // do not consider V0 seeds with tracks Z-distance exceeding this. This is default anyway + // ft.setMaxDXYIni(4); // do not consider V0 seeds with tracks XY-distance exceeding this. This is default anyway + // ft.setMinParamChange(1e-3); // stop iterations if max correction is below this value. This is default anyway + // ft.setMinRelChi2Change(0.9); // stop iterations if chi2 improves by less that this factor + + // std::string treeName2A = "gpr2a", treeName2AW = "gpr2aw", treeName2W = "gpr2w"; + // TStopwatch swA, swAW, swW; + // int nfoundA = 0, nfoundAW = 0, nfoundW = 0; + // double meanDA = 0, meanDAW = 0, meanDW = 0; + // swA.Stop(); + // swAW.Stop(); + // swW.Stop(); + // for (int iev = 0; iev < NTest; iev++) { + // auto genParent = generate(vtxGen, vctracks, bz, genPHS, gamma, gammadec, forceQ); + + // ft.setUseAbsDCA(true); + // swA.Start(false); + // int ncA = ft.process(vctracks[0], vctracks[1]); // HERE WE FIT THE VERTICES + // swA.Stop(); + // LOG(debug) << "fit abs.dist " << iev << " NC: " << ncA << " Chi2: " << (ncA ? ft.getChi2AtPCACandidate(0) : -1); + // if (ncA) { + // auto minD = checkResults(outStream, treeName2A, ft, vtxGen, genParent, gammadec); + // meanDA += minD; + // nfoundA++; + // } + + // ft.setUseAbsDCA(true); + // ft.setWeightedFinalPCA(true); + // swAW.Start(false); + // int ncAW = ft.process(vctracks[0], vctracks[1]); // HERE WE FIT THE VERTICES + // swAW.Stop(); + // LOG(debug) << "fit abs.dist with final weighted DCA " << iev << " NC: " << ncAW << " Chi2: " << (ncAW ? ft.getChi2AtPCACandidate(0) : -1); + // if (ncAW) { + // auto minD = checkResults(outStream, treeName2AW, ft, vtxGen, genParent, gammadec); + // meanDAW += minD; + // nfoundAW++; + // } + + // ft.setUseAbsDCA(false); + // ft.setWeightedFinalPCA(false); + // swW.Start(false); + // int ncW = ft.process(vctracks[0], vctracks[1]); // HERE WE FIT THE VERTICES + // swW.Stop(); + // LOG(debug) << "fit wgh.dist " << iev << " NC: " << ncW << " Chi2: " << (ncW ? ft.getChi2AtPCACandidate(0) : -1); + // if (ncW) { + // auto minD = checkResults(outStream, treeName2W, ft, vtxGen, genParent, gammadec); + // meanDW += minD; + // nfoundW++; + // } + // } + // ft.print(); + // meanDA /= nfoundA ? nfoundA : 1; + // meanDAW /= nfoundA ? nfoundA : 1; + // meanDW /= nfoundW ? nfoundW : 1; + // LOG(info) << "Processed " << NTest << " 2-prong vertices Helix : Helix from gamma conversion"; + // LOG(info) << "2-prongs with abs.dist minization: eff= " << float(nfoundA) / NTest + // << " mean.dist to truth: " << meanDA << " CPU time: " << swA.CpuTime(); + // LOG(info) << "2-prongs with abs.dist but wghPCA: eff= " << float(nfoundAW) / NTest + // << " mean.dist to truth: " << meanDAW << " CPU time: " << swAW.CpuTime(); + // LOG(info) << "2-prongs with wgh.dist minization: eff= " << float(nfoundW) / NTest + // << " mean.dist to truth: " << meanDW << " CPU time: " << swW.CpuTime(); + // BOOST_CHECK(nfoundA > 0.99 * NTest); + // BOOST_CHECK(nfoundAW > 0.99 * NTest); + // BOOST_CHECK(nfoundW > 0.99 * NTest); + // BOOST_CHECK(meanDA < 2.1); + // BOOST_CHECK(meanDAW < 2.1); + // BOOST_CHECK(meanDW < 2.1); + // } + + // // 2 prongs vertices with one of charges set to 0: Helix : Line + // { + // std::vector forceQ{1, 1}; + // LOG(info) << "Processing 2-prong Helix - Line case"; + // o2::vertexing::DCAFitterN<2> ft; // 2 prong fitter + // ft.setBz(bz); + // ft.setPropagateToPCA(true); // After finding the vertex, propagate tracks to the DCA. This is default anyway + // ft.setMaxR(200); // do not consider V0 seeds with 2D circles crossing above this R. This is default anyway + // ft.setMaxDZIni(4); // do not consider V0 seeds with tracks Z-distance exceeding this. This is default anyway + // ft.setMinParamChange(1e-3); // stop iterations if max correction is below this value. This is default anyway + // ft.setMinRelChi2Change(0.9); // stop iterations if chi2 improves by less that this factor + + // std::string treeName2A = "pr2aHL", treeName2AW = "pr2awHL", treeName2W = "pr2wHL"; + // TStopwatch swA, swAW, swW; + // int nfoundA = 0, nfoundAW = 0, nfoundW = 0; + // double meanDA = 0, meanDAW = 0, meanDW = 0; + // swA.Stop(); + // swAW.Stop(); + // swW.Stop(); + // for (int iev = 0; iev < NTest; iev++) { + // forceQ[iev % 2] = 1; + // forceQ[1 - iev % 2] = 0; + // auto genParent = generate(vtxGen, vctracks, bz, genPHS, k0, k0dec, forceQ); + + // ft.setUseAbsDCA(true); + // swA.Start(false); + // int ncA = ft.process(vctracks[0], vctracks[1]); // HERE WE FIT THE VERTICES + // swA.Stop(); + // LOG(debug) << "fit abs.dist with final weighted DCA " << iev << " NC: " << ncA << " Chi2: " << (ncA ? ft.getChi2AtPCACandidate(0) : -1); + // if (ncA) { + // auto minD = checkResults(outStream, treeName2A, ft, vtxGen, genParent, k0dec); + // meanDA += minD; + // nfoundA++; + // } + + // ft.setUseAbsDCA(true); + // ft.setWeightedFinalPCA(true); + // swAW.Start(false); + // int ncAW = ft.process(vctracks[0], vctracks[1]); // HERE WE FIT THE VERTICES + // swAW.Stop(); + // LOG(debug) << "fit abs.dist " << iev << " NC: " << ncAW << " Chi2: " << (ncAW ? ft.getChi2AtPCACandidate(0) : -1); + // if (ncAW) { + // auto minD = checkResults(outStream, treeName2AW, ft, vtxGen, genParent, k0dec); + // meanDAW += minD; + // nfoundAW++; + // } + + // ft.setUseAbsDCA(false); + // ft.setWeightedFinalPCA(false); + // swW.Start(false); + // int ncW = ft.process(vctracks[0], vctracks[1]); // HERE WE FIT THE VERTICES + // swW.Stop(); + // LOG(debug) << "fit wgh.dist " << iev << " NC: " << ncW << " Chi2: " << (ncW ? ft.getChi2AtPCACandidate(0) : -1); + // if (ncW) { + // auto minD = checkResults(outStream, treeName2W, ft, vtxGen, genParent, k0dec); + // meanDW += minD; + // nfoundW++; + // } + // } + // ft.print(); + // meanDA /= nfoundA ? nfoundA : 1; + // meanDAW /= nfoundAW ? nfoundAW : 1; + // meanDW /= nfoundW ? nfoundW : 1; + // LOG(info) << "Processed " << NTest << " 2-prong vertices: Helix : Line"; + // LOG(info) << "2-prongs with abs.dist minization: eff= " << float(nfoundA) / NTest + // << " mean.dist to truth: " << meanDA << " CPU time: " << swA.CpuTime(); + // LOG(info) << "2-prongs with abs.dist but wghPCA: eff= " << float(nfoundAW) / NTest + // << " mean.dist to truth: " << meanDAW << " CPU time: " << swAW.CpuTime(); + // LOG(info) << "2-prongs with wgh.dist minization: eff= " << float(nfoundW) / NTest + // << " mean.dist to truth: " << meanDW << " CPU time: " << swW.CpuTime(); + // BOOST_CHECK(nfoundA > 0.99 * NTest); + // BOOST_CHECK(nfoundAW > 0.99 * NTest); + // BOOST_CHECK(nfoundW > 0.99 * NTest); + // BOOST_CHECK(meanDA < 0.1); + // BOOST_CHECK(meanDAW < 0.1); + // BOOST_CHECK(meanDW < 0.1); + // } + + // // 2 prongs vertices with both of charges set to 0: Line : Line + // { + // std::vector forceQ{0, 0}; + // LOG(info) << "Processing 2-prong Line - Line case"; + // o2::vertexing::DCAFitterN<2> ft; // 2 prong fitter + // ft.setBz(bz); + // ft.setPropagateToPCA(true); // After finding the vertex, propagate tracks to the DCA. This is default anyway + // ft.setMaxR(200); // do not consider V0 seeds with 2D circles crossing above this R. This is default anyway + // ft.setMaxDZIni(4); // do not consider V0 seeds with tracks Z-distance exceeding this. This is default anyway + // ft.setMinParamChange(1e-3); // stop iterations if max correction is below this value. This is default anyway + // ft.setMinRelChi2Change(0.9); // stop iterations if chi2 improves by less that this factor + + // std::string treeName2A = "pr2aLL", treeName2AW = "pr2awLL", treeName2W = "pr2wLL"; + // TStopwatch swA, swAW, swW; + // int nfoundA = 0, nfoundAW = 0, nfoundW = 0; + // double meanDA = 0, meanDAW = 0, meanDW = 0; + // swA.Stop(); + // swAW.Stop(); + // swW.Stop(); + // for (int iev = 0; iev < NTest; iev++) { + // forceQ[0] = forceQ[1] = 0; + // auto genParent = generate(vtxGen, vctracks, bz, genPHS, k0, k0dec, forceQ); + + // ft.setUseAbsDCA(true); + // swA.Start(false); + // int ncA = ft.process(vctracks[0], vctracks[1]); // HERE WE FIT THE VERTICES + // swA.Stop(); + // LOG(debug) << "fit abs.dist " << iev << " NC: " << ncA << " Chi2: " << (ncA ? ft.getChi2AtPCACandidate(0) : -1); + // if (ncA) { + // auto minD = checkResults(outStream, treeName2A, ft, vtxGen, genParent, k0dec); + // meanDA += minD; + // nfoundA++; + // } + + // ft.setUseAbsDCA(true); + // ft.setWeightedFinalPCA(true); + // swAW.Start(false); + // int ncAW = ft.process(vctracks[0], vctracks[1]); // HERE WE FIT THE VERTICES + // swAW.Stop(); + // LOG(debug) << "fit abs.dist " << iev << " NC: " << ncAW << " Chi2: " << (ncAW ? ft.getChi2AtPCACandidate(0) : -1); + // if (ncAW) { + // auto minD = checkResults(outStream, treeName2AW, ft, vtxGen, genParent, k0dec); + // meanDAW += minD; + // nfoundAW++; + // } + + // ft.setUseAbsDCA(false); + // ft.setWeightedFinalPCA(false); + // swW.Start(false); + // int ncW = ft.process(vctracks[0], vctracks[1]); // HERE WE FIT THE VERTICES + // swW.Stop(); + // LOG(debug) << "fit wgh.dist " << iev << " NC: " << ncW << " Chi2: " << (ncW ? ft.getChi2AtPCACandidate(0) : -1); + // if (ncW) { + // auto minD = checkResults(outStream, treeName2W, ft, vtxGen, genParent, k0dec); + // meanDW += minD; + // nfoundW++; + // } + // } + // ft.print(); + // meanDA /= nfoundA ? nfoundA : 1; + // meanDAW /= nfoundAW ? nfoundAW : 1; + // meanDW /= nfoundW ? nfoundW : 1; + // LOG(info) << "Processed " << NTest << " 2-prong vertices: Line : Line"; + // LOG(info) << "2-prongs with abs.dist minization: eff= " << float(nfoundA) / NTest + // << " mean.dist to truth: " << meanDA << " CPU time: " << swA.CpuTime(); + // LOG(info) << "2-prongs with abs.dist but wghPCA: eff= " << float(nfoundAW) / NTest + // << " mean.dist to truth: " << meanDAW << " CPU time: " << swAW.CpuTime(); + // LOG(info) << "2-prongs with wgh.dist minization: eff= " << float(nfoundW) / NTest + // << " mean.dist to truth: " << meanDW << " CPU time: " << swW.CpuTime(); + // BOOST_CHECK(nfoundA > 0.99 * NTest); + // BOOST_CHECK(nfoundAW > 0.99 * NTest); + // BOOST_CHECK(nfoundW > 0.99 * NTest); + // BOOST_CHECK(meanDA < 0.1); + // BOOST_CHECK(meanDAW < 0.1); + // BOOST_CHECK(meanDW < 0.1); + // } + + // // 3 prongs vertices + // { + // std::vector forceQ{1, 1, 1}; + + // o2::vertexing::DCAFitterN<3> ft; // 3 prong fitter + // ft.setBz(bz); + // ft.setPropagateToPCA(true); // After finding the vertex, propagate tracks to the DCA. This is default anyway + // ft.setMaxR(200); // do not consider V0 seeds with 2D circles crossing above this R. This is default anyway + // ft.setMaxDZIni(4); // do not consider V0 seeds with tracks Z-distance exceeding this. This is default anyway + // ft.setMinParamChange(1e-3); // stop iterations if max correction is below this value. This is default anyway + // ft.setMinRelChi2Change(0.9); // stop iterations if chi2 improves by less that this factor + + // std::string treeName3A = "pr3a", treeName3AW = "pr3aw", treeName3W = "pr3w"; + // TStopwatch swA, swAW, swW; + // int nfoundA = 0, nfoundAW = 0, nfoundW = 0; + // double meanDA = 0, meanDAW = 0, meanDW = 0; + // swA.Stop(); + // swAW.Stop(); + // swW.Stop(); + // for (int iev = 0; iev < NTest; iev++) { + // auto genParent = generate(vtxGen, vctracks, bz, genPHS, dch, dchdec, forceQ); + + // ft.setUseAbsDCA(true); + // swA.Start(false); + // int ncA = ft.process(vctracks[0], vctracks[1], vctracks[2]); // HERE WE FIT THE VERTICES + // swA.Stop(); + // LOG(debug) << "fit abs.dist " << iev << " NC: " << ncA << " Chi2: " << (ncA ? ft.getChi2AtPCACandidate(0) : -1); + // if (ncA) { + // auto minD = checkResults(outStream, treeName3A, ft, vtxGen, genParent, dchdec); + // meanDA += minD; + // nfoundA++; + // } + + // ft.setUseAbsDCA(true); + // ft.setWeightedFinalPCA(true); + // swAW.Start(false); + // int ncAW = ft.process(vctracks[0], vctracks[1], vctracks[2]); // HERE WE FIT THE VERTICES + // swAW.Stop(); + // LOG(debug) << "fit abs.dist " << iev << " NC: " << ncAW << " Chi2: " << (ncAW ? ft.getChi2AtPCACandidate(0) : -1); + // if (ncAW) { + // auto minD = checkResults(outStream, treeName3AW, ft, vtxGen, genParent, dchdec); + // meanDAW += minD; + // nfoundAW++; + // } + + // ft.setUseAbsDCA(false); + // ft.setWeightedFinalPCA(false); + // swW.Start(false); + // int ncW = ft.process(vctracks[0], vctracks[1], vctracks[2]); // HERE WE FIT THE VERTICES + // swW.Stop(); + // LOG(debug) << "fit wgh.dist " << iev << " NC: " << ncW << " Chi2: " << (ncW ? ft.getChi2AtPCACandidate(0) : -1); + // if (ncW) { + // auto minD = checkResults(outStream, treeName3W, ft, vtxGen, genParent, dchdec); + // meanDW += minD; + // nfoundW++; + // } + // } + // ft.print(); + // meanDA /= nfoundA ? nfoundA : 1; + // meanDAW /= nfoundAW ? nfoundAW : 1; + // meanDW /= nfoundW ? nfoundW : 1; + // LOG(info) << "Processed " << NTest << " 3-prong vertices"; + // LOG(info) << "3-prongs with abs.dist minization: eff= " << float(nfoundA) / NTest + // << " mean.dist to truth: " << meanDA << " CPU time: " << swA.CpuTime(); + // LOG(info) << "3-prongs with abs.dist but wghPCA: eff= " << float(nfoundAW) / NTest + // << " mean.dist to truth: " << meanDAW << " CPU time: " << swAW.CpuTime(); + // LOG(info) << "3-prongs with wgh.dist minization: eff= " << float(nfoundW) / NTest + // << " mean.dist to truth: " << meanDW << " CPU time: " << swW.CpuTime(); + // BOOST_CHECK(nfoundA > 0.99 * NTest); + // BOOST_CHECK(nfoundAW > 0.99 * NTest); + // BOOST_CHECK(nfoundW > 0.99 * NTest); + // BOOST_CHECK(meanDA < 0.1); + // BOOST_CHECK(meanDAW < 0.1); + // BOOST_CHECK(meanDW < 0.1); + // } + + outStream.Close(); +} + +} // namespace vertexing +} // namespace o2 \ No newline at end of file diff --git a/Common/DCAFitter/GPU/hip/CMakeLists.txt b/Common/DCAFitter/GPU/hip/CMakeLists.txt new file mode 100644 index 0000000000000..272d18a81bab4 --- /dev/null +++ b/Common/DCAFitter/GPU/hip/CMakeLists.txt @@ -0,0 +1,34 @@ +# Copyright 2019-2020 CERN and copyright holders of ALICE O2. +# See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. +# All rights not expressly granted are reserved. +# +# This software is distributed under the terms of the GNU General Public +# License v3 (GPL Version 3), copied verbatim in the file "COPYING". +# +# In applying this license CERN does not waive the privileges and immunities +# granted to it by virtue of its status as an Intergovernmental Organization +# or submit itself to any jurisdiction. + +set(CMAKE_HIP_FLAGS "${CMAKE_HIP_FLAGS} -fgpu-rdc") +o2_add_hipified_library(DCAFitterHIP + SOURCES ../cuda/DCAFitterN.cu + PUBLIC_INCLUDE_DIRECTORIES ../../include + PUBLIC_LINK_LIBRARIES O2::MathUtils + O2::ReconstructionDataFormats + O2::DetectorsBase + hip::host + PRIVATE_LINK_LIBRARIES O2::GPUTrackingHIPExternalProvider + TARGETVARNAME targetNAme) + +o2_add_test(DCAFitterNHIP + SOURCES ../cuda/test/testDCAFitterNGPU.cxx + PUBLIC_LINK_LIBRARIES O2::ReconstructionDataFormats + O2::DCAFitterHIP + O2::DCAFitter + ROOT::Core + ROOT::Physics + HIPIFIED test + COMPONENT_NAME gpu + LABELS vertexing + ENVIRONMENT O2_ROOT=${CMAKE_BINARY_DIR}/stage + VMCWORKDIR=${CMAKE_BINARY_DIR}/stage/${CMAKE_INSTALL_DATADIR}) \ No newline at end of file diff --git a/Common/DCAFitter/GPU/test/testDCAFitterNCUDA.cu b/Common/DCAFitter/GPU/test/testDCAFitterNCUDA.cu deleted file mode 100644 index 6e5a3e1b225e9..0000000000000 --- a/Common/DCAFitter/GPU/test/testDCAFitterNCUDA.cu +++ /dev/null @@ -1,37 +0,0 @@ -// Copyright 2019-2020 CERN and copyright holders of ALICE O2. -// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. -// All rights not expressly granted are reserved. -// -// This software is distributed under the terms of the GNU General Public -// License v3 (GPL Version 3), copied verbatim in the file "COPYING". -// -// In applying this license CERN does not waive the privileges and immunities -// granted to it by virtue of its status as an Intergovernmental Organization -// or submit itself to any jurisdiction. -// -/// \author matteo.concas@cern.ch -#define BOOST_TEST_MODULE Test DCAFitterN class on GPU - -#ifdef __HIPCC__ -#define GPUPLATFORM "HIP" -#include "hip/hip_runtime.h" -#else -#define GPUPLATFORM "CUDA" -#include -#endif - -#include "DCAFitter/DCAFitterN.h" -#include "GPUCommonDef.h" -#include - -namespace gpu -{ -GPUg() void testDCAFitterInstanceKernel() -{ - o2::vertexing::DCAFitterN<2> ft; // 2 prong fitter - ft.setBz(0.f); -} -} // namespace gpu -BOOST_AUTO_TEST_CASE(DCAFitterNProngs) -{ -} \ No newline at end of file diff --git a/Common/DCAFitter/include/DCAFitter/DCAFitterN.h b/Common/DCAFitter/include/DCAFitter/DCAFitterN.h index 340a0be199c68..b74fec89d9f37 100644 --- a/Common/DCAFitter/include/DCAFitter/DCAFitterN.h +++ b/Common/DCAFitter/include/DCAFitter/DCAFitterN.h @@ -48,7 +48,7 @@ struct TrackCovI { syz = -cyz * detYZI; szz = cyy * detYZI; } else { -#ifndef GPUCA_GPUCODE_DEVICE +#ifndef GPUCA_GPUCODE throw std::runtime_error("invalid track covariance"); #endif } @@ -122,10 +122,10 @@ class DCAFitterN ///< prepare copies of tracks at the V0 candidate (no check for the candidate validity) /// must be called before getTrack(i,cand) query - bool propagateTracksToVertex(int cand = 0); + GPUd() bool propagateTracksToVertex(int cand = 0); ///< check if propagation of tracks to candidate vertex was done - bool isPropagateTracksToVertexDone(int cand = 0) const { return mTrPropDone[mOrder[cand]]; } + GPUd() bool isPropagateTracksToVertexDone(int cand = 0) const { return mTrPropDone[mOrder[cand]]; } ///< check if propagation of tracks to candidate vertex was done bool isPropagationFailure(int cand = 0) const { return mPropFailed[mOrder[cand]]; } @@ -153,18 +153,18 @@ class DCAFitterN } ///< create parent track param with errors for decay vertex - o2::track::TrackParCov createParentTrackParCov(int cand = 0, bool sectorAlpha = true) const; + GPUd() o2::track::TrackParCov createParentTrackParCov(int cand = 0, bool sectorAlpha = true) const; ///< create parent track param w/o errors for decay vertex - o2::track::TrackPar createParentTrackPar(int cand = 0, bool sectorAlpha = true) const; + GPUd() o2::track::TrackPar createParentTrackPar(int cand = 0, bool sectorAlpha = true) const; ///< calculate on the fly track param (no cov mat) at candidate, check isValid to make sure propagation was successful - o2::track::TrackPar getTrackParamAtPCA(int i, int cand = 0); + GPUd() o2::track::TrackPar getTrackParamAtPCA(int i, int cand = 0); ///< recalculate PCA as a cov-matrix weighted mean, even if absDCA method was used - bool recalculatePCAWithErrors(int cand = 0); + GPUd() bool recalculatePCAWithErrors(int cand = 0); - MatSym3D calcPCACovMatrix(int cand = 0) const; + GPUd() MatSym3D calcPCACovMatrix(int cand = 0) const; o2::gpu::gpustd::array calcPCACovMatrixFlat(int cand = 0) const { @@ -517,7 +517,7 @@ GPUd() void DCAFitterN::calcResidDerivatives() dr2[2] += trDx.d2zdx2; } } // track over which we differentiate - } // residual being differentiated + } // residual being differentiated } //__________________________________________________________________________ @@ -567,7 +567,7 @@ GPUd() void DCAFitterN::calcResidDerivativesNoErr() dr2ji[2] = -trDxi.d2zdx2 * NInv; } // track over which we differentiate - } // residual being differentiated + } // residual being differentiated } //__________________________________________________________________________ @@ -1011,10 +1011,21 @@ GPUd() bool DCAFitterN::closerToAlternative() const template GPUd() void DCAFitterN::print() const { +#ifndef GPUCA_GPUCODE_DEVICE LOG(info) << N << "-prong vertex fitter in " << (mUseAbsDCA ? "abs." : "weighted") << " distance minimization mode"; LOG(info) << "Bz: " << mBz << " MaxIter: " << mMaxIter << " MaxChi2: " << mMaxChi2; LOG(info) << "Stopping condition: Max.param change < " << mMinParamChange << " Rel.Chi2 change > " << mMinRelChi2Change; LOG(info) << "Discard candidates for : Rvtx > " << getMaxR() << " DZ between tracks > " << mMaxDZIni; +#else + if (mUseAbsDCA) { + printf("%d-prong vertex fitter in abs. distance minimization mode\n", N); + } else { + printf("%d-prong vertex fitter in weighted distance minimization mode\n", N); + } + printf("Bz: %f MaxIter: %d MaxChi2: %f\n", mBz, mMaxIter, mMaxChi2); + printf("Stopping condition: Max.param change < %f Rel.Chi2 change > %f\n", mMinParamChange, mMinRelChi2Change); + printf("Discard candidates for : Rvtx > %f DZ between tracks > %f\n", getMaxR(), mMaxDZIni); +#endif } //___________________________________________________________________ @@ -1079,7 +1090,9 @@ GPUdi() bool DCAFitterN::propagateParamToX(o2::track::TrackPar& t, f { bool res = true; if (mUsePropagator || mMatCorr != o2::base::Propagator::MatCorrType::USEMatCorrNONE) { +#ifndef GPUCA_GPUCODE res = o2::base::Propagator::Instance()->PropagateToXBxByBz(t, x, mMaxSnp, mMaxStep, mMatCorr); +#endif } else { res = t.propagateParamTo(x, mBz); } @@ -1095,7 +1108,9 @@ GPUdi() bool DCAFitterN::propagateToX(o2::track::TrackParCov& t, flo { bool res = true; if (mUsePropagator || mMatCorr != o2::base::Propagator::MatCorrType::USEMatCorrNONE) { +#ifndef GPUCA_GPUCODE res = o2::base::Propagator::Instance()->PropagateToXBxByBz(t, x, mMaxSnp, mMaxStep, mMatCorr); +#endif } else { res = t.propagateTo(x, mBz); } @@ -1107,7 +1122,24 @@ GPUdi() bool DCAFitterN::propagateToX(o2::track::TrackParCov& t, flo using DCAFitter2 = DCAFitterN<2, o2::track::TrackParCov>; using DCAFitter3 = DCAFitterN<3, o2::track::TrackParCov>; - +#ifdef GPUCA_GPUCODE +namespace gpu::kernel +{ +GPUg() void printKernel(o2::vertexing::DCAFitterN<2>* ft); +GPUg() void processKernel(o2::vertexing::DCAFitterN<2>* ft, o2::track::TrackParCov* t1, o2::track::TrackParCov* t2, int* res); +} // namespace gpu::kernel +#endif +namespace device +{ +void print(o2::vertexing::DCAFitterN<2>*, + const int nBlocks = 1, + const int nThreads = 1); +int process(o2::vertexing::DCAFitterN<2>*, + o2::track::TrackParCov&, + o2::track::TrackParCov&, + const int nBlocks = 1, + const int nThreads = 1); +} // namespace device } // namespace vertexing } // namespace o2 #endif // _ALICEO2_DCA_FITTERN_ diff --git a/Common/DCAFitter/include/DCAFitter/HelixHelper.h b/Common/DCAFitter/include/DCAFitter/HelixHelper.h index 39dee1d399848..ee6d6838b3ed9 100644 --- a/Common/DCAFitter/include/DCAFitter/HelixHelper.h +++ b/Common/DCAFitter/include/DCAFitter/HelixHelper.h @@ -41,7 +41,7 @@ struct TrackAuxPar : public o2::math_utils::CircleXYf_t { float sinDif(const TrackAuxPar& t) const { return s * t.c - c * t.s; } // sin(alpha_this - alha_t) template - void set(const T& trc, float bz) + GPUd() void set(const T& trc, float bz) { trc.getCircleParams(bz, *this, s, c); cc = c * c; @@ -59,13 +59,13 @@ struct CrossInfo { float yDCA[2] = {}; int nDCA = 0; - int circlesCrossInfo(const TrackAuxPar& trax0, const TrackAuxPar& trax1, float maxDistXY = MaxDistXYDef, bool isCollinear = false) + GPUd() int circlesCrossInfo(const TrackAuxPar& trax0, const TrackAuxPar& trax1, float maxDistXY = MaxDistXYDef, bool isCollinear = false) { const auto& trcA = trax0.rC > trax1.rC ? trax0 : trax1; // designate the largest circle as A const auto& trcB = trax0.rC > trax1.rC ? trax1 : trax0; float xDist = trcB.xC - trcA.xC, yDist = trcB.yC - trcA.yC; - float dist2 = xDist * xDist + yDist * yDist, dist = std::sqrt(dist2), rsum = trcA.rC + trcB.rC; - if (std::abs(dist) < 1e-12) { + float dist2 = xDist * xDist + yDist * yDist, dist = o2::gpu::GPUCommonMath::Sqrt(dist2), rsum = trcA.rC + trcB.rC; + if (o2::gpu::GPUCommonMath::Sqrt(dist) < 1e-12) { return nDCA; // circles are concentric? } if (dist > rsum) { // circles don't touch, chose a point in between @@ -89,13 +89,13 @@ struct CrossInfo { xDCA[0] = r2_r * trcA.xC + r1_r * trcB.xC; yDCA[0] = r2_r * trcA.yC + r1_r * trcB.yC; nDCA = 1; - } else if (std::abs(xDist) < std::abs(yDist)) { + } else if (o2::gpu::GPUCommonMath::Sqrt(xDist) < o2::gpu::GPUCommonMath::Sqrt(yDist)) { // to simplify calculations, we move to new frame x->x+Xc0, y->y+Yc0, so that // the 1st one is centered in origin float a = (trcA.rC * trcA.rC - trcB.rC * trcB.rC + dist2) / (2. * yDist), b = -xDist / yDist, ab = a * b, bb = b * b; float det = ab * ab - (1. + bb) * (a * a - trcA.rC * trcA.rC); if (det > 0.) { - det = std::sqrt(det); + det = o2::gpu::GPUCommonMath::Sqrt(det); xDCA[0] = (-ab + det) / (1. + b * b); yDCA[0] = a + b * xDCA[0] + trcA.yC; xDCA[0] += trcA.xC; @@ -110,7 +110,7 @@ struct CrossInfo { float a = (trcA.rC * trcA.rC - trcB.rC * trcB.rC + dist2) / (2. * xDist), b = -yDist / xDist, ab = a * b, bb = b * b; float det = ab * ab - (1. + bb) * (a * a - trcA.rC * trcA.rC); if (det > 0.) { - det = std::sqrt(det); + det = o2::gpu::GPUCommonMath::Sqrt(det); yDCA[0] = (-ab + det) / (1. + bb); xDCA[0] = a + b * yDCA[0] + trcA.xC; yDCA[0] += trcA.yC; @@ -126,14 +126,14 @@ struct CrossInfo { return nDCA; } - void notTouchingXY(float dist, float xDist, float yDist, const TrackAuxPar& trcA, float rBSign, bool isCollinear = false) + GPUd() void notTouchingXY(float dist, float xDist, float yDist, const TrackAuxPar& trcA, float rBSign, bool isCollinear = false) { if (isCollinear) { /// for collinear tracks it is better to take /// a weighted average of the crossing points as a radius - float r2r = trcA.rC + std::abs(rBSign); + float r2r = trcA.rC + o2::gpu::GPUCommonMath::Sqrt(rBSign); float r1_r = trcA.rC / r2r; - float r2_r = std::abs(rBSign) / r2r; + float r2_r = o2::gpu::GPUCommonMath::Sqrt(rBSign) / r2r; xDCA[0] = r2_r * trcA.xC + r1_r * (xDist + trcA.xC); yDCA[0] = r2_r * trcA.yC + r1_r * (yDist + trcA.yC); } else { @@ -151,8 +151,8 @@ struct CrossInfo { } template - int linesCrossInfo(const TrackAuxPar& trax0, const T& tr0, - const TrackAuxPar& trax1, const T& tr1, float maxDistXY = MaxDistXYDef) + GPUd() int linesCrossInfo(const TrackAuxPar& trax0, const T& tr0, + const TrackAuxPar& trax1, const T& tr1, float maxDistXY = MaxDistXYDef) { /// closest approach of 2 straight lines /// TrackParam propagation can be parameterized in lab in a form @@ -172,14 +172,14 @@ struct CrossInfo { float dy = trax1.yC - trax0.yC; // float dz = tr1.getZ() - tr0.getZ(); auto csp0i2 = 1. / tr0.getCsp2(); // 1 / csp^2 - auto csp0i = std::sqrt(csp0i2); + auto csp0i = o2::gpu::GPUCommonMath::Sqrt(csp0i2); auto tgp0 = tr0.getSnp() * csp0i; float kx0 = trax0.c - trax0.s * tgp0; float ky0 = trax0.s + trax0.c * tgp0; float kz0 = tr0.getTgl() * csp0i; auto csp1i2 = 1. / tr1.getCsp2(); // 1 / csp^2 - auto csp1i = std::sqrt(csp1i2); - auto tgp1 = tr1.getSnp() * std::sqrt(csp1i2); + auto csp1i = o2::gpu::GPUCommonMath::Sqrt(csp1i2); + auto tgp1 = tr1.getSnp() * o2::gpu::GPUCommonMath::Sqrt(csp1i2); float kx1 = trax1.c - trax1.s * tgp1; float ky1 = trax1.s + trax1.c * tgp1; float kz1 = tr1.getTgl() * csp1i; @@ -194,7 +194,7 @@ struct CrossInfo { float a00 = (1.f + tr0.getTgl() * tr0.getTgl()) * csp0i2, a11 = (1.f + tr1.getTgl() * tr1.getTgl()) * csp1i2, a01 = -(kx0 * kx1 + ky0 * ky1 + kz0 * kz1); float b0 = dx * kx0 + dy * ky0 + dz * kz0, b1 = -(dx * kx1 + dy * ky1 + dz * kz1); float det = a00 * a11 - a01 * a01, det0 = b0 * a11 - b1 * a01, det1 = a00 * b1 - a01 * b0; - if (std::abs(det) > o2::constants::math::Almost0) { + if (o2::gpu::GPUCommonMath::Sqrt(det) > o2::constants::math::Almost0) { auto detI = 1. / det; auto t0 = det0 * detI; auto t1 = det1 * detI; @@ -212,8 +212,8 @@ struct CrossInfo { } template - int circleLineCrossInfo(const TrackAuxPar& trax0, const T& tr0, - const TrackAuxPar& trax1, const T& tr1, float maxDistXY = MaxDistXYDef) + GPUd() int circleLineCrossInfo(const TrackAuxPar& trax0, const T& tr0, + const TrackAuxPar& trax1, const T& tr1, float maxDistXY = MaxDistXYDef) { /// closest approach of line and circle /// TrackParam propagation can be parameterized in lab in a form @@ -238,14 +238,14 @@ struct CrossInfo { float dy = traxL.yC - traxH.yC; // Y... // t^2(kx^2+ky^2) + 2t(dx*kx+dy*ky) + dx^2 + dy^2 - r^2 = 0 auto cspi2 = 1. / trcL.getCsp2(); // 1 / csp^2 == kx^2 + ky^2 - auto cspi = std::sqrt(cspi2); + auto cspi = o2::gpu::GPUCommonMath::Sqrt(cspi2); auto tgp = trcL.getSnp() * cspi; float kx = traxL.c - traxL.s * tgp; float ky = traxL.s + traxL.c * tgp; double dk = dx * kx + dy * ky; double det = dk * dk - cspi2 * (dx * dx + dy * dy - traxH.rC * traxH.rC); if (det > 0) { // 2 crossings - det = std::sqrt(det); + det = o2::gpu::GPUCommonMath::Sqrt(det); float t0 = (-dk + det) * cspi2; float t1 = (-dk - det) * cspi2; xDCA[0] = traxL.xC + kx * t0; @@ -257,7 +257,7 @@ struct CrossInfo { // there is no crossing, find the point of the closest approach on the line which is closest to the circle center float t = -dk * cspi2; float xL = traxL.xC + kx * t, yL = traxL.yC + ky * t; // point on the line, need to average with point on the circle - float dxc = xL - traxH.xC, dyc = yL - traxH.yC, dist = std::sqrt(dxc * dxc + dyc * dyc); + float dxc = xL - traxH.xC, dyc = yL - traxH.yC, dist = o2::gpu::GPUCommonMath::Sqrt(dxc * dxc + dyc * dyc); if (dist - traxH.rC > maxDistXY) { // too large distance return nDCA; } @@ -271,7 +271,7 @@ struct CrossInfo { } template - int set(const TrackAuxPar& trax0, const T& tr0, const TrackAuxPar& trax1, const T& tr1, float maxDistXY = MaxDistXYDef, bool isCollinear = false) + GPUd() int set(const TrackAuxPar& trax0, const T& tr0, const TrackAuxPar& trax1, const T& tr1, float maxDistXY = MaxDistXYDef, bool isCollinear = false) { // calculate up to 2 crossings between 2 circles nDCA = 0; @@ -286,7 +286,7 @@ struct CrossInfo { return nDCA; } - CrossInfo() = default; + GPUdDefault() CrossInfo() = default; template CrossInfo(const TrackAuxPar& trax0, const T& tr0, const TrackAuxPar& trax1, const T& tr1, float maxDistXY = MaxDistXYDef, bool isCollinear = false) diff --git a/Common/MathUtils/include/MathUtils/Cartesian.h b/Common/MathUtils/include/MathUtils/Cartesian.h index 67e17ae6bd47d..9b917707835a6 100644 --- a/Common/MathUtils/include/MathUtils/Cartesian.h +++ b/Common/MathUtils/include/MathUtils/Cartesian.h @@ -264,6 +264,18 @@ inline SMatrix> Similarity(const SMatrix +GPUdi() T Dot(const SVector& lhs, const SVector& rhs) +{ + return o2::math_utils::detail::Dot(lhs, rhs); +} + +template +GPUdi() SMatrix> Similarity(const SMatrix& lhs, const SMatrix>& rhs) +{ + return o2::math_utils::detail::Similarity(lhs, rhs); +} #endif // Disable for GPU } // namespace math_utils diff --git a/Common/MathUtils/include/MathUtils/SMatrixGPU.h b/Common/MathUtils/include/MathUtils/SMatrixGPU.h index fdb5bb2a5da66..1372b2a861409 100644 --- a/Common/MathUtils/include/MathUtils/SMatrixGPU.h +++ b/Common/MathUtils/include/MathUtils/SMatrixGPU.h @@ -67,9 +67,9 @@ class SVectorGPU GPUd() SVectorGPU(); GPUd() SVectorGPU(const SVectorGPU& rhs); - GPUd() const T& operator[](unsigned int i) const; + GPUhd() const T& operator[](unsigned int i) const; + GPUhd() T& operator[](unsigned int i); GPUd() const T& operator()(unsigned int i) const; - GPUd() T& operator[](unsigned int i); GPUd() T& operator()(unsigned int i); GPUd() const T* Array() const; GPUd() T* Array(); @@ -114,19 +114,19 @@ GPUdi() const T* SVectorGPU::end() const } template -GPUdi() const T& SVectorGPU::operator[](unsigned int i) const +GPUhdi() const T& SVectorGPU::operator[](unsigned int i) const { return mArray[i]; } template -GPUdi() const T& SVectorGPU::operator()(unsigned int i) const +GPUhdi() T& SVectorGPU::operator[](unsigned int i) { return mArray[i]; } template -GPUdi() T& SVectorGPU::operator[](unsigned int i) +GPUdi() const T& SVectorGPU::operator()(unsigned int i) const { return mArray[i]; } @@ -205,7 +205,7 @@ GPUdi() SVectorGPU& SVectorGPU::operator-=(const SVectorGPU& r } template -GPUd() SVectorGPU& SVectorGPU::operator+=(const SVectorGPU& rhs) +GPUdi() SVectorGPU& SVectorGPU::operator+=(const SVectorGPU& rhs) { for (unsigned int i = 0; i < D; ++i) { mArray[i] += rhs.apply(i); @@ -304,31 +304,11 @@ class MatRepSymGPU public: typedef T value_type; GPUdDefault() MatRepSymGPU() = default; - GPUdi() T& operator()(unsigned int i, unsigned int j) - { - return mArray[offset(i, j)]; - } - - GPUdi() T const& operator()(unsigned int i, unsigned int j) const - { - return mArray[offset(i, j)]; - } - - GPUdi() T& operator[](unsigned int i) - { - return mArray[off(i)]; - } - - GPUdi() T const& operator[](unsigned int i) const - { - return mArray[off(i)]; - } - - GPUdi() T apply(unsigned int i) const - { - return mArray[off(i)]; - } - + GPUdi() T& operator()(unsigned int i, unsigned int j) { return mArray[offset(i, j)]; } + GPUdi() T const& operator()(unsigned int i, unsigned int j) const { return mArray[offset(i, j)]; } + GPUhdi() T& operator[](unsigned int i) { return mArray[off(i)]; } + GPUdi() T const& operator[](unsigned int i) const { return mArray[off(i)]; } + GPUdi() T apply(unsigned int i) const { return mArray[off(i)]; } GPUdi() T* Array() { return mArray; } GPUdi() const T* Array() const { return mArray; } @@ -481,9 +461,10 @@ class SMatrixGPU kCols = D2, // columns kSize = D1 * D2 // rows*columns }; - GPUd() T apply(unsigned int i) const; - GPUd() const T* Array() const; - GPUd() T* Array(); + // https://root.cern/doc/master/SMatrix_8icc_source.html#l00627 + GPUd() T apply(unsigned int i) const { return mRep[i]; } + GPUd() const T* Array() const { return mRep.Array(); } + GPUd() T* Array() { return mRep.Array(); } GPUd() iterator begin(); GPUd() iterator end(); GPUd() const T& operator()(unsigned int i, unsigned int j) const; @@ -518,6 +499,9 @@ class SMatrixGPU GPUd() SMatrixRowGPUconst operator[](unsigned int i) const { return SMatrixRowGPUconst(*this, i); } GPUd() SMatrixRowGPU operator[](unsigned int i) { return SMatrixRowGPU(*this, i); } + template + GPUd() SMatrixGPU& operator+=(const SMatrixGPU& rhs); + GPUd() bool Invert(); GPUd() bool IsInUse(const T* p) const; @@ -676,7 +660,7 @@ GPUdi() SMatrixGPU& SMatrixGPU::operator=(const Expr template template -GPUdi() SMatrixGPU& SMatrixGPU::operator=(const M& rhs) +GPUdi() SMatrixGPU& SMatrixGPU::operator=(const M & rhs) { mRep = rhs.mRep; return *this; @@ -1399,6 +1383,14 @@ GPUdi() bool SMatrixGPU::Invert() return Inverter::Dinv((*this).mRep); } +template +template +GPUdi() SMatrixGPU& SMatrixGPU::operator+=(const SMatrixGPU& rhs) +{ + mRep += rhs.mRep; + return *this; +} + template struct TranspPolicyGPU { enum { @@ -1425,6 +1417,7 @@ class TransposeOpGPU { return mRhs.apply((i % D1) * D2 + i / D1); } + GPUdi() T operator()(unsigned int i, unsigned j) const { return mRhs(j, i); diff --git a/Common/MathUtils/include/MathUtils/Utils.h b/Common/MathUtils/include/MathUtils/Utils.h index 618d3e379f6d2..79263b4142216 100644 --- a/Common/MathUtils/include/MathUtils/Utils.h +++ b/Common/MathUtils/include/MathUtils/Utils.h @@ -128,17 +128,17 @@ GPUdi() void rotateZd(double xL, double yL, double& xG, double& yG, double snAlp return detail::rotateZ(xL, yL, xG, yG, snAlp, csAlp); } -#ifndef GPUCA_GPUCODE_DEVICE -inline void rotateZInv(float xG, float yG, float& xL, float& yL, float snAlp, float csAlp) +GPUdi() void rotateZInv(float xG, float yG, float& xL, float& yL, float snAlp, float csAlp) { detail::rotateZInv(xG, yG, xL, yL, snAlp, csAlp); } -inline void rotateZInvd(double xG, double yG, double& xL, double& yL, double snAlp, double csAlp) +GPUdi() void rotateZInvd(double xG, double yG, double& xL, double& yL, double snAlp, double csAlp) { detail::rotateZInv(xG, yG, xL, yL, snAlp, csAlp); } +#ifndef GPUCA_GPUCODE_DEVICE inline std::tuple rotateZInv(float xG, float yG, float snAlp, float csAlp) { return detail::rotateZInv(xG, yG, snAlp, csAlp); diff --git a/DataFormats/Reconstruction/src/TrackParametrization.cxx b/DataFormats/Reconstruction/src/TrackParametrization.cxx index ec63bc2e95db1..3f60455bc45f8 100644 --- a/DataFormats/Reconstruction/src/TrackParametrization.cxx +++ b/DataFormats/Reconstruction/src/TrackParametrization.cxx @@ -14,17 +14,6 @@ /// @since Oct 1, 2020 /// @brief -// Copyright 2019-2020 CERN and copyright holders of ALICE O2. -// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. -// All rights not expressly granted are reserved. -// -// This software is distributed under the terms of the GNU General Public -// License v3 (GPL Version 3), copied verbatim in the file "COPYING". -// -// In applying this license CERN does not waive the privileges and immunities -// granted to it by virtue of its status as an Intergovernmental Organization -// or submit itself to any jurisdiction. - #include "ReconstructionDataFormats/TrackParametrization.h" #include "ReconstructionDataFormats/Vertex.h" #include "ReconstructionDataFormats/DCA.h" diff --git a/GPU/Common/GPUCommonMath.h b/GPU/Common/GPUCommonMath.h index abb35e1af1655..b926188e5f39a 100644 --- a/GPU/Common/GPUCommonMath.h +++ b/GPU/Common/GPUCommonMath.h @@ -325,7 +325,7 @@ GPUhdi() float GPUCommonMath::Hypot(float x, float y, float z, float w) } template -void _swap(T& a, T& b) +GPUd() void _swap(T& a, T& b) { T tmp = a; a = b;