Removed TPCClusterDecompressor.inc
Gabriele Cimador committed Nov 15, 2024
1 parent f938d5e commit a56aab0
Showing 6 changed files with 131 additions and 309 deletions.
1 change: 0 additions & 1 deletion GPU/GPUTracking/CMakeLists.txt
@@ -121,7 +121,6 @@ set(HDRS_INSTALL
     Base/GPUReconstructionKernels.h
     DataCompression/GPUTPCClusterRejection.h
     DataCompression/GPUTPCCompressionKernels.inc
-    DataCompression/TPCClusterDecompressor.inc
     DataCompression/TPCClusterDecompressionCore.inc
     DataTypes/GPUdEdxInfo.h
     DataTypes/GPUHostDataTypes.h
1 change: 1 addition & 0 deletions GPU/GPUTracking/DataCompression/GPUTPCDecompression.h
@@ -44,6 +44,7 @@ class GPUTPCDecompression : public GPUProcessor
   friend class GPUTPCDecompressionUtilKernels;
   friend class GPUChainTracking;
   friend class TPCClusterDecompressionCore;
+
  public:
 #ifndef GPUCA_GPUCODE
   void InitializeProcessor();
8 changes: 0 additions & 8 deletions GPU/GPUTracking/DataCompression/GPUTPCDecompressionKernels.h
@@ -45,14 +45,6 @@ class GPUTPCDecompressionKernels : public GPUKernelTemplate
 
   template <int32_t iKernel = defaultKernel, typename... Args>
   GPUd() static void Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& smem, processorType& GPUrestrict() processors, Args... args);
-
-  /*template <typename... Args>
-  GPUd() static void decompressTrack(o2::tpc::CompressedClusters& cmprClusters, const GPUParam& param, const uint32_t maxTime, const uint32_t trackIndex, uint32_t& clusterOffset, Args&... args);
-  GPUdi() static o2::tpc::ClusterNative decompressTrackStore(const o2::tpc::CompressedClusters& cmprClusters, const uint32_t clusterOffset, uint32_t slice, uint32_t row, uint32_t pad, uint32_t time, GPUTPCDecompression& decompressor);
-  template <typename... Args>
-  GPUdi() static void decompressHits(const o2::tpc::CompressedClusters& cmprClusters, const uint32_t start, const uint32_t end, Args&... args);
-  GPUdi() static void decompressHitsStore(const o2::tpc::CompressedClusters& cmprClusters, uint32_t k, uint32_t time, uint16_t pad, o2::tpc::ClusterNative*& clusterNativeBuffer);*/
-
   GPUd() static uint32_t computeLinearTmpBufferIndex(uint32_t slice, uint32_t row, uint32_t maxClustersPerBuffer)
   {
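
For reference, computeLinearTmpBufferIndex flattens the per-(slice, row) scratch pages into one linear cluster buffer; the same arithmetic appears in the decompressTrackStore overload of TPCClusterDecompressionCore.inc below, which writes into decompressor.mTmpNativeClusters. A minimal sketch of the layout, with illustrative constants standing in for GPUCA_NSLICES and GPUCA_ROW_COUNT (not the O2 code itself):

#include <cstdint>

// Illustrative constants; the real code uses GPUCA_NSLICES and GPUCA_ROW_COUNT.
constexpr uint32_t kSlices = 36;
constexpr uint32_t kRows = 152;

// First slot of the (slice, row) page inside one flat allocation of
// kSlices * kRows * maxClustersPerBuffer cluster slots.
constexpr uint32_t linearTmpBufferIndex(uint32_t slice, uint32_t row, uint32_t maxClustersPerBuffer)
{
  return slice * (kRows * maxClustersPerBuffer) + row * maxClustersPerBuffer;
}

static_assert(linearTmpBufferIndex(0, 0, 100) == 0);
static_assert(linearTmpBufferIndex(0, 1, 100) == 100);         // next row page
static_assert(linearTmpBufferIndex(1, 0, 100) == kRows * 100); // next slice page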
261 changes: 130 additions & 131 deletions GPU/GPUTracking/DataCompression/TPCClusterDecompressionCore.inc
@@ -26,160 +26,159 @@ using namespace o2::tpc;
namespace GPUCA_NAMESPACE::gpu
{

class TPCClusterDecompressionCore
{
 public:
#ifndef GPUCA_GPUCODE
  GPUhi() static auto decompressTrackStore(const CompressedClusters& clustersCompressed, const uint32_t offset, uint32_t slice, uint32_t row, uint32_t pad, uint32_t time, std::function<void(const ClusterNative&, uint32_t)> func)
  {
    const auto cluster = ClusterNative(time, clustersCompressed.flagsA[offset], pad, clustersCompressed.sigmaTimeA[offset], clustersCompressed.sigmaPadA[offset], clustersCompressed.qMaxA[offset], clustersCompressed.qTotA[offset]);
    func(cluster, offset);
    return cluster;
  }

  GPUhi() static const auto& decompressTrackStore(const CompressedClusters& clustersCompressed, const uint32_t offset, uint32_t slice, uint32_t row, uint32_t pad, uint32_t time, std::vector<ClusterNative>& clusterVector)
  {
    clusterVector.emplace_back(time, clustersCompressed.flagsA[offset], pad, clustersCompressed.sigmaTimeA[offset], clustersCompressed.sigmaPadA[offset], clustersCompressed.qMaxA[offset], clustersCompressed.qTotA[offset]);
    return clusterVector.back();
  }

  GPUhi() static auto decompressTrackStore(const CompressedClusters& clustersCompressed, const uint32_t offset, uint32_t slice, uint32_t row, uint32_t pad, uint32_t time, std::vector<ClusterNative> (&clusters)[GPUCA_NSLICES][GPUCA_ROW_COUNT], std::atomic_flag (&locks)[GPUCA_NSLICES][GPUCA_ROW_COUNT])
  {
    std::vector<ClusterNative>& clusterVector = clusters[slice][row];
    auto& lock = locks[slice][row];
    while (lock.test_and_set(std::memory_order_acquire)) {
    }
    ClusterNative retVal = decompressTrackStore(clustersCompressed, offset, slice, row, pad, time, clusterVector);
    lock.clear(std::memory_order_release);
    return retVal;
  }
#endif

  GPUdi() static ClusterNative decompressTrackStore(const CompressedClusters& cmprClusters, const uint32_t clusterOffset, uint32_t slice, uint32_t row, uint32_t pad, uint32_t time, GPUTPCDecompression& decompressor)
  {
    uint32_t tmpBufferIndex = slice * (GPUCA_ROW_COUNT * decompressor.mMaxNativeClustersPerBuffer) + row * decompressor.mMaxNativeClustersPerBuffer;
    uint32_t currentClusterIndex = CAMath::AtomicAdd(decompressor.mNativeClustersIndex + (slice * GPUCA_ROW_COUNT + row), 1u);
    const ClusterNative c(time, cmprClusters.flagsA[clusterOffset], pad, cmprClusters.sigmaTimeA[clusterOffset], cmprClusters.sigmaPadA[clusterOffset], cmprClusters.qMaxA[clusterOffset], cmprClusters.qTotA[clusterOffset]);
    if (currentClusterIndex < decompressor.mMaxNativeClustersPerBuffer) {
      decompressor.mTmpNativeClusters[tmpBufferIndex + currentClusterIndex] = c;
    } else {
      decompressor.raiseError(GPUErrors::ERROR_DECOMPRESSION_ATTACHED_CLUSTER_OVERFLOW, slice * 1000 + row, currentClusterIndex, decompressor.mMaxNativeClustersPerBuffer);
      CAMath::AtomicExch(decompressor.mNativeClustersIndex + (slice * GPUCA_ROW_COUNT + row), decompressor.mMaxNativeClustersPerBuffer);
    }
    return c;
  }

  template <typename... Args>
  GPUhdi() static void decompressTrack(const CompressedClusters& cmprClusters, const GPUParam& param, const uint32_t maxTime, const uint32_t& trackIndex, uint32_t& clusterOffset, Args&... args)
  {
    float zOffset = 0;
    uint32_t slice = cmprClusters.sliceA[trackIndex];
    uint32_t row = cmprClusters.rowA[trackIndex];
    GPUTPCCompressionTrackModel track;
    uint32_t clusterIndex;
    for (clusterIndex = 0; clusterIndex < cmprClusters.nTrackClusters[trackIndex]; clusterIndex++) {
      uint32_t pad = 0, time = 0;
      if (clusterIndex != 0) {
        uint8_t tmpSlice = cmprClusters.sliceLegDiffA[clusterOffset - trackIndex - 1];
        bool changeLeg = (tmpSlice >= GPUCA_NSLICES);
        if (changeLeg) {
          tmpSlice -= GPUCA_NSLICES;
        }
        if (cmprClusters.nComppressionModes & GPUSettings::CompressionDifferences) {
          slice += tmpSlice;
          if (slice >= GPUCA_NSLICES) {
            slice -= GPUCA_NSLICES;
          }
          row += cmprClusters.rowDiffA[clusterOffset - trackIndex - 1];
          if (row >= GPUCA_ROW_COUNT) {
            row -= GPUCA_ROW_COUNT;
          }
        } else {
          slice = tmpSlice;
          row = cmprClusters.rowDiffA[clusterOffset - trackIndex - 1];
        }
        if (changeLeg && track.Mirror()) {
          break;
        }
        if (track.Propagate(param.tpcGeometry.Row2X(row), param.SliceParam[slice].Alpha)) {
          break;
        }
        uint32_t timeTmp = cmprClusters.timeResA[clusterOffset - trackIndex - 1];
        if (timeTmp & 800000) {
          timeTmp |= 0xFF000000;
        }
        time = timeTmp + ClusterNative::packTime(CAMath::Max(0.f, param.tpcGeometry.LinearZ2Time(slice, track.Z() + zOffset)));
        float tmpPad = CAMath::Max(0.f, CAMath::Min((float)param.tpcGeometry.NPads(GPUCA_ROW_COUNT - 1), param.tpcGeometry.LinearY2Pad(slice, row, track.Y())));
        pad = cmprClusters.padResA[clusterOffset - trackIndex - 1] + ClusterNative::packPad(tmpPad);
        time = time & 0xFFFFFF;
        pad = (uint16_t)pad;
        if (pad >= param.tpcGeometry.NPads(row) * ClusterNative::scalePadPacked) {
          if (pad >= 0xFFFF - 11968) { // Constant 11968 = (2^15 - MAX_PADS(138) * scalePadPacked(64)) / 2
            pad = 0;
          } else {
            pad = param.tpcGeometry.NPads(row) * ClusterNative::scalePadPacked - 1;
          }
        }
        if (param.continuousMaxTimeBin > 0 && time >= maxTime) {
          if (time >= 0xFFFFFF - 544768) { // Constant 544768 = (2^23 - LHCMAXBUNCHES(3564) * MAXORBITS(256) * scaleTimePacked(64) / BCPERTIMEBIN(8)) / 2)
            time = 0;
          } else {
            time = maxTime;
          }
        }
      } else {
        time = cmprClusters.timeA[trackIndex];
        pad = cmprClusters.padA[trackIndex];
      }
      const auto cluster = decompressTrackStore(cmprClusters, clusterOffset, slice, row, pad, time, args...);
      float y = param.tpcGeometry.LinearPad2Y(slice, row, cluster.getPad());
      float z = param.tpcGeometry.LinearTime2Z(slice, cluster.getTime());
      if (clusterIndex == 0) {
        zOffset = z;
        track.Init(param.tpcGeometry.Row2X(row), y, z - zOffset, param.SliceParam[slice].Alpha, cmprClusters.qPtA[trackIndex], param);
      }
      if (clusterIndex + 1 < cmprClusters.nTrackClusters[trackIndex] && track.Filter(y, z - zOffset, row)) {
        break;
      }
      clusterOffset++;
    }
    clusterOffset += cmprClusters.nTrackClusters[trackIndex] - clusterIndex;
  }

  GPUhdi() static const auto& decompressHitsStore(const CompressedClusters& cmprClusters, uint32_t k, uint32_t time, uint16_t pad, ClusterNative*& clusterBuffer)
  {
    return ((*(clusterBuffer++) = ClusterNative(time, cmprClusters.flagsU[k], pad, cmprClusters.sigmaTimeU[k], cmprClusters.sigmaPadU[k], cmprClusters.qMaxU[k], cmprClusters.qTotU[k])));
  }

  GPUhdi() static auto decompressHitsStore(const CompressedClusters& cmprClusters, uint32_t k, uint32_t time, uint16_t pad, std::function<void(const ClusterNative&, uint32_t)> func)
  {
    const auto cluster = ClusterNative(time, cmprClusters.flagsU[k], pad, cmprClusters.sigmaTimeU[k], cmprClusters.sigmaPadU[k], cmprClusters.qMaxU[k], cmprClusters.qTotU[k]);
    func(cluster, k);
    return cluster;
  }

  template <typename... Args>
  GPUdi() static void decompressHits(const CompressedClusters& cmprClusters, const uint32_t start, const uint32_t end, Args&... args)
  {
    uint32_t time = 0;
    uint16_t pad = 0;
    for (uint32_t k = start; k < end; k++) {
      if (cmprClusters.nComppressionModes & GPUSettings::CompressionDifferences) {
        uint32_t timeTmp = cmprClusters.timeDiffU[k];
        if (timeTmp & 800000) {
          timeTmp |= 0xFF000000;
        }
        time += timeTmp;
        pad += cmprClusters.padDiffU[k];
      } else {
        time = cmprClusters.timeDiffU[k];
        pad = cmprClusters.padDiffU[k];
      }
      decompressHitsStore(cmprClusters, k, time, pad, args...);
    }
  }
};
} // namespace GPUCA_NAMESPACE::gpu

#endif
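
The class above funnels cluster storage through overload resolution on the trailing arguments: decompressTrack and decompressHits are written once, and the Args&... tail selects the std::function, std::vector, locked per-(slice, row) vector, or GPU temp-buffer variant of the store helper. Below is a minimal, self-contained sketch of that pattern combined with the 24-bit delta decoding used by decompressHits; the types and names are toy stand-ins, not O2 code, and the sketch tests the 24-bit sign bit with the hexadecimal mask 0x800000, whereas the source above keeps its decimal literal 800000 verbatim.

#include <cstdint>
#include <functional>
#include <vector>

struct Cluster { // toy stand-in for o2::tpc::ClusterNative
  uint32_t time;
  uint16_t pad;
};

// Overload 1: hand each decoded cluster to a caller-supplied callback.
static Cluster store(uint32_t time, uint16_t pad, std::function<void(const Cluster&)>& func)
{
  Cluster c{time, pad};
  func(c);
  return c;
}

// Overload 2: append each decoded cluster to a vector.
static Cluster store(uint32_t time, uint16_t pad, std::vector<Cluster>& out)
{
  out.push_back(Cluster{time, pad});
  return out.back();
}

// Sign-extend a 24-bit two's-complement residual to 32 bits: if bit 23
// (0x800000) is set, fill the top byte, as the "|= 0xFF000000" step does.
static uint32_t signExtend24(uint32_t v)
{
  return (v & 0x800000) ? (v | 0xFF000000) : v;
}

// One decode loop for any storage backend, mirroring decompressHits:
// differences are accumulated into running time/pad values, then the
// trailing arguments pick a store() overload at compile time.
template <typename... Args>
static void decode(const uint32_t* timeDiff, const uint16_t* padDiff, uint32_t n, Args&... args)
{
  uint32_t time = 0;
  uint16_t pad = 0;
  for (uint32_t k = 0; k < n; k++) {
    time += signExtend24(timeDiff[k]);
    pad += padDiff[k];
    store(time & 0xFFFFFF, pad, args...);
  }
}

// Usage: std::vector<Cluster> out; decode(timeDiffs, padDiffs, n, out);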
5 changes: 0 additions & 5 deletions GPU/GPUTracking/DataCompression/TPCClusterDecompressor.h
@@ -35,11 +35,6 @@ class TPCClusterDecompressor
   static constexpr uint32_t NSLICES = GPUCA_NSLICES;
   static int32_t decompress(const o2::tpc::CompressedClustersFlat* clustersCompressed, o2::tpc::ClusterNativeAccess& clustersNative, std::function<o2::tpc::ClusterNative*(size_t)> allocator, const GPUParam& param, bool deterministicRec);
   static int32_t decompress(const o2::tpc::CompressedClusters* clustersCompressed, o2::tpc::ClusterNativeAccess& clustersNative, std::function<o2::tpc::ClusterNative*(size_t)> allocator, const GPUParam& param, bool deterministicRec);
-
-  template <typename... Args>
-  static void decompressTrack(const o2::tpc::CompressedClusters* clustersCompressed, const GPUParam& param, const uint32_t maxTime, const uint32_t i, uint32_t& offset, Args&... args);
-  template <typename... Args>
-  static void decompressHits(const o2::tpc::CompressedClusters* clustersCompressed, const uint32_t start, const uint32_t end, Args&... args);
 };
 } // namespace GPUCA_NAMESPACE::gpu
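
The two surviving decompress entry points delegate output allocation to the caller through the std::function allocator parameter. A hypothetical host-side call, assuming clustersCompressed, clustersNative, param and deterministicRec already exist; only the decompress signature itself is taken from the header above:

// Sketch: back the decompressed output with a std::vector owned by the caller.
std::vector<o2::tpc::ClusterNative> buffer;
std::function<o2::tpc::ClusterNative*(size_t)> allocator = [&buffer](size_t nClusters) {
  buffer.resize(nClusters); // decompress() reports the total cluster count here
  return buffer.data();     // and fills this array
};
int32_t err = TPCClusterDecompressor::decompress(clustersCompressed, clustersNative, allocator, param, deterministicRec);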
164 changes: 0 additions & 164 deletions GPU/GPUTracking/DataCompression/TPCClusterDecompressor.inc
(diff not rendered)
