Skip to content

Commit

Permalink
HPCC-33279 Serve CKeyBuilder stats through getStatistic
Browse files Browse the repository at this point in the history
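- Serve CKeyBuilder statistics through getStatistic(StatisticKind), the standard method of serving statistics, and remove the dedicated getters (getDuplicateCount, getNumLeafNodes, getNumBranchNodes, getNumBlobNodes, getOffsetBranches, getBranchMemorySize, getLeafMemorySize) from IKeyBuilder.
- Publish 4 new statistics for index write activity: StNumDuplicateKeyCount, StNumOffsetBranches,
StSizeBranchMemory and StSizeLeafMemory.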

Signed-off-by: Shamser Ahmed <[email protected]>
  • Loading branch information
shamser committed Jan 23, 2025
1 parent 44a639d commit 1ca7faf
Show file tree
Hide file tree
Showing 9 changed files with 37 additions and 46 deletions.
14 changes: 7 additions & 7 deletions ecl/hthor/hthor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1280,20 +1280,20 @@ void CHThorIndexWriteActivity::execute()
reccount++;
}
builder->finish(metadata, &fileCrc, maxRecordSizeSeen);
duplicateKeyCount = builder->getDuplicateCount();
duplicateKeyCount = builder->getStatistic(StNumDuplicateKeyCount);
cummulativeDuplicateKeyCount += duplicateKeyCount;
numLeafNodes = builder->getNumLeafNodes();
numBranchNodes = builder->getNumBranchNodes();
numBlobNodes = builder->getNumBlobNodes();
numLeafNodes = builder->getStatistic(StNumLeafCacheAdds);
numBranchNodes = builder->getStatistic(StNumNodeCacheAdds);
numBlobNodes = builder->getStatistic(StNumBlobCacheAdds);
originalBlobSize = bc.queryTotalSize();
branchMemorySize = builder->getBranchMemorySize();
leafMemorySize = builder->getLeafMemorySize();
branchMemorySize = builder->getStatistic(StSizeBranchMemory);
leafMemorySize = builder->getStatistic(StSizeLeafMemory);

totalLeafNodes += numLeafNodes;
totalBranchNodes += numBranchNodes;
totalBlobNodes += numBlobNodes;
numDiskWrites = io->getStatistic(StNumDiskWrites);
offsetBranches = builder->getOffsetBranches();
offsetBranches = builder->getStatistic(StNumOffsetBranches);
out->flush();
out.clear();
io->close();
Expand Down
14 changes: 7 additions & 7 deletions roxie/ccd/ccdserver.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12838,16 +12838,16 @@ class CRoxieServerIndexWriteActivity : public CRoxieServerInternalSinkActivity,
}
reccount++;
}
duplicateKeyCount = builder->getDuplicateCount();
duplicateKeyCount = builder->getStatistic(StNumDuplicateKeyCount);
cummulativeDuplicateKeyCount += duplicateKeyCount;
builder->finish(metadata, &fileCrc, maxRecordSizeSeen);
numLeafNodes = builder->getNumLeafNodes();
numBranchNodes = builder->getNumBranchNodes();
numBlobNodes = builder->getNumBlobNodes();
offsetBranches = builder->getOffsetBranches();
numLeafNodes = builder->getStatistic(StNumLeafCacheAdds);
numBranchNodes = builder->getStatistic(StNumNodeCacheAdds);
numBlobNodes = builder->getStatistic(StNumBlobCacheAdds);
offsetBranches = builder->getStatistic(StNumOffsetBranches);
originalBlobSize = bc.queryTotalSize();
branchMemorySize = builder->getBranchMemorySize();
leafMemorySize = builder->getLeafMemorySize();
branchMemorySize = builder->getStatistic(StSizeBranchMemory);
leafMemorySize = builder->getStatistic(StSizeLeafMemory);

builder.clear();
out.clear();
Expand Down
15 changes: 8 additions & 7 deletions system/jhtree/keybuild.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -598,17 +598,18 @@ class CKeyBuilder : public CInterfaceOf<IKeyBuilder>
return head;
}

virtual unsigned __int64 getDuplicateCount() const override { return duplicateCount; };
virtual unsigned __int64 getNumLeafNodes() const override { return numLeaves; };
virtual unsigned __int64 getNumBranchNodes() const override { return numBranches; }
virtual unsigned __int64 getNumBlobNodes() const override { return numBlobs; }
virtual unsigned __int64 getOffsetBranches() const override { return offsetBranches; }
virtual unsigned __int64 getBranchMemorySize() const override { return indexCompressor->queryBranchMemorySize(); }
virtual unsigned __int64 getLeafMemorySize() const override { return indexCompressor->queryLeafMemorySize(); }
virtual unsigned __int64 getStatistic(StatisticKind kind) const override
{
switch (kind)
{
case StNumDuplicateKeyCount:
return duplicateCount;
case StNumOffsetBranches:
return offsetBranches;
case StSizeBranchMemory:
return indexCompressor->queryBranchMemorySize();
case StSizeLeafMemory:
return indexCompressor->queryLeafMemorySize();
case StNumLeafCacheAdds:
return numLeaves;
case StNumNodeCacheAdds:
Expand Down
7 changes: 0 additions & 7 deletions system/jhtree/keybuild.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -99,13 +99,6 @@ interface IKeyBuilder : public IInterface
virtual void processKeyData(const char *keyData, offset_t pos, size32_t recsize) = 0;
virtual void addLeafInfo(CNodeInfo *info) = 0;
virtual unsigned __int64 createBlob(size32_t size, const char * _ptr) = 0;
virtual unsigned __int64 getDuplicateCount() const = 0;
virtual unsigned __int64 getNumLeafNodes() const = 0;
virtual unsigned __int64 getNumBranchNodes() const = 0;
virtual unsigned __int64 getNumBlobNodes() const = 0;
virtual unsigned __int64 getOffsetBranches() const = 0;
virtual unsigned __int64 getBranchMemorySize() const = 0;
virtual unsigned __int64 getLeafMemorySize() const = 0;
virtual unsigned __int64 getStatistic(StatisticKind kind) const = 0;
};

Expand Down
4 changes: 4 additions & 0 deletions system/jlib/jstatcodes.h
Original file line number Diff line number Diff line change
Expand Up @@ -326,6 +326,10 @@ enum StatisticKind
StNumPeakCacheObjects, // Peak number of objects in a generic cache
StNumCacheDuplicates,
StNumCacheEvictions,
StNumDuplicateKeyCount,
StNumOffsetBranches,
StSizeBranchMemory,
StSizeLeafMemory,
StMax,

//For any quantity there is potentially the following variants.
Expand Down
5 changes: 5 additions & 0 deletions system/jlib/jstats.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -999,6 +999,11 @@ static const constexpr StatisticMeta statsMetaData[StMax] = {
{ PEAKNUMSTAT(PeakCacheObjects), "High water mark for number of objects in a cache"},
{ NUMSTAT(CacheDuplicates), "The number of times an item was added to a cache by two threads at the same time" },
{ NUMSTAT(CacheEvictions), "The number of times an item was evicted from a cache" },
{ NUMSTAT(DuplicateKeyCount), "The number of duplicate keys" },
{ NUMSTAT(OffsetBranches), "The number of offset branches" },
{ SIZESTAT(BranchMemory), "The size of branch memory" },
{ SIZESTAT(LeafMemory), "The size of leaf memory"},

};

static MapStringTo<StatisticKind, StatisticKind> statisticNameMap(true);
Expand Down
20 changes: 4 additions & 16 deletions thorlcr/activities/indexwrite/thindexwriteslave.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -53,12 +53,8 @@ class IndexWriteSlaveActivity : public ProcessSlaveActivity, public ILookAheadSt
unsigned __int64 totalCount;

size32_t lastRowSize, firstRowSize, maxRecordSizeSeen, keyedSize;
unsigned __int64 duplicateKeyCount;
offset_t offsetBranches = 0;
offset_t uncompressedSize = 0;
offset_t originalBlobSize = 0;
offset_t branchMemorySize = 0;
offset_t leafMemorySize = 0;

MemoryBuffer rowBuff;
OwnedConstThorRow lastRow, firstRow;
Expand Down Expand Up @@ -98,7 +94,6 @@ class IndexWriteSlaveActivity : public ProcessSlaveActivity, public ILookAheadSt
enableTlkPart0 = (0 != container.queryJob().getWorkUnitValueInt("enableTlkPart0", globals->getPropBool("@enableTlkPart0", true)));
defaultNoSeek = (0 != container.queryJob().getWorkUnitValueInt("noSeekBuildIndex", globals->getPropBool("@noSeekBuildIndex", isContainerized())));
reInit = (0 != (TIWvarfilename & helper->getFlags()));
duplicateKeyCount = 0;
container.queryJob().getWorkUnitValue("defaultIndexCompression", defaultIndexCompression);
}
virtual void init(MemoryBuffer &data, MemoryBuffer &slaveData) override
Expand Down Expand Up @@ -245,13 +240,6 @@ class IndexWriteSlaveActivity : public ProcessSlaveActivity, public ILookAheadSt
if (tmpBuilder)
{
tmpBuilder->finish(metadata, &crc, maxRecordSizeSeen);
if (!isTLK)
{
duplicateKeyCount = tmpBuilder->getDuplicateCount();
offsetBranches = tmpBuilder->getOffsetBranches();
branchMemorySize = tmpBuilder->getBranchMemorySize();
leafMemorySize = tmpBuilder->getLeafMemorySize();
}
mergeStats(inactiveStats, tmpBuilder, indexWriteActivityStatistics);
}
}
Expand Down Expand Up @@ -614,7 +602,7 @@ class IndexWriteSlaveActivity : public ProcessSlaveActivity, public ILookAheadSt
return;
rowcount_t _processed = processed & THORDATALINK_COUNT_MASK;
mb.append(_processed);
mb.append(duplicateKeyCount);
mb.append(inactiveStats.getStatisticValue(StNumDuplicateKeyCount));
if (!singlePartKey || firstNode())
{
StringBuffer partFname;
Expand All @@ -629,11 +617,11 @@ class IndexWriteSlaveActivity : public ProcessSlaveActivity, public ILookAheadSt
mb.append(inactiveStats.getStatisticValue(StNumLeafCacheAdds));
mb.append(inactiveStats.getStatisticValue(StNumBlobCacheAdds));
mb.append(inactiveStats.getStatisticValue(StNumNodeCacheAdds));
mb.append(offsetBranches);
mb.append(inactiveStats.getStatisticValue(StNumOffsetBranches));
mb.append(uncompressedSize);
mb.append(originalBlobSize);
mb.append(branchMemorySize);
mb.append(leafMemorySize);
mb.append(inactiveStats.getStatisticValue(StSizeBranchMemory));
mb.append(inactiveStats.getStatisticValue(StSizeLeafMemory));

if (!singlePartKey && firstNode() && buildTlk)
{
Expand Down
2 changes: 1 addition & 1 deletion thorlcr/thorutil/thormisc.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ const StatisticsMapping basicActivityStatistics({StNumParallelExecute, StTimeLoo
const StatisticsMapping groupActivityStatistics({StNumGroups, StNumGroupMax}, basicActivityStatistics);
const StatisticsMapping indexReadFileStatistics({}, diskReadRemoteStatistics, jhtreeCacheStatistics);
const StatisticsMapping indexReadActivityStatistics({StNumRowsProcessed}, indexReadFileStatistics, basicActivityStatistics);
const StatisticsMapping indexWriteActivityStatistics({StPerReplicated, StNumLeafCacheAdds, StNumNodeCacheAdds, StNumBlobCacheAdds }, basicActivityStatistics, diskWriteRemoteStatistics);
const StatisticsMapping indexWriteActivityStatistics({StPerReplicated, StNumLeafCacheAdds, StNumNodeCacheAdds, StNumBlobCacheAdds, StNumDuplicateKeyCount, StNumOffsetBranches, StSizeBranchMemory, StSizeLeafMemory}, basicActivityStatistics, diskWriteRemoteStatistics);
const StatisticsMapping keyedJoinActivityStatistics({ StNumIndexAccepted, StNumPreFiltered, StNumDiskSeeks, StNumDiskAccepted, StNumDiskRejected}, basicActivityStatistics, indexReadFileStatistics);
const StatisticsMapping commonJoinActivityStatistics({StNumMatchLeftRowsMax, StNumMatchRightRowsMax, StNumMatchCandidates, StNumMatchCandidatesMax}, basicActivityStatistics);
const StatisticsMapping hashJoinActivityStatistics({StNumLeftRows, StNumRightRows}, commonJoinActivityStatistics);
Expand Down
2 changes: 1 addition & 1 deletion tools/dumpkey/dumpkey.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -508,7 +508,7 @@ int main(int argc, const char **argv)
if (keyBuilder)
{
keyBuilder->finish(metadata, nullptr, maxSizeSeen);
printf("New key has %" I64F "u leaves, %" I64F "u branches, %" I64F "u duplicates\n", keyBuilder->getNumLeafNodes(), keyBuilder->getNumBranchNodes(), keyBuilder->getDuplicateCount());
printf("New key has %" I64F "u leaves, %" I64F "u branches, %" I64F "u duplicates\n", keyBuilder->getStatistic(StNumLeafCacheAdds), keyBuilder->getStatistic(StNumNodeCacheAdds), keyBuilder->getStatistic(StNumDuplicateKeyCount));
printf("Original key size: %" I64F "u bytes\n", const_cast<IFileIO *>(index->queryFileIO())->size());
printf("New key size: %" I64F "u bytes (%" I64F "u bytes written in %" I64F "u writes)\n", outFileStream->size(), outFileStream->getStatistic(StSizeDiskWrite), outFileStream->getStatistic(StNumDiskWrites));
keyBuilder.clear();
Expand Down

0 comments on commit 1ca7faf

Please sign in to comment.