From c139c799b1a6b5c8eb5a959524aa9ff0d55df124 Mon Sep 17 00:00:00 2001 From: Shamser Ahmed Date: Thu, 23 Jan 2025 16:54:52 +0000 Subject: [PATCH] HPCC-32901 Make super file cost and reads/write updates consistent - Super file's numDiskReads, numDiskWrites, readCost and writeCost stats are re-calculated from subfiles stats every time a subfile is added or removed from a super file - Super file's readCost and numReads are updated every time the superfile is read Signed-off-by: Shamser Ahmed --- dali/base/dadfs.cpp | 151 ++++++++++++++++++++-- dali/base/dadfs.hpp | 14 +- ecl/hthor/hthor.cpp | 1 + esp/clients/ws_dfsclient/ws_dfsclient.cpp | 17 ++- thorlcr/graph/thgraphmaster.cpp | 6 +- 5 files changed, 176 insertions(+), 13 deletions(-) diff --git a/dali/base/dadfs.cpp b/dali/base/dadfs.cpp index 734db18b27b..fb6fa19fdb9 100644 --- a/dali/base/dadfs.cpp +++ b/dali/base/dadfs.cpp @@ -5069,7 +5069,57 @@ protected: friend class CDistributedFilePart; atRestCost = calcFileAtRestCost(cluster, sizeGB, fileAgeDays); } if (attrs) - accessCost = getReadCost(*attrs, cluster) + getWriteCost(*attrs, cluster); + accessCost = ::getReadCost(*attrs, cluster) + ::getWriteCost(*attrs, cluster); + } + + virtual bool getNumReads(stat_type &numReads) const override + { + const IPropertyTree *attrs = root->queryPropTree("Attr"); + if (attrs && attrs->hasProp(getDFUQResultFieldName(DFUQRFnumDiskReads))) + numReads = attrs->getPropInt64(getDFUQResultFieldName(DFUQRFnumDiskReads)); + else + numReads = 0; + return true; + } + + virtual bool getNumWrites(stat_type &numWrites) const override + { + const IPropertyTree *attrs = root->queryPropTree("Attr"); + if (attrs && attrs->hasProp(getDFUQResultFieldName(DFUQRFnumDiskWrites))) + numWrites = attrs->getPropInt64(getDFUQResultFieldName(DFUQRFnumDiskWrites)); + else + numWrites = 0; + return true; + } + + virtual bool getReadCost(cost_type &cost, bool calculateIfMissing=false) const override + { + IPropertyTree *attrs = root->queryPropTree("Attr"); + if (attrs && (attrs->hasProp(getDFUQResultFieldName(DFUQRFreadCost)) ||calculateIfMissing)) + { + StringBuffer clusterName; + // getClusterName isn't const function, so need to cast this to non-const + const_cast(this)->getClusterName(0, clusterName); + cost = ::getReadCost(*attrs, clusterName.str()); + return true; + } + cost = 0; + return false; + } + + virtual bool getWriteCost(cost_type &cost, bool calculateIfMissing=false) const override + { + const IPropertyTree *attrs = root->queryPropTree("Attr"); + if (attrs && (attrs->hasProp(getDFUQResultFieldName(DFUQRFwriteCost)) || calculateIfMissing)) + { + StringBuffer clusterName; + // getClusterName isn't const function, so need to cast this to non-const + const_cast(this)->getClusterName(0, clusterName); + cost = ::getWriteCost(*attrs, clusterName.str()); + return true; + } + cost = 0; + return false; } }; @@ -6336,6 +6386,82 @@ class CDistributedSuperFile: public CDistributedFileBase return true; } + virtual bool getNumReads(stat_type &numReads) const override + { + const IPropertyTree *attrs = root->queryPropTree("Attr"); + if (attrs && attrs->hasProp(getDFUQResultFieldName(DFUQRFnumDiskReads))) + numReads = attrs->getPropInt64(getDFUQResultFieldName(DFUQRFnumDiskReads)); + else + { + numReads = 0; + ForEachItemIn(i,subfiles) + { + stat_type c; + if (!subfiles.item(i).getNumReads(c)) + return false; + numReads += c; + } + } + return true; + } + + virtual bool getNumWrites(stat_type &numWrites) const override + { + const IPropertyTree *attrs = root->queryPropTree("Attr"); + if (attrs && attrs->hasProp(getDFUQResultFieldName(DFUQRFnumDiskWrites))) + numWrites = attrs->getPropInt64(getDFUQResultFieldName(DFUQRFnumDiskWrites)); + else + { + numWrites = 0; + ForEachItemIn(i,subfiles) + { + stat_type c; + if (!subfiles.item(i).getNumWrites(c)) + return false; + numWrites += c; + } + } + return true; + } + + virtual bool getReadCost(cost_type &cost, bool calculateIfMissing=false) const override + { + const IPropertyTree *attrs = root->queryPropTree("Attr"); + if (attrs && attrs->hasProp(getDFUQResultFieldName(DFUQRFreadCost))) + cost = attrs->getPropInt64(getDFUQResultFieldName(DFUQRFreadCost)); + else + { + cost = 0; + ForEachItemIn(i,subfiles) + { + cost_type c; + if (!subfiles.item(i).getReadCost(c,calculateIfMissing)) + return false; + cost += c; + } + } + return true; + } + + virtual bool getWriteCost(cost_type &cost, bool calculateIfMissing=false) const override + { + const IPropertyTree *attrs = root->queryPropTree("Attr"); + if (attrs && attrs->hasProp(getDFUQResultFieldName(DFUQRFwriteCost))) + cost = attrs->getPropInt64(getDFUQResultFieldName(DFUQRFwriteCost)); + else + { + cost = 0; + ForEachItemIn(i,subfiles) + { + cost_type c; + if (!subfiles.item(i).getWriteCost(c,calculateIfMissing)) + return false; + cost += c; + } + } + return true; + } + virtual IDistributedSuperFile *querySuperFile() override { return this; @@ -6562,6 +6688,10 @@ class CDistributedSuperFile: public CDistributedFileBase attrs.removeProp("@minSkew"); attrs.removeProp("@maxSkewPart"); attrs.removeProp("@minSkewPart"); + attrs.removeProp(getDFUQResultFieldName(DFUQRFnumDiskReads)); + attrs.removeProp(getDFUQResultFieldName(DFUQRFnumDiskWrites)); + attrs.removeProp(getDFUQResultFieldName(DFUQRFreadCost)); + attrs.removeProp(getDFUQResultFieldName(DFUQRFwriteCost)); __int64 fs = getFileSize(false,false); if (fs!=-1) @@ -6589,6 +6719,16 @@ class CDistributedSuperFile: public CDistributedFileBase attrs.setPropBin("_record_layout", mb.length(), mb.bufferBase()); if (getRecordLayout(mb, "_rtlType")) attrs.setPropBin("_rtlType", mb.length(), mb.bufferBase()); + stat_type numReads, numWrites; + if (getNumReads(numReads)) + attrs.setPropInt64(getDFUQResultFieldName(DFUQRFnumDiskReads), numReads); + if (getNumWrites(numWrites)) + attrs.setPropInt64(getDFUQResultFieldName(DFUQRFnumDiskWrites), numWrites); + cost_type readCost, writeCost; + if (getReadCost(readCost)) + attrs.setPropInt64(getDFUQResultFieldName(DFUQRFreadCost), numReads); + if (getWriteCost(writeCost)) + attrs.setPropInt64(getDFUQResultFieldName(DFUQRFwriteCost), numWrites); const char *kind = nullptr; Owned subIter = getSubFileIterator(true); ForEach(*subIter) @@ -13407,16 +13547,7 @@ IDFProtectedIterator *CDistributedFileDirectory::lookupProtectedFiles(const char return new CDFProtectedIterator(owner,notsuper,superonly,defaultTimeout); } -const char* DFUQResultFieldNames[] = { "@name", "@description", "@group", "@kind", "@modified", "@job", "@owner", - "@DFUSFrecordCount", "@recordCount", "@recordSize", "@DFUSFsize", "@size", "@workunit", "@DFUSFcluster", "@numsubfiles", - "@accessed", "@numparts", "@compressedSize", "@directory", "@partmask", "@superowners", "@persistent", "@protect", "@compressed", - "@cost", "@numDiskReads", "@numDiskWrites", "@atRestCost", "@accessCost", "@maxSkew", "@minSkew", "@maxSkewPart", "@minSkewPart", - "@readCost", "@writeCost" }; -extern da_decl const char* getDFUQResultFieldName(DFUQResultField field) -{ - return DFUQResultFieldNames[field]; -} IPropertyTreeIterator *deserializeFileAttrIterator(MemoryBuffer& mb, unsigned numFiles, DFUQResultField* localFilters, const char* localFilterBuf) { diff --git a/dali/base/dadfs.hpp b/dali/base/dadfs.hpp index fb8453b291a..1d50d63cf32 100644 --- a/dali/base/dadfs.hpp +++ b/dali/base/dadfs.hpp @@ -304,8 +304,16 @@ enum DFUQResultField }; extern da_decl const char* getDFUQFilterFieldName(DFUQFilterField field); -extern da_decl const char* getDFUQResultFieldName(DFUQResultField field); +constexpr const char* DFUQResultFieldNames[] = { "@name", "@description", "@group", "@kind", "@modified", "@job", "@owner", + "@DFUSFrecordCount", "@recordCount", "@recordSize", "@DFUSFsize", "@size", "@workunit", "@DFUSFcluster", "@numsubfiles", + "@accessed", "@numparts", "@compressedSize", "@directory", "@partmask", "@superowners", "@persistent", "@protect", "@compressed", + "@cost", "@numDiskReads", "@numDiskWrites", "@atRestCost", "@accessCost", "@maxSkew", "@minSkew", "@maxSkewPart", "@minSkewPart", + "@readCost", "@writeCost" }; +extern da_decl constexpr const char* getDFUQResultFieldName(DFUQResultField field) +{ + return DFUQResultFieldNames[field]; +} /** * File operations can be included in a transaction to ensure that multiple * updates are handled atomically. This is the interface to a transaction @@ -434,6 +442,10 @@ interface IDistributedFile: extends IInterface virtual int getExpire(StringBuffer *expirationDate) = 0; virtual void setExpire(int expireDays) = 0; virtual void getCost(const char * cluster, cost_type & atRestCost, cost_type & accessCost) = 0; + virtual bool getNumReads(stat_type &numReads) const = 0; + virtual bool getNumWrites(stat_type &numWrites) const = 0; + virtual bool getReadCost(cost_type &cost, bool calculateIfMissing=false) const = 0; + virtual bool getWriteCost(cost_type &cost, bool calculateIfMissing=false) const = 0; }; diff --git a/ecl/hthor/hthor.cpp b/ecl/hthor/hthor.cpp index 6dd02b640c7..30fb379f14f 100644 --- a/ecl/hthor/hthor.cpp +++ b/ecl/hthor/hthor.cpp @@ -8545,6 +8545,7 @@ void CHThorDiskReadBaseActivity::closepart() { IDistributedSuperFile *super = dFile->querySuperFile(); dFile = &(super->querySubFile(subfile, true)); + updateCostAndNumReads(super, curDiskReads); } } updateCostAndNumReads(dFile, curDiskReads); diff --git a/esp/clients/ws_dfsclient/ws_dfsclient.cpp b/esp/clients/ws_dfsclient/ws_dfsclient.cpp index 81a845665a1..ce7148fa91b 100644 --- a/esp/clients/ws_dfsclient/ws_dfsclient.cpp +++ b/esp/clients/ws_dfsclient/ws_dfsclient.cpp @@ -236,7 +236,22 @@ class CServiceDistributedFileBase : public CSimpleInterfaceOf virtual bool getSkewInfo(unsigned &maxSkew, unsigned &minSkew, unsigned &maxSkewPart, unsigned &minSkewPart, bool calculateIfMissing) override { return legacyDFSFile->getSkewInfo(maxSkew, minSkew, maxSkewPart, minSkewPart, calculateIfMissing); } virtual int getExpire(StringBuffer *expirationDate) override { return legacyDFSFile->getExpire(expirationDate); } virtual void getCost(const char * cluster, cost_type & atRestCost, cost_type & accessCost) override { legacyDFSFile->getCost(cluster, atRestCost, accessCost); } - + virtual bool getNumReads(stat_type &numReads) const override + { + return legacyDFSFile->getNumReads(numReads); + } + virtual bool getNumWrites(stat_type &numWrites) const override + { + return legacyDFSFile->getNumWrites(numWrites); + } + virtual bool getReadCost(cost_type &cost, bool calculateIfMissing) const override + { + return legacyDFSFile->getReadCost(cost, calculateIfMissing); + } + virtual bool getWriteCost(cost_type &cost, bool calculateIfMissing) const override + { + return legacyDFSFile->getWriteCost(cost, calculateIfMissing); + } // setters (change file meta data) virtual void setPreferredClusters(const char *clusters) override { legacyDFSFile->setPreferredClusters(clusters); } virtual void setSingleClusterOnly() override { legacyDFSFile->setSingleClusterOnly(); } diff --git a/thorlcr/graph/thgraphmaster.cpp b/thorlcr/graph/thgraphmaster.cpp index 9dd7350afaa..cd04ffacc83 100644 --- a/thorlcr/graph/thgraphmaster.cpp +++ b/thorlcr/graph/thgraphmaster.cpp @@ -670,7 +670,7 @@ cost_type CMasterActivity::calcFileReadCostStats(bool updateFileProps) curReadCost = calcFileAccessCost(clusterName, 0, curDiskReads); if (updateFileProps) - updateCostAndNumReads(file, curDiskReads); + updateCostAndNumReads(file, curDiskReads, curReadCost); return curReadCost; }; cost_type readCost = 0; @@ -693,12 +693,16 @@ cost_type CMasterActivity::calcFileReadCostStats(bool updateFileProps) if (super) { unsigned numSubFiles = super->numSubFiles(true); + stat_type curDiskReads = 0; for (unsigned i=0; iquerySubFile(i, true); readCost += updateReadCosts(useJhtreeCache, &subFile, *fileStats[fileIndex]); + curDiskReads += fileStats[fileIndex]->getStatisticSum(StNumDiskReads); fileIndex++; } + if (updateFileProps) + updateCostAndNumReads(super, curDiskReads, readCost); } else {