Skip to content

Commit

Permalink
HPCC-32901 Make super file cost and reads/write updates consistent
Browse files Browse the repository at this point in the history
- Super file's numDiskReads, numDiskWrites, readCost and writeCost
stats are re-calculated from subfiles stats every time a subfile
is added or removed from a super file
- Super file's readCost and numReads are updated every time the
superfile is read

Signed-off-by: Shamser Ahmed <[email protected]>
  • Loading branch information
shamser committed Jan 28, 2025
1 parent f72de8b commit c139c79
Show file tree
Hide file tree
Showing 5 changed files with 176 additions and 13 deletions.
151 changes: 141 additions & 10 deletions dali/base/dadfs.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5069,7 +5069,57 @@ protected: friend class CDistributedFilePart;
atRestCost = calcFileAtRestCost(cluster, sizeGB, fileAgeDays);
}
if (attrs)
accessCost = getReadCost(*attrs, cluster) + getWriteCost(*attrs, cluster);
accessCost = ::getReadCost(*attrs, cluster) + ::getWriteCost(*attrs, cluster);
}

virtual bool getNumReads(stat_type &numReads) const override
{
const IPropertyTree *attrs = root->queryPropTree("Attr");
if (attrs && attrs->hasProp(getDFUQResultFieldName(DFUQRFnumDiskReads)))
numReads = attrs->getPropInt64(getDFUQResultFieldName(DFUQRFnumDiskReads));
else
numReads = 0;
return true;
}

virtual bool getNumWrites(stat_type &numWrites) const override
{
const IPropertyTree *attrs = root->queryPropTree("Attr");
if (attrs && attrs->hasProp(getDFUQResultFieldName(DFUQRFnumDiskWrites)))
numWrites = attrs->getPropInt64(getDFUQResultFieldName(DFUQRFnumDiskWrites));
else
numWrites = 0;
return true;
}

virtual bool getReadCost(cost_type &cost, bool calculateIfMissing=false) const override
{
IPropertyTree *attrs = root->queryPropTree("Attr");
if (attrs && (attrs->hasProp(getDFUQResultFieldName(DFUQRFreadCost)) ||calculateIfMissing))
{
StringBuffer clusterName;
// getClusterName isn't const function, so need to cast this to non-const
const_cast<CDistributedFile *>(this)->getClusterName(0, clusterName);
cost = ::getReadCost(*attrs, clusterName.str());
return true;
}
cost = 0;
return false;
}

virtual bool getWriteCost(cost_type &cost, bool calculateIfMissing=false) const override
{
const IPropertyTree *attrs = root->queryPropTree("Attr");
if (attrs && (attrs->hasProp(getDFUQResultFieldName(DFUQRFwriteCost)) || calculateIfMissing))
{
StringBuffer clusterName;
// getClusterName isn't const function, so need to cast this to non-const
const_cast<CDistributedFile *>(this)->getClusterName(0, clusterName);
cost = ::getWriteCost(*attrs, clusterName.str());
return true;
}
cost = 0;
return false;
}
};

Expand Down Expand Up @@ -6336,6 +6386,82 @@ class CDistributedSuperFile: public CDistributedFileBase<IDistributedSuperFile>
return true;
}

virtual bool getNumReads(stat_type &numReads) const override
{
const IPropertyTree *attrs = root->queryPropTree("Attr");
if (attrs && attrs->hasProp(getDFUQResultFieldName(DFUQRFnumDiskReads)))
numReads = attrs->getPropInt64(getDFUQResultFieldName(DFUQRFnumDiskReads));
else
{
numReads = 0;
ForEachItemIn(i,subfiles)
{
stat_type c;
if (!subfiles.item(i).getNumReads(c))
return false;
numReads += c;
}
}
return true;
}

virtual bool getNumWrites(stat_type &numWrites) const override
{
const IPropertyTree *attrs = root->queryPropTree("Attr");
if (attrs && attrs->hasProp(getDFUQResultFieldName(DFUQRFnumDiskWrites)))
numWrites = attrs->getPropInt64(getDFUQResultFieldName(DFUQRFnumDiskWrites));
else
{
numWrites = 0;
ForEachItemIn(i,subfiles)
{
stat_type c;
if (!subfiles.item(i).getNumWrites(c))
return false;
numWrites += c;
}
}
return true;
}

virtual bool getReadCost(cost_type &cost, bool calculateIfMissing=false) const override
{
const IPropertyTree *attrs = root->queryPropTree("Attr");
if (attrs && attrs->hasProp(getDFUQResultFieldName(DFUQRFreadCost)))
cost = attrs->getPropInt64(getDFUQResultFieldName(DFUQRFreadCost));
else
{
cost = 0;
ForEachItemIn(i,subfiles)
{
cost_type c;
if (!subfiles.item(i).getReadCost(c,calculateIfMissing))
return false;
cost += c;
}
}
return true;
}

virtual bool getWriteCost(cost_type &cost, bool calculateIfMissing=false) const override
{
const IPropertyTree *attrs = root->queryPropTree("Attr");
if (attrs && attrs->hasProp(getDFUQResultFieldName(DFUQRFwriteCost)))
cost = attrs->getPropInt64(getDFUQResultFieldName(DFUQRFwriteCost));
else
{
cost = 0;
ForEachItemIn(i,subfiles)
{
cost_type c;
if (!subfiles.item(i).getWriteCost(c,calculateIfMissing))
return false;
cost += c;
}
}
return true;
}

virtual IDistributedSuperFile *querySuperFile() override
{
return this;
Expand Down Expand Up @@ -6562,6 +6688,10 @@ class CDistributedSuperFile: public CDistributedFileBase<IDistributedSuperFile>
attrs.removeProp("@minSkew");
attrs.removeProp("@maxSkewPart");
attrs.removeProp("@minSkewPart");
attrs.removeProp(getDFUQResultFieldName(DFUQRFnumDiskReads));
attrs.removeProp(getDFUQResultFieldName(DFUQRFnumDiskWrites));
attrs.removeProp(getDFUQResultFieldName(DFUQRFreadCost));
attrs.removeProp(getDFUQResultFieldName(DFUQRFwriteCost));

__int64 fs = getFileSize(false,false);
if (fs!=-1)
Expand Down Expand Up @@ -6589,6 +6719,16 @@ class CDistributedSuperFile: public CDistributedFileBase<IDistributedSuperFile>
attrs.setPropBin("_record_layout", mb.length(), mb.bufferBase());
if (getRecordLayout(mb, "_rtlType"))
attrs.setPropBin("_rtlType", mb.length(), mb.bufferBase());
stat_type numReads, numWrites;
if (getNumReads(numReads))
attrs.setPropInt64(getDFUQResultFieldName(DFUQRFnumDiskReads), numReads);
if (getNumWrites(numWrites))
attrs.setPropInt64(getDFUQResultFieldName(DFUQRFnumDiskWrites), numWrites);
cost_type readCost, writeCost;
if (getReadCost(readCost))
attrs.setPropInt64(getDFUQResultFieldName(DFUQRFreadCost), numReads);
if (getWriteCost(writeCost))
attrs.setPropInt64(getDFUQResultFieldName(DFUQRFwriteCost), numWrites);
const char *kind = nullptr;
Owned<IDistributedFileIterator> subIter = getSubFileIterator(true);
ForEach(*subIter)
Expand Down Expand Up @@ -13407,16 +13547,7 @@ IDFProtectedIterator *CDistributedFileDirectory::lookupProtectedFiles(const char
return new CDFProtectedIterator(owner,notsuper,superonly,defaultTimeout);
}

const char* DFUQResultFieldNames[] = { "@name", "@description", "@group", "@kind", "@modified", "@job", "@owner",
"@DFUSFrecordCount", "@recordCount", "@recordSize", "@DFUSFsize", "@size", "@workunit", "@DFUSFcluster", "@numsubfiles",
"@accessed", "@numparts", "@compressedSize", "@directory", "@partmask", "@superowners", "@persistent", "@protect", "@compressed",
"@cost", "@numDiskReads", "@numDiskWrites", "@atRestCost", "@accessCost", "@maxSkew", "@minSkew", "@maxSkewPart", "@minSkewPart",
"@readCost", "@writeCost" };

extern da_decl const char* getDFUQResultFieldName(DFUQResultField field)
{
return DFUQResultFieldNames[field];
}

IPropertyTreeIterator *deserializeFileAttrIterator(MemoryBuffer& mb, unsigned numFiles, DFUQResultField* localFilters, const char* localFilterBuf)
{
Expand Down
14 changes: 13 additions & 1 deletion dali/base/dadfs.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -304,8 +304,16 @@ enum DFUQResultField
};

extern da_decl const char* getDFUQFilterFieldName(DFUQFilterField field);
extern da_decl const char* getDFUQResultFieldName(DFUQResultField field);
constexpr const char* DFUQResultFieldNames[] = { "@name", "@description", "@group", "@kind", "@modified", "@job", "@owner",
"@DFUSFrecordCount", "@recordCount", "@recordSize", "@DFUSFsize", "@size", "@workunit", "@DFUSFcluster", "@numsubfiles",
"@accessed", "@numparts", "@compressedSize", "@directory", "@partmask", "@superowners", "@persistent", "@protect", "@compressed",
"@cost", "@numDiskReads", "@numDiskWrites", "@atRestCost", "@accessCost", "@maxSkew", "@minSkew", "@maxSkewPart", "@minSkewPart",
"@readCost", "@writeCost" };

extern da_decl constexpr const char* getDFUQResultFieldName(DFUQResultField field)
{
return DFUQResultFieldNames[field];
}
/**
* File operations can be included in a transaction to ensure that multiple
* updates are handled atomically. This is the interface to a transaction
Expand Down Expand Up @@ -434,6 +442,10 @@ interface IDistributedFile: extends IInterface
virtual int getExpire(StringBuffer *expirationDate) = 0;
virtual void setExpire(int expireDays) = 0;
virtual void getCost(const char * cluster, cost_type & atRestCost, cost_type & accessCost) = 0;
virtual bool getNumReads(stat_type &numReads) const = 0;
virtual bool getNumWrites(stat_type &numWrites) const = 0;
virtual bool getReadCost(cost_type &cost, bool calculateIfMissing=false) const = 0;
virtual bool getWriteCost(cost_type &cost, bool calculateIfMissing=false) const = 0;
};


Expand Down
1 change: 1 addition & 0 deletions ecl/hthor/hthor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8545,6 +8545,7 @@ void CHThorDiskReadBaseActivity::closepart()
{
IDistributedSuperFile *super = dFile->querySuperFile();
dFile = &(super->querySubFile(subfile, true));
updateCostAndNumReads(super, curDiskReads);
}
}
updateCostAndNumReads(dFile, curDiskReads);
Expand Down
17 changes: 16 additions & 1 deletion esp/clients/ws_dfsclient/ws_dfsclient.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -236,7 +236,22 @@ class CServiceDistributedFileBase : public CSimpleInterfaceOf<INTERFACE>
virtual bool getSkewInfo(unsigned &maxSkew, unsigned &minSkew, unsigned &maxSkewPart, unsigned &minSkewPart, bool calculateIfMissing) override { return legacyDFSFile->getSkewInfo(maxSkew, minSkew, maxSkewPart, minSkewPart, calculateIfMissing); }
virtual int getExpire(StringBuffer *expirationDate) override { return legacyDFSFile->getExpire(expirationDate); }
virtual void getCost(const char * cluster, cost_type & atRestCost, cost_type & accessCost) override { legacyDFSFile->getCost(cluster, atRestCost, accessCost); }

virtual bool getNumReads(stat_type &numReads) const override
{
return legacyDFSFile->getNumReads(numReads);
}
virtual bool getNumWrites(stat_type &numWrites) const override
{
return legacyDFSFile->getNumWrites(numWrites);
}
virtual bool getReadCost(cost_type &cost, bool calculateIfMissing) const override
{
return legacyDFSFile->getReadCost(cost, calculateIfMissing);
}
virtual bool getWriteCost(cost_type &cost, bool calculateIfMissing) const override
{
return legacyDFSFile->getWriteCost(cost, calculateIfMissing);
}
// setters (change file meta data)
virtual void setPreferredClusters(const char *clusters) override { legacyDFSFile->setPreferredClusters(clusters); }
virtual void setSingleClusterOnly() override { legacyDFSFile->setSingleClusterOnly(); }
Expand Down
6 changes: 5 additions & 1 deletion thorlcr/graph/thgraphmaster.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -670,7 +670,7 @@ cost_type CMasterActivity::calcFileReadCostStats(bool updateFileProps)
curReadCost = calcFileAccessCost(clusterName, 0, curDiskReads);

if (updateFileProps)
updateCostAndNumReads(file, curDiskReads);
updateCostAndNumReads(file, curDiskReads, curReadCost);
return curReadCost;
};
cost_type readCost = 0;
Expand All @@ -693,12 +693,16 @@ cost_type CMasterActivity::calcFileReadCostStats(bool updateFileProps)
if (super)
{
unsigned numSubFiles = super->numSubFiles(true);
stat_type curDiskReads = 0;
for (unsigned i=0; i<numSubFiles; i++)
{
IDistributedFile &subFile = super->querySubFile(i, true);
readCost += updateReadCosts(useJhtreeCache, &subFile, *fileStats[fileIndex]);
curDiskReads += fileStats[fileIndex]->getStatisticSum(StNumDiskReads);
fileIndex++;
}
if (updateFileProps)
updateCostAndNumReads(super, curDiskReads, readCost);
}
else
{
Expand Down

0 comments on commit c139c79

Please sign in to comment.