diff --git a/CustomStages/MCADFetchDelayStage.cpp b/CustomStages/MCADFetchDelayStage.cpp index 6171e44..e36b57e 100644 --- a/CustomStages/MCADFetchDelayStage.cpp +++ b/CustomStages/MCADFetchDelayStage.cpp @@ -67,11 +67,13 @@ llvm::Error MCADFetchDelayStage::execute(llvm::mca::InstRef &IR) { (fellThrough ? AbstractBranchPredictorUnit::NOT_TAKEN : AbstractBranchPredictorUnit::TAKEN); BPU->recordTakenBranch(*previousInstrAddr, actualBranchDirection); - + + stats.numBranches.inc(); if(actualBranchDirection != predictedBranchDirection) { // Previous prediction was wrong; this instruction will have extra // latency due to misprediction. delayCyclesLeft += BPU->getMispredictionPenalty(); + stats.numMispredictions.inc(); LLVM_DEBUG(dbgs() << "[MCAD FetchDelayStage] Previous branch at "); LLVM_DEBUG(dbgs().write_hex(instrAddr->addr)); LLVM_DEBUG(dbgs() << " mispredicted, delaying next instruction by " diff --git a/CustomStages/MCADFetchDelayStage.h b/CustomStages/MCADFetchDelayStage.h index c9b1196..5704f7a 100644 --- a/CustomStages/MCADFetchDelayStage.h +++ b/CustomStages/MCADFetchDelayStage.h @@ -27,6 +27,7 @@ class MCADFetchDelayStage : public llvm::mca::Stage { llvm::mca::InstRef IR; }; + const llvm::MCInstrInfo &MCII; std::deque instrQueue = {}; @@ -48,6 +49,30 @@ class MCADFetchDelayStage : public llvm::mca::Stage { std::optional previousInstrAddr = std::nullopt; std::optional previousInstrSize = std::nullopt; +public: + // Stats + // TODO: Move these elsewhere, as they are useful outside of just branch + // prediction or the FetchDelayStage; we could also make use of the event + // infrastructure that already exists (grep for STALL event) + // Counter that flags wrap-around: inc() sets `overflowed` when `count` is already at its maximum, then lets `count` wrap; members start at zero/false even without `= {}`. + struct OverflowableCount { + unsigned long long count = 0; + bool overflowed = false; + void inc() { + if(count + 1 < count) { + overflowed = true; + } + count++; + } + }; + + struct Statistics { + OverflowableCount numBranches = {}; + OverflowableCount numMispredictions = {}; + }; + + Statistics stats = {}; + public: 
MCADFetchDelayStage(const llvm::MCInstrInfo &MCII, MetadataRegistry &MD, AbstractBranchPredictorUnit *BPU, diff --git a/MCAWorker.cpp b/MCAWorker.cpp index 422be62..d9e78c7 100644 --- a/MCAWorker.cpp +++ b/MCAWorker.cpp @@ -211,6 +211,8 @@ std::unique_ptr buildBranchPredictor() { } // anonymous namespace +MCADFetchDelayStage::Statistics *FetchDelayStats = nullptr; // TODO: ugly; move this elsewhere using hardware events + void BrokerFacade::setBroker(std::unique_ptr &&B) { Worker.TheBroker = std::move(B); } @@ -295,6 +297,7 @@ std::unique_ptr MCAWorker::createDefaultPipeline() { // Create the pipeline stages. auto Fetch = std::make_unique(SrcMgr); auto FetchDelay = std::make_unique(MCII, MDRegistry, BPU.get(), L1I); + FetchDelayStats = &FetchDelay->stats; // TODO: ugly; move this elsewhere using hardware events auto Dispatch = std::make_unique(STI, MRI, MCAPO.DispatchWidth, *RCU, *PRF); auto Execute = @@ -610,6 +613,10 @@ void MCAWorker::printMCA(StringRef RegionDescription) { << RegionDescription << " ===\n"; MCAPipelinePrinter->printReport(OS); + if(FetchDelayStats) { + OS << "Branch Instructions: " << FetchDelayStats->numBranches.count << (FetchDelayStats->numBranches.overflowed ? " (overflowed)" : "") << "\n"; + OS << "Branch Mispredictions: " << FetchDelayStats->numMispredictions.count << (FetchDelayStats->numMispredictions.overflowed ? " (overflowed)" : "") << "\n"; + } } MCAWorker::~MCAWorker() {