Skip to content

Commit

Permalink
integrate the cache unit with the LSUnit
Browse files Browse the repository at this point in the history
  • Loading branch information
tjhu committed Jan 7, 2025
1 parent 4908793 commit bd42619
Show file tree
Hide file tree
Showing 3 changed files with 54 additions and 9 deletions.
37 changes: 37 additions & 0 deletions CustomHWUnits/MCADLSUnit.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,22 @@ unsigned MCADLSUnit::dispatch(const mca::InstRef &IR) {
const mca::Instruction &IS = *IR.getInstruction();
auto MaybeMDA = getMemoryAccessMD(IR);

// Update the cache timer
if (CU && MaybeMDA) {
uint64_t latency = 0;
if (isStore(IS, MaybeMDA)) {
latency = CU->store({MaybeMDA->Addr});
} else {
latency = CU->load({MaybeMDA->Addr});
}
// We can update the ongoing requests directly.
// If there are multiple concurrent requests to the same address,
// it wouldn't affect the final simulation result by much.
// We should have a separate tracker for L/S if we want to improve
// the accuracy in the future.
ongoing_requests[MaybeMDA->Addr] = clock + latency;
}

bool IsStoreBarrier = IS.isAStoreBarrier();
bool IsLoadBarrier = IS.isALoadBarrier();
assert((IS.getMayLoad() || IS.getMayStore()) && "Not a memory operation!");
Expand Down Expand Up @@ -227,6 +243,19 @@ MCADLSUnit::Status MCADLSUnit::isAvailable(const mca::InstRef &IR) const {
return MCADLSUnit::LSU_LQUEUE_FULL;
if (isStore(IS, MaybeMDA) && isSQFull())
return MCADLSUnit::LSU_SQUEUE_FULL;

// Check if the memory operands are ready.
if (MaybeMDA && CU && ongoing_requests.count(MaybeMDA->Addr)) {
// FIXME: returning `LSU_LQUEUE_FULL` is a quick hack
if (ongoing_requests.at(MaybeMDA->Addr) > clock) {
LLVM_DEBUG(dbgs() << "[LSUnit]: Instruction idx=" << IR.getSourceIndex()
<< " is waiting for the cache at address " << *MaybeMDA << "\n");
return MCADLSUnit::LSU_LQUEUE_FULL;
}
LLVM_DEBUG(dbgs() << "[LSUnit]: Instruction idx=" << IR.getSourceIndex()
<< " is ready for the cache at address " << *MaybeMDA << "\n");
}

return MCADLSUnit::LSU_AVAILABLE;
}

Expand All @@ -248,6 +277,11 @@ void MCADLSUnit::onInstructionRetired(const mca::InstRef &IR) {
LLVM_DEBUG(dbgs() << "[LSUnit]: Instruction idx=" << IR.getSourceIndex()
<< " has been removed from the store queue.\n");
}

// Remove it from the cache latency tracker
if (CU && MaybeMDA) {
ongoing_requests.erase(MaybeMDA->Addr);
}
}

void MCADLSUnit::onInstructionExecuted(const mca::InstRef &IR) {
Expand Down Expand Up @@ -277,6 +311,9 @@ void MCADLSUnit::cycleEvent() {
for (const std::pair<unsigned, std::unique_ptr<CustomMemoryGroup>> &G :
CustomGroups)
G.second->cycleEvent();

// Update the cache timer
clock++;
}

#ifndef NDEBUG
Expand Down
18 changes: 13 additions & 5 deletions CustomHWUnits/MCADLSUnit.h
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,9 @@
#include "llvm/MCA/HardwareUnits/LSUnit.h"
#include "llvm/MCA/Instruction.h"
#include <set>
#include <optional>

#include "Cache.h"
#include "MetadataRegistry.h"

namespace llvm {
Expand Down Expand Up @@ -97,18 +99,24 @@ class MCADLSUnit : public mca::LSUnit {

MetadataRegistry *MDRegistry;

/// The memory cache hierarchy unit.
std::optional<CacheUnit> CU;
/// Timer to keep track of the memory access latency.
uint64_t clock = 0;
/// Map from the ongoing memory request address to the time it will be done.
std::unordered_map<uint64_t, uint64_t> ongoing_requests;

public:
MCADLSUnit(const MCSchedModel &SM, MetadataRegistry *MDR)
: LSUnit(SM, /* LQSize */ 0, /* SQSize */ 0, /* NoAlias */ false),
MDRegistry(MDR) {}
: MCADLSUnit(SM, /* LQSize */ 0, /* SQSize */ 0, /* NoAlias */ false, MDR) {}
MCADLSUnit(const MCSchedModel &SM, unsigned LQ, unsigned SQ,
MetadataRegistry *MDR)
: LSUnit(SM, LQ, SQ, /* NoAlias */ false), MDRegistry(MDR) {}
: MCADLSUnit(SM, LQ, SQ, /* NoAlias */ false, MDR) {}
MCADLSUnit(const MCSchedModel &SM, unsigned LQ, unsigned SQ,
bool AssumeNoAlias, MetadataRegistry *MDR)
bool AssumeNoAlias, MetadataRegistry *MDR, std::optional<CacheUnit> CU = std::nullopt)
: LSUnit(SM, LQ, SQ, AssumeNoAlias), CurrentLoadGroupID(0),
CurrentLoadBarrierGroupID(0), CurrentStoreGroupID(0),
CurrentStoreBarrierGroupID(0), MDRegistry(MDR) {}
CurrentStoreBarrierGroupID(0), MDRegistry(MDR), CU(std::move(CU)) {}

Status isAvailable(const mca::InstRef &IR) const override;

Expand Down
8 changes: 4 additions & 4 deletions MCAWorker.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -252,12 +252,12 @@ std::unique_ptr<mca::Pipeline> MCAWorker::createDefaultPipeline() {
// Create the hardware units defining the backend.
auto RCU = std::make_unique<RetireControlUnit>(SM);
auto PRF = std::make_unique<RegisterFile>(SM, MRI, MCAPO.RegisterFileSize);
auto [L1I, L1D] = buildCache();
auto LSU = std::make_unique<MCADLSUnit>(SM, MCAPO.LoadQueueSize,
MCAPO.StoreQueueSize,
MCAPO.AssumeNoAlias, &MDRegistry);
MCAPO.AssumeNoAlias, &MDRegistry, L1D);
auto HWS = std::make_unique<Scheduler>(SM, *LSU);
auto BPU = std::make_unique<NaiveBranchPredictorUnit>(BranchMispredictionDelay, BranchHistoryTableSize);
auto [L1I, L1D] = buildCache();

// Create the pipeline stages.
auto Fetch = std::make_unique<EntryStage>(SrcMgr);
Expand Down Expand Up @@ -298,12 +298,12 @@ std::unique_ptr<mca::Pipeline> MCAWorker::createInOrderPipeline() {
const MCSchedModel &SM = STI.getSchedModel();
const MCRegisterInfo &MRI = TheMCA.getMCRegisterInfo();

auto [L1I, L1D] = buildCache();
auto PRF = std::make_unique<RegisterFile>(SM, MRI, MCAPO.RegisterFileSize);
auto LSU = std::make_unique<MCADLSUnit>(SM, MCAPO.LoadQueueSize,
MCAPO.StoreQueueSize,
MCAPO.AssumeNoAlias, &MDRegistry);
MCAPO.AssumeNoAlias, &MDRegistry, L1D);
auto BPU = std::make_unique<NaiveBranchPredictorUnit>(BranchMispredictionDelay, BranchHistoryTableSize);
auto [L1I, L1D] = buildCache();

// Create the pipeline stages.
auto Entry = std::make_unique<EntryStage>(SrcMgr);
Expand Down

0 comments on commit bd42619

Please sign in to comment.