From 492b2993d4260322fd19468a3debb19c945fb57a Mon Sep 17 00:00:00 2001 From: Nathaniel-0 <97201514+Nathaniel-0@users.noreply.github.com> Date: Mon, 6 Jan 2025 14:08:58 -0800 Subject: [PATCH] cherry-pick `cf8fa6884c54a9f1d422deb0298ac7be35834dae` Skylake predictor added (#34) --------- Original author: Nathaniel-0 --- CMakeLists.txt | 3 +- CustomHWUnits/SkylakeBranchUnit.cpp | 185 ++++++++++++++++++++++++++++ CustomHWUnits/SkylakeBranchUnit.h | 108 ++++++++++++++++ MCAWorker.cpp | 5 +- MetadataCategories.h | 6 + 5 files changed, 304 insertions(+), 3 deletions(-) create mode 100644 CustomHWUnits/SkylakeBranchUnit.cpp create mode 100644 CustomHWUnits/SkylakeBranchUnit.h diff --git a/CMakeLists.txt b/CMakeLists.txt index 022390d..5980e78 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -100,6 +100,7 @@ set(_CUSTOMHW_SOURCE_FILES CustomHWUnits/NaiveBranchPredictorUnit.cpp CustomHWUnits/LocalBPU.cpp CustomHWUnits/IndirectBPU.cpp + CustomHWUnits/SkylakeBranchUnit.cpp ) set(_CUSTOM_STAGES_SOURCE_FILES @@ -128,7 +129,7 @@ set(_SOURCE_FILES add_llvm_executable(llvm-mcad ${_SOURCE_FILES} - SUPPORT_PLUGINS + ${SUPPORT_PLUGINS} ) export_executable_symbols(llvm-mcad) diff --git a/CustomHWUnits/SkylakeBranchUnit.cpp b/CustomHWUnits/SkylakeBranchUnit.cpp new file mode 100644 index 0000000..d0dba17 --- /dev/null +++ b/CustomHWUnits/SkylakeBranchUnit.cpp @@ -0,0 +1,185 @@ + +//===----------------------- BranchUnit.cpp -----------------------*- C++-*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+/// \file
+///
+/// Implementation of SkylakeBranchUnit.
+/// Work in progress
+///
+//===----------------------------------------------------------------------===//
+
+
+#define DEBUG_TYPE "llvm-mca"
+
+#include
+#include "MetadataCategories.h"
+#include "AbstractBranchPredictorUnit.h"
+#include "SkylakeBranchUnit.h"
+#include
+#include
+#include
+
+namespace llvm {
+namespace mcad {
+
+  // Upper bound on the number of sets kept in one history-indexed table.
+  static constexpr unsigned MaxSetsPerTable = 2048;
+  // Upper bound on the number of entries kept in one set.
+  static constexpr unsigned WaysPerSet = 4;
+
+  /// Creates a predictor that reports \p penalty_ as its misprediction
+  /// penalty.
+  SkylakeBranchUnit::SkylakeBranchUnit(uint32_t penalty_) : penalty(penalty_) {}
+
+  /// Returns the penalty value supplied at construction.
+  uint32_t SkylakeBranchUnit::getMispredictionPenalty() {
+    return penalty;
+  }
+
+  /// Predicts the branch at \p pc when no target is known: forwards to the
+  /// two-address overload with a zero target address.
+  AbstractBranchPredictorUnit::BranchDirection
+  SkylakeBranchUnit::predictBranch(MDInstrAddr pc) {
+    return predictBranch(pc, MDInstrAddr{0});
+  }
+
+  /// Predicts the direction of the branch at \p pc.
+  ///
+  /// A candidate history is derived from the current PHR and the
+  /// (\p pc, \p target) pair; the member \c phr is not modified. The three
+  /// history-indexed tables are probed first and the pc-indexed base table
+  /// last, and the first table holding a matching entry wins.
+  ///
+  /// \returns TAKEN when a matching entry is found (its \c useful counter is
+  ///          incremented as a side effect), NOT_TAKEN otherwise.
+  AbstractBranchPredictorUnit::BranchDirection
+  SkylakeBranchUnit::predictBranch(MDInstrAddr pc, MDInstrAddr target) {
+    const SkylakePHR history = updatePHR(pc, target);
+
+    // getTable() hands an earlier hit straight back, so the probe order below
+    // is also the priority order.
+    // NOTE(review): pht2 and pht3 are indexed with the same (10, 3)
+    // parameters -- presumably a placeholder until per-table indexing exists.
+    SkylakeBranchEntry *entry = nullptr;
+    entry = getTable(pht1, getPHTIndex(history, 1, 6), pc, entry);
+    entry = getTable(pht2, getPHTIndex(history, 10, 3), pc, entry);
+    entry = getTable(pht3, getPHTIndex(history, 10, 3), pc, entry);
+    entry = getTable(base, SkylakePHR(pc.addr & 0x1FFF), pc, entry); // Check base last
+
+    if (entry == nullptr)
+      return BranchDirection::NOT_TAKEN;
+
+    entry->useful++;
+    return BranchDirection::TAKEN;
+  }
+
+  /// Looks up the entry for \p pc in the set stored under \p index of \p pht.
+  ///
+  /// \param out result of an earlier probe; when non-null it is returned
+  ///            unchanged and \p pht is not consulted.
+  /// \returns the first entry of the set whose pc matches and whose \c useful
+  ///          counter is non-zero, or \p out when there is none.
+  SkylakeBranchUnit::SkylakeBranchEntry *
+  SkylakeBranchUnit::getTable(SkylakeBranchTable &pht, SkylakePHR index,
+                              MDInstrAddr pc, SkylakeBranchEntry *out) {
+    if (out != nullptr)
+      return out;
+
+    auto found = pht.find(index);
+    if (found == pht.end())
+      return out;
+
+    for (auto &entry : found->second)
+      if (entry.pc == pc.addr && entry.useful > 0)
+        return &entry;
+    return out;
+  }
+
+  // Functions currently implementing Skylake behavior
+  // Can make class virtual for architecture compatibility in the future
+
+  /// Records the branch at \p pc in the set stored under \p index of \p pht.
+  ///
+  /// A table holds at most MaxSetsPerTable sets. When \p index is new and the
+  /// table is full, the set whose entries have the smallest summed \c useful
+  /// count is evicted first.
+  void SkylakeBranchUnit::insertTable(SkylakeBranchTable &pht, MDInstrAddr pc,
+                                      SkylakePHR index) {
+    if (pht.find(index) == pht.end() && pht.size() >= MaxSetsPerTable) {
+      auto victim = pht.begin();
+      unsigned long long lowest = 0;
+      for (auto row = pht.begin(); row != pht.end(); ++row) {
+        // Summed in the counters' own width so the total cannot be truncated.
+        unsigned long long total = 0;
+        for (const auto &entry : row->second)
+          total += entry.useful;
+        // On a tie the later row replaces the earlier one.
+        if (row == pht.begin() || total <= lowest) {
+          victim = row;
+          lowest = total;
+        }
+      }
+      pht.erase(victim);
+    }
+
+    // phtSetPush() creates the set on first use.
+    phtSetPush(pht, pc, index);
+  }
+
+  /// Records the branch at \p pc in the set stored under \p index of \p pht,
+  /// creating the set if it does not exist yet.
+  ///
+  /// A branch that is already in the set has its \c useful counter bumped
+  /// instead of being stored a second time. New entries start with a zero
+  /// counter and getTable() only reports entries whose counter is non-zero,
+  /// so this bump is the only way an entry can become eligible for
+  /// prediction. A set that already holds WaysPerSet entries drops its least
+  /// useful one before the new entry is added.
+  void SkylakeBranchUnit::phtSetPush(SkylakeBranchTable &pht, MDInstrAddr pc,
+                                     SkylakePHR index) {
+    auto &ways = pht[index];
+
+    for (auto &entry : ways) {
+      if (entry.pc == pc.addr) {
+        entry.useful++;
+        return;
+      }
+    }
+
+    if (ways.size() >= WaysPerSet)
+      ways.erase(std::min_element(ways.begin(), ways.end()));
+    ways.push_back(SkylakeBranchEntry(pc.addr, 0));
+  }
+
+  /// Records the outcome of the branch at \p pc. Only taken branches are
+  /// tracked; they are recorded with a zero target address.
+  void SkylakeBranchUnit::recordTakenBranch(MDInstrAddr pc,
+                                            BranchDirection nextInstrDirection) {
+    if (nextInstrDirection == BranchDirection::TAKEN)
+      recordTakenBranch(pc, MDInstrAddr{0});
+  }
+
+  /// Records a taken branch from \p pc to \p target: stores it in the
+  /// pc-indexed base table, commits the updated path history to \c phr, and
+  /// stores it in each history-indexed table.
+  void SkylakeBranchUnit::recordTakenBranch(MDInstrAddr pc, MDInstrAddr target) {
+    // TODO: Get correct index for each table
+
+    // Base predictor, indexed by the low 13 bits of the branch address
+    phtSetPush(base, pc, SkylakePHR(pc.addr & 0x1FFF));
+
+    // PHTs
+    // See page 9 of H&H
+    phr = updatePHR(pc, target);
+    insertTable(pht1, pc, getPHTIndex(phr, 1, 6));
+    insertTable(pht2, pc, getPHTIndex(phr, 10, 3));
+    insertTable(pht3, pc, getPHTIndex(phr, 10, 3));
+  }
+
+
+  // Each table has its own indexing
+  // Work in progress
+
+  /// Converts \p phr into a table index.
+  ///
+  /// Two masks are cut from a fixed alternating-bit pattern, one sized by
+  /// \p start1 and one by \p start2. Each mask is ANDed with \p phr and the
+  /// two results are XORed together.
+  ///
+  /// NOTE(review): for each mask the left and right shift distances are equal
+  /// (79 - start), so the mask covers bits [0, start + 14) of the pattern
+  /// rather than a 14-bit window beginning at bit `start`. Confirm this is the
+  /// intended selection.
+  SkylakeBranchUnit::SkylakePHR
+  SkylakeBranchUnit::getPHTIndex(SkylakePHR phr, int start1, int start2) {
+    // Named `pattern` so it does not shadow the member table `base`.
+    const SkylakePHR pattern("101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101");
+
+    // Get range of bits from 16(i)-6 to 16(i)+8
+    auto index = pattern << (93 - (start1+14));
+    index = index >> (93-14-start1);
+    index &= phr;
+
+    auto index2 = pattern << (93 - (start2+14));
+    index2 = index2 >> (93-14-start2);
+    index2 &= phr;
+
+    // xor two indices together to get final index
+    return index ^ index2;
+  }
+
+  // Part of PHR
+
+  /// Builds the 16-bit footprint that a branch contributes to the PHR.
+  ///
+  /// Bits 0-5 hold branch-address bits 3, 4, 7, 8, 11 and 12 XORed with the
+  /// low five bits of the target address. Bits 6-15 hold branch-address bits
+  /// 5, 6, 9, 10 and 13-18.
+  ///
+  /// NOTE(review): the target mask 0x001F keeps five bits while the
+  /// branch-address half it is XORed with is six bits wide. Confirm the
+  /// intended target width against the Half&Half footprint definition.
+  unsigned long long SkylakeBranchUnit::getFootprint(MDInstrAddr branchInstr,
+                                                     MDInstrAddr targetInstr) {
+    // Both addresses are deliberately narrowed to 32 bits; no higher bit is
+    // used below.
+    const uint32_t branchAddr = static_cast<uint32_t>(branchInstr.addr);
+    const uint32_t targetBits = static_cast<uint32_t>(targetInstr.addr) & 0x001F;
+
+    const uint32_t branchRight = ((branchAddr & 0x18) >> 3)
+                               | ((branchAddr & 0x180) >> 5)
+                               | ((branchAddr & 0x1800) >> 7);
+
+    const uint32_t branchLeft = ((branchAddr & 0x60) >> 5)
+                              | ((branchAddr & 0x600) >> 7)
+                              | ((branchAddr & 0x7E000) >> 9);
+
+    return (branchRight ^ targetBits) | (branchLeft << 6);
+  }
+
+  /// Returns the PHR value obtained by shifting the current \c phr left by two
+  /// bits and XORing in the footprint of the given branch. The member \c phr
+  /// is left untouched; the caller decides whether to commit the result.
+  SkylakeBranchUnit::SkylakePHR
+  SkylakeBranchUnit::updatePHR(MDInstrAddr currentAddr, MDInstrAddr targetAddr) {
+    return (phr << 2) ^ SkylakePHR(getFootprint(currentAddr, targetAddr));
+  }
+
+
+} // namespace mcad
+} // namespace llvm
+
diff --git a/CustomHWUnits/SkylakeBranchUnit.h b/CustomHWUnits/SkylakeBranchUnit.h
new file mode 100644
index 0000000..29c11b2
--- /dev/null
+++ b/CustomHWUnits/SkylakeBranchUnit.h
@@ -0,0 +1,108 @@
+
+//===------------------------- BranchUnit.h -----------------------*- C++-*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+/// \file
+///
+/// Declares SkylakeBranchUnit, a branch predictor implemented according to the
+/// Half&Half description of the Intel Skylake branch predictor.
+/// Work in progress
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_MCAD_CUSTOMHWUNITS_SKYLAKEBRANCHUNIT_H
+#define LLVM_MCAD_CUSTOMHWUNITS_SKYLAKEBRANCHUNIT_H
+
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallVector.h"
+#include
+#include "MetadataCategories.h"
+#include "AbstractBranchPredictorUnit.h"
+#include
+#include
+#include
+
+namespace llvm {
+namespace mcad {
+
+/// Abstract key/target based branch-unit interface. Nothing else declared in
+/// this header uses it.
+class BranchUnit {
+public:
+  // Virtual so that an object can be destroyed through a BranchUnit pointer.
+  virtual ~BranchUnit() = default;
+
+  virtual void recordTakenBranch(unsigned long long key, uint32_t target) = 0;
+  virtual void predictCond(unsigned long long key, uint32_t target) = 0;
+  virtual void predictInd(unsigned long long key, uint32_t target) = 0;
+};
+
+/// Placeholder: overrides nothing, so it is still abstract.
+class GenericBranchUnit : public BranchUnit {
+
+};
+
+/// Branch Predictor implemented according to Half&Half description
+/// of Intel Skylake Branch Predictor
+class SkylakeBranchUnit : public AbstractBranchPredictorUnit {
+public:
+  /// One entry of a set: a branch address plus the counter that is compared
+  /// when an eviction victim has to be chosen.
+  struct SkylakeBranchEntry {
+    unsigned long long pc = 0;
+    unsigned long long useful = 0; // decides eviction
+
+    SkylakeBranchEntry(unsigned long long pc_, unsigned long long useful_)
+        : pc(pc_), useful(useful_) {}
+
+    /// Builds an entry that carries only a counter; \c pc is zero rather than
+    /// left indeterminate.
+    SkylakeBranchEntry(unsigned long long useful_) : useful(useful_) {}
+
+    /// Orders entries by \c useful, least useful first.
+    bool operator<(const SkylakeBranchEntry &other) const {
+      return useful < other.useful;
+    }
+
+    /// Sum of the two entries' \c useful counters.
+    unsigned long long operator+(const SkylakeBranchEntry &other) const {
+      return useful + other.useful;
+    }
+  };
+
+  /// Path history register, 93 bits wide.
+  using SkylakePHR = std::bitset<93>;
+  // Maps Branch History to 4-way set of branch PC
+  using SkylakeBranchTable = std::unordered_map>;
+
+  /// \param penalty_ value later reported by getMispredictionPenalty().
+  SkylakeBranchUnit(uint32_t penalty_);
+
+  /// Predicts the branch at \p pc using the history that \p pc and \p target
+  /// would produce.
+  BranchDirection predictBranch(MDInstrAddr pc, MDInstrAddr target);
+  /// Same as the two-address overload with a zero target address.
+  BranchDirection predictBranch(MDInstrAddr pc) override;
+
+  /// Records a taken branch from \p pc to \p target in every table and in the
+  /// path history.
+  void recordTakenBranch(MDInstrAddr pc, MDInstrAddr target);
+  /// Records the branch only when \p nextInstrDirection is TAKEN, using a zero
+  /// target address.
+  void recordTakenBranch(MDInstrAddr instrAddr, BranchDirection nextInstrDirection) override;
+
+  uint32_t getMispredictionPenalty() override;
+
+private:
+  // Each table records progressively further away branches
+  SkylakeBranchTable base;
+  SkylakeBranchTable pht1;
+  SkylakeBranchTable pht2;
+  SkylakeBranchTable pht3;
+  SkylakePHR phr;
+  uint32_t penalty;
+
+  /// Records \p pc under index \p phr of \p pht, evicting a whole set first
+  /// when the table is full.
+  void insertTable(SkylakeBranchTable& pht, MDInstrAddr pc, SkylakePHR phr);
+  /// Returns \p out unchanged when it is non-null; otherwise searches the set
+  /// under index \p phr of \p pht for \p pc.
+  SkylakeBranchEntry* getTable(SkylakeBranchTable& pht,
+                               SkylakePHR phr,
+                               MDInstrAddr pc,
+                               SkylakeBranchEntry* out);
+  /// Records \p pc in the set under index \p phr of \p pht; a full set first
+  /// drops its least useful entry.
+  void phtSetPush(SkylakeBranchTable& pht, MDInstrAddr pc, SkylakePHR phr);
+  /// Converts a path history value into a table index.
+  SkylakePHR getPHTIndex(SkylakePHR phr, int start1, int start2);
+  /// Combines bits of the branch and target addresses into the value that is
+  /// folded into the path history.
+  unsigned long long getFootprint(MDInstrAddr branchAddr, MDInstrAddr targetAddr);
+  /// Returns the path history that would follow the given branch, without
+  /// storing it.
+  SkylakePHR updatePHR(MDInstrAddr currentAddr, MDInstrAddr targetAddr);
+
+
+};
+
+
+} // namespace mcad
+} // namespace llvm
+
+
+#endif // LLVM_MCAD_CUSTOMHWUNITS_SKYLAKEBRANCHUNIT_H
diff --git a/MCAWorker.cpp b/MCAWorker.cpp index f50e405..cb20ffa 100644 --- a/MCAWorker.cpp +++ b/MCAWorker.cpp @@ -38,6 +38,7 @@ #include "CustomHWUnits/MCADLSUnit.h" #include "CustomHWUnits/NaiveBranchPredictorUnit.h" +#include "CustomHWUnits/SkylakeBranchUnit.h" #include "CustomStages/MCADFetchDelayStage.h" #include "MCAViews/SummaryView.h" #include "MCAViews/TimelineView.h" @@ -192,7 +193,7 @@ std::unique_ptr MCAWorker::createDefaultPipeline() { MCAPO.StoreQueueSize, MCAPO.AssumeNoAlias, &MDRegistry); auto HWS = std::make_unique(SM, *LSU); - auto BPU = std::make_unique(BranchMispredictionDelay, BranchHistoryTableSize); + auto BPU = std::make_unique(20); // Create the pipeline stages. 
auto Fetch = std::make_unique(SrcMgr); @@ -237,7 +238,7 @@ std::unique_ptr MCAWorker::createInOrderPipeline() { auto LSU = std::make_unique(SM, MCAPO.LoadQueueSize, MCAPO.StoreQueueSize, MCAPO.AssumeNoAlias, &MDRegistry); - auto BPU = std::make_unique(BranchMispredictionDelay, BranchHistoryTableSize); + auto BPU = std::make_unique(20); // Create the pipeline stages. auto Entry = std::make_unique(SrcMgr); diff --git a/MetadataCategories.h b/MetadataCategories.h index a4f2f2e..89f10bd 100644 --- a/MetadataCategories.h +++ b/MetadataCategories.h @@ -1,5 +1,10 @@ #ifndef LLVM_MCA_METADATACATEGORIES_H #define LLVM_MCA_METADATACATEGORIES_H + + +#include "MetadataRegistry.h" +#include "llvm/MCA/Instruction.h" + namespace llvm { namespace mcad { @@ -24,6 +29,7 @@ MD_BinaryRegionMarkers struct MDInstrAddr { unsigned long long addr; + bool operator<(const MDInstrAddr &b) const { return addr < b.addr; } // orders addresses by their numeric value