Skip to content

Commit

Permalink
Add Skylake Branch Predictor (#35)
Browse files Browse the repository at this point in the history
cherry-pick `cf8fa6884c54a9f1d422deb0298ac7be35834dae`

Skylake predictor added (#34)

---------

Original author: Nathaniel-0

Co-authored-by: Nathaniel-0 <[email protected]>
  • Loading branch information
tjhu and Nathaniel-0 authored Jan 7, 2025
1 parent 21470e8 commit ba6e2a2
Show file tree
Hide file tree
Showing 5 changed files with 304 additions and 3 deletions.
3 changes: 2 additions & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,7 @@ set(_CUSTOMHW_SOURCE_FILES
CustomHWUnits/NaiveBranchPredictorUnit.cpp
CustomHWUnits/LocalBPU.cpp
CustomHWUnits/IndirectBPU.cpp
CustomHWUnits/SkylakeBranchUnit.cpp
)

set(_CUSTOM_STAGES_SOURCE_FILES
Expand Down Expand Up @@ -128,7 +129,7 @@ set(_SOURCE_FILES
add_llvm_executable(llvm-mcad
${_SOURCE_FILES}

SUPPORT_PLUGINS
${SUPPORT_PLUGINS}
)
export_executable_symbols(llvm-mcad)

Expand Down
185 changes: 185 additions & 0 deletions CustomHWUnits/SkylakeBranchUnit.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,185 @@

//===-------------------- SkylakeBranchUnit.cpp -------------------*- C++-*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
///
/// Work in progress
///
//===----------------------------------------------------------------------===//


#define DEBUG_TYPE "llvm-mca"

#include "SkylakeBranchUnit.h"
#include "AbstractBranchPredictorUnit.h"
#include "MetadataCategories.h"
#include <algorithm>
#include <climits>
#include <cstdint>
#include <numeric>
#include <optional>

namespace llvm {
namespace mcad {


/// Constructs a predictor with empty tables.
/// \param penalty_ value later reported by getMispredictionPenalty().
SkylakeBranchUnit::SkylakeBranchUnit(uint32_t penalty_) : penalty(penalty_) {}

uint32_t SkylakeBranchUnit::getMispredictionPenalty() {
return penalty;
}

/// Single-address overload: forwards to the two-address predictBranch using a
/// zero target address.
AbstractBranchPredictorUnit::BranchDirection
SkylakeBranchUnit::predictBranch(MDInstrAddr pc) {
  const MDInstrAddr noTarget{0};
  return predictBranch(pc, noTarget);
}

/// Predicts the direction of the branch at \p pc.
///
/// The tables are probed in the order pht1, pht2, pht3, base; the first table
/// holding a matching entry wins (getTable passes an earlier hit through
/// unchanged). A hit bumps that entry's `useful` counter and predicts TAKEN;
/// no hit predicts NOT_TAKEN.
///
/// \param pc     address of the branch instruction.
/// \param target branch target address, mixed into the history used for the
///               pht1..pht3 indices.
AbstractBranchPredictorUnit::BranchDirection
SkylakeBranchUnit::predictBranch(MDInstrAddr pc, MDInstrAddr target) {
  // History value including this branch. updatePHR returns the new value
  // without storing it, so the member `phr` is left untouched here.
  const SkylakePHR speculative = updatePHR(pc, target);

  SkylakeBranchEntry *hit =
      getTable(pht1, getPHTIndex(speculative, 1, 6), pc, nullptr);
  hit = getTable(pht2, getPHTIndex(speculative, 10, 3), pc, hit);
  hit = getTable(pht3, getPHTIndex(speculative, 10, 3), pc, hit);
  // The base table is indexed by the low 13 bits of the pc and checked last.
  hit = getTable(base, SkylakePHR(pc.addr & 0x1FFF), pc, hit);

  if (hit == nullptr)
    return BranchDirection::NOT_TAKEN;

  ++hit->useful;
  return BranchDirection::TAKEN;
}

/// Looks up \p pc in the row of \p pht selected by \p index.
///
/// \param pht   table to search.
/// \param index row key.
/// \param pc    branch address to match against the row's entries.
/// \param out   result of an earlier lookup; when non-null it is returned
///              as-is and \p pht is not consulted.
/// \returns a pointer to the first entry in the row whose pc equals
///          \p pc.addr and whose `useful` counter is non-zero, otherwise
///          \p out (i.e. nullptr).
SkylakeBranchUnit::SkylakeBranchEntry *
SkylakeBranchUnit::getTable(SkylakeBranchTable &pht, SkylakePHR index,
                            MDInstrAddr pc, SkylakeBranchEntry *out) {
  if (out != nullptr)
    return out;

  const auto row = pht.find(index);
  if (row == pht.end())
    return out;

  for (SkylakeBranchEntry &candidate : row->second) {
    if (candidate.pc == pc.addr && candidate.useful > 0)
      return &candidate;
  }
  return out;
}

// Functions currently implementing Skylake behavior
// Can make class virtual for architecture compatibility in the future
/// Records \p pc in the row of \p pht keyed by \p index, creating the row if
/// needed.
///
/// A new row is only created while the table has fewer than 2048 rows;
/// otherwise the row whose entries' `useful` counters sum to the smallest
/// value is erased first (on a tie, the row visited last wins).
///
/// \param pht   table to insert into.
/// \param pc    branch address to record.
/// \param index row key.
void SkylakeBranchUnit::insertTable(SkylakeBranchTable &pht, MDInstrAddr pc,
                                    SkylakePHR index) {
  const bool rowPresent = pht.find(index) != pht.end();

  if (!rowPresent) {
    if (pht.size() >= 2048) {
      // Table is full: find the least useful row and drop it.
      SkylakeBranchTable::iterator victim;
      uint32_t lowest = UINT_MAX;
      for (auto row = pht.begin(); row != pht.end(); ++row) {
        uint32_t score = 0;
        for (const SkylakeBranchEntry &entry : row->second)
          score += entry.useful;
        if (!(lowest < score)) {
          victim = row;
          lowest = score;
        }
      }
      pht.erase(victim);
    }
    // Start the new row empty.
    pht[index] = {};
  }

  phtSetPush(pht, pc, index);
}
/// Records \p pc in the (at most 4-entry) set stored at \p pht[\p index].
///
/// If the set already holds an entry for \p pc, that entry's `useful` counter
/// is incremented. Previously every call appended a fresh entry with
/// `useful == 0`, even for a pc already in the set; since getTable only
/// matches entries with `useful > 0` and the counter is only bumped after a
/// successful match, no entry could ever be matched and the set filled up with
/// duplicates.
///
/// Otherwise a new entry with `useful == 0` is appended; when the set is
/// already full the entry with the smallest `useful` value is evicted first.
///
/// \param pht   table that owns the set; the row is created if missing.
/// \param pc    branch address to record.
/// \param index row key.
void SkylakeBranchUnit::phtSetPush(SkylakeBranchTable &pht, MDInstrAddr pc,
                                   SkylakePHR index) {
  // Single lookup; operator[] default-constructs the row when it is absent.
  auto &set = pht[index];

  const auto known =
      std::find_if(set.begin(), set.end(), [&pc](const SkylakeBranchEntry &e) {
        return e.pc == pc.addr;
      });
  if (known != set.end()) {
    ++known->useful;
    return;
  }

  if (set.size() >= 4)
    set.erase(std::min_element(set.begin(), set.end()));
  set.push_back(SkylakeBranchEntry(pc.addr, 0));
}

/// Direction-based overload: when \p nextInstrDirection is TAKEN, records the
/// branch at \p pc with a zero target address; any other direction is ignored.
void SkylakeBranchUnit::recordTakenBranch(MDInstrAddr pc,
                                          BranchDirection nextInstrDirection) {
  if (nextInstrDirection != BranchDirection::TAKEN)
    return;
  recordTakenBranch(pc, MDInstrAddr{0});
}

/// Records a taken branch from \p pc to \p target in every table and advances
/// the stored path history.
///
/// \param pc     address of the branch instruction.
/// \param target address the branch jumped to.
void SkylakeBranchUnit::recordTakenBranch(MDInstrAddr pc, MDInstrAddr target) {
  // TODO: Get correct index for each table

  // Base predictor: keyed by the low 13 bits of the pc, independent of history.
  phtSetPush(base, pc, SkylakePHR(pc.addr & 0x1FFF));

  // PHTs (see page 9 of H&H): the history is advanced first so that the
  // indices below are derived from the updated value.
  phr = updatePHR(pc, target);

  struct Slot {
    SkylakeBranchTable *table;
    int start1;
    int start2;
  };
  const Slot slots[] = {{&pht1, 1, 6}, {&pht2, 10, 3}, {&pht3, 10, 3}};
  for (const Slot &slot : slots)
    insertTable(*slot.table, pc, getPHTIndex(phr, slot.start1, slot.start2));
}


// Each table has its own indexing
// Work in progress
/// Derives a table index from a path-history value (work in progress).
///
/// For each of \p start1 and \p start2 an alternating-bit pattern is shifted
/// left and then right by (93 - 14 - start), which for shift amounts within
/// the bitset width clears its upper bits and leaves the low (start + 14)
/// bits; the result is ANDed with \p phr. The two selections are XORed.
///
/// \param phr    history value to index with.
/// \param start1 offset for the first selection.
/// \param start2 offset for the second selection.
/// \returns the XOR of the two masked history values.
SkylakeBranchUnit::SkylakePHR
SkylakeBranchUnit::getPHTIndex(SkylakePHR phr, int start1, int start2) {
  const SkylakePHR pattern("101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101");

  // NOTE(review): the original comment describes a bit range of
  // 16(i)-6 .. 16(i)+8, which is not what the shifts below select - confirm
  // the intended window.
  const auto select = [&pattern, &phr](int start) {
    const int shift = 93 - 14 - start;
    SkylakePHR bits = pattern << shift;
    bits >>= shift;
    bits &= phr;
    return bits;
  };

  return select(start1) ^ select(start2);
}

// Part of PHR
/// Builds the footprint that updatePHR folds into the path history.
///
/// Both addresses are first truncated to 32 bits. The result is laid out as:
///  - low field:  branch bits 3-4, 7-8 and 11-12 packed into bits 0-5, XORed
///                with the low 5 bits of the target address;
///  - high field: branch bits 5-6, 9-10 and 13-18 packed into 10 bits and
///                placed at bit 6 and above.
///
/// \param branchInstr address of the branch instruction.
/// \param targetInstr address of the branch target.
/// \returns the combined footprint value.
unsigned long long SkylakeBranchUnit::getFootprint(MDInstrAddr branchInstr,
                                                   MDInstrAddr targetInstr) {
  // Earlier variant kept for reference:
  // branchAddr = (branchAddr >> 3) & 0x3FFFF;
  const uint32_t branch = static_cast<uint32_t>(branchInstr.addr);
  const uint32_t target = static_cast<uint32_t>(targetInstr.addr) & 0x001F;

  const uint32_t lowField = ((branch >> 3) & 0x03)    // bits 3-4
                            | ((branch >> 5) & 0x0C)  // bits 7-8
                            | ((branch >> 7) & 0x30); // bits 11-12

  const uint32_t highField = ((branch >> 5) & 0x003)    // bits 5-6
                             | ((branch >> 7) & 0x00C)  // bits 9-10
                             | ((branch >> 9) & 0x3F0); // bits 13-18

  const uint32_t footprint = (lowField ^ target) | (highField << 6);
  return footprint;
}

/// Computes the path history that results from taking the branch
/// \p currentAddr -> \p targetAddr: the stored history shifted left by two
/// bits, XORed with the branch footprint.
///
/// The member `phr` is not modified; callers store the result if they want
/// the update to stick.
SkylakeBranchUnit::SkylakePHR
SkylakeBranchUnit::updatePHR(MDInstrAddr currentAddr, MDInstrAddr targetAddr) {
  SkylakePHR updated = phr;
  updated <<= 2;
  updated ^= SkylakePHR(getFootprint(currentAddr, targetAddr));
  return updated;
}


} // namespace mcad
} // namespace llvm

108 changes: 108 additions & 0 deletions CustomHWUnits/SkylakeBranchUnit.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,108 @@

//===---------------------- SkylakeBranchUnit.h -------------------*- C++-*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
///
/// Work in progress
///
///
//===----------------------------------------------------------------------===//

#ifndef LLVM_MCA_HARDWAREUNITS_BRANCHUNIT_H
#define LLVM_MCA_HARDWAREUNITS_BRANCHUNIT_H

#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallVector.h"
#include <optional>
#include "MetadataCategories.h"
#include "AbstractBranchPredictorUnit.h"
#include <bitset>
#include <cstdint>
#include <unordered_map>

namespace llvm {
namespace mcad {

// Branch Predictor implemented according to Half&Half description
// of Intel Skylake Branch Predictor


/// Abstract interface for a branch unit keyed by an integer `key` and a
/// 32-bit `target`. All three operations are pure virtual.
class BranchUnit {
public:
  /// Virtual so that implementations can be destroyed through a BranchUnit
  /// pointer without undefined behaviour.
  virtual ~BranchUnit() = default;

  /// Records that the branch identified by \p key was taken to \p target.
  virtual void recordTakenBranch(unsigned long long key, uint32_t target) = 0;
  /// Prediction hook for conditional branches.
  virtual void predictCond(unsigned long long key, uint32_t target) = 0;
  /// Prediction hook for indirect branches.
  virtual void predictInd(unsigned long long key, uint32_t target) = 0;
};

/// Placeholder subclass of BranchUnit; it overrides nothing, so it is still
/// abstract.
class GenericBranchUnit : public BranchUnit {};

/// Branch predictor unit built from one pc-indexed base table and three
/// history-indexed tables (pht1..pht3), plus a 93-bit path history register.
class SkylakeBranchUnit : public AbstractBranchPredictorUnit {
public:
  // Maps Branch History to 4-way set of branch PC
  struct SkylakeBranchEntry {
    // Both fields are default-initialised so that no constructor can leave
    // one of them indeterminate (the single-argument constructor previously
    // left `pc` uninitialised).
    unsigned long long pc = 0;
    unsigned long long useful = 0; // decides eviction

    SkylakeBranchEntry(unsigned long long pc_, unsigned long long useful_)
        : pc(pc_), useful(useful_) {}

    /// Creates an entry with the given usefulness and a pc of 0.
    SkylakeBranchEntry(unsigned long long useful_) : useful(useful_) {}

    /// Orders entries by usefulness only; used to pick an eviction victim.
    bool operator<(const SkylakeBranchEntry &other) const {
      return useful < other.useful;
    }
    /// Sum of the two entries' usefulness counters.
    unsigned long long operator+(const SkylakeBranchEntry &other) const {
      return useful + other.useful;
    }
  };
  using SkylakePHR = std::bitset<93>;
  using SkylakeBranchTable =
      std::unordered_map<SkylakePHR, SmallVector<SkylakeBranchEntry, 4>>;

  /// \param penalty_ value reported by getMispredictionPenalty().
  SkylakeBranchUnit(uint32_t penalty_);
  /// Predicts the direction of the branch at \p pc jumping to \p target.
  BranchDirection predictBranch(MDInstrAddr pc, MDInstrAddr target);
  BranchDirection predictBranch(MDInstrAddr pc) override;
  /// Records a taken branch from \p pc to \p target.
  void recordTakenBranch(MDInstrAddr pc, MDInstrAddr target);
  void recordTakenBranch(MDInstrAddr instrAddr,
                         BranchDirection nextInstrDirection) override;
  uint32_t getMispredictionPenalty() override;

private:
  // Each table records progressively further away branches
  SkylakeBranchTable base;
  SkylakeBranchTable pht1;
  SkylakeBranchTable pht2;
  SkylakeBranchTable pht3;
  SkylakePHR phr;
  uint32_t penalty;

  void insertTable(SkylakeBranchTable &pht, MDInstrAddr pc, SkylakePHR phr);
  SkylakeBranchEntry *getTable(SkylakeBranchTable &pht, SkylakePHR phr,
                               MDInstrAddr pc, SkylakeBranchEntry *out);
  void phtSetPush(SkylakeBranchTable &pht, MDInstrAddr pc, SkylakePHR phr);
  SkylakePHR getPHTIndex(SkylakePHR phr, int start1, int start2);
  unsigned long long getFootprint(MDInstrAddr branchAddr,
                                  MDInstrAddr targetAddr);
  SkylakePHR updatePHR(MDInstrAddr currentAddr, MDInstrAddr targetAddr);
};


} // namespace mcad
} // namespace llvm


#endif // LLVM_MCA_HARDWAREUNITS_BRANCHUNIT_H
5 changes: 3 additions & 2 deletions MCAWorker.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@

#include "CustomHWUnits/MCADLSUnit.h"
#include "CustomHWUnits/NaiveBranchPredictorUnit.h"
#include "CustomHWUnits/SkylakeBranchUnit.h"
#include "CustomStages/MCADFetchDelayStage.h"
#include "MCAViews/SummaryView.h"
#include "MCAViews/TimelineView.h"
Expand Down Expand Up @@ -192,7 +193,7 @@ std::unique_ptr<mca::Pipeline> MCAWorker::createDefaultPipeline() {
MCAPO.StoreQueueSize,
MCAPO.AssumeNoAlias, &MDRegistry);
auto HWS = std::make_unique<Scheduler>(SM, *LSU);
auto BPU = std::make_unique<NaiveBranchPredictorUnit>(BranchMispredictionDelay, BranchHistoryTableSize);
auto BPU = std::make_unique<SkylakeBranchUnit>(20);

// Create the pipeline stages.
auto Fetch = std::make_unique<EntryStage>(SrcMgr);
Expand Down Expand Up @@ -237,7 +238,7 @@ std::unique_ptr<mca::Pipeline> MCAWorker::createInOrderPipeline() {
auto LSU = std::make_unique<MCADLSUnit>(SM, MCAPO.LoadQueueSize,
MCAPO.StoreQueueSize,
MCAPO.AssumeNoAlias, &MDRegistry);
auto BPU = std::make_unique<NaiveBranchPredictorUnit>(BranchMispredictionDelay, BranchHistoryTableSize);
auto BPU = std::make_unique<SkylakeBranchUnit>(20);

// Create the pipeline stages.
auto Entry = std::make_unique<EntryStage>(SrcMgr);
Expand Down
6 changes: 6 additions & 0 deletions MetadataCategories.h
Original file line number Diff line number Diff line change
@@ -1,5 +1,10 @@
#ifndef LLVM_MCA_METADATACATEGORIES_H
#define LLVM_MCA_METADATACATEGORIES_H


#include "MetadataRegistry.h"
#include "llvm/MCA/Instruction.h"

namespace llvm {
namespace mcad {

Expand All @@ -24,6 +29,7 @@ MD_BinaryRegionMarkers

struct MDInstrAddr {
unsigned long long addr;

const bool operator<(const MDInstrAddr &b) const {
return addr < b.addr;
}
Expand Down

0 comments on commit ba6e2a2

Please sign in to comment.