Skip to content

Commit

Permalink
Ian/allow for typehints in spec (#390)
Browse files Browse the repository at this point in the history
* add support for type hints after instructions

* fix annotations to use intrinsics

* remove spurious header

* remove unrequired import

* remove unused import
  • Loading branch information
2over12 authored Sep 18, 2023
1 parent 66bb39d commit e04ff9d
Show file tree
Hide file tree
Showing 9 changed files with 135 additions and 3 deletions.
10 changes: 10 additions & 0 deletions data_specifications/specification.proto
Original file line number Diff line number Diff line change
Expand Up @@ -305,6 +305,11 @@ message StackFrame {
int64 parameter_offset = 5;
}

// A type hint tied to one instruction address: after the instruction at
// `target_addr`, the location described by `target_var` is known to hold a
// value of that variable's type.
message TypeHint {
// Address of the instruction the hint applies to; the lifter compares this
// against the PC of each decoded instruction.
uint64 target_addr = 1;
// The hinted location (`values`) together with the type (`type`) it is known
// to have.
Variable target_var = 2;
}

message Function {
uint64 entry_address = 1;
FunctionLinkage func_linkage = 3;
Expand All @@ -319,6 +324,11 @@ message Function {
StackFrame frame = 9;

repeated Parameter in_scope_vars = 10;

// An instruction can carry a set of type hints, each stating that a location
// is known to have a given type after that instruction. Each hint is
// translated into a low lifting of that location, annotated with spec type
// metadata.
repeated TypeHint type_hints = 11;
}

message GlobalVariable {
Expand Down
8 changes: 8 additions & 0 deletions include/anvill/Declarations.h
Original file line number Diff line number Diff line change
Expand Up @@ -365,6 +365,11 @@ class SpecBlockContext : public BasicBlockContext {
virtual const std::vector<ParameterDecl> &LiveParamsAtExit() const override;
};


// A decoded type hint: associates an instruction address with a value
// declaration (location plus type) that is applied after that instruction is
// lifted.
struct TypeHint {
// Address of the instruction this hint is attached to; matched against the
// decoded instruction's `pc` during lifting.
uint64_t target_addr;
// Location and type information for the hinted value.
ValueDecl hint;
};
// A function decl, as represented at a "near ABI" level. To be specific,
// not all C, and most C++ decls, as written would be directly translatable
// to this. This ought nearly represent how LLVM represents a C/C++ function
Expand Down Expand Up @@ -415,6 +420,9 @@ struct FunctionDecl : public CallableDecl {
std::unordered_map<std::uint64_t, std::vector<ConstantDomain>>
constant_values_at_exit;

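// Type hints, sorted by ascending `target_addr`. The lifter binary-searches
// this vector by instruction address, so the ordering must be maintained.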
std::vector<TypeHint> type_hints;

std::uint64_t stack_depth;

std::uint64_t maximum_depth;
Expand Down
43 changes: 43 additions & 0 deletions lib/Lifters/BasicBlockLifter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -298,6 +298,33 @@ bool BasicBlockLifter::DecodeInstructionInto(const uint64_t addr,
}


// Applies one type hint at the current insertion point of `bldr`.
//
// The value described by `type_hint` is loaded from the lifted state/memory.
// When that value is pointer-typed it is passed through a call to the
// type-hint function, and the call is tagged with "anvill.type" metadata
// encoding the hint's spec type. The (possibly wrapped) value is then stored
// back to the same location, and the memory pointer produced by that store is
// written to the memory pointer reference of the current block.
void BasicBlockLifter::ApplyTypeHint(llvm::IRBuilder<> &bldr,
                                     const ValueDecl &type_hint) {
  llvm::Function *const hint_func = this->GetTypeHintFunction();
  auto *const state_ptr =
      this->lifted_func->getArg(remill::kStatePointerArgNum);
  auto mem_ptr =
      remill::LoadMemoryPointer(bldr.GetInsertBlock(), this->intrinsics);

  // Read the current value of the hinted location.
  llvm::Value *hinted_value =
      anvill::LoadLiftedValue(type_hint, options.TypeDictionary(), intrinsics,
                              options.arch, bldr, state_ptr, mem_ptr);

  // Only pointer-typed values are wrapped in the type-hint call; anything
  // else is written back unchanged.
  if (hinted_value->getType()->isPointerTy()) {
    auto *const hint_call = bldr.CreateCall(hint_func, {hinted_value});
    auto *const type_md =
        this->type_specifier.EncodeToMetadata(type_hint.spec_type);
    hint_call->setMetadata("anvill.type", type_md);
    hinted_value = hint_call;
  }

  // Write the value back and publish the updated memory pointer.
  auto updated_mem_ptr =
      StoreNativeValue(hinted_value, type_hint, options.TypeDictionary(),
                       intrinsics, bldr, state_ptr, mem_ptr);
  auto mem_ptr_ref = remill::LoadMemoryPointerRef(bldr.GetInsertBlock());
  bldr.CreateStore(updated_mem_ptr, mem_ptr_ref);
}


void BasicBlockLifter::LiftInstructionsIntoLiftedFunction() {
auto entry_block = &this->lifted_func->getEntryBlock();

Expand Down Expand Up @@ -340,6 +367,22 @@ void BasicBlockLifter::LiftInstructionsIntoLiftedFunction() {
inst, bb, this->lifted_func->getArg(remill::kStatePointerArgNum),
false /* is_delayed */);

llvm::IRBuilder<> builder(bb);

auto start =
std::lower_bound(decl.type_hints.begin(), decl.type_hints.end(),
inst.pc, [](const TypeHint &hint_rhs, uint64_t addr) {
return hint_rhs.target_addr < addr;
});
auto end =
std::upper_bound(decl.type_hints.begin(), decl.type_hints.end(),
inst.pc, [](uint64_t addr, const TypeHint &hint_rhs) {
return addr < hint_rhs.target_addr;
});
for (; start != end; start++) {
this->ApplyTypeHint(builder, start->hint);
}

ended_on_terminal =
!this->ApplyInterProceduralControlFlowOverride(inst, bb);
DLOG_IF(INFO, ended_on_terminal)
Expand Down
3 changes: 3 additions & 0 deletions lib/Lifters/BasicBlockLifter.h
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,9 @@ class BasicBlockLifter : public CodeLifter {

remill::DecodingContext CreateDecodingContext(const CodeBlock &blk);


// Emits, via `bldr`, a load of the value described by `type_hint`, wraps it
// in a type-hint call carrying "anvill.type" metadata when the value is
// pointer-typed, and stores the result back to the same location.
void ApplyTypeHint(llvm::IRBuilder<> &bldr, const ValueDecl &type_hint);

void LiftInstructionsIntoLiftedFunction();

BasicBlockFunction CreateBasicBlockFunction();
Expand Down
24 changes: 22 additions & 2 deletions lib/Lifters/CodeLifter.cpp
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
#include "CodeLifter.h"

#include <anvill/ABI.h>
#include <anvill/Type.h>
#include <glog/logging.h>
#include <llvm/IR/DerivedTypes.h>
#include <llvm/IR/InstIterator.h>
#include <llvm/IR/Instructions.h>
#include <llvm/IR/Verifier.h>
Expand All @@ -24,8 +27,6 @@

#include <unordered_set>

#include "anvill/Type.h"

namespace anvill {
namespace {
// Clear out LLVM variable names. They're usually not helpful.
Expand Down Expand Up @@ -170,6 +171,25 @@ void CodeLifter::InitializeStateStructureFromGlobalRegisterVariables(
});
}

// Returns the function named `kTypeHintFunctionPrefix` from the semantics
// module. If the module has no such function yet, one is created with
// external linkage and the signature `ptr (ptr)` (pointer in address space 0),
// and that new function is returned.
llvm::Function *CodeLifter::GetTypeHintFunction() {
  const auto &hint_name = kTypeHintFunctionPrefix;

  // Reuse the function if the module already has it.
  if (auto *existing = semantics_module->getFunction(hint_name)) {
    return existing;
  }

  llvm::Type *const ptr_ty =
      llvm::PointerType::get(this->semantics_module->getContext(), 0);
  auto *const hint_func_ty =
      llvm::FunctionType::get(ptr_ty, {ptr_ty}, false /* isVarArg */);

  return llvm::Function::Create(hint_func_ty,
                                llvm::GlobalValue::ExternalLinkage, hint_name,
                                this->semantics_module);
}

llvm::MDNode *CodeLifter::GetAddrAnnotation(uint64_t addr,
llvm::LLVMContext &context) const {
auto pc_val = llvm::ConstantInt::get(
Expand Down
3 changes: 3 additions & 0 deletions lib/Lifters/CodeLifter.h
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,9 @@ class CodeLifter {

unsigned pc_annotation_id;


// Returns the function named `kTypeHintFunctionPrefix` in the semantics
// module, creating it (pointer argument, pointer return, external linkage)
// if it does not exist yet.
llvm::Function *GetTypeHintFunction();

llvm::MDNode *GetAddrAnnotation(uint64_t addr,
llvm::LLVMContext &context) const;

Expand Down
1 change: 0 additions & 1 deletion lib/Optimize.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -262,7 +262,6 @@ void OptimizeModule(const EntityLifter &lifter, llvm::Module &module,
//AddRecoverBasicStackFrame(fpm, options.stack_frame_recovery_options);
//AddSplitStackFrameAtReturnAddress(fpm, options.stack_frame_recovery_options);
fpm.addPass(llvm::SROAPass(llvm::SROAOptions::ModifyCFG));
//fpm.addPass(anvill::ReplaceStackReferences(contexts, lifter));
fpm.addPass(llvm::VerifierPass());
fpm.addPass(llvm::SROAPass(llvm::SROAOptions::ModifyCFG));
fpm.addPass(llvm::VerifierPass());
Expand Down
25 changes: 25 additions & 0 deletions lib/Passes/ConvertPointerArithmeticToGEP.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
* the LICENSE file found in the root directory of this source tree.
*/

#include <anvill/ABI.h>
#include <anvill/Declarations.h>
#include <anvill/Passes/ConvertPointerArithmeticToGEP.h>
#include <anvill/Type.h>
Expand All @@ -18,6 +19,7 @@
#include <llvm/IR/GlobalValue.h>
#include <llvm/IR/GlobalVariable.h>
#include <llvm/IR/InstIterator.h>
#include <llvm/IR/InstrTypes.h>
#include <llvm/IR/Instruction.h>
#include <llvm/IR/Instructions.h>
#include <llvm/IR/LLVMContext.h>
Expand Down Expand Up @@ -71,6 +73,8 @@ struct ConvertPointerArithmeticToGEP::Impl {
llvm::MDNode *TypeSpecToMD(llvm::LLVMContext &context, UnknownType t);
llvm::MDNode *TypeSpecToMD(llvm::LLVMContext &context, TypeSpec type);


bool ConvertTypeHints(llvm::Function &f);
bool ConvertLoadInt(llvm::Function &f);
bool FoldPtrAdd(llvm::Function &f);
bool FoldScaledIndex(llvm::Function &f);
Expand Down Expand Up @@ -330,6 +334,26 @@ llvm::StringRef ConvertPointerArithmeticToGEP::name() {
return "ConvertPointerArithmeticToGEP";
}

// Strips type-hint calls out of `f`.
//
// Every direct call to the function named `kTypeHintFunctionPrefix` is
// replaced by its first argument and then erased. Returns true if at least
// one call was removed.
bool ConvertPointerArithmeticToGEP::Impl::ConvertTypeHints(llvm::Function &f) {
  // Collect first, erase afterwards, so no instruction is removed while the
  // function's instructions are still being iterated.
  std::vector<llvm::CallBase *> hint_calls;
  for (auto &inst : llvm::instructions(f)) {
    auto *const as_call = llvm::dyn_cast<llvm::CallBase>(&inst);
    if (as_call == nullptr) {
      continue;
    }

    // `getCalledFunction()` may be null, in which case the call is skipped.
    auto *const callee = as_call->getCalledFunction();
    if (callee != nullptr && callee->getName() == kTypeHintFunctionPrefix) {
      hint_calls.push_back(as_call);
    }
  }

  for (auto *hint_call : hint_calls) {
    auto *const hinted_operand = hint_call->getArgOperand(0);
    hint_call->replaceAllUsesWith(hinted_operand);
    hint_call->eraseFromParent();
  }

  return !hint_calls.empty();
}

// Finds `(load i64, P)` and converts it to `(ptrtoint (load ptr, P))`
bool ConvertPointerArithmeticToGEP::Impl::ConvertLoadInt(llvm::Function &f) {
using namespace llvm::PatternMatch;
Expand Down Expand Up @@ -573,6 +597,7 @@ llvm::PreservedAnalyses ConvertPointerArithmeticToGEP::runOnBasicBlockFunction(
bool changed = impl->ConvertLoadInt(function);
changed |= impl->FoldPtrAdd(function);
changed |= impl->FoldScaledIndex(function);
changed |= impl->ConvertTypeHints(function);
return changed ? llvm::PreservedAnalyses::none()
: llvm::PreservedAnalyses::all();
}
Expand Down
21 changes: 21 additions & 0 deletions lib/Protobuf.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -558,6 +558,27 @@ Result<FunctionDecl, std::string> ProtobufTranslator::DecodeFunction(

this->ParseCFGIntoFunction(function, decl);


for (auto &ty_hint : function.type_hints()) {
auto maybe_type = DecodeType(ty_hint.target_var().type());
if (maybe_type.Succeeded()) {
auto maybe_var =
DecodeValueDecl(ty_hint.target_var().values(), maybe_type.TakeValue(),
"attempting to decode type hint value");
if (maybe_var.Succeeded()) {
decl.type_hints.push_back(
{ty_hint.target_addr(), maybe_var.TakeValue()});
}
} else {
LOG(ERROR) << "Failed to decode type for type hint";
}
}

std::sort(decl.type_hints.begin(), decl.type_hints.end(),
[](const TypeHint &hint_lhs, const TypeHint &hint_rhs) {
return hint_lhs.target_addr < hint_rhs.target_addr;
});

auto link = function.func_linkage();

if (link == specification::FUNCTION_LINKAGE_DECL) {
Expand Down

0 comments on commit e04ff9d

Please sign in to comment.