Skip to content

Commit

Permalink
Merge branch 'refactor-wis-clean' into 'main'
Browse files Browse the repository at this point in the history
refactor wis

See merge request rewriting/ddisasm!1213
  • Loading branch information
aeflores committed Aug 13, 2024
2 parents 4e3349b + a38f867 commit ef37cd0
Show file tree
Hide file tree
Showing 3 changed files with 133 additions and 82 deletions.
2 changes: 1 addition & 1 deletion src/Functors.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -199,7 +199,7 @@ uint64_t functor_aligned(uint64_t EA, size_t Size)
return EA + ((Size - (EA % Size)) % Size);
}

uint64_t functor_choose_max(uint64_t Val1, uint64_t Val2, uint64_t Id1, uint64_t Id2)
uint64_t functor_choose_max(int64_t Val1, int64_t Val2, uint64_t Id1, uint64_t Id2)
{
if(Val1 <= Val2)
{
Expand Down
2 changes: 1 addition & 1 deletion src/Functors.h
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ extern "C"

EXPORT uint64_t functor_aligned(uint64_t EA, size_t Size);

EXPORT uint64_t functor_choose_max(uint64_t Val1, uint64_t Val2, uint64_t Id1, uint64_t Id2);
EXPORT uint64_t functor_choose_max(int64_t Val1, int64_t Val2, uint64_t Id1, uint64_t Id2);

EXPORT int64_t functor_thumb32_branch_offset(uint32_t Instruction);

Expand Down
211 changes: 131 additions & 80 deletions src/datalog/code_inference.dl
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,7 @@ There is a segment of data in a code section that spans from 'Begin' to 'End'.
.decl data_in_code(Begin:address,End:address)

/**
A block candidate can be a "code" or "data" block candidate.
A block candidate can be a "code", "data", or "padding" block candidate.
*/
.type block_type <: symbol
/**
Expand Down Expand Up @@ -903,83 +903,133 @@ unresolved_block_overlap(Block1,Type1,Size1,Block2,Type2,Size2):-
// For details of the algorithm's implementation, see:
// https://www.cs.umd.edu/class/fall2017/cmsc451-0101/Lects/lect10-dp-intv-sched.pdf

/**
A numerical value that encodes the type of the interval for the WIS algorithm.
*/
.type interval_type <: unsigned

/**
This predicate allows us to translate back and forth from block types (padding, code, or data)
into numerical interval types that are easily sorted. For padding and code blocks, we can
have Default or Thumb blocks and we distinguish those with a different interval type.
The field `AddrAdjust` allows us to adjust the addresses of Thumb blocks.

This predicate also defines an implicit priority. If two blocks have the same weight
and the same boundaries, the one with higher interval_type will be chosen.
This derives from the way we deal with ties in the WIS algorithm. In case of a tie,
the algorithm selects the block, which means that the last block in the ordering will
be selected first.
*/
.decl type_ordering_map(Type:block_type,IntervalType:interval_type,AddrAdjust:address)

type_ordering_map("padding",0,0).
// padding thumb
type_ordering_map("padding",1,1).
type_ordering_map("code",2,0).
// code thumb
type_ordering_map("code",3,1).
type_ordering_map("data",4,0).

/**
Valid unresolved intervals
*/
.decl unresolved_interval(Start:address,End:address)
.decl unresolved_interval(Start:address,End:address,TypeOrd:interval_type,Weight:number)

unresolved_interval(Start,End):-
unresolved_interval(Start,End,IntervalType, Points):-
unresolved_block(Block,Type,Size),
block_total_points(Block,Type,Size,Points),
Points >= 0,
type_ordering_map(Type,IntervalType,AddrAdjust),
Start = Block - AddrAdjust,
block_candidate_boundaries(Block,Type,Start,End),
Size = End - Start.



/**
Sort intervals by end address
Auxiliary predicate to help sorting the intervals.
`next_type` captures the next type for fixed start and end addresses.
*/
.decl unresolved_interval_order(ID:unsigned,Start:address,End:address)
.decl next_type(Start:address,End:address,PrevType:interval_type,NextType:interval_type)

unresolved_interval_order(ID,FirstStart,FirstEnd):-
ID = 1,
FirstEnd = min End : {
unresolved_interval(_,End)
},
FirstStart = min Start : {
unresolved_interval(Start,FirstEnd)
/**
Auxiliary predicate to help sorting the intervals.
`next_start` captures the next starting address for a fixed end address.
*/
.decl next_start(PrevStart:address,End:address,NextStart:address)

/**
Auxiliary predicate to help sorting the intervals.
`next_end` captures the next end address overall.
*/
.decl next_end(PrevEnd:address,NextEnd:address)

next_type(Start,End,PrevType,NextType):-
unresolved_interval(Start,End,PrevType,_),
NextType = min Type : {
unresolved_interval(Start,End,Type,_),
Type > PrevType
}.

unresolved_interval_order(LastID+1,NextStart,LastEnd):-
unresolved_interval_order(LastID,LastStart,LastEnd),
next_start(PrevStart,End,NextStart):-
unresolved_interval(PrevStart,End,_,_),
NextStart = min Start : {
unresolved_interval(Start,LastEnd),
Start > LastStart
unresolved_interval(Start,End,_,_),
Start > PrevStart
}.

unresolved_interval_order(LastID+1,NextStart,NextEnd):-
unresolved_interval_order(LastID,LastStart,LastEnd),
0 = count : {
unresolved_interval(Start,LastEnd),
Start > LastStart
},
next_end(PrevEnd,NextEnd):-
unresolved_interval(_,PrevEnd,_,_),
NextEnd = min End : {
unresolved_interval(_,End),
End > LastEnd
},
NextStart = min Start : {
unresolved_interval(Start,NextEnd)
unresolved_interval(_,End,_,_),
End > PrevEnd
}.

.decl block_type_priority(Type:block_type,Priority:unsigned)


block_type_priority("data",2).
block_type_priority("code",1).
block_type_priority("padding",0).

/**
Select best block in an unresolved interval and calculate weight
Sort intervals lexicographically by <end address, start address, interval_type>
*/
.decl unresolved_interval_best_block(ID:unsigned,Block:address,Type:block_type,Size:unsigned,Weight:unsigned)
.decl unresolved_interval_order(ID:unsigned,Start:address,End:address,Type:interval_type,Weight:number)

unresolved_interval_best_block(ID,SelectedBlock,SelectedType,Size,Weight):-
unresolved_interval_order(ID,Start,End),
Size = End - Start,
MaxPoints = max Points : {
block_candidate_boundaries(Block,Type,Start,End),
block_total_points(Block,Type,Size,Points),
// Don't select intervals with negative score.
Points >= 0
unresolved_interval_order(ID,FirstStart,FirstEnd,FirstType,Weight):-
ID = 1,
FirstEnd = min End : {
unresolved_interval(_,End,_,_)
},
FirstStart = min Start : {
unresolved_interval(Start,FirstEnd,_,_)
},
FirstType = min Type : {
unresolved_interval(FirstStart,FirstEnd,Type,_)
},
MaxPriority = max Priority : {
block_candidate_boundaries(Block,Type,Start,End),
block_total_points(Block,Type,Size,MaxPoints),
block_type_priority(Type,Priority)
unresolved_interval(FirstStart,FirstEnd,FirstType,Weight).

unresolved_interval_order(ID+1,Start,End,NextType,Weight):-
unresolved_interval_order(ID,Start,End,Type,_),
next_type(Start,End,Type,NextType),
unresolved_interval(Start,End,NextType,Weight).

unresolved_interval_order(ID+1,NextStart,End,FirstType,Weight):-
unresolved_interval_order(ID,Start,End,PrevType,_),
!next_type(Start,End,PrevType,_),
next_start(Start,End,NextStart),
FirstType = min Type : {
unresolved_interval(NextStart,End,Type,_)
},
block_type_priority(SelectedType,MaxPriority),
block_total_points(SelectedBlock,SelectedType,Size,MaxPoints),
block_candidate_boundaries(SelectedBlock,SelectedType,Start,End),
Weight = as(MaxPoints, unsigned).
unresolved_interval(NextStart,End,FirstType,Weight).

unresolved_interval_order(ID+1,FirstStart,NextEnd,FirstType,Weight):-
unresolved_interval_order(ID,PrevStart,End,PrevType,_),
!next_type(PrevStart,End,PrevType,_),
!next_start(PrevStart,End,_),
next_end(End,NextEnd),
FirstStart = min Start : {
unresolved_interval(Start,NextEnd,_,_)
},
FirstType = min Type : {
unresolved_interval(FirstStart,NextEnd,Type,_)
},
unresolved_interval(FirstStart,NextEnd,FirstType,Weight).


/**
Weighted interval schedule: `Count` of intervals that end prior to the start of
Expand All @@ -990,13 +1040,13 @@ Only calculated for intervals that have prior intervals.
.decl wis_has_prior(I:unsigned,Count:unsigned)

wis_has_prior(I,Prior):-
unresolved_interval_order(Prior,_,PriorEnd),
unresolved_interval_order(Prior,_,PriorEnd,_,_),
// check that we have the last interval ending at PriorEnd
unresolved_interval_order(Prior+1,_,NextEnd),
unresolved_interval_order(Prior+1,_,NextEnd,_,_),
NextEnd > PriorEnd,
// find intervals that start after PriorEnd but before NextEnd.
// for those intervals `Prior` is the prior.
unresolved_interval_order(I,Start,_),
unresolved_interval_order(I,Start,_,_,_),
NextEnd > Start, Start >= PriorEnd.

/**
Expand All @@ -1006,7 +1056,7 @@ intervals (i.e., with `Count` of 0).
.decl wis_prior(I:unsigned,Count:unsigned)

wis_prior(I,0):-
unresolved_interval_order(I,_,_),
unresolved_interval_order(I,_,_,_,_),
!wis_has_prior(I,_).

wis_prior(I,Prior):-
Expand All @@ -1015,7 +1065,7 @@ wis_prior(I,Prior):-
/**
If Val1 > Val2, return Id1. Otherwise (including ties), return Id2.
*/
.functor functor_choose_max(Val1:unsigned,Val2:unsigned,Id1:unsigned,Id2:unsigned):unsigned
.functor functor_choose_max(Val1:number,Val2:number,Id1:unsigned,Id2:unsigned):unsigned

/**
Weighted interval schedule: memoized subproblems
Expand All @@ -1026,7 +1076,7 @@ Value is the optimized total weight for intervals [1:I].
Predecessor) is the previous entry in `wis_memo`, later used to reconstruct
the optimal solution.
*/
.decl wis_memo(I:unsigned,Value:unsigned,Pred:unsigned)
.decl wis_memo(I:unsigned,Value:number,Pred:unsigned)

wis_memo(0,0,0).

Expand All @@ -1035,13 +1085,14 @@ wis_memo(I,Value,Pred):-
// specifying this expression in both forms allows it to be indexed in both rule plans.
I = IPrev + 1,
IPrev = I - 1,
unresolved_interval_best_block(I,_,_,_,IWeight),
unresolved_interval_order(I,_,_,_,IWeight),
wis_prior(I,P),
wis_memo(P,TakeWeight0,_),
TakeWeight = TakeWeight0 + IWeight,
Value = max(LeaveWeight,TakeWeight),
// functor_choose_max is used here instead of a disjunction
// for performance.
// In case of equality, it defaults to including P.
Pred = @functor_choose_max(LeaveWeight,TakeWeight,IPrev,P).

// ensure @delta_wis_memo is evaluated first in both plans
Expand All @@ -1057,14 +1108,24 @@ selected.
interval_schedule_tie(BlockA,TypeA,SizeA,BlockB,TypeB,SizeB):-
wis_memo(IPrev,LeaveWeight,_),
I = IPrev + 1,
unresolved_interval_best_block(I,BlockA,TypeA,SizeA,IWeight),
unresolved_interval_order(I,StartA,EndA,IntervalTypeA,IWeight),

SizeA = as(EndA - StartA,unsigned),
type_ordering_map(TypeA,IntervalTypeA,AddrAdjustA),
BlockA = StartA + AddrAdjustA,

// Don't warn if the selected block is later discarded.
!discarded_block(BlockA,TypeA,SizeA,_,_),
wis_prior(I,P),
wis_memo(P,TakeWeight0,_),
TakeWeight = TakeWeight0 + IWeight,
LeaveWeight = TakeWeight,
unresolved_interval_best_block(IPrev,BlockB,TypeB,SizeB,_),
unresolved_interval_order(IPrev,StartB,EndB,IntervalTypeB,_),

SizeB = as(EndB - StartB,unsigned),
type_ordering_map(TypeB,IntervalTypeB,AddrAdjustB),
BlockB = StartB + AddrAdjustB,

// We only care about ties if I is selected in the final schedule.
// Otherwise, it had no impact on the output.
// It's a non-issue if IPrev is selected. Because the wis_memo rule favors
Expand All @@ -1081,8 +1142,8 @@ Weighted interval schedule: reconstruct schedule of selected intervals

// Start from the end, which holds the completed optimal solution.
wis_schedule_iter(I):-
unresolved_interval_order(I,_,_),
!unresolved_interval_order(I+1,_,_).
unresolved_interval_order(I,_,_,_,_),
!unresolved_interval_order(I+1,_,_,_,_).

// Add to schedule if interval I was taken.
wis_schedule(I):-
Expand Down Expand Up @@ -1115,11 +1176,13 @@ BlockPropagated (for debugging purpose):
.decl discarded_block(Block:address,Type:block_type,Size:unsigned,Reason:symbol,BlockPropagated:address)

discarded_block(Block,Type,Size,"interval discarded",0):-
unresolved_block(Block,Type,Size),
block_candidate_boundaries(Block,Type,Start,End),
unresolved_interval_order(I,Start,End),
unresolved_interval_order(I,Start,End,IntervalType,_),
Size = End - Start,
!wis_schedule(I).
!wis_schedule(I),
type_ordering_map(Type,IntervalType,AddrAdjust),
Block = Start + AddrAdjust,
unresolved_block(Block,Type,Size).


// If a code block and a data block are both known, and the data block is only
// known by propagation, discard the data block.
Expand All @@ -1141,20 +1204,10 @@ discarded_block(Block2,Type2,Size2,"overlaps known code",Block1):-
known_block(Block2,Type2,Size2,"propagated"),
Type1 = "code", Type2 = "data".

discarded_block(Block,Type,Size,"less points",0):-
wis_schedule(I),
unresolved_interval_order(I,Start,End),
Size = End - Start,
block_candidate_boundaries(Block,Type,Start,End),
!unresolved_interval_best_block(I,Block,Type,Size,_).

discarded_block(Block,Type,Size,"impossible",0):-
impossible_block(Block,Type,Size,_).

discarded_block(Block,Type,Size,"negative points",0):-
block_total_points(Block,Type,Size,Points),
Points < 0.

// propagate discarding blocks through direct jumps, calls, and must_fallthrough
discarded_block(Block1,Type1,Size1,"propagated",Block2):-
discarded_block(Block2,"code",Size2,_,_),
Expand Down Expand Up @@ -1628,9 +1681,7 @@ block_still_overlap(Block1,Type1,Size1,Block2,Type2,Size2):-
;
Block1 = Block2,
Size1 = Size2,
block_type_priority(Type1,Priority1),
block_type_priority(Type2,Priority2),
Priority1 < Priority2
Type1 < Type2
).

/**
Expand Down

0 comments on commit ef37cd0

Please sign in to comment.