diff --git a/CHANGELOG.md b/CHANGELOG.md
index 5f41f9b4..9d519e17 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -23,6 +23,7 @@
 * Requires gtirb >=2.1.0
 * Track values of registers R8B - R15B on x86-64, which are in some cases needed for inferring jump table boundaries.
 * Infer jump table boundaries from comparisons of registers correlated to the index register.
+* Relax constraints for inferring jump table boundaries from comparisons of indirect operands.
 * Fix bug where a relative jump table starting with consecutive zero offsets was truncated at the first non-zero value.
 
 # 1.8.0
diff --git a/examples/asm_examples/ex_relative_jump_tables3/Makefile b/examples/asm_examples/ex_relative_jump_tables3/Makefile
new file mode 100644
index 00000000..cd3634c9
--- /dev/null
+++ b/examples/asm_examples/ex_relative_jump_tables3/Makefile
@@ -0,0 +1,10 @@
+
+all: ex_original.s
+	gcc ex_original.s -o ex
+	@./ex > out.txt
+clean:
+	rm -f ex out.txt
+	rm -fr ex.unstripped ex.s *.old* dl_files *.gtirb
+check:
+	./ex > /tmp/res.txt
+	@diff out.txt /tmp/res.txt && echo TEST OK
diff --git a/examples/asm_examples/ex_relative_jump_tables3/ex_original.s b/examples/asm_examples/ex_relative_jump_tables3/ex_original.s
new file mode 100644
index 00000000..3647c003
--- /dev/null
+++ b/examples/asm_examples/ex_relative_jump_tables3/ex_original.s
@@ -0,0 +1,335 @@
+// Similar to ex_relative_jump_tables, except that in this example there are
+// memory writes between the bound-check instruction (`cmp`) and the
+// corresponding `jmp`.
+//
+// This example demonstrates that `jump_table_B` and `jump_table_D` are
+// correctly resolved when we have a bound-check pattern like the following,
+// where a value in memory is compared to a constant and then loaded right
+// after the corresponding jump:
+//
+//   cmp dword ptr [rdx], 31
+//   ...
+//   ja target
+//   mov ecx, dword ptr [rdx]
+//
+// We check that the (up to two) instructions between the `cmp` and the `jmp`
+// do not write to the memory location that the `cmp` reads.
+// As long as the memory operands are syntactically different, we assume that
+// they do not alias.
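+//
+// Concretely, `.target1` below exercises this pattern; the instructions
+// between the `cmp` and the `jbe` include a store whose memory operand is
+// syntactically different from the compared one:
+//
+//   cmp dword ptr [r12], 4
+//   mov dword ptr [r12 + 4], 7          // store, but not to [r12]
+//   mov r10d, dword ptr [r12 + rbx*4]   // load; only stores matter here
+//   jbe .L_jump1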
+
+    .text
+    .intel_syntax noprefix
+    .file "ex.c"
+
+# -- Begin function one
+    .globl one
+    .p2align 4, 0x90
+    .type one,@function
+one:
+    push rbx
+    mov ebx, edi
+    lea rdi, [rip + .L.str]
+    call puts@PLT
+    mov eax, ebx
+    pop rbx
+    ret
+.Lfunc_end0:
+    .size one, .Lfunc_end0-one
+# -- End function
+
+# -- Begin function two
+    .globl two
+    .p2align 4, 0x90
+    .type two,@function
+two:
+    push rbx
+    mov ebx, edi
+    lea rdi, [rip + .L.str.1]
+    call puts@PLT
+    mov eax, ebx
+    pop rbx
+    ret
+.Lfunc_end1:
+    .size two, .Lfunc_end1-two
+# -- End function
+
+# -- Begin function three
+    .globl three
+    .p2align 4, 0x90
+    .type three,@function
+three:
+    push rbx
+    mov ebx, edi
+    lea rdi, [rip + .L.str.2]
+    call puts@PLT
+    lea eax, [rbx + 1]
+    pop rbx
+    ret
+.Lfunc_end2:
+    .size three, .Lfunc_end2-three
+# -- End function
+
+# -- Begin function four
+    .globl four
+    .p2align 4, 0x90
+    .type four,@function
+four:
+    push rbx
+    mov ebx, edi
+    lea rdi, [rip + .L.str.3]
+    call puts@PLT
+    mov eax, ebx
+    pop rbx
+    ret
+.Lfunc_end3:
+    .size four, .Lfunc_end3-four
+# -- End function
+
+# -- Begin function five
+    .globl five
+    .p2align 4, 0x90
+    .type five,@function
+five:
+    push rbx
+    mov ebx, edi
+    lea rdi, [rip + .L.str.4]
+    call puts@PLT
+    mov eax, ebx
+    pop rbx
+    ret
+.Lfunc_end4:
+    .size five, .Lfunc_end4-five
+# -- End function
+
+# -- Begin function six
+    .globl six
+    .p2align 4, 0x90
+    .type six,@function
+six:
+    push rbx
+    mov ebx, edi
+    lea rdi, [rip + .L.str.5]
+    call puts@PLT
+    mov eax, ebx
+    pop rbx
+    ret
+.Lfunc_end5:
+    .size six, .Lfunc_end5-six
+# -- End function
+
+# -- Begin function def
+    .globl def
+    .p2align 4, 0x90
+    .type def,@function
+def:
+    push rbx
+    mov ebx, edi
+    lea rdi, [rip + .L.str.6]
+    call puts@PLT
+    mov eax, ebx
+    pop rbx
+    ret
+.Lfunc_end6:
+    .size def, .Lfunc_end6-def
+# -- End function
+
+# -- Begin function fun
+    .globl fun
+    .p2align 4, 0x90
+    .type fun,@function
+fun:
+    push rbp
+    push r9
+    push r10
+    push r12
+    push r13
+    push rbx
+    mov rbp, rsp
+    mov r13d, esi
+    mov ebx, edi
+    cmp ebx, r13d
+    jge .LBB5_10
+.LBB5_2:
+    lea eax, [rbx - 1]
+    cmp eax, 1
+    ja .LBB5_9
+    jbe .target1
+    jmp .target2
+.target1:
+    lea r9, [rip + .jump_table_A]
+    mov edi, ebx
+    call one
+    lea r12, dword ptr [rip + bound]
+    test rbx, 1
+    jnz .L_odd1
+    mov dword ptr [r12], 1
+    jmp .L_end1
+.L_odd1:
+    mov dword ptr [r12], 2
+.L_end1:
+    cmp dword ptr [r12], 4
+    mov dword ptr [r12 + 4], 7
+    mov r10d, dword ptr [r12 + rbx*4]
+    jbe .L_jump1
+    jmp .LBB5_9
+.L_jump1:
+    mov r12d, dword ptr [r12]
+    lea r10, [rip + .jump_table_B]
+    movsxd rax, dword ptr [r9 + 4*r12]
+    add rax, r9
+    jmp rax
+    .p2align 4, 0x90
+.target2:
+    lea r9, [rip + .jump_table_C]
+    mov edi, ebx
+    call one
+    lea r12, dword ptr [rip + bound]
+    test rbx, 1
+    jnz .L_odd2
+    mov dword ptr [r12], 1
+    jmp .L_end2
+.L_odd2:
+    mov dword ptr [r12], 2
+.L_end2:
+    cmp dword ptr [r12], 4
+    mov dword ptr [r12 + rbx*4], ebx
+    lea r10, [rip + .jump_table_D]
+    jbe .L_jump2
+    jmp .LBB5_9
+.L_jump2:
+    mov r12d, dword ptr [r12]
+    movsxd rax, dword ptr [r9 + 4*r12]
+    add rax, r9
+    jmp rax
+    .p2align 4, 0x90
+.jump_table_target3:
+    mov edi, ebx
+    call three
+    test rbx, 1
+    jnz .L_odd3
+    mov r12, 32
+    jmp .L_end3
+.L_odd3:
+    mov r12, 33
+.L_end3:
+    sub r12, 32
+    movsxd rax, dword ptr [r10 + 4*r12]
+    add rax, r10
+    jmp rax
+    .p2align 4, 0x90
+.jump_table_target4:
+    mov edi, ebx
+    call four
+    jmp .LBB5_9
+    .p2align 4, 0x90
+.jump_table_target5:
+    mov edi, ebx
+    call five
+    jmp .LBB5_9
+    .p2align 4, 0x90
+.jump_table_target6:
+    mov edi, ebx
+    call six
+.LBB5_9:
+    add ebx, 1
+    cmp r13d, ebx
+    jne .LBB5_2
+.LBB5_10:
+    pop rbx
+    pop r13
+    pop r12
+    pop r10
+    pop r9
+    pop rbp
+    ret
+.Lfunc_end8:
+    .size fun, .Lfunc_end8-fun
+    .section .rodata,"a",@progbits
+    .p2align 2
+
+// Here we have tables of relative offsets (symbol minus symbol).
+.jump_table_A:
+    .long .target1-.jump_table_A
+    .long .jump_table_target3-.jump_table_A
+    .long .jump_table_target4-.jump_table_A
+.jump_table_B:
+    .long .jump_table_target5-.jump_table_B
+    .long .jump_table_target6-.jump_table_B
+.jump_table_C:
+    .long .target1-.jump_table_C
+    .long .jump_table_target3-.jump_table_C
+    .long .jump_table_target4-.jump_table_C
+.jump_table_D:
+    .long .jump_table_target5-.jump_table_D
+    .long .jump_table_target6-.jump_table_D
+# -- End function
+
+    .text
+# -- Begin function main
+    .globl main
+    .p2align 4, 0x90
+    .type main,@function
+main:
+    push rax
+    lea rdi, [rip + .L.str.7]
+    call puts@PLT
+    mov edi, 1
+    mov esi, 6
+    call fun
+    xor eax, eax
+    pop rcx
+    ret
+.Lfunc_end7:
+    .size main, .Lfunc_end7-main
+# -- End function
+
+
+    .type .L.str,@object            # @.str
+    .section .rodata.str1.1,"aMS",@progbits,1
+.L.str:
+    .asciz "one"
+    .size .L.str, 4
+
+    .type .L.str.1,@object          # @.str.1
+.L.str.1:
+    .asciz "two"
+    .size .L.str.1, 4
+
+    .type .L.str.2,@object          # @.str.2
+.L.str.2:
+    .asciz "three"
+    .size .L.str.2, 6
+
+    .type .L.str.3,@object          # @.str.3
+.L.str.3:
+    .asciz "four"
+    .size .L.str.3, 5
+
+    .type .L.str.4,@object          # @.str.4
+.L.str.4:
+    .asciz "five"
+    .size .L.str.4, 5
+
+    .type .L.str.5,@object          # @.str.5
+.L.str.5:
+    .asciz "six"
+    .size .L.str.5, 4
+
+    .type .L.str.6,@object          # @.str.6
+.L.str.6:
+    .asciz "last"
+    .size .L.str.6, 5
+
+    .type .L.str.7,@object          # @.str.7
+.L.str.7:
+    .asciz "!!!Hello World!!!"
+    .size .L.str.7, 18
+
+
+    .data
+    .align 8
+bound:
+    .zero 64
+
+    .ident "clang version 6.0.0 (tags/RELEASE_600/final)"
+    .section ".note.GNU-stack","",@progbits
diff --git a/src/datalog/boundary_value_analysis.dl b/src/datalog/boundary_value_analysis.dl
index bd385160..2d81cc55 100644
--- a/src/datalog/boundary_value_analysis.dl
+++ b/src/datalog/boundary_value_analysis.dl
@@ -181,14 +181,63 @@ value_reg_limit(EA_jmp,EA_fallthrough,Reg,FallthroughValue,FallthroughLT):-
     BranchValue = Immediate + BranchOffset,
     FallthroughValue = Immediate + FallthroughOffset.
 
+/**
+Detect cases where the indirect operand used in a comparison is not modified between the comparison and the corresponding jump.
+*/
+.decl compare_and_jump_indirect_op_valid(EA_cmp:address,EA_jmp:address,EA:address,IndirectOp:operand_code,Steps:number)
+
+compare_and_jump_indirect_op_valid(EA_cmp,EA_jmp,EA_cmp,IndirectOp,0):-
+    compare_and_jump_indirect(EA_cmp,EA_jmp,_,IndirectOp,_).
+
+compare_and_jump_indirect_op_valid(EA_cmp,EA_jmp,EA,IndirectOp,Steps+1):-
+    // Propagate forward (hopefully) towards the jump.
+    // We don't support compare/jump pairs that require following complex CFG
+    // edges.
+    EA <= EA_jmp,
+    compare_and_jump_indirect_op_valid(EA_cmp,EA_jmp,Prev,IndirectOp,Steps),
+    Steps < 3,
+    may_fallthrough(Prev,EA),
+    code(EA),
+    (
+        // No store
+        !arch.memory_access("STORE",EA,_,_,_,_,_,_,_),
+        !arch.store_immediate(EA,_,_,_,_,_,_,_),
+        !arch.memory_access_aggregated("STORE",EA,_,_,_,_,_)
+    ;
+        // If there are memory writes between the cmp and the jmp,
+        // they must be syntactically different from IndirectOp.
+        // NOTE: This is based on the assumption that as long as the memory
+        // operands are syntactically different, they do not alias.
+        // However, this assumption does not hold in general.
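+        // For example, the store `mov dword ptr [r12 + 4], 7` in
+        // ex_relative_jump_tables3 is accepted between `cmp dword ptr [r12], 4`
+        // and its jump because the operands differ syntactically, even though
+        // syntactically different operands (e.g. `[r12 + rbx*4]` vs. `[r12]`
+        // when rbx is 0) can still refer to the same address.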
+        (
+            arch.memory_access("STORE",EA,_,_,_,_,_,_,_);
+            arch.store_immediate(EA,_,_,_,_,_,_,_);
+            arch.memory_access_aggregated("STORE",EA,_,_,_,_,_)
+        ),
+        // The instruction must not reference the compared operand itself
+        // (the syntactic check described above).
+        !instruction_get_op(EA,_,IndirectOp)
+    ),
+    // The registers used in the indirect operand aren't modified, either.
+    op_indirect_mapped(IndirectOp,"NONE",RegBase_nn,RegIndex_nn,_,_,_),
+    reg_nonnull(RegBase_nn,RegBase),
+    !reg_def_use.def(EA,RegBase),
+    (
+        RegIndex_nn = "NONE"
+    ;
+        reg_nonnull(RegIndex_nn,RegIndex),
+        !reg_def_use.def(EA,RegIndex)
+    ).
+
 // Ad-hoc rule for basic indirect comparison cases where the value is moved to
 // a register right after the jump (either at the branch target or the
 // fallthrough).
 value_reg_limit(EA_target,EA_limited,Reg,Value,LimitType):-
     compare_and_jump_indirect(EA_cmp,EA_jmp,CC,IndirectOp,Immediate),
     limit_type_map(CC,BranchLT,FallthroughLT,BranchOffset,FallthroughOffset),
-    // Enforce next to ensure the memory isn't modified between comparison and jump.
-    next(EA_cmp,EA_jmp),
+    // Validate that the memory isn't modified between the comparison and the
+    // jump. This is difficult to do in general, but we can cover some common
+    // cases with compare_and_jump_indirect_op_valid.
+    compare_and_jump_indirect_op_valid(EA_cmp,EA_jmp,EA_jmp,IndirectOp,_),
     (
         direct_jump(EA_jmp,EA_target),
         LimitType = BranchLT,
@@ -201,10 +250,12 @@ value_reg_limit(EA_target,EA_limited,Reg,Value,LimitType):-
         UNUSED(BranchLT), UNUSED(BranchOffset)
     ),
     // The value is loaded from this IndirectOp into a register immediately after the jump.
+    code(EA_target),
     arch.load(EA_target,_,_,Reg,_,_,_,_),
     track_register(Reg),
     instruction_get_op(EA_target,_,IndirectOp),
-    local_next(EA_target,EA_limited).
+    may_fallthrough(EA_target,EA_limited),
+    code(EA_limited).
 
 /**
 Two live registers are a constant offset from each other at the end of a block.
diff --git a/tests/linux-elf-x64.yaml b/tests/linux-elf-x64.yaml
index 873daf81..bfb7a61e 100644
--- a/tests/linux-elf-x64.yaml
+++ b/tests/linux-elf-x64.yaml
@@ -440,6 +440,9 @@ tests:
   - name: ex_relative_jump_tables2
     <<: *assembly
 
+  - name: ex_relative_jump_tables3
+    <<: *assembly
+
   - name: ex_relative_switch
     <<: *assembly
 
@@ -541,6 +544,10 @@ tests:
     <<: *assembly
     <<: *test-strip-default
 
+  - name: ex_relative_jump_tables3
+    <<: *assembly
+    <<: *test-strip-default
+
   - name: ex_relative_switch
     <<: *assembly
     <<: *test-strip-default