diff --git a/lib86cpu/core/emitter/emitter_common.cpp b/lib86cpu/core/emitter/emitter_common.cpp index 497646e..ded1c48 100644 --- a/lib86cpu/core/emitter/emitter_common.cpp +++ b/lib86cpu/core/emitter/emitter_common.cpp @@ -7,6 +7,15 @@ #include "emitter_common.h" +// For fpu_check_stack_fault +static_assert(CPU_CTX_ES < 65536); +static_assert(CPU_CTX_CS < 65536); +static_assert(CPU_CTX_SS < 65536); +static_assert(CPU_CTX_DS < 65536); +static_assert(CPU_CTX_FS < 65536); +static_assert(CPU_CTX_GS < 65536); + + static const std::unordered_map> zydis_to_reg_offset_table = { { ZYDIS_REGISTER_AL, { EAX_idx, CPU_CTX_EAX } }, { ZYDIS_REGISTER_CL, { ECX_idx, CPU_CTX_ECX } }, diff --git a/lib86cpu/core/emitter/emitter_common.h b/lib86cpu/core/emitter/emitter_common.h index 2969274..9c6b2e5 100644 --- a/lib86cpu/core/emitter/emitter_common.h +++ b/lib86cpu/core/emitter/emitter_common.h @@ -208,6 +208,9 @@ inline constexpr auto all_callable_funcs = std::make_tuple( cpu_runtime_abort, dbg_update_exp_hook, tlb_invalidate_, - fpu_stack_check, - fpu_stack_check + fpu_is_tag_empty, + fpu_stack_overflow, + fpu_stack_underflow, + fpu_stack_fault, + fpu_update_ptr ); diff --git a/lib86cpu/core/emitter/x64/jit.cpp b/lib86cpu/core/emitter/x64/jit.cpp index 1d0262a..9b9d71d 100644 --- a/lib86cpu/core/emitter/x64/jit.cpp +++ b/lib86cpu/core/emitter/x64/jit.cpp @@ -507,6 +507,17 @@ static_assert((LOCAL_VARS_off(0) & 15) == 0); // must be 16 byte aligned so that #define RELOAD_RCX_CTX() MOV(RCX, &m_cpu->cpu_ctx) #define RESTORE_FPU_CTX() FLDCW(MEMD16(RSP, LOCAL_VARS_off(5))) #define CALL_F(func) MOV(RAX, func); CALL(RAX); RELOAD_RCX_CTX() +#define FPU_IS_TAG_EMPTY(num) MOV(EDX, (num)); \ + CALL_F(&fpu_is_tag_empty); \ + TEST(EAX, EAX) +#define FPU_CLEAR_C1() AND(MEMD16(RCX, CPU_CTX_FSTATUS), ~FPU_FLG_C1) +#define FPU_PUSH() DEC(MEMD16(RCX, FPU_DATA_FTOP)); AND(MEMD16(RCX, FPU_DATA_FTOP), 7) +#define FPU_LOAD_STX(x) MOVZX(EDX, MEMD16(RCX, FPU_DATA_FTOP)); \ + ADD(EDX, (x)); \ + AND(EDX, 7); \ + MOV(EAX, sizeof(uint80_t)); \ + MUL(DX); \ + FLD(MEMSD80(RCX, RAX, 0, CPU_CTX_R0)) lc86_jit::lc86_jit(cpu_t *cpu) @@ -2345,30 +2356,23 @@ lc86_jit::gen_simd_mem_align_check() m_a.bind(ok); } -void -lc86_jit::gen_fpu_exp_post_check() +template +void lc86_jit::gen_fpu_exp_post_check(uint32_t exception, T &&unmasked) { - // this function should be called immediately after the fpu instr to check exceptions for. It expects to find in R8W the flags of the status word following - // a previous stack fault (if any happened) - // NOTE: we only support masked exceptions for now + // This function should be called immediately after the fpu instr to check exceptions for - Label no_exp = m_a.newLabel(); + Label masked = m_a.newLabel(); FNSTSW(AX); - TEST(AX, FPU_EXP_ALL); - BR_EQ(no_exp); - LD_R16(R9W, CPU_CTX_FCTRL); - AND(R9W, FPU_EXP_ALL); - CMP(R9W, FPU_EXP_ALL); - BR_EQ(no_exp); - static const char *abort_msg = "Unmasked fpu exceptions are not supported"; - MOV(RCX, abort_msg); - MOV(RAX, &cpu_runtime_abort); - CALL(RAX); // won't return - INT3(); - m_a.bind(no_exp); - AND(AX, ~(FPU_FLG_SF | FPU_FLG_ES | FPU_FLG_TOP | FPU_FLG_BSY)); - OR(AX, R8W); - ST_R16(CPU_CTX_FSTATUS, AX); + MOV(DX, MEMD16(RCX, CPU_CTX_FCTRL)); + NOT(DX); + AND(DX, AX); + AND(AX, (FPU_EXP_ALL | FPU_FLG_CC_ALL)); + AND(MEMD16(RCX, CPU_CTX_FSTATUS), ~(FPU_EXP_ALL | FPU_FLG_CC_ALL)); + OR(MEMD16(RCX, CPU_CTX_FSTATUS), AX); // update exception and condition code flags of guest fstatus + TEST(DX, exception); // test if exceptions of interest are unmasked + BR_EQ(masked); + unmasked(); + m_a.bind(masked); } void @@ -2380,26 +2384,48 @@ lc86_jit::gen_set_host_fpu_ctx() FNCLEX(); // clear all pending fpu exceptions, so that we can use the host to detect guest fpu exceptions } -template -void lc86_jit::gen_update_fpu_ptr(decoded_instr *instr) +void +lc86_jit::gen_update_fpu_ptr(decoded_instr *instr, x86::Gp mem_addr64) { - ST_R16(CPU_CTX_FCS, m_cpu->cpu_ctx.regs.cs); - ST_R32(CPU_CTX_FIP, m_cpu->instr_eip); - MOV(AX, MEMD16(RCX, CPU_CTX_DS)); - ST_R16(CPU_CTX_FDS, AX); - if constexpr (update_fdp) { - ST_R32(CPU_CTX_FDP, m_cpu->instr_eip + instr->i.raw.modrm.offset); + /* + is_mem_op -> bit 63 + fop -> [48 - 58] + seg offset -> [32 - 47] + modrm addr -> [0 - 31] + */ + + uint64_t is_mem_operand = !(((instr->i.raw.modrm.reg << 3) | (instr->i.raw.modrm.mod << 6)) == 0xC0); // all fpu instr with 0xCx modrm have reg only operands + uint64_t instr_info = is_mem_operand << 63; + instr_info |= ((((instr->i.raw.modrm.rm | (instr->i.raw.modrm.reg << 3) | (instr->i.raw.modrm.mod << 6)) | ((uint64_t)instr->i.opcode << 8)) & 0x7FF) << 48); // fop is a 11 bit register + MOV(RDX, instr_info); + if (is_mem_operand) { + instr_info |= ((uint64_t)get_seg_prfx_offset(instr) << 32); + // modrm addr is calculated at runtime and placed in edx + OR(RDX, mem_addr64); } + CALL_F(&fpu_update_ptr); +} + +void +lc86_jit::gen_fpu_exp(uint32_t exception, stack_fault_func func) +{ + MOV(EDX, exception); + CALL_F(func); } -template -void lc86_jit::gen_fpu_stack_fault_check(fpu_instr_t fpu_instr) +void +lc86_jit::gen_check_fpu_unmasked_exp() { - MOV(R9D, fpu_instr); - LEA(R8, MEMD64(RSP, LOCAL_VARS_off(0))); - LEA(RDX, MEMD64(RSP, LOCAL_VARS_off(2))); - CALL_F((&fpu_stack_check)); - MOV(R8D, MEMD32(RSP, LOCAL_VARS_off(2))); + Label no_exp = m_a.newLabel(); + TEST(MEMD16(RCX, CPU_CTX_FSTATUS), FPU_FLG_ES); + BR_EQ(no_exp); + if (m_cpu->cpu_ctx.regs.cr0 & CR0_NE_MASK) { + RAISEin0_f(EXP_MF); + } + else { + LIB86CPU_ABORT_msg("MS-DOS compatibility mode for fpu exceptions is not supported"); + } + m_a.bind(no_exp); } template @@ -2428,6 +2454,8 @@ void lc86_jit::gen_fpu_stack_prologue(fpu_instr_t fpu_instr, T &&action_when_no_ template void lc86_jit::float_arithmetic(decoded_instr *instr) { + LIB86CPU_ABORT(); +#if 0 if (m_cpu->cpu_ctx.hflags & (HFLG_CR0_EM | HFLG_CR0_TS)) { RAISEin0_t(EXP_NM); } @@ -2529,6 +2557,7 @@ void lc86_jit::float_arithmetic(decoded_instr *instr) } } +#endif } template @@ -3526,7 +3555,8 @@ template void lc86_jit::float_load_constant(decoded_instr *instr) { // idx 0 -> fld1, 1 -> fldl2e, 2 -> fldl2t, 3 -> fldlg2, 4 -> fldln2, 5 -> fldpi, 6 -> fldz - + LIB86CPU_ABORT(); +#if 0 if (m_cpu->cpu_ctx.hflags & (HFLG_CR0_EM | HFLG_CR0_TS)) { RAISEin0_t(EXP_NM); } @@ -3567,6 +3597,7 @@ void lc86_jit::float_load_constant(decoded_instr *instr) MOV(EDX, EBX); CALL_F(&fpu_update_tag); } +#endif } void @@ -5248,6 +5279,8 @@ lc86_jit::enter(decoded_instr *instr) void lc86_jit::fild(decoded_instr *instr) { + LIB86CPU_ABORT(); +#if 0 if (m_cpu->cpu_ctx.hflags & (HFLG_CR0_EM | HFLG_CR0_TS)) { RAISEin0_t(EXP_NM); } @@ -5281,11 +5314,14 @@ lc86_jit::fild(decoded_instr *instr) CALL_F(&fpu_update_tag); }); } +#endif } void lc86_jit::fistp(decoded_instr *instr) { + LIB86CPU_ABORT(); +#if 0 if (m_cpu->cpu_ctx.hflags & (HFLG_CR0_EM | HFLG_CR0_TS)) { RAISEin0_t(EXP_NM); } @@ -5328,6 +5364,7 @@ lc86_jit::fistp(decoded_instr *instr) CALL_F(&fpu_update_tag); } } +#endif } void @@ -5337,45 +5374,77 @@ lc86_jit::fld(decoded_instr *instr) RAISEin0_t(EXP_NM); } else { + gen_check_fpu_unmasked_exp(); + Label end_instr = m_a.newLabel(); + const auto stack_fault_check = [&]() { + Label ok = m_a.newLabel(); + FPU_IS_TAG_EMPTY(-1); // check for stack overflow of dst st0 + BR_NE(ok); + gen_fpu_exp(FPU_STACK_OVERFLOW, &fpu_stack_overflow); + BR_UNCOND(end_instr); + m_a.bind(ok); + }; + get_rm(instr, - [this, instr](const op_info rm) + [this, instr, end_instr, &stack_fault_check](const op_info rm) { - gen_fpu_stack_prologue(fpu_instr_t::float_, [this, instr]() { - MOV(EDX, instr->i.raw.modrm.rm); - MOV(EAX, sizeof(uint80_t)); - MUL(DX); - FLD(MEMSD80(RCX, RAX, 0, CPU_CTX_R0)); // load guest st(i) to host st0 - }); - gen_fpu_exp_post_check(); - MOV(EBX, MEMD32(RSP, LOCAL_VARS_off(4))); + Label ok = m_a.newLabel(), masked = m_a.newLabel(), do_push = m_a.newLabel(); + unsigned stx = instr->i.raw.modrm.rm; + gen_update_fpu_ptr(instr); + FPU_CLEAR_C1(); + stack_fault_check(); + FPU_IS_TAG_EMPTY(stx); // check for stack underflow for src stx + BR_EQ(ok); + gen_fpu_exp(FPU_STACK_UNDERFLOW, &fpu_stack_fault); + TEST(MEMD16(RCX, CPU_CTX_FCTRL), FPU_FLG_IE); + BR_NE(masked); + BR_UNCOND(end_instr); + m_a.bind(masked); // if masked, load a qnan + EMMS(); + FNCLEX(); + MOV(MEMD64(RSP, LOCAL_VARS_off(0)), FPU_QNAN_FLOAT_INDEFINITE64); + MOV(MEMD64(RSP, LOCAL_VARS_off(1)), FPU_QNAN_FLOAT_INDEFINITE16); + FLD(MEMD80(RSP, LOCAL_VARS_off(0))); // load qnan + BR_UNCOND(do_push); + m_a.bind(ok); + EMMS(); + FNCLEX(); + FPU_LOAD_STX(instr->i.raw.modrm.rm); // load src stx + m_a.bind(do_push); + FPU_PUSH(); MOV(EAX, sizeof(uint80_t)); - ST_R16(FPU_DATA_FTOP, BX); - MUL(BX); - FSTP(MEMSD80(RCX, RAX, 0, CPU_CTX_R0)); - gen_update_fpu_ptr(instr); + MUL(MEMD16(RCX, FPU_DATA_FTOP)); + FSTP(MEMSD80(RCX, RAX, 0, CPU_CTX_R0)); // store src stx or qnan to dst st0 }, - [this, instr](const op_info rm) + [this, instr, end_instr, &stack_fault_check](const op_info rm) { uint8_t size = instr->i.opcode == 0xD9 ? SIZE32 : (instr->i.opcode == 0xDD ? SIZE64 : SIZE80); - LD_MEMs(size); + auto rax_host_reg = SIZED_REG(x64::rax, size); + MOV(EBX, EDX); // save mem addr for gen_fpu_exp + LD_MEMs(size); // load src mem if (size != SIZE80) { - MOV(MEMD(RSP, LOCAL_VARS_off(0), size), EAX); + MOV(MEMD(RSP, LOCAL_VARS_off(0), size), rax_host_reg); } - gen_fpu_stack_fault_check(fpu_instr_t::float_); + gen_update_fpu_ptr(instr); + FPU_CLEAR_C1(); + stack_fault_check(); gen_set_host_fpu_ctx(); FLD(MEMD(RSP, LOCAL_VARS_off(0), size)); - gen_fpu_exp_post_check(); - MOV(EBX, MEMD32(RSP, LOCAL_VARS_off(4))); + gen_fpu_exp_post_check(FPU_EXP_INVALID, [this, instr, end_instr, size]() { + FSTP(MEMD(RSP, LOCAL_VARS_off(0), size)); // do a dummy pop to restore host fpu stack + OR(MEMD16(RCX, CPU_CTX_FSTATUS), FPU_FLG_ES); + BR_UNCOND(end_instr); + }); + FPU_PUSH(); MOV(EAX, sizeof(uint80_t)); - ST_R16(FPU_DATA_FTOP, BX); - MUL(BX); - FSTP(MEMSD80(RCX, RAX, 0, CPU_CTX_R0)); - gen_update_fpu_ptr(instr); + MUL(MEMD16(RCX, FPU_DATA_FTOP)); + FSTP(MEMSD(RCX, RAX, 0, CPU_CTX_R0, size)); // store src mem to dst st0 + RESTORE_FPU_CTX(); }); - RESTORE_FPU_CTX(); - MOV(EDX, EBX); - CALL_F(&fpu_update_tag); + XOR(EDX, EDX); + CALL_F(&fpu_update_tag); // update dst st0 tag + m_a.bind(end_instr); } } @@ -5533,6 +5602,8 @@ lc86_jit::fnstsw(decoded_instr *instr) void lc86_jit::fstp(decoded_instr *instr) { + LIB86CPU_ABORT(); +#if 0 if (m_cpu->cpu_ctx.hflags & (HFLG_CR0_EM | HFLG_CR0_TS)) { RAISEin0_t(EXP_NM); } @@ -5542,10 +5613,13 @@ lc86_jit::fstp(decoded_instr *instr) gen_fpu_stack_prologue(fpu_instr_t::float_, [this]() { MOV(EAX, sizeof(uint80_t)); MUL(MEMD16(RSP, LOCAL_VARS_off(4))); - FLD(MEMSD80(RCX, RAX, 0, CPU_CTX_R0)); // load guest st0 to host st0 + FLD(MEMSD80(RCX, RAX, 0, CPU_CTX_R0)); // load src st0 }); } else { + MOV(EAX, sizeof(uint80_t)); + MUL(MEMD16(RCX, FPU_DATA_FTOP)); + FLD(MEMSD80(RCX, RAX, 0, CPU_CTX_R0)); // load src st0 XOR(R8D, R8D); // clear r8w so that gen_fpu_exp_post_check still works } @@ -5584,6 +5658,7 @@ lc86_jit::fstp(decoded_instr *instr) CALL_F(&fpu_update_tag); } } +#endif } void diff --git a/lib86cpu/core/emitter/x64/jit.h b/lib86cpu/core/emitter/x64/jit.h index 22e319d..8cbe2c8 100644 --- a/lib86cpu/core/emitter/x64/jit.h +++ b/lib86cpu/core/emitter/x64/jit.h @@ -260,12 +260,10 @@ class lc86_jit : public Target { template void gen_stack_pop(); void gen_simd_mem_align_check(); - void gen_fpu_exp_post_check(); + template + void gen_fpu_exp_post_check(uint32_t exception, T &&unmasked); void gen_set_host_fpu_ctx(); - template - void gen_update_fpu_ptr(decoded_instr *instr); - template - void gen_fpu_stack_fault_check(fpu_instr_t fpu_instr); + void gen_update_fpu_ptr(decoded_instr *instr, x86::Gp mem_addr64 = x86::rbx); template void shift(decoded_instr *instr); template @@ -288,6 +286,8 @@ class lc86_jit : public Target { void float_load_constant(decoded_instr *instr); template void gen_fpu_stack_prologue(fpu_instr_t fpu_instr, T &&action_when_no_fault); + void gen_fpu_exp(uint32_t exception, stack_fault_func func); + void gen_check_fpu_unmasked_exp(); cpu_t *m_cpu; CodeHolder m_code; diff --git a/lib86cpu/core/fpu.cpp b/lib86cpu/core/fpu.cpp index 230f363..a590228 100644 --- a/lib86cpu/core/fpu.cpp +++ b/lib86cpu/core/fpu.cpp @@ -20,9 +20,16 @@ fpu_init(cpu_t *cpu) } } +static void +fpu_push(cpu_ctx_t *cpu_ctx) +{ + cpu_ctx->fpu_data.ftop = (cpu_ctx->fpu_data.ftop - 1) & 7; +} + template -void fpu_update_tag(cpu_ctx_t *cpu_ctx, uint32_t idx) +void fpu_update_tag(cpu_ctx_t *cpu_ctx, uint32_t st_num) { + uint32_t idx = (st_num + cpu_ctx->fpu_data.ftop) & 7; if constexpr (is_push) { uint16_t exp = cpu_ctx->regs.fr[idx].high & 0x7FFF; uint64_t mant = cpu_ctx->regs.fr[idx].low; @@ -43,77 +50,62 @@ void fpu_update_tag(cpu_ctx_t *cpu_ctx, uint32_t idx) } } -template -uint32_t fpu_stack_check(cpu_ctx_t *cpu_ctx, uint32_t *sw, uint80_t *inv_val, fpu_instr_t instr_type) +uint32_t +fpu_is_tag_empty(cpu_ctx_t *cpu_ctx, uint32_t st_num) { - // this function returns the fpu stack pointer to the value modified by the push/pop, and the flags of the status word following a stack fault. - // It also writes an appropriate indefinite value when it detects a masked stack exception - // NOTE: we only support masked stack exceptions for now + return cpu_ctx->regs.ftags[(st_num + cpu_ctx->fpu_data.ftop) & 7] == FPU_TAG_EMPTY; +} - uint32_t ftop, fstatus = cpu_ctx->regs.fstatus; - *sw = fstatus; - bool no_stack_fault; - if constexpr (is_push) { - // detect stack overflow - ftop = cpu_ctx->fpu_data.ftop; - ftop -= 1; - ftop &= 7; - no_stack_fault = cpu_ctx->regs.ftags[ftop] == FPU_TAG_EMPTY; - } - else { - // detect stack underflow - ftop = cpu_ctx->fpu_data.ftop; - no_stack_fault = cpu_ctx->regs.ftags[ftop] != FPU_TAG_EMPTY; +void +fpu_update_ptr(cpu_ctx_t *cpu_ctx, uint64_t instr_info) +{ + cpu_ctx->regs.fcs = cpu_ctx->regs.cs; + cpu_ctx->regs.fip = cpu_ctx->regs.eip; + cpu_ctx->regs.fop = ((instr_info >> 48) & 0x7FF); + if (instr_info & (1ULL << 63)) { + cpu_ctx->regs.fds = *(uint16_t *)(((instr_info >> 32) & 0xFFFF) + (uint8_t *)cpu_ctx); + cpu_ctx->regs.fdp = instr_info & 0xFFFFFFFF; } +} - if (!no_stack_fault) { - uint16_t fctrl = cpu_ctx->regs.fctrl; - fctrl &= FPU_EXP_INVALID; - if ((cpu_ctx->regs.fctrl & FPU_EXP_INVALID) == 0) { - static const char *abort_msg = "Unmasked fpu stack exception not supported"; - cpu_runtime_abort(abort_msg); // won't return - } - // stack fault exception masked, write an indefinite value, so that the fpu instr uses it - fstatus |= (FPU_FLG_IE | FPU_FLG_SF | (is_push ? (1 << FPU_C1_SHIFT) : (0 << FPU_C1_SHIFT))); - *sw = fstatus; - - switch (instr_type) - { - case fpu_instr_t::integer8: - inv_val->low = FPU_INTEGER_INDEFINITE8; - break; - - case fpu_instr_t::integer16: - inv_val->low = FPU_INTEGER_INDEFINITE16; - break; - - case fpu_instr_t::integer32: - inv_val->low = FPU_INTEGER_INDEFINITE32; - break; - - case fpu_instr_t::integer64: - inv_val->low = FPU_INTEGER_INDEFINITE64; - break; - - case fpu_instr_t::float_: - inv_val->low = FPU_QNAN_FLOAT_INDEFINITE64; - inv_val->high = FPU_QNAN_FLOAT_INDEFINITE16; - break; +void +fpu_stack_fault(cpu_ctx_t *cpu_ctx, uint32_t exception) +{ + assert(exception & FPU_EXP_INVALID); - case fpu_instr_t::bcd: - inv_val->low = FPU_BCD_INDEFINITE64; - inv_val->high = FPU_BCD_INDEFINITE16; - break; + exception &= (FPU_EXP_ALL | FPU_FLG_SF | FPU_FLG_C1); + uint32_t unmasked = (exception & ~cpu_ctx->regs.fctrl) & FPU_EXP_ALL; + if (unmasked) { + cpu_ctx->regs.fstatus |= FPU_FLG_ES; + } - default: - LIB86CPU_ABORT(); + cpu_ctx->regs.fstatus |= exception; + if (exception & FPU_FLG_SF) { + if (!(exception & FPU_FLG_C1)) { + cpu_ctx->regs.fstatus &= ~FPU_FLG_C1; } } +} + +void +fpu_stack_overflow(cpu_ctx_t *cpu_ctx, uint32_t exception) +{ + if (cpu_ctx->regs.fctrl & FPU_EXP_INVALID) { + // masked stack fault response + fpu_push(cpu_ctx); + cpu_ctx->regs.fr[cpu_ctx->fpu_data.ftop].low = FPU_QNAN_FLOAT_INDEFINITE64; + cpu_ctx->regs.fr[cpu_ctx->fpu_data.ftop].high = FPU_QNAN_FLOAT_INDEFINITE16; + fpu_update_tag(cpu_ctx, 0); + } - return ftop; + fpu_stack_fault(cpu_ctx, exception); +} + +void +fpu_stack_underflow(cpu_ctx_t *cpu_ctx, uint32_t exception) +{ + // TODO } -template JIT_API uint32_t fpu_stack_check(cpu_ctx_t *cpu_ctx, uint32_t *sw, uint80_t *inv_val, fpu_instr_t instr_type); -template JIT_API uint32_t fpu_stack_check(cpu_ctx_t *cpu_ctx, uint32_t *sw, uint80_t *inv_val, fpu_instr_t instr_type); template JIT_API void fpu_update_tag(cpu_ctx_t *cpu_ctx, uint32_t idx); template JIT_API void fpu_update_tag(cpu_ctx_t *cpu_ctx, uint32_t idx); diff --git a/lib86cpu/core/fpu.h b/lib86cpu/core/fpu.h index beb8a78..9ebb35d 100644 --- a/lib86cpu/core/fpu.h +++ b/lib86cpu/core/fpu.h @@ -16,8 +16,14 @@ enum class fpu_instr_t : uint32_t { bcd, }; +using stack_fault_func = void(* JIT_API)(cpu_ctx_t *, uint32_t); + + void fpu_init(cpu_t *cpu); template -JIT_API void fpu_update_tag(cpu_ctx_t *cpu_ctx, uint32_t idx); -template -JIT_API uint32_t fpu_stack_check(cpu_ctx_t *cpu_ctx, uint32_t *sw, uint80_t *inv_val, fpu_instr_t instr_type); +JIT_API void fpu_update_tag(cpu_ctx_t *cpu_ctx, uint32_t st_num); +JIT_API uint32_t fpu_is_tag_empty(cpu_ctx_t *cpu_ctx, uint32_t st_num); +JIT_API void fpu_stack_overflow(cpu_ctx_t *cpu_ctx, uint32_t exception); +JIT_API void fpu_stack_underflow(cpu_ctx_t *cpu_ctx, uint32_t exception); +JIT_API void fpu_stack_fault(cpu_ctx_t *cpu_ctx, uint32_t exception); +JIT_API void fpu_update_ptr(cpu_ctx_t *cpu_ctx, uint64_t instr_info); diff --git a/lib86cpu/core/instructions.cpp b/lib86cpu/core/instructions.cpp index db06dc9..b3f630e 100644 --- a/lib86cpu/core/instructions.cpp +++ b/lib86cpu/core/instructions.cpp @@ -819,6 +819,9 @@ uint32_t update_crN_helper(cpu_ctx_t *cpu_ctx, uint32_t new_cr, uint8_t idx) cpu_ctx->hflags = (((new_cr & CR0_EM_MASK) << 3) | (cpu_ctx->hflags & ~HFLG_CR0_EM)); cpu_ctx->hflags = (((new_cr & CR0_MP_MASK) << 14) | (cpu_ctx->hflags & ~HFLG_CR0_MP)); } + if constexpr (idx1 == 0) { + cpu_ctx->hflags = (((new_cr & CR0_NE_MASK) << 2) | (cpu_ctx->hflags & ~HFLG_CR0_NE)); + } cpu_ctx->hflags = (((new_cr & CR0_TS_MASK) << 7) | (cpu_ctx->hflags & ~HFLG_CR0_TS)); if constexpr (idx1 != 2) { diff --git a/lib86cpu/core/internal.h b/lib86cpu/core/internal.h index a1827ba..cbab99e 100644 --- a/lib86cpu/core/internal.h +++ b/lib86cpu/core/internal.h @@ -43,6 +43,7 @@ JIT_API void tlb_invalidate_(cpu_ctx_t *cpu_ctx, addr_t addr); #define PE_MODE_SHIFT 4 #define CR0_EM_SHIFT 5 #define TRAMP_SHIFT 6 +#define CR0_NE_SHIFT 7 #define CR4_OSFXSR_SHIFT 9 #define CR0_TS_SHIFT 10 #define CR0_MP_SHIFT 15 @@ -55,14 +56,15 @@ JIT_API void tlb_invalidate_(cpu_ctx_t *cpu_ctx, addr_t addr); #define HFLG_PE_MODE (1 << PE_MODE_SHIFT) #define HFLG_CR0_EM (1 << CR0_EM_SHIFT) #define HFLG_TRAMP (1 << TRAMP_SHIFT) +#define HFLG_CR0_NE (1 << CR0_NE_SHIFT) #define HFLG_CR0_MP (1 << CR0_MP_SHIFT) #define HFLG_CR0_TS (1 << CR0_TS_SHIFT) #define HFLG_CR4_OSFXSR (1 << CR4_OSFXSR_SHIFT) #define HFLG_CR4_VME (1 << CR4_VME_SHIFT) #define HFLG_CR4_PVI (1 << CR4_PVI_SHIFT) -#define HFLG_CONST (HFLG_CPL | HFLG_CS32 | HFLG_SS32 | HFLG_PE_MODE | HFLG_CR0_EM | HFLG_TRAMP | HFLG_CR0_MP | HFLG_CR0_TS \ +#define HFLG_CONST (HFLG_CPL | HFLG_CS32 | HFLG_SS32 | HFLG_PE_MODE | HFLG_CR0_EM | HFLG_TRAMP | HFLG_CR0_MP | HFLG_CR0_TS | HFLG_CR0_NE \ | HFLG_CR4_OSFXSR | HFLG_CR4_VME | HFLG_CR4_PVI) -#define HFLG_SAVED_MASK (HFLG_CPL | HFLG_CS32 | HFLG_SS32 | HFLG_PE_MODE | HFLG_CR0_EM | HFLG_CR0_MP | HFLG_CR0_TS | HFLG_CR4_OSFXSR | HFLG_CR4_VME | HFLG_CR4_PVI) +#define HFLG_SAVED_MASK (HFLG_CPL | HFLG_CS32 | HFLG_SS32 | HFLG_PE_MODE | HFLG_CR0_EM | HFLG_CR0_MP | HFLG_CR0_TS | HFLG_CR0_NE | HFLG_CR4_OSFXSR | HFLG_CR4_VME | HFLG_CR4_PVI) // cpu interrupt flags #define CPU_NO_INT 0 @@ -395,7 +397,7 @@ CR0_TS_MASK | CR0_EM_MASK | CR0_MP_MASK | CR0_PE_MASK) #define FPU_EXP_PRECISION (1 << 5) #define FPU_EXP_ALL (FPU_EXP_INVALID | FPU_EXP_DENORMAL | FPU_EXP_DIVBYZERO | FPU_EXP_OVERFLOW | FPU_EXP_UNDERFLOW | FPU_EXP_PRECISION) -// fpu fstatus flags and shifts +// fpu fstatus flags #define FPU_FLG_IE FPU_EXP_INVALID #define FPU_FLG_DE FPU_EXP_DENORMAL #define FPU_FLG_ZE FPU_EXP_DIVBYZERO @@ -404,14 +406,17 @@ CR0_TS_MASK | CR0_EM_MASK | CR0_MP_MASK | CR0_PE_MASK) #define FPU_FLG_PE FPU_EXP_PRECISION #define FPU_FLG_SF (1 << 6) #define FPU_FLG_ES (1 << 7) +#define FPU_FLG_C0 (1 << 8) +#define FPU_FLG_C1 (1 << 9) +#define FPU_FLG_C2 (1 << 10) #define FPU_FLG_TOP (7 << 11) +#define FPU_FLG_C3 (1 << 14) #define FPU_FLG_BSY (1 << 15) -#define FPU_ES_SHIFT 7 -#define FPU_C0_SHIFT 8 -#define FPU_C1_SHIFT 9 -#define FPU_C2_SHIFT 10 -#define FPU_TOP_SHIFT 11 -#define FPU_C3_SHIFT 14 +#define FPU_FLG_CC_ALL (FPU_FLG_C0 | FPU_FLG_C1 | FPU_FLG_C2 | FPU_FLG_C3) + +// fpu stack fault flags +#define FPU_STACK_OVERFLOW (FPU_EXP_INVALID | FPU_FLG_SF | FPU_FLG_C1) +#define FPU_STACK_UNDERFLOW (FPU_EXP_INVALID | FPU_FLG_SF) // fpu cctrl flags #define FPU_FLG_PC (3 << 8) diff --git a/lib86cpu/interface.cpp b/lib86cpu/interface.cpp index 8ecbbf8..2cebb66 100644 --- a/lib86cpu/interface.cpp +++ b/lib86cpu/interface.cpp @@ -1338,7 +1338,7 @@ uint16_t read_fstatus(cpu_t *cpu) { uint16_t fstatus = (cpu->cpu_ctx.regs.fstatus & ~FPU_FLG_TOP); - fstatus |= (cpu->cpu_ctx.fpu_data.ftop << FPU_TOP_SHIFT); + fstatus |= (cpu->cpu_ctx.fpu_data.ftop << 11); return fstatus; } @@ -1351,6 +1351,6 @@ read_fstatus(cpu_t *cpu) void write_fstatus(cpu_t *cpu, uint16_t value) { - cpu->cpu_ctx.fpu_data.ftop = (value & FPU_FLG_TOP) >> FPU_TOP_SHIFT; + cpu->cpu_ctx.fpu_data.ftop = (value & FPU_FLG_TOP) >> 11; cpu->cpu_ctx.regs.fstatus = value; } diff --git a/lib86cpu/support.cpp b/lib86cpu/support.cpp index 2676ebe..8c5c4c8 100644 --- a/lib86cpu/support.cpp +++ b/lib86cpu/support.cpp @@ -14,7 +14,7 @@ #endif // This should be updated whenever cpu members that need to be saved are added/removed -#define SAVE_STATE_ID 7 +#define SAVE_STATE_ID 8 void