diff --git a/README.md b/README.md index eed1233a3..a3ddf1233 100755 --- a/README.md +++ b/README.md @@ -1,7 +1,7 @@ yuzu emulator early access ============= -This is the source code for early-access 1800. +This is the source code for early-access 1801. ## Legal Notice diff --git a/externals/dynarmic/README.md b/externals/dynarmic/README.md index dd58d2b96..1b0eafb28 100755 --- a/externals/dynarmic/README.md +++ b/externals/dynarmic/README.md @@ -44,7 +44,7 @@ More general alternatives: * [VisUAL](https://salmanarif.bitbucket.io/visual/index.html) - Visual ARM UAL emulator intended for education * A wide variety of other recompilers, interpreters and emulators can be found embedded in other projects, here are some we would recommend looking at: * [firebird's recompiler](https://github.com/nspire-emus/firebird) - Takes more of a call-threaded approach to recompilation - * [higan's arm7tdmi emulator](https://gitlab.com/higan/higan/tree/master/higan/component/processor/arm7tdmi) - Very clean code-style + * [higan's arm7tdmi emulator](https://github.com/higan-emu/higan/tree/master/higan/component/processor/arm7tdmi) - Very clean code-style * [arm-js by ozaki-r](https://github.com/ozaki-r/arm-js) - Emulates ARMv7A and some peripherals of Versatile Express, in the browser Disadvantages of Dynarmic diff --git a/externals/dynarmic/src/dynarmic/backend/x64/a32_emit_x64.cpp b/externals/dynarmic/src/dynarmic/backend/x64/a32_emit_x64.cpp index e2c56b212..37f97207c 100755 --- a/externals/dynarmic/src/dynarmic/backend/x64/a32_emit_x64.cpp +++ b/externals/dynarmic/src/dynarmic/backend/x64/a32_emit_x64.cpp @@ -541,6 +541,12 @@ void A32EmitX64::EmitA32SetCpsr(A32EmitContext& ctx, IR::Inst* inst) { } void A32EmitX64::EmitA32SetCpsrNZCV(A32EmitContext& ctx, IR::Inst* inst) { + auto args = ctx.reg_alloc.GetArgumentInfo(inst); + const Xbyak::Reg32 to_store = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32(); + code.mov(dword[r15 + offsetof(A32JitState, cpsr_nzcv)], to_store); +} + +void 
A32EmitX64::EmitA32SetCpsrNZCVRaw(A32EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); if (args[0].IsImmediate()) { const u32 imm = args[0].GetImmediateU32(); diff --git a/externals/dynarmic/src/dynarmic/backend/x64/constants.h b/externals/dynarmic/src/dynarmic/backend/x64/constants.h index 682d8ebdf..0316cebc0 100755 --- a/externals/dynarmic/src/dynarmic/backend/x64/constants.h +++ b/externals/dynarmic/src/dynarmic/backend/x64/constants.h @@ -13,6 +13,42 @@ namespace Dynarmic::Backend::X64 { +// Redefinition of _MM_CMP_* constants for use with the 'vcmp' instruction +namespace Cmp { +constexpr u8 Equal_OQ = 0; // Equal (Quiet, Ordered). +constexpr u8 LessThan_OS = 1; // Less (Signaling, Ordered). +constexpr u8 LessEqual_OS = 2; // Less/Equal (Signaling, Ordered). +constexpr u8 Unordered_Q = 3; // Unordered (Quiet). +constexpr u8 NotEqual_UQ = 4; // Not Equal (Quiet, Unordered). +constexpr u8 NotLessThan_US = 5; // Not Less (Signaling, Unordered). +constexpr u8 NotLessEqual_US = 6; // Not Less/Equal (Signaling, Unordered). +constexpr u8 Ordered_Q = 7; // Ordered (Quiet). +constexpr u8 Equal_UQ = 8; // Equal (Quiet, Unordered). +constexpr u8 NotGreaterEqual_US = 9; // Not Greater/Equal (Signaling, Unordered). +constexpr u8 NotGreaterThan_US = 10; // Not Greater (Signaling, Unordered). +constexpr u8 False_OQ = 11; // False (Quiet, Ordered). +constexpr u8 NotEqual_OQ = 12; // Not Equal (Quiet, Ordered). +constexpr u8 GreaterEqual_OS = 13; // Greater/Equal (Signaling, Ordered). +constexpr u8 GreaterThan_OS = 14; // Greater (Signaling, Ordered). +constexpr u8 True_UQ = 15; // True (Quiet, Unordered). +constexpr u8 Equal_OS = 16; // Equal (Signaling, Ordered). +constexpr u8 LessThan_OQ = 17; // Less (Quiet, Ordered). +constexpr u8 LessEqual_OQ = 18; // Less/Equal (Quiet, Ordered). +constexpr u8 Unordered_S = 19; // Unordered (Signaling). +constexpr u8 NotEqual_US = 20; // Not Equal (Signaling, Unordered). 
+constexpr u8 NotLessThan_UQ = 21; // Not Less (Quiet, Unordered). +constexpr u8 NotLessEqual_UQ = 22; // Not Less/Equal (Quiet, Unordered). +constexpr u8 Ordered_S = 23; // Ordered (Signaling). +constexpr u8 Equal_US = 24; // Equal (Signaling, Unordered). +constexpr u8 NotGreaterEqual_UQ = 25; // Not Greater/Equal (Quiet, Unordered). +constexpr u8 NotGreaterThan_UQ = 26; // Not Greater (Quiet, Unordered). +constexpr u8 False_OS = 27; // False (Signaling, Ordered). +constexpr u8 NotEqual_OS = 28; // Not Equal (Signaling, Ordered). +constexpr u8 GreaterEqual_OQ = 29; // Greater/Equal (Quiet, Ordered). +constexpr u8 GreaterThan_OQ = 30; // Greater (Quiet, Ordered). +constexpr u8 True_US = 31; // True (Signaling, Unordered). +} // namespace Cmp + // Redefinition of _MM_CMPINT_* constants for use with the 'vpcmp' instruction namespace CmpInt { constexpr u8 Equal = 0x0; @@ -46,6 +82,18 @@ constexpr u8 b = 0b11001100; constexpr u8 c = 0b10101010; } // namespace Tern +// For use as a bitmask with vfpclass instruction +namespace FpClass { +constexpr u8 QNaN = 0b00000001; +constexpr u8 ZeroPos = 0b00000010; +constexpr u8 ZeroNeg = 0b00000100; +constexpr u8 InfPos = 0b00001000; +constexpr u8 InfNeg = 0b00010000; +constexpr u8 Denormal = 0b00100000; +constexpr u8 Negative = 0b01000000; // Negative finite value +constexpr u8 SNaN = 0b10000000; +} // namespace FpClass + // Opcodes for use with vfixupimm enum class FpFixup : u8 { A = 0b0000, // A @@ -87,6 +135,29 @@ constexpr u32 FixupLUT(FpFixup src_qnan = FpFixup::A, return fixup_lut; } +// Opcodes for use with vrange* instructions +enum class FpRangeSelect : u8 { + Min = 0b00, + Max = 0b01, + AbsMin = 0b10, // Smaller absolute value + AbsMax = 0b11, // Larger absolute value +}; + +enum class FpRangeSign : u8 { + A = 0b00, // Copy sign of operand A + Preserve = 0b01, // Leave sign as is + Positive = 0b10, // Set Positive + Negative = 0b11, // Set Negative +}; + +// Generates 8-bit immediate LUT for vrange instruction 
+constexpr u8 FpRangeLUT(FpRangeSelect range_select, FpRangeSign range_sign) { + u8 range_lut = 0; + range_lut = Common::ModifyBits<0, 1, u8>(range_lut, static_cast(range_select)); + range_lut = Common::ModifyBits<2, 3, u8>(range_lut, static_cast(range_sign)); + return range_lut; +} + constexpr std::optional ConvertRoundingModeToX64Immediate(FP::RoundingMode rounding_mode) { switch (rounding_mode) { case FP::RoundingMode::ToNearest_TieEven: diff --git a/externals/dynarmic/src/dynarmic/backend/x64/emit_x64_floating_point.cpp b/externals/dynarmic/src/dynarmic/backend/x64/emit_x64_floating_point.cpp index 2994a91e3..5b8969b04 100755 --- a/externals/dynarmic/src/dynarmic/backend/x64/emit_x64_floating_point.cpp +++ b/externals/dynarmic/src/dynarmic/backend/x64/emit_x64_floating_point.cpp @@ -39,9 +39,6 @@ namespace { const Xbyak::Reg64 INVALID_REG = Xbyak::Reg64(-1); -constexpr u64 f16_negative_zero = 0x8000; -constexpr u64 f16_non_sign_mask = 0x7fff; - constexpr u64 f32_negative_zero = 0x80000000u; constexpr u64 f32_nan = 0x7fc00000u; constexpr u64 f32_non_sign_mask = 0x7fffffffu; @@ -328,58 +325,56 @@ void FPThreeOp(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, Function fn) } // anonymous namespace -void EmitX64::EmitFPAbs16(EmitContext& ctx, IR::Inst* inst) { +template +void FPAbs(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) { + using FPT = mp::unsigned_integer_of_size; + constexpr FPT non_sign_mask = FP::FPInfo::sign_mask - FPT(1u); + auto args = ctx.reg_alloc.GetArgumentInfo(inst); const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]); + const Xbyak::Address mask = code.MConst(xword, non_sign_mask); - code.pand(result, code.MConst(xword, f16_non_sign_mask)); + code.andps(result, mask); ctx.reg_alloc.DefineValue(inst, result); } +void EmitX64::EmitFPAbs16(EmitContext& ctx, IR::Inst* inst) { + FPAbs<16>(code, ctx, inst); +} + void EmitX64::EmitFPAbs32(EmitContext& ctx, IR::Inst* inst) { - auto args = ctx.reg_alloc.GetArgumentInfo(inst); - 
const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]); - - code.pand(result, code.MConst(xword, f32_non_sign_mask)); - - ctx.reg_alloc.DefineValue(inst, result); + FPAbs<32>(code, ctx, inst); } void EmitX64::EmitFPAbs64(EmitContext& ctx, IR::Inst* inst) { + FPAbs<64>(code, ctx, inst); +} + +template +void FPNeg(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) { + using FPT = mp::unsigned_integer_of_size; + constexpr FPT sign_mask = FP::FPInfo::sign_mask; + auto args = ctx.reg_alloc.GetArgumentInfo(inst); const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]); + const Xbyak::Address mask = code.MConst(xword, u64(sign_mask)); - code.pand(result, code.MConst(xword, f64_non_sign_mask)); + code.xorps(result, mask); ctx.reg_alloc.DefineValue(inst, result); } void EmitX64::EmitFPNeg16(EmitContext& ctx, IR::Inst* inst) { - auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]); - - code.pxor(result, code.MConst(xword, f16_negative_zero)); - - ctx.reg_alloc.DefineValue(inst, result); + FPNeg<16>(code, ctx, inst); } void EmitX64::EmitFPNeg32(EmitContext& ctx, IR::Inst* inst) { - auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]); - - code.pxor(result, code.MConst(xword, f32_negative_zero)); - - ctx.reg_alloc.DefineValue(inst, result); + FPNeg<32>(code, ctx, inst); } void EmitX64::EmitFPNeg64(EmitContext& ctx, IR::Inst* inst) { - auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]); - - code.pxor(result, code.MConst(xword, f64_negative_zero)); - - ctx.reg_alloc.DefineValue(inst, result); + FPNeg<64>(code, ctx, inst); } void EmitX64::EmitFPAdd32(EmitContext& ctx, IR::Inst* inst) { @@ -449,97 +444,112 @@ static void EmitFPMinMax(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) { template static void EmitFPMinMaxNumeric(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) { 
using FPT = mp::unsigned_integer_of_size; + constexpr FPT default_nan = FP::FPInfo::DefaultNaN(); constexpr u8 mantissa_msb_bit = static_cast(FP::FPInfo::explicit_mantissa_width - 1); auto args = ctx.reg_alloc.GetArgumentInfo(inst); const Xbyak::Xmm op1 = ctx.reg_alloc.UseScratchXmm(args[0]); const Xbyak::Xmm op2 = ctx.reg_alloc.UseScratchXmm(args[1]); // Result stored here! - Xbyak::Reg tmp = ctx.reg_alloc.ScratchGpr(); - tmp.setBit(fsize); - - const auto move_to_tmp = [&](const Xbyak::Xmm& xmm) { - if constexpr (fsize == 32) { - code.movd(tmp.cvt32(), xmm); - } else { - code.movq(tmp.cvt64(), xmm); - } - }; - - Xbyak::Label end, z, nan, op2_is_nan, snan, maybe_both_nan, normal; DenormalsAreZero(code, ctx, {op1, op2}); - FCODE(ucomis)(op1, op2); - code.jz(z, code.T_NEAR); - code.L(normal); - if constexpr (is_max) { - FCODE(maxs)(op2, op1); + + if (code.HasHostFeature(HostFeature::AVX512_OrthoFloat)) { + // vrangep{s,d} will already correctly handle comparing + // signed zeros and propagating NaNs similar to ARM + constexpr FpRangeSelect range_select = is_max ? 
FpRangeSelect::Max : FpRangeSelect::Min; + FCODE(vranges)(op2, op1, op2, FpRangeLUT(range_select, FpRangeSign::Preserve)); + + if (ctx.FPCR().DN()) { + FCODE(vcmps)(k1, op2, op2, Cmp::Unordered_Q); + FCODE(vmovs)(op2 | k1, code.MConst(xword, default_nan)); + } } else { - FCODE(mins)(op2, op1); - } - code.L(end); + Xbyak::Reg tmp = ctx.reg_alloc.ScratchGpr(); + tmp.setBit(fsize); - code.SwitchToFarCode(); + const auto move_to_tmp = [&](const Xbyak::Xmm& xmm) { + if constexpr (fsize == 32) { + code.movd(tmp.cvt32(), xmm); + } else { + code.movq(tmp.cvt64(), xmm); + } + }; - code.L(z); - code.jp(nan); - if constexpr (is_max) { - code.andps(op2, op1); - } else { - code.orps(op2, op1); - } - code.jmp(end); + Xbyak::Label end, z, nan, op2_is_nan, snan, maybe_both_nan, normal; - // NaN requirements: - // op1 op2 result - // SNaN anything op1 - // !SNaN SNaN op2 - // QNaN !NaN op2 - // !NaN QNaN op1 - // QNaN QNaN op1 + FCODE(ucomis)(op1, op2); + code.jz(z, code.T_NEAR); + code.L(normal); + if constexpr (is_max) { + FCODE(maxs)(op2, op1); + } else { + FCODE(mins)(op2, op1); + } + code.L(end); - code.L(nan); - FCODE(ucomis)(op1, op1); - code.jnp(op2_is_nan); + code.SwitchToFarCode(); - // op1 is NaN - move_to_tmp(op1); - code.bt(tmp, mantissa_msb_bit); - code.jc(maybe_both_nan); - if (ctx.FPCR().DN()) { - code.L(snan); - code.movaps(op2, code.MConst(xword, FP::FPInfo::DefaultNaN())); + code.L(z); + code.jp(nan); + if constexpr (is_max) { + code.andps(op2, op1); + } else { + code.orps(op2, op1); + } code.jmp(end); - } else { - code.movaps(op2, op1); - code.L(snan); - code.orps(op2, code.MConst(xword, FP::FPInfo::mantissa_msb)); - code.jmp(end); - } - code.L(maybe_both_nan); - FCODE(ucomis)(op2, op2); - code.jnp(end, code.T_NEAR); - if (ctx.FPCR().DN()) { - code.jmp(snan); - } else { + // NaN requirements: + // op1 op2 result + // SNaN anything op1 + // !SNaN SNaN op2 + // QNaN !NaN op2 + // !NaN QNaN op1 + // QNaN QNaN op1 + + code.L(nan); + FCODE(ucomis)(op1, op1); + 
code.jnp(op2_is_nan); + + // op1 is NaN + move_to_tmp(op1); + code.bt(tmp, mantissa_msb_bit); + code.jc(maybe_both_nan); + if (ctx.FPCR().DN()) { + code.L(snan); + code.movaps(op2, code.MConst(xword, default_nan)); + code.jmp(end); + } else { + code.movaps(op2, op1); + code.L(snan); + code.orps(op2, code.MConst(xword, FP::FPInfo::mantissa_msb)); + code.jmp(end); + } + + code.L(maybe_both_nan); + FCODE(ucomis)(op2, op2); + code.jnp(end, code.T_NEAR); + if (ctx.FPCR().DN()) { + code.jmp(snan); + } else { + move_to_tmp(op2); + code.bt(tmp.cvt64(), mantissa_msb_bit); + code.jnc(snan); + code.movaps(op2, op1); + code.jmp(end); + } + + // op2 is NaN + code.L(op2_is_nan); move_to_tmp(op2); - code.bt(tmp.cvt64(), mantissa_msb_bit); + code.bt(tmp, mantissa_msb_bit); code.jnc(snan); code.movaps(op2, op1); code.jmp(end); + + code.SwitchToNearCode(); } - // op2 is NaN - code.L(op2_is_nan); - move_to_tmp(op2); - code.bt(tmp, mantissa_msb_bit); - code.jnc(snan); - code.movaps(op2, op1); - code.jmp(end); - - code.SwitchToNearCode(); - ctx.reg_alloc.DefineValue(inst, op2); } @@ -1681,7 +1691,7 @@ void EmitX64::EmitFPFixedS32ToSingle(EmitContext& ctx, IR::Inst* inst) { const size_t fbits = args[1].GetImmediateU8(); const FP::RoundingMode rounding_mode = static_cast(args[2].GetImmediateU8()); - if (rounding_mode == ctx.FPCR().RMode()) { + if (rounding_mode == ctx.FPCR().RMode() || ctx.HasOptimization(OptimizationFlag::Unsafe_IgnoreStandardFPCRValue)) { code.cvtsi2ss(result, from); } else { ASSERT(rounding_mode == FP::RoundingMode::ToNearest_TieEven); @@ -1717,7 +1727,7 @@ void EmitX64::EmitFPFixedU32ToSingle(EmitContext& ctx, IR::Inst* inst) { } }; - if (rounding_mode == ctx.FPCR().RMode()) { + if (rounding_mode == ctx.FPCR().RMode() || ctx.HasOptimization(OptimizationFlag::Unsafe_IgnoreStandardFPCRValue)) { op(); } else { ASSERT(rounding_mode == FP::RoundingMode::ToNearest_TieEven); diff --git a/externals/dynarmic/src/dynarmic/backend/x64/emit_x64_vector.cpp 
b/externals/dynarmic/src/dynarmic/backend/x64/emit_x64_vector.cpp index 4615320d4..871aa2e4f 100755 --- a/externals/dynarmic/src/dynarmic/backend/x64/emit_x64_vector.cpp +++ b/externals/dynarmic/src/dynarmic/backend/x64/emit_x64_vector.cpp @@ -440,11 +440,11 @@ void EmitX64::EmitVectorAnd(EmitContext& ctx, IR::Inst* inst) { } static void ArithmeticShiftRightByte(EmitContext& ctx, BlockOfCode& code, const Xbyak::Xmm& result, u8 shift_amount) { - if (code.HasHostFeature(HostFeature::AVX512VL | HostFeature::GFNI)) { + if (code.HasHostFeature(HostFeature::GFNI)) { const u64 shift_matrix = shift_amount < 8 ? (0x0102040810204080 << (shift_amount * 8)) | (0x8080808080808080 >> (64 - shift_amount * 8)) : 0x8080808080808080; - code.vgf2p8affineqb(result, result, code.MConst(xword_b, shift_matrix), 0); + code.gf2p8affineqb(result, code.MConst(xword, shift_matrix, shift_matrix), 0); return; } @@ -1472,9 +1472,9 @@ void EmitX64::EmitVectorLogicalShiftLeft8(EmitContext& ctx, IR::Inst* inst) { code.pxor(result, result); } else if (shift_amount == 1) { code.paddb(result, result); - } else if (code.HasHostFeature(HostFeature::AVX512VL | HostFeature::GFNI)) { + } else if (code.HasHostFeature(HostFeature::GFNI)) { const u64 shift_matrix = 0x0102040810204080 >> (shift_amount * 8); - code.vgf2p8affineqb(result, result, code.MConst(xword_b, shift_matrix), 0); + code.gf2p8affineqb(result, code.MConst(xword, shift_matrix, shift_matrix), 0); } else { const u64 replicand = (0xFFULL << shift_amount) & 0xFF; const u64 mask = Common::Replicate(replicand, Common::BitSize()); @@ -1529,9 +1529,9 @@ void EmitX64::EmitVectorLogicalShiftRight8(EmitContext& ctx, IR::Inst* inst) { // Do nothing } else if (shift_amount >= 8) { code.pxor(result, result); - } else if (code.HasHostFeature(HostFeature::AVX512VL | HostFeature::GFNI)) { + } else if (code.HasHostFeature(HostFeature::GFNI)) { const u64 shift_matrix = 0x0102040810204080 << (shift_amount * 8); - code.vgf2p8affineqb(result, result, 
code.MConst(xword_b, shift_matrix), 0); + code.gf2p8affineqb(result, code.MConst(xword, shift_matrix, shift_matrix), 0); } else { const u64 replicand = 0xFEULL >> shift_amount; const u64 mask = Common::Replicate(replicand, Common::BitSize()); @@ -2137,12 +2137,18 @@ void EmitX64::EmitVectorNarrow64(EmitContext& ctx, IR::Inst* inst) { void EmitX64::EmitVectorNot(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); + if (code.HasHostFeature(HostFeature::AVX512_Ortho)) { + const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(); + const Xbyak::Xmm operand = ctx.reg_alloc.UseXmm(args[0]); + code.vpternlogq(result, operand, operand, u8(~Tern::c)); + ctx.reg_alloc.DefineValue(inst, result); + return; + } + const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(args[0]); const Xbyak::Xmm xmm_b = ctx.reg_alloc.ScratchXmm(); - code.pcmpeqw(xmm_b, xmm_b); code.pxor(xmm_a, xmm_b); - ctx.reg_alloc.DefineValue(inst, xmm_a); } @@ -2789,8 +2795,8 @@ void EmitX64::EmitVectorReverseBits(EmitContext& ctx, IR::Inst* inst) { const Xbyak::Xmm data = ctx.reg_alloc.UseScratchXmm(args[0]); - if (code.HasHostFeature(HostFeature::AVX512VL | HostFeature::GFNI)) { - code.vgf2p8affineqb(data, data, code.MConst(xword_b, 0x8040201008040201), 0); + if (code.HasHostFeature(HostFeature::GFNI)) { + code.gf2p8affineqb(data, code.MConst(xword, 0x8040201008040201, 0x8040201008040201), 0); } else { const Xbyak::Xmm high_nibble_reg = ctx.reg_alloc.ScratchXmm(); code.movdqa(high_nibble_reg, code.MConst(xword, 0xF0F0F0F0F0F0F0F0, 0xF0F0F0F0F0F0F0F0)); diff --git a/externals/dynarmic/src/dynarmic/backend/x64/emit_x64_vector_floating_point.cpp b/externals/dynarmic/src/dynarmic/backend/x64/emit_x64_vector_floating_point.cpp index 3952527b1..aeba7e2bb 100755 --- a/externals/dynarmic/src/dynarmic/backend/x64/emit_x64_vector_floating_point.cpp +++ b/externals/dynarmic/src/dynarmic/backend/x64/emit_x64_vector_floating_point.cpp @@ -58,7 +58,7 @@ template void 
MaybeStandardFPSCRValue(BlockOfCode& code, EmitContext& ctx, bool fpcr_controlled, Lambda lambda) { const bool switch_mxcsr = ctx.FPCR(fpcr_controlled) != ctx.FPCR(); - if (switch_mxcsr) { + if (switch_mxcsr && !ctx.HasOptimization(OptimizationFlag::Unsafe_IgnoreStandardFPCRValue)) { code.EnterStandardASIMD(); lambda(); code.LeaveStandardASIMD(); @@ -557,37 +557,32 @@ void EmitFourOpFallback(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, Lam } // anonymous namespace -void EmitX64::EmitFPVectorAbs16(EmitContext& ctx, IR::Inst* inst) { +template +void FPVectorAbs(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) { + using FPT = mp::unsigned_integer_of_size; + constexpr FPT non_sign_mask = FP::FPInfo::sign_mask - FPT(1u); + constexpr u64 non_sign_mask64 = Common::Replicate(non_sign_mask, fsize); + auto args = ctx.reg_alloc.GetArgumentInfo(inst); const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(args[0]); - const Xbyak::Address mask = code.MConst(xword, 0x7FFF7FFF7FFF7FFF, 0x7FFF7FFF7FFF7FFF); - - code.pand(a, mask); - - ctx.reg_alloc.DefineValue(inst, a); -} - -void EmitX64::EmitFPVectorAbs32(EmitContext& ctx, IR::Inst* inst) { - auto args = ctx.reg_alloc.GetArgumentInfo(inst); - - const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(args[0]); - const Xbyak::Address mask = code.MConst(xword, 0x7FFFFFFF7FFFFFFF, 0x7FFFFFFF7FFFFFFF); + const Xbyak::Address mask = code.MConst(xword, non_sign_mask64, non_sign_mask64); code.andps(a, mask); ctx.reg_alloc.DefineValue(inst, a); } +void EmitX64::EmitFPVectorAbs16(EmitContext& ctx, IR::Inst* inst) { + FPVectorAbs<16>(code, ctx, inst); +} + +void EmitX64::EmitFPVectorAbs32(EmitContext& ctx, IR::Inst* inst) { + FPVectorAbs<32>(code, ctx, inst); +} + void EmitX64::EmitFPVectorAbs64(EmitContext& ctx, IR::Inst* inst) { - auto args = ctx.reg_alloc.GetArgumentInfo(inst); - - const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(args[0]); - const Xbyak::Address mask = code.MConst(xword, 0x7FFFFFFFFFFFFFFF, 0x7FFFFFFFFFFFFFFF); - - 
code.andpd(a, mask); - - ctx.reg_alloc.DefineValue(inst, a); + FPVectorAbs<64>(code, ctx, inst); } void EmitX64::EmitFPVectorAdd32(EmitContext& ctx, IR::Inst* inst) { @@ -943,7 +938,12 @@ static void EmitFPVectorMinMax(BlockOfCode& code, EmitContext& ctx, IR::Inst* in MaybeStandardFPSCRValue(code, ctx, fpcr_controlled, [&] { DenormalsAreZero(code, ctx.FPCR(fpcr_controlled), {result, xmm_b}, mask); - if (code.HasHostFeature(HostFeature::AVX)) { + if (code.HasHostFeature(HostFeature::AVX512_OrthoFloat)) { + constexpr FpRangeSelect range_select = is_max ? FpRangeSelect::Max : FpRangeSelect::Min; + FCODE(vcmpp)(k1, result, xmm_b, Cmp::Unordered_Q); + FCODE(vrangep)(result, result, xmm_b, FpRangeLUT(range_select, FpRangeSign::Preserve)); + FCODE(vblendmp)(result | k1, result, GetNaNVector(code)); + } else if (code.HasHostFeature(HostFeature::AVX)) { FCODE(vcmpeqp)(mask, result, xmm_b); FCODE(vcmpunordp)(nan_mask, result, xmm_b); if constexpr (is_max) { @@ -1001,7 +1001,14 @@ static void EmitFPVectorMinMax(BlockOfCode& code, EmitContext& ctx, IR::Inst* in // x86-64 treats differently signed zeros as equal while ARM does not. // Thus if we AND together things that x86-64 thinks are equal we'll get the positive zero. - if (code.HasHostFeature(HostFeature::AVX)) { + if (code.HasHostFeature(HostFeature::AVX512_OrthoFloat)) { + // vrangep{s,d} will already correctly handle comparing + // signed zeros similar to ARM + // max(+0.0, -0.0) = +0.0. + // min(+0.0, -0.0) = -0.0 + constexpr FpRangeSelect range_select = is_max ? 
FpRangeSelect::Max : FpRangeSelect::Min; + FCODE(vrangep)(result, result, xmm_b, FpRangeLUT(range_select, FpRangeSign::Preserve)); + } else if (code.HasHostFeature(HostFeature::AVX)) { FCODE(vcmpeqp)(mask, result, xmm_b); if constexpr (is_max) { FCODE(vandp)(eq, result, xmm_b); @@ -1217,37 +1224,32 @@ void EmitX64::EmitFPVectorMulX64(EmitContext& ctx, IR::Inst* inst) { EmitFPVectorMulX<64>(code, ctx, inst); } -void EmitX64::EmitFPVectorNeg16(EmitContext& ctx, IR::Inst* inst) { +template +void FPVectorNeg(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) { + using FPT = mp::unsigned_integer_of_size; + constexpr FPT sign_mask = FP::FPInfo::sign_mask; + constexpr u64 sign_mask64 = Common::Replicate(sign_mask, fsize); + auto args = ctx.reg_alloc.GetArgumentInfo(inst); const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(args[0]); - const Xbyak::Address mask = code.MConst(xword, 0x8000800080008000, 0x8000800080008000); + const Xbyak::Address mask = code.MConst(xword, sign_mask64, sign_mask64); - code.pxor(a, mask); + code.xorps(a, mask); ctx.reg_alloc.DefineValue(inst, a); } +void EmitX64::EmitFPVectorNeg16(EmitContext& ctx, IR::Inst* inst) { + FPVectorNeg<16>(code, ctx, inst); +} + void EmitX64::EmitFPVectorNeg32(EmitContext& ctx, IR::Inst* inst) { - auto args = ctx.reg_alloc.GetArgumentInfo(inst); - - const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(args[0]); - const Xbyak::Address mask = code.MConst(xword, 0x8000000080000000, 0x8000000080000000); - - code.pxor(a, mask); - - ctx.reg_alloc.DefineValue(inst, a); + FPVectorNeg<32>(code, ctx, inst); } void EmitX64::EmitFPVectorNeg64(EmitContext& ctx, IR::Inst* inst) { - auto args = ctx.reg_alloc.GetArgumentInfo(inst); - - const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(args[0]); - const Xbyak::Address mask = code.MConst(xword, 0x8000000000000000, 0x8000000000000000); - - code.pxor(a, mask); - - ctx.reg_alloc.DefineValue(inst, a); + FPVectorNeg<64>(code, ctx, inst); } void EmitX64::EmitFPVectorPairedAdd32(EmitContext& ctx, 
IR::Inst* inst) { diff --git a/externals/dynarmic/src/dynarmic/common/bit_util.h b/externals/dynarmic/src/dynarmic/common/bit_util.h index bb3cfae60..d05c22215 100755 --- a/externals/dynarmic/src/dynarmic/common/bit_util.h +++ b/externals/dynarmic/src/dynarmic/common/bit_util.h @@ -199,7 +199,7 @@ constexpr bool MostSignificantBit(T value) { } template -inline T Replicate(T value, size_t element_size) { +constexpr T Replicate(T value, size_t element_size) { ASSERT_MSG(BitSize() % element_size == 0, "bitsize of T not divisible by element_size"); if (element_size == BitSize()) return value; diff --git a/externals/dynarmic/src/dynarmic/frontend/A32/ir_emitter.cpp b/externals/dynarmic/src/dynarmic/frontend/A32/ir_emitter.cpp index 9839beca7..c948a78ff 100755 --- a/externals/dynarmic/src/dynarmic/frontend/A32/ir_emitter.cpp +++ b/externals/dynarmic/src/dynarmic/frontend/A32/ir_emitter.cpp @@ -141,10 +141,14 @@ void IREmitter::SetCpsr(const IR::U32& value) { Inst(Opcode::A32SetCpsr, value); } -void IREmitter::SetCpsrNZCV(const IR::U32& value) { +void IREmitter::SetCpsrNZCV(const IR::NZCV& value) { Inst(Opcode::A32SetCpsrNZCV, value); } +void IREmitter::SetCpsrNZCVRaw(const IR::U32& value) { + Inst(Opcode::A32SetCpsrNZCVRaw, value); +} + void IREmitter::SetCpsrNZCVQ(const IR::U32& value) { Inst(Opcode::A32SetCpsrNZCVQ, value); } @@ -153,6 +157,10 @@ void IREmitter::SetCheckBit(const IR::U1& value) { Inst(Opcode::A32SetCheckBit, value); } +IR::U1 IREmitter::GetOverflowFrom(const IR::Value& value) { + return Inst(Opcode::GetOverflowFromOp, value); +} + IR::U1 IREmitter::GetCFlag() { return Inst(Opcode::A32GetCFlag); } diff --git a/externals/dynarmic/src/dynarmic/frontend/A32/ir_emitter.h b/externals/dynarmic/src/dynarmic/frontend/A32/ir_emitter.h index 80bcf6b97..9377a5c8a 100755 --- a/externals/dynarmic/src/dynarmic/frontend/A32/ir_emitter.h +++ b/externals/dynarmic/src/dynarmic/frontend/A32/ir_emitter.h @@ -55,9 +55,11 @@ public: IR::U32 GetCpsr(); void SetCpsr(const 
IR::U32& value); - void SetCpsrNZCV(const IR::U32& value); + void SetCpsrNZCV(const IR::NZCV& value); + void SetCpsrNZCVRaw(const IR::U32& value); void SetCpsrNZCVQ(const IR::U32& value); void SetCheckBit(const IR::U1& value); + IR::U1 GetOverflowFrom(const IR::Value& value); IR::U1 GetCFlag(); void SetNFlag(const IR::U1& value); void SetZFlag(const IR::U1& value); diff --git a/externals/dynarmic/src/dynarmic/frontend/A32/translate/impl/coprocessor.cpp b/externals/dynarmic/src/dynarmic/frontend/A32/translate/impl/coprocessor.cpp index 63a233f0c..402efd4b1 100755 --- a/externals/dynarmic/src/dynarmic/frontend/A32/translate/impl/coprocessor.cpp +++ b/externals/dynarmic/src/dynarmic/frontend/A32/translate/impl/coprocessor.cpp @@ -102,7 +102,7 @@ bool TranslatorVisitor::arm_MRC(Cond cond, size_t opc1, CoprocReg CRn, Reg t, si ir.SetRegister(t, word); } else { const auto new_cpsr_nzcv = ir.And(word, ir.Imm32(0xF0000000)); - ir.SetCpsrNZCV(new_cpsr_nzcv); + ir.SetCpsrNZCVRaw(new_cpsr_nzcv); } } return true; diff --git a/externals/dynarmic/src/dynarmic/frontend/A32/translate/impl/data_processing.cpp b/externals/dynarmic/src/dynarmic/frontend/A32/translate/impl/data_processing.cpp index ef038d2b0..e340104ef 100755 --- a/externals/dynarmic/src/dynarmic/frontend/A32/translate/impl/data_processing.cpp +++ b/externals/dynarmic/src/dynarmic/frontend/A32/translate/impl/data_processing.cpp @@ -21,17 +21,14 @@ bool TranslatorVisitor::arm_ADC_imm(Cond cond, bool S, Reg n, Reg d, int rotate, return UnpredictableInstruction(); } - ir.ALUWritePC(result.result); + ir.ALUWritePC(result); ir.SetTerm(IR::Term::ReturnToDispatch{}); return false; } - ir.SetRegister(d, result.result); + ir.SetRegister(d, result); if (S) { - ir.SetNFlag(ir.MostSignificantBit(result.result)); - ir.SetZFlag(ir.IsZero(result.result)); - ir.SetCFlag(result.carry); - ir.SetVFlag(result.overflow); + ir.SetCpsrNZCV(ir.NZCVFrom(result)); } return true; } @@ -50,17 +47,14 @@ bool TranslatorVisitor::arm_ADC_reg(Cond 
cond, bool S, Reg n, Reg d, Imm<5> imm5 return UnpredictableInstruction(); } - ir.ALUWritePC(result.result); + ir.ALUWritePC(result); ir.SetTerm(IR::Term::ReturnToDispatch{}); return false; } - ir.SetRegister(d, result.result); + ir.SetRegister(d, result); if (S) { - ir.SetNFlag(ir.MostSignificantBit(result.result)); - ir.SetZFlag(ir.IsZero(result.result)); - ir.SetCFlag(result.carry); - ir.SetVFlag(result.overflow); + ir.SetCpsrNZCV(ir.NZCVFrom(result)); } return true; @@ -81,12 +75,9 @@ bool TranslatorVisitor::arm_ADC_rsr(Cond cond, bool S, Reg n, Reg d, Reg s, Shif const auto shifted = EmitRegShift(ir.GetRegister(m), shift, shift_n, carry_in); const auto result = ir.AddWithCarry(ir.GetRegister(n), shifted.result, ir.GetCFlag()); - ir.SetRegister(d, result.result); + ir.SetRegister(d, result); if (S) { - ir.SetNFlag(ir.MostSignificantBit(result.result)); - ir.SetZFlag(ir.IsZero(result.result)); - ir.SetCFlag(result.carry); - ir.SetVFlag(result.overflow); + ir.SetCpsrNZCV(ir.NZCVFrom(result)); } return true; @@ -106,17 +97,14 @@ bool TranslatorVisitor::arm_ADD_imm(Cond cond, bool S, Reg n, Reg d, int rotate, return UnpredictableInstruction(); } - ir.ALUWritePC(result.result); + ir.ALUWritePC(result); ir.SetTerm(IR::Term::ReturnToDispatch{}); return false; } - ir.SetRegister(d, result.result); + ir.SetRegister(d, result); if (S) { - ir.SetNFlag(ir.MostSignificantBit(result.result)); - ir.SetZFlag(ir.IsZero(result.result)); - ir.SetCFlag(result.carry); - ir.SetVFlag(result.overflow); + ir.SetCpsrNZCV(ir.NZCVFrom(result)); } return true; @@ -136,17 +124,14 @@ bool TranslatorVisitor::arm_ADD_reg(Cond cond, bool S, Reg n, Reg d, Imm<5> imm5 return UnpredictableInstruction(); } - ir.ALUWritePC(result.result); + ir.ALUWritePC(result); ir.SetTerm(IR::Term::ReturnToDispatch{}); return false; } - ir.SetRegister(d, result.result); + ir.SetRegister(d, result); if (S) { - ir.SetNFlag(ir.MostSignificantBit(result.result)); - ir.SetZFlag(ir.IsZero(result.result)); - 
ir.SetCFlag(result.carry); - ir.SetVFlag(result.overflow); + ir.SetCpsrNZCV(ir.NZCVFrom(result)); } return true; @@ -167,12 +152,9 @@ bool TranslatorVisitor::arm_ADD_rsr(Cond cond, bool S, Reg n, Reg d, Reg s, Shif const auto shifted = EmitRegShift(ir.GetRegister(m), shift, shift_n, carry_in); const auto result = ir.AddWithCarry(ir.GetRegister(n), shifted.result, ir.Imm1(0)); - ir.SetRegister(d, result.result); + ir.SetRegister(d, result); if (S) { - ir.SetNFlag(ir.MostSignificantBit(result.result)); - ir.SetZFlag(ir.IsZero(result.result)); - ir.SetCFlag(result.carry); - ir.SetVFlag(result.overflow); + ir.SetCpsrNZCV(ir.NZCVFrom(result)); } return true; @@ -354,10 +336,7 @@ bool TranslatorVisitor::arm_CMN_imm(Cond cond, Reg n, int rotate, Imm<8> imm8) { const u32 imm32 = ArmExpandImm(rotate, imm8); const auto result = ir.AddWithCarry(ir.GetRegister(n), ir.Imm32(imm32), ir.Imm1(0)); - ir.SetNFlag(ir.MostSignificantBit(result.result)); - ir.SetZFlag(ir.IsZero(result.result)); - ir.SetCFlag(result.carry); - ir.SetVFlag(result.overflow); + ir.SetCpsrNZCV(ir.NZCVFrom(result)); return true; } @@ -370,10 +349,7 @@ bool TranslatorVisitor::arm_CMN_reg(Cond cond, Reg n, Imm<5> imm5, ShiftType shi const auto shifted = EmitImmShift(ir.GetRegister(m), shift, imm5, ir.GetCFlag()); const auto result = ir.AddWithCarry(ir.GetRegister(n), shifted.result, ir.Imm1(0)); - ir.SetNFlag(ir.MostSignificantBit(result.result)); - ir.SetZFlag(ir.IsZero(result.result)); - ir.SetCFlag(result.carry); - ir.SetVFlag(result.overflow); + ir.SetCpsrNZCV(ir.NZCVFrom(result)); return true; } @@ -392,10 +368,7 @@ bool TranslatorVisitor::arm_CMN_rsr(Cond cond, Reg n, Reg s, ShiftType shift, Re const auto shifted = EmitRegShift(ir.GetRegister(m), shift, shift_n, carry_in); const auto result = ir.AddWithCarry(ir.GetRegister(n), shifted.result, ir.Imm1(0)); - ir.SetNFlag(ir.MostSignificantBit(result.result)); - ir.SetZFlag(ir.IsZero(result.result)); - ir.SetCFlag(result.carry); - 
ir.SetVFlag(result.overflow); + ir.SetCpsrNZCV(ir.NZCVFrom(result)); return true; } @@ -408,10 +381,7 @@ bool TranslatorVisitor::arm_CMP_imm(Cond cond, Reg n, int rotate, Imm<8> imm8) { const u32 imm32 = ArmExpandImm(rotate, imm8); const auto result = ir.SubWithCarry(ir.GetRegister(n), ir.Imm32(imm32), ir.Imm1(1)); - ir.SetNFlag(ir.MostSignificantBit(result.result)); - ir.SetZFlag(ir.IsZero(result.result)); - ir.SetCFlag(result.carry); - ir.SetVFlag(result.overflow); + ir.SetCpsrNZCV(ir.NZCVFrom(result)); return true; } @@ -424,10 +394,7 @@ bool TranslatorVisitor::arm_CMP_reg(Cond cond, Reg n, Imm<5> imm5, ShiftType shi const auto shifted = EmitImmShift(ir.GetRegister(m), shift, imm5, ir.GetCFlag()); const auto result = ir.SubWithCarry(ir.GetRegister(n), shifted.result, ir.Imm1(1)); - ir.SetNFlag(ir.MostSignificantBit(result.result)); - ir.SetZFlag(ir.IsZero(result.result)); - ir.SetCFlag(result.carry); - ir.SetVFlag(result.overflow); + ir.SetCpsrNZCV(ir.NZCVFrom(result)); return true; } @@ -446,10 +413,7 @@ bool TranslatorVisitor::arm_CMP_rsr(Cond cond, Reg n, Reg s, ShiftType shift, Re const auto shifted = EmitRegShift(ir.GetRegister(m), shift, shift_n, carry_in); const auto result = ir.SubWithCarry(ir.GetRegister(n), shifted.result, ir.Imm1(1)); - ir.SetNFlag(ir.MostSignificantBit(result.result)); - ir.SetZFlag(ir.IsZero(result.result)); - ir.SetCFlag(result.carry); - ir.SetVFlag(result.overflow); + ir.SetCpsrNZCV(ir.NZCVFrom(result)); return true; } @@ -801,17 +765,14 @@ bool TranslatorVisitor::arm_RSB_imm(Cond cond, bool S, Reg n, Reg d, int rotate, return UnpredictableInstruction(); } - ir.ALUWritePC(result.result); + ir.ALUWritePC(result); ir.SetTerm(IR::Term::ReturnToDispatch{}); return false; } - ir.SetRegister(d, result.result); + ir.SetRegister(d, result); if (S) { - ir.SetNFlag(ir.MostSignificantBit(result.result)); - ir.SetZFlag(ir.IsZero(result.result)); - ir.SetCFlag(result.carry); - ir.SetVFlag(result.overflow); + 
ir.SetCpsrNZCV(ir.NZCVFrom(result)); } return true; @@ -831,17 +792,14 @@ bool TranslatorVisitor::arm_RSB_reg(Cond cond, bool S, Reg n, Reg d, Imm<5> imm5 return UnpredictableInstruction(); } - ir.ALUWritePC(result.result); + ir.ALUWritePC(result); ir.SetTerm(IR::Term::ReturnToDispatch{}); return false; } - ir.SetRegister(d, result.result); + ir.SetRegister(d, result); if (S) { - ir.SetNFlag(ir.MostSignificantBit(result.result)); - ir.SetZFlag(ir.IsZero(result.result)); - ir.SetCFlag(result.carry); - ir.SetVFlag(result.overflow); + ir.SetCpsrNZCV(ir.NZCVFrom(result)); } return true; @@ -862,12 +820,9 @@ bool TranslatorVisitor::arm_RSB_rsr(Cond cond, bool S, Reg n, Reg d, Reg s, Shif const auto shifted = EmitRegShift(ir.GetRegister(m), shift, shift_n, carry_in); const auto result = ir.SubWithCarry(shifted.result, ir.GetRegister(n), ir.Imm1(1)); - ir.SetRegister(d, result.result); + ir.SetRegister(d, result); if (S) { - ir.SetNFlag(ir.MostSignificantBit(result.result)); - ir.SetZFlag(ir.IsZero(result.result)); - ir.SetCFlag(result.carry); - ir.SetVFlag(result.overflow); + ir.SetCpsrNZCV(ir.NZCVFrom(result)); } return true; @@ -887,17 +842,14 @@ bool TranslatorVisitor::arm_RSC_imm(Cond cond, bool S, Reg n, Reg d, int rotate, return UnpredictableInstruction(); } - ir.ALUWritePC(result.result); + ir.ALUWritePC(result); ir.SetTerm(IR::Term::ReturnToDispatch{}); return false; } - ir.SetRegister(d, result.result); + ir.SetRegister(d, result); if (S) { - ir.SetNFlag(ir.MostSignificantBit(result.result)); - ir.SetZFlag(ir.IsZero(result.result)); - ir.SetCFlag(result.carry); - ir.SetVFlag(result.overflow); + ir.SetCpsrNZCV(ir.NZCVFrom(result)); } return true; @@ -916,17 +868,14 @@ bool TranslatorVisitor::arm_RSC_reg(Cond cond, bool S, Reg n, Reg d, Imm<5> imm5 return UnpredictableInstruction(); } - ir.ALUWritePC(result.result); + ir.ALUWritePC(result); ir.SetTerm(IR::Term::ReturnToDispatch{}); return false; } - ir.SetRegister(d, result.result); + ir.SetRegister(d, result); if 
(S) { - ir.SetNFlag(ir.MostSignificantBit(result.result)); - ir.SetZFlag(ir.IsZero(result.result)); - ir.SetCFlag(result.carry); - ir.SetVFlag(result.overflow); + ir.SetCpsrNZCV(ir.NZCVFrom(result)); } return true; @@ -947,12 +896,9 @@ bool TranslatorVisitor::arm_RSC_rsr(Cond cond, bool S, Reg n, Reg d, Reg s, Shif const auto shifted = EmitRegShift(ir.GetRegister(m), shift, shift_n, carry_in); const auto result = ir.SubWithCarry(shifted.result, ir.GetRegister(n), ir.GetCFlag()); - ir.SetRegister(d, result.result); + ir.SetRegister(d, result); if (S) { - ir.SetNFlag(ir.MostSignificantBit(result.result)); - ir.SetZFlag(ir.IsZero(result.result)); - ir.SetCFlag(result.carry); - ir.SetVFlag(result.overflow); + ir.SetCpsrNZCV(ir.NZCVFrom(result)); } return true; @@ -972,17 +918,14 @@ bool TranslatorVisitor::arm_SBC_imm(Cond cond, bool S, Reg n, Reg d, int rotate, return UnpredictableInstruction(); } - ir.ALUWritePC(result.result); + ir.ALUWritePC(result); ir.SetTerm(IR::Term::ReturnToDispatch{}); return false; } - ir.SetRegister(d, result.result); + ir.SetRegister(d, result); if (S) { - ir.SetNFlag(ir.MostSignificantBit(result.result)); - ir.SetZFlag(ir.IsZero(result.result)); - ir.SetCFlag(result.carry); - ir.SetVFlag(result.overflow); + ir.SetCpsrNZCV(ir.NZCVFrom(result)); } return true; @@ -1002,17 +945,14 @@ bool TranslatorVisitor::arm_SBC_reg(Cond cond, bool S, Reg n, Reg d, Imm<5> imm5 return UnpredictableInstruction(); } - ir.ALUWritePC(result.result); + ir.ALUWritePC(result); ir.SetTerm(IR::Term::ReturnToDispatch{}); return false; } - ir.SetRegister(d, result.result); + ir.SetRegister(d, result); if (S) { - ir.SetNFlag(ir.MostSignificantBit(result.result)); - ir.SetZFlag(ir.IsZero(result.result)); - ir.SetCFlag(result.carry); - ir.SetVFlag(result.overflow); + ir.SetCpsrNZCV(ir.NZCVFrom(result)); } return true; @@ -1033,12 +973,9 @@ bool TranslatorVisitor::arm_SBC_rsr(Cond cond, bool S, Reg n, Reg d, Reg s, Shif const auto shifted = EmitRegShift(ir.GetRegister(m), 
shift, shift_n, carry_in); const auto result = ir.SubWithCarry(ir.GetRegister(n), shifted.result, ir.GetCFlag()); - ir.SetRegister(d, result.result); + ir.SetRegister(d, result); if (S) { - ir.SetNFlag(ir.MostSignificantBit(result.result)); - ir.SetZFlag(ir.IsZero(result.result)); - ir.SetCFlag(result.carry); - ir.SetVFlag(result.overflow); + ir.SetCpsrNZCV(ir.NZCVFrom(result)); } return true; @@ -1058,17 +995,14 @@ bool TranslatorVisitor::arm_SUB_imm(Cond cond, bool S, Reg n, Reg d, int rotate, return UnpredictableInstruction(); } - ir.ALUWritePC(result.result); + ir.ALUWritePC(result); ir.SetTerm(IR::Term::ReturnToDispatch{}); return false; } - ir.SetRegister(d, result.result); + ir.SetRegister(d, result); if (S) { - ir.SetNFlag(ir.MostSignificantBit(result.result)); - ir.SetZFlag(ir.IsZero(result.result)); - ir.SetCFlag(result.carry); - ir.SetVFlag(result.overflow); + ir.SetCpsrNZCV(ir.NZCVFrom(result)); } return true; @@ -1088,17 +1022,14 @@ bool TranslatorVisitor::arm_SUB_reg(Cond cond, bool S, Reg n, Reg d, Imm<5> imm5 return UnpredictableInstruction(); } - ir.ALUWritePC(result.result); + ir.ALUWritePC(result); ir.SetTerm(IR::Term::ReturnToDispatch{}); return false; } - ir.SetRegister(d, result.result); + ir.SetRegister(d, result); if (S) { - ir.SetNFlag(ir.MostSignificantBit(result.result)); - ir.SetZFlag(ir.IsZero(result.result)); - ir.SetCFlag(result.carry); - ir.SetVFlag(result.overflow); + ir.SetCpsrNZCV(ir.NZCVFrom(result)); } return true; @@ -1118,12 +1049,9 @@ bool TranslatorVisitor::arm_SUB_rsr(Cond cond, bool S, Reg n, Reg d, Reg s, Shif const auto shifted = EmitRegShift(ir.GetRegister(m), shift, shift_n, carry_in); const auto result = ir.SubWithCarry(ir.GetRegister(n), shifted.result, ir.Imm1(1)); - ir.SetRegister(d, result.result); + ir.SetRegister(d, result); if (S) { - ir.SetNFlag(ir.MostSignificantBit(result.result)); - ir.SetZFlag(ir.IsZero(result.result)); - ir.SetCFlag(result.carry); - ir.SetVFlag(result.overflow); + 
ir.SetCpsrNZCV(ir.NZCVFrom(result)); } return true; diff --git a/externals/dynarmic/src/dynarmic/frontend/A32/translate/impl/multiply.cpp b/externals/dynarmic/src/dynarmic/frontend/A32/translate/impl/multiply.cpp index 3af0600d7..0c497a874 100755 --- a/externals/dynarmic/src/dynarmic/frontend/A32/translate/impl/multiply.cpp +++ b/externals/dynarmic/src/dynarmic/frontend/A32/translate/impl/multiply.cpp @@ -260,10 +260,10 @@ bool TranslatorVisitor::arm_SMLAxy(Cond cond, Reg d, Reg a, Reg m, bool M, bool const IR::U32 m16 = M ? ir.ArithmeticShiftRight(m32, ir.Imm8(16), ir.Imm1(0)).result : ir.SignExtendHalfToWord(ir.LeastSignificantHalf(m32)); const IR::U32 product = ir.Mul(n16, m16); - const auto result_overflow = ir.AddWithCarry(product, ir.GetRegister(a), ir.Imm1(0)); + const auto result = ir.AddWithCarry(product, ir.GetRegister(a), ir.Imm1(0)); - ir.SetRegister(d, result_overflow.result); - ir.OrQFlag(result_overflow.overflow); + ir.SetRegister(d, result); + ir.OrQFlag(ir.GetOverflowFrom(result)); return true; } @@ -306,10 +306,10 @@ bool TranslatorVisitor::arm_SMLAWy(Cond cond, Reg d, Reg a, Reg m, bool M, Reg n } const IR::U64 m16 = ir.SignExtendWordToLong(ir.SignExtendHalfToWord(ir.LeastSignificantHalf(m32))); const auto product = ir.LeastSignificantWord(ir.LogicalShiftRight(ir.Mul(n32, m16), ir.Imm8(16))); - const auto result_overflow = ir.AddWithCarry(product, ir.GetRegister(a), ir.Imm1(0)); + const auto result = ir.AddWithCarry(product, ir.GetRegister(a), ir.Imm1(0)); - ir.SetRegister(d, result_overflow.result); - ir.OrQFlag(result_overflow.overflow); + ir.SetRegister(d, result); + ir.OrQFlag(ir.GetOverflowFrom(result)); return true; } @@ -352,7 +352,7 @@ bool TranslatorVisitor::arm_SMMLA(Cond cond, Reg d, Reg a, Reg m, bool R, Reg n) const auto result_carry = ir.MostSignificantWord(temp); auto result = result_carry.result; if (R) { - result = ir.AddWithCarry(result, ir.Imm32(0), result_carry.carry).result; + result = ir.AddWithCarry(result, ir.Imm32(0), 
result_carry.carry); } ir.SetRegister(d, result); @@ -376,7 +376,7 @@ bool TranslatorVisitor::arm_SMMLS(Cond cond, Reg d, Reg a, Reg m, bool R, Reg n) const auto result_carry = ir.MostSignificantWord(temp); auto result = result_carry.result; if (R) { - result = ir.AddWithCarry(result, ir.Imm32(0), result_carry.carry).result; + result = ir.AddWithCarry(result, ir.Imm32(0), result_carry.carry); } ir.SetRegister(d, result); @@ -399,7 +399,7 @@ bool TranslatorVisitor::arm_SMMUL(Cond cond, Reg d, Reg m, bool R, Reg n) { const auto result_carry = ir.MostSignificantWord(product); auto result = result_carry.result; if (R) { - result = ir.AddWithCarry(result, ir.Imm32(0), result_carry.carry).result; + result = ir.AddWithCarry(result, ir.Imm32(0), result_carry.carry); } ir.SetRegister(d, result); @@ -435,11 +435,11 @@ bool TranslatorVisitor::arm_SMLAD(Cond cond, Reg d, Reg a, Reg m, bool M, Reg n) const IR::U32 product_hi = ir.Mul(n_hi, m_hi); const IR::U32 addend = ir.GetRegister(a); - auto result_overflow = ir.AddWithCarry(product_lo, product_hi, ir.Imm1(0)); - ir.OrQFlag(result_overflow.overflow); - result_overflow = ir.AddWithCarry(result_overflow.result, addend, ir.Imm1(0)); - ir.SetRegister(d, result_overflow.result); - ir.OrQFlag(result_overflow.overflow); + auto result = ir.AddWithCarry(product_lo, product_hi, ir.Imm1(0)); + ir.OrQFlag(ir.GetOverflowFrom(result)); + result = ir.AddWithCarry(result, addend, ir.Imm1(0)); + ir.SetRegister(d, result); + ir.OrQFlag(ir.GetOverflowFrom(result)); return true; } @@ -507,10 +507,10 @@ bool TranslatorVisitor::arm_SMLSD(Cond cond, Reg d, Reg a, Reg m, bool M, Reg n) const IR::U32 product_hi = ir.Mul(n_hi, m_hi); const IR::U32 addend = ir.GetRegister(a); const IR::U32 product = ir.Sub(product_lo, product_hi); - auto result_overflow = ir.AddWithCarry(product, addend, ir.Imm1(0)); + auto result = ir.AddWithCarry(product, addend, ir.Imm1(0)); - ir.SetRegister(d, result_overflow.result); - ir.OrQFlag(result_overflow.overflow); + 
ir.SetRegister(d, result); + ir.OrQFlag(ir.GetOverflowFrom(result)); return true; } @@ -572,10 +572,10 @@ bool TranslatorVisitor::arm_SMUAD(Cond cond, Reg d, Reg m, bool M, Reg n) { const IR::U32 product_lo = ir.Mul(n_lo, m_lo); const IR::U32 product_hi = ir.Mul(n_hi, m_hi); - const auto result_overflow = ir.AddWithCarry(product_lo, product_hi, ir.Imm1(0)); + const auto result = ir.AddWithCarry(product_lo, product_hi, ir.Imm1(0)); - ir.SetRegister(d, result_overflow.result); - ir.OrQFlag(result_overflow.overflow); + ir.SetRegister(d, result); + ir.OrQFlag(ir.GetOverflowFrom(result)); return true; } diff --git a/externals/dynarmic/src/dynarmic/frontend/A32/translate/impl/parallel.cpp b/externals/dynarmic/src/dynarmic/frontend/A32/translate/impl/parallel.cpp index 8f036c6cf..921c8b128 100755 --- a/externals/dynarmic/src/dynarmic/frontend/A32/translate/impl/parallel.cpp +++ b/externals/dynarmic/src/dynarmic/frontend/A32/translate/impl/parallel.cpp @@ -194,7 +194,7 @@ bool TranslatorVisitor::arm_USADA8(Cond cond, Reg d, Reg a, Reg m, Reg n) { const auto tmp = ir.PackedAbsDiffSumS8(ir.GetRegister(n), ir.GetRegister(m)); const auto result = ir.AddWithCarry(ir.GetRegister(a), tmp, ir.Imm1(0)); - ir.SetRegister(d, result.result); + ir.SetRegister(d, result); return true; } diff --git a/externals/dynarmic/src/dynarmic/frontend/A32/translate/impl/thumb16.cpp b/externals/dynarmic/src/dynarmic/frontend/A32/translate/impl/thumb16.cpp index 6e2899d26..3714f4bc5 100755 --- a/externals/dynarmic/src/dynarmic/frontend/A32/translate/impl/thumb16.cpp +++ b/externals/dynarmic/src/dynarmic/frontend/A32/translate/impl/thumb16.cpp @@ -62,12 +62,9 @@ bool TranslatorVisitor::thumb16_ASR_imm(Imm<5> imm5, Reg m, Reg d) { bool TranslatorVisitor::thumb16_ADD_reg_t1(Reg m, Reg n, Reg d) { const auto result = ir.AddWithCarry(ir.GetRegister(n), ir.GetRegister(m), ir.Imm1(0)); - ir.SetRegister(d, result.result); + ir.SetRegister(d, result); if (!ir.current_location.IT().IsInITBlock()) { - 
ir.SetNFlag(ir.MostSignificantBit(result.result)); - ir.SetZFlag(ir.IsZero(result.result)); - ir.SetCFlag(result.carry); - ir.SetVFlag(result.overflow); + ir.SetCpsrNZCV(ir.NZCVFrom(result)); } return true; } @@ -77,12 +74,9 @@ bool TranslatorVisitor::thumb16_ADD_reg_t1(Reg m, Reg n, Reg d) { bool TranslatorVisitor::thumb16_SUB_reg(Reg m, Reg n, Reg d) { const auto result = ir.SubWithCarry(ir.GetRegister(n), ir.GetRegister(m), ir.Imm1(1)); - ir.SetRegister(d, result.result); + ir.SetRegister(d, result); if (!ir.current_location.IT().IsInITBlock()) { - ir.SetNFlag(ir.MostSignificantBit(result.result)); - ir.SetZFlag(ir.IsZero(result.result)); - ir.SetCFlag(result.carry); - ir.SetVFlag(result.overflow); + ir.SetCpsrNZCV(ir.NZCVFrom(result)); } return true; } @@ -93,12 +87,9 @@ bool TranslatorVisitor::thumb16_ADD_imm_t1(Imm<3> imm3, Reg n, Reg d) { const u32 imm32 = imm3.ZeroExtend(); const auto result = ir.AddWithCarry(ir.GetRegister(n), ir.Imm32(imm32), ir.Imm1(0)); - ir.SetRegister(d, result.result); + ir.SetRegister(d, result); if (!ir.current_location.IT().IsInITBlock()) { - ir.SetNFlag(ir.MostSignificantBit(result.result)); - ir.SetZFlag(ir.IsZero(result.result)); - ir.SetCFlag(result.carry); - ir.SetVFlag(result.overflow); + ir.SetCpsrNZCV(ir.NZCVFrom(result)); } return true; } @@ -109,12 +100,9 @@ bool TranslatorVisitor::thumb16_SUB_imm_t1(Imm<3> imm3, Reg n, Reg d) { const u32 imm32 = imm3.ZeroExtend(); const auto result = ir.SubWithCarry(ir.GetRegister(n), ir.Imm32(imm32), ir.Imm1(1)); - ir.SetRegister(d, result.result); + ir.SetRegister(d, result); if (!ir.current_location.IT().IsInITBlock()) { - ir.SetNFlag(ir.MostSignificantBit(result.result)); - ir.SetZFlag(ir.IsZero(result.result)); - ir.SetCFlag(result.carry); - ir.SetVFlag(result.overflow); + ir.SetCpsrNZCV(ir.NZCVFrom(result)); } return true; } @@ -138,10 +126,7 @@ bool TranslatorVisitor::thumb16_CMP_imm(Reg n, Imm<8> imm8) { const u32 imm32 = imm8.ZeroExtend(); const auto result = 
ir.SubWithCarry(ir.GetRegister(n), ir.Imm32(imm32), ir.Imm1(1)); - ir.SetNFlag(ir.MostSignificantBit(result.result)); - ir.SetZFlag(ir.IsZero(result.result)); - ir.SetCFlag(result.carry); - ir.SetVFlag(result.overflow); + ir.SetCpsrNZCV(ir.NZCVFrom(result)); return true; } @@ -153,12 +138,9 @@ bool TranslatorVisitor::thumb16_ADD_imm_t2(Reg d_n, Imm<8> imm8) { const Reg n = d_n; const auto result = ir.AddWithCarry(ir.GetRegister(n), ir.Imm32(imm32), ir.Imm1(0)); - ir.SetRegister(d, result.result); + ir.SetRegister(d, result); if (!ir.current_location.IT().IsInITBlock()) { - ir.SetNFlag(ir.MostSignificantBit(result.result)); - ir.SetZFlag(ir.IsZero(result.result)); - ir.SetCFlag(result.carry); - ir.SetVFlag(result.overflow); + ir.SetCpsrNZCV(ir.NZCVFrom(result)); } return true; } @@ -171,12 +153,9 @@ bool TranslatorVisitor::thumb16_SUB_imm_t2(Reg d_n, Imm<8> imm8) { const Reg n = d_n; const auto result = ir.SubWithCarry(ir.GetRegister(n), ir.Imm32(imm32), ir.Imm1(1)); - ir.SetRegister(d, result.result); + ir.SetRegister(d, result); if (!ir.current_location.IT().IsInITBlock()) { - ir.SetNFlag(ir.MostSignificantBit(result.result)); - ir.SetZFlag(ir.IsZero(result.result)); - ir.SetCFlag(result.carry); - ir.SetVFlag(result.overflow); + ir.SetCpsrNZCV(ir.NZCVFrom(result)); } return true; } @@ -270,12 +249,9 @@ bool TranslatorVisitor::thumb16_ADC_reg(Reg m, Reg d_n) { const auto aspr_c = ir.GetCFlag(); const auto result = ir.AddWithCarry(ir.GetRegister(n), ir.GetRegister(m), aspr_c); - ir.SetRegister(d, result.result); + ir.SetRegister(d, result); if (!ir.current_location.IT().IsInITBlock()) { - ir.SetNFlag(ir.MostSignificantBit(result.result)); - ir.SetZFlag(ir.IsZero(result.result)); - ir.SetCFlag(result.carry); - ir.SetVFlag(result.overflow); + ir.SetCpsrNZCV(ir.NZCVFrom(result)); } return true; } @@ -288,12 +264,9 @@ bool TranslatorVisitor::thumb16_SBC_reg(Reg m, Reg d_n) { const auto aspr_c = ir.GetCFlag(); const auto result = ir.SubWithCarry(ir.GetRegister(n), 
ir.GetRegister(m), aspr_c); - ir.SetRegister(d, result.result); + ir.SetRegister(d, result); if (!ir.current_location.IT().IsInITBlock()) { - ir.SetNFlag(ir.MostSignificantBit(result.result)); - ir.SetZFlag(ir.IsZero(result.result)); - ir.SetCFlag(result.carry); - ir.SetVFlag(result.overflow); + ir.SetCpsrNZCV(ir.NZCVFrom(result)); } return true; } @@ -327,12 +300,9 @@ bool TranslatorVisitor::thumb16_TST_reg(Reg m, Reg n) { // Rd can never encode R15. bool TranslatorVisitor::thumb16_RSB_imm(Reg n, Reg d) { const auto result = ir.SubWithCarry(ir.Imm32(0), ir.GetRegister(n), ir.Imm1(1)); - ir.SetRegister(d, result.result); + ir.SetRegister(d, result); if (!ir.current_location.IT().IsInITBlock()) { - ir.SetNFlag(ir.MostSignificantBit(result.result)); - ir.SetZFlag(ir.IsZero(result.result)); - ir.SetCFlag(result.carry); - ir.SetVFlag(result.overflow); + ir.SetCpsrNZCV(ir.NZCVFrom(result)); } return true; } @@ -340,20 +310,14 @@ bool TranslatorVisitor::thumb16_RSB_imm(Reg n, Reg d) { // CMP , bool TranslatorVisitor::thumb16_CMP_reg_t1(Reg m, Reg n) { const auto result = ir.SubWithCarry(ir.GetRegister(n), ir.GetRegister(m), ir.Imm1(1)); - ir.SetNFlag(ir.MostSignificantBit(result.result)); - ir.SetZFlag(ir.IsZero(result.result)); - ir.SetCFlag(result.carry); - ir.SetVFlag(result.overflow); + ir.SetCpsrNZCV(ir.NZCVFrom(result)); return true; } // CMN , bool TranslatorVisitor::thumb16_CMN_reg(Reg m, Reg n) { const auto result = ir.AddWithCarry(ir.GetRegister(n), ir.GetRegister(m), ir.Imm1(0)); - ir.SetNFlag(ir.MostSignificantBit(result.result)); - ir.SetZFlag(ir.IsZero(result.result)); - ir.SetCFlag(result.carry); - ir.SetVFlag(result.overflow); + ir.SetCpsrNZCV(ir.NZCVFrom(result)); return true; } @@ -430,12 +394,12 @@ bool TranslatorVisitor::thumb16_ADD_reg_t2(bool d_n_hi, Reg m, Reg d_n_lo) { const auto result = ir.AddWithCarry(ir.GetRegister(n), ir.GetRegister(m), ir.Imm1(0)); if (d == Reg::PC) { ir.UpdateUpperLocationDescriptor(); - ir.ALUWritePC(result.result); + 
ir.ALUWritePC(result); // Return to dispatch as we can't predict what PC is going to be. Stop compilation. ir.SetTerm(IR::Term::FastDispatchHint{}); return false; } else { - ir.SetRegister(d, result.result); + ir.SetRegister(d, result); return true; } } @@ -451,10 +415,7 @@ bool TranslatorVisitor::thumb16_CMP_reg_t2(bool n_hi, Reg m, Reg n_lo) { } const auto result = ir.SubWithCarry(ir.GetRegister(n), ir.GetRegister(m), ir.Imm1(1)); - ir.SetNFlag(ir.MostSignificantBit(result.result)); - ir.SetZFlag(ir.IsZero(result.result)); - ir.SetCFlag(result.carry); - ir.SetVFlag(result.overflow); + ir.SetCpsrNZCV(ir.NZCVFrom(result)); return true; } @@ -672,7 +633,7 @@ bool TranslatorVisitor::thumb16_ADD_sp_t1(Reg d, Imm<8> imm8) { const u32 imm32 = imm8.ZeroExtend() << 2; const auto result = ir.AddWithCarry(ir.GetRegister(Reg::SP), ir.Imm32(imm32), ir.Imm1(0)); - ir.SetRegister(d, result.result); + ir.SetRegister(d, result); return true; } @@ -682,7 +643,7 @@ bool TranslatorVisitor::thumb16_ADD_sp_t2(Imm<7> imm7) { const Reg d = Reg::SP; const auto result = ir.AddWithCarry(ir.GetRegister(Reg::SP), ir.Imm32(imm32), ir.Imm1(0)); - ir.SetRegister(d, result.result); + ir.SetRegister(d, result); return true; } @@ -692,7 +653,7 @@ bool TranslatorVisitor::thumb16_SUB_sp(Imm<7> imm7) { const Reg d = Reg::SP; const auto result = ir.SubWithCarry(ir.GetRegister(Reg::SP), ir.Imm32(imm32), ir.Imm1(1)); - ir.SetRegister(d, result.result); + ir.SetRegister(d, result); return true; } diff --git a/externals/dynarmic/src/dynarmic/frontend/A32/translate/impl/thumb32_coprocessor.cpp b/externals/dynarmic/src/dynarmic/frontend/A32/translate/impl/thumb32_coprocessor.cpp index 23cf5fa82..71f108f91 100755 --- a/externals/dynarmic/src/dynarmic/frontend/A32/translate/impl/thumb32_coprocessor.cpp +++ b/externals/dynarmic/src/dynarmic/frontend/A32/translate/impl/thumb32_coprocessor.cpp @@ -67,7 +67,7 @@ bool TranslatorVisitor::thumb32_MRC(bool two, size_t opc1, CoprocReg CRn, Reg t, ir.SetRegister(t, 
word); } else { const auto new_cpsr_nzcv = ir.And(word, ir.Imm32(0xF0000000)); - ir.SetCpsrNZCV(new_cpsr_nzcv); + ir.SetCpsrNZCVRaw(new_cpsr_nzcv); } return true; } diff --git a/externals/dynarmic/src/dynarmic/frontend/A32/translate/impl/thumb32_data_processing_modified_immediate.cpp b/externals/dynarmic/src/dynarmic/frontend/A32/translate/impl/thumb32_data_processing_modified_immediate.cpp index f7c710c7f..f6ef5db72 100755 --- a/externals/dynarmic/src/dynarmic/frontend/A32/translate/impl/thumb32_data_processing_modified_immediate.cpp +++ b/externals/dynarmic/src/dynarmic/frontend/A32/translate/impl/thumb32_data_processing_modified_immediate.cpp @@ -166,10 +166,7 @@ bool TranslatorVisitor::thumb32_CMN_imm(Imm<1> i, Reg n, Imm<3> imm3, Imm<8> imm const auto imm32 = ThumbExpandImm(i, imm3, imm8); const auto result = ir.AddWithCarry(ir.GetRegister(n), ir.Imm32(imm32), ir.Imm1(0)); - ir.SetNFlag(ir.MostSignificantBit(result.result)); - ir.SetZFlag(ir.IsZero(result.result)); - ir.SetCFlag(result.carry); - ir.SetVFlag(result.overflow); + ir.SetCpsrNZCV(ir.NZCVFrom(result)); return true; } @@ -182,12 +179,9 @@ bool TranslatorVisitor::thumb32_ADD_imm_1(Imm<1> i, bool S, Reg n, Imm<3> imm3, const auto imm32 = ThumbExpandImm(i, imm3, imm8); const auto result = ir.AddWithCarry(ir.GetRegister(n), ir.Imm32(imm32), ir.Imm1(0)); - ir.SetRegister(d, result.result); + ir.SetRegister(d, result); if (S) { - ir.SetNFlag(ir.MostSignificantBit(result.result)); - ir.SetZFlag(ir.IsZero(result.result)); - ir.SetCFlag(result.carry); - ir.SetVFlag(result.overflow); + ir.SetCpsrNZCV(ir.NZCVFrom(result)); } return true; } @@ -200,12 +194,9 @@ bool TranslatorVisitor::thumb32_ADC_imm(Imm<1> i, bool S, Reg n, Imm<3> imm3, Re const auto imm32 = ThumbExpandImm(i, imm3, imm8); const auto result = ir.AddWithCarry(ir.GetRegister(n), ir.Imm32(imm32), ir.GetCFlag()); - ir.SetRegister(d, result.result); + ir.SetRegister(d, result); if (S) { - ir.SetNFlag(ir.MostSignificantBit(result.result)); - 
ir.SetZFlag(ir.IsZero(result.result)); - ir.SetCFlag(result.carry); - ir.SetVFlag(result.overflow); + ir.SetCpsrNZCV(ir.NZCVFrom(result)); } return true; } @@ -218,12 +209,9 @@ bool TranslatorVisitor::thumb32_SBC_imm(Imm<1> i, bool S, Reg n, Imm<3> imm3, Re const auto imm32 = ThumbExpandImm(i, imm3, imm8); const auto result = ir.SubWithCarry(ir.GetRegister(n), ir.Imm32(imm32), ir.GetCFlag()); - ir.SetRegister(d, result.result); + ir.SetRegister(d, result); if (S) { - ir.SetNFlag(ir.MostSignificantBit(result.result)); - ir.SetZFlag(ir.IsZero(result.result)); - ir.SetCFlag(result.carry); - ir.SetVFlag(result.overflow); + ir.SetCpsrNZCV(ir.NZCVFrom(result)); } return true; } @@ -236,10 +224,7 @@ bool TranslatorVisitor::thumb32_CMP_imm(Imm<1> i, Reg n, Imm<3> imm3, Imm<8> imm const auto imm32 = ThumbExpandImm(i, imm3, imm8); const auto result = ir.SubWithCarry(ir.GetRegister(n), ir.Imm32(imm32), ir.Imm1(1)); - ir.SetNFlag(ir.MostSignificantBit(result.result)); - ir.SetZFlag(ir.IsZero(result.result)); - ir.SetCFlag(result.carry); - ir.SetVFlag(result.overflow); + ir.SetCpsrNZCV(ir.NZCVFrom(result)); return true; } @@ -252,12 +237,9 @@ bool TranslatorVisitor::thumb32_SUB_imm_1(Imm<1> i, bool S, Reg n, Imm<3> imm3, const auto imm32 = ThumbExpandImm(i, imm3, imm8); const auto result = ir.SubWithCarry(ir.GetRegister(n), ir.Imm32(imm32), ir.Imm1(1)); - ir.SetRegister(d, result.result); + ir.SetRegister(d, result); if (S) { - ir.SetNFlag(ir.MostSignificantBit(result.result)); - ir.SetZFlag(ir.IsZero(result.result)); - ir.SetCFlag(result.carry); - ir.SetVFlag(result.overflow); + ir.SetCpsrNZCV(ir.NZCVFrom(result)); } return true; } @@ -270,12 +252,9 @@ bool TranslatorVisitor::thumb32_RSB_imm(Imm<1> i, bool S, Reg n, Imm<3> imm3, Re const auto imm32 = ThumbExpandImm(i, imm3, imm8); const auto result = ir.SubWithCarry(ir.Imm32(imm32), ir.GetRegister(n), ir.Imm1(1)); - ir.SetRegister(d, result.result); + ir.SetRegister(d, result); if (S) { - 
ir.SetNFlag(ir.MostSignificantBit(result.result)); - ir.SetZFlag(ir.IsZero(result.result)); - ir.SetCFlag(result.carry); - ir.SetVFlag(result.overflow); + ir.SetCpsrNZCV(ir.NZCVFrom(result)); } return true; } diff --git a/externals/dynarmic/src/dynarmic/frontend/A32/translate/impl/thumb32_data_processing_plain_binary_immediate.cpp b/externals/dynarmic/src/dynarmic/frontend/A32/translate/impl/thumb32_data_processing_plain_binary_immediate.cpp index ffe5e56de..be4040d10 100755 --- a/externals/dynarmic/src/dynarmic/frontend/A32/translate/impl/thumb32_data_processing_plain_binary_immediate.cpp +++ b/externals/dynarmic/src/dynarmic/frontend/A32/translate/impl/thumb32_data_processing_plain_binary_immediate.cpp @@ -85,7 +85,7 @@ bool TranslatorVisitor::thumb32_ADD_imm_2(Imm<1> imm1, Reg n, Imm<3> imm3, Reg d const auto reg_n = ir.GetRegister(n); const auto result = ir.AddWithCarry(reg_n, ir.Imm32(imm), ir.Imm1(0)); - ir.SetRegister(d, result.result); + ir.SetRegister(d, result); return true; } @@ -196,7 +196,7 @@ bool TranslatorVisitor::thumb32_SUB_imm_2(Imm<1> imm1, Reg n, Imm<3> imm3, Reg d const auto reg_n = ir.GetRegister(n); const auto result = ir.SubWithCarry(reg_n, ir.Imm32(imm), ir.Imm1(1)); - ir.SetRegister(d, result.result); + ir.SetRegister(d, result); return true; } diff --git a/externals/dynarmic/src/dynarmic/frontend/A32/translate/impl/thumb32_data_processing_shifted_register.cpp b/externals/dynarmic/src/dynarmic/frontend/A32/translate/impl/thumb32_data_processing_shifted_register.cpp index 82a25fad1..0543ebca1 100755 --- a/externals/dynarmic/src/dynarmic/frontend/A32/translate/impl/thumb32_data_processing_shifted_register.cpp +++ b/externals/dynarmic/src/dynarmic/frontend/A32/translate/impl/thumb32_data_processing_shifted_register.cpp @@ -178,10 +178,7 @@ bool TranslatorVisitor::thumb32_CMN_reg(Reg n, Imm<3> imm3, Imm<2> imm2, ShiftTy const auto shifted = EmitImmShift(ir.GetRegister(m), type, imm3, imm2, ir.GetCFlag()); const auto result = 
ir.AddWithCarry(ir.GetRegister(n), shifted.result, ir.Imm1(0)); - ir.SetNFlag(ir.MostSignificantBit(result.result)); - ir.SetZFlag(ir.IsZero(result.result)); - ir.SetCFlag(result.carry); - ir.SetVFlag(result.overflow); + ir.SetCpsrNZCV(ir.NZCVFrom(result)); return true; } @@ -194,12 +191,9 @@ bool TranslatorVisitor::thumb32_ADD_reg(bool S, Reg n, Imm<3> imm3, Reg d, Imm<2 const auto shifted = EmitImmShift(ir.GetRegister(m), type, imm3, imm2, ir.GetCFlag()); const auto result = ir.AddWithCarry(ir.GetRegister(n), shifted.result, ir.Imm1(0)); - ir.SetRegister(d, result.result); + ir.SetRegister(d, result); if (S) { - ir.SetNFlag(ir.MostSignificantBit(result.result)); - ir.SetZFlag(ir.IsZero(result.result)); - ir.SetCFlag(result.carry); - ir.SetVFlag(result.overflow); + ir.SetCpsrNZCV(ir.NZCVFrom(result)); } return true; } @@ -211,12 +205,9 @@ bool TranslatorVisitor::thumb32_ADC_reg(bool S, Reg n, Imm<3> imm3, Reg d, Imm<2 const auto shifted = EmitImmShift(ir.GetRegister(m), type, imm3, imm2, ir.GetCFlag()); const auto result = ir.AddWithCarry(ir.GetRegister(n), shifted.result, ir.GetCFlag()); - ir.SetRegister(d, result.result); + ir.SetRegister(d, result); if (S) { - ir.SetNFlag(ir.MostSignificantBit(result.result)); - ir.SetZFlag(ir.IsZero(result.result)); - ir.SetCFlag(result.carry); - ir.SetVFlag(result.overflow); + ir.SetCpsrNZCV(ir.NZCVFrom(result)); } return true; } @@ -228,12 +219,9 @@ bool TranslatorVisitor::thumb32_SBC_reg(bool S, Reg n, Imm<3> imm3, Reg d, Imm<2 const auto shifted = EmitImmShift(ir.GetRegister(m), type, imm3, imm2, ir.GetCFlag()); const auto result = ir.SubWithCarry(ir.GetRegister(n), shifted.result, ir.GetCFlag()); - ir.SetRegister(d, result.result); + ir.SetRegister(d, result); if (S) { - ir.SetNFlag(ir.MostSignificantBit(result.result)); - ir.SetZFlag(ir.IsZero(result.result)); - ir.SetCFlag(result.carry); - ir.SetVFlag(result.overflow); + ir.SetCpsrNZCV(ir.NZCVFrom(result)); } return true; } @@ -246,10 +234,7 @@ bool 
TranslatorVisitor::thumb32_CMP_reg(Reg n, Imm<3> imm3, Imm<2> imm2, ShiftTy const auto shifted = EmitImmShift(ir.GetRegister(m), type, imm3, imm2, ir.GetCFlag()); const auto result = ir.SubWithCarry(ir.GetRegister(n), shifted.result, ir.Imm1(1)); - ir.SetNFlag(ir.MostSignificantBit(result.result)); - ir.SetZFlag(ir.IsZero(result.result)); - ir.SetCFlag(result.carry); - ir.SetVFlag(result.overflow); + ir.SetCpsrNZCV(ir.NZCVFrom(result)); return true; } @@ -262,12 +247,9 @@ bool TranslatorVisitor::thumb32_SUB_reg(bool S, Reg n, Imm<3> imm3, Reg d, Imm<2 const auto shifted = EmitImmShift(ir.GetRegister(m), type, imm3, imm2, ir.GetCFlag()); const auto result = ir.SubWithCarry(ir.GetRegister(n), shifted.result, ir.Imm1(1)); - ir.SetRegister(d, result.result); + ir.SetRegister(d, result); if (S) { - ir.SetNFlag(ir.MostSignificantBit(result.result)); - ir.SetZFlag(ir.IsZero(result.result)); - ir.SetCFlag(result.carry); - ir.SetVFlag(result.overflow); + ir.SetCpsrNZCV(ir.NZCVFrom(result)); } return true; } @@ -279,12 +261,9 @@ bool TranslatorVisitor::thumb32_RSB_reg(bool S, Reg n, Imm<3> imm3, Reg d, Imm<2 const auto shifted = EmitImmShift(ir.GetRegister(m), type, imm3, imm2, ir.GetCFlag()); const auto result = ir.SubWithCarry(shifted.result, ir.GetRegister(n), ir.Imm1(1)); - ir.SetRegister(d, result.result); + ir.SetRegister(d, result); if (S) { - ir.SetNFlag(ir.MostSignificantBit(result.result)); - ir.SetZFlag(ir.IsZero(result.result)); - ir.SetCFlag(result.carry); - ir.SetVFlag(result.overflow); + ir.SetCpsrNZCV(ir.NZCVFrom(result)); } return true; } diff --git a/externals/dynarmic/src/dynarmic/frontend/A32/translate/impl/thumb32_multiply.cpp b/externals/dynarmic/src/dynarmic/frontend/A32/translate/impl/thumb32_multiply.cpp index bcbcf73c0..6db559d19 100755 --- a/externals/dynarmic/src/dynarmic/frontend/A32/translate/impl/thumb32_multiply.cpp +++ b/externals/dynarmic/src/dynarmic/frontend/A32/translate/impl/thumb32_multiply.cpp @@ -68,12 +68,12 @@ bool 
TranslatorVisitor::thumb32_SMLAD(Reg n, Reg a, Reg d, bool X, Reg m) { const IR::U32 product_hi = ir.Mul(n_hi, m_hi); const IR::U32 addend = ir.GetRegister(a); - auto result_overflow = ir.AddWithCarry(product_lo, product_hi, ir.Imm1(0)); - ir.OrQFlag(result_overflow.overflow); - result_overflow = ir.AddWithCarry(result_overflow.result, addend, ir.Imm1(0)); + auto result = ir.AddWithCarry(product_lo, product_hi, ir.Imm1(0)); + ir.OrQFlag(ir.GetOverflowFrom(result)); + result = ir.AddWithCarry(result, addend, ir.Imm1(0)); - ir.SetRegister(d, result_overflow.result); - ir.OrQFlag(result_overflow.overflow); + ir.SetRegister(d, result); + ir.OrQFlag(ir.GetOverflowFrom(result)); return true; } @@ -97,10 +97,10 @@ bool TranslatorVisitor::thumb32_SMLSD(Reg n, Reg a, Reg d, bool X, Reg m) { const IR::U32 product_hi = ir.Mul(n_hi, m_hi); const IR::U32 addend = ir.GetRegister(a); const IR::U32 product = ir.Sub(product_lo, product_hi); - auto result_overflow = ir.AddWithCarry(product, addend, ir.Imm1(0)); + auto result = ir.AddWithCarry(product, addend, ir.Imm1(0)); - ir.SetRegister(d, result_overflow.result); - ir.OrQFlag(result_overflow.overflow); + ir.SetRegister(d, result); + ir.OrQFlag(ir.GetOverflowFrom(result)); return true; } @@ -116,10 +116,10 @@ bool TranslatorVisitor::thumb32_SMLAXY(Reg n, Reg a, Reg d, bool N, bool M, Reg const IR::U32 m16 = M ? 
ir.ArithmeticShiftRight(m32, ir.Imm8(16), ir.Imm1(0)).result : ir.SignExtendHalfToWord(ir.LeastSignificantHalf(m32)); const IR::U32 product = ir.Mul(n16, m16); - const auto result_overflow = ir.AddWithCarry(product, ir.GetRegister(a), ir.Imm1(0)); + const auto result = ir.AddWithCarry(product, ir.GetRegister(a), ir.Imm1(0)); - ir.SetRegister(d, result_overflow.result); - ir.OrQFlag(result_overflow.overflow); + ir.SetRegister(d, result); + ir.OrQFlag(ir.GetOverflowFrom(result)); return true; } @@ -135,7 +135,7 @@ bool TranslatorVisitor::thumb32_SMMLA(Reg n, Reg a, Reg d, bool R, Reg m) { const auto result_carry = ir.MostSignificantWord(temp); auto result = result_carry.result; if (R) { - result = ir.AddWithCarry(result, ir.Imm32(0), result_carry.carry).result; + result = ir.AddWithCarry(result, ir.Imm32(0), result_carry.carry); } ir.SetRegister(d, result); @@ -154,7 +154,7 @@ bool TranslatorVisitor::thumb32_SMMLS(Reg n, Reg a, Reg d, bool R, Reg m) { const auto result_carry = ir.MostSignificantWord(temp); auto result = result_carry.result; if (R) { - result = ir.AddWithCarry(result, ir.Imm32(0), result_carry.carry).result; + result = ir.AddWithCarry(result, ir.Imm32(0), result_carry.carry); } ir.SetRegister(d, result); @@ -172,7 +172,7 @@ bool TranslatorVisitor::thumb32_SMMUL(Reg n, Reg d, bool R, Reg m) { const auto result_carry = ir.MostSignificantWord(product); auto result = result_carry.result; if (R) { - result = ir.AddWithCarry(result, ir.Imm32(0), result_carry.carry).result; + result = ir.AddWithCarry(result, ir.Imm32(0), result_carry.carry); } ir.SetRegister(d, result); @@ -197,10 +197,10 @@ bool TranslatorVisitor::thumb32_SMUAD(Reg n, Reg d, bool M, Reg m) { const IR::U32 product_lo = ir.Mul(n_lo, m_lo); const IR::U32 product_hi = ir.Mul(n_hi, m_hi); - const auto result_overflow = ir.AddWithCarry(product_lo, product_hi, ir.Imm1(0)); + const auto result = ir.AddWithCarry(product_lo, product_hi, ir.Imm1(0)); - ir.SetRegister(d, result_overflow.result); - 
ir.OrQFlag(result_overflow.overflow); + ir.SetRegister(d, result); + ir.OrQFlag(ir.GetOverflowFrom(result)); return true; } @@ -257,10 +257,10 @@ bool TranslatorVisitor::thumb32_SMLAWY(Reg n, Reg a, Reg d, bool M, Reg m) { } const IR::U64 m16 = ir.SignExtendWordToLong(ir.SignExtendHalfToWord(ir.LeastSignificantHalf(m32))); const auto product = ir.LeastSignificantWord(ir.LogicalShiftRight(ir.Mul(n32, m16), ir.Imm8(16))); - const auto result_overflow = ir.AddWithCarry(product, ir.GetRegister(a), ir.Imm1(0)); + const auto result = ir.AddWithCarry(product, ir.GetRegister(a), ir.Imm1(0)); - ir.SetRegister(d, result_overflow.result); - ir.OrQFlag(result_overflow.overflow); + ir.SetRegister(d, result); + ir.OrQFlag(ir.GetOverflowFrom(result)); return true; } @@ -305,7 +305,7 @@ bool TranslatorVisitor::thumb32_USADA8(Reg n, Reg a, Reg d, Reg m) { const auto tmp = ir.PackedAbsDiffSumS8(reg_n, reg_m); const auto result = ir.AddWithCarry(reg_a, tmp, ir.Imm1(0)); - ir.SetRegister(d, result.result); + ir.SetRegister(d, result); return true; } diff --git a/externals/dynarmic/src/dynarmic/frontend/A32/translate/impl/vfp.cpp b/externals/dynarmic/src/dynarmic/frontend/A32/translate/impl/vfp.cpp index c36225334..0ca541088 100755 --- a/externals/dynarmic/src/dynarmic/frontend/A32/translate/impl/vfp.cpp +++ b/externals/dynarmic/src/dynarmic/frontend/A32/translate/impl/vfp.cpp @@ -1155,7 +1155,7 @@ bool TranslatorVisitor::vfp_VMRS(Cond cond, Reg t) { if (t == Reg::R15) { // This encodes ASPR_nzcv access const auto nzcv = ir.GetFpscrNZCV(); - ir.SetCpsrNZCV(nzcv); + ir.SetCpsrNZCVRaw(nzcv); } else { ir.SetRegister(t, ir.GetFpscr()); } diff --git a/externals/dynarmic/src/dynarmic/interface/optimization_flags.h b/externals/dynarmic/src/dynarmic/interface/optimization_flags.h index 143edc5ce..df7eee3e5 100755 --- a/externals/dynarmic/src/dynarmic/interface/optimization_flags.h +++ b/externals/dynarmic/src/dynarmic/interface/optimization_flags.h @@ -42,6 +42,9 @@ enum class OptimizationFlag 
: std::uint32_t { /// This is an UNSAFE optimization that causes floating-point instructions to not produce correct NaNs. /// This may also result in inaccurate results when instructions are given certain special values. Unsafe_InaccurateNaN = 0x00040000, + /// This is an UNSAFE optimization that causes ASIMD floating-point instructions to be run with incorrect + /// rounding modes. This may result in inaccurate results with all floating-point ASIMD instructions. + Unsafe_IgnoreStandardFPCRValue = 0x00080000, }; constexpr OptimizationFlag no_optimizations = static_cast(0); diff --git a/externals/dynarmic/src/dynarmic/ir/ir_emitter.cpp b/externals/dynarmic/src/dynarmic/ir/ir_emitter.cpp index 2f289d670..4f13c318c 100755 --- a/externals/dynarmic/src/dynarmic/ir/ir_emitter.cpp +++ b/externals/dynarmic/src/dynarmic/ir/ir_emitter.cpp @@ -240,13 +240,6 @@ U32U64 IREmitter::RotateRightMasked(const U32U64& value_in, const U32U64& shift_ } } -ResultAndCarryAndOverflow IREmitter::AddWithCarry(const U32& a, const U32& b, const U1& carry_in) { - const auto result = Inst(Opcode::Add32, a, b, carry_in); - const auto carry_out = Inst(Opcode::GetCarryFromOp, result); - const auto overflow = Inst(Opcode::GetOverflowFromOp, result); - return {result, carry_out, overflow}; -} - U32U64 IREmitter::AddWithCarry(const U32U64& a, const U32U64& b, const U1& carry_in) { ASSERT(a.GetType() == b.GetType()); if (a.GetType() == Type::U32) { @@ -265,14 +258,6 @@ U32U64 IREmitter::Add(const U32U64& a, const U32U64& b) { } } -ResultAndCarryAndOverflow IREmitter::SubWithCarry(const U32& a, const U32& b, const U1& carry_in) { - // This is equivalent to AddWithCarry(a, Not(b), carry_in). 
- const auto result = Inst(Opcode::Sub32, a, b, carry_in); - const auto carry_out = Inst(Opcode::GetCarryFromOp, result); - const auto overflow = Inst(Opcode::GetOverflowFromOp, result); - return {result, carry_out, overflow}; -} - U32U64 IREmitter::SubWithCarry(const U32U64& a, const U32U64& b, const U1& carry_in) { ASSERT(a.GetType() == b.GetType()); if (a.GetType() == Type::U32) { diff --git a/externals/dynarmic/src/dynarmic/ir/ir_emitter.h b/externals/dynarmic/src/dynarmic/ir/ir_emitter.h index caba937d4..f4cf66fda 100755 --- a/externals/dynarmic/src/dynarmic/ir/ir_emitter.h +++ b/externals/dynarmic/src/dynarmic/ir/ir_emitter.h @@ -134,8 +134,6 @@ public: U32U64 ArithmeticShiftRightMasked(const U32U64& value_in, const U32U64& shift_amount); U32U64 RotateRightMasked(const U32U64& value_in, const U32U64& shift_amount); ResultAndCarry RotateRightExtended(const U32& value_in, const U1& carry_in); - ResultAndCarryAndOverflow AddWithCarry(const U32& a, const U32& b, const U1& carry_in); - ResultAndCarryAndOverflow SubWithCarry(const U32& a, const U32& b, const U1& carry_in); U32U64 AddWithCarry(const U32U64& a, const U32U64& b, const U1& carry_in); U32U64 SubWithCarry(const U32U64& a, const U32U64& b, const U1& carry_in); U32U64 Add(const U32U64& a, const U32U64& b); diff --git a/externals/dynarmic/src/dynarmic/ir/microinstruction.cpp b/externals/dynarmic/src/dynarmic/ir/microinstruction.cpp index a499c327a..858c5bc2e 100755 --- a/externals/dynarmic/src/dynarmic/ir/microinstruction.cpp +++ b/externals/dynarmic/src/dynarmic/ir/microinstruction.cpp @@ -176,6 +176,7 @@ bool Inst::ReadsFromCPSR() const { bool Inst::WritesToCPSR() const { switch (op) { case Opcode::A32SetCpsr: + case Opcode::A32SetCpsrNZCVRaw: case Opcode::A32SetCpsrNZCV: case Opcode::A32SetCpsrNZCVQ: case Opcode::A32SetNFlag: diff --git a/externals/dynarmic/src/dynarmic/ir/opcodes.inc b/externals/dynarmic/src/dynarmic/ir/opcodes.inc index f8995b19c..be0cda9e1 100755 --- 
a/externals/dynarmic/src/dynarmic/ir/opcodes.inc +++ b/externals/dynarmic/src/dynarmic/ir/opcodes.inc @@ -17,7 +17,8 @@ A32OPC(SetExtendedRegister64, Void, A32E A32OPC(SetVector, Void, A32ExtReg, U128 ) A32OPC(GetCpsr, U32, ) A32OPC(SetCpsr, Void, U32 ) -A32OPC(SetCpsrNZCV, Void, U32 ) +A32OPC(SetCpsrNZCV, Void, NZCV ) +A32OPC(SetCpsrNZCVRaw, Void, U32 ) A32OPC(SetCpsrNZCVQ, Void, U32 ) A32OPC(GetNFlag, U1, ) A32OPC(SetNFlag, Void, U1 ) diff --git a/externals/dynarmic/tests/A64/a64.cpp b/externals/dynarmic/tests/A64/a64.cpp index 6a465cfac..a6e549755 100755 --- a/externals/dynarmic/tests/A64/a64.cpp +++ b/externals/dynarmic/tests/A64/a64.cpp @@ -454,6 +454,116 @@ TEST_CASE("A64: FABD", "[a64]") { REQUIRE(jit.GetVector(22) == Vector{0x56d3f0857fc90e2b, 0x6e4b0a4144873176}); } +TEST_CASE("A64: FABS", "[a64]") { + A64TestEnv env; + A64::Jit jit{A64::UserConfig{&env}}; + + env.code_mem.emplace_back(0x4ef8f804); // FABS v4.8h, v0.8h + env.code_mem.emplace_back(0x4ea0f825); // FABS v5.4s, v1.4s + env.code_mem.emplace_back(0x4ee0f846); // FABS v6.2d, v2.2d + env.code_mem.emplace_back(0x14000000); // B . + + jit.SetPC(0); + jit.SetVector(0, {0xffffffffffffffff, 0xffffffffffff8000}); + jit.SetVector(1, {0xffbfffffffc00000, 0xff80000080000000}); + jit.SetVector(2, {0xffffffffffffffff, 0x8000000000000000}); + + env.ticks_left = 4; + jit.Run(); + + REQUIRE(jit.GetVector(4) == Vector{0x7fff7fff7fff7fff, 0x7fff7fff7fff0000}); + REQUIRE(jit.GetVector(5) == Vector{0x7fbfffff7fc00000, 0x7f80000000000000}); + REQUIRE(jit.GetVector(6) == Vector{0x7fffffffffffffff, 0x0000000000000000}); +} + +TEST_CASE("A64: FMIN", "[a64]") { + A64TestEnv env; + A64::Jit jit{A64::UserConfig{&env}}; + + env.code_mem.emplace_back(0x4ea1f400); // FMIN.4S V0, V0, V1 + env.code_mem.emplace_back(0x4ee3f442); // FMIN.2D V2, V2, V3 + env.code_mem.emplace_back(0x14000000); // B . 
+ + jit.SetPC(0); + jit.SetVector(0, {0x7fc00000'09503366, 0x00000000'7f984a37}); + jit.SetVector(1, {0xc1200000'00000001, 0x6e4b0a41'ffffffff}); + + jit.SetVector(2, {0x7fc0000009503366, 0x3ff0000000000000}); + jit.SetVector(3, {0xbff0000000000000, 0x6e4b0a41ffffffff}); + + env.ticks_left = 2; + jit.Run(); + + REQUIRE(jit.GetVector(0) == Vector{0x7fc00000'00000001, 0x00000000'7fd84a37}); + REQUIRE(jit.GetVector(2) == Vector{0xbff0000000000000, 0x3ff0000000000000}); +} + +TEST_CASE("A64: FMAX", "[a64]") { + A64TestEnv env; + A64::Jit jit{A64::UserConfig{&env}}; + + env.code_mem.emplace_back(0x4e21f400); // FMAX.4S V0, V0, V1 + env.code_mem.emplace_back(0x4e63f442); // FMAX.2D V2, V2, V3 + env.code_mem.emplace_back(0x14000000); // B . + + jit.SetPC(0); + jit.SetVector(0, {0x7fc00000'09503366, 0x00000000'7f984a37}); + jit.SetVector(1, {0xc1200000'00000001, 0x6e4b0a41'ffffffff}); + + jit.SetVector(2, {0x7fc0000009503366, 0x3ff0000000000000}); + jit.SetVector(3, {0xbff0000000000000, 0x6e4b0a41ffffffff}); + + env.ticks_left = 2; + jit.Run(); + + REQUIRE(jit.GetVector(0) == Vector{0x7fc00000'09503366, 0x6e4b0a41'7fd84a37}); + REQUIRE(jit.GetVector(2) == Vector{0x7fc0000009503366, 0x6e4b0a41ffffffff}); +} + +TEST_CASE("A64: FMINNM", "[a64]") { + A64TestEnv env; + A64::Jit jit{A64::UserConfig{&env}}; + + env.code_mem.emplace_back(0x4ea1c400); // FMINNM.4S V0, V0, V1 + env.code_mem.emplace_back(0x4ee3c442); // FMINNM.2D V2, V2, V3 + env.code_mem.emplace_back(0x14000000); // B . 
+ + jit.SetPC(0); + jit.SetVector(0, {0x7fc00000'09503366, 0x00000000'7f984a37}); + jit.SetVector(1, {0xc1200000'00000001, 0x6e4b0a41'ffffffff}); + + jit.SetVector(2, {0x7fc0000009503366, 0x3ff0000000000000}); + jit.SetVector(3, {0xfff0000000000000, 0xffffffffffffffff}); + + env.ticks_left = 2; + jit.Run(); + + REQUIRE(jit.GetVector(0) == Vector{0xc1200000'00000001, 0x00000000'7fd84a37}); + REQUIRE(jit.GetVector(2) == Vector{0xfff0000000000000, 0x3ff0000000000000}); +} + +TEST_CASE("A64: FMAXNM", "[a64]") { + A64TestEnv env; + A64::Jit jit{A64::UserConfig{&env}}; + + env.code_mem.emplace_back(0x4e21c400); // FMAXNM.4S V0, V0, V1 + env.code_mem.emplace_back(0x4e63c442); // FMAXNM.2D V2, V2, V3 + env.code_mem.emplace_back(0x14000000); // B . + + jit.SetPC(0); + jit.SetVector(0, {0x7fc00000'09503366, 0x00000000'7f984a37}); + jit.SetVector(1, {0xc1200000'00000001, 0x6e4b0a41'ffffffff}); + + jit.SetVector(2, {0x7fc0000009503366, 0x3ff0000000000000}); + jit.SetVector(3, {0xfff0000000000000, 0xffffffffffffffff}); + + env.ticks_left = 2; + jit.Run(); + + REQUIRE(jit.GetVector(0) == Vector{0xc1200000'09503366, 0x6e4b0a41'7fd84a37}); + REQUIRE(jit.GetVector(2) == Vector{0x7fc0000009503366, 0x3ff0000000000000}); +} + TEST_CASE("A64: 128-bit exclusive read/write", "[a64]") { A64TestEnv env; ExclusiveMonitor monitor{1}; diff --git a/src/audio_core/audio_renderer.cpp b/src/audio_core/audio_renderer.cpp index dc3eca26e..0757cd804 100755 --- a/src/audio_core/audio_renderer.cpp +++ b/src/audio_core/audio_renderer.cpp @@ -69,6 +69,8 @@ namespace { } // namespace namespace AudioCore { +constexpr s32 NUM_BUFFERS = 2; + AudioRenderer::AudioRenderer(Core::Timing::CoreTiming& core_timing_, Core::Memory::Memory& memory_, AudioCommon::AudioRendererParameter params, Stream::ReleaseCallback&& release_callback, @@ -89,9 +91,9 @@ AudioRenderer::AudioRenderer(Core::Timing::CoreTiming& core_timing_, Core::Memor core_timing, params.sample_rate, AudioCommon::STREAM_NUM_CHANNELS, 
fmt::format("AudioRenderer-Instance{}", instance_number), std::move(release_callback)); process_event = Core::Timing::CreateEvent( - fmt::format("AudioRenderer-Instance{}-Consume", instance_number), + fmt::format("AudioRenderer-Instance{}-Process", instance_number), [this](std::uintptr_t, std::chrono::nanoseconds) { ReleaseAndQueueBuffers(); }); - for (size_t i = 0; i < NUM_BUFFERS; ++i) { + for (s32 i = 0; i < NUM_BUFFERS; ++i) { QueueMixedBuffer(i); } } @@ -128,7 +130,7 @@ Stream::State AudioRenderer::GetStreamState() const { ResultCode AudioRenderer::UpdateAudioRenderer(const std::vector& input_params, std::vector& output_params) { { - std::scoped_lock l{lock}; + std::scoped_lock lock{mutex}; InfoUpdater info_updater{input_params, output_params, behavior_info}; if (!info_updater.UpdateBehaviorInfo(behavior_info)) { @@ -332,17 +334,17 @@ void AudioRenderer::ReleaseAndQueueBuffers() { } { - std::scoped_lock l{lock}; + std::scoped_lock lock{mutex}; const auto released_buffers{audio_out->GetTagsAndReleaseBuffers(stream)}; for (const auto& tag : released_buffers) { QueueMixedBuffer(tag); } } - const f32 sampleRate = static_cast(GetSampleRate()); - const f32 sampleCount = static_cast(GetSampleCount()); - const f32 consumeRate = sampleRate / (sampleCount * (sampleCount / 240)); - const s32 ms = (1000 / static_cast(consumeRate)) - 1; + const f32 sample_rate = static_cast(GetSampleRate()); + const f32 sample_count = static_cast(GetSampleCount()); + const f32 consume_rate = sample_rate / (sample_count * (sample_count / 240)); + const s32 ms = (1000 / static_cast(consume_rate)) - 1; const std::chrono::milliseconds next_event_time(std::max(ms / NUM_BUFFERS, 1)); core_timing.ScheduleEvent(next_event_time, process_event, {}); } diff --git a/src/audio_core/audio_renderer.h b/src/audio_core/audio_renderer.h index c1e66bb51..88fdd13dd 100755 --- a/src/audio_core/audio_renderer.h +++ b/src/audio_core/audio_renderer.h @@ -34,7 +34,6 @@ class Memory; namespace AudioCore { using 
DSPStateHolder = std::array; -constexpr s32 NUM_BUFFERS = 2; class AudioOut; @@ -74,7 +73,7 @@ private: std::size_t elapsed_frame_count{}; Core::Timing::CoreTiming& core_timing; std::shared_ptr process_event; - std::mutex lock; + std::mutex mutex; }; } // namespace AudioCore diff --git a/src/common/settings.cpp b/src/common/settings.cpp index ab5cbe67b..e1bb4b7ff 100755 --- a/src/common/settings.cpp +++ b/src/common/settings.cpp @@ -123,6 +123,7 @@ void RestoreGlobalState(bool is_powered_on) { values.cpu_accuracy.SetGlobal(true); values.cpuopt_unsafe_unfuse_fma.SetGlobal(true); values.cpuopt_unsafe_reduce_fp_error.SetGlobal(true); + values.cpuopt_unsafe_ignore_standard_fpcr.SetGlobal(true); values.cpuopt_unsafe_inaccurate_nan.SetGlobal(true); values.cpuopt_unsafe_fastmem_check.SetGlobal(true); diff --git a/src/common/settings.h b/src/common/settings.h index 55cef40f8..82ec18e27 100755 --- a/src/common/settings.h +++ b/src/common/settings.h @@ -129,6 +129,7 @@ struct Values { Setting cpuopt_unsafe_unfuse_fma; Setting cpuopt_unsafe_reduce_fp_error; + Setting cpuopt_unsafe_ignore_standard_fpcr; Setting cpuopt_unsafe_inaccurate_nan; Setting cpuopt_unsafe_fastmem_check; diff --git a/src/core/arm/dynarmic/arm_dynarmic_32.cpp b/src/core/arm/dynarmic/arm_dynarmic_32.cpp index c8f6dc765..f871f7bf4 100755 --- a/src/core/arm/dynarmic/arm_dynarmic_32.cpp +++ b/src/core/arm/dynarmic/arm_dynarmic_32.cpp @@ -186,6 +186,9 @@ std::shared_ptr ARM_Dynarmic_32::MakeJit(Common::PageTable* if (Settings::values.cpuopt_unsafe_reduce_fp_error.GetValue()) { config.optimizations |= Dynarmic::OptimizationFlag::Unsafe_ReducedErrorFP; } + if (Settings::values.cpuopt_unsafe_ignore_standard_fpcr.GetValue()) { + config.optimizations |= Dynarmic::OptimizationFlag::Unsafe_IgnoreStandardFPCRValue; + } if (Settings::values.cpuopt_unsafe_inaccurate_nan.GetValue()) { config.optimizations |= Dynarmic::OptimizationFlag::Unsafe_InaccurateNaN; } diff --git a/src/yuzu/configuration/config.cpp 
b/src/yuzu/configuration/config.cpp index be065f077..62bafc453 100755 --- a/src/yuzu/configuration/config.cpp +++ b/src/yuzu/configuration/config.cpp @@ -757,6 +757,8 @@ void Config::ReadCpuValues() { QStringLiteral("cpuopt_unsafe_unfuse_fma"), true); ReadSettingGlobal(Settings::values.cpuopt_unsafe_reduce_fp_error, QStringLiteral("cpuopt_unsafe_reduce_fp_error"), true); + ReadSettingGlobal(Settings::values.cpuopt_unsafe_ignore_standard_fpcr, + QStringLiteral("cpuopt_unsafe_ignore_standard_fpcr"), true); ReadSettingGlobal(Settings::values.cpuopt_unsafe_inaccurate_nan, QStringLiteral("cpuopt_unsafe_inaccurate_nan"), true); ReadSettingGlobal(Settings::values.cpuopt_unsafe_fastmem_check, @@ -1343,6 +1345,8 @@ void Config::SaveCpuValues() { Settings::values.cpuopt_unsafe_unfuse_fma, true); WriteSettingGlobal(QStringLiteral("cpuopt_unsafe_reduce_fp_error"), Settings::values.cpuopt_unsafe_reduce_fp_error, true); + WriteSettingGlobal(QStringLiteral("cpuopt_unsafe_ignore_standard_fpcr"), + Settings::values.cpuopt_unsafe_ignore_standard_fpcr, true); WriteSettingGlobal(QStringLiteral("cpuopt_unsafe_inaccurate_nan"), Settings::values.cpuopt_unsafe_inaccurate_nan, true); WriteSettingGlobal(QStringLiteral("cpuopt_unsafe_fastmem_check"), diff --git a/src/yuzu/configuration/configure_cpu.cpp b/src/yuzu/configuration/configure_cpu.cpp index 22219cbad..13db2ba98 100755 --- a/src/yuzu/configuration/configure_cpu.cpp +++ b/src/yuzu/configuration/configure_cpu.cpp @@ -34,12 +34,15 @@ void ConfigureCpu::SetConfiguration() { ui->accuracy->setEnabled(runtime_lock); ui->cpuopt_unsafe_unfuse_fma->setEnabled(runtime_lock); ui->cpuopt_unsafe_reduce_fp_error->setEnabled(runtime_lock); + ui->cpuopt_unsafe_ignore_standard_fpcr->setEnabled(runtime_lock); ui->cpuopt_unsafe_inaccurate_nan->setEnabled(runtime_lock); ui->cpuopt_unsafe_fastmem_check->setEnabled(runtime_lock); ui->cpuopt_unsafe_unfuse_fma->setChecked(Settings::values.cpuopt_unsafe_unfuse_fma.GetValue()); 
ui->cpuopt_unsafe_reduce_fp_error->setChecked( Settings::values.cpuopt_unsafe_reduce_fp_error.GetValue()); + ui->cpuopt_unsafe_ignore_standard_fpcr->setChecked( + Settings::values.cpuopt_unsafe_ignore_standard_fpcr.GetValue()); ui->cpuopt_unsafe_inaccurate_nan->setChecked( Settings::values.cpuopt_unsafe_inaccurate_nan.GetValue()); ui->cpuopt_unsafe_fastmem_check->setChecked( @@ -84,6 +87,9 @@ void ConfigureCpu::ApplyConfiguration() { ConfigurationShared::ApplyPerGameSetting(&Settings::values.cpuopt_unsafe_reduce_fp_error, ui->cpuopt_unsafe_reduce_fp_error, cpuopt_unsafe_reduce_fp_error); + ConfigurationShared::ApplyPerGameSetting(&Settings::values.cpuopt_unsafe_ignore_standard_fpcr, + ui->cpuopt_unsafe_ignore_standard_fpcr, + cpuopt_unsafe_ignore_standard_fpcr); ConfigurationShared::ApplyPerGameSetting(&Settings::values.cpuopt_unsafe_inaccurate_nan, ui->cpuopt_unsafe_inaccurate_nan, cpuopt_unsafe_inaccurate_nan); @@ -137,6 +143,9 @@ void ConfigureCpu::SetupPerGameUI() { ConfigurationShared::SetColoredTristate(ui->cpuopt_unsafe_reduce_fp_error, Settings::values.cpuopt_unsafe_reduce_fp_error, cpuopt_unsafe_reduce_fp_error); + ConfigurationShared::SetColoredTristate(ui->cpuopt_unsafe_ignore_standard_fpcr, + Settings::values.cpuopt_unsafe_ignore_standard_fpcr, + cpuopt_unsafe_ignore_standard_fpcr); ConfigurationShared::SetColoredTristate(ui->cpuopt_unsafe_inaccurate_nan, Settings::values.cpuopt_unsafe_inaccurate_nan, cpuopt_unsafe_inaccurate_nan); diff --git a/src/yuzu/configuration/configure_cpu.h b/src/yuzu/configuration/configure_cpu.h index 57ff2772a..b2b5f1671 100755 --- a/src/yuzu/configuration/configure_cpu.h +++ b/src/yuzu/configuration/configure_cpu.h @@ -40,6 +40,7 @@ private: ConfigurationShared::CheckState cpuopt_unsafe_unfuse_fma; ConfigurationShared::CheckState cpuopt_unsafe_reduce_fp_error; + ConfigurationShared::CheckState cpuopt_unsafe_ignore_standard_fpcr; ConfigurationShared::CheckState cpuopt_unsafe_inaccurate_nan; ConfigurationShared::CheckState 
cpuopt_unsafe_fastmem_check; }; diff --git a/src/yuzu/configuration/configure_cpu.ui b/src/yuzu/configuration/configure_cpu.ui index 31ef9e3f5..0e296d4e5 100755 --- a/src/yuzu/configuration/configure_cpu.ui +++ b/src/yuzu/configuration/configure_cpu.ui @@ -111,6 +111,18 @@ + + + + + <div>This option improves the speed of 32-bit ASIMD floating-point functions by running with incorrect rounding modes.</div> + + + + Faster ASIMD instructions (32-bit only) + + +