early-access version 1801

main
pineappleEA 2021-06-22 01:03:38 +02:00
parent dd0ca32f47
commit eab9e5d173
36 changed files with 574 additions and 492 deletions

View File

@ -1,7 +1,7 @@
yuzu emulator early access
=============
This is the source code for early-access 1800.
This is the source code for early-access 1801.
## Legal Notice

View File

@ -44,7 +44,7 @@ More general alternatives:
* [VisUAL](https://salmanarif.bitbucket.io/visual/index.html) - Visual ARM UAL emulator intended for education
* A wide variety of other recompilers, interpreters and emulators can be found embedded in other projects, here are some we would recommend looking at:
* [firebird's recompiler](https://github.com/nspire-emus/firebird) - Takes more of a call-threaded approach to recompilation
* [higan's arm7tdmi emulator](https://gitlab.com/higan/higan/tree/master/higan/component/processor/arm7tdmi) - Very clean code-style
* [higan's arm7tdmi emulator](https://github.com/higan-emu/higan/tree/master/higan/component/processor/arm7tdmi) - Very clean code-style
* [arm-js by ozaki-r](https://github.com/ozaki-r/arm-js) - Emulates ARMv7A and some peripherals of Versatile Express, in the browser
Disadvantages of Dynarmic

View File

@ -541,6 +541,12 @@ void A32EmitX64::EmitA32SetCpsr(A32EmitContext& ctx, IR::Inst* inst) {
}
/// Emits host code for the A32SetCpsrNZCV IR instruction.
///
/// The instruction's first argument is taken in a scratch general-purpose
/// register and its 32-bit view is stored to A32JitState::cpsr_nzcv.
void A32EmitX64::EmitA32SetCpsrNZCV(A32EmitContext& ctx, IR::Inst* inst) {
    auto operands = ctx.reg_alloc.GetArgumentInfo(inst);

    // Only the low 32 bits of the allocated register are written out.
    const Xbyak::Reg32 nzcv_value = ctx.reg_alloc.UseScratchGpr(operands[0]).cvt32();

    // The destination is addressed relative to r15, at the offset of the
    // cpsr_nzcv member inside A32JitState.
    code.mov(dword[r15 + offsetof(A32JitState, cpsr_nzcv)], nzcv_value);
}
void A32EmitX64::EmitA32SetCpsrNZCVRaw(A32EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
if (args[0].IsImmediate()) {
const u32 imm = args[0].GetImmediateU32();

View File

@ -13,6 +13,42 @@
namespace Dynarmic::Backend::X64 {
// Comparison-predicate immediates for the 'vcmp' instruction family.
// These mirror the _MM_CMP_* constants.
//
// Suffix key:  O = ordered,  U = unordered,  Q = quiet,  S = signaling.
namespace Cmp {
// Predicates 0x00 - 0x07
constexpr u8 Equal_OQ           = 0x00;  // equal                 (ordered,   quiet)
constexpr u8 LessThan_OS        = 0x01;  // less-than             (ordered,   signaling)
constexpr u8 LessEqual_OS       = 0x02;  // less-or-equal         (ordered,   signaling)
constexpr u8 Unordered_Q        = 0x03;  // unordered             (quiet)
constexpr u8 NotEqual_UQ        = 0x04;  // not-equal             (unordered, quiet)
constexpr u8 NotLessThan_US     = 0x05;  // not-less-than         (unordered, signaling)
constexpr u8 NotLessEqual_US    = 0x06;  // not-less-or-equal     (unordered, signaling)
constexpr u8 Ordered_Q          = 0x07;  // ordered               (quiet)

// Predicates 0x08 - 0x0F
constexpr u8 Equal_UQ           = 0x08;  // equal                 (unordered, quiet)
constexpr u8 NotGreaterEqual_US = 0x09;  // not-greater-or-equal  (unordered, signaling)
constexpr u8 NotGreaterThan_US  = 0x0A;  // not-greater-than      (unordered, signaling)
constexpr u8 False_OQ           = 0x0B;  // always false          (ordered,   quiet)
constexpr u8 NotEqual_OQ        = 0x0C;  // not-equal             (ordered,   quiet)
constexpr u8 GreaterEqual_OS    = 0x0D;  // greater-or-equal      (ordered,   signaling)
constexpr u8 GreaterThan_OS     = 0x0E;  // greater-than          (ordered,   signaling)
constexpr u8 True_UQ            = 0x0F;  // always true           (unordered, quiet)

// Predicates 0x10 - 0x17
constexpr u8 Equal_OS           = 0x10;  // equal                 (ordered,   signaling)
constexpr u8 LessThan_OQ        = 0x11;  // less-than             (ordered,   quiet)
constexpr u8 LessEqual_OQ       = 0x12;  // less-or-equal         (ordered,   quiet)
constexpr u8 Unordered_S        = 0x13;  // unordered             (signaling)
constexpr u8 NotEqual_US        = 0x14;  // not-equal             (unordered, signaling)
constexpr u8 NotLessThan_UQ     = 0x15;  // not-less-than         (unordered, quiet)
constexpr u8 NotLessEqual_UQ    = 0x16;  // not-less-or-equal     (unordered, quiet)
constexpr u8 Ordered_S          = 0x17;  // ordered               (signaling)

// Predicates 0x18 - 0x1F
constexpr u8 Equal_US           = 0x18;  // equal                 (unordered, signaling)
constexpr u8 NotGreaterEqual_UQ = 0x19;  // not-greater-or-equal  (unordered, quiet)
constexpr u8 NotGreaterThan_UQ  = 0x1A;  // not-greater-than      (unordered, quiet)
constexpr u8 False_OS           = 0x1B;  // always false          (ordered,   signaling)
constexpr u8 NotEqual_OS        = 0x1C;  // not-equal             (ordered,   signaling)
constexpr u8 GreaterEqual_OQ    = 0x1D;  // greater-or-equal      (ordered,   quiet)
constexpr u8 GreaterThan_OQ     = 0x1E;  // greater-than          (ordered,   quiet)
constexpr u8 True_US            = 0x1F;  // always true           (unordered, signaling)
}  // namespace Cmp
// Redefinition of _MM_CMPINT_* constants for use with the 'vpcmp' instruction
namespace CmpInt {
constexpr u8 Equal = 0x0;
@ -46,6 +82,18 @@ constexpr u8 b = 0b11001100;
constexpr u8 c = 0b10101010;
} // namespace Tern
// Category bits for the immediate of the vfpclass instruction.
// Each constant occupies one bit, so several categories can be OR-ed
// together into a single mask.
namespace FpClass {
constexpr u8 QNaN     = 1 << 0;  // quiet NaN
constexpr u8 ZeroPos  = 1 << 1;  // positive zero
constexpr u8 ZeroNeg  = 1 << 2;  // negative zero
constexpr u8 InfPos   = 1 << 3;  // positive infinity
constexpr u8 InfNeg   = 1 << 4;  // negative infinity
constexpr u8 Denormal = 1 << 5;  // denormal
constexpr u8 Negative = 1 << 6;  // negative finite value
constexpr u8 SNaN     = 1 << 7;  // signaling NaN
}  // namespace FpClass
// Opcodes for use with vfixupimm
enum class FpFixup : u8 {
A = 0b0000, // A
@ -87,6 +135,29 @@ constexpr u32 FixupLUT(FpFixup src_qnan = FpFixup::A,
return fixup_lut;
}
// Operation selector for the vrange* instructions.
// FpRangeLUT writes this value into bits 0-1 of the instruction immediate.
enum class FpRangeSelect : u8 {
    Min = 0x0,
    Max = 0x1,
    AbsMin = 0x2,  // the operand with the smaller absolute value
    AbsMax = 0x3,  // the operand with the larger absolute value
};
// Sign-handling selector for the vrange* instructions.
// FpRangeLUT writes this value into bits 2-3 of the instruction immediate.
enum class FpRangeSign : u8 {
    A = 0x0,         // take the sign from operand A
    Preserve = 0x1,  // keep the sign unchanged
    Positive = 0x2,  // force a positive sign
    Negative = 0x3,  // force a negative sign
};
// Builds the 8-bit immediate operand for a vrange instruction.
//
// Starting from zero, `range_select` is written through
// Common::ModifyBits<0, 1> and `range_sign` through Common::ModifyBits<2, 3>;
// the combined value is returned.
constexpr u8 FpRangeLUT(FpRangeSelect range_select, FpRangeSign range_sign) {
    const u8 select_field = static_cast<u8>(range_select);
    const u8 sign_field = static_cast<u8>(range_sign);

    const u8 with_select = Common::ModifyBits<0, 1, u8>(u8{0}, select_field);
    return Common::ModifyBits<2, 3, u8>(with_select, sign_field);
}
constexpr std::optional<int> ConvertRoundingModeToX64Immediate(FP::RoundingMode rounding_mode) {
switch (rounding_mode) {
case FP::RoundingMode::ToNearest_TieEven:

View File

@ -39,9 +39,6 @@ namespace {
const Xbyak::Reg64 INVALID_REG = Xbyak::Reg64(-1);
constexpr u64 f16_negative_zero = 0x8000;
constexpr u64 f16_non_sign_mask = 0x7fff;
constexpr u64 f32_negative_zero = 0x80000000u;
constexpr u64 f32_nan = 0x7fc00000u;
constexpr u64 f32_non_sign_mask = 0x7fffffffu;
@ -328,58 +325,56 @@ void FPThreeOp(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, Function fn)
} // anonymous namespace
void EmitX64::EmitFPAbs16(EmitContext& ctx, IR::Inst* inst) {
template<size_t fsize>
void FPAbs(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
using FPT = mp::unsigned_integer_of_size<fsize>;
constexpr FPT non_sign_mask = FP::FPInfo<FPT>::sign_mask - FPT(1u);
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]);
const Xbyak::Address mask = code.MConst(xword, non_sign_mask);
code.pand(result, code.MConst(xword, f16_non_sign_mask));
code.andps(result, mask);
ctx.reg_alloc.DefineValue(inst, result);
}
// Emitter entry point for FPAbs16: forwards to the shared FPAbs template
// instantiated with fsize = 16.
void EmitX64::EmitFPAbs16(EmitContext& ctx, IR::Inst* inst) {
FPAbs<16>(code, ctx, inst);
}
void EmitX64::EmitFPAbs32(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]);
code.pand(result, code.MConst(xword, f32_non_sign_mask));
ctx.reg_alloc.DefineValue(inst, result);
FPAbs<32>(code, ctx, inst);
}
// Emitter entry point for FPAbs64: forwards to the shared FPAbs template
// instantiated with fsize = 64.
void EmitX64::EmitFPAbs64(EmitContext& ctx, IR::Inst* inst) {
FPAbs<64>(code, ctx, inst);
}
template<size_t fsize>
void FPNeg(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
using FPT = mp::unsigned_integer_of_size<fsize>;
constexpr FPT sign_mask = FP::FPInfo<FPT>::sign_mask;
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]);
const Xbyak::Address mask = code.MConst(xword, u64(sign_mask));
code.pand(result, code.MConst(xword, f64_non_sign_mask));
code.xorps(result, mask);
ctx.reg_alloc.DefineValue(inst, result);
}
void EmitX64::EmitFPNeg16(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]);
code.pxor(result, code.MConst(xword, f16_negative_zero));
ctx.reg_alloc.DefineValue(inst, result);
FPNeg<16>(code, ctx, inst);
}
void EmitX64::EmitFPNeg32(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]);
code.pxor(result, code.MConst(xword, f32_negative_zero));
ctx.reg_alloc.DefineValue(inst, result);
FPNeg<32>(code, ctx, inst);
}
void EmitX64::EmitFPNeg64(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]);
code.pxor(result, code.MConst(xword, f64_negative_zero));
ctx.reg_alloc.DefineValue(inst, result);
FPNeg<64>(code, ctx, inst);
}
void EmitX64::EmitFPAdd32(EmitContext& ctx, IR::Inst* inst) {
@ -449,97 +444,112 @@ static void EmitFPMinMax(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
template<size_t fsize, bool is_max>
static void EmitFPMinMaxNumeric(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
using FPT = mp::unsigned_integer_of_size<fsize>;
constexpr FPT default_nan = FP::FPInfo<FPT>::DefaultNaN();
constexpr u8 mantissa_msb_bit = static_cast<u8>(FP::FPInfo<FPT>::explicit_mantissa_width - 1);
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const Xbyak::Xmm op1 = ctx.reg_alloc.UseScratchXmm(args[0]);
const Xbyak::Xmm op2 = ctx.reg_alloc.UseScratchXmm(args[1]); // Result stored here!
Xbyak::Reg tmp = ctx.reg_alloc.ScratchGpr();
tmp.setBit(fsize);
const auto move_to_tmp = [&](const Xbyak::Xmm& xmm) {
if constexpr (fsize == 32) {
code.movd(tmp.cvt32(), xmm);
} else {
code.movq(tmp.cvt64(), xmm);
}
};
Xbyak::Label end, z, nan, op2_is_nan, snan, maybe_both_nan, normal;
DenormalsAreZero<fsize>(code, ctx, {op1, op2});
FCODE(ucomis)(op1, op2);
code.jz(z, code.T_NEAR);
code.L(normal);
if constexpr (is_max) {
FCODE(maxs)(op2, op1);
if (code.HasHostFeature(HostFeature::AVX512_OrthoFloat)) {
// vrangep{s,d} will already correctly handle comparing
// signed zeros and propagating NaNs similar to ARM
constexpr FpRangeSelect range_select = is_max ? FpRangeSelect::Max : FpRangeSelect::Min;
FCODE(vranges)(op2, op1, op2, FpRangeLUT(range_select, FpRangeSign::Preserve));
if (ctx.FPCR().DN()) {
FCODE(vcmps)(k1, op2, op2, Cmp::Unordered_Q);
FCODE(vmovs)(op2 | k1, code.MConst(xword, default_nan));
}
} else {
FCODE(mins)(op2, op1);
}
code.L(end);
Xbyak::Reg tmp = ctx.reg_alloc.ScratchGpr();
tmp.setBit(fsize);
code.SwitchToFarCode();
const auto move_to_tmp = [&](const Xbyak::Xmm& xmm) {
if constexpr (fsize == 32) {
code.movd(tmp.cvt32(), xmm);
} else {
code.movq(tmp.cvt64(), xmm);
}
};
code.L(z);
code.jp(nan);
if constexpr (is_max) {
code.andps(op2, op1);
} else {
code.orps(op2, op1);
}
code.jmp(end);
Xbyak::Label end, z, nan, op2_is_nan, snan, maybe_both_nan, normal;
// NaN requirements:
// op1 op2 result
// SNaN anything op1
// !SNaN SNaN op2
// QNaN !NaN op2
// !NaN QNaN op1
// QNaN QNaN op1
FCODE(ucomis)(op1, op2);
code.jz(z, code.T_NEAR);
code.L(normal);
if constexpr (is_max) {
FCODE(maxs)(op2, op1);
} else {
FCODE(mins)(op2, op1);
}
code.L(end);
code.L(nan);
FCODE(ucomis)(op1, op1);
code.jnp(op2_is_nan);
code.SwitchToFarCode();
// op1 is NaN
move_to_tmp(op1);
code.bt(tmp, mantissa_msb_bit);
code.jc(maybe_both_nan);
if (ctx.FPCR().DN()) {
code.L(snan);
code.movaps(op2, code.MConst(xword, FP::FPInfo<FPT>::DefaultNaN()));
code.L(z);
code.jp(nan);
if constexpr (is_max) {
code.andps(op2, op1);
} else {
code.orps(op2, op1);
}
code.jmp(end);
} else {
code.movaps(op2, op1);
code.L(snan);
code.orps(op2, code.MConst(xword, FP::FPInfo<FPT>::mantissa_msb));
code.jmp(end);
}
code.L(maybe_both_nan);
FCODE(ucomis)(op2, op2);
code.jnp(end, code.T_NEAR);
if (ctx.FPCR().DN()) {
code.jmp(snan);
} else {
// NaN requirements:
// op1 op2 result
// SNaN anything op1
// !SNaN SNaN op2
// QNaN !NaN op2
// !NaN QNaN op1
// QNaN QNaN op1
code.L(nan);
FCODE(ucomis)(op1, op1);
code.jnp(op2_is_nan);
// op1 is NaN
move_to_tmp(op1);
code.bt(tmp, mantissa_msb_bit);
code.jc(maybe_both_nan);
if (ctx.FPCR().DN()) {
code.L(snan);
code.movaps(op2, code.MConst(xword, default_nan));
code.jmp(end);
} else {
code.movaps(op2, op1);
code.L(snan);
code.orps(op2, code.MConst(xword, FP::FPInfo<FPT>::mantissa_msb));
code.jmp(end);
}
code.L(maybe_both_nan);
FCODE(ucomis)(op2, op2);
code.jnp(end, code.T_NEAR);
if (ctx.FPCR().DN()) {
code.jmp(snan);
} else {
move_to_tmp(op2);
code.bt(tmp.cvt64(), mantissa_msb_bit);
code.jnc(snan);
code.movaps(op2, op1);
code.jmp(end);
}
// op2 is NaN
code.L(op2_is_nan);
move_to_tmp(op2);
code.bt(tmp.cvt64(), mantissa_msb_bit);
code.bt(tmp, mantissa_msb_bit);
code.jnc(snan);
code.movaps(op2, op1);
code.jmp(end);
code.SwitchToNearCode();
}
// op2 is NaN
code.L(op2_is_nan);
move_to_tmp(op2);
code.bt(tmp, mantissa_msb_bit);
code.jnc(snan);
code.movaps(op2, op1);
code.jmp(end);
code.SwitchToNearCode();
ctx.reg_alloc.DefineValue(inst, op2);
}
@ -1681,7 +1691,7 @@ void EmitX64::EmitFPFixedS32ToSingle(EmitContext& ctx, IR::Inst* inst) {
const size_t fbits = args[1].GetImmediateU8();
const FP::RoundingMode rounding_mode = static_cast<FP::RoundingMode>(args[2].GetImmediateU8());
if (rounding_mode == ctx.FPCR().RMode()) {
if (rounding_mode == ctx.FPCR().RMode() || ctx.HasOptimization(OptimizationFlag::Unsafe_IgnoreStandardFPCRValue)) {
code.cvtsi2ss(result, from);
} else {
ASSERT(rounding_mode == FP::RoundingMode::ToNearest_TieEven);
@ -1717,7 +1727,7 @@ void EmitX64::EmitFPFixedU32ToSingle(EmitContext& ctx, IR::Inst* inst) {
}
};
if (rounding_mode == ctx.FPCR().RMode()) {
if (rounding_mode == ctx.FPCR().RMode() || ctx.HasOptimization(OptimizationFlag::Unsafe_IgnoreStandardFPCRValue)) {
op();
} else {
ASSERT(rounding_mode == FP::RoundingMode::ToNearest_TieEven);

View File

@ -440,11 +440,11 @@ void EmitX64::EmitVectorAnd(EmitContext& ctx, IR::Inst* inst) {
}
static void ArithmeticShiftRightByte(EmitContext& ctx, BlockOfCode& code, const Xbyak::Xmm& result, u8 shift_amount) {
if (code.HasHostFeature(HostFeature::AVX512VL | HostFeature::GFNI)) {
if (code.HasHostFeature(HostFeature::GFNI)) {
const u64 shift_matrix = shift_amount < 8
? (0x0102040810204080 << (shift_amount * 8)) | (0x8080808080808080 >> (64 - shift_amount * 8))
: 0x8080808080808080;
code.vgf2p8affineqb(result, result, code.MConst(xword_b, shift_matrix), 0);
code.gf2p8affineqb(result, code.MConst(xword, shift_matrix, shift_matrix), 0);
return;
}
@ -1472,9 +1472,9 @@ void EmitX64::EmitVectorLogicalShiftLeft8(EmitContext& ctx, IR::Inst* inst) {
code.pxor(result, result);
} else if (shift_amount == 1) {
code.paddb(result, result);
} else if (code.HasHostFeature(HostFeature::AVX512VL | HostFeature::GFNI)) {
} else if (code.HasHostFeature(HostFeature::GFNI)) {
const u64 shift_matrix = 0x0102040810204080 >> (shift_amount * 8);
code.vgf2p8affineqb(result, result, code.MConst(xword_b, shift_matrix), 0);
code.gf2p8affineqb(result, code.MConst(xword, shift_matrix, shift_matrix), 0);
} else {
const u64 replicand = (0xFFULL << shift_amount) & 0xFF;
const u64 mask = Common::Replicate(replicand, Common::BitSize<u8>());
@ -1529,9 +1529,9 @@ void EmitX64::EmitVectorLogicalShiftRight8(EmitContext& ctx, IR::Inst* inst) {
// Do nothing
} else if (shift_amount >= 8) {
code.pxor(result, result);
} else if (code.HasHostFeature(HostFeature::AVX512VL | HostFeature::GFNI)) {
} else if (code.HasHostFeature(HostFeature::GFNI)) {
const u64 shift_matrix = 0x0102040810204080 << (shift_amount * 8);
code.vgf2p8affineqb(result, result, code.MConst(xword_b, shift_matrix), 0);
code.gf2p8affineqb(result, code.MConst(xword, shift_matrix, shift_matrix), 0);
} else {
const u64 replicand = 0xFEULL >> shift_amount;
const u64 mask = Common::Replicate(replicand, Common::BitSize<u8>());
@ -2137,12 +2137,18 @@ void EmitX64::EmitVectorNarrow64(EmitContext& ctx, IR::Inst* inst) {
/// Emits host code for the VectorNot IR instruction (bitwise complement of a
/// 128-bit vector argument).
///
/// Two code paths exist:
///  - With HostFeature::AVX512_Ortho a single vpternlogq is emitted into a
///    fresh scratch register; the immediate is the complement of Tern::c.
///  - Otherwise the argument is XOR-ed in place with a register that
///    pcmpeqw has filled by comparing it against itself.
void EmitX64::EmitVectorNot(EmitContext& ctx, IR::Inst* inst) {
    auto operands = ctx.reg_alloc.GetArgumentInfo(inst);

    if (code.HasHostFeature(HostFeature::AVX512_Ortho)) {
        // The destination is allocated before the source, and the source is
        // only read, so it does not need to be a scratch register.
        const Xbyak::Xmm dest = ctx.reg_alloc.ScratchXmm();
        const Xbyak::Xmm source = ctx.reg_alloc.UseXmm(operands[0]);

        code.vpternlogq(dest, source, source, u8(~Tern::c));

        ctx.reg_alloc.DefineValue(inst, dest);
    } else {
        const Xbyak::Xmm value = ctx.reg_alloc.UseScratchXmm(operands[0]);
        const Xbyak::Xmm ones = ctx.reg_alloc.ScratchXmm();

        // Comparing a register with itself for equality sets every lane.
        code.pcmpeqw(ones, ones);
        code.pxor(value, ones);

        ctx.reg_alloc.DefineValue(inst, value);
    }
}
@ -2789,8 +2795,8 @@ void EmitX64::EmitVectorReverseBits(EmitContext& ctx, IR::Inst* inst) {
const Xbyak::Xmm data = ctx.reg_alloc.UseScratchXmm(args[0]);
if (code.HasHostFeature(HostFeature::AVX512VL | HostFeature::GFNI)) {
code.vgf2p8affineqb(data, data, code.MConst(xword_b, 0x8040201008040201), 0);
if (code.HasHostFeature(HostFeature::GFNI)) {
code.gf2p8affineqb(data, code.MConst(xword, 0x8040201008040201, 0x8040201008040201), 0);
} else {
const Xbyak::Xmm high_nibble_reg = ctx.reg_alloc.ScratchXmm();
code.movdqa(high_nibble_reg, code.MConst(xword, 0xF0F0F0F0F0F0F0F0, 0xF0F0F0F0F0F0F0F0));

View File

@ -58,7 +58,7 @@ template<typename Lambda>
void MaybeStandardFPSCRValue(BlockOfCode& code, EmitContext& ctx, bool fpcr_controlled, Lambda lambda) {
const bool switch_mxcsr = ctx.FPCR(fpcr_controlled) != ctx.FPCR();
if (switch_mxcsr) {
if (switch_mxcsr && !ctx.HasOptimization(OptimizationFlag::Unsafe_IgnoreStandardFPCRValue)) {
code.EnterStandardASIMD();
lambda();
code.LeaveStandardASIMD();
@ -557,37 +557,32 @@ void EmitFourOpFallback(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, Lam
} // anonymous namespace
void EmitX64::EmitFPVectorAbs16(EmitContext& ctx, IR::Inst* inst) {
template<size_t fsize>
void FPVectorAbs(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
using FPT = mp::unsigned_integer_of_size<fsize>;
constexpr FPT non_sign_mask = FP::FPInfo<FPT>::sign_mask - FPT(1u);
constexpr u64 non_sign_mask64 = Common::Replicate<u64>(non_sign_mask, fsize);
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(args[0]);
const Xbyak::Address mask = code.MConst(xword, 0x7FFF7FFF7FFF7FFF, 0x7FFF7FFF7FFF7FFF);
code.pand(a, mask);
ctx.reg_alloc.DefineValue(inst, a);
}
void EmitX64::EmitFPVectorAbs32(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(args[0]);
const Xbyak::Address mask = code.MConst(xword, 0x7FFFFFFF7FFFFFFF, 0x7FFFFFFF7FFFFFFF);
const Xbyak::Address mask = code.MConst(xword, non_sign_mask64, non_sign_mask64);
code.andps(a, mask);
ctx.reg_alloc.DefineValue(inst, a);
}
// Emitter entry point for FPVectorAbs16: forwards to the shared FPVectorAbs
// template instantiated with fsize = 16.
void EmitX64::EmitFPVectorAbs16(EmitContext& ctx, IR::Inst* inst) {
FPVectorAbs<16>(code, ctx, inst);
}
// Emitter entry point for FPVectorAbs32: forwards to the shared FPVectorAbs
// template instantiated with fsize = 32.
void EmitX64::EmitFPVectorAbs32(EmitContext& ctx, IR::Inst* inst) {
FPVectorAbs<32>(code, ctx, inst);
}
void EmitX64::EmitFPVectorAbs64(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(args[0]);
const Xbyak::Address mask = code.MConst(xword, 0x7FFFFFFFFFFFFFFF, 0x7FFFFFFFFFFFFFFF);
code.andpd(a, mask);
ctx.reg_alloc.DefineValue(inst, a);
FPVectorAbs<64>(code, ctx, inst);
}
void EmitX64::EmitFPVectorAdd32(EmitContext& ctx, IR::Inst* inst) {
@ -943,7 +938,12 @@ static void EmitFPVectorMinMax(BlockOfCode& code, EmitContext& ctx, IR::Inst* in
MaybeStandardFPSCRValue(code, ctx, fpcr_controlled, [&] {
DenormalsAreZero<fsize>(code, ctx.FPCR(fpcr_controlled), {result, xmm_b}, mask);
if (code.HasHostFeature(HostFeature::AVX)) {
if (code.HasHostFeature(HostFeature::AVX512_OrthoFloat)) {
constexpr FpRangeSelect range_select = is_max ? FpRangeSelect::Max : FpRangeSelect::Min;
FCODE(vcmpp)(k1, result, xmm_b, Cmp::Unordered_Q);
FCODE(vrangep)(result, result, xmm_b, FpRangeLUT(range_select, FpRangeSign::Preserve));
FCODE(vblendmp)(result | k1, result, GetNaNVector<fsize>(code));
} else if (code.HasHostFeature(HostFeature::AVX)) {
FCODE(vcmpeqp)(mask, result, xmm_b);
FCODE(vcmpunordp)(nan_mask, result, xmm_b);
if constexpr (is_max) {
@ -1001,7 +1001,14 @@ static void EmitFPVectorMinMax(BlockOfCode& code, EmitContext& ctx, IR::Inst* in
// x86-64 treats differently signed zeros as equal while ARM does not.
// Thus if we AND together things that x86-64 thinks are equal we'll get the positive zero.
if (code.HasHostFeature(HostFeature::AVX)) {
if (code.HasHostFeature(HostFeature::AVX512_OrthoFloat)) {
// vrangep{s,d} will already correctly handle comparing
// signed zeros similar to ARM
// max(+0.0, -0.0) = +0.0.
// min(+0.0, -0.0) = -0.0
constexpr FpRangeSelect range_select = is_max ? FpRangeSelect::Max : FpRangeSelect::Min;
FCODE(vrangep)(result, result, xmm_b, FpRangeLUT(range_select, FpRangeSign::Preserve));
} else if (code.HasHostFeature(HostFeature::AVX)) {
FCODE(vcmpeqp)(mask, result, xmm_b);
if constexpr (is_max) {
FCODE(vandp)(eq, result, xmm_b);
@ -1217,37 +1224,32 @@ void EmitX64::EmitFPVectorMulX64(EmitContext& ctx, IR::Inst* inst) {
EmitFPVectorMulX<64>(code, ctx, inst);
}
void EmitX64::EmitFPVectorNeg16(EmitContext& ctx, IR::Inst* inst) {
template<size_t fsize>
void FPVectorNeg(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
using FPT = mp::unsigned_integer_of_size<fsize>;
constexpr FPT sign_mask = FP::FPInfo<FPT>::sign_mask;
constexpr u64 sign_mask64 = Common::Replicate<u64>(sign_mask, fsize);
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(args[0]);
const Xbyak::Address mask = code.MConst(xword, 0x8000800080008000, 0x8000800080008000);
const Xbyak::Address mask = code.MConst(xword, sign_mask64, sign_mask64);
code.pxor(a, mask);
code.xorps(a, mask);
ctx.reg_alloc.DefineValue(inst, a);
}
// Emitter entry point for FPVectorNeg16: forwards to the shared FPVectorNeg
// template instantiated with fsize = 16.
void EmitX64::EmitFPVectorNeg16(EmitContext& ctx, IR::Inst* inst) {
FPVectorNeg<16>(code, ctx, inst);
}
void EmitX64::EmitFPVectorNeg32(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(args[0]);
const Xbyak::Address mask = code.MConst(xword, 0x8000000080000000, 0x8000000080000000);
code.pxor(a, mask);
ctx.reg_alloc.DefineValue(inst, a);
FPVectorNeg<32>(code, ctx, inst);
}
void EmitX64::EmitFPVectorNeg64(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(args[0]);
const Xbyak::Address mask = code.MConst(xword, 0x8000000000000000, 0x8000000000000000);
code.pxor(a, mask);
ctx.reg_alloc.DefineValue(inst, a);
FPVectorNeg<64>(code, ctx, inst);
}
void EmitX64::EmitFPVectorPairedAdd32(EmitContext& ctx, IR::Inst* inst) {

View File

@ -199,7 +199,7 @@ constexpr bool MostSignificantBit(T value) {
}
template<typename T>
inline T Replicate(T value, size_t element_size) {
constexpr T Replicate(T value, size_t element_size) {
ASSERT_MSG(BitSize<T>() % element_size == 0, "bitsize of T not divisible by element_size");
if (element_size == BitSize<T>())
return value;

View File

@ -141,10 +141,14 @@ void IREmitter::SetCpsr(const IR::U32& value) {
Inst(Opcode::A32SetCpsr, value);
}
void IREmitter::SetCpsrNZCV(const IR::U32& value) {
void IREmitter::SetCpsrNZCV(const IR::NZCV& value) {
Inst(Opcode::A32SetCpsrNZCV, value);
}
// Emits Opcode::A32SetCpsrNZCVRaw with the 32-bit `value` as its argument.
// Unlike SetCpsrNZCV, this variant takes an IR::U32 rather than an IR::NZCV.
void IREmitter::SetCpsrNZCVRaw(const IR::U32& value) {
Inst(Opcode::A32SetCpsrNZCVRaw, value);
}
// Emits Opcode::A32SetCpsrNZCVQ with the 32-bit `value` as its argument.
void IREmitter::SetCpsrNZCVQ(const IR::U32& value) {
Inst(Opcode::A32SetCpsrNZCVQ, value);
}
@ -153,6 +157,10 @@ void IREmitter::SetCheckBit(const IR::U1& value) {
Inst(Opcode::A32SetCheckBit, value);
}
// Emits Opcode::GetOverflowFromOp with `value` as its argument and returns
// the resulting one-bit IR value.
IR::U1 IREmitter::GetOverflowFrom(const IR::Value& value) {
return Inst<IR::U1>(Opcode::GetOverflowFromOp, value);
}
// Emits Opcode::A32GetCFlag (no arguments) and returns the resulting one-bit
// IR value.
IR::U1 IREmitter::GetCFlag() {
return Inst<IR::U1>(Opcode::A32GetCFlag);
}

View File

@ -55,9 +55,11 @@ public:
IR::U32 GetCpsr();
void SetCpsr(const IR::U32& value);
void SetCpsrNZCV(const IR::U32& value);
void SetCpsrNZCV(const IR::NZCV& value);
void SetCpsrNZCVRaw(const IR::U32& value);
void SetCpsrNZCVQ(const IR::U32& value);
void SetCheckBit(const IR::U1& value);
IR::U1 GetOverflowFrom(const IR::Value& value);
IR::U1 GetCFlag();
void SetNFlag(const IR::U1& value);
void SetZFlag(const IR::U1& value);

View File

@ -102,7 +102,7 @@ bool TranslatorVisitor::arm_MRC(Cond cond, size_t opc1, CoprocReg CRn, Reg t, si
ir.SetRegister(t, word);
} else {
const auto new_cpsr_nzcv = ir.And(word, ir.Imm32(0xF0000000));
ir.SetCpsrNZCV(new_cpsr_nzcv);
ir.SetCpsrNZCVRaw(new_cpsr_nzcv);
}
}
return true;

View File

@ -21,17 +21,14 @@ bool TranslatorVisitor::arm_ADC_imm(Cond cond, bool S, Reg n, Reg d, int rotate,
return UnpredictableInstruction();
}
ir.ALUWritePC(result.result);
ir.ALUWritePC(result);
ir.SetTerm(IR::Term::ReturnToDispatch{});
return false;
}
ir.SetRegister(d, result.result);
ir.SetRegister(d, result);
if (S) {
ir.SetNFlag(ir.MostSignificantBit(result.result));
ir.SetZFlag(ir.IsZero(result.result));
ir.SetCFlag(result.carry);
ir.SetVFlag(result.overflow);
ir.SetCpsrNZCV(ir.NZCVFrom(result));
}
return true;
}
@ -50,17 +47,14 @@ bool TranslatorVisitor::arm_ADC_reg(Cond cond, bool S, Reg n, Reg d, Imm<5> imm5
return UnpredictableInstruction();
}
ir.ALUWritePC(result.result);
ir.ALUWritePC(result);
ir.SetTerm(IR::Term::ReturnToDispatch{});
return false;
}
ir.SetRegister(d, result.result);
ir.SetRegister(d, result);
if (S) {
ir.SetNFlag(ir.MostSignificantBit(result.result));
ir.SetZFlag(ir.IsZero(result.result));
ir.SetCFlag(result.carry);
ir.SetVFlag(result.overflow);
ir.SetCpsrNZCV(ir.NZCVFrom(result));
}
return true;
@ -81,12 +75,9 @@ bool TranslatorVisitor::arm_ADC_rsr(Cond cond, bool S, Reg n, Reg d, Reg s, Shif
const auto shifted = EmitRegShift(ir.GetRegister(m), shift, shift_n, carry_in);
const auto result = ir.AddWithCarry(ir.GetRegister(n), shifted.result, ir.GetCFlag());
ir.SetRegister(d, result.result);
ir.SetRegister(d, result);
if (S) {
ir.SetNFlag(ir.MostSignificantBit(result.result));
ir.SetZFlag(ir.IsZero(result.result));
ir.SetCFlag(result.carry);
ir.SetVFlag(result.overflow);
ir.SetCpsrNZCV(ir.NZCVFrom(result));
}
return true;
@ -106,17 +97,14 @@ bool TranslatorVisitor::arm_ADD_imm(Cond cond, bool S, Reg n, Reg d, int rotate,
return UnpredictableInstruction();
}
ir.ALUWritePC(result.result);
ir.ALUWritePC(result);
ir.SetTerm(IR::Term::ReturnToDispatch{});
return false;
}
ir.SetRegister(d, result.result);
ir.SetRegister(d, result);
if (S) {
ir.SetNFlag(ir.MostSignificantBit(result.result));
ir.SetZFlag(ir.IsZero(result.result));
ir.SetCFlag(result.carry);
ir.SetVFlag(result.overflow);
ir.SetCpsrNZCV(ir.NZCVFrom(result));
}
return true;
@ -136,17 +124,14 @@ bool TranslatorVisitor::arm_ADD_reg(Cond cond, bool S, Reg n, Reg d, Imm<5> imm5
return UnpredictableInstruction();
}
ir.ALUWritePC(result.result);
ir.ALUWritePC(result);
ir.SetTerm(IR::Term::ReturnToDispatch{});
return false;
}
ir.SetRegister(d, result.result);
ir.SetRegister(d, result);
if (S) {
ir.SetNFlag(ir.MostSignificantBit(result.result));
ir.SetZFlag(ir.IsZero(result.result));
ir.SetCFlag(result.carry);
ir.SetVFlag(result.overflow);
ir.SetCpsrNZCV(ir.NZCVFrom(result));
}
return true;
@ -167,12 +152,9 @@ bool TranslatorVisitor::arm_ADD_rsr(Cond cond, bool S, Reg n, Reg d, Reg s, Shif
const auto shifted = EmitRegShift(ir.GetRegister(m), shift, shift_n, carry_in);
const auto result = ir.AddWithCarry(ir.GetRegister(n), shifted.result, ir.Imm1(0));
ir.SetRegister(d, result.result);
ir.SetRegister(d, result);
if (S) {
ir.SetNFlag(ir.MostSignificantBit(result.result));
ir.SetZFlag(ir.IsZero(result.result));
ir.SetCFlag(result.carry);
ir.SetVFlag(result.overflow);
ir.SetCpsrNZCV(ir.NZCVFrom(result));
}
return true;
@ -354,10 +336,7 @@ bool TranslatorVisitor::arm_CMN_imm(Cond cond, Reg n, int rotate, Imm<8> imm8) {
const u32 imm32 = ArmExpandImm(rotate, imm8);
const auto result = ir.AddWithCarry(ir.GetRegister(n), ir.Imm32(imm32), ir.Imm1(0));
ir.SetNFlag(ir.MostSignificantBit(result.result));
ir.SetZFlag(ir.IsZero(result.result));
ir.SetCFlag(result.carry);
ir.SetVFlag(result.overflow);
ir.SetCpsrNZCV(ir.NZCVFrom(result));
return true;
}
@ -370,10 +349,7 @@ bool TranslatorVisitor::arm_CMN_reg(Cond cond, Reg n, Imm<5> imm5, ShiftType shi
const auto shifted = EmitImmShift(ir.GetRegister(m), shift, imm5, ir.GetCFlag());
const auto result = ir.AddWithCarry(ir.GetRegister(n), shifted.result, ir.Imm1(0));
ir.SetNFlag(ir.MostSignificantBit(result.result));
ir.SetZFlag(ir.IsZero(result.result));
ir.SetCFlag(result.carry);
ir.SetVFlag(result.overflow);
ir.SetCpsrNZCV(ir.NZCVFrom(result));
return true;
}
@ -392,10 +368,7 @@ bool TranslatorVisitor::arm_CMN_rsr(Cond cond, Reg n, Reg s, ShiftType shift, Re
const auto shifted = EmitRegShift(ir.GetRegister(m), shift, shift_n, carry_in);
const auto result = ir.AddWithCarry(ir.GetRegister(n), shifted.result, ir.Imm1(0));
ir.SetNFlag(ir.MostSignificantBit(result.result));
ir.SetZFlag(ir.IsZero(result.result));
ir.SetCFlag(result.carry);
ir.SetVFlag(result.overflow);
ir.SetCpsrNZCV(ir.NZCVFrom(result));
return true;
}
@ -408,10 +381,7 @@ bool TranslatorVisitor::arm_CMP_imm(Cond cond, Reg n, int rotate, Imm<8> imm8) {
const u32 imm32 = ArmExpandImm(rotate, imm8);
const auto result = ir.SubWithCarry(ir.GetRegister(n), ir.Imm32(imm32), ir.Imm1(1));
ir.SetNFlag(ir.MostSignificantBit(result.result));
ir.SetZFlag(ir.IsZero(result.result));
ir.SetCFlag(result.carry);
ir.SetVFlag(result.overflow);
ir.SetCpsrNZCV(ir.NZCVFrom(result));
return true;
}
@ -424,10 +394,7 @@ bool TranslatorVisitor::arm_CMP_reg(Cond cond, Reg n, Imm<5> imm5, ShiftType shi
const auto shifted = EmitImmShift(ir.GetRegister(m), shift, imm5, ir.GetCFlag());
const auto result = ir.SubWithCarry(ir.GetRegister(n), shifted.result, ir.Imm1(1));
ir.SetNFlag(ir.MostSignificantBit(result.result));
ir.SetZFlag(ir.IsZero(result.result));
ir.SetCFlag(result.carry);
ir.SetVFlag(result.overflow);
ir.SetCpsrNZCV(ir.NZCVFrom(result));
return true;
}
@ -446,10 +413,7 @@ bool TranslatorVisitor::arm_CMP_rsr(Cond cond, Reg n, Reg s, ShiftType shift, Re
const auto shifted = EmitRegShift(ir.GetRegister(m), shift, shift_n, carry_in);
const auto result = ir.SubWithCarry(ir.GetRegister(n), shifted.result, ir.Imm1(1));
ir.SetNFlag(ir.MostSignificantBit(result.result));
ir.SetZFlag(ir.IsZero(result.result));
ir.SetCFlag(result.carry);
ir.SetVFlag(result.overflow);
ir.SetCpsrNZCV(ir.NZCVFrom(result));
return true;
}
@ -801,17 +765,14 @@ bool TranslatorVisitor::arm_RSB_imm(Cond cond, bool S, Reg n, Reg d, int rotate,
return UnpredictableInstruction();
}
ir.ALUWritePC(result.result);
ir.ALUWritePC(result);
ir.SetTerm(IR::Term::ReturnToDispatch{});
return false;
}
ir.SetRegister(d, result.result);
ir.SetRegister(d, result);
if (S) {
ir.SetNFlag(ir.MostSignificantBit(result.result));
ir.SetZFlag(ir.IsZero(result.result));
ir.SetCFlag(result.carry);
ir.SetVFlag(result.overflow);
ir.SetCpsrNZCV(ir.NZCVFrom(result));
}
return true;
@ -831,17 +792,14 @@ bool TranslatorVisitor::arm_RSB_reg(Cond cond, bool S, Reg n, Reg d, Imm<5> imm5
return UnpredictableInstruction();
}
ir.ALUWritePC(result.result);
ir.ALUWritePC(result);
ir.SetTerm(IR::Term::ReturnToDispatch{});
return false;
}
ir.SetRegister(d, result.result);
ir.SetRegister(d, result);
if (S) {
ir.SetNFlag(ir.MostSignificantBit(result.result));
ir.SetZFlag(ir.IsZero(result.result));
ir.SetCFlag(result.carry);
ir.SetVFlag(result.overflow);
ir.SetCpsrNZCV(ir.NZCVFrom(result));
}
return true;
@ -862,12 +820,9 @@ bool TranslatorVisitor::arm_RSB_rsr(Cond cond, bool S, Reg n, Reg d, Reg s, Shif
const auto shifted = EmitRegShift(ir.GetRegister(m), shift, shift_n, carry_in);
const auto result = ir.SubWithCarry(shifted.result, ir.GetRegister(n), ir.Imm1(1));
ir.SetRegister(d, result.result);
ir.SetRegister(d, result);
if (S) {
ir.SetNFlag(ir.MostSignificantBit(result.result));
ir.SetZFlag(ir.IsZero(result.result));
ir.SetCFlag(result.carry);
ir.SetVFlag(result.overflow);
ir.SetCpsrNZCV(ir.NZCVFrom(result));
}
return true;
@ -887,17 +842,14 @@ bool TranslatorVisitor::arm_RSC_imm(Cond cond, bool S, Reg n, Reg d, int rotate,
return UnpredictableInstruction();
}
ir.ALUWritePC(result.result);
ir.ALUWritePC(result);
ir.SetTerm(IR::Term::ReturnToDispatch{});
return false;
}
ir.SetRegister(d, result.result);
ir.SetRegister(d, result);
if (S) {
ir.SetNFlag(ir.MostSignificantBit(result.result));
ir.SetZFlag(ir.IsZero(result.result));
ir.SetCFlag(result.carry);
ir.SetVFlag(result.overflow);
ir.SetCpsrNZCV(ir.NZCVFrom(result));
}
return true;
@ -916,17 +868,14 @@ bool TranslatorVisitor::arm_RSC_reg(Cond cond, bool S, Reg n, Reg d, Imm<5> imm5
return UnpredictableInstruction();
}
ir.ALUWritePC(result.result);
ir.ALUWritePC(result);
ir.SetTerm(IR::Term::ReturnToDispatch{});
return false;
}
ir.SetRegister(d, result.result);
ir.SetRegister(d, result);
if (S) {
ir.SetNFlag(ir.MostSignificantBit(result.result));
ir.SetZFlag(ir.IsZero(result.result));
ir.SetCFlag(result.carry);
ir.SetVFlag(result.overflow);
ir.SetCpsrNZCV(ir.NZCVFrom(result));
}
return true;
@ -947,12 +896,9 @@ bool TranslatorVisitor::arm_RSC_rsr(Cond cond, bool S, Reg n, Reg d, Reg s, Shif
const auto shifted = EmitRegShift(ir.GetRegister(m), shift, shift_n, carry_in);
const auto result = ir.SubWithCarry(shifted.result, ir.GetRegister(n), ir.GetCFlag());
ir.SetRegister(d, result.result);
ir.SetRegister(d, result);
if (S) {
ir.SetNFlag(ir.MostSignificantBit(result.result));
ir.SetZFlag(ir.IsZero(result.result));
ir.SetCFlag(result.carry);
ir.SetVFlag(result.overflow);
ir.SetCpsrNZCV(ir.NZCVFrom(result));
}
return true;
@ -972,17 +918,14 @@ bool TranslatorVisitor::arm_SBC_imm(Cond cond, bool S, Reg n, Reg d, int rotate,
return UnpredictableInstruction();
}
ir.ALUWritePC(result.result);
ir.ALUWritePC(result);
ir.SetTerm(IR::Term::ReturnToDispatch{});
return false;
}
ir.SetRegister(d, result.result);
ir.SetRegister(d, result);
if (S) {
ir.SetNFlag(ir.MostSignificantBit(result.result));
ir.SetZFlag(ir.IsZero(result.result));
ir.SetCFlag(result.carry);
ir.SetVFlag(result.overflow);
ir.SetCpsrNZCV(ir.NZCVFrom(result));
}
return true;
@ -1002,17 +945,14 @@ bool TranslatorVisitor::arm_SBC_reg(Cond cond, bool S, Reg n, Reg d, Imm<5> imm5
return UnpredictableInstruction();
}
ir.ALUWritePC(result.result);
ir.ALUWritePC(result);
ir.SetTerm(IR::Term::ReturnToDispatch{});
return false;
}
ir.SetRegister(d, result.result);
ir.SetRegister(d, result);
if (S) {
ir.SetNFlag(ir.MostSignificantBit(result.result));
ir.SetZFlag(ir.IsZero(result.result));
ir.SetCFlag(result.carry);
ir.SetVFlag(result.overflow);
ir.SetCpsrNZCV(ir.NZCVFrom(result));
}
return true;
@ -1033,12 +973,9 @@ bool TranslatorVisitor::arm_SBC_rsr(Cond cond, bool S, Reg n, Reg d, Reg s, Shif
const auto shifted = EmitRegShift(ir.GetRegister(m), shift, shift_n, carry_in);
const auto result = ir.SubWithCarry(ir.GetRegister(n), shifted.result, ir.GetCFlag());
ir.SetRegister(d, result.result);
ir.SetRegister(d, result);
if (S) {
ir.SetNFlag(ir.MostSignificantBit(result.result));
ir.SetZFlag(ir.IsZero(result.result));
ir.SetCFlag(result.carry);
ir.SetVFlag(result.overflow);
ir.SetCpsrNZCV(ir.NZCVFrom(result));
}
return true;
@ -1058,17 +995,14 @@ bool TranslatorVisitor::arm_SUB_imm(Cond cond, bool S, Reg n, Reg d, int rotate,
return UnpredictableInstruction();
}
ir.ALUWritePC(result.result);
ir.ALUWritePC(result);
ir.SetTerm(IR::Term::ReturnToDispatch{});
return false;
}
ir.SetRegister(d, result.result);
ir.SetRegister(d, result);
if (S) {
ir.SetNFlag(ir.MostSignificantBit(result.result));
ir.SetZFlag(ir.IsZero(result.result));
ir.SetCFlag(result.carry);
ir.SetVFlag(result.overflow);
ir.SetCpsrNZCV(ir.NZCVFrom(result));
}
return true;
@ -1088,17 +1022,14 @@ bool TranslatorVisitor::arm_SUB_reg(Cond cond, bool S, Reg n, Reg d, Imm<5> imm5
return UnpredictableInstruction();
}
ir.ALUWritePC(result.result);
ir.ALUWritePC(result);
ir.SetTerm(IR::Term::ReturnToDispatch{});
return false;
}
ir.SetRegister(d, result.result);
ir.SetRegister(d, result);
if (S) {
ir.SetNFlag(ir.MostSignificantBit(result.result));
ir.SetZFlag(ir.IsZero(result.result));
ir.SetCFlag(result.carry);
ir.SetVFlag(result.overflow);
ir.SetCpsrNZCV(ir.NZCVFrom(result));
}
return true;
@ -1118,12 +1049,9 @@ bool TranslatorVisitor::arm_SUB_rsr(Cond cond, bool S, Reg n, Reg d, Reg s, Shif
const auto shifted = EmitRegShift(ir.GetRegister(m), shift, shift_n, carry_in);
const auto result = ir.SubWithCarry(ir.GetRegister(n), shifted.result, ir.Imm1(1));
ir.SetRegister(d, result.result);
ir.SetRegister(d, result);
if (S) {
ir.SetNFlag(ir.MostSignificantBit(result.result));
ir.SetZFlag(ir.IsZero(result.result));
ir.SetCFlag(result.carry);
ir.SetVFlag(result.overflow);
ir.SetCpsrNZCV(ir.NZCVFrom(result));
}
return true;

View File

@ -260,10 +260,10 @@ bool TranslatorVisitor::arm_SMLAxy(Cond cond, Reg d, Reg a, Reg m, bool M, bool
const IR::U32 m16 = M ? ir.ArithmeticShiftRight(m32, ir.Imm8(16), ir.Imm1(0)).result
: ir.SignExtendHalfToWord(ir.LeastSignificantHalf(m32));
const IR::U32 product = ir.Mul(n16, m16);
const auto result_overflow = ir.AddWithCarry(product, ir.GetRegister(a), ir.Imm1(0));
const auto result = ir.AddWithCarry(product, ir.GetRegister(a), ir.Imm1(0));
ir.SetRegister(d, result_overflow.result);
ir.OrQFlag(result_overflow.overflow);
ir.SetRegister(d, result);
ir.OrQFlag(ir.GetOverflowFrom(result));
return true;
}
@ -306,10 +306,10 @@ bool TranslatorVisitor::arm_SMLAWy(Cond cond, Reg d, Reg a, Reg m, bool M, Reg n
}
const IR::U64 m16 = ir.SignExtendWordToLong(ir.SignExtendHalfToWord(ir.LeastSignificantHalf(m32)));
const auto product = ir.LeastSignificantWord(ir.LogicalShiftRight(ir.Mul(n32, m16), ir.Imm8(16)));
const auto result_overflow = ir.AddWithCarry(product, ir.GetRegister(a), ir.Imm1(0));
const auto result = ir.AddWithCarry(product, ir.GetRegister(a), ir.Imm1(0));
ir.SetRegister(d, result_overflow.result);
ir.OrQFlag(result_overflow.overflow);
ir.SetRegister(d, result);
ir.OrQFlag(ir.GetOverflowFrom(result));
return true;
}
@ -352,7 +352,7 @@ bool TranslatorVisitor::arm_SMMLA(Cond cond, Reg d, Reg a, Reg m, bool R, Reg n)
const auto result_carry = ir.MostSignificantWord(temp);
auto result = result_carry.result;
if (R) {
result = ir.AddWithCarry(result, ir.Imm32(0), result_carry.carry).result;
result = ir.AddWithCarry(result, ir.Imm32(0), result_carry.carry);
}
ir.SetRegister(d, result);
@ -376,7 +376,7 @@ bool TranslatorVisitor::arm_SMMLS(Cond cond, Reg d, Reg a, Reg m, bool R, Reg n)
const auto result_carry = ir.MostSignificantWord(temp);
auto result = result_carry.result;
if (R) {
result = ir.AddWithCarry(result, ir.Imm32(0), result_carry.carry).result;
result = ir.AddWithCarry(result, ir.Imm32(0), result_carry.carry);
}
ir.SetRegister(d, result);
@ -399,7 +399,7 @@ bool TranslatorVisitor::arm_SMMUL(Cond cond, Reg d, Reg m, bool R, Reg n) {
const auto result_carry = ir.MostSignificantWord(product);
auto result = result_carry.result;
if (R) {
result = ir.AddWithCarry(result, ir.Imm32(0), result_carry.carry).result;
result = ir.AddWithCarry(result, ir.Imm32(0), result_carry.carry);
}
ir.SetRegister(d, result);
@ -435,11 +435,11 @@ bool TranslatorVisitor::arm_SMLAD(Cond cond, Reg d, Reg a, Reg m, bool M, Reg n)
const IR::U32 product_hi = ir.Mul(n_hi, m_hi);
const IR::U32 addend = ir.GetRegister(a);
auto result_overflow = ir.AddWithCarry(product_lo, product_hi, ir.Imm1(0));
ir.OrQFlag(result_overflow.overflow);
result_overflow = ir.AddWithCarry(result_overflow.result, addend, ir.Imm1(0));
ir.SetRegister(d, result_overflow.result);
ir.OrQFlag(result_overflow.overflow);
auto result = ir.AddWithCarry(product_lo, product_hi, ir.Imm1(0));
ir.OrQFlag(ir.GetOverflowFrom(result));
result = ir.AddWithCarry(result, addend, ir.Imm1(0));
ir.SetRegister(d, result);
ir.OrQFlag(ir.GetOverflowFrom(result));
return true;
}
@ -507,10 +507,10 @@ bool TranslatorVisitor::arm_SMLSD(Cond cond, Reg d, Reg a, Reg m, bool M, Reg n)
const IR::U32 product_hi = ir.Mul(n_hi, m_hi);
const IR::U32 addend = ir.GetRegister(a);
const IR::U32 product = ir.Sub(product_lo, product_hi);
auto result_overflow = ir.AddWithCarry(product, addend, ir.Imm1(0));
auto result = ir.AddWithCarry(product, addend, ir.Imm1(0));
ir.SetRegister(d, result_overflow.result);
ir.OrQFlag(result_overflow.overflow);
ir.SetRegister(d, result);
ir.OrQFlag(ir.GetOverflowFrom(result));
return true;
}
@ -572,10 +572,10 @@ bool TranslatorVisitor::arm_SMUAD(Cond cond, Reg d, Reg m, bool M, Reg n) {
const IR::U32 product_lo = ir.Mul(n_lo, m_lo);
const IR::U32 product_hi = ir.Mul(n_hi, m_hi);
const auto result_overflow = ir.AddWithCarry(product_lo, product_hi, ir.Imm1(0));
const auto result = ir.AddWithCarry(product_lo, product_hi, ir.Imm1(0));
ir.SetRegister(d, result_overflow.result);
ir.OrQFlag(result_overflow.overflow);
ir.SetRegister(d, result);
ir.OrQFlag(ir.GetOverflowFrom(result));
return true;
}

View File

@ -194,7 +194,7 @@ bool TranslatorVisitor::arm_USADA8(Cond cond, Reg d, Reg a, Reg m, Reg n) {
const auto tmp = ir.PackedAbsDiffSumS8(ir.GetRegister(n), ir.GetRegister(m));
const auto result = ir.AddWithCarry(ir.GetRegister(a), tmp, ir.Imm1(0));
ir.SetRegister(d, result.result);
ir.SetRegister(d, result);
return true;
}

View File

@ -62,12 +62,9 @@ bool TranslatorVisitor::thumb16_ASR_imm(Imm<5> imm5, Reg m, Reg d) {
// Translates the 16-bit Thumb register ADD (encoding T1): Rd = Rn + Rm.
//
// m, n: source registers; d: destination register.
// Always returns true (presumably "keep translating this block" -- confirm
// against the visitor's caller).
bool TranslatorVisitor::thumb16_ADD_reg_t1(Reg m, Reg n, Reg d) {
    // A plain add is an add-with-carry whose carry-in is 0.
    const auto result = ir.AddWithCarry(ir.GetRegister(n), ir.GetRegister(m), ir.Imm1(0));

    ir.SetRegister(d, result);
    // Flags are only written when the instruction is not inside an IT block.
    if (!ir.current_location.IT().IsInITBlock()) {
        // All four condition flags are derived from the add in one step.
        ir.SetCpsrNZCV(ir.NZCVFrom(result));
    }
    return true;
}
@ -77,12 +74,9 @@ bool TranslatorVisitor::thumb16_ADD_reg_t1(Reg m, Reg n, Reg d) {
// Translates the 16-bit Thumb register SUB: Rd = Rn - Rm.
//
// m, n: source registers; d: destination register.
// Always returns true (presumably "keep translating this block" -- confirm
// against the visitor's caller).
bool TranslatorVisitor::thumb16_SUB_reg(Reg m, Reg n, Reg d) {
    // A carry-in of 1 makes the subtract-with-carry a plain Rn - Rm.
    const auto result = ir.SubWithCarry(ir.GetRegister(n), ir.GetRegister(m), ir.Imm1(1));

    ir.SetRegister(d, result);
    // Flags are only written when the instruction is not inside an IT block.
    if (!ir.current_location.IT().IsInITBlock()) {
        // All four condition flags are derived from the subtraction in one step.
        ir.SetCpsrNZCV(ir.NZCVFrom(result));
    }
    return true;
}
@ -93,12 +87,9 @@ bool TranslatorVisitor::thumb16_ADD_imm_t1(Imm<3> imm3, Reg n, Reg d) {
const u32 imm32 = imm3.ZeroExtend();
const auto result = ir.AddWithCarry(ir.GetRegister(n), ir.Imm32(imm32), ir.Imm1(0));
ir.SetRegister(d, result.result);
ir.SetRegister(d, result);
if (!ir.current_location.IT().IsInITBlock()) {
ir.SetNFlag(ir.MostSignificantBit(result.result));
ir.SetZFlag(ir.IsZero(result.result));
ir.SetCFlag(result.carry);
ir.SetVFlag(result.overflow);
ir.SetCpsrNZCV(ir.NZCVFrom(result));
}
return true;
}
@ -109,12 +100,9 @@ bool TranslatorVisitor::thumb16_SUB_imm_t1(Imm<3> imm3, Reg n, Reg d) {
const u32 imm32 = imm3.ZeroExtend();
const auto result = ir.SubWithCarry(ir.GetRegister(n), ir.Imm32(imm32), ir.Imm1(1));
ir.SetRegister(d, result.result);
ir.SetRegister(d, result);
if (!ir.current_location.IT().IsInITBlock()) {
ir.SetNFlag(ir.MostSignificantBit(result.result));
ir.SetZFlag(ir.IsZero(result.result));
ir.SetCFlag(result.carry);
ir.SetVFlag(result.overflow);
ir.SetCpsrNZCV(ir.NZCVFrom(result));
}
return true;
}
@ -138,10 +126,7 @@ bool TranslatorVisitor::thumb16_CMP_imm(Reg n, Imm<8> imm8) {
const u32 imm32 = imm8.ZeroExtend();
const auto result = ir.SubWithCarry(ir.GetRegister(n), ir.Imm32(imm32), ir.Imm1(1));
ir.SetNFlag(ir.MostSignificantBit(result.result));
ir.SetZFlag(ir.IsZero(result.result));
ir.SetCFlag(result.carry);
ir.SetVFlag(result.overflow);
ir.SetCpsrNZCV(ir.NZCVFrom(result));
return true;
}
@ -153,12 +138,9 @@ bool TranslatorVisitor::thumb16_ADD_imm_t2(Reg d_n, Imm<8> imm8) {
const Reg n = d_n;
const auto result = ir.AddWithCarry(ir.GetRegister(n), ir.Imm32(imm32), ir.Imm1(0));
ir.SetRegister(d, result.result);
ir.SetRegister(d, result);
if (!ir.current_location.IT().IsInITBlock()) {
ir.SetNFlag(ir.MostSignificantBit(result.result));
ir.SetZFlag(ir.IsZero(result.result));
ir.SetCFlag(result.carry);
ir.SetVFlag(result.overflow);
ir.SetCpsrNZCV(ir.NZCVFrom(result));
}
return true;
}
@ -171,12 +153,9 @@ bool TranslatorVisitor::thumb16_SUB_imm_t2(Reg d_n, Imm<8> imm8) {
const Reg n = d_n;
const auto result = ir.SubWithCarry(ir.GetRegister(n), ir.Imm32(imm32), ir.Imm1(1));
ir.SetRegister(d, result.result);
ir.SetRegister(d, result);
if (!ir.current_location.IT().IsInITBlock()) {
ir.SetNFlag(ir.MostSignificantBit(result.result));
ir.SetZFlag(ir.IsZero(result.result));
ir.SetCFlag(result.carry);
ir.SetVFlag(result.overflow);
ir.SetCpsrNZCV(ir.NZCVFrom(result));
}
return true;
}
@ -270,12 +249,9 @@ bool TranslatorVisitor::thumb16_ADC_reg(Reg m, Reg d_n) {
const auto aspr_c = ir.GetCFlag();
const auto result = ir.AddWithCarry(ir.GetRegister(n), ir.GetRegister(m), aspr_c);
ir.SetRegister(d, result.result);
ir.SetRegister(d, result);
if (!ir.current_location.IT().IsInITBlock()) {
ir.SetNFlag(ir.MostSignificantBit(result.result));
ir.SetZFlag(ir.IsZero(result.result));
ir.SetCFlag(result.carry);
ir.SetVFlag(result.overflow);
ir.SetCpsrNZCV(ir.NZCVFrom(result));
}
return true;
}
@ -288,12 +264,9 @@ bool TranslatorVisitor::thumb16_SBC_reg(Reg m, Reg d_n) {
const auto aspr_c = ir.GetCFlag();
const auto result = ir.SubWithCarry(ir.GetRegister(n), ir.GetRegister(m), aspr_c);
ir.SetRegister(d, result.result);
ir.SetRegister(d, result);
if (!ir.current_location.IT().IsInITBlock()) {
ir.SetNFlag(ir.MostSignificantBit(result.result));
ir.SetZFlag(ir.IsZero(result.result));
ir.SetCFlag(result.carry);
ir.SetVFlag(result.overflow);
ir.SetCpsrNZCV(ir.NZCVFrom(result));
}
return true;
}
@ -327,12 +300,9 @@ bool TranslatorVisitor::thumb16_TST_reg(Reg m, Reg n) {
// Rd can never encode R15.
// Translates the 16-bit Thumb reverse-subtract from zero: Rd = 0 - Rn.
//
// n: source register; d: destination register.
// Always returns true (presumably "keep translating this block" -- confirm
// against the visitor's caller).
bool TranslatorVisitor::thumb16_RSB_imm(Reg n, Reg d) {
    // Operands are reversed relative to SUB: the immediate 0 is the minuend.
    // A carry-in of 1 makes the subtract-with-carry a plain subtraction.
    const auto result = ir.SubWithCarry(ir.Imm32(0), ir.GetRegister(n), ir.Imm1(1));

    ir.SetRegister(d, result);
    // Flags are only written when the instruction is not inside an IT block.
    if (!ir.current_location.IT().IsInITBlock()) {
        // All four condition flags are derived from the subtraction in one step.
        ir.SetCpsrNZCV(ir.NZCVFrom(result));
    }
    return true;
}
@ -340,20 +310,14 @@ bool TranslatorVisitor::thumb16_RSB_imm(Reg n, Reg d) {
// CMP <Rn>, <Rm>
// Compares two registers by computing Rn - Rm and keeping only the flags.
//
// m, n: registers to compare. No destination register is written.
// Always returns true (presumably "keep translating this block" -- confirm
// against the visitor's caller).
bool TranslatorVisitor::thumb16_CMP_reg_t1(Reg m, Reg n) {
    // A carry-in of 1 makes the subtract-with-carry a plain Rn - Rm.
    const auto result = ir.SubWithCarry(ir.GetRegister(n), ir.GetRegister(m), ir.Imm1(1));

    // The flags are updated unconditionally; the difference itself is discarded.
    ir.SetCpsrNZCV(ir.NZCVFrom(result));
    return true;
}
// CMN <Rn>, <Rm>
bool TranslatorVisitor::thumb16_CMN_reg(Reg m, Reg n) {
const auto result = ir.AddWithCarry(ir.GetRegister(n), ir.GetRegister(m), ir.Imm1(0));
ir.SetNFlag(ir.MostSignificantBit(result.result));
ir.SetZFlag(ir.IsZero(result.result));
ir.SetCFlag(result.carry);
ir.SetVFlag(result.overflow);
ir.SetCpsrNZCV(ir.NZCVFrom(result));
return true;
}
@ -430,12 +394,12 @@ bool TranslatorVisitor::thumb16_ADD_reg_t2(bool d_n_hi, Reg m, Reg d_n_lo) {
const auto result = ir.AddWithCarry(ir.GetRegister(n), ir.GetRegister(m), ir.Imm1(0));
if (d == Reg::PC) {
ir.UpdateUpperLocationDescriptor();
ir.ALUWritePC(result.result);
ir.ALUWritePC(result);
// Return to dispatch as we can't predict what PC is going to be. Stop compilation.
ir.SetTerm(IR::Term::FastDispatchHint{});
return false;
} else {
ir.SetRegister(d, result.result);
ir.SetRegister(d, result);
return true;
}
}
@ -451,10 +415,7 @@ bool TranslatorVisitor::thumb16_CMP_reg_t2(bool n_hi, Reg m, Reg n_lo) {
}
const auto result = ir.SubWithCarry(ir.GetRegister(n), ir.GetRegister(m), ir.Imm1(1));
ir.SetNFlag(ir.MostSignificantBit(result.result));
ir.SetZFlag(ir.IsZero(result.result));
ir.SetCFlag(result.carry);
ir.SetVFlag(result.overflow);
ir.SetCpsrNZCV(ir.NZCVFrom(result));
return true;
}
@ -672,7 +633,7 @@ bool TranslatorVisitor::thumb16_ADD_sp_t1(Reg d, Imm<8> imm8) {
const u32 imm32 = imm8.ZeroExtend() << 2;
const auto result = ir.AddWithCarry(ir.GetRegister(Reg::SP), ir.Imm32(imm32), ir.Imm1(0));
ir.SetRegister(d, result.result);
ir.SetRegister(d, result);
return true;
}
@ -682,7 +643,7 @@ bool TranslatorVisitor::thumb16_ADD_sp_t2(Imm<7> imm7) {
const Reg d = Reg::SP;
const auto result = ir.AddWithCarry(ir.GetRegister(Reg::SP), ir.Imm32(imm32), ir.Imm1(0));
ir.SetRegister(d, result.result);
ir.SetRegister(d, result);
return true;
}
@ -692,7 +653,7 @@ bool TranslatorVisitor::thumb16_SUB_sp(Imm<7> imm7) {
const Reg d = Reg::SP;
const auto result = ir.SubWithCarry(ir.GetRegister(Reg::SP), ir.Imm32(imm32), ir.Imm1(1));
ir.SetRegister(d, result.result);
ir.SetRegister(d, result);
return true;
}

View File

@ -67,7 +67,7 @@ bool TranslatorVisitor::thumb32_MRC(bool two, size_t opc1, CoprocReg CRn, Reg t,
ir.SetRegister(t, word);
} else {
const auto new_cpsr_nzcv = ir.And(word, ir.Imm32(0xF0000000));
ir.SetCpsrNZCV(new_cpsr_nzcv);
ir.SetCpsrNZCVRaw(new_cpsr_nzcv);
}
return true;
}

View File

@ -166,10 +166,7 @@ bool TranslatorVisitor::thumb32_CMN_imm(Imm<1> i, Reg n, Imm<3> imm3, Imm<8> imm
const auto imm32 = ThumbExpandImm(i, imm3, imm8);
const auto result = ir.AddWithCarry(ir.GetRegister(n), ir.Imm32(imm32), ir.Imm1(0));
ir.SetNFlag(ir.MostSignificantBit(result.result));
ir.SetZFlag(ir.IsZero(result.result));
ir.SetCFlag(result.carry);
ir.SetVFlag(result.overflow);
ir.SetCpsrNZCV(ir.NZCVFrom(result));
return true;
}
@ -182,12 +179,9 @@ bool TranslatorVisitor::thumb32_ADD_imm_1(Imm<1> i, bool S, Reg n, Imm<3> imm3,
const auto imm32 = ThumbExpandImm(i, imm3, imm8);
const auto result = ir.AddWithCarry(ir.GetRegister(n), ir.Imm32(imm32), ir.Imm1(0));
ir.SetRegister(d, result.result);
ir.SetRegister(d, result);
if (S) {
ir.SetNFlag(ir.MostSignificantBit(result.result));
ir.SetZFlag(ir.IsZero(result.result));
ir.SetCFlag(result.carry);
ir.SetVFlag(result.overflow);
ir.SetCpsrNZCV(ir.NZCVFrom(result));
}
return true;
}
@ -200,12 +194,9 @@ bool TranslatorVisitor::thumb32_ADC_imm(Imm<1> i, bool S, Reg n, Imm<3> imm3, Re
const auto imm32 = ThumbExpandImm(i, imm3, imm8);
const auto result = ir.AddWithCarry(ir.GetRegister(n), ir.Imm32(imm32), ir.GetCFlag());
ir.SetRegister(d, result.result);
ir.SetRegister(d, result);
if (S) {
ir.SetNFlag(ir.MostSignificantBit(result.result));
ir.SetZFlag(ir.IsZero(result.result));
ir.SetCFlag(result.carry);
ir.SetVFlag(result.overflow);
ir.SetCpsrNZCV(ir.NZCVFrom(result));
}
return true;
}
@ -218,12 +209,9 @@ bool TranslatorVisitor::thumb32_SBC_imm(Imm<1> i, bool S, Reg n, Imm<3> imm3, Re
const auto imm32 = ThumbExpandImm(i, imm3, imm8);
const auto result = ir.SubWithCarry(ir.GetRegister(n), ir.Imm32(imm32), ir.GetCFlag());
ir.SetRegister(d, result.result);
ir.SetRegister(d, result);
if (S) {
ir.SetNFlag(ir.MostSignificantBit(result.result));
ir.SetZFlag(ir.IsZero(result.result));
ir.SetCFlag(result.carry);
ir.SetVFlag(result.overflow);
ir.SetCpsrNZCV(ir.NZCVFrom(result));
}
return true;
}
@ -236,10 +224,7 @@ bool TranslatorVisitor::thumb32_CMP_imm(Imm<1> i, Reg n, Imm<3> imm3, Imm<8> imm
const auto imm32 = ThumbExpandImm(i, imm3, imm8);
const auto result = ir.SubWithCarry(ir.GetRegister(n), ir.Imm32(imm32), ir.Imm1(1));
ir.SetNFlag(ir.MostSignificantBit(result.result));
ir.SetZFlag(ir.IsZero(result.result));
ir.SetCFlag(result.carry);
ir.SetVFlag(result.overflow);
ir.SetCpsrNZCV(ir.NZCVFrom(result));
return true;
}
@ -252,12 +237,9 @@ bool TranslatorVisitor::thumb32_SUB_imm_1(Imm<1> i, bool S, Reg n, Imm<3> imm3,
const auto imm32 = ThumbExpandImm(i, imm3, imm8);
const auto result = ir.SubWithCarry(ir.GetRegister(n), ir.Imm32(imm32), ir.Imm1(1));
ir.SetRegister(d, result.result);
ir.SetRegister(d, result);
if (S) {
ir.SetNFlag(ir.MostSignificantBit(result.result));
ir.SetZFlag(ir.IsZero(result.result));
ir.SetCFlag(result.carry);
ir.SetVFlag(result.overflow);
ir.SetCpsrNZCV(ir.NZCVFrom(result));
}
return true;
}
@ -270,12 +252,9 @@ bool TranslatorVisitor::thumb32_RSB_imm(Imm<1> i, bool S, Reg n, Imm<3> imm3, Re
const auto imm32 = ThumbExpandImm(i, imm3, imm8);
const auto result = ir.SubWithCarry(ir.Imm32(imm32), ir.GetRegister(n), ir.Imm1(1));
ir.SetRegister(d, result.result);
ir.SetRegister(d, result);
if (S) {
ir.SetNFlag(ir.MostSignificantBit(result.result));
ir.SetZFlag(ir.IsZero(result.result));
ir.SetCFlag(result.carry);
ir.SetVFlag(result.overflow);
ir.SetCpsrNZCV(ir.NZCVFrom(result));
}
return true;
}

View File

@ -85,7 +85,7 @@ bool TranslatorVisitor::thumb32_ADD_imm_2(Imm<1> imm1, Reg n, Imm<3> imm3, Reg d
const auto reg_n = ir.GetRegister(n);
const auto result = ir.AddWithCarry(reg_n, ir.Imm32(imm), ir.Imm1(0));
ir.SetRegister(d, result.result);
ir.SetRegister(d, result);
return true;
}
@ -196,7 +196,7 @@ bool TranslatorVisitor::thumb32_SUB_imm_2(Imm<1> imm1, Reg n, Imm<3> imm3, Reg d
const auto reg_n = ir.GetRegister(n);
const auto result = ir.SubWithCarry(reg_n, ir.Imm32(imm), ir.Imm1(1));
ir.SetRegister(d, result.result);
ir.SetRegister(d, result);
return true;
}

View File

@ -178,10 +178,7 @@ bool TranslatorVisitor::thumb32_CMN_reg(Reg n, Imm<3> imm3, Imm<2> imm2, ShiftTy
const auto shifted = EmitImmShift(ir.GetRegister(m), type, imm3, imm2, ir.GetCFlag());
const auto result = ir.AddWithCarry(ir.GetRegister(n), shifted.result, ir.Imm1(0));
ir.SetNFlag(ir.MostSignificantBit(result.result));
ir.SetZFlag(ir.IsZero(result.result));
ir.SetCFlag(result.carry);
ir.SetVFlag(result.overflow);
ir.SetCpsrNZCV(ir.NZCVFrom(result));
return true;
}
@ -194,12 +191,9 @@ bool TranslatorVisitor::thumb32_ADD_reg(bool S, Reg n, Imm<3> imm3, Reg d, Imm<2
const auto shifted = EmitImmShift(ir.GetRegister(m), type, imm3, imm2, ir.GetCFlag());
const auto result = ir.AddWithCarry(ir.GetRegister(n), shifted.result, ir.Imm1(0));
ir.SetRegister(d, result.result);
ir.SetRegister(d, result);
if (S) {
ir.SetNFlag(ir.MostSignificantBit(result.result));
ir.SetZFlag(ir.IsZero(result.result));
ir.SetCFlag(result.carry);
ir.SetVFlag(result.overflow);
ir.SetCpsrNZCV(ir.NZCVFrom(result));
}
return true;
}
@ -211,12 +205,9 @@ bool TranslatorVisitor::thumb32_ADC_reg(bool S, Reg n, Imm<3> imm3, Reg d, Imm<2
const auto shifted = EmitImmShift(ir.GetRegister(m), type, imm3, imm2, ir.GetCFlag());
const auto result = ir.AddWithCarry(ir.GetRegister(n), shifted.result, ir.GetCFlag());
ir.SetRegister(d, result.result);
ir.SetRegister(d, result);
if (S) {
ir.SetNFlag(ir.MostSignificantBit(result.result));
ir.SetZFlag(ir.IsZero(result.result));
ir.SetCFlag(result.carry);
ir.SetVFlag(result.overflow);
ir.SetCpsrNZCV(ir.NZCVFrom(result));
}
return true;
}
@ -228,12 +219,9 @@ bool TranslatorVisitor::thumb32_SBC_reg(bool S, Reg n, Imm<3> imm3, Reg d, Imm<2
const auto shifted = EmitImmShift(ir.GetRegister(m), type, imm3, imm2, ir.GetCFlag());
const auto result = ir.SubWithCarry(ir.GetRegister(n), shifted.result, ir.GetCFlag());
ir.SetRegister(d, result.result);
ir.SetRegister(d, result);
if (S) {
ir.SetNFlag(ir.MostSignificantBit(result.result));
ir.SetZFlag(ir.IsZero(result.result));
ir.SetCFlag(result.carry);
ir.SetVFlag(result.overflow);
ir.SetCpsrNZCV(ir.NZCVFrom(result));
}
return true;
}
@ -246,10 +234,7 @@ bool TranslatorVisitor::thumb32_CMP_reg(Reg n, Imm<3> imm3, Imm<2> imm2, ShiftTy
const auto shifted = EmitImmShift(ir.GetRegister(m), type, imm3, imm2, ir.GetCFlag());
const auto result = ir.SubWithCarry(ir.GetRegister(n), shifted.result, ir.Imm1(1));
ir.SetNFlag(ir.MostSignificantBit(result.result));
ir.SetZFlag(ir.IsZero(result.result));
ir.SetCFlag(result.carry);
ir.SetVFlag(result.overflow);
ir.SetCpsrNZCV(ir.NZCVFrom(result));
return true;
}
@ -262,12 +247,9 @@ bool TranslatorVisitor::thumb32_SUB_reg(bool S, Reg n, Imm<3> imm3, Reg d, Imm<2
const auto shifted = EmitImmShift(ir.GetRegister(m), type, imm3, imm2, ir.GetCFlag());
const auto result = ir.SubWithCarry(ir.GetRegister(n), shifted.result, ir.Imm1(1));
ir.SetRegister(d, result.result);
ir.SetRegister(d, result);
if (S) {
ir.SetNFlag(ir.MostSignificantBit(result.result));
ir.SetZFlag(ir.IsZero(result.result));
ir.SetCFlag(result.carry);
ir.SetVFlag(result.overflow);
ir.SetCpsrNZCV(ir.NZCVFrom(result));
}
return true;
}
@ -279,12 +261,9 @@ bool TranslatorVisitor::thumb32_RSB_reg(bool S, Reg n, Imm<3> imm3, Reg d, Imm<2
const auto shifted = EmitImmShift(ir.GetRegister(m), type, imm3, imm2, ir.GetCFlag());
const auto result = ir.SubWithCarry(shifted.result, ir.GetRegister(n), ir.Imm1(1));
ir.SetRegister(d, result.result);
ir.SetRegister(d, result);
if (S) {
ir.SetNFlag(ir.MostSignificantBit(result.result));
ir.SetZFlag(ir.IsZero(result.result));
ir.SetCFlag(result.carry);
ir.SetVFlag(result.overflow);
ir.SetCpsrNZCV(ir.NZCVFrom(result));
}
return true;
}

View File

@ -68,12 +68,12 @@ bool TranslatorVisitor::thumb32_SMLAD(Reg n, Reg a, Reg d, bool X, Reg m) {
const IR::U32 product_hi = ir.Mul(n_hi, m_hi);
const IR::U32 addend = ir.GetRegister(a);
auto result_overflow = ir.AddWithCarry(product_lo, product_hi, ir.Imm1(0));
ir.OrQFlag(result_overflow.overflow);
result_overflow = ir.AddWithCarry(result_overflow.result, addend, ir.Imm1(0));
auto result = ir.AddWithCarry(product_lo, product_hi, ir.Imm1(0));
ir.OrQFlag(ir.GetOverflowFrom(result));
result = ir.AddWithCarry(result, addend, ir.Imm1(0));
ir.SetRegister(d, result_overflow.result);
ir.OrQFlag(result_overflow.overflow);
ir.SetRegister(d, result);
ir.OrQFlag(ir.GetOverflowFrom(result));
return true;
}
@ -97,10 +97,10 @@ bool TranslatorVisitor::thumb32_SMLSD(Reg n, Reg a, Reg d, bool X, Reg m) {
const IR::U32 product_hi = ir.Mul(n_hi, m_hi);
const IR::U32 addend = ir.GetRegister(a);
const IR::U32 product = ir.Sub(product_lo, product_hi);
auto result_overflow = ir.AddWithCarry(product, addend, ir.Imm1(0));
auto result = ir.AddWithCarry(product, addend, ir.Imm1(0));
ir.SetRegister(d, result_overflow.result);
ir.OrQFlag(result_overflow.overflow);
ir.SetRegister(d, result);
ir.OrQFlag(ir.GetOverflowFrom(result));
return true;
}
@ -116,10 +116,10 @@ bool TranslatorVisitor::thumb32_SMLAXY(Reg n, Reg a, Reg d, bool N, bool M, Reg
const IR::U32 m16 = M ? ir.ArithmeticShiftRight(m32, ir.Imm8(16), ir.Imm1(0)).result
: ir.SignExtendHalfToWord(ir.LeastSignificantHalf(m32));
const IR::U32 product = ir.Mul(n16, m16);
const auto result_overflow = ir.AddWithCarry(product, ir.GetRegister(a), ir.Imm1(0));
const auto result = ir.AddWithCarry(product, ir.GetRegister(a), ir.Imm1(0));
ir.SetRegister(d, result_overflow.result);
ir.OrQFlag(result_overflow.overflow);
ir.SetRegister(d, result);
ir.OrQFlag(ir.GetOverflowFrom(result));
return true;
}
@ -135,7 +135,7 @@ bool TranslatorVisitor::thumb32_SMMLA(Reg n, Reg a, Reg d, bool R, Reg m) {
const auto result_carry = ir.MostSignificantWord(temp);
auto result = result_carry.result;
if (R) {
result = ir.AddWithCarry(result, ir.Imm32(0), result_carry.carry).result;
result = ir.AddWithCarry(result, ir.Imm32(0), result_carry.carry);
}
ir.SetRegister(d, result);
@ -154,7 +154,7 @@ bool TranslatorVisitor::thumb32_SMMLS(Reg n, Reg a, Reg d, bool R, Reg m) {
const auto result_carry = ir.MostSignificantWord(temp);
auto result = result_carry.result;
if (R) {
result = ir.AddWithCarry(result, ir.Imm32(0), result_carry.carry).result;
result = ir.AddWithCarry(result, ir.Imm32(0), result_carry.carry);
}
ir.SetRegister(d, result);
@ -172,7 +172,7 @@ bool TranslatorVisitor::thumb32_SMMUL(Reg n, Reg d, bool R, Reg m) {
const auto result_carry = ir.MostSignificantWord(product);
auto result = result_carry.result;
if (R) {
result = ir.AddWithCarry(result, ir.Imm32(0), result_carry.carry).result;
result = ir.AddWithCarry(result, ir.Imm32(0), result_carry.carry);
}
ir.SetRegister(d, result);
@ -197,10 +197,10 @@ bool TranslatorVisitor::thumb32_SMUAD(Reg n, Reg d, bool M, Reg m) {
const IR::U32 product_lo = ir.Mul(n_lo, m_lo);
const IR::U32 product_hi = ir.Mul(n_hi, m_hi);
const auto result_overflow = ir.AddWithCarry(product_lo, product_hi, ir.Imm1(0));
const auto result = ir.AddWithCarry(product_lo, product_hi, ir.Imm1(0));
ir.SetRegister(d, result_overflow.result);
ir.OrQFlag(result_overflow.overflow);
ir.SetRegister(d, result);
ir.OrQFlag(ir.GetOverflowFrom(result));
return true;
}
@ -257,10 +257,10 @@ bool TranslatorVisitor::thumb32_SMLAWY(Reg n, Reg a, Reg d, bool M, Reg m) {
}
const IR::U64 m16 = ir.SignExtendWordToLong(ir.SignExtendHalfToWord(ir.LeastSignificantHalf(m32)));
const auto product = ir.LeastSignificantWord(ir.LogicalShiftRight(ir.Mul(n32, m16), ir.Imm8(16)));
const auto result_overflow = ir.AddWithCarry(product, ir.GetRegister(a), ir.Imm1(0));
const auto result = ir.AddWithCarry(product, ir.GetRegister(a), ir.Imm1(0));
ir.SetRegister(d, result_overflow.result);
ir.OrQFlag(result_overflow.overflow);
ir.SetRegister(d, result);
ir.OrQFlag(ir.GetOverflowFrom(result));
return true;
}
@ -305,7 +305,7 @@ bool TranslatorVisitor::thumb32_USADA8(Reg n, Reg a, Reg d, Reg m) {
const auto tmp = ir.PackedAbsDiffSumS8(reg_n, reg_m);
const auto result = ir.AddWithCarry(reg_a, tmp, ir.Imm1(0));
ir.SetRegister(d, result.result);
ir.SetRegister(d, result);
return true;
}

View File

@ -1155,7 +1155,7 @@ bool TranslatorVisitor::vfp_VMRS(Cond cond, Reg t) {
if (t == Reg::R15) {
// This encodes ASPR_nzcv access
const auto nzcv = ir.GetFpscrNZCV();
ir.SetCpsrNZCV(nzcv);
ir.SetCpsrNZCVRaw(nzcv);
} else {
ir.SetRegister(t, ir.GetFpscr());
}

View File

@ -42,6 +42,9 @@ enum class OptimizationFlag : std::uint32_t {
/// This is an UNSAFE optimization that causes floating-point instructions to not produce correct NaNs.
/// This may also result in inaccurate results when instructions are given certain special values.
Unsafe_InaccurateNaN = 0x00040000,
/// This is an UNSAFE optimization that causes ASIMD floating-point instructions to be run with incorrect
/// rounding modes. This may result in inaccurate results with all floating-point ASIMD instructions.
Unsafe_IgnoreStandardFPCRValue = 0x00080000,
};
constexpr OptimizationFlag no_optimizations = static_cast<OptimizationFlag>(0);

View File

@ -240,13 +240,6 @@ U32U64 IREmitter::RotateRightMasked(const U32U64& value_in, const U32U64& shift_
}
}
// Emits a 32-bit add of a, b and carry_in, plus the two pseudo-operations that
// read the carry-out and the overflow of that add.
// Returns the sum, carry-out and overflow bundled together, in that order.
ResultAndCarryAndOverflow<U32> IREmitter::AddWithCarry(const U32& a, const U32& b, const U1& carry_in) {
    const auto sum = Inst<U32>(Opcode::Add32, a, b, carry_in);

    // Both flag reads take the add instruction itself as their operand.
    const auto carry_flag = Inst<U1>(Opcode::GetCarryFromOp, sum);
    const auto overflow_flag = Inst<U1>(Opcode::GetOverflowFromOp, sum);

    return {sum, carry_flag, overflow_flag};
}
U32U64 IREmitter::AddWithCarry(const U32U64& a, const U32U64& b, const U1& carry_in) {
ASSERT(a.GetType() == b.GetType());
if (a.GetType() == Type::U32) {
@ -265,14 +258,6 @@ U32U64 IREmitter::Add(const U32U64& a, const U32U64& b) {
}
}
// Emits a 32-bit subtract of b from a with carry_in, plus the two
// pseudo-operations that read the carry-out and the overflow of that subtract.
// Returns the difference, carry-out and overflow bundled together, in that order.
ResultAndCarryAndOverflow<U32> IREmitter::SubWithCarry(const U32& a, const U32& b, const U1& carry_in) {
    // This is equivalent to AddWithCarry(a, Not(b), carry_in).
    const auto difference = Inst<U32>(Opcode::Sub32, a, b, carry_in);

    // Both flag reads take the subtract instruction itself as their operand.
    const auto carry_flag = Inst<U1>(Opcode::GetCarryFromOp, difference);
    const auto overflow_flag = Inst<U1>(Opcode::GetOverflowFromOp, difference);

    return {difference, carry_flag, overflow_flag};
}
U32U64 IREmitter::SubWithCarry(const U32U64& a, const U32U64& b, const U1& carry_in) {
ASSERT(a.GetType() == b.GetType());
if (a.GetType() == Type::U32) {

View File

@ -134,8 +134,6 @@ public:
U32U64 ArithmeticShiftRightMasked(const U32U64& value_in, const U32U64& shift_amount);
U32U64 RotateRightMasked(const U32U64& value_in, const U32U64& shift_amount);
ResultAndCarry<U32> RotateRightExtended(const U32& value_in, const U1& carry_in);
ResultAndCarryAndOverflow<U32> AddWithCarry(const U32& a, const U32& b, const U1& carry_in);
ResultAndCarryAndOverflow<U32> SubWithCarry(const U32& a, const U32& b, const U1& carry_in);
U32U64 AddWithCarry(const U32U64& a, const U32U64& b, const U1& carry_in);
U32U64 SubWithCarry(const U32U64& a, const U32U64& b, const U1& carry_in);
U32U64 Add(const U32U64& a, const U32U64& b);

View File

@ -176,6 +176,7 @@ bool Inst::ReadsFromCPSR() const {
bool Inst::WritesToCPSR() const {
switch (op) {
case Opcode::A32SetCpsr:
case Opcode::A32SetCpsrNZCVRaw:
case Opcode::A32SetCpsrNZCV:
case Opcode::A32SetCpsrNZCVQ:
case Opcode::A32SetNFlag:

View File

@ -17,7 +17,8 @@ A32OPC(SetExtendedRegister64, Void, A32E
A32OPC(SetVector, Void, A32ExtReg, U128 )
A32OPC(GetCpsr, U32, )
A32OPC(SetCpsr, Void, U32 )
A32OPC(SetCpsrNZCV, Void, U32 )
A32OPC(SetCpsrNZCV, Void, NZCV )
A32OPC(SetCpsrNZCVRaw, Void, U32 )
A32OPC(SetCpsrNZCVQ, Void, U32 )
A32OPC(GetNFlag, U1, )
A32OPC(SetNFlag, Void, U1 )

View File

@ -454,6 +454,116 @@ TEST_CASE("A64: FABD", "[a64]") {
REQUIRE(jit.GetVector(22) == Vector{0x56d3f0857fc90e2b, 0x6e4b0a4144873176});
}
// Executes vector FABS in the 8h, 4s and 2d arrangements and checks that each
// destination lane equals the source lane with only its sign bit cleared.
TEST_CASE("A64: FABS", "[a64]") {
A64TestEnv env;
A64::Jit jit{A64::UserConfig{&env}};
// Program: three FABS instructions followed by a branch-to-self.
env.code_mem.emplace_back(0x4ef8f804); // FABS v4.8h, v0.8h
env.code_mem.emplace_back(0x4ea0f825); // FABS v5.4s, v1.4s
env.code_mem.emplace_back(0x4ee0f846); // FABS v6.2d, v2.2d
env.code_mem.emplace_back(0x14000000); // B .
jit.SetPC(0);
// Source registers v0-v2: every lane (at the width used by the matching FABS) has its sign bit set.
jit.SetVector(0, {0xffffffffffffffff, 0xffffffffffff8000});
jit.SetVector(1, {0xffbfffffffc00000, 0xff80000080000000});
jit.SetVector(2, {0xffffffffffffffff, 0x8000000000000000});
// Tick budget handed to the test environment before running.
env.ticks_left = 4;
jit.Run();
// Destination registers v4-v6: same bit patterns as the inputs with each lane's top bit cleared.
REQUIRE(jit.GetVector(4) == Vector{0x7fff7fff7fff7fff, 0x7fff7fff7fff0000});
REQUIRE(jit.GetVector(5) == Vector{0x7fbfffff7fc00000, 0x7f80000000000000});
REQUIRE(jit.GetVector(6) == Vector{0x7fffffffffffffff, 0x0000000000000000});
}
// Executes vector FMIN on single-precision (V0, V1) and double-precision (V2, V3)
// operands, with results written back to V0 and V2, and compares against fixed
// expected bit patterns.
TEST_CASE("A64: FMIN", "[a64]") {
A64TestEnv env;
A64::Jit jit{A64::UserConfig{&env}};
// Program: two FMIN instructions followed by a branch-to-self.
env.code_mem.emplace_back(0x4ea1f400); // FMIN.4S V0, V0, V1
env.code_mem.emplace_back(0x4ee3f442); // FMIN.2D V2, V2, V3
env.code_mem.emplace_back(0x14000000); // B .
jit.SetPC(0);
// Single-precision operands; they include the NaN encodings 0x7fc00000, 0x7f984a37 and 0xffffffff.
jit.SetVector(0, {0x7fc00000'09503366, 0x00000000'7f984a37});
jit.SetVector(1, {0xc1200000'00000001, 0x6e4b0a41'ffffffff});
// Double-precision operands.
jit.SetVector(2, {0x7fc0000009503366, 0x3ff0000000000000});
jit.SetVector(3, {0xbff0000000000000, 0x6e4b0a41ffffffff});
// Tick budget handed to the test environment before running.
env.ticks_left = 2;
jit.Run();
// The 0x7fc00000 NaN is expected to be kept, and 0x7f984a37 is expected to come back
// as 0x7fd84a37 (same pattern with bit 22 set).
REQUIRE(jit.GetVector(0) == Vector{0x7fc00000'00000001, 0x00000000'7fd84a37});
REQUIRE(jit.GetVector(2) == Vector{0xbff0000000000000, 0x3ff0000000000000});
}
// Executes vector FMAX on single-precision (V0, V1) and double-precision (V2, V3)
// operands, with results written back to V0 and V2, and compares against fixed
// expected bit patterns.
TEST_CASE("A64: FMAX", "[a64]") {
A64TestEnv env;
A64::Jit jit{A64::UserConfig{&env}};
// Program: two FMAX instructions followed by a branch-to-self.
env.code_mem.emplace_back(0x4e21f400); // FMAX.4S V0, V0, V1
env.code_mem.emplace_back(0x4e63f442); // FMAX.2D V2, V2, V3
env.code_mem.emplace_back(0x14000000); // B .
jit.SetPC(0);
// Single-precision operands; they include the NaN encodings 0x7fc00000, 0x7f984a37 and 0xffffffff.
jit.SetVector(0, {0x7fc00000'09503366, 0x00000000'7f984a37});
jit.SetVector(1, {0xc1200000'00000001, 0x6e4b0a41'ffffffff});
// Double-precision operands.
jit.SetVector(2, {0x7fc0000009503366, 0x3ff0000000000000});
jit.SetVector(3, {0xbff0000000000000, 0x6e4b0a41ffffffff});
// Tick budget handed to the test environment before running.
env.ticks_left = 2;
jit.Run();
// The 0x7fc00000 NaN is expected to be kept, and 0x7f984a37 is expected to come back
// as 0x7fd84a37 (same pattern with bit 22 set).
REQUIRE(jit.GetVector(0) == Vector{0x7fc00000'09503366, 0x6e4b0a41'7fd84a37});
REQUIRE(jit.GetVector(2) == Vector{0x7fc0000009503366, 0x6e4b0a41ffffffff});
}
// Executes vector FMINNM on single-precision (V0, V1) and double-precision (V2, V3)
// operands, with results written back to V0 and V2, and compares against fixed
// expected bit patterns.
TEST_CASE("A64: FMINNM", "[a64]") {
A64TestEnv env;
A64::Jit jit{A64::UserConfig{&env}};
// Program: two FMINNM instructions followed by a branch-to-self.
env.code_mem.emplace_back(0x4ea1c400); // FMINNM.4S V0, V0, V1
env.code_mem.emplace_back(0x4ee3c442); // FMINNM.2D V2, V2, V3
env.code_mem.emplace_back(0x14000000); // B .
jit.SetPC(0);
// Single-precision operands; they include the NaN encodings 0x7fc00000, 0x7f984a37 and 0xffffffff.
jit.SetVector(0, {0x7fc00000'09503366, 0x00000000'7f984a37});
jit.SetVector(1, {0xc1200000'00000001, 0x6e4b0a41'ffffffff});
// Double-precision operands; V3 holds 0xfff0000000000000 and the all-ones NaN pattern.
jit.SetVector(2, {0x7fc0000009503366, 0x3ff0000000000000});
jit.SetVector(3, {0xfff0000000000000, 0xffffffffffffffff});
// Tick budget handed to the test environment before running.
env.ticks_left = 2;
jit.Run();
// Where V0 holds the 0x7fc00000 NaN, the expected result is the other operand (0xc1200000);
// 0x7f984a37 is still expected to come back as 0x7fd84a37 (same pattern with bit 22 set).
REQUIRE(jit.GetVector(0) == Vector{0xc1200000'00000001, 0x00000000'7fd84a37});
// Where V3 holds the all-ones NaN, the expected result is the V2 operand (0x3ff0000000000000).
REQUIRE(jit.GetVector(2) == Vector{0xfff0000000000000, 0x3ff0000000000000});
}
// Executes vector FMAXNM on single-precision (V0, V1) and double-precision (V2, V3)
// operands, with results written back to V0 and V2, and compares against fixed
// expected bit patterns.
TEST_CASE("A64: FMAXNM", "[a64]") {
A64TestEnv env;
A64::Jit jit{A64::UserConfig{&env}};
// Program: two FMAXNM instructions followed by a branch-to-self.
env.code_mem.emplace_back(0x4e21c400); // FMAXNM.4S V0, V0, V1
env.code_mem.emplace_back(0x4e63c442); // FMAXNM.2D V2, V2, V3
env.code_mem.emplace_back(0x14000000); // B .
jit.SetPC(0);
// Single-precision operands; they include the NaN encodings 0x7fc00000, 0x7f984a37 and 0xffffffff.
jit.SetVector(0, {0x7fc00000'09503366, 0x00000000'7f984a37});
jit.SetVector(1, {0xc1200000'00000001, 0x6e4b0a41'ffffffff});
// Double-precision operands; V3 holds 0xfff0000000000000 and the all-ones NaN pattern.
jit.SetVector(2, {0x7fc0000009503366, 0x3ff0000000000000});
jit.SetVector(3, {0xfff0000000000000, 0xffffffffffffffff});
// Tick budget handed to the test environment before running.
env.ticks_left = 2;
jit.Run();
// Where V0 holds the 0x7fc00000 NaN, the expected result is the other operand (0xc1200000);
// 0x7f984a37 is still expected to come back as 0x7fd84a37 (same pattern with bit 22 set).
REQUIRE(jit.GetVector(0) == Vector{0xc1200000'09503366, 0x6e4b0a41'7fd84a37});
// Where V3 holds the all-ones NaN, the expected result is the V2 operand (0x3ff0000000000000).
REQUIRE(jit.GetVector(2) == Vector{0x7fc0000009503366, 0x3ff0000000000000});
}
TEST_CASE("A64: 128-bit exclusive read/write", "[a64]") {
A64TestEnv env;
ExclusiveMonitor monitor{1};

View File

@ -69,6 +69,8 @@ namespace {
} // namespace
namespace AudioCore {
constexpr s32 NUM_BUFFERS = 2;
AudioRenderer::AudioRenderer(Core::Timing::CoreTiming& core_timing_, Core::Memory::Memory& memory_,
AudioCommon::AudioRendererParameter params,
Stream::ReleaseCallback&& release_callback,
@ -89,9 +91,9 @@ AudioRenderer::AudioRenderer(Core::Timing::CoreTiming& core_timing_, Core::Memor
core_timing, params.sample_rate, AudioCommon::STREAM_NUM_CHANNELS,
fmt::format("AudioRenderer-Instance{}", instance_number), std::move(release_callback));
process_event = Core::Timing::CreateEvent(
fmt::format("AudioRenderer-Instance{}-Consume", instance_number),
fmt::format("AudioRenderer-Instance{}-Process", instance_number),
[this](std::uintptr_t, std::chrono::nanoseconds) { ReleaseAndQueueBuffers(); });
for (size_t i = 0; i < NUM_BUFFERS; ++i) {
for (s32 i = 0; i < NUM_BUFFERS; ++i) {
QueueMixedBuffer(i);
}
}
@ -128,7 +130,7 @@ Stream::State AudioRenderer::GetStreamState() const {
ResultCode AudioRenderer::UpdateAudioRenderer(const std::vector<u8>& input_params,
std::vector<u8>& output_params) {
{
std::scoped_lock l{lock};
std::scoped_lock lock{mutex};
InfoUpdater info_updater{input_params, output_params, behavior_info};
if (!info_updater.UpdateBehaviorInfo(behavior_info)) {
@ -332,17 +334,17 @@ void AudioRenderer::ReleaseAndQueueBuffers() {
}
{
std::scoped_lock l{lock};
std::scoped_lock lock{mutex};
const auto released_buffers{audio_out->GetTagsAndReleaseBuffers(stream)};
for (const auto& tag : released_buffers) {
QueueMixedBuffer(tag);
}
}
const f32 sampleRate = static_cast<f32>(GetSampleRate());
const f32 sampleCount = static_cast<f32>(GetSampleCount());
const f32 consumeRate = sampleRate / (sampleCount * (sampleCount / 240));
const s32 ms = (1000 / static_cast<s32>(consumeRate)) - 1;
const f32 sample_rate = static_cast<f32>(GetSampleRate());
const f32 sample_count = static_cast<f32>(GetSampleCount());
const f32 consume_rate = sample_rate / (sample_count * (sample_count / 240));
const s32 ms = (1000 / static_cast<s32>(consume_rate)) - 1;
const std::chrono::milliseconds next_event_time(std::max(ms / NUM_BUFFERS, 1));
core_timing.ScheduleEvent(next_event_time, process_event, {});
}

View File

@ -34,7 +34,6 @@ class Memory;
namespace AudioCore {
using DSPStateHolder = std::array<VoiceState*, AudioCommon::MAX_CHANNEL_COUNT>;
constexpr s32 NUM_BUFFERS = 2;
class AudioOut;
@ -74,7 +73,7 @@ private:
std::size_t elapsed_frame_count{};
Core::Timing::CoreTiming& core_timing;
std::shared_ptr<Core::Timing::EventType> process_event;
std::mutex lock;
std::mutex mutex;
};
} // namespace AudioCore

View File

@ -123,6 +123,7 @@ void RestoreGlobalState(bool is_powered_on) {
values.cpu_accuracy.SetGlobal(true);
values.cpuopt_unsafe_unfuse_fma.SetGlobal(true);
values.cpuopt_unsafe_reduce_fp_error.SetGlobal(true);
values.cpuopt_unsafe_ignore_standard_fpcr.SetGlobal(true);
values.cpuopt_unsafe_inaccurate_nan.SetGlobal(true);
values.cpuopt_unsafe_fastmem_check.SetGlobal(true);

View File

@ -129,6 +129,7 @@ struct Values {
Setting<bool> cpuopt_unsafe_unfuse_fma;
Setting<bool> cpuopt_unsafe_reduce_fp_error;
Setting<bool> cpuopt_unsafe_ignore_standard_fpcr;
Setting<bool> cpuopt_unsafe_inaccurate_nan;
Setting<bool> cpuopt_unsafe_fastmem_check;

View File

@ -186,6 +186,9 @@ std::shared_ptr<Dynarmic::A32::Jit> ARM_Dynarmic_32::MakeJit(Common::PageTable*
if (Settings::values.cpuopt_unsafe_reduce_fp_error.GetValue()) {
config.optimizations |= Dynarmic::OptimizationFlag::Unsafe_ReducedErrorFP;
}
if (Settings::values.cpuopt_unsafe_ignore_standard_fpcr.GetValue()) {
config.optimizations |= Dynarmic::OptimizationFlag::Unsafe_IgnoreStandardFPCRValue;
}
if (Settings::values.cpuopt_unsafe_inaccurate_nan.GetValue()) {
config.optimizations |= Dynarmic::OptimizationFlag::Unsafe_InaccurateNaN;
}

View File

@ -757,6 +757,8 @@ void Config::ReadCpuValues() {
QStringLiteral("cpuopt_unsafe_unfuse_fma"), true);
ReadSettingGlobal(Settings::values.cpuopt_unsafe_reduce_fp_error,
QStringLiteral("cpuopt_unsafe_reduce_fp_error"), true);
ReadSettingGlobal(Settings::values.cpuopt_unsafe_ignore_standard_fpcr,
QStringLiteral("cpuopt_unsafe_ignore_standard_fpcr"), true);
ReadSettingGlobal(Settings::values.cpuopt_unsafe_inaccurate_nan,
QStringLiteral("cpuopt_unsafe_inaccurate_nan"), true);
ReadSettingGlobal(Settings::values.cpuopt_unsafe_fastmem_check,
@ -1343,6 +1345,8 @@ void Config::SaveCpuValues() {
Settings::values.cpuopt_unsafe_unfuse_fma, true);
WriteSettingGlobal(QStringLiteral("cpuopt_unsafe_reduce_fp_error"),
Settings::values.cpuopt_unsafe_reduce_fp_error, true);
WriteSettingGlobal(QStringLiteral("cpuopt_unsafe_ignore_standard_fpcr"),
Settings::values.cpuopt_unsafe_ignore_standard_fpcr, true);
WriteSettingGlobal(QStringLiteral("cpuopt_unsafe_inaccurate_nan"),
Settings::values.cpuopt_unsafe_inaccurate_nan, true);
WriteSettingGlobal(QStringLiteral("cpuopt_unsafe_fastmem_check"),

View File

@ -34,12 +34,15 @@ void ConfigureCpu::SetConfiguration() {
ui->accuracy->setEnabled(runtime_lock);
ui->cpuopt_unsafe_unfuse_fma->setEnabled(runtime_lock);
ui->cpuopt_unsafe_reduce_fp_error->setEnabled(runtime_lock);
ui->cpuopt_unsafe_ignore_standard_fpcr->setEnabled(runtime_lock);
ui->cpuopt_unsafe_inaccurate_nan->setEnabled(runtime_lock);
ui->cpuopt_unsafe_fastmem_check->setEnabled(runtime_lock);
ui->cpuopt_unsafe_unfuse_fma->setChecked(Settings::values.cpuopt_unsafe_unfuse_fma.GetValue());
ui->cpuopt_unsafe_reduce_fp_error->setChecked(
Settings::values.cpuopt_unsafe_reduce_fp_error.GetValue());
ui->cpuopt_unsafe_ignore_standard_fpcr->setChecked(
Settings::values.cpuopt_unsafe_ignore_standard_fpcr.GetValue());
ui->cpuopt_unsafe_inaccurate_nan->setChecked(
Settings::values.cpuopt_unsafe_inaccurate_nan.GetValue());
ui->cpuopt_unsafe_fastmem_check->setChecked(
@ -84,6 +87,9 @@ void ConfigureCpu::ApplyConfiguration() {
ConfigurationShared::ApplyPerGameSetting(&Settings::values.cpuopt_unsafe_reduce_fp_error,
ui->cpuopt_unsafe_reduce_fp_error,
cpuopt_unsafe_reduce_fp_error);
ConfigurationShared::ApplyPerGameSetting(&Settings::values.cpuopt_unsafe_ignore_standard_fpcr,
ui->cpuopt_unsafe_ignore_standard_fpcr,
cpuopt_unsafe_ignore_standard_fpcr);
ConfigurationShared::ApplyPerGameSetting(&Settings::values.cpuopt_unsafe_inaccurate_nan,
ui->cpuopt_unsafe_inaccurate_nan,
cpuopt_unsafe_inaccurate_nan);
@ -137,6 +143,9 @@ void ConfigureCpu::SetupPerGameUI() {
ConfigurationShared::SetColoredTristate(ui->cpuopt_unsafe_reduce_fp_error,
Settings::values.cpuopt_unsafe_reduce_fp_error,
cpuopt_unsafe_reduce_fp_error);
ConfigurationShared::SetColoredTristate(ui->cpuopt_unsafe_ignore_standard_fpcr,
Settings::values.cpuopt_unsafe_ignore_standard_fpcr,
cpuopt_unsafe_ignore_standard_fpcr);
ConfigurationShared::SetColoredTristate(ui->cpuopt_unsafe_inaccurate_nan,
Settings::values.cpuopt_unsafe_inaccurate_nan,
cpuopt_unsafe_inaccurate_nan);

View File

@ -40,6 +40,7 @@ private:
ConfigurationShared::CheckState cpuopt_unsafe_unfuse_fma;
ConfigurationShared::CheckState cpuopt_unsafe_reduce_fp_error;
ConfigurationShared::CheckState cpuopt_unsafe_ignore_standard_fpcr;
ConfigurationShared::CheckState cpuopt_unsafe_inaccurate_nan;
ConfigurationShared::CheckState cpuopt_unsafe_fastmem_check;
};

View File

@ -111,6 +111,18 @@
</property>
</widget>
</item>
<item>
<!-- Checkbox for the cpuopt_unsafe_ignore_standard_fpcr option; the widget name must match the ui->cpuopt_unsafe_ignore_standard_fpcr accesses in ConfigureCpu. -->
<widget class="QCheckBox" name="cpuopt_unsafe_ignore_standard_fpcr">
<property name="toolTip">
<string>
&lt;div&gt;This option improves the speed of 32 bits ASIMD floating-point functions by running with incorrect rounding modes.&lt;/div&gt;
</string>
</property>
<property name="text">
<string>Faster ASIMD instructions (32 bits only)</string>
</property>
</widget>
</item>
<item>
<widget class="QCheckBox" name="cpuopt_unsafe_inaccurate_nan">
<property name="toolTip">