From f7b2c595757c2aa9f1e3567667aa0cf0a6b87bca Mon Sep 17 00:00:00 2001 From: pineappleEA Date: Sat, 2 Jan 2021 22:47:26 +0100 Subject: [PATCH] early-access version 1279 --- README.md | 2 +- .../include/dynarmic/optimization_flags.h | 3 + .../backend/x64/emit_x64_data_processing.cpp | 40 +++++-- .../backend/x64/emit_x64_floating_point.cpp | 68 ++++++++++- .../x64/emit_x64_vector_floating_point.cpp | 79 +++++++++---- .../data_processing_conditional_compare.cpp | 4 +- src/common/CMakeLists.txt | 1 - src/common/atomic_ops.h | 71 ++++++++++- src/common/page_table.h | 2 +- src/common/swap.h | 4 +- src/common/x64/native_clock.cpp | 110 +++++++++++++++--- src/common/x64/native_clock.h | 21 +++- src/core/arm/dynarmic/arm_dynarmic_32.cpp | 3 + src/core/arm/dynarmic/arm_dynarmic_64.cpp | 3 + src/core/hle/kernel/memory/memory_block.h | 14 +-- src/core/hle/kernel/memory/page_table.cpp | 12 +- src/core/hle/kernel/svc_types.h | 4 +- src/core/hle/service/am/am.cpp | 6 +- src/core/hle/service/am/am.h | 2 +- src/core/settings.h | 3 +- src/input_common/gcadapter/gc_adapter.h | 6 +- src/input_common/motion_input.cpp | 2 +- src/input_common/mouse/mouse_input.h | 2 +- src/input_common/udp/udp.cpp | 8 +- src/tests/common/fibers.cpp | 4 +- src/video_core/command_classes/vic.cpp | 2 +- src/video_core/renderer_vulkan/vk_device.cpp | 2 +- src/yuzu/applets/error.cpp | 6 +- src/yuzu/compatdb.cpp | 2 +- src/yuzu/configuration/config.cpp | 4 + src/yuzu/configuration/configure_cpu.cpp | 3 + src/yuzu/configuration/configure_cpu.ui | 12 ++ src/yuzu/main.cpp | 2 +- src/yuzu_cmd/yuzu.cpp | 2 +- src/yuzu_tester/yuzu.cpp | 2 +- 35 files changed, 399 insertions(+), 112 deletions(-) diff --git a/README.md b/README.md index 858e2efe5..8cb3551d5 100755 --- a/README.md +++ b/README.md @@ -1,7 +1,7 @@ yuzu emulator early access ============= -This is the source code for early-access 1277. +This is the source code for early-access 1279. ## Legal Notice diff --git a/externals/dynarmic/include/dynarmic/optimization_flags.h b/externals/dynarmic/include/dynarmic/optimization_flags.h index 29b35425e..3a4ca56fe 100755 --- a/externals/dynarmic/include/dynarmic/optimization_flags.h +++ b/externals/dynarmic/include/dynarmic/optimization_flags.h @@ -39,6 +39,9 @@ enum class OptimizationFlag : std::uint32_t { /// This is an UNSAFE optimization that reduces accuracy of certain floating-point instructions. /// This allows results of FRECPE and FRSQRTE to have **less** error than spec allows. Unsafe_ReducedErrorFP = 0x00020000, + /// This is an UNSAFE optimization that causes floating-point instructions to not produce correct NaNs. + /// This may also result in inaccurate results when instructions are given certain special values. 
+ Unsafe_InaccurateNaN = 0x00040000, }; constexpr OptimizationFlag no_optimizations = static_cast(0); diff --git a/externals/dynarmic/src/backend/x64/emit_x64_data_processing.cpp b/externals/dynarmic/src/backend/x64/emit_x64_data_processing.cpp index e7b9495e8..461aef385 100755 --- a/externals/dynarmic/src/backend/x64/emit_x64_data_processing.cpp +++ b/externals/dynarmic/src/backend/x64/emit_x64_data_processing.cpp @@ -1080,29 +1080,40 @@ static void EmitSub(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, int bit auto args = ctx.reg_alloc.GetArgumentInfo(inst); auto& carry_in = args[2]; + const bool is_cmp = inst->UseCount() == size_t(!!carry_inst + !!overflow_inst + !!nzcv_inst) && carry_in.IsImmediate() && carry_in.GetImmediateU1(); const Xbyak::Reg64 nzcv = DoNZCV(code, ctx.reg_alloc, nzcv_inst); - const Xbyak::Reg result = ctx.reg_alloc.UseScratchGpr(args[0]).changeBit(bitsize); + const Xbyak::Reg result = (is_cmp ? ctx.reg_alloc.UseGpr(args[0]) : ctx.reg_alloc.UseScratchGpr(args[0])).changeBit(bitsize); const Xbyak::Reg8 carry = DoCarry(ctx.reg_alloc, carry_in, carry_inst); const Xbyak::Reg8 overflow = overflow_inst ? ctx.reg_alloc.ScratchGpr().cvt8() : Xbyak::Reg8{-1}; // TODO: Consider using LEA. - // TODO: Optimize CMP case. // Note that x64 CF is inverse of what the ARM carry flag is here. - if (args[1].IsImmediate() && args[1].GetType() == IR::Type::U32) { + bool invert_output_carry = true; + + if (is_cmp) { + if (args[1].IsImmediate() && args[1].GetType() == IR::Type::U32) { + const u32 op_arg = args[1].GetImmediateU32(); + code.cmp(result, op_arg); + } else { + OpArg op_arg = ctx.reg_alloc.UseOpArg(args[1]); + op_arg.setBit(bitsize); + code.cmp(result, *op_arg); + } + } else if (args[1].IsImmediate() && args[1].GetType() == IR::Type::U32) { const u32 op_arg = args[1].GetImmediateU32(); if (carry_in.IsImmediate()) { if (carry_in.GetImmediateU1()) { code.sub(result, op_arg); } else { - code.stc(); - code.sbb(result, op_arg); + code.add(result, ~op_arg); + invert_output_carry = false; } } else { code.bt(carry.cvt32(), 0); - code.cmc(); - code.sbb(result, op_arg); + code.adc(result, ~op_arg); + invert_output_carry = false; } } else { OpArg op_arg = ctx.reg_alloc.UseOpArg(args[1]); @@ -1122,14 +1133,20 @@ static void EmitSub(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, int bit } if (nzcv_inst) { - code.cmc(); + if (invert_output_carry) { + code.cmc(); + } code.lahf(); code.seto(code.al); ctx.reg_alloc.DefineValue(nzcv_inst, nzcv); ctx.EraseInstruction(nzcv_inst); } if (carry_inst) { - code.setnc(carry); + if (invert_output_carry) { + code.setnc(carry); + } else { + code.setc(carry); + } ctx.reg_alloc.DefineValue(carry_inst, carry); ctx.EraseInstruction(carry_inst); } @@ -1138,8 +1155,9 @@ static void EmitSub(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, int bit ctx.reg_alloc.DefineValue(overflow_inst, overflow); ctx.EraseInstruction(overflow_inst); } - - ctx.reg_alloc.DefineValue(inst, result); + if (!is_cmp) { + ctx.reg_alloc.DefineValue(inst, result); + } } void EmitX64::EmitSub32(EmitContext& ctx, IR::Inst* inst) { diff --git a/externals/dynarmic/src/backend/x64/emit_x64_floating_point.cpp b/externals/dynarmic/src/backend/x64/emit_x64_floating_point.cpp index b63bcbba4..f64a8b7ec 100755 --- a/externals/dynarmic/src/backend/x64/emit_x64_floating_point.cpp +++ b/externals/dynarmic/src/backend/x64/emit_x64_floating_point.cpp @@ -257,7 +257,7 @@ void FPTwoOp(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, Function fn) { Xbyak::Xmm result = 
ctx.reg_alloc.UseScratchXmm(args[0]); - if (!ctx.FPCR().DN()) { + if (!ctx.FPCR().DN() && !ctx.HasOptimization(OptimizationFlag::Unsafe_InaccurateNaN)) { end = ProcessNaN(code, result); } if constexpr (std::is_member_function_pointer_v) { @@ -265,7 +265,9 @@ void FPTwoOp(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, Function fn) { } else { fn(result); } - if (ctx.FPCR().DN()) { + if (ctx.HasOptimization(OptimizationFlag::Unsafe_InaccurateNaN)) { + // Do nothing + } else if (ctx.FPCR().DN()) { ForceToDefaultNaN(code, result); } else { PostProcessNaN(code, result, ctx.reg_alloc.ScratchXmm()); @@ -281,6 +283,20 @@ void FPThreeOp(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, Function fn) auto args = ctx.reg_alloc.GetArgumentInfo(inst); + if (ctx.HasOptimization(OptimizationFlag::Unsafe_InaccurateNaN)) { + const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]); + const Xbyak::Xmm operand = ctx.reg_alloc.UseScratchXmm(args[1]); + + if constexpr (std::is_member_function_pointer_v) { + (code.*fn)(result, operand); + } else { + fn(result, operand); + } + + ctx.reg_alloc.DefineValue(inst, result); + return; + } + if (ctx.FPCR().DN()) { const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]); const Xbyak::Xmm operand = ctx.reg_alloc.UseScratchXmm(args[1]); @@ -590,9 +606,20 @@ static void EmitFPMulAdd(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) { using FPT = mp::unsigned_integer_of_size; if constexpr (fsize != 16) { - if (code.HasFMA()) { - auto args = ctx.reg_alloc.GetArgumentInfo(inst); + auto args = ctx.reg_alloc.GetArgumentInfo(inst); + if (code.HasFMA() && ctx.HasOptimization(OptimizationFlag::Unsafe_InaccurateNaN)) { + const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]); + const Xbyak::Xmm operand2 = ctx.reg_alloc.UseXmm(args[1]); + const Xbyak::Xmm operand3 = ctx.reg_alloc.UseXmm(args[2]); + + FCODE(vfmadd231s)(result, operand2, operand3); + + ctx.reg_alloc.DefineValue(inst, result); + return; + } + + if (code.HasFMA()) { Xbyak::Label end, fallback; const Xbyak::Xmm operand1 = ctx.reg_alloc.UseXmm(args[0]); @@ -641,8 +668,6 @@ static void EmitFPMulAdd(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) { } if (ctx.HasOptimization(OptimizationFlag::Unsafe_UnfuseFMA)) { - auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm operand1 = ctx.reg_alloc.UseScratchXmm(args[0]); const Xbyak::Xmm operand2 = ctx.reg_alloc.UseScratchXmm(args[1]); const Xbyak::Xmm operand3 = ctx.reg_alloc.UseXmm(args[2]); @@ -810,6 +835,22 @@ static void EmitFPRecipStepFused(BlockOfCode& code, EmitContext& ctx, IR::Inst* using FPT = mp::unsigned_integer_of_size; if constexpr (fsize != 16) { + if (code.HasFMA() && ctx.HasOptimization(OptimizationFlag::Unsafe_InaccurateNaN)) { + auto args = ctx.reg_alloc.GetArgumentInfo(inst); + + Xbyak::Label end, fallback; + + const Xbyak::Xmm operand1 = ctx.reg_alloc.UseXmm(args[0]); + const Xbyak::Xmm operand2 = ctx.reg_alloc.UseXmm(args[1]); + const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(); + + code.movaps(result, code.MConst(xword, FP::FPValue())); + FCODE(vfnmadd231s)(result, operand1, operand2); + + ctx.reg_alloc.DefineValue(inst, result); + return; + } + if (code.HasFMA()) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); @@ -998,6 +1039,21 @@ static void EmitFPRSqrtStepFused(BlockOfCode& code, EmitContext& ctx, IR::Inst* using FPT = mp::unsigned_integer_of_size; if constexpr (fsize != 16) { + if (code.HasFMA() && code.HasAVX() && ctx.HasOptimization(OptimizationFlag::Unsafe_InaccurateNaN)) { + auto args = 
ctx.reg_alloc.GetArgumentInfo(inst); + + const Xbyak::Xmm operand1 = ctx.reg_alloc.UseXmm(args[0]); + const Xbyak::Xmm operand2 = ctx.reg_alloc.UseXmm(args[1]); + const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(); + + code.vmovaps(result, code.MConst(xword, FP::FPValue())); + FCODE(vfnmadd231s)(result, operand1, operand2); + FCODE(vmuls)(result, result, code.MConst(xword, FP::FPValue())); + + ctx.reg_alloc.DefineValue(inst, result); + return; + } + if (code.HasFMA() && code.HasAVX()) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); diff --git a/externals/dynarmic/src/backend/x64/emit_x64_vector_floating_point.cpp b/externals/dynarmic/src/backend/x64/emit_x64_vector_floating_point.cpp index 20b1c808b..d09e74125 100755 --- a/externals/dynarmic/src/backend/x64/emit_x64_vector_floating_point.cpp +++ b/externals/dynarmic/src/backend/x64/emit_x64_vector_floating_point.cpp @@ -290,7 +290,7 @@ void EmitTwoOpVectorOperation(BlockOfCode& code, EmitContext& ctx, IR::Inst* ins auto args = ctx.reg_alloc.GetArgumentInfo(inst); const bool fpcr_controlled = args[fpcr_controlled_arg_index].GetImmediateU1(); - if (ctx.FPCR(fpcr_controlled).DN()) { + if (ctx.FPCR(fpcr_controlled).DN() || ctx.HasOptimization(OptimizationFlag::Unsafe_InaccurateNaN)) { Xbyak::Xmm result; if constexpr (std::is_member_function_pointer_v) { @@ -306,7 +306,9 @@ void EmitTwoOpVectorOperation(BlockOfCode& code, EmitContext& ctx, IR::Inst* ins }); } - ForceToDefaultNaN(code, ctx.FPCR(fpcr_controlled), result); + if (!ctx.HasOptimization(OptimizationFlag::Unsafe_InaccurateNaN)) { + ForceToDefaultNaN(code, ctx.FPCR(fpcr_controlled), result); + } ctx.reg_alloc.DefineValue(inst, result); return; @@ -342,7 +344,7 @@ void EmitThreeOpVectorOperation(BlockOfCode& code, EmitContext& ctx, IR::Inst* i auto args = ctx.reg_alloc.GetArgumentInfo(inst); const bool fpcr_controlled = args[2].GetImmediateU1(); - if (ctx.FPCR(fpcr_controlled).DN()) { + if (ctx.FPCR(fpcr_controlled).DN() || ctx.HasOptimization(OptimizationFlag::Unsafe_InaccurateNaN)) { const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(args[0]); const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(args[1]); @@ -356,7 +358,9 @@ void EmitThreeOpVectorOperation(BlockOfCode& code, EmitContext& ctx, IR::Inst* i }); } - ForceToDefaultNaN(code, ctx.FPCR(fpcr_controlled), xmm_a); + if (!ctx.HasOptimization(OptimizationFlag::Unsafe_InaccurateNaN)) { + ForceToDefaultNaN(code, ctx.FPCR(fpcr_controlled), xmm_a); + } ctx.reg_alloc.DefineValue(inst, xmm_a); return; @@ -985,11 +989,23 @@ void EmitFPVectorMulAdd(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) { }; if constexpr (fsize != 16) { + auto args = ctx.reg_alloc.GetArgumentInfo(inst); + const bool fpcr_controlled = args[3].GetImmediateU1(); + + if (code.HasFMA() && code.HasAVX() && ctx.HasOptimization(OptimizationFlag::Unsafe_UnfuseFMA)) { + const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]); + const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(args[1]); + const Xbyak::Xmm xmm_c = ctx.reg_alloc.UseXmm(args[2]); + + MaybeStandardFPSCRValue(code, ctx, fpcr_controlled, [&]{ + FCODE(vfmadd231p)(result, xmm_b, xmm_c); + }); + + ctx.reg_alloc.DefineValue(inst, result); + return; + } + if (code.HasFMA() && code.HasAVX()) { - auto args = ctx.reg_alloc.GetArgumentInfo(inst); - - const bool fpcr_controlled = args[3].GetImmediateU1(); - const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(); const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseXmm(args[0]); const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(args[1]); @@ -1025,8 +1041,6 @@ void 
EmitFPVectorMulAdd(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) { } if (ctx.HasOptimization(OptimizationFlag::Unsafe_UnfuseFMA)) { - auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm operand1 = ctx.reg_alloc.UseScratchXmm(args[0]); const Xbyak::Xmm operand2 = ctx.reg_alloc.UseScratchXmm(args[1]); const Xbyak::Xmm operand3 = ctx.reg_alloc.UseXmm(args[2]); @@ -1233,10 +1247,24 @@ static void EmitRecipStepFused(BlockOfCode& code, EmitContext& ctx, IR::Inst* in }; if constexpr (fsize != 16) { - if (code.HasFMA() && code.HasAVX()) { - auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const bool fpcr_controlled = args[2].GetImmediateU1(); + auto args = ctx.reg_alloc.GetArgumentInfo(inst); + const bool fpcr_controlled = args[2].GetImmediateU1(); + if (code.HasFMA() && code.HasAVX() && ctx.HasOptimization(OptimizationFlag::Unsafe_InaccurateNaN)) { + const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(); + const Xbyak::Xmm operand1 = ctx.reg_alloc.UseXmm(args[0]); + const Xbyak::Xmm operand2 = ctx.reg_alloc.UseXmm(args[1]); + + MaybeStandardFPSCRValue(code, ctx, fpcr_controlled, [&]{ + code.movaps(result, GetVectorOf(code)); + FCODE(vfnmadd231p)(result, operand1, operand2); + }); + + ctx.reg_alloc.DefineValue(inst, result); + return; + } + + if (code.HasFMA() && code.HasAVX()) { const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(); const Xbyak::Xmm operand1 = ctx.reg_alloc.UseXmm(args[0]); const Xbyak::Xmm operand2 = ctx.reg_alloc.UseXmm(args[1]); @@ -1269,8 +1297,6 @@ static void EmitRecipStepFused(BlockOfCode& code, EmitContext& ctx, IR::Inst* in } if (ctx.HasOptimization(OptimizationFlag::Unsafe_UnfuseFMA)) { - auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm operand1 = ctx.reg_alloc.UseScratchXmm(args[0]); const Xbyak::Xmm operand2 = ctx.reg_alloc.UseXmm(args[1]); const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(); @@ -1428,10 +1454,25 @@ static void EmitRSqrtStepFused(BlockOfCode& code, EmitContext& ctx, IR::Inst* in }; if constexpr (fsize != 16) { - if (code.HasFMA() && code.HasAVX()) { - auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const bool fpcr_controlled = args[2].GetImmediateU1(); + auto args = ctx.reg_alloc.GetArgumentInfo(inst); + const bool fpcr_controlled = args[2].GetImmediateU1(); + if (code.HasFMA() && code.HasAVX() && ctx.HasOptimization(OptimizationFlag::Unsafe_InaccurateNaN)) { + const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(); + const Xbyak::Xmm operand1 = ctx.reg_alloc.UseXmm(args[0]); + const Xbyak::Xmm operand2 = ctx.reg_alloc.UseXmm(args[1]); + + MaybeStandardFPSCRValue(code, ctx, fpcr_controlled, [&]{ + code.vmovaps(result, GetVectorOf(code)); + FCODE(vfnmadd231p)(result, operand1, operand2); + FCODE(vmulp)(result, result, GetVectorOf(code)); + }); + + ctx.reg_alloc.DefineValue(inst, result); + return; + } + + if (code.HasFMA() && code.HasAVX()) { const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(); const Xbyak::Xmm operand1 = ctx.reg_alloc.UseXmm(args[0]); const Xbyak::Xmm operand2 = ctx.reg_alloc.UseXmm(args[1]); @@ -1470,8 +1511,6 @@ static void EmitRSqrtStepFused(BlockOfCode& code, EmitContext& ctx, IR::Inst* in } if (ctx.HasOptimization(OptimizationFlag::Unsafe_UnfuseFMA)) { - auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm operand1 = ctx.reg_alloc.UseScratchXmm(args[0]); const Xbyak::Xmm operand2 = ctx.reg_alloc.UseXmm(args[1]); const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(); diff --git a/externals/dynarmic/src/frontend/A64/translate/impl/data_processing_conditional_compare.cpp 
b/externals/dynarmic/src/frontend/A64/translate/impl/data_processing_conditional_compare.cpp index 837e2b521..591c3cf6b 100755 --- a/externals/dynarmic/src/frontend/A64/translate/impl/data_processing_conditional_compare.cpp +++ b/externals/dynarmic/src/frontend/A64/translate/impl/data_processing_conditional_compare.cpp @@ -27,7 +27,7 @@ bool TranslatorVisitor::CCMP_reg(bool sf, Reg Rm, Cond cond, Reg Rn, Imm<4> nzcv const IR::U32U64 operand1 = X(datasize, Rn); const IR::U32U64 operand2 = X(datasize, Rm); - const IR::NZCV then_flags = ir.NZCVFrom(ir.AddWithCarry(operand1, ir.Not(operand2), ir.Imm1(1))); + const IR::NZCV then_flags = ir.NZCVFrom(ir.SubWithCarry(operand1, operand2, ir.Imm1(1))); const IR::NZCV else_flags = ir.NZCVFromPackedFlags(ir.Imm32(flags)); ir.SetNZCV(ir.ConditionalSelect(cond, then_flags, else_flags)); return true; @@ -53,7 +53,7 @@ bool TranslatorVisitor::CCMP_imm(bool sf, Imm<5> imm5, Cond cond, Reg Rn, Imm<4> const IR::U32U64 operand1 = X(datasize, Rn); const IR::U32U64 operand2 = I(datasize, imm5.ZeroExtend()); - const IR::NZCV then_flags = ir.NZCVFrom(ir.AddWithCarry(operand1, ir.Not(operand2), ir.Imm1(1))); + const IR::NZCV then_flags = ir.NZCVFrom(ir.SubWithCarry(operand1, operand2, ir.Imm1(1))); const IR::NZCV else_flags = ir.NZCVFromPackedFlags(ir.Imm32(flags)); ir.SetNZCV(ir.ConditionalSelect(cond, then_flags, else_flags)); return true; diff --git a/src/common/CMakeLists.txt b/src/common/CMakeLists.txt index 5d781cd77..75fe30686 100755 --- a/src/common/CMakeLists.txt +++ b/src/common/CMakeLists.txt @@ -98,7 +98,6 @@ add_library(common STATIC algorithm.h alignment.h assert.h - atomic_ops.cpp atomic_ops.h detached_tasks.cpp detached_tasks.h diff --git a/src/common/atomic_ops.h b/src/common/atomic_ops.h index b46888589..2b1f515e8 100755 --- a/src/common/atomic_ops.h +++ b/src/common/atomic_ops.h @@ -4,14 +4,75 @@ #pragma once +#include <cstring> +#include <memory> + #include "common/common_types.h" +#if _MSC_VER +#include <intrin.h> +#endif + namespace Common { -[[nodiscard]] bool AtomicCompareAndSwap(volatile u8* pointer, u8 value, u8 expected); -[[nodiscard]] bool AtomicCompareAndSwap(volatile u16* pointer, u16 value, u16 expected); -[[nodiscard]] bool AtomicCompareAndSwap(volatile u32* pointer, u32 value, u32 expected); -[[nodiscard]] bool AtomicCompareAndSwap(volatile u64* pointer, u64 value, u64 expected); -[[nodiscard]] bool AtomicCompareAndSwap(volatile u64* pointer, u128 value, u128 expected); +#if _MSC_VER + +[[nodiscard]] inline bool AtomicCompareAndSwap(volatile u8* pointer, u8 value, u8 expected) { + const u8 result = + _InterlockedCompareExchange8(reinterpret_cast<volatile char*>(pointer), value, expected); + return result == expected; +} + +[[nodiscard]] inline bool AtomicCompareAndSwap(volatile u16* pointer, u16 value, u16 expected) { + const u16 result = + _InterlockedCompareExchange16(reinterpret_cast<volatile short*>(pointer), value, expected); + return result == expected; +} + +[[nodiscard]] inline bool AtomicCompareAndSwap(volatile u32* pointer, u32 value, u32 expected) { + const u32 result = + _InterlockedCompareExchange(reinterpret_cast<volatile long*>(pointer), value, expected); + return result == expected; +} + +[[nodiscard]] inline bool AtomicCompareAndSwap(volatile u64* pointer, u64 value, u64 expected) { + const u64 result = _InterlockedCompareExchange64(reinterpret_cast<volatile __int64*>(pointer), + value, expected); + return result == expected; +} + +[[nodiscard]] inline bool AtomicCompareAndSwap(volatile u64* pointer, u128 value, u128 expected) { + return _InterlockedCompareExchange128(reinterpret_cast<volatile __int64*>(pointer), value[1], +
value[0], + reinterpret_cast<__int64*>(expected.data())) != 0; +} + +#else + +[[nodiscard]] inline bool AtomicCompareAndSwap(volatile u8* pointer, u8 value, u8 expected) { + return __sync_bool_compare_and_swap(pointer, expected, value); +} + +[[nodiscard]] inline bool AtomicCompareAndSwap(volatile u16* pointer, u16 value, u16 expected) { + return __sync_bool_compare_and_swap(pointer, expected, value); +} + +[[nodiscard]] inline bool AtomicCompareAndSwap(volatile u32* pointer, u32 value, u32 expected) { + return __sync_bool_compare_and_swap(pointer, expected, value); +} + +[[nodiscard]] inline bool AtomicCompareAndSwap(volatile u64* pointer, u64 value, u64 expected) { + return __sync_bool_compare_and_swap(pointer, expected, value); +} + +[[nodiscard]] inline bool AtomicCompareAndSwap(volatile u64* pointer, u128 value, u128 expected) { + unsigned __int128 value_a; + unsigned __int128 expected_a; + std::memcpy(&value_a, value.data(), sizeof(u128)); + std::memcpy(&expected_a, expected.data(), sizeof(u128)); + return __sync_bool_compare_and_swap((unsigned __int128*)pointer, expected_a, value_a); +} + +#endif } // namespace Common diff --git a/src/common/page_table.h b/src/common/page_table.h index 0c14e6433..61c5552e0 100755 --- a/src/common/page_table.h +++ b/src/common/page_table.h @@ -90,7 +90,7 @@ struct PageTable { PageTable& operator=(PageTable&&) noexcept = default; /** - * Resizes the page table to be able to accomodate enough pages within + * Resizes the page table to be able to accommodate enough pages within * a given address space. * * @param address_space_width_in_bits The address size width in bits. diff --git a/src/common/swap.h b/src/common/swap.h index 7665942a2..a80e191dc 100755 --- a/src/common/swap.h +++ b/src/common/swap.h @@ -394,7 +394,7 @@ public: template friend S operator%(const S& p, const swapped_t v); - // Arithmetics + assignements + // Arithmetics + assignments template friend S operator+=(const S& p, const swapped_t v); @@ -451,7 +451,7 @@ S operator%(const S& i, const swap_struct_t v) { return i % v.swap(); } -// Arithmetics + assignements +// Arithmetics + assignments template S& operator+=(S& i, const swap_struct_t v) { i += v.swap(); diff --git a/src/common/x64/native_clock.cpp b/src/common/x64/native_clock.cpp index eb8a7782f..a65f6b832 100755 --- a/src/common/x64/native_clock.cpp +++ b/src/common/x64/native_clock.cpp @@ -2,19 +2,74 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. +#include #include +#include #include #include #ifdef _MSC_VER #include + +#pragma intrinsic(__umulh) +#pragma intrinsic(_udiv128) #else #include #endif +#include "common/atomic_ops.h" #include "common/uint128.h" #include "common/x64/native_clock.h" +namespace { + +[[nodiscard]] u64 GetFixedPoint64Factor(u64 numerator, u64 divisor) { +#ifdef __SIZEOF_INT128__ + const auto base = static_cast(numerator) << 64ULL; + return static_cast(base / divisor); +#elif defined(_M_X64) || defined(_M_ARM64) + std::array r = {0, numerator}; + u64 remainder; +#if _MSC_VER < 1923 + return udiv128(r[1], r[0], divisor, &remainder); +#else + return _udiv128(r[1], r[0], divisor, &remainder); +#endif +#else + // This one is bit more inaccurate. 
+ return MultiplyAndDivide64(std::numeric_limits::max(), numerator, divisor); +#endif +} + +[[nodiscard]] u64 MultiplyHigh(u64 a, u64 b) { +#ifdef __SIZEOF_INT128__ + return (static_cast(a) * static_cast(b)) >> 64; +#elif defined(_M_X64) || defined(_M_ARM64) + return __umulh(a, b); // MSVC +#else + // Generic fallback + const u64 a_lo = u32(a); + const u64 a_hi = a >> 32; + const u64 b_lo = u32(b); + const u64 b_hi = b >> 32; + + const u64 a_x_b_hi = a_hi * b_hi; + const u64 a_x_b_mid = a_hi * b_lo; + const u64 b_x_a_mid = b_hi * a_lo; + const u64 a_x_b_lo = a_lo * b_lo; + + const u64 carry_bit = (static_cast(static_cast(a_x_b_mid)) + + static_cast(static_cast(b_x_a_mid)) + (a_x_b_lo >> 32)) >> + 32; + + const u64 multhi = a_x_b_hi + (a_x_b_mid >> 32) + (b_x_a_mid >> 32) + carry_bit; + + return multhi; +#endif +} + +} // namespace + namespace Common { u64 EstimateRDTSCFrequency() { @@ -48,54 +103,71 @@ NativeClock::NativeClock(u64 emulated_cpu_frequency_, u64 emulated_clock_frequen : WallClock(emulated_cpu_frequency_, emulated_clock_frequency_, true), rtsc_frequency{ rtsc_frequency_} { _mm_mfence(); - last_measure = __rdtsc(); - accumulated_ticks = 0U; + time_point.inner.last_measure = __rdtsc(); + time_point.inner.accumulated_ticks = 0U; + ns_rtsc_factor = GetFixedPoint64Factor(1000000000, rtsc_frequency); + us_rtsc_factor = GetFixedPoint64Factor(1000000, rtsc_frequency); + ms_rtsc_factor = GetFixedPoint64Factor(1000, rtsc_frequency); + clock_rtsc_factor = GetFixedPoint64Factor(emulated_clock_frequency, rtsc_frequency); + cpu_rtsc_factor = GetFixedPoint64Factor(emulated_cpu_frequency, rtsc_frequency); } u64 NativeClock::GetRTSC() { - std::scoped_lock scope{rtsc_serialize}; - _mm_mfence(); - const u64 current_measure = __rdtsc(); - u64 diff = current_measure - last_measure; - diff = diff & ~static_cast(static_cast(diff) >> 63); // max(diff, 0) - if (current_measure > last_measure) { - last_measure = current_measure; - } - accumulated_ticks += diff; + TimePoint new_time_point{}; + TimePoint current_time_point{}; + do { + current_time_point.pack = time_point.pack; + _mm_mfence(); + const u64 current_measure = __rdtsc(); + u64 diff = current_measure - current_time_point.inner.last_measure; + diff = diff & ~static_cast(static_cast(diff) >> 63); // max(diff, 0) + new_time_point.inner.last_measure = current_measure > current_time_point.inner.last_measure + ? 
current_measure + : current_time_point.inner.last_measure; + new_time_point.inner.accumulated_ticks = current_time_point.inner.accumulated_ticks + diff; + } while (!Common::AtomicCompareAndSwap(time_point.pack.data(), new_time_point.pack, + current_time_point.pack)); /// The clock cannot be more precise than the guest timer, remove the lower bits - return accumulated_ticks & inaccuracy_mask; + return new_time_point.inner.accumulated_ticks & inaccuracy_mask; } void NativeClock::Pause(bool is_paused) { if (!is_paused) { - _mm_mfence(); - last_measure = __rdtsc(); + TimePoint current_time_point{}; + TimePoint new_time_point{}; + do { + current_time_point.pack = time_point.pack; + new_time_point.pack = current_time_point.pack; + _mm_mfence(); + new_time_point.inner.last_measure = __rdtsc(); + } while (!Common::AtomicCompareAndSwap(time_point.pack.data(), new_time_point.pack, + current_time_point.pack)); } } std::chrono::nanoseconds NativeClock::GetTimeNS() { const u64 rtsc_value = GetRTSC(); - return std::chrono::nanoseconds{MultiplyAndDivide64(rtsc_value, 1000000000, rtsc_frequency)}; + return std::chrono::nanoseconds{MultiplyHigh(rtsc_value, ns_rtsc_factor)}; } std::chrono::microseconds NativeClock::GetTimeUS() { const u64 rtsc_value = GetRTSC(); - return std::chrono::microseconds{MultiplyAndDivide64(rtsc_value, 1000000, rtsc_frequency)}; + return std::chrono::microseconds{MultiplyHigh(rtsc_value, us_rtsc_factor)}; } std::chrono::milliseconds NativeClock::GetTimeMS() { const u64 rtsc_value = GetRTSC(); - return std::chrono::milliseconds{MultiplyAndDivide64(rtsc_value, 1000, rtsc_frequency)}; + return std::chrono::milliseconds{MultiplyHigh(rtsc_value, ms_rtsc_factor)}; } u64 NativeClock::GetClockCycles() { const u64 rtsc_value = GetRTSC(); - return MultiplyAndDivide64(rtsc_value, emulated_clock_frequency, rtsc_frequency); + return MultiplyHigh(rtsc_value, clock_rtsc_factor); } u64 NativeClock::GetCPUCycles() { const u64 rtsc_value = GetRTSC(); - return MultiplyAndDivide64(rtsc_value, emulated_cpu_frequency, rtsc_frequency); + return MultiplyHigh(rtsc_value, cpu_rtsc_factor); } } // namespace X64 diff --git a/src/common/x64/native_clock.h b/src/common/x64/native_clock.h index 6d1e32ac8..7cbd400d2 100755 --- a/src/common/x64/native_clock.h +++ b/src/common/x64/native_clock.h @@ -6,7 +6,6 @@ #include -#include "common/spin_lock.h" #include "common/wall_clock.h" namespace Common { @@ -32,14 +31,28 @@ public: private: u64 GetRTSC(); + union alignas(16) TimePoint { + TimePoint() : pack{} {} + u128 pack{}; + struct Inner { + u64 last_measure{}; + u64 accumulated_ticks{}; + } inner; + }; + /// value used to reduce the native clocks accuracy as some apss rely on /// undefined behavior where the level of accuracy in the clock shouldn't /// be higher. 
static constexpr u64 inaccuracy_mask = ~(UINT64_C(0x400) - 1); - SpinLock rtsc_serialize{}; - u64 last_measure{}; - u64 accumulated_ticks{}; + TimePoint time_point; + // factors + u64 clock_rtsc_factor{}; + u64 cpu_rtsc_factor{}; + u64 ns_rtsc_factor{}; + u64 us_rtsc_factor{}; + u64 ms_rtsc_factor{}; + u64 rtsc_frequency; }; } // namespace X64 diff --git a/src/core/arm/dynarmic/arm_dynarmic_32.cpp b/src/core/arm/dynarmic/arm_dynarmic_32.cpp index 8aaf11eee..0831dd5d2 100755 --- a/src/core/arm/dynarmic/arm_dynarmic_32.cpp +++ b/src/core/arm/dynarmic/arm_dynarmic_32.cpp @@ -181,6 +181,9 @@ std::shared_ptr<Dynarmic::A32::Jit> ARM_Dynarmic_32::MakeJit(Common::PageTable& if (Settings::values.cpuopt_unsafe_reduce_fp_error) { config.optimizations |= Dynarmic::OptimizationFlag::Unsafe_ReducedErrorFP; } + if (Settings::values.cpuopt_unsafe_inaccurate_nan) { + config.optimizations |= Dynarmic::OptimizationFlag::Unsafe_InaccurateNaN; + } } return std::make_unique<Dynarmic::A32::Jit>(config); diff --git a/src/core/arm/dynarmic/arm_dynarmic_64.cpp b/src/core/arm/dynarmic/arm_dynarmic_64.cpp index d2e1dc724..4c5ebca22 100755 --- a/src/core/arm/dynarmic/arm_dynarmic_64.cpp +++ b/src/core/arm/dynarmic/arm_dynarmic_64.cpp @@ -212,6 +212,9 @@ std::shared_ptr<Dynarmic::A64::Jit> ARM_Dynarmic_64::MakeJit(Common::PageTable& if (Settings::values.cpuopt_unsafe_reduce_fp_error) { config.optimizations |= Dynarmic::OptimizationFlag::Unsafe_ReducedErrorFP; } + if (Settings::values.cpuopt_unsafe_inaccurate_nan) { + config.optimizations |= Dynarmic::OptimizationFlag::Unsafe_InaccurateNaN; + } } return std::make_shared<Dynarmic::A64::Jit>(config); diff --git a/src/core/hle/kernel/memory/memory_block.h b/src/core/hle/kernel/memory/memory_block.h index 37fe19916..83acece1e 100755 --- a/src/core/hle/kernel/memory/memory_block.h +++ b/src/core/hle/kernel/memory/memory_block.h @@ -73,12 +73,12 @@ enum class MemoryState : u32 { ThreadLocal = static_cast<u32>(Svc::MemoryState::ThreadLocal) | FlagMapped | FlagReferenceCounted, - Transfered = static_cast<u32>(Svc::MemoryState::Transfered) | FlagsMisc | - FlagCanAlignedDeviceMap | FlagCanChangeAttribute | FlagCanUseIpc | - FlagCanUseNonSecureIpc | FlagCanUseNonDeviceIpc, + Transferred = static_cast<u32>(Svc::MemoryState::Transferred) | FlagsMisc | + FlagCanAlignedDeviceMap | FlagCanChangeAttribute | FlagCanUseIpc | + FlagCanUseNonSecureIpc | FlagCanUseNonDeviceIpc, - SharedTransfered = static_cast<u32>(Svc::MemoryState::SharedTransfered) | FlagsMisc | - FlagCanAlignedDeviceMap | FlagCanUseNonSecureIpc | FlagCanUseNonDeviceIpc, + SharedTransferred = static_cast<u32>(Svc::MemoryState::SharedTransferred) | FlagsMisc | + FlagCanAlignedDeviceMap | FlagCanUseNonSecureIpc | FlagCanUseNonDeviceIpc, SharedCode = static_cast<u32>(Svc::MemoryState::SharedCode) | FlagMapped | FlagReferenceCounted | FlagCanUseNonSecureIpc | FlagCanUseNonDeviceIpc, @@ -111,8 +111,8 @@ static_assert(static_cast<u32>(MemoryState::AliasCodeData) == 0x03FFBD09); static_assert(static_cast<u32>(MemoryState::Ipc) == 0x005C3C0A); static_assert(static_cast<u32>(MemoryState::Stack) == 0x005C3C0B); static_assert(static_cast<u32>(MemoryState::ThreadLocal) == 0x0040200C); -static_assert(static_cast<u32>(MemoryState::Transfered) == 0x015C3C0D); -static_assert(static_cast<u32>(MemoryState::SharedTransfered) == 0x005C380E); +static_assert(static_cast<u32>(MemoryState::Transferred) == 0x015C3C0D); +static_assert(static_cast<u32>(MemoryState::SharedTransferred) == 0x005C380E); static_assert(static_cast<u32>(MemoryState::SharedCode) == 0x0040380F); static_assert(static_cast<u32>(MemoryState::Inaccessible) == 0x00000010); static_assert(static_cast<u32>(MemoryState::NonSecureIpc) ==
0x005C3811); diff --git a/src/core/hle/kernel/memory/page_table.cpp b/src/core/hle/kernel/memory/page_table.cpp index f3e8bc333..080886554 100755 --- a/src/core/hle/kernel/memory/page_table.cpp +++ b/src/core/hle/kernel/memory/page_table.cpp @@ -1007,8 +1007,8 @@ constexpr VAddr PageTable::GetRegionAddress(MemoryState state) const { case MemoryState::Shared: case MemoryState::AliasCode: case MemoryState::AliasCodeData: - case MemoryState::Transfered: - case MemoryState::SharedTransfered: + case MemoryState::Transferred: + case MemoryState::SharedTransferred: case MemoryState::SharedCode: case MemoryState::GeneratedCode: case MemoryState::CodeOut: @@ -1042,8 +1042,8 @@ constexpr std::size_t PageTable::GetRegionSize(MemoryState state) const { case MemoryState::Shared: case MemoryState::AliasCode: case MemoryState::AliasCodeData: - case MemoryState::Transfered: - case MemoryState::SharedTransfered: + case MemoryState::Transferred: + case MemoryState::SharedTransferred: case MemoryState::SharedCode: case MemoryState::GeneratedCode: case MemoryState::CodeOut: @@ -1080,8 +1080,8 @@ constexpr bool PageTable::CanContain(VAddr addr, std::size_t size, MemoryState s case MemoryState::AliasCodeData: case MemoryState::Stack: case MemoryState::ThreadLocal: - case MemoryState::Transfered: - case MemoryState::SharedTransfered: + case MemoryState::Transferred: + case MemoryState::SharedTransferred: case MemoryState::SharedCode: case MemoryState::GeneratedCode: case MemoryState::CodeOut: diff --git a/src/core/hle/kernel/svc_types.h b/src/core/hle/kernel/svc_types.h index 356947741..d623f7a50 100755 --- a/src/core/hle/kernel/svc_types.h +++ b/src/core/hle/kernel/svc_types.h @@ -23,8 +23,8 @@ enum class MemoryState : u32 { Ipc = 0x0A, Stack = 0x0B, ThreadLocal = 0x0C, - Transfered = 0x0D, - SharedTransfered = 0x0E, + Transferred = 0x0D, + SharedTransferred = 0x0E, SharedCode = 0x0F, Inaccessible = 0x10, NonSecureIpc = 0x11, diff --git a/src/core/hle/service/am/am.cpp b/src/core/hle/service/am/am.cpp index cb13210e5..c9808060a 100755 --- a/src/core/hle/service/am/am.cpp +++ b/src/core/hle/service/am/am.cpp @@ -560,14 +560,14 @@ void ISelfController::GetAccumulatedSuspendedTickChangedEvent(Kernel::HLERequest AppletMessageQueue::AppletMessageQueue(Kernel::KernelCore& kernel) { on_new_message = - Kernel::WritableEvent::CreateEventPair(kernel, "AMMessageQueue:OnMessageRecieved"); + Kernel::WritableEvent::CreateEventPair(kernel, "AMMessageQueue:OnMessageReceived"); on_operation_mode_changed = Kernel::WritableEvent::CreateEventPair(kernel, "AMMessageQueue:OperationModeChanged"); } AppletMessageQueue::~AppletMessageQueue() = default; -const std::shared_ptr& AppletMessageQueue::GetMesssageRecieveEvent() const { +const std::shared_ptr& AppletMessageQueue::GetMessageReceiveEvent() const { return on_new_message.readable; } @@ -675,7 +675,7 @@ void ICommonStateGetter::GetEventHandle(Kernel::HLERequestContext& ctx) { IPC::ResponseBuilder rb{ctx, 2, 1}; rb.Push(RESULT_SUCCESS); - rb.PushCopyObjects(msg_queue->GetMesssageRecieveEvent()); + rb.PushCopyObjects(msg_queue->GetMessageReceiveEvent()); } void ICommonStateGetter::ReceiveMessage(Kernel::HLERequestContext& ctx) { diff --git a/src/core/hle/service/am/am.h b/src/core/hle/service/am/am.h index b1da0d081..f51aca1af 100755 --- a/src/core/hle/service/am/am.h +++ b/src/core/hle/service/am/am.h @@ -55,7 +55,7 @@ public: explicit AppletMessageQueue(Kernel::KernelCore& kernel); ~AppletMessageQueue(); - const std::shared_ptr& GetMesssageRecieveEvent() const; + const 
std::shared_ptr& GetMessageReceiveEvent() const; const std::shared_ptr& GetOperationModeChangedEvent() const; void PushMessage(AppletMessage msg); AppletMessage PopMessage(); diff --git a/src/core/settings.h b/src/core/settings.h index 0cd3c0c84..a324530bd 100755 --- a/src/core/settings.h +++ b/src/core/settings.h @@ -131,6 +131,7 @@ struct Values { bool cpuopt_unsafe_unfuse_fma; bool cpuopt_unsafe_reduce_fp_error; + bool cpuopt_unsafe_inaccurate_nan; // Renderer Setting renderer_backend; @@ -221,7 +222,7 @@ struct Values { bool disable_macro_jit; bool extended_logging; - // Misceallaneous + // Miscellaneous std::string log_filter; bool use_dev_keys; diff --git a/src/input_common/gcadapter/gc_adapter.h b/src/input_common/gcadapter/gc_adapter.h index f1256c9da..7a6c545bd 100755 --- a/src/input_common/gcadapter/gc_adapter.h +++ b/src/input_common/gcadapter/gc_adapter.h @@ -120,17 +120,17 @@ private: /// For use in initialization, querying devices to find the adapter void Setup(); - /// Resets status of all GC controller devices to a disconected state + /// Resets status of all GC controller devices to a disconnected state void ResetDevices(); - /// Resets status of device connected to a disconected state + /// Resets status of device connected to a disconnected state void ResetDevice(std::size_t port); /// Returns true if we successfully gain access to GC Adapter bool CheckDeviceAccess(); /// Captures GC Adapter endpoint address - /// Returns true if the endpoind was set correctly + /// Returns true if the endpoint was set correctly bool GetGCEndpoint(libusb_device* device); /// For shutting down, clear all data, join all threads, release usb diff --git a/src/input_common/motion_input.cpp b/src/input_common/motion_input.cpp index f77ba535d..6a65f175e 100755 --- a/src/input_common/motion_input.cpp +++ b/src/input_common/motion_input.cpp @@ -129,7 +129,7 @@ void MotionInput::UpdateOrientation(u64 elapsed_time) { rad_gyro += ki * integral_error; rad_gyro += kd * derivative_error; } else { - // Give more weight to acelerometer values to compensate for the lack of gyro + // Give more weight to accelerometer values to compensate for the lack of gyro rad_gyro += 35.0f * kp * real_error; rad_gyro += 10.0f * ki * integral_error; rad_gyro += 10.0f * kd * derivative_error; diff --git a/src/input_common/mouse/mouse_input.h b/src/input_common/mouse/mouse_input.h index 65e64bee7..58803c1bf 100755 --- a/src/input_common/mouse/mouse_input.h +++ b/src/input_common/mouse/mouse_input.h @@ -20,7 +20,7 @@ enum class MouseButton { Left, Wheel, Right, - Foward, + Forward, Backward, Undefined, }; diff --git a/src/input_common/udp/udp.cpp b/src/input_common/udp/udp.cpp index 8686a059c..c5da27a38 100755 --- a/src/input_common/udp/udp.cpp +++ b/src/input_common/udp/udp.cpp @@ -28,14 +28,14 @@ private: mutable std::mutex mutex; }; -/// A motion device factory that creates motion devices from JC Adapter +/// A motion device factory that creates motion devices from a UDP client UDPMotionFactory::UDPMotionFactory(std::shared_ptr client_) : client(std::move(client_)) {} /** * Creates motion device * @param params contains parameters for creating the device: - * - "port": the nth jcpad on the adapter + * - "port": the UDP port number */ std::unique_ptr UDPMotionFactory::Create(const Common::ParamPackage& params) { auto ip = params.Get("ip", "127.0.0.1"); @@ -90,14 +90,14 @@ private: mutable std::mutex mutex; }; -/// A motion device factory that creates motion devices from JC Adapter +/// A motion device factory that creates 
motion devices from a UDP client UDPTouchFactory::UDPTouchFactory(std::shared_ptr client_) : client(std::move(client_)) {} /** * Creates motion device * @param params contains parameters for creating the device: - * - "port": the nth jcpad on the adapter + * - "port": the UDP port number */ std::unique_ptr UDPTouchFactory::Create(const Common::ParamPackage& params) { auto ip = params.Get("ip", "127.0.0.1"); diff --git a/src/tests/common/fibers.cpp b/src/tests/common/fibers.cpp index 4757dd2b4..d94492fc6 100755 --- a/src/tests/common/fibers.cpp +++ b/src/tests/common/fibers.cpp @@ -207,7 +207,7 @@ static void ThreadStart2_2(u32 id, TestControl2& test_control) { } /** This test checks for fiber thread exchange configuration and validates that fibers are - * that a fiber has been succesfully transfered from one thread to another and that the TLS + * that a fiber has been successfully transferred from one thread to another and that the TLS * region of the thread is kept while changing fibers. */ TEST_CASE("Fibers::InterExchange", "[common]") { @@ -299,7 +299,7 @@ static void ThreadStart3(u32 id, TestControl3& test_control) { } /** This test checks for one two threads racing for starting the same fiber. - * It checks execution occured in an ordered manner and by no time there were + * It checks execution occurred in an ordered manner and by no time there were * two contexts at the same time. */ TEST_CASE("Fibers::StartRace", "[common]") { diff --git a/src/video_core/command_classes/vic.cpp b/src/video_core/command_classes/vic.cpp index aa8c9f9de..55e632346 100755 --- a/src/video_core/command_classes/vic.cpp +++ b/src/video_core/command_classes/vic.cpp @@ -53,7 +53,7 @@ void Vic::ProcessMethod(Method method, const std::vector& arguments) { void Vic::Execute() { if (output_surface_luma_address == 0) { - LOG_ERROR(Service_NVDRV, "VIC Luma address not set. Recieved 0x{:X}", + LOG_ERROR(Service_NVDRV, "VIC Luma address not set. 
Received 0x{:X}", vic_state.output_surface.luma_offset); return; } diff --git a/src/video_core/renderer_vulkan/vk_device.cpp b/src/video_core/renderer_vulkan/vk_device.cpp index 370a63f74..85b4f0dff 100755 --- a/src/video_core/renderer_vulkan/vk_device.cpp +++ b/src/video_core/renderer_vulkan/vk_device.cpp @@ -491,7 +491,7 @@ VkFormat VKDevice::GetSupportedFormat(VkFormat wanted_format, VkFormatFeatureFla } void VKDevice::ReportLoss() const { - LOG_CRITICAL(Render_Vulkan, "Device loss occured!"); + LOG_CRITICAL(Render_Vulkan, "Device loss occurred!"); // Wait for the log to flush and for Nsight Aftermath to dump the results std::this_thread::sleep_for(std::chrono::seconds{15}); diff --git a/src/yuzu/applets/error.cpp b/src/yuzu/applets/error.cpp index 53a993cf6..8ee03ddb3 100755 --- a/src/yuzu/applets/error.cpp +++ b/src/yuzu/applets/error.cpp @@ -19,7 +19,7 @@ QtErrorDisplay::~QtErrorDisplay() = default; void QtErrorDisplay::ShowError(ResultCode error, std::function finished) const { callback = std::move(finished); emit MainWindowDisplayError( - tr("An error has occured.\nPlease try again or contact the developer of the " + tr("An error has occurred.\nPlease try again or contact the developer of the " "software.\n\nError Code: %1-%2 (0x%3)") .arg(static_cast(error.module.Value()) + 2000, 4, 10, QChar::fromLatin1('0')) .arg(error.description, 4, 10, QChar::fromLatin1('0')) @@ -32,7 +32,7 @@ void QtErrorDisplay::ShowErrorWithTimestamp(ResultCode error, std::chrono::secon const QDateTime date_time = QDateTime::fromSecsSinceEpoch(time.count()); emit MainWindowDisplayError( - tr("An error occured on %1 at %2.\nPlease try again or contact the " + tr("An error occurred on %1 at %2.\nPlease try again or contact the " "developer of the software.\n\nError Code: %3-%4 (0x%5)") .arg(date_time.toString(QStringLiteral("dddd, MMMM d, yyyy"))) .arg(date_time.toString(QStringLiteral("h:mm:ss A"))) @@ -46,7 +46,7 @@ void QtErrorDisplay::ShowCustomErrorText(ResultCode error, std::string dialog_te std::function finished) const { callback = std::move(finished); emit MainWindowDisplayError( - tr("An error has occured.\nError Code: %1-%2 (0x%3)\n\n%4\n\n%5") + tr("An error has occurred.\nError Code: %1-%2 (0x%3)\n\n%4\n\n%5") .arg(static_cast(error.module.Value()) + 2000, 4, 10, QChar::fromLatin1('0')) .arg(error.description, 4, 10, QChar::fromLatin1('0')) .arg(error.raw, 8, 16, QChar::fromLatin1('0')) diff --git a/src/yuzu/compatdb.cpp b/src/yuzu/compatdb.cpp index 649912557..a470056ef 100755 --- a/src/yuzu/compatdb.cpp +++ b/src/yuzu/compatdb.cpp @@ -72,7 +72,7 @@ void CompatDB::Submit() { void CompatDB::OnTestcaseSubmitted() { if (!testcase_watcher.result()) { QMessageBox::critical(this, tr("Communication error"), - tr("An error occured while sending the Testcase")); + tr("An error occurred while sending the Testcase")); button(NextButton)->setEnabled(true); button(NextButton)->setText(tr("Next")); button(CancelButton)->setVisible(true); diff --git a/src/yuzu/configuration/config.cpp b/src/yuzu/configuration/config.cpp index 34c2a5f8b..43cd11ba0 100755 --- a/src/yuzu/configuration/config.cpp +++ b/src/yuzu/configuration/config.cpp @@ -764,6 +764,8 @@ void Config::ReadCpuValues() { ReadSetting(QStringLiteral("cpuopt_unsafe_unfuse_fma"), true).toBool(); Settings::values.cpuopt_unsafe_reduce_fp_error = ReadSetting(QStringLiteral("cpuopt_unsafe_reduce_fp_error"), true).toBool(); + Settings::values.cpuopt_unsafe_inaccurate_nan = + ReadSetting(QStringLiteral("cpuopt_unsafe_inaccurate_nan"), true).toBool(); } 
qt_config->endGroup(); @@ -1327,6 +1329,8 @@ void Config::SaveCpuValues() { Settings::values.cpuopt_unsafe_unfuse_fma, true); WriteSetting(QStringLiteral("cpuopt_unsafe_reduce_fp_error"), Settings::values.cpuopt_unsafe_reduce_fp_error, true); + WriteSetting(QStringLiteral("cpuopt_unsafe_inaccurate_nan"), + Settings::values.cpuopt_unsafe_inaccurate_nan, true); } qt_config->endGroup(); diff --git a/src/yuzu/configuration/configure_cpu.cpp b/src/yuzu/configuration/configure_cpu.cpp index 37fcd6adc..d055cbd60 100755 --- a/src/yuzu/configuration/configure_cpu.cpp +++ b/src/yuzu/configuration/configure_cpu.cpp @@ -36,6 +36,8 @@ void ConfigureCpu::SetConfiguration() { ui->cpuopt_unsafe_unfuse_fma->setChecked(Settings::values.cpuopt_unsafe_unfuse_fma); ui->cpuopt_unsafe_reduce_fp_error->setEnabled(runtime_lock); ui->cpuopt_unsafe_reduce_fp_error->setChecked(Settings::values.cpuopt_unsafe_reduce_fp_error); + ui->cpuopt_unsafe_inaccurate_nan->setEnabled(runtime_lock); + ui->cpuopt_unsafe_inaccurate_nan->setChecked(Settings::values.cpuopt_unsafe_inaccurate_nan); } void ConfigureCpu::AccuracyUpdated(int index) { @@ -61,6 +63,7 @@ void ConfigureCpu::ApplyConfiguration() { static_cast(ui->accuracy->currentIndex()); Settings::values.cpuopt_unsafe_unfuse_fma = ui->cpuopt_unsafe_unfuse_fma->isChecked(); Settings::values.cpuopt_unsafe_reduce_fp_error = ui->cpuopt_unsafe_reduce_fp_error->isChecked(); + Settings::values.cpuopt_unsafe_inaccurate_nan = ui->cpuopt_unsafe_inaccurate_nan->isChecked(); } void ConfigureCpu::changeEvent(QEvent* event) { diff --git a/src/yuzu/configuration/configure_cpu.ui b/src/yuzu/configuration/configure_cpu.ui index ebdd2e6e9..bcd0962e9 100755 --- a/src/yuzu/configuration/configure_cpu.ui +++ b/src/yuzu/configuration/configure_cpu.ui @@ -109,6 +109,18 @@ + + + + Inaccurate NaN handling + + + + <div>This option improves speed by removing NaN checking. Please note this also reduces accuracy of certain floating-point instructions.</div> + + + + diff --git a/src/yuzu/main.cpp b/src/yuzu/main.cpp index 8076a57c3..51fe44e1f 100755 --- a/src/yuzu/main.cpp +++ b/src/yuzu/main.cpp @@ -142,7 +142,7 @@ constexpr int default_mouse_timeout = 2500; /** * "Callouts" are one-time instructional messages shown to the user. In the config settings, there * is a bitfield "callout_flags" options, used to track if a message has already been shown to the - * user. This is 32-bits - if we have more than 32 callouts, we should retire and recyle old ones. + * user. This is 32-bits - if we have more than 32 callouts, we should retire and recycle old ones. */ enum class CalloutFlag : uint32_t { Telemetry = 0x1, diff --git a/src/yuzu_cmd/yuzu.cpp b/src/yuzu_cmd/yuzu.cpp index 2497c71ae..39e0d35aa 100755 --- a/src/yuzu_cmd/yuzu.cpp +++ b/src/yuzu_cmd/yuzu.cpp @@ -202,7 +202,7 @@ int main(int argc, char** argv) { const u16 loader_id = static_cast(Core::System::ResultStatus::ErrorLoader); const u16 error_id = static_cast(load_result) - loader_id; LOG_CRITICAL(Frontend, - "While attempting to load the ROM requested, an error occured. Please " + "While attempting to load the ROM requested, an error occurred. 
Please " "refer to the yuzu wiki for more information or the yuzu discord for " "additional help.\n\nError Code: {:04X}-{:04X}\nError Description: {}", loader_id, error_id, static_cast(error_id)); diff --git a/src/yuzu_tester/yuzu.cpp b/src/yuzu_tester/yuzu.cpp index 6435ffabb..09cf2ad77 100755 --- a/src/yuzu_tester/yuzu.cpp +++ b/src/yuzu_tester/yuzu.cpp @@ -242,7 +242,7 @@ int main(int argc, char** argv) { const u16 loader_id = static_cast(Core::System::ResultStatus::ErrorLoader); const u16 error_id = static_cast(load_result) - loader_id; LOG_CRITICAL(Frontend, - "While attempting to load the ROM requested, an error occured. Please " + "While attempting to load the ROM requested, an error occurred. Please " "refer to the yuzu wiki for more information or the yuzu discord for " "additional help.\n\nError Code: {:04X}-{:04X}\nError Description: {}", loader_id, error_id, static_cast(error_id));