diff --git a/README.md b/README.md index 148f8ce79..77e1d2ba5 100755 --- a/README.md +++ b/README.md @@ -1,7 +1,7 @@ yuzu emulator early access ============= -This is the source code for early-access 2643. +This is the source code for early-access 2644. ## Legal Notice diff --git a/externals/dynarmic/src/dynarmic/backend/x64/a32_emit_x64.cpp b/externals/dynarmic/src/dynarmic/backend/x64/a32_emit_x64.cpp index d85da5134..c9710540e 100755 --- a/externals/dynarmic/src/dynarmic/backend/x64/a32_emit_x64.cpp +++ b/externals/dynarmic/src/dynarmic/backend/x64/a32_emit_x64.cpp @@ -149,7 +149,9 @@ A32EmitX64::BlockDescriptor A32EmitX64::Emit(IR::Block& block) { reg_alloc.AssertNoMoreUses(); - EmitAddCycles(block.CycleCount()); + if (conf.enable_cycle_counting) { + EmitAddCycles(block.CycleCount()); + } EmitX64::EmitTerminal(block.GetTerminal(), ctx.Location().SetSingleStepping(false), ctx.IsSingleStep()); code.int3(); @@ -184,7 +186,9 @@ void A32EmitX64::EmitCondPrelude(const A32EmitContext& ctx) { ASSERT(ctx.block.HasConditionFailedLocation()); Xbyak::Label pass = EmitCond(ctx.block.GetCondition()); - EmitAddCycles(ctx.block.ConditionFailedCycleCount()); + if (conf.enable_cycle_counting) { + EmitAddCycles(ctx.block.ConditionFailedCycleCount()); + } EmitTerminal(IR::Term::LinkBlock{ctx.block.ConditionFailedLocation()}, ctx.Location().SetSingleStepping(false), ctx.IsSingleStep()); code.L(pass); } @@ -715,31 +719,37 @@ void A32EmitX64::EmitA32UpdateUpperLocationDescriptor(A32EmitContext& ctx, IR::I } void A32EmitX64::EmitA32CallSupervisor(A32EmitContext& ctx, IR::Inst* inst) { - ctx.reg_alloc.HostCall(nullptr); - code.SwitchMxcsrOnExit(); - code.mov(code.ABI_PARAM2, qword[rsp + ABI_SHADOW_SPACE + offsetof(StackLayout, cycles_to_run)]); - code.sub(code.ABI_PARAM2, qword[rsp + ABI_SHADOW_SPACE + offsetof(StackLayout, cycles_remaining)]); - Devirtualize<&A32::UserCallbacks::AddTicks>(conf.callbacks).EmitCall(code); - ctx.reg_alloc.EndOfAllocScope(); + + if (conf.enable_cycle_counting) { + ctx.reg_alloc.HostCall(nullptr); + code.mov(code.ABI_PARAM2, qword[rsp + ABI_SHADOW_SPACE + offsetof(StackLayout, cycles_to_run)]); + code.sub(code.ABI_PARAM2, qword[rsp + ABI_SHADOW_SPACE + offsetof(StackLayout, cycles_remaining)]); + Devirtualize<&A32::UserCallbacks::AddTicks>(conf.callbacks).EmitCall(code); + ctx.reg_alloc.EndOfAllocScope(); + } auto args = ctx.reg_alloc.GetArgumentInfo(inst); ctx.reg_alloc.HostCall(nullptr, {}, args[0]); Devirtualize<&A32::UserCallbacks::CallSVC>(conf.callbacks).EmitCall(code); - Devirtualize<&A32::UserCallbacks::GetTicksRemaining>(conf.callbacks).EmitCall(code); - code.mov(qword[rsp + ABI_SHADOW_SPACE + offsetof(StackLayout, cycles_to_run)], code.ABI_RETURN); - code.mov(qword[rsp + ABI_SHADOW_SPACE + offsetof(StackLayout, cycles_remaining)], code.ABI_RETURN); - code.SwitchMxcsrOnEntry(); + if (conf.enable_cycle_counting) { + Devirtualize<&A32::UserCallbacks::GetTicksRemaining>(conf.callbacks).EmitCall(code); + code.mov(qword[rsp + ABI_SHADOW_SPACE + offsetof(StackLayout, cycles_to_run)], code.ABI_RETURN); + code.mov(qword[rsp + ABI_SHADOW_SPACE + offsetof(StackLayout, cycles_remaining)], code.ABI_RETURN); + code.SwitchMxcsrOnEntry(); + } } void A32EmitX64::EmitA32ExceptionRaised(A32EmitContext& ctx, IR::Inst* inst) { - ctx.reg_alloc.HostCall(nullptr); - code.SwitchMxcsrOnExit(); - code.mov(code.ABI_PARAM2, qword[rsp + ABI_SHADOW_SPACE + offsetof(StackLayout, cycles_to_run)]); - code.sub(code.ABI_PARAM2, qword[rsp + ABI_SHADOW_SPACE + offsetof(StackLayout, cycles_remaining)]); - 
Devirtualize<&A32::UserCallbacks::AddTicks>(conf.callbacks).EmitCall(code); + + ctx.reg_alloc.HostCall(nullptr); + if (conf.enable_cycle_counting) { + code.mov(code.ABI_PARAM2, qword[rsp + ABI_SHADOW_SPACE + offsetof(StackLayout, cycles_to_run)]); + code.sub(code.ABI_PARAM2, qword[rsp + ABI_SHADOW_SPACE + offsetof(StackLayout, cycles_remaining)]); + Devirtualize<&A32::UserCallbacks::AddTicks>(conf.callbacks).EmitCall(code); + } ctx.reg_alloc.EndOfAllocScope(); auto args = ctx.reg_alloc.GetArgumentInfo(inst); @@ -751,10 +761,12 @@ void A32EmitX64::EmitA32ExceptionRaised(A32EmitContext& ctx, IR::Inst* inst) { code.mov(param[1], exception); }); - Devirtualize<&A32::UserCallbacks::GetTicksRemaining>(conf.callbacks).EmitCall(code); - code.mov(qword[rsp + ABI_SHADOW_SPACE + offsetof(StackLayout, cycles_to_run)], code.ABI_RETURN); - code.mov(qword[rsp + ABI_SHADOW_SPACE + offsetof(StackLayout, cycles_remaining)], code.ABI_RETURN); - code.SwitchMxcsrOnEntry(); + if (conf.enable_cycle_counting) { + Devirtualize<&A32::UserCallbacks::GetTicksRemaining>(conf.callbacks).EmitCall(code); + code.mov(qword[rsp + ABI_SHADOW_SPACE + offsetof(StackLayout, cycles_to_run)], code.ABI_RETURN); + code.mov(qword[rsp + ABI_SHADOW_SPACE + offsetof(StackLayout, cycles_remaining)], code.ABI_RETURN); + code.SwitchMxcsrOnEntry(); + } } static u32 GetFpscrImpl(A32JitState* jit_state) { @@ -1134,14 +1146,26 @@ void A32EmitX64::EmitTerminalImpl(IR::Term::LinkBlock terminal, IR::LocationDesc return; } - code.cmp(qword[rsp + ABI_SHADOW_SPACE + offsetof(StackLayout, cycles_remaining)], 0); + if (conf.enable_cycle_counting) { + code.cmp(qword[rsp + ABI_SHADOW_SPACE + offsetof(StackLayout, cycles_remaining)], 0); - patch_information[terminal.next].jg.emplace_back(code.getCurr()); - if (const auto next_bb = GetBasicBlock(terminal.next)) { - EmitPatchJg(terminal.next, next_bb->entrypoint); + patch_information[terminal.next].jg.emplace_back(code.getCurr()); + if (const auto next_bb = GetBasicBlock(terminal.next)) { + EmitPatchJg(terminal.next, next_bb->entrypoint); + } else { + EmitPatchJg(terminal.next); + } } else { - EmitPatchJg(terminal.next); + code.cmp(dword[r15 + offsetof(A32JitState, halt_reason)], 0); + + patch_information[terminal.next].jz.emplace_back(code.getCurr()); + if (const auto next_bb = GetBasicBlock(terminal.next)) { + EmitPatchJz(terminal.next, next_bb->entrypoint); + } else { + EmitPatchJz(terminal.next); + } } + Xbyak::Label dest; code.jmp(dest, Xbyak::CodeGenerator::T_NEAR); @@ -1206,7 +1230,7 @@ void A32EmitX64::EmitTerminalImpl(IR::Term::CheckBit terminal, IR::LocationDescr } void A32EmitX64::EmitTerminalImpl(IR::Term::CheckHalt terminal, IR::LocationDescriptor initial_location, bool is_single_step) { - code.cmp(code.byte[r15 + offsetof(A32JitState, halt_requested)], u8(0)); + code.cmp(dword[r15 + offsetof(A32JitState, halt_reason)], 0); code.jne(code.GetForceReturnFromRunCodeAddress()); EmitTerminal(terminal.else_, initial_location, is_single_step); } @@ -1222,6 +1246,17 @@ void A32EmitX64::EmitPatchJg(const IR::LocationDescriptor& target_desc, CodePtr code.EnsurePatchLocationSize(patch_location, 14); } +void A32EmitX64::EmitPatchJz(const IR::LocationDescriptor& target_desc, CodePtr target_code_ptr) { + const CodePtr patch_location = code.getCurr(); + if (target_code_ptr) { + code.jz(target_code_ptr); + } else { + code.mov(MJitStateReg(A32::Reg::PC), A32::LocationDescriptor{target_desc}.PC()); + code.jz(code.GetReturnFromRunCodeAddress()); + } + code.EnsurePatchLocationSize(patch_location, 14); +} + void 
A32EmitX64::EmitPatchJmp(const IR::LocationDescriptor& target_desc, CodePtr target_code_ptr) { const CodePtr patch_location = code.getCurr(); if (target_code_ptr) { diff --git a/externals/dynarmic/src/dynarmic/backend/x64/a32_emit_x64.h b/externals/dynarmic/src/dynarmic/backend/x64/a32_emit_x64.h index 7bdc7e023..e7d4c3523 100755 --- a/externals/dynarmic/src/dynarmic/backend/x64/a32_emit_x64.h +++ b/externals/dynarmic/src/dynarmic/backend/x64/a32_emit_x64.h @@ -138,6 +138,7 @@ protected: // Patching void Unpatch(const IR::LocationDescriptor& target_desc) override; void EmitPatchJg(const IR::LocationDescriptor& target_desc, CodePtr target_code_ptr = nullptr) override; + void EmitPatchJz(const IR::LocationDescriptor& target_desc, CodePtr target_code_ptr = nullptr) override; void EmitPatchJmp(const IR::LocationDescriptor& target_desc, CodePtr target_code_ptr = nullptr) override; void EmitPatchMovRcx(CodePtr target_code_ptr = nullptr) override; }; diff --git a/externals/dynarmic/src/dynarmic/backend/x64/a32_interface.cpp b/externals/dynarmic/src/dynarmic/backend/x64/a32_interface.cpp index 3d7997b49..2e7cb4e0d 100755 --- a/externals/dynarmic/src/dynarmic/backend/x64/a32_interface.cpp +++ b/externals/dynarmic/src/dynarmic/backend/x64/a32_interface.cpp @@ -16,6 +16,7 @@ #include "dynarmic/backend/x64/devirtualize.h" #include "dynarmic/backend/x64/jitstate_info.h" #include "dynarmic/common/assert.h" +#include "dynarmic/common/atomic.h" #include "dynarmic/common/cast_util.h" #include "dynarmic/common/common_types.h" #include "dynarmic/common/scope_exit.h" @@ -31,11 +32,12 @@ namespace Dynarmic::A32 { using namespace Backend::X64; -static RunCodeCallbacks GenRunCodeCallbacks(A32::UserCallbacks* cb, CodePtr (*LookupBlock)(void* lookup_block_arg), void* arg) { +static RunCodeCallbacks GenRunCodeCallbacks(A32::UserCallbacks* cb, CodePtr (*LookupBlock)(void* lookup_block_arg), void* arg, const A32::UserConfig& conf) { return RunCodeCallbacks{ std::make_unique(LookupBlock, reinterpret_cast(arg)), std::make_unique(Devirtualize<&A32::UserCallbacks::AddTicks>(cb)), std::make_unique(Devirtualize<&A32::UserCallbacks::GetTicksRemaining>(cb)), + conf.enable_cycle_counting, }; } @@ -58,7 +60,7 @@ static Optimization::PolyfillOptions GenPolyfillOptions(const BlockOfCode& code) struct Jit::Impl { Impl(Jit* jit, A32::UserConfig conf) - : block_of_code(GenRunCodeCallbacks(conf.callbacks, &GetCurrentBlockThunk, this), JitStateInfo{jit_state}, conf.code_cache_size, conf.far_code_offset, GenRCP(conf)) + : block_of_code(GenRunCodeCallbacks(conf.callbacks, &GetCurrentBlockThunk, this, conf), JitStateInfo{jit_state}, conf.code_cache_size, conf.far_code_offset, GenRCP(conf)) , emitter(block_of_code, conf, jit) , polyfill_options(GenPolyfillOptions(block_of_code)) , conf(std::move(conf)) @@ -76,7 +78,7 @@ struct Jit::Impl { boost::icl::interval_set invalid_cache_ranges; bool invalidate_entire_cache = false; - void Execute() { + HaltReason Execute() { const CodePtr current_codeptr = [this] { // RSB optimization const u32 new_rsb_ptr = (jit_state.rsb_ptr - 1) & A32JitState::RSBPtrMask; @@ -88,11 +90,15 @@ struct Jit::Impl { return GetCurrentBlock(); }(); - block_of_code.RunCode(&jit_state, current_codeptr); + return block_of_code.RunCode(&jit_state, current_codeptr); } - void Step() { - block_of_code.StepCode(&jit_state, GetCurrentSingleStep()); + HaltReason Step() { + return block_of_code.StepCode(&jit_state, GetCurrentSingleStep()); + } + + void HaltExecution(HaltReason hr) { + Atomic::Or(&jit_state.halt_reason, 
static_cast(hr)); } void ClearExclusiveState() { @@ -123,7 +129,7 @@ struct Jit::Impl { void RequestCacheInvalidation() { if (jit_interface->is_executing) { - jit_state.halt_requested = true; + HaltExecution(HaltReason::CacheInvalidation); return; } @@ -182,28 +188,28 @@ Jit::Jit(UserConfig conf) Jit::~Jit() = default; -void Jit::Run() { +HaltReason Jit::Run() { ASSERT(!is_executing); is_executing = true; SCOPE_EXIT { this->is_executing = false; }; - impl->jit_state.halt_requested = false; - - impl->Execute(); + const HaltReason hr = impl->Execute(); impl->PerformCacheInvalidation(); + + return hr; } -void Jit::Step() { +HaltReason Jit::Step() { ASSERT(!is_executing); is_executing = true; SCOPE_EXIT { this->is_executing = false; }; - impl->jit_state.halt_requested = true; - - impl->Step(); + const HaltReason hr = impl->Step(); impl->PerformCacheInvalidation(); + + return hr; } void Jit::ClearCache() { @@ -221,8 +227,8 @@ void Jit::Reset() { impl->jit_state = {}; } -void Jit::HaltExecution() { - impl->jit_state.halt_requested = true; +void Jit::HaltExecution(HaltReason hr) { + impl->HaltExecution(hr); } void Jit::ClearExclusiveState() { diff --git a/externals/dynarmic/src/dynarmic/backend/x64/a32_jitstate.h b/externals/dynarmic/src/dynarmic/backend/x64/a32_jitstate.h index fa39e7e97..14cd5764e 100755 --- a/externals/dynarmic/src/dynarmic/backend/x64/a32_jitstate.h +++ b/externals/dynarmic/src/dynarmic/backend/x64/a32_jitstate.h @@ -40,7 +40,7 @@ struct A32JitState { // For internal use (See: BlockOfCode::RunCode) u32 guest_MXCSR = 0x00001f80; u32 asimd_MXCSR = 0x00009fc0; - bool halt_requested = false; + volatile u32 halt_reason = 0; // Exclusive state u32 exclusive_state = 0; diff --git a/externals/dynarmic/src/dynarmic/backend/x64/a64_emit_x64.cpp b/externals/dynarmic/src/dynarmic/backend/x64/a64_emit_x64.cpp index 8a82fc4eb..4b598c6f7 100755 --- a/externals/dynarmic/src/dynarmic/backend/x64/a64_emit_x64.cpp +++ b/externals/dynarmic/src/dynarmic/backend/x64/a64_emit_x64.cpp @@ -121,7 +121,9 @@ A64EmitX64::BlockDescriptor A64EmitX64::Emit(IR::Block& block) { reg_alloc.AssertNoMoreUses(); - EmitAddCycles(block.CycleCount()); + if (conf.enable_cycle_counting) { + EmitAddCycles(block.CycleCount()); + } EmitX64::EmitTerminal(block.GetTerminal(), ctx.Location().SetSingleStepping(false), ctx.IsSingleStep()); code.int3(); @@ -619,14 +621,26 @@ void A64EmitX64::EmitTerminalImpl(IR::Term::LinkBlock terminal, IR::LocationDesc return; } - code.cmp(qword[rsp + ABI_SHADOW_SPACE + offsetof(StackLayout, cycles_remaining)], 0); + if (conf.enable_cycle_counting) { + code.cmp(qword[rsp + ABI_SHADOW_SPACE + offsetof(StackLayout, cycles_remaining)], 0); - patch_information[terminal.next].jg.emplace_back(code.getCurr()); - if (auto next_bb = GetBasicBlock(terminal.next)) { - EmitPatchJg(terminal.next, next_bb->entrypoint); + patch_information[terminal.next].jg.emplace_back(code.getCurr()); + if (const auto next_bb = GetBasicBlock(terminal.next)) { + EmitPatchJg(terminal.next, next_bb->entrypoint); + } else { + EmitPatchJg(terminal.next); + } } else { - EmitPatchJg(terminal.next); + code.cmp(dword[r15 + offsetof(A64JitState, halt_reason)], 0); + + patch_information[terminal.next].jz.emplace_back(code.getCurr()); + if (const auto next_bb = GetBasicBlock(terminal.next)) { + EmitPatchJz(terminal.next, next_bb->entrypoint); + } else { + EmitPatchJz(terminal.next); + } } + code.mov(rax, A64::LocationDescriptor{terminal.next}.PC()); code.mov(qword[r15 + offsetof(A64JitState, pc)], rax); code.ForceReturnFromRunCode(); 
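Both backends follow the same patch-point contract in these hunks: each linked-block exit is a conditional branch padded to a fixed byte count — 14 bytes on the A32 side, 23 on the A64 side, which also has to store the PC — so Patch()/Unpatch() can rewrite the site in place when the target block is compiled or invalidated. With cycle counting enabled the branch is a jg taken while cycles_remaining is positive; with it disabled, the only exit condition is a pending halt, so the branch becomes a jz taken while halt_reason is zero. A minimal sketch of that invariant (the helper name is an assumption, not dynarmic's API):

    #include <cstddef>
    #include <cstdint>
    #include <xbyak/xbyak.h>

    // Sketch of the fixed-size patch-site invariant (hypothetical EmitFixedSizeJz
    // helper; the real implementations are EmitPatchJz/EnsurePatchLocationSize).
    void EmitFixedSizeJz(Xbyak::CodeGenerator& code, const void* target_block,
                         const void* dispatcher, std::size_t patch_size) {
        const std::uint8_t* const begin = code.getCurr();
        // Taken while halt_reason == 0: run the linked block if it is already
        // compiled, otherwise return to the dispatcher to look it up.
        code.jz(target_block ? target_block : dispatcher);
        // Pad with nops so the site always spans patch_size bytes and can be
        // rewritten in place by Patch()/Unpatch() later.
        while (static_cast<std::size_t>(code.getCurr() - begin) < patch_size) {
            code.nop();
        }
    }

The padding is what makes re-linking cheap: overwriting a known-size region avoids having to shift or re-emit any surrounding code.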
@@ -691,7 +705,7 @@ void A64EmitX64::EmitTerminalImpl(IR::Term::CheckBit terminal, IR::LocationDescr } void A64EmitX64::EmitTerminalImpl(IR::Term::CheckHalt terminal, IR::LocationDescriptor initial_location, bool is_single_step) { - code.cmp(code.byte[r15 + offsetof(A64JitState, halt_requested)], u8(0)); + code.cmp(dword[r15 + offsetof(A64JitState, halt_reason)], 0); code.jne(code.GetForceReturnFromRunCodeAddress()); EmitTerminal(terminal.else_, initial_location, is_single_step); } @@ -708,6 +722,18 @@ void A64EmitX64::EmitPatchJg(const IR::LocationDescriptor& target_desc, CodePtr code.EnsurePatchLocationSize(patch_location, 23); } +void A64EmitX64::EmitPatchJz(const IR::LocationDescriptor& target_desc, CodePtr target_code_ptr) { + const CodePtr patch_location = code.getCurr(); + if (target_code_ptr) { + code.jz(target_code_ptr); + } else { + code.mov(rax, A64::LocationDescriptor{target_desc}.PC()); + code.mov(qword[r15 + offsetof(A64JitState, pc)], rax); + code.jz(code.GetReturnFromRunCodeAddress()); + } + code.EnsurePatchLocationSize(patch_location, 23); +} + void A64EmitX64::EmitPatchJmp(const IR::LocationDescriptor& target_desc, CodePtr target_code_ptr) { const CodePtr patch_location = code.getCurr(); if (target_code_ptr) { diff --git a/externals/dynarmic/src/dynarmic/backend/x64/a64_emit_x64.h b/externals/dynarmic/src/dynarmic/backend/x64/a64_emit_x64.h index f5d1a9174..644628dca 100755 --- a/externals/dynarmic/src/dynarmic/backend/x64/a64_emit_x64.h +++ b/externals/dynarmic/src/dynarmic/backend/x64/a64_emit_x64.h @@ -135,6 +135,7 @@ protected: // Patching void Unpatch(const IR::LocationDescriptor& target_desc) override; void EmitPatchJg(const IR::LocationDescriptor& target_desc, CodePtr target_code_ptr = nullptr) override; + void EmitPatchJz(const IR::LocationDescriptor& target_desc, CodePtr target_code_ptr = nullptr) override; void EmitPatchJmp(const IR::LocationDescriptor& target_desc, CodePtr target_code_ptr = nullptr) override; void EmitPatchMovRcx(CodePtr target_code_ptr = nullptr) override; }; diff --git a/externals/dynarmic/src/dynarmic/backend/x64/a64_interface.cpp b/externals/dynarmic/src/dynarmic/backend/x64/a64_interface.cpp index abcc2727e..8471f43b7 100755 --- a/externals/dynarmic/src/dynarmic/backend/x64/a64_interface.cpp +++ b/externals/dynarmic/src/dynarmic/backend/x64/a64_interface.cpp @@ -15,6 +15,7 @@ #include "dynarmic/backend/x64/devirtualize.h" #include "dynarmic/backend/x64/jitstate_info.h" #include "dynarmic/common/assert.h" +#include "dynarmic/common/atomic.h" #include "dynarmic/common/scope_exit.h" #include "dynarmic/common/x64_disassemble.h" #include "dynarmic/frontend/A64/translate/a64_translate.h" @@ -26,11 +27,12 @@ namespace Dynarmic::A64 { using namespace Backend::X64; -static RunCodeCallbacks GenRunCodeCallbacks(A64::UserCallbacks* cb, CodePtr (*LookupBlock)(void* lookup_block_arg), void* arg) { +static RunCodeCallbacks GenRunCodeCallbacks(A64::UserCallbacks* cb, CodePtr (*LookupBlock)(void* lookup_block_arg), void* arg, const A64::UserConfig& conf) { return RunCodeCallbacks{ std::make_unique(LookupBlock, reinterpret_cast(arg)), std::make_unique(Devirtualize<&A64::UserCallbacks::AddTicks>(cb)), std::make_unique(Devirtualize<&A64::UserCallbacks::GetTicksRemaining>(cb)), + conf.enable_cycle_counting, }; } @@ -55,7 +57,7 @@ struct Jit::Impl final { public: Impl(Jit* jit, UserConfig conf) : conf(conf) - , block_of_code(GenRunCodeCallbacks(conf.callbacks, &GetCurrentBlockThunk, this), JitStateInfo{jit_state}, conf.code_cache_size, conf.far_code_offset, 
GenRCP(conf)) + , block_of_code(GenRunCodeCallbacks(conf.callbacks, &GetCurrentBlockThunk, this, conf), JitStateInfo{jit_state}, conf.code_cache_size, conf.far_code_offset, GenRCP(conf)) , emitter(block_of_code, conf, jit) , polyfill_options(GenPolyfillOptions(block_of_code)) { ASSERT(conf.page_table_address_space_bits >= 12 && conf.page_table_address_space_bits <= 64); @@ -63,13 +65,12 @@ public: ~Impl() = default; - void Run() { + HaltReason Run() { ASSERT(!is_executing); PerformRequestedCacheInvalidation(); is_executing = true; SCOPE_EXIT { this->is_executing = false; }; - jit_state.halt_requested = false; // TODO: Check code alignment @@ -83,29 +84,33 @@ public: return GetCurrentBlock(); }(); - block_of_code.RunCode(&jit_state, current_code_ptr); + + const HaltReason hr = block_of_code.RunCode(&jit_state, current_code_ptr); PerformRequestedCacheInvalidation(); + + return hr; } - void Step() { + HaltReason Step() { ASSERT(!is_executing); PerformRequestedCacheInvalidation(); is_executing = true; SCOPE_EXIT { this->is_executing = false; }; - jit_state.halt_requested = true; - block_of_code.StepCode(&jit_state, GetCurrentSingleStep()); + const HaltReason hr = block_of_code.StepCode(&jit_state, GetCurrentSingleStep()); PerformRequestedCacheInvalidation(); + + return hr; } void ClearCache() { std::unique_lock lock{invalidation_mutex}; invalidate_entire_cache = true; if (is_executing) { - jit_state.halt_requested = true; + HaltExecution(HaltReason::CacheInvalidation); } } @@ -115,7 +120,7 @@ public: const auto range = boost::icl::discrete_interval::closed(start_address, end_address); invalid_cache_ranges.add(range); if (is_executing) { - jit_state.halt_requested = true; + HaltExecution(HaltReason::CacheInvalidation); } } @@ -124,8 +129,8 @@ public: jit_state = {}; } - void HaltExecution() { - jit_state.halt_requested = true; + void HaltExecution(HaltReason hr) { + Atomic::Or(&jit_state.halt_reason, static_cast(hr)); } u64 GetSP() const { @@ -279,7 +284,7 @@ private: void RequestCacheInvalidation() { if (is_executing) { - jit_state.halt_requested = true; + HaltExecution(HaltReason::CacheInvalidation); return; } @@ -321,12 +326,12 @@ Jit::Jit(UserConfig conf) Jit::~Jit() = default; -void Jit::Run() { - impl->Run(); +HaltReason Jit::Run() { + return impl->Run(); } -void Jit::Step() { - impl->Step(); +HaltReason Jit::Step() { + return impl->Step(); } void Jit::ClearCache() { @@ -341,8 +346,8 @@ void Jit::Reset() { impl->Reset(); } -void Jit::HaltExecution() { - impl->HaltExecution(); +void Jit::HaltExecution(HaltReason hr) { + impl->HaltExecution(hr); } u64 Jit::GetSP() const { diff --git a/externals/dynarmic/src/dynarmic/backend/x64/a64_jitstate.h b/externals/dynarmic/src/dynarmic/backend/x64/a64_jitstate.h index a0e20cff2..12479461a 100755 --- a/externals/dynarmic/src/dynarmic/backend/x64/a64_jitstate.h +++ b/externals/dynarmic/src/dynarmic/backend/x64/a64_jitstate.h @@ -43,7 +43,7 @@ struct A64JitState { // For internal use (See: BlockOfCode::RunCode) u32 guest_MXCSR = 0x00001f80; u32 asimd_MXCSR = 0x00009fc0; - bool halt_requested = false; + volatile u32 halt_reason = 0; // Exclusive state static constexpr u64 RESERVATION_GRANULE_MASK = 0xFFFF'FFFF'FFFF'FFF0ull; diff --git a/externals/dynarmic/src/dynarmic/backend/x64/block_of_code.cpp b/externals/dynarmic/src/dynarmic/backend/x64/block_of_code.cpp index 4fe5767d5..605a90600 100755 --- a/externals/dynarmic/src/dynarmic/backend/x64/block_of_code.cpp +++ b/externals/dynarmic/src/dynarmic/backend/x64/block_of_code.cpp @@ -201,12 +201,12 @@ 
size_t BlockOfCode::SpaceRemaining() const { return std::min(reinterpret_cast(far_code_begin) - current_near_ptr, &top_[maxSize_] - current_far_ptr); } -void BlockOfCode::RunCode(void* jit_state, CodePtr code_ptr) const { - run_code(jit_state, code_ptr); +HaltReason BlockOfCode::RunCode(void* jit_state, CodePtr code_ptr) const { + return run_code(jit_state, code_ptr); } -void BlockOfCode::StepCode(void* jit_state, CodePtr code_ptr) const { - step_code(jit_state, code_ptr); +HaltReason BlockOfCode::StepCode(void* jit_state, CodePtr code_ptr) const { + return step_code(jit_state, code_ptr); } void BlockOfCode::ReturnFromRunCode(bool mxcsr_already_exited) { @@ -224,6 +224,8 @@ void BlockOfCode::ForceReturnFromRunCode(bool mxcsr_already_exited) { } void BlockOfCode::GenRunCode(std::function rcp) { + Xbyak::Label return_to_caller, return_to_caller_mxcsr_already_exited; + align(); run_code = getCurr(); @@ -236,12 +238,17 @@ void BlockOfCode::GenRunCode(std::function rcp) { mov(r15, ABI_PARAM1); mov(rbx, ABI_PARAM2); // save temporarily in non-volatile register - cb.GetTicksRemaining->EmitCall(*this); - mov(qword[rsp + ABI_SHADOW_SPACE + offsetof(StackLayout, cycles_to_run)], ABI_RETURN); - mov(qword[rsp + ABI_SHADOW_SPACE + offsetof(StackLayout, cycles_remaining)], ABI_RETURN); + if (cb.enable_cycle_counting) { + cb.GetTicksRemaining->EmitCall(*this); + mov(qword[rsp + ABI_SHADOW_SPACE + offsetof(StackLayout, cycles_to_run)], ABI_RETURN); + mov(qword[rsp + ABI_SHADOW_SPACE + offsetof(StackLayout, cycles_remaining)], ABI_RETURN); + } rcp(*this); + cmp(dword[r15 + jsi.offsetof_halt_reason], 0); + jne(return_to_caller_mxcsr_already_exited, T_NEAR); + SwitchMxcsrOnEntry(); jmp(rbx); @@ -252,31 +259,44 @@ void BlockOfCode::GenRunCode(std::function rcp) { mov(r15, ABI_PARAM1); - mov(qword[rsp + ABI_SHADOW_SPACE + offsetof(StackLayout, cycles_to_run)], 1); - mov(qword[rsp + ABI_SHADOW_SPACE + offsetof(StackLayout, cycles_remaining)], 1); + if (cb.enable_cycle_counting) { + mov(qword[rsp + ABI_SHADOW_SPACE + offsetof(StackLayout, cycles_to_run)], 1); + mov(qword[rsp + ABI_SHADOW_SPACE + offsetof(StackLayout, cycles_remaining)], 1); + } rcp(*this); + cmp(dword[r15 + jsi.offsetof_halt_reason], 0); + jne(return_to_caller_mxcsr_already_exited, T_NEAR); + lock(); + or_(dword[r15 + jsi.offsetof_halt_reason], static_cast(HaltReason::Step)); + SwitchMxcsrOnEntry(); jmp(ABI_PARAM2); // Dispatcher loop - Xbyak::Label return_to_caller, return_to_caller_mxcsr_already_exited; - align(); return_from_run_code[0] = getCurr(); - cmp(qword[rsp + ABI_SHADOW_SPACE + offsetof(StackLayout, cycles_remaining)], 0); - jng(return_to_caller); + cmp(dword[r15 + jsi.offsetof_halt_reason], 0); + jne(return_to_caller); + if (cb.enable_cycle_counting) { + cmp(qword[rsp + ABI_SHADOW_SPACE + offsetof(StackLayout, cycles_remaining)], 0); + jng(return_to_caller); + } cb.LookupBlock->EmitCall(*this); jmp(ABI_RETURN); align(); return_from_run_code[MXCSR_ALREADY_EXITED] = getCurr(); - cmp(qword[rsp + ABI_SHADOW_SPACE + offsetof(StackLayout, cycles_remaining)], 0); - jng(return_to_caller_mxcsr_already_exited); + cmp(dword[r15 + jsi.offsetof_halt_reason], 0); + jne(return_to_caller_mxcsr_already_exited); + if (cb.enable_cycle_counting) { + cmp(qword[rsp + ABI_SHADOW_SPACE + offsetof(StackLayout, cycles_remaining)], 0); + jng(return_to_caller_mxcsr_already_exited); + } SwitchMxcsrOnEntry(); cb.LookupBlock->EmitCall(*this); jmp(ABI_RETURN); @@ -291,10 +311,16 @@ void BlockOfCode::GenRunCode(std::function rcp) { 
return_from_run_code[MXCSR_ALREADY_EXITED | FORCE_RETURN] = getCurr(); L(return_to_caller_mxcsr_already_exited); - cb.AddTicks->EmitCall(*this, [this](RegList param) { - mov(param[0], qword[rsp + ABI_SHADOW_SPACE + offsetof(StackLayout, cycles_to_run)]); - sub(param[0], qword[rsp + ABI_SHADOW_SPACE + offsetof(StackLayout, cycles_remaining)]); - }); + if (cb.enable_cycle_counting) { + cb.AddTicks->EmitCall(*this, [this](RegList param) { + mov(param[0], qword[rsp + ABI_SHADOW_SPACE + offsetof(StackLayout, cycles_to_run)]); + sub(param[0], qword[rsp + ABI_SHADOW_SPACE + offsetof(StackLayout, cycles_remaining)]); + }); + } + + xor_(eax, eax); + lock(); + xchg(dword[r15 + jsi.offsetof_halt_reason], eax); ABI_PopCalleeSaveRegistersAndAdjustStack(*this, sizeof(StackLayout)); ret(); @@ -323,6 +349,10 @@ void BlockOfCode::LeaveStandardASIMD() { } void BlockOfCode::UpdateTicks() { + if (!cb.enable_cycle_counting) { + return; + } + cb.AddTicks->EmitCall(*this, [this](RegList param) { mov(param[0], qword[rsp + ABI_SHADOW_SPACE + offsetof(StackLayout, cycles_to_run)]); sub(param[0], qword[rsp + ABI_SHADOW_SPACE + offsetof(StackLayout, cycles_remaining)]); diff --git a/externals/dynarmic/src/dynarmic/backend/x64/block_of_code.h b/externals/dynarmic/src/dynarmic/backend/x64/block_of_code.h index 17aa9cd75..eac124b28 100755 --- a/externals/dynarmic/src/dynarmic/backend/x64/block_of_code.h +++ b/externals/dynarmic/src/dynarmic/backend/x64/block_of_code.h @@ -20,6 +20,7 @@ #include "dynarmic/backend/x64/jitstate_info.h" #include "dynarmic/common/cast_util.h" #include "dynarmic/common/common_types.h" +#include "dynarmic/interface/halt_reason.h" namespace Dynarmic::Backend::X64 { @@ -29,6 +30,7 @@ struct RunCodeCallbacks { std::unique_ptr LookupBlock; std::unique_ptr AddTicks; std::unique_ptr GetTicksRemaining; + bool enable_cycle_counting; }; class BlockOfCode final : public Xbyak::CodeGenerator { @@ -50,9 +52,9 @@ public: size_t SpaceRemaining() const; /// Runs emulated code from code_ptr. - void RunCode(void* jit_state, CodePtr code_ptr) const; + HaltReason RunCode(void* jit_state, CodePtr code_ptr) const; /// Runs emulated code from code_ptr for a single cycle. 
- void StepCode(void* jit_state, CodePtr code_ptr) const; + HaltReason StepCode(void* jit_state, CodePtr code_ptr) const; /// Code emitter: Returns to dispatcher void ReturnFromRunCode(bool mxcsr_already_exited = false); /// Code emitter: Returns to dispatcher, forces return to host @@ -183,7 +185,7 @@ private: CodePtr near_code_ptr; CodePtr far_code_ptr; - using RunCodeFuncType = void (*)(void*, CodePtr); + using RunCodeFuncType = HaltReason (*)(void*, CodePtr); RunCodeFuncType run_code = nullptr; RunCodeFuncType step_code = nullptr; static constexpr size_t MXCSR_ALREADY_EXITED = 1 << 0; diff --git a/externals/dynarmic/src/dynarmic/backend/x64/emit_x64.cpp b/externals/dynarmic/src/dynarmic/backend/x64/emit_x64.cpp index 68ecbe518..3a71842f3 100755 --- a/externals/dynarmic/src/dynarmic/backend/x64/emit_x64.cpp +++ b/externals/dynarmic/src/dynarmic/backend/x64/emit_x64.cpp @@ -306,6 +306,11 @@ void EmitX64::Patch(const IR::LocationDescriptor& target_desc, CodePtr target_co EmitPatchJg(target_desc, target_code_ptr); } + for (CodePtr location : patch_info.jz) { + code.SetCodePtr(location); + EmitPatchJz(target_desc, target_code_ptr); + } + for (CodePtr location : patch_info.jmp) { code.SetCodePtr(location); EmitPatchJmp(target_desc, target_code_ptr); diff --git a/externals/dynarmic/src/dynarmic/backend/x64/emit_x64.h b/externals/dynarmic/src/dynarmic/backend/x64/emit_x64.h index 494caada6..5290da151 100755 --- a/externals/dynarmic/src/dynarmic/backend/x64/emit_x64.h +++ b/externals/dynarmic/src/dynarmic/backend/x64/emit_x64.h @@ -111,12 +111,14 @@ protected: // Patching struct PatchInformation { std::vector jg; + std::vector jz; std::vector jmp; std::vector mov_rcx; }; void Patch(const IR::LocationDescriptor& target_desc, CodePtr target_code_ptr); virtual void Unpatch(const IR::LocationDescriptor& target_desc); virtual void EmitPatchJg(const IR::LocationDescriptor& target_desc, CodePtr target_code_ptr = nullptr) = 0; + virtual void EmitPatchJz(const IR::LocationDescriptor& target_desc, CodePtr target_code_ptr = nullptr) = 0; virtual void EmitPatchJmp(const IR::LocationDescriptor& target_desc, CodePtr target_code_ptr = nullptr) = 0; virtual void EmitPatchMovRcx(CodePtr target_code_ptr = nullptr) = 0; diff --git a/externals/dynarmic/src/dynarmic/backend/x64/emit_x64_memory.cpp.inc b/externals/dynarmic/src/dynarmic/backend/x64/emit_x64_memory.cpp.inc index f1666700e..9e71df2ab 100755 --- a/externals/dynarmic/src/dynarmic/backend/x64/emit_x64_memory.cpp.inc +++ b/externals/dynarmic/src/dynarmic/backend/x64/emit_x64_memory.cpp.inc @@ -319,9 +319,9 @@ void AxxEmitX64::EmitExclusiveReadMemoryInline(AxxEmitContext& ctx, IR::Inst* in } auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const bool ordered = true; + constexpr bool ordered = true; - if (ordered && bitsize == 128) { + if constexpr (ordered && bitsize == 128) { // Required for atomic 128-bit loads/stores ctx.reg_alloc.ScratchGpr(HostLoc::RAX); ctx.reg_alloc.ScratchGpr(HostLoc::RBX); @@ -394,7 +394,7 @@ void AxxEmitX64::EmitExclusiveWriteMemoryInline(AxxEmitContext& ctx, IR::Inst* i } auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const bool ordered = true; + constexpr bool ordered = true; const auto value = [&] { if constexpr (bitsize == 128) { diff --git a/externals/dynarmic/src/dynarmic/backend/x64/jitstate_info.h b/externals/dynarmic/src/dynarmic/backend/x64/jitstate_info.h index e87188867..654b3f040 100755 --- a/externals/dynarmic/src/dynarmic/backend/x64/jitstate_info.h +++ 
b/externals/dynarmic/src/dynarmic/backend/x64/jitstate_info.h @@ -20,7 +20,8 @@ struct JitStateInfo { , offsetof_rsb_codeptrs(offsetof(JitStateType, rsb_codeptrs)) , offsetof_cpsr_nzcv(offsetof(JitStateType, cpsr_nzcv)) , offsetof_fpsr_exc(offsetof(JitStateType, fpsr_exc)) - , offsetof_fpsr_qc(offsetof(JitStateType, fpsr_qc)) {} + , offsetof_fpsr_qc(offsetof(JitStateType, fpsr_qc)) + , offsetof_halt_reason(offsetof(JitStateType, halt_reason)) {} const size_t offsetof_guest_MXCSR; const size_t offsetof_asimd_MXCSR; @@ -31,6 +32,7 @@ struct JitStateInfo { const size_t offsetof_cpsr_nzcv; const size_t offsetof_fpsr_exc; const size_t offsetof_fpsr_qc; + const size_t offsetof_halt_reason; }; } // namespace Dynarmic::Backend::X64 diff --git a/externals/dynarmic/src/dynarmic/common/atomic.h b/externals/dynarmic/src/dynarmic/common/atomic.h new file mode 100755 index 000000000..d9f00db40 --- /dev/null +++ b/externals/dynarmic/src/dynarmic/common/atomic.h @@ -0,0 +1,20 @@ +/* This file is part of the dynarmic project. + * Copyright (c) 2022 MerryMage + * SPDX-License-Identifier: 0BSD + */ + +#pragma once + +#include "dynarmic/common/common_types.h" + +namespace Dynarmic::Atomic { + +inline void Or(volatile u32* ptr, u32 value) { +#ifdef _MSC_VER + _InterlockedOr(reinterpret_cast<volatile long*>(ptr), value); +#else + __atomic_or_fetch(ptr, value, __ATOMIC_SEQ_CST); +#endif +} + +} // namespace Dynarmic::Atomic diff --git a/externals/dynarmic/src/dynarmic/interface/A32/a32.h b/externals/dynarmic/src/dynarmic/interface/A32/a32.h index c36f9fcbf..48a5c342a 100755 --- a/externals/dynarmic/src/dynarmic/interface/A32/a32.h +++ b/externals/dynarmic/src/dynarmic/interface/A32/a32.h @@ -12,6 +12,7 @@ #include #include "dynarmic/interface/A32/config.h" +#include "dynarmic/interface/halt_reason.h" namespace Dynarmic { namespace A32 { @@ -27,13 +28,13 @@ public: * Runs the emulated CPU. * Cannot be recursively called. */ - void Run(); + HaltReason Run(); /** * Steps the emulated CPU. * Cannot be recursively called. */ - void Step(); + HaltReason Step(); /** * Clears the code cache of all compiled code. @@ -58,7 +59,7 @@ public: * Stops execution in Jit::Run. * Can only be called from a callback. */ - void HaltExecution(); + void HaltExecution(HaltReason hr = HaltReason::UserDefined1); /// View and modify registers. std::array<std::uint32_t, 16>& Regs(); diff --git a/externals/dynarmic/src/dynarmic/interface/A32/config.h b/externals/dynarmic/src/dynarmic/interface/A32/config.h index 75c1ec59c..5a6317c6b 100755 --- a/externals/dynarmic/src/dynarmic/interface/A32/config.h +++ b/externals/dynarmic/src/dynarmic/interface/A32/config.h @@ -208,6 +208,10 @@ struct UserConfig { /// to avoid writting certain unnecessary code only needed for cycle timers. bool wall_clock_cntpct = false; + /// This option allows you to disable cycle counting. If this is set to false, + /// AddTicks and GetTicksRemaining are never called, and no cycle counting is done. + bool enable_cycle_counting = true; + /// This option relates to the CPSR.E flag. Enabling this option disables modification /// of CPSR.E by the emulated program, forcing it to 0.
/// NOTE: Calling Jit::SetCpsr with CPSR.E=1 while this option is enabled may result diff --git a/externals/dynarmic/src/dynarmic/interface/A64/a64.h b/externals/dynarmic/src/dynarmic/interface/A64/a64.h index d908fc21f..b5e008cef 100755 --- a/externals/dynarmic/src/dynarmic/interface/A64/a64.h +++ b/externals/dynarmic/src/dynarmic/interface/A64/a64.h @@ -13,6 +13,7 @@ #include #include "dynarmic/interface/A64/config.h" +#include "dynarmic/interface/halt_reason.h" namespace Dynarmic { namespace A64 { @@ -28,13 +29,13 @@ public: * Runs the emulated CPU. * Cannot be recursively called. */ - void Run(); + HaltReason Run(); /** * Step the emulated CPU for one instruction. * Cannot be recursively called. */ - void Step(); + HaltReason Step(); /** * Clears the code cache of all compiled code. @@ -59,7 +60,7 @@ public: * Stops execution in Jit::Run. * Can only be called from a callback. */ - void HaltExecution(); + void HaltExecution(HaltReason hr = HaltReason::UserDefined1); /// Read Stack Pointer std::uint64_t GetSP() const; @@ -118,7 +119,7 @@ public: /// Debugging: Dump a disassembly all of compiled code to the console. void DumpDisassembly() const; - /* + /* * Disassemble the instructions following the current pc and return * the resulting instructions as a vector of their string representations. */ diff --git a/externals/dynarmic/src/dynarmic/interface/A64/config.h b/externals/dynarmic/src/dynarmic/interface/A64/config.h index 7926fb5ac..bd7cedffe 100755 --- a/externals/dynarmic/src/dynarmic/interface/A64/config.h +++ b/externals/dynarmic/src/dynarmic/interface/A64/config.h @@ -273,9 +273,9 @@ struct UserConfig { /// to avoid writting certain unnecessary code only needed for cycle timers. bool wall_clock_cntpct = false; - // Determines whether AddTicks and GetTicksRemaining are called. - // If false, execution will continue until soon after Jit::HaltExecution is called. - // bool enable_ticks = true; // TODO + /// This option allows you to disable cycle counting. If this is set to false, + /// AddTicks and GetTicksRemaining are never called, and no cycle counting is done. + bool enable_cycle_counting = true; // Minimum size is about 8MiB. Maximum size is about 2GiB. Maximum size is limited by // the maximum length of a x64 jump. diff --git a/externals/dynarmic/src/dynarmic/interface/halt_reason.h b/externals/dynarmic/src/dynarmic/interface/halt_reason.h new file mode 100755 index 000000000..28da2b13f --- /dev/null +++ b/externals/dynarmic/src/dynarmic/interface/halt_reason.h @@ -0,0 +1,53 @@ +/* This file is part of the dynarmic project. 
+ * Copyright (c) 2020 MerryMage + * SPDX-License-Identifier: 0BSD + */ + +#pragma once + +#include <cstdint> + +namespace Dynarmic { + +enum class HaltReason : std::uint32_t { + Step = 0x00000001, + CacheInvalidation = 0x00000002, + UserDefined1 = 0x01000000, + UserDefined2 = 0x02000000, + UserDefined3 = 0x04000000, + UserDefined4 = 0x08000000, + UserDefined5 = 0x10000000, + UserDefined6 = 0x20000000, + UserDefined7 = 0x40000000, + UserDefined8 = 0x80000000, +}; + +constexpr HaltReason operator~(HaltReason hr) { + return static_cast<HaltReason>(~static_cast<std::uint32_t>(hr)); +} + +constexpr HaltReason operator|(HaltReason hr1, HaltReason hr2) { + return static_cast<HaltReason>(static_cast<std::uint32_t>(hr1) | static_cast<std::uint32_t>(hr2)); +} + +constexpr HaltReason operator&(HaltReason hr1, HaltReason hr2) { + return static_cast<HaltReason>(static_cast<std::uint32_t>(hr1) & static_cast<std::uint32_t>(hr2)); +} + +constexpr HaltReason operator|=(HaltReason& result, HaltReason hr) { + return result = (result | hr); +} + +constexpr HaltReason operator&=(HaltReason& result, HaltReason hr) { + return result = (result & hr); +} + +constexpr bool operator!(HaltReason hr) { + return static_cast<std::uint32_t>(hr) == 0; +} + +constexpr bool Has(HaltReason hr1, HaltReason hr2) { + return (static_cast<std::uint32_t>(hr1) & static_cast<std::uint32_t>(hr2)) != 0; +} + +} // namespace Dynarmic diff --git a/src/core/arm/arm_interface.h b/src/core/arm/arm_interface.h index c60322442..dce2f4195 100755 --- a/src/core/arm/arm_interface.h +++ b/src/core/arm/arm_interface.h @@ -171,6 +171,9 @@ public: /// Prepare core for thread reschedule (if needed to correctly handle state) virtual void PrepareReschedule() = 0; + /// Signal an interrupt and ask the core to halt as soon as possible. + virtual void SignalInterrupt() = 0; + struct BacktraceEntry { std::string module; u64 address; diff --git a/src/core/arm/dynarmic/arm_dynarmic_32.cpp b/src/core/arm/dynarmic/arm_dynarmic_32.cpp index d4cbd0c20..581536509 100755 --- a/src/core/arm/dynarmic/arm_dynarmic_32.cpp +++ b/src/core/arm/dynarmic/arm_dynarmic_32.cpp @@ -25,6 +25,9 @@ namespace Core { using namespace Common::Literals; +constexpr Dynarmic::HaltReason break_loop = Dynarmic::HaltReason::UserDefined2; +constexpr Dynarmic::HaltReason svc_call = Dynarmic::HaltReason::UserDefined3; + class DynarmicCallbacks32 : public Dynarmic::A32::UserCallbacks { public: explicit DynarmicCallbacks32(ARM_Dynarmic_32& parent_) @@ -82,15 +85,13 @@ public: } void CallSVC(u32 swi) override { - parent.svc_called = true; parent.svc_swi = swi; - parent.jit->HaltExecution(); + parent.jit->HaltExecution(svc_call); } void AddTicks(u64 ticks) override { - if (parent.uses_wall_clock) { - return; - } + ASSERT_MSG(!parent.uses_wall_clock, "This should never happen - dynarmic ticking disabled"); + // Divide the number of ticks by the amount of CPU cores. TODO(Subv): This yields only a // rough approximation of the amount of executed ticks in the system, it may be thrown off // if not all cores are doing a similar amount of work.
Instead of doing this, we should @@ -106,12 +107,8 @@ public: } u64 GetTicksRemaining() override { - if (parent.uses_wall_clock) { - if (!parent.interrupt_handlers[parent.core_index].IsInterrupted()) { - return minimum_run_cycles; - } - return 0U; - } + ASSERT_MSG(!parent.uses_wall_clock, "This should never happen - dynarmic ticking disabled"); + return std::max(parent.system.CoreTiming().GetDowncount(), 0); } @@ -146,6 +143,7 @@ std::shared_ptr ARM_Dynarmic_32::MakeJit(Common::PageTable* // Timing config.wall_clock_cntpct = uses_wall_clock; + config.enable_cycle_counting = !uses_wall_clock; // Code cache size config.code_cache_size = 512_MiB; @@ -228,13 +226,11 @@ std::shared_ptr ARM_Dynarmic_32::MakeJit(Common::PageTable* void ARM_Dynarmic_32::Run() { while (true) { - jit->Run(); - if (!svc_called) { - break; + const auto hr = jit->Run(); + if (Has(hr, svc_call)) { + Kernel::Svc::Call(system, svc_swi); } - svc_called = false; - Kernel::Svc::Call(system, svc_swi); - if (shutdown) { + if (Has(hr, break_loop)) { break; } } @@ -320,8 +316,11 @@ void ARM_Dynarmic_32::LoadContext(const ThreadContext32& ctx) { } void ARM_Dynarmic_32::PrepareReschedule() { - jit->HaltExecution(); - shutdown = true; + jit->HaltExecution(break_loop); +} + +void ARM_Dynarmic_32::SignalInterrupt() { + jit->HaltExecution(break_loop); } void ARM_Dynarmic_32::ClearInstructionCache() { diff --git a/src/core/arm/dynarmic/arm_dynarmic_32.h b/src/core/arm/dynarmic/arm_dynarmic_32.h index 5d47b600d..3f68a4ff1 100755 --- a/src/core/arm/dynarmic/arm_dynarmic_32.h +++ b/src/core/arm/dynarmic/arm_dynarmic_32.h @@ -57,6 +57,7 @@ public: void LoadContext(const ThreadContext64& ctx) override {} void PrepareReschedule() override; + void SignalInterrupt() override; void ClearExclusiveState() override; void ClearInstructionCache() override; @@ -83,9 +84,6 @@ private: // SVC callback u32 svc_swi{}; - bool svc_called{}; - - bool shutdown{}; }; } // namespace Core diff --git a/src/core/arm/dynarmic/arm_dynarmic_64.cpp b/src/core/arm/dynarmic/arm_dynarmic_64.cpp index 6d5a1ecfd..9715de70e 100755 --- a/src/core/arm/dynarmic/arm_dynarmic_64.cpp +++ b/src/core/arm/dynarmic/arm_dynarmic_64.cpp @@ -26,6 +26,9 @@ namespace Core { using Vector = Dynarmic::A64::Vector; using namespace Common::Literals; +constexpr Dynarmic::HaltReason break_loop = Dynarmic::HaltReason::UserDefined2; +constexpr Dynarmic::HaltReason svc_call = Dynarmic::HaltReason::UserDefined3; + class DynarmicCallbacks64 : public Dynarmic::A64::UserCallbacks { public: explicit DynarmicCallbacks64(ARM_Dynarmic_64& parent_) @@ -105,7 +108,7 @@ public: break; } - parent.jit->HaltExecution(); + parent.jit->HaltExecution(Dynarmic::HaltReason::CacheInvalidation); } void ExceptionRaised(u64 pc, Dynarmic::A64::Exception exception) override { @@ -124,15 +127,12 @@ public: } void CallSVC(u32 swi) override { - parent.svc_called = true; parent.svc_swi = swi; - parent.jit->HaltExecution(); + parent.jit->HaltExecution(svc_call); } void AddTicks(u64 ticks) override { - if (parent.uses_wall_clock) { - return; - } + ASSERT_MSG(!parent.uses_wall_clock, "This should never happen - dynarmic ticking disabled"); // Divide the number of ticks by the amount of CPU cores. 
TODO(Subv): This yields only a // rough approximation of the amount of executed ticks in the system, it may be thrown off @@ -147,12 +147,8 @@ public: } u64 GetTicksRemaining() override { - if (parent.uses_wall_clock) { - if (!parent.interrupt_handlers[parent.core_index].IsInterrupted()) { - return minimum_run_cycles; - } - return 0U; - } + ASSERT_MSG(!parent.uses_wall_clock, "This should never happen - dynarmic ticking disabled"); + return std::max(parent.system.CoreTiming().GetDowncount(), 0); } @@ -208,6 +204,7 @@ std::shared_ptr ARM_Dynarmic_64::MakeJit(Common::PageTable* // Timing config.wall_clock_cntpct = uses_wall_clock; + config.enable_cycle_counting = !uses_wall_clock; // Code cache size config.code_cache_size = 512_MiB; @@ -290,13 +287,11 @@ std::shared_ptr ARM_Dynarmic_64::MakeJit(Common::PageTable* void ARM_Dynarmic_64::Run() { while (true) { - jit->Run(); - if (!svc_called) { - break; + const auto hr = jit->Run(); + if (Has(hr, svc_call)) { + Kernel::Svc::Call(system, svc_swi); } - svc_called = false; - Kernel::Svc::Call(system, svc_swi); - if (shutdown) { + if (Has(hr, break_loop)) { break; } } @@ -387,8 +382,11 @@ void ARM_Dynarmic_64::LoadContext(const ThreadContext64& ctx) { } void ARM_Dynarmic_64::PrepareReschedule() { - jit->HaltExecution(); - shutdown = true; + jit->HaltExecution(break_loop); +} + +void ARM_Dynarmic_64::SignalInterrupt() { + jit->HaltExecution(break_loop); } void ARM_Dynarmic_64::ClearInstructionCache() { diff --git a/src/core/arm/dynarmic/arm_dynarmic_64.h b/src/core/arm/dynarmic/arm_dynarmic_64.h index 0c4e46c64..58bc7fbec 100755 --- a/src/core/arm/dynarmic/arm_dynarmic_64.h +++ b/src/core/arm/dynarmic/arm_dynarmic_64.h @@ -51,6 +51,7 @@ public: void LoadContext(const ThreadContext64& ctx) override; void PrepareReschedule() override; + void SignalInterrupt() override; void ClearExclusiveState() override; void ClearInstructionCache() override; @@ -77,9 +78,6 @@ private: // SVC callback u32 svc_swi{}; - bool svc_called{}; - - bool shutdown{}; }; } // namespace Core diff --git a/src/core/hle/kernel/physical_core.cpp b/src/core/hle/kernel/physical_core.cpp index 7477668e4..18a5f40f8 100755 --- a/src/core/hle/kernel/physical_core.cpp +++ b/src/core/hle/kernel/physical_core.cpp @@ -58,6 +58,7 @@ bool PhysicalCore::IsInterrupted() const { void PhysicalCore::Interrupt() { guard->lock(); interrupts[core_index].SetInterrupt(true); + arm_interface->SignalInterrupt(); guard->unlock(); } diff --git a/src/shader_recompiler/shader_info.h b/src/shader_recompiler/shader_info.h index dca7205c3..a3a09c71c 100755 --- a/src/shader_recompiler/shader_info.h +++ b/src/shader_recompiler/shader_info.h @@ -105,7 +105,7 @@ struct ImageDescriptor { using ImageDescriptors = boost::container::small_vector; struct Info { - static constexpr size_t MAX_INDIRECT_CBUFS{15}; + static constexpr size_t MAX_INDIRECT_CBUFS{14}; static constexpr size_t MAX_CBUFS{18}; static constexpr size_t MAX_SSBOS{32}; diff --git a/src/video_core/texture_cache/util.cpp b/src/video_core/texture_cache/util.cpp index d8e19cb2f..334cb7b05 100755 --- a/src/video_core/texture_cache/util.cpp +++ b/src/video_core/texture_cache/util.cpp @@ -561,7 +561,6 @@ void SwizzleBlockLinearImage(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr for (s32 layer = 0; layer < info.resources.layers; ++layer) { const std::span src = input.subspan(host_offset); - gpu_memory.ReadBlockUnsafe(gpu_addr + guest_offset, dst.data(), dst.size_bytes()); SwizzleTexture(dst, src, bytes_per_block, num_tiles.width, num_tiles.height, 
num_tiles.depth, block.height, block.depth);
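Taken together, the dynarmic side of this patch turns halting into a bitmask handshake: HaltExecution OR-s a HaltReason bit in from any thread, the generated code polls halt_reason at block links and dispatcher entry, and Run()/Step() atomically clear and return the accumulated bits. A fragment of embedder-side usage against the new interface (illustrative; config and its callbacks are assumed to be set up elsewhere):

    #include <chrono>
    #include <thread>

    #include "dynarmic/interface/A64/a64.h"

    // `config` is assumed to be a fully wired Dynarmic::A64::UserConfig with
    // config.enable_cycle_counting = false, so a halt bit is the only way out.
    Dynarmic::A64::Jit jit{config};

    // Any other thread may request a halt at any time; the bit is OR-ed in
    // atomically and polled by the generated code at block links.
    std::thread watchdog{[&jit] {
        std::this_thread::sleep_for(std::chrono::milliseconds(16));
        jit.HaltExecution(Dynarmic::HaltReason::UserDefined1);
    }};

    const Dynarmic::HaltReason hr = jit.Run();  // returns the accumulated bits
    if (Has(hr, Dynarmic::HaltReason::UserDefined1)) {
        // End of timeslice: reschedule, deliver pending interrupts, etc.
    }
    watchdog.join();

Because the bits accumulate with an atomic OR, a single Run() can report several reasons at once, which is why yuzu's ARM_Dynarmic_32/64::Run loops check svc_call and break_loop independently rather than treating them as mutually exclusive.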