early-access version 2644

This commit is contained in:
pineappleEA 2022-04-04 23:44:17 +02:00
parent 190c6bed11
commit cea32b5c92
29 changed files with 346 additions and 156 deletions

View File

@ -1,7 +1,7 @@
yuzu emulator early access yuzu emulator early access
============= =============
This is the source code for early-access 2643. This is the source code for early-access 2644.
## Legal Notice ## Legal Notice

View File

@ -149,7 +149,9 @@ A32EmitX64::BlockDescriptor A32EmitX64::Emit(IR::Block& block) {
reg_alloc.AssertNoMoreUses(); reg_alloc.AssertNoMoreUses();
if (conf.enable_cycle_counting) {
EmitAddCycles(block.CycleCount()); EmitAddCycles(block.CycleCount());
}
EmitX64::EmitTerminal(block.GetTerminal(), ctx.Location().SetSingleStepping(false), ctx.IsSingleStep()); EmitX64::EmitTerminal(block.GetTerminal(), ctx.Location().SetSingleStepping(false), ctx.IsSingleStep());
code.int3(); code.int3();
@ -184,7 +186,9 @@ void A32EmitX64::EmitCondPrelude(const A32EmitContext& ctx) {
ASSERT(ctx.block.HasConditionFailedLocation()); ASSERT(ctx.block.HasConditionFailedLocation());
Xbyak::Label pass = EmitCond(ctx.block.GetCondition()); Xbyak::Label pass = EmitCond(ctx.block.GetCondition());
if (conf.enable_cycle_counting) {
EmitAddCycles(ctx.block.ConditionFailedCycleCount()); EmitAddCycles(ctx.block.ConditionFailedCycleCount());
}
EmitTerminal(IR::Term::LinkBlock{ctx.block.ConditionFailedLocation()}, ctx.Location().SetSingleStepping(false), ctx.IsSingleStep()); EmitTerminal(IR::Term::LinkBlock{ctx.block.ConditionFailedLocation()}, ctx.Location().SetSingleStepping(false), ctx.IsSingleStep());
code.L(pass); code.L(pass);
} }
@ -715,31 +719,37 @@ void A32EmitX64::EmitA32UpdateUpperLocationDescriptor(A32EmitContext& ctx, IR::I
} }
void A32EmitX64::EmitA32CallSupervisor(A32EmitContext& ctx, IR::Inst* inst) { void A32EmitX64::EmitA32CallSupervisor(A32EmitContext& ctx, IR::Inst* inst) {
ctx.reg_alloc.HostCall(nullptr);
code.SwitchMxcsrOnExit(); code.SwitchMxcsrOnExit();
if (conf.enable_cycle_counting) {
ctx.reg_alloc.HostCall(nullptr);
code.mov(code.ABI_PARAM2, qword[rsp + ABI_SHADOW_SPACE + offsetof(StackLayout, cycles_to_run)]); code.mov(code.ABI_PARAM2, qword[rsp + ABI_SHADOW_SPACE + offsetof(StackLayout, cycles_to_run)]);
code.sub(code.ABI_PARAM2, qword[rsp + ABI_SHADOW_SPACE + offsetof(StackLayout, cycles_remaining)]); code.sub(code.ABI_PARAM2, qword[rsp + ABI_SHADOW_SPACE + offsetof(StackLayout, cycles_remaining)]);
Devirtualize<&A32::UserCallbacks::AddTicks>(conf.callbacks).EmitCall(code); Devirtualize<&A32::UserCallbacks::AddTicks>(conf.callbacks).EmitCall(code);
ctx.reg_alloc.EndOfAllocScope(); ctx.reg_alloc.EndOfAllocScope();
}
auto args = ctx.reg_alloc.GetArgumentInfo(inst); auto args = ctx.reg_alloc.GetArgumentInfo(inst);
ctx.reg_alloc.HostCall(nullptr, {}, args[0]); ctx.reg_alloc.HostCall(nullptr, {}, args[0]);
Devirtualize<&A32::UserCallbacks::CallSVC>(conf.callbacks).EmitCall(code); Devirtualize<&A32::UserCallbacks::CallSVC>(conf.callbacks).EmitCall(code);
if (conf.enable_cycle_counting) {
Devirtualize<&A32::UserCallbacks::GetTicksRemaining>(conf.callbacks).EmitCall(code); Devirtualize<&A32::UserCallbacks::GetTicksRemaining>(conf.callbacks).EmitCall(code);
code.mov(qword[rsp + ABI_SHADOW_SPACE + offsetof(StackLayout, cycles_to_run)], code.ABI_RETURN); code.mov(qword[rsp + ABI_SHADOW_SPACE + offsetof(StackLayout, cycles_to_run)], code.ABI_RETURN);
code.mov(qword[rsp + ABI_SHADOW_SPACE + offsetof(StackLayout, cycles_remaining)], code.ABI_RETURN); code.mov(qword[rsp + ABI_SHADOW_SPACE + offsetof(StackLayout, cycles_remaining)], code.ABI_RETURN);
code.SwitchMxcsrOnEntry(); code.SwitchMxcsrOnEntry();
}
} }
void A32EmitX64::EmitA32ExceptionRaised(A32EmitContext& ctx, IR::Inst* inst) { void A32EmitX64::EmitA32ExceptionRaised(A32EmitContext& ctx, IR::Inst* inst) {
ctx.reg_alloc.HostCall(nullptr);
code.SwitchMxcsrOnExit(); code.SwitchMxcsrOnExit();
ctx.reg_alloc.HostCall(nullptr);
if (conf.enable_cycle_counting) {
code.mov(code.ABI_PARAM2, qword[rsp + ABI_SHADOW_SPACE + offsetof(StackLayout, cycles_to_run)]); code.mov(code.ABI_PARAM2, qword[rsp + ABI_SHADOW_SPACE + offsetof(StackLayout, cycles_to_run)]);
code.sub(code.ABI_PARAM2, qword[rsp + ABI_SHADOW_SPACE + offsetof(StackLayout, cycles_remaining)]); code.sub(code.ABI_PARAM2, qword[rsp + ABI_SHADOW_SPACE + offsetof(StackLayout, cycles_remaining)]);
Devirtualize<&A32::UserCallbacks::AddTicks>(conf.callbacks).EmitCall(code); Devirtualize<&A32::UserCallbacks::AddTicks>(conf.callbacks).EmitCall(code);
}
ctx.reg_alloc.EndOfAllocScope(); ctx.reg_alloc.EndOfAllocScope();
auto args = ctx.reg_alloc.GetArgumentInfo(inst); auto args = ctx.reg_alloc.GetArgumentInfo(inst);
@ -751,10 +761,12 @@ void A32EmitX64::EmitA32ExceptionRaised(A32EmitContext& ctx, IR::Inst* inst) {
code.mov(param[1], exception); code.mov(param[1], exception);
}); });
if (conf.enable_cycle_counting) {
Devirtualize<&A32::UserCallbacks::GetTicksRemaining>(conf.callbacks).EmitCall(code); Devirtualize<&A32::UserCallbacks::GetTicksRemaining>(conf.callbacks).EmitCall(code);
code.mov(qword[rsp + ABI_SHADOW_SPACE + offsetof(StackLayout, cycles_to_run)], code.ABI_RETURN); code.mov(qword[rsp + ABI_SHADOW_SPACE + offsetof(StackLayout, cycles_to_run)], code.ABI_RETURN);
code.mov(qword[rsp + ABI_SHADOW_SPACE + offsetof(StackLayout, cycles_remaining)], code.ABI_RETURN); code.mov(qword[rsp + ABI_SHADOW_SPACE + offsetof(StackLayout, cycles_remaining)], code.ABI_RETURN);
code.SwitchMxcsrOnEntry(); code.SwitchMxcsrOnEntry();
}
} }
static u32 GetFpscrImpl(A32JitState* jit_state) { static u32 GetFpscrImpl(A32JitState* jit_state) {
@ -1134,6 +1146,7 @@ void A32EmitX64::EmitTerminalImpl(IR::Term::LinkBlock terminal, IR::LocationDesc
return; return;
} }
if (conf.enable_cycle_counting) {
code.cmp(qword[rsp + ABI_SHADOW_SPACE + offsetof(StackLayout, cycles_remaining)], 0); code.cmp(qword[rsp + ABI_SHADOW_SPACE + offsetof(StackLayout, cycles_remaining)], 0);
patch_information[terminal.next].jg.emplace_back(code.getCurr()); patch_information[terminal.next].jg.emplace_back(code.getCurr());
@ -1142,6 +1155,17 @@ void A32EmitX64::EmitTerminalImpl(IR::Term::LinkBlock terminal, IR::LocationDesc
} else { } else {
EmitPatchJg(terminal.next); EmitPatchJg(terminal.next);
} }
} else {
code.cmp(dword[r15 + offsetof(A32JitState, halt_reason)], 0);
patch_information[terminal.next].jz.emplace_back(code.getCurr());
if (const auto next_bb = GetBasicBlock(terminal.next)) {
EmitPatchJz(terminal.next, next_bb->entrypoint);
} else {
EmitPatchJz(terminal.next);
}
}
Xbyak::Label dest; Xbyak::Label dest;
code.jmp(dest, Xbyak::CodeGenerator::T_NEAR); code.jmp(dest, Xbyak::CodeGenerator::T_NEAR);
@ -1206,7 +1230,7 @@ void A32EmitX64::EmitTerminalImpl(IR::Term::CheckBit terminal, IR::LocationDescr
} }
void A32EmitX64::EmitTerminalImpl(IR::Term::CheckHalt terminal, IR::LocationDescriptor initial_location, bool is_single_step) { void A32EmitX64::EmitTerminalImpl(IR::Term::CheckHalt terminal, IR::LocationDescriptor initial_location, bool is_single_step) {
code.cmp(code.byte[r15 + offsetof(A32JitState, halt_requested)], u8(0)); code.cmp(dword[r15 + offsetof(A32JitState, halt_reason)], 0);
code.jne(code.GetForceReturnFromRunCodeAddress()); code.jne(code.GetForceReturnFromRunCodeAddress());
EmitTerminal(terminal.else_, initial_location, is_single_step); EmitTerminal(terminal.else_, initial_location, is_single_step);
} }
@ -1222,6 +1246,17 @@ void A32EmitX64::EmitPatchJg(const IR::LocationDescriptor& target_desc, CodePtr
code.EnsurePatchLocationSize(patch_location, 14); code.EnsurePatchLocationSize(patch_location, 14);
} }
void A32EmitX64::EmitPatchJz(const IR::LocationDescriptor& target_desc, CodePtr target_code_ptr) {
const CodePtr patch_location = code.getCurr();
if (target_code_ptr) {
code.jz(target_code_ptr);
} else {
code.mov(MJitStateReg(A32::Reg::PC), A32::LocationDescriptor{target_desc}.PC());
code.jz(code.GetReturnFromRunCodeAddress());
}
code.EnsurePatchLocationSize(patch_location, 14);
}
void A32EmitX64::EmitPatchJmp(const IR::LocationDescriptor& target_desc, CodePtr target_code_ptr) { void A32EmitX64::EmitPatchJmp(const IR::LocationDescriptor& target_desc, CodePtr target_code_ptr) {
const CodePtr patch_location = code.getCurr(); const CodePtr patch_location = code.getCurr();
if (target_code_ptr) { if (target_code_ptr) {

View File

@ -138,6 +138,7 @@ protected:
// Patching // Patching
void Unpatch(const IR::LocationDescriptor& target_desc) override; void Unpatch(const IR::LocationDescriptor& target_desc) override;
void EmitPatchJg(const IR::LocationDescriptor& target_desc, CodePtr target_code_ptr = nullptr) override; void EmitPatchJg(const IR::LocationDescriptor& target_desc, CodePtr target_code_ptr = nullptr) override;
void EmitPatchJz(const IR::LocationDescriptor& target_desc, CodePtr target_code_ptr = nullptr) override;
void EmitPatchJmp(const IR::LocationDescriptor& target_desc, CodePtr target_code_ptr = nullptr) override; void EmitPatchJmp(const IR::LocationDescriptor& target_desc, CodePtr target_code_ptr = nullptr) override;
void EmitPatchMovRcx(CodePtr target_code_ptr = nullptr) override; void EmitPatchMovRcx(CodePtr target_code_ptr = nullptr) override;
}; };

View File

@ -16,6 +16,7 @@
#include "dynarmic/backend/x64/devirtualize.h" #include "dynarmic/backend/x64/devirtualize.h"
#include "dynarmic/backend/x64/jitstate_info.h" #include "dynarmic/backend/x64/jitstate_info.h"
#include "dynarmic/common/assert.h" #include "dynarmic/common/assert.h"
#include "dynarmic/common/atomic.h"
#include "dynarmic/common/cast_util.h" #include "dynarmic/common/cast_util.h"
#include "dynarmic/common/common_types.h" #include "dynarmic/common/common_types.h"
#include "dynarmic/common/scope_exit.h" #include "dynarmic/common/scope_exit.h"
@ -31,11 +32,12 @@ namespace Dynarmic::A32 {
using namespace Backend::X64; using namespace Backend::X64;
static RunCodeCallbacks GenRunCodeCallbacks(A32::UserCallbacks* cb, CodePtr (*LookupBlock)(void* lookup_block_arg), void* arg) { static RunCodeCallbacks GenRunCodeCallbacks(A32::UserCallbacks* cb, CodePtr (*LookupBlock)(void* lookup_block_arg), void* arg, const A32::UserConfig& conf) {
return RunCodeCallbacks{ return RunCodeCallbacks{
std::make_unique<ArgCallback>(LookupBlock, reinterpret_cast<u64>(arg)), std::make_unique<ArgCallback>(LookupBlock, reinterpret_cast<u64>(arg)),
std::make_unique<ArgCallback>(Devirtualize<&A32::UserCallbacks::AddTicks>(cb)), std::make_unique<ArgCallback>(Devirtualize<&A32::UserCallbacks::AddTicks>(cb)),
std::make_unique<ArgCallback>(Devirtualize<&A32::UserCallbacks::GetTicksRemaining>(cb)), std::make_unique<ArgCallback>(Devirtualize<&A32::UserCallbacks::GetTicksRemaining>(cb)),
conf.enable_cycle_counting,
}; };
} }
@ -58,7 +60,7 @@ static Optimization::PolyfillOptions GenPolyfillOptions(const BlockOfCode& code)
struct Jit::Impl { struct Jit::Impl {
Impl(Jit* jit, A32::UserConfig conf) Impl(Jit* jit, A32::UserConfig conf)
: block_of_code(GenRunCodeCallbacks(conf.callbacks, &GetCurrentBlockThunk, this), JitStateInfo{jit_state}, conf.code_cache_size, conf.far_code_offset, GenRCP(conf)) : block_of_code(GenRunCodeCallbacks(conf.callbacks, &GetCurrentBlockThunk, this, conf), JitStateInfo{jit_state}, conf.code_cache_size, conf.far_code_offset, GenRCP(conf))
, emitter(block_of_code, conf, jit) , emitter(block_of_code, conf, jit)
, polyfill_options(GenPolyfillOptions(block_of_code)) , polyfill_options(GenPolyfillOptions(block_of_code))
, conf(std::move(conf)) , conf(std::move(conf))
@ -76,7 +78,7 @@ struct Jit::Impl {
boost::icl::interval_set<u32> invalid_cache_ranges; boost::icl::interval_set<u32> invalid_cache_ranges;
bool invalidate_entire_cache = false; bool invalidate_entire_cache = false;
void Execute() { HaltReason Execute() {
const CodePtr current_codeptr = [this] { const CodePtr current_codeptr = [this] {
// RSB optimization // RSB optimization
const u32 new_rsb_ptr = (jit_state.rsb_ptr - 1) & A32JitState::RSBPtrMask; const u32 new_rsb_ptr = (jit_state.rsb_ptr - 1) & A32JitState::RSBPtrMask;
@ -88,11 +90,15 @@ struct Jit::Impl {
return GetCurrentBlock(); return GetCurrentBlock();
}(); }();
block_of_code.RunCode(&jit_state, current_codeptr); return block_of_code.RunCode(&jit_state, current_codeptr);
} }
void Step() { HaltReason Step() {
block_of_code.StepCode(&jit_state, GetCurrentSingleStep()); return block_of_code.StepCode(&jit_state, GetCurrentSingleStep());
}
void HaltExecution(HaltReason hr) {
Atomic::Or(&jit_state.halt_reason, static_cast<u32>(hr));
} }
void ClearExclusiveState() { void ClearExclusiveState() {
@ -123,7 +129,7 @@ struct Jit::Impl {
void RequestCacheInvalidation() { void RequestCacheInvalidation() {
if (jit_interface->is_executing) { if (jit_interface->is_executing) {
jit_state.halt_requested = true; HaltExecution(HaltReason::CacheInvalidation);
return; return;
} }
@ -182,28 +188,28 @@ Jit::Jit(UserConfig conf)
Jit::~Jit() = default; Jit::~Jit() = default;
void Jit::Run() { HaltReason Jit::Run() {
ASSERT(!is_executing); ASSERT(!is_executing);
is_executing = true; is_executing = true;
SCOPE_EXIT { this->is_executing = false; }; SCOPE_EXIT { this->is_executing = false; };
impl->jit_state.halt_requested = false; const HaltReason hr = impl->Execute();
impl->Execute();
impl->PerformCacheInvalidation(); impl->PerformCacheInvalidation();
return hr;
} }
void Jit::Step() { HaltReason Jit::Step() {
ASSERT(!is_executing); ASSERT(!is_executing);
is_executing = true; is_executing = true;
SCOPE_EXIT { this->is_executing = false; }; SCOPE_EXIT { this->is_executing = false; };
impl->jit_state.halt_requested = true; const HaltReason hr = impl->Step();
impl->Step();
impl->PerformCacheInvalidation(); impl->PerformCacheInvalidation();
return hr;
} }
void Jit::ClearCache() { void Jit::ClearCache() {
@ -221,8 +227,8 @@ void Jit::Reset() {
impl->jit_state = {}; impl->jit_state = {};
} }
void Jit::HaltExecution() { void Jit::HaltExecution(HaltReason hr) {
impl->jit_state.halt_requested = true; impl->HaltExecution(hr);
} }
void Jit::ClearExclusiveState() { void Jit::ClearExclusiveState() {

View File

@ -40,7 +40,7 @@ struct A32JitState {
// For internal use (See: BlockOfCode::RunCode) // For internal use (See: BlockOfCode::RunCode)
u32 guest_MXCSR = 0x00001f80; u32 guest_MXCSR = 0x00001f80;
u32 asimd_MXCSR = 0x00009fc0; u32 asimd_MXCSR = 0x00009fc0;
bool halt_requested = false; volatile u32 halt_reason = 0;
// Exclusive state // Exclusive state
u32 exclusive_state = 0; u32 exclusive_state = 0;

View File

@ -121,7 +121,9 @@ A64EmitX64::BlockDescriptor A64EmitX64::Emit(IR::Block& block) {
reg_alloc.AssertNoMoreUses(); reg_alloc.AssertNoMoreUses();
if (conf.enable_cycle_counting) {
EmitAddCycles(block.CycleCount()); EmitAddCycles(block.CycleCount());
}
EmitX64::EmitTerminal(block.GetTerminal(), ctx.Location().SetSingleStepping(false), ctx.IsSingleStep()); EmitX64::EmitTerminal(block.GetTerminal(), ctx.Location().SetSingleStepping(false), ctx.IsSingleStep());
code.int3(); code.int3();
@ -619,14 +621,26 @@ void A64EmitX64::EmitTerminalImpl(IR::Term::LinkBlock terminal, IR::LocationDesc
return; return;
} }
if (conf.enable_cycle_counting) {
code.cmp(qword[rsp + ABI_SHADOW_SPACE + offsetof(StackLayout, cycles_remaining)], 0); code.cmp(qword[rsp + ABI_SHADOW_SPACE + offsetof(StackLayout, cycles_remaining)], 0);
patch_information[terminal.next].jg.emplace_back(code.getCurr()); patch_information[terminal.next].jg.emplace_back(code.getCurr());
if (auto next_bb = GetBasicBlock(terminal.next)) { if (const auto next_bb = GetBasicBlock(terminal.next)) {
EmitPatchJg(terminal.next, next_bb->entrypoint); EmitPatchJg(terminal.next, next_bb->entrypoint);
} else { } else {
EmitPatchJg(terminal.next); EmitPatchJg(terminal.next);
} }
} else {
code.cmp(dword[r15 + offsetof(A64JitState, halt_reason)], 0);
patch_information[terminal.next].jz.emplace_back(code.getCurr());
if (const auto next_bb = GetBasicBlock(terminal.next)) {
EmitPatchJz(terminal.next, next_bb->entrypoint);
} else {
EmitPatchJz(terminal.next);
}
}
code.mov(rax, A64::LocationDescriptor{terminal.next}.PC()); code.mov(rax, A64::LocationDescriptor{terminal.next}.PC());
code.mov(qword[r15 + offsetof(A64JitState, pc)], rax); code.mov(qword[r15 + offsetof(A64JitState, pc)], rax);
code.ForceReturnFromRunCode(); code.ForceReturnFromRunCode();
@ -691,7 +705,7 @@ void A64EmitX64::EmitTerminalImpl(IR::Term::CheckBit terminal, IR::LocationDescr
} }
void A64EmitX64::EmitTerminalImpl(IR::Term::CheckHalt terminal, IR::LocationDescriptor initial_location, bool is_single_step) { void A64EmitX64::EmitTerminalImpl(IR::Term::CheckHalt terminal, IR::LocationDescriptor initial_location, bool is_single_step) {
code.cmp(code.byte[r15 + offsetof(A64JitState, halt_requested)], u8(0)); code.cmp(dword[r15 + offsetof(A64JitState, halt_reason)], 0);
code.jne(code.GetForceReturnFromRunCodeAddress()); code.jne(code.GetForceReturnFromRunCodeAddress());
EmitTerminal(terminal.else_, initial_location, is_single_step); EmitTerminal(terminal.else_, initial_location, is_single_step);
} }
@ -708,6 +722,18 @@ void A64EmitX64::EmitPatchJg(const IR::LocationDescriptor& target_desc, CodePtr
code.EnsurePatchLocationSize(patch_location, 23); code.EnsurePatchLocationSize(patch_location, 23);
} }
void A64EmitX64::EmitPatchJz(const IR::LocationDescriptor& target_desc, CodePtr target_code_ptr) {
const CodePtr patch_location = code.getCurr();
if (target_code_ptr) {
code.jz(target_code_ptr);
} else {
code.mov(rax, A64::LocationDescriptor{target_desc}.PC());
code.mov(qword[r15 + offsetof(A64JitState, pc)], rax);
code.jz(code.GetReturnFromRunCodeAddress());
}
code.EnsurePatchLocationSize(patch_location, 23);
}
void A64EmitX64::EmitPatchJmp(const IR::LocationDescriptor& target_desc, CodePtr target_code_ptr) { void A64EmitX64::EmitPatchJmp(const IR::LocationDescriptor& target_desc, CodePtr target_code_ptr) {
const CodePtr patch_location = code.getCurr(); const CodePtr patch_location = code.getCurr();
if (target_code_ptr) { if (target_code_ptr) {

View File

@ -135,6 +135,7 @@ protected:
// Patching // Patching
void Unpatch(const IR::LocationDescriptor& target_desc) override; void Unpatch(const IR::LocationDescriptor& target_desc) override;
void EmitPatchJg(const IR::LocationDescriptor& target_desc, CodePtr target_code_ptr = nullptr) override; void EmitPatchJg(const IR::LocationDescriptor& target_desc, CodePtr target_code_ptr = nullptr) override;
void EmitPatchJz(const IR::LocationDescriptor& target_desc, CodePtr target_code_ptr = nullptr) override;
void EmitPatchJmp(const IR::LocationDescriptor& target_desc, CodePtr target_code_ptr = nullptr) override; void EmitPatchJmp(const IR::LocationDescriptor& target_desc, CodePtr target_code_ptr = nullptr) override;
void EmitPatchMovRcx(CodePtr target_code_ptr = nullptr) override; void EmitPatchMovRcx(CodePtr target_code_ptr = nullptr) override;
}; };

View File

@ -15,6 +15,7 @@
#include "dynarmic/backend/x64/devirtualize.h" #include "dynarmic/backend/x64/devirtualize.h"
#include "dynarmic/backend/x64/jitstate_info.h" #include "dynarmic/backend/x64/jitstate_info.h"
#include "dynarmic/common/assert.h" #include "dynarmic/common/assert.h"
#include "dynarmic/common/atomic.h"
#include "dynarmic/common/scope_exit.h" #include "dynarmic/common/scope_exit.h"
#include "dynarmic/common/x64_disassemble.h" #include "dynarmic/common/x64_disassemble.h"
#include "dynarmic/frontend/A64/translate/a64_translate.h" #include "dynarmic/frontend/A64/translate/a64_translate.h"
@ -26,11 +27,12 @@ namespace Dynarmic::A64 {
using namespace Backend::X64; using namespace Backend::X64;
static RunCodeCallbacks GenRunCodeCallbacks(A64::UserCallbacks* cb, CodePtr (*LookupBlock)(void* lookup_block_arg), void* arg) { static RunCodeCallbacks GenRunCodeCallbacks(A64::UserCallbacks* cb, CodePtr (*LookupBlock)(void* lookup_block_arg), void* arg, const A64::UserConfig& conf) {
return RunCodeCallbacks{ return RunCodeCallbacks{
std::make_unique<ArgCallback>(LookupBlock, reinterpret_cast<u64>(arg)), std::make_unique<ArgCallback>(LookupBlock, reinterpret_cast<u64>(arg)),
std::make_unique<ArgCallback>(Devirtualize<&A64::UserCallbacks::AddTicks>(cb)), std::make_unique<ArgCallback>(Devirtualize<&A64::UserCallbacks::AddTicks>(cb)),
std::make_unique<ArgCallback>(Devirtualize<&A64::UserCallbacks::GetTicksRemaining>(cb)), std::make_unique<ArgCallback>(Devirtualize<&A64::UserCallbacks::GetTicksRemaining>(cb)),
conf.enable_cycle_counting,
}; };
} }
@ -55,7 +57,7 @@ struct Jit::Impl final {
public: public:
Impl(Jit* jit, UserConfig conf) Impl(Jit* jit, UserConfig conf)
: conf(conf) : conf(conf)
, block_of_code(GenRunCodeCallbacks(conf.callbacks, &GetCurrentBlockThunk, this), JitStateInfo{jit_state}, conf.code_cache_size, conf.far_code_offset, GenRCP(conf)) , block_of_code(GenRunCodeCallbacks(conf.callbacks, &GetCurrentBlockThunk, this, conf), JitStateInfo{jit_state}, conf.code_cache_size, conf.far_code_offset, GenRCP(conf))
, emitter(block_of_code, conf, jit) , emitter(block_of_code, conf, jit)
, polyfill_options(GenPolyfillOptions(block_of_code)) { , polyfill_options(GenPolyfillOptions(block_of_code)) {
ASSERT(conf.page_table_address_space_bits >= 12 && conf.page_table_address_space_bits <= 64); ASSERT(conf.page_table_address_space_bits >= 12 && conf.page_table_address_space_bits <= 64);
@ -63,13 +65,12 @@ public:
~Impl() = default; ~Impl() = default;
void Run() { HaltReason Run() {
ASSERT(!is_executing); ASSERT(!is_executing);
PerformRequestedCacheInvalidation(); PerformRequestedCacheInvalidation();
is_executing = true; is_executing = true;
SCOPE_EXIT { this->is_executing = false; }; SCOPE_EXIT { this->is_executing = false; };
jit_state.halt_requested = false;
// TODO: Check code alignment // TODO: Check code alignment
@ -83,29 +84,33 @@ public:
return GetCurrentBlock(); return GetCurrentBlock();
}(); }();
block_of_code.RunCode(&jit_state, current_code_ptr);
const HaltReason hr = block_of_code.RunCode(&jit_state, current_code_ptr);
PerformRequestedCacheInvalidation(); PerformRequestedCacheInvalidation();
return hr;
} }
void Step() { HaltReason Step() {
ASSERT(!is_executing); ASSERT(!is_executing);
PerformRequestedCacheInvalidation(); PerformRequestedCacheInvalidation();
is_executing = true; is_executing = true;
SCOPE_EXIT { this->is_executing = false; }; SCOPE_EXIT { this->is_executing = false; };
jit_state.halt_requested = true;
block_of_code.StepCode(&jit_state, GetCurrentSingleStep()); const HaltReason hr = block_of_code.StepCode(&jit_state, GetCurrentSingleStep());
PerformRequestedCacheInvalidation(); PerformRequestedCacheInvalidation();
return hr;
} }
void ClearCache() { void ClearCache() {
std::unique_lock lock{invalidation_mutex}; std::unique_lock lock{invalidation_mutex};
invalidate_entire_cache = true; invalidate_entire_cache = true;
if (is_executing) { if (is_executing) {
jit_state.halt_requested = true; HaltExecution(HaltReason::CacheInvalidation);
} }
} }
@ -115,7 +120,7 @@ public:
const auto range = boost::icl::discrete_interval<u64>::closed(start_address, end_address); const auto range = boost::icl::discrete_interval<u64>::closed(start_address, end_address);
invalid_cache_ranges.add(range); invalid_cache_ranges.add(range);
if (is_executing) { if (is_executing) {
jit_state.halt_requested = true; HaltExecution(HaltReason::CacheInvalidation);
} }
} }
@ -124,8 +129,8 @@ public:
jit_state = {}; jit_state = {};
} }
void HaltExecution() { void HaltExecution(HaltReason hr) {
jit_state.halt_requested = true; Atomic::Or(&jit_state.halt_reason, static_cast<u32>(hr));
} }
u64 GetSP() const { u64 GetSP() const {
@ -279,7 +284,7 @@ private:
void RequestCacheInvalidation() { void RequestCacheInvalidation() {
if (is_executing) { if (is_executing) {
jit_state.halt_requested = true; HaltExecution(HaltReason::CacheInvalidation);
return; return;
} }
@ -321,12 +326,12 @@ Jit::Jit(UserConfig conf)
Jit::~Jit() = default; Jit::~Jit() = default;
void Jit::Run() { HaltReason Jit::Run() {
impl->Run(); return impl->Run();
} }
void Jit::Step() { HaltReason Jit::Step() {
impl->Step(); return impl->Step();
} }
void Jit::ClearCache() { void Jit::ClearCache() {
@ -341,8 +346,8 @@ void Jit::Reset() {
impl->Reset(); impl->Reset();
} }
void Jit::HaltExecution() { void Jit::HaltExecution(HaltReason hr) {
impl->HaltExecution(); impl->HaltExecution(hr);
} }
u64 Jit::GetSP() const { u64 Jit::GetSP() const {

View File

@ -43,7 +43,7 @@ struct A64JitState {
// For internal use (See: BlockOfCode::RunCode) // For internal use (See: BlockOfCode::RunCode)
u32 guest_MXCSR = 0x00001f80; u32 guest_MXCSR = 0x00001f80;
u32 asimd_MXCSR = 0x00009fc0; u32 asimd_MXCSR = 0x00009fc0;
bool halt_requested = false; volatile u32 halt_reason = 0;
// Exclusive state // Exclusive state
static constexpr u64 RESERVATION_GRANULE_MASK = 0xFFFF'FFFF'FFFF'FFF0ull; static constexpr u64 RESERVATION_GRANULE_MASK = 0xFFFF'FFFF'FFFF'FFF0ull;

View File

@ -201,12 +201,12 @@ size_t BlockOfCode::SpaceRemaining() const {
return std::min(reinterpret_cast<const u8*>(far_code_begin) - current_near_ptr, &top_[maxSize_] - current_far_ptr); return std::min(reinterpret_cast<const u8*>(far_code_begin) - current_near_ptr, &top_[maxSize_] - current_far_ptr);
} }
void BlockOfCode::RunCode(void* jit_state, CodePtr code_ptr) const { HaltReason BlockOfCode::RunCode(void* jit_state, CodePtr code_ptr) const {
run_code(jit_state, code_ptr); return run_code(jit_state, code_ptr);
} }
void BlockOfCode::StepCode(void* jit_state, CodePtr code_ptr) const { HaltReason BlockOfCode::StepCode(void* jit_state, CodePtr code_ptr) const {
step_code(jit_state, code_ptr); return step_code(jit_state, code_ptr);
} }
void BlockOfCode::ReturnFromRunCode(bool mxcsr_already_exited) { void BlockOfCode::ReturnFromRunCode(bool mxcsr_already_exited) {
@ -224,6 +224,8 @@ void BlockOfCode::ForceReturnFromRunCode(bool mxcsr_already_exited) {
} }
void BlockOfCode::GenRunCode(std::function<void(BlockOfCode&)> rcp) { void BlockOfCode::GenRunCode(std::function<void(BlockOfCode&)> rcp) {
Xbyak::Label return_to_caller, return_to_caller_mxcsr_already_exited;
align(); align();
run_code = getCurr<RunCodeFuncType>(); run_code = getCurr<RunCodeFuncType>();
@ -236,12 +238,17 @@ void BlockOfCode::GenRunCode(std::function<void(BlockOfCode&)> rcp) {
mov(r15, ABI_PARAM1); mov(r15, ABI_PARAM1);
mov(rbx, ABI_PARAM2); // save temporarily in non-volatile register mov(rbx, ABI_PARAM2); // save temporarily in non-volatile register
if (cb.enable_cycle_counting) {
cb.GetTicksRemaining->EmitCall(*this); cb.GetTicksRemaining->EmitCall(*this);
mov(qword[rsp + ABI_SHADOW_SPACE + offsetof(StackLayout, cycles_to_run)], ABI_RETURN); mov(qword[rsp + ABI_SHADOW_SPACE + offsetof(StackLayout, cycles_to_run)], ABI_RETURN);
mov(qword[rsp + ABI_SHADOW_SPACE + offsetof(StackLayout, cycles_remaining)], ABI_RETURN); mov(qword[rsp + ABI_SHADOW_SPACE + offsetof(StackLayout, cycles_remaining)], ABI_RETURN);
}
rcp(*this); rcp(*this);
cmp(dword[r15 + jsi.offsetof_halt_reason], 0);
jne(return_to_caller_mxcsr_already_exited, T_NEAR);
SwitchMxcsrOnEntry(); SwitchMxcsrOnEntry();
jmp(rbx); jmp(rbx);
@ -252,31 +259,44 @@ void BlockOfCode::GenRunCode(std::function<void(BlockOfCode&)> rcp) {
mov(r15, ABI_PARAM1); mov(r15, ABI_PARAM1);
if (cb.enable_cycle_counting) {
mov(qword[rsp + ABI_SHADOW_SPACE + offsetof(StackLayout, cycles_to_run)], 1); mov(qword[rsp + ABI_SHADOW_SPACE + offsetof(StackLayout, cycles_to_run)], 1);
mov(qword[rsp + ABI_SHADOW_SPACE + offsetof(StackLayout, cycles_remaining)], 1); mov(qword[rsp + ABI_SHADOW_SPACE + offsetof(StackLayout, cycles_remaining)], 1);
}
rcp(*this); rcp(*this);
cmp(dword[r15 + jsi.offsetof_halt_reason], 0);
jne(return_to_caller_mxcsr_already_exited, T_NEAR);
lock();
or_(dword[r15 + jsi.offsetof_halt_reason], static_cast<u32>(HaltReason::Step));
SwitchMxcsrOnEntry(); SwitchMxcsrOnEntry();
jmp(ABI_PARAM2); jmp(ABI_PARAM2);
// Dispatcher loop // Dispatcher loop
Xbyak::Label return_to_caller, return_to_caller_mxcsr_already_exited;
align(); align();
return_from_run_code[0] = getCurr<const void*>(); return_from_run_code[0] = getCurr<const void*>();
cmp(dword[r15 + jsi.offsetof_halt_reason], 0);
jne(return_to_caller);
if (cb.enable_cycle_counting) {
cmp(qword[rsp + ABI_SHADOW_SPACE + offsetof(StackLayout, cycles_remaining)], 0); cmp(qword[rsp + ABI_SHADOW_SPACE + offsetof(StackLayout, cycles_remaining)], 0);
jng(return_to_caller); jng(return_to_caller);
}
cb.LookupBlock->EmitCall(*this); cb.LookupBlock->EmitCall(*this);
jmp(ABI_RETURN); jmp(ABI_RETURN);
align(); align();
return_from_run_code[MXCSR_ALREADY_EXITED] = getCurr<const void*>(); return_from_run_code[MXCSR_ALREADY_EXITED] = getCurr<const void*>();
cmp(dword[r15 + jsi.offsetof_halt_reason], 0);
jne(return_to_caller_mxcsr_already_exited);
if (cb.enable_cycle_counting) {
cmp(qword[rsp + ABI_SHADOW_SPACE + offsetof(StackLayout, cycles_remaining)], 0); cmp(qword[rsp + ABI_SHADOW_SPACE + offsetof(StackLayout, cycles_remaining)], 0);
jng(return_to_caller_mxcsr_already_exited); jng(return_to_caller_mxcsr_already_exited);
}
SwitchMxcsrOnEntry(); SwitchMxcsrOnEntry();
cb.LookupBlock->EmitCall(*this); cb.LookupBlock->EmitCall(*this);
jmp(ABI_RETURN); jmp(ABI_RETURN);
@ -291,10 +311,16 @@ void BlockOfCode::GenRunCode(std::function<void(BlockOfCode&)> rcp) {
return_from_run_code[MXCSR_ALREADY_EXITED | FORCE_RETURN] = getCurr<const void*>(); return_from_run_code[MXCSR_ALREADY_EXITED | FORCE_RETURN] = getCurr<const void*>();
L(return_to_caller_mxcsr_already_exited); L(return_to_caller_mxcsr_already_exited);
if (cb.enable_cycle_counting) {
cb.AddTicks->EmitCall(*this, [this](RegList param) { cb.AddTicks->EmitCall(*this, [this](RegList param) {
mov(param[0], qword[rsp + ABI_SHADOW_SPACE + offsetof(StackLayout, cycles_to_run)]); mov(param[0], qword[rsp + ABI_SHADOW_SPACE + offsetof(StackLayout, cycles_to_run)]);
sub(param[0], qword[rsp + ABI_SHADOW_SPACE + offsetof(StackLayout, cycles_remaining)]); sub(param[0], qword[rsp + ABI_SHADOW_SPACE + offsetof(StackLayout, cycles_remaining)]);
}); });
}
xor_(eax, eax);
lock();
xchg(dword[r15 + jsi.offsetof_halt_reason], eax);
ABI_PopCalleeSaveRegistersAndAdjustStack(*this, sizeof(StackLayout)); ABI_PopCalleeSaveRegistersAndAdjustStack(*this, sizeof(StackLayout));
ret(); ret();
@ -323,6 +349,10 @@ void BlockOfCode::LeaveStandardASIMD() {
} }
void BlockOfCode::UpdateTicks() { void BlockOfCode::UpdateTicks() {
if (!cb.enable_cycle_counting) {
return;
}
cb.AddTicks->EmitCall(*this, [this](RegList param) { cb.AddTicks->EmitCall(*this, [this](RegList param) {
mov(param[0], qword[rsp + ABI_SHADOW_SPACE + offsetof(StackLayout, cycles_to_run)]); mov(param[0], qword[rsp + ABI_SHADOW_SPACE + offsetof(StackLayout, cycles_to_run)]);
sub(param[0], qword[rsp + ABI_SHADOW_SPACE + offsetof(StackLayout, cycles_remaining)]); sub(param[0], qword[rsp + ABI_SHADOW_SPACE + offsetof(StackLayout, cycles_remaining)]);

View File

@ -20,6 +20,7 @@
#include "dynarmic/backend/x64/jitstate_info.h" #include "dynarmic/backend/x64/jitstate_info.h"
#include "dynarmic/common/cast_util.h" #include "dynarmic/common/cast_util.h"
#include "dynarmic/common/common_types.h" #include "dynarmic/common/common_types.h"
#include "dynarmic/interface/halt_reason.h"
namespace Dynarmic::Backend::X64 { namespace Dynarmic::Backend::X64 {
@ -29,6 +30,7 @@ struct RunCodeCallbacks {
std::unique_ptr<Callback> LookupBlock; std::unique_ptr<Callback> LookupBlock;
std::unique_ptr<Callback> AddTicks; std::unique_ptr<Callback> AddTicks;
std::unique_ptr<Callback> GetTicksRemaining; std::unique_ptr<Callback> GetTicksRemaining;
bool enable_cycle_counting;
}; };
class BlockOfCode final : public Xbyak::CodeGenerator { class BlockOfCode final : public Xbyak::CodeGenerator {
@ -50,9 +52,9 @@ public:
size_t SpaceRemaining() const; size_t SpaceRemaining() const;
/// Runs emulated code from code_ptr. /// Runs emulated code from code_ptr.
void RunCode(void* jit_state, CodePtr code_ptr) const; HaltReason RunCode(void* jit_state, CodePtr code_ptr) const;
/// Runs emulated code from code_ptr for a single cycle. /// Runs emulated code from code_ptr for a single cycle.
void StepCode(void* jit_state, CodePtr code_ptr) const; HaltReason StepCode(void* jit_state, CodePtr code_ptr) const;
/// Code emitter: Returns to dispatcher /// Code emitter: Returns to dispatcher
void ReturnFromRunCode(bool mxcsr_already_exited = false); void ReturnFromRunCode(bool mxcsr_already_exited = false);
/// Code emitter: Returns to dispatcher, forces return to host /// Code emitter: Returns to dispatcher, forces return to host
@ -183,7 +185,7 @@ private:
CodePtr near_code_ptr; CodePtr near_code_ptr;
CodePtr far_code_ptr; CodePtr far_code_ptr;
using RunCodeFuncType = void (*)(void*, CodePtr); using RunCodeFuncType = HaltReason (*)(void*, CodePtr);
RunCodeFuncType run_code = nullptr; RunCodeFuncType run_code = nullptr;
RunCodeFuncType step_code = nullptr; RunCodeFuncType step_code = nullptr;
static constexpr size_t MXCSR_ALREADY_EXITED = 1 << 0; static constexpr size_t MXCSR_ALREADY_EXITED = 1 << 0;

View File

@ -306,6 +306,11 @@ void EmitX64::Patch(const IR::LocationDescriptor& target_desc, CodePtr target_co
EmitPatchJg(target_desc, target_code_ptr); EmitPatchJg(target_desc, target_code_ptr);
} }
for (CodePtr location : patch_info.jz) {
code.SetCodePtr(location);
EmitPatchJz(target_desc, target_code_ptr);
}
for (CodePtr location : patch_info.jmp) { for (CodePtr location : patch_info.jmp) {
code.SetCodePtr(location); code.SetCodePtr(location);
EmitPatchJmp(target_desc, target_code_ptr); EmitPatchJmp(target_desc, target_code_ptr);

View File

@ -111,12 +111,14 @@ protected:
// Patching // Patching
struct PatchInformation { struct PatchInformation {
std::vector<CodePtr> jg; std::vector<CodePtr> jg;
std::vector<CodePtr> jz;
std::vector<CodePtr> jmp; std::vector<CodePtr> jmp;
std::vector<CodePtr> mov_rcx; std::vector<CodePtr> mov_rcx;
}; };
void Patch(const IR::LocationDescriptor& target_desc, CodePtr target_code_ptr); void Patch(const IR::LocationDescriptor& target_desc, CodePtr target_code_ptr);
virtual void Unpatch(const IR::LocationDescriptor& target_desc); virtual void Unpatch(const IR::LocationDescriptor& target_desc);
virtual void EmitPatchJg(const IR::LocationDescriptor& target_desc, CodePtr target_code_ptr = nullptr) = 0; virtual void EmitPatchJg(const IR::LocationDescriptor& target_desc, CodePtr target_code_ptr = nullptr) = 0;
virtual void EmitPatchJz(const IR::LocationDescriptor& target_desc, CodePtr target_code_ptr = nullptr) = 0;
virtual void EmitPatchJmp(const IR::LocationDescriptor& target_desc, CodePtr target_code_ptr = nullptr) = 0; virtual void EmitPatchJmp(const IR::LocationDescriptor& target_desc, CodePtr target_code_ptr = nullptr) = 0;
virtual void EmitPatchMovRcx(CodePtr target_code_ptr = nullptr) = 0; virtual void EmitPatchMovRcx(CodePtr target_code_ptr = nullptr) = 0;

View File

@ -319,9 +319,9 @@ void AxxEmitX64::EmitExclusiveReadMemoryInline(AxxEmitContext& ctx, IR::Inst* in
} }
auto args = ctx.reg_alloc.GetArgumentInfo(inst); auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const bool ordered = true; constexpr bool ordered = true;
if (ordered && bitsize == 128) { if constexpr (ordered && bitsize == 128) {
// Required for atomic 128-bit loads/stores // Required for atomic 128-bit loads/stores
ctx.reg_alloc.ScratchGpr(HostLoc::RAX); ctx.reg_alloc.ScratchGpr(HostLoc::RAX);
ctx.reg_alloc.ScratchGpr(HostLoc::RBX); ctx.reg_alloc.ScratchGpr(HostLoc::RBX);
@ -394,7 +394,7 @@ void AxxEmitX64::EmitExclusiveWriteMemoryInline(AxxEmitContext& ctx, IR::Inst* i
} }
auto args = ctx.reg_alloc.GetArgumentInfo(inst); auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const bool ordered = true; constexpr bool ordered = true;
const auto value = [&] { const auto value = [&] {
if constexpr (bitsize == 128) { if constexpr (bitsize == 128) {

View File

@ -20,7 +20,8 @@ struct JitStateInfo {
, offsetof_rsb_codeptrs(offsetof(JitStateType, rsb_codeptrs)) , offsetof_rsb_codeptrs(offsetof(JitStateType, rsb_codeptrs))
, offsetof_cpsr_nzcv(offsetof(JitStateType, cpsr_nzcv)) , offsetof_cpsr_nzcv(offsetof(JitStateType, cpsr_nzcv))
, offsetof_fpsr_exc(offsetof(JitStateType, fpsr_exc)) , offsetof_fpsr_exc(offsetof(JitStateType, fpsr_exc))
, offsetof_fpsr_qc(offsetof(JitStateType, fpsr_qc)) {} , offsetof_fpsr_qc(offsetof(JitStateType, fpsr_qc))
, offsetof_halt_reason(offsetof(JitStateType, halt_reason)) {}
const size_t offsetof_guest_MXCSR; const size_t offsetof_guest_MXCSR;
const size_t offsetof_asimd_MXCSR; const size_t offsetof_asimd_MXCSR;
@ -31,6 +32,7 @@ struct JitStateInfo {
const size_t offsetof_cpsr_nzcv; const size_t offsetof_cpsr_nzcv;
const size_t offsetof_fpsr_exc; const size_t offsetof_fpsr_exc;
const size_t offsetof_fpsr_qc; const size_t offsetof_fpsr_qc;
const size_t offsetof_halt_reason;
}; };
} // namespace Dynarmic::Backend::X64 } // namespace Dynarmic::Backend::X64

View File

@ -0,0 +1,20 @@
/* This file is part of the dynarmic project.
* Copyright (c) 2022 MerryMage
* SPDX-License-Identifier: 0BSD
*/
#pragma once
#include "dynarmic/common/common_types.h"
namespace Dynarmic::Atomic {
inline void Or(volatile u32* ptr, u32 value) {
#ifdef _MSC_VER
_InterlockedOr(reinterpret_cast<volatile long*>(ptr), value);
#else
__atomic_or_fetch(ptr, value, __ATOMIC_SEQ_CST);
#endif
}
} // namespace Dynarmic::Atomic

View File

@ -12,6 +12,7 @@
#include <vector> #include <vector>
#include "dynarmic/interface/A32/config.h" #include "dynarmic/interface/A32/config.h"
#include "dynarmic/interface/halt_reason.h"
namespace Dynarmic { namespace Dynarmic {
namespace A32 { namespace A32 {
@ -27,13 +28,13 @@ public:
* Runs the emulated CPU. * Runs the emulated CPU.
* Cannot be recursively called. * Cannot be recursively called.
*/ */
void Run(); HaltReason Run();
/** /**
* Steps the emulated CPU. * Steps the emulated CPU.
* Cannot be recursively called. * Cannot be recursively called.
*/ */
void Step(); HaltReason Step();
/** /**
* Clears the code cache of all compiled code. * Clears the code cache of all compiled code.
@ -58,7 +59,7 @@ public:
* Stops execution in Jit::Run. * Stops execution in Jit::Run.
* Can only be called from a callback. * Can only be called from a callback.
*/ */
void HaltExecution(); void HaltExecution(HaltReason hr = HaltReason::UserDefined1);
/// View and modify registers. /// View and modify registers.
std::array<std::uint32_t, 16>& Regs(); std::array<std::uint32_t, 16>& Regs();

View File

@ -208,6 +208,10 @@ struct UserConfig {
/// to avoid writting certain unnecessary code only needed for cycle timers. /// to avoid writting certain unnecessary code only needed for cycle timers.
bool wall_clock_cntpct = false; bool wall_clock_cntpct = false;
/// This option allows you to disable cycle counting. If this is set to false,
/// AddTicks and GetTicksRemaining are never called, and no cycle counting is done.
bool enable_cycle_counting = true;
/// This option relates to the CPSR.E flag. Enabling this option disables modification /// This option relates to the CPSR.E flag. Enabling this option disables modification
/// of CPSR.E by the emulated program, forcing it to 0. /// of CPSR.E by the emulated program, forcing it to 0.
/// NOTE: Calling Jit::SetCpsr with CPSR.E=1 while this option is enabled may result /// NOTE: Calling Jit::SetCpsr with CPSR.E=1 while this option is enabled may result

View File

@ -13,6 +13,7 @@
#include <vector> #include <vector>
#include "dynarmic/interface/A64/config.h" #include "dynarmic/interface/A64/config.h"
#include "dynarmic/interface/halt_reason.h"
namespace Dynarmic { namespace Dynarmic {
namespace A64 { namespace A64 {
@ -28,13 +29,13 @@ public:
* Runs the emulated CPU. * Runs the emulated CPU.
* Cannot be recursively called. * Cannot be recursively called.
*/ */
void Run(); HaltReason Run();
/** /**
* Step the emulated CPU for one instruction. * Step the emulated CPU for one instruction.
* Cannot be recursively called. * Cannot be recursively called.
*/ */
void Step(); HaltReason Step();
/** /**
* Clears the code cache of all compiled code. * Clears the code cache of all compiled code.
@ -59,7 +60,7 @@ public:
* Stops execution in Jit::Run. * Stops execution in Jit::Run.
* Can only be called from a callback. * Can only be called from a callback.
*/ */
void HaltExecution(); void HaltExecution(HaltReason hr = HaltReason::UserDefined1);
/// Read Stack Pointer /// Read Stack Pointer
std::uint64_t GetSP() const; std::uint64_t GetSP() const;

View File

@ -273,9 +273,9 @@ struct UserConfig {
/// to avoid writting certain unnecessary code only needed for cycle timers. /// to avoid writting certain unnecessary code only needed for cycle timers.
bool wall_clock_cntpct = false; bool wall_clock_cntpct = false;
// Determines whether AddTicks and GetTicksRemaining are called. /// This option allows you to disable cycle counting. If this is set to false,
// If false, execution will continue until soon after Jit::HaltExecution is called. /// AddTicks and GetTicksRemaining are never called, and no cycle counting is done.
// bool enable_ticks = true; // TODO bool enable_cycle_counting = true;
// Minimum size is about 8MiB. Maximum size is about 2GiB. Maximum size is limited by // Minimum size is about 8MiB. Maximum size is about 2GiB. Maximum size is limited by
// the maximum length of a x64 jump. // the maximum length of a x64 jump.

View File

@ -0,0 +1,53 @@
/* This file is part of the dynarmic project.
* Copyright (c) 2020 MerryMage
* SPDX-License-Identifier: 0BSD
*/
#pragma once
#include <cstdint>
namespace Dynarmic {
enum class HaltReason : std::uint32_t {
Step = 0x00000001,
CacheInvalidation = 0x00000002,
UserDefined1 = 0x01000000,
UserDefined2 = 0x02000000,
UserDefined3 = 0x04000000,
UserDefined4 = 0x08000000,
UserDefined5 = 0x10000000,
UserDefined6 = 0x20000000,
UserDefined7 = 0x40000000,
UserDefined8 = 0x80000000,
};
constexpr HaltReason operator~(HaltReason hr) {
return static_cast<HaltReason>(~static_cast<std::uint32_t>(hr));
}
constexpr HaltReason operator|(HaltReason hr1, HaltReason hr2) {
return static_cast<HaltReason>(static_cast<std::uint32_t>(hr1) | static_cast<std::uint32_t>(hr2));
}
constexpr HaltReason operator&(HaltReason hr1, HaltReason hr2) {
return static_cast<HaltReason>(static_cast<std::uint32_t>(hr1) & static_cast<std::uint32_t>(hr2));
}
constexpr HaltReason operator|=(HaltReason& result, HaltReason hr) {
return result = (result | hr);
}
constexpr HaltReason operator&=(HaltReason& result, HaltReason hr) {
return result = (result & hr);
}
constexpr bool operator!(HaltReason hr) {
return static_cast<std::uint32_t>(hr) == 0;
}
constexpr bool Has(HaltReason hr1, HaltReason hr2) {
return (static_cast<std::uint32_t>(hr1) & static_cast<std::uint32_t>(hr2)) != 0;
}
} // namespace Dynarmic

View File

@ -171,6 +171,9 @@ public:
/// Prepare core for thread reschedule (if needed to correctly handle state) /// Prepare core for thread reschedule (if needed to correctly handle state)
virtual void PrepareReschedule() = 0; virtual void PrepareReschedule() = 0;
/// Signal an interrupt and ask the core to halt as soon as possible.
virtual void SignalInterrupt() = 0;
struct BacktraceEntry { struct BacktraceEntry {
std::string module; std::string module;
u64 address; u64 address;

View File

@ -25,6 +25,9 @@ namespace Core {
using namespace Common::Literals; using namespace Common::Literals;
constexpr Dynarmic::HaltReason break_loop = Dynarmic::HaltReason::UserDefined2;
constexpr Dynarmic::HaltReason svc_call = Dynarmic::HaltReason::UserDefined3;
class DynarmicCallbacks32 : public Dynarmic::A32::UserCallbacks { class DynarmicCallbacks32 : public Dynarmic::A32::UserCallbacks {
public: public:
explicit DynarmicCallbacks32(ARM_Dynarmic_32& parent_) explicit DynarmicCallbacks32(ARM_Dynarmic_32& parent_)
@ -82,15 +85,13 @@ public:
} }
void CallSVC(u32 swi) override { void CallSVC(u32 swi) override {
parent.svc_called = true;
parent.svc_swi = swi; parent.svc_swi = swi;
parent.jit->HaltExecution(); parent.jit->HaltExecution(svc_call);
} }
void AddTicks(u64 ticks) override { void AddTicks(u64 ticks) override {
if (parent.uses_wall_clock) { ASSERT_MSG(!parent.uses_wall_clock, "This should never happen - dynarmic ticking disabled");
return;
}
// Divide the number of ticks by the amount of CPU cores. TODO(Subv): This yields only a // Divide the number of ticks by the amount of CPU cores. TODO(Subv): This yields only a
// rough approximation of the amount of executed ticks in the system, it may be thrown off // rough approximation of the amount of executed ticks in the system, it may be thrown off
// if not all cores are doing a similar amount of work. Instead of doing this, we should // if not all cores are doing a similar amount of work. Instead of doing this, we should
@ -106,12 +107,8 @@ public:
} }
u64 GetTicksRemaining() override { u64 GetTicksRemaining() override {
if (parent.uses_wall_clock) { ASSERT_MSG(!parent.uses_wall_clock, "This should never happen - dynarmic ticking disabled");
if (!parent.interrupt_handlers[parent.core_index].IsInterrupted()) {
return minimum_run_cycles;
}
return 0U;
}
return std::max<s64>(parent.system.CoreTiming().GetDowncount(), 0); return std::max<s64>(parent.system.CoreTiming().GetDowncount(), 0);
} }
@ -146,6 +143,7 @@ std::shared_ptr<Dynarmic::A32::Jit> ARM_Dynarmic_32::MakeJit(Common::PageTable*
// Timing // Timing
config.wall_clock_cntpct = uses_wall_clock; config.wall_clock_cntpct = uses_wall_clock;
config.enable_cycle_counting = !uses_wall_clock;
// Code cache size // Code cache size
config.code_cache_size = 512_MiB; config.code_cache_size = 512_MiB;
@ -228,13 +226,11 @@ std::shared_ptr<Dynarmic::A32::Jit> ARM_Dynarmic_32::MakeJit(Common::PageTable*
void ARM_Dynarmic_32::Run() { void ARM_Dynarmic_32::Run() {
while (true) { while (true) {
jit->Run(); const auto hr = jit->Run();
if (!svc_called) { if (Has(hr, svc_call)) {
break;
}
svc_called = false;
Kernel::Svc::Call(system, svc_swi); Kernel::Svc::Call(system, svc_swi);
if (shutdown) { }
if (Has(hr, break_loop)) {
break; break;
} }
} }
@ -320,8 +316,11 @@ void ARM_Dynarmic_32::LoadContext(const ThreadContext32& ctx) {
} }
void ARM_Dynarmic_32::PrepareReschedule() { void ARM_Dynarmic_32::PrepareReschedule() {
jit->HaltExecution(); jit->HaltExecution(break_loop);
shutdown = true; }
void ARM_Dynarmic_32::SignalInterrupt() {
jit->HaltExecution(break_loop);
} }
void ARM_Dynarmic_32::ClearInstructionCache() { void ARM_Dynarmic_32::ClearInstructionCache() {

View File

@ -57,6 +57,7 @@ public:
void LoadContext(const ThreadContext64& ctx) override {} void LoadContext(const ThreadContext64& ctx) override {}
void PrepareReschedule() override; void PrepareReschedule() override;
void SignalInterrupt() override;
void ClearExclusiveState() override; void ClearExclusiveState() override;
void ClearInstructionCache() override; void ClearInstructionCache() override;
@ -83,9 +84,6 @@ private:
// SVC callback // SVC callback
u32 svc_swi{}; u32 svc_swi{};
bool svc_called{};
bool shutdown{};
}; };
} // namespace Core } // namespace Core

View File

@ -26,6 +26,9 @@ namespace Core {
using Vector = Dynarmic::A64::Vector; using Vector = Dynarmic::A64::Vector;
using namespace Common::Literals; using namespace Common::Literals;
constexpr Dynarmic::HaltReason break_loop = Dynarmic::HaltReason::UserDefined2;
constexpr Dynarmic::HaltReason svc_call = Dynarmic::HaltReason::UserDefined3;
class DynarmicCallbacks64 : public Dynarmic::A64::UserCallbacks { class DynarmicCallbacks64 : public Dynarmic::A64::UserCallbacks {
public: public:
explicit DynarmicCallbacks64(ARM_Dynarmic_64& parent_) explicit DynarmicCallbacks64(ARM_Dynarmic_64& parent_)
@ -105,7 +108,7 @@ public:
break; break;
} }
parent.jit->HaltExecution(); parent.jit->HaltExecution(Dynarmic::HaltReason::CacheInvalidation);
} }
void ExceptionRaised(u64 pc, Dynarmic::A64::Exception exception) override { void ExceptionRaised(u64 pc, Dynarmic::A64::Exception exception) override {
@ -124,15 +127,12 @@ public:
} }
void CallSVC(u32 swi) override { void CallSVC(u32 swi) override {
parent.svc_called = true;
parent.svc_swi = swi; parent.svc_swi = swi;
parent.jit->HaltExecution(); parent.jit->HaltExecution(svc_call);
} }
void AddTicks(u64 ticks) override { void AddTicks(u64 ticks) override {
if (parent.uses_wall_clock) { ASSERT_MSG(!parent.uses_wall_clock, "This should never happen - dynarmic ticking disabled");
return;
}
// Divide the number of ticks by the amount of CPU cores. TODO(Subv): This yields only a // Divide the number of ticks by the amount of CPU cores. TODO(Subv): This yields only a
// rough approximation of the amount of executed ticks in the system, it may be thrown off // rough approximation of the amount of executed ticks in the system, it may be thrown off
@ -147,12 +147,8 @@ public:
} }
u64 GetTicksRemaining() override { u64 GetTicksRemaining() override {
if (parent.uses_wall_clock) { ASSERT_MSG(!parent.uses_wall_clock, "This should never happen - dynarmic ticking disabled");
if (!parent.interrupt_handlers[parent.core_index].IsInterrupted()) {
return minimum_run_cycles;
}
return 0U;
}
return std::max<s64>(parent.system.CoreTiming().GetDowncount(), 0); return std::max<s64>(parent.system.CoreTiming().GetDowncount(), 0);
} }
@ -208,6 +204,7 @@ std::shared_ptr<Dynarmic::A64::Jit> ARM_Dynarmic_64::MakeJit(Common::PageTable*
// Timing // Timing
config.wall_clock_cntpct = uses_wall_clock; config.wall_clock_cntpct = uses_wall_clock;
config.enable_cycle_counting = !uses_wall_clock;
// Code cache size // Code cache size
config.code_cache_size = 512_MiB; config.code_cache_size = 512_MiB;
@ -290,13 +287,11 @@ std::shared_ptr<Dynarmic::A64::Jit> ARM_Dynarmic_64::MakeJit(Common::PageTable*
void ARM_Dynarmic_64::Run() { void ARM_Dynarmic_64::Run() {
while (true) { while (true) {
jit->Run(); const auto hr = jit->Run();
if (!svc_called) { if (Has(hr, svc_call)) {
break;
}
svc_called = false;
Kernel::Svc::Call(system, svc_swi); Kernel::Svc::Call(system, svc_swi);
if (shutdown) { }
if (Has(hr, break_loop)) {
break; break;
} }
} }
@ -387,8 +382,11 @@ void ARM_Dynarmic_64::LoadContext(const ThreadContext64& ctx) {
} }
void ARM_Dynarmic_64::PrepareReschedule() { void ARM_Dynarmic_64::PrepareReschedule() {
jit->HaltExecution(); jit->HaltExecution(break_loop);
shutdown = true; }
void ARM_Dynarmic_64::SignalInterrupt() {
jit->HaltExecution(break_loop);
} }
void ARM_Dynarmic_64::ClearInstructionCache() { void ARM_Dynarmic_64::ClearInstructionCache() {

View File

@ -51,6 +51,7 @@ public:
void LoadContext(const ThreadContext64& ctx) override; void LoadContext(const ThreadContext64& ctx) override;
void PrepareReschedule() override; void PrepareReschedule() override;
void SignalInterrupt() override;
void ClearExclusiveState() override; void ClearExclusiveState() override;
void ClearInstructionCache() override; void ClearInstructionCache() override;
@ -77,9 +78,6 @@ private:
// SVC callback // SVC callback
u32 svc_swi{}; u32 svc_swi{};
bool svc_called{};
bool shutdown{};
}; };
} // namespace Core } // namespace Core

View File

@ -58,6 +58,7 @@ bool PhysicalCore::IsInterrupted() const {
void PhysicalCore::Interrupt() { void PhysicalCore::Interrupt() {
guard->lock(); guard->lock();
interrupts[core_index].SetInterrupt(true); interrupts[core_index].SetInterrupt(true);
arm_interface->SignalInterrupt();
guard->unlock(); guard->unlock();
} }

View File

@ -105,7 +105,7 @@ struct ImageDescriptor {
using ImageDescriptors = boost::container::small_vector<ImageDescriptor, 4>; using ImageDescriptors = boost::container::small_vector<ImageDescriptor, 4>;
struct Info { struct Info {
static constexpr size_t MAX_INDIRECT_CBUFS{15}; static constexpr size_t MAX_INDIRECT_CBUFS{14};
static constexpr size_t MAX_CBUFS{18}; static constexpr size_t MAX_CBUFS{18};
static constexpr size_t MAX_SSBOS{32}; static constexpr size_t MAX_SSBOS{32};

View File

@ -561,7 +561,6 @@ void SwizzleBlockLinearImage(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr
for (s32 layer = 0; layer < info.resources.layers; ++layer) { for (s32 layer = 0; layer < info.resources.layers; ++layer) {
const std::span<const u8> src = input.subspan(host_offset); const std::span<const u8> src = input.subspan(host_offset);
gpu_memory.ReadBlockUnsafe(gpu_addr + guest_offset, dst.data(), dst.size_bytes());
SwizzleTexture(dst, src, bytes_per_block, num_tiles.width, num_tiles.height, SwizzleTexture(dst, src, bytes_per_block, num_tiles.width, num_tiles.height,
num_tiles.depth, block.height, block.depth); num_tiles.depth, block.height, block.depth);