diff --git a/README.md b/README.md
index 8baec9972..6ec51c597 100755
--- a/README.md
+++ b/README.md
@@ -1,7 +1,7 @@
 yuzu emulator early access
 =============
 
-This is the source code for early-access 2835.
+This is the source code for early-access 2836.
 
 ## Legal Notice
 
diff --git a/externals/dynarmic/.gitignore b/externals/dynarmic/.gitignore
index 60652af3c..3621caac5 100755
--- a/externals/dynarmic/.gitignore
+++ b/externals/dynarmic/.gitignore
@@ -1,6 +1,8 @@
 # Built files
 build/
 build-*/
+cmake-build-*/
+.idea/
 docs/Doxygen/
 # Generated files
 src/dynarmic/backend/x64/mig/
diff --git a/externals/dynarmic/CMakeLists.txt b/externals/dynarmic/CMakeLists.txt
index 931475413..98ab4f247 100755
--- a/externals/dynarmic/CMakeLists.txt
+++ b/externals/dynarmic/CMakeLists.txt
@@ -1,5 +1,5 @@
 cmake_minimum_required(VERSION 3.8)
-project(dynarmic LANGUAGES C CXX ASM VERSION 6.1.1)
+project(dynarmic LANGUAGES C CXX ASM VERSION 6.2.0)
 
 # Determine if we're built as a subproject (using add_subdirectory)
 # or if this is the master project.
diff --git a/externals/dynarmic/README.md b/externals/dynarmic/README.md
index f57bca932..5b4b2f68e 100755
--- a/externals/dynarmic/README.md
+++ b/externals/dynarmic/README.md
@@ -267,21 +267,30 @@ ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 ```
 
-### mp
+### mcl & oaknut
 
 ```
-Copyright (C) 2017 MerryMage
+MIT License
 
-Permission to use, copy, modify, and/or distribute this software for
-any purpose with or without fee is hereby granted.
+Copyright (c) 2022 merryhime
 
-THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
-WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
-MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
-ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
-WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN
-AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
-OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
 ```
 
 ### robin-map
diff --git a/externals/dynarmic/externals/oaknut/.github/workflows/build-and-test.yml b/externals/dynarmic/externals/oaknut/.github/workflows/build-and-test.yml
index dba7b8a97..59a5b6e9b 100755
--- a/externals/dynarmic/externals/oaknut/.github/workflows/build-and-test.yml
+++ b/externals/dynarmic/externals/oaknut/.github/workflows/build-and-test.yml
@@ -3,36 +3,38 @@ on: [push, pull_request]
 jobs:
   test_on_ubuntu:
     runs-on: ubuntu-latest
-    name: Build on ${{ matrix.distro }} ${{ matrix.arch }}
-
-    strategy:
-      matrix:
-        include:
-          - arch: aarch64
-            distro: ubuntu_latest
+    name: g++-10
 
     steps:
-      - uses: actions/checkout@v3
-      - uses: uraimo/run-on-arch-action@v2
-        name: Build and Test
-        id: build
-        with:
-          arch: ${{ matrix.arch }}
-          distro: ${{ matrix.distro }}
-          shell: /bin/bash
+      - name: Checkout oaknut repo
+        uses: actions/checkout@v3
 
-          install: |
-            apt-get update -q -y
-            apt-get install -q -y make cmake g++ git
+      - name: Install dependencies
+        run: >
+          sudo apt-get install -q -y
+          gcc-10-aarch64-linux-gnu
+          g++-10-aarch64-linux-gnu
+          ninja-build
+          qemu-user
 
-            pushd /tmp
-            git clone https://github.com/catchorg/Catch2.git
-            cd Catch2
-            cmake -Bbuild -H. -DBUILD_TESTING=OFF
-            cmake --build build/ --target install
-            popd
+      - name: Checkout Catch2 v3 repo
+        run: git clone https://github.com/catchorg/Catch2.git externals/catch
 
-          run: |
-            cmake -Bbuild -H.
-            cmake --build build
-            ./build/oaknut-tests
+      - name: Configure CMake
+        env:
+          CC: aarch64-linux-gnu-gcc-10
+          CXX: aarch64-linux-gnu-g++-10
+        run: >
+          cmake
+          -B ${{github.workspace}}/build
+          -H.
+          -GNinja
+          -DDYNARMIC_USE_BUNDLED_CATCH=ON
+
+      - name: Build
+        working-directory: ${{github.workspace}}/build
+        run: ninja
+
+      - name: Test
+        working-directory: ${{github.workspace}}/build
+        run: qemu-aarch64 -L /usr/aarch64-linux-gnu ./oaknut-tests -d yes
diff --git a/externals/dynarmic/externals/oaknut/CMakeLists.txt b/externals/dynarmic/externals/oaknut/CMakeLists.txt
index b6f139b9c..b84422d25 100755
--- a/externals/dynarmic/externals/oaknut/CMakeLists.txt
+++ b/externals/dynarmic/externals/oaknut/CMakeLists.txt
@@ -39,7 +39,11 @@ target_compile_features(oaknut INTERFACE cxx_std_20)
 
 # Tests
 if (MASTER_PROJECT)
-    find_package(Catch2 3 REQUIRED)
+    if (DYNARMIC_USE_BUNDLED_CATCH)
+        add_subdirectory(externals/catch)
+    else()
+        find_package(Catch2 3 REQUIRED)
+    endif()
 
     add_executable(oaknut-tests
         tests/basic.cpp
@@ -49,9 +53,4 @@ if (MASTER_PROJECT)
     target_include_directories(oaknut-tests PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/tests)
     target_link_libraries(oaknut-tests PRIVATE Catch2::Catch2WithMain merry::oaknut)
     target_compile_options(oaknut-tests PRIVATE -Wall -Wextra -Wcast-qual -pedantic -pedantic-errors -Wfatal-errors -Wno-missing-braces)
-
-    include(CTest)
-    include(Catch)
-    catch_discover_tests(oaknut-tests)
-    enable_testing()
 endif()
diff --git a/externals/dynarmic/externals/oaknut/include/oaknut/code_block.hpp b/externals/dynarmic/externals/oaknut/include/oaknut/code_block.hpp
index c7b066642..6b7deac88 100755
--- a/externals/dynarmic/externals/oaknut/include/oaknut/code_block.hpp
+++ b/externals/dynarmic/externals/oaknut/include/oaknut/code_block.hpp
@@ -1,6 +1,9 @@
 // SPDX-FileCopyrightText: Copyright (c) 2022 merryhime
 // SPDX-License-Identifier: MIT
 
+#pragma once
+
+#include <cstddef>
 #include <cstdint>
 #include <new>
 #include <sys/mman.h>
diff --git a/externals/dynarmic/src/dynarmic/backend/x64/a32_emit_x64.cpp b/externals/dynarmic/src/dynarmic/backend/x64/a32_emit_x64.cpp
index 2a6424b9f..05da3601a 100755
--- a/externals/dynarmic/src/dynarmic/backend/x64/a32_emit_x64.cpp
+++ b/externals/dynarmic/src/dynarmic/backend/x64/a32_emit_x64.cpp
@@ -114,9 +114,6 @@ A32EmitX64::BlockDescriptor A32EmitX64::Emit(IR::Block& block) {
     // Start emitting.
     code.align();
     const u8* const entrypoint = code.getCurr();
-    code.SwitchToFarCode();
-    const u8* const entrypoint_far = code.getCurr();
-    code.SwitchToNearCode();
 
     EmitCondPrelude(ctx);
 
@@ -155,6 +152,11 @@ A32EmitX64::BlockDescriptor A32EmitX64::Emit(IR::Block& block) {
     EmitX64::EmitTerminal(block.GetTerminal(), ctx.Location().SetSingleStepping(false), ctx.IsSingleStep());
     code.int3();
 
+    for (auto& deferred_emit : ctx.deferred_emits) {
+        deferred_emit();
+    }
+    code.int3();
+
     const size_t size = static_cast<size_t>(code.getCurr() - entrypoint);
 
     const A32::LocationDescriptor descriptor{block.Location()};
@@ -163,7 +165,7 @@ A32EmitX64::BlockDescriptor A32EmitX64::Emit(IR::Block& block) {
     const auto range = boost::icl::discrete_interval<u32>::closed(descriptor.PC(), end_location.PC() - 1);
     block_ranges.AddRange(range, descriptor);
 
-    return RegisterBlock(descriptor, entrypoint, entrypoint_far, size);
+    return RegisterBlock(descriptor, entrypoint, size);
 }
 
 void A32EmitX64::ClearCache() {
@@ -1168,16 +1170,9 @@ void A32EmitX64::EmitTerminalImpl(IR::Term::LinkBlock terminal, IR::LocationDesc
         }
     }
 
-    Xbyak::Label dest;
-    code.jmp(dest, Xbyak::CodeGenerator::T_NEAR);
-
-    code.SwitchToFarCode();
-    code.align(16);
-    code.L(dest);
     code.mov(MJitStateReg(A32::Reg::PC), A32::LocationDescriptor{terminal.next}.PC());
     PushRSBHelper(rax, rbx, terminal.next);
     code.ForceReturnFromRunCode();
-    code.SwitchToNearCode();
 }
 
 void A32EmitX64::EmitTerminalImpl(IR::Term::LinkBlockFast terminal, IR::LocationDescriptor initial_location, bool is_single_step) {
diff --git a/externals/dynarmic/src/dynarmic/backend/x64/a32_emit_x64.h b/externals/dynarmic/src/dynarmic/backend/x64/a32_emit_x64.h
index e7d4c3523..b037071b4 100755
--- a/externals/dynarmic/src/dynarmic/backend/x64/a32_emit_x64.h
+++ b/externals/dynarmic/src/dynarmic/backend/x64/a32_emit_x64.h
@@ -110,6 +110,7 @@ protected:
     FakeCall FastmemCallback(u64 rip);
 
     // Memory access helpers
+    void EmitCheckMemoryAbort(A32EmitContext& ctx, IR::Inst* inst, Xbyak::Label* end = nullptr);
     template<std::size_t bitsize, auto callback>
     void EmitMemoryRead(A32EmitContext& ctx, IR::Inst* inst);
     template<std::size_t bitsize, auto callback>
diff --git a/externals/dynarmic/src/dynarmic/backend/x64/a32_emit_x64_memory.cpp b/externals/dynarmic/src/dynarmic/backend/x64/a32_emit_x64_memory.cpp
index f41c3b9b9..210f347a2 100755
--- a/externals/dynarmic/src/dynarmic/backend/x64/a32_emit_x64_memory.cpp
+++ b/externals/dynarmic/src/dynarmic/backend/x64/a32_emit_x64_memory.cpp
@@ -235,4 +235,25 @@ void A32EmitX64::EmitA32ExclusiveWriteMemory64(A32EmitContext& ctx, IR::Inst* in
     }
 }
 
+void A32EmitX64::EmitCheckMemoryAbort(A32EmitContext& ctx, IR::Inst* inst, Xbyak::Label* end) {
+    if (!conf.check_halt_on_memory_access) {
+        return;
+    }
+
+    Xbyak::Label skip;
+
+    const A32::LocationDescriptor current_location{IR::LocationDescriptor{inst->GetArg(0).GetU64()}};
+
+    code.test(dword[r15 + offsetof(A32JitState, halt_reason)], static_cast<u32>(HaltReason::MemoryAbort));
+    if (end) {
+        code.jz(*end, code.T_NEAR);
+    } else {
+        code.jz(skip, code.T_NEAR);
+    }
+    EmitSetUpperLocationDescriptor(current_location, ctx.Location());
+    code.mov(dword[r15 + offsetof(A32JitState, Reg) + sizeof(u32) * 15], current_location.PC());
+    code.ForceReturnFromRunCode();
+    code.L(skip);
+}
+
 }  // namespace Dynarmic::Backend::X64
diff --git a/externals/dynarmic/src/dynarmic/backend/x64/a32_interface.cpp b/externals/dynarmic/src/dynarmic/backend/x64/a32_interface.cpp
index 6d2bf6101..9fdcaf5f7 100755
--- a/externals/dynarmic/src/dynarmic/backend/x64/a32_interface.cpp
+++ b/externals/dynarmic/src/dynarmic/backend/x64/a32_interface.cpp
@@ -60,7 +60,7 @@ static Optimization::PolyfillOptions GenPolyfillOptions(const BlockOfCode& code)
 
 struct Jit::Impl {
     Impl(Jit* jit, A32::UserConfig conf)
-        : block_of_code(GenRunCodeCallbacks(conf.callbacks, &GetCurrentBlockThunk, this, conf), JitStateInfo{jit_state}, conf.code_cache_size, conf.far_code_offset, GenRCP(conf))
+        : block_of_code(GenRunCodeCallbacks(conf.callbacks, &GetCurrentBlockThunk, this, conf), JitStateInfo{jit_state}, conf.code_cache_size, GenRCP(conf))
         , emitter(block_of_code, conf, jit)
         , polyfill_options(GenPolyfillOptions(block_of_code))
         , conf(std::move(conf))
@@ -171,10 +171,9 @@ private:
             PerformCacheInvalidation();
         }
 
-        IR::Block ir_block = A32::Translate(A32::LocationDescriptor{descriptor}, conf.callbacks,
-                                            {conf.arch_version, conf.define_unpredictable_behaviour, conf.hook_hint_instructions, conf.check_halt_on_memory_access});
+        IR::Block ir_block = A32::Translate(A32::LocationDescriptor{descriptor}, conf.callbacks, {conf.arch_version, conf.define_unpredictable_behaviour, conf.hook_hint_instructions});
         Optimization::PolyfillPass(ir_block, polyfill_options);
-        if (conf.HasOptimization(OptimizationFlag::GetSetElimination)) {
+        if (conf.HasOptimization(OptimizationFlag::GetSetElimination) && !conf.check_halt_on_memory_access) {
             Optimization::A32GetSetElimination(ir_block);
             Optimization::DeadCodeElimination(ir_block);
         }
diff --git a/externals/dynarmic/src/dynarmic/backend/x64/a64_emit_x64.cpp b/externals/dynarmic/src/dynarmic/backend/x64/a64_emit_x64.cpp
index e0cd8c453..97472cf60 100755
--- a/externals/dynarmic/src/dynarmic/backend/x64/a64_emit_x64.cpp
+++ b/externals/dynarmic/src/dynarmic/backend/x64/a64_emit_x64.cpp
@@ -85,9 +85,6 @@ A64EmitX64::BlockDescriptor A64EmitX64::Emit(IR::Block& block) {
     // Start emitting.
     code.align();
     const u8* const entrypoint = code.getCurr();
-    code.SwitchToFarCode();
-    const u8* const entrypoint_far = code.getCurr();
-    code.SwitchToNearCode();
 
     ASSERT(block.GetCondition() == IR::Cond::AL);
 
@@ -126,6 +123,11 @@ A64EmitX64::BlockDescriptor A64EmitX64::Emit(IR::Block& block) {
     EmitX64::EmitTerminal(block.GetTerminal(), ctx.Location().SetSingleStepping(false), ctx.IsSingleStep());
     code.int3();
 
+    for (auto& deferred_emit : ctx.deferred_emits) {
+        deferred_emit();
+    }
+    code.int3();
+
     const size_t size = static_cast<size_t>(code.getCurr() - entrypoint);
 
     const A64::LocationDescriptor descriptor{block.Location()};
@@ -134,7 +136,7 @@ A64EmitX64::BlockDescriptor A64EmitX64::Emit(IR::Block& block) {
     const auto range = boost::icl::discrete_interval<u64>::closed(descriptor.PC(), end_location.PC() - 1);
     block_ranges.AddRange(range, descriptor);
 
-    return RegisterBlock(descriptor, entrypoint, entrypoint_far, size);
+    return RegisterBlock(descriptor, entrypoint, size);
 }
 
 void A64EmitX64::ClearCache() {
diff --git a/externals/dynarmic/src/dynarmic/backend/x64/a64_emit_x64.h b/externals/dynarmic/src/dynarmic/backend/x64/a64_emit_x64.h
index 644628dca..b5e1a7461 100755
--- a/externals/dynarmic/src/dynarmic/backend/x64/a64_emit_x64.h
+++ b/externals/dynarmic/src/dynarmic/backend/x64/a64_emit_x64.h
@@ -108,6 +108,7 @@ protected:
     FakeCall FastmemCallback(u64 rip);
 
     // Memory access helpers
+    void EmitCheckMemoryAbort(A64EmitContext& ctx, IR::Inst* inst, Xbyak::Label* end = nullptr);
     template<std::size_t bitsize, auto callback>
     void EmitMemoryRead(A64EmitContext& ctx, IR::Inst* inst);
     template<std::size_t bitsize, auto callback>
diff --git a/externals/dynarmic/src/dynarmic/backend/x64/a64_emit_x64_memory.cpp b/externals/dynarmic/src/dynarmic/backend/x64/a64_emit_x64_memory.cpp
index ecfab3436..47bcc84e1 100755
--- a/externals/dynarmic/src/dynarmic/backend/x64/a64_emit_x64_memory.cpp
+++ b/externals/dynarmic/src/dynarmic/backend/x64/a64_emit_x64_memory.cpp
@@ -407,4 +407,25 @@ void A64EmitX64::EmitA64ExclusiveWriteMemory128(A64EmitContext& ctx, IR::Inst* i
     }
 }
 
+void A64EmitX64::EmitCheckMemoryAbort(A64EmitContext&, IR::Inst* inst, Xbyak::Label* end) {
+    if (!conf.check_halt_on_memory_access) {
+        return;
+    }
+
+    Xbyak::Label skip;
+
+    const A64::LocationDescriptor current_location{IR::LocationDescriptor{inst->GetArg(0).GetU64()}};
+
+    code.test(dword[r15 + offsetof(A64JitState, halt_reason)], static_cast<u32>(HaltReason::MemoryAbort));
+    if (end) {
+        code.jz(*end, code.T_NEAR);
+    } else {
+        code.jz(skip, code.T_NEAR);
+    }
+    code.mov(rax, current_location.PC());
+    code.mov(qword[r15 + offsetof(A64JitState, pc)], rax);
+    code.ForceReturnFromRunCode();
+    code.L(skip);
+}
+
 }  // namespace Dynarmic::Backend::X64
diff --git a/externals/dynarmic/src/dynarmic/backend/x64/a64_interface.cpp b/externals/dynarmic/src/dynarmic/backend/x64/a64_interface.cpp
index fcc289b16..7440ab573 100755
--- a/externals/dynarmic/src/dynarmic/backend/x64/a64_interface.cpp
+++ b/externals/dynarmic/src/dynarmic/backend/x64/a64_interface.cpp
@@ -58,7 +58,7 @@ struct Jit::Impl final {
 public:
     Impl(Jit* jit, UserConfig conf)
         : conf(conf)
-        , block_of_code(GenRunCodeCallbacks(conf.callbacks, &GetCurrentBlockThunk, this, conf), JitStateInfo{jit_state}, conf.code_cache_size, conf.far_code_offset, GenRCP(conf))
+        , block_of_code(GenRunCodeCallbacks(conf.callbacks, &GetCurrentBlockThunk, this, conf), JitStateInfo{jit_state}, conf.code_cache_size, GenRCP(conf))
        , emitter(block_of_code, conf, jit)
        , polyfill_options(GenPolyfillOptions(block_of_code)) {
         ASSERT(conf.page_table_address_space_bits >= 12 && conf.page_table_address_space_bits <= 64);
@@ -269,10 +269,10 @@ private:
         // JIT Compile
         const auto get_code = [this](u64 vaddr) { return conf.callbacks->MemoryReadCode(vaddr); };
         IR::Block ir_block = A64::Translate(A64::LocationDescriptor{current_location}, get_code,
-                                            {conf.define_unpredictable_behaviour, conf.wall_clock_cntpct, conf.hook_hint_instructions, conf.check_halt_on_memory_access});
+                                            {conf.define_unpredictable_behaviour, conf.wall_clock_cntpct});
         Optimization::PolyfillPass(ir_block, polyfill_options);
         Optimization::A64CallbackConfigPass(ir_block, conf);
-        if (conf.HasOptimization(OptimizationFlag::GetSetElimination)) {
+        if (conf.HasOptimization(OptimizationFlag::GetSetElimination) && !conf.check_halt_on_memory_access) {
             Optimization::A64GetSetElimination(ir_block);
             Optimization::DeadCodeElimination(ir_block);
         }
diff --git a/externals/dynarmic/src/dynarmic/backend/x64/block_of_code.cpp b/externals/dynarmic/src/dynarmic/backend/x64/block_of_code.cpp
index e41462cdf..c62882e92 100755
--- a/externals/dynarmic/src/dynarmic/backend/x64/block_of_code.cpp
+++ b/externals/dynarmic/src/dynarmic/backend/x64/block_of_code.cpp
@@ -185,22 +185,19 @@ HostFeature GetHostFeatures() {
 }
 
 }  // anonymous namespace
 
-BlockOfCode::BlockOfCode(RunCodeCallbacks cb, JitStateInfo jsi, size_t total_code_size, size_t far_code_offset, std::function<void(BlockOfCode&)> rcp)
+BlockOfCode::BlockOfCode(RunCodeCallbacks cb, JitStateInfo jsi, size_t total_code_size, std::function<void(BlockOfCode&)> rcp)
         : Xbyak::CodeGenerator(total_code_size, nullptr, &s_allocator)
         , cb(std::move(cb))
         , jsi(jsi)
-        , far_code_offset(far_code_offset)
         , constant_pool(*this, CONSTANT_POOL_SIZE)
         , host_features(GetHostFeatures()) {
-    ASSERT(total_code_size > far_code_offset);
     EnableWriting();
     GenRunCode(rcp);
 }
 
 void BlockOfCode::PreludeComplete() {
     prelude_complete = true;
-    near_code_begin = getCurr();
-    far_code_begin = getCurr() + far_code_offset;
+    code_begin = getCurr();
     ClearCache();
     DisableWriting();
 }
@@ -219,21 +216,15 @@ void BlockOfCode::DisableWriting() {
 
 void BlockOfCode::ClearCache() {
     ASSERT(prelude_complete);
-    in_far_code = false;
-    near_code_ptr = near_code_begin;
-    far_code_ptr = far_code_begin;
-    SetCodePtr(near_code_begin);
+    SetCodePtr(code_begin);
 }
 
 size_t BlockOfCode::SpaceRemaining() const {
     ASSERT(prelude_complete);
-    const u8* current_near_ptr = in_far_code ? reinterpret_cast<const u8*>(near_code_ptr) : getCurr<const u8*>();
-    const u8* current_far_ptr = in_far_code ? getCurr<const u8*>() : reinterpret_cast<const u8*>(far_code_ptr);
-    if (current_near_ptr >= far_code_begin)
+    const u8* current_ptr = getCurr<const u8*>();
+    if (current_ptr >= &top_[maxSize_])
         return 0;
-    if (current_far_ptr >= &top_[maxSize_])
-        return 0;
-    return std::min(reinterpret_cast<const u8*>(far_code_begin) - current_near_ptr, &top_[maxSize_] - current_far_ptr);
+    return &top_[maxSize_] - current_ptr;
 }
 
 HaltReason BlockOfCode::RunCode(void* jit_state, CodePtr code_ptr) const {
@@ -406,26 +397,8 @@ Xbyak::Address BlockOfCode::XmmConst(const Xbyak::AddressFrame& frame, u64 lower
     return constant_pool.GetConstant(frame, lower, upper);
 }
 
-void BlockOfCode::SwitchToFarCode() {
-    ASSERT(prelude_complete);
-    ASSERT(!in_far_code);
-    in_far_code = true;
-    near_code_ptr = getCurr();
-    SetCodePtr(far_code_ptr);
-
-    ASSERT_MSG(near_code_ptr < far_code_begin, "Near code has overwritten far code!");
-}
-
-void BlockOfCode::SwitchToNearCode() {
-    ASSERT(prelude_complete);
-    ASSERT(in_far_code);
-    in_far_code = false;
-    far_code_ptr = getCurr();
-    SetCodePtr(near_code_ptr);
-}
-
 CodePtr BlockOfCode::GetCodeBegin() const {
-    return near_code_begin;
+    return code_begin;
 }
 
 size_t BlockOfCode::GetTotalCodeSize() const {
diff --git a/externals/dynarmic/src/dynarmic/backend/x64/block_of_code.h b/externals/dynarmic/src/dynarmic/backend/x64/block_of_code.h
index 0c4dd04c6..d52cb682d 100755
--- a/externals/dynarmic/src/dynarmic/backend/x64/block_of_code.h
+++ b/externals/dynarmic/src/dynarmic/backend/x64/block_of_code.h
@@ -36,7 +36,7 @@ struct RunCodeCallbacks {
 
 class BlockOfCode final : public Xbyak::CodeGenerator {
 public:
-    BlockOfCode(RunCodeCallbacks cb, JitStateInfo jsi, size_t total_code_size, size_t far_code_offset, std::function<void(BlockOfCode&)> rcp);
+    BlockOfCode(RunCodeCallbacks cb, JitStateInfo jsi, size_t total_code_size, std::function<void(BlockOfCode&)> rcp);
     BlockOfCode(const BlockOfCode&) = delete;
 
     /// Call when external emitters have finished emitting their preludes.
@@ -49,7 +49,7 @@ public:
     /// Clears this block of code and resets code pointer to beginning.
     void ClearCache();
 
-    /// Calculates how much space is remaining to use. This is the minimum of near code and far code.
+    /// Calculates how much space is remaining to use.
     size_t SpaceRemaining() const;
 
     /// Runs emulated code from code_ptr.
@@ -125,11 +125,6 @@ public:
                         mcl::bit::replicate_element<u64>(esize, value));
     }
 
-    /// Far code sits far away from the near code. Execution remains primarily in near code.
-    /// "Cold" / Rarely executed instructions sit in far code, so the CPU doesn't fetch them unless necessary.
-    void SwitchToFarCode();
-    void SwitchToNearCode();
-
     CodePtr GetCodeBegin() const;
     size_t GetTotalCodeSize() const;
 
@@ -180,18 +175,12 @@ public:
 private:
     RunCodeCallbacks cb;
     JitStateInfo jsi;
-    size_t far_code_offset;
 
     bool prelude_complete = false;
-    CodePtr near_code_begin = nullptr;
-    CodePtr far_code_begin = nullptr;
+    CodePtr code_begin = nullptr;
 
     ConstantPool constant_pool;
 
-    bool in_far_code = false;
-    CodePtr near_code_ptr;
-    CodePtr far_code_ptr;
-
     using RunCodeFuncType = HaltReason (*)(void*, CodePtr);
 
     RunCodeFuncType run_code = nullptr;
     RunCodeFuncType step_code = nullptr;
diff --git a/externals/dynarmic/src/dynarmic/backend/x64/emit_x64.cpp b/externals/dynarmic/src/dynarmic/backend/x64/emit_x64.cpp
index ab160ecad..5ffeefc1d 100755
--- a/externals/dynarmic/src/dynarmic/backend/x64/emit_x64.cpp
+++ b/externals/dynarmic/src/dynarmic/backend/x64/emit_x64.cpp
@@ -32,6 +32,8 @@ using namespace Xbyak::util;
 EmitContext::EmitContext(RegAlloc& reg_alloc, IR::Block& block)
         : reg_alloc(reg_alloc), block(block) {}
 
+EmitContext::~EmitContext() = default;
+
 size_t EmitContext::GetInstOffset(IR::Inst* inst) const {
     return static_cast<size_t>(std::distance(block.begin(), IR::Block::iterator(inst)));
 }
@@ -274,11 +276,8 @@ Xbyak::Label EmitX64::EmitCond(IR::Cond cond) {
     return pass;
 }
 
-EmitX64::BlockDescriptor EmitX64::RegisterBlock(const IR::LocationDescriptor& descriptor, CodePtr entrypoint, CodePtr entrypoint_far, size_t size) {
+EmitX64::BlockDescriptor EmitX64::RegisterBlock(const IR::LocationDescriptor& descriptor, CodePtr entrypoint, size_t size) {
     PerfMapRegister(entrypoint, code.getCurr(), LocationDescriptorToFriendlyName(descriptor));
-    code.SwitchToFarCode();
-    PerfMapRegister(entrypoint_far, code.getCurr(), LocationDescriptorToFriendlyName(descriptor) + "_far");
-    code.SwitchToNearCode();
     Patch(descriptor, entrypoint);
 
     BlockDescriptor block_desc{entrypoint, size};
diff --git a/externals/dynarmic/src/dynarmic/backend/x64/emit_x64.h b/externals/dynarmic/src/dynarmic/backend/x64/emit_x64.h
index 08a846405..2e5d434fa 100755
--- a/externals/dynarmic/src/dynarmic/backend/x64/emit_x64.h
+++ b/externals/dynarmic/src/dynarmic/backend/x64/emit_x64.h
@@ -6,6 +6,8 @@
 #pragma once
 
 #include <array>
+#include <functional>
+#include <memory>
 #include <optional>
 #include <string>
 #include <vector>
@@ -14,6 +16,7 @@
 #include <mcl/bitsizeof.hpp>
 #include <mcl/container/intrusive_list.hpp>
 #include <mcl/stdint.hpp>
+#include <xbyak/xbyak.h>
 #include <xbyak/xbyak_util.h>
 
 #include "dynarmic/backend/x64/exception_handler.h"
@@ -48,6 +51,7 @@ using HalfVectorArray = std::array<T, A64FullVectorWidth::value / mcl::bitsizeof<T> / 2>;
 
     RegAlloc& reg_alloc;
     IR::Block& block;
+
+    std::vector<std::function<void()>> deferred_emits;
 };
 
+using SharedLabel = std::shared_ptr<Xbyak::Label>;
+
+inline SharedLabel GenSharedLabel() {
+    return std::make_shared<Xbyak::Label>();
+}
+
 class EmitX64 {
 public:
     struct BlockDescriptor {
@@ -93,7 +105,7 @@ protected:
     virtual std::string LocationDescriptorToFriendlyName(const IR::LocationDescriptor&) const = 0;
     void EmitAddCycles(size_t cycles);
     Xbyak::Label EmitCond(IR::Cond cond);
-    BlockDescriptor RegisterBlock(const IR::LocationDescriptor& location_descriptor, CodePtr entrypoint, CodePtr entrypoint_far, size_t size);
+    BlockDescriptor RegisterBlock(const IR::LocationDescriptor& location_descriptor, CodePtr entrypoint, size_t size);
     void PushRSBHelper(Xbyak::Reg64 loc_desc_reg, Xbyak::Reg64 index_reg, IR::LocationDescriptor target);
 
     // Terminal instruction emitters
diff --git a/externals/dynarmic/src/dynarmic/backend/x64/emit_x64_floating_point.cpp b/externals/dynarmic/src/dynarmic/backend/x64/emit_x64_floating_point.cpp
index 9dbbbd515..772d06358 100755
--- a/externals/dynarmic/src/dynarmic/backend/x64/emit_x64_floating_point.cpp
+++ b/externals/dynarmic/src/dynarmic/backend/x64/emit_x64_floating_point.cpp
@@ -152,18 +152,18 @@ void ForceToDefaultNaN(BlockOfCode& code, Xbyak::Xmm result) {
 }
 
 template<size_t fsize>
-Xbyak::Label ProcessNaN(BlockOfCode& code, Xbyak::Xmm a) {
-    Xbyak::Label nan, end;
+SharedLabel ProcessNaN(BlockOfCode& code, EmitContext& ctx, Xbyak::Xmm a) {
+    SharedLabel nan = GenSharedLabel(), end = GenSharedLabel();
 
     FCODE(ucomis)(a, a);
-    code.jp(nan, code.T_NEAR);
-    code.SwitchToFarCode();
-    code.L(nan);
+    code.jp(*nan, code.T_NEAR);
 
-    code.orps(a, code.XmmBConst<fsize>(xword, fsize == 32 ? 0x00400000 : 0x0008'0000'0000'0000));
+    ctx.deferred_emits.emplace_back([=, &code] {
+        code.L(*nan);
+        code.orps(a, code.XmmBConst<fsize>(xword, fsize == 32 ? 0x00400000 : 0x0008'0000'0000'0000));
+        code.jmp(*end, code.T_NEAR);
+    });
 
-    code.jmp(end, code.T_NEAR);
-    code.SwitchToNearCode();
     return end;
 }
 
@@ -268,12 +268,12 @@ template<size_t fsize, typename Function>
 void FPTwoOp(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, Function fn) {
     auto args = ctx.reg_alloc.GetArgumentInfo(inst);
 
-    Xbyak::Label end;
+    SharedLabel end = GenSharedLabel();
 
     Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]);
 
     if (!ctx.FPCR().DN() && !ctx.HasOptimization(OptimizationFlag::Unsafe_InaccurateNaN)) {
-        end = ProcessNaN<fsize>(code, result);
+        end = ProcessNaN<fsize>(code, ctx, result);
     }
     if constexpr (std::is_member_function_pointer_v<Function>) {
         (code.*fn)(result, result);
@@ -287,7 +287,7 @@ void FPTwoOp(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, Function fn) {
     } else {
         PostProcessNaN<fsize>(code, result, ctx.reg_alloc.ScratchXmm());
     }
-    code.L(end);
+    code.L(*end);
 
     ctx.reg_alloc.DefineValue(inst, result);
 }
@@ -321,7 +321,7 @@ void FPThreeOp(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, Function fn)
     const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm();
     const Xbyak::Reg64 tmp = ctx.reg_alloc.ScratchGpr();
 
-    Xbyak::Label end, nan, op_are_nans;
+    SharedLabel end = GenSharedLabel(), nan = GenSharedLabel();
 
     code.movaps(result, op1);
     if constexpr (std::is_member_function_pointer_v<Function>) {
@@ -330,19 +330,21 @@ void FPThreeOp(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, Function fn)
         fn(result, op2);
     }
     FCODE(ucomis)(result, result);
-    code.jp(nan, code.T_NEAR);
-    code.L(end);
+    code.jp(*nan, code.T_NEAR);
+    code.L(*end);
 
-    code.SwitchToFarCode();
-    code.L(nan);
-    FCODE(ucomis)(op1, op2);
-    code.jp(op_are_nans);
-    // Here we must return a positive NaN, because the indefinite value on x86 is a negative NaN!
-    code.movaps(result, code.XmmBConst<fsize>(xword, FP::FPInfo<FPT>::DefaultNaN()));
-    code.jmp(end, code.T_NEAR);
-    code.L(op_are_nans);
-    EmitPostProcessNaNs<fsize>(code, result, op1, op2, tmp, end);
-    code.SwitchToNearCode();
+    ctx.deferred_emits.emplace_back([=, &code] {
+        Xbyak::Label op_are_nans;
+
+        code.L(*nan);
+        FCODE(ucomis)(op1, op2);
+        code.jp(op_are_nans);
+        // Here we must return a positive NaN, because the indefinite value on x86 is a negative NaN!
+        code.movaps(result, code.XmmBConst<fsize>(xword, FP::FPInfo<FPT>::DefaultNaN()));
+        code.jmp(*end, code.T_NEAR);
+        code.L(op_are_nans);
+        EmitPostProcessNaNs<fsize>(code, result, op1, op2, tmp, *end);
+    });
 
     ctx.reg_alloc.DefineValue(inst, result);
 }
@@ -428,39 +430,39 @@ static void EmitFPMinMax(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
 
     DenormalsAreZero<fsize>(code, ctx, {result, operand});
 
-    Xbyak::Label equal, end, nan;
+    SharedLabel equal = GenSharedLabel(), end = GenSharedLabel();
 
     FCODE(ucomis)(result, operand);
-    code.jz(equal, code.T_NEAR);
+    code.jz(*equal, code.T_NEAR);
     if constexpr (is_max) {
         FCODE(maxs)(result, operand);
     } else {
         FCODE(mins)(result, operand);
     }
-    code.L(end);
+    code.L(*end);
 
-    code.SwitchToFarCode();
+    ctx.deferred_emits.emplace_back([=, &code, &ctx] {
+        Xbyak::Label nan;
 
-    code.L(equal);
-    code.jp(nan);
-    if constexpr (is_max) {
-        code.andps(result, operand);
-    } else {
-        code.orps(result, operand);
-    }
-    code.jmp(end);
+        code.L(*equal);
+        code.jp(nan);
+        if constexpr (is_max) {
+            code.andps(result, operand);
+        } else {
+            code.orps(result, operand);
+        }
+        code.jmp(*end);
 
-    code.L(nan);
-    if (ctx.FPCR().DN()) {
-        code.movaps(result, code.XmmBConst<fsize>(xword, fsize == 32 ? f32_nan : f64_nan));
-        code.jmp(end);
-    } else {
-        code.movaps(tmp, result);
-        FCODE(adds)(result, operand);
-        EmitPostProcessNaNs<fsize>(code, result, tmp, operand, gpr_scratch, end);
-    }
-
-    code.SwitchToNearCode();
+        code.L(nan);
+        if (ctx.FPCR().DN()) {
+            code.movaps(result, code.XmmBConst<fsize>(xword, fsize == 32 ? f32_nan : f64_nan));
+            code.jmp(*end);
+        } else {
+            code.movaps(tmp, result);
+            FCODE(adds)(result, operand);
+            EmitPostProcessNaNs<fsize>(code, result, tmp, operand, gpr_scratch, *end);
+        }
+    });
 
     ctx.reg_alloc.DefineValue(inst, result);
 }
@@ -469,7 +471,6 @@ template<size_t fsize, bool is_max>
 static void EmitFPMinMaxNumeric(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
     using FPT = mcl::unsigned_integer_of_size<fsize>;
     constexpr FPT default_nan = FP::FPInfo<FPT>::DefaultNaN();
-    constexpr u8 mantissa_msb_bit = static_cast<u8>(FP::FPInfo<FPT>::explicit_mantissa_width - 1);
 
     auto args = ctx.reg_alloc.GetArgumentInfo(inst);
 
@@ -492,7 +493,7 @@ static void EmitFPMinMaxNumeric(BlockOfCode& code, EmitContext& ctx, IR::Inst* i
         Xbyak::Reg tmp = ctx.reg_alloc.ScratchGpr();
         tmp.setBit(fsize);
 
-        const auto move_to_tmp = [&](const Xbyak::Xmm& xmm) {
+        const auto move_to_tmp = [=, &code](const Xbyak::Xmm& xmm) {
             if constexpr (fsize == 32) {
                 code.movd(tmp.cvt32(), xmm);
             } else {
@@ -500,78 +501,79 @@ static void EmitFPMinMaxNumeric(BlockOfCode& code, EmitContext& ctx, IR::Inst* i
             }
         };
 
-        Xbyak::Label end, z, nan, op2_is_nan, snan, maybe_both_nan, normal;
+        SharedLabel end = GenSharedLabel(), z = GenSharedLabel();
 
         FCODE(ucomis)(op1, op2);
-        code.jz(z, code.T_NEAR);
-        code.L(normal);
+        code.jz(*z, code.T_NEAR);
         if constexpr (is_max) {
             FCODE(maxs)(op2, op1);
         } else {
             FCODE(mins)(op2, op1);
         }
-        code.L(end);
+        code.L(*end);
 
-        code.SwitchToFarCode();
+        ctx.deferred_emits.emplace_back([=, &code, &ctx] {
+            Xbyak::Label nan, op2_is_nan, snan, maybe_both_nan;
 
-        code.L(z);
-        code.jp(nan);
-        if constexpr (is_max) {
-            code.andps(op2, op1);
-        } else {
-            code.orps(op2, op1);
-        }
-        code.jmp(end);
+            constexpr u8 mantissa_msb_bit = static_cast<u8>(FP::FPInfo<FPT>::explicit_mantissa_width - 1);
 
-        // NaN requirements:
-        // op1     op2      result
-        // SNaN    anything op1
-        // !SNaN   SNaN     op2
-        // QNaN    !NaN     op2
-        // !NaN    QNaN     op1
-        // QNaN    QNaN     op1
+            code.L(*z);
+            code.jp(nan);
+            if constexpr (is_max) {
+                code.andps(op2, op1);
+            } else {
+                code.orps(op2, op1);
+            }
+            code.jmp(*end);
 
-        code.L(nan);
-        FCODE(ucomis)(op1, op1);
-        code.jnp(op2_is_nan);
+            // NaN requirements:
+            // op1     op2      result
+            // SNaN    anything op1
+            // !SNaN   SNaN     op2
+            // QNaN    !NaN     op2
+            // !NaN    QNaN     op1
+            // QNaN    QNaN     op1
 
-        // op1 is NaN
-        move_to_tmp(op1);
-        code.bt(tmp, mantissa_msb_bit);
-        code.jc(maybe_both_nan);
-        if (ctx.FPCR().DN()) {
-            code.L(snan);
-            code.movaps(op2, code.XmmBConst<fsize>(xword, default_nan));
-            code.jmp(end);
-        } else {
-            code.movaps(op2, op1);
-            code.L(snan);
-            code.orps(op2, code.XmmBConst<fsize>(xword, FP::FPInfo<FPT>::mantissa_msb));
-            code.jmp(end);
-        }
+            code.L(nan);
+            FCODE(ucomis)(op1, op1);
+            code.jnp(op2_is_nan);
 
-        code.L(maybe_both_nan);
-        FCODE(ucomis)(op2, op2);
-        code.jnp(end, code.T_NEAR);
-        if (ctx.FPCR().DN()) {
-            code.jmp(snan);
-        } else {
+            // op1 is NaN
+            move_to_tmp(op1);
+            code.bt(tmp, mantissa_msb_bit);
+            code.jc(maybe_both_nan);
+            if (ctx.FPCR().DN()) {
+                code.L(snan);
+                code.movaps(op2, code.XmmBConst<fsize>(xword, default_nan));
+                code.jmp(*end);
+            } else {
+                code.movaps(op2, op1);
+                code.L(snan);
+                code.orps(op2, code.XmmBConst<fsize>(xword, FP::FPInfo<FPT>::mantissa_msb));
+                code.jmp(*end);
+            }
+
+            code.L(maybe_both_nan);
+            FCODE(ucomis)(op2, op2);
+            code.jnp(*end, code.T_NEAR);
+            if (ctx.FPCR().DN()) {
+                code.jmp(snan);
+            } else {
+                move_to_tmp(op2);
+                code.bt(tmp.cvt64(), mantissa_msb_bit);
+                code.jnc(snan);
+                code.movaps(op2, op1);
+                code.jmp(*end);
+            }
+
+            // op2 is NaN
+            code.L(op2_is_nan);
             move_to_tmp(op2);
-            code.bt(tmp.cvt64(), mantissa_msb_bit);
+            code.bt(tmp, mantissa_msb_bit);
             code.jnc(snan);
             code.movaps(op2, op1);
-            code.jmp(end);
-        }
-
-        // op2 is NaN
-        code.L(op2_is_nan);
-        move_to_tmp(op2);
-        code.bt(tmp, mantissa_msb_bit);
-        code.jnc(snan);
-        code.movaps(op2, op1);
-        code.jmp(end);
-
-        code.SwitchToNearCode();
+            code.jmp(*end);
+        });
     }
 
     ctx.reg_alloc.DefineValue(inst, op2);
@@ -636,7 +638,7 @@ static void EmitFPMulAdd(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
     }
 
     if (code.HasHostFeature(HostFeature::FMA)) {
-        Xbyak::Label end, fallback;
+        SharedLabel end = GenSharedLabel(), fallback = GenSharedLabel();
 
         const Xbyak::Xmm operand1 = ctx.reg_alloc.UseXmm(args[0]);
         const Xbyak::Xmm operand2 = ctx.reg_alloc.UseXmm(args[1]);
@@ -650,34 +652,34 @@ static void EmitFPMulAdd(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
         code.movaps(tmp, code.XmmBConst<fsize>(xword, fsize == 32 ? f32_non_sign_mask : f64_non_sign_mask));
         code.andps(tmp, result);
         FCODE(ucomis)(tmp, code.XmmBConst<fsize>(xword, fsize == 32 ? f32_smallest_normal : f64_smallest_normal));
-        code.jz(fallback, code.T_NEAR);
-        code.L(end);
+        code.jz(*fallback, code.T_NEAR);
+        code.L(*end);
 
-        code.SwitchToFarCode();
-        code.L(fallback);
+        ctx.deferred_emits.emplace_back([=, &code, &ctx] {
+            code.L(*fallback);
 
-        code.sub(rsp, 8);
-        ABI_PushCallerSaveRegistersAndAdjustStackExcept(code, HostLocXmmIdx(result.getIdx()));
-        code.movq(code.ABI_PARAM1, operand1);
-        code.movq(code.ABI_PARAM2, operand2);
-        code.movq(code.ABI_PARAM3, operand3);
-        code.mov(code.ABI_PARAM4.cvt32(), ctx.FPCR().Value());
+            code.sub(rsp, 8);
+            ABI_PushCallerSaveRegistersAndAdjustStackExcept(code, HostLocXmmIdx(result.getIdx()));
+            code.movq(code.ABI_PARAM1, operand1);
+            code.movq(code.ABI_PARAM2, operand2);
+            code.movq(code.ABI_PARAM3, operand3);
+            code.mov(code.ABI_PARAM4.cvt32(), ctx.FPCR().Value());
 #ifdef _WIN32
-        code.sub(rsp, 16 + ABI_SHADOW_SPACE);
-        code.lea(rax, code.ptr[code.r15 + code.GetJitStateInfo().offsetof_fpsr_exc]);
-        code.mov(qword[rsp + ABI_SHADOW_SPACE], rax);
-        code.CallFunction(&FP::FPMulAdd<FPT>);
-        code.add(rsp, 16 + ABI_SHADOW_SPACE);
+            code.sub(rsp, 16 + ABI_SHADOW_SPACE);
+            code.lea(rax, code.ptr[code.r15 + code.GetJitStateInfo().offsetof_fpsr_exc]);
+            code.mov(qword[rsp + ABI_SHADOW_SPACE], rax);
+            code.CallFunction(&FP::FPMulAdd<FPT>);
+            code.add(rsp, 16 + ABI_SHADOW_SPACE);
#else
-        code.lea(code.ABI_PARAM5, code.ptr[code.r15 + code.GetJitStateInfo().offsetof_fpsr_exc]);
-        code.CallFunction(&FP::FPMulAdd<FPT>);
+            code.lea(code.ABI_PARAM5, code.ptr[code.r15 + code.GetJitStateInfo().offsetof_fpsr_exc]);
+            code.CallFunction(&FP::FPMulAdd<FPT>);
 #endif
-        code.movq(result, code.ABI_RETURN);
-        ABI_PopCallerSaveRegistersAndAdjustStackExcept(code, HostLocXmmIdx(result.getIdx()));
-        code.add(rsp, 8);
+            code.movq(result, code.ABI_RETURN);
+            ABI_PopCallerSaveRegistersAndAdjustStackExcept(code, HostLocXmmIdx(result.getIdx()));
+            code.add(rsp, 8);
 
-        code.jmp(end, code.T_NEAR);
-        code.SwitchToNearCode();
+            code.jmp(*end, code.T_NEAR);
+        });
 
         ctx.reg_alloc.DefineValue(inst, result);
         return;
@@ -735,7 +737,7 @@ static void EmitFPMulX(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
     const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm();
     const Xbyak::Reg64 tmp = do_default_nan ? INVALID_REG : ctx.reg_alloc.ScratchGpr();
 
-    Xbyak::Label end, nan, op_are_nans;
+    SharedLabel end = GenSharedLabel(), nan = GenSharedLabel();
 
     if (code.HasHostFeature(HostFeature::AVX)) {
         FCODE(vmuls)(result, op1, op2);
@@ -744,30 +746,32 @@ static void EmitFPMulX(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
         FCODE(muls)(result, op2);
     }
     FCODE(ucomis)(result, result);
-    code.jp(nan, code.T_NEAR);
-    code.L(end);
+    code.jp(*nan, code.T_NEAR);
+    code.L(*end);
 
-    code.SwitchToFarCode();
-    code.L(nan);
-    FCODE(ucomis)(op1, op2);
-    code.jp(op_are_nans);
-    if (code.HasHostFeature(HostFeature::AVX)) {
-        code.vxorps(result, op1, op2);
-    } else {
-        code.movaps(result, op1);
-        code.xorps(result, op2);
-    }
-    code.andps(result, code.XmmBConst<fsize>(xword, FP::FPInfo<FPT>::sign_mask));
-    code.orps(result, code.XmmBConst<fsize>(xword, FP::FPValue<FPT, false, 0, 2>()));
-    code.jmp(end, code.T_NEAR);
-    code.L(op_are_nans);
-    if (do_default_nan) {
-        code.movaps(result, code.XmmBConst<fsize>(xword, FP::FPInfo<FPT>::DefaultNaN()));
-        code.jmp(end, code.T_NEAR);
-    } else {
-        EmitPostProcessNaNs<fsize>(code, result, op1, op2, tmp, end);
-    }
-    code.SwitchToNearCode();
+    ctx.deferred_emits.emplace_back([=, &code] {
+        Xbyak::Label op_are_nans;
+
+        code.L(*nan);
+        FCODE(ucomis)(op1, op2);
+        code.jp(op_are_nans);
+        if (code.HasHostFeature(HostFeature::AVX)) {
+            code.vxorps(result, op1, op2);
+        } else {
+            code.movaps(result, op1);
+            code.xorps(result, op2);
+        }
+        code.andps(result, code.XmmBConst<fsize>(xword, FP::FPInfo<FPT>::sign_mask));
+        code.orps(result, code.XmmBConst<fsize>(xword, FP::FPValue<FPT, false, 0, 2>()));
+        code.jmp(*end, code.T_NEAR);
+        code.L(op_are_nans);
+        if (do_default_nan) {
+            code.movaps(result, code.XmmBConst<fsize>(xword, FP::FPInfo<FPT>::DefaultNaN()));
+            code.jmp(*end, code.T_NEAR);
+        } else {
+            EmitPostProcessNaNs<fsize>(code, result, op1, op2, tmp, *end);
+        }
+    });
 
     ctx.reg_alloc.DefineValue(inst, result);
 }
@@ -871,7 +875,7 @@ static void EmitFPRecipStepFused(BlockOfCode& code, EmitContext& ctx, IR::Inst*
     }
 
     if (code.HasHostFeature(HostFeature::FMA)) {
-        Xbyak::Label end, fallback;
+        SharedLabel end = GenSharedLabel(), fallback = GenSharedLabel();
 
         const Xbyak::Xmm operand1 = ctx.reg_alloc.UseXmm(args[0]);
         const Xbyak::Xmm operand2 = ctx.reg_alloc.UseXmm(args[1]);
@@ -880,25 +884,25 @@ static void EmitFPRecipStepFused(BlockOfCode& code, EmitContext& ctx, IR::Inst*
         code.movaps(result, code.XmmBConst<fsize>(xword, FP::FPValue<FPT, false, 0, 2>()));
         FCODE(vfnmadd231s)(result, operand1, operand2);
         FCODE(ucomis)(result, result);
-        code.jp(fallback, code.T_NEAR);
-        code.L(end);
+        code.jp(*fallback, code.T_NEAR);
+        code.L(*end);
 
-        code.SwitchToFarCode();
-        code.L(fallback);
+        ctx.deferred_emits.emplace_back([=, &code, &ctx] {
+            code.L(*fallback);
 
-        code.sub(rsp, 8);
-        ABI_PushCallerSaveRegistersAndAdjustStackExcept(code, HostLocXmmIdx(result.getIdx()));
-        code.movq(code.ABI_PARAM1, operand1);
-        code.movq(code.ABI_PARAM2, operand2);
-        code.mov(code.ABI_PARAM3.cvt32(), ctx.FPCR().Value());
-        code.lea(code.ABI_PARAM4, code.ptr[code.r15 + code.GetJitStateInfo().offsetof_fpsr_exc]);
-        code.CallFunction(&FP::FPRecipStepFused<FPT>);
-        code.movq(result, code.ABI_RETURN);
-        ABI_PopCallerSaveRegistersAndAdjustStackExcept(code, HostLocXmmIdx(result.getIdx()));
-        code.add(rsp, 8);
+            code.sub(rsp, 8);
+            ABI_PushCallerSaveRegistersAndAdjustStackExcept(code, HostLocXmmIdx(result.getIdx()));
+            code.movq(code.ABI_PARAM1, operand1);
+            code.movq(code.ABI_PARAM2, operand2);
+            code.mov(code.ABI_PARAM3.cvt32(), ctx.FPCR().Value());
+            code.lea(code.ABI_PARAM4, code.ptr[code.r15 + code.GetJitStateInfo().offsetof_fpsr_exc]);
+            code.CallFunction(&FP::FPRecipStepFused<FPT>);
+            code.movq(result, code.ABI_RETURN);
+            ABI_PopCallerSaveRegistersAndAdjustStackExcept(code, HostLocXmmIdx(result.getIdx()));
+            code.add(rsp, 8);
 
-        code.jmp(end, code.T_NEAR);
-        code.SwitchToNearCode();
+            code.jmp(*end, code.T_NEAR);
+        });
 
         ctx.reg_alloc.DefineValue(inst, result);
         return;
@@ -1034,8 +1038,7 @@ static void EmitFPRSqrtEstimate(BlockOfCode& code, EmitContext& ctx, IR::Inst* i
         const Xbyak::Xmm value = ctx.reg_alloc.ScratchXmm();
         [[maybe_unused]] const Xbyak::Reg32 tmp = ctx.reg_alloc.ScratchGpr().cvt32();
 
-        Xbyak::Label fallback, bad_values, end, default_nan;
-        bool needs_fallback = false;
+        SharedLabel bad_values = GenSharedLabel(), end = GenSharedLabel();
 
         code.movaps(value, operand);
 
@@ -1045,7 +1048,7 @@ static void EmitFPRSqrtEstimate(BlockOfCode& code, EmitContext& ctx, IR::Inst* i
 
         // Detect NaNs, negatives, zeros, denormals and infinities
         FCODE(ucomis)(value, code.XmmBConst<fsize>(xword, FPT(1) << FP::FPInfo<FPT>::explicit_mantissa_width));
-        code.jna(bad_values, code.T_NEAR);
+        code.jna(*bad_values, code.T_NEAR);
 
         FCODE(sqrts)(value, value);
         ICODE(mov)(result, code.XmmBConst<fsize>(xword, FP::FPValue<FPT, false, 0, 1>()));
@@ -1054,109 +1057,110 @@ static void EmitFPRSqrtEstimate(BlockOfCode& code, EmitContext& ctx, IR::Inst* i
         ICODE(padd)(result, code.XmmBConst<fsize>(xword, fsize == 32 ? 0x00004000 : 0x0000'0800'0000'0000));
         code.pand(result, xmm0);
 
-        code.L(end);
+        code.L(*end);
 
-        code.SwitchToFarCode();
+        ctx.deferred_emits.emplace_back([=, &code, &ctx] {
+            Xbyak::Label fallback, default_nan;
+            bool needs_fallback = false;
 
-        code.L(bad_values);
-        if constexpr (fsize == 32) {
-            code.movd(tmp, operand);
+            code.L(*bad_values);
+            if constexpr (fsize == 32) {
+                code.movd(tmp, operand);
+
+                if (!ctx.FPCR().FZ()) {
+                    if (ctx.FPCR().DN()) {
+                        // a > 0x80000000
+                        code.cmp(tmp, 0x80000000);
+                        code.ja(default_nan, code.T_NEAR);
+                    }
+
+                    // a > 0 && a < 0x00800000;
+                    code.sub(tmp, 1);
+                    code.cmp(tmp, 0x007FFFFF);
+                    code.jb(fallback);
+                    needs_fallback = true;
+                }
+
+                code.rsqrtss(result, operand);
 
-            if (!ctx.FPCR().FZ()) {
                 if (ctx.FPCR().DN()) {
-                    // a > 0x80000000
-                    code.cmp(tmp, 0x80000000);
-                    code.ja(default_nan, code.T_NEAR);
-                }
-
-                // a > 0 && a < 0x00800000;
-                code.sub(tmp, 1);
-                code.cmp(tmp, 0x007FFFFF);
-                code.jb(fallback);
-                needs_fallback = true;
-            }
-
-            code.rsqrtss(result, operand);
-
-            if (ctx.FPCR().DN()) {
-                code.ucomiss(result, result);
-                code.jnp(end, code.T_NEAR);
-            } else {
-                // FZ ? (a >= 0x80800000 && a <= 0xFF800000) : (a >= 0x80000001 && a <= 0xFF800000)
-                // !FZ path takes into account the subtraction by one from the earlier block
-                code.add(tmp, ctx.FPCR().FZ() ? 0x7F800000 : 0x80000000);
-                code.cmp(tmp, ctx.FPCR().FZ() ? 0x7F000001 : 0x7F800000);
-                code.jnb(end, code.T_NEAR);
-            }
-
-            code.L(default_nan);
-            code.movd(result, code.XmmBConst<32>(xword, 0x7FC00000));
-            code.jmp(end, code.T_NEAR);
-        } else {
-            Xbyak::Label nan, zero;
-
-            code.movaps(value, operand);
-            DenormalsAreZero<fsize>(code, ctx, {value});
-            code.pxor(result, result);
-
-            code.ucomisd(value, result);
-            if (ctx.FPCR().DN()) {
-                code.jc(default_nan);
-                code.je(zero);
-            } else {
-                code.jp(nan);
-                code.je(zero);
-                code.jc(default_nan);
-            }
-
-            if (!ctx.FPCR().FZ()) {
-                needs_fallback = true;
-                code.jmp(fallback);
-            } else {
-                // result = 0
-                code.jmp(end, code.T_NEAR);
-            }
-
-            code.L(zero);
-            if (code.HasHostFeature(HostFeature::AVX)) {
-                code.vpor(result, value, code.XmmBConst<64>(xword, 0x7FF0'0000'0000'0000));
-            } else {
-                code.movaps(result, value);
-                code.por(result, code.XmmBConst<64>(xword, 0x7FF0'0000'0000'0000));
-            }
-            code.jmp(end, code.T_NEAR);
-
-            code.L(nan);
-            if (!ctx.FPCR().DN()) {
-                if (code.HasHostFeature(HostFeature::AVX)) {
-                    code.vpor(result, operand, code.XmmBConst<64>(xword, 0x0008'0000'0000'0000));
+                    code.ucomiss(result, result);
+                    code.jnp(*end, code.T_NEAR);
                 } else {
-                    code.movaps(result, operand);
-                    code.por(result, code.XmmBConst<64>(xword, 0x0008'0000'0000'0000));
+                    // FZ ? (a >= 0x80800000 && a <= 0xFF800000) : (a >= 0x80000001 && a <= 0xFF800000)
+                    // !FZ path takes into account the subtraction by one from the earlier block
+                    code.add(tmp, ctx.FPCR().FZ() ? 0x7F800000 : 0x80000000);
+                    code.cmp(tmp, ctx.FPCR().FZ() ? 0x7F000001 : 0x7F800000);
+                    code.jnb(*end, code.T_NEAR);
                 }
-                code.jmp(end, code.T_NEAR);
+
+                code.L(default_nan);
+                code.movd(result, code.XmmBConst<32>(xword, 0x7FC00000));
+                code.jmp(*end, code.T_NEAR);
+            } else {
+                Xbyak::Label nan, zero;
+
+                code.movaps(value, operand);
+                DenormalsAreZero<fsize>(code, ctx, {value});
+                code.pxor(result, result);
+
+                code.ucomisd(value, result);
+                if (ctx.FPCR().DN()) {
+                    code.jc(default_nan);
+                    code.je(zero);
+                } else {
+                    code.jp(nan);
+                    code.je(zero);
+                    code.jc(default_nan);
+                }
+
+                if (!ctx.FPCR().FZ()) {
+                    needs_fallback = true;
+                    code.jmp(fallback);
+                } else {
+                    // result = 0
+                    code.jmp(*end, code.T_NEAR);
+                }
+
+                code.L(zero);
+                if (code.HasHostFeature(HostFeature::AVX)) {
+                    code.vpor(result, value, code.XmmBConst<64>(xword, 0x7FF0'0000'0000'0000));
+                } else {
+                    code.movaps(result, value);
+                    code.por(result, code.XmmBConst<64>(xword, 0x7FF0'0000'0000'0000));
+                }
+                code.jmp(*end, code.T_NEAR);
+
+                code.L(nan);
+                if (!ctx.FPCR().DN()) {
+                    if (code.HasHostFeature(HostFeature::AVX)) {
+                        code.vpor(result, operand, code.XmmBConst<64>(xword, 0x0008'0000'0000'0000));
+                    } else {
+                        code.movaps(result, operand);
+                        code.por(result, code.XmmBConst<64>(xword, 0x0008'0000'0000'0000));
+                    }
+                    code.jmp(*end, code.T_NEAR);
+                }
+
+                code.L(default_nan);
+                code.movq(result, code.XmmBConst<64>(xword, 0x7FF8'0000'0000'0000));
+                code.jmp(*end, code.T_NEAR);
             }
 
-            code.L(default_nan);
-            code.movq(result, code.XmmBConst<64>(xword, 0x7FF8'0000'0000'0000));
-            code.jmp(end, code.T_NEAR);
-        }
-
-        code.L(fallback);
-        if (needs_fallback) {
-            code.sub(rsp, 8);
-            ABI_PushCallerSaveRegistersAndAdjustStackExcept(code, HostLocXmmIdx(result.getIdx()));
-            code.movq(code.ABI_PARAM1, operand);
-            code.mov(code.ABI_PARAM2.cvt32(), ctx.FPCR().Value());
-            code.lea(code.ABI_PARAM3, code.ptr[code.r15 + code.GetJitStateInfo().offsetof_fpsr_exc]);
-            code.CallFunction(&FP::FPRSqrtEstimate<FPT>);
-            code.movq(result, rax);
-            ABI_PopCallerSaveRegistersAndAdjustStackExcept(code, HostLocXmmIdx(result.getIdx()));
-            code.add(rsp, 8);
-            code.jmp(end, code.T_NEAR);
-        }
-
-        code.SwitchToNearCode();
+            code.L(fallback);
+            if (needs_fallback) {
+                code.sub(rsp, 8);
+                ABI_PushCallerSaveRegistersAndAdjustStackExcept(code, HostLocXmmIdx(result.getIdx()));
+                code.movq(code.ABI_PARAM1, operand);
+                code.mov(code.ABI_PARAM2.cvt32(), ctx.FPCR().Value());
+                code.lea(code.ABI_PARAM3, code.ptr[code.r15 + code.GetJitStateInfo().offsetof_fpsr_exc]);
+                code.CallFunction(&FP::FPRSqrtEstimate<FPT>);
+                code.movq(result, rax);
+                ABI_PopCallerSaveRegistersAndAdjustStackExcept(code, HostLocXmmIdx(result.getIdx()));
+                code.add(rsp, 8);
+                code.jmp(*end, code.T_NEAR);
+            }
+        });
 
         ctx.reg_alloc.DefineValue(inst, result);
     } else {
@@ -1201,7 +1205,7 @@ static void EmitFPRSqrtStepFused(BlockOfCode& code, EmitContext& ctx, IR::Inst*
     }
 
     if (code.HasHostFeature(HostFeature::FMA | HostFeature::AVX)) {
-        Xbyak::Label end, fallback;
+        SharedLabel end = GenSharedLabel(), fallback = GenSharedLabel();
 
         const Xbyak::Xmm operand1 = ctx.reg_alloc.UseXmm(args[0]);
         const Xbyak::Xmm operand2 = ctx.reg_alloc.UseXmm(args[1]);
@@ -1220,27 +1224,27 @@ static void EmitFPRSqrtStepFused(BlockOfCode& code, EmitContext& ctx, IR::Inst*
         code.cmp(tmp.cvt16(), fsize == 32 ? 0x7f00 : 0x7fe0);
         ctx.reg_alloc.Release(tmp);
 
-        code.jae(fallback, code.T_NEAR);
+        code.jae(*fallback, code.T_NEAR);
 
         FCODE(vmuls)(result, result, code.XmmBConst<fsize>(xword, FP::FPValue<FPT, false, -1, 1>()));
-        code.L(end);
+        code.L(*end);
 
-        code.SwitchToFarCode();
-        code.L(fallback);
+        ctx.deferred_emits.emplace_back([=, &code, &ctx] {
+            code.L(*fallback);
 
-        code.sub(rsp, 8);
-        ABI_PushCallerSaveRegistersAndAdjustStackExcept(code, HostLocXmmIdx(result.getIdx()));
-        code.movq(code.ABI_PARAM1, operand1);
-        code.movq(code.ABI_PARAM2, operand2);
-        code.mov(code.ABI_PARAM3.cvt32(), ctx.FPCR().Value());
-        code.lea(code.ABI_PARAM4, code.ptr[code.r15 + code.GetJitStateInfo().offsetof_fpsr_exc]);
-        code.CallFunction(&FP::FPRSqrtStepFused<FPT>);
-        code.movq(result, code.ABI_RETURN);
-        ABI_PopCallerSaveRegistersAndAdjustStackExcept(code, HostLocXmmIdx(result.getIdx()));
-        code.add(rsp, 8);
+            code.sub(rsp, 8);
+            ABI_PushCallerSaveRegistersAndAdjustStackExcept(code, HostLocXmmIdx(result.getIdx()));
+            code.movq(code.ABI_PARAM1, operand1);
+            code.movq(code.ABI_PARAM2, operand2);
+            code.mov(code.ABI_PARAM3.cvt32(), ctx.FPCR().Value());
+            code.lea(code.ABI_PARAM4, code.ptr[code.r15 + code.GetJitStateInfo().offsetof_fpsr_exc]);
+            code.CallFunction(&FP::FPRSqrtStepFused<FPT>);
+            code.movq(result, code.ABI_RETURN);
+            ABI_PopCallerSaveRegistersAndAdjustStackExcept(code, HostLocXmmIdx(result.getIdx()));
+            code.add(rsp, 8);
 
-        code.jmp(end, code.T_NEAR);
-        code.SwitchToNearCode();
+            code.jmp(*end, code.T_NEAR);
+        });
 
         ctx.reg_alloc.DefineValue(inst, result);
         return;
@@ -1528,22 +1532,22 @@ static void EmitFPToFixed(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
         if constexpr (isize == 64) {
             const Xbyak::Xmm scratch = ctx.reg_alloc.ScratchXmm();
 
-            Xbyak::Label saturate_max, end;
-
             if (!unsigned_) {
+                SharedLabel saturate_max = GenSharedLabel(), end = GenSharedLabel();
+
                 ZeroIfNaN<64>(code, src, scratch);
 
                 code.movsd(scratch, code.XmmBConst<64>(xword, f64_max_s64_lim));
                 code.comisd(scratch, src);
-                code.jna(saturate_max, code.T_NEAR);
+                code.jna(*saturate_max, code.T_NEAR);
                 code.cvttsd2si(result, src);  // 64 bit gpr
-                code.L(end);
+                code.L(*end);
 
-                code.SwitchToFarCode();
-                code.L(saturate_max);
-                code.mov(result, 0x7FFF'FFFF'FFFF'FFFF);
-                code.jmp(end, code.T_NEAR);
-                code.SwitchToNearCode();
+                ctx.deferred_emits.emplace_back([=, &code] {
+                    code.L(*saturate_max);
+                    code.mov(result, 0x7FFF'FFFF'FFFF'FFFF);
+                    code.jmp(*end, code.T_NEAR);
+                });
             } else {
                 Xbyak::Label below_max;
 
diff --git a/externals/dynarmic/src/dynarmic/backend/x64/emit_x64_memory.cpp.inc b/externals/dynarmic/src/dynarmic/backend/x64/emit_x64_memory.cpp.inc
index b82aabb16..ead088c37 100755
--- a/externals/dynarmic/src/dynarmic/backend/x64/emit_x64_memory.cpp.inc
+++ b/externals/dynarmic/src/dynarmic/backend/x64/emit_x64_memory.cpp.inc
@@ -52,26 +52,27 @@ FakeCall AxxEmitX64::FastmemCallback(u64 rip_) {
 template<std::size_t bitsize, auto callback>
 void AxxEmitX64::EmitMemoryRead(AxxEmitContext& ctx, IR::Inst* inst) {
     auto args = ctx.reg_alloc.GetArgumentInfo(inst);
-    const bool ordered = IsOrdered(args[1].GetImmediateAccType());
+    const bool ordered = IsOrdered(args[2].GetImmediateAccType());
     const auto fastmem_marker = ShouldFastmem(ctx, inst);
 
     if (!conf.page_table && !fastmem_marker) {
         // Neither fastmem nor page table: Use callbacks
         if constexpr (bitsize == 128) {
-            ctx.reg_alloc.HostCall(nullptr, {}, args[0]);
+            ctx.reg_alloc.HostCall(nullptr, {}, args[1]);
             if (ordered) {
                 code.mfence();
             }
             code.CallFunction(memory_read_128);
             ctx.reg_alloc.DefineValue(inst, xmm1);
         } else {
-            ctx.reg_alloc.HostCall(inst, {}, args[0]);
+            ctx.reg_alloc.HostCall(inst, {}, args[1]);
             if (ordered) {
                 code.mfence();
             }
             Devirtualize<callback>(conf.callbacks).EmitCall(code);
             code.ZeroExtendFrom(bitsize, code.ABI_RETURN);
         }
+        EmitCheckMemoryAbort(ctx, inst);
         return;
     }
 
@@ -83,44 +84,50 @@ void AxxEmitX64::EmitMemoryRead(AxxEmitContext& ctx, IR::Inst* inst) {
         ctx.reg_alloc.ScratchGpr(HostLoc::RDX);
     }
 
-    const Xbyak::Reg64 vaddr = ctx.reg_alloc.UseGpr(args[0]);
+    const Xbyak::Reg64 vaddr = ctx.reg_alloc.UseGpr(args[1]);
     const int value_idx = bitsize == 128 ? ctx.reg_alloc.ScratchXmm().getIdx() : ctx.reg_alloc.ScratchGpr().getIdx();
 
     const auto wrapped_fn = read_fallbacks[std::make_tuple(ordered, bitsize, vaddr.getIdx(), value_idx)];
 
-    Xbyak::Label abort, end;
-    bool require_abort_handling = false;
+    SharedLabel abort = GenSharedLabel(), end = GenSharedLabel();
 
     if (fastmem_marker) {
         // Use fastmem
-        const auto src_ptr = EmitFastmemVAddr(code, ctx, abort, vaddr, require_abort_handling);
+        bool require_abort_handling;
+        const auto src_ptr = EmitFastmemVAddr(code, ctx, *abort, vaddr, require_abort_handling);
 
         const auto location = EmitReadMemoryMov<bitsize>(code, value_idx, src_ptr, ordered);
 
-        fastmem_patch_info.emplace(
-            mcl::bit_cast<u64>(location),
-            FastmemPatchInfo{
-                mcl::bit_cast<u64>(code.getCurr()),
-                mcl::bit_cast<u64>(wrapped_fn),
-                *fastmem_marker,
-                conf.recompile_on_fastmem_failure,
-            });
+        ctx.deferred_emits.emplace_back([=, this, &ctx] {
+            code.L(*abort);
+            code.call(wrapped_fn);
+
+            fastmem_patch_info.emplace(
+                mcl::bit_cast<u64>(location),
+                FastmemPatchInfo{
+                    mcl::bit_cast<u64>(code.getCurr()),
+                    mcl::bit_cast<u64>(wrapped_fn),
+                    *fastmem_marker,
+                    conf.recompile_on_fastmem_failure,
+                });
+
+            EmitCheckMemoryAbort(ctx, inst, end.get());
+            code.jmp(*end, code.T_NEAR);
+        });
     } else {
         // Use page table
         ASSERT(conf.page_table);
-        const auto src_ptr = EmitVAddrLookup(code, ctx, bitsize, abort, vaddr);
-        require_abort_handling = true;
+        const auto src_ptr = EmitVAddrLookup(code, ctx, bitsize, *abort, vaddr);
         EmitReadMemoryMov<bitsize>(code, value_idx, src_ptr, ordered);
-    }
-    code.L(end);
 
-    if (require_abort_handling) {
-        code.SwitchToFarCode();
-        code.L(abort);
-        code.call(wrapped_fn);
-        code.jmp(end, code.T_NEAR);
-        code.SwitchToNearCode();
+        ctx.deferred_emits.emplace_back([=, this, &ctx] {
+            code.L(*abort);
+            code.call(wrapped_fn);
+            EmitCheckMemoryAbort(ctx, inst, end.get());
+            code.jmp(*end, code.T_NEAR);
+        });
     }
+    code.L(*end);
 
     if constexpr (bitsize == 128) {
         ctx.reg_alloc.DefineValue(inst, Xbyak::Xmm{value_idx});
@@ -132,24 +139,25 @@ void AxxEmitX64::EmitMemoryRead(AxxEmitContext& ctx, IR::Inst* inst) {
 template<std::size_t bitsize, auto callback>
 void AxxEmitX64::EmitMemoryWrite(AxxEmitContext& ctx, IR::Inst* inst) {
     auto args = ctx.reg_alloc.GetArgumentInfo(inst);
-    const bool ordered = IsOrdered(args[2].GetImmediateAccType());
+    const bool ordered = IsOrdered(args[3].GetImmediateAccType());
     const auto fastmem_marker = ShouldFastmem(ctx, inst);
 
     if (!conf.page_table && !fastmem_marker) {
         // Neither fastmem nor page table: Use callbacks
         if constexpr (bitsize == 128) {
-            ctx.reg_alloc.Use(args[0], ABI_PARAM2);
-            ctx.reg_alloc.Use(args[1], HostLoc::XMM1);
+            ctx.reg_alloc.Use(args[1], ABI_PARAM2);
+            ctx.reg_alloc.Use(args[2], HostLoc::XMM1);
             ctx.reg_alloc.EndOfAllocScope();
             ctx.reg_alloc.HostCall(nullptr);
             code.CallFunction(memory_write_128);
         } else {
-            ctx.reg_alloc.HostCall(nullptr, {}, args[0], args[1]);
+            ctx.reg_alloc.HostCall(nullptr, {}, args[1], args[2]);
             Devirtualize<callback>(conf.callbacks).EmitCall(code);
         }
         if (ordered) {
             code.mfence();
         }
+        EmitCheckMemoryAbort(ctx, inst);
         return;
     }
 
@@ -161,58 +169,64 @@ void AxxEmitX64::EmitMemoryWrite(AxxEmitContext& ctx, IR::Inst* inst) {
         ctx.reg_alloc.ScratchGpr(HostLoc::RDX);
     }
 
-    const Xbyak::Reg64 vaddr = ctx.reg_alloc.UseGpr(args[0]);
+    const Xbyak::Reg64 vaddr = ctx.reg_alloc.UseGpr(args[1]);
     const int value_idx = bitsize == 128
-                              ? ctx.reg_alloc.UseXmm(args[1]).getIdx()
-                              : (ordered ? ctx.reg_alloc.UseScratchGpr(args[1]).getIdx() : ctx.reg_alloc.UseGpr(args[1]).getIdx());
+                              ? ctx.reg_alloc.UseXmm(args[2]).getIdx()
+                              : (ordered ? ctx.reg_alloc.UseScratchGpr(args[2]).getIdx() : ctx.reg_alloc.UseGpr(args[2]).getIdx());
 
     const auto wrapped_fn = write_fallbacks[std::make_tuple(ordered, bitsize, vaddr.getIdx(), value_idx)];
 
-    Xbyak::Label abort, end;
-    bool require_abort_handling = false;
+    SharedLabel abort = GenSharedLabel(), end = GenSharedLabel();
 
     if (fastmem_marker) {
         // Use fastmem
-        const auto dest_ptr = EmitFastmemVAddr(code, ctx, abort, vaddr, require_abort_handling);
+        bool require_abort_handling;
+        const auto dest_ptr = EmitFastmemVAddr(code, ctx, *abort, vaddr, require_abort_handling);
 
         const auto location = EmitWriteMemoryMov<bitsize>(code, dest_ptr, value_idx, ordered);
 
-        fastmem_patch_info.emplace(
-            mcl::bit_cast<u64>(location),
-            FastmemPatchInfo{
-                mcl::bit_cast<u64>(code.getCurr()),
-                mcl::bit_cast<u64>(wrapped_fn),
-                *fastmem_marker,
-                conf.recompile_on_fastmem_failure,
-            });
+        ctx.deferred_emits.emplace_back([=, this, &ctx] {
+            code.L(*abort);
+            code.call(wrapped_fn);
+
+            fastmem_patch_info.emplace(
+                mcl::bit_cast<u64>(location),
+                FastmemPatchInfo{
+                    mcl::bit_cast<u64>(code.getCurr()),
+                    mcl::bit_cast<u64>(wrapped_fn),
+                    *fastmem_marker,
+                    conf.recompile_on_fastmem_failure,
+                });
+
+            EmitCheckMemoryAbort(ctx, inst, end.get());
+            code.jmp(*end, code.T_NEAR);
+        });
     } else {
         // Use page table
        ASSERT(conf.page_table);
-        const auto dest_ptr = EmitVAddrLookup(code, ctx, bitsize, abort, vaddr);
-        require_abort_handling = true;
+        const auto dest_ptr = EmitVAddrLookup(code, ctx, bitsize, *abort, vaddr);
         EmitWriteMemoryMov<bitsize>(code, dest_ptr, value_idx, ordered);
-    }
-    code.L(end);
 
-    if (require_abort_handling) {
-        code.SwitchToFarCode();
-        code.L(abort);
-        code.call(wrapped_fn);
-        code.jmp(end, code.T_NEAR);
-        code.SwitchToNearCode();
+        ctx.deferred_emits.emplace_back([=, this, &ctx] {
+            code.L(*abort);
+            code.call(wrapped_fn);
+            EmitCheckMemoryAbort(ctx, inst, end.get());
+            code.jmp(*end, code.T_NEAR);
 
 template
 void AxxEmitX64::EmitExclusiveReadMemory(AxxEmitContext& ctx, IR::Inst* inst) {
     ASSERT(conf.global_monitor != nullptr);
     auto args = ctx.reg_alloc.GetArgumentInfo(inst);
-    const bool ordered = IsOrdered(args[1].GetImmediateAccType());
+    const bool ordered = IsOrdered(args[2].GetImmediateAccType());
 
     if constexpr (bitsize != 128) {
         using T = mcl::unsigned_integer_of_size;
 
-        ctx.reg_alloc.HostCall(inst, {}, args[0]);
+        ctx.reg_alloc.HostCall(inst, {}, args[1]);
 
         code.mov(code.byte[r15 + offsetof(AxxJitState, exclusive_state)], u8(1));
         code.mov(code.ABI_PARAM1, reinterpret_cast(&conf));
@@ -228,7 +242,7 @@ void AxxEmitX64::EmitExclusiveReadMemory(AxxEmitContext& ctx, IR::Inst* inst) {
         code.ZeroExtendFrom(bitsize, code.ABI_RETURN);
     } else {
         const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm();
-        ctx.reg_alloc.Use(args[0], ABI_PARAM2);
+        ctx.reg_alloc.Use(args[1], ABI_PARAM2);
         ctx.reg_alloc.EndOfAllocScope();
         ctx.reg_alloc.HostCall(nullptr);
 
@@ -250,19 +264,21 @@ void AxxEmitX64::EmitExclusiveReadMemory(AxxEmitContext& ctx, IR::Inst* inst) {
 
         ctx.reg_alloc.DefineValue(inst, result);
     }
+
+    EmitCheckMemoryAbort(ctx, inst);
 }
 
 template
 void AxxEmitX64::EmitExclusiveWriteMemory(AxxEmitContext& ctx, IR::Inst* inst) {
     ASSERT(conf.global_monitor != nullptr);
     auto args = ctx.reg_alloc.GetArgumentInfo(inst);
-    const bool ordered = IsOrdered(args[2].GetImmediateAccType());
+    const bool ordered = IsOrdered(args[3].GetImmediateAccType());
 
     if constexpr (bitsize != 128) {
-        ctx.reg_alloc.HostCall(inst, {}, args[0], args[1]);
+        ctx.reg_alloc.HostCall(inst, {}, args[1], args[2]);
     } else {
-        ctx.reg_alloc.Use(args[0], ABI_PARAM2);
-        ctx.reg_alloc.Use(args[1], HostLoc::XMM1);
+        ctx.reg_alloc.Use(args[1], ABI_PARAM2);
+        ctx.reg_alloc.Use(args[2], HostLoc::XMM1);
         ctx.reg_alloc.EndOfAllocScope();
         ctx.reg_alloc.HostCall(inst);
     }
@@ -308,6 +324,8 @@ void AxxEmitX64::EmitExclusiveWriteMemory(AxxEmitContext& ctx, IR::Inst* inst) {
         ctx.reg_alloc.ReleaseStackSpace(16 + ABI_SHADOW_SPACE);
     }
     code.L(end);
+
+    EmitCheckMemoryAbort(ctx, inst);
 }
 
 template
@@ -329,7 +347,7 @@ void AxxEmitX64::EmitExclusiveReadMemoryInline(AxxEmitContext& ctx, IR::Inst* in
         ctx.reg_alloc.ScratchGpr(HostLoc::RDX);
     }
 
-    const Xbyak::Reg64 vaddr = ctx.reg_alloc.UseGpr(args[0]);
+    const Xbyak::Reg64 vaddr = ctx.reg_alloc.UseGpr(args[1]);
     const int value_idx = bitsize == 128
                               ? ctx.reg_alloc.ScratchXmm().getIdx()
                               : ctx.reg_alloc.ScratchGpr().getIdx();
     const Xbyak::Reg64 tmp = ctx.reg_alloc.ScratchGpr();
     const Xbyak::Reg64 tmp2 = ctx.reg_alloc.ScratchGpr();
@@ -344,10 +362,10 @@ void AxxEmitX64::EmitExclusiveReadMemoryInline(AxxEmitContext& ctx, IR::Inst* in
     const auto fastmem_marker = ShouldFastmem(ctx, inst);
     if (fastmem_marker) {
-        Xbyak::Label abort, end;
+        SharedLabel abort = GenSharedLabel(), end = GenSharedLabel();
         bool require_abort_handling = false;
 
-        const auto src_ptr = EmitFastmemVAddr(code, ctx, abort, vaddr, require_abort_handling);
+        const auto src_ptr = EmitFastmemVAddr(code, ctx, *abort, vaddr, require_abort_handling);
 
         const auto location = EmitReadMemoryMov(code, value_idx, src_ptr, ordered);
 
@@ -360,14 +378,14 @@ void AxxEmitX64::EmitExclusiveReadMemoryInline(AxxEmitContext& ctx, IR::Inst* in
                 conf.recompile_on_exclusive_fastmem_failure,
             });
 
-        code.L(end);
+        code.L(*end);
 
         if (require_abort_handling) {
-            code.SwitchToFarCode();
-            code.L(abort);
-            code.call(wrapped_fn);
-            code.jmp(end, code.T_NEAR);
-            code.SwitchToNearCode();
+            ctx.deferred_emits.emplace_back([=, this] {
+                code.L(*abort);
+                code.call(wrapped_fn);
+                code.jmp(*end, code.T_NEAR);
+            });
         }
     } else {
         code.call(wrapped_fn);
@@ -383,6 +401,8 @@ void AxxEmitX64::EmitExclusiveReadMemoryInline(AxxEmitContext& ctx, IR::Inst* in
     } else {
         ctx.reg_alloc.DefineValue(inst, Xbyak::Reg64{value_idx});
     }
+
+    EmitCheckMemoryAbort(ctx, inst);
 }
 
 template
@@ -402,13 +422,13 @@ void AxxEmitX64::EmitExclusiveWriteMemoryInline(AxxEmitContext& ctx, IR::Inst* i
             ctx.reg_alloc.ScratchGpr(HostLoc::RBX);
             ctx.reg_alloc.ScratchGpr(HostLoc::RCX);
             ctx.reg_alloc.ScratchGpr(HostLoc::RDX);
-            return ctx.reg_alloc.UseXmm(args[1]);
+            return ctx.reg_alloc.UseXmm(args[2]);
         } else {
             ctx.reg_alloc.ScratchGpr(HostLoc::RAX);
-            return ctx.reg_alloc.UseGpr(args[1]);
+            return ctx.reg_alloc.UseGpr(args[2]);
         }
     }();
-    const Xbyak::Reg64 vaddr = ctx.reg_alloc.UseGpr(args[0]);
+    const Xbyak::Reg64 vaddr = ctx.reg_alloc.UseGpr(args[1]);
     const Xbyak::Reg32 status = ctx.reg_alloc.ScratchGpr().cvt32();
     const Xbyak::Reg64 tmp = ctx.reg_alloc.ScratchGpr();
 
@@ -416,14 +436,14 @@ void AxxEmitX64::EmitExclusiveWriteMemoryInline(AxxEmitContext& ctx, IR::Inst* i
 
     EmitExclusiveLock(code, conf, tmp, eax);
 
-    Xbyak::Label end;
+    SharedLabel end = GenSharedLabel();
 
     code.mov(tmp, mcl::bit_cast(GetExclusiveMonitorAddressPointer(conf.global_monitor, conf.processor_id)));
     code.mov(status, u32(1));
     code.cmp(code.byte[r15 + offsetof(AxxJitState, exclusive_state)], u8(0));
-    code.je(end, code.T_NEAR);
+    code.je(*end, code.T_NEAR);
     code.cmp(qword[tmp], vaddr);
-    code.jne(end, code.T_NEAR);
+    code.jne(*end, code.T_NEAR);
 
     EmitExclusiveTestAndClear(code, conf, vaddr, tmp, rax);
 
@@ -448,10 +468,10 @@ void AxxEmitX64::EmitExclusiveWriteMemoryInline(AxxEmitContext& ctx, IR::Inst* i
     const auto fastmem_marker = ShouldFastmem(ctx, inst);
     if (fastmem_marker) {
-        Xbyak::Label abort;
+        SharedLabel abort = GenSharedLabel();
         bool require_abort_handling = false;
 
-        const auto dest_ptr = EmitFastmemVAddr(code, ctx, abort, vaddr, require_abort_handling, tmp);
+        const auto dest_ptr = EmitFastmemVAddr(code, ctx, *abort, vaddr, require_abort_handling, tmp);
 
         const auto location = code.getCurr();
 
@@ -483,24 +503,24 @@ void AxxEmitX64::EmitExclusiveWriteMemoryInline(AxxEmitContext& ctx, IR::Inst* i
 
         code.setnz(status.cvt8());
 
-        code.SwitchToFarCode();
-        code.L(abort);
-        code.call(wrapped_fn);
+        ctx.deferred_emits.emplace_back([=, this] {
+            code.L(*abort);
+            code.call(wrapped_fn);
 
-        fastmem_patch_info.emplace(
-            mcl::bit_cast(location),
-            FastmemPatchInfo{
-                mcl::bit_cast(code.getCurr()),
-                mcl::bit_cast(wrapped_fn),
-                *fastmem_marker,
-                conf.recompile_on_exclusive_fastmem_failure,
-            });
+            fastmem_patch_info.emplace(
+                mcl::bit_cast(location),
+                FastmemPatchInfo{
+                    mcl::bit_cast(code.getCurr()),
+                    mcl::bit_cast(wrapped_fn),
+                    *fastmem_marker,
+                    conf.recompile_on_exclusive_fastmem_failure,
+                });
 
-        code.cmp(al, 0);
-        code.setz(status.cvt8());
-        code.movzx(status.cvt32(), status.cvt8());
-        code.jmp(end, code.T_NEAR);
-        code.SwitchToNearCode();
+            code.cmp(al, 0);
+            code.setz(status.cvt8());
+            code.movzx(status.cvt32(), status.cvt8());
+            code.jmp(*end, code.T_NEAR);
+        });
     } else {
         code.call(wrapped_fn);
         code.cmp(al, 0);
@@ -508,11 +528,13 @@ void AxxEmitX64::EmitExclusiveWriteMemoryInline(AxxEmitContext& ctx, IR::Inst* i
         code.movzx(status.cvt32(), status.cvt8());
     }
 
-    code.L(end);
+    code.L(*end);
 
     EmitExclusiveUnlock(code, conf, tmp, eax);
 
     ctx.reg_alloc.DefineValue(inst, status);
+
+    EmitCheckMemoryAbort(ctx, inst);
 }
 
 #undef AxxEmitX64
diff --git a/externals/dynarmic/src/dynarmic/backend/x64/emit_x64_memory.h b/externals/dynarmic/src/dynarmic/backend/x64/emit_x64_memory.h
index 29146820c..c99980d61 100755
--- a/externals/dynarmic/src/dynarmic/backend/x64/emit_x64_memory.h
+++ b/externals/dynarmic/src/dynarmic/backend/x64/emit_x64_memory.h
@@ -53,19 +53,19 @@ void EmitDetectMisalignedVAddr(BlockOfCode& code, EmitContext& ctx, size_t bitsi
 
     const u32 page_align_mask = static_cast(page_size - 1) & ~align_mask;
 
-    Xbyak::Label detect_boundary, resume;
+    SharedLabel detect_boundary = GenSharedLabel(), resume = GenSharedLabel();
 
-    code.jnz(detect_boundary, code.T_NEAR);
-    code.L(resume);
+    code.jnz(*detect_boundary, code.T_NEAR);
+    code.L(*resume);
 
-    code.SwitchToFarCode();
-    code.L(detect_boundary);
-    code.mov(tmp, vaddr);
-    code.and_(tmp, page_align_mask);
-    code.cmp(tmp, page_align_mask);
-    code.jne(resume, code.T_NEAR);
-    // NOTE: We expect to fallthrough into abort code here.
-    code.SwitchToNearCode();
+    ctx.deferred_emits.emplace_back([=, &code] {
+        code.L(*detect_boundary);
+        code.mov(tmp, vaddr);
+        code.and_(tmp, page_align_mask);
+        code.cmp(tmp, page_align_mask);
+        code.jne(*resume, code.T_NEAR);
+        // NOTE: We expect to fallthrough into abort code here.
+    });
 }
 
 template
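EmitDetectMisalignedVAddr only needs its slow path when a misaligned access also straddles a page: it masks the address with page_align_mask and compares against that same mask. A plain C++ sketch of the arithmetic (the 4K page size and the standalone function are assumptions for illustration; the JIT emits this check only on the already-detected-misaligned path):

#include <cstdint>
#include <cstdio>

// True when a `size`-byte access at `vaddr` spills into the next 4K page.
// Mirrors the emitted mask/compare: only the maximal aligned-down offset
// within a page can make a misaligned access cross the boundary.
bool MisalignedAccessCrossesPage(std::uint64_t vaddr, std::uint64_t size) {
    const std::uint64_t page_size = 4096;       // assumption for the sketch
    const std::uint64_t align_mask = size - 1;  // size must be a power of two
    const std::uint64_t page_align_mask = (page_size - 1) & ~align_mask;
    return (vaddr & page_align_mask) == page_align_mask;
}

int main() {
    std::printf("%d\n", MisalignedAccessCrossesPage(0x1FFD, 4));  // 1: crosses 0x2000
    std::printf("%d\n", MisalignedAccessCrossesPage(0x1FF1, 4));  // 0: stays in page
}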
diff --git a/externals/dynarmic/src/dynarmic/backend/x64/emit_x64_vector_floating_point.cpp b/externals/dynarmic/src/dynarmic/backend/x64/emit_x64_vector_floating_point.cpp
index d6964395e..0f94865a3 100755
--- a/externals/dynarmic/src/dynarmic/backend/x64/emit_x64_vector_floating_point.cpp
+++ b/externals/dynarmic/src/dynarmic/backend/x64/emit_x64_vector_floating_point.cpp
@@ -112,36 +112,35 @@ void HandleNaNs(BlockOfCode& code, EmitContext& ctx, bool fpcr_controlled, std::
         code.cmp(bitmask, 0);
     }
 
-    Xbyak::Label end;
-    Xbyak::Label nan;
+    SharedLabel end = GenSharedLabel(), nan = GenSharedLabel();
 
-    code.jnz(nan, code.T_NEAR);
-    code.L(end);
+    code.jnz(*nan, code.T_NEAR);
+    code.L(*end);
 
-    code.SwitchToFarCode();
-    code.L(nan);
+    ctx.deferred_emits.emplace_back([=, &code, &ctx] {
+        code.L(*nan);
 
-    const Xbyak::Xmm result = xmms[0];
+        const Xbyak::Xmm result = xmms[0];
 
-    code.sub(rsp, 8);
-    ABI_PushCallerSaveRegistersAndAdjustStackExcept(code, HostLocXmmIdx(result.getIdx()));
+        code.sub(rsp, 8);
+        ABI_PushCallerSaveRegistersAndAdjustStackExcept(code, HostLocXmmIdx(result.getIdx()));
 
-    const size_t stack_space = xmms.size() * 16;
-    code.sub(rsp, stack_space + ABI_SHADOW_SPACE);
-    for (size_t i = 0; i < xmms.size(); ++i) {
-        code.movaps(xword[rsp + ABI_SHADOW_SPACE + i * 16], xmms[i]);
-    }
-    code.lea(code.ABI_PARAM1, ptr[rsp + ABI_SHADOW_SPACE + 0 * 16]);
-    code.mov(code.ABI_PARAM2, ctx.FPCR(fpcr_controlled).Value());
+        const size_t stack_space = xmms.size() * 16;
+        code.sub(rsp, static_cast(stack_space + ABI_SHADOW_SPACE));
+        for (size_t i = 0; i < xmms.size(); ++i) {
+            code.movaps(xword[rsp + ABI_SHADOW_SPACE + i * 16], xmms[i]);
+        }
+        code.lea(code.ABI_PARAM1, ptr[rsp + ABI_SHADOW_SPACE + 0 * 16]);
+        code.mov(code.ABI_PARAM2, ctx.FPCR(fpcr_controlled).Value());
 
-    code.CallFunction(nan_handler);
+        code.CallFunction(nan_handler);
 
-    code.movaps(result, xword[rsp + ABI_SHADOW_SPACE + 0 * 16]);
-    code.add(rsp, stack_space + ABI_SHADOW_SPACE);
-    ABI_PopCallerSaveRegistersAndAdjustStackExcept(code, HostLocXmmIdx(result.getIdx()));
-    code.add(rsp, 8);
-    code.jmp(end, code.T_NEAR);
-    code.SwitchToNearCode();
+        code.movaps(result, xword[rsp + ABI_SHADOW_SPACE + 0 * 16]);
+        code.add(rsp, static_cast(stack_space + ABI_SHADOW_SPACE));
+        ABI_PopCallerSaveRegistersAndAdjustStackExcept(code, HostLocXmmIdx(result.getIdx()));
+        code.add(rsp, 8);
+        code.jmp(*end, code.T_NEAR);
+    });
 }
 
 template
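HandleNaNs keeps the vector op on the fast path and defers the per-lane NaN fixup: the deferred block spills the operand vectors to the stack and calls nan_handler in C++. A scalar model of what one lane of such a handler restores (the propagate-first-operand and default-NaN choices are illustrative assumptions, not dynarmic's exact FPCR-dependent policy):

#include <cmath>
#include <cstdio>
#include <limits>

// One lane of a NaN fixup pass: the fast path keeps the raw result and the
// handler only runs when some lane compared unordered.
float FixupLane(float a, float b, float result) {
    if (!std::isnan(result))
        return result;                               // fast path
    if (std::isnan(a))
        return a;                                    // propagate an input NaN
    if (std::isnan(b))
        return b;
    return std::numeric_limits<float>::quiet_NaN();  // assumed default-NaN policy
}

int main() {
    const float nan = std::numeric_limits<float>::quiet_NaN();
    std::printf("%f\n", FixupLane(1.0f, 2.0f, 3.0f));  // 3.0: untouched
    std::printf("%f\n", FixupLane(1.0f, 2.0f, nan));   // nan: replaced by policy
}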
@@ -1117,7 +1116,7 @@ void EmitFPVectorMulAdd(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
         const Xbyak::Xmm xmm_c = ctx.reg_alloc.UseXmm(args[2]);
         const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm();
 
-        Xbyak::Label end, fallback;
+        SharedLabel end = GenSharedLabel(), fallback = GenSharedLabel();
 
         MaybeStandardFPSCRValue(code, ctx, fpcr_controlled, [&] {
             code.movaps(result, xmm_a);
@@ -1127,19 +1126,19 @@ void EmitFPVectorMulAdd(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
             code.andnps(tmp, result);
             FCODE(vcmpeq_uqp)(tmp, tmp, GetSmallestNormalVector(code));
             code.vptest(tmp, tmp);
-            code.jnz(fallback, code.T_NEAR);
-            code.L(end);
+            code.jnz(*fallback, code.T_NEAR);
+            code.L(*end);
         });
 
-        code.SwitchToFarCode();
-        code.L(fallback);
-        code.sub(rsp, 8);
-        ABI_PushCallerSaveRegistersAndAdjustStackExcept(code, HostLocXmmIdx(result.getIdx()));
-        EmitFourOpFallbackWithoutRegAlloc(code, ctx, result, xmm_a, xmm_b, xmm_c, fallback_fn, fpcr_controlled);
-        ABI_PopCallerSaveRegistersAndAdjustStackExcept(code, HostLocXmmIdx(result.getIdx()));
-        code.add(rsp, 8);
-        code.jmp(end, code.T_NEAR);
-        code.SwitchToNearCode();
+        ctx.deferred_emits.emplace_back([=, &code, &ctx] {
+            code.L(*fallback);
+            code.sub(rsp, 8);
+            ABI_PushCallerSaveRegistersAndAdjustStackExcept(code, HostLocXmmIdx(result.getIdx()));
+            EmitFourOpFallbackWithoutRegAlloc(code, ctx, result, xmm_a, xmm_b, xmm_c, fallback_fn, fpcr_controlled);
+            ABI_PopCallerSaveRegistersAndAdjustStackExcept(code, HostLocXmmIdx(result.getIdx()));
+            code.add(rsp, 8);
+            code.jmp(*end, code.T_NEAR);
+        });
 
         ctx.reg_alloc.DefineValue(inst, result);
         return;
@@ -1377,7 +1376,7 @@ static void EmitRecipStepFused(BlockOfCode& code, EmitContext& ctx, IR::Inst* in
         const Xbyak::Xmm operand2 = ctx.reg_alloc.UseXmm(args[1]);
         const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm();
 
-        Xbyak::Label end, fallback;
+        SharedLabel end = GenSharedLabel(), fallback = GenSharedLabel();
 
         MaybeStandardFPSCRValue(code, ctx, fpcr_controlled, [&] {
             code.movaps(result, GetVectorOf(code));
@@ -1385,19 +1384,19 @@ static void EmitRecipStepFused(BlockOfCode& code, EmitContext& ctx, IR::Inst* in
             FCODE(vcmpunordp)(tmp, result, result);
             code.vptest(tmp, tmp);
-            code.jnz(fallback, code.T_NEAR);
-            code.L(end);
+            code.jnz(*fallback, code.T_NEAR);
+            code.L(*end);
         });
 
-        code.SwitchToFarCode();
-        code.L(fallback);
-        code.sub(rsp, 8);
-        ABI_PushCallerSaveRegistersAndAdjustStackExcept(code, HostLocXmmIdx(result.getIdx()));
-        EmitThreeOpFallbackWithoutRegAlloc(code, ctx, result, operand1, operand2, fallback_fn, fpcr_controlled);
-        ABI_PopCallerSaveRegistersAndAdjustStackExcept(code, HostLocXmmIdx(result.getIdx()));
-        code.add(rsp, 8);
-        code.jmp(end, code.T_NEAR);
-        code.SwitchToNearCode();
+        ctx.deferred_emits.emplace_back([=, &code, &ctx] {
+            code.L(*fallback);
+            code.sub(rsp, 8);
+            ABI_PushCallerSaveRegistersAndAdjustStackExcept(code, HostLocXmmIdx(result.getIdx()));
+            EmitThreeOpFallbackWithoutRegAlloc(code, ctx, result, operand1, operand2, fallback_fn, fpcr_controlled);
+            ABI_PopCallerSaveRegistersAndAdjustStackExcept(code, HostLocXmmIdx(result.getIdx()));
+            code.add(rsp, 8);
+            code.jmp(*end, code.T_NEAR);
+        });
 
         ctx.reg_alloc.DefineValue(inst, result);
         return;
@@ -1591,7 +1590,7 @@ static void EmitRSqrtStepFused(BlockOfCode& code, EmitContext& ctx, IR::Inst* in
         const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm();
         const Xbyak::Xmm mask = ctx.reg_alloc.ScratchXmm();
 
-        Xbyak::Label end, fallback;
+        SharedLabel end = GenSharedLabel(), fallback = GenSharedLabel();
 
         MaybeStandardFPSCRValue(code, ctx, fpcr_controlled, [&] {
             code.vmovaps(result, GetVectorOf(code));
@@ -1602,21 +1601,21 @@ static void EmitRSqrtStepFused(BlockOfCode& code, EmitContext& ctx, IR::Inst* in
             FCODE(vandp)(tmp, result, mask);
             ICODE(vpcmpeq)(tmp, tmp, mask);
             code.ptest(tmp, tmp);
-            code.jnz(fallback, code.T_NEAR);
+            code.jnz(*fallback, code.T_NEAR);
             FCODE(vmulp)(result, result, GetVectorOf(code));
-            code.L(end);
+            code.L(*end);
         });
 
-        code.SwitchToFarCode();
-        code.L(fallback);
-        code.sub(rsp, 8);
-        ABI_PushCallerSaveRegistersAndAdjustStackExcept(code, HostLocXmmIdx(result.getIdx()));
-        EmitThreeOpFallbackWithoutRegAlloc(code, ctx, result, operand1, operand2, fallback_fn, fpcr_controlled);
-        ABI_PopCallerSaveRegistersAndAdjustStackExcept(code, HostLocXmmIdx(result.getIdx()));
-        code.add(rsp, 8);
-        code.jmp(end, code.T_NEAR);
-        code.SwitchToNearCode();
+        ctx.deferred_emits.emplace_back([=, &code, &ctx] {
+            code.L(*fallback);
+            code.sub(rsp, 8);
+            ABI_PushCallerSaveRegistersAndAdjustStackExcept(code, HostLocXmmIdx(result.getIdx()));
+            EmitThreeOpFallbackWithoutRegAlloc(code, ctx, result, operand1, operand2, fallback_fn, fpcr_controlled);
+            ABI_PopCallerSaveRegistersAndAdjustStackExcept(code, HostLocXmmIdx(result.getIdx()));
+            code.add(rsp, 8);
+            code.jmp(*end, code.T_NEAR);
+        });
 
         ctx.reg_alloc.DefineValue(inst, result);
         return;
diff --git a/externals/dynarmic/src/dynarmic/common/llvm_disassemble.cpp b/externals/dynarmic/src/dynarmic/common/llvm_disassemble.cpp
index 72dfafd40..636ee3a25 100755
--- a/externals/dynarmic/src/dynarmic/common/llvm_disassemble.cpp
+++ b/externals/dynarmic/src/dynarmic/common/llvm_disassemble.cpp
@@ -69,6 +69,10 @@ std::string DisassembleAArch32([[maybe_unused]] bool is_thumb, [[maybe_unused]]
     char buffer[1024];
     while (length) {
         size_t inst_size = LLVMDisasmInstruction(llvm_ctx, const_cast(instructions), length, pc, buffer, sizeof(buffer));
+        const char* const disassembled = inst_size > 0 ? buffer : "";
+
+        if (inst_size == 0)
+            inst_size = is_thumb ? 2 : 4;
 
         result += fmt::format("{:08x} ", pc);
         for (size_t i = 0; i < 4; i++) {
@@ -78,11 +82,9 @@ std::string DisassembleAArch32([[maybe_unused]] bool is_thumb, [[maybe_unused]]
                 result += " ";
             }
         }
-        result += inst_size > 0 ? buffer : "";
+        result += disassembled;
         result += '\n';
 
-        if (inst_size == 0)
-            inst_size = is_thumb ? 2 : 4;
 
         if (length <= inst_size)
             break;
@@ -111,7 +113,8 @@ std::string DisassembleAArch64([[maybe_unused]] u32 instruction, [[maybe_unused]
     char buffer[80];
     size_t inst_size = LLVMDisasmInstruction(llvm_ctx, (u8*)&instruction, sizeof(instruction), pc, buffer, sizeof(buffer));
 
-    result = inst_size > 0 ? buffer : "";
+    result = fmt::format("{:016x} {:08x} ", pc, instruction);
+    result += inst_size > 0 ? buffer : "";
     result += '\n';
 
     LLVMDisasmDispose(llvm_ctx);
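The DisassembleAArch32 fix hoists the inst_size == 0 fallback above the output step, so an undecodable encoding still advances the cursor by one instruction unit (2 bytes in Thumb, 4 in Arm) instead of reusing a stale size. A standalone sketch of that loop against the LLVM-C API (the triple strings, the "<unknown>" placeholder, and the minimal error handling are assumptions for illustration, not the project's exact code):

#include <llvm-c/Disassembler.h>
#include <llvm-c/Target.h>
#include <cstdint>
#include <cstdio>

void Disassemble(const std::uint8_t* instructions, std::size_t length, bool is_thumb) {
    LLVMInitializeAllTargetInfos();
    LLVMInitializeAllTargetMCs();
    LLVMInitializeAllDisassemblers();

    LLVMDisasmContextRef llvm_ctx =
        LLVMCreateDisasm(is_thumb ? "thumbv8-arm" : "armv8-arm", nullptr, 0, nullptr, nullptr);

    std::uint64_t pc = 0;
    char buffer[1024];
    while (length) {
        std::size_t inst_size = LLVMDisasmInstruction(
            llvm_ctx, const_cast<std::uint8_t*>(instructions), length, pc, buffer, sizeof(buffer));
        const char* const disassembled = inst_size > 0 ? buffer : "<unknown>";
        if (inst_size == 0)
            inst_size = is_thumb ? 2 : 4;  // advance past the undecodable unit

        std::printf("%08llx %s\n", static_cast<unsigned long long>(pc), disassembled);

        if (length <= inst_size)
            break;
        pc += inst_size;
        instructions += inst_size;
        length -= inst_size;
    }

    LLVMDisasmDispose(llvm_ctx);
}

int main() {
    const std::uint8_t thumb_nop[] = {0x00, 0xBF};  // T32 NOP encoding
    Disassemble(thumb_nop, sizeof(thumb_nop), true);
}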
diff --git a/externals/dynarmic/src/dynarmic/frontend/A32/a32_ir_emitter.cpp b/externals/dynarmic/src/dynarmic/frontend/A32/a32_ir_emitter.cpp
index 9b9e645d0..7f686b327 100755
--- a/externals/dynarmic/src/dynarmic/frontend/A32/a32_ir_emitter.cpp
+++ b/externals/dynarmic/src/dynarmic/frontend/A32/a32_ir_emitter.cpp
@@ -245,40 +245,40 @@ IR::UAny IREmitter::ReadMemory(size_t bitsize, const IR::U32& vaddr, IR::AccType
 }
 
 IR::U8 IREmitter::ReadMemory8(const IR::U32& vaddr, IR::AccType acc_type) {
-    return Inst(Opcode::A32ReadMemory8, vaddr, IR::Value{acc_type});
+    return Inst(Opcode::A32ReadMemory8, ImmCurrentLocationDescriptor(), vaddr, IR::Value{acc_type});
 }
 
 IR::U16 IREmitter::ReadMemory16(const IR::U32& vaddr, IR::AccType acc_type) {
-    const auto value = Inst(Opcode::A32ReadMemory16, vaddr, IR::Value{acc_type});
+    const auto value = Inst(Opcode::A32ReadMemory16, ImmCurrentLocationDescriptor(), vaddr, IR::Value{acc_type});
     return current_location.EFlag() ? ByteReverseHalf(value) : value;
 }
 
 IR::U32 IREmitter::ReadMemory32(const IR::U32& vaddr, IR::AccType acc_type) {
-    const auto value = Inst(Opcode::A32ReadMemory32, vaddr, IR::Value{acc_type});
+    const auto value = Inst(Opcode::A32ReadMemory32, ImmCurrentLocationDescriptor(), vaddr, IR::Value{acc_type});
     return current_location.EFlag() ? ByteReverseWord(value) : value;
 }
 
 IR::U64 IREmitter::ReadMemory64(const IR::U32& vaddr, IR::AccType acc_type) {
-    const auto value = Inst(Opcode::A32ReadMemory64, vaddr, IR::Value{acc_type});
+    const auto value = Inst(Opcode::A32ReadMemory64, ImmCurrentLocationDescriptor(), vaddr, IR::Value{acc_type});
     return current_location.EFlag() ? ByteReverseDual(value) : value;
 }
 
 IR::U8 IREmitter::ExclusiveReadMemory8(const IR::U32& vaddr, IR::AccType acc_type) {
-    return Inst(Opcode::A32ExclusiveReadMemory8, vaddr, IR::Value{acc_type});
+    return Inst(Opcode::A32ExclusiveReadMemory8, ImmCurrentLocationDescriptor(), vaddr, IR::Value{acc_type});
 }
 
 IR::U16 IREmitter::ExclusiveReadMemory16(const IR::U32& vaddr, IR::AccType acc_type) {
-    const auto value = Inst(Opcode::A32ExclusiveReadMemory16, vaddr, IR::Value{acc_type});
+    const auto value = Inst(Opcode::A32ExclusiveReadMemory16, ImmCurrentLocationDescriptor(), vaddr, IR::Value{acc_type});
     return current_location.EFlag() ? ByteReverseHalf(value) : value;
 }
 
 IR::U32 IREmitter::ExclusiveReadMemory32(const IR::U32& vaddr, IR::AccType acc_type) {
-    const auto value = Inst(Opcode::A32ExclusiveReadMemory32, vaddr, IR::Value{acc_type});
+    const auto value = Inst(Opcode::A32ExclusiveReadMemory32, ImmCurrentLocationDescriptor(), vaddr, IR::Value{acc_type});
     return current_location.EFlag() ? ByteReverseWord(value) : value;
 }
 
 std::pair IREmitter::ExclusiveReadMemory64(const IR::U32& vaddr, IR::AccType acc_type) {
-    const auto value = Inst(Opcode::A32ExclusiveReadMemory64, vaddr, IR::Value{acc_type});
+    const auto value = Inst(Opcode::A32ExclusiveReadMemory64, ImmCurrentLocationDescriptor(), vaddr, IR::Value{acc_type});
     const auto lo = LeastSignificantWord(value);
     const auto hi = MostSignificantWord(value).result;
     if (current_location.EFlag()) {
@@ -303,55 +303,55 @@ void IREmitter::WriteMemory(size_t bitsize, const IR::U32& vaddr, const IR::UAny
 }
 
 void IREmitter::WriteMemory8(const IR::U32& vaddr, const IR::U8& value, IR::AccType acc_type) {
-    Inst(Opcode::A32WriteMemory8, vaddr, value, IR::Value{acc_type});
+    Inst(Opcode::A32WriteMemory8, ImmCurrentLocationDescriptor(), vaddr, value, IR::Value{acc_type});
 }
 
 void IREmitter::WriteMemory16(const IR::U32& vaddr, const IR::U16& value, IR::AccType acc_type) {
     if (current_location.EFlag()) {
         const auto v = ByteReverseHalf(value);
-        Inst(Opcode::A32WriteMemory16, vaddr, v, IR::Value{acc_type});
+        Inst(Opcode::A32WriteMemory16, ImmCurrentLocationDescriptor(), vaddr, v, IR::Value{acc_type});
     } else {
-        Inst(Opcode::A32WriteMemory16, vaddr, value, IR::Value{acc_type});
+        Inst(Opcode::A32WriteMemory16, ImmCurrentLocationDescriptor(), vaddr, value, IR::Value{acc_type});
     }
 }
 
 void IREmitter::WriteMemory32(const IR::U32& vaddr, const IR::U32& value, IR::AccType acc_type) {
     if (current_location.EFlag()) {
         const auto v = ByteReverseWord(value);
-        Inst(Opcode::A32WriteMemory32, vaddr, v, IR::Value{acc_type});
+        Inst(Opcode::A32WriteMemory32, ImmCurrentLocationDescriptor(), vaddr, v, IR::Value{acc_type});
     } else {
-        Inst(Opcode::A32WriteMemory32, vaddr, value, IR::Value{acc_type});
+        Inst(Opcode::A32WriteMemory32, ImmCurrentLocationDescriptor(), vaddr, value, IR::Value{acc_type});
     }
 }
 
 void IREmitter::WriteMemory64(const IR::U32& vaddr, const IR::U64& value, IR::AccType acc_type) {
     if (current_location.EFlag()) {
         const auto v = ByteReverseDual(value);
-        Inst(Opcode::A32WriteMemory64, vaddr, v, IR::Value{acc_type});
+        Inst(Opcode::A32WriteMemory64, ImmCurrentLocationDescriptor(), vaddr, v, IR::Value{acc_type});
     } else {
-        Inst(Opcode::A32WriteMemory64, vaddr, value, IR::Value{acc_type});
+        Inst(Opcode::A32WriteMemory64, ImmCurrentLocationDescriptor(), vaddr, value, IR::Value{acc_type});
    }
 }
 
 IR::U32 IREmitter::ExclusiveWriteMemory8(const IR::U32& vaddr, const IR::U8& value, IR::AccType acc_type) {
-    return Inst(Opcode::A32ExclusiveWriteMemory8, vaddr, value, IR::Value{acc_type});
+    return Inst(Opcode::A32ExclusiveWriteMemory8, ImmCurrentLocationDescriptor(), vaddr, value, IR::Value{acc_type});
 }
 
 IR::U32 IREmitter::ExclusiveWriteMemory16(const IR::U32& vaddr, const IR::U16& value, IR::AccType acc_type) {
     if (current_location.EFlag()) {
         const auto v = ByteReverseHalf(value);
-        return Inst(Opcode::A32ExclusiveWriteMemory16, vaddr, v, IR::Value{acc_type});
+        return Inst(Opcode::A32ExclusiveWriteMemory16, ImmCurrentLocationDescriptor(), vaddr, v, IR::Value{acc_type});
     } else {
-        return Inst(Opcode::A32ExclusiveWriteMemory16, vaddr, value, IR::Value{acc_type});
+        return Inst(Opcode::A32ExclusiveWriteMemory16, ImmCurrentLocationDescriptor(), vaddr, value, IR::Value{acc_type});
     }
 }
 
 IR::U32 IREmitter::ExclusiveWriteMemory32(const IR::U32& vaddr, const IR::U32& value, IR::AccType acc_type) {
     if (current_location.EFlag()) {
         const auto v = ByteReverseWord(value);
-        return Inst(Opcode::A32ExclusiveWriteMemory32, vaddr, v, IR::Value{acc_type});
+        return Inst(Opcode::A32ExclusiveWriteMemory32, ImmCurrentLocationDescriptor(), vaddr, v, IR::Value{acc_type});
     } else {
-        return Inst(Opcode::A32ExclusiveWriteMemory32, vaddr, value, IR::Value{acc_type});
+        return Inst(Opcode::A32ExclusiveWriteMemory32, ImmCurrentLocationDescriptor(), vaddr, value, IR::Value{acc_type});
     }
 }
 
@@ -359,9 +359,9 @@ IR::U32 IREmitter::ExclusiveWriteMemory64(const IR::U32& vaddr, const IR::U32& v
     if (current_location.EFlag()) {
         const auto vlo = ByteReverseWord(value_lo);
         const auto vhi = ByteReverseWord(value_hi);
-        return Inst(Opcode::A32ExclusiveWriteMemory64, vaddr, Pack2x32To1x64(vlo, vhi), IR::Value{acc_type});
+        return Inst(Opcode::A32ExclusiveWriteMemory64, ImmCurrentLocationDescriptor(), vaddr, Pack2x32To1x64(vlo, vhi), IR::Value{acc_type});
     } else {
-        return Inst(Opcode::A32ExclusiveWriteMemory64, vaddr, Pack2x32To1x64(value_lo, value_hi), IR::Value{acc_type});
+        return Inst(Opcode::A32ExclusiveWriteMemory64, ImmCurrentLocationDescriptor(), vaddr, Pack2x32To1x64(value_lo, value_hi), IR::Value{acc_type});
     }
 }
 
@@ -439,4 +439,8 @@ void IREmitter::CoprocStoreWords(size_t coproc_no, bool two, bool long_transfer,
     Inst(Opcode::A32CoprocStoreWords, IR::Value(coproc_info), address);
 }
 
+IR::U64 IREmitter::ImmCurrentLocationDescriptor() {
+    return Imm64(IR::LocationDescriptor{current_location}.Value());
+}
+
 }  // namespace Dynarmic::A32
diff --git a/externals/dynarmic/src/dynarmic/frontend/A32/a32_ir_emitter.h b/externals/dynarmic/src/dynarmic/frontend/A32/a32_ir_emitter.h
index 8cdebe493..96d162b00 100755
--- a/externals/dynarmic/src/dynarmic/frontend/A32/a32_ir_emitter.h
+++ b/externals/dynarmic/src/dynarmic/frontend/A32/a32_ir_emitter.h
@@ -110,6 +110,7 @@ public:
 
 private:
     enum ArchVersion arch_version;
+    IR::U64 ImmCurrentLocationDescriptor();
 };
 
 }  // namespace Dynarmic::A32
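Each A32 memory opcode now carries the current location descriptor as an extra leading operand (which is why the backend hunks above shift from args[0]/args[1] to args[1]/args[2]): with it, the JIT can attribute a halted or aborted access to the guest instruction that made it. A toy model of the shape of the emitted instruction (the Value/Inst types here are simplified stand-ins, not dynarmic's real IR):

#include <cstdint>
#include <cstdio>
#include <vector>

// Simplified stand-ins for IR values/instructions.
struct Value {
    std::uint64_t imm;
};

struct Inst {
    const char* opcode;
    std::vector<Value> args;  // args[0]: location descriptor, args[1]: vaddr, args[2]: acc_type
};

struct IREmitter {
    std::uint64_t current_location = 0x0000100000000001ULL;

    Value ImmCurrentLocationDescriptor() const {
        return Value{current_location};
    }

    Inst ReadMemory32(Value vaddr) {
        // The descriptor rides along as the first operand of every memory op.
        return Inst{"A32ReadMemory32", {ImmCurrentLocationDescriptor(), vaddr, Value{0}}};
    }
};

int main() {
    IREmitter ir;
    const Inst inst = ir.ReadMemory32(Value{0x2000});
    std::printf("%s loc=%016llx vaddr=%llx\n", inst.opcode,
                static_cast<unsigned long long>(inst.args[0].imm),
                static_cast<unsigned long long>(inst.args[1].imm));
}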
diff --git a/externals/dynarmic/src/dynarmic/frontend/A32/translate/a32_translate.h b/externals/dynarmic/src/dynarmic/frontend/A32/translate/a32_translate.h
index 23a63d115..0f2c3a121 100755
--- a/externals/dynarmic/src/dynarmic/frontend/A32/translate/a32_translate.h
+++ b/externals/dynarmic/src/dynarmic/frontend/A32/translate/a32_translate.h
@@ -29,12 +29,6 @@ struct TranslationOptions {
     /// If this is false, we treat the instruction as a NOP.
     /// If this is true, we emit an ExceptionRaised instruction.
     bool hook_hint_instructions = true;
-
-    /// This changes what IR we emit when we translate a memory instruction.
-    /// If this is false, memory accesses are not considered terminal.
-    /// If this is true, memory access are considered terminal. This allows
-    /// accurately emulating protection fault handlers.
-    bool check_halt_on_memory_access = false;
 };
 
 /**
diff --git a/externals/dynarmic/src/dynarmic/frontend/A32/translate/impl/a32_translate_impl.cpp b/externals/dynarmic/src/dynarmic/frontend/A32/translate/impl/a32_translate_impl.cpp
index ae20c5a14..276f8384e 100755
--- a/externals/dynarmic/src/dynarmic/frontend/A32/translate/impl/a32_translate_impl.cpp
+++ b/externals/dynarmic/src/dynarmic/frontend/A32/translate/impl/a32_translate_impl.cpp
@@ -53,15 +53,6 @@ bool TranslatorVisitor::RaiseException(Exception exception) {
     return false;
 }
 
-bool TranslatorVisitor::MemoryInstructionContinues() {
-    if (options.check_halt_on_memory_access) {
-        ir.SetTerm(IR::Term::LinkBlock{ir.current_location.AdvancePC(static_cast(current_instruction_size))});
-        return false;
-    }
-
-    return true;
-}
-
 IR::UAny TranslatorVisitor::I(size_t bitsize, u64 value) {
     switch (bitsize) {
     case 8:
diff --git a/externals/dynarmic/src/dynarmic/frontend/A32/translate/impl/a32_translate_impl.h b/externals/dynarmic/src/dynarmic/frontend/A32/translate/impl/a32_translate_impl.h
index 551528eeb..61a97b1cc 100755
--- a/externals/dynarmic/src/dynarmic/frontend/A32/translate/impl/a32_translate_impl.h
+++ b/externals/dynarmic/src/dynarmic/frontend/A32/translate/impl/a32_translate_impl.h
@@ -41,7 +41,6 @@ struct TranslatorVisitor final {
     bool UndefinedInstruction();
     bool DecodeError();
     bool RaiseException(Exception exception);
-    bool MemoryInstructionContinues();
 
     struct ImmAndCarry {
         u32 imm32;
diff --git a/externals/dynarmic/src/dynarmic/frontend/A32/translate/impl/asimd_load_store_structures.cpp b/externals/dynarmic/src/dynarmic/frontend/A32/translate/impl/asimd_load_store_structures.cpp
index 1b6c97d5e..d444e023e 100755
--- a/externals/dynarmic/src/dynarmic/frontend/A32/translate/impl/asimd_load_store_structures.cpp
+++ b/externals/dynarmic/src/dynarmic/frontend/A32/translate/impl/asimd_load_store_structures.cpp
@@ -119,7 +119,7 @@ bool TranslatorVisitor::v8_VST_multiple(bool D, Reg n, size_t Vd, Imm<4> type, s
         }
     }
 
-    return MemoryInstructionContinues();
+    return true;
 }
 
 bool TranslatorVisitor::v8_VLD_multiple(bool D, Reg n, size_t Vd, Imm<4> type, size_t size, size_t align, Reg m) {
@@ -176,7 +176,7 @@ bool TranslatorVisitor::v8_VLD_multiple(bool D, Reg n, size_t Vd, Imm<4> type, s
         }
     }
 
-    return MemoryInstructionContinues();
+    return true;
 }
 
 bool TranslatorVisitor::v8_VLD_all_lanes(bool D, Reg n, size_t Vd, size_t nn, size_t sz, bool T, bool a, Reg m) {
@@ -241,7 +241,7 @@ bool TranslatorVisitor::v8_VLD_all_lanes(bool D, Reg n, size_t Vd, size_t nn, si
         }
     }
 
-    return MemoryInstructionContinues();
+    return true;
 }
 
 bool TranslatorVisitor::v8_VST_single(bool D, Reg n, size_t Vd, size_t sz, size_t nn, size_t index_align, Reg m) {
@@ -305,7 +305,7 @@ bool TranslatorVisitor::v8_VST_single(bool D, Reg n, size_t Vd, size_t sz, size_
         }
     }
 
-    return MemoryInstructionContinues();
+    return true;
 }
 
 bool TranslatorVisitor::v8_VLD_single(bool D, Reg n, size_t Vd, size_t sz, size_t nn, size_t index_align, Reg m) {
@@ -370,6 +370,6 @@ bool TranslatorVisitor::v8_VLD_single(bool D, Reg n, size_t Vd, size_t sz, size_
         }
     }
 
-    return MemoryInstructionContinues();
+    return true;
 }
 }  // namespace Dynarmic::A32
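The load_store.cpp hunks below all compute their effective address through GetAddress, which decodes the P (pre-/post-index), U (add/subtract) and W (writeback) bits. A plain C++ model of that addressing logic (the Core struct and immediate-only offset are simplifying assumptions):

#include <cstdint>
#include <cstdio>

struct Core {
    std::uint32_t reg[16] = {};
};

// Model of the P/U/W addressing decode behind GetAddress(ir, P, U, W, n, offset).
std::uint32_t GetAddress(Core& core, bool P, bool U, bool W, int n, std::uint32_t offset) {
    const std::uint32_t base = core.reg[n];
    const std::uint32_t offset_addr = U ? base + offset : base - offset;
    const std::uint32_t address = P ? offset_addr : base;  // post-indexed uses the old base
    if (!P || W)
        core.reg[n] = offset_addr;  // post-index always writes back; pre-index only with W
    return address;
}

int main() {
    Core core;
    core.reg[1] = 0x1000;
    std::printf("%x\n", GetAddress(core, true, true, false, 1, 8));   // 0x1008, r1 unchanged
    std::printf("%x\n", GetAddress(core, false, true, false, 1, 8));  // 0x1000, r1 becomes 0x1008
}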
diff --git a/externals/dynarmic/src/dynarmic/frontend/A32/translate/impl/load_store.cpp b/externals/dynarmic/src/dynarmic/frontend/A32/translate/impl/load_store.cpp
index a7fc8886e..7ef8b7e89 100755
--- a/externals/dynarmic/src/dynarmic/frontend/A32/translate/impl/load_store.cpp
+++ b/externals/dynarmic/src/dynarmic/frontend/A32/translate/impl/load_store.cpp
@@ -83,7 +83,7 @@ bool TranslatorVisitor::arm_LDR_lit(Cond cond, bool U, Reg t, Imm<12> imm12) {
     }
 
     ir.SetRegister(t, data);
-    return MemoryInstructionContinues();
+    return true;
 }
 
 // LDR , [, #+/-]{!}
@@ -120,7 +120,7 @@ bool TranslatorVisitor::arm_LDR_imm(Cond cond, bool P, bool U, bool W, Reg n, Re
     }
 
     ir.SetRegister(t, data);
-    return MemoryInstructionContinues();
+    return true;
 }
 
 // LDR , [, #+/-]{!}
@@ -150,7 +150,7 @@ bool TranslatorVisitor::arm_LDR_reg(Cond cond, bool P, bool U, bool W, Reg n, Re
     }
 
     ir.SetRegister(t, data);
-    return MemoryInstructionContinues();
+    return true;
 }
 
 // LDRB , [PC, #+/-]
@@ -170,7 +170,7 @@ bool TranslatorVisitor::arm_LDRB_lit(Cond cond, bool U, Reg t, Imm<12> imm12) {
     const auto data = ir.ZeroExtendByteToWord(ir.ReadMemory8(ir.Imm32(address), IR::AccType::NORMAL));
 
     ir.SetRegister(t, data);
-    return MemoryInstructionContinues();
+    return true;
 }
 
 // LDRB , [, #+/-]{!}
@@ -199,7 +199,7 @@ bool TranslatorVisitor::arm_LDRB_imm(Cond cond, bool P, bool U, bool W, Reg n, R
     const auto data = ir.ZeroExtendByteToWord(ir.ReadMemory8(address, IR::AccType::NORMAL));
 
     ir.SetRegister(t, data);
-    return MemoryInstructionContinues();
+    return true;
 }
 
 // LDRB , [, #+/-]{!}
@@ -223,7 +223,7 @@ bool TranslatorVisitor::arm_LDRB_reg(Cond cond, bool P, bool U, bool W, Reg n, R
     const auto data = ir.ZeroExtendByteToWord(ir.ReadMemory8(address, IR::AccType::NORMAL));
 
     ir.SetRegister(t, data);
-    return MemoryInstructionContinues();
+    return true;
 }
 
 // LDRD , , [PC, #+/-]
@@ -257,7 +257,7 @@ bool TranslatorVisitor::arm_LDRD_lit(Cond cond, bool U, Reg t, Imm<4> imm8a, Imm
         ir.SetRegister(t, ir.LeastSignificantWord(data));
         ir.SetRegister(t2, ir.MostSignificantWord(data).result);
     }
-    return MemoryInstructionContinues();
+    return true;
 }
 
 // LDRD , [, #+/-]{!}
@@ -303,7 +303,7 @@ bool TranslatorVisitor::arm_LDRD_imm(Cond cond, bool P, bool U, bool W, Reg n, R
         ir.SetRegister(t, ir.LeastSignificantWord(data));
         ir.SetRegister(t2, ir.MostSignificantWord(data).result);
     }
-    return MemoryInstructionContinues();
+    return true;
 }
 
 // LDRD , [, #+/-]{!}
@@ -343,7 +343,7 @@ bool TranslatorVisitor::arm_LDRD_reg(Cond cond, bool P, bool U, bool W, Reg n, R
         ir.SetRegister(t, ir.LeastSignificantWord(data));
         ir.SetRegister(t2, ir.MostSignificantWord(data).result);
     }
-    return MemoryInstructionContinues();
+    return true;
 }
 
 // LDRH , [PC, #-/+]
@@ -368,7 +368,7 @@ bool TranslatorVisitor::arm_LDRH_lit(Cond cond, bool P, bool U, bool W, Reg t, I
     const auto data = ir.ZeroExtendHalfToWord(ir.ReadMemory16(ir.Imm32(address), IR::AccType::NORMAL));
 
     ir.SetRegister(t, data);
-    return MemoryInstructionContinues();
+    return true;
 }
 
 // LDRH , [, #+/-]{!}
@@ -397,7 +397,7 @@ bool TranslatorVisitor::arm_LDRH_imm(Cond cond, bool P, bool U, bool W, Reg n, R
     const auto data = ir.ZeroExtendHalfToWord(ir.ReadMemory16(address, IR::AccType::NORMAL));
 
     ir.SetRegister(t, data);
-    return MemoryInstructionContinues();
+    return true;
 }
 
 // LDRH , [, #+/-]{!}
@@ -421,7 +421,7 @@ bool TranslatorVisitor::arm_LDRH_reg(Cond cond, bool P, bool U, bool W, Reg n, R
     const auto data = ir.ZeroExtendHalfToWord(ir.ReadMemory16(address, IR::AccType::NORMAL));
 
     ir.SetRegister(t, data);
-    return MemoryInstructionContinues();
+    return true;
 }
 
 // LDRSB , [PC, #+/-]
@@ -442,7 +442,7 @@ bool TranslatorVisitor::arm_LDRSB_lit(Cond cond, bool U, Reg t, Imm<4> imm8a, Im
     const auto data = ir.SignExtendByteToWord(ir.ReadMemory8(ir.Imm32(address), IR::AccType::NORMAL));
 
     ir.SetRegister(t, data);
-    return MemoryInstructionContinues();
+    return true;
 }
 
 // LDRSB , [, #+/-]{!}
@@ -471,7 +471,7 @@ bool TranslatorVisitor::arm_LDRSB_imm(Cond cond, bool P, bool U, bool W, Reg n,
     const auto data = ir.SignExtendByteToWord(ir.ReadMemory8(address, IR::AccType::NORMAL));
 
     ir.SetRegister(t, data);
-    return MemoryInstructionContinues();
+    return true;
 }
 
 // LDRSB , [, #+/-]{!}
@@ -495,7 +495,7 @@ bool TranslatorVisitor::arm_LDRSB_reg(Cond cond, bool P, bool U, bool W, Reg n,
     const auto data = ir.SignExtendByteToWord(ir.ReadMemory8(address, IR::AccType::NORMAL));
 
     ir.SetRegister(t, data);
-    return MemoryInstructionContinues();
+    return true;
 }
 
 // LDRSH , [PC, #-/+]
@@ -515,7 +515,7 @@ bool TranslatorVisitor::arm_LDRSH_lit(Cond cond, bool U, Reg t, Imm<4> imm8a, Im
     const auto data = ir.SignExtendHalfToWord(ir.ReadMemory16(ir.Imm32(address), IR::AccType::NORMAL));
 
     ir.SetRegister(t, data);
-    return MemoryInstructionContinues();
+    return true;
 }
 
 // LDRSH , [, #+/-]{!}
@@ -544,7 +544,7 @@ bool TranslatorVisitor::arm_LDRSH_imm(Cond cond, bool P, bool U, bool W, Reg n,
     const auto data = ir.SignExtendHalfToWord(ir.ReadMemory16(address, IR::AccType::NORMAL));
 
     ir.SetRegister(t, data);
-    return MemoryInstructionContinues();
+    return true;
 }
 
 // LDRSH , [, #+/-]{!}
@@ -568,7 +568,7 @@ bool TranslatorVisitor::arm_LDRSH_reg(Cond cond, bool P, bool U, bool W, Reg n,
     const auto data = ir.SignExtendHalfToWord(ir.ReadMemory16(address, IR::AccType::NORMAL));
 
     ir.SetRegister(t, data);
-    return MemoryInstructionContinues();
+    return true;
 }
 
 // STR , [, #+/-]{!}
@@ -585,7 +585,7 @@ bool TranslatorVisitor::arm_STR_imm(Cond cond, bool P, bool U, bool W, Reg n, Re
     const auto offset = ir.Imm32(imm12.ZeroExtend());
     const auto address = GetAddress(ir, P, U, W, n, offset);
     ir.WriteMemory32(address, ir.GetRegister(t), IR::AccType::NORMAL);
-    return MemoryInstructionContinues();
+    return true;
 }
 
 // STR , [, #+/-]{!}
@@ -606,7 +606,7 @@ bool TranslatorVisitor::arm_STR_reg(Cond cond, bool P, bool U, bool W, Reg n, Re
     const auto offset = EmitImmShift(ir.GetRegister(m), shift, imm5, ir.GetCFlag()).result;
     const auto address = GetAddress(ir, P, U, W, n, offset);
     ir.WriteMemory32(address, ir.GetRegister(t), IR::AccType::NORMAL);
-    return MemoryInstructionContinues();
+    return true;
 }
 
 // STRB , [, #+/-]{!}
@@ -627,7 +627,7 @@ bool TranslatorVisitor::arm_STRB_imm(Cond cond, bool P, bool U, bool W, Reg n, R
     const auto offset = ir.Imm32(imm12.ZeroExtend());
     const auto address = GetAddress(ir, P, U, W, n, offset);
     ir.WriteMemory8(address, ir.LeastSignificantByte(ir.GetRegister(t)), IR::AccType::NORMAL);
-    return MemoryInstructionContinues();
+    return true;
 }
 
 // STRB , [, #+/-]{!}
@@ -648,7 +648,7 @@ bool TranslatorVisitor::arm_STRB_reg(Cond cond, bool P, bool U, bool W, Reg n, R
     const auto offset = EmitImmShift(ir.GetRegister(m), shift, imm5, ir.GetCFlag()).result;
     const auto address = GetAddress(ir, P, U, W, n, offset);
     ir.WriteMemory8(address, ir.LeastSignificantByte(ir.GetRegister(t)), IR::AccType::NORMAL);
-    return MemoryInstructionContinues();
+    return true;
 }
 
 // STRD , [, #+/-]{!}
@@ -686,7 +686,7 @@ bool TranslatorVisitor::arm_STRD_imm(Cond cond, bool P, bool U, bool W, Reg n, R
     // NOTE: If alignment is exactly off by 4, each word is an atomic access.
     ir.WriteMemory64(address, data, IR::AccType::ATOMIC);
-    return MemoryInstructionContinues();
+    return true;
 }
 
 // STRD , [, #+/-]{!}
@@ -723,7 +723,7 @@ bool TranslatorVisitor::arm_STRD_reg(Cond cond, bool P, bool U, bool W, Reg n, R
     // NOTE: If alignment is exactly off by 4, each word is an atomic access.
     ir.WriteMemory64(address, data, IR::AccType::ATOMIC);
-    return MemoryInstructionContinues();
+    return true;
 }
 
 // STRH , [, #+/-]{!}
@@ -746,7 +746,7 @@ bool TranslatorVisitor::arm_STRH_imm(Cond cond, bool P, bool U, bool W, Reg n, R
     const auto address = GetAddress(ir, P, U, W, n, offset);
 
     ir.WriteMemory16(address, ir.LeastSignificantHalf(ir.GetRegister(t)), IR::AccType::NORMAL);
-    return MemoryInstructionContinues();
+    return true;
 }
 
 // STRH , [, #+/-]{!}
@@ -768,31 +768,29 @@ bool TranslatorVisitor::arm_STRH_reg(Cond cond, bool P, bool U, bool W, Reg n, R
     const auto address = GetAddress(ir, P, U, W, n, offset);
 
     ir.WriteMemory16(address, ir.LeastSignificantHalf(ir.GetRegister(t)), IR::AccType::NORMAL);
-    return MemoryInstructionContinues();
+    return true;
 }
 
-static bool LDMHelper(TranslatorVisitor& v, bool W, Reg n, RegList list, IR::U32 start_address, IR::U32 writeback_address) {
+static bool LDMHelper(A32::IREmitter& ir, bool W, Reg n, RegList list, IR::U32 start_address, IR::U32 writeback_address) {
     auto address = start_address;
     for (size_t i = 0; i <= 14; i++) {
         if (mcl::bit::get_bit(i, list)) {
-            v.ir.SetRegister(static_cast(i), v.ir.ReadMemory32(address, IR::AccType::ATOMIC));
-            address = v.ir.Add(address, v.ir.Imm32(4));
+            ir.SetRegister(static_cast(i), ir.ReadMemory32(address, IR::AccType::ATOMIC));
+            address = ir.Add(address, ir.Imm32(4));
         }
     }
     if (W && !mcl::bit::get_bit(RegNumber(n), list)) {
-        v.ir.SetRegister(n, writeback_address);
+        ir.SetRegister(n, writeback_address);
     }
     if (mcl::bit::get_bit<15>(list)) {
-        v.ir.LoadWritePC(v.ir.ReadMemory32(address, IR::AccType::ATOMIC));
-        if (v.options.check_halt_on_memory_access)
-            v.ir.SetTerm(IR::Term::CheckHalt{IR::Term::ReturnToDispatch{}});
-        else if (n == Reg::R13)
-            v.ir.SetTerm(IR::Term::PopRSBHint{});
+        ir.LoadWritePC(ir.ReadMemory32(address, IR::AccType::ATOMIC));
+        if (n == Reg::R13)
+            ir.SetTerm(IR::Term::PopRSBHint{});
         else
-            v.ir.SetTerm(IR::Term::FastDispatchHint{});
+            ir.SetTerm(IR::Term::FastDispatchHint{});
         return false;
     }
-    return v.MemoryInstructionContinues();
+    return true;
 }
 
 // LDM {!}, 
@@ -810,7 +808,7 @@ bool TranslatorVisitor::arm_LDM(Cond cond, bool W, Reg n, RegList list) {
     const auto start_address = ir.GetRegister(n);
     const auto writeback_address = ir.Add(start_address, ir.Imm32(u32(mcl::bit::count_ones(list) * 4)));
-    return LDMHelper(*this, W, n, list, start_address, writeback_address);
+    return LDMHelper(ir, W, n, list, start_address, writeback_address);
 }
 
 // LDMDA {!}, 
@@ -828,7 +826,7 @@ bool TranslatorVisitor::arm_LDMDA(Cond cond, bool W, Reg n, RegList list) {
     const auto start_address = ir.Sub(ir.GetRegister(n), ir.Imm32(u32(4 * mcl::bit::count_ones(list) - 4)));
     const auto writeback_address = ir.Sub(start_address, ir.Imm32(4));
-    return LDMHelper(*this, W, n, list, start_address, writeback_address);
+    return LDMHelper(ir, W, n, list, start_address, writeback_address);
 }
 
 // LDMDB {!}, 
@@ -846,7 +844,7 @@ bool TranslatorVisitor::arm_LDMDB(Cond cond, bool W, Reg n, RegList list) {
     const auto start_address = ir.Sub(ir.GetRegister(n), ir.Imm32(u32(4 * mcl::bit::count_ones(list))));
     const auto writeback_address = start_address;
-    return LDMHelper(*this, W, n, list, start_address, writeback_address);
+    return LDMHelper(ir, W, n, list, start_address, writeback_address);
 }
 
 // LDMIB {!}, 
@@ -864,7 +862,7 @@ bool TranslatorVisitor::arm_LDMIB(Cond cond, bool W, Reg n, RegList list) {
     const auto start_address = ir.Add(ir.GetRegister(n), ir.Imm32(4));
     const auto writeback_address = ir.Add(ir.GetRegister(n), ir.Imm32(u32(4 * mcl::bit::count_ones(list))));
-    return LDMHelper(*this, W, n, list, start_address, writeback_address);
+    return LDMHelper(ir, W, n, list, start_address, writeback_address);
 }
 
 bool TranslatorVisitor::arm_LDM_usr() {
@@ -875,21 +873,21 @@ bool TranslatorVisitor::arm_LDM_eret() {
     return InterpretThisInstruction();
 }
 
-static bool STMHelper(TranslatorVisitor& v, bool W, Reg n, RegList list, IR::U32 start_address, IR::U32 writeback_address) {
+static bool STMHelper(A32::IREmitter& ir, bool W, Reg n, RegList list, IR::U32 start_address, IR::U32 writeback_address) {
    auto address = start_address;
    for (size_t i = 0; i <= 14; i++) {
        if (mcl::bit::get_bit(i, list)) {
-            v.ir.WriteMemory32(address, v.ir.GetRegister(static_cast(i)), IR::AccType::ATOMIC);
-            address = v.ir.Add(address, v.ir.Imm32(4));
+            ir.WriteMemory32(address, ir.GetRegister(static_cast(i)), IR::AccType::ATOMIC);
+            address = ir.Add(address, ir.Imm32(4));
        }
    }
    if (W) {
-        v.ir.SetRegister(n, writeback_address);
+        ir.SetRegister(n, writeback_address);
    }
    if (mcl::bit::get_bit<15>(list)) {
-        v.ir.WriteMemory32(address, v.ir.Imm32(v.ir.PC()), IR::AccType::ATOMIC);
+        ir.WriteMemory32(address, ir.Imm32(ir.PC()), IR::AccType::ATOMIC);
    }
-    return v.MemoryInstructionContinues();
+    return true;
 }
 
 // STM {!}, 
@@ -904,7 +902,7 @@ bool TranslatorVisitor::arm_STM(Cond cond, bool W, Reg n, RegList list) {
     const auto start_address = ir.GetRegister(n);
     const auto writeback_address = ir.Add(start_address, ir.Imm32(u32(mcl::bit::count_ones(list) * 4)));
-    return STMHelper(*this, W, n, list, start_address, writeback_address);
+    return STMHelper(ir, W, n, list, start_address, writeback_address);
 }
 
 // STMDA {!}, 
@@ -919,7 +917,7 @@ bool TranslatorVisitor::arm_STMDA(Cond cond, bool W, Reg n, RegList list) {
     const auto start_address = ir.Sub(ir.GetRegister(n), ir.Imm32(u32(4 * mcl::bit::count_ones(list) - 4)));
     const auto writeback_address = ir.Sub(start_address, ir.Imm32(4));
-    return STMHelper(*this, W, n, list, start_address, writeback_address);
+    return STMHelper(ir, W, n, list, start_address, writeback_address);
 }
 
 // STMDB {!}, 
@@ -934,7 +932,7 @@ bool TranslatorVisitor::arm_STMDB(Cond cond, bool W, Reg n, RegList list) {
     const auto start_address = ir.Sub(ir.GetRegister(n), ir.Imm32(u32(4 * mcl::bit::count_ones(list))));
     const auto writeback_address = start_address;
-    return STMHelper(*this, W, n, list, start_address, writeback_address);
+    return STMHelper(ir, W, n, list, start_address, writeback_address);
 }
 
 // STMIB {!}, 
@@ -949,7 +947,7 @@ bool TranslatorVisitor::arm_STMIB(Cond cond, bool W, Reg n, RegList list) {
     const auto start_address = ir.Add(ir.GetRegister(n), ir.Imm32(4));
     const auto writeback_address = ir.Add(ir.GetRegister(n), ir.Imm32(u32(4 * mcl::bit::count_ones(list))));
-    return STMHelper(*this, W, n, list, start_address, writeback_address);
+    return STMHelper(ir, W, n, list, start_address, writeback_address);
 }
 
 bool TranslatorVisitor::arm_STM_usr() {
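LDMHelper and STMHelper now take the IREmitter directly: with the halt-on-memory-access option gone they no longer consult the visitor's translation options, so the narrower interface suffices. The register-list walk itself is unchanged; a plain C++ model of the LDM loop (flat arrays stand in for guest registers and memory):

#include <cstdint>
#include <cstdio>

// Sketch of the LDM register-list walk: one 32-bit load per set bit in `list`,
// in ascending register order, each advancing the address by 4.
void LoadMultiple(std::uint32_t (&reg)[16], const std::uint32_t* memory,
                  std::uint16_t list, std::uint32_t start_address) {
    std::uint32_t address = start_address;
    for (int i = 0; i <= 14; ++i) {
        if (list & (1u << i)) {
            reg[i] = memory[address / 4];
            address += 4;
        }
    }
    // Bit 15 (PC) is handled separately: it loads PC and terminates the block.
}

int main() {
    std::uint32_t reg[16] = {};
    const std::uint32_t memory[8] = {11, 22, 33};
    LoadMultiple(reg, memory, 0x0006, 0);  // loads r1 and r2
    std::printf("r1=%u r2=%u\n", reg[1], reg[2]);
}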
diff --git a/externals/dynarmic/src/dynarmic/frontend/A32/translate/impl/synchronization.cpp b/externals/dynarmic/src/dynarmic/frontend/A32/translate/impl/synchronization.cpp
index 5d86f2788..e9f04b70e 100755
--- a/externals/dynarmic/src/dynarmic/frontend/A32/translate/impl/synchronization.cpp
+++ b/externals/dynarmic/src/dynarmic/frontend/A32/translate/impl/synchronization.cpp
@@ -29,7 +29,7 @@ bool TranslatorVisitor::arm_SWP(Cond cond, Reg n, Reg t, Reg t2) {
     ir.WriteMemory32(ir.GetRegister(n), ir.GetRegister(t2), IR::AccType::SWAP);
     // TODO: Alignment check
     ir.SetRegister(t, data);
-    return MemoryInstructionContinues();
+    return true;
 }
 
 // SWPB , , []
@@ -48,7 +48,7 @@ bool TranslatorVisitor::arm_SWPB(Cond cond, Reg n, Reg t, Reg t2) {
     ir.WriteMemory8(ir.GetRegister(n), ir.LeastSignificantByte(ir.GetRegister(t2)), IR::AccType::SWAP);
     // TODO: Alignment check
     ir.SetRegister(t, ir.ZeroExtendByteToWord(data));
-    return MemoryInstructionContinues();
+    return true;
 }
 
 // LDA , []
@@ -63,7 +63,7 @@ bool TranslatorVisitor::arm_LDA(Cond cond, Reg n, Reg t) {
     const auto address = ir.GetRegister(n);
     ir.SetRegister(t, ir.ReadMemory32(address, IR::AccType::ORDERED));
-    return MemoryInstructionContinues();
+    return true;
 }
 
 // LDAB , []
 bool TranslatorVisitor::arm_LDAB(Cond cond, Reg n, Reg t) {
@@ -77,7 +77,7 @@ bool TranslatorVisitor::arm_LDAB(Cond cond, Reg n, Reg t) {
     const auto address = ir.GetRegister(n);
     ir.SetRegister(t, ir.ZeroExtendToWord(ir.ReadMemory8(address, IR::AccType::ORDERED)));
-    return MemoryInstructionContinues();
+    return true;
 }
 
 // LDAH , []
 bool TranslatorVisitor::arm_LDAH(Cond cond, Reg n, Reg t) {
@@ -91,7 +91,7 @@ bool TranslatorVisitor::arm_LDAH(Cond cond, Reg n, Reg t) {
     const auto address = ir.GetRegister(n);
     ir.SetRegister(t, ir.ZeroExtendToWord(ir.ReadMemory16(address, IR::AccType::ORDERED)));
-    return MemoryInstructionContinues();
+    return true;
 }
 
 // LDAEX , []
@@ -106,7 +106,7 @@ bool TranslatorVisitor::arm_LDAEX(Cond cond, Reg n, Reg t) {
     const auto address = ir.GetRegister(n);
     ir.SetRegister(t, ir.ExclusiveReadMemory32(address, IR::AccType::ORDERED));
-    return MemoryInstructionContinues();
+    return true;
 }
 
 // LDAEXB , []
@@ -121,7 +121,7 @@ bool TranslatorVisitor::arm_LDAEXB(Cond cond, Reg n, Reg t) {
     const auto address = ir.GetRegister(n);
     ir.SetRegister(t, ir.ZeroExtendByteToWord(ir.ExclusiveReadMemory8(address, IR::AccType::ORDERED)));
-    return MemoryInstructionContinues();
+    return true;
 }
 
 // LDAEXD , , []
@@ -139,7 +139,7 @@ bool TranslatorVisitor::arm_LDAEXD(Cond cond, Reg n, Reg t) {
     // DO NOT SWAP hi AND lo IN BIG ENDIAN MODE, THIS IS CORRECT BEHAVIOUR
     ir.SetRegister(t, lo);
     ir.SetRegister(t + 1, hi);
-    return MemoryInstructionContinues();
+    return true;
 }
 
 // LDAEXH , []
@@ -154,7 +154,7 @@ bool TranslatorVisitor::arm_LDAEXH(Cond cond, Reg n, Reg t) {
     const auto address = ir.GetRegister(n);
     ir.SetRegister(t, ir.ZeroExtendHalfToWord(ir.ExclusiveReadMemory16(address, IR::AccType::ORDERED)));
-    return MemoryInstructionContinues();
+    return true;
 }
 
 // STL , []
@@ -169,7 +169,7 @@ bool TranslatorVisitor::arm_STL(Cond cond, Reg n, Reg t) {
     const auto address = ir.GetRegister(n);
     ir.WriteMemory32(address, ir.GetRegister(t), IR::AccType::ORDERED);
-    return MemoryInstructionContinues();
+    return true;
 }
 
 // STLB , []
@@ -184,7 +184,7 @@ bool TranslatorVisitor::arm_STLB(Cond cond, Reg n, Reg t) {
     const auto address = ir.GetRegister(n);
     ir.WriteMemory8(address, ir.LeastSignificantByte(ir.GetRegister(t)), IR::AccType::ORDERED);
-    return MemoryInstructionContinues();
+    return true;
 }
 
 // STLH , , []
@@ -199,7 +199,7 @@ bool TranslatorVisitor::arm_STLH(Cond cond, Reg n, Reg t) {
     const auto address = ir.GetRegister(n);
     ir.WriteMemory16(address, ir.LeastSignificantHalf(ir.GetRegister(t)), IR::AccType::ORDERED);
-    return MemoryInstructionContinues();
+    return true;
 }
 
 // STLEXB , , []
@@ -220,7 +220,7 @@ bool TranslatorVisitor::arm_STLEXB(Cond cond, Reg n, Reg d, Reg t) {
     const auto value = ir.LeastSignificantByte(ir.GetRegister(t));
     const auto passed = ir.ExclusiveWriteMemory8(address, value, IR::AccType::ORDERED);
     ir.SetRegister(d, passed);
-    return MemoryInstructionContinues();
+    return true;
 }
 
 // STLEXD , , , []
 bool TranslatorVisitor::arm_STLEXD(Cond cond, Reg n, Reg d, Reg t) {
@@ -242,7 +242,7 @@ bool TranslatorVisitor::arm_STLEXD(Cond cond, Reg n, Reg d, Reg t) {
     const auto value_hi = ir.GetRegister(t2);
     const auto passed = ir.ExclusiveWriteMemory64(address, value_lo, value_hi, IR::AccType::ORDERED);
     ir.SetRegister(d, passed);
-    return MemoryInstructionContinues();
+    return true;
 }
 
 // STLEXH , , []
@@ -263,7 +263,7 @@ bool TranslatorVisitor::arm_STLEXH(Cond cond, Reg n, Reg d, Reg t) {
     const auto value = ir.LeastSignificantHalf(ir.GetRegister(t));
     const auto passed = ir.ExclusiveWriteMemory16(address, value, IR::AccType::ORDERED);
     ir.SetRegister(d, passed);
-    return MemoryInstructionContinues();
+    return true;
 }
 
 // STLEX , , []
@@ -284,7 +284,7 @@ bool TranslatorVisitor::arm_STLEX(Cond cond, Reg n, Reg d, Reg t) {
     const auto value = ir.GetRegister(t);
     const auto passed = ir.ExclusiveWriteMemory32(address, value, IR::AccType::ORDERED);
     ir.SetRegister(d, passed);
-    return MemoryInstructionContinues();
+    return true;
 }
 
 // LDREX , []
@@ -299,7 +299,7 @@ bool TranslatorVisitor::arm_LDREX(Cond cond, Reg n, Reg t) {
     const auto address = ir.GetRegister(n);
     ir.SetRegister(t, ir.ExclusiveReadMemory32(address, IR::AccType::ATOMIC));
-    return MemoryInstructionContinues();
+    return true;
 }
 
 // LDREXB , []
@@ -314,7 +314,7 @@ bool TranslatorVisitor::arm_LDREXB(Cond cond, Reg n, Reg t) {
     const auto address = ir.GetRegister(n);
     ir.SetRegister(t, ir.ZeroExtendByteToWord(ir.ExclusiveReadMemory8(address, IR::AccType::ATOMIC)));
-    return MemoryInstructionContinues();
+    return true;
 }
 
 // LDREXD , , []
@@ -332,7 +332,7 @@ bool TranslatorVisitor::arm_LDREXD(Cond cond, Reg n, Reg t) {
     // DO NOT SWAP hi AND lo IN BIG ENDIAN MODE, THIS IS CORRECT BEHAVIOUR
     ir.SetRegister(t, lo);
     ir.SetRegister(t + 1, hi);
-    return MemoryInstructionContinues();
+    return true;
 }
 
 // LDREXH , []
@@ -347,7 +347,7 @@ bool TranslatorVisitor::arm_LDREXH(Cond cond, Reg n, Reg t) {
     const auto address = ir.GetRegister(n);
     ir.SetRegister(t, ir.ZeroExtendHalfToWord(ir.ExclusiveReadMemory16(address, IR::AccType::ATOMIC)));
-    return MemoryInstructionContinues();
+    return true;
 }
 
 // STREX , , []
@@ -368,7 +368,7 @@ bool TranslatorVisitor::arm_STREX(Cond cond, Reg n, Reg d, Reg t) {
     const auto value = ir.GetRegister(t);
     const auto passed = ir.ExclusiveWriteMemory32(address, value, IR::AccType::ATOMIC);
     ir.SetRegister(d, passed);
-    return MemoryInstructionContinues();
+    return true;
 }
 
 // STREXB , , []
@@ -389,7 +389,7 @@ bool TranslatorVisitor::arm_STREXB(Cond cond, Reg n, Reg d, Reg t) {
     const auto value = ir.LeastSignificantByte(ir.GetRegister(t));
     const auto passed = ir.ExclusiveWriteMemory8(address, value, IR::AccType::ATOMIC);
     ir.SetRegister(d, passed);
-    return MemoryInstructionContinues();
+    return true;
 }
 
 // STREXD , , , []
@@ -412,7 +412,7 @@ bool TranslatorVisitor::arm_STREXD(Cond cond, Reg n, Reg d, Reg t) {
     const auto value_hi = ir.GetRegister(t2);
     const auto passed = ir.ExclusiveWriteMemory64(address, value_lo, value_hi, IR::AccType::ATOMIC);
     ir.SetRegister(d, passed);
-    return MemoryInstructionContinues();
+    return true;
 }
 
 // STREXH , , []
@@ -433,7 +433,7 @@ bool TranslatorVisitor::arm_STREXH(Cond cond, Reg n, Reg d, Reg t) {
     const auto value = ir.LeastSignificantHalf(ir.GetRegister(t));
     const auto passed = ir.ExclusiveWriteMemory16(address, value, IR::AccType::ATOMIC);
     ir.SetRegister(d, passed);
-    return MemoryInstructionContinues();
+    return true;
 }
 }  // namespace Dynarmic::A32
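Every LDREX/STREX-style pair above lowers to ExclusiveReadMemory*/ExclusiveWriteMemory*, where the write reports success through the destination register (0 = store performed, 1 = monitor lost). A toy single-threaded model of that monitor protocol (dynarmic's real global monitor is per-core and lock-based; this sketch only shows the success/failure contract):

#include <cstdint>
#include <cstdio>
#include <optional>
#include <unordered_map>

struct ExclusiveMonitor {
    std::optional<std::uint32_t> marked_vaddr;
    std::unordered_map<std::uint32_t, std::uint32_t> memory;

    std::uint32_t ExclusiveRead(std::uint32_t vaddr) {
        marked_vaddr = vaddr;  // open the monitor on this address
        return memory[vaddr];
    }

    // Returns 0 if the store happened, 1 if the monitor was lost (STREX's Rd).
    std::uint32_t ExclusiveWrite(std::uint32_t vaddr, std::uint32_t value) {
        if (marked_vaddr != vaddr)
            return 1;
        marked_vaddr.reset();  // a store attempt always clears the monitor
        memory[vaddr] = value;
        return 0;
    }
};

int main() {
    ExclusiveMonitor monitor;
    monitor.ExclusiveRead(0x1000);
    std::printf("%u\n", monitor.ExclusiveWrite(0x1000, 42));  // 0: passed
    std::printf("%u\n", monitor.ExclusiveWrite(0x1000, 43));  // 1: monitor already cleared
}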
diff --git a/externals/dynarmic/src/dynarmic/frontend/A32/translate/impl/thumb16.cpp b/externals/dynarmic/src/dynarmic/frontend/A32/translate/impl/thumb16.cpp
index 957cc4e14..0c531e9f2 100755
--- a/externals/dynarmic/src/dynarmic/frontend/A32/translate/impl/thumb16.cpp
+++ b/externals/dynarmic/src/dynarmic/frontend/A32/translate/impl/thumb16.cpp
@@ -449,7 +449,7 @@ bool TranslatorVisitor::thumb16_LDR_literal(Reg t, Imm<8> imm8) {
     const auto data = ir.ReadMemory32(ir.Imm32(address), IR::AccType::NORMAL);
 
     ir.SetRegister(t, data);
-    return MemoryInstructionContinues();
+    return true;
 }
 
 // STR , [, ]
@@ -459,7 +459,7 @@ bool TranslatorVisitor::thumb16_STR_reg(Reg m, Reg n, Reg t) {
     const auto data = ir.GetRegister(t);
 
     ir.WriteMemory32(address, data, IR::AccType::NORMAL);
-    return MemoryInstructionContinues();
+    return true;
 }
 
 // STRH , [, ]
@@ -469,7 +469,7 @@ bool TranslatorVisitor::thumb16_STRH_reg(Reg m, Reg n, Reg t) {
     const auto data = ir.LeastSignificantHalf(ir.GetRegister(t));
 
     ir.WriteMemory16(address, data, IR::AccType::NORMAL);
-    return MemoryInstructionContinues();
+    return true;
 }
 
 // STRB , [, ]
@@ -479,7 +479,7 @@ bool TranslatorVisitor::thumb16_STRB_reg(Reg m, Reg n, Reg t) {
     const auto data = ir.LeastSignificantByte(ir.GetRegister(t));
 
     ir.WriteMemory8(address, data, IR::AccType::NORMAL);
-    return MemoryInstructionContinues();
+    return true;
 }
 
 // LDRSB , [, ]
@@ -489,7 +489,7 @@ bool TranslatorVisitor::thumb16_LDRSB_reg(Reg m, Reg n, Reg t) {
     const auto data = ir.SignExtendByteToWord(ir.ReadMemory8(address, IR::AccType::NORMAL));
 
     ir.SetRegister(t, data);
-    return MemoryInstructionContinues();
+    return true;
 }
 
 // LDR , [, ]
@@ -499,7 +499,7 @@ bool TranslatorVisitor::thumb16_LDR_reg(Reg m, Reg n, Reg t) {
     const auto data = ir.ReadMemory32(address, IR::AccType::NORMAL);
 
     ir.SetRegister(t, data);
-    return MemoryInstructionContinues();
+    return true;
 }
 
 // LDRH , [, ]
@@ -509,7 +509,7 @@ bool TranslatorVisitor::thumb16_LDRH_reg(Reg m, Reg n, Reg t) {
     const auto data = ir.ZeroExtendHalfToWord(ir.ReadMemory16(address, IR::AccType::NORMAL));
 
     ir.SetRegister(t, data);
-    return MemoryInstructionContinues();
+    return true;
 }
 
 // LDRB , [, ]
@@ -519,7 +519,7 @@ bool TranslatorVisitor::thumb16_LDRB_reg(Reg m, Reg n, Reg t) {
     const auto data = ir.ZeroExtendByteToWord(ir.ReadMemory8(address, IR::AccType::NORMAL));
 
     ir.SetRegister(t, data);
-    return MemoryInstructionContinues();
+    return true;
 }
 
 // LDRH , [, ]
@@ -529,7 +529,7 @@ bool TranslatorVisitor::thumb16_LDRSH_reg(Reg m, Reg n, Reg t) {
     const auto data = ir.SignExtendHalfToWord(ir.ReadMemory16(address, IR::AccType::NORMAL));
 
     ir.SetRegister(t, data);
-    return MemoryInstructionContinues();
+    return true;
 }
 
 // STR , [, #]
@@ -540,7 +540,7 @@ bool TranslatorVisitor::thumb16_STR_imm_t1(Imm<5> imm5, Reg n, Reg t) {
     const auto data = ir.GetRegister(t);
 
     ir.WriteMemory32(address, data, IR::AccType::NORMAL);
-    return MemoryInstructionContinues();
+    return true;
 }
 
 // LDR , [, #]
@@ -551,7 +551,7 @@ bool TranslatorVisitor::thumb16_LDR_imm_t1(Imm<5> imm5, Reg n, Reg t) {
     const auto data = ir.ReadMemory32(address, IR::AccType::NORMAL);
 
     ir.SetRegister(t, data);
-    return MemoryInstructionContinues();
+    return true;
 }
 
 // STRB , [, #]
@@ -573,7 +573,7 @@ bool TranslatorVisitor::thumb16_LDRB_imm(Imm<5> imm5, Reg n, Reg t) {
     const auto data = ir.ZeroExtendByteToWord(ir.ReadMemory8(address, IR::AccType::NORMAL));
 
     ir.SetRegister(t, data);
-    return MemoryInstructionContinues();
+    return true;
 }
 
 // STRH , [, #]
@@ -583,7 +583,7 @@ bool TranslatorVisitor::thumb16_STRH_imm(Imm<5> imm5, Reg n, Reg t) {
     const auto data = ir.LeastSignificantHalf(ir.GetRegister(t));
 
     ir.WriteMemory16(address, data, IR::AccType::NORMAL);
-    return MemoryInstructionContinues();
+    return true;
 }
 
 // LDRH , [, #]
@@ -593,7 +593,7 @@ bool TranslatorVisitor::thumb16_LDRH_imm(Imm<5> imm5, Reg n, Reg t) {
     const auto data = ir.ZeroExtendHalfToWord(ir.ReadMemory16(address, IR::AccType::NORMAL));
 
     ir.SetRegister(t, data);
-    return MemoryInstructionContinues();
+    return true;
 }
 
 // STR , [, #]
@@ -605,7 +605,7 @@ bool TranslatorVisitor::thumb16_STR_imm_t2(Reg t, Imm<8> imm8) {
     const auto data = ir.GetRegister(t);
 
     ir.WriteMemory32(address, data, IR::AccType::NORMAL);
-    return MemoryInstructionContinues();
+    return true;
 }
 
 // LDR , [, #]
@@ -617,7 +617,7 @@ bool TranslatorVisitor::thumb16_LDR_imm_t2(Reg t, Imm<8> imm8) {
     const auto data = ir.ReadMemory32(address, IR::AccType::NORMAL);
 
     ir.SetRegister(t, data);
-    return MemoryInstructionContinues();
+    return true;
 }
 
 // ADR ,