From 0e52ad15223dad8f4579aa52534f864b4425b34b Mon Sep 17 00:00:00 2001 From: pineappleEA Date: Sun, 6 Jun 2021 02:35:33 +0200 Subject: [PATCH] early-access version 1755 --- README.md | 2 +- .../.github/workflows/build-and-test.yml | 70 +++++++++++++ .../.github/workflows/clang-format.yml | 18 ++++ externals/dynarmic/README.md | 2 +- externals/dynarmic/externals/CMakeLists.txt | 4 +- .../src/dynarmic/backend/x64/a32_emit_x64.cpp | 2 + .../src/dynarmic/backend/x64/a64_emit_x64.cpp | 16 ++- .../src/dynarmic/backend/x64/constants.h | 60 ++++++++++++ .../backend/x64/emit_x64_data_processing.cpp | 2 +- .../backend/x64/emit_x64_floating_point.cpp | 31 +++--- .../x64/emit_x64_vector_floating_point.cpp | 97 ++++++++++++++++++- .../backend/x64/exception_handler_macos.cpp | 2 +- .../backend/x64/exception_handler_posix.cpp | 2 +- .../translate/impl/asimd_two_regs_misc.cpp | 15 +-- .../dynarmic/src/dynarmic/ir/ir_emitter.cpp | 10 ++ .../dynarmic/src/dynarmic/ir/ir_emitter.h | 2 + .../dynarmic/src/dynarmic/ir/opcodes.inc | 2 + .../tests/A32/test_arm_instructions.cpp | 12 +-- externals/dynarmic/tests/A32/testenv.h | 3 +- externals/dynarmic/tests/A64/a64.cpp | 48 ++++----- externals/dynarmic/tests/A64/testenv.h | 3 +- externals/dynarmic/tests/CMakeLists.txt | 2 +- .../emu_window/emu_window_sdl2_gl.cpp | 2 +- 23 files changed, 325 insertions(+), 82 deletions(-) create mode 100755 externals/dynarmic/.github/workflows/build-and-test.yml create mode 100755 externals/dynarmic/.github/workflows/clang-format.yml diff --git a/README.md b/README.md index ae608c12a..3f65c59b0 100755 --- a/README.md +++ b/README.md @@ -1,7 +1,7 @@ yuzu emulator early access ============= -This is the source code for early-access 1753. +This is the source code for early-access 1755. ## Legal Notice diff --git a/externals/dynarmic/.github/workflows/build-and-test.yml b/externals/dynarmic/.github/workflows/build-and-test.yml new file mode 100755 index 000000000..ca74226ea --- /dev/null +++ b/externals/dynarmic/.github/workflows/build-and-test.yml @@ -0,0 +1,70 @@ +name: Build and Test + +on: [push, pull_request] + +env: + BUILD_TYPE: Release + +jobs: + build: + strategy: + matrix: + os: [ubuntu-latest, macos-latest] + cpu_detection: [0, 1] + fail-fast: false + + runs-on: ${{matrix.os}} + + steps: + + - name: Install build dependencies + if: ${{matrix.os == 'ubuntu-latest'}} + run: sudo apt-get install llvm ninja-build + + - name: Install build dependencies + if: ${{matrix.os == 'macos-latest'}} + run: | + brew install llvm ninja + echo "/usr/local/opt/llvm/bin" >> $GITHUB_PATH + + - name: Checkout dynarmic repo + uses: actions/checkout@v2 + + - name: Checkout ext-boost repo + uses: actions/checkout@v2 + with: + repository: MerryMage/ext-boost + path: externals/ext-boost + + - name: Checkout unicorn repo + uses: actions/checkout@v2 + with: + repository: MerryMage/unicorn + path: externals/unicorn + + - name: Build unicorn + working-directory: externals/unicorn + run: UNICORN_ARCHS=aarch64,arm ./make.sh + + - name: Configure CMake + run: > + cmake + -B ${{github.workspace}}/build + -DBoost_INCLUDE_DIRS=${{github.workspace}}/externals/ext-boost + -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} + -DDYNARMIC_ENABLE_CPU_FEATURE_DETECTION=${{matrix.cpu_detection}} + -DDYNARMIC_TESTS_USE_UNICORN=1 + -DDYNARMIC_USE_LLVM=1 + -DLIBUNICORN_INCLUDE_DIR=${{github.workspace}}/externals/unicorn/include + -DLIBUNICORN_LIBRARY=${{github.workspace}}/externals/unicorn/libunicorn.a + -G Ninja + + - name: Build + working-directory: ${{github.workspace}}/build + run: ninja + + - name: Test + env: + DYLD_FALLBACK_LIBRARY_PATH: ${{github.workspace}}/externals/unicorn + working-directory: ${{github.workspace}}/build + run: ctest --extra-verbose -C ${{env.BUILD_TYPE}} diff --git a/externals/dynarmic/.github/workflows/clang-format.yml b/externals/dynarmic/.github/workflows/clang-format.yml new file mode 100755 index 000000000..0138a51ab --- /dev/null +++ b/externals/dynarmic/.github/workflows/clang-format.yml @@ -0,0 +1,18 @@ +name: clang-format + +on: [push, pull_request] + +jobs: + clang-format: + + runs-on: ubuntu-latest + + steps: + + - name: Checkout dynarmic repo + uses: actions/checkout@v2 + + - uses: DoozyX/clang-format-lint-action@v0.12 + with: + source: 'src tests' + clangFormatVersion: 12 diff --git a/externals/dynarmic/README.md b/externals/dynarmic/README.md index b5b5533a3..dd58d2b96 100755 --- a/externals/dynarmic/README.md +++ b/externals/dynarmic/README.md @@ -1,7 +1,7 @@ Dynarmic ======== -[![Travis CI Build Status](https://api.travis-ci.org/MerryMage/dynarmic.svg?branch=master)](https://travis-ci.org/MerryMage/dynarmic/branches) [![Appveyor CI Build status](https://ci.appveyor.com/api/projects/status/maeiqr41rgm1innm/branch/master?svg=true)](https://ci.appveyor.com/project/MerryMage/dynarmic/branch/master) +[![Github Actions Build Status](https://github.com/MerryMage/dynarmic/actions/workflows/build-and-test.yml/badge.svg)](https://github.com/MerryMage/dynarmic/actions/workflows/build-and-test.yml) [![Appveyor CI Build status](https://ci.appveyor.com/api/projects/status/maeiqr41rgm1innm/branch/master?svg=true)](https://ci.appveyor.com/project/MerryMage/dynarmic/branch/master) A dynamic recompiler for ARM. diff --git a/externals/dynarmic/externals/CMakeLists.txt b/externals/dynarmic/externals/CMakeLists.txt index 9419e1da6..d1e17a620 100755 --- a/externals/dynarmic/externals/CMakeLists.txt +++ b/externals/dynarmic/externals/CMakeLists.txt @@ -28,7 +28,7 @@ target_include_directories(robin_map SYSTEM INTERFACE "$(code.getCurr()), Common::BitCast(wrapped_fn), *fastmem_marker, - } - ); + }); } else { // Use page table ASSERT(conf.page_table); @@ -1045,8 +1044,7 @@ void A64EmitX64::EmitMemoryWrite(A64EmitContext& ctx, IR::Inst* inst) { Common::BitCast(code.getCurr()), Common::BitCast(wrapped_fn), *fastmem_marker, - } - ); + }); } else { // Use page table ASSERT(conf.page_table); @@ -1114,8 +1112,7 @@ void A64EmitX64::EmitA64ReadMemory128(A64EmitContext& ctx, IR::Inst* inst) { Common::BitCast(code.getCurr()), Common::BitCast(wrapped_fn), *fastmem_marker, - } - ); + }); } else { // Use page table ASSERT(conf.page_table); @@ -1187,8 +1184,7 @@ void A64EmitX64::EmitA64WriteMemory128(A64EmitContext& ctx, IR::Inst* inst) { Common::BitCast(code.getCurr()), Common::BitCast(wrapped_fn), *fastmem_marker, - } - ); + }); } else { // Use page table ASSERT(conf.page_table); @@ -1481,7 +1477,9 @@ void A64EmitX64::EmitPatchMovRcx(CodePtr target_code_ptr) { void A64EmitX64::Unpatch(const IR::LocationDescriptor& location) { EmitX64::Unpatch(location); if (conf.HasOptimization(OptimizationFlag::FastDispatch)) { + code.DisableWriting(); (*fast_dispatch_table_lookup)(location.Value()) = {}; + code.EnableWriting(); } } diff --git a/externals/dynarmic/src/dynarmic/backend/x64/constants.h b/externals/dynarmic/src/dynarmic/backend/x64/constants.h index a458d1020..682d8ebdf 100755 --- a/externals/dynarmic/src/dynarmic/backend/x64/constants.h +++ b/externals/dynarmic/src/dynarmic/backend/x64/constants.h @@ -5,7 +5,11 @@ #pragma once +#include + +#include "dynarmic/common/bit_util.h" #include "dynarmic/common/common_types.h" +#include "dynarmic/common/fp/rounding_mode.h" namespace Dynarmic::Backend::X64 { @@ -42,4 +46,60 @@ constexpr u8 b = 0b11001100; constexpr u8 c = 0b10101010; } // namespace Tern +// Opcodes for use with vfixupimm +enum class FpFixup : u8 { + A = 0b0000, // A + B = 0b0001, // B + QNaN_B = 0b0010, // QNaN with sign of B + IndefNaN = 0b0011, // Indefinite QNaN (Negative QNaN with no payload on x86) + NegInf = 0b0100, // -Infinity + PosInf = 0b0101, // +Infinity + Inf_B = 0b0110, // Infinity with sign of B + NegZero = 0b0111, // -0.0 + PosZero = 0b1000, // +0.0 + NegOne = 0b1001, // -1.0 + PosOne = 0b1010, // +1.0 + Half = 0b1011, // 0.5 + Ninety = 0b1100, // 90.0 + HalfPi = 0b1101, // PI/2 + PosMax = 0b1110, // +{FLT_MAX,DBL_MAX} + NegMax = 0b1111, // -{FLT_MAX,DBL_MAX} +}; + +// Generates 32-bit LUT for vfixupimm instruction +constexpr u32 FixupLUT(FpFixup src_qnan = FpFixup::A, + FpFixup src_snan = FpFixup::A, + FpFixup src_zero = FpFixup::A, + FpFixup src_posone = FpFixup::A, + FpFixup src_neginf = FpFixup::A, + FpFixup src_posinf = FpFixup::A, + FpFixup src_pos = FpFixup::A, + FpFixup src_neg = FpFixup::A) { + u32 fixup_lut = 0; + fixup_lut = Common::ModifyBits<0, 3, u32>(fixup_lut, static_cast(src_qnan)); + fixup_lut = Common::ModifyBits<4, 7, u32>(fixup_lut, static_cast(src_snan)); + fixup_lut = Common::ModifyBits<8, 11, u32>(fixup_lut, static_cast(src_zero)); + fixup_lut = Common::ModifyBits<12, 15, u32>(fixup_lut, static_cast(src_posone)); + fixup_lut = Common::ModifyBits<16, 19, u32>(fixup_lut, static_cast(src_neginf)); + fixup_lut = Common::ModifyBits<20, 23, u32>(fixup_lut, static_cast(src_posinf)); + fixup_lut = Common::ModifyBits<24, 27, u32>(fixup_lut, static_cast(src_pos)); + fixup_lut = Common::ModifyBits<28, 31, u32>(fixup_lut, static_cast(src_neg)); + return fixup_lut; +} + +constexpr std::optional ConvertRoundingModeToX64Immediate(FP::RoundingMode rounding_mode) { + switch (rounding_mode) { + case FP::RoundingMode::ToNearest_TieEven: + return 0b00; + case FP::RoundingMode::TowardsPlusInfinity: + return 0b10; + case FP::RoundingMode::TowardsMinusInfinity: + return 0b01; + case FP::RoundingMode::TowardsZero: + return 0b11; + default: + return std::nullopt; + } +} + } // namespace Dynarmic::Backend::X64 diff --git a/externals/dynarmic/src/dynarmic/backend/x64/emit_x64_data_processing.cpp b/externals/dynarmic/src/dynarmic/backend/x64/emit_x64_data_processing.cpp index 78cdd96a6..fabe88d2d 100755 --- a/externals/dynarmic/src/dynarmic/backend/x64/emit_x64_data_processing.cpp +++ b/externals/dynarmic/src/dynarmic/backend/x64/emit_x64_data_processing.cpp @@ -235,7 +235,7 @@ static void EmitExtractRegister(BlockOfCode& code, EmitContext& ctx, IR::Inst* i auto args = ctx.reg_alloc.GetArgumentInfo(inst); const Xbyak::Reg result = ctx.reg_alloc.UseScratchGpr(args[0]).changeBit(bit_size); - const Xbyak::Reg operand = ctx.reg_alloc.UseScratchGpr(args[1]).changeBit(bit_size); + const Xbyak::Reg operand = ctx.reg_alloc.UseGpr(args[1]).changeBit(bit_size); const u8 lsb = args[2].GetImmediateU8(); code.shrd(result, operand, lsb); diff --git a/externals/dynarmic/src/dynarmic/backend/x64/emit_x64_floating_point.cpp b/externals/dynarmic/src/dynarmic/backend/x64/emit_x64_floating_point.cpp index 9d908a084..2994a91e3 100755 --- a/externals/dynarmic/src/dynarmic/backend/x64/emit_x64_floating_point.cpp +++ b/externals/dynarmic/src/dynarmic/backend/x64/emit_x64_floating_point.cpp @@ -16,6 +16,7 @@ #include "dynarmic/backend/x64/abi.h" #include "dynarmic/backend/x64/block_of_code.h" +#include "dynarmic/backend/x64/constants.h" #include "dynarmic/backend/x64/emit_x64.h" #include "dynarmic/common/assert.h" #include "dynarmic/common/cast_util.h" @@ -79,21 +80,6 @@ constexpr u64 f64_max_u64_lim = 0x43f0000000000000u; // 2^64 as a double (actua } \ } -std::optional ConvertRoundingModeToX64Immediate(FP::RoundingMode rounding_mode) { - switch (rounding_mode) { - case FP::RoundingMode::ToNearest_TieEven: - return 0b00; - case FP::RoundingMode::TowardsPlusInfinity: - return 0b10; - case FP::RoundingMode::TowardsMinusInfinity: - return 0b01; - case FP::RoundingMode::TowardsZero: - return 0b11; - default: - return std::nullopt; - } -} - template void DenormalsAreZero(BlockOfCode& code, EmitContext& ctx, std::initializer_list to_daz) { if (ctx.FPCR().FZ()) { @@ -116,9 +102,18 @@ void DenormalsAreZero(BlockOfCode& code, EmitContext& ctx, std::initializer_list template void ZeroIfNaN(BlockOfCode& code, Xbyak::Xmm xmm_value, Xbyak::Xmm xmm_scratch) { - code.xorps(xmm_scratch, xmm_scratch); - FCODE(cmpords)(xmm_scratch, xmm_value); // true mask when ordered (i.e.: when not an NaN) - code.pand(xmm_value, xmm_scratch); + if (code.HasHostFeature(HostFeature::AVX512_OrthoFloat)) { + constexpr u32 nan_to_zero = FixupLUT(FpFixup::PosZero, + FpFixup::PosZero); + FCODE(vfixupimms)(xmm_value, xmm_value, code.MConst(ptr, u64(nan_to_zero)), u8(0)); + } else if (code.HasHostFeature(HostFeature::AVX)) { + FCODE(vcmpords)(xmm_scratch, xmm_value, xmm_value); + FCODE(vandp)(xmm_value, xmm_value, xmm_scratch); + } else { + code.xorps(xmm_scratch, xmm_scratch); + FCODE(cmpords)(xmm_scratch, xmm_value); // true mask when ordered (i.e.: when not an NaN) + code.pand(xmm_value, xmm_scratch); + } } template diff --git a/externals/dynarmic/src/dynarmic/backend/x64/emit_x64_vector_floating_point.cpp b/externals/dynarmic/src/dynarmic/backend/x64/emit_x64_vector_floating_point.cpp index e4f77d6d6..3952527b1 100755 --- a/externals/dynarmic/src/dynarmic/backend/x64/emit_x64_vector_floating_point.cpp +++ b/externals/dynarmic/src/dynarmic/backend/x64/emit_x64_vector_floating_point.cpp @@ -19,6 +19,7 @@ #include "dynarmic/backend/x64/abi.h" #include "dynarmic/backend/x64/block_of_code.h" +#include "dynarmic/backend/x64/constants.h" #include "dynarmic/backend/x64/emit_x64.h" #include "dynarmic/common/assert.h" #include "dynarmic/common/fp/fpcr.h" @@ -203,7 +204,11 @@ void ForceToDefaultNaN(BlockOfCode& code, FP::FPCR fpcr, Xbyak::Xmm result) { template void ZeroIfNaN(BlockOfCode& code, Xbyak::Xmm result) { const Xbyak::Xmm nan_mask = xmm0; - if (code.HasHostFeature(HostFeature::AVX)) { + if (code.HasHostFeature(HostFeature::AVX512_OrthoFloat)) { + constexpr u32 nan_to_zero = FixupLUT(FpFixup::PosZero, + FpFixup::PosZero); + FCODE(vfixupimmp)(result, result, code.MConst(ptr_b, u64(nan_to_zero)), u8(0)); + } else if (code.HasHostFeature(HostFeature::AVX)) { FCODE(vcmpordp)(nan_mask, result, result); FCODE(vandp)(result, result, nan_mask); } else { @@ -637,6 +642,49 @@ void EmitX64::EmitFPVectorEqual64(EmitContext& ctx, IR::Inst* inst) { ctx.reg_alloc.DefineValue(inst, a); } +void EmitX64::EmitFPVectorFromHalf32(EmitContext& ctx, IR::Inst* inst) { + const auto rounding_mode = static_cast(inst->GetArg(1).GetU8()); + const bool fpcr_controlled = inst->GetArg(2).GetU1(); + + if (code.HasHostFeature(HostFeature::F16C) && !ctx.FPCR().AHP() && !ctx.FPCR().FZ16()) { + auto args = ctx.reg_alloc.GetArgumentInfo(inst); + + const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(); + const Xbyak::Xmm value = ctx.reg_alloc.UseXmm(args[0]); + + code.vcvtph2ps(result, value); + ForceToDefaultNaN<32>(code, ctx.FPCR(fpcr_controlled), result); + + ctx.reg_alloc.DefineValue(inst, result); + return; + } + + using rounding_list = mp::list< + mp::lift_value, + mp::lift_value, + mp::lift_value, + mp::lift_value, + mp::lift_value>; + + static const auto lut = Common::GenerateLookupTableFromList( + [](auto arg) { + return std::pair{ + mp::lower_to_tuple_v, + Common::FptrCast( + [](VectorArray& output, const VectorArray& input, FP::FPCR fpcr, FP::FPSR& fpsr) { + constexpr auto t = mp::lower_to_tuple_v; + constexpr FP::RoundingMode rounding_mode = std::get<0>(t); + + for (size_t i = 0; i < output.size(); ++i) { + output[i] = FP::FPConvert(input[i], fpcr, rounding_mode, fpsr); + } + })}; + }, + mp::cartesian_product{}); + + EmitTwoOpFallback<2>(code, ctx, inst, lut.at(std::make_tuple(rounding_mode))); +} + void EmitX64::EmitFPVectorFromSignedFixed32(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); const Xbyak::Xmm xmm = ctx.reg_alloc.UseScratchXmm(args[0]); @@ -1602,6 +1650,53 @@ void EmitX64::EmitFPVectorSub64(EmitContext& ctx, IR::Inst* inst) { EmitThreeOpVectorOperation<64, DefaultIndexer>(code, ctx, inst, &Xbyak::CodeGenerator::subpd); } +void EmitX64::EmitFPVectorToHalf32(EmitContext& ctx, IR::Inst* inst) { + const auto rounding_mode = static_cast(inst->GetArg(1).GetU8()); + const bool fpcr_controlled = inst->GetArg(2).GetU1(); + + if (code.HasHostFeature(HostFeature::F16C) && !ctx.FPCR().AHP() && !ctx.FPCR().FZ16()) { + auto args = ctx.reg_alloc.GetArgumentInfo(inst); + const auto round_imm = ConvertRoundingModeToX64Immediate(rounding_mode); + + const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]); + + ForceToDefaultNaN<32>(code, ctx.FPCR(fpcr_controlled), result); + code.vcvtps2ph(result, result, static_cast(*round_imm)); + + ctx.reg_alloc.DefineValue(inst, result); + return; + } + + using rounding_list = mp::list< + mp::lift_value, + mp::lift_value, + mp::lift_value, + mp::lift_value, + mp::lift_value>; + + static const auto lut = Common::GenerateLookupTableFromList( + [](auto arg) { + return std::pair{ + mp::lower_to_tuple_v, + Common::FptrCast( + [](VectorArray& output, const VectorArray& input, FP::FPCR fpcr, FP::FPSR& fpsr) { + constexpr auto t = mp::lower_to_tuple_v; + constexpr FP::RoundingMode rounding_mode = std::get<0>(t); + + for (size_t i = 0; i < output.size(); ++i) { + if (i < input.size()) { + output[i] = FP::FPConvert(input[i], fpcr, rounding_mode, fpsr); + } else { + output[i] = 0; + } + } + })}; + }, + mp::cartesian_product{}); + + EmitTwoOpFallback<2>(code, ctx, inst, lut.at(std::make_tuple(rounding_mode))); +} + template void EmitFPVectorToFixed(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) { using FPT = mp::unsigned_integer_of_size; diff --git a/externals/dynarmic/src/dynarmic/backend/x64/exception_handler_macos.cpp b/externals/dynarmic/src/dynarmic/backend/x64/exception_handler_macos.cpp index 21b928d84..53bc8abc6 100755 --- a/externals/dynarmic/src/dynarmic/backend/x64/exception_handler_macos.cpp +++ b/externals/dynarmic/src/dynarmic/backend/x64/exception_handler_macos.cpp @@ -113,7 +113,7 @@ kern_return_t MachHandler::HandleRequest(x86_thread_state64_t* ts) { const auto iter = FindCodeBlockInfo(ts->__rip); if (iter == code_block_infos.end()) { - fmt::print(stderr, "dynarmic: macOS MachHandler: Exception was not in registered code blocks (rip {:#016x})\n", ts->__rip); + fmt::print(stderr, "Unhandled EXC_BAD_ACCESS at rip {:#016x}\n", ts->__rip); return KERN_FAILURE; } diff --git a/externals/dynarmic/src/dynarmic/backend/x64/exception_handler_posix.cpp b/externals/dynarmic/src/dynarmic/backend/x64/exception_handler_posix.cpp index 37832c793..ff01ab606 100755 --- a/externals/dynarmic/src/dynarmic/backend/x64/exception_handler_posix.cpp +++ b/externals/dynarmic/src/dynarmic/backend/x64/exception_handler_posix.cpp @@ -149,7 +149,7 @@ void SigHandler::SigAction(int sig, siginfo_t* info, void* raw_context) { } } - fmt::print(stderr, "dynarmic: POSIX SigHandler: Exception was not in registered code blocks (rip {:#016x})\n", CTX_RIP); + fmt::print(stderr, "Unhandled {} at rip {:#016x}\n", sig == SIGSEGV ? "SIGSEGV" : "SIGBUS", CTX_RIP); struct sigaction* retry_sa = sig == SIGSEGV ? &sig_handler.old_sa_segv : &sig_handler.old_sa_bus; if (retry_sa->sa_flags & SA_SIGINFO) { diff --git a/externals/dynarmic/src/dynarmic/frontend/A32/translate/impl/asimd_two_regs_misc.cpp b/externals/dynarmic/src/dynarmic/frontend/A32/translate/impl/asimd_two_regs_misc.cpp index 449d46da6..b618088d3 100755 --- a/externals/dynarmic/src/dynarmic/frontend/A32/translate/impl/asimd_two_regs_misc.cpp +++ b/externals/dynarmic/src/dynarmic/frontend/A32/translate/impl/asimd_two_regs_misc.cpp @@ -620,24 +620,13 @@ bool TranslatorVisitor::asimd_VCVT_half(bool D, size_t sz, size_t Vd, bool half_ } const size_t esize = 8U << sz; - const size_t num_elements = 4; const auto rounding_mode = FP::RoundingMode::ToNearest_TieEven; // StandardFPSCRValue().RMode const auto d = ToVector(half_to_single, Vd, D); const auto m = ToVector(!half_to_single, Vm, M); const auto operand = ir.GetVector(m); - IR::U128 result = ir.ZeroVector(); - for (size_t i = 0; i < num_elements; i++) { - if (half_to_single) { - const IR::U16 old_element = ir.VectorGetElement(esize, operand, i); - const IR::U32 new_element = ir.FPHalfToSingle(old_element, rounding_mode); - result = ir.VectorSetElement(esize * 2, result, i, new_element); - } else { - const IR::U32 old_element = ir.VectorGetElement(esize * 2, operand, i); - const IR::U16 new_element = ir.FPSingleToHalf(old_element, rounding_mode); - result = ir.VectorSetElement(esize, result, i, new_element); - } - } + const IR::U128 result = half_to_single ? ir.FPVectorFromHalf(esize * 2, operand, rounding_mode, false) + : ir.FPVectorToHalf(esize * 2, operand, rounding_mode, false); ir.SetVector(d, result); return true; } diff --git a/externals/dynarmic/src/dynarmic/ir/ir_emitter.cpp b/externals/dynarmic/src/dynarmic/ir/ir_emitter.cpp index bd6c1ebca..2f289d670 100755 --- a/externals/dynarmic/src/dynarmic/ir/ir_emitter.cpp +++ b/externals/dynarmic/src/dynarmic/ir/ir_emitter.cpp @@ -2404,6 +2404,11 @@ U128 IREmitter::FPVectorEqual(size_t esize, const U128& a, const U128& b, bool f UNREACHABLE(); } +U128 IREmitter::FPVectorFromHalf(size_t esize, const U128& a, FP::RoundingMode rounding, bool fpcr_controlled) { + ASSERT(esize == 32); + return Inst(Opcode::FPVectorFromHalf32, a, Imm8(static_cast(rounding)), Imm1(fpcr_controlled)); +} + U128 IREmitter::FPVectorFromSignedFixed(size_t esize, const U128& a, size_t fbits, FP::RoundingMode rounding, bool fpcr_controlled) { ASSERT(fbits <= esize); switch (esize) { @@ -2613,6 +2618,11 @@ U128 IREmitter::FPVectorSub(size_t esize, const U128& a, const U128& b, bool fpc UNREACHABLE(); } +U128 IREmitter::FPVectorToHalf(size_t esize, const U128& a, FP::RoundingMode rounding, bool fpcr_controlled) { + ASSERT(esize == 32); + return Inst(Opcode::FPVectorToHalf32, a, Imm8(static_cast(rounding)), Imm1(fpcr_controlled)); +} + U128 IREmitter::FPVectorToSignedFixed(size_t esize, const U128& a, size_t fbits, FP::RoundingMode rounding, bool fpcr_controlled) { ASSERT(fbits <= esize); diff --git a/externals/dynarmic/src/dynarmic/ir/ir_emitter.h b/externals/dynarmic/src/dynarmic/ir/ir_emitter.h index db5421365..caba937d4 100755 --- a/externals/dynarmic/src/dynarmic/ir/ir_emitter.h +++ b/externals/dynarmic/src/dynarmic/ir/ir_emitter.h @@ -370,6 +370,7 @@ public: U128 FPVectorAdd(size_t esize, const U128& a, const U128& b, bool fpcr_controlled = true); U128 FPVectorDiv(size_t esize, const U128& a, const U128& b, bool fpcr_controlled = true); U128 FPVectorEqual(size_t esize, const U128& a, const U128& b, bool fpcr_controlled = true); + U128 FPVectorFromHalf(size_t esize, const U128& a, FP::RoundingMode rounding, bool fpcr_controlled = true); U128 FPVectorFromSignedFixed(size_t esize, const U128& a, size_t fbits, FP::RoundingMode rounding, bool fpcr_controlled = true); U128 FPVectorFromUnsignedFixed(size_t esize, const U128& a, size_t fbits, FP::RoundingMode rounding, bool fpcr_controlled = true); U128 FPVectorGreater(size_t esize, const U128& a, const U128& b, bool fpcr_controlled = true); @@ -389,6 +390,7 @@ public: U128 FPVectorRSqrtStepFused(size_t esize, const U128& a, const U128& b, bool fpcr_controlled = true); U128 FPVectorSqrt(size_t esize, const U128& a, bool fpcr_controlled = true); U128 FPVectorSub(size_t esize, const U128& a, const U128& b, bool fpcr_controlled = true); + U128 FPVectorToHalf(size_t esize, const U128& a, FP::RoundingMode rounding, bool fpcr_controlled = true); U128 FPVectorToSignedFixed(size_t esize, const U128& a, size_t fbits, FP::RoundingMode rounding, bool fpcr_controlled = true); U128 FPVectorToUnsignedFixed(size_t esize, const U128& a, size_t fbits, FP::RoundingMode rounding, bool fpcr_controlled = true); diff --git a/externals/dynarmic/src/dynarmic/ir/opcodes.inc b/externals/dynarmic/src/dynarmic/ir/opcodes.inc index 50c36439b..f8995b19c 100755 --- a/externals/dynarmic/src/dynarmic/ir/opcodes.inc +++ b/externals/dynarmic/src/dynarmic/ir/opcodes.inc @@ -613,6 +613,7 @@ OPCODE(FPVectorDiv64, U128, U128 OPCODE(FPVectorEqual16, U128, U128, U128, U1 ) OPCODE(FPVectorEqual32, U128, U128, U128, U1 ) OPCODE(FPVectorEqual64, U128, U128, U128, U1 ) +OPCODE(FPVectorFromHalf32, U128, U128, U8, U1 ) OPCODE(FPVectorFromSignedFixed32, U128, U128, U8, U8, U1 ) OPCODE(FPVectorFromSignedFixed64, U128, U128, U8, U8, U1 ) OPCODE(FPVectorFromUnsignedFixed32, U128, U128, U8, U8, U1 ) @@ -658,6 +659,7 @@ OPCODE(FPVectorSqrt32, U128, U128 OPCODE(FPVectorSqrt64, U128, U128, U1 ) OPCODE(FPVectorSub32, U128, U128, U128, U1 ) OPCODE(FPVectorSub64, U128, U128, U128, U1 ) +OPCODE(FPVectorToHalf32, U128, U128, U8, U1 ) OPCODE(FPVectorToSignedFixed16, U128, U128, U8, U8, U1 ) OPCODE(FPVectorToSignedFixed32, U128, U128, U8, U8, U1 ) OPCODE(FPVectorToSignedFixed64, U128, U128, U8, U8, U1 ) diff --git a/externals/dynarmic/tests/A32/test_arm_instructions.cpp b/externals/dynarmic/tests/A32/test_arm_instructions.cpp index fcfdf07be..40598fb03 100755 --- a/externals/dynarmic/tests/A32/test_arm_instructions.cpp +++ b/externals/dynarmic/tests/A32/test_arm_instructions.cpp @@ -528,7 +528,7 @@ TEST_CASE("arm: vcvt.s16.f64", "[arm][A32]") { TEST_CASE("arm: Memory access (fastmem)", "[arm][A32]") { constexpr size_t address_width = 12; - constexpr size_t memory_size = 1ull << address_width; // 4K + constexpr size_t memory_size = 1ull << address_width; // 4K constexpr size_t page_size = 4 * 1024; constexpr size_t buffer_size = 2 * page_size; char buffer[buffer_size]; @@ -547,13 +547,13 @@ TEST_CASE("arm: Memory access (fastmem)", "[arm][A32]") { memset(backing_memory, 0, memory_size); memcpy(backing_memory + 0x100, "Lorem ipsum dolor sit amet, consectetur adipiscing elit.", 57); - env.MemoryWrite32(0, 0xE5904000); // LDR R4, [R0] - env.MemoryWrite32(4, 0xE5814000); // STR R4, [R1] - env.MemoryWrite32(8, 0xEAFFFFFE); // B . + env.MemoryWrite32(0, 0xE5904000); // LDR R4, [R0] + env.MemoryWrite32(4, 0xE5814000); // STR R4, [R1] + env.MemoryWrite32(8, 0xEAFFFFFE); // B . jit.Regs()[0] = 0x100; jit.Regs()[1] = 0x1F0; - jit.Regs()[15] = 0; // PC = 0 - jit.SetCpsr(0x000001d0); // User-mode + jit.Regs()[15] = 0; // PC = 0 + jit.SetCpsr(0x000001d0); // User-mode env.ticks_left = 3; jit.Run(); diff --git a/externals/dynarmic/tests/A32/testenv.h b/externals/dynarmic/tests/A32/testenv.h index 9a35b885f..68538ba2f 100755 --- a/externals/dynarmic/tests/A32/testenv.h +++ b/externals/dynarmic/tests/A32/testenv.h @@ -120,7 +120,8 @@ public: u64 ticks_left = 0; char* backing_memory = nullptr; - explicit A32FastmemTestEnv(char* addr) : backing_memory(addr) {} + explicit A32FastmemTestEnv(char* addr) + : backing_memory(addr) {} template T read(std::uint32_t vaddr) { diff --git a/externals/dynarmic/tests/A64/a64.cpp b/externals/dynarmic/tests/A64/a64.cpp index 9f82975fd..84c74a99f 100755 --- a/externals/dynarmic/tests/A64/a64.cpp +++ b/externals/dynarmic/tests/A64/a64.cpp @@ -36,15 +36,15 @@ TEST_CASE("A64: VQADD", "[a64]") { A64TestEnv env; A64::Jit jit{A64::UserConfig{&env}}; - env.code_mem.emplace_back(0x6e210c02); // UQADD v2.16b, v0.16b, v1.16b - env.code_mem.emplace_back(0x4e210c03); // SQADD v3.16b, v0.16b, v1.16b - env.code_mem.emplace_back(0x6e610c04); // UQADD v4.8h, v0.8h, v1.8h - env.code_mem.emplace_back(0x4e610c05); // SQADD v5.8h, v0.8h, v1.8h - env.code_mem.emplace_back(0x6ea10c06); // UQADD v6.4s, v0.4s, v1.4s - env.code_mem.emplace_back(0x4ea10c07); // SQADD v7.4s, v0.4s, v1.4s - env.code_mem.emplace_back(0x6ee10c08); // UQADD v8.2d, v0.2d, v1.2d - env.code_mem.emplace_back(0x4ee10c09); // SQADD v9.2d, v0.2d, v1.2d - env.code_mem.emplace_back(0x14000000); // B . + env.code_mem.emplace_back(0x6e210c02); // UQADD v2.16b, v0.16b, v1.16b + env.code_mem.emplace_back(0x4e210c03); // SQADD v3.16b, v0.16b, v1.16b + env.code_mem.emplace_back(0x6e610c04); // UQADD v4.8h, v0.8h, v1.8h + env.code_mem.emplace_back(0x4e610c05); // SQADD v5.8h, v0.8h, v1.8h + env.code_mem.emplace_back(0x6ea10c06); // UQADD v6.4s, v0.4s, v1.4s + env.code_mem.emplace_back(0x4ea10c07); // SQADD v7.4s, v0.4s, v1.4s + env.code_mem.emplace_back(0x6ee10c08); // UQADD v8.2d, v0.2d, v1.2d + env.code_mem.emplace_back(0x4ee10c09); // SQADD v9.2d, v0.2d, v1.2d + env.code_mem.emplace_back(0x14000000); // B . jit.SetVector(0, {0x7F7F7F7F7F7F7F7F, 0x7FFFFFFF7FFF7FFF}); jit.SetVector(1, {0x8010FF00807F0000, 0x8000000080008000}); @@ -67,15 +67,15 @@ TEST_CASE("A64: VQSUB", "[a64]") { A64TestEnv env; A64::Jit jit{A64::UserConfig{&env}}; - env.code_mem.emplace_back(0x6e212c02); // UQSUB v2.16b, v0.16b, v1.16b - env.code_mem.emplace_back(0x4e212c03); // SQSUB v3.16b, v0.16b, v1.16b - env.code_mem.emplace_back(0x6e612c04); // UQSUB v4.8h, v0.8h, v1.8h - env.code_mem.emplace_back(0x4e612c05); // SQSUB v5.8h, v0.8h, v1.8h - env.code_mem.emplace_back(0x6ea12c06); // UQSUB v6.4s, v0.4s, v1.4s - env.code_mem.emplace_back(0x4ea12c07); // SQSUB v7.4s, v0.4s, v1.4s - env.code_mem.emplace_back(0x6ee12c08); // UQSUB v8.2d, v0.2d, v1.2d - env.code_mem.emplace_back(0x4ee12c09); // SQSUB v9.2d, v0.2d, v1.2d - env.code_mem.emplace_back(0x14000000); // B . + env.code_mem.emplace_back(0x6e212c02); // UQSUB v2.16b, v0.16b, v1.16b + env.code_mem.emplace_back(0x4e212c03); // SQSUB v3.16b, v0.16b, v1.16b + env.code_mem.emplace_back(0x6e612c04); // UQSUB v4.8h, v0.8h, v1.8h + env.code_mem.emplace_back(0x4e612c05); // SQSUB v5.8h, v0.8h, v1.8h + env.code_mem.emplace_back(0x6ea12c06); // UQSUB v6.4s, v0.4s, v1.4s + env.code_mem.emplace_back(0x4ea12c07); // SQSUB v7.4s, v0.4s, v1.4s + env.code_mem.emplace_back(0x6ee12c08); // UQSUB v8.2d, v0.2d, v1.2d + env.code_mem.emplace_back(0x4ee12c09); // SQSUB v9.2d, v0.2d, v1.2d + env.code_mem.emplace_back(0x14000000); // B . jit.SetVector(0, {0x8010FF00807F0000, 0x8000000080008000}); jit.SetVector(1, {0x7F7F7F7F7F7F7F7F, 0x7FFFFFFF7FFF7FFF}); @@ -840,7 +840,7 @@ TEST_CASE("A64: Cache Maintenance Instructions", "[a64]") { TEST_CASE("A64: Memory access (fastmem)", "[a64]") { constexpr size_t address_width = 12; - constexpr size_t memory_size = 1ull << address_width; // 4K + constexpr size_t memory_size = 1ull << address_width; // 4K constexpr size_t page_size = 4 * 1024; constexpr size_t buffer_size = 2 * page_size; char buffer[buffer_size]; @@ -861,11 +861,11 @@ TEST_CASE("A64: Memory access (fastmem)", "[a64]") { memset(backing_memory, 0, memory_size); memcpy(backing_memory + 0x100, "Lorem ipsum dolor sit amet, consectetur adipiscing elit.", 57); - env.MemoryWrite32(0, 0xA9401404); // LDP X4, X5, [X0] - env.MemoryWrite32(4, 0xF9400046); // LDR X6, [X2] - env.MemoryWrite32(8, 0xA9001424); // STP X4, X5, [X1] - env.MemoryWrite32(12, 0xF9000066); // STR X6, [X3] - env.MemoryWrite32(16, 0x14000000); // B . + env.MemoryWrite32(0, 0xA9401404); // LDP X4, X5, [X0] + env.MemoryWrite32(4, 0xF9400046); // LDR X6, [X2] + env.MemoryWrite32(8, 0xA9001424); // STP X4, X5, [X1] + env.MemoryWrite32(12, 0xF9000066); // STR X6, [X3] + env.MemoryWrite32(16, 0x14000000); // B . jit.SetRegister(0, 0x100); jit.SetRegister(1, 0x1F0); jit.SetRegister(2, 0x10F); diff --git a/externals/dynarmic/tests/A64/testenv.h b/externals/dynarmic/tests/A64/testenv.h index 79a2bff03..d18797f20 100755 --- a/externals/dynarmic/tests/A64/testenv.h +++ b/externals/dynarmic/tests/A64/testenv.h @@ -130,7 +130,8 @@ public: u64 ticks_left = 0; char* backing_memory = nullptr; - explicit A64FastmemTestEnv(char* addr) : backing_memory(addr) {} + explicit A64FastmemTestEnv(char* addr) + : backing_memory(addr) {} template T read(u64 vaddr) { diff --git a/externals/dynarmic/tests/CMakeLists.txt b/externals/dynarmic/tests/CMakeLists.txt index 105a4a004..bfa9baf85 100755 --- a/externals/dynarmic/tests/CMakeLists.txt +++ b/externals/dynarmic/tests/CMakeLists.txt @@ -61,4 +61,4 @@ target_include_directories(dynarmic_print_info PRIVATE . ../src) target_compile_options(dynarmic_print_info PRIVATE ${DYNARMIC_CXX_FLAGS}) target_compile_definitions(dynarmic_print_info PRIVATE FMT_USE_USER_DEFINED_LITERALS=0) -add_test(dynarmic_tests dynarmic_tests) +add_test(dynarmic_tests dynarmic_tests --durations yes) diff --git a/src/yuzu_cmd/emu_window/emu_window_sdl2_gl.cpp b/src/yuzu_cmd/emu_window/emu_window_sdl2_gl.cpp index a3c513e3a..837a44be7 100755 --- a/src/yuzu_cmd/emu_window/emu_window_sdl2_gl.cpp +++ b/src/yuzu_cmd/emu_window/emu_window_sdl2_gl.cpp @@ -32,7 +32,7 @@ class SDLGLContext : public Core::Frontend::GraphicsContext { public: - explicit SDLGLContext(SDL_Window* window) : window(window) { + explicit SDLGLContext(SDL_Window* window_) : window{window_} { context = SDL_GL_CreateContext(window); }