early-access version 1546

This commit is contained in:
pineappleEA 2021-04-02 01:44:36 +02:00
parent c7fcf99851
commit 0a011dc368
7 changed files with 209 additions and 17 deletions

View File

@ -1,7 +1,7 @@
yuzu emulator early access yuzu emulator early access
============= =============
This is the source code for early-access 1543. This is the source code for early-access 1546.
## Legal Notice ## Legal Notice

View File

@ -163,6 +163,7 @@ if ("A32" IN_LIST DYNARMIC_FRONTENDS)
frontend/A32/translate/impl/thumb32_data_processing_shifted_register.cpp frontend/A32/translate/impl/thumb32_data_processing_shifted_register.cpp
frontend/A32/translate/impl/thumb32_load_byte.cpp frontend/A32/translate/impl/thumb32_load_byte.cpp
frontend/A32/translate/impl/thumb32_load_halfword.cpp frontend/A32/translate/impl/thumb32_load_halfword.cpp
frontend/A32/translate/impl/thumb32_load_store_dual.cpp
frontend/A32/translate/impl/thumb32_load_store_multiple.cpp frontend/A32/translate/impl/thumb32_load_store_multiple.cpp
frontend/A32/translate/impl/thumb32_load_word.cpp frontend/A32/translate/impl/thumb32_load_word.cpp
frontend/A32/translate/impl/thumb32_long_multiply.cpp frontend/A32/translate/impl/thumb32_long_multiply.cpp

View File

@ -13,6 +13,7 @@
#include "backend/x64/block_of_code.h" #include "backend/x64/block_of_code.h"
#include "backend/x64/perf_map.h" #include "backend/x64/perf_map.h"
#include "common/assert.h" #include "common/assert.h"
#include "common/bit_util.h"
#ifdef _WIN32 #ifdef _WIN32
#include <windows.h> #include <windows.h>
@ -43,8 +44,8 @@ const std::array<Xbyak::Reg64, 6> BlockOfCode::ABI_PARAMS = {BlockOfCode::ABI_PA
namespace { namespace {
constexpr size_t TOTAL_CODE_SIZE = 128 * 1024 * 1024; constexpr size_t TOTAL_CODE_SIZE = 256 * 1024 * 1024;
constexpr size_t FAR_CODE_OFFSET = 100 * 1024 * 1024; constexpr size_t FAR_CODE_OFFSET = 200 * 1024 * 1024;
constexpr size_t CONSTANT_POOL_SIZE = 2 * 1024 * 1024; constexpr size_t CONSTANT_POOL_SIZE = 2 * 1024 * 1024;
class CustomXbyakAllocator : public Xbyak::Allocator { class CustomXbyakAllocator : public Xbyak::Allocator {
@ -364,7 +365,21 @@ bool BlockOfCode::HasBMI2() const {
} }
bool BlockOfCode::HasFastBMI2() const { bool BlockOfCode::HasFastBMI2() const {
return DoesCpuSupport(Xbyak::util::Cpu::tBMI2) && !DoesCpuSupport(Xbyak::util::Cpu::tAMD); if (DoesCpuSupport(Xbyak::util::Cpu::tBMI2)) {
// BMI2 instructions such as pdep and pext have been very slow up until Zen 3.
// Check for Zen 3 or newer by its family (0x19).
// See also: https://en.wikichip.org/wiki/amd/cpuid
if (DoesCpuSupport(Xbyak::util::Cpu::tAMD)) {
std::array<u32, 4> data{};
cpu_info.getCpuid(1, data.data());
const u32 family_base = Common::Bits< 8, 11>(data[0]);
const u32 family_extended = Common::Bits<20, 27>(data[0]);
const u32 family = family_base + family_extended;
return family >= 0x19;
}
return true;
}
return false;
} }
bool BlockOfCode::HasFMA() const { bool BlockOfCode::HasFMA() const {

View File

@ -337,8 +337,12 @@ void EmitTwoOpVectorOperation(BlockOfCode& code, EmitContext& ctx, IR::Inst* ins
ctx.reg_alloc.DefineValue(inst, result); ctx.reg_alloc.DefineValue(inst, result);
} }
// Selects how EmitThreeOpVectorOperation builds its NaN mask.
//   Yes - the mask is seeded from an unordered compare of the two inputs
//         before the operation runs, then combined with the result.
//   No  - the mask is derived from the operation's result only.
// Scoped so that the generic names `Yes`/`No` do not leak into the enclosing
// namespace and so the value cannot silently convert to an integer.
enum class CheckInputNaN {
    Yes,
    No,
};
template<size_t fsize, template<typename> class Indexer, typename Function> template<size_t fsize, template<typename> class Indexer, typename Function>
void EmitThreeOpVectorOperation(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, Function fn, typename NaNHandler<fsize, Indexer, 3>::function_type nan_handler = NaNHandler<fsize, Indexer, 3>::GetDefault()) { void EmitThreeOpVectorOperation(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, Function fn, CheckInputNaN check_input_nan = CheckInputNaN::No, typename NaNHandler<fsize, Indexer, 3>::function_type nan_handler = NaNHandler<fsize, Indexer, 3>::GetDefault()) {
static_assert(fsize == 32 || fsize == 64, "fsize must be either 32 or 64"); static_assert(fsize == 32 || fsize == 64, "fsize must be either 32 or 64");
auto args = ctx.reg_alloc.GetArgumentInfo(inst); auto args = ctx.reg_alloc.GetArgumentInfo(inst);
@ -371,15 +375,31 @@ void EmitThreeOpVectorOperation(BlockOfCode& code, EmitContext& ctx, IR::Inst* i
const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(args[1]); const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(args[1]);
const Xbyak::Xmm nan_mask = ctx.reg_alloc.ScratchXmm(); const Xbyak::Xmm nan_mask = ctx.reg_alloc.ScratchXmm();
code.movaps(nan_mask, xmm_b);
code.movaps(result, xmm_a); code.movaps(result, xmm_a);
if (check_input_nan == CheckInputNaN::Yes) {
if (code.HasAVX()) {
FCODE(vcmpunordp)(nan_mask, xmm_a, xmm_b);
} else {
code.movaps(nan_mask, xmm_b);
FCODE(cmpunordp)(nan_mask, xmm_a); FCODE(cmpunordp)(nan_mask, xmm_a);
}
}
if constexpr (std::is_member_function_pointer_v<Function>) { if constexpr (std::is_member_function_pointer_v<Function>) {
(code.*fn)(result, xmm_b); (code.*fn)(result, xmm_b);
} else { } else {
fn(result, xmm_b); fn(result, xmm_b);
} }
if (check_input_nan == CheckInputNaN::Yes) {
FCODE(cmpunordp)(nan_mask, result); FCODE(cmpunordp)(nan_mask, result);
} else if (code.HasAVX()) {
FCODE(vcmpunordp)(nan_mask, result, result);
} else {
code.movaps(nan_mask, result);
FCODE(cmpunordp)(nan_mask, nan_mask);
}
HandleNaNs<fsize, 2>(code, ctx, fpcr_controlled, {result, xmm_a, xmm_b}, nan_mask, nan_handler); HandleNaNs<fsize, 2>(code, ctx, fpcr_controlled, {result, xmm_a, xmm_b}, nan_mask, nan_handler);
@ -951,7 +971,7 @@ static void EmitFPVectorMinMax(BlockOfCode& code, EmitContext& ctx, IR::Inst* in
code.andnps(mask, eq); code.andnps(mask, eq);
code.orps(result, mask); code.orps(result, mask);
} }
}); }, CheckInputNaN::Yes);
} }
void EmitX64::EmitFPVectorMax32(EmitContext& ctx, IR::Inst* inst) { void EmitX64::EmitFPVectorMax32(EmitContext& ctx, IR::Inst* inst) {

View File

@ -13,17 +13,17 @@ INST(thumb32_LDMDB, "LDMDB/LDMEA", "1110100100W1nnnniiiiii
// Load/Store Dual, Load/Store Exclusive, Table Branch // Load/Store Dual, Load/Store Exclusive, Table Branch
//INST(thumb32_STREX, "STREX", "111010000100--------------------") //INST(thumb32_STREX, "STREX", "111010000100--------------------")
//INST(thumb32_LDREX, "LDREX", "111010000101--------------------") //INST(thumb32_LDREX, "LDREX", "111010000101--------------------")
//INST(thumb32_STRD_imm_1, "STRD (imm)", "11101000-110--------------------") INST(thumb32_STRD_imm_1, "STRD (imm)", "11101000U110nnnnttttssssiiiiiiii")
//INST(thumb32_STRD_imm_2, "STRD (imm)", "11101001-1-0--------------------") INST(thumb32_STRD_imm_2, "STRD (imm)", "11101001U1W0nnnnttttssssiiiiiiii")
//INST(thumb32_LDRD_imm_1, "LDRD (lit)", "11101000-1111111----------------") INST(thumb32_LDRD_lit_1, "LDRD (lit)", "11101000U1111111ttttssssiiiiiiii")
//INST(thumb32_LDRD_imm_2, "LDRD (lit)", "11101001-1-11111----------------") INST(thumb32_LDRD_lit_2, "LDRD (lit)", "11101001U1W11111ttttssssiiiiiiii")
//INST(thumb32_LDRD_imm_1, "LDRD (imm)", "11101000-111--------------------") INST(thumb32_LDRD_imm_1, "LDRD (imm)", "11101000U111nnnnttttssssiiiiiiii")
//INST(thumb32_LDRD_imm_2, "LDRD (imm)", "11101001-1-1--------------------") INST(thumb32_LDRD_imm_2, "LDRD (imm)", "11101001U1W1nnnnttttssssiiiiiiii")
//INST(thumb32_STREXB, "STREXB", "111010001100------------0100----") //INST(thumb32_STREXB, "STREXB", "111010001100------------0100----")
//INST(thumb32_STREXH, "STREXH", "111010001100------------0101----") //INST(thumb32_STREXH, "STREXH", "111010001100------------0101----")
//INST(thumb32_STREXD, "STREXD", "111010001100------------0111----") //INST(thumb32_STREXD, "STREXD", "111010001100------------0111----")
//INST(thumb32_TBB, "TBB", "111010001101------------0000----") INST(thumb32_TBB, "TBB", "111010001101nnnn111100000000mmmm")
//INST(thumb32_TBH, "TBH", "111010001101------------0001----") INST(thumb32_TBH, "TBH", "111010001101nnnn111100000001mmmm")
//INST(thumb32_LDREXB, "LDREXB", "111010001101------------0100----") //INST(thumb32_LDREXB, "LDREXB", "111010001101------------0100----")
//INST(thumb32_LDREXH, "LDREXH", "111010001101------------0101----") //INST(thumb32_LDREXH, "LDREXH", "111010001101------------0101----")
//INST(thumb32_LDREXD, "LDREXD", "111010001101------------0111----") //INST(thumb32_LDREXD, "LDREXD", "111010001101------------0111----")

View File

@ -0,0 +1,146 @@
/* This file is part of the dynarmic project.
* Copyright (c) 2021 MerryMage
* SPDX-License-Identifier: 0BSD
*/
#include "common/bit_util.h"
#include "frontend/A32/translate/impl/translate_thumb.h"
namespace Dynarmic::A32 {
// Returns true when the instruction at the emitter's current location is
// inside an IT block but is not that block's last instruction.
static bool ITBlockCheck(const A32::IREmitter& ir) {
    if (!ir.current_location.IT().IsInITBlock()) {
        return false;
    }
    return !ir.current_location.IT().IsLastInITBlock();
}
// Emits IR shared by the Thumb-2 table-branch handlers.
//
//   n    - register supplying the table base address
//   m    - register supplying the table index
//   half - true: read a 16-bit entry at base + (index << 1)
//          false: read an 8-bit entry at base + index
//
// The entry is zero-extended, doubled, and added to the emitter's PC value to
// form the branch target. Using the PC as the index register, or executing
// inside an IT block anywhere but last, is reported as unpredictable.
// Returns false once the block terminal has been set.
static bool TableBranch(ThumbTranslatorVisitor& v, Reg n, Reg m, bool half) {
    // Both rejected forms are reported identically, so test them together.
    if (m == Reg::PC || ITBlockCheck(v.ir)) {
        return v.UnpredictableInstruction();
    }

    const auto index = v.ir.GetRegister(m);
    const auto table_base = v.ir.GetRegister(n);

    IR::U32 entry;
    if (half) {
        const auto scaled_index = v.ir.LogicalShiftLeft(index, v.ir.Imm8(1));
        const auto entry_address = v.ir.Add(table_base, scaled_index);
        const auto raw_entry = v.ir.ReadMemory16(entry_address);
        entry = v.ir.ZeroExtendToWord(raw_entry);
    } else {
        const auto entry_address = v.ir.Add(table_base, index);
        entry = v.ir.ZeroExtendToWord(v.ir.ReadMemory8(entry_address));
    }

    // Entries count halfwords, so the byte displacement is entry + entry.
    const auto pc_value = v.ir.Imm32(v.ir.PC());
    const auto byte_displacement = v.ir.Add(entry, entry);
    const auto target = v.ir.Add(pc_value, byte_displacement);

    v.ir.UpdateUpperLocationDescriptor();
    v.ir.BranchWritePC(target);
    v.ir.SetTerm(IR::Term::FastDispatchHint{});
    return false;
}
// Emits IR for a dual-word load with an immediate offset.
//
//   P    - true: access memory at the offset address; false: at the base register value
//   U    - true: offset is added to the base; false: subtracted
//   W    - true: write the offset address back to register n
//   n    - base register
//   t/t2 - destinations for the word at the access address and the word 4 bytes above it
//   imm8 - offset in words (scaled by 4 to bytes)
//
// Reported as unpredictable when writeback targets one of the destinations,
// when either destination is the PC, or when both destinations are the same.
// Returns true on the normal path.
static bool LoadDualImmediate(ThumbTranslatorVisitor& v, bool P, bool U, bool W,
                              Reg n, Reg t, Reg t2, Imm<8> imm8) {
    const bool writeback_hits_dest = W && (n == t || n == t2);
    const bool invalid_dest = t == Reg::PC || t2 == Reg::PC || t == t2;
    if (writeback_hits_dest || invalid_dest) {
        return v.UnpredictableInstruction();
    }

    const u32 byte_offset = imm8.ZeroExtend() << 2;
    const IR::U32 base = v.ir.GetRegister(n);
    const IR::U32 displacement = v.ir.Imm32(byte_offset);
    const IR::U32 indexed = U ? v.ir.Add(base, displacement)
                              : v.ir.Sub(base, displacement);
    const IR::U32 first_address = P ? indexed : base;
    const IR::U32 second_address = v.ir.Add(first_address, v.ir.Imm32(4));

    // Keep the read/write-register pairs interleaved exactly as loads occur.
    const auto first_word = v.ir.ReadMemory32(first_address);
    v.ir.SetRegister(t, first_word);
    const auto second_word = v.ir.ReadMemory32(second_address);
    v.ir.SetRegister(t2, second_word);

    if (W) {
        v.ir.SetRegister(n, indexed);
    }
    return true;
}
// Emits IR for a PC-relative dual-word load.
//
//   U    - true: offset is added to the 4-byte-aligned PC; false: subtracted
//   W    - writeback flag; any request for writeback is reported as unpredictable
//   t/t2 - destinations for the word at the computed address and the word 4 bytes above it
//   imm8 - offset in words (scaled by 4 to bytes)
//
// Also reported as unpredictable when either destination is the PC or both
// destinations are the same register. Returns true on the normal path.
static bool LoadDualLiteral(ThumbTranslatorVisitor& v, bool U, bool W, Reg t, Reg t2, Imm<8> imm8) {
    const bool invalid_dest = t == Reg::PC || t2 == Reg::PC || t == t2;
    if (invalid_dest || W) {
        return v.UnpredictableInstruction();
    }

    const auto byte_offset = imm8.ZeroExtend() << 2;
    const auto aligned_pc = v.ir.Imm32(v.ir.AlignPC(4));
    const auto displacement = v.ir.Imm32(byte_offset);
    const auto first_address = U ? v.ir.Add(aligned_pc, displacement)
                                 : v.ir.Sub(aligned_pc, displacement);
    const auto second_address = v.ir.Add(first_address, v.ir.Imm32(4));

    const auto first_word = v.ir.ReadMemory32(first_address);
    v.ir.SetRegister(t, first_word);
    const auto second_word = v.ir.ReadMemory32(second_address);
    v.ir.SetRegister(t2, second_word);
    return true;
}
// Emits IR for a dual-word store with an immediate offset.
//
//   P    - true: access memory at the offset address; false: at the base register value
//   U    - true: offset is added to the base; false: subtracted
//   W    - true: write the offset address back to register n
//   n    - base register
//   t/t2 - sources for the word at the access address and the word 4 bytes above it
//   imm8 - offset in words (scaled by 4 to bytes)
//
// Reported as unpredictable when writeback targets one of the source
// registers or when any of n, t, t2 is the PC. Returns true on the normal path.
static bool StoreDual(ThumbTranslatorVisitor& v, bool P, bool U, bool W, Reg n, Reg t, Reg t2, Imm<8> imm8) {
    const bool writeback_hits_source = W && (n == t || n == t2);
    const bool uses_pc = n == Reg::PC || t == Reg::PC || t2 == Reg::PC;
    if (writeback_hits_source || uses_pc) {
        return v.UnpredictableInstruction();
    }

    const u32 byte_offset = imm8.ZeroExtend() << 2;
    const IR::U32 base = v.ir.GetRegister(n);
    const IR::U32 first_word = v.ir.GetRegister(t);
    const IR::U32 second_word = v.ir.GetRegister(t2);

    const IR::U32 displacement = v.ir.Imm32(byte_offset);
    const IR::U32 indexed = U ? v.ir.Add(base, displacement)
                              : v.ir.Sub(base, displacement);
    const IR::U32 first_address = P ? indexed : base;
    const IR::U32 second_address = v.ir.Add(first_address, v.ir.Imm32(4));

    v.ir.WriteMemory32(first_address, first_word);
    v.ir.WriteMemory32(second_address, second_word);

    if (W) {
        v.ir.SetRegister(n, indexed);
    }
    return true;
}
// LDRD (immediate), first encoding: loads from the unmodified base address
// and always writes the offset address back to the base register.
bool ThumbTranslatorVisitor::thumb32_LDRD_imm_1(bool U, Reg n, Reg t, Reg t2, Imm<8> imm8) {
    constexpr bool pre_indexed = false;
    constexpr bool writeback = true;
    return LoadDualImmediate(*this, pre_indexed, U, writeback, n, t, t2, imm8);
}
// LDRD (immediate), second encoding: loads from the offset address;
// writeback is controlled by the decoded W bit.
bool ThumbTranslatorVisitor::thumb32_LDRD_imm_2(bool U, bool W, Reg n, Reg t, Reg t2, Imm<8> imm8) {
    constexpr bool pre_indexed = true;
    return LoadDualImmediate(*this, pre_indexed, U, W, n, t, t2, imm8);
}
// LDRD (literal), first encoding: forwards a set writeback flag, which
// LoadDualLiteral reports as unpredictable.
bool ThumbTranslatorVisitor::thumb32_LDRD_lit_1(bool U, Reg t, Reg t2, Imm<8> imm8) {
    constexpr bool writeback = true;
    return LoadDualLiteral(*this, U, writeback, t, t2, imm8);
}
bool ThumbTranslatorVisitor::thumb32_LDRD_lit_2(bool U, bool W, Reg t, Reg t2, Imm<8> imm8) {
return LoadDualLiteral(*this, U, W, t, t2, imm8);
}
// STRD (immediate), first encoding: stores at the unmodified base address
// and always writes the offset address back to the base register.
bool ThumbTranslatorVisitor::thumb32_STRD_imm_1(bool U, Reg n, Reg t, Reg t2, Imm<8> imm8) {
    constexpr bool pre_indexed = false;
    constexpr bool writeback = true;
    return StoreDual(*this, pre_indexed, U, writeback, n, t, t2, imm8);
}
// STRD (immediate), second encoding: stores at the offset address;
// writeback is controlled by the decoded W bit.
bool ThumbTranslatorVisitor::thumb32_STRD_imm_2(bool U, bool W, Reg n, Reg t, Reg t2, Imm<8> imm8) {
    constexpr bool pre_indexed = true;
    return StoreDual(*this, pre_indexed, U, W, n, t, t2, imm8);
}
// TBB: table branch using 8-bit table entries.
bool ThumbTranslatorVisitor::thumb32_TBB(Reg n, Reg m) {
    constexpr bool halfword_entries = false;
    return TableBranch(*this, n, m, halfword_entries);
}
// TBH: table branch using 16-bit table entries.
bool ThumbTranslatorVisitor::thumb32_TBH(Reg n, Reg m) {
    constexpr bool halfword_entries = true;
    return TableBranch(*this, n, m, halfword_entries);
}
} // namespace Dynarmic::A32

View File

@ -179,6 +179,16 @@ struct ThumbTranslatorVisitor final {
bool thumb32_STMIA(bool W, Reg n, Imm<15> reg_list); bool thumb32_STMIA(bool W, Reg n, Imm<15> reg_list);
bool thumb32_STMDB(bool W, Reg n, Imm<15> reg_list); bool thumb32_STMDB(bool W, Reg n, Imm<15> reg_list);
// thumb32 load/store dual, load/store exclusive, table branch instructions
// Each dual load/store has two handlers, one per decoder pattern: the _1 form
// has no W operand, the _2 form receives the decoded W bit.
// U is the decoded U bit; n is the base register; t and t2 are the two
// transfer registers; imm8 is the 8-bit immediate field.
bool thumb32_LDRD_imm_1(bool U, Reg n, Reg t, Reg t2, Imm<8> imm8);
bool thumb32_LDRD_imm_2(bool U, bool W, Reg n, Reg t, Reg t2, Imm<8> imm8);
// Literal forms take no base register operand.
bool thumb32_LDRD_lit_1(bool U, Reg t, Reg t2, Imm<8> imm8);
bool thumb32_LDRD_lit_2(bool U, bool W, Reg t, Reg t2, Imm<8> imm8);
bool thumb32_STRD_imm_1(bool U, Reg n, Reg t, Reg t2, Imm<8> imm8);
bool thumb32_STRD_imm_2(bool U, bool W, Reg n, Reg t, Reg t2, Imm<8> imm8);
// Table branches: n is the table base register, m the index register.
bool thumb32_TBB(Reg n, Reg m);
bool thumb32_TBH(Reg n, Reg m);
// thumb32 data processing (shifted register) instructions // thumb32 data processing (shifted register) instructions
bool thumb32_TST_reg(Reg n, Imm<3> imm3, Imm<2> imm2, ShiftType type, Reg m); bool thumb32_TST_reg(Reg n, Imm<3> imm3, Imm<2> imm2, ShiftType type, Reg m);
bool thumb32_AND_reg(bool S, Reg n, Imm<3> imm3, Reg d, Imm<2> imm2, ShiftType type, Reg m); bool thumb32_AND_reg(bool S, Reg n, Imm<3> imm3, Reg d, Imm<2> imm2, ShiftType type, Reg m);