early-access version 1546
This commit is contained in:
@@ -13,6 +13,7 @@
|
||||
#include "backend/x64/block_of_code.h"
|
||||
#include "backend/x64/perf_map.h"
|
||||
#include "common/assert.h"
|
||||
#include "common/bit_util.h"
|
||||
|
||||
#ifdef _WIN32
|
||||
#include <windows.h>
|
||||
@@ -43,8 +44,8 @@ const std::array<Xbyak::Reg64, 6> BlockOfCode::ABI_PARAMS = {BlockOfCode::ABI_PA
|
||||
|
||||
namespace {
|
||||
|
||||
constexpr size_t TOTAL_CODE_SIZE = 128 * 1024 * 1024;
|
||||
constexpr size_t FAR_CODE_OFFSET = 100 * 1024 * 1024;
|
||||
constexpr size_t TOTAL_CODE_SIZE = 256 * 1024 * 1024;
|
||||
constexpr size_t FAR_CODE_OFFSET = 200 * 1024 * 1024;
|
||||
constexpr size_t CONSTANT_POOL_SIZE = 2 * 1024 * 1024;
|
||||
|
||||
class CustomXbyakAllocator : public Xbyak::Allocator {
|
||||
@@ -364,7 +365,21 @@ bool BlockOfCode::HasBMI2() const {
|
||||
}
|
||||
|
||||
bool BlockOfCode::HasFastBMI2() const {
|
||||
return DoesCpuSupport(Xbyak::util::Cpu::tBMI2) && !DoesCpuSupport(Xbyak::util::Cpu::tAMD);
|
||||
if (DoesCpuSupport(Xbyak::util::Cpu::tBMI2)) {
|
||||
// BMI2 instructions such as pdep and pext have been very slow up until Zen 3.
|
||||
// Check for Zen 3 or newer by its family (0x19).
|
||||
// See also: https://en.wikichip.org/wiki/amd/cpuid
|
||||
if (DoesCpuSupport(Xbyak::util::Cpu::tAMD)) {
|
||||
std::array<u32, 4> data{};
|
||||
cpu_info.getCpuid(1, data.data());
|
||||
const u32 family_base = Common::Bits< 8, 11>(data[0]);
|
||||
const u32 family_extended = Common::Bits<20, 27>(data[0]);
|
||||
const u32 family = family_base + family_extended;
|
||||
return family >= 0x19;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
bool BlockOfCode::HasFMA() const {
|
||||
|
@@ -337,8 +337,12 @@ void EmitTwoOpVectorOperation(BlockOfCode& code, EmitContext& ctx, IR::Inst* ins
|
||||
ctx.reg_alloc.DefineValue(inst, result);
|
||||
}
|
||||
|
||||
enum CheckInputNaN {
|
||||
Yes, No,
|
||||
};
|
||||
|
||||
template<size_t fsize, template<typename> class Indexer, typename Function>
|
||||
void EmitThreeOpVectorOperation(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, Function fn, typename NaNHandler<fsize, Indexer, 3>::function_type nan_handler = NaNHandler<fsize, Indexer, 3>::GetDefault()) {
|
||||
void EmitThreeOpVectorOperation(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, Function fn, CheckInputNaN check_input_nan = CheckInputNaN::No, typename NaNHandler<fsize, Indexer, 3>::function_type nan_handler = NaNHandler<fsize, Indexer, 3>::GetDefault()) {
|
||||
static_assert(fsize == 32 || fsize == 64, "fsize must be either 32 or 64");
|
||||
|
||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||
@@ -371,15 +375,31 @@ void EmitThreeOpVectorOperation(BlockOfCode& code, EmitContext& ctx, IR::Inst* i
|
||||
const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(args[1]);
|
||||
const Xbyak::Xmm nan_mask = ctx.reg_alloc.ScratchXmm();
|
||||
|
||||
code.movaps(nan_mask, xmm_b);
|
||||
code.movaps(result, xmm_a);
|
||||
FCODE(cmpunordp)(nan_mask, xmm_a);
|
||||
|
||||
if (check_input_nan == CheckInputNaN::Yes) {
|
||||
if (code.HasAVX()) {
|
||||
FCODE(vcmpunordp)(nan_mask, xmm_a, xmm_b);
|
||||
} else {
|
||||
code.movaps(nan_mask, xmm_b);
|
||||
FCODE(cmpunordp)(nan_mask, xmm_a);
|
||||
}
|
||||
}
|
||||
|
||||
if constexpr (std::is_member_function_pointer_v<Function>) {
|
||||
(code.*fn)(result, xmm_b);
|
||||
} else {
|
||||
fn(result, xmm_b);
|
||||
}
|
||||
FCODE(cmpunordp)(nan_mask, result);
|
||||
|
||||
if (check_input_nan == CheckInputNaN::Yes) {
|
||||
FCODE(cmpunordp)(nan_mask, result);
|
||||
} else if (code.HasAVX()) {
|
||||
FCODE(vcmpunordp)(nan_mask, result, result);
|
||||
} else {
|
||||
code.movaps(nan_mask, result);
|
||||
FCODE(cmpunordp)(nan_mask, nan_mask);
|
||||
}
|
||||
|
||||
HandleNaNs<fsize, 2>(code, ctx, fpcr_controlled, {result, xmm_a, xmm_b}, nan_mask, nan_handler);
|
||||
|
||||
@@ -951,7 +971,7 @@ static void EmitFPVectorMinMax(BlockOfCode& code, EmitContext& ctx, IR::Inst* in
|
||||
code.andnps(mask, eq);
|
||||
code.orps(result, mask);
|
||||
}
|
||||
});
|
||||
}, CheckInputNaN::Yes);
|
||||
}
|
||||
|
||||
void EmitX64::EmitFPVectorMax32(EmitContext& ctx, IR::Inst* inst) {
|
||||
|
Reference in New Issue
Block a user