early-access version 1255
This commit is contained in:
179
src/common/x64/cpu_detect.cpp
Executable file
179
src/common/x64/cpu_detect.cpp
Executable file
@@ -0,0 +1,179 @@
|
||||
// Copyright 2013 Dolphin Emulator Project / 2015 Citra Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include <cstring>
|
||||
#include "common/common_types.h"
|
||||
#include "common/x64/cpu_detect.h"
|
||||
|
||||
#ifdef _MSC_VER
|
||||
#include <intrin.h>
|
||||
#else
|
||||
|
||||
#if defined(__DragonFly__) || defined(__FreeBSD__)
|
||||
// clang-format off
|
||||
#include <sys/types.h>
|
||||
#include <machine/cpufunc.h>
|
||||
// clang-format on
|
||||
#endif
|
||||
|
||||
static inline void __cpuidex(int info[4], int function_id, int subfunction_id) {
|
||||
#if defined(__DragonFly__) || defined(__FreeBSD__)
|
||||
// Despite the name, this is just do_cpuid() with ECX as second input.
|
||||
cpuid_count((u_int)function_id, (u_int)subfunction_id, (u_int*)info);
|
||||
#else
|
||||
info[0] = function_id; // eax
|
||||
info[2] = subfunction_id; // ecx
|
||||
__asm__("cpuid"
|
||||
: "=a"(info[0]), "=b"(info[1]), "=c"(info[2]), "=d"(info[3])
|
||||
: "a"(function_id), "c"(subfunction_id));
|
||||
#endif
|
||||
}
|
||||
|
||||
static inline void __cpuid(int info[4], int function_id) {
|
||||
return __cpuidex(info, function_id, 0);
|
||||
}
|
||||
|
||||
#define _XCR_XFEATURE_ENABLED_MASK 0
|
||||
static inline u64 _xgetbv(u32 index) {
|
||||
u32 eax, edx;
|
||||
__asm__ __volatile__("xgetbv" : "=a"(eax), "=d"(edx) : "c"(index));
|
||||
return ((u64)edx << 32) | eax;
|
||||
}
|
||||
|
||||
#endif // _MSC_VER
|
||||
|
||||
namespace Common {
|
||||
|
||||
// Detects the various CPU features
|
||||
static CPUCaps Detect() {
|
||||
CPUCaps caps = {};
|
||||
|
||||
// Assumes the CPU supports the CPUID instruction. Those that don't would likely not support
|
||||
// yuzu at all anyway
|
||||
|
||||
int cpu_id[4];
|
||||
memset(caps.brand_string, 0, sizeof(caps.brand_string));
|
||||
|
||||
// Detect CPU's CPUID capabilities and grab CPU string
|
||||
__cpuid(cpu_id, 0x00000000);
|
||||
u32 max_std_fn = cpu_id[0]; // EAX
|
||||
|
||||
std::memcpy(&caps.brand_string[0], &cpu_id[1], sizeof(int));
|
||||
std::memcpy(&caps.brand_string[4], &cpu_id[3], sizeof(int));
|
||||
std::memcpy(&caps.brand_string[8], &cpu_id[2], sizeof(int));
|
||||
if (cpu_id[1] == 0x756e6547 && cpu_id[2] == 0x6c65746e && cpu_id[3] == 0x49656e69)
|
||||
caps.manufacturer = Manufacturer::Intel;
|
||||
else if (cpu_id[1] == 0x68747541 && cpu_id[2] == 0x444d4163 && cpu_id[3] == 0x69746e65)
|
||||
caps.manufacturer = Manufacturer::AMD;
|
||||
else if (cpu_id[1] == 0x6f677948 && cpu_id[2] == 0x656e6975 && cpu_id[3] == 0x6e65476e)
|
||||
caps.manufacturer = Manufacturer::Hygon;
|
||||
else
|
||||
caps.manufacturer = Manufacturer::Unknown;
|
||||
|
||||
u32 family = {};
|
||||
u32 model = {};
|
||||
|
||||
__cpuid(cpu_id, 0x80000000);
|
||||
|
||||
u32 max_ex_fn = cpu_id[0];
|
||||
|
||||
// Set reasonable default brand string even if brand string not available
|
||||
strcpy(caps.cpu_string, caps.brand_string);
|
||||
|
||||
// Detect family and other miscellaneous features
|
||||
if (max_std_fn >= 1) {
|
||||
__cpuid(cpu_id, 0x00000001);
|
||||
family = (cpu_id[0] >> 8) & 0xf;
|
||||
model = (cpu_id[0] >> 4) & 0xf;
|
||||
if (family == 0xf) {
|
||||
family += (cpu_id[0] >> 20) & 0xff;
|
||||
}
|
||||
if (family >= 6) {
|
||||
model += ((cpu_id[0] >> 16) & 0xf) << 4;
|
||||
}
|
||||
|
||||
if ((cpu_id[3] >> 25) & 1)
|
||||
caps.sse = true;
|
||||
if ((cpu_id[3] >> 26) & 1)
|
||||
caps.sse2 = true;
|
||||
if ((cpu_id[2]) & 1)
|
||||
caps.sse3 = true;
|
||||
if ((cpu_id[2] >> 9) & 1)
|
||||
caps.ssse3 = true;
|
||||
if ((cpu_id[2] >> 19) & 1)
|
||||
caps.sse4_1 = true;
|
||||
if ((cpu_id[2] >> 20) & 1)
|
||||
caps.sse4_2 = true;
|
||||
if ((cpu_id[2] >> 25) & 1)
|
||||
caps.aes = true;
|
||||
|
||||
// AVX support requires 3 separate checks:
|
||||
// - Is the AVX bit set in CPUID?
|
||||
// - Is the XSAVE bit set in CPUID?
|
||||
// - XGETBV result has the XCR bit set.
|
||||
if (((cpu_id[2] >> 28) & 1) && ((cpu_id[2] >> 27) & 1)) {
|
||||
if ((_xgetbv(_XCR_XFEATURE_ENABLED_MASK) & 0x6) == 0x6) {
|
||||
caps.avx = true;
|
||||
if ((cpu_id[2] >> 12) & 1)
|
||||
caps.fma = true;
|
||||
}
|
||||
}
|
||||
|
||||
if (max_std_fn >= 7) {
|
||||
__cpuidex(cpu_id, 0x00000007, 0x00000000);
|
||||
// Can't enable AVX2 unless the XSAVE/XGETBV checks above passed
|
||||
if ((cpu_id[1] >> 5) & 1)
|
||||
caps.avx2 = caps.avx;
|
||||
if ((cpu_id[1] >> 3) & 1)
|
||||
caps.bmi1 = true;
|
||||
if ((cpu_id[1] >> 8) & 1)
|
||||
caps.bmi2 = true;
|
||||
// Checks for AVX512F, AVX512CD, AVX512VL, AVX512DQ, AVX512BW (Intel Skylake-X/SP)
|
||||
if ((cpu_id[1] >> 16) & 1 && (cpu_id[1] >> 28) & 1 && (cpu_id[1] >> 31) & 1 &&
|
||||
(cpu_id[1] >> 17) & 1 && (cpu_id[1] >> 30) & 1) {
|
||||
caps.avx512 = caps.avx2;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (max_ex_fn >= 0x80000004) {
|
||||
// Extract CPU model string
|
||||
__cpuid(cpu_id, 0x80000002);
|
||||
std::memcpy(caps.cpu_string, cpu_id, sizeof(cpu_id));
|
||||
__cpuid(cpu_id, 0x80000003);
|
||||
std::memcpy(caps.cpu_string + 16, cpu_id, sizeof(cpu_id));
|
||||
__cpuid(cpu_id, 0x80000004);
|
||||
std::memcpy(caps.cpu_string + 32, cpu_id, sizeof(cpu_id));
|
||||
}
|
||||
|
||||
if (max_ex_fn >= 0x80000001) {
|
||||
// Check for more features
|
||||
__cpuid(cpu_id, 0x80000001);
|
||||
if ((cpu_id[2] >> 16) & 1)
|
||||
caps.fma4 = true;
|
||||
}
|
||||
|
||||
if (max_ex_fn >= 0x80000007) {
|
||||
__cpuid(cpu_id, 0x80000007);
|
||||
if (cpu_id[3] & (1 << 8)) {
|
||||
caps.invariant_tsc = true;
|
||||
}
|
||||
}
|
||||
|
||||
if (max_std_fn >= 0x16) {
|
||||
__cpuid(cpu_id, 0x16);
|
||||
caps.base_frequency = cpu_id[0];
|
||||
caps.max_frequency = cpu_id[1];
|
||||
caps.bus_frequency = cpu_id[2];
|
||||
}
|
||||
|
||||
return caps;
|
||||
}
|
||||
|
||||
const CPUCaps& GetCPUCaps() {
|
||||
static CPUCaps caps = Detect();
|
||||
return caps;
|
||||
}
|
||||
|
||||
} // namespace Common
|
||||
48
src/common/x64/cpu_detect.h
Executable file
48
src/common/x64/cpu_detect.h
Executable file
@@ -0,0 +1,48 @@
|
||||
// Copyright 2013 Dolphin Emulator Project / 2015 Citra Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#pragma once
|
||||
|
||||
namespace Common {
|
||||
|
||||
enum class Manufacturer : u32 {
|
||||
Intel = 0,
|
||||
AMD = 1,
|
||||
Hygon = 2,
|
||||
Unknown = 3,
|
||||
};
|
||||
|
||||
/// x86/x64 CPU capabilities that may be detected by this module
|
||||
struct CPUCaps {
|
||||
Manufacturer manufacturer;
|
||||
char cpu_string[0x21];
|
||||
char brand_string[0x41];
|
||||
bool sse;
|
||||
bool sse2;
|
||||
bool sse3;
|
||||
bool ssse3;
|
||||
bool sse4_1;
|
||||
bool sse4_2;
|
||||
bool lzcnt;
|
||||
bool avx;
|
||||
bool avx2;
|
||||
bool avx512;
|
||||
bool bmi1;
|
||||
bool bmi2;
|
||||
bool fma;
|
||||
bool fma4;
|
||||
bool aes;
|
||||
bool invariant_tsc;
|
||||
u32 base_frequency;
|
||||
u32 max_frequency;
|
||||
u32 bus_frequency;
|
||||
};
|
||||
|
||||
/**
|
||||
* Gets the supported capabilities of the host CPU
|
||||
* @return Reference to a CPUCaps struct with the detected host CPU capabilities
|
||||
*/
|
||||
const CPUCaps& GetCPUCaps();
|
||||
|
||||
} // namespace Common
|
||||
103
src/common/x64/native_clock.cpp
Executable file
103
src/common/x64/native_clock.cpp
Executable file
@@ -0,0 +1,103 @@
|
||||
// Copyright 2020 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include <chrono>
|
||||
#include <mutex>
|
||||
#include <thread>
|
||||
|
||||
#ifdef _MSC_VER
|
||||
#include <intrin.h>
|
||||
#else
|
||||
#include <x86intrin.h>
|
||||
#endif
|
||||
|
||||
#include "common/uint128.h"
|
||||
#include "common/x64/native_clock.h"
|
||||
|
||||
namespace Common {
|
||||
|
||||
u64 EstimateRDTSCFrequency() {
|
||||
const auto milli_10 = std::chrono::milliseconds{10};
|
||||
// get current time
|
||||
_mm_mfence();
|
||||
const u64 tscStart = __rdtsc();
|
||||
const auto startTime = std::chrono::high_resolution_clock::now();
|
||||
// wait roughly 3 seconds
|
||||
while (true) {
|
||||
auto milli = std::chrono::duration_cast<std::chrono::milliseconds>(
|
||||
std::chrono::high_resolution_clock::now() - startTime);
|
||||
if (milli.count() >= 3000)
|
||||
break;
|
||||
std::this_thread::sleep_for(milli_10);
|
||||
}
|
||||
const auto endTime = std::chrono::high_resolution_clock::now();
|
||||
_mm_mfence();
|
||||
const u64 tscEnd = __rdtsc();
|
||||
// calculate difference
|
||||
const u64 timer_diff =
|
||||
std::chrono::duration_cast<std::chrono::nanoseconds>(endTime - startTime).count();
|
||||
const u64 tsc_diff = tscEnd - tscStart;
|
||||
const u64 tsc_freq = MultiplyAndDivide64(tsc_diff, 1000000000ULL, timer_diff);
|
||||
return tsc_freq;
|
||||
}
|
||||
|
||||
namespace X64 {
|
||||
NativeClock::NativeClock(u64 emulated_cpu_frequency_, u64 emulated_clock_frequency_,
|
||||
u64 rtsc_frequency_)
|
||||
: WallClock(emulated_cpu_frequency_, emulated_clock_frequency_, true), rtsc_frequency{
|
||||
rtsc_frequency_} {
|
||||
_mm_mfence();
|
||||
last_measure = __rdtsc();
|
||||
accumulated_ticks = 0U;
|
||||
}
|
||||
|
||||
u64 NativeClock::GetRTSC() {
|
||||
std::scoped_lock scope{rtsc_serialize};
|
||||
_mm_mfence();
|
||||
const u64 current_measure = __rdtsc();
|
||||
u64 diff = current_measure - last_measure;
|
||||
diff = diff & ~static_cast<u64>(static_cast<s64>(diff) >> 63); // max(diff, 0)
|
||||
if (current_measure > last_measure) {
|
||||
last_measure = current_measure;
|
||||
}
|
||||
accumulated_ticks += diff;
|
||||
/// The clock cannot be more precise than the guest timer, remove the lower bits
|
||||
return accumulated_ticks & inaccuracy_mask;
|
||||
}
|
||||
|
||||
void NativeClock::Pause(bool is_paused) {
|
||||
if (!is_paused) {
|
||||
_mm_mfence();
|
||||
last_measure = __rdtsc();
|
||||
}
|
||||
}
|
||||
|
||||
std::chrono::nanoseconds NativeClock::GetTimeNS() {
|
||||
const u64 rtsc_value = GetRTSC();
|
||||
return std::chrono::nanoseconds{MultiplyAndDivide64(rtsc_value, 1000000000, rtsc_frequency)};
|
||||
}
|
||||
|
||||
std::chrono::microseconds NativeClock::GetTimeUS() {
|
||||
const u64 rtsc_value = GetRTSC();
|
||||
return std::chrono::microseconds{MultiplyAndDivide64(rtsc_value, 1000000, rtsc_frequency)};
|
||||
}
|
||||
|
||||
std::chrono::milliseconds NativeClock::GetTimeMS() {
|
||||
const u64 rtsc_value = GetRTSC();
|
||||
return std::chrono::milliseconds{MultiplyAndDivide64(rtsc_value, 1000, rtsc_frequency)};
|
||||
}
|
||||
|
||||
u64 NativeClock::GetClockCycles() {
|
||||
const u64 rtsc_value = GetRTSC();
|
||||
return MultiplyAndDivide64(rtsc_value, emulated_clock_frequency, rtsc_frequency);
|
||||
}
|
||||
|
||||
u64 NativeClock::GetCPUCycles() {
|
||||
const u64 rtsc_value = GetRTSC();
|
||||
return MultiplyAndDivide64(rtsc_value, emulated_cpu_frequency, rtsc_frequency);
|
||||
}
|
||||
|
||||
} // namespace X64
|
||||
|
||||
} // namespace Common
|
||||
49
src/common/x64/native_clock.h
Executable file
49
src/common/x64/native_clock.h
Executable file
@@ -0,0 +1,49 @@
|
||||
// Copyright 2020 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <optional>
|
||||
|
||||
#include "common/spin_lock.h"
|
||||
#include "common/wall_clock.h"
|
||||
|
||||
namespace Common {
|
||||
|
||||
namespace X64 {
|
||||
class NativeClock final : public WallClock {
|
||||
public:
|
||||
explicit NativeClock(u64 emulated_cpu_frequency_, u64 emulated_clock_frequency_,
|
||||
u64 rtsc_frequency_);
|
||||
|
||||
std::chrono::nanoseconds GetTimeNS() override;
|
||||
|
||||
std::chrono::microseconds GetTimeUS() override;
|
||||
|
||||
std::chrono::milliseconds GetTimeMS() override;
|
||||
|
||||
u64 GetClockCycles() override;
|
||||
|
||||
u64 GetCPUCycles() override;
|
||||
|
||||
void Pause(bool is_paused) override;
|
||||
|
||||
private:
|
||||
u64 GetRTSC();
|
||||
|
||||
/// value used to reduce the native clocks accuracy as some apss rely on
|
||||
/// undefined behavior where the level of accuracy in the clock shouldn't
|
||||
/// be higher.
|
||||
static constexpr u64 inaccuracy_mask = ~(UINT64_C(0x400) - 1);
|
||||
|
||||
SpinLock rtsc_serialize{};
|
||||
u64 last_measure{};
|
||||
u64 accumulated_ticks{};
|
||||
u64 rtsc_frequency;
|
||||
};
|
||||
} // namespace X64
|
||||
|
||||
u64 EstimateRDTSCFrequency();
|
||||
|
||||
} // namespace Common
|
||||
229
src/common/x64/xbyak_abi.h
Executable file
229
src/common/x64/xbyak_abi.h
Executable file
@@ -0,0 +1,229 @@
|
||||
// Copyright 2016 Citra Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <bitset>
|
||||
#include <initializer_list>
|
||||
#include <xbyak.h>
|
||||
#include "common/assert.h"
|
||||
|
||||
namespace Common::X64 {
|
||||
|
||||
constexpr size_t RegToIndex(const Xbyak::Reg& reg) {
|
||||
using Kind = Xbyak::Reg::Kind;
|
||||
ASSERT_MSG((reg.getKind() & (Kind::REG | Kind::XMM)) != 0,
|
||||
"RegSet only support GPRs and XMM registers.");
|
||||
ASSERT_MSG(reg.getIdx() < 16, "RegSet only supports XXM0-15.");
|
||||
return static_cast<size_t>(reg.getIdx()) + (reg.getKind() == Kind::REG ? 0 : 16);
|
||||
}
|
||||
|
||||
constexpr Xbyak::Reg64 IndexToReg64(size_t reg_index) {
|
||||
ASSERT(reg_index < 16);
|
||||
return Xbyak::Reg64(static_cast<int>(reg_index));
|
||||
}
|
||||
|
||||
constexpr Xbyak::Xmm IndexToXmm(size_t reg_index) {
|
||||
ASSERT(reg_index >= 16 && reg_index < 32);
|
||||
return Xbyak::Xmm(static_cast<int>(reg_index - 16));
|
||||
}
|
||||
|
||||
constexpr Xbyak::Reg IndexToReg(size_t reg_index) {
|
||||
if (reg_index < 16) {
|
||||
return IndexToReg64(reg_index);
|
||||
} else {
|
||||
return IndexToXmm(reg_index);
|
||||
}
|
||||
}
|
||||
|
||||
inline std::bitset<32> BuildRegSet(std::initializer_list<Xbyak::Reg> regs) {
|
||||
std::bitset<32> bits;
|
||||
for (const Xbyak::Reg& reg : regs) {
|
||||
bits[RegToIndex(reg)] = true;
|
||||
}
|
||||
return bits;
|
||||
}
|
||||
|
||||
constexpr inline std::bitset<32> ABI_ALL_GPRS(0x0000FFFF);
|
||||
constexpr inline std::bitset<32> ABI_ALL_XMMS(0xFFFF0000);
|
||||
|
||||
#ifdef _WIN32
|
||||
|
||||
// Microsoft x64 ABI
|
||||
constexpr inline Xbyak::Reg ABI_RETURN = Xbyak::util::rax;
|
||||
constexpr inline Xbyak::Reg ABI_PARAM1 = Xbyak::util::rcx;
|
||||
constexpr inline Xbyak::Reg ABI_PARAM2 = Xbyak::util::rdx;
|
||||
constexpr inline Xbyak::Reg ABI_PARAM3 = Xbyak::util::r8;
|
||||
constexpr inline Xbyak::Reg ABI_PARAM4 = Xbyak::util::r9;
|
||||
|
||||
const std::bitset<32> ABI_ALL_CALLER_SAVED = BuildRegSet({
|
||||
// GPRs
|
||||
Xbyak::util::rcx,
|
||||
Xbyak::util::rdx,
|
||||
Xbyak::util::r8,
|
||||
Xbyak::util::r9,
|
||||
Xbyak::util::r10,
|
||||
Xbyak::util::r11,
|
||||
// XMMs
|
||||
Xbyak::util::xmm0,
|
||||
Xbyak::util::xmm1,
|
||||
Xbyak::util::xmm2,
|
||||
Xbyak::util::xmm3,
|
||||
Xbyak::util::xmm4,
|
||||
Xbyak::util::xmm5,
|
||||
});
|
||||
|
||||
const std::bitset<32> ABI_ALL_CALLEE_SAVED = BuildRegSet({
|
||||
// GPRs
|
||||
Xbyak::util::rbx,
|
||||
Xbyak::util::rsi,
|
||||
Xbyak::util::rdi,
|
||||
Xbyak::util::rbp,
|
||||
Xbyak::util::r12,
|
||||
Xbyak::util::r13,
|
||||
Xbyak::util::r14,
|
||||
Xbyak::util::r15,
|
||||
// XMMs
|
||||
Xbyak::util::xmm6,
|
||||
Xbyak::util::xmm7,
|
||||
Xbyak::util::xmm8,
|
||||
Xbyak::util::xmm9,
|
||||
Xbyak::util::xmm10,
|
||||
Xbyak::util::xmm11,
|
||||
Xbyak::util::xmm12,
|
||||
Xbyak::util::xmm13,
|
||||
Xbyak::util::xmm14,
|
||||
Xbyak::util::xmm15,
|
||||
});
|
||||
|
||||
constexpr size_t ABI_SHADOW_SPACE = 0x20;
|
||||
|
||||
#else
|
||||
|
||||
// System V x86-64 ABI
|
||||
constexpr inline Xbyak::Reg ABI_RETURN = Xbyak::util::rax;
|
||||
constexpr inline Xbyak::Reg ABI_PARAM1 = Xbyak::util::rdi;
|
||||
constexpr inline Xbyak::Reg ABI_PARAM2 = Xbyak::util::rsi;
|
||||
constexpr inline Xbyak::Reg ABI_PARAM3 = Xbyak::util::rdx;
|
||||
constexpr inline Xbyak::Reg ABI_PARAM4 = Xbyak::util::rcx;
|
||||
|
||||
const std::bitset<32> ABI_ALL_CALLER_SAVED = BuildRegSet({
|
||||
// GPRs
|
||||
Xbyak::util::rcx,
|
||||
Xbyak::util::rdx,
|
||||
Xbyak::util::rdi,
|
||||
Xbyak::util::rsi,
|
||||
Xbyak::util::r8,
|
||||
Xbyak::util::r9,
|
||||
Xbyak::util::r10,
|
||||
Xbyak::util::r11,
|
||||
// XMMs
|
||||
Xbyak::util::xmm0,
|
||||
Xbyak::util::xmm1,
|
||||
Xbyak::util::xmm2,
|
||||
Xbyak::util::xmm3,
|
||||
Xbyak::util::xmm4,
|
||||
Xbyak::util::xmm5,
|
||||
Xbyak::util::xmm6,
|
||||
Xbyak::util::xmm7,
|
||||
Xbyak::util::xmm8,
|
||||
Xbyak::util::xmm9,
|
||||
Xbyak::util::xmm10,
|
||||
Xbyak::util::xmm11,
|
||||
Xbyak::util::xmm12,
|
||||
Xbyak::util::xmm13,
|
||||
Xbyak::util::xmm14,
|
||||
Xbyak::util::xmm15,
|
||||
});
|
||||
|
||||
const std::bitset<32> ABI_ALL_CALLEE_SAVED = BuildRegSet({
|
||||
// GPRs
|
||||
Xbyak::util::rbx,
|
||||
Xbyak::util::rbp,
|
||||
Xbyak::util::r12,
|
||||
Xbyak::util::r13,
|
||||
Xbyak::util::r14,
|
||||
Xbyak::util::r15,
|
||||
});
|
||||
|
||||
constexpr size_t ABI_SHADOW_SPACE = 0;
|
||||
|
||||
#endif
|
||||
|
||||
struct ABIFrameInfo {
|
||||
s32 subtraction;
|
||||
s32 xmm_offset;
|
||||
};
|
||||
|
||||
inline ABIFrameInfo ABI_CalculateFrameSize(std::bitset<32> regs, size_t rsp_alignment,
|
||||
size_t needed_frame_size) {
|
||||
const auto count = (regs & ABI_ALL_GPRS).count();
|
||||
rsp_alignment -= count * 8;
|
||||
size_t subtraction = 0;
|
||||
const auto xmm_count = (regs & ABI_ALL_XMMS).count();
|
||||
if (xmm_count) {
|
||||
// If we have any XMMs to save, we must align the stack here.
|
||||
subtraction = rsp_alignment & 0xF;
|
||||
}
|
||||
subtraction += 0x10 * xmm_count;
|
||||
size_t xmm_base_subtraction = subtraction;
|
||||
subtraction += needed_frame_size;
|
||||
subtraction += ABI_SHADOW_SPACE;
|
||||
// Final alignment.
|
||||
rsp_alignment -= subtraction;
|
||||
subtraction += rsp_alignment & 0xF;
|
||||
|
||||
return ABIFrameInfo{static_cast<s32>(subtraction),
|
||||
static_cast<s32>(subtraction - xmm_base_subtraction)};
|
||||
}
|
||||
|
||||
inline size_t ABI_PushRegistersAndAdjustStack(Xbyak::CodeGenerator& code, std::bitset<32> regs,
|
||||
size_t rsp_alignment, size_t needed_frame_size = 0) {
|
||||
auto frame_info = ABI_CalculateFrameSize(regs, rsp_alignment, needed_frame_size);
|
||||
|
||||
for (size_t i = 0; i < regs.size(); ++i) {
|
||||
if (regs[i] && ABI_ALL_GPRS[i]) {
|
||||
code.push(IndexToReg64(i));
|
||||
}
|
||||
}
|
||||
|
||||
if (frame_info.subtraction != 0) {
|
||||
code.sub(code.rsp, frame_info.subtraction);
|
||||
}
|
||||
|
||||
for (size_t i = 0; i < regs.size(); ++i) {
|
||||
if (regs[i] && ABI_ALL_XMMS[i]) {
|
||||
code.movaps(code.xword[code.rsp + frame_info.xmm_offset], IndexToXmm(i));
|
||||
frame_info.xmm_offset += 0x10;
|
||||
}
|
||||
}
|
||||
|
||||
return ABI_SHADOW_SPACE;
|
||||
}
|
||||
|
||||
inline void ABI_PopRegistersAndAdjustStack(Xbyak::CodeGenerator& code, std::bitset<32> regs,
|
||||
size_t rsp_alignment, size_t needed_frame_size = 0) {
|
||||
auto frame_info = ABI_CalculateFrameSize(regs, rsp_alignment, needed_frame_size);
|
||||
|
||||
for (size_t i = 0; i < regs.size(); ++i) {
|
||||
if (regs[i] && ABI_ALL_XMMS[i]) {
|
||||
code.movaps(IndexToXmm(i), code.xword[code.rsp + frame_info.xmm_offset]);
|
||||
frame_info.xmm_offset += 0x10;
|
||||
}
|
||||
}
|
||||
|
||||
if (frame_info.subtraction != 0) {
|
||||
code.add(code.rsp, frame_info.subtraction);
|
||||
}
|
||||
|
||||
// GPRs need to be popped in reverse order
|
||||
for (size_t j = 0; j < regs.size(); ++j) {
|
||||
const size_t i = regs.size() - j - 1;
|
||||
if (regs[i] && ABI_ALL_GPRS[i]) {
|
||||
code.pop(IndexToReg64(i));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace Common::X64
|
||||
47
src/common/x64/xbyak_util.h
Executable file
47
src/common/x64/xbyak_util.h
Executable file
@@ -0,0 +1,47 @@
|
||||
// Copyright 2016 Citra Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <type_traits>
|
||||
#include <xbyak.h>
|
||||
#include "common/x64/xbyak_abi.h"
|
||||
|
||||
namespace Common::X64 {
|
||||
|
||||
// Constants for use with cmpps/cmpss
|
||||
enum {
|
||||
CMP_EQ = 0,
|
||||
CMP_LT = 1,
|
||||
CMP_LE = 2,
|
||||
CMP_UNORD = 3,
|
||||
CMP_NEQ = 4,
|
||||
CMP_NLT = 5,
|
||||
CMP_NLE = 6,
|
||||
CMP_ORD = 7,
|
||||
};
|
||||
|
||||
constexpr bool IsWithin2G(uintptr_t ref, uintptr_t target) {
|
||||
const u64 distance = target - (ref + 5);
|
||||
return !(distance >= 0x8000'0000ULL && distance <= ~0x8000'0000ULL);
|
||||
}
|
||||
|
||||
inline bool IsWithin2G(const Xbyak::CodeGenerator& code, uintptr_t target) {
|
||||
return IsWithin2G(reinterpret_cast<uintptr_t>(code.getCurr()), target);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
inline void CallFarFunction(Xbyak::CodeGenerator& code, const T f) {
|
||||
static_assert(std::is_pointer_v<T>, "Argument must be a (function) pointer.");
|
||||
size_t addr = reinterpret_cast<size_t>(f);
|
||||
if (IsWithin2G(code, addr)) {
|
||||
code.call(f);
|
||||
} else {
|
||||
// ABI_RETURN is a safe temp register to use before a call
|
||||
code.mov(ABI_RETURN, addr);
|
||||
code.call(ABI_RETURN);
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace Common::X64
|
||||
Reference in New Issue
Block a user