From b8666b8fd14403985aef9608bb3b94a5c3fddd79 Mon Sep 17 00:00:00 2001 From: pineappleEA Date: Tue, 16 Feb 2021 00:42:49 +0100 Subject: [PATCH] early-access version 1465 --- README.md | 2 +- src/common/CMakeLists.txt | 1 - src/common/uint128.h | 88 ++++++++++++++++++++++++++++++++- src/common/wall_clock.cpp | 13 +++-- src/common/x64/native_clock.cpp | 58 ---------------------- 5 files changed, 98 insertions(+), 64 deletions(-) diff --git a/README.md b/README.md index 724b82658..70e54a835 100755 --- a/README.md +++ b/README.md @@ -1,7 +1,7 @@ yuzu emulator early access ============= -This is the source code for early-access 1464. +This is the source code for early-access 1465. ## Legal Notice diff --git a/src/common/CMakeLists.txt b/src/common/CMakeLists.txt index 263c457cd..b657506b1 100755 --- a/src/common/CMakeLists.txt +++ b/src/common/CMakeLists.txt @@ -168,7 +168,6 @@ add_library(common STATIC time_zone.cpp time_zone.h tree.h - uint128.cpp uint128.h uuid.cpp uuid.h diff --git a/src/common/uint128.h b/src/common/uint128.h index a313765be..83560a9ce 100755 --- a/src/common/uint128.h +++ b/src/common/uint128.h @@ -4,12 +4,98 @@ #pragma once +#include #include + +#ifdef _MSC_VER +#include +#pragma intrinsic(__umulh) +#pragma intrinsic(_umul128) +#pragma intrinsic(_udiv128) +#else +#include +#endif + #include "common/common_types.h" namespace Common { // This function multiplies 2 u64 values and divides it by a u64 value. 
-[[nodiscard]] u64 MultiplyAndDivide64(u64 a, u64 b, u64 d); +[[nodiscard]] static inline u64 MultiplyAndDivide64(u64 a, u64 b, u64 d) { +#ifdef _MSC_VER + u128 r{}; + r[0] = _umul128(a, b, &r[1]); + u64 remainder; +#if _MSC_VER < 1923 + return udiv128(r[1], r[0], d, &remainder); +#else + return _udiv128(r[1], r[0], d, &remainder); +#endif +#else + const u64 diva = a / d; + const u64 moda = a % d; + const u64 divb = b / d; + const u64 modb = b % d; + return diva * b + moda * divb + moda * modb / d; +#endif +} + +// This function multiplies 2 u64 values and produces a u128 value. +[[nodiscard]] static inline u128 Multiply64Into128(u64 a, u64 b) { + u128 result; +#ifdef _MSC_VER + result[0] = _umul128(a, b, &result[1]); +#else + unsigned __int128 tmp = a; + tmp *= b; + std::memcpy(&result, &tmp, sizeof(u128)); +#endif + return result; +} + +[[nodiscard]] static inline u64 GetFixedPoint64Factor(u64 numerator, u64 divisor) { +#ifdef __SIZEOF_INT128__ + const auto base = static_cast(numerator) << 64ULL; + return static_cast(base / divisor); +#elif defined(_M_X64) || defined(_M_ARM64) + std::array r = {0, numerator}; + u64 remainder; +#if _MSC_VER < 1923 + return udiv128(r[1], r[0], divisor, &remainder); +#else + return _udiv128(r[1], r[0], divisor, &remainder); +#endif +#else + // This one is a bit more inaccurate. 
+ return MultiplyAndDivide64(std::numeric_limits::max(), numerator, divisor); +#endif +} + +[[nodiscard]] static inline u64 MultiplyHigh(u64 a, u64 b) { +#ifdef __SIZEOF_INT128__ + return (static_cast(a) * static_cast(b)) >> 64; +#elif defined(_M_X64) || defined(_M_ARM64) + return __umulh(a, b); // MSVC +#else + // Generic fallback + const u64 a_lo = u32(a); + const u64 a_hi = a >> 32; + const u64 b_lo = u32(b); + const u64 b_hi = b >> 32; + + const u64 a_x_b_hi = a_hi * b_hi; + const u64 a_x_b_mid = a_hi * b_lo; + const u64 b_x_a_mid = b_hi * a_lo; + const u64 a_x_b_lo = a_lo * b_lo; + + const u64 carry_bit = (static_cast(static_cast(a_x_b_mid)) + + static_cast(static_cast(b_x_a_mid)) + (a_x_b_lo >> 32)) >> + 32; + + const u64 multhi = a_x_b_hi + (a_x_b_mid >> 32) + (b_x_a_mid >> 32) + carry_bit; + + return multhi; +#endif +} } // namespace Common diff --git a/src/common/wall_clock.cpp b/src/common/wall_clock.cpp index c0505e898..1545993bd 100755 --- a/src/common/wall_clock.cpp +++ b/src/common/wall_clock.cpp @@ -2,6 +2,9 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. 
+#include + +#include "common/uint128.h" #include "common/wall_clock.h" #ifdef ARCHITECTURE_x86_64 @@ -17,7 +20,9 @@ using base_time_point = std::chrono::time_point; class StandardWallClock final : public WallClock { public: explicit StandardWallClock(u64 emulated_cpu_frequency_, u64 emulated_clock_frequency_) - : WallClock(emulated_cpu_frequency_, emulated_clock_frequency_, false) { + : WallClock(emulated_cpu_frequency_, emulated_clock_frequency_, false), + emulated_clock_factor{GetFixedPoint64Factor(emulated_clock_frequency, 1000000000)}, + emulated_cpu_factor{GetFixedPoint64Factor(emulated_cpu_frequency, 1000000000)} { start_time = base_timer::now(); } @@ -40,11 +45,11 @@ public: } u64 GetClockCycles() override { - return GetTimeNS().count() * (emulated_clock_frequency / 1000) / 1000000; + return MultiplyHigh(GetTimeNS().count(), emulated_clock_factor); } u64 GetCPUCycles() override { - return GetTimeNS().count() * (emulated_cpu_frequency / 1000) / 1000000; + return MultiplyHigh(GetTimeNS().count(), emulated_cpu_factor); } void Pause([[maybe_unused]] bool is_paused) override { @@ -53,6 +58,8 @@ public: private: base_time_point start_time; + const u64 emulated_clock_factor; + const u64 emulated_cpu_factor; }; #ifdef ARCHITECTURE_x86_64 diff --git a/src/common/x64/native_clock.cpp b/src/common/x64/native_clock.cpp index a65f6b832..87de40624 100755 --- a/src/common/x64/native_clock.cpp +++ b/src/common/x64/native_clock.cpp @@ -8,68 +8,10 @@ #include #include -#ifdef _MSC_VER -#include - -#pragma intrinsic(__umulh) -#pragma intrinsic(_udiv128) -#else -#include -#endif - #include "common/atomic_ops.h" #include "common/uint128.h" #include "common/x64/native_clock.h" -namespace { - -[[nodiscard]] u64 GetFixedPoint64Factor(u64 numerator, u64 divisor) { -#ifdef __SIZEOF_INT128__ - const auto base = static_cast(numerator) << 64ULL; - return static_cast(base / divisor); -#elif defined(_M_X64) || defined(_M_ARM64) - std::array r = {0, numerator}; - u64 remainder; -#if 
_MSC_VER < 1923 - return udiv128(r[1], r[0], divisor, &remainder); -#else - return _udiv128(r[1], r[0], divisor, &remainder); -#endif -#else - // This one is bit more inaccurate. - return MultiplyAndDivide64(std::numeric_limits::max(), numerator, divisor); -#endif -} - -[[nodiscard]] u64 MultiplyHigh(u64 a, u64 b) { -#ifdef __SIZEOF_INT128__ - return (static_cast(a) * static_cast(b)) >> 64; -#elif defined(_M_X64) || defined(_M_ARM64) - return __umulh(a, b); // MSVC -#else - // Generic fallback - const u64 a_lo = u32(a); - const u64 a_hi = a >> 32; - const u64 b_lo = u32(b); - const u64 b_hi = b >> 32; - - const u64 a_x_b_hi = a_hi * b_hi; - const u64 a_x_b_mid = a_hi * b_lo; - const u64 b_x_a_mid = b_hi * a_lo; - const u64 a_x_b_lo = a_lo * b_lo; - - const u64 carry_bit = (static_cast(static_cast(a_x_b_mid)) + - static_cast(static_cast(b_x_a_mid)) + (a_x_b_lo >> 32)) >> - 32; - - const u64 multhi = a_x_b_hi + (a_x_b_mid >> 32) + (b_x_a_mid >> 32) + carry_bit; - - return multhi; -#endif -} - -} // namespace - namespace Common { u64 EstimateRDTSCFrequency() {