diff --git a/README.md b/README.md index 432595912..0150c92b7 100755 --- a/README.md +++ b/README.md @@ -1,7 +1,7 @@ yuzu emulator early access ============= -This is the source code for early-access 1791. +This is the source code for early-access 1792. ## Legal Notice diff --git a/src/common/common_sizes.h b/src/common/common_sizes.h index 7e9fd968b..d07b7ee5a 100755 --- a/src/common/common_sizes.h +++ b/src/common/common_sizes.h @@ -24,6 +24,7 @@ enum : u64 { Size_128_MB = 128ULL * Size_1_MB, Size_448_MB = 448ULL * Size_1_MB, Size_507_MB = 507ULL * Size_1_MB, + Size_512_MB = 512ULL * Size_1_MB, Size_562_MB = 562ULL * Size_1_MB, Size_1554_MB = 1554ULL * Size_1_MB, Size_2048_MB = 2048ULL * Size_1_MB, diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index 9825d3008..6d04d00da 100755 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h @@ -16,6 +16,7 @@ #include +#include "common/common_sizes.h" #include "common/common_types.h" #include "common/div_ceil.h" #include "common/microprofile.h" @@ -65,8 +66,8 @@ class BufferCache { static constexpr BufferId NULL_BUFFER_ID{0}; - static constexpr u64 expected_memory = 512ULL * 1024ULL * 1024ULL; - static constexpr u64 critical_memory = 1024ULL * 1024ULL * 1024ULL; + static constexpr u64 EXPECTED_MEMORY = Common::Size_512_MB; + static constexpr u64 CRITICAL_MEMORY = Common::Size_1_GB; using Maxwell = Tegra::Engines::Maxwell3D::Regs; @@ -105,6 +106,8 @@ public: void TickFrame(); + void RunGarbageCollector(); + void WriteMemory(VAddr cpu_addr, u64 size); void CachedWriteMemory(VAddr cpu_addr, u64 size); @@ -349,30 +352,8 @@ BufferCache

::BufferCache(VideoCore::RasterizerInterface& rasterizer_, } template -void BufferCache

::TickFrame() { - const bool enabled_gc = Settings::values.use_caches_gc.GetValue(); - SCOPE_EXIT({ - ++frame_tick; - delayed_destruction_ring.Tick(); - }); - // Calculate hits and shots and move hit bits to the right - const u32 hits = std::reduce(uniform_cache_hits.begin(), uniform_cache_hits.end()); - const u32 shots = std::reduce(uniform_cache_shots.begin(), uniform_cache_shots.end()); - std::copy_n(uniform_cache_hits.begin(), uniform_cache_hits.size() - 1, - uniform_cache_hits.begin() + 1); - std::copy_n(uniform_cache_shots.begin(), uniform_cache_shots.size() - 1, - uniform_cache_shots.begin() + 1); - uniform_cache_hits[0] = 0; - uniform_cache_shots[0] = 0; - - const bool skip_preferred = hits * 256 < shots * 251; - uniform_buffer_skip_cache_size = skip_preferred ? DEFAULT_SKIP_CACHE_SIZE : 0; - - const bool activate_gc = enabled_gc && total_used_memory >= expected_memory; - if (!activate_gc) { - return; - } - const bool aggressive_gc = total_used_memory >= critical_memory; +void BufferCache

::RunGarbageCollector() { + const bool aggressive_gc = total_used_memory >= CRITICAL_MEMORY; const u64 ticks_to_destroy = aggressive_gc ? 60 : 120; int num_iterations = aggressive_gc ? 64 : 32; for (; num_iterations > 0; --num_iterations) { @@ -391,6 +372,28 @@ void BufferCache

::TickFrame() { } } +template +void BufferCache

::TickFrame() { + // Calculate hits and shots and move hit bits to the right + const u32 hits = std::reduce(uniform_cache_hits.begin(), uniform_cache_hits.end()); + const u32 shots = std::reduce(uniform_cache_shots.begin(), uniform_cache_shots.end()); + std::copy_n(uniform_cache_hits.begin(), uniform_cache_hits.size() - 1, + uniform_cache_hits.begin() + 1); + std::copy_n(uniform_cache_shots.begin(), uniform_cache_shots.size() - 1, + uniform_cache_shots.begin() + 1); + uniform_cache_hits[0] = 0; + uniform_cache_shots[0] = 0; + + const bool skip_preferred = hits * 256 < shots * 251; + uniform_buffer_skip_cache_size = skip_preferred ? DEFAULT_SKIP_CACHE_SIZE : 0; + + if (Settings::values.use_caches_gc.GetValue() && total_used_memory >= EXPECTED_MEMORY) { + RunGarbageCollector(); + } + ++frame_tick; + delayed_destruction_ring.Tick(); +} + template void BufferCache

::WriteMemory(VAddr cpu_addr, u64 size) { ForEachBufferInRange(cpu_addr, size, [&](BufferId, Buffer& buffer) { diff --git a/src/video_core/renderer_opengl/gl_texture_cache.h b/src/video_core/renderer_opengl/gl_texture_cache.h index df8be12ff..12c619aca 100755 --- a/src/video_core/renderer_opengl/gl_texture_cache.h +++ b/src/video_core/renderer_opengl/gl_texture_cache.h @@ -235,6 +235,7 @@ struct TextureCacheParams { static constexpr bool ENABLE_VALIDATION = true; static constexpr bool FRAMEBUFFER_BLITS = true; static constexpr bool HAS_EMULATED_COPIES = true; + static constexpr bool HAS_DEVICE_MEMORY_INFO = false; using Runtime = OpenGL::TextureCacheRuntime; using Image = OpenGL::Image; diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp index 52860b4cf..e8ef6f5c3 100755 --- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp @@ -818,6 +818,10 @@ void TextureCacheRuntime::CopyImage(Image& dst, Image& src, }); } +u64 TextureCacheRuntime::GetDeviceLocalMemory() const { + return device.GetDeviceLocalMemory(); +} + Image::Image(TextureCacheRuntime& runtime, const ImageInfo& info_, GPUVAddr gpu_addr_, VAddr cpu_addr_) : VideoCommon::ImageBase(info_, gpu_addr_, cpu_addr_), scheduler{&runtime.scheduler}, diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.h b/src/video_core/renderer_vulkan/vk_texture_cache.h index 4a57d378b..d392f721b 100755 --- a/src/video_core/renderer_vulkan/vk_texture_cache.h +++ b/src/video_core/renderer_vulkan/vk_texture_cache.h @@ -97,6 +97,8 @@ struct TextureCacheRuntime { // All known Vulkan drivers can natively handle BGR textures return true; } + + u64 GetDeviceLocalMemory() const; }; class Image : public VideoCommon::ImageBase { @@ -257,6 +259,7 @@ struct TextureCacheParams { static constexpr bool ENABLE_VALIDATION = true; static constexpr bool FRAMEBUFFER_BLITS = false; static constexpr bool HAS_EMULATED_COPIES = false; + static constexpr bool HAS_DEVICE_MEMORY_INFO = true; using Runtime = Vulkan::TextureCacheRuntime; using Image = Vulkan::Image; diff --git a/src/video_core/surface.cpp b/src/video_core/surface.cpp index 6308aef94..eb1746265 100755 --- a/src/video_core/surface.cpp +++ b/src/video_core/surface.cpp @@ -283,4 +283,11 @@ std::pair GetASTCBlockSize(PixelFormat format) { return {DefaultBlockWidth(format), DefaultBlockHeight(format)}; } +u64 EstimatedDecompressedSize(u64 base_size, PixelFormat format) { + constexpr u64 RGBA8_PIXEL_SIZE = 4; + const u64 base_block_size = static_cast(DefaultBlockWidth(format)) * + static_cast(DefaultBlockHeight(format)) * RGBA8_PIXEL_SIZE; + return (base_size * base_block_size) / BytesPerBlock(format); +} + } // namespace VideoCore::Surface diff --git a/src/video_core/surface.h b/src/video_core/surface.h index c40ab89d0..1503db81f 100755 --- a/src/video_core/surface.h +++ b/src/video_core/surface.h @@ -462,4 +462,6 @@ bool IsPixelFormatSRGB(PixelFormat format); std::pair GetASTCBlockSize(PixelFormat format); +u64 EstimatedDecompressedSize(u64 base_size, PixelFormat format); + } // namespace VideoCore::Surface diff --git a/src/video_core/texture_cache/slot_vector.h b/src/video_core/texture_cache/slot_vector.h index 1259e8263..6180b8c0e 100755 --- a/src/video_core/texture_cache/slot_vector.h +++ b/src/video_core/texture_cache/slot_vector.h @@ -79,7 +79,7 @@ public: Iterator(SlotVector* slot_vector_, SlotId id_) noexcept : slot_vector{slot_vector_}, id{id_} {} - bool IsValid(const u64* bitset) noexcept { + bool IsValid(const u64* bitset) const noexcept { return ((bitset[id.index / 64] >> (id.index % 64)) & 1) != 0; } diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index b820492b1..64b576cbc 100755 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -20,6 +20,7 @@ #include "common/alignment.h" #include "common/common_funcs.h" +#include "common/common_sizes.h" #include "common/common_types.h" #include "common/logging/log.h" #include "common/settings.h" @@ -70,14 +71,16 @@ class TextureCache { static constexpr bool FRAMEBUFFER_BLITS = P::FRAMEBUFFER_BLITS; /// True when some copies have to be emulated static constexpr bool HAS_EMULATED_COPIES = P::HAS_EMULATED_COPIES; + /// True when the API can provide info about the memory of the device. + static constexpr bool HAS_DEVICE_MEMORY_INFO = P::HAS_DEVICE_MEMORY_INFO; /// Image view ID for null descriptors static constexpr ImageViewId NULL_IMAGE_VIEW_ID{0}; /// Sampler ID for bugged sampler ids static constexpr SamplerId NULL_SAMPLER_ID{0}; - static constexpr u64 expected_memory = 1024ULL * 1024ULL * 1024ULL; - static constexpr u64 critical_memory = 2 * 1024ULL * 1024ULL * 1024ULL; + static constexpr u64 DEFAULT_EXPECTED_MEMORY = Common::Size_1_GB; + static constexpr u64 DEFAULT_CRITICAL_MEMORY = Common::Size_2_GB; using Runtime = typename P::Runtime; using Image = typename P::Image; @@ -107,6 +110,9 @@ public: /// Notify the cache that a new frame has been queued void TickFrame(); + /// Runs the Garbage Collector. + void RunGarbageCollector(); + /// Return a constant reference to the given image view id [[nodiscard]] const ImageView& GetImageView(ImageViewId id) const noexcept; @@ -338,6 +344,8 @@ private: bool has_deleted_images = false; u64 total_used_memory = 0; + u64 expected_memory; + u64 critical_memory; SlotVector slot_images; SlotVector slot_image_views; @@ -381,19 +389,21 @@ TextureCache

::TextureCache(Runtime& runtime_, VideoCore::RasterizerInterface& void(slot_samplers.insert(runtime, sampler_descriptor)); deletion_iterator = slot_images.begin(); + + if constexpr (HAS_DEVICE_MEMORY_INFO) { + const auto device_memory = runtime.GetDeviceLocalMemory(); + const u64 possible_expected_memory = (device_memory * 3) / 10; + const u64 possible_critical_memory = (device_memory * 6) / 10; + expected_memory = std::max(possible_expected_memory, DEFAULT_EXPECTED_MEMORY); + critical_memory = std::max(possible_critical_memory, DEFAULT_CRITICAL_MEMORY); + } else { + expected_memory = DEFAULT_EXPECTED_MEMORY; + critical_memory = DEFAULT_CRITICAL_MEMORY; + } } template -void TextureCache

::TickFrame() { - const bool enabled_gc = Settings::values.use_caches_gc.GetValue(); - if (!enabled_gc) { - // @Note(Blinkhawk): compile error with SCOPE_EXIT on msvc. - sentenced_images.Tick(); - sentenced_framebuffers.Tick(); - sentenced_image_view.Tick(); - ++frame_tick; - return; - } +void TextureCache

::RunGarbageCollector() { const bool high_priority_mode = total_used_memory >= expected_memory; const bool aggressive_mode = total_used_memory >= critical_memory; const u64 ticks_to_destroy = high_priority_mode ? 60 : 100; @@ -450,11 +460,18 @@ void TextureCache

::TickFrame() { UnregisterImage(image_id); DeleteImage(image_id); if (is_bad_overlap) { - num_iterations++; + ++num_iterations; } } ++deletion_iterator; } +} + +template +void TextureCache

::TickFrame() { + if (Settings::values.use_caches_gc.GetValue()) { + RunGarbageCollector(); + } sentenced_images.Tick(); sentenced_framebuffers.Tick(); sentenced_image_view.Tick(); @@ -1276,8 +1293,13 @@ void TextureCache

::RegisterImage(ImageId image_id) { image.flags |= ImageFlagBits::Registered; ForEachPage(image.cpu_addr, image.guest_size_bytes, [this, image_id](u64 page) { page_table[page].push_back(image_id); }); - total_used_memory += - Common::AlignUp(std::max(image.guest_size_bytes, image.unswizzled_size_bytes), 1024); + u64 tentative_size = std::max(image.guest_size_bytes, image.unswizzled_size_bytes); + if ((IsPixelFormatASTC(image.info.format) && + True(image.flags & ImageFlagBits::AcceleratedUpload)) || + True(image.flags & ImageFlagBits::Converted)) { + tentative_size = EstimatedDecompressedSize(tentative_size, image.info.format); + } + total_used_memory += Common::AlignUp(tentative_size, 1024); } template @@ -1287,8 +1309,13 @@ void TextureCache

::UnregisterImage(ImageId image_id) { "Trying to unregister an already registered image"); image.flags &= ~ImageFlagBits::Registered; image.flags &= ~ImageFlagBits::BadOverlap; - total_used_memory -= - Common::AlignUp(std::max(image.guest_size_bytes, image.unswizzled_size_bytes), 1024); + u64 tentative_size = std::max(image.guest_size_bytes, image.unswizzled_size_bytes); + if ((IsPixelFormatASTC(image.info.format) && + True(image.flags & ImageFlagBits::AcceleratedUpload)) || + True(image.flags & ImageFlagBits::Converted)) { + tentative_size = EstimatedDecompressedSize(tentative_size, image.info.format); + } + total_used_memory -= Common::AlignUp(tentative_size, 1024); ForEachPage(image.cpu_addr, image.guest_size_bytes, [this, image_id](u64 page) { const auto page_it = page_table.find(page); if (page_it == page_table.end()) { diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp index 64206b3d2..724a0141c 100755 --- a/src/video_core/vulkan_common/vulkan_device.cpp +++ b/src/video_core/vulkan_common/vulkan_device.cpp @@ -408,6 +408,7 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR } logical = vk::Device::Create(physical, queue_cis, extensions, first_next, dld); + CollectPhysicalMemoryInfo(); CollectTelemetryParameters(); CollectToolingInfo(); @@ -818,6 +819,19 @@ void Device::CollectTelemetryParameters() { } } +void Device::CollectPhysicalMemoryInfo() { + const auto mem_properties = physical.GetMemoryProperties(); + const std::size_t num_properties = mem_properties.memoryTypeCount; + device_access_memory = 0; + for (std::size_t element = 0; element < num_properties; element++) { + if ((mem_properties.memoryTypes[element].propertyFlags & + VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT) != 0) { + const std::size_t heap_index = mem_properties.memoryTypes[element].heapIndex; + device_access_memory += mem_properties.memoryHeaps[heap_index].size; + } + } +} + void Device::CollectToolingInfo() { if (!ext_tooling_info) { return; diff --git a/src/video_core/vulkan_common/vulkan_device.h b/src/video_core/vulkan_common/vulkan_device.h index 67d70cd22..a1aba973b 100755 --- a/src/video_core/vulkan_common/vulkan_device.h +++ b/src/video_core/vulkan_common/vulkan_device.h @@ -225,6 +225,10 @@ public: return use_asynchronous_shaders; } + u64 GetDeviceLocalMemory() const { + return device_access_memory; + } + private: /// Checks if the physical device is suitable. void CheckSuitability(bool requires_swapchain) const; @@ -244,6 +248,9 @@ private: /// Collects information about attached tools. void CollectToolingInfo(); + /// Collects information about the device's local memory. + void CollectPhysicalMemoryInfo(); + /// Returns a list of queue initialization descriptors. std::vector GetDeviceQueueCreateInfos() const; @@ -302,6 +309,8 @@ private: /// Nsight Aftermath GPU crash tracker std::unique_ptr nsight_aftermath_tracker; + + u64 device_access_memory; }; } // namespace Vulkan diff --git a/src/yuzu/configuration/config.cpp b/src/yuzu/configuration/config.cpp index d5acf6c3a..6b83695b2 100755 --- a/src/yuzu/configuration/config.cpp +++ b/src/yuzu/configuration/config.cpp @@ -221,7 +221,7 @@ const std::array Config::default // This must be in alphabetical order according to action name as it must have the same order as // UISetting::values.shortcuts, which is alphabetically ordered. // clang-format off -const std::array Config::default_hotkeys{{ +const std::array Config::default_hotkeys{{ {QStringLiteral("Capture Screenshot"), QStringLiteral("Main Window"), {QStringLiteral("Ctrl+P"), Qt::WidgetWithChildrenShortcut}}, {QStringLiteral("Change Docked Mode"), QStringLiteral("Main Window"), {QStringLiteral("F10"), Qt::ApplicationShortcut}}, {QStringLiteral("Continue/Pause Emulation"), QStringLiteral("Main Window"), {QStringLiteral("F4"), Qt::WindowShortcut}}, @@ -238,6 +238,7 @@ const std::array Config::default_hotkeys{{ {QStringLiteral("Toggle Filter Bar"), QStringLiteral("Main Window"), {QStringLiteral("Ctrl+F"), Qt::WindowShortcut}}, {QStringLiteral("Toggle Mouse Panning"), QStringLiteral("Main Window"), {QStringLiteral("Ctrl+F9"), Qt::ApplicationShortcut}}, {QStringLiteral("Toggle Speed Limit"), QStringLiteral("Main Window"), {QStringLiteral("Ctrl+Z"), Qt::ApplicationShortcut}}, + {QStringLiteral("Toggle Frame Limiter"), QStringLiteral("Main Window"), {QStringLiteral("Ctrl+U"), Qt::ApplicationShortcut}}, {QStringLiteral("Toggle Status Bar"), QStringLiteral("Main Window"), {QStringLiteral("Ctrl+S"), Qt::WindowShortcut}}, }}; // clang-format on diff --git a/src/yuzu/configuration/config.h b/src/yuzu/configuration/config.h index ce3355588..3c1de0ac9 100755 --- a/src/yuzu/configuration/config.h +++ b/src/yuzu/configuration/config.h @@ -42,7 +42,7 @@ public: default_mouse_buttons; static const std::array default_keyboard_keys; static const std::array default_keyboard_mods; - static const std::array default_hotkeys; + static const std::array default_hotkeys; private: void Initialize(const std::string& config_name); diff --git a/src/yuzu/configuration/configure_graphics_advanced.ui b/src/yuzu/configuration/configure_graphics_advanced.ui index 4bab3d074..eaf55c517 100755 --- a/src/yuzu/configuration/configure_graphics_advanced.ui +++ b/src/yuzu/configuration/configure_graphics_advanced.ui @@ -109,7 +109,7 @@ Enables garbage collection for the GPU caches, this will try to keep VRAM within 3-4 GB by flushing the least used textures/buffers. May cause issues in a few games. - Enable GPU caches garbage collection (unsafe) + Enable GPU cache garbage collection (unsafe) diff --git a/src/yuzu/main.cpp b/src/yuzu/main.cpp index be8933c5c..ab6ec1e5a 100755 --- a/src/yuzu/main.cpp +++ b/src/yuzu/main.cpp @@ -1025,7 +1025,9 @@ void GMainWindow::InitializeHotkeys() { connect(hotkey_registry.GetHotkey(main_window, QStringLiteral("Mute Audio"), this), &QShortcut::activated, this, [] { Settings::values.audio_muted = !Settings::values.audio_muted; }); - + connect(hotkey_registry.GetHotkey(main_window, QStringLiteral("Toggle Frame Limiter"), this), + &QShortcut::activated, this, + [] { Settings::values.unlimit_fps = !Settings::values.unlimit_fps; }); connect(hotkey_registry.GetHotkey(main_window, QStringLiteral("Toggle Mouse Panning"), this), &QShortcut::activated, this, [&] { Settings::values.mouse_panning = !Settings::values.mouse_panning; diff --git a/src/yuzu_cmd/default_ini.h b/src/yuzu_cmd/default_ini.h index 38d0d00ac..a81b1594f 100755 --- a/src/yuzu_cmd/default_ini.h +++ b/src/yuzu_cmd/default_ini.h @@ -227,7 +227,7 @@ use_asynchronous_gpu_emulation = # 0: Off, 1 (default): On use_vsync = -# Whether to use garbage collection or not. +# Whether to use garbage collection or not for GPU caches. # 0 (default): Off, 1: On use_caches_gc =