diff --git a/README.md b/README.md index 5f1bae83c..274cce907 100755 --- a/README.md +++ b/README.md @@ -1,7 +1,7 @@ yuzu emulator early access ============= -This is the source code for early-access 1494. +This is the source code for early-access 1495. ## Legal Notice diff --git a/src/core/core.cpp b/src/core/core.cpp index de6305e2a..305f56ff1 100755 --- a/src/core/core.cpp +++ b/src/core/core.cpp @@ -299,28 +299,17 @@ struct System::Impl { gpu_core->WaitIdle(); } - // Shutdown emulation session services.reset(); service_manager.reset(); cheat_engine.reset(); telemetry_session.reset(); - - // Close all CPU/threading state cpu_manager.Shutdown(); - - // Release the Time Manager's resources time_manager.Shutdown(); - - // Shutdown kernel and core timing core_timing.Shutdown(); - kernel.Shutdown(); - - // Close app loader app_loader.reset(); gpu_core.reset(); perf_stats.reset(); - - // Clear all applets + kernel.Shutdown(); applet_manager.ClearAll(); LOG_DEBUG(Core, "Shutdown OK"); diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index 2a6844ab1..4de1e37e5 100755 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include @@ -91,7 +92,7 @@ class BufferCache { }; public: - static constexpr u32 SKIP_CACHE_SIZE = 4096; + static constexpr u32 DEFAULT_SKIP_CACHE_SIZE = 4096; explicit BufferCache(VideoCore::RasterizerInterface& rasterizer_, Tegra::Engines::Maxwell3D& maxwell3d_, @@ -240,9 +241,9 @@ private: template void ChangeRegister(BufferId buffer_id); - void SynchronizeBuffer(Buffer& buffer, VAddr cpu_addr, u32 size); + bool SynchronizeBuffer(Buffer& buffer, VAddr cpu_addr, u32 size); - void SynchronizeBufferImpl(Buffer& buffer, VAddr cpu_addr, u32 size); + bool SynchronizeBufferImpl(Buffer& buffer, VAddr cpu_addr, u32 size); void UploadMemory(Buffer& buffer, u64 total_size_bytes, u64 largest_copy, std::span copies); @@ -297,6 +298,11 @@ private: std::array fast_bound_uniform_buffers{}; + std::array uniform_cache_hits{}; + std::array uniform_cache_shots{}; + + u32 uniform_buffer_skip_cache_size = DEFAULT_SKIP_CACHE_SIZE; + bool has_deleted_buffers = false; std::conditional_t, Empty> @@ -328,6 +334,19 @@ BufferCache

::BufferCache(VideoCore::RasterizerInterface& rasterizer_, template void BufferCache

::TickFrame() { + // Calculate hits and shots and move hit bits to the right + const u32 hits = std::reduce(uniform_cache_hits.begin(), uniform_cache_hits.end()); + const u32 shots = std::reduce(uniform_cache_shots.begin(), uniform_cache_shots.end()); + std::copy_n(uniform_cache_hits.begin(), uniform_cache_hits.size() - 1, + uniform_cache_hits.begin() + 1); + std::copy_n(uniform_cache_shots.begin(), uniform_cache_shots.size() - 1, + uniform_cache_shots.begin() + 1); + uniform_cache_hits[0] = 0; + uniform_cache_shots[0] = 0; + + const bool skip_preferred = hits * 256 < shots * 251; + uniform_buffer_skip_cache_size = skip_preferred ? DEFAULT_SKIP_CACHE_SIZE : 0; + delayed_destruction_ring.Tick(); } @@ -671,7 +690,7 @@ void BufferCache

::BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32 const VAddr cpu_addr = binding.cpu_addr; const u32 size = binding.size; Buffer& buffer = slot_buffers[binding.buffer_id]; - if (size <= SKIP_CACHE_SIZE && !buffer.IsRegionGpuModified(cpu_addr, size)) { + if (size <= uniform_buffer_skip_cache_size && !buffer.IsRegionGpuModified(cpu_addr, size)) { if constexpr (IS_OPENGL) { if (runtime.HasFastBufferSubData()) { // Fast path for Nvidia @@ -692,7 +711,12 @@ void BufferCache

::BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32 return; } // Classic cached path - SynchronizeBuffer(buffer, cpu_addr, size); + const bool sync_cached = SynchronizeBuffer(buffer, cpu_addr, size); + if (sync_cached) { + ++uniform_cache_hits[0]; + } + ++uniform_cache_shots[0]; + if (!needs_bind && !HasFastUniformBufferBound(stage, binding_index)) { // Skip binding if it's not needed and if the bound buffer is not the fast version // This exists to avoid instances where the fast buffer is bound and a GPU write happens @@ -1106,15 +1130,15 @@ void BufferCache

::ChangeRegister(BufferId buffer_id) { } template -void BufferCache

::SynchronizeBuffer(Buffer& buffer, VAddr cpu_addr, u32 size) { +bool BufferCache

::SynchronizeBuffer(Buffer& buffer, VAddr cpu_addr, u32 size) { if (buffer.CpuAddr() == 0) { - return; + return true; } - SynchronizeBufferImpl(buffer, cpu_addr, size); + return SynchronizeBufferImpl(buffer, cpu_addr, size); } template -void BufferCache

::SynchronizeBufferImpl(Buffer& buffer, VAddr cpu_addr, u32 size) { +bool BufferCache

::SynchronizeBufferImpl(Buffer& buffer, VAddr cpu_addr, u32 size) { boost::container::small_vector copies; u64 total_size_bytes = 0; u64 largest_copy = 0; @@ -1128,10 +1152,11 @@ void BufferCache

::SynchronizeBufferImpl(Buffer& buffer, VAddr cpu_addr, u32 s largest_copy = std::max(largest_copy, range_size); }); if (total_size_bytes == 0) { - return; + return true; } const std::span copies_span(copies.data(), copies.size()); UploadMemory(buffer, total_size_bytes, largest_copy, copies_span); + return false; } template diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.cpp b/src/video_core/renderer_opengl/gl_buffer_cache.cpp index 6da3906a4..c225d1fc9 100755 --- a/src/video_core/renderer_opengl/gl_buffer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_buffer_cache.cpp @@ -73,7 +73,8 @@ BufferCacheRuntime::BufferCacheRuntime(const Device& device_) for (auto& stage_uniforms : fast_uniforms) { for (OGLBuffer& buffer : stage_uniforms) { buffer.Create(); - glNamedBufferData(buffer.handle, BufferCache::SKIP_CACHE_SIZE, nullptr, GL_STREAM_DRAW); + glNamedBufferData(buffer.handle, BufferCache::DEFAULT_SKIP_CACHE_SIZE, nullptr, + GL_STREAM_DRAW); } } for (auto& stage_uniforms : copy_uniforms) { diff --git a/src/video_core/renderer_vulkan/vk_compute_pass.cpp b/src/video_core/renderer_vulkan/vk_compute_pass.cpp index bc6ed173d..b6adbca57 100755 --- a/src/video_core/renderer_vulkan/vk_compute_pass.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pass.cpp @@ -501,10 +501,8 @@ void ASTCDecoderPass::Assemble(Image& image, const StagingBufferRef& map, const u32 num_dispatches_x = Common::DivCeil(swizzle.num_tiles.width, 32U); const u32 num_dispatches_y = Common::DivCeil(swizzle.num_tiles.height, 32U); const u32 num_dispatches_z = image.info.resources.layers; - const std::array num_image_blocks{swizzle.num_tiles.width, swizzle.num_tiles.height}; update_descriptor_queue.Acquire(); - update_descriptor_queue.AddBuffer(map.buffer, input_offset, image.guest_size_bytes - swizzle.buffer_offset); update_descriptor_queue.AddBuffer(*data_buffer, offsetof(AstcBufferData, encoding_values),