From 94223944e12a5cdf424ade42adf51bf1b5008842 Mon Sep 17 00:00:00 2001 From: pineappleEA Date: Sun, 8 Jan 2023 21:39:56 +0100 Subject: [PATCH] early-access version 3301 --- README.md | 2 +- src/common/settings.h | 2 +- .../frontend/maxwell/translate_program.cpp | 2 +- src/shader_recompiler/host_translate_info.h | 1 - .../global_memory_to_storage_buffer_pass.cpp | 13 ++++--------- src/shader_recompiler/ir_opt/passes.h | 2 +- src/video_core/buffer_cache/buffer_cache.h | 13 +++---------- .../renderer_opengl/gl_buffer_cache.h | 4 ---- .../renderer_opengl/gl_rasterizer.cpp | 4 ++++ .../renderer_opengl/gl_shader_cache.cpp | 1 - .../renderer_vulkan/renderer_vulkan.cpp | 2 ++ .../renderer_vulkan/vk_buffer_cache.cpp | 4 ---- .../renderer_vulkan/vk_buffer_cache.h | 2 -- .../renderer_vulkan/vk_pipeline_cache.cpp | 1 - .../renderer_vulkan/vk_rasterizer.cpp | 2 ++ src/video_core/renderer_vulkan/vk_scheduler.cpp | 5 +++++ src/video_core/renderer_vulkan/vk_scheduler.h | 7 +++++++ .../renderer_vulkan/vk_turbo_mode.cpp | 17 +++++++++++++++++ src/video_core/renderer_vulkan/vk_turbo_mode.h | 9 +++++++++ 19 files changed, 57 insertions(+), 36 deletions(-) diff --git a/README.md b/README.md index 768142948..3fa5519a1 100755 --- a/README.md +++ b/README.md @@ -1,7 +1,7 @@ yuzu emulator early access ============= -This is the source code for early-access 3300. +This is the source code for early-access 3301. ## Legal Notice diff --git a/src/common/settings.h b/src/common/settings.h index 536c31d16..0786d7897 100755 --- a/src/common/settings.h +++ b/src/common/settings.h @@ -415,7 +415,7 @@ struct Values { // Renderer SwitchableSetting renderer_backend{ RendererBackend::Vulkan, RendererBackend::OpenGL, RendererBackend::Null, "backend"}; - SwitchableSetting renderer_force_max_clock{true, "force_max_clock"}; + SwitchableSetting renderer_force_max_clock{false, "force_max_clock"}; Setting renderer_debug{false, "debug"}; Setting renderer_shader_feedback{false, "shader_feedback"}; Setting enable_nsight_aftermath{false, "nsight_aftermath"}; diff --git a/src/shader_recompiler/frontend/maxwell/translate_program.cpp b/src/shader_recompiler/frontend/maxwell/translate_program.cpp index c670f4373..8915a6537 100755 --- a/src/shader_recompiler/frontend/maxwell/translate_program.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate_program.cpp @@ -292,7 +292,7 @@ IR::Program TranslateProgram(ObjectPool& inst_pool, ObjectPool low_addr{TrackLowAddress(&inst)}) { @@ -416,10 +415,7 @@ IR::U32 StorageOffset(IR::Block& block, IR::Inst& inst, StorageBufferAddr buffer } // Subtract the least significant 32 bits from the guest offset. The result is the storage // buffer offset in bytes. - IR::U32 low_cbuf{ir.GetCbuf(ir.Imm32(buffer.index), ir.Imm32(buffer.offset))}; - - // Align the offset base to match the host alignment requirements - low_cbuf = ir.BitwiseAnd(low_cbuf, ir.Imm32(~(alignment - 1U))); + const IR::U32 low_cbuf{ir.GetCbuf(ir.Imm32(buffer.index), ir.Imm32(buffer.offset))}; return ir.ISub(offset, low_cbuf); } @@ -514,7 +510,7 @@ void Replace(IR::Block& block, IR::Inst& inst, const IR::U32& storage_index, } } // Anonymous namespace -void GlobalMemoryToStorageBufferPass(IR::Program& program, const HostTranslateInfo& host_info) { +void GlobalMemoryToStorageBufferPass(IR::Program& program) { StorageInfo info; for (IR::Block* const block : program.post_order_blocks) { for (IR::Inst& inst : block->Instructions()) { @@ -538,8 +534,7 @@ void GlobalMemoryToStorageBufferPass(IR::Program& program, const HostTranslateIn const IR::U32 index{IR::Value{static_cast(info.set.index_of(it))}}; IR::Block* const block{storage_inst.block}; IR::Inst* const inst{storage_inst.inst}; - const IR::U32 offset{ - StorageOffset(*block, *inst, storage_buffer, host_info.min_ssbo_alignment)}; + const IR::U32 offset{StorageOffset(*block, *inst, storage_buffer)}; Replace(*block, *inst, index, offset); } } diff --git a/src/shader_recompiler/ir_opt/passes.h b/src/shader_recompiler/ir_opt/passes.h index 0a1a19350..7c737580c 100755 --- a/src/shader_recompiler/ir_opt/passes.h +++ b/src/shader_recompiler/ir_opt/passes.h @@ -15,7 +15,7 @@ namespace Shader::Optimization { void CollectShaderInfoPass(Environment& env, IR::Program& program); void ConstantPropagationPass(Environment& env, IR::Program& program); void DeadCodeEliminationPass(IR::Program& program); -void GlobalMemoryToStorageBufferPass(IR::Program& program, const HostTranslateInfo& host_info); +void GlobalMemoryToStorageBufferPass(IR::Program& program); void IdentityRemovalPass(IR::Program& program); void LowerFp16ToFp32(IR::Program& program); void LowerInt64ToInt32(IR::Program& program); diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index fd6bc56de..b2b0c8ae6 100755 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h @@ -1938,21 +1938,14 @@ typename BufferCache

::Binding BufferCache

::StorageBufferBinding(GPUVAddr s bool is_written) const { const GPUVAddr gpu_addr = gpu_memory->Read(ssbo_addr); const u32 size = gpu_memory->Read(ssbo_addr + 8); - const u32 alignment = runtime.GetStorageBufferAlignment(); - - const GPUVAddr aligned_gpu_addr = Common::AlignDown(gpu_addr, alignment); - const u32 aligned_size = - Common::AlignUp(static_cast(gpu_addr - aligned_gpu_addr) + size, alignment); - - const std::optional cpu_addr = gpu_memory->GpuToCpuAddress(aligned_gpu_addr); + const std::optional cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr); if (!cpu_addr || size == 0) { return NULL_BINDING; } - - const VAddr cpu_end = Common::AlignUp(*cpu_addr + aligned_size, Core::Memory::YUZU_PAGESIZE); + const VAddr cpu_end = Common::AlignUp(*cpu_addr + size, Core::Memory::YUZU_PAGESIZE); const Binding binding{ .cpu_addr = *cpu_addr, - .size = is_written ? aligned_size : static_cast(cpu_end - *cpu_addr), + .size = is_written ? size : static_cast(cpu_end - *cpu_addr), .buffer_id = BufferId{}, }; return binding; diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.h b/src/video_core/renderer_opengl/gl_buffer_cache.h index 0c0e70c26..0c6df648b 100755 --- a/src/video_core/renderer_opengl/gl_buffer_cache.h +++ b/src/video_core/renderer_opengl/gl_buffer_cache.h @@ -160,10 +160,6 @@ public: return device.CanReportMemoryUsage(); } - u32 GetStorageBufferAlignment() const { - return static_cast(device.GetShaderStorageBufferAlignment()); - } - private: static constexpr std::array PABO_LUT{ GL_VERTEX_PROGRAM_PARAMETER_BUFFER_NV, GL_TESS_CONTROL_PROGRAM_PARAMETER_BUFFER_NV, diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index f50776911..5bea7e66c 100755 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -140,6 +140,7 @@ void RasterizerOpenGL::LoadDiskResources(u64 title_id, std::stop_token stop_load void RasterizerOpenGL::Clear(u32 layer_count) { MICROPROFILE_SCOPE(OpenGL_Clears); + gpu_memory->FlushCaching(); const auto& regs = maxwell3d->regs; bool use_color{}; bool use_depth{}; @@ -208,6 +209,7 @@ void RasterizerOpenGL::PrepareDraw(bool is_indexed, Func&& draw_func) { MICROPROFILE_SCOPE(OpenGL_Drawing); SCOPE_EXIT({ gpu.TickWork(); }); + gpu_memory->FlushCaching(); query_cache.UpdateCounters(); GraphicsPipeline* const pipeline{shader_cache.CurrentGraphicsPipeline()}; @@ -361,6 +363,7 @@ void RasterizerOpenGL::DrawTexture() { } void RasterizerOpenGL::DispatchCompute() { + gpu_memory->FlushCaching(); ComputePipeline* const pipeline{shader_cache.CurrentComputePipeline()}; if (!pipeline) { return; @@ -568,6 +571,7 @@ void RasterizerOpenGL::TickFrame() { } bool RasterizerOpenGL::AccelerateConditionalRendering() { + gpu_memory->FlushCaching(); if (Settings::IsGPULevelHigh()) { // Reimplement Host conditional rendering. return false; diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 7580c8155..d46670dba 100755 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -236,7 +236,6 @@ ShaderCache::ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindo .needs_demote_reorder = device.IsAmd(), .support_snorm_render_buffer = false, .support_viewport_index_layer = device.HasVertexViewportLayer(), - .min_ssbo_alignment = static_cast(device.GetShaderStorageBufferAlignment()), .support_geometry_shader_passthrough = device.HasGeometryShaderPassthrough(), } { if (use_asynchronous_shaders) { diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.cpp b/src/video_core/renderer_vulkan/renderer_vulkan.cpp index a1b87e9c7..48518b9d9 100755 --- a/src/video_core/renderer_vulkan/renderer_vulkan.cpp +++ b/src/video_core/renderer_vulkan/renderer_vulkan.cpp @@ -99,6 +99,7 @@ RendererVulkan::RendererVulkan(Core::TelemetrySession& telemetry_session_, state_tracker, scheduler) { if (Settings::values.renderer_force_max_clock.GetValue() && device.ShouldBoostClocks()) { turbo_mode.emplace(instance, dld); + scheduler.RegisterOnSubmit([this] { turbo_mode->QueueSubmitted(); }); } Report(); } catch (const vk::Exception& exception) { @@ -107,6 +108,7 @@ RendererVulkan::RendererVulkan(Core::TelemetrySession& telemetry_session_, } RendererVulkan::~RendererVulkan() { + scheduler.RegisterOnSubmit([] {}); void(device.GetLogical().WaitIdle()); } diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp index d24515311..fad3c7790 100755 --- a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp @@ -330,10 +330,6 @@ bool BufferCacheRuntime::CanReportMemoryUsage() const { return device.CanReportMemoryUsage(); } -u32 BufferCacheRuntime::GetStorageBufferAlignment() const { - return static_cast(device.GetStorageBufferAlignment()); -} - void BufferCacheRuntime::Finish() { scheduler.Finish(); } diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.h b/src/video_core/renderer_vulkan/vk_buffer_cache.h index b46a59055..7e3d03bc0 100755 --- a/src/video_core/renderer_vulkan/vk_buffer_cache.h +++ b/src/video_core/renderer_vulkan/vk_buffer_cache.h @@ -73,8 +73,6 @@ public: bool CanReportMemoryUsage() const; - u32 GetStorageBufferAlignment() const; - [[nodiscard]] StagingBufferRef UploadStagingBuffer(size_t size); [[nodiscard]] StagingBufferRef DownloadStagingBuffer(size_t size); diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index e9067d1bf..16ac058c8 100755 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -344,7 +344,6 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, const Device& device driver_id == VK_DRIVER_ID_AMD_PROPRIETARY || driver_id == VK_DRIVER_ID_AMD_OPEN_SOURCE, .support_snorm_render_buffer = true, .support_viewport_index_layer = device.IsExtShaderViewportIndexLayerSupported(), - .min_ssbo_alignment = static_cast(device.GetStorageBufferAlignment()), .support_geometry_shader_passthrough = device.IsNvGeometryShaderPassthroughSupported(), }; diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index 93731ddad..56728ebd3 100755 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -298,6 +298,7 @@ void RasterizerVulkan::Clear(u32 layer_count) { MICROPROFILE_SCOPE(Vulkan_Clearing); FlushWork(); + gpu_memory->FlushCaching(); query_cache.UpdateCounters(); @@ -656,6 +657,7 @@ void RasterizerVulkan::TickFrame() { } bool RasterizerVulkan::AccelerateConditionalRendering() { + gpu_memory->FlushCaching(); if (Settings::IsGPULevelHigh()) { // TODO(Blinkhawk): Reimplement Host conditional rendering. return false; diff --git a/src/video_core/renderer_vulkan/vk_scheduler.cpp b/src/video_core/renderer_vulkan/vk_scheduler.cpp index 5b4c7395f..00f4386d8 100755 --- a/src/video_core/renderer_vulkan/vk_scheduler.cpp +++ b/src/video_core/renderer_vulkan/vk_scheduler.cpp @@ -213,6 +213,11 @@ void Scheduler::SubmitExecution(VkSemaphore signal_semaphore, VkSemaphore wait_s .signalSemaphoreCount = num_signal_semaphores, .pSignalSemaphores = signal_semaphores.data(), }; + + if (on_submit) { + on_submit(); + } + switch (const VkResult result = device.GetGraphicsQueue().Submit(submit_info)) { case VK_SUCCESS: break; diff --git a/src/video_core/renderer_vulkan/vk_scheduler.h b/src/video_core/renderer_vulkan/vk_scheduler.h index fefa45eff..e5ad75e65 100755 --- a/src/video_core/renderer_vulkan/vk_scheduler.h +++ b/src/video_core/renderer_vulkan/vk_scheduler.h @@ -5,6 +5,7 @@ #include #include +#include #include #include #include @@ -66,6 +67,11 @@ public: query_cache = &query_cache_; } + // Registers a callback to perform on queue submission. + void RegisterOnSubmit(std::function&& func) { + on_submit = std::move(func); + } + /// Send work to a separate thread. template void Record(T&& command) { @@ -216,6 +222,7 @@ private: vk::CommandBuffer current_cmdbuf; std::unique_ptr chunk; + std::function on_submit; State state; diff --git a/src/video_core/renderer_vulkan/vk_turbo_mode.cpp b/src/video_core/renderer_vulkan/vk_turbo_mode.cpp index 852b86f84..c42594149 100755 --- a/src/video_core/renderer_vulkan/vk_turbo_mode.cpp +++ b/src/video_core/renderer_vulkan/vk_turbo_mode.cpp @@ -14,11 +14,21 @@ using namespace Common::Literals; TurboMode::TurboMode(const vk::Instance& instance, const vk::InstanceDispatch& dld) : m_device{CreateDevice(instance, dld, VK_NULL_HANDLE)}, m_allocator{m_device, false} { + { + std::scoped_lock lk{m_submission_lock}; + m_submission_time = std::chrono::steady_clock::now(); + } m_thread = std::jthread([&](auto stop_token) { Run(stop_token); }); } TurboMode::~TurboMode() = default; +void TurboMode::QueueSubmitted() { + std::scoped_lock lk{m_submission_lock}; + m_submission_time = std::chrono::steady_clock::now(); + m_submission_cv.notify_one(); +} + void TurboMode::Run(std::stop_token stop_token) { auto& dld = m_device.GetLogical(); @@ -199,6 +209,13 @@ void TurboMode::Run(std::stop_token stop_token) { // Wait for completion. fence.Wait(); + + // Wait for the next graphics queue submission if necessary. + std::unique_lock lk{m_submission_lock}; + Common::CondvarWait(m_submission_cv, lk, stop_token, [this] { + return (std::chrono::steady_clock::now() - m_submission_time) <= + std::chrono::milliseconds{100}; + }); } } diff --git a/src/video_core/renderer_vulkan/vk_turbo_mode.h b/src/video_core/renderer_vulkan/vk_turbo_mode.h index 2060e2395..99b5ac50b 100755 --- a/src/video_core/renderer_vulkan/vk_turbo_mode.h +++ b/src/video_core/renderer_vulkan/vk_turbo_mode.h @@ -3,6 +3,9 @@ #pragma once +#include +#include + #include "common/polyfill_thread.h" #include "video_core/vulkan_common/vulkan_device.h" #include "video_core/vulkan_common/vulkan_memory_allocator.h" @@ -15,11 +18,17 @@ public: explicit TurboMode(const vk::Instance& instance, const vk::InstanceDispatch& dld); ~TurboMode(); + void QueueSubmitted(); + private: void Run(std::stop_token stop_token); Device m_device; MemoryAllocator m_allocator; + std::mutex m_submission_lock; + std::condition_variable_any m_submission_cv; + std::chrono::time_point m_submission_time{}; + std::jthread m_thread; };