From 83d2501797598123fa415c29cd936b1001a3517b Mon Sep 17 00:00:00 2001 From: pineappleEA Date: Tue, 20 Jul 2021 11:00:32 +0200 Subject: [PATCH] early-access version 1896 --- CMakeModules/GenerateSCMRev.cmake | 13 ++----- README.md | 2 +- .../service/am/applets/applet_controller.h | 2 +- src/video_core/renderer_opengl/gl_device.cpp | 6 ++-- .../renderer_opengl/gl_graphics_pipeline.cpp | 30 ++++++++++++++-- .../renderer_opengl/gl_graphics_pipeline.h | 7 +++- .../renderer_vulkan/vk_buffer_cache.cpp | 36 +++++++++++-------- .../renderer_vulkan/vk_buffer_cache.h | 6 ++-- .../renderer_vulkan/vk_pipeline_cache.cpp | 2 +- .../configure_graphics_advanced.ui | 2 +- 10 files changed, 65 insertions(+), 41 deletions(-) diff --git a/CMakeModules/GenerateSCMRev.cmake b/CMakeModules/GenerateSCMRev.cmake index 77358768e..43ca730ec 100755 --- a/CMakeModules/GenerateSCMRev.cmake +++ b/CMakeModules/GenerateSCMRev.cmake @@ -48,15 +48,6 @@ if (BUILD_REPOSITORY) endif() endif() -# The variable SRC_DIR must be passed into the script (since it uses the current build directory for all values of CMAKE_*_DIR) -set(VIDEO_CORE "${SRC_DIR}/src/video_core") -set(HASH_FILES - # ... -) -set(COMBINED "") -foreach (F IN LISTS HASH_FILES) - file(READ ${F} TMP) - set(COMBINED "${COMBINED}${TMP}") -endforeach() -string(MD5 SHADER_CACHE_VERSION "${COMBINED}") +# The variable SRC_DIR must be passed into the script +# (since it uses the current build directory for all values of CMAKE_*_DIR) configure_file("${SRC_DIR}/src/common/scm_rev.cpp.in" "scm_rev.cpp" @ONLY) diff --git a/README.md b/README.md index 3f0cf69c3..91bb06acd 100755 --- a/README.md +++ b/README.md @@ -1,7 +1,7 @@ yuzu emulator early access ============= -This is the source code for early-access 1893. +This is the source code for early-access 1896. ## Legal Notice diff --git a/src/core/hle/service/am/applets/applet_controller.h b/src/core/hle/service/am/applets/applet_controller.h index 9706dee26..0a34c4fc0 100755 --- a/src/core/hle/service/am/applets/applet_controller.h +++ b/src/core/hle/service/am/applets/applet_controller.h @@ -80,7 +80,7 @@ struct ControllerSupportArgOld { static_assert(sizeof(ControllerSupportArgOld) == 0x21C, "ControllerSupportArgOld has incorrect size."); -// LibraryAppletVersion 0x7 +// LibraryAppletVersion 0x7, 0x8 struct ControllerSupportArgNew { ControllerSupportArgHeader header{}; std::array identification_colors{}; diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp index 6afe6c1e1..9692b8e94 100755 --- a/src/video_core/renderer_opengl/gl_device.cpp +++ b/src/video_core/renderer_opengl/gl_device.cpp @@ -180,11 +180,9 @@ Device::Device() { LOG_ERROR(Render_OpenGL, "Assembly shaders enabled but not supported"); shader_backend = Settings::ShaderBackend::GLSL; } - // Completely disable async shaders for now, as it causes graphical glitches - use_asynchronous_shaders = false; // Blocks AMD and Intel OpenGL drivers on Windows from using asynchronous shader compilation. - // use_asynchronous_shaders = Settings::values.use_asynchronous_shaders.GetValue() && - // !(is_amd || (is_intel && !is_linux)); + use_asynchronous_shaders = Settings::values.use_asynchronous_shaders.GetValue() && + !(is_amd || (is_intel && !is_linux)); use_driver_cache = is_nvidia; LOG_INFO(Render_OpenGL, "Renderer_VariableAOFFI: {}", has_variable_aoffi); diff --git a/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp b/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp index c8b2d833d..fac0034fb 100755 --- a/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp +++ b/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp @@ -237,10 +237,12 @@ GraphicsPipeline::GraphicsPipeline( if (key.xfb_enabled && device.UseAssemblyShaders()) { GenerateTransformFeedbackState(); } - auto func{[this, device, sources, sources_spirv, - shader_notify](ShaderContext::Context*) mutable { + const bool in_parallel = thread_worker != nullptr; + const auto backend = device.GetShaderBackend(); + auto func{[this, sources = std::move(sources), sources_spirv = std::move(sources_spirv), + shader_notify, backend, in_parallel](ShaderContext::Context*) mutable { for (size_t stage = 0; stage < 5; ++stage) { - switch (device.GetShaderBackend()) { + switch (backend) { case Settings::ShaderBackend::GLSL: if (!sources[stage].empty()) { source_programs[stage] = CreateProgram(sources[stage], Stage(stage)); @@ -249,6 +251,10 @@ GraphicsPipeline::GraphicsPipeline( case Settings::ShaderBackend::GLASM: if (!sources[stage].empty()) { assembly_programs[stage] = CompileProgram(sources[stage], AssemblyStage(stage)); + if (in_parallel) { + // Make sure program is built before continuing when building in parallel + glGetString(GL_PROGRAM_ERROR_STRING_NV); + } } break; case Settings::ShaderBackend::SPIRV: @@ -258,10 +264,20 @@ GraphicsPipeline::GraphicsPipeline( break; } } + if (in_parallel && backend != Settings::ShaderBackend::GLASM) { + // Make sure programs have built if we are building shaders in parallel + for (OGLProgram& program : source_programs) { + if (program.handle != 0) { + GLint status{}; + glGetProgramiv(program.handle, GL_LINK_STATUS, &status); + } + } + } if (shader_notify) { shader_notify->MarkShaderComplete(); } is_built = true; + built_condvar.notify_one(); }}; if (thread_worker) { thread_worker->QueueWork(std::move(func)); @@ -434,6 +450,9 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) { buffer_cache.UpdateGraphicsBuffers(is_indexed); buffer_cache.BindHostGeometryBuffers(is_indexed); + if (!is_built.load(std::memory_order::relaxed)) { + WaitForBuild(); + } if (assembly_programs[0].handle != 0) { program_manager.BindAssemblyPrograms(assembly_programs, enabled_stages_mask); } else { @@ -545,4 +564,9 @@ void GraphicsPipeline::GenerateTransformFeedbackState() { num_xfb_strides = static_cast(current_stream - xfb_streams.data()); } +void GraphicsPipeline::WaitForBuild() { + std::unique_lock lock{built_mutex}; + built_condvar.wait(lock, [this] { return is_built.load(std::memory_order::relaxed); }); +} + } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_graphics_pipeline.h b/src/video_core/renderer_opengl/gl_graphics_pipeline.h index 5e34b9537..4e28d9a42 100755 --- a/src/video_core/renderer_opengl/gl_graphics_pipeline.h +++ b/src/video_core/renderer_opengl/gl_graphics_pipeline.h @@ -119,6 +119,8 @@ private: void GenerateTransformFeedbackState(); + void WaitForBuild(); + TextureCache& texture_cache; BufferCache& buffer_cache; Tegra::MemoryManager& gpu_memory; @@ -143,13 +145,16 @@ private: bool use_storage_buffers{}; bool writes_global_memory{}; - std::atomic_bool is_built{false}; static constexpr std::size_t XFB_ENTRY_STRIDE = 3; GLsizei num_xfb_attribs{}; GLsizei num_xfb_strides{}; std::array xfb_attribs{}; std::array xfb_streams{}; + + std::mutex built_mutex; + std::condition_variable built_condvar; + std::atomic_bool is_built{false}; }; } // namespace OpenGL diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp index e8f8ac5f3..f4b3ee95c 100755 --- a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp @@ -212,8 +212,8 @@ void BufferCacheRuntime::BindIndexBuffer(PrimitiveTopology topology, IndexFormat } if (vk_buffer == VK_NULL_HANDLE) { // Vulkan doesn't support null index buffers. Replace it with our own null buffer. - ReserveNullIndexBuffer(); - vk_buffer = *null_index_buffer; + ReserveNullBuffer(); + vk_buffer = *null_buffer; } scheduler.Record([vk_buffer, vk_offset, vk_index_type](vk::CommandBuffer cmdbuf) { cmdbuf.BindIndexBuffer(vk_buffer, vk_offset, vk_index_type); @@ -221,16 +221,14 @@ void BufferCacheRuntime::BindIndexBuffer(PrimitiveTopology topology, IndexFormat } void BufferCacheRuntime::BindQuadArrayIndexBuffer(u32 first, u32 count) { - const u32 total_indices = first + count; - if (total_indices == 0) { - ReserveNullIndexBuffer(); - scheduler.Record([buffer = *null_index_buffer, - index_type = quad_array_lut_index_type](vk::CommandBuffer cmdbuf) { - cmdbuf.BindIndexBuffer(buffer, 0, index_type); + if (count == 0) { + ReserveNullBuffer(); + scheduler.Record([this](vk::CommandBuffer cmdbuf) { + cmdbuf.BindIndexBuffer(*null_buffer, 0, VK_INDEX_TYPE_UINT32); }); return; } - ReserveQuadArrayLUT(total_indices, true); + ReserveQuadArrayLUT(first + count, true); // The LUT has the indices 0, 1, 2, and 3 copied as an array // To apply these 'first' offsets we can apply an offset based on the modulus. @@ -264,6 +262,14 @@ void BufferCacheRuntime::BindTransformFeedbackBuffer(u32 index, VkBuffer buffer, // Already logged in the rasterizer return; } + if (buffer == VK_NULL_HANDLE) { + // Vulkan doesn't support null transform feedback buffers. + // Replace it with our own null buffer. + ReserveNullBuffer(); + buffer = *null_buffer; + offset = 0; + size = 0; + } scheduler.Record([index, buffer, offset, size](vk::CommandBuffer cmdbuf) { const VkDeviceSize vk_offset = offset; const VkDeviceSize vk_size = size; @@ -348,11 +354,11 @@ void BufferCacheRuntime::ReserveQuadArrayLUT(u32 num_indices, bool wait_for_idle }); } -void BufferCacheRuntime::ReserveNullIndexBuffer() { - if (null_index_buffer) { +void BufferCacheRuntime::ReserveNullBuffer() { + if (null_buffer) { return; } - null_index_buffer = device.GetLogical().CreateBuffer(VkBufferCreateInfo{ + null_buffer = device.GetLogical().CreateBuffer(VkBufferCreateInfo{ .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, .pNext = nullptr, .flags = 0, @@ -363,12 +369,12 @@ void BufferCacheRuntime::ReserveNullIndexBuffer() { .pQueueFamilyIndices = nullptr, }); if (device.HasDebuggingToolAttached()) { - null_index_buffer.SetObjectNameEXT("Null index buffer"); + null_buffer.SetObjectNameEXT("Null index buffer"); } - null_index_buffer_commit = memory_allocator.Commit(null_index_buffer, MemoryUsage::DeviceLocal); + null_buffer_commit = memory_allocator.Commit(null_buffer, MemoryUsage::DeviceLocal); scheduler.RequestOutsideRenderPassOperationContext(); - scheduler.Record([buffer = *null_index_buffer](vk::CommandBuffer cmdbuf) { + scheduler.Record([buffer = *null_buffer](vk::CommandBuffer cmdbuf) { cmdbuf.FillBuffer(buffer, 0, VK_WHOLE_SIZE, 0); }); } diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.h b/src/video_core/renderer_vulkan/vk_buffer_cache.h index d29aab907..c27402ff0 100755 --- a/src/video_core/renderer_vulkan/vk_buffer_cache.h +++ b/src/video_core/renderer_vulkan/vk_buffer_cache.h @@ -111,7 +111,7 @@ private: void ReserveQuadArrayLUT(u32 num_indices, bool wait_for_idle); - void ReserveNullIndexBuffer(); + void ReserveNullBuffer(); const Device& device; MemoryAllocator& memory_allocator; @@ -124,8 +124,8 @@ private: VkIndexType quad_array_lut_index_type{}; u32 current_num_indices = 0; - vk::Buffer null_index_buffer; - MemoryCommit null_index_buffer_commit; + vk::Buffer null_buffer; + MemoryCommit null_buffer_commit; Uint8Pass uint8_pass; QuadIndexedPass quad_index_pass; diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 2ce8b4156..57b163247 100755 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -271,7 +271,7 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::Engines::Maxw device{device_}, scheduler{scheduler_}, descriptor_pool{descriptor_pool_}, update_descriptor_queue{update_descriptor_queue_}, render_pass_cache{render_pass_cache_}, buffer_cache{buffer_cache_}, texture_cache{texture_cache_}, shader_notify{shader_notify_}, - use_asynchronous_shaders{false}, + use_asynchronous_shaders{Settings::values.use_asynchronous_shaders.GetValue()}, workers(std::max(std::thread::hardware_concurrency(), 2U) - 1, "yuzu:PipelineBuilder"), serialization_thread(1, "yuzu:PipelineSerialization") { const auto& float_control{device.FloatControlProperties()}; diff --git a/src/yuzu/configuration/configure_graphics_advanced.ui b/src/yuzu/configuration/configure_graphics_advanced.ui index 772e5fed3..379dc5d2e 100755 --- a/src/yuzu/configuration/configure_graphics_advanced.ui +++ b/src/yuzu/configuration/configure_graphics_advanced.ui @@ -82,7 +82,7 @@ Enables asynchronous shader compilation, which may reduce shader stutter. This feature is experimental. - Use asynchronous shader building (experimental) + Use asynchronous shader building