From b6f5e3823d8622fca86def56a4c4bed3d0cb2953 Mon Sep 17 00:00:00 2001 From: pineappleEA Date: Mon, 29 May 2023 00:16:57 +0200 Subject: [PATCH] early-access version 3621 --- README.md | 2 +- externals/microprofile/microprofile.h | 8 +- src/common/bit_field.h | 5 + src/video_core/CMakeLists.txt | 4 +- src/video_core/buffer_cache/buffer_cache.h | 10 +- .../buffer_cache/buffer_cache_base.h | 1 + .../renderer_opengl/gl_buffer_cache.cpp | 58 ++++++- .../renderer_opengl/gl_buffer_cache.h | 29 +++- .../renderer_opengl/gl_rasterizer.cpp | 6 +- .../renderer_opengl/gl_rasterizer.h | 1 + .../gl_staging_buffer_pool.cpp | 150 ++++++++++++++++++ .../renderer_opengl/gl_staging_buffer_pool.h | 95 +++++++++++ .../renderer_opengl/gl_texture_cache.cpp | 96 +++-------- .../renderer_opengl/gl_texture_cache.h | 47 ++---- .../renderer_opengl/util_shaders.cpp | 9 +- src/video_core/renderer_opengl/util_shaders.h | 10 +- .../renderer_vulkan/vk_buffer_cache.h | 1 + .../renderer_vulkan/vk_master_semaphore.cpp | 11 +- src/yuzu/game_list.cpp | 4 + src/yuzu/game_list.h | 1 + src/yuzu/main.cpp | 20 +++ src/yuzu/main.h | 1 + vcpkg.json | 4 +- 23 files changed, 419 insertions(+), 154 deletions(-) create mode 100755 src/video_core/renderer_opengl/gl_staging_buffer_pool.cpp create mode 100755 src/video_core/renderer_opengl/gl_staging_buffer_pool.h diff --git a/README.md b/README.md index e2f905d7e..c02301dab 100755 --- a/README.md +++ b/README.md @@ -1,7 +1,7 @@ yuzu emulator early access ============= -This is the source code for early-access 3618. +This is the source code for early-access 3621. ## Legal Notice diff --git a/externals/microprofile/microprofile.h b/externals/microprofile/microprofile.h index 3adc33a6c..7f9885f56 100755 --- a/externals/microprofile/microprofile.h +++ b/externals/microprofile/microprofile.h @@ -1697,7 +1697,13 @@ void MicroProfileFlip() { int nTimer = MicroProfileLogTimerIndex(LE); uint8_t nGroup = pTimerToGroup[nTimer]; - MP_ASSERT(nStackPos < MICROPROFILE_STACK_MAX); + + // To avoid crashing due to OOB memory accesses/asserts + // simply skip this iteration + // MP_ASSERT(nStackPos < MICROPROFILE_STACK_MAX); + if (nStackPos >= MICROPROFILE_STACK_MAX) { + break; + } MP_ASSERT(nGroup < MICROPROFILE_MAX_GROUPS); pGroupStackPos[nGroup]++; pStack[nStackPos++] = k; diff --git a/src/common/bit_field.h b/src/common/bit_field.h index dee97c4fe..9fd214f28 100755 --- a/src/common/bit_field.h +++ b/src/common/bit_field.h @@ -188,3 +188,8 @@ private: template using BitFieldBE = BitField; + +template +inline auto format_as(BitField bitfield) { + return bitfield.Value(); +} diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index 6930276bd..f78b35f8c 100755 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -133,8 +133,8 @@ add_library(video_core STATIC renderer_opengl/gl_shader_util.h renderer_opengl/gl_state_tracker.cpp renderer_opengl/gl_state_tracker.h - renderer_opengl/gl_stream_buffer.cpp - renderer_opengl/gl_stream_buffer.h + renderer_opengl/gl_staging_buffer_pool.cpp + renderer_opengl/gl_staging_buffer_pool.h renderer_opengl/gl_texture_cache.cpp renderer_opengl/gl_texture_cache.h renderer_opengl/gl_texture_cache_base.cpp diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index 4be952c10..169b57dc6 100755 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h @@ -474,7 +474,6 @@ void BufferCache

::CommitAsyncFlushesHigh() { if (committed_ranges.empty()) { if constexpr (IMPLEMENTS_ASYNC_DOWNLOADS) { - async_buffers.emplace_back(std::optional{}); } return; @@ -535,7 +534,6 @@ void BufferCache

::CommitAsyncFlushesHigh() { committed_ranges.clear(); if (downloads.empty()) { if constexpr (IMPLEMENTS_ASYNC_DOWNLOADS) { - async_buffers.emplace_back(std::optional{}); } return; @@ -687,7 +685,7 @@ void BufferCache

::BindHostIndexBuffer() { const u32 size = channel_state->index_buffer.size; const auto& draw_state = maxwell3d->draw_manager->GetDrawState(); if (!draw_state.inline_index_draw_indexes.empty()) [[unlikely]] { - if constexpr (USE_MEMORY_MAPS) { + if constexpr (USE_MEMORY_MAPS_FOR_UPLOADS) { auto upload_staging = runtime.UploadStagingBuffer(size); std::array copies{ {BufferCopy{.src_offset = upload_staging.offset, .dst_offset = 0, .size = size}}}; @@ -1458,7 +1456,7 @@ bool BufferCache

::SynchronizeBufferNoModified(Buffer& buffer, VAddr cpu_addr, template void BufferCache

::UploadMemory(Buffer& buffer, u64 total_size_bytes, u64 largest_copy, std::span copies) { - if constexpr (USE_MEMORY_MAPS) { + if constexpr (USE_MEMORY_MAPS_FOR_UPLOADS) { MappedUploadMemory(buffer, total_size_bytes, copies); } else { ImmediateUploadMemory(buffer, largest_copy, copies); @@ -1469,7 +1467,7 @@ template void BufferCache

::ImmediateUploadMemory([[maybe_unused]] Buffer& buffer, [[maybe_unused]] u64 largest_copy, [[maybe_unused]] std::span copies) { - if constexpr (!USE_MEMORY_MAPS) { + if constexpr (!USE_MEMORY_MAPS_FOR_UPLOADS) { std::span immediate_buffer; for (const BufferCopy& copy : copies) { std::span upload_span; @@ -1528,7 +1526,7 @@ bool BufferCache

::InlineMemory(VAddr dest_address, size_t copy_size, auto& buffer = slot_buffers[buffer_id]; SynchronizeBuffer(buffer, dest_address, static_cast(copy_size)); - if constexpr (USE_MEMORY_MAPS) { + if constexpr (USE_MEMORY_MAPS_FOR_UPLOADS) { auto upload_staging = runtime.UploadStagingBuffer(copy_size); std::array copies{BufferCopy{ .src_offset = upload_staging.offset, diff --git a/src/video_core/buffer_cache/buffer_cache_base.h b/src/video_core/buffer_cache/buffer_cache_base.h index c689fe06b..60a1f285e 100755 --- a/src/video_core/buffer_cache/buffer_cache_base.h +++ b/src/video_core/buffer_cache/buffer_cache_base.h @@ -173,6 +173,7 @@ class BufferCache : public VideoCommon::ChannelSetupCaches()} { @@ -140,6 +142,14 @@ BufferCacheRuntime::BufferCacheRuntime(const Device& device_) }(); } +StagingBufferMap BufferCacheRuntime::UploadStagingBuffer(size_t size) { + return staging_buffer_pool.RequestUploadBuffer(size); +} + +StagingBufferMap BufferCacheRuntime::DownloadStagingBuffer(size_t size) { + return staging_buffer_pool.RequestDownloadBuffer(size); +} + u64 BufferCacheRuntime::GetDeviceMemoryUsage() const { if (device.CanReportMemoryUsage()) { return device_access_memory - device.GetCurrentDedicatedVideoMemory(); @@ -147,13 +157,47 @@ u64 BufferCacheRuntime::GetDeviceMemoryUsage() const { return 2_GiB; } +void BufferCacheRuntime::CopyBuffer(GLuint dst_buffer, GLuint src_buffer, + std::span copies, bool barrier) { + if (barrier) { + PreCopyBarrier(); + } + for (const VideoCommon::BufferCopy& copy : copies) { + glCopyNamedBufferSubData(src_buffer, dst_buffer, static_cast(copy.src_offset), + static_cast(copy.dst_offset), + static_cast(copy.size)); + } + if (barrier) { + PostCopyBarrier(); + } +} + +void BufferCacheRuntime::CopyBuffer(GLuint dst_buffer, Buffer& src_buffer, + std::span copies, bool barrier) { + CopyBuffer(dst_buffer, src_buffer.Handle(), copies, barrier); +} + +void BufferCacheRuntime::CopyBuffer(Buffer& dst_buffer, GLuint src_buffer, + std::span copies, bool barrier) { + CopyBuffer(dst_buffer.Handle(), src_buffer, copies, barrier); +} + void BufferCacheRuntime::CopyBuffer(Buffer& dst_buffer, Buffer& src_buffer, std::span copies) { - for (const VideoCommon::BufferCopy& copy : copies) { - glCopyNamedBufferSubData( - src_buffer.Handle(), dst_buffer.Handle(), static_cast(copy.src_offset), - static_cast(copy.dst_offset), static_cast(copy.size)); - } + CopyBuffer(dst_buffer.Handle(), src_buffer.Handle(), copies); +} + +void BufferCacheRuntime::PreCopyBarrier() { + // TODO: finer grained barrier? + glMemoryBarrier(GL_ALL_BARRIER_BITS); +} + +void BufferCacheRuntime::PostCopyBarrier() { + glMemoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT | GL_CLIENT_MAPPED_BUFFER_BARRIER_BIT); +} + +void BufferCacheRuntime::Finish() { + glFinish(); } void BufferCacheRuntime::ClearBuffer(Buffer& dest_buffer, u32 offset, size_t size, u32 value) { diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.h b/src/video_core/renderer_opengl/gl_buffer_cache.h index ea40a425e..a38627d75 100755 --- a/src/video_core/renderer_opengl/gl_buffer_cache.h +++ b/src/video_core/renderer_opengl/gl_buffer_cache.h @@ -12,7 +12,7 @@ #include "video_core/rasterizer_interface.h" #include "video_core/renderer_opengl/gl_device.h" #include "video_core/renderer_opengl/gl_resource_manager.h" -#include "video_core/renderer_opengl/gl_stream_buffer.h" +#include "video_core/renderer_opengl/gl_staging_buffer_pool.h" namespace OpenGL { @@ -60,11 +60,28 @@ class BufferCacheRuntime { public: static constexpr u8 INVALID_BINDING = std::numeric_limits::max(); - explicit BufferCacheRuntime(const Device& device_); + explicit BufferCacheRuntime(const Device& device_, StagingBufferPool& staging_buffer_pool_); + + [[nodiscard]] StagingBufferMap UploadStagingBuffer(size_t size); + + [[nodiscard]] StagingBufferMap DownloadStagingBuffer(size_t size); + + void CopyBuffer(GLuint dst_buffer, GLuint src_buffer, + std::span copies, bool barrier = true); + + void CopyBuffer(GLuint dst_buffer, Buffer& src_buffer, + std::span copies, bool barrier = true); + + void CopyBuffer(Buffer& dst_buffer, GLuint src_buffer, + std::span copies, bool barrier = true); void CopyBuffer(Buffer& dst_buffer, Buffer& src_buffer, std::span copies); + void PreCopyBarrier(); + void PostCopyBarrier(); + void Finish(); + void ClearBuffer(Buffer& dest_buffer, u32 offset, size_t size, u32 value); void BindIndexBuffer(Buffer& buffer, u32 offset, u32 size); @@ -169,6 +186,7 @@ private: }; const Device& device; + StagingBufferPool& staging_buffer_pool; bool has_fast_buffer_sub_data = false; bool use_assembly_shaders = false; @@ -201,7 +219,7 @@ private: struct BufferCacheParams { using Runtime = OpenGL::BufferCacheRuntime; using Buffer = OpenGL::Buffer; - using Async_Buffer = u32; + using Async_Buffer = OpenGL::StagingBufferMap; using MemoryTracker = VideoCommon::MemoryTrackerBase; static constexpr bool IS_OPENGL = true; @@ -209,9 +227,12 @@ struct BufferCacheParams { static constexpr bool HAS_FULL_INDEX_AND_PRIMITIVE_SUPPORT = true; static constexpr bool NEEDS_BIND_UNIFORM_INDEX = true; static constexpr bool NEEDS_BIND_STORAGE_INDEX = true; - static constexpr bool USE_MEMORY_MAPS = false; + static constexpr bool USE_MEMORY_MAPS = true; static constexpr bool SEPARATE_IMAGE_BUFFER_BINDINGS = true; static constexpr bool IMPLEMENTS_ASYNC_DOWNLOADS = false; + + // TODO: Investigate why OpenGL seems to perform worse with persistently mapped buffer uploads + static constexpr bool USE_MEMORY_MAPS_FOR_UPLOADS = false; }; using BufferCache = VideoCommon::BufferCache; diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 1b8f3860f..c29e2ff60 100755 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -24,6 +24,7 @@ #include "video_core/renderer_opengl/gl_query_cache.h" #include "video_core/renderer_opengl/gl_rasterizer.h" #include "video_core/renderer_opengl/gl_shader_cache.h" +#include "video_core/renderer_opengl/gl_staging_buffer_pool.h" #include "video_core/renderer_opengl/gl_texture_cache.h" #include "video_core/renderer_opengl/maxwell_to_gl.h" #include "video_core/renderer_opengl/renderer_opengl.h" @@ -58,8 +59,9 @@ RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& emu_window_, Tegra StateTracker& state_tracker_) : RasterizerAccelerated(cpu_memory_), gpu(gpu_), device(device_), screen_info(screen_info_), program_manager(program_manager_), state_tracker(state_tracker_), - texture_cache_runtime(device, program_manager, state_tracker), - texture_cache(texture_cache_runtime, *this), buffer_cache_runtime(device), + texture_cache_runtime(device, program_manager, state_tracker, staging_buffer_pool), + texture_cache(texture_cache_runtime, *this), + buffer_cache_runtime(device, staging_buffer_pool), buffer_cache(*this, cpu_memory_, buffer_cache_runtime), shader_cache(*this, emu_window_, device, texture_cache, buffer_cache, program_manager, state_tracker, gpu.ShaderNotify()), diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index 6fa755e62..636a34e56 100755 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -230,6 +230,7 @@ private: ProgramManager& program_manager; StateTracker& state_tracker; + StagingBufferPool staging_buffer_pool; TextureCacheRuntime texture_cache_runtime; TextureCache texture_cache; BufferCacheRuntime buffer_cache_runtime; diff --git a/src/video_core/renderer_opengl/gl_staging_buffer_pool.cpp b/src/video_core/renderer_opengl/gl_staging_buffer_pool.cpp new file mode 100755 index 000000000..bbb06e51f --- /dev/null +++ b/src/video_core/renderer_opengl/gl_staging_buffer_pool.cpp @@ -0,0 +1,150 @@ +// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#include +#include +#include + +#include + +#include "common/alignment.h" +#include "common/assert.h" +#include "common/bit_util.h" +#include "common/microprofile.h" +#include "video_core/renderer_opengl/gl_staging_buffer_pool.h" + +MICROPROFILE_DEFINE(OpenGL_BufferRequest, "OpenGL", "BufferRequest", MP_RGB(128, 128, 192)); + +namespace OpenGL { + +StagingBufferMap::~StagingBufferMap() { + if (sync) { + sync->Create(); + } +} + +StagingBuffers::StagingBuffers(GLenum storage_flags_, GLenum map_flags_) + : storage_flags{storage_flags_}, map_flags{map_flags_} {} + +StagingBuffers::~StagingBuffers() = default; + +StagingBufferMap StagingBuffers::RequestMap(size_t requested_size, bool insert_fence) { + MICROPROFILE_SCOPE(OpenGL_BufferRequest); + + const size_t index = RequestBuffer(requested_size); + OGLSync* const sync = insert_fence ? &syncs[index] : nullptr; + sync_indices[index] = insert_fence ? ++current_sync_index : 0; + return StagingBufferMap{ + .mapped_span = std::span(maps[index], requested_size), + .sync = sync, + .buffer = buffers[index].handle, + }; +} + +size_t StagingBuffers::RequestBuffer(size_t requested_size) { + if (const std::optional index = FindBuffer(requested_size); index) { + return *index; + } + + OGLBuffer& buffer = buffers.emplace_back(); + buffer.Create(); + const auto next_pow2_size = Common::NextPow2(requested_size); + glNamedBufferStorage(buffer.handle, next_pow2_size, nullptr, + storage_flags | GL_MAP_PERSISTENT_BIT); + maps.push_back(static_cast(glMapNamedBufferRange(buffer.handle, 0, next_pow2_size, + map_flags | GL_MAP_PERSISTENT_BIT))); + syncs.emplace_back(); + sync_indices.emplace_back(); + sizes.push_back(next_pow2_size); + + ASSERT(syncs.size() == buffers.size() && buffers.size() == maps.size() && + maps.size() == sizes.size()); + + return buffers.size() - 1; +} + +std::optional StagingBuffers::FindBuffer(size_t requested_size) { + size_t known_unsignaled_index = current_sync_index + 1; + size_t smallest_buffer = std::numeric_limits::max(); + std::optional found; + const size_t num_buffers = sizes.size(); + for (size_t index = 0; index < num_buffers; ++index) { + const size_t buffer_size = sizes[index]; + if (buffer_size < requested_size || buffer_size >= smallest_buffer) { + continue; + } + if (syncs[index].handle != 0) { + if (sync_indices[index] >= known_unsignaled_index) { + // This fence is later than a fence that is known to not be signaled + continue; + } + if (!syncs[index].IsSignaled()) { + // Since this fence hasn't been signaled, it's safe to assume all later + // fences haven't been signaled either + known_unsignaled_index = std::min(known_unsignaled_index, sync_indices[index]); + continue; + } + syncs[index].Release(); + } + smallest_buffer = buffer_size; + found = index; + } + return found; +} + +StreamBuffer::StreamBuffer() { + static constexpr GLenum flags = GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT | GL_MAP_COHERENT_BIT; + buffer.Create(); + glObjectLabel(GL_BUFFER, buffer.handle, -1, "Stream Buffer"); + glNamedBufferStorage(buffer.handle, STREAM_BUFFER_SIZE, nullptr, flags); + mapped_pointer = + static_cast(glMapNamedBufferRange(buffer.handle, 0, STREAM_BUFFER_SIZE, flags)); + for (OGLSync& sync : fences) { + sync.Create(); + } +} + +std::pair, size_t> StreamBuffer::Request(size_t size) noexcept { + ASSERT(size < REGION_SIZE); + for (size_t region = Region(used_iterator), region_end = Region(iterator); region < region_end; + ++region) { + fences[region].Create(); + } + used_iterator = iterator; + + for (size_t region = Region(free_iterator) + 1, + region_end = std::min(Region(iterator + size) + 1, NUM_SYNCS); + region < region_end; ++region) { + glClientWaitSync(fences[region].handle, 0, GL_TIMEOUT_IGNORED); + fences[region].Release(); + } + if (iterator + size >= free_iterator) { + free_iterator = iterator + size; + } + if (iterator + size > STREAM_BUFFER_SIZE) { + for (size_t region = Region(used_iterator); region < NUM_SYNCS; ++region) { + fences[region].Create(); + } + used_iterator = 0; + iterator = 0; + free_iterator = size; + + for (size_t region = 0, region_end = Region(size); region <= region_end; ++region) { + glClientWaitSync(fences[region].handle, 0, GL_TIMEOUT_IGNORED); + fences[region].Release(); + } + } + const size_t offset = iterator; + iterator = Common::AlignUp(iterator + size, MAX_ALIGNMENT); + return {std::span(mapped_pointer + offset, size), offset}; +} + +StagingBufferMap StagingBufferPool::RequestUploadBuffer(size_t size) { + return upload_buffers.RequestMap(size, true); +} + +StagingBufferMap StagingBufferPool::RequestDownloadBuffer(size_t size) { + return download_buffers.RequestMap(size, false); +} + +} // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_staging_buffer_pool.h b/src/video_core/renderer_opengl/gl_staging_buffer_pool.h new file mode 100755 index 000000000..60f72d3a0 --- /dev/null +++ b/src/video_core/renderer_opengl/gl_staging_buffer_pool.h @@ -0,0 +1,95 @@ +// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once + +#include +#include +#include +#include +#include + +#include + +#include "common/common_types.h" +#include "common/literals.h" +#include "video_core/renderer_opengl/gl_resource_manager.h" + +namespace OpenGL { + +using namespace Common::Literals; + +struct StagingBufferMap { + ~StagingBufferMap(); + + std::span mapped_span; + size_t offset = 0; + OGLSync* sync; + GLuint buffer; +}; + +struct StagingBuffers { + explicit StagingBuffers(GLenum storage_flags_, GLenum map_flags_); + ~StagingBuffers(); + + StagingBufferMap RequestMap(size_t requested_size, bool insert_fence); + + size_t RequestBuffer(size_t requested_size); + + std::optional FindBuffer(size_t requested_size); + + std::vector syncs; + std::vector buffers; + std::vector maps; + std::vector sizes; + std::vector sync_indices; + GLenum storage_flags; + GLenum map_flags; + size_t current_sync_index = 0; +}; + +class StreamBuffer { + static constexpr size_t STREAM_BUFFER_SIZE = 64_MiB; + static constexpr size_t NUM_SYNCS = 16; + static constexpr size_t REGION_SIZE = STREAM_BUFFER_SIZE / NUM_SYNCS; + static constexpr size_t MAX_ALIGNMENT = 256; + static_assert(STREAM_BUFFER_SIZE % MAX_ALIGNMENT == 0); + static_assert(STREAM_BUFFER_SIZE % NUM_SYNCS == 0); + static_assert(REGION_SIZE % MAX_ALIGNMENT == 0); + +public: + explicit StreamBuffer(); + + [[nodiscard]] std::pair, size_t> Request(size_t size) noexcept; + + [[nodiscard]] GLuint Handle() const noexcept { + return buffer.handle; + } + +private: + [[nodiscard]] static size_t Region(size_t offset) noexcept { + return offset / REGION_SIZE; + } + + size_t iterator = 0; + size_t used_iterator = 0; + size_t free_iterator = 0; + u8* mapped_pointer = nullptr; + OGLBuffer buffer; + std::array fences; +}; + +class StagingBufferPool { +public: + StagingBufferPool() = default; + ~StagingBufferPool() = default; + + StagingBufferMap RequestUploadBuffer(size_t size); + StagingBufferMap RequestDownloadBuffer(size_t size); + +private: + StagingBuffers upload_buffers{GL_MAP_WRITE_BIT, GL_MAP_WRITE_BIT | GL_MAP_FLUSH_EXPLICIT_BIT}; + StagingBuffers download_buffers{GL_MAP_READ_BIT | GL_CLIENT_STORAGE_BIT, GL_MAP_READ_BIT}; +}; + +} // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp index d4359e319..e0610d649 100755 --- a/src/video_core/renderer_opengl/gl_texture_cache.cpp +++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp @@ -439,6 +439,11 @@ OGLTexture MakeImage(const VideoCommon::ImageInfo& info, GLenum gl_internal_form return GL_R32UI; } +[[nodiscard]] bool IsAstcRecompressionEnabled() { + return Settings::values.astc_recompression.GetValue() != + Settings::AstcRecompression::Uncompressed; +} + [[nodiscard]] GLenum SelectAstcFormat(PixelFormat format, bool is_srgb) { switch (Settings::values.astc_recompression.GetValue()) { case Settings::AstcRecompression::Bc1: @@ -451,19 +456,14 @@ OGLTexture MakeImage(const VideoCommon::ImageInfo& info, GLenum gl_internal_form return is_srgb ? GL_SRGB8_ALPHA8 : GL_RGBA8; } } - } // Anonymous namespace -ImageBufferMap::~ImageBufferMap() { - if (sync) { - sync->Create(); - } -} - TextureCacheRuntime::TextureCacheRuntime(const Device& device_, ProgramManager& program_manager, - StateTracker& state_tracker_) - : device{device_}, state_tracker{state_tracker_}, util_shaders(program_manager), - format_conversion_pass{util_shaders}, resolution{Settings::values.resolution_info} { + StateTracker& state_tracker_, + StagingBufferPool& staging_buffer_pool_) + : device{device_}, state_tracker{state_tracker_}, staging_buffer_pool{staging_buffer_pool_}, + util_shaders(program_manager), format_conversion_pass{util_shaders}, + resolution{Settings::values.resolution_info} { static constexpr std::array TARGETS{GL_TEXTURE_1D_ARRAY, GL_TEXTURE_2D_ARRAY, GL_TEXTURE_3D}; for (size_t i = 0; i < TARGETS.size(); ++i) { const GLenum target = TARGETS[i]; @@ -553,12 +553,12 @@ void TextureCacheRuntime::Finish() { glFinish(); } -ImageBufferMap TextureCacheRuntime::UploadStagingBuffer(size_t size) { - return upload_buffers.RequestMap(size, true); +StagingBufferMap TextureCacheRuntime::UploadStagingBuffer(size_t size) { + return staging_buffer_pool.RequestUploadBuffer(size); } -ImageBufferMap TextureCacheRuntime::DownloadStagingBuffer(size_t size) { - return download_buffers.RequestMap(size, false); +StagingBufferMap TextureCacheRuntime::DownloadStagingBuffer(size_t size) { + return staging_buffer_pool.RequestDownloadBuffer(size); } u64 TextureCacheRuntime::GetDeviceMemoryUsage() const { @@ -643,7 +643,7 @@ void TextureCacheRuntime::BlitFramebuffer(Framebuffer* dst, Framebuffer* src, is_linear ? GL_LINEAR : GL_NEAREST); } -void TextureCacheRuntime::AccelerateImageUpload(Image& image, const ImageBufferMap& map, +void TextureCacheRuntime::AccelerateImageUpload(Image& image, const StagingBufferMap& map, std::span swizzles) { switch (image.info.type) { case ImageType::e2D: @@ -685,64 +685,6 @@ bool TextureCacheRuntime::HasNativeASTC() const noexcept { return device.HasASTC(); } -TextureCacheRuntime::StagingBuffers::StagingBuffers(GLenum storage_flags_, GLenum map_flags_) - : storage_flags{storage_flags_}, map_flags{map_flags_} {} - -TextureCacheRuntime::StagingBuffers::~StagingBuffers() = default; - -ImageBufferMap TextureCacheRuntime::StagingBuffers::RequestMap(size_t requested_size, - bool insert_fence) { - const size_t index = RequestBuffer(requested_size); - OGLSync* const sync = insert_fence ? &syncs[index] : nullptr; - return ImageBufferMap{ - .mapped_span = std::span(maps[index], requested_size), - .sync = sync, - .buffer = buffers[index].handle, - }; -} - -size_t TextureCacheRuntime::StagingBuffers::RequestBuffer(size_t requested_size) { - if (const std::optional index = FindBuffer(requested_size); index) { - return *index; - } - - OGLBuffer& buffer = buffers.emplace_back(); - buffer.Create(); - glNamedBufferStorage(buffer.handle, requested_size, nullptr, - storage_flags | GL_MAP_PERSISTENT_BIT); - maps.push_back(static_cast(glMapNamedBufferRange(buffer.handle, 0, requested_size, - map_flags | GL_MAP_PERSISTENT_BIT))); - - syncs.emplace_back(); - sizes.push_back(requested_size); - - ASSERT(syncs.size() == buffers.size() && buffers.size() == maps.size() && - maps.size() == sizes.size()); - - return buffers.size() - 1; -} - -std::optional TextureCacheRuntime::StagingBuffers::FindBuffer(size_t requested_size) { - size_t smallest_buffer = std::numeric_limits::max(); - std::optional found; - const size_t num_buffers = sizes.size(); - for (size_t index = 0; index < num_buffers; ++index) { - const size_t buffer_size = sizes[index]; - if (buffer_size < requested_size || buffer_size >= smallest_buffer) { - continue; - } - if (syncs[index].handle != 0) { - if (!syncs[index].IsSignaled()) { - continue; - } - syncs[index].Release(); - } - smallest_buffer = buffer_size; - found = index; - } - return found; -} - Image::Image(TextureCacheRuntime& runtime_, const VideoCommon::ImageInfo& info_, GPUVAddr gpu_addr_, VAddr cpu_addr_) : VideoCommon::ImageBase(info_, gpu_addr_, cpu_addr_), runtime{&runtime_} { @@ -760,7 +702,7 @@ Image::Image(TextureCacheRuntime& runtime_, const VideoCommon::ImageInfo& info_, gl_format = GL_RGBA; gl_type = GL_UNSIGNED_INT_8_8_8_8_REV; - if (IsPixelFormatASTC(info.format)) { + if (IsPixelFormatASTC(info.format) && IsAstcRecompressionEnabled()) { gl_internal_format = SelectAstcFormat(info.format, is_srgb); gl_format = GL_NONE; } @@ -818,7 +760,7 @@ void Image::UploadMemory(GLuint buffer_handle, size_t buffer_offset, } } -void Image::UploadMemory(const ImageBufferMap& map, +void Image::UploadMemory(const StagingBufferMap& map, std::span copies) { UploadMemory(map.buffer, map.offset, copies); } @@ -865,7 +807,7 @@ void Image::DownloadMemory(std::span buffer_handles, std::span b } } -void Image::DownloadMemory(ImageBufferMap& map, +void Image::DownloadMemory(StagingBufferMap& map, std::span copies) { DownloadMemory(map.buffer, map.offset, copies); } @@ -1155,7 +1097,7 @@ ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::ImageViewI const bool is_srgb = IsPixelFormatSRGB(info.format); internal_format = is_srgb ? GL_SRGB8_ALPHA8 : GL_RGBA8; - if (IsPixelFormatASTC(info.format)) { + if (IsPixelFormatASTC(info.format) && IsAstcRecompressionEnabled()) { internal_format = SelectAstcFormat(info.format, is_srgb); } } else { diff --git a/src/video_core/renderer_opengl/gl_texture_cache.h b/src/video_core/renderer_opengl/gl_texture_cache.h index 2e80d7cd2..986ef09b2 100755 --- a/src/video_core/renderer_opengl/gl_texture_cache.h +++ b/src/video_core/renderer_opengl/gl_texture_cache.h @@ -11,6 +11,7 @@ #include "shader_recompiler/shader_info.h" #include "video_core/renderer_opengl/gl_device.h" #include "video_core/renderer_opengl/gl_resource_manager.h" +#include "video_core/renderer_opengl/gl_staging_buffer_pool.h" #include "video_core/renderer_opengl/util_shaders.h" #include "video_core/texture_cache/image_view_base.h" #include "video_core/texture_cache/texture_cache_base.h" @@ -37,15 +38,6 @@ using VideoCommon::Region2D; using VideoCommon::RenderTargets; using VideoCommon::SlotVector; -struct ImageBufferMap { - ~ImageBufferMap(); - - std::span mapped_span; - size_t offset = 0; - OGLSync* sync; - GLuint buffer; -}; - struct FormatProperties { GLenum compatibility_class; bool compatibility_by_size; @@ -74,14 +66,15 @@ class TextureCacheRuntime { public: explicit TextureCacheRuntime(const Device& device, ProgramManager& program_manager, - StateTracker& state_tracker); + StateTracker& state_tracker, + StagingBufferPool& staging_buffer_pool); ~TextureCacheRuntime(); void Finish(); - ImageBufferMap UploadStagingBuffer(size_t size); + StagingBufferMap UploadStagingBuffer(size_t size); - ImageBufferMap DownloadStagingBuffer(size_t size); + StagingBufferMap DownloadStagingBuffer(size_t size); u64 GetDeviceLocalMemory() const { return device_access_memory; @@ -120,7 +113,7 @@ public: const Region2D& src_region, Tegra::Engines::Fermi2D::Filter filter, Tegra::Engines::Fermi2D::Operation operation); - void AccelerateImageUpload(Image& image, const ImageBufferMap& map, + void AccelerateImageUpload(Image& image, const StagingBufferMap& map, std::span swizzles); void InsertUploadMemoryBarrier(); @@ -149,35 +142,16 @@ public: } private: - struct StagingBuffers { - explicit StagingBuffers(GLenum storage_flags_, GLenum map_flags_); - ~StagingBuffers(); - - ImageBufferMap RequestMap(size_t requested_size, bool insert_fence); - - size_t RequestBuffer(size_t requested_size); - - std::optional FindBuffer(size_t requested_size); - - std::vector syncs; - std::vector buffers; - std::vector maps; - std::vector sizes; - GLenum storage_flags; - GLenum map_flags; - }; - const Device& device; StateTracker& state_tracker; + StagingBufferPool& staging_buffer_pool; + UtilShaders util_shaders; FormatConversionPass format_conversion_pass; std::array, 3> format_properties; bool has_broken_texture_view_formats = false; - StagingBuffers upload_buffers{GL_MAP_WRITE_BIT, GL_MAP_WRITE_BIT | GL_MAP_FLUSH_EXPLICIT_BIT}; - StagingBuffers download_buffers{GL_MAP_READ_BIT | GL_CLIENT_STORAGE_BIT, GL_MAP_READ_BIT}; - OGLTexture null_image_1d_array; OGLTexture null_image_cube_array; OGLTexture null_image_3d; @@ -213,7 +187,7 @@ public: void UploadMemory(GLuint buffer_handle, size_t buffer_offset, std::span copies); - void UploadMemory(const ImageBufferMap& map, + void UploadMemory(const StagingBufferMap& map, std::span copies); void DownloadMemory(GLuint buffer_handle, size_t buffer_offset, @@ -222,7 +196,8 @@ public: void DownloadMemory(std::span buffer_handle, std::span buffer_offset, std::span copies); - void DownloadMemory(ImageBufferMap& map, std::span copies); + void DownloadMemory(StagingBufferMap& map, + std::span copies); GLuint StorageHandle() noexcept; diff --git a/src/video_core/renderer_opengl/util_shaders.cpp b/src/video_core/renderer_opengl/util_shaders.cpp index 4e39c8bb0..8cd609bd2 100755 --- a/src/video_core/renderer_opengl/util_shaders.cpp +++ b/src/video_core/renderer_opengl/util_shaders.cpp @@ -19,6 +19,7 @@ #include "video_core/host_shaders/pitch_unswizzle_comp.h" #include "video_core/renderer_opengl/gl_shader_manager.h" #include "video_core/renderer_opengl/gl_shader_util.h" +#include "video_core/renderer_opengl/gl_staging_buffer_pool.h" #include "video_core/renderer_opengl/gl_texture_cache.h" #include "video_core/renderer_opengl/util_shaders.h" #include "video_core/texture_cache/accelerated_swizzle.h" @@ -63,7 +64,7 @@ UtilShaders::UtilShaders(ProgramManager& program_manager_) UtilShaders::~UtilShaders() = default; -void UtilShaders::ASTCDecode(Image& image, const ImageBufferMap& map, +void UtilShaders::ASTCDecode(Image& image, const StagingBufferMap& map, std::span swizzles) { static constexpr GLuint BINDING_INPUT_BUFFER = 0; static constexpr GLuint BINDING_OUTPUT_IMAGE = 0; @@ -111,7 +112,7 @@ void UtilShaders::ASTCDecode(Image& image, const ImageBufferMap& map, program_manager.RestoreGuestCompute(); } -void UtilShaders::BlockLinearUpload2D(Image& image, const ImageBufferMap& map, +void UtilShaders::BlockLinearUpload2D(Image& image, const StagingBufferMap& map, std::span swizzles) { static constexpr Extent3D WORKGROUP_SIZE{32, 32, 1}; static constexpr GLuint BINDING_SWIZZLE_BUFFER = 0; @@ -148,7 +149,7 @@ void UtilShaders::BlockLinearUpload2D(Image& image, const ImageBufferMap& map, program_manager.RestoreGuestCompute(); } -void UtilShaders::BlockLinearUpload3D(Image& image, const ImageBufferMap& map, +void UtilShaders::BlockLinearUpload3D(Image& image, const StagingBufferMap& map, std::span swizzles) { static constexpr Extent3D WORKGROUP_SIZE{16, 8, 8}; @@ -189,7 +190,7 @@ void UtilShaders::BlockLinearUpload3D(Image& image, const ImageBufferMap& map, program_manager.RestoreGuestCompute(); } -void UtilShaders::PitchUpload(Image& image, const ImageBufferMap& map, +void UtilShaders::PitchUpload(Image& image, const StagingBufferMap& map, std::span swizzles) { static constexpr Extent3D WORKGROUP_SIZE{32, 32, 1}; static constexpr GLuint BINDING_INPUT_BUFFER = 0; diff --git a/src/video_core/renderer_opengl/util_shaders.h b/src/video_core/renderer_opengl/util_shaders.h index 4ce911f3e..7a87ae84b 100755 --- a/src/video_core/renderer_opengl/util_shaders.h +++ b/src/video_core/renderer_opengl/util_shaders.h @@ -16,23 +16,23 @@ namespace OpenGL { class Image; class ProgramManager; -struct ImageBufferMap; +struct StagingBufferMap; class UtilShaders { public: explicit UtilShaders(ProgramManager& program_manager); ~UtilShaders(); - void ASTCDecode(Image& image, const ImageBufferMap& map, + void ASTCDecode(Image& image, const StagingBufferMap& map, std::span swizzles); - void BlockLinearUpload2D(Image& image, const ImageBufferMap& map, + void BlockLinearUpload2D(Image& image, const StagingBufferMap& map, std::span swizzles); - void BlockLinearUpload3D(Image& image, const ImageBufferMap& map, + void BlockLinearUpload3D(Image& image, const StagingBufferMap& map, std::span swizzles); - void PitchUpload(Image& image, const ImageBufferMap& map, + void PitchUpload(Image& image, const StagingBufferMap& map, std::span swizzles); void CopyBC4(Image& dst_image, Image& src_image, diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.h b/src/video_core/renderer_vulkan/vk_buffer_cache.h index 39eb19d04..686f915c6 100755 --- a/src/video_core/renderer_vulkan/vk_buffer_cache.h +++ b/src/video_core/renderer_vulkan/vk_buffer_cache.h @@ -157,6 +157,7 @@ struct BufferCacheParams { static constexpr bool USE_MEMORY_MAPS = true; static constexpr bool SEPARATE_IMAGE_BUFFER_BINDINGS = false; static constexpr bool IMPLEMENTS_ASYNC_DOWNLOADS = true; + static constexpr bool USE_MEMORY_MAPS_FOR_UPLOADS = true; }; using BufferCache = VideoCommon::BufferCache; diff --git a/src/video_core/renderer_vulkan/vk_master_semaphore.cpp b/src/video_core/renderer_vulkan/vk_master_semaphore.cpp index 0ddf71abc..15a631b79 100755 --- a/src/video_core/renderer_vulkan/vk_master_semaphore.cpp +++ b/src/video_core/renderer_vulkan/vk_master_semaphore.cpp @@ -128,15 +128,12 @@ VkResult MasterSemaphore::SubmitQueueTimeline(vk::CommandBuffer& cmdbuf, const std::array signal_values{host_tick, u64(0)}; const std::array signal_semaphores{timeline_semaphore, signal_semaphore}; - const u32 num_wait_semaphores = wait_semaphore ? 2 : 1; - const std::array wait_values{host_tick - 1, u64(1)}; - const std::array wait_semaphores{timeline_semaphore, wait_semaphore}; - + const u32 num_wait_semaphores = wait_semaphore ? 1 : 0; const VkTimelineSemaphoreSubmitInfo timeline_si{ .sType = VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO, .pNext = nullptr, - .waitSemaphoreValueCount = num_wait_semaphores, - .pWaitSemaphoreValues = wait_values.data(), + .waitSemaphoreValueCount = 0, + .pWaitSemaphoreValues = nullptr, .signalSemaphoreValueCount = num_signal_semaphores, .pSignalSemaphoreValues = signal_values.data(), }; @@ -144,7 +141,7 @@ VkResult MasterSemaphore::SubmitQueueTimeline(vk::CommandBuffer& cmdbuf, .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO, .pNext = &timeline_si, .waitSemaphoreCount = num_wait_semaphores, - .pWaitSemaphores = wait_semaphores.data(), + .pWaitSemaphores = &wait_semaphore, .pWaitDstStageMask = wait_stage_masks.data(), .commandBufferCount = 1, .pCommandBuffers = cmdbuf.address(), diff --git a/src/yuzu/game_list.cpp b/src/yuzu/game_list.cpp index 73659e7ed..9dac6e17e 100755 --- a/src/yuzu/game_list.cpp +++ b/src/yuzu/game_list.cpp @@ -544,6 +544,7 @@ void GameList::AddGamePopup(QMenu& context_menu, u64 program_id, const std::stri QAction* remove_update = remove_menu->addAction(tr("Remove Installed Update")); QAction* remove_dlc = remove_menu->addAction(tr("Remove All Installed DLC")); QAction* remove_custom_config = remove_menu->addAction(tr("Remove Custom Configuration")); + QAction* remove_cache_storage = remove_menu->addAction(tr("Remove Cache Storage")); QAction* remove_gl_shader_cache = remove_menu->addAction(tr("Remove OpenGL Pipeline Cache")); QAction* remove_vk_shader_cache = remove_menu->addAction(tr("Remove Vulkan Pipeline Cache")); remove_menu->addSeparator(); @@ -614,6 +615,9 @@ void GameList::AddGamePopup(QMenu& context_menu, u64 program_id, const std::stri connect(remove_custom_config, &QAction::triggered, [this, program_id, path]() { emit RemoveFileRequested(program_id, GameListRemoveTarget::CustomConfiguration, path); }); + connect(remove_cache_storage, &QAction::triggered, [this, program_id, path] { + emit RemoveFileRequested(program_id, GameListRemoveTarget::CacheStorage, path); + }); connect(dump_romfs, &QAction::triggered, [this, program_id, path]() { emit DumpRomFSRequested(program_id, path, DumpRomFSTarget::Normal); }); diff --git a/src/yuzu/game_list.h b/src/yuzu/game_list.h index 1fcf59c74..af6f0ed7a 100755 --- a/src/yuzu/game_list.h +++ b/src/yuzu/game_list.h @@ -45,6 +45,7 @@ enum class GameListRemoveTarget { VkShaderCache, AllShaderCache, CustomConfiguration, + CacheStorage, }; enum class DumpRomFSTarget { diff --git a/src/yuzu/main.cpp b/src/yuzu/main.cpp index 203ddec78..c6714a26d 100755 --- a/src/yuzu/main.cpp +++ b/src/yuzu/main.cpp @@ -2418,6 +2418,8 @@ void GMainWindow::OnGameListRemoveFile(u64 program_id, GameListRemoveTarget targ return tr("Delete All Transferable Shader Caches?"); case GameListRemoveTarget::CustomConfiguration: return tr("Remove Custom Game Configuration?"); + case GameListRemoveTarget::CacheStorage: + return tr("Remove Cache Storage?"); default: return QString{}; } @@ -2441,6 +2443,9 @@ void GMainWindow::OnGameListRemoveFile(u64 program_id, GameListRemoveTarget targ case GameListRemoveTarget::CustomConfiguration: RemoveCustomConfiguration(program_id, game_path); break; + case GameListRemoveTarget::CacheStorage: + RemoveCacheStorage(program_id); + break; } } @@ -2530,6 +2535,21 @@ void GMainWindow::RemoveCustomConfiguration(u64 program_id, const std::string& g } } +void GMainWindow::RemoveCacheStorage(u64 program_id) { + const auto nand_dir = Common::FS::GetYuzuPath(Common::FS::YuzuPath::NANDDir); + auto vfs_nand_dir = + vfs->OpenDirectory(Common::FS::PathToUTF8String(nand_dir), FileSys::Mode::Read); + + const auto cache_storage_path = FileSys::SaveDataFactory::GetFullPath( + *system, vfs_nand_dir, FileSys::SaveDataSpaceId::NandUser, + FileSys::SaveDataType::CacheStorage, 0 /* program_id */, {}, 0); + + const auto path = Common::FS::ConcatPathSafe(nand_dir, cache_storage_path); + + // Not an error if it wasn't cleared. + Common::FS::RemoveDirRecursively(path); +} + void GMainWindow::OnGameListDumpRomFS(u64 program_id, const std::string& game_path, DumpRomFSTarget target) { const auto failed = [this] { diff --git a/src/yuzu/main.h b/src/yuzu/main.h index 38d5e7469..5fd6caa69 100755 --- a/src/yuzu/main.h +++ b/src/yuzu/main.h @@ -370,6 +370,7 @@ private: void RemoveVulkanDriverPipelineCache(u64 program_id); void RemoveAllTransferableShaderCaches(u64 program_id); void RemoveCustomConfiguration(u64 program_id, const std::string& game_path); + void RemoveCacheStorage(u64 program_id); std::optional SelectRomFSDumpTarget(const FileSys::ContentProvider&, u64 program_id); InstallResult InstallNSPXCI(const QString& filename); InstallResult InstallNCA(const QString& filename); diff --git a/vcpkg.json b/vcpkg.json index 84d3cf41b..72b6767f1 100755 --- a/vcpkg.json +++ b/vcpkg.json @@ -1,7 +1,7 @@ { "$schema": "https://raw.githubusercontent.com/microsoft/vcpkg-tool/main/docs/vcpkg.schema.json", "name": "yuzu", - "builtin-baseline": "acc3bcf76b84ae5041c86ab55fe138ae7b8255c7", + "builtin-baseline": "656fcc6ab2b05c6d999b7eaca717027ac3738f71", "version": "1.0", "dependencies": [ "boost-algorithm", @@ -53,7 +53,7 @@ }, { "name": "fmt", - "version": "9.0.0" + "version": "10.0.0" } ] }