early-access version 3621

This commit is contained in:
pineappleEA 2023-05-29 00:16:57 +02:00
parent c6c10a4fa2
commit b6f5e3823d
23 changed files with 419 additions and 154 deletions

View File

@ -1,7 +1,7 @@
yuzu emulator early access
=============
This is the source code for early-access 3618.
This is the source code for early-access 3621.
## Legal Notice

View File

@ -1697,7 +1697,13 @@ void MicroProfileFlip()
{
int nTimer = MicroProfileLogTimerIndex(LE);
uint8_t nGroup = pTimerToGroup[nTimer];
MP_ASSERT(nStackPos < MICROPROFILE_STACK_MAX);
// To avoid crashing due to OOB memory accesses/asserts
// simply skip this iteration
// MP_ASSERT(nStackPos < MICROPROFILE_STACK_MAX);
if (nStackPos >= MICROPROFILE_STACK_MAX) {
break;
}
MP_ASSERT(nGroup < MICROPROFILE_MAX_GROUPS);
pGroupStackPos[nGroup]++;
pStack[nStackPos++] = k;

View File

@ -188,3 +188,8 @@ private:
template <std::size_t Position, std::size_t Bits, typename T>
using BitFieldBE = BitField<Position, Bits, T, BETag>;
template <std::size_t Position, std::size_t Bits, typename T, typename EndianTag = LETag>
inline auto format_as(BitField<Position, Bits, T, EndianTag> bitfield) {
return bitfield.Value();
}

View File

@ -133,8 +133,8 @@ add_library(video_core STATIC
renderer_opengl/gl_shader_util.h
renderer_opengl/gl_state_tracker.cpp
renderer_opengl/gl_state_tracker.h
renderer_opengl/gl_stream_buffer.cpp
renderer_opengl/gl_stream_buffer.h
renderer_opengl/gl_staging_buffer_pool.cpp
renderer_opengl/gl_staging_buffer_pool.h
renderer_opengl/gl_texture_cache.cpp
renderer_opengl/gl_texture_cache.h
renderer_opengl/gl_texture_cache_base.cpp

View File

@ -474,7 +474,6 @@ void BufferCache<P>::CommitAsyncFlushesHigh() {
if (committed_ranges.empty()) {
if constexpr (IMPLEMENTS_ASYNC_DOWNLOADS) {
async_buffers.emplace_back(std::optional<Async_Buffer>{});
}
return;
@ -535,7 +534,6 @@ void BufferCache<P>::CommitAsyncFlushesHigh() {
committed_ranges.clear();
if (downloads.empty()) {
if constexpr (IMPLEMENTS_ASYNC_DOWNLOADS) {
async_buffers.emplace_back(std::optional<Async_Buffer>{});
}
return;
@ -687,7 +685,7 @@ void BufferCache<P>::BindHostIndexBuffer() {
const u32 size = channel_state->index_buffer.size;
const auto& draw_state = maxwell3d->draw_manager->GetDrawState();
if (!draw_state.inline_index_draw_indexes.empty()) [[unlikely]] {
if constexpr (USE_MEMORY_MAPS) {
if constexpr (USE_MEMORY_MAPS_FOR_UPLOADS) {
auto upload_staging = runtime.UploadStagingBuffer(size);
std::array<BufferCopy, 1> copies{
{BufferCopy{.src_offset = upload_staging.offset, .dst_offset = 0, .size = size}}};
@ -1458,7 +1456,7 @@ bool BufferCache<P>::SynchronizeBufferNoModified(Buffer& buffer, VAddr cpu_addr,
template <class P>
void BufferCache<P>::UploadMemory(Buffer& buffer, u64 total_size_bytes, u64 largest_copy,
std::span<BufferCopy> copies) {
if constexpr (USE_MEMORY_MAPS) {
if constexpr (USE_MEMORY_MAPS_FOR_UPLOADS) {
MappedUploadMemory(buffer, total_size_bytes, copies);
} else {
ImmediateUploadMemory(buffer, largest_copy, copies);
@ -1469,7 +1467,7 @@ template <class P>
void BufferCache<P>::ImmediateUploadMemory([[maybe_unused]] Buffer& buffer,
[[maybe_unused]] u64 largest_copy,
[[maybe_unused]] std::span<const BufferCopy> copies) {
if constexpr (!USE_MEMORY_MAPS) {
if constexpr (!USE_MEMORY_MAPS_FOR_UPLOADS) {
std::span<u8> immediate_buffer;
for (const BufferCopy& copy : copies) {
std::span<const u8> upload_span;
@ -1528,7 +1526,7 @@ bool BufferCache<P>::InlineMemory(VAddr dest_address, size_t copy_size,
auto& buffer = slot_buffers[buffer_id];
SynchronizeBuffer(buffer, dest_address, static_cast<u32>(copy_size));
if constexpr (USE_MEMORY_MAPS) {
if constexpr (USE_MEMORY_MAPS_FOR_UPLOADS) {
auto upload_staging = runtime.UploadStagingBuffer(copy_size);
std::array copies{BufferCopy{
.src_offset = upload_staging.offset,

View File

@ -173,6 +173,7 @@ class BufferCache : public VideoCommon::ChannelSetupCaches<BufferCacheChannelInf
static constexpr bool USE_MEMORY_MAPS = P::USE_MEMORY_MAPS;
static constexpr bool SEPARATE_IMAGE_BUFFERS_BINDINGS = P::SEPARATE_IMAGE_BUFFER_BINDINGS;
static constexpr bool IMPLEMENTS_ASYNC_DOWNLOADS = P::IMPLEMENTS_ASYNC_DOWNLOADS;
static constexpr bool USE_MEMORY_MAPS_FOR_UPLOADS = P::USE_MEMORY_MAPS_FOR_UPLOADS;
static constexpr s64 DEFAULT_EXPECTED_MEMORY = 512_MiB;
static constexpr s64 DEFAULT_CRITICAL_MEMORY = 1_GiB;

View File

@ -106,8 +106,10 @@ GLuint Buffer::View(u32 offset, u32 size, PixelFormat format) {
return views.back().texture.handle;
}
BufferCacheRuntime::BufferCacheRuntime(const Device& device_)
: device{device_}, has_fast_buffer_sub_data{device.HasFastBufferSubData()},
BufferCacheRuntime::BufferCacheRuntime(const Device& device_,
StagingBufferPool& staging_buffer_pool_)
: device{device_}, staging_buffer_pool{staging_buffer_pool_},
has_fast_buffer_sub_data{device.HasFastBufferSubData()},
use_assembly_shaders{device.UseAssemblyShaders()},
has_unified_vertex_buffers{device.HasVertexBufferUnifiedMemory()},
stream_buffer{has_fast_buffer_sub_data ? std::nullopt : std::make_optional<StreamBuffer>()} {
@ -140,6 +142,14 @@ BufferCacheRuntime::BufferCacheRuntime(const Device& device_)
}();
}
StagingBufferMap BufferCacheRuntime::UploadStagingBuffer(size_t size) {
return staging_buffer_pool.RequestUploadBuffer(size);
}
StagingBufferMap BufferCacheRuntime::DownloadStagingBuffer(size_t size) {
return staging_buffer_pool.RequestDownloadBuffer(size);
}
u64 BufferCacheRuntime::GetDeviceMemoryUsage() const {
if (device.CanReportMemoryUsage()) {
return device_access_memory - device.GetCurrentDedicatedVideoMemory();
@ -147,13 +157,47 @@ u64 BufferCacheRuntime::GetDeviceMemoryUsage() const {
return 2_GiB;
}
void BufferCacheRuntime::CopyBuffer(GLuint dst_buffer, GLuint src_buffer,
std::span<const VideoCommon::BufferCopy> copies, bool barrier) {
if (barrier) {
PreCopyBarrier();
}
for (const VideoCommon::BufferCopy& copy : copies) {
glCopyNamedBufferSubData(src_buffer, dst_buffer, static_cast<GLintptr>(copy.src_offset),
static_cast<GLintptr>(copy.dst_offset),
static_cast<GLsizeiptr>(copy.size));
}
if (barrier) {
PostCopyBarrier();
}
}
void BufferCacheRuntime::CopyBuffer(GLuint dst_buffer, Buffer& src_buffer,
std::span<const VideoCommon::BufferCopy> copies, bool barrier) {
CopyBuffer(dst_buffer, src_buffer.Handle(), copies, barrier);
}
void BufferCacheRuntime::CopyBuffer(Buffer& dst_buffer, GLuint src_buffer,
std::span<const VideoCommon::BufferCopy> copies, bool barrier) {
CopyBuffer(dst_buffer.Handle(), src_buffer, copies, barrier);
}
void BufferCacheRuntime::CopyBuffer(Buffer& dst_buffer, Buffer& src_buffer,
std::span<const VideoCommon::BufferCopy> copies) {
for (const VideoCommon::BufferCopy& copy : copies) {
glCopyNamedBufferSubData(
src_buffer.Handle(), dst_buffer.Handle(), static_cast<GLintptr>(copy.src_offset),
static_cast<GLintptr>(copy.dst_offset), static_cast<GLsizeiptr>(copy.size));
}
CopyBuffer(dst_buffer.Handle(), src_buffer.Handle(), copies);
}
void BufferCacheRuntime::PreCopyBarrier() {
// TODO: finer grained barrier?
glMemoryBarrier(GL_ALL_BARRIER_BITS);
}
void BufferCacheRuntime::PostCopyBarrier() {
glMemoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT | GL_CLIENT_MAPPED_BUFFER_BARRIER_BIT);
}
void BufferCacheRuntime::Finish() {
glFinish();
}
void BufferCacheRuntime::ClearBuffer(Buffer& dest_buffer, u32 offset, size_t size, u32 value) {

View File

@ -12,7 +12,7 @@
#include "video_core/rasterizer_interface.h"
#include "video_core/renderer_opengl/gl_device.h"
#include "video_core/renderer_opengl/gl_resource_manager.h"
#include "video_core/renderer_opengl/gl_stream_buffer.h"
#include "video_core/renderer_opengl/gl_staging_buffer_pool.h"
namespace OpenGL {
@ -60,11 +60,28 @@ class BufferCacheRuntime {
public:
static constexpr u8 INVALID_BINDING = std::numeric_limits<u8>::max();
explicit BufferCacheRuntime(const Device& device_);
explicit BufferCacheRuntime(const Device& device_, StagingBufferPool& staging_buffer_pool_);
[[nodiscard]] StagingBufferMap UploadStagingBuffer(size_t size);
[[nodiscard]] StagingBufferMap DownloadStagingBuffer(size_t size);
void CopyBuffer(GLuint dst_buffer, GLuint src_buffer,
std::span<const VideoCommon::BufferCopy> copies, bool barrier = true);
void CopyBuffer(GLuint dst_buffer, Buffer& src_buffer,
std::span<const VideoCommon::BufferCopy> copies, bool barrier = true);
void CopyBuffer(Buffer& dst_buffer, GLuint src_buffer,
std::span<const VideoCommon::BufferCopy> copies, bool barrier = true);
void CopyBuffer(Buffer& dst_buffer, Buffer& src_buffer,
std::span<const VideoCommon::BufferCopy> copies);
void PreCopyBarrier();
void PostCopyBarrier();
void Finish();
void ClearBuffer(Buffer& dest_buffer, u32 offset, size_t size, u32 value);
void BindIndexBuffer(Buffer& buffer, u32 offset, u32 size);
@ -169,6 +186,7 @@ private:
};
const Device& device;
StagingBufferPool& staging_buffer_pool;
bool has_fast_buffer_sub_data = false;
bool use_assembly_shaders = false;
@ -201,7 +219,7 @@ private:
struct BufferCacheParams {
using Runtime = OpenGL::BufferCacheRuntime;
using Buffer = OpenGL::Buffer;
using Async_Buffer = u32;
using Async_Buffer = OpenGL::StagingBufferMap;
using MemoryTracker = VideoCommon::MemoryTrackerBase<VideoCore::RasterizerInterface>;
static constexpr bool IS_OPENGL = true;
@ -209,9 +227,12 @@ struct BufferCacheParams {
static constexpr bool HAS_FULL_INDEX_AND_PRIMITIVE_SUPPORT = true;
static constexpr bool NEEDS_BIND_UNIFORM_INDEX = true;
static constexpr bool NEEDS_BIND_STORAGE_INDEX = true;
static constexpr bool USE_MEMORY_MAPS = false;
static constexpr bool USE_MEMORY_MAPS = true;
static constexpr bool SEPARATE_IMAGE_BUFFER_BINDINGS = true;
static constexpr bool IMPLEMENTS_ASYNC_DOWNLOADS = false;
// TODO: Investigate why OpenGL seems to perform worse with persistently mapped buffer uploads
static constexpr bool USE_MEMORY_MAPS_FOR_UPLOADS = false;
};
using BufferCache = VideoCommon::BufferCache<BufferCacheParams>;

View File

@ -24,6 +24,7 @@
#include "video_core/renderer_opengl/gl_query_cache.h"
#include "video_core/renderer_opengl/gl_rasterizer.h"
#include "video_core/renderer_opengl/gl_shader_cache.h"
#include "video_core/renderer_opengl/gl_staging_buffer_pool.h"
#include "video_core/renderer_opengl/gl_texture_cache.h"
#include "video_core/renderer_opengl/maxwell_to_gl.h"
#include "video_core/renderer_opengl/renderer_opengl.h"
@ -58,8 +59,9 @@ RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& emu_window_, Tegra
StateTracker& state_tracker_)
: RasterizerAccelerated(cpu_memory_), gpu(gpu_), device(device_), screen_info(screen_info_),
program_manager(program_manager_), state_tracker(state_tracker_),
texture_cache_runtime(device, program_manager, state_tracker),
texture_cache(texture_cache_runtime, *this), buffer_cache_runtime(device),
texture_cache_runtime(device, program_manager, state_tracker, staging_buffer_pool),
texture_cache(texture_cache_runtime, *this),
buffer_cache_runtime(device, staging_buffer_pool),
buffer_cache(*this, cpu_memory_, buffer_cache_runtime),
shader_cache(*this, emu_window_, device, texture_cache, buffer_cache, program_manager,
state_tracker, gpu.ShaderNotify()),

View File

@ -230,6 +230,7 @@ private:
ProgramManager& program_manager;
StateTracker& state_tracker;
StagingBufferPool staging_buffer_pool;
TextureCacheRuntime texture_cache_runtime;
TextureCache texture_cache;
BufferCacheRuntime buffer_cache_runtime;

View File

@ -0,0 +1,150 @@
// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include <array>
#include <memory>
#include <span>
#include <glad/glad.h>
#include "common/alignment.h"
#include "common/assert.h"
#include "common/bit_util.h"
#include "common/microprofile.h"
#include "video_core/renderer_opengl/gl_staging_buffer_pool.h"
MICROPROFILE_DEFINE(OpenGL_BufferRequest, "OpenGL", "BufferRequest", MP_RGB(128, 128, 192));
namespace OpenGL {
StagingBufferMap::~StagingBufferMap() {
if (sync) {
sync->Create();
}
}
StagingBuffers::StagingBuffers(GLenum storage_flags_, GLenum map_flags_)
: storage_flags{storage_flags_}, map_flags{map_flags_} {}
StagingBuffers::~StagingBuffers() = default;
StagingBufferMap StagingBuffers::RequestMap(size_t requested_size, bool insert_fence) {
MICROPROFILE_SCOPE(OpenGL_BufferRequest);
const size_t index = RequestBuffer(requested_size);
OGLSync* const sync = insert_fence ? &syncs[index] : nullptr;
sync_indices[index] = insert_fence ? ++current_sync_index : 0;
return StagingBufferMap{
.mapped_span = std::span(maps[index], requested_size),
.sync = sync,
.buffer = buffers[index].handle,
};
}
size_t StagingBuffers::RequestBuffer(size_t requested_size) {
if (const std::optional<size_t> index = FindBuffer(requested_size); index) {
return *index;
}
OGLBuffer& buffer = buffers.emplace_back();
buffer.Create();
const auto next_pow2_size = Common::NextPow2(requested_size);
glNamedBufferStorage(buffer.handle, next_pow2_size, nullptr,
storage_flags | GL_MAP_PERSISTENT_BIT);
maps.push_back(static_cast<u8*>(glMapNamedBufferRange(buffer.handle, 0, next_pow2_size,
map_flags | GL_MAP_PERSISTENT_BIT)));
syncs.emplace_back();
sync_indices.emplace_back();
sizes.push_back(next_pow2_size);
ASSERT(syncs.size() == buffers.size() && buffers.size() == maps.size() &&
maps.size() == sizes.size());
return buffers.size() - 1;
}
std::optional<size_t> StagingBuffers::FindBuffer(size_t requested_size) {
size_t known_unsignaled_index = current_sync_index + 1;
size_t smallest_buffer = std::numeric_limits<size_t>::max();
std::optional<size_t> found;
const size_t num_buffers = sizes.size();
for (size_t index = 0; index < num_buffers; ++index) {
const size_t buffer_size = sizes[index];
if (buffer_size < requested_size || buffer_size >= smallest_buffer) {
continue;
}
if (syncs[index].handle != 0) {
if (sync_indices[index] >= known_unsignaled_index) {
// This fence is later than a fence that is known to not be signaled
continue;
}
if (!syncs[index].IsSignaled()) {
// Since this fence hasn't been signaled, it's safe to assume all later
// fences haven't been signaled either
known_unsignaled_index = std::min(known_unsignaled_index, sync_indices[index]);
continue;
}
syncs[index].Release();
}
smallest_buffer = buffer_size;
found = index;
}
return found;
}
StreamBuffer::StreamBuffer() {
static constexpr GLenum flags = GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT | GL_MAP_COHERENT_BIT;
buffer.Create();
glObjectLabel(GL_BUFFER, buffer.handle, -1, "Stream Buffer");
glNamedBufferStorage(buffer.handle, STREAM_BUFFER_SIZE, nullptr, flags);
mapped_pointer =
static_cast<u8*>(glMapNamedBufferRange(buffer.handle, 0, STREAM_BUFFER_SIZE, flags));
for (OGLSync& sync : fences) {
sync.Create();
}
}
std::pair<std::span<u8>, size_t> StreamBuffer::Request(size_t size) noexcept {
ASSERT(size < REGION_SIZE);
for (size_t region = Region(used_iterator), region_end = Region(iterator); region < region_end;
++region) {
fences[region].Create();
}
used_iterator = iterator;
for (size_t region = Region(free_iterator) + 1,
region_end = std::min(Region(iterator + size) + 1, NUM_SYNCS);
region < region_end; ++region) {
glClientWaitSync(fences[region].handle, 0, GL_TIMEOUT_IGNORED);
fences[region].Release();
}
if (iterator + size >= free_iterator) {
free_iterator = iterator + size;
}
if (iterator + size > STREAM_BUFFER_SIZE) {
for (size_t region = Region(used_iterator); region < NUM_SYNCS; ++region) {
fences[region].Create();
}
used_iterator = 0;
iterator = 0;
free_iterator = size;
for (size_t region = 0, region_end = Region(size); region <= region_end; ++region) {
glClientWaitSync(fences[region].handle, 0, GL_TIMEOUT_IGNORED);
fences[region].Release();
}
}
const size_t offset = iterator;
iterator = Common::AlignUp(iterator + size, MAX_ALIGNMENT);
return {std::span(mapped_pointer + offset, size), offset};
}
StagingBufferMap StagingBufferPool::RequestUploadBuffer(size_t size) {
return upload_buffers.RequestMap(size, true);
}
StagingBufferMap StagingBufferPool::RequestDownloadBuffer(size_t size) {
return download_buffers.RequestMap(size, false);
}
} // namespace OpenGL

View File

@ -0,0 +1,95 @@
// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include <array>
#include <optional>
#include <span>
#include <utility>
#include <vector>
#include <glad/glad.h>
#include "common/common_types.h"
#include "common/literals.h"
#include "video_core/renderer_opengl/gl_resource_manager.h"
namespace OpenGL {
using namespace Common::Literals;
struct StagingBufferMap {
~StagingBufferMap();
std::span<u8> mapped_span;
size_t offset = 0;
OGLSync* sync;
GLuint buffer;
};
struct StagingBuffers {
explicit StagingBuffers(GLenum storage_flags_, GLenum map_flags_);
~StagingBuffers();
StagingBufferMap RequestMap(size_t requested_size, bool insert_fence);
size_t RequestBuffer(size_t requested_size);
std::optional<size_t> FindBuffer(size_t requested_size);
std::vector<OGLSync> syncs;
std::vector<OGLBuffer> buffers;
std::vector<u8*> maps;
std::vector<size_t> sizes;
std::vector<size_t> sync_indices;
GLenum storage_flags;
GLenum map_flags;
size_t current_sync_index = 0;
};
class StreamBuffer {
static constexpr size_t STREAM_BUFFER_SIZE = 64_MiB;
static constexpr size_t NUM_SYNCS = 16;
static constexpr size_t REGION_SIZE = STREAM_BUFFER_SIZE / NUM_SYNCS;
static constexpr size_t MAX_ALIGNMENT = 256;
static_assert(STREAM_BUFFER_SIZE % MAX_ALIGNMENT == 0);
static_assert(STREAM_BUFFER_SIZE % NUM_SYNCS == 0);
static_assert(REGION_SIZE % MAX_ALIGNMENT == 0);
public:
explicit StreamBuffer();
[[nodiscard]] std::pair<std::span<u8>, size_t> Request(size_t size) noexcept;
[[nodiscard]] GLuint Handle() const noexcept {
return buffer.handle;
}
private:
[[nodiscard]] static size_t Region(size_t offset) noexcept {
return offset / REGION_SIZE;
}
size_t iterator = 0;
size_t used_iterator = 0;
size_t free_iterator = 0;
u8* mapped_pointer = nullptr;
OGLBuffer buffer;
std::array<OGLSync, NUM_SYNCS> fences;
};
class StagingBufferPool {
public:
StagingBufferPool() = default;
~StagingBufferPool() = default;
StagingBufferMap RequestUploadBuffer(size_t size);
StagingBufferMap RequestDownloadBuffer(size_t size);
private:
StagingBuffers upload_buffers{GL_MAP_WRITE_BIT, GL_MAP_WRITE_BIT | GL_MAP_FLUSH_EXPLICIT_BIT};
StagingBuffers download_buffers{GL_MAP_READ_BIT | GL_CLIENT_STORAGE_BIT, GL_MAP_READ_BIT};
};
} // namespace OpenGL

View File

@ -439,6 +439,11 @@ OGLTexture MakeImage(const VideoCommon::ImageInfo& info, GLenum gl_internal_form
return GL_R32UI;
}
[[nodiscard]] bool IsAstcRecompressionEnabled() {
return Settings::values.astc_recompression.GetValue() !=
Settings::AstcRecompression::Uncompressed;
}
[[nodiscard]] GLenum SelectAstcFormat(PixelFormat format, bool is_srgb) {
switch (Settings::values.astc_recompression.GetValue()) {
case Settings::AstcRecompression::Bc1:
@ -451,19 +456,14 @@ OGLTexture MakeImage(const VideoCommon::ImageInfo& info, GLenum gl_internal_form
return is_srgb ? GL_SRGB8_ALPHA8 : GL_RGBA8;
}
}
} // Anonymous namespace
ImageBufferMap::~ImageBufferMap() {
if (sync) {
sync->Create();
}
}
TextureCacheRuntime::TextureCacheRuntime(const Device& device_, ProgramManager& program_manager,
StateTracker& state_tracker_)
: device{device_}, state_tracker{state_tracker_}, util_shaders(program_manager),
format_conversion_pass{util_shaders}, resolution{Settings::values.resolution_info} {
StateTracker& state_tracker_,
StagingBufferPool& staging_buffer_pool_)
: device{device_}, state_tracker{state_tracker_}, staging_buffer_pool{staging_buffer_pool_},
util_shaders(program_manager), format_conversion_pass{util_shaders},
resolution{Settings::values.resolution_info} {
static constexpr std::array TARGETS{GL_TEXTURE_1D_ARRAY, GL_TEXTURE_2D_ARRAY, GL_TEXTURE_3D};
for (size_t i = 0; i < TARGETS.size(); ++i) {
const GLenum target = TARGETS[i];
@ -553,12 +553,12 @@ void TextureCacheRuntime::Finish() {
glFinish();
}
ImageBufferMap TextureCacheRuntime::UploadStagingBuffer(size_t size) {
return upload_buffers.RequestMap(size, true);
StagingBufferMap TextureCacheRuntime::UploadStagingBuffer(size_t size) {
return staging_buffer_pool.RequestUploadBuffer(size);
}
ImageBufferMap TextureCacheRuntime::DownloadStagingBuffer(size_t size) {
return download_buffers.RequestMap(size, false);
StagingBufferMap TextureCacheRuntime::DownloadStagingBuffer(size_t size) {
return staging_buffer_pool.RequestDownloadBuffer(size);
}
u64 TextureCacheRuntime::GetDeviceMemoryUsage() const {
@ -643,7 +643,7 @@ void TextureCacheRuntime::BlitFramebuffer(Framebuffer* dst, Framebuffer* src,
is_linear ? GL_LINEAR : GL_NEAREST);
}
void TextureCacheRuntime::AccelerateImageUpload(Image& image, const ImageBufferMap& map,
void TextureCacheRuntime::AccelerateImageUpload(Image& image, const StagingBufferMap& map,
std::span<const SwizzleParameters> swizzles) {
switch (image.info.type) {
case ImageType::e2D:
@ -685,64 +685,6 @@ bool TextureCacheRuntime::HasNativeASTC() const noexcept {
return device.HasASTC();
}
TextureCacheRuntime::StagingBuffers::StagingBuffers(GLenum storage_flags_, GLenum map_flags_)
: storage_flags{storage_flags_}, map_flags{map_flags_} {}
TextureCacheRuntime::StagingBuffers::~StagingBuffers() = default;
ImageBufferMap TextureCacheRuntime::StagingBuffers::RequestMap(size_t requested_size,
bool insert_fence) {
const size_t index = RequestBuffer(requested_size);
OGLSync* const sync = insert_fence ? &syncs[index] : nullptr;
return ImageBufferMap{
.mapped_span = std::span(maps[index], requested_size),
.sync = sync,
.buffer = buffers[index].handle,
};
}
size_t TextureCacheRuntime::StagingBuffers::RequestBuffer(size_t requested_size) {
if (const std::optional<size_t> index = FindBuffer(requested_size); index) {
return *index;
}
OGLBuffer& buffer = buffers.emplace_back();
buffer.Create();
glNamedBufferStorage(buffer.handle, requested_size, nullptr,
storage_flags | GL_MAP_PERSISTENT_BIT);
maps.push_back(static_cast<u8*>(glMapNamedBufferRange(buffer.handle, 0, requested_size,
map_flags | GL_MAP_PERSISTENT_BIT)));
syncs.emplace_back();
sizes.push_back(requested_size);
ASSERT(syncs.size() == buffers.size() && buffers.size() == maps.size() &&
maps.size() == sizes.size());
return buffers.size() - 1;
}
std::optional<size_t> TextureCacheRuntime::StagingBuffers::FindBuffer(size_t requested_size) {
size_t smallest_buffer = std::numeric_limits<size_t>::max();
std::optional<size_t> found;
const size_t num_buffers = sizes.size();
for (size_t index = 0; index < num_buffers; ++index) {
const size_t buffer_size = sizes[index];
if (buffer_size < requested_size || buffer_size >= smallest_buffer) {
continue;
}
if (syncs[index].handle != 0) {
if (!syncs[index].IsSignaled()) {
continue;
}
syncs[index].Release();
}
smallest_buffer = buffer_size;
found = index;
}
return found;
}
Image::Image(TextureCacheRuntime& runtime_, const VideoCommon::ImageInfo& info_, GPUVAddr gpu_addr_,
VAddr cpu_addr_)
: VideoCommon::ImageBase(info_, gpu_addr_, cpu_addr_), runtime{&runtime_} {
@ -760,7 +702,7 @@ Image::Image(TextureCacheRuntime& runtime_, const VideoCommon::ImageInfo& info_,
gl_format = GL_RGBA;
gl_type = GL_UNSIGNED_INT_8_8_8_8_REV;
if (IsPixelFormatASTC(info.format)) {
if (IsPixelFormatASTC(info.format) && IsAstcRecompressionEnabled()) {
gl_internal_format = SelectAstcFormat(info.format, is_srgb);
gl_format = GL_NONE;
}
@ -818,7 +760,7 @@ void Image::UploadMemory(GLuint buffer_handle, size_t buffer_offset,
}
}
void Image::UploadMemory(const ImageBufferMap& map,
void Image::UploadMemory(const StagingBufferMap& map,
std::span<const VideoCommon::BufferImageCopy> copies) {
UploadMemory(map.buffer, map.offset, copies);
}
@ -865,7 +807,7 @@ void Image::DownloadMemory(std::span<GLuint> buffer_handles, std::span<size_t> b
}
}
void Image::DownloadMemory(ImageBufferMap& map,
void Image::DownloadMemory(StagingBufferMap& map,
std::span<const VideoCommon::BufferImageCopy> copies) {
DownloadMemory(map.buffer, map.offset, copies);
}
@ -1155,7 +1097,7 @@ ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::ImageViewI
const bool is_srgb = IsPixelFormatSRGB(info.format);
internal_format = is_srgb ? GL_SRGB8_ALPHA8 : GL_RGBA8;
if (IsPixelFormatASTC(info.format)) {
if (IsPixelFormatASTC(info.format) && IsAstcRecompressionEnabled()) {
internal_format = SelectAstcFormat(info.format, is_srgb);
}
} else {

View File

@ -11,6 +11,7 @@
#include "shader_recompiler/shader_info.h"
#include "video_core/renderer_opengl/gl_device.h"
#include "video_core/renderer_opengl/gl_resource_manager.h"
#include "video_core/renderer_opengl/gl_staging_buffer_pool.h"
#include "video_core/renderer_opengl/util_shaders.h"
#include "video_core/texture_cache/image_view_base.h"
#include "video_core/texture_cache/texture_cache_base.h"
@ -37,15 +38,6 @@ using VideoCommon::Region2D;
using VideoCommon::RenderTargets;
using VideoCommon::SlotVector;
struct ImageBufferMap {
~ImageBufferMap();
std::span<u8> mapped_span;
size_t offset = 0;
OGLSync* sync;
GLuint buffer;
};
struct FormatProperties {
GLenum compatibility_class;
bool compatibility_by_size;
@ -74,14 +66,15 @@ class TextureCacheRuntime {
public:
explicit TextureCacheRuntime(const Device& device, ProgramManager& program_manager,
StateTracker& state_tracker);
StateTracker& state_tracker,
StagingBufferPool& staging_buffer_pool);
~TextureCacheRuntime();
void Finish();
ImageBufferMap UploadStagingBuffer(size_t size);
StagingBufferMap UploadStagingBuffer(size_t size);
ImageBufferMap DownloadStagingBuffer(size_t size);
StagingBufferMap DownloadStagingBuffer(size_t size);
u64 GetDeviceLocalMemory() const {
return device_access_memory;
@ -120,7 +113,7 @@ public:
const Region2D& src_region, Tegra::Engines::Fermi2D::Filter filter,
Tegra::Engines::Fermi2D::Operation operation);
void AccelerateImageUpload(Image& image, const ImageBufferMap& map,
void AccelerateImageUpload(Image& image, const StagingBufferMap& map,
std::span<const VideoCommon::SwizzleParameters> swizzles);
void InsertUploadMemoryBarrier();
@ -149,35 +142,16 @@ public:
}
private:
struct StagingBuffers {
explicit StagingBuffers(GLenum storage_flags_, GLenum map_flags_);
~StagingBuffers();
ImageBufferMap RequestMap(size_t requested_size, bool insert_fence);
size_t RequestBuffer(size_t requested_size);
std::optional<size_t> FindBuffer(size_t requested_size);
std::vector<OGLSync> syncs;
std::vector<OGLBuffer> buffers;
std::vector<u8*> maps;
std::vector<size_t> sizes;
GLenum storage_flags;
GLenum map_flags;
};
const Device& device;
StateTracker& state_tracker;
StagingBufferPool& staging_buffer_pool;
UtilShaders util_shaders;
FormatConversionPass format_conversion_pass;
std::array<std::unordered_map<GLenum, FormatProperties>, 3> format_properties;
bool has_broken_texture_view_formats = false;
StagingBuffers upload_buffers{GL_MAP_WRITE_BIT, GL_MAP_WRITE_BIT | GL_MAP_FLUSH_EXPLICIT_BIT};
StagingBuffers download_buffers{GL_MAP_READ_BIT | GL_CLIENT_STORAGE_BIT, GL_MAP_READ_BIT};
OGLTexture null_image_1d_array;
OGLTexture null_image_cube_array;
OGLTexture null_image_3d;
@ -213,7 +187,7 @@ public:
void UploadMemory(GLuint buffer_handle, size_t buffer_offset,
std::span<const VideoCommon::BufferImageCopy> copies);
void UploadMemory(const ImageBufferMap& map,
void UploadMemory(const StagingBufferMap& map,
std::span<const VideoCommon::BufferImageCopy> copies);
void DownloadMemory(GLuint buffer_handle, size_t buffer_offset,
@ -222,7 +196,8 @@ public:
void DownloadMemory(std::span<GLuint> buffer_handle, std::span<size_t> buffer_offset,
std::span<const VideoCommon::BufferImageCopy> copies);
void DownloadMemory(ImageBufferMap& map, std::span<const VideoCommon::BufferImageCopy> copies);
void DownloadMemory(StagingBufferMap& map,
std::span<const VideoCommon::BufferImageCopy> copies);
GLuint StorageHandle() noexcept;

View File

@ -19,6 +19,7 @@
#include "video_core/host_shaders/pitch_unswizzle_comp.h"
#include "video_core/renderer_opengl/gl_shader_manager.h"
#include "video_core/renderer_opengl/gl_shader_util.h"
#include "video_core/renderer_opengl/gl_staging_buffer_pool.h"
#include "video_core/renderer_opengl/gl_texture_cache.h"
#include "video_core/renderer_opengl/util_shaders.h"
#include "video_core/texture_cache/accelerated_swizzle.h"
@ -63,7 +64,7 @@ UtilShaders::UtilShaders(ProgramManager& program_manager_)
UtilShaders::~UtilShaders() = default;
void UtilShaders::ASTCDecode(Image& image, const ImageBufferMap& map,
void UtilShaders::ASTCDecode(Image& image, const StagingBufferMap& map,
std::span<const VideoCommon::SwizzleParameters> swizzles) {
static constexpr GLuint BINDING_INPUT_BUFFER = 0;
static constexpr GLuint BINDING_OUTPUT_IMAGE = 0;
@ -111,7 +112,7 @@ void UtilShaders::ASTCDecode(Image& image, const ImageBufferMap& map,
program_manager.RestoreGuestCompute();
}
void UtilShaders::BlockLinearUpload2D(Image& image, const ImageBufferMap& map,
void UtilShaders::BlockLinearUpload2D(Image& image, const StagingBufferMap& map,
std::span<const SwizzleParameters> swizzles) {
static constexpr Extent3D WORKGROUP_SIZE{32, 32, 1};
static constexpr GLuint BINDING_SWIZZLE_BUFFER = 0;
@ -148,7 +149,7 @@ void UtilShaders::BlockLinearUpload2D(Image& image, const ImageBufferMap& map,
program_manager.RestoreGuestCompute();
}
void UtilShaders::BlockLinearUpload3D(Image& image, const ImageBufferMap& map,
void UtilShaders::BlockLinearUpload3D(Image& image, const StagingBufferMap& map,
std::span<const SwizzleParameters> swizzles) {
static constexpr Extent3D WORKGROUP_SIZE{16, 8, 8};
@ -189,7 +190,7 @@ void UtilShaders::BlockLinearUpload3D(Image& image, const ImageBufferMap& map,
program_manager.RestoreGuestCompute();
}
void UtilShaders::PitchUpload(Image& image, const ImageBufferMap& map,
void UtilShaders::PitchUpload(Image& image, const StagingBufferMap& map,
std::span<const SwizzleParameters> swizzles) {
static constexpr Extent3D WORKGROUP_SIZE{32, 32, 1};
static constexpr GLuint BINDING_INPUT_BUFFER = 0;

View File

@ -16,23 +16,23 @@ namespace OpenGL {
class Image;
class ProgramManager;
struct ImageBufferMap;
struct StagingBufferMap;
class UtilShaders {
public:
explicit UtilShaders(ProgramManager& program_manager);
~UtilShaders();
void ASTCDecode(Image& image, const ImageBufferMap& map,
void ASTCDecode(Image& image, const StagingBufferMap& map,
std::span<const VideoCommon::SwizzleParameters> swizzles);
void BlockLinearUpload2D(Image& image, const ImageBufferMap& map,
void BlockLinearUpload2D(Image& image, const StagingBufferMap& map,
std::span<const VideoCommon::SwizzleParameters> swizzles);
void BlockLinearUpload3D(Image& image, const ImageBufferMap& map,
void BlockLinearUpload3D(Image& image, const StagingBufferMap& map,
std::span<const VideoCommon::SwizzleParameters> swizzles);
void PitchUpload(Image& image, const ImageBufferMap& map,
void PitchUpload(Image& image, const StagingBufferMap& map,
std::span<const VideoCommon::SwizzleParameters> swizzles);
void CopyBC4(Image& dst_image, Image& src_image,

View File

@ -157,6 +157,7 @@ struct BufferCacheParams {
static constexpr bool USE_MEMORY_MAPS = true;
static constexpr bool SEPARATE_IMAGE_BUFFER_BINDINGS = false;
static constexpr bool IMPLEMENTS_ASYNC_DOWNLOADS = true;
static constexpr bool USE_MEMORY_MAPS_FOR_UPLOADS = true;
};
using BufferCache = VideoCommon::BufferCache<BufferCacheParams>;

View File

@ -128,15 +128,12 @@ VkResult MasterSemaphore::SubmitQueueTimeline(vk::CommandBuffer& cmdbuf,
const std::array signal_values{host_tick, u64(0)};
const std::array signal_semaphores{timeline_semaphore, signal_semaphore};
const u32 num_wait_semaphores = wait_semaphore ? 2 : 1;
const std::array wait_values{host_tick - 1, u64(1)};
const std::array wait_semaphores{timeline_semaphore, wait_semaphore};
const u32 num_wait_semaphores = wait_semaphore ? 1 : 0;
const VkTimelineSemaphoreSubmitInfo timeline_si{
.sType = VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO,
.pNext = nullptr,
.waitSemaphoreValueCount = num_wait_semaphores,
.pWaitSemaphoreValues = wait_values.data(),
.waitSemaphoreValueCount = 0,
.pWaitSemaphoreValues = nullptr,
.signalSemaphoreValueCount = num_signal_semaphores,
.pSignalSemaphoreValues = signal_values.data(),
};
@ -144,7 +141,7 @@ VkResult MasterSemaphore::SubmitQueueTimeline(vk::CommandBuffer& cmdbuf,
.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO,
.pNext = &timeline_si,
.waitSemaphoreCount = num_wait_semaphores,
.pWaitSemaphores = wait_semaphores.data(),
.pWaitSemaphores = &wait_semaphore,
.pWaitDstStageMask = wait_stage_masks.data(),
.commandBufferCount = 1,
.pCommandBuffers = cmdbuf.address(),

View File

@ -544,6 +544,7 @@ void GameList::AddGamePopup(QMenu& context_menu, u64 program_id, const std::stri
QAction* remove_update = remove_menu->addAction(tr("Remove Installed Update"));
QAction* remove_dlc = remove_menu->addAction(tr("Remove All Installed DLC"));
QAction* remove_custom_config = remove_menu->addAction(tr("Remove Custom Configuration"));
QAction* remove_cache_storage = remove_menu->addAction(tr("Remove Cache Storage"));
QAction* remove_gl_shader_cache = remove_menu->addAction(tr("Remove OpenGL Pipeline Cache"));
QAction* remove_vk_shader_cache = remove_menu->addAction(tr("Remove Vulkan Pipeline Cache"));
remove_menu->addSeparator();
@ -614,6 +615,9 @@ void GameList::AddGamePopup(QMenu& context_menu, u64 program_id, const std::stri
connect(remove_custom_config, &QAction::triggered, [this, program_id, path]() {
emit RemoveFileRequested(program_id, GameListRemoveTarget::CustomConfiguration, path);
});
connect(remove_cache_storage, &QAction::triggered, [this, program_id, path] {
emit RemoveFileRequested(program_id, GameListRemoveTarget::CacheStorage, path);
});
connect(dump_romfs, &QAction::triggered, [this, program_id, path]() {
emit DumpRomFSRequested(program_id, path, DumpRomFSTarget::Normal);
});

View File

@ -45,6 +45,7 @@ enum class GameListRemoveTarget {
VkShaderCache,
AllShaderCache,
CustomConfiguration,
CacheStorage,
};
enum class DumpRomFSTarget {

View File

@ -2418,6 +2418,8 @@ void GMainWindow::OnGameListRemoveFile(u64 program_id, GameListRemoveTarget targ
return tr("Delete All Transferable Shader Caches?");
case GameListRemoveTarget::CustomConfiguration:
return tr("Remove Custom Game Configuration?");
case GameListRemoveTarget::CacheStorage:
return tr("Remove Cache Storage?");
default:
return QString{};
}
@ -2441,6 +2443,9 @@ void GMainWindow::OnGameListRemoveFile(u64 program_id, GameListRemoveTarget targ
case GameListRemoveTarget::CustomConfiguration:
RemoveCustomConfiguration(program_id, game_path);
break;
case GameListRemoveTarget::CacheStorage:
RemoveCacheStorage(program_id);
break;
}
}
@ -2530,6 +2535,21 @@ void GMainWindow::RemoveCustomConfiguration(u64 program_id, const std::string& g
}
}
void GMainWindow::RemoveCacheStorage(u64 program_id) {
const auto nand_dir = Common::FS::GetYuzuPath(Common::FS::YuzuPath::NANDDir);
auto vfs_nand_dir =
vfs->OpenDirectory(Common::FS::PathToUTF8String(nand_dir), FileSys::Mode::Read);
const auto cache_storage_path = FileSys::SaveDataFactory::GetFullPath(
*system, vfs_nand_dir, FileSys::SaveDataSpaceId::NandUser,
FileSys::SaveDataType::CacheStorage, 0 /* program_id */, {}, 0);
const auto path = Common::FS::ConcatPathSafe(nand_dir, cache_storage_path);
// Not an error if it wasn't cleared.
Common::FS::RemoveDirRecursively(path);
}
void GMainWindow::OnGameListDumpRomFS(u64 program_id, const std::string& game_path,
DumpRomFSTarget target) {
const auto failed = [this] {

View File

@ -370,6 +370,7 @@ private:
void RemoveVulkanDriverPipelineCache(u64 program_id);
void RemoveAllTransferableShaderCaches(u64 program_id);
void RemoveCustomConfiguration(u64 program_id, const std::string& game_path);
void RemoveCacheStorage(u64 program_id);
std::optional<u64> SelectRomFSDumpTarget(const FileSys::ContentProvider&, u64 program_id);
InstallResult InstallNSPXCI(const QString& filename);
InstallResult InstallNCA(const QString& filename);

View File

@ -1,7 +1,7 @@
{
"$schema": "https://raw.githubusercontent.com/microsoft/vcpkg-tool/main/docs/vcpkg.schema.json",
"name": "yuzu",
"builtin-baseline": "acc3bcf76b84ae5041c86ab55fe138ae7b8255c7",
"builtin-baseline": "656fcc6ab2b05c6d999b7eaca717027ac3738f71",
"version": "1.0",
"dependencies": [
"boost-algorithm",
@ -53,7 +53,7 @@
},
{
"name": "fmt",
"version": "9.0.0"
"version": "10.0.0"
}
]
}