early-access version 3592

This commit is contained in:
pineappleEA
2023-05-15 19:35:46 +02:00
parent d18469456b
commit 165c5bc7d0
11 changed files with 127 additions and 227 deletions

View File

@@ -47,31 +47,35 @@ TextureCache<P>::TextureCache(Runtime& runtime_, VideoCore::RasterizerInterface&
void(slot_image_views.insert(runtime, NullImageViewParams{}));
void(slot_samplers.insert(runtime, sampler_descriptor));
const s64 device_memory = static_cast<s64>(runtime.GetDeviceLocalMemory());
const u64 device_mem_per = device_memory / 100;
minimum_memory = device_mem_per * 25;
expected_memory = device_mem_per * 50;
critical_memory = device_mem_per * 80;
LOG_INFO(HW_GPU, "Texture cache device memory limits: min {} expected {} critical {}",
minimum_memory, expected_memory, critical_memory);
if constexpr (HAS_DEVICE_MEMORY_INFO) {
const s64 device_memory = static_cast<s64>(runtime.GetDeviceLocalMemory());
const s64 min_spacing_expected = device_memory - 1_GiB - 512_MiB;
const s64 min_spacing_critical = device_memory - 1_GiB;
const s64 mem_threshold = std::min(device_memory, TARGET_THRESHOLD);
const s64 min_vacancy_expected = (6 * mem_threshold) / 10;
const s64 min_vacancy_critical = (3 * mem_threshold) / 10;
expected_memory = static_cast<u64>(
std::max(std::min(device_memory - min_vacancy_expected, min_spacing_expected),
DEFAULT_EXPECTED_MEMORY));
critical_memory = static_cast<u64>(
std::max(std::min(device_memory - min_vacancy_critical, min_spacing_critical),
DEFAULT_CRITICAL_MEMORY));
minimum_memory = static_cast<u64>((device_memory - mem_threshold) / 2);
} else {
expected_memory = DEFAULT_EXPECTED_MEMORY + 512_MiB;
critical_memory = DEFAULT_CRITICAL_MEMORY + 1_GiB;
minimum_memory = 0;
}
}
template <class P>
void TextureCache<P>::RunGarbageCollector() {
if (total_used_memory < minimum_memory) {
return;
}
bool is_expected = total_used_memory >= expected_memory;
bool is_critical = total_used_memory >= critical_memory;
const u64 ticks_to_destroy = is_critical ? 10ULL : is_expected ? 25ULL : 50ULL;
size_t num_iterations = is_critical ? 40 : (is_expected ? 20 : 10);
boost::container::small_vector<
std::tuple<ImageId, bool, boost::container::small_vector<BufferImageCopy, 16>>, 40>
to_delete;
u64 total_download_size{0};
u32 largest_download_size{0};
const auto clean_up = [&](ImageId image_id) {
bool high_priority_mode = total_used_memory >= expected_memory;
bool aggressive_mode = total_used_memory >= critical_memory;
const u64 ticks_to_destroy = aggressive_mode ? 10ULL : high_priority_mode ? 25ULL : 50ULL;
size_t num_iterations = aggressive_mode ? 40 : (high_priority_mode ? 20 : 10);
const auto clean_up = [this, &num_iterations, &high_priority_mode,
&aggressive_mode](ImageId image_id) {
if (num_iterations == 0) {
return true;
}
@@ -82,70 +86,51 @@ void TextureCache<P>::RunGarbageCollector() {
// used by the async decoder thread.
return false;
}
const bool do_download = image.IsSafeDownload() &&
False(image.flags & ImageFlagBits::BadOverlap) &&
(False(image.flags & ImageFlagBits::CostlyLoad) || is_critical);
if (do_download) {
total_download_size += image.unswizzled_size_bytes;
largest_download_size = std::max(largest_download_size, image.unswizzled_size_bytes);
const bool must_download =
image.IsSafeDownload() && False(image.flags & ImageFlagBits::BadOverlap);
if (!high_priority_mode &&
(must_download || True(image.flags & ImageFlagBits::CostlyLoad))) {
return false;
}
if (must_download) {
auto map = runtime.DownloadStagingBuffer(image.unswizzled_size_bytes);
const auto copies = FullDownloadCopies(image.info);
image.DownloadMemory(map, copies);
runtime.Finish();
SwizzleImage(*gpu_memory, image.gpu_addr, image.info, copies, map.mapped_span,
swizzle_data_buffer);
}
if (True(image.flags & ImageFlagBits::Tracked)) {
UntrackImage(image, image_id);
}
UnregisterImage(image_id);
DeleteImage(image_id, image.scale_tick > frame_tick + 5);
if (total_used_memory < critical_memory) {
if (aggressive_mode) {
// Sink the aggressiveness.
num_iterations >>= 2;
aggressive_mode = false;
return false;
}
if (high_priority_mode && total_used_memory < expected_memory) {
num_iterations >>= 1;
high_priority_mode = false;
}
}
to_delete.push_back({image_id, do_download, {}});
return false;
};
lru_cache.ForEachItemBelow(frame_tick - ticks_to_destroy, clean_up);
if (total_download_size > 0) {
auto map = runtime.DownloadStagingBuffer(total_download_size);
for (auto& [image_id, do_download, copies] : to_delete) {
if (!do_download) {
continue;
}
Image& image = slot_images[image_id];
copies = FullDownloadCopies(image.info);
image.DownloadMemory(map, copies);
map.offset += Common::AlignUp(image.unswizzled_size_bytes, 64);
}
runtime.Finish();
swizzle_data_buffer.resize_destructive(Common::AlignUp(largest_download_size, 1024));
u64 offset{0};
for (auto& [image_id, do_download, copies] : to_delete) {
Image& image = slot_images[image_id];
if (do_download) {
for (auto& copy : copies) {
copy.buffer_offset += offset;
}
SwizzleImage(*gpu_memory, image.gpu_addr, image.info, copies, map.mapped_span,
swizzle_data_buffer);
offset += Common::AlignUp(image.unswizzled_size_bytes, 64);
}
if (True(image.flags & ImageFlagBits::Tracked)) {
UntrackImage(image, image_id);
}
UnregisterImage(image_id);
DeleteImage(image_id, image.scale_tick > frame_tick + 5);
}
} else {
for (auto& [image_id, do_download, copies] : to_delete) {
Image& image = slot_images[image_id];
if (True(image.flags & ImageFlagBits::Tracked)) {
UntrackImage(image, image_id);
}
UnregisterImage(image_id);
DeleteImage(image_id, image.scale_tick > frame_tick + 5);
}
}
}
template <class P>
void TextureCache<P>::TickFrame() {
// If we can obtain the memory info, use it instead of the estimate.
if (runtime.CanReportMemoryUsage() && frame_tick % 60 == 0) {
if (runtime.CanReportMemoryUsage()) {
total_used_memory = runtime.GetDeviceMemoryUsage();
}
RunGarbageCollector();
if (total_used_memory > minimum_memory) {
RunGarbageCollector();
}
sentenced_images.Tick();
sentenced_framebuffers.Tick();
sentenced_image_view.Tick();

View File

@@ -3,8 +3,6 @@
#pragma once
#include <boost/container/small_vector.hpp>
#include "common/common_funcs.h"
#include "common/common_types.h"
#include "video_core/texture_cache/slot_vector.h"
@@ -16,7 +14,6 @@ constexpr size_t MAX_MIP_LEVELS = 14;
constexpr SlotId CORRUPT_ID{0xfffffffe};
using BufferId = SlotId;
using ImageId = SlotId;
using ImageMapId = SlotId;
using ImageViewId = SlotId;
@@ -149,12 +146,6 @@ struct BufferCopy {
size_t size;
};
/// Aggregate that bundles a list of BufferCopy entries with two u64 summary fields.
/// NOTE(review): this text comes from a diff hunk whose +/- markers were stripped; the hunk
/// header (12 lines before, 6 after) suggests the struct is removed by this commit - confirm.
struct BufferCopies {
// Presumably the combined size of all entries in `copies` - TODO confirm units against callers.
u64 total_size;
// Presumably the size of the largest single entry in `copies` - TODO confirm against callers.
u64 largest_copy;
// The copy descriptors; small_vector holds up to 16 elements in inline storage before allocating.
boost::container::small_vector<BufferCopy, 16> copies;
};
struct SwizzleParameters {
Extent3D num_tiles;
Extent3D block;

View File

@@ -914,7 +914,7 @@ void ConvertImage(std::span<const u8> input, const ImageInfo& info, std::span<u8
}
}
boost::container::small_vector<BufferImageCopy, 16> FullDownloadCopies(const ImageInfo& info) {
std::vector<BufferImageCopy> FullDownloadCopies(const ImageInfo& info) {
const Extent3D size = info.size;
const u32 bytes_per_block = BytesPerBlock(info.format);
if (info.type == ImageType::Linear) {
@@ -942,7 +942,7 @@ boost::container::small_vector<BufferImageCopy, 16> FullDownloadCopies(const Ima
u32 host_offset = 0;
boost::container::small_vector<BufferImageCopy, 16> copies(num_levels);
std::vector<BufferImageCopy> copies(num_levels);
for (s32 level = 0; level < num_levels; ++level) {
const Extent3D level_size = AdjustMipSize(size, level);
const u32 num_blocks_per_layer = NumBlocks(level_size, tile_size);

View File

@@ -5,7 +5,6 @@
#include <optional>
#include <span>
#include <boost/container/small_vector.hpp>
#include "common/common_types.h"
#include "common/scratch_buffer.h"
@@ -74,8 +73,7 @@ struct OverlapResult {
void ConvertImage(std::span<const u8> input, const ImageInfo& info, std::span<u8> output,
std::span<BufferImageCopy> copies);
[[nodiscard]] boost::container::small_vector<BufferImageCopy, 16> FullDownloadCopies(
const ImageInfo& info);
[[nodiscard]] std::vector<BufferImageCopy> FullDownloadCopies(const ImageInfo& info);
[[nodiscard]] Extent3D MipSize(Extent3D size, u32 level);