early-access version 1786
This commit is contained in:
@@ -113,6 +113,43 @@ void ImageBase::InsertView(const ImageViewInfo& view_info, ImageViewId image_vie
|
||||
image_view_ids.push_back(image_view_id);
|
||||
}
|
||||
|
||||
bool ImageBase::IsSafeDownload() const noexcept {
|
||||
// Skip images that were not modified from the GPU
|
||||
if (False(flags & ImageFlagBits::GpuModified)) {
|
||||
return false;
|
||||
}
|
||||
// Skip images that .are. modified from the CPU
|
||||
// We don't want to write sensitive data from the guest
|
||||
if (True(flags & ImageFlagBits::CpuModified)) {
|
||||
return false;
|
||||
}
|
||||
if (info.num_samples > 1) {
|
||||
LOG_WARNING(HW_GPU, "MSAA image downloads are not implemented");
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
void ImageBase::CheckBadOverlapState() {
|
||||
if (False(flags & ImageFlagBits::BadOverlap)) {
|
||||
return;
|
||||
}
|
||||
if (!overlapping_images.empty()) {
|
||||
return;
|
||||
}
|
||||
flags &= ~ImageFlagBits::BadOverlap;
|
||||
}
|
||||
|
||||
void ImageBase::CheckAliasState() {
|
||||
if (False(flags & ImageFlagBits::Alias)) {
|
||||
return;
|
||||
}
|
||||
if (!aliased_images.empty()) {
|
||||
return;
|
||||
}
|
||||
flags &= ~ImageFlagBits::Alias;
|
||||
}
|
||||
|
||||
void AddImageAlias(ImageBase& lhs, ImageBase& rhs, ImageId lhs_id, ImageId rhs_id) {
|
||||
static constexpr auto OPTIONS = RelaxedOptions::Size | RelaxedOptions::Format;
|
||||
ASSERT(lhs.info.type == rhs.info.type);
|
||||
|
@@ -25,6 +25,12 @@ enum class ImageFlagBits : u32 {
|
||||
Strong = 1 << 5, ///< Exists in the image table, the dimensions are can be trusted
|
||||
Registered = 1 << 6, ///< True when the image is registered
|
||||
Picked = 1 << 7, ///< Temporary flag to mark the image as picked
|
||||
|
||||
// Garbage Collection Flags
|
||||
BadOverlap = 1 << 8, ///< This image overlaps other but doesn't fit, has higher
|
||||
///< garbage collection priority
|
||||
Alias = 1 << 9, ///< This image has aliases and has priority on garbage
|
||||
///< collection
|
||||
};
|
||||
DECLARE_ENUM_FLAG_OPERATORS(ImageFlagBits)
|
||||
|
||||
@@ -44,11 +50,16 @@ struct ImageBase {
|
||||
|
||||
void InsertView(const ImageViewInfo& view_info, ImageViewId image_view_id);
|
||||
|
||||
[[nodiscard]] bool IsSafeDownload() const noexcept;
|
||||
|
||||
/// Returns true when the range [overlap_cpu_addr, overlap_cpu_addr + overlap_size) intersects
/// this image's range [cpu_addr, cpu_addr_end).
[[nodiscard]] bool Overlaps(VAddr overlap_cpu_addr, size_t overlap_size) const noexcept {
    const VAddr overlap_end = overlap_cpu_addr + overlap_size;
    // The queried range finishes at or before this image begins: no intersection.
    if (overlap_end <= cpu_addr) {
        return false;
    }
    // Otherwise they intersect exactly when the queried range starts before this image ends.
    return overlap_cpu_addr < cpu_addr_end;
}
|
||||
|
||||
void CheckBadOverlapState();
|
||||
void CheckAliasState();
|
||||
|
||||
ImageInfo info;
|
||||
|
||||
u32 guest_size_bytes = 0;
|
||||
@@ -72,6 +83,7 @@ struct ImageBase {
|
||||
std::vector<SubresourceBase> slice_subresources;
|
||||
|
||||
std::vector<AliasedImage> aliased_images;
|
||||
std::vector<ImageId> overlapping_images;
|
||||
};
|
||||
|
||||
struct ImageAllocBase {
|
||||
|
@@ -5,6 +5,7 @@
|
||||
#pragma once
|
||||
|
||||
#include <array>
|
||||
#include <bit>
|
||||
#include <concepts>
|
||||
#include <numeric>
|
||||
#include <type_traits>
|
||||
@@ -32,6 +33,60 @@ template <class T>
|
||||
requires std::is_nothrow_move_assignable_v<T>&&
|
||||
std::is_nothrow_move_constructible_v<T> class SlotVector {
|
||||
public:
|
||||
class Iterator {
|
||||
friend SlotVector<T>;
|
||||
|
||||
public:
|
||||
constexpr Iterator() = default;
|
||||
|
||||
Iterator& operator++() noexcept {
|
||||
const u64* const bitset = slot_vector->stored_bitset.data();
|
||||
const u32 size = static_cast<u32>(slot_vector->stored_bitset.size()) * 64;
|
||||
if (id.index < size) {
|
||||
do {
|
||||
++id.index;
|
||||
} while (id.index < size && !IsValid(bitset));
|
||||
if (id.index == size) {
|
||||
id.index = SlotId::INVALID_INDEX;
|
||||
}
|
||||
}
|
||||
return *this;
|
||||
}
|
||||
|
||||
Iterator operator++(int) noexcept {
|
||||
const Iterator copy{*this};
|
||||
++*this;
|
||||
return copy;
|
||||
}
|
||||
|
||||
bool operator==(const Iterator& other) const noexcept {
|
||||
return id.index == other.id.index;
|
||||
}
|
||||
|
||||
bool operator!=(const Iterator& other) const noexcept {
|
||||
return id.index != other.id.index;
|
||||
}
|
||||
|
||||
std::pair<SlotId, T*> operator*() const noexcept {
|
||||
return {id, std::addressof((*slot_vector)[id])};
|
||||
}
|
||||
|
||||
T* operator->() const noexcept {
|
||||
return std::addressof((*slot_vector)[id]);
|
||||
}
|
||||
|
||||
private:
|
||||
Iterator(SlotVector<T>* slot_vector_, SlotId id_) noexcept
|
||||
: slot_vector{slot_vector_}, id{id_} {}
|
||||
|
||||
bool IsValid(const u64* bitset) noexcept {
|
||||
return ((bitset[id.index / 64] >> (id.index % 64)) & 1) != 0;
|
||||
}
|
||||
|
||||
SlotVector<T>* slot_vector;
|
||||
SlotId id;
|
||||
};
|
||||
|
||||
~SlotVector() noexcept {
|
||||
size_t index = 0;
|
||||
for (u64 bits : stored_bitset) {
|
||||
@@ -70,6 +125,20 @@ public:
|
||||
ResetStorageBit(id.index);
|
||||
}
|
||||
|
||||
/// Returns an iterator to the first occupied slot, or end() when no slot is occupied.
[[nodiscard]] Iterator begin() noexcept {
    // Each 64-bit word of the bitset tracks 64 slots; locate the first word with a bit set.
    const auto it = std::ranges::find_if(stored_bitset, [](u64 value) { return value != 0; });
    if (it == stored_bitset.end()) {
        return end();
    }
    // The distance must be measured from the start of the bitset to the match. The reversed
    // argument order yields a non-positive value that wraps when converted to u32, producing
    // a bogus slot id whenever the first occupied slot is not in word 0.
    const u32 word_index = static_cast<u32>(std::distance(stored_bitset.begin(), it));
    // The lowest set bit inside that word is the first occupied slot.
    const SlotId first_id{word_index * 64 + static_cast<u32>(std::countr_zero(*it))};
    return Iterator(this, first_id);
}
|
||||
|
||||
/// Returns the past-the-end iterator, identified by SlotId::INVALID_INDEX.
[[nodiscard]] Iterator end() noexcept {
    const SlotId sentinel{SlotId::INVALID_INDEX};
    return Iterator(this, sentinel);
}
|
||||
|
||||
private:
|
||||
struct NonTrivialDummy {
|
||||
NonTrivialDummy() noexcept {}
|
||||
@@ -140,7 +209,6 @@ private:
|
||||
|
||||
Entry* values = nullptr;
|
||||
size_t values_capacity = 0;
|
||||
size_t values_size = 0;
|
||||
|
||||
std::vector<u64> stored_bitset;
|
||||
std::vector<u32> free_list;
|
||||
|
@@ -22,6 +22,7 @@
|
||||
#include "common/common_funcs.h"
|
||||
#include "common/common_types.h"
|
||||
#include "common/logging/log.h"
|
||||
#include "common/settings.h"
|
||||
#include "video_core/compatible_formats.h"
|
||||
#include "video_core/delayed_destruction_ring.h"
|
||||
#include "video_core/dirty_flags.h"
|
||||
@@ -75,6 +76,9 @@ class TextureCache {
|
||||
/// Sampler ID for bugged sampler ids
|
||||
static constexpr SamplerId NULL_SAMPLER_ID{0};
|
||||
|
||||
static constexpr u64 expected_memory = 1024ULL * 1024ULL * 1024ULL;
|
||||
static constexpr u64 critical_memory = 2 * 1024ULL * 1024ULL * 1024ULL;
|
||||
|
||||
using Runtime = typename P::Runtime;
|
||||
using Image = typename P::Image;
|
||||
using ImageAlloc = typename P::ImageAlloc;
|
||||
@@ -333,6 +337,7 @@ private:
|
||||
std::unordered_map<u64, std::vector<ImageId>, IdentityHash<u64>> page_table;
|
||||
|
||||
bool has_deleted_images = false;
|
||||
u64 total_used_memory = 0;
|
||||
|
||||
SlotVector<Image> slot_images;
|
||||
SlotVector<ImageView> slot_image_views;
|
||||
@@ -353,6 +358,7 @@ private:
|
||||
|
||||
u64 modification_tick = 0;
|
||||
u64 frame_tick = 0;
|
||||
typename SlotVector<Image>::Iterator deletion_iterator;
|
||||
};
|
||||
|
||||
template <class P>
|
||||
@@ -373,11 +379,82 @@ TextureCache<P>::TextureCache(Runtime& runtime_, VideoCore::RasterizerInterface&
|
||||
// This way the null resource becomes a compile time constant
|
||||
void(slot_image_views.insert(runtime, NullImageParams{}));
|
||||
void(slot_samplers.insert(runtime, sampler_descriptor));
|
||||
|
||||
deletion_iterator = slot_images.begin();
|
||||
}
|
||||
|
||||
template <class P>
|
||||
void TextureCache<P>::TickFrame() {
|
||||
// Tick sentenced resources in this order to ensure they are destroyed in the right order
|
||||
const bool enabled_gc = Settings::values.use_caches_gc.GetValue();
|
||||
if (!enabled_gc) {
|
||||
// @Note(Blinkhawk): compile error with SCOPE_EXIT on msvc.
|
||||
sentenced_images.Tick();
|
||||
sentenced_framebuffers.Tick();
|
||||
sentenced_image_view.Tick();
|
||||
++frame_tick;
|
||||
return;
|
||||
}
|
||||
const bool high_priority_mode = total_used_memory >= expected_memory;
|
||||
const bool aggressive_mode = total_used_memory >= critical_memory;
|
||||
const u64 ticks_to_destroy = high_priority_mode ? 60 : 100;
|
||||
int num_iterations = aggressive_mode ? 256 : (high_priority_mode ? 128 : 64);
|
||||
for (; num_iterations > 0; --num_iterations) {
|
||||
if (deletion_iterator == slot_images.end()) {
|
||||
deletion_iterator = slot_images.begin();
|
||||
if (deletion_iterator == slot_images.end()) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
auto [image_id, image_tmp] = *deletion_iterator;
|
||||
Image* image = image_tmp; // fix clang error.
|
||||
const bool is_alias = True(image->flags & ImageFlagBits::Alias);
|
||||
const bool is_bad_overlap = True(image->flags & ImageFlagBits::BadOverlap);
|
||||
const bool must_download = image->IsSafeDownload();
|
||||
bool should_care = is_bad_overlap || is_alias || (high_priority_mode && !must_download);
|
||||
const u64 ticks_needed =
|
||||
is_bad_overlap
|
||||
? ticks_to_destroy >> 4
|
||||
: ((should_care && aggressive_mode) ? ticks_to_destroy >> 1 : ticks_to_destroy);
|
||||
should_care |= aggressive_mode;
|
||||
if (should_care && image->frame_tick + ticks_needed < frame_tick) {
|
||||
if (is_bad_overlap) {
|
||||
const bool overlap_check = std::ranges::all_of(
|
||||
image->overlapping_images, [&, image](const ImageId& overlap_id) {
|
||||
auto& overlap = slot_images[overlap_id];
|
||||
return overlap.frame_tick >= image->frame_tick;
|
||||
});
|
||||
if (!overlap_check) {
|
||||
++deletion_iterator;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
if (!is_bad_overlap && must_download) {
|
||||
const bool alias_check = std::ranges::none_of(
|
||||
image->aliased_images, [&, image](const AliasedImage& alias) {
|
||||
auto& alias_image = slot_images[alias.id];
|
||||
return (alias_image.frame_tick < image->frame_tick) ||
|
||||
(alias_image.modification_tick < image->modification_tick);
|
||||
});
|
||||
|
||||
if (alias_check) {
|
||||
auto map = runtime.DownloadStagingBuffer(image->unswizzled_size_bytes);
|
||||
const auto copies = FullDownloadCopies(image->info);
|
||||
image->DownloadMemory(map, copies);
|
||||
runtime.Finish();
|
||||
SwizzleImage(gpu_memory, image->gpu_addr, image->info, copies, map.mapped_span);
|
||||
}
|
||||
}
|
||||
if (True(image->flags & ImageFlagBits::Tracked)) {
|
||||
UntrackImage(*image);
|
||||
}
|
||||
UnregisterImage(image_id);
|
||||
DeleteImage(image_id);
|
||||
if (is_bad_overlap) {
|
||||
num_iterations++;
|
||||
}
|
||||
}
|
||||
++deletion_iterator;
|
||||
}
|
||||
sentenced_images.Tick();
|
||||
sentenced_framebuffers.Tick();
|
||||
sentenced_image_view.Tick();
|
||||
@@ -568,17 +645,7 @@ template <class P>
|
||||
void TextureCache<P>::DownloadMemory(VAddr cpu_addr, size_t size) {
|
||||
std::vector<ImageId> images;
|
||||
ForEachImageInRegion(cpu_addr, size, [this, &images](ImageId image_id, ImageBase& image) {
|
||||
// Skip images that were not modified from the GPU
|
||||
if (False(image.flags & ImageFlagBits::GpuModified)) {
|
||||
return;
|
||||
}
|
||||
// Skip images that .are. modified from the CPU
|
||||
// We don't want to write sensitive data from the guest
|
||||
if (True(image.flags & ImageFlagBits::CpuModified)) {
|
||||
return;
|
||||
}
|
||||
if (image.info.num_samples > 1) {
|
||||
LOG_WARNING(HW_GPU, "MSAA image downloads are not implemented");
|
||||
if (!image.IsSafeDownload()) {
|
||||
return;
|
||||
}
|
||||
image.flags &= ~ImageFlagBits::GpuModified;
|
||||
@@ -967,6 +1034,7 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA
|
||||
std::vector<ImageId> overlap_ids;
|
||||
std::vector<ImageId> left_aliased_ids;
|
||||
std::vector<ImageId> right_aliased_ids;
|
||||
std::vector<ImageId> bad_overlap_ids;
|
||||
ForEachImageInRegion(cpu_addr, size_bytes, [&](ImageId overlap_id, ImageBase& overlap) {
|
||||
if (info.type != overlap.info.type) {
|
||||
return;
|
||||
@@ -992,9 +1060,14 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA
|
||||
const ImageBase new_image_base(new_info, gpu_addr, cpu_addr);
|
||||
if (IsSubresource(new_info, overlap, gpu_addr, options, broken_views, native_bgr)) {
|
||||
left_aliased_ids.push_back(overlap_id);
|
||||
overlap.flags |= ImageFlagBits::Alias;
|
||||
} else if (IsSubresource(overlap.info, new_image_base, overlap.gpu_addr, options,
|
||||
broken_views, native_bgr)) {
|
||||
right_aliased_ids.push_back(overlap_id);
|
||||
overlap.flags |= ImageFlagBits::Alias;
|
||||
} else {
|
||||
bad_overlap_ids.push_back(overlap_id);
|
||||
overlap.flags |= ImageFlagBits::BadOverlap;
|
||||
}
|
||||
});
|
||||
const ImageId new_image_id = slot_images.insert(runtime, new_info, gpu_addr, cpu_addr);
|
||||
@@ -1022,10 +1095,18 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA
|
||||
for (const ImageId aliased_id : right_aliased_ids) {
|
||||
ImageBase& aliased = slot_images[aliased_id];
|
||||
AddImageAlias(new_image_base, aliased, new_image_id, aliased_id);
|
||||
new_image.flags |= ImageFlagBits::Alias;
|
||||
}
|
||||
for (const ImageId aliased_id : left_aliased_ids) {
|
||||
ImageBase& aliased = slot_images[aliased_id];
|
||||
AddImageAlias(aliased, new_image_base, aliased_id, new_image_id);
|
||||
new_image.flags |= ImageFlagBits::Alias;
|
||||
}
|
||||
for (const ImageId aliased_id : bad_overlap_ids) {
|
||||
ImageBase& aliased = slot_images[aliased_id];
|
||||
aliased.overlapping_images.push_back(new_image_id);
|
||||
new_image.overlapping_images.push_back(aliased_id);
|
||||
new_image.flags |= ImageFlagBits::BadOverlap;
|
||||
}
|
||||
RegisterImage(new_image_id);
|
||||
return new_image_id;
|
||||
@@ -1195,6 +1276,8 @@ void TextureCache<P>::RegisterImage(ImageId image_id) {
|
||||
image.flags |= ImageFlagBits::Registered;
|
||||
ForEachPage(image.cpu_addr, image.guest_size_bytes,
|
||||
[this, image_id](u64 page) { page_table[page].push_back(image_id); });
|
||||
total_used_memory +=
|
||||
Common::AlignUp(std::max(image.guest_size_bytes, image.unswizzled_size_bytes), 1024);
|
||||
}
|
||||
|
||||
template <class P>
|
||||
@@ -1203,6 +1286,9 @@ void TextureCache<P>::UnregisterImage(ImageId image_id) {
|
||||
ASSERT_MSG(True(image.flags & ImageFlagBits::Registered),
|
||||
"Trying to unregister an already registered image");
|
||||
image.flags &= ~ImageFlagBits::Registered;
|
||||
image.flags &= ~ImageFlagBits::BadOverlap;
|
||||
total_used_memory -=
|
||||
Common::AlignUp(std::max(image.guest_size_bytes, image.unswizzled_size_bytes), 1024);
|
||||
ForEachPage(image.cpu_addr, image.guest_size_bytes, [this, image_id](u64 page) {
|
||||
const auto page_it = page_table.find(page);
|
||||
if (page_it == page_table.end()) {
|
||||
@@ -1276,9 +1362,19 @@ void TextureCache<P>::DeleteImage(ImageId image_id) {
|
||||
std::erase_if(other_image.aliased_images, [image_id](const AliasedImage& other_alias) {
|
||||
return other_alias.id == image_id;
|
||||
});
|
||||
other_image.CheckAliasState();
|
||||
ASSERT_MSG(num_removed_aliases == 1, "Invalid number of removed aliases: {}",
|
||||
num_removed_aliases);
|
||||
}
|
||||
for (const ImageId overlap_id : image.overlapping_images) {
|
||||
ImageBase& other_image = slot_images[overlap_id];
|
||||
[[maybe_unused]] const size_t num_removed_overlaps = std::erase_if(
|
||||
other_image.overlapping_images,
|
||||
[image_id](const ImageId other_overlap_id) { return other_overlap_id == image_id; });
|
||||
other_image.CheckBadOverlapState();
|
||||
ASSERT_MSG(num_removed_overlaps == 1, "Invalid number of removed overlapps: {}",
|
||||
num_removed_overlaps);
|
||||
}
|
||||
for (const ImageViewId image_view_id : image_view_ids) {
|
||||
sentenced_image_view.Push(std::move(slot_image_views[image_view_id]));
|
||||
slot_image_views.erase(image_view_id);
|
||||
|
@@ -47,6 +47,7 @@
|
||||
#include "video_core/texture_cache/formatter.h"
|
||||
#include "video_core/texture_cache/samples_helper.h"
|
||||
#include "video_core/texture_cache/util.h"
|
||||
#include "video_core/textures/astc.h"
|
||||
#include "video_core/textures/decoders.h"
|
||||
|
||||
namespace VideoCommon {
|
||||
@@ -580,6 +581,8 @@ void SwizzleBlockLinearImage(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr
|
||||
|
||||
for (s32 layer = 0; layer < info.resources.layers; ++layer) {
|
||||
const std::span<const u8> src = input.subspan(host_offset);
|
||||
gpu_memory.ReadBlockUnsafe(gpu_addr + guest_offset, dst.data(), dst.size_bytes());
|
||||
|
||||
SwizzleTexture(dst, src, bytes_per_block, num_tiles.width, num_tiles.height,
|
||||
num_tiles.depth, block.height, block.depth);
|
||||
|
||||
@@ -884,8 +887,16 @@ void ConvertImage(std::span<const u8> input, const ImageInfo& info, std::span<u8
|
||||
ASSERT(copy.image_extent == mip_size);
|
||||
ASSERT(copy.buffer_row_length == Common::AlignUp(mip_size.width, tile_size.width));
|
||||
ASSERT(copy.buffer_image_height == Common::AlignUp(mip_size.height, tile_size.height));
|
||||
DecompressBC4(input.subspan(copy.buffer_offset), copy.image_extent,
|
||||
output.subspan(output_offset));
|
||||
if (IsPixelFormatASTC(info.format)) {
|
||||
ASSERT(copy.image_extent.depth == 1);
|
||||
Tegra::Texture::ASTC::Decompress(input.subspan(copy.buffer_offset),
|
||||
copy.image_extent.width, copy.image_extent.height,
|
||||
copy.image_subresource.num_layers, tile_size.width,
|
||||
tile_size.height, output.subspan(output_offset));
|
||||
} else {
|
||||
DecompressBC4(input.subspan(copy.buffer_offset), copy.image_extent,
|
||||
output.subspan(output_offset));
|
||||
}
|
||||
copy.buffer_offset = output_offset;
|
||||
copy.buffer_row_length = mip_size.width;
|
||||
copy.buffer_image_height = mip_size.height;
|
||||
|
Reference in New Issue
Block a user