early-access version 3914
@@ -549,7 +549,7 @@ void BufferCache<P>::CommitAsyncFlushesHigh() {
        it++;
    }

    boost::container::small_vector<std::pair<BufferCopy, BufferId>, 1> downloads;
    boost::container::small_vector<std::pair<BufferCopy, BufferId>, 16> downloads;
    u64 total_size_bytes = 0;
    u64 largest_copy = 0;
    for (const IntervalSet& intervals : committed_ranges) {
@@ -925,6 +925,11 @@ void BufferCache<P>::BindHostGraphicsStorageBuffers(size_t stage) {
        const u32 offset = buffer.Offset(binding.cpu_addr);
        buffer.MarkUsage(offset, size);
        const bool is_written = ((channel_state->written_storage_buffers[stage] >> index) & 1) != 0;

        if (is_written) {
            MarkWrittenBuffer(binding.buffer_id, binding.cpu_addr, size);
        }

        if constexpr (NEEDS_BIND_STORAGE_INDEX) {
            runtime.BindStorageBuffer(stage, binding_index, buffer, offset, size, is_written);
            ++binding_index;
@@ -942,6 +947,11 @@ void BufferCache<P>::BindHostGraphicsTextureBuffers(size_t stage) {
        const u32 size = binding.size;
        SynchronizeBuffer(buffer, binding.cpu_addr, size);

        const bool is_written = ((channel_state->written_texture_buffers[stage] >> index) & 1) != 0;
        if (is_written) {
            MarkWrittenBuffer(binding.buffer_id, binding.cpu_addr, size);
        }

        const u32 offset = buffer.Offset(binding.cpu_addr);
        const PixelFormat format = binding.format;
        buffer.MarkUsage(offset, size);
@@ -974,6 +984,8 @@ void BufferCache<P>::BindHostTransformFeedbackBuffers() {
        const u32 size = binding.size;
        SynchronizeBuffer(buffer, binding.cpu_addr, size);

        MarkWrittenBuffer(binding.buffer_id, binding.cpu_addr, size);

        const u32 offset = buffer.Offset(binding.cpu_addr);
        buffer.MarkUsage(offset, size);
        host_bindings.buffers.push_back(&buffer);
@@ -1026,6 +1038,11 @@ void BufferCache<P>::BindHostComputeStorageBuffers() {
        buffer.MarkUsage(offset, size);
        const bool is_written =
            ((channel_state->written_compute_storage_buffers >> index) & 1) != 0;

        if (is_written) {
            MarkWrittenBuffer(binding.buffer_id, binding.cpu_addr, size);
        }

        if constexpr (NEEDS_BIND_STORAGE_INDEX) {
            runtime.BindComputeStorageBuffer(binding_index, buffer, offset, size, is_written);
            ++binding_index;
@@ -1043,6 +1060,12 @@ void BufferCache<P>::BindHostComputeTextureBuffers() {
        const u32 size = binding.size;
        SynchronizeBuffer(buffer, binding.cpu_addr, size);

        const bool is_written =
            ((channel_state->written_compute_texture_buffers >> index) & 1) != 0;
        if (is_written) {
            MarkWrittenBuffer(binding.buffer_id, binding.cpu_addr, size);
        }

        const u32 offset = buffer.Offset(binding.cpu_addr);
        const PixelFormat format = binding.format;
        buffer.MarkUsage(offset, size);
@@ -1218,16 +1241,11 @@ void BufferCache<P>::UpdateUniformBuffers(size_t stage) {

template <class P>
void BufferCache<P>::UpdateStorageBuffers(size_t stage) {
    const u32 written_mask = channel_state->written_storage_buffers[stage];
    ForEachEnabledBit(channel_state->enabled_storage_buffers[stage], [&](u32 index) {
        // Resolve buffer
        Binding& binding = channel_state->storage_buffers[stage][index];
        const BufferId buffer_id = FindBuffer(binding.cpu_addr, binding.size);
        binding.buffer_id = buffer_id;
        // Mark buffer as written if needed
        if (((written_mask >> index) & 1) != 0) {
            MarkWrittenBuffer(buffer_id, binding.cpu_addr, binding.size);
        }
    });
}

@@ -1236,10 +1254,6 @@ void BufferCache<P>::UpdateTextureBuffers(size_t stage) {
    ForEachEnabledBit(channel_state->enabled_texture_buffers[stage], [&](u32 index) {
        Binding& binding = channel_state->texture_buffers[stage][index];
        binding.buffer_id = FindBuffer(binding.cpu_addr, binding.size);
        // Mark buffer as written if needed
        if (((channel_state->written_texture_buffers[stage] >> index) & 1) != 0) {
            MarkWrittenBuffer(binding.buffer_id, binding.cpu_addr, binding.size);
        }
    });
}

@@ -1269,7 +1283,6 @@ void BufferCache<P>::UpdateTransformFeedbackBuffer(u32 index) {
        .size = size,
        .buffer_id = buffer_id,
    };
    MarkWrittenBuffer(buffer_id, *cpu_addr, size);
}

template <class P>
@@ -1296,10 +1309,6 @@ void BufferCache<P>::UpdateComputeStorageBuffers() {
        // Resolve buffer
        Binding& binding = channel_state->compute_storage_buffers[index];
        binding.buffer_id = FindBuffer(binding.cpu_addr, binding.size);
        // Mark as written if needed
        if (((channel_state->written_compute_storage_buffers >> index) & 1) != 0) {
            MarkWrittenBuffer(binding.buffer_id, binding.cpu_addr, binding.size);
        }
    });
}

@@ -1308,18 +1317,11 @@ void BufferCache<P>::UpdateComputeTextureBuffers() {
    ForEachEnabledBit(channel_state->enabled_compute_texture_buffers, [&](u32 index) {
        Binding& binding = channel_state->compute_texture_buffers[index];
        binding.buffer_id = FindBuffer(binding.cpu_addr, binding.size);
        // Mark as written if needed
        if (((channel_state->written_compute_texture_buffers >> index) & 1) != 0) {
            MarkWrittenBuffer(binding.buffer_id, binding.cpu_addr, binding.size);
        }
    });
}

template <class P>
void BufferCache<P>::MarkWrittenBuffer(BufferId buffer_id, VAddr cpu_addr, u32 size) {
    if (memory_tracker.IsRegionCpuModified(cpu_addr, size)) {
        SynchronizeBuffer(slot_buffers[buffer_id], cpu_addr, size);
    }
    memory_tracker.MarkRegionAsGpuModified(cpu_addr, size);

    const IntervalType base_interval{cpu_addr, cpu_addr + size};

@@ -1357,6 +1357,8 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA
    join_bad_overlap_ids.clear();
    join_copies_to_do.clear();
    join_alias_indices.clear();
    boost::container::small_vector<ImageId, 8> merge_mips;
    ImageId merge_with_existing_id{};
    const bool this_is_linear = info.type == ImageType::Linear;
    const auto region_check = [&](ImageId overlap_id, ImageBase& overlap) {
        if (True(overlap.flags & ImageFlagBits::Remapped)) {
@@ -1397,6 +1399,12 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA
            join_right_aliased_ids.push_back(overlap_id);
            overlap.flags |= ImageFlagBits::Alias;
            join_copies_to_do.emplace_back(JoinCopy{true, overlap_id});
        } else if (IsSubLevel(new_image_base, overlap)) {
            if (new_image_base.info.resources.levels > overlap.info.resources.levels) {
                merge_mips.push_back(overlap_id);
            } else {
                merge_with_existing_id = overlap_id;
            }
        } else {
            join_bad_overlap_ids.push_back(overlap_id);
        }
@@ -1439,6 +1447,10 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA
        }
    }

    if (merge_with_existing_id) {
        return merge_with_existing_id;
    }

    const ImageId new_image_id = slot_images.insert(runtime, new_info, gpu_addr, cpu_addr);
    Image& new_image = slot_images[new_image_id];

@@ -1467,6 +1479,32 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA
        ScaleDown(new_image);
    }

    const auto& resolution = Settings::values.resolution_info;
    const u32 up_scale = can_rescale ? resolution.up_scale : 1;
    const u32 down_shift = can_rescale ? resolution.down_shift : 0;

    for (auto overlap_id : merge_mips) {
        auto& overlap = slot_images[overlap_id];
        if (True(overlap.flags & ImageFlagBits::GpuModified)) {
            new_image.flags |= ImageFlagBits::GpuModified;
            new_image.modification_tick = overlap.modification_tick;

            const SubresourceBase base = new_image.TryFindBase(overlap.gpu_addr).value();
            auto copies =
                MakeShrinkImageCopies(new_image.info, overlap.info, base, up_scale, down_shift);
            if (new_image.info.num_samples != overlap.info.num_samples) {
                runtime.CopyImageMSAA(new_image, overlap, std::move(copies));
            } else {
                runtime.CopyImage(new_image, overlap, std::move(copies));
            }
        }
        if (True(overlap.flags & ImageFlagBits::Tracked)) {
            UntrackImage(overlap, overlap_id);
        }
        UnregisterImage(overlap_id);
        DeleteImage(overlap_id);
    }

    std::ranges::sort(join_copies_to_do, [this](const JoinCopy& lhs, const JoinCopy& rhs) {
        const ImageBase& lhs_image = slot_images[lhs.id];
        const ImageBase& rhs_image = slot_images[rhs.id];
@@ -1523,10 +1561,7 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA
        }
        if (True(overlap.flags & ImageFlagBits::GpuModified)) {
            new_image.flags |= ImageFlagBits::GpuModified;
            const auto& resolution = Settings::values.resolution_info;
            const SubresourceBase base = new_image.TryFindBase(overlap.gpu_addr).value();
            const u32 up_scale = can_rescale ? resolution.up_scale : 1;
            const u32 down_shift = can_rescale ? resolution.down_shift : 0;
            auto copies = MakeShrinkImageCopies(new_info, overlap.info, base, up_scale, down_shift);
            if (overlap.info.num_samples != new_image.info.num_samples) {
                runtime.CopyImageMSAA(new_image, overlap, std::move(copies));

@@ -1233,6 +1233,33 @@ bool IsSubresource(const ImageInfo& candidate, const ImageBase& image, GPUVAddr
        .has_value();
}

bool IsSubLevel(const ImageBase& image, const ImageBase& overlap) {
    const std::optional<SubresourceBase> base = image.TryFindBase(overlap.gpu_addr);
    if (!base) {
        return false;
    }
    if (!IsViewCompatible(image.info.format, overlap.info.format, false, true)) {
        return false;
    }
    if (AdjustMipSize(image.info.size, base->level) != overlap.info.size) {
        return false;
    }

    const auto level_info = MakeLevelInfo(image.info);
    auto level_sizes = CalculateLevelSizes(level_info, image.info.resources.levels);
    auto total_size{0};
    auto level = base->level;
    while (level) {
        total_size += level_sizes[level - 1];
        level--;
    }

    if (overlap.gpu_addr - total_size != image.gpu_addr) {
        return false;
    }
    return true;
}

bool IsSubCopy(const ImageInfo& candidate, const ImageBase& image, GPUVAddr candidate_addr) {
    const std::optional<SubresourceBase> base = image.TryFindBase(candidate_addr);
    if (!base) {

@@ -111,6 +111,8 @@ void SwizzleImage(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr, const Ima
                                 GPUVAddr candidate_addr, RelaxedOptions options, bool broken_views,
                                 bool native_bgr);

[[nodiscard]] bool IsSubLevel(const ImageBase& image, const ImageBase& overlap);

[[nodiscard]] bool IsSubCopy(const ImageInfo& candidate, const ImageBase& image,
                             GPUVAddr candidate_addr);
