early-access version 2790
This commit is contained in:
@@ -63,7 +63,7 @@ PixelFormat PixelFormatFromTextureInfo(TextureFormat format, ComponentType red,
|
||||
case Hash(TextureFormat::A4B4G4R4, UNORM):
|
||||
return PixelFormat::A4B4G4R4_UNORM;
|
||||
case Hash(TextureFormat::G4R4, UNORM):
|
||||
return PixelFormat::R4G4_UNORM;
|
||||
return PixelFormat::G4R4_UNORM;
|
||||
case Hash(TextureFormat::A5B5G5R1, UNORM):
|
||||
return PixelFormat::A5B5G5R1_UNORM;
|
||||
case Hash(TextureFormat::R8, UNORM):
|
||||
|
@@ -153,8 +153,8 @@ struct fmt::formatter<VideoCore::Surface::PixelFormat> : fmt::formatter<fmt::str
|
||||
return "BC7_SRGB";
|
||||
case PixelFormat::A4B4G4R4_UNORM:
|
||||
return "A4B4G4R4_UNORM";
|
||||
case PixelFormat::R4G4_UNORM:
|
||||
return "R4G4_UNORM";
|
||||
case PixelFormat::G4R4_UNORM:
|
||||
return "G4R4_UNORM";
|
||||
case PixelFormat::ASTC_2D_4X4_SRGB:
|
||||
return "ASTC_2D_4X4_SRGB";
|
||||
case PixelFormat::ASTC_2D_8X8_SRGB:
|
||||
|
@@ -7,6 +7,7 @@
|
||||
#include <vector>
|
||||
|
||||
#include "common/common_types.h"
|
||||
#include "common/div_ceil.h"
|
||||
#include "video_core/surface.h"
|
||||
#include "video_core/texture_cache/formatter.h"
|
||||
#include "video_core/texture_cache/image_base.h"
|
||||
@@ -182,10 +183,6 @@ void AddImageAlias(ImageBase& lhs, ImageBase& rhs, ImageId lhs_id, ImageId rhs_i
|
||||
};
|
||||
const bool is_lhs_compressed = lhs_block.width > 1 || lhs_block.height > 1;
|
||||
const bool is_rhs_compressed = rhs_block.width > 1 || rhs_block.height > 1;
|
||||
if (is_lhs_compressed && is_rhs_compressed) {
|
||||
LOG_ERROR(HW_GPU, "Compressed to compressed image aliasing is not implemented");
|
||||
return;
|
||||
}
|
||||
const s32 lhs_mips = lhs.info.resources.levels;
|
||||
const s32 rhs_mips = rhs.info.resources.levels;
|
||||
const s32 num_mips = std::min(lhs_mips - base->level, rhs_mips);
|
||||
@@ -199,12 +196,12 @@ void AddImageAlias(ImageBase& lhs, ImageBase& rhs, ImageId lhs_id, ImageId rhs_i
|
||||
Extent3D lhs_size = MipSize(lhs.info.size, base->level + mip_level);
|
||||
Extent3D rhs_size = MipSize(rhs.info.size, mip_level);
|
||||
if (is_lhs_compressed) {
|
||||
lhs_size.width /= lhs_block.width;
|
||||
lhs_size.height /= lhs_block.height;
|
||||
lhs_size.width = Common::DivCeil(lhs_size.width, lhs_block.width);
|
||||
lhs_size.height = Common::DivCeil(lhs_size.height, lhs_block.height);
|
||||
}
|
||||
if (is_rhs_compressed) {
|
||||
rhs_size.width /= rhs_block.width;
|
||||
rhs_size.height /= rhs_block.height;
|
||||
rhs_size.width = Common::DivCeil(rhs_size.width, rhs_block.width);
|
||||
rhs_size.height = Common::DivCeil(rhs_size.height, rhs_block.height);
|
||||
}
|
||||
const Extent3D copy_size{
|
||||
.width = std::min(lhs_size.width, rhs_size.width),
|
||||
|
@@ -88,6 +88,9 @@ struct ImageBase {
|
||||
u32 scale_rating = 0;
|
||||
u64 scale_tick = 0;
|
||||
bool has_scaled = false;
|
||||
|
||||
size_t channel = 0;
|
||||
|
||||
ImageFlagBits flags = ImageFlagBits::CpuModified;
|
||||
|
||||
GPUVAddr gpu_addr = 0;
|
||||
|
@@ -26,6 +26,7 @@ struct RenderTargets {
|
||||
ImageViewId depth_buffer_id{};
|
||||
std::array<u8, NUM_RT> draw_buffers{};
|
||||
Extent2D size{};
|
||||
bool is_rescaled{};
|
||||
};
|
||||
|
||||
} // namespace VideoCommon
|
||||
|
16
src/video_core/texture_cache/texture_cache.cpp
Executable file
16
src/video_core/texture_cache/texture_cache.cpp
Executable file
@@ -0,0 +1,16 @@
|
||||
// Copyright 2021 yuzu Emulator Project
|
||||
// Licensed under GPLv3 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include "video_core/control/channel_state_cache.inc"
|
||||
#include "video_core/texture_cache/texture_cache_base.h"
|
||||
|
||||
namespace VideoCommon {
|
||||
|
||||
/// Builds the texture cache state for one channel: forwards `state` to the ChannelInfo base and
/// constructs the graphics/compute image and sampler descriptor tables from `gpu_memory`.
/// NOTE(review): `gpu_memory` is not declared in this class; presumably it is a ChannelInfo
/// member initialized from `state` — confirm.
TextureCacheChannelInfo::TextureCacheChannelInfo(Tegra::Control::ChannelState& state) noexcept
    : ChannelInfo(state),
      graphics_image_table{gpu_memory},
      graphics_sampler_table{gpu_memory},
      compute_image_table{gpu_memory},
      compute_sampler_table{gpu_memory} {}
|
||||
|
||||
template class VideoCommon::ChannelSetupCaches<VideoCommon::TextureCacheChannelInfo>;
|
||||
|
||||
} // namespace VideoCommon
|
@@ -1,5 +1,7 @@
|
||||
// SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
// SPDX-FileCopyrightText: 2021 yuzu emulator team
|
||||
// (https://github.com/skyline-emu/)
|
||||
// SPDX-License-Identifier: GPL-3.0-or-later Licensed under GPLv3
|
||||
// or any later version Refer to the license.txt file included.
|
||||
|
||||
#pragma once
|
||||
|
||||
@@ -7,6 +9,7 @@
|
||||
|
||||
#include "common/alignment.h"
|
||||
#include "common/settings.h"
|
||||
#include "video_core/control/channel_state.h"
|
||||
#include "video_core/dirty_flags.h"
|
||||
#include "video_core/engines/kepler_compute.h"
|
||||
#include "video_core/texture_cache/image_view_base.h"
|
||||
@@ -29,12 +32,8 @@ using VideoCore::Surface::SurfaceType;
|
||||
using namespace Common::Literals;
|
||||
|
||||
template <class P>
|
||||
TextureCache<P>::TextureCache(Runtime& runtime_, VideoCore::RasterizerInterface& rasterizer_,
|
||||
Tegra::Engines::Maxwell3D& maxwell3d_,
|
||||
Tegra::Engines::KeplerCompute& kepler_compute_,
|
||||
Tegra::MemoryManager& gpu_memory_)
|
||||
: runtime{runtime_}, rasterizer{rasterizer_}, maxwell3d{maxwell3d_},
|
||||
kepler_compute{kepler_compute_}, gpu_memory{gpu_memory_} {
|
||||
TextureCache<P>::TextureCache(Runtime& runtime_, VideoCore::RasterizerInterface& rasterizer_)
|
||||
: runtime{runtime_}, rasterizer{rasterizer_} {
|
||||
// Configure null sampler
|
||||
TSCEntry sampler_descriptor{};
|
||||
sampler_descriptor.min_filter.Assign(Tegra::Texture::TextureFilter::Linear);
|
||||
@@ -93,7 +92,7 @@ void TextureCache<P>::RunGarbageCollector() {
|
||||
const auto copies = FullDownloadCopies(image.info);
|
||||
image.DownloadMemory(map, copies);
|
||||
runtime.Finish();
|
||||
SwizzleImage(gpu_memory, image.gpu_addr, image.info, copies, map.mapped_span);
|
||||
SwizzleImage(*gpu_memory, image.gpu_addr, image.info, copies, map.mapped_span);
|
||||
}
|
||||
if (True(image.flags & ImageFlagBits::Tracked)) {
|
||||
UntrackImage(image, image_id);
|
||||
@@ -152,22 +151,24 @@ void TextureCache<P>::MarkModification(ImageId id) noexcept {
|
||||
template <class P>
|
||||
template <bool has_blacklists>
|
||||
void TextureCache<P>::FillGraphicsImageViews(std::span<ImageViewInOut> views) {
|
||||
FillImageViews<has_blacklists>(graphics_image_table, graphics_image_view_ids, views);
|
||||
FillImageViews<has_blacklists>(channel_state->graphics_image_table,
|
||||
channel_state->graphics_image_view_ids, views);
|
||||
}
|
||||
|
||||
template <class P>
|
||||
void TextureCache<P>::FillComputeImageViews(std::span<ImageViewInOut> views) {
|
||||
FillImageViews<true>(compute_image_table, compute_image_view_ids, views);
|
||||
FillImageViews<true>(channel_state->compute_image_table, channel_state->compute_image_view_ids,
|
||||
views);
|
||||
}
|
||||
|
||||
template <class P>
|
||||
typename P::Sampler* TextureCache<P>::GetGraphicsSampler(u32 index) {
|
||||
if (index > graphics_sampler_table.Limit()) {
|
||||
if (index > channel_state->graphics_sampler_table.Limit()) {
|
||||
LOG_DEBUG(HW_GPU, "Invalid sampler index={}", index);
|
||||
return &slot_samplers[NULL_SAMPLER_ID];
|
||||
}
|
||||
const auto [descriptor, is_new] = graphics_sampler_table.Read(index);
|
||||
SamplerId& id = graphics_sampler_ids[index];
|
||||
const auto [descriptor, is_new] = channel_state->graphics_sampler_table.Read(index);
|
||||
SamplerId& id = channel_state->graphics_sampler_ids[index];
|
||||
if (is_new) {
|
||||
id = FindSampler(descriptor);
|
||||
}
|
||||
@@ -176,12 +177,12 @@ typename P::Sampler* TextureCache<P>::GetGraphicsSampler(u32 index) {
|
||||
|
||||
template <class P>
|
||||
typename P::Sampler* TextureCache<P>::GetComputeSampler(u32 index) {
|
||||
if (index > compute_sampler_table.Limit()) {
|
||||
if (index > channel_state->compute_sampler_table.Limit()) {
|
||||
LOG_DEBUG(HW_GPU, "Invalid sampler index={}", index);
|
||||
return &slot_samplers[NULL_SAMPLER_ID];
|
||||
}
|
||||
const auto [descriptor, is_new] = compute_sampler_table.Read(index);
|
||||
SamplerId& id = compute_sampler_ids[index];
|
||||
const auto [descriptor, is_new] = channel_state->compute_sampler_table.Read(index);
|
||||
SamplerId& id = channel_state->compute_sampler_ids[index];
|
||||
if (is_new) {
|
||||
id = FindSampler(descriptor);
|
||||
}
|
||||
@@ -191,34 +192,36 @@ typename P::Sampler* TextureCache<P>::GetComputeSampler(u32 index) {
|
||||
template <class P>
|
||||
void TextureCache<P>::SynchronizeGraphicsDescriptors() {
|
||||
using SamplerIndex = Tegra::Engines::Maxwell3D::Regs::SamplerIndex;
|
||||
const bool linked_tsc = maxwell3d.regs.sampler_index == SamplerIndex::ViaHeaderIndex;
|
||||
const u32 tic_limit = maxwell3d.regs.tic.limit;
|
||||
const u32 tsc_limit = linked_tsc ? tic_limit : maxwell3d.regs.tsc.limit;
|
||||
if (graphics_sampler_table.Synchornize(maxwell3d.regs.tsc.Address(), tsc_limit)) {
|
||||
graphics_sampler_ids.resize(tsc_limit + 1, CORRUPT_ID);
|
||||
const bool linked_tsc = maxwell3d->regs.sampler_index == SamplerIndex::ViaHeaderIndex;
|
||||
const u32 tic_limit = maxwell3d->regs.tic.limit;
|
||||
const u32 tsc_limit = linked_tsc ? tic_limit : maxwell3d->regs.tsc.limit;
|
||||
if (channel_state->graphics_sampler_table.Synchornize(maxwell3d->regs.tsc.Address(),
|
||||
tsc_limit)) {
|
||||
channel_state->graphics_sampler_ids.resize(tsc_limit + 1, CORRUPT_ID);
|
||||
}
|
||||
if (graphics_image_table.Synchornize(maxwell3d.regs.tic.Address(), tic_limit)) {
|
||||
graphics_image_view_ids.resize(tic_limit + 1, CORRUPT_ID);
|
||||
if (channel_state->graphics_image_table.Synchornize(maxwell3d->regs.tic.Address(), tic_limit)) {
|
||||
channel_state->graphics_image_view_ids.resize(tic_limit + 1, CORRUPT_ID);
|
||||
}
|
||||
}
|
||||
|
||||
template <class P>
|
||||
void TextureCache<P>::SynchronizeComputeDescriptors() {
|
||||
const bool linked_tsc = kepler_compute.launch_description.linked_tsc;
|
||||
const u32 tic_limit = kepler_compute.regs.tic.limit;
|
||||
const u32 tsc_limit = linked_tsc ? tic_limit : kepler_compute.regs.tsc.limit;
|
||||
const GPUVAddr tsc_gpu_addr = kepler_compute.regs.tsc.Address();
|
||||
if (compute_sampler_table.Synchornize(tsc_gpu_addr, tsc_limit)) {
|
||||
compute_sampler_ids.resize(tsc_limit + 1, CORRUPT_ID);
|
||||
const bool linked_tsc = kepler_compute->launch_description.linked_tsc;
|
||||
const u32 tic_limit = kepler_compute->regs.tic.limit;
|
||||
const u32 tsc_limit = linked_tsc ? tic_limit : kepler_compute->regs.tsc.limit;
|
||||
const GPUVAddr tsc_gpu_addr = kepler_compute->regs.tsc.Address();
|
||||
if (channel_state->compute_sampler_table.Synchornize(tsc_gpu_addr, tsc_limit)) {
|
||||
channel_state->compute_sampler_ids.resize(tsc_limit + 1, CORRUPT_ID);
|
||||
}
|
||||
if (compute_image_table.Synchornize(kepler_compute.regs.tic.Address(), tic_limit)) {
|
||||
compute_image_view_ids.resize(tic_limit + 1, CORRUPT_ID);
|
||||
if (channel_state->compute_image_table.Synchornize(kepler_compute->regs.tic.Address(),
|
||||
tic_limit)) {
|
||||
channel_state->compute_image_view_ids.resize(tic_limit + 1, CORRUPT_ID);
|
||||
}
|
||||
}
|
||||
|
||||
template <class P>
|
||||
bool TextureCache<P>::RescaleRenderTargets(bool is_clear) {
|
||||
auto& flags = maxwell3d.dirty.flags;
|
||||
auto& flags = maxwell3d->dirty.flags;
|
||||
u32 scale_rating = 0;
|
||||
bool rescaled = false;
|
||||
std::array<ImageId, NUM_RT> tmp_color_images{};
|
||||
@@ -315,7 +318,7 @@ bool TextureCache<P>::RescaleRenderTargets(bool is_clear) {
|
||||
template <class P>
|
||||
void TextureCache<P>::UpdateRenderTargets(bool is_clear) {
|
||||
using namespace VideoCommon::Dirty;
|
||||
auto& flags = maxwell3d.dirty.flags;
|
||||
auto& flags = maxwell3d->dirty.flags;
|
||||
if (!flags[Dirty::RenderTargets]) {
|
||||
for (size_t index = 0; index < NUM_RT; ++index) {
|
||||
ImageViewId& color_buffer_id = render_targets.color_buffer_ids[index];
|
||||
@@ -342,7 +345,7 @@ void TextureCache<P>::UpdateRenderTargets(bool is_clear) {
|
||||
PrepareImageView(depth_buffer_id, true, is_clear && IsFullClear(depth_buffer_id));
|
||||
|
||||
for (size_t index = 0; index < NUM_RT; ++index) {
|
||||
render_targets.draw_buffers[index] = static_cast<u8>(maxwell3d.regs.rt_control.Map(index));
|
||||
render_targets.draw_buffers[index] = static_cast<u8>(maxwell3d->regs.rt_control.Map(index));
|
||||
}
|
||||
u32 up_scale = 1;
|
||||
u32 down_shift = 0;
|
||||
@@ -351,8 +354,8 @@ void TextureCache<P>::UpdateRenderTargets(bool is_clear) {
|
||||
down_shift = Settings::values.resolution_info.down_shift;
|
||||
}
|
||||
render_targets.size = Extent2D{
|
||||
(maxwell3d.regs.render_area.width * up_scale) >> down_shift,
|
||||
(maxwell3d.regs.render_area.height * up_scale) >> down_shift,
|
||||
(maxwell3d->regs.render_area.width * up_scale) >> down_shift,
|
||||
(maxwell3d->regs.render_area.height * up_scale) >> down_shift,
|
||||
};
|
||||
|
||||
flags[Dirty::DepthBiasGlobal] = true;
|
||||
@@ -458,7 +461,7 @@ void TextureCache<P>::DownloadMemory(VAddr cpu_addr, size_t size) {
|
||||
const auto copies = FullDownloadCopies(image.info);
|
||||
image.DownloadMemory(map, copies);
|
||||
runtime.Finish();
|
||||
SwizzleImage(gpu_memory, image.gpu_addr, image.info, copies, map.mapped_span);
|
||||
SwizzleImage(*gpu_memory, image.gpu_addr, image.info, copies, map.mapped_span);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -477,12 +480,20 @@ void TextureCache<P>::UnmapMemory(VAddr cpu_addr, size_t size) {
|
||||
}
|
||||
|
||||
template <class P>
|
||||
void TextureCache<P>::UnmapGPUMemory(GPUVAddr gpu_addr, size_t size) {
|
||||
void TextureCache<P>::UnmapGPUMemory(size_t as_id, GPUVAddr gpu_addr, size_t size) {
|
||||
std::vector<ImageId> deleted_images;
|
||||
ForEachImageInRegionGPU(gpu_addr, size,
|
||||
ForEachImageInRegionGPU(as_id, gpu_addr, size,
|
||||
[&](ImageId id, Image&) { deleted_images.push_back(id); });
|
||||
for (const ImageId id : deleted_images) {
|
||||
Image& image = slot_images[id];
|
||||
if (True(image.flags & ImageFlagBits::CpuModified)) {
|
||||
return;
|
||||
}
|
||||
image.flags |= ImageFlagBits::CpuModified;
|
||||
if (True(image.flags & ImageFlagBits::Tracked)) {
|
||||
UntrackImage(image, id);
|
||||
}
|
||||
/*
|
||||
if (True(image.flags & ImageFlagBits::Remapped)) {
|
||||
continue;
|
||||
}
|
||||
@@ -490,6 +501,7 @@ void TextureCache<P>::UnmapGPUMemory(GPUVAddr gpu_addr, size_t size) {
|
||||
if (True(image.flags & ImageFlagBits::Tracked)) {
|
||||
UntrackImage(image, id);
|
||||
}
|
||||
*/
|
||||
}
|
||||
}
|
||||
|
||||
@@ -655,7 +667,7 @@ void TextureCache<P>::PopAsyncFlushes() {
|
||||
for (const ImageId image_id : download_ids) {
|
||||
const ImageBase& image = slot_images[image_id];
|
||||
const auto copies = FullDownloadCopies(image.info);
|
||||
SwizzleImage(gpu_memory, image.gpu_addr, image.info, copies, download_span);
|
||||
SwizzleImage(*gpu_memory, image.gpu_addr, image.info, copies, download_span);
|
||||
download_map.offset += image.unswizzled_size_bytes;
|
||||
download_span = download_span.subspan(image.unswizzled_size_bytes);
|
||||
}
|
||||
@@ -714,26 +726,26 @@ void TextureCache<P>::UploadImageContents(Image& image, StagingBuffer& staging)
|
||||
const GPUVAddr gpu_addr = image.gpu_addr;
|
||||
|
||||
if (True(image.flags & ImageFlagBits::AcceleratedUpload)) {
|
||||
gpu_memory.ReadBlockUnsafe(gpu_addr, mapped_span.data(), mapped_span.size_bytes());
|
||||
gpu_memory->ReadBlockUnsafe(gpu_addr, mapped_span.data(), mapped_span.size_bytes());
|
||||
const auto uploads = FullUploadSwizzles(image.info);
|
||||
runtime.AccelerateImageUpload(image, staging, uploads);
|
||||
} else if (True(image.flags & ImageFlagBits::Converted)) {
|
||||
std::vector<u8> unswizzled_data(image.unswizzled_size_bytes);
|
||||
auto copies = UnswizzleImage(gpu_memory, gpu_addr, image.info, unswizzled_data);
|
||||
auto copies = UnswizzleImage(*gpu_memory, gpu_addr, image.info, unswizzled_data);
|
||||
ConvertImage(unswizzled_data, image.info, mapped_span, copies);
|
||||
image.UploadMemory(staging, copies);
|
||||
} else {
|
||||
const auto copies = UnswizzleImage(gpu_memory, gpu_addr, image.info, mapped_span);
|
||||
const auto copies = UnswizzleImage(*gpu_memory, gpu_addr, image.info, mapped_span);
|
||||
image.UploadMemory(staging, copies);
|
||||
}
|
||||
}
|
||||
|
||||
template <class P>
|
||||
ImageViewId TextureCache<P>::FindImageView(const TICEntry& config) {
|
||||
if (!IsValidEntry(gpu_memory, config)) {
|
||||
if (!IsValidEntry(*gpu_memory, config)) {
|
||||
return NULL_IMAGE_VIEW_ID;
|
||||
}
|
||||
const auto [pair, is_new] = image_views.try_emplace(config);
|
||||
const auto [pair, is_new] = channel_state->image_views.try_emplace(config);
|
||||
ImageViewId& image_view_id = pair->second;
|
||||
if (is_new) {
|
||||
image_view_id = CreateImageView(config);
|
||||
@@ -777,9 +789,9 @@ ImageId TextureCache<P>::FindOrInsertImage(const ImageInfo& info, GPUVAddr gpu_a
|
||||
template <class P>
|
||||
ImageId TextureCache<P>::FindImage(const ImageInfo& info, GPUVAddr gpu_addr,
|
||||
RelaxedOptions options) {
|
||||
std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
|
||||
std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr);
|
||||
if (!cpu_addr) {
|
||||
cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr, CalculateGuestSizeInBytes(info));
|
||||
cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr, CalculateGuestSizeInBytes(info));
|
||||
if (!cpu_addr) {
|
||||
return ImageId{};
|
||||
}
|
||||
@@ -860,7 +872,7 @@ void TextureCache<P>::InvalidateScale(Image& image) {
|
||||
image.scale_tick = frame_tick + 1;
|
||||
}
|
||||
const std::span<const ImageViewId> image_view_ids = image.image_view_ids;
|
||||
auto& dirty = maxwell3d.dirty.flags;
|
||||
auto& dirty = maxwell3d->dirty.flags;
|
||||
dirty[Dirty::RenderTargets] = true;
|
||||
dirty[Dirty::ZetaBuffer] = true;
|
||||
for (size_t rt = 0; rt < NUM_RT; ++rt) {
|
||||
@@ -880,12 +892,15 @@ void TextureCache<P>::InvalidateScale(Image& image) {
|
||||
}
|
||||
image.image_view_ids.clear();
|
||||
image.image_view_infos.clear();
|
||||
if constexpr (ENABLE_VALIDATION) {
|
||||
std::ranges::fill(graphics_image_view_ids, CORRUPT_ID);
|
||||
std::ranges::fill(compute_image_view_ids, CORRUPT_ID);
|
||||
for (size_t c : active_channel_ids) {
|
||||
auto& channel_info = channel_storage[c];
|
||||
if constexpr (ENABLE_VALIDATION) {
|
||||
std::ranges::fill(channel_info.graphics_image_view_ids, CORRUPT_ID);
|
||||
std::ranges::fill(channel_info.compute_image_view_ids, CORRUPT_ID);
|
||||
}
|
||||
channel_info.graphics_image_table.Invalidate();
|
||||
channel_info.compute_image_table.Invalidate();
|
||||
}
|
||||
graphics_image_table.Invalidate();
|
||||
compute_image_table.Invalidate();
|
||||
has_deleted_images = true;
|
||||
}
|
||||
|
||||
@@ -929,10 +944,10 @@ bool TextureCache<P>::ScaleDown(Image& image) {
|
||||
template <class P>
|
||||
ImageId TextureCache<P>::InsertImage(const ImageInfo& info, GPUVAddr gpu_addr,
|
||||
RelaxedOptions options) {
|
||||
std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
|
||||
std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr);
|
||||
if (!cpu_addr) {
|
||||
const auto size = CalculateGuestSizeInBytes(info);
|
||||
cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr, size);
|
||||
cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr, size);
|
||||
if (!cpu_addr) {
|
||||
const VAddr fake_addr = ~(1ULL << 40ULL) + virtual_invalid_space;
|
||||
virtual_invalid_space += Common::AlignUp(size, 32);
|
||||
@@ -1050,7 +1065,7 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA
|
||||
const ImageId new_image_id = slot_images.insert(runtime, new_info, gpu_addr, cpu_addr);
|
||||
Image& new_image = slot_images[new_image_id];
|
||||
|
||||
if (!gpu_memory.IsContinousRange(new_image.gpu_addr, new_image.guest_size_bytes)) {
|
||||
if (!gpu_memory->IsContinousRange(new_image.gpu_addr, new_image.guest_size_bytes)) {
|
||||
new_image.flags |= ImageFlagBits::Sparse;
|
||||
}
|
||||
|
||||
@@ -1192,7 +1207,7 @@ SamplerId TextureCache<P>::FindSampler(const TSCEntry& config) {
|
||||
if (std::ranges::all_of(config.raw, [](u64 value) { return value == 0; })) {
|
||||
return NULL_SAMPLER_ID;
|
||||
}
|
||||
const auto [pair, is_new] = samplers.try_emplace(config);
|
||||
const auto [pair, is_new] = channel_state->samplers.try_emplace(config);
|
||||
if (is_new) {
|
||||
pair->second = slot_samplers.insert(runtime, config);
|
||||
}
|
||||
@@ -1201,7 +1216,7 @@ SamplerId TextureCache<P>::FindSampler(const TSCEntry& config) {
|
||||
|
||||
template <class P>
|
||||
ImageViewId TextureCache<P>::FindColorBuffer(size_t index, bool is_clear) {
|
||||
const auto& regs = maxwell3d.regs;
|
||||
const auto& regs = maxwell3d->regs;
|
||||
if (index >= regs.rt_control.count) {
|
||||
return ImageViewId{};
|
||||
}
|
||||
@@ -1219,7 +1234,7 @@ ImageViewId TextureCache<P>::FindColorBuffer(size_t index, bool is_clear) {
|
||||
|
||||
template <class P>
|
||||
ImageViewId TextureCache<P>::FindDepthBuffer(bool is_clear) {
|
||||
const auto& regs = maxwell3d.regs;
|
||||
const auto& regs = maxwell3d->regs;
|
||||
if (!regs.zeta_enable) {
|
||||
return ImageViewId{};
|
||||
}
|
||||
@@ -1316,11 +1331,17 @@ void TextureCache<P>::ForEachImageInRegion(VAddr cpu_addr, size_t size, Func&& f
|
||||
|
||||
template <class P>
|
||||
template <typename Func>
|
||||
void TextureCache<P>::ForEachImageInRegionGPU(GPUVAddr gpu_addr, size_t size, Func&& func) {
|
||||
void TextureCache<P>::ForEachImageInRegionGPU(size_t as_id, GPUVAddr gpu_addr, size_t size,
|
||||
Func&& func) {
|
||||
using FuncReturn = typename std::invoke_result<Func, ImageId, Image&>::type;
|
||||
static constexpr bool BOOL_BREAK = std::is_same_v<FuncReturn, bool>;
|
||||
boost::container::small_vector<ImageId, 8> images;
|
||||
ForEachGPUPage(gpu_addr, size, [this, &images, gpu_addr, size, func](u64 page) {
|
||||
auto storage_id = getStorageID(as_id);
|
||||
if (!storage_id) {
|
||||
return;
|
||||
}
|
||||
auto& gpu_page_table = gpu_page_table_storage[*storage_id];
|
||||
ForEachGPUPage(gpu_addr, size, [this, gpu_page_table, &images, gpu_addr, size, func](u64 page) {
|
||||
const auto it = gpu_page_table.find(page);
|
||||
if (it == gpu_page_table.end()) {
|
||||
if constexpr (BOOL_BREAK) {
|
||||
@@ -1403,9 +1424,9 @@ template <typename Func>
|
||||
void TextureCache<P>::ForEachSparseSegment(ImageBase& image, Func&& func) {
|
||||
using FuncReturn = typename std::invoke_result<Func, GPUVAddr, VAddr, size_t>::type;
|
||||
static constexpr bool RETURNS_BOOL = std::is_same_v<FuncReturn, bool>;
|
||||
const auto segments = gpu_memory.GetSubmappedRange(image.gpu_addr, image.guest_size_bytes);
|
||||
const auto segments = gpu_memory->GetSubmappedRange(image.gpu_addr, image.guest_size_bytes);
|
||||
for (const auto& [gpu_addr, size] : segments) {
|
||||
std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
|
||||
std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr);
|
||||
ASSERT(cpu_addr);
|
||||
if constexpr (RETURNS_BOOL) {
|
||||
if (func(gpu_addr, *cpu_addr, size)) {
|
||||
@@ -1448,8 +1469,9 @@ void TextureCache<P>::RegisterImage(ImageId image_id) {
|
||||
}
|
||||
image.lru_index = lru_cache.Insert(image_id, frame_tick);
|
||||
|
||||
ForEachGPUPage(image.gpu_addr, image.guest_size_bytes,
|
||||
[this, image_id](u64 page) { gpu_page_table[page].push_back(image_id); });
|
||||
ForEachGPUPage(image.gpu_addr, image.guest_size_bytes, [this, image_id](u64 page) {
|
||||
(*channel_state->gpu_page_table)[page].push_back(image_id);
|
||||
});
|
||||
if (False(image.flags & ImageFlagBits::Sparse)) {
|
||||
auto map_id =
|
||||
slot_map_views.insert(image.gpu_addr, image.cpu_addr, image.guest_size_bytes, image_id);
|
||||
@@ -1480,9 +1502,9 @@ void TextureCache<P>::UnregisterImage(ImageId image_id) {
|
||||
image.flags &= ~ImageFlagBits::BadOverlap;
|
||||
lru_cache.Free(image.lru_index);
|
||||
const auto& clear_page_table =
|
||||
[this, image_id](
|
||||
u64 page,
|
||||
std::unordered_map<u64, std::vector<ImageId>, IdentityHash<u64>>& selected_page_table) {
|
||||
[this, image_id](u64 page,
|
||||
std::unordered_map<u64, std::vector<ImageId>, Common::IdentityHash<u64>>&
|
||||
selected_page_table) {
|
||||
const auto page_it = selected_page_table.find(page);
|
||||
if (page_it == selected_page_table.end()) {
|
||||
ASSERT_MSG(false, "Unregistering unregistered page=0x{:x}", page << PAGE_BITS);
|
||||
@@ -1497,8 +1519,9 @@ void TextureCache<P>::UnregisterImage(ImageId image_id) {
|
||||
}
|
||||
image_ids.erase(vector_it);
|
||||
};
|
||||
ForEachGPUPage(image.gpu_addr, image.guest_size_bytes,
|
||||
[this, &clear_page_table](u64 page) { clear_page_table(page, gpu_page_table); });
|
||||
ForEachGPUPage(image.gpu_addr, image.guest_size_bytes, [this, &clear_page_table](u64 page) {
|
||||
clear_page_table(page, (*channel_state->gpu_page_table));
|
||||
});
|
||||
if (False(image.flags & ImageFlagBits::Sparse)) {
|
||||
const auto map_id = image.map_view_id;
|
||||
ForEachCPUPage(image.cpu_addr, image.guest_size_bytes, [this, map_id](u64 page) {
|
||||
@@ -1631,7 +1654,7 @@ void TextureCache<P>::DeleteImage(ImageId image_id, bool immediate_delete) {
|
||||
ASSERT_MSG(False(image.flags & ImageFlagBits::Registered), "Image was not unregistered");
|
||||
|
||||
// Mark render targets as dirty
|
||||
auto& dirty = maxwell3d.dirty.flags;
|
||||
auto& dirty = maxwell3d->dirty.flags;
|
||||
dirty[Dirty::RenderTargets] = true;
|
||||
dirty[Dirty::ZetaBuffer] = true;
|
||||
for (size_t rt = 0; rt < NUM_RT; ++rt) {
|
||||
@@ -1681,24 +1704,30 @@ void TextureCache<P>::DeleteImage(ImageId image_id, bool immediate_delete) {
|
||||
if (alloc_images.empty()) {
|
||||
image_allocs_table.erase(alloc_it);
|
||||
}
|
||||
if constexpr (ENABLE_VALIDATION) {
|
||||
std::ranges::fill(graphics_image_view_ids, CORRUPT_ID);
|
||||
std::ranges::fill(compute_image_view_ids, CORRUPT_ID);
|
||||
for (size_t c : active_channel_ids) {
|
||||
auto& channel_info = channel_storage[c];
|
||||
if constexpr (ENABLE_VALIDATION) {
|
||||
std::ranges::fill(channel_info.graphics_image_view_ids, CORRUPT_ID);
|
||||
std::ranges::fill(channel_info.compute_image_view_ids, CORRUPT_ID);
|
||||
}
|
||||
channel_info.graphics_image_table.Invalidate();
|
||||
channel_info.compute_image_table.Invalidate();
|
||||
}
|
||||
graphics_image_table.Invalidate();
|
||||
compute_image_table.Invalidate();
|
||||
has_deleted_images = true;
|
||||
}
|
||||
|
||||
template <class P>
|
||||
void TextureCache<P>::RemoveImageViewReferences(std::span<const ImageViewId> removed_views) {
|
||||
auto it = image_views.begin();
|
||||
while (it != image_views.end()) {
|
||||
const auto found = std::ranges::find(removed_views, it->second);
|
||||
if (found != removed_views.end()) {
|
||||
it = image_views.erase(it);
|
||||
} else {
|
||||
++it;
|
||||
for (size_t c : active_channel_ids) {
|
||||
auto& channel_info = channel_storage[c];
|
||||
auto it = channel_info.image_views.begin();
|
||||
while (it != channel_info.image_views.end()) {
|
||||
const auto found = std::ranges::find(removed_views, it->second);
|
||||
if (found != removed_views.end()) {
|
||||
it = channel_info.image_views.erase(it);
|
||||
} else {
|
||||
++it;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1729,6 +1758,7 @@ void TextureCache<P>::SynchronizeAliases(ImageId image_id) {
|
||||
boost::container::small_vector<const AliasedImage*, 1> aliased_images;
|
||||
Image& image = slot_images[image_id];
|
||||
bool any_rescaled = True(image.flags & ImageFlagBits::Rescaled);
|
||||
bool any_modified = True(image.flags & ImageFlagBits::GpuModified);
|
||||
u64 most_recent_tick = image.modification_tick;
|
||||
for (const AliasedImage& aliased : image.aliased_images) {
|
||||
ImageBase& aliased_image = slot_images[aliased.id];
|
||||
@@ -1736,9 +1766,7 @@ void TextureCache<P>::SynchronizeAliases(ImageId image_id) {
|
||||
most_recent_tick = std::max(most_recent_tick, aliased_image.modification_tick);
|
||||
aliased_images.push_back(&aliased);
|
||||
any_rescaled |= True(aliased_image.flags & ImageFlagBits::Rescaled);
|
||||
if (True(aliased_image.flags & ImageFlagBits::GpuModified)) {
|
||||
image.flags |= ImageFlagBits::GpuModified;
|
||||
}
|
||||
any_modified |= True(aliased_image.flags & ImageFlagBits::GpuModified);
|
||||
}
|
||||
}
|
||||
if (aliased_images.empty()) {
|
||||
@@ -1753,6 +1781,9 @@ void TextureCache<P>::SynchronizeAliases(ImageId image_id) {
|
||||
}
|
||||
}
|
||||
image.modification_tick = most_recent_tick;
|
||||
if (any_modified) {
|
||||
image.flags |= ImageFlagBits::GpuModified;
|
||||
}
|
||||
std::ranges::sort(aliased_images, [this](const AliasedImage* lhs, const AliasedImage* rhs) {
|
||||
const ImageBase& lhs_image = slot_images[lhs->id];
|
||||
const ImageBase& rhs_image = slot_images[rhs->id];
|
||||
@@ -1931,6 +1962,7 @@ std::pair<FramebufferId, ImageViewId> TextureCache<P>::RenderTargetFromImage(
|
||||
.color_buffer_ids = {color_view_id},
|
||||
.depth_buffer_id = depth_view_id,
|
||||
.size = {extent.width >> samples_x, extent.height >> samples_y},
|
||||
.is_rescaled = is_rescaled,
|
||||
});
|
||||
return {framebuffer_id, view_id};
|
||||
}
|
||||
@@ -1943,7 +1975,7 @@ bool TextureCache<P>::IsFullClear(ImageViewId id) {
|
||||
const ImageViewBase& image_view = slot_image_views[id];
|
||||
const ImageBase& image = slot_images[image_view.image_id];
|
||||
const Extent3D size = image_view.size;
|
||||
const auto& regs = maxwell3d.regs;
|
||||
const auto& regs = maxwell3d->regs;
|
||||
const auto& scissor = regs.scissor_test[0];
|
||||
if (image.info.resources.levels > 1 || image.info.resources.layers > 1) {
|
||||
// Images with multiple resources can't be cleared in a single call
|
||||
@@ -1958,4 +1990,19 @@ bool TextureCache<P>::IsFullClear(ImageViewId id) {
|
||||
scissor.max_y >= size.height;
|
||||
}
|
||||
|
||||
template <class P>
|
||||
void TextureCache<P>::CreateChannel(struct Tegra::Control::ChannelState& channel) {
|
||||
VideoCommon::ChannelSetupCaches<TextureCacheChannelInfo>::CreateChannel(channel);
|
||||
const auto it = channel_map.find(channel.bind_id);
|
||||
auto* this_state = &channel_storage[it->second];
|
||||
const auto& this_as_ref = address_spaces[channel.memory_manager->GetID()];
|
||||
this_state->gpu_page_table = &gpu_page_table_storage[this_as_ref.storage_id];
|
||||
}
|
||||
|
||||
/// Appends a new, default-constructed page table to gpu_page_table_storage; map_id is unused. Presumably invoked when a GPU address space is registered (going by the name) - confirm against ChannelSetupCaches.
|
||||
template <class P>
|
||||
void TextureCache<P>::OnGPUASRegister([[maybe_unused]] size_t map_id) {
|
||||
gpu_page_table_storage.emplace_back();
|
||||
}
|
||||
|
||||
} // namespace VideoCommon
|
||||
|
@@ -1,8 +1,12 @@
|
||||
// SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
// SPDX-FileCopyrightText: 2021 yuzu emulator team
|
||||
// (https://github.com/skyline-emu/)
|
||||
// SPDX-License-Identifier: GPL-3.0-or-later Licensed under GPLv3
|
||||
// or any later version Refer to the license.txt file included.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <deque>
|
||||
#include <limits>
|
||||
#include <mutex>
|
||||
#include <span>
|
||||
#include <type_traits>
|
||||
@@ -11,9 +15,11 @@
|
||||
#include <queue>
|
||||
|
||||
#include "common/common_types.h"
|
||||
#include "common/hash.h"
|
||||
#include "common/literals.h"
|
||||
#include "common/lru_cache.h"
|
||||
#include "video_core/compatible_formats.h"
|
||||
#include "video_core/control/channel_state_cache.h"
|
||||
#include "video_core/delayed_destruction_ring.h"
|
||||
#include "video_core/engines/fermi_2d.h"
|
||||
#include "video_core/surface.h"
|
||||
@@ -26,6 +32,10 @@
|
||||
#include "video_core/texture_cache/types.h"
|
||||
#include "video_core/textures/texture.h"
|
||||
|
||||
namespace Tegra::Control {
|
||||
struct ChannelState;
|
||||
}
|
||||
|
||||
namespace VideoCommon {
|
||||
|
||||
using Tegra::Texture::SwizzleSource;
|
||||
@@ -44,8 +54,35 @@ struct ImageViewInOut {
|
||||
ImageViewId id{};
|
||||
};
|
||||
|
||||
/// Hash map from a u64 key to a list of image ids; the key is hashed with
/// Common::IdentityHash. Presumably the key is a GPU page index — TODO confirm.
using TextureCacheGPUMap = std::unordered_map<u64, std::vector<ImageId>, Common::IdentityHash<u64>>;
|
||||
|
||||
/// Texture-cache state kept per channel, layered on top of ChannelInfo.
/// The type is move-only: default construction and copying are deleted,
/// while move construction and move assignment are defaulted.
class TextureCacheChannelInfo : public ChannelInfo {
|
||||
public:
|
||||
// A channel state is always required to build this object.
TextureCacheChannelInfo() = delete;
|
||||
// NOTE(review): single-argument constructor is not marked explicit — confirm
// whether implicit conversion from ChannelState is intended.
TextureCacheChannelInfo(Tegra::Control::ChannelState& state) noexcept;
|
||||
TextureCacheChannelInfo(const TextureCacheChannelInfo& state) = delete;
|
||||
TextureCacheChannelInfo& operator=(const TextureCacheChannelInfo&) = delete;
|
||||
TextureCacheChannelInfo(TextureCacheChannelInfo&& other) noexcept = default;
|
||||
TextureCacheChannelInfo& operator=(TextureCacheChannelInfo&& other) noexcept = default;
|
||||
|
||||
// Graphics descriptor tables and the id lists associated with them.
// `gpu_memory` is not declared in this class; presumably inherited from
// ChannelInfo — TODO confirm.
DescriptorTable<TICEntry> graphics_image_table{gpu_memory};
|
||||
DescriptorTable<TSCEntry> graphics_sampler_table{gpu_memory};
|
||||
std::vector<SamplerId> graphics_sampler_ids;
|
||||
std::vector<ImageViewId> graphics_image_view_ids;
|
||||
|
||||
// Compute counterparts of the graphics tables and id lists above.
DescriptorTable<TICEntry> compute_image_table{gpu_memory};
|
||||
DescriptorTable<TSCEntry> compute_sampler_table{gpu_memory};
|
||||
std::vector<SamplerId> compute_sampler_ids;
|
||||
std::vector<ImageViewId> compute_image_view_ids;
|
||||
|
||||
// Lookup from a TIC entry to an image view id, and from a TSC entry to a sampler id.
std::unordered_map<TICEntry, ImageViewId> image_views;
|
||||
std::unordered_map<TSCEntry, SamplerId> samplers;
|
||||
|
||||
// Assigned by TextureCache::CreateChannel to the address of an element of
// gpu_page_table_storage; has no in-class initializer. Looks non-owning — confirm.
TextureCacheGPUMap* gpu_page_table;
|
||||
};
|
||||
|
||||
template <class P>
|
||||
class TextureCache {
|
||||
class TextureCache : public VideoCommon::ChannelSetupCaches<TextureCacheChannelInfo> {
|
||||
/// Address shift for caching images into a hash table
|
||||
static constexpr u64 PAGE_BITS = 20;
|
||||
|
||||
@@ -58,6 +95,8 @@ class TextureCache {
|
||||
/// True when the API can provide info about the memory of the device.
|
||||
static constexpr bool HAS_DEVICE_MEMORY_INFO = P::HAS_DEVICE_MEMORY_INFO;
|
||||
|
||||
static constexpr size_t UNSET_CHANNEL{std::numeric_limits<size_t>::max()};
|
||||
|
||||
static constexpr s64 TARGET_THRESHOLD = 4_GiB;
|
||||
static constexpr s64 DEFAULT_EXPECTED_MEMORY = 1_GiB + 125_MiB;
|
||||
static constexpr s64 DEFAULT_CRITICAL_MEMORY = 1_GiB + 625_MiB;
|
||||
@@ -77,16 +116,8 @@ class TextureCache {
|
||||
PixelFormat src_format;
|
||||
};
|
||||
|
||||
/// Hash functor that returns its argument converted to size_t, with no mixing.
template <typename T>
|
||||
struct IdentityHash {
|
||||
[[nodiscard]] size_t operator()(T value) const noexcept {
|
||||
return static_cast<size_t>(value);
|
||||
}
|
||||
};
|
||||
|
||||
public:
|
||||
explicit TextureCache(Runtime&, VideoCore::RasterizerInterface&, Tegra::Engines::Maxwell3D&,
|
||||
Tegra::Engines::KeplerCompute&, Tegra::MemoryManager&);
|
||||
explicit TextureCache(Runtime&, VideoCore::RasterizerInterface&);
|
||||
|
||||
/// Notify the cache that a new frame has been queued
|
||||
void TickFrame();
|
||||
@@ -142,7 +173,7 @@ public:
|
||||
void UnmapMemory(VAddr cpu_addr, size_t size);
|
||||
|
||||
/// Remove images in a region
|
||||
void UnmapGPUMemory(GPUVAddr gpu_addr, size_t size);
|
||||
void UnmapGPUMemory(size_t as_id, GPUVAddr gpu_addr, size_t size);
|
||||
|
||||
/// Blit an image with the given parameters
|
||||
void BlitImage(const Tegra::Engines::Fermi2D::Surface& dst,
|
||||
@@ -171,6 +202,9 @@ public:
|
||||
|
||||
[[nodiscard]] bool IsRescaling(const ImageViewBase& image_view) const noexcept;
|
||||
|
||||
/// Create channel state.
|
||||
void CreateChannel(Tegra::Control::ChannelState& channel) final override;
|
||||
|
||||
std::mutex mutex;
|
||||
|
||||
private:
|
||||
@@ -205,6 +239,8 @@ private:
|
||||
}
|
||||
}
|
||||
|
||||
void OnGPUASRegister(size_t map_id) final override;
|
||||
|
||||
/// Runs the Garbage Collector.
|
||||
void RunGarbageCollector();
|
||||
|
||||
@@ -273,7 +309,7 @@ private:
|
||||
void ForEachImageInRegion(VAddr cpu_addr, size_t size, Func&& func);
|
||||
|
||||
template <typename Func>
|
||||
void ForEachImageInRegionGPU(GPUVAddr gpu_addr, size_t size, Func&& func);
|
||||
void ForEachImageInRegionGPU(size_t as_id, GPUVAddr gpu_addr, size_t size, Func&& func);
|
||||
|
||||
template <typename Func>
|
||||
void ForEachSparseImageInRegion(GPUVAddr gpu_addr, size_t size, Func&& func);
|
||||
@@ -338,31 +374,16 @@ private:
|
||||
u64 GetScaledImageSizeBytes(ImageBase& image);
|
||||
|
||||
Runtime& runtime;
|
||||
|
||||
VideoCore::RasterizerInterface& rasterizer;
|
||||
Tegra::Engines::Maxwell3D& maxwell3d;
|
||||
Tegra::Engines::KeplerCompute& kepler_compute;
|
||||
Tegra::MemoryManager& gpu_memory;
|
||||
|
||||
DescriptorTable<TICEntry> graphics_image_table{gpu_memory};
|
||||
DescriptorTable<TSCEntry> graphics_sampler_table{gpu_memory};
|
||||
std::vector<SamplerId> graphics_sampler_ids;
|
||||
std::vector<ImageViewId> graphics_image_view_ids;
|
||||
|
||||
DescriptorTable<TICEntry> compute_image_table{gpu_memory};
|
||||
DescriptorTable<TSCEntry> compute_sampler_table{gpu_memory};
|
||||
std::vector<SamplerId> compute_sampler_ids;
|
||||
std::vector<ImageViewId> compute_image_view_ids;
|
||||
std::deque<TextureCacheGPUMap> gpu_page_table_storage;
|
||||
|
||||
RenderTargets render_targets;
|
||||
|
||||
std::unordered_map<TICEntry, ImageViewId> image_views;
|
||||
std::unordered_map<TSCEntry, SamplerId> samplers;
|
||||
std::unordered_map<RenderTargets, FramebufferId> framebuffers;
|
||||
|
||||
std::unordered_map<u64, std::vector<ImageMapId>, IdentityHash<u64>> page_table;
|
||||
std::unordered_map<u64, std::vector<ImageId>, IdentityHash<u64>> gpu_page_table;
|
||||
std::unordered_map<u64, std::vector<ImageId>, IdentityHash<u64>> sparse_page_table;
|
||||
|
||||
std::unordered_map<u64, std::vector<ImageMapId>, Common::IdentityHash<u64>> page_table;
|
||||
std::unordered_map<u64, std::vector<ImageId>, Common::IdentityHash<u64>> sparse_page_table;
|
||||
std::unordered_map<ImageId, std::vector<ImageViewId>> sparse_views;
|
||||
|
||||
VAddr virtual_invalid_space{};
|
||||
|
@@ -510,22 +510,18 @@ void SwizzleBlockLinearImage(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr
|
||||
const LevelInfo level_info = MakeLevelInfo(info);
|
||||
const Extent2D tile_size = DefaultBlockSize(info.format);
|
||||
const u32 bytes_per_block = BytesPerBlock(info.format);
|
||||
const u32 bpp_log2 = BytesPerBlockLog2(info.format);
|
||||
|
||||
const s32 level = copy.image_subresource.base_level;
|
||||
const Extent3D level_size = AdjustMipSize(size, level);
|
||||
const u32 num_blocks_per_layer = NumBlocks(level_size, tile_size);
|
||||
const u32 host_bytes_per_layer = num_blocks_per_layer * bytes_per_block;
|
||||
|
||||
UNIMPLEMENTED_IF(info.tile_width_spacing > 0);
|
||||
|
||||
UNIMPLEMENTED_IF(copy.image_offset.x != 0);
|
||||
UNIMPLEMENTED_IF(copy.image_offset.y != 0);
|
||||
UNIMPLEMENTED_IF(copy.image_offset.z != 0);
|
||||
UNIMPLEMENTED_IF(copy.image_extent != level_size);
|
||||
|
||||
const Extent3D num_tiles = AdjustTileSize(level_size, tile_size);
|
||||
const Extent3D block = AdjustMipBlockSize(num_tiles, level_info.block, level);
|
||||
|
||||
size_t host_offset = copy.buffer_offset;
|
||||
|
||||
const u32 num_levels = info.resources.levels;
|
||||
@@ -536,6 +532,12 @@ void SwizzleBlockLinearImage(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr
|
||||
tile_size.height, info.tile_width_spacing);
|
||||
const size_t subresource_size = sizes[level];
|
||||
|
||||
const Extent2D gob = GobSize(bpp_log2, level_info.block.height, info.tile_width_spacing);
|
||||
|
||||
const Extent3D num_tiles = AdjustTileSize(level_size, tile_size);
|
||||
const Extent3D block = AdjustMipBlockSize(num_tiles, level_info.block, level);
|
||||
const u32 stride_alignment = StrideAlignment(num_tiles, level_info.block, gob, bpp_log2);
|
||||
|
||||
const auto dst_data = std::make_unique<u8[]>(subresource_size);
|
||||
const std::span<u8> dst(dst_data.get(), subresource_size);
|
||||
|
||||
@@ -544,7 +546,7 @@ void SwizzleBlockLinearImage(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr
|
||||
gpu_memory.ReadBlockUnsafe(gpu_addr + guest_offset, dst.data(), dst.size_bytes());
|
||||
|
||||
SwizzleTexture(dst, src, bytes_per_block, num_tiles.width, num_tiles.height,
|
||||
num_tiles.depth, block.height, block.depth);
|
||||
num_tiles.depth, block.height, block.depth, stride_alignment);
|
||||
|
||||
gpu_memory.WriteBlockUnsafe(gpu_addr + guest_offset, dst.data(), dst.size_bytes());
|
||||
|
||||
@@ -755,7 +757,7 @@ bool IsValidEntry(const Tegra::MemoryManager& gpu_memory, const TICEntry& config
|
||||
if (address == 0) {
|
||||
return false;
|
||||
}
|
||||
if (address > (1ULL << 48)) {
|
||||
if (address >= (1ULL << 40)) {
|
||||
return false;
|
||||
}
|
||||
if (gpu_memory.GpuToCpuAddress(address).has_value()) {
|
||||
|
Reference in New Issue
Block a user