early-access version 3275

This commit is contained in:
pineappleEA
2023-01-03 22:43:57 +01:00
parent 4b11185ae8
commit 0f64bad6bd
26 changed files with 350 additions and 160 deletions

View File

@@ -646,7 +646,28 @@ bool TextureCache<P>::ShouldWaitAsyncFlushes() const noexcept {
/// Moves the pending (uncommitted) download list into the committed queue.
///
/// When the backend implements asynchronous downloads, the GPU->staging copies for the
/// whole batch are recorded here and the staging buffer is stored in `async_buffers`, so
/// that `committed_downloads` and `async_buffers` always hold the same number of entries.
/// An empty batch is still committed and is paired with an empty optional.
template <class P>
void TextureCache<P>::CommitAsyncFlushes() {
    if constexpr (IMPLEMENTS_ASYNC_DOWNLOADS) {
        // View over the pending list; only used before the list is moved out below.
        const std::span<const ImageId> download_ids = uncommitted_downloads;
        if (download_ids.empty()) {
            // Nothing to download: push a placeholder so both queues stay in lockstep.
            async_buffers.emplace_back(std::optional<AsyncBuffer>{});
        } else {
            // Every image occupies a 64-byte aligned slot in the staging buffer (see the
            // offset increment below), so the requested size has to be the sum of the
            // aligned sizes. Summing the raw sizes could under-allocate the buffer.
            size_t total_size_bytes = 0;
            for (const ImageId image_id : download_ids) {
                total_size_bytes +=
                    Common::AlignUp(slot_images[image_id].unswizzled_size_bytes, 64);
            }
            auto download_map = runtime.DownloadStagingBuffer(total_size_bytes, true);
            for (const ImageId image_id : download_ids) {
                Image& image = slot_images[image_id];
                const auto copies = FullDownloadCopies(image.info);
                image.DownloadMemory(download_map, copies);
                download_map.offset += Common::AlignUp(image.unswizzled_size_bytes, 64);
            }
            // The stored copy keeps the advanced offset (one past the last image slot).
            async_buffers.emplace_back(download_map);
        }
    }
    // Commit the batch exactly once; clear() leaves the moved-from vector in a known
    // empty state for the next batch.
    committed_downloads.emplace_back(std::move(uncommitted_downloads));
    uncommitted_downloads.clear();
}
@@ -655,37 +676,58 @@ void TextureCache<P>::PopAsyncFlushes() {
if (committed_downloads.empty()) {
return;
}
const std::span<const ImageId> download_ids = committed_downloads.front();
if (download_ids.empty()) {
committed_downloads.pop();
return;
if constexpr (IMPLEMENTS_ASYNC_DOWNLOADS) {
const std::span<const ImageId> download_ids = committed_downloads.front();
if (download_ids.empty()) {
committed_downloads.pop_front();
async_buffers.pop_front();
return;
}
auto download_map = *async_buffers.front();
std::span<u8> download_span = download_map.mapped_span;
for (size_t i = download_ids.size(); i > 0; i--) {
const ImageBase& image = slot_images[download_ids[i - 1]];
const auto copies = FullDownloadCopies(image.info);
download_map.offset -= Common::AlignUp(image.unswizzled_size_bytes, 64);
std::span<u8> download_span_alt = download_span.subspan(download_map.offset);
SwizzleImage(*gpu_memory, image.gpu_addr, image.info, copies, download_span_alt,
swizzle_data_buffer);
}
runtime.FreeDeferredStagingBuffer(download_map);
committed_downloads.pop_front();
async_buffers.pop_front();
} else {
const std::span<const ImageId> download_ids = committed_downloads.front();
if (download_ids.empty()) {
committed_downloads.pop_front();
return;
}
size_t total_size_bytes = 0;
for (const ImageId image_id : download_ids) {
total_size_bytes += slot_images[image_id].unswizzled_size_bytes;
}
auto download_map = runtime.DownloadStagingBuffer(total_size_bytes);
const size_t original_offset = download_map.offset;
for (const ImageId image_id : download_ids) {
Image& image = slot_images[image_id];
const auto copies = FullDownloadCopies(image.info);
image.DownloadMemory(download_map, copies);
download_map.offset += image.unswizzled_size_bytes;
}
// Wait for downloads to finish
runtime.Finish();
download_map.offset = original_offset;
std::span<u8> download_span = download_map.mapped_span;
for (const ImageId image_id : download_ids) {
const ImageBase& image = slot_images[image_id];
const auto copies = FullDownloadCopies(image.info);
SwizzleImage(*gpu_memory, image.gpu_addr, image.info, copies, download_span,
swizzle_data_buffer);
download_map.offset += image.unswizzled_size_bytes;
download_span = download_span.subspan(image.unswizzled_size_bytes);
}
committed_downloads.pop_front();
}
size_t total_size_bytes = 0;
for (const ImageId image_id : download_ids) {
total_size_bytes += slot_images[image_id].unswizzled_size_bytes;
}
auto download_map = runtime.DownloadStagingBuffer(total_size_bytes);
const size_t original_offset = download_map.offset;
for (const ImageId image_id : download_ids) {
Image& image = slot_images[image_id];
const auto copies = FullDownloadCopies(image.info);
image.DownloadMemory(download_map, copies);
download_map.offset += image.unswizzled_size_bytes;
}
// Wait for downloads to finish
runtime.Finish();
download_map.offset = original_offset;
std::span<u8> download_span = download_map.mapped_span;
for (const ImageId image_id : download_ids) {
const ImageBase& image = slot_images[image_id];
const auto copies = FullDownloadCopies(image.info);
SwizzleImage(*gpu_memory, image.gpu_addr, image.info, copies, download_span,
swizzle_data_buffer);
download_map.offset += image.unswizzled_size_bytes;
download_span = download_span.subspan(image.unswizzled_size_bytes);
}
committed_downloads.pop();
}
template <class P>
@@ -1475,6 +1517,27 @@ void TextureCache<P>::ForEachSparseSegment(ImageBase& image, Func&& func) {
}
}
/// Reorders the per-page image map lists for the pages visited by ForEachCPUPage for the
/// range described by `cpu_addr` and `size`.
///
/// For each visited page that has an entry in `page_table`, adjacent entries are compared
/// starting from the front. While the earlier entry's image has a smaller
/// `modification_tick` than the later one's, the two entries are swapped. Processing of
/// that page stops at the first pair that does not satisfy this condition.
template <class P>
void TextureCache<P>::BubbleUpImages(VAddr cpu_addr, size_t size) {
    ForEachCPUPage(cpu_addr, size, [this](u64 page) {
        const auto entry = page_table.find(page);
        if (entry == page_table.end()) {
            // No image maps registered on this page.
            return;
        }
        std::vector<ImageMapId>& maps = entry->second;
        for (size_t upper = 1; upper < maps.size(); ++upper) {
            const size_t lower = upper - 1;
            ImageMapView& lower_view = slot_map_views[maps[lower]];
            ImageMapView& upper_view = slot_map_views[maps[upper]];
            const bool lower_is_older = slot_images[lower_view.image_id].modification_tick <
                                        slot_images[upper_view.image_id].modification_tick;
            if (!lower_is_older) {
                // First pair already in order: nothing further to do for this page.
                break;
            }
            std::swap(maps[lower], maps[upper]);
        }
    });
}
template <class P>
ImageViewId TextureCache<P>::FindOrEmplaceImageView(ImageId image_id, const ImageViewInfo& info) {
Image& image = slot_images[image_id];
@@ -1788,6 +1851,7 @@ template <class P>
void TextureCache<P>::MarkModification(ImageBase& image) noexcept {
    // Advance the cache's modification counter and stamp the image with the new value.
    image.modification_tick = ++modification_tick;
    // Record that the image contents were changed on the GPU side.
    image.flags |= ImageFlagBits::GpuModified;
    // Reorder the page table entries covering this image; this must run after the tick
    // update above because the reordering compares modification ticks.
    BubbleUpImages(image.cpu_addr, image.guest_size_bytes);
}
template <class P>

View File

@@ -92,6 +92,8 @@ class TextureCache : public VideoCommon::ChannelSetupCaches<TextureCacheChannelI
static constexpr bool HAS_EMULATED_COPIES = P::HAS_EMULATED_COPIES;
/// True when the API can provide info about the memory of the device.
static constexpr bool HAS_DEVICE_MEMORY_INFO = P::HAS_DEVICE_MEMORY_INFO;
/// True when the API can do asynchronous texture downloads.
static constexpr bool IMPLEMENTS_ASYNC_DOWNLOADS = P::IMPLEMENTS_ASYNC_DOWNLOADS;
static constexpr size_t UNSET_CHANNEL{std::numeric_limits<size_t>::max()};
@@ -106,6 +108,7 @@ class TextureCache : public VideoCommon::ChannelSetupCaches<TextureCacheChannelI
using ImageView = typename P::ImageView;
using Sampler = typename P::Sampler;
using Framebuffer = typename P::Framebuffer;
using AsyncBuffer = typename P::AsyncBuffer;
struct BlitImages {
ImageId dst_id;
@@ -316,6 +319,8 @@ private:
template <typename Func>
void ForEachSparseSegment(ImageBase& image, Func&& func);
void BubbleUpImages(VAddr cpu_addr, size_t size);
/// Find or create an image view in the given image with the passed parameters
[[nodiscard]] ImageViewId FindOrEmplaceImageView(ImageId image_id, const ImageViewInfo& info);
@@ -403,7 +408,8 @@ private:
// TODO: This data structure is not optimal and it should be reworked
std::vector<ImageId> uncommitted_downloads;
std::queue<std::vector<ImageId>> committed_downloads;
std::deque<std::vector<ImageId>> committed_downloads;
std::deque<std::optional<AsyncBuffer>> async_buffers;
struct LRUItemParams {
using ObjectType = ImageId;