early-access version 2226

This commit is contained in:
pineappleEA 2021-11-20 07:23:22 +01:00
parent 72fb249491
commit fb13a8600c
10 changed files with 200 additions and 16 deletions

View File

@ -1,7 +1,7 @@
yuzu emulator early access yuzu emulator early access
============= =============
This is the source code for early-access 2225. This is the source code for early-access 2226.
## Legal Notice ## Legal Notice

View File

@ -3,7 +3,7 @@
// Refer to the license.txt file included. // Refer to the license.txt file included.
#version 450 #version 450
// #extension GL_ARB_shader_stencil_export : require #extension GL_ARB_shader_stencil_export : require
layout(binding = 0) uniform sampler2D color_texture; layout(binding = 0) uniform sampler2D color_texture;
@ -13,5 +13,5 @@ void main() {
uint depth_unorm = (color.r << 16) | (color.g << 8) | color.b; uint depth_unorm = (color.r << 16) | (color.g << 8) | color.b;
gl_FragDepth = float(depth_unorm) / (exp2(24.0) - 1.0f); gl_FragDepth = float(depth_unorm) / (exp2(24.0) - 1.0f);
// gl_FragStencilRefARB = int(color.a); gl_FragStencilRefARB = int(color.a);
} }

View File

@ -3,7 +3,7 @@
// Refer to the license.txt file included. // Refer to the license.txt file included.
#version 450 #version 450
// #extension GL_ARB_shader_stencil_export : require #extension GL_ARB_shader_stencil_export : require
layout(binding = 0) uniform sampler2D color_texture; layout(binding = 0) uniform sampler2D color_texture;
@ -15,5 +15,5 @@ void main() {
| (uint(color.r * (exp2(11) - 1.0f))); | (uint(color.r * (exp2(11) - 1.0f)));
gl_FragDepth = float(depth_stencil_unorm >> 8) / (exp2(24.0) - 1.0f); gl_FragDepth = float(depth_stencil_unorm >> 8) / (exp2(24.0) - 1.0f);
// gl_FragStencilRefARB = int(depth_stencil_unorm & 0x00FF); gl_FragStencilRefARB = int(depth_stencil_unorm & 0x00FF);
} }

View File

@ -3,7 +3,7 @@
// Refer to the license.txt file included. // Refer to the license.txt file included.
#version 450 #version 450
// #extension GL_ARB_shader_stencil_export : require #extension GL_ARB_shader_stencil_export : require
layout(binding = 0) uniform sampler2D color_texture; layout(binding = 0) uniform sampler2D color_texture;
@ -14,5 +14,5 @@ void main() {
| (uint(color.g * (exp2(16) - 1.0f)) << 16); | (uint(color.g * (exp2(16) - 1.0f)) << 16);
gl_FragDepth = float(depth_stencil_unorm >> 8) / (exp2(24.0) - 1.0f); gl_FragDepth = float(depth_stencil_unorm >> 8) / (exp2(24.0) - 1.0f);
// gl_FragStencilRefARB = int(depth_stencil_unorm & 0x00FF); gl_FragStencilRefARB = int(depth_stencil_unorm & 0x00FF);
} }

View File

@ -527,7 +527,7 @@ void TextureCacheRuntime::CopyImage(Image& dst_image, Image& src_image,
} }
} }
void TextureCacheRuntime::ConvertImage(Image& dst, Image& src, void TextureCacheRuntime::ReinterpretImage(Image& dst, Image& src,
std::span<const VideoCommon::ImageCopy> copies) { std::span<const VideoCommon::ImageCopy> copies) {
LOG_DEBUG(Render_OpenGL, "Converting {} to {}", src.info.format, dst.info.format); LOG_DEBUG(Render_OpenGL, "Converting {} to {}", src.info.format, dst.info.format);
format_conversion_pass.ConvertImage(dst, src, copies); format_conversion_pass.ConvertImage(dst, src, copies);

View File

@ -84,9 +84,13 @@ public:
u64 GetDeviceLocalMemory() const; u64 GetDeviceLocalMemory() const;
bool ShouldReinterpret([[maybe_unused]] Image& dst, [[maybe_unused]] Image& src) {
return true;
}
void CopyImage(Image& dst, Image& src, std::span<const VideoCommon::ImageCopy> copies); void CopyImage(Image& dst, Image& src, std::span<const VideoCommon::ImageCopy> copies);
void ConvertImage(Image& dst, Image& src, std::span<const VideoCommon::ImageCopy> copies); void ReinterpretImage(Image& dst, Image& src, std::span<const VideoCommon::ImageCopy> copies);
void ConvertImage(Framebuffer* dst, ImageView& dst_view, ImageView& src_view, bool rescaled) { void ConvertImage(Framebuffer* dst, ImageView& dst_view, ImageView& src_view, bool rescaled) {
UNIMPLEMENTED(); UNIMPLEMENTED();
@ -339,7 +343,6 @@ struct TextureCacheParams {
static constexpr bool FRAMEBUFFER_BLITS = true; static constexpr bool FRAMEBUFFER_BLITS = true;
static constexpr bool HAS_EMULATED_COPIES = true; static constexpr bool HAS_EMULATED_COPIES = true;
static constexpr bool HAS_DEVICE_MEMORY_INFO = true; static constexpr bool HAS_DEVICE_MEMORY_INFO = true;
static constexpr bool HAS_PIXEL_FORMAT_CONVERSIONS = true;
using Runtime = OpenGL::TextureCacheRuntime; using Runtime = OpenGL::TextureCacheRuntime;
using Image = OpenGL::Image; using Image = OpenGL::Image;

View File

@ -313,6 +313,19 @@ constexpr VkBorderColor ConvertBorderColor(const std::array<float, 4>& color) {
}; };
} }
/// Builds the Vulkan buffer<->image copy region for one side of an image copy.
///
/// @param copy        Copy description providing subresources, offsets and the extent.
/// @param is_src      True to describe the source side of `copy`, false for the destination side.
/// @param aspect_mask Image aspects forwarded to the subresource layers.
/// @return Region whose buffer offset, row length and image height are all zero; the image side
///         is taken from the selected half of `copy`.
[[nodiscard]] VkBufferImageCopy MakeBufferImageCopy(const VideoCommon::ImageCopy& copy, bool is_src,
                                                    VkImageAspectFlags aspect_mask) noexcept {
    // Pick the half of the copy description this region refers to.
    const auto& subresource = is_src ? copy.src_subresource : copy.dst_subresource;
    const auto& offset = is_src ? copy.src_offset : copy.dst_offset;

    VkBufferImageCopy region{};
    // Every region starts at the beginning of the buffer.
    region.bufferOffset = 0;
    // Zero row length / image height: per the Vulkan specification the buffer data is then
    // treated as tightly packed according to imageExtent.
    region.bufferRowLength = 0;
    region.bufferImageHeight = 0;
    region.imageSubresource = MakeImageSubresourceLayers(subresource, aspect_mask);
    region.imageOffset = MakeOffset3D(offset);
    region.imageExtent = MakeExtent3D(copy.extent);
    return region;
}
[[maybe_unused]] [[nodiscard]] std::vector<VkBufferCopy> TransformBufferCopies( [[maybe_unused]] [[nodiscard]] std::vector<VkBufferCopy> TransformBufferCopies(
std::span<const VideoCommon::BufferCopy> copies, size_t buffer_offset) { std::span<const VideoCommon::BufferCopy> copies, size_t buffer_offset) {
std::vector<VkBufferCopy> result(copies.size()); std::vector<VkBufferCopy> result(copies.size());
@ -759,6 +772,167 @@ StagingBufferRef TextureCacheRuntime::DownloadStagingBuffer(size_t size) {
return staging_buffer_pool.Request(size, MemoryUsage::Download); return staging_buffer_pool.Request(size, MemoryUsage::Download);
} }
/// Decides whether a copy into `dst` must take the ReinterpretImage path.
///
/// Only depth-stencil destinations are affected, and only when the device reports that the
/// shader stencil export extension is unavailable. `src` is not consulted.
bool TextureCacheRuntime::ShouldReinterpret(Image& dst, Image& src) {
    const auto dst_type = VideoCore::Surface::GetFormatType(dst.info.format);
    const bool is_depth_stencil = dst_type == VideoCore::Surface::SurfaceType::DepthStencil;
    // Short-circuit keeps the device query limited to depth-stencil destinations.
    return is_depth_stencil && !device.IsExtShaderStencilExportSupported();
}
/// Rounds `value` up to the nearest power of two.
///
/// Zero and one both map to 1. Values above the largest power of two representable in size_t
/// saturate to that power. Shifting by the full bit width of the type, which a leading-zero
/// based formula does for those inputs, is undefined behavior, so doubling is used instead.
///
/// @param value Size to round up.
/// @return The smallest power of two that is >= `value`, or the largest representable power of
///         two when no such value fits in size_t.
[[nodiscard]] size_t NextPow2(size_t value) {
    constexpr size_t highest_pow2 = size_t{1} << (8U * sizeof(size_t) - 1U);
    if (value >= highest_pow2) {
        return highest_pow2;
    }
    // value < highest_pow2 here, so doubling always terminates before it can overflow.
    size_t result = 1;
    while (result < value) {
        result <<= 1U;
    }
    return result;
}
/// Returns a buffer of at least `needed_size` bytes, creating and caching it on first use.
///
/// Buffers are cached per power-of-two size class: `level` is the exponent of the rounded-up
/// size and indexes both `buffers` and `buffer_commits`. A slot whose memory commit already
/// exists is returned as-is; otherwise a buffer of NextPow2(size) bytes is created and backed
/// with device-local memory.
///
/// @param needed_size Minimum number of bytes required; zero is treated as one byte.
/// @return Handle of the cached buffer for the matching size class.
VkBuffer TextureCacheRuntime::GetTemporaryBuffer(size_t needed_size) {
    // A zero-byte request would make `size - 1` wrap around, producing a level equal to the bit
    // width of size_t, which is one past the last slot of the cache arrays.
    const size_t size = needed_size != 0 ? needed_size : 1;
    // NOTE(review): requests above the largest power of two representable in size_t would still
    // map past the last slot; presumably such sizes never occur - confirm.
    const auto level = (8 * sizeof(size_t)) - std::countl_zero(size - 1ULL);
    if (buffer_commits[level]) {
        return *buffers[level];
    }
    const auto new_size = NextPow2(size);
    VkBufferUsageFlags flags = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT |
                               VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT |
                               VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT;
    buffers[level] = device.GetLogical().CreateBuffer({
        .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
        .pNext = nullptr,
        .flags = 0,
        .size = new_size,
        .usage = flags,
        .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
        .queueFamilyIndexCount = 0,
        .pQueueFamilyIndices = nullptr,
    });
    // The commit doubles as the "slot is populated" marker checked above.
    buffer_commits[level] = std::make_unique<MemoryCommit>(
        memory_allocator.Commit(buffers[level], MemoryUsage::DeviceLocal));
    return *buffers[level];
}
/// Copies the regions in `copies` from `src` to `dst` by round-tripping the data through a
/// temporary buffer: image -> buffer, then buffer -> image.
///
/// The recorded command sequence is:
///   1. transition `src` from GENERAL to TRANSFER_SRC_OPTIMAL,
///   2. copy `src` into the temporary buffer,
///   3. transition `src` back to GENERAL and `dst` from GENERAL to TRANSFER_DST_OPTIMAL,
///   4. copy the temporary buffer into `dst`,
///   5. transition `dst` back to GENERAL.
///
/// @param dst    Image receiving the data.
/// @param src    Image providing the data; its format determines the bytes-per-block used to
///               size the temporary buffer.
/// @param copies Regions to transfer. An empty list is a no-op.
void TextureCacheRuntime::ReinterpretImage(Image& dst, Image& src,
                                           std::span<const VideoCommon::ImageCopy> copies) {
    if (copies.empty()) {
        // Nothing to transfer; avoids requesting a zero-byte buffer and recording copy commands
        // without any region.
        return;
    }
    std::vector<VkBufferImageCopy> vk_in_copies(copies.size());
    std::vector<VkBufferImageCopy> vk_out_copies(copies.size());
    const VkImageAspectFlags src_aspect_mask = src.AspectMask();
    const VkImageAspectFlags dst_aspect_mask = dst.AspectMask();
    std::ranges::transform(copies, vk_in_copies.begin(), [src_aspect_mask](const auto& copy) {
        return MakeBufferImageCopy(copy, true, src_aspect_mask);
    });
    std::ranges::transform(copies, vk_out_copies.begin(), [dst_aspect_mask](const auto& copy) {
        return MakeBufferImageCopy(copy, false, dst_aspect_mask);
    });
    const u32 img_bpp = BytesPerBlock(src.info.format);
    size_t total_size = 0;
    for (const auto& copy : copies) {
        // Widen before multiplying so the product is computed in size_t rather than in the
        // (possibly narrower) type of the extent fields and img_bpp.
        total_size += static_cast<size_t>(copy.extent.width) * copy.extent.height *
                      copy.extent.depth * img_bpp;
    }
    // NOTE(review): the buffer is sized for the sum of all regions, yet MakeBufferImageCopy gives
    // every region a buffer offset of 0, so multiple regions share the same bytes - confirm that
    // this is intended when `copies` holds more than one entry.
    const VkBuffer copy_buffer = GetTemporaryBuffer(total_size);
    const VkImage dst_image = dst.Handle();
    const VkImage src_image = src.Handle();
    scheduler.RequestOutsideRenderPassOperationContext();
    scheduler.Record([dst_image, src_image, copy_buffer, src_aspect_mask, dst_aspect_mask,
                      vk_in_copies, vk_out_copies](vk::CommandBuffer cmdbuf) {
        // Accumulate the subresource layers touched on each image so the barriers below cover
        // exactly the copied ranges.
        RangedBarrierRange dst_range;
        RangedBarrierRange src_range;
        for (const VkBufferImageCopy& copy : vk_in_copies) {
            src_range.AddLayers(copy.imageSubresource);
        }
        for (const VkBufferImageCopy& copy : vk_out_copies) {
            dst_range.AddLayers(copy.imageSubresource);
        }
        // Issued before the buffer -> image copy: prior memory writes must be visible to
        // transfer reads and writes.
        static constexpr VkMemoryBarrier READ_BARRIER{
            .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER,
            .pNext = nullptr,
            .srcAccessMask = VK_ACCESS_MEMORY_WRITE_BIT,
            .dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT | VK_ACCESS_TRANSFER_WRITE_BIT,
        };
        // Issued after the image -> buffer copy: the transfer writes must be visible to later
        // memory reads and writes.
        static constexpr VkMemoryBarrier WRITE_BARRIER{
            .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER,
            .pNext = nullptr,
            .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
            .dstAccessMask = VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT,
        };
        // src: GENERAL -> TRANSFER_SRC_OPTIMAL ahead of the image -> buffer copy.
        const std::array pre_barriers{
            VkImageMemoryBarrier{
                .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
                .pNext = nullptr,
                .srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT |
                                 VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT |
                                 VK_ACCESS_TRANSFER_WRITE_BIT,
                .dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT,
                .oldLayout = VK_IMAGE_LAYOUT_GENERAL,
                .newLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
                .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
                .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
                .image = src_image,
                .subresourceRange = src_range.SubresourceRange(src_aspect_mask),
            },
        };
        // src: TRANSFER_SRC_OPTIMAL -> GENERAL once its data sits in the buffer.
        const std::array middle_in_barrier{
            VkImageMemoryBarrier{
                .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
                .pNext = nullptr,
                .srcAccessMask = 0,
                .dstAccessMask = 0,
                .oldLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
                .newLayout = VK_IMAGE_LAYOUT_GENERAL,
                .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
                .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
                .image = src_image,
                .subresourceRange = src_range.SubresourceRange(src_aspect_mask),
            },
        };
        // dst: GENERAL -> TRANSFER_DST_OPTIMAL ahead of the buffer -> image copy.
        const std::array middle_out_barrier{
            VkImageMemoryBarrier{
                .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
                .pNext = nullptr,
                .srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT |
                                 VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT |
                                 VK_ACCESS_TRANSFER_WRITE_BIT,
                .dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
                .oldLayout = VK_IMAGE_LAYOUT_GENERAL,
                .newLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
                .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
                .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
                .image = dst_image,
                .subresourceRange = dst_range.SubresourceRange(dst_aspect_mask),
            },
        };
        // dst: TRANSFER_DST_OPTIMAL -> GENERAL, making the copied data available to later use.
        const std::array post_barriers{
            VkImageMemoryBarrier{
                .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
                .pNext = nullptr,
                .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
                .dstAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT |
                                 VK_ACCESS_COLOR_ATTACHMENT_READ_BIT |
                                 VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT |
                                 VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT |
                                 VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT |
                                 VK_ACCESS_TRANSFER_READ_BIT | VK_ACCESS_TRANSFER_WRITE_BIT,
                .oldLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
                .newLayout = VK_IMAGE_LAYOUT_GENERAL,
                .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
                .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
                .image = dst_image,
                .subresourceRange = dst_range.SubresourceRange(dst_aspect_mask),
            },
        };
        cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT,
                               0, {}, {}, pre_barriers);
        cmdbuf.CopyImageToBuffer(src_image, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, copy_buffer,
                                 vk_in_copies);
        cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
                               0, WRITE_BARRIER, nullptr, middle_in_barrier);
        cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT,
                               0, READ_BARRIER, {}, middle_out_barrier);
        // The layout passed here must be the one the image is actually in: middle_out_barrier
        // has just moved dst to TRANSFER_DST_OPTIMAL and post_barriers transitions from it.
        cmdbuf.CopyBufferToImage(copy_buffer, dst_image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
                                 vk_out_copies);
        cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
                               0, {}, {}, post_barriers);
    });
}
void TextureCacheRuntime::BlitImage(Framebuffer* dst_framebuffer, ImageView& dst, ImageView& src, void TextureCacheRuntime::BlitImage(Framebuffer* dst_framebuffer, ImageView& dst, ImageView& src,
const Region2D& dst_region, const Region2D& src_region, const Region2D& dst_region, const Region2D& src_region,
Tegra::Engines::Fermi2D::Filter filter, Tegra::Engines::Fermi2D::Filter filter,

View File

@ -61,6 +61,10 @@ public:
void CopyImage(Image& dst, Image& src, std::span<const VideoCommon::ImageCopy> copies); void CopyImage(Image& dst, Image& src, std::span<const VideoCommon::ImageCopy> copies);
bool ShouldReinterpret(Image& dst, Image& src);
void ReinterpretImage(Image& dst, Image& src, std::span<const VideoCommon::ImageCopy> copies);
void ConvertImage(Framebuffer* dst, ImageView& dst_view, ImageView& src_view, bool rescaled); void ConvertImage(Framebuffer* dst, ImageView& dst_view, ImageView& src_view, bool rescaled);
bool CanAccelerateImageUpload(Image&) const noexcept { bool CanAccelerateImageUpload(Image&) const noexcept {
@ -82,6 +86,8 @@ public:
return true; return true;
} }
[[nodiscard]] VkBuffer GetTemporaryBuffer(size_t needed_size);
const Device& device; const Device& device;
VKScheduler& scheduler; VKScheduler& scheduler;
MemoryAllocator& memory_allocator; MemoryAllocator& memory_allocator;
@ -90,6 +96,10 @@ public:
ASTCDecoderPass& astc_decoder_pass; ASTCDecoderPass& astc_decoder_pass;
RenderPassCache& render_pass_cache; RenderPassCache& render_pass_cache;
const Settings::ResolutionScalingInfo& resolution; const Settings::ResolutionScalingInfo& resolution;
constexpr static size_t indexing_slots = 8 * sizeof(size_t);
std::array<vk::Buffer, indexing_slots> buffers{};
std::array<std::unique_ptr<MemoryCommit>, indexing_slots> buffer_commits{};
}; };
class Image : public VideoCommon::ImageBase { class Image : public VideoCommon::ImageBase {
@ -316,7 +326,6 @@ struct TextureCacheParams {
static constexpr bool FRAMEBUFFER_BLITS = false; static constexpr bool FRAMEBUFFER_BLITS = false;
static constexpr bool HAS_EMULATED_COPIES = false; static constexpr bool HAS_EMULATED_COPIES = false;
static constexpr bool HAS_DEVICE_MEMORY_INFO = true; static constexpr bool HAS_DEVICE_MEMORY_INFO = true;
static constexpr bool HAS_PIXEL_FORMAT_CONVERSIONS = false;
using Runtime = Vulkan::TextureCacheRuntime; using Runtime = Vulkan::TextureCacheRuntime;
using Image = Vulkan::Image; using Image = Vulkan::Image;

View File

@ -1762,8 +1762,8 @@ void TextureCache<P>::CopyImage(ImageId dst_id, ImageId src_id, std::vector<Imag
} }
UNIMPLEMENTED_IF(dst.info.type != ImageType::e2D); UNIMPLEMENTED_IF(dst.info.type != ImageType::e2D);
UNIMPLEMENTED_IF(src.info.type != ImageType::e2D); UNIMPLEMENTED_IF(src.info.type != ImageType::e2D);
if constexpr (HAS_PIXEL_FORMAT_CONVERSIONS) { if (runtime.ShouldReinterpret(dst, src)) {
return runtime.ConvertImage(dst, src, copies); return runtime.ReinterpretImage(dst, src, copies);
} }
for (const ImageCopy& copy : copies) { for (const ImageCopy& copy : copies) {
UNIMPLEMENTED_IF(copy.dst_subresource.num_layers != 1); UNIMPLEMENTED_IF(copy.dst_subresource.num_layers != 1);

View File

@ -59,8 +59,6 @@ class TextureCache {
static constexpr bool HAS_EMULATED_COPIES = P::HAS_EMULATED_COPIES; static constexpr bool HAS_EMULATED_COPIES = P::HAS_EMULATED_COPIES;
/// True when the API can provide info about the memory of the device. /// True when the API can provide info about the memory of the device.
static constexpr bool HAS_DEVICE_MEMORY_INFO = P::HAS_DEVICE_MEMORY_INFO; static constexpr bool HAS_DEVICE_MEMORY_INFO = P::HAS_DEVICE_MEMORY_INFO;
/// True when the API provides utilities for pixel format conversions.
static constexpr bool HAS_PIXEL_FORMAT_CONVERSIONS = P::HAS_PIXEL_FORMAT_CONVERSIONS;
static constexpr u64 DEFAULT_EXPECTED_MEMORY = 1_GiB; static constexpr u64 DEFAULT_EXPECTED_MEMORY = 1_GiB;
static constexpr u64 DEFAULT_CRITICAL_MEMORY = 2_GiB; static constexpr u64 DEFAULT_CRITICAL_MEMORY = 2_GiB;