early-access version 2156

This commit is contained in:
pineappleEA
2021-10-24 06:39:01 +02:00
parent 238cebb24e
commit f85f34d123
119 changed files with 8931 additions and 610 deletions

View File

@@ -363,7 +363,7 @@ BlitImageHelper::BlitImageHelper(const Device& device_, VKScheduler& scheduler_,
BlitImageHelper::~BlitImageHelper() = default;
void BlitImageHelper::BlitColor(const Framebuffer* dst_framebuffer, const ImageView& src_image_view,
void BlitImageHelper::BlitColor(const Framebuffer* dst_framebuffer, VkImageView src_view,
const Region2D& dst_region, const Region2D& src_region,
Tegra::Engines::Fermi2D::Filter filter,
Tegra::Engines::Fermi2D::Operation operation) {
@@ -373,9 +373,8 @@ void BlitImageHelper::BlitColor(const Framebuffer* dst_framebuffer, const ImageV
.operation = operation,
};
const VkPipelineLayout layout = *one_texture_pipeline_layout;
const VkImageView src_view = src_image_view.Handle(Shader::TextureType::Color2D);
const VkSampler sampler = is_linear ? *linear_sampler : *nearest_sampler;
const VkPipeline pipeline = FindOrEmplacePipeline(key);
const VkPipeline pipeline = FindOrEmplaceColorPipeline(key);
scheduler.RequestRenderpass(dst_framebuffer);
scheduler.Record([this, dst_region, src_region, pipeline, layout, sampler,
src_view](vk::CommandBuffer cmdbuf) {
@@ -398,10 +397,13 @@ void BlitImageHelper::BlitDepthStencil(const Framebuffer* dst_framebuffer,
Tegra::Engines::Fermi2D::Operation operation) {
ASSERT(filter == Tegra::Engines::Fermi2D::Filter::Point);
ASSERT(operation == Tegra::Engines::Fermi2D::Operation::SrcCopy);
const BlitImagePipelineKey key{
.renderpass = dst_framebuffer->RenderPass(),
.operation = operation,
};
const VkPipelineLayout layout = *two_textures_pipeline_layout;
const VkSampler sampler = *nearest_sampler;
const VkPipeline pipeline = BlitDepthStencilPipeline(dst_framebuffer->RenderPass());
const VkPipeline pipeline = FindOrEmplaceDepthStencilPipeline(key);
scheduler.RequestRenderpass(dst_framebuffer);
scheduler.Record([dst_region, src_region, pipeline, layout, sampler, src_depth_view,
src_stencil_view, this](vk::CommandBuffer cmdbuf) {
@@ -419,40 +421,45 @@ void BlitImageHelper::BlitDepthStencil(const Framebuffer* dst_framebuffer,
}
void BlitImageHelper::ConvertD32ToR32(const Framebuffer* dst_framebuffer,
const ImageView& src_image_view) {
const ImageView& src_image_view, u32 up_scale,
u32 down_shift) {
ConvertDepthToColorPipeline(convert_d32_to_r32_pipeline, dst_framebuffer->RenderPass());
Convert(*convert_d32_to_r32_pipeline, dst_framebuffer, src_image_view);
Convert(*convert_d32_to_r32_pipeline, dst_framebuffer, src_image_view, up_scale, down_shift);
}
void BlitImageHelper::ConvertR32ToD32(const Framebuffer* dst_framebuffer,
const ImageView& src_image_view) {
const ImageView& src_image_view, u32 up_scale,
u32 down_shift) {
ConvertColorToDepthPipeline(convert_r32_to_d32_pipeline, dst_framebuffer->RenderPass());
Convert(*convert_r32_to_d32_pipeline, dst_framebuffer, src_image_view);
Convert(*convert_r32_to_d32_pipeline, dst_framebuffer, src_image_view, up_scale, down_shift);
}
void BlitImageHelper::ConvertD16ToR16(const Framebuffer* dst_framebuffer,
const ImageView& src_image_view) {
const ImageView& src_image_view, u32 up_scale,
u32 down_shift) {
ConvertDepthToColorPipeline(convert_d16_to_r16_pipeline, dst_framebuffer->RenderPass());
Convert(*convert_d16_to_r16_pipeline, dst_framebuffer, src_image_view);
Convert(*convert_d16_to_r16_pipeline, dst_framebuffer, src_image_view, up_scale, down_shift);
}
void BlitImageHelper::ConvertR16ToD16(const Framebuffer* dst_framebuffer,
const ImageView& src_image_view) {
const ImageView& src_image_view, u32 up_scale,
u32 down_shift) {
ConvertColorToDepthPipeline(convert_r16_to_d16_pipeline, dst_framebuffer->RenderPass());
Convert(*convert_r16_to_d16_pipeline, dst_framebuffer, src_image_view);
Convert(*convert_r16_to_d16_pipeline, dst_framebuffer, src_image_view, up_scale, down_shift);
}
void BlitImageHelper::Convert(VkPipeline pipeline, const Framebuffer* dst_framebuffer,
const ImageView& src_image_view) {
const ImageView& src_image_view, u32 up_scale, u32 down_shift) {
const VkPipelineLayout layout = *one_texture_pipeline_layout;
const VkImageView src_view = src_image_view.Handle(Shader::TextureType::Color2D);
const VkSampler sampler = *nearest_sampler;
const VkExtent2D extent{
.width = src_image_view.size.width,
.height = src_image_view.size.height,
.width = std::max((src_image_view.size.width * up_scale) >> down_shift, 1U),
.height = std::max((src_image_view.size.height * up_scale) >> down_shift, 1U),
};
scheduler.RequestRenderpass(dst_framebuffer);
scheduler.Record([pipeline, layout, sampler, src_view, extent, this](vk::CommandBuffer cmdbuf) {
scheduler.Record([pipeline, layout, sampler, src_view, extent, up_scale, down_shift,
this](vk::CommandBuffer cmdbuf) {
const VkOffset2D offset{
.x = 0,
.y = 0,
@@ -488,7 +495,7 @@ void BlitImageHelper::Convert(VkPipeline pipeline, const Framebuffer* dst_frameb
scheduler.InvalidateState();
}
VkPipeline BlitImageHelper::FindOrEmplacePipeline(const BlitImagePipelineKey& key) {
VkPipeline BlitImageHelper::FindOrEmplaceColorPipeline(const BlitImagePipelineKey& key) {
const auto it = std::ranges::find(blit_color_keys, key);
if (it != blit_color_keys.end()) {
return *blit_color_pipelines[std::distance(blit_color_keys.begin(), it)];
@@ -542,12 +549,14 @@ VkPipeline BlitImageHelper::FindOrEmplacePipeline(const BlitImagePipelineKey& ke
return *blit_color_pipelines.back();
}
VkPipeline BlitImageHelper::BlitDepthStencilPipeline(VkRenderPass renderpass) {
if (blit_depth_stencil_pipeline) {
return *blit_depth_stencil_pipeline;
VkPipeline BlitImageHelper::FindOrEmplaceDepthStencilPipeline(const BlitImagePipelineKey& key) {
const auto it = std::ranges::find(blit_depth_stencil_keys, key);
if (it != blit_depth_stencil_keys.end()) {
return *blit_depth_stencil_pipelines[std::distance(blit_depth_stencil_keys.begin(), it)];
}
blit_depth_stencil_keys.push_back(key);
const std::array stages = MakeStages(*full_screen_vert, *blit_depth_stencil_frag);
blit_depth_stencil_pipeline = device.GetLogical().CreateGraphicsPipeline({
blit_depth_stencil_pipelines.push_back(device.GetLogical().CreateGraphicsPipeline({
.sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
@@ -560,15 +569,15 @@ VkPipeline BlitImageHelper::BlitDepthStencilPipeline(VkRenderPass renderpass) {
.pRasterizationState = &PIPELINE_RASTERIZATION_STATE_CREATE_INFO,
.pMultisampleState = &PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
.pDepthStencilState = &PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO,
.pColorBlendState = &PIPELINE_COLOR_BLEND_STATE_EMPTY_CREATE_INFO,
.pColorBlendState = &PIPELINE_COLOR_BLEND_STATE_GENERIC_CREATE_INFO,
.pDynamicState = &PIPELINE_DYNAMIC_STATE_CREATE_INFO,
.layout = *two_textures_pipeline_layout,
.renderPass = renderpass,
.renderPass = key.renderpass,
.subpass = 0,
.basePipelineHandle = VK_NULL_HANDLE,
.basePipelineIndex = 0,
});
return *blit_depth_stencil_pipeline;
}));
return *blit_depth_stencil_pipelines.back();
}
void BlitImageHelper::ConvertDepthToColorPipeline(vk::Pipeline& pipeline, VkRenderPass renderpass) {

View File

@@ -34,7 +34,7 @@ public:
StateTracker& state_tracker, DescriptorPool& descriptor_pool);
~BlitImageHelper();
void BlitColor(const Framebuffer* dst_framebuffer, const ImageView& src_image_view,
void BlitColor(const Framebuffer* dst_framebuffer, VkImageView src_image_view,
const Region2D& dst_region, const Region2D& src_region,
Tegra::Engines::Fermi2D::Filter filter,
Tegra::Engines::Fermi2D::Operation operation);
@@ -44,21 +44,25 @@ public:
const Region2D& src_region, Tegra::Engines::Fermi2D::Filter filter,
Tegra::Engines::Fermi2D::Operation operation);
void ConvertD32ToR32(const Framebuffer* dst_framebuffer, const ImageView& src_image_view);
void ConvertD32ToR32(const Framebuffer* dst_framebuffer, const ImageView& src_image_view,
u32 up_scale, u32 down_shift);
void ConvertR32ToD32(const Framebuffer* dst_framebuffer, const ImageView& src_image_view);
void ConvertR32ToD32(const Framebuffer* dst_framebuffer, const ImageView& src_image_view,
u32 up_scale, u32 down_shift);
void ConvertD16ToR16(const Framebuffer* dst_framebuffer, const ImageView& src_image_view);
void ConvertD16ToR16(const Framebuffer* dst_framebuffer, const ImageView& src_image_view,
u32 up_scale, u32 down_shift);
void ConvertR16ToD16(const Framebuffer* dst_framebuffer, const ImageView& src_image_view);
void ConvertR16ToD16(const Framebuffer* dst_framebuffer, const ImageView& src_image_view,
u32 up_scale, u32 down_shift);
private:
void Convert(VkPipeline pipeline, const Framebuffer* dst_framebuffer,
const ImageView& src_image_view);
const ImageView& src_image_view, u32 up_scale, u32 down_shift);
[[nodiscard]] VkPipeline FindOrEmplacePipeline(const BlitImagePipelineKey& key);
[[nodiscard]] VkPipeline FindOrEmplaceColorPipeline(const BlitImagePipelineKey& key);
[[nodiscard]] VkPipeline BlitDepthStencilPipeline(VkRenderPass renderpass);
[[nodiscard]] VkPipeline FindOrEmplaceDepthStencilPipeline(const BlitImagePipelineKey& key);
void ConvertDepthToColorPipeline(vk::Pipeline& pipeline, VkRenderPass renderpass);
@@ -84,7 +88,8 @@ private:
std::vector<BlitImagePipelineKey> blit_color_keys;
std::vector<vk::Pipeline> blit_color_pipelines;
vk::Pipeline blit_depth_stencil_pipeline;
std::vector<BlitImagePipelineKey> blit_depth_stencil_keys;
std::vector<vk::Pipeline> blit_depth_stencil_pipelines;
vk::Pipeline convert_d32_to_r32_pipeline;
vk::Pipeline convert_r32_to_d32_pipeline;
vk::Pipeline convert_d16_to_r16_pipeline;

View File

@@ -10,6 +10,7 @@
#include "common/assert.h"
#include "common/common_types.h"
#include "shader_recompiler/backend/spirv/emit_spirv.h"
#include "shader_recompiler/shader_info.h"
#include "video_core/renderer_vulkan/vk_texture_cache.h"
#include "video_core/renderer_vulkan/vk_update_descriptor.h"
@@ -20,6 +21,8 @@
namespace Vulkan {
using Shader::Backend::SPIRV::NUM_TEXTURE_AND_IMAGE_SCALING_WORDS;
class DescriptorLayoutBuilder {
public:
DescriptorLayoutBuilder(const Device& device_) : device{&device_} {}
@@ -68,18 +71,28 @@ public:
}
vk::PipelineLayout CreatePipelineLayout(VkDescriptorSetLayout descriptor_set_layout) const {
using Shader::Backend::SPIRV::RescalingLayout;
const u32 size_offset = is_compute ? sizeof(RescalingLayout::down_factor) : 0u;
const VkPushConstantRange range{
.stageFlags = static_cast<VkShaderStageFlags>(
is_compute ? VK_SHADER_STAGE_COMPUTE_BIT : VK_SHADER_STAGE_ALL_GRAPHICS),
.offset = 0,
.size = static_cast<u32>(sizeof(RescalingLayout)) - size_offset,
};
return device->GetLogical().CreatePipelineLayout({
.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
.setLayoutCount = descriptor_set_layout ? 1U : 0U,
.pSetLayouts = bindings.empty() ? nullptr : &descriptor_set_layout,
.pushConstantRangeCount = 0,
.pPushConstantRanges = nullptr,
.pushConstantRangeCount = 1,
.pPushConstantRanges = &range,
});
}
void Add(const Shader::Info& info, VkShaderStageFlags stage) {
is_compute |= (stage & VK_SHADER_STAGE_COMPUTE_BIT) != 0;
Add(VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, stage, info.constant_buffer_descriptors);
Add(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, stage, info.storage_buffers_descriptors);
Add(VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, stage, info.texture_buffer_descriptors);
@@ -115,6 +128,7 @@ private:
}
const Device* device{};
bool is_compute{};
boost::container::small_vector<VkDescriptorSetLayoutBinding, 32> bindings;
boost::container::small_vector<VkDescriptorUpdateTemplateEntryKHR, 32> entries;
u32 binding{};
@@ -122,31 +136,68 @@ private:
size_t offset{};
};
inline void PushImageDescriptors(const Shader::Info& info, const VkSampler*& samplers,
const ImageId*& image_view_ids, TextureCache& texture_cache,
VKUpdateDescriptorQueue& update_descriptor_queue) {
for (const auto& desc : info.texture_buffer_descriptors) {
image_view_ids += desc.count;
class RescalingPushConstant {
public:
explicit RescalingPushConstant() noexcept {}
void PushTexture(bool is_rescaled) noexcept {
*texture_ptr |= is_rescaled ? texture_bit : 0u;
texture_bit <<= 1u;
if (texture_bit == 0u) {
texture_bit = 1u;
++texture_ptr;
}
}
for (const auto& desc : info.image_buffer_descriptors) {
image_view_ids += desc.count;
void PushImage(bool is_rescaled) noexcept {
*image_ptr |= is_rescaled ? image_bit : 0u;
image_bit <<= 1u;
if (image_bit == 0u) {
image_bit = 1u;
++image_ptr;
}
}
const std::array<u32, NUM_TEXTURE_AND_IMAGE_SCALING_WORDS>& Data() const noexcept {
return words;
}
private:
std::array<u32, NUM_TEXTURE_AND_IMAGE_SCALING_WORDS> words{};
u32* texture_ptr{words.data()};
u32* image_ptr{words.data() + Shader::Backend::SPIRV::NUM_TEXTURE_SCALING_WORDS};
u32 texture_bit{1u};
u32 image_bit{1u};
};
inline void PushImageDescriptors(TextureCache& texture_cache,
VKUpdateDescriptorQueue& update_descriptor_queue,
const Shader::Info& info, RescalingPushConstant& rescaling,
const VkSampler*& samplers,
const VideoCommon::ImageViewInOut*& views) {
const u32 num_texture_buffers = Shader::NumDescriptors(info.texture_buffer_descriptors);
const u32 num_image_buffers = Shader::NumDescriptors(info.image_buffer_descriptors);
views += num_texture_buffers;
views += num_image_buffers;
for (const auto& desc : info.texture_descriptors) {
for (u32 index = 0; index < desc.count; ++index) {
const VideoCommon::ImageViewId image_view_id{(views++)->id};
const VkSampler sampler{*(samplers++)};
ImageView& image_view{texture_cache.GetImageView(*(image_view_ids++))};
ImageView& image_view{texture_cache.GetImageView(image_view_id)};
const VkImageView vk_image_view{image_view.Handle(desc.type)};
update_descriptor_queue.AddSampledImage(vk_image_view, sampler);
rescaling.PushTexture(texture_cache.IsRescaling(image_view));
}
}
for (const auto& desc : info.image_descriptors) {
for (u32 index = 0; index < desc.count; ++index) {
ImageView& image_view{texture_cache.GetImageView(*(image_view_ids++))};
ImageView& image_view{texture_cache.GetImageView((views++)->id)};
if (desc.is_written) {
texture_cache.MarkModification(image_view.image_id);
}
const VkImageView vk_image_view{image_view.StorageView(desc.type, desc.format)};
update_descriptor_queue.AddImage(vk_image_view);
rescaling.PushImage(texture_cache.IsRescaling(image_view));
}
}
}

View File

@@ -12,14 +12,22 @@
#include "common/assert.h"
#include "common/common_types.h"
#include "common/math_util.h"
#include "common/settings.h"
#include "core/core.h"
#include "core/frontend/emu_window.h"
#include "core/memory.h"
#include "video_core/gpu.h"
#include "video_core/host_shaders/fxaa_frag_spv.h"
#include "video_core/host_shaders/fxaa_vert_spv.h"
#include "video_core/host_shaders/present_bicubic_frag_spv.h"
#include "video_core/host_shaders/present_gaussian_frag_spv.h"
#include "video_core/host_shaders/vulkan_present_frag_spv.h"
#include "video_core/host_shaders/vulkan_present_scaleforce_fp16_frag_spv.h"
#include "video_core/host_shaders/vulkan_present_scaleforce_fp32_frag_spv.h"
#include "video_core/host_shaders/vulkan_present_vert_spv.h"
#include "video_core/renderer_vulkan/renderer_vulkan.h"
#include "video_core/renderer_vulkan/vk_blit_screen.h"
#include "video_core/renderer_vulkan/vk_fsr.h"
#include "video_core/renderer_vulkan/vk_master_semaphore.h"
#include "video_core/renderer_vulkan/vk_scheduler.h"
#include "video_core/renderer_vulkan/vk_shader_util.h"
@@ -144,8 +152,8 @@ VkSemaphore VKBlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer,
scheduler.Wait(resource_ticks[image_index]);
resource_ticks[image_index] = scheduler.CurrentTick();
UpdateDescriptorSet(image_index,
use_accelerated ? screen_info.image_view : *raw_image_views[image_index]);
VkImageView source_image_view =
use_accelerated ? screen_info.image_view : *raw_image_views[image_index];
BufferData data;
SetUniformData(data, layout);
@@ -222,9 +230,134 @@ VkSemaphore VKBlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer,
read_barrier);
cmdbuf.CopyBufferToImage(*buffer, image, VK_IMAGE_LAYOUT_GENERAL, copy);
cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT,
VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, 0, write_barrier);
VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT |
VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
0, write_barrier);
});
}
const auto anti_alias_pass = Settings::values.anti_aliasing.GetValue();
if (use_accelerated && anti_alias_pass != Settings::AntiAliasing::None) {
UpdateAADescriptorSet(image_index, source_image_view, false);
const u32 up_scale = Settings::values.resolution_info.up_scale;
const u32 down_shift = Settings::values.resolution_info.down_shift;
VkExtent2D size{
.width = (up_scale * framebuffer.width) >> down_shift,
.height = (up_scale * framebuffer.height) >> down_shift,
};
scheduler.Record([this, image_index, size, anti_alias_pass](vk::CommandBuffer cmdbuf) {
const VkImageMemoryBarrier base_barrier{
.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
.pNext = nullptr,
.srcAccessMask = 0,
.dstAccessMask = 0,
.oldLayout = VK_IMAGE_LAYOUT_GENERAL,
.newLayout = VK_IMAGE_LAYOUT_GENERAL,
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.image = {},
.subresourceRange =
{
.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
.baseMipLevel = 0,
.levelCount = 1,
.baseArrayLayer = 0,
.layerCount = 1,
},
};
{
VkImageMemoryBarrier fsr_write_barrier = base_barrier;
fsr_write_barrier.image = *aa_image;
fsr_write_barrier.oldLayout = VK_IMAGE_LAYOUT_UNDEFINED;
cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, 0, fsr_write_barrier);
}
const f32 bg_red = Settings::values.bg_red.GetValue() / 255.0f;
const f32 bg_green = Settings::values.bg_green.GetValue() / 255.0f;
const f32 bg_blue = Settings::values.bg_blue.GetValue() / 255.0f;
const VkClearValue clear_color{
.color = {.float32 = {bg_red, bg_green, bg_blue, 1.0f}},
};
const VkRenderPassBeginInfo renderpass_bi{
.sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO,
.pNext = nullptr,
.renderPass = *aa_renderpass,
.framebuffer = *aa_framebuffer,
.renderArea =
{
.offset = {0, 0},
.extent = size,
},
.clearValueCount = 1,
.pClearValues = &clear_color,
};
const VkViewport viewport{
.x = 0.0f,
.y = 0.0f,
.width = static_cast<float>(size.width),
.height = static_cast<float>(size.height),
.minDepth = 0.0f,
.maxDepth = 1.0f,
};
const VkRect2D scissor{
.offset = {0, 0},
.extent = size,
};
cmdbuf.BeginRenderPass(renderpass_bi, VK_SUBPASS_CONTENTS_INLINE);
switch (anti_alias_pass) {
case Settings::AntiAliasing::Fxaa:
cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, *aa_pipeline);
break;
default:
cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, *aa_pipeline);
break;
}
cmdbuf.SetViewport(0, viewport);
cmdbuf.SetScissor(0, scissor);
cmdbuf.BindVertexBuffer(0, *buffer, offsetof(BufferData, vertices));
cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, *aa_pipeline_layout, 0,
aa_descriptor_sets[image_index], {});
cmdbuf.Draw(4, 1, 0, 0);
cmdbuf.EndRenderPass();
{
VkImageMemoryBarrier blit_read_barrier = base_barrier;
blit_read_barrier.image = *aa_image;
blit_read_barrier.srcAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT;
blit_read_barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT;
cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT,
VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, 0, blit_read_barrier);
}
});
source_image_view = *aa_image_view;
}
if (fsr) {
auto crop_rect = framebuffer.crop_rect;
if (crop_rect.GetWidth() == 0) {
crop_rect.right = framebuffer.width;
}
if (crop_rect.GetHeight() == 0) {
crop_rect.bottom = framebuffer.height;
}
crop_rect = crop_rect.Scale(Settings::values.resolution_info.up_factor);
VkExtent2D fsr_input_size{
.width = Settings::values.resolution_info.ScaleUp(framebuffer.width),
.height = Settings::values.resolution_info.ScaleUp(framebuffer.height),
};
VkImageView fsr_image_view =
fsr->Draw(scheduler, image_index, source_image_view, fsr_input_size, crop_rect);
UpdateDescriptorSet(image_index, fsr_image_view, true);
} else {
const bool is_nn =
Settings::values.scaling_filter.GetValue() == Settings::ScalingFilter::NearestNeighbor;
UpdateDescriptorSet(image_index, source_image_view, is_nn);
}
scheduler.Record(
[this, host_framebuffer, image_index, size = render_area](vk::CommandBuffer cmdbuf) {
const f32 bg_red = Settings::values.bg_red.GetValue() / 255.0f;
@@ -258,8 +391,28 @@ VkSemaphore VKBlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer,
.offset = {0, 0},
.extent = size,
};
const auto filter = Settings::values.scaling_filter.GetValue();
cmdbuf.BeginRenderPass(renderpass_bi, VK_SUBPASS_CONTENTS_INLINE);
cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline);
switch (filter) {
case Settings::ScalingFilter::NearestNeighbor:
cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, *bilinear_pipeline);
break;
case Settings::ScalingFilter::Bilinear:
cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, *bilinear_pipeline);
break;
case Settings::ScalingFilter::Bicubic:
cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, *bicubic_pipeline);
break;
case Settings::ScalingFilter::Gaussian:
cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, *gaussian_pipeline);
break;
case Settings::ScalingFilter::ScaleForce:
cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, *scaleforce_pipeline);
break;
default:
cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, *bilinear_pipeline);
break;
}
cmdbuf.SetViewport(0, viewport);
cmdbuf.SetScissor(0, scissor);
@@ -281,11 +434,16 @@ VkSemaphore VKBlitScreen::DrawToSwapchain(const Tegra::FramebufferConfig& frameb
}
vk::Framebuffer VKBlitScreen::CreateFramebuffer(const VkImageView& image_view, VkExtent2D extent) {
return CreateFramebuffer(image_view, extent, renderpass);
}
vk::Framebuffer VKBlitScreen::CreateFramebuffer(const VkImageView& image_view, VkExtent2D extent,
vk::RenderPass& rd) {
return device.GetLogical().CreateFramebuffer(VkFramebufferCreateInfo{
.sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
.renderPass = *renderpass,
.renderPass = *rd,
.attachmentCount = 1,
.pAttachments = &image_view,
.width = extent.width,
@@ -308,9 +466,21 @@ void VKBlitScreen::CreateDynamicResources() {
CreateRenderPass();
CreateFramebuffers();
CreateGraphicsPipeline();
fsr.reset();
if (Settings::values.scaling_filter.GetValue() == Settings::ScalingFilter::Fsr) {
CreateFSR();
}
}
void VKBlitScreen::RefreshResources(const Tegra::FramebufferConfig& framebuffer) {
if (Settings::values.scaling_filter.GetValue() == Settings::ScalingFilter::Fsr) {
if (!fsr) {
CreateFSR();
}
} else {
fsr.reset();
}
if (framebuffer.width == raw_width && framebuffer.height == raw_height && !raw_images.empty()) {
return;
}
@@ -324,7 +494,16 @@ void VKBlitScreen::RefreshResources(const Tegra::FramebufferConfig& framebuffer)
void VKBlitScreen::CreateShaders() {
vertex_shader = BuildShader(device, VULKAN_PRESENT_VERT_SPV);
fragment_shader = BuildShader(device, VULKAN_PRESENT_FRAG_SPV);
fxaa_vertex_shader = BuildShader(device, FXAA_VERT_SPV);
fxaa_fragment_shader = BuildShader(device, FXAA_FRAG_SPV);
bilinear_fragment_shader = BuildShader(device, VULKAN_PRESENT_FRAG_SPV);
bicubic_fragment_shader = BuildShader(device, PRESENT_BICUBIC_FRAG_SPV);
gaussian_fragment_shader = BuildShader(device, PRESENT_GAUSSIAN_FRAG_SPV);
if (device.IsFloat16Supported()) {
scaleforce_fragment_shader = BuildShader(device, VULKAN_PRESENT_SCALEFORCE_FP16_FRAG_SPV);
} else {
scaleforce_fragment_shader = BuildShader(device, VULKAN_PRESENT_SCALEFORCE_FP32_FRAG_SPV);
}
}
void VKBlitScreen::CreateSemaphores() {
@@ -344,6 +523,13 @@ void VKBlitScreen::CreateDescriptorPool() {
},
}};
const std::array<VkDescriptorPoolSize, 1> pool_sizes_aa{{
{
.type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
.descriptorCount = static_cast<u32>(image_count * 2),
},
}};
const VkDescriptorPoolCreateInfo ci{
.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO,
.pNext = nullptr,
@@ -353,19 +539,33 @@ void VKBlitScreen::CreateDescriptorPool() {
.pPoolSizes = pool_sizes.data(),
};
descriptor_pool = device.GetLogical().CreateDescriptorPool(ci);
const VkDescriptorPoolCreateInfo ci_aa{
.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO,
.pNext = nullptr,
.flags = VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT,
.maxSets = static_cast<u32>(image_count),
.poolSizeCount = static_cast<u32>(pool_sizes_aa.size()),
.pPoolSizes = pool_sizes_aa.data(),
};
aa_descriptor_pool = device.GetLogical().CreateDescriptorPool(ci_aa);
}
void VKBlitScreen::CreateRenderPass() {
renderpass = CreateRenderPassImpl(swapchain.GetImageViewFormat());
}
vk::RenderPass VKBlitScreen::CreateRenderPassImpl(VkFormat format, bool is_present) {
const VkAttachmentDescription color_attachment{
.flags = 0,
.format = swapchain.GetImageViewFormat(),
.format = format,
.samples = VK_SAMPLE_COUNT_1_BIT,
.loadOp = VK_ATTACHMENT_LOAD_OP_CLEAR,
.storeOp = VK_ATTACHMENT_STORE_OP_STORE,
.stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE,
.stencilStoreOp = VK_ATTACHMENT_STORE_OP_DONT_CARE,
.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED,
.finalLayout = VK_IMAGE_LAYOUT_PRESENT_SRC_KHR,
.finalLayout = is_present ? VK_IMAGE_LAYOUT_PRESENT_SRC_KHR : VK_IMAGE_LAYOUT_GENERAL,
};
const VkAttachmentReference color_attachment_ref{
@@ -408,7 +608,7 @@ void VKBlitScreen::CreateRenderPass() {
.pDependencies = &dependency,
};
renderpass = device.GetLogical().CreateRenderPass(renderpass_ci);
return device.GetLogical().CreateRenderPass(renderpass_ci);
}
void VKBlitScreen::CreateDescriptorSetLayout() {
@@ -429,6 +629,23 @@ void VKBlitScreen::CreateDescriptorSetLayout() {
},
}};
const std::array<VkDescriptorSetLayoutBinding, 2> layout_bindings_aa{{
{
.binding = 0,
.descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
.descriptorCount = 1,
.stageFlags = VK_SHADER_STAGE_VERTEX_BIT,
.pImmutableSamplers = nullptr,
},
{
.binding = 1,
.descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
.descriptorCount = 1,
.stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT,
.pImmutableSamplers = nullptr,
},
}};
const VkDescriptorSetLayoutCreateInfo ci{
.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
.pNext = nullptr,
@@ -437,11 +654,21 @@ void VKBlitScreen::CreateDescriptorSetLayout() {
.pBindings = layout_bindings.data(),
};
const VkDescriptorSetLayoutCreateInfo ci_aa{
.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
.bindingCount = static_cast<u32>(layout_bindings_aa.size()),
.pBindings = layout_bindings_aa.data(),
};
descriptor_set_layout = device.GetLogical().CreateDescriptorSetLayout(ci);
aa_descriptor_set_layout = device.GetLogical().CreateDescriptorSetLayout(ci_aa);
}
void VKBlitScreen::CreateDescriptorSets() {
const std::vector layouts(image_count, *descriptor_set_layout);
const std::vector layouts_aa(image_count, *aa_descriptor_set_layout);
const VkDescriptorSetAllocateInfo ai{
.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO,
@@ -451,7 +678,16 @@ void VKBlitScreen::CreateDescriptorSets() {
.pSetLayouts = layouts.data(),
};
const VkDescriptorSetAllocateInfo ai_aa{
.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO,
.pNext = nullptr,
.descriptorPool = *aa_descriptor_pool,
.descriptorSetCount = static_cast<u32>(image_count),
.pSetLayouts = layouts_aa.data(),
};
descriptor_sets = descriptor_pool.Allocate(ai);
aa_descriptor_sets = aa_descriptor_pool.Allocate(ai_aa);
}
void VKBlitScreen::CreatePipelineLayout() {
@@ -464,11 +700,21 @@ void VKBlitScreen::CreatePipelineLayout() {
.pushConstantRangeCount = 0,
.pPushConstantRanges = nullptr,
};
const VkPipelineLayoutCreateInfo ci_aa{
.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
.setLayoutCount = 1,
.pSetLayouts = aa_descriptor_set_layout.address(),
.pushConstantRangeCount = 0,
.pPushConstantRanges = nullptr,
};
pipeline_layout = device.GetLogical().CreatePipelineLayout(ci);
aa_pipeline_layout = device.GetLogical().CreatePipelineLayout(ci_aa);
}
void VKBlitScreen::CreateGraphicsPipeline() {
const std::array<VkPipelineShaderStageCreateInfo, 2> shader_stages{{
const std::array<VkPipelineShaderStageCreateInfo, 2> bilinear_shader_stages{{
{
.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
.pNext = nullptr,
@@ -483,7 +729,70 @@ void VKBlitScreen::CreateGraphicsPipeline() {
.pNext = nullptr,
.flags = 0,
.stage = VK_SHADER_STAGE_FRAGMENT_BIT,
.module = *fragment_shader,
.module = *bilinear_fragment_shader,
.pName = "main",
.pSpecializationInfo = nullptr,
},
}};
const std::array<VkPipelineShaderStageCreateInfo, 2> bicubic_shader_stages{{
{
.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
.stage = VK_SHADER_STAGE_VERTEX_BIT,
.module = *vertex_shader,
.pName = "main",
.pSpecializationInfo = nullptr,
},
{
.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
.stage = VK_SHADER_STAGE_FRAGMENT_BIT,
.module = *bicubic_fragment_shader,
.pName = "main",
.pSpecializationInfo = nullptr,
},
}};
const std::array<VkPipelineShaderStageCreateInfo, 2> gaussian_shader_stages{{
{
.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
.stage = VK_SHADER_STAGE_VERTEX_BIT,
.module = *vertex_shader,
.pName = "main",
.pSpecializationInfo = nullptr,
},
{
.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
.stage = VK_SHADER_STAGE_FRAGMENT_BIT,
.module = *gaussian_fragment_shader,
.pName = "main",
.pSpecializationInfo = nullptr,
},
}};
const std::array<VkPipelineShaderStageCreateInfo, 2> scaleforce_shader_stages{{
{
.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
.stage = VK_SHADER_STAGE_VERTEX_BIT,
.module = *vertex_shader,
.pName = "main",
.pSpecializationInfo = nullptr,
},
{
.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
.stage = VK_SHADER_STAGE_FRAGMENT_BIT,
.module = *scaleforce_fragment_shader,
.pName = "main",
.pSpecializationInfo = nullptr,
},
@@ -583,12 +892,12 @@ void VKBlitScreen::CreateGraphicsPipeline() {
.pDynamicStates = dynamic_states.data(),
};
const VkGraphicsPipelineCreateInfo pipeline_ci{
const VkGraphicsPipelineCreateInfo bilinear_pipeline_ci{
.sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
.stageCount = static_cast<u32>(shader_stages.size()),
.pStages = shader_stages.data(),
.stageCount = static_cast<u32>(bilinear_shader_stages.size()),
.pStages = bilinear_shader_stages.data(),
.pVertexInputState = &vertex_input_ci,
.pInputAssemblyState = &input_assembly_ci,
.pTessellationState = nullptr,
@@ -605,7 +914,76 @@ void VKBlitScreen::CreateGraphicsPipeline() {
.basePipelineIndex = 0,
};
pipeline = device.GetLogical().CreateGraphicsPipeline(pipeline_ci);
const VkGraphicsPipelineCreateInfo bicubic_pipeline_ci{
.sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
.stageCount = static_cast<u32>(bicubic_shader_stages.size()),
.pStages = bicubic_shader_stages.data(),
.pVertexInputState = &vertex_input_ci,
.pInputAssemblyState = &input_assembly_ci,
.pTessellationState = nullptr,
.pViewportState = &viewport_state_ci,
.pRasterizationState = &rasterization_ci,
.pMultisampleState = &multisampling_ci,
.pDepthStencilState = nullptr,
.pColorBlendState = &color_blend_ci,
.pDynamicState = &dynamic_state_ci,
.layout = *pipeline_layout,
.renderPass = *renderpass,
.subpass = 0,
.basePipelineHandle = 0,
.basePipelineIndex = 0,
};
const VkGraphicsPipelineCreateInfo gaussian_pipeline_ci{
.sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
.stageCount = static_cast<u32>(gaussian_shader_stages.size()),
.pStages = gaussian_shader_stages.data(),
.pVertexInputState = &vertex_input_ci,
.pInputAssemblyState = &input_assembly_ci,
.pTessellationState = nullptr,
.pViewportState = &viewport_state_ci,
.pRasterizationState = &rasterization_ci,
.pMultisampleState = &multisampling_ci,
.pDepthStencilState = nullptr,
.pColorBlendState = &color_blend_ci,
.pDynamicState = &dynamic_state_ci,
.layout = *pipeline_layout,
.renderPass = *renderpass,
.subpass = 0,
.basePipelineHandle = 0,
.basePipelineIndex = 0,
};
const VkGraphicsPipelineCreateInfo scaleforce_pipeline_ci{
.sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
.stageCount = static_cast<u32>(scaleforce_shader_stages.size()),
.pStages = scaleforce_shader_stages.data(),
.pVertexInputState = &vertex_input_ci,
.pInputAssemblyState = &input_assembly_ci,
.pTessellationState = nullptr,
.pViewportState = &viewport_state_ci,
.pRasterizationState = &rasterization_ci,
.pMultisampleState = &multisampling_ci,
.pDepthStencilState = nullptr,
.pColorBlendState = &color_blend_ci,
.pDynamicState = &dynamic_state_ci,
.layout = *pipeline_layout,
.renderPass = *renderpass,
.subpass = 0,
.basePipelineHandle = 0,
.basePipelineIndex = 0,
};
bilinear_pipeline = device.GetLogical().CreateGraphicsPipeline(bilinear_pipeline_ci);
bicubic_pipeline = device.GetLogical().CreateGraphicsPipeline(bicubic_pipeline_ci);
gaussian_pipeline = device.GetLogical().CreateGraphicsPipeline(gaussian_pipeline_ci);
scaleforce_pipeline = device.GetLogical().CreateGraphicsPipeline(scaleforce_pipeline_ci);
}
void VKBlitScreen::CreateSampler() {
@@ -614,8 +992,29 @@ void VKBlitScreen::CreateSampler() {
.pNext = nullptr,
.flags = 0,
.magFilter = VK_FILTER_LINEAR,
.minFilter = VK_FILTER_LINEAR,
.mipmapMode = VK_SAMPLER_MIPMAP_MODE_NEAREST,
.addressModeU = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER,
.addressModeV = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER,
.addressModeW = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER,
.mipLodBias = 0.0f,
.anisotropyEnable = VK_FALSE,
.maxAnisotropy = 0.0f,
.compareEnable = VK_FALSE,
.compareOp = VK_COMPARE_OP_NEVER,
.minLod = 0.0f,
.maxLod = 0.0f,
.borderColor = VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK,
.unnormalizedCoordinates = VK_FALSE,
};
const VkSamplerCreateInfo ci_nn{
.sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
.magFilter = VK_FILTER_NEAREST,
.minFilter = VK_FILTER_NEAREST,
.mipmapMode = VK_SAMPLER_MIPMAP_MODE_LINEAR,
.mipmapMode = VK_SAMPLER_MIPMAP_MODE_NEAREST,
.addressModeU = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER,
.addressModeV = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER,
.addressModeW = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER,
@@ -631,6 +1030,7 @@ void VKBlitScreen::CreateSampler() {
};
sampler = device.GetLogical().CreateSampler(ci);
nn_sampler = device.GetLogical().CreateSampler(ci_nn);
}
void VKBlitScreen::CreateFramebuffers() {
@@ -639,7 +1039,7 @@ void VKBlitScreen::CreateFramebuffers() {
for (std::size_t i = 0; i < image_count; ++i) {
const VkImageView image_view{swapchain.GetImageViewIndex(i)};
framebuffers[i] = CreateFramebuffer(image_view, size);
framebuffers[i] = CreateFramebuffer(image_view, size, renderpass);
}
}
@@ -649,6 +1049,11 @@ void VKBlitScreen::ReleaseRawImages() {
}
raw_images.clear();
raw_buffer_commits.clear();
aa_image_view.reset();
aa_image.reset();
aa_commit = MemoryCommit{};
buffer.reset();
buffer_commit = MemoryCommit{};
}
@@ -675,8 +1080,11 @@ void VKBlitScreen::CreateRawImages(const Tegra::FramebufferConfig& framebuffer)
raw_image_views.resize(image_count);
raw_buffer_commits.resize(image_count);
for (size_t i = 0; i < image_count; ++i) {
raw_images[i] = device.GetLogical().CreateImage(VkImageCreateInfo{
const auto create_image = [&](bool used_on_framebuffer = false, u32 up_scale = 1,
u32 down_shift = 0) {
u32 extra_usages = used_on_framebuffer ? VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT
: VK_IMAGE_USAGE_TRANSFER_DST_BIT;
return device.GetLogical().CreateImage(VkImageCreateInfo{
.sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
@@ -684,26 +1092,30 @@ void VKBlitScreen::CreateRawImages(const Tegra::FramebufferConfig& framebuffer)
.format = GetFormat(framebuffer),
.extent =
{
.width = framebuffer.width,
.height = framebuffer.height,
.width = (up_scale * framebuffer.width) >> down_shift,
.height = (up_scale * framebuffer.height) >> down_shift,
.depth = 1,
},
.mipLevels = 1,
.arrayLayers = 1,
.samples = VK_SAMPLE_COUNT_1_BIT,
.tiling = VK_IMAGE_TILING_LINEAR,
.usage = VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_SAMPLED_BIT,
.tiling = used_on_framebuffer ? VK_IMAGE_TILING_OPTIMAL : VK_IMAGE_TILING_LINEAR,
.usage = VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_SAMPLED_BIT | extra_usages,
.sharingMode = VK_SHARING_MODE_EXCLUSIVE,
.queueFamilyIndexCount = 0,
.pQueueFamilyIndices = nullptr,
.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED,
});
raw_buffer_commits[i] = memory_allocator.Commit(raw_images[i], MemoryUsage::DeviceLocal);
raw_image_views[i] = device.GetLogical().CreateImageView(VkImageViewCreateInfo{
};
const auto create_commit = [&](vk::Image& image) {
return memory_allocator.Commit(image, MemoryUsage::DeviceLocal);
};
const auto create_image_view = [&](vk::Image& image) {
return device.GetLogical().CreateImageView(VkImageViewCreateInfo{
.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
.image = *raw_images[i],
.image = *image,
.viewType = VK_IMAGE_VIEW_TYPE_2D,
.format = GetFormat(framebuffer),
.components =
@@ -722,10 +1134,211 @@ void VKBlitScreen::CreateRawImages(const Tegra::FramebufferConfig& framebuffer)
.layerCount = 1,
},
});
};
for (size_t i = 0; i < image_count; ++i) {
raw_images[i] = create_image();
raw_buffer_commits[i] = create_commit(raw_images[i]);
raw_image_views[i] = create_image_view(raw_images[i]);
}
// AA Resources
const u32 up_scale = Settings::values.resolution_info.up_scale;
const u32 down_shift = Settings::values.resolution_info.down_shift;
aa_image = create_image(true, up_scale, down_shift);
aa_commit = create_commit(aa_image);
aa_image_view = create_image_view(aa_image);
VkExtent2D size{
.width = (up_scale * framebuffer.width) >> down_shift,
.height = (up_scale * framebuffer.height) >> down_shift,
};
if (aa_renderpass) {
aa_framebuffer = CreateFramebuffer(*aa_image_view, size, aa_renderpass);
return;
}
aa_renderpass = CreateRenderPassImpl(GetFormat(framebuffer), false);
aa_framebuffer = CreateFramebuffer(*aa_image_view, size, aa_renderpass);
const std::array<VkPipelineShaderStageCreateInfo, 2> fxaa_shader_stages{{
{
.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
.stage = VK_SHADER_STAGE_VERTEX_BIT,
.module = *fxaa_vertex_shader,
.pName = "main",
.pSpecializationInfo = nullptr,
},
{
.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
.stage = VK_SHADER_STAGE_FRAGMENT_BIT,
.module = *fxaa_fragment_shader,
.pName = "main",
.pSpecializationInfo = nullptr,
},
}};
const auto vertex_binding_description = ScreenRectVertex::GetDescription();
const auto vertex_attrs_description = ScreenRectVertex::GetAttributes();
const VkPipelineVertexInputStateCreateInfo vertex_input_ci{
.sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
.vertexBindingDescriptionCount = 1,
.pVertexBindingDescriptions = &vertex_binding_description,
.vertexAttributeDescriptionCount = u32{vertex_attrs_description.size()},
.pVertexAttributeDescriptions = vertex_attrs_description.data(),
};
const VkPipelineInputAssemblyStateCreateInfo input_assembly_ci{
.sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
.topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP,
.primitiveRestartEnable = VK_FALSE,
};
const VkPipelineViewportStateCreateInfo viewport_state_ci{
.sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
.viewportCount = 1,
.pViewports = nullptr,
.scissorCount = 1,
.pScissors = nullptr,
};
const VkPipelineRasterizationStateCreateInfo rasterization_ci{
.sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
.depthClampEnable = VK_FALSE,
.rasterizerDiscardEnable = VK_FALSE,
.polygonMode = VK_POLYGON_MODE_FILL,
.cullMode = VK_CULL_MODE_NONE,
.frontFace = VK_FRONT_FACE_CLOCKWISE,
.depthBiasEnable = VK_FALSE,
.depthBiasConstantFactor = 0.0f,
.depthBiasClamp = 0.0f,
.depthBiasSlopeFactor = 0.0f,
.lineWidth = 1.0f,
};
const VkPipelineMultisampleStateCreateInfo multisampling_ci{
.sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
.rasterizationSamples = VK_SAMPLE_COUNT_1_BIT,
.sampleShadingEnable = VK_FALSE,
.minSampleShading = 0.0f,
.pSampleMask = nullptr,
.alphaToCoverageEnable = VK_FALSE,
.alphaToOneEnable = VK_FALSE,
};
const VkPipelineColorBlendAttachmentState color_blend_attachment{
.blendEnable = VK_FALSE,
.srcColorBlendFactor = VK_BLEND_FACTOR_ZERO,
.dstColorBlendFactor = VK_BLEND_FACTOR_ZERO,
.colorBlendOp = VK_BLEND_OP_ADD,
.srcAlphaBlendFactor = VK_BLEND_FACTOR_ZERO,
.dstAlphaBlendFactor = VK_BLEND_FACTOR_ZERO,
.alphaBlendOp = VK_BLEND_OP_ADD,
.colorWriteMask = VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT |
VK_COLOR_COMPONENT_B_BIT | VK_COLOR_COMPONENT_A_BIT,
};
const VkPipelineColorBlendStateCreateInfo color_blend_ci{
.sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
.logicOpEnable = VK_FALSE,
.logicOp = VK_LOGIC_OP_COPY,
.attachmentCount = 1,
.pAttachments = &color_blend_attachment,
.blendConstants = {0.0f, 0.0f, 0.0f, 0.0f},
};
static constexpr std::array dynamic_states{
VK_DYNAMIC_STATE_VIEWPORT,
VK_DYNAMIC_STATE_SCISSOR,
};
const VkPipelineDynamicStateCreateInfo dynamic_state_ci{
.sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
.dynamicStateCount = static_cast<u32>(dynamic_states.size()),
.pDynamicStates = dynamic_states.data(),
};
const VkGraphicsPipelineCreateInfo fxaa_pipeline_ci{
.sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
.stageCount = static_cast<u32>(fxaa_shader_stages.size()),
.pStages = fxaa_shader_stages.data(),
.pVertexInputState = &vertex_input_ci,
.pInputAssemblyState = &input_assembly_ci,
.pTessellationState = nullptr,
.pViewportState = &viewport_state_ci,
.pRasterizationState = &rasterization_ci,
.pMultisampleState = &multisampling_ci,
.pDepthStencilState = nullptr,
.pColorBlendState = &color_blend_ci,
.pDynamicState = &dynamic_state_ci,
.layout = *aa_pipeline_layout,
.renderPass = *aa_renderpass,
.subpass = 0,
.basePipelineHandle = 0,
.basePipelineIndex = 0,
};
// AA
aa_pipeline = device.GetLogical().CreateGraphicsPipeline(fxaa_pipeline_ci);
}
void VKBlitScreen::UpdateDescriptorSet(std::size_t image_index, VkImageView image_view) const {
void VKBlitScreen::UpdateAADescriptorSet(std::size_t image_index, VkImageView image_view,
bool nn) const {
const VkDescriptorImageInfo image_info{
.sampler = nn ? *nn_sampler : *sampler,
.imageView = image_view,
.imageLayout = VK_IMAGE_LAYOUT_GENERAL,
};
const VkWriteDescriptorSet sampler_write{
.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
.pNext = nullptr,
.dstSet = aa_descriptor_sets[image_index],
.dstBinding = 0,
.dstArrayElement = 0,
.descriptorCount = 1,
.descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
.pImageInfo = &image_info,
.pBufferInfo = nullptr,
.pTexelBufferView = nullptr,
};
const VkWriteDescriptorSet sampler_write_2{
.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
.pNext = nullptr,
.dstSet = aa_descriptor_sets[image_index],
.dstBinding = 1,
.dstArrayElement = 0,
.descriptorCount = 1,
.descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
.pImageInfo = &image_info,
.pBufferInfo = nullptr,
.pTexelBufferView = nullptr,
};
device.GetLogical().UpdateDescriptorSets(std::array{sampler_write, sampler_write_2}, {});
}
void VKBlitScreen::UpdateDescriptorSet(std::size_t image_index, VkImageView image_view,
bool nn) const {
const VkDescriptorBufferInfo buffer_info{
.buffer = *buffer,
.offset = offsetof(BufferData, uniform),
@@ -746,7 +1359,7 @@ void VKBlitScreen::UpdateDescriptorSet(std::size_t image_index, VkImageView imag
};
const VkDescriptorImageInfo image_info{
.sampler = *sampler,
.sampler = nn ? *nn_sampler : *sampler,
.imageView = image_view,
.imageLayout = VK_IMAGE_LAYOUT_GENERAL,
};
@@ -798,17 +1411,19 @@ void VKBlitScreen::SetVertexData(BufferData& data, const Tegra::FramebufferConfi
UNIMPLEMENTED_IF(framebuffer_crop_rect.top != 0);
UNIMPLEMENTED_IF(framebuffer_crop_rect.left != 0);
// Scale the output by the crop width/height. This is commonly used with 1280x720 rendering
// (e.g. handheld mode) on a 1920x1080 framebuffer.
f32 scale_u = 1.0f;
f32 scale_v = 1.0f;
if (framebuffer_crop_rect.GetWidth() > 0) {
scale_u = static_cast<f32>(framebuffer_crop_rect.GetWidth()) /
static_cast<f32>(screen_info.width);
}
if (framebuffer_crop_rect.GetHeight() > 0) {
scale_v = static_cast<f32>(framebuffer_crop_rect.GetHeight()) /
static_cast<f32>(screen_info.height);
// Scale the output by the crop width/height. This is commonly used with 1280x720 rendering
// (e.g. handheld mode) on a 1920x1080 framebuffer.
if (!fsr) {
if (framebuffer_crop_rect.GetWidth() > 0) {
scale_u = static_cast<f32>(framebuffer_crop_rect.GetWidth()) /
static_cast<f32>(screen_info.width);
}
if (framebuffer_crop_rect.GetHeight() > 0) {
scale_v = static_cast<f32>(framebuffer_crop_rect.GetHeight()) /
static_cast<f32>(screen_info.height);
}
}
const auto& screen = layout.screen;
@@ -822,6 +1437,15 @@ void VKBlitScreen::SetVertexData(BufferData& data, const Tegra::FramebufferConfi
data.vertices[3] = ScreenRectVertex(x + w, y + h, texcoords.bottom * scale_u, right * scale_v);
}
void VKBlitScreen::CreateFSR() {
const auto& layout = render_window.GetFramebufferLayout();
const VkExtent2D fsr_size{
.width = layout.screen.GetWidth(),
.height = layout.screen.GetHeight(),
};
fsr = std::make_unique<FSR>(device, memory_allocator, image_count, fsr_size);
}
u64 VKBlitScreen::CalculateBufferSize(const Tegra::FramebufferConfig& framebuffer) const {
return sizeof(BufferData) + GetSizeInBytes(framebuffer) * image_count;
}

View File

@@ -34,6 +34,7 @@ namespace Vulkan {
struct ScreenInfo;
class Device;
class FSR;
class RasterizerVulkan;
class VKScheduler;
class VKSwapchain;
@@ -66,6 +67,9 @@ public:
[[nodiscard]] vk::Framebuffer CreateFramebuffer(const VkImageView& image_view,
VkExtent2D extent);
[[nodiscard]] vk::Framebuffer CreateFramebuffer(const VkImageView& image_view,
VkExtent2D extent, vk::RenderPass& rd);
private:
struct BufferData;
@@ -74,6 +78,7 @@ private:
void CreateSemaphores();
void CreateDescriptorPool();
void CreateRenderPass();
vk::RenderPass CreateRenderPassImpl(VkFormat, bool is_present = true);
void CreateDescriptorSetLayout();
void CreateDescriptorSets();
void CreatePipelineLayout();
@@ -88,11 +93,14 @@ private:
void CreateStagingBuffer(const Tegra::FramebufferConfig& framebuffer);
void CreateRawImages(const Tegra::FramebufferConfig& framebuffer);
void UpdateDescriptorSet(std::size_t image_index, VkImageView image_view) const;
void UpdateDescriptorSet(std::size_t image_index, VkImageView image_view, bool nn) const;
void UpdateAADescriptorSet(std::size_t image_index, VkImageView image_view, bool nn) const;
void SetUniformData(BufferData& data, const Layout::FramebufferLayout layout) const;
void SetVertexData(BufferData& data, const Tegra::FramebufferConfig& framebuffer,
const Layout::FramebufferLayout layout) const;
void CreateFSR();
u64 CalculateBufferSize(const Tegra::FramebufferConfig& framebuffer) const;
u64 GetRawImageOffset(const Tegra::FramebufferConfig& framebuffer,
std::size_t image_index) const;
@@ -107,14 +115,24 @@ private:
const VKScreenInfo& screen_info;
vk::ShaderModule vertex_shader;
vk::ShaderModule fragment_shader;
vk::ShaderModule fxaa_vertex_shader;
vk::ShaderModule fxaa_fragment_shader;
vk::ShaderModule bilinear_fragment_shader;
vk::ShaderModule bicubic_fragment_shader;
vk::ShaderModule gaussian_fragment_shader;
vk::ShaderModule scaleforce_fragment_shader;
vk::DescriptorPool descriptor_pool;
vk::DescriptorSetLayout descriptor_set_layout;
vk::PipelineLayout pipeline_layout;
vk::Pipeline pipeline;
vk::Pipeline nearest_neightbor_pipeline;
vk::Pipeline bilinear_pipeline;
vk::Pipeline bicubic_pipeline;
vk::Pipeline gaussian_pipeline;
vk::Pipeline scaleforce_pipeline;
vk::RenderPass renderpass;
std::vector<vk::Framebuffer> framebuffers;
vk::DescriptorSets descriptor_sets;
vk::Sampler nn_sampler;
vk::Sampler sampler;
vk::Buffer buffer;
@@ -126,8 +144,22 @@ private:
std::vector<vk::Image> raw_images;
std::vector<vk::ImageView> raw_image_views;
std::vector<MemoryCommit> raw_buffer_commits;
vk::DescriptorPool aa_descriptor_pool;
vk::DescriptorSetLayout aa_descriptor_set_layout;
vk::PipelineLayout aa_pipeline_layout;
vk::Pipeline aa_pipeline;
vk::RenderPass aa_renderpass;
vk::Framebuffer aa_framebuffer;
vk::DescriptorSets aa_descriptor_sets;
vk::Image aa_image;
vk::ImageView aa_image_view;
MemoryCommit aa_commit;
u32 raw_width = 0;
u32 raw_height = 0;
std::unique_ptr<FSR> fsr;
};
} // namespace Vulkan

View File

@@ -146,7 +146,7 @@ void BufferCacheRuntime::Finish() {
}
void BufferCacheRuntime::CopyBuffer(VkBuffer dst_buffer, VkBuffer src_buffer,
std::span<const VideoCommon::BufferCopy> copies) {
std::span<const VideoCommon::BufferCopy> copies, bool barrier) {
static constexpr VkMemoryBarrier READ_BARRIER{
.sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER,
.pNext = nullptr,
@@ -163,10 +163,42 @@ void BufferCacheRuntime::CopyBuffer(VkBuffer dst_buffer, VkBuffer src_buffer,
boost::container::small_vector<VkBufferCopy, 3> vk_copies(copies.size());
std::ranges::transform(copies, vk_copies.begin(), MakeBufferCopy);
scheduler.RequestOutsideRenderPassOperationContext();
scheduler.Record([src_buffer, dst_buffer, vk_copies](vk::CommandBuffer cmdbuf) {
scheduler.Record([src_buffer, dst_buffer, vk_copies, barrier](vk::CommandBuffer cmdbuf) {
if (barrier) {
cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
VK_PIPELINE_STAGE_TRANSFER_BIT, 0, READ_BARRIER);
}
cmdbuf.CopyBuffer(src_buffer, dst_buffer, vk_copies);
if (barrier) {
cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT,
VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, 0, WRITE_BARRIER);
}
});
}
void BufferCacheRuntime::PreCopyBarrier() {
static constexpr VkMemoryBarrier READ_BARRIER{
.sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER,
.pNext = nullptr,
.srcAccessMask = VK_ACCESS_MEMORY_WRITE_BIT,
.dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT | VK_ACCESS_TRANSFER_WRITE_BIT,
};
scheduler.RequestOutsideRenderPassOperationContext();
scheduler.Record([](vk::CommandBuffer cmdbuf) {
cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT,
0, READ_BARRIER);
cmdbuf.CopyBuffer(src_buffer, dst_buffer, vk_copies);
});
}
void BufferCacheRuntime::PostCopyBarrier() {
static constexpr VkMemoryBarrier WRITE_BARRIER{
.sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER,
.pNext = nullptr,
.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
.dstAccessMask = VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT,
};
scheduler.RequestOutsideRenderPassOperationContext();
scheduler.Record([](vk::CommandBuffer cmdbuf) {
cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
0, WRITE_BARRIER);
});

View File

@@ -69,8 +69,12 @@ public:
[[nodiscard]] StagingBufferRef DownloadStagingBuffer(size_t size);
void PreCopyBarrier();
void CopyBuffer(VkBuffer src_buffer, VkBuffer dst_buffer,
std::span<const VideoCommon::BufferCopy> copies);
std::span<const VideoCommon::BufferCopy> copies, bool barrier = true);
void PostCopyBarrier();
void ClearBuffer(VkBuffer dest_buffer, u32 offset, size_t size, u32 value);

View File

@@ -22,6 +22,7 @@
namespace Vulkan {
using Shader::ImageBufferDescriptor;
using Shader::Backend::SPIRV::RESCALING_LAYOUT_WORDS_OFFSET;
using Tegra::Texture::TexturePair;
ComputePipeline::ComputePipeline(const Device& device_, DescriptorPool& descriptor_pool,
@@ -108,8 +109,7 @@ void ComputePipeline::Configure(Tegra::Engines::KeplerCompute& kepler_compute,
texture_cache.SynchronizeComputeDescriptors();
static constexpr size_t max_elements = 64;
std::array<ImageId, max_elements> image_view_ids;
boost::container::static_vector<u32, max_elements> image_view_indices;
boost::container::static_vector<VideoCommon::ImageViewInOut, max_elements> views;
boost::container::static_vector<VkSampler, max_elements> samplers;
const auto& qmd{kepler_compute.launch_description};
@@ -134,30 +134,37 @@ void ComputePipeline::Configure(Tegra::Engines::KeplerCompute& kepler_compute,
}
return TexturePair(gpu_memory.Read<u32>(addr), via_header_index);
}};
const auto add_image{[&](const auto& desc) {
const auto add_image{[&](const auto& desc, bool blacklist) {
for (u32 index = 0; index < desc.count; ++index) {
const auto handle{read_handle(desc, index)};
image_view_indices.push_back(handle.first);
views.push_back({
.index = handle.first,
.blacklist = blacklist,
.id = {},
});
}
}};
std::ranges::for_each(info.texture_buffer_descriptors, add_image);
std::ranges::for_each(info.image_buffer_descriptors, add_image);
for (const auto& desc : info.texture_buffer_descriptors) {
add_image(desc, false);
}
for (const auto& desc : info.image_buffer_descriptors) {
add_image(desc, false);
}
for (const auto& desc : info.texture_descriptors) {
for (u32 index = 0; index < desc.count; ++index) {
const auto handle{read_handle(desc, index)};
image_view_indices.push_back(handle.first);
views.push_back({handle.first});
Sampler* const sampler = texture_cache.GetComputeSampler(handle.second);
samplers.push_back(sampler->Handle());
}
}
std::ranges::for_each(info.image_descriptors, add_image);
const std::span indices_span(image_view_indices.data(), image_view_indices.size());
texture_cache.FillComputeImageViews(indices_span, image_view_ids);
for (const auto& desc : info.image_descriptors) {
add_image(desc, desc.is_written);
}
texture_cache.FillComputeImageViews(std::span(views.data(), views.size()));
buffer_cache.UnbindComputeTextureBuffers();
ImageId* texture_buffer_ids{image_view_ids.data()};
size_t index{};
const auto add_buffer{[&](const auto& desc) {
constexpr bool is_image = std::is_same_v<decltype(desc), const ImageBufferDescriptor&>;
@@ -166,11 +173,10 @@ void ComputePipeline::Configure(Tegra::Engines::KeplerCompute& kepler_compute,
if constexpr (is_image) {
is_written = desc.is_written;
}
ImageView& image_view = texture_cache.GetImageView(*texture_buffer_ids);
ImageView& image_view = texture_cache.GetImageView(views[index].id);
buffer_cache.BindComputeTextureBuffer(index, image_view.GpuAddr(),
image_view.BufferSize(), image_view.format,
is_written, is_image);
++texture_buffer_ids;
++index;
}
}};
@@ -180,9 +186,11 @@ void ComputePipeline::Configure(Tegra::Engines::KeplerCompute& kepler_compute,
buffer_cache.UpdateComputeBuffers();
buffer_cache.BindHostComputeBuffers();
RescalingPushConstant rescaling;
const VkSampler* samplers_it{samplers.data()};
const ImageId* views_it{image_view_ids.data()};
PushImageDescriptors(info, samplers_it, views_it, texture_cache, update_descriptor_queue);
const VideoCommon::ImageViewInOut* views_it{views.data()};
PushImageDescriptors(texture_cache, update_descriptor_queue, info, rescaling, samplers_it,
views_it);
if (!is_built.load(std::memory_order::relaxed)) {
// Wait for the pipeline to be built
@@ -192,11 +200,18 @@ void ComputePipeline::Configure(Tegra::Engines::KeplerCompute& kepler_compute,
});
}
const void* const descriptor_data{update_descriptor_queue.UpdateData()};
scheduler.Record([this, descriptor_data](vk::CommandBuffer cmdbuf) {
const bool is_rescaling = !info.texture_descriptors.empty() || !info.image_descriptors.empty();
scheduler.Record([this, descriptor_data, is_rescaling,
rescaling_data = rescaling.Data()](vk::CommandBuffer cmdbuf) {
cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline);
if (!descriptor_set_layout) {
return;
}
if (is_rescaling) {
cmdbuf.PushConstants(*pipeline_layout, VK_SHADER_STAGE_COMPUTE_BIT,
RESCALING_LAYOUT_WORDS_OFFSET, sizeof(rescaling_data),
rescaling_data.data());
}
const VkDescriptorSet descriptor_set{descriptor_allocator.Commit()};
const vk::Device& dev{device.GetLogical()};
dev.UpdateDescriptorSet(descriptor_set, *descriptor_update_template, descriptor_data);

View File

@@ -0,0 +1,553 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <cmath>
#include "common/bit_cast.h"
#include "common/common_types.h"
#include "common/div_ceil.h"
#include "video_core/host_shaders/vulkan_fidelityfx_fsr_easu_fp16_comp_spv.h"
#include "video_core/host_shaders/vulkan_fidelityfx_fsr_easu_fp32_comp_spv.h"
#include "video_core/host_shaders/vulkan_fidelityfx_fsr_rcas_fp16_comp_spv.h"
#include "video_core/host_shaders/vulkan_fidelityfx_fsr_rcas_fp32_comp_spv.h"
#include "video_core/renderer_vulkan/vk_fsr.h"
#include "video_core/renderer_vulkan/vk_scheduler.h"
#include "video_core/renderer_vulkan/vk_shader_util.h"
#include "video_core/vulkan_common/vulkan_device.h"
namespace Vulkan {
namespace {
// Reimplementations of the constant generating functions in ffx_fsr1.h
// GCC generated a lot of warnings when using the official header.
u32 AU1_AH1_AF1(f32 f) {
static constexpr u32 base[512]{
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
0x0000, 0x0000, 0x0000, 0x0000, 0x0001, 0x0002, 0x0004, 0x0008, 0x0010, 0x0020, 0x0040,
0x0080, 0x0100, 0x0200, 0x0400, 0x0800, 0x0c00, 0x1000, 0x1400, 0x1800, 0x1c00, 0x2000,
0x2400, 0x2800, 0x2c00, 0x3000, 0x3400, 0x3800, 0x3c00, 0x4000, 0x4400, 0x4800, 0x4c00,
0x5000, 0x5400, 0x5800, 0x5c00, 0x6000, 0x6400, 0x6800, 0x6c00, 0x7000, 0x7400, 0x7800,
0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff,
0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff,
0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff,
0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff,
0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff,
0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff,
0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff,
0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff,
0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff,
0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff,
0x7bff, 0x7bff, 0x7bff, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8001, 0x8002, 0x8004, 0x8008,
0x8010, 0x8020, 0x8040, 0x8080, 0x8100, 0x8200, 0x8400, 0x8800, 0x8c00, 0x9000, 0x9400,
0x9800, 0x9c00, 0xa000, 0xa400, 0xa800, 0xac00, 0xb000, 0xb400, 0xb800, 0xbc00, 0xc000,
0xc400, 0xc800, 0xcc00, 0xd000, 0xd400, 0xd800, 0xdc00, 0xe000, 0xe400, 0xe800, 0xec00,
0xf000, 0xf400, 0xf800, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff,
0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff,
0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff,
0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff,
0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff,
0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff,
0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff,
0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff,
0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff,
0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff,
0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff,
};
static constexpr s8 shift[512]{
0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x17, 0x16,
0x15, 0x14, 0x13, 0x12, 0x11, 0x10, 0x0f, 0x0e, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d,
0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d,
0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x17,
0x16, 0x15, 0x14, 0x13, 0x12, 0x11, 0x10, 0x0f, 0x0e, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d,
0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d,
0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
0x18, 0x18,
};
const u32 u = Common::BitCast<u32>(f);
const u32 i = u >> 23;
return base[i] + ((u & 0x7fffff) >> shift[i]);
}
u32 AU1_AH2_AF2(f32 a[2]) {
return AU1_AH1_AF1(a[0]) + (AU1_AH1_AF1(a[1]) << 16);
}
void FsrEasuCon(u32 con0[4], u32 con1[4], u32 con2[4], u32 con3[4], f32 inputViewportInPixelsX,
f32 inputViewportInPixelsY, f32 inputSizeInPixelsX, f32 inputSizeInPixelsY,
f32 outputSizeInPixelsX, f32 outputSizeInPixelsY) {
con0[0] = Common::BitCast<u32>(inputViewportInPixelsX / outputSizeInPixelsX);
con0[1] = Common::BitCast<u32>(inputViewportInPixelsY / outputSizeInPixelsY);
con0[2] = Common::BitCast<u32>(0.5f * inputViewportInPixelsX / outputSizeInPixelsX - 0.5f);
con0[3] = Common::BitCast<u32>(0.5f * inputViewportInPixelsY / outputSizeInPixelsY - 0.5f);
con1[0] = Common::BitCast<u32>(1.0f / inputSizeInPixelsX);
con1[1] = Common::BitCast<u32>(1.0f / inputSizeInPixelsY);
con1[2] = Common::BitCast<u32>(1.0f / inputSizeInPixelsX);
con1[3] = Common::BitCast<u32>(-1.0f / inputSizeInPixelsY);
con2[0] = Common::BitCast<u32>(-1.0f / inputSizeInPixelsX);
con2[1] = Common::BitCast<u32>(2.0f / inputSizeInPixelsY);
con2[2] = Common::BitCast<u32>(1.0f / inputSizeInPixelsX);
con2[3] = Common::BitCast<u32>(2.0f / inputSizeInPixelsY);
con3[0] = Common::BitCast<u32>(0.0f / inputSizeInPixelsX);
con3[1] = Common::BitCast<u32>(4.0f / inputSizeInPixelsY);
con3[2] = con3[3] = 0;
}
void FsrEasuConOffset(u32 con0[4], u32 con1[4], u32 con2[4], u32 con3[4],
f32 inputViewportInPixelsX, f32 inputViewportInPixelsY,
f32 inputSizeInPixelsX, f32 inputSizeInPixelsY, f32 outputSizeInPixelsX,
f32 outputSizeInPixelsY, f32 inputOffsetInPixelsX, f32 inputOffsetInPixelsY) {
FsrEasuCon(con0, con1, con2, con3, inputViewportInPixelsX, inputViewportInPixelsY,
inputSizeInPixelsX, inputSizeInPixelsY, outputSizeInPixelsX, outputSizeInPixelsY);
con0[2] = Common::BitCast<u32>(0.5f * inputViewportInPixelsX / outputSizeInPixelsX - 0.5f +
inputOffsetInPixelsX);
con0[3] = Common::BitCast<u32>(0.5f * inputViewportInPixelsY / outputSizeInPixelsY - 0.5f +
inputOffsetInPixelsY);
}
void FsrRcasCon(u32* con, f32 sharpness) {
sharpness = std::exp2f(-sharpness);
f32 hSharp[2]{sharpness, sharpness};
con[0] = Common::BitCast<u32>(sharpness);
con[1] = AU1_AH2_AF2(hSharp);
con[2] = 0;
con[3] = 0;
}
} // Anonymous namespace
FSR::FSR(const Device& device_, MemoryAllocator& memory_allocator_, size_t image_count_,
VkExtent2D output_size_)
: device{device_}, memory_allocator{memory_allocator_}, image_count{image_count_},
output_size{output_size_} {
CreateImages();
CreateSampler();
CreateShaders();
CreateDescriptorPool();
CreateDescriptorSetLayout();
CreateDescriptorSets();
CreatePipelineLayout();
CreatePipeline();
}
VkImageView FSR::Draw(VKScheduler& scheduler, size_t image_index, VkImageView image_view,
VkExtent2D input_image_extent, const Common::Rectangle<int>& crop_rect) {
UpdateDescriptorSet(image_index, image_view);
scheduler.Record([this, image_index, input_image_extent, crop_rect](vk::CommandBuffer cmdbuf) {
const VkImageMemoryBarrier base_barrier{
.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
.pNext = nullptr,
.srcAccessMask = 0,
.dstAccessMask = 0,
.oldLayout = VK_IMAGE_LAYOUT_GENERAL,
.newLayout = VK_IMAGE_LAYOUT_GENERAL,
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.image = {},
.subresourceRange =
{
.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
.baseMipLevel = 0,
.levelCount = 1,
.baseArrayLayer = 0,
.layerCount = 1,
},
};
cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, *easu_pipeline);
std::array<u32, 4 * 4> push_constants;
FsrEasuConOffset(
push_constants.data() + 0, push_constants.data() + 4, push_constants.data() + 8,
push_constants.data() + 12,
static_cast<f32>(crop_rect.GetWidth()), static_cast<f32>(crop_rect.GetHeight()),
static_cast<f32>(input_image_extent.width), static_cast<f32>(input_image_extent.height),
static_cast<f32>(output_size.width), static_cast<f32>(output_size.height),
static_cast<f32>(crop_rect.left), static_cast<f32>(crop_rect.top));
cmdbuf.PushConstants(*pipeline_layout, VK_SHADER_STAGE_COMPUTE_BIT, push_constants);
{
VkImageMemoryBarrier fsr_write_barrier = base_barrier;
fsr_write_barrier.image = *images[image_index],
fsr_write_barrier.oldLayout = VK_IMAGE_LAYOUT_UNDEFINED;
cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, fsr_write_barrier);
}
cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline_layout, 0,
descriptor_sets[image_index * 2], {});
cmdbuf.Dispatch(Common::DivCeil(output_size.width, 16u),
Common::DivCeil(output_size.height, 16u), 1);
cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, *rcas_pipeline);
FsrRcasCon(push_constants.data(), 0.25f);
cmdbuf.PushConstants(*pipeline_layout, VK_SHADER_STAGE_COMPUTE_BIT, push_constants);
{
std::array<VkImageMemoryBarrier, 2> barriers;
auto& fsr_read_barrier = barriers[0];
auto& blit_write_barrier = barriers[1];
fsr_read_barrier = base_barrier;
fsr_read_barrier.image = *images[image_index];
fsr_read_barrier.srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT;
fsr_read_barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT;
blit_write_barrier = base_barrier;
blit_write_barrier.image = *images[image_count + image_index];
blit_write_barrier.oldLayout = VK_IMAGE_LAYOUT_UNDEFINED;
blit_write_barrier.newLayout = VK_IMAGE_LAYOUT_GENERAL;
cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, {}, {}, barriers);
}
cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline_layout, 0,
descriptor_sets[image_index * 2 + 1], {});
cmdbuf.Dispatch(Common::DivCeil(output_size.width, 16u),
Common::DivCeil(output_size.height, 16u), 1);
{
std::array<VkImageMemoryBarrier, 1> barriers;
auto& blit_read_barrier = barriers[0];
blit_read_barrier = base_barrier;
blit_read_barrier.image = *images[image_count + image_index];
blit_read_barrier.srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT;
blit_read_barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT;
cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, 0, {}, {}, barriers);
}
});
return *image_views[image_count + image_index];
}
void FSR::CreateDescriptorPool() {
const std::array<VkDescriptorPoolSize, 2> pool_sizes{{
{
.type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
.descriptorCount = static_cast<u32>(image_count * 2),
},
{
.type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
.descriptorCount = static_cast<u32>(image_count * 2),
},
}};
const VkDescriptorPoolCreateInfo ci{
.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO,
.pNext = nullptr,
.flags = VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT,
.maxSets = static_cast<u32>(image_count * 2),
.poolSizeCount = static_cast<u32>(pool_sizes.size()),
.pPoolSizes = pool_sizes.data(),
};
descriptor_pool = device.GetLogical().CreateDescriptorPool(ci);
}
void FSR::CreateDescriptorSetLayout() {
const std::array<VkDescriptorSetLayoutBinding, 2> layout_bindings{{
{
.binding = 0,
.descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
.descriptorCount = 1,
.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
.pImmutableSamplers = sampler.address(),
},
{
.binding = 1,
.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
.descriptorCount = 1,
.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
.pImmutableSamplers = sampler.address(),
},
}};
const VkDescriptorSetLayoutCreateInfo ci{
.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
.bindingCount = static_cast<u32>(layout_bindings.size()),
.pBindings = layout_bindings.data(),
};
descriptor_set_layout = device.GetLogical().CreateDescriptorSetLayout(ci);
}
void FSR::CreateDescriptorSets() {
const u32 sets = static_cast<u32>(image_count * 2);
const std::vector layouts(sets, *descriptor_set_layout);
const VkDescriptorSetAllocateInfo ai{
.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO,
.pNext = nullptr,
.descriptorPool = *descriptor_pool,
.descriptorSetCount = sets,
.pSetLayouts = layouts.data(),
};
descriptor_sets = descriptor_pool.Allocate(ai);
}
void FSR::CreateImages() {
images.resize(image_count * 2);
image_views.resize(image_count * 2);
buffer_commits.resize(image_count * 2);
for (size_t i = 0; i < image_count * 2; ++i) {
images[i] = device.GetLogical().CreateImage(VkImageCreateInfo{
.sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
.imageType = VK_IMAGE_TYPE_2D,
.format = VK_FORMAT_R16G16B16A16_SFLOAT,
.extent =
{
.width = output_size.width,
.height = output_size.height,
.depth = 1,
},
.mipLevels = 1,
.arrayLayers = 1,
.samples = VK_SAMPLE_COUNT_1_BIT,
.tiling = VK_IMAGE_TILING_OPTIMAL,
.usage = VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_STORAGE_BIT |
VK_IMAGE_USAGE_SAMPLED_BIT,
.sharingMode = VK_SHARING_MODE_EXCLUSIVE,
.queueFamilyIndexCount = 0,
.pQueueFamilyIndices = nullptr,
.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED,
});
buffer_commits[i] = memory_allocator.Commit(images[i], MemoryUsage::DeviceLocal);
image_views[i] = device.GetLogical().CreateImageView(VkImageViewCreateInfo{
.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
.image = *images[i],
.viewType = VK_IMAGE_VIEW_TYPE_2D,
.format = VK_FORMAT_R16G16B16A16_SFLOAT,
.components =
{
.r = VK_COMPONENT_SWIZZLE_IDENTITY,
.g = VK_COMPONENT_SWIZZLE_IDENTITY,
.b = VK_COMPONENT_SWIZZLE_IDENTITY,
.a = VK_COMPONENT_SWIZZLE_IDENTITY,
},
.subresourceRange =
{
.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
.baseMipLevel = 0,
.levelCount = 1,
.baseArrayLayer = 0,
.layerCount = 1,
},
});
}
}
void FSR::CreatePipelineLayout() {
VkPushConstantRange push_const{
.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
.offset = 0,
.size = sizeof(std::array<u32, 4 * 4>),
};
VkPipelineLayoutCreateInfo ci{
.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
.setLayoutCount = 1,
.pSetLayouts = descriptor_set_layout.address(),
.pushConstantRangeCount = 1,
.pPushConstantRanges = &push_const,
};
pipeline_layout = device.GetLogical().CreatePipelineLayout(ci);
}
void FSR::UpdateDescriptorSet(std::size_t image_index, VkImageView image_view) const {
const auto fsr_image_view = *image_views[image_index];
const auto blit_image_view = *image_views[image_count + image_index];
const VkDescriptorImageInfo image_info{
.sampler = VK_NULL_HANDLE,
.imageView = image_view,
.imageLayout = VK_IMAGE_LAYOUT_GENERAL,
};
const VkDescriptorImageInfo fsr_image_info{
.sampler = VK_NULL_HANDLE,
.imageView = fsr_image_view,
.imageLayout = VK_IMAGE_LAYOUT_GENERAL,
};
const VkDescriptorImageInfo blit_image_info{
.sampler = VK_NULL_HANDLE,
.imageView = blit_image_view,
.imageLayout = VK_IMAGE_LAYOUT_GENERAL,
};
VkWriteDescriptorSet sampler_write{
.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
.pNext = nullptr,
.dstSet = descriptor_sets[image_index * 2],
.dstBinding = 0,
.dstArrayElement = 0,
.descriptorCount = 1,
.descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
.pImageInfo = &image_info,
.pBufferInfo = nullptr,
.pTexelBufferView = nullptr,
};
VkWriteDescriptorSet output_write{
.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
.pNext = nullptr,
.dstSet = descriptor_sets[image_index * 2],
.dstBinding = 1,
.dstArrayElement = 0,
.descriptorCount = 1,
.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
.pImageInfo = &fsr_image_info,
.pBufferInfo = nullptr,
.pTexelBufferView = nullptr,
};
device.GetLogical().UpdateDescriptorSets(std::array{sampler_write, output_write}, {});
sampler_write.dstSet = descriptor_sets[image_index * 2 + 1];
sampler_write.pImageInfo = &fsr_image_info;
output_write.dstSet = descriptor_sets[image_index * 2 + 1];
output_write.pImageInfo = &blit_image_info;
device.GetLogical().UpdateDescriptorSets(std::array{sampler_write, output_write}, {});
}
void FSR::CreateSampler() {
const VkSamplerCreateInfo ci{
.sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
.magFilter = VK_FILTER_LINEAR,
.minFilter = VK_FILTER_LINEAR,
.mipmapMode = VK_SAMPLER_MIPMAP_MODE_LINEAR,
.addressModeU = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE,
.addressModeV = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE,
.addressModeW = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE,
.mipLodBias = 0.0f,
.anisotropyEnable = VK_FALSE,
.maxAnisotropy = 0.0f,
.compareEnable = VK_FALSE,
.compareOp = VK_COMPARE_OP_NEVER,
.minLod = 0.0f,
.maxLod = 0.0f,
.borderColor = VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK,
.unnormalizedCoordinates = VK_FALSE,
};
sampler = device.GetLogical().CreateSampler(ci);
}
void FSR::CreateShaders() {
if (device.IsFloat16Supported()) {
easu_shader = BuildShader(device, VULKAN_FIDELITYFX_FSR_EASU_FP16_COMP_SPV);
rcas_shader = BuildShader(device, VULKAN_FIDELITYFX_FSR_RCAS_FP16_COMP_SPV);
} else {
easu_shader = BuildShader(device, VULKAN_FIDELITYFX_FSR_EASU_FP32_COMP_SPV);
rcas_shader = BuildShader(device, VULKAN_FIDELITYFX_FSR_RCAS_FP32_COMP_SPV);
}
}
void FSR::CreatePipeline() {
VkPipelineShaderStageCreateInfo shader_stage_easu{
.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
.stage = VK_SHADER_STAGE_COMPUTE_BIT,
.module = *easu_shader,
.pName = "main",
.pSpecializationInfo = nullptr,
};
VkPipelineShaderStageCreateInfo shader_stage_rcas{
.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
.stage = VK_SHADER_STAGE_COMPUTE_BIT,
.module = *rcas_shader,
.pName = "main",
.pSpecializationInfo = nullptr,
};
VkComputePipelineCreateInfo pipeline_ci_easu{
.sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
.stage = shader_stage_easu,
.layout = *pipeline_layout,
.basePipelineHandle = VK_NULL_HANDLE,
.basePipelineIndex = 0,
};
VkComputePipelineCreateInfo pipeline_ci_rcas{
.sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
.stage = shader_stage_rcas,
.layout = *pipeline_layout,
.basePipelineHandle = VK_NULL_HANDLE,
.basePipelineIndex = 0,
};
easu_pipeline = device.GetLogical().CreateComputePipeline(pipeline_ci_easu);
rcas_pipeline = device.GetLogical().CreateComputePipeline(pipeline_ci_rcas);
}
} // namespace Vulkan

View File

@@ -0,0 +1,54 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include "common/math_util.h"
#include "video_core/vulkan_common/vulkan_memory_allocator.h"
#include "video_core/vulkan_common/vulkan_wrapper.h"
namespace Vulkan {
class Device;
class VKScheduler;
class FSR {
public:
explicit FSR(const Device& device, MemoryAllocator& memory_allocator, size_t image_count,
VkExtent2D output_size);
VkImageView Draw(VKScheduler& scheduler, size_t image_index, VkImageView image_view,
VkExtent2D input_image_extent, const Common::Rectangle<int>& crop_rect);
private:
void CreateDescriptorPool();
void CreateDescriptorSetLayout();
void CreateDescriptorSets();
void CreateImages();
void CreateSampler();
void CreateShaders();
void CreatePipeline();
void CreatePipelineLayout();
void UpdateDescriptorSet(std::size_t image_index, VkImageView image_view) const;
const Device& device;
MemoryAllocator& memory_allocator;
size_t image_count;
VkExtent2D output_size;
vk::DescriptorPool descriptor_pool;
vk::DescriptorSetLayout descriptor_set_layout;
vk::DescriptorSets descriptor_sets;
vk::PipelineLayout pipeline_layout;
vk::ShaderModule easu_shader;
vk::ShaderModule rcas_shader;
vk::Pipeline easu_pipeline;
vk::Pipeline rcas_pipeline;
vk::Sampler sampler;
std::vector<vk::Image> images;
std::vector<vk::ImageView> image_views;
std::vector<MemoryCommit> buffer_commits;
};
} // namespace Vulkan

View File

@@ -32,6 +32,8 @@ namespace {
using boost::container::small_vector;
using boost::container::static_vector;
using Shader::ImageBufferDescriptor;
using Shader::Backend::SPIRV::RESCALING_LAYOUT_DOWN_FACTOR_OFFSET;
using Shader::Backend::SPIRV::RESCALING_LAYOUT_WORDS_OFFSET;
using Tegra::Texture::TexturePair;
using VideoCore::Surface::PixelFormat;
using VideoCore::Surface::PixelFormatFromDepthFormat;
@@ -235,6 +237,7 @@ GraphicsPipeline::GraphicsPipeline(
stage_infos[stage] = *info;
enabled_uniform_buffer_masks[stage] = info->constant_buffer_mask;
std::ranges::copy(info->constant_buffer_used_sizes, uniform_buffer_sizes[stage].begin());
num_textures += Shader::NumDescriptors(info->texture_descriptors);
}
auto func{[this, shader_notify, &render_pass_cache, &descriptor_pool, pipeline_statistics] {
DescriptorLayoutBuilder builder{MakeBuilder(device, stage_infos)};
@@ -277,11 +280,10 @@ void GraphicsPipeline::AddTransition(GraphicsPipeline* transition) {
template <typename Spec>
void GraphicsPipeline::ConfigureImpl(bool is_indexed) {
std::array<ImageId, MAX_IMAGE_ELEMENTS> image_view_ids;
std::array<u32, MAX_IMAGE_ELEMENTS> image_view_indices;
std::array<VideoCommon::ImageViewInOut, MAX_IMAGE_ELEMENTS> views;
std::array<VkSampler, MAX_IMAGE_ELEMENTS> samplers;
size_t sampler_index{};
size_t image_index{};
size_t view_index{};
texture_cache.SynchronizeGraphicsDescriptors();
@@ -322,26 +324,30 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) {
}
return TexturePair(gpu_memory.Read<u32>(addr), via_header_index);
}};
const auto add_image{[&](const auto& desc) {
const auto add_image{[&](const auto& desc, bool blacklist) LAMBDA_FORCEINLINE {
for (u32 index = 0; index < desc.count; ++index) {
const auto handle{read_handle(desc, index)};
image_view_indices[image_index++] = handle.first;
views[view_index++] = {
.index = handle.first,
.blacklist = blacklist,
.id = {},
};
}
}};
if constexpr (Spec::has_texture_buffers) {
for (const auto& desc : info.texture_buffer_descriptors) {
add_image(desc);
add_image(desc, false);
}
}
if constexpr (Spec::has_image_buffers) {
for (const auto& desc : info.image_buffer_descriptors) {
add_image(desc);
add_image(desc, false);
}
}
for (const auto& desc : info.texture_descriptors) {
for (u32 index = 0; index < desc.count; ++index) {
const auto handle{read_handle(desc, index)};
image_view_indices[image_index++] = handle.first;
views[view_index++] = {handle.first};
Sampler* const sampler{texture_cache.GetGraphicsSampler(handle.second)};
samplers[sampler_index++] = sampler->Handle();
@@ -349,7 +355,7 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) {
}
if constexpr (Spec::has_images) {
for (const auto& desc : info.image_descriptors) {
add_image(desc);
add_image(desc, desc.is_written);
}
}
}};
@@ -368,10 +374,9 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) {
if constexpr (Spec::enabled_stages[4]) {
config_stage(4);
}
const std::span indices_span(image_view_indices.data(), image_index);
texture_cache.FillGraphicsImageViews(indices_span, image_view_ids);
texture_cache.FillGraphicsImageViews<Spec::has_images>(std::span(views.data(), view_index));
ImageId* texture_buffer_index{image_view_ids.data()};
VideoCommon::ImageViewInOut* texture_buffer_it{views.data()};
const auto bind_stage_info{[&](size_t stage) LAMBDA_FORCEINLINE {
size_t index{};
const auto add_buffer{[&](const auto& desc) {
@@ -381,12 +386,12 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) {
if constexpr (is_image) {
is_written = desc.is_written;
}
ImageView& image_view{texture_cache.GetImageView(*texture_buffer_index)};
ImageView& image_view{texture_cache.GetImageView(texture_buffer_it->id)};
buffer_cache.BindGraphicsTextureBuffer(stage, index, image_view.GpuAddr(),
image_view.BufferSize(), image_view.format,
is_written, is_image);
++index;
++texture_buffer_index;
++texture_buffer_it;
}
}};
buffer_cache.UnbindGraphicsTextureBuffers(stage);
@@ -402,13 +407,9 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) {
add_buffer(desc);
}
}
for (const auto& desc : info.texture_descriptors) {
texture_buffer_index += desc.count;
}
texture_buffer_it += Shader::NumDescriptors(info.texture_descriptors);
if constexpr (Spec::has_images) {
for (const auto& desc : info.image_descriptors) {
texture_buffer_index += desc.count;
}
texture_buffer_it += Shader::NumDescriptors(info.image_descriptors);
}
}};
if constexpr (Spec::enabled_stages[0]) {
@@ -432,12 +433,13 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) {
update_descriptor_queue.Acquire();
RescalingPushConstant rescaling;
const VkSampler* samplers_it{samplers.data()};
const ImageId* views_it{image_view_ids.data()};
const VideoCommon::ImageViewInOut* views_it{views.data()};
const auto prepare_stage{[&](size_t stage) LAMBDA_FORCEINLINE {
buffer_cache.BindHostStageBuffers(stage);
PushImageDescriptors(stage_infos[stage], samplers_it, views_it, texture_cache,
update_descriptor_queue);
PushImageDescriptors(texture_cache, update_descriptor_queue, stage_infos[stage], rescaling,
samplers_it, views_it);
}};
if constexpr (Spec::enabled_stages[0]) {
prepare_stage(0);
@@ -454,10 +456,10 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) {
if constexpr (Spec::enabled_stages[4]) {
prepare_stage(4);
}
ConfigureDraw();
ConfigureDraw(rescaling);
}
void GraphicsPipeline::ConfigureDraw() {
void GraphicsPipeline::ConfigureDraw(const RescalingPushConstant& rescaling) {
texture_cache.UpdateRenderTargets(false);
scheduler.RequestRenderpass(texture_cache.GetFramebuffer());
@@ -468,12 +470,25 @@ void GraphicsPipeline::ConfigureDraw() {
build_condvar.wait(lock, [this] { return is_built.load(std::memory_order::relaxed); });
});
}
const bool is_rescaling{texture_cache.IsRescaling()};
const bool update_rescaling{scheduler.UpdateRescaling(is_rescaling)};
const bool bind_pipeline{scheduler.UpdateGraphicsPipeline(this)};
const void* const descriptor_data{update_descriptor_queue.UpdateData()};
scheduler.Record([this, descriptor_data, bind_pipeline](vk::CommandBuffer cmdbuf) {
scheduler.Record([this, descriptor_data, bind_pipeline, rescaling_data = rescaling.Data(),
is_rescaling, update_rescaling](vk::CommandBuffer cmdbuf) {
if (bind_pipeline) {
cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline);
}
cmdbuf.PushConstants(*pipeline_layout, VK_SHADER_STAGE_ALL_GRAPHICS,
RESCALING_LAYOUT_WORDS_OFFSET, sizeof(rescaling_data),
rescaling_data.data());
if (update_rescaling) {
const f32 config_down_factor{Settings::values.resolution_info.down_factor};
const f32 scale_down_factor{is_rescaling ? config_down_factor : 1.0f};
cmdbuf.PushConstants(*pipeline_layout, VK_SHADER_STAGE_ALL_GRAPHICS,
RESCALING_LAYOUT_DOWN_FACTOR_OFFSET, sizeof(scale_down_factor),
&scale_down_factor);
}
if (!descriptor_set_layout) {
return;
}
@@ -826,18 +841,10 @@ void GraphicsPipeline::MakePipeline(VkRenderPass render_pass) {
void GraphicsPipeline::Validate() {
size_t num_images{};
for (const auto& info : stage_infos) {
for (const auto& desc : info.texture_buffer_descriptors) {
num_images += desc.count;
}
for (const auto& desc : info.image_buffer_descriptors) {
num_images += desc.count;
}
for (const auto& desc : info.texture_descriptors) {
num_images += desc.count;
}
for (const auto& desc : info.image_descriptors) {
num_images += desc.count;
}
num_images += Shader::NumDescriptors(info.texture_buffer_descriptors);
num_images += Shader::NumDescriptors(info.image_buffer_descriptors);
num_images += Shader::NumDescriptors(info.texture_descriptors);
num_images += Shader::NumDescriptors(info.image_descriptors);
}
ASSERT(num_images <= MAX_IMAGE_ELEMENTS);
}

View File

@@ -62,6 +62,7 @@ namespace Vulkan {
class Device;
class PipelineStatistics;
class RenderPassCache;
class RescalingPushConstant;
class VKScheduler;
class VKUpdateDescriptorQueue;
@@ -113,7 +114,7 @@ private:
template <typename Spec>
void ConfigureImpl(bool is_indexed);
void ConfigureDraw();
void ConfigureDraw(const RescalingPushConstant& rescaling);
void MakePipeline(VkRenderPass render_pass);
@@ -138,6 +139,7 @@ private:
std::array<Shader::Info, NUM_STAGES> stage_infos;
std::array<u32, 5> enabled_uniform_buffer_masks{};
VideoCommon::UniformBufferSizes uniform_buffer_sizes{};
u32 num_textures{};
vk::DescriptorSetLayout descriptor_set_layout;
DescriptorAllocator descriptor_allocator;

View File

@@ -70,7 +70,9 @@ public:
return;
}
// If none of the above is hit, fallback to a regular wait
semaphore.Wait(tick);
while (!semaphore.Wait(tick)) {
}
Refresh();
}
private:

View File

@@ -58,18 +58,28 @@ struct DrawParams {
bool is_indexed;
};
VkViewport GetViewportState(const Device& device, const Maxwell& regs, size_t index) {
VkViewport GetViewportState(const Device& device, const Maxwell& regs, size_t index, float scale) {
const auto& src = regs.viewport_transform[index];
const float width = src.scale_x * 2.0f;
float y = src.translate_y - src.scale_y;
float height = src.scale_y * 2.0f;
const auto conv = [scale](float value) {
float new_value = value * scale;
if (scale < 1.0f) {
const bool sign = std::signbit(value);
new_value = std::round(std::abs(new_value));
new_value = sign ? -new_value : new_value;
}
return new_value;
};
const float x = conv(src.translate_x - src.scale_x);
const float width = conv(src.scale_x * 2.0f);
float y = conv(src.translate_y - src.scale_y);
float height = conv(src.scale_y * 2.0f);
if (regs.screen_y_control.y_negate) {
y += height;
height = -height;
}
const float reduce_z = regs.depth_mode == Maxwell::DepthMode::MinusOneToOne ? 1.0f : 0.0f;
VkViewport viewport{
.x = src.translate_x - src.scale_x,
.x = x,
.y = y,
.width = width != 0.0f ? width : 1.0f,
.height = height != 0.0f ? height : 1.0f,
@@ -83,14 +93,27 @@ VkViewport GetViewportState(const Device& device, const Maxwell& regs, size_t in
return viewport;
}
VkRect2D GetScissorState(const Maxwell& regs, size_t index) {
VkRect2D GetScissorState(const Maxwell& regs, size_t index, u32 up_scale = 1, u32 down_shift = 0) {
const auto& src = regs.scissor_test[index];
VkRect2D scissor;
const auto scale_up = [&](s32 value) -> s32 {
if (value == 0) {
return 0U;
}
const s32 upset = value * up_scale;
s32 acumm = 0;
if ((up_scale >> down_shift) == 0) {
acumm = upset % 2;
}
const s32 converted_value = (value * up_scale) >> down_shift;
return value < 0 ? std::min<s32>(converted_value - acumm, -1)
: std::max<s32>(converted_value + acumm, 1);
};
if (src.enable) {
scissor.offset.x = static_cast<s32>(src.min_x);
scissor.offset.y = static_cast<s32>(src.min_y);
scissor.extent.width = src.max_x - src.min_x;
scissor.extent.height = src.max_y - src.min_y;
scissor.offset.x = scale_up(static_cast<s32>(src.min_x));
scissor.offset.y = scale_up(static_cast<s32>(src.min_y));
scissor.extent.width = scale_up(src.max_x - src.min_x);
scissor.extent.height = scale_up(src.max_y - src.min_y);
} else {
scissor.offset.x = 0;
scissor.offset.y = 0;
@@ -199,7 +222,7 @@ void RasterizerVulkan::Clear() {
query_cache.UpdateCounters();
const auto& regs = maxwell3d.regs;
auto& regs = maxwell3d.regs;
const bool use_color = regs.clear_buffers.R || regs.clear_buffers.G || regs.clear_buffers.B ||
regs.clear_buffers.A;
const bool use_depth = regs.clear_buffers.Z;
@@ -214,8 +237,16 @@ void RasterizerVulkan::Clear() {
const VkExtent2D render_area = framebuffer->RenderArea();
scheduler.RequestRenderpass(framebuffer);
u32 up_scale = 1;
u32 down_shift = 0;
if (texture_cache.IsRescaling()) {
up_scale = Settings::values.resolution_info.up_scale;
down_shift = Settings::values.resolution_info.down_shift;
}
UpdateViewportsState(regs);
VkClearRect clear_rect{
.rect = GetScissorState(regs, 0),
.rect = GetScissorState(regs, 0, up_scale, down_shift),
.baseArrayLayer = regs.clear_buffers.layer,
.layerCount = 1,
};
@@ -230,7 +261,38 @@ void RasterizerVulkan::Clear() {
const u32 color_attachment = regs.clear_buffers.RT;
if (use_color && framebuffer->HasAspectColorBit(color_attachment)) {
VkClearValue clear_value;
std::memcpy(clear_value.color.float32, regs.clear_color, sizeof(regs.clear_color));
bool is_integer = false;
bool is_signed = false;
size_t int_size = 8;
for (std::size_t i = 0; i < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets; ++i) {
const auto& this_rt = regs.rt[i];
if (this_rt.Address() == 0) {
continue;
}
if (this_rt.format == Tegra::RenderTargetFormat::NONE) {
continue;
}
const auto format =
VideoCore::Surface::PixelFormatFromRenderTargetFormat(this_rt.format);
is_integer = IsPixelFormatInteger(format);
is_signed = IsPixelFormatSignedInteger(format);
int_size = PixelComponentSizeBitsInteger(format);
break;
}
if (!is_integer) {
std::memcpy(clear_value.color.float32, regs.clear_color, sizeof(regs.clear_color));
} else if (!is_signed) {
for (size_t i = 0; i < 4; i++) {
clear_value.color.uint32[i] = static_cast<u32>(
static_cast<f32>(static_cast<u64>(int_size) << 1U) * regs.clear_color[i]);
}
} else {
for (size_t i = 0; i < 4; i++) {
clear_value.color.int32[i] =
static_cast<s32>(static_cast<f32>(static_cast<s64>(int_size - 1) << 1) *
(regs.clear_color[i] - 0.5f));
}
}
scheduler.Record([color_attachment, clear_value, clear_rect](vk::CommandBuffer cmdbuf) {
const VkClearAttachment attachment{
@@ -595,15 +657,17 @@ void RasterizerVulkan::UpdateViewportsState(Tegra::Engines::Maxwell3D::Regs& reg
if (!state_tracker.TouchViewports()) {
return;
}
const bool is_rescaling{texture_cache.IsRescaling()};
const float scale = is_rescaling ? Settings::values.resolution_info.up_factor : 1.0f;
const std::array viewports{
GetViewportState(device, regs, 0), GetViewportState(device, regs, 1),
GetViewportState(device, regs, 2), GetViewportState(device, regs, 3),
GetViewportState(device, regs, 4), GetViewportState(device, regs, 5),
GetViewportState(device, regs, 6), GetViewportState(device, regs, 7),
GetViewportState(device, regs, 8), GetViewportState(device, regs, 9),
GetViewportState(device, regs, 10), GetViewportState(device, regs, 11),
GetViewportState(device, regs, 12), GetViewportState(device, regs, 13),
GetViewportState(device, regs, 14), GetViewportState(device, regs, 15),
GetViewportState(device, regs, 0, scale), GetViewportState(device, regs, 1, scale),
GetViewportState(device, regs, 2, scale), GetViewportState(device, regs, 3, scale),
GetViewportState(device, regs, 4, scale), GetViewportState(device, regs, 5, scale),
GetViewportState(device, regs, 6, scale), GetViewportState(device, regs, 7, scale),
GetViewportState(device, regs, 8, scale), GetViewportState(device, regs, 9, scale),
GetViewportState(device, regs, 10, scale), GetViewportState(device, regs, 11, scale),
GetViewportState(device, regs, 12, scale), GetViewportState(device, regs, 13, scale),
GetViewportState(device, regs, 14, scale), GetViewportState(device, regs, 15, scale),
};
scheduler.Record([viewports](vk::CommandBuffer cmdbuf) { cmdbuf.SetViewport(0, viewports); });
}
@@ -612,13 +676,29 @@ void RasterizerVulkan::UpdateScissorsState(Tegra::Engines::Maxwell3D::Regs& regs
if (!state_tracker.TouchScissors()) {
return;
}
u32 up_scale = 1;
u32 down_shift = 0;
if (texture_cache.IsRescaling()) {
up_scale = Settings::values.resolution_info.up_scale;
down_shift = Settings::values.resolution_info.down_shift;
}
const std::array scissors{
GetScissorState(regs, 0), GetScissorState(regs, 1), GetScissorState(regs, 2),
GetScissorState(regs, 3), GetScissorState(regs, 4), GetScissorState(regs, 5),
GetScissorState(regs, 6), GetScissorState(regs, 7), GetScissorState(regs, 8),
GetScissorState(regs, 9), GetScissorState(regs, 10), GetScissorState(regs, 11),
GetScissorState(regs, 12), GetScissorState(regs, 13), GetScissorState(regs, 14),
GetScissorState(regs, 15),
GetScissorState(regs, 0, up_scale, down_shift),
GetScissorState(regs, 1, up_scale, down_shift),
GetScissorState(regs, 2, up_scale, down_shift),
GetScissorState(regs, 3, up_scale, down_shift),
GetScissorState(regs, 4, up_scale, down_shift),
GetScissorState(regs, 5, up_scale, down_shift),
GetScissorState(regs, 6, up_scale, down_shift),
GetScissorState(regs, 7, up_scale, down_shift),
GetScissorState(regs, 8, up_scale, down_shift),
GetScissorState(regs, 9, up_scale, down_shift),
GetScissorState(regs, 10, up_scale, down_shift),
GetScissorState(regs, 11, up_scale, down_shift),
GetScissorState(regs, 12, up_scale, down_shift),
GetScissorState(regs, 13, up_scale, down_shift),
GetScissorState(regs, 14, up_scale, down_shift),
GetScissorState(regs, 15, up_scale, down_shift),
};
scheduler.Record([scissors](vk::CommandBuffer cmdbuf) { cmdbuf.SetScissor(0, scissors); });
}

View File

@@ -128,6 +128,15 @@ bool VKScheduler::UpdateGraphicsPipeline(GraphicsPipeline* pipeline) {
return true;
}
bool VKScheduler::UpdateRescaling(bool is_rescaling) {
if (state.rescaling_defined && is_rescaling == state.is_rescaling) {
return false;
}
state.rescaling_defined = true;
state.is_rescaling = is_rescaling;
return true;
}
void VKScheduler::WorkerThread(std::stop_token stop_token) {
Common::SetCurrentThreadName("yuzu:VulkanWorker");
do {
@@ -227,6 +236,7 @@ void VKScheduler::AllocateNewContext() {
void VKScheduler::InvalidateState() {
state.graphics_pipeline = nullptr;
state.rescaling_defined = false;
state_tracker.InvalidateCommandBufferState();
}

View File

@@ -56,6 +56,9 @@ public:
/// Update the pipeline to the current execution context.
bool UpdateGraphicsPipeline(GraphicsPipeline* pipeline);
/// Update the rescaling state. Returns true if the state has to be updated.
bool UpdateRescaling(bool is_rescaling);
/// Invalidates current command buffer state except for render passes
void InvalidateState();
@@ -185,6 +188,8 @@ private:
VkFramebuffer framebuffer = nullptr;
VkExtent2D render_area = {0, 0};
GraphicsPipeline* graphics_pipeline = nullptr;
bool is_rescaling = false;
bool rescaling_defined = false;
};
void WorkerThread(std::stop_token stop_token);

View File

@@ -71,11 +71,15 @@ public:
}
bool TouchViewports() {
return Exchange(Dirty::Viewports, false);
const bool dirty_viewports = Exchange(Dirty::Viewports, false);
const bool rescale_viewports = Exchange(VideoCommon::Dirty::RescaleViewports, false);
return dirty_viewports || rescale_viewports;
}
bool TouchScissors() {
return Exchange(Dirty::Scissors, false);
const bool dirty_scissors = Exchange(Dirty::Scissors, false);
const bool rescale_scissors = Exchange(VideoCommon::Dirty::RescaleScissors, false);
return dirty_scissors || rescale_scissors;
}
bool TouchDepthBias() {

View File

@@ -32,10 +32,12 @@ using Tegra::Engines::Fermi2D;
using Tegra::Texture::SwizzleSource;
using Tegra::Texture::TextureMipmapFilter;
using VideoCommon::BufferImageCopy;
using VideoCommon::ImageFlagBits;
using VideoCommon::ImageInfo;
using VideoCommon::ImageType;
using VideoCommon::SubresourceRange;
using VideoCore::Surface::IsPixelFormatASTC;
using VideoCore::Surface::IsPixelFormatInteger;
namespace {
constexpr VkBorderColor ConvertBorderColor(const std::array<float, 4>& color) {
@@ -588,8 +590,158 @@ struct RangedBarrierRange {
UNREACHABLE_MSG("Invalid image format={}", format);
return VK_FORMAT_R32_UINT;
}
void BlitScale(VKScheduler& scheduler, VkImage src_image, VkImage dst_image, const ImageInfo& info,
VkImageAspectFlags aspect_mask, const Settings::ResolutionScalingInfo& resolution,
bool up_scaling = true) {
const bool is_2d = info.type == ImageType::e2D;
const auto resources = info.resources;
const VkExtent2D extent{
.width = info.size.width,
.height = info.size.height,
};
// Depth and integer formats must use NEAREST filter for blits.
const bool is_color{aspect_mask == VK_IMAGE_ASPECT_COLOR_BIT};
const bool is_bilinear{is_color && !IsPixelFormatInteger(info.format)};
const VkFilter vk_filter = is_bilinear ? VK_FILTER_LINEAR : VK_FILTER_NEAREST;
scheduler.RequestOutsideRenderPassOperationContext();
scheduler.Record([dst_image, src_image, extent, resources, aspect_mask, resolution, is_2d,
vk_filter, up_scaling](vk::CommandBuffer cmdbuf) {
const VkOffset2D src_size{
.x = static_cast<s32>(up_scaling ? extent.width : resolution.ScaleUp(extent.width)),
.y = static_cast<s32>(is_2d && up_scaling ? extent.height
: resolution.ScaleUp(extent.height)),
};
const VkOffset2D dst_size{
.x = static_cast<s32>(up_scaling ? resolution.ScaleUp(extent.width) : extent.width),
.y = static_cast<s32>(is_2d && up_scaling ? resolution.ScaleUp(extent.height)
: extent.height),
};
boost::container::small_vector<VkImageBlit, 4> regions;
regions.reserve(resources.levels);
for (s32 level = 0; level < resources.levels; level++) {
regions.push_back({
.srcSubresource{
.aspectMask = aspect_mask,
.mipLevel = static_cast<u32>(level),
.baseArrayLayer = 0,
.layerCount = static_cast<u32>(resources.layers),
},
.srcOffsets{
{
.x = 0,
.y = 0,
.z = 0,
},
{
.x = std::max(1, src_size.x >> level),
.y = std::max(1, src_size.y >> level),
.z = 1,
},
},
.dstSubresource{
.aspectMask = aspect_mask,
.mipLevel = static_cast<u32>(level),
.baseArrayLayer = 0,
.layerCount = static_cast<u32>(resources.layers),
},
.dstOffsets{
{
.x = 0,
.y = 0,
.z = 0,
},
{
.x = std::max(1, dst_size.x >> level),
.y = std::max(1, dst_size.y >> level),
.z = 1,
},
},
});
}
const VkImageSubresourceRange subresource_range{
.aspectMask = aspect_mask,
.baseMipLevel = 0,
.levelCount = VK_REMAINING_MIP_LEVELS,
.baseArrayLayer = 0,
.layerCount = VK_REMAINING_ARRAY_LAYERS,
};
const std::array read_barriers{
VkImageMemoryBarrier{
.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
.pNext = nullptr,
.srcAccessMask = VK_ACCESS_MEMORY_WRITE_BIT,
.dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT,
.oldLayout = VK_IMAGE_LAYOUT_GENERAL,
.newLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.image = src_image,
.subresourceRange = subresource_range,
},
VkImageMemoryBarrier{
.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
.pNext = nullptr,
.srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT |
VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT |
VK_ACCESS_TRANSFER_WRITE_BIT,
.dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
.oldLayout = VK_IMAGE_LAYOUT_UNDEFINED, // Discard contents
.newLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.image = dst_image,
.subresourceRange = subresource_range,
},
};
const std::array write_barriers{
VkImageMemoryBarrier{
.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
.pNext = nullptr,
.srcAccessMask = 0,
.dstAccessMask = VK_ACCESS_MEMORY_WRITE_BIT | VK_ACCESS_MEMORY_READ_BIT,
.oldLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
.newLayout = VK_IMAGE_LAYOUT_GENERAL,
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.image = src_image,
.subresourceRange = subresource_range,
},
VkImageMemoryBarrier{
.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
.pNext = nullptr,
.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
.dstAccessMask = VK_ACCESS_MEMORY_WRITE_BIT | VK_ACCESS_MEMORY_READ_BIT,
.oldLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
.newLayout = VK_IMAGE_LAYOUT_GENERAL,
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.image = dst_image,
.subresourceRange = subresource_range,
},
};
cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT,
0, nullptr, nullptr, read_barriers);
cmdbuf.BlitImage(src_image, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, dst_image,
VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, regions, vk_filter);
cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
0, nullptr, nullptr, write_barriers);
});
}
} // Anonymous namespace
TextureCacheRuntime::TextureCacheRuntime(const Device& device_, VKScheduler& scheduler_,
MemoryAllocator& memory_allocator_,
StagingBufferPool& staging_buffer_pool_,
BlitImageHelper& blit_image_helper_,
ASTCDecoderPass& astc_decoder_pass_,
RenderPassCache& render_pass_cache_)
: device{device_}, scheduler{scheduler_}, memory_allocator{memory_allocator_},
staging_buffer_pool{staging_buffer_pool_}, blit_image_helper{blit_image_helper_},
astc_decoder_pass{astc_decoder_pass_}, render_pass_cache{render_pass_cache_},
resolution{Settings::values.resolution_info} {}
void TextureCacheRuntime::Finish() {
scheduler.Finish();
}
@@ -614,8 +766,8 @@ void TextureCacheRuntime::BlitImage(Framebuffer* dst_framebuffer, ImageView& dst
return;
}
if (aspect_mask == VK_IMAGE_ASPECT_COLOR_BIT && !is_src_msaa && !is_dst_msaa) {
blit_image_helper.BlitColor(dst_framebuffer, src, dst_region, src_region, filter,
operation);
blit_image_helper.BlitColor(dst_framebuffer, src.Handle(Shader::TextureType::Color2D),
dst_region, src_region, filter, operation);
return;
}
if (aspect_mask == (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) {
@@ -719,26 +871,29 @@ void TextureCacheRuntime::BlitImage(Framebuffer* dst_framebuffer, ImageView& dst
});
}
void TextureCacheRuntime::ConvertImage(Framebuffer* dst, ImageView& dst_view, ImageView& src_view) {
void TextureCacheRuntime::ConvertImage(Framebuffer* dst, ImageView& dst_view, ImageView& src_view,
bool rescaled) {
const u32 up_scale = rescaled ? resolution.up_scale : 1;
const u32 down_shift = rescaled ? resolution.down_shift : 0;
switch (dst_view.format) {
case PixelFormat::R16_UNORM:
if (src_view.format == PixelFormat::D16_UNORM) {
return blit_image_helper.ConvertD16ToR16(dst, src_view);
return blit_image_helper.ConvertD16ToR16(dst, src_view, up_scale, down_shift);
}
break;
case PixelFormat::R32_FLOAT:
if (src_view.format == PixelFormat::D32_FLOAT) {
return blit_image_helper.ConvertD32ToR32(dst, src_view);
return blit_image_helper.ConvertD32ToR32(dst, src_view, up_scale, down_shift);
}
break;
case PixelFormat::D16_UNORM:
if (src_view.format == PixelFormat::R16_UNORM) {
return blit_image_helper.ConvertR16ToD16(dst, src_view);
return blit_image_helper.ConvertR16ToD16(dst, src_view, up_scale, down_shift);
}
break;
case PixelFormat::D32_FLOAT:
if (src_view.format == PixelFormat::R32_FLOAT) {
return blit_image_helper.ConvertR32ToD32(dst, src_view);
return blit_image_helper.ConvertR32ToD32(dst, src_view, up_scale, down_shift);
}
break;
default:
@@ -840,36 +995,39 @@ u64 TextureCacheRuntime::GetDeviceLocalMemory() const {
return device.GetDeviceLocalMemory();
}
Image::Image(TextureCacheRuntime& runtime, const ImageInfo& info_, GPUVAddr gpu_addr_,
void TextureCacheRuntime::TickFrame() {}
Image::Image(TextureCacheRuntime& runtime_, const ImageInfo& info_, GPUVAddr gpu_addr_,
VAddr cpu_addr_)
: VideoCommon::ImageBase(info_, gpu_addr_, cpu_addr_), scheduler{&runtime.scheduler},
image(MakeImage(runtime.device, info)),
commit(runtime.memory_allocator.Commit(image, MemoryUsage::DeviceLocal)),
: VideoCommon::ImageBase(info_, gpu_addr_, cpu_addr_), scheduler{&runtime_.scheduler},
runtime{&runtime_}, original_image(MakeImage(runtime_.device, info)),
commit(runtime_.memory_allocator.Commit(original_image, MemoryUsage::DeviceLocal)),
aspect_mask(ImageAspectMask(info.format)) {
if (IsPixelFormatASTC(info.format) && !runtime.device.IsOptimalAstcSupported()) {
if (IsPixelFormatASTC(info.format) && !runtime->device.IsOptimalAstcSupported()) {
if (Settings::values.accelerate_astc.GetValue()) {
flags |= VideoCommon::ImageFlagBits::AcceleratedUpload;
} else {
flags |= VideoCommon::ImageFlagBits::Converted;
}
}
if (runtime.device.HasDebuggingToolAttached()) {
image.SetObjectNameEXT(VideoCommon::Name(*this).c_str());
if (runtime->device.HasDebuggingToolAttached()) {
original_image.SetObjectNameEXT(VideoCommon::Name(*this).c_str());
}
static constexpr VkImageViewUsageCreateInfo storage_image_view_usage_create_info{
.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_USAGE_CREATE_INFO,
.pNext = nullptr,
.usage = VK_IMAGE_USAGE_STORAGE_BIT,
};
if (IsPixelFormatASTC(info.format) && !runtime.device.IsOptimalAstcSupported()) {
const auto& device = runtime.device.GetLogical();
current_image = *original_image;
if (IsPixelFormatASTC(info.format) && !runtime->device.IsOptimalAstcSupported()) {
const auto& device = runtime->device.GetLogical();
storage_image_views.reserve(info.resources.levels);
for (s32 level = 0; level < info.resources.levels; ++level) {
storage_image_views.push_back(device.CreateImageView(VkImageViewCreateInfo{
.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
.pNext = &storage_image_view_usage_create_info,
.flags = 0,
.image = *image,
.image = *original_image,
.viewType = VK_IMAGE_VIEW_TYPE_2D_ARRAY,
.format = VK_FORMAT_A8B8G8R8_UNORM_PACK32,
.components{
@@ -890,26 +1048,39 @@ Image::Image(TextureCacheRuntime& runtime, const ImageInfo& info_, GPUVAddr gpu_
}
}
Image::Image(const VideoCommon::NullImageParams& params) : VideoCommon::ImageBase{params} {}
Image::~Image() = default;
void Image::UploadMemory(const StagingBufferRef& map, std::span<const BufferImageCopy> copies) {
// TODO: Move this to another API
const bool is_rescaled = True(flags & ImageFlagBits::Rescaled);
if (is_rescaled) {
ScaleDown(true);
}
scheduler->RequestOutsideRenderPassOperationContext();
std::vector vk_copies = TransformBufferImageCopies(copies, map.offset, aspect_mask);
const VkBuffer src_buffer = map.buffer;
const VkImage vk_image = *image;
const VkImage vk_image = *original_image;
const VkImageAspectFlags vk_aspect_mask = aspect_mask;
const bool is_initialized = std::exchange(initialized, true);
scheduler->Record([src_buffer, vk_image, vk_aspect_mask, is_initialized,
vk_copies](vk::CommandBuffer cmdbuf) {
CopyBufferToImage(cmdbuf, src_buffer, vk_image, vk_aspect_mask, is_initialized, vk_copies);
});
if (is_rescaled) {
ScaleUp();
}
}
void Image::DownloadMemory(const StagingBufferRef& map, std::span<const BufferImageCopy> copies) {
const bool is_rescaled = True(flags & ImageFlagBits::Rescaled);
if (is_rescaled) {
ScaleDown();
}
std::vector vk_copies = TransformBufferImageCopies(copies, map.offset, aspect_mask);
scheduler->RequestOutsideRenderPassOperationContext();
scheduler->Record([buffer = map.buffer, image = *image, aspect_mask = aspect_mask,
scheduler->Record([buffer = map.buffer, image = *original_image, aspect_mask = aspect_mask,
vk_copies](vk::CommandBuffer cmdbuf) {
const VkImageMemoryBarrier read_barrier{
.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
@@ -959,6 +1130,146 @@ void Image::DownloadMemory(const StagingBufferRef& map, std::span<const BufferIm
cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
0, memory_write_barrier, nullptr, image_write_barrier);
});
if (is_rescaled) {
ScaleUp(true);
}
}
bool Image::ScaleUp(bool ignore) {
if (True(flags & ImageFlagBits::Rescaled)) {
return false;
}
ASSERT(info.type != ImageType::Linear);
flags |= ImageFlagBits::Rescaled;
const auto& resolution = runtime->resolution;
if (!resolution.active) {
return false;
}
has_scaled = true;
const auto& device = runtime->device;
if (!scaled_image) {
const bool is_2d = info.type == ImageType::e2D;
const u32 scaled_width = resolution.ScaleUp(info.size.width);
const u32 scaled_height = is_2d ? resolution.ScaleUp(info.size.height) : info.size.height;
auto scaled_info = info;
scaled_info.size.width = scaled_width;
scaled_info.size.height = scaled_height;
scaled_image = MakeImage(device, scaled_info);
auto& allocator = runtime->memory_allocator;
scaled_commit = MemoryCommit(allocator.Commit(scaled_image, MemoryUsage::DeviceLocal));
ignore = false;
}
current_image = *scaled_image;
if (ignore) {
return true;
}
if (aspect_mask == 0) {
aspect_mask = ImageAspectMask(info.format);
}
static constexpr auto OPTIMAL_FORMAT = FormatType::Optimal;
const PixelFormat format = StorageFormat(info.format);
const auto vk_format = MaxwellToVK::SurfaceFormat(device, OPTIMAL_FORMAT, false, format).format;
const auto blit_usage = VK_FORMAT_FEATURE_BLIT_SRC_BIT | VK_FORMAT_FEATURE_BLIT_DST_BIT;
if (device.IsFormatSupported(vk_format, blit_usage, OPTIMAL_FORMAT)) {
BlitScale(*scheduler, *original_image, *scaled_image, info, aspect_mask, resolution);
} else {
return BlitScaleHelper(true);
}
return true;
}
bool Image::ScaleDown(bool ignore) {
if (False(flags & ImageFlagBits::Rescaled)) {
return false;
}
ASSERT(info.type != ImageType::Linear);
flags &= ~ImageFlagBits::Rescaled;
const auto& resolution = runtime->resolution;
if (!resolution.active) {
return false;
}
current_image = *original_image;
if (ignore) {
return true;
}
if (aspect_mask == 0) {
aspect_mask = ImageAspectMask(info.format);
}
static constexpr auto OPTIMAL_FORMAT = FormatType::Optimal;
const PixelFormat format = StorageFormat(info.format);
const auto& device = runtime->device;
const auto vk_format = MaxwellToVK::SurfaceFormat(device, OPTIMAL_FORMAT, false, format).format;
const auto blit_usage = VK_FORMAT_FEATURE_BLIT_SRC_BIT | VK_FORMAT_FEATURE_BLIT_DST_BIT;
if (device.IsFormatSupported(vk_format, blit_usage, OPTIMAL_FORMAT)) {
BlitScale(*scheduler, *scaled_image, *original_image, info, aspect_mask, resolution, false);
} else {
return BlitScaleHelper(false);
}
return true;
}
bool Image::BlitScaleHelper(bool scale_up) {
using namespace VideoCommon;
static constexpr auto BLIT_OPERATION = Tegra::Engines::Fermi2D::Operation::SrcCopy;
const bool is_color{aspect_mask == VK_IMAGE_ASPECT_COLOR_BIT};
const bool is_bilinear{is_color && !IsPixelFormatInteger(info.format)};
const auto operation = is_bilinear ? Tegra::Engines::Fermi2D::Filter::Bilinear
: Tegra::Engines::Fermi2D::Filter::Point;
const bool is_2d = info.type == ImageType::e2D;
const auto& resolution = runtime->resolution;
const u32 scaled_width = resolution.ScaleUp(info.size.width);
const u32 scaled_height = is_2d ? resolution.ScaleUp(info.size.height) : info.size.height;
std::unique_ptr<ImageView>& blit_view = scale_up ? scale_view : normal_view;
std::unique_ptr<Framebuffer>& blit_framebuffer =
scale_up ? scale_framebuffer : normal_framebuffer;
if (!blit_view) {
const auto view_info = ImageViewInfo(ImageViewType::e2D, info.format);
blit_view = std::make_unique<ImageView>(*runtime, view_info, NULL_IMAGE_ID, *this);
}
const u32 src_width = scale_up ? info.size.width : scaled_width;
const u32 src_height = scale_up ? info.size.height : scaled_height;
const u32 dst_width = scale_up ? scaled_width : info.size.width;
const u32 dst_height = scale_up ? scaled_height : info.size.height;
const Region2D src_region{
.start = {0, 0},
.end = {static_cast<s32>(src_width), static_cast<s32>(src_height)},
};
const Region2D dst_region{
.start = {0, 0},
.end = {static_cast<s32>(dst_width), static_cast<s32>(dst_height)},
};
const VkExtent2D extent{
.width = std::max(scaled_width, info.size.width),
.height = std::max(scaled_height, info.size.width),
};
auto* view_ptr = blit_view.get();
if (aspect_mask == VK_IMAGE_ASPECT_COLOR_BIT) {
if (!blit_framebuffer) {
blit_framebuffer = std::make_unique<Framebuffer>(*runtime, view_ptr, nullptr, extent);
}
const auto color_view = blit_view->Handle(Shader::TextureType::Color2D);
runtime->blit_image_helper.BlitColor(blit_framebuffer.get(), color_view, dst_region,
src_region, operation, BLIT_OPERATION);
} else if (!runtime->device.IsBlitDepthStencilSupported() &&
aspect_mask == (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) {
if (!blit_framebuffer) {
blit_framebuffer = std::make_unique<Framebuffer>(*runtime, nullptr, view_ptr, extent);
}
runtime->blit_image_helper.BlitDepthStencil(blit_framebuffer.get(), blit_view->DepthView(),
blit_view->StencilView(), dst_region,
src_region, operation, BLIT_OPERATION);
} else {
// TODO: Use helper blits where applicable
flags &= ~ImageFlagBits::Rescaled;
LOG_ERROR(Render_Vulkan, "Device does not support scaling format {}", info.format);
return false;
}
return true;
}
ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::ImageViewInfo& info,
@@ -1052,7 +1363,7 @@ ImageView::ImageView(TextureCacheRuntime&, const VideoCommon::ImageInfo& info,
: VideoCommon::ImageViewBase{info, view_info}, gpu_addr{gpu_addr_},
buffer_size{VideoCommon::CalculateGuestSizeInBytes(info)} {}
ImageView::ImageView(TextureCacheRuntime&, const VideoCommon::NullImageParams& params)
ImageView::ImageView(TextureCacheRuntime&, const VideoCommon::NullImageViewParams& params)
: VideoCommon::ImageViewBase{params} {}
VkImageView ImageView::DepthView() {
@@ -1162,7 +1473,27 @@ Sampler::Sampler(TextureCacheRuntime& runtime, const Tegra::Texture::TSCEntry& t
}
Framebuffer::Framebuffer(TextureCacheRuntime& runtime, std::span<ImageView*, NUM_RT> color_buffers,
ImageView* depth_buffer, const VideoCommon::RenderTargets& key) {
ImageView* depth_buffer, const VideoCommon::RenderTargets& key)
: render_area{VkExtent2D{
.width = key.size.width,
.height = key.size.height,
}} {
CreateFramebuffer(runtime, color_buffers, depth_buffer);
if (runtime.device.HasDebuggingToolAttached()) {
framebuffer.SetObjectNameEXT(VideoCommon::Name(key).c_str());
}
}
Framebuffer::Framebuffer(TextureCacheRuntime& runtime, ImageView* color_buffer,
ImageView* depth_buffer, VkExtent2D extent)
: render_area{extent} {
std::array<ImageView*, NUM_RT> color_buffers{color_buffer};
CreateFramebuffer(runtime, color_buffers, depth_buffer);
}
void Framebuffer::CreateFramebuffer(TextureCacheRuntime& runtime,
std::span<ImageView*, NUM_RT> color_buffers,
ImageView* depth_buffer) {
std::vector<VkImageView> attachments;
RenderPassKey renderpass_key{};
s32 num_layers = 1;
@@ -1200,10 +1531,6 @@ Framebuffer::Framebuffer(TextureCacheRuntime& runtime, std::span<ImageView*, NUM
renderpass = runtime.render_pass_cache.Get(renderpass_key);
render_area = VkExtent2D{
.width = key.size.width,
.height = key.size.height,
};
num_color_buffers = static_cast<u32>(num_colors);
framebuffer = runtime.device.GetLogical().CreateFramebuffer({
.sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO,
@@ -1212,13 +1539,10 @@ Framebuffer::Framebuffer(TextureCacheRuntime& runtime, std::span<ImageView*, NUM
.renderPass = renderpass,
.attachmentCount = static_cast<u32>(attachments.size()),
.pAttachments = attachments.data(),
.width = key.size.width,
.height = key.size.height,
.width = render_area.width,
.height = render_area.height,
.layers = static_cast<u32>(std::max(num_layers, 1)),
});
if (runtime.device.HasDebuggingToolAttached()) {
framebuffer.SetObjectNameEXT(VideoCommon::Name(key).c_str());
}
}
void TextureCacheRuntime::AccelerateImageUpload(

View File

@@ -13,6 +13,10 @@
#include "video_core/vulkan_common/vulkan_memory_allocator.h"
#include "video_core/vulkan_common/vulkan_wrapper.h"
namespace Settings {
struct ResolutionScalingInfo;
}
namespace Vulkan {
using VideoCommon::ImageId;
@@ -31,14 +35,14 @@ class RenderPassCache;
class StagingBufferPool;
class VKScheduler;
struct TextureCacheRuntime {
const Device& device;
VKScheduler& scheduler;
MemoryAllocator& memory_allocator;
StagingBufferPool& staging_buffer_pool;
BlitImageHelper& blit_image_helper;
ASTCDecoderPass& astc_decoder_pass;
RenderPassCache& render_pass_cache;
class TextureCacheRuntime {
public:
explicit TextureCacheRuntime(const Device& device_, VKScheduler& scheduler_,
MemoryAllocator& memory_allocator_,
StagingBufferPool& staging_buffer_pool_,
BlitImageHelper& blit_image_helper_,
ASTCDecoderPass& astc_decoder_pass_,
RenderPassCache& render_pass_cache_);
void Finish();
@@ -46,6 +50,10 @@ struct TextureCacheRuntime {
StagingBufferRef DownloadStagingBuffer(size_t size);
void TickFrame();
u64 GetDeviceLocalMemory() const;
void BlitImage(Framebuffer* dst_framebuffer, ImageView& dst, ImageView& src,
const Region2D& dst_region, const Region2D& src_region,
Tegra::Engines::Fermi2D::Filter filter,
@@ -53,7 +61,7 @@ struct TextureCacheRuntime {
void CopyImage(Image& dst, Image& src, std::span<const VideoCommon::ImageCopy> copies);
void ConvertImage(Framebuffer* dst, ImageView& dst_view, ImageView& src_view);
void ConvertImage(Framebuffer* dst, ImageView& dst_view, ImageView& src_view, bool rescaled);
bool CanAccelerateImageUpload(Image&) const noexcept {
return false;
@@ -74,13 +82,21 @@ struct TextureCacheRuntime {
return true;
}
u64 GetDeviceLocalMemory() const;
const Device& device;
VKScheduler& scheduler;
MemoryAllocator& memory_allocator;
StagingBufferPool& staging_buffer_pool;
BlitImageHelper& blit_image_helper;
ASTCDecoderPass& astc_decoder_pass;
RenderPassCache& render_pass_cache;
const Settings::ResolutionScalingInfo& resolution;
};
class Image : public VideoCommon::ImageBase {
public:
explicit Image(TextureCacheRuntime&, const VideoCommon::ImageInfo& info, GPUVAddr gpu_addr,
VAddr cpu_addr);
explicit Image(const VideoCommon::NullImageParams&);
~Image();
@@ -97,7 +113,7 @@ public:
std::span<const VideoCommon::BufferImageCopy> copies);
[[nodiscard]] VkImage Handle() const noexcept {
return *image;
return current_image;
}
[[nodiscard]] VkImageAspectFlags AspectMask() const noexcept {
@@ -113,14 +129,30 @@ public:
return std::exchange(initialized, true);
}
bool ScaleUp(bool ignore = false);
bool ScaleDown(bool ignore = false);
private:
VKScheduler* scheduler;
vk::Image image;
bool BlitScaleHelper(bool scale_up);
VKScheduler* scheduler{};
TextureCacheRuntime* runtime{};
vk::Image original_image;
MemoryCommit commit;
vk::ImageView image_view;
std::vector<vk::ImageView> storage_image_views;
VkImageAspectFlags aspect_mask = 0;
bool initialized = false;
vk::Image scaled_image{};
MemoryCommit scaled_commit{};
VkImage current_image{};
std::unique_ptr<Framebuffer> scale_framebuffer;
std::unique_ptr<ImageView> scale_view;
std::unique_ptr<Framebuffer> normal_framebuffer;
std::unique_ptr<ImageView> normal_view;
};
class ImageView : public VideoCommon::ImageViewBase {
@@ -128,7 +160,7 @@ public:
explicit ImageView(TextureCacheRuntime&, const VideoCommon::ImageViewInfo&, ImageId, Image&);
explicit ImageView(TextureCacheRuntime&, const VideoCommon::ImageInfo&,
const VideoCommon::ImageViewInfo&, GPUVAddr);
explicit ImageView(TextureCacheRuntime&, const VideoCommon::NullImageParams&);
explicit ImageView(TextureCacheRuntime&, const VideoCommon::NullImageViewParams&);
[[nodiscard]] VkImageView DepthView();
@@ -197,9 +229,15 @@ private:
class Framebuffer {
public:
explicit Framebuffer(TextureCacheRuntime&, std::span<ImageView*, NUM_RT> color_buffers,
explicit Framebuffer(TextureCacheRuntime& runtime, std::span<ImageView*, NUM_RT> color_buffers,
ImageView* depth_buffer, const VideoCommon::RenderTargets& key);
explicit Framebuffer(TextureCacheRuntime& runtime, ImageView* color_buffer,
ImageView* depth_buffer, VkExtent2D extent);
void CreateFramebuffer(TextureCacheRuntime& runtime,
std::span<ImageView*, NUM_RT> color_buffers, ImageView* depth_buffer);
[[nodiscard]] VkFramebuffer Handle() const noexcept {
return *framebuffer;
}