early-access version 1255

This commit is contained in:
pineappleEA
2020-12-28 15:15:37 +00:00
parent 84b39492d1
commit 78b48028e1
6254 changed files with 1868140 additions and 0 deletions

View File: src/video_core/renderer_vulkan/blit_image.cpp

@@ -0,0 +1,664 @@
// Copyright 2020 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <algorithm>
#include "video_core/host_shaders/convert_depth_to_float_frag_spv.h"
#include "video_core/host_shaders/convert_float_to_depth_frag_spv.h"
#include "video_core/host_shaders/full_screen_triangle_vert_spv.h"
#include "video_core/host_shaders/vulkan_blit_color_float_frag_spv.h"
#include "video_core/host_shaders/vulkan_blit_depth_stencil_frag_spv.h"
#include "video_core/renderer_vulkan/blit_image.h"
#include "video_core/renderer_vulkan/maxwell_to_vk.h"
#include "video_core/renderer_vulkan/vk_device.h"
#include "video_core/renderer_vulkan/vk_scheduler.h"
#include "video_core/renderer_vulkan/vk_shader_util.h"
#include "video_core/renderer_vulkan/vk_state_tracker.h"
#include "video_core/renderer_vulkan/vk_texture_cache.h"
#include "video_core/renderer_vulkan/vk_update_descriptor.h"
#include "video_core/renderer_vulkan/wrapper.h"
#include "video_core/surface.h"
namespace Vulkan {
using VideoCommon::ImageViewType;
namespace {
struct PushConstants {
std::array<float, 2> tex_scale;
std::array<float, 2> tex_offset;
};
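// Assumed to mirror the push_constant block consumed by the full screen triangle
// vertex shader: a vec2 scale followed by a vec2 offset.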
template <u32 binding>
inline constexpr VkDescriptorSetLayoutBinding TEXTURE_DESCRIPTOR_SET_LAYOUT_BINDING{
.binding = binding,
.descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
.descriptorCount = 1,
.stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT,
.pImmutableSamplers = nullptr,
};
constexpr std::array TWO_TEXTURES_DESCRIPTOR_SET_LAYOUT_BINDINGS{
TEXTURE_DESCRIPTOR_SET_LAYOUT_BINDING<0>,
TEXTURE_DESCRIPTOR_SET_LAYOUT_BINDING<1>,
};
constexpr VkDescriptorSetLayoutCreateInfo ONE_TEXTURE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO{
.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
.bindingCount = 1,
.pBindings = &TEXTURE_DESCRIPTOR_SET_LAYOUT_BINDING<0>,
};
constexpr VkDescriptorSetLayoutCreateInfo TWO_TEXTURES_DESCRIPTOR_SET_LAYOUT_CREATE_INFO{
.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
.bindingCount = static_cast<u32>(TWO_TEXTURES_DESCRIPTOR_SET_LAYOUT_BINDINGS.size()),
.pBindings = TWO_TEXTURES_DESCRIPTOR_SET_LAYOUT_BINDINGS.data(),
};
constexpr VkPushConstantRange PUSH_CONSTANT_RANGE{
.stageFlags = VK_SHADER_STAGE_VERTEX_BIT,
.offset = 0,
.size = sizeof(PushConstants),
};
constexpr VkPipelineVertexInputStateCreateInfo PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO{
.sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
.vertexBindingDescriptionCount = 0,
.pVertexBindingDescriptions = nullptr,
.vertexAttributeDescriptionCount = 0,
.pVertexAttributeDescriptions = nullptr,
};
constexpr VkPipelineInputAssemblyStateCreateInfo PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO{
.sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
.topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST,
.primitiveRestartEnable = VK_FALSE,
};
constexpr VkPipelineViewportStateCreateInfo PIPELINE_VIEWPORT_STATE_CREATE_INFO{
.sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
.viewportCount = 1,
.pViewports = nullptr,
.scissorCount = 1,
.pScissors = nullptr,
};
constexpr VkPipelineRasterizationStateCreateInfo PIPELINE_RASTERIZATION_STATE_CREATE_INFO{
.sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
.depthClampEnable = VK_FALSE,
.rasterizerDiscardEnable = VK_FALSE,
.polygonMode = VK_POLYGON_MODE_FILL,
.cullMode = VK_CULL_MODE_BACK_BIT,
.frontFace = VK_FRONT_FACE_CLOCKWISE,
.depthBiasEnable = VK_FALSE,
.depthBiasConstantFactor = 0.0f,
.depthBiasClamp = 0.0f,
.depthBiasSlopeFactor = 0.0f,
.lineWidth = 1.0f,
};
constexpr VkPipelineMultisampleStateCreateInfo PIPELINE_MULTISAMPLE_STATE_CREATE_INFO{
.sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
.rasterizationSamples = VK_SAMPLE_COUNT_1_BIT,
.sampleShadingEnable = VK_FALSE,
.minSampleShading = 0.0f,
.pSampleMask = nullptr,
.alphaToCoverageEnable = VK_FALSE,
.alphaToOneEnable = VK_FALSE,
};
constexpr std::array DYNAMIC_STATES{
VK_DYNAMIC_STATE_VIEWPORT,
VK_DYNAMIC_STATE_SCISSOR,
};
constexpr std::array EXTENDED_DYNAMIC_STATES{
VK_DYNAMIC_STATE_VIEWPORT,
VK_DYNAMIC_STATE_SCISSOR,
VK_DYNAMIC_STATE_CULL_MODE_EXT,
VK_DYNAMIC_STATE_FRONT_FACE_EXT,
VK_DYNAMIC_STATE_VERTEX_INPUT_BINDING_STRIDE_EXT,
VK_DYNAMIC_STATE_DEPTH_TEST_ENABLE_EXT,
VK_DYNAMIC_STATE_DEPTH_WRITE_ENABLE_EXT,
VK_DYNAMIC_STATE_DEPTH_COMPARE_OP_EXT,
VK_DYNAMIC_STATE_DEPTH_BOUNDS_TEST_ENABLE_EXT,
VK_DYNAMIC_STATE_STENCIL_TEST_ENABLE_EXT,
VK_DYNAMIC_STATE_STENCIL_OP_EXT,
};
constexpr VkPipelineDynamicStateCreateInfo PIPELINE_DYNAMIC_STATE_CREATE_INFO{
.sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
.dynamicStateCount = static_cast<u32>(DYNAMIC_STATES.size()),
.pDynamicStates = DYNAMIC_STATES.data(),
};
constexpr VkPipelineDynamicStateCreateInfo PIPELINE_EXTENDED_DYNAMIC_STATE_CREATE_INFO{
.sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
.dynamicStateCount = static_cast<u32>(EXTENDED_DYNAMIC_STATES.size()),
.pDynamicStates = EXTENDED_DYNAMIC_STATES.data(),
};
constexpr VkPipelineColorBlendStateCreateInfo PIPELINE_COLOR_BLEND_STATE_EMPTY_CREATE_INFO{
.sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
.logicOpEnable = VK_FALSE,
.logicOp = VK_LOGIC_OP_CLEAR,
.attachmentCount = 0,
.pAttachments = nullptr,
.blendConstants = {0.0f, 0.0f, 0.0f, 0.0f},
};
constexpr VkPipelineColorBlendAttachmentState PIPELINE_COLOR_BLEND_ATTACHMENT_STATE{
.blendEnable = VK_FALSE,
.srcColorBlendFactor = VK_BLEND_FACTOR_ZERO,
.dstColorBlendFactor = VK_BLEND_FACTOR_ZERO,
.colorBlendOp = VK_BLEND_OP_ADD,
.srcAlphaBlendFactor = VK_BLEND_FACTOR_ZERO,
.dstAlphaBlendFactor = VK_BLEND_FACTOR_ZERO,
.alphaBlendOp = VK_BLEND_OP_ADD,
.colorWriteMask = VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT |
VK_COLOR_COMPONENT_B_BIT | VK_COLOR_COMPONENT_A_BIT,
};
constexpr VkPipelineColorBlendStateCreateInfo PIPELINE_COLOR_BLEND_STATE_GENERIC_CREATE_INFO{
.sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
.logicOpEnable = VK_FALSE,
.logicOp = VK_LOGIC_OP_CLEAR,
.attachmentCount = 1,
.pAttachments = &PIPELINE_COLOR_BLEND_ATTACHMENT_STATE,
.blendConstants = {0.0f, 0.0f, 0.0f, 0.0f},
};
constexpr VkPipelineDepthStencilStateCreateInfo PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO{
.sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
.depthTestEnable = VK_TRUE,
.depthWriteEnable = VK_TRUE,
.depthCompareOp = VK_COMPARE_OP_ALWAYS,
.depthBoundsTestEnable = VK_FALSE,
.stencilTestEnable = VK_FALSE,
.front = VkStencilOpState{},
.back = VkStencilOpState{},
.minDepthBounds = 0.0f,
.maxDepthBounds = 0.0f,
};
template <VkFilter filter>
inline constexpr VkSamplerCreateInfo SAMPLER_CREATE_INFO{
.sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
.magFilter = filter,
.minFilter = filter,
.mipmapMode = VK_SAMPLER_MIPMAP_MODE_NEAREST,
.addressModeU = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER,
.addressModeV = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER,
.addressModeW = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER,
.mipLodBias = 0.0f,
.anisotropyEnable = VK_FALSE,
.maxAnisotropy = 0.0f,
.compareEnable = VK_FALSE,
.compareOp = VK_COMPARE_OP_NEVER,
.minLod = 0.0f,
.maxLod = 0.0f,
.borderColor = VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE,
.unnormalizedCoordinates = VK_TRUE,
};
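// Note: with unnormalizedCoordinates enabled, Vulkan requires matching min/mag filters,
// NEAREST mipmap mode, minLod == maxLod == 0, clamp-style address modes, and disabled
// anisotropy and compare, all of which hold above.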
constexpr VkPipelineLayoutCreateInfo PipelineLayoutCreateInfo(
const VkDescriptorSetLayout* set_layout) {
return VkPipelineLayoutCreateInfo{
.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
.setLayoutCount = 1,
.pSetLayouts = set_layout,
.pushConstantRangeCount = 1,
.pPushConstantRanges = &PUSH_CONSTANT_RANGE,
};
}
constexpr VkPipelineShaderStageCreateInfo PipelineShaderStageCreateInfo(VkShaderStageFlagBits stage,
VkShaderModule shader) {
return VkPipelineShaderStageCreateInfo{
.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
.stage = stage,
.module = shader,
.pName = "main",
.pSpecializationInfo = nullptr,
};
}
constexpr std::array<VkPipelineShaderStageCreateInfo, 2> MakeStages(
VkShaderModule vertex_shader, VkShaderModule fragment_shader) {
return std::array{
PipelineShaderStageCreateInfo(VK_SHADER_STAGE_VERTEX_BIT, vertex_shader),
PipelineShaderStageCreateInfo(VK_SHADER_STAGE_FRAGMENT_BIT, fragment_shader),
};
}
void UpdateOneTextureDescriptorSet(const VKDevice& device, VkDescriptorSet descriptor_set,
VkSampler sampler, VkImageView image_view) {
const VkDescriptorImageInfo image_info{
.sampler = sampler,
.imageView = image_view,
.imageLayout = VK_IMAGE_LAYOUT_GENERAL,
};
const VkWriteDescriptorSet write_descriptor_set{
.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
.pNext = nullptr,
.dstSet = descriptor_set,
.dstBinding = 0,
.dstArrayElement = 0,
.descriptorCount = 1,
.descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
.pImageInfo = &image_info,
.pBufferInfo = nullptr,
.pTexelBufferView = nullptr,
};
device.GetLogical().UpdateDescriptorSets(write_descriptor_set, nullptr);
}
void UpdateTwoTexturesDescriptorSet(const VKDevice& device, VkDescriptorSet descriptor_set,
VkSampler sampler, VkImageView image_view_0,
VkImageView image_view_1) {
const VkDescriptorImageInfo image_info_0{
.sampler = sampler,
.imageView = image_view_0,
.imageLayout = VK_IMAGE_LAYOUT_GENERAL,
};
const VkDescriptorImageInfo image_info_1{
.sampler = sampler,
.imageView = image_view_1,
.imageLayout = VK_IMAGE_LAYOUT_GENERAL,
};
const std::array write_descriptor_sets{
VkWriteDescriptorSet{
.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
.pNext = nullptr,
.dstSet = descriptor_set,
.dstBinding = 0,
.dstArrayElement = 0,
.descriptorCount = 1,
.descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
.pImageInfo = &image_info_0,
.pBufferInfo = nullptr,
.pTexelBufferView = nullptr,
},
VkWriteDescriptorSet{
.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
.pNext = nullptr,
.dstSet = descriptor_set,
.dstBinding = 1,
.dstArrayElement = 0,
.descriptorCount = 1,
.descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
.pImageInfo = &image_info_1,
.pBufferInfo = nullptr,
.pTexelBufferView = nullptr,
},
};
device.GetLogical().UpdateDescriptorSets(write_descriptor_sets, nullptr);
}
void BindBlitState(const VKDevice& device, vk::CommandBuffer cmdbuf, VkPipelineLayout layout,
const std::array<Offset2D, 2>& dst_region,
const std::array<Offset2D, 2>& src_region) {
const VkOffset2D offset{
.x = std::min(dst_region[0].x, dst_region[1].x),
.y = std::min(dst_region[0].y, dst_region[1].y),
};
const VkExtent2D extent{
.width = static_cast<u32>(std::abs(dst_region[1].x - dst_region[0].x)),
.height = static_cast<u32>(std::abs(dst_region[1].y - dst_region[0].y)),
};
const VkViewport viewport{
.x = static_cast<float>(offset.x),
.y = static_cast<float>(offset.y),
.width = static_cast<float>(extent.width),
.height = static_cast<float>(extent.height),
.minDepth = 0.0f,
.maxDepth = 1.0f,
};
// TODO: Support scissored blits
const VkRect2D scissor{
.offset = offset,
.extent = extent,
};
const float scale_x = static_cast<float>(src_region[1].x - src_region[0].x);
const float scale_y = static_cast<float>(src_region[1].y - src_region[0].y);
const PushConstants push_constants{
.tex_scale = {scale_x, scale_y},
.tex_offset = {static_cast<float>(src_region[0].x), static_cast<float>(src_region[0].y)},
};
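// tex_scale and tex_offset are in texel units: the blit samplers above are created with
// unnormalized coordinates, so raw texel positions can be passed through.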
cmdbuf.SetViewport(0, viewport);
cmdbuf.SetScissor(0, scissor);
if (device.IsExtExtendedDynamicStateSupported()) {
// Work around a bug in Nvidia's drivers where state is not handled properly when
// switching from a pipeline created without extended dynamic state to one that uses it.
// To work around it, manually set the static pipeline state through the extended
// dynamic state commands so the relevant bits stay defined.
cmdbuf.SetCullModeEXT(PIPELINE_RASTERIZATION_STATE_CREATE_INFO.cullMode);
cmdbuf.SetFrontFaceEXT(PIPELINE_RASTERIZATION_STATE_CREATE_INFO.frontFace);
cmdbuf.SetDepthTestEnableEXT(PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO.depthTestEnable);
cmdbuf.SetDepthWriteEnableEXT(PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO.depthWriteEnable);
cmdbuf.SetDepthCompareOpEXT(PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO.depthCompareOp);
cmdbuf.SetDepthBoundsTestEnableEXT(
PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO.depthBoundsTestEnable);
cmdbuf.SetStencilTestEnableEXT(PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO.stencilTestEnable);
cmdbuf.SetStencilOpEXT(VK_STENCIL_FACE_FRONT_AND_BACK, VK_STENCIL_OP_KEEP,
VK_STENCIL_OP_KEEP, VK_STENCIL_OP_KEEP, VK_COMPARE_OP_NEVER);
}
cmdbuf.PushConstants(layout, VK_SHADER_STAGE_VERTEX_BIT, push_constants);
}
} // Anonymous namespace
BlitImageHelper::BlitImageHelper(const VKDevice& device_, VKScheduler& scheduler_,
StateTracker& state_tracker_, VKDescriptorPool& descriptor_pool)
: device{device_}, scheduler{scheduler_}, state_tracker{state_tracker_},
one_texture_set_layout(device.GetLogical().CreateDescriptorSetLayout(
ONE_TEXTURE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO)),
two_textures_set_layout(device.GetLogical().CreateDescriptorSetLayout(
TWO_TEXTURES_DESCRIPTOR_SET_LAYOUT_CREATE_INFO)),
one_texture_descriptor_allocator(descriptor_pool, *one_texture_set_layout),
two_textures_descriptor_allocator(descriptor_pool, *two_textures_set_layout),
one_texture_pipeline_layout(device.GetLogical().CreatePipelineLayout(
PipelineLayoutCreateInfo(one_texture_set_layout.address()))),
two_textures_pipeline_layout(device.GetLogical().CreatePipelineLayout(
PipelineLayoutCreateInfo(two_textures_set_layout.address()))),
full_screen_vert(BuildShader(device, FULL_SCREEN_TRIANGLE_VERT_SPV)),
blit_color_to_color_frag(BuildShader(device, VULKAN_BLIT_COLOR_FLOAT_FRAG_SPV)),
convert_depth_to_float_frag(BuildShader(device, CONVERT_DEPTH_TO_FLOAT_FRAG_SPV)),
convert_float_to_depth_frag(BuildShader(device, CONVERT_FLOAT_TO_DEPTH_FRAG_SPV)),
linear_sampler(device.GetLogical().CreateSampler(SAMPLER_CREATE_INFO<VK_FILTER_LINEAR>)),
nearest_sampler(device.GetLogical().CreateSampler(SAMPLER_CREATE_INFO<VK_FILTER_NEAREST>)) {
if (device.IsExtShaderStencilExportSupported()) {
blit_depth_stencil_frag = BuildShader(device, VULKAN_BLIT_DEPTH_STENCIL_FRAG_SPV);
}
}
BlitImageHelper::~BlitImageHelper() = default;
void BlitImageHelper::BlitColor(const Framebuffer* dst_framebuffer, const ImageView& src_image_view,
const std::array<Offset2D, 2>& dst_region,
const std::array<Offset2D, 2>& src_region,
Tegra::Engines::Fermi2D::Filter filter,
Tegra::Engines::Fermi2D::Operation operation) {
const bool is_linear = filter == Tegra::Engines::Fermi2D::Filter::Bilinear;
const BlitImagePipelineKey key{
.renderpass = dst_framebuffer->RenderPass(),
.operation = operation,
};
const VkPipelineLayout layout = *one_texture_pipeline_layout;
const VkImageView src_view = src_image_view.Handle(ImageViewType::e2D);
const VkSampler sampler = is_linear ? *linear_sampler : *nearest_sampler;
const VkPipeline pipeline = FindOrEmplacePipeline(key);
const VkDescriptorSet descriptor_set = one_texture_descriptor_allocator.Commit();
scheduler.RequestRenderpass(dst_framebuffer);
scheduler.Record([dst_region, src_region, pipeline, layout, sampler, src_view, descriptor_set,
&device = device](vk::CommandBuffer cmdbuf) {
// TODO: Barriers
UpdateOneTextureDescriptorSet(device, descriptor_set, sampler, src_view);
cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline);
cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, layout, 0, descriptor_set,
nullptr);
BindBlitState(device, cmdbuf, layout, dst_region, src_region);
cmdbuf.Draw(3, 1, 0, 0);
});
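// The commands recorded above bypass the state tracker, so mark all tracked state dirty.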
scheduler.InvalidateState();
}
void BlitImageHelper::BlitDepthStencil(const Framebuffer* dst_framebuffer,
VkImageView src_depth_view, VkImageView src_stencil_view,
const std::array<Offset2D, 2>& dst_region,
const std::array<Offset2D, 2>& src_region,
Tegra::Engines::Fermi2D::Filter filter,
Tegra::Engines::Fermi2D::Operation operation) {
ASSERT(filter == Tegra::Engines::Fermi2D::Filter::Point);
ASSERT(operation == Tegra::Engines::Fermi2D::Operation::SrcCopy);
const VkPipelineLayout layout = *two_textures_pipeline_layout;
const VkSampler sampler = *nearest_sampler;
const VkPipeline pipeline = BlitDepthStencilPipeline(dst_framebuffer->RenderPass());
const VkDescriptorSet descriptor_set = two_textures_descriptor_allocator.Commit();
scheduler.RequestRenderpass(dst_framebuffer);
scheduler.Record([dst_region, src_region, pipeline, layout, sampler, src_depth_view,
src_stencil_view, descriptor_set,
&device = device](vk::CommandBuffer cmdbuf) {
// TODO: Barriers
UpdateTwoTexturesDescriptorSet(device, descriptor_set, sampler, src_depth_view,
src_stencil_view);
cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline);
cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, layout, 0, descriptor_set,
nullptr);
BindBlitState(device, cmdbuf, layout, dst_region, src_region);
cmdbuf.Draw(3, 1, 0, 0);
});
scheduler.InvalidateState();
}
void BlitImageHelper::ConvertD32ToR32(const Framebuffer* dst_framebuffer,
const ImageView& src_image_view) {
ConvertDepthToColorPipeline(convert_d32_to_r32_pipeline, dst_framebuffer->RenderPass());
Convert(*convert_d32_to_r32_pipeline, dst_framebuffer, src_image_view);
}
void BlitImageHelper::ConvertR32ToD32(const Framebuffer* dst_framebuffer,
const ImageView& src_image_view) {
ConvertColorToDepthPipeline(convert_r32_to_d32_pipeline, dst_framebuffer->RenderPass());
Convert(*convert_r32_to_d32_pipeline, dst_framebuffer, src_image_view);
}
void BlitImageHelper::ConvertD16ToR16(const Framebuffer* dst_framebuffer,
const ImageView& src_image_view) {
ConvertDepthToColorPipeline(convert_d16_to_r16_pipeline, dst_framebuffer->RenderPass());
Convert(*convert_d16_to_r16_pipeline, dst_framebuffer, src_image_view);
}
void BlitImageHelper::ConvertR16ToD16(const Framebuffer* dst_framebuffer,
const ImageView& src_image_view) {
ConvertColorToDepthPipeline(convert_r16_to_d16_pipeline, dst_framebuffer->RenderPass());
Convert(*convert_r16_to_d16_pipeline, dst_framebuffer, src_image_view);
}
void BlitImageHelper::Convert(VkPipeline pipeline, const Framebuffer* dst_framebuffer,
const ImageView& src_image_view) {
const VkPipelineLayout layout = *one_texture_pipeline_layout;
const VkImageView src_view = src_image_view.Handle(ImageViewType::e2D);
const VkSampler sampler = *nearest_sampler;
const VkDescriptorSet descriptor_set = one_texture_descriptor_allocator.Commit();
const VkExtent2D extent{
.width = src_image_view.size.width,
.height = src_image_view.size.height,
};
scheduler.RequestRenderpass(dst_framebuffer);
scheduler.Record([pipeline, layout, sampler, src_view, descriptor_set, extent,
&device = device](vk::CommandBuffer cmdbuf) {
const VkOffset2D offset{
.x = 0,
.y = 0,
};
const VkViewport viewport{
.x = 0.0f,
.y = 0.0f,
.width = static_cast<float>(extent.width),
.height = static_cast<float>(extent.height),
.minDepth = 0.0f,
.maxDepth = 0.0f,
};
const VkRect2D scissor{
.offset = offset,
.extent = extent,
};
const PushConstants push_constants{
.tex_scale = {viewport.width, viewport.height},
.tex_offset = {0.0f, 0.0f},
};
UpdateOneTextureDescriptorSet(device, descriptor_set, sampler, src_view);
// TODO: Barriers
cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline);
cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, layout, 0, descriptor_set,
nullptr);
cmdbuf.SetViewport(0, viewport);
cmdbuf.SetScissor(0, scissor);
cmdbuf.PushConstants(layout, VK_SHADER_STAGE_VERTEX_BIT, push_constants);
cmdbuf.Draw(3, 1, 0, 0);
});
scheduler.InvalidateState();
}
VkPipeline BlitImageHelper::FindOrEmplacePipeline(const BlitImagePipelineKey& key) {
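// Blit pipelines are cached by (renderpass, operation) key; a linear search is
// adequate for the small number of combinations seen in practice.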
const auto it = std::ranges::find(blit_color_keys, key);
if (it != blit_color_keys.end()) {
return *blit_color_pipelines[std::distance(blit_color_keys.begin(), it)];
}
blit_color_keys.push_back(key);
const std::array stages = MakeStages(*full_screen_vert, *blit_color_to_color_frag);
const VkPipelineColorBlendAttachmentState blend_attachment{
.blendEnable = VK_FALSE,
.srcColorBlendFactor = VK_BLEND_FACTOR_ZERO,
.dstColorBlendFactor = VK_BLEND_FACTOR_ZERO,
.colorBlendOp = VK_BLEND_OP_ADD,
.srcAlphaBlendFactor = VK_BLEND_FACTOR_ZERO,
.dstAlphaBlendFactor = VK_BLEND_FACTOR_ZERO,
.alphaBlendOp = VK_BLEND_OP_ADD,
.colorWriteMask = VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT |
VK_COLOR_COMPONENT_B_BIT | VK_COLOR_COMPONENT_A_BIT,
};
// TODO: programmable blending
const VkPipelineColorBlendStateCreateInfo color_blend_create_info{
.sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
.logicOpEnable = VK_FALSE,
.logicOp = VK_LOGIC_OP_CLEAR,
.attachmentCount = 1,
.pAttachments = &blend_attachment,
.blendConstants = {0.0f, 0.0f, 0.0f, 0.0f},
};
blit_color_pipelines.push_back(device.GetLogical().CreateGraphicsPipeline({
.sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
.stageCount = static_cast<u32>(stages.size()),
.pStages = stages.data(),
.pVertexInputState = &PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
.pInputAssemblyState = &PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO,
.pTessellationState = nullptr,
.pViewportState = &PIPELINE_VIEWPORT_STATE_CREATE_INFO,
.pRasterizationState = &PIPELINE_RASTERIZATION_STATE_CREATE_INFO,
.pMultisampleState = &PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
.pDepthStencilState = nullptr,
.pColorBlendState = &color_blend_create_info,
.pDynamicState = device.IsExtExtendedDynamicStateSupported()
? &PIPELINE_EXTENDED_DYNAMIC_STATE_CREATE_INFO
: &PIPELINE_DYNAMIC_STATE_CREATE_INFO,
.layout = *one_texture_pipeline_layout,
.renderPass = key.renderpass,
.subpass = 0,
.basePipelineHandle = VK_NULL_HANDLE,
.basePipelineIndex = 0,
}));
return *blit_color_pipelines.back();
}
VkPipeline BlitImageHelper::BlitDepthStencilPipeline(VkRenderPass renderpass) {
if (blit_depth_stencil_pipeline) {
return *blit_depth_stencil_pipeline;
}
const std::array stages = MakeStages(*full_screen_vert, *blit_depth_stencil_frag);
blit_depth_stencil_pipeline = device.GetLogical().CreateGraphicsPipeline({
.sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
.stageCount = static_cast<u32>(stages.size()),
.pStages = stages.data(),
.pVertexInputState = &PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
.pInputAssemblyState = &PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO,
.pTessellationState = nullptr,
.pViewportState = &PIPELINE_VIEWPORT_STATE_CREATE_INFO,
.pRasterizationState = &PIPELINE_RASTERIZATION_STATE_CREATE_INFO,
.pMultisampleState = &PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
.pDepthStencilState = &PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO,
.pColorBlendState = &PIPELINE_COLOR_BLEND_STATE_EMPTY_CREATE_INFO,
.pDynamicState = device.IsExtExtendedDynamicStateSupported()
? &PIPELINE_EXTENDED_DYNAMIC_STATE_CREATE_INFO
: &PIPELINE_DYNAMIC_STATE_CREATE_INFO,
.layout = *two_textures_pipeline_layout,
.renderPass = renderpass,
.subpass = 0,
.basePipelineHandle = VK_NULL_HANDLE,
.basePipelineIndex = 0,
});
return *blit_depth_stencil_pipeline;
}
void BlitImageHelper::ConvertDepthToColorPipeline(vk::Pipeline& pipeline, VkRenderPass renderpass) {
if (pipeline) {
return;
}
const std::array stages = MakeStages(*full_screen_vert, *convert_depth_to_float_frag);
pipeline = device.GetLogical().CreateGraphicsPipeline({
.sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
.stageCount = static_cast<u32>(stages.size()),
.pStages = stages.data(),
.pVertexInputState = &PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
.pInputAssemblyState = &PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO,
.pTessellationState = nullptr,
.pViewportState = &PIPELINE_VIEWPORT_STATE_CREATE_INFO,
.pRasterizationState = &PIPELINE_RASTERIZATION_STATE_CREATE_INFO,
.pMultisampleState = &PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
.pDepthStencilState = nullptr,
.pColorBlendState = &PIPELINE_COLOR_BLEND_STATE_GENERIC_CREATE_INFO,
.pDynamicState = &PIPELINE_DYNAMIC_STATE_CREATE_INFO,
.layout = *one_texture_pipeline_layout,
.renderPass = renderpass,
.subpass = 0,
.basePipelineHandle = VK_NULL_HANDLE,
.basePipelineIndex = 0,
});
}
void BlitImageHelper::ConvertColorToDepthPipeline(vk::Pipeline& pipeline, VkRenderPass renderpass) {
if (pipeline) {
return;
}
const std::array stages = MakeStages(*full_screen_vert, *convert_float_to_depth_frag);
pipeline = device.GetLogical().CreateGraphicsPipeline({
.sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
.stageCount = static_cast<u32>(stages.size()),
.pStages = stages.data(),
.pVertexInputState = &PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
.pInputAssemblyState = &PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO,
.pTessellationState = nullptr,
.pViewportState = &PIPELINE_VIEWPORT_STATE_CREATE_INFO,
.pRasterizationState = &PIPELINE_RASTERIZATION_STATE_CREATE_INFO,
.pMultisampleState = &PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
.pDepthStencilState = &PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO,
.pColorBlendState = &PIPELINE_COLOR_BLEND_STATE_EMPTY_CREATE_INFO,
.pDynamicState = &PIPELINE_DYNAMIC_STATE_CREATE_INFO,
.layout = *one_texture_pipeline_layout,
.renderPass = renderpass,
.subpass = 0,
.basePipelineHandle = VK_NULL_HANDLE,
.basePipelineIndex = 0,
});
}
} // namespace Vulkan

View File: src/video_core/renderer_vulkan/blit_image.h

@@ -0,0 +1,97 @@
// Copyright 2020 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <compare>
#include "video_core/engines/fermi_2d.h"
#include "video_core/renderer_vulkan/vk_descriptor_pool.h"
#include "video_core/renderer_vulkan/wrapper.h"
#include "video_core/texture_cache/types.h"
namespace Vulkan {
using VideoCommon::Offset2D;
class VKDevice;
class VKScheduler;
class StateTracker;
class Framebuffer;
class ImageView;
struct BlitImagePipelineKey {
constexpr auto operator<=>(const BlitImagePipelineKey&) const noexcept = default;
VkRenderPass renderpass;
Tegra::Engines::Fermi2D::Operation operation;
};
class BlitImageHelper {
public:
explicit BlitImageHelper(const VKDevice& device, VKScheduler& scheduler,
StateTracker& state_tracker, VKDescriptorPool& descriptor_pool);
~BlitImageHelper();
void BlitColor(const Framebuffer* dst_framebuffer, const ImageView& src_image_view,
const std::array<Offset2D, 2>& dst_region,
const std::array<Offset2D, 2>& src_region,
Tegra::Engines::Fermi2D::Filter filter,
Tegra::Engines::Fermi2D::Operation operation);
void BlitDepthStencil(const Framebuffer* dst_framebuffer, VkImageView src_depth_view,
VkImageView src_stencil_view, const std::array<Offset2D, 2>& dst_region,
const std::array<Offset2D, 2>& src_region,
Tegra::Engines::Fermi2D::Filter filter,
Tegra::Engines::Fermi2D::Operation operation);
void ConvertD32ToR32(const Framebuffer* dst_framebuffer, const ImageView& src_image_view);
void ConvertR32ToD32(const Framebuffer* dst_framebuffer, const ImageView& src_image_view);
void ConvertD16ToR16(const Framebuffer* dst_framebuffer, const ImageView& src_image_view);
void ConvertR16ToD16(const Framebuffer* dst_framebuffer, const ImageView& src_image_view);
private:
void Convert(VkPipeline pipeline, const Framebuffer* dst_framebuffer,
const ImageView& src_image_view);
[[nodiscard]] VkPipeline FindOrEmplacePipeline(const BlitImagePipelineKey& key);
[[nodiscard]] VkPipeline BlitDepthStencilPipeline(VkRenderPass renderpass);
void ConvertDepthToColorPipeline(vk::Pipeline& pipeline, VkRenderPass renderpass);
void ConvertColorToDepthPipeline(vk::Pipeline& pipeline, VkRenderPass renderpass);
const VKDevice& device;
VKScheduler& scheduler;
StateTracker& state_tracker;
vk::DescriptorSetLayout one_texture_set_layout;
vk::DescriptorSetLayout two_textures_set_layout;
DescriptorAllocator one_texture_descriptor_allocator;
DescriptorAllocator two_textures_descriptor_allocator;
vk::PipelineLayout one_texture_pipeline_layout;
vk::PipelineLayout two_textures_pipeline_layout;
vk::ShaderModule full_screen_vert;
vk::ShaderModule blit_color_to_color_frag;
vk::ShaderModule blit_depth_stencil_frag;
vk::ShaderModule convert_depth_to_float_frag;
vk::ShaderModule convert_float_to_depth_frag;
vk::Sampler linear_sampler;
vk::Sampler nearest_sampler;
std::vector<BlitImagePipelineKey> blit_color_keys;
std::vector<vk::Pipeline> blit_color_pipelines;
vk::Pipeline blit_depth_stencil_pipeline;
vk::Pipeline convert_d32_to_r32_pipeline;
vk::Pipeline convert_r32_to_d32_pipeline;
vk::Pipeline convert_d16_to_r16_pipeline;
vk::Pipeline convert_r16_to_d16_pipeline;
};
} // namespace Vulkan

View File: src/video_core/renderer_vulkan/fixed_pipeline_state.cpp

@@ -0,0 +1,395 @@
// Copyright 2019 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <algorithm>
#include <cstring>
#include <tuple>
#include <boost/functional/hash.hpp>
#include "common/bit_cast.h"
#include "common/cityhash.h"
#include "common/common_types.h"
#include "video_core/renderer_vulkan/fixed_pipeline_state.h"
namespace Vulkan {
namespace {
constexpr std::size_t POINT = 0;
constexpr std::size_t LINE = 1;
constexpr std::size_t POLYGON = 2;
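// Selects which polygon offset enable (point, line or fill) applies to each primitive
// topology; Fill() uses it below to derive depth_bias_enable.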
constexpr std::array POLYGON_OFFSET_ENABLE_LUT = {
POINT, // Points
LINE, // Lines
LINE, // LineLoop
LINE, // LineStrip
POLYGON, // Triangles
POLYGON, // TriangleStrip
POLYGON, // TriangleFan
POLYGON, // Quads
POLYGON, // QuadStrip
POLYGON, // Polygon
LINE, // LinesAdjacency
LINE, // LineStripAdjacency
POLYGON, // TrianglesAdjacency
POLYGON, // TriangleStripAdjacency
POLYGON, // Patches
};
} // Anonymous namespace
void FixedPipelineState::Fill(const Maxwell& regs, bool has_extended_dynamic_state) {
const std::array enabled_lut = {regs.polygon_offset_point_enable,
regs.polygon_offset_line_enable,
regs.polygon_offset_fill_enable};
const u32 topology_index = static_cast<u32>(regs.draw.topology.Value());
raw1 = 0;
primitive_restart_enable.Assign(regs.primitive_restart.enabled != 0 ? 1 : 0);
depth_bias_enable.Assign(enabled_lut[POLYGON_OFFSET_ENABLE_LUT[topology_index]] != 0 ? 1 : 0);
depth_clamp_disabled.Assign(regs.view_volume_clip_control.depth_clamp_disabled.Value());
ndc_minus_one_to_one.Assign(regs.depth_mode == Maxwell::DepthMode::MinusOneToOne ? 1 : 0);
polygon_mode.Assign(PackPolygonMode(regs.polygon_mode_front));
patch_control_points_minus_one.Assign(regs.patch_vertices - 1);
tessellation_primitive.Assign(static_cast<u32>(regs.tess_mode.prim.Value()));
tessellation_spacing.Assign(static_cast<u32>(regs.tess_mode.spacing.Value()));
tessellation_clockwise.Assign(regs.tess_mode.cw.Value());
logic_op_enable.Assign(regs.logic_op.enable != 0 ? 1 : 0);
logic_op.Assign(PackLogicOp(regs.logic_op.operation));
rasterize_enable.Assign(regs.rasterize_enable != 0 ? 1 : 0);
topology.Assign(regs.draw.topology);
msaa_mode.Assign(regs.multisample_mode);
raw2 = 0;
const auto test_func =
regs.alpha_test_enabled == 1 ? regs.alpha_test_func : Maxwell::ComparisonOp::Always;
alpha_test_func.Assign(PackComparisonOp(test_func));
early_z.Assign(regs.force_early_fragment_tests != 0 ? 1 : 0);
alpha_test_ref = Common::BitCast<u32>(regs.alpha_test_ref);
point_size = Common::BitCast<u32>(regs.point_size);
for (std::size_t index = 0; index < Maxwell::NumVertexArrays; ++index) {
binding_divisors[index] =
regs.instanced_arrays.IsInstancingEnabled(index) ? regs.vertex_array[index].divisor : 0;
}
for (size_t index = 0; index < Maxwell::NumVertexAttributes; ++index) {
const auto& input = regs.vertex_attrib_format[index];
auto& attribute = attributes[index];
attribute.raw = 0;
attribute.enabled.Assign(input.IsConstant() ? 0 : 1);
attribute.buffer.Assign(input.buffer);
attribute.offset.Assign(input.offset);
attribute.type.Assign(static_cast<u32>(input.type.Value()));
attribute.size.Assign(static_cast<u32>(input.size.Value()));
attribute.binding_index_enabled.Assign(regs.vertex_array[index].IsEnabled() ? 1 : 0);
}
for (std::size_t index = 0; index < std::size(attachments); ++index) {
attachments[index].Fill(regs, index);
}
const auto& transform = regs.viewport_transform;
std::transform(transform.begin(), transform.end(), viewport_swizzles.begin(),
[](const auto& viewport) { return static_cast<u16>(viewport.swizzle.raw); });
if (!has_extended_dynamic_state) {
no_extended_dynamic_state.Assign(1);
dynamic_state.Fill(regs);
}
}
void FixedPipelineState::BlendingAttachment::Fill(const Maxwell& regs, std::size_t index) {
const auto& mask = regs.color_mask[regs.color_mask_common ? 0 : index];
raw = 0;
mask_r.Assign(mask.R);
mask_g.Assign(mask.G);
mask_b.Assign(mask.B);
mask_a.Assign(mask.A);
// TODO: Use a C++20 templated lambda to deduplicate this code
if (!regs.independent_blend_enable) {
const auto& src = regs.blend;
if (!src.enable[index]) {
return;
}
equation_rgb.Assign(PackBlendEquation(src.equation_rgb));
equation_a.Assign(PackBlendEquation(src.equation_a));
factor_source_rgb.Assign(PackBlendFactor(src.factor_source_rgb));
factor_dest_rgb.Assign(PackBlendFactor(src.factor_dest_rgb));
factor_source_a.Assign(PackBlendFactor(src.factor_source_a));
factor_dest_a.Assign(PackBlendFactor(src.factor_dest_a));
enable.Assign(1);
return;
}
if (!regs.blend.enable[index]) {
return;
}
const auto& src = regs.independent_blend[index];
equation_rgb.Assign(PackBlendEquation(src.equation_rgb));
equation_a.Assign(PackBlendEquation(src.equation_a));
factor_source_rgb.Assign(PackBlendFactor(src.factor_source_rgb));
factor_dest_rgb.Assign(PackBlendFactor(src.factor_dest_rgb));
factor_source_a.Assign(PackBlendFactor(src.factor_source_a));
factor_dest_a.Assign(PackBlendFactor(src.factor_dest_a));
enable.Assign(1);
}
void FixedPipelineState::DynamicState::Fill(const Maxwell& regs) {
u32 packed_front_face = PackFrontFace(regs.front_face);
if (regs.screen_y_control.triangle_rast_flip != 0) {
// Flip the front face (PackFrontFace yields 0 or 1, so 1 - x toggles the winding)
packed_front_face = 1 - packed_front_face;
}
raw1 = 0;
raw2 = 0;
front.action_stencil_fail.Assign(PackStencilOp(regs.stencil_front_op_fail));
front.action_depth_fail.Assign(PackStencilOp(regs.stencil_front_op_zfail));
front.action_depth_pass.Assign(PackStencilOp(regs.stencil_front_op_zpass));
front.test_func.Assign(PackComparisonOp(regs.stencil_front_func_func));
if (regs.stencil_two_side_enable) {
back.action_stencil_fail.Assign(PackStencilOp(regs.stencil_back_op_fail));
back.action_depth_fail.Assign(PackStencilOp(regs.stencil_back_op_zfail));
back.action_depth_pass.Assign(PackStencilOp(regs.stencil_back_op_zpass));
back.test_func.Assign(PackComparisonOp(regs.stencil_back_func_func));
} else {
back.action_stencil_fail.Assign(front.action_stencil_fail);
back.action_depth_fail.Assign(front.action_depth_fail);
back.action_depth_pass.Assign(front.action_depth_pass);
back.test_func.Assign(front.test_func);
}
stencil_enable.Assign(regs.stencil_enable);
depth_write_enable.Assign(regs.depth_write_enabled);
depth_bounds_enable.Assign(regs.depth_bounds_enable);
depth_test_enable.Assign(regs.depth_test_enable);
front_face.Assign(packed_front_face);
depth_test_func.Assign(PackComparisonOp(regs.depth_test_func));
cull_face.Assign(PackCullFace(regs.cull_face));
cull_enable.Assign(regs.cull_test_enabled != 0 ? 1 : 0);
std::ranges::transform(regs.vertex_array, vertex_strides.begin(), [](const auto& array) {
return static_cast<u16>(array.stride.Value());
});
}
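// Hash() and operator== below operate on the first Size() raw bytes of the object.
// This is only sound because the type has unique object representations (no padding
// bits); the static_asserts in the header enforce that.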
std::size_t FixedPipelineState::Hash() const noexcept {
const u64 hash = Common::CityHash64(reinterpret_cast<const char*>(this), Size());
return static_cast<std::size_t>(hash);
}
bool FixedPipelineState::operator==(const FixedPipelineState& rhs) const noexcept {
return std::memcmp(this, &rhs, Size()) == 0;
}
u32 FixedPipelineState::PackComparisonOp(Maxwell::ComparisonOp op) noexcept {
// OpenGL enums go from 0x200 to 0x207 and the others from 1 to 8.
// If we subtract 0x200 from the OpenGL enums and 1 from the others, we get a 0-7 range.
// Perfect for a hash.
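// e.g. GL_NEVER (0x200) packs to 0 and GL_ALWAYS (0x207) to 7; likewise the
// unprefixed Never (1) packs to 0 and Always (8) to 7.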
const u32 value = static_cast<u32>(op);
return value - (value >= 0x200 ? 0x200 : 1);
}
Maxwell::ComparisonOp FixedPipelineState::UnpackComparisonOp(u32 packed) noexcept {
// See PackComparisonOp for the logic behind this. Unpacking always yields the
// non-GL (1-based) enumeration, so GL-style ops round-trip to their equivalents.
return static_cast<Maxwell::ComparisonOp>(packed + 1);
}
u32 FixedPipelineState::PackStencilOp(Maxwell::StencilOp op) noexcept {
switch (op) {
case Maxwell::StencilOp::Keep:
case Maxwell::StencilOp::KeepOGL:
return 0;
case Maxwell::StencilOp::Zero:
case Maxwell::StencilOp::ZeroOGL:
return 1;
case Maxwell::StencilOp::Replace:
case Maxwell::StencilOp::ReplaceOGL:
return 2;
case Maxwell::StencilOp::Incr:
case Maxwell::StencilOp::IncrOGL:
return 3;
case Maxwell::StencilOp::Decr:
case Maxwell::StencilOp::DecrOGL:
return 4;
case Maxwell::StencilOp::Invert:
case Maxwell::StencilOp::InvertOGL:
return 5;
case Maxwell::StencilOp::IncrWrap:
case Maxwell::StencilOp::IncrWrapOGL:
return 6;
case Maxwell::StencilOp::DecrWrap:
case Maxwell::StencilOp::DecrWrapOGL:
return 7;
}
return 0;
}
Maxwell::StencilOp FixedPipelineState::UnpackStencilOp(u32 packed) noexcept {
static constexpr std::array LUT = {Maxwell::StencilOp::Keep, Maxwell::StencilOp::Zero,
Maxwell::StencilOp::Replace, Maxwell::StencilOp::Incr,
Maxwell::StencilOp::Decr, Maxwell::StencilOp::Invert,
Maxwell::StencilOp::IncrWrap, Maxwell::StencilOp::DecrWrap};
return LUT[packed];
}
u32 FixedPipelineState::PackCullFace(Maxwell::CullFace cull) noexcept {
// FrontAndBack is 0x408; subtracting 0x406 from it gives 2.
// Individual cull faces are 0x404 and 0x405; subtracting 0x404 gives 0 and 1.
const u32 value = static_cast<u32>(cull);
return value - (value == 0x408 ? 0x406 : 0x404);
}
Maxwell::CullFace FixedPipelineState::UnpackCullFace(u32 packed) noexcept {
static constexpr std::array LUT = {Maxwell::CullFace::Front, Maxwell::CullFace::Back,
Maxwell::CullFace::FrontAndBack};
return LUT[packed];
}
u32 FixedPipelineState::PackFrontFace(Maxwell::FrontFace face) noexcept {
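// GL_CW is 0x900 and GL_CCW is 0x901, so subtracting 0x900 packs them to 0 and 1.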
return static_cast<u32>(face) - 0x900;
}
Maxwell::FrontFace FixedPipelineState::UnpackFrontFace(u32 packed) noexcept {
return static_cast<Maxwell::FrontFace>(packed + 0x900);
}
u32 FixedPipelineState::PackPolygonMode(Maxwell::PolygonMode mode) noexcept {
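// GL_POINT, GL_LINE and GL_FILL are 0x1B00, 0x1B01 and 0x1B02, packing to 0-2.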
return static_cast<u32>(mode) - 0x1B00;
}
Maxwell::PolygonMode FixedPipelineState::UnpackPolygonMode(u32 packed) noexcept {
return static_cast<Maxwell::PolygonMode>(packed + 0x1B00);
}
u32 FixedPipelineState::PackLogicOp(Maxwell::LogicOperation op) noexcept {
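// OpenGL logic ops range from GL_CLEAR (0x1500) to GL_SET (0x150F), packing to 0-15.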
return static_cast<u32>(op) - 0x1500;
}
Maxwell::LogicOperation FixedPipelineState::UnpackLogicOp(u32 packed) noexcept {
return static_cast<Maxwell::LogicOperation>(packed + 0x1500);
}
u32 FixedPipelineState::PackBlendEquation(Maxwell::Blend::Equation equation) noexcept {
switch (equation) {
case Maxwell::Blend::Equation::Add:
case Maxwell::Blend::Equation::AddGL:
return 0;
case Maxwell::Blend::Equation::Subtract:
case Maxwell::Blend::Equation::SubtractGL:
return 1;
case Maxwell::Blend::Equation::ReverseSubtract:
case Maxwell::Blend::Equation::ReverseSubtractGL:
return 2;
case Maxwell::Blend::Equation::Min:
case Maxwell::Blend::Equation::MinGL:
return 3;
case Maxwell::Blend::Equation::Max:
case Maxwell::Blend::Equation::MaxGL:
return 4;
}
return 0;
}
Maxwell::Blend::Equation FixedPipelineState::UnpackBlendEquation(u32 packed) noexcept {
static constexpr std::array LUT = {
Maxwell::Blend::Equation::Add, Maxwell::Blend::Equation::Subtract,
Maxwell::Blend::Equation::ReverseSubtract, Maxwell::Blend::Equation::Min,
Maxwell::Blend::Equation::Max};
return LUT[packed];
}
u32 FixedPipelineState::PackBlendFactor(Maxwell::Blend::Factor factor) noexcept {
switch (factor) {
case Maxwell::Blend::Factor::Zero:
case Maxwell::Blend::Factor::ZeroGL:
return 0;
case Maxwell::Blend::Factor::One:
case Maxwell::Blend::Factor::OneGL:
return 1;
case Maxwell::Blend::Factor::SourceColor:
case Maxwell::Blend::Factor::SourceColorGL:
return 2;
case Maxwell::Blend::Factor::OneMinusSourceColor:
case Maxwell::Blend::Factor::OneMinusSourceColorGL:
return 3;
case Maxwell::Blend::Factor::SourceAlpha:
case Maxwell::Blend::Factor::SourceAlphaGL:
return 4;
case Maxwell::Blend::Factor::OneMinusSourceAlpha:
case Maxwell::Blend::Factor::OneMinusSourceAlphaGL:
return 5;
case Maxwell::Blend::Factor::DestAlpha:
case Maxwell::Blend::Factor::DestAlphaGL:
return 6;
case Maxwell::Blend::Factor::OneMinusDestAlpha:
case Maxwell::Blend::Factor::OneMinusDestAlphaGL:
return 7;
case Maxwell::Blend::Factor::DestColor:
case Maxwell::Blend::Factor::DestColorGL:
return 8;
case Maxwell::Blend::Factor::OneMinusDestColor:
case Maxwell::Blend::Factor::OneMinusDestColorGL:
return 9;
case Maxwell::Blend::Factor::SourceAlphaSaturate:
case Maxwell::Blend::Factor::SourceAlphaSaturateGL:
return 10;
case Maxwell::Blend::Factor::Source1Color:
case Maxwell::Blend::Factor::Source1ColorGL:
return 11;
case Maxwell::Blend::Factor::OneMinusSource1Color:
case Maxwell::Blend::Factor::OneMinusSource1ColorGL:
return 12;
case Maxwell::Blend::Factor::Source1Alpha:
case Maxwell::Blend::Factor::Source1AlphaGL:
return 13;
case Maxwell::Blend::Factor::OneMinusSource1Alpha:
case Maxwell::Blend::Factor::OneMinusSource1AlphaGL:
return 14;
case Maxwell::Blend::Factor::ConstantColor:
case Maxwell::Blend::Factor::ConstantColorGL:
return 15;
case Maxwell::Blend::Factor::OneMinusConstantColor:
case Maxwell::Blend::Factor::OneMinusConstantColorGL:
return 16;
case Maxwell::Blend::Factor::ConstantAlpha:
case Maxwell::Blend::Factor::ConstantAlphaGL:
return 17;
case Maxwell::Blend::Factor::OneMinusConstantAlpha:
case Maxwell::Blend::Factor::OneMinusConstantAlphaGL:
return 18;
}
return 0;
}
Maxwell::Blend::Factor FixedPipelineState::UnpackBlendFactor(u32 packed) noexcept {
static constexpr std::array LUT = {
Maxwell::Blend::Factor::Zero,
Maxwell::Blend::Factor::One,
Maxwell::Blend::Factor::SourceColor,
Maxwell::Blend::Factor::OneMinusSourceColor,
Maxwell::Blend::Factor::SourceAlpha,
Maxwell::Blend::Factor::OneMinusSourceAlpha,
Maxwell::Blend::Factor::DestAlpha,
Maxwell::Blend::Factor::OneMinusDestAlpha,
Maxwell::Blend::Factor::DestColor,
Maxwell::Blend::Factor::OneMinusDestColor,
Maxwell::Blend::Factor::SourceAlphaSaturate,
Maxwell::Blend::Factor::Source1Color,
Maxwell::Blend::Factor::OneMinusSource1Color,
Maxwell::Blend::Factor::Source1Alpha,
Maxwell::Blend::Factor::OneMinusSource1Alpha,
Maxwell::Blend::Factor::ConstantColor,
Maxwell::Blend::Factor::OneMinusConstantColor,
Maxwell::Blend::Factor::ConstantAlpha,
Maxwell::Blend::Factor::OneMinusConstantAlpha,
};
return LUT[packed];
}
} // namespace Vulkan

View File: src/video_core/renderer_vulkan/fixed_pipeline_state.h

@@ -0,0 +1,232 @@
// Copyright 2019 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <array>
#include <type_traits>
#include "common/bit_field.h"
#include "common/common_types.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/surface.h"
namespace Vulkan {
using Maxwell = Tegra::Engines::Maxwell3D::Regs;
struct FixedPipelineState {
static u32 PackComparisonOp(Maxwell::ComparisonOp op) noexcept;
static Maxwell::ComparisonOp UnpackComparisonOp(u32 packed) noexcept;
static u32 PackStencilOp(Maxwell::StencilOp op) noexcept;
static Maxwell::StencilOp UnpackStencilOp(u32 packed) noexcept;
static u32 PackCullFace(Maxwell::CullFace cull) noexcept;
static Maxwell::CullFace UnpackCullFace(u32 packed) noexcept;
static u32 PackFrontFace(Maxwell::FrontFace face) noexcept;
static Maxwell::FrontFace UnpackFrontFace(u32 packed) noexcept;
static u32 PackPolygonMode(Maxwell::PolygonMode mode) noexcept;
static Maxwell::PolygonMode UnpackPolygonMode(u32 packed) noexcept;
static u32 PackLogicOp(Maxwell::LogicOperation op) noexcept;
static Maxwell::LogicOperation UnpackLogicOp(u32 packed) noexcept;
static u32 PackBlendEquation(Maxwell::Blend::Equation equation) noexcept;
static Maxwell::Blend::Equation UnpackBlendEquation(u32 packed) noexcept;
static u32 PackBlendFactor(Maxwell::Blend::Factor factor) noexcept;
static Maxwell::Blend::Factor UnpackBlendFactor(u32 packed) noexcept;
struct BlendingAttachment {
union {
u32 raw;
BitField<0, 1, u32> mask_r;
BitField<1, 1, u32> mask_g;
BitField<2, 1, u32> mask_b;
BitField<3, 1, u32> mask_a;
BitField<4, 3, u32> equation_rgb;
BitField<7, 3, u32> equation_a;
BitField<10, 5, u32> factor_source_rgb;
BitField<15, 5, u32> factor_dest_rgb;
BitField<20, 5, u32> factor_source_a;
BitField<25, 5, u32> factor_dest_a;
BitField<30, 1, u32> enable;
};
void Fill(const Maxwell& regs, std::size_t index);
constexpr std::array<bool, 4> Mask() const noexcept {
return {mask_r != 0, mask_g != 0, mask_b != 0, mask_a != 0};
}
Maxwell::Blend::Equation EquationRGB() const noexcept {
return UnpackBlendEquation(equation_rgb.Value());
}
Maxwell::Blend::Equation EquationAlpha() const noexcept {
return UnpackBlendEquation(equation_a.Value());
}
Maxwell::Blend::Factor SourceRGBFactor() const noexcept {
return UnpackBlendFactor(factor_source_rgb.Value());
}
Maxwell::Blend::Factor DestRGBFactor() const noexcept {
return UnpackBlendFactor(factor_dest_rgb.Value());
}
Maxwell::Blend::Factor SourceAlphaFactor() const noexcept {
return UnpackBlendFactor(factor_source_a.Value());
}
Maxwell::Blend::Factor DestAlphaFactor() const noexcept {
return UnpackBlendFactor(factor_dest_a.Value());
}
};
union VertexAttribute {
u32 raw;
BitField<0, 1, u32> enabled;
BitField<1, 5, u32> buffer;
BitField<6, 14, u32> offset;
BitField<20, 3, u32> type;
BitField<23, 6, u32> size;
// Not really an element of a vertex attribute, but it can be packed here
BitField<29, 1, u32> binding_index_enabled;
constexpr Maxwell::VertexAttribute::Type Type() const noexcept {
return static_cast<Maxwell::VertexAttribute::Type>(type.Value());
}
constexpr Maxwell::VertexAttribute::Size Size() const noexcept {
return static_cast<Maxwell::VertexAttribute::Size>(size.Value());
}
};
template <std::size_t Position>
union StencilFace {
BitField<Position + 0, 3, u32> action_stencil_fail;
BitField<Position + 3, 3, u32> action_depth_fail;
BitField<Position + 6, 3, u32> action_depth_pass;
BitField<Position + 9, 3, u32> test_func;
Maxwell::StencilOp ActionStencilFail() const noexcept {
return UnpackStencilOp(action_stencil_fail);
}
Maxwell::StencilOp ActionDepthFail() const noexcept {
return UnpackStencilOp(action_depth_fail);
}
Maxwell::StencilOp ActionDepthPass() const noexcept {
return UnpackStencilOp(action_depth_pass);
}
Maxwell::ComparisonOp TestFunc() const noexcept {
return UnpackComparisonOp(test_func);
}
};
struct DynamicState {
union {
u32 raw1;
StencilFace<0> front;
StencilFace<12> back;
BitField<24, 1, u32> stencil_enable;
BitField<25, 1, u32> depth_write_enable;
BitField<26, 1, u32> depth_bounds_enable;
BitField<27, 1, u32> depth_test_enable;
BitField<28, 1, u32> front_face;
BitField<29, 3, u32> depth_test_func;
};
union {
u32 raw2;
BitField<0, 2, u32> cull_face;
BitField<2, 1, u32> cull_enable;
};
// Vertex stride is a 12-bit value, so we have 4 bits to spare per element
std::array<u16, Maxwell::NumVertexArrays> vertex_strides;
void Fill(const Maxwell& regs);
Maxwell::ComparisonOp DepthTestFunc() const noexcept {
return UnpackComparisonOp(depth_test_func);
}
Maxwell::CullFace CullFace() const noexcept {
return UnpackCullFace(cull_face.Value());
}
Maxwell::FrontFace FrontFace() const noexcept {
return UnpackFrontFace(front_face.Value());
}
};
union {
u32 raw1;
BitField<0, 1, u32> no_extended_dynamic_state;
BitField<2, 1, u32> primitive_restart_enable;
BitField<3, 1, u32> depth_bias_enable;
BitField<4, 1, u32> depth_clamp_disabled;
BitField<5, 1, u32> ndc_minus_one_to_one;
BitField<6, 2, u32> polygon_mode;
BitField<8, 5, u32> patch_control_points_minus_one;
BitField<13, 2, u32> tessellation_primitive;
BitField<15, 2, u32> tessellation_spacing;
BitField<17, 1, u32> tessellation_clockwise;
BitField<18, 1, u32> logic_op_enable;
BitField<19, 4, u32> logic_op;
BitField<23, 1, u32> rasterize_enable;
BitField<24, 4, Maxwell::PrimitiveTopology> topology;
BitField<28, 4, Tegra::Texture::MsaaMode> msaa_mode;
};
union {
u32 raw2;
BitField<0, 3, u32> alpha_test_func;
BitField<3, 1, u32> early_z;
};
u32 alpha_test_ref;
u32 point_size;
std::array<u32, Maxwell::NumVertexArrays> binding_divisors;
std::array<VertexAttribute, Maxwell::NumVertexAttributes> attributes;
std::array<BlendingAttachment, Maxwell::NumRenderTargets> attachments;
std::array<u16, Maxwell::NumViewports> viewport_swizzles;
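// Must remain the last member: Size() excludes it from hashing and comparison when
// extended dynamic state is in use.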
DynamicState dynamic_state;
void Fill(const Maxwell& regs, bool has_extended_dynamic_state);
std::size_t Hash() const noexcept;
bool operator==(const FixedPipelineState& rhs) const noexcept;
bool operator!=(const FixedPipelineState& rhs) const noexcept {
return !operator==(rhs);
}
std::size_t Size() const noexcept {
const std::size_t total_size = sizeof *this;
return total_size - (no_extended_dynamic_state != 0 ? 0 : sizeof(DynamicState));
}
};
static_assert(std::has_unique_object_representations_v<FixedPipelineState>);
static_assert(std::is_trivially_copyable_v<FixedPipelineState>);
static_assert(std::is_trivially_constructible_v<FixedPipelineState>);
} // namespace Vulkan
namespace std {
template <>
struct hash<Vulkan::FixedPipelineState> {
std::size_t operator()(const Vulkan::FixedPipelineState& k) const noexcept {
return k.Hash();
}
};
} // namespace std

View File: src/video_core/renderer_vulkan/maxwell_to_vk.cpp

@@ -0,0 +1,743 @@
// Copyright 2019 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <iterator>
#include "common/assert.h"
#include "common/common_types.h"
#include "common/logging/log.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/renderer_vulkan/maxwell_to_vk.h"
#include "video_core/renderer_vulkan/vk_device.h"
#include "video_core/renderer_vulkan/wrapper.h"
#include "video_core/surface.h"
namespace Vulkan::MaxwellToVK {
using Maxwell = Tegra::Engines::Maxwell3D::Regs;
namespace Sampler {
VkFilter Filter(Tegra::Texture::TextureFilter filter) {
switch (filter) {
case Tegra::Texture::TextureFilter::Nearest:
return VK_FILTER_NEAREST;
case Tegra::Texture::TextureFilter::Linear:
return VK_FILTER_LINEAR;
}
UNREACHABLE_MSG("Invalid sampler filter={}", filter);
return {};
}
VkSamplerMipmapMode MipmapMode(Tegra::Texture::TextureMipmapFilter mipmap_filter) {
switch (mipmap_filter) {
case Tegra::Texture::TextureMipmapFilter::None:
// There are no Vulkan filter modes that directly correspond to OpenGL minification filters
// of GL_LINEAR or GL_NEAREST, but they can be emulated using
// VK_SAMPLER_MIPMAP_MODE_NEAREST, minLod = 0, and maxLod = 0.25, and using minFilter =
// VK_FILTER_LINEAR or minFilter = VK_FILTER_NEAREST, respectively.
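// This helper only selects the mipmap mode; the minLod/maxLod clamp is assumed to be
// applied where the VkSamplerCreateInfo is assembled.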
return VK_SAMPLER_MIPMAP_MODE_NEAREST;
case Tegra::Texture::TextureMipmapFilter::Nearest:
return VK_SAMPLER_MIPMAP_MODE_NEAREST;
case Tegra::Texture::TextureMipmapFilter::Linear:
return VK_SAMPLER_MIPMAP_MODE_LINEAR;
}
UNREACHABLE_MSG("Invalid sampler mipmap mode={}", mipmap_filter);
return {};
}
VkSamplerAddressMode WrapMode(const VKDevice& device, Tegra::Texture::WrapMode wrap_mode,
Tegra::Texture::TextureFilter filter) {
switch (wrap_mode) {
case Tegra::Texture::WrapMode::Wrap:
return VK_SAMPLER_ADDRESS_MODE_REPEAT;
case Tegra::Texture::WrapMode::Mirror:
return VK_SAMPLER_ADDRESS_MODE_MIRRORED_REPEAT;
case Tegra::Texture::WrapMode::ClampToEdge:
return VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE;
case Tegra::Texture::WrapMode::Border:
return VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER;
case Tegra::Texture::WrapMode::Clamp:
if (device.GetDriverID() == VK_DRIVER_ID_NVIDIA_PROPRIETARY_KHR) {
// Nvidia's Vulkan driver falls back to GL_CLAMP behavior on invalid enumerations, so we
// can exploit that by deliberately passing an invalid value.
return static_cast<VkSamplerAddressMode>(0xcafe);
}
// TODO(Rodrigo): Emulate GL_CLAMP properly on other vendors
switch (filter) {
case Tegra::Texture::TextureFilter::Nearest:
return VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE;
case Tegra::Texture::TextureFilter::Linear:
return VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER;
}
UNREACHABLE();
return VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE;
case Tegra::Texture::WrapMode::MirrorOnceClampToEdge:
return VK_SAMPLER_ADDRESS_MODE_MIRROR_CLAMP_TO_EDGE;
case Tegra::Texture::WrapMode::MirrorOnceBorder:
UNIMPLEMENTED();
return VK_SAMPLER_ADDRESS_MODE_MIRROR_CLAMP_TO_EDGE;
default:
UNIMPLEMENTED_MSG("Unimplemented wrap mode={}", wrap_mode);
return {};
}
}
VkCompareOp DepthCompareFunction(Tegra::Texture::DepthCompareFunc depth_compare_func) {
switch (depth_compare_func) {
case Tegra::Texture::DepthCompareFunc::Never:
return VK_COMPARE_OP_NEVER;
case Tegra::Texture::DepthCompareFunc::Less:
return VK_COMPARE_OP_LESS;
case Tegra::Texture::DepthCompareFunc::LessEqual:
return VK_COMPARE_OP_LESS_OR_EQUAL;
case Tegra::Texture::DepthCompareFunc::Equal:
return VK_COMPARE_OP_EQUAL;
case Tegra::Texture::DepthCompareFunc::NotEqual:
return VK_COMPARE_OP_NOT_EQUAL;
case Tegra::Texture::DepthCompareFunc::Greater:
return VK_COMPARE_OP_GREATER;
case Tegra::Texture::DepthCompareFunc::GreaterEqual:
return VK_COMPARE_OP_GREATER_OR_EQUAL;
case Tegra::Texture::DepthCompareFunc::Always:
return VK_COMPARE_OP_ALWAYS;
}
UNIMPLEMENTED_MSG("Unimplemented sampler depth compare function={}", depth_compare_func);
return {};
}
} // namespace Sampler
namespace {
enum : u32 { Attachable = 1, Storage = 2 };
struct FormatTuple {
VkFormat format; ///< Vulkan format
int usage = 0; ///< Describes image format usage
} constexpr tex_format_tuples[] = {
{VK_FORMAT_A8B8G8R8_UNORM_PACK32, Attachable | Storage}, // A8B8G8R8_UNORM
{VK_FORMAT_A8B8G8R8_SNORM_PACK32, Attachable | Storage}, // A8B8G8R8_SNORM
{VK_FORMAT_A8B8G8R8_SINT_PACK32, Attachable | Storage}, // A8B8G8R8_SINT
{VK_FORMAT_A8B8G8R8_UINT_PACK32, Attachable | Storage}, // A8B8G8R8_UINT
{VK_FORMAT_R5G6B5_UNORM_PACK16, Attachable}, // R5G6B5_UNORM
{VK_FORMAT_B5G6R5_UNORM_PACK16}, // B5G6R5_UNORM
{VK_FORMAT_A1R5G5B5_UNORM_PACK16, Attachable}, // A1R5G5B5_UNORM
{VK_FORMAT_A2B10G10R10_UNORM_PACK32, Attachable | Storage}, // A2B10G10R10_UNORM
{VK_FORMAT_A2B10G10R10_UINT_PACK32, Attachable | Storage}, // A2B10G10R10_UINT
{VK_FORMAT_A1R5G5B5_UNORM_PACK16, Attachable}, // A1B5G5R5_UNORM (flipped with swizzle)
{VK_FORMAT_R8_UNORM, Attachable | Storage}, // R8_UNORM
{VK_FORMAT_R8_SNORM, Attachable | Storage}, // R8_SNORM
{VK_FORMAT_R8_SINT, Attachable | Storage}, // R8_SINT
{VK_FORMAT_R8_UINT, Attachable | Storage}, // R8_UINT
{VK_FORMAT_R16G16B16A16_SFLOAT, Attachable | Storage}, // R16G16B16A16_FLOAT
{VK_FORMAT_R16G16B16A16_UNORM, Attachable | Storage}, // R16G16B16A16_UNORM
{VK_FORMAT_R16G16B16A16_SNORM, Attachable | Storage}, // R16G16B16A16_SNORM
{VK_FORMAT_R16G16B16A16_SINT, Attachable | Storage}, // R16G16B16A16_SINT
{VK_FORMAT_R16G16B16A16_UINT, Attachable | Storage}, // R16G16B16A16_UINT
{VK_FORMAT_B10G11R11_UFLOAT_PACK32, Attachable | Storage}, // B10G11R11_FLOAT
{VK_FORMAT_R32G32B32A32_UINT, Attachable | Storage}, // R32G32B32A32_UINT
{VK_FORMAT_BC1_RGBA_UNORM_BLOCK}, // BC1_RGBA_UNORM
{VK_FORMAT_BC2_UNORM_BLOCK}, // BC2_UNORM
{VK_FORMAT_BC3_UNORM_BLOCK}, // BC3_UNORM
{VK_FORMAT_BC4_UNORM_BLOCK}, // BC4_UNORM
{VK_FORMAT_BC4_SNORM_BLOCK}, // BC4_SNORM
{VK_FORMAT_BC5_UNORM_BLOCK}, // BC5_UNORM
{VK_FORMAT_BC5_SNORM_BLOCK}, // BC5_SNORM
{VK_FORMAT_BC7_UNORM_BLOCK}, // BC7_UNORM
{VK_FORMAT_BC6H_UFLOAT_BLOCK}, // BC6H_UFLOAT
{VK_FORMAT_BC6H_SFLOAT_BLOCK}, // BC6H_SFLOAT
{VK_FORMAT_ASTC_4x4_UNORM_BLOCK}, // ASTC_2D_4X4_UNORM
{VK_FORMAT_B8G8R8A8_UNORM, Attachable}, // B8G8R8A8_UNORM
{VK_FORMAT_R32G32B32A32_SFLOAT, Attachable | Storage}, // R32G32B32A32_FLOAT
{VK_FORMAT_R32G32B32A32_SINT, Attachable | Storage}, // R32G32B32A32_SINT
{VK_FORMAT_R32G32_SFLOAT, Attachable | Storage}, // R32G32_FLOAT
{VK_FORMAT_R32G32_SINT, Attachable | Storage}, // R32G32_SINT
{VK_FORMAT_R32_SFLOAT, Attachable | Storage}, // R32_FLOAT
{VK_FORMAT_R16_SFLOAT, Attachable | Storage}, // R16_FLOAT
{VK_FORMAT_R16_UNORM, Attachable | Storage}, // R16_UNORM
{VK_FORMAT_UNDEFINED}, // R16_SNORM
{VK_FORMAT_R16_UINT, Attachable | Storage}, // R16_UINT
{VK_FORMAT_UNDEFINED}, // R16_SINT
{VK_FORMAT_R16G16_UNORM, Attachable | Storage}, // R16G16_UNORM
{VK_FORMAT_R16G16_SFLOAT, Attachable | Storage}, // R16G16_FLOAT
{VK_FORMAT_UNDEFINED}, // R16G16_UINT
{VK_FORMAT_R16G16_SINT, Attachable | Storage}, // R16G16_SINT
{VK_FORMAT_R16G16_SNORM, Attachable | Storage}, // R16G16_SNORM
{VK_FORMAT_UNDEFINED}, // R32G32B32_FLOAT
{VK_FORMAT_R8G8B8A8_SRGB, Attachable}, // A8B8G8R8_SRGB
{VK_FORMAT_R8G8_UNORM, Attachable | Storage}, // R8G8_UNORM
{VK_FORMAT_R8G8_SNORM, Attachable | Storage}, // R8G8_SNORM
{VK_FORMAT_R8G8_SINT, Attachable | Storage}, // R8G8_SINT
{VK_FORMAT_R8G8_UINT, Attachable | Storage}, // R8G8_UINT
{VK_FORMAT_R32G32_UINT, Attachable | Storage}, // R32G32_UINT
{VK_FORMAT_UNDEFINED}, // R16G16B16X16_FLOAT
{VK_FORMAT_R32_UINT, Attachable | Storage}, // R32_UINT
{VK_FORMAT_R32_SINT, Attachable | Storage}, // R32_SINT
{VK_FORMAT_ASTC_8x8_UNORM_BLOCK}, // ASTC_2D_8X8_UNORM
{VK_FORMAT_UNDEFINED}, // ASTC_2D_8X5_UNORM
{VK_FORMAT_UNDEFINED}, // ASTC_2D_5X4_UNORM
{VK_FORMAT_B8G8R8A8_SRGB, Attachable}, // B8G8R8A8_SRGB
{VK_FORMAT_BC1_RGBA_SRGB_BLOCK}, // BC1_RGBA_SRGB
{VK_FORMAT_BC2_SRGB_BLOCK}, // BC2_SRGB
{VK_FORMAT_BC3_SRGB_BLOCK}, // BC3_SRGB
{VK_FORMAT_BC7_SRGB_BLOCK}, // BC7_SRGB
{VK_FORMAT_R4G4B4A4_UNORM_PACK16, Attachable}, // A4B4G4R4_UNORM
{VK_FORMAT_ASTC_4x4_SRGB_BLOCK}, // ASTC_2D_4X4_SRGB
{VK_FORMAT_ASTC_8x8_SRGB_BLOCK}, // ASTC_2D_8X8_SRGB
{VK_FORMAT_ASTC_8x5_SRGB_BLOCK}, // ASTC_2D_8X5_SRGB
{VK_FORMAT_ASTC_5x4_SRGB_BLOCK}, // ASTC_2D_5X4_SRGB
{VK_FORMAT_ASTC_5x5_UNORM_BLOCK}, // ASTC_2D_5X5_UNORM
{VK_FORMAT_ASTC_5x5_SRGB_BLOCK}, // ASTC_2D_5X5_SRGB
{VK_FORMAT_ASTC_10x8_UNORM_BLOCK}, // ASTC_2D_10X8_UNORM
{VK_FORMAT_ASTC_10x8_SRGB_BLOCK}, // ASTC_2D_10X8_SRGB
{VK_FORMAT_ASTC_6x6_UNORM_BLOCK}, // ASTC_2D_6X6_UNORM
{VK_FORMAT_ASTC_6x6_SRGB_BLOCK}, // ASTC_2D_6X6_SRGB
{VK_FORMAT_ASTC_10x10_UNORM_BLOCK}, // ASTC_2D_10X10_UNORM
{VK_FORMAT_ASTC_10x10_SRGB_BLOCK}, // ASTC_2D_10X10_SRGB
{VK_FORMAT_ASTC_12x12_UNORM_BLOCK}, // ASTC_2D_12X12_UNORM
{VK_FORMAT_ASTC_12x12_SRGB_BLOCK}, // ASTC_2D_12X12_SRGB
{VK_FORMAT_ASTC_8x6_UNORM_BLOCK}, // ASTC_2D_8X6_UNORM
{VK_FORMAT_ASTC_8x6_SRGB_BLOCK}, // ASTC_2D_8X6_SRGB
{VK_FORMAT_ASTC_6x5_UNORM_BLOCK}, // ASTC_2D_6X5_UNORM
{VK_FORMAT_ASTC_6x5_SRGB_BLOCK}, // ASTC_2D_6X5_SRGB
{VK_FORMAT_E5B9G9R9_UFLOAT_PACK32}, // E5B9G9R9_FLOAT
// Depth formats
{VK_FORMAT_D32_SFLOAT, Attachable}, // D32_FLOAT
{VK_FORMAT_D16_UNORM, Attachable}, // D16_UNORM
// DepthStencil formats
{VK_FORMAT_D24_UNORM_S8_UINT, Attachable}, // D24_UNORM_S8_UINT
{VK_FORMAT_D24_UNORM_S8_UINT, Attachable}, // S8_UINT_D24_UNORM (emulated)
{VK_FORMAT_D32_SFLOAT_S8_UINT, Attachable}, // D32_FLOAT_S8_UINT
};
static_assert(std::size(tex_format_tuples) == VideoCore::Surface::MaxPixelFormat);
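// Color formats occupy the low end of the PixelFormat enum, followed by depth and
// depth-stencil formats, so zeta (depth) formats can be identified with a range check.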
constexpr bool IsZetaFormat(PixelFormat pixel_format) {
return pixel_format >= PixelFormat::MaxColorFormat &&
pixel_format < PixelFormat::MaxDepthStencilFormat;
}
} // Anonymous namespace
FormatInfo SurfaceFormat(const VKDevice& device, FormatType format_type, PixelFormat pixel_format) {
ASSERT(static_cast<std::size_t>(pixel_format) < std::size(tex_format_tuples));
auto tuple = tex_format_tuples[static_cast<std::size_t>(pixel_format)];
if (tuple.format == VK_FORMAT_UNDEFINED) {
UNIMPLEMENTED_MSG("Unimplemented texture format with pixel format={}", pixel_format);
return {VK_FORMAT_A8B8G8R8_UNORM_PACK32, true, true};
}
// Use A8B8G8R8_UNORM on hardware that doesn't support ASTC natively
if (!device.IsOptimalAstcSupported() && VideoCore::Surface::IsPixelFormatASTC(pixel_format)) {
const bool is_srgb = VideoCore::Surface::IsPixelFormatSRGB(pixel_format);
tuple.format = is_srgb ? VK_FORMAT_A8B8G8R8_SRGB_PACK32 : VK_FORMAT_A8B8G8R8_UNORM_PACK32;
}
const bool attachable = tuple.usage & Attachable;
const bool storage = tuple.usage & Storage;
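    // Collect the format feature flags this usage requires so GetSupportedFormat can
    // validate them against the device, or pick a supported alternative if needed.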
VkFormatFeatureFlags usage;
switch (format_type) {
case FormatType::Buffer:
usage =
VK_FORMAT_FEATURE_STORAGE_TEXEL_BUFFER_BIT | VK_FORMAT_FEATURE_UNIFORM_TEXEL_BUFFER_BIT;
break;
case FormatType::Linear:
case FormatType::Optimal:
usage = VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT | VK_FORMAT_FEATURE_TRANSFER_DST_BIT |
VK_FORMAT_FEATURE_TRANSFER_SRC_BIT;
if (attachable) {
usage |= IsZetaFormat(pixel_format) ? VK_FORMAT_FEATURE_DEPTH_STENCIL_ATTACHMENT_BIT
: VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BIT;
}
if (storage) {
usage |= VK_FORMAT_FEATURE_STORAGE_IMAGE_BIT;
}
break;
}
return {device.GetSupportedFormat(tuple.format, usage, format_type), attachable, storage};
}
VkShaderStageFlagBits ShaderStage(Tegra::Engines::ShaderType stage) {
switch (stage) {
case Tegra::Engines::ShaderType::Vertex:
return VK_SHADER_STAGE_VERTEX_BIT;
case Tegra::Engines::ShaderType::TesselationControl:
return VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT;
case Tegra::Engines::ShaderType::TesselationEval:
return VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT;
case Tegra::Engines::ShaderType::Geometry:
return VK_SHADER_STAGE_GEOMETRY_BIT;
case Tegra::Engines::ShaderType::Fragment:
return VK_SHADER_STAGE_FRAGMENT_BIT;
case Tegra::Engines::ShaderType::Compute:
return VK_SHADER_STAGE_COMPUTE_BIT;
}
UNIMPLEMENTED_MSG("Unimplemented shader stage={}", stage);
return {};
}
VkPrimitiveTopology PrimitiveTopology([[maybe_unused]] const VKDevice& device,
Maxwell::PrimitiveTopology topology) {
switch (topology) {
case Maxwell::PrimitiveTopology::Points:
return VK_PRIMITIVE_TOPOLOGY_POINT_LIST;
case Maxwell::PrimitiveTopology::Lines:
return VK_PRIMITIVE_TOPOLOGY_LINE_LIST;
case Maxwell::PrimitiveTopology::LineStrip:
return VK_PRIMITIVE_TOPOLOGY_LINE_STRIP;
case Maxwell::PrimitiveTopology::Triangles:
return VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST;
case Maxwell::PrimitiveTopology::TriangleStrip:
return VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP;
case Maxwell::PrimitiveTopology::TriangleFan:
return VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN;
case Maxwell::PrimitiveTopology::Quads:
// TODO(Rodrigo): Use VK_PRIMITIVE_TOPOLOGY_QUAD_LIST_EXT whenever it releases
return VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST;
case Maxwell::PrimitiveTopology::Patches:
return VK_PRIMITIVE_TOPOLOGY_PATCH_LIST;
default:
UNIMPLEMENTED_MSG("Unimplemented topology={}", topology);
return {};
}
}
VkFormat VertexFormat(Maxwell::VertexAttribute::Type type, Maxwell::VertexAttribute::Size size) {
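    // Translate the Maxwell attribute (type, size) pair into the equivalent Vulkan vertex
    // format; combinations without a match fall through to the UNIMPLEMENTED message below.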
switch (type) {
case Maxwell::VertexAttribute::Type::UnsignedNorm:
switch (size) {
case Maxwell::VertexAttribute::Size::Size_8:
return VK_FORMAT_R8_UNORM;
case Maxwell::VertexAttribute::Size::Size_8_8:
return VK_FORMAT_R8G8_UNORM;
case Maxwell::VertexAttribute::Size::Size_8_8_8:
return VK_FORMAT_R8G8B8_UNORM;
case Maxwell::VertexAttribute::Size::Size_8_8_8_8:
return VK_FORMAT_R8G8B8A8_UNORM;
case Maxwell::VertexAttribute::Size::Size_16:
return VK_FORMAT_R16_UNORM;
case Maxwell::VertexAttribute::Size::Size_16_16:
return VK_FORMAT_R16G16_UNORM;
case Maxwell::VertexAttribute::Size::Size_16_16_16:
return VK_FORMAT_R16G16B16_UNORM;
case Maxwell::VertexAttribute::Size::Size_16_16_16_16:
return VK_FORMAT_R16G16B16A16_UNORM;
case Maxwell::VertexAttribute::Size::Size_10_10_10_2:
return VK_FORMAT_A2B10G10R10_UNORM_PACK32;
default:
break;
}
break;
case Maxwell::VertexAttribute::Type::SignedNorm:
switch (size) {
case Maxwell::VertexAttribute::Size::Size_8:
return VK_FORMAT_R8_SNORM;
case Maxwell::VertexAttribute::Size::Size_8_8:
return VK_FORMAT_R8G8_SNORM;
case Maxwell::VertexAttribute::Size::Size_8_8_8:
return VK_FORMAT_R8G8B8_SNORM;
case Maxwell::VertexAttribute::Size::Size_8_8_8_8:
return VK_FORMAT_R8G8B8A8_SNORM;
case Maxwell::VertexAttribute::Size::Size_16:
return VK_FORMAT_R16_SNORM;
case Maxwell::VertexAttribute::Size::Size_16_16:
return VK_FORMAT_R16G16_SNORM;
case Maxwell::VertexAttribute::Size::Size_16_16_16:
return VK_FORMAT_R16G16B16_SNORM;
case Maxwell::VertexAttribute::Size::Size_16_16_16_16:
return VK_FORMAT_R16G16B16A16_SNORM;
case Maxwell::VertexAttribute::Size::Size_10_10_10_2:
return VK_FORMAT_A2B10G10R10_SNORM_PACK32;
default:
break;
}
break;
case Maxwell::VertexAttribute::Type::UnsignedScaled:
switch (size) {
case Maxwell::VertexAttribute::Size::Size_8:
return VK_FORMAT_R8_USCALED;
case Maxwell::VertexAttribute::Size::Size_8_8:
return VK_FORMAT_R8G8_USCALED;
case Maxwell::VertexAttribute::Size::Size_8_8_8:
return VK_FORMAT_R8G8B8_USCALED;
case Maxwell::VertexAttribute::Size::Size_8_8_8_8:
return VK_FORMAT_R8G8B8A8_USCALED;
case Maxwell::VertexAttribute::Size::Size_16:
return VK_FORMAT_R16_USCALED;
case Maxwell::VertexAttribute::Size::Size_16_16:
return VK_FORMAT_R16G16_USCALED;
case Maxwell::VertexAttribute::Size::Size_16_16_16:
return VK_FORMAT_R16G16B16_USCALED;
case Maxwell::VertexAttribute::Size::Size_16_16_16_16:
return VK_FORMAT_R16G16B16A16_USCALED;
case Maxwell::VertexAttribute::Size::Size_10_10_10_2:
return VK_FORMAT_A2B10G10R10_USCALED_PACK32;
default:
break;
}
break;
case Maxwell::VertexAttribute::Type::SignedScaled:
switch (size) {
case Maxwell::VertexAttribute::Size::Size_8:
return VK_FORMAT_R8_SSCALED;
case Maxwell::VertexAttribute::Size::Size_8_8:
return VK_FORMAT_R8G8_SSCALED;
case Maxwell::VertexAttribute::Size::Size_8_8_8:
return VK_FORMAT_R8G8B8_SSCALED;
case Maxwell::VertexAttribute::Size::Size_8_8_8_8:
return VK_FORMAT_R8G8B8A8_SSCALED;
case Maxwell::VertexAttribute::Size::Size_16:
return VK_FORMAT_R16_SSCALED;
case Maxwell::VertexAttribute::Size::Size_16_16:
return VK_FORMAT_R16G16_SSCALED;
case Maxwell::VertexAttribute::Size::Size_16_16_16:
return VK_FORMAT_R16G16B16_SSCALED;
case Maxwell::VertexAttribute::Size::Size_16_16_16_16:
return VK_FORMAT_R16G16B16A16_SSCALED;
case Maxwell::VertexAttribute::Size::Size_10_10_10_2:
return VK_FORMAT_A2B10G10R10_SSCALED_PACK32;
default:
break;
}
break;
case Maxwell::VertexAttribute::Type::UnsignedInt:
switch (size) {
case Maxwell::VertexAttribute::Size::Size_8:
return VK_FORMAT_R8_UINT;
case Maxwell::VertexAttribute::Size::Size_8_8:
return VK_FORMAT_R8G8_UINT;
case Maxwell::VertexAttribute::Size::Size_8_8_8:
return VK_FORMAT_R8G8B8_UINT;
case Maxwell::VertexAttribute::Size::Size_8_8_8_8:
return VK_FORMAT_R8G8B8A8_UINT;
case Maxwell::VertexAttribute::Size::Size_16:
return VK_FORMAT_R16_UINT;
case Maxwell::VertexAttribute::Size::Size_16_16:
return VK_FORMAT_R16G16_UINT;
case Maxwell::VertexAttribute::Size::Size_16_16_16:
return VK_FORMAT_R16G16B16_UINT;
case Maxwell::VertexAttribute::Size::Size_16_16_16_16:
return VK_FORMAT_R16G16B16A16_UINT;
case Maxwell::VertexAttribute::Size::Size_32:
return VK_FORMAT_R32_UINT;
case Maxwell::VertexAttribute::Size::Size_32_32:
return VK_FORMAT_R32G32_UINT;
case Maxwell::VertexAttribute::Size::Size_32_32_32:
return VK_FORMAT_R32G32B32_UINT;
case Maxwell::VertexAttribute::Size::Size_32_32_32_32:
return VK_FORMAT_R32G32B32A32_UINT;
case Maxwell::VertexAttribute::Size::Size_10_10_10_2:
return VK_FORMAT_A2B10G10R10_UINT_PACK32;
default:
break;
}
break;
case Maxwell::VertexAttribute::Type::SignedInt:
switch (size) {
case Maxwell::VertexAttribute::Size::Size_8:
return VK_FORMAT_R8_SINT;
case Maxwell::VertexAttribute::Size::Size_8_8:
return VK_FORMAT_R8G8_SINT;
case Maxwell::VertexAttribute::Size::Size_8_8_8:
return VK_FORMAT_R8G8B8_SINT;
case Maxwell::VertexAttribute::Size::Size_8_8_8_8:
return VK_FORMAT_R8G8B8A8_SINT;
case Maxwell::VertexAttribute::Size::Size_16:
return VK_FORMAT_R16_SINT;
case Maxwell::VertexAttribute::Size::Size_16_16:
return VK_FORMAT_R16G16_SINT;
case Maxwell::VertexAttribute::Size::Size_16_16_16:
return VK_FORMAT_R16G16B16_SINT;
case Maxwell::VertexAttribute::Size::Size_16_16_16_16:
return VK_FORMAT_R16G16B16A16_SINT;
case Maxwell::VertexAttribute::Size::Size_32:
return VK_FORMAT_R32_SINT;
case Maxwell::VertexAttribute::Size::Size_32_32:
return VK_FORMAT_R32G32_SINT;
case Maxwell::VertexAttribute::Size::Size_32_32_32:
return VK_FORMAT_R32G32B32_SINT;
case Maxwell::VertexAttribute::Size::Size_32_32_32_32:
return VK_FORMAT_R32G32B32A32_SINT;
case Maxwell::VertexAttribute::Size::Size_10_10_10_2:
return VK_FORMAT_A2B10G10R10_SINT_PACK32;
default:
break;
}
break;
case Maxwell::VertexAttribute::Type::Float:
switch (size) {
case Maxwell::VertexAttribute::Size::Size_16:
return VK_FORMAT_R16_SFLOAT;
case Maxwell::VertexAttribute::Size::Size_16_16:
return VK_FORMAT_R16G16_SFLOAT;
case Maxwell::VertexAttribute::Size::Size_16_16_16:
return VK_FORMAT_R16G16B16_SFLOAT;
case Maxwell::VertexAttribute::Size::Size_16_16_16_16:
return VK_FORMAT_R16G16B16A16_SFLOAT;
case Maxwell::VertexAttribute::Size::Size_32:
return VK_FORMAT_R32_SFLOAT;
case Maxwell::VertexAttribute::Size::Size_32_32:
return VK_FORMAT_R32G32_SFLOAT;
case Maxwell::VertexAttribute::Size::Size_32_32_32:
return VK_FORMAT_R32G32B32_SFLOAT;
case Maxwell::VertexAttribute::Size::Size_32_32_32_32:
return VK_FORMAT_R32G32B32A32_SFLOAT;
default:
break;
}
break;
}
UNIMPLEMENTED_MSG("Unimplemented vertex format of type={} and size={}", type, size);
return {};
}
VkCompareOp ComparisonOp(Maxwell::ComparisonOp comparison) {
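    // Each comparison has two hardware encodings: the compact values and the legacy
    // OpenGL-style enum values (the "Old" variants); both map to the same Vulkan op.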
switch (comparison) {
case Maxwell::ComparisonOp::Never:
case Maxwell::ComparisonOp::NeverOld:
return VK_COMPARE_OP_NEVER;
case Maxwell::ComparisonOp::Less:
case Maxwell::ComparisonOp::LessOld:
return VK_COMPARE_OP_LESS;
case Maxwell::ComparisonOp::Equal:
case Maxwell::ComparisonOp::EqualOld:
return VK_COMPARE_OP_EQUAL;
case Maxwell::ComparisonOp::LessEqual:
case Maxwell::ComparisonOp::LessEqualOld:
return VK_COMPARE_OP_LESS_OR_EQUAL;
case Maxwell::ComparisonOp::Greater:
case Maxwell::ComparisonOp::GreaterOld:
return VK_COMPARE_OP_GREATER;
case Maxwell::ComparisonOp::NotEqual:
case Maxwell::ComparisonOp::NotEqualOld:
return VK_COMPARE_OP_NOT_EQUAL;
case Maxwell::ComparisonOp::GreaterEqual:
case Maxwell::ComparisonOp::GreaterEqualOld:
return VK_COMPARE_OP_GREATER_OR_EQUAL;
case Maxwell::ComparisonOp::Always:
case Maxwell::ComparisonOp::AlwaysOld:
return VK_COMPARE_OP_ALWAYS;
}
UNIMPLEMENTED_MSG("Unimplemented comparison op={}", comparison);
return {};
}
VkIndexType IndexFormat(const VKDevice& device, Maxwell::IndexFormat index_format) {
switch (index_format) {
case Maxwell::IndexFormat::UnsignedByte:
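        // There is no native 8-bit index type without VK_EXT_index_type_uint8; report
        // 16-bit indices as a best-effort fallback.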
if (!device.IsExtIndexTypeUint8Supported()) {
UNIMPLEMENTED_MSG("Native uint8 indices are not supported on this device");
return VK_INDEX_TYPE_UINT16;
}
return VK_INDEX_TYPE_UINT8_EXT;
case Maxwell::IndexFormat::UnsignedShort:
return VK_INDEX_TYPE_UINT16;
case Maxwell::IndexFormat::UnsignedInt:
return VK_INDEX_TYPE_UINT32;
}
UNIMPLEMENTED_MSG("Unimplemented index_format={}", index_format);
return {};
}
VkStencilOp StencilOp(Maxwell::StencilOp stencil_op) {
switch (stencil_op) {
case Maxwell::StencilOp::Keep:
case Maxwell::StencilOp::KeepOGL:
return VK_STENCIL_OP_KEEP;
case Maxwell::StencilOp::Zero:
case Maxwell::StencilOp::ZeroOGL:
return VK_STENCIL_OP_ZERO;
case Maxwell::StencilOp::Replace:
case Maxwell::StencilOp::ReplaceOGL:
return VK_STENCIL_OP_REPLACE;
case Maxwell::StencilOp::Incr:
case Maxwell::StencilOp::IncrOGL:
return VK_STENCIL_OP_INCREMENT_AND_CLAMP;
case Maxwell::StencilOp::Decr:
case Maxwell::StencilOp::DecrOGL:
return VK_STENCIL_OP_DECREMENT_AND_CLAMP;
case Maxwell::StencilOp::Invert:
case Maxwell::StencilOp::InvertOGL:
return VK_STENCIL_OP_INVERT;
case Maxwell::StencilOp::IncrWrap:
case Maxwell::StencilOp::IncrWrapOGL:
return VK_STENCIL_OP_INCREMENT_AND_WRAP;
case Maxwell::StencilOp::DecrWrap:
case Maxwell::StencilOp::DecrWrapOGL:
return VK_STENCIL_OP_DECREMENT_AND_WRAP;
}
UNIMPLEMENTED_MSG("Unimplemented stencil op={}", stencil_op);
return {};
}
VkBlendOp BlendEquation(Maxwell::Blend::Equation equation) {
switch (equation) {
case Maxwell::Blend::Equation::Add:
case Maxwell::Blend::Equation::AddGL:
return VK_BLEND_OP_ADD;
case Maxwell::Blend::Equation::Subtract:
case Maxwell::Blend::Equation::SubtractGL:
return VK_BLEND_OP_SUBTRACT;
case Maxwell::Blend::Equation::ReverseSubtract:
case Maxwell::Blend::Equation::ReverseSubtractGL:
return VK_BLEND_OP_REVERSE_SUBTRACT;
case Maxwell::Blend::Equation::Min:
case Maxwell::Blend::Equation::MinGL:
return VK_BLEND_OP_MIN;
case Maxwell::Blend::Equation::Max:
case Maxwell::Blend::Equation::MaxGL:
return VK_BLEND_OP_MAX;
}
UNIMPLEMENTED_MSG("Unimplemented blend equation={}", equation);
return {};
}
VkBlendFactor BlendFactor(Maxwell::Blend::Factor factor) {
switch (factor) {
case Maxwell::Blend::Factor::Zero:
case Maxwell::Blend::Factor::ZeroGL:
return VK_BLEND_FACTOR_ZERO;
case Maxwell::Blend::Factor::One:
case Maxwell::Blend::Factor::OneGL:
return VK_BLEND_FACTOR_ONE;
case Maxwell::Blend::Factor::SourceColor:
case Maxwell::Blend::Factor::SourceColorGL:
return VK_BLEND_FACTOR_SRC_COLOR;
case Maxwell::Blend::Factor::OneMinusSourceColor:
case Maxwell::Blend::Factor::OneMinusSourceColorGL:
return VK_BLEND_FACTOR_ONE_MINUS_SRC_COLOR;
case Maxwell::Blend::Factor::SourceAlpha:
case Maxwell::Blend::Factor::SourceAlphaGL:
return VK_BLEND_FACTOR_SRC_ALPHA;
case Maxwell::Blend::Factor::OneMinusSourceAlpha:
case Maxwell::Blend::Factor::OneMinusSourceAlphaGL:
return VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA;
case Maxwell::Blend::Factor::DestAlpha:
case Maxwell::Blend::Factor::DestAlphaGL:
return VK_BLEND_FACTOR_DST_ALPHA;
case Maxwell::Blend::Factor::OneMinusDestAlpha:
case Maxwell::Blend::Factor::OneMinusDestAlphaGL:
return VK_BLEND_FACTOR_ONE_MINUS_DST_ALPHA;
case Maxwell::Blend::Factor::DestColor:
case Maxwell::Blend::Factor::DestColorGL:
return VK_BLEND_FACTOR_DST_COLOR;
case Maxwell::Blend::Factor::OneMinusDestColor:
case Maxwell::Blend::Factor::OneMinusDestColorGL:
return VK_BLEND_FACTOR_ONE_MINUS_DST_COLOR;
case Maxwell::Blend::Factor::SourceAlphaSaturate:
case Maxwell::Blend::Factor::SourceAlphaSaturateGL:
return VK_BLEND_FACTOR_SRC_ALPHA_SATURATE;
case Maxwell::Blend::Factor::Source1Color:
case Maxwell::Blend::Factor::Source1ColorGL:
return VK_BLEND_FACTOR_SRC1_COLOR;
case Maxwell::Blend::Factor::OneMinusSource1Color:
case Maxwell::Blend::Factor::OneMinusSource1ColorGL:
return VK_BLEND_FACTOR_ONE_MINUS_SRC1_COLOR;
case Maxwell::Blend::Factor::Source1Alpha:
case Maxwell::Blend::Factor::Source1AlphaGL:
return VK_BLEND_FACTOR_SRC1_ALPHA;
case Maxwell::Blend::Factor::OneMinusSource1Alpha:
case Maxwell::Blend::Factor::OneMinusSource1AlphaGL:
return VK_BLEND_FACTOR_ONE_MINUS_SRC1_ALPHA;
case Maxwell::Blend::Factor::ConstantColor:
case Maxwell::Blend::Factor::ConstantColorGL:
return VK_BLEND_FACTOR_CONSTANT_COLOR;
case Maxwell::Blend::Factor::OneMinusConstantColor:
case Maxwell::Blend::Factor::OneMinusConstantColorGL:
return VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_COLOR;
case Maxwell::Blend::Factor::ConstantAlpha:
case Maxwell::Blend::Factor::ConstantAlphaGL:
return VK_BLEND_FACTOR_CONSTANT_ALPHA;
case Maxwell::Blend::Factor::OneMinusConstantAlpha:
case Maxwell::Blend::Factor::OneMinusConstantAlphaGL:
return VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_ALPHA;
}
UNIMPLEMENTED_MSG("Unimplemented blend factor={}", factor);
return {};
}
VkFrontFace FrontFace(Maxwell::FrontFace front_face) {
switch (front_face) {
case Maxwell::FrontFace::ClockWise:
return VK_FRONT_FACE_CLOCKWISE;
case Maxwell::FrontFace::CounterClockWise:
return VK_FRONT_FACE_COUNTER_CLOCKWISE;
}
UNIMPLEMENTED_MSG("Unimplemented front face={}", front_face);
return {};
}
VkCullModeFlags CullFace(Maxwell::CullFace cull_face) {
switch (cull_face) {
case Maxwell::CullFace::Front:
return VK_CULL_MODE_FRONT_BIT;
case Maxwell::CullFace::Back:
return VK_CULL_MODE_BACK_BIT;
case Maxwell::CullFace::FrontAndBack:
return VK_CULL_MODE_FRONT_AND_BACK;
}
UNIMPLEMENTED_MSG("Unimplemented cull face={}", cull_face);
return {};
}
VkComponentSwizzle SwizzleSource(Tegra::Texture::SwizzleSource swizzle) {
switch (swizzle) {
case Tegra::Texture::SwizzleSource::Zero:
return VK_COMPONENT_SWIZZLE_ZERO;
case Tegra::Texture::SwizzleSource::R:
return VK_COMPONENT_SWIZZLE_R;
case Tegra::Texture::SwizzleSource::G:
return VK_COMPONENT_SWIZZLE_G;
case Tegra::Texture::SwizzleSource::B:
return VK_COMPONENT_SWIZZLE_B;
case Tegra::Texture::SwizzleSource::A:
return VK_COMPONENT_SWIZZLE_A;
case Tegra::Texture::SwizzleSource::OneInt:
case Tegra::Texture::SwizzleSource::OneFloat:
return VK_COMPONENT_SWIZZLE_ONE;
}
UNIMPLEMENTED_MSG("Unimplemented swizzle source={}", swizzle);
return {};
}
VkViewportCoordinateSwizzleNV ViewportSwizzle(Maxwell::ViewportSwizzle swizzle) {
switch (swizzle) {
case Maxwell::ViewportSwizzle::PositiveX:
return VK_VIEWPORT_COORDINATE_SWIZZLE_POSITIVE_X_NV;
case Maxwell::ViewportSwizzle::NegativeX:
return VK_VIEWPORT_COORDINATE_SWIZZLE_NEGATIVE_X_NV;
case Maxwell::ViewportSwizzle::PositiveY:
return VK_VIEWPORT_COORDINATE_SWIZZLE_POSITIVE_Y_NV;
case Maxwell::ViewportSwizzle::NegativeY:
return VK_VIEWPORT_COORDINATE_SWIZZLE_NEGATIVE_Y_NV;
case Maxwell::ViewportSwizzle::PositiveZ:
return VK_VIEWPORT_COORDINATE_SWIZZLE_POSITIVE_Z_NV;
case Maxwell::ViewportSwizzle::NegativeZ:
return VK_VIEWPORT_COORDINATE_SWIZZLE_NEGATIVE_Z_NV;
case Maxwell::ViewportSwizzle::PositiveW:
return VK_VIEWPORT_COORDINATE_SWIZZLE_POSITIVE_W_NV;
case Maxwell::ViewportSwizzle::NegativeW:
return VK_VIEWPORT_COORDINATE_SWIZZLE_NEGATIVE_W_NV;
}
UNREACHABLE_MSG("Invalid swizzle={}", swizzle);
return {};
}
VkSamplerReductionMode SamplerReduction(Tegra::Texture::SamplerReduction reduction) {
switch (reduction) {
case Tegra::Texture::SamplerReduction::WeightedAverage:
return VK_SAMPLER_REDUCTION_MODE_WEIGHTED_AVERAGE_EXT;
case Tegra::Texture::SamplerReduction::Min:
return VK_SAMPLER_REDUCTION_MODE_MIN_EXT;
case Tegra::Texture::SamplerReduction::Max:
return VK_SAMPLER_REDUCTION_MODE_MAX_EXT;
}
UNREACHABLE_MSG("Invalid sampler mode={}", static_cast<int>(reduction));
return VK_SAMPLER_REDUCTION_MODE_WEIGHTED_AVERAGE_EXT;
}
} // namespace Vulkan::MaxwellToVK

View File

@@ -0,0 +1,66 @@
// Copyright 2019 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include "common/common_types.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/renderer_vulkan/vk_device.h"
#include "video_core/renderer_vulkan/wrapper.h"
#include "video_core/surface.h"
#include "video_core/textures/texture.h"
namespace Vulkan::MaxwellToVK {
using Maxwell = Tegra::Engines::Maxwell3D::Regs;
using PixelFormat = VideoCore::Surface::PixelFormat;
namespace Sampler {
VkFilter Filter(Tegra::Texture::TextureFilter filter);
VkSamplerMipmapMode MipmapMode(Tegra::Texture::TextureMipmapFilter mipmap_filter);
VkSamplerAddressMode WrapMode(const VKDevice& device, Tegra::Texture::WrapMode wrap_mode,
Tegra::Texture::TextureFilter filter);
VkCompareOp DepthCompareFunction(Tegra::Texture::DepthCompareFunc depth_compare_func);
} // namespace Sampler
struct FormatInfo {
VkFormat format;
bool attachable;
bool storage;
};
FormatInfo SurfaceFormat(const VKDevice& device, FormatType format_type, PixelFormat pixel_format);
VkShaderStageFlagBits ShaderStage(Tegra::Engines::ShaderType stage);
VkPrimitiveTopology PrimitiveTopology(const VKDevice& device, Maxwell::PrimitiveTopology topology);
VkFormat VertexFormat(Maxwell::VertexAttribute::Type type, Maxwell::VertexAttribute::Size size);
VkCompareOp ComparisonOp(Maxwell::ComparisonOp comparison);
VkIndexType IndexFormat(const VKDevice& device, Maxwell::IndexFormat index_format);
VkStencilOp StencilOp(Maxwell::StencilOp stencil_op);
VkBlendOp BlendEquation(Maxwell::Blend::Equation equation);
VkBlendFactor BlendFactor(Maxwell::Blend::Factor factor);
VkFrontFace FrontFace(Maxwell::FrontFace front_face);
VkCullModeFlags CullFace(Maxwell::CullFace cull_face);
VkComponentSwizzle SwizzleSource(Tegra::Texture::SwizzleSource swizzle);
VkViewportCoordinateSwizzleNV ViewportSwizzle(Maxwell::ViewportSwizzle swizzle);
VkSamplerReductionMode SamplerReduction(Tegra::Texture::SamplerReduction reduction);
} // namespace Vulkan::MaxwellToVK

View File

@@ -0,0 +1,220 @@
// Copyright 2020 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#ifdef HAS_NSIGHT_AFTERMATH
#include <mutex>
#include <string>
#include <string_view>
#include <utility>
#include <vector>
#include <fmt/format.h>
#define VK_NO_PROTOTYPES
#include <vulkan/vulkan.h>
#include <GFSDK_Aftermath.h>
#include <GFSDK_Aftermath_Defines.h>
#include <GFSDK_Aftermath_GpuCrashDump.h>
#include <GFSDK_Aftermath_GpuCrashDumpDecoding.h>
#include "common/common_paths.h"
#include "common/common_types.h"
#include "common/file_util.h"
#include "common/logging/log.h"
#include "common/scope_exit.h"
#include "video_core/renderer_vulkan/nsight_aftermath_tracker.h"
namespace Vulkan {
static constexpr char AFTERMATH_LIB_NAME[] = "GFSDK_Aftermath_Lib.x64.dll";
NsightAftermathTracker::NsightAftermathTracker() = default;
NsightAftermathTracker::~NsightAftermathTracker() {
if (initialized) {
(void)GFSDK_Aftermath_DisableGpuCrashDumps();
}
}
bool NsightAftermathTracker::Initialize() {
if (!dl.Open(AFTERMATH_LIB_NAME)) {
LOG_ERROR(Render_Vulkan, "Failed to load Nsight Aftermath DLL");
return false;
}
if (!dl.GetSymbol("GFSDK_Aftermath_DisableGpuCrashDumps",
&GFSDK_Aftermath_DisableGpuCrashDumps) ||
!dl.GetSymbol("GFSDK_Aftermath_EnableGpuCrashDumps",
&GFSDK_Aftermath_EnableGpuCrashDumps) ||
!dl.GetSymbol("GFSDK_Aftermath_GetShaderDebugInfoIdentifier",
&GFSDK_Aftermath_GetShaderDebugInfoIdentifier) ||
!dl.GetSymbol("GFSDK_Aftermath_GetShaderHashSpirv", &GFSDK_Aftermath_GetShaderHashSpirv) ||
!dl.GetSymbol("GFSDK_Aftermath_GpuCrashDump_CreateDecoder",
&GFSDK_Aftermath_GpuCrashDump_CreateDecoder) ||
!dl.GetSymbol("GFSDK_Aftermath_GpuCrashDump_DestroyDecoder",
&GFSDK_Aftermath_GpuCrashDump_DestroyDecoder) ||
!dl.GetSymbol("GFSDK_Aftermath_GpuCrashDump_GenerateJSON",
&GFSDK_Aftermath_GpuCrashDump_GenerateJSON) ||
!dl.GetSymbol("GFSDK_Aftermath_GpuCrashDump_GetJSON",
&GFSDK_Aftermath_GpuCrashDump_GetJSON)) {
LOG_ERROR(Render_Vulkan, "Failed to load Nsight Aftermath function pointers");
return false;
}
dump_dir = Common::FS::GetUserPath(Common::FS::UserPath::LogDir) + "gpucrash";
(void)Common::FS::DeleteDirRecursively(dump_dir);
if (!Common::FS::CreateDir(dump_dir)) {
LOG_ERROR(Render_Vulkan, "Failed to create Nsight Aftermath dump directory");
return false;
}
if (!GFSDK_Aftermath_SUCCEED(GFSDK_Aftermath_EnableGpuCrashDumps(
GFSDK_Aftermath_Version_API, GFSDK_Aftermath_GpuCrashDumpWatchedApiFlags_Vulkan,
GFSDK_Aftermath_GpuCrashDumpFeatureFlags_Default, GpuCrashDumpCallback,
ShaderDebugInfoCallback, CrashDumpDescriptionCallback, this))) {
LOG_ERROR(Render_Vulkan, "GFSDK_Aftermath_EnableGpuCrashDumps failed");
return false;
}
LOG_INFO(Render_Vulkan, "Nsight Aftermath dump directory is \"{}\"", dump_dir);
initialized = true;
return true;
}
void NsightAftermathTracker::SaveShader(const std::vector<u32>& spirv) const {
if (!initialized) {
return;
}
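    // Hash a local copy of the module; the Aftermath entry point appears to take a
    // non-const pointer, and the copy keeps the caller's SPIR-V untouched.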
std::vector<u32> spirv_copy = spirv;
GFSDK_Aftermath_SpirvCode shader;
shader.pData = spirv_copy.data();
shader.size = static_cast<u32>(spirv_copy.size() * 4);
std::scoped_lock lock{mutex};
GFSDK_Aftermath_ShaderHash hash;
if (!GFSDK_Aftermath_SUCCEED(
GFSDK_Aftermath_GetShaderHashSpirv(GFSDK_Aftermath_Version_API, &shader, &hash))) {
LOG_ERROR(Render_Vulkan, "Failed to hash SPIR-V module");
return;
}
Common::FS::IOFile file(fmt::format("{}/source_{:016x}.spv", dump_dir, hash.hash), "wb");
if (!file.IsOpen()) {
LOG_ERROR(Render_Vulkan, "Failed to dump SPIR-V module with hash={:016x}", hash.hash);
return;
}
if (file.WriteArray(spirv.data(), spirv.size()) != spirv.size()) {
LOG_ERROR(Render_Vulkan, "Failed to write SPIR-V module with hash={:016x}", hash.hash);
return;
}
}
void NsightAftermathTracker::OnGpuCrashDumpCallback(const void* gpu_crash_dump,
u32 gpu_crash_dump_size) {
std::scoped_lock lock{mutex};
    LOG_CRITICAL(Render_Vulkan, "GPU crash dump received");
GFSDK_Aftermath_GpuCrashDump_Decoder decoder;
if (!GFSDK_Aftermath_SUCCEED(GFSDK_Aftermath_GpuCrashDump_CreateDecoder(
GFSDK_Aftermath_Version_API, gpu_crash_dump, gpu_crash_dump_size, &decoder))) {
LOG_ERROR(Render_Vulkan, "Failed to create decoder");
return;
}
SCOPE_EXIT({ GFSDK_Aftermath_GpuCrashDump_DestroyDecoder(decoder); });
u32 json_size = 0;
if (!GFSDK_Aftermath_SUCCEED(GFSDK_Aftermath_GpuCrashDump_GenerateJSON(
decoder, GFSDK_Aftermath_GpuCrashDumpDecoderFlags_ALL_INFO,
GFSDK_Aftermath_GpuCrashDumpFormatterFlags_NONE, nullptr, nullptr, nullptr, nullptr,
this, &json_size))) {
LOG_ERROR(Render_Vulkan, "Failed to generate JSON");
return;
}
std::vector<char> json(json_size);
if (!GFSDK_Aftermath_SUCCEED(
GFSDK_Aftermath_GpuCrashDump_GetJSON(decoder, json_size, json.data()))) {
LOG_ERROR(Render_Vulkan, "Failed to query JSON");
return;
}
const std::string base_name = [this] {
const int id = dump_id++;
if (id == 0) {
return fmt::format("{}/crash.nv-gpudmp", dump_dir);
} else {
return fmt::format("{}/crash_{}.nv-gpudmp", dump_dir, id);
}
}();
std::string_view dump_view(static_cast<const char*>(gpu_crash_dump), gpu_crash_dump_size);
if (Common::FS::WriteStringToFile(false, base_name, dump_view) != gpu_crash_dump_size) {
LOG_ERROR(Render_Vulkan, "Failed to write dump file");
return;
}
const std::string_view json_view(json.data(), json.size());
if (Common::FS::WriteStringToFile(true, base_name + ".json", json_view) != json.size()) {
LOG_ERROR(Render_Vulkan, "Failed to write JSON");
return;
}
}
void NsightAftermathTracker::OnShaderDebugInfoCallback(const void* shader_debug_info,
u32 shader_debug_info_size) {
std::scoped_lock lock{mutex};
GFSDK_Aftermath_ShaderDebugInfoIdentifier identifier;
if (!GFSDK_Aftermath_SUCCEED(GFSDK_Aftermath_GetShaderDebugInfoIdentifier(
GFSDK_Aftermath_Version_API, shader_debug_info, shader_debug_info_size, &identifier))) {
LOG_ERROR(Render_Vulkan, "GFSDK_Aftermath_GetShaderDebugInfoIdentifier failed");
return;
}
const std::string path =
fmt::format("{}/shader_{:016x}{:016x}.nvdbg", dump_dir, identifier.id[0], identifier.id[1]);
Common::FS::IOFile file(path, "wb");
if (!file.IsOpen()) {
LOG_ERROR(Render_Vulkan, "Failed to create file {}", path);
return;
}
if (file.WriteBytes(static_cast<const u8*>(shader_debug_info), shader_debug_info_size) !=
shader_debug_info_size) {
LOG_ERROR(Render_Vulkan, "Failed to write file {}", path);
return;
}
}
void NsightAftermathTracker::OnCrashDumpDescriptionCallback(
PFN_GFSDK_Aftermath_AddGpuCrashDumpDescription add_description) {
add_description(GFSDK_Aftermath_GpuCrashDumpDescriptionKey_ApplicationName, "yuzu");
}
void NsightAftermathTracker::GpuCrashDumpCallback(const void* gpu_crash_dump,
u32 gpu_crash_dump_size, void* user_data) {
static_cast<NsightAftermathTracker*>(user_data)->OnGpuCrashDumpCallback(gpu_crash_dump,
gpu_crash_dump_size);
}
void NsightAftermathTracker::ShaderDebugInfoCallback(const void* shader_debug_info,
u32 shader_debug_info_size, void* user_data) {
static_cast<NsightAftermathTracker*>(user_data)->OnShaderDebugInfoCallback(
shader_debug_info, shader_debug_info_size);
}
void NsightAftermathTracker::CrashDumpDescriptionCallback(
PFN_GFSDK_Aftermath_AddGpuCrashDumpDescription add_description, void* user_data) {
static_cast<NsightAftermathTracker*>(user_data)->OnCrashDumpDescriptionCallback(
add_description);
}
} // namespace Vulkan
#endif // HAS_NSIGHT_AFTERMATH

View File

@@ -0,0 +1,87 @@
// Copyright 2020 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <mutex>
#include <string>
#include <vector>
#define VK_NO_PROTOTYPES
#include <vulkan/vulkan.h>
#ifdef HAS_NSIGHT_AFTERMATH
#include <GFSDK_Aftermath_Defines.h>
#include <GFSDK_Aftermath_GpuCrashDump.h>
#include <GFSDK_Aftermath_GpuCrashDumpDecoding.h>
#endif
#include "common/common_types.h"
#include "common/dynamic_library.h"
namespace Vulkan {
class NsightAftermathTracker {
public:
NsightAftermathTracker();
~NsightAftermathTracker();
NsightAftermathTracker(const NsightAftermathTracker&) = delete;
NsightAftermathTracker& operator=(const NsightAftermathTracker&) = delete;
// Delete move semantics because Aftermath initialization uses a pointer to this.
NsightAftermathTracker(NsightAftermathTracker&&) = delete;
NsightAftermathTracker& operator=(NsightAftermathTracker&&) = delete;
bool Initialize();
void SaveShader(const std::vector<u32>& spirv) const;
private:
#ifdef HAS_NSIGHT_AFTERMATH
static void GpuCrashDumpCallback(const void* gpu_crash_dump, u32 gpu_crash_dump_size,
void* user_data);
static void ShaderDebugInfoCallback(const void* shader_debug_info, u32 shader_debug_info_size,
void* user_data);
static void CrashDumpDescriptionCallback(
PFN_GFSDK_Aftermath_AddGpuCrashDumpDescription add_description, void* user_data);
void OnGpuCrashDumpCallback(const void* gpu_crash_dump, u32 gpu_crash_dump_size);
void OnShaderDebugInfoCallback(const void* shader_debug_info, u32 shader_debug_info_size);
void OnCrashDumpDescriptionCallback(
PFN_GFSDK_Aftermath_AddGpuCrashDumpDescription add_description);
mutable std::mutex mutex;
std::string dump_dir;
int dump_id = 0;
bool initialized = false;
Common::DynamicLibrary dl;
PFN_GFSDK_Aftermath_DisableGpuCrashDumps GFSDK_Aftermath_DisableGpuCrashDumps;
PFN_GFSDK_Aftermath_EnableGpuCrashDumps GFSDK_Aftermath_EnableGpuCrashDumps;
PFN_GFSDK_Aftermath_GetShaderDebugInfoIdentifier GFSDK_Aftermath_GetShaderDebugInfoIdentifier;
PFN_GFSDK_Aftermath_GetShaderHashSpirv GFSDK_Aftermath_GetShaderHashSpirv;
PFN_GFSDK_Aftermath_GpuCrashDump_CreateDecoder GFSDK_Aftermath_GpuCrashDump_CreateDecoder;
PFN_GFSDK_Aftermath_GpuCrashDump_DestroyDecoder GFSDK_Aftermath_GpuCrashDump_DestroyDecoder;
PFN_GFSDK_Aftermath_GpuCrashDump_GenerateJSON GFSDK_Aftermath_GpuCrashDump_GenerateJSON;
PFN_GFSDK_Aftermath_GpuCrashDump_GetJSON GFSDK_Aftermath_GpuCrashDump_GetJSON;
#endif
};
#ifndef HAS_NSIGHT_AFTERMATH
inline NsightAftermathTracker::NsightAftermathTracker() = default;
inline NsightAftermathTracker::~NsightAftermathTracker() = default;
inline bool NsightAftermathTracker::Initialize() {
return false;
}
inline void NsightAftermathTracker::SaveShader(const std::vector<u32>&) const {}
#endif
} // namespace Vulkan

View File

@@ -0,0 +1,469 @@
// Copyright 2018 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <algorithm>
#include <array>
#include <cstring>
#include <memory>
#include <optional>
#include <string>
#include <vector>
#include <fmt/format.h>
#include "common/dynamic_library.h"
#include "common/file_util.h"
#include "common/logging/log.h"
#include "common/telemetry.h"
#include "core/core.h"
#include "core/core_timing.h"
#include "core/frontend/emu_window.h"
#include "core/settings.h"
#include "core/telemetry_session.h"
#include "video_core/gpu.h"
#include "video_core/renderer_vulkan/renderer_vulkan.h"
#include "video_core/renderer_vulkan/vk_blit_screen.h"
#include "video_core/renderer_vulkan/vk_device.h"
#include "video_core/renderer_vulkan/vk_master_semaphore.h"
#include "video_core/renderer_vulkan/vk_memory_manager.h"
#include "video_core/renderer_vulkan/vk_rasterizer.h"
#include "video_core/renderer_vulkan/vk_scheduler.h"
#include "video_core/renderer_vulkan/vk_state_tracker.h"
#include "video_core/renderer_vulkan/vk_swapchain.h"
#include "video_core/renderer_vulkan/wrapper.h"
// Include these late to avoid polluting previous headers
#ifdef _WIN32
#include <windows.h>
// ensure include order
#include <vulkan/vulkan_win32.h>
#endif
#if !defined(_WIN32) && !defined(__APPLE__)
#include <X11/Xlib.h>
#include <vulkan/vulkan_wayland.h>
#include <vulkan/vulkan_xlib.h>
#endif
namespace Vulkan {
namespace {
using Core::Frontend::WindowSystemType;
VkBool32 DebugCallback(VkDebugUtilsMessageSeverityFlagBitsEXT severity,
VkDebugUtilsMessageTypeFlagsEXT type,
const VkDebugUtilsMessengerCallbackDataEXT* data,
[[maybe_unused]] void* user_data) {
const char* const message{data->pMessage};
if (severity & VK_DEBUG_UTILS_MESSAGE_SEVERITY_ERROR_BIT_EXT) {
LOG_CRITICAL(Render_Vulkan, "{}", message);
} else if (severity & VK_DEBUG_UTILS_MESSAGE_SEVERITY_WARNING_BIT_EXT) {
LOG_WARNING(Render_Vulkan, "{}", message);
} else if (severity & VK_DEBUG_UTILS_MESSAGE_SEVERITY_INFO_BIT_EXT) {
LOG_INFO(Render_Vulkan, "{}", message);
} else if (severity & VK_DEBUG_UTILS_MESSAGE_SEVERITY_VERBOSE_BIT_EXT) {
LOG_DEBUG(Render_Vulkan, "{}", message);
}
return VK_FALSE;
}
Common::DynamicLibrary OpenVulkanLibrary() {
Common::DynamicLibrary library;
#ifdef __APPLE__
// Check if a path to a specific Vulkan library has been specified.
char* libvulkan_env = getenv("LIBVULKAN_PATH");
if (!libvulkan_env || !library.Open(libvulkan_env)) {
// Use the libvulkan.dylib from the application bundle.
const std::string filename =
Common::FS::GetBundleDirectory() + "/Contents/Frameworks/libvulkan.dylib";
library.Open(filename.c_str());
}
#else
std::string filename = Common::DynamicLibrary::GetVersionedFilename("vulkan", 1);
if (!library.Open(filename.c_str())) {
// Android devices may not have libvulkan.so.1, only libvulkan.so.
filename = Common::DynamicLibrary::GetVersionedFilename("vulkan");
(void)library.Open(filename.c_str());
}
#endif
return library;
}
std::pair<vk::Instance, u32> CreateInstance(Common::DynamicLibrary& library,
vk::InstanceDispatch& dld, WindowSystemType window_type,
bool enable_debug_utils, bool enable_layers) {
if (!library.IsOpen()) {
LOG_ERROR(Render_Vulkan, "Vulkan library not available");
return {};
}
if (!library.GetSymbol("vkGetInstanceProcAddr", &dld.vkGetInstanceProcAddr)) {
LOG_ERROR(Render_Vulkan, "vkGetInstanceProcAddr not present in Vulkan");
return {};
}
if (!vk::Load(dld)) {
LOG_ERROR(Render_Vulkan, "Failed to load Vulkan function pointers");
return {};
}
std::vector<const char*> extensions;
extensions.reserve(6);
switch (window_type) {
case Core::Frontend::WindowSystemType::Headless:
break;
#ifdef _WIN32
case Core::Frontend::WindowSystemType::Windows:
extensions.push_back(VK_KHR_WIN32_SURFACE_EXTENSION_NAME);
break;
#endif
#if !defined(_WIN32) && !defined(__APPLE__)
case Core::Frontend::WindowSystemType::X11:
extensions.push_back(VK_KHR_XLIB_SURFACE_EXTENSION_NAME);
break;
case Core::Frontend::WindowSystemType::Wayland:
extensions.push_back(VK_KHR_WAYLAND_SURFACE_EXTENSION_NAME);
break;
#endif
default:
LOG_ERROR(Render_Vulkan, "Presentation not supported on this platform");
break;
}
if (window_type != Core::Frontend::WindowSystemType::Headless) {
extensions.push_back(VK_KHR_SURFACE_EXTENSION_NAME);
}
if (enable_debug_utils) {
extensions.push_back(VK_EXT_DEBUG_UTILS_EXTENSION_NAME);
}
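    // Always requested: the device code relies on the extended feature and property
    // queries this extension provides.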
extensions.push_back(VK_KHR_GET_PHYSICAL_DEVICE_PROPERTIES_2_EXTENSION_NAME);
const std::optional properties = vk::EnumerateInstanceExtensionProperties(dld);
if (!properties) {
LOG_ERROR(Render_Vulkan, "Failed to query extension properties");
return {};
}
for (const char* extension : extensions) {
const auto it =
std::find_if(properties->begin(), properties->end(), [extension](const auto& prop) {
return !std::strcmp(extension, prop.extensionName);
});
if (it == properties->end()) {
LOG_ERROR(Render_Vulkan, "Required instance extension {} is not available", extension);
return {};
}
}
std::vector<const char*> layers;
layers.reserve(1);
if (enable_layers) {
layers.push_back("VK_LAYER_KHRONOS_validation");
}
const std::optional layer_properties = vk::EnumerateInstanceLayerProperties(dld);
if (!layer_properties) {
LOG_ERROR(Render_Vulkan, "Failed to query layer properties, disabling layers");
layers.clear();
}
for (auto layer_it = layers.begin(); layer_it != layers.end();) {
const char* const layer = *layer_it;
const auto it = std::find_if(
layer_properties->begin(), layer_properties->end(),
[layer](const VkLayerProperties& prop) { return !std::strcmp(layer, prop.layerName); });
if (it == layer_properties->end()) {
LOG_ERROR(Render_Vulkan, "Layer {} not available, removing it", layer);
layer_it = layers.erase(layer_it);
} else {
++layer_it;
}
}
// Limit the maximum version of Vulkan to avoid using an untested version.
const u32 version = std::min(vk::AvailableVersion(dld), static_cast<u32>(VK_API_VERSION_1_1));
vk::Instance instance = vk::Instance::Create(version, layers, extensions, dld);
if (!instance) {
LOG_ERROR(Render_Vulkan, "Failed to create Vulkan instance");
return {};
}
if (!vk::Load(*instance, dld)) {
LOG_ERROR(Render_Vulkan, "Failed to load Vulkan instance function pointers");
}
return std::make_pair(std::move(instance), version);
}
std::string GetReadableVersion(u32 version) {
return fmt::format("{}.{}.{}", VK_VERSION_MAJOR(version), VK_VERSION_MINOR(version),
VK_VERSION_PATCH(version));
}
std::string GetDriverVersion(const VKDevice& device) {
// Extracted from
// https://github.com/SaschaWillems/vulkan.gpuinfo.org/blob/5dddea46ea1120b0df14eef8f15ff8e318e35462/functions.php#L308-L314
const u32 version = device.GetDriverVersion();
if (device.GetDriverID() == VK_DRIVER_ID_NVIDIA_PROPRIETARY_KHR) {
const u32 major = (version >> 22) & 0x3ff;
const u32 minor = (version >> 14) & 0x0ff;
const u32 secondary = (version >> 6) & 0x0ff;
const u32 tertiary = version & 0x003f;
return fmt::format("{}.{}.{}.{}", major, minor, secondary, tertiary);
}
if (device.GetDriverID() == VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS_KHR) {
const u32 major = version >> 14;
const u32 minor = version & 0x3fff;
return fmt::format("{}.{}", major, minor);
}
return GetReadableVersion(version);
}
std::string BuildCommaSeparatedExtensions(std::vector<std::string> available_extensions) {
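    // Sort so the telemetry string is deterministic across runs.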
std::sort(std::begin(available_extensions), std::end(available_extensions));
static constexpr std::size_t AverageExtensionSize = 64;
std::string separated_extensions;
separated_extensions.reserve(available_extensions.size() * AverageExtensionSize);
const auto end = std::end(available_extensions);
for (auto extension = std::begin(available_extensions); extension != end; ++extension) {
if (const bool is_last = extension + 1 == end; is_last) {
separated_extensions += *extension;
} else {
separated_extensions += fmt::format("{},", *extension);
}
}
return separated_extensions;
}
} // Anonymous namespace
RendererVulkan::RendererVulkan(Core::TelemetrySession& telemetry_session_,
Core::Frontend::EmuWindow& emu_window,
Core::Memory::Memory& cpu_memory_, Tegra::GPU& gpu_,
std::unique_ptr<Core::Frontend::GraphicsContext> context_)
: RendererBase{emu_window, std::move(context_)}, telemetry_session{telemetry_session_},
cpu_memory{cpu_memory_}, gpu{gpu_} {}
RendererVulkan::~RendererVulkan() {
ShutDown();
}
void RendererVulkan::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {
if (!framebuffer) {
return;
}
const auto& layout = render_window.GetFramebufferLayout();
if (layout.width > 0 && layout.height > 0 && render_window.IsShown()) {
const VAddr framebuffer_addr = framebuffer->address + framebuffer->offset;
const bool use_accelerated =
rasterizer->AccelerateDisplay(*framebuffer, framebuffer_addr, framebuffer->stride);
const bool is_srgb = use_accelerated && screen_info.is_srgb;
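        // Recreate the swapchain and the screen blit resources whenever the window size
        // or the sRGB state changes.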
if (swapchain->HasFramebufferChanged(layout) || swapchain->GetSrgbState() != is_srgb) {
swapchain->Create(layout.width, layout.height, is_srgb);
blit_screen->Recreate();
}
scheduler->WaitWorker();
swapchain->AcquireNextImage();
const VkSemaphore render_semaphore = blit_screen->Draw(*framebuffer, use_accelerated);
scheduler->Flush(render_semaphore);
if (swapchain->Present(render_semaphore)) {
blit_screen->Recreate();
}
rasterizer->TickFrame();
}
render_window.OnFrameDisplayed();
}
bool RendererVulkan::Init() {
library = OpenVulkanLibrary();
std::tie(instance, instance_version) = CreateInstance(
library, dld, render_window.GetWindowInfo().type, true, Settings::values.renderer_debug);
if (!instance || !CreateDebugCallback() || !CreateSurface() || !PickDevices()) {
return false;
}
Report();
memory_manager = std::make_unique<VKMemoryManager>(*device);
state_tracker = std::make_unique<StateTracker>(gpu);
scheduler = std::make_unique<VKScheduler>(*device, *state_tracker);
const auto& framebuffer = render_window.GetFramebufferLayout();
swapchain = std::make_unique<VKSwapchain>(*surface, *device, *scheduler);
swapchain->Create(framebuffer.width, framebuffer.height, false);
rasterizer = std::make_unique<RasterizerVulkan>(render_window, gpu, gpu.MemoryManager(),
cpu_memory, screen_info, *device,
*memory_manager, *state_tracker, *scheduler);
blit_screen =
std::make_unique<VKBlitScreen>(cpu_memory, render_window, *rasterizer, *device,
*memory_manager, *swapchain, *scheduler, screen_info);
return true;
}
void RendererVulkan::ShutDown() {
if (!device) {
return;
}
if (const auto& dev = device->GetLogical()) {
dev.WaitIdle();
}
rasterizer.reset();
blit_screen.reset();
scheduler.reset();
swapchain.reset();
memory_manager.reset();
device.reset();
}
bool RendererVulkan::CreateDebugCallback() {
if (!Settings::values.renderer_debug) {
return true;
}
debug_callback = instance.TryCreateDebugCallback(DebugCallback);
if (!debug_callback) {
LOG_ERROR(Render_Vulkan, "Failed to create debug callback");
return false;
}
return true;
}
bool RendererVulkan::CreateSurface() {
[[maybe_unused]] const auto& window_info = render_window.GetWindowInfo();
VkSurfaceKHR unsafe_surface = nullptr;
#ifdef _WIN32
if (window_info.type == Core::Frontend::WindowSystemType::Windows) {
const HWND hWnd = static_cast<HWND>(window_info.render_surface);
const VkWin32SurfaceCreateInfoKHR win32_ci{VK_STRUCTURE_TYPE_WIN32_SURFACE_CREATE_INFO_KHR,
nullptr, 0, nullptr, hWnd};
const auto vkCreateWin32SurfaceKHR = reinterpret_cast<PFN_vkCreateWin32SurfaceKHR>(
dld.vkGetInstanceProcAddr(*instance, "vkCreateWin32SurfaceKHR"));
if (!vkCreateWin32SurfaceKHR ||
vkCreateWin32SurfaceKHR(*instance, &win32_ci, nullptr, &unsafe_surface) != VK_SUCCESS) {
LOG_ERROR(Render_Vulkan, "Failed to initialize Win32 surface");
return false;
}
}
#endif
#if !defined(_WIN32) && !defined(__APPLE__)
if (window_info.type == Core::Frontend::WindowSystemType::X11) {
const VkXlibSurfaceCreateInfoKHR xlib_ci{
VK_STRUCTURE_TYPE_XLIB_SURFACE_CREATE_INFO_KHR, nullptr, 0,
static_cast<Display*>(window_info.display_connection),
reinterpret_cast<Window>(window_info.render_surface)};
const auto vkCreateXlibSurfaceKHR = reinterpret_cast<PFN_vkCreateXlibSurfaceKHR>(
dld.vkGetInstanceProcAddr(*instance, "vkCreateXlibSurfaceKHR"));
if (!vkCreateXlibSurfaceKHR ||
vkCreateXlibSurfaceKHR(*instance, &xlib_ci, nullptr, &unsafe_surface) != VK_SUCCESS) {
LOG_ERROR(Render_Vulkan, "Failed to initialize Xlib surface");
return false;
}
}
if (window_info.type == Core::Frontend::WindowSystemType::Wayland) {
const VkWaylandSurfaceCreateInfoKHR wayland_ci{
VK_STRUCTURE_TYPE_WAYLAND_SURFACE_CREATE_INFO_KHR, nullptr, 0,
static_cast<wl_display*>(window_info.display_connection),
static_cast<wl_surface*>(window_info.render_surface)};
const auto vkCreateWaylandSurfaceKHR = reinterpret_cast<PFN_vkCreateWaylandSurfaceKHR>(
dld.vkGetInstanceProcAddr(*instance, "vkCreateWaylandSurfaceKHR"));
if (!vkCreateWaylandSurfaceKHR ||
vkCreateWaylandSurfaceKHR(*instance, &wayland_ci, nullptr, &unsafe_surface) !=
VK_SUCCESS) {
LOG_ERROR(Render_Vulkan, "Failed to initialize Wayland surface");
return false;
}
}
#endif
if (!unsafe_surface) {
LOG_ERROR(Render_Vulkan, "Presentation not supported on this platform");
return false;
}
surface = vk::SurfaceKHR(unsafe_surface, *instance, dld);
return true;
}
bool RendererVulkan::PickDevices() {
const auto devices = instance.EnumeratePhysicalDevices();
if (!devices) {
LOG_ERROR(Render_Vulkan, "Failed to enumerate physical devices");
return false;
}
const s32 device_index = Settings::values.vulkan_device.GetValue();
if (device_index < 0 || device_index >= static_cast<s32>(devices->size())) {
LOG_ERROR(Render_Vulkan, "Invalid device index {}!", device_index);
return false;
}
const vk::PhysicalDevice physical_device((*devices)[static_cast<std::size_t>(device_index)],
dld);
if (!VKDevice::IsSuitable(physical_device, *surface)) {
return false;
}
device =
std::make_unique<VKDevice>(*instance, instance_version, physical_device, *surface, dld);
return device->Create();
}
void RendererVulkan::Report() const {
const std::string vendor_name{device->GetVendorName()};
const std::string model_name{device->GetModelName()};
const std::string driver_version = GetDriverVersion(*device);
const std::string driver_name = fmt::format("{} {}", vendor_name, driver_version);
const std::string api_version = GetReadableVersion(device->ApiVersion());
const std::string extensions = BuildCommaSeparatedExtensions(device->GetAvailableExtensions());
LOG_INFO(Render_Vulkan, "Driver: {}", driver_name);
LOG_INFO(Render_Vulkan, "Device: {}", model_name);
LOG_INFO(Render_Vulkan, "Vulkan: {}", api_version);
static constexpr auto field = Common::Telemetry::FieldType::UserSystem;
telemetry_session.AddField(field, "GPU_Vendor", vendor_name);
telemetry_session.AddField(field, "GPU_Model", model_name);
telemetry_session.AddField(field, "GPU_Vulkan_Driver", driver_name);
telemetry_session.AddField(field, "GPU_Vulkan_Version", api_version);
telemetry_session.AddField(field, "GPU_Vulkan_Extensions", extensions);
}
std::vector<std::string> RendererVulkan::EnumerateDevices() {
vk::InstanceDispatch dld;
Common::DynamicLibrary library = OpenVulkanLibrary();
vk::Instance instance =
CreateInstance(library, dld, WindowSystemType::Headless, false, false).first;
if (!instance) {
return {};
}
const std::optional physical_devices = instance.EnumeratePhysicalDevices();
if (!physical_devices) {
return {};
}
std::vector<std::string> names;
names.reserve(physical_devices->size());
for (const auto& device : *physical_devices) {
names.push_back(vk::PhysicalDevice(device, dld).GetProperties().deviceName);
}
return names;
}
} // namespace Vulkan

View File

@@ -0,0 +1,90 @@
// Copyright 2018 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <memory>
#include <string>
#include <vector>
#include "common/dynamic_library.h"
#include "video_core/renderer_base.h"
#include "video_core/renderer_vulkan/wrapper.h"
namespace Core {
class TelemetrySession;
}
namespace Core::Memory {
class Memory;
}
namespace Tegra {
class GPU;
}
namespace Vulkan {
class StateTracker;
class VKBlitScreen;
class VKDevice;
class VKMemoryManager;
class VKSwapchain;
class VKScheduler;
struct VKScreenInfo {
VkImageView image_view{};
u32 width{};
u32 height{};
bool is_srgb{};
};
class RendererVulkan final : public VideoCore::RendererBase {
public:
    explicit RendererVulkan(Core::TelemetrySession& telemetry_session_,
Core::Frontend::EmuWindow& emu_window,
Core::Memory::Memory& cpu_memory_, Tegra::GPU& gpu_,
std::unique_ptr<Core::Frontend::GraphicsContext> context_);
~RendererVulkan() override;
bool Init() override;
void ShutDown() override;
void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) override;
static std::vector<std::string> EnumerateDevices();
private:
bool CreateDebugCallback();
bool CreateSurface();
bool PickDevices();
void Report() const;
Core::TelemetrySession& telemetry_session;
Core::Memory::Memory& cpu_memory;
Tegra::GPU& gpu;
Common::DynamicLibrary library;
vk::InstanceDispatch dld;
vk::Instance instance;
u32 instance_version{};
vk::SurfaceKHR surface;
VKScreenInfo screen_info;
vk::DebugCallback debug_callback;
std::unique_ptr<VKDevice> device;
std::unique_ptr<VKMemoryManager> memory_manager;
std::unique_ptr<StateTracker> state_tracker;
std::unique_ptr<VKScheduler> scheduler;
std::unique_ptr<VKSwapchain> swapchain;
std::unique_ptr<VKBlitScreen> blit_screen;
};
} // namespace Vulkan

View File

@@ -0,0 +1,829 @@
// Copyright 2018 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <algorithm>
#include <array>
#include <cstring>
#include <memory>
#include <tuple>
#include <vector>
#include "common/assert.h"
#include "common/common_types.h"
#include "common/math_util.h"
#include "core/core.h"
#include "core/frontend/emu_window.h"
#include "core/memory.h"
#include "video_core/gpu.h"
#include "video_core/host_shaders/vulkan_present_frag_spv.h"
#include "video_core/host_shaders/vulkan_present_vert_spv.h"
#include "video_core/rasterizer_interface.h"
#include "video_core/renderer_vulkan/renderer_vulkan.h"
#include "video_core/renderer_vulkan/vk_blit_screen.h"
#include "video_core/renderer_vulkan/vk_device.h"
#include "video_core/renderer_vulkan/vk_master_semaphore.h"
#include "video_core/renderer_vulkan/vk_memory_manager.h"
#include "video_core/renderer_vulkan/vk_scheduler.h"
#include "video_core/renderer_vulkan/vk_shader_util.h"
#include "video_core/renderer_vulkan/vk_swapchain.h"
#include "video_core/renderer_vulkan/wrapper.h"
#include "video_core/surface.h"
#include "video_core/textures/decoders.h"
namespace Vulkan {
namespace {
struct ScreenRectVertex {
ScreenRectVertex() = default;
explicit ScreenRectVertex(f32 x, f32 y, f32 u, f32 v) : position{{x, y}}, tex_coord{{u, v}} {}
std::array<f32, 2> position;
std::array<f32, 2> tex_coord;
static VkVertexInputBindingDescription GetDescription() {
return {
.binding = 0,
.stride = sizeof(ScreenRectVertex),
.inputRate = VK_VERTEX_INPUT_RATE_VERTEX,
};
}
static std::array<VkVertexInputAttributeDescription, 2> GetAttributes() {
return {{
{
.location = 0,
.binding = 0,
.format = VK_FORMAT_R32G32_SFLOAT,
.offset = offsetof(ScreenRectVertex, position),
},
{
.location = 1,
.binding = 0,
.format = VK_FORMAT_R32G32_SFLOAT,
.offset = offsetof(ScreenRectVertex, tex_coord),
},
}};
}
};
constexpr std::array<f32, 4 * 4> MakeOrthographicMatrix(f32 width, f32 height) {
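    // Consumed as a column-major mat4: scales (0..width, 0..height) into Vulkan's
    // -1..1 clip space, with the translation in the last column.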
// clang-format off
return { 2.f / width, 0.f, 0.f, 0.f,
0.f, 2.f / height, 0.f, 0.f,
0.f, 0.f, 1.f, 0.f,
-1.f, -1.f, 0.f, 1.f};
// clang-format on
}
u32 GetBytesPerPixel(const Tegra::FramebufferConfig& framebuffer) {
using namespace VideoCore::Surface;
return BytesPerBlock(PixelFormatFromGPUPixelFormat(framebuffer.pixel_format));
}
std::size_t GetSizeInBytes(const Tegra::FramebufferConfig& framebuffer) {
return static_cast<std::size_t>(framebuffer.stride) *
static_cast<std::size_t>(framebuffer.height) * GetBytesPerPixel(framebuffer);
}
VkFormat GetFormat(const Tegra::FramebufferConfig& framebuffer) {
switch (framebuffer.pixel_format) {
case Tegra::FramebufferConfig::PixelFormat::A8B8G8R8_UNORM:
return VK_FORMAT_A8B8G8R8_UNORM_PACK32;
case Tegra::FramebufferConfig::PixelFormat::RGB565_UNORM:
return VK_FORMAT_R5G6B5_UNORM_PACK16;
default:
UNIMPLEMENTED_MSG("Unknown framebuffer pixel format: {}",
static_cast<u32>(framebuffer.pixel_format));
return VK_FORMAT_A8B8G8R8_UNORM_PACK32;
}
}
} // Anonymous namespace
struct VKBlitScreen::BufferData {
struct {
std::array<f32, 4 * 4> modelview_matrix;
} uniform;
std::array<ScreenRectVertex, 4> vertices;
// Unaligned image data goes here
};
VKBlitScreen::VKBlitScreen(Core::Memory::Memory& cpu_memory_,
Core::Frontend::EmuWindow& render_window_,
VideoCore::RasterizerInterface& rasterizer_, const VKDevice& device_,
VKMemoryManager& memory_manager_, VKSwapchain& swapchain_,
VKScheduler& scheduler_, const VKScreenInfo& screen_info_)
: cpu_memory{cpu_memory_}, render_window{render_window_}, rasterizer{rasterizer_},
device{device_}, memory_manager{memory_manager_}, swapchain{swapchain_},
scheduler{scheduler_}, image_count{swapchain.GetImageCount()}, screen_info{screen_info_} {
resource_ticks.resize(image_count);
CreateStaticResources();
CreateDynamicResources();
}
VKBlitScreen::~VKBlitScreen() = default;
void VKBlitScreen::Recreate() {
CreateDynamicResources();
}
VkSemaphore VKBlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer, bool use_accelerated) {
RefreshResources(framebuffer);
// Finish any pending renderpass
scheduler.RequestOutsideRenderPassOperationContext();
const std::size_t image_index = swapchain.GetImageIndex();
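    // Ensure the GPU is done with the previous frame that used this swapchain image
    // before its per-image resources are reused.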
scheduler.Wait(resource_ticks[image_index]);
resource_ticks[image_index] = scheduler.CurrentTick();
UpdateDescriptorSet(image_index,
use_accelerated ? screen_info.image_view : *raw_image_views[image_index]);
BufferData data;
SetUniformData(data, framebuffer);
SetVertexData(data, framebuffer);
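    // Stage the uniform matrix and the screen quad vertices through the mapped buffer.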
auto map = buffer_commit->Map();
std::memcpy(map.Address(), &data, sizeof(data));
if (!use_accelerated) {
const u64 image_offset = GetRawImageOffset(framebuffer, image_index);
const VAddr framebuffer_addr = framebuffer.address + framebuffer.offset;
const u8* const host_ptr = cpu_memory.GetPointer(framebuffer_addr);
const size_t size_bytes = GetSizeInBytes(framebuffer);
rasterizer.FlushRegion(ToCacheAddr(host_ptr), size_bytes);
// TODO(Rodrigo): Read this from HLE
constexpr u32 block_height_log2 = 4;
const u32 bytes_per_pixel = GetBytesPerPixel(framebuffer);
Tegra::Texture::UnswizzleTexture(
std::span(map.Address() + image_offset, size_bytes), std::span(host_ptr, size_bytes),
bytes_per_pixel, framebuffer.width, framebuffer.height, 1, block_height_log2, 0);
const VkBufferImageCopy copy{
.bufferOffset = image_offset,
.bufferRowLength = 0,
.bufferImageHeight = 0,
.imageSubresource =
{
.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
.mipLevel = 0,
.baseArrayLayer = 0,
.layerCount = 1,
},
.imageOffset = {.x = 0, .y = 0, .z = 0},
.imageExtent =
{
.width = framebuffer.width,
.height = framebuffer.height,
.depth = 1,
},
};
scheduler.Record(
[buffer = *buffer, image = *raw_images[image_index], copy](vk::CommandBuffer cmdbuf) {
const VkImageMemoryBarrier base_barrier{
.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
.pNext = nullptr,
.srcAccessMask = 0,
.dstAccessMask = 0,
.oldLayout = VK_IMAGE_LAYOUT_GENERAL,
.newLayout = VK_IMAGE_LAYOUT_GENERAL,
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.image = image,
.subresourceRange =
{
.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
.baseMipLevel = 0,
.levelCount = 1,
.baseArrayLayer = 0,
.layerCount = 1,
},
};
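            // The first barrier makes the host-written staging data visible to the copy;
            // the second hands the image over to the fragment shader for sampling.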
VkImageMemoryBarrier read_barrier = base_barrier;
read_barrier.srcAccessMask = VK_ACCESS_HOST_WRITE_BIT;
read_barrier.dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
read_barrier.oldLayout = VK_IMAGE_LAYOUT_UNDEFINED;
VkImageMemoryBarrier write_barrier = base_barrier;
write_barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
write_barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT;
cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_HOST_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT,
0, read_barrier);
cmdbuf.CopyBufferToImage(buffer, image, VK_IMAGE_LAYOUT_GENERAL, copy);
cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT,
VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, 0, write_barrier);
});
}
map.Release();
scheduler.Record([renderpass = *renderpass, framebuffer = *framebuffers[image_index],
descriptor_set = descriptor_sets[image_index], buffer = *buffer,
size = swapchain.GetSize(), pipeline = *pipeline,
layout = *pipeline_layout](vk::CommandBuffer cmdbuf) {
const VkClearValue clear_color{
.color = {.float32 = {0.0f, 0.0f, 0.0f, 0.0f}},
};
const VkRenderPassBeginInfo renderpass_bi{
.sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO,
.pNext = nullptr,
.renderPass = renderpass,
.framebuffer = framebuffer,
.renderArea =
{
.offset = {0, 0},
.extent = size,
},
.clearValueCount = 1,
.pClearValues = &clear_color,
};
const VkViewport viewport{
.x = 0.0f,
.y = 0.0f,
.width = static_cast<float>(size.width),
.height = static_cast<float>(size.height),
.minDepth = 0.0f,
.maxDepth = 1.0f,
};
const VkRect2D scissor{
.offset = {0, 0},
.extent = size,
};
cmdbuf.BeginRenderPass(renderpass_bi, VK_SUBPASS_CONTENTS_INLINE);
cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline);
cmdbuf.SetViewport(0, viewport);
cmdbuf.SetScissor(0, scissor);
cmdbuf.BindVertexBuffer(0, buffer, offsetof(BufferData, vertices));
cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, layout, 0, descriptor_set, {});
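        // The pipeline uses a triangle strip topology, so four vertices cover the screen rectangle.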
cmdbuf.Draw(4, 1, 0, 0);
cmdbuf.EndRenderPass();
});
return *semaphores[image_index];
}
void VKBlitScreen::CreateStaticResources() {
CreateShaders();
CreateSemaphores();
CreateDescriptorPool();
CreateDescriptorSetLayout();
CreateDescriptorSets();
CreatePipelineLayout();
CreateSampler();
}
void VKBlitScreen::CreateDynamicResources() {
CreateRenderPass();
CreateFramebuffers();
CreateGraphicsPipeline();
}
void VKBlitScreen::RefreshResources(const Tegra::FramebufferConfig& framebuffer) {
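    // Recreate the staging buffer and raw images only when the guest framebuffer dimensions change.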
if (framebuffer.width == raw_width && framebuffer.height == raw_height && !raw_images.empty()) {
return;
}
raw_width = framebuffer.width;
raw_height = framebuffer.height;
ReleaseRawImages();
CreateStagingBuffer(framebuffer);
CreateRawImages(framebuffer);
}
void VKBlitScreen::CreateShaders() {
vertex_shader = BuildShader(device, VULKAN_PRESENT_VERT_SPV);
fragment_shader = BuildShader(device, VULKAN_PRESENT_FRAG_SPV);
}
void VKBlitScreen::CreateSemaphores() {
semaphores.resize(image_count);
std::generate(semaphores.begin(), semaphores.end(),
[this] { return device.GetLogical().CreateSemaphore(); });
}
void VKBlitScreen::CreateDescriptorPool() {
const std::array<VkDescriptorPoolSize, 2> pool_sizes{{
{
.type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
.descriptorCount = static_cast<u32>(image_count),
},
{
.type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
.descriptorCount = static_cast<u32>(image_count),
},
}};
const VkDescriptorPoolCreateInfo ci{
.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO,
.pNext = nullptr,
.flags = VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT,
.maxSets = static_cast<u32>(image_count),
.poolSizeCount = static_cast<u32>(pool_sizes.size()),
.pPoolSizes = pool_sizes.data(),
};
descriptor_pool = device.GetLogical().CreateDescriptorPool(ci);
}
void VKBlitScreen::CreateRenderPass() {
const VkAttachmentDescription color_attachment{
.flags = 0,
.format = swapchain.GetImageFormat(),
.samples = VK_SAMPLE_COUNT_1_BIT,
.loadOp = VK_ATTACHMENT_LOAD_OP_CLEAR,
.storeOp = VK_ATTACHMENT_STORE_OP_STORE,
.stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE,
.stencilStoreOp = VK_ATTACHMENT_STORE_OP_DONT_CARE,
.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED,
.finalLayout = VK_IMAGE_LAYOUT_PRESENT_SRC_KHR,
};
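    // The subpass writes the attachment in GENERAL layout; the render pass itself performs the
    // final transition to PRESENT_SRC_KHR.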
const VkAttachmentReference color_attachment_ref{
.attachment = 0,
.layout = VK_IMAGE_LAYOUT_GENERAL,
};
const VkSubpassDescription subpass_description{
.flags = 0,
.pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,
.inputAttachmentCount = 0,
.pInputAttachments = nullptr,
.colorAttachmentCount = 1,
.pColorAttachments = &color_attachment_ref,
.pResolveAttachments = nullptr,
.pDepthStencilAttachment = nullptr,
.preserveAttachmentCount = 0,
.pPreserveAttachments = nullptr,
};
const VkSubpassDependency dependency{
.srcSubpass = VK_SUBPASS_EXTERNAL,
.dstSubpass = 0,
.srcStageMask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT,
.dstStageMask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT,
.srcAccessMask = 0,
.dstAccessMask = VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT,
.dependencyFlags = 0,
};
const VkRenderPassCreateInfo renderpass_ci{
.sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
.attachmentCount = 1,
.pAttachments = &color_attachment,
.subpassCount = 1,
.pSubpasses = &subpass_description,
.dependencyCount = 1,
.pDependencies = &dependency,
};
renderpass = device.GetLogical().CreateRenderPass(renderpass_ci);
}
void VKBlitScreen::CreateDescriptorSetLayout() {
const std::array<VkDescriptorSetLayoutBinding, 2> layout_bindings{{
{
.binding = 0,
.descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
.descriptorCount = 1,
.stageFlags = VK_SHADER_STAGE_VERTEX_BIT,
.pImmutableSamplers = nullptr,
},
{
.binding = 1,
.descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
.descriptorCount = 1,
.stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT,
.pImmutableSamplers = nullptr,
},
}};
const VkDescriptorSetLayoutCreateInfo ci{
.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
.bindingCount = static_cast<u32>(layout_bindings.size()),
.pBindings = layout_bindings.data(),
};
descriptor_set_layout = device.GetLogical().CreateDescriptorSetLayout(ci);
}
void VKBlitScreen::CreateDescriptorSets() {
const std::vector layouts(image_count, *descriptor_set_layout);
const VkDescriptorSetAllocateInfo ai{
.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO,
.pNext = nullptr,
.descriptorPool = *descriptor_pool,
.descriptorSetCount = static_cast<u32>(image_count),
.pSetLayouts = layouts.data(),
};
descriptor_sets = descriptor_pool.Allocate(ai);
}
void VKBlitScreen::CreatePipelineLayout() {
const VkPipelineLayoutCreateInfo ci{
.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
.setLayoutCount = 1,
.pSetLayouts = descriptor_set_layout.address(),
.pushConstantRangeCount = 0,
.pPushConstantRanges = nullptr,
};
pipeline_layout = device.GetLogical().CreatePipelineLayout(ci);
}
void VKBlitScreen::CreateGraphicsPipeline() {
const std::array<VkPipelineShaderStageCreateInfo, 2> shader_stages{{
{
.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
.stage = VK_SHADER_STAGE_VERTEX_BIT,
.module = *vertex_shader,
.pName = "main",
.pSpecializationInfo = nullptr,
},
{
.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
.stage = VK_SHADER_STAGE_FRAGMENT_BIT,
.module = *fragment_shader,
.pName = "main",
.pSpecializationInfo = nullptr,
},
}};
const auto vertex_binding_description = ScreenRectVertex::GetDescription();
const auto vertex_attrs_description = ScreenRectVertex::GetAttributes();
const VkPipelineVertexInputStateCreateInfo vertex_input_ci{
.sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
.vertexBindingDescriptionCount = 1,
.pVertexBindingDescriptions = &vertex_binding_description,
        .vertexAttributeDescriptionCount = static_cast<u32>(vertex_attrs_description.size()),
.pVertexAttributeDescriptions = vertex_attrs_description.data(),
};
const VkPipelineInputAssemblyStateCreateInfo input_assembly_ci{
.sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
.topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP,
.primitiveRestartEnable = VK_FALSE,
};
const VkPipelineViewportStateCreateInfo viewport_state_ci{
.sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
.viewportCount = 1,
.pViewports = nullptr,
.scissorCount = 1,
.pScissors = nullptr,
};
const VkPipelineRasterizationStateCreateInfo rasterization_ci{
.sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
.depthClampEnable = VK_FALSE,
.rasterizerDiscardEnable = VK_FALSE,
.polygonMode = VK_POLYGON_MODE_FILL,
.cullMode = VK_CULL_MODE_NONE,
.frontFace = VK_FRONT_FACE_CLOCKWISE,
.depthBiasEnable = VK_FALSE,
.depthBiasConstantFactor = 0.0f,
.depthBiasClamp = 0.0f,
.depthBiasSlopeFactor = 0.0f,
.lineWidth = 1.0f,
};
const VkPipelineMultisampleStateCreateInfo multisampling_ci{
.sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
.rasterizationSamples = VK_SAMPLE_COUNT_1_BIT,
.sampleShadingEnable = VK_FALSE,
.minSampleShading = 0.0f,
.pSampleMask = nullptr,
.alphaToCoverageEnable = VK_FALSE,
.alphaToOneEnable = VK_FALSE,
};
const VkPipelineColorBlendAttachmentState color_blend_attachment{
.blendEnable = VK_FALSE,
.srcColorBlendFactor = VK_BLEND_FACTOR_ZERO,
.dstColorBlendFactor = VK_BLEND_FACTOR_ZERO,
.colorBlendOp = VK_BLEND_OP_ADD,
.srcAlphaBlendFactor = VK_BLEND_FACTOR_ZERO,
.dstAlphaBlendFactor = VK_BLEND_FACTOR_ZERO,
.alphaBlendOp = VK_BLEND_OP_ADD,
.colorWriteMask = VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT |
VK_COLOR_COMPONENT_B_BIT | VK_COLOR_COMPONENT_A_BIT,
};
const VkPipelineColorBlendStateCreateInfo color_blend_ci{
.sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
.logicOpEnable = VK_FALSE,
.logicOp = VK_LOGIC_OP_COPY,
.attachmentCount = 1,
.pAttachments = &color_blend_attachment,
.blendConstants = {0.0f, 0.0f, 0.0f, 0.0f},
};
static constexpr std::array dynamic_states{
VK_DYNAMIC_STATE_VIEWPORT,
VK_DYNAMIC_STATE_SCISSOR,
};
const VkPipelineDynamicStateCreateInfo dynamic_state_ci{
.sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
.dynamicStateCount = static_cast<u32>(dynamic_states.size()),
.pDynamicStates = dynamic_states.data(),
};
const VkGraphicsPipelineCreateInfo pipeline_ci{
.sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
.stageCount = static_cast<u32>(shader_stages.size()),
.pStages = shader_stages.data(),
.pVertexInputState = &vertex_input_ci,
.pInputAssemblyState = &input_assembly_ci,
.pTessellationState = nullptr,
.pViewportState = &viewport_state_ci,
.pRasterizationState = &rasterization_ci,
.pMultisampleState = &multisampling_ci,
.pDepthStencilState = nullptr,
.pColorBlendState = &color_blend_ci,
.pDynamicState = &dynamic_state_ci,
.layout = *pipeline_layout,
.renderPass = *renderpass,
.subpass = 0,
        .basePipelineHandle = nullptr,
.basePipelineIndex = 0,
};
pipeline = device.GetLogical().CreateGraphicsPipeline(pipeline_ci);
}
void VKBlitScreen::CreateSampler() {
const VkSamplerCreateInfo ci{
.sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
.magFilter = VK_FILTER_LINEAR,
.minFilter = VK_FILTER_NEAREST,
.mipmapMode = VK_SAMPLER_MIPMAP_MODE_LINEAR,
.addressModeU = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER,
.addressModeV = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER,
.addressModeW = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER,
.mipLodBias = 0.0f,
.anisotropyEnable = VK_FALSE,
.maxAnisotropy = 0.0f,
.compareEnable = VK_FALSE,
.compareOp = VK_COMPARE_OP_NEVER,
.minLod = 0.0f,
.maxLod = 0.0f,
.borderColor = VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK,
.unnormalizedCoordinates = VK_FALSE,
};
sampler = device.GetLogical().CreateSampler(ci);
}
void VKBlitScreen::CreateFramebuffers() {
const VkExtent2D size{swapchain.GetSize()};
framebuffers.resize(image_count);
VkFramebufferCreateInfo ci{
.sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
.renderPass = *renderpass,
.attachmentCount = 1,
.pAttachments = nullptr,
.width = size.width,
.height = size.height,
.layers = 1,
};
for (std::size_t i = 0; i < image_count; ++i) {
const VkImageView image_view{swapchain.GetImageViewIndex(i)};
ci.pAttachments = &image_view;
framebuffers[i] = device.GetLogical().CreateFramebuffer(ci);
}
}
void VKBlitScreen::ReleaseRawImages() {
for (std::size_t i = 0; i < raw_images.size(); ++i) {
scheduler.Wait(resource_ticks.at(i));
}
raw_images.clear();
raw_buffer_commits.clear();
buffer.reset();
buffer_commit.reset();
}
void VKBlitScreen::CreateStagingBuffer(const Tegra::FramebufferConfig& framebuffer) {
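    // One host-visible buffer backs the uniform data, the vertex data, and the raw image staging
    // copies (see CalculateBufferSize for the layout).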
const VkBufferCreateInfo ci{
.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
.size = CalculateBufferSize(framebuffer),
.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT |
VK_BUFFER_USAGE_VERTEX_BUFFER_BIT | VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT,
.sharingMode = VK_SHARING_MODE_EXCLUSIVE,
.queueFamilyIndexCount = 0,
.pQueueFamilyIndices = nullptr,
};
buffer = device.GetLogical().CreateBuffer(ci);
buffer_commit = memory_manager.Commit(buffer, true);
}
void VKBlitScreen::CreateRawImages(const Tegra::FramebufferConfig& framebuffer) {
raw_images.resize(image_count);
raw_image_views.resize(image_count);
raw_buffer_commits.resize(image_count);
for (size_t i = 0; i < image_count; ++i) {
raw_images[i] = device.GetLogical().CreateImage(VkImageCreateInfo{
.sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
.imageType = VK_IMAGE_TYPE_2D,
.format = GetFormat(framebuffer),
.extent =
{
.width = framebuffer.width,
.height = framebuffer.height,
.depth = 1,
},
.mipLevels = 1,
.arrayLayers = 1,
.samples = VK_SAMPLE_COUNT_1_BIT,
.tiling = VK_IMAGE_TILING_LINEAR,
.usage = VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_SAMPLED_BIT,
.sharingMode = VK_SHARING_MODE_EXCLUSIVE,
.queueFamilyIndexCount = 0,
.pQueueFamilyIndices = nullptr,
.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED,
});
raw_buffer_commits[i] = memory_manager.Commit(raw_images[i], false);
raw_image_views[i] = device.GetLogical().CreateImageView(VkImageViewCreateInfo{
.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
.image = *raw_images[i],
.viewType = VK_IMAGE_VIEW_TYPE_2D,
.format = GetFormat(framebuffer),
.components =
{
.r = VK_COMPONENT_SWIZZLE_IDENTITY,
.g = VK_COMPONENT_SWIZZLE_IDENTITY,
.b = VK_COMPONENT_SWIZZLE_IDENTITY,
.a = VK_COMPONENT_SWIZZLE_IDENTITY,
},
.subresourceRange =
{
.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
.baseMipLevel = 0,
.levelCount = 1,
.baseArrayLayer = 0,
.layerCount = 1,
},
});
}
}
void VKBlitScreen::UpdateDescriptorSet(std::size_t image_index, VkImageView image_view) const {
const VkDescriptorBufferInfo buffer_info{
.buffer = *buffer,
.offset = offsetof(BufferData, uniform),
.range = sizeof(BufferData::uniform),
};
const VkWriteDescriptorSet ubo_write{
.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
.pNext = nullptr,
.dstSet = descriptor_sets[image_index],
.dstBinding = 0,
.dstArrayElement = 0,
.descriptorCount = 1,
.descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
.pImageInfo = nullptr,
.pBufferInfo = &buffer_info,
.pTexelBufferView = nullptr,
};
const VkDescriptorImageInfo image_info{
.sampler = *sampler,
.imageView = image_view,
.imageLayout = VK_IMAGE_LAYOUT_GENERAL,
};
const VkWriteDescriptorSet sampler_write{
.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
.pNext = nullptr,
.dstSet = descriptor_sets[image_index],
.dstBinding = 1,
.dstArrayElement = 0,
.descriptorCount = 1,
.descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
.pImageInfo = &image_info,
.pBufferInfo = nullptr,
.pTexelBufferView = nullptr,
};
device.GetLogical().UpdateDescriptorSets(std::array{ubo_write, sampler_write}, {});
}
void VKBlitScreen::SetUniformData(BufferData& data,
const Tegra::FramebufferConfig& framebuffer) const {
const auto& layout = render_window.GetFramebufferLayout();
data.uniform.modelview_matrix =
MakeOrthographicMatrix(static_cast<f32>(layout.width), static_cast<f32>(layout.height));
}
void VKBlitScreen::SetVertexData(BufferData& data,
const Tegra::FramebufferConfig& framebuffer) const {
const auto& framebuffer_transform_flags = framebuffer.transform_flags;
const auto& framebuffer_crop_rect = framebuffer.crop_rect;
static constexpr Common::Rectangle<f32> texcoords{0.f, 0.f, 1.f, 1.f};
auto left = texcoords.left;
auto right = texcoords.right;
switch (framebuffer_transform_flags) {
case Tegra::FramebufferConfig::TransformFlags::Unset:
break;
case Tegra::FramebufferConfig::TransformFlags::FlipV:
        // Flip the framebuffer vertically: left/right feed the vertex v coordinates below
left = texcoords.right;
right = texcoords.left;
break;
default:
UNIMPLEMENTED_MSG("Unsupported framebuffer_transform_flags={}",
static_cast<u32>(framebuffer_transform_flags));
break;
}
UNIMPLEMENTED_IF(framebuffer_crop_rect.top != 0);
UNIMPLEMENTED_IF(framebuffer_crop_rect.left != 0);
// Scale the output by the crop width/height. This is commonly used with 1280x720 rendering
// (e.g. handheld mode) on a 1920x1080 framebuffer.
f32 scale_u = 1.0f;
f32 scale_v = 1.0f;
if (framebuffer_crop_rect.GetWidth() > 0) {
scale_u = static_cast<f32>(framebuffer_crop_rect.GetWidth()) /
static_cast<f32>(screen_info.width);
}
if (framebuffer_crop_rect.GetHeight() > 0) {
scale_v = static_cast<f32>(framebuffer_crop_rect.GetHeight()) /
static_cast<f32>(screen_info.height);
}
const auto& screen = render_window.GetFramebufferLayout().screen;
const auto x = static_cast<f32>(screen.left);
const auto y = static_cast<f32>(screen.top);
const auto w = static_cast<f32>(screen.GetWidth());
const auto h = static_cast<f32>(screen.GetHeight());
data.vertices[0] = ScreenRectVertex(x, y, texcoords.top * scale_u, left * scale_v);
data.vertices[1] = ScreenRectVertex(x + w, y, texcoords.bottom * scale_u, left * scale_v);
data.vertices[2] = ScreenRectVertex(x, y + h, texcoords.top * scale_u, right * scale_v);
data.vertices[3] = ScreenRectVertex(x + w, y + h, texcoords.bottom * scale_u, right * scale_v);
}
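// The staging buffer stores BufferData first, followed by one raw framebuffer image per
// swapchain image.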
u64 VKBlitScreen::CalculateBufferSize(const Tegra::FramebufferConfig& framebuffer) const {
return sizeof(BufferData) + GetSizeInBytes(framebuffer) * image_count;
}
u64 VKBlitScreen::GetRawImageOffset(const Tegra::FramebufferConfig& framebuffer,
std::size_t image_index) const {
constexpr auto first_image_offset = static_cast<u64>(sizeof(BufferData));
return first_image_offset + GetSizeInBytes(framebuffer) * image_index;
}
} // namespace Vulkan

View File

@@ -0,0 +1,119 @@
// Copyright 2018 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <memory>
#include "video_core/renderer_vulkan/vk_memory_manager.h"
#include "video_core/renderer_vulkan/wrapper.h"
namespace Core {
class System;
}
namespace Core::Memory {
class Memory;
}
namespace Core::Frontend {
class EmuWindow;
}
namespace Tegra {
struct FramebufferConfig;
}
namespace VideoCore {
class RasterizerInterface;
}
namespace Vulkan {
struct ScreenInfo;
class RasterizerVulkan;
class VKDevice;
class VKScheduler;
class VKSwapchain;
class VKBlitScreen final {
public:
explicit VKBlitScreen(Core::Memory::Memory& cpu_memory,
Core::Frontend::EmuWindow& render_window,
VideoCore::RasterizerInterface& rasterizer, const VKDevice& device,
VKMemoryManager& memory_manager, VKSwapchain& swapchain,
VKScheduler& scheduler, const VKScreenInfo& screen_info);
~VKBlitScreen();
void Recreate();
[[nodiscard]] VkSemaphore Draw(const Tegra::FramebufferConfig& framebuffer,
bool use_accelerated);
private:
struct BufferData;
void CreateStaticResources();
void CreateShaders();
void CreateSemaphores();
void CreateDescriptorPool();
void CreateRenderPass();
void CreateDescriptorSetLayout();
void CreateDescriptorSets();
void CreatePipelineLayout();
void CreateGraphicsPipeline();
void CreateSampler();
void CreateDynamicResources();
void CreateFramebuffers();
void RefreshResources(const Tegra::FramebufferConfig& framebuffer);
void ReleaseRawImages();
void CreateStagingBuffer(const Tegra::FramebufferConfig& framebuffer);
void CreateRawImages(const Tegra::FramebufferConfig& framebuffer);
void UpdateDescriptorSet(std::size_t image_index, VkImageView image_view) const;
void SetUniformData(BufferData& data, const Tegra::FramebufferConfig& framebuffer) const;
void SetVertexData(BufferData& data, const Tegra::FramebufferConfig& framebuffer) const;
u64 CalculateBufferSize(const Tegra::FramebufferConfig& framebuffer) const;
u64 GetRawImageOffset(const Tegra::FramebufferConfig& framebuffer,
std::size_t image_index) const;
Core::Memory::Memory& cpu_memory;
Core::Frontend::EmuWindow& render_window;
VideoCore::RasterizerInterface& rasterizer;
const VKDevice& device;
VKMemoryManager& memory_manager;
VKSwapchain& swapchain;
VKScheduler& scheduler;
const std::size_t image_count;
const VKScreenInfo& screen_info;
vk::ShaderModule vertex_shader;
vk::ShaderModule fragment_shader;
vk::DescriptorPool descriptor_pool;
vk::DescriptorSetLayout descriptor_set_layout;
vk::PipelineLayout pipeline_layout;
vk::Pipeline pipeline;
vk::RenderPass renderpass;
std::vector<vk::Framebuffer> framebuffers;
vk::DescriptorSets descriptor_sets;
vk::Sampler sampler;
vk::Buffer buffer;
VKMemoryCommit buffer_commit;
std::vector<u64> resource_ticks;
std::vector<vk::Semaphore> semaphores;
std::vector<vk::Image> raw_images;
std::vector<vk::ImageView> raw_image_views;
std::vector<VKMemoryCommit> raw_buffer_commits;
u32 raw_width = 0;
u32 raw_height = 0;
};
} // namespace Vulkan

View File

@@ -0,0 +1,196 @@
// Copyright 2019 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <algorithm>
#include <cstring>
#include <memory>
#include "core/core.h"
#include "video_core/buffer_cache/buffer_cache.h"
#include "video_core/renderer_vulkan/vk_buffer_cache.h"
#include "video_core/renderer_vulkan/vk_device.h"
#include "video_core/renderer_vulkan/vk_scheduler.h"
#include "video_core/renderer_vulkan/vk_stream_buffer.h"
#include "video_core/renderer_vulkan/wrapper.h"
namespace Vulkan {
namespace {
constexpr VkBufferUsageFlags BUFFER_USAGE =
VK_BUFFER_USAGE_VERTEX_BUFFER_BIT | VK_BUFFER_USAGE_INDEX_BUFFER_BIT |
VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT;
constexpr VkPipelineStageFlags UPLOAD_PIPELINE_STAGE =
VK_PIPELINE_STAGE_TRANSFER_BIT | VK_PIPELINE_STAGE_VERTEX_INPUT_BIT |
VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT |
VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT;
constexpr VkAccessFlags UPLOAD_ACCESS_BARRIERS =
VK_ACCESS_TRANSFER_READ_BIT | VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_UNIFORM_READ_BIT |
VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT | VK_ACCESS_INDEX_READ_BIT;
constexpr VkAccessFlags TRANSFORM_FEEDBACK_WRITE_ACCESS =
VK_ACCESS_TRANSFORM_FEEDBACK_WRITE_BIT_EXT | VK_ACCESS_TRANSFORM_FEEDBACK_COUNTER_WRITE_BIT_EXT;
std::unique_ptr<VKStreamBuffer> CreateStreamBuffer(const VKDevice& device, VKScheduler& scheduler) {
return std::make_unique<VKStreamBuffer>(device, scheduler);
}
} // Anonymous namespace
Buffer::Buffer(const VKDevice& device_, VKMemoryManager& memory_manager, VKScheduler& scheduler_,
VKStagingBufferPool& staging_pool_, VAddr cpu_addr_, std::size_t size_)
: BufferBlock{cpu_addr_, size_}, device{device_}, scheduler{scheduler_}, staging_pool{
staging_pool_} {
const VkBufferCreateInfo ci{
.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
.size = static_cast<VkDeviceSize>(size_),
.usage = BUFFER_USAGE | VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT,
.sharingMode = VK_SHARING_MODE_EXCLUSIVE,
.queueFamilyIndexCount = 0,
.pQueueFamilyIndices = nullptr,
};
buffer.handle = device.GetLogical().CreateBuffer(ci);
buffer.commit = memory_manager.Commit(buffer.handle, false);
}
Buffer::~Buffer() = default;
void Buffer::Upload(std::size_t offset, std::size_t data_size, const u8* data) {
const auto& staging = staging_pool.GetUnusedBuffer(data_size, true);
std::memcpy(staging.commit->Map(data_size), data, data_size);
scheduler.RequestOutsideRenderPassOperationContext();
const VkBuffer handle = Handle();
scheduler.Record([staging = *staging.handle, handle, offset, data_size,
&device = device](vk::CommandBuffer cmdbuf) {
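        // First barrier: order any previous GPU or host writes to this range before the transfer
        // overwrites it. Second barrier: make the fresh data visible to every upload consumer.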
const VkBufferMemoryBarrier read_barrier{
.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,
.pNext = nullptr,
.srcAccessMask =
VK_ACCESS_SHADER_WRITE_BIT | VK_ACCESS_TRANSFER_WRITE_BIT |
VK_ACCESS_HOST_WRITE_BIT |
(device.IsExtTransformFeedbackSupported() ? TRANSFORM_FEEDBACK_WRITE_ACCESS : 0),
            .dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.buffer = handle,
.offset = offset,
.size = data_size,
};
const VkBufferMemoryBarrier write_barrier{
.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,
.pNext = nullptr,
.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
.dstAccessMask = UPLOAD_ACCESS_BARRIERS,
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.buffer = handle,
.offset = offset,
.size = data_size,
};
cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT,
0, read_barrier);
cmdbuf.CopyBuffer(staging, handle, VkBufferCopy{0, offset, data_size});
cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, UPLOAD_PIPELINE_STAGE, 0,
write_barrier);
});
}
void Buffer::Download(std::size_t offset, std::size_t data_size, u8* data) {
const auto& staging = staging_pool.GetUnusedBuffer(data_size, true);
scheduler.RequestOutsideRenderPassOperationContext();
const VkBuffer handle = Handle();
scheduler.Record(
[staging = *staging.handle, handle, offset, data_size](vk::CommandBuffer cmdbuf) {
const VkBufferMemoryBarrier barrier{
.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,
.pNext = nullptr,
.srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT,
.dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT,
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.buffer = handle,
.offset = offset,
.size = data_size,
};
cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_VERTEX_SHADER_BIT |
VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT |
VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
VK_PIPELINE_STAGE_TRANSFER_BIT, 0, {}, barrier, {});
cmdbuf.CopyBuffer(handle, staging, VkBufferCopy{offset, 0, data_size});
});
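    // Block until the copy finishes before reading the staging buffer back on the CPU.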
scheduler.Finish();
std::memcpy(data, staging.commit->Map(data_size), data_size);
}
void Buffer::CopyFrom(const Buffer& src, std::size_t src_offset, std::size_t dst_offset,
std::size_t copy_size) {
scheduler.RequestOutsideRenderPassOperationContext();
const VkBuffer dst_buffer = Handle();
scheduler.Record([src_buffer = src.Handle(), dst_buffer, src_offset, dst_offset,
copy_size](vk::CommandBuffer cmdbuf) {
cmdbuf.CopyBuffer(src_buffer, dst_buffer, VkBufferCopy{src_offset, dst_offset, copy_size});
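        // After the copy: barriers[0] reopens the source range for shader writes, barriers[1]
        // makes the destination visible to all upload consumers.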
std::array<VkBufferMemoryBarrier, 2> barriers;
barriers[0].sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER;
barriers[0].pNext = nullptr;
barriers[0].srcAccessMask = VK_ACCESS_TRANSFER_READ_BIT;
barriers[0].dstAccessMask = VK_ACCESS_SHADER_WRITE_BIT;
barriers[0].srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
barriers[0].dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
barriers[0].buffer = src_buffer;
barriers[0].offset = src_offset;
barriers[0].size = copy_size;
barriers[1].sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER;
barriers[1].pNext = nullptr;
barriers[1].srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
barriers[1].dstAccessMask = UPLOAD_ACCESS_BARRIERS;
barriers[1].srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
barriers[1].dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
barriers[1].buffer = dst_buffer;
barriers[1].offset = dst_offset;
barriers[1].size = copy_size;
cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, UPLOAD_PIPELINE_STAGE, 0, {},
barriers, {});
});
}
VKBufferCache::VKBufferCache(VideoCore::RasterizerInterface& rasterizer_,
Tegra::MemoryManager& gpu_memory_, Core::Memory::Memory& cpu_memory_,
const VKDevice& device_, VKMemoryManager& memory_manager_,
VKScheduler& scheduler_, VKStreamBuffer& stream_buffer_,
VKStagingBufferPool& staging_pool_)
: VideoCommon::BufferCache<Buffer, VkBuffer, VKStreamBuffer>{rasterizer_, gpu_memory_,
cpu_memory_, stream_buffer_},
device{device_}, memory_manager{memory_manager_}, scheduler{scheduler_}, staging_pool{
staging_pool_} {}
VKBufferCache::~VKBufferCache() = default;
std::shared_ptr<Buffer> VKBufferCache::CreateBlock(VAddr cpu_addr, std::size_t size) {
return std::make_shared<Buffer>(device, memory_manager, scheduler, staging_pool, cpu_addr,
size);
}
VKBufferCache::BufferInfo VKBufferCache::GetEmptyBuffer(std::size_t size) {
size = std::max(size, std::size_t(4));
const auto& empty = staging_pool.GetUnusedBuffer(size, false);
scheduler.RequestOutsideRenderPassOperationContext();
scheduler.Record([size, buffer = *empty.handle](vk::CommandBuffer cmdbuf) {
cmdbuf.FillBuffer(buffer, 0, size, 0);
});
return {*empty.handle, 0, 0};
}
} // namespace Vulkan

View File

@@ -0,0 +1,72 @@
// Copyright 2019 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <memory>
#include "common/common_types.h"
#include "video_core/buffer_cache/buffer_cache.h"
#include "video_core/renderer_vulkan/vk_memory_manager.h"
#include "video_core/renderer_vulkan/vk_staging_buffer_pool.h"
#include "video_core/renderer_vulkan/vk_stream_buffer.h"
#include "video_core/renderer_vulkan/wrapper.h"
namespace Vulkan {
class VKDevice;
class VKMemoryManager;
class VKScheduler;
class Buffer final : public VideoCommon::BufferBlock {
public:
explicit Buffer(const VKDevice& device, VKMemoryManager& memory_manager, VKScheduler& scheduler,
VKStagingBufferPool& staging_pool, VAddr cpu_addr_, std::size_t size_);
~Buffer();
void Upload(std::size_t offset, std::size_t data_size, const u8* data);
void Download(std::size_t offset, std::size_t data_size, u8* data);
void CopyFrom(const Buffer& src, std::size_t src_offset, std::size_t dst_offset,
std::size_t copy_size);
VkBuffer Handle() const {
return *buffer.handle;
}
u64 Address() const {
return 0;
}
private:
const VKDevice& device;
VKScheduler& scheduler;
VKStagingBufferPool& staging_pool;
VKBuffer buffer;
};
class VKBufferCache final : public VideoCommon::BufferCache<Buffer, VkBuffer, VKStreamBuffer> {
public:
explicit VKBufferCache(VideoCore::RasterizerInterface& rasterizer,
Tegra::MemoryManager& gpu_memory, Core::Memory::Memory& cpu_memory,
const VKDevice& device, VKMemoryManager& memory_manager,
VKScheduler& scheduler, VKStreamBuffer& stream_buffer,
VKStagingBufferPool& staging_pool);
~VKBufferCache();
BufferInfo GetEmptyBuffer(std::size_t size) override;
protected:
std::shared_ptr<Buffer> CreateBlock(VAddr cpu_addr, std::size_t size) override;
private:
const VKDevice& device;
VKMemoryManager& memory_manager;
VKScheduler& scheduler;
VKStagingBufferPool& staging_pool;
};
} // namespace Vulkan

View File

@@ -0,0 +1,46 @@
// Copyright 2020 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <cstddef>
#include "video_core/renderer_vulkan/vk_command_pool.h"
#include "video_core/renderer_vulkan/vk_device.h"
#include "video_core/renderer_vulkan/wrapper.h"
namespace Vulkan {
constexpr size_t COMMAND_BUFFER_POOL_SIZE = 0x1000;
struct CommandPool::Pool {
vk::CommandPool handle;
vk::CommandBuffers cmdbufs;
};
CommandPool::CommandPool(MasterSemaphore& master_semaphore_, const VKDevice& device_)
: ResourcePool(master_semaphore_, COMMAND_BUFFER_POOL_SIZE), device{device_} {}
CommandPool::~CommandPool() = default;
void CommandPool::Allocate(size_t begin, size_t end) {
    // Command buffers are going to be committed, recorded, and executed every single usage cycle.
    // They are also going to be reset when committed.
Pool& pool = pools.emplace_back();
pool.handle = device.GetLogical().CreateCommandPool({
.sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO,
.pNext = nullptr,
.flags =
VK_COMMAND_POOL_CREATE_TRANSIENT_BIT | VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT,
.queueFamilyIndex = device.GetGraphicsFamily(),
});
pool.cmdbufs = pool.handle.Allocate(COMMAND_BUFFER_POOL_SIZE);
}
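// Hands out the next reusable command buffer tracked by ResourcePool; Allocate is invoked when
// every buffer is still in flight.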
VkCommandBuffer CommandPool::Commit() {
const size_t index = CommitResource();
const auto pool_index = index / COMMAND_BUFFER_POOL_SIZE;
const auto sub_index = index % COMMAND_BUFFER_POOL_SIZE;
return pools[pool_index].cmdbufs[sub_index];
}
} // namespace Vulkan

View File

@@ -0,0 +1,34 @@
// Copyright 2020 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <cstddef>
#include <vector>
#include "video_core/renderer_vulkan/vk_resource_pool.h"
#include "video_core/renderer_vulkan/wrapper.h"
namespace Vulkan {
class MasterSemaphore;
class VKDevice;
class CommandPool final : public ResourcePool {
public:
explicit CommandPool(MasterSemaphore& master_semaphore_, const VKDevice& device_);
~CommandPool() override;
void Allocate(size_t begin, size_t end) override;
VkCommandBuffer Commit();
private:
struct Pool;
const VKDevice& device;
std::vector<Pool> pools;
};
} // namespace Vulkan

View File

@@ -0,0 +1,323 @@
// Copyright 2019 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <cstring>
#include <memory>
#include <optional>
#include <utility>
#include "common/alignment.h"
#include "common/assert.h"
#include "common/common_types.h"
#include "video_core/host_shaders/vulkan_quad_array_comp_spv.h"
#include "video_core/host_shaders/vulkan_quad_indexed_comp_spv.h"
#include "video_core/host_shaders/vulkan_uint8_comp_spv.h"
#include "video_core/renderer_vulkan/vk_compute_pass.h"
#include "video_core/renderer_vulkan/vk_descriptor_pool.h"
#include "video_core/renderer_vulkan/vk_device.h"
#include "video_core/renderer_vulkan/vk_scheduler.h"
#include "video_core/renderer_vulkan/vk_staging_buffer_pool.h"
#include "video_core/renderer_vulkan/vk_update_descriptor.h"
#include "video_core/renderer_vulkan/wrapper.h"
namespace Vulkan {
namespace {
VkDescriptorSetLayoutBinding BuildQuadArrayPassDescriptorSetLayoutBinding() {
return {
.binding = 0,
.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
.descriptorCount = 1,
.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
.pImmutableSamplers = nullptr,
};
}
VkDescriptorUpdateTemplateEntryKHR BuildQuadArrayPassDescriptorUpdateTemplateEntry() {
return {
.dstBinding = 0,
.dstArrayElement = 0,
.descriptorCount = 1,
.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
.offset = 0,
.stride = sizeof(DescriptorUpdateEntry),
};
}
VkPushConstantRange BuildComputePushConstantRange(std::size_t size) {
return {
.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
.offset = 0,
.size = static_cast<u32>(size),
};
}
std::array<VkDescriptorSetLayoutBinding, 2> BuildInputOutputDescriptorSetBindings() {
return {{
{
.binding = 0,
.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
.descriptorCount = 1,
.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
.pImmutableSamplers = nullptr,
},
{
.binding = 1,
.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
.descriptorCount = 1,
.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
.pImmutableSamplers = nullptr,
},
}};
}
VkDescriptorUpdateTemplateEntryKHR BuildInputOutputDescriptorUpdateTemplate() {
return {
.dstBinding = 0,
.dstArrayElement = 0,
.descriptorCount = 2,
.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
.offset = 0,
.stride = sizeof(DescriptorUpdateEntry),
};
}
} // Anonymous namespace
VKComputePass::VKComputePass(const VKDevice& device, VKDescriptorPool& descriptor_pool,
vk::Span<VkDescriptorSetLayoutBinding> bindings,
vk::Span<VkDescriptorUpdateTemplateEntryKHR> templates,
vk::Span<VkPushConstantRange> push_constants,
std::span<const u32> code) {
descriptor_set_layout = device.GetLogical().CreateDescriptorSetLayout({
.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
.bindingCount = bindings.size(),
.pBindings = bindings.data(),
});
layout = device.GetLogical().CreatePipelineLayout({
.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
.setLayoutCount = 1,
.pSetLayouts = descriptor_set_layout.address(),
.pushConstantRangeCount = push_constants.size(),
.pPushConstantRanges = push_constants.data(),
});
if (!templates.empty()) {
descriptor_template = device.GetLogical().CreateDescriptorUpdateTemplateKHR({
.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_UPDATE_TEMPLATE_CREATE_INFO_KHR,
.pNext = nullptr,
.flags = 0,
.descriptorUpdateEntryCount = templates.size(),
.pDescriptorUpdateEntries = templates.data(),
.templateType = VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_DESCRIPTOR_SET_KHR,
.descriptorSetLayout = *descriptor_set_layout,
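            // The bind point is ignored for DESCRIPTOR_SET templates; only push descriptor
            // templates consume it.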
.pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,
.pipelineLayout = *layout,
.set = 0,
});
descriptor_allocator.emplace(descriptor_pool, *descriptor_set_layout);
}
module = device.GetLogical().CreateShaderModule({
.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
.codeSize = static_cast<u32>(code.size_bytes()),
.pCode = code.data(),
});
pipeline = device.GetLogical().CreateComputePipeline({
.sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
.stage =
{
.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
.stage = VK_SHADER_STAGE_COMPUTE_BIT,
.module = *module,
.pName = "main",
.pSpecializationInfo = nullptr,
},
.layout = *layout,
.basePipelineHandle = nullptr,
.basePipelineIndex = 0,
});
}
VKComputePass::~VKComputePass() = default;
VkDescriptorSet VKComputePass::CommitDescriptorSet(
VKUpdateDescriptorQueue& update_descriptor_queue) {
if (!descriptor_template) {
return nullptr;
}
const VkDescriptorSet set = descriptor_allocator->Commit();
update_descriptor_queue.Send(*descriptor_template, set);
return set;
}
QuadArrayPass::QuadArrayPass(const VKDevice& device_, VKScheduler& scheduler_,
VKDescriptorPool& descriptor_pool_,
VKStagingBufferPool& staging_buffer_pool_,
VKUpdateDescriptorQueue& update_descriptor_queue_)
: VKComputePass(device_, descriptor_pool_, BuildQuadArrayPassDescriptorSetLayoutBinding(),
BuildQuadArrayPassDescriptorUpdateTemplateEntry(),
BuildComputePushConstantRange(sizeof(u32)), VULKAN_QUAD_ARRAY_COMP_SPV),
scheduler{scheduler_}, staging_buffer_pool{staging_buffer_pool_},
update_descriptor_queue{update_descriptor_queue_} {}
QuadArrayPass::~QuadArrayPass() = default;
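// Expands an array of quads (4 vertices each) into a triangle index buffer (6 indices per quad)
// on the GPU. Returns the generated buffer and the offset to the indices.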
std::pair<VkBuffer, VkDeviceSize> QuadArrayPass::Assemble(u32 num_vertices, u32 first) {
const u32 num_triangle_vertices = (num_vertices / 4) * 6;
const std::size_t staging_size = num_triangle_vertices * sizeof(u32);
auto& buffer = staging_buffer_pool.GetUnusedBuffer(staging_size, false);
update_descriptor_queue.Acquire();
update_descriptor_queue.AddBuffer(*buffer.handle, 0, staging_size);
const VkDescriptorSet set = CommitDescriptorSet(update_descriptor_queue);
scheduler.RequestOutsideRenderPassOperationContext();
ASSERT(num_vertices % 4 == 0);
const u32 num_quads = num_vertices / 4;
scheduler.Record([layout = *layout, pipeline = *pipeline, buffer = *buffer.handle, num_quads,
first, set](vk::CommandBuffer cmdbuf) {
constexpr u32 dispatch_size = 1024;
cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, layout, 0, set, {});
cmdbuf.PushConstants(layout, VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(first), &first);
cmdbuf.Dispatch(Common::AlignUp(num_quads, dispatch_size) / dispatch_size, 1, 1);
VkBufferMemoryBarrier barrier;
barrier.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER;
barrier.pNext = nullptr;
barrier.srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT;
barrier.dstAccessMask = VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT;
barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
barrier.buffer = buffer;
barrier.offset = 0;
barrier.size = static_cast<VkDeviceSize>(num_quads) * 6 * sizeof(u32);
cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
VK_PIPELINE_STAGE_VERTEX_INPUT_BIT, 0, {}, {barrier}, {});
});
return {*buffer.handle, 0};
}
Uint8Pass::Uint8Pass(const VKDevice& device, VKScheduler& scheduler_,
VKDescriptorPool& descriptor_pool, VKStagingBufferPool& staging_buffer_pool_,
VKUpdateDescriptorQueue& update_descriptor_queue_)
: VKComputePass(device, descriptor_pool, BuildInputOutputDescriptorSetBindings(),
BuildInputOutputDescriptorUpdateTemplate(), {}, VULKAN_UINT8_COMP_SPV),
scheduler{scheduler_}, staging_buffer_pool{staging_buffer_pool_},
update_descriptor_queue{update_descriptor_queue_} {}
Uint8Pass::~Uint8Pass() = default;
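// Converts a uint8 index buffer into uint16 with a compute shader. Returns the converted buffer
// and the offset where the indices start.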
std::pair<VkBuffer, u64> Uint8Pass::Assemble(u32 num_vertices, VkBuffer src_buffer,
u64 src_offset) {
const u32 staging_size = static_cast<u32>(num_vertices * sizeof(u16));
auto& buffer = staging_buffer_pool.GetUnusedBuffer(staging_size, false);
update_descriptor_queue.Acquire();
update_descriptor_queue.AddBuffer(src_buffer, src_offset, num_vertices);
update_descriptor_queue.AddBuffer(*buffer.handle, 0, staging_size);
const VkDescriptorSet set = CommitDescriptorSet(update_descriptor_queue);
scheduler.RequestOutsideRenderPassOperationContext();
scheduler.Record([layout = *layout, pipeline = *pipeline, buffer = *buffer.handle, set,
num_vertices](vk::CommandBuffer cmdbuf) {
constexpr u32 dispatch_size = 1024;
cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, layout, 0, set, {});
cmdbuf.Dispatch(Common::AlignUp(num_vertices, dispatch_size) / dispatch_size, 1, 1);
VkBufferMemoryBarrier barrier;
barrier.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER;
barrier.pNext = nullptr;
barrier.srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT;
barrier.dstAccessMask = VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT;
barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
barrier.buffer = buffer;
barrier.offset = 0;
barrier.size = static_cast<VkDeviceSize>(num_vertices * sizeof(u16));
cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
VK_PIPELINE_STAGE_VERTEX_INPUT_BIT, 0, {}, barrier, {});
});
return {*buffer.handle, 0};
}
QuadIndexedPass::QuadIndexedPass(const VKDevice& device_, VKScheduler& scheduler_,
VKDescriptorPool& descriptor_pool_,
VKStagingBufferPool& staging_buffer_pool_,
VKUpdateDescriptorQueue& update_descriptor_queue_)
: VKComputePass(device_, descriptor_pool_, BuildInputOutputDescriptorSetBindings(),
BuildInputOutputDescriptorUpdateTemplate(),
BuildComputePushConstantRange(sizeof(u32) * 2), VULKAN_QUAD_INDEXED_COMP_SPV),
scheduler{scheduler_}, staging_buffer_pool{staging_buffer_pool_},
update_descriptor_queue{update_descriptor_queue_} {}
QuadIndexedPass::~QuadIndexedPass() = default;
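// Rewrites quad indices (uint8/16/32 wide) into a uint32 triangle index buffer, applying
// base_vertex in the shader through push constants.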
std::pair<VkBuffer, u64> QuadIndexedPass::Assemble(
Tegra::Engines::Maxwell3D::Regs::IndexFormat index_format, u32 num_vertices, u32 base_vertex,
VkBuffer src_buffer, u64 src_offset) {
const u32 index_shift = [index_format] {
switch (index_format) {
case Tegra::Engines::Maxwell3D::Regs::IndexFormat::UnsignedByte:
return 0;
case Tegra::Engines::Maxwell3D::Regs::IndexFormat::UnsignedShort:
return 1;
case Tegra::Engines::Maxwell3D::Regs::IndexFormat::UnsignedInt:
return 2;
}
UNREACHABLE();
return 2;
}();
const u32 input_size = num_vertices << index_shift;
const u32 num_tri_vertices = (num_vertices / 4) * 6;
const std::size_t staging_size = num_tri_vertices * sizeof(u32);
auto& buffer = staging_buffer_pool.GetUnusedBuffer(staging_size, false);
update_descriptor_queue.Acquire();
update_descriptor_queue.AddBuffer(src_buffer, src_offset, input_size);
update_descriptor_queue.AddBuffer(*buffer.handle, 0, staging_size);
const VkDescriptorSet set = CommitDescriptorSet(update_descriptor_queue);
scheduler.RequestOutsideRenderPassOperationContext();
scheduler.Record([layout = *layout, pipeline = *pipeline, buffer = *buffer.handle, set,
num_tri_vertices, base_vertex, index_shift](vk::CommandBuffer cmdbuf) {
static constexpr u32 dispatch_size = 1024;
const std::array push_constants = {base_vertex, index_shift};
cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, layout, 0, set, {});
cmdbuf.PushConstants(layout, VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(push_constants),
&push_constants);
cmdbuf.Dispatch(Common::AlignUp(num_tri_vertices, dispatch_size) / dispatch_size, 1, 1);
VkBufferMemoryBarrier barrier;
barrier.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER;
barrier.pNext = nullptr;
barrier.srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT;
barrier.dstAccessMask = VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT;
barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
barrier.buffer = buffer;
barrier.offset = 0;
barrier.size = static_cast<VkDeviceSize>(num_tri_vertices * sizeof(u32));
cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
VK_PIPELINE_STAGE_VERTEX_INPUT_BIT, 0, {}, barrier, {});
});
return {*buffer.handle, 0};
}
} // namespace Vulkan

View File

@@ -0,0 +1,94 @@
// Copyright 2019 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <optional>
#include <span>
#include <utility>
#include "common/common_types.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/renderer_vulkan/vk_descriptor_pool.h"
#include "video_core/renderer_vulkan/wrapper.h"
namespace Vulkan {
class VKDevice;
class VKScheduler;
class VKStagingBufferPool;
class VKUpdateDescriptorQueue;
class VKComputePass {
public:
explicit VKComputePass(const VKDevice& device, VKDescriptorPool& descriptor_pool,
vk::Span<VkDescriptorSetLayoutBinding> bindings,
vk::Span<VkDescriptorUpdateTemplateEntryKHR> templates,
vk::Span<VkPushConstantRange> push_constants, std::span<const u32> code);
~VKComputePass();
protected:
VkDescriptorSet CommitDescriptorSet(VKUpdateDescriptorQueue& update_descriptor_queue);
vk::DescriptorUpdateTemplateKHR descriptor_template;
vk::PipelineLayout layout;
vk::Pipeline pipeline;
private:
vk::DescriptorSetLayout descriptor_set_layout;
std::optional<DescriptorAllocator> descriptor_allocator;
vk::ShaderModule module;
};
class QuadArrayPass final : public VKComputePass {
public:
explicit QuadArrayPass(const VKDevice& device_, VKScheduler& scheduler_,
VKDescriptorPool& descriptor_pool_,
VKStagingBufferPool& staging_buffer_pool_,
VKUpdateDescriptorQueue& update_descriptor_queue_);
~QuadArrayPass();
std::pair<VkBuffer, VkDeviceSize> Assemble(u32 num_vertices, u32 first);
private:
VKScheduler& scheduler;
VKStagingBufferPool& staging_buffer_pool;
VKUpdateDescriptorQueue& update_descriptor_queue;
};
class Uint8Pass final : public VKComputePass {
public:
explicit Uint8Pass(const VKDevice& device_, VKScheduler& scheduler_,
VKDescriptorPool& descriptor_pool_,
VKStagingBufferPool& staging_buffer_pool_,
VKUpdateDescriptorQueue& update_descriptor_queue_);
~Uint8Pass();
std::pair<VkBuffer, u64> Assemble(u32 num_vertices, VkBuffer src_buffer, u64 src_offset);
private:
VKScheduler& scheduler;
VKStagingBufferPool& staging_buffer_pool;
VKUpdateDescriptorQueue& update_descriptor_queue;
};
class QuadIndexedPass final : public VKComputePass {
public:
explicit QuadIndexedPass(const VKDevice& device_, VKScheduler& scheduler_,
VKDescriptorPool& descriptor_pool_,
VKStagingBufferPool& staging_buffer_pool_,
VKUpdateDescriptorQueue& update_descriptor_queue_);
~QuadIndexedPass();
std::pair<VkBuffer, u64> Assemble(Tegra::Engines::Maxwell3D::Regs::IndexFormat index_format,
u32 num_vertices, u32 base_vertex, VkBuffer src_buffer,
u64 src_offset);
private:
VKScheduler& scheduler;
VKStagingBufferPool& staging_buffer_pool;
VKUpdateDescriptorQueue& update_descriptor_queue;
};
} // namespace Vulkan

View File

@@ -0,0 +1,153 @@
// Copyright 2019 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <vector>
#include "video_core/renderer_vulkan/vk_compute_pipeline.h"
#include "video_core/renderer_vulkan/vk_descriptor_pool.h"
#include "video_core/renderer_vulkan/vk_device.h"
#include "video_core/renderer_vulkan/vk_pipeline_cache.h"
#include "video_core/renderer_vulkan/vk_scheduler.h"
#include "video_core/renderer_vulkan/vk_shader_decompiler.h"
#include "video_core/renderer_vulkan/vk_update_descriptor.h"
#include "video_core/renderer_vulkan/wrapper.h"
namespace Vulkan {
VKComputePipeline::VKComputePipeline(const VKDevice& device_, VKScheduler& scheduler_,
VKDescriptorPool& descriptor_pool_,
VKUpdateDescriptorQueue& update_descriptor_queue_,
const SPIRVShader& shader_)
: device{device_}, scheduler{scheduler_}, entries{shader_.entries},
descriptor_set_layout{CreateDescriptorSetLayout()},
descriptor_allocator{descriptor_pool_, *descriptor_set_layout},
update_descriptor_queue{update_descriptor_queue_}, layout{CreatePipelineLayout()},
descriptor_template{CreateDescriptorUpdateTemplate()},
shader_module{CreateShaderModule(shader_.code)}, pipeline{CreatePipeline()} {}
VKComputePipeline::~VKComputePipeline() = default;
VkDescriptorSet VKComputePipeline::CommitDescriptorSet() {
if (!descriptor_template) {
return {};
}
const VkDescriptorSet set = descriptor_allocator.Commit();
update_descriptor_queue.Send(*descriptor_template, set);
return set;
}
vk::DescriptorSetLayout VKComputePipeline::CreateDescriptorSetLayout() const {
std::vector<VkDescriptorSetLayoutBinding> bindings;
u32 binding = 0;
const auto add_bindings = [&](VkDescriptorType descriptor_type, std::size_t num_entries) {
// TODO(Rodrigo): Maybe make individual bindings here?
for (u32 bindpoint = 0; bindpoint < static_cast<u32>(num_entries); ++bindpoint) {
bindings.push_back({
.binding = binding++,
.descriptorType = descriptor_type,
.descriptorCount = 1,
.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
.pImmutableSamplers = nullptr,
});
}
};
add_bindings(VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, entries.const_buffers.size());
add_bindings(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, entries.global_buffers.size());
add_bindings(VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, entries.uniform_texels.size());
add_bindings(VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, entries.samplers.size());
add_bindings(VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER, entries.storage_texels.size());
add_bindings(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, entries.images.size());
return device.GetLogical().CreateDescriptorSetLayout({
.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
.bindingCount = static_cast<u32>(bindings.size()),
.pBindings = bindings.data(),
});
}
vk::PipelineLayout VKComputePipeline::CreatePipelineLayout() const {
return device.GetLogical().CreatePipelineLayout({
.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
.setLayoutCount = 1,
.pSetLayouts = descriptor_set_layout.address(),
.pushConstantRangeCount = 0,
.pPushConstantRanges = nullptr,
});
}
vk::DescriptorUpdateTemplateKHR VKComputePipeline::CreateDescriptorUpdateTemplate() const {
std::vector<VkDescriptorUpdateTemplateEntryKHR> template_entries;
u32 binding = 0;
u32 offset = 0;
FillDescriptorUpdateTemplateEntries(entries, binding, offset, template_entries);
if (template_entries.empty()) {
// If the shader doesn't use descriptor sets, skip template creation.
return {};
}
return device.GetLogical().CreateDescriptorUpdateTemplateKHR({
.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_UPDATE_TEMPLATE_CREATE_INFO_KHR,
.pNext = nullptr,
.flags = 0,
.descriptorUpdateEntryCount = static_cast<u32>(template_entries.size()),
.pDescriptorUpdateEntries = template_entries.data(),
.templateType = VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_DESCRIPTOR_SET_KHR,
.descriptorSetLayout = *descriptor_set_layout,
.pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,
.pipelineLayout = *layout,
.set = DESCRIPTOR_SET,
});
}
vk::ShaderModule VKComputePipeline::CreateShaderModule(const std::vector<u32>& code) const {
device.SaveShader(code);
return device.GetLogical().CreateShaderModule({
.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
.codeSize = code.size() * sizeof(u32),
.pCode = code.data(),
});
}
vk::Pipeline VKComputePipeline::CreatePipeline() const {
VkComputePipelineCreateInfo ci{
.sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
.stage =
{
.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
.stage = VK_SHADER_STAGE_COMPUTE_BIT,
.module = *shader_module,
.pName = "main",
.pSpecializationInfo = nullptr,
},
.layout = *layout,
.basePipelineHandle = nullptr,
.basePipelineIndex = 0,
};
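    // When the driver allows it, require the guest warp size so subgroup operations behave like
    // they do on the guest GPU.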
const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT subgroup_size_ci{
.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT,
.pNext = nullptr,
.requiredSubgroupSize = GuestWarpSize,
};
if (entries.uses_warps && device.IsGuestWarpSizeSupported(VK_SHADER_STAGE_COMPUTE_BIT)) {
ci.stage.pNext = &subgroup_size_ci;
}
return device.GetLogical().CreateComputePipeline(ci);
}
} // namespace Vulkan

View File

@@ -0,0 +1,64 @@
// Copyright 2019 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include "common/common_types.h"
#include "video_core/renderer_vulkan/vk_descriptor_pool.h"
#include "video_core/renderer_vulkan/vk_shader_decompiler.h"
#include "video_core/renderer_vulkan/wrapper.h"
namespace Vulkan {
class VKDevice;
class VKScheduler;
class VKUpdateDescriptorQueue;
class VKComputePipeline final {
public:
explicit VKComputePipeline(const VKDevice& device_, VKScheduler& scheduler_,
VKDescriptorPool& descriptor_pool_,
VKUpdateDescriptorQueue& update_descriptor_queue_,
const SPIRVShader& shader_);
~VKComputePipeline();
VkDescriptorSet CommitDescriptorSet();
VkPipeline GetHandle() const {
return *pipeline;
}
VkPipelineLayout GetLayout() const {
return *layout;
}
const ShaderEntries& GetEntries() const {
return entries;
}
private:
vk::DescriptorSetLayout CreateDescriptorSetLayout() const;
vk::PipelineLayout CreatePipelineLayout() const;
vk::DescriptorUpdateTemplateKHR CreateDescriptorUpdateTemplate() const;
vk::ShaderModule CreateShaderModule(const std::vector<u32>& code) const;
vk::Pipeline CreatePipeline() const;
const VKDevice& device;
VKScheduler& scheduler;
ShaderEntries entries;
vk::DescriptorSetLayout descriptor_set_layout;
DescriptorAllocator descriptor_allocator;
VKUpdateDescriptorQueue& update_descriptor_queue;
vk::PipelineLayout layout;
vk::DescriptorUpdateTemplateKHR descriptor_template;
vk::ShaderModule shader_module;
vk::Pipeline pipeline;
};
} // namespace Vulkan

View File

@@ -0,0 +1,91 @@
// Copyright 2019 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <vector>
#include "common/common_types.h"
#include "video_core/renderer_vulkan/vk_descriptor_pool.h"
#include "video_core/renderer_vulkan/vk_device.h"
#include "video_core/renderer_vulkan/vk_resource_pool.h"
#include "video_core/renderer_vulkan/vk_scheduler.h"
#include "video_core/renderer_vulkan/wrapper.h"
namespace Vulkan {
// Prefer small grow rates to avoid saturating the descriptor pool with barely used pipelines.
constexpr std::size_t SETS_GROW_RATE = 0x20;
DescriptorAllocator::DescriptorAllocator(VKDescriptorPool& descriptor_pool_,
VkDescriptorSetLayout layout_)
: ResourcePool(descriptor_pool_.master_semaphore, SETS_GROW_RATE),
descriptor_pool{descriptor_pool_}, layout{layout_} {}
DescriptorAllocator::~DescriptorAllocator() = default;
VkDescriptorSet DescriptorAllocator::Commit() {
const std::size_t index = CommitResource();
return descriptors_allocations[index / SETS_GROW_RATE][index % SETS_GROW_RATE];
}
void DescriptorAllocator::Allocate(std::size_t begin, std::size_t end) {
descriptors_allocations.push_back(descriptor_pool.AllocateDescriptors(layout, end - begin));
}
VKDescriptorPool::VKDescriptorPool(const VKDevice& device_, VKScheduler& scheduler)
: device{device_}, master_semaphore{scheduler.GetMasterSemaphore()}, active_pool{
AllocateNewPool()} {}
VKDescriptorPool::~VKDescriptorPool() = default;
vk::DescriptorPool* VKDescriptorPool::AllocateNewPool() {
static constexpr u32 num_sets = 0x20000;
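    // Each multiplier is a heuristic for how many descriptors of that type a set uses on average.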
static constexpr VkDescriptorPoolSize pool_sizes[] = {
{VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, num_sets * 90},
{VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, num_sets * 60},
{VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, num_sets * 64},
{VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, num_sets * 64},
{VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER, num_sets * 64},
{VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, num_sets * 40},
};
const VkDescriptorPoolCreateInfo ci{
.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO,
.pNext = nullptr,
.flags = VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT,
.maxSets = num_sets,
.poolSizeCount = static_cast<u32>(std::size(pool_sizes)),
.pPoolSizes = std::data(pool_sizes),
};
return &pools.emplace_back(device.GetLogical().CreateDescriptorPool(ci));
}
vk::DescriptorSets VKDescriptorPool::AllocateDescriptors(VkDescriptorSetLayout layout,
std::size_t count) {
const std::vector layout_copies(count, layout);
VkDescriptorSetAllocateInfo ai{
.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO,
.pNext = nullptr,
.descriptorPool = **active_pool,
.descriptorSetCount = static_cast<u32>(count),
.pSetLayouts = layout_copies.data(),
};
vk::DescriptorSets sets = active_pool->Allocate(ai);
if (!sets.IsOutOfPoolMemory()) {
return sets;
}
// Our current pool is out of memory. Allocate a new one and retry
active_pool = AllocateNewPool();
ai.descriptorPool = **active_pool;
sets = active_pool->Allocate(ai);
if (!sets.IsOutOfPoolMemory()) {
return sets;
}
// After allocating a new pool, we are out of memory again. We can't handle this from here.
throw vk::Exception(VK_ERROR_OUT_OF_POOL_MEMORY);
}
} // namespace Vulkan

View File

@@ -0,0 +1,60 @@
// Copyright 2019 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <vector>
#include "video_core/renderer_vulkan/vk_resource_pool.h"
#include "video_core/renderer_vulkan/wrapper.h"
namespace Vulkan {
class VKDevice;
class VKDescriptorPool;
class VKScheduler;
class DescriptorAllocator final : public ResourcePool {
public:
explicit DescriptorAllocator(VKDescriptorPool& descriptor_pool, VkDescriptorSetLayout layout);
~DescriptorAllocator() override;
DescriptorAllocator& operator=(const DescriptorAllocator&) = delete;
DescriptorAllocator(const DescriptorAllocator&) = delete;
VkDescriptorSet Commit();
protected:
void Allocate(std::size_t begin, std::size_t end) override;
private:
VKDescriptorPool& descriptor_pool;
const VkDescriptorSetLayout layout;
std::vector<vk::DescriptorSets> descriptors_allocations;
};
class VKDescriptorPool final {
friend DescriptorAllocator;
public:
explicit VKDescriptorPool(const VKDevice& device, VKScheduler& scheduler);
~VKDescriptorPool();
VKDescriptorPool(const VKDescriptorPool&) = delete;
VKDescriptorPool& operator=(const VKDescriptorPool&) = delete;
private:
vk::DescriptorPool* AllocateNewPool();
vk::DescriptorSets AllocateDescriptors(VkDescriptorSetLayout layout, std::size_t count);
const VKDevice& device;
MasterSemaphore& master_semaphore;
std::vector<vk::DescriptorPool> pools;
vk::DescriptorPool* active_pool;
};
} // namespace Vulkan

View File

@@ -0,0 +1,911 @@
// Copyright 2018 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <bitset>
#include <chrono>
#include <optional>
#include <string_view>
#include <thread>
#include <unordered_set>
#include <utility>
#include <vector>
#include "common/assert.h"
#include "core/settings.h"
#include "video_core/renderer_vulkan/vk_device.h"
#include "video_core/renderer_vulkan/wrapper.h"
namespace Vulkan {
namespace {
namespace Alternatives {
constexpr std::array Depth24UnormS8_UINT{
VK_FORMAT_D32_SFLOAT_S8_UINT,
VK_FORMAT_D16_UNORM_S8_UINT,
VkFormat{},
};
constexpr std::array Depth16UnormS8_UINT{
VK_FORMAT_D24_UNORM_S8_UINT,
VK_FORMAT_D32_SFLOAT_S8_UINT,
VkFormat{},
};
} // namespace Alternatives
constexpr std::array REQUIRED_EXTENSIONS{
VK_KHR_SWAPCHAIN_EXTENSION_NAME,
VK_KHR_MAINTENANCE1_EXTENSION_NAME,
VK_KHR_STORAGE_BUFFER_STORAGE_CLASS_EXTENSION_NAME,
VK_KHR_SHADER_DRAW_PARAMETERS_EXTENSION_NAME,
VK_KHR_16BIT_STORAGE_EXTENSION_NAME,
VK_KHR_8BIT_STORAGE_EXTENSION_NAME,
VK_KHR_DRIVER_PROPERTIES_EXTENSION_NAME,
VK_KHR_DESCRIPTOR_UPDATE_TEMPLATE_EXTENSION_NAME,
VK_KHR_TIMELINE_SEMAPHORE_EXTENSION_NAME,
VK_KHR_SAMPLER_MIRROR_CLAMP_TO_EDGE_EXTENSION_NAME,
VK_EXT_VERTEX_ATTRIBUTE_DIVISOR_EXTENSION_NAME,
VK_EXT_SHADER_SUBGROUP_BALLOT_EXTENSION_NAME,
VK_EXT_SHADER_SUBGROUP_VOTE_EXTENSION_NAME,
VK_EXT_HOST_QUERY_RESET_EXTENSION_NAME,
};
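// Appends a feature struct to a Vulkan pNext chain: stores its address in the current
// tail pointer and advances the tail to the struct's own pNext member.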
template <typename T>
void SetNext(void**& next, T& data) {
*next = &data;
next = &data.pNext;
}
constexpr const VkFormat* GetFormatAlternatives(VkFormat format) {
switch (format) {
case VK_FORMAT_D24_UNORM_S8_UINT:
return Alternatives::Depth24UnormS8_UINT.data();
case VK_FORMAT_D16_UNORM_S8_UINT:
return Alternatives::Depth16UnormS8_UINT.data();
default:
return nullptr;
}
}
VkFormatFeatureFlags GetFormatFeatures(VkFormatProperties properties, FormatType format_type) {
switch (format_type) {
case FormatType::Linear:
return properties.linearTilingFeatures;
case FormatType::Optimal:
return properties.optimalTilingFeatures;
case FormatType::Buffer:
return properties.bufferFeatures;
default:
return {};
}
}
[[nodiscard]] bool IsRDNA(std::string_view device_name, VkDriverIdKHR driver_id) {
static constexpr std::array RDNA_DEVICES{
"5700",
"5600",
"5500",
"5300",
};
if (driver_id != VK_DRIVER_ID_AMD_PROPRIETARY_KHR) {
return false;
}
return std::any_of(RDNA_DEVICES.begin(), RDNA_DEVICES.end(), [device_name](const char* name) {
return device_name.find(name) != std::string_view::npos;
});
}
std::unordered_map<VkFormat, VkFormatProperties> GetFormatProperties(
vk::PhysicalDevice physical, const vk::InstanceDispatch& dld) {
static constexpr std::array formats{
VK_FORMAT_A8B8G8R8_UNORM_PACK32,
VK_FORMAT_A8B8G8R8_UINT_PACK32,
VK_FORMAT_A8B8G8R8_SNORM_PACK32,
VK_FORMAT_A8B8G8R8_SINT_PACK32,
VK_FORMAT_A8B8G8R8_SRGB_PACK32,
VK_FORMAT_B5G6R5_UNORM_PACK16,
VK_FORMAT_A2B10G10R10_UNORM_PACK32,
VK_FORMAT_A2B10G10R10_UINT_PACK32,
VK_FORMAT_A1R5G5B5_UNORM_PACK16,
VK_FORMAT_R32G32B32A32_SFLOAT,
VK_FORMAT_R32G32B32A32_SINT,
VK_FORMAT_R32G32B32A32_UINT,
VK_FORMAT_R32G32_SFLOAT,
VK_FORMAT_R32G32_SINT,
VK_FORMAT_R32G32_UINT,
VK_FORMAT_R16G16B16A16_SINT,
VK_FORMAT_R16G16B16A16_UINT,
VK_FORMAT_R16G16B16A16_SNORM,
VK_FORMAT_R16G16B16A16_UNORM,
VK_FORMAT_R16G16_UNORM,
VK_FORMAT_R16G16_SNORM,
VK_FORMAT_R16G16_SFLOAT,
VK_FORMAT_R16G16_SINT,
VK_FORMAT_R16_UNORM,
VK_FORMAT_R16_UINT,
VK_FORMAT_R8G8B8A8_SRGB,
VK_FORMAT_R8G8_UNORM,
VK_FORMAT_R8G8_SNORM,
VK_FORMAT_R8G8_SINT,
VK_FORMAT_R8G8_UINT,
VK_FORMAT_R8_UNORM,
VK_FORMAT_R8_SNORM,
VK_FORMAT_R8_SINT,
VK_FORMAT_R8_UINT,
VK_FORMAT_B10G11R11_UFLOAT_PACK32,
VK_FORMAT_R32_SFLOAT,
VK_FORMAT_R32_UINT,
VK_FORMAT_R32_SINT,
VK_FORMAT_R16_SFLOAT,
VK_FORMAT_R16G16B16A16_SFLOAT,
VK_FORMAT_B8G8R8A8_UNORM,
VK_FORMAT_B8G8R8A8_SRGB,
VK_FORMAT_R4G4B4A4_UNORM_PACK16,
VK_FORMAT_D32_SFLOAT,
VK_FORMAT_D16_UNORM,
VK_FORMAT_D16_UNORM_S8_UINT,
VK_FORMAT_D24_UNORM_S8_UINT,
VK_FORMAT_D32_SFLOAT_S8_UINT,
VK_FORMAT_BC1_RGBA_UNORM_BLOCK,
VK_FORMAT_BC2_UNORM_BLOCK,
VK_FORMAT_BC3_UNORM_BLOCK,
VK_FORMAT_BC4_UNORM_BLOCK,
VK_FORMAT_BC4_SNORM_BLOCK,
VK_FORMAT_BC5_UNORM_BLOCK,
VK_FORMAT_BC5_SNORM_BLOCK,
VK_FORMAT_BC7_UNORM_BLOCK,
VK_FORMAT_BC6H_UFLOAT_BLOCK,
VK_FORMAT_BC6H_SFLOAT_BLOCK,
VK_FORMAT_BC1_RGBA_SRGB_BLOCK,
VK_FORMAT_BC2_SRGB_BLOCK,
VK_FORMAT_BC3_SRGB_BLOCK,
VK_FORMAT_BC7_SRGB_BLOCK,
VK_FORMAT_ASTC_4x4_UNORM_BLOCK,
VK_FORMAT_ASTC_4x4_SRGB_BLOCK,
VK_FORMAT_ASTC_5x4_UNORM_BLOCK,
VK_FORMAT_ASTC_5x4_SRGB_BLOCK,
VK_FORMAT_ASTC_5x5_UNORM_BLOCK,
VK_FORMAT_ASTC_5x5_SRGB_BLOCK,
VK_FORMAT_ASTC_6x5_UNORM_BLOCK,
VK_FORMAT_ASTC_6x5_SRGB_BLOCK,
VK_FORMAT_ASTC_6x6_UNORM_BLOCK,
VK_FORMAT_ASTC_6x6_SRGB_BLOCK,
VK_FORMAT_ASTC_8x5_UNORM_BLOCK,
VK_FORMAT_ASTC_8x5_SRGB_BLOCK,
VK_FORMAT_ASTC_8x6_UNORM_BLOCK,
VK_FORMAT_ASTC_8x6_SRGB_BLOCK,
VK_FORMAT_ASTC_8x8_UNORM_BLOCK,
VK_FORMAT_ASTC_8x8_SRGB_BLOCK,
VK_FORMAT_ASTC_10x5_UNORM_BLOCK,
VK_FORMAT_ASTC_10x5_SRGB_BLOCK,
VK_FORMAT_ASTC_10x6_UNORM_BLOCK,
VK_FORMAT_ASTC_10x6_SRGB_BLOCK,
VK_FORMAT_ASTC_10x8_UNORM_BLOCK,
VK_FORMAT_ASTC_10x8_SRGB_BLOCK,
VK_FORMAT_ASTC_10x10_UNORM_BLOCK,
VK_FORMAT_ASTC_10x10_SRGB_BLOCK,
VK_FORMAT_ASTC_12x10_UNORM_BLOCK,
VK_FORMAT_ASTC_12x10_SRGB_BLOCK,
VK_FORMAT_ASTC_12x12_UNORM_BLOCK,
VK_FORMAT_ASTC_12x12_SRGB_BLOCK,
VK_FORMAT_E5B9G9R9_UFLOAT_PACK32,
};
std::unordered_map<VkFormat, VkFormatProperties> format_properties;
for (const auto format : formats) {
format_properties.emplace(format, physical.GetFormatProperties(format));
}
return format_properties;
}
} // Anonymous namespace
VKDevice::VKDevice(VkInstance instance_, u32 instance_version_, vk::PhysicalDevice physical_,
VkSurfaceKHR surface, const vk::InstanceDispatch& dld_)
: instance{instance_}, dld{dld_}, physical{physical_}, properties{physical.GetProperties()},
instance_version{instance_version_}, format_properties{GetFormatProperties(physical, dld)} {
SetupFamilies(surface);
SetupFeatures();
}
VKDevice::~VKDevice() = default;
bool VKDevice::Create() {
const auto queue_cis = GetDeviceQueueCreateInfos();
const std::vector extensions = LoadExtensions();
VkPhysicalDeviceFeatures2 features2{
.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2,
.pNext = nullptr,
};
const void* first_next = &features2;
void** next = &features2.pNext;
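// 'first_next' is the head of the pNext chain handed to device creation; 'next' tracks
// the tail so SetNext can keep appending feature structs below.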
features2.features = {
.robustBufferAccess = false,
.fullDrawIndexUint32 = false,
.imageCubeArray = true,
.independentBlend = true,
.geometryShader = true,
.tessellationShader = true,
.sampleRateShading = false,
.dualSrcBlend = false,
.logicOp = false,
.multiDrawIndirect = false,
.drawIndirectFirstInstance = false,
.depthClamp = true,
.depthBiasClamp = true,
.fillModeNonSolid = false,
.depthBounds = false,
.wideLines = false,
.largePoints = true,
.alphaToOne = false,
.multiViewport = true,
.samplerAnisotropy = true,
.textureCompressionETC2 = false,
.textureCompressionASTC_LDR = is_optimal_astc_supported,
.textureCompressionBC = false,
.occlusionQueryPrecise = true,
.pipelineStatisticsQuery = false,
.vertexPipelineStoresAndAtomics = true,
.fragmentStoresAndAtomics = true,
.shaderTessellationAndGeometryPointSize = false,
.shaderImageGatherExtended = true,
.shaderStorageImageExtendedFormats = false,
.shaderStorageImageMultisample = true,
.shaderStorageImageReadWithoutFormat = is_formatless_image_load_supported,
.shaderStorageImageWriteWithoutFormat = true,
.shaderUniformBufferArrayDynamicIndexing = false,
.shaderSampledImageArrayDynamicIndexing = false,
.shaderStorageBufferArrayDynamicIndexing = false,
.shaderStorageImageArrayDynamicIndexing = false,
.shaderClipDistance = false,
.shaderCullDistance = false,
.shaderFloat64 = false,
.shaderInt64 = false,
.shaderInt16 = false,
.shaderResourceResidency = false,
.shaderResourceMinLod = false,
.sparseBinding = false,
.sparseResidencyBuffer = false,
.sparseResidencyImage2D = false,
.sparseResidencyImage3D = false,
.sparseResidency2Samples = false,
.sparseResidency4Samples = false,
.sparseResidency8Samples = false,
.sparseResidency16Samples = false,
.sparseResidencyAliased = false,
.variableMultisampleRate = false,
.inheritedQueries = false,
};
VkPhysicalDeviceTimelineSemaphoreFeaturesKHR timeline_semaphore{
.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TIMELINE_SEMAPHORE_FEATURES_KHR,
.pNext = nullptr,
.timelineSemaphore = true,
};
SetNext(next, timeline_semaphore);
VkPhysicalDevice16BitStorageFeaturesKHR bit16_storage{
.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_16BIT_STORAGE_FEATURES_KHR,
.pNext = nullptr,
.storageBuffer16BitAccess = false,
.uniformAndStorageBuffer16BitAccess = true,
.storagePushConstant16 = false,
.storageInputOutput16 = false,
};
SetNext(next, bit16_storage);
VkPhysicalDevice8BitStorageFeaturesKHR bit8_storage{
.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_8BIT_STORAGE_FEATURES_KHR,
.pNext = nullptr,
.storageBuffer8BitAccess = false,
.uniformAndStorageBuffer8BitAccess = true,
.storagePushConstant8 = false,
};
SetNext(next, bit8_storage);
VkPhysicalDeviceHostQueryResetFeaturesEXT host_query_reset{
.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_HOST_QUERY_RESET_FEATURES_EXT,
.pNext = nullptr,
.hostQueryReset = true,
};
SetNext(next, host_query_reset);
VkPhysicalDeviceFloat16Int8FeaturesKHR float16_int8;
if (is_float16_supported) {
float16_int8 = {
.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FLOAT16_INT8_FEATURES_KHR,
.pNext = nullptr,
.shaderFloat16 = true,
.shaderInt8 = false,
};
SetNext(next, float16_int8);
} else {
LOG_INFO(Render_Vulkan, "Device doesn't support float16 natively");
}
if (!nv_viewport_swizzle) {
LOG_INFO(Render_Vulkan, "Device doesn't support viewport swizzles");
}
VkPhysicalDeviceUniformBufferStandardLayoutFeaturesKHR std430_layout;
if (khr_uniform_buffer_standard_layout) {
std430_layout = {
.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_UNIFORM_BUFFER_STANDARD_LAYOUT_FEATURES_KHR,
.pNext = nullptr,
.uniformBufferStandardLayout = true,
};
SetNext(next, std430_layout);
} else {
LOG_INFO(Render_Vulkan, "Device doesn't support packed UBOs");
}
VkPhysicalDeviceIndexTypeUint8FeaturesEXT index_type_uint8;
if (ext_index_type_uint8) {
index_type_uint8 = {
.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_INDEX_TYPE_UINT8_FEATURES_EXT,
.pNext = nullptr,
.indexTypeUint8 = true,
};
SetNext(next, index_type_uint8);
} else {
LOG_INFO(Render_Vulkan, "Device doesn't support uint8 indexes");
}
VkPhysicalDeviceTransformFeedbackFeaturesEXT transform_feedback;
if (ext_transform_feedback) {
transform_feedback = {
.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TRANSFORM_FEEDBACK_FEATURES_EXT,
.pNext = nullptr,
.transformFeedback = true,
.geometryStreams = true,
};
SetNext(next, transform_feedback);
} else {
LOG_INFO(Render_Vulkan, "Device doesn't support transform feedbacks");
}
VkPhysicalDeviceCustomBorderColorFeaturesEXT custom_border;
if (ext_custom_border_color) {
custom_border = {
.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_CUSTOM_BORDER_COLOR_FEATURES_EXT,
.pNext = nullptr,
.customBorderColors = VK_TRUE,
.customBorderColorWithoutFormat = VK_TRUE,
};
SetNext(next, custom_border);
} else {
LOG_INFO(Render_Vulkan, "Device doesn't support custom border colors");
}
VkPhysicalDeviceExtendedDynamicStateFeaturesEXT dynamic_state;
if (ext_extended_dynamic_state) {
dynamic_state = {
.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTENDED_DYNAMIC_STATE_FEATURES_EXT,
.pNext = nullptr,
.extendedDynamicState = VK_TRUE,
};
SetNext(next, dynamic_state);
} else {
LOG_INFO(Render_Vulkan, "Device doesn't support extended dynamic state");
}
VkPhysicalDeviceRobustness2FeaturesEXT robustness2;
if (ext_robustness2) {
robustness2 = {
.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ROBUSTNESS_2_FEATURES_EXT,
.pNext = nullptr,
.robustBufferAccess2 = false,
.robustImageAccess2 = true,
.nullDescriptor = true,
};
SetNext(next, robustness2);
} else {
LOG_INFO(Render_Vulkan, "Device doesn't support robustness2");
}
if (!ext_depth_range_unrestricted) {
LOG_INFO(Render_Vulkan, "Device doesn't support depth range unrestricted");
}
VkDeviceDiagnosticsConfigCreateInfoNV diagnostics_nv;
if (nv_device_diagnostics_config) {
nsight_aftermath_tracker.Initialize();
diagnostics_nv = {
.sType = VK_STRUCTURE_TYPE_DEVICE_DIAGNOSTICS_CONFIG_CREATE_INFO_NV,
.pNext = &features2,
.flags = VK_DEVICE_DIAGNOSTICS_CONFIG_ENABLE_SHADER_DEBUG_INFO_BIT_NV |
VK_DEVICE_DIAGNOSTICS_CONFIG_ENABLE_RESOURCE_TRACKING_BIT_NV |
VK_DEVICE_DIAGNOSTICS_CONFIG_ENABLE_AUTOMATIC_CHECKPOINTS_BIT_NV,
};
first_next = &diagnostics_nv;
}
logical = vk::Device::Create(physical, queue_cis, extensions, first_next, dld);
if (!logical) {
LOG_ERROR(Render_Vulkan, "Failed to create logical device");
return false;
}
CollectTelemetryParameters();
CollectToolingInfo();
if (ext_extended_dynamic_state && driver_id == VK_DRIVER_ID_MESA_RADV) {
LOG_WARNING(
Render_Vulkan,
"Blacklisting RADV for VK_EXT_extended_dynamic state, likely due to a bug in yuzu");
ext_extended_dynamic_state = false;
}
if (ext_extended_dynamic_state && IsRDNA(properties.deviceName, driver_id)) {
// AMD's proprietary driver supports VK_EXT_extended_dynamic_state but on RDNA devices it
// seems to cause stability issues
LOG_WARNING(
Render_Vulkan,
"Blacklisting AMD proprietary on RDNA devices from VK_EXT_extended_dynamic_state");
ext_extended_dynamic_state = false;
}
graphics_queue = logical.GetQueue(graphics_family);
present_queue = logical.GetQueue(present_family);
use_asynchronous_shaders = Settings::values.use_asynchronous_shaders.GetValue();
return true;
}
VkFormat VKDevice::GetSupportedFormat(VkFormat wanted_format, VkFormatFeatureFlags wanted_usage,
FormatType format_type) const {
if (IsFormatSupported(wanted_format, wanted_usage, format_type)) {
return wanted_format;
}
// The wanted format is not supported by hardware; search for alternatives
const VkFormat* alternatives = GetFormatAlternatives(wanted_format);
if (alternatives == nullptr) {
UNREACHABLE_MSG("Format={} with usage={} and type={} has no defined alternatives and host "
"hardware does not support it",
wanted_format, wanted_usage, format_type);
return wanted_format;
}
std::size_t i = 0;
for (VkFormat alternative = *alternatives; alternative; alternative = alternatives[++i]) {
if (!IsFormatSupported(alternative, wanted_usage, format_type)) {
continue;
}
LOG_WARNING(Render_Vulkan,
"Emulating format={} with alternative format={} with usage={} and type={}",
wanted_format, alternative, wanted_usage, format_type);
return alternative;
}
// No alternatives found, panic
UNREACHABLE_MSG("Format={} with usage={} and type={} is not supported by the host hardware and "
"doesn't support any of the alternatives",
wanted_format, wanted_usage, format_type);
return wanted_format;
}
void VKDevice::ReportLoss() const {
LOG_CRITICAL(Render_Vulkan, "Device loss occurred!");
// Wait for the log to flush and for Nsight Aftermath to dump the results
std::this_thread::sleep_for(std::chrono::seconds{15});
}
void VKDevice::SaveShader(const std::vector<u32>& spirv) const {
nsight_aftermath_tracker.SaveShader(spirv);
}
bool VKDevice::IsOptimalAstcSupported(const VkPhysicalDeviceFeatures& features) const {
// Disable for now to avoid converting ASTC twice.
static constexpr std::array astc_formats = {
VK_FORMAT_ASTC_4x4_UNORM_BLOCK, VK_FORMAT_ASTC_4x4_SRGB_BLOCK,
VK_FORMAT_ASTC_5x4_UNORM_BLOCK, VK_FORMAT_ASTC_5x4_SRGB_BLOCK,
VK_FORMAT_ASTC_5x5_UNORM_BLOCK, VK_FORMAT_ASTC_5x5_SRGB_BLOCK,
VK_FORMAT_ASTC_6x5_UNORM_BLOCK, VK_FORMAT_ASTC_6x5_SRGB_BLOCK,
VK_FORMAT_ASTC_6x6_UNORM_BLOCK, VK_FORMAT_ASTC_6x6_SRGB_BLOCK,
VK_FORMAT_ASTC_8x5_UNORM_BLOCK, VK_FORMAT_ASTC_8x5_SRGB_BLOCK,
VK_FORMAT_ASTC_8x6_UNORM_BLOCK, VK_FORMAT_ASTC_8x6_SRGB_BLOCK,
VK_FORMAT_ASTC_8x8_UNORM_BLOCK, VK_FORMAT_ASTC_8x8_SRGB_BLOCK,
VK_FORMAT_ASTC_10x5_UNORM_BLOCK, VK_FORMAT_ASTC_10x5_SRGB_BLOCK,
VK_FORMAT_ASTC_10x6_UNORM_BLOCK, VK_FORMAT_ASTC_10x6_SRGB_BLOCK,
VK_FORMAT_ASTC_10x8_UNORM_BLOCK, VK_FORMAT_ASTC_10x8_SRGB_BLOCK,
VK_FORMAT_ASTC_10x10_UNORM_BLOCK, VK_FORMAT_ASTC_10x10_SRGB_BLOCK,
VK_FORMAT_ASTC_12x10_UNORM_BLOCK, VK_FORMAT_ASTC_12x10_SRGB_BLOCK,
VK_FORMAT_ASTC_12x12_UNORM_BLOCK, VK_FORMAT_ASTC_12x12_SRGB_BLOCK,
};
if (!features.textureCompressionASTC_LDR) {
return false;
}
const auto format_feature_usage{
VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT | VK_FORMAT_FEATURE_BLIT_SRC_BIT |
VK_FORMAT_FEATURE_BLIT_DST_BIT | VK_FORMAT_FEATURE_TRANSFER_SRC_BIT |
VK_FORMAT_FEATURE_TRANSFER_DST_BIT};
for (const auto format : astc_formats) {
const auto physical_format_properties{physical.GetFormatProperties(format)};
if ((physical_format_properties.optimalTilingFeatures & format_feature_usage) == 0) {
return false;
}
}
return true;
}
bool VKDevice::TestDepthStencilBlits() const {
static constexpr VkFormatFeatureFlags required_features =
VK_FORMAT_FEATURE_BLIT_SRC_BIT | VK_FORMAT_FEATURE_BLIT_DST_BIT;
const auto test_features = [](VkFormatProperties props) {
return (props.optimalTilingFeatures & required_features) == required_features;
};
return test_features(format_properties.at(VK_FORMAT_D32_SFLOAT_S8_UINT)) &&
test_features(format_properties.at(VK_FORMAT_D24_UNORM_S8_UINT));
}
bool VKDevice::IsFormatSupported(VkFormat wanted_format, VkFormatFeatureFlags wanted_usage,
FormatType format_type) const {
const auto it = format_properties.find(wanted_format);
if (it == format_properties.end()) {
UNIMPLEMENTED_MSG("Unimplemented format query={}", wanted_format);
return true;
}
const auto supported_usage = GetFormatFeatures(it->second, format_type);
return (supported_usage & wanted_usage) == wanted_usage;
}
bool VKDevice::IsSuitable(vk::PhysicalDevice physical, VkSurfaceKHR surface) {
bool is_suitable = true;
std::bitset<REQUIRED_EXTENSIONS.size()> available_extensions;
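// Bit i of 'available_extensions' tracks whether REQUIRED_EXTENSIONS[i] was reported
// by the driver.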
for (const auto& prop : physical.EnumerateDeviceExtensionProperties()) {
for (std::size_t i = 0; i < REQUIRED_EXTENSIONS.size(); ++i) {
if (available_extensions[i]) {
continue;
}
const std::string_view name{prop.extensionName};
available_extensions[i] = name == REQUIRED_EXTENSIONS[i];
}
}
if (!available_extensions.all()) {
for (std::size_t i = 0; i < REQUIRED_EXTENSIONS.size(); ++i) {
if (available_extensions[i]) {
continue;
}
LOG_ERROR(Render_Vulkan, "Missing required extension: {}", REQUIRED_EXTENSIONS[i]);
is_suitable = false;
}
}
bool has_graphics{}, has_present{};
const std::vector queue_family_properties = physical.GetQueueFamilyProperties();
for (u32 i = 0; i < static_cast<u32>(queue_family_properties.size()); ++i) {
const auto& family = queue_family_properties[i];
if (family.queueCount == 0) {
continue;
}
has_graphics |= family.queueFlags & VK_QUEUE_GRAPHICS_BIT;
has_present |= physical.GetSurfaceSupportKHR(i, surface);
}
if (!has_graphics || !has_present) {
LOG_ERROR(Render_Vulkan, "Device lacks a graphics and present queue");
is_suitable = false;
}
// TODO(Rodrigo): Check if the device matches all requirements.
const auto properties{physical.GetProperties()};
const auto& limits{properties.limits};
constexpr u32 required_ubo_size = 65536;
if (limits.maxUniformBufferRange < required_ubo_size) {
LOG_ERROR(Render_Vulkan, "Device UBO size {} is too small, {} is required",
limits.maxUniformBufferRange, required_ubo_size);
is_suitable = false;
}
constexpr u32 required_num_viewports = 16;
if (limits.maxViewports < required_num_viewports) {
LOG_INFO(Render_Vulkan, "Device number of viewports {} is too small, {} is required",
limits.maxViewports, required_num_viewports);
is_suitable = false;
}
const auto features{physical.GetFeatures()};
const std::array feature_report = {
std::make_pair(features.vertexPipelineStoresAndAtomics, "vertexPipelineStoresAndAtomics"),
std::make_pair(features.imageCubeArray, "imageCubeArray"),
std::make_pair(features.independentBlend, "independentBlend"),
std::make_pair(features.depthClamp, "depthClamp"),
std::make_pair(features.samplerAnisotropy, "samplerAnisotropy"),
std::make_pair(features.largePoints, "largePoints"),
std::make_pair(features.multiViewport, "multiViewport"),
std::make_pair(features.depthBiasClamp, "depthBiasClamp"),
std::make_pair(features.geometryShader, "geometryShader"),
std::make_pair(features.tessellationShader, "tessellationShader"),
std::make_pair(features.occlusionQueryPrecise, "occlusionQueryPrecise"),
std::make_pair(features.fragmentStoresAndAtomics, "fragmentStoresAndAtomics"),
std::make_pair(features.shaderImageGatherExtended, "shaderImageGatherExtended"),
std::make_pair(features.shaderStorageImageMultisample, "shaderStorageImageMultisample"),
std::make_pair(features.shaderStorageImageWriteWithoutFormat,
"shaderStorageImageWriteWithoutFormat"),
};
for (const auto& [supported, name] : feature_report) {
if (supported) {
continue;
}
LOG_ERROR(Render_Vulkan, "Missing required feature: {}", name);
is_suitable = false;
}
if (!is_suitable) {
LOG_ERROR(Render_Vulkan, "{} is not suitable", properties.deviceName);
}
return is_suitable;
}
std::vector<const char*> VKDevice::LoadExtensions() {
std::vector<const char*> extensions;
extensions.reserve(7 + REQUIRED_EXTENSIONS.size());
extensions.insert(extensions.begin(), REQUIRED_EXTENSIONS.begin(), REQUIRED_EXTENSIONS.end());
bool has_khr_shader_float16_int8{};
bool has_ext_subgroup_size_control{};
bool has_ext_transform_feedback{};
bool has_ext_custom_border_color{};
bool has_ext_extended_dynamic_state{};
bool has_ext_robustness2{};
for (const VkExtensionProperties& extension : physical.EnumerateDeviceExtensionProperties()) {
const auto test = [&](std::optional<std::reference_wrapper<bool>> status, const char* name,
bool push) {
if (extension.extensionName != std::string_view(name)) {
return;
}
if (push) {
extensions.push_back(name);
}
if (status) {
status->get() = true;
}
};
test(nv_viewport_swizzle, VK_NV_VIEWPORT_SWIZZLE_EXTENSION_NAME, true);
test(khr_uniform_buffer_standard_layout,
VK_KHR_UNIFORM_BUFFER_STANDARD_LAYOUT_EXTENSION_NAME, true);
test(has_khr_shader_float16_int8, VK_KHR_SHADER_FLOAT16_INT8_EXTENSION_NAME, false);
test(ext_depth_range_unrestricted, VK_EXT_DEPTH_RANGE_UNRESTRICTED_EXTENSION_NAME, true);
test(ext_index_type_uint8, VK_EXT_INDEX_TYPE_UINT8_EXTENSION_NAME, true);
test(ext_sampler_filter_minmax, VK_EXT_SAMPLER_FILTER_MINMAX_EXTENSION_NAME, true);
test(ext_shader_viewport_index_layer, VK_EXT_SHADER_VIEWPORT_INDEX_LAYER_EXTENSION_NAME,
true);
test(ext_tooling_info, VK_EXT_TOOLING_INFO_EXTENSION_NAME, true);
test(ext_shader_stencil_export, VK_EXT_SHADER_STENCIL_EXPORT_EXTENSION_NAME, true);
test(has_ext_transform_feedback, VK_EXT_TRANSFORM_FEEDBACK_EXTENSION_NAME, false);
test(has_ext_custom_border_color, VK_EXT_CUSTOM_BORDER_COLOR_EXTENSION_NAME, false);
test(has_ext_extended_dynamic_state, VK_EXT_EXTENDED_DYNAMIC_STATE_EXTENSION_NAME, false);
test(has_ext_robustness2, VK_EXT_ROBUSTNESS_2_EXTENSION_NAME, false);
if (instance_version >= VK_API_VERSION_1_1) {
test(has_ext_subgroup_size_control, VK_EXT_SUBGROUP_SIZE_CONTROL_EXTENSION_NAME, false);
}
if (Settings::values.renderer_debug) {
test(nv_device_diagnostics_config, VK_NV_DEVICE_DIAGNOSTICS_CONFIG_EXTENSION_NAME,
true);
}
}
VkPhysicalDeviceFeatures2KHR features;
features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2_KHR;
VkPhysicalDeviceProperties2KHR physical_properties;
physical_properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2_KHR;
if (has_khr_shader_float16_int8) {
VkPhysicalDeviceFloat16Int8FeaturesKHR float16_int8_features;
float16_int8_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FLOAT16_INT8_FEATURES_KHR;
float16_int8_features.pNext = nullptr;
features.pNext = &float16_int8_features;
physical.GetFeatures2KHR(features);
is_float16_supported = float16_int8_features.shaderFloat16;
extensions.push_back(VK_KHR_SHADER_FLOAT16_INT8_EXTENSION_NAME);
}
if (has_ext_subgroup_size_control) {
VkPhysicalDeviceSubgroupSizeControlFeaturesEXT subgroup_features;
subgroup_features.sType =
VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_SIZE_CONTROL_FEATURES_EXT;
subgroup_features.pNext = nullptr;
features.pNext = &subgroup_features;
physical.GetFeatures2KHR(features);
VkPhysicalDeviceSubgroupSizeControlPropertiesEXT subgroup_properties;
subgroup_properties.sType =
VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_SIZE_CONTROL_PROPERTIES_EXT;
subgroup_properties.pNext = nullptr;
physical_properties.pNext = &subgroup_properties;
physical.GetProperties2KHR(physical_properties);
is_warp_potentially_bigger = subgroup_properties.maxSubgroupSize > GuestWarpSize;
if (subgroup_features.subgroupSizeControl &&
subgroup_properties.minSubgroupSize <= GuestWarpSize &&
subgroup_properties.maxSubgroupSize >= GuestWarpSize) {
extensions.push_back(VK_EXT_SUBGROUP_SIZE_CONTROL_EXTENSION_NAME);
guest_warp_stages = subgroup_properties.requiredSubgroupSizeStages;
}
} else {
is_warp_potentially_bigger = true;
}
if (has_ext_transform_feedback) {
VkPhysicalDeviceTransformFeedbackFeaturesEXT tfb_features;
tfb_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TRANSFORM_FEEDBACK_FEATURES_EXT;
tfb_features.pNext = nullptr;
features.pNext = &tfb_features;
physical.GetFeatures2KHR(features);
VkPhysicalDeviceTransformFeedbackPropertiesEXT tfb_properties;
tfb_properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TRANSFORM_FEEDBACK_PROPERTIES_EXT;
tfb_properties.pNext = nullptr;
physical_properties.pNext = &tfb_properties;
physical.GetProperties2KHR(physical_properties);
if (tfb_features.transformFeedback && tfb_features.geometryStreams &&
tfb_properties.maxTransformFeedbackStreams >= 4 &&
tfb_properties.maxTransformFeedbackBuffers && tfb_properties.transformFeedbackQueries &&
tfb_properties.transformFeedbackDraw) {
extensions.push_back(VK_EXT_TRANSFORM_FEEDBACK_EXTENSION_NAME);
ext_transform_feedback = true;
}
}
if (has_ext_custom_border_color) {
VkPhysicalDeviceCustomBorderColorFeaturesEXT border_features;
border_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_CUSTOM_BORDER_COLOR_FEATURES_EXT;
border_features.pNext = nullptr;
features.pNext = &border_features;
physical.GetFeatures2KHR(features);
if (border_features.customBorderColors && border_features.customBorderColorWithoutFormat) {
extensions.push_back(VK_EXT_CUSTOM_BORDER_COLOR_EXTENSION_NAME);
ext_custom_border_color = true;
}
}
if (has_ext_extended_dynamic_state) {
VkPhysicalDeviceExtendedDynamicStateFeaturesEXT dynamic_state;
dynamic_state.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTENDED_DYNAMIC_STATE_FEATURES_EXT;
dynamic_state.pNext = nullptr;
features.pNext = &dynamic_state;
physical.GetFeatures2KHR(features);
if (dynamic_state.extendedDynamicState) {
extensions.push_back(VK_EXT_EXTENDED_DYNAMIC_STATE_EXTENSION_NAME);
ext_extended_dynamic_state = true;
}
}
if (has_ext_robustness2) {
VkPhysicalDeviceRobustness2FeaturesEXT robustness2;
robustness2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ROBUSTNESS_2_FEATURES_EXT;
robustness2.pNext = nullptr;
features.pNext = &robustness2;
physical.GetFeatures2KHR(features);
if (robustness2.nullDescriptor && robustness2.robustImageAccess2) {
extensions.push_back(VK_EXT_ROBUSTNESS_2_EXTENSION_NAME);
ext_robustness2 = true;
}
}
return extensions;
}
void VKDevice::SetupFamilies(VkSurfaceKHR surface) {
std::optional<u32> graphics_family_, present_family_;
const std::vector queue_family_properties = physical.GetQueueFamilyProperties();
for (u32 i = 0; i < static_cast<u32>(queue_family_properties.size()); ++i) {
if (graphics_family_ && present_family_) {
break;
}
const auto& queue_family = queue_family_properties[i];
if (queue_family.queueCount == 0) {
continue;
}
if (queue_family.queueFlags & VK_QUEUE_GRAPHICS_BIT) {
graphics_family_ = i;
}
if (physical.GetSurfaceSupportKHR(i, surface)) {
present_family_ = i;
}
}
ASSERT(graphics_family_ && present_family_);
graphics_family = *graphics_family_;
present_family = *present_family_;
}
void VKDevice::SetupFeatures() {
const auto supported_features{physical.GetFeatures()};
is_formatless_image_load_supported = supported_features.shaderStorageImageReadWithoutFormat;
is_blit_depth_stencil_supported = TestDepthStencilBlits();
is_optimal_astc_supported = IsOptimalAstcSupported(supported_features);
}
void VKDevice::CollectTelemetryParameters() {
VkPhysicalDeviceDriverPropertiesKHR driver{
.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DRIVER_PROPERTIES_KHR,
.pNext = nullptr,
.driverID = {},
.driverName = {},
.driverInfo = {},
.conformanceVersion = {},
};
VkPhysicalDeviceProperties2KHR device_properties{
.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2_KHR,
.pNext = &driver,
.properties = {},
};
physical.GetProperties2KHR(device_properties);
driver_id = driver.driverID;
vendor_name = driver.driverName;
const std::vector extensions = physical.EnumerateDeviceExtensionProperties();
reported_extensions.reserve(std::size(extensions));
for (const auto& extension : extensions) {
reported_extensions.emplace_back(extension.extensionName);
}
}
void VKDevice::CollectToolingInfo() {
if (!ext_tooling_info) {
return;
}
const auto vkGetPhysicalDeviceToolPropertiesEXT =
reinterpret_cast<PFN_vkGetPhysicalDeviceToolPropertiesEXT>(
dld.vkGetInstanceProcAddr(instance, "vkGetPhysicalDeviceToolPropertiesEXT"));
if (!vkGetPhysicalDeviceToolPropertiesEXT) {
return;
}
u32 tool_count = 0;
if (vkGetPhysicalDeviceToolPropertiesEXT(physical, &tool_count, nullptr) != VK_SUCCESS) {
return;
}
std::vector<VkPhysicalDeviceToolPropertiesEXT> tools(tool_count);
if (vkGetPhysicalDeviceToolPropertiesEXT(physical, &tool_count, tools.data()) != VK_SUCCESS) {
return;
}
for (const VkPhysicalDeviceToolPropertiesEXT& tool : tools) {
const std::string_view name = tool.name;
LOG_INFO(Render_Vulkan, "{}", name);
has_renderdoc = has_renderdoc || name == "RenderDoc";
has_nsight_graphics = has_nsight_graphics || name == "NVIDIA Nsight Graphics";
}
}
std::vector<VkDeviceQueueCreateInfo> VKDevice::GetDeviceQueueCreateInfos() const {
static constexpr float QUEUE_PRIORITY = 1.0f;
std::unordered_set<u32> unique_queue_families{graphics_family, present_family};
std::vector<VkDeviceQueueCreateInfo> queue_cis;
queue_cis.reserve(unique_queue_families.size());
for (const u32 queue_family : unique_queue_families) {
auto& ci = queue_cis.emplace_back(VkDeviceQueueCreateInfo{
.sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
.queueFamilyIndex = queue_family,
.queueCount = 1,
.pQueuePriorities = nullptr,
});
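// Assigned after construction so the pointer targets the function-local static, which
// has static storage duration and stays valid for the vkCreateDevice call.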
ci.pQueuePriorities = &QUEUE_PRIORITY;
}
return queue_cis;
}
} // namespace Vulkan

View File

@@ -0,0 +1,314 @@
// Copyright 2018 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <string>
#include <string_view>
#include <unordered_map>
#include <vector>
#include "common/common_types.h"
#include "video_core/renderer_vulkan/nsight_aftermath_tracker.h"
#include "video_core/renderer_vulkan/wrapper.h"
namespace Vulkan {
/// Format usage descriptor.
enum class FormatType { Linear, Optimal, Buffer };
/// Subgroup size of the guest emulated hardware (Nvidia has 32 threads per subgroup).
constexpr u32 GuestWarpSize = 32;
/// Handles data specific to a physical device.
class VKDevice final {
public:
explicit VKDevice(VkInstance instance, u32 instance_version, vk::PhysicalDevice physical,
VkSurfaceKHR surface, const vk::InstanceDispatch& dld);
~VKDevice();
/// Initializes the device. Returns true on success.
bool Create();
/**
* Returns a format supported by the device for the passed requirements.
* @param wanted_format The ideal format to be returned. It may not be the returned format.
* @param wanted_usage The usage that must be fulfilled, even when an alternative format is
* returned instead.
* @param format_type Format type usage.
* @returns A format supported by the device.
*/
VkFormat GetSupportedFormat(VkFormat wanted_format, VkFormatFeatureFlags wanted_usage,
FormatType format_type) const;
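// Usage sketch (hypothetical caller choosing a depth-stencil format):
//   const VkFormat format = device.GetSupportedFormat(
//       VK_FORMAT_D24_UNORM_S8_UINT,
//       VK_FORMAT_FEATURE_DEPTH_STENCIL_ATTACHMENT_BIT, FormatType::Optimal);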
/// Reports a device loss.
void ReportLoss() const;
/// Reports a shader to Nsight Aftermath.
void SaveShader(const std::vector<u32>& spirv) const;
/// Returns the dispatch loader with direct function pointers of the device.
const vk::DeviceDispatch& GetDispatchLoader() const {
return dld;
}
/// Returns the logical device.
const vk::Device& GetLogical() const {
return logical;
}
/// Returns the physical device.
vk::PhysicalDevice GetPhysical() const {
return physical;
}
/// Returns the main graphics queue.
vk::Queue GetGraphicsQueue() const {
return graphics_queue;
}
/// Returns the main present queue.
vk::Queue GetPresentQueue() const {
return present_queue;
}
/// Returns main graphics queue family index.
u32 GetGraphicsFamily() const {
return graphics_family;
}
/// Returns main present queue family index.
u32 GetPresentFamily() const {
return present_family;
}
/// Returns the current instance Vulkan API version in Vulkan-formatted version numbers.
u32 InstanceApiVersion() const {
return instance_version;
}
/// Returns the current Vulkan API version provided in Vulkan-formatted version numbers.
u32 ApiVersion() const {
return properties.apiVersion;
}
/// Returns the current driver version provided in Vulkan-formatted version numbers.
u32 GetDriverVersion() const {
return properties.driverVersion;
}
/// Returns the device name.
std::string_view GetModelName() const {
return properties.deviceName;
}
/// Returns the driver ID.
VkDriverIdKHR GetDriverID() const {
return driver_id;
}
/// Returns uniform buffer alignment requirement.
VkDeviceSize GetUniformBufferAlignment() const {
return properties.limits.minUniformBufferOffsetAlignment;
}
/// Returns storage buffer alignment requirement.
VkDeviceSize GetStorageBufferAlignment() const {
return properties.limits.minStorageBufferOffsetAlignment;
}
/// Returns the maximum range for storage buffers.
VkDeviceSize GetMaxStorageBufferRange() const {
return properties.limits.maxStorageBufferRange;
}
/// Returns the maximum size for push constants.
VkDeviceSize GetMaxPushConstantsSize() const {
return properties.limits.maxPushConstantsSize;
}
/// Returns the maximum size for shared memory.
u32 GetMaxComputeSharedMemorySize() const {
return properties.limits.maxComputeSharedMemorySize;
}
/// Returns true if ASTC is natively supported.
bool IsOptimalAstcSupported() const {
return is_optimal_astc_supported;
}
/// Returns true if the device supports float16 natively.
bool IsFloat16Supported() const {
return is_float16_supported;
}
/// Returns true if the device warp size can potentially be bigger than guest's warp size.
bool IsWarpSizePotentiallyBiggerThanGuest() const {
return is_warp_potentially_bigger;
}
/// Returns true if the device can be forced to use the guest warp size.
bool IsGuestWarpSizeSupported(VkShaderStageFlagBits stage) const {
return guest_warp_stages & stage;
}
/// Returns true if formatless image load is supported.
bool IsFormatlessImageLoadSupported() const {
return is_formatless_image_load_supported;
}
/// Returns true when blitting from and to depth stencil images is supported.
bool IsBlitDepthStencilSupported() const {
return is_blit_depth_stencil_supported;
}
/// Returns true if the device supports VK_NV_viewport_swizzle.
bool IsNvViewportSwizzleSupported() const {
return nv_viewport_swizzle;
}
/// Returns true if the device supports VK_KHR_uniform_buffer_standard_layout.
bool IsKhrUniformBufferStandardLayoutSupported() const {
return khr_uniform_buffer_standard_layout;
}
/// Returns true if the device supports VK_EXT_index_type_uint8.
bool IsExtIndexTypeUint8Supported() const {
return ext_index_type_uint8;
}
/// Returns true if the device supports VK_EXT_sampler_filter_minmax.
bool IsExtSamplerFilterMinmaxSupported() const {
return ext_sampler_filter_minmax;
}
/// Returns true if the device supports VK_EXT_depth_range_unrestricted.
bool IsExtDepthRangeUnrestrictedSupported() const {
return ext_depth_range_unrestricted;
}
/// Returns true if the device supports VK_EXT_shader_viewport_index_layer.
bool IsExtShaderViewportIndexLayerSupported() const {
return ext_shader_viewport_index_layer;
}
/// Returns true if the device supports VK_EXT_transform_feedback.
bool IsExtTransformFeedbackSupported() const {
return ext_transform_feedback;
}
/// Returns true if the device supports VK_EXT_custom_border_color.
bool IsExtCustomBorderColorSupported() const {
return ext_custom_border_color;
}
/// Returns true if the device supports VK_EXT_extended_dynamic_state.
bool IsExtExtendedDynamicStateSupported() const {
return ext_extended_dynamic_state;
}
/// Returns true if the device supports VK_EXT_shader_stencil_export.
bool IsExtShaderStencilExportSupported() const {
return ext_shader_stencil_export;
}
/// Returns true when a known debugging tool is attached.
bool HasDebuggingToolAttached() const {
return has_renderdoc || has_nsight_graphics;
}
/// Returns the vendor name reported from Vulkan.
std::string_view GetVendorName() const {
return vendor_name;
}
/// Returns the list of available extensions.
const std::vector<std::string>& GetAvailableExtensions() const {
return reported_extensions;
}
/// Returns true if the setting for async shader compilation is enabled.
bool UseAsynchronousShaders() const {
return use_asynchronous_shaders;
}
/// Checks if the physical device is suitable.
static bool IsSuitable(vk::PhysicalDevice physical, VkSurfaceKHR surface);
private:
/// Loads extensions into a vector and stores available ones in this object.
std::vector<const char*> LoadExtensions();
/// Sets up queue families.
void SetupFamilies(VkSurfaceKHR surface);
/// Sets up device features.
void SetupFeatures();
/// Collects telemetry information from the device.
void CollectTelemetryParameters();
/// Collects information about attached tools.
void CollectToolingInfo();
/// Returns a list of queue initialization descriptors.
std::vector<VkDeviceQueueCreateInfo> GetDeviceQueueCreateInfos() const;
/// Returns true if ASTC textures are natively supported.
bool IsOptimalAstcSupported(const VkPhysicalDeviceFeatures& features) const;
/// Returns true if the device natively supports blitting depth stencil images.
bool TestDepthStencilBlits() const;
/// Returns true if a format is supported.
bool IsFormatSupported(VkFormat wanted_format, VkFormatFeatureFlags wanted_usage,
FormatType format_type) const;
VkInstance instance; ///< Vulkan instance.
vk::DeviceDispatch dld; ///< Device function pointers.
vk::PhysicalDevice physical; ///< Physical device.
VkPhysicalDeviceProperties properties; ///< Device properties.
vk::Device logical; ///< Logical device.
vk::Queue graphics_queue; ///< Main graphics queue.
vk::Queue present_queue; ///< Main present queue.
u32 instance_version{}; ///< Vulkan instance version.
u32 graphics_family{}; ///< Main graphics queue family index.
u32 present_family{}; ///< Main present queue family index.
VkDriverIdKHR driver_id{}; ///< Driver ID.
VkShaderStageFlags guest_warp_stages{}; ///< Stages where the guest warp size can be forced.
bool is_optimal_astc_supported{}; ///< Support for native ASTC.
bool is_float16_supported{}; ///< Support for float16 arithmetics.
bool is_warp_potentially_bigger{}; ///< Host warp size can be bigger than guest.
bool is_formatless_image_load_supported{}; ///< Support for shader image read without format.
bool is_blit_depth_stencil_supported{}; ///< Support for blitting from and to depth stencil.
bool nv_viewport_swizzle{}; ///< Support for VK_NV_viewport_swizzle.
bool khr_uniform_buffer_standard_layout{}; ///< Support for std430 on UBOs.
bool ext_index_type_uint8{}; ///< Support for VK_EXT_index_type_uint8.
bool ext_sampler_filter_minmax{}; ///< Support for VK_EXT_sampler_filter_minmax.
bool ext_depth_range_unrestricted{}; ///< Support for VK_EXT_depth_range_unrestricted.
bool ext_shader_viewport_index_layer{}; ///< Support for VK_EXT_shader_viewport_index_layer.
bool ext_tooling_info{}; ///< Support for VK_EXT_tooling_info.
bool ext_transform_feedback{}; ///< Support for VK_EXT_transform_feedback.
bool ext_custom_border_color{}; ///< Support for VK_EXT_custom_border_color.
bool ext_extended_dynamic_state{}; ///< Support for VK_EXT_extended_dynamic_state.
bool ext_robustness2{}; ///< Support for VK_EXT_robustness2.
bool ext_shader_stencil_export{}; ///< Support for VK_EXT_shader_stencil_export.
bool nv_device_diagnostics_config{}; ///< Support for VK_NV_device_diagnostics_config.
bool has_renderdoc{}; ///< Has RenderDoc attached.
bool has_nsight_graphics{}; ///< Has Nsight Graphics attached.
// Asynchronous Graphics Pipeline setting
bool use_asynchronous_shaders{}; ///< Setting to use asynchronous shaders/graphics pipeline
// Telemetry parameters
std::string vendor_name; ///< Device's driver name.
std::vector<std::string> reported_extensions; ///< Reported Vulkan extensions.
/// Format properties dictionary.
std::unordered_map<VkFormat, VkFormatProperties> format_properties;
/// Nsight Aftermath GPU crash tracker.
NsightAftermathTracker nsight_aftermath_tracker;
};
} // namespace Vulkan

View File

@@ -0,0 +1,102 @@
// Copyright 2020 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <memory>
#include <thread>
#include "video_core/renderer_vulkan/vk_buffer_cache.h"
#include "video_core/renderer_vulkan/vk_device.h"
#include "video_core/renderer_vulkan/vk_fence_manager.h"
#include "video_core/renderer_vulkan/vk_scheduler.h"
#include "video_core/renderer_vulkan/vk_texture_cache.h"
#include "video_core/renderer_vulkan/wrapper.h"
namespace Vulkan {
InnerFence::InnerFence(const VKDevice& device_, VKScheduler& scheduler_, u32 payload_,
bool is_stubbed_)
: FenceBase{payload_, is_stubbed_}, device{device_}, scheduler{scheduler_} {}
InnerFence::InnerFence(const VKDevice& device_, VKScheduler& scheduler_, GPUVAddr address_,
u32 payload_, bool is_stubbed_)
: FenceBase{address_, payload_, is_stubbed_}, device{device_}, scheduler{scheduler_} {}
InnerFence::~InnerFence() = default;
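// Records a vkCmdSetEvent into the current command stream; the event becomes signaled
// once the GPU has executed all commands recorded before it.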
void InnerFence::Queue() {
if (is_stubbed) {
return;
}
ASSERT(!event);
event = device.GetLogical().CreateEvent();
ticks = scheduler.CurrentTick();
scheduler.RequestOutsideRenderPassOperationContext();
scheduler.Record([event = *event](vk::CommandBuffer cmdbuf) {
cmdbuf.SetEvent(event, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT);
});
}
bool InnerFence::IsSignaled() const {
if (is_stubbed) {
return true;
}
ASSERT(event);
return IsEventSignalled();
}
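// Flushes the scheduler if the event's tick has not been submitted yet, then yields
// until the GPU signals the event.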
void InnerFence::Wait() {
if (is_stubbed) {
return;
}
ASSERT(event);
if (ticks >= scheduler.CurrentTick()) {
scheduler.Flush();
}
while (!IsEventSignalled()) {
std::this_thread::yield();
}
}
bool InnerFence::IsEventSignalled() const {
switch (const VkResult result = event.GetStatus()) {
case VK_EVENT_SET:
return true;
case VK_EVENT_RESET:
return false;
default:
throw vk::Exception(result);
}
}
VKFenceManager::VKFenceManager(VideoCore::RasterizerInterface& rasterizer_, Tegra::GPU& gpu_,
Tegra::MemoryManager& memory_manager_, TextureCache& texture_cache_,
VKBufferCache& buffer_cache_, VKQueryCache& query_cache_,
const VKDevice& device_, VKScheduler& scheduler_)
: GenericFenceManager{rasterizer_, gpu_, texture_cache_, buffer_cache_, query_cache_},
device{device_}, scheduler{scheduler_} {}
Fence VKFenceManager::CreateFence(u32 value, bool is_stubbed) {
return std::make_shared<InnerFence>(device, scheduler, value, is_stubbed);
}
Fence VKFenceManager::CreateFence(GPUVAddr addr, u32 value, bool is_stubbed) {
return std::make_shared<InnerFence>(device, scheduler, addr, value, is_stubbed);
}
void VKFenceManager::QueueFence(Fence& fence) {
fence->Queue();
}
bool VKFenceManager::IsFenceSignaled(Fence& fence) const {
return fence->IsSignaled();
}
void VKFenceManager::WaitFence(Fence& fence) {
fence->Wait();
}
} // namespace Vulkan

View File

@@ -0,0 +1,75 @@
// Copyright 2020 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <memory>
#include "video_core/fence_manager.h"
#include "video_core/renderer_vulkan/vk_buffer_cache.h"
#include "video_core/renderer_vulkan/vk_texture_cache.h"
#include "video_core/renderer_vulkan/wrapper.h"
namespace Core {
class System;
}
namespace VideoCore {
class RasterizerInterface;
}
namespace Vulkan {
class VKBufferCache;
class VKDevice;
class VKQueryCache;
class VKScheduler;
class InnerFence : public VideoCommon::FenceBase {
public:
explicit InnerFence(const VKDevice& device_, VKScheduler& scheduler_, u32 payload_,
bool is_stubbed_);
explicit InnerFence(const VKDevice& device_, VKScheduler& scheduler_, GPUVAddr address_,
u32 payload_, bool is_stubbed_);
~InnerFence();
void Queue();
bool IsSignaled() const;
void Wait();
private:
bool IsEventSignalled() const;
const VKDevice& device;
VKScheduler& scheduler;
vk::Event event;
u64 ticks = 0;
};
using Fence = std::shared_ptr<InnerFence>;
using GenericFenceManager =
VideoCommon::FenceManager<Fence, TextureCache, VKBufferCache, VKQueryCache>;
class VKFenceManager final : public GenericFenceManager {
public:
explicit VKFenceManager(VideoCore::RasterizerInterface& rasterizer_, Tegra::GPU& gpu_,
Tegra::MemoryManager& memory_manager_, TextureCache& texture_cache_,
VKBufferCache& buffer_cache_, VKQueryCache& query_cache_,
const VKDevice& device_, VKScheduler& scheduler_);
protected:
Fence CreateFence(u32 value, bool is_stubbed) override;
Fence CreateFence(GPUVAddr addr, u32 value, bool is_stubbed) override;
void QueueFence(Fence& fence) override;
bool IsFenceSignaled(Fence& fence) const override;
void WaitFence(Fence& fence) override;
private:
const VKDevice& device;
VKScheduler& scheduler;
};
} // namespace Vulkan

View File

@@ -0,0 +1,486 @@
// Copyright 2019 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <algorithm>
#include <array>
#include <cstring>
#include <vector>
#include "common/common_types.h"
#include "common/microprofile.h"
#include "video_core/renderer_vulkan/fixed_pipeline_state.h"
#include "video_core/renderer_vulkan/maxwell_to_vk.h"
#include "video_core/renderer_vulkan/vk_descriptor_pool.h"
#include "video_core/renderer_vulkan/vk_device.h"
#include "video_core/renderer_vulkan/vk_graphics_pipeline.h"
#include "video_core/renderer_vulkan/vk_pipeline_cache.h"
#include "video_core/renderer_vulkan/vk_scheduler.h"
#include "video_core/renderer_vulkan/vk_update_descriptor.h"
#include "video_core/renderer_vulkan/wrapper.h"
namespace Vulkan {
MICROPROFILE_DECLARE(Vulkan_PipelineCache);
namespace {
template <class StencilFace>
VkStencilOpState GetStencilFaceState(const StencilFace& face) {
return {
.failOp = MaxwellToVK::StencilOp(face.ActionStencilFail()),
.passOp = MaxwellToVK::StencilOp(face.ActionDepthPass()),
.depthFailOp = MaxwellToVK::StencilOp(face.ActionDepthFail()),
.compareOp = MaxwellToVK::ComparisonOp(face.TestFunc()),
.compareMask = 0,
.writeMask = 0,
.reference = 0,
};
}
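// Primitive restart only applies to indexed strip/fan topologies; list and patch
// topologies must not enable it.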
bool SupportsPrimitiveRestart(VkPrimitiveTopology topology) {
static constexpr std::array unsupported_topologies = {
VK_PRIMITIVE_TOPOLOGY_POINT_LIST,
VK_PRIMITIVE_TOPOLOGY_LINE_LIST,
VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST,
VK_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY,
VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_WITH_ADJACENCY,
VK_PRIMITIVE_TOPOLOGY_PATCH_LIST};
return std::find(std::begin(unsupported_topologies), std::end(unsupported_topologies),
topology) == std::end(unsupported_topologies);
}
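// Expands a packed 16-bit swizzle (3-bit selectors for x/y/z/w at bits 0, 4, 8 and 12)
// into the NV viewport swizzle struct.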
VkViewportSwizzleNV UnpackViewportSwizzle(u16 swizzle) {
union Swizzle {
u32 raw;
BitField<0, 3, Maxwell::ViewportSwizzle> x;
BitField<4, 3, Maxwell::ViewportSwizzle> y;
BitField<8, 3, Maxwell::ViewportSwizzle> z;
BitField<12, 3, Maxwell::ViewportSwizzle> w;
};
const Swizzle unpacked{swizzle};
return {
.x = MaxwellToVK::ViewportSwizzle(unpacked.x),
.y = MaxwellToVK::ViewportSwizzle(unpacked.y),
.z = MaxwellToVK::ViewportSwizzle(unpacked.z),
.w = MaxwellToVK::ViewportSwizzle(unpacked.w),
};
}
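// Maps Maxwell MSAA modes to the closest Vulkan sample count; the _D3D and _VC*
// variants share the sample count of their base mode.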
VkSampleCountFlagBits ConvertMsaaMode(Tegra::Texture::MsaaMode msaa_mode) {
switch (msaa_mode) {
case Tegra::Texture::MsaaMode::Msaa1x1:
return VK_SAMPLE_COUNT_1_BIT;
case Tegra::Texture::MsaaMode::Msaa2x1:
case Tegra::Texture::MsaaMode::Msaa2x1_D3D:
return VK_SAMPLE_COUNT_2_BIT;
case Tegra::Texture::MsaaMode::Msaa2x2:
case Tegra::Texture::MsaaMode::Msaa2x2_VC4:
case Tegra::Texture::MsaaMode::Msaa2x2_VC12:
return VK_SAMPLE_COUNT_4_BIT;
case Tegra::Texture::MsaaMode::Msaa4x2:
case Tegra::Texture::MsaaMode::Msaa4x2_D3D:
case Tegra::Texture::MsaaMode::Msaa4x2_VC8:
case Tegra::Texture::MsaaMode::Msaa4x2_VC24:
return VK_SAMPLE_COUNT_8_BIT;
case Tegra::Texture::MsaaMode::Msaa4x4:
return VK_SAMPLE_COUNT_16_BIT;
default:
UNREACHABLE_MSG("Invalid msaa_mode={}", static_cast<int>(msaa_mode));
return VK_SAMPLE_COUNT_1_BIT;
}
}
} // Anonymous namespace
VKGraphicsPipeline::VKGraphicsPipeline(const VKDevice& device_, VKScheduler& scheduler_,
VKDescriptorPool& descriptor_pool_,
VKUpdateDescriptorQueue& update_descriptor_queue_,
const GraphicsPipelineCacheKey& key,
vk::Span<VkDescriptorSetLayoutBinding> bindings,
const SPIRVProgram& program, u32 num_color_buffers)
: device{device_}, scheduler{scheduler_}, cache_key{key}, hash{cache_key.Hash()},
descriptor_set_layout{CreateDescriptorSetLayout(bindings)},
descriptor_allocator{descriptor_pool_, *descriptor_set_layout},
update_descriptor_queue{update_descriptor_queue_}, layout{CreatePipelineLayout()},
descriptor_template{CreateDescriptorUpdateTemplate(program)},
modules(CreateShaderModules(program)),
pipeline(CreatePipeline(program, cache_key.renderpass, num_color_buffers)) {}
VKGraphicsPipeline::~VKGraphicsPipeline() = default;
VkDescriptorSet VKGraphicsPipeline::CommitDescriptorSet() {
if (!descriptor_template) {
return {};
}
const VkDescriptorSet set = descriptor_allocator.Commit();
update_descriptor_queue.Send(*descriptor_template, set);
return set;
}
vk::DescriptorSetLayout VKGraphicsPipeline::CreateDescriptorSetLayout(
vk::Span<VkDescriptorSetLayoutBinding> bindings) const {
const VkDescriptorSetLayoutCreateInfo ci{
.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
.bindingCount = bindings.size(),
.pBindings = bindings.data(),
};
return device.GetLogical().CreateDescriptorSetLayout(ci);
}
vk::PipelineLayout VKGraphicsPipeline::CreatePipelineLayout() const {
const VkPipelineLayoutCreateInfo ci{
.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
.setLayoutCount = 1,
.pSetLayouts = descriptor_set_layout.address(),
.pushConstantRangeCount = 0,
.pPushConstantRanges = nullptr,
};
return device.GetLogical().CreatePipelineLayout(ci);
}
vk::DescriptorUpdateTemplateKHR VKGraphicsPipeline::CreateDescriptorUpdateTemplate(
const SPIRVProgram& program) const {
std::vector<VkDescriptorUpdateTemplateEntry> template_entries;
u32 binding = 0;
u32 offset = 0;
for (const auto& stage : program) {
if (stage) {
FillDescriptorUpdateTemplateEntries(stage->entries, binding, offset, template_entries);
}
}
if (template_entries.empty()) {
// If the shader doesn't use descriptor sets, skip template creation.
return {};
}
const VkDescriptorUpdateTemplateCreateInfoKHR ci{
.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_UPDATE_TEMPLATE_CREATE_INFO_KHR,
.pNext = nullptr,
.flags = 0,
.descriptorUpdateEntryCount = static_cast<u32>(template_entries.size()),
.pDescriptorUpdateEntries = template_entries.data(),
.templateType = VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_DESCRIPTOR_SET_KHR,
.descriptorSetLayout = *descriptor_set_layout,
.pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,
.pipelineLayout = *layout,
.set = DESCRIPTOR_SET,
};
return device.GetLogical().CreateDescriptorUpdateTemplateKHR(ci);
}
std::vector<vk::ShaderModule> VKGraphicsPipeline::CreateShaderModules(
const SPIRVProgram& program) const {
VkShaderModuleCreateInfo ci{
.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
.codeSize = 0,
};
std::vector<vk::ShaderModule> shader_modules;
shader_modules.reserve(Maxwell::MaxShaderStage);
for (std::size_t i = 0; i < Maxwell::MaxShaderStage; ++i) {
const auto& stage = program[i];
if (!stage) {
continue;
}
device.SaveShader(stage->code);
ci.codeSize = stage->code.size() * sizeof(u32);
ci.pCode = stage->code.data();
shader_modules.push_back(device.GetLogical().CreateShaderModule(ci));
}
return shader_modules;
}
vk::Pipeline VKGraphicsPipeline::CreatePipeline(const SPIRVProgram& program,
VkRenderPass renderpass,
u32 num_color_buffers) const {
const auto& state = cache_key.fixed_state;
const auto& viewport_swizzles = state.viewport_swizzles;
FixedPipelineState::DynamicState dynamic;
if (device.IsExtExtendedDynamicStateSupported()) {
// Insert dummy values; as long as they are valid they don't matter, because with
// extended dynamic state these fields are set dynamically and ignored here.
dynamic.raw1 = 0;
dynamic.raw2 = 0;
dynamic.vertex_strides.fill(0);
} else {
dynamic = state.dynamic_state;
}
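// Collect enabled vertex bindings; a non-zero divisor marks the binding as instanced
// and additionally records a divisor entry for VK_EXT_vertex_attribute_divisor.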
std::vector<VkVertexInputBindingDescription> vertex_bindings;
std::vector<VkVertexInputBindingDivisorDescriptionEXT> vertex_binding_divisors;
for (std::size_t index = 0; index < Maxwell::NumVertexArrays; ++index) {
if (state.attributes[index].binding_index_enabled == 0) {
continue;
}
const bool instanced = state.binding_divisors[index] != 0;
const auto rate = instanced ? VK_VERTEX_INPUT_RATE_INSTANCE : VK_VERTEX_INPUT_RATE_VERTEX;
vertex_bindings.push_back({
.binding = static_cast<u32>(index),
.stride = dynamic.vertex_strides[index],
.inputRate = rate,
});
if (instanced) {
vertex_binding_divisors.push_back({
.binding = static_cast<u32>(index),
.divisor = state.binding_divisors[index],
});
}
}
std::vector<VkVertexInputAttributeDescription> vertex_attributes;
const auto& input_attributes = program[0]->entries.attributes;
for (std::size_t index = 0; index < state.attributes.size(); ++index) {
const auto& attribute = state.attributes[index];
if (!attribute.enabled) {
continue;
}
if (!input_attributes.contains(static_cast<u32>(index))) {
// Skip attributes not used by the vertex shaders.
continue;
}
vertex_attributes.push_back({
.location = static_cast<u32>(index),
.binding = attribute.buffer,
.format = MaxwellToVK::VertexFormat(attribute.Type(), attribute.Size()),
.offset = attribute.offset,
});
}
VkPipelineVertexInputStateCreateInfo vertex_input_ci{
.sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
.vertexBindingDescriptionCount = static_cast<u32>(vertex_bindings.size()),
.pVertexBindingDescriptions = vertex_bindings.data(),
.vertexAttributeDescriptionCount = static_cast<u32>(vertex_attributes.size()),
.pVertexAttributeDescriptions = vertex_attributes.data(),
};
const VkPipelineVertexInputDivisorStateCreateInfoEXT input_divisor_ci{
.sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_DIVISOR_STATE_CREATE_INFO_EXT,
.pNext = nullptr,
.vertexBindingDivisorCount = static_cast<u32>(vertex_binding_divisors.size()),
.pVertexBindingDivisors = vertex_binding_divisors.data(),
};
if (!vertex_binding_divisors.empty()) {
vertex_input_ci.pNext = &input_divisor_ci;
}
const auto input_assembly_topology = MaxwellToVK::PrimitiveTopology(device, state.topology);
const VkPipelineInputAssemblyStateCreateInfo input_assembly_ci{
.sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
.topology = input_assembly_topology,
.primitiveRestartEnable = state.primitive_restart_enable != 0 &&
SupportsPrimitiveRestart(input_assembly_topology),
};
const VkPipelineTessellationStateCreateInfo tessellation_ci{
.sType = VK_STRUCTURE_TYPE_PIPELINE_TESSELLATION_STATE_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
.patchControlPoints = state.patch_control_points_minus_one.Value() + 1,
};
VkPipelineViewportStateCreateInfo viewport_ci{
.sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
.viewportCount = Maxwell::NumViewports,
.pViewports = nullptr,
.scissorCount = Maxwell::NumViewports,
.pScissors = nullptr,
};
std::array<VkViewportSwizzleNV, Maxwell::NumViewports> swizzles;
std::ranges::transform(viewport_swizzles, swizzles.begin(), UnpackViewportSwizzle);
VkPipelineViewportSwizzleStateCreateInfoNV swizzle_ci{
.sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_SWIZZLE_STATE_CREATE_INFO_NV,
.pNext = nullptr,
.flags = 0,
.viewportCount = Maxwell::NumViewports,
.pViewportSwizzles = swizzles.data(),
};
if (device.IsNvViewportSwizzleSupported()) {
viewport_ci.pNext = &swizzle_ci;
}
const VkPipelineRasterizationStateCreateInfo rasterization_ci{
.sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
        .depthClampEnable = static_cast<VkBool32>(state.depth_clamp_disabled == 0),
        .rasterizerDiscardEnable = static_cast<VkBool32>(state.rasterize_enable == 0),
.polygonMode = VK_POLYGON_MODE_FILL,
.cullMode =
dynamic.cull_enable ? MaxwellToVK::CullFace(dynamic.CullFace()) : VK_CULL_MODE_NONE,
.frontFace = MaxwellToVK::FrontFace(dynamic.FrontFace()),
.depthBiasEnable = state.depth_bias_enable,
.depthBiasConstantFactor = 0.0f,
.depthBiasClamp = 0.0f,
.depthBiasSlopeFactor = 0.0f,
.lineWidth = 1.0f,
};
const VkPipelineMultisampleStateCreateInfo multisample_ci{
.sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
.rasterizationSamples = ConvertMsaaMode(state.msaa_mode),
.sampleShadingEnable = VK_FALSE,
.minSampleShading = 0.0f,
.pSampleMask = nullptr,
.alphaToCoverageEnable = VK_FALSE,
.alphaToOneEnable = VK_FALSE,
};
const VkPipelineDepthStencilStateCreateInfo depth_stencil_ci{
.sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
.depthTestEnable = dynamic.depth_test_enable,
.depthWriteEnable = dynamic.depth_write_enable,
.depthCompareOp = dynamic.depth_test_enable
? MaxwellToVK::ComparisonOp(dynamic.DepthTestFunc())
: VK_COMPARE_OP_ALWAYS,
.depthBoundsTestEnable = dynamic.depth_bounds_enable,
.stencilTestEnable = dynamic.stencil_enable,
.front = GetStencilFaceState(dynamic.front),
.back = GetStencilFaceState(dynamic.back),
.minDepthBounds = 0.0f,
.maxDepthBounds = 0.0f,
};
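    // Build one color blend attachment state per bound render target.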
std::array<VkPipelineColorBlendAttachmentState, Maxwell::NumRenderTargets> cb_attachments;
for (std::size_t index = 0; index < num_color_buffers; ++index) {
static constexpr std::array COMPONENT_TABLE{
VK_COLOR_COMPONENT_R_BIT,
VK_COLOR_COMPONENT_G_BIT,
VK_COLOR_COMPONENT_B_BIT,
VK_COLOR_COMPONENT_A_BIT,
};
const auto& blend = state.attachments[index];
VkColorComponentFlags color_components = 0;
for (std::size_t i = 0; i < COMPONENT_TABLE.size(); ++i) {
if (blend.Mask()[i]) {
color_components |= COMPONENT_TABLE[i];
}
}
cb_attachments[index] = {
.blendEnable = blend.enable != 0,
.srcColorBlendFactor = MaxwellToVK::BlendFactor(blend.SourceRGBFactor()),
.dstColorBlendFactor = MaxwellToVK::BlendFactor(blend.DestRGBFactor()),
.colorBlendOp = MaxwellToVK::BlendEquation(blend.EquationRGB()),
.srcAlphaBlendFactor = MaxwellToVK::BlendFactor(blend.SourceAlphaFactor()),
.dstAlphaBlendFactor = MaxwellToVK::BlendFactor(blend.DestAlphaFactor()),
.alphaBlendOp = MaxwellToVK::BlendEquation(blend.EquationAlpha()),
.colorWriteMask = color_components,
};
}
const VkPipelineColorBlendStateCreateInfo color_blend_ci{
.sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
.logicOpEnable = VK_FALSE,
.logicOp = VK_LOGIC_OP_COPY,
.attachmentCount = num_color_buffers,
.pAttachments = cb_attachments.data(),
.blendConstants = {},
};
std::vector dynamic_states{
VK_DYNAMIC_STATE_VIEWPORT, VK_DYNAMIC_STATE_SCISSOR,
VK_DYNAMIC_STATE_DEPTH_BIAS, VK_DYNAMIC_STATE_BLEND_CONSTANTS,
VK_DYNAMIC_STATE_DEPTH_BOUNDS, VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK,
VK_DYNAMIC_STATE_STENCIL_WRITE_MASK, VK_DYNAMIC_STATE_STENCIL_REFERENCE,
};
if (device.IsExtExtendedDynamicStateSupported()) {
static constexpr std::array extended{
VK_DYNAMIC_STATE_CULL_MODE_EXT,
VK_DYNAMIC_STATE_FRONT_FACE_EXT,
VK_DYNAMIC_STATE_VERTEX_INPUT_BINDING_STRIDE_EXT,
VK_DYNAMIC_STATE_DEPTH_TEST_ENABLE_EXT,
VK_DYNAMIC_STATE_DEPTH_WRITE_ENABLE_EXT,
VK_DYNAMIC_STATE_DEPTH_COMPARE_OP_EXT,
VK_DYNAMIC_STATE_DEPTH_BOUNDS_TEST_ENABLE_EXT,
VK_DYNAMIC_STATE_STENCIL_TEST_ENABLE_EXT,
VK_DYNAMIC_STATE_STENCIL_OP_EXT,
};
dynamic_states.insert(dynamic_states.end(), extended.begin(), extended.end());
}
const VkPipelineDynamicStateCreateInfo dynamic_state_ci{
.sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
.dynamicStateCount = static_cast<u32>(dynamic_states.size()),
.pDynamicStates = dynamic_states.data(),
};
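    // Chained into a stage's pNext below when the shader uses warp intrinsics and
    // the device can force the guest's subgroup size.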
const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT subgroup_size_ci{
.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT,
.pNext = nullptr,
.requiredSubgroupSize = GuestWarpSize,
};
std::vector<VkPipelineShaderStageCreateInfo> shader_stages;
std::size_t module_index = 0;
for (std::size_t stage = 0; stage < Maxwell::MaxShaderStage; ++stage) {
if (!program[stage]) {
continue;
}
VkPipelineShaderStageCreateInfo& stage_ci = shader_stages.emplace_back();
stage_ci.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO;
stage_ci.pNext = nullptr;
stage_ci.flags = 0;
stage_ci.stage = MaxwellToVK::ShaderStage(static_cast<Tegra::Engines::ShaderType>(stage));
stage_ci.module = *modules[module_index++];
stage_ci.pName = "main";
stage_ci.pSpecializationInfo = nullptr;
if (program[stage]->entries.uses_warps && device.IsGuestWarpSizeSupported(stage_ci.stage)) {
stage_ci.pNext = &subgroup_size_ci;
}
}
return device.GetLogical().CreateGraphicsPipeline(VkGraphicsPipelineCreateInfo{
.sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
.stageCount = static_cast<u32>(shader_stages.size()),
.pStages = shader_stages.data(),
.pVertexInputState = &vertex_input_ci,
.pInputAssemblyState = &input_assembly_ci,
.pTessellationState = &tessellation_ci,
.pViewportState = &viewport_ci,
.pRasterizationState = &rasterization_ci,
.pMultisampleState = &multisample_ci,
.pDepthStencilState = &depth_stencil_ci,
.pColorBlendState = &color_blend_ci,
.pDynamicState = &dynamic_state_ci,
.layout = *layout,
.renderPass = renderpass,
.subpass = 0,
.basePipelineHandle = nullptr,
.basePipelineIndex = 0,
});
}
} // namespace Vulkan

View File

@@ -0,0 +1,103 @@
// Copyright 2019 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <array>
#include <optional>
#include <vector>
#include "common/common_types.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/renderer_vulkan/fixed_pipeline_state.h"
#include "video_core/renderer_vulkan/vk_descriptor_pool.h"
#include "video_core/renderer_vulkan/vk_shader_decompiler.h"
#include "video_core/renderer_vulkan/wrapper.h"
namespace Vulkan {
using Maxwell = Tegra::Engines::Maxwell3D::Regs;
struct GraphicsPipelineCacheKey {
VkRenderPass renderpass;
std::array<GPUVAddr, Maxwell::MaxShaderProgram> shaders;
FixedPipelineState fixed_state;
std::size_t Hash() const noexcept;
bool operator==(const GraphicsPipelineCacheKey& rhs) const noexcept;
bool operator!=(const GraphicsPipelineCacheKey& rhs) const noexcept {
return !operator==(rhs);
}
std::size_t Size() const noexcept {
return sizeof(renderpass) + sizeof(shaders) + fixed_state.Size();
}
};
static_assert(std::has_unique_object_representations_v<GraphicsPipelineCacheKey>);
static_assert(std::is_trivially_copyable_v<GraphicsPipelineCacheKey>);
static_assert(std::is_trivially_constructible_v<GraphicsPipelineCacheKey>);
class VKDescriptorPool;
class VKDevice;
class VKScheduler;
class VKUpdateDescriptorQueue;
using SPIRVProgram = std::array<std::optional<SPIRVShader>, Maxwell::MaxShaderStage>;
class VKGraphicsPipeline final {
public:
explicit VKGraphicsPipeline(const VKDevice& device_, VKScheduler& scheduler_,
VKDescriptorPool& descriptor_pool,
VKUpdateDescriptorQueue& update_descriptor_queue_,
const GraphicsPipelineCacheKey& key,
vk::Span<VkDescriptorSetLayoutBinding> bindings,
const SPIRVProgram& program, u32 num_color_buffers);
~VKGraphicsPipeline();
VkDescriptorSet CommitDescriptorSet();
VkPipeline GetHandle() const {
return *pipeline;
}
VkPipelineLayout GetLayout() const {
return *layout;
}
GraphicsPipelineCacheKey GetCacheKey() const {
return cache_key;
}
private:
vk::DescriptorSetLayout CreateDescriptorSetLayout(
vk::Span<VkDescriptorSetLayoutBinding> bindings) const;
vk::PipelineLayout CreatePipelineLayout() const;
vk::DescriptorUpdateTemplateKHR CreateDescriptorUpdateTemplate(
const SPIRVProgram& program) const;
std::vector<vk::ShaderModule> CreateShaderModules(const SPIRVProgram& program) const;
vk::Pipeline CreatePipeline(const SPIRVProgram& program, VkRenderPass renderpass,
u32 num_color_buffers) const;
const VKDevice& device;
VKScheduler& scheduler;
const GraphicsPipelineCacheKey cache_key;
const u64 hash;
vk::DescriptorSetLayout descriptor_set_layout;
DescriptorAllocator descriptor_allocator;
VKUpdateDescriptorQueue& update_descriptor_queue;
vk::PipelineLayout layout;
vk::DescriptorUpdateTemplateKHR descriptor_template;
std::vector<vk::ShaderModule> modules;
vk::Pipeline pipeline;
};
} // namespace Vulkan

View File

@@ -0,0 +1,56 @@
// Copyright 2020 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <atomic>
#include <chrono>
#include "core/settings.h"
#include "video_core/renderer_vulkan/vk_device.h"
#include "video_core/renderer_vulkan/vk_master_semaphore.h"
#include "video_core/renderer_vulkan/wrapper.h"
namespace Vulkan {
using namespace std::chrono_literals;
MasterSemaphore::MasterSemaphore(const VKDevice& device) {
static constexpr VkSemaphoreTypeCreateInfoKHR semaphore_type_ci{
.sType = VK_STRUCTURE_TYPE_SEMAPHORE_TYPE_CREATE_INFO_KHR,
.pNext = nullptr,
.semaphoreType = VK_SEMAPHORE_TYPE_TIMELINE_KHR,
.initialValue = 0,
};
static constexpr VkSemaphoreCreateInfo semaphore_ci{
.sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO,
.pNext = &semaphore_type_ci,
.flags = 0,
};
semaphore = device.GetLogical().CreateSemaphore(semaphore_ci);
if (!Settings::values.renderer_debug) {
return;
}
// Validation layers have a bug where they fail to track resource usage when using timeline
// semaphores and synchronizing with GetSemaphoreCounterValueKHR. To work around this issue,
// have a separate thread waiting for each timeline semaphore value.
debug_thread = std::thread([this] {
u64 counter = 0;
while (!shutdown) {
if (semaphore.Wait(counter, 10'000'000)) {
++counter;
}
}
});
}
MasterSemaphore::~MasterSemaphore() {
shutdown = true;
// This thread might not be started
if (debug_thread.joinable()) {
debug_thread.join();
}
}
} // namespace Vulkan

View File

@@ -0,0 +1,70 @@
// Copyright 2020 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <atomic>
#include <thread>
#include "common/common_types.h"
#include "video_core/renderer_vulkan/wrapper.h"
namespace Vulkan {
class VKDevice;
class MasterSemaphore {
public:
explicit MasterSemaphore(const VKDevice& device);
~MasterSemaphore();
/// Returns the current logical tick.
[[nodiscard]] u64 CurrentTick() const noexcept {
return current_tick;
}
/// Returns the timeline semaphore handle.
[[nodiscard]] VkSemaphore Handle() const noexcept {
return *semaphore;
}
/// Returns true when a tick has been hit by the GPU.
[[nodiscard]] bool IsFree(u64 tick) {
return gpu_tick >= tick;
}
/// Advance to the logical tick.
void NextTick() noexcept {
++current_tick;
}
/// Refresh the known GPU tick
void Refresh() {
gpu_tick = semaphore.GetCounter();
}
/// Waits for a tick to be hit on the GPU
void Wait(u64 tick) {
// No need to wait if the GPU is ahead of the tick
if (IsFree(tick)) {
return;
}
// Update the GPU tick and try again
Refresh();
if (IsFree(tick)) {
return;
}
// If none of the above is hit, fallback to a regular wait
semaphore.Wait(tick);
}
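    // Illustrative flow (how a client such as the scheduler is expected to use
    // this): signal the timeline semaphore with the value of CurrentTick() when
    // submitting work, call NextTick(), and have resources stamped with that tick
    // gate their reuse on IsFree()/Wait().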
private:
vk::Semaphore semaphore; ///< Timeline semaphore.
std::atomic<u64> gpu_tick{0}; ///< Current known GPU tick.
std::atomic<u64> current_tick{1}; ///< Current logical tick.
std::atomic<bool> shutdown{false}; ///< True when the object is being destroyed.
std::thread debug_thread; ///< Debug thread to workaround validation layer bugs.
};
} // namespace Vulkan

View File

@@ -0,0 +1,230 @@
// Copyright 2018 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <algorithm>
#include <optional>
#include <tuple>
#include <vector>
#include "common/alignment.h"
#include "common/assert.h"
#include "common/common_types.h"
#include "common/logging/log.h"
#include "video_core/renderer_vulkan/vk_device.h"
#include "video_core/renderer_vulkan/vk_memory_manager.h"
#include "video_core/renderer_vulkan/wrapper.h"
namespace Vulkan {
namespace {
u64 GetAllocationChunkSize(u64 required_size) {
static constexpr u64 sizes[] = {16ULL << 20, 32ULL << 20, 64ULL << 20, 128ULL << 20};
auto it = std::lower_bound(std::begin(sizes), std::end(sizes), required_size);
return it != std::end(sizes) ? *it : Common::AlignUp(required_size, 256ULL << 20);
}
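// Illustrative mapping: a 10 MiB request rounds up to the 16 MiB chunk, 100 MiB
// to 128 MiB, and anything above 128 MiB is aligned to the next 256 MiB
// boundary, e.g. 300 MiB -> 512 MiB.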
} // Anonymous namespace
class VKMemoryAllocation final {
public:
explicit VKMemoryAllocation(const VKDevice& device_, vk::DeviceMemory memory_,
VkMemoryPropertyFlags properties_, u64 allocation_size_, u32 type_)
: device{device_}, memory{std::move(memory_)}, properties{properties_},
allocation_size{allocation_size_}, shifted_type{ShiftType(type_)} {}
VKMemoryCommit Commit(VkDeviceSize commit_size, VkDeviceSize alignment) {
auto found = TryFindFreeSection(free_iterator, allocation_size,
static_cast<u64>(commit_size), static_cast<u64>(alignment));
if (!found) {
found = TryFindFreeSection(0, free_iterator, static_cast<u64>(commit_size),
static_cast<u64>(alignment));
if (!found) {
                // Signal out of memory; the caller will try to allocate more memory.
return nullptr;
}
}
auto commit = std::make_unique<VKMemoryCommitImpl>(device, this, memory, *found,
*found + commit_size);
commits.push_back(commit.get());
        // The address right after the last commit is likely to be free.
free_iterator = *found + commit_size;
return commit;
}
void Free(const VKMemoryCommitImpl* commit) {
ASSERT(commit);
const auto it = std::find(std::begin(commits), std::end(commits), commit);
if (it == commits.end()) {
UNREACHABLE_MSG("Freeing unallocated commit!");
return;
}
commits.erase(it);
}
/// Returns whether this allocation is compatible with the arguments.
bool IsCompatible(VkMemoryPropertyFlags wanted_properties, u32 type_mask) const {
return (wanted_properties & properties) && (type_mask & shifted_type) != 0;
}
private:
static constexpr u32 ShiftType(u32 type) {
return 1U << type;
}
    /// Returns the start of a free region between "start" and "end" that satisfies the
    /// requested size and alignment, or std::nullopt when no such region exists.
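    /// Example: with commits [0, 64) and [128, 160) and an alignment of 32, a
    /// 64-byte request scans from 0, collides with [0, 64), restarts at 64 and
    /// returns 64, i.e. the free region [64, 128).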
std::optional<u64> TryFindFreeSection(u64 start, u64 end, u64 size, u64 alignment) const {
u64 iterator = Common::AlignUp(start, alignment);
while (iterator + size <= end) {
const u64 try_left = iterator;
const u64 try_right = try_left + size;
bool overlap = false;
for (const auto& commit : commits) {
const auto [commit_left, commit_right] = commit->interval;
if (try_left < commit_right && commit_left < try_right) {
// There's an overlap, continue the search where the overlapping commit ends.
iterator = Common::AlignUp(commit_right, alignment);
overlap = true;
break;
}
}
if (!overlap) {
// A free address has been found.
return try_left;
}
}
        // No free regions were found; return an empty optional.
return std::nullopt;
}
const VKDevice& device; ///< Vulkan device.
const vk::DeviceMemory memory; ///< Vulkan memory allocation handler.
const VkMemoryPropertyFlags properties; ///< Vulkan properties.
const u64 allocation_size; ///< Size of this allocation.
const u32 shifted_type; ///< Stored Vulkan type of this allocation, shifted.
/// Hints where the next free region is likely going to be.
u64 free_iterator{};
/// Stores all commits done from this allocation.
std::vector<const VKMemoryCommitImpl*> commits;
};
VKMemoryManager::VKMemoryManager(const VKDevice& device_)
: device{device_}, properties{device_.GetPhysical().GetMemoryProperties()} {}
VKMemoryManager::~VKMemoryManager() = default;
VKMemoryCommit VKMemoryManager::Commit(const VkMemoryRequirements& requirements,
bool host_visible) {
const u64 chunk_size = GetAllocationChunkSize(requirements.size);
    // When a host visible commit is requested, search for host visible and coherent memory;
    // otherwise search for a fast device local type.
const VkMemoryPropertyFlags wanted_properties =
host_visible ? VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT
: VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT;
if (auto commit = TryAllocCommit(requirements, wanted_properties)) {
return commit;
}
// Commit has failed, allocate more memory.
if (!AllocMemory(wanted_properties, requirements.memoryTypeBits, chunk_size)) {
// TODO(Rodrigo): Handle these situations in some way like flushing to guest memory.
// Allocation has failed, panic.
UNREACHABLE_MSG("Ran out of VRAM!");
return {};
}
// Commit again, this time it won't fail since there's a fresh allocation above. If it does,
// there's a bug.
auto commit = TryAllocCommit(requirements, wanted_properties);
ASSERT(commit);
return commit;
}
VKMemoryCommit VKMemoryManager::Commit(const vk::Buffer& buffer, bool host_visible) {
auto commit = Commit(device.GetLogical().GetBufferMemoryRequirements(*buffer), host_visible);
buffer.BindMemory(commit->GetMemory(), commit->GetOffset());
return commit;
}
VKMemoryCommit VKMemoryManager::Commit(const vk::Image& image, bool host_visible) {
auto commit = Commit(device.GetLogical().GetImageMemoryRequirements(*image), host_visible);
image.BindMemory(commit->GetMemory(), commit->GetOffset());
return commit;
}
bool VKMemoryManager::AllocMemory(VkMemoryPropertyFlags wanted_properties, u32 type_mask,
u64 size) {
const u32 type = [&] {
for (u32 type_index = 0; type_index < properties.memoryTypeCount; ++type_index) {
const auto flags = properties.memoryTypes[type_index].propertyFlags;
if ((type_mask & (1U << type_index)) && (flags & wanted_properties)) {
// The type matches in type and in the wanted properties.
return type_index;
}
}
UNREACHABLE_MSG("Couldn't find a compatible memory type!");
return 0U;
}();
// Try to allocate found type.
vk::DeviceMemory memory = device.GetLogical().TryAllocateMemory({
.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
.pNext = nullptr,
.allocationSize = size,
.memoryTypeIndex = type,
});
if (!memory) {
LOG_CRITICAL(Render_Vulkan, "Device allocation failed!");
return false;
}
allocations.push_back(std::make_unique<VKMemoryAllocation>(device, std::move(memory),
wanted_properties, size, type));
return true;
}
VKMemoryCommit VKMemoryManager::TryAllocCommit(const VkMemoryRequirements& requirements,
VkMemoryPropertyFlags wanted_properties) {
for (auto& allocation : allocations) {
if (!allocation->IsCompatible(wanted_properties, requirements.memoryTypeBits)) {
continue;
}
if (auto commit = allocation->Commit(requirements.size, requirements.alignment)) {
return commit;
}
}
return {};
}
VKMemoryCommitImpl::VKMemoryCommitImpl(const VKDevice& device_, VKMemoryAllocation* allocation_,
const vk::DeviceMemory& memory_, u64 begin_, u64 end_)
: device{device_}, memory{memory_}, interval{begin_, end_}, allocation{allocation_} {}
VKMemoryCommitImpl::~VKMemoryCommitImpl() {
allocation->Free(this);
}
MemoryMap VKMemoryCommitImpl::Map(u64 size, u64 offset_) const {
return MemoryMap(this, std::span<u8>(memory.Map(interval.first + offset_, size), size));
}
void VKMemoryCommitImpl::Unmap() const {
memory.Unmap();
}
MemoryMap VKMemoryCommitImpl::Map() const {
return Map(interval.second - interval.first);
}
} // namespace Vulkan

View File

@@ -0,0 +1,132 @@
// Copyright 2019 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <memory>
#include <span>
#include <utility>
#include <vector>
#include "common/common_types.h"
#include "video_core/renderer_vulkan/wrapper.h"
namespace Vulkan {
class MemoryMap;
class VKDevice;
class VKMemoryAllocation;
class VKMemoryCommitImpl;
using VKMemoryCommit = std::unique_ptr<VKMemoryCommitImpl>;
class VKMemoryManager final {
public:
explicit VKMemoryManager(const VKDevice& device_);
VKMemoryManager(const VKMemoryManager&) = delete;
~VKMemoryManager();
/**
     * Commits memory with the specified requirements.
* @param requirements Requirements returned from a Vulkan call.
* @param host_visible Signals the allocator that it *must* use host visible and coherent
* memory. When passing false, it will try to allocate device local memory.
* @returns A memory commit.
*/
VKMemoryCommit Commit(const VkMemoryRequirements& requirements, bool host_visible);
/// Commits memory required by the buffer and binds it.
VKMemoryCommit Commit(const vk::Buffer& buffer, bool host_visible);
/// Commits memory required by the image and binds it.
VKMemoryCommit Commit(const vk::Image& image, bool host_visible);
private:
/// Allocates a chunk of memory.
bool AllocMemory(VkMemoryPropertyFlags wanted_properties, u32 type_mask, u64 size);
/// Tries to allocate a memory commit.
VKMemoryCommit TryAllocCommit(const VkMemoryRequirements& requirements,
VkMemoryPropertyFlags wanted_properties);
const VKDevice& device; ///< Device handler.
const VkPhysicalDeviceMemoryProperties properties; ///< Physical device properties.
std::vector<std::unique_ptr<VKMemoryAllocation>> allocations; ///< Current allocations.
};
class VKMemoryCommitImpl final {
friend VKMemoryAllocation;
friend MemoryMap;
public:
explicit VKMemoryCommitImpl(const VKDevice& device_, VKMemoryAllocation* allocation_,
const vk::DeviceMemory& memory_, u64 begin_, u64 end_);
~VKMemoryCommitImpl();
/// Maps a memory region and returns a pointer to it.
/// It's illegal to have more than one memory map at the same time.
MemoryMap Map(u64 size, u64 offset = 0) const;
/// Maps the whole commit and returns a pointer to it.
/// It's illegal to have more than one memory map at the same time.
MemoryMap Map() const;
/// Returns the Vulkan memory handler.
VkDeviceMemory GetMemory() const {
return *memory;
}
/// Returns the start position of the commit relative to the allocation.
VkDeviceSize GetOffset() const {
return static_cast<VkDeviceSize>(interval.first);
}
private:
/// Unmaps memory.
void Unmap() const;
const VKDevice& device; ///< Vulkan device.
const vk::DeviceMemory& memory; ///< Vulkan device memory handler.
std::pair<u64, u64> interval{}; ///< Interval where the commit exists.
VKMemoryAllocation* allocation{}; ///< Pointer to the large memory allocation.
};
/// Holds ownership of a memory map.
class MemoryMap final {
public:
explicit MemoryMap(const VKMemoryCommitImpl* commit_, std::span<u8> span_)
: commit{commit_}, span{span_} {}
~MemoryMap() {
if (commit) {
commit->Unmap();
}
}
/// Prematurely releases the memory map.
void Release() {
commit->Unmap();
commit = nullptr;
}
/// Returns a span to the memory map.
[[nodiscard]] std::span<u8> Span() const noexcept {
return span;
}
/// Returns the address of the memory map.
[[nodiscard]] u8* Address() const noexcept {
return span.data();
}
    /// Returns the address of the memory map.
[[nodiscard]] operator u8*() const noexcept {
return span.data();
}
private:
const VKMemoryCommitImpl* commit{}; ///< Mapped memory commit.
std::span<u8> span; ///< Address to the mapped memory.
};
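// Illustrative usage (assuming a valid VKMemoryCommit named "commit"):
//   MemoryMap map = commit->Map();         // maps the whole commit
//   std::memcpy(map.Address(), src, size); // write through the mapping
//   // the destructor unmaps unless Release() was called first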
} // namespace Vulkan

View File

@@ -0,0 +1,453 @@
// Copyright 2019 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <algorithm>
#include <cstddef>
#include <memory>
#include <vector>
#include "common/bit_cast.h"
#include "common/cityhash.h"
#include "common/microprofile.h"
#include "core/core.h"
#include "core/memory.h"
#include "video_core/engines/kepler_compute.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/memory_manager.h"
#include "video_core/renderer_vulkan/fixed_pipeline_state.h"
#include "video_core/renderer_vulkan/maxwell_to_vk.h"
#include "video_core/renderer_vulkan/vk_compute_pipeline.h"
#include "video_core/renderer_vulkan/vk_descriptor_pool.h"
#include "video_core/renderer_vulkan/vk_device.h"
#include "video_core/renderer_vulkan/vk_graphics_pipeline.h"
#include "video_core/renderer_vulkan/vk_pipeline_cache.h"
#include "video_core/renderer_vulkan/vk_rasterizer.h"
#include "video_core/renderer_vulkan/vk_scheduler.h"
#include "video_core/renderer_vulkan/vk_update_descriptor.h"
#include "video_core/renderer_vulkan/wrapper.h"
#include "video_core/shader/compiler_settings.h"
#include "video_core/shader/memory_util.h"
#include "video_core/shader_cache.h"
#include "video_core/shader_notify.h"
namespace Vulkan {
MICROPROFILE_DECLARE(Vulkan_PipelineCache);
using Tegra::Engines::ShaderType;
using VideoCommon::Shader::GetShaderAddress;
using VideoCommon::Shader::GetShaderCode;
using VideoCommon::Shader::KERNEL_MAIN_OFFSET;
using VideoCommon::Shader::ProgramCode;
using VideoCommon::Shader::STAGE_MAIN_OFFSET;
namespace {
constexpr VkDescriptorType UNIFORM_BUFFER = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER;
constexpr VkDescriptorType STORAGE_BUFFER = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
constexpr VkDescriptorType UNIFORM_TEXEL_BUFFER = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER;
constexpr VkDescriptorType COMBINED_IMAGE_SAMPLER = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER;
constexpr VkDescriptorType STORAGE_TEXEL_BUFFER = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER;
constexpr VkDescriptorType STORAGE_IMAGE = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE;
constexpr VideoCommon::Shader::CompilerSettings compiler_settings{
.depth = VideoCommon::Shader::CompileDepth::FullDecompile,
.disable_else_derivation = true,
};
constexpr std::size_t GetStageFromProgram(std::size_t program) {
return program == 0 ? 0 : program - 1;
}
constexpr ShaderType GetStageFromProgram(Maxwell::ShaderProgram program) {
return static_cast<ShaderType>(GetStageFromProgram(static_cast<std::size_t>(program)));
}
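// Program-to-stage mapping (VertexA and VertexB share the vertex stage):
// VertexA(0) -> 0, VertexB(1) -> 0, TesselationControl(2) -> 1,
// TesselationEval(3) -> 2, Geometry(4) -> 3, Fragment(5) -> 4.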
ShaderType GetShaderType(Maxwell::ShaderProgram program) {
switch (program) {
case Maxwell::ShaderProgram::VertexB:
return ShaderType::Vertex;
case Maxwell::ShaderProgram::TesselationControl:
return ShaderType::TesselationControl;
case Maxwell::ShaderProgram::TesselationEval:
return ShaderType::TesselationEval;
case Maxwell::ShaderProgram::Geometry:
return ShaderType::Geometry;
case Maxwell::ShaderProgram::Fragment:
return ShaderType::Fragment;
default:
UNIMPLEMENTED_MSG("program={}", program);
return ShaderType::Vertex;
}
}
template <VkDescriptorType descriptor_type, class Container>
void AddBindings(std::vector<VkDescriptorSetLayoutBinding>& bindings, u32& binding,
VkShaderStageFlags stage_flags, const Container& container) {
const u32 num_entries = static_cast<u32>(std::size(container));
for (std::size_t i = 0; i < num_entries; ++i) {
u32 count = 1;
if constexpr (descriptor_type == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER) {
// Combined image samplers can be arrayed.
count = container[i].size;
}
bindings.push_back({
.binding = binding++,
.descriptorType = descriptor_type,
.descriptorCount = count,
.stageFlags = stage_flags,
.pImmutableSamplers = nullptr,
});
}
}
u32 FillDescriptorLayout(const ShaderEntries& entries,
std::vector<VkDescriptorSetLayoutBinding>& bindings,
Maxwell::ShaderProgram program_type, u32 base_binding) {
const ShaderType stage = GetStageFromProgram(program_type);
const VkShaderStageFlags flags = MaxwellToVK::ShaderStage(stage);
u32 binding = base_binding;
AddBindings<UNIFORM_BUFFER>(bindings, binding, flags, entries.const_buffers);
AddBindings<STORAGE_BUFFER>(bindings, binding, flags, entries.global_buffers);
AddBindings<UNIFORM_TEXEL_BUFFER>(bindings, binding, flags, entries.uniform_texels);
AddBindings<COMBINED_IMAGE_SAMPLER>(bindings, binding, flags, entries.samplers);
AddBindings<STORAGE_TEXEL_BUFFER>(bindings, binding, flags, entries.storage_texels);
AddBindings<STORAGE_IMAGE>(bindings, binding, flags, entries.images);
return binding;
}
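// Per stage, bindings are laid out in this fixed order: const buffers, global
// buffers, uniform texels, samplers, storage texels, images; the running
// "binding" index keeps bindings unique across stages.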
} // Anonymous namespace
std::size_t GraphicsPipelineCacheKey::Hash() const noexcept {
const u64 hash = Common::CityHash64(reinterpret_cast<const char*>(this), Size());
return static_cast<std::size_t>(hash);
}
bool GraphicsPipelineCacheKey::operator==(const GraphicsPipelineCacheKey& rhs) const noexcept {
return std::memcmp(&rhs, this, Size()) == 0;
}
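// Byte-wise hashing and comparison are valid here because both keys are
// trivially copyable with unique object representations (static_asserted in
// their headers); Size() limits the bytes to the portion of the key in use.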
std::size_t ComputePipelineCacheKey::Hash() const noexcept {
const u64 hash = Common::CityHash64(reinterpret_cast<const char*>(this), sizeof *this);
return static_cast<std::size_t>(hash);
}
bool ComputePipelineCacheKey::operator==(const ComputePipelineCacheKey& rhs) const noexcept {
return std::memcmp(&rhs, this, sizeof *this) == 0;
}
Shader::Shader(Tegra::Engines::ConstBufferEngineInterface& engine_, ShaderType stage_,
GPUVAddr gpu_addr_, VAddr cpu_addr_, ProgramCode program_code_, u32 main_offset_)
: gpu_addr(gpu_addr_), program_code(std::move(program_code_)), registry(stage_, engine_),
shader_ir(program_code, main_offset_, compiler_settings, registry),
entries(GenerateShaderEntries(shader_ir)) {}
Shader::~Shader() = default;
VKPipelineCache::VKPipelineCache(RasterizerVulkan& rasterizer_, Tegra::GPU& gpu_,
Tegra::Engines::Maxwell3D& maxwell3d_,
Tegra::Engines::KeplerCompute& kepler_compute_,
Tegra::MemoryManager& gpu_memory_, const VKDevice& device_,
VKScheduler& scheduler_, VKDescriptorPool& descriptor_pool_,
VKUpdateDescriptorQueue& update_descriptor_queue_)
: VideoCommon::ShaderCache<Shader>{rasterizer_}, gpu{gpu_}, maxwell3d{maxwell3d_},
kepler_compute{kepler_compute_}, gpu_memory{gpu_memory_}, device{device_},
scheduler{scheduler_}, descriptor_pool{descriptor_pool_}, update_descriptor_queue{
update_descriptor_queue_} {}
VKPipelineCache::~VKPipelineCache() = default;
std::array<Shader*, Maxwell::MaxShaderProgram> VKPipelineCache::GetShaders() {
std::array<Shader*, Maxwell::MaxShaderProgram> shaders{};
for (std::size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) {
const auto program{static_cast<Maxwell::ShaderProgram>(index)};
// Skip stages that are not enabled
if (!maxwell3d.regs.IsShaderConfigEnabled(index)) {
continue;
}
const GPUVAddr gpu_addr{GetShaderAddress(maxwell3d, program)};
const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
ASSERT(cpu_addr);
Shader* result = cpu_addr ? TryGet(*cpu_addr) : null_shader.get();
if (!result) {
const u8* const host_ptr{gpu_memory.GetPointer(gpu_addr)};
// No shader found - create a new one
static constexpr u32 stage_offset = STAGE_MAIN_OFFSET;
const auto stage = static_cast<ShaderType>(index == 0 ? 0 : index - 1);
ProgramCode code = GetShaderCode(gpu_memory, gpu_addr, host_ptr, false);
const std::size_t size_in_bytes = code.size() * sizeof(u64);
auto shader = std::make_unique<Shader>(maxwell3d, stage, gpu_addr, *cpu_addr,
std::move(code), stage_offset);
result = shader.get();
if (cpu_addr) {
Register(std::move(shader), *cpu_addr, size_in_bytes);
} else {
null_shader = std::move(shader);
}
}
shaders[index] = result;
}
return last_shaders = shaders;
}
VKGraphicsPipeline* VKPipelineCache::GetGraphicsPipeline(
const GraphicsPipelineCacheKey& key, u32 num_color_buffers,
VideoCommon::Shader::AsyncShaders& async_shaders) {
MICROPROFILE_SCOPE(Vulkan_PipelineCache);
if (last_graphics_pipeline && last_graphics_key == key) {
return last_graphics_pipeline;
}
last_graphics_key = key;
if (device.UseAsynchronousShaders() && async_shaders.IsShaderAsync(gpu)) {
std::unique_lock lock{pipeline_cache};
const auto [pair, is_cache_miss] = graphics_cache.try_emplace(key);
if (is_cache_miss) {
gpu.ShaderNotify().MarkSharderBuilding();
LOG_INFO(Render_Vulkan, "Compile 0x{:016X}", key.Hash());
const auto [program, bindings] = DecompileShaders(key.fixed_state);
async_shaders.QueueVulkanShader(this, device, scheduler, descriptor_pool,
update_descriptor_queue, bindings, program, key,
num_color_buffers);
}
last_graphics_pipeline = pair->second.get();
return last_graphics_pipeline;
}
const auto [pair, is_cache_miss] = graphics_cache.try_emplace(key);
auto& entry = pair->second;
if (is_cache_miss) {
gpu.ShaderNotify().MarkSharderBuilding();
LOG_INFO(Render_Vulkan, "Compile 0x{:016X}", key.Hash());
const auto [program, bindings] = DecompileShaders(key.fixed_state);
entry = std::make_unique<VKGraphicsPipeline>(device, scheduler, descriptor_pool,
update_descriptor_queue, key, bindings,
program, num_color_buffers);
gpu.ShaderNotify().MarkShaderComplete();
}
last_graphics_pipeline = entry.get();
return last_graphics_pipeline;
}
VKComputePipeline& VKPipelineCache::GetComputePipeline(const ComputePipelineCacheKey& key) {
MICROPROFILE_SCOPE(Vulkan_PipelineCache);
const auto [pair, is_cache_miss] = compute_cache.try_emplace(key);
auto& entry = pair->second;
if (!is_cache_miss) {
return *entry;
}
LOG_INFO(Render_Vulkan, "Compile 0x{:016X}", key.Hash());
const GPUVAddr gpu_addr = key.shader;
const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
ASSERT(cpu_addr);
Shader* shader = cpu_addr ? TryGet(*cpu_addr) : null_kernel.get();
if (!shader) {
// No shader found - create a new one
const auto host_ptr = gpu_memory.GetPointer(gpu_addr);
ProgramCode code = GetShaderCode(gpu_memory, gpu_addr, host_ptr, true);
const std::size_t size_in_bytes = code.size() * sizeof(u64);
auto shader_info = std::make_unique<Shader>(kepler_compute, ShaderType::Compute, gpu_addr,
*cpu_addr, std::move(code), KERNEL_MAIN_OFFSET);
shader = shader_info.get();
if (cpu_addr) {
Register(std::move(shader_info), *cpu_addr, size_in_bytes);
} else {
null_kernel = std::move(shader_info);
}
}
const Specialization specialization{
.base_binding = 0,
.workgroup_size = key.workgroup_size,
.shared_memory_size = key.shared_memory_size,
.point_size = std::nullopt,
.enabled_attributes = {},
.attribute_types = {},
.ndc_minus_one_to_one = false,
};
const SPIRVShader spirv_shader{Decompile(device, shader->GetIR(), ShaderType::Compute,
shader->GetRegistry(), specialization),
shader->GetEntries()};
entry = std::make_unique<VKComputePipeline>(device, scheduler, descriptor_pool,
update_descriptor_queue, spirv_shader);
return *entry;
}
void VKPipelineCache::EmplacePipeline(std::unique_ptr<VKGraphicsPipeline> pipeline) {
gpu.ShaderNotify().MarkShaderComplete();
std::unique_lock lock{pipeline_cache};
graphics_cache.at(pipeline->GetCacheKey()) = std::move(pipeline);
}
void VKPipelineCache::OnShaderRemoval(Shader* shader) {
bool finished = false;
const auto Finish = [&] {
// TODO(Rodrigo): Instead of finishing here, wait for the fences that use this pipeline and
// flush.
if (finished) {
return;
}
finished = true;
scheduler.Finish();
};
const GPUVAddr invalidated_addr = shader->GetGpuAddr();
for (auto it = graphics_cache.begin(); it != graphics_cache.end();) {
auto& entry = it->first;
if (std::find(entry.shaders.begin(), entry.shaders.end(), invalidated_addr) ==
entry.shaders.end()) {
++it;
continue;
}
Finish();
it = graphics_cache.erase(it);
}
for (auto it = compute_cache.begin(); it != compute_cache.end();) {
auto& entry = it->first;
if (entry.shader != invalidated_addr) {
++it;
continue;
}
Finish();
it = compute_cache.erase(it);
}
}
std::pair<SPIRVProgram, std::vector<VkDescriptorSetLayoutBinding>>
VKPipelineCache::DecompileShaders(const FixedPipelineState& fixed_state) {
Specialization specialization;
if (fixed_state.topology == Maxwell::PrimitiveTopology::Points) {
        const float point_size = Common::BitCast<float>(fixed_state.point_size);
        specialization.point_size = point_size;
ASSERT(point_size != 0.0f);
}
for (std::size_t i = 0; i < Maxwell::NumVertexAttributes; ++i) {
const auto& attribute = fixed_state.attributes[i];
specialization.enabled_attributes[i] = attribute.enabled.Value() != 0;
specialization.attribute_types[i] = attribute.Type();
}
specialization.ndc_minus_one_to_one = fixed_state.ndc_minus_one_to_one;
specialization.early_fragment_tests = fixed_state.early_z;
// Alpha test
specialization.alpha_test_func =
FixedPipelineState::UnpackComparisonOp(fixed_state.alpha_test_func.Value());
specialization.alpha_test_ref = Common::BitCast<float>(fixed_state.alpha_test_ref);
SPIRVProgram program;
std::vector<VkDescriptorSetLayoutBinding> bindings;
for (std::size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) {
const auto program_enum = static_cast<Maxwell::ShaderProgram>(index);
// Skip stages that are not enabled
if (!maxwell3d.regs.IsShaderConfigEnabled(index)) {
continue;
}
const GPUVAddr gpu_addr = GetShaderAddress(maxwell3d, program_enum);
const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
Shader* const shader = cpu_addr ? TryGet(*cpu_addr) : null_shader.get();
        const std::size_t stage = index == 0 ? 0 : index - 1; // Stage indices are 0 - 4
const ShaderType program_type = GetShaderType(program_enum);
const auto& entries = shader->GetEntries();
program[stage] = {
Decompile(device, shader->GetIR(), program_type, shader->GetRegistry(), specialization),
entries};
if (program_enum == Maxwell::ShaderProgram::VertexA) {
// VertexB was combined with VertexA, so we skip the VertexB iteration
++index;
}
const u32 old_binding = specialization.base_binding;
specialization.base_binding =
FillDescriptorLayout(entries, bindings, program_enum, specialization.base_binding);
ASSERT(old_binding + entries.NumBindings() == specialization.base_binding);
}
return {std::move(program), std::move(bindings)};
}
template <VkDescriptorType descriptor_type, class Container>
void AddEntry(std::vector<VkDescriptorUpdateTemplateEntry>& template_entries, u32& binding,
u32& offset, const Container& container) {
static constexpr u32 entry_size = static_cast<u32>(sizeof(DescriptorUpdateEntry));
const u32 count = static_cast<u32>(std::size(container));
if constexpr (descriptor_type == COMBINED_IMAGE_SAMPLER) {
for (u32 i = 0; i < count; ++i) {
const u32 num_samplers = container[i].size;
template_entries.push_back({
.dstBinding = binding,
.dstArrayElement = 0,
.descriptorCount = num_samplers,
.descriptorType = descriptor_type,
.offset = offset,
.stride = entry_size,
});
++binding;
offset += num_samplers * entry_size;
}
return;
}
if constexpr (descriptor_type == UNIFORM_TEXEL_BUFFER ||
descriptor_type == STORAGE_TEXEL_BUFFER) {
// Nvidia has a bug where updating multiple texels at once causes the driver to crash.
// Note: Fixed in driver Windows 443.24, Linux 440.66.15
for (u32 i = 0; i < count; ++i) {
template_entries.push_back({
.dstBinding = binding + i,
.dstArrayElement = 0,
.descriptorCount = 1,
.descriptorType = descriptor_type,
.offset = static_cast<std::size_t>(offset + i * entry_size),
.stride = entry_size,
});
}
} else if (count > 0) {
template_entries.push_back({
.dstBinding = binding,
.dstArrayElement = 0,
.descriptorCount = count,
.descriptorType = descriptor_type,
.offset = offset,
.stride = entry_size,
});
}
offset += count * entry_size;
binding += count;
}
void FillDescriptorUpdateTemplateEntries(
const ShaderEntries& entries, u32& binding, u32& offset,
std::vector<VkDescriptorUpdateTemplateEntryKHR>& template_entries) {
    AddEntry<UNIFORM_BUFFER>(template_entries, binding, offset, entries.const_buffers);
    AddEntry<STORAGE_BUFFER>(template_entries, binding, offset, entries.global_buffers);
    AddEntry<UNIFORM_TEXEL_BUFFER>(template_entries, binding, offset, entries.uniform_texels);
    AddEntry<COMBINED_IMAGE_SAMPLER>(template_entries, binding, offset, entries.samplers);
    AddEntry<STORAGE_TEXEL_BUFFER>(template_entries, binding, offset, entries.storage_texels);
    AddEntry<STORAGE_IMAGE>(template_entries, binding, offset, entries.images);
}
} // namespace Vulkan

View File

@@ -0,0 +1,174 @@
// Copyright 2019 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <array>
#include <cstddef>
#include <memory>
#include <type_traits>
#include <unordered_map>
#include <utility>
#include <vector>
#include <boost/functional/hash.hpp>
#include "common/common_types.h"
#include "video_core/engines/const_buffer_engine_interface.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/renderer_vulkan/fixed_pipeline_state.h"
#include "video_core/renderer_vulkan/vk_graphics_pipeline.h"
#include "video_core/renderer_vulkan/vk_shader_decompiler.h"
#include "video_core/renderer_vulkan/wrapper.h"
#include "video_core/shader/async_shaders.h"
#include "video_core/shader/memory_util.h"
#include "video_core/shader/registry.h"
#include "video_core/shader/shader_ir.h"
#include "video_core/shader_cache.h"
namespace Core {
class System;
}
namespace Vulkan {
class RasterizerVulkan;
class VKComputePipeline;
class VKDescriptorPool;
class VKDevice;
class VKScheduler;
class VKUpdateDescriptorQueue;
using Maxwell = Tegra::Engines::Maxwell3D::Regs;
struct ComputePipelineCacheKey {
GPUVAddr shader;
u32 shared_memory_size;
std::array<u32, 3> workgroup_size;
std::size_t Hash() const noexcept;
bool operator==(const ComputePipelineCacheKey& rhs) const noexcept;
bool operator!=(const ComputePipelineCacheKey& rhs) const noexcept {
return !operator==(rhs);
}
};
static_assert(std::has_unique_object_representations_v<ComputePipelineCacheKey>);
static_assert(std::is_trivially_copyable_v<ComputePipelineCacheKey>);
static_assert(std::is_trivially_constructible_v<ComputePipelineCacheKey>);
} // namespace Vulkan
namespace std {
template <>
struct hash<Vulkan::GraphicsPipelineCacheKey> {
std::size_t operator()(const Vulkan::GraphicsPipelineCacheKey& k) const noexcept {
return k.Hash();
}
};
template <>
struct hash<Vulkan::ComputePipelineCacheKey> {
std::size_t operator()(const Vulkan::ComputePipelineCacheKey& k) const noexcept {
return k.Hash();
}
};
} // namespace std
namespace Vulkan {
class Shader {
public:
explicit Shader(Tegra::Engines::ConstBufferEngineInterface& engine_,
Tegra::Engines::ShaderType stage_, GPUVAddr gpu_addr, VAddr cpu_addr_,
VideoCommon::Shader::ProgramCode program_code, u32 main_offset_);
~Shader();
GPUVAddr GetGpuAddr() const {
return gpu_addr;
}
VideoCommon::Shader::ShaderIR& GetIR() {
return shader_ir;
}
const VideoCommon::Shader::ShaderIR& GetIR() const {
return shader_ir;
}
const VideoCommon::Shader::Registry& GetRegistry() const {
return registry;
}
const ShaderEntries& GetEntries() const {
return entries;
}
private:
GPUVAddr gpu_addr{};
VideoCommon::Shader::ProgramCode program_code;
VideoCommon::Shader::Registry registry;
VideoCommon::Shader::ShaderIR shader_ir;
ShaderEntries entries;
};
class VKPipelineCache final : public VideoCommon::ShaderCache<Shader> {
public:
explicit VKPipelineCache(RasterizerVulkan& rasterizer, Tegra::GPU& gpu,
Tegra::Engines::Maxwell3D& maxwell3d,
Tegra::Engines::KeplerCompute& kepler_compute,
Tegra::MemoryManager& gpu_memory, const VKDevice& device,
VKScheduler& scheduler, VKDescriptorPool& descriptor_pool,
VKUpdateDescriptorQueue& update_descriptor_queue);
~VKPipelineCache() override;
std::array<Shader*, Maxwell::MaxShaderProgram> GetShaders();
VKGraphicsPipeline* GetGraphicsPipeline(const GraphicsPipelineCacheKey& key,
u32 num_color_buffers,
VideoCommon::Shader::AsyncShaders& async_shaders);
VKComputePipeline& GetComputePipeline(const ComputePipelineCacheKey& key);
void EmplacePipeline(std::unique_ptr<VKGraphicsPipeline> pipeline);
protected:
void OnShaderRemoval(Shader* shader) final;
private:
std::pair<SPIRVProgram, std::vector<VkDescriptorSetLayoutBinding>> DecompileShaders(
const FixedPipelineState& fixed_state);
Tegra::GPU& gpu;
Tegra::Engines::Maxwell3D& maxwell3d;
Tegra::Engines::KeplerCompute& kepler_compute;
Tegra::MemoryManager& gpu_memory;
const VKDevice& device;
VKScheduler& scheduler;
VKDescriptorPool& descriptor_pool;
VKUpdateDescriptorQueue& update_descriptor_queue;
std::unique_ptr<Shader> null_shader;
std::unique_ptr<Shader> null_kernel;
std::array<Shader*, Maxwell::MaxShaderProgram> last_shaders{};
GraphicsPipelineCacheKey last_graphics_key;
VKGraphicsPipeline* last_graphics_pipeline = nullptr;
std::mutex pipeline_cache;
std::unordered_map<GraphicsPipelineCacheKey, std::unique_ptr<VKGraphicsPipeline>>
graphics_cache;
std::unordered_map<ComputePipelineCacheKey, std::unique_ptr<VKComputePipeline>> compute_cache;
};
void FillDescriptorUpdateTemplateEntries(
const ShaderEntries& entries, u32& binding, u32& offset,
std::vector<VkDescriptorUpdateTemplateEntryKHR>& template_entries);
} // namespace Vulkan

View File

@@ -0,0 +1,137 @@
// Copyright 2020 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <algorithm>
#include <cstddef>
#include <utility>
#include <vector>
#include "video_core/renderer_vulkan/vk_device.h"
#include "video_core/renderer_vulkan/vk_query_cache.h"
#include "video_core/renderer_vulkan/vk_resource_pool.h"
#include "video_core/renderer_vulkan/vk_scheduler.h"
#include "video_core/renderer_vulkan/wrapper.h"
namespace Vulkan {
using VideoCore::QueryType;
namespace {
constexpr std::array QUERY_TARGETS = {VK_QUERY_TYPE_OCCLUSION};
constexpr VkQueryType GetTarget(QueryType type) {
return QUERY_TARGETS[static_cast<std::size_t>(type)];
}
} // Anonymous namespace
QueryPool::QueryPool(const VKDevice& device_, VKScheduler& scheduler, QueryType type_)
: ResourcePool{scheduler.GetMasterSemaphore(), GROW_STEP}, device{device_}, type{type_} {}
QueryPool::~QueryPool() = default;
std::pair<VkQueryPool, u32> QueryPool::Commit() {
std::size_t index;
do {
index = CommitResource();
} while (usage[index]);
usage[index] = true;
return {*pools[index / GROW_STEP], static_cast<u32>(index % GROW_STEP)};
}
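// Each backing vk::QueryPool holds GROW_STEP queries: a flat index maps to
// (pools[index / GROW_STEP], index % GROW_STEP) and Commit() linearly probes
// the usage bitmap for a free slot.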
void QueryPool::Allocate(std::size_t begin, std::size_t end) {
usage.resize(end);
pools.push_back(device.GetLogical().CreateQueryPool({
.sType = VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
.queryType = GetTarget(type),
.queryCount = static_cast<u32>(end - begin),
.pipelineStatistics = 0,
}));
}
void QueryPool::Reserve(std::pair<VkQueryPool, u32> query) {
const auto it =
std::find_if(pools.begin(), pools.end(), [query_pool = query.first](vk::QueryPool& pool) {
return query_pool == *pool;
});
ASSERT(it != std::end(pools));
const std::ptrdiff_t pool_index = std::distance(std::begin(pools), it);
usage[pool_index * GROW_STEP + static_cast<std::ptrdiff_t>(query.second)] = false;
}
VKQueryCache::VKQueryCache(VideoCore::RasterizerInterface& rasterizer_,
Tegra::Engines::Maxwell3D& maxwell3d_, Tegra::MemoryManager& gpu_memory_,
const VKDevice& device_, VKScheduler& scheduler_)
: QueryCacheBase{rasterizer_, maxwell3d_, gpu_memory_}, device{device_}, scheduler{scheduler_},
query_pools{
QueryPool{device_, scheduler_, QueryType::SamplesPassed},
} {}
VKQueryCache::~VKQueryCache() {
// TODO(Rodrigo): This is a hack to destroy all HostCounter instances before the base class
// destructor is called. The query cache should be redesigned to have a proper ownership model
// instead of using shared pointers.
for (size_t query_type = 0; query_type < VideoCore::NumQueryTypes; ++query_type) {
auto& stream = Stream(static_cast<QueryType>(query_type));
stream.Update(false);
stream.Reset();
}
}
std::pair<VkQueryPool, u32> VKQueryCache::AllocateQuery(QueryType type) {
return query_pools[static_cast<std::size_t>(type)].Commit();
}
void VKQueryCache::Reserve(QueryType type, std::pair<VkQueryPool, u32> query) {
query_pools[static_cast<std::size_t>(type)].Reserve(query);
}
HostCounter::HostCounter(VKQueryCache& cache_, std::shared_ptr<HostCounter> dependency_,
QueryType type_)
: HostCounterBase{std::move(dependency_)}, cache{cache_}, type{type_},
query{cache_.AllocateQuery(type_)}, tick{cache_.Scheduler().CurrentTick()} {
const vk::Device* logical = &cache_.Device().GetLogical();
cache_.Scheduler().Record([logical, query = query](vk::CommandBuffer cmdbuf) {
logical->ResetQueryPoolEXT(query.first, query.second, 1);
cmdbuf.BeginQuery(query.first, query.second, VK_QUERY_CONTROL_PRECISE_BIT);
});
}
HostCounter::~HostCounter() {
cache.Reserve(type, query);
}
void HostCounter::EndQuery() {
cache.Scheduler().Record(
[query = query](vk::CommandBuffer cmdbuf) { cmdbuf.EndQuery(query.first, query.second); });
}
u64 HostCounter::BlockingQuery() const {
if (tick >= cache.Scheduler().CurrentTick()) {
cache.Scheduler().Flush();
}
u64 data;
const VkResult query_result = cache.Device().GetLogical().GetQueryResults(
query.first, query.second, 1, sizeof(data), &data, sizeof(data),
VK_QUERY_RESULT_64_BIT | VK_QUERY_RESULT_WAIT_BIT);
switch (query_result) {
case VK_SUCCESS:
return data;
case VK_ERROR_DEVICE_LOST:
cache.Device().ReportLoss();
[[fallthrough]];
default:
throw vk::Exception(query_result);
}
}
} // namespace Vulkan

View File

@@ -0,0 +1,102 @@
// Copyright 2020 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <cstddef>
#include <memory>
#include <utility>
#include <vector>
#include "common/common_types.h"
#include "video_core/query_cache.h"
#include "video_core/renderer_vulkan/vk_resource_pool.h"
#include "video_core/renderer_vulkan/wrapper.h"
namespace VideoCore {
class RasterizerInterface;
}
namespace Vulkan {
class CachedQuery;
class HostCounter;
class VKDevice;
class VKQueryCache;
class VKScheduler;
using CounterStream = VideoCommon::CounterStreamBase<VKQueryCache, HostCounter>;
class QueryPool final : public ResourcePool {
public:
explicit QueryPool(const VKDevice& device, VKScheduler& scheduler, VideoCore::QueryType type);
~QueryPool() override;
std::pair<VkQueryPool, u32> Commit();
void Reserve(std::pair<VkQueryPool, u32> query);
protected:
void Allocate(std::size_t begin, std::size_t end) override;
private:
static constexpr std::size_t GROW_STEP = 512;
const VKDevice& device;
const VideoCore::QueryType type;
std::vector<vk::QueryPool> pools;
std::vector<bool> usage;
};
class VKQueryCache final
: public VideoCommon::QueryCacheBase<VKQueryCache, CachedQuery, CounterStream, HostCounter> {
public:
explicit VKQueryCache(VideoCore::RasterizerInterface& rasterizer_,
Tegra::Engines::Maxwell3D& maxwell3d_, Tegra::MemoryManager& gpu_memory_,
const VKDevice& device_, VKScheduler& scheduler_);
~VKQueryCache();
std::pair<VkQueryPool, u32> AllocateQuery(VideoCore::QueryType type);
void Reserve(VideoCore::QueryType type, std::pair<VkQueryPool, u32> query);
const VKDevice& Device() const noexcept {
return device;
}
VKScheduler& Scheduler() const noexcept {
return scheduler;
}
private:
const VKDevice& device;
VKScheduler& scheduler;
std::array<QueryPool, VideoCore::NumQueryTypes> query_pools;
};
class HostCounter final : public VideoCommon::HostCounterBase<VKQueryCache, HostCounter> {
public:
explicit HostCounter(VKQueryCache& cache_, std::shared_ptr<HostCounter> dependency_,
VideoCore::QueryType type_);
~HostCounter();
void EndQuery();
private:
u64 BlockingQuery() const override;
VKQueryCache& cache;
const VideoCore::QueryType type;
const std::pair<VkQueryPool, u32> query;
const u64 tick;
};
class CachedQuery : public VideoCommon::CachedQueryBase<HostCounter> {
public:
explicit CachedQuery(VKQueryCache&, VideoCore::QueryType, VAddr cpu_addr_, u8* host_ptr_)
: CachedQueryBase{cpu_addr_, host_ptr_} {}
};
} // namespace Vulkan

File diff suppressed because it is too large

View File

@@ -0,0 +1,255 @@
// Copyright 2019 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <array>
#include <bitset>
#include <memory>
#include <utility>
#include <vector>
#include <boost/container/static_vector.hpp>
#include "common/common_types.h"
#include "video_core/rasterizer_accelerated.h"
#include "video_core/rasterizer_interface.h"
#include "video_core/renderer_vulkan/blit_image.h"
#include "video_core/renderer_vulkan/fixed_pipeline_state.h"
#include "video_core/renderer_vulkan/vk_buffer_cache.h"
#include "video_core/renderer_vulkan/vk_compute_pass.h"
#include "video_core/renderer_vulkan/vk_descriptor_pool.h"
#include "video_core/renderer_vulkan/vk_fence_manager.h"
#include "video_core/renderer_vulkan/vk_memory_manager.h"
#include "video_core/renderer_vulkan/vk_pipeline_cache.h"
#include "video_core/renderer_vulkan/vk_query_cache.h"
#include "video_core/renderer_vulkan/vk_scheduler.h"
#include "video_core/renderer_vulkan/vk_staging_buffer_pool.h"
#include "video_core/renderer_vulkan/vk_stream_buffer.h"
#include "video_core/renderer_vulkan/vk_texture_cache.h"
#include "video_core/renderer_vulkan/vk_update_descriptor.h"
#include "video_core/renderer_vulkan/wrapper.h"
#include "video_core/shader/async_shaders.h"
namespace Core {
class System;
}
namespace Core::Frontend {
class EmuWindow;
}
namespace Tegra::Engines {
class Maxwell3D;
}
namespace Vulkan {
struct VKScreenInfo;
class StateTracker;
class BufferBindings;
struct Transition {
ImageView* view;
VkImageLayout* layout;
};
class RasterizerVulkan final : public VideoCore::RasterizerAccelerated {
public:
explicit RasterizerVulkan(Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu_,
Tegra::MemoryManager& gpu_memory_, Core::Memory::Memory& cpu_memory_,
VKScreenInfo& screen_info_, const VKDevice& device_,
VKMemoryManager& memory_manager_, StateTracker& state_tracker_,
VKScheduler& scheduler_);
~RasterizerVulkan() override;
void Draw(bool is_indexed, bool is_instanced) override;
void Clear() override;
void DispatchCompute(GPUVAddr code_addr) override;
void ResetCounter(VideoCore::QueryType type) override;
void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional<u64> timestamp) override;
void FlushAll() override;
void FlushRegion(VAddr addr, u64 size) override;
void InvalidateExceptTextureCache(VAddr addr, u64 size) override;
void InvalidateTextureCache(VAddr addr, u64 size) override;
bool MustFlushRegion(VAddr addr, u64 size) override;
void InvalidateRegion(VAddr addr, u64 size) override;
void OnCPUWrite(VAddr addr, u64 size) override;
void SyncGuestHost() override;
void UnmapMemory(VAddr addr, u64 size) override;
void SignalSemaphore(GPUVAddr addr, u32 value) override;
void SignalSyncPoint(u32 value) override;
void ReleaseFences() override;
void FlushAndInvalidateRegion(VAddr addr, u64 size) override;
void WaitForIdle() override;
void FragmentBarrier() override;
void TiledCacheBarrier() override;
void FlushCommands() override;
void TickFrame() override;
bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Surface& src,
const Tegra::Engines::Fermi2D::Surface& dst,
const Tegra::Engines::Fermi2D::Config& copy_config) override;
bool AccelerateDisplay(const Tegra::FramebufferConfig& config, VAddr framebuffer_addr,
u32 pixel_stride) override;
VideoCommon::Shader::AsyncShaders& GetAsyncShaders() {
return async_shaders;
}
const VideoCommon::Shader::AsyncShaders& GetAsyncShaders() const {
return async_shaders;
}
/// Maximum supported size that a constbuffer can have in bytes.
static constexpr size_t MaxConstbufferSize = 0x10000;
static_assert(MaxConstbufferSize % (4 * sizeof(float)) == 0,
"The maximum size of a constbuffer must be a multiple of the size of GLvec4");
private:
static constexpr size_t MAX_TEXTURES = 192;
static constexpr size_t MAX_IMAGES = 48;
static constexpr size_t MAX_IMAGE_VIEWS = MAX_TEXTURES + MAX_IMAGES;
static constexpr VkDeviceSize DEFAULT_BUFFER_SIZE = 4 * sizeof(float);
struct DrawParameters {
void Draw(vk::CommandBuffer cmdbuf) const;
u32 base_instance = 0;
u32 num_instances = 0;
u32 base_vertex = 0;
u32 num_vertices = 0;
        bool is_indexed = false;
};
void FlushWork();
    /// Sets up geometry buffers and state.
DrawParameters SetupGeometry(FixedPipelineState& fixed_state, BufferBindings& buffer_bindings,
bool is_indexed, bool is_instanced);
/// Setup descriptors in the graphics pipeline.
void SetupShaderDescriptors(const std::array<Shader*, Maxwell::MaxShaderProgram>& shaders);
void UpdateDynamicStates();
void BeginTransformFeedback();
void EndTransformFeedback();
void SetupVertexArrays(BufferBindings& buffer_bindings);
void SetupIndexBuffer(BufferBindings& buffer_bindings, DrawParameters& params, bool is_indexed);
/// Setup constant buffers in the graphics pipeline.
void SetupGraphicsConstBuffers(const ShaderEntries& entries, std::size_t stage);
/// Setup global buffers in the graphics pipeline.
void SetupGraphicsGlobalBuffers(const ShaderEntries& entries, std::size_t stage);
/// Setup uniform texels in the graphics pipeline.
void SetupGraphicsUniformTexels(const ShaderEntries& entries, std::size_t stage);
/// Sets up textures in the graphics pipeline.
void SetupGraphicsTextures(const ShaderEntries& entries, std::size_t stage);
/// Sets up storage texels in the graphics pipeline.
void SetupGraphicsStorageTexels(const ShaderEntries& entries, std::size_t stage);
/// Sets up images in the graphics pipeline.
void SetupGraphicsImages(const ShaderEntries& entries, std::size_t stage);
/// Sets up constant buffers in the compute pipeline.
void SetupComputeConstBuffers(const ShaderEntries& entries);
/// Sets up global buffers in the compute pipeline.
void SetupComputeGlobalBuffers(const ShaderEntries& entries);
/// Sets up uniform texels in the compute pipeline.
void SetupComputeUniformTexels(const ShaderEntries& entries);
/// Sets up textures in the compute pipeline.
void SetupComputeTextures(const ShaderEntries& entries);
/// Sets up storage texels in the compute pipeline.
void SetupComputeStorageTexels(const ShaderEntries& entries);
/// Sets up images in the compute pipeline.
void SetupComputeImages(const ShaderEntries& entries);
void SetupConstBuffer(const ConstBufferEntry& entry,
const Tegra::Engines::ConstBufferInfo& buffer);
void SetupGlobalBuffer(const GlobalBufferEntry& entry, GPUVAddr address);
void UpdateViewportsState(Tegra::Engines::Maxwell3D::Regs& regs);
void UpdateScissorsState(Tegra::Engines::Maxwell3D::Regs& regs);
void UpdateDepthBias(Tegra::Engines::Maxwell3D::Regs& regs);
void UpdateBlendConstants(Tegra::Engines::Maxwell3D::Regs& regs);
void UpdateDepthBounds(Tegra::Engines::Maxwell3D::Regs& regs);
void UpdateStencilFaces(Tegra::Engines::Maxwell3D::Regs& regs);
void UpdateCullMode(Tegra::Engines::Maxwell3D::Regs& regs);
void UpdateDepthBoundsTestEnable(Tegra::Engines::Maxwell3D::Regs& regs);
void UpdateDepthTestEnable(Tegra::Engines::Maxwell3D::Regs& regs);
void UpdateDepthWriteEnable(Tegra::Engines::Maxwell3D::Regs& regs);
void UpdateDepthCompareOp(Tegra::Engines::Maxwell3D::Regs& regs);
void UpdateFrontFace(Tegra::Engines::Maxwell3D::Regs& regs);
void UpdateStencilOp(Tegra::Engines::Maxwell3D::Regs& regs);
void UpdateStencilTestEnable(Tegra::Engines::Maxwell3D::Regs& regs);
size_t CalculateGraphicsStreamBufferSize(bool is_indexed) const;
size_t CalculateComputeStreamBufferSize() const;
size_t CalculateVertexArraysSize() const;
size_t CalculateIndexBufferSize() const;
size_t CalculateConstBufferSize(const ConstBufferEntry& entry,
const Tegra::Engines::ConstBufferInfo& buffer) const;
VkBuffer DefaultBuffer();
Tegra::GPU& gpu;
Tegra::MemoryManager& gpu_memory;
Tegra::Engines::Maxwell3D& maxwell3d;
Tegra::Engines::KeplerCompute& kepler_compute;
VKScreenInfo& screen_info;
const VKDevice& device;
VKMemoryManager& memory_manager;
StateTracker& state_tracker;
VKScheduler& scheduler;
VKStreamBuffer stream_buffer;
VKStagingBufferPool staging_pool;
VKDescriptorPool descriptor_pool;
VKUpdateDescriptorQueue update_descriptor_queue;
BlitImageHelper blit_image;
QuadArrayPass quad_array_pass;
QuadIndexedPass quad_indexed_pass;
Uint8Pass uint8_pass;
TextureCacheRuntime texture_cache_runtime;
TextureCache texture_cache;
VKPipelineCache pipeline_cache;
VKBufferCache buffer_cache;
VKQueryCache query_cache;
VKFenceManager fence_manager;
vk::Buffer default_buffer;
VKMemoryCommit default_buffer_commit;
vk::Event wfi_event;
VideoCommon::Shader::AsyncShaders async_shaders;
boost::container::static_vector<u32, MAX_IMAGE_VIEWS> image_view_indices;
std::array<VideoCommon::ImageViewId, MAX_IMAGE_VIEWS> image_view_ids;
boost::container::static_vector<VkSampler, MAX_TEXTURES> sampler_handles;
u32 draw_counter = 0;
};
} // namespace Vulkan
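The DrawParameters helper above is defined out-of-line in the rasterizer's source file; a plausible sketch of how it dispatches, assuming the wrapper's draw calls mirror the vkCmdDraw/vkCmdDrawIndexed parameter order:
// Plausible sketch only; assumes vk::CommandBuffer exposes Draw/DrawIndexed with
// vkCmdDraw/vkCmdDrawIndexed-style parameters.
void RasterizerVulkan::DrawParameters::Draw(vk::CommandBuffer cmdbuf) const {
    if (is_indexed) {
        cmdbuf.DrawIndexed(num_vertices, num_instances, 0, base_vertex, base_instance);
    } else {
        cmdbuf.Draw(num_vertices, num_instances, base_vertex, base_instance);
    }
}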

View File

@@ -0,0 +1,63 @@
// Copyright 2020 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <optional>
#include "video_core/renderer_vulkan/vk_master_semaphore.h"
#include "video_core/renderer_vulkan/vk_resource_pool.h"
namespace Vulkan {
ResourcePool::ResourcePool(MasterSemaphore& master_semaphore_, size_t grow_step_)
: master_semaphore{master_semaphore_}, grow_step{grow_step_} {}
ResourcePool::~ResourcePool() = default;
size_t ResourcePool::CommitResource() {
// Refresh semaphore to query updated results
master_semaphore.Refresh();
const auto search = [this](size_t begin, size_t end) -> std::optional<size_t> {
for (size_t iterator = begin; iterator < end; ++iterator) {
if (master_semaphore.IsFree(ticks[iterator])) {
ticks[iterator] = master_semaphore.CurrentTick();
return iterator;
}
}
return {};
};
// Try to find a free resource from the hinted position to the end.
auto found = search(free_iterator, ticks.size());
if (!found) {
// Search from beginning to the hinted position.
found = search(0, free_iterator);
if (!found) {
// Both searches failed, the pool is full; handle it.
const size_t free_resource = ManageOverflow();
ticks[free_resource] = master_semaphore.CurrentTick();
found = free_resource;
}
}
// Free iterator is hinted to the resource after the one that's been committed.
free_iterator = (*found + 1) % ticks.size();
return *found;
}
size_t ResourcePool::ManageOverflow() {
const size_t old_capacity = ticks.size();
Grow();
// The last entry is guaranteed to be free, since it's the first element of the freshly
// allocated resources.
return old_capacity;
}
void ResourcePool::Grow() {
const size_t old_capacity = ticks.size();
ticks.resize(old_capacity + grow_step);
Allocate(old_capacity, old_capacity + grow_step);
}
} // namespace Vulkan

View File

@@ -0,0 +1,43 @@
// Copyright 2020 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <vector>
#include "common/common_types.h"
namespace Vulkan {
class MasterSemaphore;
/**
* Handles a pool of resources protected by fences. Manages resource overflow by
* allocating more resources.
*/
class ResourcePool {
public:
explicit ResourcePool(MasterSemaphore& master_semaphore, size_t grow_step);
virtual ~ResourcePool();
protected:
size_t CommitResource();
/// Called when a chunk of resources has to be allocated.
virtual void Allocate(size_t begin, size_t end) = 0;
private:
/// Manages pool overflow by allocating new resources.
size_t ManageOverflow();
/// Allocates a new page of resources.
void Grow();
MasterSemaphore& master_semaphore;
size_t grow_step = 0; ///< Number of new resources created after an overflow
size_t free_iterator = 0; ///< Hint to where the next free resource is likely to be found
std::vector<u64> ticks; ///< Ticks for each resource
};
} // namespace Vulkan
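As a usage sketch, a concrete pool only stores its payload and implements Allocate; the class and member names below are illustrative, not part of this commit:
// Illustrative subclass of ResourcePool; SemaphorePool and its members are
// hypothetical names.
class SemaphorePool final : public ResourcePool {
public:
    explicit SemaphorePool(const VKDevice& device_, MasterSemaphore& master_semaphore_)
        : ResourcePool{master_semaphore_, 4}, device{device_} {}

    /// Returns a semaphore whose previous use has already been seen by the GPU.
    VkSemaphore Commit() {
        return *semaphores[CommitResource()];
    }

protected:
    void Allocate(size_t begin, size_t end) override {
        semaphores.resize(end);
        for (size_t i = begin; i < end; ++i) {
            semaphores[i] = device.GetLogical().CreateSemaphore();
        }
    }

private:
    const VKDevice& device;
    std::vector<vk::Semaphore> semaphores;
};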

View File

@@ -0,0 +1,288 @@
// Copyright 2019 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <memory>
#include <mutex>
#include <optional>
#include <thread>
#include <utility>
#include "common/microprofile.h"
#include "common/thread.h"
#include "video_core/renderer_vulkan/vk_command_pool.h"
#include "video_core/renderer_vulkan/vk_device.h"
#include "video_core/renderer_vulkan/vk_master_semaphore.h"
#include "video_core/renderer_vulkan/vk_query_cache.h"
#include "video_core/renderer_vulkan/vk_scheduler.h"
#include "video_core/renderer_vulkan/vk_state_tracker.h"
#include "video_core/renderer_vulkan/vk_texture_cache.h"
#include "video_core/renderer_vulkan/wrapper.h"
namespace Vulkan {
MICROPROFILE_DECLARE(Vulkan_WaitForWorker);
void VKScheduler::CommandChunk::ExecuteAll(vk::CommandBuffer cmdbuf) {
auto command = first;
while (command != nullptr) {
auto next = command->GetNext();
command->Execute(cmdbuf);
command->~Command();
command = next;
}
command_offset = 0;
first = nullptr;
last = nullptr;
}
VKScheduler::VKScheduler(const VKDevice& device_, StateTracker& state_tracker_)
: device{device_}, state_tracker{state_tracker_},
master_semaphore{std::make_unique<MasterSemaphore>(device)},
command_pool{std::make_unique<CommandPool>(*master_semaphore, device)} {
AcquireNewChunk();
AllocateNewContext();
worker_thread = std::thread(&VKScheduler::WorkerThread, this);
}
VKScheduler::~VKScheduler() {
quit = true;
cv.notify_all();
worker_thread.join();
}
u64 VKScheduler::CurrentTick() const noexcept {
return master_semaphore->CurrentTick();
}
bool VKScheduler::IsFree(u64 tick) const noexcept {
return master_semaphore->IsFree(tick);
}
void VKScheduler::Wait(u64 tick) {
master_semaphore->Wait(tick);
}
void VKScheduler::Flush(VkSemaphore semaphore) {
SubmitExecution(semaphore);
AllocateNewContext();
}
void VKScheduler::Finish(VkSemaphore semaphore) {
const u64 presubmit_tick = CurrentTick();
SubmitExecution(semaphore);
Wait(presubmit_tick);
AllocateNewContext();
}
void VKScheduler::WaitWorker() {
MICROPROFILE_SCOPE(Vulkan_WaitForWorker);
DispatchWork();
bool finished = false;
do {
cv.notify_all();
std::unique_lock lock{mutex};
finished = chunk_queue.Empty();
} while (!finished);
}
void VKScheduler::DispatchWork() {
if (chunk->Empty()) {
return;
}
chunk_queue.Push(std::move(chunk));
cv.notify_all();
AcquireNewChunk();
}
void VKScheduler::RequestRenderpass(const Framebuffer* framebuffer) {
const VkRenderPass renderpass = framebuffer->RenderPass();
const VkFramebuffer framebuffer_handle = framebuffer->Handle();
const VkExtent2D render_area = framebuffer->RenderArea();
if (renderpass == state.renderpass && framebuffer_handle == state.framebuffer &&
render_area.width == state.render_area.width &&
render_area.height == state.render_area.height) {
return;
}
EndRenderPass();
state.renderpass = renderpass;
state.framebuffer = framebuffer_handle;
state.render_area = render_area;
Record([renderpass, framebuffer_handle, render_area](vk::CommandBuffer cmdbuf) {
const VkRenderPassBeginInfo renderpass_bi{
.sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO,
.pNext = nullptr,
.renderPass = renderpass,
.framebuffer = framebuffer_handle,
.renderArea =
{
.offset = {.x = 0, .y = 0},
.extent = render_area,
},
.clearValueCount = 0,
.pClearValues = nullptr,
};
cmdbuf.BeginRenderPass(renderpass_bi, VK_SUBPASS_CONTENTS_INLINE);
});
num_renderpass_images = framebuffer->NumImages();
renderpass_images = framebuffer->Images();
renderpass_image_ranges = framebuffer->ImageRanges();
}
void VKScheduler::RequestOutsideRenderPassOperationContext() {
EndRenderPass();
}
void VKScheduler::BindGraphicsPipeline(VkPipeline pipeline) {
if (state.graphics_pipeline == pipeline) {
return;
}
state.graphics_pipeline = pipeline;
Record([pipeline](vk::CommandBuffer cmdbuf) {
cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline);
});
}
void VKScheduler::WorkerThread() {
Common::SetCurrentThreadPriority(Common::ThreadPriority::High);
std::unique_lock lock{mutex};
do {
cv.wait(lock, [this] { return !chunk_queue.Empty() || quit; });
if (quit) {
continue;
}
auto extracted_chunk = std::move(chunk_queue.Front());
chunk_queue.Pop();
extracted_chunk->ExecuteAll(current_cmdbuf);
chunk_reserve.Push(std::move(extracted_chunk));
} while (!quit);
}
void VKScheduler::SubmitExecution(VkSemaphore semaphore) {
EndPendingOperations();
InvalidateState();
WaitWorker();
std::unique_lock lock{mutex};
current_cmdbuf.End();
const VkSemaphore timeline_semaphore = master_semaphore->Handle();
const u32 num_signal_semaphores = semaphore ? 2U : 1U;
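// Signal the current tick and wait on the previous one, chaining this submission
// after the last one on the timeline semaphore.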
const u64 signal_value = master_semaphore->CurrentTick();
const u64 wait_value = signal_value - 1;
const VkPipelineStageFlags wait_stage_mask = VK_PIPELINE_STAGE_ALL_COMMANDS_BIT;
master_semaphore->NextTick();
const std::array signal_values{signal_value, u64(0)};
const std::array signal_semaphores{timeline_semaphore, semaphore};
const VkTimelineSemaphoreSubmitInfoKHR timeline_si{
.sType = VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO_KHR,
.pNext = nullptr,
.waitSemaphoreValueCount = 1,
.pWaitSemaphoreValues = &wait_value,
.signalSemaphoreValueCount = num_signal_semaphores,
.pSignalSemaphoreValues = signal_values.data(),
};
const VkSubmitInfo submit_info{
.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO,
.pNext = &timeline_si,
.waitSemaphoreCount = 1,
.pWaitSemaphores = &timeline_semaphore,
.pWaitDstStageMask = &wait_stage_mask,
.commandBufferCount = 1,
.pCommandBuffers = current_cmdbuf.address(),
.signalSemaphoreCount = num_signal_semaphores,
.pSignalSemaphores = signal_semaphores.data(),
};
switch (const VkResult result = device.GetGraphicsQueue().Submit(submit_info)) {
case VK_SUCCESS:
break;
case VK_ERROR_DEVICE_LOST:
device.ReportLoss();
[[fallthrough]];
default:
vk::Check(result);
}
}
void VKScheduler::AllocateNewContext() {
std::unique_lock lock{mutex};
current_cmdbuf = vk::CommandBuffer(command_pool->Commit(), device.GetDispatchLoader());
current_cmdbuf.Begin({
.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,
.pNext = nullptr,
.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT,
.pInheritanceInfo = nullptr,
});
// Enable counters once again. These are disabled when a command buffer is finished.
if (query_cache) {
query_cache->UpdateCounters();
}
}
void VKScheduler::InvalidateState() {
state.graphics_pipeline = nullptr;
state_tracker.InvalidateCommandBufferState();
}
void VKScheduler::EndPendingOperations() {
query_cache->DisableStreams();
EndRenderPass();
}
void VKScheduler::EndRenderPass() {
if (!state.renderpass) {
return;
}
Record([num_images = num_renderpass_images, images = renderpass_images,
ranges = renderpass_image_ranges](vk::CommandBuffer cmdbuf) {
std::array<VkImageMemoryBarrier, 9> barriers;
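// Make attachment writes visible to later shader and attachment accesses; the
// layout stays VK_IMAGE_LAYOUT_GENERAL, so no image transition takes place.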
for (size_t i = 0; i < num_images; ++i) {
barriers[i] = VkImageMemoryBarrier{
.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
.pNext = nullptr,
.srcAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT |
VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT,
.dstAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT |
VK_ACCESS_COLOR_ATTACHMENT_READ_BIT |
VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT |
VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT |
VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT,
.oldLayout = VK_IMAGE_LAYOUT_GENERAL,
.newLayout = VK_IMAGE_LAYOUT_GENERAL,
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.image = images[i],
.subresourceRange = ranges[i],
};
}
cmdbuf.EndRenderPass();
cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT |
VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT |
VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT,
VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT, 0, nullptr, nullptr,
vk::Span(barriers.data(), num_images));
});
state.renderpass = nullptr;
num_renderpass_images = 0;
}
void VKScheduler::AcquireNewChunk() {
if (chunk_reserve.Empty()) {
chunk = std::make_unique<CommandChunk>();
return;
}
chunk = std::move(chunk_reserve.Front());
chunk_reserve.Pop();
}
} // namespace Vulkan

View File

@@ -0,0 +1,208 @@
// Copyright 2019 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <atomic>
#include <condition_variable>
#include <memory>
#include <stack>
#include <thread>
#include <utility>
#include "common/common_types.h"
#include "common/threadsafe_queue.h"
#include "video_core/renderer_vulkan/wrapper.h"
namespace Vulkan {
class CommandPool;
class Framebuffer;
class MasterSemaphore;
class StateTracker;
class VKDevice;
class VKQueryCache;
/// The scheduler abstracts command buffer and fence management with an interface that's able to do
/// OpenGL-like operations on Vulkan command buffers.
class VKScheduler {
public:
explicit VKScheduler(const VKDevice& device, StateTracker& state_tracker);
~VKScheduler();
/// Returns the current command buffer tick.
[[nodiscard]] u64 CurrentTick() const noexcept;
/// Returns true when a tick has been triggered by the GPU.
[[nodiscard]] bool IsFree(u64 tick) const noexcept;
/// Waits for the given tick to trigger on the GPU.
void Wait(u64 tick);
/// Sends the current execution context to the GPU.
void Flush(VkSemaphore semaphore = nullptr);
/// Sends the current execution context to the GPU and waits for it to complete.
void Finish(VkSemaphore semaphore = nullptr);
/// Waits for the worker thread to finish executing everything. After this function returns, it's
/// safe to touch worker resources.
void WaitWorker();
/// Sends currently recorded work to the worker thread.
void DispatchWork();
/// Requests to begin a renderpass.
void RequestRenderpass(const Framebuffer* framebuffer);
/// Requests the current execution context to be able to execute operations only allowed outside
/// of a renderpass.
void RequestOutsideRenderPassOperationContext();
/// Binds a pipeline to the current execution context.
void BindGraphicsPipeline(VkPipeline pipeline);
/// Invalidates current command buffer state except for render passes
void InvalidateState();
/// Assigns the query cache.
void SetQueryCache(VKQueryCache& query_cache_) {
query_cache = &query_cache_;
}
/// Send work to a separate thread.
template <typename T>
void Record(T&& command) {
if (chunk->Record(command)) {
return;
}
DispatchWork();
(void)chunk->Record(command);
}
/// Returns the master timeline semaphore.
[[nodiscard]] MasterSemaphore& GetMasterSemaphore() const noexcept {
return *master_semaphore;
}
private:
class Command {
public:
virtual ~Command() = default;
virtual void Execute(vk::CommandBuffer cmdbuf) const = 0;
Command* GetNext() const {
return next;
}
void SetNext(Command* next_) {
next = next_;
}
private:
Command* next = nullptr;
};
template <typename T>
class TypedCommand final : public Command {
public:
explicit TypedCommand(T&& command_) : command{std::move(command_)} {}
~TypedCommand() override = default;
TypedCommand(TypedCommand&&) = delete;
TypedCommand& operator=(TypedCommand&&) = delete;
void Execute(vk::CommandBuffer cmdbuf) const override {
command(cmdbuf);
}
private:
T command;
};
class CommandChunk final {
public:
void ExecuteAll(vk::CommandBuffer cmdbuf);
template <typename T>
bool Record(T& command) {
using FuncType = TypedCommand<T>;
static_assert(sizeof(FuncType) < sizeof(data), "Lambda is too large");
if (command_offset > sizeof(data) - sizeof(FuncType)) {
return false;
}
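// Placement-new the command into the chunk's inline storage and append it to the
// intrusive linked list.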
Command* current_last = last;
last = new (data.data() + command_offset) FuncType(std::move(command));
if (current_last) {
current_last->SetNext(last);
} else {
first = last;
}
command_offset += sizeof(FuncType);
return true;
}
bool Empty() const {
return command_offset == 0;
}
private:
Command* first = nullptr;
Command* last = nullptr;
std::size_t command_offset = 0;
std::array<u8, 0x8000> data{};
};
struct State {
VkRenderPass renderpass = nullptr;
VkFramebuffer framebuffer = nullptr;
VkExtent2D render_area = {0, 0};
VkPipeline graphics_pipeline = nullptr;
};
void WorkerThread();
void SubmitExecution(VkSemaphore semaphore);
void AllocateNewContext();
void EndPendingOperations();
void EndRenderPass();
void AcquireNewChunk();
const VKDevice& device;
StateTracker& state_tracker;
std::unique_ptr<MasterSemaphore> master_semaphore;
std::unique_ptr<CommandPool> command_pool;
VKQueryCache* query_cache = nullptr;
vk::CommandBuffer current_cmdbuf;
std::unique_ptr<CommandChunk> chunk;
std::thread worker_thread;
State state;
u32 num_renderpass_images = 0;
std::array<VkImage, 9> renderpass_images{};
std::array<VkImageSubresourceRange, 9> renderpass_image_ranges{};
Common::SPSCQueue<std::unique_ptr<CommandChunk>> chunk_queue;
Common::SPSCQueue<std::unique_ptr<CommandChunk>> chunk_reserve;
std::mutex mutex;
std::condition_variable cv;
bool quit = false;
};
} // namespace Vulkan
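A typical caller records deferred work through Record; a minimal usage sketch (the free function itself is illustrative):
// Minimal sketch: the lambda runs later on the worker thread, so capture raw
// handles by value rather than references to short-lived state.
void BindComputePipeline(VKScheduler& scheduler, VkPipeline pipeline) {
    scheduler.Record([pipeline](vk::CommandBuffer cmdbuf) {
        cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
    });
}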

File diff suppressed because it is too large

View File

@@ -0,0 +1,117 @@
// Copyright 2019 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <array>
#include <set>
#include <vector>
#include "common/common_types.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/engines/shader_type.h"
#include "video_core/shader/registry.h"
#include "video_core/shader/shader_ir.h"
namespace Vulkan {
class VKDevice;
}
namespace Vulkan {
using Maxwell = Tegra::Engines::Maxwell3D::Regs;
using UniformTexelEntry = VideoCommon::Shader::SamplerEntry;
using SamplerEntry = VideoCommon::Shader::SamplerEntry;
using StorageTexelEntry = VideoCommon::Shader::ImageEntry;
using ImageEntry = VideoCommon::Shader::ImageEntry;
constexpr u32 DESCRIPTOR_SET = 0;
class ConstBufferEntry : public VideoCommon::Shader::ConstBuffer {
public:
explicit constexpr ConstBufferEntry(const ConstBuffer& entry_, u32 index_)
: ConstBuffer{entry_}, index{index_} {}
constexpr u32 GetIndex() const {
return index;
}
private:
u32 index{};
};
class GlobalBufferEntry {
public:
constexpr explicit GlobalBufferEntry(u32 cbuf_index_, u32 cbuf_offset_, bool is_written_)
: cbuf_index{cbuf_index_}, cbuf_offset{cbuf_offset_}, is_written{is_written_} {}
constexpr u32 GetCbufIndex() const {
return cbuf_index;
}
constexpr u32 GetCbufOffset() const {
return cbuf_offset;
}
constexpr bool IsWritten() const {
return is_written;
}
private:
u32 cbuf_index{};
u32 cbuf_offset{};
bool is_written{};
};
struct ShaderEntries {
u32 NumBindings() const {
return static_cast<u32>(const_buffers.size() + global_buffers.size() +
uniform_texels.size() + samplers.size() + storage_texels.size() +
images.size());
}
std::vector<ConstBufferEntry> const_buffers;
std::vector<GlobalBufferEntry> global_buffers;
std::vector<UniformTexelEntry> uniform_texels;
std::vector<SamplerEntry> samplers;
std::vector<StorageTexelEntry> storage_texels;
std::vector<ImageEntry> images;
std::set<u32> attributes;
std::array<bool, Maxwell::NumClipDistances> clip_distances{};
std::size_t shader_length{};
bool uses_warps{};
};
struct Specialization final {
u32 base_binding{};
// Compute specific
std::array<u32, 3> workgroup_size{};
u32 shared_memory_size{};
// Graphics specific
std::optional<float> point_size;
std::bitset<Maxwell::NumVertexAttributes> enabled_attributes;
std::array<Maxwell::VertexAttribute::Type, Maxwell::NumVertexAttributes> attribute_types{};
bool ndc_minus_one_to_one{};
bool early_fragment_tests{};
float alpha_test_ref{};
Maxwell::ComparisonOp alpha_test_func{};
};
// Old gcc versions don't consider this trivially copyable.
// static_assert(std::is_trivially_copyable_v<Specialization>);
struct SPIRVShader {
std::vector<u32> code;
ShaderEntries entries;
};
ShaderEntries GenerateShaderEntries(const VideoCommon::Shader::ShaderIR& ir);
std::vector<u32> Decompile(const VKDevice& device, const VideoCommon::Shader::ShaderIR& ir,
Tegra::Engines::ShaderType stage,
const VideoCommon::Shader::Registry& registry,
const Specialization& specialization);
} // namespace Vulkan

View File

@@ -0,0 +1,26 @@
// Copyright 2018 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <cstring>
#include <memory>
#include "common/assert.h"
#include "common/common_types.h"
#include "video_core/renderer_vulkan/vk_device.h"
#include "video_core/renderer_vulkan/vk_shader_util.h"
#include "video_core/renderer_vulkan/wrapper.h"
namespace Vulkan {
vk::ShaderModule BuildShader(const VKDevice& device, std::span<const u32> code) {
return device.GetLogical().CreateShaderModule({
.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
.codeSize = static_cast<u32>(code.size_bytes()),
.pCode = code.data(),
});
}
} // namespace Vulkan

View File

@@ -0,0 +1,18 @@
// Copyright 2018 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <span>
#include "common/common_types.h"
#include "video_core/renderer_vulkan/wrapper.h"
namespace Vulkan {
class VKDevice;
vk::ShaderModule BuildShader(const VKDevice& device, std::span<const u32> code);
} // namespace Vulkan
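Usage is a single call; the fragment below is a sketch in which `spirv` stands in for one of the embedded SPIR-V word arrays produced by the host_shaders build step:
// Sketch fragment: build a module and wrap it in a standard Vulkan stage description.
const vk::ShaderModule module = BuildShader(device, spirv);
const VkPipelineShaderStageCreateInfo stage_ci{
    .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
    .pNext = nullptr,
    .flags = 0,
    .stage = VK_SHADER_STAGE_VERTEX_BIT,
    .module = *module,
    .pName = "main",
    .pSpecializationInfo = nullptr,
};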

View File

@@ -0,0 +1,114 @@
// Copyright 2019 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <algorithm>
#include <unordered_map>
#include <utility>
#include <vector>
#include "common/bit_util.h"
#include "common/common_types.h"
#include "video_core/renderer_vulkan/vk_device.h"
#include "video_core/renderer_vulkan/vk_scheduler.h"
#include "video_core/renderer_vulkan/vk_staging_buffer_pool.h"
#include "video_core/renderer_vulkan/wrapper.h"
namespace Vulkan {
VKStagingBufferPool::StagingBuffer::StagingBuffer(std::unique_ptr<VKBuffer> buffer_)
: buffer{std::move(buffer_)} {}
VKStagingBufferPool::VKStagingBufferPool(const VKDevice& device_, VKMemoryManager& memory_manager_,
VKScheduler& scheduler_)
: device{device_}, memory_manager{memory_manager_}, scheduler{scheduler_} {}
VKStagingBufferPool::~VKStagingBufferPool() = default;
VKBuffer& VKStagingBufferPool::GetUnusedBuffer(std::size_t size, bool host_visible) {
if (const auto buffer = TryGetReservedBuffer(size, host_visible)) {
return *buffer;
}
return CreateStagingBuffer(size, host_visible);
}
void VKStagingBufferPool::TickFrame() {
current_delete_level = (current_delete_level + 1) % NumLevels;
ReleaseCache(true);
ReleaseCache(false);
}
VKBuffer* VKStagingBufferPool::TryGetReservedBuffer(std::size_t size, bool host_visible) {
for (StagingBuffer& entry : GetCache(host_visible)[Common::Log2Ceil64(size)].entries) {
if (!scheduler.IsFree(entry.tick)) {
continue;
}
entry.tick = scheduler.CurrentTick();
return &*entry.buffer;
}
return nullptr;
}
VKBuffer& VKStagingBufferPool::CreateStagingBuffer(std::size_t size, bool host_visible) {
const u32 log2 = Common::Log2Ceil64(size);
auto buffer = std::make_unique<VKBuffer>();
buffer->handle = device.GetLogical().CreateBuffer({
.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
.size = 1ULL << log2,
.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT |
VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT |
VK_BUFFER_USAGE_INDEX_BUFFER_BIT | VK_BUFFER_USAGE_VERTEX_BUFFER_BIT,
.sharingMode = VK_SHARING_MODE_EXCLUSIVE,
.queueFamilyIndexCount = 0,
.pQueueFamilyIndices = nullptr,
});
buffer->commit = memory_manager.Commit(buffer->handle, host_visible);
std::vector<StagingBuffer>& entries = GetCache(host_visible)[log2].entries;
StagingBuffer& entry = entries.emplace_back(std::move(buffer));
entry.tick = scheduler.CurrentTick();
return *entry.buffer;
}
VKStagingBufferPool::StagingBuffersCache& VKStagingBufferPool::GetCache(bool host_visible) {
return host_visible ? host_staging_buffers : device_staging_buffers;
}
void VKStagingBufferPool::ReleaseCache(bool host_visible) {
auto& cache = GetCache(host_visible);
const u64 size = ReleaseLevel(cache, current_delete_level);
if (size == 0) {
return;
}
}
u64 VKStagingBufferPool::ReleaseLevel(StagingBuffersCache& cache, std::size_t log2) {
static constexpr std::size_t deletions_per_tick = 16;
auto& staging = cache[log2];
auto& entries = staging.entries;
const std::size_t old_size = entries.size();
const auto is_deleteable = [this](const StagingBuffer& entry) {
return scheduler.IsFree(entry.tick);
};
const std::size_t begin_offset = staging.delete_index;
const std::size_t end_offset = std::min(begin_offset + deletions_per_tick, old_size);
const auto begin = std::begin(entries) + begin_offset;
const auto end = std::begin(entries) + end_offset;
entries.erase(std::remove_if(begin, end, is_deleteable), end);
const std::size_t new_size = entries.size();
staging.delete_index += deletions_per_tick;
if (staging.delete_index >= new_size) {
staging.delete_index = 0;
}
return (1ULL << log2) * (old_size - new_size);
}
} // namespace Vulkan

View File

@@ -0,0 +1,71 @@
// Copyright 2019 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <climits>
#include <vector>
#include "common/common_types.h"
#include "video_core/renderer_vulkan/vk_memory_manager.h"
#include "video_core/renderer_vulkan/wrapper.h"
namespace Vulkan {
class VKDevice;
class VKScheduler;
struct VKBuffer final {
vk::Buffer handle;
VKMemoryCommit commit;
};
class VKStagingBufferPool final {
public:
explicit VKStagingBufferPool(const VKDevice& device, VKMemoryManager& memory_manager,
VKScheduler& scheduler);
~VKStagingBufferPool();
VKBuffer& GetUnusedBuffer(std::size_t size, bool host_visible);
void TickFrame();
private:
struct StagingBuffer final {
explicit StagingBuffer(std::unique_ptr<VKBuffer> buffer);
std::unique_ptr<VKBuffer> buffer;
u64 tick = 0;
};
struct StagingBuffers final {
std::vector<StagingBuffer> entries;
std::size_t delete_index = 0;
};
static constexpr std::size_t NumLevels = sizeof(std::size_t) * CHAR_BIT;
using StagingBuffersCache = std::array<StagingBuffers, NumLevels>;
VKBuffer* TryGetReservedBuffer(std::size_t size, bool host_visible);
VKBuffer& CreateStagingBuffer(std::size_t size, bool host_visible);
StagingBuffersCache& GetCache(bool host_visible);
void ReleaseCache(bool host_visible);
u64 ReleaseLevel(StagingBuffersCache& cache, std::size_t log2);
const VKDevice& device;
VKMemoryManager& memory_manager;
VKScheduler& scheduler;
StagingBuffersCache host_staging_buffers;
StagingBuffersCache device_staging_buffers;
std::size_t current_delete_level = 0;
};
} // namespace Vulkan
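Buffers are bucketed by the ceiling of the base-2 logarithm of the requested size, so each cache level holds power-of-two sized buffers that any smaller request can reuse; for example:
// Sketch of the size-class mapping used by GetUnusedBuffer/CreateStagingBuffer.
const u32 level = Common::Log2Ceil64(5000); // 13
const VkDeviceSize backing_size = 1ULL << level; // 8192 bytes, reusable by any
                                                 // request of up to 8192 bytes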

View File

@@ -0,0 +1,150 @@
// Copyright 2020 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <algorithm>
#include <array>
#include <cstddef>
#include <iterator>
#include "common/common_types.h"
#include "core/core.h"
#include "video_core/dirty_flags.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/gpu.h"
#include "video_core/renderer_vulkan/vk_state_tracker.h"
#define OFF(field_name) MAXWELL3D_REG_INDEX(field_name)
#define NUM(field_name) (sizeof(Maxwell3D::Regs::field_name) / (sizeof(u32)))
namespace Vulkan {
namespace {
using namespace Dirty;
using namespace VideoCommon::Dirty;
using Tegra::Engines::Maxwell3D;
using Regs = Maxwell3D::Regs;
using Tables = Maxwell3D::DirtyState::Tables;
using Table = Maxwell3D::DirtyState::Table;
using Flags = Maxwell3D::DirtyState::Flags;
Flags MakeInvalidationFlags() {
static constexpr std::array INVALIDATION_FLAGS{
Viewports, Scissors, DepthBias, BlendConstants, DepthBounds,
StencilProperties, CullMode, DepthBoundsEnable, DepthTestEnable, DepthWriteEnable,
DepthCompareOp, FrontFace, StencilOp, StencilTestEnable,
};
Flags flags{};
for (const int flag : INVALIDATION_FLAGS) {
flags[flag] = true;
}
return flags;
}
void SetupDirtyViewports(Tables& tables) {
FillBlock(tables[0], OFF(viewport_transform), NUM(viewport_transform), Viewports);
FillBlock(tables[0], OFF(viewports), NUM(viewports), Viewports);
tables[0][OFF(viewport_transform_enabled)] = Viewports;
}
void SetupDirtyScissors(Tables& tables) {
FillBlock(tables[0], OFF(scissor_test), NUM(scissor_test), Scissors);
}
void SetupDirtyDepthBias(Tables& tables) {
auto& table = tables[0];
table[OFF(polygon_offset_units)] = DepthBias;
table[OFF(polygon_offset_clamp)] = DepthBias;
table[OFF(polygon_offset_factor)] = DepthBias;
}
void SetupDirtyBlendConstants(Tables& tables) {
FillBlock(tables[0], OFF(blend_color), NUM(blend_color), BlendConstants);
}
void SetupDirtyDepthBounds(Tables& tables) {
FillBlock(tables[0], OFF(depth_bounds), NUM(depth_bounds), DepthBounds);
}
void SetupDirtyStencilProperties(Tables& tables) {
auto& table = tables[0];
table[OFF(stencil_two_side_enable)] = StencilProperties;
table[OFF(stencil_front_func_ref)] = StencilProperties;
table[OFF(stencil_front_mask)] = StencilProperties;
table[OFF(stencil_front_func_mask)] = StencilProperties;
table[OFF(stencil_back_func_ref)] = StencilProperties;
table[OFF(stencil_back_mask)] = StencilProperties;
table[OFF(stencil_back_func_mask)] = StencilProperties;
}
void SetupDirtyCullMode(Tables& tables) {
auto& table = tables[0];
table[OFF(cull_face)] = CullMode;
table[OFF(cull_test_enabled)] = CullMode;
}
void SetupDirtyDepthBoundsEnable(Tables& tables) {
tables[0][OFF(depth_bounds_enable)] = DepthBoundsEnable;
}
void SetupDirtyDepthTestEnable(Tables& tables) {
tables[0][OFF(depth_test_enable)] = DepthTestEnable;
}
void SetupDirtyDepthWriteEnable(Tables& tables) {
tables[0][OFF(depth_write_enabled)] = DepthWriteEnable;
}
void SetupDirtyDepthCompareOp(Tables& tables) {
tables[0][OFF(depth_test_func)] = DepthCompareOp;
}
void SetupDirtyFrontFace(Tables& tables) {
auto& table = tables[0];
table[OFF(front_face)] = FrontFace;
table[OFF(screen_y_control)] = FrontFace;
}
void SetupDirtyStencilOp(Tables& tables) {
auto& table = tables[0];
table[OFF(stencil_front_op_fail)] = StencilOp;
table[OFF(stencil_front_op_zfail)] = StencilOp;
table[OFF(stencil_front_op_zpass)] = StencilOp;
table[OFF(stencil_front_func_func)] = StencilOp;
table[OFF(stencil_back_op_fail)] = StencilOp;
table[OFF(stencil_back_op_zfail)] = StencilOp;
table[OFF(stencil_back_op_zpass)] = StencilOp;
table[OFF(stencil_back_func_func)] = StencilOp;
// Table 0 is used by StencilProperties
tables[1][OFF(stencil_two_side_enable)] = StencilOp;
}
void SetupDirtyStencilTestEnable(Tables& tables) {
tables[0][OFF(stencil_enable)] = StencilTestEnable;
}
} // Anonymous namespace
StateTracker::StateTracker(Tegra::GPU& gpu)
: flags{gpu.Maxwell3D().dirty.flags}, invalidation_flags{MakeInvalidationFlags()} {
auto& tables = gpu.Maxwell3D().dirty.tables;
SetupDirtyRenderTargets(tables);
SetupDirtyViewports(tables);
SetupDirtyScissors(tables);
SetupDirtyDepthBias(tables);
SetupDirtyBlendConstants(tables);
SetupDirtyDepthBounds(tables);
SetupDirtyStencilProperties(tables);
SetupDirtyCullMode(tables);
SetupDirtyDepthBoundsEnable(tables);
SetupDirtyDepthTestEnable(tables);
SetupDirtyDepthWriteEnable(tables);
SetupDirtyDepthCompareOp(tables);
SetupDirtyFrontFace(tables);
SetupDirtyStencilOp(tables);
SetupDirtyStencilTestEnable(tables);
}
} // namespace Vulkan

View File

@@ -0,0 +1,143 @@
// Copyright 2020 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <cstddef>
#include <limits>
#include "common/common_types.h"
#include "core/core.h"
#include "video_core/dirty_flags.h"
#include "video_core/engines/maxwell_3d.h"
namespace Vulkan {
namespace Dirty {
enum : u8 {
First = VideoCommon::Dirty::LastCommonEntry,
Viewports,
Scissors,
DepthBias,
BlendConstants,
DepthBounds,
StencilProperties,
CullMode,
DepthBoundsEnable,
DepthTestEnable,
DepthWriteEnable,
DepthCompareOp,
FrontFace,
StencilOp,
StencilTestEnable,
Last
};
static_assert(Last <= std::numeric_limits<u8>::max());
} // namespace Dirty
class StateTracker {
using Maxwell = Tegra::Engines::Maxwell3D::Regs;
public:
explicit StateTracker(Tegra::GPU& gpu);
void InvalidateCommandBufferState() {
flags |= invalidation_flags;
current_topology = INVALID_TOPOLOGY;
}
void InvalidateViewports() {
flags[Dirty::Viewports] = true;
}
void InvalidateScissors() {
flags[Dirty::Scissors] = true;
}
bool TouchViewports() {
return Exchange(Dirty::Viewports, false);
}
bool TouchScissors() {
return Exchange(Dirty::Scissors, false);
}
bool TouchDepthBias() {
return Exchange(Dirty::DepthBias, false);
}
bool TouchBlendConstants() {
return Exchange(Dirty::BlendConstants, false);
}
bool TouchDepthBounds() {
return Exchange(Dirty::DepthBounds, false);
}
bool TouchStencilProperties() {
return Exchange(Dirty::StencilProperties, false);
}
bool TouchCullMode() {
return Exchange(Dirty::CullMode, false);
}
bool TouchDepthBoundsTestEnable() {
return Exchange(Dirty::DepthBoundsEnable, false);
}
bool TouchDepthTestEnable() {
return Exchange(Dirty::DepthTestEnable, false);
}
bool TouchDepthBoundsEnable() {
return Exchange(Dirty::DepthBoundsEnable, false);
}
bool TouchDepthWriteEnable() {
return Exchange(Dirty::DepthWriteEnable, false);
}
bool TouchDepthCompareOp() {
return Exchange(Dirty::DepthCompareOp, false);
}
bool TouchFrontFace() {
return Exchange(Dirty::FrontFace, false);
}
bool TouchStencilOp() {
return Exchange(Dirty::StencilOp, false);
}
bool TouchStencilTestEnable() {
return Exchange(Dirty::StencilTestEnable, false);
}
bool ChangePrimitiveTopology(Maxwell::PrimitiveTopology new_topology) {
const bool has_changed = current_topology != new_topology;
current_topology = new_topology;
return has_changed;
}
private:
static constexpr auto INVALID_TOPOLOGY = static_cast<Maxwell::PrimitiveTopology>(~0u);
bool Exchange(std::size_t id, bool new_value) const noexcept {
const bool is_dirty = flags[id];
flags[id] = new_value;
return is_dirty;
}
Tegra::Engines::Maxwell3D::DirtyState::Flags& flags;
Tegra::Engines::Maxwell3D::DirtyState::Flags invalidation_flags;
Maxwell::PrimitiveTopology current_topology = INVALID_TOPOLOGY;
};
} // namespace Vulkan
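The Touch* helpers are a test-and-clear on the dirty bit, letting state-update code skip redundant Vulkan calls; an illustrative consumer (the function itself is hypothetical):
// Illustrative consumer of the test-and-clear pattern; not part of this commit.
void UpdateCullModeExample(StateTracker& state_tracker) {
    if (!state_tracker.TouchCullMode()) {
        return; // Cull registers unchanged since the last draw; record nothing.
    }
    // ... record the dynamic cull mode command through the scheduler here ...
}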

View File

@@ -0,0 +1,165 @@
// Copyright 2019 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <algorithm>
#include <limits>
#include <optional>
#include <tuple>
#include <vector>
#include "common/alignment.h"
#include "common/assert.h"
#include "video_core/renderer_vulkan/vk_device.h"
#include "video_core/renderer_vulkan/vk_scheduler.h"
#include "video_core/renderer_vulkan/vk_stream_buffer.h"
#include "video_core/renderer_vulkan/wrapper.h"
namespace Vulkan {
namespace {
constexpr VkBufferUsageFlags BUFFER_USAGE =
VK_BUFFER_USAGE_VERTEX_BUFFER_BIT | VK_BUFFER_USAGE_INDEX_BUFFER_BIT |
VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT;
constexpr u64 WATCHES_INITIAL_RESERVE = 0x4000;
constexpr u64 WATCHES_RESERVE_CHUNK = 0x1000;
constexpr u64 PREFERRED_STREAM_BUFFER_SIZE = 256 * 1024 * 1024;
/// Finds a memory type matching the passed requirements
std::optional<u32> FindMemoryType(const VkPhysicalDeviceMemoryProperties& properties,
VkMemoryPropertyFlags wanted,
u32 filter = std::numeric_limits<u32>::max()) {
for (u32 i = 0; i < properties.memoryTypeCount; ++i) {
const auto flags = properties.memoryTypes[i].propertyFlags;
if ((flags & wanted) == wanted && (filter & (1U << i)) != 0) {
return i;
}
}
return std::nullopt;
}
/// Gets the preferred host visible memory type.
u32 GetMemoryType(const VkPhysicalDeviceMemoryProperties& properties,
u32 filter = std::numeric_limits<u32>::max()) {
// Prefer device local host visible allocations. Both AMD and Nvidia now provide one.
// Otherwise search for a host visible allocation.
static constexpr auto HOST_MEMORY =
VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT;
static constexpr auto DYNAMIC_MEMORY = HOST_MEMORY | VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT;
std::optional preferred_type = FindMemoryType(properties, DYNAMIC_MEMORY);
if (!preferred_type) {
preferred_type = FindMemoryType(properties, HOST_MEMORY);
ASSERT_MSG(preferred_type, "No host visible and coherent memory type found");
}
return preferred_type.value_or(0);
}
} // Anonymous namespace
VKStreamBuffer::VKStreamBuffer(const VKDevice& device_, VKScheduler& scheduler_)
: device{device_}, scheduler{scheduler_} {
CreateBuffers();
ReserveWatches(current_watches, WATCHES_INITIAL_RESERVE);
ReserveWatches(previous_watches, WATCHES_INITIAL_RESERVE);
}
VKStreamBuffer::~VKStreamBuffer() = default;
std::pair<u8*, u64> VKStreamBuffer::Map(u64 size, u64 alignment) {
ASSERT(size <= stream_buffer_size);
mapped_size = size;
if (alignment > 0) {
offset = Common::AlignUp(offset, alignment);
}
WaitPendingOperations(offset);
if (offset + size > stream_buffer_size) {
// The buffer would overflow, save the number of used watches and reset the state.
invalidation_mark = current_watch_cursor;
current_watch_cursor = 0;
offset = 0;
// Swap watches and reset waiting cursors.
std::swap(previous_watches, current_watches);
wait_cursor = 0;
wait_bound = 0;
// Ensure that we don't wait for uncommitted fences.
scheduler.Flush();
}
return std::make_pair(memory.Map(offset, size), offset);
}
void VKStreamBuffer::Unmap(u64 size) {
ASSERT_MSG(size <= mapped_size, "Reserved size is too small");
memory.Unmap();
offset += size;
if (current_watch_cursor + 1 >= current_watches.size()) {
// Ensure that there are enough watches.
ReserveWatches(current_watches, WATCHES_RESERVE_CHUNK);
}
auto& watch = current_watches[current_watch_cursor++];
watch.upper_bound = offset;
watch.tick = scheduler.CurrentTick();
}
void VKStreamBuffer::CreateBuffers() {
const auto memory_properties = device.GetPhysical().GetMemoryProperties();
const u32 preferred_type = GetMemoryType(memory_properties);
const u32 preferred_heap = memory_properties.memoryTypes[preferred_type].heapIndex;
// Leave some bytes of the preferred heap unallocated to avoid running out of memory.
const VkDeviceSize heap_size = memory_properties.memoryHeaps[preferred_heap].size;
// As per DXVK's example, use `heap_size / 2` as the limit.
const VkDeviceSize allocable_size = heap_size / 2;
buffer = device.GetLogical().CreateBuffer({
.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
.size = std::min(PREFERRED_STREAM_BUFFER_SIZE, allocable_size),
.usage = BUFFER_USAGE,
.sharingMode = VK_SHARING_MODE_EXCLUSIVE,
.queueFamilyIndexCount = 0,
.pQueueFamilyIndices = nullptr,
});
const auto requirements = device.GetLogical().GetBufferMemoryRequirements(*buffer);
const u32 required_flags = requirements.memoryTypeBits;
stream_buffer_size = static_cast<u64>(requirements.size);
memory = device.GetLogical().AllocateMemory({
.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
.pNext = nullptr,
.allocationSize = requirements.size,
.memoryTypeIndex = GetMemoryType(memory_properties, required_flags),
});
buffer.BindMemory(*memory, 0);
}
void VKStreamBuffer::ReserveWatches(std::vector<Watch>& watches, std::size_t grow_size) {
watches.resize(watches.size() + grow_size);
}
void VKStreamBuffer::WaitPendingOperations(u64 requested_upper_bound) {
if (!invalidation_mark) {
return;
}
while (requested_upper_bound < wait_bound && wait_cursor < *invalidation_mark) {
auto& watch = previous_watches[wait_cursor];
wait_bound = watch.upper_bound;
scheduler.Wait(watch.tick);
++wait_cursor;
}
}
} // namespace Vulkan

View File

@@ -0,0 +1,76 @@
// Copyright 2019 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <optional>
#include <utility>
#include <vector>
#include "common/common_types.h"
#include "video_core/renderer_vulkan/wrapper.h"
namespace Vulkan {
class VKDevice;
class VKFenceWatch;
class VKScheduler;
class VKStreamBuffer final {
public:
explicit VKStreamBuffer(const VKDevice& device, VKScheduler& scheduler);
~VKStreamBuffer();
/**
* Reserves a region of memory from the stream buffer.
* @param size Size to reserve.
* @returns A pair of a raw memory pointer (with offset added), and the buffer offset
*/
std::pair<u8*, u64> Map(u64 size, u64 alignment);
/// Ensures that "size" bytes of memory are available to the GPU, potentially recording a copy.
void Unmap(u64 size);
VkBuffer Handle() const noexcept {
return *buffer;
}
u64 Address() const noexcept {
return 0;
}
private:
struct Watch {
u64 tick{};
u64 upper_bound{};
};
/// Creates Vulkan buffer handles, committing the required memory.
void CreateBuffers();
/// Increases the number of watches available.
void ReserveWatches(std::vector<Watch>& watches, std::size_t grow_size);
void WaitPendingOperations(u64 requested_upper_bound);
const VKDevice& device; ///< Vulkan device manager.
VKScheduler& scheduler; ///< Command scheduler.
vk::Buffer buffer; ///< Mapped buffer.
vk::DeviceMemory memory; ///< Memory allocation.
u64 stream_buffer_size{}; ///< Stream buffer size.
u64 offset{}; ///< Buffer iterator.
u64 mapped_size{}; ///< Size reserved for the current copy.
std::vector<Watch> current_watches; ///< Watches recorded in the current iteration.
std::size_t current_watch_cursor{}; ///< Count of watches, reset on invalidation.
std::optional<std::size_t> invalidation_mark; ///< Number of watches used in the previous cycle.
std::vector<Watch> previous_watches; ///< Watches used in the previous iteration.
std::size_t wait_cursor{}; ///< Last watch being waited for completion.
u64 wait_bound{}; ///< Highest offset being watched for completion.
};
} // namespace Vulkan
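A typical caller maps a region, copies into it, and unmaps exactly the bytes it wrote; a sketch (the caller-provided names are assumptions):
// Sketch of the Map/Unmap protocol; stream_buffer, src_data, size and alignment
// are assumed to come from the caller, and std::memcpy needs <cstring>.
void UploadExample(VKStreamBuffer& stream_buffer, const u8* src_data, u64 size,
                   u64 alignment) {
    const auto [mapped, buffer_offset] = stream_buffer.Map(size, alignment);
    std::memcpy(mapped, src_data, size);
    stream_buffer.Unmap(size); // Only the bytes actually written advance the iterator.
    // Bind Handle() at buffer_offset in subsequent draw or dispatch commands.
}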

View File

@@ -0,0 +1,239 @@
// Copyright 2019 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <algorithm>
#include <array>
#include <limits>
#include <vector>
#include "common/assert.h"
#include "common/logging/log.h"
#include "core/core.h"
#include "core/frontend/framebuffer_layout.h"
#include "video_core/renderer_vulkan/vk_device.h"
#include "video_core/renderer_vulkan/vk_scheduler.h"
#include "video_core/renderer_vulkan/vk_swapchain.h"
#include "video_core/renderer_vulkan/wrapper.h"
namespace Vulkan {
namespace {
VkSurfaceFormatKHR ChooseSwapSurfaceFormat(vk::Span<VkSurfaceFormatKHR> formats, bool srgb) {
if (formats.size() == 1 && formats[0].format == VK_FORMAT_UNDEFINED) {
VkSurfaceFormatKHR format;
format.format = VK_FORMAT_B8G8R8A8_UNORM;
format.colorSpace = VK_COLOR_SPACE_SRGB_NONLINEAR_KHR;
return format;
}
const auto& found = std::find_if(formats.begin(), formats.end(), [srgb](const auto& format) {
const auto request_format = srgb ? VK_FORMAT_B8G8R8A8_SRGB : VK_FORMAT_B8G8R8A8_UNORM;
return format.format == request_format &&
format.colorSpace == VK_COLOR_SPACE_SRGB_NONLINEAR_KHR;
});
return found != formats.end() ? *found : formats[0];
}
VkPresentModeKHR ChooseSwapPresentMode(vk::Span<VkPresentModeKHR> modes) {
// Mailbox doesn't block the application like FIFO (vsync) does, so prefer it
const auto found = std::find(modes.begin(), modes.end(), VK_PRESENT_MODE_MAILBOX_KHR);
return found != modes.end() ? *found : VK_PRESENT_MODE_FIFO_KHR;
}
VkExtent2D ChooseSwapExtent(const VkSurfaceCapabilitiesKHR& capabilities, u32 width, u32 height) {
constexpr auto undefined_size{std::numeric_limits<u32>::max()};
if (capabilities.currentExtent.width != undefined_size) {
return capabilities.currentExtent;
}
VkExtent2D extent;
extent.width = std::max(capabilities.minImageExtent.width,
std::min(capabilities.maxImageExtent.width, width));
extent.height = std::max(capabilities.minImageExtent.height,
std::min(capabilities.maxImageExtent.height, height));
return extent;
}
} // Anonymous namespace
VKSwapchain::VKSwapchain(VkSurfaceKHR surface_, const VKDevice& device_, VKScheduler& scheduler_)
: surface{surface_}, device{device_}, scheduler{scheduler_} {}
VKSwapchain::~VKSwapchain() = default;
void VKSwapchain::Create(u32 width, u32 height, bool srgb) {
const auto physical_device = device.GetPhysical();
const auto capabilities{physical_device.GetSurfaceCapabilitiesKHR(surface)};
if (capabilities.maxImageExtent.width == 0 || capabilities.maxImageExtent.height == 0) {
return;
}
device.GetLogical().WaitIdle();
Destroy();
CreateSwapchain(capabilities, width, height, srgb);
CreateSemaphores();
CreateImageViews();
resource_ticks.clear();
resource_ticks.resize(image_count);
}
void VKSwapchain::AcquireNextImage() {
device.GetLogical().AcquireNextImageKHR(*swapchain, std::numeric_limits<u64>::max(),
*present_semaphores[frame_index], {}, &image_index);
scheduler.Wait(resource_ticks[image_index]);
}
bool VKSwapchain::Present(VkSemaphore render_semaphore) {
const VkSemaphore present_semaphore{*present_semaphores[frame_index]};
const std::array<VkSemaphore, 2> semaphores{present_semaphore, render_semaphore};
const auto present_queue{device.GetPresentQueue()};
bool recreated = false;
const VkPresentInfoKHR present_info{
.sType = VK_STRUCTURE_TYPE_PRESENT_INFO_KHR,
.pNext = nullptr,
.waitSemaphoreCount = render_semaphore ? 2U : 1U,
.pWaitSemaphores = semaphores.data(),
.swapchainCount = 1,
.pSwapchains = swapchain.address(),
.pImageIndices = &image_index,
.pResults = nullptr,
};
switch (const VkResult result = present_queue.Present(present_info)) {
case VK_SUCCESS:
break;
case VK_SUBOPTIMAL_KHR:
LOG_DEBUG(Render_Vulkan, "Suboptimal swapchain");
break;
case VK_ERROR_OUT_OF_DATE_KHR:
if (current_width > 0 && current_height > 0) {
Create(current_width, current_height, current_srgb);
recreated = true;
}
break;
default:
LOG_CRITICAL(Render_Vulkan, "Failed to present with error {}", vk::ToString(result));
break;
}
resource_ticks[image_index] = scheduler.CurrentTick();
frame_index = (frame_index + 1) % static_cast<u32>(image_count);
return recreated;
}
bool VKSwapchain::HasFramebufferChanged(const Layout::FramebufferLayout& framebuffer) const {
// TODO(Rodrigo): Handle framebuffer pixel format changes
return framebuffer.width != current_width || framebuffer.height != current_height;
}
void VKSwapchain::CreateSwapchain(const VkSurfaceCapabilitiesKHR& capabilities, u32 width,
u32 height, bool srgb) {
const auto physical_device{device.GetPhysical()};
const auto formats{physical_device.GetSurfaceFormatsKHR(surface)};
const auto present_modes{physical_device.GetSurfacePresentModesKHR(surface)};
const VkSurfaceFormatKHR surface_format{ChooseSwapSurfaceFormat(formats, srgb)};
const VkPresentModeKHR present_mode{ChooseSwapPresentMode(present_modes)};
u32 requested_image_count{capabilities.minImageCount + 1};
if (capabilities.maxImageCount > 0 && requested_image_count > capabilities.maxImageCount) {
requested_image_count = capabilities.maxImageCount;
}
VkSwapchainCreateInfoKHR swapchain_ci{
.sType = VK_STRUCTURE_TYPE_SWAPCHAIN_CREATE_INFO_KHR,
.pNext = nullptr,
.flags = 0,
.surface = surface,
.minImageCount = requested_image_count,
.imageFormat = surface_format.format,
.imageColorSpace = surface_format.colorSpace,
.imageExtent = {},
.imageArrayLayers = 1,
.imageUsage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT,
.imageSharingMode = VK_SHARING_MODE_EXCLUSIVE,
.queueFamilyIndexCount = 0,
.pQueueFamilyIndices = nullptr,
.preTransform = capabilities.currentTransform,
.compositeAlpha = VK_COMPOSITE_ALPHA_OPAQUE_BIT_KHR,
.presentMode = present_mode,
.clipped = VK_FALSE,
.oldSwapchain = nullptr,
};
const u32 graphics_family{device.GetGraphicsFamily()};
const u32 present_family{device.GetPresentFamily()};
const std::array<u32, 2> queue_indices{graphics_family, present_family};
if (graphics_family != present_family) {
swapchain_ci.imageSharingMode = VK_SHARING_MODE_CONCURRENT;
swapchain_ci.queueFamilyIndexCount = static_cast<u32>(queue_indices.size());
swapchain_ci.pQueueFamilyIndices = queue_indices.data();
}
// Request the size again to reduce the possibility of a TOCTOU race condition.
const auto updated_capabilities = physical_device.GetSurfaceCapabilitiesKHR(surface);
swapchain_ci.imageExtent = ChooseSwapExtent(updated_capabilities, width, height);
// Don't add code between this query and the swapchain creation.
swapchain = device.GetLogical().CreateSwapchainKHR(swapchain_ci);
extent = swapchain_ci.imageExtent;
current_width = extent.width;
current_height = extent.height;
current_srgb = srgb;
images = swapchain.GetImages();
image_count = static_cast<u32>(images.size());
image_format = surface_format.format;
}
void VKSwapchain::CreateSemaphores() {
present_semaphores.resize(image_count);
std::generate(present_semaphores.begin(), present_semaphores.end(),
[this] { return device.GetLogical().CreateSemaphore(); });
}
void VKSwapchain::CreateImageViews() {
VkImageViewCreateInfo ci{
.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
.image = {},
.viewType = VK_IMAGE_VIEW_TYPE_2D,
.format = image_format,
.components =
{
.r = VK_COMPONENT_SWIZZLE_IDENTITY,
.g = VK_COMPONENT_SWIZZLE_IDENTITY,
.b = VK_COMPONENT_SWIZZLE_IDENTITY,
.a = VK_COMPONENT_SWIZZLE_IDENTITY,
},
.subresourceRange =
{
.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
.baseMipLevel = 0,
.levelCount = 1,
.baseArrayLayer = 0,
.layerCount = 1,
},
};
image_views.resize(image_count);
for (std::size_t i = 0; i < image_count; i++) {
ci.image = images[i];
image_views[i] = device.GetLogical().CreateImageView(ci);
}
}
void VKSwapchain::Destroy() {
frame_index = 0;
present_semaphores.clear();
framebuffers.clear();
image_views.clear();
swapchain.reset();
}
} // namespace Vulkan

View File

@@ -0,0 +1,99 @@
// Copyright 2019 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <vector>
#include "common/common_types.h"
#include "video_core/renderer_vulkan/wrapper.h"
namespace Layout {
struct FramebufferLayout;
}
namespace Vulkan {
class VKDevice;
class VKScheduler;
class VKSwapchain {
public:
explicit VKSwapchain(VkSurfaceKHR surface, const VKDevice& device, VKScheduler& scheduler);
~VKSwapchain();
/// Creates (or recreates) the swapchain with a given size.
void Create(u32 width, u32 height, bool srgb);
/// Acquires the next image in the swapchain, waits as needed.
void AcquireNextImage();
/// Presents the rendered image to the swapchain. Returns true when the swapchain had to be
/// recreated. Takes responsibility for ownership of the fence.
bool Present(VkSemaphore render_semaphore);
/// Returns true when the framebuffer layout has changed.
bool HasFramebufferChanged(const Layout::FramebufferLayout& framebuffer) const;
VkExtent2D GetSize() const {
return extent;
}
std::size_t GetImageCount() const {
return image_count;
}
std::size_t GetImageIndex() const {
return image_index;
}
VkImage GetImageIndex(std::size_t index) const {
return images[index];
}
VkImageView GetImageViewIndex(std::size_t index) const {
return *image_views[index];
}
VkFormat GetImageFormat() const {
return image_format;
}
bool GetSrgbState() const {
return current_srgb;
}
private:
void CreateSwapchain(const VkSurfaceCapabilitiesKHR& capabilities, u32 width, u32 height,
bool srgb);
void CreateSemaphores();
void CreateImageViews();
void Destroy();
const VkSurfaceKHR surface;
const VKDevice& device;
VKScheduler& scheduler;
vk::SwapchainKHR swapchain;
std::size_t image_count{};
std::vector<VkImage> images;
std::vector<vk::ImageView> image_views;
std::vector<vk::Framebuffer> framebuffers;
std::vector<u64> resource_ticks;
std::vector<vk::Semaphore> present_semaphores;
u32 image_index{};
u32 frame_index{};
VkFormat image_format{};
VkExtent2D extent{};
u32 current_width{};
u32 current_height{};
bool current_srgb{};
};
} // namespace Vulkan
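Per frame the swapchain is driven in three steps; a condensed sketch of the expected call pattern, where `layout` and `render_semaphore` are assumptions supplied by the renderer:
// Condensed per-frame usage sketch; not part of this commit.
void PresentFrameExample(VKSwapchain& swapchain, const Layout::FramebufferLayout& layout,
                         VkSemaphore render_semaphore) {
    if (swapchain.HasFramebufferChanged(layout)) {
        swapchain.Create(layout.width, layout.height, swapchain.GetSrgbState());
    }
    swapchain.AcquireNextImage(); // Waits until the acquired image's last use is done.
    // ... render to GetImageViewIndex(GetImageIndex()) and signal render_semaphore ...
    if (swapchain.Present(render_semaphore)) {
        // The swapchain was recreated; dependent framebuffers must be rebuilt.
    }
}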

File diff suppressed because it is too large

View File

@@ -0,0 +1,268 @@
// Copyright 2019 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <compare>
#include <span>
#include "video_core/renderer_vulkan/vk_memory_manager.h"
#include "video_core/renderer_vulkan/wrapper.h"
#include "video_core/texture_cache/texture_cache.h"
namespace Vulkan {
using VideoCommon::ImageId;
using VideoCommon::NUM_RT;
using VideoCommon::Offset2D;
using VideoCommon::RenderTargets;
using VideoCore::Surface::PixelFormat;
class VKDevice;
class VKScheduler;
class VKStagingBufferPool;
class BlitImageHelper;
class Image;
class ImageView;
class Framebuffer;
struct RenderPassKey {
constexpr auto operator<=>(const RenderPassKey&) const noexcept = default;
std::array<PixelFormat, NUM_RT> color_formats;
PixelFormat depth_format;
VkSampleCountFlagBits samples;
};
} // namespace Vulkan
namespace std {
template <>
struct hash<Vulkan::RenderPassKey> {
[[nodiscard]] constexpr size_t operator()(const Vulkan::RenderPassKey& key) const noexcept {
size_t value = static_cast<size_t>(key.depth_format) << 48;
value ^= static_cast<size_t>(key.samples) << 52;
for (size_t i = 0; i < key.color_formats.size(); ++i) {
value ^= static_cast<size_t>(key.color_formats[i]) << (i * 6);
}
return value;
}
};
} // namespace std
namespace Vulkan {
struct ImageBufferMap {
[[nodiscard]] VkBuffer Handle() const noexcept {
return handle;
}
[[nodiscard]] std::span<u8> Span() const noexcept {
return map.Span();
}
VkBuffer handle;
MemoryMap map;
};
struct TextureCacheRuntime {
const VKDevice& device;
VKScheduler& scheduler;
VKMemoryManager& memory_manager;
VKStagingBufferPool& staging_buffer_pool;
BlitImageHelper& blit_image_helper;
std::unordered_map<RenderPassKey, vk::RenderPass> renderpass_cache;
void Finish();
[[nodiscard]] ImageBufferMap MapUploadBuffer(size_t size);
[[nodiscard]] ImageBufferMap MapDownloadBuffer(size_t size) {
// TODO: Have a special function for this
return MapUploadBuffer(size);
}
void BlitImage(Framebuffer* dst_framebuffer, ImageView& dst, ImageView& src,
const std::array<Offset2D, 2>& dst_region,
const std::array<Offset2D, 2>& src_region,
Tegra::Engines::Fermi2D::Filter filter,
Tegra::Engines::Fermi2D::Operation operation);
void CopyImage(Image& dst, Image& src, std::span<const VideoCommon::ImageCopy> copies);
void ConvertImage(Framebuffer* dst, ImageView& dst_view, ImageView& src_view);
[[nodiscard]] bool CanAccelerateImageUpload(Image&) const noexcept {
return false;
}
void AccelerateImageUpload(Image&, const ImageBufferMap&, size_t,
std::span<const VideoCommon::SwizzleParameters>) {
UNREACHABLE();
}
void InsertUploadMemoryBarrier() {}
};
class Image : public VideoCommon::ImageBase {
public:
explicit Image(TextureCacheRuntime&, const VideoCommon::ImageInfo& info, GPUVAddr gpu_addr,
VAddr cpu_addr);
void UploadMemory(const ImageBufferMap& map, size_t buffer_offset,
std::span<const VideoCommon::BufferImageCopy> copies);
void UploadMemory(const ImageBufferMap& map, size_t buffer_offset,
std::span<const VideoCommon::BufferCopy> copies);
void DownloadMemory(const ImageBufferMap& map, size_t buffer_offset,
std::span<const VideoCommon::BufferImageCopy> copies);
[[nodiscard]] VkImage Handle() const noexcept {
return *image;
}
[[nodiscard]] VkBuffer Buffer() const noexcept {
return *buffer;
}
[[nodiscard]] VkImageAspectFlags AspectMask() const noexcept {
return aspect_mask;
}
private:
VKScheduler* scheduler;
vk::Image image;
vk::Buffer buffer;
VKMemoryCommit commit;
VkImageAspectFlags aspect_mask = 0;
bool initialized = false;
};
class ImageView : public VideoCommon::ImageViewBase {
public:
explicit ImageView(TextureCacheRuntime&, const VideoCommon::ImageViewInfo&, ImageId, Image&);
explicit ImageView(TextureCacheRuntime&, const VideoCommon::NullImageParams&);
[[nodiscard]] VkImageView DepthView();
[[nodiscard]] VkImageView StencilView();
[[nodiscard]] VkImageView Handle(VideoCommon::ImageViewType query_type) const noexcept {
return *image_views[static_cast<size_t>(query_type)];
}
[[nodiscard]] VkBufferView BufferView() const noexcept {
return *buffer_view;
}
[[nodiscard]] VkImage ImageHandle() const noexcept {
return image_handle;
}
[[nodiscard]] VkImageView RenderTarget() const noexcept {
return render_target;
}
[[nodiscard]] PixelFormat ImageFormat() const noexcept {
return image_format;
}
[[nodiscard]] VkSampleCountFlagBits Samples() const noexcept {
return samples;
}
private:
[[nodiscard]] vk::ImageView MakeDepthStencilView(VkImageAspectFlags aspect_mask);
const VKDevice* device = nullptr;
std::array<vk::ImageView, VideoCommon::NUM_IMAGE_VIEW_TYPES> image_views;
vk::ImageView depth_view;
vk::ImageView stencil_view;
vk::BufferView buffer_view;
VkImage image_handle = VK_NULL_HANDLE;
VkImageView render_target = VK_NULL_HANDLE;
PixelFormat image_format = PixelFormat::Invalid;
VkSampleCountFlagBits samples = VK_SAMPLE_COUNT_1_BIT;
};
class ImageAlloc : public VideoCommon::ImageAllocBase {};
class Sampler {
public:
explicit Sampler(TextureCacheRuntime&, const Tegra::Texture::TSCEntry&);
[[nodiscard]] VkSampler Handle() const noexcept {
return *sampler;
}
private:
vk::Sampler sampler;
};
class Framebuffer {
public:
explicit Framebuffer(TextureCacheRuntime&, std::span<ImageView*, NUM_RT> color_buffers,
ImageView* depth_buffer, const VideoCommon::RenderTargets& key);
[[nodiscard]] VkFramebuffer Handle() const noexcept {
return *framebuffer;
}
[[nodiscard]] VkRenderPass RenderPass() const noexcept {
return renderpass;
}
[[nodiscard]] VkExtent2D RenderArea() const noexcept {
return render_area;
}
[[nodiscard]] VkSampleCountFlagBits Samples() const noexcept {
return samples;
}
[[nodiscard]] u32 NumColorBuffers() const noexcept {
return num_color_buffers;
}
[[nodiscard]] u32 NumImages() const noexcept {
return num_images;
}
[[nodiscard]] const std::array<VkImage, 9>& Images() const noexcept {
return images;
}
[[nodiscard]] const std::array<VkImageSubresourceRange, 9>& ImageRanges() const noexcept {
return image_ranges;
}
private:
vk::Framebuffer framebuffer;
VkRenderPass renderpass{};
VkExtent2D render_area{};
VkSampleCountFlagBits samples = VK_SAMPLE_COUNT_1_BIT;
u32 num_color_buffers = 0;
u32 num_images = 0;
std::array<VkImage, 9> images{};
std::array<VkImageSubresourceRange, 9> image_ranges{};
};
struct TextureCacheParams {
static constexpr bool ENABLE_VALIDATION = true;
static constexpr bool FRAMEBUFFER_BLITS = false;
static constexpr bool HAS_EMULATED_COPIES = false;
using Runtime = Vulkan::TextureCacheRuntime;
using Image = Vulkan::Image;
using ImageAlloc = Vulkan::ImageAlloc;
using ImageView = Vulkan::ImageView;
using Sampler = Vulkan::Sampler;
using Framebuffer = Vulkan::Framebuffer;
};
using TextureCache = VideoCommon::TextureCache<TextureCacheParams>;
} // namespace Vulkan

View File

@@ -0,0 +1,48 @@
// Copyright 2019 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <variant>
#include <boost/container/static_vector.hpp>
#include "common/assert.h"
#include "common/logging/log.h"
#include "video_core/renderer_vulkan/vk_device.h"
#include "video_core/renderer_vulkan/vk_scheduler.h"
#include "video_core/renderer_vulkan/vk_update_descriptor.h"
#include "video_core/renderer_vulkan/wrapper.h"
namespace Vulkan {
VKUpdateDescriptorQueue::VKUpdateDescriptorQueue(const VKDevice& device_, VKScheduler& scheduler_)
: device{device_}, scheduler{scheduler_} {}
VKUpdateDescriptorQueue::~VKUpdateDescriptorQueue() = default;
void VKUpdateDescriptorQueue::TickFrame() {
payload.clear();
}
void VKUpdateDescriptorQueue::Acquire() {
// Minimum number of entries required.
// This is the maximum number of entries a single draw call might use.
static constexpr std::size_t MIN_ENTRIES = 0x400;
if (payload.size() + MIN_ENTRIES >= payload.max_size()) {
LOG_WARNING(Render_Vulkan, "Payload overflow, waiting for worker thread");
scheduler.WaitWorker();
payload.clear();
}
// Take a past-the-end pointer without dereferencing the end iterator.
upload_start = payload.data() + payload.size();
}
void VKUpdateDescriptorQueue::Send(VkDescriptorUpdateTemplateKHR update_template,
VkDescriptorSet set) {
const void* const data = upload_start;
const vk::Device* const logical = &device.GetLogical();
scheduler.Record([data, logical, set, update_template](vk::CommandBuffer) {
logical->UpdateDescriptorSet(set, update_template, data);
});
}
} // namespace Vulkan

View File

@@ -0,0 +1,79 @@
// Copyright 2019 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <variant>
#include <boost/container/static_vector.hpp>
#include "common/common_types.h"
#include "video_core/renderer_vulkan/wrapper.h"
namespace Vulkan {
class VKDevice;
class VKScheduler;
struct DescriptorUpdateEntry {
DescriptorUpdateEntry(VkDescriptorImageInfo image_) : image{image_} {}
DescriptorUpdateEntry(VkDescriptorBufferInfo buffer_) : buffer{buffer_} {}
DescriptorUpdateEntry(VkBufferView texel_buffer_) : texel_buffer{texel_buffer_} {}
union {
VkDescriptorImageInfo image;
VkDescriptorBufferInfo buffer;
VkBufferView texel_buffer;
};
};
class VKUpdateDescriptorQueue final {
public:
explicit VKUpdateDescriptorQueue(const VKDevice& device_, VKScheduler& scheduler_);
~VKUpdateDescriptorQueue();
void TickFrame();
void Acquire();
void Send(VkDescriptorUpdateTemplateKHR update_template, VkDescriptorSet set);
void AddSampledImage(VkImageView image_view, VkSampler sampler) {
payload.emplace_back(VkDescriptorImageInfo{
.sampler = sampler,
.imageView = image_view,
.imageLayout = VK_IMAGE_LAYOUT_GENERAL,
});
}
void AddImage(VkImageView image_view) {
payload.emplace_back(VkDescriptorImageInfo{
.sampler = VK_NULL_HANDLE,
.imageView = image_view,
.imageLayout = VK_IMAGE_LAYOUT_GENERAL,
});
}
void AddBuffer(VkBuffer buffer, u64 offset, size_t size) {
payload.emplace_back(VkDescriptorBufferInfo{
.buffer = buffer,
.offset = offset,
.range = size,
});
}
void AddTexelBuffer(VkBufferView texel_buffer) {
payload.emplace_back(texel_buffer);
}
private:
const VKDevice& device;
VKScheduler& scheduler;
const DescriptorUpdateEntry* upload_start = nullptr;
boost::container::static_vector<DescriptorUpdateEntry, 0x10000> payload;
};
} // namespace Vulkan
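// Typical caller sketch (hypothetical; assumes a valid update template and
// descriptor set): one Acquire() per draw, one Add*() call per entry in
// template order, then Send() to defer the actual update to the scheduler's
// worker thread.
//
//   queue.Acquire();
//   queue.AddSampledImage(image_view, sampler);
//   queue.Send(update_template, descriptor_set);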

View File

@@ -0,0 +1,928 @@
// Copyright 2020 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <algorithm>
#include <exception>
#include <memory>
#include <optional>
#include <string_view>
#include <utility>
#include <vector>
#include "common/common_types.h"
#include "common/logging/log.h"
#include "video_core/renderer_vulkan/wrapper.h"
namespace Vulkan::vk {
namespace {
template <typename Func>
void SortPhysicalDevices(std::vector<VkPhysicalDevice>& devices, const InstanceDispatch& dld,
Func&& func) {
// Calling GetProperties queries Vulkan more often than strictly necessary,
// but the call is expected to be cheap.
std::stable_sort(devices.begin(), devices.end(),
[&dld, &func](VkPhysicalDevice lhs, VkPhysicalDevice rhs) {
return func(vk::PhysicalDevice(lhs, dld).GetProperties(),
vk::PhysicalDevice(rhs, dld).GetProperties());
});
}
void SortPhysicalDevicesPerVendor(std::vector<VkPhysicalDevice>& devices,
const InstanceDispatch& dld,
std::initializer_list<u32> vendor_ids) {
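// Walk the vendor list back to front: each stable sort preserves the previous
// order, so the first (highest-priority) vendor is sorted last and wins.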
for (auto it = vendor_ids.end(); it != vendor_ids.begin();) {
--it;
SortPhysicalDevices(devices, dld, [id = *it](const auto& lhs, const auto& rhs) {
return lhs.vendorID == id && rhs.vendorID != id;
});
}
}
void SortPhysicalDevices(std::vector<VkPhysicalDevice>& devices, const InstanceDispatch& dld) {
// Sort by name first; this establishes a baseline so GPUs with higher model
// numbers appear first (e.g. a GTX 1650 is intentionally listed before a GTX 1080).
SortPhysicalDevices(devices, dld, [](const auto& lhs, const auto& rhs) {
return std::string_view{lhs.deviceName} > std::string_view{rhs.deviceName};
});
// Prefer discrete over non-discrete
SortPhysicalDevices(devices, dld, [](const auto& lhs, const auto& rhs) {
return lhs.deviceType == VK_PHYSICAL_DEVICE_TYPE_DISCRETE_GPU &&
rhs.deviceType != VK_PHYSICAL_DEVICE_TYPE_DISCRETE_GPU;
});
// Prefer Nvidia over AMD, AMD over Intel, Intel over the rest.
SortPhysicalDevicesPerVendor(devices, dld, {0x10DE, 0x1002, 0x8086});
}
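// Minimal sketch of the same multi-pass stable_sort idiom on plain data
// (illustrative only; SortByPreferenceExample is a hypothetical name, not used
// by the wrapper). Each pass partitions by one predicate; stability makes
// earlier passes act as tie-breakers for later, higher-priority ones.
[[maybe_unused]] void SortByPreferenceExample(std::vector<std::string_view>& names) {
// Base pass: descending by name.
std::stable_sort(names.begin(), names.end(),
                 [](std::string_view lhs, std::string_view rhs) { return lhs > rhs; });
// Priority pass: names starting with "GTX" float to the front.
std::stable_sort(names.begin(), names.end(),
                 [](std::string_view lhs, std::string_view rhs) {
                     return lhs.starts_with("GTX") && !rhs.starts_with("GTX");
                 });
}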
template <typename T>
bool Proc(T& result, const InstanceDispatch& dld, const char* proc_name,
VkInstance instance = nullptr) noexcept {
result = reinterpret_cast<T>(dld.vkGetInstanceProcAddr(instance, proc_name));
return result != nullptr;
}
template <typename T>
void Proc(T& result, const DeviceDispatch& dld, const char* proc_name, VkDevice device) noexcept {
result = reinterpret_cast<T>(dld.vkGetDeviceProcAddr(device, proc_name));
}
void Load(VkDevice device, DeviceDispatch& dld) noexcept {
#define X(name) Proc(dld.name, dld, #name, device)
X(vkAcquireNextImageKHR);
X(vkAllocateCommandBuffers);
X(vkAllocateDescriptorSets);
X(vkAllocateMemory);
X(vkBeginCommandBuffer);
X(vkBindBufferMemory);
X(vkBindImageMemory);
X(vkCmdBeginQuery);
X(vkCmdBeginRenderPass);
X(vkCmdBeginTransformFeedbackEXT);
X(vkCmdBeginDebugUtilsLabelEXT);
X(vkCmdBindDescriptorSets);
X(vkCmdBindIndexBuffer);
X(vkCmdBindPipeline);
X(vkCmdBindTransformFeedbackBuffersEXT);
X(vkCmdBindVertexBuffers);
X(vkCmdBlitImage);
X(vkCmdClearAttachments);
X(vkCmdCopyBuffer);
X(vkCmdCopyBufferToImage);
X(vkCmdCopyImage);
X(vkCmdCopyImageToBuffer);
X(vkCmdDispatch);
X(vkCmdDraw);
X(vkCmdDrawIndexed);
X(vkCmdEndQuery);
X(vkCmdEndRenderPass);
X(vkCmdEndTransformFeedbackEXT);
X(vkCmdEndDebugUtilsLabelEXT);
X(vkCmdFillBuffer);
X(vkCmdPipelineBarrier);
X(vkCmdPushConstants);
X(vkCmdSetBlendConstants);
X(vkCmdSetDepthBias);
X(vkCmdSetDepthBounds);
X(vkCmdSetEvent);
X(vkCmdSetScissor);
X(vkCmdSetStencilCompareMask);
X(vkCmdSetStencilReference);
X(vkCmdSetStencilWriteMask);
X(vkCmdSetViewport);
X(vkCmdWaitEvents);
X(vkCmdBindVertexBuffers2EXT);
X(vkCmdSetCullModeEXT);
X(vkCmdSetDepthBoundsTestEnableEXT);
X(vkCmdSetDepthCompareOpEXT);
X(vkCmdSetDepthTestEnableEXT);
X(vkCmdSetDepthWriteEnableEXT);
X(vkCmdSetFrontFaceEXT);
X(vkCmdSetPrimitiveTopologyEXT);
X(vkCmdSetStencilOpEXT);
X(vkCmdSetStencilTestEnableEXT);
X(vkCmdResolveImage);
X(vkCreateBuffer);
X(vkCreateBufferView);
X(vkCreateCommandPool);
X(vkCreateComputePipelines);
X(vkCreateDescriptorPool);
X(vkCreateDescriptorSetLayout);
X(vkCreateDescriptorUpdateTemplateKHR);
X(vkCreateEvent);
X(vkCreateFence);
X(vkCreateFramebuffer);
X(vkCreateGraphicsPipelines);
X(vkCreateImage);
X(vkCreateImageView);
X(vkCreatePipelineLayout);
X(vkCreateQueryPool);
X(vkCreateRenderPass);
X(vkCreateSampler);
X(vkCreateSemaphore);
X(vkCreateShaderModule);
X(vkCreateSwapchainKHR);
X(vkDestroyBuffer);
X(vkDestroyBufferView);
X(vkDestroyCommandPool);
X(vkDestroyDescriptorPool);
X(vkDestroyDescriptorSetLayout);
X(vkDestroyDescriptorUpdateTemplateKHR);
X(vkDestroyEvent);
X(vkDestroyFence);
X(vkDestroyFramebuffer);
X(vkDestroyImage);
X(vkDestroyImageView);
X(vkDestroyPipeline);
X(vkDestroyPipelineLayout);
X(vkDestroyQueryPool);
X(vkDestroyRenderPass);
X(vkDestroySampler);
X(vkDestroySemaphore);
X(vkDestroyShaderModule);
X(vkDestroySwapchainKHR);
X(vkDeviceWaitIdle);
X(vkEndCommandBuffer);
X(vkFreeCommandBuffers);
X(vkFreeDescriptorSets);
X(vkFreeMemory);
X(vkGetBufferMemoryRequirements);
X(vkGetDeviceQueue);
X(vkGetEventStatus);
X(vkGetFenceStatus);
X(vkGetImageMemoryRequirements);
X(vkGetQueryPoolResults);
X(vkGetSemaphoreCounterValueKHR);
X(vkMapMemory);
X(vkQueueSubmit);
X(vkResetFences);
X(vkResetQueryPoolEXT);
X(vkSetDebugUtilsObjectNameEXT);
X(vkSetDebugUtilsObjectTagEXT);
X(vkUnmapMemory);
X(vkUpdateDescriptorSetWithTemplateKHR);
X(vkUpdateDescriptorSets);
X(vkWaitForFences);
X(vkWaitSemaphoresKHR);
#undef X
}
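// The X macro above expands each entry point into a Proc() call, e.g.
// X(vkCreateImage) becomes Proc(dld.vkCreateImage, dld, "vkCreateImage", device),
// so adding a new device function is a one-line change to the list.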
template <typename T>
void SetObjectName(const DeviceDispatch* dld, VkDevice device, T handle, VkObjectType type,
const char* name) {
const VkDebugUtilsObjectNameInfoEXT name_info{
.sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_OBJECT_NAME_INFO_EXT,
.pNext = nullptr,
.objectType = type,
.objectHandle = reinterpret_cast<u64>(handle),
.pObjectName = name,
};
Check(dld->vkSetDebugUtilsObjectNameEXT(device, &name_info));
}
} // Anonymous namespace
bool Load(InstanceDispatch& dld) noexcept {
#define X(name) Proc(dld.name, dld, #name)
return X(vkCreateInstance) && X(vkEnumerateInstanceExtensionProperties) &&
X(vkEnumerateInstanceLayerProperties);
#undef X
}
bool Load(VkInstance instance, InstanceDispatch& dld) noexcept {
#define X(name) Proc(dld.name, dld, #name, instance)
// These functions may fail to load depending on the enabled extensions.
// Don't return a failure on these.
X(vkCreateDebugUtilsMessengerEXT);
X(vkDestroyDebugUtilsMessengerEXT);
X(vkDestroySurfaceKHR);
X(vkGetPhysicalDeviceFeatures2KHR);
X(vkGetPhysicalDeviceProperties2KHR);
X(vkGetPhysicalDeviceSurfaceCapabilitiesKHR);
X(vkGetPhysicalDeviceSurfaceFormatsKHR);
X(vkGetPhysicalDeviceSurfacePresentModesKHR);
X(vkGetPhysicalDeviceSurfaceSupportKHR);
X(vkGetSwapchainImagesKHR);
X(vkQueuePresentKHR);
return X(vkCreateDevice) && X(vkDestroyDevice) &&
X(vkEnumerateDeviceExtensionProperties) && X(vkEnumeratePhysicalDevices) &&
X(vkGetDeviceProcAddr) && X(vkGetPhysicalDeviceFormatProperties) &&
X(vkGetPhysicalDeviceMemoryProperties) && X(vkGetPhysicalDeviceProperties) &&
X(vkGetPhysicalDeviceQueueFamilyProperties);
#undef X
}
const char* Exception::what() const noexcept {
return ToString(result);
}
const char* ToString(VkResult result) noexcept {
switch (result) {
case VkResult::VK_SUCCESS:
return "VK_SUCCESS";
case VkResult::VK_NOT_READY:
return "VK_NOT_READY";
case VkResult::VK_TIMEOUT:
return "VK_TIMEOUT";
case VkResult::VK_EVENT_SET:
return "VK_EVENT_SET";
case VkResult::VK_EVENT_RESET:
return "VK_EVENT_RESET";
case VkResult::VK_INCOMPLETE:
return "VK_INCOMPLETE";
case VkResult::VK_ERROR_OUT_OF_HOST_MEMORY:
return "VK_ERROR_OUT_OF_HOST_MEMORY";
case VkResult::VK_ERROR_OUT_OF_DEVICE_MEMORY:
return "VK_ERROR_OUT_OF_DEVICE_MEMORY";
case VkResult::VK_ERROR_INITIALIZATION_FAILED:
return "VK_ERROR_INITIALIZATION_FAILED";
case VkResult::VK_ERROR_DEVICE_LOST:
return "VK_ERROR_DEVICE_LOST";
case VkResult::VK_ERROR_MEMORY_MAP_FAILED:
return "VK_ERROR_MEMORY_MAP_FAILED";
case VkResult::VK_ERROR_LAYER_NOT_PRESENT:
return "VK_ERROR_LAYER_NOT_PRESENT";
case VkResult::VK_ERROR_EXTENSION_NOT_PRESENT:
return "VK_ERROR_EXTENSION_NOT_PRESENT";
case VkResult::VK_ERROR_FEATURE_NOT_PRESENT:
return "VK_ERROR_FEATURE_NOT_PRESENT";
case VkResult::VK_ERROR_INCOMPATIBLE_DRIVER:
return "VK_ERROR_INCOMPATIBLE_DRIVER";
case VkResult::VK_ERROR_TOO_MANY_OBJECTS:
return "VK_ERROR_TOO_MANY_OBJECTS";
case VkResult::VK_ERROR_FORMAT_NOT_SUPPORTED:
return "VK_ERROR_FORMAT_NOT_SUPPORTED";
case VkResult::VK_ERROR_FRAGMENTED_POOL:
return "VK_ERROR_FRAGMENTED_POOL";
case VkResult::VK_ERROR_OUT_OF_POOL_MEMORY:
return "VK_ERROR_OUT_OF_POOL_MEMORY";
case VkResult::VK_ERROR_INVALID_EXTERNAL_HANDLE:
return "VK_ERROR_INVALID_EXTERNAL_HANDLE";
case VkResult::VK_ERROR_SURFACE_LOST_KHR:
return "VK_ERROR_SURFACE_LOST_KHR";
case VkResult::VK_ERROR_NATIVE_WINDOW_IN_USE_KHR:
return "VK_ERROR_NATIVE_WINDOW_IN_USE_KHR";
case VkResult::VK_SUBOPTIMAL_KHR:
return "VK_SUBOPTIMAL_KHR";
case VkResult::VK_ERROR_OUT_OF_DATE_KHR:
return "VK_ERROR_OUT_OF_DATE_KHR";
case VkResult::VK_ERROR_INCOMPATIBLE_DISPLAY_KHR:
return "VK_ERROR_INCOMPATIBLE_DISPLAY_KHR";
case VkResult::VK_ERROR_VALIDATION_FAILED_EXT:
return "VK_ERROR_VALIDATION_FAILED_EXT";
case VkResult::VK_ERROR_INVALID_SHADER_NV:
return "VK_ERROR_INVALID_SHADER_NV";
case VkResult::VK_ERROR_INVALID_DRM_FORMAT_MODIFIER_PLANE_LAYOUT_EXT:
return "VK_ERROR_INVALID_DRM_FORMAT_MODIFIER_PLANE_LAYOUT_EXT";
case VkResult::VK_ERROR_FRAGMENTATION_EXT:
return "VK_ERROR_FRAGMENTATION_EXT";
case VkResult::VK_ERROR_NOT_PERMITTED_EXT:
return "VK_ERROR_NOT_PERMITTED_EXT";
case VkResult::VK_ERROR_INVALID_DEVICE_ADDRESS_EXT:
return "VK_ERROR_INVALID_DEVICE_ADDRESS_EXT";
case VkResult::VK_ERROR_FULL_SCREEN_EXCLUSIVE_MODE_LOST_EXT:
return "VK_ERROR_FULL_SCREEN_EXCLUSIVE_MODE_LOST_EXT";
case VkResult::VK_ERROR_UNKNOWN:
return "VK_ERROR_UNKNOWN";
case VkResult::VK_ERROR_INCOMPATIBLE_VERSION_KHR:
return "VK_ERROR_INCOMPATIBLE_VERSION_KHR";
case VkResult::VK_THREAD_IDLE_KHR:
return "VK_THREAD_IDLE_KHR";
case VkResult::VK_THREAD_DONE_KHR:
return "VK_THREAD_DONE_KHR";
case VkResult::VK_OPERATION_DEFERRED_KHR:
return "VK_OPERATION_DEFERRED_KHR";
case VkResult::VK_OPERATION_NOT_DEFERRED_KHR:
return "VK_OPERATION_NOT_DEFERRED_KHR";
case VkResult::VK_PIPELINE_COMPILE_REQUIRED_EXT:
return "VK_PIPELINE_COMPILE_REQUIRED_EXT";
case VkResult::VK_RESULT_MAX_ENUM:
return "VK_RESULT_MAX_ENUM";
}
return "Unknown";
}
void Destroy(VkInstance instance, const InstanceDispatch& dld) noexcept {
dld.vkDestroyInstance(instance, nullptr);
}
void Destroy(VkDevice device, const InstanceDispatch& dld) noexcept {
dld.vkDestroyDevice(device, nullptr);
}
void Destroy(VkDevice device, VkBuffer handle, const DeviceDispatch& dld) noexcept {
dld.vkDestroyBuffer(device, handle, nullptr);
}
void Destroy(VkDevice device, VkBufferView handle, const DeviceDispatch& dld) noexcept {
dld.vkDestroyBufferView(device, handle, nullptr);
}
void Destroy(VkDevice device, VkCommandPool handle, const DeviceDispatch& dld) noexcept {
dld.vkDestroyCommandPool(device, handle, nullptr);
}
void Destroy(VkDevice device, VkDescriptorPool handle, const DeviceDispatch& dld) noexcept {
dld.vkDestroyDescriptorPool(device, handle, nullptr);
}
void Destroy(VkDevice device, VkDescriptorSetLayout handle, const DeviceDispatch& dld) noexcept {
dld.vkDestroyDescriptorSetLayout(device, handle, nullptr);
}
void Destroy(VkDevice device, VkDescriptorUpdateTemplateKHR handle,
const DeviceDispatch& dld) noexcept {
dld.vkDestroyDescriptorUpdateTemplateKHR(device, handle, nullptr);
}
void Destroy(VkDevice device, VkDeviceMemory handle, const DeviceDispatch& dld) noexcept {
dld.vkFreeMemory(device, handle, nullptr);
}
void Destroy(VkDevice device, VkEvent handle, const DeviceDispatch& dld) noexcept {
dld.vkDestroyEvent(device, handle, nullptr);
}
void Destroy(VkDevice device, VkFence handle, const DeviceDispatch& dld) noexcept {
dld.vkDestroyFence(device, handle, nullptr);
}
void Destroy(VkDevice device, VkFramebuffer handle, const DeviceDispatch& dld) noexcept {
dld.vkDestroyFramebuffer(device, handle, nullptr);
}
void Destroy(VkDevice device, VkImage handle, const DeviceDispatch& dld) noexcept {
dld.vkDestroyImage(device, handle, nullptr);
}
void Destroy(VkDevice device, VkImageView handle, const DeviceDispatch& dld) noexcept {
dld.vkDestroyImageView(device, handle, nullptr);
}
void Destroy(VkDevice device, VkPipeline handle, const DeviceDispatch& dld) noexcept {
dld.vkDestroyPipeline(device, handle, nullptr);
}
void Destroy(VkDevice device, VkPipelineLayout handle, const DeviceDispatch& dld) noexcept {
dld.vkDestroyPipelineLayout(device, handle, nullptr);
}
void Destroy(VkDevice device, VkQueryPool handle, const DeviceDispatch& dld) noexcept {
dld.vkDestroyQueryPool(device, handle, nullptr);
}
void Destroy(VkDevice device, VkRenderPass handle, const DeviceDispatch& dld) noexcept {
dld.vkDestroyRenderPass(device, handle, nullptr);
}
void Destroy(VkDevice device, VkSampler handle, const DeviceDispatch& dld) noexcept {
dld.vkDestroySampler(device, handle, nullptr);
}
void Destroy(VkDevice device, VkSwapchainKHR handle, const DeviceDispatch& dld) noexcept {
dld.vkDestroySwapchainKHR(device, handle, nullptr);
}
void Destroy(VkDevice device, VkSemaphore handle, const DeviceDispatch& dld) noexcept {
dld.vkDestroySemaphore(device, handle, nullptr);
}
void Destroy(VkDevice device, VkShaderModule handle, const DeviceDispatch& dld) noexcept {
dld.vkDestroyShaderModule(device, handle, nullptr);
}
void Destroy(VkInstance instance, VkDebugUtilsMessengerEXT handle,
const InstanceDispatch& dld) noexcept {
dld.vkDestroyDebugUtilsMessengerEXT(instance, handle, nullptr);
}
void Destroy(VkInstance instance, VkSurfaceKHR handle, const InstanceDispatch& dld) noexcept {
dld.vkDestroySurfaceKHR(instance, handle, nullptr);
}
VkResult Free(VkDevice device, VkDescriptorPool handle, Span<VkDescriptorSet> sets,
const DeviceDispatch& dld) noexcept {
return dld.vkFreeDescriptorSets(device, handle, sets.size(), sets.data());
}
VkResult Free(VkDevice device, VkCommandPool handle, Span<VkCommandBuffer> buffers,
const DeviceDispatch& dld) noexcept {
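// vkFreeCommandBuffers returns void; report VK_SUCCESS so both Free()
// overloads share a uniform signature.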
dld.vkFreeCommandBuffers(device, handle, buffers.size(), buffers.data());
return VK_SUCCESS;
}
Instance Instance::Create(u32 version, Span<const char*> layers, Span<const char*> extensions,
InstanceDispatch& dispatch) noexcept {
const VkApplicationInfo application_info{
.sType = VK_STRUCTURE_TYPE_APPLICATION_INFO,
.pNext = nullptr,
.pApplicationName = "yuzu Emulator",
.applicationVersion = VK_MAKE_VERSION(0, 1, 0),
.pEngineName = "yuzu Emulator",
.engineVersion = VK_MAKE_VERSION(0, 1, 0),
.apiVersion = version,
};
const VkInstanceCreateInfo ci{
.sType = VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
.pApplicationInfo = &application_info,
.enabledLayerCount = layers.size(),
.ppEnabledLayerNames = layers.data(),
.enabledExtensionCount = extensions.size(),
.ppEnabledExtensionNames = extensions.data(),
};
VkInstance instance;
if (dispatch.vkCreateInstance(&ci, nullptr, &instance) != VK_SUCCESS) {
// Failed to create the instance.
return {};
}
if (!Proc(dispatch.vkDestroyInstance, dispatch, "vkDestroyInstance", instance)) {
// We successfully created an instance but the destroy function couldn't be loaded.
// This is a good moment to panic.
return {};
}
return Instance(instance, dispatch);
}
std::optional<std::vector<VkPhysicalDevice>> Instance::EnumeratePhysicalDevices() {
u32 num;
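// Standard Vulkan two-call enumeration: query the count first, then fill the array.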
if (dld->vkEnumeratePhysicalDevices(handle, &num, nullptr) != VK_SUCCESS) {
return std::nullopt;
}
std::vector<VkPhysicalDevice> physical_devices(num);
if (dld->vkEnumeratePhysicalDevices(handle, &num, physical_devices.data()) != VK_SUCCESS) {
return std::nullopt;
}
SortPhysicalDevices(physical_devices, *dld);
return std::make_optional(std::move(physical_devices));
}
DebugCallback Instance::TryCreateDebugCallback(
PFN_vkDebugUtilsMessengerCallbackEXT callback) noexcept {
const VkDebugUtilsMessengerCreateInfoEXT ci{
.sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_MESSENGER_CREATE_INFO_EXT,
.pNext = nullptr,
.flags = 0,
.messageSeverity = VK_DEBUG_UTILS_MESSAGE_SEVERITY_ERROR_BIT_EXT |
VK_DEBUG_UTILS_MESSAGE_SEVERITY_WARNING_BIT_EXT |
VK_DEBUG_UTILS_MESSAGE_SEVERITY_INFO_BIT_EXT |
VK_DEBUG_UTILS_MESSAGE_SEVERITY_VERBOSE_BIT_EXT,
.messageType = VK_DEBUG_UTILS_MESSAGE_TYPE_GENERAL_BIT_EXT |
VK_DEBUG_UTILS_MESSAGE_TYPE_VALIDATION_BIT_EXT,
.pfnUserCallback = callback,
.pUserData = nullptr,
};
VkDebugUtilsMessengerEXT messenger;
if (dld->vkCreateDebugUtilsMessengerEXT(handle, &ci, nullptr, &messenger) != VK_SUCCESS) {
return {};
}
return DebugCallback(messenger, handle, *dld);
}
void Buffer::BindMemory(VkDeviceMemory memory, VkDeviceSize offset) const {
Check(dld->vkBindBufferMemory(owner, handle, memory, offset));
}
void Buffer::SetObjectNameEXT(const char* name) const {
SetObjectName(dld, owner, handle, VK_OBJECT_TYPE_BUFFER, name);
}
void BufferView::SetObjectNameEXT(const char* name) const {
SetObjectName(dld, owner, handle, VK_OBJECT_TYPE_BUFFER_VIEW, name);
}
void Image::BindMemory(VkDeviceMemory memory, VkDeviceSize offset) const {
Check(dld->vkBindImageMemory(owner, handle, memory, offset));
}
void Image::SetObjectNameEXT(const char* name) const {
SetObjectName(dld, owner, handle, VK_OBJECT_TYPE_IMAGE, name);
}
void ImageView::SetObjectNameEXT(const char* name) const {
SetObjectName(dld, owner, handle, VK_OBJECT_TYPE_IMAGE_VIEW, name);
}
void DeviceMemory::SetObjectNameEXT(const char* name) const {
SetObjectName(dld, owner, handle, VK_OBJECT_TYPE_DEVICE_MEMORY, name);
}
void Fence::SetObjectNameEXT(const char* name) const {
SetObjectName(dld, owner, handle, VK_OBJECT_TYPE_FENCE, name);
}
void Framebuffer::SetObjectNameEXT(const char* name) const {
SetObjectName(dld, owner, handle, VK_OBJECT_TYPE_FRAMEBUFFER, name);
}
DescriptorSets DescriptorPool::Allocate(const VkDescriptorSetAllocateInfo& ai) const {
const std::size_t num = ai.descriptorSetCount;
std::unique_ptr sets = std::make_unique<VkDescriptorSet[]>(num);
switch (const VkResult result = dld->vkAllocateDescriptorSets(owner, &ai, sets.get())) {
case VK_SUCCESS:
return DescriptorSets(std::move(sets), num, owner, handle, *dld);
case VK_ERROR_OUT_OF_POOL_MEMORY:
return {};
default:
throw Exception(result);
}
}
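// Note: Allocate() deliberately maps VK_ERROR_OUT_OF_POOL_MEMORY to an empty
// result instead of throwing, so callers can treat pool exhaustion as a
// recoverable condition (e.g. create a fresh pool and retry) while any other
// failure still raises vk::Exception.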
void DescriptorPool::SetObjectNameEXT(const char* name) const {
SetObjectName(dld, owner, handle, VK_OBJECT_TYPE_DESCRIPTOR_POOL, name);
}
CommandBuffers CommandPool::Allocate(std::size_t num_buffers, VkCommandBufferLevel level) const {
const VkCommandBufferAllocateInfo ai{
.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO,
.pNext = nullptr,
.commandPool = handle,
.level = level,
.commandBufferCount = static_cast<u32>(num_buffers),
};
std::unique_ptr buffers = std::make_unique<VkCommandBuffer[]>(num_buffers);
switch (const VkResult result = dld->vkAllocateCommandBuffers(owner, &ai, buffers.get())) {
case VK_SUCCESS:
return CommandBuffers(std::move(buffers), num_buffers, owner, handle, *dld);
case VK_ERROR_OUT_OF_POOL_MEMORY:
return {};
default:
throw Exception(result);
}
}
void CommandPool::SetObjectNameEXT(const char* name) const {
SetObjectName(dld, owner, handle, VK_OBJECT_TYPE_COMMAND_POOL, name);
}
std::vector<VkImage> SwapchainKHR::GetImages() const {
u32 num;
Check(dld->vkGetSwapchainImagesKHR(owner, handle, &num, nullptr));
std::vector<VkImage> images(num);
Check(dld->vkGetSwapchainImagesKHR(owner, handle, &num, images.data()));
return images;
}
void Event::SetObjectNameEXT(const char* name) const {
SetObjectName(dld, owner, handle, VK_OBJECT_TYPE_EVENT, name);
}
void ShaderModule::SetObjectNameEXT(const char* name) const {
SetObjectName(dld, owner, handle, VK_OBJECT_TYPE_SHADER_MODULE, name);
}
void Semaphore::SetObjectNameEXT(const char* name) const {
SetObjectName(dld, owner, handle, VK_OBJECT_TYPE_SEMAPHORE, name);
}
Device Device::Create(VkPhysicalDevice physical_device, Span<VkDeviceQueueCreateInfo> queues_ci,
Span<const char*> enabled_extensions, const void* next,
DeviceDispatch& dispatch) noexcept {
const VkDeviceCreateInfo ci{
.sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO,
.pNext = next,
.flags = 0,
.queueCreateInfoCount = queues_ci.size(),
.pQueueCreateInfos = queues_ci.data(),
.enabledLayerCount = 0,
.ppEnabledLayerNames = nullptr,
.enabledExtensionCount = enabled_extensions.size(),
.ppEnabledExtensionNames = enabled_extensions.data(),
.pEnabledFeatures = nullptr,
};
VkDevice device;
if (dispatch.vkCreateDevice(physical_device, &ci, nullptr, &device) != VK_SUCCESS) {
return {};
}
Load(device, dispatch);
return Device(device, dispatch);
}
Queue Device::GetQueue(u32 family_index) const noexcept {
VkQueue queue;
dld->vkGetDeviceQueue(handle, family_index, 0, &queue);
return Queue(queue, *dld);
}
Buffer Device::CreateBuffer(const VkBufferCreateInfo& ci) const {
VkBuffer object;
Check(dld->vkCreateBuffer(handle, &ci, nullptr, &object));
return Buffer(object, handle, *dld);
}
BufferView Device::CreateBufferView(const VkBufferViewCreateInfo& ci) const {
VkBufferView object;
Check(dld->vkCreateBufferView(handle, &ci, nullptr, &object));
return BufferView(object, handle, *dld);
}
Image Device::CreateImage(const VkImageCreateInfo& ci) const {
VkImage object;
Check(dld->vkCreateImage(handle, &ci, nullptr, &object));
return Image(object, handle, *dld);
}
ImageView Device::CreateImageView(const VkImageViewCreateInfo& ci) const {
VkImageView object;
Check(dld->vkCreateImageView(handle, &ci, nullptr, &object));
return ImageView(object, handle, *dld);
}
Semaphore Device::CreateSemaphore() const {
static constexpr VkSemaphoreCreateInfo ci{
.sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
};
return CreateSemaphore(ci);
}
Semaphore Device::CreateSemaphore(const VkSemaphoreCreateInfo& ci) const {
VkSemaphore object;
Check(dld->vkCreateSemaphore(handle, &ci, nullptr, &object));
return Semaphore(object, handle, *dld);
}
Fence Device::CreateFence(const VkFenceCreateInfo& ci) const {
VkFence object;
Check(dld->vkCreateFence(handle, &ci, nullptr, &object));
return Fence(object, handle, *dld);
}
DescriptorPool Device::CreateDescriptorPool(const VkDescriptorPoolCreateInfo& ci) const {
VkDescriptorPool object;
Check(dld->vkCreateDescriptorPool(handle, &ci, nullptr, &object));
return DescriptorPool(object, handle, *dld);
}
RenderPass Device::CreateRenderPass(const VkRenderPassCreateInfo& ci) const {
VkRenderPass object;
Check(dld->vkCreateRenderPass(handle, &ci, nullptr, &object));
return RenderPass(object, handle, *dld);
}
DescriptorSetLayout Device::CreateDescriptorSetLayout(
const VkDescriptorSetLayoutCreateInfo& ci) const {
VkDescriptorSetLayout object;
Check(dld->vkCreateDescriptorSetLayout(handle, &ci, nullptr, &object));
return DescriptorSetLayout(object, handle, *dld);
}
PipelineLayout Device::CreatePipelineLayout(const VkPipelineLayoutCreateInfo& ci) const {
VkPipelineLayout object;
Check(dld->vkCreatePipelineLayout(handle, &ci, nullptr, &object));
return PipelineLayout(object, handle, *dld);
}
Pipeline Device::CreateGraphicsPipeline(const VkGraphicsPipelineCreateInfo& ci) const {
VkPipeline object;
Check(dld->vkCreateGraphicsPipelines(handle, nullptr, 1, &ci, nullptr, &object));
return Pipeline(object, handle, *dld);
}
Pipeline Device::CreateComputePipeline(const VkComputePipelineCreateInfo& ci) const {
VkPipeline object;
Check(dld->vkCreateComputePipelines(handle, nullptr, 1, &ci, nullptr, &object));
return Pipeline(object, handle, *dld);
}
Sampler Device::CreateSampler(const VkSamplerCreateInfo& ci) const {
VkSampler object;
Check(dld->vkCreateSampler(handle, &ci, nullptr, &object));
return Sampler(object, handle, *dld);
}
Framebuffer Device::CreateFramebuffer(const VkFramebufferCreateInfo& ci) const {
VkFramebuffer object;
Check(dld->vkCreateFramebuffer(handle, &ci, nullptr, &object));
return Framebuffer(object, handle, *dld);
}
CommandPool Device::CreateCommandPool(const VkCommandPoolCreateInfo& ci) const {
VkCommandPool object;
Check(dld->vkCreateCommandPool(handle, &ci, nullptr, &object));
return CommandPool(object, handle, *dld);
}
DescriptorUpdateTemplateKHR Device::CreateDescriptorUpdateTemplateKHR(
const VkDescriptorUpdateTemplateCreateInfoKHR& ci) const {
VkDescriptorUpdateTemplateKHR object;
Check(dld->vkCreateDescriptorUpdateTemplateKHR(handle, &ci, nullptr, &object));
return DescriptorUpdateTemplateKHR(object, handle, *dld);
}
QueryPool Device::CreateQueryPool(const VkQueryPoolCreateInfo& ci) const {
VkQueryPool object;
Check(dld->vkCreateQueryPool(handle, &ci, nullptr, &object));
return QueryPool(object, handle, *dld);
}
ShaderModule Device::CreateShaderModule(const VkShaderModuleCreateInfo& ci) const {
VkShaderModule object;
Check(dld->vkCreateShaderModule(handle, &ci, nullptr, &object));
return ShaderModule(object, handle, *dld);
}
Event Device::CreateEvent() const {
static constexpr VkEventCreateInfo ci{
.sType = VK_STRUCTURE_TYPE_EVENT_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
};
VkEvent object;
Check(dld->vkCreateEvent(handle, &ci, nullptr, &object));
return Event(object, handle, *dld);
}
SwapchainKHR Device::CreateSwapchainKHR(const VkSwapchainCreateInfoKHR& ci) const {
VkSwapchainKHR object;
Check(dld->vkCreateSwapchainKHR(handle, &ci, nullptr, &object));
return SwapchainKHR(object, handle, *dld);
}
DeviceMemory Device::TryAllocateMemory(const VkMemoryAllocateInfo& ai) const noexcept {
VkDeviceMemory memory;
if (dld->vkAllocateMemory(handle, &ai, nullptr, &memory) != VK_SUCCESS) {
return {};
}
return DeviceMemory(memory, handle, *dld);
}
DeviceMemory Device::AllocateMemory(const VkMemoryAllocateInfo& ai) const {
VkDeviceMemory memory;
Check(dld->vkAllocateMemory(handle, &ai, nullptr, &memory));
return DeviceMemory(memory, handle, *dld);
}
VkMemoryRequirements Device::GetBufferMemoryRequirements(VkBuffer buffer) const noexcept {
VkMemoryRequirements requirements;
dld->vkGetBufferMemoryRequirements(handle, buffer, &requirements);
return requirements;
}
VkMemoryRequirements Device::GetImageMemoryRequirements(VkImage image) const noexcept {
VkMemoryRequirements requirements;
dld->vkGetImageMemoryRequirements(handle, image, &requirements);
return requirements;
}
void Device::UpdateDescriptorSets(Span<VkWriteDescriptorSet> writes,
Span<VkCopyDescriptorSet> copies) const noexcept {
dld->vkUpdateDescriptorSets(handle, writes.size(), writes.data(), copies.size(), copies.data());
}
VkPhysicalDeviceProperties PhysicalDevice::GetProperties() const noexcept {
VkPhysicalDeviceProperties properties;
dld->vkGetPhysicalDeviceProperties(physical_device, &properties);
return properties;
}
void PhysicalDevice::GetProperties2KHR(VkPhysicalDeviceProperties2KHR& properties) const noexcept {
dld->vkGetPhysicalDeviceProperties2KHR(physical_device, &properties);
}
VkPhysicalDeviceFeatures PhysicalDevice::GetFeatures() const noexcept {
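// Only the KHR "2" entry point is loaded in this dispatch table, so query
// through it and return the embedded core features struct.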
VkPhysicalDeviceFeatures2KHR features2;
features2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2_KHR;
features2.pNext = nullptr;
dld->vkGetPhysicalDeviceFeatures2KHR(physical_device, &features2);
return features2.features;
}
void PhysicalDevice::GetFeatures2KHR(VkPhysicalDeviceFeatures2KHR& features) const noexcept {
dld->vkGetPhysicalDeviceFeatures2KHR(physical_device, &features);
}
VkFormatProperties PhysicalDevice::GetFormatProperties(VkFormat format) const noexcept {
VkFormatProperties properties;
dld->vkGetPhysicalDeviceFormatProperties(physical_device, format, &properties);
return properties;
}
std::vector<VkExtensionProperties> PhysicalDevice::EnumerateDeviceExtensionProperties() const {
u32 num;
dld->vkEnumerateDeviceExtensionProperties(physical_device, nullptr, &num, nullptr);
std::vector<VkExtensionProperties> properties(num);
dld->vkEnumerateDeviceExtensionProperties(physical_device, nullptr, &num, properties.data());
return properties;
}
std::vector<VkQueueFamilyProperties> PhysicalDevice::GetQueueFamilyProperties() const {
u32 num;
dld->vkGetPhysicalDeviceQueueFamilyProperties(physical_device, &num, nullptr);
std::vector<VkQueueFamilyProperties> properties(num);
dld->vkGetPhysicalDeviceQueueFamilyProperties(physical_device, &num, properties.data());
return properties;
}
bool PhysicalDevice::GetSurfaceSupportKHR(u32 queue_family_index, VkSurfaceKHR surface) const {
VkBool32 supported;
Check(dld->vkGetPhysicalDeviceSurfaceSupportKHR(physical_device, queue_family_index, surface,
&supported));
return supported == VK_TRUE;
}
VkSurfaceCapabilitiesKHR PhysicalDevice::GetSurfaceCapabilitiesKHR(VkSurfaceKHR surface) const {
VkSurfaceCapabilitiesKHR capabilities;
Check(dld->vkGetPhysicalDeviceSurfaceCapabilitiesKHR(physical_device, surface, &capabilities));
return capabilities;
}
std::vector<VkSurfaceFormatKHR> PhysicalDevice::GetSurfaceFormatsKHR(VkSurfaceKHR surface) const {
u32 num;
Check(dld->vkGetPhysicalDeviceSurfaceFormatsKHR(physical_device, surface, &num, nullptr));
std::vector<VkSurfaceFormatKHR> formats(num);
Check(
dld->vkGetPhysicalDeviceSurfaceFormatsKHR(physical_device, surface, &num, formats.data()));
return formats;
}
std::vector<VkPresentModeKHR> PhysicalDevice::GetSurfacePresentModesKHR(
VkSurfaceKHR surface) const {
u32 num;
Check(dld->vkGetPhysicalDeviceSurfacePresentModesKHR(physical_device, surface, &num, nullptr));
std::vector<VkPresentModeKHR> modes(num);
Check(dld->vkGetPhysicalDeviceSurfacePresentModesKHR(physical_device, surface, &num,
modes.data()));
return modes;
}
VkPhysicalDeviceMemoryProperties PhysicalDevice::GetMemoryProperties() const noexcept {
VkPhysicalDeviceMemoryProperties properties;
dld->vkGetPhysicalDeviceMemoryProperties(physical_device, &properties);
return properties;
}
u32 AvailableVersion(const InstanceDispatch& dld) noexcept {
PFN_vkEnumerateInstanceVersion vkEnumerateInstanceVersion;
if (!Proc(vkEnumerateInstanceVersion, dld, "vkEnumerateInstanceVersion")) {
// If the procedure is not found, Vulkan 1.0 is assumed
return VK_API_VERSION_1_0;
}
u32 version;
if (const VkResult result = vkEnumerateInstanceVersion(&version); result != VK_SUCCESS) {
LOG_ERROR(Render_Vulkan, "vkEnumerateInstanceVersion returned {}, assuming Vulkan 1.1",
ToString(result));
return VK_API_VERSION_1_1;
}
return version;
}
std::optional<std::vector<VkExtensionProperties>> EnumerateInstanceExtensionProperties(
const InstanceDispatch& dld) {
u32 num;
if (dld.vkEnumerateInstanceExtensionProperties(nullptr, &num, nullptr) != VK_SUCCESS) {
return std::nullopt;
}
std::vector<VkExtensionProperties> properties(num);
if (dld.vkEnumerateInstanceExtensionProperties(nullptr, &num, properties.data()) !=
VK_SUCCESS) {
return std::nullopt;
}
return properties;
}
std::optional<std::vector<VkLayerProperties>> EnumerateInstanceLayerProperties(
const InstanceDispatch& dld) {
u32 num;
if (dld.vkEnumerateInstanceLayerProperties(&num, nullptr) != VK_SUCCESS) {
return std::nullopt;
}
std::vector<VkLayerProperties> properties(num);
if (dld.vkEnumerateInstanceLayerProperties(&num, properties.data()) != VK_SUCCESS) {
return std::nullopt;
}
return properties;
}
} // namespace Vulkan::vk

File diff suppressed because it is too large