early-access version 3252

This commit is contained in:
pineappleEA
2022-12-25 07:12:57 +01:00
parent eb371309f1
commit 88a4a2d4aa
70 changed files with 2515 additions and 508 deletions

View File

@@ -48,43 +48,30 @@ void RefreshXfbState(VideoCommon::TransformFeedbackState& state, const Maxwell&
}
} // Anonymous namespace
void FixedPipelineState::Refresh(Tegra::Engines::Maxwell3D& maxwell3d,
bool has_extended_dynamic_state, bool has_dynamic_vertex_input) {
void FixedPipelineState::Refresh(Tegra::Engines::Maxwell3D& maxwell3d, DynamicFeatures& features) {
const Maxwell& regs = maxwell3d.regs;
const auto topology_ = maxwell3d.draw_manager->GetDrawState().topology;
const std::array enabled_lut{
regs.polygon_offset_point_enable,
regs.polygon_offset_line_enable,
regs.polygon_offset_fill_enable,
};
const u32 topology_index = static_cast<u32>(topology_);
raw1 = 0;
extended_dynamic_state.Assign(has_extended_dynamic_state ? 1 : 0);
dynamic_vertex_input.Assign(has_dynamic_vertex_input ? 1 : 0);
extended_dynamic_state.Assign(features.has_extended_dynamic_state ? 1 : 0);
extended_dynamic_state_2.Assign(features.has_extended_dynamic_state_2 ? 1 : 0);
extended_dynamic_state_2_extra.Assign(features.has_extended_dynamic_state_2_extra ? 1 : 0);
extended_dynamic_state_3_blend.Assign(features.has_extended_dynamic_state_3_blend ? 1 : 0);
extended_dynamic_state_3_enables.Assign(features.has_extended_dynamic_state_3_enables ? 1 : 0);
dynamic_vertex_input.Assign(features.has_dynamic_vertex_input ? 1 : 0);
xfb_enabled.Assign(regs.transform_feedback_enabled != 0);
primitive_restart_enable.Assign(regs.primitive_restart.enabled != 0 ? 1 : 0);
depth_bias_enable.Assign(enabled_lut[POLYGON_OFFSET_ENABLE_LUT[topology_index]] != 0 ? 1 : 0);
depth_clamp_disabled.Assign(regs.viewport_clip_control.geometry_clip ==
Maxwell::ViewportClipControl::GeometryClip::Passthrough ||
regs.viewport_clip_control.geometry_clip ==
Maxwell::ViewportClipControl::GeometryClip::FrustumXYZ ||
regs.viewport_clip_control.geometry_clip ==
Maxwell::ViewportClipControl::GeometryClip::FrustumZ);
ndc_minus_one_to_one.Assign(regs.depth_mode == Maxwell::DepthMode::MinusOneToOne ? 1 : 0);
polygon_mode.Assign(PackPolygonMode(regs.polygon_mode_front));
patch_control_points_minus_one.Assign(regs.patch_vertices - 1);
tessellation_primitive.Assign(static_cast<u32>(regs.tessellation.params.domain_type.Value()));
tessellation_spacing.Assign(static_cast<u32>(regs.tessellation.params.spacing.Value()));
tessellation_clockwise.Assign(regs.tessellation.params.output_primitives.Value() ==
Maxwell::Tessellation::OutputPrimitives::Triangles_CW);
logic_op_enable.Assign(regs.logic_op.enable != 0 ? 1 : 0);
logic_op.Assign(PackLogicOp(regs.logic_op.op));
patch_control_points_minus_one.Assign(regs.patch_vertices - 1);
topology.Assign(topology_);
msaa_mode.Assign(regs.anti_alias_samples_mode);
raw2 = 0;
rasterize_enable.Assign(regs.rasterize_enable != 0 ? 1 : 0);
const auto test_func =
regs.alpha_test_enabled != 0 ? regs.alpha_test_func : Maxwell::ComparisonOp::Always_GL;
alpha_test_func.Assign(PackComparisonOp(test_func));
@@ -97,6 +84,7 @@ void FixedPipelineState::Refresh(Tegra::Engines::Maxwell3D& maxwell3d,
smooth_lines.Assign(regs.line_anti_alias_enable != 0 ? 1 : 0);
alpha_to_coverage_enabled.Assign(regs.anti_alias_alpha_control.alpha_to_coverage != 0 ? 1 : 0);
alpha_to_one_enabled.Assign(regs.anti_alias_alpha_control.alpha_to_one != 0 ? 1 : 0);
app_stage.Assign(maxwell3d.engine_state);
for (size_t i = 0; i < regs.rt.size(); ++i) {
color_formats[i] = static_cast<u8>(regs.rt[i].format);
@@ -105,7 +93,7 @@ void FixedPipelineState::Refresh(Tegra::Engines::Maxwell3D& maxwell3d,
point_size = Common::BitCast<u32>(regs.point_size);
if (maxwell3d.dirty.flags[Dirty::VertexInput]) {
if (has_dynamic_vertex_input) {
if (features.has_dynamic_vertex_input) {
// Dirty flag will be reset by the command buffer update
static constexpr std::array LUT{
0u, // Invalid
@@ -144,12 +132,6 @@ void FixedPipelineState::Refresh(Tegra::Engines::Maxwell3D& maxwell3d,
}
}
}
if (maxwell3d.dirty.flags[Dirty::Blending]) {
maxwell3d.dirty.flags[Dirty::Blending] = false;
for (size_t index = 0; index < attachments.size(); ++index) {
attachments[index].Refresh(regs, index);
}
}
if (maxwell3d.dirty.flags[Dirty::ViewportSwizzles]) {
maxwell3d.dirty.flags[Dirty::ViewportSwizzles] = false;
const auto& transform = regs.viewport_transform;
@@ -157,8 +139,27 @@ void FixedPipelineState::Refresh(Tegra::Engines::Maxwell3D& maxwell3d,
return static_cast<u16>(viewport.swizzle.raw);
});
}
dynamic_state.raw1 = 0;
dynamic_state.raw2 = 0;
if (!extended_dynamic_state) {
dynamic_state.Refresh(regs);
std::ranges::transform(regs.vertex_streams, vertex_strides.begin(), [](const auto& array) {
return static_cast<u16>(array.stride.Value());
});
}
if (!extended_dynamic_state_2_extra) {
dynamic_state.Refresh2(regs, topology, extended_dynamic_state_2);
}
if (!extended_dynamic_state_3_blend) {
if (maxwell3d.dirty.flags[Dirty::Blending]) {
maxwell3d.dirty.flags[Dirty::Blending] = false;
for (size_t index = 0; index < attachments.size(); ++index) {
attachments[index].Refresh(regs, index);
}
}
}
if (!extended_dynamic_state_3_enables) {
dynamic_state.Refresh3(regs);
}
if (xfb_enabled) {
RefreshXfbState(xfb_state, regs);
@@ -175,12 +176,11 @@ void FixedPipelineState::BlendingAttachment::Refresh(const Maxwell& regs, size_t
mask_a.Assign(mask.A);
// TODO: C++20 Use templated lambda to deduplicate code
if (!regs.blend.enable[index]) {
return;
}
if (!regs.blend_per_target_enabled) {
if (!regs.blend.enable[index]) {
return;
}
const auto& src = regs.blend;
const auto setup_blend = [&]<typename T>(const T& src) {
equation_rgb.Assign(PackBlendEquation(src.color_op));
equation_a.Assign(PackBlendEquation(src.alpha_op));
factor_source_rgb.Assign(PackBlendFactor(src.color_source));
@@ -188,20 +188,13 @@ void FixedPipelineState::BlendingAttachment::Refresh(const Maxwell& regs, size_t
factor_source_a.Assign(PackBlendFactor(src.alpha_source));
factor_dest_a.Assign(PackBlendFactor(src.alpha_dest));
enable.Assign(1);
return;
}
};
if (!regs.blend.enable[index]) {
if (!regs.blend_per_target_enabled) {
setup_blend(regs.blend);
return;
}
const auto& src = regs.blend_per_target[index];
equation_rgb.Assign(PackBlendEquation(src.color_op));
equation_a.Assign(PackBlendEquation(src.alpha_op));
factor_source_rgb.Assign(PackBlendFactor(src.color_source));
factor_dest_rgb.Assign(PackBlendFactor(src.color_dest));
factor_source_a.Assign(PackBlendFactor(src.alpha_source));
factor_dest_a.Assign(PackBlendFactor(src.alpha_dest));
enable.Assign(1);
setup_blend(regs.blend_per_target[index]);
}
void FixedPipelineState::DynamicState::Refresh(const Maxwell& regs) {
@@ -211,8 +204,6 @@ void FixedPipelineState::DynamicState::Refresh(const Maxwell& regs) {
packed_front_face = 1 - packed_front_face;
}
raw1 = 0;
raw2 = 0;
front.action_stencil_fail.Assign(PackStencilOp(regs.stencil_front_op.fail));
front.action_depth_fail.Assign(PackStencilOp(regs.stencil_front_op.zfail));
front.action_depth_pass.Assign(PackStencilOp(regs.stencil_front_op.zpass));
@@ -236,9 +227,37 @@ void FixedPipelineState::DynamicState::Refresh(const Maxwell& regs) {
depth_test_func.Assign(PackComparisonOp(regs.depth_test_func));
cull_face.Assign(PackCullFace(regs.gl_cull_face));
cull_enable.Assign(regs.gl_cull_test_enabled != 0 ? 1 : 0);
std::ranges::transform(regs.vertex_streams, vertex_strides.begin(), [](const auto& array) {
return static_cast<u16>(array.stride.Value());
});
}
void FixedPipelineState::DynamicState::Refresh2(const Maxwell& regs,
Maxwell::PrimitiveTopology topology_,
bool base_feautures_supported) {
logic_op.Assign(PackLogicOp(regs.logic_op.op));
if (base_feautures_supported) {
return;
}
const std::array enabled_lut{
regs.polygon_offset_point_enable,
regs.polygon_offset_line_enable,
regs.polygon_offset_fill_enable,
};
const u32 topology_index = static_cast<u32>(topology_);
rasterize_enable.Assign(regs.rasterize_enable != 0 ? 1 : 0);
primitive_restart_enable.Assign(regs.primitive_restart.enabled != 0 ? 1 : 0);
depth_bias_enable.Assign(enabled_lut[POLYGON_OFFSET_ENABLE_LUT[topology_index]] != 0 ? 1 : 0);
}
void FixedPipelineState::DynamicState::Refresh3(const Maxwell& regs) {
logic_op_enable.Assign(regs.logic_op.enable != 0 ? 1 : 0);
depth_clamp_disabled.Assign(regs.viewport_clip_control.geometry_clip ==
Maxwell::ViewportClipControl::GeometryClip::Passthrough ||
regs.viewport_clip_control.geometry_clip ==
Maxwell::ViewportClipControl::GeometryClip::FrustumXYZ ||
regs.viewport_clip_control.geometry_clip ==
Maxwell::ViewportClipControl::GeometryClip::FrustumZ);
}
size_t FixedPipelineState::Hash() const noexcept {

View File

@@ -17,6 +17,15 @@ namespace Vulkan {
using Maxwell = Tegra::Engines::Maxwell3D::Regs;
struct DynamicFeatures {
bool has_extended_dynamic_state;
bool has_extended_dynamic_state_2;
bool has_extended_dynamic_state_2_extra;
bool has_extended_dynamic_state_3_blend;
bool has_extended_dynamic_state_3_enables;
bool has_dynamic_vertex_input;
};
struct FixedPipelineState {
static u32 PackComparisonOp(Maxwell::ComparisonOp op) noexcept;
static Maxwell::ComparisonOp UnpackComparisonOp(u32 packed) noexcept;
@@ -133,6 +142,17 @@ struct FixedPipelineState {
struct DynamicState {
union {
u32 raw1;
BitField<0, 2, u32> cull_face;
BitField<2, 1, u32> cull_enable;
BitField<3, 1, u32> primitive_restart_enable;
BitField<4, 1, u32> depth_bias_enable;
BitField<5, 1, u32> rasterize_enable;
BitField<6, 4, u32> logic_op;
BitField<10, 1, u32> logic_op_enable;
BitField<11, 1, u32> depth_clamp_disabled;
};
union {
u32 raw2;
StencilFace<0> front;
StencilFace<12> back;
BitField<24, 1, u32> stencil_enable;
@@ -142,15 +162,11 @@ struct FixedPipelineState {
BitField<28, 1, u32> front_face;
BitField<29, 3, u32> depth_test_func;
};
union {
u32 raw2;
BitField<0, 2, u32> cull_face;
BitField<2, 1, u32> cull_enable;
};
// Vertex stride is a 12 bits value, we have 4 bits to spare per element
std::array<u16, Maxwell::NumVertexArrays> vertex_strides;
void Refresh(const Maxwell& regs);
void Refresh2(const Maxwell& regs, Maxwell::PrimitiveTopology topology,
bool base_feautures_supported);
void Refresh3(const Maxwell& regs);
Maxwell::ComparisonOp DepthTestFunc() const noexcept {
return UnpackComparisonOp(depth_test_func);
@@ -168,25 +184,24 @@ struct FixedPipelineState {
union {
u32 raw1;
BitField<0, 1, u32> extended_dynamic_state;
BitField<1, 1, u32> dynamic_vertex_input;
BitField<2, 1, u32> xfb_enabled;
BitField<3, 1, u32> primitive_restart_enable;
BitField<4, 1, u32> depth_bias_enable;
BitField<5, 1, u32> depth_clamp_disabled;
BitField<6, 1, u32> ndc_minus_one_to_one;
BitField<7, 2, u32> polygon_mode;
BitField<9, 5, u32> patch_control_points_minus_one;
BitField<14, 2, u32> tessellation_primitive;
BitField<16, 2, u32> tessellation_spacing;
BitField<18, 1, u32> tessellation_clockwise;
BitField<19, 1, u32> logic_op_enable;
BitField<20, 4, u32> logic_op;
BitField<1, 1, u32> extended_dynamic_state_2;
BitField<2, 1, u32> extended_dynamic_state_2_extra;
BitField<3, 1, u32> extended_dynamic_state_3_blend;
BitField<4, 1, u32> extended_dynamic_state_3_enables;
BitField<5, 1, u32> dynamic_vertex_input;
BitField<6, 1, u32> xfb_enabled;
BitField<7, 1, u32> ndc_minus_one_to_one;
BitField<8, 2, u32> polygon_mode;
BitField<10, 2, u32> tessellation_primitive;
BitField<12, 2, u32> tessellation_spacing;
BitField<14, 1, u32> tessellation_clockwise;
BitField<15, 5, u32> patch_control_points_minus_one;
BitField<24, 4, Maxwell::PrimitiveTopology> topology;
BitField<28, 4, Tegra::Texture::MsaaMode> msaa_mode;
};
union {
u32 raw2;
BitField<0, 1, u32> rasterize_enable;
BitField<1, 3, u32> alpha_test_func;
BitField<4, 1, u32> early_z;
BitField<5, 1, u32> depth_enabled;
@@ -197,25 +212,28 @@ struct FixedPipelineState {
BitField<14, 1, u32> smooth_lines;
BitField<15, 1, u32> alpha_to_coverage_enabled;
BitField<16, 1, u32> alpha_to_one_enabled;
BitField<17, 3, Tegra::Engines::Maxwell3D::EngineHint> app_stage;
};
std::array<u8, Maxwell::NumRenderTargets> color_formats;
u32 alpha_test_ref;
u32 point_size;
std::array<BlendingAttachment, Maxwell::NumRenderTargets> attachments;
std::array<u16, Maxwell::NumViewports> viewport_swizzles;
union {
u64 attribute_types; // Used with VK_EXT_vertex_input_dynamic_state
u64 enabled_divisors;
};
std::array<VertexAttribute, Maxwell::NumVertexAttributes> attributes;
std::array<u32, Maxwell::NumVertexArrays> binding_divisors;
DynamicState dynamic_state;
std::array<BlendingAttachment, Maxwell::NumRenderTargets> attachments;
std::array<VertexAttribute, Maxwell::NumVertexAttributes> attributes;
std::array<u32, Maxwell::NumVertexArrays> binding_divisors;
// Vertex stride is a 12 bits value, we have 4 bits to spare per element
std::array<u16, Maxwell::NumVertexArrays> vertex_strides;
VideoCommon::TransformFeedbackState xfb_state;
void Refresh(Tegra::Engines::Maxwell3D& maxwell3d, bool has_extended_dynamic_state,
bool has_dynamic_vertex_input);
void Refresh(Tegra::Engines::Maxwell3D& maxwell3d, DynamicFeatures& features);
size_t Hash() const noexcept;
@@ -230,13 +248,17 @@ struct FixedPipelineState {
// When transform feedback is enabled, use the whole struct
return sizeof(*this);
}
if (dynamic_vertex_input) {
if (dynamic_vertex_input && extended_dynamic_state_3_blend) {
// Exclude dynamic state and attributes
return offsetof(FixedPipelineState, dynamic_state);
}
if (dynamic_vertex_input) {
// Exclude dynamic state
return offsetof(FixedPipelineState, attributes);
}
if (extended_dynamic_state) {
// Exclude dynamic state
return offsetof(FixedPipelineState, dynamic_state);
return offsetof(FixedPipelineState, vertex_strides);
}
// Default
return offsetof(FixedPipelineState, xfb_state);

View File

@@ -56,7 +56,8 @@ vk::Buffer CreateBuffer(const Device& device, u64 size) {
VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT |
VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT |
VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT |
VK_BUFFER_USAGE_INDEX_BUFFER_BIT | VK_BUFFER_USAGE_VERTEX_BUFFER_BIT;
VK_BUFFER_USAGE_INDEX_BUFFER_BIT | VK_BUFFER_USAGE_VERTEX_BUFFER_BIT |
VK_BUFFER_USAGE_INDIRECT_BUFFER_BIT;
if (device.IsExtTransformFeedbackSupported()) {
flags |= VK_BUFFER_USAGE_TRANSFORM_FEEDBACK_BUFFER_BIT_EXT;
}
@@ -516,6 +517,7 @@ void BufferCacheRuntime::ReserveNullBuffer() {
if (device.IsExtTransformFeedbackSupported()) {
create_info.usage |= VK_BUFFER_USAGE_TRANSFORM_FEEDBACK_BUFFER_BIT_EXT;
}
create_info.usage |= VK_BUFFER_USAGE_INDIRECT_BUFFER_BIT;
null_buffer = device.GetLogical().CreateBuffer(create_info);
if (device.HasDebuggingToolAttached()) {
null_buffer.SetObjectNameEXT("Null buffer");

View File

@@ -201,6 +201,22 @@ struct SimpleVertexSpec {
static constexpr bool has_images = false;
};
struct SimpleStorageSpec {
static constexpr std::array<bool, 5> enabled_stages{true, false, false, false, true};
static constexpr bool has_storage_buffers = true;
static constexpr bool has_texture_buffers = false;
static constexpr bool has_image_buffers = false;
static constexpr bool has_images = false;
};
struct SimpleImageSpec {
static constexpr std::array<bool, 5> enabled_stages{true, false, false, false, true};
static constexpr bool has_storage_buffers = false;
static constexpr bool has_texture_buffers = false;
static constexpr bool has_image_buffers = false;
static constexpr bool has_images = true;
};
struct DefaultSpec {
static constexpr std::array<bool, 5> enabled_stages{true, true, true, true, true};
static constexpr bool has_storage_buffers = true;
@@ -211,7 +227,8 @@ struct DefaultSpec {
ConfigureFuncPtr ConfigureFunc(const std::array<vk::ShaderModule, NUM_STAGES>& modules,
const std::array<Shader::Info, NUM_STAGES>& infos) {
return FindSpec<SimpleVertexSpec, SimpleVertexFragmentSpec, DefaultSpec>(modules, infos);
return FindSpec<SimpleVertexSpec, SimpleVertexFragmentSpec, SimpleStorageSpec, SimpleImageSpec,
DefaultSpec>(modules, infos);
}
} // Anonymous namespace
@@ -524,6 +541,8 @@ void GraphicsPipeline::MakePipeline(VkRenderPass render_pass) {
FixedPipelineState::DynamicState dynamic{};
if (!key.state.extended_dynamic_state) {
dynamic = key.state.dynamic_state;
} else {
dynamic.raw1 = key.state.dynamic_state.raw1;
}
static_vector<VkVertexInputBindingDescription, 32> vertex_bindings;
static_vector<VkVertexInputBindingDivisorDescriptionEXT, 32> vertex_binding_divisors;
@@ -561,7 +580,7 @@ void GraphicsPipeline::MakePipeline(VkRenderPass render_pass) {
instanced ? VK_VERTEX_INPUT_RATE_INSTANCE : VK_VERTEX_INPUT_RATE_VERTEX;
vertex_bindings.push_back({
.binding = static_cast<u32>(index),
.stride = dynamic.vertex_strides[index],
.stride = key.state.vertex_strides[index],
.inputRate = rate,
});
if (instanced) {
@@ -625,7 +644,7 @@ void GraphicsPipeline::MakePipeline(VkRenderPass render_pass) {
.pNext = nullptr,
.flags = 0,
.topology = input_assembly_topology,
.primitiveRestartEnable = key.state.primitive_restart_enable != 0 &&
.primitiveRestartEnable = dynamic.primitive_restart_enable != 0 &&
((input_assembly_topology != VK_PRIMITIVE_TOPOLOGY_PATCH_LIST &&
device.IsTopologyListPrimitiveRestartSupported()) ||
SupportsPrimitiveRestart(input_assembly_topology) ||
@@ -672,15 +691,15 @@ void GraphicsPipeline::MakePipeline(VkRenderPass render_pass) {
.pNext = nullptr,
.flags = 0,
.depthClampEnable =
static_cast<VkBool32>(key.state.depth_clamp_disabled == 0 ? VK_TRUE : VK_FALSE),
static_cast<VkBool32>(dynamic.depth_clamp_disabled == 0 ? VK_TRUE : VK_FALSE),
.rasterizerDiscardEnable =
static_cast<VkBool32>(key.state.rasterize_enable == 0 ? VK_TRUE : VK_FALSE),
static_cast<VkBool32>(dynamic.rasterize_enable == 0 ? VK_TRUE : VK_FALSE),
.polygonMode =
MaxwellToVK::PolygonMode(FixedPipelineState::UnpackPolygonMode(key.state.polygon_mode)),
.cullMode = static_cast<VkCullModeFlags>(
dynamic.cull_enable ? MaxwellToVK::CullFace(dynamic.CullFace()) : VK_CULL_MODE_NONE),
.frontFace = MaxwellToVK::FrontFace(dynamic.FrontFace()),
.depthBiasEnable = key.state.depth_bias_enable,
.depthBiasEnable = (dynamic.depth_bias_enable == 0 ? VK_TRUE : VK_FALSE),
.depthBiasConstantFactor = 0.0f,
.depthBiasClamp = 0.0f,
.depthBiasSlopeFactor = 0.0f,
@@ -782,13 +801,13 @@ void GraphicsPipeline::MakePipeline(VkRenderPass render_pass) {
.sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
.logicOpEnable = key.state.logic_op_enable != 0,
.logicOp = static_cast<VkLogicOp>(key.state.logic_op.Value()),
.logicOpEnable = dynamic.logic_op_enable != 0,
.logicOp = static_cast<VkLogicOp>(dynamic.logic_op.Value()),
.attachmentCount = static_cast<u32>(cb_attachments.size()),
.pAttachments = cb_attachments.data(),
.blendConstants = {},
};
static_vector<VkDynamicState, 19> dynamic_states{
static_vector<VkDynamicState, 28> dynamic_states{
VK_DYNAMIC_STATE_VIEWPORT, VK_DYNAMIC_STATE_SCISSOR,
VK_DYNAMIC_STATE_DEPTH_BIAS, VK_DYNAMIC_STATE_BLEND_CONSTANTS,
VK_DYNAMIC_STATE_DEPTH_BOUNDS, VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK,
@@ -811,6 +830,32 @@ void GraphicsPipeline::MakePipeline(VkRenderPass render_pass) {
dynamic_states.push_back(VK_DYNAMIC_STATE_VERTEX_INPUT_EXT);
}
dynamic_states.insert(dynamic_states.end(), extended.begin(), extended.end());
if (key.state.extended_dynamic_state_2) {
static constexpr std::array extended2{
VK_DYNAMIC_STATE_DEPTH_BIAS_ENABLE_EXT,
VK_DYNAMIC_STATE_PRIMITIVE_RESTART_ENABLE_EXT,
VK_DYNAMIC_STATE_RASTERIZER_DISCARD_ENABLE_EXT,
};
dynamic_states.insert(dynamic_states.end(), extended2.begin(), extended2.end());
}
if (key.state.extended_dynamic_state_2_extra) {
dynamic_states.push_back(VK_DYNAMIC_STATE_LOGIC_OP_EXT);
}
if (key.state.extended_dynamic_state_3_blend) {
static constexpr std::array extended3{
VK_DYNAMIC_STATE_COLOR_BLEND_ENABLE_EXT,
VK_DYNAMIC_STATE_COLOR_BLEND_EQUATION_EXT,
VK_DYNAMIC_STATE_COLOR_WRITE_MASK_EXT,
};
dynamic_states.insert(dynamic_states.end(), extended3.begin(), extended3.end());
}
if (key.state.extended_dynamic_state_3_enables) {
static constexpr std::array extended3{
VK_DYNAMIC_STATE_DEPTH_CLAMP_ENABLE_EXT,
VK_DYNAMIC_STATE_LOGIC_OP_ENABLE_EXT,
};
dynamic_states.insert(dynamic_states.end(), extended3.begin(), extended3.end());
}
}
const VkPipelineDynamicStateCreateInfo dynamic_state_ci{
.sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO,

View File

@@ -54,7 +54,7 @@ using VideoCommon::FileEnvironment;
using VideoCommon::GenericEnvironment;
using VideoCommon::GraphicsEnvironment;
constexpr u32 CACHE_VERSION = 8;
constexpr u32 CACHE_VERSION = 9;
template <typename Container>
auto MakeSpan(Container& container) {
@@ -351,6 +351,15 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, const Device& device
LOG_WARNING(Render_Vulkan, "maxVertexInputBindings is too low: {} < {}",
device.GetMaxVertexInputBindings(), Maxwell::NumVertexArrays);
}
dynamic_features = DynamicFeatures{
.has_extended_dynamic_state = device.IsExtExtendedDynamicStateSupported(),
.has_extended_dynamic_state_2 = device.IsExtExtendedDynamicState2Supported(),
.has_extended_dynamic_state_2_extra = device.IsExtExtendedDynamicState2ExtrasSupported(),
.has_extended_dynamic_state_3_blend = device.IsExtExtendedDynamicState3BlendingSupported(),
.has_extended_dynamic_state_3_enables = device.IsExtExtendedDynamicState3EnablesSupported(),
.has_dynamic_vertex_input = device.IsExtVertexInputDynamicStateSupported(),
};
}
PipelineCache::~PipelineCache() = default;
@@ -362,8 +371,7 @@ GraphicsPipeline* PipelineCache::CurrentGraphicsPipeline() {
current_pipeline = nullptr;
return nullptr;
}
graphics_key.state.Refresh(*maxwell3d, device.IsExtExtendedDynamicStateSupported(),
device.IsExtVertexInputDynamicStateSupported());
graphics_key.state.Refresh(*maxwell3d, dynamic_features);
if (current_pipeline) {
GraphicsPipeline* const next{current_pipeline->Next(graphics_key)};
@@ -439,14 +447,21 @@ void PipelineCache::LoadDiskResources(u64 title_id, std::stop_token stop_loading
});
++state.total;
}};
const bool extended_dynamic_state = device.IsExtExtendedDynamicStateSupported();
const bool dynamic_vertex_input = device.IsExtVertexInputDynamicStateSupported();
const auto load_graphics{[&](std::ifstream& file, std::vector<FileEnvironment> envs) {
GraphicsPipelineCacheKey key;
file.read(reinterpret_cast<char*>(&key), sizeof(key));
if ((key.state.extended_dynamic_state != 0) != extended_dynamic_state ||
(key.state.dynamic_vertex_input != 0) != dynamic_vertex_input) {
if ((key.state.extended_dynamic_state != 0) !=
dynamic_features.has_extended_dynamic_state ||
(key.state.extended_dynamic_state_2 != 0) !=
dynamic_features.has_extended_dynamic_state_2 ||
(key.state.extended_dynamic_state_2_extra != 0) !=
dynamic_features.has_extended_dynamic_state_2_extra ||
(key.state.extended_dynamic_state_3_blend != 0) !=
dynamic_features.has_extended_dynamic_state_3_blend ||
(key.state.extended_dynamic_state_3_enables != 0) !=
dynamic_features.has_extended_dynamic_state_3_enables ||
(key.state.dynamic_vertex_input != 0) != dynamic_features.has_dynamic_vertex_input) {
return;
}
workers.QueueWork([this, key, envs = std::move(envs), &state, &callback]() mutable {

View File

@@ -160,6 +160,7 @@ private:
Common::ThreadWorker workers;
Common::ThreadWorker serialization_thread;
DynamicFeatures dynamic_features;
};
} // namespace Vulkan

View File

@@ -180,7 +180,8 @@ RasterizerVulkan::RasterizerVulkan(Core::Frontend::EmuWindow& emu_window_, Tegra
RasterizerVulkan::~RasterizerVulkan() = default;
void RasterizerVulkan::Draw(bool is_indexed, u32 instance_count) {
template <typename Func>
void RasterizerVulkan::PrepareDraw(bool is_indexed, Func&& draw_func) {
MICROPROFILE_SCOPE(Vulkan_Drawing);
SCOPE_EXIT({ gpu.TickWork(); });
@@ -201,22 +202,69 @@ void RasterizerVulkan::Draw(bool is_indexed, u32 instance_count) {
UpdateDynamicStates();
const auto& draw_state = maxwell3d->draw_manager->GetDrawState();
const u32 num_instances{instance_count};
const DrawParams draw_params{MakeDrawParams(draw_state, num_instances, is_indexed)};
scheduler.Record([draw_params](vk::CommandBuffer cmdbuf) {
if (draw_params.is_indexed) {
cmdbuf.DrawIndexed(draw_params.num_vertices, draw_params.num_instances,
draw_params.first_index, draw_params.base_vertex,
draw_params.base_instance);
} else {
cmdbuf.Draw(draw_params.num_vertices, draw_params.num_instances,
draw_params.base_vertex, draw_params.base_instance);
}
});
draw_func();
EndTransformFeedback();
}
void RasterizerVulkan::Draw(bool is_indexed, u32 instance_count) {
PrepareDraw(is_indexed, [this, is_indexed, instance_count] {
const auto& draw_state = maxwell3d->draw_manager->GetDrawState();
const u32 num_instances{instance_count};
const DrawParams draw_params{MakeDrawParams(draw_state, num_instances, is_indexed)};
scheduler.Record([draw_params](vk::CommandBuffer cmdbuf) {
if (draw_params.is_indexed) {
cmdbuf.DrawIndexed(draw_params.num_vertices, draw_params.num_instances,
draw_params.first_index, draw_params.base_vertex,
draw_params.base_instance);
} else {
cmdbuf.Draw(draw_params.num_vertices, draw_params.num_instances,
draw_params.base_vertex, draw_params.base_instance);
}
});
});
}
void RasterizerVulkan::DrawIndirect() {
const auto& params = maxwell3d->draw_manager->GetIndirectParams();
buffer_cache.SetDrawIndirect(&params);
PrepareDraw(params.is_indexed, [this, &params] {
const auto indirect_buffer = buffer_cache.GetDrawIndirectBuffer();
const auto& buffer = indirect_buffer.first;
const auto& offset = indirect_buffer.second;
if (params.include_count) {
const auto count = buffer_cache.GetDrawIndirectCount();
const auto& draw_buffer = count.first;
const auto& offset_base = count.second;
scheduler.Record([draw_buffer_obj = draw_buffer->Handle(),
buffer_obj = buffer->Handle(), offset_base, offset,
params](vk::CommandBuffer cmdbuf) {
if (params.is_indexed) {
cmdbuf.DrawIndexedIndirectCount(
buffer_obj, offset, draw_buffer_obj, offset_base,
static_cast<u32>(params.max_draw_counts), static_cast<u32>(params.stride));
} else {
cmdbuf.DrawIndirectCount(buffer_obj, offset, draw_buffer_obj, offset_base,
static_cast<u32>(params.max_draw_counts),
static_cast<u32>(params.stride));
}
});
return;
}
scheduler.Record([buffer_obj = buffer->Handle(), offset, params](vk::CommandBuffer cmdbuf) {
if (params.is_indexed) {
cmdbuf.DrawIndexedIndirect(buffer_obj, offset,
static_cast<u32>(params.max_draw_counts),
static_cast<u32>(params.stride));
} else {
cmdbuf.DrawIndirect(buffer_obj, offset, static_cast<u32>(params.max_draw_counts),
static_cast<u32>(params.stride));
}
});
});
buffer_cache.SetDrawIndirect(nullptr);
}
void RasterizerVulkan::Clear(u32 layer_count) {
MICROPROFILE_SCOPE(Vulkan_Clearing);
@@ -379,44 +427,58 @@ void Vulkan::RasterizerVulkan::DisableGraphicsUniformBuffer(size_t stage, u32 in
void RasterizerVulkan::FlushAll() {}
void RasterizerVulkan::FlushRegion(VAddr addr, u64 size) {
void RasterizerVulkan::FlushRegion(VAddr addr, u64 size, VideoCommon::CacheType which) {
if (addr == 0 || size == 0) {
return;
}
{
if (bool(which & VideoCommon::CacheType::TextureCache)) {
std::scoped_lock lock{texture_cache.mutex};
texture_cache.DownloadMemory(addr, size);
}
{
if ((bool(which & VideoCommon::CacheType::BufferCache))) {
std::scoped_lock lock{buffer_cache.mutex};
buffer_cache.DownloadMemory(addr, size);
}
query_cache.FlushRegion(addr, size);
}
bool RasterizerVulkan::MustFlushRegion(VAddr addr, u64 size) {
std::scoped_lock lock{texture_cache.mutex, buffer_cache.mutex};
if (!Settings::IsGPULevelHigh()) {
return buffer_cache.IsRegionGpuModified(addr, size);
if ((bool(which & VideoCommon::CacheType::QueryCache))) {
query_cache.FlushRegion(addr, size);
}
return texture_cache.IsRegionGpuModified(addr, size) ||
buffer_cache.IsRegionGpuModified(addr, size);
}
void RasterizerVulkan::InvalidateRegion(VAddr addr, u64 size) {
bool RasterizerVulkan::MustFlushRegion(VAddr addr, u64 size, VideoCommon::CacheType which) {
if ((bool(which & VideoCommon::CacheType::BufferCache))) {
std::scoped_lock lock{buffer_cache.mutex};
if (buffer_cache.IsRegionGpuModified(addr, size)) {
return true;
}
}
if (!Settings::IsGPULevelHigh()) {
return false;
}
if (bool(which & VideoCommon::CacheType::TextureCache)) {
std::scoped_lock lock{texture_cache.mutex};
return texture_cache.IsRegionGpuModified(addr, size);
}
return false;
}
void RasterizerVulkan::InvalidateRegion(VAddr addr, u64 size, VideoCommon::CacheType which) {
if (addr == 0 || size == 0) {
return;
}
{
if (bool(which & VideoCommon::CacheType::TextureCache)) {
std::scoped_lock lock{texture_cache.mutex};
texture_cache.WriteMemory(addr, size);
}
{
if ((bool(which & VideoCommon::CacheType::BufferCache))) {
std::scoped_lock lock{buffer_cache.mutex};
buffer_cache.WriteMemory(addr, size);
}
pipeline_cache.InvalidateRegion(addr, size);
query_cache.InvalidateRegion(addr, size);
if ((bool(which & VideoCommon::CacheType::QueryCache))) {
query_cache.InvalidateRegion(addr, size);
}
if ((bool(which & VideoCommon::CacheType::ShaderCache))) {
pipeline_cache.InvalidateRegion(addr, size);
}
}
void RasterizerVulkan::OnCPUWrite(VAddr addr, u64 size) {
@@ -481,11 +543,12 @@ void RasterizerVulkan::ReleaseFences() {
fence_manager.WaitPendingFences();
}
void RasterizerVulkan::FlushAndInvalidateRegion(VAddr addr, u64 size) {
void RasterizerVulkan::FlushAndInvalidateRegion(VAddr addr, u64 size,
VideoCommon::CacheType which) {
if (Settings::IsGPULevelExtreme()) {
FlushRegion(addr, size);
FlushRegion(addr, size, which);
}
InvalidateRegion(addr, size);
InvalidateRegion(addr, size, which);
}
void RasterizerVulkan::WaitForIdle() {
@@ -541,6 +604,21 @@ void RasterizerVulkan::TickFrame() {
}
}
bool RasterizerVulkan::AccelerateConditionalRendering() {
if (Settings::IsGPULevelHigh()) {
// TODO(Blinkhawk): Reimplement Host conditional rendering.
return false;
}
// Medium / Low Hack: stub any checks on queries writen into the buffer cache.
const GPUVAddr condition_address{maxwell3d->regs.render_enable.Address()};
Maxwell::ReportSemaphore::Compare cmp;
if (gpu_memory->IsMemoryDirty(condition_address, sizeof(cmp),
VideoCommon::CacheType::BufferCache)) {
return true;
}
return false;
}
bool RasterizerVulkan::AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Surface& src,
const Tegra::Engines::Fermi2D::Surface& dst,
const Tegra::Engines::Fermi2D::Config& copy_config) {
@@ -561,7 +639,7 @@ void RasterizerVulkan::AccelerateInlineToMemory(GPUVAddr address, size_t copy_si
}
gpu_memory->WriteBlockUnsafe(address, memory.data(), copy_size);
{
std::unique_lock<std::mutex> lock{buffer_cache.mutex};
std::unique_lock<std::recursive_mutex> lock{buffer_cache.mutex};
if (!buffer_cache.InlineMemory(*cpu_addr, copy_size, memory)) {
buffer_cache.WriteMemory(*cpu_addr, copy_size);
}
@@ -639,16 +717,35 @@ void RasterizerVulkan::UpdateDynamicStates() {
UpdateLineWidth(regs);
if (device.IsExtExtendedDynamicStateSupported()) {
UpdateCullMode(regs);
UpdateDepthBoundsTestEnable(regs);
UpdateDepthTestEnable(regs);
UpdateDepthWriteEnable(regs);
UpdateDepthCompareOp(regs);
UpdateFrontFace(regs);
UpdateStencilOp(regs);
UpdateStencilTestEnable(regs);
if (device.IsExtVertexInputDynamicStateSupported()) {
UpdateVertexInput(regs);
}
if (state_tracker.TouchStateEnable()) {
UpdateDepthBoundsTestEnable(regs);
UpdateDepthTestEnable(regs);
UpdateDepthWriteEnable(regs);
UpdateStencilTestEnable(regs);
if (device.IsExtExtendedDynamicState2Supported()) {
UpdatePrimitiveRestartEnable(regs);
UpdateRasterizerDiscardEnable(regs);
UpdateDepthBiasEnable(regs);
}
if (device.IsExtExtendedDynamicState3EnablesSupported()) {
UpdateLogicOpEnable(regs);
UpdateDepthClampEnable(regs);
}
}
if (device.IsExtExtendedDynamicState2ExtrasSupported()) {
UpdateLogicOp(regs);
}
if (device.IsExtExtendedDynamicState3Supported()) {
UpdateBlending(regs);
}
}
}
@@ -868,6 +965,82 @@ void RasterizerVulkan::UpdateDepthWriteEnable(Tegra::Engines::Maxwell3D::Regs& r
});
}
void RasterizerVulkan::UpdatePrimitiveRestartEnable(Tegra::Engines::Maxwell3D::Regs& regs) {
if (!state_tracker.TouchPrimitiveRestartEnable()) {
return;
}
scheduler.Record([enable = regs.primitive_restart.enabled](vk::CommandBuffer cmdbuf) {
cmdbuf.SetPrimitiveRestartEnableEXT(enable);
});
}
void RasterizerVulkan::UpdateRasterizerDiscardEnable(Tegra::Engines::Maxwell3D::Regs& regs) {
if (!state_tracker.TouchRasterizerDiscardEnable()) {
return;
}
scheduler.Record([disable = regs.rasterize_enable](vk::CommandBuffer cmdbuf) {
cmdbuf.SetRasterizerDiscardEnableEXT(disable == 0);
});
}
void RasterizerVulkan::UpdateDepthBiasEnable(Tegra::Engines::Maxwell3D::Regs& regs) {
if (!state_tracker.TouchDepthBiasEnable()) {
return;
}
constexpr size_t POINT = 0;
constexpr size_t LINE = 1;
constexpr size_t POLYGON = 2;
constexpr std::array POLYGON_OFFSET_ENABLE_LUT = {
POINT, // Points
LINE, // Lines
LINE, // LineLoop
LINE, // LineStrip
POLYGON, // Triangles
POLYGON, // TriangleStrip
POLYGON, // TriangleFan
POLYGON, // Quads
POLYGON, // QuadStrip
POLYGON, // Polygon
LINE, // LinesAdjacency
LINE, // LineStripAdjacency
POLYGON, // TrianglesAdjacency
POLYGON, // TriangleStripAdjacency
POLYGON, // Patches
};
const std::array enabled_lut{
regs.polygon_offset_point_enable,
regs.polygon_offset_line_enable,
regs.polygon_offset_fill_enable,
};
const u32 topology_index = static_cast<u32>(maxwell3d->draw_manager->GetDrawState().topology);
const u32 enable = enabled_lut[POLYGON_OFFSET_ENABLE_LUT[topology_index]];
scheduler.Record(
[enable](vk::CommandBuffer cmdbuf) { cmdbuf.SetDepthBiasEnableEXT(enable != 0); });
}
void RasterizerVulkan::UpdateLogicOpEnable(Tegra::Engines::Maxwell3D::Regs& regs) {
if (!state_tracker.TouchLogicOpEnable()) {
return;
}
scheduler.Record([enable = regs.logic_op.enable](vk::CommandBuffer cmdbuf) {
cmdbuf.SetLogicOpEnableEXT(enable != 0);
});
}
void RasterizerVulkan::UpdateDepthClampEnable(Tegra::Engines::Maxwell3D::Regs& regs) {
if (!state_tracker.TouchDepthClampEnable()) {
return;
}
bool is_enabled = !(regs.viewport_clip_control.geometry_clip ==
Maxwell::ViewportClipControl::GeometryClip::Passthrough ||
regs.viewport_clip_control.geometry_clip ==
Maxwell::ViewportClipControl::GeometryClip::FrustumXYZ ||
regs.viewport_clip_control.geometry_clip ==
Maxwell::ViewportClipControl::GeometryClip::FrustumZ);
scheduler.Record(
[is_enabled](vk::CommandBuffer cmdbuf) { cmdbuf.SetDepthClampEnableEXT(is_enabled); });
}
void RasterizerVulkan::UpdateDepthCompareOp(Tegra::Engines::Maxwell3D::Regs& regs) {
if (!state_tracker.TouchDepthCompareOp()) {
return;
@@ -925,6 +1098,79 @@ void RasterizerVulkan::UpdateStencilOp(Tegra::Engines::Maxwell3D::Regs& regs) {
}
}
void RasterizerVulkan::UpdateLogicOp(Tegra::Engines::Maxwell3D::Regs& regs) {
if (!regs.logic_op.enable) {
return;
}
if (!state_tracker.TouchLogicOp()) {
return;
}
auto op = static_cast<VkLogicOp>(static_cast<u32>(regs.logic_op.op) - 0x1500);
scheduler.Record([op](vk::CommandBuffer cmdbuf) { cmdbuf.SetLogicOpEXT(op); });
}
void RasterizerVulkan::UpdateBlending(Tegra::Engines::Maxwell3D::Regs& regs) {
if (!state_tracker.TouchBlending()) {
return;
}
if (state_tracker.TouchColorMask()) {
std::array<VkColorComponentFlags, Maxwell::NumRenderTargets> setup_masks{};
for (size_t index = 0; index < Maxwell::NumRenderTargets; index++) {
const auto& mask = regs.color_mask[regs.color_mask_common ? 0 : index];
auto& current = setup_masks[index];
if (mask.R) {
current |= VK_COLOR_COMPONENT_R_BIT;
}
if (mask.G) {
current |= VK_COLOR_COMPONENT_G_BIT;
}
if (mask.B) {
current |= VK_COLOR_COMPONENT_B_BIT;
}
if (mask.A) {
current |= VK_COLOR_COMPONENT_A_BIT;
}
}
scheduler.Record([setup_masks](vk::CommandBuffer cmdbuf) {
cmdbuf.SetColorWriteMaskEXT(0, setup_masks);
});
}
if (state_tracker.TouchBlendEnable()) {
std::array<VkBool32, Maxwell::NumRenderTargets> setup_enables{};
std::ranges::transform(
regs.blend.enable, setup_enables.begin(),
[&](const auto& is_enabled) { return is_enabled != 0 ? VK_TRUE : VK_FALSE; });
scheduler.Record([setup_enables](vk::CommandBuffer cmdbuf) {
cmdbuf.SetColorBlendEnableEXT(0, setup_enables);
});
}
if (state_tracker.TouchBlendEquations()) {
std::array<VkColorBlendEquationEXT, Maxwell::NumRenderTargets> setup_blends{};
for (size_t index = 0; index < Maxwell::NumRenderTargets; index++) {
const auto blend_setup = [&]<typename T>(const T& guest_blend) {
auto& host_blend = setup_blends[index];
host_blend.srcColorBlendFactor = MaxwellToVK::BlendFactor(guest_blend.color_source);
host_blend.dstColorBlendFactor = MaxwellToVK::BlendFactor(guest_blend.color_dest);
host_blend.colorBlendOp = MaxwellToVK::BlendEquation(guest_blend.color_op);
host_blend.srcAlphaBlendFactor = MaxwellToVK::BlendFactor(guest_blend.alpha_source);
host_blend.dstAlphaBlendFactor = MaxwellToVK::BlendFactor(guest_blend.alpha_dest);
host_blend.alphaBlendOp = MaxwellToVK::BlendEquation(guest_blend.alpha_op);
};
if (!regs.blend_per_target_enabled) {
blend_setup(regs.blend);
continue;
}
blend_setup(regs.blend_per_target[index]);
}
scheduler.Record([setup_blends](vk::CommandBuffer cmdbuf) {
cmdbuf.SetColorBlendEquationEXT(0, setup_blends);
});
}
}
void RasterizerVulkan::UpdateStencilTestEnable(Tegra::Engines::Maxwell3D::Regs& regs) {
if (!state_tracker.TouchStencilTestEnable()) {
return;

View File

@@ -65,6 +65,7 @@ public:
~RasterizerVulkan() override;
void Draw(bool is_indexed, u32 instance_count) override;
void DrawIndirect() override;
void Clear(u32 layer_count) override;
void DispatchCompute() override;
void ResetCounter(VideoCore::QueryType type) override;
@@ -72,9 +73,12 @@ public:
void BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, u32 size) override;
void DisableGraphicsUniformBuffer(size_t stage, u32 index) override;
void FlushAll() override;
void FlushRegion(VAddr addr, u64 size) override;
bool MustFlushRegion(VAddr addr, u64 size) override;
void InvalidateRegion(VAddr addr, u64 size) override;
void FlushRegion(VAddr addr, u64 size,
VideoCommon::CacheType which = VideoCommon::CacheType::All) override;
bool MustFlushRegion(VAddr addr, u64 size,
VideoCommon::CacheType which = VideoCommon::CacheType::All) override;
void InvalidateRegion(VAddr addr, u64 size,
VideoCommon::CacheType which = VideoCommon::CacheType::All) override;
void OnCPUWrite(VAddr addr, u64 size) override;
void InvalidateGPUCache() override;
void UnmapMemory(VAddr addr, u64 size) override;
@@ -84,12 +88,14 @@ public:
void SignalSyncPoint(u32 value) override;
void SignalReference() override;
void ReleaseFences() override;
void FlushAndInvalidateRegion(VAddr addr, u64 size) override;
void FlushAndInvalidateRegion(
VAddr addr, u64 size, VideoCommon::CacheType which = VideoCommon::CacheType::All) override;
void WaitForIdle() override;
void FragmentBarrier() override;
void TiledCacheBarrier() override;
void FlushCommands() override;
void TickFrame() override;
bool AccelerateConditionalRendering() override;
bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Surface& src,
const Tegra::Engines::Fermi2D::Surface& dst,
const Tegra::Engines::Fermi2D::Config& copy_config) override;
@@ -114,6 +120,9 @@ private:
static constexpr VkDeviceSize DEFAULT_BUFFER_SIZE = 4 * sizeof(float);
template <typename Func>
void PrepareDraw(bool is_indexed, Func&&);
void FlushWork();
void UpdateDynamicStates();
@@ -135,9 +144,16 @@ private:
void UpdateDepthTestEnable(Tegra::Engines::Maxwell3D::Regs& regs);
void UpdateDepthWriteEnable(Tegra::Engines::Maxwell3D::Regs& regs);
void UpdateDepthCompareOp(Tegra::Engines::Maxwell3D::Regs& regs);
void UpdatePrimitiveRestartEnable(Tegra::Engines::Maxwell3D::Regs& regs);
void UpdateRasterizerDiscardEnable(Tegra::Engines::Maxwell3D::Regs& regs);
void UpdateDepthBiasEnable(Tegra::Engines::Maxwell3D::Regs& regs);
void UpdateLogicOpEnable(Tegra::Engines::Maxwell3D::Regs& regs);
void UpdateDepthClampEnable(Tegra::Engines::Maxwell3D::Regs& regs);
void UpdateFrontFace(Tegra::Engines::Maxwell3D::Regs& regs);
void UpdateStencilOp(Tegra::Engines::Maxwell3D::Regs& regs);
void UpdateStencilTestEnable(Tegra::Engines::Maxwell3D::Regs& regs);
void UpdateLogicOp(Tegra::Engines::Maxwell3D::Regs& regs);
void UpdateBlending(Tegra::Engines::Maxwell3D::Regs& regs);
void UpdateVertexInput(Tegra::Engines::Maxwell3D::Regs& regs);

View File

@@ -1,5 +1,5 @@
// SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
// SPDX-FileCopyrightText: Copyright 2022 yuzu Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
#include <algorithm>
#include <utility>
@@ -30,8 +30,7 @@ constexpr VkDeviceSize MAX_STREAM_BUFFER_REQUEST_SIZE = 8_MiB;
constexpr VkDeviceSize STREAM_BUFFER_SIZE = 128_MiB;
constexpr VkDeviceSize REGION_SIZE = STREAM_BUFFER_SIZE / StagingBufferPool::NUM_SYNCS;
constexpr VkMemoryPropertyFlags HOST_FLAGS =
VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT;
constexpr VkMemoryPropertyFlags HOST_FLAGS = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT;
constexpr VkMemoryPropertyFlags STREAM_FLAGS = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | HOST_FLAGS;
bool IsStreamHeap(VkMemoryHeap heap) noexcept {
@@ -93,8 +92,7 @@ StagingBufferPool::StagingBufferPool(const Device& device_, MemoryAllocator& mem
.pNext = nullptr,
.flags = 0,
.size = STREAM_BUFFER_SIZE,
.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT |
VK_BUFFER_USAGE_INDEX_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT,
.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT,
.sharingMode = VK_SHARING_MODE_EXCLUSIVE,
.queueFamilyIndexCount = 0,
.pQueueFamilyIndices = nullptr,
@@ -142,11 +140,23 @@ StagingBufferPool::StagingBufferPool(const Device& device_, MemoryAllocator& mem
StagingBufferPool::~StagingBufferPool() = default;
StagingBufferRef StagingBufferPool::Request(size_t size, MemoryUsage usage) {
if (usage == MemoryUsage::Upload && size <= MAX_STREAM_BUFFER_REQUEST_SIZE) {
StagingBufferRef StagingBufferPool::Request(size_t size, MemoryUsage usage, bool deferred) {
if (!deferred && usage == MemoryUsage::Upload && size <= MAX_STREAM_BUFFER_REQUEST_SIZE) {
return GetStreamBuffer(size);
}
return GetStagingBuffer(size, usage);
return GetStagingBuffer(size, usage, deferred);
}
void StagingBufferPool::FreeDeferred(StagingBufferRef& ref) {
auto& entries = GetCache(ref.usage)[ref.log2_level].entries;
const auto is_this_one = [&ref](const StagingBuffer& entry) {
return entry.index == ref.index;
};
auto it = std::find_if(entries.begin(), entries.end(), is_this_one);
ASSERT(it != entries.end());
ASSERT(it->deferred);
it->tick = scheduler.CurrentTick();
it->deferred = false;
}
void StagingBufferPool::TickFrame() {
@@ -187,6 +197,9 @@ StagingBufferRef StagingBufferPool::GetStreamBuffer(size_t size) {
.buffer = *stream_buffer,
.offset = static_cast<VkDeviceSize>(offset),
.mapped_span = std::span<u8>(stream_pointer + offset, size),
.usage{},
.log2_level{},
.index{},
};
}
@@ -196,19 +209,21 @@ bool StagingBufferPool::AreRegionsActive(size_t region_begin, size_t region_end)
[gpu_tick](u64 sync_tick) { return gpu_tick < sync_tick; });
};
StagingBufferRef StagingBufferPool::GetStagingBuffer(size_t size, MemoryUsage usage) {
if (const std::optional<StagingBufferRef> ref = TryGetReservedBuffer(size, usage)) {
StagingBufferRef StagingBufferPool::GetStagingBuffer(size_t size, MemoryUsage usage,
bool deferred) {
if (const std::optional<StagingBufferRef> ref = TryGetReservedBuffer(size, usage, deferred)) {
return *ref;
}
return CreateStagingBuffer(size, usage);
return CreateStagingBuffer(size, usage, deferred);
}
std::optional<StagingBufferRef> StagingBufferPool::TryGetReservedBuffer(size_t size,
MemoryUsage usage) {
MemoryUsage usage,
bool deferred) {
StagingBuffers& cache_level = GetCache(usage)[Common::Log2Ceil64(size)];
const auto is_free = [this](const StagingBuffer& entry) {
return scheduler.IsFree(entry.tick);
return !entry.deferred && scheduler.IsFree(entry.tick);
};
auto& entries = cache_level.entries;
const auto hint_it = entries.begin() + cache_level.iterate_index;
@@ -220,20 +235,28 @@ std::optional<StagingBufferRef> StagingBufferPool::TryGetReservedBuffer(size_t s
}
}
cache_level.iterate_index = std::distance(entries.begin(), it) + 1;
it->tick = scheduler.CurrentTick();
it->tick = deferred ? std::numeric_limits<u64>::max() : scheduler.CurrentTick();
ASSERT(!it->deferred);
it->deferred = deferred;
return it->Ref();
}
StagingBufferRef StagingBufferPool::CreateStagingBuffer(size_t size, MemoryUsage usage) {
StagingBufferRef StagingBufferPool::CreateStagingBuffer(size_t size, MemoryUsage usage,
bool deferred) {
const u32 log2 = Common::Log2Ceil64(size);
VkBufferUsageFlags usage_flags{};
if (usage == MemoryUsage::Upload) {
usage_flags |= VK_BUFFER_USAGE_TRANSFER_SRC_BIT;
}
if (usage == MemoryUsage::Download) {
usage_flags |= VK_BUFFER_USAGE_TRANSFER_DST_BIT;
}
vk::Buffer buffer = device.GetLogical().CreateBuffer({
.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
.size = 1ULL << log2,
.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT |
VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT |
VK_BUFFER_USAGE_INDEX_BUFFER_BIT | VK_BUFFER_USAGE_VERTEX_BUFFER_BIT,
.usage = usage_flags,
.sharingMode = VK_SHARING_MODE_EXCLUSIVE,
.queueFamilyIndexCount = 0,
.pQueueFamilyIndices = nullptr,
@@ -249,7 +272,11 @@ StagingBufferRef StagingBufferPool::CreateStagingBuffer(size_t size, MemoryUsage
.buffer = std::move(buffer),
.commit = std::move(commit),
.mapped_span = mapped_span,
.tick = scheduler.CurrentTick(),
.usage = usage,
.log2_level = log2,
.index = unique_ids++,
.tick = deferred ? std::numeric_limits<u64>::max() : scheduler.CurrentTick(),
.deferred = deferred,
});
return entry.Ref();
}

View File

@@ -1,5 +1,5 @@
// SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
// SPDX-FileCopyrightText: Copyright 2022 yuzu Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
#pragma once
@@ -20,6 +20,9 @@ struct StagingBufferRef {
VkBuffer buffer;
VkDeviceSize offset;
std::span<u8> mapped_span;
MemoryUsage usage;
u32 log2_level;
u64 index;
};
class StagingBufferPool {
@@ -30,7 +33,8 @@ public:
Scheduler& scheduler);
~StagingBufferPool();
StagingBufferRef Request(size_t size, MemoryUsage usage);
StagingBufferRef Request(size_t size, MemoryUsage usage, bool deferred = false);
void FreeDeferred(StagingBufferRef& ref);
void TickFrame();
@@ -44,13 +48,20 @@ private:
vk::Buffer buffer;
MemoryCommit commit;
std::span<u8> mapped_span;
MemoryUsage usage;
u32 log2_level;
u64 index;
u64 tick = 0;
bool deferred{};
StagingBufferRef Ref() const noexcept {
return {
.buffer = *buffer,
.offset = 0,
.mapped_span = mapped_span,
.usage = usage,
.log2_level = log2_level,
.index = index,
};
}
};
@@ -68,11 +79,12 @@ private:
bool AreRegionsActive(size_t region_begin, size_t region_end) const;
StagingBufferRef GetStagingBuffer(size_t size, MemoryUsage usage);
StagingBufferRef GetStagingBuffer(size_t size, MemoryUsage usage, bool deferred = false);
std::optional<StagingBufferRef> TryGetReservedBuffer(size_t size, MemoryUsage usage);
std::optional<StagingBufferRef> TryGetReservedBuffer(size_t size, MemoryUsage usage,
bool deferred);
StagingBufferRef CreateStagingBuffer(size_t size, MemoryUsage usage);
StagingBufferRef CreateStagingBuffer(size_t size, MemoryUsage usage, bool deferred);
StagingBuffersCache& GetCache(MemoryUsage usage);
@@ -99,6 +111,7 @@ private:
size_t current_delete_level = 0;
u64 buffer_index = 0;
u64 unique_ids{};
};
} // namespace Vulkan

View File

@@ -27,10 +27,34 @@ using Flags = Maxwell3D::DirtyState::Flags;
Flags MakeInvalidationFlags() {
static constexpr int INVALIDATION_FLAGS[]{
Viewports, Scissors, DepthBias, BlendConstants, DepthBounds,
StencilProperties, LineWidth, CullMode, DepthBoundsEnable, DepthTestEnable,
DepthWriteEnable, DepthCompareOp, FrontFace, StencilOp, StencilTestEnable,
VertexBuffers, VertexInput,
Viewports,
Scissors,
DepthBias,
BlendConstants,
DepthBounds,
StencilProperties,
LineWidth,
CullMode,
DepthBoundsEnable,
DepthTestEnable,
DepthWriteEnable,
DepthCompareOp,
FrontFace,
StencilOp,
StencilTestEnable,
VertexBuffers,
VertexInput,
StateEnable,
PrimitiveRestartEnable,
RasterizerDiscardEnable,
DepthBiasEnable,
LogicOpEnable,
DepthClampEnable,
LogicOp,
Blending,
ColorMask,
BlendEquations,
BlendEnable,
};
Flags flags{};
for (const int flag : INVALIDATION_FLAGS) {
@@ -96,16 +120,22 @@ void SetupDirtyCullMode(Tables& tables) {
table[OFF(gl_cull_test_enabled)] = CullMode;
}
void SetupDirtyDepthBoundsEnable(Tables& tables) {
tables[0][OFF(depth_bounds_enable)] = DepthBoundsEnable;
}
void SetupDirtyDepthTestEnable(Tables& tables) {
tables[0][OFF(depth_test_enable)] = DepthTestEnable;
}
void SetupDirtyDepthWriteEnable(Tables& tables) {
tables[0][OFF(depth_write_enabled)] = DepthWriteEnable;
void SetupDirtyStateEnable(Tables& tables) {
const auto setup = [&](size_t position, u8 flag) {
tables[0][position] = flag;
tables[1][position] = StateEnable;
};
setup(OFF(depth_bounds_enable), DepthBoundsEnable);
setup(OFF(depth_test_enable), DepthTestEnable);
setup(OFF(depth_write_enabled), DepthWriteEnable);
setup(OFF(stencil_enable), StencilTestEnable);
setup(OFF(primitive_restart.enabled), PrimitiveRestartEnable);
setup(OFF(rasterize_enable), RasterizerDiscardEnable);
setup(OFF(polygon_offset_point_enable), DepthBiasEnable);
setup(OFF(polygon_offset_line_enable), DepthBiasEnable);
setup(OFF(polygon_offset_fill_enable), DepthBiasEnable);
setup(OFF(logic_op.enable), LogicOpEnable);
setup(OFF(viewport_clip_control.geometry_clip), DepthClampEnable);
}
void SetupDirtyDepthCompareOp(Tables& tables) {
@@ -133,16 +163,22 @@ void SetupDirtyStencilOp(Tables& tables) {
tables[1][OFF(stencil_two_side_enable)] = StencilOp;
}
void SetupDirtyStencilTestEnable(Tables& tables) {
tables[0][OFF(stencil_enable)] = StencilTestEnable;
}
void SetupDirtyBlending(Tables& tables) {
tables[0][OFF(color_mask_common)] = Blending;
tables[1][OFF(color_mask_common)] = ColorMask;
tables[0][OFF(blend_per_target_enabled)] = Blending;
tables[1][OFF(blend_per_target_enabled)] = BlendEquations;
FillBlock(tables[0], OFF(color_mask), NUM(color_mask), Blending);
FillBlock(tables[1], OFF(color_mask), NUM(color_mask), ColorMask);
FillBlock(tables[0], OFF(blend), NUM(blend), Blending);
FillBlock(tables[1], OFF(blend), NUM(blend), BlendEquations);
FillBlock(tables[1], OFF(blend.enable), NUM(blend.enable), BlendEnable);
FillBlock(tables[0], OFF(blend_per_target), NUM(blend_per_target), Blending);
FillBlock(tables[1], OFF(blend_per_target), NUM(blend_per_target), BlendEquations);
}
void SetupDirtySpecialOps(Tables& tables) {
tables[0][OFF(logic_op.op)] = LogicOp;
}
void SetupDirtyViewportSwizzles(Tables& tables) {
@@ -185,17 +221,15 @@ void StateTracker::SetupTables(Tegra::Control::ChannelState& channel_state) {
SetupDirtyStencilProperties(tables);
SetupDirtyLineWidth(tables);
SetupDirtyCullMode(tables);
SetupDirtyDepthBoundsEnable(tables);
SetupDirtyDepthTestEnable(tables);
SetupDirtyDepthWriteEnable(tables);
SetupDirtyStateEnable(tables);
SetupDirtyDepthCompareOp(tables);
SetupDirtyFrontFace(tables);
SetupDirtyStencilOp(tables);
SetupDirtyStencilTestEnable(tables);
SetupDirtyBlending(tables);
SetupDirtyViewportSwizzles(tables);
SetupDirtyVertexAttributes(tables);
SetupDirtyVertexBindings(tables);
SetupDirtySpecialOps(tables);
}
void StateTracker::ChangeChannel(Tegra::Control::ChannelState& channel_state) {

View File

@@ -45,8 +45,18 @@ enum : u8 {
FrontFace,
StencilOp,
StencilTestEnable,
PrimitiveRestartEnable,
RasterizerDiscardEnable,
DepthBiasEnable,
StateEnable,
LogicOp,
LogicOpEnable,
DepthClampEnable,
Blending,
BlendEnable,
BlendEquations,
ColorMask,
ViewportSwizzles,
Last,
@@ -111,6 +121,10 @@ public:
return Exchange(Dirty::CullMode, false);
}
bool TouchStateEnable() {
return Exchange(Dirty::StateEnable, false);
}
bool TouchDepthBoundsTestEnable() {
return Exchange(Dirty::DepthBoundsEnable, false);
}
@@ -123,6 +137,26 @@ public:
return Exchange(Dirty::DepthWriteEnable, false);
}
bool TouchPrimitiveRestartEnable() {
return Exchange(Dirty::PrimitiveRestartEnable, false);
}
bool TouchRasterizerDiscardEnable() {
return Exchange(Dirty::RasterizerDiscardEnable, false);
}
bool TouchDepthBiasEnable() {
return Exchange(Dirty::DepthBiasEnable, false);
}
bool TouchLogicOpEnable() {
return Exchange(Dirty::LogicOpEnable, false);
}
bool TouchDepthClampEnable() {
return Exchange(Dirty::DepthClampEnable, false);
}
bool TouchDepthCompareOp() {
return Exchange(Dirty::DepthCompareOp, false);
}
@@ -135,10 +169,30 @@ public:
return Exchange(Dirty::StencilOp, false);
}
bool TouchBlending() {
return Exchange(Dirty::Blending, false);
}
bool TouchBlendEnable() {
return Exchange(Dirty::BlendEnable, false);
}
bool TouchBlendEquations() {
return Exchange(Dirty::BlendEquations, false);
}
bool TouchColorMask() {
return Exchange(Dirty::ColorMask, false);
}
bool TouchStencilTestEnable() {
return Exchange(Dirty::StencilTestEnable, false);
}
bool TouchLogicOp() {
return Exchange(Dirty::LogicOp, false);
}
bool ChangePrimitiveTopology(Maxwell::PrimitiveTopology new_topology) {
const bool has_changed = current_topology != new_topology;
current_topology = new_topology;