another try
This commit is contained in:
File diff suppressed because it is too large
Load Diff
@@ -1,118 +1,118 @@
|
||||
// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "video_core/engines/fermi_2d.h"
|
||||
#include "video_core/renderer_vulkan/vk_descriptor_pool.h"
|
||||
#include "video_core/texture_cache/types.h"
|
||||
#include "video_core/vulkan_common/vulkan_wrapper.h"
|
||||
|
||||
namespace Vulkan {
|
||||
|
||||
using VideoCommon::Region2D;
|
||||
|
||||
class Device;
|
||||
class Framebuffer;
|
||||
class ImageView;
|
||||
class StateTracker;
|
||||
class Scheduler;
|
||||
|
||||
struct BlitImagePipelineKey {
|
||||
constexpr auto operator<=>(const BlitImagePipelineKey&) const noexcept = default;
|
||||
|
||||
VkRenderPass renderpass;
|
||||
Tegra::Engines::Fermi2D::Operation operation;
|
||||
};
|
||||
|
||||
class BlitImageHelper {
|
||||
public:
|
||||
explicit BlitImageHelper(const Device& device, Scheduler& scheduler,
|
||||
StateTracker& state_tracker, DescriptorPool& descriptor_pool);
|
||||
~BlitImageHelper();
|
||||
|
||||
void BlitColor(const Framebuffer* dst_framebuffer, VkImageView src_image_view,
|
||||
const Region2D& dst_region, const Region2D& src_region,
|
||||
Tegra::Engines::Fermi2D::Filter filter,
|
||||
Tegra::Engines::Fermi2D::Operation operation);
|
||||
|
||||
void BlitDepthStencil(const Framebuffer* dst_framebuffer, VkImageView src_depth_view,
|
||||
VkImageView src_stencil_view, const Region2D& dst_region,
|
||||
const Region2D& src_region, Tegra::Engines::Fermi2D::Filter filter,
|
||||
Tegra::Engines::Fermi2D::Operation operation);
|
||||
|
||||
void ConvertD32ToR32(const Framebuffer* dst_framebuffer, const ImageView& src_image_view);
|
||||
|
||||
void ConvertR32ToD32(const Framebuffer* dst_framebuffer, const ImageView& src_image_view);
|
||||
|
||||
void ConvertD16ToR16(const Framebuffer* dst_framebuffer, const ImageView& src_image_view);
|
||||
|
||||
void ConvertR16ToD16(const Framebuffer* dst_framebuffer, const ImageView& src_image_view);
|
||||
|
||||
void ConvertABGR8ToD24S8(const Framebuffer* dst_framebuffer, const ImageView& src_image_view);
|
||||
|
||||
void ConvertD24S8ToABGR8(const Framebuffer* dst_framebuffer, ImageView& src_image_view);
|
||||
|
||||
void ConvertS8D24ToABGR8(const Framebuffer* dst_framebuffer, ImageView& src_image_view);
|
||||
|
||||
private:
|
||||
void Convert(VkPipeline pipeline, const Framebuffer* dst_framebuffer,
|
||||
const ImageView& src_image_view);
|
||||
|
||||
void ConvertDepthStencil(VkPipeline pipeline, const Framebuffer* dst_framebuffer,
|
||||
ImageView& src_image_view);
|
||||
|
||||
[[nodiscard]] VkPipeline FindOrEmplaceColorPipeline(const BlitImagePipelineKey& key);
|
||||
|
||||
[[nodiscard]] VkPipeline FindOrEmplaceDepthStencilPipeline(const BlitImagePipelineKey& key);
|
||||
|
||||
void ConvertPipeline(vk::Pipeline& pipeline, VkRenderPass renderpass, bool is_target_depth);
|
||||
|
||||
void ConvertDepthToColorPipeline(vk::Pipeline& pipeline, VkRenderPass renderpass);
|
||||
|
||||
void ConvertColorToDepthPipeline(vk::Pipeline& pipeline, VkRenderPass renderpass);
|
||||
|
||||
void ConvertPipelineEx(vk::Pipeline& pipeline, VkRenderPass renderpass,
|
||||
vk::ShaderModule& module, bool single_texture, bool is_target_depth);
|
||||
|
||||
void ConvertPipelineColorTargetEx(vk::Pipeline& pipeline, VkRenderPass renderpass,
|
||||
vk::ShaderModule& module);
|
||||
|
||||
void ConvertPipelineDepthTargetEx(vk::Pipeline& pipeline, VkRenderPass renderpass,
|
||||
vk::ShaderModule& module);
|
||||
|
||||
const Device& device;
|
||||
Scheduler& scheduler;
|
||||
StateTracker& state_tracker;
|
||||
|
||||
vk::DescriptorSetLayout one_texture_set_layout;
|
||||
vk::DescriptorSetLayout two_textures_set_layout;
|
||||
DescriptorAllocator one_texture_descriptor_allocator;
|
||||
DescriptorAllocator two_textures_descriptor_allocator;
|
||||
vk::PipelineLayout one_texture_pipeline_layout;
|
||||
vk::PipelineLayout two_textures_pipeline_layout;
|
||||
vk::ShaderModule full_screen_vert;
|
||||
vk::ShaderModule blit_color_to_color_frag;
|
||||
vk::ShaderModule blit_depth_stencil_frag;
|
||||
vk::ShaderModule convert_depth_to_float_frag;
|
||||
vk::ShaderModule convert_float_to_depth_frag;
|
||||
vk::ShaderModule convert_abgr8_to_d24s8_frag;
|
||||
vk::ShaderModule convert_d24s8_to_abgr8_frag;
|
||||
vk::ShaderModule convert_s8d24_to_abgr8_frag;
|
||||
vk::Sampler linear_sampler;
|
||||
vk::Sampler nearest_sampler;
|
||||
|
||||
std::vector<BlitImagePipelineKey> blit_color_keys;
|
||||
std::vector<vk::Pipeline> blit_color_pipelines;
|
||||
std::vector<BlitImagePipelineKey> blit_depth_stencil_keys;
|
||||
std::vector<vk::Pipeline> blit_depth_stencil_pipelines;
|
||||
vk::Pipeline convert_d32_to_r32_pipeline;
|
||||
vk::Pipeline convert_r32_to_d32_pipeline;
|
||||
vk::Pipeline convert_d16_to_r16_pipeline;
|
||||
vk::Pipeline convert_r16_to_d16_pipeline;
|
||||
vk::Pipeline convert_abgr8_to_d24s8_pipeline;
|
||||
vk::Pipeline convert_d24s8_to_abgr8_pipeline;
|
||||
vk::Pipeline convert_s8d24_to_abgr8_pipeline;
|
||||
};
|
||||
|
||||
} // namespace Vulkan
|
||||
// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "video_core/engines/fermi_2d.h"
|
||||
#include "video_core/renderer_vulkan/vk_descriptor_pool.h"
|
||||
#include "video_core/texture_cache/types.h"
|
||||
#include "video_core/vulkan_common/vulkan_wrapper.h"
|
||||
|
||||
namespace Vulkan {
|
||||
|
||||
using VideoCommon::Region2D;
|
||||
|
||||
class Device;
|
||||
class Framebuffer;
|
||||
class ImageView;
|
||||
class StateTracker;
|
||||
class Scheduler;
|
||||
|
||||
struct BlitImagePipelineKey {
|
||||
constexpr auto operator<=>(const BlitImagePipelineKey&) const noexcept = default;
|
||||
|
||||
VkRenderPass renderpass;
|
||||
Tegra::Engines::Fermi2D::Operation operation;
|
||||
};
|
||||
|
||||
class BlitImageHelper {
|
||||
public:
|
||||
explicit BlitImageHelper(const Device& device, Scheduler& scheduler,
|
||||
StateTracker& state_tracker, DescriptorPool& descriptor_pool);
|
||||
~BlitImageHelper();
|
||||
|
||||
void BlitColor(const Framebuffer* dst_framebuffer, VkImageView src_image_view,
|
||||
const Region2D& dst_region, const Region2D& src_region,
|
||||
Tegra::Engines::Fermi2D::Filter filter,
|
||||
Tegra::Engines::Fermi2D::Operation operation);
|
||||
|
||||
void BlitDepthStencil(const Framebuffer* dst_framebuffer, VkImageView src_depth_view,
|
||||
VkImageView src_stencil_view, const Region2D& dst_region,
|
||||
const Region2D& src_region, Tegra::Engines::Fermi2D::Filter filter,
|
||||
Tegra::Engines::Fermi2D::Operation operation);
|
||||
|
||||
void ConvertD32ToR32(const Framebuffer* dst_framebuffer, const ImageView& src_image_view);
|
||||
|
||||
void ConvertR32ToD32(const Framebuffer* dst_framebuffer, const ImageView& src_image_view);
|
||||
|
||||
void ConvertD16ToR16(const Framebuffer* dst_framebuffer, const ImageView& src_image_view);
|
||||
|
||||
void ConvertR16ToD16(const Framebuffer* dst_framebuffer, const ImageView& src_image_view);
|
||||
|
||||
void ConvertABGR8ToD24S8(const Framebuffer* dst_framebuffer, const ImageView& src_image_view);
|
||||
|
||||
void ConvertD24S8ToABGR8(const Framebuffer* dst_framebuffer, ImageView& src_image_view);
|
||||
|
||||
void ConvertS8D24ToABGR8(const Framebuffer* dst_framebuffer, ImageView& src_image_view);
|
||||
|
||||
private:
|
||||
void Convert(VkPipeline pipeline, const Framebuffer* dst_framebuffer,
|
||||
const ImageView& src_image_view);
|
||||
|
||||
void ConvertDepthStencil(VkPipeline pipeline, const Framebuffer* dst_framebuffer,
|
||||
ImageView& src_image_view);
|
||||
|
||||
[[nodiscard]] VkPipeline FindOrEmplaceColorPipeline(const BlitImagePipelineKey& key);
|
||||
|
||||
[[nodiscard]] VkPipeline FindOrEmplaceDepthStencilPipeline(const BlitImagePipelineKey& key);
|
||||
|
||||
void ConvertPipeline(vk::Pipeline& pipeline, VkRenderPass renderpass, bool is_target_depth);
|
||||
|
||||
void ConvertDepthToColorPipeline(vk::Pipeline& pipeline, VkRenderPass renderpass);
|
||||
|
||||
void ConvertColorToDepthPipeline(vk::Pipeline& pipeline, VkRenderPass renderpass);
|
||||
|
||||
void ConvertPipelineEx(vk::Pipeline& pipeline, VkRenderPass renderpass,
|
||||
vk::ShaderModule& module, bool single_texture, bool is_target_depth);
|
||||
|
||||
void ConvertPipelineColorTargetEx(vk::Pipeline& pipeline, VkRenderPass renderpass,
|
||||
vk::ShaderModule& module);
|
||||
|
||||
void ConvertPipelineDepthTargetEx(vk::Pipeline& pipeline, VkRenderPass renderpass,
|
||||
vk::ShaderModule& module);
|
||||
|
||||
const Device& device;
|
||||
Scheduler& scheduler;
|
||||
StateTracker& state_tracker;
|
||||
|
||||
vk::DescriptorSetLayout one_texture_set_layout;
|
||||
vk::DescriptorSetLayout two_textures_set_layout;
|
||||
DescriptorAllocator one_texture_descriptor_allocator;
|
||||
DescriptorAllocator two_textures_descriptor_allocator;
|
||||
vk::PipelineLayout one_texture_pipeline_layout;
|
||||
vk::PipelineLayout two_textures_pipeline_layout;
|
||||
vk::ShaderModule full_screen_vert;
|
||||
vk::ShaderModule blit_color_to_color_frag;
|
||||
vk::ShaderModule blit_depth_stencil_frag;
|
||||
vk::ShaderModule convert_depth_to_float_frag;
|
||||
vk::ShaderModule convert_float_to_depth_frag;
|
||||
vk::ShaderModule convert_abgr8_to_d24s8_frag;
|
||||
vk::ShaderModule convert_d24s8_to_abgr8_frag;
|
||||
vk::ShaderModule convert_s8d24_to_abgr8_frag;
|
||||
vk::Sampler linear_sampler;
|
||||
vk::Sampler nearest_sampler;
|
||||
|
||||
std::vector<BlitImagePipelineKey> blit_color_keys;
|
||||
std::vector<vk::Pipeline> blit_color_pipelines;
|
||||
std::vector<BlitImagePipelineKey> blit_depth_stencil_keys;
|
||||
std::vector<vk::Pipeline> blit_depth_stencil_pipelines;
|
||||
vk::Pipeline convert_d32_to_r32_pipeline;
|
||||
vk::Pipeline convert_r32_to_d32_pipeline;
|
||||
vk::Pipeline convert_d16_to_r16_pipeline;
|
||||
vk::Pipeline convert_r16_to_d16_pipeline;
|
||||
vk::Pipeline convert_abgr8_to_d24s8_pipeline;
|
||||
vk::Pipeline convert_d24s8_to_abgr8_pipeline;
|
||||
vk::Pipeline convert_s8d24_to_abgr8_pipeline;
|
||||
};
|
||||
|
||||
} // namespace Vulkan
|
||||
|
||||
@@ -1,455 +1,455 @@
|
||||
// SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#include <algorithm>
|
||||
#include <cstring>
|
||||
|
||||
#include "common/bit_cast.h"
|
||||
#include "common/cityhash.h"
|
||||
#include "common/common_types.h"
|
||||
#include "video_core/renderer_vulkan/fixed_pipeline_state.h"
|
||||
#include "video_core/renderer_vulkan/vk_state_tracker.h"
|
||||
|
||||
namespace Vulkan {
|
||||
namespace {
|
||||
constexpr size_t POINT = 0;
|
||||
constexpr size_t LINE = 1;
|
||||
constexpr size_t POLYGON = 2;
|
||||
constexpr std::array POLYGON_OFFSET_ENABLE_LUT = {
|
||||
POINT, // Points
|
||||
LINE, // Lines
|
||||
LINE, // LineLoop
|
||||
LINE, // LineStrip
|
||||
POLYGON, // Triangles
|
||||
POLYGON, // TriangleStrip
|
||||
POLYGON, // TriangleFan
|
||||
POLYGON, // Quads
|
||||
POLYGON, // QuadStrip
|
||||
POLYGON, // Polygon
|
||||
LINE, // LinesAdjacency
|
||||
LINE, // LineStripAdjacency
|
||||
POLYGON, // TrianglesAdjacency
|
||||
POLYGON, // TriangleStripAdjacency
|
||||
POLYGON, // Patches
|
||||
};
|
||||
|
||||
void RefreshXfbState(VideoCommon::TransformFeedbackState& state, const Maxwell& regs) {
|
||||
std::ranges::transform(regs.transform_feedback.controls, state.layouts.begin(),
|
||||
[](const auto& layout) {
|
||||
return VideoCommon::TransformFeedbackState::Layout{
|
||||
.stream = layout.stream,
|
||||
.varying_count = layout.varying_count,
|
||||
.stride = layout.stride,
|
||||
};
|
||||
});
|
||||
state.varyings = regs.stream_out_layout;
|
||||
}
|
||||
} // Anonymous namespace
|
||||
|
||||
void FixedPipelineState::Refresh(Tegra::Engines::Maxwell3D& maxwell3d,
|
||||
bool has_extended_dynamic_state, bool has_dynamic_vertex_input) {
|
||||
const Maxwell& regs = maxwell3d.regs;
|
||||
const std::array enabled_lut{
|
||||
regs.polygon_offset_point_enable,
|
||||
regs.polygon_offset_line_enable,
|
||||
regs.polygon_offset_fill_enable,
|
||||
};
|
||||
const u32 topology_index = static_cast<u32>(regs.draw.topology.Value());
|
||||
|
||||
raw1 = 0;
|
||||
extended_dynamic_state.Assign(has_extended_dynamic_state ? 1 : 0);
|
||||
dynamic_vertex_input.Assign(has_dynamic_vertex_input ? 1 : 0);
|
||||
xfb_enabled.Assign(regs.transform_feedback_enabled != 0);
|
||||
primitive_restart_enable.Assign(regs.primitive_restart.enabled != 0 ? 1 : 0);
|
||||
depth_bias_enable.Assign(enabled_lut[POLYGON_OFFSET_ENABLE_LUT[topology_index]] != 0 ? 1 : 0);
|
||||
depth_clamp_disabled.Assign(regs.viewport_clip_control.geometry_clip ==
|
||||
Maxwell::ViewportClipControl::GeometryClip::Passthrough ||
|
||||
regs.viewport_clip_control.geometry_clip ==
|
||||
Maxwell::ViewportClipControl::GeometryClip::FrustumXYZ ||
|
||||
regs.viewport_clip_control.geometry_clip ==
|
||||
Maxwell::ViewportClipControl::GeometryClip::FrustumZ);
|
||||
ndc_minus_one_to_one.Assign(regs.depth_mode == Maxwell::DepthMode::MinusOneToOne ? 1 : 0);
|
||||
polygon_mode.Assign(PackPolygonMode(regs.polygon_mode_front));
|
||||
patch_control_points_minus_one.Assign(regs.patch_vertices - 1);
|
||||
tessellation_primitive.Assign(static_cast<u32>(regs.tessellation.params.domain_type.Value()));
|
||||
tessellation_spacing.Assign(static_cast<u32>(regs.tessellation.params.spacing.Value()));
|
||||
tessellation_clockwise.Assign(regs.tessellation.params.output_primitives.Value() ==
|
||||
Maxwell::Tessellation::OutputPrimitives::Triangles_CW);
|
||||
logic_op_enable.Assign(regs.logic_op.enable != 0 ? 1 : 0);
|
||||
logic_op.Assign(PackLogicOp(regs.logic_op.op));
|
||||
topology.Assign(regs.draw.topology);
|
||||
msaa_mode.Assign(regs.anti_alias_samples_mode);
|
||||
|
||||
raw2 = 0;
|
||||
rasterize_enable.Assign(regs.rasterize_enable != 0 ? 1 : 0);
|
||||
const auto test_func =
|
||||
regs.alpha_test_enabled != 0 ? regs.alpha_test_func : Maxwell::ComparisonOp::Always_GL;
|
||||
alpha_test_func.Assign(PackComparisonOp(test_func));
|
||||
early_z.Assign(regs.mandated_early_z != 0 ? 1 : 0);
|
||||
depth_enabled.Assign(regs.zeta_enable != 0 ? 1 : 0);
|
||||
depth_format.Assign(static_cast<u32>(regs.zeta.format));
|
||||
y_negate.Assign(regs.window_origin.mode != Maxwell::WindowOrigin::Mode::UpperLeft ? 1 : 0);
|
||||
provoking_vertex_last.Assign(regs.provoking_vertex == Maxwell::ProvokingVertex::Last ? 1 : 0);
|
||||
smooth_lines.Assign(regs.line_anti_alias_enable != 0 ? 1 : 0);
|
||||
|
||||
for (size_t i = 0; i < regs.rt.size(); ++i) {
|
||||
color_formats[i] = static_cast<u8>(regs.rt[i].format);
|
||||
}
|
||||
alpha_test_ref = Common::BitCast<u32>(regs.alpha_test_ref);
|
||||
point_size = Common::BitCast<u32>(regs.point_size);
|
||||
|
||||
if (maxwell3d.dirty.flags[Dirty::VertexInput]) {
|
||||
if (has_dynamic_vertex_input) {
|
||||
// Dirty flag will be reset by the command buffer update
|
||||
static constexpr std::array LUT{
|
||||
0u, // Invalid
|
||||
1u, // SignedNorm
|
||||
1u, // UnsignedNorm
|
||||
2u, // SignedInt
|
||||
3u, // UnsignedInt
|
||||
1u, // UnsignedScaled
|
||||
1u, // SignedScaled
|
||||
1u, // Float
|
||||
};
|
||||
const auto& attrs = regs.vertex_attrib_format;
|
||||
attribute_types = 0;
|
||||
for (size_t i = 0; i < Maxwell::NumVertexAttributes; ++i) {
|
||||
const u32 mask = attrs[i].constant != 0 ? 0 : 3;
|
||||
const u32 type = LUT[static_cast<size_t>(attrs[i].type.Value())];
|
||||
attribute_types |= static_cast<u64>(type & mask) << (i * 2);
|
||||
}
|
||||
} else {
|
||||
maxwell3d.dirty.flags[Dirty::VertexInput] = false;
|
||||
enabled_divisors = 0;
|
||||
for (size_t index = 0; index < Maxwell::NumVertexArrays; ++index) {
|
||||
const bool is_enabled = regs.vertex_stream_instances.IsInstancingEnabled(index);
|
||||
binding_divisors[index] = is_enabled ? regs.vertex_streams[index].frequency : 0;
|
||||
enabled_divisors |= (is_enabled ? u64{1} : 0) << index;
|
||||
}
|
||||
for (size_t index = 0; index < Maxwell::NumVertexAttributes; ++index) {
|
||||
const auto& input = regs.vertex_attrib_format[index];
|
||||
auto& attribute = attributes[index];
|
||||
attribute.raw = 0;
|
||||
attribute.enabled.Assign(input.constant ? 0 : 1);
|
||||
attribute.buffer.Assign(input.buffer);
|
||||
attribute.offset.Assign(input.offset);
|
||||
attribute.type.Assign(static_cast<u32>(input.type.Value()));
|
||||
attribute.size.Assign(static_cast<u32>(input.size.Value()));
|
||||
}
|
||||
}
|
||||
}
|
||||
if (maxwell3d.dirty.flags[Dirty::Blending]) {
|
||||
maxwell3d.dirty.flags[Dirty::Blending] = false;
|
||||
for (size_t index = 0; index < attachments.size(); ++index) {
|
||||
attachments[index].Refresh(regs, index);
|
||||
}
|
||||
}
|
||||
if (maxwell3d.dirty.flags[Dirty::ViewportSwizzles]) {
|
||||
maxwell3d.dirty.flags[Dirty::ViewportSwizzles] = false;
|
||||
const auto& transform = regs.viewport_transform;
|
||||
std::ranges::transform(transform, viewport_swizzles.begin(), [](const auto& viewport) {
|
||||
return static_cast<u16>(viewport.swizzle.raw);
|
||||
});
|
||||
}
|
||||
if (!extended_dynamic_state) {
|
||||
dynamic_state.Refresh(regs);
|
||||
}
|
||||
if (xfb_enabled) {
|
||||
RefreshXfbState(xfb_state, regs);
|
||||
}
|
||||
}
|
||||
|
||||
void FixedPipelineState::BlendingAttachment::Refresh(const Maxwell& regs, size_t index) {
|
||||
const auto& mask = regs.color_mask[regs.color_mask_common ? 0 : index];
|
||||
|
||||
raw = 0;
|
||||
mask_r.Assign(mask.R);
|
||||
mask_g.Assign(mask.G);
|
||||
mask_b.Assign(mask.B);
|
||||
mask_a.Assign(mask.A);
|
||||
|
||||
// TODO: C++20 Use templated lambda to deduplicate code
|
||||
|
||||
if (!regs.blend_per_target_enabled) {
|
||||
if (!regs.blend.enable[index]) {
|
||||
return;
|
||||
}
|
||||
const auto& src = regs.blend;
|
||||
equation_rgb.Assign(PackBlendEquation(src.color_op));
|
||||
equation_a.Assign(PackBlendEquation(src.alpha_op));
|
||||
factor_source_rgb.Assign(PackBlendFactor(src.color_source));
|
||||
factor_dest_rgb.Assign(PackBlendFactor(src.color_dest));
|
||||
factor_source_a.Assign(PackBlendFactor(src.alpha_source));
|
||||
factor_dest_a.Assign(PackBlendFactor(src.alpha_dest));
|
||||
enable.Assign(1);
|
||||
return;
|
||||
}
|
||||
|
||||
if (!regs.blend.enable[index]) {
|
||||
return;
|
||||
}
|
||||
const auto& src = regs.blend_per_target[index];
|
||||
equation_rgb.Assign(PackBlendEquation(src.color_op));
|
||||
equation_a.Assign(PackBlendEquation(src.alpha_op));
|
||||
factor_source_rgb.Assign(PackBlendFactor(src.color_source));
|
||||
factor_dest_rgb.Assign(PackBlendFactor(src.color_dest));
|
||||
factor_source_a.Assign(PackBlendFactor(src.alpha_source));
|
||||
factor_dest_a.Assign(PackBlendFactor(src.alpha_dest));
|
||||
enable.Assign(1);
|
||||
}
|
||||
|
||||
void FixedPipelineState::DynamicState::Refresh(const Maxwell& regs) {
|
||||
u32 packed_front_face = PackFrontFace(regs.gl_front_face);
|
||||
if (regs.window_origin.flip_y != 0) {
|
||||
// Flip front face
|
||||
packed_front_face = 1 - packed_front_face;
|
||||
}
|
||||
|
||||
raw1 = 0;
|
||||
raw2 = 0;
|
||||
front.action_stencil_fail.Assign(PackStencilOp(regs.stencil_front_op.fail));
|
||||
front.action_depth_fail.Assign(PackStencilOp(regs.stencil_front_op.zfail));
|
||||
front.action_depth_pass.Assign(PackStencilOp(regs.stencil_front_op.zpass));
|
||||
front.test_func.Assign(PackComparisonOp(regs.stencil_front_op.func));
|
||||
if (regs.stencil_two_side_enable) {
|
||||
back.action_stencil_fail.Assign(PackStencilOp(regs.stencil_back_op.fail));
|
||||
back.action_depth_fail.Assign(PackStencilOp(regs.stencil_back_op.zfail));
|
||||
back.action_depth_pass.Assign(PackStencilOp(regs.stencil_back_op.zpass));
|
||||
back.test_func.Assign(PackComparisonOp(regs.stencil_back_op.func));
|
||||
} else {
|
||||
back.action_stencil_fail.Assign(front.action_stencil_fail);
|
||||
back.action_depth_fail.Assign(front.action_depth_fail);
|
||||
back.action_depth_pass.Assign(front.action_depth_pass);
|
||||
back.test_func.Assign(front.test_func);
|
||||
}
|
||||
stencil_enable.Assign(regs.stencil_enable);
|
||||
depth_write_enable.Assign(regs.depth_write_enabled);
|
||||
depth_bounds_enable.Assign(regs.depth_bounds_enable);
|
||||
depth_test_enable.Assign(regs.depth_test_enable);
|
||||
front_face.Assign(packed_front_face);
|
||||
depth_test_func.Assign(PackComparisonOp(regs.depth_test_func));
|
||||
cull_face.Assign(PackCullFace(regs.gl_cull_face));
|
||||
cull_enable.Assign(regs.gl_cull_test_enabled != 0 ? 1 : 0);
|
||||
std::ranges::transform(regs.vertex_streams, vertex_strides.begin(), [](const auto& array) {
|
||||
return static_cast<u16>(array.stride.Value());
|
||||
});
|
||||
}
|
||||
|
||||
size_t FixedPipelineState::Hash() const noexcept {
|
||||
const u64 hash = Common::CityHash64(reinterpret_cast<const char*>(this), Size());
|
||||
return static_cast<size_t>(hash);
|
||||
}
|
||||
|
||||
bool FixedPipelineState::operator==(const FixedPipelineState& rhs) const noexcept {
|
||||
return std::memcmp(this, &rhs, Size()) == 0;
|
||||
}
|
||||
|
||||
u32 FixedPipelineState::PackComparisonOp(Maxwell::ComparisonOp op) noexcept {
|
||||
// OpenGL enums go from 0x200 to 0x207 and the others from 1 to 8
|
||||
// If we substract 0x200 to OpenGL enums and 1 to the others we get a 0-7 range.
|
||||
// Perfect for a hash.
|
||||
const u32 value = static_cast<u32>(op);
|
||||
return value - (value >= 0x200 ? 0x200 : 1);
|
||||
}
|
||||
|
||||
Maxwell::ComparisonOp FixedPipelineState::UnpackComparisonOp(u32 packed) noexcept {
|
||||
// Read PackComparisonOp for the logic behind this.
|
||||
return static_cast<Maxwell::ComparisonOp>(packed + 1);
|
||||
}
|
||||
|
||||
u32 FixedPipelineState::PackStencilOp(Maxwell::StencilOp::Op op) noexcept {
|
||||
switch (op) {
|
||||
case Maxwell::StencilOp::Op::Keep_D3D:
|
||||
case Maxwell::StencilOp::Op::Keep_GL:
|
||||
return 0;
|
||||
case Maxwell::StencilOp::Op::Zero_D3D:
|
||||
case Maxwell::StencilOp::Op::Zero_GL:
|
||||
return 1;
|
||||
case Maxwell::StencilOp::Op::Replace_D3D:
|
||||
case Maxwell::StencilOp::Op::Replace_GL:
|
||||
return 2;
|
||||
case Maxwell::StencilOp::Op::IncrSaturate_D3D:
|
||||
case Maxwell::StencilOp::Op::IncrSaturate_GL:
|
||||
return 3;
|
||||
case Maxwell::StencilOp::Op::DecrSaturate_D3D:
|
||||
case Maxwell::StencilOp::Op::DecrSaturate_GL:
|
||||
return 4;
|
||||
case Maxwell::StencilOp::Op::Invert_D3D:
|
||||
case Maxwell::StencilOp::Op::Invert_GL:
|
||||
return 5;
|
||||
case Maxwell::StencilOp::Op::Incr_D3D:
|
||||
case Maxwell::StencilOp::Op::Incr_GL:
|
||||
return 6;
|
||||
case Maxwell::StencilOp::Op::Decr_D3D:
|
||||
case Maxwell::StencilOp::Op::Decr_GL:
|
||||
return 7;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
Maxwell::StencilOp::Op FixedPipelineState::UnpackStencilOp(u32 packed) noexcept {
|
||||
static constexpr std::array LUT = {
|
||||
Maxwell::StencilOp::Op::Keep_D3D, Maxwell::StencilOp::Op::Zero_D3D,
|
||||
Maxwell::StencilOp::Op::Replace_D3D, Maxwell::StencilOp::Op::IncrSaturate_D3D,
|
||||
Maxwell::StencilOp::Op::DecrSaturate_D3D, Maxwell::StencilOp::Op::Invert_D3D,
|
||||
Maxwell::StencilOp::Op::Incr_D3D, Maxwell::StencilOp::Op::Decr_D3D};
|
||||
return LUT[packed];
|
||||
}
|
||||
|
||||
u32 FixedPipelineState::PackCullFace(Maxwell::CullFace cull) noexcept {
|
||||
// FrontAndBack is 0x408, by substracting 0x406 in it we get 2.
|
||||
// Individual cull faces are in 0x404 and 0x405, substracting 0x404 we get 0 and 1.
|
||||
const u32 value = static_cast<u32>(cull);
|
||||
return value - (value == 0x408 ? 0x406 : 0x404);
|
||||
}
|
||||
|
||||
Maxwell::CullFace FixedPipelineState::UnpackCullFace(u32 packed) noexcept {
|
||||
static constexpr std::array LUT = {Maxwell::CullFace::Front, Maxwell::CullFace::Back,
|
||||
Maxwell::CullFace::FrontAndBack};
|
||||
return LUT[packed];
|
||||
}
|
||||
|
||||
u32 FixedPipelineState::PackFrontFace(Maxwell::FrontFace face) noexcept {
|
||||
return static_cast<u32>(face) - 0x900;
|
||||
}
|
||||
|
||||
Maxwell::FrontFace FixedPipelineState::UnpackFrontFace(u32 packed) noexcept {
|
||||
return static_cast<Maxwell::FrontFace>(packed + 0x900);
|
||||
}
|
||||
|
||||
u32 FixedPipelineState::PackPolygonMode(Maxwell::PolygonMode mode) noexcept {
|
||||
return static_cast<u32>(mode) - 0x1B00;
|
||||
}
|
||||
|
||||
Maxwell::PolygonMode FixedPipelineState::UnpackPolygonMode(u32 packed) noexcept {
|
||||
return static_cast<Maxwell::PolygonMode>(packed + 0x1B00);
|
||||
}
|
||||
|
||||
u32 FixedPipelineState::PackLogicOp(Maxwell::LogicOp::Op op) noexcept {
|
||||
return static_cast<u32>(op) - 0x1500;
|
||||
}
|
||||
|
||||
Maxwell::LogicOp::Op FixedPipelineState::UnpackLogicOp(u32 packed) noexcept {
|
||||
return static_cast<Maxwell::LogicOp::Op>(packed + 0x1500);
|
||||
}
|
||||
|
||||
u32 FixedPipelineState::PackBlendEquation(Maxwell::Blend::Equation equation) noexcept {
|
||||
switch (equation) {
|
||||
case Maxwell::Blend::Equation::Add_D3D:
|
||||
case Maxwell::Blend::Equation::Add_GL:
|
||||
return 0;
|
||||
case Maxwell::Blend::Equation::Subtract_D3D:
|
||||
case Maxwell::Blend::Equation::Subtract_GL:
|
||||
return 1;
|
||||
case Maxwell::Blend::Equation::ReverseSubtract_D3D:
|
||||
case Maxwell::Blend::Equation::ReverseSubtract_GL:
|
||||
return 2;
|
||||
case Maxwell::Blend::Equation::Min_D3D:
|
||||
case Maxwell::Blend::Equation::Min_GL:
|
||||
return 3;
|
||||
case Maxwell::Blend::Equation::Max_D3D:
|
||||
case Maxwell::Blend::Equation::Max_GL:
|
||||
return 4;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
Maxwell::Blend::Equation FixedPipelineState::UnpackBlendEquation(u32 packed) noexcept {
|
||||
static constexpr std::array LUT = {
|
||||
Maxwell::Blend::Equation::Add_D3D, Maxwell::Blend::Equation::Subtract_D3D,
|
||||
Maxwell::Blend::Equation::ReverseSubtract_D3D, Maxwell::Blend::Equation::Min_D3D,
|
||||
Maxwell::Blend::Equation::Max_D3D};
|
||||
return LUT[packed];
|
||||
}
|
||||
|
||||
u32 FixedPipelineState::PackBlendFactor(Maxwell::Blend::Factor factor) noexcept {
|
||||
switch (factor) {
|
||||
case Maxwell::Blend::Factor::Zero_D3D:
|
||||
case Maxwell::Blend::Factor::Zero_GL:
|
||||
return 0;
|
||||
case Maxwell::Blend::Factor::One_D3D:
|
||||
case Maxwell::Blend::Factor::One_GL:
|
||||
return 1;
|
||||
case Maxwell::Blend::Factor::SourceColor_D3D:
|
||||
case Maxwell::Blend::Factor::SourceColor_GL:
|
||||
return 2;
|
||||
case Maxwell::Blend::Factor::OneMinusSourceColor_D3D:
|
||||
case Maxwell::Blend::Factor::OneMinusSourceColor_GL:
|
||||
return 3;
|
||||
case Maxwell::Blend::Factor::SourceAlpha_D3D:
|
||||
case Maxwell::Blend::Factor::SourceAlpha_GL:
|
||||
return 4;
|
||||
case Maxwell::Blend::Factor::OneMinusSourceAlpha_D3D:
|
||||
case Maxwell::Blend::Factor::OneMinusSourceAlpha_GL:
|
||||
return 5;
|
||||
case Maxwell::Blend::Factor::DestAlpha_D3D:
|
||||
case Maxwell::Blend::Factor::DestAlpha_GL:
|
||||
return 6;
|
||||
case Maxwell::Blend::Factor::OneMinusDestAlpha_D3D:
|
||||
case Maxwell::Blend::Factor::OneMinusDestAlpha_GL:
|
||||
return 7;
|
||||
case Maxwell::Blend::Factor::DestColor_D3D:
|
||||
case Maxwell::Blend::Factor::DestColor_GL:
|
||||
return 8;
|
||||
case Maxwell::Blend::Factor::OneMinusDestColor_D3D:
|
||||
case Maxwell::Blend::Factor::OneMinusDestColor_GL:
|
||||
return 9;
|
||||
case Maxwell::Blend::Factor::SourceAlphaSaturate_D3D:
|
||||
case Maxwell::Blend::Factor::SourceAlphaSaturate_GL:
|
||||
return 10;
|
||||
case Maxwell::Blend::Factor::Source1Color_D3D:
|
||||
case Maxwell::Blend::Factor::Source1Color_GL:
|
||||
return 11;
|
||||
case Maxwell::Blend::Factor::OneMinusSource1Color_D3D:
|
||||
case Maxwell::Blend::Factor::OneMinusSource1Color_GL:
|
||||
return 12;
|
||||
case Maxwell::Blend::Factor::Source1Alpha_D3D:
|
||||
case Maxwell::Blend::Factor::Source1Alpha_GL:
|
||||
return 13;
|
||||
case Maxwell::Blend::Factor::OneMinusSource1Alpha_D3D:
|
||||
case Maxwell::Blend::Factor::OneMinusSource1Alpha_GL:
|
||||
return 14;
|
||||
case Maxwell::Blend::Factor::BlendFactor_D3D:
|
||||
case Maxwell::Blend::Factor::ConstantColor_GL:
|
||||
return 15;
|
||||
case Maxwell::Blend::Factor::OneMinusBlendFactor_D3D:
|
||||
case Maxwell::Blend::Factor::OneMinusConstantColor_GL:
|
||||
return 16;
|
||||
case Maxwell::Blend::Factor::BothSourceAlpha_D3D:
|
||||
case Maxwell::Blend::Factor::ConstantAlpha_GL:
|
||||
return 17;
|
||||
case Maxwell::Blend::Factor::OneMinusBothSourceAlpha_D3D:
|
||||
case Maxwell::Blend::Factor::OneMinusConstantAlpha_GL:
|
||||
return 18;
|
||||
}
|
||||
UNIMPLEMENTED_MSG("Unknown blend factor {}", static_cast<u32>(factor));
|
||||
return 0;
|
||||
}
|
||||
|
||||
Maxwell::Blend::Factor FixedPipelineState::UnpackBlendFactor(u32 packed) noexcept {
|
||||
static constexpr std::array LUT = {
|
||||
Maxwell::Blend::Factor::Zero_D3D,
|
||||
Maxwell::Blend::Factor::One_D3D,
|
||||
Maxwell::Blend::Factor::SourceColor_D3D,
|
||||
Maxwell::Blend::Factor::OneMinusSourceColor_D3D,
|
||||
Maxwell::Blend::Factor::SourceAlpha_D3D,
|
||||
Maxwell::Blend::Factor::OneMinusSourceAlpha_D3D,
|
||||
Maxwell::Blend::Factor::DestAlpha_D3D,
|
||||
Maxwell::Blend::Factor::OneMinusDestAlpha_D3D,
|
||||
Maxwell::Blend::Factor::DestColor_D3D,
|
||||
Maxwell::Blend::Factor::OneMinusDestColor_D3D,
|
||||
Maxwell::Blend::Factor::SourceAlphaSaturate_D3D,
|
||||
Maxwell::Blend::Factor::Source1Color_D3D,
|
||||
Maxwell::Blend::Factor::OneMinusSource1Color_D3D,
|
||||
Maxwell::Blend::Factor::Source1Alpha_D3D,
|
||||
Maxwell::Blend::Factor::OneMinusSource1Alpha_D3D,
|
||||
Maxwell::Blend::Factor::BlendFactor_D3D,
|
||||
Maxwell::Blend::Factor::OneMinusBlendFactor_D3D,
|
||||
Maxwell::Blend::Factor::BothSourceAlpha_D3D,
|
||||
Maxwell::Blend::Factor::OneMinusBothSourceAlpha_D3D,
|
||||
};
|
||||
ASSERT(packed < LUT.size());
|
||||
return LUT[packed];
|
||||
}
|
||||
|
||||
} // namespace Vulkan
|
||||
// SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#include <algorithm>
|
||||
#include <cstring>
|
||||
|
||||
#include "common/bit_cast.h"
|
||||
#include "common/cityhash.h"
|
||||
#include "common/common_types.h"
|
||||
#include "video_core/renderer_vulkan/fixed_pipeline_state.h"
|
||||
#include "video_core/renderer_vulkan/vk_state_tracker.h"
|
||||
|
||||
namespace Vulkan {
|
||||
namespace {
|
||||
constexpr size_t POINT = 0;
|
||||
constexpr size_t LINE = 1;
|
||||
constexpr size_t POLYGON = 2;
|
||||
constexpr std::array POLYGON_OFFSET_ENABLE_LUT = {
|
||||
POINT, // Points
|
||||
LINE, // Lines
|
||||
LINE, // LineLoop
|
||||
LINE, // LineStrip
|
||||
POLYGON, // Triangles
|
||||
POLYGON, // TriangleStrip
|
||||
POLYGON, // TriangleFan
|
||||
POLYGON, // Quads
|
||||
POLYGON, // QuadStrip
|
||||
POLYGON, // Polygon
|
||||
LINE, // LinesAdjacency
|
||||
LINE, // LineStripAdjacency
|
||||
POLYGON, // TrianglesAdjacency
|
||||
POLYGON, // TriangleStripAdjacency
|
||||
POLYGON, // Patches
|
||||
};
|
||||
|
||||
void RefreshXfbState(VideoCommon::TransformFeedbackState& state, const Maxwell& regs) {
|
||||
std::ranges::transform(regs.transform_feedback.controls, state.layouts.begin(),
|
||||
[](const auto& layout) {
|
||||
return VideoCommon::TransformFeedbackState::Layout{
|
||||
.stream = layout.stream,
|
||||
.varying_count = layout.varying_count,
|
||||
.stride = layout.stride,
|
||||
};
|
||||
});
|
||||
state.varyings = regs.stream_out_layout;
|
||||
}
|
||||
} // Anonymous namespace
|
||||
|
||||
void FixedPipelineState::Refresh(Tegra::Engines::Maxwell3D& maxwell3d,
|
||||
bool has_extended_dynamic_state, bool has_dynamic_vertex_input) {
|
||||
const Maxwell& regs = maxwell3d.regs;
|
||||
const std::array enabled_lut{
|
||||
regs.polygon_offset_point_enable,
|
||||
regs.polygon_offset_line_enable,
|
||||
regs.polygon_offset_fill_enable,
|
||||
};
|
||||
const u32 topology_index = static_cast<u32>(regs.draw.topology.Value());
|
||||
|
||||
raw1 = 0;
|
||||
extended_dynamic_state.Assign(has_extended_dynamic_state ? 1 : 0);
|
||||
dynamic_vertex_input.Assign(has_dynamic_vertex_input ? 1 : 0);
|
||||
xfb_enabled.Assign(regs.transform_feedback_enabled != 0);
|
||||
primitive_restart_enable.Assign(regs.primitive_restart.enabled != 0 ? 1 : 0);
|
||||
depth_bias_enable.Assign(enabled_lut[POLYGON_OFFSET_ENABLE_LUT[topology_index]] != 0 ? 1 : 0);
|
||||
depth_clamp_disabled.Assign(regs.viewport_clip_control.geometry_clip ==
|
||||
Maxwell::ViewportClipControl::GeometryClip::Passthrough ||
|
||||
regs.viewport_clip_control.geometry_clip ==
|
||||
Maxwell::ViewportClipControl::GeometryClip::FrustumXYZ ||
|
||||
regs.viewport_clip_control.geometry_clip ==
|
||||
Maxwell::ViewportClipControl::GeometryClip::FrustumZ);
|
||||
ndc_minus_one_to_one.Assign(regs.depth_mode == Maxwell::DepthMode::MinusOneToOne ? 1 : 0);
|
||||
polygon_mode.Assign(PackPolygonMode(regs.polygon_mode_front));
|
||||
patch_control_points_minus_one.Assign(regs.patch_vertices - 1);
|
||||
tessellation_primitive.Assign(static_cast<u32>(regs.tessellation.params.domain_type.Value()));
|
||||
tessellation_spacing.Assign(static_cast<u32>(regs.tessellation.params.spacing.Value()));
|
||||
tessellation_clockwise.Assign(regs.tessellation.params.output_primitives.Value() ==
|
||||
Maxwell::Tessellation::OutputPrimitives::Triangles_CW);
|
||||
logic_op_enable.Assign(regs.logic_op.enable != 0 ? 1 : 0);
|
||||
logic_op.Assign(PackLogicOp(regs.logic_op.op));
|
||||
topology.Assign(regs.draw.topology);
|
||||
msaa_mode.Assign(regs.anti_alias_samples_mode);
|
||||
|
||||
raw2 = 0;
|
||||
rasterize_enable.Assign(regs.rasterize_enable != 0 ? 1 : 0);
|
||||
const auto test_func =
|
||||
regs.alpha_test_enabled != 0 ? regs.alpha_test_func : Maxwell::ComparisonOp::Always_GL;
|
||||
alpha_test_func.Assign(PackComparisonOp(test_func));
|
||||
early_z.Assign(regs.mandated_early_z != 0 ? 1 : 0);
|
||||
depth_enabled.Assign(regs.zeta_enable != 0 ? 1 : 0);
|
||||
depth_format.Assign(static_cast<u32>(regs.zeta.format));
|
||||
y_negate.Assign(regs.window_origin.mode != Maxwell::WindowOrigin::Mode::UpperLeft ? 1 : 0);
|
||||
provoking_vertex_last.Assign(regs.provoking_vertex == Maxwell::ProvokingVertex::Last ? 1 : 0);
|
||||
smooth_lines.Assign(regs.line_anti_alias_enable != 0 ? 1 : 0);
|
||||
|
||||
for (size_t i = 0; i < regs.rt.size(); ++i) {
|
||||
color_formats[i] = static_cast<u8>(regs.rt[i].format);
|
||||
}
|
||||
alpha_test_ref = Common::BitCast<u32>(regs.alpha_test_ref);
|
||||
point_size = Common::BitCast<u32>(regs.point_size);
|
||||
|
||||
if (maxwell3d.dirty.flags[Dirty::VertexInput]) {
|
||||
if (has_dynamic_vertex_input) {
|
||||
// Dirty flag will be reset by the command buffer update
|
||||
static constexpr std::array LUT{
|
||||
0u, // Invalid
|
||||
1u, // SignedNorm
|
||||
1u, // UnsignedNorm
|
||||
2u, // SignedInt
|
||||
3u, // UnsignedInt
|
||||
1u, // UnsignedScaled
|
||||
1u, // SignedScaled
|
||||
1u, // Float
|
||||
};
|
||||
const auto& attrs = regs.vertex_attrib_format;
|
||||
attribute_types = 0;
|
||||
for (size_t i = 0; i < Maxwell::NumVertexAttributes; ++i) {
|
||||
const u32 mask = attrs[i].constant != 0 ? 0 : 3;
|
||||
const u32 type = LUT[static_cast<size_t>(attrs[i].type.Value())];
|
||||
attribute_types |= static_cast<u64>(type & mask) << (i * 2);
|
||||
}
|
||||
} else {
|
||||
maxwell3d.dirty.flags[Dirty::VertexInput] = false;
|
||||
enabled_divisors = 0;
|
||||
for (size_t index = 0; index < Maxwell::NumVertexArrays; ++index) {
|
||||
const bool is_enabled = regs.vertex_stream_instances.IsInstancingEnabled(index);
|
||||
binding_divisors[index] = is_enabled ? regs.vertex_streams[index].frequency : 0;
|
||||
enabled_divisors |= (is_enabled ? u64{1} : 0) << index;
|
||||
}
|
||||
for (size_t index = 0; index < Maxwell::NumVertexAttributes; ++index) {
|
||||
const auto& input = regs.vertex_attrib_format[index];
|
||||
auto& attribute = attributes[index];
|
||||
attribute.raw = 0;
|
||||
attribute.enabled.Assign(input.constant ? 0 : 1);
|
||||
attribute.buffer.Assign(input.buffer);
|
||||
attribute.offset.Assign(input.offset);
|
||||
attribute.type.Assign(static_cast<u32>(input.type.Value()));
|
||||
attribute.size.Assign(static_cast<u32>(input.size.Value()));
|
||||
}
|
||||
}
|
||||
}
|
||||
if (maxwell3d.dirty.flags[Dirty::Blending]) {
|
||||
maxwell3d.dirty.flags[Dirty::Blending] = false;
|
||||
for (size_t index = 0; index < attachments.size(); ++index) {
|
||||
attachments[index].Refresh(regs, index);
|
||||
}
|
||||
}
|
||||
if (maxwell3d.dirty.flags[Dirty::ViewportSwizzles]) {
|
||||
maxwell3d.dirty.flags[Dirty::ViewportSwizzles] = false;
|
||||
const auto& transform = regs.viewport_transform;
|
||||
std::ranges::transform(transform, viewport_swizzles.begin(), [](const auto& viewport) {
|
||||
return static_cast<u16>(viewport.swizzle.raw);
|
||||
});
|
||||
}
|
||||
if (!extended_dynamic_state) {
|
||||
dynamic_state.Refresh(regs);
|
||||
}
|
||||
if (xfb_enabled) {
|
||||
RefreshXfbState(xfb_state, regs);
|
||||
}
|
||||
}
|
||||
|
||||
void FixedPipelineState::BlendingAttachment::Refresh(const Maxwell& regs, size_t index) {
|
||||
const auto& mask = regs.color_mask[regs.color_mask_common ? 0 : index];
|
||||
|
||||
raw = 0;
|
||||
mask_r.Assign(mask.R);
|
||||
mask_g.Assign(mask.G);
|
||||
mask_b.Assign(mask.B);
|
||||
mask_a.Assign(mask.A);
|
||||
|
||||
// TODO: C++20 Use templated lambda to deduplicate code
|
||||
|
||||
if (!regs.blend_per_target_enabled) {
|
||||
if (!regs.blend.enable[index]) {
|
||||
return;
|
||||
}
|
||||
const auto& src = regs.blend;
|
||||
equation_rgb.Assign(PackBlendEquation(src.color_op));
|
||||
equation_a.Assign(PackBlendEquation(src.alpha_op));
|
||||
factor_source_rgb.Assign(PackBlendFactor(src.color_source));
|
||||
factor_dest_rgb.Assign(PackBlendFactor(src.color_dest));
|
||||
factor_source_a.Assign(PackBlendFactor(src.alpha_source));
|
||||
factor_dest_a.Assign(PackBlendFactor(src.alpha_dest));
|
||||
enable.Assign(1);
|
||||
return;
|
||||
}
|
||||
|
||||
if (!regs.blend.enable[index]) {
|
||||
return;
|
||||
}
|
||||
const auto& src = regs.blend_per_target[index];
|
||||
equation_rgb.Assign(PackBlendEquation(src.color_op));
|
||||
equation_a.Assign(PackBlendEquation(src.alpha_op));
|
||||
factor_source_rgb.Assign(PackBlendFactor(src.color_source));
|
||||
factor_dest_rgb.Assign(PackBlendFactor(src.color_dest));
|
||||
factor_source_a.Assign(PackBlendFactor(src.alpha_source));
|
||||
factor_dest_a.Assign(PackBlendFactor(src.alpha_dest));
|
||||
enable.Assign(1);
|
||||
}
|
||||
|
||||
void FixedPipelineState::DynamicState::Refresh(const Maxwell& regs) {
|
||||
u32 packed_front_face = PackFrontFace(regs.gl_front_face);
|
||||
if (regs.window_origin.flip_y != 0) {
|
||||
// Flip front face
|
||||
packed_front_face = 1 - packed_front_face;
|
||||
}
|
||||
|
||||
raw1 = 0;
|
||||
raw2 = 0;
|
||||
front.action_stencil_fail.Assign(PackStencilOp(regs.stencil_front_op.fail));
|
||||
front.action_depth_fail.Assign(PackStencilOp(regs.stencil_front_op.zfail));
|
||||
front.action_depth_pass.Assign(PackStencilOp(regs.stencil_front_op.zpass));
|
||||
front.test_func.Assign(PackComparisonOp(regs.stencil_front_op.func));
|
||||
if (regs.stencil_two_side_enable) {
|
||||
back.action_stencil_fail.Assign(PackStencilOp(regs.stencil_back_op.fail));
|
||||
back.action_depth_fail.Assign(PackStencilOp(regs.stencil_back_op.zfail));
|
||||
back.action_depth_pass.Assign(PackStencilOp(regs.stencil_back_op.zpass));
|
||||
back.test_func.Assign(PackComparisonOp(regs.stencil_back_op.func));
|
||||
} else {
|
||||
back.action_stencil_fail.Assign(front.action_stencil_fail);
|
||||
back.action_depth_fail.Assign(front.action_depth_fail);
|
||||
back.action_depth_pass.Assign(front.action_depth_pass);
|
||||
back.test_func.Assign(front.test_func);
|
||||
}
|
||||
stencil_enable.Assign(regs.stencil_enable);
|
||||
depth_write_enable.Assign(regs.depth_write_enabled);
|
||||
depth_bounds_enable.Assign(regs.depth_bounds_enable);
|
||||
depth_test_enable.Assign(regs.depth_test_enable);
|
||||
front_face.Assign(packed_front_face);
|
||||
depth_test_func.Assign(PackComparisonOp(regs.depth_test_func));
|
||||
cull_face.Assign(PackCullFace(regs.gl_cull_face));
|
||||
cull_enable.Assign(regs.gl_cull_test_enabled != 0 ? 1 : 0);
|
||||
std::ranges::transform(regs.vertex_streams, vertex_strides.begin(), [](const auto& array) {
|
||||
return static_cast<u16>(array.stride.Value());
|
||||
});
|
||||
}
|
||||
|
||||
size_t FixedPipelineState::Hash() const noexcept {
|
||||
const u64 hash = Common::CityHash64(reinterpret_cast<const char*>(this), Size());
|
||||
return static_cast<size_t>(hash);
|
||||
}
|
||||
|
||||
bool FixedPipelineState::operator==(const FixedPipelineState& rhs) const noexcept {
|
||||
return std::memcmp(this, &rhs, Size()) == 0;
|
||||
}
|
||||
|
||||
u32 FixedPipelineState::PackComparisonOp(Maxwell::ComparisonOp op) noexcept {
|
||||
// OpenGL enums go from 0x200 to 0x207 and the others from 1 to 8
|
||||
// If we substract 0x200 to OpenGL enums and 1 to the others we get a 0-7 range.
|
||||
// Perfect for a hash.
|
||||
const u32 value = static_cast<u32>(op);
|
||||
return value - (value >= 0x200 ? 0x200 : 1);
|
||||
}
|
||||
|
||||
Maxwell::ComparisonOp FixedPipelineState::UnpackComparisonOp(u32 packed) noexcept {
|
||||
// Read PackComparisonOp for the logic behind this.
|
||||
return static_cast<Maxwell::ComparisonOp>(packed + 1);
|
||||
}
|
||||
|
||||
u32 FixedPipelineState::PackStencilOp(Maxwell::StencilOp::Op op) noexcept {
|
||||
switch (op) {
|
||||
case Maxwell::StencilOp::Op::Keep_D3D:
|
||||
case Maxwell::StencilOp::Op::Keep_GL:
|
||||
return 0;
|
||||
case Maxwell::StencilOp::Op::Zero_D3D:
|
||||
case Maxwell::StencilOp::Op::Zero_GL:
|
||||
return 1;
|
||||
case Maxwell::StencilOp::Op::Replace_D3D:
|
||||
case Maxwell::StencilOp::Op::Replace_GL:
|
||||
return 2;
|
||||
case Maxwell::StencilOp::Op::IncrSaturate_D3D:
|
||||
case Maxwell::StencilOp::Op::IncrSaturate_GL:
|
||||
return 3;
|
||||
case Maxwell::StencilOp::Op::DecrSaturate_D3D:
|
||||
case Maxwell::StencilOp::Op::DecrSaturate_GL:
|
||||
return 4;
|
||||
case Maxwell::StencilOp::Op::Invert_D3D:
|
||||
case Maxwell::StencilOp::Op::Invert_GL:
|
||||
return 5;
|
||||
case Maxwell::StencilOp::Op::Incr_D3D:
|
||||
case Maxwell::StencilOp::Op::Incr_GL:
|
||||
return 6;
|
||||
case Maxwell::StencilOp::Op::Decr_D3D:
|
||||
case Maxwell::StencilOp::Op::Decr_GL:
|
||||
return 7;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
Maxwell::StencilOp::Op FixedPipelineState::UnpackStencilOp(u32 packed) noexcept {
|
||||
static constexpr std::array LUT = {
|
||||
Maxwell::StencilOp::Op::Keep_D3D, Maxwell::StencilOp::Op::Zero_D3D,
|
||||
Maxwell::StencilOp::Op::Replace_D3D, Maxwell::StencilOp::Op::IncrSaturate_D3D,
|
||||
Maxwell::StencilOp::Op::DecrSaturate_D3D, Maxwell::StencilOp::Op::Invert_D3D,
|
||||
Maxwell::StencilOp::Op::Incr_D3D, Maxwell::StencilOp::Op::Decr_D3D};
|
||||
return LUT[packed];
|
||||
}
|
||||
|
||||
u32 FixedPipelineState::PackCullFace(Maxwell::CullFace cull) noexcept {
|
||||
// FrontAndBack is 0x408, by substracting 0x406 in it we get 2.
|
||||
// Individual cull faces are in 0x404 and 0x405, substracting 0x404 we get 0 and 1.
|
||||
const u32 value = static_cast<u32>(cull);
|
||||
return value - (value == 0x408 ? 0x406 : 0x404);
|
||||
}
|
||||
|
||||
Maxwell::CullFace FixedPipelineState::UnpackCullFace(u32 packed) noexcept {
|
||||
static constexpr std::array LUT = {Maxwell::CullFace::Front, Maxwell::CullFace::Back,
|
||||
Maxwell::CullFace::FrontAndBack};
|
||||
return LUT[packed];
|
||||
}
|
||||
|
||||
u32 FixedPipelineState::PackFrontFace(Maxwell::FrontFace face) noexcept {
|
||||
return static_cast<u32>(face) - 0x900;
|
||||
}
|
||||
|
||||
Maxwell::FrontFace FixedPipelineState::UnpackFrontFace(u32 packed) noexcept {
|
||||
return static_cast<Maxwell::FrontFace>(packed + 0x900);
|
||||
}
|
||||
|
||||
u32 FixedPipelineState::PackPolygonMode(Maxwell::PolygonMode mode) noexcept {
|
||||
return static_cast<u32>(mode) - 0x1B00;
|
||||
}
|
||||
|
||||
Maxwell::PolygonMode FixedPipelineState::UnpackPolygonMode(u32 packed) noexcept {
|
||||
return static_cast<Maxwell::PolygonMode>(packed + 0x1B00);
|
||||
}
|
||||
|
||||
u32 FixedPipelineState::PackLogicOp(Maxwell::LogicOp::Op op) noexcept {
|
||||
return static_cast<u32>(op) - 0x1500;
|
||||
}
|
||||
|
||||
Maxwell::LogicOp::Op FixedPipelineState::UnpackLogicOp(u32 packed) noexcept {
|
||||
return static_cast<Maxwell::LogicOp::Op>(packed + 0x1500);
|
||||
}
|
||||
|
||||
u32 FixedPipelineState::PackBlendEquation(Maxwell::Blend::Equation equation) noexcept {
|
||||
switch (equation) {
|
||||
case Maxwell::Blend::Equation::Add_D3D:
|
||||
case Maxwell::Blend::Equation::Add_GL:
|
||||
return 0;
|
||||
case Maxwell::Blend::Equation::Subtract_D3D:
|
||||
case Maxwell::Blend::Equation::Subtract_GL:
|
||||
return 1;
|
||||
case Maxwell::Blend::Equation::ReverseSubtract_D3D:
|
||||
case Maxwell::Blend::Equation::ReverseSubtract_GL:
|
||||
return 2;
|
||||
case Maxwell::Blend::Equation::Min_D3D:
|
||||
case Maxwell::Blend::Equation::Min_GL:
|
||||
return 3;
|
||||
case Maxwell::Blend::Equation::Max_D3D:
|
||||
case Maxwell::Blend::Equation::Max_GL:
|
||||
return 4;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
Maxwell::Blend::Equation FixedPipelineState::UnpackBlendEquation(u32 packed) noexcept {
|
||||
static constexpr std::array LUT = {
|
||||
Maxwell::Blend::Equation::Add_D3D, Maxwell::Blend::Equation::Subtract_D3D,
|
||||
Maxwell::Blend::Equation::ReverseSubtract_D3D, Maxwell::Blend::Equation::Min_D3D,
|
||||
Maxwell::Blend::Equation::Max_D3D};
|
||||
return LUT[packed];
|
||||
}
|
||||
|
||||
u32 FixedPipelineState::PackBlendFactor(Maxwell::Blend::Factor factor) noexcept {
|
||||
switch (factor) {
|
||||
case Maxwell::Blend::Factor::Zero_D3D:
|
||||
case Maxwell::Blend::Factor::Zero_GL:
|
||||
return 0;
|
||||
case Maxwell::Blend::Factor::One_D3D:
|
||||
case Maxwell::Blend::Factor::One_GL:
|
||||
return 1;
|
||||
case Maxwell::Blend::Factor::SourceColor_D3D:
|
||||
case Maxwell::Blend::Factor::SourceColor_GL:
|
||||
return 2;
|
||||
case Maxwell::Blend::Factor::OneMinusSourceColor_D3D:
|
||||
case Maxwell::Blend::Factor::OneMinusSourceColor_GL:
|
||||
return 3;
|
||||
case Maxwell::Blend::Factor::SourceAlpha_D3D:
|
||||
case Maxwell::Blend::Factor::SourceAlpha_GL:
|
||||
return 4;
|
||||
case Maxwell::Blend::Factor::OneMinusSourceAlpha_D3D:
|
||||
case Maxwell::Blend::Factor::OneMinusSourceAlpha_GL:
|
||||
return 5;
|
||||
case Maxwell::Blend::Factor::DestAlpha_D3D:
|
||||
case Maxwell::Blend::Factor::DestAlpha_GL:
|
||||
return 6;
|
||||
case Maxwell::Blend::Factor::OneMinusDestAlpha_D3D:
|
||||
case Maxwell::Blend::Factor::OneMinusDestAlpha_GL:
|
||||
return 7;
|
||||
case Maxwell::Blend::Factor::DestColor_D3D:
|
||||
case Maxwell::Blend::Factor::DestColor_GL:
|
||||
return 8;
|
||||
case Maxwell::Blend::Factor::OneMinusDestColor_D3D:
|
||||
case Maxwell::Blend::Factor::OneMinusDestColor_GL:
|
||||
return 9;
|
||||
case Maxwell::Blend::Factor::SourceAlphaSaturate_D3D:
|
||||
case Maxwell::Blend::Factor::SourceAlphaSaturate_GL:
|
||||
return 10;
|
||||
case Maxwell::Blend::Factor::Source1Color_D3D:
|
||||
case Maxwell::Blend::Factor::Source1Color_GL:
|
||||
return 11;
|
||||
case Maxwell::Blend::Factor::OneMinusSource1Color_D3D:
|
||||
case Maxwell::Blend::Factor::OneMinusSource1Color_GL:
|
||||
return 12;
|
||||
case Maxwell::Blend::Factor::Source1Alpha_D3D:
|
||||
case Maxwell::Blend::Factor::Source1Alpha_GL:
|
||||
return 13;
|
||||
case Maxwell::Blend::Factor::OneMinusSource1Alpha_D3D:
|
||||
case Maxwell::Blend::Factor::OneMinusSource1Alpha_GL:
|
||||
return 14;
|
||||
case Maxwell::Blend::Factor::BlendFactor_D3D:
|
||||
case Maxwell::Blend::Factor::ConstantColor_GL:
|
||||
return 15;
|
||||
case Maxwell::Blend::Factor::OneMinusBlendFactor_D3D:
|
||||
case Maxwell::Blend::Factor::OneMinusConstantColor_GL:
|
||||
return 16;
|
||||
case Maxwell::Blend::Factor::BothSourceAlpha_D3D:
|
||||
case Maxwell::Blend::Factor::ConstantAlpha_GL:
|
||||
return 17;
|
||||
case Maxwell::Blend::Factor::OneMinusBothSourceAlpha_D3D:
|
||||
case Maxwell::Blend::Factor::OneMinusConstantAlpha_GL:
|
||||
return 18;
|
||||
}
|
||||
UNIMPLEMENTED_MSG("Unknown blend factor {}", static_cast<u32>(factor));
|
||||
return 0;
|
||||
}
|
||||
|
||||
Maxwell::Blend::Factor FixedPipelineState::UnpackBlendFactor(u32 packed) noexcept {
|
||||
static constexpr std::array LUT = {
|
||||
Maxwell::Blend::Factor::Zero_D3D,
|
||||
Maxwell::Blend::Factor::One_D3D,
|
||||
Maxwell::Blend::Factor::SourceColor_D3D,
|
||||
Maxwell::Blend::Factor::OneMinusSourceColor_D3D,
|
||||
Maxwell::Blend::Factor::SourceAlpha_D3D,
|
||||
Maxwell::Blend::Factor::OneMinusSourceAlpha_D3D,
|
||||
Maxwell::Blend::Factor::DestAlpha_D3D,
|
||||
Maxwell::Blend::Factor::OneMinusDestAlpha_D3D,
|
||||
Maxwell::Blend::Factor::DestColor_D3D,
|
||||
Maxwell::Blend::Factor::OneMinusDestColor_D3D,
|
||||
Maxwell::Blend::Factor::SourceAlphaSaturate_D3D,
|
||||
Maxwell::Blend::Factor::Source1Color_D3D,
|
||||
Maxwell::Blend::Factor::OneMinusSource1Color_D3D,
|
||||
Maxwell::Blend::Factor::Source1Alpha_D3D,
|
||||
Maxwell::Blend::Factor::OneMinusSource1Alpha_D3D,
|
||||
Maxwell::Blend::Factor::BlendFactor_D3D,
|
||||
Maxwell::Blend::Factor::OneMinusBlendFactor_D3D,
|
||||
Maxwell::Blend::Factor::BothSourceAlpha_D3D,
|
||||
Maxwell::Blend::Factor::OneMinusBothSourceAlpha_D3D,
|
||||
};
|
||||
ASSERT(packed < LUT.size());
|
||||
return LUT[packed];
|
||||
}
|
||||
|
||||
} // namespace Vulkan
|
||||
|
||||
@@ -1,261 +1,261 @@
|
||||
// SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <array>
|
||||
#include <type_traits>
|
||||
|
||||
#include "common/bit_field.h"
|
||||
#include "common/common_types.h"
|
||||
|
||||
#include "video_core/engines/maxwell_3d.h"
|
||||
#include "video_core/surface.h"
|
||||
#include "video_core/transform_feedback.h"
|
||||
|
||||
namespace Vulkan {
|
||||
|
||||
using Maxwell = Tegra::Engines::Maxwell3D::Regs;
|
||||
|
||||
struct FixedPipelineState {
|
||||
static u32 PackComparisonOp(Maxwell::ComparisonOp op) noexcept;
|
||||
static Maxwell::ComparisonOp UnpackComparisonOp(u32 packed) noexcept;
|
||||
|
||||
static u32 PackStencilOp(Maxwell::StencilOp::Op op) noexcept;
|
||||
static Maxwell::StencilOp::Op UnpackStencilOp(u32 packed) noexcept;
|
||||
|
||||
static u32 PackCullFace(Maxwell::CullFace cull) noexcept;
|
||||
static Maxwell::CullFace UnpackCullFace(u32 packed) noexcept;
|
||||
|
||||
static u32 PackFrontFace(Maxwell::FrontFace face) noexcept;
|
||||
static Maxwell::FrontFace UnpackFrontFace(u32 packed) noexcept;
|
||||
|
||||
static u32 PackPolygonMode(Maxwell::PolygonMode mode) noexcept;
|
||||
static Maxwell::PolygonMode UnpackPolygonMode(u32 packed) noexcept;
|
||||
|
||||
static u32 PackLogicOp(Maxwell::LogicOp::Op op) noexcept;
|
||||
static Maxwell::LogicOp::Op UnpackLogicOp(u32 packed) noexcept;
|
||||
|
||||
static u32 PackBlendEquation(Maxwell::Blend::Equation equation) noexcept;
|
||||
static Maxwell::Blend::Equation UnpackBlendEquation(u32 packed) noexcept;
|
||||
|
||||
static u32 PackBlendFactor(Maxwell::Blend::Factor factor) noexcept;
|
||||
static Maxwell::Blend::Factor UnpackBlendFactor(u32 packed) noexcept;
|
||||
|
||||
struct BlendingAttachment {
|
||||
union {
|
||||
u32 raw;
|
||||
BitField<0, 1, u32> mask_r;
|
||||
BitField<1, 1, u32> mask_g;
|
||||
BitField<2, 1, u32> mask_b;
|
||||
BitField<3, 1, u32> mask_a;
|
||||
BitField<4, 3, u32> equation_rgb;
|
||||
BitField<7, 3, u32> equation_a;
|
||||
BitField<10, 5, u32> factor_source_rgb;
|
||||
BitField<15, 5, u32> factor_dest_rgb;
|
||||
BitField<20, 5, u32> factor_source_a;
|
||||
BitField<25, 5, u32> factor_dest_a;
|
||||
BitField<30, 1, u32> enable;
|
||||
};
|
||||
|
||||
void Refresh(const Maxwell& regs, size_t index);
|
||||
|
||||
std::array<bool, 4> Mask() const noexcept {
|
||||
return {mask_r != 0, mask_g != 0, mask_b != 0, mask_a != 0};
|
||||
}
|
||||
|
||||
Maxwell::Blend::Equation EquationRGB() const noexcept {
|
||||
return UnpackBlendEquation(equation_rgb.Value());
|
||||
}
|
||||
|
||||
Maxwell::Blend::Equation EquationAlpha() const noexcept {
|
||||
return UnpackBlendEquation(equation_a.Value());
|
||||
}
|
||||
|
||||
Maxwell::Blend::Factor SourceRGBFactor() const noexcept {
|
||||
return UnpackBlendFactor(factor_source_rgb.Value());
|
||||
}
|
||||
|
||||
Maxwell::Blend::Factor DestRGBFactor() const noexcept {
|
||||
return UnpackBlendFactor(factor_dest_rgb.Value());
|
||||
}
|
||||
|
||||
Maxwell::Blend::Factor SourceAlphaFactor() const noexcept {
|
||||
return UnpackBlendFactor(factor_source_a.Value());
|
||||
}
|
||||
|
||||
Maxwell::Blend::Factor DestAlphaFactor() const noexcept {
|
||||
return UnpackBlendFactor(factor_dest_a.Value());
|
||||
}
|
||||
};
|
||||
|
||||
union VertexAttribute {
|
||||
u32 raw;
|
||||
BitField<0, 1, u32> enabled;
|
||||
BitField<1, 5, u32> buffer;
|
||||
BitField<6, 14, u32> offset;
|
||||
BitField<20, 3, u32> type;
|
||||
BitField<23, 6, u32> size;
|
||||
|
||||
Maxwell::VertexAttribute::Type Type() const noexcept {
|
||||
return static_cast<Maxwell::VertexAttribute::Type>(type.Value());
|
||||
}
|
||||
|
||||
Maxwell::VertexAttribute::Size Size() const noexcept {
|
||||
return static_cast<Maxwell::VertexAttribute::Size>(size.Value());
|
||||
}
|
||||
};
|
||||
|
||||
template <size_t Position>
|
||||
union StencilFace {
|
||||
BitField<Position + 0, 3, u32> action_stencil_fail;
|
||||
BitField<Position + 3, 3, u32> action_depth_fail;
|
||||
BitField<Position + 6, 3, u32> action_depth_pass;
|
||||
BitField<Position + 9, 3, u32> test_func;
|
||||
|
||||
Maxwell::StencilOp::Op ActionStencilFail() const noexcept {
|
||||
return UnpackStencilOp(action_stencil_fail);
|
||||
}
|
||||
|
||||
Maxwell::StencilOp::Op ActionDepthFail() const noexcept {
|
||||
return UnpackStencilOp(action_depth_fail);
|
||||
}
|
||||
|
||||
Maxwell::StencilOp::Op ActionDepthPass() const noexcept {
|
||||
return UnpackStencilOp(action_depth_pass);
|
||||
}
|
||||
|
||||
Maxwell::ComparisonOp TestFunc() const noexcept {
|
||||
return UnpackComparisonOp(test_func);
|
||||
}
|
||||
};
|
||||
|
||||
struct DynamicState {
|
||||
union {
|
||||
u32 raw1;
|
||||
StencilFace<0> front;
|
||||
StencilFace<12> back;
|
||||
BitField<24, 1, u32> stencil_enable;
|
||||
BitField<25, 1, u32> depth_write_enable;
|
||||
BitField<26, 1, u32> depth_bounds_enable;
|
||||
BitField<27, 1, u32> depth_test_enable;
|
||||
BitField<28, 1, u32> front_face;
|
||||
BitField<29, 3, u32> depth_test_func;
|
||||
};
|
||||
union {
|
||||
u32 raw2;
|
||||
BitField<0, 2, u32> cull_face;
|
||||
BitField<2, 1, u32> cull_enable;
|
||||
};
|
||||
// Vertex stride is a 12 bits value, we have 4 bits to spare per element
|
||||
std::array<u16, Maxwell::NumVertexArrays> vertex_strides;
|
||||
|
||||
void Refresh(const Maxwell& regs);
|
||||
|
||||
Maxwell::ComparisonOp DepthTestFunc() const noexcept {
|
||||
return UnpackComparisonOp(depth_test_func);
|
||||
}
|
||||
|
||||
Maxwell::CullFace CullFace() const noexcept {
|
||||
return UnpackCullFace(cull_face.Value());
|
||||
}
|
||||
|
||||
Maxwell::FrontFace FrontFace() const noexcept {
|
||||
return UnpackFrontFace(front_face.Value());
|
||||
}
|
||||
};
|
||||
|
||||
union {
|
||||
u32 raw1;
|
||||
BitField<0, 1, u32> extended_dynamic_state;
|
||||
BitField<1, 1, u32> dynamic_vertex_input;
|
||||
BitField<2, 1, u32> xfb_enabled;
|
||||
BitField<3, 1, u32> primitive_restart_enable;
|
||||
BitField<4, 1, u32> depth_bias_enable;
|
||||
BitField<5, 1, u32> depth_clamp_disabled;
|
||||
BitField<6, 1, u32> ndc_minus_one_to_one;
|
||||
BitField<7, 2, u32> polygon_mode;
|
||||
BitField<9, 5, u32> patch_control_points_minus_one;
|
||||
BitField<14, 2, u32> tessellation_primitive;
|
||||
BitField<16, 2, u32> tessellation_spacing;
|
||||
BitField<18, 1, u32> tessellation_clockwise;
|
||||
BitField<19, 1, u32> logic_op_enable;
|
||||
BitField<20, 4, u32> logic_op;
|
||||
BitField<24, 4, Maxwell::PrimitiveTopology> topology;
|
||||
BitField<28, 4, Tegra::Texture::MsaaMode> msaa_mode;
|
||||
};
|
||||
union {
|
||||
u32 raw2;
|
||||
BitField<0, 1, u32> rasterize_enable;
|
||||
BitField<1, 3, u32> alpha_test_func;
|
||||
BitField<4, 1, u32> early_z;
|
||||
BitField<5, 1, u32> depth_enabled;
|
||||
BitField<6, 5, u32> depth_format;
|
||||
BitField<11, 1, u32> y_negate;
|
||||
BitField<12, 1, u32> provoking_vertex_last;
|
||||
BitField<14, 1, u32> smooth_lines;
|
||||
};
|
||||
std::array<u8, Maxwell::NumRenderTargets> color_formats;
|
||||
|
||||
u32 alpha_test_ref;
|
||||
u32 point_size;
|
||||
std::array<BlendingAttachment, Maxwell::NumRenderTargets> attachments;
|
||||
std::array<u16, Maxwell::NumViewports> viewport_swizzles;
|
||||
union {
|
||||
u64 attribute_types; // Used with VK_EXT_vertex_input_dynamic_state
|
||||
u64 enabled_divisors;
|
||||
};
|
||||
std::array<VertexAttribute, Maxwell::NumVertexAttributes> attributes;
|
||||
std::array<u32, Maxwell::NumVertexArrays> binding_divisors;
|
||||
|
||||
DynamicState dynamic_state;
|
||||
VideoCommon::TransformFeedbackState xfb_state;
|
||||
|
||||
void Refresh(Tegra::Engines::Maxwell3D& maxwell3d, bool has_extended_dynamic_state,
|
||||
bool has_dynamic_vertex_input);
|
||||
|
||||
size_t Hash() const noexcept;
|
||||
|
||||
bool operator==(const FixedPipelineState& rhs) const noexcept;
|
||||
|
||||
bool operator!=(const FixedPipelineState& rhs) const noexcept {
|
||||
return !operator==(rhs);
|
||||
}
|
||||
|
||||
size_t Size() const noexcept {
|
||||
if (xfb_enabled) {
|
||||
// When transform feedback is enabled, use the whole struct
|
||||
return sizeof(*this);
|
||||
}
|
||||
if (dynamic_vertex_input) {
|
||||
// Exclude dynamic state and attributes
|
||||
return offsetof(FixedPipelineState, attributes);
|
||||
}
|
||||
if (extended_dynamic_state) {
|
||||
// Exclude dynamic state
|
||||
return offsetof(FixedPipelineState, dynamic_state);
|
||||
}
|
||||
// Default
|
||||
return offsetof(FixedPipelineState, xfb_state);
|
||||
}
|
||||
|
||||
u32 DynamicAttributeType(size_t index) const noexcept {
|
||||
return (attribute_types >> (index * 2)) & 0b11;
|
||||
}
|
||||
};
|
||||
static_assert(std::has_unique_object_representations_v<FixedPipelineState>);
|
||||
static_assert(std::is_trivially_copyable_v<FixedPipelineState>);
|
||||
static_assert(std::is_trivially_constructible_v<FixedPipelineState>);
|
||||
|
||||
} // namespace Vulkan
|
||||
|
||||
namespace std {
|
||||
|
||||
template <>
|
||||
struct hash<Vulkan::FixedPipelineState> {
|
||||
size_t operator()(const Vulkan::FixedPipelineState& k) const noexcept {
|
||||
return k.Hash();
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace std
|
||||
// SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <array>
|
||||
#include <type_traits>
|
||||
|
||||
#include "common/bit_field.h"
|
||||
#include "common/common_types.h"
|
||||
|
||||
#include "video_core/engines/maxwell_3d.h"
|
||||
#include "video_core/surface.h"
|
||||
#include "video_core/transform_feedback.h"
|
||||
|
||||
namespace Vulkan {
|
||||
|
||||
using Maxwell = Tegra::Engines::Maxwell3D::Regs;
|
||||
|
||||
struct FixedPipelineState {
|
||||
static u32 PackComparisonOp(Maxwell::ComparisonOp op) noexcept;
|
||||
static Maxwell::ComparisonOp UnpackComparisonOp(u32 packed) noexcept;
|
||||
|
||||
static u32 PackStencilOp(Maxwell::StencilOp::Op op) noexcept;
|
||||
static Maxwell::StencilOp::Op UnpackStencilOp(u32 packed) noexcept;
|
||||
|
||||
static u32 PackCullFace(Maxwell::CullFace cull) noexcept;
|
||||
static Maxwell::CullFace UnpackCullFace(u32 packed) noexcept;
|
||||
|
||||
static u32 PackFrontFace(Maxwell::FrontFace face) noexcept;
|
||||
static Maxwell::FrontFace UnpackFrontFace(u32 packed) noexcept;
|
||||
|
||||
static u32 PackPolygonMode(Maxwell::PolygonMode mode) noexcept;
|
||||
static Maxwell::PolygonMode UnpackPolygonMode(u32 packed) noexcept;
|
||||
|
||||
static u32 PackLogicOp(Maxwell::LogicOp::Op op) noexcept;
|
||||
static Maxwell::LogicOp::Op UnpackLogicOp(u32 packed) noexcept;
|
||||
|
||||
static u32 PackBlendEquation(Maxwell::Blend::Equation equation) noexcept;
|
||||
static Maxwell::Blend::Equation UnpackBlendEquation(u32 packed) noexcept;
|
||||
|
||||
static u32 PackBlendFactor(Maxwell::Blend::Factor factor) noexcept;
|
||||
static Maxwell::Blend::Factor UnpackBlendFactor(u32 packed) noexcept;
|
||||
|
||||
struct BlendingAttachment {
|
||||
union {
|
||||
u32 raw;
|
||||
BitField<0, 1, u32> mask_r;
|
||||
BitField<1, 1, u32> mask_g;
|
||||
BitField<2, 1, u32> mask_b;
|
||||
BitField<3, 1, u32> mask_a;
|
||||
BitField<4, 3, u32> equation_rgb;
|
||||
BitField<7, 3, u32> equation_a;
|
||||
BitField<10, 5, u32> factor_source_rgb;
|
||||
BitField<15, 5, u32> factor_dest_rgb;
|
||||
BitField<20, 5, u32> factor_source_a;
|
||||
BitField<25, 5, u32> factor_dest_a;
|
||||
BitField<30, 1, u32> enable;
|
||||
};
|
||||
|
||||
void Refresh(const Maxwell& regs, size_t index);
|
||||
|
||||
std::array<bool, 4> Mask() const noexcept {
|
||||
return {mask_r != 0, mask_g != 0, mask_b != 0, mask_a != 0};
|
||||
}
|
||||
|
||||
Maxwell::Blend::Equation EquationRGB() const noexcept {
|
||||
return UnpackBlendEquation(equation_rgb.Value());
|
||||
}
|
||||
|
||||
Maxwell::Blend::Equation EquationAlpha() const noexcept {
|
||||
return UnpackBlendEquation(equation_a.Value());
|
||||
}
|
||||
|
||||
Maxwell::Blend::Factor SourceRGBFactor() const noexcept {
|
||||
return UnpackBlendFactor(factor_source_rgb.Value());
|
||||
}
|
||||
|
||||
Maxwell::Blend::Factor DestRGBFactor() const noexcept {
|
||||
return UnpackBlendFactor(factor_dest_rgb.Value());
|
||||
}
|
||||
|
||||
Maxwell::Blend::Factor SourceAlphaFactor() const noexcept {
|
||||
return UnpackBlendFactor(factor_source_a.Value());
|
||||
}
|
||||
|
||||
Maxwell::Blend::Factor DestAlphaFactor() const noexcept {
|
||||
return UnpackBlendFactor(factor_dest_a.Value());
|
||||
}
|
||||
};
|
||||
|
||||
union VertexAttribute {
|
||||
u32 raw;
|
||||
BitField<0, 1, u32> enabled;
|
||||
BitField<1, 5, u32> buffer;
|
||||
BitField<6, 14, u32> offset;
|
||||
BitField<20, 3, u32> type;
|
||||
BitField<23, 6, u32> size;
|
||||
|
||||
Maxwell::VertexAttribute::Type Type() const noexcept {
|
||||
return static_cast<Maxwell::VertexAttribute::Type>(type.Value());
|
||||
}
|
||||
|
||||
Maxwell::VertexAttribute::Size Size() const noexcept {
|
||||
return static_cast<Maxwell::VertexAttribute::Size>(size.Value());
|
||||
}
|
||||
};
|
||||
|
||||
template <size_t Position>
|
||||
union StencilFace {
|
||||
BitField<Position + 0, 3, u32> action_stencil_fail;
|
||||
BitField<Position + 3, 3, u32> action_depth_fail;
|
||||
BitField<Position + 6, 3, u32> action_depth_pass;
|
||||
BitField<Position + 9, 3, u32> test_func;
|
||||
|
||||
Maxwell::StencilOp::Op ActionStencilFail() const noexcept {
|
||||
return UnpackStencilOp(action_stencil_fail);
|
||||
}
|
||||
|
||||
Maxwell::StencilOp::Op ActionDepthFail() const noexcept {
|
||||
return UnpackStencilOp(action_depth_fail);
|
||||
}
|
||||
|
||||
Maxwell::StencilOp::Op ActionDepthPass() const noexcept {
|
||||
return UnpackStencilOp(action_depth_pass);
|
||||
}
|
||||
|
||||
Maxwell::ComparisonOp TestFunc() const noexcept {
|
||||
return UnpackComparisonOp(test_func);
|
||||
}
|
||||
};
|
||||
|
||||
struct DynamicState {
|
||||
union {
|
||||
u32 raw1;
|
||||
StencilFace<0> front;
|
||||
StencilFace<12> back;
|
||||
BitField<24, 1, u32> stencil_enable;
|
||||
BitField<25, 1, u32> depth_write_enable;
|
||||
BitField<26, 1, u32> depth_bounds_enable;
|
||||
BitField<27, 1, u32> depth_test_enable;
|
||||
BitField<28, 1, u32> front_face;
|
||||
BitField<29, 3, u32> depth_test_func;
|
||||
};
|
||||
union {
|
||||
u32 raw2;
|
||||
BitField<0, 2, u32> cull_face;
|
||||
BitField<2, 1, u32> cull_enable;
|
||||
};
|
||||
// Vertex stride is a 12 bits value, we have 4 bits to spare per element
|
||||
std::array<u16, Maxwell::NumVertexArrays> vertex_strides;
|
||||
|
||||
void Refresh(const Maxwell& regs);
|
||||
|
||||
Maxwell::ComparisonOp DepthTestFunc() const noexcept {
|
||||
return UnpackComparisonOp(depth_test_func);
|
||||
}
|
||||
|
||||
Maxwell::CullFace CullFace() const noexcept {
|
||||
return UnpackCullFace(cull_face.Value());
|
||||
}
|
||||
|
||||
Maxwell::FrontFace FrontFace() const noexcept {
|
||||
return UnpackFrontFace(front_face.Value());
|
||||
}
|
||||
};
|
||||
|
||||
union {
|
||||
u32 raw1;
|
||||
BitField<0, 1, u32> extended_dynamic_state;
|
||||
BitField<1, 1, u32> dynamic_vertex_input;
|
||||
BitField<2, 1, u32> xfb_enabled;
|
||||
BitField<3, 1, u32> primitive_restart_enable;
|
||||
BitField<4, 1, u32> depth_bias_enable;
|
||||
BitField<5, 1, u32> depth_clamp_disabled;
|
||||
BitField<6, 1, u32> ndc_minus_one_to_one;
|
||||
BitField<7, 2, u32> polygon_mode;
|
||||
BitField<9, 5, u32> patch_control_points_minus_one;
|
||||
BitField<14, 2, u32> tessellation_primitive;
|
||||
BitField<16, 2, u32> tessellation_spacing;
|
||||
BitField<18, 1, u32> tessellation_clockwise;
|
||||
BitField<19, 1, u32> logic_op_enable;
|
||||
BitField<20, 4, u32> logic_op;
|
||||
BitField<24, 4, Maxwell::PrimitiveTopology> topology;
|
||||
BitField<28, 4, Tegra::Texture::MsaaMode> msaa_mode;
|
||||
};
|
||||
union {
|
||||
u32 raw2;
|
||||
BitField<0, 1, u32> rasterize_enable;
|
||||
BitField<1, 3, u32> alpha_test_func;
|
||||
BitField<4, 1, u32> early_z;
|
||||
BitField<5, 1, u32> depth_enabled;
|
||||
BitField<6, 5, u32> depth_format;
|
||||
BitField<11, 1, u32> y_negate;
|
||||
BitField<12, 1, u32> provoking_vertex_last;
|
||||
BitField<14, 1, u32> smooth_lines;
|
||||
};
|
||||
std::array<u8, Maxwell::NumRenderTargets> color_formats;
|
||||
|
||||
u32 alpha_test_ref;
|
||||
u32 point_size;
|
||||
std::array<BlendingAttachment, Maxwell::NumRenderTargets> attachments;
|
||||
std::array<u16, Maxwell::NumViewports> viewport_swizzles;
|
||||
union {
|
||||
u64 attribute_types; // Used with VK_EXT_vertex_input_dynamic_state
|
||||
u64 enabled_divisors;
|
||||
};
|
||||
std::array<VertexAttribute, Maxwell::NumVertexAttributes> attributes;
|
||||
std::array<u32, Maxwell::NumVertexArrays> binding_divisors;
|
||||
|
||||
DynamicState dynamic_state;
|
||||
VideoCommon::TransformFeedbackState xfb_state;
|
||||
|
||||
void Refresh(Tegra::Engines::Maxwell3D& maxwell3d, bool has_extended_dynamic_state,
|
||||
bool has_dynamic_vertex_input);
|
||||
|
||||
size_t Hash() const noexcept;
|
||||
|
||||
bool operator==(const FixedPipelineState& rhs) const noexcept;
|
||||
|
||||
bool operator!=(const FixedPipelineState& rhs) const noexcept {
|
||||
return !operator==(rhs);
|
||||
}
|
||||
|
||||
size_t Size() const noexcept {
|
||||
if (xfb_enabled) {
|
||||
// When transform feedback is enabled, use the whole struct
|
||||
return sizeof(*this);
|
||||
}
|
||||
if (dynamic_vertex_input) {
|
||||
// Exclude dynamic state and attributes
|
||||
return offsetof(FixedPipelineState, attributes);
|
||||
}
|
||||
if (extended_dynamic_state) {
|
||||
// Exclude dynamic state
|
||||
return offsetof(FixedPipelineState, dynamic_state);
|
||||
}
|
||||
// Default
|
||||
return offsetof(FixedPipelineState, xfb_state);
|
||||
}
|
||||
|
||||
u32 DynamicAttributeType(size_t index) const noexcept {
|
||||
return (attribute_types >> (index * 2)) & 0b11;
|
||||
}
|
||||
};
|
||||
static_assert(std::has_unique_object_representations_v<FixedPipelineState>);
|
||||
static_assert(std::is_trivially_copyable_v<FixedPipelineState>);
|
||||
static_assert(std::is_trivially_constructible_v<FixedPipelineState>);
|
||||
|
||||
} // namespace Vulkan
|
||||
|
||||
namespace std {
|
||||
|
||||
template <>
|
||||
struct hash<Vulkan::FixedPipelineState> {
|
||||
size_t operator()(const Vulkan::FixedPipelineState& k) const noexcept {
|
||||
return k.Hash();
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace std
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,78 +1,78 @@
|
||||
// SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "shader_recompiler/stage.h"
|
||||
#include "video_core/engines/maxwell_3d.h"
|
||||
#include "video_core/surface.h"
|
||||
#include "video_core/textures/texture.h"
|
||||
#include "video_core/vulkan_common/vulkan_device.h"
|
||||
#include "video_core/vulkan_common/vulkan_wrapper.h"
|
||||
|
||||
namespace Vulkan::MaxwellToVK {
|
||||
|
||||
using Maxwell = Tegra::Engines::Maxwell3D::Regs;
|
||||
using PixelFormat = VideoCore::Surface::PixelFormat;
|
||||
|
||||
namespace Sampler {
|
||||
|
||||
VkFilter Filter(Tegra::Texture::TextureFilter filter);
|
||||
|
||||
VkSamplerMipmapMode MipmapMode(Tegra::Texture::TextureMipmapFilter mipmap_filter);
|
||||
|
||||
VkSamplerAddressMode WrapMode(const Device& device, Tegra::Texture::WrapMode wrap_mode,
|
||||
Tegra::Texture::TextureFilter filter);
|
||||
|
||||
VkCompareOp DepthCompareFunction(Tegra::Texture::DepthCompareFunc depth_compare_func);
|
||||
|
||||
} // namespace Sampler
|
||||
|
||||
struct FormatInfo {
|
||||
VkFormat format;
|
||||
bool attachable;
|
||||
bool storage;
|
||||
};
|
||||
|
||||
/**
|
||||
* Returns format properties supported in the host
|
||||
* @param device Host device
|
||||
* @param format_type Type of image the buffer will use
|
||||
* @param with_srgb True when the format can be sRGB when converted to another format (ASTC)
|
||||
* @param pixel_format Guest pixel format to describe
|
||||
*/
|
||||
[[nodiscard]] FormatInfo SurfaceFormat(const Device& device, FormatType format_type, bool with_srgb,
|
||||
PixelFormat pixel_format);
|
||||
|
||||
VkShaderStageFlagBits ShaderStage(Shader::Stage stage);
|
||||
|
||||
VkPrimitiveTopology PrimitiveTopology(const Device& device, Maxwell::PrimitiveTopology topology);
|
||||
|
||||
VkFormat VertexFormat(const Device& device, Maxwell::VertexAttribute::Type type,
|
||||
Maxwell::VertexAttribute::Size size);
|
||||
|
||||
VkCompareOp ComparisonOp(Maxwell::ComparisonOp comparison);
|
||||
|
||||
VkIndexType IndexFormat(Maxwell::IndexFormat index_format);
|
||||
|
||||
VkStencilOp StencilOp(Maxwell::StencilOp::Op stencil_op);
|
||||
|
||||
VkBlendOp BlendEquation(Maxwell::Blend::Equation equation);
|
||||
|
||||
VkBlendFactor BlendFactor(Maxwell::Blend::Factor factor);
|
||||
|
||||
VkFrontFace FrontFace(Maxwell::FrontFace front_face);
|
||||
|
||||
VkCullModeFlagBits CullFace(Maxwell::CullFace cull_face);
|
||||
|
||||
VkPolygonMode PolygonMode(Maxwell::PolygonMode polygon_mode);
|
||||
|
||||
VkComponentSwizzle SwizzleSource(Tegra::Texture::SwizzleSource swizzle);
|
||||
|
||||
VkViewportCoordinateSwizzleNV ViewportSwizzle(Maxwell::ViewportSwizzle swizzle);
|
||||
|
||||
VkSamplerReductionMode SamplerReduction(Tegra::Texture::SamplerReduction reduction);
|
||||
|
||||
VkSampleCountFlagBits MsaaMode(Tegra::Texture::MsaaMode msaa_mode);
|
||||
|
||||
} // namespace Vulkan::MaxwellToVK
|
||||
// SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "shader_recompiler/stage.h"
|
||||
#include "video_core/engines/maxwell_3d.h"
|
||||
#include "video_core/surface.h"
|
||||
#include "video_core/textures/texture.h"
|
||||
#include "video_core/vulkan_common/vulkan_device.h"
|
||||
#include "video_core/vulkan_common/vulkan_wrapper.h"
|
||||
|
||||
namespace Vulkan::MaxwellToVK {
|
||||
|
||||
using Maxwell = Tegra::Engines::Maxwell3D::Regs;
|
||||
using PixelFormat = VideoCore::Surface::PixelFormat;
|
||||
|
||||
namespace Sampler {
|
||||
|
||||
VkFilter Filter(Tegra::Texture::TextureFilter filter);
|
||||
|
||||
VkSamplerMipmapMode MipmapMode(Tegra::Texture::TextureMipmapFilter mipmap_filter);
|
||||
|
||||
VkSamplerAddressMode WrapMode(const Device& device, Tegra::Texture::WrapMode wrap_mode,
|
||||
Tegra::Texture::TextureFilter filter);
|
||||
|
||||
VkCompareOp DepthCompareFunction(Tegra::Texture::DepthCompareFunc depth_compare_func);
|
||||
|
||||
} // namespace Sampler
|
||||
|
||||
struct FormatInfo {
|
||||
VkFormat format;
|
||||
bool attachable;
|
||||
bool storage;
|
||||
};
|
||||
|
||||
/**
|
||||
* Returns format properties supported in the host
|
||||
* @param device Host device
|
||||
* @param format_type Type of image the buffer will use
|
||||
* @param with_srgb True when the format can be sRGB when converted to another format (ASTC)
|
||||
* @param pixel_format Guest pixel format to describe
|
||||
*/
|
||||
[[nodiscard]] FormatInfo SurfaceFormat(const Device& device, FormatType format_type, bool with_srgb,
|
||||
PixelFormat pixel_format);
|
||||
|
||||
VkShaderStageFlagBits ShaderStage(Shader::Stage stage);
|
||||
|
||||
VkPrimitiveTopology PrimitiveTopology(const Device& device, Maxwell::PrimitiveTopology topology);
|
||||
|
||||
VkFormat VertexFormat(const Device& device, Maxwell::VertexAttribute::Type type,
|
||||
Maxwell::VertexAttribute::Size size);
|
||||
|
||||
VkCompareOp ComparisonOp(Maxwell::ComparisonOp comparison);
|
||||
|
||||
VkIndexType IndexFormat(Maxwell::IndexFormat index_format);
|
||||
|
||||
VkStencilOp StencilOp(Maxwell::StencilOp::Op stencil_op);
|
||||
|
||||
VkBlendOp BlendEquation(Maxwell::Blend::Equation equation);
|
||||
|
||||
VkBlendFactor BlendFactor(Maxwell::Blend::Factor factor);
|
||||
|
||||
VkFrontFace FrontFace(Maxwell::FrontFace front_face);
|
||||
|
||||
VkCullModeFlagBits CullFace(Maxwell::CullFace cull_face);
|
||||
|
||||
VkPolygonMode PolygonMode(Maxwell::PolygonMode polygon_mode);
|
||||
|
||||
VkComponentSwizzle SwizzleSource(Tegra::Texture::SwizzleSource swizzle);
|
||||
|
||||
VkViewportCoordinateSwizzleNV ViewportSwizzle(Maxwell::ViewportSwizzle swizzle);
|
||||
|
||||
VkSamplerReductionMode SamplerReduction(Tegra::Texture::SamplerReduction reduction);
|
||||
|
||||
VkSampleCountFlagBits MsaaMode(Tegra::Texture::MsaaMode msaa_mode);
|
||||
|
||||
} // namespace Vulkan::MaxwellToVK
|
||||
|
||||
@@ -1,202 +1,202 @@
|
||||
// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <cstddef>
|
||||
|
||||
#include <boost/container/small_vector.hpp>
|
||||
|
||||
#include "common/common_types.h"
|
||||
#include "shader_recompiler/backend/spirv/emit_spirv.h"
|
||||
#include "shader_recompiler/shader_info.h"
|
||||
#include "video_core/renderer_vulkan/vk_texture_cache.h"
|
||||
#include "video_core/renderer_vulkan/vk_update_descriptor.h"
|
||||
#include "video_core/texture_cache/texture_cache.h"
|
||||
#include "video_core/texture_cache/types.h"
|
||||
#include "video_core/vulkan_common/vulkan_device.h"
|
||||
|
||||
namespace Vulkan {
|
||||
|
||||
using Shader::Backend::SPIRV::NUM_TEXTURE_AND_IMAGE_SCALING_WORDS;
|
||||
|
||||
class DescriptorLayoutBuilder {
|
||||
public:
|
||||
DescriptorLayoutBuilder(const Device& device_) : device{&device_} {}
|
||||
|
||||
bool CanUsePushDescriptor() const noexcept {
|
||||
return device->IsKhrPushDescriptorSupported() &&
|
||||
num_descriptors <= device->MaxPushDescriptors();
|
||||
}
|
||||
|
||||
vk::DescriptorSetLayout CreateDescriptorSetLayout(bool use_push_descriptor) const {
|
||||
if (bindings.empty()) {
|
||||
return nullptr;
|
||||
}
|
||||
const VkDescriptorSetLayoutCreateFlags flags =
|
||||
use_push_descriptor ? VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR : 0;
|
||||
return device->GetLogical().CreateDescriptorSetLayout({
|
||||
.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
|
||||
.pNext = nullptr,
|
||||
.flags = flags,
|
||||
.bindingCount = static_cast<u32>(bindings.size()),
|
||||
.pBindings = bindings.data(),
|
||||
});
|
||||
}
|
||||
|
||||
vk::DescriptorUpdateTemplateKHR CreateTemplate(VkDescriptorSetLayout descriptor_set_layout,
|
||||
VkPipelineLayout pipeline_layout,
|
||||
bool use_push_descriptor) const {
|
||||
if (entries.empty()) {
|
||||
return nullptr;
|
||||
}
|
||||
const VkDescriptorUpdateTemplateType type =
|
||||
use_push_descriptor ? VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_PUSH_DESCRIPTORS_KHR
|
||||
: VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_DESCRIPTOR_SET_KHR;
|
||||
return device->GetLogical().CreateDescriptorUpdateTemplateKHR({
|
||||
.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_UPDATE_TEMPLATE_CREATE_INFO_KHR,
|
||||
.pNext = nullptr,
|
||||
.flags = 0,
|
||||
.descriptorUpdateEntryCount = static_cast<u32>(entries.size()),
|
||||
.pDescriptorUpdateEntries = entries.data(),
|
||||
.templateType = type,
|
||||
.descriptorSetLayout = descriptor_set_layout,
|
||||
.pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,
|
||||
.pipelineLayout = pipeline_layout,
|
||||
.set = 0,
|
||||
});
|
||||
}
|
||||
|
||||
vk::PipelineLayout CreatePipelineLayout(VkDescriptorSetLayout descriptor_set_layout) const {
|
||||
using Shader::Backend::SPIRV::RescalingLayout;
|
||||
const u32 size_offset = is_compute ? sizeof(RescalingLayout::down_factor) : 0u;
|
||||
const VkPushConstantRange range{
|
||||
.stageFlags = static_cast<VkShaderStageFlags>(
|
||||
is_compute ? VK_SHADER_STAGE_COMPUTE_BIT : VK_SHADER_STAGE_ALL_GRAPHICS),
|
||||
.offset = 0,
|
||||
.size = static_cast<u32>(sizeof(RescalingLayout)) - size_offset,
|
||||
};
|
||||
return device->GetLogical().CreatePipelineLayout({
|
||||
.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
|
||||
.pNext = nullptr,
|
||||
.flags = 0,
|
||||
.setLayoutCount = descriptor_set_layout ? 1U : 0U,
|
||||
.pSetLayouts = bindings.empty() ? nullptr : &descriptor_set_layout,
|
||||
.pushConstantRangeCount = 1,
|
||||
.pPushConstantRanges = &range,
|
||||
});
|
||||
}
|
||||
|
||||
void Add(const Shader::Info& info, VkShaderStageFlags stage) {
|
||||
is_compute |= (stage & VK_SHADER_STAGE_COMPUTE_BIT) != 0;
|
||||
|
||||
Add(VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, stage, info.constant_buffer_descriptors);
|
||||
Add(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, stage, info.storage_buffers_descriptors);
|
||||
Add(VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, stage, info.texture_buffer_descriptors);
|
||||
Add(VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER, stage, info.image_buffer_descriptors);
|
||||
Add(VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, stage, info.texture_descriptors);
|
||||
Add(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, stage, info.image_descriptors);
|
||||
}
|
||||
|
||||
private:
|
||||
template <typename Descriptors>
|
||||
void Add(VkDescriptorType type, VkShaderStageFlags stage, const Descriptors& descriptors) {
|
||||
const size_t num{descriptors.size()};
|
||||
for (size_t i = 0; i < num; ++i) {
|
||||
bindings.push_back({
|
||||
.binding = binding,
|
||||
.descriptorType = type,
|
||||
.descriptorCount = descriptors[i].count,
|
||||
.stageFlags = stage,
|
||||
.pImmutableSamplers = nullptr,
|
||||
});
|
||||
entries.push_back({
|
||||
.dstBinding = binding,
|
||||
.dstArrayElement = 0,
|
||||
.descriptorCount = descriptors[i].count,
|
||||
.descriptorType = type,
|
||||
.offset = offset,
|
||||
.stride = sizeof(DescriptorUpdateEntry),
|
||||
});
|
||||
++binding;
|
||||
num_descriptors += descriptors[i].count;
|
||||
offset += sizeof(DescriptorUpdateEntry);
|
||||
}
|
||||
}
|
||||
|
||||
const Device* device{};
|
||||
bool is_compute{};
|
||||
boost::container::small_vector<VkDescriptorSetLayoutBinding, 32> bindings;
|
||||
boost::container::small_vector<VkDescriptorUpdateTemplateEntryKHR, 32> entries;
|
||||
u32 binding{};
|
||||
u32 num_descriptors{};
|
||||
size_t offset{};
|
||||
};
|
||||
|
||||
class RescalingPushConstant {
|
||||
public:
|
||||
explicit RescalingPushConstant() noexcept {}
|
||||
|
||||
void PushTexture(bool is_rescaled) noexcept {
|
||||
*texture_ptr |= is_rescaled ? texture_bit : 0u;
|
||||
texture_bit <<= 1u;
|
||||
if (texture_bit == 0u) {
|
||||
texture_bit = 1u;
|
||||
++texture_ptr;
|
||||
}
|
||||
}
|
||||
|
||||
void PushImage(bool is_rescaled) noexcept {
|
||||
*image_ptr |= is_rescaled ? image_bit : 0u;
|
||||
image_bit <<= 1u;
|
||||
if (image_bit == 0u) {
|
||||
image_bit = 1u;
|
||||
++image_ptr;
|
||||
}
|
||||
}
|
||||
|
||||
const std::array<u32, NUM_TEXTURE_AND_IMAGE_SCALING_WORDS>& Data() const noexcept {
|
||||
return words;
|
||||
}
|
||||
|
||||
private:
|
||||
std::array<u32, NUM_TEXTURE_AND_IMAGE_SCALING_WORDS> words{};
|
||||
u32* texture_ptr{words.data()};
|
||||
u32* image_ptr{words.data() + Shader::Backend::SPIRV::NUM_TEXTURE_SCALING_WORDS};
|
||||
u32 texture_bit{1u};
|
||||
u32 image_bit{1u};
|
||||
};
|
||||
|
||||
inline void PushImageDescriptors(TextureCache& texture_cache,
|
||||
UpdateDescriptorQueue& update_descriptor_queue,
|
||||
const Shader::Info& info, RescalingPushConstant& rescaling,
|
||||
const VkSampler*& samplers,
|
||||
const VideoCommon::ImageViewInOut*& views) {
|
||||
const u32 num_texture_buffers = Shader::NumDescriptors(info.texture_buffer_descriptors);
|
||||
const u32 num_image_buffers = Shader::NumDescriptors(info.image_buffer_descriptors);
|
||||
views += num_texture_buffers;
|
||||
views += num_image_buffers;
|
||||
for (const auto& desc : info.texture_descriptors) {
|
||||
for (u32 index = 0; index < desc.count; ++index) {
|
||||
const VideoCommon::ImageViewId image_view_id{(views++)->id};
|
||||
const VkSampler sampler{*(samplers++)};
|
||||
ImageView& image_view{texture_cache.GetImageView(image_view_id)};
|
||||
const VkImageView vk_image_view{image_view.Handle(desc.type)};
|
||||
update_descriptor_queue.AddSampledImage(vk_image_view, sampler);
|
||||
rescaling.PushTexture(texture_cache.IsRescaling(image_view));
|
||||
}
|
||||
}
|
||||
for (const auto& desc : info.image_descriptors) {
|
||||
for (u32 index = 0; index < desc.count; ++index) {
|
||||
ImageView& image_view{texture_cache.GetImageView((views++)->id)};
|
||||
if (desc.is_written) {
|
||||
texture_cache.MarkModification(image_view.image_id);
|
||||
}
|
||||
const VkImageView vk_image_view{image_view.StorageView(desc.type, desc.format)};
|
||||
update_descriptor_queue.AddImage(vk_image_view);
|
||||
rescaling.PushImage(texture_cache.IsRescaling(image_view));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace Vulkan
|
||||
// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <cstddef>
|
||||
|
||||
#include <boost/container/small_vector.hpp>
|
||||
|
||||
#include "common/common_types.h"
|
||||
#include "shader_recompiler/backend/spirv/emit_spirv.h"
|
||||
#include "shader_recompiler/shader_info.h"
|
||||
#include "video_core/renderer_vulkan/vk_texture_cache.h"
|
||||
#include "video_core/renderer_vulkan/vk_update_descriptor.h"
|
||||
#include "video_core/texture_cache/texture_cache.h"
|
||||
#include "video_core/texture_cache/types.h"
|
||||
#include "video_core/vulkan_common/vulkan_device.h"
|
||||
|
||||
namespace Vulkan {
|
||||
|
||||
using Shader::Backend::SPIRV::NUM_TEXTURE_AND_IMAGE_SCALING_WORDS;
|
||||
|
||||
class DescriptorLayoutBuilder {
|
||||
public:
|
||||
DescriptorLayoutBuilder(const Device& device_) : device{&device_} {}
|
||||
|
||||
bool CanUsePushDescriptor() const noexcept {
|
||||
return device->IsKhrPushDescriptorSupported() &&
|
||||
num_descriptors <= device->MaxPushDescriptors();
|
||||
}
|
||||
|
||||
vk::DescriptorSetLayout CreateDescriptorSetLayout(bool use_push_descriptor) const {
|
||||
if (bindings.empty()) {
|
||||
return nullptr;
|
||||
}
|
||||
const VkDescriptorSetLayoutCreateFlags flags =
|
||||
use_push_descriptor ? VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR : 0;
|
||||
return device->GetLogical().CreateDescriptorSetLayout({
|
||||
.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
|
||||
.pNext = nullptr,
|
||||
.flags = flags,
|
||||
.bindingCount = static_cast<u32>(bindings.size()),
|
||||
.pBindings = bindings.data(),
|
||||
});
|
||||
}
|
||||
|
||||
vk::DescriptorUpdateTemplateKHR CreateTemplate(VkDescriptorSetLayout descriptor_set_layout,
|
||||
VkPipelineLayout pipeline_layout,
|
||||
bool use_push_descriptor) const {
|
||||
if (entries.empty()) {
|
||||
return nullptr;
|
||||
}
|
||||
const VkDescriptorUpdateTemplateType type =
|
||||
use_push_descriptor ? VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_PUSH_DESCRIPTORS_KHR
|
||||
: VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_DESCRIPTOR_SET_KHR;
|
||||
return device->GetLogical().CreateDescriptorUpdateTemplateKHR({
|
||||
.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_UPDATE_TEMPLATE_CREATE_INFO_KHR,
|
||||
.pNext = nullptr,
|
||||
.flags = 0,
|
||||
.descriptorUpdateEntryCount = static_cast<u32>(entries.size()),
|
||||
.pDescriptorUpdateEntries = entries.data(),
|
||||
.templateType = type,
|
||||
.descriptorSetLayout = descriptor_set_layout,
|
||||
.pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,
|
||||
.pipelineLayout = pipeline_layout,
|
||||
.set = 0,
|
||||
});
|
||||
}
|
||||
|
||||
vk::PipelineLayout CreatePipelineLayout(VkDescriptorSetLayout descriptor_set_layout) const {
|
||||
using Shader::Backend::SPIRV::RescalingLayout;
|
||||
const u32 size_offset = is_compute ? sizeof(RescalingLayout::down_factor) : 0u;
|
||||
const VkPushConstantRange range{
|
||||
.stageFlags = static_cast<VkShaderStageFlags>(
|
||||
is_compute ? VK_SHADER_STAGE_COMPUTE_BIT : VK_SHADER_STAGE_ALL_GRAPHICS),
|
||||
.offset = 0,
|
||||
.size = static_cast<u32>(sizeof(RescalingLayout)) - size_offset,
|
||||
};
|
||||
return device->GetLogical().CreatePipelineLayout({
|
||||
.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
|
||||
.pNext = nullptr,
|
||||
.flags = 0,
|
||||
.setLayoutCount = descriptor_set_layout ? 1U : 0U,
|
||||
.pSetLayouts = bindings.empty() ? nullptr : &descriptor_set_layout,
|
||||
.pushConstantRangeCount = 1,
|
||||
.pPushConstantRanges = &range,
|
||||
});
|
||||
}
|
||||
|
||||
void Add(const Shader::Info& info, VkShaderStageFlags stage) {
|
||||
is_compute |= (stage & VK_SHADER_STAGE_COMPUTE_BIT) != 0;
|
||||
|
||||
Add(VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, stage, info.constant_buffer_descriptors);
|
||||
Add(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, stage, info.storage_buffers_descriptors);
|
||||
Add(VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, stage, info.texture_buffer_descriptors);
|
||||
Add(VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER, stage, info.image_buffer_descriptors);
|
||||
Add(VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, stage, info.texture_descriptors);
|
||||
Add(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, stage, info.image_descriptors);
|
||||
}
|
||||
|
||||
private:
|
||||
template <typename Descriptors>
|
||||
void Add(VkDescriptorType type, VkShaderStageFlags stage, const Descriptors& descriptors) {
|
||||
const size_t num{descriptors.size()};
|
||||
for (size_t i = 0; i < num; ++i) {
|
||||
bindings.push_back({
|
||||
.binding = binding,
|
||||
.descriptorType = type,
|
||||
.descriptorCount = descriptors[i].count,
|
||||
.stageFlags = stage,
|
||||
.pImmutableSamplers = nullptr,
|
||||
});
|
||||
entries.push_back({
|
||||
.dstBinding = binding,
|
||||
.dstArrayElement = 0,
|
||||
.descriptorCount = descriptors[i].count,
|
||||
.descriptorType = type,
|
||||
.offset = offset,
|
||||
.stride = sizeof(DescriptorUpdateEntry),
|
||||
});
|
||||
++binding;
|
||||
num_descriptors += descriptors[i].count;
|
||||
offset += sizeof(DescriptorUpdateEntry);
|
||||
}
|
||||
}
|
||||
|
||||
const Device* device{};
|
||||
bool is_compute{};
|
||||
boost::container::small_vector<VkDescriptorSetLayoutBinding, 32> bindings;
|
||||
boost::container::small_vector<VkDescriptorUpdateTemplateEntryKHR, 32> entries;
|
||||
u32 binding{};
|
||||
u32 num_descriptors{};
|
||||
size_t offset{};
|
||||
};
|
||||
|
||||
class RescalingPushConstant {
|
||||
public:
|
||||
explicit RescalingPushConstant() noexcept {}
|
||||
|
||||
void PushTexture(bool is_rescaled) noexcept {
|
||||
*texture_ptr |= is_rescaled ? texture_bit : 0u;
|
||||
texture_bit <<= 1u;
|
||||
if (texture_bit == 0u) {
|
||||
texture_bit = 1u;
|
||||
++texture_ptr;
|
||||
}
|
||||
}
|
||||
|
||||
void PushImage(bool is_rescaled) noexcept {
|
||||
*image_ptr |= is_rescaled ? image_bit : 0u;
|
||||
image_bit <<= 1u;
|
||||
if (image_bit == 0u) {
|
||||
image_bit = 1u;
|
||||
++image_ptr;
|
||||
}
|
||||
}
|
||||
|
||||
const std::array<u32, NUM_TEXTURE_AND_IMAGE_SCALING_WORDS>& Data() const noexcept {
|
||||
return words;
|
||||
}
|
||||
|
||||
private:
|
||||
std::array<u32, NUM_TEXTURE_AND_IMAGE_SCALING_WORDS> words{};
|
||||
u32* texture_ptr{words.data()};
|
||||
u32* image_ptr{words.data() + Shader::Backend::SPIRV::NUM_TEXTURE_SCALING_WORDS};
|
||||
u32 texture_bit{1u};
|
||||
u32 image_bit{1u};
|
||||
};
|
||||
|
||||
inline void PushImageDescriptors(TextureCache& texture_cache,
|
||||
UpdateDescriptorQueue& update_descriptor_queue,
|
||||
const Shader::Info& info, RescalingPushConstant& rescaling,
|
||||
const VkSampler*& samplers,
|
||||
const VideoCommon::ImageViewInOut*& views) {
|
||||
const u32 num_texture_buffers = Shader::NumDescriptors(info.texture_buffer_descriptors);
|
||||
const u32 num_image_buffers = Shader::NumDescriptors(info.image_buffer_descriptors);
|
||||
views += num_texture_buffers;
|
||||
views += num_image_buffers;
|
||||
for (const auto& desc : info.texture_descriptors) {
|
||||
for (u32 index = 0; index < desc.count; ++index) {
|
||||
const VideoCommon::ImageViewId image_view_id{(views++)->id};
|
||||
const VkSampler sampler{*(samplers++)};
|
||||
ImageView& image_view{texture_cache.GetImageView(image_view_id)};
|
||||
const VkImageView vk_image_view{image_view.Handle(desc.type)};
|
||||
update_descriptor_queue.AddSampledImage(vk_image_view, sampler);
|
||||
rescaling.PushTexture(texture_cache.IsRescaling(image_view));
|
||||
}
|
||||
}
|
||||
for (const auto& desc : info.image_descriptors) {
|
||||
for (u32 index = 0; index < desc.count; ++index) {
|
||||
ImageView& image_view{texture_cache.GetImageView((views++)->id)};
|
||||
if (desc.is_written) {
|
||||
texture_cache.MarkModification(image_view.image_id);
|
||||
}
|
||||
const VkImageView vk_image_view{image_view.StorageView(desc.type, desc.format)};
|
||||
update_descriptor_queue.AddImage(vk_image_view);
|
||||
rescaling.PushImage(texture_cache.IsRescaling(image_view));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace Vulkan
|
||||
|
||||
@@ -1,99 +1,99 @@
|
||||
// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#include <string_view>
|
||||
|
||||
#include <fmt/format.h>
|
||||
|
||||
#include "common/common_types.h"
|
||||
#include "common/logging/log.h"
|
||||
#include "video_core/renderer_vulkan/pipeline_statistics.h"
|
||||
#include "video_core/vulkan_common/vulkan_device.h"
|
||||
#include "video_core/vulkan_common/vulkan_wrapper.h"
|
||||
|
||||
namespace Vulkan {
|
||||
|
||||
using namespace std::string_view_literals;
|
||||
|
||||
static u64 GetUint64(const VkPipelineExecutableStatisticKHR& statistic) {
|
||||
switch (statistic.format) {
|
||||
case VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_INT64_KHR:
|
||||
return static_cast<u64>(statistic.value.i64);
|
||||
case VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR:
|
||||
return statistic.value.u64;
|
||||
case VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_FLOAT64_KHR:
|
||||
return static_cast<u64>(statistic.value.f64);
|
||||
default:
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
PipelineStatistics::PipelineStatistics(const Device& device_) : device{device_} {}
|
||||
|
||||
void PipelineStatistics::Collect(VkPipeline pipeline) {
|
||||
const auto& dev{device.GetLogical()};
|
||||
const std::vector properties{dev.GetPipelineExecutablePropertiesKHR(pipeline)};
|
||||
const u32 num_executables{static_cast<u32>(properties.size())};
|
||||
for (u32 executable = 0; executable < num_executables; ++executable) {
|
||||
const auto statistics{dev.GetPipelineExecutableStatisticsKHR(pipeline, executable)};
|
||||
if (statistics.empty()) {
|
||||
continue;
|
||||
}
|
||||
Stats stage_stats;
|
||||
for (const auto& statistic : statistics) {
|
||||
const char* const name{statistic.name};
|
||||
if (name == "Binary Size"sv || name == "Code size"sv || name == "Instruction Count"sv) {
|
||||
stage_stats.code_size = GetUint64(statistic);
|
||||
} else if (name == "Register Count"sv) {
|
||||
stage_stats.register_count = GetUint64(statistic);
|
||||
} else if (name == "SGPRs"sv || name == "numUsedSgprs"sv) {
|
||||
stage_stats.sgpr_count = GetUint64(statistic);
|
||||
} else if (name == "VGPRs"sv || name == "numUsedVgprs"sv) {
|
||||
stage_stats.vgpr_count = GetUint64(statistic);
|
||||
} else if (name == "Branches"sv) {
|
||||
stage_stats.branches_count = GetUint64(statistic);
|
||||
} else if (name == "Basic Block Count"sv) {
|
||||
stage_stats.basic_block_count = GetUint64(statistic);
|
||||
}
|
||||
}
|
||||
std::scoped_lock lock{mutex};
|
||||
collected_stats.push_back(stage_stats);
|
||||
}
|
||||
}
|
||||
|
||||
void PipelineStatistics::Report() const {
|
||||
double num{};
|
||||
Stats total;
|
||||
{
|
||||
std::scoped_lock lock{mutex};
|
||||
for (const Stats& stats : collected_stats) {
|
||||
total.code_size += stats.code_size;
|
||||
total.register_count += stats.register_count;
|
||||
total.sgpr_count += stats.sgpr_count;
|
||||
total.vgpr_count += stats.vgpr_count;
|
||||
total.branches_count += stats.branches_count;
|
||||
total.basic_block_count += stats.basic_block_count;
|
||||
}
|
||||
num = static_cast<double>(collected_stats.size());
|
||||
}
|
||||
std::string report;
|
||||
const auto add = [&](const char* fmt, u64 value) {
|
||||
if (value > 0) {
|
||||
report += fmt::format(fmt::runtime(fmt), static_cast<double>(value) / num);
|
||||
}
|
||||
};
|
||||
add("Code size: {:9.03f}\n", total.code_size);
|
||||
add("Register count: {:9.03f}\n", total.register_count);
|
||||
add("SGPRs: {:9.03f}\n", total.sgpr_count);
|
||||
add("VGPRs: {:9.03f}\n", total.vgpr_count);
|
||||
add("Branches count: {:9.03f}\n", total.branches_count);
|
||||
add("Basic blocks: {:9.03f}\n", total.basic_block_count);
|
||||
|
||||
LOG_INFO(Render_Vulkan,
|
||||
"\nAverage pipeline statistics\n"
|
||||
"==========================================\n"
|
||||
"{}\n",
|
||||
report);
|
||||
}
|
||||
|
||||
} // namespace Vulkan
|
||||
// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#include <string_view>
|
||||
|
||||
#include <fmt/format.h>
|
||||
|
||||
#include "common/common_types.h"
|
||||
#include "common/logging/log.h"
|
||||
#include "video_core/renderer_vulkan/pipeline_statistics.h"
|
||||
#include "video_core/vulkan_common/vulkan_device.h"
|
||||
#include "video_core/vulkan_common/vulkan_wrapper.h"
|
||||
|
||||
namespace Vulkan {
|
||||
|
||||
using namespace std::string_view_literals;
|
||||
|
||||
static u64 GetUint64(const VkPipelineExecutableStatisticKHR& statistic) {
|
||||
switch (statistic.format) {
|
||||
case VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_INT64_KHR:
|
||||
return static_cast<u64>(statistic.value.i64);
|
||||
case VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR:
|
||||
return statistic.value.u64;
|
||||
case VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_FLOAT64_KHR:
|
||||
return static_cast<u64>(statistic.value.f64);
|
||||
default:
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
PipelineStatistics::PipelineStatistics(const Device& device_) : device{device_} {}
|
||||
|
||||
void PipelineStatistics::Collect(VkPipeline pipeline) {
|
||||
const auto& dev{device.GetLogical()};
|
||||
const std::vector properties{dev.GetPipelineExecutablePropertiesKHR(pipeline)};
|
||||
const u32 num_executables{static_cast<u32>(properties.size())};
|
||||
for (u32 executable = 0; executable < num_executables; ++executable) {
|
||||
const auto statistics{dev.GetPipelineExecutableStatisticsKHR(pipeline, executable)};
|
||||
if (statistics.empty()) {
|
||||
continue;
|
||||
}
|
||||
Stats stage_stats;
|
||||
for (const auto& statistic : statistics) {
|
||||
const char* const name{statistic.name};
|
||||
if (name == "Binary Size"sv || name == "Code size"sv || name == "Instruction Count"sv) {
|
||||
stage_stats.code_size = GetUint64(statistic);
|
||||
} else if (name == "Register Count"sv) {
|
||||
stage_stats.register_count = GetUint64(statistic);
|
||||
} else if (name == "SGPRs"sv || name == "numUsedSgprs"sv) {
|
||||
stage_stats.sgpr_count = GetUint64(statistic);
|
||||
} else if (name == "VGPRs"sv || name == "numUsedVgprs"sv) {
|
||||
stage_stats.vgpr_count = GetUint64(statistic);
|
||||
} else if (name == "Branches"sv) {
|
||||
stage_stats.branches_count = GetUint64(statistic);
|
||||
} else if (name == "Basic Block Count"sv) {
|
||||
stage_stats.basic_block_count = GetUint64(statistic);
|
||||
}
|
||||
}
|
||||
std::scoped_lock lock{mutex};
|
||||
collected_stats.push_back(stage_stats);
|
||||
}
|
||||
}
|
||||
|
||||
void PipelineStatistics::Report() const {
|
||||
double num{};
|
||||
Stats total;
|
||||
{
|
||||
std::scoped_lock lock{mutex};
|
||||
for (const Stats& stats : collected_stats) {
|
||||
total.code_size += stats.code_size;
|
||||
total.register_count += stats.register_count;
|
||||
total.sgpr_count += stats.sgpr_count;
|
||||
total.vgpr_count += stats.vgpr_count;
|
||||
total.branches_count += stats.branches_count;
|
||||
total.basic_block_count += stats.basic_block_count;
|
||||
}
|
||||
num = static_cast<double>(collected_stats.size());
|
||||
}
|
||||
std::string report;
|
||||
const auto add = [&](const char* fmt, u64 value) {
|
||||
if (value > 0) {
|
||||
report += fmt::format(fmt::runtime(fmt), static_cast<double>(value) / num);
|
||||
}
|
||||
};
|
||||
add("Code size: {:9.03f}\n", total.code_size);
|
||||
add("Register count: {:9.03f}\n", total.register_count);
|
||||
add("SGPRs: {:9.03f}\n", total.sgpr_count);
|
||||
add("VGPRs: {:9.03f}\n", total.vgpr_count);
|
||||
add("Branches count: {:9.03f}\n", total.branches_count);
|
||||
add("Basic blocks: {:9.03f}\n", total.basic_block_count);
|
||||
|
||||
LOG_INFO(Render_Vulkan,
|
||||
"\nAverage pipeline statistics\n"
|
||||
"==========================================\n"
|
||||
"{}\n",
|
||||
report);
|
||||
}
|
||||
|
||||
} // namespace Vulkan
|
||||
|
||||
@@ -1,39 +1,39 @@
|
||||
// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <mutex>
|
||||
#include <vector>
|
||||
|
||||
#include "common/common_types.h"
|
||||
#include "video_core/vulkan_common/vulkan_wrapper.h"
|
||||
|
||||
namespace Vulkan {
|
||||
|
||||
class Device;
|
||||
|
||||
class PipelineStatistics {
|
||||
public:
|
||||
explicit PipelineStatistics(const Device& device_);
|
||||
|
||||
void Collect(VkPipeline pipeline);
|
||||
|
||||
void Report() const;
|
||||
|
||||
private:
|
||||
struct Stats {
|
||||
u64 code_size{};
|
||||
u64 register_count{};
|
||||
u64 sgpr_count{};
|
||||
u64 vgpr_count{};
|
||||
u64 branches_count{};
|
||||
u64 basic_block_count{};
|
||||
};
|
||||
|
||||
const Device& device;
|
||||
mutable std::mutex mutex;
|
||||
std::vector<Stats> collected_stats;
|
||||
};
|
||||
|
||||
} // namespace Vulkan
|
||||
// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <mutex>
|
||||
#include <vector>
|
||||
|
||||
#include "common/common_types.h"
|
||||
#include "video_core/vulkan_common/vulkan_wrapper.h"
|
||||
|
||||
namespace Vulkan {
|
||||
|
||||
class Device;
|
||||
|
||||
class PipelineStatistics {
|
||||
public:
|
||||
explicit PipelineStatistics(const Device& device_);
|
||||
|
||||
void Collect(VkPipeline pipeline);
|
||||
|
||||
void Report() const;
|
||||
|
||||
private:
|
||||
struct Stats {
|
||||
u64 code_size{};
|
||||
u64 register_count{};
|
||||
u64 sgpr_count{};
|
||||
u64 vgpr_count{};
|
||||
u64 branches_count{};
|
||||
u64 basic_block_count{};
|
||||
};
|
||||
|
||||
const Device& device;
|
||||
mutable std::mutex mutex;
|
||||
std::vector<Stats> collected_stats;
|
||||
};
|
||||
|
||||
} // namespace Vulkan
|
||||
|
||||
@@ -1,345 +1,345 @@
|
||||
// SPDX-FileCopyrightText: Copyright 2018 yuzu Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#include <algorithm>
|
||||
#include <array>
|
||||
#include <cstring>
|
||||
#include <memory>
|
||||
#include <optional>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include <fmt/format.h>
|
||||
|
||||
#include "common/logging/log.h"
|
||||
#include "common/scope_exit.h"
|
||||
#include "common/settings.h"
|
||||
#include "common/telemetry.h"
|
||||
#include "core/core_timing.h"
|
||||
#include "core/frontend/emu_window.h"
|
||||
#include "core/telemetry_session.h"
|
||||
#include "video_core/gpu.h"
|
||||
#include "video_core/renderer_vulkan/renderer_vulkan.h"
|
||||
#include "video_core/renderer_vulkan/vk_blit_screen.h"
|
||||
#include "video_core/renderer_vulkan/vk_rasterizer.h"
|
||||
#include "video_core/renderer_vulkan/vk_scheduler.h"
|
||||
#include "video_core/renderer_vulkan/vk_state_tracker.h"
|
||||
#include "video_core/renderer_vulkan/vk_swapchain.h"
|
||||
#include "video_core/vulkan_common/vulkan_debug_callback.h"
|
||||
#include "video_core/vulkan_common/vulkan_device.h"
|
||||
#include "video_core/vulkan_common/vulkan_instance.h"
|
||||
#include "video_core/vulkan_common/vulkan_library.h"
|
||||
#include "video_core/vulkan_common/vulkan_memory_allocator.h"
|
||||
#include "video_core/vulkan_common/vulkan_surface.h"
|
||||
#include "video_core/vulkan_common/vulkan_wrapper.h"
|
||||
|
||||
namespace Vulkan {
|
||||
namespace {
|
||||
std::string GetReadableVersion(u32 version) {
|
||||
return fmt::format("{}.{}.{}", VK_VERSION_MAJOR(version), VK_VERSION_MINOR(version),
|
||||
VK_VERSION_PATCH(version));
|
||||
}
|
||||
|
||||
std::string GetDriverVersion(const Device& device) {
|
||||
// Extracted from
|
||||
// https://github.com/SaschaWillems/vulkan.gpuinfo.org/blob/5dddea46ea1120b0df14eef8f15ff8e318e35462/functions.php#L308-L314
|
||||
const u32 version = device.GetDriverVersion();
|
||||
|
||||
if (device.GetDriverID() == VK_DRIVER_ID_NVIDIA_PROPRIETARY_KHR) {
|
||||
const u32 major = (version >> 22) & 0x3ff;
|
||||
const u32 minor = (version >> 14) & 0x0ff;
|
||||
const u32 secondary = (version >> 6) & 0x0ff;
|
||||
const u32 tertiary = version & 0x003f;
|
||||
return fmt::format("{}.{}.{}.{}", major, minor, secondary, tertiary);
|
||||
}
|
||||
if (device.GetDriverID() == VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS_KHR) {
|
||||
const u32 major = version >> 14;
|
||||
const u32 minor = version & 0x3fff;
|
||||
return fmt::format("{}.{}", major, minor);
|
||||
}
|
||||
return GetReadableVersion(version);
|
||||
}
|
||||
|
||||
std::string BuildCommaSeparatedExtensions(std::vector<std::string> available_extensions) {
|
||||
std::sort(std::begin(available_extensions), std::end(available_extensions));
|
||||
|
||||
static constexpr std::size_t AverageExtensionSize = 64;
|
||||
std::string separated_extensions;
|
||||
separated_extensions.reserve(available_extensions.size() * AverageExtensionSize);
|
||||
|
||||
const auto end = std::end(available_extensions);
|
||||
for (auto extension = std::begin(available_extensions); extension != end; ++extension) {
|
||||
if (const bool is_last = extension + 1 == end; is_last) {
|
||||
separated_extensions += *extension;
|
||||
} else {
|
||||
separated_extensions += fmt::format("{},", *extension);
|
||||
}
|
||||
}
|
||||
return separated_extensions;
|
||||
}
|
||||
|
||||
Device CreateDevice(const vk::Instance& instance, const vk::InstanceDispatch& dld,
|
||||
VkSurfaceKHR surface) {
|
||||
const std::vector<VkPhysicalDevice> devices = instance.EnumeratePhysicalDevices();
|
||||
const s32 device_index = Settings::values.vulkan_device.GetValue();
|
||||
if (device_index < 0 || device_index >= static_cast<s32>(devices.size())) {
|
||||
LOG_ERROR(Render_Vulkan, "Invalid device index {}!", device_index);
|
||||
throw vk::Exception(VK_ERROR_INITIALIZATION_FAILED);
|
||||
}
|
||||
const vk::PhysicalDevice physical_device(devices[device_index], dld);
|
||||
return Device(*instance, physical_device, surface, dld);
|
||||
}
|
||||
} // Anonymous namespace
|
||||
|
||||
RendererVulkan::RendererVulkan(Core::TelemetrySession& telemetry_session_,
|
||||
Core::Frontend::EmuWindow& emu_window,
|
||||
Core::Memory::Memory& cpu_memory_, Tegra::GPU& gpu_,
|
||||
std::unique_ptr<Core::Frontend::GraphicsContext> context_) try
|
||||
: RendererBase(emu_window, std::move(context_)), telemetry_session(telemetry_session_),
|
||||
cpu_memory(cpu_memory_), gpu(gpu_), library(OpenLibrary()),
|
||||
instance(CreateInstance(library, dld, VK_API_VERSION_1_1, render_window.GetWindowInfo().type,
|
||||
true, Settings::values.renderer_debug.GetValue())),
|
||||
debug_callback(Settings::values.renderer_debug ? CreateDebugCallback(instance) : nullptr),
|
||||
surface(CreateSurface(instance, render_window)),
|
||||
device(CreateDevice(instance, dld, *surface)), memory_allocator(device, false),
|
||||
state_tracker(), scheduler(device, state_tracker),
|
||||
swapchain(*surface, device, scheduler, render_window.GetFramebufferLayout().width,
|
||||
render_window.GetFramebufferLayout().height, false),
|
||||
blit_screen(cpu_memory, render_window, device, memory_allocator, swapchain, scheduler,
|
||||
screen_info),
|
||||
rasterizer(render_window, gpu, cpu_memory, screen_info, device, memory_allocator,
|
||||
state_tracker, scheduler) {
|
||||
Report();
|
||||
} catch (const vk::Exception& exception) {
|
||||
LOG_ERROR(Render_Vulkan, "Vulkan initialization failed with error: {}", exception.what());
|
||||
throw std::runtime_error{fmt::format("Vulkan initialization error {}", exception.what())};
|
||||
}
|
||||
|
||||
RendererVulkan::~RendererVulkan() {
|
||||
void(device.GetLogical().WaitIdle());
|
||||
}
|
||||
|
||||
void RendererVulkan::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {
|
||||
if (!framebuffer) {
|
||||
return;
|
||||
}
|
||||
SCOPE_EXIT({ render_window.OnFrameDisplayed(); });
|
||||
if (!render_window.IsShown()) {
|
||||
return;
|
||||
}
|
||||
// Update screen info if the framebuffer size has changed.
|
||||
if (screen_info.width != framebuffer->width || screen_info.height != framebuffer->height) {
|
||||
screen_info.width = framebuffer->width;
|
||||
screen_info.height = framebuffer->height;
|
||||
}
|
||||
const VAddr framebuffer_addr = framebuffer->address + framebuffer->offset;
|
||||
const bool use_accelerated =
|
||||
rasterizer.AccelerateDisplay(*framebuffer, framebuffer_addr, framebuffer->stride);
|
||||
const bool is_srgb = use_accelerated && screen_info.is_srgb;
|
||||
RenderScreenshot(*framebuffer, use_accelerated);
|
||||
|
||||
bool has_been_recreated = false;
|
||||
const auto recreate_swapchain = [&] {
|
||||
if (!has_been_recreated) {
|
||||
has_been_recreated = true;
|
||||
scheduler.Finish();
|
||||
}
|
||||
const Layout::FramebufferLayout layout = render_window.GetFramebufferLayout();
|
||||
swapchain.Create(layout.width, layout.height, is_srgb);
|
||||
};
|
||||
if (swapchain.NeedsRecreation(is_srgb)) {
|
||||
recreate_swapchain();
|
||||
}
|
||||
bool is_outdated;
|
||||
do {
|
||||
swapchain.AcquireNextImage();
|
||||
is_outdated = swapchain.IsOutDated();
|
||||
if (is_outdated) {
|
||||
recreate_swapchain();
|
||||
}
|
||||
} while (is_outdated);
|
||||
if (has_been_recreated) {
|
||||
blit_screen.Recreate();
|
||||
}
|
||||
const VkSemaphore render_semaphore = blit_screen.DrawToSwapchain(*framebuffer, use_accelerated);
|
||||
const VkSemaphore present_semaphore = swapchain.CurrentPresentSemaphore();
|
||||
scheduler.Flush(render_semaphore, present_semaphore);
|
||||
scheduler.WaitWorker();
|
||||
swapchain.Present(render_semaphore);
|
||||
|
||||
gpu.RendererFrameEndNotify();
|
||||
rasterizer.TickFrame();
|
||||
}
|
||||
|
||||
void RendererVulkan::Report() const {
|
||||
const std::string vendor_name{device.GetVendorName()};
|
||||
const std::string model_name{device.GetModelName()};
|
||||
const std::string driver_version = GetDriverVersion(device);
|
||||
const std::string driver_name = fmt::format("{} {}", vendor_name, driver_version);
|
||||
|
||||
const std::string api_version = GetReadableVersion(device.ApiVersion());
|
||||
|
||||
const std::string extensions = BuildCommaSeparatedExtensions(device.GetAvailableExtensions());
|
||||
|
||||
LOG_INFO(Render_Vulkan, "Driver: {}", driver_name);
|
||||
LOG_INFO(Render_Vulkan, "Device: {}", model_name);
|
||||
LOG_INFO(Render_Vulkan, "Vulkan: {}", api_version);
|
||||
|
||||
static constexpr auto field = Common::Telemetry::FieldType::UserSystem;
|
||||
telemetry_session.AddField(field, "GPU_Vendor", vendor_name);
|
||||
telemetry_session.AddField(field, "GPU_Model", model_name);
|
||||
telemetry_session.AddField(field, "GPU_Vulkan_Driver", driver_name);
|
||||
telemetry_session.AddField(field, "GPU_Vulkan_Version", api_version);
|
||||
telemetry_session.AddField(field, "GPU_Vulkan_Extensions", extensions);
|
||||
}
|
||||
|
||||
void Vulkan::RendererVulkan::RenderScreenshot(const Tegra::FramebufferConfig& framebuffer,
|
||||
bool use_accelerated) {
|
||||
if (!renderer_settings.screenshot_requested) {
|
||||
return;
|
||||
}
|
||||
const Layout::FramebufferLayout layout{renderer_settings.screenshot_framebuffer_layout};
|
||||
vk::Image staging_image = device.GetLogical().CreateImage(VkImageCreateInfo{
|
||||
.sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
|
||||
.pNext = nullptr,
|
||||
.flags = VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT,
|
||||
.imageType = VK_IMAGE_TYPE_2D,
|
||||
.format = VK_FORMAT_B8G8R8A8_UNORM,
|
||||
.extent =
|
||||
{
|
||||
.width = layout.width,
|
||||
.height = layout.height,
|
||||
.depth = 1,
|
||||
},
|
||||
.mipLevels = 1,
|
||||
.arrayLayers = 1,
|
||||
.samples = VK_SAMPLE_COUNT_1_BIT,
|
||||
.tiling = VK_IMAGE_TILING_OPTIMAL,
|
||||
.usage = VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT |
|
||||
VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT,
|
||||
.sharingMode = VK_SHARING_MODE_EXCLUSIVE,
|
||||
.queueFamilyIndexCount = 0,
|
||||
.pQueueFamilyIndices = nullptr,
|
||||
.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED,
|
||||
});
|
||||
const auto image_commit = memory_allocator.Commit(staging_image, MemoryUsage::DeviceLocal);
|
||||
|
||||
const vk::ImageView dst_view = device.GetLogical().CreateImageView(VkImageViewCreateInfo{
|
||||
.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
|
||||
.pNext = nullptr,
|
||||
.flags = 0,
|
||||
.image = *staging_image,
|
||||
.viewType = VK_IMAGE_VIEW_TYPE_2D,
|
||||
.format = screen_info.is_srgb ? VK_FORMAT_B8G8R8A8_SRGB : VK_FORMAT_B8G8R8A8_UNORM,
|
||||
.components{
|
||||
.r = VK_COMPONENT_SWIZZLE_IDENTITY,
|
||||
.g = VK_COMPONENT_SWIZZLE_IDENTITY,
|
||||
.b = VK_COMPONENT_SWIZZLE_IDENTITY,
|
||||
.a = VK_COMPONENT_SWIZZLE_IDENTITY,
|
||||
},
|
||||
.subresourceRange{
|
||||
.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
|
||||
.baseMipLevel = 0,
|
||||
.levelCount = 1,
|
||||
.baseArrayLayer = 0,
|
||||
.layerCount = VK_REMAINING_ARRAY_LAYERS,
|
||||
},
|
||||
});
|
||||
const VkExtent2D render_area{.width = layout.width, .height = layout.height};
|
||||
const vk::Framebuffer screenshot_fb = blit_screen.CreateFramebuffer(*dst_view, render_area);
|
||||
// Since we're not rendering to the screen, ignore the render semaphore.
|
||||
void(blit_screen.Draw(framebuffer, *screenshot_fb, layout, render_area, use_accelerated));
|
||||
|
||||
const auto buffer_size = static_cast<VkDeviceSize>(layout.width * layout.height * 4);
|
||||
const VkBufferCreateInfo dst_buffer_info{
|
||||
.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
|
||||
.pNext = nullptr,
|
||||
.flags = 0,
|
||||
.size = buffer_size,
|
||||
.usage = VK_BUFFER_USAGE_TRANSFER_DST_BIT,
|
||||
.sharingMode = VK_SHARING_MODE_EXCLUSIVE,
|
||||
.queueFamilyIndexCount = 0,
|
||||
.pQueueFamilyIndices = nullptr,
|
||||
};
|
||||
const vk::Buffer dst_buffer = device.GetLogical().CreateBuffer(dst_buffer_info);
|
||||
MemoryCommit dst_buffer_memory = memory_allocator.Commit(dst_buffer, MemoryUsage::Download);
|
||||
|
||||
scheduler.RequestOutsideRenderPassOperationContext();
|
||||
scheduler.Record([&](vk::CommandBuffer cmdbuf) {
|
||||
const VkImageMemoryBarrier read_barrier{
|
||||
.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
|
||||
.pNext = nullptr,
|
||||
.srcAccessMask = VK_ACCESS_MEMORY_WRITE_BIT,
|
||||
.dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT,
|
||||
.oldLayout = VK_IMAGE_LAYOUT_PRESENT_SRC_KHR,
|
||||
.newLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
|
||||
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
|
||||
.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
|
||||
.image = *staging_image,
|
||||
.subresourceRange{
|
||||
.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
|
||||
.baseMipLevel = 0,
|
||||
.levelCount = VK_REMAINING_MIP_LEVELS,
|
||||
.baseArrayLayer = 0,
|
||||
.layerCount = VK_REMAINING_ARRAY_LAYERS,
|
||||
},
|
||||
};
|
||||
const VkImageMemoryBarrier image_write_barrier{
|
||||
.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
|
||||
.pNext = nullptr,
|
||||
.srcAccessMask = 0,
|
||||
.dstAccessMask = VK_ACCESS_MEMORY_WRITE_BIT,
|
||||
.oldLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
|
||||
.newLayout = VK_IMAGE_LAYOUT_GENERAL,
|
||||
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
|
||||
.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
|
||||
.image = *staging_image,
|
||||
.subresourceRange{
|
||||
.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
|
||||
.baseMipLevel = 0,
|
||||
.levelCount = VK_REMAINING_MIP_LEVELS,
|
||||
.baseArrayLayer = 0,
|
||||
.layerCount = VK_REMAINING_ARRAY_LAYERS,
|
||||
},
|
||||
};
|
||||
static constexpr VkMemoryBarrier memory_write_barrier{
|
||||
.sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER,
|
||||
.pNext = nullptr,
|
||||
.srcAccessMask = VK_ACCESS_MEMORY_WRITE_BIT,
|
||||
.dstAccessMask = VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT,
|
||||
};
|
||||
const VkBufferImageCopy copy{
|
||||
.bufferOffset = 0,
|
||||
.bufferRowLength = 0,
|
||||
.bufferImageHeight = 0,
|
||||
.imageSubresource{
|
||||
.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
|
||||
.mipLevel = 0,
|
||||
.baseArrayLayer = 0,
|
||||
.layerCount = 1,
|
||||
},
|
||||
.imageOffset{.x = 0, .y = 0, .z = 0},
|
||||
.imageExtent{
|
||||
.width = layout.width,
|
||||
.height = layout.height,
|
||||
.depth = 1,
|
||||
},
|
||||
};
|
||||
cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT,
|
||||
0, read_barrier);
|
||||
cmdbuf.CopyImageToBuffer(*staging_image, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, *dst_buffer,
|
||||
copy);
|
||||
cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
|
||||
0, memory_write_barrier, nullptr, image_write_barrier);
|
||||
});
|
||||
// Ensure the copy is fully completed before saving the screenshot
|
||||
scheduler.Finish();
|
||||
|
||||
// Copy backing image data to the QImage screenshot buffer
|
||||
const auto dst_memory_map = dst_buffer_memory.Map();
|
||||
std::memcpy(renderer_settings.screenshot_bits, dst_memory_map.data(), dst_memory_map.size());
|
||||
renderer_settings.screenshot_complete_callback(false);
|
||||
renderer_settings.screenshot_requested = false;
|
||||
}
|
||||
|
||||
} // namespace Vulkan
|
||||
// SPDX-FileCopyrightText: Copyright 2018 yuzu Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#include <algorithm>
|
||||
#include <array>
|
||||
#include <cstring>
|
||||
#include <memory>
|
||||
#include <optional>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include <fmt/format.h>
|
||||
|
||||
#include "common/logging/log.h"
|
||||
#include "common/scope_exit.h"
|
||||
#include "common/settings.h"
|
||||
#include "common/telemetry.h"
|
||||
#include "core/core_timing.h"
|
||||
#include "core/frontend/emu_window.h"
|
||||
#include "core/telemetry_session.h"
|
||||
#include "video_core/gpu.h"
|
||||
#include "video_core/renderer_vulkan/renderer_vulkan.h"
|
||||
#include "video_core/renderer_vulkan/vk_blit_screen.h"
|
||||
#include "video_core/renderer_vulkan/vk_rasterizer.h"
|
||||
#include "video_core/renderer_vulkan/vk_scheduler.h"
|
||||
#include "video_core/renderer_vulkan/vk_state_tracker.h"
|
||||
#include "video_core/renderer_vulkan/vk_swapchain.h"
|
||||
#include "video_core/vulkan_common/vulkan_debug_callback.h"
|
||||
#include "video_core/vulkan_common/vulkan_device.h"
|
||||
#include "video_core/vulkan_common/vulkan_instance.h"
|
||||
#include "video_core/vulkan_common/vulkan_library.h"
|
||||
#include "video_core/vulkan_common/vulkan_memory_allocator.h"
|
||||
#include "video_core/vulkan_common/vulkan_surface.h"
|
||||
#include "video_core/vulkan_common/vulkan_wrapper.h"
|
||||
|
||||
namespace Vulkan {
|
||||
namespace {
|
||||
std::string GetReadableVersion(u32 version) {
|
||||
return fmt::format("{}.{}.{}", VK_VERSION_MAJOR(version), VK_VERSION_MINOR(version),
|
||||
VK_VERSION_PATCH(version));
|
||||
}
|
||||
|
||||
std::string GetDriverVersion(const Device& device) {
|
||||
// Extracted from
|
||||
// https://github.com/SaschaWillems/vulkan.gpuinfo.org/blob/5dddea46ea1120b0df14eef8f15ff8e318e35462/functions.php#L308-L314
|
||||
const u32 version = device.GetDriverVersion();
|
||||
|
||||
if (device.GetDriverID() == VK_DRIVER_ID_NVIDIA_PROPRIETARY_KHR) {
|
||||
const u32 major = (version >> 22) & 0x3ff;
|
||||
const u32 minor = (version >> 14) & 0x0ff;
|
||||
const u32 secondary = (version >> 6) & 0x0ff;
|
||||
const u32 tertiary = version & 0x003f;
|
||||
return fmt::format("{}.{}.{}.{}", major, minor, secondary, tertiary);
|
||||
}
|
||||
if (device.GetDriverID() == VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS_KHR) {
|
||||
const u32 major = version >> 14;
|
||||
const u32 minor = version & 0x3fff;
|
||||
return fmt::format("{}.{}", major, minor);
|
||||
}
|
||||
return GetReadableVersion(version);
|
||||
}
|
||||
|
||||
std::string BuildCommaSeparatedExtensions(std::vector<std::string> available_extensions) {
|
||||
std::sort(std::begin(available_extensions), std::end(available_extensions));
|
||||
|
||||
static constexpr std::size_t AverageExtensionSize = 64;
|
||||
std::string separated_extensions;
|
||||
separated_extensions.reserve(available_extensions.size() * AverageExtensionSize);
|
||||
|
||||
const auto end = std::end(available_extensions);
|
||||
for (auto extension = std::begin(available_extensions); extension != end; ++extension) {
|
||||
if (const bool is_last = extension + 1 == end; is_last) {
|
||||
separated_extensions += *extension;
|
||||
} else {
|
||||
separated_extensions += fmt::format("{},", *extension);
|
||||
}
|
||||
}
|
||||
return separated_extensions;
|
||||
}
|
||||
|
||||
Device CreateDevice(const vk::Instance& instance, const vk::InstanceDispatch& dld,
|
||||
VkSurfaceKHR surface) {
|
||||
const std::vector<VkPhysicalDevice> devices = instance.EnumeratePhysicalDevices();
|
||||
const s32 device_index = Settings::values.vulkan_device.GetValue();
|
||||
if (device_index < 0 || device_index >= static_cast<s32>(devices.size())) {
|
||||
LOG_ERROR(Render_Vulkan, "Invalid device index {}!", device_index);
|
||||
throw vk::Exception(VK_ERROR_INITIALIZATION_FAILED);
|
||||
}
|
||||
const vk::PhysicalDevice physical_device(devices[device_index], dld);
|
||||
return Device(*instance, physical_device, surface, dld);
|
||||
}
|
||||
} // Anonymous namespace
|
||||
|
||||
RendererVulkan::RendererVulkan(Core::TelemetrySession& telemetry_session_,
|
||||
Core::Frontend::EmuWindow& emu_window,
|
||||
Core::Memory::Memory& cpu_memory_, Tegra::GPU& gpu_,
|
||||
std::unique_ptr<Core::Frontend::GraphicsContext> context_) try
|
||||
: RendererBase(emu_window, std::move(context_)), telemetry_session(telemetry_session_),
|
||||
cpu_memory(cpu_memory_), gpu(gpu_), library(OpenLibrary()),
|
||||
instance(CreateInstance(library, dld, VK_API_VERSION_1_1, render_window.GetWindowInfo().type,
|
||||
true, Settings::values.renderer_debug.GetValue())),
|
||||
debug_callback(Settings::values.renderer_debug ? CreateDebugCallback(instance) : nullptr),
|
||||
surface(CreateSurface(instance, render_window)),
|
||||
device(CreateDevice(instance, dld, *surface)), memory_allocator(device, false),
|
||||
state_tracker(), scheduler(device, state_tracker),
|
||||
swapchain(*surface, device, scheduler, render_window.GetFramebufferLayout().width,
|
||||
render_window.GetFramebufferLayout().height, false),
|
||||
blit_screen(cpu_memory, render_window, device, memory_allocator, swapchain, scheduler,
|
||||
screen_info),
|
||||
rasterizer(render_window, gpu, cpu_memory, screen_info, device, memory_allocator,
|
||||
state_tracker, scheduler) {
|
||||
Report();
|
||||
} catch (const vk::Exception& exception) {
|
||||
LOG_ERROR(Render_Vulkan, "Vulkan initialization failed with error: {}", exception.what());
|
||||
throw std::runtime_error{fmt::format("Vulkan initialization error {}", exception.what())};
|
||||
}
|
||||
|
||||
RendererVulkan::~RendererVulkan() {
|
||||
void(device.GetLogical().WaitIdle());
|
||||
}
|
||||
|
||||
void RendererVulkan::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {
|
||||
if (!framebuffer) {
|
||||
return;
|
||||
}
|
||||
SCOPE_EXIT({ render_window.OnFrameDisplayed(); });
|
||||
if (!render_window.IsShown()) {
|
||||
return;
|
||||
}
|
||||
// Update screen info if the framebuffer size has changed.
|
||||
if (screen_info.width != framebuffer->width || screen_info.height != framebuffer->height) {
|
||||
screen_info.width = framebuffer->width;
|
||||
screen_info.height = framebuffer->height;
|
||||
}
|
||||
const VAddr framebuffer_addr = framebuffer->address + framebuffer->offset;
|
||||
const bool use_accelerated =
|
||||
rasterizer.AccelerateDisplay(*framebuffer, framebuffer_addr, framebuffer->stride);
|
||||
const bool is_srgb = use_accelerated && screen_info.is_srgb;
|
||||
RenderScreenshot(*framebuffer, use_accelerated);
|
||||
|
||||
bool has_been_recreated = false;
|
||||
const auto recreate_swapchain = [&] {
|
||||
if (!has_been_recreated) {
|
||||
has_been_recreated = true;
|
||||
scheduler.Finish();
|
||||
}
|
||||
const Layout::FramebufferLayout layout = render_window.GetFramebufferLayout();
|
||||
swapchain.Create(layout.width, layout.height, is_srgb);
|
||||
};
|
||||
if (swapchain.NeedsRecreation(is_srgb)) {
|
||||
recreate_swapchain();
|
||||
}
|
||||
bool is_outdated;
|
||||
do {
|
||||
swapchain.AcquireNextImage();
|
||||
is_outdated = swapchain.IsOutDated();
|
||||
if (is_outdated) {
|
||||
recreate_swapchain();
|
||||
}
|
||||
} while (is_outdated);
|
||||
if (has_been_recreated) {
|
||||
blit_screen.Recreate();
|
||||
}
|
||||
const VkSemaphore render_semaphore = blit_screen.DrawToSwapchain(*framebuffer, use_accelerated);
|
||||
const VkSemaphore present_semaphore = swapchain.CurrentPresentSemaphore();
|
||||
scheduler.Flush(render_semaphore, present_semaphore);
|
||||
scheduler.WaitWorker();
|
||||
swapchain.Present(render_semaphore);
|
||||
|
||||
gpu.RendererFrameEndNotify();
|
||||
rasterizer.TickFrame();
|
||||
}
|
||||
|
||||
void RendererVulkan::Report() const {
|
||||
const std::string vendor_name{device.GetVendorName()};
|
||||
const std::string model_name{device.GetModelName()};
|
||||
const std::string driver_version = GetDriverVersion(device);
|
||||
const std::string driver_name = fmt::format("{} {}", vendor_name, driver_version);
|
||||
|
||||
const std::string api_version = GetReadableVersion(device.ApiVersion());
|
||||
|
||||
const std::string extensions = BuildCommaSeparatedExtensions(device.GetAvailableExtensions());
|
||||
|
||||
LOG_INFO(Render_Vulkan, "Driver: {}", driver_name);
|
||||
LOG_INFO(Render_Vulkan, "Device: {}", model_name);
|
||||
LOG_INFO(Render_Vulkan, "Vulkan: {}", api_version);
|
||||
|
||||
static constexpr auto field = Common::Telemetry::FieldType::UserSystem;
|
||||
telemetry_session.AddField(field, "GPU_Vendor", vendor_name);
|
||||
telemetry_session.AddField(field, "GPU_Model", model_name);
|
||||
telemetry_session.AddField(field, "GPU_Vulkan_Driver", driver_name);
|
||||
telemetry_session.AddField(field, "GPU_Vulkan_Version", api_version);
|
||||
telemetry_session.AddField(field, "GPU_Vulkan_Extensions", extensions);
|
||||
}
|
||||
|
||||
void Vulkan::RendererVulkan::RenderScreenshot(const Tegra::FramebufferConfig& framebuffer,
|
||||
bool use_accelerated) {
|
||||
if (!renderer_settings.screenshot_requested) {
|
||||
return;
|
||||
}
|
||||
const Layout::FramebufferLayout layout{renderer_settings.screenshot_framebuffer_layout};
|
||||
vk::Image staging_image = device.GetLogical().CreateImage(VkImageCreateInfo{
|
||||
.sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
|
||||
.pNext = nullptr,
|
||||
.flags = VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT,
|
||||
.imageType = VK_IMAGE_TYPE_2D,
|
||||
.format = VK_FORMAT_B8G8R8A8_UNORM,
|
||||
.extent =
|
||||
{
|
||||
.width = layout.width,
|
||||
.height = layout.height,
|
||||
.depth = 1,
|
||||
},
|
||||
.mipLevels = 1,
|
||||
.arrayLayers = 1,
|
||||
.samples = VK_SAMPLE_COUNT_1_BIT,
|
||||
.tiling = VK_IMAGE_TILING_OPTIMAL,
|
||||
.usage = VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT |
|
||||
VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT,
|
||||
.sharingMode = VK_SHARING_MODE_EXCLUSIVE,
|
||||
.queueFamilyIndexCount = 0,
|
||||
.pQueueFamilyIndices = nullptr,
|
||||
.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED,
|
||||
});
|
||||
const auto image_commit = memory_allocator.Commit(staging_image, MemoryUsage::DeviceLocal);
|
||||
|
||||
const vk::ImageView dst_view = device.GetLogical().CreateImageView(VkImageViewCreateInfo{
|
||||
.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
|
||||
.pNext = nullptr,
|
||||
.flags = 0,
|
||||
.image = *staging_image,
|
||||
.viewType = VK_IMAGE_VIEW_TYPE_2D,
|
||||
.format = screen_info.is_srgb ? VK_FORMAT_B8G8R8A8_SRGB : VK_FORMAT_B8G8R8A8_UNORM,
|
||||
.components{
|
||||
.r = VK_COMPONENT_SWIZZLE_IDENTITY,
|
||||
.g = VK_COMPONENT_SWIZZLE_IDENTITY,
|
||||
.b = VK_COMPONENT_SWIZZLE_IDENTITY,
|
||||
.a = VK_COMPONENT_SWIZZLE_IDENTITY,
|
||||
},
|
||||
.subresourceRange{
|
||||
.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
|
||||
.baseMipLevel = 0,
|
||||
.levelCount = 1,
|
||||
.baseArrayLayer = 0,
|
||||
.layerCount = VK_REMAINING_ARRAY_LAYERS,
|
||||
},
|
||||
});
|
||||
const VkExtent2D render_area{.width = layout.width, .height = layout.height};
|
||||
const vk::Framebuffer screenshot_fb = blit_screen.CreateFramebuffer(*dst_view, render_area);
|
||||
// Since we're not rendering to the screen, ignore the render semaphore.
|
||||
void(blit_screen.Draw(framebuffer, *screenshot_fb, layout, render_area, use_accelerated));
|
||||
|
||||
const auto buffer_size = static_cast<VkDeviceSize>(layout.width * layout.height * 4);
|
||||
const VkBufferCreateInfo dst_buffer_info{
|
||||
.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
|
||||
.pNext = nullptr,
|
||||
.flags = 0,
|
||||
.size = buffer_size,
|
||||
.usage = VK_BUFFER_USAGE_TRANSFER_DST_BIT,
|
||||
.sharingMode = VK_SHARING_MODE_EXCLUSIVE,
|
||||
.queueFamilyIndexCount = 0,
|
||||
.pQueueFamilyIndices = nullptr,
|
||||
};
|
||||
const vk::Buffer dst_buffer = device.GetLogical().CreateBuffer(dst_buffer_info);
|
||||
MemoryCommit dst_buffer_memory = memory_allocator.Commit(dst_buffer, MemoryUsage::Download);
|
||||
|
||||
scheduler.RequestOutsideRenderPassOperationContext();
|
||||
scheduler.Record([&](vk::CommandBuffer cmdbuf) {
|
||||
const VkImageMemoryBarrier read_barrier{
|
||||
.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
|
||||
.pNext = nullptr,
|
||||
.srcAccessMask = VK_ACCESS_MEMORY_WRITE_BIT,
|
||||
.dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT,
|
||||
.oldLayout = VK_IMAGE_LAYOUT_PRESENT_SRC_KHR,
|
||||
.newLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
|
||||
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
|
||||
.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
|
||||
.image = *staging_image,
|
||||
.subresourceRange{
|
||||
.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
|
||||
.baseMipLevel = 0,
|
||||
.levelCount = VK_REMAINING_MIP_LEVELS,
|
||||
.baseArrayLayer = 0,
|
||||
.layerCount = VK_REMAINING_ARRAY_LAYERS,
|
||||
},
|
||||
};
|
||||
const VkImageMemoryBarrier image_write_barrier{
|
||||
.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
|
||||
.pNext = nullptr,
|
||||
.srcAccessMask = 0,
|
||||
.dstAccessMask = VK_ACCESS_MEMORY_WRITE_BIT,
|
||||
.oldLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
|
||||
.newLayout = VK_IMAGE_LAYOUT_GENERAL,
|
||||
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
|
||||
.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
|
||||
.image = *staging_image,
|
||||
.subresourceRange{
|
||||
.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
|
||||
.baseMipLevel = 0,
|
||||
.levelCount = VK_REMAINING_MIP_LEVELS,
|
||||
.baseArrayLayer = 0,
|
||||
.layerCount = VK_REMAINING_ARRAY_LAYERS,
|
||||
},
|
||||
};
|
||||
static constexpr VkMemoryBarrier memory_write_barrier{
|
||||
.sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER,
|
||||
.pNext = nullptr,
|
||||
.srcAccessMask = VK_ACCESS_MEMORY_WRITE_BIT,
|
||||
.dstAccessMask = VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT,
|
||||
};
|
||||
const VkBufferImageCopy copy{
|
||||
.bufferOffset = 0,
|
||||
.bufferRowLength = 0,
|
||||
.bufferImageHeight = 0,
|
||||
.imageSubresource{
|
||||
.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
|
||||
.mipLevel = 0,
|
||||
.baseArrayLayer = 0,
|
||||
.layerCount = 1,
|
||||
},
|
||||
.imageOffset{.x = 0, .y = 0, .z = 0},
|
||||
.imageExtent{
|
||||
.width = layout.width,
|
||||
.height = layout.height,
|
||||
.depth = 1,
|
||||
},
|
||||
};
|
||||
cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT,
|
||||
0, read_barrier);
|
||||
cmdbuf.CopyImageToBuffer(*staging_image, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, *dst_buffer,
|
||||
copy);
|
||||
cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
|
||||
0, memory_write_barrier, nullptr, image_write_barrier);
|
||||
});
|
||||
// Ensure the copy is fully completed before saving the screenshot
|
||||
scheduler.Finish();
|
||||
|
||||
// Copy backing image data to the QImage screenshot buffer
|
||||
const auto dst_memory_map = dst_buffer_memory.Map();
|
||||
std::memcpy(renderer_settings.screenshot_bits, dst_memory_map.data(), dst_memory_map.size());
|
||||
renderer_settings.screenshot_complete_callback(false);
|
||||
renderer_settings.screenshot_requested = false;
|
||||
}
|
||||
|
||||
} // namespace Vulkan
|
||||
|
||||
@@ -1,79 +1,79 @@
|
||||
// SPDX-FileCopyrightText: Copyright 2018 yuzu Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <memory>
|
||||
#include <string>
|
||||
|
||||
#include "common/dynamic_library.h"
|
||||
#include "video_core/renderer_base.h"
|
||||
#include "video_core/renderer_vulkan/vk_blit_screen.h"
|
||||
#include "video_core/renderer_vulkan/vk_rasterizer.h"
|
||||
#include "video_core/renderer_vulkan/vk_scheduler.h"
|
||||
#include "video_core/renderer_vulkan/vk_state_tracker.h"
|
||||
#include "video_core/renderer_vulkan/vk_swapchain.h"
|
||||
#include "video_core/vulkan_common/vulkan_device.h"
|
||||
#include "video_core/vulkan_common/vulkan_memory_allocator.h"
|
||||
#include "video_core/vulkan_common/vulkan_wrapper.h"
|
||||
|
||||
namespace Core {
|
||||
class TelemetrySession;
|
||||
}
|
||||
|
||||
namespace Core::Memory {
|
||||
class Memory;
|
||||
}
|
||||
|
||||
namespace Tegra {
|
||||
class GPU;
|
||||
}
|
||||
|
||||
namespace Vulkan {
|
||||
|
||||
class RendererVulkan final : public VideoCore::RendererBase {
|
||||
public:
|
||||
explicit RendererVulkan(Core::TelemetrySession& telemtry_session,
|
||||
Core::Frontend::EmuWindow& emu_window,
|
||||
Core::Memory::Memory& cpu_memory_, Tegra::GPU& gpu_,
|
||||
std::unique_ptr<Core::Frontend::GraphicsContext> context_);
|
||||
~RendererVulkan() override;
|
||||
|
||||
void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) override;
|
||||
|
||||
VideoCore::RasterizerInterface* ReadRasterizer() override {
|
||||
return &rasterizer;
|
||||
}
|
||||
|
||||
[[nodiscard]] std::string GetDeviceVendor() const override {
|
||||
return device.GetDriverName();
|
||||
}
|
||||
|
||||
private:
|
||||
void Report() const;
|
||||
|
||||
void RenderScreenshot(const Tegra::FramebufferConfig& framebuffer, bool use_accelerated);
|
||||
|
||||
Core::TelemetrySession& telemetry_session;
|
||||
Core::Memory::Memory& cpu_memory;
|
||||
Tegra::GPU& gpu;
|
||||
|
||||
Common::DynamicLibrary library;
|
||||
vk::InstanceDispatch dld;
|
||||
|
||||
vk::Instance instance;
|
||||
vk::DebugUtilsMessenger debug_callback;
|
||||
vk::SurfaceKHR surface;
|
||||
|
||||
ScreenInfo screen_info;
|
||||
|
||||
Device device;
|
||||
MemoryAllocator memory_allocator;
|
||||
StateTracker state_tracker;
|
||||
Scheduler scheduler;
|
||||
Swapchain swapchain;
|
||||
BlitScreen blit_screen;
|
||||
RasterizerVulkan rasterizer;
|
||||
};
|
||||
|
||||
} // namespace Vulkan
|
||||
// SPDX-FileCopyrightText: Copyright 2018 yuzu Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <memory>
|
||||
#include <string>
|
||||
|
||||
#include "common/dynamic_library.h"
|
||||
#include "video_core/renderer_base.h"
|
||||
#include "video_core/renderer_vulkan/vk_blit_screen.h"
|
||||
#include "video_core/renderer_vulkan/vk_rasterizer.h"
|
||||
#include "video_core/renderer_vulkan/vk_scheduler.h"
|
||||
#include "video_core/renderer_vulkan/vk_state_tracker.h"
|
||||
#include "video_core/renderer_vulkan/vk_swapchain.h"
|
||||
#include "video_core/vulkan_common/vulkan_device.h"
|
||||
#include "video_core/vulkan_common/vulkan_memory_allocator.h"
|
||||
#include "video_core/vulkan_common/vulkan_wrapper.h"
|
||||
|
||||
namespace Core {
|
||||
class TelemetrySession;
|
||||
}
|
||||
|
||||
namespace Core::Memory {
|
||||
class Memory;
|
||||
}
|
||||
|
||||
namespace Tegra {
|
||||
class GPU;
|
||||
}
|
||||
|
||||
namespace Vulkan {
|
||||
|
||||
class RendererVulkan final : public VideoCore::RendererBase {
|
||||
public:
|
||||
explicit RendererVulkan(Core::TelemetrySession& telemtry_session,
|
||||
Core::Frontend::EmuWindow& emu_window,
|
||||
Core::Memory::Memory& cpu_memory_, Tegra::GPU& gpu_,
|
||||
std::unique_ptr<Core::Frontend::GraphicsContext> context_);
|
||||
~RendererVulkan() override;
|
||||
|
||||
void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) override;
|
||||
|
||||
VideoCore::RasterizerInterface* ReadRasterizer() override {
|
||||
return &rasterizer;
|
||||
}
|
||||
|
||||
[[nodiscard]] std::string GetDeviceVendor() const override {
|
||||
return device.GetDriverName();
|
||||
}
|
||||
|
||||
private:
|
||||
void Report() const;
|
||||
|
||||
void RenderScreenshot(const Tegra::FramebufferConfig& framebuffer, bool use_accelerated);
|
||||
|
||||
Core::TelemetrySession& telemetry_session;
|
||||
Core::Memory::Memory& cpu_memory;
|
||||
Tegra::GPU& gpu;
|
||||
|
||||
Common::DynamicLibrary library;
|
||||
vk::InstanceDispatch dld;
|
||||
|
||||
vk::Instance instance;
|
||||
vk::DebugUtilsMessenger debug_callback;
|
||||
vk::SurfaceKHR surface;
|
||||
|
||||
ScreenInfo screen_info;
|
||||
|
||||
Device device;
|
||||
MemoryAllocator memory_allocator;
|
||||
StateTracker state_tracker;
|
||||
Scheduler scheduler;
|
||||
Swapchain swapchain;
|
||||
BlitScreen blit_screen;
|
||||
RasterizerVulkan rasterizer;
|
||||
};
|
||||
|
||||
} // namespace Vulkan
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,168 +1,168 @@
|
||||
// SPDX-FileCopyrightText: Copyright 2018 yuzu Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <memory>
|
||||
|
||||
#include "video_core/vulkan_common/vulkan_memory_allocator.h"
|
||||
#include "video_core/vulkan_common/vulkan_wrapper.h"
|
||||
|
||||
namespace Core {
|
||||
class System;
|
||||
}
|
||||
|
||||
namespace Core::Memory {
|
||||
class Memory;
|
||||
}
|
||||
|
||||
namespace Core::Frontend {
|
||||
class EmuWindow;
|
||||
}
|
||||
|
||||
namespace Tegra {
|
||||
struct FramebufferConfig;
|
||||
}
|
||||
|
||||
namespace VideoCore {
|
||||
class RasterizerInterface;
|
||||
}
|
||||
|
||||
namespace Service::android {
|
||||
enum class PixelFormat : u32;
|
||||
}
|
||||
|
||||
namespace Vulkan {
|
||||
|
||||
struct ScreenInfo;
|
||||
|
||||
class Device;
|
||||
class FSR;
|
||||
class RasterizerVulkan;
|
||||
class Scheduler;
|
||||
class Swapchain;
|
||||
|
||||
struct ScreenInfo {
|
||||
VkImageView image_view{};
|
||||
u32 width{};
|
||||
u32 height{};
|
||||
bool is_srgb{};
|
||||
};
|
||||
|
||||
class BlitScreen {
|
||||
public:
|
||||
explicit BlitScreen(Core::Memory::Memory& cpu_memory, Core::Frontend::EmuWindow& render_window,
|
||||
const Device& device, MemoryAllocator& memory_manager, Swapchain& swapchain,
|
||||
Scheduler& scheduler, const ScreenInfo& screen_info);
|
||||
~BlitScreen();
|
||||
|
||||
void Recreate();
|
||||
|
||||
[[nodiscard]] VkSemaphore Draw(const Tegra::FramebufferConfig& framebuffer,
|
||||
const VkFramebuffer& host_framebuffer,
|
||||
const Layout::FramebufferLayout layout, VkExtent2D render_area,
|
||||
bool use_accelerated);
|
||||
|
||||
[[nodiscard]] VkSemaphore DrawToSwapchain(const Tegra::FramebufferConfig& framebuffer,
|
||||
bool use_accelerated);
|
||||
|
||||
[[nodiscard]] vk::Framebuffer CreateFramebuffer(const VkImageView& image_view,
|
||||
VkExtent2D extent);
|
||||
|
||||
[[nodiscard]] vk::Framebuffer CreateFramebuffer(const VkImageView& image_view,
|
||||
VkExtent2D extent, vk::RenderPass& rd);
|
||||
|
||||
private:
|
||||
struct BufferData;
|
||||
|
||||
void CreateStaticResources();
|
||||
void CreateShaders();
|
||||
void CreateSemaphores();
|
||||
void CreateDescriptorPool();
|
||||
void CreateRenderPass();
|
||||
vk::RenderPass CreateRenderPassImpl(VkFormat, bool is_present = true);
|
||||
void CreateDescriptorSetLayout();
|
||||
void CreateDescriptorSets();
|
||||
void CreatePipelineLayout();
|
||||
void CreateGraphicsPipeline();
|
||||
void CreateSampler();
|
||||
|
||||
void CreateDynamicResources();
|
||||
void CreateFramebuffers();
|
||||
|
||||
void RefreshResources(const Tegra::FramebufferConfig& framebuffer);
|
||||
void ReleaseRawImages();
|
||||
void CreateStagingBuffer(const Tegra::FramebufferConfig& framebuffer);
|
||||
void CreateRawImages(const Tegra::FramebufferConfig& framebuffer);
|
||||
|
||||
void UpdateDescriptorSet(std::size_t image_index, VkImageView image_view, bool nn) const;
|
||||
void UpdateAADescriptorSet(std::size_t image_index, VkImageView image_view, bool nn) const;
|
||||
void SetUniformData(BufferData& data, const Layout::FramebufferLayout layout) const;
|
||||
void SetVertexData(BufferData& data, const Tegra::FramebufferConfig& framebuffer,
|
||||
const Layout::FramebufferLayout layout) const;
|
||||
|
||||
void CreateFSR();
|
||||
|
||||
u64 CalculateBufferSize(const Tegra::FramebufferConfig& framebuffer) const;
|
||||
u64 GetRawImageOffset(const Tegra::FramebufferConfig& framebuffer,
|
||||
std::size_t image_index) const;
|
||||
|
||||
Core::Memory::Memory& cpu_memory;
|
||||
Core::Frontend::EmuWindow& render_window;
|
||||
const Device& device;
|
||||
MemoryAllocator& memory_allocator;
|
||||
Swapchain& swapchain;
|
||||
Scheduler& scheduler;
|
||||
std::size_t image_count;
|
||||
const ScreenInfo& screen_info;
|
||||
|
||||
vk::ShaderModule vertex_shader;
|
||||
vk::ShaderModule fxaa_vertex_shader;
|
||||
vk::ShaderModule fxaa_fragment_shader;
|
||||
vk::ShaderModule bilinear_fragment_shader;
|
||||
vk::ShaderModule bicubic_fragment_shader;
|
||||
vk::ShaderModule gaussian_fragment_shader;
|
||||
vk::ShaderModule scaleforce_fragment_shader;
|
||||
vk::DescriptorPool descriptor_pool;
|
||||
vk::DescriptorSetLayout descriptor_set_layout;
|
||||
vk::PipelineLayout pipeline_layout;
|
||||
vk::Pipeline nearest_neightbor_pipeline;
|
||||
vk::Pipeline bilinear_pipeline;
|
||||
vk::Pipeline bicubic_pipeline;
|
||||
vk::Pipeline gaussian_pipeline;
|
||||
vk::Pipeline scaleforce_pipeline;
|
||||
vk::RenderPass renderpass;
|
||||
std::vector<vk::Framebuffer> framebuffers;
|
||||
vk::DescriptorSets descriptor_sets;
|
||||
vk::Sampler nn_sampler;
|
||||
vk::Sampler sampler;
|
||||
|
||||
vk::Buffer buffer;
|
||||
MemoryCommit buffer_commit;
|
||||
|
||||
std::vector<u64> resource_ticks;
|
||||
|
||||
std::vector<vk::Semaphore> semaphores;
|
||||
std::vector<vk::Image> raw_images;
|
||||
std::vector<vk::ImageView> raw_image_views;
|
||||
std::vector<MemoryCommit> raw_buffer_commits;
|
||||
|
||||
vk::DescriptorPool aa_descriptor_pool;
|
||||
vk::DescriptorSetLayout aa_descriptor_set_layout;
|
||||
vk::PipelineLayout aa_pipeline_layout;
|
||||
vk::Pipeline aa_pipeline;
|
||||
vk::RenderPass aa_renderpass;
|
||||
vk::Framebuffer aa_framebuffer;
|
||||
vk::DescriptorSets aa_descriptor_sets;
|
||||
vk::Image aa_image;
|
||||
vk::ImageView aa_image_view;
|
||||
MemoryCommit aa_commit;
|
||||
|
||||
u32 raw_width = 0;
|
||||
u32 raw_height = 0;
|
||||
Service::android::PixelFormat pixel_format{};
|
||||
|
||||
std::unique_ptr<FSR> fsr;
|
||||
};
|
||||
|
||||
} // namespace Vulkan
|
||||
// SPDX-FileCopyrightText: Copyright 2018 yuzu Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <memory>
|
||||
|
||||
#include "video_core/vulkan_common/vulkan_memory_allocator.h"
|
||||
#include "video_core/vulkan_common/vulkan_wrapper.h"
|
||||
|
||||
namespace Core {
|
||||
class System;
|
||||
}
|
||||
|
||||
namespace Core::Memory {
|
||||
class Memory;
|
||||
}
|
||||
|
||||
namespace Core::Frontend {
|
||||
class EmuWindow;
|
||||
}
|
||||
|
||||
namespace Tegra {
|
||||
struct FramebufferConfig;
|
||||
}
|
||||
|
||||
namespace VideoCore {
|
||||
class RasterizerInterface;
|
||||
}
|
||||
|
||||
namespace Service::android {
|
||||
enum class PixelFormat : u32;
|
||||
}
|
||||
|
||||
namespace Vulkan {
|
||||
|
||||
struct ScreenInfo;
|
||||
|
||||
class Device;
|
||||
class FSR;
|
||||
class RasterizerVulkan;
|
||||
class Scheduler;
|
||||
class Swapchain;
|
||||
|
||||
struct ScreenInfo {
|
||||
VkImageView image_view{};
|
||||
u32 width{};
|
||||
u32 height{};
|
||||
bool is_srgb{};
|
||||
};
|
||||
|
||||
class BlitScreen {
|
||||
public:
|
||||
explicit BlitScreen(Core::Memory::Memory& cpu_memory, Core::Frontend::EmuWindow& render_window,
|
||||
const Device& device, MemoryAllocator& memory_manager, Swapchain& swapchain,
|
||||
Scheduler& scheduler, const ScreenInfo& screen_info);
|
||||
~BlitScreen();
|
||||
|
||||
void Recreate();
|
||||
|
||||
[[nodiscard]] VkSemaphore Draw(const Tegra::FramebufferConfig& framebuffer,
|
||||
const VkFramebuffer& host_framebuffer,
|
||||
const Layout::FramebufferLayout layout, VkExtent2D render_area,
|
||||
bool use_accelerated);
|
||||
|
||||
[[nodiscard]] VkSemaphore DrawToSwapchain(const Tegra::FramebufferConfig& framebuffer,
|
||||
bool use_accelerated);
|
||||
|
||||
[[nodiscard]] vk::Framebuffer CreateFramebuffer(const VkImageView& image_view,
|
||||
VkExtent2D extent);
|
||||
|
||||
[[nodiscard]] vk::Framebuffer CreateFramebuffer(const VkImageView& image_view,
|
||||
VkExtent2D extent, vk::RenderPass& rd);
|
||||
|
||||
private:
|
||||
struct BufferData;
|
||||
|
||||
void CreateStaticResources();
|
||||
void CreateShaders();
|
||||
void CreateSemaphores();
|
||||
void CreateDescriptorPool();
|
||||
void CreateRenderPass();
|
||||
vk::RenderPass CreateRenderPassImpl(VkFormat, bool is_present = true);
|
||||
void CreateDescriptorSetLayout();
|
||||
void CreateDescriptorSets();
|
||||
void CreatePipelineLayout();
|
||||
void CreateGraphicsPipeline();
|
||||
void CreateSampler();
|
||||
|
||||
void CreateDynamicResources();
|
||||
void CreateFramebuffers();
|
||||
|
||||
void RefreshResources(const Tegra::FramebufferConfig& framebuffer);
|
||||
void ReleaseRawImages();
|
||||
void CreateStagingBuffer(const Tegra::FramebufferConfig& framebuffer);
|
||||
void CreateRawImages(const Tegra::FramebufferConfig& framebuffer);
|
||||
|
||||
void UpdateDescriptorSet(std::size_t image_index, VkImageView image_view, bool nn) const;
|
||||
void UpdateAADescriptorSet(std::size_t image_index, VkImageView image_view, bool nn) const;
|
||||
void SetUniformData(BufferData& data, const Layout::FramebufferLayout layout) const;
|
||||
void SetVertexData(BufferData& data, const Tegra::FramebufferConfig& framebuffer,
|
||||
const Layout::FramebufferLayout layout) const;
|
||||
|
||||
void CreateFSR();
|
||||
|
||||
u64 CalculateBufferSize(const Tegra::FramebufferConfig& framebuffer) const;
|
||||
u64 GetRawImageOffset(const Tegra::FramebufferConfig& framebuffer,
|
||||
std::size_t image_index) const;
|
||||
|
||||
Core::Memory::Memory& cpu_memory;
|
||||
Core::Frontend::EmuWindow& render_window;
|
||||
const Device& device;
|
||||
MemoryAllocator& memory_allocator;
|
||||
Swapchain& swapchain;
|
||||
Scheduler& scheduler;
|
||||
std::size_t image_count;
|
||||
const ScreenInfo& screen_info;
|
||||
|
||||
vk::ShaderModule vertex_shader;
|
||||
vk::ShaderModule fxaa_vertex_shader;
|
||||
vk::ShaderModule fxaa_fragment_shader;
|
||||
vk::ShaderModule bilinear_fragment_shader;
|
||||
vk::ShaderModule bicubic_fragment_shader;
|
||||
vk::ShaderModule gaussian_fragment_shader;
|
||||
vk::ShaderModule scaleforce_fragment_shader;
|
||||
vk::DescriptorPool descriptor_pool;
|
||||
vk::DescriptorSetLayout descriptor_set_layout;
|
||||
vk::PipelineLayout pipeline_layout;
|
||||
vk::Pipeline nearest_neightbor_pipeline;
|
||||
vk::Pipeline bilinear_pipeline;
|
||||
vk::Pipeline bicubic_pipeline;
|
||||
vk::Pipeline gaussian_pipeline;
|
||||
vk::Pipeline scaleforce_pipeline;
|
||||
vk::RenderPass renderpass;
|
||||
std::vector<vk::Framebuffer> framebuffers;
|
||||
vk::DescriptorSets descriptor_sets;
|
||||
vk::Sampler nn_sampler;
|
||||
vk::Sampler sampler;
|
||||
|
||||
vk::Buffer buffer;
|
||||
MemoryCommit buffer_commit;
|
||||
|
||||
std::vector<u64> resource_ticks;
|
||||
|
||||
std::vector<vk::Semaphore> semaphores;
|
||||
std::vector<vk::Image> raw_images;
|
||||
std::vector<vk::ImageView> raw_image_views;
|
||||
std::vector<MemoryCommit> raw_buffer_commits;
|
||||
|
||||
vk::DescriptorPool aa_descriptor_pool;
|
||||
vk::DescriptorSetLayout aa_descriptor_set_layout;
|
||||
vk::PipelineLayout aa_pipeline_layout;
|
||||
vk::Pipeline aa_pipeline;
|
||||
vk::RenderPass aa_renderpass;
|
||||
vk::Framebuffer aa_framebuffer;
|
||||
vk::DescriptorSets aa_descriptor_sets;
|
||||
vk::Image aa_image;
|
||||
vk::ImageView aa_image_view;
|
||||
MemoryCommit aa_commit;
|
||||
|
||||
u32 raw_width = 0;
|
||||
u32 raw_height = 0;
|
||||
Service::android::PixelFormat pixel_format{};
|
||||
|
||||
std::unique_ptr<FSR> fsr;
|
||||
};
|
||||
|
||||
} // namespace Vulkan
|
||||
|
||||
@@ -1,429 +1,429 @@
|
||||
// SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#include <algorithm>
|
||||
#include <array>
|
||||
#include <cstring>
|
||||
#include <span>
|
||||
#include <vector>
|
||||
|
||||
#include "video_core/buffer_cache/buffer_cache.h"
|
||||
#include "video_core/renderer_vulkan/maxwell_to_vk.h"
|
||||
#include "video_core/renderer_vulkan/vk_buffer_cache.h"
|
||||
#include "video_core/renderer_vulkan/vk_scheduler.h"
|
||||
#include "video_core/renderer_vulkan/vk_staging_buffer_pool.h"
|
||||
#include "video_core/renderer_vulkan/vk_update_descriptor.h"
|
||||
#include "video_core/vulkan_common/vulkan_device.h"
|
||||
#include "video_core/vulkan_common/vulkan_memory_allocator.h"
|
||||
#include "video_core/vulkan_common/vulkan_wrapper.h"
|
||||
|
||||
namespace Vulkan {
|
||||
namespace {
|
||||
VkBufferCopy MakeBufferCopy(const VideoCommon::BufferCopy& copy) {
|
||||
return VkBufferCopy{
|
||||
.srcOffset = copy.src_offset,
|
||||
.dstOffset = copy.dst_offset,
|
||||
.size = copy.size,
|
||||
};
|
||||
}
|
||||
|
||||
VkIndexType IndexTypeFromNumElements(const Device& device, u32 num_elements) {
|
||||
if (num_elements <= 0xff && device.IsExtIndexTypeUint8Supported()) {
|
||||
return VK_INDEX_TYPE_UINT8_EXT;
|
||||
}
|
||||
if (num_elements <= 0xffff) {
|
||||
return VK_INDEX_TYPE_UINT16;
|
||||
}
|
||||
return VK_INDEX_TYPE_UINT32;
|
||||
}
|
||||
|
||||
size_t BytesPerIndex(VkIndexType index_type) {
|
||||
switch (index_type) {
|
||||
case VK_INDEX_TYPE_UINT8_EXT:
|
||||
return 1;
|
||||
case VK_INDEX_TYPE_UINT16:
|
||||
return 2;
|
||||
case VK_INDEX_TYPE_UINT32:
|
||||
return 4;
|
||||
default:
|
||||
ASSERT_MSG(false, "Invalid index type={}", index_type);
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
std::array<T, 6> MakeQuadIndices(u32 quad, u32 first) {
|
||||
std::array<T, 6> indices{0, 1, 2, 0, 2, 3};
|
||||
for (T& index : indices) {
|
||||
index = static_cast<T>(first + index + quad * 4);
|
||||
}
|
||||
return indices;
|
||||
}
|
||||
|
||||
vk::Buffer CreateBuffer(const Device& device, u64 size) {
|
||||
VkBufferUsageFlags flags =
|
||||
VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT |
|
||||
VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT |
|
||||
VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT |
|
||||
VK_BUFFER_USAGE_INDEX_BUFFER_BIT | VK_BUFFER_USAGE_VERTEX_BUFFER_BIT;
|
||||
if (device.IsExtTransformFeedbackSupported()) {
|
||||
flags |= VK_BUFFER_USAGE_TRANSFORM_FEEDBACK_BUFFER_BIT_EXT;
|
||||
}
|
||||
return device.GetLogical().CreateBuffer({
|
||||
.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
|
||||
.pNext = nullptr,
|
||||
.flags = 0,
|
||||
.size = size,
|
||||
.usage = flags,
|
||||
.sharingMode = VK_SHARING_MODE_EXCLUSIVE,
|
||||
.queueFamilyIndexCount = 0,
|
||||
.pQueueFamilyIndices = nullptr,
|
||||
});
|
||||
}
|
||||
} // Anonymous namespace
|
||||
|
||||
Buffer::Buffer(BufferCacheRuntime&, VideoCommon::NullBufferParams null_params)
|
||||
: VideoCommon::BufferBase<VideoCore::RasterizerInterface>(null_params) {}
|
||||
|
||||
Buffer::Buffer(BufferCacheRuntime& runtime, VideoCore::RasterizerInterface& rasterizer_,
|
||||
VAddr cpu_addr_, u64 size_bytes_)
|
||||
: VideoCommon::BufferBase<VideoCore::RasterizerInterface>(rasterizer_, cpu_addr_, size_bytes_),
|
||||
device{&runtime.device}, buffer{CreateBuffer(*device, SizeBytes())},
|
||||
commit{runtime.memory_allocator.Commit(buffer, MemoryUsage::DeviceLocal)} {
|
||||
if (runtime.device.HasDebuggingToolAttached()) {
|
||||
buffer.SetObjectNameEXT(fmt::format("Buffer 0x{:x}", CpuAddr()).c_str());
|
||||
}
|
||||
}
|
||||
|
||||
VkBufferView Buffer::View(u32 offset, u32 size, VideoCore::Surface::PixelFormat format) {
|
||||
if (!device) {
|
||||
// Null buffer, return a null descriptor
|
||||
return VK_NULL_HANDLE;
|
||||
}
|
||||
const auto it{std::ranges::find_if(views, [offset, size, format](const BufferView& view) {
|
||||
return offset == view.offset && size == view.size && format == view.format;
|
||||
})};
|
||||
if (it != views.end()) {
|
||||
return *it->handle;
|
||||
}
|
||||
views.push_back({
|
||||
.offset = offset,
|
||||
.size = size,
|
||||
.format = format,
|
||||
.handle = device->GetLogical().CreateBufferView({
|
||||
.sType = VK_STRUCTURE_TYPE_BUFFER_VIEW_CREATE_INFO,
|
||||
.pNext = nullptr,
|
||||
.flags = 0,
|
||||
.buffer = *buffer,
|
||||
.format = MaxwellToVK::SurfaceFormat(*device, FormatType::Buffer, false, format).format,
|
||||
.offset = offset,
|
||||
.range = size,
|
||||
}),
|
||||
});
|
||||
return *views.back().handle;
|
||||
}
|
||||
|
||||
BufferCacheRuntime::BufferCacheRuntime(const Device& device_, MemoryAllocator& memory_allocator_,
|
||||
Scheduler& scheduler_, StagingBufferPool& staging_pool_,
|
||||
UpdateDescriptorQueue& update_descriptor_queue_,
|
||||
DescriptorPool& descriptor_pool)
|
||||
: device{device_}, memory_allocator{memory_allocator_}, scheduler{scheduler_},
|
||||
staging_pool{staging_pool_}, update_descriptor_queue{update_descriptor_queue_},
|
||||
uint8_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue),
|
||||
quad_index_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue) {}
|
||||
|
||||
StagingBufferRef BufferCacheRuntime::UploadStagingBuffer(size_t size) {
|
||||
return staging_pool.Request(size, MemoryUsage::Upload);
|
||||
}
|
||||
|
||||
StagingBufferRef BufferCacheRuntime::DownloadStagingBuffer(size_t size) {
|
||||
return staging_pool.Request(size, MemoryUsage::Download);
|
||||
}
|
||||
|
||||
u64 BufferCacheRuntime::GetDeviceLocalMemory() const {
|
||||
return device.GetDeviceLocalMemory();
|
||||
}
|
||||
|
||||
u64 BufferCacheRuntime::GetDeviceMemoryUsage() const {
|
||||
return device.GetDeviceMemoryUsage();
|
||||
}
|
||||
|
||||
bool BufferCacheRuntime::CanReportMemoryUsage() const {
|
||||
return device.CanReportMemoryUsage();
|
||||
}
|
||||
|
||||
void BufferCacheRuntime::Finish() {
|
||||
scheduler.Finish();
|
||||
}
|
||||
|
||||
void BufferCacheRuntime::CopyBuffer(VkBuffer dst_buffer, VkBuffer src_buffer,
|
||||
std::span<const VideoCommon::BufferCopy> copies, bool barrier) {
|
||||
static constexpr VkMemoryBarrier READ_BARRIER{
|
||||
.sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER,
|
||||
.pNext = nullptr,
|
||||
.srcAccessMask = VK_ACCESS_MEMORY_WRITE_BIT,
|
||||
.dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT | VK_ACCESS_TRANSFER_WRITE_BIT,
|
||||
};
|
||||
static constexpr VkMemoryBarrier WRITE_BARRIER{
|
||||
.sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER,
|
||||
.pNext = nullptr,
|
||||
.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
|
||||
.dstAccessMask = VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT,
|
||||
};
|
||||
// Measuring a popular game, this number never exceeds the specified size once data is warmed up
|
||||
boost::container::small_vector<VkBufferCopy, 3> vk_copies(copies.size());
|
||||
std::ranges::transform(copies, vk_copies.begin(), MakeBufferCopy);
|
||||
scheduler.RequestOutsideRenderPassOperationContext();
|
||||
scheduler.Record([src_buffer, dst_buffer, vk_copies, barrier](vk::CommandBuffer cmdbuf) {
|
||||
if (barrier) {
|
||||
cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
|
||||
VK_PIPELINE_STAGE_TRANSFER_BIT, 0, READ_BARRIER);
|
||||
}
|
||||
cmdbuf.CopyBuffer(src_buffer, dst_buffer, vk_copies);
|
||||
if (barrier) {
|
||||
cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT,
|
||||
VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, 0, WRITE_BARRIER);
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
void BufferCacheRuntime::PreCopyBarrier() {
|
||||
static constexpr VkMemoryBarrier READ_BARRIER{
|
||||
.sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER,
|
||||
.pNext = nullptr,
|
||||
.srcAccessMask = VK_ACCESS_MEMORY_WRITE_BIT,
|
||||
.dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT | VK_ACCESS_TRANSFER_WRITE_BIT,
|
||||
};
|
||||
scheduler.RequestOutsideRenderPassOperationContext();
|
||||
scheduler.Record([](vk::CommandBuffer cmdbuf) {
|
||||
cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT,
|
||||
0, READ_BARRIER);
|
||||
});
|
||||
}
|
||||
|
||||
void BufferCacheRuntime::PostCopyBarrier() {
|
||||
static constexpr VkMemoryBarrier WRITE_BARRIER{
|
||||
.sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER,
|
||||
.pNext = nullptr,
|
||||
.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
|
||||
.dstAccessMask = VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT,
|
||||
};
|
||||
scheduler.RequestOutsideRenderPassOperationContext();
|
||||
scheduler.Record([](vk::CommandBuffer cmdbuf) {
|
||||
cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
|
||||
0, WRITE_BARRIER);
|
||||
});
|
||||
}
|
||||
|
||||
void BufferCacheRuntime::ClearBuffer(VkBuffer dest_buffer, u32 offset, size_t size, u32 value) {
|
||||
static constexpr VkMemoryBarrier READ_BARRIER{
|
||||
.sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER,
|
||||
.pNext = nullptr,
|
||||
.srcAccessMask = VK_ACCESS_MEMORY_WRITE_BIT,
|
||||
.dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT | VK_ACCESS_TRANSFER_WRITE_BIT,
|
||||
};
|
||||
static constexpr VkMemoryBarrier WRITE_BARRIER{
|
||||
.sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER,
|
||||
.pNext = nullptr,
|
||||
.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
|
||||
.dstAccessMask = VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT,
|
||||
};
|
||||
|
||||
scheduler.RequestOutsideRenderPassOperationContext();
|
||||
scheduler.Record([dest_buffer, offset, size, value](vk::CommandBuffer cmdbuf) {
|
||||
cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT,
|
||||
0, READ_BARRIER);
|
||||
cmdbuf.FillBuffer(dest_buffer, offset, size, value);
|
||||
cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
|
||||
0, WRITE_BARRIER);
|
||||
});
|
||||
}
|
||||
|
||||
void BufferCacheRuntime::BindIndexBuffer(PrimitiveTopology topology, IndexFormat index_format,
|
||||
u32 base_vertex, u32 num_indices, VkBuffer buffer,
|
||||
u32 offset, [[maybe_unused]] u32 size) {
|
||||
VkIndexType vk_index_type = MaxwellToVK::IndexFormat(index_format);
|
||||
VkDeviceSize vk_offset = offset;
|
||||
VkBuffer vk_buffer = buffer;
|
||||
if (topology == PrimitiveTopology::Quads) {
|
||||
vk_index_type = VK_INDEX_TYPE_UINT32;
|
||||
std::tie(vk_buffer, vk_offset) =
|
||||
quad_index_pass.Assemble(index_format, num_indices, base_vertex, buffer, offset);
|
||||
} else if (vk_index_type == VK_INDEX_TYPE_UINT8_EXT && !device.IsExtIndexTypeUint8Supported()) {
|
||||
vk_index_type = VK_INDEX_TYPE_UINT16;
|
||||
std::tie(vk_buffer, vk_offset) = uint8_pass.Assemble(num_indices, buffer, offset);
|
||||
}
|
||||
if (vk_buffer == VK_NULL_HANDLE) {
|
||||
// Vulkan doesn't support null index buffers. Replace it with our own null buffer.
|
||||
ReserveNullBuffer();
|
||||
vk_buffer = *null_buffer;
|
||||
}
|
||||
scheduler.Record([vk_buffer, vk_offset, vk_index_type](vk::CommandBuffer cmdbuf) {
|
||||
cmdbuf.BindIndexBuffer(vk_buffer, vk_offset, vk_index_type);
|
||||
});
|
||||
}
|
||||
|
||||
void BufferCacheRuntime::BindQuadArrayIndexBuffer(u32 first, u32 count) {
|
||||
if (count == 0) {
|
||||
ReserveNullBuffer();
|
||||
scheduler.Record([this](vk::CommandBuffer cmdbuf) {
|
||||
cmdbuf.BindIndexBuffer(*null_buffer, 0, VK_INDEX_TYPE_UINT32);
|
||||
});
|
||||
return;
|
||||
}
|
||||
ReserveQuadArrayLUT(first + count, true);
|
||||
|
||||
// The LUT has the indices 0, 1, 2, and 3 copied as an array
|
||||
// To apply these 'first' offsets we can apply an offset based on the modulus.
|
||||
const VkIndexType index_type = quad_array_lut_index_type;
|
||||
const size_t sub_first_offset = static_cast<size_t>(first % 4) * (current_num_indices / 4);
|
||||
const size_t offset = (sub_first_offset + first / 4) * 6ULL * BytesPerIndex(index_type);
|
||||
scheduler.Record([buffer = *quad_array_lut, index_type, offset](vk::CommandBuffer cmdbuf) {
|
||||
cmdbuf.BindIndexBuffer(buffer, offset, index_type);
|
||||
});
|
||||
}
|
||||
|
||||
void BufferCacheRuntime::BindVertexBuffer(u32 index, VkBuffer buffer, u32 offset, u32 size,
|
||||
u32 stride) {
|
||||
if (device.IsExtExtendedDynamicStateSupported()) {
|
||||
scheduler.Record([index, buffer, offset, size, stride](vk::CommandBuffer cmdbuf) {
|
||||
const VkDeviceSize vk_offset = buffer != VK_NULL_HANDLE ? offset : 0;
|
||||
const VkDeviceSize vk_size = buffer != VK_NULL_HANDLE ? size : VK_WHOLE_SIZE;
|
||||
const VkDeviceSize vk_stride = stride;
|
||||
cmdbuf.BindVertexBuffers2EXT(index, 1, &buffer, &vk_offset, &vk_size, &vk_stride);
|
||||
});
|
||||
} else {
|
||||
scheduler.Record([index, buffer, offset](vk::CommandBuffer cmdbuf) {
|
||||
cmdbuf.BindVertexBuffer(index, buffer, offset);
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
void BufferCacheRuntime::BindTransformFeedbackBuffer(u32 index, VkBuffer buffer, u32 offset,
|
||||
u32 size) {
|
||||
if (!device.IsExtTransformFeedbackSupported()) {
|
||||
// Already logged in the rasterizer
|
||||
return;
|
||||
}
|
||||
if (buffer == VK_NULL_HANDLE) {
|
||||
// Vulkan doesn't support null transform feedback buffers.
|
||||
// Replace it with our own null buffer.
|
||||
ReserveNullBuffer();
|
||||
buffer = *null_buffer;
|
||||
offset = 0;
|
||||
size = 0;
|
||||
}
|
||||
scheduler.Record([index, buffer, offset, size](vk::CommandBuffer cmdbuf) {
|
||||
const VkDeviceSize vk_offset = offset;
|
||||
const VkDeviceSize vk_size = size;
|
||||
cmdbuf.BindTransformFeedbackBuffersEXT(index, 1, &buffer, &vk_offset, &vk_size);
|
||||
});
|
||||
}
|
||||
|
||||
void BufferCacheRuntime::ReserveQuadArrayLUT(u32 num_indices, bool wait_for_idle) {
|
||||
if (num_indices <= current_num_indices) {
|
||||
return;
|
||||
}
|
||||
if (wait_for_idle) {
|
||||
scheduler.Finish();
|
||||
}
|
||||
current_num_indices = num_indices;
|
||||
quad_array_lut_index_type = IndexTypeFromNumElements(device, num_indices);
|
||||
|
||||
const u32 num_quads = num_indices / 4;
|
||||
const u32 num_triangle_indices = num_quads * 6;
|
||||
const u32 num_first_offset_copies = 4;
|
||||
const size_t bytes_per_index = BytesPerIndex(quad_array_lut_index_type);
|
||||
const size_t size_bytes = num_triangle_indices * bytes_per_index * num_first_offset_copies;
|
||||
quad_array_lut = device.GetLogical().CreateBuffer(VkBufferCreateInfo{
|
||||
.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
|
||||
.pNext = nullptr,
|
||||
.flags = 0,
|
||||
.size = size_bytes,
|
||||
.usage = VK_BUFFER_USAGE_INDEX_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT,
|
||||
.sharingMode = VK_SHARING_MODE_EXCLUSIVE,
|
||||
.queueFamilyIndexCount = 0,
|
||||
.pQueueFamilyIndices = nullptr,
|
||||
});
|
||||
if (device.HasDebuggingToolAttached()) {
|
||||
quad_array_lut.SetObjectNameEXT("Quad LUT");
|
||||
}
|
||||
quad_array_lut_commit = memory_allocator.Commit(quad_array_lut, MemoryUsage::DeviceLocal);
|
||||
|
||||
const StagingBufferRef staging = staging_pool.Request(size_bytes, MemoryUsage::Upload);
|
||||
u8* staging_data = staging.mapped_span.data();
|
||||
const size_t quad_size = bytes_per_index * 6;
|
||||
for (u32 first = 0; first < num_first_offset_copies; ++first) {
|
||||
for (u32 quad = 0; quad < num_quads; ++quad) {
|
||||
switch (quad_array_lut_index_type) {
|
||||
case VK_INDEX_TYPE_UINT8_EXT:
|
||||
std::memcpy(staging_data, MakeQuadIndices<u8>(quad, first).data(), quad_size);
|
||||
break;
|
||||
case VK_INDEX_TYPE_UINT16:
|
||||
std::memcpy(staging_data, MakeQuadIndices<u16>(quad, first).data(), quad_size);
|
||||
break;
|
||||
case VK_INDEX_TYPE_UINT32:
|
||||
std::memcpy(staging_data, MakeQuadIndices<u32>(quad, first).data(), quad_size);
|
||||
break;
|
||||
default:
|
||||
ASSERT(false);
|
||||
break;
|
||||
}
|
||||
staging_data += quad_size;
|
||||
}
|
||||
}
|
||||
scheduler.RequestOutsideRenderPassOperationContext();
|
||||
scheduler.Record([src_buffer = staging.buffer, src_offset = staging.offset,
|
||||
dst_buffer = *quad_array_lut, size_bytes](vk::CommandBuffer cmdbuf) {
|
||||
const VkBufferCopy copy{
|
||||
.srcOffset = src_offset,
|
||||
.dstOffset = 0,
|
||||
.size = size_bytes,
|
||||
};
|
||||
const VkBufferMemoryBarrier write_barrier{
|
||||
.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,
|
||||
.pNext = nullptr,
|
||||
.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
|
||||
.dstAccessMask = VK_ACCESS_INDEX_READ_BIT,
|
||||
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
|
||||
.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
|
||||
.buffer = dst_buffer,
|
||||
.offset = 0,
|
||||
.size = size_bytes,
|
||||
};
|
||||
cmdbuf.CopyBuffer(src_buffer, dst_buffer, copy);
|
||||
cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_VERTEX_INPUT_BIT,
|
||||
0, write_barrier);
|
||||
});
|
||||
}
|
||||
|
||||
void BufferCacheRuntime::ReserveNullBuffer() {
|
||||
if (null_buffer) {
|
||||
return;
|
||||
}
|
||||
VkBufferCreateInfo create_info{
|
||||
.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
|
||||
.pNext = nullptr,
|
||||
.flags = 0,
|
||||
.size = 4,
|
||||
.usage = VK_BUFFER_USAGE_INDEX_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT,
|
||||
.sharingMode = VK_SHARING_MODE_EXCLUSIVE,
|
||||
.queueFamilyIndexCount = 0,
|
||||
.pQueueFamilyIndices = nullptr,
|
||||
};
|
||||
if (device.IsExtTransformFeedbackSupported()) {
|
||||
create_info.usage |= VK_BUFFER_USAGE_TRANSFORM_FEEDBACK_BUFFER_BIT_EXT;
|
||||
}
|
||||
null_buffer = device.GetLogical().CreateBuffer(create_info);
|
||||
if (device.HasDebuggingToolAttached()) {
|
||||
null_buffer.SetObjectNameEXT("Null buffer");
|
||||
}
|
||||
null_buffer_commit = memory_allocator.Commit(null_buffer, MemoryUsage::DeviceLocal);
|
||||
|
||||
scheduler.RequestOutsideRenderPassOperationContext();
|
||||
scheduler.Record([buffer = *null_buffer](vk::CommandBuffer cmdbuf) {
|
||||
cmdbuf.FillBuffer(buffer, 0, VK_WHOLE_SIZE, 0);
|
||||
});
|
||||
}
|
||||
|
||||
} // namespace Vulkan
|
||||
// SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#include <algorithm>
|
||||
#include <array>
|
||||
#include <cstring>
|
||||
#include <span>
|
||||
#include <vector>
|
||||
|
||||
#include "video_core/buffer_cache/buffer_cache.h"
|
||||
#include "video_core/renderer_vulkan/maxwell_to_vk.h"
|
||||
#include "video_core/renderer_vulkan/vk_buffer_cache.h"
|
||||
#include "video_core/renderer_vulkan/vk_scheduler.h"
|
||||
#include "video_core/renderer_vulkan/vk_staging_buffer_pool.h"
|
||||
#include "video_core/renderer_vulkan/vk_update_descriptor.h"
|
||||
#include "video_core/vulkan_common/vulkan_device.h"
|
||||
#include "video_core/vulkan_common/vulkan_memory_allocator.h"
|
||||
#include "video_core/vulkan_common/vulkan_wrapper.h"
|
||||
|
||||
namespace Vulkan {
|
||||
namespace {
|
||||
VkBufferCopy MakeBufferCopy(const VideoCommon::BufferCopy& copy) {
|
||||
return VkBufferCopy{
|
||||
.srcOffset = copy.src_offset,
|
||||
.dstOffset = copy.dst_offset,
|
||||
.size = copy.size,
|
||||
};
|
||||
}
|
||||
|
||||
VkIndexType IndexTypeFromNumElements(const Device& device, u32 num_elements) {
|
||||
if (num_elements <= 0xff && device.IsExtIndexTypeUint8Supported()) {
|
||||
return VK_INDEX_TYPE_UINT8_EXT;
|
||||
}
|
||||
if (num_elements <= 0xffff) {
|
||||
return VK_INDEX_TYPE_UINT16;
|
||||
}
|
||||
return VK_INDEX_TYPE_UINT32;
|
||||
}
|
||||
|
||||
size_t BytesPerIndex(VkIndexType index_type) {
|
||||
switch (index_type) {
|
||||
case VK_INDEX_TYPE_UINT8_EXT:
|
||||
return 1;
|
||||
case VK_INDEX_TYPE_UINT16:
|
||||
return 2;
|
||||
case VK_INDEX_TYPE_UINT32:
|
||||
return 4;
|
||||
default:
|
||||
ASSERT_MSG(false, "Invalid index type={}", index_type);
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
std::array<T, 6> MakeQuadIndices(u32 quad, u32 first) {
|
||||
std::array<T, 6> indices{0, 1, 2, 0, 2, 3};
|
||||
for (T& index : indices) {
|
||||
index = static_cast<T>(first + index + quad * 4);
|
||||
}
|
||||
return indices;
|
||||
}
|
||||
|
||||
vk::Buffer CreateBuffer(const Device& device, u64 size) {
|
||||
VkBufferUsageFlags flags =
|
||||
VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT |
|
||||
VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT |
|
||||
VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT |
|
||||
VK_BUFFER_USAGE_INDEX_BUFFER_BIT | VK_BUFFER_USAGE_VERTEX_BUFFER_BIT;
|
||||
if (device.IsExtTransformFeedbackSupported()) {
|
||||
flags |= VK_BUFFER_USAGE_TRANSFORM_FEEDBACK_BUFFER_BIT_EXT;
|
||||
}
|
||||
return device.GetLogical().CreateBuffer({
|
||||
.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
|
||||
.pNext = nullptr,
|
||||
.flags = 0,
|
||||
.size = size,
|
||||
.usage = flags,
|
||||
.sharingMode = VK_SHARING_MODE_EXCLUSIVE,
|
||||
.queueFamilyIndexCount = 0,
|
||||
.pQueueFamilyIndices = nullptr,
|
||||
});
|
||||
}
|
||||
} // Anonymous namespace
|
||||
|
||||
Buffer::Buffer(BufferCacheRuntime&, VideoCommon::NullBufferParams null_params)
|
||||
: VideoCommon::BufferBase<VideoCore::RasterizerInterface>(null_params) {}
|
||||
|
||||
Buffer::Buffer(BufferCacheRuntime& runtime, VideoCore::RasterizerInterface& rasterizer_,
|
||||
VAddr cpu_addr_, u64 size_bytes_)
|
||||
: VideoCommon::BufferBase<VideoCore::RasterizerInterface>(rasterizer_, cpu_addr_, size_bytes_),
|
||||
device{&runtime.device}, buffer{CreateBuffer(*device, SizeBytes())},
|
||||
commit{runtime.memory_allocator.Commit(buffer, MemoryUsage::DeviceLocal)} {
|
||||
if (runtime.device.HasDebuggingToolAttached()) {
|
||||
buffer.SetObjectNameEXT(fmt::format("Buffer 0x{:x}", CpuAddr()).c_str());
|
||||
}
|
||||
}
|
||||
|
||||
VkBufferView Buffer::View(u32 offset, u32 size, VideoCore::Surface::PixelFormat format) {
|
||||
if (!device) {
|
||||
// Null buffer, return a null descriptor
|
||||
return VK_NULL_HANDLE;
|
||||
}
|
||||
const auto it{std::ranges::find_if(views, [offset, size, format](const BufferView& view) {
|
||||
return offset == view.offset && size == view.size && format == view.format;
|
||||
})};
|
||||
if (it != views.end()) {
|
||||
return *it->handle;
|
||||
}
|
||||
views.push_back({
|
||||
.offset = offset,
|
||||
.size = size,
|
||||
.format = format,
|
||||
.handle = device->GetLogical().CreateBufferView({
|
||||
.sType = VK_STRUCTURE_TYPE_BUFFER_VIEW_CREATE_INFO,
|
||||
.pNext = nullptr,
|
||||
.flags = 0,
|
||||
.buffer = *buffer,
|
||||
.format = MaxwellToVK::SurfaceFormat(*device, FormatType::Buffer, false, format).format,
|
||||
.offset = offset,
|
||||
.range = size,
|
||||
}),
|
||||
});
|
||||
return *views.back().handle;
|
||||
}
|
||||
|
||||
BufferCacheRuntime::BufferCacheRuntime(const Device& device_, MemoryAllocator& memory_allocator_,
|
||||
Scheduler& scheduler_, StagingBufferPool& staging_pool_,
|
||||
UpdateDescriptorQueue& update_descriptor_queue_,
|
||||
DescriptorPool& descriptor_pool)
|
||||
: device{device_}, memory_allocator{memory_allocator_}, scheduler{scheduler_},
|
||||
staging_pool{staging_pool_}, update_descriptor_queue{update_descriptor_queue_},
|
||||
uint8_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue),
|
||||
quad_index_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue) {}
|
||||
|
||||
StagingBufferRef BufferCacheRuntime::UploadStagingBuffer(size_t size) {
|
||||
return staging_pool.Request(size, MemoryUsage::Upload);
|
||||
}
|
||||
|
||||
StagingBufferRef BufferCacheRuntime::DownloadStagingBuffer(size_t size) {
|
||||
return staging_pool.Request(size, MemoryUsage::Download);
|
||||
}
|
||||
|
||||
u64 BufferCacheRuntime::GetDeviceLocalMemory() const {
|
||||
return device.GetDeviceLocalMemory();
|
||||
}
|
||||
|
||||
u64 BufferCacheRuntime::GetDeviceMemoryUsage() const {
|
||||
return device.GetDeviceMemoryUsage();
|
||||
}
|
||||
|
||||
bool BufferCacheRuntime::CanReportMemoryUsage() const {
|
||||
return device.CanReportMemoryUsage();
|
||||
}
|
||||
|
||||
void BufferCacheRuntime::Finish() {
|
||||
scheduler.Finish();
|
||||
}
|
||||
|
||||
void BufferCacheRuntime::CopyBuffer(VkBuffer dst_buffer, VkBuffer src_buffer,
|
||||
std::span<const VideoCommon::BufferCopy> copies, bool barrier) {
|
||||
static constexpr VkMemoryBarrier READ_BARRIER{
|
||||
.sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER,
|
||||
.pNext = nullptr,
|
||||
.srcAccessMask = VK_ACCESS_MEMORY_WRITE_BIT,
|
||||
.dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT | VK_ACCESS_TRANSFER_WRITE_BIT,
|
||||
};
|
||||
static constexpr VkMemoryBarrier WRITE_BARRIER{
|
||||
.sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER,
|
||||
.pNext = nullptr,
|
||||
.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
|
||||
.dstAccessMask = VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT,
|
||||
};
|
||||
// Measuring a popular game, this number never exceeds the specified size once data is warmed up
|
||||
boost::container::small_vector<VkBufferCopy, 3> vk_copies(copies.size());
|
||||
std::ranges::transform(copies, vk_copies.begin(), MakeBufferCopy);
|
||||
scheduler.RequestOutsideRenderPassOperationContext();
|
||||
scheduler.Record([src_buffer, dst_buffer, vk_copies, barrier](vk::CommandBuffer cmdbuf) {
|
||||
if (barrier) {
|
||||
cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
|
||||
VK_PIPELINE_STAGE_TRANSFER_BIT, 0, READ_BARRIER);
|
||||
}
|
||||
cmdbuf.CopyBuffer(src_buffer, dst_buffer, vk_copies);
|
||||
if (barrier) {
|
||||
cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT,
|
||||
VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, 0, WRITE_BARRIER);
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
void BufferCacheRuntime::PreCopyBarrier() {
|
||||
static constexpr VkMemoryBarrier READ_BARRIER{
|
||||
.sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER,
|
||||
.pNext = nullptr,
|
||||
.srcAccessMask = VK_ACCESS_MEMORY_WRITE_BIT,
|
||||
.dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT | VK_ACCESS_TRANSFER_WRITE_BIT,
|
||||
};
|
||||
scheduler.RequestOutsideRenderPassOperationContext();
|
||||
scheduler.Record([](vk::CommandBuffer cmdbuf) {
|
||||
cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT,
|
||||
0, READ_BARRIER);
|
||||
});
|
||||
}
|
||||
|
||||
void BufferCacheRuntime::PostCopyBarrier() {
|
||||
static constexpr VkMemoryBarrier WRITE_BARRIER{
|
||||
.sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER,
|
||||
.pNext = nullptr,
|
||||
.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
|
||||
.dstAccessMask = VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT,
|
||||
};
|
||||
scheduler.RequestOutsideRenderPassOperationContext();
|
||||
scheduler.Record([](vk::CommandBuffer cmdbuf) {
|
||||
cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
|
||||
0, WRITE_BARRIER);
|
||||
});
|
||||
}
|
||||
|
||||
void BufferCacheRuntime::ClearBuffer(VkBuffer dest_buffer, u32 offset, size_t size, u32 value) {
|
||||
static constexpr VkMemoryBarrier READ_BARRIER{
|
||||
.sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER,
|
||||
.pNext = nullptr,
|
||||
.srcAccessMask = VK_ACCESS_MEMORY_WRITE_BIT,
|
||||
.dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT | VK_ACCESS_TRANSFER_WRITE_BIT,
|
||||
};
|
||||
static constexpr VkMemoryBarrier WRITE_BARRIER{
|
||||
.sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER,
|
||||
.pNext = nullptr,
|
||||
.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
|
||||
.dstAccessMask = VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT,
|
||||
};
|
||||
|
||||
scheduler.RequestOutsideRenderPassOperationContext();
|
||||
scheduler.Record([dest_buffer, offset, size, value](vk::CommandBuffer cmdbuf) {
|
||||
cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT,
|
||||
0, READ_BARRIER);
|
||||
cmdbuf.FillBuffer(dest_buffer, offset, size, value);
|
||||
cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
|
||||
0, WRITE_BARRIER);
|
||||
});
|
||||
}
|
||||
|
||||
void BufferCacheRuntime::BindIndexBuffer(PrimitiveTopology topology, IndexFormat index_format,
|
||||
u32 base_vertex, u32 num_indices, VkBuffer buffer,
|
||||
u32 offset, [[maybe_unused]] u32 size) {
|
||||
VkIndexType vk_index_type = MaxwellToVK::IndexFormat(index_format);
|
||||
VkDeviceSize vk_offset = offset;
|
||||
VkBuffer vk_buffer = buffer;
|
||||
if (topology == PrimitiveTopology::Quads) {
|
||||
vk_index_type = VK_INDEX_TYPE_UINT32;
|
||||
std::tie(vk_buffer, vk_offset) =
|
||||
quad_index_pass.Assemble(index_format, num_indices, base_vertex, buffer, offset);
|
||||
} else if (vk_index_type == VK_INDEX_TYPE_UINT8_EXT && !device.IsExtIndexTypeUint8Supported()) {
|
||||
vk_index_type = VK_INDEX_TYPE_UINT16;
|
||||
std::tie(vk_buffer, vk_offset) = uint8_pass.Assemble(num_indices, buffer, offset);
|
||||
}
|
||||
if (vk_buffer == VK_NULL_HANDLE) {
|
||||
// Vulkan doesn't support null index buffers. Replace it with our own null buffer.
|
||||
ReserveNullBuffer();
|
||||
vk_buffer = *null_buffer;
|
||||
}
|
||||
scheduler.Record([vk_buffer, vk_offset, vk_index_type](vk::CommandBuffer cmdbuf) {
|
||||
cmdbuf.BindIndexBuffer(vk_buffer, vk_offset, vk_index_type);
|
||||
});
|
||||
}
|
||||
|
||||
void BufferCacheRuntime::BindQuadArrayIndexBuffer(u32 first, u32 count) {
|
||||
if (count == 0) {
|
||||
ReserveNullBuffer();
|
||||
scheduler.Record([this](vk::CommandBuffer cmdbuf) {
|
||||
cmdbuf.BindIndexBuffer(*null_buffer, 0, VK_INDEX_TYPE_UINT32);
|
||||
});
|
||||
return;
|
||||
}
|
||||
ReserveQuadArrayLUT(first + count, true);
|
||||
|
||||
// The LUT has the indices 0, 1, 2, and 3 copied as an array
|
||||
// To apply these 'first' offsets we can apply an offset based on the modulus.
|
||||
const VkIndexType index_type = quad_array_lut_index_type;
|
||||
const size_t sub_first_offset = static_cast<size_t>(first % 4) * (current_num_indices / 4);
|
||||
const size_t offset = (sub_first_offset + first / 4) * 6ULL * BytesPerIndex(index_type);
|
||||
scheduler.Record([buffer = *quad_array_lut, index_type, offset](vk::CommandBuffer cmdbuf) {
|
||||
cmdbuf.BindIndexBuffer(buffer, offset, index_type);
|
||||
});
|
||||
}
|
||||
|
||||
void BufferCacheRuntime::BindVertexBuffer(u32 index, VkBuffer buffer, u32 offset, u32 size,
|
||||
u32 stride) {
|
||||
if (device.IsExtExtendedDynamicStateSupported()) {
|
||||
scheduler.Record([index, buffer, offset, size, stride](vk::CommandBuffer cmdbuf) {
|
||||
const VkDeviceSize vk_offset = buffer != VK_NULL_HANDLE ? offset : 0;
|
||||
const VkDeviceSize vk_size = buffer != VK_NULL_HANDLE ? size : VK_WHOLE_SIZE;
|
||||
const VkDeviceSize vk_stride = stride;
|
||||
cmdbuf.BindVertexBuffers2EXT(index, 1, &buffer, &vk_offset, &vk_size, &vk_stride);
|
||||
});
|
||||
} else {
|
||||
scheduler.Record([index, buffer, offset](vk::CommandBuffer cmdbuf) {
|
||||
cmdbuf.BindVertexBuffer(index, buffer, offset);
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
void BufferCacheRuntime::BindTransformFeedbackBuffer(u32 index, VkBuffer buffer, u32 offset,
|
||||
u32 size) {
|
||||
if (!device.IsExtTransformFeedbackSupported()) {
|
||||
// Already logged in the rasterizer
|
||||
return;
|
||||
}
|
||||
if (buffer == VK_NULL_HANDLE) {
|
||||
// Vulkan doesn't support null transform feedback buffers.
|
||||
// Replace it with our own null buffer.
|
||||
ReserveNullBuffer();
|
||||
buffer = *null_buffer;
|
||||
offset = 0;
|
||||
size = 0;
|
||||
}
|
||||
scheduler.Record([index, buffer, offset, size](vk::CommandBuffer cmdbuf) {
|
||||
const VkDeviceSize vk_offset = offset;
|
||||
const VkDeviceSize vk_size = size;
|
||||
cmdbuf.BindTransformFeedbackBuffersEXT(index, 1, &buffer, &vk_offset, &vk_size);
|
||||
});
|
||||
}
|
||||
|
||||
void BufferCacheRuntime::ReserveQuadArrayLUT(u32 num_indices, bool wait_for_idle) {
|
||||
if (num_indices <= current_num_indices) {
|
||||
return;
|
||||
}
|
||||
if (wait_for_idle) {
|
||||
scheduler.Finish();
|
||||
}
|
||||
current_num_indices = num_indices;
|
||||
quad_array_lut_index_type = IndexTypeFromNumElements(device, num_indices);
|
||||
|
||||
const u32 num_quads = num_indices / 4;
|
||||
const u32 num_triangle_indices = num_quads * 6;
|
||||
const u32 num_first_offset_copies = 4;
|
||||
const size_t bytes_per_index = BytesPerIndex(quad_array_lut_index_type);
|
||||
const size_t size_bytes = num_triangle_indices * bytes_per_index * num_first_offset_copies;
|
||||
quad_array_lut = device.GetLogical().CreateBuffer(VkBufferCreateInfo{
|
||||
.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
|
||||
.pNext = nullptr,
|
||||
.flags = 0,
|
||||
.size = size_bytes,
|
||||
.usage = VK_BUFFER_USAGE_INDEX_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT,
|
||||
.sharingMode = VK_SHARING_MODE_EXCLUSIVE,
|
||||
.queueFamilyIndexCount = 0,
|
||||
.pQueueFamilyIndices = nullptr,
|
||||
});
|
||||
if (device.HasDebuggingToolAttached()) {
|
||||
quad_array_lut.SetObjectNameEXT("Quad LUT");
|
||||
}
|
||||
quad_array_lut_commit = memory_allocator.Commit(quad_array_lut, MemoryUsage::DeviceLocal);
|
||||
|
||||
const StagingBufferRef staging = staging_pool.Request(size_bytes, MemoryUsage::Upload);
|
||||
u8* staging_data = staging.mapped_span.data();
|
||||
const size_t quad_size = bytes_per_index * 6;
|
||||
for (u32 first = 0; first < num_first_offset_copies; ++first) {
|
||||
for (u32 quad = 0; quad < num_quads; ++quad) {
|
||||
switch (quad_array_lut_index_type) {
|
||||
case VK_INDEX_TYPE_UINT8_EXT:
|
||||
std::memcpy(staging_data, MakeQuadIndices<u8>(quad, first).data(), quad_size);
|
||||
break;
|
||||
case VK_INDEX_TYPE_UINT16:
|
||||
std::memcpy(staging_data, MakeQuadIndices<u16>(quad, first).data(), quad_size);
|
||||
break;
|
||||
case VK_INDEX_TYPE_UINT32:
|
||||
std::memcpy(staging_data, MakeQuadIndices<u32>(quad, first).data(), quad_size);
|
||||
break;
|
||||
default:
|
||||
ASSERT(false);
|
||||
break;
|
||||
}
|
||||
staging_data += quad_size;
|
||||
}
|
||||
}
|
||||
scheduler.RequestOutsideRenderPassOperationContext();
|
||||
scheduler.Record([src_buffer = staging.buffer, src_offset = staging.offset,
|
||||
dst_buffer = *quad_array_lut, size_bytes](vk::CommandBuffer cmdbuf) {
|
||||
const VkBufferCopy copy{
|
||||
.srcOffset = src_offset,
|
||||
.dstOffset = 0,
|
||||
.size = size_bytes,
|
||||
};
|
||||
const VkBufferMemoryBarrier write_barrier{
|
||||
.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,
|
||||
.pNext = nullptr,
|
||||
.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
|
||||
.dstAccessMask = VK_ACCESS_INDEX_READ_BIT,
|
||||
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
|
||||
.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
|
||||
.buffer = dst_buffer,
|
||||
.offset = 0,
|
||||
.size = size_bytes,
|
||||
};
|
||||
cmdbuf.CopyBuffer(src_buffer, dst_buffer, copy);
|
||||
cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_VERTEX_INPUT_BIT,
|
||||
0, write_barrier);
|
||||
});
|
||||
}
|
||||
|
||||
void BufferCacheRuntime::ReserveNullBuffer() {
|
||||
if (null_buffer) {
|
||||
return;
|
||||
}
|
||||
VkBufferCreateInfo create_info{
|
||||
.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
|
||||
.pNext = nullptr,
|
||||
.flags = 0,
|
||||
.size = 4,
|
||||
.usage = VK_BUFFER_USAGE_INDEX_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT,
|
||||
.sharingMode = VK_SHARING_MODE_EXCLUSIVE,
|
||||
.queueFamilyIndexCount = 0,
|
||||
.pQueueFamilyIndices = nullptr,
|
||||
};
|
||||
if (device.IsExtTransformFeedbackSupported()) {
|
||||
create_info.usage |= VK_BUFFER_USAGE_TRANSFORM_FEEDBACK_BUFFER_BIT_EXT;
|
||||
}
|
||||
null_buffer = device.GetLogical().CreateBuffer(create_info);
|
||||
if (device.HasDebuggingToolAttached()) {
|
||||
null_buffer.SetObjectNameEXT("Null buffer");
|
||||
}
|
||||
null_buffer_commit = memory_allocator.Commit(null_buffer, MemoryUsage::DeviceLocal);
|
||||
|
||||
scheduler.RequestOutsideRenderPassOperationContext();
|
||||
scheduler.Record([buffer = *null_buffer](vk::CommandBuffer cmdbuf) {
|
||||
cmdbuf.FillBuffer(buffer, 0, VK_WHOLE_SIZE, 0);
|
||||
});
|
||||
}
|
||||
|
||||
} // namespace Vulkan
|
||||
|
||||
@@ -1,158 +1,158 @@
|
||||
// SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "video_core/buffer_cache/buffer_cache.h"
|
||||
#include "video_core/engines/maxwell_3d.h"
|
||||
#include "video_core/renderer_vulkan/vk_compute_pass.h"
|
||||
#include "video_core/renderer_vulkan/vk_staging_buffer_pool.h"
|
||||
#include "video_core/renderer_vulkan/vk_update_descriptor.h"
|
||||
#include "video_core/surface.h"
|
||||
#include "video_core/vulkan_common/vulkan_memory_allocator.h"
|
||||
#include "video_core/vulkan_common/vulkan_wrapper.h"
|
||||
|
||||
namespace Vulkan {
|
||||
|
||||
class Device;
|
||||
class DescriptorPool;
|
||||
class Scheduler;
|
||||
|
||||
class BufferCacheRuntime;
|
||||
|
||||
class Buffer : public VideoCommon::BufferBase<VideoCore::RasterizerInterface> {
|
||||
public:
|
||||
explicit Buffer(BufferCacheRuntime&, VideoCommon::NullBufferParams null_params);
|
||||
explicit Buffer(BufferCacheRuntime& runtime, VideoCore::RasterizerInterface& rasterizer_,
|
||||
VAddr cpu_addr_, u64 size_bytes_);
|
||||
|
||||
[[nodiscard]] VkBufferView View(u32 offset, u32 size, VideoCore::Surface::PixelFormat format);
|
||||
|
||||
[[nodiscard]] VkBuffer Handle() const noexcept {
|
||||
return *buffer;
|
||||
}
|
||||
|
||||
operator VkBuffer() const noexcept {
|
||||
return *buffer;
|
||||
}
|
||||
|
||||
private:
|
||||
struct BufferView {
|
||||
u32 offset;
|
||||
u32 size;
|
||||
VideoCore::Surface::PixelFormat format;
|
||||
vk::BufferView handle;
|
||||
};
|
||||
|
||||
const Device* device{};
|
||||
vk::Buffer buffer;
|
||||
MemoryCommit commit;
|
||||
std::vector<BufferView> views;
|
||||
};
|
||||
|
||||
class BufferCacheRuntime {
|
||||
friend Buffer;
|
||||
|
||||
using PrimitiveTopology = Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology;
|
||||
using IndexFormat = Tegra::Engines::Maxwell3D::Regs::IndexFormat;
|
||||
|
||||
public:
|
||||
explicit BufferCacheRuntime(const Device& device_, MemoryAllocator& memory_manager_,
|
||||
Scheduler& scheduler_, StagingBufferPool& staging_pool_,
|
||||
UpdateDescriptorQueue& update_descriptor_queue_,
|
||||
DescriptorPool& descriptor_pool);
|
||||
|
||||
void Finish();
|
||||
|
||||
u64 GetDeviceLocalMemory() const;
|
||||
|
||||
u64 GetDeviceMemoryUsage() const;
|
||||
|
||||
bool CanReportMemoryUsage() const;
|
||||
|
||||
[[nodiscard]] StagingBufferRef UploadStagingBuffer(size_t size);
|
||||
|
||||
[[nodiscard]] StagingBufferRef DownloadStagingBuffer(size_t size);
|
||||
|
||||
void PreCopyBarrier();
|
||||
|
||||
void CopyBuffer(VkBuffer src_buffer, VkBuffer dst_buffer,
|
||||
std::span<const VideoCommon::BufferCopy> copies, bool barrier = true);
|
||||
|
||||
void PostCopyBarrier();
|
||||
|
||||
void ClearBuffer(VkBuffer dest_buffer, u32 offset, size_t size, u32 value);
|
||||
|
||||
void BindIndexBuffer(PrimitiveTopology topology, IndexFormat index_format, u32 num_indices,
|
||||
u32 base_vertex, VkBuffer buffer, u32 offset, u32 size);
|
||||
|
||||
void BindQuadArrayIndexBuffer(u32 first, u32 count);
|
||||
|
||||
void BindVertexBuffer(u32 index, VkBuffer buffer, u32 offset, u32 size, u32 stride);
|
||||
|
||||
void BindTransformFeedbackBuffer(u32 index, VkBuffer buffer, u32 offset, u32 size);
|
||||
|
||||
std::span<u8> BindMappedUniformBuffer([[maybe_unused]] size_t stage,
|
||||
[[maybe_unused]] u32 binding_index, u32 size) {
|
||||
const StagingBufferRef ref = staging_pool.Request(size, MemoryUsage::Upload);
|
||||
BindBuffer(ref.buffer, static_cast<u32>(ref.offset), size);
|
||||
return ref.mapped_span;
|
||||
}
|
||||
|
||||
void BindUniformBuffer(VkBuffer buffer, u32 offset, u32 size) {
|
||||
BindBuffer(buffer, offset, size);
|
||||
}
|
||||
|
||||
void BindStorageBuffer(VkBuffer buffer, u32 offset, u32 size,
|
||||
[[maybe_unused]] bool is_written) {
|
||||
BindBuffer(buffer, offset, size);
|
||||
}
|
||||
|
||||
void BindTextureBuffer(Buffer& buffer, u32 offset, u32 size,
|
||||
VideoCore::Surface::PixelFormat format) {
|
||||
update_descriptor_queue.AddTexelBuffer(buffer.View(offset, size, format));
|
||||
}
|
||||
|
||||
private:
|
||||
void BindBuffer(VkBuffer buffer, u32 offset, u32 size) {
|
||||
update_descriptor_queue.AddBuffer(buffer, offset, size);
|
||||
}
|
||||
|
||||
void ReserveQuadArrayLUT(u32 num_indices, bool wait_for_idle);
|
||||
|
||||
void ReserveNullBuffer();
|
||||
|
||||
const Device& device;
|
||||
MemoryAllocator& memory_allocator;
|
||||
Scheduler& scheduler;
|
||||
StagingBufferPool& staging_pool;
|
||||
UpdateDescriptorQueue& update_descriptor_queue;
|
||||
|
||||
vk::Buffer quad_array_lut;
|
||||
MemoryCommit quad_array_lut_commit;
|
||||
VkIndexType quad_array_lut_index_type{};
|
||||
u32 current_num_indices = 0;
|
||||
|
||||
vk::Buffer null_buffer;
|
||||
MemoryCommit null_buffer_commit;
|
||||
|
||||
Uint8Pass uint8_pass;
|
||||
QuadIndexedPass quad_index_pass;
|
||||
};
|
||||
|
||||
struct BufferCacheParams {
|
||||
using Runtime = Vulkan::BufferCacheRuntime;
|
||||
using Buffer = Vulkan::Buffer;
|
||||
|
||||
static constexpr bool IS_OPENGL = false;
|
||||
static constexpr bool HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS = false;
|
||||
static constexpr bool HAS_FULL_INDEX_AND_PRIMITIVE_SUPPORT = false;
|
||||
static constexpr bool NEEDS_BIND_UNIFORM_INDEX = false;
|
||||
static constexpr bool NEEDS_BIND_STORAGE_INDEX = false;
|
||||
static constexpr bool USE_MEMORY_MAPS = true;
|
||||
static constexpr bool SEPARATE_IMAGE_BUFFER_BINDINGS = false;
|
||||
};
|
||||
|
||||
using BufferCache = VideoCommon::BufferCache<BufferCacheParams>;
|
||||
|
||||
} // namespace Vulkan
|
||||
// SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "video_core/buffer_cache/buffer_cache.h"
|
||||
#include "video_core/engines/maxwell_3d.h"
|
||||
#include "video_core/renderer_vulkan/vk_compute_pass.h"
|
||||
#include "video_core/renderer_vulkan/vk_staging_buffer_pool.h"
|
||||
#include "video_core/renderer_vulkan/vk_update_descriptor.h"
|
||||
#include "video_core/surface.h"
|
||||
#include "video_core/vulkan_common/vulkan_memory_allocator.h"
|
||||
#include "video_core/vulkan_common/vulkan_wrapper.h"
|
||||
|
||||
namespace Vulkan {
|
||||
|
||||
class Device;
|
||||
class DescriptorPool;
|
||||
class Scheduler;
|
||||
|
||||
class BufferCacheRuntime;
|
||||
|
||||
class Buffer : public VideoCommon::BufferBase<VideoCore::RasterizerInterface> {
|
||||
public:
|
||||
explicit Buffer(BufferCacheRuntime&, VideoCommon::NullBufferParams null_params);
|
||||
explicit Buffer(BufferCacheRuntime& runtime, VideoCore::RasterizerInterface& rasterizer_,
|
||||
VAddr cpu_addr_, u64 size_bytes_);
|
||||
|
||||
[[nodiscard]] VkBufferView View(u32 offset, u32 size, VideoCore::Surface::PixelFormat format);
|
||||
|
||||
[[nodiscard]] VkBuffer Handle() const noexcept {
|
||||
return *buffer;
|
||||
}
|
||||
|
||||
operator VkBuffer() const noexcept {
|
||||
return *buffer;
|
||||
}
|
||||
|
||||
private:
|
||||
struct BufferView {
|
||||
u32 offset;
|
||||
u32 size;
|
||||
VideoCore::Surface::PixelFormat format;
|
||||
vk::BufferView handle;
|
||||
};
|
||||
|
||||
const Device* device{};
|
||||
vk::Buffer buffer;
|
||||
MemoryCommit commit;
|
||||
std::vector<BufferView> views;
|
||||
};
|
||||
|
||||
class BufferCacheRuntime {
|
||||
friend Buffer;
|
||||
|
||||
using PrimitiveTopology = Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology;
|
||||
using IndexFormat = Tegra::Engines::Maxwell3D::Regs::IndexFormat;
|
||||
|
||||
public:
|
||||
explicit BufferCacheRuntime(const Device& device_, MemoryAllocator& memory_manager_,
|
||||
Scheduler& scheduler_, StagingBufferPool& staging_pool_,
|
||||
UpdateDescriptorQueue& update_descriptor_queue_,
|
||||
DescriptorPool& descriptor_pool);
|
||||
|
||||
void Finish();
|
||||
|
||||
u64 GetDeviceLocalMemory() const;
|
||||
|
||||
u64 GetDeviceMemoryUsage() const;
|
||||
|
||||
bool CanReportMemoryUsage() const;
|
||||
|
||||
[[nodiscard]] StagingBufferRef UploadStagingBuffer(size_t size);
|
||||
|
||||
[[nodiscard]] StagingBufferRef DownloadStagingBuffer(size_t size);
|
||||
|
||||
void PreCopyBarrier();
|
||||
|
||||
void CopyBuffer(VkBuffer src_buffer, VkBuffer dst_buffer,
|
||||
std::span<const VideoCommon::BufferCopy> copies, bool barrier = true);
|
||||
|
||||
void PostCopyBarrier();
|
||||
|
||||
void ClearBuffer(VkBuffer dest_buffer, u32 offset, size_t size, u32 value);
|
||||
|
||||
void BindIndexBuffer(PrimitiveTopology topology, IndexFormat index_format, u32 num_indices,
|
||||
u32 base_vertex, VkBuffer buffer, u32 offset, u32 size);
|
||||
|
||||
void BindQuadArrayIndexBuffer(u32 first, u32 count);
|
||||
|
||||
void BindVertexBuffer(u32 index, VkBuffer buffer, u32 offset, u32 size, u32 stride);
|
||||
|
||||
void BindTransformFeedbackBuffer(u32 index, VkBuffer buffer, u32 offset, u32 size);
|
||||
|
||||
std::span<u8> BindMappedUniformBuffer([[maybe_unused]] size_t stage,
|
||||
[[maybe_unused]] u32 binding_index, u32 size) {
|
||||
const StagingBufferRef ref = staging_pool.Request(size, MemoryUsage::Upload);
|
||||
BindBuffer(ref.buffer, static_cast<u32>(ref.offset), size);
|
||||
return ref.mapped_span;
|
||||
}
|
||||
|
||||
void BindUniformBuffer(VkBuffer buffer, u32 offset, u32 size) {
|
||||
BindBuffer(buffer, offset, size);
|
||||
}
|
||||
|
||||
void BindStorageBuffer(VkBuffer buffer, u32 offset, u32 size,
|
||||
[[maybe_unused]] bool is_written) {
|
||||
BindBuffer(buffer, offset, size);
|
||||
}
|
||||
|
||||
void BindTextureBuffer(Buffer& buffer, u32 offset, u32 size,
|
||||
VideoCore::Surface::PixelFormat format) {
|
||||
update_descriptor_queue.AddTexelBuffer(buffer.View(offset, size, format));
|
||||
}
|
||||
|
||||
private:
|
||||
void BindBuffer(VkBuffer buffer, u32 offset, u32 size) {
|
||||
update_descriptor_queue.AddBuffer(buffer, offset, size);
|
||||
}
|
||||
|
||||
void ReserveQuadArrayLUT(u32 num_indices, bool wait_for_idle);
|
||||
|
||||
void ReserveNullBuffer();
|
||||
|
||||
const Device& device;
|
||||
MemoryAllocator& memory_allocator;
|
||||
Scheduler& scheduler;
|
||||
StagingBufferPool& staging_pool;
|
||||
UpdateDescriptorQueue& update_descriptor_queue;
|
||||
|
||||
vk::Buffer quad_array_lut;
|
||||
MemoryCommit quad_array_lut_commit;
|
||||
VkIndexType quad_array_lut_index_type{};
|
||||
u32 current_num_indices = 0;
|
||||
|
||||
vk::Buffer null_buffer;
|
||||
MemoryCommit null_buffer_commit;
|
||||
|
||||
Uint8Pass uint8_pass;
|
||||
QuadIndexedPass quad_index_pass;
|
||||
};
|
||||
|
||||
struct BufferCacheParams {
|
||||
using Runtime = Vulkan::BufferCacheRuntime;
|
||||
using Buffer = Vulkan::Buffer;
|
||||
|
||||
static constexpr bool IS_OPENGL = false;
|
||||
static constexpr bool HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS = false;
|
||||
static constexpr bool HAS_FULL_INDEX_AND_PRIMITIVE_SUPPORT = false;
|
||||
static constexpr bool NEEDS_BIND_UNIFORM_INDEX = false;
|
||||
static constexpr bool NEEDS_BIND_STORAGE_INDEX = false;
|
||||
static constexpr bool USE_MEMORY_MAPS = true;
|
||||
static constexpr bool SEPARATE_IMAGE_BUFFER_BINDINGS = false;
|
||||
};
|
||||
|
||||
using BufferCache = VideoCommon::BufferCache<BufferCacheParams>;
|
||||
|
||||
} // namespace Vulkan
|
||||
|
||||
@@ -1,45 +1,45 @@
|
||||
// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#include <cstddef>
|
||||
|
||||
#include "video_core/renderer_vulkan/vk_command_pool.h"
|
||||
#include "video_core/vulkan_common/vulkan_device.h"
|
||||
#include "video_core/vulkan_common/vulkan_wrapper.h"
|
||||
|
||||
namespace Vulkan {
|
||||
|
||||
constexpr size_t COMMAND_BUFFER_POOL_SIZE = 4;
|
||||
|
||||
struct CommandPool::Pool {
|
||||
vk::CommandPool handle;
|
||||
vk::CommandBuffers cmdbufs;
|
||||
};
|
||||
|
||||
CommandPool::CommandPool(MasterSemaphore& master_semaphore_, const Device& device_)
|
||||
: ResourcePool(master_semaphore_, COMMAND_BUFFER_POOL_SIZE), device{device_} {}
|
||||
|
||||
CommandPool::~CommandPool() = default;
|
||||
|
||||
void CommandPool::Allocate(size_t begin, size_t end) {
|
||||
// Command buffers are going to be commited, recorded, executed every single usage cycle.
|
||||
// They are also going to be reseted when commited.
|
||||
Pool& pool = pools.emplace_back();
|
||||
pool.handle = device.GetLogical().CreateCommandPool({
|
||||
.sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO,
|
||||
.pNext = nullptr,
|
||||
.flags =
|
||||
VK_COMMAND_POOL_CREATE_TRANSIENT_BIT | VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT,
|
||||
.queueFamilyIndex = device.GetGraphicsFamily(),
|
||||
});
|
||||
pool.cmdbufs = pool.handle.Allocate(COMMAND_BUFFER_POOL_SIZE);
|
||||
}
|
||||
|
||||
VkCommandBuffer CommandPool::Commit() {
|
||||
const size_t index = CommitResource();
|
||||
const auto pool_index = index / COMMAND_BUFFER_POOL_SIZE;
|
||||
const auto sub_index = index % COMMAND_BUFFER_POOL_SIZE;
|
||||
return pools[pool_index].cmdbufs[sub_index];
|
||||
}
|
||||
|
||||
} // namespace Vulkan
|
||||
// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#include <cstddef>
|
||||
|
||||
#include "video_core/renderer_vulkan/vk_command_pool.h"
|
||||
#include "video_core/vulkan_common/vulkan_device.h"
|
||||
#include "video_core/vulkan_common/vulkan_wrapper.h"
|
||||
|
||||
namespace Vulkan {
|
||||
|
||||
constexpr size_t COMMAND_BUFFER_POOL_SIZE = 4;
|
||||
|
||||
struct CommandPool::Pool {
|
||||
vk::CommandPool handle;
|
||||
vk::CommandBuffers cmdbufs;
|
||||
};
|
||||
|
||||
CommandPool::CommandPool(MasterSemaphore& master_semaphore_, const Device& device_)
|
||||
: ResourcePool(master_semaphore_, COMMAND_BUFFER_POOL_SIZE), device{device_} {}
|
||||
|
||||
CommandPool::~CommandPool() = default;
|
||||
|
||||
void CommandPool::Allocate(size_t begin, size_t end) {
|
||||
// Command buffers are going to be commited, recorded, executed every single usage cycle.
|
||||
// They are also going to be reseted when commited.
|
||||
Pool& pool = pools.emplace_back();
|
||||
pool.handle = device.GetLogical().CreateCommandPool({
|
||||
.sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO,
|
||||
.pNext = nullptr,
|
||||
.flags =
|
||||
VK_COMMAND_POOL_CREATE_TRANSIENT_BIT | VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT,
|
||||
.queueFamilyIndex = device.GetGraphicsFamily(),
|
||||
});
|
||||
pool.cmdbufs = pool.handle.Allocate(COMMAND_BUFFER_POOL_SIZE);
|
||||
}
|
||||
|
||||
VkCommandBuffer CommandPool::Commit() {
|
||||
const size_t index = CommitResource();
|
||||
const auto pool_index = index / COMMAND_BUFFER_POOL_SIZE;
|
||||
const auto sub_index = index % COMMAND_BUFFER_POOL_SIZE;
|
||||
return pools[pool_index].cmdbufs[sub_index];
|
||||
}
|
||||
|
||||
} // namespace Vulkan
|
||||
|
||||
@@ -1,33 +1,33 @@
|
||||
// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <cstddef>
|
||||
#include <vector>
|
||||
|
||||
#include "video_core/renderer_vulkan/vk_resource_pool.h"
|
||||
#include "video_core/vulkan_common/vulkan_wrapper.h"
|
||||
|
||||
namespace Vulkan {
|
||||
|
||||
class Device;
|
||||
class MasterSemaphore;
|
||||
|
||||
class CommandPool final : public ResourcePool {
|
||||
public:
|
||||
explicit CommandPool(MasterSemaphore& master_semaphore_, const Device& device_);
|
||||
~CommandPool() override;
|
||||
|
||||
void Allocate(size_t begin, size_t end) override;
|
||||
|
||||
VkCommandBuffer Commit();
|
||||
|
||||
private:
|
||||
struct Pool;
|
||||
|
||||
const Device& device;
|
||||
std::vector<Pool> pools;
|
||||
};
|
||||
|
||||
} // namespace Vulkan
|
||||
// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <cstddef>
|
||||
#include <vector>
|
||||
|
||||
#include "video_core/renderer_vulkan/vk_resource_pool.h"
|
||||
#include "video_core/vulkan_common/vulkan_wrapper.h"
|
||||
|
||||
namespace Vulkan {
|
||||
|
||||
class Device;
|
||||
class MasterSemaphore;
|
||||
|
||||
class CommandPool final : public ResourcePool {
|
||||
public:
|
||||
explicit CommandPool(MasterSemaphore& master_semaphore_, const Device& device_);
|
||||
~CommandPool() override;
|
||||
|
||||
void Allocate(size_t begin, size_t end) override;
|
||||
|
||||
VkCommandBuffer Commit();
|
||||
|
||||
private:
|
||||
struct Pool;
|
||||
|
||||
const Device& device;
|
||||
std::vector<Pool> pools;
|
||||
};
|
||||
|
||||
} // namespace Vulkan
|
||||
|
||||
@@ -1,414 +1,414 @@
|
||||
// SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#include <array>
|
||||
#include <memory>
|
||||
#include <optional>
|
||||
#include <utility>
|
||||
|
||||
#include "common/assert.h"
|
||||
#include "common/common_types.h"
|
||||
#include "common/div_ceil.h"
|
||||
#include "video_core/host_shaders/astc_decoder_comp_spv.h"
|
||||
#include "video_core/host_shaders/vulkan_quad_indexed_comp_spv.h"
|
||||
#include "video_core/host_shaders/vulkan_uint8_comp_spv.h"
|
||||
#include "video_core/renderer_vulkan/vk_compute_pass.h"
|
||||
#include "video_core/renderer_vulkan/vk_descriptor_pool.h"
|
||||
#include "video_core/renderer_vulkan/vk_scheduler.h"
|
||||
#include "video_core/renderer_vulkan/vk_staging_buffer_pool.h"
|
||||
#include "video_core/renderer_vulkan/vk_texture_cache.h"
|
||||
#include "video_core/renderer_vulkan/vk_update_descriptor.h"
|
||||
#include "video_core/texture_cache/accelerated_swizzle.h"
|
||||
#include "video_core/texture_cache/types.h"
|
||||
#include "video_core/textures/decoders.h"
|
||||
#include "video_core/vulkan_common/vulkan_device.h"
|
||||
#include "video_core/vulkan_common/vulkan_wrapper.h"
|
||||
|
||||
namespace Vulkan {
|
||||
|
||||
namespace {
|
||||
|
||||
constexpr u32 ASTC_BINDING_INPUT_BUFFER = 0;
|
||||
constexpr u32 ASTC_BINDING_OUTPUT_IMAGE = 1;
|
||||
constexpr size_t ASTC_NUM_BINDINGS = 2;
|
||||
|
||||
template <size_t size>
|
||||
inline constexpr VkPushConstantRange COMPUTE_PUSH_CONSTANT_RANGE{
|
||||
.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
|
||||
.offset = 0,
|
||||
.size = static_cast<u32>(size),
|
||||
};
|
||||
|
||||
constexpr std::array<VkDescriptorSetLayoutBinding, 2> INPUT_OUTPUT_DESCRIPTOR_SET_BINDINGS{{
|
||||
{
|
||||
.binding = 0,
|
||||
.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
|
||||
.descriptorCount = 1,
|
||||
.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
|
||||
.pImmutableSamplers = nullptr,
|
||||
},
|
||||
{
|
||||
.binding = 1,
|
||||
.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
|
||||
.descriptorCount = 1,
|
||||
.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
|
||||
.pImmutableSamplers = nullptr,
|
||||
},
|
||||
}};
|
||||
|
||||
constexpr DescriptorBankInfo INPUT_OUTPUT_BANK_INFO{
|
||||
.uniform_buffers = 0,
|
||||
.storage_buffers = 2,
|
||||
.texture_buffers = 0,
|
||||
.image_buffers = 0,
|
||||
.textures = 0,
|
||||
.images = 0,
|
||||
.score = 2,
|
||||
};
|
||||
|
||||
constexpr std::array<VkDescriptorSetLayoutBinding, ASTC_NUM_BINDINGS> ASTC_DESCRIPTOR_SET_BINDINGS{{
|
||||
{
|
||||
.binding = ASTC_BINDING_INPUT_BUFFER,
|
||||
.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
|
||||
.descriptorCount = 1,
|
||||
.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
|
||||
.pImmutableSamplers = nullptr,
|
||||
},
|
||||
{
|
||||
.binding = ASTC_BINDING_OUTPUT_IMAGE,
|
||||
.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
|
||||
.descriptorCount = 1,
|
||||
.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
|
||||
.pImmutableSamplers = nullptr,
|
||||
},
|
||||
}};
|
||||
|
||||
constexpr DescriptorBankInfo ASTC_BANK_INFO{
|
||||
.uniform_buffers = 0,
|
||||
.storage_buffers = 1,
|
||||
.texture_buffers = 0,
|
||||
.image_buffers = 0,
|
||||
.textures = 0,
|
||||
.images = 1,
|
||||
.score = 2,
|
||||
};
|
||||
|
||||
constexpr VkDescriptorUpdateTemplateEntryKHR INPUT_OUTPUT_DESCRIPTOR_UPDATE_TEMPLATE{
|
||||
.dstBinding = 0,
|
||||
.dstArrayElement = 0,
|
||||
.descriptorCount = 2,
|
||||
.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
|
||||
.offset = 0,
|
||||
.stride = sizeof(DescriptorUpdateEntry),
|
||||
};
|
||||
|
||||
constexpr std::array<VkDescriptorUpdateTemplateEntryKHR, ASTC_NUM_BINDINGS>
|
||||
ASTC_PASS_DESCRIPTOR_UPDATE_TEMPLATE_ENTRY{{
|
||||
{
|
||||
.dstBinding = ASTC_BINDING_INPUT_BUFFER,
|
||||
.dstArrayElement = 0,
|
||||
.descriptorCount = 1,
|
||||
.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
|
||||
.offset = ASTC_BINDING_INPUT_BUFFER * sizeof(DescriptorUpdateEntry),
|
||||
.stride = sizeof(DescriptorUpdateEntry),
|
||||
},
|
||||
{
|
||||
.dstBinding = ASTC_BINDING_OUTPUT_IMAGE,
|
||||
.dstArrayElement = 0,
|
||||
.descriptorCount = 1,
|
||||
.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
|
||||
.offset = ASTC_BINDING_OUTPUT_IMAGE * sizeof(DescriptorUpdateEntry),
|
||||
.stride = sizeof(DescriptorUpdateEntry),
|
||||
},
|
||||
}};
|
||||
|
||||
struct AstcPushConstants {
|
||||
std::array<u32, 2> blocks_dims;
|
||||
u32 layer_stride;
|
||||
u32 block_size;
|
||||
u32 x_shift;
|
||||
u32 block_height;
|
||||
u32 block_height_mask;
|
||||
};
|
||||
} // Anonymous namespace
|
||||
|
||||
ComputePass::ComputePass(const Device& device_, DescriptorPool& descriptor_pool,
|
||||
vk::Span<VkDescriptorSetLayoutBinding> bindings,
|
||||
vk::Span<VkDescriptorUpdateTemplateEntryKHR> templates,
|
||||
const DescriptorBankInfo& bank_info,
|
||||
vk::Span<VkPushConstantRange> push_constants, std::span<const u32> code)
|
||||
: device{device_} {
|
||||
descriptor_set_layout = device.GetLogical().CreateDescriptorSetLayout({
|
||||
.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
|
||||
.pNext = nullptr,
|
||||
.flags = 0,
|
||||
.bindingCount = bindings.size(),
|
||||
.pBindings = bindings.data(),
|
||||
});
|
||||
layout = device.GetLogical().CreatePipelineLayout({
|
||||
.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
|
||||
.pNext = nullptr,
|
||||
.flags = 0,
|
||||
.setLayoutCount = 1,
|
||||
.pSetLayouts = descriptor_set_layout.address(),
|
||||
.pushConstantRangeCount = push_constants.size(),
|
||||
.pPushConstantRanges = push_constants.data(),
|
||||
});
|
||||
if (!templates.empty()) {
|
||||
descriptor_template = device.GetLogical().CreateDescriptorUpdateTemplateKHR({
|
||||
.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_UPDATE_TEMPLATE_CREATE_INFO_KHR,
|
||||
.pNext = nullptr,
|
||||
.flags = 0,
|
||||
.descriptorUpdateEntryCount = templates.size(),
|
||||
.pDescriptorUpdateEntries = templates.data(),
|
||||
.templateType = VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_DESCRIPTOR_SET_KHR,
|
||||
.descriptorSetLayout = *descriptor_set_layout,
|
||||
.pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,
|
||||
.pipelineLayout = *layout,
|
||||
.set = 0,
|
||||
});
|
||||
descriptor_allocator = descriptor_pool.Allocator(*descriptor_set_layout, bank_info);
|
||||
}
|
||||
module = device.GetLogical().CreateShaderModule({
|
||||
.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO,
|
||||
.pNext = nullptr,
|
||||
.flags = 0,
|
||||
.codeSize = static_cast<u32>(code.size_bytes()),
|
||||
.pCode = code.data(),
|
||||
});
|
||||
device.SaveShader(code);
|
||||
pipeline = device.GetLogical().CreateComputePipeline({
|
||||
.sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
|
||||
.pNext = nullptr,
|
||||
.flags = 0,
|
||||
.stage{
|
||||
.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
|
||||
.pNext = nullptr,
|
||||
.flags = 0,
|
||||
.stage = VK_SHADER_STAGE_COMPUTE_BIT,
|
||||
.module = *module,
|
||||
.pName = "main",
|
||||
.pSpecializationInfo = nullptr,
|
||||
},
|
||||
.layout = *layout,
|
||||
.basePipelineHandle = nullptr,
|
||||
.basePipelineIndex = 0,
|
||||
});
|
||||
}
|
||||
|
||||
ComputePass::~ComputePass() = default;
|
||||
|
||||
Uint8Pass::Uint8Pass(const Device& device_, Scheduler& scheduler_, DescriptorPool& descriptor_pool,
|
||||
StagingBufferPool& staging_buffer_pool_,
|
||||
UpdateDescriptorQueue& update_descriptor_queue_)
|
||||
: ComputePass(device_, descriptor_pool, INPUT_OUTPUT_DESCRIPTOR_SET_BINDINGS,
|
||||
INPUT_OUTPUT_DESCRIPTOR_UPDATE_TEMPLATE, INPUT_OUTPUT_BANK_INFO, {},
|
||||
VULKAN_UINT8_COMP_SPV),
|
||||
scheduler{scheduler_}, staging_buffer_pool{staging_buffer_pool_},
|
||||
update_descriptor_queue{update_descriptor_queue_} {}
|
||||
|
||||
Uint8Pass::~Uint8Pass() = default;
|
||||
|
||||
std::pair<VkBuffer, VkDeviceSize> Uint8Pass::Assemble(u32 num_vertices, VkBuffer src_buffer,
|
||||
u32 src_offset) {
|
||||
const u32 staging_size = static_cast<u32>(num_vertices * sizeof(u16));
|
||||
const auto staging = staging_buffer_pool.Request(staging_size, MemoryUsage::DeviceLocal);
|
||||
|
||||
update_descriptor_queue.Acquire();
|
||||
update_descriptor_queue.AddBuffer(src_buffer, src_offset, num_vertices);
|
||||
update_descriptor_queue.AddBuffer(staging.buffer, staging.offset, staging_size);
|
||||
const void* const descriptor_data{update_descriptor_queue.UpdateData()};
|
||||
|
||||
scheduler.RequestOutsideRenderPassOperationContext();
|
||||
scheduler.Record([this, descriptor_data, num_vertices](vk::CommandBuffer cmdbuf) {
|
||||
static constexpr u32 DISPATCH_SIZE = 1024;
|
||||
static constexpr VkMemoryBarrier WRITE_BARRIER{
|
||||
.sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER,
|
||||
.pNext = nullptr,
|
||||
.srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT,
|
||||
.dstAccessMask = VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT,
|
||||
};
|
||||
const VkDescriptorSet set = descriptor_allocator.Commit();
|
||||
device.GetLogical().UpdateDescriptorSet(set, *descriptor_template, descriptor_data);
|
||||
cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline);
|
||||
cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, *layout, 0, set, {});
|
||||
cmdbuf.Dispatch(Common::DivCeil(num_vertices, DISPATCH_SIZE), 1, 1);
|
||||
cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
|
||||
VK_PIPELINE_STAGE_VERTEX_INPUT_BIT, 0, WRITE_BARRIER);
|
||||
});
|
||||
return {staging.buffer, staging.offset};
|
||||
}
|
||||
|
||||
QuadIndexedPass::QuadIndexedPass(const Device& device_, Scheduler& scheduler_,
|
||||
DescriptorPool& descriptor_pool_,
|
||||
StagingBufferPool& staging_buffer_pool_,
|
||||
UpdateDescriptorQueue& update_descriptor_queue_)
|
||||
: ComputePass(device_, descriptor_pool_, INPUT_OUTPUT_DESCRIPTOR_SET_BINDINGS,
|
||||
INPUT_OUTPUT_DESCRIPTOR_UPDATE_TEMPLATE, INPUT_OUTPUT_BANK_INFO,
|
||||
COMPUTE_PUSH_CONSTANT_RANGE<sizeof(u32) * 2>, VULKAN_QUAD_INDEXED_COMP_SPV),
|
||||
scheduler{scheduler_}, staging_buffer_pool{staging_buffer_pool_},
|
||||
update_descriptor_queue{update_descriptor_queue_} {}
|
||||
|
||||
QuadIndexedPass::~QuadIndexedPass() = default;
|
||||
|
||||
std::pair<VkBuffer, VkDeviceSize> QuadIndexedPass::Assemble(
|
||||
Tegra::Engines::Maxwell3D::Regs::IndexFormat index_format, u32 num_vertices, u32 base_vertex,
|
||||
VkBuffer src_buffer, u32 src_offset) {
|
||||
const u32 index_shift = [index_format] {
|
||||
switch (index_format) {
|
||||
case Tegra::Engines::Maxwell3D::Regs::IndexFormat::UnsignedByte:
|
||||
return 0;
|
||||
case Tegra::Engines::Maxwell3D::Regs::IndexFormat::UnsignedShort:
|
||||
return 1;
|
||||
case Tegra::Engines::Maxwell3D::Regs::IndexFormat::UnsignedInt:
|
||||
return 2;
|
||||
}
|
||||
ASSERT(false);
|
||||
return 2;
|
||||
}();
|
||||
const u32 input_size = num_vertices << index_shift;
|
||||
const u32 num_tri_vertices = (num_vertices / 4) * 6;
|
||||
|
||||
const std::size_t staging_size = num_tri_vertices * sizeof(u32);
|
||||
const auto staging = staging_buffer_pool.Request(staging_size, MemoryUsage::DeviceLocal);
|
||||
|
||||
update_descriptor_queue.Acquire();
|
||||
update_descriptor_queue.AddBuffer(src_buffer, src_offset, input_size);
|
||||
update_descriptor_queue.AddBuffer(staging.buffer, staging.offset, staging_size);
|
||||
const void* const descriptor_data{update_descriptor_queue.UpdateData()};
|
||||
|
||||
scheduler.RequestOutsideRenderPassOperationContext();
|
||||
scheduler.Record([this, descriptor_data, num_tri_vertices, base_vertex,
|
||||
index_shift](vk::CommandBuffer cmdbuf) {
|
||||
static constexpr u32 DISPATCH_SIZE = 1024;
|
||||
static constexpr VkMemoryBarrier WRITE_BARRIER{
|
||||
.sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER,
|
||||
.pNext = nullptr,
|
||||
.srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT,
|
||||
.dstAccessMask = VK_ACCESS_INDEX_READ_BIT,
|
||||
};
|
||||
const std::array<u32, 2> push_constants{base_vertex, index_shift};
|
||||
const VkDescriptorSet set = descriptor_allocator.Commit();
|
||||
device.GetLogical().UpdateDescriptorSet(set, *descriptor_template, descriptor_data);
|
||||
cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline);
|
||||
cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, *layout, 0, set, {});
|
||||
cmdbuf.PushConstants(*layout, VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(push_constants),
|
||||
&push_constants);
|
||||
cmdbuf.Dispatch(Common::DivCeil(num_tri_vertices, DISPATCH_SIZE), 1, 1);
|
||||
cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
|
||||
VK_PIPELINE_STAGE_VERTEX_INPUT_BIT, 0, WRITE_BARRIER);
|
||||
});
|
||||
return {staging.buffer, staging.offset};
|
||||
}
|
||||
|
||||
ASTCDecoderPass::ASTCDecoderPass(const Device& device_, Scheduler& scheduler_,
|
||||
DescriptorPool& descriptor_pool_,
|
||||
StagingBufferPool& staging_buffer_pool_,
|
||||
UpdateDescriptorQueue& update_descriptor_queue_,
|
||||
MemoryAllocator& memory_allocator_)
|
||||
: ComputePass(device_, descriptor_pool_, ASTC_DESCRIPTOR_SET_BINDINGS,
|
||||
ASTC_PASS_DESCRIPTOR_UPDATE_TEMPLATE_ENTRY, ASTC_BANK_INFO,
|
||||
COMPUTE_PUSH_CONSTANT_RANGE<sizeof(AstcPushConstants)>, ASTC_DECODER_COMP_SPV),
|
||||
scheduler{scheduler_}, staging_buffer_pool{staging_buffer_pool_},
|
||||
update_descriptor_queue{update_descriptor_queue_}, memory_allocator{memory_allocator_} {}
|
||||
|
||||
ASTCDecoderPass::~ASTCDecoderPass() = default;
|
||||
|
||||
void ASTCDecoderPass::Assemble(Image& image, const StagingBufferRef& map,
|
||||
std::span<const VideoCommon::SwizzleParameters> swizzles) {
|
||||
using namespace VideoCommon::Accelerated;
|
||||
const std::array<u32, 2> block_dims{
|
||||
VideoCore::Surface::DefaultBlockWidth(image.info.format),
|
||||
VideoCore::Surface::DefaultBlockHeight(image.info.format),
|
||||
};
|
||||
scheduler.RequestOutsideRenderPassOperationContext();
|
||||
const VkPipeline vk_pipeline = *pipeline;
|
||||
const VkImageAspectFlags aspect_mask = image.AspectMask();
|
||||
const VkImage vk_image = image.Handle();
|
||||
const bool is_initialized = image.ExchangeInitialization();
|
||||
scheduler.Record([vk_pipeline, vk_image, aspect_mask,
|
||||
is_initialized](vk::CommandBuffer cmdbuf) {
|
||||
const VkImageMemoryBarrier image_barrier{
|
||||
.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
|
||||
.pNext = nullptr,
|
||||
.srcAccessMask = static_cast<VkAccessFlags>(is_initialized ? VK_ACCESS_SHADER_WRITE_BIT
|
||||
: VK_ACCESS_NONE),
|
||||
.dstAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT,
|
||||
.oldLayout = is_initialized ? VK_IMAGE_LAYOUT_GENERAL : VK_IMAGE_LAYOUT_UNDEFINED,
|
||||
.newLayout = VK_IMAGE_LAYOUT_GENERAL,
|
||||
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
|
||||
.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
|
||||
.image = vk_image,
|
||||
.subresourceRange{
|
||||
.aspectMask = aspect_mask,
|
||||
.baseMipLevel = 0,
|
||||
.levelCount = VK_REMAINING_MIP_LEVELS,
|
||||
.baseArrayLayer = 0,
|
||||
.layerCount = VK_REMAINING_ARRAY_LAYERS,
|
||||
},
|
||||
};
|
||||
cmdbuf.PipelineBarrier(is_initialized ? VK_PIPELINE_STAGE_ALL_COMMANDS_BIT
|
||||
: VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
|
||||
VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, image_barrier);
|
||||
cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, vk_pipeline);
|
||||
});
|
||||
for (const VideoCommon::SwizzleParameters& swizzle : swizzles) {
|
||||
const size_t input_offset = swizzle.buffer_offset + map.offset;
|
||||
const u32 num_dispatches_x = Common::DivCeil(swizzle.num_tiles.width, 8U);
|
||||
const u32 num_dispatches_y = Common::DivCeil(swizzle.num_tiles.height, 8U);
|
||||
const u32 num_dispatches_z = image.info.resources.layers;
|
||||
|
||||
update_descriptor_queue.Acquire();
|
||||
update_descriptor_queue.AddBuffer(map.buffer, input_offset,
|
||||
image.guest_size_bytes - swizzle.buffer_offset);
|
||||
update_descriptor_queue.AddImage(image.StorageImageView(swizzle.level));
|
||||
const void* const descriptor_data{update_descriptor_queue.UpdateData()};
|
||||
|
||||
// To unswizzle the ASTC data
|
||||
const auto params = MakeBlockLinearSwizzle2DParams(swizzle, image.info);
|
||||
ASSERT(params.origin == (std::array<u32, 3>{0, 0, 0}));
|
||||
ASSERT(params.destination == (std::array<s32, 3>{0, 0, 0}));
|
||||
ASSERT(params.bytes_per_block_log2 == 4);
|
||||
scheduler.Record([this, num_dispatches_x, num_dispatches_y, num_dispatches_z, block_dims,
|
||||
params, descriptor_data](vk::CommandBuffer cmdbuf) {
|
||||
const AstcPushConstants uniforms{
|
||||
.blocks_dims = block_dims,
|
||||
.layer_stride = params.layer_stride,
|
||||
.block_size = params.block_size,
|
||||
.x_shift = params.x_shift,
|
||||
.block_height = params.block_height,
|
||||
.block_height_mask = params.block_height_mask,
|
||||
};
|
||||
const VkDescriptorSet set = descriptor_allocator.Commit();
|
||||
device.GetLogical().UpdateDescriptorSet(set, *descriptor_template, descriptor_data);
|
||||
cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, *layout, 0, set, {});
|
||||
cmdbuf.PushConstants(*layout, VK_SHADER_STAGE_COMPUTE_BIT, uniforms);
|
||||
cmdbuf.Dispatch(num_dispatches_x, num_dispatches_y, num_dispatches_z);
|
||||
});
|
||||
}
|
||||
scheduler.Record([vk_image, aspect_mask](vk::CommandBuffer cmdbuf) {
|
||||
const VkImageMemoryBarrier image_barrier{
|
||||
.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
|
||||
.pNext = nullptr,
|
||||
.srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT,
|
||||
.dstAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT,
|
||||
.oldLayout = VK_IMAGE_LAYOUT_GENERAL,
|
||||
.newLayout = VK_IMAGE_LAYOUT_GENERAL,
|
||||
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
|
||||
.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
|
||||
.image = vk_image,
|
||||
.subresourceRange{
|
||||
.aspectMask = aspect_mask,
|
||||
.baseMipLevel = 0,
|
||||
.levelCount = VK_REMAINING_MIP_LEVELS,
|
||||
.baseArrayLayer = 0,
|
||||
.layerCount = VK_REMAINING_ARRAY_LAYERS,
|
||||
},
|
||||
};
|
||||
cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
|
||||
VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, 0, image_barrier);
|
||||
});
|
||||
scheduler.Finish();
|
||||
}
|
||||
|
||||
} // namespace Vulkan
|
||||
// SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#include <array>
|
||||
#include <memory>
|
||||
#include <optional>
|
||||
#include <utility>
|
||||
|
||||
#include "common/assert.h"
|
||||
#include "common/common_types.h"
|
||||
#include "common/div_ceil.h"
|
||||
#include "video_core/host_shaders/astc_decoder_comp_spv.h"
|
||||
#include "video_core/host_shaders/vulkan_quad_indexed_comp_spv.h"
|
||||
#include "video_core/host_shaders/vulkan_uint8_comp_spv.h"
|
||||
#include "video_core/renderer_vulkan/vk_compute_pass.h"
|
||||
#include "video_core/renderer_vulkan/vk_descriptor_pool.h"
|
||||
#include "video_core/renderer_vulkan/vk_scheduler.h"
|
||||
#include "video_core/renderer_vulkan/vk_staging_buffer_pool.h"
|
||||
#include "video_core/renderer_vulkan/vk_texture_cache.h"
|
||||
#include "video_core/renderer_vulkan/vk_update_descriptor.h"
|
||||
#include "video_core/texture_cache/accelerated_swizzle.h"
|
||||
#include "video_core/texture_cache/types.h"
|
||||
#include "video_core/textures/decoders.h"
|
||||
#include "video_core/vulkan_common/vulkan_device.h"
|
||||
#include "video_core/vulkan_common/vulkan_wrapper.h"
|
||||
|
||||
namespace Vulkan {
|
||||
|
||||
namespace {
|
||||
|
||||
constexpr u32 ASTC_BINDING_INPUT_BUFFER = 0;
|
||||
constexpr u32 ASTC_BINDING_OUTPUT_IMAGE = 1;
|
||||
constexpr size_t ASTC_NUM_BINDINGS = 2;
|
||||
|
||||
template <size_t size>
|
||||
inline constexpr VkPushConstantRange COMPUTE_PUSH_CONSTANT_RANGE{
|
||||
.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
|
||||
.offset = 0,
|
||||
.size = static_cast<u32>(size),
|
||||
};
|
||||
|
||||
constexpr std::array<VkDescriptorSetLayoutBinding, 2> INPUT_OUTPUT_DESCRIPTOR_SET_BINDINGS{{
|
||||
{
|
||||
.binding = 0,
|
||||
.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
|
||||
.descriptorCount = 1,
|
||||
.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
|
||||
.pImmutableSamplers = nullptr,
|
||||
},
|
||||
{
|
||||
.binding = 1,
|
||||
.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
|
||||
.descriptorCount = 1,
|
||||
.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
|
||||
.pImmutableSamplers = nullptr,
|
||||
},
|
||||
}};
|
||||
|
||||
constexpr DescriptorBankInfo INPUT_OUTPUT_BANK_INFO{
|
||||
.uniform_buffers = 0,
|
||||
.storage_buffers = 2,
|
||||
.texture_buffers = 0,
|
||||
.image_buffers = 0,
|
||||
.textures = 0,
|
||||
.images = 0,
|
||||
.score = 2,
|
||||
};
|
||||
|
||||
constexpr std::array<VkDescriptorSetLayoutBinding, ASTC_NUM_BINDINGS> ASTC_DESCRIPTOR_SET_BINDINGS{{
|
||||
{
|
||||
.binding = ASTC_BINDING_INPUT_BUFFER,
|
||||
.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
|
||||
.descriptorCount = 1,
|
||||
.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
|
||||
.pImmutableSamplers = nullptr,
|
||||
},
|
||||
{
|
||||
.binding = ASTC_BINDING_OUTPUT_IMAGE,
|
||||
.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
|
||||
.descriptorCount = 1,
|
||||
.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
|
||||
.pImmutableSamplers = nullptr,
|
||||
},
|
||||
}};
|
||||
|
||||
constexpr DescriptorBankInfo ASTC_BANK_INFO{
|
||||
.uniform_buffers = 0,
|
||||
.storage_buffers = 1,
|
||||
.texture_buffers = 0,
|
||||
.image_buffers = 0,
|
||||
.textures = 0,
|
||||
.images = 1,
|
||||
.score = 2,
|
||||
};
|
||||
|
||||
constexpr VkDescriptorUpdateTemplateEntryKHR INPUT_OUTPUT_DESCRIPTOR_UPDATE_TEMPLATE{
|
||||
.dstBinding = 0,
|
||||
.dstArrayElement = 0,
|
||||
.descriptorCount = 2,
|
||||
.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
|
||||
.offset = 0,
|
||||
.stride = sizeof(DescriptorUpdateEntry),
|
||||
};
|
||||
|
||||
constexpr std::array<VkDescriptorUpdateTemplateEntryKHR, ASTC_NUM_BINDINGS>
|
||||
ASTC_PASS_DESCRIPTOR_UPDATE_TEMPLATE_ENTRY{{
|
||||
{
|
||||
.dstBinding = ASTC_BINDING_INPUT_BUFFER,
|
||||
.dstArrayElement = 0,
|
||||
.descriptorCount = 1,
|
||||
.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
|
||||
.offset = ASTC_BINDING_INPUT_BUFFER * sizeof(DescriptorUpdateEntry),
|
||||
.stride = sizeof(DescriptorUpdateEntry),
|
||||
},
|
||||
{
|
||||
.dstBinding = ASTC_BINDING_OUTPUT_IMAGE,
|
||||
.dstArrayElement = 0,
|
||||
.descriptorCount = 1,
|
||||
.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
|
||||
.offset = ASTC_BINDING_OUTPUT_IMAGE * sizeof(DescriptorUpdateEntry),
|
||||
.stride = sizeof(DescriptorUpdateEntry),
|
||||
},
|
||||
}};
|
||||
|
||||
struct AstcPushConstants {
|
||||
std::array<u32, 2> blocks_dims;
|
||||
u32 layer_stride;
|
||||
u32 block_size;
|
||||
u32 x_shift;
|
||||
u32 block_height;
|
||||
u32 block_height_mask;
|
||||
};
|
||||
} // Anonymous namespace
|
||||
|
||||
ComputePass::ComputePass(const Device& device_, DescriptorPool& descriptor_pool,
|
||||
vk::Span<VkDescriptorSetLayoutBinding> bindings,
|
||||
vk::Span<VkDescriptorUpdateTemplateEntryKHR> templates,
|
||||
const DescriptorBankInfo& bank_info,
|
||||
vk::Span<VkPushConstantRange> push_constants, std::span<const u32> code)
|
||||
: device{device_} {
|
||||
descriptor_set_layout = device.GetLogical().CreateDescriptorSetLayout({
|
||||
.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
|
||||
.pNext = nullptr,
|
||||
.flags = 0,
|
||||
.bindingCount = bindings.size(),
|
||||
.pBindings = bindings.data(),
|
||||
});
|
||||
layout = device.GetLogical().CreatePipelineLayout({
|
||||
.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
|
||||
.pNext = nullptr,
|
||||
.flags = 0,
|
||||
.setLayoutCount = 1,
|
||||
.pSetLayouts = descriptor_set_layout.address(),
|
||||
.pushConstantRangeCount = push_constants.size(),
|
||||
.pPushConstantRanges = push_constants.data(),
|
||||
});
|
||||
if (!templates.empty()) {
|
||||
descriptor_template = device.GetLogical().CreateDescriptorUpdateTemplateKHR({
|
||||
.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_UPDATE_TEMPLATE_CREATE_INFO_KHR,
|
||||
.pNext = nullptr,
|
||||
.flags = 0,
|
||||
.descriptorUpdateEntryCount = templates.size(),
|
||||
.pDescriptorUpdateEntries = templates.data(),
|
||||
.templateType = VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_DESCRIPTOR_SET_KHR,
|
||||
.descriptorSetLayout = *descriptor_set_layout,
|
||||
.pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,
|
||||
.pipelineLayout = *layout,
|
||||
.set = 0,
|
||||
});
|
||||
descriptor_allocator = descriptor_pool.Allocator(*descriptor_set_layout, bank_info);
|
||||
}
|
||||
module = device.GetLogical().CreateShaderModule({
|
||||
.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO,
|
||||
.pNext = nullptr,
|
||||
.flags = 0,
|
||||
.codeSize = static_cast<u32>(code.size_bytes()),
|
||||
.pCode = code.data(),
|
||||
});
|
||||
device.SaveShader(code);
|
||||
pipeline = device.GetLogical().CreateComputePipeline({
|
||||
.sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
|
||||
.pNext = nullptr,
|
||||
.flags = 0,
|
||||
.stage{
|
||||
.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
|
||||
.pNext = nullptr,
|
||||
.flags = 0,
|
||||
.stage = VK_SHADER_STAGE_COMPUTE_BIT,
|
||||
.module = *module,
|
||||
.pName = "main",
|
||||
.pSpecializationInfo = nullptr,
|
||||
},
|
||||
.layout = *layout,
|
||||
.basePipelineHandle = nullptr,
|
||||
.basePipelineIndex = 0,
|
||||
});
|
||||
}
|
||||
|
||||
ComputePass::~ComputePass() = default;
|
||||
|
||||
Uint8Pass::Uint8Pass(const Device& device_, Scheduler& scheduler_, DescriptorPool& descriptor_pool,
|
||||
StagingBufferPool& staging_buffer_pool_,
|
||||
UpdateDescriptorQueue& update_descriptor_queue_)
|
||||
: ComputePass(device_, descriptor_pool, INPUT_OUTPUT_DESCRIPTOR_SET_BINDINGS,
|
||||
INPUT_OUTPUT_DESCRIPTOR_UPDATE_TEMPLATE, INPUT_OUTPUT_BANK_INFO, {},
|
||||
VULKAN_UINT8_COMP_SPV),
|
||||
scheduler{scheduler_}, staging_buffer_pool{staging_buffer_pool_},
|
||||
update_descriptor_queue{update_descriptor_queue_} {}
|
||||
|
||||
Uint8Pass::~Uint8Pass() = default;
|
||||
|
||||
std::pair<VkBuffer, VkDeviceSize> Uint8Pass::Assemble(u32 num_vertices, VkBuffer src_buffer,
|
||||
u32 src_offset) {
|
||||
const u32 staging_size = static_cast<u32>(num_vertices * sizeof(u16));
|
||||
const auto staging = staging_buffer_pool.Request(staging_size, MemoryUsage::DeviceLocal);
|
||||
|
||||
update_descriptor_queue.Acquire();
|
||||
update_descriptor_queue.AddBuffer(src_buffer, src_offset, num_vertices);
|
||||
update_descriptor_queue.AddBuffer(staging.buffer, staging.offset, staging_size);
|
||||
const void* const descriptor_data{update_descriptor_queue.UpdateData()};
|
||||
|
||||
scheduler.RequestOutsideRenderPassOperationContext();
|
||||
scheduler.Record([this, descriptor_data, num_vertices](vk::CommandBuffer cmdbuf) {
|
||||
static constexpr u32 DISPATCH_SIZE = 1024;
|
||||
static constexpr VkMemoryBarrier WRITE_BARRIER{
|
||||
.sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER,
|
||||
.pNext = nullptr,
|
||||
.srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT,
|
||||
.dstAccessMask = VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT,
|
||||
};
|
||||
const VkDescriptorSet set = descriptor_allocator.Commit();
|
||||
device.GetLogical().UpdateDescriptorSet(set, *descriptor_template, descriptor_data);
|
||||
cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline);
|
||||
cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, *layout, 0, set, {});
|
||||
cmdbuf.Dispatch(Common::DivCeil(num_vertices, DISPATCH_SIZE), 1, 1);
|
||||
cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
|
||||
VK_PIPELINE_STAGE_VERTEX_INPUT_BIT, 0, WRITE_BARRIER);
|
||||
});
|
||||
return {staging.buffer, staging.offset};
|
||||
}
|
||||
|
||||
QuadIndexedPass::QuadIndexedPass(const Device& device_, Scheduler& scheduler_,
|
||||
DescriptorPool& descriptor_pool_,
|
||||
StagingBufferPool& staging_buffer_pool_,
|
||||
UpdateDescriptorQueue& update_descriptor_queue_)
|
||||
: ComputePass(device_, descriptor_pool_, INPUT_OUTPUT_DESCRIPTOR_SET_BINDINGS,
|
||||
INPUT_OUTPUT_DESCRIPTOR_UPDATE_TEMPLATE, INPUT_OUTPUT_BANK_INFO,
|
||||
COMPUTE_PUSH_CONSTANT_RANGE<sizeof(u32) * 2>, VULKAN_QUAD_INDEXED_COMP_SPV),
|
||||
scheduler{scheduler_}, staging_buffer_pool{staging_buffer_pool_},
|
||||
update_descriptor_queue{update_descriptor_queue_} {}
|
||||
|
||||
QuadIndexedPass::~QuadIndexedPass() = default;
|
||||
|
||||
std::pair<VkBuffer, VkDeviceSize> QuadIndexedPass::Assemble(
|
||||
Tegra::Engines::Maxwell3D::Regs::IndexFormat index_format, u32 num_vertices, u32 base_vertex,
|
||||
VkBuffer src_buffer, u32 src_offset) {
|
||||
const u32 index_shift = [index_format] {
|
||||
switch (index_format) {
|
||||
case Tegra::Engines::Maxwell3D::Regs::IndexFormat::UnsignedByte:
|
||||
return 0;
|
||||
case Tegra::Engines::Maxwell3D::Regs::IndexFormat::UnsignedShort:
|
||||
return 1;
|
||||
case Tegra::Engines::Maxwell3D::Regs::IndexFormat::UnsignedInt:
|
||||
return 2;
|
||||
}
|
||||
ASSERT(false);
|
||||
return 2;
|
||||
}();
|
||||
const u32 input_size = num_vertices << index_shift;
|
||||
const u32 num_tri_vertices = (num_vertices / 4) * 6;
|
||||
|
||||
const std::size_t staging_size = num_tri_vertices * sizeof(u32);
|
||||
const auto staging = staging_buffer_pool.Request(staging_size, MemoryUsage::DeviceLocal);
|
||||
|
||||
update_descriptor_queue.Acquire();
|
||||
update_descriptor_queue.AddBuffer(src_buffer, src_offset, input_size);
|
||||
update_descriptor_queue.AddBuffer(staging.buffer, staging.offset, staging_size);
|
||||
const void* const descriptor_data{update_descriptor_queue.UpdateData()};
|
||||
|
||||
scheduler.RequestOutsideRenderPassOperationContext();
|
||||
scheduler.Record([this, descriptor_data, num_tri_vertices, base_vertex,
|
||||
index_shift](vk::CommandBuffer cmdbuf) {
|
||||
static constexpr u32 DISPATCH_SIZE = 1024;
|
||||
static constexpr VkMemoryBarrier WRITE_BARRIER{
|
||||
.sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER,
|
||||
.pNext = nullptr,
|
||||
.srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT,
|
||||
.dstAccessMask = VK_ACCESS_INDEX_READ_BIT,
|
||||
};
|
||||
const std::array<u32, 2> push_constants{base_vertex, index_shift};
|
||||
const VkDescriptorSet set = descriptor_allocator.Commit();
|
||||
device.GetLogical().UpdateDescriptorSet(set, *descriptor_template, descriptor_data);
|
||||
cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline);
|
||||
cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, *layout, 0, set, {});
|
||||
cmdbuf.PushConstants(*layout, VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(push_constants),
|
||||
&push_constants);
|
||||
cmdbuf.Dispatch(Common::DivCeil(num_tri_vertices, DISPATCH_SIZE), 1, 1);
|
||||
cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
|
||||
VK_PIPELINE_STAGE_VERTEX_INPUT_BIT, 0, WRITE_BARRIER);
|
||||
});
|
||||
return {staging.buffer, staging.offset};
|
||||
}
|
||||
|
||||
ASTCDecoderPass::ASTCDecoderPass(const Device& device_, Scheduler& scheduler_,
|
||||
DescriptorPool& descriptor_pool_,
|
||||
StagingBufferPool& staging_buffer_pool_,
|
||||
UpdateDescriptorQueue& update_descriptor_queue_,
|
||||
MemoryAllocator& memory_allocator_)
|
||||
: ComputePass(device_, descriptor_pool_, ASTC_DESCRIPTOR_SET_BINDINGS,
|
||||
ASTC_PASS_DESCRIPTOR_UPDATE_TEMPLATE_ENTRY, ASTC_BANK_INFO,
|
||||
COMPUTE_PUSH_CONSTANT_RANGE<sizeof(AstcPushConstants)>, ASTC_DECODER_COMP_SPV),
|
||||
scheduler{scheduler_}, staging_buffer_pool{staging_buffer_pool_},
|
||||
update_descriptor_queue{update_descriptor_queue_}, memory_allocator{memory_allocator_} {}
|
||||
|
||||
ASTCDecoderPass::~ASTCDecoderPass() = default;
|
||||
|
||||
void ASTCDecoderPass::Assemble(Image& image, const StagingBufferRef& map,
|
||||
std::span<const VideoCommon::SwizzleParameters> swizzles) {
|
||||
using namespace VideoCommon::Accelerated;
|
||||
const std::array<u32, 2> block_dims{
|
||||
VideoCore::Surface::DefaultBlockWidth(image.info.format),
|
||||
VideoCore::Surface::DefaultBlockHeight(image.info.format),
|
||||
};
|
||||
scheduler.RequestOutsideRenderPassOperationContext();
|
||||
const VkPipeline vk_pipeline = *pipeline;
|
||||
const VkImageAspectFlags aspect_mask = image.AspectMask();
|
||||
const VkImage vk_image = image.Handle();
|
||||
const bool is_initialized = image.ExchangeInitialization();
|
||||
scheduler.Record([vk_pipeline, vk_image, aspect_mask,
|
||||
is_initialized](vk::CommandBuffer cmdbuf) {
|
||||
const VkImageMemoryBarrier image_barrier{
|
||||
.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
|
||||
.pNext = nullptr,
|
||||
.srcAccessMask = static_cast<VkAccessFlags>(is_initialized ? VK_ACCESS_SHADER_WRITE_BIT
|
||||
: VK_ACCESS_NONE),
|
||||
.dstAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT,
|
||||
.oldLayout = is_initialized ? VK_IMAGE_LAYOUT_GENERAL : VK_IMAGE_LAYOUT_UNDEFINED,
|
||||
.newLayout = VK_IMAGE_LAYOUT_GENERAL,
|
||||
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
|
||||
.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
|
||||
.image = vk_image,
|
||||
.subresourceRange{
|
||||
.aspectMask = aspect_mask,
|
||||
.baseMipLevel = 0,
|
||||
.levelCount = VK_REMAINING_MIP_LEVELS,
|
||||
.baseArrayLayer = 0,
|
||||
.layerCount = VK_REMAINING_ARRAY_LAYERS,
|
||||
},
|
||||
};
|
||||
cmdbuf.PipelineBarrier(is_initialized ? VK_PIPELINE_STAGE_ALL_COMMANDS_BIT
|
||||
: VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
|
||||
VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, image_barrier);
|
||||
cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, vk_pipeline);
|
||||
});
|
||||
for (const VideoCommon::SwizzleParameters& swizzle : swizzles) {
|
||||
const size_t input_offset = swizzle.buffer_offset + map.offset;
|
||||
const u32 num_dispatches_x = Common::DivCeil(swizzle.num_tiles.width, 8U);
|
||||
const u32 num_dispatches_y = Common::DivCeil(swizzle.num_tiles.height, 8U);
|
||||
const u32 num_dispatches_z = image.info.resources.layers;
|
||||
|
||||
update_descriptor_queue.Acquire();
|
||||
update_descriptor_queue.AddBuffer(map.buffer, input_offset,
|
||||
image.guest_size_bytes - swizzle.buffer_offset);
|
||||
update_descriptor_queue.AddImage(image.StorageImageView(swizzle.level));
|
||||
const void* const descriptor_data{update_descriptor_queue.UpdateData()};
|
||||
|
||||
// To unswizzle the ASTC data
|
||||
const auto params = MakeBlockLinearSwizzle2DParams(swizzle, image.info);
|
||||
ASSERT(params.origin == (std::array<u32, 3>{0, 0, 0}));
|
||||
ASSERT(params.destination == (std::array<s32, 3>{0, 0, 0}));
|
||||
ASSERT(params.bytes_per_block_log2 == 4);
|
||||
scheduler.Record([this, num_dispatches_x, num_dispatches_y, num_dispatches_z, block_dims,
|
||||
params, descriptor_data](vk::CommandBuffer cmdbuf) {
|
||||
const AstcPushConstants uniforms{
|
||||
.blocks_dims = block_dims,
|
||||
.layer_stride = params.layer_stride,
|
||||
.block_size = params.block_size,
|
||||
.x_shift = params.x_shift,
|
||||
.block_height = params.block_height,
|
||||
.block_height_mask = params.block_height_mask,
|
||||
};
|
||||
const VkDescriptorSet set = descriptor_allocator.Commit();
|
||||
device.GetLogical().UpdateDescriptorSet(set, *descriptor_template, descriptor_data);
|
||||
cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, *layout, 0, set, {});
|
||||
cmdbuf.PushConstants(*layout, VK_SHADER_STAGE_COMPUTE_BIT, uniforms);
|
||||
cmdbuf.Dispatch(num_dispatches_x, num_dispatches_y, num_dispatches_z);
|
||||
});
|
||||
}
|
||||
scheduler.Record([vk_image, aspect_mask](vk::CommandBuffer cmdbuf) {
|
||||
const VkImageMemoryBarrier image_barrier{
|
||||
.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
|
||||
.pNext = nullptr,
|
||||
.srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT,
|
||||
.dstAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT,
|
||||
.oldLayout = VK_IMAGE_LAYOUT_GENERAL,
|
||||
.newLayout = VK_IMAGE_LAYOUT_GENERAL,
|
||||
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
|
||||
.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
|
||||
.image = vk_image,
|
||||
.subresourceRange{
|
||||
.aspectMask = aspect_mask,
|
||||
.baseMipLevel = 0,
|
||||
.levelCount = VK_REMAINING_MIP_LEVELS,
|
||||
.baseArrayLayer = 0,
|
||||
.layerCount = VK_REMAINING_ARRAY_LAYERS,
|
||||
},
|
||||
};
|
||||
cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
|
||||
VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, 0, image_barrier);
|
||||
});
|
||||
scheduler.Finish();
|
||||
}
|
||||
|
||||
} // namespace Vulkan
|
||||
|
||||
@@ -1,104 +1,104 @@
|
||||
// SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <span>
|
||||
#include <utility>
|
||||
|
||||
#include "common/common_types.h"
|
||||
#include "video_core/engines/maxwell_3d.h"
|
||||
#include "video_core/renderer_vulkan/vk_descriptor_pool.h"
|
||||
#include "video_core/vulkan_common/vulkan_memory_allocator.h"
|
||||
#include "video_core/vulkan_common/vulkan_wrapper.h"
|
||||
|
||||
namespace VideoCommon {
|
||||
struct SwizzleParameters;
|
||||
}
|
||||
|
||||
namespace Vulkan {
|
||||
|
||||
class Device;
|
||||
class StagingBufferPool;
|
||||
class Scheduler;
|
||||
class UpdateDescriptorQueue;
|
||||
class Image;
|
||||
struct StagingBufferRef;
|
||||
|
||||
class ComputePass {
|
||||
public:
|
||||
explicit ComputePass(const Device& device, DescriptorPool& descriptor_pool,
|
||||
vk::Span<VkDescriptorSetLayoutBinding> bindings,
|
||||
vk::Span<VkDescriptorUpdateTemplateEntryKHR> templates,
|
||||
const DescriptorBankInfo& bank_info,
|
||||
vk::Span<VkPushConstantRange> push_constants, std::span<const u32> code);
|
||||
~ComputePass();
|
||||
|
||||
protected:
|
||||
const Device& device;
|
||||
vk::DescriptorUpdateTemplateKHR descriptor_template;
|
||||
vk::PipelineLayout layout;
|
||||
vk::Pipeline pipeline;
|
||||
vk::DescriptorSetLayout descriptor_set_layout;
|
||||
DescriptorAllocator descriptor_allocator;
|
||||
|
||||
private:
|
||||
vk::ShaderModule module;
|
||||
};
|
||||
|
||||
class Uint8Pass final : public ComputePass {
|
||||
public:
|
||||
explicit Uint8Pass(const Device& device_, Scheduler& scheduler_,
|
||||
DescriptorPool& descriptor_pool_, StagingBufferPool& staging_buffer_pool_,
|
||||
UpdateDescriptorQueue& update_descriptor_queue_);
|
||||
~Uint8Pass();
|
||||
|
||||
/// Assemble uint8 indices into an uint16 index buffer
|
||||
/// Returns a pair with the staging buffer, and the offset where the assembled data is
|
||||
std::pair<VkBuffer, VkDeviceSize> Assemble(u32 num_vertices, VkBuffer src_buffer,
|
||||
u32 src_offset);
|
||||
|
||||
private:
|
||||
Scheduler& scheduler;
|
||||
StagingBufferPool& staging_buffer_pool;
|
||||
UpdateDescriptorQueue& update_descriptor_queue;
|
||||
};
|
||||
|
||||
class QuadIndexedPass final : public ComputePass {
|
||||
public:
|
||||
explicit QuadIndexedPass(const Device& device_, Scheduler& scheduler_,
|
||||
DescriptorPool& descriptor_pool_,
|
||||
StagingBufferPool& staging_buffer_pool_,
|
||||
UpdateDescriptorQueue& update_descriptor_queue_);
|
||||
~QuadIndexedPass();
|
||||
|
||||
std::pair<VkBuffer, VkDeviceSize> Assemble(
|
||||
Tegra::Engines::Maxwell3D::Regs::IndexFormat index_format, u32 num_vertices,
|
||||
u32 base_vertex, VkBuffer src_buffer, u32 src_offset);
|
||||
|
||||
private:
|
||||
Scheduler& scheduler;
|
||||
StagingBufferPool& staging_buffer_pool;
|
||||
UpdateDescriptorQueue& update_descriptor_queue;
|
||||
};
|
||||
|
||||
class ASTCDecoderPass final : public ComputePass {
|
||||
public:
|
||||
explicit ASTCDecoderPass(const Device& device_, Scheduler& scheduler_,
|
||||
DescriptorPool& descriptor_pool_,
|
||||
StagingBufferPool& staging_buffer_pool_,
|
||||
UpdateDescriptorQueue& update_descriptor_queue_,
|
||||
MemoryAllocator& memory_allocator_);
|
||||
~ASTCDecoderPass();
|
||||
|
||||
void Assemble(Image& image, const StagingBufferRef& map,
|
||||
std::span<const VideoCommon::SwizzleParameters> swizzles);
|
||||
|
||||
private:
|
||||
Scheduler& scheduler;
|
||||
StagingBufferPool& staging_buffer_pool;
|
||||
UpdateDescriptorQueue& update_descriptor_queue;
|
||||
MemoryAllocator& memory_allocator;
|
||||
};
|
||||
|
||||
} // namespace Vulkan
|
||||
// SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <span>
|
||||
#include <utility>
|
||||
|
||||
#include "common/common_types.h"
|
||||
#include "video_core/engines/maxwell_3d.h"
|
||||
#include "video_core/renderer_vulkan/vk_descriptor_pool.h"
|
||||
#include "video_core/vulkan_common/vulkan_memory_allocator.h"
|
||||
#include "video_core/vulkan_common/vulkan_wrapper.h"
|
||||
|
||||
namespace VideoCommon {
|
||||
struct SwizzleParameters;
|
||||
}
|
||||
|
||||
namespace Vulkan {
|
||||
|
||||
class Device;
|
||||
class StagingBufferPool;
|
||||
class Scheduler;
|
||||
class UpdateDescriptorQueue;
|
||||
class Image;
|
||||
struct StagingBufferRef;
|
||||
|
||||
class ComputePass {
|
||||
public:
|
||||
explicit ComputePass(const Device& device, DescriptorPool& descriptor_pool,
|
||||
vk::Span<VkDescriptorSetLayoutBinding> bindings,
|
||||
vk::Span<VkDescriptorUpdateTemplateEntryKHR> templates,
|
||||
const DescriptorBankInfo& bank_info,
|
||||
vk::Span<VkPushConstantRange> push_constants, std::span<const u32> code);
|
||||
~ComputePass();
|
||||
|
||||
protected:
|
||||
const Device& device;
|
||||
vk::DescriptorUpdateTemplateKHR descriptor_template;
|
||||
vk::PipelineLayout layout;
|
||||
vk::Pipeline pipeline;
|
||||
vk::DescriptorSetLayout descriptor_set_layout;
|
||||
DescriptorAllocator descriptor_allocator;
|
||||
|
||||
private:
|
||||
vk::ShaderModule module;
|
||||
};
|
||||
|
||||
class Uint8Pass final : public ComputePass {
|
||||
public:
|
||||
explicit Uint8Pass(const Device& device_, Scheduler& scheduler_,
|
||||
DescriptorPool& descriptor_pool_, StagingBufferPool& staging_buffer_pool_,
|
||||
UpdateDescriptorQueue& update_descriptor_queue_);
|
||||
~Uint8Pass();
|
||||
|
||||
/// Assemble uint8 indices into an uint16 index buffer
|
||||
/// Returns a pair with the staging buffer, and the offset where the assembled data is
|
||||
std::pair<VkBuffer, VkDeviceSize> Assemble(u32 num_vertices, VkBuffer src_buffer,
|
||||
u32 src_offset);
|
||||
|
||||
private:
|
||||
Scheduler& scheduler;
|
||||
StagingBufferPool& staging_buffer_pool;
|
||||
UpdateDescriptorQueue& update_descriptor_queue;
|
||||
};
|
||||
|
||||
class QuadIndexedPass final : public ComputePass {
|
||||
public:
|
||||
explicit QuadIndexedPass(const Device& device_, Scheduler& scheduler_,
|
||||
DescriptorPool& descriptor_pool_,
|
||||
StagingBufferPool& staging_buffer_pool_,
|
||||
UpdateDescriptorQueue& update_descriptor_queue_);
|
||||
~QuadIndexedPass();
|
||||
|
||||
std::pair<VkBuffer, VkDeviceSize> Assemble(
|
||||
Tegra::Engines::Maxwell3D::Regs::IndexFormat index_format, u32 num_vertices,
|
||||
u32 base_vertex, VkBuffer src_buffer, u32 src_offset);
|
||||
|
||||
private:
|
||||
Scheduler& scheduler;
|
||||
StagingBufferPool& staging_buffer_pool;
|
||||
UpdateDescriptorQueue& update_descriptor_queue;
|
||||
};
|
||||
|
||||
class ASTCDecoderPass final : public ComputePass {
|
||||
public:
|
||||
explicit ASTCDecoderPass(const Device& device_, Scheduler& scheduler_,
|
||||
DescriptorPool& descriptor_pool_,
|
||||
StagingBufferPool& staging_buffer_pool_,
|
||||
UpdateDescriptorQueue& update_descriptor_queue_,
|
||||
MemoryAllocator& memory_allocator_);
|
||||
~ASTCDecoderPass();
|
||||
|
||||
void Assemble(Image& image, const StagingBufferRef& map,
|
||||
std::span<const VideoCommon::SwizzleParameters> swizzles);
|
||||
|
||||
private:
|
||||
Scheduler& scheduler;
|
||||
StagingBufferPool& staging_buffer_pool;
|
||||
UpdateDescriptorQueue& update_descriptor_queue;
|
||||
MemoryAllocator& memory_allocator;
|
||||
};
|
||||
|
||||
} // namespace Vulkan
|
||||
|
||||
@@ -1,222 +1,222 @@
|
||||
// SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#include <algorithm>
|
||||
#include <vector>
|
||||
|
||||
#include <boost/container/small_vector.hpp>
|
||||
|
||||
#include "video_core/renderer_vulkan/pipeline_helper.h"
|
||||
#include "video_core/renderer_vulkan/pipeline_statistics.h"
|
||||
#include "video_core/renderer_vulkan/vk_buffer_cache.h"
|
||||
#include "video_core/renderer_vulkan/vk_compute_pipeline.h"
|
||||
#include "video_core/renderer_vulkan/vk_descriptor_pool.h"
|
||||
#include "video_core/renderer_vulkan/vk_pipeline_cache.h"
|
||||
#include "video_core/renderer_vulkan/vk_scheduler.h"
|
||||
#include "video_core/renderer_vulkan/vk_update_descriptor.h"
|
||||
#include "video_core/shader_notify.h"
|
||||
#include "video_core/vulkan_common/vulkan_device.h"
|
||||
#include "video_core/vulkan_common/vulkan_wrapper.h"
|
||||
|
||||
namespace Vulkan {
|
||||
|
||||
using Shader::ImageBufferDescriptor;
|
||||
using Shader::Backend::SPIRV::RESCALING_LAYOUT_WORDS_OFFSET;
|
||||
using Tegra::Texture::TexturePair;
|
||||
|
||||
ComputePipeline::ComputePipeline(const Device& device_, DescriptorPool& descriptor_pool,
|
||||
UpdateDescriptorQueue& update_descriptor_queue_,
|
||||
Common::ThreadWorker* thread_worker,
|
||||
PipelineStatistics* pipeline_statistics,
|
||||
VideoCore::ShaderNotify* shader_notify, const Shader::Info& info_,
|
||||
vk::ShaderModule spv_module_)
|
||||
: device{device_}, update_descriptor_queue{update_descriptor_queue_}, info{info_},
|
||||
spv_module(std::move(spv_module_)) {
|
||||
if (shader_notify) {
|
||||
shader_notify->MarkShaderBuilding();
|
||||
}
|
||||
std::copy_n(info.constant_buffer_used_sizes.begin(), uniform_buffer_sizes.size(),
|
||||
uniform_buffer_sizes.begin());
|
||||
|
||||
auto func{[this, &descriptor_pool, shader_notify, pipeline_statistics] {
|
||||
DescriptorLayoutBuilder builder{device};
|
||||
builder.Add(info, VK_SHADER_STAGE_COMPUTE_BIT);
|
||||
|
||||
descriptor_set_layout = builder.CreateDescriptorSetLayout(false);
|
||||
pipeline_layout = builder.CreatePipelineLayout(*descriptor_set_layout);
|
||||
descriptor_update_template =
|
||||
builder.CreateTemplate(*descriptor_set_layout, *pipeline_layout, false);
|
||||
descriptor_allocator = descriptor_pool.Allocator(*descriptor_set_layout, info);
|
||||
const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT subgroup_size_ci{
|
||||
.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT,
|
||||
.pNext = nullptr,
|
||||
.requiredSubgroupSize = GuestWarpSize,
|
||||
};
|
||||
VkPipelineCreateFlags flags{};
|
||||
if (device.IsKhrPipelineEexecutablePropertiesEnabled()) {
|
||||
flags |= VK_PIPELINE_CREATE_CAPTURE_STATISTICS_BIT_KHR;
|
||||
}
|
||||
pipeline = device.GetLogical().CreateComputePipeline({
|
||||
.sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
|
||||
.pNext = nullptr,
|
||||
.flags = flags,
|
||||
.stage{
|
||||
.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
|
||||
.pNext = device.IsExtSubgroupSizeControlSupported() ? &subgroup_size_ci : nullptr,
|
||||
.flags = 0,
|
||||
.stage = VK_SHADER_STAGE_COMPUTE_BIT,
|
||||
.module = *spv_module,
|
||||
.pName = "main",
|
||||
.pSpecializationInfo = nullptr,
|
||||
},
|
||||
.layout = *pipeline_layout,
|
||||
.basePipelineHandle = 0,
|
||||
.basePipelineIndex = 0,
|
||||
});
|
||||
if (pipeline_statistics) {
|
||||
pipeline_statistics->Collect(*pipeline);
|
||||
}
|
||||
std::scoped_lock lock{build_mutex};
|
||||
is_built = true;
|
||||
build_condvar.notify_one();
|
||||
if (shader_notify) {
|
||||
shader_notify->MarkShaderComplete();
|
||||
}
|
||||
}};
|
||||
if (thread_worker) {
|
||||
thread_worker->QueueWork(std::move(func));
|
||||
} else {
|
||||
func();
|
||||
}
|
||||
}
|
||||
|
||||
void ComputePipeline::Configure(Tegra::Engines::KeplerCompute& kepler_compute,
|
||||
Tegra::MemoryManager& gpu_memory, Scheduler& scheduler,
|
||||
BufferCache& buffer_cache, TextureCache& texture_cache) {
|
||||
update_descriptor_queue.Acquire();
|
||||
|
||||
buffer_cache.SetComputeUniformBufferState(info.constant_buffer_mask, &uniform_buffer_sizes);
|
||||
buffer_cache.UnbindComputeStorageBuffers();
|
||||
size_t ssbo_index{};
|
||||
for (const auto& desc : info.storage_buffers_descriptors) {
|
||||
ASSERT(desc.count == 1);
|
||||
buffer_cache.BindComputeStorageBuffer(ssbo_index, desc.cbuf_index, desc.cbuf_offset,
|
||||
desc.is_written);
|
||||
++ssbo_index;
|
||||
}
|
||||
|
||||
texture_cache.SynchronizeComputeDescriptors();
|
||||
|
||||
static constexpr size_t max_elements = 64;
|
||||
boost::container::static_vector<VideoCommon::ImageViewInOut, max_elements> views;
|
||||
boost::container::static_vector<VkSampler, max_elements> samplers;
|
||||
|
||||
const auto& qmd{kepler_compute.launch_description};
|
||||
const auto& cbufs{qmd.const_buffer_config};
|
||||
const bool via_header_index{qmd.linked_tsc != 0};
|
||||
const auto read_handle{[&](const auto& desc, u32 index) {
|
||||
ASSERT(((qmd.const_buffer_enable_mask >> desc.cbuf_index) & 1) != 0);
|
||||
const u32 index_offset{index << desc.size_shift};
|
||||
const u32 offset{desc.cbuf_offset + index_offset};
|
||||
const GPUVAddr addr{cbufs[desc.cbuf_index].Address() + offset};
|
||||
if constexpr (std::is_same_v<decltype(desc), const Shader::TextureDescriptor&> ||
|
||||
std::is_same_v<decltype(desc), const Shader::TextureBufferDescriptor&>) {
|
||||
if (desc.has_secondary) {
|
||||
ASSERT(((qmd.const_buffer_enable_mask >> desc.secondary_cbuf_index) & 1) != 0);
|
||||
const u32 secondary_offset{desc.secondary_cbuf_offset + index_offset};
|
||||
const GPUVAddr separate_addr{cbufs[desc.secondary_cbuf_index].Address() +
|
||||
secondary_offset};
|
||||
const u32 lhs_raw{gpu_memory.Read<u32>(addr) << desc.shift_left};
|
||||
const u32 rhs_raw{gpu_memory.Read<u32>(separate_addr) << desc.secondary_shift_left};
|
||||
return TexturePair(lhs_raw | rhs_raw, via_header_index);
|
||||
}
|
||||
}
|
||||
return TexturePair(gpu_memory.Read<u32>(addr), via_header_index);
|
||||
}};
|
||||
const auto add_image{[&](const auto& desc, bool blacklist) {
|
||||
for (u32 index = 0; index < desc.count; ++index) {
|
||||
const auto handle{read_handle(desc, index)};
|
||||
views.push_back({
|
||||
.index = handle.first,
|
||||
.blacklist = blacklist,
|
||||
.id = {},
|
||||
});
|
||||
}
|
||||
}};
|
||||
for (const auto& desc : info.texture_buffer_descriptors) {
|
||||
add_image(desc, false);
|
||||
}
|
||||
for (const auto& desc : info.image_buffer_descriptors) {
|
||||
add_image(desc, false);
|
||||
}
|
||||
for (const auto& desc : info.texture_descriptors) {
|
||||
for (u32 index = 0; index < desc.count; ++index) {
|
||||
const auto handle{read_handle(desc, index)};
|
||||
views.push_back({handle.first});
|
||||
|
||||
Sampler* const sampler = texture_cache.GetComputeSampler(handle.second);
|
||||
samplers.push_back(sampler->Handle());
|
||||
}
|
||||
}
|
||||
for (const auto& desc : info.image_descriptors) {
|
||||
add_image(desc, desc.is_written);
|
||||
}
|
||||
texture_cache.FillComputeImageViews(std::span(views.data(), views.size()));
|
||||
|
||||
buffer_cache.UnbindComputeTextureBuffers();
|
||||
size_t index{};
|
||||
const auto add_buffer{[&](const auto& desc) {
|
||||
constexpr bool is_image = std::is_same_v<decltype(desc), const ImageBufferDescriptor&>;
|
||||
for (u32 i = 0; i < desc.count; ++i) {
|
||||
bool is_written{false};
|
||||
if constexpr (is_image) {
|
||||
is_written = desc.is_written;
|
||||
}
|
||||
ImageView& image_view = texture_cache.GetImageView(views[index].id);
|
||||
buffer_cache.BindComputeTextureBuffer(index, image_view.GpuAddr(),
|
||||
image_view.BufferSize(), image_view.format,
|
||||
is_written, is_image);
|
||||
++index;
|
||||
}
|
||||
}};
|
||||
std::ranges::for_each(info.texture_buffer_descriptors, add_buffer);
|
||||
std::ranges::for_each(info.image_buffer_descriptors, add_buffer);
|
||||
|
||||
buffer_cache.UpdateComputeBuffers();
|
||||
buffer_cache.BindHostComputeBuffers();
|
||||
|
||||
RescalingPushConstant rescaling;
|
||||
const VkSampler* samplers_it{samplers.data()};
|
||||
const VideoCommon::ImageViewInOut* views_it{views.data()};
|
||||
PushImageDescriptors(texture_cache, update_descriptor_queue, info, rescaling, samplers_it,
|
||||
views_it);
|
||||
|
||||
if (!is_built.load(std::memory_order::relaxed)) {
|
||||
// Wait for the pipeline to be built
|
||||
scheduler.Record([this](vk::CommandBuffer) {
|
||||
std::unique_lock lock{build_mutex};
|
||||
build_condvar.wait(lock, [this] { return is_built.load(std::memory_order::relaxed); });
|
||||
});
|
||||
}
|
||||
const void* const descriptor_data{update_descriptor_queue.UpdateData()};
|
||||
const bool is_rescaling = !info.texture_descriptors.empty() || !info.image_descriptors.empty();
|
||||
scheduler.Record([this, descriptor_data, is_rescaling,
|
||||
rescaling_data = rescaling.Data()](vk::CommandBuffer cmdbuf) {
|
||||
cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline);
|
||||
if (!descriptor_set_layout) {
|
||||
return;
|
||||
}
|
||||
if (is_rescaling) {
|
||||
cmdbuf.PushConstants(*pipeline_layout, VK_SHADER_STAGE_COMPUTE_BIT,
|
||||
RESCALING_LAYOUT_WORDS_OFFSET, sizeof(rescaling_data),
|
||||
rescaling_data.data());
|
||||
}
|
||||
const VkDescriptorSet descriptor_set{descriptor_allocator.Commit()};
|
||||
const vk::Device& dev{device.GetLogical()};
|
||||
dev.UpdateDescriptorSet(descriptor_set, *descriptor_update_template, descriptor_data);
|
||||
cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline_layout, 0,
|
||||
descriptor_set, nullptr);
|
||||
});
|
||||
}
|
||||
|
||||
} // namespace Vulkan
|
||||
// SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#include <algorithm>
|
||||
#include <vector>
|
||||
|
||||
#include <boost/container/small_vector.hpp>
|
||||
|
||||
#include "video_core/renderer_vulkan/pipeline_helper.h"
|
||||
#include "video_core/renderer_vulkan/pipeline_statistics.h"
|
||||
#include "video_core/renderer_vulkan/vk_buffer_cache.h"
|
||||
#include "video_core/renderer_vulkan/vk_compute_pipeline.h"
|
||||
#include "video_core/renderer_vulkan/vk_descriptor_pool.h"
|
||||
#include "video_core/renderer_vulkan/vk_pipeline_cache.h"
|
||||
#include "video_core/renderer_vulkan/vk_scheduler.h"
|
||||
#include "video_core/renderer_vulkan/vk_update_descriptor.h"
|
||||
#include "video_core/shader_notify.h"
|
||||
#include "video_core/vulkan_common/vulkan_device.h"
|
||||
#include "video_core/vulkan_common/vulkan_wrapper.h"
|
||||
|
||||
namespace Vulkan {
|
||||
|
||||
using Shader::ImageBufferDescriptor;
|
||||
using Shader::Backend::SPIRV::RESCALING_LAYOUT_WORDS_OFFSET;
|
||||
using Tegra::Texture::TexturePair;
|
||||
|
||||
ComputePipeline::ComputePipeline(const Device& device_, DescriptorPool& descriptor_pool,
|
||||
UpdateDescriptorQueue& update_descriptor_queue_,
|
||||
Common::ThreadWorker* thread_worker,
|
||||
PipelineStatistics* pipeline_statistics,
|
||||
VideoCore::ShaderNotify* shader_notify, const Shader::Info& info_,
|
||||
vk::ShaderModule spv_module_)
|
||||
: device{device_}, update_descriptor_queue{update_descriptor_queue_}, info{info_},
|
||||
spv_module(std::move(spv_module_)) {
|
||||
if (shader_notify) {
|
||||
shader_notify->MarkShaderBuilding();
|
||||
}
|
||||
std::copy_n(info.constant_buffer_used_sizes.begin(), uniform_buffer_sizes.size(),
|
||||
uniform_buffer_sizes.begin());
|
||||
|
||||
auto func{[this, &descriptor_pool, shader_notify, pipeline_statistics] {
|
||||
DescriptorLayoutBuilder builder{device};
|
||||
builder.Add(info, VK_SHADER_STAGE_COMPUTE_BIT);
|
||||
|
||||
descriptor_set_layout = builder.CreateDescriptorSetLayout(false);
|
||||
pipeline_layout = builder.CreatePipelineLayout(*descriptor_set_layout);
|
||||
descriptor_update_template =
|
||||
builder.CreateTemplate(*descriptor_set_layout, *pipeline_layout, false);
|
||||
descriptor_allocator = descriptor_pool.Allocator(*descriptor_set_layout, info);
|
||||
const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT subgroup_size_ci{
|
||||
.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT,
|
||||
.pNext = nullptr,
|
||||
.requiredSubgroupSize = GuestWarpSize,
|
||||
};
|
||||
VkPipelineCreateFlags flags{};
|
||||
if (device.IsKhrPipelineEexecutablePropertiesEnabled()) {
|
||||
flags |= VK_PIPELINE_CREATE_CAPTURE_STATISTICS_BIT_KHR;
|
||||
}
|
||||
pipeline = device.GetLogical().CreateComputePipeline({
|
||||
.sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
|
||||
.pNext = nullptr,
|
||||
.flags = flags,
|
||||
.stage{
|
||||
.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
|
||||
.pNext = device.IsExtSubgroupSizeControlSupported() ? &subgroup_size_ci : nullptr,
|
||||
.flags = 0,
|
||||
.stage = VK_SHADER_STAGE_COMPUTE_BIT,
|
||||
.module = *spv_module,
|
||||
.pName = "main",
|
||||
.pSpecializationInfo = nullptr,
|
||||
},
|
||||
.layout = *pipeline_layout,
|
||||
.basePipelineHandle = 0,
|
||||
.basePipelineIndex = 0,
|
||||
});
|
||||
if (pipeline_statistics) {
|
||||
pipeline_statistics->Collect(*pipeline);
|
||||
}
|
||||
std::scoped_lock lock{build_mutex};
|
||||
is_built = true;
|
||||
build_condvar.notify_one();
|
||||
if (shader_notify) {
|
||||
shader_notify->MarkShaderComplete();
|
||||
}
|
||||
}};
|
||||
if (thread_worker) {
|
||||
thread_worker->QueueWork(std::move(func));
|
||||
} else {
|
||||
func();
|
||||
}
|
||||
}
|
||||
|
||||
void ComputePipeline::Configure(Tegra::Engines::KeplerCompute& kepler_compute,
|
||||
Tegra::MemoryManager& gpu_memory, Scheduler& scheduler,
|
||||
BufferCache& buffer_cache, TextureCache& texture_cache) {
|
||||
update_descriptor_queue.Acquire();
|
||||
|
||||
buffer_cache.SetComputeUniformBufferState(info.constant_buffer_mask, &uniform_buffer_sizes);
|
||||
buffer_cache.UnbindComputeStorageBuffers();
|
||||
size_t ssbo_index{};
|
||||
for (const auto& desc : info.storage_buffers_descriptors) {
|
||||
ASSERT(desc.count == 1);
|
||||
buffer_cache.BindComputeStorageBuffer(ssbo_index, desc.cbuf_index, desc.cbuf_offset,
|
||||
desc.is_written);
|
||||
++ssbo_index;
|
||||
}
|
||||
|
||||
texture_cache.SynchronizeComputeDescriptors();
|
||||
|
||||
static constexpr size_t max_elements = 64;
|
||||
boost::container::static_vector<VideoCommon::ImageViewInOut, max_elements> views;
|
||||
boost::container::static_vector<VkSampler, max_elements> samplers;
|
||||
|
||||
const auto& qmd{kepler_compute.launch_description};
|
||||
const auto& cbufs{qmd.const_buffer_config};
|
||||
const bool via_header_index{qmd.linked_tsc != 0};
|
||||
const auto read_handle{[&](const auto& desc, u32 index) {
|
||||
ASSERT(((qmd.const_buffer_enable_mask >> desc.cbuf_index) & 1) != 0);
|
||||
const u32 index_offset{index << desc.size_shift};
|
||||
const u32 offset{desc.cbuf_offset + index_offset};
|
||||
const GPUVAddr addr{cbufs[desc.cbuf_index].Address() + offset};
|
||||
if constexpr (std::is_same_v<decltype(desc), const Shader::TextureDescriptor&> ||
|
||||
std::is_same_v<decltype(desc), const Shader::TextureBufferDescriptor&>) {
|
||||
if (desc.has_secondary) {
|
||||
ASSERT(((qmd.const_buffer_enable_mask >> desc.secondary_cbuf_index) & 1) != 0);
|
||||
const u32 secondary_offset{desc.secondary_cbuf_offset + index_offset};
|
||||
const GPUVAddr separate_addr{cbufs[desc.secondary_cbuf_index].Address() +
|
||||
secondary_offset};
|
||||
const u32 lhs_raw{gpu_memory.Read<u32>(addr) << desc.shift_left};
|
||||
const u32 rhs_raw{gpu_memory.Read<u32>(separate_addr) << desc.secondary_shift_left};
|
||||
return TexturePair(lhs_raw | rhs_raw, via_header_index);
|
||||
}
|
||||
}
|
||||
return TexturePair(gpu_memory.Read<u32>(addr), via_header_index);
|
||||
}};
|
||||
const auto add_image{[&](const auto& desc, bool blacklist) {
|
||||
for (u32 index = 0; index < desc.count; ++index) {
|
||||
const auto handle{read_handle(desc, index)};
|
||||
views.push_back({
|
||||
.index = handle.first,
|
||||
.blacklist = blacklist,
|
||||
.id = {},
|
||||
});
|
||||
}
|
||||
}};
|
||||
for (const auto& desc : info.texture_buffer_descriptors) {
|
||||
add_image(desc, false);
|
||||
}
|
||||
for (const auto& desc : info.image_buffer_descriptors) {
|
||||
add_image(desc, false);
|
||||
}
|
||||
for (const auto& desc : info.texture_descriptors) {
|
||||
for (u32 index = 0; index < desc.count; ++index) {
|
||||
const auto handle{read_handle(desc, index)};
|
||||
views.push_back({handle.first});
|
||||
|
||||
Sampler* const sampler = texture_cache.GetComputeSampler(handle.second);
|
||||
samplers.push_back(sampler->Handle());
|
||||
}
|
||||
}
|
||||
for (const auto& desc : info.image_descriptors) {
|
||||
add_image(desc, desc.is_written);
|
||||
}
|
||||
texture_cache.FillComputeImageViews(std::span(views.data(), views.size()));
|
||||
|
||||
buffer_cache.UnbindComputeTextureBuffers();
|
||||
size_t index{};
|
||||
const auto add_buffer{[&](const auto& desc) {
|
||||
constexpr bool is_image = std::is_same_v<decltype(desc), const ImageBufferDescriptor&>;
|
||||
for (u32 i = 0; i < desc.count; ++i) {
|
||||
bool is_written{false};
|
||||
if constexpr (is_image) {
|
||||
is_written = desc.is_written;
|
||||
}
|
||||
ImageView& image_view = texture_cache.GetImageView(views[index].id);
|
||||
buffer_cache.BindComputeTextureBuffer(index, image_view.GpuAddr(),
|
||||
image_view.BufferSize(), image_view.format,
|
||||
is_written, is_image);
|
||||
++index;
|
||||
}
|
||||
}};
|
||||
std::ranges::for_each(info.texture_buffer_descriptors, add_buffer);
|
||||
std::ranges::for_each(info.image_buffer_descriptors, add_buffer);
|
||||
|
||||
buffer_cache.UpdateComputeBuffers();
|
||||
buffer_cache.BindHostComputeBuffers();
|
||||
|
||||
RescalingPushConstant rescaling;
|
||||
const VkSampler* samplers_it{samplers.data()};
|
||||
const VideoCommon::ImageViewInOut* views_it{views.data()};
|
||||
PushImageDescriptors(texture_cache, update_descriptor_queue, info, rescaling, samplers_it,
|
||||
views_it);
|
||||
|
||||
if (!is_built.load(std::memory_order::relaxed)) {
|
||||
// Wait for the pipeline to be built
|
||||
scheduler.Record([this](vk::CommandBuffer) {
|
||||
std::unique_lock lock{build_mutex};
|
||||
build_condvar.wait(lock, [this] { return is_built.load(std::memory_order::relaxed); });
|
||||
});
|
||||
}
|
||||
const void* const descriptor_data{update_descriptor_queue.UpdateData()};
|
||||
const bool is_rescaling = !info.texture_descriptors.empty() || !info.image_descriptors.empty();
|
||||
scheduler.Record([this, descriptor_data, is_rescaling,
|
||||
rescaling_data = rescaling.Data()](vk::CommandBuffer cmdbuf) {
|
||||
cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline);
|
||||
if (!descriptor_set_layout) {
|
||||
return;
|
||||
}
|
||||
if (is_rescaling) {
|
||||
cmdbuf.PushConstants(*pipeline_layout, VK_SHADER_STAGE_COMPUTE_BIT,
|
||||
RESCALING_LAYOUT_WORDS_OFFSET, sizeof(rescaling_data),
|
||||
rescaling_data.data());
|
||||
}
|
||||
const VkDescriptorSet descriptor_set{descriptor_allocator.Commit()};
|
||||
const vk::Device& dev{device.GetLogical()};
|
||||
dev.UpdateDescriptorSet(descriptor_set, *descriptor_update_template, descriptor_data);
|
||||
cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline_layout, 0,
|
||||
descriptor_set, nullptr);
|
||||
});
|
||||
}
|
||||
|
||||
} // namespace Vulkan
|
||||
|
||||
@@ -1,66 +1,66 @@
|
||||
// SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <atomic>
|
||||
#include <condition_variable>
|
||||
#include <mutex>
|
||||
|
||||
#include "common/common_types.h"
|
||||
#include "common/thread_worker.h"
|
||||
#include "shader_recompiler/shader_info.h"
|
||||
#include "video_core/renderer_vulkan/vk_buffer_cache.h"
|
||||
#include "video_core/renderer_vulkan/vk_descriptor_pool.h"
|
||||
#include "video_core/renderer_vulkan/vk_texture_cache.h"
|
||||
#include "video_core/renderer_vulkan/vk_update_descriptor.h"
|
||||
#include "video_core/vulkan_common/vulkan_wrapper.h"
|
||||
|
||||
namespace VideoCore {
|
||||
class ShaderNotify;
|
||||
}
|
||||
|
||||
namespace Vulkan {
|
||||
|
||||
class Device;
|
||||
class PipelineStatistics;
|
||||
class Scheduler;
|
||||
|
||||
class ComputePipeline {
|
||||
public:
|
||||
explicit ComputePipeline(const Device& device, DescriptorPool& descriptor_pool,
|
||||
UpdateDescriptorQueue& update_descriptor_queue,
|
||||
Common::ThreadWorker* thread_worker,
|
||||
PipelineStatistics* pipeline_statistics,
|
||||
VideoCore::ShaderNotify* shader_notify, const Shader::Info& info,
|
||||
vk::ShaderModule spv_module);
|
||||
|
||||
ComputePipeline& operator=(ComputePipeline&&) noexcept = delete;
|
||||
ComputePipeline(ComputePipeline&&) noexcept = delete;
|
||||
|
||||
ComputePipeline& operator=(const ComputePipeline&) = delete;
|
||||
ComputePipeline(const ComputePipeline&) = delete;
|
||||
|
||||
void Configure(Tegra::Engines::KeplerCompute& kepler_compute, Tegra::MemoryManager& gpu_memory,
|
||||
Scheduler& scheduler, BufferCache& buffer_cache, TextureCache& texture_cache);
|
||||
|
||||
private:
|
||||
const Device& device;
|
||||
UpdateDescriptorQueue& update_descriptor_queue;
|
||||
Shader::Info info;
|
||||
|
||||
VideoCommon::ComputeUniformBufferSizes uniform_buffer_sizes{};
|
||||
|
||||
vk::ShaderModule spv_module;
|
||||
vk::DescriptorSetLayout descriptor_set_layout;
|
||||
DescriptorAllocator descriptor_allocator;
|
||||
vk::PipelineLayout pipeline_layout;
|
||||
vk::DescriptorUpdateTemplateKHR descriptor_update_template;
|
||||
vk::Pipeline pipeline;
|
||||
|
||||
std::condition_variable build_condvar;
|
||||
std::mutex build_mutex;
|
||||
std::atomic_bool is_built{false};
|
||||
};
|
||||
|
||||
} // namespace Vulkan
|
||||
// SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <atomic>
|
||||
#include <condition_variable>
|
||||
#include <mutex>
|
||||
|
||||
#include "common/common_types.h"
|
||||
#include "common/thread_worker.h"
|
||||
#include "shader_recompiler/shader_info.h"
|
||||
#include "video_core/renderer_vulkan/vk_buffer_cache.h"
|
||||
#include "video_core/renderer_vulkan/vk_descriptor_pool.h"
|
||||
#include "video_core/renderer_vulkan/vk_texture_cache.h"
|
||||
#include "video_core/renderer_vulkan/vk_update_descriptor.h"
|
||||
#include "video_core/vulkan_common/vulkan_wrapper.h"
|
||||
|
||||
namespace VideoCore {
|
||||
class ShaderNotify;
|
||||
}
|
||||
|
||||
namespace Vulkan {
|
||||
|
||||
class Device;
|
||||
class PipelineStatistics;
|
||||
class Scheduler;
|
||||
|
||||
class ComputePipeline {
|
||||
public:
|
||||
explicit ComputePipeline(const Device& device, DescriptorPool& descriptor_pool,
|
||||
UpdateDescriptorQueue& update_descriptor_queue,
|
||||
Common::ThreadWorker* thread_worker,
|
||||
PipelineStatistics* pipeline_statistics,
|
||||
VideoCore::ShaderNotify* shader_notify, const Shader::Info& info,
|
||||
vk::ShaderModule spv_module);
|
||||
|
||||
ComputePipeline& operator=(ComputePipeline&&) noexcept = delete;
|
||||
ComputePipeline(ComputePipeline&&) noexcept = delete;
|
||||
|
||||
ComputePipeline& operator=(const ComputePipeline&) = delete;
|
||||
ComputePipeline(const ComputePipeline&) = delete;
|
||||
|
||||
void Configure(Tegra::Engines::KeplerCompute& kepler_compute, Tegra::MemoryManager& gpu_memory,
|
||||
Scheduler& scheduler, BufferCache& buffer_cache, TextureCache& texture_cache);
|
||||
|
||||
private:
|
||||
const Device& device;
|
||||
UpdateDescriptorQueue& update_descriptor_queue;
|
||||
Shader::Info info;
|
||||
|
||||
VideoCommon::ComputeUniformBufferSizes uniform_buffer_sizes{};
|
||||
|
||||
vk::ShaderModule spv_module;
|
||||
vk::DescriptorSetLayout descriptor_set_layout;
|
||||
DescriptorAllocator descriptor_allocator;
|
||||
vk::PipelineLayout pipeline_layout;
|
||||
vk::DescriptorUpdateTemplateKHR descriptor_update_template;
|
||||
vk::Pipeline pipeline;
|
||||
|
||||
std::condition_variable build_condvar;
|
||||
std::mutex build_mutex;
|
||||
std::atomic_bool is_built{false};
|
||||
};
|
||||
|
||||
} // namespace Vulkan
|
||||
|
||||
@@ -1,163 +1,163 @@
|
||||
// SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#include <algorithm>
|
||||
#include <mutex>
|
||||
#include <span>
|
||||
#include <vector>
|
||||
|
||||
#include "common/common_types.h"
|
||||
#include "video_core/renderer_vulkan/vk_descriptor_pool.h"
|
||||
#include "video_core/renderer_vulkan/vk_resource_pool.h"
|
||||
#include "video_core/renderer_vulkan/vk_scheduler.h"
|
||||
#include "video_core/vulkan_common/vulkan_device.h"
|
||||
#include "video_core/vulkan_common/vulkan_wrapper.h"
|
||||
|
||||
namespace Vulkan {
|
||||
|
||||
// Prefer small grow rates to avoid saturating the descriptor pool with barely used pipelines
|
||||
constexpr size_t SETS_GROW_RATE = 16;
|
||||
constexpr s32 SCORE_THRESHOLD = 3;
|
||||
|
||||
struct DescriptorBank {
|
||||
DescriptorBankInfo info;
|
||||
std::vector<vk::DescriptorPool> pools;
|
||||
};
|
||||
|
||||
bool DescriptorBankInfo::IsSuperset(const DescriptorBankInfo& subset) const noexcept {
|
||||
return uniform_buffers >= subset.uniform_buffers && storage_buffers >= subset.storage_buffers &&
|
||||
texture_buffers >= subset.texture_buffers && image_buffers >= subset.image_buffers &&
|
||||
textures >= subset.textures && images >= subset.image_buffers;
|
||||
}
|
||||
|
||||
template <typename Descriptors>
|
||||
static u32 Accumulate(const Descriptors& descriptors) {
|
||||
u32 count = 0;
|
||||
for (const auto& descriptor : descriptors) {
|
||||
count += descriptor.count;
|
||||
}
|
||||
return count;
|
||||
}
|
||||
|
||||
static DescriptorBankInfo MakeBankInfo(std::span<const Shader::Info> infos) {
|
||||
DescriptorBankInfo bank;
|
||||
for (const Shader::Info& info : infos) {
|
||||
bank.uniform_buffers += Accumulate(info.constant_buffer_descriptors);
|
||||
bank.storage_buffers += Accumulate(info.storage_buffers_descriptors);
|
||||
bank.texture_buffers += Accumulate(info.texture_buffer_descriptors);
|
||||
bank.image_buffers += Accumulate(info.image_buffer_descriptors);
|
||||
bank.textures += Accumulate(info.texture_descriptors);
|
||||
bank.images += Accumulate(info.image_descriptors);
|
||||
}
|
||||
bank.score = bank.uniform_buffers + bank.storage_buffers + bank.texture_buffers +
|
||||
bank.image_buffers + bank.textures + bank.images;
|
||||
return bank;
|
||||
}
|
||||
|
||||
static void AllocatePool(const Device& device, DescriptorBank& bank) {
|
||||
std::array<VkDescriptorPoolSize, 6> pool_sizes;
|
||||
size_t pool_cursor{};
|
||||
const u32 sets_per_pool = device.GetSetsPerPool();
|
||||
const auto add = [&](VkDescriptorType type, u32 count) {
|
||||
if (count > 0) {
|
||||
pool_sizes[pool_cursor++] = {
|
||||
.type = type,
|
||||
.descriptorCount = count * sets_per_pool,
|
||||
};
|
||||
}
|
||||
};
|
||||
const auto& info{bank.info};
|
||||
add(VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, info.uniform_buffers);
|
||||
add(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, info.storage_buffers);
|
||||
add(VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, info.texture_buffers);
|
||||
add(VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER, info.image_buffers);
|
||||
add(VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, info.textures);
|
||||
add(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, info.images);
|
||||
bank.pools.push_back(device.GetLogical().CreateDescriptorPool({
|
||||
.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO,
|
||||
.pNext = nullptr,
|
||||
.flags = VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT,
|
||||
.maxSets = sets_per_pool,
|
||||
.poolSizeCount = static_cast<u32>(pool_cursor),
|
||||
.pPoolSizes = std::data(pool_sizes),
|
||||
}));
|
||||
}
|
||||
|
||||
DescriptorAllocator::DescriptorAllocator(const Device& device_, MasterSemaphore& master_semaphore_,
|
||||
DescriptorBank& bank_, VkDescriptorSetLayout layout_)
|
||||
: ResourcePool(master_semaphore_, SETS_GROW_RATE), device{&device_}, bank{&bank_},
|
||||
layout{layout_} {}
|
||||
|
||||
VkDescriptorSet DescriptorAllocator::Commit() {
|
||||
const size_t index = CommitResource();
|
||||
return sets[index / SETS_GROW_RATE][index % SETS_GROW_RATE];
|
||||
}
|
||||
|
||||
void DescriptorAllocator::Allocate(size_t begin, size_t end) {
|
||||
sets.push_back(AllocateDescriptors(end - begin));
|
||||
}
|
||||
|
||||
vk::DescriptorSets DescriptorAllocator::AllocateDescriptors(size_t count) {
|
||||
const std::vector<VkDescriptorSetLayout> layouts(count, layout);
|
||||
VkDescriptorSetAllocateInfo allocate_info{
|
||||
.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO,
|
||||
.pNext = nullptr,
|
||||
.descriptorPool = *bank->pools.back(),
|
||||
.descriptorSetCount = static_cast<u32>(count),
|
||||
.pSetLayouts = layouts.data(),
|
||||
};
|
||||
vk::DescriptorSets new_sets = bank->pools.back().Allocate(allocate_info);
|
||||
if (!new_sets.IsOutOfPoolMemory()) {
|
||||
return new_sets;
|
||||
}
|
||||
// Our current pool is out of memory. Allocate a new one and retry
|
||||
AllocatePool(*device, *bank);
|
||||
allocate_info.descriptorPool = *bank->pools.back();
|
||||
new_sets = bank->pools.back().Allocate(allocate_info);
|
||||
if (!new_sets.IsOutOfPoolMemory()) {
|
||||
return new_sets;
|
||||
}
|
||||
// After allocating a new pool, we are out of memory again. We can't handle this from here.
|
||||
throw vk::Exception(VK_ERROR_OUT_OF_POOL_MEMORY);
|
||||
}
|
||||
|
||||
DescriptorPool::DescriptorPool(const Device& device_, Scheduler& scheduler)
|
||||
: device{device_}, master_semaphore{scheduler.GetMasterSemaphore()} {}
|
||||
|
||||
DescriptorPool::~DescriptorPool() = default;
|
||||
|
||||
DescriptorAllocator DescriptorPool::Allocator(VkDescriptorSetLayout layout,
|
||||
std::span<const Shader::Info> infos) {
|
||||
return Allocator(layout, MakeBankInfo(infos));
|
||||
}
|
||||
|
||||
DescriptorAllocator DescriptorPool::Allocator(VkDescriptorSetLayout layout,
|
||||
const Shader::Info& info) {
|
||||
return Allocator(layout, MakeBankInfo(std::array{info}));
|
||||
}
|
||||
|
||||
DescriptorAllocator DescriptorPool::Allocator(VkDescriptorSetLayout layout,
|
||||
const DescriptorBankInfo& info) {
|
||||
return DescriptorAllocator(device, master_semaphore, Bank(info), layout);
|
||||
}
|
||||
|
||||
DescriptorBank& DescriptorPool::Bank(const DescriptorBankInfo& reqs) {
|
||||
std::shared_lock read_lock{banks_mutex};
|
||||
const auto it = std::ranges::find_if(bank_infos, [&reqs](const DescriptorBankInfo& bank) {
|
||||
return std::abs(bank.score - reqs.score) < SCORE_THRESHOLD && bank.IsSuperset(reqs);
|
||||
});
|
||||
if (it != bank_infos.end()) {
|
||||
return *banks[std::distance(bank_infos.begin(), it)].get();
|
||||
}
|
||||
read_lock.unlock();
|
||||
|
||||
std::unique_lock write_lock{banks_mutex};
|
||||
bank_infos.push_back(reqs);
|
||||
|
||||
auto& bank = *banks.emplace_back(std::make_unique<DescriptorBank>());
|
||||
bank.info = reqs;
|
||||
AllocatePool(device, bank);
|
||||
return bank;
|
||||
}
|
||||
|
||||
} // namespace Vulkan
|
||||
// SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#include <algorithm>
|
||||
#include <mutex>
|
||||
#include <span>
|
||||
#include <vector>
|
||||
|
||||
#include "common/common_types.h"
|
||||
#include "video_core/renderer_vulkan/vk_descriptor_pool.h"
|
||||
#include "video_core/renderer_vulkan/vk_resource_pool.h"
|
||||
#include "video_core/renderer_vulkan/vk_scheduler.h"
|
||||
#include "video_core/vulkan_common/vulkan_device.h"
|
||||
#include "video_core/vulkan_common/vulkan_wrapper.h"
|
||||
|
||||
namespace Vulkan {
|
||||
|
||||
// Prefer small grow rates to avoid saturating the descriptor pool with barely used pipelines
|
||||
constexpr size_t SETS_GROW_RATE = 16;
|
||||
constexpr s32 SCORE_THRESHOLD = 3;
|
||||
|
||||
struct DescriptorBank {
|
||||
DescriptorBankInfo info;
|
||||
std::vector<vk::DescriptorPool> pools;
|
||||
};
|
||||
|
||||
bool DescriptorBankInfo::IsSuperset(const DescriptorBankInfo& subset) const noexcept {
|
||||
return uniform_buffers >= subset.uniform_buffers && storage_buffers >= subset.storage_buffers &&
|
||||
texture_buffers >= subset.texture_buffers && image_buffers >= subset.image_buffers &&
|
||||
textures >= subset.textures && images >= subset.image_buffers;
|
||||
}
|
||||
|
||||
template <typename Descriptors>
|
||||
static u32 Accumulate(const Descriptors& descriptors) {
|
||||
u32 count = 0;
|
||||
for (const auto& descriptor : descriptors) {
|
||||
count += descriptor.count;
|
||||
}
|
||||
return count;
|
||||
}
|
||||
|
||||
static DescriptorBankInfo MakeBankInfo(std::span<const Shader::Info> infos) {
|
||||
DescriptorBankInfo bank;
|
||||
for (const Shader::Info& info : infos) {
|
||||
bank.uniform_buffers += Accumulate(info.constant_buffer_descriptors);
|
||||
bank.storage_buffers += Accumulate(info.storage_buffers_descriptors);
|
||||
bank.texture_buffers += Accumulate(info.texture_buffer_descriptors);
|
||||
bank.image_buffers += Accumulate(info.image_buffer_descriptors);
|
||||
bank.textures += Accumulate(info.texture_descriptors);
|
||||
bank.images += Accumulate(info.image_descriptors);
|
||||
}
|
||||
bank.score = bank.uniform_buffers + bank.storage_buffers + bank.texture_buffers +
|
||||
bank.image_buffers + bank.textures + bank.images;
|
||||
return bank;
|
||||
}
|
||||
|
||||
static void AllocatePool(const Device& device, DescriptorBank& bank) {
|
||||
std::array<VkDescriptorPoolSize, 6> pool_sizes;
|
||||
size_t pool_cursor{};
|
||||
const u32 sets_per_pool = device.GetSetsPerPool();
|
||||
const auto add = [&](VkDescriptorType type, u32 count) {
|
||||
if (count > 0) {
|
||||
pool_sizes[pool_cursor++] = {
|
||||
.type = type,
|
||||
.descriptorCount = count * sets_per_pool,
|
||||
};
|
||||
}
|
||||
};
|
||||
const auto& info{bank.info};
|
||||
add(VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, info.uniform_buffers);
|
||||
add(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, info.storage_buffers);
|
||||
add(VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, info.texture_buffers);
|
||||
add(VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER, info.image_buffers);
|
||||
add(VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, info.textures);
|
||||
add(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, info.images);
|
||||
bank.pools.push_back(device.GetLogical().CreateDescriptorPool({
|
||||
.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO,
|
||||
.pNext = nullptr,
|
||||
.flags = VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT,
|
||||
.maxSets = sets_per_pool,
|
||||
.poolSizeCount = static_cast<u32>(pool_cursor),
|
||||
.pPoolSizes = std::data(pool_sizes),
|
||||
}));
|
||||
}
|
||||
|
||||
DescriptorAllocator::DescriptorAllocator(const Device& device_, MasterSemaphore& master_semaphore_,
|
||||
DescriptorBank& bank_, VkDescriptorSetLayout layout_)
|
||||
: ResourcePool(master_semaphore_, SETS_GROW_RATE), device{&device_}, bank{&bank_},
|
||||
layout{layout_} {}
|
||||
|
||||
VkDescriptorSet DescriptorAllocator::Commit() {
|
||||
const size_t index = CommitResource();
|
||||
return sets[index / SETS_GROW_RATE][index % SETS_GROW_RATE];
|
||||
}
|
||||
|
||||
void DescriptorAllocator::Allocate(size_t begin, size_t end) {
|
||||
sets.push_back(AllocateDescriptors(end - begin));
|
||||
}
|
||||
|
||||
vk::DescriptorSets DescriptorAllocator::AllocateDescriptors(size_t count) {
|
||||
const std::vector<VkDescriptorSetLayout> layouts(count, layout);
|
||||
VkDescriptorSetAllocateInfo allocate_info{
|
||||
.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO,
|
||||
.pNext = nullptr,
|
||||
.descriptorPool = *bank->pools.back(),
|
||||
.descriptorSetCount = static_cast<u32>(count),
|
||||
.pSetLayouts = layouts.data(),
|
||||
};
|
||||
vk::DescriptorSets new_sets = bank->pools.back().Allocate(allocate_info);
|
||||
if (!new_sets.IsOutOfPoolMemory()) {
|
||||
return new_sets;
|
||||
}
|
||||
// Our current pool is out of memory. Allocate a new one and retry
|
||||
AllocatePool(*device, *bank);
|
||||
allocate_info.descriptorPool = *bank->pools.back();
|
||||
new_sets = bank->pools.back().Allocate(allocate_info);
|
||||
if (!new_sets.IsOutOfPoolMemory()) {
|
||||
return new_sets;
|
||||
}
|
||||
// After allocating a new pool, we are out of memory again. We can't handle this from here.
|
||||
throw vk::Exception(VK_ERROR_OUT_OF_POOL_MEMORY);
|
||||
}
|
||||
|
||||
DescriptorPool::DescriptorPool(const Device& device_, Scheduler& scheduler)
|
||||
: device{device_}, master_semaphore{scheduler.GetMasterSemaphore()} {}
|
||||
|
||||
DescriptorPool::~DescriptorPool() = default;
|
||||
|
||||
DescriptorAllocator DescriptorPool::Allocator(VkDescriptorSetLayout layout,
|
||||
std::span<const Shader::Info> infos) {
|
||||
return Allocator(layout, MakeBankInfo(infos));
|
||||
}
|
||||
|
||||
DescriptorAllocator DescriptorPool::Allocator(VkDescriptorSetLayout layout,
|
||||
const Shader::Info& info) {
|
||||
return Allocator(layout, MakeBankInfo(std::array{info}));
|
||||
}
|
||||
|
||||
DescriptorAllocator DescriptorPool::Allocator(VkDescriptorSetLayout layout,
|
||||
const DescriptorBankInfo& info) {
|
||||
return DescriptorAllocator(device, master_semaphore, Bank(info), layout);
|
||||
}
|
||||
|
||||
DescriptorBank& DescriptorPool::Bank(const DescriptorBankInfo& reqs) {
|
||||
std::shared_lock read_lock{banks_mutex};
|
||||
const auto it = std::ranges::find_if(bank_infos, [&reqs](const DescriptorBankInfo& bank) {
|
||||
return std::abs(bank.score - reqs.score) < SCORE_THRESHOLD && bank.IsSuperset(reqs);
|
||||
});
|
||||
if (it != bank_infos.end()) {
|
||||
return *banks[std::distance(bank_infos.begin(), it)].get();
|
||||
}
|
||||
read_lock.unlock();
|
||||
|
||||
std::unique_lock write_lock{banks_mutex};
|
||||
bank_infos.push_back(reqs);
|
||||
|
||||
auto& bank = *banks.emplace_back(std::make_unique<DescriptorBank>());
|
||||
bank.info = reqs;
|
||||
AllocatePool(device, bank);
|
||||
return bank;
|
||||
}
|
||||
|
||||
} // namespace Vulkan
|
||||
|
||||
@@ -1,87 +1,87 @@
|
||||
// SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <shared_mutex>
|
||||
#include <span>
|
||||
#include <vector>
|
||||
|
||||
#include "shader_recompiler/shader_info.h"
|
||||
#include "video_core/renderer_vulkan/vk_resource_pool.h"
|
||||
#include "video_core/vulkan_common/vulkan_wrapper.h"
|
||||
|
||||
namespace Vulkan {
|
||||
|
||||
class Device;
|
||||
class Scheduler;
|
||||
|
||||
struct DescriptorBank;
|
||||
|
||||
struct DescriptorBankInfo {
|
||||
[[nodiscard]] bool IsSuperset(const DescriptorBankInfo& subset) const noexcept;
|
||||
|
||||
u32 uniform_buffers{}; ///< Number of uniform buffer descriptors
|
||||
u32 storage_buffers{}; ///< Number of storage buffer descriptors
|
||||
u32 texture_buffers{}; ///< Number of texture buffer descriptors
|
||||
u32 image_buffers{}; ///< Number of image buffer descriptors
|
||||
u32 textures{}; ///< Number of texture descriptors
|
||||
u32 images{}; ///< Number of image descriptors
|
||||
s32 score{}; ///< Number of descriptors in total
|
||||
};
|
||||
|
||||
class DescriptorAllocator final : public ResourcePool {
|
||||
friend class DescriptorPool;
|
||||
|
||||
public:
|
||||
explicit DescriptorAllocator() = default;
|
||||
~DescriptorAllocator() override = default;
|
||||
|
||||
DescriptorAllocator& operator=(DescriptorAllocator&&) noexcept = default;
|
||||
DescriptorAllocator(DescriptorAllocator&&) noexcept = default;
|
||||
|
||||
DescriptorAllocator& operator=(const DescriptorAllocator&) = delete;
|
||||
DescriptorAllocator(const DescriptorAllocator&) = delete;
|
||||
|
||||
VkDescriptorSet Commit();
|
||||
|
||||
private:
|
||||
explicit DescriptorAllocator(const Device& device_, MasterSemaphore& master_semaphore_,
|
||||
DescriptorBank& bank_, VkDescriptorSetLayout layout_);
|
||||
|
||||
void Allocate(size_t begin, size_t end) override;
|
||||
|
||||
vk::DescriptorSets AllocateDescriptors(size_t count);
|
||||
|
||||
const Device* device{};
|
||||
DescriptorBank* bank{};
|
||||
VkDescriptorSetLayout layout{};
|
||||
|
||||
std::vector<vk::DescriptorSets> sets;
|
||||
};
|
||||
|
||||
class DescriptorPool {
|
||||
public:
|
||||
explicit DescriptorPool(const Device& device, Scheduler& scheduler);
|
||||
~DescriptorPool();
|
||||
|
||||
DescriptorPool& operator=(const DescriptorPool&) = delete;
|
||||
DescriptorPool(const DescriptorPool&) = delete;
|
||||
|
||||
DescriptorAllocator Allocator(VkDescriptorSetLayout layout,
|
||||
std::span<const Shader::Info> infos);
|
||||
DescriptorAllocator Allocator(VkDescriptorSetLayout layout, const Shader::Info& info);
|
||||
DescriptorAllocator Allocator(VkDescriptorSetLayout layout, const DescriptorBankInfo& info);
|
||||
|
||||
private:
|
||||
DescriptorBank& Bank(const DescriptorBankInfo& reqs);
|
||||
|
||||
const Device& device;
|
||||
MasterSemaphore& master_semaphore;
|
||||
|
||||
std::shared_mutex banks_mutex;
|
||||
std::vector<DescriptorBankInfo> bank_infos;
|
||||
std::vector<std::unique_ptr<DescriptorBank>> banks;
|
||||
};
|
||||
|
||||
// SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <shared_mutex>
|
||||
#include <span>
|
||||
#include <vector>
|
||||
|
||||
#include "shader_recompiler/shader_info.h"
|
||||
#include "video_core/renderer_vulkan/vk_resource_pool.h"
|
||||
#include "video_core/vulkan_common/vulkan_wrapper.h"
|
||||
|
||||
namespace Vulkan {
|
||||
|
||||
class Device;
|
||||
class Scheduler;
|
||||
|
||||
struct DescriptorBank;
|
||||
|
||||
struct DescriptorBankInfo {
|
||||
[[nodiscard]] bool IsSuperset(const DescriptorBankInfo& subset) const noexcept;
|
||||
|
||||
u32 uniform_buffers{}; ///< Number of uniform buffer descriptors
|
||||
u32 storage_buffers{}; ///< Number of storage buffer descriptors
|
||||
u32 texture_buffers{}; ///< Number of texture buffer descriptors
|
||||
u32 image_buffers{}; ///< Number of image buffer descriptors
|
||||
u32 textures{}; ///< Number of texture descriptors
|
||||
u32 images{}; ///< Number of image descriptors
|
||||
s32 score{}; ///< Number of descriptors in total
|
||||
};
|
||||
|
||||
class DescriptorAllocator final : public ResourcePool {
|
||||
friend class DescriptorPool;
|
||||
|
||||
public:
|
||||
explicit DescriptorAllocator() = default;
|
||||
~DescriptorAllocator() override = default;
|
||||
|
||||
DescriptorAllocator& operator=(DescriptorAllocator&&) noexcept = default;
|
||||
DescriptorAllocator(DescriptorAllocator&&) noexcept = default;
|
||||
|
||||
DescriptorAllocator& operator=(const DescriptorAllocator&) = delete;
|
||||
DescriptorAllocator(const DescriptorAllocator&) = delete;
|
||||
|
||||
VkDescriptorSet Commit();
|
||||
|
||||
private:
|
||||
explicit DescriptorAllocator(const Device& device_, MasterSemaphore& master_semaphore_,
|
||||
DescriptorBank& bank_, VkDescriptorSetLayout layout_);
|
||||
|
||||
void Allocate(size_t begin, size_t end) override;
|
||||
|
||||
vk::DescriptorSets AllocateDescriptors(size_t count);
|
||||
|
||||
const Device* device{};
|
||||
DescriptorBank* bank{};
|
||||
VkDescriptorSetLayout layout{};
|
||||
|
||||
std::vector<vk::DescriptorSets> sets;
|
||||
};
|
||||
|
||||
class DescriptorPool {
|
||||
public:
|
||||
explicit DescriptorPool(const Device& device, Scheduler& scheduler);
|
||||
~DescriptorPool();
|
||||
|
||||
DescriptorPool& operator=(const DescriptorPool&) = delete;
|
||||
DescriptorPool(const DescriptorPool&) = delete;
|
||||
|
||||
DescriptorAllocator Allocator(VkDescriptorSetLayout layout,
|
||||
std::span<const Shader::Info> infos);
|
||||
DescriptorAllocator Allocator(VkDescriptorSetLayout layout, const Shader::Info& info);
|
||||
DescriptorAllocator Allocator(VkDescriptorSetLayout layout, const DescriptorBankInfo& info);
|
||||
|
||||
private:
|
||||
DescriptorBank& Bank(const DescriptorBankInfo& reqs);
|
||||
|
||||
const Device& device;
|
||||
MasterSemaphore& master_semaphore;
|
||||
|
||||
std::shared_mutex banks_mutex;
|
||||
std::vector<DescriptorBankInfo> bank_infos;
|
||||
std::vector<std::unique_ptr<DescriptorBank>> banks;
|
||||
};
|
||||
|
||||
} // namespace Vulkan
|
||||
@@ -1,64 +1,64 @@
|
||||
// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#include <memory>
|
||||
|
||||
#include "video_core/renderer_vulkan/vk_buffer_cache.h"
|
||||
#include "video_core/renderer_vulkan/vk_fence_manager.h"
|
||||
#include "video_core/renderer_vulkan/vk_scheduler.h"
|
||||
#include "video_core/renderer_vulkan/vk_texture_cache.h"
|
||||
#include "video_core/vulkan_common/vulkan_device.h"
|
||||
|
||||
namespace Vulkan {
|
||||
|
||||
InnerFence::InnerFence(Scheduler& scheduler_, bool is_stubbed_)
|
||||
: FenceBase{is_stubbed_}, scheduler{scheduler_} {}
|
||||
|
||||
InnerFence::~InnerFence() = default;
|
||||
|
||||
void InnerFence::Queue() {
|
||||
if (is_stubbed) {
|
||||
return;
|
||||
}
|
||||
// Get the current tick so we can wait for it
|
||||
wait_tick = scheduler.CurrentTick();
|
||||
scheduler.Flush();
|
||||
}
|
||||
|
||||
bool InnerFence::IsSignaled() const {
|
||||
if (is_stubbed) {
|
||||
return true;
|
||||
}
|
||||
return scheduler.IsFree(wait_tick);
|
||||
}
|
||||
|
||||
void InnerFence::Wait() {
|
||||
if (is_stubbed) {
|
||||
return;
|
||||
}
|
||||
scheduler.Wait(wait_tick);
|
||||
}
|
||||
|
||||
FenceManager::FenceManager(VideoCore::RasterizerInterface& rasterizer_, Tegra::GPU& gpu_,
|
||||
TextureCache& texture_cache_, BufferCache& buffer_cache_,
|
||||
QueryCache& query_cache_, const Device& device_, Scheduler& scheduler_)
|
||||
: GenericFenceManager{rasterizer_, gpu_, texture_cache_, buffer_cache_, query_cache_},
|
||||
scheduler{scheduler_} {}
|
||||
|
||||
Fence FenceManager::CreateFence(bool is_stubbed) {
|
||||
return std::make_shared<InnerFence>(scheduler, is_stubbed);
|
||||
}
|
||||
|
||||
void FenceManager::QueueFence(Fence& fence) {
|
||||
fence->Queue();
|
||||
}
|
||||
|
||||
bool FenceManager::IsFenceSignaled(Fence& fence) const {
|
||||
return fence->IsSignaled();
|
||||
}
|
||||
|
||||
void FenceManager::WaitFence(Fence& fence) {
|
||||
fence->Wait();
|
||||
}
|
||||
|
||||
} // namespace Vulkan
|
||||
// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#include <memory>
|
||||
|
||||
#include "video_core/renderer_vulkan/vk_buffer_cache.h"
|
||||
#include "video_core/renderer_vulkan/vk_fence_manager.h"
|
||||
#include "video_core/renderer_vulkan/vk_scheduler.h"
|
||||
#include "video_core/renderer_vulkan/vk_texture_cache.h"
|
||||
#include "video_core/vulkan_common/vulkan_device.h"
|
||||
|
||||
namespace Vulkan {
|
||||
|
||||
InnerFence::InnerFence(Scheduler& scheduler_, bool is_stubbed_)
|
||||
: FenceBase{is_stubbed_}, scheduler{scheduler_} {}
|
||||
|
||||
InnerFence::~InnerFence() = default;
|
||||
|
||||
void InnerFence::Queue() {
|
||||
if (is_stubbed) {
|
||||
return;
|
||||
}
|
||||
// Get the current tick so we can wait for it
|
||||
wait_tick = scheduler.CurrentTick();
|
||||
scheduler.Flush();
|
||||
}
|
||||
|
||||
bool InnerFence::IsSignaled() const {
|
||||
if (is_stubbed) {
|
||||
return true;
|
||||
}
|
||||
return scheduler.IsFree(wait_tick);
|
||||
}
|
||||
|
||||
void InnerFence::Wait() {
|
||||
if (is_stubbed) {
|
||||
return;
|
||||
}
|
||||
scheduler.Wait(wait_tick);
|
||||
}
|
||||
|
||||
FenceManager::FenceManager(VideoCore::RasterizerInterface& rasterizer_, Tegra::GPU& gpu_,
|
||||
TextureCache& texture_cache_, BufferCache& buffer_cache_,
|
||||
QueryCache& query_cache_, const Device& device_, Scheduler& scheduler_)
|
||||
: GenericFenceManager{rasterizer_, gpu_, texture_cache_, buffer_cache_, query_cache_},
|
||||
scheduler{scheduler_} {}
|
||||
|
||||
Fence FenceManager::CreateFence(bool is_stubbed) {
|
||||
return std::make_shared<InnerFence>(scheduler, is_stubbed);
|
||||
}
|
||||
|
||||
void FenceManager::QueueFence(Fence& fence) {
|
||||
fence->Queue();
|
||||
}
|
||||
|
||||
bool FenceManager::IsFenceSignaled(Fence& fence) const {
|
||||
return fence->IsSignaled();
|
||||
}
|
||||
|
||||
void FenceManager::WaitFence(Fence& fence) {
|
||||
fence->Wait();
|
||||
}
|
||||
|
||||
} // namespace Vulkan
|
||||
|
||||
@@ -1,61 +1,61 @@
|
||||
// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <memory>
|
||||
|
||||
#include "video_core/fence_manager.h"
|
||||
#include "video_core/renderer_vulkan/vk_buffer_cache.h"
|
||||
#include "video_core/renderer_vulkan/vk_texture_cache.h"
|
||||
|
||||
namespace Core {
|
||||
class System;
|
||||
}
|
||||
|
||||
namespace VideoCore {
|
||||
class RasterizerInterface;
|
||||
}
|
||||
|
||||
namespace Vulkan {
|
||||
|
||||
class Device;
|
||||
class QueryCache;
|
||||
class Scheduler;
|
||||
|
||||
class InnerFence : public VideoCommon::FenceBase {
|
||||
public:
|
||||
explicit InnerFence(Scheduler& scheduler_, bool is_stubbed_);
|
||||
~InnerFence();
|
||||
|
||||
void Queue();
|
||||
|
||||
bool IsSignaled() const;
|
||||
|
||||
void Wait();
|
||||
|
||||
private:
|
||||
Scheduler& scheduler;
|
||||
u64 wait_tick = 0;
|
||||
};
|
||||
using Fence = std::shared_ptr<InnerFence>;
|
||||
|
||||
using GenericFenceManager = VideoCommon::FenceManager<Fence, TextureCache, BufferCache, QueryCache>;
|
||||
|
||||
class FenceManager final : public GenericFenceManager {
|
||||
public:
|
||||
explicit FenceManager(VideoCore::RasterizerInterface& rasterizer, Tegra::GPU& gpu,
|
||||
TextureCache& texture_cache, BufferCache& buffer_cache,
|
||||
QueryCache& query_cache, const Device& device, Scheduler& scheduler);
|
||||
|
||||
protected:
|
||||
Fence CreateFence(bool is_stubbed) override;
|
||||
void QueueFence(Fence& fence) override;
|
||||
bool IsFenceSignaled(Fence& fence) const override;
|
||||
void WaitFence(Fence& fence) override;
|
||||
|
||||
private:
|
||||
Scheduler& scheduler;
|
||||
};
|
||||
|
||||
} // namespace Vulkan
|
||||
// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <memory>
|
||||
|
||||
#include "video_core/fence_manager.h"
|
||||
#include "video_core/renderer_vulkan/vk_buffer_cache.h"
|
||||
#include "video_core/renderer_vulkan/vk_texture_cache.h"
|
||||
|
||||
namespace Core {
|
||||
class System;
|
||||
}
|
||||
|
||||
namespace VideoCore {
|
||||
class RasterizerInterface;
|
||||
}
|
||||
|
||||
namespace Vulkan {
|
||||
|
||||
class Device;
|
||||
class QueryCache;
|
||||
class Scheduler;
|
||||
|
||||
class InnerFence : public VideoCommon::FenceBase {
|
||||
public:
|
||||
explicit InnerFence(Scheduler& scheduler_, bool is_stubbed_);
|
||||
~InnerFence();
|
||||
|
||||
void Queue();
|
||||
|
||||
bool IsSignaled() const;
|
||||
|
||||
void Wait();
|
||||
|
||||
private:
|
||||
Scheduler& scheduler;
|
||||
u64 wait_tick = 0;
|
||||
};
|
||||
using Fence = std::shared_ptr<InnerFence>;
|
||||
|
||||
using GenericFenceManager = VideoCommon::FenceManager<Fence, TextureCache, BufferCache, QueryCache>;
|
||||
|
||||
class FenceManager final : public GenericFenceManager {
|
||||
public:
|
||||
explicit FenceManager(VideoCore::RasterizerInterface& rasterizer, Tegra::GPU& gpu,
|
||||
TextureCache& texture_cache, BufferCache& buffer_cache,
|
||||
QueryCache& query_cache, const Device& device, Scheduler& scheduler);
|
||||
|
||||
protected:
|
||||
Fence CreateFence(bool is_stubbed) override;
|
||||
void QueueFence(Fence& fence) override;
|
||||
bool IsFenceSignaled(Fence& fence) const override;
|
||||
void WaitFence(Fence& fence) override;
|
||||
|
||||
private:
|
||||
Scheduler& scheduler;
|
||||
};
|
||||
|
||||
} // namespace Vulkan
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,53 +1,53 @@
|
||||
// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "common/math_util.h"
|
||||
#include "video_core/vulkan_common/vulkan_memory_allocator.h"
|
||||
#include "video_core/vulkan_common/vulkan_wrapper.h"
|
||||
|
||||
namespace Vulkan {
|
||||
|
||||
class Device;
|
||||
class Scheduler;
|
||||
|
||||
class FSR {
|
||||
public:
|
||||
explicit FSR(const Device& device, MemoryAllocator& memory_allocator, size_t image_count,
|
||||
VkExtent2D output_size);
|
||||
VkImageView Draw(Scheduler& scheduler, size_t image_index, VkImageView image_view,
|
||||
VkExtent2D input_image_extent, const Common::Rectangle<int>& crop_rect);
|
||||
|
||||
private:
|
||||
void CreateDescriptorPool();
|
||||
void CreateDescriptorSetLayout();
|
||||
void CreateDescriptorSets();
|
||||
void CreateImages();
|
||||
void CreateSampler();
|
||||
void CreateShaders();
|
||||
void CreatePipeline();
|
||||
void CreatePipelineLayout();
|
||||
|
||||
void UpdateDescriptorSet(std::size_t image_index, VkImageView image_view) const;
|
||||
|
||||
const Device& device;
|
||||
MemoryAllocator& memory_allocator;
|
||||
size_t image_count;
|
||||
VkExtent2D output_size;
|
||||
|
||||
vk::DescriptorPool descriptor_pool;
|
||||
vk::DescriptorSetLayout descriptor_set_layout;
|
||||
vk::DescriptorSets descriptor_sets;
|
||||
vk::PipelineLayout pipeline_layout;
|
||||
vk::ShaderModule easu_shader;
|
||||
vk::ShaderModule rcas_shader;
|
||||
vk::Pipeline easu_pipeline;
|
||||
vk::Pipeline rcas_pipeline;
|
||||
vk::Sampler sampler;
|
||||
std::vector<vk::Image> images;
|
||||
std::vector<vk::ImageView> image_views;
|
||||
std::vector<MemoryCommit> buffer_commits;
|
||||
};
|
||||
|
||||
} // namespace Vulkan
|
||||
// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "common/math_util.h"
|
||||
#include "video_core/vulkan_common/vulkan_memory_allocator.h"
|
||||
#include "video_core/vulkan_common/vulkan_wrapper.h"
|
||||
|
||||
namespace Vulkan {
|
||||
|
||||
class Device;
|
||||
class Scheduler;
|
||||
|
||||
class FSR {
|
||||
public:
|
||||
explicit FSR(const Device& device, MemoryAllocator& memory_allocator, size_t image_count,
|
||||
VkExtent2D output_size);
|
||||
VkImageView Draw(Scheduler& scheduler, size_t image_index, VkImageView image_view,
|
||||
VkExtent2D input_image_extent, const Common::Rectangle<int>& crop_rect);
|
||||
|
||||
private:
|
||||
void CreateDescriptorPool();
|
||||
void CreateDescriptorSetLayout();
|
||||
void CreateDescriptorSets();
|
||||
void CreateImages();
|
||||
void CreateSampler();
|
||||
void CreateShaders();
|
||||
void CreatePipeline();
|
||||
void CreatePipelineLayout();
|
||||
|
||||
void UpdateDescriptorSet(std::size_t image_index, VkImageView image_view) const;
|
||||
|
||||
const Device& device;
|
||||
MemoryAllocator& memory_allocator;
|
||||
size_t image_count;
|
||||
VkExtent2D output_size;
|
||||
|
||||
vk::DescriptorPool descriptor_pool;
|
||||
vk::DescriptorSetLayout descriptor_set_layout;
|
||||
vk::DescriptorSets descriptor_sets;
|
||||
vk::PipelineLayout pipeline_layout;
|
||||
vk::ShaderModule easu_shader;
|
||||
vk::ShaderModule rcas_shader;
|
||||
vk::Pipeline easu_pipeline;
|
||||
vk::Pipeline rcas_pipeline;
|
||||
vk::Sampler sampler;
|
||||
std::vector<vk::Image> images;
|
||||
std::vector<vk::ImageView> image_views;
|
||||
std::vector<MemoryCommit> buffer_commits;
|
||||
};
|
||||
|
||||
} // namespace Vulkan
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,161 +1,161 @@
|
||||
// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <algorithm>
|
||||
#include <array>
|
||||
#include <atomic>
|
||||
#include <condition_variable>
|
||||
#include <mutex>
|
||||
#include <type_traits>
|
||||
|
||||
#include "common/thread_worker.h"
|
||||
#include "shader_recompiler/shader_info.h"
|
||||
#include "video_core/engines/maxwell_3d.h"
|
||||
#include "video_core/renderer_vulkan/fixed_pipeline_state.h"
|
||||
#include "video_core/renderer_vulkan/vk_buffer_cache.h"
|
||||
#include "video_core/renderer_vulkan/vk_descriptor_pool.h"
|
||||
#include "video_core/renderer_vulkan/vk_texture_cache.h"
|
||||
#include "video_core/vulkan_common/vulkan_wrapper.h"
|
||||
|
||||
namespace VideoCore {
|
||||
class ShaderNotify;
|
||||
}
|
||||
|
||||
namespace Vulkan {
|
||||
|
||||
struct GraphicsPipelineCacheKey {
|
||||
std::array<u64, 6> unique_hashes;
|
||||
FixedPipelineState state;
|
||||
|
||||
size_t Hash() const noexcept;
|
||||
|
||||
bool operator==(const GraphicsPipelineCacheKey& rhs) const noexcept;
|
||||
|
||||
bool operator!=(const GraphicsPipelineCacheKey& rhs) const noexcept {
|
||||
return !operator==(rhs);
|
||||
}
|
||||
|
||||
size_t Size() const noexcept {
|
||||
return sizeof(unique_hashes) + state.Size();
|
||||
}
|
||||
};
|
||||
static_assert(std::has_unique_object_representations_v<GraphicsPipelineCacheKey>);
|
||||
static_assert(std::is_trivially_copyable_v<GraphicsPipelineCacheKey>);
|
||||
static_assert(std::is_trivially_constructible_v<GraphicsPipelineCacheKey>);
|
||||
|
||||
} // namespace Vulkan
|
||||
|
||||
namespace std {
|
||||
template <>
|
||||
struct hash<Vulkan::GraphicsPipelineCacheKey> {
|
||||
size_t operator()(const Vulkan::GraphicsPipelineCacheKey& k) const noexcept {
|
||||
return k.Hash();
|
||||
}
|
||||
};
|
||||
} // namespace std
|
||||
|
||||
namespace Vulkan {
|
||||
|
||||
class Device;
|
||||
class PipelineStatistics;
|
||||
class RenderPassCache;
|
||||
class RescalingPushConstant;
|
||||
class Scheduler;
|
||||
class UpdateDescriptorQueue;
|
||||
|
||||
class GraphicsPipeline {
|
||||
static constexpr size_t NUM_STAGES = Tegra::Engines::Maxwell3D::Regs::MaxShaderStage;
|
||||
|
||||
public:
|
||||
explicit GraphicsPipeline(Scheduler& scheduler, BufferCache& buffer_cache,
|
||||
TextureCache& texture_cache, VideoCore::ShaderNotify* shader_notify,
|
||||
const Device& device, DescriptorPool& descriptor_pool,
|
||||
UpdateDescriptorQueue& update_descriptor_queue,
|
||||
Common::ThreadWorker* worker_thread,
|
||||
PipelineStatistics* pipeline_statistics,
|
||||
RenderPassCache& render_pass_cache,
|
||||
const GraphicsPipelineCacheKey& key,
|
||||
std::array<vk::ShaderModule, NUM_STAGES> stages,
|
||||
const std::array<const Shader::Info*, NUM_STAGES>& infos);
|
||||
|
||||
GraphicsPipeline& operator=(GraphicsPipeline&&) noexcept = delete;
|
||||
GraphicsPipeline(GraphicsPipeline&&) noexcept = delete;
|
||||
|
||||
GraphicsPipeline& operator=(const GraphicsPipeline&) = delete;
|
||||
GraphicsPipeline(const GraphicsPipeline&) = delete;
|
||||
|
||||
void AddTransition(GraphicsPipeline* transition);
|
||||
|
||||
void Configure(bool is_indexed) {
|
||||
configure_func(this, is_indexed);
|
||||
}
|
||||
|
||||
[[nodiscard]] GraphicsPipeline* Next(const GraphicsPipelineCacheKey& current_key) noexcept {
|
||||
if (key == current_key) {
|
||||
return this;
|
||||
}
|
||||
const auto it{std::find(transition_keys.begin(), transition_keys.end(), current_key)};
|
||||
return it != transition_keys.end() ? transitions[std::distance(transition_keys.begin(), it)]
|
||||
: nullptr;
|
||||
}
|
||||
|
||||
[[nodiscard]] bool IsBuilt() const noexcept {
|
||||
return is_built.load(std::memory_order::relaxed);
|
||||
}
|
||||
|
||||
template <typename Spec>
|
||||
static auto MakeConfigureSpecFunc() {
|
||||
return [](GraphicsPipeline* pl, bool is_indexed) { pl->ConfigureImpl<Spec>(is_indexed); };
|
||||
}
|
||||
|
||||
void SetEngine(Tegra::Engines::Maxwell3D* maxwell3d_, Tegra::MemoryManager* gpu_memory_) {
|
||||
maxwell3d = maxwell3d_;
|
||||
gpu_memory = gpu_memory_;
|
||||
}
|
||||
|
||||
private:
|
||||
template <typename Spec>
|
||||
void ConfigureImpl(bool is_indexed);
|
||||
|
||||
void ConfigureDraw(const RescalingPushConstant& rescaling);
|
||||
|
||||
void MakePipeline(VkRenderPass render_pass);
|
||||
|
||||
void Validate();
|
||||
|
||||
const GraphicsPipelineCacheKey key;
|
||||
Tegra::Engines::Maxwell3D* maxwell3d;
|
||||
Tegra::MemoryManager* gpu_memory;
|
||||
const Device& device;
|
||||
TextureCache& texture_cache;
|
||||
BufferCache& buffer_cache;
|
||||
Scheduler& scheduler;
|
||||
UpdateDescriptorQueue& update_descriptor_queue;
|
||||
|
||||
void (*configure_func)(GraphicsPipeline*, bool){};
|
||||
|
||||
std::vector<GraphicsPipelineCacheKey> transition_keys;
|
||||
std::vector<GraphicsPipeline*> transitions;
|
||||
|
||||
std::array<vk::ShaderModule, NUM_STAGES> spv_modules;
|
||||
|
||||
std::array<Shader::Info, NUM_STAGES> stage_infos;
|
||||
std::array<u32, 5> enabled_uniform_buffer_masks{};
|
||||
VideoCommon::UniformBufferSizes uniform_buffer_sizes{};
|
||||
u32 num_textures{};
|
||||
|
||||
vk::DescriptorSetLayout descriptor_set_layout;
|
||||
DescriptorAllocator descriptor_allocator;
|
||||
vk::PipelineLayout pipeline_layout;
|
||||
vk::DescriptorUpdateTemplateKHR descriptor_update_template;
|
||||
vk::Pipeline pipeline;
|
||||
|
||||
std::condition_variable build_condvar;
|
||||
std::mutex build_mutex;
|
||||
std::atomic_bool is_built{false};
|
||||
bool uses_push_descriptor{false};
|
||||
};
|
||||
|
||||
} // namespace Vulkan
|
||||
// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <algorithm>
|
||||
#include <array>
|
||||
#include <atomic>
|
||||
#include <condition_variable>
|
||||
#include <mutex>
|
||||
#include <type_traits>
|
||||
|
||||
#include "common/thread_worker.h"
|
||||
#include "shader_recompiler/shader_info.h"
|
||||
#include "video_core/engines/maxwell_3d.h"
|
||||
#include "video_core/renderer_vulkan/fixed_pipeline_state.h"
|
||||
#include "video_core/renderer_vulkan/vk_buffer_cache.h"
|
||||
#include "video_core/renderer_vulkan/vk_descriptor_pool.h"
|
||||
#include "video_core/renderer_vulkan/vk_texture_cache.h"
|
||||
#include "video_core/vulkan_common/vulkan_wrapper.h"
|
||||
|
||||
namespace VideoCore {
|
||||
class ShaderNotify;
|
||||
}
|
||||
|
||||
namespace Vulkan {
|
||||
|
||||
struct GraphicsPipelineCacheKey {
|
||||
std::array<u64, 6> unique_hashes;
|
||||
FixedPipelineState state;
|
||||
|
||||
size_t Hash() const noexcept;
|
||||
|
||||
bool operator==(const GraphicsPipelineCacheKey& rhs) const noexcept;
|
||||
|
||||
bool operator!=(const GraphicsPipelineCacheKey& rhs) const noexcept {
|
||||
return !operator==(rhs);
|
||||
}
|
||||
|
||||
size_t Size() const noexcept {
|
||||
return sizeof(unique_hashes) + state.Size();
|
||||
}
|
||||
};
|
||||
static_assert(std::has_unique_object_representations_v<GraphicsPipelineCacheKey>);
|
||||
static_assert(std::is_trivially_copyable_v<GraphicsPipelineCacheKey>);
|
||||
static_assert(std::is_trivially_constructible_v<GraphicsPipelineCacheKey>);
|
||||
|
||||
} // namespace Vulkan
|
||||
|
||||
namespace std {
|
||||
template <>
|
||||
struct hash<Vulkan::GraphicsPipelineCacheKey> {
|
||||
size_t operator()(const Vulkan::GraphicsPipelineCacheKey& k) const noexcept {
|
||||
return k.Hash();
|
||||
}
|
||||
};
|
||||
} // namespace std
|
||||
|
||||
namespace Vulkan {
|
||||
|
||||
class Device;
|
||||
class PipelineStatistics;
|
||||
class RenderPassCache;
|
||||
class RescalingPushConstant;
|
||||
class Scheduler;
|
||||
class UpdateDescriptorQueue;
|
||||
|
||||
class GraphicsPipeline {
|
||||
static constexpr size_t NUM_STAGES = Tegra::Engines::Maxwell3D::Regs::MaxShaderStage;
|
||||
|
||||
public:
|
||||
explicit GraphicsPipeline(Scheduler& scheduler, BufferCache& buffer_cache,
|
||||
TextureCache& texture_cache, VideoCore::ShaderNotify* shader_notify,
|
||||
const Device& device, DescriptorPool& descriptor_pool,
|
||||
UpdateDescriptorQueue& update_descriptor_queue,
|
||||
Common::ThreadWorker* worker_thread,
|
||||
PipelineStatistics* pipeline_statistics,
|
||||
RenderPassCache& render_pass_cache,
|
||||
const GraphicsPipelineCacheKey& key,
|
||||
std::array<vk::ShaderModule, NUM_STAGES> stages,
|
||||
const std::array<const Shader::Info*, NUM_STAGES>& infos);
|
||||
|
||||
GraphicsPipeline& operator=(GraphicsPipeline&&) noexcept = delete;
|
||||
GraphicsPipeline(GraphicsPipeline&&) noexcept = delete;
|
||||
|
||||
GraphicsPipeline& operator=(const GraphicsPipeline&) = delete;
|
||||
GraphicsPipeline(const GraphicsPipeline&) = delete;
|
||||
|
||||
void AddTransition(GraphicsPipeline* transition);
|
||||
|
||||
void Configure(bool is_indexed) {
|
||||
configure_func(this, is_indexed);
|
||||
}
|
||||
|
||||
[[nodiscard]] GraphicsPipeline* Next(const GraphicsPipelineCacheKey& current_key) noexcept {
|
||||
if (key == current_key) {
|
||||
return this;
|
||||
}
|
||||
const auto it{std::find(transition_keys.begin(), transition_keys.end(), current_key)};
|
||||
return it != transition_keys.end() ? transitions[std::distance(transition_keys.begin(), it)]
|
||||
: nullptr;
|
||||
}
|
||||
|
||||
[[nodiscard]] bool IsBuilt() const noexcept {
|
||||
return is_built.load(std::memory_order::relaxed);
|
||||
}
|
||||
|
||||
template <typename Spec>
|
||||
static auto MakeConfigureSpecFunc() {
|
||||
return [](GraphicsPipeline* pl, bool is_indexed) { pl->ConfigureImpl<Spec>(is_indexed); };
|
||||
}
|
||||
|
||||
void SetEngine(Tegra::Engines::Maxwell3D* maxwell3d_, Tegra::MemoryManager* gpu_memory_) {
|
||||
maxwell3d = maxwell3d_;
|
||||
gpu_memory = gpu_memory_;
|
||||
}
|
||||
|
||||
private:
|
||||
template <typename Spec>
|
||||
void ConfigureImpl(bool is_indexed);
|
||||
|
||||
void ConfigureDraw(const RescalingPushConstant& rescaling);
|
||||
|
||||
void MakePipeline(VkRenderPass render_pass);
|
||||
|
||||
void Validate();
|
||||
|
||||
const GraphicsPipelineCacheKey key;
|
||||
Tegra::Engines::Maxwell3D* maxwell3d;
|
||||
Tegra::MemoryManager* gpu_memory;
|
||||
const Device& device;
|
||||
TextureCache& texture_cache;
|
||||
BufferCache& buffer_cache;
|
||||
Scheduler& scheduler;
|
||||
UpdateDescriptorQueue& update_descriptor_queue;
|
||||
|
||||
void (*configure_func)(GraphicsPipeline*, bool){};
|
||||
|
||||
std::vector<GraphicsPipelineCacheKey> transition_keys;
|
||||
std::vector<GraphicsPipeline*> transitions;
|
||||
|
||||
std::array<vk::ShaderModule, NUM_STAGES> spv_modules;
|
||||
|
||||
std::array<Shader::Info, NUM_STAGES> stage_infos;
|
||||
std::array<u32, 5> enabled_uniform_buffer_masks{};
|
||||
VideoCommon::UniformBufferSizes uniform_buffer_sizes{};
|
||||
u32 num_textures{};
|
||||
|
||||
vk::DescriptorSetLayout descriptor_set_layout;
|
||||
DescriptorAllocator descriptor_allocator;
|
||||
vk::PipelineLayout pipeline_layout;
|
||||
vk::DescriptorUpdateTemplateKHR descriptor_update_template;
|
||||
vk::Pipeline pipeline;
|
||||
|
||||
std::condition_variable build_condvar;
|
||||
std::mutex build_mutex;
|
||||
std::atomic_bool is_built{false};
|
||||
bool uses_push_descriptor{false};
|
||||
};
|
||||
|
||||
} // namespace Vulkan
|
||||
|
||||
@@ -1,45 +1,45 @@
|
||||
// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#include <thread>
|
||||
|
||||
#include "common/settings.h"
|
||||
#include "video_core/renderer_vulkan/vk_master_semaphore.h"
|
||||
#include "video_core/vulkan_common/vulkan_device.h"
|
||||
#include "video_core/vulkan_common/vulkan_wrapper.h"
|
||||
|
||||
namespace Vulkan {
|
||||
|
||||
MasterSemaphore::MasterSemaphore(const Device& device) {
|
||||
static constexpr VkSemaphoreTypeCreateInfoKHR semaphore_type_ci{
|
||||
.sType = VK_STRUCTURE_TYPE_SEMAPHORE_TYPE_CREATE_INFO_KHR,
|
||||
.pNext = nullptr,
|
||||
.semaphoreType = VK_SEMAPHORE_TYPE_TIMELINE_KHR,
|
||||
.initialValue = 0,
|
||||
};
|
||||
static constexpr VkSemaphoreCreateInfo semaphore_ci{
|
||||
.sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO,
|
||||
.pNext = &semaphore_type_ci,
|
||||
.flags = 0,
|
||||
};
|
||||
semaphore = device.GetLogical().CreateSemaphore(semaphore_ci);
|
||||
|
||||
if (!Settings::values.renderer_debug) {
|
||||
return;
|
||||
}
|
||||
// Validation layers have a bug where they fail to track resource usage when using timeline
|
||||
// semaphores and synchronizing with GetSemaphoreCounterValueKHR. To workaround this issue, have
|
||||
// a separate thread waiting for each timeline semaphore value.
|
||||
debug_thread = std::jthread([this](std::stop_token stop_token) {
|
||||
u64 counter = 0;
|
||||
while (!stop_token.stop_requested()) {
|
||||
if (semaphore.Wait(counter, 10'000'000)) {
|
||||
++counter;
|
||||
}
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
MasterSemaphore::~MasterSemaphore() = default;
|
||||
|
||||
} // namespace Vulkan
|
||||
// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#include <thread>
|
||||
|
||||
#include "common/settings.h"
|
||||
#include "video_core/renderer_vulkan/vk_master_semaphore.h"
|
||||
#include "video_core/vulkan_common/vulkan_device.h"
|
||||
#include "video_core/vulkan_common/vulkan_wrapper.h"
|
||||
|
||||
namespace Vulkan {
|
||||
|
||||
MasterSemaphore::MasterSemaphore(const Device& device) {
|
||||
static constexpr VkSemaphoreTypeCreateInfoKHR semaphore_type_ci{
|
||||
.sType = VK_STRUCTURE_TYPE_SEMAPHORE_TYPE_CREATE_INFO_KHR,
|
||||
.pNext = nullptr,
|
||||
.semaphoreType = VK_SEMAPHORE_TYPE_TIMELINE_KHR,
|
||||
.initialValue = 0,
|
||||
};
|
||||
static constexpr VkSemaphoreCreateInfo semaphore_ci{
|
||||
.sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO,
|
||||
.pNext = &semaphore_type_ci,
|
||||
.flags = 0,
|
||||
};
|
||||
semaphore = device.GetLogical().CreateSemaphore(semaphore_ci);
|
||||
|
||||
if (!Settings::values.renderer_debug) {
|
||||
return;
|
||||
}
|
||||
// Validation layers have a bug where they fail to track resource usage when using timeline
|
||||
// semaphores and synchronizing with GetSemaphoreCounterValueKHR. To workaround this issue, have
|
||||
// a separate thread waiting for each timeline semaphore value.
|
||||
debug_thread = std::jthread([this](std::stop_token stop_token) {
|
||||
u64 counter = 0;
|
||||
while (!stop_token.stop_requested()) {
|
||||
if (semaphore.Wait(counter, 10'000'000)) {
|
||||
++counter;
|
||||
}
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
MasterSemaphore::~MasterSemaphore() = default;
|
||||
|
||||
} // namespace Vulkan
|
||||
|
||||
@@ -1,84 +1,84 @@
|
||||
// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <atomic>
|
||||
#include <thread>
|
||||
|
||||
#include "common/common_types.h"
|
||||
#include "video_core/vulkan_common/vulkan_wrapper.h"
|
||||
|
||||
namespace Vulkan {
|
||||
|
||||
class Device;
|
||||
|
||||
class MasterSemaphore {
|
||||
public:
|
||||
explicit MasterSemaphore(const Device& device);
|
||||
~MasterSemaphore();
|
||||
|
||||
/// Returns the current logical tick.
|
||||
[[nodiscard]] u64 CurrentTick() const noexcept {
|
||||
return current_tick.load(std::memory_order_acquire);
|
||||
}
|
||||
|
||||
/// Returns the last known GPU tick.
|
||||
[[nodiscard]] u64 KnownGpuTick() const noexcept {
|
||||
return gpu_tick.load(std::memory_order_acquire);
|
||||
}
|
||||
|
||||
/// Returns the timeline semaphore handle.
|
||||
[[nodiscard]] VkSemaphore Handle() const noexcept {
|
||||
return *semaphore;
|
||||
}
|
||||
|
||||
/// Returns true when a tick has been hit by the GPU.
|
||||
[[nodiscard]] bool IsFree(u64 tick) const noexcept {
|
||||
return KnownGpuTick() >= tick;
|
||||
}
|
||||
|
||||
/// Advance to the logical tick and return the old one
|
||||
[[nodiscard]] u64 NextTick() noexcept {
|
||||
return current_tick.fetch_add(1, std::memory_order_release);
|
||||
}
|
||||
|
||||
/// Refresh the known GPU tick
|
||||
void Refresh() {
|
||||
u64 this_tick{};
|
||||
u64 counter{};
|
||||
do {
|
||||
this_tick = gpu_tick.load(std::memory_order_acquire);
|
||||
counter = semaphore.GetCounter();
|
||||
if (counter < this_tick) {
|
||||
return;
|
||||
}
|
||||
} while (!gpu_tick.compare_exchange_weak(this_tick, counter, std::memory_order_release,
|
||||
std::memory_order_relaxed));
|
||||
}
|
||||
|
||||
/// Waits for a tick to be hit on the GPU
|
||||
void Wait(u64 tick) {
|
||||
// No need to wait if the GPU is ahead of the tick
|
||||
if (IsFree(tick)) {
|
||||
return;
|
||||
}
|
||||
// Update the GPU tick and try again
|
||||
Refresh();
|
||||
if (IsFree(tick)) {
|
||||
return;
|
||||
}
|
||||
// If none of the above is hit, fallback to a regular wait
|
||||
while (!semaphore.Wait(tick)) {
|
||||
}
|
||||
Refresh();
|
||||
}
|
||||
|
||||
private:
|
||||
vk::Semaphore semaphore; ///< Timeline semaphore.
|
||||
std::atomic<u64> gpu_tick{0}; ///< Current known GPU tick.
|
||||
std::atomic<u64> current_tick{1}; ///< Current logical tick.
|
||||
std::jthread debug_thread; ///< Debug thread to workaround validation layer bugs.
|
||||
};
|
||||
|
||||
} // namespace Vulkan
|
||||
// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <atomic>
|
||||
#include <thread>
|
||||
|
||||
#include "common/common_types.h"
|
||||
#include "video_core/vulkan_common/vulkan_wrapper.h"
|
||||
|
||||
namespace Vulkan {
|
||||
|
||||
class Device;
|
||||
|
||||
class MasterSemaphore {
|
||||
public:
|
||||
explicit MasterSemaphore(const Device& device);
|
||||
~MasterSemaphore();
|
||||
|
||||
/// Returns the current logical tick.
|
||||
[[nodiscard]] u64 CurrentTick() const noexcept {
|
||||
return current_tick.load(std::memory_order_acquire);
|
||||
}
|
||||
|
||||
/// Returns the last known GPU tick.
|
||||
[[nodiscard]] u64 KnownGpuTick() const noexcept {
|
||||
return gpu_tick.load(std::memory_order_acquire);
|
||||
}
|
||||
|
||||
/// Returns the timeline semaphore handle.
|
||||
[[nodiscard]] VkSemaphore Handle() const noexcept {
|
||||
return *semaphore;
|
||||
}
|
||||
|
||||
/// Returns true when a tick has been hit by the GPU.
|
||||
[[nodiscard]] bool IsFree(u64 tick) const noexcept {
|
||||
return KnownGpuTick() >= tick;
|
||||
}
|
||||
|
||||
/// Advance to the logical tick and return the old one
|
||||
[[nodiscard]] u64 NextTick() noexcept {
|
||||
return current_tick.fetch_add(1, std::memory_order_release);
|
||||
}
|
||||
|
||||
/// Refresh the known GPU tick
|
||||
void Refresh() {
|
||||
u64 this_tick{};
|
||||
u64 counter{};
|
||||
do {
|
||||
this_tick = gpu_tick.load(std::memory_order_acquire);
|
||||
counter = semaphore.GetCounter();
|
||||
if (counter < this_tick) {
|
||||
return;
|
||||
}
|
||||
} while (!gpu_tick.compare_exchange_weak(this_tick, counter, std::memory_order_release,
|
||||
std::memory_order_relaxed));
|
||||
}
|
||||
|
||||
/// Waits for a tick to be hit on the GPU
|
||||
void Wait(u64 tick) {
|
||||
// No need to wait if the GPU is ahead of the tick
|
||||
if (IsFree(tick)) {
|
||||
return;
|
||||
}
|
||||
// Update the GPU tick and try again
|
||||
Refresh();
|
||||
if (IsFree(tick)) {
|
||||
return;
|
||||
}
|
||||
// If none of the above is hit, fallback to a regular wait
|
||||
while (!semaphore.Wait(tick)) {
|
||||
}
|
||||
Refresh();
|
||||
}
|
||||
|
||||
private:
|
||||
vk::Semaphore semaphore; ///< Timeline semaphore.
|
||||
std::atomic<u64> gpu_tick{0}; ///< Current known GPU tick.
|
||||
std::atomic<u64> current_tick{1}; ///< Current logical tick.
|
||||
std::jthread debug_thread; ///< Debug thread to workaround validation layer bugs.
|
||||
};
|
||||
|
||||
} // namespace Vulkan
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,165 +1,165 @@
|
||||
// SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <array>
|
||||
#include <cstddef>
|
||||
#include <filesystem>
|
||||
#include <memory>
|
||||
#include <type_traits>
|
||||
#include <unordered_map>
|
||||
#include <vector>
|
||||
|
||||
#include "common/common_types.h"
|
||||
#include "common/thread_worker.h"
|
||||
#include "shader_recompiler/frontend/ir/basic_block.h"
|
||||
#include "shader_recompiler/frontend/ir/value.h"
|
||||
#include "shader_recompiler/frontend/maxwell/control_flow.h"
|
||||
#include "shader_recompiler/host_translate_info.h"
|
||||
#include "shader_recompiler/object_pool.h"
|
||||
#include "shader_recompiler/profile.h"
|
||||
#include "video_core/engines/maxwell_3d.h"
|
||||
#include "video_core/renderer_vulkan/fixed_pipeline_state.h"
|
||||
#include "video_core/renderer_vulkan/vk_buffer_cache.h"
|
||||
#include "video_core/renderer_vulkan/vk_compute_pipeline.h"
|
||||
#include "video_core/renderer_vulkan/vk_graphics_pipeline.h"
|
||||
#include "video_core/renderer_vulkan/vk_texture_cache.h"
|
||||
#include "video_core/shader_cache.h"
|
||||
|
||||
namespace Core {
|
||||
class System;
|
||||
}
|
||||
|
||||
namespace Shader::IR {
|
||||
struct Program;
|
||||
}
|
||||
|
||||
namespace VideoCore {
|
||||
class ShaderNotify;
|
||||
}
|
||||
|
||||
namespace Vulkan {
|
||||
|
||||
using Maxwell = Tegra::Engines::Maxwell3D::Regs;
|
||||
|
||||
struct ComputePipelineCacheKey {
|
||||
u64 unique_hash;
|
||||
u32 shared_memory_size;
|
||||
std::array<u32, 3> workgroup_size;
|
||||
|
||||
size_t Hash() const noexcept;
|
||||
|
||||
bool operator==(const ComputePipelineCacheKey& rhs) const noexcept;
|
||||
|
||||
bool operator!=(const ComputePipelineCacheKey& rhs) const noexcept {
|
||||
return !operator==(rhs);
|
||||
}
|
||||
};
|
||||
static_assert(std::has_unique_object_representations_v<ComputePipelineCacheKey>);
|
||||
static_assert(std::is_trivially_copyable_v<ComputePipelineCacheKey>);
|
||||
static_assert(std::is_trivially_constructible_v<ComputePipelineCacheKey>);
|
||||
|
||||
} // namespace Vulkan
|
||||
|
||||
namespace std {
|
||||
|
||||
template <>
|
||||
struct hash<Vulkan::ComputePipelineCacheKey> {
|
||||
size_t operator()(const Vulkan::ComputePipelineCacheKey& k) const noexcept {
|
||||
return k.Hash();
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace std
|
||||
|
||||
namespace Vulkan {
|
||||
|
||||
class ComputePipeline;
|
||||
class DescriptorPool;
|
||||
class Device;
|
||||
class PipelineStatistics;
|
||||
class RasterizerVulkan;
|
||||
class RenderPassCache;
|
||||
class Scheduler;
|
||||
class UpdateDescriptorQueue;
|
||||
|
||||
using VideoCommon::ShaderInfo;
|
||||
|
||||
struct ShaderPools {
|
||||
void ReleaseContents() {
|
||||
flow_block.ReleaseContents();
|
||||
block.ReleaseContents();
|
||||
inst.ReleaseContents();
|
||||
}
|
||||
|
||||
Shader::ObjectPool<Shader::IR::Inst> inst;
|
||||
Shader::ObjectPool<Shader::IR::Block> block;
|
||||
Shader::ObjectPool<Shader::Maxwell::Flow::Block> flow_block;
|
||||
};
|
||||
|
||||
class PipelineCache : public VideoCommon::ShaderCache {
|
||||
public:
|
||||
explicit PipelineCache(RasterizerVulkan& rasterizer, const Device& device, Scheduler& scheduler,
|
||||
DescriptorPool& descriptor_pool,
|
||||
UpdateDescriptorQueue& update_descriptor_queue,
|
||||
RenderPassCache& render_pass_cache, BufferCache& buffer_cache,
|
||||
TextureCache& texture_cache, VideoCore::ShaderNotify& shader_notify_);
|
||||
~PipelineCache();
|
||||
|
||||
[[nodiscard]] GraphicsPipeline* CurrentGraphicsPipeline();
|
||||
|
||||
[[nodiscard]] ComputePipeline* CurrentComputePipeline();
|
||||
|
||||
void LoadDiskResources(u64 title_id, std::stop_token stop_loading,
|
||||
const VideoCore::DiskResourceLoadCallback& callback);
|
||||
|
||||
private:
|
||||
[[nodiscard]] GraphicsPipeline* CurrentGraphicsPipelineSlowPath();
|
||||
|
||||
[[nodiscard]] GraphicsPipeline* BuiltPipeline(GraphicsPipeline* pipeline) const noexcept;
|
||||
|
||||
std::unique_ptr<GraphicsPipeline> CreateGraphicsPipeline();
|
||||
|
||||
std::unique_ptr<GraphicsPipeline> CreateGraphicsPipeline(
|
||||
ShaderPools& pools, const GraphicsPipelineCacheKey& key,
|
||||
std::span<Shader::Environment* const> envs, PipelineStatistics* statistics,
|
||||
bool build_in_parallel);
|
||||
|
||||
std::unique_ptr<ComputePipeline> CreateComputePipeline(const ComputePipelineCacheKey& key,
|
||||
const ShaderInfo* shader);
|
||||
|
||||
std::unique_ptr<ComputePipeline> CreateComputePipeline(ShaderPools& pools,
|
||||
const ComputePipelineCacheKey& key,
|
||||
Shader::Environment& env,
|
||||
PipelineStatistics* statistics,
|
||||
bool build_in_parallel);
|
||||
|
||||
const Device& device;
|
||||
Scheduler& scheduler;
|
||||
DescriptorPool& descriptor_pool;
|
||||
UpdateDescriptorQueue& update_descriptor_queue;
|
||||
RenderPassCache& render_pass_cache;
|
||||
BufferCache& buffer_cache;
|
||||
TextureCache& texture_cache;
|
||||
VideoCore::ShaderNotify& shader_notify;
|
||||
bool use_asynchronous_shaders{};
|
||||
|
||||
GraphicsPipelineCacheKey graphics_key{};
|
||||
GraphicsPipeline* current_pipeline{};
|
||||
|
||||
std::unordered_map<ComputePipelineCacheKey, std::unique_ptr<ComputePipeline>> compute_cache;
|
||||
std::unordered_map<GraphicsPipelineCacheKey, std::unique_ptr<GraphicsPipeline>> graphics_cache;
|
||||
|
||||
ShaderPools main_pools;
|
||||
|
||||
Shader::Profile profile;
|
||||
Shader::HostTranslateInfo host_info;
|
||||
|
||||
std::filesystem::path pipeline_cache_filename;
|
||||
|
||||
Common::ThreadWorker workers;
|
||||
Common::ThreadWorker serialization_thread;
|
||||
};
|
||||
|
||||
} // namespace Vulkan
|
||||
// SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <array>
|
||||
#include <cstddef>
|
||||
#include <filesystem>
|
||||
#include <memory>
|
||||
#include <type_traits>
|
||||
#include <unordered_map>
|
||||
#include <vector>
|
||||
|
||||
#include "common/common_types.h"
|
||||
#include "common/thread_worker.h"
|
||||
#include "shader_recompiler/frontend/ir/basic_block.h"
|
||||
#include "shader_recompiler/frontend/ir/value.h"
|
||||
#include "shader_recompiler/frontend/maxwell/control_flow.h"
|
||||
#include "shader_recompiler/host_translate_info.h"
|
||||
#include "shader_recompiler/object_pool.h"
|
||||
#include "shader_recompiler/profile.h"
|
||||
#include "video_core/engines/maxwell_3d.h"
|
||||
#include "video_core/renderer_vulkan/fixed_pipeline_state.h"
|
||||
#include "video_core/renderer_vulkan/vk_buffer_cache.h"
|
||||
#include "video_core/renderer_vulkan/vk_compute_pipeline.h"
|
||||
#include "video_core/renderer_vulkan/vk_graphics_pipeline.h"
|
||||
#include "video_core/renderer_vulkan/vk_texture_cache.h"
|
||||
#include "video_core/shader_cache.h"
|
||||
|
||||
namespace Core {
|
||||
class System;
|
||||
}
|
||||
|
||||
namespace Shader::IR {
|
||||
struct Program;
|
||||
}
|
||||
|
||||
namespace VideoCore {
|
||||
class ShaderNotify;
|
||||
}
|
||||
|
||||
namespace Vulkan {
|
||||
|
||||
using Maxwell = Tegra::Engines::Maxwell3D::Regs;
|
||||
|
||||
struct ComputePipelineCacheKey {
|
||||
u64 unique_hash;
|
||||
u32 shared_memory_size;
|
||||
std::array<u32, 3> workgroup_size;
|
||||
|
||||
size_t Hash() const noexcept;
|
||||
|
||||
bool operator==(const ComputePipelineCacheKey& rhs) const noexcept;
|
||||
|
||||
bool operator!=(const ComputePipelineCacheKey& rhs) const noexcept {
|
||||
return !operator==(rhs);
|
||||
}
|
||||
};
|
||||
static_assert(std::has_unique_object_representations_v<ComputePipelineCacheKey>);
|
||||
static_assert(std::is_trivially_copyable_v<ComputePipelineCacheKey>);
|
||||
static_assert(std::is_trivially_constructible_v<ComputePipelineCacheKey>);
|
||||
|
||||
} // namespace Vulkan
|
||||
|
||||
namespace std {
|
||||
|
||||
template <>
|
||||
struct hash<Vulkan::ComputePipelineCacheKey> {
|
||||
size_t operator()(const Vulkan::ComputePipelineCacheKey& k) const noexcept {
|
||||
return k.Hash();
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace std
|
||||
|
||||
namespace Vulkan {
|
||||
|
||||
class ComputePipeline;
|
||||
class DescriptorPool;
|
||||
class Device;
|
||||
class PipelineStatistics;
|
||||
class RasterizerVulkan;
|
||||
class RenderPassCache;
|
||||
class Scheduler;
|
||||
class UpdateDescriptorQueue;
|
||||
|
||||
using VideoCommon::ShaderInfo;
|
||||
|
||||
struct ShaderPools {
|
||||
void ReleaseContents() {
|
||||
flow_block.ReleaseContents();
|
||||
block.ReleaseContents();
|
||||
inst.ReleaseContents();
|
||||
}
|
||||
|
||||
Shader::ObjectPool<Shader::IR::Inst> inst;
|
||||
Shader::ObjectPool<Shader::IR::Block> block;
|
||||
Shader::ObjectPool<Shader::Maxwell::Flow::Block> flow_block;
|
||||
};
|
||||
|
||||
class PipelineCache : public VideoCommon::ShaderCache {
|
||||
public:
|
||||
explicit PipelineCache(RasterizerVulkan& rasterizer, const Device& device, Scheduler& scheduler,
|
||||
DescriptorPool& descriptor_pool,
|
||||
UpdateDescriptorQueue& update_descriptor_queue,
|
||||
RenderPassCache& render_pass_cache, BufferCache& buffer_cache,
|
||||
TextureCache& texture_cache, VideoCore::ShaderNotify& shader_notify_);
|
||||
~PipelineCache();
|
||||
|
||||
[[nodiscard]] GraphicsPipeline* CurrentGraphicsPipeline();
|
||||
|
||||
[[nodiscard]] ComputePipeline* CurrentComputePipeline();
|
||||
|
||||
void LoadDiskResources(u64 title_id, std::stop_token stop_loading,
|
||||
const VideoCore::DiskResourceLoadCallback& callback);
|
||||
|
||||
private:
|
||||
[[nodiscard]] GraphicsPipeline* CurrentGraphicsPipelineSlowPath();
|
||||
|
||||
[[nodiscard]] GraphicsPipeline* BuiltPipeline(GraphicsPipeline* pipeline) const noexcept;
|
||||
|
||||
std::unique_ptr<GraphicsPipeline> CreateGraphicsPipeline();
|
||||
|
||||
std::unique_ptr<GraphicsPipeline> CreateGraphicsPipeline(
|
||||
ShaderPools& pools, const GraphicsPipelineCacheKey& key,
|
||||
std::span<Shader::Environment* const> envs, PipelineStatistics* statistics,
|
||||
bool build_in_parallel);
|
||||
|
||||
std::unique_ptr<ComputePipeline> CreateComputePipeline(const ComputePipelineCacheKey& key,
|
||||
const ShaderInfo* shader);
|
||||
|
||||
std::unique_ptr<ComputePipeline> CreateComputePipeline(ShaderPools& pools,
|
||||
const ComputePipelineCacheKey& key,
|
||||
Shader::Environment& env,
|
||||
PipelineStatistics* statistics,
|
||||
bool build_in_parallel);
|
||||
|
||||
const Device& device;
|
||||
Scheduler& scheduler;
|
||||
DescriptorPool& descriptor_pool;
|
||||
UpdateDescriptorQueue& update_descriptor_queue;
|
||||
RenderPassCache& render_pass_cache;
|
||||
BufferCache& buffer_cache;
|
||||
TextureCache& texture_cache;
|
||||
VideoCore::ShaderNotify& shader_notify;
|
||||
bool use_asynchronous_shaders{};
|
||||
|
||||
GraphicsPipelineCacheKey graphics_key{};
|
||||
GraphicsPipeline* current_pipeline{};
|
||||
|
||||
std::unordered_map<ComputePipelineCacheKey, std::unique_ptr<ComputePipeline>> compute_cache;
|
||||
std::unordered_map<GraphicsPipelineCacheKey, std::unique_ptr<GraphicsPipeline>> graphics_cache;
|
||||
|
||||
ShaderPools main_pools;
|
||||
|
||||
Shader::Profile profile;
|
||||
Shader::HostTranslateInfo host_info;
|
||||
|
||||
std::filesystem::path pipeline_cache_filename;
|
||||
|
||||
Common::ThreadWorker workers;
|
||||
Common::ThreadWorker serialization_thread;
|
||||
};
|
||||
|
||||
} // namespace Vulkan
|
||||
|
||||
@@ -1,133 +1,133 @@
|
||||
// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#include <algorithm>
|
||||
#include <cstddef>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
#include "video_core/renderer_vulkan/vk_query_cache.h"
|
||||
#include "video_core/renderer_vulkan/vk_resource_pool.h"
|
||||
#include "video_core/renderer_vulkan/vk_scheduler.h"
|
||||
#include "video_core/vulkan_common/vulkan_device.h"
|
||||
#include "video_core/vulkan_common/vulkan_wrapper.h"
|
||||
|
||||
namespace Vulkan {
|
||||
|
||||
using VideoCore::QueryType;
|
||||
|
||||
namespace {
|
||||
|
||||
constexpr std::array QUERY_TARGETS = {VK_QUERY_TYPE_OCCLUSION};
|
||||
|
||||
constexpr VkQueryType GetTarget(QueryType type) {
|
||||
return QUERY_TARGETS[static_cast<std::size_t>(type)];
|
||||
}
|
||||
|
||||
} // Anonymous namespace
|
||||
|
||||
QueryPool::QueryPool(const Device& device_, Scheduler& scheduler, QueryType type_)
|
||||
: ResourcePool{scheduler.GetMasterSemaphore(), GROW_STEP}, device{device_}, type{type_} {}
|
||||
|
||||
QueryPool::~QueryPool() = default;
|
||||
|
||||
std::pair<VkQueryPool, u32> QueryPool::Commit() {
|
||||
std::size_t index;
|
||||
do {
|
||||
index = CommitResource();
|
||||
} while (usage[index]);
|
||||
usage[index] = true;
|
||||
|
||||
return {*pools[index / GROW_STEP], static_cast<u32>(index % GROW_STEP)};
|
||||
}
|
||||
|
||||
void QueryPool::Allocate(std::size_t begin, std::size_t end) {
|
||||
usage.resize(end);
|
||||
|
||||
pools.push_back(device.GetLogical().CreateQueryPool({
|
||||
.sType = VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO,
|
||||
.pNext = nullptr,
|
||||
.flags = 0,
|
||||
.queryType = GetTarget(type),
|
||||
.queryCount = static_cast<u32>(end - begin),
|
||||
.pipelineStatistics = 0,
|
||||
}));
|
||||
}
|
||||
|
||||
void QueryPool::Reserve(std::pair<VkQueryPool, u32> query) {
|
||||
const auto it =
|
||||
std::find_if(pools.begin(), pools.end(), [query_pool = query.first](vk::QueryPool& pool) {
|
||||
return query_pool == *pool;
|
||||
});
|
||||
|
||||
if (it != std::end(pools)) {
|
||||
const std::ptrdiff_t pool_index = std::distance(std::begin(pools), it);
|
||||
usage[pool_index * GROW_STEP + static_cast<std::ptrdiff_t>(query.second)] = false;
|
||||
}
|
||||
}
|
||||
|
||||
QueryCache::QueryCache(VideoCore::RasterizerInterface& rasterizer_, const Device& device_,
|
||||
Scheduler& scheduler_)
|
||||
: QueryCacheBase{rasterizer_}, device{device_}, scheduler{scheduler_},
|
||||
query_pools{
|
||||
QueryPool{device_, scheduler_, QueryType::SamplesPassed},
|
||||
} {}
|
||||
|
||||
QueryCache::~QueryCache() {
|
||||
// TODO(Rodrigo): This is a hack to destroy all HostCounter instances before the base class
|
||||
// destructor is called. The query cache should be redesigned to have a proper ownership model
|
||||
// instead of using shared pointers.
|
||||
for (size_t query_type = 0; query_type < VideoCore::NumQueryTypes; ++query_type) {
|
||||
auto& stream = Stream(static_cast<QueryType>(query_type));
|
||||
stream.Update(false);
|
||||
stream.Reset();
|
||||
}
|
||||
}
|
||||
|
||||
std::pair<VkQueryPool, u32> QueryCache::AllocateQuery(QueryType type) {
|
||||
return query_pools[static_cast<std::size_t>(type)].Commit();
|
||||
}
|
||||
|
||||
void QueryCache::Reserve(QueryType type, std::pair<VkQueryPool, u32> query) {
|
||||
query_pools[static_cast<std::size_t>(type)].Reserve(query);
|
||||
}
|
||||
|
||||
HostCounter::HostCounter(QueryCache& cache_, std::shared_ptr<HostCounter> dependency_,
|
||||
QueryType type_)
|
||||
: HostCounterBase{std::move(dependency_)}, cache{cache_}, type{type_},
|
||||
query{cache_.AllocateQuery(type_)}, tick{cache_.GetScheduler().CurrentTick()} {
|
||||
const vk::Device* logical = &cache.GetDevice().GetLogical();
|
||||
cache.GetScheduler().Record([logical, query = query](vk::CommandBuffer cmdbuf) {
|
||||
logical->ResetQueryPoolEXT(query.first, query.second, 1);
|
||||
cmdbuf.BeginQuery(query.first, query.second, VK_QUERY_CONTROL_PRECISE_BIT);
|
||||
});
|
||||
}
|
||||
|
||||
HostCounter::~HostCounter() {
|
||||
cache.Reserve(type, query);
|
||||
}
|
||||
|
||||
void HostCounter::EndQuery() {
|
||||
cache.GetScheduler().Record(
|
||||
[query = query](vk::CommandBuffer cmdbuf) { cmdbuf.EndQuery(query.first, query.second); });
|
||||
}
|
||||
|
||||
u64 HostCounter::BlockingQuery() const {
|
||||
cache.GetScheduler().Wait(tick);
|
||||
u64 data;
|
||||
const VkResult query_result = cache.GetDevice().GetLogical().GetQueryResults(
|
||||
query.first, query.second, 1, sizeof(data), &data, sizeof(data),
|
||||
VK_QUERY_RESULT_64_BIT | VK_QUERY_RESULT_WAIT_BIT);
|
||||
|
||||
switch (query_result) {
|
||||
case VK_SUCCESS:
|
||||
return data;
|
||||
case VK_ERROR_DEVICE_LOST:
|
||||
cache.GetDevice().ReportLoss();
|
||||
[[fallthrough]];
|
||||
default:
|
||||
throw vk::Exception(query_result);
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace Vulkan
|
||||
// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#include <algorithm>
|
||||
#include <cstddef>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
#include "video_core/renderer_vulkan/vk_query_cache.h"
|
||||
#include "video_core/renderer_vulkan/vk_resource_pool.h"
|
||||
#include "video_core/renderer_vulkan/vk_scheduler.h"
|
||||
#include "video_core/vulkan_common/vulkan_device.h"
|
||||
#include "video_core/vulkan_common/vulkan_wrapper.h"
|
||||
|
||||
namespace Vulkan {
|
||||
|
||||
using VideoCore::QueryType;
|
||||
|
||||
namespace {
|
||||
|
||||
constexpr std::array QUERY_TARGETS = {VK_QUERY_TYPE_OCCLUSION};
|
||||
|
||||
constexpr VkQueryType GetTarget(QueryType type) {
|
||||
return QUERY_TARGETS[static_cast<std::size_t>(type)];
|
||||
}
|
||||
|
||||
} // Anonymous namespace
|
||||
|
||||
QueryPool::QueryPool(const Device& device_, Scheduler& scheduler, QueryType type_)
|
||||
: ResourcePool{scheduler.GetMasterSemaphore(), GROW_STEP}, device{device_}, type{type_} {}
|
||||
|
||||
QueryPool::~QueryPool() = default;
|
||||
|
||||
std::pair<VkQueryPool, u32> QueryPool::Commit() {
|
||||
std::size_t index;
|
||||
do {
|
||||
index = CommitResource();
|
||||
} while (usage[index]);
|
||||
usage[index] = true;
|
||||
|
||||
return {*pools[index / GROW_STEP], static_cast<u32>(index % GROW_STEP)};
|
||||
}
|
||||
|
||||
void QueryPool::Allocate(std::size_t begin, std::size_t end) {
|
||||
usage.resize(end);
|
||||
|
||||
pools.push_back(device.GetLogical().CreateQueryPool({
|
||||
.sType = VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO,
|
||||
.pNext = nullptr,
|
||||
.flags = 0,
|
||||
.queryType = GetTarget(type),
|
||||
.queryCount = static_cast<u32>(end - begin),
|
||||
.pipelineStatistics = 0,
|
||||
}));
|
||||
}
|
||||
|
||||
void QueryPool::Reserve(std::pair<VkQueryPool, u32> query) {
|
||||
const auto it =
|
||||
std::find_if(pools.begin(), pools.end(), [query_pool = query.first](vk::QueryPool& pool) {
|
||||
return query_pool == *pool;
|
||||
});
|
||||
|
||||
if (it != std::end(pools)) {
|
||||
const std::ptrdiff_t pool_index = std::distance(std::begin(pools), it);
|
||||
usage[pool_index * GROW_STEP + static_cast<std::ptrdiff_t>(query.second)] = false;
|
||||
}
|
||||
}
|
||||
|
||||
QueryCache::QueryCache(VideoCore::RasterizerInterface& rasterizer_, const Device& device_,
|
||||
Scheduler& scheduler_)
|
||||
: QueryCacheBase{rasterizer_}, device{device_}, scheduler{scheduler_},
|
||||
query_pools{
|
||||
QueryPool{device_, scheduler_, QueryType::SamplesPassed},
|
||||
} {}
|
||||
|
||||
QueryCache::~QueryCache() {
|
||||
// TODO(Rodrigo): This is a hack to destroy all HostCounter instances before the base class
|
||||
// destructor is called. The query cache should be redesigned to have a proper ownership model
|
||||
// instead of using shared pointers.
|
||||
for (size_t query_type = 0; query_type < VideoCore::NumQueryTypes; ++query_type) {
|
||||
auto& stream = Stream(static_cast<QueryType>(query_type));
|
||||
stream.Update(false);
|
||||
stream.Reset();
|
||||
}
|
||||
}
|
||||
|
||||
std::pair<VkQueryPool, u32> QueryCache::AllocateQuery(QueryType type) {
|
||||
return query_pools[static_cast<std::size_t>(type)].Commit();
|
||||
}
|
||||
|
||||
void QueryCache::Reserve(QueryType type, std::pair<VkQueryPool, u32> query) {
|
||||
query_pools[static_cast<std::size_t>(type)].Reserve(query);
|
||||
}
|
||||
|
||||
HostCounter::HostCounter(QueryCache& cache_, std::shared_ptr<HostCounter> dependency_,
|
||||
QueryType type_)
|
||||
: HostCounterBase{std::move(dependency_)}, cache{cache_}, type{type_},
|
||||
query{cache_.AllocateQuery(type_)}, tick{cache_.GetScheduler().CurrentTick()} {
|
||||
const vk::Device* logical = &cache.GetDevice().GetLogical();
|
||||
cache.GetScheduler().Record([logical, query = query](vk::CommandBuffer cmdbuf) {
|
||||
logical->ResetQueryPoolEXT(query.first, query.second, 1);
|
||||
cmdbuf.BeginQuery(query.first, query.second, VK_QUERY_CONTROL_PRECISE_BIT);
|
||||
});
|
||||
}
|
||||
|
||||
HostCounter::~HostCounter() {
|
||||
cache.Reserve(type, query);
|
||||
}
|
||||
|
||||
void HostCounter::EndQuery() {
|
||||
cache.GetScheduler().Record(
|
||||
[query = query](vk::CommandBuffer cmdbuf) { cmdbuf.EndQuery(query.first, query.second); });
|
||||
}
|
||||
|
||||
u64 HostCounter::BlockingQuery() const {
|
||||
cache.GetScheduler().Wait(tick);
|
||||
u64 data;
|
||||
const VkResult query_result = cache.GetDevice().GetLogical().GetQueryResults(
|
||||
query.first, query.second, 1, sizeof(data), &data, sizeof(data),
|
||||
VK_QUERY_RESULT_64_BIT | VK_QUERY_RESULT_WAIT_BIT);
|
||||
|
||||
switch (query_result) {
|
||||
case VK_SUCCESS:
|
||||
return data;
|
||||
case VK_ERROR_DEVICE_LOST:
|
||||
cache.GetDevice().ReportLoss();
|
||||
[[fallthrough]];
|
||||
default:
|
||||
throw vk::Exception(query_result);
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace Vulkan
|
||||
|
||||
@@ -1,100 +1,100 @@
|
||||
// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <cstddef>
|
||||
#include <memory>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
#include "common/common_types.h"
|
||||
#include "video_core/query_cache.h"
|
||||
#include "video_core/renderer_vulkan/vk_resource_pool.h"
|
||||
#include "video_core/vulkan_common/vulkan_wrapper.h"
|
||||
|
||||
namespace VideoCore {
|
||||
class RasterizerInterface;
|
||||
}
|
||||
|
||||
namespace Vulkan {
|
||||
|
||||
class CachedQuery;
|
||||
class Device;
|
||||
class HostCounter;
|
||||
class QueryCache;
|
||||
class Scheduler;
|
||||
|
||||
using CounterStream = VideoCommon::CounterStreamBase<QueryCache, HostCounter>;
|
||||
|
||||
class QueryPool final : public ResourcePool {
|
||||
public:
|
||||
explicit QueryPool(const Device& device, Scheduler& scheduler, VideoCore::QueryType type);
|
||||
~QueryPool() override;
|
||||
|
||||
std::pair<VkQueryPool, u32> Commit();
|
||||
|
||||
void Reserve(std::pair<VkQueryPool, u32> query);
|
||||
|
||||
protected:
|
||||
void Allocate(std::size_t begin, std::size_t end) override;
|
||||
|
||||
private:
|
||||
static constexpr std::size_t GROW_STEP = 512;
|
||||
|
||||
const Device& device;
|
||||
const VideoCore::QueryType type;
|
||||
|
||||
std::vector<vk::QueryPool> pools;
|
||||
std::vector<bool> usage;
|
||||
};
|
||||
|
||||
class QueryCache final
|
||||
: public VideoCommon::QueryCacheBase<QueryCache, CachedQuery, CounterStream, HostCounter> {
|
||||
public:
|
||||
explicit QueryCache(VideoCore::RasterizerInterface& rasterizer_, const Device& device_,
|
||||
Scheduler& scheduler_);
|
||||
~QueryCache();
|
||||
|
||||
std::pair<VkQueryPool, u32> AllocateQuery(VideoCore::QueryType type);
|
||||
|
||||
void Reserve(VideoCore::QueryType type, std::pair<VkQueryPool, u32> query);
|
||||
|
||||
const Device& GetDevice() const noexcept {
|
||||
return device;
|
||||
}
|
||||
|
||||
Scheduler& GetScheduler() const noexcept {
|
||||
return scheduler;
|
||||
}
|
||||
|
||||
private:
|
||||
const Device& device;
|
||||
Scheduler& scheduler;
|
||||
std::array<QueryPool, VideoCore::NumQueryTypes> query_pools;
|
||||
};
|
||||
|
||||
class HostCounter final : public VideoCommon::HostCounterBase<QueryCache, HostCounter> {
|
||||
public:
|
||||
explicit HostCounter(QueryCache& cache_, std::shared_ptr<HostCounter> dependency_,
|
||||
VideoCore::QueryType type_);
|
||||
~HostCounter();
|
||||
|
||||
void EndQuery();
|
||||
|
||||
private:
|
||||
u64 BlockingQuery() const override;
|
||||
|
||||
QueryCache& cache;
|
||||
const VideoCore::QueryType type;
|
||||
const std::pair<VkQueryPool, u32> query;
|
||||
const u64 tick;
|
||||
};
|
||||
|
||||
class CachedQuery : public VideoCommon::CachedQueryBase<HostCounter> {
|
||||
public:
|
||||
explicit CachedQuery(QueryCache&, VideoCore::QueryType, VAddr cpu_addr_, u8* host_ptr_)
|
||||
: CachedQueryBase{cpu_addr_, host_ptr_} {}
|
||||
};
|
||||
|
||||
} // namespace Vulkan
|
||||
// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <cstddef>
|
||||
#include <memory>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
#include "common/common_types.h"
|
||||
#include "video_core/query_cache.h"
|
||||
#include "video_core/renderer_vulkan/vk_resource_pool.h"
|
||||
#include "video_core/vulkan_common/vulkan_wrapper.h"
|
||||
|
||||
namespace VideoCore {
|
||||
class RasterizerInterface;
|
||||
}
|
||||
|
||||
namespace Vulkan {
|
||||
|
||||
class CachedQuery;
|
||||
class Device;
|
||||
class HostCounter;
|
||||
class QueryCache;
|
||||
class Scheduler;
|
||||
|
||||
using CounterStream = VideoCommon::CounterStreamBase<QueryCache, HostCounter>;
|
||||
|
||||
class QueryPool final : public ResourcePool {
|
||||
public:
|
||||
explicit QueryPool(const Device& device, Scheduler& scheduler, VideoCore::QueryType type);
|
||||
~QueryPool() override;
|
||||
|
||||
std::pair<VkQueryPool, u32> Commit();
|
||||
|
||||
void Reserve(std::pair<VkQueryPool, u32> query);
|
||||
|
||||
protected:
|
||||
void Allocate(std::size_t begin, std::size_t end) override;
|
||||
|
||||
private:
|
||||
static constexpr std::size_t GROW_STEP = 512;
|
||||
|
||||
const Device& device;
|
||||
const VideoCore::QueryType type;
|
||||
|
||||
std::vector<vk::QueryPool> pools;
|
||||
std::vector<bool> usage;
|
||||
};
|
||||
|
||||
class QueryCache final
|
||||
: public VideoCommon::QueryCacheBase<QueryCache, CachedQuery, CounterStream, HostCounter> {
|
||||
public:
|
||||
explicit QueryCache(VideoCore::RasterizerInterface& rasterizer_, const Device& device_,
|
||||
Scheduler& scheduler_);
|
||||
~QueryCache();
|
||||
|
||||
std::pair<VkQueryPool, u32> AllocateQuery(VideoCore::QueryType type);
|
||||
|
||||
void Reserve(VideoCore::QueryType type, std::pair<VkQueryPool, u32> query);
|
||||
|
||||
const Device& GetDevice() const noexcept {
|
||||
return device;
|
||||
}
|
||||
|
||||
Scheduler& GetScheduler() const noexcept {
|
||||
return scheduler;
|
||||
}
|
||||
|
||||
private:
|
||||
const Device& device;
|
||||
Scheduler& scheduler;
|
||||
std::array<QueryPool, VideoCore::NumQueryTypes> query_pools;
|
||||
};
|
||||
|
||||
class HostCounter final : public VideoCommon::HostCounterBase<QueryCache, HostCounter> {
|
||||
public:
|
||||
explicit HostCounter(QueryCache& cache_, std::shared_ptr<HostCounter> dependency_,
|
||||
VideoCore::QueryType type_);
|
||||
~HostCounter();
|
||||
|
||||
void EndQuery();
|
||||
|
||||
private:
|
||||
u64 BlockingQuery() const override;
|
||||
|
||||
QueryCache& cache;
|
||||
const VideoCore::QueryType type;
|
||||
const std::pair<VkQueryPool, u32> query;
|
||||
const u64 tick;
|
||||
};
|
||||
|
||||
class CachedQuery : public VideoCommon::CachedQueryBase<HostCounter> {
|
||||
public:
|
||||
explicit CachedQuery(QueryCache&, VideoCore::QueryType, VAddr cpu_addr_, u8* host_ptr_)
|
||||
: CachedQueryBase{cpu_addr_, host_ptr_} {}
|
||||
};
|
||||
|
||||
} // namespace Vulkan
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,178 +1,178 @@
|
||||
// SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <array>
|
||||
|
||||
#include <boost/container/static_vector.hpp>
|
||||
|
||||
#include "common/common_types.h"
|
||||
#include "video_core/control/channel_state_cache.h"
|
||||
#include "video_core/engines/maxwell_dma.h"
|
||||
#include "video_core/rasterizer_accelerated.h"
|
||||
#include "video_core/rasterizer_interface.h"
|
||||
#include "video_core/renderer_vulkan/blit_image.h"
|
||||
#include "video_core/renderer_vulkan/vk_buffer_cache.h"
|
||||
#include "video_core/renderer_vulkan/vk_descriptor_pool.h"
|
||||
#include "video_core/renderer_vulkan/vk_fence_manager.h"
|
||||
#include "video_core/renderer_vulkan/vk_pipeline_cache.h"
|
||||
#include "video_core/renderer_vulkan/vk_query_cache.h"
|
||||
#include "video_core/renderer_vulkan/vk_render_pass_cache.h"
|
||||
#include "video_core/renderer_vulkan/vk_staging_buffer_pool.h"
|
||||
#include "video_core/renderer_vulkan/vk_texture_cache.h"
|
||||
#include "video_core/renderer_vulkan/vk_update_descriptor.h"
|
||||
#include "video_core/vulkan_common/vulkan_memory_allocator.h"
|
||||
#include "video_core/vulkan_common/vulkan_wrapper.h"
|
||||
|
||||
namespace Core {
|
||||
class System;
|
||||
}
|
||||
|
||||
namespace Core::Frontend {
|
||||
class EmuWindow;
|
||||
}
|
||||
|
||||
namespace Tegra::Engines {
|
||||
class Maxwell3D;
|
||||
}
|
||||
|
||||
namespace Vulkan {
|
||||
|
||||
struct ScreenInfo;
|
||||
|
||||
class StateTracker;
|
||||
|
||||
class AccelerateDMA : public Tegra::Engines::AccelerateDMAInterface {
|
||||
public:
|
||||
explicit AccelerateDMA(BufferCache& buffer_cache);
|
||||
|
||||
bool BufferCopy(GPUVAddr start_address, GPUVAddr end_address, u64 amount) override;
|
||||
|
||||
bool BufferClear(GPUVAddr src_address, u64 amount, u32 value) override;
|
||||
|
||||
private:
|
||||
BufferCache& buffer_cache;
|
||||
};
|
||||
|
||||
class RasterizerVulkan final : public VideoCore::RasterizerAccelerated,
|
||||
protected VideoCommon::ChannelSetupCaches<VideoCommon::ChannelInfo> {
|
||||
public:
|
||||
explicit RasterizerVulkan(Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu_,
|
||||
Core::Memory::Memory& cpu_memory_, ScreenInfo& screen_info_,
|
||||
const Device& device_, MemoryAllocator& memory_allocator_,
|
||||
StateTracker& state_tracker_, Scheduler& scheduler_);
|
||||
~RasterizerVulkan() override;
|
||||
|
||||
void Draw(bool is_indexed, u32 instance_count) override;
|
||||
void Clear() override;
|
||||
void DispatchCompute() override;
|
||||
void ResetCounter(VideoCore::QueryType type) override;
|
||||
void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional<u64> timestamp) override;
|
||||
void BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, u32 size) override;
|
||||
void DisableGraphicsUniformBuffer(size_t stage, u32 index) override;
|
||||
void FlushAll() override;
|
||||
void FlushRegion(VAddr addr, u64 size) override;
|
||||
bool MustFlushRegion(VAddr addr, u64 size) override;
|
||||
void InvalidateRegion(VAddr addr, u64 size) override;
|
||||
void OnCPUWrite(VAddr addr, u64 size) override;
|
||||
void InvalidateGPUCache() override;
|
||||
void UnmapMemory(VAddr addr, u64 size) override;
|
||||
void ModifyGPUMemory(size_t as_id, GPUVAddr addr, u64 size) override;
|
||||
void SignalFence(std::function<void()>&& func) override;
|
||||
void SyncOperation(std::function<void()>&& func) override;
|
||||
void SignalSyncPoint(u32 value) override;
|
||||
void SignalReference() override;
|
||||
void ReleaseFences() override;
|
||||
void FlushAndInvalidateRegion(VAddr addr, u64 size) override;
|
||||
void WaitForIdle() override;
|
||||
void FragmentBarrier() override;
|
||||
void TiledCacheBarrier() override;
|
||||
void FlushCommands() override;
|
||||
void TickFrame() override;
|
||||
bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Surface& src,
|
||||
const Tegra::Engines::Fermi2D::Surface& dst,
|
||||
const Tegra::Engines::Fermi2D::Config& copy_config) override;
|
||||
Tegra::Engines::AccelerateDMAInterface& AccessAccelerateDMA() override;
|
||||
void AccelerateInlineToMemory(GPUVAddr address, size_t copy_size,
|
||||
std::span<const u8> memory) override;
|
||||
bool AccelerateDisplay(const Tegra::FramebufferConfig& config, VAddr framebuffer_addr,
|
||||
u32 pixel_stride) override;
|
||||
void LoadDiskResources(u64 title_id, std::stop_token stop_loading,
|
||||
const VideoCore::DiskResourceLoadCallback& callback) override;
|
||||
|
||||
void InitializeChannel(Tegra::Control::ChannelState& channel) override;
|
||||
|
||||
void BindChannel(Tegra::Control::ChannelState& channel) override;
|
||||
|
||||
void ReleaseChannel(s32 channel_id) override;
|
||||
|
||||
private:
|
||||
static constexpr size_t MAX_TEXTURES = 192;
|
||||
static constexpr size_t MAX_IMAGES = 48;
|
||||
static constexpr size_t MAX_IMAGE_VIEWS = MAX_TEXTURES + MAX_IMAGES;
|
||||
|
||||
static constexpr VkDeviceSize DEFAULT_BUFFER_SIZE = 4 * sizeof(float);
|
||||
|
||||
void FlushWork();
|
||||
|
||||
void UpdateDynamicStates();
|
||||
|
||||
void BeginTransformFeedback();
|
||||
|
||||
void EndTransformFeedback();
|
||||
|
||||
void UpdateViewportsState(Tegra::Engines::Maxwell3D::Regs& regs);
|
||||
void UpdateScissorsState(Tegra::Engines::Maxwell3D::Regs& regs);
|
||||
void UpdateDepthBias(Tegra::Engines::Maxwell3D::Regs& regs);
|
||||
void UpdateBlendConstants(Tegra::Engines::Maxwell3D::Regs& regs);
|
||||
void UpdateDepthBounds(Tegra::Engines::Maxwell3D::Regs& regs);
|
||||
void UpdateStencilFaces(Tegra::Engines::Maxwell3D::Regs& regs);
|
||||
void UpdateLineWidth(Tegra::Engines::Maxwell3D::Regs& regs);
|
||||
|
||||
void UpdateCullMode(Tegra::Engines::Maxwell3D::Regs& regs);
|
||||
void UpdateDepthBoundsTestEnable(Tegra::Engines::Maxwell3D::Regs& regs);
|
||||
void UpdateDepthTestEnable(Tegra::Engines::Maxwell3D::Regs& regs);
|
||||
void UpdateDepthWriteEnable(Tegra::Engines::Maxwell3D::Regs& regs);
|
||||
void UpdateDepthCompareOp(Tegra::Engines::Maxwell3D::Regs& regs);
|
||||
void UpdateFrontFace(Tegra::Engines::Maxwell3D::Regs& regs);
|
||||
void UpdateStencilOp(Tegra::Engines::Maxwell3D::Regs& regs);
|
||||
void UpdateStencilTestEnable(Tegra::Engines::Maxwell3D::Regs& regs);
|
||||
|
||||
void UpdateVertexInput(Tegra::Engines::Maxwell3D::Regs& regs);
|
||||
|
||||
void BindInlineIndexBuffer();
|
||||
|
||||
Tegra::GPU& gpu;
|
||||
|
||||
ScreenInfo& screen_info;
|
||||
const Device& device;
|
||||
MemoryAllocator& memory_allocator;
|
||||
StateTracker& state_tracker;
|
||||
Scheduler& scheduler;
|
||||
|
||||
StagingBufferPool staging_pool;
|
||||
DescriptorPool descriptor_pool;
|
||||
UpdateDescriptorQueue update_descriptor_queue;
|
||||
BlitImageHelper blit_image;
|
||||
RenderPassCache render_pass_cache;
|
||||
|
||||
TextureCacheRuntime texture_cache_runtime;
|
||||
TextureCache texture_cache;
|
||||
BufferCacheRuntime buffer_cache_runtime;
|
||||
BufferCache buffer_cache;
|
||||
PipelineCache pipeline_cache;
|
||||
QueryCache query_cache;
|
||||
AccelerateDMA accelerate_dma;
|
||||
FenceManager fence_manager;
|
||||
|
||||
vk::Event wfi_event;
|
||||
|
||||
boost::container::static_vector<u32, MAX_IMAGE_VIEWS> image_view_indices;
|
||||
std::array<VideoCommon::ImageViewId, MAX_IMAGE_VIEWS> image_view_ids;
|
||||
boost::container::static_vector<VkSampler, MAX_TEXTURES> sampler_handles;
|
||||
|
||||
u32 draw_counter = 0;
|
||||
};
|
||||
|
||||
} // namespace Vulkan
|
||||
// SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <array>
|
||||
|
||||
#include <boost/container/static_vector.hpp>
|
||||
|
||||
#include "common/common_types.h"
|
||||
#include "video_core/control/channel_state_cache.h"
|
||||
#include "video_core/engines/maxwell_dma.h"
|
||||
#include "video_core/rasterizer_accelerated.h"
|
||||
#include "video_core/rasterizer_interface.h"
|
||||
#include "video_core/renderer_vulkan/blit_image.h"
|
||||
#include "video_core/renderer_vulkan/vk_buffer_cache.h"
|
||||
#include "video_core/renderer_vulkan/vk_descriptor_pool.h"
|
||||
#include "video_core/renderer_vulkan/vk_fence_manager.h"
|
||||
#include "video_core/renderer_vulkan/vk_pipeline_cache.h"
|
||||
#include "video_core/renderer_vulkan/vk_query_cache.h"
|
||||
#include "video_core/renderer_vulkan/vk_render_pass_cache.h"
|
||||
#include "video_core/renderer_vulkan/vk_staging_buffer_pool.h"
|
||||
#include "video_core/renderer_vulkan/vk_texture_cache.h"
|
||||
#include "video_core/renderer_vulkan/vk_update_descriptor.h"
|
||||
#include "video_core/vulkan_common/vulkan_memory_allocator.h"
|
||||
#include "video_core/vulkan_common/vulkan_wrapper.h"
|
||||
|
||||
namespace Core {
|
||||
class System;
|
||||
}
|
||||
|
||||
namespace Core::Frontend {
|
||||
class EmuWindow;
|
||||
}
|
||||
|
||||
namespace Tegra::Engines {
|
||||
class Maxwell3D;
|
||||
}
|
||||
|
||||
namespace Vulkan {
|
||||
|
||||
struct ScreenInfo;
|
||||
|
||||
class StateTracker;
|
||||
|
||||
class AccelerateDMA : public Tegra::Engines::AccelerateDMAInterface {
|
||||
public:
|
||||
explicit AccelerateDMA(BufferCache& buffer_cache);
|
||||
|
||||
bool BufferCopy(GPUVAddr start_address, GPUVAddr end_address, u64 amount) override;
|
||||
|
||||
bool BufferClear(GPUVAddr src_address, u64 amount, u32 value) override;
|
||||
|
||||
private:
|
||||
BufferCache& buffer_cache;
|
||||
};
|
||||
|
||||
class RasterizerVulkan final : public VideoCore::RasterizerAccelerated,
|
||||
protected VideoCommon::ChannelSetupCaches<VideoCommon::ChannelInfo> {
|
||||
public:
|
||||
explicit RasterizerVulkan(Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu_,
|
||||
Core::Memory::Memory& cpu_memory_, ScreenInfo& screen_info_,
|
||||
const Device& device_, MemoryAllocator& memory_allocator_,
|
||||
StateTracker& state_tracker_, Scheduler& scheduler_);
|
||||
~RasterizerVulkan() override;
|
||||
|
||||
void Draw(bool is_indexed, u32 instance_count) override;
|
||||
void Clear() override;
|
||||
void DispatchCompute() override;
|
||||
void ResetCounter(VideoCore::QueryType type) override;
|
||||
void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional<u64> timestamp) override;
|
||||
void BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, u32 size) override;
|
||||
void DisableGraphicsUniformBuffer(size_t stage, u32 index) override;
|
||||
void FlushAll() override;
|
||||
void FlushRegion(VAddr addr, u64 size) override;
|
||||
bool MustFlushRegion(VAddr addr, u64 size) override;
|
||||
void InvalidateRegion(VAddr addr, u64 size) override;
|
||||
void OnCPUWrite(VAddr addr, u64 size) override;
|
||||
void InvalidateGPUCache() override;
|
||||
void UnmapMemory(VAddr addr, u64 size) override;
|
||||
void ModifyGPUMemory(size_t as_id, GPUVAddr addr, u64 size) override;
|
||||
void SignalFence(std::function<void()>&& func) override;
|
||||
void SyncOperation(std::function<void()>&& func) override;
|
||||
void SignalSyncPoint(u32 value) override;
|
||||
void SignalReference() override;
|
||||
void ReleaseFences() override;
|
||||
void FlushAndInvalidateRegion(VAddr addr, u64 size) override;
|
||||
void WaitForIdle() override;
|
||||
void FragmentBarrier() override;
|
||||
void TiledCacheBarrier() override;
|
||||
void FlushCommands() override;
|
||||
void TickFrame() override;
|
||||
bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Surface& src,
|
||||
const Tegra::Engines::Fermi2D::Surface& dst,
|
||||
const Tegra::Engines::Fermi2D::Config& copy_config) override;
|
||||
Tegra::Engines::AccelerateDMAInterface& AccessAccelerateDMA() override;
|
||||
void AccelerateInlineToMemory(GPUVAddr address, size_t copy_size,
|
||||
std::span<const u8> memory) override;
|
||||
bool AccelerateDisplay(const Tegra::FramebufferConfig& config, VAddr framebuffer_addr,
|
||||
u32 pixel_stride) override;
|
||||
void LoadDiskResources(u64 title_id, std::stop_token stop_loading,
|
||||
const VideoCore::DiskResourceLoadCallback& callback) override;
|
||||
|
||||
void InitializeChannel(Tegra::Control::ChannelState& channel) override;
|
||||
|
||||
void BindChannel(Tegra::Control::ChannelState& channel) override;
|
||||
|
||||
void ReleaseChannel(s32 channel_id) override;
|
||||
|
||||
private:
|
||||
static constexpr size_t MAX_TEXTURES = 192;
|
||||
static constexpr size_t MAX_IMAGES = 48;
|
||||
static constexpr size_t MAX_IMAGE_VIEWS = MAX_TEXTURES + MAX_IMAGES;
|
||||
|
||||
static constexpr VkDeviceSize DEFAULT_BUFFER_SIZE = 4 * sizeof(float);
|
||||
|
||||
void FlushWork();
|
||||
|
||||
void UpdateDynamicStates();
|
||||
|
||||
void BeginTransformFeedback();
|
||||
|
||||
void EndTransformFeedback();
|
||||
|
||||
void UpdateViewportsState(Tegra::Engines::Maxwell3D::Regs& regs);
|
||||
void UpdateScissorsState(Tegra::Engines::Maxwell3D::Regs& regs);
|
||||
void UpdateDepthBias(Tegra::Engines::Maxwell3D::Regs& regs);
|
||||
void UpdateBlendConstants(Tegra::Engines::Maxwell3D::Regs& regs);
|
||||
void UpdateDepthBounds(Tegra::Engines::Maxwell3D::Regs& regs);
|
||||
void UpdateStencilFaces(Tegra::Engines::Maxwell3D::Regs& regs);
|
||||
void UpdateLineWidth(Tegra::Engines::Maxwell3D::Regs& regs);
|
||||
|
||||
void UpdateCullMode(Tegra::Engines::Maxwell3D::Regs& regs);
|
||||
void UpdateDepthBoundsTestEnable(Tegra::Engines::Maxwell3D::Regs& regs);
|
||||
void UpdateDepthTestEnable(Tegra::Engines::Maxwell3D::Regs& regs);
|
||||
void UpdateDepthWriteEnable(Tegra::Engines::Maxwell3D::Regs& regs);
|
||||
void UpdateDepthCompareOp(Tegra::Engines::Maxwell3D::Regs& regs);
|
||||
void UpdateFrontFace(Tegra::Engines::Maxwell3D::Regs& regs);
|
||||
void UpdateStencilOp(Tegra::Engines::Maxwell3D::Regs& regs);
|
||||
void UpdateStencilTestEnable(Tegra::Engines::Maxwell3D::Regs& regs);
|
||||
|
||||
void UpdateVertexInput(Tegra::Engines::Maxwell3D::Regs& regs);
|
||||
|
||||
void BindInlineIndexBuffer();
|
||||
|
||||
Tegra::GPU& gpu;
|
||||
|
||||
ScreenInfo& screen_info;
|
||||
const Device& device;
|
||||
MemoryAllocator& memory_allocator;
|
||||
StateTracker& state_tracker;
|
||||
Scheduler& scheduler;
|
||||
|
||||
StagingBufferPool staging_pool;
|
||||
DescriptorPool descriptor_pool;
|
||||
UpdateDescriptorQueue update_descriptor_queue;
|
||||
BlitImageHelper blit_image;
|
||||
RenderPassCache render_pass_cache;
|
||||
|
||||
TextureCacheRuntime texture_cache_runtime;
|
||||
TextureCache texture_cache;
|
||||
BufferCacheRuntime buffer_cache_runtime;
|
||||
BufferCache buffer_cache;
|
||||
PipelineCache pipeline_cache;
|
||||
QueryCache query_cache;
|
||||
AccelerateDMA accelerate_dma;
|
||||
FenceManager fence_manager;
|
||||
|
||||
vk::Event wfi_event;
|
||||
|
||||
boost::container::static_vector<u32, MAX_IMAGE_VIEWS> image_view_indices;
|
||||
std::array<VideoCommon::ImageViewId, MAX_IMAGE_VIEWS> image_view_ids;
|
||||
boost::container::static_vector<VkSampler, MAX_TEXTURES> sampler_handles;
|
||||
|
||||
u32 draw_counter = 0;
|
||||
};
|
||||
|
||||
} // namespace Vulkan
|
||||
|
||||
@@ -1,95 +1,95 @@
|
||||
// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#include <unordered_map>
|
||||
|
||||
#include <boost/container/static_vector.hpp>
|
||||
|
||||
#include "video_core/renderer_vulkan/maxwell_to_vk.h"
|
||||
#include "video_core/renderer_vulkan/vk_render_pass_cache.h"
|
||||
#include "video_core/surface.h"
|
||||
#include "video_core/vulkan_common/vulkan_device.h"
|
||||
#include "video_core/vulkan_common/vulkan_wrapper.h"
|
||||
|
||||
namespace Vulkan {
|
||||
namespace {
|
||||
using VideoCore::Surface::PixelFormat;
|
||||
|
||||
VkAttachmentDescription AttachmentDescription(const Device& device, PixelFormat format,
|
||||
VkSampleCountFlagBits samples) {
|
||||
using MaxwellToVK::SurfaceFormat;
|
||||
return {
|
||||
.flags = VK_ATTACHMENT_DESCRIPTION_MAY_ALIAS_BIT,
|
||||
.format = SurfaceFormat(device, FormatType::Optimal, true, format).format,
|
||||
.samples = samples,
|
||||
.loadOp = VK_ATTACHMENT_LOAD_OP_LOAD,
|
||||
.storeOp = VK_ATTACHMENT_STORE_OP_STORE,
|
||||
.stencilLoadOp = VK_ATTACHMENT_LOAD_OP_LOAD,
|
||||
.stencilStoreOp = VK_ATTACHMENT_STORE_OP_STORE,
|
||||
.initialLayout = VK_IMAGE_LAYOUT_GENERAL,
|
||||
.finalLayout = VK_IMAGE_LAYOUT_GENERAL,
|
||||
};
|
||||
}
|
||||
} // Anonymous namespace
|
||||
|
||||
RenderPassCache::RenderPassCache(const Device& device_) : device{&device_} {}
|
||||
|
||||
VkRenderPass RenderPassCache::Get(const RenderPassKey& key) {
|
||||
std::scoped_lock lock{mutex};
|
||||
const auto [pair, is_new] = cache.try_emplace(key);
|
||||
if (!is_new) {
|
||||
return *pair->second;
|
||||
}
|
||||
boost::container::static_vector<VkAttachmentDescription, 9> descriptions;
|
||||
std::array<VkAttachmentReference, 8> references{};
|
||||
u32 num_attachments{};
|
||||
u32 num_colors{};
|
||||
for (size_t index = 0; index < key.color_formats.size(); ++index) {
|
||||
const PixelFormat format{key.color_formats[index]};
|
||||
const bool is_valid{format != PixelFormat::Invalid};
|
||||
references[index] = VkAttachmentReference{
|
||||
.attachment = is_valid ? num_colors : VK_ATTACHMENT_UNUSED,
|
||||
.layout = VK_IMAGE_LAYOUT_GENERAL,
|
||||
};
|
||||
if (is_valid) {
|
||||
descriptions.push_back(AttachmentDescription(*device, format, key.samples));
|
||||
num_attachments = static_cast<u32>(index + 1);
|
||||
++num_colors;
|
||||
}
|
||||
}
|
||||
const bool has_depth{key.depth_format != PixelFormat::Invalid};
|
||||
VkAttachmentReference depth_reference{};
|
||||
if (key.depth_format != PixelFormat::Invalid) {
|
||||
depth_reference = VkAttachmentReference{
|
||||
.attachment = num_colors,
|
||||
.layout = VK_IMAGE_LAYOUT_GENERAL,
|
||||
};
|
||||
descriptions.push_back(AttachmentDescription(*device, key.depth_format, key.samples));
|
||||
}
|
||||
const VkSubpassDescription subpass{
|
||||
.flags = 0,
|
||||
.pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,
|
||||
.inputAttachmentCount = 0,
|
||||
.pInputAttachments = nullptr,
|
||||
.colorAttachmentCount = num_attachments,
|
||||
.pColorAttachments = references.data(),
|
||||
.pResolveAttachments = nullptr,
|
||||
.pDepthStencilAttachment = has_depth ? &depth_reference : nullptr,
|
||||
.preserveAttachmentCount = 0,
|
||||
.pPreserveAttachments = nullptr,
|
||||
};
|
||||
pair->second = device->GetLogical().CreateRenderPass({
|
||||
.sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO,
|
||||
.pNext = nullptr,
|
||||
.flags = 0,
|
||||
.attachmentCount = static_cast<u32>(descriptions.size()),
|
||||
.pAttachments = descriptions.empty() ? nullptr : descriptions.data(),
|
||||
.subpassCount = 1,
|
||||
.pSubpasses = &subpass,
|
||||
.dependencyCount = 0,
|
||||
.pDependencies = nullptr,
|
||||
});
|
||||
return *pair->second;
|
||||
}
|
||||
|
||||
} // namespace Vulkan
|
||||
// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#include <unordered_map>
|
||||
|
||||
#include <boost/container/static_vector.hpp>
|
||||
|
||||
#include "video_core/renderer_vulkan/maxwell_to_vk.h"
|
||||
#include "video_core/renderer_vulkan/vk_render_pass_cache.h"
|
||||
#include "video_core/surface.h"
|
||||
#include "video_core/vulkan_common/vulkan_device.h"
|
||||
#include "video_core/vulkan_common/vulkan_wrapper.h"
|
||||
|
||||
namespace Vulkan {
|
||||
namespace {
|
||||
using VideoCore::Surface::PixelFormat;
|
||||
|
||||
VkAttachmentDescription AttachmentDescription(const Device& device, PixelFormat format,
|
||||
VkSampleCountFlagBits samples) {
|
||||
using MaxwellToVK::SurfaceFormat;
|
||||
return {
|
||||
.flags = VK_ATTACHMENT_DESCRIPTION_MAY_ALIAS_BIT,
|
||||
.format = SurfaceFormat(device, FormatType::Optimal, true, format).format,
|
||||
.samples = samples,
|
||||
.loadOp = VK_ATTACHMENT_LOAD_OP_LOAD,
|
||||
.storeOp = VK_ATTACHMENT_STORE_OP_STORE,
|
||||
.stencilLoadOp = VK_ATTACHMENT_LOAD_OP_LOAD,
|
||||
.stencilStoreOp = VK_ATTACHMENT_STORE_OP_STORE,
|
||||
.initialLayout = VK_IMAGE_LAYOUT_GENERAL,
|
||||
.finalLayout = VK_IMAGE_LAYOUT_GENERAL,
|
||||
};
|
||||
}
|
||||
} // Anonymous namespace
|
||||
|
||||
RenderPassCache::RenderPassCache(const Device& device_) : device{&device_} {}
|
||||
|
||||
VkRenderPass RenderPassCache::Get(const RenderPassKey& key) {
|
||||
std::scoped_lock lock{mutex};
|
||||
const auto [pair, is_new] = cache.try_emplace(key);
|
||||
if (!is_new) {
|
||||
return *pair->second;
|
||||
}
|
||||
boost::container::static_vector<VkAttachmentDescription, 9> descriptions;
|
||||
std::array<VkAttachmentReference, 8> references{};
|
||||
u32 num_attachments{};
|
||||
u32 num_colors{};
|
||||
for (size_t index = 0; index < key.color_formats.size(); ++index) {
|
||||
const PixelFormat format{key.color_formats[index]};
|
||||
const bool is_valid{format != PixelFormat::Invalid};
|
||||
references[index] = VkAttachmentReference{
|
||||
.attachment = is_valid ? num_colors : VK_ATTACHMENT_UNUSED,
|
||||
.layout = VK_IMAGE_LAYOUT_GENERAL,
|
||||
};
|
||||
if (is_valid) {
|
||||
descriptions.push_back(AttachmentDescription(*device, format, key.samples));
|
||||
num_attachments = static_cast<u32>(index + 1);
|
||||
++num_colors;
|
||||
}
|
||||
}
|
||||
const bool has_depth{key.depth_format != PixelFormat::Invalid};
|
||||
VkAttachmentReference depth_reference{};
|
||||
if (key.depth_format != PixelFormat::Invalid) {
|
||||
depth_reference = VkAttachmentReference{
|
||||
.attachment = num_colors,
|
||||
.layout = VK_IMAGE_LAYOUT_GENERAL,
|
||||
};
|
||||
descriptions.push_back(AttachmentDescription(*device, key.depth_format, key.samples));
|
||||
}
|
||||
const VkSubpassDescription subpass{
|
||||
.flags = 0,
|
||||
.pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,
|
||||
.inputAttachmentCount = 0,
|
||||
.pInputAttachments = nullptr,
|
||||
.colorAttachmentCount = num_attachments,
|
||||
.pColorAttachments = references.data(),
|
||||
.pResolveAttachments = nullptr,
|
||||
.pDepthStencilAttachment = has_depth ? &depth_reference : nullptr,
|
||||
.preserveAttachmentCount = 0,
|
||||
.pPreserveAttachments = nullptr,
|
||||
};
|
||||
pair->second = device->GetLogical().CreateRenderPass({
|
||||
.sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO,
|
||||
.pNext = nullptr,
|
||||
.flags = 0,
|
||||
.attachmentCount = static_cast<u32>(descriptions.size()),
|
||||
.pAttachments = descriptions.empty() ? nullptr : descriptions.data(),
|
||||
.subpassCount = 1,
|
||||
.pSubpasses = &subpass,
|
||||
.dependencyCount = 0,
|
||||
.pDependencies = nullptr,
|
||||
});
|
||||
return *pair->second;
|
||||
}
|
||||
|
||||
} // namespace Vulkan
|
||||
|
||||
@@ -1,54 +1,54 @@
|
||||
// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <mutex>
|
||||
#include <unordered_map>
|
||||
|
||||
#include "video_core/surface.h"
|
||||
#include "video_core/vulkan_common/vulkan_wrapper.h"
|
||||
|
||||
namespace Vulkan {
|
||||
|
||||
struct RenderPassKey {
|
||||
auto operator<=>(const RenderPassKey&) const noexcept = default;
|
||||
|
||||
std::array<VideoCore::Surface::PixelFormat, 8> color_formats;
|
||||
VideoCore::Surface::PixelFormat depth_format;
|
||||
VkSampleCountFlagBits samples;
|
||||
};
|
||||
|
||||
} // namespace Vulkan
|
||||
|
||||
namespace std {
|
||||
template <>
|
||||
struct hash<Vulkan::RenderPassKey> {
|
||||
[[nodiscard]] size_t operator()(const Vulkan::RenderPassKey& key) const noexcept {
|
||||
size_t value = static_cast<size_t>(key.depth_format) << 48;
|
||||
value ^= static_cast<size_t>(key.samples) << 52;
|
||||
for (size_t i = 0; i < key.color_formats.size(); ++i) {
|
||||
value ^= static_cast<size_t>(key.color_formats[i]) << (i * 6);
|
||||
}
|
||||
return value;
|
||||
}
|
||||
};
|
||||
} // namespace std
|
||||
|
||||
namespace Vulkan {
|
||||
|
||||
class Device;
|
||||
|
||||
class RenderPassCache {
|
||||
public:
|
||||
explicit RenderPassCache(const Device& device_);
|
||||
|
||||
VkRenderPass Get(const RenderPassKey& key);
|
||||
|
||||
private:
|
||||
const Device* device{};
|
||||
std::unordered_map<RenderPassKey, vk::RenderPass> cache;
|
||||
std::mutex mutex;
|
||||
};
|
||||
|
||||
} // namespace Vulkan
|
||||
// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <mutex>
|
||||
#include <unordered_map>
|
||||
|
||||
#include "video_core/surface.h"
|
||||
#include "video_core/vulkan_common/vulkan_wrapper.h"
|
||||
|
||||
namespace Vulkan {
|
||||
|
||||
struct RenderPassKey {
|
||||
auto operator<=>(const RenderPassKey&) const noexcept = default;
|
||||
|
||||
std::array<VideoCore::Surface::PixelFormat, 8> color_formats;
|
||||
VideoCore::Surface::PixelFormat depth_format;
|
||||
VkSampleCountFlagBits samples;
|
||||
};
|
||||
|
||||
} // namespace Vulkan
|
||||
|
||||
namespace std {
|
||||
template <>
|
||||
struct hash<Vulkan::RenderPassKey> {
|
||||
[[nodiscard]] size_t operator()(const Vulkan::RenderPassKey& key) const noexcept {
|
||||
size_t value = static_cast<size_t>(key.depth_format) << 48;
|
||||
value ^= static_cast<size_t>(key.samples) << 52;
|
||||
for (size_t i = 0; i < key.color_formats.size(); ++i) {
|
||||
value ^= static_cast<size_t>(key.color_formats[i]) << (i * 6);
|
||||
}
|
||||
return value;
|
||||
}
|
||||
};
|
||||
} // namespace std
|
||||
|
||||
namespace Vulkan {
|
||||
|
||||
class Device;
|
||||
|
||||
class RenderPassCache {
|
||||
public:
|
||||
explicit RenderPassCache(const Device& device_);
|
||||
|
||||
VkRenderPass Get(const RenderPassKey& key);
|
||||
|
||||
private:
|
||||
const Device* device{};
|
||||
std::unordered_map<RenderPassKey, vk::RenderPass> cache;
|
||||
std::mutex mutex;
|
||||
};
|
||||
|
||||
} // namespace Vulkan
|
||||
|
||||
@@ -1,60 +1,60 @@
|
||||
// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#include <optional>
|
||||
|
||||
#include "video_core/renderer_vulkan/vk_master_semaphore.h"
|
||||
#include "video_core/renderer_vulkan/vk_resource_pool.h"
|
||||
|
||||
namespace Vulkan {
|
||||
|
||||
ResourcePool::ResourcePool(MasterSemaphore& master_semaphore_, size_t grow_step_)
|
||||
: master_semaphore{&master_semaphore_}, grow_step{grow_step_} {}
|
||||
|
||||
size_t ResourcePool::CommitResource() {
|
||||
// Refresh semaphore to query updated results
|
||||
master_semaphore->Refresh();
|
||||
const u64 gpu_tick = master_semaphore->KnownGpuTick();
|
||||
const auto search = [this, gpu_tick](size_t begin, size_t end) -> std::optional<size_t> {
|
||||
for (size_t iterator = begin; iterator < end; ++iterator) {
|
||||
if (gpu_tick >= ticks[iterator]) {
|
||||
ticks[iterator] = master_semaphore->CurrentTick();
|
||||
return iterator;
|
||||
}
|
||||
}
|
||||
return std::nullopt;
|
||||
};
|
||||
// Try to find a free resource from the hinted position to the end.
|
||||
std::optional<size_t> found = search(hint_iterator, ticks.size());
|
||||
if (!found) {
|
||||
// Search from beginning to the hinted position.
|
||||
found = search(0, hint_iterator);
|
||||
if (!found) {
|
||||
// Both searches failed, the pool is full; handle it.
|
||||
const size_t free_resource = ManageOverflow();
|
||||
|
||||
ticks[free_resource] = master_semaphore->CurrentTick();
|
||||
found = free_resource;
|
||||
}
|
||||
}
|
||||
// Free iterator is hinted to the resource after the one that's been commited.
|
||||
hint_iterator = (*found + 1) % ticks.size();
|
||||
return *found;
|
||||
}
|
||||
|
||||
size_t ResourcePool::ManageOverflow() {
|
||||
const size_t old_capacity = ticks.size();
|
||||
Grow();
|
||||
|
||||
// The last entry is guaranted to be free, since it's the first element of the freshly
|
||||
// allocated resources.
|
||||
return old_capacity;
|
||||
}
|
||||
|
||||
void ResourcePool::Grow() {
|
||||
const size_t old_capacity = ticks.size();
|
||||
ticks.resize(old_capacity + grow_step);
|
||||
Allocate(old_capacity, old_capacity + grow_step);
|
||||
}
|
||||
|
||||
} // namespace Vulkan
|
||||
// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#include <optional>
|
||||
|
||||
#include "video_core/renderer_vulkan/vk_master_semaphore.h"
|
||||
#include "video_core/renderer_vulkan/vk_resource_pool.h"
|
||||
|
||||
namespace Vulkan {
|
||||
|
||||
ResourcePool::ResourcePool(MasterSemaphore& master_semaphore_, size_t grow_step_)
|
||||
: master_semaphore{&master_semaphore_}, grow_step{grow_step_} {}
|
||||
|
||||
size_t ResourcePool::CommitResource() {
|
||||
// Refresh semaphore to query updated results
|
||||
master_semaphore->Refresh();
|
||||
const u64 gpu_tick = master_semaphore->KnownGpuTick();
|
||||
const auto search = [this, gpu_tick](size_t begin, size_t end) -> std::optional<size_t> {
|
||||
for (size_t iterator = begin; iterator < end; ++iterator) {
|
||||
if (gpu_tick >= ticks[iterator]) {
|
||||
ticks[iterator] = master_semaphore->CurrentTick();
|
||||
return iterator;
|
||||
}
|
||||
}
|
||||
return std::nullopt;
|
||||
};
|
||||
// Try to find a free resource from the hinted position to the end.
|
||||
std::optional<size_t> found = search(hint_iterator, ticks.size());
|
||||
if (!found) {
|
||||
// Search from beginning to the hinted position.
|
||||
found = search(0, hint_iterator);
|
||||
if (!found) {
|
||||
// Both searches failed, the pool is full; handle it.
|
||||
const size_t free_resource = ManageOverflow();
|
||||
|
||||
ticks[free_resource] = master_semaphore->CurrentTick();
|
||||
found = free_resource;
|
||||
}
|
||||
}
|
||||
// Free iterator is hinted to the resource after the one that's been commited.
|
||||
hint_iterator = (*found + 1) % ticks.size();
|
||||
return *found;
|
||||
}
|
||||
|
||||
size_t ResourcePool::ManageOverflow() {
|
||||
const size_t old_capacity = ticks.size();
|
||||
Grow();
|
||||
|
||||
// The last entry is guaranted to be free, since it's the first element of the freshly
|
||||
// allocated resources.
|
||||
return old_capacity;
|
||||
}
|
||||
|
||||
void ResourcePool::Grow() {
|
||||
const size_t old_capacity = ticks.size();
|
||||
ticks.resize(old_capacity + grow_step);
|
||||
Allocate(old_capacity, old_capacity + grow_step);
|
||||
}
|
||||
|
||||
} // namespace Vulkan
|
||||
|
||||
@@ -1,50 +1,50 @@
|
||||
// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <vector>
|
||||
|
||||
#include "common/common_types.h"
|
||||
|
||||
namespace Vulkan {
|
||||
|
||||
class MasterSemaphore;
|
||||
|
||||
/**
|
||||
* Handles a pool of resources protected by fences. Manages resource overflow allocating more
|
||||
* resources.
|
||||
*/
|
||||
class ResourcePool {
|
||||
public:
|
||||
explicit ResourcePool() = default;
|
||||
explicit ResourcePool(MasterSemaphore& master_semaphore, size_t grow_step);
|
||||
|
||||
virtual ~ResourcePool() = default;
|
||||
|
||||
ResourcePool& operator=(ResourcePool&&) noexcept = default;
|
||||
ResourcePool(ResourcePool&&) noexcept = default;
|
||||
|
||||
ResourcePool& operator=(const ResourcePool&) = default;
|
||||
ResourcePool(const ResourcePool&) = default;
|
||||
|
||||
protected:
|
||||
size_t CommitResource();
|
||||
|
||||
/// Called when a chunk of resources have to be allocated.
|
||||
virtual void Allocate(size_t begin, size_t end) = 0;
|
||||
|
||||
private:
|
||||
/// Manages pool overflow allocating new resources.
|
||||
size_t ManageOverflow();
|
||||
|
||||
/// Allocates a new page of resources.
|
||||
void Grow();
|
||||
|
||||
MasterSemaphore* master_semaphore{};
|
||||
size_t grow_step = 0; ///< Number of new resources created after an overflow
|
||||
size_t hint_iterator = 0; ///< Hint to where the next free resources is likely to be found
|
||||
std::vector<u64> ticks; ///< Ticks for each resource
|
||||
};
|
||||
|
||||
} // namespace Vulkan
|
||||
// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <vector>
|
||||
|
||||
#include "common/common_types.h"
|
||||
|
||||
namespace Vulkan {
|
||||
|
||||
class MasterSemaphore;
|
||||
|
||||
/**
|
||||
* Handles a pool of resources protected by fences. Manages resource overflow allocating more
|
||||
* resources.
|
||||
*/
|
||||
class ResourcePool {
|
||||
public:
|
||||
explicit ResourcePool() = default;
|
||||
explicit ResourcePool(MasterSemaphore& master_semaphore, size_t grow_step);
|
||||
|
||||
virtual ~ResourcePool() = default;
|
||||
|
||||
ResourcePool& operator=(ResourcePool&&) noexcept = default;
|
||||
ResourcePool(ResourcePool&&) noexcept = default;
|
||||
|
||||
ResourcePool& operator=(const ResourcePool&) = default;
|
||||
ResourcePool(const ResourcePool&) = default;
|
||||
|
||||
protected:
|
||||
size_t CommitResource();
|
||||
|
||||
/// Called when a chunk of resources have to be allocated.
|
||||
virtual void Allocate(size_t begin, size_t end) = 0;
|
||||
|
||||
private:
|
||||
/// Manages pool overflow allocating new resources.
|
||||
size_t ManageOverflow();
|
||||
|
||||
/// Allocates a new page of resources.
|
||||
void Grow();
|
||||
|
||||
MasterSemaphore* master_semaphore{};
|
||||
size_t grow_step = 0; ///< Number of new resources created after an overflow
|
||||
size_t hint_iterator = 0; ///< Hint to where the next free resources is likely to be found
|
||||
std::vector<u64> ticks; ///< Ticks for each resource
|
||||
};
|
||||
|
||||
} // namespace Vulkan
|
||||
|
||||
@@ -1,295 +1,295 @@
|
||||
// SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#include <memory>
|
||||
#include <mutex>
|
||||
#include <thread>
|
||||
#include <utility>
|
||||
|
||||
#include "common/microprofile.h"
|
||||
#include "common/thread.h"
|
||||
#include "video_core/renderer_vulkan/vk_command_pool.h"
|
||||
#include "video_core/renderer_vulkan/vk_master_semaphore.h"
|
||||
#include "video_core/renderer_vulkan/vk_query_cache.h"
|
||||
#include "video_core/renderer_vulkan/vk_scheduler.h"
|
||||
#include "video_core/renderer_vulkan/vk_state_tracker.h"
|
||||
#include "video_core/renderer_vulkan/vk_texture_cache.h"
|
||||
#include "video_core/vulkan_common/vulkan_device.h"
|
||||
#include "video_core/vulkan_common/vulkan_wrapper.h"
|
||||
|
||||
namespace Vulkan {
|
||||
|
||||
MICROPROFILE_DECLARE(Vulkan_WaitForWorker);
|
||||
|
||||
void Scheduler::CommandChunk::ExecuteAll(vk::CommandBuffer cmdbuf) {
|
||||
auto command = first;
|
||||
while (command != nullptr) {
|
||||
auto next = command->GetNext();
|
||||
command->Execute(cmdbuf);
|
||||
command->~Command();
|
||||
command = next;
|
||||
}
|
||||
submit = false;
|
||||
command_offset = 0;
|
||||
first = nullptr;
|
||||
last = nullptr;
|
||||
}
|
||||
|
||||
Scheduler::Scheduler(const Device& device_, StateTracker& state_tracker_)
|
||||
: device{device_}, state_tracker{state_tracker_},
|
||||
master_semaphore{std::make_unique<MasterSemaphore>(device)},
|
||||
command_pool{std::make_unique<CommandPool>(*master_semaphore, device)} {
|
||||
AcquireNewChunk();
|
||||
AllocateWorkerCommandBuffer();
|
||||
worker_thread = std::jthread([this](std::stop_token token) { WorkerThread(token); });
|
||||
}
|
||||
|
||||
Scheduler::~Scheduler() = default;
|
||||
|
||||
void Scheduler::Flush(VkSemaphore signal_semaphore, VkSemaphore wait_semaphore) {
|
||||
SubmitExecution(signal_semaphore, wait_semaphore);
|
||||
AllocateNewContext();
|
||||
}
|
||||
|
||||
void Scheduler::Finish(VkSemaphore signal_semaphore, VkSemaphore wait_semaphore) {
|
||||
const u64 presubmit_tick = CurrentTick();
|
||||
SubmitExecution(signal_semaphore, wait_semaphore);
|
||||
WaitWorker();
|
||||
Wait(presubmit_tick);
|
||||
AllocateNewContext();
|
||||
}
|
||||
|
||||
void Scheduler::WaitWorker() {
|
||||
MICROPROFILE_SCOPE(Vulkan_WaitForWorker);
|
||||
DispatchWork();
|
||||
|
||||
std::unique_lock lock{work_mutex};
|
||||
wait_cv.wait(lock, [this] { return work_queue.empty(); });
|
||||
}
|
||||
|
||||
void Scheduler::DispatchWork() {
|
||||
if (chunk->Empty()) {
|
||||
return;
|
||||
}
|
||||
{
|
||||
std::scoped_lock lock{work_mutex};
|
||||
work_queue.push(std::move(chunk));
|
||||
}
|
||||
work_cv.notify_one();
|
||||
AcquireNewChunk();
|
||||
}
|
||||
|
||||
void Scheduler::RequestRenderpass(const Framebuffer* framebuffer) {
|
||||
const VkRenderPass renderpass = framebuffer->RenderPass();
|
||||
const VkFramebuffer framebuffer_handle = framebuffer->Handle();
|
||||
const VkExtent2D render_area = framebuffer->RenderArea();
|
||||
if (renderpass == state.renderpass && framebuffer_handle == state.framebuffer &&
|
||||
render_area.width == state.render_area.width &&
|
||||
render_area.height == state.render_area.height) {
|
||||
return;
|
||||
}
|
||||
EndRenderPass();
|
||||
state.renderpass = renderpass;
|
||||
state.framebuffer = framebuffer_handle;
|
||||
state.render_area = render_area;
|
||||
|
||||
Record([renderpass, framebuffer_handle, render_area](vk::CommandBuffer cmdbuf) {
|
||||
const VkRenderPassBeginInfo renderpass_bi{
|
||||
.sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO,
|
||||
.pNext = nullptr,
|
||||
.renderPass = renderpass,
|
||||
.framebuffer = framebuffer_handle,
|
||||
.renderArea =
|
||||
{
|
||||
.offset = {.x = 0, .y = 0},
|
||||
.extent = render_area,
|
||||
},
|
||||
.clearValueCount = 0,
|
||||
.pClearValues = nullptr,
|
||||
};
|
||||
cmdbuf.BeginRenderPass(renderpass_bi, VK_SUBPASS_CONTENTS_INLINE);
|
||||
});
|
||||
num_renderpass_images = framebuffer->NumImages();
|
||||
renderpass_images = framebuffer->Images();
|
||||
renderpass_image_ranges = framebuffer->ImageRanges();
|
||||
}
|
||||
|
||||
void Scheduler::RequestOutsideRenderPassOperationContext() {
|
||||
EndRenderPass();
|
||||
}
|
||||
|
||||
bool Scheduler::UpdateGraphicsPipeline(GraphicsPipeline* pipeline) {
|
||||
if (state.graphics_pipeline == pipeline) {
|
||||
return false;
|
||||
}
|
||||
state.graphics_pipeline = pipeline;
|
||||
return true;
|
||||
}
|
||||
|
||||
bool Scheduler::UpdateRescaling(bool is_rescaling) {
|
||||
if (state.rescaling_defined && is_rescaling == state.is_rescaling) {
|
||||
return false;
|
||||
}
|
||||
state.rescaling_defined = true;
|
||||
state.is_rescaling = is_rescaling;
|
||||
return true;
|
||||
}
|
||||
|
||||
void Scheduler::WorkerThread(std::stop_token stop_token) {
|
||||
Common::SetCurrentThreadName("VulkanWorker");
|
||||
do {
|
||||
std::unique_ptr<CommandChunk> work;
|
||||
bool has_submit{false};
|
||||
{
|
||||
std::unique_lock lock{work_mutex};
|
||||
if (work_queue.empty()) {
|
||||
wait_cv.notify_all();
|
||||
}
|
||||
work_cv.wait(lock, stop_token, [this] { return !work_queue.empty(); });
|
||||
if (stop_token.stop_requested()) {
|
||||
continue;
|
||||
}
|
||||
work = std::move(work_queue.front());
|
||||
work_queue.pop();
|
||||
|
||||
has_submit = work->HasSubmit();
|
||||
work->ExecuteAll(current_cmdbuf);
|
||||
}
|
||||
if (has_submit) {
|
||||
AllocateWorkerCommandBuffer();
|
||||
}
|
||||
std::scoped_lock reserve_lock{reserve_mutex};
|
||||
chunk_reserve.push_back(std::move(work));
|
||||
} while (!stop_token.stop_requested());
|
||||
}
|
||||
|
||||
void Scheduler::AllocateWorkerCommandBuffer() {
|
||||
current_cmdbuf = vk::CommandBuffer(command_pool->Commit(), device.GetDispatchLoader());
|
||||
current_cmdbuf.Begin({
|
||||
.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,
|
||||
.pNext = nullptr,
|
||||
.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT,
|
||||
.pInheritanceInfo = nullptr,
|
||||
});
|
||||
}
|
||||
|
||||
void Scheduler::SubmitExecution(VkSemaphore signal_semaphore, VkSemaphore wait_semaphore) {
|
||||
EndPendingOperations();
|
||||
InvalidateState();
|
||||
|
||||
const u64 signal_value = master_semaphore->NextTick();
|
||||
Record([signal_semaphore, wait_semaphore, signal_value, this](vk::CommandBuffer cmdbuf) {
|
||||
cmdbuf.End();
|
||||
const VkSemaphore timeline_semaphore = master_semaphore->Handle();
|
||||
|
||||
const u32 num_signal_semaphores = signal_semaphore ? 2U : 1U;
|
||||
const std::array signal_values{signal_value, u64(0)};
|
||||
const std::array signal_semaphores{timeline_semaphore, signal_semaphore};
|
||||
|
||||
const u32 num_wait_semaphores = wait_semaphore ? 2U : 1U;
|
||||
const std::array wait_values{signal_value - 1, u64(1)};
|
||||
const std::array wait_semaphores{timeline_semaphore, wait_semaphore};
|
||||
static constexpr std::array<VkPipelineStageFlags, 2> wait_stage_masks{
|
||||
VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
|
||||
VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT,
|
||||
};
|
||||
|
||||
const VkTimelineSemaphoreSubmitInfoKHR timeline_si{
|
||||
.sType = VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO_KHR,
|
||||
.pNext = nullptr,
|
||||
.waitSemaphoreValueCount = num_wait_semaphores,
|
||||
.pWaitSemaphoreValues = wait_values.data(),
|
||||
.signalSemaphoreValueCount = num_signal_semaphores,
|
||||
.pSignalSemaphoreValues = signal_values.data(),
|
||||
};
|
||||
const VkSubmitInfo submit_info{
|
||||
.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO,
|
||||
.pNext = &timeline_si,
|
||||
.waitSemaphoreCount = num_wait_semaphores,
|
||||
.pWaitSemaphores = wait_semaphores.data(),
|
||||
.pWaitDstStageMask = wait_stage_masks.data(),
|
||||
.commandBufferCount = 1,
|
||||
.pCommandBuffers = cmdbuf.address(),
|
||||
.signalSemaphoreCount = num_signal_semaphores,
|
||||
.pSignalSemaphores = signal_semaphores.data(),
|
||||
};
|
||||
switch (const VkResult result = device.GetGraphicsQueue().Submit(submit_info)) {
|
||||
case VK_SUCCESS:
|
||||
break;
|
||||
case VK_ERROR_DEVICE_LOST:
|
||||
device.ReportLoss();
|
||||
[[fallthrough]];
|
||||
default:
|
||||
vk::Check(result);
|
||||
}
|
||||
});
|
||||
chunk->MarkSubmit();
|
||||
DispatchWork();
|
||||
}
|
||||
|
||||
void Scheduler::AllocateNewContext() {
|
||||
// Enable counters once again. These are disabled when a command buffer is finished.
|
||||
if (query_cache) {
|
||||
query_cache->UpdateCounters();
|
||||
}
|
||||
}
|
||||
|
||||
void Scheduler::InvalidateState() {
|
||||
state.graphics_pipeline = nullptr;
|
||||
state.rescaling_defined = false;
|
||||
state_tracker.InvalidateCommandBufferState();
|
||||
}
|
||||
|
||||
void Scheduler::EndPendingOperations() {
|
||||
query_cache->DisableStreams();
|
||||
EndRenderPass();
|
||||
}
|
||||
|
||||
void Scheduler::EndRenderPass() {
|
||||
if (!state.renderpass) {
|
||||
return;
|
||||
}
|
||||
Record([num_images = num_renderpass_images, images = renderpass_images,
|
||||
ranges = renderpass_image_ranges](vk::CommandBuffer cmdbuf) {
|
||||
std::array<VkImageMemoryBarrier, 9> barriers;
|
||||
for (size_t i = 0; i < num_images; ++i) {
|
||||
barriers[i] = VkImageMemoryBarrier{
|
||||
.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
|
||||
.pNext = nullptr,
|
||||
.srcAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT |
|
||||
VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT,
|
||||
.dstAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT |
|
||||
VK_ACCESS_COLOR_ATTACHMENT_READ_BIT |
|
||||
VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT |
|
||||
VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT |
|
||||
VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT,
|
||||
.oldLayout = VK_IMAGE_LAYOUT_GENERAL,
|
||||
.newLayout = VK_IMAGE_LAYOUT_GENERAL,
|
||||
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
|
||||
.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
|
||||
.image = images[i],
|
||||
.subresourceRange = ranges[i],
|
||||
};
|
||||
}
|
||||
cmdbuf.EndRenderPass();
|
||||
cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT |
|
||||
VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT |
|
||||
VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT,
|
||||
VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, 0, nullptr, nullptr,
|
||||
vk::Span(barriers.data(), num_images));
|
||||
});
|
||||
state.renderpass = nullptr;
|
||||
num_renderpass_images = 0;
|
||||
}
|
||||
|
||||
void Scheduler::AcquireNewChunk() {
|
||||
std::scoped_lock lock{reserve_mutex};
|
||||
if (chunk_reserve.empty()) {
|
||||
chunk = std::make_unique<CommandChunk>();
|
||||
return;
|
||||
}
|
||||
chunk = std::move(chunk_reserve.back());
|
||||
chunk_reserve.pop_back();
|
||||
}
|
||||
|
||||
} // namespace Vulkan
|
||||
// SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#include <memory>
|
||||
#include <mutex>
|
||||
#include <thread>
|
||||
#include <utility>
|
||||
|
||||
#include "common/microprofile.h"
|
||||
#include "common/thread.h"
|
||||
#include "video_core/renderer_vulkan/vk_command_pool.h"
|
||||
#include "video_core/renderer_vulkan/vk_master_semaphore.h"
|
||||
#include "video_core/renderer_vulkan/vk_query_cache.h"
|
||||
#include "video_core/renderer_vulkan/vk_scheduler.h"
|
||||
#include "video_core/renderer_vulkan/vk_state_tracker.h"
|
||||
#include "video_core/renderer_vulkan/vk_texture_cache.h"
|
||||
#include "video_core/vulkan_common/vulkan_device.h"
|
||||
#include "video_core/vulkan_common/vulkan_wrapper.h"
|
||||
|
||||
namespace Vulkan {
|
||||
|
||||
MICROPROFILE_DECLARE(Vulkan_WaitForWorker);
|
||||
|
||||
void Scheduler::CommandChunk::ExecuteAll(vk::CommandBuffer cmdbuf) {
|
||||
auto command = first;
|
||||
while (command != nullptr) {
|
||||
auto next = command->GetNext();
|
||||
command->Execute(cmdbuf);
|
||||
command->~Command();
|
||||
command = next;
|
||||
}
|
||||
submit = false;
|
||||
command_offset = 0;
|
||||
first = nullptr;
|
||||
last = nullptr;
|
||||
}
|
||||
|
||||
Scheduler::Scheduler(const Device& device_, StateTracker& state_tracker_)
|
||||
: device{device_}, state_tracker{state_tracker_},
|
||||
master_semaphore{std::make_unique<MasterSemaphore>(device)},
|
||||
command_pool{std::make_unique<CommandPool>(*master_semaphore, device)} {
|
||||
AcquireNewChunk();
|
||||
AllocateWorkerCommandBuffer();
|
||||
worker_thread = std::jthread([this](std::stop_token token) { WorkerThread(token); });
|
||||
}
|
||||
|
||||
Scheduler::~Scheduler() = default;
|
||||
|
||||
void Scheduler::Flush(VkSemaphore signal_semaphore, VkSemaphore wait_semaphore) {
|
||||
SubmitExecution(signal_semaphore, wait_semaphore);
|
||||
AllocateNewContext();
|
||||
}
|
||||
|
||||
void Scheduler::Finish(VkSemaphore signal_semaphore, VkSemaphore wait_semaphore) {
|
||||
const u64 presubmit_tick = CurrentTick();
|
||||
SubmitExecution(signal_semaphore, wait_semaphore);
|
||||
WaitWorker();
|
||||
Wait(presubmit_tick);
|
||||
AllocateNewContext();
|
||||
}
|
||||
|
||||
void Scheduler::WaitWorker() {
|
||||
MICROPROFILE_SCOPE(Vulkan_WaitForWorker);
|
||||
DispatchWork();
|
||||
|
||||
std::unique_lock lock{work_mutex};
|
||||
wait_cv.wait(lock, [this] { return work_queue.empty(); });
|
||||
}
|
||||
|
||||
void Scheduler::DispatchWork() {
|
||||
if (chunk->Empty()) {
|
||||
return;
|
||||
}
|
||||
{
|
||||
std::scoped_lock lock{work_mutex};
|
||||
work_queue.push(std::move(chunk));
|
||||
}
|
||||
work_cv.notify_one();
|
||||
AcquireNewChunk();
|
||||
}
|
||||
|
||||
void Scheduler::RequestRenderpass(const Framebuffer* framebuffer) {
|
||||
const VkRenderPass renderpass = framebuffer->RenderPass();
|
||||
const VkFramebuffer framebuffer_handle = framebuffer->Handle();
|
||||
const VkExtent2D render_area = framebuffer->RenderArea();
|
||||
if (renderpass == state.renderpass && framebuffer_handle == state.framebuffer &&
|
||||
render_area.width == state.render_area.width &&
|
||||
render_area.height == state.render_area.height) {
|
||||
return;
|
||||
}
|
||||
EndRenderPass();
|
||||
state.renderpass = renderpass;
|
||||
state.framebuffer = framebuffer_handle;
|
||||
state.render_area = render_area;
|
||||
|
||||
Record([renderpass, framebuffer_handle, render_area](vk::CommandBuffer cmdbuf) {
|
||||
const VkRenderPassBeginInfo renderpass_bi{
|
||||
.sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO,
|
||||
.pNext = nullptr,
|
||||
.renderPass = renderpass,
|
||||
.framebuffer = framebuffer_handle,
|
||||
.renderArea =
|
||||
{
|
||||
.offset = {.x = 0, .y = 0},
|
||||
.extent = render_area,
|
||||
},
|
||||
.clearValueCount = 0,
|
||||
.pClearValues = nullptr,
|
||||
};
|
||||
cmdbuf.BeginRenderPass(renderpass_bi, VK_SUBPASS_CONTENTS_INLINE);
|
||||
});
|
||||
num_renderpass_images = framebuffer->NumImages();
|
||||
renderpass_images = framebuffer->Images();
|
||||
renderpass_image_ranges = framebuffer->ImageRanges();
|
||||
}
|
||||
|
||||
void Scheduler::RequestOutsideRenderPassOperationContext() {
|
||||
EndRenderPass();
|
||||
}
|
||||
|
||||
bool Scheduler::UpdateGraphicsPipeline(GraphicsPipeline* pipeline) {
|
||||
if (state.graphics_pipeline == pipeline) {
|
||||
return false;
|
||||
}
|
||||
state.graphics_pipeline = pipeline;
|
||||
return true;
|
||||
}
|
||||
|
||||
bool Scheduler::UpdateRescaling(bool is_rescaling) {
|
||||
if (state.rescaling_defined && is_rescaling == state.is_rescaling) {
|
||||
return false;
|
||||
}
|
||||
state.rescaling_defined = true;
|
||||
state.is_rescaling = is_rescaling;
|
||||
return true;
|
||||
}
|
||||
|
||||
void Scheduler::WorkerThread(std::stop_token stop_token) {
|
||||
Common::SetCurrentThreadName("VulkanWorker");
|
||||
do {
|
||||
std::unique_ptr<CommandChunk> work;
|
||||
bool has_submit{false};
|
||||
{
|
||||
std::unique_lock lock{work_mutex};
|
||||
if (work_queue.empty()) {
|
||||
wait_cv.notify_all();
|
||||
}
|
||||
work_cv.wait(lock, stop_token, [this] { return !work_queue.empty(); });
|
||||
if (stop_token.stop_requested()) {
|
||||
continue;
|
||||
}
|
||||
work = std::move(work_queue.front());
|
||||
work_queue.pop();
|
||||
|
||||
has_submit = work->HasSubmit();
|
||||
work->ExecuteAll(current_cmdbuf);
|
||||
}
|
||||
if (has_submit) {
|
||||
AllocateWorkerCommandBuffer();
|
||||
}
|
||||
std::scoped_lock reserve_lock{reserve_mutex};
|
||||
chunk_reserve.push_back(std::move(work));
|
||||
} while (!stop_token.stop_requested());
|
||||
}
|
||||
|
||||
void Scheduler::AllocateWorkerCommandBuffer() {
|
||||
current_cmdbuf = vk::CommandBuffer(command_pool->Commit(), device.GetDispatchLoader());
|
||||
current_cmdbuf.Begin({
|
||||
.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,
|
||||
.pNext = nullptr,
|
||||
.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT,
|
||||
.pInheritanceInfo = nullptr,
|
||||
});
|
||||
}
|
||||
|
||||
void Scheduler::SubmitExecution(VkSemaphore signal_semaphore, VkSemaphore wait_semaphore) {
|
||||
EndPendingOperations();
|
||||
InvalidateState();
|
||||
|
||||
const u64 signal_value = master_semaphore->NextTick();
|
||||
Record([signal_semaphore, wait_semaphore, signal_value, this](vk::CommandBuffer cmdbuf) {
|
||||
cmdbuf.End();
|
||||
const VkSemaphore timeline_semaphore = master_semaphore->Handle();
|
||||
|
||||
const u32 num_signal_semaphores = signal_semaphore ? 2U : 1U;
|
||||
const std::array signal_values{signal_value, u64(0)};
|
||||
const std::array signal_semaphores{timeline_semaphore, signal_semaphore};
|
||||
|
||||
const u32 num_wait_semaphores = wait_semaphore ? 2U : 1U;
|
||||
const std::array wait_values{signal_value - 1, u64(1)};
|
||||
const std::array wait_semaphores{timeline_semaphore, wait_semaphore};
|
||||
static constexpr std::array<VkPipelineStageFlags, 2> wait_stage_masks{
|
||||
VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
|
||||
VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT,
|
||||
};
|
||||
|
||||
const VkTimelineSemaphoreSubmitInfoKHR timeline_si{
|
||||
.sType = VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO_KHR,
|
||||
.pNext = nullptr,
|
||||
.waitSemaphoreValueCount = num_wait_semaphores,
|
||||
.pWaitSemaphoreValues = wait_values.data(),
|
||||
.signalSemaphoreValueCount = num_signal_semaphores,
|
||||
.pSignalSemaphoreValues = signal_values.data(),
|
||||
};
|
||||
const VkSubmitInfo submit_info{
|
||||
.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO,
|
||||
.pNext = &timeline_si,
|
||||
.waitSemaphoreCount = num_wait_semaphores,
|
||||
.pWaitSemaphores = wait_semaphores.data(),
|
||||
.pWaitDstStageMask = wait_stage_masks.data(),
|
||||
.commandBufferCount = 1,
|
||||
.pCommandBuffers = cmdbuf.address(),
|
||||
.signalSemaphoreCount = num_signal_semaphores,
|
||||
.pSignalSemaphores = signal_semaphores.data(),
|
||||
};
|
||||
switch (const VkResult result = device.GetGraphicsQueue().Submit(submit_info)) {
|
||||
case VK_SUCCESS:
|
||||
break;
|
||||
case VK_ERROR_DEVICE_LOST:
|
||||
device.ReportLoss();
|
||||
[[fallthrough]];
|
||||
default:
|
||||
vk::Check(result);
|
||||
}
|
||||
});
|
||||
chunk->MarkSubmit();
|
||||
DispatchWork();
|
||||
}
|
||||
|
||||
void Scheduler::AllocateNewContext() {
|
||||
// Enable counters once again. These are disabled when a command buffer is finished.
|
||||
if (query_cache) {
|
||||
query_cache->UpdateCounters();
|
||||
}
|
||||
}
|
||||
|
||||
void Scheduler::InvalidateState() {
|
||||
state.graphics_pipeline = nullptr;
|
||||
state.rescaling_defined = false;
|
||||
state_tracker.InvalidateCommandBufferState();
|
||||
}
|
||||
|
||||
void Scheduler::EndPendingOperations() {
|
||||
query_cache->DisableStreams();
|
||||
EndRenderPass();
|
||||
}
|
||||
|
||||
void Scheduler::EndRenderPass() {
|
||||
if (!state.renderpass) {
|
||||
return;
|
||||
}
|
||||
Record([num_images = num_renderpass_images, images = renderpass_images,
|
||||
ranges = renderpass_image_ranges](vk::CommandBuffer cmdbuf) {
|
||||
std::array<VkImageMemoryBarrier, 9> barriers;
|
||||
for (size_t i = 0; i < num_images; ++i) {
|
||||
barriers[i] = VkImageMemoryBarrier{
|
||||
.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
|
||||
.pNext = nullptr,
|
||||
.srcAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT |
|
||||
VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT,
|
||||
.dstAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT |
|
||||
VK_ACCESS_COLOR_ATTACHMENT_READ_BIT |
|
||||
VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT |
|
||||
VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT |
|
||||
VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT,
|
||||
.oldLayout = VK_IMAGE_LAYOUT_GENERAL,
|
||||
.newLayout = VK_IMAGE_LAYOUT_GENERAL,
|
||||
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
|
||||
.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
|
||||
.image = images[i],
|
||||
.subresourceRange = ranges[i],
|
||||
};
|
||||
}
|
||||
cmdbuf.EndRenderPass();
|
||||
cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT |
|
||||
VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT |
|
||||
VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT,
|
||||
VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, 0, nullptr, nullptr,
|
||||
vk::Span(barriers.data(), num_images));
|
||||
});
|
||||
state.renderpass = nullptr;
|
||||
num_renderpass_images = 0;
|
||||
}
|
||||
|
||||
void Scheduler::AcquireNewChunk() {
|
||||
std::scoped_lock lock{reserve_mutex};
|
||||
if (chunk_reserve.empty()) {
|
||||
chunk = std::make_unique<CommandChunk>();
|
||||
return;
|
||||
}
|
||||
chunk = std::move(chunk_reserve.back());
|
||||
chunk_reserve.pop_back();
|
||||
}
|
||||
|
||||
} // namespace Vulkan
|
||||
|
||||
@@ -1,234 +1,234 @@
|
||||
// SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <condition_variable>
|
||||
#include <cstddef>
|
||||
#include <memory>
|
||||
#include <thread>
|
||||
#include <utility>
|
||||
#include <queue>
|
||||
|
||||
#include "common/alignment.h"
|
||||
#include "common/common_types.h"
|
||||
#include "video_core/renderer_vulkan/vk_master_semaphore.h"
|
||||
#include "video_core/vulkan_common/vulkan_wrapper.h"
|
||||
|
||||
namespace Vulkan {
|
||||
|
||||
class CommandPool;
|
||||
class Device;
|
||||
class Framebuffer;
|
||||
class GraphicsPipeline;
|
||||
class StateTracker;
|
||||
class QueryCache;
|
||||
|
||||
/// The scheduler abstracts command buffer and fence management with an interface that's able to do
|
||||
/// OpenGL-like operations on Vulkan command buffers.
|
||||
class Scheduler {
|
||||
public:
|
||||
explicit Scheduler(const Device& device, StateTracker& state_tracker);
|
||||
~Scheduler();
|
||||
|
||||
/// Sends the current execution context to the GPU.
|
||||
void Flush(VkSemaphore signal_semaphore = nullptr, VkSemaphore wait_semaphore = nullptr);
|
||||
|
||||
/// Sends the current execution context to the GPU and waits for it to complete.
|
||||
void Finish(VkSemaphore signal_semaphore = nullptr, VkSemaphore wait_semaphore = nullptr);
|
||||
|
||||
/// Waits for the worker thread to finish executing everything. After this function returns it's
|
||||
/// safe to touch worker resources.
|
||||
void WaitWorker();
|
||||
|
||||
/// Sends currently recorded work to the worker thread.
|
||||
void DispatchWork();
|
||||
|
||||
/// Requests to begin a renderpass.
|
||||
void RequestRenderpass(const Framebuffer* framebuffer);
|
||||
|
||||
/// Requests the current executino context to be able to execute operations only allowed outside
|
||||
/// of a renderpass.
|
||||
void RequestOutsideRenderPassOperationContext();
|
||||
|
||||
/// Update the pipeline to the current execution context.
|
||||
bool UpdateGraphicsPipeline(GraphicsPipeline* pipeline);
|
||||
|
||||
/// Update the rescaling state. Returns true if the state has to be updated.
|
||||
bool UpdateRescaling(bool is_rescaling);
|
||||
|
||||
/// Invalidates current command buffer state except for render passes
|
||||
void InvalidateState();
|
||||
|
||||
/// Assigns the query cache.
|
||||
void SetQueryCache(QueryCache& query_cache_) {
|
||||
query_cache = &query_cache_;
|
||||
}
|
||||
|
||||
/// Send work to a separate thread.
|
||||
template <typename T>
|
||||
void Record(T&& command) {
|
||||
if (chunk->Record(command)) {
|
||||
return;
|
||||
}
|
||||
DispatchWork();
|
||||
(void)chunk->Record(command);
|
||||
}
|
||||
|
||||
/// Returns the current command buffer tick.
|
||||
[[nodiscard]] u64 CurrentTick() const noexcept {
|
||||
return master_semaphore->CurrentTick();
|
||||
}
|
||||
|
||||
/// Returns true when a tick has been triggered by the GPU.
|
||||
[[nodiscard]] bool IsFree(u64 tick) const noexcept {
|
||||
return master_semaphore->IsFree(tick);
|
||||
}
|
||||
|
||||
/// Waits for the given tick to trigger on the GPU.
|
||||
void Wait(u64 tick) {
|
||||
if (tick >= master_semaphore->CurrentTick()) {
|
||||
// Make sure we are not waiting for the current tick without signalling
|
||||
Flush();
|
||||
}
|
||||
master_semaphore->Wait(tick);
|
||||
}
|
||||
|
||||
/// Returns the master timeline semaphore.
|
||||
[[nodiscard]] MasterSemaphore& GetMasterSemaphore() const noexcept {
|
||||
return *master_semaphore;
|
||||
}
|
||||
|
||||
private:
|
||||
class Command {
|
||||
public:
|
||||
virtual ~Command() = default;
|
||||
|
||||
virtual void Execute(vk::CommandBuffer cmdbuf) const = 0;
|
||||
|
||||
Command* GetNext() const {
|
||||
return next;
|
||||
}
|
||||
|
||||
void SetNext(Command* next_) {
|
||||
next = next_;
|
||||
}
|
||||
|
||||
private:
|
||||
Command* next = nullptr;
|
||||
};
|
||||
|
||||
template <typename T>
|
||||
class TypedCommand final : public Command {
|
||||
public:
|
||||
explicit TypedCommand(T&& command_) : command{std::move(command_)} {}
|
||||
~TypedCommand() override = default;
|
||||
|
||||
TypedCommand(TypedCommand&&) = delete;
|
||||
TypedCommand& operator=(TypedCommand&&) = delete;
|
||||
|
||||
void Execute(vk::CommandBuffer cmdbuf) const override {
|
||||
command(cmdbuf);
|
||||
}
|
||||
|
||||
private:
|
||||
T command;
|
||||
};
|
||||
|
||||
class CommandChunk final {
|
||||
public:
|
||||
void ExecuteAll(vk::CommandBuffer cmdbuf);
|
||||
|
||||
template <typename T>
|
||||
bool Record(T& command) {
|
||||
using FuncType = TypedCommand<T>;
|
||||
static_assert(sizeof(FuncType) < sizeof(data), "Lambda is too large");
|
||||
|
||||
command_offset = Common::AlignUp(command_offset, alignof(FuncType));
|
||||
if (command_offset > sizeof(data) - sizeof(FuncType)) {
|
||||
return false;
|
||||
}
|
||||
Command* const current_last = last;
|
||||
last = new (data.data() + command_offset) FuncType(std::move(command));
|
||||
|
||||
if (current_last) {
|
||||
current_last->SetNext(last);
|
||||
} else {
|
||||
first = last;
|
||||
}
|
||||
command_offset += sizeof(FuncType);
|
||||
return true;
|
||||
}
|
||||
|
||||
void MarkSubmit() {
|
||||
submit = true;
|
||||
}
|
||||
|
||||
bool Empty() const {
|
||||
return command_offset == 0;
|
||||
}
|
||||
|
||||
bool HasSubmit() const {
|
||||
return submit;
|
||||
}
|
||||
|
||||
private:
|
||||
Command* first = nullptr;
|
||||
Command* last = nullptr;
|
||||
|
||||
size_t command_offset = 0;
|
||||
bool submit = false;
|
||||
alignas(std::max_align_t) std::array<u8, 0x8000> data{};
|
||||
};
|
||||
|
||||
struct State {
|
||||
VkRenderPass renderpass = nullptr;
|
||||
VkFramebuffer framebuffer = nullptr;
|
||||
VkExtent2D render_area = {0, 0};
|
||||
GraphicsPipeline* graphics_pipeline = nullptr;
|
||||
bool is_rescaling = false;
|
||||
bool rescaling_defined = false;
|
||||
};
|
||||
|
||||
void WorkerThread(std::stop_token stop_token);
|
||||
|
||||
void AllocateWorkerCommandBuffer();
|
||||
|
||||
void SubmitExecution(VkSemaphore signal_semaphore, VkSemaphore wait_semaphore);
|
||||
|
||||
void AllocateNewContext();
|
||||
|
||||
void EndPendingOperations();
|
||||
|
||||
void EndRenderPass();
|
||||
|
||||
void AcquireNewChunk();
|
||||
|
||||
const Device& device;
|
||||
StateTracker& state_tracker;
|
||||
|
||||
std::unique_ptr<MasterSemaphore> master_semaphore;
|
||||
std::unique_ptr<CommandPool> command_pool;
|
||||
|
||||
QueryCache* query_cache = nullptr;
|
||||
|
||||
vk::CommandBuffer current_cmdbuf;
|
||||
|
||||
std::unique_ptr<CommandChunk> chunk;
|
||||
|
||||
State state;
|
||||
|
||||
u32 num_renderpass_images = 0;
|
||||
std::array<VkImage, 9> renderpass_images{};
|
||||
std::array<VkImageSubresourceRange, 9> renderpass_image_ranges{};
|
||||
|
||||
std::queue<std::unique_ptr<CommandChunk>> work_queue;
|
||||
std::vector<std::unique_ptr<CommandChunk>> chunk_reserve;
|
||||
std::mutex reserve_mutex;
|
||||
std::mutex work_mutex;
|
||||
std::condition_variable_any work_cv;
|
||||
std::condition_variable wait_cv;
|
||||
std::jthread worker_thread;
|
||||
};
|
||||
|
||||
} // namespace Vulkan
|
||||
// SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <condition_variable>
|
||||
#include <cstddef>
|
||||
#include <memory>
|
||||
#include <thread>
|
||||
#include <utility>
|
||||
#include <queue>
|
||||
|
||||
#include "common/alignment.h"
|
||||
#include "common/common_types.h"
|
||||
#include "video_core/renderer_vulkan/vk_master_semaphore.h"
|
||||
#include "video_core/vulkan_common/vulkan_wrapper.h"
|
||||
|
||||
namespace Vulkan {
|
||||
|
||||
class CommandPool;
|
||||
class Device;
|
||||
class Framebuffer;
|
||||
class GraphicsPipeline;
|
||||
class StateTracker;
|
||||
class QueryCache;
|
||||
|
||||
/// The scheduler abstracts command buffer and fence management with an interface that's able to do
|
||||
/// OpenGL-like operations on Vulkan command buffers.
|
||||
class Scheduler {
|
||||
public:
|
||||
explicit Scheduler(const Device& device, StateTracker& state_tracker);
|
||||
~Scheduler();
|
||||
|
||||
/// Sends the current execution context to the GPU.
|
||||
void Flush(VkSemaphore signal_semaphore = nullptr, VkSemaphore wait_semaphore = nullptr);
|
||||
|
||||
/// Sends the current execution context to the GPU and waits for it to complete.
|
||||
void Finish(VkSemaphore signal_semaphore = nullptr, VkSemaphore wait_semaphore = nullptr);
|
||||
|
||||
/// Waits for the worker thread to finish executing everything. After this function returns it's
|
||||
/// safe to touch worker resources.
|
||||
void WaitWorker();
|
||||
|
||||
/// Sends currently recorded work to the worker thread.
|
||||
void DispatchWork();
|
||||
|
||||
/// Requests to begin a renderpass.
|
||||
void RequestRenderpass(const Framebuffer* framebuffer);
|
||||
|
||||
/// Requests the current executino context to be able to execute operations only allowed outside
|
||||
/// of a renderpass.
|
||||
void RequestOutsideRenderPassOperationContext();
|
||||
|
||||
/// Update the pipeline to the current execution context.
|
||||
bool UpdateGraphicsPipeline(GraphicsPipeline* pipeline);
|
||||
|
||||
/// Update the rescaling state. Returns true if the state has to be updated.
|
||||
bool UpdateRescaling(bool is_rescaling);
|
||||
|
||||
/// Invalidates current command buffer state except for render passes
|
||||
void InvalidateState();
|
||||
|
||||
/// Assigns the query cache.
|
||||
void SetQueryCache(QueryCache& query_cache_) {
|
||||
query_cache = &query_cache_;
|
||||
}
|
||||
|
||||
/// Send work to a separate thread.
|
||||
template <typename T>
|
||||
void Record(T&& command) {
|
||||
if (chunk->Record(command)) {
|
||||
return;
|
||||
}
|
||||
DispatchWork();
|
||||
(void)chunk->Record(command);
|
||||
}
|
||||
|
||||
/// Returns the current command buffer tick.
|
||||
[[nodiscard]] u64 CurrentTick() const noexcept {
|
||||
return master_semaphore->CurrentTick();
|
||||
}
|
||||
|
||||
/// Returns true when a tick has been triggered by the GPU.
|
||||
[[nodiscard]] bool IsFree(u64 tick) const noexcept {
|
||||
return master_semaphore->IsFree(tick);
|
||||
}
|
||||
|
||||
/// Waits for the given tick to trigger on the GPU.
|
||||
void Wait(u64 tick) {
|
||||
if (tick >= master_semaphore->CurrentTick()) {
|
||||
// Make sure we are not waiting for the current tick without signalling
|
||||
Flush();
|
||||
}
|
||||
master_semaphore->Wait(tick);
|
||||
}
|
||||
|
||||
/// Returns the master timeline semaphore.
|
||||
[[nodiscard]] MasterSemaphore& GetMasterSemaphore() const noexcept {
|
||||
return *master_semaphore;
|
||||
}
|
||||
|
||||
private:
|
||||
class Command {
|
||||
public:
|
||||
virtual ~Command() = default;
|
||||
|
||||
virtual void Execute(vk::CommandBuffer cmdbuf) const = 0;
|
||||
|
||||
Command* GetNext() const {
|
||||
return next;
|
||||
}
|
||||
|
||||
void SetNext(Command* next_) {
|
||||
next = next_;
|
||||
}
|
||||
|
||||
private:
|
||||
Command* next = nullptr;
|
||||
};
|
||||
|
||||
template <typename T>
|
||||
class TypedCommand final : public Command {
|
||||
public:
|
||||
explicit TypedCommand(T&& command_) : command{std::move(command_)} {}
|
||||
~TypedCommand() override = default;
|
||||
|
||||
TypedCommand(TypedCommand&&) = delete;
|
||||
TypedCommand& operator=(TypedCommand&&) = delete;
|
||||
|
||||
void Execute(vk::CommandBuffer cmdbuf) const override {
|
||||
command(cmdbuf);
|
||||
}
|
||||
|
||||
private:
|
||||
T command;
|
||||
};
|
||||
|
||||
class CommandChunk final {
|
||||
public:
|
||||
void ExecuteAll(vk::CommandBuffer cmdbuf);
|
||||
|
||||
template <typename T>
|
||||
bool Record(T& command) {
|
||||
using FuncType = TypedCommand<T>;
|
||||
static_assert(sizeof(FuncType) < sizeof(data), "Lambda is too large");
|
||||
|
||||
command_offset = Common::AlignUp(command_offset, alignof(FuncType));
|
||||
if (command_offset > sizeof(data) - sizeof(FuncType)) {
|
||||
return false;
|
||||
}
|
||||
Command* const current_last = last;
|
||||
last = new (data.data() + command_offset) FuncType(std::move(command));
|
||||
|
||||
if (current_last) {
|
||||
current_last->SetNext(last);
|
||||
} else {
|
||||
first = last;
|
||||
}
|
||||
command_offset += sizeof(FuncType);
|
||||
return true;
|
||||
}
|
||||
|
||||
void MarkSubmit() {
|
||||
submit = true;
|
||||
}
|
||||
|
||||
bool Empty() const {
|
||||
return command_offset == 0;
|
||||
}
|
||||
|
||||
bool HasSubmit() const {
|
||||
return submit;
|
||||
}
|
||||
|
||||
private:
|
||||
Command* first = nullptr;
|
||||
Command* last = nullptr;
|
||||
|
||||
size_t command_offset = 0;
|
||||
bool submit = false;
|
||||
alignas(std::max_align_t) std::array<u8, 0x8000> data{};
|
||||
};
|
||||
|
||||
struct State {
|
||||
VkRenderPass renderpass = nullptr;
|
||||
VkFramebuffer framebuffer = nullptr;
|
||||
VkExtent2D render_area = {0, 0};
|
||||
GraphicsPipeline* graphics_pipeline = nullptr;
|
||||
bool is_rescaling = false;
|
||||
bool rescaling_defined = false;
|
||||
};
|
||||
|
||||
void WorkerThread(std::stop_token stop_token);
|
||||
|
||||
void AllocateWorkerCommandBuffer();
|
||||
|
||||
void SubmitExecution(VkSemaphore signal_semaphore, VkSemaphore wait_semaphore);
|
||||
|
||||
void AllocateNewContext();
|
||||
|
||||
void EndPendingOperations();
|
||||
|
||||
void EndRenderPass();
|
||||
|
||||
void AcquireNewChunk();
|
||||
|
||||
const Device& device;
|
||||
StateTracker& state_tracker;
|
||||
|
||||
std::unique_ptr<MasterSemaphore> master_semaphore;
|
||||
std::unique_ptr<CommandPool> command_pool;
|
||||
|
||||
QueryCache* query_cache = nullptr;
|
||||
|
||||
vk::CommandBuffer current_cmdbuf;
|
||||
|
||||
std::unique_ptr<CommandChunk> chunk;
|
||||
|
||||
State state;
|
||||
|
||||
u32 num_renderpass_images = 0;
|
||||
std::array<VkImage, 9> renderpass_images{};
|
||||
std::array<VkImageSubresourceRange, 9> renderpass_image_ranges{};
|
||||
|
||||
std::queue<std::unique_ptr<CommandChunk>> work_queue;
|
||||
std::vector<std::unique_ptr<CommandChunk>> chunk_reserve;
|
||||
std::mutex reserve_mutex;
|
||||
std::mutex work_mutex;
|
||||
std::condition_variable_any work_cv;
|
||||
std::condition_variable wait_cv;
|
||||
std::jthread worker_thread;
|
||||
};
|
||||
|
||||
} // namespace Vulkan
|
||||
|
||||
@@ -1,23 +1,23 @@
|
||||
// SPDX-FileCopyrightText: Copyright 2018 yuzu Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#include <cstring>
|
||||
|
||||
#include "common/common_types.h"
|
||||
#include "video_core/renderer_vulkan/vk_shader_util.h"
|
||||
#include "video_core/vulkan_common/vulkan_device.h"
|
||||
#include "video_core/vulkan_common/vulkan_wrapper.h"
|
||||
|
||||
namespace Vulkan {
|
||||
|
||||
vk::ShaderModule BuildShader(const Device& device, std::span<const u32> code) {
|
||||
return device.GetLogical().CreateShaderModule({
|
||||
.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO,
|
||||
.pNext = nullptr,
|
||||
.flags = 0,
|
||||
.codeSize = static_cast<u32>(code.size_bytes()),
|
||||
.pCode = code.data(),
|
||||
});
|
||||
}
|
||||
|
||||
} // namespace Vulkan
|
||||
// SPDX-FileCopyrightText: Copyright 2018 yuzu Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#include <cstring>
|
||||
|
||||
#include "common/common_types.h"
|
||||
#include "video_core/renderer_vulkan/vk_shader_util.h"
|
||||
#include "video_core/vulkan_common/vulkan_device.h"
|
||||
#include "video_core/vulkan_common/vulkan_wrapper.h"
|
||||
|
||||
namespace Vulkan {
|
||||
|
||||
vk::ShaderModule BuildShader(const Device& device, std::span<const u32> code) {
|
||||
return device.GetLogical().CreateShaderModule({
|
||||
.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO,
|
||||
.pNext = nullptr,
|
||||
.flags = 0,
|
||||
.codeSize = static_cast<u32>(code.size_bytes()),
|
||||
.pCode = code.data(),
|
||||
});
|
||||
}
|
||||
|
||||
} // namespace Vulkan
|
||||
|
||||
@@ -1,17 +1,17 @@
|
||||
// SPDX-FileCopyrightText: Copyright 2018 yuzu Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <span>
|
||||
|
||||
#include "common/common_types.h"
|
||||
#include "video_core/vulkan_common/vulkan_wrapper.h"
|
||||
|
||||
namespace Vulkan {
|
||||
|
||||
class Device;
|
||||
|
||||
vk::ShaderModule BuildShader(const Device& device, std::span<const u32> code);
|
||||
|
||||
} // namespace Vulkan
|
||||
// SPDX-FileCopyrightText: Copyright 2018 yuzu Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <span>
|
||||
|
||||
#include "common/common_types.h"
|
||||
#include "video_core/vulkan_common/vulkan_wrapper.h"
|
||||
|
||||
namespace Vulkan {
|
||||
|
||||
class Device;
|
||||
|
||||
vk::ShaderModule BuildShader(const Device& device, std::span<const u32> code);
|
||||
|
||||
} // namespace Vulkan
|
||||
|
||||
@@ -1,300 +1,300 @@
|
||||
// SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#include <algorithm>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
#include <fmt/format.h>
|
||||
|
||||
#include "common/alignment.h"
|
||||
#include "common/assert.h"
|
||||
#include "common/bit_util.h"
|
||||
#include "common/common_types.h"
|
||||
#include "common/literals.h"
|
||||
#include "video_core/renderer_vulkan/vk_scheduler.h"
|
||||
#include "video_core/renderer_vulkan/vk_staging_buffer_pool.h"
|
||||
#include "video_core/vulkan_common/vulkan_device.h"
|
||||
#include "video_core/vulkan_common/vulkan_wrapper.h"
|
||||
|
||||
namespace Vulkan {
|
||||
namespace {
|
||||
|
||||
using namespace Common::Literals;
|
||||
|
||||
// Maximum potential alignment of a Vulkan buffer
|
||||
constexpr VkDeviceSize MAX_ALIGNMENT = 256;
|
||||
// Maximum size to put elements in the stream buffer
|
||||
constexpr VkDeviceSize MAX_STREAM_BUFFER_REQUEST_SIZE = 8_MiB;
|
||||
// Stream buffer size in bytes
|
||||
constexpr VkDeviceSize STREAM_BUFFER_SIZE = 128_MiB;
|
||||
constexpr VkDeviceSize REGION_SIZE = STREAM_BUFFER_SIZE / StagingBufferPool::NUM_SYNCS;
|
||||
|
||||
constexpr VkMemoryPropertyFlags HOST_FLAGS =
|
||||
VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT;
|
||||
constexpr VkMemoryPropertyFlags STREAM_FLAGS = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | HOST_FLAGS;
|
||||
|
||||
bool IsStreamHeap(VkMemoryHeap heap) noexcept {
|
||||
return STREAM_BUFFER_SIZE < (heap.size * 2) / 3;
|
||||
}
|
||||
|
||||
std::optional<u32> FindMemoryTypeIndex(const VkPhysicalDeviceMemoryProperties& props, u32 type_mask,
|
||||
VkMemoryPropertyFlags flags) noexcept {
|
||||
for (u32 type_index = 0; type_index < props.memoryTypeCount; ++type_index) {
|
||||
if (((type_mask >> type_index) & 1) == 0) {
|
||||
// Memory type is incompatible
|
||||
continue;
|
||||
}
|
||||
const VkMemoryType& memory_type = props.memoryTypes[type_index];
|
||||
if ((memory_type.propertyFlags & flags) != flags) {
|
||||
// Memory type doesn't have the flags we want
|
||||
continue;
|
||||
}
|
||||
if (!IsStreamHeap(props.memoryHeaps[memory_type.heapIndex])) {
|
||||
// Memory heap is not suitable for streaming
|
||||
continue;
|
||||
}
|
||||
// Success!
|
||||
return type_index;
|
||||
}
|
||||
return std::nullopt;
|
||||
}
|
||||
|
||||
u32 FindMemoryTypeIndex(const VkPhysicalDeviceMemoryProperties& props, u32 type_mask,
|
||||
bool try_device_local) {
|
||||
std::optional<u32> type;
|
||||
if (try_device_local) {
|
||||
// Try to find a DEVICE_LOCAL_BIT type, Nvidia and AMD have a dedicated heap for this
|
||||
type = FindMemoryTypeIndex(props, type_mask, STREAM_FLAGS);
|
||||
if (type) {
|
||||
return *type;
|
||||
}
|
||||
}
|
||||
// Otherwise try without the DEVICE_LOCAL_BIT
|
||||
type = FindMemoryTypeIndex(props, type_mask, HOST_FLAGS);
|
||||
if (type) {
|
||||
return *type;
|
||||
}
|
||||
// This should never happen, and in case it does, signal it as an out of memory situation
|
||||
throw vk::Exception(VK_ERROR_OUT_OF_DEVICE_MEMORY);
|
||||
}
|
||||
|
||||
size_t Region(size_t iterator) noexcept {
|
||||
return iterator / REGION_SIZE;
|
||||
}
|
||||
} // Anonymous namespace
|
||||
|
||||
StagingBufferPool::StagingBufferPool(const Device& device_, MemoryAllocator& memory_allocator_,
|
||||
Scheduler& scheduler_)
|
||||
: device{device_}, memory_allocator{memory_allocator_}, scheduler{scheduler_} {
|
||||
const vk::Device& dev = device.GetLogical();
|
||||
stream_buffer = dev.CreateBuffer(VkBufferCreateInfo{
|
||||
.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
|
||||
.pNext = nullptr,
|
||||
.flags = 0,
|
||||
.size = STREAM_BUFFER_SIZE,
|
||||
.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT |
|
||||
VK_BUFFER_USAGE_INDEX_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT,
|
||||
.sharingMode = VK_SHARING_MODE_EXCLUSIVE,
|
||||
.queueFamilyIndexCount = 0,
|
||||
.pQueueFamilyIndices = nullptr,
|
||||
});
|
||||
if (device.HasDebuggingToolAttached()) {
|
||||
stream_buffer.SetObjectNameEXT("Stream Buffer");
|
||||
}
|
||||
VkMemoryDedicatedRequirements dedicated_reqs{
|
||||
.sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS,
|
||||
.pNext = nullptr,
|
||||
.prefersDedicatedAllocation = VK_FALSE,
|
||||
.requiresDedicatedAllocation = VK_FALSE,
|
||||
};
|
||||
const auto requirements = dev.GetBufferMemoryRequirements(*stream_buffer, &dedicated_reqs);
|
||||
const bool make_dedicated = dedicated_reqs.prefersDedicatedAllocation == VK_TRUE ||
|
||||
dedicated_reqs.requiresDedicatedAllocation == VK_TRUE;
|
||||
const VkMemoryDedicatedAllocateInfo dedicated_info{
|
||||
.sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO,
|
||||
.pNext = nullptr,
|
||||
.image = nullptr,
|
||||
.buffer = *stream_buffer,
|
||||
};
|
||||
const auto memory_properties = device.GetPhysical().GetMemoryProperties().memoryProperties;
|
||||
VkMemoryAllocateInfo stream_memory_info{
|
||||
.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
|
||||
.pNext = make_dedicated ? &dedicated_info : nullptr,
|
||||
.allocationSize = requirements.size,
|
||||
.memoryTypeIndex =
|
||||
FindMemoryTypeIndex(memory_properties, requirements.memoryTypeBits, true),
|
||||
};
|
||||
stream_memory = dev.TryAllocateMemory(stream_memory_info);
|
||||
if (!stream_memory) {
|
||||
LOG_INFO(Render_Vulkan, "Dynamic memory allocation failed, trying with system memory");
|
||||
stream_memory_info.memoryTypeIndex =
|
||||
FindMemoryTypeIndex(memory_properties, requirements.memoryTypeBits, false);
|
||||
stream_memory = dev.AllocateMemory(stream_memory_info);
|
||||
}
|
||||
|
||||
if (device.HasDebuggingToolAttached()) {
|
||||
stream_memory.SetObjectNameEXT("Stream Buffer Memory");
|
||||
}
|
||||
stream_buffer.BindMemory(*stream_memory, 0);
|
||||
stream_pointer = stream_memory.Map(0, STREAM_BUFFER_SIZE);
|
||||
}
|
||||
|
||||
StagingBufferPool::~StagingBufferPool() = default;
|
||||
|
||||
StagingBufferRef StagingBufferPool::Request(size_t size, MemoryUsage usage) {
|
||||
if (usage == MemoryUsage::Upload && size <= MAX_STREAM_BUFFER_REQUEST_SIZE) {
|
||||
return GetStreamBuffer(size);
|
||||
}
|
||||
return GetStagingBuffer(size, usage);
|
||||
}
|
||||
|
||||
void StagingBufferPool::TickFrame() {
|
||||
current_delete_level = (current_delete_level + 1) % NUM_LEVELS;
|
||||
|
||||
ReleaseCache(MemoryUsage::DeviceLocal);
|
||||
ReleaseCache(MemoryUsage::Upload);
|
||||
ReleaseCache(MemoryUsage::Download);
|
||||
}
|
||||
|
||||
StagingBufferRef StagingBufferPool::GetStreamBuffer(size_t size) {
|
||||
if (AreRegionsActive(Region(free_iterator) + 1,
|
||||
std::min(Region(iterator + size) + 1, NUM_SYNCS))) {
|
||||
// Avoid waiting for the previous usages to be free
|
||||
return GetStagingBuffer(size, MemoryUsage::Upload);
|
||||
}
|
||||
const u64 current_tick = scheduler.CurrentTick();
|
||||
std::fill(sync_ticks.begin() + Region(used_iterator), sync_ticks.begin() + Region(iterator),
|
||||
current_tick);
|
||||
used_iterator = iterator;
|
||||
free_iterator = std::max(free_iterator, iterator + size);
|
||||
|
||||
if (iterator + size >= STREAM_BUFFER_SIZE) {
|
||||
std::fill(sync_ticks.begin() + Region(used_iterator), sync_ticks.begin() + NUM_SYNCS,
|
||||
current_tick);
|
||||
used_iterator = 0;
|
||||
iterator = 0;
|
||||
free_iterator = size;
|
||||
|
||||
if (AreRegionsActive(0, Region(size) + 1)) {
|
||||
// Avoid waiting for the previous usages to be free
|
||||
return GetStagingBuffer(size, MemoryUsage::Upload);
|
||||
}
|
||||
}
|
||||
const size_t offset = iterator;
|
||||
iterator = Common::AlignUp(iterator + size, MAX_ALIGNMENT);
|
||||
return StagingBufferRef{
|
||||
.buffer = *stream_buffer,
|
||||
.offset = static_cast<VkDeviceSize>(offset),
|
||||
.mapped_span = std::span<u8>(stream_pointer + offset, size),
|
||||
};
|
||||
}
|
||||
|
||||
bool StagingBufferPool::AreRegionsActive(size_t region_begin, size_t region_end) const {
|
||||
const u64 gpu_tick = scheduler.GetMasterSemaphore().KnownGpuTick();
|
||||
return std::any_of(sync_ticks.begin() + region_begin, sync_ticks.begin() + region_end,
|
||||
[gpu_tick](u64 sync_tick) { return gpu_tick < sync_tick; });
|
||||
};
|
||||
|
||||
StagingBufferRef StagingBufferPool::GetStagingBuffer(size_t size, MemoryUsage usage) {
|
||||
if (const std::optional<StagingBufferRef> ref = TryGetReservedBuffer(size, usage)) {
|
||||
return *ref;
|
||||
}
|
||||
return CreateStagingBuffer(size, usage);
|
||||
}
|
||||
|
||||
std::optional<StagingBufferRef> StagingBufferPool::TryGetReservedBuffer(size_t size,
|
||||
MemoryUsage usage) {
|
||||
StagingBuffers& cache_level = GetCache(usage)[Common::Log2Ceil64(size)];
|
||||
|
||||
const auto is_free = [this](const StagingBuffer& entry) {
|
||||
return scheduler.IsFree(entry.tick);
|
||||
};
|
||||
auto& entries = cache_level.entries;
|
||||
const auto hint_it = entries.begin() + cache_level.iterate_index;
|
||||
auto it = std::find_if(entries.begin() + cache_level.iterate_index, entries.end(), is_free);
|
||||
if (it == entries.end()) {
|
||||
it = std::find_if(entries.begin(), hint_it, is_free);
|
||||
if (it == hint_it) {
|
||||
return std::nullopt;
|
||||
}
|
||||
}
|
||||
cache_level.iterate_index = std::distance(entries.begin(), it) + 1;
|
||||
it->tick = scheduler.CurrentTick();
|
||||
return it->Ref();
|
||||
}
|
||||
|
||||
StagingBufferRef StagingBufferPool::CreateStagingBuffer(size_t size, MemoryUsage usage) {
|
||||
const u32 log2 = Common::Log2Ceil64(size);
|
||||
vk::Buffer buffer = device.GetLogical().CreateBuffer({
|
||||
.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
|
||||
.pNext = nullptr,
|
||||
.flags = 0,
|
||||
.size = 1ULL << log2,
|
||||
.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT |
|
||||
VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT |
|
||||
VK_BUFFER_USAGE_INDEX_BUFFER_BIT | VK_BUFFER_USAGE_VERTEX_BUFFER_BIT,
|
||||
.sharingMode = VK_SHARING_MODE_EXCLUSIVE,
|
||||
.queueFamilyIndexCount = 0,
|
||||
.pQueueFamilyIndices = nullptr,
|
||||
});
|
||||
if (device.HasDebuggingToolAttached()) {
|
||||
++buffer_index;
|
||||
buffer.SetObjectNameEXT(fmt::format("Staging Buffer {}", buffer_index).c_str());
|
||||
}
|
||||
MemoryCommit commit = memory_allocator.Commit(buffer, usage);
|
||||
const std::span<u8> mapped_span = IsHostVisible(usage) ? commit.Map() : std::span<u8>{};
|
||||
|
||||
StagingBuffer& entry = GetCache(usage)[log2].entries.emplace_back(StagingBuffer{
|
||||
.buffer = std::move(buffer),
|
||||
.commit = std::move(commit),
|
||||
.mapped_span = mapped_span,
|
||||
.tick = scheduler.CurrentTick(),
|
||||
});
|
||||
return entry.Ref();
|
||||
}
|
||||
|
||||
StagingBufferPool::StagingBuffersCache& StagingBufferPool::GetCache(MemoryUsage usage) {
|
||||
switch (usage) {
|
||||
case MemoryUsage::DeviceLocal:
|
||||
return device_local_cache;
|
||||
case MemoryUsage::Upload:
|
||||
return upload_cache;
|
||||
case MemoryUsage::Download:
|
||||
return download_cache;
|
||||
default:
|
||||
ASSERT_MSG(false, "Invalid memory usage={}", usage);
|
||||
return upload_cache;
|
||||
}
|
||||
}
|
||||
|
||||
void StagingBufferPool::ReleaseCache(MemoryUsage usage) {
|
||||
ReleaseLevel(GetCache(usage), current_delete_level);
|
||||
}
|
||||
|
||||
void StagingBufferPool::ReleaseLevel(StagingBuffersCache& cache, size_t log2) {
|
||||
constexpr size_t deletions_per_tick = 16;
|
||||
auto& staging = cache[log2];
|
||||
auto& entries = staging.entries;
|
||||
const size_t old_size = entries.size();
|
||||
|
||||
const auto is_deleteable = [this](const StagingBuffer& entry) {
|
||||
return scheduler.IsFree(entry.tick);
|
||||
};
|
||||
const size_t begin_offset = staging.delete_index;
|
||||
const size_t end_offset = std::min(begin_offset + deletions_per_tick, old_size);
|
||||
const auto begin = entries.begin() + begin_offset;
|
||||
const auto end = entries.begin() + end_offset;
|
||||
entries.erase(std::remove_if(begin, end, is_deleteable), end);
|
||||
|
||||
const size_t new_size = entries.size();
|
||||
staging.delete_index += deletions_per_tick;
|
||||
if (staging.delete_index >= new_size) {
|
||||
staging.delete_index = 0;
|
||||
}
|
||||
if (staging.iterate_index > new_size) {
|
||||
staging.iterate_index = 0;
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace Vulkan
|
||||
// SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#include <algorithm>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
#include <fmt/format.h>
|
||||
|
||||
#include "common/alignment.h"
|
||||
#include "common/assert.h"
|
||||
#include "common/bit_util.h"
|
||||
#include "common/common_types.h"
|
||||
#include "common/literals.h"
|
||||
#include "video_core/renderer_vulkan/vk_scheduler.h"
|
||||
#include "video_core/renderer_vulkan/vk_staging_buffer_pool.h"
|
||||
#include "video_core/vulkan_common/vulkan_device.h"
|
||||
#include "video_core/vulkan_common/vulkan_wrapper.h"
|
||||
|
||||
namespace Vulkan {
|
||||
namespace {
|
||||
|
||||
using namespace Common::Literals;
|
||||
|
||||
// Maximum potential alignment of a Vulkan buffer
|
||||
constexpr VkDeviceSize MAX_ALIGNMENT = 256;
|
||||
// Maximum size to put elements in the stream buffer
|
||||
constexpr VkDeviceSize MAX_STREAM_BUFFER_REQUEST_SIZE = 8_MiB;
|
||||
// Stream buffer size in bytes
|
||||
constexpr VkDeviceSize STREAM_BUFFER_SIZE = 128_MiB;
|
||||
constexpr VkDeviceSize REGION_SIZE = STREAM_BUFFER_SIZE / StagingBufferPool::NUM_SYNCS;
|
||||
|
||||
constexpr VkMemoryPropertyFlags HOST_FLAGS =
|
||||
VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT;
|
||||
constexpr VkMemoryPropertyFlags STREAM_FLAGS = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | HOST_FLAGS;
|
||||
|
||||
bool IsStreamHeap(VkMemoryHeap heap) noexcept {
|
||||
return STREAM_BUFFER_SIZE < (heap.size * 2) / 3;
|
||||
}
|
||||
|
||||
std::optional<u32> FindMemoryTypeIndex(const VkPhysicalDeviceMemoryProperties& props, u32 type_mask,
|
||||
VkMemoryPropertyFlags flags) noexcept {
|
||||
for (u32 type_index = 0; type_index < props.memoryTypeCount; ++type_index) {
|
||||
if (((type_mask >> type_index) & 1) == 0) {
|
||||
// Memory type is incompatible
|
||||
continue;
|
||||
}
|
||||
const VkMemoryType& memory_type = props.memoryTypes[type_index];
|
||||
if ((memory_type.propertyFlags & flags) != flags) {
|
||||
// Memory type doesn't have the flags we want
|
||||
continue;
|
||||
}
|
||||
if (!IsStreamHeap(props.memoryHeaps[memory_type.heapIndex])) {
|
||||
// Memory heap is not suitable for streaming
|
||||
continue;
|
||||
}
|
||||
// Success!
|
||||
return type_index;
|
||||
}
|
||||
return std::nullopt;
|
||||
}
|
||||
|
||||
u32 FindMemoryTypeIndex(const VkPhysicalDeviceMemoryProperties& props, u32 type_mask,
|
||||
bool try_device_local) {
|
||||
std::optional<u32> type;
|
||||
if (try_device_local) {
|
||||
// Try to find a DEVICE_LOCAL_BIT type, Nvidia and AMD have a dedicated heap for this
|
||||
type = FindMemoryTypeIndex(props, type_mask, STREAM_FLAGS);
|
||||
if (type) {
|
||||
return *type;
|
||||
}
|
||||
}
|
||||
// Otherwise try without the DEVICE_LOCAL_BIT
|
||||
type = FindMemoryTypeIndex(props, type_mask, HOST_FLAGS);
|
||||
if (type) {
|
||||
return *type;
|
||||
}
|
||||
// This should never happen, and in case it does, signal it as an out of memory situation
|
||||
throw vk::Exception(VK_ERROR_OUT_OF_DEVICE_MEMORY);
|
||||
}
|
||||
|
||||
size_t Region(size_t iterator) noexcept {
|
||||
return iterator / REGION_SIZE;
|
||||
}
|
||||
} // Anonymous namespace
|
||||
|
||||
StagingBufferPool::StagingBufferPool(const Device& device_, MemoryAllocator& memory_allocator_,
|
||||
Scheduler& scheduler_)
|
||||
: device{device_}, memory_allocator{memory_allocator_}, scheduler{scheduler_} {
|
||||
const vk::Device& dev = device.GetLogical();
|
||||
stream_buffer = dev.CreateBuffer(VkBufferCreateInfo{
|
||||
.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
|
||||
.pNext = nullptr,
|
||||
.flags = 0,
|
||||
.size = STREAM_BUFFER_SIZE,
|
||||
.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT |
|
||||
VK_BUFFER_USAGE_INDEX_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT,
|
||||
.sharingMode = VK_SHARING_MODE_EXCLUSIVE,
|
||||
.queueFamilyIndexCount = 0,
|
||||
.pQueueFamilyIndices = nullptr,
|
||||
});
|
||||
if (device.HasDebuggingToolAttached()) {
|
||||
stream_buffer.SetObjectNameEXT("Stream Buffer");
|
||||
}
|
||||
VkMemoryDedicatedRequirements dedicated_reqs{
|
||||
.sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS,
|
||||
.pNext = nullptr,
|
||||
.prefersDedicatedAllocation = VK_FALSE,
|
||||
.requiresDedicatedAllocation = VK_FALSE,
|
||||
};
|
||||
const auto requirements = dev.GetBufferMemoryRequirements(*stream_buffer, &dedicated_reqs);
|
||||
const bool make_dedicated = dedicated_reqs.prefersDedicatedAllocation == VK_TRUE ||
|
||||
dedicated_reqs.requiresDedicatedAllocation == VK_TRUE;
|
||||
const VkMemoryDedicatedAllocateInfo dedicated_info{
|
||||
.sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO,
|
||||
.pNext = nullptr,
|
||||
.image = nullptr,
|
||||
.buffer = *stream_buffer,
|
||||
};
|
||||
const auto memory_properties = device.GetPhysical().GetMemoryProperties().memoryProperties;
|
||||
VkMemoryAllocateInfo stream_memory_info{
|
||||
.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
|
||||
.pNext = make_dedicated ? &dedicated_info : nullptr,
|
||||
.allocationSize = requirements.size,
|
||||
.memoryTypeIndex =
|
||||
FindMemoryTypeIndex(memory_properties, requirements.memoryTypeBits, true),
|
||||
};
|
||||
stream_memory = dev.TryAllocateMemory(stream_memory_info);
|
||||
if (!stream_memory) {
|
||||
LOG_INFO(Render_Vulkan, "Dynamic memory allocation failed, trying with system memory");
|
||||
stream_memory_info.memoryTypeIndex =
|
||||
FindMemoryTypeIndex(memory_properties, requirements.memoryTypeBits, false);
|
||||
stream_memory = dev.AllocateMemory(stream_memory_info);
|
||||
}
|
||||
|
||||
if (device.HasDebuggingToolAttached()) {
|
||||
stream_memory.SetObjectNameEXT("Stream Buffer Memory");
|
||||
}
|
||||
stream_buffer.BindMemory(*stream_memory, 0);
|
||||
stream_pointer = stream_memory.Map(0, STREAM_BUFFER_SIZE);
|
||||
}
|
||||
|
||||
StagingBufferPool::~StagingBufferPool() = default;
|
||||
|
||||
StagingBufferRef StagingBufferPool::Request(size_t size, MemoryUsage usage) {
|
||||
if (usage == MemoryUsage::Upload && size <= MAX_STREAM_BUFFER_REQUEST_SIZE) {
|
||||
return GetStreamBuffer(size);
|
||||
}
|
||||
return GetStagingBuffer(size, usage);
|
||||
}
|
||||
|
||||
void StagingBufferPool::TickFrame() {
|
||||
current_delete_level = (current_delete_level + 1) % NUM_LEVELS;
|
||||
|
||||
ReleaseCache(MemoryUsage::DeviceLocal);
|
||||
ReleaseCache(MemoryUsage::Upload);
|
||||
ReleaseCache(MemoryUsage::Download);
|
||||
}
|
||||
|
||||
StagingBufferRef StagingBufferPool::GetStreamBuffer(size_t size) {
|
||||
if (AreRegionsActive(Region(free_iterator) + 1,
|
||||
std::min(Region(iterator + size) + 1, NUM_SYNCS))) {
|
||||
// Avoid waiting for the previous usages to be free
|
||||
return GetStagingBuffer(size, MemoryUsage::Upload);
|
||||
}
|
||||
const u64 current_tick = scheduler.CurrentTick();
|
||||
std::fill(sync_ticks.begin() + Region(used_iterator), sync_ticks.begin() + Region(iterator),
|
||||
current_tick);
|
||||
used_iterator = iterator;
|
||||
free_iterator = std::max(free_iterator, iterator + size);
|
||||
|
||||
if (iterator + size >= STREAM_BUFFER_SIZE) {
|
||||
std::fill(sync_ticks.begin() + Region(used_iterator), sync_ticks.begin() + NUM_SYNCS,
|
||||
current_tick);
|
||||
used_iterator = 0;
|
||||
iterator = 0;
|
||||
free_iterator = size;
|
||||
|
||||
if (AreRegionsActive(0, Region(size) + 1)) {
|
||||
// Avoid waiting for the previous usages to be free
|
||||
return GetStagingBuffer(size, MemoryUsage::Upload);
|
||||
}
|
||||
}
|
||||
const size_t offset = iterator;
|
||||
iterator = Common::AlignUp(iterator + size, MAX_ALIGNMENT);
|
||||
return StagingBufferRef{
|
||||
.buffer = *stream_buffer,
|
||||
.offset = static_cast<VkDeviceSize>(offset),
|
||||
.mapped_span = std::span<u8>(stream_pointer + offset, size),
|
||||
};
|
||||
}
|
||||
|
||||
bool StagingBufferPool::AreRegionsActive(size_t region_begin, size_t region_end) const {
|
||||
const u64 gpu_tick = scheduler.GetMasterSemaphore().KnownGpuTick();
|
||||
return std::any_of(sync_ticks.begin() + region_begin, sync_ticks.begin() + region_end,
|
||||
[gpu_tick](u64 sync_tick) { return gpu_tick < sync_tick; });
|
||||
};
|
||||
|
||||
StagingBufferRef StagingBufferPool::GetStagingBuffer(size_t size, MemoryUsage usage) {
|
||||
if (const std::optional<StagingBufferRef> ref = TryGetReservedBuffer(size, usage)) {
|
||||
return *ref;
|
||||
}
|
||||
return CreateStagingBuffer(size, usage);
|
||||
}
|
||||
|
||||
std::optional<StagingBufferRef> StagingBufferPool::TryGetReservedBuffer(size_t size,
|
||||
MemoryUsage usage) {
|
||||
StagingBuffers& cache_level = GetCache(usage)[Common::Log2Ceil64(size)];
|
||||
|
||||
const auto is_free = [this](const StagingBuffer& entry) {
|
||||
return scheduler.IsFree(entry.tick);
|
||||
};
|
||||
auto& entries = cache_level.entries;
|
||||
const auto hint_it = entries.begin() + cache_level.iterate_index;
|
||||
auto it = std::find_if(entries.begin() + cache_level.iterate_index, entries.end(), is_free);
|
||||
if (it == entries.end()) {
|
||||
it = std::find_if(entries.begin(), hint_it, is_free);
|
||||
if (it == hint_it) {
|
||||
return std::nullopt;
|
||||
}
|
||||
}
|
||||
cache_level.iterate_index = std::distance(entries.begin(), it) + 1;
|
||||
it->tick = scheduler.CurrentTick();
|
||||
return it->Ref();
|
||||
}
|
||||
|
||||
StagingBufferRef StagingBufferPool::CreateStagingBuffer(size_t size, MemoryUsage usage) {
|
||||
const u32 log2 = Common::Log2Ceil64(size);
|
||||
vk::Buffer buffer = device.GetLogical().CreateBuffer({
|
||||
.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
|
||||
.pNext = nullptr,
|
||||
.flags = 0,
|
||||
.size = 1ULL << log2,
|
||||
.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT |
|
||||
VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT |
|
||||
VK_BUFFER_USAGE_INDEX_BUFFER_BIT | VK_BUFFER_USAGE_VERTEX_BUFFER_BIT,
|
||||
.sharingMode = VK_SHARING_MODE_EXCLUSIVE,
|
||||
.queueFamilyIndexCount = 0,
|
||||
.pQueueFamilyIndices = nullptr,
|
||||
});
|
||||
if (device.HasDebuggingToolAttached()) {
|
||||
++buffer_index;
|
||||
buffer.SetObjectNameEXT(fmt::format("Staging Buffer {}", buffer_index).c_str());
|
||||
}
|
||||
MemoryCommit commit = memory_allocator.Commit(buffer, usage);
|
||||
const std::span<u8> mapped_span = IsHostVisible(usage) ? commit.Map() : std::span<u8>{};
|
||||
|
||||
StagingBuffer& entry = GetCache(usage)[log2].entries.emplace_back(StagingBuffer{
|
||||
.buffer = std::move(buffer),
|
||||
.commit = std::move(commit),
|
||||
.mapped_span = mapped_span,
|
||||
.tick = scheduler.CurrentTick(),
|
||||
});
|
||||
return entry.Ref();
|
||||
}
|
||||
|
||||
StagingBufferPool::StagingBuffersCache& StagingBufferPool::GetCache(MemoryUsage usage) {
|
||||
switch (usage) {
|
||||
case MemoryUsage::DeviceLocal:
|
||||
return device_local_cache;
|
||||
case MemoryUsage::Upload:
|
||||
return upload_cache;
|
||||
case MemoryUsage::Download:
|
||||
return download_cache;
|
||||
default:
|
||||
ASSERT_MSG(false, "Invalid memory usage={}", usage);
|
||||
return upload_cache;
|
||||
}
|
||||
}
|
||||
|
||||
void StagingBufferPool::ReleaseCache(MemoryUsage usage) {
|
||||
ReleaseLevel(GetCache(usage), current_delete_level);
|
||||
}
|
||||
|
||||
void StagingBufferPool::ReleaseLevel(StagingBuffersCache& cache, size_t log2) {
|
||||
constexpr size_t deletions_per_tick = 16;
|
||||
auto& staging = cache[log2];
|
||||
auto& entries = staging.entries;
|
||||
const size_t old_size = entries.size();
|
||||
|
||||
const auto is_deleteable = [this](const StagingBuffer& entry) {
|
||||
return scheduler.IsFree(entry.tick);
|
||||
};
|
||||
const size_t begin_offset = staging.delete_index;
|
||||
const size_t end_offset = std::min(begin_offset + deletions_per_tick, old_size);
|
||||
const auto begin = entries.begin() + begin_offset;
|
||||
const auto end = entries.begin() + end_offset;
|
||||
entries.erase(std::remove_if(begin, end, is_deleteable), end);
|
||||
|
||||
const size_t new_size = entries.size();
|
||||
staging.delete_index += deletions_per_tick;
|
||||
if (staging.delete_index >= new_size) {
|
||||
staging.delete_index = 0;
|
||||
}
|
||||
if (staging.iterate_index > new_size) {
|
||||
staging.iterate_index = 0;
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace Vulkan
|
||||
|
||||
@@ -1,104 +1,104 @@
|
||||
// SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <climits>
|
||||
#include <vector>
|
||||
|
||||
#include "common/common_types.h"
|
||||
|
||||
#include "video_core/vulkan_common/vulkan_memory_allocator.h"
|
||||
#include "video_core/vulkan_common/vulkan_wrapper.h"
|
||||
|
||||
namespace Vulkan {
|
||||
|
||||
class Device;
|
||||
class Scheduler;
|
||||
|
||||
struct StagingBufferRef {
|
||||
VkBuffer buffer;
|
||||
VkDeviceSize offset;
|
||||
std::span<u8> mapped_span;
|
||||
};
|
||||
|
||||
class StagingBufferPool {
|
||||
public:
|
||||
static constexpr size_t NUM_SYNCS = 16;
|
||||
|
||||
explicit StagingBufferPool(const Device& device, MemoryAllocator& memory_allocator,
|
||||
Scheduler& scheduler);
|
||||
~StagingBufferPool();
|
||||
|
||||
StagingBufferRef Request(size_t size, MemoryUsage usage);
|
||||
|
||||
void TickFrame();
|
||||
|
||||
private:
|
||||
struct StreamBufferCommit {
|
||||
size_t upper_bound;
|
||||
u64 tick;
|
||||
};
|
||||
|
||||
struct StagingBuffer {
|
||||
vk::Buffer buffer;
|
||||
MemoryCommit commit;
|
||||
std::span<u8> mapped_span;
|
||||
u64 tick = 0;
|
||||
|
||||
StagingBufferRef Ref() const noexcept {
|
||||
return {
|
||||
.buffer = *buffer,
|
||||
.offset = 0,
|
||||
.mapped_span = mapped_span,
|
||||
};
|
||||
}
|
||||
};
|
||||
|
||||
struct StagingBuffers {
|
||||
std::vector<StagingBuffer> entries;
|
||||
size_t delete_index = 0;
|
||||
size_t iterate_index = 0;
|
||||
};
|
||||
|
||||
static constexpr size_t NUM_LEVELS = sizeof(size_t) * CHAR_BIT;
|
||||
using StagingBuffersCache = std::array<StagingBuffers, NUM_LEVELS>;
|
||||
|
||||
StagingBufferRef GetStreamBuffer(size_t size);
|
||||
|
||||
bool AreRegionsActive(size_t region_begin, size_t region_end) const;
|
||||
|
||||
StagingBufferRef GetStagingBuffer(size_t size, MemoryUsage usage);
|
||||
|
||||
std::optional<StagingBufferRef> TryGetReservedBuffer(size_t size, MemoryUsage usage);
|
||||
|
||||
StagingBufferRef CreateStagingBuffer(size_t size, MemoryUsage usage);
|
||||
|
||||
StagingBuffersCache& GetCache(MemoryUsage usage);
|
||||
|
||||
void ReleaseCache(MemoryUsage usage);
|
||||
|
||||
void ReleaseLevel(StagingBuffersCache& cache, size_t log2);
|
||||
|
||||
const Device& device;
|
||||
MemoryAllocator& memory_allocator;
|
||||
Scheduler& scheduler;
|
||||
|
||||
vk::Buffer stream_buffer;
|
||||
vk::DeviceMemory stream_memory;
|
||||
u8* stream_pointer = nullptr;
|
||||
|
||||
size_t iterator = 0;
|
||||
size_t used_iterator = 0;
|
||||
size_t free_iterator = 0;
|
||||
std::array<u64, NUM_SYNCS> sync_ticks{};
|
||||
|
||||
StagingBuffersCache device_local_cache;
|
||||
StagingBuffersCache upload_cache;
|
||||
StagingBuffersCache download_cache;
|
||||
|
||||
size_t current_delete_level = 0;
|
||||
u64 buffer_index = 0;
|
||||
};
|
||||
|
||||
} // namespace Vulkan
|
||||
// SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <climits>
|
||||
#include <vector>
|
||||
|
||||
#include "common/common_types.h"
|
||||
|
||||
#include "video_core/vulkan_common/vulkan_memory_allocator.h"
|
||||
#include "video_core/vulkan_common/vulkan_wrapper.h"
|
||||
|
||||
namespace Vulkan {
|
||||
|
||||
class Device;
|
||||
class Scheduler;
|
||||
|
||||
struct StagingBufferRef {
|
||||
VkBuffer buffer;
|
||||
VkDeviceSize offset;
|
||||
std::span<u8> mapped_span;
|
||||
};
|
||||
|
||||
class StagingBufferPool {
|
||||
public:
|
||||
static constexpr size_t NUM_SYNCS = 16;
|
||||
|
||||
explicit StagingBufferPool(const Device& device, MemoryAllocator& memory_allocator,
|
||||
Scheduler& scheduler);
|
||||
~StagingBufferPool();
|
||||
|
||||
StagingBufferRef Request(size_t size, MemoryUsage usage);
|
||||
|
||||
void TickFrame();
|
||||
|
||||
private:
|
||||
struct StreamBufferCommit {
|
||||
size_t upper_bound;
|
||||
u64 tick;
|
||||
};
|
||||
|
||||
struct StagingBuffer {
|
||||
vk::Buffer buffer;
|
||||
MemoryCommit commit;
|
||||
std::span<u8> mapped_span;
|
||||
u64 tick = 0;
|
||||
|
||||
StagingBufferRef Ref() const noexcept {
|
||||
return {
|
||||
.buffer = *buffer,
|
||||
.offset = 0,
|
||||
.mapped_span = mapped_span,
|
||||
};
|
||||
}
|
||||
};
|
||||
|
||||
struct StagingBuffers {
|
||||
std::vector<StagingBuffer> entries;
|
||||
size_t delete_index = 0;
|
||||
size_t iterate_index = 0;
|
||||
};
|
||||
|
||||
static constexpr size_t NUM_LEVELS = sizeof(size_t) * CHAR_BIT;
|
||||
using StagingBuffersCache = std::array<StagingBuffers, NUM_LEVELS>;
|
||||
|
||||
StagingBufferRef GetStreamBuffer(size_t size);
|
||||
|
||||
bool AreRegionsActive(size_t region_begin, size_t region_end) const;
|
||||
|
||||
StagingBufferRef GetStagingBuffer(size_t size, MemoryUsage usage);
|
||||
|
||||
std::optional<StagingBufferRef> TryGetReservedBuffer(size_t size, MemoryUsage usage);
|
||||
|
||||
StagingBufferRef CreateStagingBuffer(size_t size, MemoryUsage usage);
|
||||
|
||||
StagingBuffersCache& GetCache(MemoryUsage usage);
|
||||
|
||||
void ReleaseCache(MemoryUsage usage);
|
||||
|
||||
void ReleaseLevel(StagingBuffersCache& cache, size_t log2);
|
||||
|
||||
const Device& device;
|
||||
MemoryAllocator& memory_allocator;
|
||||
Scheduler& scheduler;
|
||||
|
||||
vk::Buffer stream_buffer;
|
||||
vk::DeviceMemory stream_memory;
|
||||
u8* stream_pointer = nullptr;
|
||||
|
||||
size_t iterator = 0;
|
||||
size_t used_iterator = 0;
|
||||
size_t free_iterator = 0;
|
||||
std::array<u64, NUM_SYNCS> sync_ticks{};
|
||||
|
||||
StagingBuffersCache device_local_cache;
|
||||
StagingBuffersCache upload_cache;
|
||||
StagingBuffersCache download_cache;
|
||||
|
||||
size_t current_delete_level = 0;
|
||||
u64 buffer_index = 0;
|
||||
};
|
||||
|
||||
} // namespace Vulkan
|
||||
|
||||
@@ -1,212 +1,212 @@
|
||||
// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#include <algorithm>
|
||||
#include <array>
|
||||
#include <cstddef>
|
||||
|
||||
#include "common/common_types.h"
|
||||
#include "core/core.h"
|
||||
#include "video_core/control/channel_state.h"
|
||||
#include "video_core/dirty_flags.h"
|
||||
#include "video_core/engines/maxwell_3d.h"
|
||||
#include "video_core/renderer_vulkan/vk_state_tracker.h"
|
||||
|
||||
#define OFF(field_name) MAXWELL3D_REG_INDEX(field_name)
|
||||
#define NUM(field_name) (sizeof(Maxwell3D::Regs::field_name) / (sizeof(u32)))
|
||||
|
||||
namespace Vulkan {
|
||||
namespace {
|
||||
using namespace Dirty;
|
||||
using namespace VideoCommon::Dirty;
|
||||
using Tegra::Engines::Maxwell3D;
|
||||
using Regs = Maxwell3D::Regs;
|
||||
using Tables = Maxwell3D::DirtyState::Tables;
|
||||
using Table = Maxwell3D::DirtyState::Table;
|
||||
using Flags = Maxwell3D::DirtyState::Flags;
|
||||
|
||||
Flags MakeInvalidationFlags() {
|
||||
static constexpr int INVALIDATION_FLAGS[]{
|
||||
Viewports, Scissors, DepthBias, BlendConstants, DepthBounds,
|
||||
StencilProperties, LineWidth, CullMode, DepthBoundsEnable, DepthTestEnable,
|
||||
DepthWriteEnable, DepthCompareOp, FrontFace, StencilOp, StencilTestEnable,
|
||||
VertexBuffers, VertexInput,
|
||||
};
|
||||
Flags flags{};
|
||||
for (const int flag : INVALIDATION_FLAGS) {
|
||||
flags[flag] = true;
|
||||
}
|
||||
for (int index = VertexBuffer0; index <= VertexBuffer31; ++index) {
|
||||
flags[index] = true;
|
||||
}
|
||||
for (int index = VertexAttribute0; index <= VertexAttribute31; ++index) {
|
||||
flags[index] = true;
|
||||
}
|
||||
for (int index = VertexBinding0; index <= VertexBinding31; ++index) {
|
||||
flags[index] = true;
|
||||
}
|
||||
return flags;
|
||||
}
|
||||
|
||||
void SetupDirtyViewports(Tables& tables) {
|
||||
FillBlock(tables[0], OFF(viewport_transform), NUM(viewport_transform), Viewports);
|
||||
FillBlock(tables[0], OFF(viewports), NUM(viewports), Viewports);
|
||||
tables[0][OFF(viewport_scale_offset_enbled)] = Viewports;
|
||||
tables[1][OFF(window_origin)] = Viewports;
|
||||
}
|
||||
|
||||
void SetupDirtyScissors(Tables& tables) {
|
||||
FillBlock(tables[0], OFF(scissor_test), NUM(scissor_test), Scissors);
|
||||
}
|
||||
|
||||
void SetupDirtyDepthBias(Tables& tables) {
|
||||
auto& table = tables[0];
|
||||
table[OFF(depth_bias)] = DepthBias;
|
||||
table[OFF(depth_bias_clamp)] = DepthBias;
|
||||
table[OFF(slope_scale_depth_bias)] = DepthBias;
|
||||
}
|
||||
|
||||
void SetupDirtyBlendConstants(Tables& tables) {
|
||||
FillBlock(tables[0], OFF(blend_color), NUM(blend_color), BlendConstants);
|
||||
}
|
||||
|
||||
void SetupDirtyDepthBounds(Tables& tables) {
|
||||
FillBlock(tables[0], OFF(depth_bounds), NUM(depth_bounds), DepthBounds);
|
||||
}
|
||||
|
||||
void SetupDirtyStencilProperties(Tables& tables) {
|
||||
auto& table = tables[0];
|
||||
table[OFF(stencil_two_side_enable)] = StencilProperties;
|
||||
table[OFF(stencil_front_ref)] = StencilProperties;
|
||||
table[OFF(stencil_front_mask)] = StencilProperties;
|
||||
table[OFF(stencil_front_func_mask)] = StencilProperties;
|
||||
table[OFF(stencil_back_ref)] = StencilProperties;
|
||||
table[OFF(stencil_back_mask)] = StencilProperties;
|
||||
table[OFF(stencil_back_func_mask)] = StencilProperties;
|
||||
}
|
||||
|
||||
void SetupDirtyLineWidth(Tables& tables) {
|
||||
tables[0][OFF(line_width_smooth)] = LineWidth;
|
||||
tables[0][OFF(line_width_aliased)] = LineWidth;
|
||||
}
|
||||
|
||||
void SetupDirtyCullMode(Tables& tables) {
|
||||
auto& table = tables[0];
|
||||
table[OFF(gl_cull_face)] = CullMode;
|
||||
table[OFF(gl_cull_test_enabled)] = CullMode;
|
||||
}
|
||||
|
||||
void SetupDirtyDepthBoundsEnable(Tables& tables) {
|
||||
tables[0][OFF(depth_bounds_enable)] = DepthBoundsEnable;
|
||||
}
|
||||
|
||||
void SetupDirtyDepthTestEnable(Tables& tables) {
|
||||
tables[0][OFF(depth_test_enable)] = DepthTestEnable;
|
||||
}
|
||||
|
||||
void SetupDirtyDepthWriteEnable(Tables& tables) {
|
||||
tables[0][OFF(depth_write_enabled)] = DepthWriteEnable;
|
||||
}
|
||||
|
||||
void SetupDirtyDepthCompareOp(Tables& tables) {
|
||||
tables[0][OFF(depth_test_func)] = DepthCompareOp;
|
||||
}
|
||||
|
||||
void SetupDirtyFrontFace(Tables& tables) {
|
||||
auto& table = tables[0];
|
||||
table[OFF(gl_front_face)] = FrontFace;
|
||||
table[OFF(window_origin)] = FrontFace;
|
||||
}
|
||||
|
||||
void SetupDirtyStencilOp(Tables& tables) {
|
||||
auto& table = tables[0];
|
||||
table[OFF(stencil_front_op.fail)] = StencilOp;
|
||||
table[OFF(stencil_front_op.zfail)] = StencilOp;
|
||||
table[OFF(stencil_front_op.zpass)] = StencilOp;
|
||||
table[OFF(stencil_front_op.func)] = StencilOp;
|
||||
table[OFF(stencil_back_op.fail)] = StencilOp;
|
||||
table[OFF(stencil_back_op.zfail)] = StencilOp;
|
||||
table[OFF(stencil_back_op.zpass)] = StencilOp;
|
||||
table[OFF(stencil_back_op.func)] = StencilOp;
|
||||
|
||||
// Table 0 is used by StencilProperties
|
||||
tables[1][OFF(stencil_two_side_enable)] = StencilOp;
|
||||
}
|
||||
|
||||
void SetupDirtyStencilTestEnable(Tables& tables) {
|
||||
tables[0][OFF(stencil_enable)] = StencilTestEnable;
|
||||
}
|
||||
|
||||
void SetupDirtyBlending(Tables& tables) {
|
||||
tables[0][OFF(color_mask_common)] = Blending;
|
||||
tables[0][OFF(blend_per_target_enabled)] = Blending;
|
||||
FillBlock(tables[0], OFF(color_mask), NUM(color_mask), Blending);
|
||||
FillBlock(tables[0], OFF(blend), NUM(blend), Blending);
|
||||
FillBlock(tables[0], OFF(blend_per_target), NUM(blend_per_target), Blending);
|
||||
}
|
||||
|
||||
void SetupDirtyViewportSwizzles(Tables& tables) {
|
||||
static constexpr size_t swizzle_offset = 6;
|
||||
for (size_t index = 0; index < Regs::NumViewports; ++index) {
|
||||
tables[0][OFF(viewport_transform) + index * NUM(viewport_transform[0]) + swizzle_offset] =
|
||||
ViewportSwizzles;
|
||||
}
|
||||
}
|
||||
|
||||
void SetupDirtyVertexAttributes(Tables& tables) {
|
||||
for (size_t i = 0; i < Regs::NumVertexAttributes; ++i) {
|
||||
const size_t offset = OFF(vertex_attrib_format) + i * NUM(vertex_attrib_format[0]);
|
||||
FillBlock(tables[0], offset, NUM(vertex_attrib_format[0]), VertexAttribute0 + i);
|
||||
}
|
||||
FillBlock(tables[1], OFF(vertex_attrib_format), Regs::NumVertexAttributes, VertexInput);
|
||||
}
|
||||
|
||||
void SetupDirtyVertexBindings(Tables& tables) {
|
||||
// Do NOT include stride here, it's implicit in VertexBuffer
|
||||
static constexpr size_t divisor_offset = 3;
|
||||
for (size_t i = 0; i < Regs::NumVertexArrays; ++i) {
|
||||
const u8 flag = static_cast<u8>(VertexBinding0 + i);
|
||||
tables[0][OFF(vertex_stream_instances) + i] = VertexInput;
|
||||
tables[1][OFF(vertex_stream_instances) + i] = flag;
|
||||
tables[0][OFF(vertex_streams) + i * NUM(vertex_streams[0]) + divisor_offset] = VertexInput;
|
||||
tables[1][OFF(vertex_streams) + i * NUM(vertex_streams[0]) + divisor_offset] = flag;
|
||||
}
|
||||
}
|
||||
} // Anonymous namespace
|
||||
|
||||
void StateTracker::SetupTables(Tegra::Control::ChannelState& channel_state) {
|
||||
auto& tables{channel_state.maxwell_3d->dirty.tables};
|
||||
SetupDirtyFlags(tables);
|
||||
SetupDirtyViewports(tables);
|
||||
SetupDirtyScissors(tables);
|
||||
SetupDirtyDepthBias(tables);
|
||||
SetupDirtyBlendConstants(tables);
|
||||
SetupDirtyDepthBounds(tables);
|
||||
SetupDirtyStencilProperties(tables);
|
||||
SetupDirtyLineWidth(tables);
|
||||
SetupDirtyCullMode(tables);
|
||||
SetupDirtyDepthBoundsEnable(tables);
|
||||
SetupDirtyDepthTestEnable(tables);
|
||||
SetupDirtyDepthWriteEnable(tables);
|
||||
SetupDirtyDepthCompareOp(tables);
|
||||
SetupDirtyFrontFace(tables);
|
||||
SetupDirtyStencilOp(tables);
|
||||
SetupDirtyStencilTestEnable(tables);
|
||||
SetupDirtyBlending(tables);
|
||||
SetupDirtyViewportSwizzles(tables);
|
||||
SetupDirtyVertexAttributes(tables);
|
||||
SetupDirtyVertexBindings(tables);
|
||||
}
|
||||
|
||||
void StateTracker::ChangeChannel(Tegra::Control::ChannelState& channel_state) {
|
||||
flags = &channel_state.maxwell_3d->dirty.flags;
|
||||
}
|
||||
|
||||
void StateTracker::InvalidateState() {
|
||||
flags->set();
|
||||
}
|
||||
|
||||
StateTracker::StateTracker()
|
||||
: flags{&default_flags}, default_flags{}, invalidation_flags{MakeInvalidationFlags()} {}
|
||||
|
||||
} // namespace Vulkan
|
||||
// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#include <algorithm>
|
||||
#include <array>
|
||||
#include <cstddef>
|
||||
|
||||
#include "common/common_types.h"
|
||||
#include "core/core.h"
|
||||
#include "video_core/control/channel_state.h"
|
||||
#include "video_core/dirty_flags.h"
|
||||
#include "video_core/engines/maxwell_3d.h"
|
||||
#include "video_core/renderer_vulkan/vk_state_tracker.h"
|
||||
|
||||
#define OFF(field_name) MAXWELL3D_REG_INDEX(field_name)
|
||||
#define NUM(field_name) (sizeof(Maxwell3D::Regs::field_name) / (sizeof(u32)))
|
||||
|
||||
namespace Vulkan {
|
||||
namespace {
|
||||
using namespace Dirty;
|
||||
using namespace VideoCommon::Dirty;
|
||||
using Tegra::Engines::Maxwell3D;
|
||||
using Regs = Maxwell3D::Regs;
|
||||
using Tables = Maxwell3D::DirtyState::Tables;
|
||||
using Table = Maxwell3D::DirtyState::Table;
|
||||
using Flags = Maxwell3D::DirtyState::Flags;
|
||||
|
||||
Flags MakeInvalidationFlags() {
|
||||
static constexpr int INVALIDATION_FLAGS[]{
|
||||
Viewports, Scissors, DepthBias, BlendConstants, DepthBounds,
|
||||
StencilProperties, LineWidth, CullMode, DepthBoundsEnable, DepthTestEnable,
|
||||
DepthWriteEnable, DepthCompareOp, FrontFace, StencilOp, StencilTestEnable,
|
||||
VertexBuffers, VertexInput,
|
||||
};
|
||||
Flags flags{};
|
||||
for (const int flag : INVALIDATION_FLAGS) {
|
||||
flags[flag] = true;
|
||||
}
|
||||
for (int index = VertexBuffer0; index <= VertexBuffer31; ++index) {
|
||||
flags[index] = true;
|
||||
}
|
||||
for (int index = VertexAttribute0; index <= VertexAttribute31; ++index) {
|
||||
flags[index] = true;
|
||||
}
|
||||
for (int index = VertexBinding0; index <= VertexBinding31; ++index) {
|
||||
flags[index] = true;
|
||||
}
|
||||
return flags;
|
||||
}
|
||||
|
||||
void SetupDirtyViewports(Tables& tables) {
|
||||
FillBlock(tables[0], OFF(viewport_transform), NUM(viewport_transform), Viewports);
|
||||
FillBlock(tables[0], OFF(viewports), NUM(viewports), Viewports);
|
||||
tables[0][OFF(viewport_scale_offset_enbled)] = Viewports;
|
||||
tables[1][OFF(window_origin)] = Viewports;
|
||||
}
|
||||
|
||||
void SetupDirtyScissors(Tables& tables) {
|
||||
FillBlock(tables[0], OFF(scissor_test), NUM(scissor_test), Scissors);
|
||||
}
|
||||
|
||||
void SetupDirtyDepthBias(Tables& tables) {
|
||||
auto& table = tables[0];
|
||||
table[OFF(depth_bias)] = DepthBias;
|
||||
table[OFF(depth_bias_clamp)] = DepthBias;
|
||||
table[OFF(slope_scale_depth_bias)] = DepthBias;
|
||||
}
|
||||
|
||||
void SetupDirtyBlendConstants(Tables& tables) {
|
||||
FillBlock(tables[0], OFF(blend_color), NUM(blend_color), BlendConstants);
|
||||
}
|
||||
|
||||
void SetupDirtyDepthBounds(Tables& tables) {
|
||||
FillBlock(tables[0], OFF(depth_bounds), NUM(depth_bounds), DepthBounds);
|
||||
}
|
||||
|
||||
void SetupDirtyStencilProperties(Tables& tables) {
|
||||
auto& table = tables[0];
|
||||
table[OFF(stencil_two_side_enable)] = StencilProperties;
|
||||
table[OFF(stencil_front_ref)] = StencilProperties;
|
||||
table[OFF(stencil_front_mask)] = StencilProperties;
|
||||
table[OFF(stencil_front_func_mask)] = StencilProperties;
|
||||
table[OFF(stencil_back_ref)] = StencilProperties;
|
||||
table[OFF(stencil_back_mask)] = StencilProperties;
|
||||
table[OFF(stencil_back_func_mask)] = StencilProperties;
|
||||
}
|
||||
|
||||
void SetupDirtyLineWidth(Tables& tables) {
|
||||
tables[0][OFF(line_width_smooth)] = LineWidth;
|
||||
tables[0][OFF(line_width_aliased)] = LineWidth;
|
||||
}
|
||||
|
||||
void SetupDirtyCullMode(Tables& tables) {
|
||||
auto& table = tables[0];
|
||||
table[OFF(gl_cull_face)] = CullMode;
|
||||
table[OFF(gl_cull_test_enabled)] = CullMode;
|
||||
}
|
||||
|
||||
void SetupDirtyDepthBoundsEnable(Tables& tables) {
|
||||
tables[0][OFF(depth_bounds_enable)] = DepthBoundsEnable;
|
||||
}
|
||||
|
||||
void SetupDirtyDepthTestEnable(Tables& tables) {
|
||||
tables[0][OFF(depth_test_enable)] = DepthTestEnable;
|
||||
}
|
||||
|
||||
void SetupDirtyDepthWriteEnable(Tables& tables) {
|
||||
tables[0][OFF(depth_write_enabled)] = DepthWriteEnable;
|
||||
}
|
||||
|
||||
void SetupDirtyDepthCompareOp(Tables& tables) {
|
||||
tables[0][OFF(depth_test_func)] = DepthCompareOp;
|
||||
}
|
||||
|
||||
void SetupDirtyFrontFace(Tables& tables) {
|
||||
auto& table = tables[0];
|
||||
table[OFF(gl_front_face)] = FrontFace;
|
||||
table[OFF(window_origin)] = FrontFace;
|
||||
}
|
||||
|
||||
void SetupDirtyStencilOp(Tables& tables) {
|
||||
auto& table = tables[0];
|
||||
table[OFF(stencil_front_op.fail)] = StencilOp;
|
||||
table[OFF(stencil_front_op.zfail)] = StencilOp;
|
||||
table[OFF(stencil_front_op.zpass)] = StencilOp;
|
||||
table[OFF(stencil_front_op.func)] = StencilOp;
|
||||
table[OFF(stencil_back_op.fail)] = StencilOp;
|
||||
table[OFF(stencil_back_op.zfail)] = StencilOp;
|
||||
table[OFF(stencil_back_op.zpass)] = StencilOp;
|
||||
table[OFF(stencil_back_op.func)] = StencilOp;
|
||||
|
||||
// Table 0 is used by StencilProperties
|
||||
tables[1][OFF(stencil_two_side_enable)] = StencilOp;
|
||||
}
|
||||
|
||||
void SetupDirtyStencilTestEnable(Tables& tables) {
|
||||
tables[0][OFF(stencil_enable)] = StencilTestEnable;
|
||||
}
|
||||
|
||||
void SetupDirtyBlending(Tables& tables) {
|
||||
tables[0][OFF(color_mask_common)] = Blending;
|
||||
tables[0][OFF(blend_per_target_enabled)] = Blending;
|
||||
FillBlock(tables[0], OFF(color_mask), NUM(color_mask), Blending);
|
||||
FillBlock(tables[0], OFF(blend), NUM(blend), Blending);
|
||||
FillBlock(tables[0], OFF(blend_per_target), NUM(blend_per_target), Blending);
|
||||
}
|
||||
|
||||
void SetupDirtyViewportSwizzles(Tables& tables) {
|
||||
static constexpr size_t swizzle_offset = 6;
|
||||
for (size_t index = 0; index < Regs::NumViewports; ++index) {
|
||||
tables[0][OFF(viewport_transform) + index * NUM(viewport_transform[0]) + swizzle_offset] =
|
||||
ViewportSwizzles;
|
||||
}
|
||||
}
|
||||
|
||||
void SetupDirtyVertexAttributes(Tables& tables) {
|
||||
for (size_t i = 0; i < Regs::NumVertexAttributes; ++i) {
|
||||
const size_t offset = OFF(vertex_attrib_format) + i * NUM(vertex_attrib_format[0]);
|
||||
FillBlock(tables[0], offset, NUM(vertex_attrib_format[0]), VertexAttribute0 + i);
|
||||
}
|
||||
FillBlock(tables[1], OFF(vertex_attrib_format), Regs::NumVertexAttributes, VertexInput);
|
||||
}
|
||||
|
||||
void SetupDirtyVertexBindings(Tables& tables) {
|
||||
// Do NOT include stride here, it's implicit in VertexBuffer
|
||||
static constexpr size_t divisor_offset = 3;
|
||||
for (size_t i = 0; i < Regs::NumVertexArrays; ++i) {
|
||||
const u8 flag = static_cast<u8>(VertexBinding0 + i);
|
||||
tables[0][OFF(vertex_stream_instances) + i] = VertexInput;
|
||||
tables[1][OFF(vertex_stream_instances) + i] = flag;
|
||||
tables[0][OFF(vertex_streams) + i * NUM(vertex_streams[0]) + divisor_offset] = VertexInput;
|
||||
tables[1][OFF(vertex_streams) + i * NUM(vertex_streams[0]) + divisor_offset] = flag;
|
||||
}
|
||||
}
|
||||
} // Anonymous namespace
|
||||
|
||||
void StateTracker::SetupTables(Tegra::Control::ChannelState& channel_state) {
|
||||
auto& tables{channel_state.maxwell_3d->dirty.tables};
|
||||
SetupDirtyFlags(tables);
|
||||
SetupDirtyViewports(tables);
|
||||
SetupDirtyScissors(tables);
|
||||
SetupDirtyDepthBias(tables);
|
||||
SetupDirtyBlendConstants(tables);
|
||||
SetupDirtyDepthBounds(tables);
|
||||
SetupDirtyStencilProperties(tables);
|
||||
SetupDirtyLineWidth(tables);
|
||||
SetupDirtyCullMode(tables);
|
||||
SetupDirtyDepthBoundsEnable(tables);
|
||||
SetupDirtyDepthTestEnable(tables);
|
||||
SetupDirtyDepthWriteEnable(tables);
|
||||
SetupDirtyDepthCompareOp(tables);
|
||||
SetupDirtyFrontFace(tables);
|
||||
SetupDirtyStencilOp(tables);
|
||||
SetupDirtyStencilTestEnable(tables);
|
||||
SetupDirtyBlending(tables);
|
||||
SetupDirtyViewportSwizzles(tables);
|
||||
SetupDirtyVertexAttributes(tables);
|
||||
SetupDirtyVertexBindings(tables);
|
||||
}
|
||||
|
||||
void StateTracker::ChangeChannel(Tegra::Control::ChannelState& channel_state) {
|
||||
flags = &channel_state.maxwell_3d->dirty.flags;
|
||||
}
|
||||
|
||||
void StateTracker::InvalidateState() {
|
||||
flags->set();
|
||||
}
|
||||
|
||||
StateTracker::StateTracker()
|
||||
: flags{&default_flags}, default_flags{}, invalidation_flags{MakeInvalidationFlags()} {}
|
||||
|
||||
} // namespace Vulkan
|
||||
|
||||
@@ -1,169 +1,169 @@
|
||||
// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <cstddef>
|
||||
#include <limits>
|
||||
|
||||
#include "common/common_types.h"
|
||||
#include "video_core/dirty_flags.h"
|
||||
#include "video_core/engines/maxwell_3d.h"
|
||||
|
||||
namespace Tegra {
|
||||
namespace Control {
|
||||
struct ChannelState;
|
||||
}
|
||||
} // namespace Tegra
|
||||
|
||||
namespace Vulkan {
|
||||
|
||||
namespace Dirty {
|
||||
|
||||
enum : u8 {
|
||||
First = VideoCommon::Dirty::LastCommonEntry,
|
||||
|
||||
VertexInput,
|
||||
VertexAttribute0,
|
||||
VertexAttribute31 = VertexAttribute0 + 31,
|
||||
VertexBinding0,
|
||||
VertexBinding31 = VertexBinding0 + 31,
|
||||
|
||||
Viewports,
|
||||
Scissors,
|
||||
DepthBias,
|
||||
BlendConstants,
|
||||
DepthBounds,
|
||||
StencilProperties,
|
||||
LineWidth,
|
||||
|
||||
CullMode,
|
||||
DepthBoundsEnable,
|
||||
DepthTestEnable,
|
||||
DepthWriteEnable,
|
||||
DepthCompareOp,
|
||||
FrontFace,
|
||||
StencilOp,
|
||||
StencilTestEnable,
|
||||
|
||||
Blending,
|
||||
ViewportSwizzles,
|
||||
|
||||
Last,
|
||||
};
|
||||
static_assert(Last <= std::numeric_limits<u8>::max());
|
||||
|
||||
} // namespace Dirty
|
||||
|
||||
class StateTracker {
|
||||
using Maxwell = Tegra::Engines::Maxwell3D::Regs;
|
||||
|
||||
public:
|
||||
explicit StateTracker();
|
||||
|
||||
void InvalidateCommandBufferState() {
|
||||
(*flags) |= invalidation_flags;
|
||||
current_topology = INVALID_TOPOLOGY;
|
||||
}
|
||||
|
||||
void InvalidateViewports() {
|
||||
(*flags)[Dirty::Viewports] = true;
|
||||
}
|
||||
|
||||
void InvalidateScissors() {
|
||||
(*flags)[Dirty::Scissors] = true;
|
||||
}
|
||||
|
||||
bool TouchViewports() {
|
||||
const bool dirty_viewports = Exchange(Dirty::Viewports, false);
|
||||
const bool rescale_viewports = Exchange(VideoCommon::Dirty::RescaleViewports, false);
|
||||
return dirty_viewports || rescale_viewports;
|
||||
}
|
||||
|
||||
bool TouchScissors() {
|
||||
const bool dirty_scissors = Exchange(Dirty::Scissors, false);
|
||||
const bool rescale_scissors = Exchange(VideoCommon::Dirty::RescaleScissors, false);
|
||||
return dirty_scissors || rescale_scissors;
|
||||
}
|
||||
|
||||
bool TouchDepthBias() {
|
||||
return Exchange(Dirty::DepthBias, false) ||
|
||||
Exchange(VideoCommon::Dirty::DepthBiasGlobal, false);
|
||||
}
|
||||
|
||||
bool TouchBlendConstants() {
|
||||
return Exchange(Dirty::BlendConstants, false);
|
||||
}
|
||||
|
||||
bool TouchDepthBounds() {
|
||||
return Exchange(Dirty::DepthBounds, false);
|
||||
}
|
||||
|
||||
bool TouchStencilProperties() {
|
||||
return Exchange(Dirty::StencilProperties, false);
|
||||
}
|
||||
|
||||
bool TouchLineWidth() const {
|
||||
return Exchange(Dirty::LineWidth, false);
|
||||
}
|
||||
|
||||
bool TouchCullMode() {
|
||||
return Exchange(Dirty::CullMode, false);
|
||||
}
|
||||
|
||||
bool TouchDepthBoundsTestEnable() {
|
||||
return Exchange(Dirty::DepthBoundsEnable, false);
|
||||
}
|
||||
|
||||
bool TouchDepthTestEnable() {
|
||||
return Exchange(Dirty::DepthTestEnable, false);
|
||||
}
|
||||
|
||||
bool TouchDepthWriteEnable() {
|
||||
return Exchange(Dirty::DepthWriteEnable, false);
|
||||
}
|
||||
|
||||
bool TouchDepthCompareOp() {
|
||||
return Exchange(Dirty::DepthCompareOp, false);
|
||||
}
|
||||
|
||||
bool TouchFrontFace() {
|
||||
return Exchange(Dirty::FrontFace, false);
|
||||
}
|
||||
|
||||
bool TouchStencilOp() {
|
||||
return Exchange(Dirty::StencilOp, false);
|
||||
}
|
||||
|
||||
bool TouchStencilTestEnable() {
|
||||
return Exchange(Dirty::StencilTestEnable, false);
|
||||
}
|
||||
|
||||
bool ChangePrimitiveTopology(Maxwell::PrimitiveTopology new_topology) {
|
||||
const bool has_changed = current_topology != new_topology;
|
||||
current_topology = new_topology;
|
||||
return has_changed;
|
||||
}
|
||||
|
||||
void SetupTables(Tegra::Control::ChannelState& channel_state);
|
||||
|
||||
void ChangeChannel(Tegra::Control::ChannelState& channel_state);
|
||||
|
||||
void InvalidateState();
|
||||
|
||||
private:
|
||||
static constexpr auto INVALID_TOPOLOGY = static_cast<Maxwell::PrimitiveTopology>(~0u);
|
||||
|
||||
bool Exchange(std::size_t id, bool new_value) const noexcept {
|
||||
const bool is_dirty = (*flags)[id];
|
||||
(*flags)[id] = new_value;
|
||||
return is_dirty;
|
||||
}
|
||||
|
||||
Tegra::Engines::Maxwell3D::DirtyState::Flags* flags;
|
||||
Tegra::Engines::Maxwell3D::DirtyState::Flags default_flags;
|
||||
Tegra::Engines::Maxwell3D::DirtyState::Flags invalidation_flags;
|
||||
Maxwell::PrimitiveTopology current_topology = INVALID_TOPOLOGY;
|
||||
};
|
||||
|
||||
} // namespace Vulkan
|
||||
// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <cstddef>
|
||||
#include <limits>
|
||||
|
||||
#include "common/common_types.h"
|
||||
#include "video_core/dirty_flags.h"
|
||||
#include "video_core/engines/maxwell_3d.h"
|
||||
|
||||
namespace Tegra {
|
||||
namespace Control {
|
||||
struct ChannelState;
|
||||
}
|
||||
} // namespace Tegra
|
||||
|
||||
namespace Vulkan {
|
||||
|
||||
namespace Dirty {
|
||||
|
||||
enum : u8 {
|
||||
First = VideoCommon::Dirty::LastCommonEntry,
|
||||
|
||||
VertexInput,
|
||||
VertexAttribute0,
|
||||
VertexAttribute31 = VertexAttribute0 + 31,
|
||||
VertexBinding0,
|
||||
VertexBinding31 = VertexBinding0 + 31,
|
||||
|
||||
Viewports,
|
||||
Scissors,
|
||||
DepthBias,
|
||||
BlendConstants,
|
||||
DepthBounds,
|
||||
StencilProperties,
|
||||
LineWidth,
|
||||
|
||||
CullMode,
|
||||
DepthBoundsEnable,
|
||||
DepthTestEnable,
|
||||
DepthWriteEnable,
|
||||
DepthCompareOp,
|
||||
FrontFace,
|
||||
StencilOp,
|
||||
StencilTestEnable,
|
||||
|
||||
Blending,
|
||||
ViewportSwizzles,
|
||||
|
||||
Last,
|
||||
};
|
||||
static_assert(Last <= std::numeric_limits<u8>::max());
|
||||
|
||||
} // namespace Dirty
|
||||
|
||||
class StateTracker {
|
||||
using Maxwell = Tegra::Engines::Maxwell3D::Regs;
|
||||
|
||||
public:
|
||||
explicit StateTracker();
|
||||
|
||||
void InvalidateCommandBufferState() {
|
||||
(*flags) |= invalidation_flags;
|
||||
current_topology = INVALID_TOPOLOGY;
|
||||
}
|
||||
|
||||
void InvalidateViewports() {
|
||||
(*flags)[Dirty::Viewports] = true;
|
||||
}
|
||||
|
||||
void InvalidateScissors() {
|
||||
(*flags)[Dirty::Scissors] = true;
|
||||
}
|
||||
|
||||
bool TouchViewports() {
|
||||
const bool dirty_viewports = Exchange(Dirty::Viewports, false);
|
||||
const bool rescale_viewports = Exchange(VideoCommon::Dirty::RescaleViewports, false);
|
||||
return dirty_viewports || rescale_viewports;
|
||||
}
|
||||
|
||||
bool TouchScissors() {
|
||||
const bool dirty_scissors = Exchange(Dirty::Scissors, false);
|
||||
const bool rescale_scissors = Exchange(VideoCommon::Dirty::RescaleScissors, false);
|
||||
return dirty_scissors || rescale_scissors;
|
||||
}
|
||||
|
||||
bool TouchDepthBias() {
|
||||
return Exchange(Dirty::DepthBias, false) ||
|
||||
Exchange(VideoCommon::Dirty::DepthBiasGlobal, false);
|
||||
}
|
||||
|
||||
bool TouchBlendConstants() {
|
||||
return Exchange(Dirty::BlendConstants, false);
|
||||
}
|
||||
|
||||
bool TouchDepthBounds() {
|
||||
return Exchange(Dirty::DepthBounds, false);
|
||||
}
|
||||
|
||||
bool TouchStencilProperties() {
|
||||
return Exchange(Dirty::StencilProperties, false);
|
||||
}
|
||||
|
||||
bool TouchLineWidth() const {
|
||||
return Exchange(Dirty::LineWidth, false);
|
||||
}
|
||||
|
||||
bool TouchCullMode() {
|
||||
return Exchange(Dirty::CullMode, false);
|
||||
}
|
||||
|
||||
bool TouchDepthBoundsTestEnable() {
|
||||
return Exchange(Dirty::DepthBoundsEnable, false);
|
||||
}
|
||||
|
||||
bool TouchDepthTestEnable() {
|
||||
return Exchange(Dirty::DepthTestEnable, false);
|
||||
}
|
||||
|
||||
bool TouchDepthWriteEnable() {
|
||||
return Exchange(Dirty::DepthWriteEnable, false);
|
||||
}
|
||||
|
||||
bool TouchDepthCompareOp() {
|
||||
return Exchange(Dirty::DepthCompareOp, false);
|
||||
}
|
||||
|
||||
bool TouchFrontFace() {
|
||||
return Exchange(Dirty::FrontFace, false);
|
||||
}
|
||||
|
||||
bool TouchStencilOp() {
|
||||
return Exchange(Dirty::StencilOp, false);
|
||||
}
|
||||
|
||||
bool TouchStencilTestEnable() {
|
||||
return Exchange(Dirty::StencilTestEnable, false);
|
||||
}
|
||||
|
||||
bool ChangePrimitiveTopology(Maxwell::PrimitiveTopology new_topology) {
|
||||
const bool has_changed = current_topology != new_topology;
|
||||
current_topology = new_topology;
|
||||
return has_changed;
|
||||
}
|
||||
|
||||
void SetupTables(Tegra::Control::ChannelState& channel_state);
|
||||
|
||||
void ChangeChannel(Tegra::Control::ChannelState& channel_state);
|
||||
|
||||
void InvalidateState();
|
||||
|
||||
private:
|
||||
static constexpr auto INVALID_TOPOLOGY = static_cast<Maxwell::PrimitiveTopology>(~0u);
|
||||
|
||||
bool Exchange(std::size_t id, bool new_value) const noexcept {
|
||||
const bool is_dirty = (*flags)[id];
|
||||
(*flags)[id] = new_value;
|
||||
return is_dirty;
|
||||
}
|
||||
|
||||
Tegra::Engines::Maxwell3D::DirtyState::Flags* flags;
|
||||
Tegra::Engines::Maxwell3D::DirtyState::Flags default_flags;
|
||||
Tegra::Engines::Maxwell3D::DirtyState::Flags invalidation_flags;
|
||||
Maxwell::PrimitiveTopology current_topology = INVALID_TOPOLOGY;
|
||||
};
|
||||
|
||||
} // namespace Vulkan
|
||||
|
||||
@@ -1,281 +1,281 @@
|
||||
// SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#include <algorithm>
|
||||
#include <array>
|
||||
#include <limits>
|
||||
#include <vector>
|
||||
|
||||
#include "common/logging/log.h"
|
||||
#include "common/settings.h"
|
||||
#include "core/core.h"
|
||||
#include "video_core/renderer_vulkan/vk_scheduler.h"
|
||||
#include "video_core/renderer_vulkan/vk_swapchain.h"
|
||||
#include "video_core/vulkan_common/vulkan_device.h"
|
||||
#include "video_core/vulkan_common/vulkan_wrapper.h"
|
||||
|
||||
namespace Vulkan {
|
||||
|
||||
namespace {
|
||||
|
||||
VkSurfaceFormatKHR ChooseSwapSurfaceFormat(vk::Span<VkSurfaceFormatKHR> formats) {
|
||||
if (formats.size() == 1 && formats[0].format == VK_FORMAT_UNDEFINED) {
|
||||
VkSurfaceFormatKHR format;
|
||||
format.format = VK_FORMAT_B8G8R8A8_UNORM;
|
||||
format.colorSpace = VK_COLOR_SPACE_SRGB_NONLINEAR_KHR;
|
||||
return format;
|
||||
}
|
||||
const auto& found = std::find_if(formats.begin(), formats.end(), [](const auto& format) {
|
||||
return format.format == VK_FORMAT_B8G8R8A8_UNORM &&
|
||||
format.colorSpace == VK_COLOR_SPACE_SRGB_NONLINEAR_KHR;
|
||||
});
|
||||
return found != formats.end() ? *found : formats[0];
|
||||
}
|
||||
|
||||
VkPresentModeKHR ChooseSwapPresentMode(vk::Span<VkPresentModeKHR> modes) {
|
||||
// Mailbox (triple buffering) doesn't lock the application like fifo (vsync),
|
||||
// prefer it if vsync option is not selected
|
||||
const auto found_mailbox = std::find(modes.begin(), modes.end(), VK_PRESENT_MODE_MAILBOX_KHR);
|
||||
if (Settings::values.fullscreen_mode.GetValue() == Settings::FullscreenMode::Borderless &&
|
||||
found_mailbox != modes.end() && !Settings::values.use_vsync.GetValue()) {
|
||||
return VK_PRESENT_MODE_MAILBOX_KHR;
|
||||
}
|
||||
if (!Settings::values.use_speed_limit.GetValue()) {
|
||||
// FIFO present mode locks the framerate to the monitor's refresh rate,
|
||||
// Find an alternative to surpass this limitation if FPS is unlocked.
|
||||
const auto found_imm = std::find(modes.begin(), modes.end(), VK_PRESENT_MODE_IMMEDIATE_KHR);
|
||||
if (found_imm != modes.end()) {
|
||||
return VK_PRESENT_MODE_IMMEDIATE_KHR;
|
||||
}
|
||||
}
|
||||
return VK_PRESENT_MODE_FIFO_KHR;
|
||||
}
|
||||
|
||||
VkExtent2D ChooseSwapExtent(const VkSurfaceCapabilitiesKHR& capabilities, u32 width, u32 height) {
|
||||
constexpr auto undefined_size{std::numeric_limits<u32>::max()};
|
||||
if (capabilities.currentExtent.width != undefined_size) {
|
||||
return capabilities.currentExtent;
|
||||
}
|
||||
VkExtent2D extent;
|
||||
extent.width = std::max(capabilities.minImageExtent.width,
|
||||
std::min(capabilities.maxImageExtent.width, width));
|
||||
extent.height = std::max(capabilities.minImageExtent.height,
|
||||
std::min(capabilities.maxImageExtent.height, height));
|
||||
return extent;
|
||||
}
|
||||
|
||||
} // Anonymous namespace
|
||||
|
||||
Swapchain::Swapchain(VkSurfaceKHR surface_, const Device& device_, Scheduler& scheduler_, u32 width,
|
||||
u32 height, bool srgb)
|
||||
: surface{surface_}, device{device_}, scheduler{scheduler_} {
|
||||
Create(width, height, srgb);
|
||||
}
|
||||
|
||||
Swapchain::~Swapchain() = default;
|
||||
|
||||
void Swapchain::Create(u32 width, u32 height, bool srgb) {
|
||||
is_outdated = false;
|
||||
is_suboptimal = false;
|
||||
|
||||
const auto physical_device = device.GetPhysical();
|
||||
const auto capabilities{physical_device.GetSurfaceCapabilitiesKHR(surface)};
|
||||
if (capabilities.maxImageExtent.width == 0 || capabilities.maxImageExtent.height == 0) {
|
||||
return;
|
||||
}
|
||||
|
||||
device.GetLogical().WaitIdle();
|
||||
Destroy();
|
||||
|
||||
CreateSwapchain(capabilities, width, height, srgb);
|
||||
CreateSemaphores();
|
||||
CreateImageViews();
|
||||
|
||||
resource_ticks.clear();
|
||||
resource_ticks.resize(image_count);
|
||||
}
|
||||
|
||||
void Swapchain::AcquireNextImage() {
|
||||
const VkResult result = device.GetLogical().AcquireNextImageKHR(
|
||||
*swapchain, std::numeric_limits<u64>::max(), *present_semaphores[frame_index],
|
||||
VK_NULL_HANDLE, &image_index);
|
||||
switch (result) {
|
||||
case VK_SUCCESS:
|
||||
break;
|
||||
case VK_SUBOPTIMAL_KHR:
|
||||
is_suboptimal = true;
|
||||
break;
|
||||
case VK_ERROR_OUT_OF_DATE_KHR:
|
||||
is_outdated = true;
|
||||
break;
|
||||
default:
|
||||
LOG_ERROR(Render_Vulkan, "vkAcquireNextImageKHR returned {}", vk::ToString(result));
|
||||
break;
|
||||
}
|
||||
scheduler.Wait(resource_ticks[image_index]);
|
||||
resource_ticks[image_index] = scheduler.CurrentTick();
|
||||
}
|
||||
|
||||
void Swapchain::Present(VkSemaphore render_semaphore) {
|
||||
const auto present_queue{device.GetPresentQueue()};
|
||||
const VkPresentInfoKHR present_info{
|
||||
.sType = VK_STRUCTURE_TYPE_PRESENT_INFO_KHR,
|
||||
.pNext = nullptr,
|
||||
.waitSemaphoreCount = render_semaphore ? 1U : 0U,
|
||||
.pWaitSemaphores = &render_semaphore,
|
||||
.swapchainCount = 1,
|
||||
.pSwapchains = swapchain.address(),
|
||||
.pImageIndices = &image_index,
|
||||
.pResults = nullptr,
|
||||
};
|
||||
switch (const VkResult result = present_queue.Present(present_info)) {
|
||||
case VK_SUCCESS:
|
||||
break;
|
||||
case VK_SUBOPTIMAL_KHR:
|
||||
LOG_DEBUG(Render_Vulkan, "Suboptimal swapchain");
|
||||
break;
|
||||
case VK_ERROR_OUT_OF_DATE_KHR:
|
||||
is_outdated = true;
|
||||
break;
|
||||
default:
|
||||
LOG_CRITICAL(Render_Vulkan, "Failed to present with error {}", vk::ToString(result));
|
||||
break;
|
||||
}
|
||||
++frame_index;
|
||||
if (frame_index >= image_count) {
|
||||
frame_index = 0;
|
||||
}
|
||||
}
|
||||
|
||||
void Swapchain::CreateSwapchain(const VkSurfaceCapabilitiesKHR& capabilities, u32 width, u32 height,
|
||||
bool srgb) {
|
||||
const auto physical_device{device.GetPhysical()};
|
||||
const auto formats{physical_device.GetSurfaceFormatsKHR(surface)};
|
||||
const auto present_modes{physical_device.GetSurfacePresentModesKHR(surface)};
|
||||
|
||||
const VkSurfaceFormatKHR surface_format{ChooseSwapSurfaceFormat(formats)};
|
||||
present_mode = ChooseSwapPresentMode(present_modes);
|
||||
|
||||
u32 requested_image_count{capabilities.minImageCount + 1};
|
||||
// Ensure Tripple buffering if possible.
|
||||
if (capabilities.maxImageCount > 0) {
|
||||
if (requested_image_count > capabilities.maxImageCount) {
|
||||
requested_image_count = capabilities.maxImageCount;
|
||||
} else {
|
||||
requested_image_count =
|
||||
std::max(requested_image_count, std::min(3U, capabilities.maxImageCount));
|
||||
}
|
||||
} else {
|
||||
requested_image_count = std::max(requested_image_count, 3U);
|
||||
}
|
||||
VkSwapchainCreateInfoKHR swapchain_ci{
|
||||
.sType = VK_STRUCTURE_TYPE_SWAPCHAIN_CREATE_INFO_KHR,
|
||||
.pNext = nullptr,
|
||||
.flags = 0,
|
||||
.surface = surface,
|
||||
.minImageCount = requested_image_count,
|
||||
.imageFormat = surface_format.format,
|
||||
.imageColorSpace = surface_format.colorSpace,
|
||||
.imageExtent = {},
|
||||
.imageArrayLayers = 1,
|
||||
.imageUsage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT,
|
||||
.imageSharingMode = VK_SHARING_MODE_EXCLUSIVE,
|
||||
.queueFamilyIndexCount = 0,
|
||||
.pQueueFamilyIndices = nullptr,
|
||||
.preTransform = capabilities.currentTransform,
|
||||
.compositeAlpha = VK_COMPOSITE_ALPHA_OPAQUE_BIT_KHR,
|
||||
.presentMode = present_mode,
|
||||
.clipped = VK_FALSE,
|
||||
.oldSwapchain = nullptr,
|
||||
};
|
||||
const u32 graphics_family{device.GetGraphicsFamily()};
|
||||
const u32 present_family{device.GetPresentFamily()};
|
||||
const std::array<u32, 2> queue_indices{graphics_family, present_family};
|
||||
if (graphics_family != present_family) {
|
||||
swapchain_ci.imageSharingMode = VK_SHARING_MODE_CONCURRENT;
|
||||
swapchain_ci.queueFamilyIndexCount = static_cast<u32>(queue_indices.size());
|
||||
swapchain_ci.pQueueFamilyIndices = queue_indices.data();
|
||||
}
|
||||
static constexpr std::array view_formats{VK_FORMAT_B8G8R8A8_UNORM, VK_FORMAT_B8G8R8A8_SRGB};
|
||||
VkImageFormatListCreateInfo format_list{
|
||||
.sType = VK_STRUCTURE_TYPE_IMAGE_FORMAT_LIST_CREATE_INFO_KHR,
|
||||
.pNext = nullptr,
|
||||
.viewFormatCount = static_cast<u32>(view_formats.size()),
|
||||
.pViewFormats = view_formats.data(),
|
||||
};
|
||||
if (device.IsKhrSwapchainMutableFormatEnabled()) {
|
||||
format_list.pNext = std::exchange(swapchain_ci.pNext, &format_list);
|
||||
swapchain_ci.flags |= VK_SWAPCHAIN_CREATE_MUTABLE_FORMAT_BIT_KHR;
|
||||
}
|
||||
// Request the size again to reduce the possibility of a TOCTOU race condition.
|
||||
const auto updated_capabilities = physical_device.GetSurfaceCapabilitiesKHR(surface);
|
||||
swapchain_ci.imageExtent = ChooseSwapExtent(updated_capabilities, width, height);
|
||||
// Don't add code within this and the swapchain creation.
|
||||
swapchain = device.GetLogical().CreateSwapchainKHR(swapchain_ci);
|
||||
|
||||
extent = swapchain_ci.imageExtent;
|
||||
current_srgb = srgb;
|
||||
current_fps_unlocked = !Settings::values.use_speed_limit.GetValue();
|
||||
|
||||
images = swapchain.GetImages();
|
||||
image_count = static_cast<u32>(images.size());
|
||||
image_view_format = srgb ? VK_FORMAT_B8G8R8A8_SRGB : VK_FORMAT_B8G8R8A8_UNORM;
|
||||
}
|
||||
|
||||
void Swapchain::CreateSemaphores() {
|
||||
present_semaphores.resize(image_count);
|
||||
std::ranges::generate(present_semaphores,
|
||||
[this] { return device.GetLogical().CreateSemaphore(); });
|
||||
}
|
||||
|
||||
void Swapchain::CreateImageViews() {
|
||||
VkImageViewCreateInfo ci{
|
||||
.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
|
||||
.pNext = nullptr,
|
||||
.flags = 0,
|
||||
.image = {},
|
||||
.viewType = VK_IMAGE_VIEW_TYPE_2D,
|
||||
.format = image_view_format,
|
||||
.components =
|
||||
{
|
||||
.r = VK_COMPONENT_SWIZZLE_IDENTITY,
|
||||
.g = VK_COMPONENT_SWIZZLE_IDENTITY,
|
||||
.b = VK_COMPONENT_SWIZZLE_IDENTITY,
|
||||
.a = VK_COMPONENT_SWIZZLE_IDENTITY,
|
||||
},
|
||||
.subresourceRange =
|
||||
{
|
||||
.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
|
||||
.baseMipLevel = 0,
|
||||
.levelCount = 1,
|
||||
.baseArrayLayer = 0,
|
||||
.layerCount = 1,
|
||||
},
|
||||
};
|
||||
|
||||
image_views.resize(image_count);
|
||||
for (std::size_t i = 0; i < image_count; i++) {
|
||||
ci.image = images[i];
|
||||
image_views[i] = device.GetLogical().CreateImageView(ci);
|
||||
}
|
||||
}
|
||||
|
||||
void Swapchain::Destroy() {
|
||||
frame_index = 0;
|
||||
present_semaphores.clear();
|
||||
framebuffers.clear();
|
||||
image_views.clear();
|
||||
swapchain.reset();
|
||||
}
|
||||
|
||||
bool Swapchain::HasFpsUnlockChanged() const {
|
||||
return current_fps_unlocked != !Settings::values.use_speed_limit.GetValue();
|
||||
}
|
||||
|
||||
bool Swapchain::NeedsPresentModeUpdate() const {
|
||||
// Mailbox present mode is the ideal for all scenarios. If it is not available,
|
||||
// A different present mode is needed to support unlocked FPS above the monitor's refresh rate.
|
||||
return present_mode != VK_PRESENT_MODE_MAILBOX_KHR && HasFpsUnlockChanged();
|
||||
}
|
||||
|
||||
} // namespace Vulkan
|
||||
// SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#include <algorithm>
|
||||
#include <array>
|
||||
#include <limits>
|
||||
#include <vector>
|
||||
|
||||
#include "common/logging/log.h"
|
||||
#include "common/settings.h"
|
||||
#include "core/core.h"
|
||||
#include "video_core/renderer_vulkan/vk_scheduler.h"
|
||||
#include "video_core/renderer_vulkan/vk_swapchain.h"
|
||||
#include "video_core/vulkan_common/vulkan_device.h"
|
||||
#include "video_core/vulkan_common/vulkan_wrapper.h"
|
||||
|
||||
namespace Vulkan {
|
||||
|
||||
namespace {
|
||||
|
||||
VkSurfaceFormatKHR ChooseSwapSurfaceFormat(vk::Span<VkSurfaceFormatKHR> formats) {
|
||||
if (formats.size() == 1 && formats[0].format == VK_FORMAT_UNDEFINED) {
|
||||
VkSurfaceFormatKHR format;
|
||||
format.format = VK_FORMAT_B8G8R8A8_UNORM;
|
||||
format.colorSpace = VK_COLOR_SPACE_SRGB_NONLINEAR_KHR;
|
||||
return format;
|
||||
}
|
||||
const auto& found = std::find_if(formats.begin(), formats.end(), [](const auto& format) {
|
||||
return format.format == VK_FORMAT_B8G8R8A8_UNORM &&
|
||||
format.colorSpace == VK_COLOR_SPACE_SRGB_NONLINEAR_KHR;
|
||||
});
|
||||
return found != formats.end() ? *found : formats[0];
|
||||
}
|
||||
|
||||
VkPresentModeKHR ChooseSwapPresentMode(vk::Span<VkPresentModeKHR> modes) {
|
||||
// Mailbox (triple buffering) doesn't lock the application like fifo (vsync),
|
||||
// prefer it if vsync option is not selected
|
||||
const auto found_mailbox = std::find(modes.begin(), modes.end(), VK_PRESENT_MODE_MAILBOX_KHR);
|
||||
if (Settings::values.fullscreen_mode.GetValue() == Settings::FullscreenMode::Borderless &&
|
||||
found_mailbox != modes.end() && !Settings::values.use_vsync.GetValue()) {
|
||||
return VK_PRESENT_MODE_MAILBOX_KHR;
|
||||
}
|
||||
if (!Settings::values.use_speed_limit.GetValue()) {
|
||||
// FIFO present mode locks the framerate to the monitor's refresh rate,
|
||||
// Find an alternative to surpass this limitation if FPS is unlocked.
|
||||
const auto found_imm = std::find(modes.begin(), modes.end(), VK_PRESENT_MODE_IMMEDIATE_KHR);
|
||||
if (found_imm != modes.end()) {
|
||||
return VK_PRESENT_MODE_IMMEDIATE_KHR;
|
||||
}
|
||||
}
|
||||
return VK_PRESENT_MODE_FIFO_KHR;
|
||||
}
|
||||
|
||||
VkExtent2D ChooseSwapExtent(const VkSurfaceCapabilitiesKHR& capabilities, u32 width, u32 height) {
|
||||
constexpr auto undefined_size{std::numeric_limits<u32>::max()};
|
||||
if (capabilities.currentExtent.width != undefined_size) {
|
||||
return capabilities.currentExtent;
|
||||
}
|
||||
VkExtent2D extent;
|
||||
extent.width = std::max(capabilities.minImageExtent.width,
|
||||
std::min(capabilities.maxImageExtent.width, width));
|
||||
extent.height = std::max(capabilities.minImageExtent.height,
|
||||
std::min(capabilities.maxImageExtent.height, height));
|
||||
return extent;
|
||||
}
|
||||
|
||||
} // Anonymous namespace
|
||||
|
||||
Swapchain::Swapchain(VkSurfaceKHR surface_, const Device& device_, Scheduler& scheduler_, u32 width,
|
||||
u32 height, bool srgb)
|
||||
: surface{surface_}, device{device_}, scheduler{scheduler_} {
|
||||
Create(width, height, srgb);
|
||||
}
|
||||
|
||||
Swapchain::~Swapchain() = default;
|
||||
|
||||
void Swapchain::Create(u32 width, u32 height, bool srgb) {
|
||||
is_outdated = false;
|
||||
is_suboptimal = false;
|
||||
|
||||
const auto physical_device = device.GetPhysical();
|
||||
const auto capabilities{physical_device.GetSurfaceCapabilitiesKHR(surface)};
|
||||
if (capabilities.maxImageExtent.width == 0 || capabilities.maxImageExtent.height == 0) {
|
||||
return;
|
||||
}
|
||||
|
||||
device.GetLogical().WaitIdle();
|
||||
Destroy();
|
||||
|
||||
CreateSwapchain(capabilities, width, height, srgb);
|
||||
CreateSemaphores();
|
||||
CreateImageViews();
|
||||
|
||||
resource_ticks.clear();
|
||||
resource_ticks.resize(image_count);
|
||||
}
|
||||
|
||||
void Swapchain::AcquireNextImage() {
|
||||
const VkResult result = device.GetLogical().AcquireNextImageKHR(
|
||||
*swapchain, std::numeric_limits<u64>::max(), *present_semaphores[frame_index],
|
||||
VK_NULL_HANDLE, &image_index);
|
||||
switch (result) {
|
||||
case VK_SUCCESS:
|
||||
break;
|
||||
case VK_SUBOPTIMAL_KHR:
|
||||
is_suboptimal = true;
|
||||
break;
|
||||
case VK_ERROR_OUT_OF_DATE_KHR:
|
||||
is_outdated = true;
|
||||
break;
|
||||
default:
|
||||
LOG_ERROR(Render_Vulkan, "vkAcquireNextImageKHR returned {}", vk::ToString(result));
|
||||
break;
|
||||
}
|
||||
scheduler.Wait(resource_ticks[image_index]);
|
||||
resource_ticks[image_index] = scheduler.CurrentTick();
|
||||
}
|
||||
|
||||
void Swapchain::Present(VkSemaphore render_semaphore) {
|
||||
const auto present_queue{device.GetPresentQueue()};
|
||||
const VkPresentInfoKHR present_info{
|
||||
.sType = VK_STRUCTURE_TYPE_PRESENT_INFO_KHR,
|
||||
.pNext = nullptr,
|
||||
.waitSemaphoreCount = render_semaphore ? 1U : 0U,
|
||||
.pWaitSemaphores = &render_semaphore,
|
||||
.swapchainCount = 1,
|
||||
.pSwapchains = swapchain.address(),
|
||||
.pImageIndices = &image_index,
|
||||
.pResults = nullptr,
|
||||
};
|
||||
switch (const VkResult result = present_queue.Present(present_info)) {
|
||||
case VK_SUCCESS:
|
||||
break;
|
||||
case VK_SUBOPTIMAL_KHR:
|
||||
LOG_DEBUG(Render_Vulkan, "Suboptimal swapchain");
|
||||
break;
|
||||
case VK_ERROR_OUT_OF_DATE_KHR:
|
||||
is_outdated = true;
|
||||
break;
|
||||
default:
|
||||
LOG_CRITICAL(Render_Vulkan, "Failed to present with error {}", vk::ToString(result));
|
||||
break;
|
||||
}
|
||||
++frame_index;
|
||||
if (frame_index >= image_count) {
|
||||
frame_index = 0;
|
||||
}
|
||||
}
|
||||
|
||||
void Swapchain::CreateSwapchain(const VkSurfaceCapabilitiesKHR& capabilities, u32 width, u32 height,
|
||||
bool srgb) {
|
||||
const auto physical_device{device.GetPhysical()};
|
||||
const auto formats{physical_device.GetSurfaceFormatsKHR(surface)};
|
||||
const auto present_modes{physical_device.GetSurfacePresentModesKHR(surface)};
|
||||
|
||||
const VkSurfaceFormatKHR surface_format{ChooseSwapSurfaceFormat(formats)};
|
||||
present_mode = ChooseSwapPresentMode(present_modes);
|
||||
|
||||
u32 requested_image_count{capabilities.minImageCount + 1};
|
||||
// Ensure Tripple buffering if possible.
|
||||
if (capabilities.maxImageCount > 0) {
|
||||
if (requested_image_count > capabilities.maxImageCount) {
|
||||
requested_image_count = capabilities.maxImageCount;
|
||||
} else {
|
||||
requested_image_count =
|
||||
std::max(requested_image_count, std::min(3U, capabilities.maxImageCount));
|
||||
}
|
||||
} else {
|
||||
requested_image_count = std::max(requested_image_count, 3U);
|
||||
}
|
||||
VkSwapchainCreateInfoKHR swapchain_ci{
|
||||
.sType = VK_STRUCTURE_TYPE_SWAPCHAIN_CREATE_INFO_KHR,
|
||||
.pNext = nullptr,
|
||||
.flags = 0,
|
||||
.surface = surface,
|
||||
.minImageCount = requested_image_count,
|
||||
.imageFormat = surface_format.format,
|
||||
.imageColorSpace = surface_format.colorSpace,
|
||||
.imageExtent = {},
|
||||
.imageArrayLayers = 1,
|
||||
.imageUsage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT,
|
||||
.imageSharingMode = VK_SHARING_MODE_EXCLUSIVE,
|
||||
.queueFamilyIndexCount = 0,
|
||||
.pQueueFamilyIndices = nullptr,
|
||||
.preTransform = capabilities.currentTransform,
|
||||
.compositeAlpha = VK_COMPOSITE_ALPHA_OPAQUE_BIT_KHR,
|
||||
.presentMode = present_mode,
|
||||
.clipped = VK_FALSE,
|
||||
.oldSwapchain = nullptr,
|
||||
};
|
||||
const u32 graphics_family{device.GetGraphicsFamily()};
|
||||
const u32 present_family{device.GetPresentFamily()};
|
||||
const std::array<u32, 2> queue_indices{graphics_family, present_family};
|
||||
if (graphics_family != present_family) {
|
||||
swapchain_ci.imageSharingMode = VK_SHARING_MODE_CONCURRENT;
|
||||
swapchain_ci.queueFamilyIndexCount = static_cast<u32>(queue_indices.size());
|
||||
swapchain_ci.pQueueFamilyIndices = queue_indices.data();
|
||||
}
|
||||
static constexpr std::array view_formats{VK_FORMAT_B8G8R8A8_UNORM, VK_FORMAT_B8G8R8A8_SRGB};
|
||||
VkImageFormatListCreateInfo format_list{
|
||||
.sType = VK_STRUCTURE_TYPE_IMAGE_FORMAT_LIST_CREATE_INFO_KHR,
|
||||
.pNext = nullptr,
|
||||
.viewFormatCount = static_cast<u32>(view_formats.size()),
|
||||
.pViewFormats = view_formats.data(),
|
||||
};
|
||||
if (device.IsKhrSwapchainMutableFormatEnabled()) {
|
||||
format_list.pNext = std::exchange(swapchain_ci.pNext, &format_list);
|
||||
swapchain_ci.flags |= VK_SWAPCHAIN_CREATE_MUTABLE_FORMAT_BIT_KHR;
|
||||
}
|
||||
// Request the size again to reduce the possibility of a TOCTOU race condition.
|
||||
const auto updated_capabilities = physical_device.GetSurfaceCapabilitiesKHR(surface);
|
||||
swapchain_ci.imageExtent = ChooseSwapExtent(updated_capabilities, width, height);
|
||||
// Don't add code within this and the swapchain creation.
|
||||
swapchain = device.GetLogical().CreateSwapchainKHR(swapchain_ci);
|
||||
|
||||
extent = swapchain_ci.imageExtent;
|
||||
current_srgb = srgb;
|
||||
current_fps_unlocked = !Settings::values.use_speed_limit.GetValue();
|
||||
|
||||
images = swapchain.GetImages();
|
||||
image_count = static_cast<u32>(images.size());
|
||||
image_view_format = srgb ? VK_FORMAT_B8G8R8A8_SRGB : VK_FORMAT_B8G8R8A8_UNORM;
|
||||
}
|
||||
|
||||
void Swapchain::CreateSemaphores() {
|
||||
present_semaphores.resize(image_count);
|
||||
std::ranges::generate(present_semaphores,
|
||||
[this] { return device.GetLogical().CreateSemaphore(); });
|
||||
}
|
||||
|
||||
void Swapchain::CreateImageViews() {
|
||||
VkImageViewCreateInfo ci{
|
||||
.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
|
||||
.pNext = nullptr,
|
||||
.flags = 0,
|
||||
.image = {},
|
||||
.viewType = VK_IMAGE_VIEW_TYPE_2D,
|
||||
.format = image_view_format,
|
||||
.components =
|
||||
{
|
||||
.r = VK_COMPONENT_SWIZZLE_IDENTITY,
|
||||
.g = VK_COMPONENT_SWIZZLE_IDENTITY,
|
||||
.b = VK_COMPONENT_SWIZZLE_IDENTITY,
|
||||
.a = VK_COMPONENT_SWIZZLE_IDENTITY,
|
||||
},
|
||||
.subresourceRange =
|
||||
{
|
||||
.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
|
||||
.baseMipLevel = 0,
|
||||
.levelCount = 1,
|
||||
.baseArrayLayer = 0,
|
||||
.layerCount = 1,
|
||||
},
|
||||
};
|
||||
|
||||
image_views.resize(image_count);
|
||||
for (std::size_t i = 0; i < image_count; i++) {
|
||||
ci.image = images[i];
|
||||
image_views[i] = device.GetLogical().CreateImageView(ci);
|
||||
}
|
||||
}
|
||||
|
||||
void Swapchain::Destroy() {
|
||||
frame_index = 0;
|
||||
present_semaphores.clear();
|
||||
framebuffers.clear();
|
||||
image_views.clear();
|
||||
swapchain.reset();
|
||||
}
|
||||
|
||||
bool Swapchain::HasFpsUnlockChanged() const {
|
||||
return current_fps_unlocked != !Settings::values.use_speed_limit.GetValue();
|
||||
}
|
||||
|
||||
bool Swapchain::NeedsPresentModeUpdate() const {
|
||||
// Mailbox present mode is the ideal for all scenarios. If it is not available,
|
||||
// A different present mode is needed to support unlocked FPS above the monitor's refresh rate.
|
||||
return present_mode != VK_PRESENT_MODE_MAILBOX_KHR && HasFpsUnlockChanged();
|
||||
}
|
||||
|
||||
} // namespace Vulkan
|
||||
|
||||
@@ -1,121 +1,121 @@
|
||||
// SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <vector>
|
||||
|
||||
#include "common/common_types.h"
|
||||
#include "video_core/vulkan_common/vulkan_wrapper.h"
|
||||
|
||||
namespace Layout {
|
||||
struct FramebufferLayout;
|
||||
}
|
||||
|
||||
namespace Vulkan {
|
||||
|
||||
class Device;
|
||||
class Scheduler;
|
||||
|
||||
class Swapchain {
|
||||
public:
|
||||
explicit Swapchain(VkSurfaceKHR surface, const Device& device, Scheduler& scheduler, u32 width,
|
||||
u32 height, bool srgb);
|
||||
~Swapchain();
|
||||
|
||||
/// Creates (or recreates) the swapchain with a given size.
|
||||
void Create(u32 width, u32 height, bool srgb);
|
||||
|
||||
/// Acquires the next image in the swapchain, waits as needed.
|
||||
void AcquireNextImage();
|
||||
|
||||
/// Presents the rendered image to the swapchain.
|
||||
void Present(VkSemaphore render_semaphore);
|
||||
|
||||
/// Returns true when the swapchain needs to be recreated.
|
||||
bool NeedsRecreation(bool is_srgb) const {
|
||||
return HasColorSpaceChanged(is_srgb) || IsSubOptimal() || NeedsPresentModeUpdate();
|
||||
}
|
||||
|
||||
/// Returns true when the color space has changed.
|
||||
bool HasColorSpaceChanged(bool is_srgb) const {
|
||||
return current_srgb != is_srgb;
|
||||
}
|
||||
|
||||
/// Returns true when the swapchain is outdated.
|
||||
bool IsOutDated() const {
|
||||
return is_outdated;
|
||||
}
|
||||
|
||||
/// Returns true when the swapchain is suboptimal.
|
||||
bool IsSubOptimal() const {
|
||||
return is_suboptimal;
|
||||
}
|
||||
|
||||
VkExtent2D GetSize() const {
|
||||
return extent;
|
||||
}
|
||||
|
||||
std::size_t GetImageCount() const {
|
||||
return image_count;
|
||||
}
|
||||
|
||||
std::size_t GetImageIndex() const {
|
||||
return image_index;
|
||||
}
|
||||
|
||||
VkImage GetImageIndex(std::size_t index) const {
|
||||
return images[index];
|
||||
}
|
||||
|
||||
VkImageView GetImageViewIndex(std::size_t index) const {
|
||||
return *image_views[index];
|
||||
}
|
||||
|
||||
VkFormat GetImageViewFormat() const {
|
||||
return image_view_format;
|
||||
}
|
||||
|
||||
VkSemaphore CurrentPresentSemaphore() const {
|
||||
return *present_semaphores[frame_index];
|
||||
}
|
||||
|
||||
private:
|
||||
void CreateSwapchain(const VkSurfaceCapabilitiesKHR& capabilities, u32 width, u32 height,
|
||||
bool srgb);
|
||||
void CreateSemaphores();
|
||||
void CreateImageViews();
|
||||
|
||||
void Destroy();
|
||||
|
||||
bool HasFpsUnlockChanged() const;
|
||||
|
||||
bool NeedsPresentModeUpdate() const;
|
||||
|
||||
const VkSurfaceKHR surface;
|
||||
const Device& device;
|
||||
Scheduler& scheduler;
|
||||
|
||||
vk::SwapchainKHR swapchain;
|
||||
|
||||
std::size_t image_count{};
|
||||
std::vector<VkImage> images;
|
||||
std::vector<vk::ImageView> image_views;
|
||||
std::vector<vk::Framebuffer> framebuffers;
|
||||
std::vector<u64> resource_ticks;
|
||||
std::vector<vk::Semaphore> present_semaphores;
|
||||
|
||||
u32 image_index{};
|
||||
u32 frame_index{};
|
||||
|
||||
VkFormat image_view_format{};
|
||||
VkExtent2D extent{};
|
||||
VkPresentModeKHR present_mode{};
|
||||
|
||||
bool current_srgb{};
|
||||
bool current_fps_unlocked{};
|
||||
bool is_outdated{};
|
||||
bool is_suboptimal{};
|
||||
};
|
||||
|
||||
} // namespace Vulkan
|
||||
// SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <vector>
|
||||
|
||||
#include "common/common_types.h"
|
||||
#include "video_core/vulkan_common/vulkan_wrapper.h"
|
||||
|
||||
namespace Layout {
|
||||
struct FramebufferLayout;
|
||||
}
|
||||
|
||||
namespace Vulkan {
|
||||
|
||||
class Device;
|
||||
class Scheduler;
|
||||
|
||||
class Swapchain {
|
||||
public:
|
||||
explicit Swapchain(VkSurfaceKHR surface, const Device& device, Scheduler& scheduler, u32 width,
|
||||
u32 height, bool srgb);
|
||||
~Swapchain();
|
||||
|
||||
/// Creates (or recreates) the swapchain with a given size.
|
||||
void Create(u32 width, u32 height, bool srgb);
|
||||
|
||||
/// Acquires the next image in the swapchain, waits as needed.
|
||||
void AcquireNextImage();
|
||||
|
||||
/// Presents the rendered image to the swapchain.
|
||||
void Present(VkSemaphore render_semaphore);
|
||||
|
||||
/// Returns true when the swapchain needs to be recreated.
|
||||
bool NeedsRecreation(bool is_srgb) const {
|
||||
return HasColorSpaceChanged(is_srgb) || IsSubOptimal() || NeedsPresentModeUpdate();
|
||||
}
|
||||
|
||||
/// Returns true when the color space has changed.
|
||||
bool HasColorSpaceChanged(bool is_srgb) const {
|
||||
return current_srgb != is_srgb;
|
||||
}
|
||||
|
||||
/// Returns true when the swapchain is outdated.
|
||||
bool IsOutDated() const {
|
||||
return is_outdated;
|
||||
}
|
||||
|
||||
/// Returns true when the swapchain is suboptimal.
|
||||
bool IsSubOptimal() const {
|
||||
return is_suboptimal;
|
||||
}
|
||||
|
||||
VkExtent2D GetSize() const {
|
||||
return extent;
|
||||
}
|
||||
|
||||
std::size_t GetImageCount() const {
|
||||
return image_count;
|
||||
}
|
||||
|
||||
std::size_t GetImageIndex() const {
|
||||
return image_index;
|
||||
}
|
||||
|
||||
VkImage GetImageIndex(std::size_t index) const {
|
||||
return images[index];
|
||||
}
|
||||
|
||||
VkImageView GetImageViewIndex(std::size_t index) const {
|
||||
return *image_views[index];
|
||||
}
|
||||
|
||||
VkFormat GetImageViewFormat() const {
|
||||
return image_view_format;
|
||||
}
|
||||
|
||||
VkSemaphore CurrentPresentSemaphore() const {
|
||||
return *present_semaphores[frame_index];
|
||||
}
|
||||
|
||||
private:
|
||||
void CreateSwapchain(const VkSurfaceCapabilitiesKHR& capabilities, u32 width, u32 height,
|
||||
bool srgb);
|
||||
void CreateSemaphores();
|
||||
void CreateImageViews();
|
||||
|
||||
void Destroy();
|
||||
|
||||
bool HasFpsUnlockChanged() const;
|
||||
|
||||
bool NeedsPresentModeUpdate() const;
|
||||
|
||||
const VkSurfaceKHR surface;
|
||||
const Device& device;
|
||||
Scheduler& scheduler;
|
||||
|
||||
vk::SwapchainKHR swapchain;
|
||||
|
||||
std::size_t image_count{};
|
||||
std::vector<VkImage> images;
|
||||
std::vector<vk::ImageView> image_views;
|
||||
std::vector<vk::Framebuffer> framebuffers;
|
||||
std::vector<u64> resource_ticks;
|
||||
std::vector<vk::Semaphore> present_semaphores;
|
||||
|
||||
u32 image_index{};
|
||||
u32 frame_index{};
|
||||
|
||||
VkFormat image_view_format{};
|
||||
VkExtent2D extent{};
|
||||
VkPresentModeKHR present_mode{};
|
||||
|
||||
bool current_srgb{};
|
||||
bool current_fps_unlocked{};
|
||||
bool is_outdated{};
|
||||
bool is_suboptimal{};
|
||||
};
|
||||
|
||||
} // namespace Vulkan
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,361 +1,361 @@
|
||||
// SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <span>
|
||||
|
||||
#include "shader_recompiler/shader_info.h"
|
||||
#include "video_core/renderer_vulkan/vk_compute_pass.h"
|
||||
#include "video_core/renderer_vulkan/vk_staging_buffer_pool.h"
|
||||
#include "video_core/texture_cache/image_view_base.h"
|
||||
#include "video_core/texture_cache/texture_cache_base.h"
|
||||
#include "video_core/vulkan_common/vulkan_memory_allocator.h"
|
||||
#include "video_core/vulkan_common/vulkan_wrapper.h"
|
||||
|
||||
namespace Settings {
|
||||
struct ResolutionScalingInfo;
|
||||
}
|
||||
|
||||
namespace Vulkan {
|
||||
|
||||
using VideoCommon::ImageId;
|
||||
using VideoCommon::NUM_RT;
|
||||
using VideoCommon::Region2D;
|
||||
using VideoCommon::RenderTargets;
|
||||
using VideoCommon::SlotVector;
|
||||
using VideoCore::Surface::PixelFormat;
|
||||
|
||||
class BlitImageHelper;
|
||||
class DescriptorPool;
|
||||
class Device;
|
||||
class Image;
|
||||
class ImageView;
|
||||
class Framebuffer;
|
||||
class RenderPassCache;
|
||||
class StagingBufferPool;
|
||||
class UpdateDescriptorQueue;
|
||||
class Scheduler;
|
||||
|
||||
class TextureCacheRuntime {
|
||||
public:
|
||||
explicit TextureCacheRuntime(const Device& device_, Scheduler& scheduler_,
|
||||
MemoryAllocator& memory_allocator_,
|
||||
StagingBufferPool& staging_buffer_pool_,
|
||||
BlitImageHelper& blit_image_helper_,
|
||||
RenderPassCache& render_pass_cache_,
|
||||
DescriptorPool& descriptor_pool,
|
||||
UpdateDescriptorQueue& update_descriptor_queue);
|
||||
|
||||
void Finish();
|
||||
|
||||
StagingBufferRef UploadStagingBuffer(size_t size);
|
||||
|
||||
StagingBufferRef DownloadStagingBuffer(size_t size);
|
||||
|
||||
void TickFrame();
|
||||
|
||||
u64 GetDeviceLocalMemory() const;
|
||||
|
||||
u64 GetDeviceMemoryUsage() const;
|
||||
|
||||
bool CanReportMemoryUsage() const;
|
||||
|
||||
void BlitImage(Framebuffer* dst_framebuffer, ImageView& dst, ImageView& src,
|
||||
const Region2D& dst_region, const Region2D& src_region,
|
||||
Tegra::Engines::Fermi2D::Filter filter,
|
||||
Tegra::Engines::Fermi2D::Operation operation);
|
||||
|
||||
void CopyImage(Image& dst, Image& src, std::span<const VideoCommon::ImageCopy> copies);
|
||||
|
||||
bool ShouldReinterpret(Image& dst, Image& src);
|
||||
|
||||
void ReinterpretImage(Image& dst, Image& src, std::span<const VideoCommon::ImageCopy> copies);
|
||||
|
||||
void ConvertImage(Framebuffer* dst, ImageView& dst_view, ImageView& src_view);
|
||||
|
||||
bool CanAccelerateImageUpload(Image&) const noexcept {
|
||||
return false;
|
||||
}
|
||||
|
||||
void AccelerateImageUpload(Image&, const StagingBufferRef&,
|
||||
std::span<const VideoCommon::SwizzleParameters>);
|
||||
|
||||
void InsertUploadMemoryBarrier() {}
|
||||
|
||||
bool HasBrokenTextureViewFormats() const noexcept {
|
||||
// No known Vulkan driver has broken image views
|
||||
return false;
|
||||
}
|
||||
|
||||
bool HasNativeBgr() const noexcept {
|
||||
// All known Vulkan drivers can natively handle BGR textures
|
||||
return true;
|
||||
}
|
||||
|
||||
[[nodiscard]] VkBuffer GetTemporaryBuffer(size_t needed_size);
|
||||
|
||||
const Device& device;
|
||||
Scheduler& scheduler;
|
||||
MemoryAllocator& memory_allocator;
|
||||
StagingBufferPool& staging_buffer_pool;
|
||||
BlitImageHelper& blit_image_helper;
|
||||
RenderPassCache& render_pass_cache;
|
||||
std::optional<ASTCDecoderPass> astc_decoder_pass;
|
||||
const Settings::ResolutionScalingInfo& resolution;
|
||||
|
||||
constexpr static size_t indexing_slots = 8 * sizeof(size_t);
|
||||
std::array<vk::Buffer, indexing_slots> buffers{};
|
||||
std::array<std::unique_ptr<MemoryCommit>, indexing_slots> buffer_commits{};
|
||||
};
|
||||
|
||||
class Image : public VideoCommon::ImageBase {
|
||||
public:
|
||||
explicit Image(TextureCacheRuntime&, const VideoCommon::ImageInfo& info, GPUVAddr gpu_addr,
|
||||
VAddr cpu_addr);
|
||||
explicit Image(const VideoCommon::NullImageParams&);
|
||||
|
||||
~Image();
|
||||
|
||||
Image(const Image&) = delete;
|
||||
Image& operator=(const Image&) = delete;
|
||||
|
||||
Image(Image&&) = default;
|
||||
Image& operator=(Image&&) = default;
|
||||
|
||||
void UploadMemory(const StagingBufferRef& map,
|
||||
std::span<const VideoCommon::BufferImageCopy> copies);
|
||||
|
||||
void DownloadMemory(const StagingBufferRef& map,
|
||||
std::span<const VideoCommon::BufferImageCopy> copies);
|
||||
|
||||
[[nodiscard]] VkImage Handle() const noexcept {
|
||||
return current_image;
|
||||
}
|
||||
|
||||
[[nodiscard]] VkImageAspectFlags AspectMask() const noexcept {
|
||||
return aspect_mask;
|
||||
}
|
||||
|
||||
[[nodiscard]] VkImageView StorageImageView(s32 level) const noexcept {
|
||||
return *storage_image_views[level];
|
||||
}
|
||||
|
||||
/// Returns true when the image is already initialized and mark it as initialized
|
||||
[[nodiscard]] bool ExchangeInitialization() noexcept {
|
||||
return std::exchange(initialized, true);
|
||||
}
|
||||
|
||||
bool IsRescaled() const noexcept;
|
||||
|
||||
bool ScaleUp(bool ignore = false);
|
||||
|
||||
bool ScaleDown(bool ignore = false);
|
||||
|
||||
private:
|
||||
bool BlitScaleHelper(bool scale_up);
|
||||
|
||||
bool NeedsScaleHelper() const;
|
||||
|
||||
Scheduler* scheduler{};
|
||||
TextureCacheRuntime* runtime{};
|
||||
|
||||
vk::Image original_image;
|
||||
MemoryCommit commit;
|
||||
std::vector<vk::ImageView> storage_image_views;
|
||||
VkImageAspectFlags aspect_mask = 0;
|
||||
bool initialized = false;
|
||||
vk::Image scaled_image{};
|
||||
MemoryCommit scaled_commit{};
|
||||
VkImage current_image{};
|
||||
|
||||
std::unique_ptr<Framebuffer> scale_framebuffer;
|
||||
std::unique_ptr<ImageView> scale_view;
|
||||
|
||||
std::unique_ptr<Framebuffer> normal_framebuffer;
|
||||
std::unique_ptr<ImageView> normal_view;
|
||||
};
|
||||
|
||||
class ImageView : public VideoCommon::ImageViewBase {
|
||||
public:
|
||||
explicit ImageView(TextureCacheRuntime&, const VideoCommon::ImageViewInfo&, ImageId, Image&);
|
||||
explicit ImageView(TextureCacheRuntime&, const VideoCommon::ImageViewInfo&, ImageId, Image&,
|
||||
const SlotVector<Image>&);
|
||||
explicit ImageView(TextureCacheRuntime&, const VideoCommon::ImageInfo&,
|
||||
const VideoCommon::ImageViewInfo&, GPUVAddr);
|
||||
explicit ImageView(TextureCacheRuntime&, const VideoCommon::NullImageViewParams&);
|
||||
|
||||
~ImageView();
|
||||
|
||||
ImageView(const ImageView&) = delete;
|
||||
ImageView& operator=(const ImageView&) = delete;
|
||||
|
||||
ImageView(ImageView&&) = default;
|
||||
ImageView& operator=(ImageView&&) = default;
|
||||
|
||||
[[nodiscard]] VkImageView DepthView();
|
||||
|
||||
[[nodiscard]] VkImageView StencilView();
|
||||
|
||||
[[nodiscard]] VkImageView ColorView();
|
||||
|
||||
[[nodiscard]] VkImageView StorageView(Shader::TextureType texture_type,
|
||||
Shader::ImageFormat image_format);
|
||||
|
||||
[[nodiscard]] bool IsRescaled() const noexcept;
|
||||
|
||||
[[nodiscard]] VkImageView Handle(Shader::TextureType texture_type) const noexcept {
|
||||
return *image_views[static_cast<size_t>(texture_type)];
|
||||
}
|
||||
|
||||
[[nodiscard]] VkImage ImageHandle() const noexcept {
|
||||
return image_handle;
|
||||
}
|
||||
|
||||
[[nodiscard]] VkImageView RenderTarget() const noexcept {
|
||||
return render_target;
|
||||
}
|
||||
|
||||
[[nodiscard]] VkSampleCountFlagBits Samples() const noexcept {
|
||||
return samples;
|
||||
}
|
||||
|
||||
[[nodiscard]] GPUVAddr GpuAddr() const noexcept {
|
||||
return gpu_addr;
|
||||
}
|
||||
|
||||
[[nodiscard]] u32 BufferSize() const noexcept {
|
||||
return buffer_size;
|
||||
}
|
||||
|
||||
private:
|
||||
struct StorageViews {
|
||||
std::array<vk::ImageView, Shader::NUM_TEXTURE_TYPES> signeds;
|
||||
std::array<vk::ImageView, Shader::NUM_TEXTURE_TYPES> unsigneds;
|
||||
};
|
||||
|
||||
[[nodiscard]] vk::ImageView MakeView(VkFormat vk_format, VkImageAspectFlags aspect_mask);
|
||||
|
||||
const Device* device = nullptr;
|
||||
const SlotVector<Image>* slot_images = nullptr;
|
||||
|
||||
std::array<vk::ImageView, Shader::NUM_TEXTURE_TYPES> image_views;
|
||||
std::unique_ptr<StorageViews> storage_views;
|
||||
vk::ImageView depth_view;
|
||||
vk::ImageView stencil_view;
|
||||
vk::ImageView color_view;
|
||||
VkImage image_handle = VK_NULL_HANDLE;
|
||||
VkImageView render_target = VK_NULL_HANDLE;
|
||||
VkSampleCountFlagBits samples = VK_SAMPLE_COUNT_1_BIT;
|
||||
GPUVAddr gpu_addr = 0;
|
||||
u32 buffer_size = 0;
|
||||
};
|
||||
|
||||
class ImageAlloc : public VideoCommon::ImageAllocBase {};
|
||||
|
||||
class Sampler {
|
||||
public:
|
||||
explicit Sampler(TextureCacheRuntime&, const Tegra::Texture::TSCEntry&);
|
||||
|
||||
[[nodiscard]] VkSampler Handle() const noexcept {
|
||||
return *sampler;
|
||||
}
|
||||
|
||||
private:
|
||||
vk::Sampler sampler;
|
||||
};
|
||||
|
||||
class Framebuffer {
|
||||
public:
|
||||
explicit Framebuffer(TextureCacheRuntime& runtime, std::span<ImageView*, NUM_RT> color_buffers,
|
||||
ImageView* depth_buffer, const VideoCommon::RenderTargets& key);
|
||||
|
||||
explicit Framebuffer(TextureCacheRuntime& runtime, ImageView* color_buffer,
|
||||
ImageView* depth_buffer, VkExtent2D extent, bool is_rescaled);
|
||||
|
||||
~Framebuffer();
|
||||
|
||||
Framebuffer(const Framebuffer&) = delete;
|
||||
Framebuffer& operator=(const Framebuffer&) = delete;
|
||||
|
||||
Framebuffer(Framebuffer&&) = default;
|
||||
Framebuffer& operator=(Framebuffer&&) = default;
|
||||
|
||||
void CreateFramebuffer(TextureCacheRuntime& runtime,
|
||||
std::span<ImageView*, NUM_RT> color_buffers, ImageView* depth_buffer,
|
||||
bool is_rescaled = false);
|
||||
|
||||
[[nodiscard]] VkFramebuffer Handle() const noexcept {
|
||||
return *framebuffer;
|
||||
}
|
||||
|
||||
[[nodiscard]] VkRenderPass RenderPass() const noexcept {
|
||||
return renderpass;
|
||||
}
|
||||
|
||||
[[nodiscard]] VkExtent2D RenderArea() const noexcept {
|
||||
return render_area;
|
||||
}
|
||||
|
||||
[[nodiscard]] VkSampleCountFlagBits Samples() const noexcept {
|
||||
return samples;
|
||||
}
|
||||
|
||||
[[nodiscard]] u32 NumColorBuffers() const noexcept {
|
||||
return num_color_buffers;
|
||||
}
|
||||
|
||||
[[nodiscard]] u32 NumImages() const noexcept {
|
||||
return num_images;
|
||||
}
|
||||
|
||||
[[nodiscard]] const std::array<VkImage, 9>& Images() const noexcept {
|
||||
return images;
|
||||
}
|
||||
|
||||
[[nodiscard]] const std::array<VkImageSubresourceRange, 9>& ImageRanges() const noexcept {
|
||||
return image_ranges;
|
||||
}
|
||||
|
||||
[[nodiscard]] bool HasAspectColorBit(size_t index) const noexcept {
|
||||
return (image_ranges.at(index).aspectMask & VK_IMAGE_ASPECT_COLOR_BIT) != 0;
|
||||
}
|
||||
|
||||
[[nodiscard]] bool HasAspectDepthBit() const noexcept {
|
||||
return has_depth;
|
||||
}
|
||||
|
||||
[[nodiscard]] bool HasAspectStencilBit() const noexcept {
|
||||
return has_stencil;
|
||||
}
|
||||
|
||||
private:
|
||||
vk::Framebuffer framebuffer;
|
||||
VkRenderPass renderpass{};
|
||||
VkExtent2D render_area{};
|
||||
VkSampleCountFlagBits samples = VK_SAMPLE_COUNT_1_BIT;
|
||||
u32 num_color_buffers = 0;
|
||||
u32 num_images = 0;
|
||||
std::array<VkImage, 9> images{};
|
||||
std::array<VkImageSubresourceRange, 9> image_ranges{};
|
||||
bool has_depth{};
|
||||
bool has_stencil{};
|
||||
};
|
||||
|
||||
struct TextureCacheParams {
|
||||
static constexpr bool ENABLE_VALIDATION = true;
|
||||
static constexpr bool FRAMEBUFFER_BLITS = false;
|
||||
static constexpr bool HAS_EMULATED_COPIES = false;
|
||||
static constexpr bool HAS_DEVICE_MEMORY_INFO = true;
|
||||
|
||||
using Runtime = Vulkan::TextureCacheRuntime;
|
||||
using Image = Vulkan::Image;
|
||||
using ImageAlloc = Vulkan::ImageAlloc;
|
||||
using ImageView = Vulkan::ImageView;
|
||||
using Sampler = Vulkan::Sampler;
|
||||
using Framebuffer = Vulkan::Framebuffer;
|
||||
};
|
||||
|
||||
using TextureCache = VideoCommon::TextureCache<TextureCacheParams>;
|
||||
|
||||
} // namespace Vulkan
|
||||
// SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <span>
|
||||
|
||||
#include "shader_recompiler/shader_info.h"
|
||||
#include "video_core/renderer_vulkan/vk_compute_pass.h"
|
||||
#include "video_core/renderer_vulkan/vk_staging_buffer_pool.h"
|
||||
#include "video_core/texture_cache/image_view_base.h"
|
||||
#include "video_core/texture_cache/texture_cache_base.h"
|
||||
#include "video_core/vulkan_common/vulkan_memory_allocator.h"
|
||||
#include "video_core/vulkan_common/vulkan_wrapper.h"
|
||||
|
||||
namespace Settings {
|
||||
struct ResolutionScalingInfo;
|
||||
}
|
||||
|
||||
namespace Vulkan {
|
||||
|
||||
using VideoCommon::ImageId;
|
||||
using VideoCommon::NUM_RT;
|
||||
using VideoCommon::Region2D;
|
||||
using VideoCommon::RenderTargets;
|
||||
using VideoCommon::SlotVector;
|
||||
using VideoCore::Surface::PixelFormat;
|
||||
|
||||
class BlitImageHelper;
|
||||
class DescriptorPool;
|
||||
class Device;
|
||||
class Image;
|
||||
class ImageView;
|
||||
class Framebuffer;
|
||||
class RenderPassCache;
|
||||
class StagingBufferPool;
|
||||
class UpdateDescriptorQueue;
|
||||
class Scheduler;
|
||||
|
||||
class TextureCacheRuntime {
|
||||
public:
|
||||
explicit TextureCacheRuntime(const Device& device_, Scheduler& scheduler_,
|
||||
MemoryAllocator& memory_allocator_,
|
||||
StagingBufferPool& staging_buffer_pool_,
|
||||
BlitImageHelper& blit_image_helper_,
|
||||
RenderPassCache& render_pass_cache_,
|
||||
DescriptorPool& descriptor_pool,
|
||||
UpdateDescriptorQueue& update_descriptor_queue);
|
||||
|
||||
void Finish();
|
||||
|
||||
StagingBufferRef UploadStagingBuffer(size_t size);
|
||||
|
||||
StagingBufferRef DownloadStagingBuffer(size_t size);
|
||||
|
||||
void TickFrame();
|
||||
|
||||
u64 GetDeviceLocalMemory() const;
|
||||
|
||||
u64 GetDeviceMemoryUsage() const;
|
||||
|
||||
bool CanReportMemoryUsage() const;
|
||||
|
||||
void BlitImage(Framebuffer* dst_framebuffer, ImageView& dst, ImageView& src,
|
||||
const Region2D& dst_region, const Region2D& src_region,
|
||||
Tegra::Engines::Fermi2D::Filter filter,
|
||||
Tegra::Engines::Fermi2D::Operation operation);
|
||||
|
||||
void CopyImage(Image& dst, Image& src, std::span<const VideoCommon::ImageCopy> copies);
|
||||
|
||||
bool ShouldReinterpret(Image& dst, Image& src);
|
||||
|
||||
void ReinterpretImage(Image& dst, Image& src, std::span<const VideoCommon::ImageCopy> copies);
|
||||
|
||||
void ConvertImage(Framebuffer* dst, ImageView& dst_view, ImageView& src_view);
|
||||
|
||||
bool CanAccelerateImageUpload(Image&) const noexcept {
|
||||
return false;
|
||||
}
|
||||
|
||||
void AccelerateImageUpload(Image&, const StagingBufferRef&,
|
||||
std::span<const VideoCommon::SwizzleParameters>);
|
||||
|
||||
void InsertUploadMemoryBarrier() {}
|
||||
|
||||
bool HasBrokenTextureViewFormats() const noexcept {
|
||||
// No known Vulkan driver has broken image views
|
||||
return false;
|
||||
}
|
||||
|
||||
bool HasNativeBgr() const noexcept {
|
||||
// All known Vulkan drivers can natively handle BGR textures
|
||||
return true;
|
||||
}
|
||||
|
||||
[[nodiscard]] VkBuffer GetTemporaryBuffer(size_t needed_size);
|
||||
|
||||
const Device& device;
|
||||
Scheduler& scheduler;
|
||||
MemoryAllocator& memory_allocator;
|
||||
StagingBufferPool& staging_buffer_pool;
|
||||
BlitImageHelper& blit_image_helper;
|
||||
RenderPassCache& render_pass_cache;
|
||||
std::optional<ASTCDecoderPass> astc_decoder_pass;
|
||||
const Settings::ResolutionScalingInfo& resolution;
|
||||
|
||||
constexpr static size_t indexing_slots = 8 * sizeof(size_t);
|
||||
std::array<vk::Buffer, indexing_slots> buffers{};
|
||||
std::array<std::unique_ptr<MemoryCommit>, indexing_slots> buffer_commits{};
|
||||
};
|
||||
|
||||
class Image : public VideoCommon::ImageBase {
|
||||
public:
|
||||
explicit Image(TextureCacheRuntime&, const VideoCommon::ImageInfo& info, GPUVAddr gpu_addr,
|
||||
VAddr cpu_addr);
|
||||
explicit Image(const VideoCommon::NullImageParams&);
|
||||
|
||||
~Image();
|
||||
|
||||
Image(const Image&) = delete;
|
||||
Image& operator=(const Image&) = delete;
|
||||
|
||||
Image(Image&&) = default;
|
||||
Image& operator=(Image&&) = default;
|
||||
|
||||
void UploadMemory(const StagingBufferRef& map,
|
||||
std::span<const VideoCommon::BufferImageCopy> copies);
|
||||
|
||||
void DownloadMemory(const StagingBufferRef& map,
|
||||
std::span<const VideoCommon::BufferImageCopy> copies);
|
||||
|
||||
[[nodiscard]] VkImage Handle() const noexcept {
|
||||
return current_image;
|
||||
}
|
||||
|
||||
[[nodiscard]] VkImageAspectFlags AspectMask() const noexcept {
|
||||
return aspect_mask;
|
||||
}
|
||||
|
||||
[[nodiscard]] VkImageView StorageImageView(s32 level) const noexcept {
|
||||
return *storage_image_views[level];
|
||||
}
|
||||
|
||||
/// Returns true when the image is already initialized and mark it as initialized
|
||||
[[nodiscard]] bool ExchangeInitialization() noexcept {
|
||||
return std::exchange(initialized, true);
|
||||
}
|
||||
|
||||
bool IsRescaled() const noexcept;
|
||||
|
||||
bool ScaleUp(bool ignore = false);
|
||||
|
||||
bool ScaleDown(bool ignore = false);
|
||||
|
||||
private:
|
||||
bool BlitScaleHelper(bool scale_up);
|
||||
|
||||
bool NeedsScaleHelper() const;
|
||||
|
||||
Scheduler* scheduler{};
|
||||
TextureCacheRuntime* runtime{};
|
||||
|
||||
vk::Image original_image;
|
||||
MemoryCommit commit;
|
||||
std::vector<vk::ImageView> storage_image_views;
|
||||
VkImageAspectFlags aspect_mask = 0;
|
||||
bool initialized = false;
|
||||
vk::Image scaled_image{};
|
||||
MemoryCommit scaled_commit{};
|
||||
VkImage current_image{};
|
||||
|
||||
std::unique_ptr<Framebuffer> scale_framebuffer;
|
||||
std::unique_ptr<ImageView> scale_view;
|
||||
|
||||
std::unique_ptr<Framebuffer> normal_framebuffer;
|
||||
std::unique_ptr<ImageView> normal_view;
|
||||
};
|
||||
|
||||
class ImageView : public VideoCommon::ImageViewBase {
|
||||
public:
|
||||
explicit ImageView(TextureCacheRuntime&, const VideoCommon::ImageViewInfo&, ImageId, Image&);
|
||||
explicit ImageView(TextureCacheRuntime&, const VideoCommon::ImageViewInfo&, ImageId, Image&,
|
||||
const SlotVector<Image>&);
|
||||
explicit ImageView(TextureCacheRuntime&, const VideoCommon::ImageInfo&,
|
||||
const VideoCommon::ImageViewInfo&, GPUVAddr);
|
||||
explicit ImageView(TextureCacheRuntime&, const VideoCommon::NullImageViewParams&);
|
||||
|
||||
~ImageView();
|
||||
|
||||
ImageView(const ImageView&) = delete;
|
||||
ImageView& operator=(const ImageView&) = delete;
|
||||
|
||||
ImageView(ImageView&&) = default;
|
||||
ImageView& operator=(ImageView&&) = default;
|
||||
|
||||
[[nodiscard]] VkImageView DepthView();
|
||||
|
||||
[[nodiscard]] VkImageView StencilView();
|
||||
|
||||
[[nodiscard]] VkImageView ColorView();
|
||||
|
||||
[[nodiscard]] VkImageView StorageView(Shader::TextureType texture_type,
|
||||
Shader::ImageFormat image_format);
|
||||
|
||||
[[nodiscard]] bool IsRescaled() const noexcept;
|
||||
|
||||
[[nodiscard]] VkImageView Handle(Shader::TextureType texture_type) const noexcept {
|
||||
return *image_views[static_cast<size_t>(texture_type)];
|
||||
}
|
||||
|
||||
[[nodiscard]] VkImage ImageHandle() const noexcept {
|
||||
return image_handle;
|
||||
}
|
||||
|
||||
[[nodiscard]] VkImageView RenderTarget() const noexcept {
|
||||
return render_target;
|
||||
}
|
||||
|
||||
[[nodiscard]] VkSampleCountFlagBits Samples() const noexcept {
|
||||
return samples;
|
||||
}
|
||||
|
||||
[[nodiscard]] GPUVAddr GpuAddr() const noexcept {
|
||||
return gpu_addr;
|
||||
}
|
||||
|
||||
[[nodiscard]] u32 BufferSize() const noexcept {
|
||||
return buffer_size;
|
||||
}
|
||||
|
||||
private:
|
||||
struct StorageViews {
|
||||
std::array<vk::ImageView, Shader::NUM_TEXTURE_TYPES> signeds;
|
||||
std::array<vk::ImageView, Shader::NUM_TEXTURE_TYPES> unsigneds;
|
||||
};
|
||||
|
||||
[[nodiscard]] vk::ImageView MakeView(VkFormat vk_format, VkImageAspectFlags aspect_mask);
|
||||
|
||||
const Device* device = nullptr;
|
||||
const SlotVector<Image>* slot_images = nullptr;
|
||||
|
||||
std::array<vk::ImageView, Shader::NUM_TEXTURE_TYPES> image_views;
|
||||
std::unique_ptr<StorageViews> storage_views;
|
||||
vk::ImageView depth_view;
|
||||
vk::ImageView stencil_view;
|
||||
vk::ImageView color_view;
|
||||
VkImage image_handle = VK_NULL_HANDLE;
|
||||
VkImageView render_target = VK_NULL_HANDLE;
|
||||
VkSampleCountFlagBits samples = VK_SAMPLE_COUNT_1_BIT;
|
||||
GPUVAddr gpu_addr = 0;
|
||||
u32 buffer_size = 0;
|
||||
};
|
||||
|
||||
class ImageAlloc : public VideoCommon::ImageAllocBase {};
|
||||
|
||||
class Sampler {
|
||||
public:
|
||||
explicit Sampler(TextureCacheRuntime&, const Tegra::Texture::TSCEntry&);
|
||||
|
||||
[[nodiscard]] VkSampler Handle() const noexcept {
|
||||
return *sampler;
|
||||
}
|
||||
|
||||
private:
|
||||
vk::Sampler sampler;
|
||||
};
|
||||
|
||||
class Framebuffer {
|
||||
public:
|
||||
explicit Framebuffer(TextureCacheRuntime& runtime, std::span<ImageView*, NUM_RT> color_buffers,
|
||||
ImageView* depth_buffer, const VideoCommon::RenderTargets& key);
|
||||
|
||||
explicit Framebuffer(TextureCacheRuntime& runtime, ImageView* color_buffer,
|
||||
ImageView* depth_buffer, VkExtent2D extent, bool is_rescaled);
|
||||
|
||||
~Framebuffer();
|
||||
|
||||
Framebuffer(const Framebuffer&) = delete;
|
||||
Framebuffer& operator=(const Framebuffer&) = delete;
|
||||
|
||||
Framebuffer(Framebuffer&&) = default;
|
||||
Framebuffer& operator=(Framebuffer&&) = default;
|
||||
|
||||
void CreateFramebuffer(TextureCacheRuntime& runtime,
|
||||
std::span<ImageView*, NUM_RT> color_buffers, ImageView* depth_buffer,
|
||||
bool is_rescaled = false);
|
||||
|
||||
[[nodiscard]] VkFramebuffer Handle() const noexcept {
|
||||
return *framebuffer;
|
||||
}
|
||||
|
||||
[[nodiscard]] VkRenderPass RenderPass() const noexcept {
|
||||
return renderpass;
|
||||
}
|
||||
|
||||
[[nodiscard]] VkExtent2D RenderArea() const noexcept {
|
||||
return render_area;
|
||||
}
|
||||
|
||||
[[nodiscard]] VkSampleCountFlagBits Samples() const noexcept {
|
||||
return samples;
|
||||
}
|
||||
|
||||
[[nodiscard]] u32 NumColorBuffers() const noexcept {
|
||||
return num_color_buffers;
|
||||
}
|
||||
|
||||
[[nodiscard]] u32 NumImages() const noexcept {
|
||||
return num_images;
|
||||
}
|
||||
|
||||
[[nodiscard]] const std::array<VkImage, 9>& Images() const noexcept {
|
||||
return images;
|
||||
}
|
||||
|
||||
[[nodiscard]] const std::array<VkImageSubresourceRange, 9>& ImageRanges() const noexcept {
|
||||
return image_ranges;
|
||||
}
|
||||
|
||||
[[nodiscard]] bool HasAspectColorBit(size_t index) const noexcept {
|
||||
return (image_ranges.at(index).aspectMask & VK_IMAGE_ASPECT_COLOR_BIT) != 0;
|
||||
}
|
||||
|
||||
[[nodiscard]] bool HasAspectDepthBit() const noexcept {
|
||||
return has_depth;
|
||||
}
|
||||
|
||||
[[nodiscard]] bool HasAspectStencilBit() const noexcept {
|
||||
return has_stencil;
|
||||
}
|
||||
|
||||
private:
|
||||
vk::Framebuffer framebuffer;
|
||||
VkRenderPass renderpass{};
|
||||
VkExtent2D render_area{};
|
||||
VkSampleCountFlagBits samples = VK_SAMPLE_COUNT_1_BIT;
|
||||
u32 num_color_buffers = 0;
|
||||
u32 num_images = 0;
|
||||
std::array<VkImage, 9> images{};
|
||||
std::array<VkImageSubresourceRange, 9> image_ranges{};
|
||||
bool has_depth{};
|
||||
bool has_stencil{};
|
||||
};
|
||||
|
||||
struct TextureCacheParams {
|
||||
static constexpr bool ENABLE_VALIDATION = true;
|
||||
static constexpr bool FRAMEBUFFER_BLITS = false;
|
||||
static constexpr bool HAS_EMULATED_COPIES = false;
|
||||
static constexpr bool HAS_DEVICE_MEMORY_INFO = true;
|
||||
|
||||
using Runtime = Vulkan::TextureCacheRuntime;
|
||||
using Image = Vulkan::Image;
|
||||
using ImageAlloc = Vulkan::ImageAlloc;
|
||||
using ImageView = Vulkan::ImageView;
|
||||
using Sampler = Vulkan::Sampler;
|
||||
using Framebuffer = Vulkan::Framebuffer;
|
||||
};
|
||||
|
||||
using TextureCache = VideoCommon::TextureCache<TextureCacheParams>;
|
||||
|
||||
} // namespace Vulkan
|
||||
|
||||
@@ -1,9 +1,9 @@
|
||||
// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#include "video_core/renderer_vulkan/vk_texture_cache.h"
|
||||
#include "video_core/texture_cache/texture_cache.h"
|
||||
|
||||
namespace VideoCommon {
|
||||
template class VideoCommon::TextureCache<Vulkan::TextureCacheParams>;
|
||||
}
|
||||
// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#include "video_core/renderer_vulkan/vk_texture_cache.h"
|
||||
#include "video_core/texture_cache/texture_cache.h"
|
||||
|
||||
namespace VideoCommon {
|
||||
template class VideoCommon::TextureCache<Vulkan::TextureCacheParams>;
|
||||
}
|
||||
|
||||
@@ -1,39 +1,39 @@
|
||||
// SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#include <variant>
|
||||
#include <boost/container/static_vector.hpp>
|
||||
|
||||
#include "common/logging/log.h"
|
||||
#include "video_core/renderer_vulkan/vk_scheduler.h"
|
||||
#include "video_core/renderer_vulkan/vk_update_descriptor.h"
|
||||
#include "video_core/vulkan_common/vulkan_device.h"
|
||||
#include "video_core/vulkan_common/vulkan_wrapper.h"
|
||||
|
||||
namespace Vulkan {
|
||||
|
||||
UpdateDescriptorQueue::UpdateDescriptorQueue(const Device& device_, Scheduler& scheduler_)
|
||||
: device{device_}, scheduler{scheduler_} {
|
||||
payload_cursor = payload.data();
|
||||
}
|
||||
|
||||
UpdateDescriptorQueue::~UpdateDescriptorQueue() = default;
|
||||
|
||||
void UpdateDescriptorQueue::TickFrame() {
|
||||
payload_cursor = payload.data();
|
||||
}
|
||||
|
||||
void UpdateDescriptorQueue::Acquire() {
|
||||
// Minimum number of entries required.
|
||||
// This is the maximum number of entries a single draw call migth use.
|
||||
static constexpr size_t MIN_ENTRIES = 0x400;
|
||||
|
||||
if (std::distance(payload.data(), payload_cursor) + MIN_ENTRIES >= payload.max_size()) {
|
||||
LOG_WARNING(Render_Vulkan, "Payload overflow, waiting for worker thread");
|
||||
scheduler.WaitWorker();
|
||||
payload_cursor = payload.data();
|
||||
}
|
||||
upload_start = payload_cursor;
|
||||
}
|
||||
|
||||
} // namespace Vulkan
|
||||
// SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#include <variant>
|
||||
#include <boost/container/static_vector.hpp>
|
||||
|
||||
#include "common/logging/log.h"
|
||||
#include "video_core/renderer_vulkan/vk_scheduler.h"
|
||||
#include "video_core/renderer_vulkan/vk_update_descriptor.h"
|
||||
#include "video_core/vulkan_common/vulkan_device.h"
|
||||
#include "video_core/vulkan_common/vulkan_wrapper.h"
|
||||
|
||||
namespace Vulkan {
|
||||
|
||||
UpdateDescriptorQueue::UpdateDescriptorQueue(const Device& device_, Scheduler& scheduler_)
|
||||
: device{device_}, scheduler{scheduler_} {
|
||||
payload_cursor = payload.data();
|
||||
}
|
||||
|
||||
UpdateDescriptorQueue::~UpdateDescriptorQueue() = default;
|
||||
|
||||
void UpdateDescriptorQueue::TickFrame() {
|
||||
payload_cursor = payload.data();
|
||||
}
|
||||
|
||||
void UpdateDescriptorQueue::Acquire() {
|
||||
// Minimum number of entries required.
|
||||
// This is the maximum number of entries a single draw call migth use.
|
||||
static constexpr size_t MIN_ENTRIES = 0x400;
|
||||
|
||||
if (std::distance(payload.data(), payload_cursor) + MIN_ENTRIES >= payload.max_size()) {
|
||||
LOG_WARNING(Render_Vulkan, "Payload overflow, waiting for worker thread");
|
||||
scheduler.WaitWorker();
|
||||
payload_cursor = payload.data();
|
||||
}
|
||||
upload_start = payload_cursor;
|
||||
}
|
||||
|
||||
} // namespace Vulkan
|
||||
|
||||
@@ -1,81 +1,81 @@
|
||||
// SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <array>
|
||||
|
||||
#include "video_core/vulkan_common/vulkan_wrapper.h"
|
||||
|
||||
namespace Vulkan {
|
||||
|
||||
class Device;
|
||||
class Scheduler;
|
||||
|
||||
struct DescriptorUpdateEntry {
|
||||
struct Empty {};
|
||||
|
||||
DescriptorUpdateEntry() = default;
|
||||
DescriptorUpdateEntry(VkDescriptorImageInfo image_) : image{image_} {}
|
||||
DescriptorUpdateEntry(VkDescriptorBufferInfo buffer_) : buffer{buffer_} {}
|
||||
DescriptorUpdateEntry(VkBufferView texel_buffer_) : texel_buffer{texel_buffer_} {}
|
||||
|
||||
union {
|
||||
Empty empty{};
|
||||
VkDescriptorImageInfo image;
|
||||
VkDescriptorBufferInfo buffer;
|
||||
VkBufferView texel_buffer;
|
||||
};
|
||||
};
|
||||
|
||||
class UpdateDescriptorQueue final {
|
||||
public:
|
||||
explicit UpdateDescriptorQueue(const Device& device_, Scheduler& scheduler_);
|
||||
~UpdateDescriptorQueue();
|
||||
|
||||
void TickFrame();
|
||||
|
||||
void Acquire();
|
||||
|
||||
const DescriptorUpdateEntry* UpdateData() const noexcept {
|
||||
return upload_start;
|
||||
}
|
||||
|
||||
void AddSampledImage(VkImageView image_view, VkSampler sampler) {
|
||||
*(payload_cursor++) = VkDescriptorImageInfo{
|
||||
.sampler = sampler,
|
||||
.imageView = image_view,
|
||||
.imageLayout = VK_IMAGE_LAYOUT_GENERAL,
|
||||
};
|
||||
}
|
||||
|
||||
void AddImage(VkImageView image_view) {
|
||||
*(payload_cursor++) = VkDescriptorImageInfo{
|
||||
.sampler = VK_NULL_HANDLE,
|
||||
.imageView = image_view,
|
||||
.imageLayout = VK_IMAGE_LAYOUT_GENERAL,
|
||||
};
|
||||
}
|
||||
|
||||
void AddBuffer(VkBuffer buffer, VkDeviceSize offset, VkDeviceSize size) {
|
||||
*(payload_cursor++) = VkDescriptorBufferInfo{
|
||||
.buffer = buffer,
|
||||
.offset = offset,
|
||||
.range = size,
|
||||
};
|
||||
}
|
||||
|
||||
void AddTexelBuffer(VkBufferView texel_buffer) {
|
||||
*(payload_cursor++) = texel_buffer;
|
||||
}
|
||||
|
||||
private:
|
||||
const Device& device;
|
||||
Scheduler& scheduler;
|
||||
|
||||
DescriptorUpdateEntry* payload_cursor = nullptr;
|
||||
const DescriptorUpdateEntry* upload_start = nullptr;
|
||||
std::array<DescriptorUpdateEntry, 0x10000> payload;
|
||||
};
|
||||
|
||||
} // namespace Vulkan
|
||||
// SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <array>
|
||||
|
||||
#include "video_core/vulkan_common/vulkan_wrapper.h"
|
||||
|
||||
namespace Vulkan {
|
||||
|
||||
class Device;
|
||||
class Scheduler;
|
||||
|
||||
struct DescriptorUpdateEntry {
|
||||
struct Empty {};
|
||||
|
||||
DescriptorUpdateEntry() = default;
|
||||
DescriptorUpdateEntry(VkDescriptorImageInfo image_) : image{image_} {}
|
||||
DescriptorUpdateEntry(VkDescriptorBufferInfo buffer_) : buffer{buffer_} {}
|
||||
DescriptorUpdateEntry(VkBufferView texel_buffer_) : texel_buffer{texel_buffer_} {}
|
||||
|
||||
union {
|
||||
Empty empty{};
|
||||
VkDescriptorImageInfo image;
|
||||
VkDescriptorBufferInfo buffer;
|
||||
VkBufferView texel_buffer;
|
||||
};
|
||||
};
|
||||
|
||||
class UpdateDescriptorQueue final {
|
||||
public:
|
||||
explicit UpdateDescriptorQueue(const Device& device_, Scheduler& scheduler_);
|
||||
~UpdateDescriptorQueue();
|
||||
|
||||
void TickFrame();
|
||||
|
||||
void Acquire();
|
||||
|
||||
const DescriptorUpdateEntry* UpdateData() const noexcept {
|
||||
return upload_start;
|
||||
}
|
||||
|
||||
void AddSampledImage(VkImageView image_view, VkSampler sampler) {
|
||||
*(payload_cursor++) = VkDescriptorImageInfo{
|
||||
.sampler = sampler,
|
||||
.imageView = image_view,
|
||||
.imageLayout = VK_IMAGE_LAYOUT_GENERAL,
|
||||
};
|
||||
}
|
||||
|
||||
void AddImage(VkImageView image_view) {
|
||||
*(payload_cursor++) = VkDescriptorImageInfo{
|
||||
.sampler = VK_NULL_HANDLE,
|
||||
.imageView = image_view,
|
||||
.imageLayout = VK_IMAGE_LAYOUT_GENERAL,
|
||||
};
|
||||
}
|
||||
|
||||
void AddBuffer(VkBuffer buffer, VkDeviceSize offset, VkDeviceSize size) {
|
||||
*(payload_cursor++) = VkDescriptorBufferInfo{
|
||||
.buffer = buffer,
|
||||
.offset = offset,
|
||||
.range = size,
|
||||
};
|
||||
}
|
||||
|
||||
void AddTexelBuffer(VkBufferView texel_buffer) {
|
||||
*(payload_cursor++) = texel_buffer;
|
||||
}
|
||||
|
||||
private:
|
||||
const Device& device;
|
||||
Scheduler& scheduler;
|
||||
|
||||
DescriptorUpdateEntry* payload_cursor = nullptr;
|
||||
const DescriptorUpdateEntry* upload_start = nullptr;
|
||||
std::array<DescriptorUpdateEntry, 0x10000> payload;
|
||||
};
|
||||
|
||||
} // namespace Vulkan
|
||||
|
||||
Reference in New Issue
Block a user