another try

mgthepro
2022-11-05 13:58:44 +01:00
parent 4a9f2bbf2a
commit 9f63fbe700
2002 changed files with 671171 additions and 671092 deletions

File diff suppressed because it is too large

@@ -1,118 +1,118 @@
// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include "video_core/engines/fermi_2d.h"
#include "video_core/renderer_vulkan/vk_descriptor_pool.h"
#include "video_core/texture_cache/types.h"
#include "video_core/vulkan_common/vulkan_wrapper.h"
namespace Vulkan {
using VideoCommon::Region2D;
class Device;
class Framebuffer;
class ImageView;
class StateTracker;
class Scheduler;
struct BlitImagePipelineKey {
constexpr auto operator<=>(const BlitImagePipelineKey&) const noexcept = default;
VkRenderPass renderpass;
Tegra::Engines::Fermi2D::Operation operation;
};
class BlitImageHelper {
public:
explicit BlitImageHelper(const Device& device, Scheduler& scheduler,
StateTracker& state_tracker, DescriptorPool& descriptor_pool);
~BlitImageHelper();
void BlitColor(const Framebuffer* dst_framebuffer, VkImageView src_image_view,
const Region2D& dst_region, const Region2D& src_region,
Tegra::Engines::Fermi2D::Filter filter,
Tegra::Engines::Fermi2D::Operation operation);
void BlitDepthStencil(const Framebuffer* dst_framebuffer, VkImageView src_depth_view,
VkImageView src_stencil_view, const Region2D& dst_region,
const Region2D& src_region, Tegra::Engines::Fermi2D::Filter filter,
Tegra::Engines::Fermi2D::Operation operation);
void ConvertD32ToR32(const Framebuffer* dst_framebuffer, const ImageView& src_image_view);
void ConvertR32ToD32(const Framebuffer* dst_framebuffer, const ImageView& src_image_view);
void ConvertD16ToR16(const Framebuffer* dst_framebuffer, const ImageView& src_image_view);
void ConvertR16ToD16(const Framebuffer* dst_framebuffer, const ImageView& src_image_view);
void ConvertABGR8ToD24S8(const Framebuffer* dst_framebuffer, const ImageView& src_image_view);
void ConvertD24S8ToABGR8(const Framebuffer* dst_framebuffer, ImageView& src_image_view);
void ConvertS8D24ToABGR8(const Framebuffer* dst_framebuffer, ImageView& src_image_view);
private:
void Convert(VkPipeline pipeline, const Framebuffer* dst_framebuffer,
const ImageView& src_image_view);
void ConvertDepthStencil(VkPipeline pipeline, const Framebuffer* dst_framebuffer,
ImageView& src_image_view);
[[nodiscard]] VkPipeline FindOrEmplaceColorPipeline(const BlitImagePipelineKey& key);
[[nodiscard]] VkPipeline FindOrEmplaceDepthStencilPipeline(const BlitImagePipelineKey& key);
void ConvertPipeline(vk::Pipeline& pipeline, VkRenderPass renderpass, bool is_target_depth);
void ConvertDepthToColorPipeline(vk::Pipeline& pipeline, VkRenderPass renderpass);
void ConvertColorToDepthPipeline(vk::Pipeline& pipeline, VkRenderPass renderpass);
void ConvertPipelineEx(vk::Pipeline& pipeline, VkRenderPass renderpass,
vk::ShaderModule& module, bool single_texture, bool is_target_depth);
void ConvertPipelineColorTargetEx(vk::Pipeline& pipeline, VkRenderPass renderpass,
vk::ShaderModule& module);
void ConvertPipelineDepthTargetEx(vk::Pipeline& pipeline, VkRenderPass renderpass,
vk::ShaderModule& module);
const Device& device;
Scheduler& scheduler;
StateTracker& state_tracker;
vk::DescriptorSetLayout one_texture_set_layout;
vk::DescriptorSetLayout two_textures_set_layout;
DescriptorAllocator one_texture_descriptor_allocator;
DescriptorAllocator two_textures_descriptor_allocator;
vk::PipelineLayout one_texture_pipeline_layout;
vk::PipelineLayout two_textures_pipeline_layout;
vk::ShaderModule full_screen_vert;
vk::ShaderModule blit_color_to_color_frag;
vk::ShaderModule blit_depth_stencil_frag;
vk::ShaderModule convert_depth_to_float_frag;
vk::ShaderModule convert_float_to_depth_frag;
vk::ShaderModule convert_abgr8_to_d24s8_frag;
vk::ShaderModule convert_d24s8_to_abgr8_frag;
vk::ShaderModule convert_s8d24_to_abgr8_frag;
vk::Sampler linear_sampler;
vk::Sampler nearest_sampler;
std::vector<BlitImagePipelineKey> blit_color_keys;
std::vector<vk::Pipeline> blit_color_pipelines;
std::vector<BlitImagePipelineKey> blit_depth_stencil_keys;
std::vector<vk::Pipeline> blit_depth_stencil_pipelines;
vk::Pipeline convert_d32_to_r32_pipeline;
vk::Pipeline convert_r32_to_d32_pipeline;
vk::Pipeline convert_d16_to_r16_pipeline;
vk::Pipeline convert_r16_to_d16_pipeline;
vk::Pipeline convert_abgr8_to_d24s8_pipeline;
vk::Pipeline convert_d24s8_to_abgr8_pipeline;
vk::Pipeline convert_s8d24_to_abgr8_pipeline;
};
} // namespace Vulkan
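
The defaulted operator<=> above also gives BlitImagePipelineKey an implicitly defaulted operator==, which is what lets the FindOrEmplace* lookups match keys in the parallel blit_*_keys / blit_*_pipelines vectors. A minimal standalone sketch of that cache pattern, with stand-in types (illustration only, not the commit's actual implementation):

#include <algorithm>
#include <cstddef>
#include <vector>

struct Key {
    constexpr auto operator<=>(const Key&) const noexcept = default; // also yields ==
    int renderpass; // stands in for VkRenderPass
    int operation;  // stands in for Tegra::Engines::Fermi2D::Operation
};

// Linear search over the keys; on a miss, append a new key/pipeline pair.
int FindOrEmplace(std::vector<Key>& keys, std::vector<int>& pipelines, const Key& key) {
    if (const auto it = std::ranges::find(keys, key); it != keys.end()) {
        return pipelines[static_cast<std::size_t>(it - keys.begin())];
    }
    keys.push_back(key);
    pipelines.push_back(static_cast<int>(pipelines.size())); // stand-in for pipeline creation
    return pipelines.back();
}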

@@ -1,455 +1,455 @@
// SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include <algorithm>
#include <cstring>
#include "common/bit_cast.h"
#include "common/cityhash.h"
#include "common/common_types.h"
#include "video_core/renderer_vulkan/fixed_pipeline_state.h"
#include "video_core/renderer_vulkan/vk_state_tracker.h"
namespace Vulkan {
namespace {
constexpr size_t POINT = 0;
constexpr size_t LINE = 1;
constexpr size_t POLYGON = 2;
constexpr std::array POLYGON_OFFSET_ENABLE_LUT = {
POINT, // Points
LINE, // Lines
LINE, // LineLoop
LINE, // LineStrip
POLYGON, // Triangles
POLYGON, // TriangleStrip
POLYGON, // TriangleFan
POLYGON, // Quads
POLYGON, // QuadStrip
POLYGON, // Polygon
LINE, // LinesAdjacency
LINE, // LineStripAdjacency
POLYGON, // TrianglesAdjacency
POLYGON, // TriangleStripAdjacency
POLYGON, // Patches
};
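// Example (illustration, derived from Refresh below): a LineStrip draw indexes this
// LUT to LINE, so depth_bias_enable is sourced from regs.polygon_offset_line_enable;
// a Triangles draw maps to POLYGON and reads regs.polygon_offset_fill_enable.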
void RefreshXfbState(VideoCommon::TransformFeedbackState& state, const Maxwell& regs) {
std::ranges::transform(regs.transform_feedback.controls, state.layouts.begin(),
[](const auto& layout) {
return VideoCommon::TransformFeedbackState::Layout{
.stream = layout.stream,
.varying_count = layout.varying_count,
.stride = layout.stride,
};
});
state.varyings = regs.stream_out_layout;
}
} // Anonymous namespace
void FixedPipelineState::Refresh(Tegra::Engines::Maxwell3D& maxwell3d,
bool has_extended_dynamic_state, bool has_dynamic_vertex_input) {
const Maxwell& regs = maxwell3d.regs;
const std::array enabled_lut{
regs.polygon_offset_point_enable,
regs.polygon_offset_line_enable,
regs.polygon_offset_fill_enable,
};
const u32 topology_index = static_cast<u32>(regs.draw.topology.Value());
raw1 = 0;
extended_dynamic_state.Assign(has_extended_dynamic_state ? 1 : 0);
dynamic_vertex_input.Assign(has_dynamic_vertex_input ? 1 : 0);
xfb_enabled.Assign(regs.transform_feedback_enabled != 0);
primitive_restart_enable.Assign(regs.primitive_restart.enabled != 0 ? 1 : 0);
depth_bias_enable.Assign(enabled_lut[POLYGON_OFFSET_ENABLE_LUT[topology_index]] != 0 ? 1 : 0);
depth_clamp_disabled.Assign(regs.viewport_clip_control.geometry_clip ==
Maxwell::ViewportClipControl::GeometryClip::Passthrough ||
regs.viewport_clip_control.geometry_clip ==
Maxwell::ViewportClipControl::GeometryClip::FrustumXYZ ||
regs.viewport_clip_control.geometry_clip ==
Maxwell::ViewportClipControl::GeometryClip::FrustumZ);
ndc_minus_one_to_one.Assign(regs.depth_mode == Maxwell::DepthMode::MinusOneToOne ? 1 : 0);
polygon_mode.Assign(PackPolygonMode(regs.polygon_mode_front));
patch_control_points_minus_one.Assign(regs.patch_vertices - 1);
tessellation_primitive.Assign(static_cast<u32>(regs.tessellation.params.domain_type.Value()));
tessellation_spacing.Assign(static_cast<u32>(regs.tessellation.params.spacing.Value()));
tessellation_clockwise.Assign(regs.tessellation.params.output_primitives.Value() ==
Maxwell::Tessellation::OutputPrimitives::Triangles_CW);
logic_op_enable.Assign(regs.logic_op.enable != 0 ? 1 : 0);
logic_op.Assign(PackLogicOp(regs.logic_op.op));
topology.Assign(regs.draw.topology);
msaa_mode.Assign(regs.anti_alias_samples_mode);
raw2 = 0;
rasterize_enable.Assign(regs.rasterize_enable != 0 ? 1 : 0);
const auto test_func =
regs.alpha_test_enabled != 0 ? regs.alpha_test_func : Maxwell::ComparisonOp::Always_GL;
alpha_test_func.Assign(PackComparisonOp(test_func));
early_z.Assign(regs.mandated_early_z != 0 ? 1 : 0);
depth_enabled.Assign(regs.zeta_enable != 0 ? 1 : 0);
depth_format.Assign(static_cast<u32>(regs.zeta.format));
y_negate.Assign(regs.window_origin.mode != Maxwell::WindowOrigin::Mode::UpperLeft ? 1 : 0);
provoking_vertex_last.Assign(regs.provoking_vertex == Maxwell::ProvokingVertex::Last ? 1 : 0);
smooth_lines.Assign(regs.line_anti_alias_enable != 0 ? 1 : 0);
for (size_t i = 0; i < regs.rt.size(); ++i) {
color_formats[i] = static_cast<u8>(regs.rt[i].format);
}
alpha_test_ref = Common::BitCast<u32>(regs.alpha_test_ref);
point_size = Common::BitCast<u32>(regs.point_size);
if (maxwell3d.dirty.flags[Dirty::VertexInput]) {
if (has_dynamic_vertex_input) {
// Dirty flag will be reset by the command buffer update
static constexpr std::array LUT{
0u, // Invalid
1u, // SignedNorm
1u, // UnsignedNorm
2u, // SignedInt
3u, // UnsignedInt
1u, // UnsignedScaled
1u, // SignedScaled
1u, // Float
};
const auto& attrs = regs.vertex_attrib_format;
attribute_types = 0;
for (size_t i = 0; i < Maxwell::NumVertexAttributes; ++i) {
const u32 mask = attrs[i].constant != 0 ? 0 : 3;
const u32 type = LUT[static_cast<size_t>(attrs[i].type.Value())];
attribute_types |= static_cast<u64>(type & mask) << (i * 2);
}
} else {
maxwell3d.dirty.flags[Dirty::VertexInput] = false;
enabled_divisors = 0;
for (size_t index = 0; index < Maxwell::NumVertexArrays; ++index) {
const bool is_enabled = regs.vertex_stream_instances.IsInstancingEnabled(index);
binding_divisors[index] = is_enabled ? regs.vertex_streams[index].frequency : 0;
enabled_divisors |= (is_enabled ? u64{1} : 0) << index;
}
for (size_t index = 0; index < Maxwell::NumVertexAttributes; ++index) {
const auto& input = regs.vertex_attrib_format[index];
auto& attribute = attributes[index];
attribute.raw = 0;
attribute.enabled.Assign(input.constant ? 0 : 1);
attribute.buffer.Assign(input.buffer);
attribute.offset.Assign(input.offset);
attribute.type.Assign(static_cast<u32>(input.type.Value()));
attribute.size.Assign(static_cast<u32>(input.size.Value()));
}
}
}
if (maxwell3d.dirty.flags[Dirty::Blending]) {
maxwell3d.dirty.flags[Dirty::Blending] = false;
for (size_t index = 0; index < attachments.size(); ++index) {
attachments[index].Refresh(regs, index);
}
}
if (maxwell3d.dirty.flags[Dirty::ViewportSwizzles]) {
maxwell3d.dirty.flags[Dirty::ViewportSwizzles] = false;
const auto& transform = regs.viewport_transform;
std::ranges::transform(transform, viewport_swizzles.begin(), [](const auto& viewport) {
return static_cast<u16>(viewport.swizzle.raw);
});
}
if (!extended_dynamic_state) {
dynamic_state.Refresh(regs);
}
if (xfb_enabled) {
RefreshXfbState(xfb_state, regs);
}
}
void FixedPipelineState::BlendingAttachment::Refresh(const Maxwell& regs, size_t index) {
const auto& mask = regs.color_mask[regs.color_mask_common ? 0 : index];
raw = 0;
mask_r.Assign(mask.R);
mask_g.Assign(mask.G);
mask_b.Assign(mask.B);
mask_a.Assign(mask.A);
// TODO: Use a C++20 templated lambda to deduplicate this code
if (!regs.blend_per_target_enabled) {
if (!regs.blend.enable[index]) {
return;
}
const auto& src = regs.blend;
equation_rgb.Assign(PackBlendEquation(src.color_op));
equation_a.Assign(PackBlendEquation(src.alpha_op));
factor_source_rgb.Assign(PackBlendFactor(src.color_source));
factor_dest_rgb.Assign(PackBlendFactor(src.color_dest));
factor_source_a.Assign(PackBlendFactor(src.alpha_source));
factor_dest_a.Assign(PackBlendFactor(src.alpha_dest));
enable.Assign(1);
return;
}
if (!regs.blend.enable[index]) {
return;
}
const auto& src = regs.blend_per_target[index];
equation_rgb.Assign(PackBlendEquation(src.color_op));
equation_a.Assign(PackBlendEquation(src.alpha_op));
factor_source_rgb.Assign(PackBlendFactor(src.color_source));
factor_dest_rgb.Assign(PackBlendFactor(src.color_dest));
factor_source_a.Assign(PackBlendFactor(src.alpha_source));
factor_dest_a.Assign(PackBlendFactor(src.alpha_dest));
enable.Assign(1);
}
void FixedPipelineState::DynamicState::Refresh(const Maxwell& regs) {
u32 packed_front_face = PackFrontFace(regs.gl_front_face);
if (regs.window_origin.flip_y != 0) {
// Flip front face
packed_front_face = 1 - packed_front_face;
}
raw1 = 0;
raw2 = 0;
front.action_stencil_fail.Assign(PackStencilOp(regs.stencil_front_op.fail));
front.action_depth_fail.Assign(PackStencilOp(regs.stencil_front_op.zfail));
front.action_depth_pass.Assign(PackStencilOp(regs.stencil_front_op.zpass));
front.test_func.Assign(PackComparisonOp(regs.stencil_front_op.func));
if (regs.stencil_two_side_enable) {
back.action_stencil_fail.Assign(PackStencilOp(regs.stencil_back_op.fail));
back.action_depth_fail.Assign(PackStencilOp(regs.stencil_back_op.zfail));
back.action_depth_pass.Assign(PackStencilOp(regs.stencil_back_op.zpass));
back.test_func.Assign(PackComparisonOp(regs.stencil_back_op.func));
} else {
back.action_stencil_fail.Assign(front.action_stencil_fail);
back.action_depth_fail.Assign(front.action_depth_fail);
back.action_depth_pass.Assign(front.action_depth_pass);
back.test_func.Assign(front.test_func);
}
stencil_enable.Assign(regs.stencil_enable);
depth_write_enable.Assign(regs.depth_write_enabled);
depth_bounds_enable.Assign(regs.depth_bounds_enable);
depth_test_enable.Assign(regs.depth_test_enable);
front_face.Assign(packed_front_face);
depth_test_func.Assign(PackComparisonOp(regs.depth_test_func));
cull_face.Assign(PackCullFace(regs.gl_cull_face));
cull_enable.Assign(regs.gl_cull_test_enabled != 0 ? 1 : 0);
std::ranges::transform(regs.vertex_streams, vertex_strides.begin(), [](const auto& array) {
return static_cast<u16>(array.stride.Value());
});
}
size_t FixedPipelineState::Hash() const noexcept {
const u64 hash = Common::CityHash64(reinterpret_cast<const char*>(this), Size());
return static_cast<size_t>(hash);
}
bool FixedPipelineState::operator==(const FixedPipelineState& rhs) const noexcept {
return std::memcmp(this, &rhs, Size()) == 0;
}
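// Hashing and comparing raw bytes up to Size() is only sound because
// FixedPipelineState has unique object representations and is trivially copyable
// (enforced by static_asserts in fixed_pipeline_state.h), so no padding bytes can
// poison the memcmp or CityHash64 results.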
u32 FixedPipelineState::PackComparisonOp(Maxwell::ComparisonOp op) noexcept {
// OpenGL enums go from 0x200 to 0x207 and the others from 1 to 8.
// If we subtract 0x200 from the OpenGL enums and 1 from the others, we get a 0-7 range.
// Perfect for a hash.
const u32 value = static_cast<u32>(op);
return value - (value >= 0x200 ? 0x200 : 1);
}
Maxwell::ComparisonOp FixedPipelineState::UnpackComparisonOp(u32 packed) noexcept {
// See PackComparisonOp for the logic behind this.
return static_cast<Maxwell::ComparisonOp>(packed + 1);
}
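// Worked example (enum values assumed from the GL/D3D definitions, not spelled out
// here): GL_GREATER is 0x204, so PackComparisonOp(Greater_GL) == 0x204 - 0x200 == 4,
// and the D3D-style Greater is 5, so PackComparisonOp(Greater_D3D) == 5 - 1 == 4.
// UnpackComparisonOp(4) then returns the 1-based (D3D-style) variant, so both
// encodings of the same operation collapse to one packed value.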
u32 FixedPipelineState::PackStencilOp(Maxwell::StencilOp::Op op) noexcept {
switch (op) {
case Maxwell::StencilOp::Op::Keep_D3D:
case Maxwell::StencilOp::Op::Keep_GL:
return 0;
case Maxwell::StencilOp::Op::Zero_D3D:
case Maxwell::StencilOp::Op::Zero_GL:
return 1;
case Maxwell::StencilOp::Op::Replace_D3D:
case Maxwell::StencilOp::Op::Replace_GL:
return 2;
case Maxwell::StencilOp::Op::IncrSaturate_D3D:
case Maxwell::StencilOp::Op::IncrSaturate_GL:
return 3;
case Maxwell::StencilOp::Op::DecrSaturate_D3D:
case Maxwell::StencilOp::Op::DecrSaturate_GL:
return 4;
case Maxwell::StencilOp::Op::Invert_D3D:
case Maxwell::StencilOp::Op::Invert_GL:
return 5;
case Maxwell::StencilOp::Op::Incr_D3D:
case Maxwell::StencilOp::Op::Incr_GL:
return 6;
case Maxwell::StencilOp::Op::Decr_D3D:
case Maxwell::StencilOp::Op::Decr_GL:
return 7;
}
return 0;
}
Maxwell::StencilOp::Op FixedPipelineState::UnpackStencilOp(u32 packed) noexcept {
static constexpr std::array LUT = {
Maxwell::StencilOp::Op::Keep_D3D, Maxwell::StencilOp::Op::Zero_D3D,
Maxwell::StencilOp::Op::Replace_D3D, Maxwell::StencilOp::Op::IncrSaturate_D3D,
Maxwell::StencilOp::Op::DecrSaturate_D3D, Maxwell::StencilOp::Op::Invert_D3D,
Maxwell::StencilOp::Op::Incr_D3D, Maxwell::StencilOp::Op::Decr_D3D};
return LUT[packed];
}
u32 FixedPipelineState::PackCullFace(Maxwell::CullFace cull) noexcept {
// FrontAndBack is 0x408; subtracting 0x406 from it gives 2.
// Individual cull faces are 0x404 and 0x405; subtracting 0x404 gives 0 and 1.
const u32 value = static_cast<u32>(cull);
return value - (value == 0x408 ? 0x406 : 0x404);
}
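// Worked example: Front (0x404) packs to 0, Back (0x405) to 1 and
// FrontAndBack (0x408) to 2, matching the LUT order in UnpackCullFace below.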
Maxwell::CullFace FixedPipelineState::UnpackCullFace(u32 packed) noexcept {
static constexpr std::array LUT = {Maxwell::CullFace::Front, Maxwell::CullFace::Back,
Maxwell::CullFace::FrontAndBack};
return LUT[packed];
}
u32 FixedPipelineState::PackFrontFace(Maxwell::FrontFace face) noexcept {
return static_cast<u32>(face) - 0x900;
}
Maxwell::FrontFace FixedPipelineState::UnpackFrontFace(u32 packed) noexcept {
return static_cast<Maxwell::FrontFace>(packed + 0x900);
}
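// The GL winding enums are assumed here: CW (0x900) packs to 0 and CCW (0x901)
// packs to 1, which is why DynamicState::Refresh can flip the winding with
// `1 - packed_front_face` when the window origin flips Y.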
u32 FixedPipelineState::PackPolygonMode(Maxwell::PolygonMode mode) noexcept {
return static_cast<u32>(mode) - 0x1B00;
}
Maxwell::PolygonMode FixedPipelineState::UnpackPolygonMode(u32 packed) noexcept {
return static_cast<Maxwell::PolygonMode>(packed + 0x1B00);
}
u32 FixedPipelineState::PackLogicOp(Maxwell::LogicOp::Op op) noexcept {
return static_cast<u32>(op) - 0x1500;
}
Maxwell::LogicOp::Op FixedPipelineState::UnpackLogicOp(u32 packed) noexcept {
return static_cast<Maxwell::LogicOp::Op>(packed + 0x1500);
}
u32 FixedPipelineState::PackBlendEquation(Maxwell::Blend::Equation equation) noexcept {
switch (equation) {
case Maxwell::Blend::Equation::Add_D3D:
case Maxwell::Blend::Equation::Add_GL:
return 0;
case Maxwell::Blend::Equation::Subtract_D3D:
case Maxwell::Blend::Equation::Subtract_GL:
return 1;
case Maxwell::Blend::Equation::ReverseSubtract_D3D:
case Maxwell::Blend::Equation::ReverseSubtract_GL:
return 2;
case Maxwell::Blend::Equation::Min_D3D:
case Maxwell::Blend::Equation::Min_GL:
return 3;
case Maxwell::Blend::Equation::Max_D3D:
case Maxwell::Blend::Equation::Max_GL:
return 4;
}
return 0;
}
Maxwell::Blend::Equation FixedPipelineState::UnpackBlendEquation(u32 packed) noexcept {
static constexpr std::array LUT = {
Maxwell::Blend::Equation::Add_D3D, Maxwell::Blend::Equation::Subtract_D3D,
Maxwell::Blend::Equation::ReverseSubtract_D3D, Maxwell::Blend::Equation::Min_D3D,
Maxwell::Blend::Equation::Max_D3D};
return LUT[packed];
}
u32 FixedPipelineState::PackBlendFactor(Maxwell::Blend::Factor factor) noexcept {
switch (factor) {
case Maxwell::Blend::Factor::Zero_D3D:
case Maxwell::Blend::Factor::Zero_GL:
return 0;
case Maxwell::Blend::Factor::One_D3D:
case Maxwell::Blend::Factor::One_GL:
return 1;
case Maxwell::Blend::Factor::SourceColor_D3D:
case Maxwell::Blend::Factor::SourceColor_GL:
return 2;
case Maxwell::Blend::Factor::OneMinusSourceColor_D3D:
case Maxwell::Blend::Factor::OneMinusSourceColor_GL:
return 3;
case Maxwell::Blend::Factor::SourceAlpha_D3D:
case Maxwell::Blend::Factor::SourceAlpha_GL:
return 4;
case Maxwell::Blend::Factor::OneMinusSourceAlpha_D3D:
case Maxwell::Blend::Factor::OneMinusSourceAlpha_GL:
return 5;
case Maxwell::Blend::Factor::DestAlpha_D3D:
case Maxwell::Blend::Factor::DestAlpha_GL:
return 6;
case Maxwell::Blend::Factor::OneMinusDestAlpha_D3D:
case Maxwell::Blend::Factor::OneMinusDestAlpha_GL:
return 7;
case Maxwell::Blend::Factor::DestColor_D3D:
case Maxwell::Blend::Factor::DestColor_GL:
return 8;
case Maxwell::Blend::Factor::OneMinusDestColor_D3D:
case Maxwell::Blend::Factor::OneMinusDestColor_GL:
return 9;
case Maxwell::Blend::Factor::SourceAlphaSaturate_D3D:
case Maxwell::Blend::Factor::SourceAlphaSaturate_GL:
return 10;
case Maxwell::Blend::Factor::Source1Color_D3D:
case Maxwell::Blend::Factor::Source1Color_GL:
return 11;
case Maxwell::Blend::Factor::OneMinusSource1Color_D3D:
case Maxwell::Blend::Factor::OneMinusSource1Color_GL:
return 12;
case Maxwell::Blend::Factor::Source1Alpha_D3D:
case Maxwell::Blend::Factor::Source1Alpha_GL:
return 13;
case Maxwell::Blend::Factor::OneMinusSource1Alpha_D3D:
case Maxwell::Blend::Factor::OneMinusSource1Alpha_GL:
return 14;
case Maxwell::Blend::Factor::BlendFactor_D3D:
case Maxwell::Blend::Factor::ConstantColor_GL:
return 15;
case Maxwell::Blend::Factor::OneMinusBlendFactor_D3D:
case Maxwell::Blend::Factor::OneMinusConstantColor_GL:
return 16;
case Maxwell::Blend::Factor::BothSourceAlpha_D3D:
case Maxwell::Blend::Factor::ConstantAlpha_GL:
return 17;
case Maxwell::Blend::Factor::OneMinusBothSourceAlpha_D3D:
case Maxwell::Blend::Factor::OneMinusConstantAlpha_GL:
return 18;
}
UNIMPLEMENTED_MSG("Unknown blend factor {}", static_cast<u32>(factor));
return 0;
}
Maxwell::Blend::Factor FixedPipelineState::UnpackBlendFactor(u32 packed) noexcept {
static constexpr std::array LUT = {
Maxwell::Blend::Factor::Zero_D3D,
Maxwell::Blend::Factor::One_D3D,
Maxwell::Blend::Factor::SourceColor_D3D,
Maxwell::Blend::Factor::OneMinusSourceColor_D3D,
Maxwell::Blend::Factor::SourceAlpha_D3D,
Maxwell::Blend::Factor::OneMinusSourceAlpha_D3D,
Maxwell::Blend::Factor::DestAlpha_D3D,
Maxwell::Blend::Factor::OneMinusDestAlpha_D3D,
Maxwell::Blend::Factor::DestColor_D3D,
Maxwell::Blend::Factor::OneMinusDestColor_D3D,
Maxwell::Blend::Factor::SourceAlphaSaturate_D3D,
Maxwell::Blend::Factor::Source1Color_D3D,
Maxwell::Blend::Factor::OneMinusSource1Color_D3D,
Maxwell::Blend::Factor::Source1Alpha_D3D,
Maxwell::Blend::Factor::OneMinusSource1Alpha_D3D,
Maxwell::Blend::Factor::BlendFactor_D3D,
Maxwell::Blend::Factor::OneMinusBlendFactor_D3D,
Maxwell::Blend::Factor::BothSourceAlpha_D3D,
Maxwell::Blend::Factor::OneMinusBothSourceAlpha_D3D,
};
ASSERT(packed < LUT.size());
return LUT[packed];
}
} // namespace Vulkan

@@ -1,261 +1,261 @@
// SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include <array>
#include <type_traits>
#include "common/bit_field.h"
#include "common/common_types.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/surface.h"
#include "video_core/transform_feedback.h"
namespace Vulkan {
using Maxwell = Tegra::Engines::Maxwell3D::Regs;
struct FixedPipelineState {
static u32 PackComparisonOp(Maxwell::ComparisonOp op) noexcept;
static Maxwell::ComparisonOp UnpackComparisonOp(u32 packed) noexcept;
static u32 PackStencilOp(Maxwell::StencilOp::Op op) noexcept;
static Maxwell::StencilOp::Op UnpackStencilOp(u32 packed) noexcept;
static u32 PackCullFace(Maxwell::CullFace cull) noexcept;
static Maxwell::CullFace UnpackCullFace(u32 packed) noexcept;
static u32 PackFrontFace(Maxwell::FrontFace face) noexcept;
static Maxwell::FrontFace UnpackFrontFace(u32 packed) noexcept;
static u32 PackPolygonMode(Maxwell::PolygonMode mode) noexcept;
static Maxwell::PolygonMode UnpackPolygonMode(u32 packed) noexcept;
static u32 PackLogicOp(Maxwell::LogicOp::Op op) noexcept;
static Maxwell::LogicOp::Op UnpackLogicOp(u32 packed) noexcept;
static u32 PackBlendEquation(Maxwell::Blend::Equation equation) noexcept;
static Maxwell::Blend::Equation UnpackBlendEquation(u32 packed) noexcept;
static u32 PackBlendFactor(Maxwell::Blend::Factor factor) noexcept;
static Maxwell::Blend::Factor UnpackBlendFactor(u32 packed) noexcept;
struct BlendingAttachment {
union {
u32 raw;
BitField<0, 1, u32> mask_r;
BitField<1, 1, u32> mask_g;
BitField<2, 1, u32> mask_b;
BitField<3, 1, u32> mask_a;
BitField<4, 3, u32> equation_rgb;
BitField<7, 3, u32> equation_a;
BitField<10, 5, u32> factor_source_rgb;
BitField<15, 5, u32> factor_dest_rgb;
BitField<20, 5, u32> factor_source_a;
BitField<25, 5, u32> factor_dest_a;
BitField<30, 1, u32> enable;
};
void Refresh(const Maxwell& regs, size_t index);
std::array<bool, 4> Mask() const noexcept {
return {mask_r != 0, mask_g != 0, mask_b != 0, mask_a != 0};
}
Maxwell::Blend::Equation EquationRGB() const noexcept {
return UnpackBlendEquation(equation_rgb.Value());
}
Maxwell::Blend::Equation EquationAlpha() const noexcept {
return UnpackBlendEquation(equation_a.Value());
}
Maxwell::Blend::Factor SourceRGBFactor() const noexcept {
return UnpackBlendFactor(factor_source_rgb.Value());
}
Maxwell::Blend::Factor DestRGBFactor() const noexcept {
return UnpackBlendFactor(factor_dest_rgb.Value());
}
Maxwell::Blend::Factor SourceAlphaFactor() const noexcept {
return UnpackBlendFactor(factor_source_a.Value());
}
Maxwell::Blend::Factor DestAlphaFactor() const noexcept {
return UnpackBlendFactor(factor_dest_a.Value());
}
};
union VertexAttribute {
u32 raw;
BitField<0, 1, u32> enabled;
BitField<1, 5, u32> buffer;
BitField<6, 14, u32> offset;
BitField<20, 3, u32> type;
BitField<23, 6, u32> size;
Maxwell::VertexAttribute::Type Type() const noexcept {
return static_cast<Maxwell::VertexAttribute::Type>(type.Value());
}
Maxwell::VertexAttribute::Size Size() const noexcept {
return static_cast<Maxwell::VertexAttribute::Size>(size.Value());
}
};
template <size_t Position>
union StencilFace {
BitField<Position + 0, 3, u32> action_stencil_fail;
BitField<Position + 3, 3, u32> action_depth_fail;
BitField<Position + 6, 3, u32> action_depth_pass;
BitField<Position + 9, 3, u32> test_func;
Maxwell::StencilOp::Op ActionStencilFail() const noexcept {
return UnpackStencilOp(action_stencil_fail);
}
Maxwell::StencilOp::Op ActionDepthFail() const noexcept {
return UnpackStencilOp(action_depth_fail);
}
Maxwell::StencilOp::Op ActionDepthPass() const noexcept {
return UnpackStencilOp(action_depth_pass);
}
Maxwell::ComparisonOp TestFunc() const noexcept {
return UnpackComparisonOp(test_func);
}
};
struct DynamicState {
union {
u32 raw1;
StencilFace<0> front;
StencilFace<12> back;
BitField<24, 1, u32> stencil_enable;
BitField<25, 1, u32> depth_write_enable;
BitField<26, 1, u32> depth_bounds_enable;
BitField<27, 1, u32> depth_test_enable;
BitField<28, 1, u32> front_face;
BitField<29, 3, u32> depth_test_func;
};
union {
u32 raw2;
BitField<0, 2, u32> cull_face;
BitField<2, 1, u32> cull_enable;
};
// Vertex stride is a 12-bit value, so we have 4 bits to spare per element
std::array<u16, Maxwell::NumVertexArrays> vertex_strides;
void Refresh(const Maxwell& regs);
Maxwell::ComparisonOp DepthTestFunc() const noexcept {
return UnpackComparisonOp(depth_test_func);
}
Maxwell::CullFace CullFace() const noexcept {
return UnpackCullFace(cull_face.Value());
}
Maxwell::FrontFace FrontFace() const noexcept {
return UnpackFrontFace(front_face.Value());
}
};
union {
u32 raw1;
BitField<0, 1, u32> extended_dynamic_state;
BitField<1, 1, u32> dynamic_vertex_input;
BitField<2, 1, u32> xfb_enabled;
BitField<3, 1, u32> primitive_restart_enable;
BitField<4, 1, u32> depth_bias_enable;
BitField<5, 1, u32> depth_clamp_disabled;
BitField<6, 1, u32> ndc_minus_one_to_one;
BitField<7, 2, u32> polygon_mode;
BitField<9, 5, u32> patch_control_points_minus_one;
BitField<14, 2, u32> tessellation_primitive;
BitField<16, 2, u32> tessellation_spacing;
BitField<18, 1, u32> tessellation_clockwise;
BitField<19, 1, u32> logic_op_enable;
BitField<20, 4, u32> logic_op;
BitField<24, 4, Maxwell::PrimitiveTopology> topology;
BitField<28, 4, Tegra::Texture::MsaaMode> msaa_mode;
};
union {
u32 raw2;
BitField<0, 1, u32> rasterize_enable;
BitField<1, 3, u32> alpha_test_func;
BitField<4, 1, u32> early_z;
BitField<5, 1, u32> depth_enabled;
BitField<6, 5, u32> depth_format;
BitField<11, 1, u32> y_negate;
BitField<12, 1, u32> provoking_vertex_last;
BitField<14, 1, u32> smooth_lines;
};
std::array<u8, Maxwell::NumRenderTargets> color_formats;
u32 alpha_test_ref;
u32 point_size;
std::array<BlendingAttachment, Maxwell::NumRenderTargets> attachments;
std::array<u16, Maxwell::NumViewports> viewport_swizzles;
union {
u64 attribute_types; // Used with VK_EXT_vertex_input_dynamic_state
u64 enabled_divisors;
};
std::array<VertexAttribute, Maxwell::NumVertexAttributes> attributes;
std::array<u32, Maxwell::NumVertexArrays> binding_divisors;
DynamicState dynamic_state;
VideoCommon::TransformFeedbackState xfb_state;
void Refresh(Tegra::Engines::Maxwell3D& maxwell3d, bool has_extended_dynamic_state,
bool has_dynamic_vertex_input);
size_t Hash() const noexcept;
bool operator==(const FixedPipelineState& rhs) const noexcept;
bool operator!=(const FixedPipelineState& rhs) const noexcept {
return !operator==(rhs);
}
size_t Size() const noexcept {
if (xfb_enabled) {
// When transform feedback is enabled, use the whole struct
return sizeof(*this);
}
if (dynamic_vertex_input) {
// Exclude dynamic state and attributes
return offsetof(FixedPipelineState, attributes);
}
if (extended_dynamic_state) {
// Exclude dynamic state
return offsetof(FixedPipelineState, dynamic_state);
}
// Default
return offsetof(FixedPipelineState, xfb_state);
}
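// Summary of the prefix logic above, per mode:
//   transform feedback:     the whole struct
//   dynamic vertex input:   bytes before `attributes`
//   extended dynamic state: bytes before `dynamic_state`
//   default:                bytes before `xfb_state`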
u32 DynamicAttributeType(size_t index) const noexcept {
return (attribute_types >> (index * 2)) & 0b11;
}
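// Sketch of the packing DynamicAttributeType reads (see the LUT in
// FixedPipelineState::Refresh): two bits per attribute, where 0 = invalid or
// constant, 1 = float-like, 2 = signed integer, 3 = unsigned integer; e.g.
// attribute 5's code sits at bits 10-11 of attribute_types.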
};
static_assert(std::has_unique_object_representations_v<FixedPipelineState>);
static_assert(std::is_trivially_copyable_v<FixedPipelineState>);
static_assert(std::is_trivially_constructible_v<FixedPipelineState>);
} // namespace Vulkan
namespace std {
template <>
struct hash<Vulkan::FixedPipelineState> {
size_t operator()(const Vulkan::FixedPipelineState& k) const noexcept {
return k.Hash();
}
};
} // namespace std

File diff suppressed because it is too large

@@ -1,78 +1,78 @@
// SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include "shader_recompiler/stage.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/surface.h"
#include "video_core/textures/texture.h"
#include "video_core/vulkan_common/vulkan_device.h"
#include "video_core/vulkan_common/vulkan_wrapper.h"
namespace Vulkan::MaxwellToVK {
using Maxwell = Tegra::Engines::Maxwell3D::Regs;
using PixelFormat = VideoCore::Surface::PixelFormat;
namespace Sampler {
VkFilter Filter(Tegra::Texture::TextureFilter filter);
VkSamplerMipmapMode MipmapMode(Tegra::Texture::TextureMipmapFilter mipmap_filter);
VkSamplerAddressMode WrapMode(const Device& device, Tegra::Texture::WrapMode wrap_mode,
Tegra::Texture::TextureFilter filter);
VkCompareOp DepthCompareFunction(Tegra::Texture::DepthCompareFunc depth_compare_func);
} // namespace Sampler
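// Host format plus capability flags: attachable = usable as a framebuffer
// attachment, storage = usable as a storage image.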
struct FormatInfo {
VkFormat format;
bool attachable;
bool storage;
};
/**
 * Returns the format properties supported by the host
 * @param device Host device
 * @param format_type Type of image the buffer will use
 * @param with_srgb True when the format can remain sRGB when converted to another format (ASTC)
 * @param pixel_format Guest pixel format to describe
 */
[[nodiscard]] FormatInfo SurfaceFormat(const Device& device, FormatType format_type, bool with_srgb,
PixelFormat pixel_format);
VkShaderStageFlagBits ShaderStage(Shader::Stage stage);
VkPrimitiveTopology PrimitiveTopology(const Device& device, Maxwell::PrimitiveTopology topology);
VkFormat VertexFormat(const Device& device, Maxwell::VertexAttribute::Type type,
Maxwell::VertexAttribute::Size size);
VkCompareOp ComparisonOp(Maxwell::ComparisonOp comparison);
VkIndexType IndexFormat(Maxwell::IndexFormat index_format);
VkStencilOp StencilOp(Maxwell::StencilOp::Op stencil_op);
VkBlendOp BlendEquation(Maxwell::Blend::Equation equation);
VkBlendFactor BlendFactor(Maxwell::Blend::Factor factor);
VkFrontFace FrontFace(Maxwell::FrontFace front_face);
VkCullModeFlagBits CullFace(Maxwell::CullFace cull_face);
VkPolygonMode PolygonMode(Maxwell::PolygonMode polygon_mode);
VkComponentSwizzle SwizzleSource(Tegra::Texture::SwizzleSource swizzle);
VkViewportCoordinateSwizzleNV ViewportSwizzle(Maxwell::ViewportSwizzle swizzle);
VkSamplerReductionMode SamplerReduction(Tegra::Texture::SamplerReduction reduction);
VkSampleCountFlagBits MsaaMode(Tegra::Texture::MsaaMode msaa_mode);
} // namespace Vulkan::MaxwellToVK

View File

@@ -1,202 +1,202 @@
// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include <cstddef>
#include <boost/container/small_vector.hpp>
#include "common/common_types.h"
#include "shader_recompiler/backend/spirv/emit_spirv.h"
#include "shader_recompiler/shader_info.h"
#include "video_core/renderer_vulkan/vk_texture_cache.h"
#include "video_core/renderer_vulkan/vk_update_descriptor.h"
#include "video_core/texture_cache/texture_cache.h"
#include "video_core/texture_cache/types.h"
#include "video_core/vulkan_common/vulkan_device.h"
namespace Vulkan {
using Shader::Backend::SPIRV::NUM_TEXTURE_AND_IMAGE_SCALING_WORDS;
class DescriptorLayoutBuilder {
public:
explicit DescriptorLayoutBuilder(const Device& device_) : device{&device_} {}
bool CanUsePushDescriptor() const noexcept {
return device->IsKhrPushDescriptorSupported() &&
num_descriptors <= device->MaxPushDescriptors();
}
vk::DescriptorSetLayout CreateDescriptorSetLayout(bool use_push_descriptor) const {
if (bindings.empty()) {
return nullptr;
}
const VkDescriptorSetLayoutCreateFlags flags =
use_push_descriptor ? VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR : 0;
return device->GetLogical().CreateDescriptorSetLayout({
.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
.pNext = nullptr,
.flags = flags,
.bindingCount = static_cast<u32>(bindings.size()),
.pBindings = bindings.data(),
});
}
vk::DescriptorUpdateTemplateKHR CreateTemplate(VkDescriptorSetLayout descriptor_set_layout,
VkPipelineLayout pipeline_layout,
bool use_push_descriptor) const {
if (entries.empty()) {
return nullptr;
}
const VkDescriptorUpdateTemplateType type =
use_push_descriptor ? VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_PUSH_DESCRIPTORS_KHR
: VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_DESCRIPTOR_SET_KHR;
return device->GetLogical().CreateDescriptorUpdateTemplateKHR({
.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_UPDATE_TEMPLATE_CREATE_INFO_KHR,
.pNext = nullptr,
.flags = 0,
.descriptorUpdateEntryCount = static_cast<u32>(entries.size()),
.pDescriptorUpdateEntries = entries.data(),
.templateType = type,
.descriptorSetLayout = descriptor_set_layout,
.pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,
.pipelineLayout = pipeline_layout,
.set = 0,
});
}
vk::PipelineLayout CreatePipelineLayout(VkDescriptorSetLayout descriptor_set_layout) const {
using Shader::Backend::SPIRV::RescalingLayout;
const u32 size_offset = is_compute ? sizeof(RescalingLayout::down_factor) : 0u;
const VkPushConstantRange range{
.stageFlags = static_cast<VkShaderStageFlags>(
is_compute ? VK_SHADER_STAGE_COMPUTE_BIT : VK_SHADER_STAGE_ALL_GRAPHICS),
.offset = 0,
.size = static_cast<u32>(sizeof(RescalingLayout)) - size_offset,
};
return device->GetLogical().CreatePipelineLayout({
.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
.setLayoutCount = descriptor_set_layout ? 1U : 0U,
.pSetLayouts = bindings.empty() ? nullptr : &descriptor_set_layout,
.pushConstantRangeCount = 1,
.pPushConstantRanges = &range,
});
}
void Add(const Shader::Info& info, VkShaderStageFlags stage) {
is_compute |= (stage & VK_SHADER_STAGE_COMPUTE_BIT) != 0;
Add(VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, stage, info.constant_buffer_descriptors);
Add(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, stage, info.storage_buffers_descriptors);
Add(VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, stage, info.texture_buffer_descriptors);
Add(VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER, stage, info.image_buffer_descriptors);
Add(VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, stage, info.texture_descriptors);
Add(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, stage, info.image_descriptors);
}
private:
template <typename Descriptors>
void Add(VkDescriptorType type, VkShaderStageFlags stage, const Descriptors& descriptors) {
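// Each descriptor array consumes one binding slot and one update-template entry;
// the template offset advances by one packed DescriptorUpdateEntry per array.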
const size_t num{descriptors.size()};
for (size_t i = 0; i < num; ++i) {
bindings.push_back({
.binding = binding,
.descriptorType = type,
.descriptorCount = descriptors[i].count,
.stageFlags = stage,
.pImmutableSamplers = nullptr,
});
entries.push_back({
.dstBinding = binding,
.dstArrayElement = 0,
.descriptorCount = descriptors[i].count,
.descriptorType = type,
.offset = offset,
.stride = sizeof(DescriptorUpdateEntry),
});
++binding;
num_descriptors += descriptors[i].count;
offset += sizeof(DescriptorUpdateEntry);
}
}
const Device* device{};
bool is_compute{};
boost::container::small_vector<VkDescriptorSetLayoutBinding, 32> bindings;
boost::container::small_vector<VkDescriptorUpdateTemplateEntryKHR, 32> entries;
u32 binding{};
u32 num_descriptors{};
size_t offset{};
};
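// Accumulates one bit per sampled texture and per storage image, set when the
// resource is rescaled. The packed words are pushed as push constants and read
// back through the SPIR-V rescaling layout.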
class RescalingPushConstant {
public:
explicit RescalingPushConstant() noexcept {}
void PushTexture(bool is_rescaled) noexcept {
*texture_ptr |= is_rescaled ? texture_bit : 0u;
texture_bit <<= 1u;
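// All 32 bits of the current word are consumed; continue in the next word.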
if (texture_bit == 0u) {
texture_bit = 1u;
++texture_ptr;
}
}
void PushImage(bool is_rescaled) noexcept {
*image_ptr |= is_rescaled ? image_bit : 0u;
image_bit <<= 1u;
if (image_bit == 0u) {
image_bit = 1u;
++image_ptr;
}
}
const std::array<u32, NUM_TEXTURE_AND_IMAGE_SCALING_WORDS>& Data() const noexcept {
return words;
}
private:
std::array<u32, NUM_TEXTURE_AND_IMAGE_SCALING_WORDS> words{};
u32* texture_ptr{words.data()};
u32* image_ptr{words.data() + Shader::Backend::SPIRV::NUM_TEXTURE_SCALING_WORDS};
u32 texture_bit{1u};
u32 image_bit{1u};
};
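// Writes one stage's sampled-image and storage-image descriptors into the update
// queue and records their rescaling bits. Texture/image buffer descriptors were
// already written, so the view cursor is advanced past them first.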
inline void PushImageDescriptors(TextureCache& texture_cache,
UpdateDescriptorQueue& update_descriptor_queue,
const Shader::Info& info, RescalingPushConstant& rescaling,
const VkSampler*& samplers,
const VideoCommon::ImageViewInOut*& views) {
const u32 num_texture_buffers = Shader::NumDescriptors(info.texture_buffer_descriptors);
const u32 num_image_buffers = Shader::NumDescriptors(info.image_buffer_descriptors);
views += num_texture_buffers;
views += num_image_buffers;
for (const auto& desc : info.texture_descriptors) {
for (u32 index = 0; index < desc.count; ++index) {
const VideoCommon::ImageViewId image_view_id{(views++)->id};
const VkSampler sampler{*(samplers++)};
ImageView& image_view{texture_cache.GetImageView(image_view_id)};
const VkImageView vk_image_view{image_view.Handle(desc.type)};
update_descriptor_queue.AddSampledImage(vk_image_view, sampler);
rescaling.PushTexture(texture_cache.IsRescaling(image_view));
}
}
for (const auto& desc : info.image_descriptors) {
for (u32 index = 0; index < desc.count; ++index) {
ImageView& image_view{texture_cache.GetImageView((views++)->id)};
if (desc.is_written) {
texture_cache.MarkModification(image_view.image_id);
}
const VkImageView vk_image_view{image_view.StorageView(desc.type, desc.format)};
update_descriptor_queue.AddImage(vk_image_view);
rescaling.PushImage(texture_cache.IsRescaling(image_view));
}
}
}
} // namespace Vulkan

View File

@@ -1,99 +1,99 @@
// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include <string_view>
#include <fmt/format.h>
#include "common/common_types.h"
#include "common/logging/log.h"
#include "video_core/renderer_vulkan/pipeline_statistics.h"
#include "video_core/vulkan_common/vulkan_device.h"
#include "video_core/vulkan_common/vulkan_wrapper.h"
namespace Vulkan {
using namespace std::string_view_literals;
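// Collapses a driver-reported statistic to u64 regardless of its native format.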
static u64 GetUint64(const VkPipelineExecutableStatisticKHR& statistic) {
switch (statistic.format) {
case VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_INT64_KHR:
return static_cast<u64>(statistic.value.i64);
case VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR:
return statistic.value.u64;
case VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_FLOAT64_KHR:
return static_cast<u64>(statistic.value.f64);
default:
return 0;
}
}
PipelineStatistics::PipelineStatistics(const Device& device_) : device{device_} {}
void PipelineStatistics::Collect(VkPipeline pipeline) {
const auto& dev{device.GetLogical()};
const std::vector properties{dev.GetPipelineExecutablePropertiesKHR(pipeline)};
const u32 num_executables{static_cast<u32>(properties.size())};
for (u32 executable = 0; executable < num_executables; ++executable) {
const auto statistics{dev.GetPipelineExecutableStatisticsKHR(pipeline, executable)};
if (statistics.empty()) {
continue;
}
Stats stage_stats;
for (const auto& statistic : statistics) {
const char* const name{statistic.name};
if (name == "Binary Size"sv || name == "Code size"sv || name == "Instruction Count"sv) {
stage_stats.code_size = GetUint64(statistic);
} else if (name == "Register Count"sv) {
stage_stats.register_count = GetUint64(statistic);
} else if (name == "SGPRs"sv || name == "numUsedSgprs"sv) {
stage_stats.sgpr_count = GetUint64(statistic);
} else if (name == "VGPRs"sv || name == "numUsedVgprs"sv) {
stage_stats.vgpr_count = GetUint64(statistic);
} else if (name == "Branches"sv) {
stage_stats.branches_count = GetUint64(statistic);
} else if (name == "Basic Block Count"sv) {
stage_stats.basic_block_count = GetUint64(statistic);
}
}
std::scoped_lock lock{mutex};
collected_stats.push_back(stage_stats);
}
}
void PipelineStatistics::Report() const {
double num{};
Stats total;
{
std::scoped_lock lock{mutex};
for (const Stats& stats : collected_stats) {
total.code_size += stats.code_size;
total.register_count += stats.register_count;
total.sgpr_count += stats.sgpr_count;
total.vgpr_count += stats.vgpr_count;
total.branches_count += stats.branches_count;
total.basic_block_count += stats.basic_block_count;
}
num = static_cast<double>(collected_stats.size());
}
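// Each line below reports the mean across all collected stages; statistics the
// driver never reported stay zero and are skipped.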
std::string report;
const auto add = [&](const char* fmt, u64 value) {
if (value > 0) {
report += fmt::format(fmt::runtime(fmt), static_cast<double>(value) / num);
}
};
add("Code size: {:9.03f}\n", total.code_size);
add("Register count: {:9.03f}\n", total.register_count);
add("SGPRs: {:9.03f}\n", total.sgpr_count);
add("VGPRs: {:9.03f}\n", total.vgpr_count);
add("Branches count: {:9.03f}\n", total.branches_count);
add("Basic blocks: {:9.03f}\n", total.basic_block_count);
LOG_INFO(Render_Vulkan,
"\nAverage pipeline statistics\n"
"==========================================\n"
"{}\n",
report);
}
} // namespace Vulkan

View File

@@ -1,39 +1,39 @@
// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include <mutex>
#include <vector>
#include "common/common_types.h"
#include "video_core/vulkan_common/vulkan_wrapper.h"
namespace Vulkan {
class Device;
class PipelineStatistics {
public:
explicit PipelineStatistics(const Device& device_);
void Collect(VkPipeline pipeline);
void Report() const;
private:
struct Stats {
u64 code_size{};
u64 register_count{};
u64 sgpr_count{};
u64 vgpr_count{};
u64 branches_count{};
u64 basic_block_count{};
};
const Device& device;
mutable std::mutex mutex;
std::vector<Stats> collected_stats;
};
} // namespace Vulkan
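// Illustrative sketch only (hypothetical call site): collect once per created
// pipeline, then report the aggregate averages:
//
//   PipelineStatistics statistics{device};
//   statistics.Collect(*pipeline);
//   statistics.Report();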

View File

@@ -1,345 +1,345 @@
// SPDX-FileCopyrightText: Copyright 2018 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include <algorithm>
#include <array>
#include <cstring>
#include <memory>
#include <optional>
#include <string>
#include <vector>
#include <fmt/format.h>
#include "common/logging/log.h"
#include "common/scope_exit.h"
#include "common/settings.h"
#include "common/telemetry.h"
#include "core/core_timing.h"
#include "core/frontend/emu_window.h"
#include "core/telemetry_session.h"
#include "video_core/gpu.h"
#include "video_core/renderer_vulkan/renderer_vulkan.h"
#include "video_core/renderer_vulkan/vk_blit_screen.h"
#include "video_core/renderer_vulkan/vk_rasterizer.h"
#include "video_core/renderer_vulkan/vk_scheduler.h"
#include "video_core/renderer_vulkan/vk_state_tracker.h"
#include "video_core/renderer_vulkan/vk_swapchain.h"
#include "video_core/vulkan_common/vulkan_debug_callback.h"
#include "video_core/vulkan_common/vulkan_device.h"
#include "video_core/vulkan_common/vulkan_instance.h"
#include "video_core/vulkan_common/vulkan_library.h"
#include "video_core/vulkan_common/vulkan_memory_allocator.h"
#include "video_core/vulkan_common/vulkan_surface.h"
#include "video_core/vulkan_common/vulkan_wrapper.h"
namespace Vulkan {
namespace {
std::string GetReadableVersion(u32 version) {
return fmt::format("{}.{}.{}", VK_VERSION_MAJOR(version), VK_VERSION_MINOR(version),
VK_VERSION_PATCH(version));
}
std::string GetDriverVersion(const Device& device) {
// Extracted from
// https://github.com/SaschaWillems/vulkan.gpuinfo.org/blob/5dddea46ea1120b0df14eef8f15ff8e318e35462/functions.php#L308-L314
const u32 version = device.GetDriverVersion();
if (device.GetDriverID() == VK_DRIVER_ID_NVIDIA_PROPRIETARY_KHR) {
const u32 major = (version >> 22) & 0x3ff;
const u32 minor = (version >> 14) & 0x0ff;
const u32 secondary = (version >> 6) & 0x0ff;
const u32 tertiary = version & 0x003f;
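// e.g. (471 << 22) | (41 << 14) decodes as "471.41.0.0".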
return fmt::format("{}.{}.{}.{}", major, minor, secondary, tertiary);
}
if (device.GetDriverID() == VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS_KHR) {
const u32 major = version >> 14;
const u32 minor = version & 0x3fff;
return fmt::format("{}.{}", major, minor);
}
return GetReadableVersion(version);
}
std::string BuildCommaSeparatedExtensions(std::vector<std::string> available_extensions) {
std::sort(std::begin(available_extensions), std::end(available_extensions));
static constexpr std::size_t AverageExtensionSize = 64;
std::string separated_extensions;
separated_extensions.reserve(available_extensions.size() * AverageExtensionSize);
const auto end = std::end(available_extensions);
for (auto extension = std::begin(available_extensions); extension != end; ++extension) {
if (const bool is_last = extension + 1 == end; is_last) {
separated_extensions += *extension;
} else {
separated_extensions += fmt::format("{},", *extension);
}
}
return separated_extensions;
}
Device CreateDevice(const vk::Instance& instance, const vk::InstanceDispatch& dld,
VkSurfaceKHR surface) {
const std::vector<VkPhysicalDevice> devices = instance.EnumeratePhysicalDevices();
const s32 device_index = Settings::values.vulkan_device.GetValue();
if (device_index < 0 || device_index >= static_cast<s32>(devices.size())) {
LOG_ERROR(Render_Vulkan, "Invalid device index {}!", device_index);
throw vk::Exception(VK_ERROR_INITIALIZATION_FAILED);
}
const vk::PhysicalDevice physical_device(devices[device_index], dld);
return Device(*instance, physical_device, surface, dld);
}
} // Anonymous namespace
RendererVulkan::RendererVulkan(Core::TelemetrySession& telemetry_session_,
Core::Frontend::EmuWindow& emu_window,
Core::Memory::Memory& cpu_memory_, Tegra::GPU& gpu_,
std::unique_ptr<Core::Frontend::GraphicsContext> context_) try
: RendererBase(emu_window, std::move(context_)), telemetry_session(telemetry_session_),
cpu_memory(cpu_memory_), gpu(gpu_), library(OpenLibrary()),
instance(CreateInstance(library, dld, VK_API_VERSION_1_1, render_window.GetWindowInfo().type,
true, Settings::values.renderer_debug.GetValue())),
debug_callback(Settings::values.renderer_debug ? CreateDebugCallback(instance) : nullptr),
surface(CreateSurface(instance, render_window)),
device(CreateDevice(instance, dld, *surface)), memory_allocator(device, false),
state_tracker(), scheduler(device, state_tracker),
swapchain(*surface, device, scheduler, render_window.GetFramebufferLayout().width,
render_window.GetFramebufferLayout().height, false),
blit_screen(cpu_memory, render_window, device, memory_allocator, swapchain, scheduler,
screen_info),
rasterizer(render_window, gpu, cpu_memory, screen_info, device, memory_allocator,
state_tracker, scheduler) {
Report();
} catch (const vk::Exception& exception) {
LOG_ERROR(Render_Vulkan, "Vulkan initialization failed with error: {}", exception.what());
throw std::runtime_error{fmt::format("Vulkan initialization error {}", exception.what())};
}
RendererVulkan::~RendererVulkan() {
void(device.GetLogical().WaitIdle());
}
void RendererVulkan::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {
if (!framebuffer) {
return;
}
SCOPE_EXIT({ render_window.OnFrameDisplayed(); });
if (!render_window.IsShown()) {
return;
}
// Update screen info if the framebuffer size has changed.
if (screen_info.width != framebuffer->width || screen_info.height != framebuffer->height) {
screen_info.width = framebuffer->width;
screen_info.height = framebuffer->height;
}
const VAddr framebuffer_addr = framebuffer->address + framebuffer->offset;
const bool use_accelerated =
rasterizer.AccelerateDisplay(*framebuffer, framebuffer_addr, framebuffer->stride);
const bool is_srgb = use_accelerated && screen_info.is_srgb;
RenderScreenshot(*framebuffer, use_accelerated);
bool has_been_recreated = false;
const auto recreate_swapchain = [&] {
if (!has_been_recreated) {
has_been_recreated = true;
scheduler.Finish();
}
const Layout::FramebufferLayout layout = render_window.GetFramebufferLayout();
swapchain.Create(layout.width, layout.height, is_srgb);
};
if (swapchain.NeedsRecreation(is_srgb)) {
recreate_swapchain();
}
bool is_outdated;
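// Reacquire until the swapchain stops reporting itself out of date; it can go
// stale again immediately after recreation, e.g. during a live window resize.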
do {
swapchain.AcquireNextImage();
is_outdated = swapchain.IsOutDated();
if (is_outdated) {
recreate_swapchain();
}
} while (is_outdated);
if (has_been_recreated) {
blit_screen.Recreate();
}
const VkSemaphore render_semaphore = blit_screen.DrawToSwapchain(*framebuffer, use_accelerated);
const VkSemaphore present_semaphore = swapchain.CurrentPresentSemaphore();
scheduler.Flush(render_semaphore, present_semaphore);
scheduler.WaitWorker();
swapchain.Present(render_semaphore);
gpu.RendererFrameEndNotify();
rasterizer.TickFrame();
}
void RendererVulkan::Report() const {
const std::string vendor_name{device.GetVendorName()};
const std::string model_name{device.GetModelName()};
const std::string driver_version = GetDriverVersion(device);
const std::string driver_name = fmt::format("{} {}", vendor_name, driver_version);
const std::string api_version = GetReadableVersion(device.ApiVersion());
const std::string extensions = BuildCommaSeparatedExtensions(device.GetAvailableExtensions());
LOG_INFO(Render_Vulkan, "Driver: {}", driver_name);
LOG_INFO(Render_Vulkan, "Device: {}", model_name);
LOG_INFO(Render_Vulkan, "Vulkan: {}", api_version);
static constexpr auto field = Common::Telemetry::FieldType::UserSystem;
telemetry_session.AddField(field, "GPU_Vendor", vendor_name);
telemetry_session.AddField(field, "GPU_Model", model_name);
telemetry_session.AddField(field, "GPU_Vulkan_Driver", driver_name);
telemetry_session.AddField(field, "GPU_Vulkan_Version", api_version);
telemetry_session.AddField(field, "GPU_Vulkan_Extensions", extensions);
}
void RendererVulkan::RenderScreenshot(const Tegra::FramebufferConfig& framebuffer,
bool use_accelerated) {
if (!renderer_settings.screenshot_requested) {
return;
}
const Layout::FramebufferLayout layout{renderer_settings.screenshot_framebuffer_layout};
vk::Image staging_image = device.GetLogical().CreateImage(VkImageCreateInfo{
.sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
.pNext = nullptr,
.flags = VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT,
.imageType = VK_IMAGE_TYPE_2D,
.format = VK_FORMAT_B8G8R8A8_UNORM,
.extent =
{
.width = layout.width,
.height = layout.height,
.depth = 1,
},
.mipLevels = 1,
.arrayLayers = 1,
.samples = VK_SAMPLE_COUNT_1_BIT,
.tiling = VK_IMAGE_TILING_OPTIMAL,
.usage = VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT |
VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT,
.sharingMode = VK_SHARING_MODE_EXCLUSIVE,
.queueFamilyIndexCount = 0,
.pQueueFamilyIndices = nullptr,
.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED,
});
const auto image_commit = memory_allocator.Commit(staging_image, MemoryUsage::DeviceLocal);
const vk::ImageView dst_view = device.GetLogical().CreateImageView(VkImageViewCreateInfo{
.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
.image = *staging_image,
.viewType = VK_IMAGE_VIEW_TYPE_2D,
.format = screen_info.is_srgb ? VK_FORMAT_B8G8R8A8_SRGB : VK_FORMAT_B8G8R8A8_UNORM,
.components{
.r = VK_COMPONENT_SWIZZLE_IDENTITY,
.g = VK_COMPONENT_SWIZZLE_IDENTITY,
.b = VK_COMPONENT_SWIZZLE_IDENTITY,
.a = VK_COMPONENT_SWIZZLE_IDENTITY,
},
.subresourceRange{
.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
.baseMipLevel = 0,
.levelCount = 1,
.baseArrayLayer = 0,
.layerCount = VK_REMAINING_ARRAY_LAYERS,
},
});
const VkExtent2D render_area{.width = layout.width, .height = layout.height};
const vk::Framebuffer screenshot_fb = blit_screen.CreateFramebuffer(*dst_view, render_area);
// Since we're not rendering to the screen, ignore the render semaphore.
void(blit_screen.Draw(framebuffer, *screenshot_fb, layout, render_area, use_accelerated));
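// 4 bytes per texel: the staging image and readback buffer are B8G8R8A8.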
const auto buffer_size = static_cast<VkDeviceSize>(layout.width * layout.height * 4);
const VkBufferCreateInfo dst_buffer_info{
.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
.size = buffer_size,
.usage = VK_BUFFER_USAGE_TRANSFER_DST_BIT,
.sharingMode = VK_SHARING_MODE_EXCLUSIVE,
.queueFamilyIndexCount = 0,
.pQueueFamilyIndices = nullptr,
};
const vk::Buffer dst_buffer = device.GetLogical().CreateBuffer(dst_buffer_info);
MemoryCommit dst_buffer_memory = memory_allocator.Commit(dst_buffer, MemoryUsage::Download);
scheduler.RequestOutsideRenderPassOperationContext();
scheduler.Record([&](vk::CommandBuffer cmdbuf) {
const VkImageMemoryBarrier read_barrier{
.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
.pNext = nullptr,
.srcAccessMask = VK_ACCESS_MEMORY_WRITE_BIT,
.dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT,
.oldLayout = VK_IMAGE_LAYOUT_PRESENT_SRC_KHR,
.newLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.image = *staging_image,
.subresourceRange{
.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
.baseMipLevel = 0,
.levelCount = VK_REMAINING_MIP_LEVELS,
.baseArrayLayer = 0,
.layerCount = VK_REMAINING_ARRAY_LAYERS,
},
};
const VkImageMemoryBarrier image_write_barrier{
.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
.pNext = nullptr,
.srcAccessMask = 0,
.dstAccessMask = VK_ACCESS_MEMORY_WRITE_BIT,
.oldLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
.newLayout = VK_IMAGE_LAYOUT_GENERAL,
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.image = *staging_image,
.subresourceRange{
.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
.baseMipLevel = 0,
.levelCount = VK_REMAINING_MIP_LEVELS,
.baseArrayLayer = 0,
.layerCount = VK_REMAINING_ARRAY_LAYERS,
},
};
static constexpr VkMemoryBarrier memory_write_barrier{
.sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER,
.pNext = nullptr,
.srcAccessMask = VK_ACCESS_MEMORY_WRITE_BIT,
.dstAccessMask = VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT,
};
const VkBufferImageCopy copy{
.bufferOffset = 0,
.bufferRowLength = 0,
.bufferImageHeight = 0,
.imageSubresource{
.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
.mipLevel = 0,
.baseArrayLayer = 0,
.layerCount = 1,
},
.imageOffset{.x = 0, .y = 0, .z = 0},
.imageExtent{
.width = layout.width,
.height = layout.height,
.depth = 1,
},
};
cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT,
0, read_barrier);
cmdbuf.CopyImageToBuffer(*staging_image, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, *dst_buffer,
copy);
cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
0, memory_write_barrier, nullptr, image_write_barrier);
});
// Ensure the copy is fully completed before saving the screenshot
scheduler.Finish();
// Copy backing image data to the QImage screenshot buffer
const auto dst_memory_map = dst_buffer_memory.Map();
std::memcpy(renderer_settings.screenshot_bits, dst_memory_map.data(), dst_memory_map.size());
renderer_settings.screenshot_complete_callback(false);
renderer_settings.screenshot_requested = false;
}
} // namespace Vulkan

View File

@@ -1,79 +1,79 @@
// SPDX-FileCopyrightText: Copyright 2018 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include <memory>
#include <string>
#include "common/dynamic_library.h"
#include "video_core/renderer_base.h"
#include "video_core/renderer_vulkan/vk_blit_screen.h"
#include "video_core/renderer_vulkan/vk_rasterizer.h"
#include "video_core/renderer_vulkan/vk_scheduler.h"
#include "video_core/renderer_vulkan/vk_state_tracker.h"
#include "video_core/renderer_vulkan/vk_swapchain.h"
#include "video_core/vulkan_common/vulkan_device.h"
#include "video_core/vulkan_common/vulkan_memory_allocator.h"
#include "video_core/vulkan_common/vulkan_wrapper.h"
namespace Core {
class TelemetrySession;
}
namespace Core::Memory {
class Memory;
}
namespace Tegra {
class GPU;
}
namespace Vulkan {
class RendererVulkan final : public VideoCore::RendererBase {
public:
explicit RendererVulkan(Core::TelemetrySession& telemetry_session_,
Core::Frontend::EmuWindow& emu_window,
Core::Memory::Memory& cpu_memory_, Tegra::GPU& gpu_,
std::unique_ptr<Core::Frontend::GraphicsContext> context_);
~RendererVulkan() override;
void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) override;
VideoCore::RasterizerInterface* ReadRasterizer() override {
return &rasterizer;
}
[[nodiscard]] std::string GetDeviceVendor() const override {
return device.GetDriverName();
}
private:
void Report() const;
void RenderScreenshot(const Tegra::FramebufferConfig& framebuffer, bool use_accelerated);
Core::TelemetrySession& telemetry_session;
Core::Memory::Memory& cpu_memory;
Tegra::GPU& gpu;
Common::DynamicLibrary library;
vk::InstanceDispatch dld;
vk::Instance instance;
vk::DebugUtilsMessenger debug_callback;
vk::SurfaceKHR surface;
ScreenInfo screen_info;
Device device;
MemoryAllocator memory_allocator;
StateTracker state_tracker;
Scheduler scheduler;
Swapchain swapchain;
BlitScreen blit_screen;
RasterizerVulkan rasterizer;
};
} // namespace Vulkan

File diff suppressed because it is too large

View File

@@ -1,168 +1,168 @@
// SPDX-FileCopyrightText: Copyright 2018 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include <memory>
#include "core/frontend/framebuffer_layout.h"
#include "video_core/vulkan_common/vulkan_memory_allocator.h"
#include "video_core/vulkan_common/vulkan_wrapper.h"
namespace Core {
class System;
}
namespace Core::Memory {
class Memory;
}
namespace Core::Frontend {
class EmuWindow;
}
namespace Tegra {
struct FramebufferConfig;
}
namespace VideoCore {
class RasterizerInterface;
}
namespace Service::android {
enum class PixelFormat : u32;
}
namespace Vulkan {
struct ScreenInfo;
class Device;
class FSR;
class RasterizerVulkan;
class Scheduler;
class Swapchain;
struct ScreenInfo {
VkImageView image_view{};
u32 width{};
u32 height{};
bool is_srgb{};
};
class BlitScreen {
public:
explicit BlitScreen(Core::Memory::Memory& cpu_memory, Core::Frontend::EmuWindow& render_window,
const Device& device, MemoryAllocator& memory_manager, Swapchain& swapchain,
Scheduler& scheduler, const ScreenInfo& screen_info);
~BlitScreen();
void Recreate();
[[nodiscard]] VkSemaphore Draw(const Tegra::FramebufferConfig& framebuffer,
const VkFramebuffer& host_framebuffer,
const Layout::FramebufferLayout layout, VkExtent2D render_area,
bool use_accelerated);
[[nodiscard]] VkSemaphore DrawToSwapchain(const Tegra::FramebufferConfig& framebuffer,
bool use_accelerated);
[[nodiscard]] vk::Framebuffer CreateFramebuffer(const VkImageView& image_view,
VkExtent2D extent);
[[nodiscard]] vk::Framebuffer CreateFramebuffer(const VkImageView& image_view,
VkExtent2D extent, vk::RenderPass& rd);
private:
struct BufferData;
void CreateStaticResources();
void CreateShaders();
void CreateSemaphores();
void CreateDescriptorPool();
void CreateRenderPass();
vk::RenderPass CreateRenderPassImpl(VkFormat, bool is_present = true);
void CreateDescriptorSetLayout();
void CreateDescriptorSets();
void CreatePipelineLayout();
void CreateGraphicsPipeline();
void CreateSampler();
void CreateDynamicResources();
void CreateFramebuffers();
void RefreshResources(const Tegra::FramebufferConfig& framebuffer);
void ReleaseRawImages();
void CreateStagingBuffer(const Tegra::FramebufferConfig& framebuffer);
void CreateRawImages(const Tegra::FramebufferConfig& framebuffer);
void UpdateDescriptorSet(std::size_t image_index, VkImageView image_view, bool nn) const;
void UpdateAADescriptorSet(std::size_t image_index, VkImageView image_view, bool nn) const;
void SetUniformData(BufferData& data, const Layout::FramebufferLayout layout) const;
void SetVertexData(BufferData& data, const Tegra::FramebufferConfig& framebuffer,
const Layout::FramebufferLayout layout) const;
void CreateFSR();
u64 CalculateBufferSize(const Tegra::FramebufferConfig& framebuffer) const;
u64 GetRawImageOffset(const Tegra::FramebufferConfig& framebuffer,
std::size_t image_index) const;
Core::Memory::Memory& cpu_memory;
Core::Frontend::EmuWindow& render_window;
const Device& device;
MemoryAllocator& memory_allocator;
Swapchain& swapchain;
Scheduler& scheduler;
std::size_t image_count;
const ScreenInfo& screen_info;
vk::ShaderModule vertex_shader;
vk::ShaderModule fxaa_vertex_shader;
vk::ShaderModule fxaa_fragment_shader;
vk::ShaderModule bilinear_fragment_shader;
vk::ShaderModule bicubic_fragment_shader;
vk::ShaderModule gaussian_fragment_shader;
vk::ShaderModule scaleforce_fragment_shader;
vk::DescriptorPool descriptor_pool;
vk::DescriptorSetLayout descriptor_set_layout;
vk::PipelineLayout pipeline_layout;
vk::Pipeline nearest_neighbor_pipeline;
vk::Pipeline bilinear_pipeline;
vk::Pipeline bicubic_pipeline;
vk::Pipeline gaussian_pipeline;
vk::Pipeline scaleforce_pipeline;
vk::RenderPass renderpass;
std::vector<vk::Framebuffer> framebuffers;
vk::DescriptorSets descriptor_sets;
vk::Sampler nn_sampler;
vk::Sampler sampler;
vk::Buffer buffer;
MemoryCommit buffer_commit;
std::vector<u64> resource_ticks;
std::vector<vk::Semaphore> semaphores;
std::vector<vk::Image> raw_images;
std::vector<vk::ImageView> raw_image_views;
std::vector<MemoryCommit> raw_buffer_commits;
vk::DescriptorPool aa_descriptor_pool;
vk::DescriptorSetLayout aa_descriptor_set_layout;
vk::PipelineLayout aa_pipeline_layout;
vk::Pipeline aa_pipeline;
vk::RenderPass aa_renderpass;
vk::Framebuffer aa_framebuffer;
vk::DescriptorSets aa_descriptor_sets;
vk::Image aa_image;
vk::ImageView aa_image_view;
MemoryCommit aa_commit;
u32 raw_width = 0;
u32 raw_height = 0;
Service::android::PixelFormat pixel_format{};
std::unique_ptr<FSR> fsr;
};
} // namespace Vulkan


@@ -1,429 +1,429 @@
// SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include <algorithm>
#include <array>
#include <cstring>
#include <span>
#include <vector>
#include "video_core/buffer_cache/buffer_cache.h"
#include "video_core/renderer_vulkan/maxwell_to_vk.h"
#include "video_core/renderer_vulkan/vk_buffer_cache.h"
#include "video_core/renderer_vulkan/vk_scheduler.h"
#include "video_core/renderer_vulkan/vk_staging_buffer_pool.h"
#include "video_core/renderer_vulkan/vk_update_descriptor.h"
#include "video_core/vulkan_common/vulkan_device.h"
#include "video_core/vulkan_common/vulkan_memory_allocator.h"
#include "video_core/vulkan_common/vulkan_wrapper.h"
namespace Vulkan {
namespace {
VkBufferCopy MakeBufferCopy(const VideoCommon::BufferCopy& copy) {
return VkBufferCopy{
.srcOffset = copy.src_offset,
.dstOffset = copy.dst_offset,
.size = copy.size,
};
}
VkIndexType IndexTypeFromNumElements(const Device& device, u32 num_elements) {
if (num_elements <= 0xff && device.IsExtIndexTypeUint8Supported()) {
return VK_INDEX_TYPE_UINT8_EXT;
}
if (num_elements <= 0xffff) {
return VK_INDEX_TYPE_UINT16;
}
return VK_INDEX_TYPE_UINT32;
}
size_t BytesPerIndex(VkIndexType index_type) {
switch (index_type) {
case VK_INDEX_TYPE_UINT8_EXT:
return 1;
case VK_INDEX_TYPE_UINT16:
return 2;
case VK_INDEX_TYPE_UINT32:
return 4;
default:
ASSERT_MSG(false, "Invalid index type={}", index_type);
return 1;
}
}
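// Expands a single quad into the two triangles that cover it. With first == 0,
// quad 0 yields {0, 1, 2, 0, 2, 3} and quad 1 yields {4, 5, 6, 4, 6, 7}.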
template <typename T>
std::array<T, 6> MakeQuadIndices(u32 quad, u32 first) {
std::array<T, 6> indices{0, 1, 2, 0, 2, 3};
for (T& index : indices) {
index = static_cast<T>(first + index + quad * 4);
}
return indices;
}
vk::Buffer CreateBuffer(const Device& device, u64 size) {
VkBufferUsageFlags flags =
VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT |
VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT |
VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT |
VK_BUFFER_USAGE_INDEX_BUFFER_BIT | VK_BUFFER_USAGE_VERTEX_BUFFER_BIT;
if (device.IsExtTransformFeedbackSupported()) {
flags |= VK_BUFFER_USAGE_TRANSFORM_FEEDBACK_BUFFER_BIT_EXT;
}
return device.GetLogical().CreateBuffer({
.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
.size = size,
.usage = flags,
.sharingMode = VK_SHARING_MODE_EXCLUSIVE,
.queueFamilyIndexCount = 0,
.pQueueFamilyIndices = nullptr,
});
}
} // Anonymous namespace
Buffer::Buffer(BufferCacheRuntime&, VideoCommon::NullBufferParams null_params)
: VideoCommon::BufferBase<VideoCore::RasterizerInterface>(null_params) {}
Buffer::Buffer(BufferCacheRuntime& runtime, VideoCore::RasterizerInterface& rasterizer_,
VAddr cpu_addr_, u64 size_bytes_)
: VideoCommon::BufferBase<VideoCore::RasterizerInterface>(rasterizer_, cpu_addr_, size_bytes_),
device{&runtime.device}, buffer{CreateBuffer(*device, SizeBytes())},
commit{runtime.memory_allocator.Commit(buffer, MemoryUsage::DeviceLocal)} {
if (runtime.device.HasDebuggingToolAttached()) {
buffer.SetObjectNameEXT(fmt::format("Buffer 0x{:x}", CpuAddr()).c_str());
}
}
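// Returns a texel buffer view over the requested range, creating and caching
// one view per distinct (offset, size, format) combination.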
VkBufferView Buffer::View(u32 offset, u32 size, VideoCore::Surface::PixelFormat format) {
if (!device) {
// Null buffer, return a null descriptor
return VK_NULL_HANDLE;
}
const auto it{std::ranges::find_if(views, [offset, size, format](const BufferView& view) {
return offset == view.offset && size == view.size && format == view.format;
})};
if (it != views.end()) {
return *it->handle;
}
views.push_back({
.offset = offset,
.size = size,
.format = format,
.handle = device->GetLogical().CreateBufferView({
.sType = VK_STRUCTURE_TYPE_BUFFER_VIEW_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
.buffer = *buffer,
.format = MaxwellToVK::SurfaceFormat(*device, FormatType::Buffer, false, format).format,
.offset = offset,
.range = size,
}),
});
return *views.back().handle;
}
BufferCacheRuntime::BufferCacheRuntime(const Device& device_, MemoryAllocator& memory_allocator_,
Scheduler& scheduler_, StagingBufferPool& staging_pool_,
UpdateDescriptorQueue& update_descriptor_queue_,
DescriptorPool& descriptor_pool)
: device{device_}, memory_allocator{memory_allocator_}, scheduler{scheduler_},
staging_pool{staging_pool_}, update_descriptor_queue{update_descriptor_queue_},
uint8_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue),
quad_index_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue) {}
StagingBufferRef BufferCacheRuntime::UploadStagingBuffer(size_t size) {
return staging_pool.Request(size, MemoryUsage::Upload);
}
StagingBufferRef BufferCacheRuntime::DownloadStagingBuffer(size_t size) {
return staging_pool.Request(size, MemoryUsage::Download);
}
u64 BufferCacheRuntime::GetDeviceLocalMemory() const {
return device.GetDeviceLocalMemory();
}
u64 BufferCacheRuntime::GetDeviceMemoryUsage() const {
return device.GetDeviceMemoryUsage();
}
bool BufferCacheRuntime::CanReportMemoryUsage() const {
return device.CanReportMemoryUsage();
}
void BufferCacheRuntime::Finish() {
scheduler.Finish();
}
void BufferCacheRuntime::CopyBuffer(VkBuffer dst_buffer, VkBuffer src_buffer,
std::span<const VideoCommon::BufferCopy> copies, bool barrier) {
static constexpr VkMemoryBarrier READ_BARRIER{
.sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER,
.pNext = nullptr,
.srcAccessMask = VK_ACCESS_MEMORY_WRITE_BIT,
.dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT | VK_ACCESS_TRANSFER_WRITE_BIT,
};
static constexpr VkMemoryBarrier WRITE_BARRIER{
.sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER,
.pNext = nullptr,
.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
.dstAccessMask = VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT,
};
// Measured in a popular game, the number of copies never exceeds the inline capacity of this small_vector once data is warmed up
boost::container::small_vector<VkBufferCopy, 3> vk_copies(copies.size());
std::ranges::transform(copies, vk_copies.begin(), MakeBufferCopy);
scheduler.RequestOutsideRenderPassOperationContext();
scheduler.Record([src_buffer, dst_buffer, vk_copies, barrier](vk::CommandBuffer cmdbuf) {
if (barrier) {
cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
VK_PIPELINE_STAGE_TRANSFER_BIT, 0, READ_BARRIER);
}
cmdbuf.CopyBuffer(src_buffer, dst_buffer, vk_copies);
if (barrier) {
cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT,
VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, 0, WRITE_BARRIER);
}
});
}
void BufferCacheRuntime::PreCopyBarrier() {
static constexpr VkMemoryBarrier READ_BARRIER{
.sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER,
.pNext = nullptr,
.srcAccessMask = VK_ACCESS_MEMORY_WRITE_BIT,
.dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT | VK_ACCESS_TRANSFER_WRITE_BIT,
};
scheduler.RequestOutsideRenderPassOperationContext();
scheduler.Record([](vk::CommandBuffer cmdbuf) {
cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT,
0, READ_BARRIER);
});
}
void BufferCacheRuntime::PostCopyBarrier() {
static constexpr VkMemoryBarrier WRITE_BARRIER{
.sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER,
.pNext = nullptr,
.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
.dstAccessMask = VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT,
};
scheduler.RequestOutsideRenderPassOperationContext();
scheduler.Record([](vk::CommandBuffer cmdbuf) {
cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
0, WRITE_BARRIER);
});
}
void BufferCacheRuntime::ClearBuffer(VkBuffer dest_buffer, u32 offset, size_t size, u32 value) {
static constexpr VkMemoryBarrier READ_BARRIER{
.sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER,
.pNext = nullptr,
.srcAccessMask = VK_ACCESS_MEMORY_WRITE_BIT,
.dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT | VK_ACCESS_TRANSFER_WRITE_BIT,
};
static constexpr VkMemoryBarrier WRITE_BARRIER{
.sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER,
.pNext = nullptr,
.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
.dstAccessMask = VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT,
};
scheduler.RequestOutsideRenderPassOperationContext();
scheduler.Record([dest_buffer, offset, size, value](vk::CommandBuffer cmdbuf) {
cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT,
0, READ_BARRIER);
cmdbuf.FillBuffer(dest_buffer, offset, size, value);
cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
0, WRITE_BARRIER);
});
}
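// Binds an index buffer, routing quad topologies through the quad-index pass
// and, when VK_EXT_index_type_uint8 is unavailable, 8-bit indices through the
// uint8-to-uint16 pass. Null buffers are replaced with a dedicated null buffer.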
void BufferCacheRuntime::BindIndexBuffer(PrimitiveTopology topology, IndexFormat index_format,
u32 base_vertex, u32 num_indices, VkBuffer buffer,
u32 offset, [[maybe_unused]] u32 size) {
VkIndexType vk_index_type = MaxwellToVK::IndexFormat(index_format);
VkDeviceSize vk_offset = offset;
VkBuffer vk_buffer = buffer;
if (topology == PrimitiveTopology::Quads) {
vk_index_type = VK_INDEX_TYPE_UINT32;
std::tie(vk_buffer, vk_offset) =
quad_index_pass.Assemble(index_format, num_indices, base_vertex, buffer, offset);
} else if (vk_index_type == VK_INDEX_TYPE_UINT8_EXT && !device.IsExtIndexTypeUint8Supported()) {
vk_index_type = VK_INDEX_TYPE_UINT16;
std::tie(vk_buffer, vk_offset) = uint8_pass.Assemble(num_indices, buffer, offset);
}
if (vk_buffer == VK_NULL_HANDLE) {
// Vulkan doesn't support null index buffers. Replace it with our own null buffer.
ReserveNullBuffer();
vk_buffer = *null_buffer;
}
scheduler.Record([vk_buffer, vk_offset, vk_index_type](vk::CommandBuffer cmdbuf) {
cmdbuf.BindIndexBuffer(vk_buffer, vk_offset, vk_index_type);
});
}
void BufferCacheRuntime::BindQuadArrayIndexBuffer(u32 first, u32 count) {
if (count == 0) {
ReserveNullBuffer();
scheduler.Record([this](vk::CommandBuffer cmdbuf) {
cmdbuf.BindIndexBuffer(*null_buffer, 0, VK_INDEX_TYPE_UINT32);
});
return;
}
ReserveQuadArrayLUT(first + count, true);
// The LUT stores four copies of the expanded quad indices, one per possible
// (first % 4) remainder, so any 'first' offset can be honored by selecting the
// matching copy and then skipping first / 4 whole quads into it.
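// Worked example: with first == 6 and N quads per copy, the computed offset
// lands on copy 6 % 4 == 2 at quad 6 / 4 == 1, whose first index value is
// 2 + 4 * 1 == 6, exactly the requested base.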
const VkIndexType index_type = quad_array_lut_index_type;
const size_t sub_first_offset = static_cast<size_t>(first % 4) * (current_num_indices / 4);
const size_t offset = (sub_first_offset + first / 4) * 6ULL * BytesPerIndex(index_type);
scheduler.Record([buffer = *quad_array_lut, index_type, offset](vk::CommandBuffer cmdbuf) {
cmdbuf.BindIndexBuffer(buffer, offset, index_type);
});
}
void BufferCacheRuntime::BindVertexBuffer(u32 index, VkBuffer buffer, u32 offset, u32 size,
u32 stride) {
if (device.IsExtExtendedDynamicStateSupported()) {
scheduler.Record([index, buffer, offset, size, stride](vk::CommandBuffer cmdbuf) {
const VkDeviceSize vk_offset = buffer != VK_NULL_HANDLE ? offset : 0;
const VkDeviceSize vk_size = buffer != VK_NULL_HANDLE ? size : VK_WHOLE_SIZE;
const VkDeviceSize vk_stride = stride;
cmdbuf.BindVertexBuffers2EXT(index, 1, &buffer, &vk_offset, &vk_size, &vk_stride);
});
} else {
scheduler.Record([index, buffer, offset](vk::CommandBuffer cmdbuf) {
cmdbuf.BindVertexBuffer(index, buffer, offset);
});
}
}
void BufferCacheRuntime::BindTransformFeedbackBuffer(u32 index, VkBuffer buffer, u32 offset,
u32 size) {
if (!device.IsExtTransformFeedbackSupported()) {
// Already logged in the rasterizer
return;
}
if (buffer == VK_NULL_HANDLE) {
// Vulkan doesn't support null transform feedback buffers.
// Replace it with our own null buffer.
ReserveNullBuffer();
buffer = *null_buffer;
offset = 0;
size = 0;
}
scheduler.Record([index, buffer, offset, size](vk::CommandBuffer cmdbuf) {
const VkDeviceSize vk_offset = offset;
const VkDeviceSize vk_size = size;
cmdbuf.BindTransformFeedbackBuffersEXT(index, 1, &buffer, &vk_offset, &vk_size);
});
}
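// Grows the quad index LUT to cover at least num_indices indices, selecting
// the narrowest index type that fits and uploading the rebuilt table through
// a staging buffer.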
void BufferCacheRuntime::ReserveQuadArrayLUT(u32 num_indices, bool wait_for_idle) {
if (num_indices <= current_num_indices) {
return;
}
if (wait_for_idle) {
scheduler.Finish();
}
current_num_indices = num_indices;
quad_array_lut_index_type = IndexTypeFromNumElements(device, num_indices);
const u32 num_quads = num_indices / 4;
const u32 num_triangle_indices = num_quads * 6;
const u32 num_first_offset_copies = 4;
const size_t bytes_per_index = BytesPerIndex(quad_array_lut_index_type);
const size_t size_bytes = num_triangle_indices * bytes_per_index * num_first_offset_copies;
quad_array_lut = device.GetLogical().CreateBuffer(VkBufferCreateInfo{
.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
.size = size_bytes,
.usage = VK_BUFFER_USAGE_INDEX_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT,
.sharingMode = VK_SHARING_MODE_EXCLUSIVE,
.queueFamilyIndexCount = 0,
.pQueueFamilyIndices = nullptr,
});
if (device.HasDebuggingToolAttached()) {
quad_array_lut.SetObjectNameEXT("Quad LUT");
}
quad_array_lut_commit = memory_allocator.Commit(quad_array_lut, MemoryUsage::DeviceLocal);
const StagingBufferRef staging = staging_pool.Request(size_bytes, MemoryUsage::Upload);
u8* staging_data = staging.mapped_span.data();
const size_t quad_size = bytes_per_index * 6;
for (u32 first = 0; first < num_first_offset_copies; ++first) {
for (u32 quad = 0; quad < num_quads; ++quad) {
switch (quad_array_lut_index_type) {
case VK_INDEX_TYPE_UINT8_EXT:
std::memcpy(staging_data, MakeQuadIndices<u8>(quad, first).data(), quad_size);
break;
case VK_INDEX_TYPE_UINT16:
std::memcpy(staging_data, MakeQuadIndices<u16>(quad, first).data(), quad_size);
break;
case VK_INDEX_TYPE_UINT32:
std::memcpy(staging_data, MakeQuadIndices<u32>(quad, first).data(), quad_size);
break;
default:
ASSERT(false);
break;
}
staging_data += quad_size;
}
}
scheduler.RequestOutsideRenderPassOperationContext();
scheduler.Record([src_buffer = staging.buffer, src_offset = staging.offset,
dst_buffer = *quad_array_lut, size_bytes](vk::CommandBuffer cmdbuf) {
const VkBufferCopy copy{
.srcOffset = src_offset,
.dstOffset = 0,
.size = size_bytes,
};
const VkBufferMemoryBarrier write_barrier{
.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,
.pNext = nullptr,
.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
.dstAccessMask = VK_ACCESS_INDEX_READ_BIT,
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.buffer = dst_buffer,
.offset = 0,
.size = size_bytes,
};
cmdbuf.CopyBuffer(src_buffer, dst_buffer, copy);
cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_VERTEX_INPUT_BIT,
0, write_barrier);
});
}
void BufferCacheRuntime::ReserveNullBuffer() {
if (null_buffer) {
return;
}
VkBufferCreateInfo create_info{
.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
.size = 4,
.usage = VK_BUFFER_USAGE_INDEX_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT,
.sharingMode = VK_SHARING_MODE_EXCLUSIVE,
.queueFamilyIndexCount = 0,
.pQueueFamilyIndices = nullptr,
};
if (device.IsExtTransformFeedbackSupported()) {
create_info.usage |= VK_BUFFER_USAGE_TRANSFORM_FEEDBACK_BUFFER_BIT_EXT;
}
null_buffer = device.GetLogical().CreateBuffer(create_info);
if (device.HasDebuggingToolAttached()) {
null_buffer.SetObjectNameEXT("Null buffer");
}
null_buffer_commit = memory_allocator.Commit(null_buffer, MemoryUsage::DeviceLocal);
scheduler.RequestOutsideRenderPassOperationContext();
scheduler.Record([buffer = *null_buffer](vk::CommandBuffer cmdbuf) {
cmdbuf.FillBuffer(buffer, 0, VK_WHOLE_SIZE, 0);
});
}
} // namespace Vulkan


@@ -1,158 +1,158 @@
// SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include "video_core/buffer_cache/buffer_cache.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/renderer_vulkan/vk_compute_pass.h"
#include "video_core/renderer_vulkan/vk_staging_buffer_pool.h"
#include "video_core/renderer_vulkan/vk_update_descriptor.h"
#include "video_core/surface.h"
#include "video_core/vulkan_common/vulkan_memory_allocator.h"
#include "video_core/vulkan_common/vulkan_wrapper.h"
namespace Vulkan {
class Device;
class DescriptorPool;
class Scheduler;
class BufferCacheRuntime;
class Buffer : public VideoCommon::BufferBase<VideoCore::RasterizerInterface> {
public:
explicit Buffer(BufferCacheRuntime&, VideoCommon::NullBufferParams null_params);
explicit Buffer(BufferCacheRuntime& runtime, VideoCore::RasterizerInterface& rasterizer_,
VAddr cpu_addr_, u64 size_bytes_);
[[nodiscard]] VkBufferView View(u32 offset, u32 size, VideoCore::Surface::PixelFormat format);
[[nodiscard]] VkBuffer Handle() const noexcept {
return *buffer;
}
operator VkBuffer() const noexcept {
return *buffer;
}
private:
struct BufferView {
u32 offset;
u32 size;
VideoCore::Surface::PixelFormat format;
vk::BufferView handle;
};
const Device* device{};
vk::Buffer buffer;
MemoryCommit commit;
std::vector<BufferView> views;
};
class BufferCacheRuntime {
friend Buffer;
using PrimitiveTopology = Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology;
using IndexFormat = Tegra::Engines::Maxwell3D::Regs::IndexFormat;
public:
explicit BufferCacheRuntime(const Device& device_, MemoryAllocator& memory_allocator_,
Scheduler& scheduler_, StagingBufferPool& staging_pool_,
UpdateDescriptorQueue& update_descriptor_queue_,
DescriptorPool& descriptor_pool);
void Finish();
u64 GetDeviceLocalMemory() const;
u64 GetDeviceMemoryUsage() const;
bool CanReportMemoryUsage() const;
[[nodiscard]] StagingBufferRef UploadStagingBuffer(size_t size);
[[nodiscard]] StagingBufferRef DownloadStagingBuffer(size_t size);
void PreCopyBarrier();
void CopyBuffer(VkBuffer src_buffer, VkBuffer dst_buffer,
std::span<const VideoCommon::BufferCopy> copies, bool barrier = true);
void PostCopyBarrier();
void ClearBuffer(VkBuffer dest_buffer, u32 offset, size_t size, u32 value);
void BindIndexBuffer(PrimitiveTopology topology, IndexFormat index_format, u32 base_vertex,
u32 num_indices, VkBuffer buffer, u32 offset, u32 size);
void BindQuadArrayIndexBuffer(u32 first, u32 count);
void BindVertexBuffer(u32 index, VkBuffer buffer, u32 offset, u32 size, u32 stride);
void BindTransformFeedbackBuffer(u32 index, VkBuffer buffer, u32 offset, u32 size);
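// Requests an upload staging buffer, binds it as a uniform buffer and returns
// its mapped span for the caller to fill.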
std::span<u8> BindMappedUniformBuffer([[maybe_unused]] size_t stage,
[[maybe_unused]] u32 binding_index, u32 size) {
const StagingBufferRef ref = staging_pool.Request(size, MemoryUsage::Upload);
BindBuffer(ref.buffer, static_cast<u32>(ref.offset), size);
return ref.mapped_span;
}
void BindUniformBuffer(VkBuffer buffer, u32 offset, u32 size) {
BindBuffer(buffer, offset, size);
}
void BindStorageBuffer(VkBuffer buffer, u32 offset, u32 size,
[[maybe_unused]] bool is_written) {
BindBuffer(buffer, offset, size);
}
void BindTextureBuffer(Buffer& buffer, u32 offset, u32 size,
VideoCore::Surface::PixelFormat format) {
update_descriptor_queue.AddTexelBuffer(buffer.View(offset, size, format));
}
private:
void BindBuffer(VkBuffer buffer, u32 offset, u32 size) {
update_descriptor_queue.AddBuffer(buffer, offset, size);
}
void ReserveQuadArrayLUT(u32 num_indices, bool wait_for_idle);
void ReserveNullBuffer();
const Device& device;
MemoryAllocator& memory_allocator;
Scheduler& scheduler;
StagingBufferPool& staging_pool;
UpdateDescriptorQueue& update_descriptor_queue;
vk::Buffer quad_array_lut;
MemoryCommit quad_array_lut_commit;
VkIndexType quad_array_lut_index_type{};
u32 current_num_indices = 0;
vk::Buffer null_buffer;
MemoryCommit null_buffer_commit;
Uint8Pass uint8_pass;
QuadIndexedPass quad_index_pass;
};
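// Vulkan offers no guaranteed support for 8-bit indices or quad primitives,
// hence HAS_FULL_INDEX_AND_PRIMITIVE_SUPPORT is false and the compute passes
// declared above emulate both.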
struct BufferCacheParams {
using Runtime = Vulkan::BufferCacheRuntime;
using Buffer = Vulkan::Buffer;
static constexpr bool IS_OPENGL = false;
static constexpr bool HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS = false;
static constexpr bool HAS_FULL_INDEX_AND_PRIMITIVE_SUPPORT = false;
static constexpr bool NEEDS_BIND_UNIFORM_INDEX = false;
static constexpr bool NEEDS_BIND_STORAGE_INDEX = false;
static constexpr bool USE_MEMORY_MAPS = true;
static constexpr bool SEPARATE_IMAGE_BUFFER_BINDINGS = false;
};
using BufferCache = VideoCommon::BufferCache<BufferCacheParams>;
} // namespace Vulkan


@@ -1,45 +1,45 @@
// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include <cstddef>
#include "video_core/renderer_vulkan/vk_command_pool.h"
#include "video_core/vulkan_common/vulkan_device.h"
#include "video_core/vulkan_common/vulkan_wrapper.h"
namespace Vulkan {
constexpr size_t COMMAND_BUFFER_POOL_SIZE = 4;
struct CommandPool::Pool {
vk::CommandPool handle;
vk::CommandBuffers cmdbufs;
};
CommandPool::CommandPool(MasterSemaphore& master_semaphore_, const Device& device_)
: ResourcePool(master_semaphore_, COMMAND_BUFFER_POOL_SIZE), device{device_} {}
CommandPool::~CommandPool() = default;
void CommandPool::Allocate(size_t begin, size_t end) {
// Command buffers are going to be committed, recorded, and executed every single usage cycle.
// They are also going to be reset when committed.
Pool& pool = pools.emplace_back();
pool.handle = device.GetLogical().CreateCommandPool({
.sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO,
.pNext = nullptr,
.flags =
VK_COMMAND_POOL_CREATE_TRANSIENT_BIT | VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT,
.queueFamilyIndex = device.GetGraphicsFamily(),
});
pool.cmdbufs = pool.handle.Allocate(COMMAND_BUFFER_POOL_SIZE);
}
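// Maps the flat resource index onto a (pool, buffer) pair: indices are handed
// out in chunks of COMMAND_BUFFER_POOL_SIZE, so index 5 resolves to pool 1,
// command buffer 1.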
VkCommandBuffer CommandPool::Commit() {
const size_t index = CommitResource();
const auto pool_index = index / COMMAND_BUFFER_POOL_SIZE;
const auto sub_index = index % COMMAND_BUFFER_POOL_SIZE;
return pools[pool_index].cmdbufs[sub_index];
}
} // namespace Vulkan


@@ -1,33 +1,33 @@
// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include <cstddef>
#include <vector>
#include "video_core/renderer_vulkan/vk_resource_pool.h"
#include "video_core/vulkan_common/vulkan_wrapper.h"
namespace Vulkan {
class Device;
class MasterSemaphore;
class CommandPool final : public ResourcePool {
public:
explicit CommandPool(MasterSemaphore& master_semaphore_, const Device& device_);
~CommandPool() override;
void Allocate(size_t begin, size_t end) override;
VkCommandBuffer Commit();
private:
struct Pool;
const Device& device;
std::vector<Pool> pools;
};
} // namespace Vulkan


@@ -1,414 +1,414 @@
// SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include <array>
#include <memory>
#include <optional>
#include <utility>
#include "common/assert.h"
#include "common/common_types.h"
#include "common/div_ceil.h"
#include "video_core/host_shaders/astc_decoder_comp_spv.h"
#include "video_core/host_shaders/vulkan_quad_indexed_comp_spv.h"
#include "video_core/host_shaders/vulkan_uint8_comp_spv.h"
#include "video_core/renderer_vulkan/vk_compute_pass.h"
#include "video_core/renderer_vulkan/vk_descriptor_pool.h"
#include "video_core/renderer_vulkan/vk_scheduler.h"
#include "video_core/renderer_vulkan/vk_staging_buffer_pool.h"
#include "video_core/renderer_vulkan/vk_texture_cache.h"
#include "video_core/renderer_vulkan/vk_update_descriptor.h"
#include "video_core/texture_cache/accelerated_swizzle.h"
#include "video_core/texture_cache/types.h"
#include "video_core/textures/decoders.h"
#include "video_core/vulkan_common/vulkan_device.h"
#include "video_core/vulkan_common/vulkan_wrapper.h"
namespace Vulkan {
namespace {
constexpr u32 ASTC_BINDING_INPUT_BUFFER = 0;
constexpr u32 ASTC_BINDING_OUTPUT_IMAGE = 1;
constexpr size_t ASTC_NUM_BINDINGS = 2;
template <size_t size>
inline constexpr VkPushConstantRange COMPUTE_PUSH_CONSTANT_RANGE{
.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
.offset = 0,
.size = static_cast<u32>(size),
};
constexpr std::array<VkDescriptorSetLayoutBinding, 2> INPUT_OUTPUT_DESCRIPTOR_SET_BINDINGS{{
{
.binding = 0,
.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
.descriptorCount = 1,
.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
.pImmutableSamplers = nullptr,
},
{
.binding = 1,
.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
.descriptorCount = 1,
.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
.pImmutableSamplers = nullptr,
},
}};
constexpr DescriptorBankInfo INPUT_OUTPUT_BANK_INFO{
.uniform_buffers = 0,
.storage_buffers = 2,
.texture_buffers = 0,
.image_buffers = 0,
.textures = 0,
.images = 0,
.score = 2,
};
constexpr std::array<VkDescriptorSetLayoutBinding, ASTC_NUM_BINDINGS> ASTC_DESCRIPTOR_SET_BINDINGS{{
{
.binding = ASTC_BINDING_INPUT_BUFFER,
.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
.descriptorCount = 1,
.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
.pImmutableSamplers = nullptr,
},
{
.binding = ASTC_BINDING_OUTPUT_IMAGE,
.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
.descriptorCount = 1,
.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
.pImmutableSamplers = nullptr,
},
}};
constexpr DescriptorBankInfo ASTC_BANK_INFO{
.uniform_buffers = 0,
.storage_buffers = 1,
.texture_buffers = 0,
.image_buffers = 0,
.textures = 0,
.images = 1,
.score = 2,
};
constexpr VkDescriptorUpdateTemplateEntryKHR INPUT_OUTPUT_DESCRIPTOR_UPDATE_TEMPLATE{
.dstBinding = 0,
.dstArrayElement = 0,
.descriptorCount = 2,
.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
.offset = 0,
.stride = sizeof(DescriptorUpdateEntry),
};
constexpr std::array<VkDescriptorUpdateTemplateEntryKHR, ASTC_NUM_BINDINGS>
ASTC_PASS_DESCRIPTOR_UPDATE_TEMPLATE_ENTRY{{
{
.dstBinding = ASTC_BINDING_INPUT_BUFFER,
.dstArrayElement = 0,
.descriptorCount = 1,
.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
.offset = ASTC_BINDING_INPUT_BUFFER * sizeof(DescriptorUpdateEntry),
.stride = sizeof(DescriptorUpdateEntry),
},
{
.dstBinding = ASTC_BINDING_OUTPUT_IMAGE,
.dstArrayElement = 0,
.descriptorCount = 1,
.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
.offset = ASTC_BINDING_OUTPUT_IMAGE * sizeof(DescriptorUpdateEntry),
.stride = sizeof(DescriptorUpdateEntry),
},
}};
struct AstcPushConstants {
std::array<u32, 2> blocks_dims;
u32 layer_stride;
u32 block_size;
u32 x_shift;
u32 block_height;
u32 block_height_mask;
};
} // Anonymous namespace
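// Generic compute pass: builds the descriptor set layout, pipeline layout,
// optional descriptor update template and the compute pipeline from the given
// SPIR-V code.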
ComputePass::ComputePass(const Device& device_, DescriptorPool& descriptor_pool,
vk::Span<VkDescriptorSetLayoutBinding> bindings,
vk::Span<VkDescriptorUpdateTemplateEntryKHR> templates,
const DescriptorBankInfo& bank_info,
vk::Span<VkPushConstantRange> push_constants, std::span<const u32> code)
: device{device_} {
descriptor_set_layout = device.GetLogical().CreateDescriptorSetLayout({
.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
.bindingCount = bindings.size(),
.pBindings = bindings.data(),
});
layout = device.GetLogical().CreatePipelineLayout({
.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
.setLayoutCount = 1,
.pSetLayouts = descriptor_set_layout.address(),
.pushConstantRangeCount = push_constants.size(),
.pPushConstantRanges = push_constants.data(),
});
if (!templates.empty()) {
descriptor_template = device.GetLogical().CreateDescriptorUpdateTemplateKHR({
.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_UPDATE_TEMPLATE_CREATE_INFO_KHR,
.pNext = nullptr,
.flags = 0,
.descriptorUpdateEntryCount = templates.size(),
.pDescriptorUpdateEntries = templates.data(),
.templateType = VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_DESCRIPTOR_SET_KHR,
.descriptorSetLayout = *descriptor_set_layout,
.pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS, // Ignored for DESCRIPTOR_SET templates
.pipelineLayout = *layout,
.set = 0,
});
descriptor_allocator = descriptor_pool.Allocator(*descriptor_set_layout, bank_info);
}
module = device.GetLogical().CreateShaderModule({
.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
.codeSize = static_cast<u32>(code.size_bytes()),
.pCode = code.data(),
});
device.SaveShader(code);
pipeline = device.GetLogical().CreateComputePipeline({
.sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
.stage{
.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
.stage = VK_SHADER_STAGE_COMPUTE_BIT,
.module = *module,
.pName = "main",
.pSpecializationInfo = nullptr,
},
.layout = *layout,
.basePipelineHandle = nullptr,
.basePipelineIndex = 0,
});
}
ComputePass::~ComputePass() = default;
Uint8Pass::Uint8Pass(const Device& device_, Scheduler& scheduler_, DescriptorPool& descriptor_pool,
StagingBufferPool& staging_buffer_pool_,
UpdateDescriptorQueue& update_descriptor_queue_)
: ComputePass(device_, descriptor_pool, INPUT_OUTPUT_DESCRIPTOR_SET_BINDINGS,
INPUT_OUTPUT_DESCRIPTOR_UPDATE_TEMPLATE, INPUT_OUTPUT_BANK_INFO, {},
VULKAN_UINT8_COMP_SPV),
scheduler{scheduler_}, staging_buffer_pool{staging_buffer_pool_},
update_descriptor_queue{update_descriptor_queue_} {}
Uint8Pass::~Uint8Pass() = default;
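// Widens uint8 indices to uint16 on the GPU. A CPU-side sketch of what
// VULKAN_UINT8_COMP_SPV performs (illustrative; the real work happens in the
// compute shader, one invocation per index):
//
//   for (u32 i = 0; i < num_vertices; ++i) {
//       dst_u16[i] = static_cast<u16>(src_u8[i]);
//   }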
std::pair<VkBuffer, VkDeviceSize> Uint8Pass::Assemble(u32 num_vertices, VkBuffer src_buffer,
u32 src_offset) {
const u32 staging_size = static_cast<u32>(num_vertices * sizeof(u16));
const auto staging = staging_buffer_pool.Request(staging_size, MemoryUsage::DeviceLocal);
update_descriptor_queue.Acquire();
update_descriptor_queue.AddBuffer(src_buffer, src_offset, num_vertices);
update_descriptor_queue.AddBuffer(staging.buffer, staging.offset, staging_size);
const void* const descriptor_data{update_descriptor_queue.UpdateData()};
scheduler.RequestOutsideRenderPassOperationContext();
scheduler.Record([this, descriptor_data, num_vertices](vk::CommandBuffer cmdbuf) {
static constexpr u32 DISPATCH_SIZE = 1024;
static constexpr VkMemoryBarrier WRITE_BARRIER{
.sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER,
.pNext = nullptr,
.srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT,
.dstAccessMask = VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT,
};
const VkDescriptorSet set = descriptor_allocator.Commit();
device.GetLogical().UpdateDescriptorSet(set, *descriptor_template, descriptor_data);
cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline);
cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, *layout, 0, set, {});
cmdbuf.Dispatch(Common::DivCeil(num_vertices, DISPATCH_SIZE), 1, 1);
cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
VK_PIPELINE_STAGE_VERTEX_INPUT_BIT, 0, WRITE_BARRIER);
});
return {staging.buffer, staging.offset};
}
QuadIndexedPass::QuadIndexedPass(const Device& device_, Scheduler& scheduler_,
DescriptorPool& descriptor_pool_,
StagingBufferPool& staging_buffer_pool_,
UpdateDescriptorQueue& update_descriptor_queue_)
: ComputePass(device_, descriptor_pool_, INPUT_OUTPUT_DESCRIPTOR_SET_BINDINGS,
INPUT_OUTPUT_DESCRIPTOR_UPDATE_TEMPLATE, INPUT_OUTPUT_BANK_INFO,
COMPUTE_PUSH_CONSTANT_RANGE<sizeof(u32) * 2>, VULKAN_QUAD_INDEXED_COMP_SPV),
scheduler{scheduler_}, staging_buffer_pool{staging_buffer_pool_},
update_descriptor_queue{update_descriptor_queue_} {}
QuadIndexedPass::~QuadIndexedPass() = default;
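// Expands an indexed quad list into an indexed triangle list: each quad
// (v0, v1, v2, v3) becomes the triangles (v0, v1, v2) and (v0, v2, v3), hence
// (num_vertices / 4) * 6 output indices. A CPU-side sketch of what
// VULKAN_QUAD_INDEXED_COMP_SPV does (illustrative names; the exact winding
// lives in the shader):
//
//   constexpr u32 QUAD_MAP[6]{0, 1, 2, 0, 2, 3};
//   for (u32 quad = 0; quad < num_vertices / 4; ++quad) {
//       for (u32 i = 0; i < 6; ++i) {
//           dst[quad * 6 + i] = base_vertex + ReadIndex(quad * 4 + QUAD_MAP[i]);
//       }
//   }
//
// ReadIndex reads (1 << index_shift) bytes per source index.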
std::pair<VkBuffer, VkDeviceSize> QuadIndexedPass::Assemble(
Tegra::Engines::Maxwell3D::Regs::IndexFormat index_format, u32 num_vertices, u32 base_vertex,
VkBuffer src_buffer, u32 src_offset) {
const u32 index_shift = [index_format] {
switch (index_format) {
case Tegra::Engines::Maxwell3D::Regs::IndexFormat::UnsignedByte:
return 0;
case Tegra::Engines::Maxwell3D::Regs::IndexFormat::UnsignedShort:
return 1;
case Tegra::Engines::Maxwell3D::Regs::IndexFormat::UnsignedInt:
return 2;
}
ASSERT(false);
return 2;
}();
const u32 input_size = num_vertices << index_shift;
const u32 num_tri_vertices = (num_vertices / 4) * 6;
const std::size_t staging_size = num_tri_vertices * sizeof(u32);
const auto staging = staging_buffer_pool.Request(staging_size, MemoryUsage::DeviceLocal);
update_descriptor_queue.Acquire();
update_descriptor_queue.AddBuffer(src_buffer, src_offset, input_size);
update_descriptor_queue.AddBuffer(staging.buffer, staging.offset, staging_size);
const void* const descriptor_data{update_descriptor_queue.UpdateData()};
scheduler.RequestOutsideRenderPassOperationContext();
scheduler.Record([this, descriptor_data, num_tri_vertices, base_vertex,
index_shift](vk::CommandBuffer cmdbuf) {
static constexpr u32 DISPATCH_SIZE = 1024;
static constexpr VkMemoryBarrier WRITE_BARRIER{
.sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER,
.pNext = nullptr,
.srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT,
.dstAccessMask = VK_ACCESS_INDEX_READ_BIT,
};
const std::array<u32, 2> push_constants{base_vertex, index_shift};
const VkDescriptorSet set = descriptor_allocator.Commit();
device.GetLogical().UpdateDescriptorSet(set, *descriptor_template, descriptor_data);
cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline);
cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, *layout, 0, set, {});
cmdbuf.PushConstants(*layout, VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(push_constants),
&push_constants);
cmdbuf.Dispatch(Common::DivCeil(num_tri_vertices, DISPATCH_SIZE), 1, 1);
cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
VK_PIPELINE_STAGE_VERTEX_INPUT_BIT, 0, WRITE_BARRIER);
});
return {staging.buffer, staging.offset};
}
ASTCDecoderPass::ASTCDecoderPass(const Device& device_, Scheduler& scheduler_,
DescriptorPool& descriptor_pool_,
StagingBufferPool& staging_buffer_pool_,
UpdateDescriptorQueue& update_descriptor_queue_,
MemoryAllocator& memory_allocator_)
: ComputePass(device_, descriptor_pool_, ASTC_DESCRIPTOR_SET_BINDINGS,
ASTC_PASS_DESCRIPTOR_UPDATE_TEMPLATE_ENTRY, ASTC_BANK_INFO,
COMPUTE_PUSH_CONSTANT_RANGE<sizeof(AstcPushConstants)>, ASTC_DECODER_COMP_SPV),
scheduler{scheduler_}, staging_buffer_pool{staging_buffer_pool_},
update_descriptor_queue{update_descriptor_queue_}, memory_allocator{memory_allocator_} {}
ASTCDecoderPass::~ASTCDecoderPass() = default;
void ASTCDecoderPass::Assemble(Image& image, const StagingBufferRef& map,
std::span<const VideoCommon::SwizzleParameters> swizzles) {
using namespace VideoCommon::Accelerated;
const std::array<u32, 2> block_dims{
VideoCore::Surface::DefaultBlockWidth(image.info.format),
VideoCore::Surface::DefaultBlockHeight(image.info.format),
};
scheduler.RequestOutsideRenderPassOperationContext();
const VkPipeline vk_pipeline = *pipeline;
const VkImageAspectFlags aspect_mask = image.AspectMask();
const VkImage vk_image = image.Handle();
const bool is_initialized = image.ExchangeInitialization();
scheduler.Record([vk_pipeline, vk_image, aspect_mask,
is_initialized](vk::CommandBuffer cmdbuf) {
const VkImageMemoryBarrier image_barrier{
.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
.pNext = nullptr,
.srcAccessMask = static_cast<VkAccessFlags>(is_initialized ? VK_ACCESS_SHADER_WRITE_BIT
: VK_ACCESS_NONE),
.dstAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT,
.oldLayout = is_initialized ? VK_IMAGE_LAYOUT_GENERAL : VK_IMAGE_LAYOUT_UNDEFINED,
.newLayout = VK_IMAGE_LAYOUT_GENERAL,
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.image = vk_image,
.subresourceRange{
.aspectMask = aspect_mask,
.baseMipLevel = 0,
.levelCount = VK_REMAINING_MIP_LEVELS,
.baseArrayLayer = 0,
.layerCount = VK_REMAINING_ARRAY_LAYERS,
},
};
cmdbuf.PipelineBarrier(is_initialized ? VK_PIPELINE_STAGE_ALL_COMMANDS_BIT
: VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, image_barrier);
cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, vk_pipeline);
});
for (const VideoCommon::SwizzleParameters& swizzle : swizzles) {
const size_t input_offset = swizzle.buffer_offset + map.offset;
const u32 num_dispatches_x = Common::DivCeil(swizzle.num_tiles.width, 8U);
const u32 num_dispatches_y = Common::DivCeil(swizzle.num_tiles.height, 8U);
const u32 num_dispatches_z = image.info.resources.layers;
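// Dispatches are grouped in 8x8 tile blocks (DivCeil by 8), matching the
// workgroup footprint expected by the decoder shader.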
update_descriptor_queue.Acquire();
update_descriptor_queue.AddBuffer(map.buffer, input_offset,
image.guest_size_bytes - swizzle.buffer_offset);
update_descriptor_queue.AddImage(image.StorageImageView(swizzle.level));
const void* const descriptor_data{update_descriptor_queue.UpdateData()};
// Compute the block linear swizzle parameters used to unswizzle the ASTC data
const auto params = MakeBlockLinearSwizzle2DParams(swizzle, image.info);
ASSERT(params.origin == (std::array<u32, 3>{0, 0, 0}));
ASSERT(params.destination == (std::array<s32, 3>{0, 0, 0}));
ASSERT(params.bytes_per_block_log2 == 4);
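// Every ASTC block encodes to 128 bits regardless of its footprint, so
// bytes_per_block_log2 is always 4 here.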
scheduler.Record([this, num_dispatches_x, num_dispatches_y, num_dispatches_z, block_dims,
params, descriptor_data](vk::CommandBuffer cmdbuf) {
const AstcPushConstants uniforms{
.blocks_dims = block_dims,
.layer_stride = params.layer_stride,
.block_size = params.block_size,
.x_shift = params.x_shift,
.block_height = params.block_height,
.block_height_mask = params.block_height_mask,
};
const VkDescriptorSet set = descriptor_allocator.Commit();
device.GetLogical().UpdateDescriptorSet(set, *descriptor_template, descriptor_data);
cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, *layout, 0, set, {});
cmdbuf.PushConstants(*layout, VK_SHADER_STAGE_COMPUTE_BIT, uniforms);
cmdbuf.Dispatch(num_dispatches_x, num_dispatches_y, num_dispatches_z);
});
}
scheduler.Record([vk_image, aspect_mask](vk::CommandBuffer cmdbuf) {
const VkImageMemoryBarrier image_barrier{
.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
.pNext = nullptr,
.srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT,
.dstAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT,
.oldLayout = VK_IMAGE_LAYOUT_GENERAL,
.newLayout = VK_IMAGE_LAYOUT_GENERAL,
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.image = vk_image,
.subresourceRange{
.aspectMask = aspect_mask,
.baseMipLevel = 0,
.levelCount = VK_REMAINING_MIP_LEVELS,
.baseArrayLayer = 0,
.layerCount = VK_REMAINING_ARRAY_LAYERS,
},
};
cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, 0, image_barrier);
});
scheduler.Finish();
}
} // namespace Vulkan

@@ -1,104 +1,104 @@
// SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include <span>
#include <utility>
#include "common/common_types.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/renderer_vulkan/vk_descriptor_pool.h"
#include "video_core/vulkan_common/vulkan_memory_allocator.h"
#include "video_core/vulkan_common/vulkan_wrapper.h"
namespace VideoCommon {
struct SwizzleParameters;
}
namespace Vulkan {
class Device;
class StagingBufferPool;
class Scheduler;
class UpdateDescriptorQueue;
class Image;
struct StagingBufferRef;
class ComputePass {
public:
explicit ComputePass(const Device& device, DescriptorPool& descriptor_pool,
vk::Span<VkDescriptorSetLayoutBinding> bindings,
vk::Span<VkDescriptorUpdateTemplateEntryKHR> templates,
const DescriptorBankInfo& bank_info,
vk::Span<VkPushConstantRange> push_constants, std::span<const u32> code);
~ComputePass();
protected:
const Device& device;
vk::DescriptorUpdateTemplateKHR descriptor_template;
vk::PipelineLayout layout;
vk::Pipeline pipeline;
vk::DescriptorSetLayout descriptor_set_layout;
DescriptorAllocator descriptor_allocator;
private:
vk::ShaderModule module;
};
class Uint8Pass final : public ComputePass {
public:
explicit Uint8Pass(const Device& device_, Scheduler& scheduler_,
DescriptorPool& descriptor_pool_, StagingBufferPool& staging_buffer_pool_,
UpdateDescriptorQueue& update_descriptor_queue_);
~Uint8Pass();
/// Assembles uint8 indices into a uint16 index buffer
/// Returns a pair of the staging buffer and the offset where the assembled data begins
std::pair<VkBuffer, VkDeviceSize> Assemble(u32 num_vertices, VkBuffer src_buffer,
u32 src_offset);
private:
Scheduler& scheduler;
StagingBufferPool& staging_buffer_pool;
UpdateDescriptorQueue& update_descriptor_queue;
};
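// Typical use (illustrative): assemble the guest uint8 indices, then bind the
// returned staging buffer as a uint16 index buffer:
//
//   const auto [buffer, offset] = uint8_pass.Assemble(num_indices, src, src_offset);
//   cmdbuf.BindIndexBuffer(buffer, offset, VK_INDEX_TYPE_UINT16);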
class QuadIndexedPass final : public ComputePass {
public:
explicit QuadIndexedPass(const Device& device_, Scheduler& scheduler_,
DescriptorPool& descriptor_pool_,
StagingBufferPool& staging_buffer_pool_,
UpdateDescriptorQueue& update_descriptor_queue_);
~QuadIndexedPass();
std::pair<VkBuffer, VkDeviceSize> Assemble(
Tegra::Engines::Maxwell3D::Regs::IndexFormat index_format, u32 num_vertices,
u32 base_vertex, VkBuffer src_buffer, u32 src_offset);
private:
Scheduler& scheduler;
StagingBufferPool& staging_buffer_pool;
UpdateDescriptorQueue& update_descriptor_queue;
};
class ASTCDecoderPass final : public ComputePass {
public:
explicit ASTCDecoderPass(const Device& device_, Scheduler& scheduler_,
DescriptorPool& descriptor_pool_,
StagingBufferPool& staging_buffer_pool_,
UpdateDescriptorQueue& update_descriptor_queue_,
MemoryAllocator& memory_allocator_);
~ASTCDecoderPass();
void Assemble(Image& image, const StagingBufferRef& map,
std::span<const VideoCommon::SwizzleParameters> swizzles);
private:
Scheduler& scheduler;
StagingBufferPool& staging_buffer_pool;
UpdateDescriptorQueue& update_descriptor_queue;
MemoryAllocator& memory_allocator;
};
} // namespace Vulkan

@@ -1,222 +1,222 @@
// SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include <algorithm>
#include <vector>
#include <boost/container/small_vector.hpp>
#include "video_core/renderer_vulkan/pipeline_helper.h"
#include "video_core/renderer_vulkan/pipeline_statistics.h"
#include "video_core/renderer_vulkan/vk_buffer_cache.h"
#include "video_core/renderer_vulkan/vk_compute_pipeline.h"
#include "video_core/renderer_vulkan/vk_descriptor_pool.h"
#include "video_core/renderer_vulkan/vk_pipeline_cache.h"
#include "video_core/renderer_vulkan/vk_scheduler.h"
#include "video_core/renderer_vulkan/vk_update_descriptor.h"
#include "video_core/shader_notify.h"
#include "video_core/vulkan_common/vulkan_device.h"
#include "video_core/vulkan_common/vulkan_wrapper.h"
namespace Vulkan {
using Shader::ImageBufferDescriptor;
using Shader::Backend::SPIRV::RESCALING_LAYOUT_WORDS_OFFSET;
using Tegra::Texture::TexturePair;
ComputePipeline::ComputePipeline(const Device& device_, DescriptorPool& descriptor_pool,
UpdateDescriptorQueue& update_descriptor_queue_,
Common::ThreadWorker* thread_worker,
PipelineStatistics* pipeline_statistics,
VideoCore::ShaderNotify* shader_notify, const Shader::Info& info_,
vk::ShaderModule spv_module_)
: device{device_}, update_descriptor_queue{update_descriptor_queue_}, info{info_},
spv_module(std::move(spv_module_)) {
if (shader_notify) {
shader_notify->MarkShaderBuilding();
}
std::copy_n(info.constant_buffer_used_sizes.begin(), uniform_buffer_sizes.size(),
uniform_buffer_sizes.begin());
auto func{[this, &descriptor_pool, shader_notify, pipeline_statistics] {
DescriptorLayoutBuilder builder{device};
builder.Add(info, VK_SHADER_STAGE_COMPUTE_BIT);
descriptor_set_layout = builder.CreateDescriptorSetLayout(false);
pipeline_layout = builder.CreatePipelineLayout(*descriptor_set_layout);
descriptor_update_template =
builder.CreateTemplate(*descriptor_set_layout, *pipeline_layout, false);
descriptor_allocator = descriptor_pool.Allocator(*descriptor_set_layout, info);
const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT subgroup_size_ci{
.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT,
.pNext = nullptr,
.requiredSubgroupSize = GuestWarpSize,
};
VkPipelineCreateFlags flags{};
if (device.IsKhrPipelineEexecutablePropertiesEnabled()) {
flags |= VK_PIPELINE_CREATE_CAPTURE_STATISTICS_BIT_KHR;
}
pipeline = device.GetLogical().CreateComputePipeline({
.sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
.pNext = nullptr,
.flags = flags,
.stage{
.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
.pNext = device.IsExtSubgroupSizeControlSupported() ? &subgroup_size_ci : nullptr,
.flags = 0,
.stage = VK_SHADER_STAGE_COMPUTE_BIT,
.module = *spv_module,
.pName = "main",
.pSpecializationInfo = nullptr,
},
.layout = *pipeline_layout,
.basePipelineHandle = 0,
.basePipelineIndex = 0,
});
if (pipeline_statistics) {
pipeline_statistics->Collect(*pipeline);
}
std::scoped_lock lock{build_mutex};
is_built = true;
build_condvar.notify_one();
if (shader_notify) {
shader_notify->MarkShaderComplete();
}
}};
if (thread_worker) {
thread_worker->QueueWork(std::move(func));
} else {
func();
}
}
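// Note: the pipeline may still be building on a worker thread at this point.
// Configure() below only blocks on build_condvar when the pipeline is used
// before the asynchronous build completes.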
void ComputePipeline::Configure(Tegra::Engines::KeplerCompute& kepler_compute,
Tegra::MemoryManager& gpu_memory, Scheduler& scheduler,
BufferCache& buffer_cache, TextureCache& texture_cache) {
update_descriptor_queue.Acquire();
buffer_cache.SetComputeUniformBufferState(info.constant_buffer_mask, &uniform_buffer_sizes);
buffer_cache.UnbindComputeStorageBuffers();
size_t ssbo_index{};
for (const auto& desc : info.storage_buffers_descriptors) {
ASSERT(desc.count == 1);
buffer_cache.BindComputeStorageBuffer(ssbo_index, desc.cbuf_index, desc.cbuf_offset,
desc.is_written);
++ssbo_index;
}
texture_cache.SynchronizeComputeDescriptors();
static constexpr size_t max_elements = 64;
boost::container::static_vector<VideoCommon::ImageViewInOut, max_elements> views;
boost::container::static_vector<VkSampler, max_elements> samplers;
const auto& qmd{kepler_compute.launch_description};
const auto& cbufs{qmd.const_buffer_config};
const bool via_header_index{qmd.linked_tsc != 0};
const auto read_handle{[&](const auto& desc, u32 index) {
ASSERT(((qmd.const_buffer_enable_mask >> desc.cbuf_index) & 1) != 0);
const u32 index_offset{index << desc.size_shift};
const u32 offset{desc.cbuf_offset + index_offset};
const GPUVAddr addr{cbufs[desc.cbuf_index].Address() + offset};
if constexpr (std::is_same_v<decltype(desc), const Shader::TextureDescriptor&> ||
std::is_same_v<decltype(desc), const Shader::TextureBufferDescriptor&>) {
if (desc.has_secondary) {
ASSERT(((qmd.const_buffer_enable_mask >> desc.secondary_cbuf_index) & 1) != 0);
const u32 secondary_offset{desc.secondary_cbuf_offset + index_offset};
const GPUVAddr separate_addr{cbufs[desc.secondary_cbuf_index].Address() +
secondary_offset};
const u32 lhs_raw{gpu_memory.Read<u32>(addr) << desc.shift_left};
const u32 rhs_raw{gpu_memory.Read<u32>(separate_addr) << desc.secondary_shift_left};
return TexturePair(lhs_raw | rhs_raw, via_header_index);
}
}
return TexturePair(gpu_memory.Read<u32>(addr), via_header_index);
}};
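// When a texture descriptor is split across two constant buffers
// (has_secondary), the shifted words from both cbufs are OR'd together to
// reconstruct the full handle; otherwise the handle word is read directly.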
const auto add_image{[&](const auto& desc, bool blacklist) {
for (u32 index = 0; index < desc.count; ++index) {
const auto handle{read_handle(desc, index)};
views.push_back({
.index = handle.first,
.blacklist = blacklist,
.id = {},
});
}
}};
for (const auto& desc : info.texture_buffer_descriptors) {
add_image(desc, false);
}
for (const auto& desc : info.image_buffer_descriptors) {
add_image(desc, false);
}
for (const auto& desc : info.texture_descriptors) {
for (u32 index = 0; index < desc.count; ++index) {
const auto handle{read_handle(desc, index)};
views.push_back({handle.first});
Sampler* const sampler = texture_cache.GetComputeSampler(handle.second);
samplers.push_back(sampler->Handle());
}
}
for (const auto& desc : info.image_descriptors) {
add_image(desc, desc.is_written);
}
texture_cache.FillComputeImageViews(std::span(views.data(), views.size()));
buffer_cache.UnbindComputeTextureBuffers();
size_t index{};
const auto add_buffer{[&](const auto& desc) {
constexpr bool is_image = std::is_same_v<decltype(desc), const ImageBufferDescriptor&>;
for (u32 i = 0; i < desc.count; ++i) {
bool is_written{false};
if constexpr (is_image) {
is_written = desc.is_written;
}
ImageView& image_view = texture_cache.GetImageView(views[index].id);
buffer_cache.BindComputeTextureBuffer(index, image_view.GpuAddr(),
image_view.BufferSize(), image_view.format,
is_written, is_image);
++index;
}
}};
std::ranges::for_each(info.texture_buffer_descriptors, add_buffer);
std::ranges::for_each(info.image_buffer_descriptors, add_buffer);
buffer_cache.UpdateComputeBuffers();
buffer_cache.BindHostComputeBuffers();
RescalingPushConstant rescaling;
const VkSampler* samplers_it{samplers.data()};
const VideoCommon::ImageViewInOut* views_it{views.data()};
PushImageDescriptors(texture_cache, update_descriptor_queue, info, rescaling, samplers_it,
views_it);
if (!is_built.load(std::memory_order::relaxed)) {
// Wait for the pipeline to be built
scheduler.Record([this](vk::CommandBuffer) {
std::unique_lock lock{build_mutex};
build_condvar.wait(lock, [this] { return is_built.load(std::memory_order::relaxed); });
});
}
const void* const descriptor_data{update_descriptor_queue.UpdateData()};
const bool is_rescaling = !info.texture_descriptors.empty() || !info.image_descriptors.empty();
scheduler.Record([this, descriptor_data, is_rescaling,
rescaling_data = rescaling.Data()](vk::CommandBuffer cmdbuf) {
cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline);
if (!descriptor_set_layout) {
return;
}
if (is_rescaling) {
cmdbuf.PushConstants(*pipeline_layout, VK_SHADER_STAGE_COMPUTE_BIT,
RESCALING_LAYOUT_WORDS_OFFSET, sizeof(rescaling_data),
rescaling_data.data());
}
const VkDescriptorSet descriptor_set{descriptor_allocator.Commit()};
const vk::Device& dev{device.GetLogical()};
dev.UpdateDescriptorSet(descriptor_set, *descriptor_update_template, descriptor_data);
cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline_layout, 0,
descriptor_set, nullptr);
});
}
} // namespace Vulkan

@@ -1,66 +1,66 @@
// SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include <atomic>
#include <condition_variable>
#include <mutex>
#include "common/common_types.h"
#include "common/thread_worker.h"
#include "shader_recompiler/shader_info.h"
#include "video_core/renderer_vulkan/vk_buffer_cache.h"
#include "video_core/renderer_vulkan/vk_descriptor_pool.h"
#include "video_core/renderer_vulkan/vk_texture_cache.h"
#include "video_core/renderer_vulkan/vk_update_descriptor.h"
#include "video_core/vulkan_common/vulkan_wrapper.h"
namespace VideoCore {
class ShaderNotify;
}
namespace Vulkan {
class Device;
class PipelineStatistics;
class Scheduler;
class ComputePipeline {
public:
explicit ComputePipeline(const Device& device, DescriptorPool& descriptor_pool,
UpdateDescriptorQueue& update_descriptor_queue,
Common::ThreadWorker* thread_worker,
PipelineStatistics* pipeline_statistics,
VideoCore::ShaderNotify* shader_notify, const Shader::Info& info,
vk::ShaderModule spv_module);
ComputePipeline& operator=(ComputePipeline&&) noexcept = delete;
ComputePipeline(ComputePipeline&&) noexcept = delete;
ComputePipeline& operator=(const ComputePipeline&) = delete;
ComputePipeline(const ComputePipeline&) = delete;
void Configure(Tegra::Engines::KeplerCompute& kepler_compute, Tegra::MemoryManager& gpu_memory,
Scheduler& scheduler, BufferCache& buffer_cache, TextureCache& texture_cache);
private:
const Device& device;
UpdateDescriptorQueue& update_descriptor_queue;
Shader::Info info;
VideoCommon::ComputeUniformBufferSizes uniform_buffer_sizes{};
vk::ShaderModule spv_module;
vk::DescriptorSetLayout descriptor_set_layout;
DescriptorAllocator descriptor_allocator;
vk::PipelineLayout pipeline_layout;
vk::DescriptorUpdateTemplateKHR descriptor_update_template;
vk::Pipeline pipeline;
std::condition_variable build_condvar;
std::mutex build_mutex;
std::atomic_bool is_built{false};
};
} // namespace Vulkan

@@ -1,163 +1,163 @@
// SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include <algorithm>
#include <mutex>
#include <span>
#include <vector>
#include "common/common_types.h"
#include "video_core/renderer_vulkan/vk_descriptor_pool.h"
#include "video_core/renderer_vulkan/vk_resource_pool.h"
#include "video_core/renderer_vulkan/vk_scheduler.h"
#include "video_core/vulkan_common/vulkan_device.h"
#include "video_core/vulkan_common/vulkan_wrapper.h"
namespace Vulkan {
// Prefer small grow rates to avoid saturating the descriptor pool with barely used pipelines
constexpr size_t SETS_GROW_RATE = 16;
constexpr s32 SCORE_THRESHOLD = 3;
struct DescriptorBank {
DescriptorBankInfo info;
std::vector<vk::DescriptorPool> pools;
};
bool DescriptorBankInfo::IsSuperset(const DescriptorBankInfo& subset) const noexcept {
return uniform_buffers >= subset.uniform_buffers && storage_buffers >= subset.storage_buffers &&
texture_buffers >= subset.texture_buffers && image_buffers >= subset.image_buffers &&
textures >= subset.textures && images >= subset.images;
}
template <typename Descriptors>
static u32 Accumulate(const Descriptors& descriptors) {
u32 count = 0;
for (const auto& descriptor : descriptors) {
count += descriptor.count;
}
return count;
}
static DescriptorBankInfo MakeBankInfo(std::span<const Shader::Info> infos) {
DescriptorBankInfo bank;
for (const Shader::Info& info : infos) {
bank.uniform_buffers += Accumulate(info.constant_buffer_descriptors);
bank.storage_buffers += Accumulate(info.storage_buffers_descriptors);
bank.texture_buffers += Accumulate(info.texture_buffer_descriptors);
bank.image_buffers += Accumulate(info.image_buffer_descriptors);
bank.textures += Accumulate(info.texture_descriptors);
bank.images += Accumulate(info.image_descriptors);
}
bank.score = bank.uniform_buffers + bank.storage_buffers + bank.texture_buffers +
bank.image_buffers + bank.textures + bank.images;
return bank;
}
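// Example: a shader using 2 constant buffers, 1 storage buffer and 3 textures
// yields {uniform_buffers = 2, storage_buffers = 1, textures = 3, score = 6}.
// Banks whose score differs by less than SCORE_THRESHOLD and that are a
// superset of the request can be reused by other pipelines (see Bank below).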
static void AllocatePool(const Device& device, DescriptorBank& bank) {
std::array<VkDescriptorPoolSize, 6> pool_sizes;
size_t pool_cursor{};
const u32 sets_per_pool = device.GetSetsPerPool();
const auto add = [&](VkDescriptorType type, u32 count) {
if (count > 0) {
pool_sizes[pool_cursor++] = {
.type = type,
.descriptorCount = count * sets_per_pool,
};
}
};
const auto& info{bank.info};
add(VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, info.uniform_buffers);
add(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, info.storage_buffers);
add(VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, info.texture_buffers);
add(VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER, info.image_buffers);
add(VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, info.textures);
add(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, info.images);
bank.pools.push_back(device.GetLogical().CreateDescriptorPool({
.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO,
.pNext = nullptr,
.flags = VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT,
.maxSets = sets_per_pool,
.poolSizeCount = static_cast<u32>(pool_cursor),
.pPoolSizes = std::data(pool_sizes),
}));
}
DescriptorAllocator::DescriptorAllocator(const Device& device_, MasterSemaphore& master_semaphore_,
DescriptorBank& bank_, VkDescriptorSetLayout layout_)
: ResourcePool(master_semaphore_, SETS_GROW_RATE), device{&device_}, bank{&bank_},
layout{layout_} {}
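// Sets are allocated in chunks of SETS_GROW_RATE; Commit() maps the flat
// index handed out by the resource pool to a (chunk, element) pair.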
VkDescriptorSet DescriptorAllocator::Commit() {
const size_t index = CommitResource();
return sets[index / SETS_GROW_RATE][index % SETS_GROW_RATE];
}
void DescriptorAllocator::Allocate(size_t begin, size_t end) {
sets.push_back(AllocateDescriptors(end - begin));
}
vk::DescriptorSets DescriptorAllocator::AllocateDescriptors(size_t count) {
const std::vector<VkDescriptorSetLayout> layouts(count, layout);
VkDescriptorSetAllocateInfo allocate_info{
.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO,
.pNext = nullptr,
.descriptorPool = *bank->pools.back(),
.descriptorSetCount = static_cast<u32>(count),
.pSetLayouts = layouts.data(),
};
vk::DescriptorSets new_sets = bank->pools.back().Allocate(allocate_info);
if (!new_sets.IsOutOfPoolMemory()) {
return new_sets;
}
// Our current pool is out of memory. Allocate a new one and retry
AllocatePool(*device, *bank);
allocate_info.descriptorPool = *bank->pools.back();
new_sets = bank->pools.back().Allocate(allocate_info);
if (!new_sets.IsOutOfPoolMemory()) {
return new_sets;
}
// After allocating a new pool, we are out of memory again. We can't handle this from here.
throw vk::Exception(VK_ERROR_OUT_OF_POOL_MEMORY);
}
DescriptorPool::DescriptorPool(const Device& device_, Scheduler& scheduler)
: device{device_}, master_semaphore{scheduler.GetMasterSemaphore()} {}
DescriptorPool::~DescriptorPool() = default;
DescriptorAllocator DescriptorPool::Allocator(VkDescriptorSetLayout layout,
std::span<const Shader::Info> infos) {
return Allocator(layout, MakeBankInfo(infos));
}
DescriptorAllocator DescriptorPool::Allocator(VkDescriptorSetLayout layout,
const Shader::Info& info) {
return Allocator(layout, MakeBankInfo(std::array{info}));
}
DescriptorAllocator DescriptorPool::Allocator(VkDescriptorSetLayout layout,
const DescriptorBankInfo& info) {
return DescriptorAllocator(device, master_semaphore, Bank(info), layout);
}
DescriptorBank& DescriptorPool::Bank(const DescriptorBankInfo& reqs) {
std::shared_lock read_lock{banks_mutex};
const auto it = std::ranges::find_if(bank_infos, [&reqs](const DescriptorBankInfo& bank) {
return std::abs(bank.score - reqs.score) < SCORE_THRESHOLD && bank.IsSuperset(reqs);
});
if (it != bank_infos.end()) {
return *banks[std::distance(bank_infos.begin(), it)].get();
}
read_lock.unlock();
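// Between dropping the shared lock and taking the unique one, another thread may
// insert an equivalent bank; the resulting duplicate wastes a pool but is harmless.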
std::unique_lock write_lock{banks_mutex};
bank_infos.push_back(reqs);
auto& bank = *banks.emplace_back(std::make_unique<DescriptorBank>());
bank.info = reqs;
AllocatePool(device, bank);
return bank;
}
} // namespace Vulkan
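A minimal usage sketch, assuming a hypothetical caller that already owns a descriptor set layout (set_layout) and a populated Shader::Info (shader_info); none of these caller names come from this commit:
// Illustrative caller; `set_layout` and `shader_info` are assumed to exist.
DescriptorPool descriptor_pool{device, scheduler};
DescriptorAllocator allocator = descriptor_pool.Allocator(*set_layout, shader_info);
// One set per draw/dispatch; sets are recycled once the master semaphore
// proves the GPU has finished with the tick that used them.
const VkDescriptorSet set = allocator.Commit();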


@@ -1,87 +1,87 @@
// SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include <shared_mutex>
#include <span>
#include <vector>
#include "shader_recompiler/shader_info.h"
#include "video_core/renderer_vulkan/vk_resource_pool.h"
#include "video_core/vulkan_common/vulkan_wrapper.h"
namespace Vulkan {
class Device;
class Scheduler;
struct DescriptorBank;
struct DescriptorBankInfo {
[[nodiscard]] bool IsSuperset(const DescriptorBankInfo& subset) const noexcept;
u32 uniform_buffers{}; ///< Number of uniform buffer descriptors
u32 storage_buffers{}; ///< Number of storage buffer descriptors
u32 texture_buffers{}; ///< Number of texture buffer descriptors
u32 image_buffers{}; ///< Number of image buffer descriptors
u32 textures{}; ///< Number of texture descriptors
u32 images{}; ///< Number of image descriptors
s32 score{}; ///< Number of descriptors in total
};
class DescriptorAllocator final : public ResourcePool {
friend class DescriptorPool;
public:
explicit DescriptorAllocator() = default;
~DescriptorAllocator() override = default;
DescriptorAllocator& operator=(DescriptorAllocator&&) noexcept = default;
DescriptorAllocator(DescriptorAllocator&&) noexcept = default;
DescriptorAllocator& operator=(const DescriptorAllocator&) = delete;
DescriptorAllocator(const DescriptorAllocator&) = delete;
VkDescriptorSet Commit();
private:
explicit DescriptorAllocator(const Device& device_, MasterSemaphore& master_semaphore_,
DescriptorBank& bank_, VkDescriptorSetLayout layout_);
void Allocate(size_t begin, size_t end) override;
vk::DescriptorSets AllocateDescriptors(size_t count);
const Device* device{};
DescriptorBank* bank{};
VkDescriptorSetLayout layout{};
std::vector<vk::DescriptorSets> sets;
};
class DescriptorPool {
public:
explicit DescriptorPool(const Device& device, Scheduler& scheduler);
~DescriptorPool();
DescriptorPool& operator=(const DescriptorPool&) = delete;
DescriptorPool(const DescriptorPool&) = delete;
DescriptorAllocator Allocator(VkDescriptorSetLayout layout,
std::span<const Shader::Info> infos);
DescriptorAllocator Allocator(VkDescriptorSetLayout layout, const Shader::Info& info);
DescriptorAllocator Allocator(VkDescriptorSetLayout layout, const DescriptorBankInfo& info);
private:
DescriptorBank& Bank(const DescriptorBankInfo& reqs);
const Device& device;
MasterSemaphore& master_semaphore;
std::shared_mutex banks_mutex;
std::vector<DescriptorBankInfo> bank_infos;
std::vector<std::unique_ptr<DescriptorBank>> banks;
};
} // namespace Vulkan


@@ -1,64 +1,64 @@
// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include <memory>
#include "video_core/renderer_vulkan/vk_buffer_cache.h"
#include "video_core/renderer_vulkan/vk_fence_manager.h"
#include "video_core/renderer_vulkan/vk_scheduler.h"
#include "video_core/renderer_vulkan/vk_texture_cache.h"
#include "video_core/vulkan_common/vulkan_device.h"
namespace Vulkan {
InnerFence::InnerFence(Scheduler& scheduler_, bool is_stubbed_)
: FenceBase{is_stubbed_}, scheduler{scheduler_} {}
InnerFence::~InnerFence() = default;
void InnerFence::Queue() {
if (is_stubbed) {
return;
}
// Get the current tick so we can wait for it
wait_tick = scheduler.CurrentTick();
scheduler.Flush();
}
bool InnerFence::IsSignaled() const {
if (is_stubbed) {
return true;
}
return scheduler.IsFree(wait_tick);
}
void InnerFence::Wait() {
if (is_stubbed) {
return;
}
scheduler.Wait(wait_tick);
}
FenceManager::FenceManager(VideoCore::RasterizerInterface& rasterizer_, Tegra::GPU& gpu_,
TextureCache& texture_cache_, BufferCache& buffer_cache_,
QueryCache& query_cache_, const Device& device_, Scheduler& scheduler_)
: GenericFenceManager{rasterizer_, gpu_, texture_cache_, buffer_cache_, query_cache_},
scheduler{scheduler_} {}
Fence FenceManager::CreateFence(bool is_stubbed) {
return std::make_shared<InnerFence>(scheduler, is_stubbed);
}
void FenceManager::QueueFence(Fence& fence) {
fence->Queue();
}
bool FenceManager::IsFenceSignaled(Fence& fence) const {
return fence->IsSignaled();
}
void FenceManager::WaitFence(Fence& fence) {
fence->Wait();
}
} // namespace Vulkan
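A brief sketch of one fence's lifetime; the generic FenceManager base normally drives this sequence, so calling InnerFence directly is purely illustrative:
// Illustrative only: exercise the InnerFence API by hand.
auto fence = std::make_shared<InnerFence>(scheduler, false);
fence->Queue(); // records the current scheduler tick and flushes
if (!fence->IsSignaled()) { // non-blocking check against the known GPU tick
fence->Wait(); // blocks until the scheduler reaches that tick
}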


@@ -1,61 +1,61 @@
// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include <memory>
#include "video_core/fence_manager.h"
#include "video_core/renderer_vulkan/vk_buffer_cache.h"
#include "video_core/renderer_vulkan/vk_texture_cache.h"
namespace Core {
class System;
}
namespace VideoCore {
class RasterizerInterface;
}
namespace Vulkan {
class Device;
class QueryCache;
class Scheduler;
class InnerFence : public VideoCommon::FenceBase {
public:
explicit InnerFence(Scheduler& scheduler_, bool is_stubbed_);
~InnerFence();
void Queue();
bool IsSignaled() const;
void Wait();
private:
Scheduler& scheduler;
u64 wait_tick = 0;
};
using Fence = std::shared_ptr<InnerFence>;
using GenericFenceManager = VideoCommon::FenceManager<Fence, TextureCache, BufferCache, QueryCache>;
class FenceManager final : public GenericFenceManager {
public:
explicit FenceManager(VideoCore::RasterizerInterface& rasterizer, Tegra::GPU& gpu,
TextureCache& texture_cache, BufferCache& buffer_cache,
QueryCache& query_cache, const Device& device, Scheduler& scheduler);
protected:
Fence CreateFence(bool is_stubbed) override;
void QueueFence(Fence& fence) override;
bool IsFenceSignaled(Fence& fence) const override;
void WaitFence(Fence& fence) override;
private:
Scheduler& scheduler;
};
} // namespace Vulkan

File diff suppressed because it is too large


@@ -1,53 +1,53 @@
// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include "common/math_util.h"
#include "video_core/vulkan_common/vulkan_memory_allocator.h"
#include "video_core/vulkan_common/vulkan_wrapper.h"
namespace Vulkan {
class Device;
class Scheduler;
class FSR {
public:
explicit FSR(const Device& device, MemoryAllocator& memory_allocator, size_t image_count,
VkExtent2D output_size);
VkImageView Draw(Scheduler& scheduler, size_t image_index, VkImageView image_view,
VkExtent2D input_image_extent, const Common::Rectangle<int>& crop_rect);
private:
void CreateDescriptorPool();
void CreateDescriptorSetLayout();
void CreateDescriptorSets();
void CreateImages();
void CreateSampler();
void CreateShaders();
void CreatePipeline();
void CreatePipelineLayout();
void UpdateDescriptorSet(std::size_t image_index, VkImageView image_view) const;
const Device& device;
MemoryAllocator& memory_allocator;
size_t image_count;
VkExtent2D output_size;
vk::DescriptorPool descriptor_pool;
vk::DescriptorSetLayout descriptor_set_layout;
vk::DescriptorSets descriptor_sets;
vk::PipelineLayout pipeline_layout;
vk::ShaderModule easu_shader;
vk::ShaderModule rcas_shader;
vk::Pipeline easu_pipeline;
vk::Pipeline rcas_pipeline;
vk::Sampler sampler;
std::vector<vk::Image> images;
std::vector<vk::ImageView> image_views;
std::vector<MemoryCommit> buffer_commits;
};
} // namespace Vulkan
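A rough sketch of how a presenter might drive this class; the extents, indices, and crop rectangle below are placeholders, not values from this commit:
// Illustrative present path with placeholder dimensions.
FSR fsr{device, memory_allocator, swapchain_image_count, VkExtent2D{1920, 1080}};
const VkImageView upscaled = fsr.Draw(scheduler, image_index, source_view,
VkExtent2D{1280, 720}, Common::Rectangle<int>{0, 0, 1280, 720});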

File diff suppressed because it is too large


@@ -1,161 +1,161 @@
// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include <algorithm>
#include <array>
#include <atomic>
#include <condition_variable>
#include <mutex>
#include <type_traits>
#include "common/thread_worker.h"
#include "shader_recompiler/shader_info.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/renderer_vulkan/fixed_pipeline_state.h"
#include "video_core/renderer_vulkan/vk_buffer_cache.h"
#include "video_core/renderer_vulkan/vk_descriptor_pool.h"
#include "video_core/renderer_vulkan/vk_texture_cache.h"
#include "video_core/vulkan_common/vulkan_wrapper.h"
namespace VideoCore {
class ShaderNotify;
}
namespace Vulkan {
struct GraphicsPipelineCacheKey {
std::array<u64, 6> unique_hashes;
FixedPipelineState state;
size_t Hash() const noexcept;
bool operator==(const GraphicsPipelineCacheKey& rhs) const noexcept;
bool operator!=(const GraphicsPipelineCacheKey& rhs) const noexcept {
return !operator==(rhs);
}
size_t Size() const noexcept {
return sizeof(unique_hashes) + state.Size();
}
};
static_assert(std::has_unique_object_representations_v<GraphicsPipelineCacheKey>);
static_assert(std::is_trivially_copyable_v<GraphicsPipelineCacheKey>);
static_assert(std::is_trivially_constructible_v<GraphicsPipelineCacheKey>);
} // namespace Vulkan
namespace std {
template <>
struct hash<Vulkan::GraphicsPipelineCacheKey> {
size_t operator()(const Vulkan::GraphicsPipelineCacheKey& k) const noexcept {
return k.Hash();
}
};
} // namespace std
namespace Vulkan {
class Device;
class PipelineStatistics;
class RenderPassCache;
class RescalingPushConstant;
class Scheduler;
class UpdateDescriptorQueue;
class GraphicsPipeline {
static constexpr size_t NUM_STAGES = Tegra::Engines::Maxwell3D::Regs::MaxShaderStage;
public:
explicit GraphicsPipeline(Scheduler& scheduler, BufferCache& buffer_cache,
TextureCache& texture_cache, VideoCore::ShaderNotify* shader_notify,
const Device& device, DescriptorPool& descriptor_pool,
UpdateDescriptorQueue& update_descriptor_queue,
Common::ThreadWorker* worker_thread,
PipelineStatistics* pipeline_statistics,
RenderPassCache& render_pass_cache,
const GraphicsPipelineCacheKey& key,
std::array<vk::ShaderModule, NUM_STAGES> stages,
const std::array<const Shader::Info*, NUM_STAGES>& infos);
GraphicsPipeline& operator=(GraphicsPipeline&&) noexcept = delete;
GraphicsPipeline(GraphicsPipeline&&) noexcept = delete;
GraphicsPipeline& operator=(const GraphicsPipeline&) = delete;
GraphicsPipeline(const GraphicsPipeline&) = delete;
void AddTransition(GraphicsPipeline* transition);
void Configure(bool is_indexed) {
configure_func(this, is_indexed);
}
[[nodiscard]] GraphicsPipeline* Next(const GraphicsPipelineCacheKey& current_key) noexcept {
if (key == current_key) {
return this;
}
const auto it{std::find(transition_keys.begin(), transition_keys.end(), current_key)};
return it != transition_keys.end() ? transitions[std::distance(transition_keys.begin(), it)]
: nullptr;
}
[[nodiscard]] bool IsBuilt() const noexcept {
return is_built.load(std::memory_order::relaxed);
}
template <typename Spec>
static auto MakeConfigureSpecFunc() {
return [](GraphicsPipeline* pl, bool is_indexed) { pl->ConfigureImpl<Spec>(is_indexed); };
}
void SetEngine(Tegra::Engines::Maxwell3D* maxwell3d_, Tegra::MemoryManager* gpu_memory_) {
maxwell3d = maxwell3d_;
gpu_memory = gpu_memory_;
}
private:
template <typename Spec>
void ConfigureImpl(bool is_indexed);
void ConfigureDraw(const RescalingPushConstant& rescaling);
void MakePipeline(VkRenderPass render_pass);
void Validate();
const GraphicsPipelineCacheKey key;
Tegra::Engines::Maxwell3D* maxwell3d;
Tegra::MemoryManager* gpu_memory;
const Device& device;
TextureCache& texture_cache;
BufferCache& buffer_cache;
Scheduler& scheduler;
UpdateDescriptorQueue& update_descriptor_queue;
void (*configure_func)(GraphicsPipeline*, bool){};
std::vector<GraphicsPipelineCacheKey> transition_keys;
std::vector<GraphicsPipeline*> transitions;
std::array<vk::ShaderModule, NUM_STAGES> spv_modules;
std::array<Shader::Info, NUM_STAGES> stage_infos;
std::array<u32, 5> enabled_uniform_buffer_masks{};
VideoCommon::UniformBufferSizes uniform_buffer_sizes{};
u32 num_textures{};
vk::DescriptorSetLayout descriptor_set_layout;
DescriptorAllocator descriptor_allocator;
vk::PipelineLayout pipeline_layout;
vk::DescriptorUpdateTemplateKHR descriptor_update_template;
vk::Pipeline pipeline;
std::condition_variable build_condvar;
std::mutex build_mutex;
std::atomic_bool is_built{false};
bool uses_push_descriptor{false};
};
} // namespace Vulkan
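Next() and AddTransition() together form a small per-pipeline cache of likely successor states; a sketch of the lookup they enable, where LookupFullCache stands in for the pipeline cache's full hash-map path:
// Illustrative fast path; LookupFullCache is a placeholder, not real API.
GraphicsPipeline* next = current_pipeline ? current_pipeline->Next(graphics_key) : nullptr;
if (!next) {
next = LookupFullCache(graphics_key);
if (current_pipeline) {
current_pipeline->AddTransition(next);
}
}
current_pipeline = next;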


@@ -1,45 +1,45 @@
// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include <thread>
#include "common/settings.h"
#include "video_core/renderer_vulkan/vk_master_semaphore.h"
#include "video_core/vulkan_common/vulkan_device.h"
#include "video_core/vulkan_common/vulkan_wrapper.h"
namespace Vulkan {
MasterSemaphore::MasterSemaphore(const Device& device) {
static constexpr VkSemaphoreTypeCreateInfoKHR semaphore_type_ci{
.sType = VK_STRUCTURE_TYPE_SEMAPHORE_TYPE_CREATE_INFO_KHR,
.pNext = nullptr,
.semaphoreType = VK_SEMAPHORE_TYPE_TIMELINE_KHR,
.initialValue = 0,
};
static constexpr VkSemaphoreCreateInfo semaphore_ci{
.sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO,
.pNext = &semaphore_type_ci,
.flags = 0,
};
semaphore = device.GetLogical().CreateSemaphore(semaphore_ci);
if (!Settings::values.renderer_debug) {
return;
}
// Validation layers have a bug where they fail to track resource usage when using timeline
// semaphores and synchronizing with GetSemaphoreCounterValueKHR. To work around this issue,
// have a separate thread waiting for each timeline semaphore value.
debug_thread = std::jthread([this](std::stop_token stop_token) {
u64 counter = 0;
while (!stop_token.stop_requested()) {
if (semaphore.Wait(counter, 10'000'000)) {
++counter;
}
}
});
}
MasterSemaphore::~MasterSemaphore() = default;
} // namespace Vulkan


@@ -1,84 +1,84 @@
// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include <atomic>
#include <thread>
#include "common/common_types.h"
#include "video_core/vulkan_common/vulkan_wrapper.h"
namespace Vulkan {
class Device;
class MasterSemaphore {
public:
explicit MasterSemaphore(const Device& device);
~MasterSemaphore();
/// Returns the current logical tick.
[[nodiscard]] u64 CurrentTick() const noexcept {
return current_tick.load(std::memory_order_acquire);
}
/// Returns the last known GPU tick.
[[nodiscard]] u64 KnownGpuTick() const noexcept {
return gpu_tick.load(std::memory_order_acquire);
}
/// Returns the timeline semaphore handle.
[[nodiscard]] VkSemaphore Handle() const noexcept {
return *semaphore;
}
/// Returns true when a tick has been hit by the GPU.
[[nodiscard]] bool IsFree(u64 tick) const noexcept {
return KnownGpuTick() >= tick;
}
/// Advances the logical tick and returns the previous value.
[[nodiscard]] u64 NextTick() noexcept {
return current_tick.fetch_add(1, std::memory_order_release);
}
/// Refreshes the known GPU tick.
void Refresh() {
u64 this_tick{};
u64 counter{};
do {
this_tick = gpu_tick.load(std::memory_order_acquire);
counter = semaphore.GetCounter();
if (counter < this_tick) {
return;
}
} while (!gpu_tick.compare_exchange_weak(this_tick, counter, std::memory_order_release,
std::memory_order_relaxed));
}
/// Waits for a tick to be hit on the GPU
void Wait(u64 tick) {
// No need to wait if the GPU is ahead of the tick
if (IsFree(tick)) {
return;
}
// Update the GPU tick and try again
Refresh();
if (IsFree(tick)) {
return;
}
// If none of the above is hit, fall back to a regular wait
while (!semaphore.Wait(tick)) {
}
Refresh();
}
private:
vk::Semaphore semaphore; ///< Timeline semaphore.
std::atomic<u64> gpu_tick{0}; ///< Current known GPU tick.
std::atomic<u64> current_tick{1}; ///< Current logical tick.
std::jthread debug_thread; ///< Debug thread to workaround validation layer bugs.
};
} // namespace Vulkan
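A condensed sketch of the tick protocol between CPU submission and GPU completion; SubmitSignalingTick is an assumed helper standing in for a queue submission that signals the timeline semaphore:
// Illustrative submission path; SubmitSignalingTick is assumed, not real API.
MasterSemaphore master{device};
const u64 tick = master.NextTick(); // returns the tick this submission signals
SubmitSignalingTick(master.Handle(), tick);
// Later, before reusing resources tied to that submission:
if (!master.IsFree(tick)) {
master.Wait(tick); // refreshes the cached GPU tick, then blocks if still behind
}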

File diff suppressed because it is too large


@@ -1,165 +1,165 @@
// SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include <array>
#include <cstddef>
#include <filesystem>
#include <memory>
#include <type_traits>
#include <unordered_map>
#include <vector>
#include "common/common_types.h"
#include "common/thread_worker.h"
#include "shader_recompiler/frontend/ir/basic_block.h"
#include "shader_recompiler/frontend/ir/value.h"
#include "shader_recompiler/frontend/maxwell/control_flow.h"
#include "shader_recompiler/host_translate_info.h"
#include "shader_recompiler/object_pool.h"
#include "shader_recompiler/profile.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/renderer_vulkan/fixed_pipeline_state.h"
#include "video_core/renderer_vulkan/vk_buffer_cache.h"
#include "video_core/renderer_vulkan/vk_compute_pipeline.h"
#include "video_core/renderer_vulkan/vk_graphics_pipeline.h"
#include "video_core/renderer_vulkan/vk_texture_cache.h"
#include "video_core/shader_cache.h"
namespace Core {
class System;
}
namespace Shader::IR {
struct Program;
}
namespace VideoCore {
class ShaderNotify;
}
namespace Vulkan {
using Maxwell = Tegra::Engines::Maxwell3D::Regs;
struct ComputePipelineCacheKey {
u64 unique_hash;
u32 shared_memory_size;
std::array<u32, 3> workgroup_size;
size_t Hash() const noexcept;
bool operator==(const ComputePipelineCacheKey& rhs) const noexcept;
bool operator!=(const ComputePipelineCacheKey& rhs) const noexcept {
return !operator==(rhs);
}
};
static_assert(std::has_unique_object_representations_v<ComputePipelineCacheKey>);
static_assert(std::is_trivially_copyable_v<ComputePipelineCacheKey>);
static_assert(std::is_trivially_constructible_v<ComputePipelineCacheKey>);
} // namespace Vulkan
namespace std {
template <>
struct hash<Vulkan::ComputePipelineCacheKey> {
size_t operator()(const Vulkan::ComputePipelineCacheKey& k) const noexcept {
return k.Hash();
}
};
} // namespace std
namespace Vulkan {
class ComputePipeline;
class DescriptorPool;
class Device;
class PipelineStatistics;
class RasterizerVulkan;
class RenderPassCache;
class Scheduler;
class UpdateDescriptorQueue;
using VideoCommon::ShaderInfo;
struct ShaderPools {
void ReleaseContents() {
flow_block.ReleaseContents();
block.ReleaseContents();
inst.ReleaseContents();
}
Shader::ObjectPool<Shader::IR::Inst> inst;
Shader::ObjectPool<Shader::IR::Block> block;
Shader::ObjectPool<Shader::Maxwell::Flow::Block> flow_block;
};
class PipelineCache : public VideoCommon::ShaderCache {
public:
explicit PipelineCache(RasterizerVulkan& rasterizer, const Device& device, Scheduler& scheduler,
DescriptorPool& descriptor_pool,
UpdateDescriptorQueue& update_descriptor_queue,
RenderPassCache& render_pass_cache, BufferCache& buffer_cache,
TextureCache& texture_cache, VideoCore::ShaderNotify& shader_notify_);
~PipelineCache();
[[nodiscard]] GraphicsPipeline* CurrentGraphicsPipeline();
[[nodiscard]] ComputePipeline* CurrentComputePipeline();
void LoadDiskResources(u64 title_id, std::stop_token stop_loading,
const VideoCore::DiskResourceLoadCallback& callback);
private:
[[nodiscard]] GraphicsPipeline* CurrentGraphicsPipelineSlowPath();
[[nodiscard]] GraphicsPipeline* BuiltPipeline(GraphicsPipeline* pipeline) const noexcept;
std::unique_ptr<GraphicsPipeline> CreateGraphicsPipeline();
std::unique_ptr<GraphicsPipeline> CreateGraphicsPipeline(
ShaderPools& pools, const GraphicsPipelineCacheKey& key,
std::span<Shader::Environment* const> envs, PipelineStatistics* statistics,
bool build_in_parallel);
std::unique_ptr<ComputePipeline> CreateComputePipeline(const ComputePipelineCacheKey& key,
const ShaderInfo* shader);
std::unique_ptr<ComputePipeline> CreateComputePipeline(ShaderPools& pools,
const ComputePipelineCacheKey& key,
Shader::Environment& env,
PipelineStatistics* statistics,
bool build_in_parallel);
const Device& device;
Scheduler& scheduler;
DescriptorPool& descriptor_pool;
UpdateDescriptorQueue& update_descriptor_queue;
RenderPassCache& render_pass_cache;
BufferCache& buffer_cache;
TextureCache& texture_cache;
VideoCore::ShaderNotify& shader_notify;
bool use_asynchronous_shaders{};
GraphicsPipelineCacheKey graphics_key{};
GraphicsPipeline* current_pipeline{};
std::unordered_map<ComputePipelineCacheKey, std::unique_ptr<ComputePipeline>> compute_cache;
std::unordered_map<GraphicsPipelineCacheKey, std::unique_ptr<GraphicsPipeline>> graphics_cache;
ShaderPools main_pools;
Shader::Profile profile;
Shader::HostTranslateInfo host_info;
std::filesystem::path pipeline_cache_filename;
Common::ThreadWorker workers;
Common::ThreadWorker serialization_thread;
};
} // namespace Vulkan
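Because both cache keys specialize std::hash and define operator==, they can serve directly as unordered_map keys; a compressed sketch of the lookup-or-create pattern the compute_cache member supports, assuming a populated key and with the creation call as a placeholder:
// Illustrative lookup-or-create; CreatePipelineSomehow is a placeholder.
std::unordered_map<ComputePipelineCacheKey, std::unique_ptr<ComputePipeline>> cache;
const auto [it, is_new] = cache.try_emplace(key);
if (is_new) {
it->second = CreatePipelineSomehow(key);
}
ComputePipeline* const pipeline = it->second.get();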


@@ -1,133 +1,133 @@
// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include <algorithm>
#include <cstddef>
#include <utility>
#include <vector>
#include "video_core/renderer_vulkan/vk_query_cache.h"
#include "video_core/renderer_vulkan/vk_resource_pool.h"
#include "video_core/renderer_vulkan/vk_scheduler.h"
#include "video_core/vulkan_common/vulkan_device.h"
#include "video_core/vulkan_common/vulkan_wrapper.h"
namespace Vulkan {
using VideoCore::QueryType;
namespace {
constexpr std::array QUERY_TARGETS = {VK_QUERY_TYPE_OCCLUSION};
constexpr VkQueryType GetTarget(QueryType type) {
return QUERY_TARGETS[static_cast<std::size_t>(type)];
}
} // Anonymous namespace
QueryPool::QueryPool(const Device& device_, Scheduler& scheduler, QueryType type_)
: ResourcePool{scheduler.GetMasterSemaphore(), GROW_STEP}, device{device_}, type{type_} {}
QueryPool::~QueryPool() = default;
std::pair<VkQueryPool, u32> QueryPool::Commit() {
std::size_t index;
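// CommitResource() can return a recycled index whose query is still owned by a
// live HostCounter (usage[index] == true); loop until a free slot turns up.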
do {
index = CommitResource();
} while (usage[index]);
usage[index] = true;
return {*pools[index / GROW_STEP], static_cast<u32>(index % GROW_STEP)};
}
void QueryPool::Allocate(std::size_t begin, std::size_t end) {
usage.resize(end);
pools.push_back(device.GetLogical().CreateQueryPool({
.sType = VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
.queryType = GetTarget(type),
.queryCount = static_cast<u32>(end - begin),
.pipelineStatistics = 0,
}));
}
void QueryPool::Reserve(std::pair<VkQueryPool, u32> query) {
const auto it =
std::find_if(pools.begin(), pools.end(), [query_pool = query.first](vk::QueryPool& pool) {
return query_pool == *pool;
});
if (it != std::end(pools)) {
const std::ptrdiff_t pool_index = std::distance(std::begin(pools), it);
usage[pool_index * GROW_STEP + static_cast<std::ptrdiff_t>(query.second)] = false;
}
}
QueryCache::QueryCache(VideoCore::RasterizerInterface& rasterizer_, const Device& device_,
Scheduler& scheduler_)
: QueryCacheBase{rasterizer_}, device{device_}, scheduler{scheduler_},
query_pools{
QueryPool{device_, scheduler_, QueryType::SamplesPassed},
} {}
QueryCache::~QueryCache() {
// TODO(Rodrigo): This is a hack to destroy all HostCounter instances before the base class
// destructor is called. The query cache should be redesigned to have a proper ownership model
// instead of using shared pointers.
for (size_t query_type = 0; query_type < VideoCore::NumQueryTypes; ++query_type) {
auto& stream = Stream(static_cast<QueryType>(query_type));
stream.Update(false);
stream.Reset();
}
}
std::pair<VkQueryPool, u32> QueryCache::AllocateQuery(QueryType type) {
return query_pools[static_cast<std::size_t>(type)].Commit();
}
void QueryCache::Reserve(QueryType type, std::pair<VkQueryPool, u32> query) {
query_pools[static_cast<std::size_t>(type)].Reserve(query);
}
HostCounter::HostCounter(QueryCache& cache_, std::shared_ptr<HostCounter> dependency_,
QueryType type_)
: HostCounterBase{std::move(dependency_)}, cache{cache_}, type{type_},
query{cache_.AllocateQuery(type_)}, tick{cache_.GetScheduler().CurrentTick()} {
const vk::Device* logical = &cache.GetDevice().GetLogical();
cache.GetScheduler().Record([logical, query = query](vk::CommandBuffer cmdbuf) {
logical->ResetQueryPoolEXT(query.first, query.second, 1);
cmdbuf.BeginQuery(query.first, query.second, VK_QUERY_CONTROL_PRECISE_BIT);
});
}
HostCounter::~HostCounter() {
cache.Reserve(type, query);
}
void HostCounter::EndQuery() {
cache.GetScheduler().Record(
[query = query](vk::CommandBuffer cmdbuf) { cmdbuf.EndQuery(query.first, query.second); });
}
u64 HostCounter::BlockingQuery() const {
cache.GetScheduler().Wait(tick);
u64 data;
const VkResult query_result = cache.GetDevice().GetLogical().GetQueryResults(
query.first, query.second, 1, sizeof(data), &data, sizeof(data),
VK_QUERY_RESULT_64_BIT | VK_QUERY_RESULT_WAIT_BIT);
switch (query_result) {
case VK_SUCCESS:
return data;
case VK_ERROR_DEVICE_LOST:
cache.GetDevice().ReportLoss();
[[fallthrough]];
default:
throw vk::Exception(query_result);
}
}
} // namespace Vulkan
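A compact sketch of one occlusion query's lifetime; QueryCacheBase normally orchestrates these calls, so constructing the counter by hand is purely illustrative:
// Illustrative counter lifetime, with no dependency on a previous counter.
auto counter = std::make_shared<HostCounter>(query_cache, nullptr, QueryType::SamplesPassed);
// Draw calls recorded here land between the implicit BeginQuery and this EndQuery.
counter->EndQuery();
// Destroying the counter returns its (pool, index) slot via QueryCache::Reserve.
counter.reset();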


@@ -1,100 +1,100 @@
// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include <cstddef>
#include <memory>
#include <utility>
#include <vector>
#include "common/common_types.h"
#include "video_core/query_cache.h"
#include "video_core/renderer_vulkan/vk_resource_pool.h"
#include "video_core/vulkan_common/vulkan_wrapper.h"
namespace VideoCore {
class RasterizerInterface;
}
namespace Vulkan {
class CachedQuery;
class Device;
class HostCounter;
class QueryCache;
class Scheduler;
using CounterStream = VideoCommon::CounterStreamBase<QueryCache, HostCounter>;
class QueryPool final : public ResourcePool {
public:
explicit QueryPool(const Device& device, Scheduler& scheduler, VideoCore::QueryType type);
~QueryPool() override;
std::pair<VkQueryPool, u32> Commit();
void Reserve(std::pair<VkQueryPool, u32> query);
protected:
void Allocate(std::size_t begin, std::size_t end) override;
private:
static constexpr std::size_t GROW_STEP = 512;
const Device& device;
const VideoCore::QueryType type;
std::vector<vk::QueryPool> pools;
std::vector<bool> usage;
};
class QueryCache final
: public VideoCommon::QueryCacheBase<QueryCache, CachedQuery, CounterStream, HostCounter> {
public:
explicit QueryCache(VideoCore::RasterizerInterface& rasterizer_, const Device& device_,
Scheduler& scheduler_);
~QueryCache();
std::pair<VkQueryPool, u32> AllocateQuery(VideoCore::QueryType type);
void Reserve(VideoCore::QueryType type, std::pair<VkQueryPool, u32> query);
const Device& GetDevice() const noexcept {
return device;
}
Scheduler& GetScheduler() const noexcept {
return scheduler;
}
private:
const Device& device;
Scheduler& scheduler;
std::array<QueryPool, VideoCore::NumQueryTypes> query_pools;
};
class HostCounter final : public VideoCommon::HostCounterBase<QueryCache, HostCounter> {
public:
explicit HostCounter(QueryCache& cache_, std::shared_ptr<HostCounter> dependency_,
VideoCore::QueryType type_);
~HostCounter();
void EndQuery();
private:
u64 BlockingQuery() const override;
QueryCache& cache;
const VideoCore::QueryType type;
const std::pair<VkQueryPool, u32> query;
const u64 tick;
};
class CachedQuery : public VideoCommon::CachedQueryBase<HostCounter> {
public:
explicit CachedQuery(QueryCache&, VideoCore::QueryType, VAddr cpu_addr_, u8* host_ptr_)
: CachedQueryBase{cpu_addr_, host_ptr_} {}
};
} // namespace Vulkan

File diff suppressed because it is too large Load Diff

View File

@@ -1,178 +1,178 @@
// SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include <array>
#include <boost/container/static_vector.hpp>
#include "common/common_types.h"
#include "video_core/control/channel_state_cache.h"
#include "video_core/engines/maxwell_dma.h"
#include "video_core/rasterizer_accelerated.h"
#include "video_core/rasterizer_interface.h"
#include "video_core/renderer_vulkan/blit_image.h"
#include "video_core/renderer_vulkan/vk_buffer_cache.h"
#include "video_core/renderer_vulkan/vk_descriptor_pool.h"
#include "video_core/renderer_vulkan/vk_fence_manager.h"
#include "video_core/renderer_vulkan/vk_pipeline_cache.h"
#include "video_core/renderer_vulkan/vk_query_cache.h"
#include "video_core/renderer_vulkan/vk_render_pass_cache.h"
#include "video_core/renderer_vulkan/vk_staging_buffer_pool.h"
#include "video_core/renderer_vulkan/vk_texture_cache.h"
#include "video_core/renderer_vulkan/vk_update_descriptor.h"
#include "video_core/vulkan_common/vulkan_memory_allocator.h"
#include "video_core/vulkan_common/vulkan_wrapper.h"
namespace Core {
class System;
}
namespace Core::Frontend {
class EmuWindow;
}
namespace Tegra::Engines {
class Maxwell3D;
}
namespace Vulkan {
struct ScreenInfo;
class StateTracker;
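// Services Maxwell DMA buffer copies and clears directly through the Vulkan
// buffer cache, so they can skip a round trip through guest memory.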
class AccelerateDMA : public Tegra::Engines::AccelerateDMAInterface {
public:
explicit AccelerateDMA(BufferCache& buffer_cache);
bool BufferCopy(GPUVAddr start_address, GPUVAddr end_address, u64 amount) override;
bool BufferClear(GPUVAddr src_address, u64 amount, u32 value) override;
private:
BufferCache& buffer_cache;
};
class RasterizerVulkan final : public VideoCore::RasterizerAccelerated,
protected VideoCommon::ChannelSetupCaches<VideoCommon::ChannelInfo> {
public:
explicit RasterizerVulkan(Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu_,
Core::Memory::Memory& cpu_memory_, ScreenInfo& screen_info_,
const Device& device_, MemoryAllocator& memory_allocator_,
StateTracker& state_tracker_, Scheduler& scheduler_);
~RasterizerVulkan() override;
void Draw(bool is_indexed, u32 instance_count) override;
void Clear() override;
void DispatchCompute() override;
void ResetCounter(VideoCore::QueryType type) override;
void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional<u64> timestamp) override;
void BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, u32 size) override;
void DisableGraphicsUniformBuffer(size_t stage, u32 index) override;
void FlushAll() override;
void FlushRegion(VAddr addr, u64 size) override;
bool MustFlushRegion(VAddr addr, u64 size) override;
void InvalidateRegion(VAddr addr, u64 size) override;
void OnCPUWrite(VAddr addr, u64 size) override;
void InvalidateGPUCache() override;
void UnmapMemory(VAddr addr, u64 size) override;
void ModifyGPUMemory(size_t as_id, GPUVAddr addr, u64 size) override;
void SignalFence(std::function<void()>&& func) override;
void SyncOperation(std::function<void()>&& func) override;
void SignalSyncPoint(u32 value) override;
void SignalReference() override;
void ReleaseFences() override;
void FlushAndInvalidateRegion(VAddr addr, u64 size) override;
void WaitForIdle() override;
void FragmentBarrier() override;
void TiledCacheBarrier() override;
void FlushCommands() override;
void TickFrame() override;
bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Surface& src,
const Tegra::Engines::Fermi2D::Surface& dst,
const Tegra::Engines::Fermi2D::Config& copy_config) override;
Tegra::Engines::AccelerateDMAInterface& AccessAccelerateDMA() override;
void AccelerateInlineToMemory(GPUVAddr address, size_t copy_size,
std::span<const u8> memory) override;
bool AccelerateDisplay(const Tegra::FramebufferConfig& config, VAddr framebuffer_addr,
u32 pixel_stride) override;
void LoadDiskResources(u64 title_id, std::stop_token stop_loading,
const VideoCore::DiskResourceLoadCallback& callback) override;
void InitializeChannel(Tegra::Control::ChannelState& channel) override;
void BindChannel(Tegra::Control::ChannelState& channel) override;
void ReleaseChannel(s32 channel_id) override;
private:
static constexpr size_t MAX_TEXTURES = 192;
static constexpr size_t MAX_IMAGES = 48;
static constexpr size_t MAX_IMAGE_VIEWS = MAX_TEXTURES + MAX_IMAGES;
static constexpr VkDeviceSize DEFAULT_BUFFER_SIZE = 4 * sizeof(float);
void FlushWork();
void UpdateDynamicStates();
void BeginTransformFeedback();
void EndTransformFeedback();
void UpdateViewportsState(Tegra::Engines::Maxwell3D::Regs& regs);
void UpdateScissorsState(Tegra::Engines::Maxwell3D::Regs& regs);
void UpdateDepthBias(Tegra::Engines::Maxwell3D::Regs& regs);
void UpdateBlendConstants(Tegra::Engines::Maxwell3D::Regs& regs);
void UpdateDepthBounds(Tegra::Engines::Maxwell3D::Regs& regs);
void UpdateStencilFaces(Tegra::Engines::Maxwell3D::Regs& regs);
void UpdateLineWidth(Tegra::Engines::Maxwell3D::Regs& regs);
void UpdateCullMode(Tegra::Engines::Maxwell3D::Regs& regs);
void UpdateDepthBoundsTestEnable(Tegra::Engines::Maxwell3D::Regs& regs);
void UpdateDepthTestEnable(Tegra::Engines::Maxwell3D::Regs& regs);
void UpdateDepthWriteEnable(Tegra::Engines::Maxwell3D::Regs& regs);
void UpdateDepthCompareOp(Tegra::Engines::Maxwell3D::Regs& regs);
void UpdateFrontFace(Tegra::Engines::Maxwell3D::Regs& regs);
void UpdateStencilOp(Tegra::Engines::Maxwell3D::Regs& regs);
void UpdateStencilTestEnable(Tegra::Engines::Maxwell3D::Regs& regs);
void UpdateVertexInput(Tegra::Engines::Maxwell3D::Regs& regs);
void BindInlineIndexBuffer();
Tegra::GPU& gpu;
ScreenInfo& screen_info;
const Device& device;
MemoryAllocator& memory_allocator;
StateTracker& state_tracker;
Scheduler& scheduler;
StagingBufferPool staging_pool;
DescriptorPool descriptor_pool;
UpdateDescriptorQueue update_descriptor_queue;
BlitImageHelper blit_image;
RenderPassCache render_pass_cache;
TextureCacheRuntime texture_cache_runtime;
TextureCache texture_cache;
BufferCacheRuntime buffer_cache_runtime;
BufferCache buffer_cache;
PipelineCache pipeline_cache;
QueryCache query_cache;
AccelerateDMA accelerate_dma;
FenceManager fence_manager;
vk::Event wfi_event;
boost::container::static_vector<u32, MAX_IMAGE_VIEWS> image_view_indices;
std::array<VideoCommon::ImageViewId, MAX_IMAGE_VIEWS> image_view_ids;
boost::container::static_vector<VkSampler, MAX_TEXTURES> sampler_handles;
u32 draw_counter = 0;
};
} // namespace Vulkan

View File

@@ -1,95 +1,95 @@
// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include <unordered_map>
#include <boost/container/static_vector.hpp>
#include "video_core/renderer_vulkan/maxwell_to_vk.h"
#include "video_core/renderer_vulkan/vk_render_pass_cache.h"
#include "video_core/surface.h"
#include "video_core/vulkan_common/vulkan_device.h"
#include "video_core/vulkan_common/vulkan_wrapper.h"
namespace Vulkan {
namespace {
using VideoCore::Surface::PixelFormat;
VkAttachmentDescription AttachmentDescription(const Device& device, PixelFormat format,
VkSampleCountFlagBits samples) {
using MaxwellToVK::SurfaceFormat;
return {
.flags = VK_ATTACHMENT_DESCRIPTION_MAY_ALIAS_BIT,
.format = SurfaceFormat(device, FormatType::Optimal, true, format).format,
.samples = samples,
.loadOp = VK_ATTACHMENT_LOAD_OP_LOAD,
.storeOp = VK_ATTACHMENT_STORE_OP_STORE,
.stencilLoadOp = VK_ATTACHMENT_LOAD_OP_LOAD,
.stencilStoreOp = VK_ATTACHMENT_STORE_OP_STORE,
.initialLayout = VK_IMAGE_LAYOUT_GENERAL,
.finalLayout = VK_IMAGE_LAYOUT_GENERAL,
};
}
} // Anonymous namespace
RenderPassCache::RenderPassCache(const Device& device_) : device{&device_} {}
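// Returns a render pass matching the given attachment formats and sample
// count, creating and caching it on first use (the cache is mutex-guarded).
// All attachments load and store their contents and remain in the GENERAL
// image layout, so the cached pass never clears or transitions the images.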
VkRenderPass RenderPassCache::Get(const RenderPassKey& key) {
std::scoped_lock lock{mutex};
const auto [pair, is_new] = cache.try_emplace(key);
if (!is_new) {
return *pair->second;
}
boost::container::static_vector<VkAttachmentDescription, 9> descriptions;
std::array<VkAttachmentReference, 8> references{};
u32 num_attachments{};
u32 num_colors{};
for (size_t index = 0; index < key.color_formats.size(); ++index) {
const PixelFormat format{key.color_formats[index]};
const bool is_valid{format != PixelFormat::Invalid};
references[index] = VkAttachmentReference{
.attachment = is_valid ? num_colors : VK_ATTACHMENT_UNUSED,
.layout = VK_IMAGE_LAYOUT_GENERAL,
};
if (is_valid) {
descriptions.push_back(AttachmentDescription(*device, format, key.samples));
num_attachments = static_cast<u32>(index + 1);
++num_colors;
}
}
const bool has_depth{key.depth_format != PixelFormat::Invalid};
VkAttachmentReference depth_reference{};
if (has_depth) {
depth_reference = VkAttachmentReference{
.attachment = num_colors,
.layout = VK_IMAGE_LAYOUT_GENERAL,
};
descriptions.push_back(AttachmentDescription(*device, key.depth_format, key.samples));
}
const VkSubpassDescription subpass{
.flags = 0,
.pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,
.inputAttachmentCount = 0,
.pInputAttachments = nullptr,
.colorAttachmentCount = num_attachments,
.pColorAttachments = references.data(),
.pResolveAttachments = nullptr,
.pDepthStencilAttachment = has_depth ? &depth_reference : nullptr,
.preserveAttachmentCount = 0,
.pPreserveAttachments = nullptr,
};
pair->second = device->GetLogical().CreateRenderPass({
.sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
.attachmentCount = static_cast<u32>(descriptions.size()),
.pAttachments = descriptions.empty() ? nullptr : descriptions.data(),
.subpassCount = 1,
.pSubpasses = &subpass,
.dependencyCount = 0,
.pDependencies = nullptr,
});
return *pair->second;
}
} // namespace Vulkan

View File

@@ -1,54 +1,54 @@
// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include <mutex>
#include <unordered_map>
#include "video_core/surface.h"
#include "video_core/vulkan_common/vulkan_wrapper.h"
namespace Vulkan {
struct RenderPassKey {
auto operator<=>(const RenderPassKey&) const noexcept = default;
std::array<VideoCore::Surface::PixelFormat, 8> color_formats;
VideoCore::Surface::PixelFormat depth_format;
VkSampleCountFlagBits samples;
};
} // namespace Vulkan
namespace std {
template <>
struct hash<Vulkan::RenderPassKey> {
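// Mixes the key into a single size_t: the depth format lands at bit 48, the
// sample count at bit 52, and each of the eight color formats is XORed in at
// bit i * 6. Since a PixelFormat value may need more than 6 bits, slices can
// overlap; the occasional collision is harmless for std::unordered_map.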
[[nodiscard]] size_t operator()(const Vulkan::RenderPassKey& key) const noexcept {
size_t value = static_cast<size_t>(key.depth_format) << 48;
value ^= static_cast<size_t>(key.samples) << 52;
for (size_t i = 0; i < key.color_formats.size(); ++i) {
value ^= static_cast<size_t>(key.color_formats[i]) << (i * 6);
}
return value;
}
};
} // namespace std
namespace Vulkan {
class Device;
class RenderPassCache {
public:
explicit RenderPassCache(const Device& device_);
VkRenderPass Get(const RenderPassKey& key);
private:
const Device* device{};
std::unordered_map<RenderPassKey, vk::RenderPass> cache;
std::mutex mutex;
};
} // namespace Vulkan

View File

@@ -1,60 +1,60 @@
// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include <optional>
#include "video_core/renderer_vulkan/vk_master_semaphore.h"
#include "video_core/renderer_vulkan/vk_resource_pool.h"
namespace Vulkan {
ResourcePool::ResourcePool(MasterSemaphore& master_semaphore_, size_t grow_step_)
: master_semaphore{&master_semaphore_}, grow_step{grow_step_} {}
size_t ResourcePool::CommitResource() {
// Refresh semaphore to query updated results
master_semaphore->Refresh();
const u64 gpu_tick = master_semaphore->KnownGpuTick();
const auto search = [this, gpu_tick](size_t begin, size_t end) -> std::optional<size_t> {
for (size_t iterator = begin; iterator < end; ++iterator) {
if (gpu_tick >= ticks[iterator]) {
ticks[iterator] = master_semaphore->CurrentTick();
return iterator;
}
}
return std::nullopt;
};
// Try to find a free resource from the hinted position to the end.
std::optional<size_t> found = search(hint_iterator, ticks.size());
if (!found) {
// Search from beginning to the hinted position.
found = search(0, hint_iterator);
if (!found) {
// Both searches failed, the pool is full; handle it.
const size_t free_resource = ManageOverflow();
ticks[free_resource] = master_semaphore->CurrentTick();
found = free_resource;
}
}
// Free iterator is hinted to the resource after the one that's been committed.
hint_iterator = (*found + 1) % ticks.size();
return *found;
}
size_t ResourcePool::ManageOverflow() {
const size_t old_capacity = ticks.size();
Grow();
// The last entry is guaranteed to be free, since it's the first element of the freshly
// allocated resources.
return old_capacity;
}
void ResourcePool::Grow() {
const size_t old_capacity = ticks.size();
ticks.resize(old_capacity + grow_step);
Allocate(old_capacity, old_capacity + grow_step);
}
} // namespace Vulkan

View File

@@ -1,50 +1,50 @@
// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include <vector>
#include "common/common_types.h"
namespace Vulkan {
class MasterSemaphore;
/**
* Handles a pool of resources protected by fences. Manages resource overflow by allocating more
* resources.
*/
class ResourcePool {
public:
explicit ResourcePool() = default;
explicit ResourcePool(MasterSemaphore& master_semaphore, size_t grow_step);
virtual ~ResourcePool() = default;
ResourcePool& operator=(ResourcePool&&) noexcept = default;
ResourcePool(ResourcePool&&) noexcept = default;
ResourcePool& operator=(const ResourcePool&) = default;
ResourcePool(const ResourcePool&) = default;
protected:
size_t CommitResource();
/// Called when a chunk of resources has to be allocated.
virtual void Allocate(size_t begin, size_t end) = 0;
private:
/// Manages pool overflow by allocating new resources.
size_t ManageOverflow();
/// Allocates a new page of resources.
void Grow();
MasterSemaphore* master_semaphore{};
size_t grow_step = 0; ///< Number of new resources created after an overflow
size_t hint_iterator = 0; ///< Hint to where the next free resource is likely to be found
std::vector<u64> ticks; ///< Ticks for each resource
};
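// Illustrative use (hypothetical subclass, not part of these sources): a
// derived pool implements Allocate() to create the slots [begin, end) and
// calls CommitResource() to obtain an index whose previous GPU work has
// already finished, e.g.:
//
//   class SamplerPool final : public ResourcePool {
//   public:
//       size_t Commit() { return CommitResource(); }
//   protected:
//       void Allocate(size_t begin, size_t end) override { /* create slots */ }
//   };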
} // namespace Vulkan

View File

@@ -1,295 +1,295 @@
// SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include <memory>
#include <mutex>
#include <thread>
#include <utility>
#include "common/microprofile.h"
#include "common/thread.h"
#include "video_core/renderer_vulkan/vk_command_pool.h"
#include "video_core/renderer_vulkan/vk_master_semaphore.h"
#include "video_core/renderer_vulkan/vk_query_cache.h"
#include "video_core/renderer_vulkan/vk_scheduler.h"
#include "video_core/renderer_vulkan/vk_state_tracker.h"
#include "video_core/renderer_vulkan/vk_texture_cache.h"
#include "video_core/vulkan_common/vulkan_device.h"
#include "video_core/vulkan_common/vulkan_wrapper.h"
namespace Vulkan {
MICROPROFILE_DECLARE(Vulkan_WaitForWorker);
void Scheduler::CommandChunk::ExecuteAll(vk::CommandBuffer cmdbuf) {
auto command = first;
while (command != nullptr) {
auto next = command->GetNext();
command->Execute(cmdbuf);
command->~Command();
command = next;
}
submit = false;
command_offset = 0;
first = nullptr;
last = nullptr;
}
Scheduler::Scheduler(const Device& device_, StateTracker& state_tracker_)
: device{device_}, state_tracker{state_tracker_},
master_semaphore{std::make_unique<MasterSemaphore>(device)},
command_pool{std::make_unique<CommandPool>(*master_semaphore, device)} {
AcquireNewChunk();
AllocateWorkerCommandBuffer();
worker_thread = std::jthread([this](std::stop_token token) { WorkerThread(token); });
}
Scheduler::~Scheduler() = default;
void Scheduler::Flush(VkSemaphore signal_semaphore, VkSemaphore wait_semaphore) {
SubmitExecution(signal_semaphore, wait_semaphore);
AllocateNewContext();
}
void Scheduler::Finish(VkSemaphore signal_semaphore, VkSemaphore wait_semaphore) {
const u64 presubmit_tick = CurrentTick();
SubmitExecution(signal_semaphore, wait_semaphore);
WaitWorker();
Wait(presubmit_tick);
AllocateNewContext();
}
void Scheduler::WaitWorker() {
MICROPROFILE_SCOPE(Vulkan_WaitForWorker);
DispatchWork();
std::unique_lock lock{work_mutex};
wait_cv.wait(lock, [this] { return work_queue.empty(); });
}
void Scheduler::DispatchWork() {
if (chunk->Empty()) {
return;
}
{
std::scoped_lock lock{work_mutex};
work_queue.push(std::move(chunk));
}
work_cv.notify_one();
AcquireNewChunk();
}
void Scheduler::RequestRenderpass(const Framebuffer* framebuffer) {
const VkRenderPass renderpass = framebuffer->RenderPass();
const VkFramebuffer framebuffer_handle = framebuffer->Handle();
const VkExtent2D render_area = framebuffer->RenderArea();
if (renderpass == state.renderpass && framebuffer_handle == state.framebuffer &&
render_area.width == state.render_area.width &&
render_area.height == state.render_area.height) {
return;
}
EndRenderPass();
state.renderpass = renderpass;
state.framebuffer = framebuffer_handle;
state.render_area = render_area;
Record([renderpass, framebuffer_handle, render_area](vk::CommandBuffer cmdbuf) {
const VkRenderPassBeginInfo renderpass_bi{
.sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO,
.pNext = nullptr,
.renderPass = renderpass,
.framebuffer = framebuffer_handle,
.renderArea =
{
.offset = {.x = 0, .y = 0},
.extent = render_area,
},
.clearValueCount = 0,
.pClearValues = nullptr,
};
cmdbuf.BeginRenderPass(renderpass_bi, VK_SUBPASS_CONTENTS_INLINE);
});
num_renderpass_images = framebuffer->NumImages();
renderpass_images = framebuffer->Images();
renderpass_image_ranges = framebuffer->ImageRanges();
}
void Scheduler::RequestOutsideRenderPassOperationContext() {
EndRenderPass();
}
bool Scheduler::UpdateGraphicsPipeline(GraphicsPipeline* pipeline) {
if (state.graphics_pipeline == pipeline) {
return false;
}
state.graphics_pipeline = pipeline;
return true;
}
bool Scheduler::UpdateRescaling(bool is_rescaling) {
if (state.rescaling_defined && is_rescaling == state.is_rescaling) {
return false;
}
state.rescaling_defined = true;
state.is_rescaling = is_rescaling;
return true;
}
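// Worker loop: pops recorded chunks under work_mutex, replays them into the
// current command buffer, allocates a fresh buffer after each chunk that
// carried a submission, and recycles spent chunks into chunk_reserve.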
void Scheduler::WorkerThread(std::stop_token stop_token) {
Common::SetCurrentThreadName("VulkanWorker");
do {
std::unique_ptr<CommandChunk> work;
bool has_submit{false};
{
std::unique_lock lock{work_mutex};
if (work_queue.empty()) {
wait_cv.notify_all();
}
work_cv.wait(lock, stop_token, [this] { return !work_queue.empty(); });
if (stop_token.stop_requested()) {
continue;
}
work = std::move(work_queue.front());
work_queue.pop();
has_submit = work->HasSubmit();
work->ExecuteAll(current_cmdbuf);
}
if (has_submit) {
AllocateWorkerCommandBuffer();
}
std::scoped_lock reserve_lock{reserve_mutex};
chunk_reserve.push_back(std::move(work));
} while (!stop_token.stop_requested());
}
void Scheduler::AllocateWorkerCommandBuffer() {
current_cmdbuf = vk::CommandBuffer(command_pool->Commit(), device.GetDispatchLoader());
current_cmdbuf.Begin({
.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,
.pNext = nullptr,
.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT,
.pInheritanceInfo = nullptr,
});
}
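// Submits all recorded work to the graphics queue. The timeline semaphore
// waits on the previous tick (signal_value - 1) and signals the new one; the
// optional binary wait/signal semaphores are chained into the same submit.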
void Scheduler::SubmitExecution(VkSemaphore signal_semaphore, VkSemaphore wait_semaphore) {
EndPendingOperations();
InvalidateState();
const u64 signal_value = master_semaphore->NextTick();
Record([signal_semaphore, wait_semaphore, signal_value, this](vk::CommandBuffer cmdbuf) {
cmdbuf.End();
const VkSemaphore timeline_semaphore = master_semaphore->Handle();
const u32 num_signal_semaphores = signal_semaphore ? 2U : 1U;
const std::array signal_values{signal_value, u64(0)};
const std::array signal_semaphores{timeline_semaphore, signal_semaphore};
const u32 num_wait_semaphores = wait_semaphore ? 2U : 1U;
const std::array wait_values{signal_value - 1, u64(1)};
const std::array wait_semaphores{timeline_semaphore, wait_semaphore};
static constexpr std::array<VkPipelineStageFlags, 2> wait_stage_masks{
VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT,
};
const VkTimelineSemaphoreSubmitInfoKHR timeline_si{
.sType = VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO_KHR,
.pNext = nullptr,
.waitSemaphoreValueCount = num_wait_semaphores,
.pWaitSemaphoreValues = wait_values.data(),
.signalSemaphoreValueCount = num_signal_semaphores,
.pSignalSemaphoreValues = signal_values.data(),
};
const VkSubmitInfo submit_info{
.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO,
.pNext = &timeline_si,
.waitSemaphoreCount = num_wait_semaphores,
.pWaitSemaphores = wait_semaphores.data(),
.pWaitDstStageMask = wait_stage_masks.data(),
.commandBufferCount = 1,
.pCommandBuffers = cmdbuf.address(),
.signalSemaphoreCount = num_signal_semaphores,
.pSignalSemaphores = signal_semaphores.data(),
};
switch (const VkResult result = device.GetGraphicsQueue().Submit(submit_info)) {
case VK_SUCCESS:
break;
case VK_ERROR_DEVICE_LOST:
device.ReportLoss();
[[fallthrough]];
default:
vk::Check(result);
}
});
chunk->MarkSubmit();
DispatchWork();
}
void Scheduler::AllocateNewContext() {
// Enable counters once again. These are disabled when a command buffer is finished.
if (query_cache) {
query_cache->UpdateCounters();
}
}
void Scheduler::InvalidateState() {
state.graphics_pipeline = nullptr;
state.rescaling_defined = false;
state_tracker.InvalidateCommandBufferState();
}
void Scheduler::EndPendingOperations() {
query_cache->DisableStreams();
EndRenderPass();
}
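// Ends the active render pass, if any, and issues one image-memory barrier
// per attachment so that later reads and attachment accesses observe the
// writes made inside the pass; image layouts stay GENERAL throughout.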
void Scheduler::EndRenderPass() {
if (!state.renderpass) {
return;
}
Record([num_images = num_renderpass_images, images = renderpass_images,
ranges = renderpass_image_ranges](vk::CommandBuffer cmdbuf) {
std::array<VkImageMemoryBarrier, 9> barriers;
for (size_t i = 0; i < num_images; ++i) {
barriers[i] = VkImageMemoryBarrier{
.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
.pNext = nullptr,
.srcAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT |
VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT,
.dstAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT |
VK_ACCESS_COLOR_ATTACHMENT_READ_BIT |
VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT |
VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT |
VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT,
.oldLayout = VK_IMAGE_LAYOUT_GENERAL,
.newLayout = VK_IMAGE_LAYOUT_GENERAL,
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.image = images[i],
.subresourceRange = ranges[i],
};
}
cmdbuf.EndRenderPass();
cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT |
VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT |
VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT,
VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, 0, nullptr, nullptr,
vk::Span(barriers.data(), num_images));
});
state.renderpass = nullptr;
num_renderpass_images = 0;
}
void Scheduler::AcquireNewChunk() {
std::scoped_lock lock{reserve_mutex};
if (chunk_reserve.empty()) {
chunk = std::make_unique<CommandChunk>();
return;
}
chunk = std::move(chunk_reserve.back());
chunk_reserve.pop_back();
}
} // namespace Vulkan

View File

@@ -1,234 +1,234 @@
// SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include <condition_variable>
#include <cstddef>
#include <memory>
#include <thread>
#include <utility>
#include <queue>
#include "common/alignment.h"
#include "common/common_types.h"
#include "video_core/renderer_vulkan/vk_master_semaphore.h"
#include "video_core/vulkan_common/vulkan_wrapper.h"
namespace Vulkan {
class CommandPool;
class Device;
class Framebuffer;
class GraphicsPipeline;
class StateTracker;
class QueryCache;
/// The scheduler abstracts command buffer and fence management with an interface that's able to do
/// OpenGL-like operations on Vulkan command buffers.
class Scheduler {
public:
explicit Scheduler(const Device& device, StateTracker& state_tracker);
~Scheduler();
/// Sends the current execution context to the GPU.
void Flush(VkSemaphore signal_semaphore = nullptr, VkSemaphore wait_semaphore = nullptr);
/// Sends the current execution context to the GPU and waits for it to complete.
void Finish(VkSemaphore signal_semaphore = nullptr, VkSemaphore wait_semaphore = nullptr);
/// Waits for the worker thread to finish executing everything. After this function returns it's
/// safe to touch worker resources.
void WaitWorker();
/// Sends currently recorded work to the worker thread.
void DispatchWork();
/// Requests to begin a renderpass.
void RequestRenderpass(const Framebuffer* framebuffer);
/// Requests the current execution context to be able to execute operations only allowed outside
/// of a renderpass.
void RequestOutsideRenderPassOperationContext();
/// Update the pipeline to the current execution context.
bool UpdateGraphicsPipeline(GraphicsPipeline* pipeline);
/// Update the rescaling state. Returns true if the state has to be updated.
bool UpdateRescaling(bool is_rescaling);
/// Invalidates current command buffer state except for render passes
void InvalidateState();
/// Assigns the query cache.
void SetQueryCache(QueryCache& query_cache_) {
query_cache = &query_cache_;
}
/// Send work to a separate thread.
template <typename T>
void Record(T&& command) {
if (chunk->Record(command)) {
return;
}
DispatchWork();
(void)chunk->Record(command);
}
/// Returns the current command buffer tick.
[[nodiscard]] u64 CurrentTick() const noexcept {
return master_semaphore->CurrentTick();
}
/// Returns true when a tick has been triggered by the GPU.
[[nodiscard]] bool IsFree(u64 tick) const noexcept {
return master_semaphore->IsFree(tick);
}
/// Waits for the given tick to trigger on the GPU.
void Wait(u64 tick) {
if (tick >= master_semaphore->CurrentTick()) {
// Make sure we are not waiting for the current tick without signalling
Flush();
}
master_semaphore->Wait(tick);
}
/// Returns the master timeline semaphore.
[[nodiscard]] MasterSemaphore& GetMasterSemaphore() const noexcept {
return *master_semaphore;
}
private:
class Command {
public:
virtual ~Command() = default;
virtual void Execute(vk::CommandBuffer cmdbuf) const = 0;
Command* GetNext() const {
return next;
}
void SetNext(Command* next_) {
next = next_;
}
private:
Command* next = nullptr;
};
template <typename T>
class TypedCommand final : public Command {
public:
explicit TypedCommand(T&& command_) : command{std::move(command_)} {}
~TypedCommand() override = default;
TypedCommand(TypedCommand&&) = delete;
TypedCommand& operator=(TypedCommand&&) = delete;
void Execute(vk::CommandBuffer cmdbuf) const override {
command(cmdbuf);
}
private:
T command;
};
class CommandChunk final {
public:
void ExecuteAll(vk::CommandBuffer cmdbuf);
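// Placement-news the command into the fixed 32 KiB inline buffer and links it
// into the chunk's singly linked command list. Returns false once the buffer
// is full so the caller can dispatch this chunk and retry on a fresh one.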
template <typename T>
bool Record(T& command) {
using FuncType = TypedCommand<T>;
static_assert(sizeof(FuncType) < sizeof(data), "Lambda is too large");
command_offset = Common::AlignUp(command_offset, alignof(FuncType));
if (command_offset > sizeof(data) - sizeof(FuncType)) {
return false;
}
Command* const current_last = last;
last = new (data.data() + command_offset) FuncType(std::move(command));
if (current_last) {
current_last->SetNext(last);
} else {
first = last;
}
command_offset += sizeof(FuncType);
return true;
}
void MarkSubmit() {
submit = true;
}
bool Empty() const {
return command_offset == 0;
}
bool HasSubmit() const {
return submit;
}
private:
Command* first = nullptr;
Command* last = nullptr;
size_t command_offset = 0;
bool submit = false;
alignas(std::max_align_t) std::array<u8, 0x8000> data{};
};
struct State {
VkRenderPass renderpass = nullptr;
VkFramebuffer framebuffer = nullptr;
VkExtent2D render_area = {0, 0};
GraphicsPipeline* graphics_pipeline = nullptr;
bool is_rescaling = false;
bool rescaling_defined = false;
};
void WorkerThread(std::stop_token stop_token);
void AllocateWorkerCommandBuffer();
void SubmitExecution(VkSemaphore signal_semaphore, VkSemaphore wait_semaphore);
void AllocateNewContext();
void EndPendingOperations();
void EndRenderPass();
void AcquireNewChunk();
const Device& device;
StateTracker& state_tracker;
std::unique_ptr<MasterSemaphore> master_semaphore;
std::unique_ptr<CommandPool> command_pool;
QueryCache* query_cache = nullptr;
vk::CommandBuffer current_cmdbuf;
std::unique_ptr<CommandChunk> chunk;
State state;
u32 num_renderpass_images = 0;
std::array<VkImage, 9> renderpass_images{};
std::array<VkImageSubresourceRange, 9> renderpass_image_ranges{};
std::queue<std::unique_ptr<CommandChunk>> work_queue;
std::vector<std::unique_ptr<CommandChunk>> chunk_reserve;
std::mutex reserve_mutex;
std::mutex work_mutex;
std::condition_variable_any work_cv;
std::condition_variable wait_cv;
std::jthread worker_thread;
};
} // namespace Vulkan
// SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include <condition_variable>
#include <cstddef>
#include <memory>
#include <thread>
#include <utility>
#include <queue>
#include "common/alignment.h"
#include "common/common_types.h"
#include "video_core/renderer_vulkan/vk_master_semaphore.h"
#include "video_core/vulkan_common/vulkan_wrapper.h"
namespace Vulkan {
class CommandPool;
class Device;
class Framebuffer;
class GraphicsPipeline;
class StateTracker;
class QueryCache;
/// The scheduler abstracts command buffer and fence management with an interface that's able to do
/// OpenGL-like operations on Vulkan command buffers.
class Scheduler {
public:
explicit Scheduler(const Device& device, StateTracker& state_tracker);
~Scheduler();
/// Sends the current execution context to the GPU.
void Flush(VkSemaphore signal_semaphore = nullptr, VkSemaphore wait_semaphore = nullptr);
/// Sends the current execution context to the GPU and waits for it to complete.
void Finish(VkSemaphore signal_semaphore = nullptr, VkSemaphore wait_semaphore = nullptr);
/// Waits for the worker thread to finish executing everything. After this function returns it's
/// safe to touch worker resources.
void WaitWorker();
/// Sends currently recorded work to the worker thread.
void DispatchWork();
/// Requests to begin a renderpass.
void RequestRenderpass(const Framebuffer* framebuffer);
/// Requests the current executino context to be able to execute operations only allowed outside
/// of a renderpass.
void RequestOutsideRenderPassOperationContext();
/// Update the pipeline to the current execution context.
bool UpdateGraphicsPipeline(GraphicsPipeline* pipeline);
/// Update the rescaling state. Returns true if the state has to be updated.
bool UpdateRescaling(bool is_rescaling);
/// Invalidates current command buffer state except for render passes
void InvalidateState();
/// Assigns the query cache.
void SetQueryCache(QueryCache& query_cache_) {
query_cache = &query_cache_;
}
/// Send work to a separate thread.
template <typename T>
void Record(T&& command) {
if (chunk->Record(command)) {
return;
}
DispatchWork();
(void)chunk->Record(command);
}
/// Returns the current command buffer tick.
[[nodiscard]] u64 CurrentTick() const noexcept {
return master_semaphore->CurrentTick();
}
/// Returns true when a tick has been triggered by the GPU.
[[nodiscard]] bool IsFree(u64 tick) const noexcept {
return master_semaphore->IsFree(tick);
}
/// Waits for the given tick to trigger on the GPU.
void Wait(u64 tick) {
if (tick >= master_semaphore->CurrentTick()) {
// Make sure we are not waiting for the current tick without signalling
Flush();
}
master_semaphore->Wait(tick);
}
/// Returns the master timeline semaphore.
[[nodiscard]] MasterSemaphore& GetMasterSemaphore() const noexcept {
return *master_semaphore;
}
private:
class Command {
public:
virtual ~Command() = default;
virtual void Execute(vk::CommandBuffer cmdbuf) const = 0;
Command* GetNext() const {
return next;
}
void SetNext(Command* next_) {
next = next_;
}
private:
Command* next = nullptr;
};
template <typename T>
class TypedCommand final : public Command {
public:
explicit TypedCommand(T&& command_) : command{std::move(command_)} {}
~TypedCommand() override = default;
TypedCommand(TypedCommand&&) = delete;
TypedCommand& operator=(TypedCommand&&) = delete;
void Execute(vk::CommandBuffer cmdbuf) const override {
command(cmdbuf);
}
private:
T command;
};
class CommandChunk final {
public:
void ExecuteAll(vk::CommandBuffer cmdbuf);
template <typename T>
bool Record(T& command) {
using FuncType = TypedCommand<T>;
static_assert(sizeof(FuncType) < sizeof(data), "Lambda is too large");
command_offset = Common::AlignUp(command_offset, alignof(FuncType));
if (command_offset > sizeof(data) - sizeof(FuncType)) {
return false;
}
Command* const current_last = last;
last = new (data.data() + command_offset) FuncType(std::move(command));
if (current_last) {
current_last->SetNext(last);
} else {
first = last;
}
command_offset += sizeof(FuncType);
return true;
}
void MarkSubmit() {
submit = true;
}
bool Empty() const {
return command_offset == 0;
}
bool HasSubmit() const {
return submit;
}
private:
Command* first = nullptr;
Command* last = nullptr;
size_t command_offset = 0;
bool submit = false;
alignas(std::max_align_t) std::array<u8, 0x8000> data{};
};
struct State {
VkRenderPass renderpass = nullptr;
VkFramebuffer framebuffer = nullptr;
VkExtent2D render_area = {0, 0};
GraphicsPipeline* graphics_pipeline = nullptr;
bool is_rescaling = false;
bool rescaling_defined = false;
};
void WorkerThread(std::stop_token stop_token);
void AllocateWorkerCommandBuffer();
void SubmitExecution(VkSemaphore signal_semaphore, VkSemaphore wait_semaphore);
void AllocateNewContext();
void EndPendingOperations();
void EndRenderPass();
void AcquireNewChunk();
const Device& device;
StateTracker& state_tracker;
std::unique_ptr<MasterSemaphore> master_semaphore;
std::unique_ptr<CommandPool> command_pool;
QueryCache* query_cache = nullptr;
vk::CommandBuffer current_cmdbuf;
std::unique_ptr<CommandChunk> chunk;
State state;
u32 num_renderpass_images = 0;
std::array<VkImage, 9> renderpass_images{};
std::array<VkImageSubresourceRange, 9> renderpass_image_ranges{};
std::queue<std::unique_ptr<CommandChunk>> work_queue;
std::vector<std::unique_ptr<CommandChunk>> chunk_reserve;
std::mutex reserve_mutex;
std::mutex work_mutex;
std::condition_variable_any work_cv;
std::condition_variable wait_cv;
std::jthread worker_thread;
};
} // namespace Vulkan
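// A minimal, standalone sketch of the type-erased command recording that
// Scheduler::Record and CommandChunk implement above: lambdas are
// placement-new'd into a fixed byte buffer and chained through an intrusive
// list. All names here are illustrative and not part of yuzu; the "command
// buffer" is a plain int so the sketch compiles and runs on its own.
#include <array>
#include <cstddef>
#include <cstdio>
#include <new>
#include <type_traits>
#include <utility>

namespace sketch {
struct Command {
    virtual ~Command() = default;
    virtual void Execute(int cmdbuf) const = 0;
    Command* next = nullptr;
};

template <typename T>
struct TypedCommand final : Command {
    explicit TypedCommand(T&& command_) : command{std::move(command_)} {}
    void Execute(int cmdbuf) const override {
        command(cmdbuf);
    }
    T command;
};

class Chunk {
public:
    template <typename T>
    bool Record(T&& command) {
        using Func = TypedCommand<std::decay_t<T>>;
        static_assert(sizeof(Func) < sizeof(data), "Lambda is too large");
        offset = (offset + alignof(Func) - 1) & ~(alignof(Func) - 1);
        if (offset > sizeof(data) - sizeof(Func)) {
            return false; // Chunk is full; the caller dispatches it and retries
        }
        Command* const cmd = new (data.data() + offset) Func(std::forward<T>(command));
        if (last) {
            last->next = cmd;
        } else {
            first = cmd;
        }
        last = cmd;
        offset += sizeof(Func);
        return true;
    }

    void ExecuteAll(int cmdbuf) {
        for (Command* cmd = first; cmd != nullptr;) {
            Command* const next = cmd->next;
            cmd->Execute(cmdbuf);
            cmd->~Command(); // Placement-new'd commands are destroyed by hand
            cmd = next;
        }
        first = last = nullptr;
        offset = 0;
    }

private:
    Command* first = nullptr;
    Command* last = nullptr;
    std::size_t offset = 0;
    alignas(std::max_align_t) std::array<unsigned char, 0x1000> data{};
};
} // namespace sketch

int main() {
    sketch::Chunk chunk;
    chunk.Record([](int cmdbuf) { std::printf("draw on %d\n", cmdbuf); });
    chunk.Record([value = 42](int cmdbuf) { std::printf("push %d on %d\n", value, cmdbuf); });
    chunk.ExecuteAll(7);
}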


@@ -1,23 +1,23 @@
// SPDX-FileCopyrightText: Copyright 2018 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include <cstring>
#include "common/common_types.h"
#include "video_core/renderer_vulkan/vk_shader_util.h"
#include "video_core/vulkan_common/vulkan_device.h"
#include "video_core/vulkan_common/vulkan_wrapper.h"
namespace Vulkan {
vk::ShaderModule BuildShader(const Device& device, std::span<const u32> code) {
return device.GetLogical().CreateShaderModule({
.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
.codeSize = code.size_bytes(),
.pCode = code.data(),
});
}
} // namespace Vulkan
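// For reference, what BuildShader wraps when expressed against the raw Vulkan
// C API: a hedged sketch assuming only a valid VkDevice and SPIR-V words
// already loaded into memory. The one subtlety is that codeSize is measured
// in bytes while pCode points at 32-bit words.
#include <cstdint>
#include <span>
#include <vulkan/vulkan.h>

VkShaderModule MakeShaderModule(VkDevice device, std::span<const std::uint32_t> spirv) {
    const VkShaderModuleCreateInfo ci{
        .sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO,
        .pNext = nullptr,
        .flags = 0,
        .codeSize = spirv.size_bytes(), // bytes, not words
        .pCode = spirv.data(),
    };
    VkShaderModule module = VK_NULL_HANDLE;
    if (vkCreateShaderModule(device, &ci, nullptr, &module) != VK_SUCCESS) {
        return VK_NULL_HANDLE; // Real code would surface the VkResult
    }
    return module;
}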


@@ -1,17 +1,17 @@
// SPDX-FileCopyrightText: Copyright 2018 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include <span>
#include "common/common_types.h"
#include "video_core/vulkan_common/vulkan_wrapper.h"
namespace Vulkan {
class Device;
vk::ShaderModule BuildShader(const Device& device, std::span<const u32> code);
} // namespace Vulkan


@@ -1,300 +1,300 @@
// SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include <algorithm>
#include <utility>
#include <vector>
#include <fmt/format.h>
#include "common/alignment.h"
#include "common/assert.h"
#include "common/bit_util.h"
#include "common/common_types.h"
#include "common/literals.h"
#include "video_core/renderer_vulkan/vk_scheduler.h"
#include "video_core/renderer_vulkan/vk_staging_buffer_pool.h"
#include "video_core/vulkan_common/vulkan_device.h"
#include "video_core/vulkan_common/vulkan_wrapper.h"
namespace Vulkan {
namespace {
using namespace Common::Literals;
// Maximum potential alignment of a Vulkan buffer
constexpr VkDeviceSize MAX_ALIGNMENT = 256;
// Maximum size to put elements in the stream buffer
constexpr VkDeviceSize MAX_STREAM_BUFFER_REQUEST_SIZE = 8_MiB;
// Stream buffer size in bytes
constexpr VkDeviceSize STREAM_BUFFER_SIZE = 128_MiB;
constexpr VkDeviceSize REGION_SIZE = STREAM_BUFFER_SIZE / StagingBufferPool::NUM_SYNCS;
constexpr VkMemoryPropertyFlags HOST_FLAGS =
VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT;
constexpr VkMemoryPropertyFlags STREAM_FLAGS = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | HOST_FLAGS;
bool IsStreamHeap(VkMemoryHeap heap) noexcept {
return STREAM_BUFFER_SIZE < (heap.size * 2) / 3;
}
std::optional<u32> FindMemoryTypeIndex(const VkPhysicalDeviceMemoryProperties& props, u32 type_mask,
VkMemoryPropertyFlags flags) noexcept {
for (u32 type_index = 0; type_index < props.memoryTypeCount; ++type_index) {
if (((type_mask >> type_index) & 1) == 0) {
// Memory type is incompatible
continue;
}
const VkMemoryType& memory_type = props.memoryTypes[type_index];
if ((memory_type.propertyFlags & flags) != flags) {
// Memory type doesn't have the flags we want
continue;
}
if (!IsStreamHeap(props.memoryHeaps[memory_type.heapIndex])) {
// Memory heap is not suitable for streaming
continue;
}
// Success!
return type_index;
}
return std::nullopt;
}
u32 FindMemoryTypeIndex(const VkPhysicalDeviceMemoryProperties& props, u32 type_mask,
bool try_device_local) {
std::optional<u32> type;
if (try_device_local) {
// Try to find a DEVICE_LOCAL_BIT type; Nvidia and AMD have a dedicated heap for this
type = FindMemoryTypeIndex(props, type_mask, STREAM_FLAGS);
if (type) {
return *type;
}
}
// Otherwise try without the DEVICE_LOCAL_BIT
type = FindMemoryTypeIndex(props, type_mask, HOST_FLAGS);
if (type) {
return *type;
}
// This should never happen, and in case it does, signal it as an out of memory situation
throw vk::Exception(VK_ERROR_OUT_OF_DEVICE_MEMORY);
}
size_t Region(size_t iterator) noexcept {
return iterator / REGION_SIZE;
}
} // Anonymous namespace
StagingBufferPool::StagingBufferPool(const Device& device_, MemoryAllocator& memory_allocator_,
Scheduler& scheduler_)
: device{device_}, memory_allocator{memory_allocator_}, scheduler{scheduler_} {
const vk::Device& dev = device.GetLogical();
stream_buffer = dev.CreateBuffer(VkBufferCreateInfo{
.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
.size = STREAM_BUFFER_SIZE,
.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT |
VK_BUFFER_USAGE_INDEX_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT,
.sharingMode = VK_SHARING_MODE_EXCLUSIVE,
.queueFamilyIndexCount = 0,
.pQueueFamilyIndices = nullptr,
});
if (device.HasDebuggingToolAttached()) {
stream_buffer.SetObjectNameEXT("Stream Buffer");
}
VkMemoryDedicatedRequirements dedicated_reqs{
.sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS,
.pNext = nullptr,
.prefersDedicatedAllocation = VK_FALSE,
.requiresDedicatedAllocation = VK_FALSE,
};
const auto requirements = dev.GetBufferMemoryRequirements(*stream_buffer, &dedicated_reqs);
const bool make_dedicated = dedicated_reqs.prefersDedicatedAllocation == VK_TRUE ||
dedicated_reqs.requiresDedicatedAllocation == VK_TRUE;
const VkMemoryDedicatedAllocateInfo dedicated_info{
.sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO,
.pNext = nullptr,
.image = nullptr,
.buffer = *stream_buffer,
};
const auto memory_properties = device.GetPhysical().GetMemoryProperties().memoryProperties;
VkMemoryAllocateInfo stream_memory_info{
.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
.pNext = make_dedicated ? &dedicated_info : nullptr,
.allocationSize = requirements.size,
.memoryTypeIndex =
FindMemoryTypeIndex(memory_properties, requirements.memoryTypeBits, true),
};
stream_memory = dev.TryAllocateMemory(stream_memory_info);
if (!stream_memory) {
LOG_INFO(Render_Vulkan, "Dynamic memory allocation failed, trying with system memory");
stream_memory_info.memoryTypeIndex =
FindMemoryTypeIndex(memory_properties, requirements.memoryTypeBits, false);
stream_memory = dev.AllocateMemory(stream_memory_info);
}
if (device.HasDebuggingToolAttached()) {
stream_memory.SetObjectNameEXT("Stream Buffer Memory");
}
stream_buffer.BindMemory(*stream_memory, 0);
stream_pointer = stream_memory.Map(0, STREAM_BUFFER_SIZE);
}
StagingBufferPool::~StagingBufferPool() = default;
StagingBufferRef StagingBufferPool::Request(size_t size, MemoryUsage usage) {
if (usage == MemoryUsage::Upload && size <= MAX_STREAM_BUFFER_REQUEST_SIZE) {
return GetStreamBuffer(size);
}
return GetStagingBuffer(size, usage);
}
void StagingBufferPool::TickFrame() {
current_delete_level = (current_delete_level + 1) % NUM_LEVELS;
ReleaseCache(MemoryUsage::DeviceLocal);
ReleaseCache(MemoryUsage::Upload);
ReleaseCache(MemoryUsage::Download);
}
StagingBufferRef StagingBufferPool::GetStreamBuffer(size_t size) {
if (AreRegionsActive(Region(free_iterator) + 1,
std::min(Region(iterator + size) + 1, NUM_SYNCS))) {
// Avoid waiting for the previous usages to be free
return GetStagingBuffer(size, MemoryUsage::Upload);
}
const u64 current_tick = scheduler.CurrentTick();
std::fill(sync_ticks.begin() + Region(used_iterator), sync_ticks.begin() + Region(iterator),
current_tick);
used_iterator = iterator;
free_iterator = std::max(free_iterator, iterator + size);
if (iterator + size >= STREAM_BUFFER_SIZE) {
std::fill(sync_ticks.begin() + Region(used_iterator), sync_ticks.begin() + NUM_SYNCS,
current_tick);
used_iterator = 0;
iterator = 0;
free_iterator = size;
if (AreRegionsActive(0, Region(size) + 1)) {
// Avoid waiting for the previous usages to be free
return GetStagingBuffer(size, MemoryUsage::Upload);
}
}
const size_t offset = iterator;
iterator = Common::AlignUp(iterator + size, MAX_ALIGNMENT);
return StagingBufferRef{
.buffer = *stream_buffer,
.offset = static_cast<VkDeviceSize>(offset),
.mapped_span = std::span<u8>(stream_pointer + offset, size),
};
}
bool StagingBufferPool::AreRegionsActive(size_t region_begin, size_t region_end) const {
const u64 gpu_tick = scheduler.GetMasterSemaphore().KnownGpuTick();
return std::any_of(sync_ticks.begin() + region_begin, sync_ticks.begin() + region_end,
[gpu_tick](u64 sync_tick) { return gpu_tick < sync_tick; });
}
StagingBufferRef StagingBufferPool::GetStagingBuffer(size_t size, MemoryUsage usage) {
if (const std::optional<StagingBufferRef> ref = TryGetReservedBuffer(size, usage)) {
return *ref;
}
return CreateStagingBuffer(size, usage);
}
std::optional<StagingBufferRef> StagingBufferPool::TryGetReservedBuffer(size_t size,
MemoryUsage usage) {
StagingBuffers& cache_level = GetCache(usage)[Common::Log2Ceil64(size)];
const auto is_free = [this](const StagingBuffer& entry) {
return scheduler.IsFree(entry.tick);
};
auto& entries = cache_level.entries;
const auto hint_it = entries.begin() + cache_level.iterate_index;
auto it = std::find_if(entries.begin() + cache_level.iterate_index, entries.end(), is_free);
if (it == entries.end()) {
it = std::find_if(entries.begin(), hint_it, is_free);
if (it == hint_it) {
return std::nullopt;
}
}
cache_level.iterate_index = std::distance(entries.begin(), it) + 1;
it->tick = scheduler.CurrentTick();
return it->Ref();
}
StagingBufferRef StagingBufferPool::CreateStagingBuffer(size_t size, MemoryUsage usage) {
const u32 log2 = Common::Log2Ceil64(size);
vk::Buffer buffer = device.GetLogical().CreateBuffer({
.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
.size = 1ULL << log2,
.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT |
VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT |
VK_BUFFER_USAGE_INDEX_BUFFER_BIT | VK_BUFFER_USAGE_VERTEX_BUFFER_BIT,
.sharingMode = VK_SHARING_MODE_EXCLUSIVE,
.queueFamilyIndexCount = 0,
.pQueueFamilyIndices = nullptr,
});
if (device.HasDebuggingToolAttached()) {
++buffer_index;
buffer.SetObjectNameEXT(fmt::format("Staging Buffer {}", buffer_index).c_str());
}
MemoryCommit commit = memory_allocator.Commit(buffer, usage);
const std::span<u8> mapped_span = IsHostVisible(usage) ? commit.Map() : std::span<u8>{};
StagingBuffer& entry = GetCache(usage)[log2].entries.emplace_back(StagingBuffer{
.buffer = std::move(buffer),
.commit = std::move(commit),
.mapped_span = mapped_span,
.tick = scheduler.CurrentTick(),
});
return entry.Ref();
}
StagingBufferPool::StagingBuffersCache& StagingBufferPool::GetCache(MemoryUsage usage) {
switch (usage) {
case MemoryUsage::DeviceLocal:
return device_local_cache;
case MemoryUsage::Upload:
return upload_cache;
case MemoryUsage::Download:
return download_cache;
default:
ASSERT_MSG(false, "Invalid memory usage={}", usage);
return upload_cache;
}
}
void StagingBufferPool::ReleaseCache(MemoryUsage usage) {
ReleaseLevel(GetCache(usage), current_delete_level);
}
void StagingBufferPool::ReleaseLevel(StagingBuffersCache& cache, size_t log2) {
constexpr size_t deletions_per_tick = 16;
auto& staging = cache[log2];
auto& entries = staging.entries;
const size_t old_size = entries.size();
const auto is_deleteable = [this](const StagingBuffer& entry) {
return scheduler.IsFree(entry.tick);
};
const size_t begin_offset = staging.delete_index;
const size_t end_offset = std::min(begin_offset + deletions_per_tick, old_size);
const auto begin = entries.begin() + begin_offset;
const auto end = entries.begin() + end_offset;
entries.erase(std::remove_if(begin, end, is_deleteable), end);
const size_t new_size = entries.size();
staging.delete_index += deletions_per_tick;
if (staging.delete_index >= new_size) {
staging.delete_index = 0;
}
if (staging.iterate_index > new_size) {
staging.iterate_index = 0;
}
}
} // namespace Vulkan
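// A standalone, simplified sketch of the stream-buffer ring that
// GetStreamBuffer implements above: the buffer is divided into NUM_SYNCS
// regions, each region remembers the fence tick of its last use, and an
// allocation only proceeds when the GPU has already passed every region it
// would touch (otherwise yuzu falls back to a dedicated staging buffer).
// Sizes, names, and the tick plumbing are illustrative; yuzu's version also
// tracks separate used/free iterators. Assumes size < SIZE.
#include <algorithm>
#include <array>
#include <cstddef>
#include <cstdint>
#include <optional>

class StreamRing {
public:
    static constexpr std::size_t NUM_SYNCS = 16;
    static constexpr std::size_t SIZE = 1 << 20;
    static constexpr std::size_t REGION = SIZE / NUM_SYNCS;

    // Returns the offset of `size` bytes, or nullopt while the GPU owns them
    std::optional<std::size_t> Allocate(std::size_t size, std::uint64_t current_tick,
                                        std::uint64_t gpu_tick) {
        if (iterator + size >= SIZE) {
            // Wrap around: stamp the unused tail so it is not reused too early
            std::fill(ticks.begin() + Region(iterator), ticks.end(), current_tick);
            iterator = 0;
        }
        const std::size_t begin = Region(iterator);
        const std::size_t end = std::min(Region(iterator + size) + 1, NUM_SYNCS);
        for (std::size_t region = begin; region < end; ++region) {
            if (ticks[region] > gpu_tick) {
                return std::nullopt; // Region still in flight on the GPU
            }
        }
        const std::size_t result = iterator;
        std::fill(ticks.begin() + begin, ticks.begin() + end, current_tick);
        iterator += size;
        return result;
    }

private:
    static std::size_t Region(std::size_t offset) {
        return offset / REGION;
    }

    std::size_t iterator = 0;
    std::array<std::uint64_t, NUM_SYNCS> ticks{};
};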


@@ -1,104 +1,104 @@
// SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include <climits>
#include <vector>
#include "common/common_types.h"
#include "video_core/vulkan_common/vulkan_memory_allocator.h"
#include "video_core/vulkan_common/vulkan_wrapper.h"
namespace Vulkan {
class Device;
class Scheduler;
struct StagingBufferRef {
VkBuffer buffer;
VkDeviceSize offset;
std::span<u8> mapped_span;
};
class StagingBufferPool {
public:
static constexpr size_t NUM_SYNCS = 16;
explicit StagingBufferPool(const Device& device, MemoryAllocator& memory_allocator,
Scheduler& scheduler);
~StagingBufferPool();
StagingBufferRef Request(size_t size, MemoryUsage usage);
void TickFrame();
private:
struct StreamBufferCommit {
size_t upper_bound;
u64 tick;
};
struct StagingBuffer {
vk::Buffer buffer;
MemoryCommit commit;
std::span<u8> mapped_span;
u64 tick = 0;
StagingBufferRef Ref() const noexcept {
return {
.buffer = *buffer,
.offset = 0,
.mapped_span = mapped_span,
};
}
};
struct StagingBuffers {
std::vector<StagingBuffer> entries;
size_t delete_index = 0;
size_t iterate_index = 0;
};
static constexpr size_t NUM_LEVELS = sizeof(size_t) * CHAR_BIT;
using StagingBuffersCache = std::array<StagingBuffers, NUM_LEVELS>;
StagingBufferRef GetStreamBuffer(size_t size);
bool AreRegionsActive(size_t region_begin, size_t region_end) const;
StagingBufferRef GetStagingBuffer(size_t size, MemoryUsage usage);
std::optional<StagingBufferRef> TryGetReservedBuffer(size_t size, MemoryUsage usage);
StagingBufferRef CreateStagingBuffer(size_t size, MemoryUsage usage);
StagingBuffersCache& GetCache(MemoryUsage usage);
void ReleaseCache(MemoryUsage usage);
void ReleaseLevel(StagingBuffersCache& cache, size_t log2);
const Device& device;
MemoryAllocator& memory_allocator;
Scheduler& scheduler;
vk::Buffer stream_buffer;
vk::DeviceMemory stream_memory;
u8* stream_pointer = nullptr;
size_t iterator = 0;
size_t used_iterator = 0;
size_t free_iterator = 0;
std::array<u64, NUM_SYNCS> sync_ticks{};
StagingBuffersCache device_local_cache;
StagingBuffersCache upload_cache;
StagingBuffersCache download_cache;
size_t current_delete_level = 0;
u64 buffer_index = 0;
};
} // namespace Vulkan
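// How the per-size-class cache above is indexed: a request is rounded up to
// the next power of two and bucketed by that exponent (yuzu uses
// Common::Log2Ceil64; std::bit_width from C++20's <bit> computes the same
// value here), so a freed buffer can be reused by any later request in the
// same size class. Standalone sketch; the sizes printed are illustrative.
#include <bit>
#include <cassert>
#include <cstdint>
#include <cstdio>

std::uint32_t Log2Ceil(std::uint64_t size) {
    assert(size != 0);
    return static_cast<std::uint32_t>(std::bit_width(size - 1));
}

int main() {
    for (const std::uint64_t size : {1ULL, 2ULL, 3ULL, 4096ULL, 4097ULL}) {
        const std::uint32_t level = Log2Ceil(size);
        std::printf("request %llu -> level %u (buffer of %llu bytes)\n",
                    static_cast<unsigned long long>(size), level, 1ULL << level);
    }
}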


@@ -1,212 +1,212 @@
// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include <algorithm>
#include <array>
#include <cstddef>
#include "common/common_types.h"
#include "core/core.h"
#include "video_core/control/channel_state.h"
#include "video_core/dirty_flags.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/renderer_vulkan/vk_state_tracker.h"
#define OFF(field_name) MAXWELL3D_REG_INDEX(field_name)
#define NUM(field_name) (sizeof(Maxwell3D::Regs::field_name) / (sizeof(u32)))
namespace Vulkan {
namespace {
using namespace Dirty;
using namespace VideoCommon::Dirty;
using Tegra::Engines::Maxwell3D;
using Regs = Maxwell3D::Regs;
using Tables = Maxwell3D::DirtyState::Tables;
using Table = Maxwell3D::DirtyState::Table;
using Flags = Maxwell3D::DirtyState::Flags;
Flags MakeInvalidationFlags() {
static constexpr int INVALIDATION_FLAGS[]{
Viewports, Scissors, DepthBias, BlendConstants, DepthBounds,
StencilProperties, LineWidth, CullMode, DepthBoundsEnable, DepthTestEnable,
DepthWriteEnable, DepthCompareOp, FrontFace, StencilOp, StencilTestEnable,
VertexBuffers, VertexInput,
};
Flags flags{};
for (const int flag : INVALIDATION_FLAGS) {
flags[flag] = true;
}
for (int index = VertexBuffer0; index <= VertexBuffer31; ++index) {
flags[index] = true;
}
for (int index = VertexAttribute0; index <= VertexAttribute31; ++index) {
flags[index] = true;
}
for (int index = VertexBinding0; index <= VertexBinding31; ++index) {
flags[index] = true;
}
return flags;
}
void SetupDirtyViewports(Tables& tables) {
FillBlock(tables[0], OFF(viewport_transform), NUM(viewport_transform), Viewports);
FillBlock(tables[0], OFF(viewports), NUM(viewports), Viewports);
tables[0][OFF(viewport_scale_offset_enbled)] = Viewports;
tables[1][OFF(window_origin)] = Viewports;
}
void SetupDirtyScissors(Tables& tables) {
FillBlock(tables[0], OFF(scissor_test), NUM(scissor_test), Scissors);
}
void SetupDirtyDepthBias(Tables& tables) {
auto& table = tables[0];
table[OFF(depth_bias)] = DepthBias;
table[OFF(depth_bias_clamp)] = DepthBias;
table[OFF(slope_scale_depth_bias)] = DepthBias;
}
void SetupDirtyBlendConstants(Tables& tables) {
FillBlock(tables[0], OFF(blend_color), NUM(blend_color), BlendConstants);
}
void SetupDirtyDepthBounds(Tables& tables) {
FillBlock(tables[0], OFF(depth_bounds), NUM(depth_bounds), DepthBounds);
}
void SetupDirtyStencilProperties(Tables& tables) {
auto& table = tables[0];
table[OFF(stencil_two_side_enable)] = StencilProperties;
table[OFF(stencil_front_ref)] = StencilProperties;
table[OFF(stencil_front_mask)] = StencilProperties;
table[OFF(stencil_front_func_mask)] = StencilProperties;
table[OFF(stencil_back_ref)] = StencilProperties;
table[OFF(stencil_back_mask)] = StencilProperties;
table[OFF(stencil_back_func_mask)] = StencilProperties;
}
void SetupDirtyLineWidth(Tables& tables) {
tables[0][OFF(line_width_smooth)] = LineWidth;
tables[0][OFF(line_width_aliased)] = LineWidth;
}
void SetupDirtyCullMode(Tables& tables) {
auto& table = tables[0];
table[OFF(gl_cull_face)] = CullMode;
table[OFF(gl_cull_test_enabled)] = CullMode;
}
void SetupDirtyDepthBoundsEnable(Tables& tables) {
tables[0][OFF(depth_bounds_enable)] = DepthBoundsEnable;
}
void SetupDirtyDepthTestEnable(Tables& tables) {
tables[0][OFF(depth_test_enable)] = DepthTestEnable;
}
void SetupDirtyDepthWriteEnable(Tables& tables) {
tables[0][OFF(depth_write_enabled)] = DepthWriteEnable;
}
void SetupDirtyDepthCompareOp(Tables& tables) {
tables[0][OFF(depth_test_func)] = DepthCompareOp;
}
void SetupDirtyFrontFace(Tables& tables) {
auto& table = tables[0];
table[OFF(gl_front_face)] = FrontFace;
table[OFF(window_origin)] = FrontFace;
}
void SetupDirtyStencilOp(Tables& tables) {
auto& table = tables[0];
table[OFF(stencil_front_op.fail)] = StencilOp;
table[OFF(stencil_front_op.zfail)] = StencilOp;
table[OFF(stencil_front_op.zpass)] = StencilOp;
table[OFF(stencil_front_op.func)] = StencilOp;
table[OFF(stencil_back_op.fail)] = StencilOp;
table[OFF(stencil_back_op.zfail)] = StencilOp;
table[OFF(stencil_back_op.zpass)] = StencilOp;
table[OFF(stencil_back_op.func)] = StencilOp;
// Table 0 is used by StencilProperties
tables[1][OFF(stencil_two_side_enable)] = StencilOp;
}
void SetupDirtyStencilTestEnable(Tables& tables) {
tables[0][OFF(stencil_enable)] = StencilTestEnable;
}
void SetupDirtyBlending(Tables& tables) {
tables[0][OFF(color_mask_common)] = Blending;
tables[0][OFF(blend_per_target_enabled)] = Blending;
FillBlock(tables[0], OFF(color_mask), NUM(color_mask), Blending);
FillBlock(tables[0], OFF(blend), NUM(blend), Blending);
FillBlock(tables[0], OFF(blend_per_target), NUM(blend_per_target), Blending);
}
void SetupDirtyViewportSwizzles(Tables& tables) {
static constexpr size_t swizzle_offset = 6;
for (size_t index = 0; index < Regs::NumViewports; ++index) {
tables[0][OFF(viewport_transform) + index * NUM(viewport_transform[0]) + swizzle_offset] =
ViewportSwizzles;
}
}
void SetupDirtyVertexAttributes(Tables& tables) {
for (size_t i = 0; i < Regs::NumVertexAttributes; ++i) {
const size_t offset = OFF(vertex_attrib_format) + i * NUM(vertex_attrib_format[0]);
FillBlock(tables[0], offset, NUM(vertex_attrib_format[0]), VertexAttribute0 + i);
}
FillBlock(tables[1], OFF(vertex_attrib_format), Regs::NumVertexAttributes, VertexInput);
}
void SetupDirtyVertexBindings(Tables& tables) {
// Do NOT include stride here, it's implicit in VertexBuffer
static constexpr size_t divisor_offset = 3;
for (size_t i = 0; i < Regs::NumVertexArrays; ++i) {
const u8 flag = static_cast<u8>(VertexBinding0 + i);
tables[0][OFF(vertex_stream_instances) + i] = VertexInput;
tables[1][OFF(vertex_stream_instances) + i] = flag;
tables[0][OFF(vertex_streams) + i * NUM(vertex_streams[0]) + divisor_offset] = VertexInput;
tables[1][OFF(vertex_streams) + i * NUM(vertex_streams[0]) + divisor_offset] = flag;
}
}
} // Anonymous namespace
void StateTracker::SetupTables(Tegra::Control::ChannelState& channel_state) {
auto& tables{channel_state.maxwell_3d->dirty.tables};
SetupDirtyFlags(tables);
SetupDirtyViewports(tables);
SetupDirtyScissors(tables);
SetupDirtyDepthBias(tables);
SetupDirtyBlendConstants(tables);
SetupDirtyDepthBounds(tables);
SetupDirtyStencilProperties(tables);
SetupDirtyLineWidth(tables);
SetupDirtyCullMode(tables);
SetupDirtyDepthBoundsEnable(tables);
SetupDirtyDepthTestEnable(tables);
SetupDirtyDepthWriteEnable(tables);
SetupDirtyDepthCompareOp(tables);
SetupDirtyFrontFace(tables);
SetupDirtyStencilOp(tables);
SetupDirtyStencilTestEnable(tables);
SetupDirtyBlending(tables);
SetupDirtyViewportSwizzles(tables);
SetupDirtyVertexAttributes(tables);
SetupDirtyVertexBindings(tables);
}
void StateTracker::ChangeChannel(Tegra::Control::ChannelState& channel_state) {
flags = &channel_state.maxwell_3d->dirty.flags;
}
void StateTracker::InvalidateState() {
flags->set();
}
StateTracker::StateTracker()
: flags{&default_flags}, default_flags{}, invalidation_flags{MakeInvalidationFlags()} {}
} // namespace Vulkan
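// A standalone sketch of the dirty-state scheme SetupTables feeds above: each
// register index maps through a table to a flag id, a register write raises
// that flag, and the renderer test-and-clears it (StateTracker's Exchange)
// before re-emitting the state. The register and flag names are made up, not
// Maxwell's; the real tables map thousands of registers.
#include <array>
#include <bitset>
#include <cstddef>
#include <cstdint>
#include <cstdio>

constexpr std::size_t NUM_REGS = 64;
enum Flag : std::uint8_t { NoFlag, Viewports, DepthBias, NumFlags };

std::array<std::uint8_t, NUM_REGS> table{};
std::bitset<NumFlags> flags;

void WriteReg(std::size_t reg, std::uint32_t /*value*/) {
    flags[table[reg]] = true; // NoFlag (0) acts as a sink for untracked registers
}

bool Touch(Flag flag) { // Exchange-style test-and-clear, as in StateTracker
    const bool was_dirty = flags[flag];
    flags[flag] = false;
    return was_dirty;
}

int main() {
    table[10] = Viewports; // Pretend register 10 is a viewport register
    table[20] = DepthBias; // Pretend register 20 is a depth bias register
    WriteReg(10, 1);
    std::printf("viewports dirty: %d\n", Touch(Viewports)); // 1
    std::printf("viewports dirty: %d\n", Touch(Viewports)); // 0 after clearing
}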


@@ -1,169 +1,169 @@
// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include <cstddef>
#include <limits>
#include "common/common_types.h"
#include "video_core/dirty_flags.h"
#include "video_core/engines/maxwell_3d.h"
namespace Tegra {
namespace Control {
struct ChannelState;
}
} // namespace Tegra
namespace Vulkan {
namespace Dirty {
enum : u8 {
First = VideoCommon::Dirty::LastCommonEntry,
VertexInput,
VertexAttribute0,
VertexAttribute31 = VertexAttribute0 + 31,
VertexBinding0,
VertexBinding31 = VertexBinding0 + 31,
Viewports,
Scissors,
DepthBias,
BlendConstants,
DepthBounds,
StencilProperties,
LineWidth,
CullMode,
DepthBoundsEnable,
DepthTestEnable,
DepthWriteEnable,
DepthCompareOp,
FrontFace,
StencilOp,
StencilTestEnable,
Blending,
ViewportSwizzles,
Last,
};
static_assert(Last <= std::numeric_limits<u8>::max());
} // namespace Dirty
class StateTracker {
using Maxwell = Tegra::Engines::Maxwell3D::Regs;
public:
explicit StateTracker();
void InvalidateCommandBufferState() {
(*flags) |= invalidation_flags;
current_topology = INVALID_TOPOLOGY;
}
void InvalidateViewports() {
(*flags)[Dirty::Viewports] = true;
}
void InvalidateScissors() {
(*flags)[Dirty::Scissors] = true;
}
bool TouchViewports() {
const bool dirty_viewports = Exchange(Dirty::Viewports, false);
const bool rescale_viewports = Exchange(VideoCommon::Dirty::RescaleViewports, false);
return dirty_viewports || rescale_viewports;
}
bool TouchScissors() {
const bool dirty_scissors = Exchange(Dirty::Scissors, false);
const bool rescale_scissors = Exchange(VideoCommon::Dirty::RescaleScissors, false);
return dirty_scissors || rescale_scissors;
}
bool TouchDepthBias() {
return Exchange(Dirty::DepthBias, false) ||
Exchange(VideoCommon::Dirty::DepthBiasGlobal, false);
}
bool TouchBlendConstants() {
return Exchange(Dirty::BlendConstants, false);
}
bool TouchDepthBounds() {
return Exchange(Dirty::DepthBounds, false);
}
bool TouchStencilProperties() {
return Exchange(Dirty::StencilProperties, false);
}
bool TouchLineWidth() const {
return Exchange(Dirty::LineWidth, false);
}
bool TouchCullMode() {
return Exchange(Dirty::CullMode, false);
}
bool TouchDepthBoundsTestEnable() {
return Exchange(Dirty::DepthBoundsEnable, false);
}
bool TouchDepthTestEnable() {
return Exchange(Dirty::DepthTestEnable, false);
}
bool TouchDepthWriteEnable() {
return Exchange(Dirty::DepthWriteEnable, false);
}
bool TouchDepthCompareOp() {
return Exchange(Dirty::DepthCompareOp, false);
}
bool TouchFrontFace() {
return Exchange(Dirty::FrontFace, false);
}
bool TouchStencilOp() {
return Exchange(Dirty::StencilOp, false);
}
bool TouchStencilTestEnable() {
return Exchange(Dirty::StencilTestEnable, false);
}
bool ChangePrimitiveTopology(Maxwell::PrimitiveTopology new_topology) {
const bool has_changed = current_topology != new_topology;
current_topology = new_topology;
return has_changed;
}
void SetupTables(Tegra::Control::ChannelState& channel_state);
void ChangeChannel(Tegra::Control::ChannelState& channel_state);
void InvalidateState();
private:
static constexpr auto INVALID_TOPOLOGY = static_cast<Maxwell::PrimitiveTopology>(~0u);
bool Exchange(std::size_t id, bool new_value) const noexcept {
const bool is_dirty = (*flags)[id];
(*flags)[id] = new_value;
return is_dirty;
}
Tegra::Engines::Maxwell3D::DirtyState::Flags* flags;
Tegra::Engines::Maxwell3D::DirtyState::Flags default_flags;
Tegra::Engines::Maxwell3D::DirtyState::Flags invalidation_flags;
Maxwell::PrimitiveTopology current_topology = INVALID_TOPOLOGY;
};
} // namespace Vulkan


@@ -1,281 +1,281 @@
// SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include <algorithm>
#include <array>
#include <limits>
#include <vector>
#include "common/logging/log.h"
#include "common/settings.h"
#include "core/core.h"
#include "video_core/renderer_vulkan/vk_scheduler.h"
#include "video_core/renderer_vulkan/vk_swapchain.h"
#include "video_core/vulkan_common/vulkan_device.h"
#include "video_core/vulkan_common/vulkan_wrapper.h"
namespace Vulkan {
namespace {
VkSurfaceFormatKHR ChooseSwapSurfaceFormat(vk::Span<VkSurfaceFormatKHR> formats) {
if (formats.size() == 1 && formats[0].format == VK_FORMAT_UNDEFINED) {
VkSurfaceFormatKHR format;
format.format = VK_FORMAT_B8G8R8A8_UNORM;
format.colorSpace = VK_COLOR_SPACE_SRGB_NONLINEAR_KHR;
return format;
}
const auto& found = std::find_if(formats.begin(), formats.end(), [](const auto& format) {
return format.format == VK_FORMAT_B8G8R8A8_UNORM &&
format.colorSpace == VK_COLOR_SPACE_SRGB_NONLINEAR_KHR;
});
return found != formats.end() ? *found : formats[0];
}
VkPresentModeKHR ChooseSwapPresentMode(vk::Span<VkPresentModeKHR> modes) {
// Mailbox (triple buffering) doesn't block the application like FIFO (vsync);
// prefer it if the vsync option is not selected
const auto found_mailbox = std::find(modes.begin(), modes.end(), VK_PRESENT_MODE_MAILBOX_KHR);
if (Settings::values.fullscreen_mode.GetValue() == Settings::FullscreenMode::Borderless &&
found_mailbox != modes.end() && !Settings::values.use_vsync.GetValue()) {
return VK_PRESENT_MODE_MAILBOX_KHR;
}
if (!Settings::values.use_speed_limit.GetValue()) {
// FIFO present mode locks the framerate to the monitor's refresh rate;
// find an alternative to bypass this limitation if the FPS is unlocked.
const auto found_imm = std::find(modes.begin(), modes.end(), VK_PRESENT_MODE_IMMEDIATE_KHR);
if (found_imm != modes.end()) {
return VK_PRESENT_MODE_IMMEDIATE_KHR;
}
}
return VK_PRESENT_MODE_FIFO_KHR;
}
VkExtent2D ChooseSwapExtent(const VkSurfaceCapabilitiesKHR& capabilities, u32 width, u32 height) {
constexpr auto undefined_size{std::numeric_limits<u32>::max()};
if (capabilities.currentExtent.width != undefined_size) {
return capabilities.currentExtent;
}
VkExtent2D extent;
extent.width = std::max(capabilities.minImageExtent.width,
std::min(capabilities.maxImageExtent.width, width));
extent.height = std::max(capabilities.minImageExtent.height,
std::min(capabilities.maxImageExtent.height, height));
return extent;
}
} // Anonymous namespace
Swapchain::Swapchain(VkSurfaceKHR surface_, const Device& device_, Scheduler& scheduler_, u32 width,
u32 height, bool srgb)
: surface{surface_}, device{device_}, scheduler{scheduler_} {
Create(width, height, srgb);
}
Swapchain::~Swapchain() = default;
void Swapchain::Create(u32 width, u32 height, bool srgb) {
is_outdated = false;
is_suboptimal = false;
const auto physical_device = device.GetPhysical();
const auto capabilities{physical_device.GetSurfaceCapabilitiesKHR(surface)};
if (capabilities.maxImageExtent.width == 0 || capabilities.maxImageExtent.height == 0) {
return;
}
device.GetLogical().WaitIdle();
Destroy();
CreateSwapchain(capabilities, width, height, srgb);
CreateSemaphores();
CreateImageViews();
resource_ticks.clear();
resource_ticks.resize(image_count);
}
void Swapchain::AcquireNextImage() {
const VkResult result = device.GetLogical().AcquireNextImageKHR(
*swapchain, std::numeric_limits<u64>::max(), *present_semaphores[frame_index],
VK_NULL_HANDLE, &image_index);
switch (result) {
case VK_SUCCESS:
break;
case VK_SUBOPTIMAL_KHR:
is_suboptimal = true;
break;
case VK_ERROR_OUT_OF_DATE_KHR:
is_outdated = true;
break;
default:
LOG_ERROR(Render_Vulkan, "vkAcquireNextImageKHR returned {}", vk::ToString(result));
break;
}
scheduler.Wait(resource_ticks[image_index]);
resource_ticks[image_index] = scheduler.CurrentTick();
}
void Swapchain::Present(VkSemaphore render_semaphore) {
const auto present_queue{device.GetPresentQueue()};
const VkPresentInfoKHR present_info{
.sType = VK_STRUCTURE_TYPE_PRESENT_INFO_KHR,
.pNext = nullptr,
.waitSemaphoreCount = render_semaphore ? 1U : 0U,
.pWaitSemaphores = &render_semaphore,
.swapchainCount = 1,
.pSwapchains = swapchain.address(),
.pImageIndices = &image_index,
.pResults = nullptr,
};
switch (const VkResult result = present_queue.Present(present_info)) {
case VK_SUCCESS:
break;
case VK_SUBOPTIMAL_KHR:
LOG_DEBUG(Render_Vulkan, "Suboptimal swapchain");
break;
case VK_ERROR_OUT_OF_DATE_KHR:
is_outdated = true;
break;
default:
LOG_CRITICAL(Render_Vulkan, "Failed to present with error {}", vk::ToString(result));
break;
}
++frame_index;
if (frame_index >= image_count) {
frame_index = 0;
}
}
void Swapchain::CreateSwapchain(const VkSurfaceCapabilitiesKHR& capabilities, u32 width, u32 height,
bool srgb) {
const auto physical_device{device.GetPhysical()};
const auto formats{physical_device.GetSurfaceFormatsKHR(surface)};
const auto present_modes{physical_device.GetSurfacePresentModesKHR(surface)};
const VkSurfaceFormatKHR surface_format{ChooseSwapSurfaceFormat(formats)};
present_mode = ChooseSwapPresentMode(present_modes);
u32 requested_image_count{capabilities.minImageCount + 1};
// Ensure triple buffering if possible.
if (capabilities.maxImageCount > 0) {
if (requested_image_count > capabilities.maxImageCount) {
requested_image_count = capabilities.maxImageCount;
} else {
requested_image_count =
std::max(requested_image_count, std::min(3U, capabilities.maxImageCount));
}
} else {
requested_image_count = std::max(requested_image_count, 3U);
}
VkSwapchainCreateInfoKHR swapchain_ci{
.sType = VK_STRUCTURE_TYPE_SWAPCHAIN_CREATE_INFO_KHR,
.pNext = nullptr,
.flags = 0,
.surface = surface,
.minImageCount = requested_image_count,
.imageFormat = surface_format.format,
.imageColorSpace = surface_format.colorSpace,
.imageExtent = {},
.imageArrayLayers = 1,
.imageUsage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT,
.imageSharingMode = VK_SHARING_MODE_EXCLUSIVE,
.queueFamilyIndexCount = 0,
.pQueueFamilyIndices = nullptr,
.preTransform = capabilities.currentTransform,
.compositeAlpha = VK_COMPOSITE_ALPHA_OPAQUE_BIT_KHR,
.presentMode = present_mode,
.clipped = VK_FALSE,
.oldSwapchain = nullptr,
};
const u32 graphics_family{device.GetGraphicsFamily()};
const u32 present_family{device.GetPresentFamily()};
const std::array<u32, 2> queue_indices{graphics_family, present_family};
if (graphics_family != present_family) {
swapchain_ci.imageSharingMode = VK_SHARING_MODE_CONCURRENT;
swapchain_ci.queueFamilyIndexCount = static_cast<u32>(queue_indices.size());
swapchain_ci.pQueueFamilyIndices = queue_indices.data();
}
static constexpr std::array view_formats{VK_FORMAT_B8G8R8A8_UNORM, VK_FORMAT_B8G8R8A8_SRGB};
VkImageFormatListCreateInfo format_list{
.sType = VK_STRUCTURE_TYPE_IMAGE_FORMAT_LIST_CREATE_INFO_KHR,
.pNext = nullptr,
.viewFormatCount = static_cast<u32>(view_formats.size()),
.pViewFormats = view_formats.data(),
};
if (device.IsKhrSwapchainMutableFormatEnabled()) {
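        // Prepend format_list to the create info's pNext chain: std::exchange makes
        // swapchain_ci.pNext point at format_list and hands the previous head to
        // format_list.pNext, preserving any existing chain.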
format_list.pNext = std::exchange(swapchain_ci.pNext, &format_list);
swapchain_ci.flags |= VK_SWAPCHAIN_CREATE_MUTABLE_FORMAT_BIT_KHR;
}
// Request the size again to reduce the possibility of a TOCTOU race condition.
const auto updated_capabilities = physical_device.GetSurfaceCapabilitiesKHR(surface);
swapchain_ci.imageExtent = ChooseSwapExtent(updated_capabilities, width, height);
    // Don't add code between this query and the swapchain creation below.
swapchain = device.GetLogical().CreateSwapchainKHR(swapchain_ci);
extent = swapchain_ci.imageExtent;
current_srgb = srgb;
current_fps_unlocked = !Settings::values.use_speed_limit.GetValue();
images = swapchain.GetImages();
image_count = static_cast<u32>(images.size());
image_view_format = srgb ? VK_FORMAT_B8G8R8A8_SRGB : VK_FORMAT_B8G8R8A8_UNORM;
}
void Swapchain::CreateSemaphores() {
present_semaphores.resize(image_count);
std::ranges::generate(present_semaphores,
[this] { return device.GetLogical().CreateSemaphore(); });
}
void Swapchain::CreateImageViews() {
VkImageViewCreateInfo ci{
.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
.image = {},
.viewType = VK_IMAGE_VIEW_TYPE_2D,
.format = image_view_format,
.components =
{
.r = VK_COMPONENT_SWIZZLE_IDENTITY,
.g = VK_COMPONENT_SWIZZLE_IDENTITY,
.b = VK_COMPONENT_SWIZZLE_IDENTITY,
.a = VK_COMPONENT_SWIZZLE_IDENTITY,
},
.subresourceRange =
{
.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
.baseMipLevel = 0,
.levelCount = 1,
.baseArrayLayer = 0,
.layerCount = 1,
},
};
image_views.resize(image_count);
for (std::size_t i = 0; i < image_count; i++) {
ci.image = images[i];
image_views[i] = device.GetLogical().CreateImageView(ci);
}
}
void Swapchain::Destroy() {
frame_index = 0;
present_semaphores.clear();
framebuffers.clear();
image_views.clear();
swapchain.reset();
}
bool Swapchain::HasFpsUnlockChanged() const {
return current_fps_unlocked != !Settings::values.use_speed_limit.GetValue();
}
bool Swapchain::NeedsPresentModeUpdate() const {
    // Mailbox present mode is ideal for all scenarios. If it is not available,
    // a different present mode is needed to support unlocked FPS above the monitor's refresh rate.
return present_mode != VK_PRESENT_MODE_MAILBOX_KHR && HasFpsUnlockChanged();
}
} // namespace Vulkan


@@ -1,121 +1,121 @@
// SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include <vector>
#include "common/common_types.h"
#include "video_core/vulkan_common/vulkan_wrapper.h"
namespace Layout {
struct FramebufferLayout;
}
namespace Vulkan {
class Device;
class Scheduler;
class Swapchain {
public:
explicit Swapchain(VkSurfaceKHR surface, const Device& device, Scheduler& scheduler, u32 width,
u32 height, bool srgb);
~Swapchain();
/// Creates (or recreates) the swapchain with a given size.
void Create(u32 width, u32 height, bool srgb);
/// Acquires the next image in the swapchain, waits as needed.
void AcquireNextImage();
/// Presents the rendered image to the swapchain.
void Present(VkSemaphore render_semaphore);
/// Returns true when the swapchain needs to be recreated.
bool NeedsRecreation(bool is_srgb) const {
return HasColorSpaceChanged(is_srgb) || IsSubOptimal() || NeedsPresentModeUpdate();
}
/// Returns true when the color space has changed.
bool HasColorSpaceChanged(bool is_srgb) const {
return current_srgb != is_srgb;
}
/// Returns true when the swapchain is outdated.
bool IsOutDated() const {
return is_outdated;
}
/// Returns true when the swapchain is suboptimal.
bool IsSubOptimal() const {
return is_suboptimal;
}
VkExtent2D GetSize() const {
return extent;
}
std::size_t GetImageCount() const {
return image_count;
}
std::size_t GetImageIndex() const {
return image_index;
}
VkImage GetImageIndex(std::size_t index) const {
return images[index];
}
VkImageView GetImageViewIndex(std::size_t index) const {
return *image_views[index];
}
VkFormat GetImageViewFormat() const {
return image_view_format;
}
VkSemaphore CurrentPresentSemaphore() const {
return *present_semaphores[frame_index];
}
private:
void CreateSwapchain(const VkSurfaceCapabilitiesKHR& capabilities, u32 width, u32 height,
bool srgb);
void CreateSemaphores();
void CreateImageViews();
void Destroy();
bool HasFpsUnlockChanged() const;
bool NeedsPresentModeUpdate() const;
const VkSurfaceKHR surface;
const Device& device;
Scheduler& scheduler;
vk::SwapchainKHR swapchain;
std::size_t image_count{};
std::vector<VkImage> images;
std::vector<vk::ImageView> image_views;
std::vector<vk::Framebuffer> framebuffers;
std::vector<u64> resource_ticks;
std::vector<vk::Semaphore> present_semaphores;
u32 image_index{};
u32 frame_index{};
VkFormat image_view_format{};
VkExtent2D extent{};
VkPresentModeKHR present_mode{};
bool current_srgb{};
bool current_fps_unlocked{};
bool is_outdated{};
bool is_suboptimal{};
};
} // namespace Vulkan
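For orientation, here is a minimal sketch of how this interface might be driven once per frame. RenderFrame is a hypothetical hook standing in for the renderer's actual record-and-submit step; error handling is elided.

VkSemaphore RenderFrame(std::size_t image_index); // hypothetical renderer hook

void DrawFrame(Vulkan::Swapchain& swapchain, u32 width, u32 height, bool srgb) {
    // Recreate before acquiring if the surface properties no longer match.
    if (swapchain.IsOutDated() || swapchain.NeedsRecreation(srgb)) {
        swapchain.Create(width, height, srgb);
    }
    swapchain.AcquireNextImage();
    const VkSemaphore render_semaphore = RenderFrame(swapchain.GetImageIndex());
    swapchain.Present(render_semaphore);
}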

File diff suppressed because it is too large


@@ -1,361 +1,361 @@
// SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include <span>
#include "shader_recompiler/shader_info.h"
#include "video_core/renderer_vulkan/vk_compute_pass.h"
#include "video_core/renderer_vulkan/vk_staging_buffer_pool.h"
#include "video_core/texture_cache/image_view_base.h"
#include "video_core/texture_cache/texture_cache_base.h"
#include "video_core/vulkan_common/vulkan_memory_allocator.h"
#include "video_core/vulkan_common/vulkan_wrapper.h"
namespace Settings {
struct ResolutionScalingInfo;
}
namespace Vulkan {
using VideoCommon::ImageId;
using VideoCommon::NUM_RT;
using VideoCommon::Region2D;
using VideoCommon::RenderTargets;
using VideoCommon::SlotVector;
using VideoCore::Surface::PixelFormat;
class BlitImageHelper;
class DescriptorPool;
class Device;
class Image;
class ImageView;
class Framebuffer;
class RenderPassCache;
class StagingBufferPool;
class UpdateDescriptorQueue;
class Scheduler;
class TextureCacheRuntime {
public:
explicit TextureCacheRuntime(const Device& device_, Scheduler& scheduler_,
MemoryAllocator& memory_allocator_,
StagingBufferPool& staging_buffer_pool_,
BlitImageHelper& blit_image_helper_,
RenderPassCache& render_pass_cache_,
DescriptorPool& descriptor_pool,
UpdateDescriptorQueue& update_descriptor_queue);
void Finish();
StagingBufferRef UploadStagingBuffer(size_t size);
StagingBufferRef DownloadStagingBuffer(size_t size);
void TickFrame();
u64 GetDeviceLocalMemory() const;
u64 GetDeviceMemoryUsage() const;
bool CanReportMemoryUsage() const;
void BlitImage(Framebuffer* dst_framebuffer, ImageView& dst, ImageView& src,
const Region2D& dst_region, const Region2D& src_region,
Tegra::Engines::Fermi2D::Filter filter,
Tegra::Engines::Fermi2D::Operation operation);
void CopyImage(Image& dst, Image& src, std::span<const VideoCommon::ImageCopy> copies);
bool ShouldReinterpret(Image& dst, Image& src);
void ReinterpretImage(Image& dst, Image& src, std::span<const VideoCommon::ImageCopy> copies);
void ConvertImage(Framebuffer* dst, ImageView& dst_view, ImageView& src_view);
bool CanAccelerateImageUpload(Image&) const noexcept {
return false;
}
void AccelerateImageUpload(Image&, const StagingBufferRef&,
std::span<const VideoCommon::SwizzleParameters>);
void InsertUploadMemoryBarrier() {}
bool HasBrokenTextureViewFormats() const noexcept {
// No known Vulkan driver has broken image views
return false;
}
bool HasNativeBgr() const noexcept {
// All known Vulkan drivers can natively handle BGR textures
return true;
}
[[nodiscard]] VkBuffer GetTemporaryBuffer(size_t needed_size);
const Device& device;
Scheduler& scheduler;
MemoryAllocator& memory_allocator;
StagingBufferPool& staging_buffer_pool;
BlitImageHelper& blit_image_helper;
RenderPassCache& render_pass_cache;
std::optional<ASTCDecoderPass> astc_decoder_pass;
const Settings::ResolutionScalingInfo& resolution;
constexpr static size_t indexing_slots = 8 * sizeof(size_t);
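    // One slot per bit of size_t: 64 temporary buffers on a 64-bit target.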
std::array<vk::Buffer, indexing_slots> buffers{};
std::array<std::unique_ptr<MemoryCommit>, indexing_slots> buffer_commits{};
};
class Image : public VideoCommon::ImageBase {
public:
explicit Image(TextureCacheRuntime&, const VideoCommon::ImageInfo& info, GPUVAddr gpu_addr,
VAddr cpu_addr);
explicit Image(const VideoCommon::NullImageParams&);
~Image();
Image(const Image&) = delete;
Image& operator=(const Image&) = delete;
Image(Image&&) = default;
Image& operator=(Image&&) = default;
void UploadMemory(const StagingBufferRef& map,
std::span<const VideoCommon::BufferImageCopy> copies);
void DownloadMemory(const StagingBufferRef& map,
std::span<const VideoCommon::BufferImageCopy> copies);
[[nodiscard]] VkImage Handle() const noexcept {
return current_image;
}
[[nodiscard]] VkImageAspectFlags AspectMask() const noexcept {
return aspect_mask;
}
[[nodiscard]] VkImageView StorageImageView(s32 level) const noexcept {
return *storage_image_views[level];
}
    /// Returns true when the image is already initialized and marks it as initialized
[[nodiscard]] bool ExchangeInitialization() noexcept {
return std::exchange(initialized, true);
}
bool IsRescaled() const noexcept;
bool ScaleUp(bool ignore = false);
bool ScaleDown(bool ignore = false);
private:
bool BlitScaleHelper(bool scale_up);
bool NeedsScaleHelper() const;
Scheduler* scheduler{};
TextureCacheRuntime* runtime{};
vk::Image original_image;
MemoryCommit commit;
std::vector<vk::ImageView> storage_image_views;
VkImageAspectFlags aspect_mask = 0;
bool initialized = false;
vk::Image scaled_image{};
MemoryCommit scaled_commit{};
VkImage current_image{};
std::unique_ptr<Framebuffer> scale_framebuffer;
std::unique_ptr<ImageView> scale_view;
std::unique_ptr<Framebuffer> normal_framebuffer;
std::unique_ptr<ImageView> normal_view;
};
class ImageView : public VideoCommon::ImageViewBase {
public:
explicit ImageView(TextureCacheRuntime&, const VideoCommon::ImageViewInfo&, ImageId, Image&);
explicit ImageView(TextureCacheRuntime&, const VideoCommon::ImageViewInfo&, ImageId, Image&,
const SlotVector<Image>&);
explicit ImageView(TextureCacheRuntime&, const VideoCommon::ImageInfo&,
const VideoCommon::ImageViewInfo&, GPUVAddr);
explicit ImageView(TextureCacheRuntime&, const VideoCommon::NullImageViewParams&);
~ImageView();
ImageView(const ImageView&) = delete;
ImageView& operator=(const ImageView&) = delete;
ImageView(ImageView&&) = default;
ImageView& operator=(ImageView&&) = default;
[[nodiscard]] VkImageView DepthView();
[[nodiscard]] VkImageView StencilView();
[[nodiscard]] VkImageView ColorView();
[[nodiscard]] VkImageView StorageView(Shader::TextureType texture_type,
Shader::ImageFormat image_format);
[[nodiscard]] bool IsRescaled() const noexcept;
[[nodiscard]] VkImageView Handle(Shader::TextureType texture_type) const noexcept {
return *image_views[static_cast<size_t>(texture_type)];
}
[[nodiscard]] VkImage ImageHandle() const noexcept {
return image_handle;
}
[[nodiscard]] VkImageView RenderTarget() const noexcept {
return render_target;
}
[[nodiscard]] VkSampleCountFlagBits Samples() const noexcept {
return samples;
}
[[nodiscard]] GPUVAddr GpuAddr() const noexcept {
return gpu_addr;
}
[[nodiscard]] u32 BufferSize() const noexcept {
return buffer_size;
}
private:
struct StorageViews {
std::array<vk::ImageView, Shader::NUM_TEXTURE_TYPES> signeds;
std::array<vk::ImageView, Shader::NUM_TEXTURE_TYPES> unsigneds;
};
[[nodiscard]] vk::ImageView MakeView(VkFormat vk_format, VkImageAspectFlags aspect_mask);
const Device* device = nullptr;
const SlotVector<Image>* slot_images = nullptr;
std::array<vk::ImageView, Shader::NUM_TEXTURE_TYPES> image_views;
std::unique_ptr<StorageViews> storage_views;
vk::ImageView depth_view;
vk::ImageView stencil_view;
vk::ImageView color_view;
VkImage image_handle = VK_NULL_HANDLE;
VkImageView render_target = VK_NULL_HANDLE;
VkSampleCountFlagBits samples = VK_SAMPLE_COUNT_1_BIT;
GPUVAddr gpu_addr = 0;
u32 buffer_size = 0;
};
class ImageAlloc : public VideoCommon::ImageAllocBase {};
class Sampler {
public:
explicit Sampler(TextureCacheRuntime&, const Tegra::Texture::TSCEntry&);
[[nodiscard]] VkSampler Handle() const noexcept {
return *sampler;
}
private:
vk::Sampler sampler;
};
class Framebuffer {
public:
explicit Framebuffer(TextureCacheRuntime& runtime, std::span<ImageView*, NUM_RT> color_buffers,
ImageView* depth_buffer, const VideoCommon::RenderTargets& key);
explicit Framebuffer(TextureCacheRuntime& runtime, ImageView* color_buffer,
ImageView* depth_buffer, VkExtent2D extent, bool is_rescaled);
~Framebuffer();
Framebuffer(const Framebuffer&) = delete;
Framebuffer& operator=(const Framebuffer&) = delete;
Framebuffer(Framebuffer&&) = default;
Framebuffer& operator=(Framebuffer&&) = default;
void CreateFramebuffer(TextureCacheRuntime& runtime,
std::span<ImageView*, NUM_RT> color_buffers, ImageView* depth_buffer,
bool is_rescaled = false);
[[nodiscard]] VkFramebuffer Handle() const noexcept {
return *framebuffer;
}
[[nodiscard]] VkRenderPass RenderPass() const noexcept {
return renderpass;
}
[[nodiscard]] VkExtent2D RenderArea() const noexcept {
return render_area;
}
[[nodiscard]] VkSampleCountFlagBits Samples() const noexcept {
return samples;
}
[[nodiscard]] u32 NumColorBuffers() const noexcept {
return num_color_buffers;
}
[[nodiscard]] u32 NumImages() const noexcept {
return num_images;
}
[[nodiscard]] const std::array<VkImage, 9>& Images() const noexcept {
return images;
}
[[nodiscard]] const std::array<VkImageSubresourceRange, 9>& ImageRanges() const noexcept {
return image_ranges;
}
[[nodiscard]] bool HasAspectColorBit(size_t index) const noexcept {
return (image_ranges.at(index).aspectMask & VK_IMAGE_ASPECT_COLOR_BIT) != 0;
}
[[nodiscard]] bool HasAspectDepthBit() const noexcept {
return has_depth;
}
[[nodiscard]] bool HasAspectStencilBit() const noexcept {
return has_stencil;
}
private:
vk::Framebuffer framebuffer;
VkRenderPass renderpass{};
VkExtent2D render_area{};
VkSampleCountFlagBits samples = VK_SAMPLE_COUNT_1_BIT;
u32 num_color_buffers = 0;
u32 num_images = 0;
std::array<VkImage, 9> images{};
std::array<VkImageSubresourceRange, 9> image_ranges{};
bool has_depth{};
bool has_stencil{};
};
struct TextureCacheParams {
static constexpr bool ENABLE_VALIDATION = true;
static constexpr bool FRAMEBUFFER_BLITS = false;
static constexpr bool HAS_EMULATED_COPIES = false;
static constexpr bool HAS_DEVICE_MEMORY_INFO = true;
using Runtime = Vulkan::TextureCacheRuntime;
using Image = Vulkan::Image;
using ImageAlloc = Vulkan::ImageAlloc;
using ImageView = Vulkan::ImageView;
using Sampler = Vulkan::Sampler;
using Framebuffer = Vulkan::Framebuffer;
};
using TextureCache = VideoCommon::TextureCache<TextureCacheParams>;
} // namespace Vulkan
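As an aside, TextureCacheParams acts as a compile-time policy for the shared VideoCommon::TextureCache template above. A hypothetical reduction of that pattern (this is not the actual texture_cache_base.h, just an illustration of the mechanism):

template <class P>
class TextureCachePolicySketch {
    // The shared cache resolves its backend types and feature flags from P.
    static constexpr bool enable_validation = P::ENABLE_VALIDATION;
    using Runtime = typename P::Runtime;     // Vulkan::TextureCacheRuntime
    using ImageType = typename P::Image;     // Vulkan::Image
    using SamplerType = typename P::Sampler; // Vulkan::Sampler
};
using VulkanPolicySketch = TextureCachePolicySketch<Vulkan::TextureCacheParams>;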


@@ -1,9 +1,9 @@
// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include "video_core/renderer_vulkan/vk_texture_cache.h"
#include "video_core/texture_cache/texture_cache.h"
namespace VideoCommon {
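// Explicit instantiation: the shared TextureCache template is compiled once here
// for the Vulkan backend so other translation units only need the declaration.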
template class VideoCommon::TextureCache<Vulkan::TextureCacheParams>;
}


@@ -1,39 +1,39 @@
// SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include <variant>
#include <boost/container/static_vector.hpp>
#include "common/logging/log.h"
#include "video_core/renderer_vulkan/vk_scheduler.h"
#include "video_core/renderer_vulkan/vk_update_descriptor.h"
#include "video_core/vulkan_common/vulkan_device.h"
#include "video_core/vulkan_common/vulkan_wrapper.h"
namespace Vulkan {
UpdateDescriptorQueue::UpdateDescriptorQueue(const Device& device_, Scheduler& scheduler_)
: device{device_}, scheduler{scheduler_} {
payload_cursor = payload.data();
}
UpdateDescriptorQueue::~UpdateDescriptorQueue() = default;
void UpdateDescriptorQueue::TickFrame() {
payload_cursor = payload.data();
}
void UpdateDescriptorQueue::Acquire() {
    // Minimum number of free entries required before acquiring.
    // This is the maximum number of entries a single draw call might use.
static constexpr size_t MIN_ENTRIES = 0x400;
if (std::distance(payload.data(), payload_cursor) + MIN_ENTRIES >= payload.max_size()) {
LOG_WARNING(Render_Vulkan, "Payload overflow, waiting for worker thread");
scheduler.WaitWorker();
payload_cursor = payload.data();
}
upload_start = payload_cursor;
}
} // namespace Vulkan


@@ -1,81 +1,81 @@
// SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include <array>
#include "video_core/vulkan_common/vulkan_wrapper.h"
namespace Vulkan {
class Device;
class Scheduler;
struct DescriptorUpdateEntry {
struct Empty {};
DescriptorUpdateEntry() = default;
DescriptorUpdateEntry(VkDescriptorImageInfo image_) : image{image_} {}
DescriptorUpdateEntry(VkDescriptorBufferInfo buffer_) : buffer{buffer_} {}
DescriptorUpdateEntry(VkBufferView texel_buffer_) : texel_buffer{texel_buffer_} {}
union {
Empty empty{};
VkDescriptorImageInfo image;
VkDescriptorBufferInfo buffer;
VkBufferView texel_buffer;
};
};
class UpdateDescriptorQueue final {
public:
explicit UpdateDescriptorQueue(const Device& device_, Scheduler& scheduler_);
~UpdateDescriptorQueue();
void TickFrame();
void Acquire();
const DescriptorUpdateEntry* UpdateData() const noexcept {
return upload_start;
}
void AddSampledImage(VkImageView image_view, VkSampler sampler) {
*(payload_cursor++) = VkDescriptorImageInfo{
.sampler = sampler,
.imageView = image_view,
.imageLayout = VK_IMAGE_LAYOUT_GENERAL,
};
}
void AddImage(VkImageView image_view) {
*(payload_cursor++) = VkDescriptorImageInfo{
.sampler = VK_NULL_HANDLE,
.imageView = image_view,
.imageLayout = VK_IMAGE_LAYOUT_GENERAL,
};
}
void AddBuffer(VkBuffer buffer, VkDeviceSize offset, VkDeviceSize size) {
*(payload_cursor++) = VkDescriptorBufferInfo{
.buffer = buffer,
.offset = offset,
.range = size,
};
}
void AddTexelBuffer(VkBufferView texel_buffer) {
*(payload_cursor++) = texel_buffer;
}
private:
const Device& device;
Scheduler& scheduler;
DescriptorUpdateEntry* payload_cursor = nullptr;
const DescriptorUpdateEntry* upload_start = nullptr;
std::array<DescriptorUpdateEntry, 0x10000> payload;
};
} // namespace Vulkan
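A minimal usage sketch, assuming the caller already owns the queue and valid Vulkan handles (all parameter names here are illustrative):

void PushDrawDescriptors(Vulkan::UpdateDescriptorQueue& queue, VkImageView view,
                         VkSampler sampler, VkBuffer buffer) {
    queue.Acquire(); // reserve headroom for this draw's entries
    queue.AddSampledImage(view, sampler);
    queue.AddBuffer(buffer, 0, VK_WHOLE_SIZE);
    // The packed entries are now ready to feed a descriptor update template.
    const Vulkan::DescriptorUpdateEntry* entries = queue.UpdateData();
    (void)entries;
}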