early-access version 1332

This commit is contained in:
pineappleEA
2021-01-17 03:19:34 +01:00
parent 233493ad87
commit f70af7672d
126 changed files with 3856 additions and 3241 deletions

View File

@@ -526,13 +526,9 @@ VkCompareOp ComparisonOp(Maxwell::ComparisonOp comparison) {
return {};
}
VkIndexType IndexFormat(const Device& device, Maxwell::IndexFormat index_format) {
VkIndexType IndexFormat(Maxwell::IndexFormat index_format) {
switch (index_format) {
case Maxwell::IndexFormat::UnsignedByte:
if (!device.IsExtIndexTypeUint8Supported()) {
UNIMPLEMENTED_MSG("Native uint8 indices are not supported on this device");
return VK_INDEX_TYPE_UINT16;
}
return VK_INDEX_TYPE_UINT8_EXT;
case Maxwell::IndexFormat::UnsignedShort:
return VK_INDEX_TYPE_UINT16;

View File

@@ -45,7 +45,7 @@ VkFormat VertexFormat(Maxwell::VertexAttribute::Type type, Maxwell::VertexAttrib
VkCompareOp ComparisonOp(Maxwell::ComparisonOp comparison);
VkIndexType IndexFormat(const Device& device, Maxwell::IndexFormat index_format);
VkIndexType IndexFormat(Maxwell::IndexFormat index_format);
VkStencilOp StencilOp(Maxwell::StencilOp stencil_op);

View File

@@ -80,17 +80,50 @@ std::string BuildCommaSeparatedExtensions(std::vector<std::string> available_ext
return separated_extensions;
}
/// Builds the Device wrapper for the physical device selected in the settings.
/// @param instance Vulkan instance used to enumerate the physical devices.
/// @param dld      Instance dispatch table handed to the created wrappers.
/// @param surface  Surface handed to the Device constructor.
/// @throws vk::Exception (VK_ERROR_INITIALIZATION_FAILED) when the configured index does not
///         name one of the enumerated devices.
Device CreateDevice(const vk::Instance& instance, const vk::InstanceDispatch& dld,
                    VkSurfaceKHR surface) {
    const std::vector<VkPhysicalDevice> candidates = instance.EnumeratePhysicalDevices();
    const s32 selected = Settings::values.vulkan_device.GetValue();
    const bool in_range = selected >= 0 && selected < static_cast<s32>(candidates.size());
    if (!in_range) {
        LOG_ERROR(Render_Vulkan, "Invalid device index {}!", selected);
        throw vk::Exception(VK_ERROR_INITIALIZATION_FAILED);
    }
    // The range check above guarantees the index is non-negative
    const vk::PhysicalDevice physical_device(candidates[static_cast<size_t>(selected)], dld);
    return Device(*instance, physical_device, surface, dld);
}
} // Anonymous namespace
RendererVulkan::RendererVulkan(Core::TelemetrySession& telemetry_session_,
Core::Frontend::EmuWindow& emu_window,
Core::Memory::Memory& cpu_memory_, Tegra::GPU& gpu_,
std::unique_ptr<Core::Frontend::GraphicsContext> context_)
: RendererBase{emu_window, std::move(context_)}, telemetry_session{telemetry_session_},
cpu_memory{cpu_memory_}, gpu{gpu_} {}
std::unique_ptr<Core::Frontend::GraphicsContext> context_) try
: RendererBase(emu_window, std::move(context_)),
telemetry_session(telemetry_session_),
cpu_memory(cpu_memory_),
gpu(gpu_),
library(OpenLibrary()),
instance(CreateInstance(library, dld, VK_API_VERSION_1_1, render_window.GetWindowInfo().type,
true, Settings::values.renderer_debug)),
debug_callback(Settings::values.renderer_debug ? CreateDebugCallback(instance) : nullptr),
surface(CreateSurface(instance, render_window)),
device(CreateDevice(instance, dld, *surface)),
memory_allocator(device, false),
state_tracker(gpu),
scheduler(device, state_tracker),
swapchain(*surface, device, scheduler, render_window.GetFramebufferLayout().width,
render_window.GetFramebufferLayout().height, false),
blit_screen(cpu_memory, render_window, device, memory_allocator, swapchain, scheduler,
screen_info),
rasterizer(render_window, gpu, gpu.MemoryManager(), cpu_memory, screen_info, device,
memory_allocator, state_tracker, scheduler) {
Report();
} catch (const vk::Exception& exception) {
LOG_ERROR(Render_Vulkan, "Vulkan initialization failed with error: {}", exception.what());
throw std::runtime_error{fmt::format("Vulkan initialization error {}", exception.what())};
}
RendererVulkan::~RendererVulkan() {
ShutDown();
void(device.GetLogical().WaitIdle());
}
void RendererVulkan::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {
@@ -101,101 +134,38 @@ void RendererVulkan::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {
if (layout.width > 0 && layout.height > 0 && render_window.IsShown()) {
const VAddr framebuffer_addr = framebuffer->address + framebuffer->offset;
const bool use_accelerated =
rasterizer->AccelerateDisplay(*framebuffer, framebuffer_addr, framebuffer->stride);
rasterizer.AccelerateDisplay(*framebuffer, framebuffer_addr, framebuffer->stride);
const bool is_srgb = use_accelerated && screen_info.is_srgb;
if (swapchain->HasFramebufferChanged(layout) || swapchain->GetSrgbState() != is_srgb) {
swapchain->Create(layout.width, layout.height, is_srgb);
blit_screen->Recreate();
if (swapchain.HasFramebufferChanged(layout) || swapchain.GetSrgbState() != is_srgb) {
swapchain.Create(layout.width, layout.height, is_srgb);
blit_screen.Recreate();
}
scheduler->WaitWorker();
scheduler.WaitWorker();
swapchain->AcquireNextImage();
const VkSemaphore render_semaphore = blit_screen->Draw(*framebuffer, use_accelerated);
swapchain.AcquireNextImage();
const VkSemaphore render_semaphore = blit_screen.Draw(*framebuffer, use_accelerated);
scheduler->Flush(render_semaphore);
scheduler.Flush(render_semaphore);
if (swapchain->Present(render_semaphore)) {
blit_screen->Recreate();
if (swapchain.Present(render_semaphore)) {
blit_screen.Recreate();
}
rasterizer->TickFrame();
rasterizer.TickFrame();
}
render_window.OnFrameDisplayed();
}
// Creates the Vulkan objects owned by the renderer, one after another.
// Returns true when every step succeeded. A vk::Exception thrown by any step is caught by the
// function-try-block, logged, and reported to the caller as false.
bool RendererVulkan::Init() try {
library = OpenLibrary();
instance = CreateInstance(library, dld, VK_API_VERSION_1_1, render_window.GetWindowInfo().type,
true, Settings::values.renderer_debug);
// The debug callback is only created when renderer debugging is enabled in the settings
if (Settings::values.renderer_debug) {
debug_callback = CreateDebugCallback(instance);
}
surface = CreateSurface(instance, render_window);
InitializeDevice();
Report();
memory_allocator = std::make_unique<MemoryAllocator>(*device);
state_tracker = std::make_unique<StateTracker>(gpu);
scheduler = std::make_unique<VKScheduler>(*device, *state_tracker);
const auto& framebuffer = render_window.GetFramebufferLayout();
swapchain = std::make_unique<VKSwapchain>(*surface, *device, *scheduler);
swapchain->Create(framebuffer.width, framebuffer.height, false);
rasterizer = std::make_unique<RasterizerVulkan>(render_window, gpu, gpu.MemoryManager(),
cpu_memory, screen_info, *device,
*memory_allocator, *state_tracker, *scheduler);
// The blit screen receives the rasterizer, so it has to be created after it
blit_screen =
std::make_unique<VKBlitScreen>(cpu_memory, render_window, *rasterizer, *device,
*memory_allocator, *swapchain, *scheduler, screen_info);
return true;
} catch (const vk::Exception& exception) {
LOG_ERROR(Render_Vulkan, "Vulkan initialization failed with error: {}", exception.what());
return false;
}
// Releases the objects created by the renderer. Does nothing when no device exists.
void RendererVulkan::ShutDown() {
if (!device) {
return;
}
// Wait on the logical device (when it is valid) before releasing anything
if (const auto& dev = device->GetLogical()) {
dev.WaitIdle();
}
// The device itself is released last, after everything that was constructed from it
rasterizer.reset();
blit_screen.reset();
scheduler.reset();
swapchain.reset();
memory_allocator.reset();
device.reset();
}
void RendererVulkan::InitializeDevice() {
const std::vector<VkPhysicalDevice> devices = instance.EnumeratePhysicalDevices();
const s32 device_index = Settings::values.vulkan_device.GetValue();
if (device_index < 0 || device_index >= static_cast<s32>(devices.size())) {
LOG_ERROR(Render_Vulkan, "Invalid device index {}!", device_index);
throw vk::Exception(VK_ERROR_INITIALIZATION_FAILED);
}
const vk::PhysicalDevice physical_device(devices[static_cast<size_t>(device_index)], dld);
device = std::make_unique<Device>(*instance, physical_device, *surface, dld);
}
void RendererVulkan::Report() const {
const std::string vendor_name{device->GetVendorName()};
const std::string model_name{device->GetModelName()};
const std::string driver_version = GetDriverVersion(*device);
const std::string vendor_name{device.GetVendorName()};
const std::string model_name{device.GetModelName()};
const std::string driver_version = GetDriverVersion(device);
const std::string driver_name = fmt::format("{} {}", vendor_name, driver_version);
const std::string api_version = GetReadableVersion(device->ApiVersion());
const std::string api_version = GetReadableVersion(device.ApiVersion());
const std::string extensions = BuildCommaSeparatedExtensions(device->GetAvailableExtensions());
const std::string extensions = BuildCommaSeparatedExtensions(device.GetAvailableExtensions());
LOG_INFO(Render_Vulkan, "Driver: {}", driver_name);
LOG_INFO(Render_Vulkan, "Device: {}", model_name);
@@ -209,21 +179,4 @@ void RendererVulkan::Report() const {
telemetry_session.AddField(field, "GPU_Vulkan_Extensions", extensions);
}
std::vector<std::string> RendererVulkan::EnumerateDevices() try {
vk::InstanceDispatch dld;
const Common::DynamicLibrary library = OpenLibrary();
const vk::Instance instance = CreateInstance(library, dld, VK_API_VERSION_1_0);
const std::vector<VkPhysicalDevice> physical_devices = instance.EnumeratePhysicalDevices();
std::vector<std::string> names;
names.reserve(physical_devices.size());
for (const VkPhysicalDevice device : physical_devices) {
names.push_back(vk::PhysicalDevice(device, dld).GetProperties().deviceName);
}
return names;
} catch (const vk::Exception& exception) {
LOG_ERROR(Render_Vulkan, "Failed to enumerate devices with error: {}", exception.what());
return {};
}
} // namespace Vulkan

View File

@@ -9,8 +9,14 @@
#include <vector>
#include "common/dynamic_library.h"
#include "video_core/renderer_base.h"
#include "video_core/renderer_vulkan/vk_blit_screen.h"
#include "video_core/renderer_vulkan/vk_rasterizer.h"
#include "video_core/renderer_vulkan/vk_scheduler.h"
#include "video_core/renderer_vulkan/vk_state_tracker.h"
#include "video_core/renderer_vulkan/vk_swapchain.h"
#include "video_core/vulkan_common/vulkan_device.h"
#include "video_core/vulkan_common/vulkan_memory_allocator.h"
#include "video_core/vulkan_common/vulkan_wrapper.h"
namespace Core {
@@ -27,20 +33,6 @@ class GPU;
namespace Vulkan {
class Device;
class StateTracker;
class MemoryAllocator;
class VKBlitScreen;
class VKSwapchain;
class VKScheduler;
// Screen state shared between the renderer, the rasterizer and the blit screen.
// All members are value-initialized.
struct VKScreenInfo {
VkImageView image_view{}; // presumably the image view to present - TODO confirm against the rasterizer
u32 width{}; // presumably the size of that image in pixels - TODO confirm
u32 height{};
bool is_srgb{}; // read by SwapBuffers to pick the swapchain sRGB state on accelerated frames
};
class RendererVulkan final : public VideoCore::RendererBase {
public:
explicit RendererVulkan(Core::TelemetrySession& telemtry_session,
@@ -49,15 +41,13 @@ public:
std::unique_ptr<Core::Frontend::GraphicsContext> context_);
~RendererVulkan() override;
bool Init() override;
void ShutDown() override;
void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) override;
static std::vector<std::string> EnumerateDevices();
VideoCore::RasterizerInterface* ReadRasterizer() override {
return &rasterizer;
}
private:
void InitializeDevice();
void Report() const;
Core::TelemetrySession& telemetry_session;
@@ -68,18 +58,18 @@ private:
vk::InstanceDispatch dld;
vk::Instance instance;
vk::DebugUtilsMessenger debug_callback;
vk::SurfaceKHR surface;
VKScreenInfo screen_info;
vk::DebugUtilsMessenger debug_callback;
std::unique_ptr<Device> device;
std::unique_ptr<MemoryAllocator> memory_allocator;
std::unique_ptr<StateTracker> state_tracker;
std::unique_ptr<VKScheduler> scheduler;
std::unique_ptr<VKSwapchain> swapchain;
std::unique_ptr<VKBlitScreen> blit_screen;
Device device;
MemoryAllocator memory_allocator;
StateTracker state_tracker;
VKScheduler scheduler;
VKSwapchain swapchain;
VKBlitScreen blit_screen;
RasterizerVulkan rasterizer;
};
} // namespace Vulkan

View File

@@ -18,7 +18,6 @@
#include "video_core/gpu.h"
#include "video_core/host_shaders/vulkan_present_frag_spv.h"
#include "video_core/host_shaders/vulkan_present_vert_spv.h"
#include "video_core/rasterizer_interface.h"
#include "video_core/renderer_vulkan/renderer_vulkan.h"
#include "video_core/renderer_vulkan/vk_blit_screen.h"
#include "video_core/renderer_vulkan/vk_master_semaphore.h"
@@ -113,13 +112,12 @@ struct VKBlitScreen::BufferData {
};
VKBlitScreen::VKBlitScreen(Core::Memory::Memory& cpu_memory_,
Core::Frontend::EmuWindow& render_window_,
VideoCore::RasterizerInterface& rasterizer_, const Device& device_,
Core::Frontend::EmuWindow& render_window_, const Device& device_,
MemoryAllocator& memory_allocator_, VKSwapchain& swapchain_,
VKScheduler& scheduler_, const VKScreenInfo& screen_info_)
: cpu_memory{cpu_memory_}, render_window{render_window_}, rasterizer{rasterizer_},
device{device_}, memory_allocator{memory_allocator_}, swapchain{swapchain_},
scheduler{scheduler_}, image_count{swapchain.GetImageCount()}, screen_info{screen_info_} {
: cpu_memory{cpu_memory_}, render_window{render_window_}, device{device_},
memory_allocator{memory_allocator_}, swapchain{swapchain_}, scheduler{scheduler_},
image_count{swapchain.GetImageCount()}, screen_info{screen_info_} {
resource_ticks.resize(image_count);
CreateStaticResources();
@@ -150,8 +148,8 @@ VkSemaphore VKBlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer, bool
SetUniformData(data, framebuffer);
SetVertexData(data, framebuffer);
const std::span<u8> map = buffer_commit.Map();
std::memcpy(map.data(), &data, sizeof(data));
const std::span<u8> mapped_span = buffer_commit.Map();
std::memcpy(mapped_span.data(), &data, sizeof(data));
if (!use_accelerated) {
const u64 image_offset = GetRawImageOffset(framebuffer, image_index);
@@ -159,14 +157,13 @@ VkSemaphore VKBlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer, bool
const VAddr framebuffer_addr = framebuffer.address + framebuffer.offset;
const u8* const host_ptr = cpu_memory.GetPointer(framebuffer_addr);
const size_t size_bytes = GetSizeInBytes(framebuffer);
rasterizer.FlushRegion(ToCacheAddr(host_ptr), size_bytes);
// TODO(Rodrigo): Read this from HLE
constexpr u32 block_height_log2 = 4;
const u32 bytes_per_pixel = GetBytesPerPixel(framebuffer);
Tegra::Texture::UnswizzleTexture(
map.subspan(image_offset, size_bytes), std::span(host_ptr, size_bytes), bytes_per_pixel,
framebuffer.width, framebuffer.height, 1, block_height_log2, 0);
mapped_span.subspan(image_offset, size_bytes), std::span(host_ptr, size_bytes),
bytes_per_pixel, framebuffer.width, framebuffer.height, 1, block_height_log2, 0);
const VkBufferImageCopy copy{
.bufferOffset = image_offset,
@@ -266,7 +263,6 @@ VkSemaphore VKBlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer, bool
cmdbuf.Draw(4, 1, 0, 0);
cmdbuf.EndRenderPass();
});
return *semaphores[image_index];
}

View File

@@ -38,12 +38,18 @@ class RasterizerVulkan;
class VKScheduler;
class VKSwapchain;
class VKBlitScreen final {
// Screen state handed to VKBlitScreen by const reference. All members are value-initialized.
struct VKScreenInfo {
VkImageView image_view{}; // presumably the image view to present - TODO confirm against the rasterizer
u32 width{}; // presumably the size of that image in pixels - TODO confirm
u32 height{};
bool is_srgb{}; // NOTE(review): looks like it marks the image as sRGB - confirm against the writer
};
class VKBlitScreen {
public:
explicit VKBlitScreen(Core::Memory::Memory& cpu_memory,
Core::Frontend::EmuWindow& render_window,
VideoCore::RasterizerInterface& rasterizer, const Device& device,
MemoryAllocator& memory_allocator, VKSwapchain& swapchain,
Core::Frontend::EmuWindow& render_window, const Device& device,
MemoryAllocator& memory_manager, VKSwapchain& swapchain,
VKScheduler& scheduler, const VKScreenInfo& screen_info);
~VKBlitScreen();
@@ -84,7 +90,6 @@ private:
Core::Memory::Memory& cpu_memory;
Core::Frontend::EmuWindow& render_window;
VideoCore::RasterizerInterface& rasterizer;
const Device& device;
MemoryAllocator& memory_allocator;
VKSwapchain& swapchain;

View File

@@ -3,188 +3,276 @@
// Refer to the license.txt file included.
#include <algorithm>
#include <array>
#include <cstring>
#include <memory>
#include <span>
#include <vector>
#include "core/core.h"
#include "video_core/buffer_cache/buffer_cache.h"
#include "video_core/renderer_vulkan/maxwell_to_vk.h"
#include "video_core/renderer_vulkan/vk_buffer_cache.h"
#include "video_core/renderer_vulkan/vk_scheduler.h"
#include "video_core/renderer_vulkan/vk_stream_buffer.h"
#include "video_core/renderer_vulkan/vk_staging_buffer_pool.h"
#include "video_core/renderer_vulkan/vk_update_descriptor.h"
#include "video_core/vulkan_common/vulkan_device.h"
#include "video_core/vulkan_common/vulkan_memory_allocator.h"
#include "video_core/vulkan_common/vulkan_wrapper.h"
namespace Vulkan {
namespace {
/// Converts a backend-agnostic buffer copy description into its Vulkan counterpart.
/// The source offset, destination offset and size are carried over unchanged.
VkBufferCopy MakeBufferCopy(const VideoCommon::BufferCopy& copy) {
    VkBufferCopy region{};
    region.srcOffset = copy.src_offset;
    region.dstOffset = copy.dst_offset;
    region.size = copy.size;
    return region;
}
// Buffer usage flags covering vertex, index, uniform and storage buffer bindings.
constexpr VkBufferUsageFlags BUFFER_USAGE =
VK_BUFFER_USAGE_VERTEX_BUFFER_BIT | VK_BUFFER_USAGE_INDEX_BUFFER_BIT |
VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT;
/// Picks the narrowest index type able to address `num_elements` elements.
/// 8-bit indices are only chosen when the device reports support for them; otherwise counts
/// that would fit in 8 bits fall back to 16-bit indices.
VkIndexType IndexTypeFromNumElements(const Device& device, u32 num_elements) {
    if (num_elements > 0xffff) {
        return VK_INDEX_TYPE_UINT32;
    }
    const bool fits_in_u8 = num_elements <= 0xff;
    // Device support is only queried when the count actually fits in 8 bits
    if (fits_in_u8 && device.IsExtIndexTypeUint8Supported()) {
        return VK_INDEX_TYPE_UINT8_EXT;
    }
    return VK_INDEX_TYPE_UINT16;
}
// Destination pipeline stages used by the barriers recorded after a buffer upload or copy:
// transfer, vertex input, and the vertex, fragment and compute shader stages.
constexpr VkPipelineStageFlags UPLOAD_PIPELINE_STAGE =
VK_PIPELINE_STAGE_TRANSFER_BIT | VK_PIPELINE_STAGE_VERTEX_INPUT_BIT |
VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT |
VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT;
// Read accesses that written buffer data is made visible to: transfer, shader, uniform,
// vertex attribute and index reads.
constexpr VkAccessFlags UPLOAD_ACCESS_BARRIERS =
VK_ACCESS_TRANSFER_READ_BIT | VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_UNIFORM_READ_BIT |
VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT | VK_ACCESS_INDEX_READ_BIT;
// Write accesses of the transform feedback extension (buffer and counter writes).
constexpr VkAccessFlags TRANSFORM_FEEDBACK_WRITE_ACCESS =
VK_ACCESS_TRANSFORM_FEEDBACK_WRITE_BIT_EXT | VK_ACCESS_TRANSFORM_FEEDBACK_COUNTER_WRITE_BIT_EXT;
/// Returns the size in bytes of a single index of the given type.
/// Unknown index types trigger UNREACHABLE_MSG and are reported as one byte.
size_t BytesPerIndex(VkIndexType index_type) {
    if (index_type == VK_INDEX_TYPE_UINT8_EXT) {
        return 1;
    }
    if (index_type == VK_INDEX_TYPE_UINT16) {
        return 2;
    }
    if (index_type == VK_INDEX_TYPE_UINT32) {
        return 4;
    }
    UNREACHABLE_MSG("Invalid index type={}", index_type);
    return 1;
}
/// Builds the six triangle-list indices (two triangles: 0-1-2 and 0-2-3) for one quad.
/// @tparam T    Integer type the indices are stored as; values are converted from u32.
/// @param quad  Quad number; each quad advances the indices by four vertices.
/// @param first Offset added to every index.
template <typename T>
std::array<T, 6> MakeQuadIndices(u32 quad, u32 first) {
    static constexpr std::array<u32, 6> corners{0, 1, 2, 0, 2, 3};
    const u32 base = first + quad * 4;
    std::array<T, 6> indices{};
    for (size_t i = 0; i < corners.size(); ++i) {
        indices[i] = static_cast<T>(base + corners[i]);
    }
    return indices;
}
} // Anonymous namespace
Buffer::Buffer(const Device& device_, MemoryAllocator& memory_allocator, VKScheduler& scheduler_,
StagingBufferPool& staging_pool_, VAddr cpu_addr_, std::size_t size_)
: BufferBlock{cpu_addr_, size_}, device{device_}, scheduler{scheduler_}, staging_pool{
staging_pool_} {
buffer = device.GetLogical().CreateBuffer(VkBufferCreateInfo{
Buffer::Buffer(BufferCacheRuntime&, VideoCommon::NullBufferParams null_params)
: VideoCommon::BufferBase<VideoCore::RasterizerInterface>(null_params) {}
Buffer::Buffer(BufferCacheRuntime& runtime, VideoCore::RasterizerInterface& rasterizer_,
VAddr cpu_addr_, u64 size_bytes_)
: VideoCommon::BufferBase<VideoCore::RasterizerInterface>(rasterizer_, cpu_addr_, size_bytes_) {
buffer = runtime.device.GetLogical().CreateBuffer(VkBufferCreateInfo{
.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
.size = static_cast<VkDeviceSize>(size_),
.usage = BUFFER_USAGE | VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT,
.size = SizeBytes(),
.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT |
VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT |
VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT | VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT |
VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_INDEX_BUFFER_BIT |
VK_BUFFER_USAGE_VERTEX_BUFFER_BIT,
.sharingMode = VK_SHARING_MODE_EXCLUSIVE,
.queueFamilyIndexCount = 0,
.pQueueFamilyIndices = nullptr,
});
commit = memory_allocator.Commit(buffer, MemoryUsage::DeviceLocal);
if (runtime.device.HasDebuggingToolAttached()) {
buffer.SetObjectNameEXT(fmt::format("Buffer 0x{:x}", CpuAddr()).c_str());
}
commit = runtime.memory_allocator.Commit(buffer, MemoryUsage::DeviceLocal);
}
// Defaulted destructor: no explicit teardown is written here.
Buffer::~Buffer() = default;
// Stores references to the device, memory allocator, scheduler, staging pool and descriptor
// update queue, and constructs the two helper passes (uint8_pass and quad_index_pass).
// The descriptor pool is not stored; it is only forwarded to both passes.
BufferCacheRuntime::BufferCacheRuntime(const Device& device_, MemoryAllocator& memory_allocator_,
VKScheduler& scheduler_, StagingBufferPool& staging_pool_,
VKUpdateDescriptorQueue& update_descriptor_queue_,
VKDescriptorPool& descriptor_pool)
: device{device_}, memory_allocator{memory_allocator_}, scheduler{scheduler_},
staging_pool{staging_pool_}, update_descriptor_queue{update_descriptor_queue_},
uint8_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue),
quad_index_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue) {}
void Buffer::Upload(std::size_t offset, std::size_t data_size, const u8* data) {
const auto& staging = staging_pool.Request(data_size, MemoryUsage::Upload);
std::memcpy(staging.mapped_span.data(), data, data_size);
/// Requests a staging buffer of at least `size` bytes with upload memory usage.
StagingBufferRef BufferCacheRuntime::UploadStagingBuffer(size_t size) {
    constexpr MemoryUsage usage = MemoryUsage::Upload;
    return staging_pool.Request(size, usage);
}
/// Requests a staging buffer of at least `size` bytes with download memory usage.
StagingBufferRef BufferCacheRuntime::DownloadStagingBuffer(size_t size) {
    constexpr MemoryUsage usage = MemoryUsage::Download;
    return staging_pool.Request(size, usage);
}
// Forwards to VKScheduler::Finish().
// NOTE(review): presumably blocks until all recorded work has completed - confirm in VKScheduler.
void BufferCacheRuntime::Finish() {
scheduler.Finish();
}
/// Records a copy of the given regions from `src_buffer` into `dst_buffer`.
/// The copy is recorded outside of a render pass and is wrapped in two global memory barriers:
/// one before it (prior writes -> transfer accesses) and one after it (transfer writes ->
/// later memory reads and writes).
void BufferCacheRuntime::CopyBuffer(VkBuffer dst_buffer, VkBuffer src_buffer,
                                    std::span<const VideoCommon::BufferCopy> copies) {
    static constexpr VkMemoryBarrier PRE_COPY_BARRIER{
        .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER,
        .pNext = nullptr,
        .srcAccessMask = VK_ACCESS_MEMORY_WRITE_BIT,
        .dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT | VK_ACCESS_TRANSFER_WRITE_BIT,
    };
    static constexpr VkMemoryBarrier POST_COPY_BARRIER{
        .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER,
        .pNext = nullptr,
        .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
        .dstAccessMask = VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT,
    };
    // Measuring a popular game, this number never exceeds the specified size once data is warmed up
    boost::container::small_vector<VkBufferCopy, 3> regions(copies.size());
    std::ranges::transform(copies, regions.begin(), MakeBufferCopy);

    scheduler.RequestOutsideRenderPassOperationContext();
    // The regions are captured by value because the lambda outlives this call
    scheduler.Record([src_buffer, dst_buffer, regions](vk::CommandBuffer cmdbuf) {
        cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT,
                               0, PRE_COPY_BARRIER);
        cmdbuf.CopyBuffer(src_buffer, dst_buffer, regions);
        cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
                               0, POST_COPY_BARRIER);
    });
}
const VkBuffer handle = Handle();
scheduler.Record([staging = staging.buffer, handle, offset, data_size,
&device = device](vk::CommandBuffer cmdbuf) {
const VkBufferMemoryBarrier read_barrier{
.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,
.pNext = nullptr,
.srcAccessMask =
VK_ACCESS_SHADER_WRITE_BIT | VK_ACCESS_TRANSFER_WRITE_BIT |
VK_ACCESS_HOST_WRITE_BIT |
(device.IsExtTransformFeedbackSupported() ? TRANSFORM_FEEDBACK_WRITE_ACCESS : 0),
.dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT,
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.buffer = handle,
.offset = offset,
.size = data_size,
// Records the binding of an index buffer, converting the indices first when needed:
// - Quad topologies are passed through quad_index_pass, and the result is bound as 32-bit indices.
// - 8-bit indices on a device without native support are passed through uint8_pass, and the
//   result is bound as 16-bit indices.
// In both cases the buffer and offset returned by the pass replace the ones given by the caller.
// `size` is not used.
void BufferCacheRuntime::BindIndexBuffer(PrimitiveTopology topology, IndexFormat index_format,
u32 base_vertex, u32 num_indices, VkBuffer buffer,
u32 offset, [[maybe_unused]] u32 size) {
VkIndexType index_type = MaxwellToVK::IndexFormat(index_format);
if (topology == PrimitiveTopology::Quads) {
index_type = VK_INDEX_TYPE_UINT32;
std::tie(buffer, offset) =
quad_index_pass.Assemble(index_format, num_indices, base_vertex, buffer, offset);
} else if (index_type == VK_INDEX_TYPE_UINT8_EXT && !device.IsExtIndexTypeUint8Supported()) {
index_type = VK_INDEX_TYPE_UINT16;
std::tie(buffer, offset) = uint8_pass.Assemble(num_indices, buffer, offset);
}
scheduler.Record([buffer, offset, index_type](vk::CommandBuffer cmdbuf) {
cmdbuf.BindIndexBuffer(buffer, offset, index_type);
});
}
/// Binds the quad lookup table as index buffer for a non-indexed quad draw.
/// @param first First vertex of the draw.
/// @param count Number of vertices of the draw.
/// The table is grown beforehand (waiting for idle) so it covers `first + count` indices.
void BufferCacheRuntime::BindQuadArrayIndexBuffer(u32 first, u32 count) {
    ReserveQuadArrayLUT(first + count, true);

    // The LUT has the indices 0, 1, 2, and 3 copied as an array
    // To apply these 'first' offsets we can apply an offset based on the modulus.
    const VkIndexType index_type = quad_array_lut_index_type;
    const size_t quads_per_copy = current_num_indices / 4;
    const size_t copy_index = first % 4;
    const size_t first_quad = copy_index * quads_per_copy + first / 4;
    // Every quad occupies six indices in the table
    const size_t offset = first_quad * 6ULL * BytesPerIndex(index_type);

    const VkBuffer lut_buffer = *quad_array_lut;
    scheduler.Record([lut_buffer, index_type, offset](vk::CommandBuffer cmdbuf) {
        cmdbuf.BindIndexBuffer(lut_buffer, offset, index_type);
    });
}
/// Records the binding of one vertex buffer.
/// With extended dynamic state the size and stride are bound too (a null buffer is bound with
/// VK_WHOLE_SIZE); without it only the buffer and its offset are bound.
void BufferCacheRuntime::BindVertexBuffer(u32 index, VkBuffer buffer, u32 offset, u32 size,
                                          u32 stride) {
    if (!device.IsExtExtendedDynamicStateSupported()) {
        scheduler.Record([index, buffer, offset](vk::CommandBuffer cmdbuf) {
            cmdbuf.BindVertexBuffer(index, buffer, offset);
        });
        return;
    }
    scheduler.Record([index, buffer, offset, size, stride](vk::CommandBuffer cmdbuf) {
        const VkDeviceSize binding_offset = offset;
        const VkDeviceSize binding_size = buffer == VK_NULL_HANDLE ? VK_WHOLE_SIZE : size;
        const VkDeviceSize binding_stride = stride;
        cmdbuf.BindVertexBuffers2EXT(index, 1, &buffer, &binding_offset, &binding_size,
                                     &binding_stride);
    });
}
/// Records the binding of one transform feedback buffer.
/// Nothing is recorded when the device lacks the transform feedback extension.
void BufferCacheRuntime::BindTransformFeedbackBuffer(u32 index, VkBuffer buffer, u32 offset,
                                                     u32 size) {
    // When unsupported, this was already logged in the rasterizer
    if (device.IsExtTransformFeedbackSupported()) {
        scheduler.Record([index, buffer, offset, size](vk::CommandBuffer cmdbuf) {
            const VkDeviceSize binding_offset = offset;
            const VkDeviceSize binding_size = size;
            cmdbuf.BindTransformFeedbackBuffersEXT(index, 1, &buffer, &binding_offset,
                                                   &binding_size);
        });
    }
}
// Queues the buffer range [offset, offset + size) in the descriptor update queue.
void BufferCacheRuntime::BindBuffer(VkBuffer buffer, u32 offset, u32 size) {
update_descriptor_queue.AddBuffer(buffer, offset, size);
}
void BufferCacheRuntime::ReserveQuadArrayLUT(u32 num_indices, bool wait_for_idle) {
if (num_indices <= current_num_indices) {
return;
}
if (wait_for_idle) {
scheduler.Finish();
}
current_num_indices = num_indices;
quad_array_lut_index_type = IndexTypeFromNumElements(device, num_indices);
const u32 num_quads = num_indices / 4;
const u32 num_triangle_indices = num_quads * 6;
const u32 num_first_offset_copies = 4;
const size_t bytes_per_index = BytesPerIndex(quad_array_lut_index_type);
const size_t size_bytes = num_triangle_indices * bytes_per_index * num_first_offset_copies;
quad_array_lut = device.GetLogical().CreateBuffer(VkBufferCreateInfo{
.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
.size = size_bytes,
.usage = VK_BUFFER_USAGE_INDEX_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT,
.sharingMode = VK_SHARING_MODE_EXCLUSIVE,
.queueFamilyIndexCount = 0,
.pQueueFamilyIndices = nullptr,
});
if (device.HasDebuggingToolAttached()) {
quad_array_lut.SetObjectNameEXT("Quad LUT");
}
quad_array_lut_commit = memory_allocator.Commit(quad_array_lut, MemoryUsage::DeviceLocal);
const StagingBufferRef staging = staging_pool.Request(size_bytes, MemoryUsage::Upload);
u8* staging_data = staging.mapped_span.data();
const size_t quad_size = bytes_per_index * 6;
for (u32 first = 0; first < num_first_offset_copies; ++first) {
for (u32 quad = 0; quad < num_quads; ++quad) {
switch (quad_array_lut_index_type) {
case VK_INDEX_TYPE_UINT8_EXT:
std::memcpy(staging_data, MakeQuadIndices<u8>(quad, first).data(), quad_size);
break;
case VK_INDEX_TYPE_UINT16:
std::memcpy(staging_data, MakeQuadIndices<u16>(quad, first).data(), quad_size);
break;
case VK_INDEX_TYPE_UINT32:
std::memcpy(staging_data, MakeQuadIndices<u32>(quad, first).data(), quad_size);
break;
default:
UNREACHABLE();
break;
}
staging_data += quad_size;
}
}
scheduler.RequestOutsideRenderPassOperationContext();
scheduler.Record([src_buffer = staging.buffer, dst_buffer = *quad_array_lut,
size_bytes](vk::CommandBuffer cmdbuf) {
const VkBufferCopy copy{
.srcOffset = 0,
.dstOffset = 0,
.size = size_bytes,
};
const VkBufferMemoryBarrier write_barrier{
.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,
.pNext = nullptr,
.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
.dstAccessMask = UPLOAD_ACCESS_BARRIERS,
.dstAccessMask = VK_ACCESS_INDEX_READ_BIT,
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.buffer = handle,
.offset = offset,
.size = data_size,
.buffer = dst_buffer,
.offset = 0,
.size = size_bytes,
};
cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT,
0, read_barrier);
cmdbuf.CopyBuffer(staging, handle, VkBufferCopy{0, offset, data_size});
cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, UPLOAD_PIPELINE_STAGE, 0,
write_barrier);
cmdbuf.CopyBuffer(src_buffer, dst_buffer, copy);
cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_VERTEX_INPUT_BIT,
0, write_barrier);
});
}
// Reads `data_size` bytes starting at `offset` of this buffer back into `data`.
// The copy goes through a download staging buffer: a barrier plus a buffer copy are recorded,
// scheduler.Finish() is called, and only then is the staging memory copied to `data`.
void Buffer::Download(std::size_t offset, std::size_t data_size, u8* data) {
auto staging = staging_pool.Request(data_size, MemoryUsage::Download);
scheduler.RequestOutsideRenderPassOperationContext();
const VkBuffer handle = Handle();
scheduler.Record(
[staging = staging.buffer, handle, offset, data_size](vk::CommandBuffer cmdbuf) {
// Makes shader writes to the downloaded range available to the transfer read
const VkBufferMemoryBarrier barrier{
.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,
.pNext = nullptr,
.srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT,
.dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT,
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.buffer = handle,
.offset = offset,
.size = data_size,
};
cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_VERTEX_SHADER_BIT |
VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT |
VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
VK_PIPELINE_STAGE_TRANSFER_BIT, 0, {}, barrier, {});
cmdbuf.CopyBuffer(handle, staging, VkBufferCopy{offset, 0, data_size});
});
// NOTE(review): presumably Finish() waits for the recorded copy to complete - confirm
scheduler.Finish();
std::memcpy(data, staging.mapped_span.data(), data_size);
}
void Buffer::CopyFrom(const Buffer& src, std::size_t src_offset, std::size_t dst_offset,
std::size_t copy_size) {
scheduler.RequestOutsideRenderPassOperationContext();
const VkBuffer dst_buffer = Handle();
scheduler.Record([src_buffer = src.Handle(), dst_buffer, src_offset, dst_offset,
copy_size](vk::CommandBuffer cmdbuf) {
cmdbuf.CopyBuffer(src_buffer, dst_buffer, VkBufferCopy{src_offset, dst_offset, copy_size});
std::array<VkBufferMemoryBarrier, 2> barriers;
barriers[0].sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER;
barriers[0].pNext = nullptr;
barriers[0].srcAccessMask = VK_ACCESS_TRANSFER_READ_BIT;
barriers[0].dstAccessMask = VK_ACCESS_SHADER_WRITE_BIT;
barriers[0].srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
barriers[0].dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
barriers[0].buffer = src_buffer;
barriers[0].offset = src_offset;
barriers[0].size = copy_size;
barriers[1].sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER;
barriers[1].pNext = nullptr;
barriers[1].srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
barriers[1].dstAccessMask = UPLOAD_ACCESS_BARRIERS;
barriers[1].srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
barriers[1].dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
barriers[1].buffer = dst_buffer;
barriers[1].offset = dst_offset;
barriers[1].size = copy_size;
cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, UPLOAD_PIPELINE_STAGE, 0, {},
barriers, {});
});
}
// Forwards the rasterizer, GPU memory manager, CPU memory and stream buffer to the generic
// buffer cache base class, and keeps references to the Vulkan objects needed to create blocks
// (device, memory allocator, scheduler and staging pool).
VKBufferCache::VKBufferCache(VideoCore::RasterizerInterface& rasterizer_,
Tegra::MemoryManager& gpu_memory_, Core::Memory::Memory& cpu_memory_,
const Device& device_, MemoryAllocator& memory_allocator_,
VKScheduler& scheduler_, VKStreamBuffer& stream_buffer_,
StagingBufferPool& staging_pool_)
: VideoCommon::BufferCache<Buffer, VkBuffer, VKStreamBuffer>{rasterizer_, gpu_memory_,
cpu_memory_, stream_buffer_},
device{device_}, memory_allocator{memory_allocator_}, scheduler{scheduler_},
staging_pool{staging_pool_} {}
// Defaulted destructor: no explicit teardown is written here.
VKBufferCache::~VKBufferCache() = default;
/// Creates a new buffer block of `size` bytes for the given CPU address, backed by the
/// cache's device, memory allocator, scheduler and staging pool.
std::shared_ptr<Buffer> VKBufferCache::CreateBlock(VAddr cpu_addr, std::size_t size) {
    auto block = std::make_shared<Buffer>(device, memory_allocator, scheduler, staging_pool,
                                          cpu_addr, size);
    return block;
}
/// Returns a zero-filled device-local buffer of at least `size` bytes.
/// @param size Requested size in bytes. It is raised to at least 4 and rounded up to the next
///             multiple of 4, because vkCmdFillBuffer only accepts non-zero sizes that are a
///             multiple of 4 (unless VK_WHOLE_SIZE is used).
/// @return The buffer handle together with a zero offset and a zero address.
VKBufferCache::BufferInfo VKBufferCache::GetEmptyBuffer(std::size_t size) {
    constexpr std::size_t FILL_ALIGNMENT = 4;
    size = std::max(size, FILL_ALIGNMENT);
    // Round up so that the fill size satisfies the multiple-of-4 requirement
    size = (size + FILL_ALIGNMENT - 1) & ~(FILL_ALIGNMENT - 1);

    const auto& empty = staging_pool.Request(size, MemoryUsage::DeviceLocal);
    scheduler.RequestOutsideRenderPassOperationContext();
    scheduler.Record([size, buffer = empty.buffer](vk::CommandBuffer cmdbuf) {
        cmdbuf.FillBuffer(buffer, 0, size, 0);
    });
    return {empty.buffer, 0, 0};
}
} // namespace Vulkan

View File

@@ -4,69 +4,112 @@
#pragma once
#include <memory>
#include "common/common_types.h"
#include "video_core/buffer_cache/buffer_cache.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/renderer_vulkan/vk_compute_pass.h"
#include "video_core/renderer_vulkan/vk_staging_buffer_pool.h"
#include "video_core/renderer_vulkan/vk_stream_buffer.h"
#include "video_core/vulkan_common/vulkan_memory_allocator.h"
#include "video_core/vulkan_common/vulkan_wrapper.h"
namespace Vulkan {
class Device;
class VKDescriptorPool;
class VKScheduler;
class VKUpdateDescriptorQueue;
class Buffer final : public VideoCommon::BufferBlock {
class BufferCacheRuntime;
class Buffer : public VideoCommon::BufferBase<VideoCore::RasterizerInterface> {
public:
explicit Buffer(const Device& device, MemoryAllocator& memory_allocator, VKScheduler& scheduler,
StagingBufferPool& staging_pool, VAddr cpu_addr_, std::size_t size_);
~Buffer();
explicit Buffer(BufferCacheRuntime&, VideoCommon::NullBufferParams null_params);
explicit Buffer(BufferCacheRuntime& runtime, VideoCore::RasterizerInterface& rasterizer_,
VAddr cpu_addr_, u64 size_bytes_);
void Upload(std::size_t offset, std::size_t data_size, const u8* data);
void Download(std::size_t offset, std::size_t data_size, u8* data);
void CopyFrom(const Buffer& src, std::size_t src_offset, std::size_t dst_offset,
std::size_t copy_size);
VkBuffer Handle() const {
[[nodiscard]] VkBuffer Handle() const noexcept {
return *buffer;
}
u64 Address() const {
return 0;
operator VkBuffer() const noexcept {
return *buffer;
}
private:
const Device& device;
VKScheduler& scheduler;
StagingBufferPool& staging_pool;
vk::Buffer buffer;
MemoryCommit commit;
};
class VKBufferCache final : public VideoCommon::BufferCache<Buffer, VkBuffer, VKStreamBuffer> {
class BufferCacheRuntime {
friend Buffer;
using PrimitiveTopology = Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology;
using IndexFormat = Tegra::Engines::Maxwell3D::Regs::IndexFormat;
public:
explicit VKBufferCache(VideoCore::RasterizerInterface& rasterizer,
Tegra::MemoryManager& gpu_memory, Core::Memory::Memory& cpu_memory,
const Device& device, MemoryAllocator& memory_allocator,
VKScheduler& scheduler, VKStreamBuffer& stream_buffer,
StagingBufferPool& staging_pool);
~VKBufferCache();
explicit BufferCacheRuntime(const Device& device_, MemoryAllocator& memory_manager_,
VKScheduler& scheduler_, StagingBufferPool& staging_pool_,
VKUpdateDescriptorQueue& update_descriptor_queue_,
VKDescriptorPool& descriptor_pool);
BufferInfo GetEmptyBuffer(std::size_t size) override;
void Finish();
protected:
std::shared_ptr<Buffer> CreateBlock(VAddr cpu_addr, std::size_t size) override;
[[nodiscard]] StagingBufferRef UploadStagingBuffer(size_t size);
[[nodiscard]] StagingBufferRef DownloadStagingBuffer(size_t size);
void CopyBuffer(VkBuffer src_buffer, VkBuffer dst_buffer,
std::span<const VideoCommon::BufferCopy> copies);
void BindIndexBuffer(PrimitiveTopology topology, IndexFormat index_format, u32 num_indices,
u32 base_vertex, VkBuffer buffer, u32 offset, u32 size);
void BindQuadArrayIndexBuffer(u32 first, u32 count);
void BindVertexBuffer(u32 index, VkBuffer buffer, u32 offset, u32 size, u32 stride);
void BindTransformFeedbackBuffer(u32 index, VkBuffer buffer, u32 offset, u32 size);
void BindUniformBuffer(VkBuffer buffer, u32 offset, u32 size) {
BindBuffer(buffer, offset, size);
}
void BindStorageBuffer(VkBuffer buffer, u32 offset, u32 size,
[[maybe_unused]] bool is_written) {
BindBuffer(buffer, offset, size);
}
private:
void BindBuffer(VkBuffer buffer, u32 offset, u32 size);
void ReserveQuadArrayLUT(u32 num_indices, bool wait_for_idle);
const Device& device;
MemoryAllocator& memory_allocator;
VKScheduler& scheduler;
StagingBufferPool& staging_pool;
VKUpdateDescriptorQueue& update_descriptor_queue;
vk::Buffer quad_array_lut;
MemoryCommit quad_array_lut_commit;
VkIndexType quad_array_lut_index_type{};
u32 current_num_indices = 0;
Uint8Pass uint8_pass;
QuadIndexedPass quad_index_pass;
};
/// Compile-time parameters used to instantiate VideoCommon::BufferCache for the Vulkan backend.
/// NOTE(review): the exact effect of each flag is defined by the VideoCommon::BufferCache
/// template, which is not visible here - confirm there before changing a value.
struct BufferCacheParams {
// Backend types plugged into the generic cache
using Runtime = Vulkan::BufferCacheRuntime;
using Buffer = Vulkan::Buffer;
// Backend feature switches
static constexpr bool IS_OPENGL = false;
static constexpr bool HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS = false;
static constexpr bool HAS_FULL_INDEX_AND_PRIMITIVE_SUPPORT = false;
static constexpr bool NEEDS_BIND_UNIFORM_INDEX = false;
static constexpr bool NEEDS_BIND_STORAGE_INDEX = false;
static constexpr bool USE_MEMORY_MAPS = true;
};
using BufferCache = VideoCommon::BufferCache<BufferCacheParams>;
} // namespace Vulkan

View File

@@ -10,7 +10,6 @@
#include "common/alignment.h"
#include "common/assert.h"
#include "common/common_types.h"
#include "video_core/host_shaders/vulkan_quad_array_comp_spv.h"
#include "video_core/host_shaders/vulkan_quad_indexed_comp_spv.h"
#include "video_core/host_shaders/vulkan_uint8_comp_spv.h"
#include "video_core/renderer_vulkan/vk_compute_pass.h"
@@ -22,19 +21,7 @@
#include "video_core/vulkan_common/vulkan_wrapper.h"
namespace Vulkan {
namespace {
/// Describes the only descriptor of the quad array pass: one storage buffer at binding 0,
/// visible to the compute stage, without immutable samplers.
VkDescriptorSetLayoutBinding BuildQuadArrayPassDescriptorSetLayoutBinding() {
    VkDescriptorSetLayoutBinding layout_binding{};
    layout_binding.binding = 0;
    layout_binding.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
    layout_binding.descriptorCount = 1;
    layout_binding.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT;
    layout_binding.pImmutableSamplers = nullptr;
    return layout_binding;
}
VkDescriptorUpdateTemplateEntryKHR BuildQuadArrayPassDescriptorUpdateTemplateEntry() {
return {
.dstBinding = 0,
@@ -162,55 +149,6 @@ VkDescriptorSet VKComputePass::CommitDescriptorSet(
return set;
}
/// Constructs the quad array compute pass.
/// The base VKComputePass receives the pass' descriptor set layout binding, its descriptor
/// update template entry, a push constant range of sizeof(u32) bytes and the
/// VULKAN_QUAD_ARRAY_COMP_SPV shader code. The scheduler, staging buffer pool and update
/// descriptor queue are kept as members for use by Assemble.
QuadArrayPass::QuadArrayPass(const Device& device_, VKScheduler& scheduler_,
VKDescriptorPool& descriptor_pool_,
StagingBufferPool& staging_buffer_pool_,
VKUpdateDescriptorQueue& update_descriptor_queue_)
: VKComputePass(device_, descriptor_pool_, BuildQuadArrayPassDescriptorSetLayoutBinding(),
BuildQuadArrayPassDescriptorUpdateTemplateEntry(),
BuildComputePushConstantRange(sizeof(u32)), VULKAN_QUAD_ARRAY_COMP_SPV),
scheduler{scheduler_}, staging_buffer_pool{staging_buffer_pool_},
update_descriptor_queue{update_descriptor_queue_} {}
QuadArrayPass::~QuadArrayPass() = default;
/// Records a compute dispatch that fills a staging buffer with six u32 entries for every group
/// of four input vertices.
/// @param num_vertices Number of input vertices; asserted to be a multiple of 4.
/// @param first        Value handed to the shader as its only push constant.
/// @return The staging buffer that receives the output, paired with offset 0.
std::pair<VkBuffer, VkDeviceSize> QuadArrayPass::Assemble(u32 num_vertices, u32 first) {
    const u32 output_count = (num_vertices / 4) * 6;
    const std::size_t output_size = output_count * sizeof(u32);
    const auto output = staging_buffer_pool.Request(output_size, MemoryUsage::DeviceLocal);

    // The output buffer is the single descriptor consumed by the shader
    update_descriptor_queue.Acquire();
    update_descriptor_queue.AddBuffer(output.buffer, 0, output_size);
    const VkDescriptorSet descriptor_set = CommitDescriptorSet(update_descriptor_queue);

    scheduler.RequestOutsideRenderPassOperationContext();

    ASSERT(num_vertices % 4 == 0);
    const u32 quad_count = num_vertices / 4;
    scheduler.Record([pipeline_layout = *layout, compute_pipeline = *pipeline,
                      dst_buffer = output.buffer, descriptor_set, quad_count,
                      first](vk::CommandBuffer cmdbuf) {
        constexpr u32 group_size = 1024;
        const u32 group_count = Common::AlignUp(quad_count, group_size) / group_size;

        cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, compute_pipeline);
        cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, pipeline_layout, 0,
                                  descriptor_set, {});
        cmdbuf.PushConstants(pipeline_layout, VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(first),
                             &first);
        cmdbuf.Dispatch(group_count, 1, 1);

        // Make the shader writes visible to the vertex input stage
        const VkBufferMemoryBarrier barrier{
            .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,
            .pNext = nullptr,
            .srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT,
            .dstAccessMask = VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT,
            .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
            .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
            .buffer = dst_buffer,
            .offset = 0,
            .size = static_cast<VkDeviceSize>(quad_count) * 6 * sizeof(u32),
        };
        cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
                               VK_PIPELINE_STAGE_VERTEX_INPUT_BIT, 0, {}, {barrier}, {});
    });
    return {output.buffer, 0};
}
Uint8Pass::Uint8Pass(const Device& device, VKScheduler& scheduler_,
VKDescriptorPool& descriptor_pool, StagingBufferPool& staging_buffer_pool_,
VKUpdateDescriptorQueue& update_descriptor_queue_)
@@ -221,18 +159,18 @@ Uint8Pass::Uint8Pass(const Device& device, VKScheduler& scheduler_,
Uint8Pass::~Uint8Pass() = default;
std::pair<VkBuffer, u64> Uint8Pass::Assemble(u32 num_vertices, VkBuffer src_buffer,
u64 src_offset) {
std::pair<VkBuffer, u32> Uint8Pass::Assemble(u32 num_vertices, VkBuffer src_buffer,
u32 src_offset) {
const u32 staging_size = static_cast<u32>(num_vertices * sizeof(u16));
const auto staging_ref = staging_buffer_pool.Request(staging_size, MemoryUsage::DeviceLocal);
const auto staging = staging_buffer_pool.Request(staging_size, MemoryUsage::DeviceLocal);
update_descriptor_queue.Acquire();
update_descriptor_queue.AddBuffer(src_buffer, src_offset, num_vertices);
update_descriptor_queue.AddBuffer(staging_ref.buffer, 0, staging_size);
update_descriptor_queue.AddBuffer(staging.buffer, 0, staging_size);
const VkDescriptorSet set = CommitDescriptorSet(update_descriptor_queue);
scheduler.RequestOutsideRenderPassOperationContext();
scheduler.Record([layout = *layout, pipeline = *pipeline, buffer = staging_ref.buffer, set,
scheduler.Record([layout = *layout, pipeline = *pipeline, buffer = staging.buffer, set,
num_vertices](vk::CommandBuffer cmdbuf) {
constexpr u32 dispatch_size = 1024;
cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
@@ -252,7 +190,7 @@ std::pair<VkBuffer, u64> Uint8Pass::Assemble(u32 num_vertices, VkBuffer src_buff
cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
VK_PIPELINE_STAGE_VERTEX_INPUT_BIT, 0, {}, barrier, {});
});
return {staging_ref.buffer, 0};
return {staging.buffer, 0};
}
QuadIndexedPass::QuadIndexedPass(const Device& device_, VKScheduler& scheduler_,
@@ -267,9 +205,9 @@ QuadIndexedPass::QuadIndexedPass(const Device& device_, VKScheduler& scheduler_,
QuadIndexedPass::~QuadIndexedPass() = default;
std::pair<VkBuffer, u64> QuadIndexedPass::Assemble(
std::pair<VkBuffer, u32> QuadIndexedPass::Assemble(
Tegra::Engines::Maxwell3D::Regs::IndexFormat index_format, u32 num_vertices, u32 base_vertex,
VkBuffer src_buffer, u64 src_offset) {
VkBuffer src_buffer, u32 src_offset) {
const u32 index_shift = [index_format] {
switch (index_format) {
case Tegra::Engines::Maxwell3D::Regs::IndexFormat::UnsignedByte:
@@ -286,15 +224,15 @@ std::pair<VkBuffer, u64> QuadIndexedPass::Assemble(
const u32 num_tri_vertices = (num_vertices / 4) * 6;
const std::size_t staging_size = num_tri_vertices * sizeof(u32);
const auto staging_ref = staging_buffer_pool.Request(staging_size, MemoryUsage::DeviceLocal);
const auto staging = staging_buffer_pool.Request(staging_size, MemoryUsage::DeviceLocal);
update_descriptor_queue.Acquire();
update_descriptor_queue.AddBuffer(src_buffer, src_offset, input_size);
update_descriptor_queue.AddBuffer(staging_ref.buffer, 0, staging_size);
update_descriptor_queue.AddBuffer(staging.buffer, 0, staging_size);
const VkDescriptorSet set = CommitDescriptorSet(update_descriptor_queue);
scheduler.RequestOutsideRenderPassOperationContext();
scheduler.Record([layout = *layout, pipeline = *pipeline, buffer = staging_ref.buffer, set,
scheduler.Record([layout = *layout, pipeline = *pipeline, buffer = staging.buffer, set,
num_tri_vertices, base_vertex, index_shift](vk::CommandBuffer cmdbuf) {
static constexpr u32 dispatch_size = 1024;
const std::array push_constants = {base_vertex, index_shift};
@@ -317,7 +255,7 @@ std::pair<VkBuffer, u64> QuadIndexedPass::Assemble(
cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
VK_PIPELINE_STAGE_VERTEX_INPUT_BIT, 0, {}, barrier, {});
});
return {staging_ref.buffer, 0};
return {staging.buffer, 0};
}
} // namespace Vulkan

View File

@@ -41,22 +41,6 @@ private:
vk::ShaderModule module;
};
/// Compute pass derived from VKComputePass whose Assemble() records a dispatch that writes
/// into a buffer requested from the staging buffer pool.
class QuadArrayPass final : public VKComputePass {
public:
explicit QuadArrayPass(const Device& device_, VKScheduler& scheduler_,
VKDescriptorPool& descriptor_pool_,
StagingBufferPool& staging_buffer_pool_,
VKUpdateDescriptorQueue& update_descriptor_queue_);
~QuadArrayPass();
/// Assembles the output for num_vertices input vertices, passing first to the shader.
/// Returns a pair with the output buffer and the offset of the data inside it.
std::pair<VkBuffer, VkDeviceSize> Assemble(u32 num_vertices, u32 first);
private:
// Collaborators supplied at construction time and used by Assemble
VKScheduler& scheduler;
StagingBufferPool& staging_buffer_pool;
VKUpdateDescriptorQueue& update_descriptor_queue;
};
class Uint8Pass final : public VKComputePass {
public:
explicit Uint8Pass(const Device& device_, VKScheduler& scheduler_,
@@ -64,7 +48,9 @@ public:
VKUpdateDescriptorQueue& update_descriptor_queue_);
~Uint8Pass();
std::pair<VkBuffer, u64> Assemble(u32 num_vertices, VkBuffer src_buffer, u64 src_offset);
/// Assemble uint8 indices into an uint16 index buffer
/// Returns a pair with the staging buffer, and the offset where the assembled data is
std::pair<VkBuffer, u32> Assemble(u32 num_vertices, VkBuffer src_buffer, u32 src_offset);
private:
VKScheduler& scheduler;
@@ -80,9 +66,9 @@ public:
VKUpdateDescriptorQueue& update_descriptor_queue_);
~QuadIndexedPass();
std::pair<VkBuffer, u64> Assemble(Tegra::Engines::Maxwell3D::Regs::IndexFormat index_format,
std::pair<VkBuffer, u32> Assemble(Tegra::Engines::Maxwell3D::Regs::IndexFormat index_format,
u32 num_vertices, u32 base_vertex, VkBuffer src_buffer,
u64 src_offset);
u32 src_offset);
private:
VKScheduler& scheduler;

View File

@@ -45,8 +45,8 @@ void InnerFence::Wait() {
}
VKFenceManager::VKFenceManager(VideoCore::RasterizerInterface& rasterizer_, Tegra::GPU& gpu_,
Tegra::MemoryManager& memory_manager_, TextureCache& texture_cache_,
VKBufferCache& buffer_cache_, VKQueryCache& query_cache_,
TextureCache& texture_cache_, BufferCache& buffer_cache_,
VKQueryCache& query_cache_, const Device& device_,
VKScheduler& scheduler_)
: GenericFenceManager{rasterizer_, gpu_, texture_cache_, buffer_cache_, query_cache_},
scheduler{scheduler_} {}

View File

@@ -22,7 +22,6 @@ class RasterizerInterface;
namespace Vulkan {
class Device;
class VKBufferCache;
class VKQueryCache;
class VKScheduler;
@@ -45,14 +44,14 @@ private:
using Fence = std::shared_ptr<InnerFence>;
using GenericFenceManager =
VideoCommon::FenceManager<Fence, TextureCache, VKBufferCache, VKQueryCache>;
VideoCommon::FenceManager<Fence, TextureCache, BufferCache, VKQueryCache>;
class VKFenceManager final : public GenericFenceManager {
public:
explicit VKFenceManager(VideoCore::RasterizerInterface& rasterizer_, Tegra::GPU& gpu_,
Tegra::MemoryManager& memory_manager_, TextureCache& texture_cache_,
VKBufferCache& buffer_cache_, VKQueryCache& query_cache_,
VKScheduler& scheduler_);
explicit VKFenceManager(VideoCore::RasterizerInterface& rasterizer, Tegra::GPU& gpu,
TextureCache& texture_cache, BufferCache& buffer_cache,
VKQueryCache& query_cache, const Device& device,
VKScheduler& scheduler);
protected:
Fence CreateFence(u32 value, bool is_stubbed) override;

File diff suppressed because it is too large Load Diff

View File

@@ -18,14 +18,12 @@
#include "video_core/renderer_vulkan/blit_image.h"
#include "video_core/renderer_vulkan/fixed_pipeline_state.h"
#include "video_core/renderer_vulkan/vk_buffer_cache.h"
#include "video_core/renderer_vulkan/vk_compute_pass.h"
#include "video_core/renderer_vulkan/vk_descriptor_pool.h"
#include "video_core/renderer_vulkan/vk_fence_manager.h"
#include "video_core/renderer_vulkan/vk_pipeline_cache.h"
#include "video_core/renderer_vulkan/vk_query_cache.h"
#include "video_core/renderer_vulkan/vk_scheduler.h"
#include "video_core/renderer_vulkan/vk_staging_buffer_pool.h"
#include "video_core/renderer_vulkan/vk_stream_buffer.h"
#include "video_core/renderer_vulkan/vk_texture_cache.h"
#include "video_core/renderer_vulkan/vk_update_descriptor.h"
#include "video_core/shader/async_shaders.h"
@@ -49,7 +47,6 @@ namespace Vulkan {
struct VKScreenInfo;
class StateTracker;
class BufferBindings;
class RasterizerVulkan final : public VideoCore::RasterizerAccelerated {
public:
@@ -65,8 +62,11 @@ public:
void DispatchCompute(GPUVAddr code_addr) override;
void ResetCounter(VideoCore::QueryType type) override;
void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional<u64> timestamp) override;
void BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, u32 size) override;
void FlushAll() override;
void FlushRegion(VAddr addr, u64 size) override;
void InvalidateExceptTextureCache(VAddr addr, u64 size) override;
void InvalidateTextureCache(VAddr addr, u64 size) override;
bool MustFlushRegion(VAddr addr, u64 size) override;
void InvalidateRegion(VAddr addr, u64 size) override;
void OnCPUWrite(VAddr addr, u64 size) override;
@@ -107,24 +107,11 @@ private:
static constexpr VkDeviceSize DEFAULT_BUFFER_SIZE = 4 * sizeof(float);
/// Parameters describing one draw call.
struct DrawParameters {
    /// Uses these parameters with the given command buffer.
    /// NOTE(review): the definition is not visible here - presumably records the draw command.
    void Draw(vk::CommandBuffer cmdbuf) const;

    u32 base_instance = 0;
    u32 num_instances = 0;
    u32 base_vertex = 0;
    u32 num_vertices = 0;
    bool is_indexed = false; // Boolean literal instead of the integer 0
};
void FlushWork();
/// Setups geometry buffers and state.
DrawParameters SetupGeometry(FixedPipelineState& fixed_state, BufferBindings& buffer_bindings,
bool is_indexed, bool is_instanced);
/// Setup descriptors in the graphics pipeline.
void SetupShaderDescriptors(const std::array<Shader*, Maxwell::MaxShaderProgram>& shaders);
void SetupShaderDescriptors(const std::array<Shader*, Maxwell::MaxShaderProgram>& shaders,
bool is_indexed);
void UpdateDynamicStates();
@@ -132,16 +119,6 @@ private:
void EndTransformFeedback();
void SetupVertexArrays(BufferBindings& buffer_bindings);
void SetupIndexBuffer(BufferBindings& buffer_bindings, DrawParameters& params, bool is_indexed);
/// Setup constant buffers in the graphics pipeline.
void SetupGraphicsConstBuffers(const ShaderEntries& entries, std::size_t stage);
/// Setup global buffers in the graphics pipeline.
void SetupGraphicsGlobalBuffers(const ShaderEntries& entries, std::size_t stage);
/// Setup uniform texels in the graphics pipeline.
void SetupGraphicsUniformTexels(const ShaderEntries& entries, std::size_t stage);
@@ -154,12 +131,6 @@ private:
/// Setup images in the graphics pipeline.
void SetupGraphicsImages(const ShaderEntries& entries, std::size_t stage);
/// Setup constant buffers in the compute pipeline.
void SetupComputeConstBuffers(const ShaderEntries& entries);
/// Setup global buffers in the compute pipeline.
void SetupComputeGlobalBuffers(const ShaderEntries& entries);
/// Setup texel buffers in the compute pipeline.
void SetupComputeUniformTexels(const ShaderEntries& entries);
@@ -172,11 +143,6 @@ private:
/// Setup images in the compute pipeline.
void SetupComputeImages(const ShaderEntries& entries);
void SetupConstBuffer(const ConstBufferEntry& entry,
const Tegra::Engines::ConstBufferInfo& buffer);
void SetupGlobalBuffer(const GlobalBufferEntry& entry, GPUVAddr address);
void UpdateViewportsState(Tegra::Engines::Maxwell3D::Regs& regs);
void UpdateScissorsState(Tegra::Engines::Maxwell3D::Regs& regs);
void UpdateDepthBias(Tegra::Engines::Maxwell3D::Regs& regs);
@@ -193,19 +159,6 @@ private:
void UpdateStencilOp(Tegra::Engines::Maxwell3D::Regs& regs);
void UpdateStencilTestEnable(Tegra::Engines::Maxwell3D::Regs& regs);
size_t CalculateGraphicsStreamBufferSize(bool is_indexed) const;
size_t CalculateComputeStreamBufferSize() const;
size_t CalculateVertexArraysSize() const;
size_t CalculateIndexBufferSize() const;
size_t CalculateConstBufferSize(const ConstBufferEntry& entry,
const Tegra::Engines::ConstBufferInfo& buffer) const;
VkBuffer DefaultBuffer();
Tegra::GPU& gpu;
Tegra::MemoryManager& gpu_memory;
Tegra::Engines::Maxwell3D& maxwell3d;
@@ -217,24 +170,19 @@ private:
StateTracker& state_tracker;
VKScheduler& scheduler;
VKStreamBuffer stream_buffer;
StagingBufferPool staging_pool;
VKDescriptorPool descriptor_pool;
VKUpdateDescriptorQueue update_descriptor_queue;
BlitImageHelper blit_image;
QuadArrayPass quad_array_pass;
QuadIndexedPass quad_indexed_pass;
Uint8Pass uint8_pass;
TextureCacheRuntime texture_cache_runtime;
TextureCache texture_cache;
BufferCacheRuntime buffer_cache_runtime;
BufferCache buffer_cache;
VKPipelineCache pipeline_cache;
VKBufferCache buffer_cache;
VKQueryCache query_cache;
VKFenceManager fence_manager;
vk::Buffer default_buffer;
MemoryCommit default_buffer_commit;
vk::Event wfi_event;
VideoCommon::Shader::AsyncShaders async_shaders;

View File

@@ -52,18 +52,6 @@ VKScheduler::~VKScheduler() {
worker_thread.join();
}
/// Returns the current tick, as reported by the master semaphore.
u64 VKScheduler::CurrentTick() const noexcept {
return master_semaphore->CurrentTick();
}
/// Returns whether the master semaphore reports the given tick as free.
bool VKScheduler::IsFree(u64 tick) const noexcept {
return master_semaphore->IsFree(tick);
}
/// Waits for the given tick by delegating to the master semaphore.
void VKScheduler::Wait(u64 tick) {
master_semaphore->Wait(tick);
}
void VKScheduler::Flush(VkSemaphore semaphore) {
SubmitExecution(semaphore);
AllocateNewContext();
@@ -269,7 +257,7 @@ void VKScheduler::EndRenderPass() {
cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT |
VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT |
VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT,
VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT, 0, nullptr, nullptr,
VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, 0, nullptr, nullptr,
vk::Span(barriers.data(), num_images));
});
state.renderpass = nullptr;

View File

@@ -12,6 +12,7 @@
#include <utility>
#include "common/common_types.h"
#include "common/threadsafe_queue.h"
#include "video_core/renderer_vulkan/vk_master_semaphore.h"
#include "video_core/vulkan_common/vulkan_wrapper.h"
namespace Vulkan {
@@ -19,7 +20,6 @@ namespace Vulkan {
class CommandPool;
class Device;
class Framebuffer;
class MasterSemaphore;
class StateTracker;
class VKQueryCache;
@@ -30,15 +30,6 @@ public:
explicit VKScheduler(const Device& device, StateTracker& state_tracker);
~VKScheduler();
/// Returns the current command buffer tick.
[[nodiscard]] u64 CurrentTick() const noexcept;
/// Returns true when a tick has been triggered by the GPU.
[[nodiscard]] bool IsFree(u64 tick) const noexcept;
/// Waits for the given tick to trigger on the GPU.
void Wait(u64 tick);
/// Sends the current execution context to the GPU.
void Flush(VkSemaphore semaphore = nullptr);
@@ -80,6 +71,21 @@ public:
(void)chunk->Record(command);
}
/// Returns the current command buffer tick.
[[nodiscard]] u64 CurrentTick() const noexcept {
return master_semaphore->CurrentTick();
}
/// Returns true when a tick has been triggered by the GPU.
[[nodiscard]] bool IsFree(u64 tick) const noexcept {
return master_semaphore->IsFree(tick);
}
/// Waits for the given tick to trigger on the GPU.
void Wait(u64 tick) {
master_semaphore->Wait(tick);
}
/// Returns the master timeline semaphore.
[[nodiscard]] MasterSemaphore& GetMasterSemaphore() const noexcept {
return *master_semaphore;

View File

@@ -3124,6 +3124,9 @@ ShaderEntries GenerateShaderEntries(const VideoCommon::Shader::ShaderIR& ir) {
entries.attributes.insert(GetGenericAttributeLocation(attribute));
}
}
for (const auto& buffer : entries.const_buffers) {
entries.enabled_uniform_buffers |= 1U << buffer.GetIndex();
}
entries.clip_distances = ir.GetClipDistances();
entries.shader_length = ir.GetLength();
entries.uses_warps = ir.UsesWarps();

View File

@@ -39,24 +39,7 @@ private:
u32 index{};
};
class GlobalBufferEntry {
public:
constexpr explicit GlobalBufferEntry(u32 cbuf_index_, u32 cbuf_offset_, bool is_written_)
: cbuf_index{cbuf_index_}, cbuf_offset{cbuf_offset_}, is_written{is_written_} {}
constexpr u32 GetCbufIndex() const {
return cbuf_index;
}
constexpr u32 GetCbufOffset() const {
return cbuf_offset;
}
constexpr bool IsWritten() const {
return is_written;
}
private:
struct GlobalBufferEntry {
u32 cbuf_index{};
u32 cbuf_offset{};
bool is_written{};
@@ -78,6 +61,7 @@ struct ShaderEntries {
std::set<u32> attributes;
std::array<bool, Maxwell::NumClipDistances> clip_distances{};
std::size_t shader_length{};
u32 enabled_uniform_buffers{};
bool uses_warps{};
};

View File

@@ -30,15 +30,18 @@ using Table = Maxwell3D::DirtyState::Table;
using Flags = Maxwell3D::DirtyState::Flags;
Flags MakeInvalidationFlags() {
static constexpr std::array INVALIDATION_FLAGS{
static constexpr int INVALIDATION_FLAGS[]{
Viewports, Scissors, DepthBias, BlendConstants, DepthBounds,
StencilProperties, CullMode, DepthBoundsEnable, DepthTestEnable, DepthWriteEnable,
DepthCompareOp, FrontFace, StencilOp, StencilTestEnable,
DepthCompareOp, FrontFace, StencilOp, StencilTestEnable, VertexBuffers,
};
Flags flags{};
for (const int flag : INVALIDATION_FLAGS) {
flags[flag] = true;
}
for (int index = VertexBuffer0; index <= VertexBuffer31; ++index) {
flags[index] = true;
}
return flags;
}
@@ -130,7 +133,7 @@ void SetupDirtyStencilTestEnable(Tables& tables) {
StateTracker::StateTracker(Tegra::GPU& gpu)
: flags{gpu.Maxwell3D().dirty.flags}, invalidation_flags{MakeInvalidationFlags()} {
auto& tables = gpu.Maxwell3D().dirty.tables;
SetupDirtyRenderTargets(tables);
SetupDirtyFlags(tables);
SetupDirtyViewports(tables);
SetupDirtyScissors(tables);
SetupDirtyDepthBias(tables);

View File

@@ -56,8 +56,11 @@ VkExtent2D ChooseSwapExtent(const VkSurfaceCapabilitiesKHR& capabilities, u32 wi
} // Anonymous namespace
/// Stores the surface, device and scheduler; this overload does not call Create().
VKSwapchain::VKSwapchain(VkSurfaceKHR surface_, const Device& device_, VKScheduler& scheduler_)
: surface{surface_}, device{device_}, scheduler{scheduler_} {}
/// Stores the surface, device and scheduler, then immediately calls Create() with the given
/// width, height and sRGB flag.
VKSwapchain::VKSwapchain(VkSurfaceKHR surface_, const Device& device_, VKScheduler& scheduler_,
u32 width, u32 height, bool srgb)
: surface{surface_}, device{device_}, scheduler{scheduler_} {
// Create the swapchain as part of construction
Create(width, height, srgb);
}
VKSwapchain::~VKSwapchain() = default;

View File

@@ -20,7 +20,8 @@ class VKScheduler;
class VKSwapchain {
public:
explicit VKSwapchain(VkSurfaceKHR surface, const Device& device, VKScheduler& scheduler);
explicit VKSwapchain(VkSurfaceKHR surface, const Device& device, VKScheduler& scheduler,
u32 width, u32 height, bool srgb);
~VKSwapchain();
/// Creates (or recreates) the swapchain with a given size.

View File

@@ -410,46 +410,47 @@ constexpr VkBorderColor ConvertBorderColor(const std::array<float, 4>& color) {
void CopyBufferToImage(vk::CommandBuffer cmdbuf, VkBuffer src_buffer, VkImage image,
VkImageAspectFlags aspect_mask, bool is_initialized,
std::span<const VkBufferImageCopy> copies) {
static constexpr VkAccessFlags ACCESS_FLAGS = VK_ACCESS_SHADER_WRITE_BIT |
VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT |
VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT;
static constexpr VkAccessFlags WRITE_ACCESS_FLAGS =
VK_ACCESS_SHADER_WRITE_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT |
VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT;
static constexpr VkAccessFlags READ_ACCESS_FLAGS = VK_ACCESS_SHADER_READ_BIT |
VK_ACCESS_COLOR_ATTACHMENT_READ_BIT |
VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT;
const VkImageMemoryBarrier read_barrier{
.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
.pNext = nullptr,
.srcAccessMask = ACCESS_FLAGS,
.srcAccessMask = WRITE_ACCESS_FLAGS,
.dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
.oldLayout = is_initialized ? VK_IMAGE_LAYOUT_GENERAL : VK_IMAGE_LAYOUT_UNDEFINED,
.newLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.image = image,
.subresourceRange =
{
.aspectMask = aspect_mask,
.baseMipLevel = 0,
.levelCount = VK_REMAINING_MIP_LEVELS,
.baseArrayLayer = 0,
.layerCount = VK_REMAINING_ARRAY_LAYERS,
},
.subresourceRange{
.aspectMask = aspect_mask,
.baseMipLevel = 0,
.levelCount = VK_REMAINING_MIP_LEVELS,
.baseArrayLayer = 0,
.layerCount = VK_REMAINING_ARRAY_LAYERS,
},
};
const VkImageMemoryBarrier write_barrier{
.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
.pNext = nullptr,
.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
.dstAccessMask = ACCESS_FLAGS,
.dstAccessMask = WRITE_ACCESS_FLAGS | READ_ACCESS_FLAGS,
.oldLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
.newLayout = VK_IMAGE_LAYOUT_GENERAL,
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.image = image,
.subresourceRange =
{
.aspectMask = aspect_mask,
.baseMipLevel = 0,
.levelCount = VK_REMAINING_MIP_LEVELS,
.baseArrayLayer = 0,
.layerCount = VK_REMAINING_ARRAY_LAYERS,
},
.subresourceRange{
.aspectMask = aspect_mask,
.baseMipLevel = 0,
.levelCount = VK_REMAINING_MIP_LEVELS,
.baseArrayLayer = 0,
.layerCount = VK_REMAINING_ARRAY_LAYERS,
},
};
cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, 0,
read_barrier);
@@ -553,20 +554,12 @@ void TextureCacheRuntime::Finish() {
scheduler.Finish();
}
ImageBufferMap TextureCacheRuntime::MapUploadBuffer(size_t size) {
const auto staging_ref = staging_buffer_pool.Request(size, MemoryUsage::Upload);
return {
.handle = staging_ref.buffer,
.span = staging_ref.mapped_span,
};
StagingBufferRef TextureCacheRuntime::UploadStagingBuffer(size_t size) {
return staging_buffer_pool.Request(size, MemoryUsage::Upload);
}
ImageBufferMap TextureCacheRuntime::MapDownloadBuffer(size_t size) {
const auto staging_ref = staging_buffer_pool.Request(size, MemoryUsage::Download);
return {
.handle = staging_ref.buffer,
.span = staging_ref.mapped_span,
};
StagingBufferRef TextureCacheRuntime::DownloadStagingBuffer(size_t size) {
return staging_buffer_pool.Request(size, MemoryUsage::Download);
}
void TextureCacheRuntime::BlitImage(Framebuffer* dst_framebuffer, ImageView& dst, ImageView& src,
@@ -738,7 +731,7 @@ void TextureCacheRuntime::CopyImage(Image& dst, Image& src,
.srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT |
VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT |
VK_ACCESS_TRANSFER_WRITE_BIT,
.dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
.dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT,
.oldLayout = VK_IMAGE_LAYOUT_GENERAL,
.newLayout = VK_IMAGE_LAYOUT_GENERAL,
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
@@ -749,12 +742,9 @@ void TextureCacheRuntime::CopyImage(Image& dst, Image& src,
VkImageMemoryBarrier{
.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
.pNext = nullptr,
.srcAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT |
VK_ACCESS_COLOR_ATTACHMENT_READ_BIT |
VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT |
VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT |
.srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT |
VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT |
VK_ACCESS_TRANSFER_READ_BIT | VK_ACCESS_TRANSFER_WRITE_BIT,
VK_ACCESS_TRANSFER_WRITE_BIT,
.dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
.oldLayout = VK_IMAGE_LAYOUT_GENERAL,
.newLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
@@ -812,12 +802,12 @@ Image::Image(TextureCacheRuntime& runtime, const ImageInfo& info_, GPUVAddr gpu_
}
}
void Image::UploadMemory(const ImageBufferMap& map, size_t buffer_offset,
void Image::UploadMemory(const StagingBufferRef& map, size_t buffer_offset,
std::span<const BufferImageCopy> copies) {
// TODO: Move this to another API
scheduler->RequestOutsideRenderPassOperationContext();
std::vector vk_copies = TransformBufferImageCopies(copies, buffer_offset, aspect_mask);
const VkBuffer src_buffer = map.handle;
const VkBuffer src_buffer = map.buffer;
const VkImage vk_image = *image;
const VkImageAspectFlags vk_aspect_mask = aspect_mask;
const bool is_initialized = std::exchange(initialized, true);
@@ -827,12 +817,12 @@ void Image::UploadMemory(const ImageBufferMap& map, size_t buffer_offset,
});
}
void Image::UploadMemory(const ImageBufferMap& map, size_t buffer_offset,
void Image::UploadMemory(const StagingBufferRef& map, size_t buffer_offset,
std::span<const VideoCommon::BufferCopy> copies) {
// TODO: Move this to another API
scheduler->RequestOutsideRenderPassOperationContext();
std::vector vk_copies = TransformBufferCopies(copies, buffer_offset);
const VkBuffer src_buffer = map.handle;
const VkBuffer src_buffer = map.buffer;
const VkBuffer dst_buffer = *buffer;
scheduler->Record([src_buffer, dst_buffer, vk_copies](vk::CommandBuffer cmdbuf) {
// TODO: Barriers
@@ -840,13 +830,58 @@ void Image::UploadMemory(const ImageBufferMap& map, size_t buffer_offset,
});
}
// Records a copy of this image's contents into the buffer referenced by `map`.
// `copies` is converted to Vulkan copy regions by TransformBufferImageCopies using
// `buffer_offset` and this image's aspect mask. The recorded command moves the image
// to TRANSFER_SRC_OPTIMAL, copies it into the buffer and then restores GENERAL.
// NOTE(review): this text looks like a rendered diff (see the "@@" hunk markers nearby)
// in which the replaced and replacing variants of a line appear back to back: the two
// signatures, the two scheduler->Record openers and the first CopyImageToBuffer call.
// Confirm which variant applies before building from it.
void Image::DownloadMemory(const ImageBufferMap& map, size_t buffer_offset,
void Image::DownloadMemory(const StagingBufferRef& map, size_t buffer_offset,
std::span<const BufferImageCopy> copies) {
std::vector vk_copies = TransformBufferImageCopies(copies, buffer_offset, aspect_mask);
// The handles, the aspect mask and the region list are captured by value, so the
// recorded lambda refers to neither `map` nor `this`.
scheduler->Record([buffer = map.handle, image = *image, aspect_mask = aspect_mask,
scheduler->Record([buffer = map.buffer, image = *image, aspect_mask = aspect_mask,
vk_copies](vk::CommandBuffer cmdbuf) {
// NOTE(review): the call right below copies from VK_IMAGE_LAYOUT_GENERAL without any
// barrier and appears to be the superseded variant of the copy further down — confirm.
// TODO: Barriers
cmdbuf.CopyImageToBuffer(image, VK_IMAGE_LAYOUT_GENERAL, buffer, vk_copies);
// Pre-copy barrier: earlier memory writes become visible to transfer reads, and every
// mip level and array layer of the image goes from GENERAL to TRANSFER_SRC_OPTIMAL.
const VkImageMemoryBarrier read_barrier{
.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
.pNext = nullptr,
.srcAccessMask = VK_ACCESS_MEMORY_WRITE_BIT,
.dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT,
.oldLayout = VK_IMAGE_LAYOUT_GENERAL,
.newLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.image = image,
.subresourceRange{
.aspectMask = aspect_mask,
.baseMipLevel = 0,
.levelCount = VK_REMAINING_MIP_LEVELS,
.baseArrayLayer = 0,
.layerCount = VK_REMAINING_ARRAY_LAYERS,
},
};
// Post-copy barrier: returns the same subresource range to GENERAL.
// NOTE(review): srcAccessMask is 0, presumably because the copy only reads the
// image — confirm.
const VkImageMemoryBarrier image_write_barrier{
.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
.pNext = nullptr,
.srcAccessMask = 0,
.dstAccessMask = VK_ACCESS_MEMORY_WRITE_BIT,
.oldLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
.newLayout = VK_IMAGE_LAYOUT_GENERAL,
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.image = image,
.subresourceRange{
.aspectMask = aspect_mask,
.baseMipLevel = 0,
.levelCount = VK_REMAINING_MIP_LEVELS,
.baseArrayLayer = 0,
.layerCount = VK_REMAINING_ARRAY_LAYERS,
},
};
// Global memory barrier issued together with the post-copy image barrier: memory
// writes made before it are made visible to any later memory read or write.
const VkMemoryBarrier memory_write_barrier{
.sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER,
.pNext = nullptr,
.srcAccessMask = VK_ACCESS_MEMORY_WRITE_BIT,
.dstAccessMask = VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT,
};
// All previously recorded commands must complete before the transfer stage runs.
cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT,
0, read_barrier);
cmdbuf.CopyImageToBuffer(image, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, buffer, vk_copies);
// Commands of any stage recorded afterwards wait for the transfer stage.
// NOTE(review): the `nullptr` argument presumably fills the wrapper's buffer-barrier
// slot — confirm against vk::CommandBuffer::PipelineBarrier.
cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
0, memory_write_barrier, nullptr, image_write_barrier);
});
}
@@ -1106,7 +1141,7 @@ Framebuffer::Framebuffer(TextureCacheRuntime& runtime, std::span<ImageView*, NUM
.pAttachments = attachments.data(),
.width = key.size.width,
.height = key.size.height,
.layers = static_cast<u32>(num_layers),
.layers = static_cast<u32>(std::max(num_layers, 1)),
});
if (runtime.device.HasDebuggingToolAttached()) {
framebuffer.SetObjectNameEXT(VideoCommon::Name(key).c_str());

View File

@@ -7,6 +7,7 @@
#include <compare>
#include <span>
#include "video_core/renderer_vulkan/vk_staging_buffer_pool.h"
#include "video_core/texture_cache/texture_cache.h"
#include "video_core/vulkan_common/vulkan_memory_allocator.h"
#include "video_core/vulkan_common/vulkan_wrapper.h"
@@ -53,19 +54,6 @@ struct hash<Vulkan::RenderPassKey> {
namespace Vulkan {
// Plain aggregate pairing a Vulkan buffer handle with a span of bytes.
// NOTE(review): presumably the span is the mapped memory of that buffer — confirm
// against the code that creates these objects.
struct ImageBufferMap {
    // Field order (handle, then span) is kept so aggregate initialisation is unchanged.
    VkBuffer handle;
    std::span<u8> span;

    // Returns the stored buffer handle.
    [[nodiscard]] VkBuffer Handle() const noexcept { return handle; }

    // Returns the stored byte span.
    [[nodiscard]] std::span<u8> Span() const noexcept { return span; }
};
struct TextureCacheRuntime {
const Device& device;
VKScheduler& scheduler;
@@ -76,9 +64,9 @@ struct TextureCacheRuntime {
void Finish();
[[nodiscard]] ImageBufferMap MapUploadBuffer(size_t size);
[[nodiscard]] StagingBufferRef UploadStagingBuffer(size_t size);
[[nodiscard]] ImageBufferMap MapDownloadBuffer(size_t size);
[[nodiscard]] StagingBufferRef DownloadStagingBuffer(size_t size);
void BlitImage(Framebuffer* dst_framebuffer, ImageView& dst, ImageView& src,
const std::array<Offset2D, 2>& dst_region,
@@ -94,7 +82,7 @@ struct TextureCacheRuntime {
return false;
}
void AccelerateImageUpload(Image&, const ImageBufferMap&, size_t,
void AccelerateImageUpload(Image&, const StagingBufferRef&, size_t,
std::span<const VideoCommon::SwizzleParameters>) {
UNREACHABLE();
}
@@ -112,13 +100,13 @@ public:
explicit Image(TextureCacheRuntime&, const VideoCommon::ImageInfo& info, GPUVAddr gpu_addr,
VAddr cpu_addr);
void UploadMemory(const ImageBufferMap& map, size_t buffer_offset,
void UploadMemory(const StagingBufferRef& map, size_t buffer_offset,
std::span<const VideoCommon::BufferImageCopy> copies);
void UploadMemory(const ImageBufferMap& map, size_t buffer_offset,
void UploadMemory(const StagingBufferRef& map, size_t buffer_offset,
std::span<const VideoCommon::BufferCopy> copies);
void DownloadMemory(const ImageBufferMap& map, size_t buffer_offset,
void DownloadMemory(const StagingBufferRef& map, size_t buffer_offset,
std::span<const VideoCommon::BufferImageCopy> copies);
[[nodiscard]] VkImage Handle() const noexcept {