early-access version 1332
This commit is contained in:
@@ -526,13 +526,9 @@ VkCompareOp ComparisonOp(Maxwell::ComparisonOp comparison) {
|
||||
return {};
|
||||
}
|
||||
|
||||
VkIndexType IndexFormat(const Device& device, Maxwell::IndexFormat index_format) {
|
||||
VkIndexType IndexFormat(Maxwell::IndexFormat index_format) {
|
||||
switch (index_format) {
|
||||
case Maxwell::IndexFormat::UnsignedByte:
|
||||
if (!device.IsExtIndexTypeUint8Supported()) {
|
||||
UNIMPLEMENTED_MSG("Native uint8 indices are not supported on this device");
|
||||
return VK_INDEX_TYPE_UINT16;
|
||||
}
|
||||
return VK_INDEX_TYPE_UINT8_EXT;
|
||||
case Maxwell::IndexFormat::UnsignedShort:
|
||||
return VK_INDEX_TYPE_UINT16;
|
||||
|
@@ -45,7 +45,7 @@ VkFormat VertexFormat(Maxwell::VertexAttribute::Type type, Maxwell::VertexAttrib
|
||||
|
||||
VkCompareOp ComparisonOp(Maxwell::ComparisonOp comparison);
|
||||
|
||||
VkIndexType IndexFormat(const Device& device, Maxwell::IndexFormat index_format);
|
||||
VkIndexType IndexFormat(Maxwell::IndexFormat index_format);
|
||||
|
||||
VkStencilOp StencilOp(Maxwell::StencilOp stencil_op);
|
||||
|
||||
|
@@ -80,17 +80,50 @@ std::string BuildCommaSeparatedExtensions(std::vector<std::string> available_ext
|
||||
return separated_extensions;
|
||||
}
|
||||
|
||||
Device CreateDevice(const vk::Instance& instance, const vk::InstanceDispatch& dld,
|
||||
VkSurfaceKHR surface) {
|
||||
const std::vector<VkPhysicalDevice> devices = instance.EnumeratePhysicalDevices();
|
||||
const s32 device_index = Settings::values.vulkan_device.GetValue();
|
||||
if (device_index < 0 || device_index >= static_cast<s32>(devices.size())) {
|
||||
LOG_ERROR(Render_Vulkan, "Invalid device index {}!", device_index);
|
||||
throw vk::Exception(VK_ERROR_INITIALIZATION_FAILED);
|
||||
}
|
||||
const vk::PhysicalDevice physical_device(devices[device_index], dld);
|
||||
return Device(*instance, physical_device, surface, dld);
|
||||
}
|
||||
} // Anonymous namespace
|
||||
|
||||
RendererVulkan::RendererVulkan(Core::TelemetrySession& telemetry_session_,
|
||||
Core::Frontend::EmuWindow& emu_window,
|
||||
Core::Memory::Memory& cpu_memory_, Tegra::GPU& gpu_,
|
||||
std::unique_ptr<Core::Frontend::GraphicsContext> context_)
|
||||
: RendererBase{emu_window, std::move(context_)}, telemetry_session{telemetry_session_},
|
||||
cpu_memory{cpu_memory_}, gpu{gpu_} {}
|
||||
std::unique_ptr<Core::Frontend::GraphicsContext> context_) try
|
||||
: RendererBase(emu_window, std::move(context_)),
|
||||
telemetry_session(telemetry_session_),
|
||||
cpu_memory(cpu_memory_),
|
||||
gpu(gpu_),
|
||||
library(OpenLibrary()),
|
||||
instance(CreateInstance(library, dld, VK_API_VERSION_1_1, render_window.GetWindowInfo().type,
|
||||
true, Settings::values.renderer_debug)),
|
||||
debug_callback(Settings::values.renderer_debug ? CreateDebugCallback(instance) : nullptr),
|
||||
surface(CreateSurface(instance, render_window)),
|
||||
device(CreateDevice(instance, dld, *surface)),
|
||||
memory_allocator(device, false),
|
||||
state_tracker(gpu),
|
||||
scheduler(device, state_tracker),
|
||||
swapchain(*surface, device, scheduler, render_window.GetFramebufferLayout().width,
|
||||
render_window.GetFramebufferLayout().height, false),
|
||||
blit_screen(cpu_memory, render_window, device, memory_allocator, swapchain, scheduler,
|
||||
screen_info),
|
||||
rasterizer(render_window, gpu, gpu.MemoryManager(), cpu_memory, screen_info, device,
|
||||
memory_allocator, state_tracker, scheduler) {
|
||||
Report();
|
||||
} catch (const vk::Exception& exception) {
|
||||
LOG_ERROR(Render_Vulkan, "Vulkan initialization failed with error: {}", exception.what());
|
||||
throw std::runtime_error{fmt::format("Vulkan initialization error {}", exception.what())};
|
||||
}
|
||||
|
||||
RendererVulkan::~RendererVulkan() {
|
||||
ShutDown();
|
||||
void(device.GetLogical().WaitIdle());
|
||||
}
|
||||
|
||||
void RendererVulkan::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {
|
||||
@@ -101,101 +134,38 @@ void RendererVulkan::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {
|
||||
if (layout.width > 0 && layout.height > 0 && render_window.IsShown()) {
|
||||
const VAddr framebuffer_addr = framebuffer->address + framebuffer->offset;
|
||||
const bool use_accelerated =
|
||||
rasterizer->AccelerateDisplay(*framebuffer, framebuffer_addr, framebuffer->stride);
|
||||
rasterizer.AccelerateDisplay(*framebuffer, framebuffer_addr, framebuffer->stride);
|
||||
const bool is_srgb = use_accelerated && screen_info.is_srgb;
|
||||
if (swapchain->HasFramebufferChanged(layout) || swapchain->GetSrgbState() != is_srgb) {
|
||||
swapchain->Create(layout.width, layout.height, is_srgb);
|
||||
blit_screen->Recreate();
|
||||
if (swapchain.HasFramebufferChanged(layout) || swapchain.GetSrgbState() != is_srgb) {
|
||||
swapchain.Create(layout.width, layout.height, is_srgb);
|
||||
blit_screen.Recreate();
|
||||
}
|
||||
|
||||
scheduler->WaitWorker();
|
||||
scheduler.WaitWorker();
|
||||
|
||||
swapchain->AcquireNextImage();
|
||||
const VkSemaphore render_semaphore = blit_screen->Draw(*framebuffer, use_accelerated);
|
||||
swapchain.AcquireNextImage();
|
||||
const VkSemaphore render_semaphore = blit_screen.Draw(*framebuffer, use_accelerated);
|
||||
|
||||
scheduler->Flush(render_semaphore);
|
||||
scheduler.Flush(render_semaphore);
|
||||
|
||||
if (swapchain->Present(render_semaphore)) {
|
||||
blit_screen->Recreate();
|
||||
if (swapchain.Present(render_semaphore)) {
|
||||
blit_screen.Recreate();
|
||||
}
|
||||
|
||||
rasterizer->TickFrame();
|
||||
rasterizer.TickFrame();
|
||||
}
|
||||
|
||||
render_window.OnFrameDisplayed();
|
||||
}
|
||||
|
||||
bool RendererVulkan::Init() try {
|
||||
library = OpenLibrary();
|
||||
instance = CreateInstance(library, dld, VK_API_VERSION_1_1, render_window.GetWindowInfo().type,
|
||||
true, Settings::values.renderer_debug);
|
||||
if (Settings::values.renderer_debug) {
|
||||
debug_callback = CreateDebugCallback(instance);
|
||||
}
|
||||
surface = CreateSurface(instance, render_window);
|
||||
|
||||
InitializeDevice();
|
||||
Report();
|
||||
|
||||
memory_allocator = std::make_unique<MemoryAllocator>(*device);
|
||||
|
||||
state_tracker = std::make_unique<StateTracker>(gpu);
|
||||
|
||||
scheduler = std::make_unique<VKScheduler>(*device, *state_tracker);
|
||||
|
||||
const auto& framebuffer = render_window.GetFramebufferLayout();
|
||||
swapchain = std::make_unique<VKSwapchain>(*surface, *device, *scheduler);
|
||||
swapchain->Create(framebuffer.width, framebuffer.height, false);
|
||||
|
||||
rasterizer = std::make_unique<RasterizerVulkan>(render_window, gpu, gpu.MemoryManager(),
|
||||
cpu_memory, screen_info, *device,
|
||||
*memory_allocator, *state_tracker, *scheduler);
|
||||
|
||||
blit_screen =
|
||||
std::make_unique<VKBlitScreen>(cpu_memory, render_window, *rasterizer, *device,
|
||||
*memory_allocator, *swapchain, *scheduler, screen_info);
|
||||
return true;
|
||||
|
||||
} catch (const vk::Exception& exception) {
|
||||
LOG_ERROR(Render_Vulkan, "Vulkan initialization failed with error: {}", exception.what());
|
||||
return false;
|
||||
}
|
||||
|
||||
void RendererVulkan::ShutDown() {
|
||||
if (!device) {
|
||||
return;
|
||||
}
|
||||
if (const auto& dev = device->GetLogical()) {
|
||||
dev.WaitIdle();
|
||||
}
|
||||
rasterizer.reset();
|
||||
blit_screen.reset();
|
||||
scheduler.reset();
|
||||
swapchain.reset();
|
||||
memory_allocator.reset();
|
||||
device.reset();
|
||||
}
|
||||
|
||||
void RendererVulkan::InitializeDevice() {
|
||||
const std::vector<VkPhysicalDevice> devices = instance.EnumeratePhysicalDevices();
|
||||
const s32 device_index = Settings::values.vulkan_device.GetValue();
|
||||
if (device_index < 0 || device_index >= static_cast<s32>(devices.size())) {
|
||||
LOG_ERROR(Render_Vulkan, "Invalid device index {}!", device_index);
|
||||
throw vk::Exception(VK_ERROR_INITIALIZATION_FAILED);
|
||||
}
|
||||
const vk::PhysicalDevice physical_device(devices[static_cast<size_t>(device_index)], dld);
|
||||
device = std::make_unique<Device>(*instance, physical_device, *surface, dld);
|
||||
}
|
||||
|
||||
void RendererVulkan::Report() const {
|
||||
const std::string vendor_name{device->GetVendorName()};
|
||||
const std::string model_name{device->GetModelName()};
|
||||
const std::string driver_version = GetDriverVersion(*device);
|
||||
const std::string vendor_name{device.GetVendorName()};
|
||||
const std::string model_name{device.GetModelName()};
|
||||
const std::string driver_version = GetDriverVersion(device);
|
||||
const std::string driver_name = fmt::format("{} {}", vendor_name, driver_version);
|
||||
|
||||
const std::string api_version = GetReadableVersion(device->ApiVersion());
|
||||
const std::string api_version = GetReadableVersion(device.ApiVersion());
|
||||
|
||||
const std::string extensions = BuildCommaSeparatedExtensions(device->GetAvailableExtensions());
|
||||
const std::string extensions = BuildCommaSeparatedExtensions(device.GetAvailableExtensions());
|
||||
|
||||
LOG_INFO(Render_Vulkan, "Driver: {}", driver_name);
|
||||
LOG_INFO(Render_Vulkan, "Device: {}", model_name);
|
||||
@@ -209,21 +179,4 @@ void RendererVulkan::Report() const {
|
||||
telemetry_session.AddField(field, "GPU_Vulkan_Extensions", extensions);
|
||||
}
|
||||
|
||||
std::vector<std::string> RendererVulkan::EnumerateDevices() try {
|
||||
vk::InstanceDispatch dld;
|
||||
const Common::DynamicLibrary library = OpenLibrary();
|
||||
const vk::Instance instance = CreateInstance(library, dld, VK_API_VERSION_1_0);
|
||||
const std::vector<VkPhysicalDevice> physical_devices = instance.EnumeratePhysicalDevices();
|
||||
std::vector<std::string> names;
|
||||
names.reserve(physical_devices.size());
|
||||
for (const VkPhysicalDevice device : physical_devices) {
|
||||
names.push_back(vk::PhysicalDevice(device, dld).GetProperties().deviceName);
|
||||
}
|
||||
return names;
|
||||
|
||||
} catch (const vk::Exception& exception) {
|
||||
LOG_ERROR(Render_Vulkan, "Failed to enumerate devices with error: {}", exception.what());
|
||||
return {};
|
||||
}
|
||||
|
||||
} // namespace Vulkan
|
||||
|
@@ -9,8 +9,14 @@
|
||||
#include <vector>
|
||||
|
||||
#include "common/dynamic_library.h"
|
||||
|
||||
#include "video_core/renderer_base.h"
|
||||
#include "video_core/renderer_vulkan/vk_blit_screen.h"
|
||||
#include "video_core/renderer_vulkan/vk_rasterizer.h"
|
||||
#include "video_core/renderer_vulkan/vk_scheduler.h"
|
||||
#include "video_core/renderer_vulkan/vk_state_tracker.h"
|
||||
#include "video_core/renderer_vulkan/vk_swapchain.h"
|
||||
#include "video_core/vulkan_common/vulkan_device.h"
|
||||
#include "video_core/vulkan_common/vulkan_memory_allocator.h"
|
||||
#include "video_core/vulkan_common/vulkan_wrapper.h"
|
||||
|
||||
namespace Core {
|
||||
@@ -27,20 +33,6 @@ class GPU;
|
||||
|
||||
namespace Vulkan {
|
||||
|
||||
class Device;
|
||||
class StateTracker;
|
||||
class MemoryAllocator;
|
||||
class VKBlitScreen;
|
||||
class VKSwapchain;
|
||||
class VKScheduler;
|
||||
|
||||
struct VKScreenInfo {
|
||||
VkImageView image_view{};
|
||||
u32 width{};
|
||||
u32 height{};
|
||||
bool is_srgb{};
|
||||
};
|
||||
|
||||
class RendererVulkan final : public VideoCore::RendererBase {
|
||||
public:
|
||||
explicit RendererVulkan(Core::TelemetrySession& telemtry_session,
|
||||
@@ -49,15 +41,13 @@ public:
|
||||
std::unique_ptr<Core::Frontend::GraphicsContext> context_);
|
||||
~RendererVulkan() override;
|
||||
|
||||
bool Init() override;
|
||||
void ShutDown() override;
|
||||
void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) override;
|
||||
|
||||
static std::vector<std::string> EnumerateDevices();
|
||||
VideoCore::RasterizerInterface* ReadRasterizer() override {
|
||||
return &rasterizer;
|
||||
}
|
||||
|
||||
private:
|
||||
void InitializeDevice();
|
||||
|
||||
void Report() const;
|
||||
|
||||
Core::TelemetrySession& telemetry_session;
|
||||
@@ -68,18 +58,18 @@ private:
|
||||
vk::InstanceDispatch dld;
|
||||
|
||||
vk::Instance instance;
|
||||
|
||||
vk::DebugUtilsMessenger debug_callback;
|
||||
vk::SurfaceKHR surface;
|
||||
|
||||
VKScreenInfo screen_info;
|
||||
|
||||
vk::DebugUtilsMessenger debug_callback;
|
||||
std::unique_ptr<Device> device;
|
||||
std::unique_ptr<MemoryAllocator> memory_allocator;
|
||||
std::unique_ptr<StateTracker> state_tracker;
|
||||
std::unique_ptr<VKScheduler> scheduler;
|
||||
std::unique_ptr<VKSwapchain> swapchain;
|
||||
std::unique_ptr<VKBlitScreen> blit_screen;
|
||||
Device device;
|
||||
MemoryAllocator memory_allocator;
|
||||
StateTracker state_tracker;
|
||||
VKScheduler scheduler;
|
||||
VKSwapchain swapchain;
|
||||
VKBlitScreen blit_screen;
|
||||
RasterizerVulkan rasterizer;
|
||||
};
|
||||
|
||||
} // namespace Vulkan
|
||||
|
@@ -18,7 +18,6 @@
|
||||
#include "video_core/gpu.h"
|
||||
#include "video_core/host_shaders/vulkan_present_frag_spv.h"
|
||||
#include "video_core/host_shaders/vulkan_present_vert_spv.h"
|
||||
#include "video_core/rasterizer_interface.h"
|
||||
#include "video_core/renderer_vulkan/renderer_vulkan.h"
|
||||
#include "video_core/renderer_vulkan/vk_blit_screen.h"
|
||||
#include "video_core/renderer_vulkan/vk_master_semaphore.h"
|
||||
@@ -113,13 +112,12 @@ struct VKBlitScreen::BufferData {
|
||||
};
|
||||
|
||||
VKBlitScreen::VKBlitScreen(Core::Memory::Memory& cpu_memory_,
|
||||
Core::Frontend::EmuWindow& render_window_,
|
||||
VideoCore::RasterizerInterface& rasterizer_, const Device& device_,
|
||||
Core::Frontend::EmuWindow& render_window_, const Device& device_,
|
||||
MemoryAllocator& memory_allocator_, VKSwapchain& swapchain_,
|
||||
VKScheduler& scheduler_, const VKScreenInfo& screen_info_)
|
||||
: cpu_memory{cpu_memory_}, render_window{render_window_}, rasterizer{rasterizer_},
|
||||
device{device_}, memory_allocator{memory_allocator_}, swapchain{swapchain_},
|
||||
scheduler{scheduler_}, image_count{swapchain.GetImageCount()}, screen_info{screen_info_} {
|
||||
: cpu_memory{cpu_memory_}, render_window{render_window_}, device{device_},
|
||||
memory_allocator{memory_allocator_}, swapchain{swapchain_}, scheduler{scheduler_},
|
||||
image_count{swapchain.GetImageCount()}, screen_info{screen_info_} {
|
||||
resource_ticks.resize(image_count);
|
||||
|
||||
CreateStaticResources();
|
||||
@@ -150,8 +148,8 @@ VkSemaphore VKBlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer, bool
|
||||
SetUniformData(data, framebuffer);
|
||||
SetVertexData(data, framebuffer);
|
||||
|
||||
const std::span<u8> map = buffer_commit.Map();
|
||||
std::memcpy(map.data(), &data, sizeof(data));
|
||||
const std::span<u8> mapped_span = buffer_commit.Map();
|
||||
std::memcpy(mapped_span.data(), &data, sizeof(data));
|
||||
|
||||
if (!use_accelerated) {
|
||||
const u64 image_offset = GetRawImageOffset(framebuffer, image_index);
|
||||
@@ -159,14 +157,13 @@ VkSemaphore VKBlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer, bool
|
||||
const VAddr framebuffer_addr = framebuffer.address + framebuffer.offset;
|
||||
const u8* const host_ptr = cpu_memory.GetPointer(framebuffer_addr);
|
||||
const size_t size_bytes = GetSizeInBytes(framebuffer);
|
||||
rasterizer.FlushRegion(ToCacheAddr(host_ptr), size_bytes);
|
||||
|
||||
// TODO(Rodrigo): Read this from HLE
|
||||
constexpr u32 block_height_log2 = 4;
|
||||
const u32 bytes_per_pixel = GetBytesPerPixel(framebuffer);
|
||||
Tegra::Texture::UnswizzleTexture(
|
||||
map.subspan(image_offset, size_bytes), std::span(host_ptr, size_bytes), bytes_per_pixel,
|
||||
framebuffer.width, framebuffer.height, 1, block_height_log2, 0);
|
||||
mapped_span.subspan(image_offset, size_bytes), std::span(host_ptr, size_bytes),
|
||||
bytes_per_pixel, framebuffer.width, framebuffer.height, 1, block_height_log2, 0);
|
||||
|
||||
const VkBufferImageCopy copy{
|
||||
.bufferOffset = image_offset,
|
||||
@@ -266,7 +263,6 @@ VkSemaphore VKBlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer, bool
|
||||
cmdbuf.Draw(4, 1, 0, 0);
|
||||
cmdbuf.EndRenderPass();
|
||||
});
|
||||
|
||||
return *semaphores[image_index];
|
||||
}
|
||||
|
||||
|
@@ -38,12 +38,18 @@ class RasterizerVulkan;
|
||||
class VKScheduler;
|
||||
class VKSwapchain;
|
||||
|
||||
class VKBlitScreen final {
|
||||
struct VKScreenInfo {
|
||||
VkImageView image_view{};
|
||||
u32 width{};
|
||||
u32 height{};
|
||||
bool is_srgb{};
|
||||
};
|
||||
|
||||
class VKBlitScreen {
|
||||
public:
|
||||
explicit VKBlitScreen(Core::Memory::Memory& cpu_memory,
|
||||
Core::Frontend::EmuWindow& render_window,
|
||||
VideoCore::RasterizerInterface& rasterizer, const Device& device,
|
||||
MemoryAllocator& memory_allocator, VKSwapchain& swapchain,
|
||||
Core::Frontend::EmuWindow& render_window, const Device& device,
|
||||
MemoryAllocator& memory_manager, VKSwapchain& swapchain,
|
||||
VKScheduler& scheduler, const VKScreenInfo& screen_info);
|
||||
~VKBlitScreen();
|
||||
|
||||
@@ -84,7 +90,6 @@ private:
|
||||
|
||||
Core::Memory::Memory& cpu_memory;
|
||||
Core::Frontend::EmuWindow& render_window;
|
||||
VideoCore::RasterizerInterface& rasterizer;
|
||||
const Device& device;
|
||||
MemoryAllocator& memory_allocator;
|
||||
VKSwapchain& swapchain;
|
||||
|
@@ -3,188 +3,276 @@
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include <algorithm>
|
||||
#include <array>
|
||||
#include <cstring>
|
||||
#include <memory>
|
||||
#include <span>
|
||||
#include <vector>
|
||||
|
||||
#include "core/core.h"
|
||||
#include "video_core/buffer_cache/buffer_cache.h"
|
||||
#include "video_core/renderer_vulkan/maxwell_to_vk.h"
|
||||
#include "video_core/renderer_vulkan/vk_buffer_cache.h"
|
||||
#include "video_core/renderer_vulkan/vk_scheduler.h"
|
||||
#include "video_core/renderer_vulkan/vk_stream_buffer.h"
|
||||
#include "video_core/renderer_vulkan/vk_staging_buffer_pool.h"
|
||||
#include "video_core/renderer_vulkan/vk_update_descriptor.h"
|
||||
#include "video_core/vulkan_common/vulkan_device.h"
|
||||
#include "video_core/vulkan_common/vulkan_memory_allocator.h"
|
||||
#include "video_core/vulkan_common/vulkan_wrapper.h"
|
||||
|
||||
namespace Vulkan {
|
||||
|
||||
namespace {
|
||||
VkBufferCopy MakeBufferCopy(const VideoCommon::BufferCopy& copy) {
|
||||
return VkBufferCopy{
|
||||
.srcOffset = copy.src_offset,
|
||||
.dstOffset = copy.dst_offset,
|
||||
.size = copy.size,
|
||||
};
|
||||
}
|
||||
|
||||
constexpr VkBufferUsageFlags BUFFER_USAGE =
|
||||
VK_BUFFER_USAGE_VERTEX_BUFFER_BIT | VK_BUFFER_USAGE_INDEX_BUFFER_BIT |
|
||||
VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT;
|
||||
VkIndexType IndexTypeFromNumElements(const Device& device, u32 num_elements) {
|
||||
if (num_elements <= 0xff && device.IsExtIndexTypeUint8Supported()) {
|
||||
return VK_INDEX_TYPE_UINT8_EXT;
|
||||
}
|
||||
if (num_elements <= 0xffff) {
|
||||
return VK_INDEX_TYPE_UINT16;
|
||||
}
|
||||
return VK_INDEX_TYPE_UINT32;
|
||||
}
|
||||
|
||||
constexpr VkPipelineStageFlags UPLOAD_PIPELINE_STAGE =
|
||||
VK_PIPELINE_STAGE_TRANSFER_BIT | VK_PIPELINE_STAGE_VERTEX_INPUT_BIT |
|
||||
VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT |
|
||||
VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT;
|
||||
|
||||
constexpr VkAccessFlags UPLOAD_ACCESS_BARRIERS =
|
||||
VK_ACCESS_TRANSFER_READ_BIT | VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_UNIFORM_READ_BIT |
|
||||
VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT | VK_ACCESS_INDEX_READ_BIT;
|
||||
|
||||
constexpr VkAccessFlags TRANSFORM_FEEDBACK_WRITE_ACCESS =
|
||||
VK_ACCESS_TRANSFORM_FEEDBACK_WRITE_BIT_EXT | VK_ACCESS_TRANSFORM_FEEDBACK_COUNTER_WRITE_BIT_EXT;
|
||||
size_t BytesPerIndex(VkIndexType index_type) {
|
||||
switch (index_type) {
|
||||
case VK_INDEX_TYPE_UINT8_EXT:
|
||||
return 1;
|
||||
case VK_INDEX_TYPE_UINT16:
|
||||
return 2;
|
||||
case VK_INDEX_TYPE_UINT32:
|
||||
return 4;
|
||||
default:
|
||||
UNREACHABLE_MSG("Invalid index type={}", index_type);
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
std::array<T, 6> MakeQuadIndices(u32 quad, u32 first) {
|
||||
std::array<T, 6> indices{0, 1, 2, 0, 2, 3};
|
||||
std::ranges::transform(indices, indices.begin(),
|
||||
[quad, first](u32 index) { return first + index + quad * 4; });
|
||||
return indices;
|
||||
}
|
||||
} // Anonymous namespace
|
||||
|
||||
Buffer::Buffer(const Device& device_, MemoryAllocator& memory_allocator, VKScheduler& scheduler_,
|
||||
StagingBufferPool& staging_pool_, VAddr cpu_addr_, std::size_t size_)
|
||||
: BufferBlock{cpu_addr_, size_}, device{device_}, scheduler{scheduler_}, staging_pool{
|
||||
staging_pool_} {
|
||||
buffer = device.GetLogical().CreateBuffer(VkBufferCreateInfo{
|
||||
Buffer::Buffer(BufferCacheRuntime&, VideoCommon::NullBufferParams null_params)
|
||||
: VideoCommon::BufferBase<VideoCore::RasterizerInterface>(null_params) {}
|
||||
|
||||
Buffer::Buffer(BufferCacheRuntime& runtime, VideoCore::RasterizerInterface& rasterizer_,
|
||||
VAddr cpu_addr_, u64 size_bytes_)
|
||||
: VideoCommon::BufferBase<VideoCore::RasterizerInterface>(rasterizer_, cpu_addr_, size_bytes_) {
|
||||
buffer = runtime.device.GetLogical().CreateBuffer(VkBufferCreateInfo{
|
||||
.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
|
||||
.pNext = nullptr,
|
||||
.flags = 0,
|
||||
.size = static_cast<VkDeviceSize>(size_),
|
||||
.usage = BUFFER_USAGE | VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT,
|
||||
.size = SizeBytes(),
|
||||
.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT |
|
||||
VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT |
|
||||
VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT | VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT |
|
||||
VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_INDEX_BUFFER_BIT |
|
||||
VK_BUFFER_USAGE_VERTEX_BUFFER_BIT,
|
||||
.sharingMode = VK_SHARING_MODE_EXCLUSIVE,
|
||||
.queueFamilyIndexCount = 0,
|
||||
.pQueueFamilyIndices = nullptr,
|
||||
});
|
||||
commit = memory_allocator.Commit(buffer, MemoryUsage::DeviceLocal);
|
||||
if (runtime.device.HasDebuggingToolAttached()) {
|
||||
buffer.SetObjectNameEXT(fmt::format("Buffer 0x{:x}", CpuAddr()).c_str());
|
||||
}
|
||||
commit = runtime.memory_allocator.Commit(buffer, MemoryUsage::DeviceLocal);
|
||||
}
|
||||
|
||||
Buffer::~Buffer() = default;
|
||||
BufferCacheRuntime::BufferCacheRuntime(const Device& device_, MemoryAllocator& memory_allocator_,
|
||||
VKScheduler& scheduler_, StagingBufferPool& staging_pool_,
|
||||
VKUpdateDescriptorQueue& update_descriptor_queue_,
|
||||
VKDescriptorPool& descriptor_pool)
|
||||
: device{device_}, memory_allocator{memory_allocator_}, scheduler{scheduler_},
|
||||
staging_pool{staging_pool_}, update_descriptor_queue{update_descriptor_queue_},
|
||||
uint8_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue),
|
||||
quad_index_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue) {}
|
||||
|
||||
void Buffer::Upload(std::size_t offset, std::size_t data_size, const u8* data) {
|
||||
const auto& staging = staging_pool.Request(data_size, MemoryUsage::Upload);
|
||||
std::memcpy(staging.mapped_span.data(), data, data_size);
|
||||
StagingBufferRef BufferCacheRuntime::UploadStagingBuffer(size_t size) {
|
||||
return staging_pool.Request(size, MemoryUsage::Upload);
|
||||
}
|
||||
|
||||
StagingBufferRef BufferCacheRuntime::DownloadStagingBuffer(size_t size) {
|
||||
return staging_pool.Request(size, MemoryUsage::Download);
|
||||
}
|
||||
|
||||
void BufferCacheRuntime::Finish() {
|
||||
scheduler.Finish();
|
||||
}
|
||||
|
||||
void BufferCacheRuntime::CopyBuffer(VkBuffer dst_buffer, VkBuffer src_buffer,
|
||||
std::span<const VideoCommon::BufferCopy> copies) {
|
||||
static constexpr VkMemoryBarrier READ_BARRIER{
|
||||
.sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER,
|
||||
.pNext = nullptr,
|
||||
.srcAccessMask = VK_ACCESS_MEMORY_WRITE_BIT,
|
||||
.dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT | VK_ACCESS_TRANSFER_WRITE_BIT,
|
||||
};
|
||||
static constexpr VkMemoryBarrier WRITE_BARRIER{
|
||||
.sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER,
|
||||
.pNext = nullptr,
|
||||
.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
|
||||
.dstAccessMask = VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT,
|
||||
};
|
||||
// Measuring a popular game, this number never exceeds the specified size once data is warmed up
|
||||
boost::container::small_vector<VkBufferCopy, 3> vk_copies(copies.size());
|
||||
std::ranges::transform(copies, vk_copies.begin(), MakeBufferCopy);
|
||||
scheduler.RequestOutsideRenderPassOperationContext();
|
||||
scheduler.Record([src_buffer, dst_buffer, vk_copies](vk::CommandBuffer cmdbuf) {
|
||||
cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT,
|
||||
0, READ_BARRIER);
|
||||
cmdbuf.CopyBuffer(src_buffer, dst_buffer, vk_copies);
|
||||
cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
|
||||
0, WRITE_BARRIER);
|
||||
});
|
||||
}
|
||||
|
||||
const VkBuffer handle = Handle();
|
||||
scheduler.Record([staging = staging.buffer, handle, offset, data_size,
|
||||
&device = device](vk::CommandBuffer cmdbuf) {
|
||||
const VkBufferMemoryBarrier read_barrier{
|
||||
.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,
|
||||
.pNext = nullptr,
|
||||
.srcAccessMask =
|
||||
VK_ACCESS_SHADER_WRITE_BIT | VK_ACCESS_TRANSFER_WRITE_BIT |
|
||||
VK_ACCESS_HOST_WRITE_BIT |
|
||||
(device.IsExtTransformFeedbackSupported() ? TRANSFORM_FEEDBACK_WRITE_ACCESS : 0),
|
||||
.dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT,
|
||||
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
|
||||
.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
|
||||
.buffer = handle,
|
||||
.offset = offset,
|
||||
.size = data_size,
|
||||
void BufferCacheRuntime::BindIndexBuffer(PrimitiveTopology topology, IndexFormat index_format,
|
||||
u32 base_vertex, u32 num_indices, VkBuffer buffer,
|
||||
u32 offset, [[maybe_unused]] u32 size) {
|
||||
VkIndexType index_type = MaxwellToVK::IndexFormat(index_format);
|
||||
if (topology == PrimitiveTopology::Quads) {
|
||||
index_type = VK_INDEX_TYPE_UINT32;
|
||||
std::tie(buffer, offset) =
|
||||
quad_index_pass.Assemble(index_format, num_indices, base_vertex, buffer, offset);
|
||||
} else if (index_type == VK_INDEX_TYPE_UINT8_EXT && !device.IsExtIndexTypeUint8Supported()) {
|
||||
index_type = VK_INDEX_TYPE_UINT16;
|
||||
std::tie(buffer, offset) = uint8_pass.Assemble(num_indices, buffer, offset);
|
||||
}
|
||||
scheduler.Record([buffer, offset, index_type](vk::CommandBuffer cmdbuf) {
|
||||
cmdbuf.BindIndexBuffer(buffer, offset, index_type);
|
||||
});
|
||||
}
|
||||
|
||||
void BufferCacheRuntime::BindQuadArrayIndexBuffer(u32 first, u32 count) {
|
||||
ReserveQuadArrayLUT(first + count, true);
|
||||
|
||||
// The LUT has the indices 0, 1, 2, and 3 copied as an array
|
||||
// To apply these 'first' offsets we can apply an offset based on the modulus.
|
||||
const VkIndexType index_type = quad_array_lut_index_type;
|
||||
const size_t sub_first_offset = static_cast<size_t>(first % 4) * (current_num_indices / 4);
|
||||
const size_t offset = (sub_first_offset + first / 4) * 6ULL * BytesPerIndex(index_type);
|
||||
scheduler.Record([buffer = *quad_array_lut, index_type, offset](vk::CommandBuffer cmdbuf) {
|
||||
cmdbuf.BindIndexBuffer(buffer, offset, index_type);
|
||||
});
|
||||
}
|
||||
|
||||
void BufferCacheRuntime::BindVertexBuffer(u32 index, VkBuffer buffer, u32 offset, u32 size,
|
||||
u32 stride) {
|
||||
if (device.IsExtExtendedDynamicStateSupported()) {
|
||||
scheduler.Record([index, buffer, offset, size, stride](vk::CommandBuffer cmdbuf) {
|
||||
const VkDeviceSize vk_offset = offset;
|
||||
const VkDeviceSize vk_size = buffer != VK_NULL_HANDLE ? size : VK_WHOLE_SIZE;
|
||||
const VkDeviceSize vk_stride = stride;
|
||||
cmdbuf.BindVertexBuffers2EXT(index, 1, &buffer, &vk_offset, &vk_size, &vk_stride);
|
||||
});
|
||||
} else {
|
||||
scheduler.Record([index, buffer, offset](vk::CommandBuffer cmdbuf) {
|
||||
cmdbuf.BindVertexBuffer(index, buffer, offset);
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
void BufferCacheRuntime::BindTransformFeedbackBuffer(u32 index, VkBuffer buffer, u32 offset,
|
||||
u32 size) {
|
||||
if (!device.IsExtTransformFeedbackSupported()) {
|
||||
// Already logged in the rasterizer
|
||||
return;
|
||||
}
|
||||
scheduler.Record([index, buffer, offset, size](vk::CommandBuffer cmdbuf) {
|
||||
const VkDeviceSize vk_offset = offset;
|
||||
const VkDeviceSize vk_size = size;
|
||||
cmdbuf.BindTransformFeedbackBuffersEXT(index, 1, &buffer, &vk_offset, &vk_size);
|
||||
});
|
||||
}
|
||||
|
||||
void BufferCacheRuntime::BindBuffer(VkBuffer buffer, u32 offset, u32 size) {
|
||||
update_descriptor_queue.AddBuffer(buffer, offset, size);
|
||||
}
|
||||
|
||||
void BufferCacheRuntime::ReserveQuadArrayLUT(u32 num_indices, bool wait_for_idle) {
|
||||
if (num_indices <= current_num_indices) {
|
||||
return;
|
||||
}
|
||||
if (wait_for_idle) {
|
||||
scheduler.Finish();
|
||||
}
|
||||
current_num_indices = num_indices;
|
||||
quad_array_lut_index_type = IndexTypeFromNumElements(device, num_indices);
|
||||
|
||||
const u32 num_quads = num_indices / 4;
|
||||
const u32 num_triangle_indices = num_quads * 6;
|
||||
const u32 num_first_offset_copies = 4;
|
||||
const size_t bytes_per_index = BytesPerIndex(quad_array_lut_index_type);
|
||||
const size_t size_bytes = num_triangle_indices * bytes_per_index * num_first_offset_copies;
|
||||
quad_array_lut = device.GetLogical().CreateBuffer(VkBufferCreateInfo{
|
||||
.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
|
||||
.pNext = nullptr,
|
||||
.flags = 0,
|
||||
.size = size_bytes,
|
||||
.usage = VK_BUFFER_USAGE_INDEX_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT,
|
||||
.sharingMode = VK_SHARING_MODE_EXCLUSIVE,
|
||||
.queueFamilyIndexCount = 0,
|
||||
.pQueueFamilyIndices = nullptr,
|
||||
});
|
||||
if (device.HasDebuggingToolAttached()) {
|
||||
quad_array_lut.SetObjectNameEXT("Quad LUT");
|
||||
}
|
||||
quad_array_lut_commit = memory_allocator.Commit(quad_array_lut, MemoryUsage::DeviceLocal);
|
||||
|
||||
const StagingBufferRef staging = staging_pool.Request(size_bytes, MemoryUsage::Upload);
|
||||
u8* staging_data = staging.mapped_span.data();
|
||||
const size_t quad_size = bytes_per_index * 6;
|
||||
for (u32 first = 0; first < num_first_offset_copies; ++first) {
|
||||
for (u32 quad = 0; quad < num_quads; ++quad) {
|
||||
switch (quad_array_lut_index_type) {
|
||||
case VK_INDEX_TYPE_UINT8_EXT:
|
||||
std::memcpy(staging_data, MakeQuadIndices<u8>(quad, first).data(), quad_size);
|
||||
break;
|
||||
case VK_INDEX_TYPE_UINT16:
|
||||
std::memcpy(staging_data, MakeQuadIndices<u16>(quad, first).data(), quad_size);
|
||||
break;
|
||||
case VK_INDEX_TYPE_UINT32:
|
||||
std::memcpy(staging_data, MakeQuadIndices<u32>(quad, first).data(), quad_size);
|
||||
break;
|
||||
default:
|
||||
UNREACHABLE();
|
||||
break;
|
||||
}
|
||||
staging_data += quad_size;
|
||||
}
|
||||
}
|
||||
scheduler.RequestOutsideRenderPassOperationContext();
|
||||
scheduler.Record([src_buffer = staging.buffer, dst_buffer = *quad_array_lut,
|
||||
size_bytes](vk::CommandBuffer cmdbuf) {
|
||||
const VkBufferCopy copy{
|
||||
.srcOffset = 0,
|
||||
.dstOffset = 0,
|
||||
.size = size_bytes,
|
||||
};
|
||||
const VkBufferMemoryBarrier write_barrier{
|
||||
.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,
|
||||
.pNext = nullptr,
|
||||
.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
|
||||
.dstAccessMask = UPLOAD_ACCESS_BARRIERS,
|
||||
.dstAccessMask = VK_ACCESS_INDEX_READ_BIT,
|
||||
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
|
||||
.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
|
||||
.buffer = handle,
|
||||
.offset = offset,
|
||||
.size = data_size,
|
||||
.buffer = dst_buffer,
|
||||
.offset = 0,
|
||||
.size = size_bytes,
|
||||
};
|
||||
cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT,
|
||||
0, read_barrier);
|
||||
cmdbuf.CopyBuffer(staging, handle, VkBufferCopy{0, offset, data_size});
|
||||
cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, UPLOAD_PIPELINE_STAGE, 0,
|
||||
write_barrier);
|
||||
cmdbuf.CopyBuffer(src_buffer, dst_buffer, copy);
|
||||
cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_VERTEX_INPUT_BIT,
|
||||
0, write_barrier);
|
||||
});
|
||||
}
|
||||
|
||||
void Buffer::Download(std::size_t offset, std::size_t data_size, u8* data) {
|
||||
auto staging = staging_pool.Request(data_size, MemoryUsage::Download);
|
||||
scheduler.RequestOutsideRenderPassOperationContext();
|
||||
|
||||
const VkBuffer handle = Handle();
|
||||
scheduler.Record(
|
||||
[staging = staging.buffer, handle, offset, data_size](vk::CommandBuffer cmdbuf) {
|
||||
const VkBufferMemoryBarrier barrier{
|
||||
.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,
|
||||
.pNext = nullptr,
|
||||
.srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT,
|
||||
.dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT,
|
||||
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
|
||||
.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
|
||||
.buffer = handle,
|
||||
.offset = offset,
|
||||
.size = data_size,
|
||||
};
|
||||
|
||||
cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_VERTEX_SHADER_BIT |
|
||||
VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT |
|
||||
VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
|
||||
VK_PIPELINE_STAGE_TRANSFER_BIT, 0, {}, barrier, {});
|
||||
cmdbuf.CopyBuffer(handle, staging, VkBufferCopy{offset, 0, data_size});
|
||||
});
|
||||
scheduler.Finish();
|
||||
|
||||
std::memcpy(data, staging.mapped_span.data(), data_size);
|
||||
}
|
||||
|
||||
void Buffer::CopyFrom(const Buffer& src, std::size_t src_offset, std::size_t dst_offset,
|
||||
std::size_t copy_size) {
|
||||
scheduler.RequestOutsideRenderPassOperationContext();
|
||||
|
||||
const VkBuffer dst_buffer = Handle();
|
||||
scheduler.Record([src_buffer = src.Handle(), dst_buffer, src_offset, dst_offset,
|
||||
copy_size](vk::CommandBuffer cmdbuf) {
|
||||
cmdbuf.CopyBuffer(src_buffer, dst_buffer, VkBufferCopy{src_offset, dst_offset, copy_size});
|
||||
|
||||
std::array<VkBufferMemoryBarrier, 2> barriers;
|
||||
barriers[0].sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER;
|
||||
barriers[0].pNext = nullptr;
|
||||
barriers[0].srcAccessMask = VK_ACCESS_TRANSFER_READ_BIT;
|
||||
barriers[0].dstAccessMask = VK_ACCESS_SHADER_WRITE_BIT;
|
||||
barriers[0].srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
|
||||
barriers[0].dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
|
||||
barriers[0].buffer = src_buffer;
|
||||
barriers[0].offset = src_offset;
|
||||
barriers[0].size = copy_size;
|
||||
barriers[1].sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER;
|
||||
barriers[1].pNext = nullptr;
|
||||
barriers[1].srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
|
||||
barriers[1].dstAccessMask = UPLOAD_ACCESS_BARRIERS;
|
||||
barriers[1].srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
|
||||
barriers[1].dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
|
||||
barriers[1].buffer = dst_buffer;
|
||||
barriers[1].offset = dst_offset;
|
||||
barriers[1].size = copy_size;
|
||||
cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, UPLOAD_PIPELINE_STAGE, 0, {},
|
||||
barriers, {});
|
||||
});
|
||||
}
|
||||
|
||||
VKBufferCache::VKBufferCache(VideoCore::RasterizerInterface& rasterizer_,
|
||||
Tegra::MemoryManager& gpu_memory_, Core::Memory::Memory& cpu_memory_,
|
||||
const Device& device_, MemoryAllocator& memory_allocator_,
|
||||
VKScheduler& scheduler_, VKStreamBuffer& stream_buffer_,
|
||||
StagingBufferPool& staging_pool_)
|
||||
: VideoCommon::BufferCache<Buffer, VkBuffer, VKStreamBuffer>{rasterizer_, gpu_memory_,
|
||||
cpu_memory_, stream_buffer_},
|
||||
device{device_}, memory_allocator{memory_allocator_}, scheduler{scheduler_},
|
||||
staging_pool{staging_pool_} {}
|
||||
|
||||
VKBufferCache::~VKBufferCache() = default;
|
||||
|
||||
std::shared_ptr<Buffer> VKBufferCache::CreateBlock(VAddr cpu_addr, std::size_t size) {
|
||||
return std::make_shared<Buffer>(device, memory_allocator, scheduler, staging_pool, cpu_addr,
|
||||
size);
|
||||
}
|
||||
|
||||
VKBufferCache::BufferInfo VKBufferCache::GetEmptyBuffer(std::size_t size) {
|
||||
size = std::max(size, std::size_t(4));
|
||||
const auto& empty = staging_pool.Request(size, MemoryUsage::DeviceLocal);
|
||||
scheduler.RequestOutsideRenderPassOperationContext();
|
||||
scheduler.Record([size, buffer = empty.buffer](vk::CommandBuffer cmdbuf) {
|
||||
cmdbuf.FillBuffer(buffer, 0, size, 0);
|
||||
});
|
||||
return {empty.buffer, 0, 0};
|
||||
}
|
||||
|
||||
} // namespace Vulkan
|
||||
|
@@ -4,69 +4,112 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <memory>
|
||||
|
||||
#include "common/common_types.h"
|
||||
#include "video_core/buffer_cache/buffer_cache.h"
|
||||
#include "video_core/engines/maxwell_3d.h"
|
||||
#include "video_core/renderer_vulkan/vk_compute_pass.h"
|
||||
#include "video_core/renderer_vulkan/vk_staging_buffer_pool.h"
|
||||
#include "video_core/renderer_vulkan/vk_stream_buffer.h"
|
||||
#include "video_core/vulkan_common/vulkan_memory_allocator.h"
|
||||
#include "video_core/vulkan_common/vulkan_wrapper.h"
|
||||
|
||||
namespace Vulkan {
|
||||
|
||||
class Device;
|
||||
class VKDescriptorPool;
|
||||
class VKScheduler;
|
||||
class VKUpdateDescriptorQueue;
|
||||
|
||||
class Buffer final : public VideoCommon::BufferBlock {
|
||||
class BufferCacheRuntime;
|
||||
|
||||
class Buffer : public VideoCommon::BufferBase<VideoCore::RasterizerInterface> {
|
||||
public:
|
||||
explicit Buffer(const Device& device, MemoryAllocator& memory_allocator, VKScheduler& scheduler,
|
||||
StagingBufferPool& staging_pool, VAddr cpu_addr_, std::size_t size_);
|
||||
~Buffer();
|
||||
explicit Buffer(BufferCacheRuntime&, VideoCommon::NullBufferParams null_params);
|
||||
explicit Buffer(BufferCacheRuntime& runtime, VideoCore::RasterizerInterface& rasterizer_,
|
||||
VAddr cpu_addr_, u64 size_bytes_);
|
||||
|
||||
void Upload(std::size_t offset, std::size_t data_size, const u8* data);
|
||||
|
||||
void Download(std::size_t offset, std::size_t data_size, u8* data);
|
||||
|
||||
void CopyFrom(const Buffer& src, std::size_t src_offset, std::size_t dst_offset,
|
||||
std::size_t copy_size);
|
||||
|
||||
VkBuffer Handle() const {
|
||||
[[nodiscard]] VkBuffer Handle() const noexcept {
|
||||
return *buffer;
|
||||
}
|
||||
|
||||
u64 Address() const {
|
||||
return 0;
|
||||
operator VkBuffer() const noexcept {
|
||||
return *buffer;
|
||||
}
|
||||
|
||||
private:
|
||||
const Device& device;
|
||||
VKScheduler& scheduler;
|
||||
StagingBufferPool& staging_pool;
|
||||
|
||||
vk::Buffer buffer;
|
||||
MemoryCommit commit;
|
||||
};
|
||||
|
||||
class VKBufferCache final : public VideoCommon::BufferCache<Buffer, VkBuffer, VKStreamBuffer> {
|
||||
class BufferCacheRuntime {
|
||||
friend Buffer;
|
||||
|
||||
using PrimitiveTopology = Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology;
|
||||
using IndexFormat = Tegra::Engines::Maxwell3D::Regs::IndexFormat;
|
||||
|
||||
public:
|
||||
explicit VKBufferCache(VideoCore::RasterizerInterface& rasterizer,
|
||||
Tegra::MemoryManager& gpu_memory, Core::Memory::Memory& cpu_memory,
|
||||
const Device& device, MemoryAllocator& memory_allocator,
|
||||
VKScheduler& scheduler, VKStreamBuffer& stream_buffer,
|
||||
StagingBufferPool& staging_pool);
|
||||
~VKBufferCache();
|
||||
explicit BufferCacheRuntime(const Device& device_, MemoryAllocator& memory_manager_,
|
||||
VKScheduler& scheduler_, StagingBufferPool& staging_pool_,
|
||||
VKUpdateDescriptorQueue& update_descriptor_queue_,
|
||||
VKDescriptorPool& descriptor_pool);
|
||||
|
||||
BufferInfo GetEmptyBuffer(std::size_t size) override;
|
||||
void Finish();
|
||||
|
||||
protected:
|
||||
std::shared_ptr<Buffer> CreateBlock(VAddr cpu_addr, std::size_t size) override;
|
||||
[[nodiscard]] StagingBufferRef UploadStagingBuffer(size_t size);
|
||||
|
||||
[[nodiscard]] StagingBufferRef DownloadStagingBuffer(size_t size);
|
||||
|
||||
void CopyBuffer(VkBuffer src_buffer, VkBuffer dst_buffer,
|
||||
std::span<const VideoCommon::BufferCopy> copies);
|
||||
|
||||
void BindIndexBuffer(PrimitiveTopology topology, IndexFormat index_format, u32 num_indices,
|
||||
u32 base_vertex, VkBuffer buffer, u32 offset, u32 size);
|
||||
|
||||
void BindQuadArrayIndexBuffer(u32 first, u32 count);
|
||||
|
||||
void BindVertexBuffer(u32 index, VkBuffer buffer, u32 offset, u32 size, u32 stride);
|
||||
|
||||
void BindTransformFeedbackBuffer(u32 index, VkBuffer buffer, u32 offset, u32 size);
|
||||
|
||||
void BindUniformBuffer(VkBuffer buffer, u32 offset, u32 size) {
|
||||
BindBuffer(buffer, offset, size);
|
||||
}
|
||||
|
||||
void BindStorageBuffer(VkBuffer buffer, u32 offset, u32 size,
|
||||
[[maybe_unused]] bool is_written) {
|
||||
BindBuffer(buffer, offset, size);
|
||||
}
|
||||
|
||||
private:
|
||||
void BindBuffer(VkBuffer buffer, u32 offset, u32 size);
|
||||
|
||||
void ReserveQuadArrayLUT(u32 num_indices, bool wait_for_idle);
|
||||
|
||||
const Device& device;
|
||||
MemoryAllocator& memory_allocator;
|
||||
VKScheduler& scheduler;
|
||||
StagingBufferPool& staging_pool;
|
||||
VKUpdateDescriptorQueue& update_descriptor_queue;
|
||||
|
||||
vk::Buffer quad_array_lut;
|
||||
MemoryCommit quad_array_lut_commit;
|
||||
VkIndexType quad_array_lut_index_type{};
|
||||
u32 current_num_indices = 0;
|
||||
|
||||
Uint8Pass uint8_pass;
|
||||
QuadIndexedPass quad_index_pass;
|
||||
};
|
||||
|
||||
struct BufferCacheParams {
|
||||
using Runtime = Vulkan::BufferCacheRuntime;
|
||||
using Buffer = Vulkan::Buffer;
|
||||
|
||||
static constexpr bool IS_OPENGL = false;
|
||||
static constexpr bool HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS = false;
|
||||
static constexpr bool HAS_FULL_INDEX_AND_PRIMITIVE_SUPPORT = false;
|
||||
static constexpr bool NEEDS_BIND_UNIFORM_INDEX = false;
|
||||
static constexpr bool NEEDS_BIND_STORAGE_INDEX = false;
|
||||
static constexpr bool USE_MEMORY_MAPS = true;
|
||||
};
|
||||
|
||||
using BufferCache = VideoCommon::BufferCache<BufferCacheParams>;
|
||||
|
||||
} // namespace Vulkan
|
||||
|
@@ -10,7 +10,6 @@
|
||||
#include "common/alignment.h"
|
||||
#include "common/assert.h"
|
||||
#include "common/common_types.h"
|
||||
#include "video_core/host_shaders/vulkan_quad_array_comp_spv.h"
|
||||
#include "video_core/host_shaders/vulkan_quad_indexed_comp_spv.h"
|
||||
#include "video_core/host_shaders/vulkan_uint8_comp_spv.h"
|
||||
#include "video_core/renderer_vulkan/vk_compute_pass.h"
|
||||
@@ -22,19 +21,7 @@
|
||||
#include "video_core/vulkan_common/vulkan_wrapper.h"
|
||||
|
||||
namespace Vulkan {
|
||||
|
||||
namespace {
|
||||
|
||||
VkDescriptorSetLayoutBinding BuildQuadArrayPassDescriptorSetLayoutBinding() {
|
||||
return {
|
||||
.binding = 0,
|
||||
.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
|
||||
.descriptorCount = 1,
|
||||
.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
|
||||
.pImmutableSamplers = nullptr,
|
||||
};
|
||||
}
|
||||
|
||||
VkDescriptorUpdateTemplateEntryKHR BuildQuadArrayPassDescriptorUpdateTemplateEntry() {
|
||||
return {
|
||||
.dstBinding = 0,
|
||||
@@ -162,55 +149,6 @@ VkDescriptorSet VKComputePass::CommitDescriptorSet(
|
||||
return set;
|
||||
}
|
||||
|
||||
QuadArrayPass::QuadArrayPass(const Device& device_, VKScheduler& scheduler_,
|
||||
VKDescriptorPool& descriptor_pool_,
|
||||
StagingBufferPool& staging_buffer_pool_,
|
||||
VKUpdateDescriptorQueue& update_descriptor_queue_)
|
||||
: VKComputePass(device_, descriptor_pool_, BuildQuadArrayPassDescriptorSetLayoutBinding(),
|
||||
BuildQuadArrayPassDescriptorUpdateTemplateEntry(),
|
||||
BuildComputePushConstantRange(sizeof(u32)), VULKAN_QUAD_ARRAY_COMP_SPV),
|
||||
scheduler{scheduler_}, staging_buffer_pool{staging_buffer_pool_},
|
||||
update_descriptor_queue{update_descriptor_queue_} {}
|
||||
|
||||
QuadArrayPass::~QuadArrayPass() = default;
|
||||
|
||||
std::pair<VkBuffer, VkDeviceSize> QuadArrayPass::Assemble(u32 num_vertices, u32 first) {
|
||||
const u32 num_triangle_vertices = (num_vertices / 4) * 6;
|
||||
const std::size_t staging_size = num_triangle_vertices * sizeof(u32);
|
||||
const auto staging_ref = staging_buffer_pool.Request(staging_size, MemoryUsage::DeviceLocal);
|
||||
|
||||
update_descriptor_queue.Acquire();
|
||||
update_descriptor_queue.AddBuffer(staging_ref.buffer, 0, staging_size);
|
||||
const VkDescriptorSet set = CommitDescriptorSet(update_descriptor_queue);
|
||||
|
||||
scheduler.RequestOutsideRenderPassOperationContext();
|
||||
|
||||
ASSERT(num_vertices % 4 == 0);
|
||||
const u32 num_quads = num_vertices / 4;
|
||||
scheduler.Record([layout = *layout, pipeline = *pipeline, buffer = staging_ref.buffer,
|
||||
num_quads, first, set](vk::CommandBuffer cmdbuf) {
|
||||
constexpr u32 dispatch_size = 1024;
|
||||
cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
|
||||
cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, layout, 0, set, {});
|
||||
cmdbuf.PushConstants(layout, VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(first), &first);
|
||||
cmdbuf.Dispatch(Common::AlignUp(num_quads, dispatch_size) / dispatch_size, 1, 1);
|
||||
|
||||
VkBufferMemoryBarrier barrier;
|
||||
barrier.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER;
|
||||
barrier.pNext = nullptr;
|
||||
barrier.srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT;
|
||||
barrier.dstAccessMask = VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT;
|
||||
barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
|
||||
barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
|
||||
barrier.buffer = buffer;
|
||||
barrier.offset = 0;
|
||||
barrier.size = static_cast<VkDeviceSize>(num_quads) * 6 * sizeof(u32);
|
||||
cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
|
||||
VK_PIPELINE_STAGE_VERTEX_INPUT_BIT, 0, {}, {barrier}, {});
|
||||
});
|
||||
return {staging_ref.buffer, 0};
|
||||
}
|
||||
|
||||
Uint8Pass::Uint8Pass(const Device& device, VKScheduler& scheduler_,
|
||||
VKDescriptorPool& descriptor_pool, StagingBufferPool& staging_buffer_pool_,
|
||||
VKUpdateDescriptorQueue& update_descriptor_queue_)
|
||||
@@ -221,18 +159,18 @@ Uint8Pass::Uint8Pass(const Device& device, VKScheduler& scheduler_,
|
||||
|
||||
Uint8Pass::~Uint8Pass() = default;
|
||||
|
||||
std::pair<VkBuffer, u64> Uint8Pass::Assemble(u32 num_vertices, VkBuffer src_buffer,
|
||||
u64 src_offset) {
|
||||
std::pair<VkBuffer, u32> Uint8Pass::Assemble(u32 num_vertices, VkBuffer src_buffer,
|
||||
u32 src_offset) {
|
||||
const u32 staging_size = static_cast<u32>(num_vertices * sizeof(u16));
|
||||
const auto staging_ref = staging_buffer_pool.Request(staging_size, MemoryUsage::DeviceLocal);
|
||||
const auto staging = staging_buffer_pool.Request(staging_size, MemoryUsage::DeviceLocal);
|
||||
|
||||
update_descriptor_queue.Acquire();
|
||||
update_descriptor_queue.AddBuffer(src_buffer, src_offset, num_vertices);
|
||||
update_descriptor_queue.AddBuffer(staging_ref.buffer, 0, staging_size);
|
||||
update_descriptor_queue.AddBuffer(staging.buffer, 0, staging_size);
|
||||
const VkDescriptorSet set = CommitDescriptorSet(update_descriptor_queue);
|
||||
|
||||
scheduler.RequestOutsideRenderPassOperationContext();
|
||||
scheduler.Record([layout = *layout, pipeline = *pipeline, buffer = staging_ref.buffer, set,
|
||||
scheduler.Record([layout = *layout, pipeline = *pipeline, buffer = staging.buffer, set,
|
||||
num_vertices](vk::CommandBuffer cmdbuf) {
|
||||
constexpr u32 dispatch_size = 1024;
|
||||
cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
|
||||
@@ -252,7 +190,7 @@ std::pair<VkBuffer, u64> Uint8Pass::Assemble(u32 num_vertices, VkBuffer src_buff
|
||||
cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
|
||||
VK_PIPELINE_STAGE_VERTEX_INPUT_BIT, 0, {}, barrier, {});
|
||||
});
|
||||
return {staging_ref.buffer, 0};
|
||||
return {staging.buffer, 0};
|
||||
}
|
||||
|
||||
QuadIndexedPass::QuadIndexedPass(const Device& device_, VKScheduler& scheduler_,
|
||||
@@ -267,9 +205,9 @@ QuadIndexedPass::QuadIndexedPass(const Device& device_, VKScheduler& scheduler_,
|
||||
|
||||
QuadIndexedPass::~QuadIndexedPass() = default;
|
||||
|
||||
std::pair<VkBuffer, u64> QuadIndexedPass::Assemble(
|
||||
std::pair<VkBuffer, u32> QuadIndexedPass::Assemble(
|
||||
Tegra::Engines::Maxwell3D::Regs::IndexFormat index_format, u32 num_vertices, u32 base_vertex,
|
||||
VkBuffer src_buffer, u64 src_offset) {
|
||||
VkBuffer src_buffer, u32 src_offset) {
|
||||
const u32 index_shift = [index_format] {
|
||||
switch (index_format) {
|
||||
case Tegra::Engines::Maxwell3D::Regs::IndexFormat::UnsignedByte:
|
||||
@@ -286,15 +224,15 @@ std::pair<VkBuffer, u64> QuadIndexedPass::Assemble(
|
||||
const u32 num_tri_vertices = (num_vertices / 4) * 6;
|
||||
|
||||
const std::size_t staging_size = num_tri_vertices * sizeof(u32);
|
||||
const auto staging_ref = staging_buffer_pool.Request(staging_size, MemoryUsage::DeviceLocal);
|
||||
const auto staging = staging_buffer_pool.Request(staging_size, MemoryUsage::DeviceLocal);
|
||||
|
||||
update_descriptor_queue.Acquire();
|
||||
update_descriptor_queue.AddBuffer(src_buffer, src_offset, input_size);
|
||||
update_descriptor_queue.AddBuffer(staging_ref.buffer, 0, staging_size);
|
||||
update_descriptor_queue.AddBuffer(staging.buffer, 0, staging_size);
|
||||
const VkDescriptorSet set = CommitDescriptorSet(update_descriptor_queue);
|
||||
|
||||
scheduler.RequestOutsideRenderPassOperationContext();
|
||||
scheduler.Record([layout = *layout, pipeline = *pipeline, buffer = staging_ref.buffer, set,
|
||||
scheduler.Record([layout = *layout, pipeline = *pipeline, buffer = staging.buffer, set,
|
||||
num_tri_vertices, base_vertex, index_shift](vk::CommandBuffer cmdbuf) {
|
||||
static constexpr u32 dispatch_size = 1024;
|
||||
const std::array push_constants = {base_vertex, index_shift};
|
||||
@@ -317,7 +255,7 @@ std::pair<VkBuffer, u64> QuadIndexedPass::Assemble(
|
||||
cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
|
||||
VK_PIPELINE_STAGE_VERTEX_INPUT_BIT, 0, {}, barrier, {});
|
||||
});
|
||||
return {staging_ref.buffer, 0};
|
||||
return {staging.buffer, 0};
|
||||
}
|
||||
|
||||
} // namespace Vulkan
|
||||
|
@@ -41,22 +41,6 @@ private:
|
||||
vk::ShaderModule module;
|
||||
};
|
||||
|
||||
class QuadArrayPass final : public VKComputePass {
|
||||
public:
|
||||
explicit QuadArrayPass(const Device& device_, VKScheduler& scheduler_,
|
||||
VKDescriptorPool& descriptor_pool_,
|
||||
StagingBufferPool& staging_buffer_pool_,
|
||||
VKUpdateDescriptorQueue& update_descriptor_queue_);
|
||||
~QuadArrayPass();
|
||||
|
||||
std::pair<VkBuffer, VkDeviceSize> Assemble(u32 num_vertices, u32 first);
|
||||
|
||||
private:
|
||||
VKScheduler& scheduler;
|
||||
StagingBufferPool& staging_buffer_pool;
|
||||
VKUpdateDescriptorQueue& update_descriptor_queue;
|
||||
};
|
||||
|
||||
class Uint8Pass final : public VKComputePass {
|
||||
public:
|
||||
explicit Uint8Pass(const Device& device_, VKScheduler& scheduler_,
|
||||
@@ -64,7 +48,9 @@ public:
|
||||
VKUpdateDescriptorQueue& update_descriptor_queue_);
|
||||
~Uint8Pass();
|
||||
|
||||
std::pair<VkBuffer, u64> Assemble(u32 num_vertices, VkBuffer src_buffer, u64 src_offset);
|
||||
/// Assemble uint8 indices into an uint16 index buffer
|
||||
/// Returns a pair with the staging buffer, and the offset where the assembled data is
|
||||
std::pair<VkBuffer, u32> Assemble(u32 num_vertices, VkBuffer src_buffer, u32 src_offset);
|
||||
|
||||
private:
|
||||
VKScheduler& scheduler;
|
||||
@@ -80,9 +66,9 @@ public:
|
||||
VKUpdateDescriptorQueue& update_descriptor_queue_);
|
||||
~QuadIndexedPass();
|
||||
|
||||
std::pair<VkBuffer, u64> Assemble(Tegra::Engines::Maxwell3D::Regs::IndexFormat index_format,
|
||||
std::pair<VkBuffer, u32> Assemble(Tegra::Engines::Maxwell3D::Regs::IndexFormat index_format,
|
||||
u32 num_vertices, u32 base_vertex, VkBuffer src_buffer,
|
||||
u64 src_offset);
|
||||
u32 src_offset);
|
||||
|
||||
private:
|
||||
VKScheduler& scheduler;
|
||||
|
@@ -45,8 +45,8 @@ void InnerFence::Wait() {
|
||||
}
|
||||
|
||||
VKFenceManager::VKFenceManager(VideoCore::RasterizerInterface& rasterizer_, Tegra::GPU& gpu_,
|
||||
Tegra::MemoryManager& memory_manager_, TextureCache& texture_cache_,
|
||||
VKBufferCache& buffer_cache_, VKQueryCache& query_cache_,
|
||||
TextureCache& texture_cache_, BufferCache& buffer_cache_,
|
||||
VKQueryCache& query_cache_, const Device& device_,
|
||||
VKScheduler& scheduler_)
|
||||
: GenericFenceManager{rasterizer_, gpu_, texture_cache_, buffer_cache_, query_cache_},
|
||||
scheduler{scheduler_} {}
|
||||
|
@@ -22,7 +22,6 @@ class RasterizerInterface;
|
||||
namespace Vulkan {
|
||||
|
||||
class Device;
|
||||
class VKBufferCache;
|
||||
class VKQueryCache;
|
||||
class VKScheduler;
|
||||
|
||||
@@ -45,14 +44,14 @@ private:
|
||||
using Fence = std::shared_ptr<InnerFence>;
|
||||
|
||||
using GenericFenceManager =
|
||||
VideoCommon::FenceManager<Fence, TextureCache, VKBufferCache, VKQueryCache>;
|
||||
VideoCommon::FenceManager<Fence, TextureCache, BufferCache, VKQueryCache>;
|
||||
|
||||
class VKFenceManager final : public GenericFenceManager {
|
||||
public:
|
||||
explicit VKFenceManager(VideoCore::RasterizerInterface& rasterizer_, Tegra::GPU& gpu_,
|
||||
Tegra::MemoryManager& memory_manager_, TextureCache& texture_cache_,
|
||||
VKBufferCache& buffer_cache_, VKQueryCache& query_cache_,
|
||||
VKScheduler& scheduler_);
|
||||
explicit VKFenceManager(VideoCore::RasterizerInterface& rasterizer, Tegra::GPU& gpu,
|
||||
TextureCache& texture_cache, BufferCache& buffer_cache,
|
||||
VKQueryCache& query_cache, const Device& device,
|
||||
VKScheduler& scheduler);
|
||||
|
||||
protected:
|
||||
Fence CreateFence(u32 value, bool is_stubbed) override;
|
||||
|
File diff suppressed because it is too large
Load Diff
@@ -18,14 +18,12 @@
|
||||
#include "video_core/renderer_vulkan/blit_image.h"
|
||||
#include "video_core/renderer_vulkan/fixed_pipeline_state.h"
|
||||
#include "video_core/renderer_vulkan/vk_buffer_cache.h"
|
||||
#include "video_core/renderer_vulkan/vk_compute_pass.h"
|
||||
#include "video_core/renderer_vulkan/vk_descriptor_pool.h"
|
||||
#include "video_core/renderer_vulkan/vk_fence_manager.h"
|
||||
#include "video_core/renderer_vulkan/vk_pipeline_cache.h"
|
||||
#include "video_core/renderer_vulkan/vk_query_cache.h"
|
||||
#include "video_core/renderer_vulkan/vk_scheduler.h"
|
||||
#include "video_core/renderer_vulkan/vk_staging_buffer_pool.h"
|
||||
#include "video_core/renderer_vulkan/vk_stream_buffer.h"
|
||||
#include "video_core/renderer_vulkan/vk_texture_cache.h"
|
||||
#include "video_core/renderer_vulkan/vk_update_descriptor.h"
|
||||
#include "video_core/shader/async_shaders.h"
|
||||
@@ -49,7 +47,6 @@ namespace Vulkan {
|
||||
struct VKScreenInfo;
|
||||
|
||||
class StateTracker;
|
||||
class BufferBindings;
|
||||
|
||||
class RasterizerVulkan final : public VideoCore::RasterizerAccelerated {
|
||||
public:
|
||||
@@ -65,8 +62,11 @@ public:
|
||||
void DispatchCompute(GPUVAddr code_addr) override;
|
||||
void ResetCounter(VideoCore::QueryType type) override;
|
||||
void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional<u64> timestamp) override;
|
||||
void BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, u32 size) override;
|
||||
void FlushAll() override;
|
||||
void FlushRegion(VAddr addr, u64 size) override;
|
||||
void InvalidateExceptTextureCache(VAddr addr, u64 size) override;
|
||||
void InvalidateTextureCache(VAddr addr, u64 size) override;
|
||||
bool MustFlushRegion(VAddr addr, u64 size) override;
|
||||
void InvalidateRegion(VAddr addr, u64 size) override;
|
||||
void OnCPUWrite(VAddr addr, u64 size) override;
|
||||
@@ -107,24 +107,11 @@ private:
|
||||
|
||||
static constexpr VkDeviceSize DEFAULT_BUFFER_SIZE = 4 * sizeof(float);
|
||||
|
||||
struct DrawParameters {
|
||||
void Draw(vk::CommandBuffer cmdbuf) const;
|
||||
|
||||
u32 base_instance = 0;
|
||||
u32 num_instances = 0;
|
||||
u32 base_vertex = 0;
|
||||
u32 num_vertices = 0;
|
||||
bool is_indexed = 0;
|
||||
};
|
||||
|
||||
void FlushWork();
|
||||
|
||||
/// Setups geometry buffers and state.
|
||||
DrawParameters SetupGeometry(FixedPipelineState& fixed_state, BufferBindings& buffer_bindings,
|
||||
bool is_indexed, bool is_instanced);
|
||||
|
||||
/// Setup descriptors in the graphics pipeline.
|
||||
void SetupShaderDescriptors(const std::array<Shader*, Maxwell::MaxShaderProgram>& shaders);
|
||||
void SetupShaderDescriptors(const std::array<Shader*, Maxwell::MaxShaderProgram>& shaders,
|
||||
bool is_indexed);
|
||||
|
||||
void UpdateDynamicStates();
|
||||
|
||||
@@ -132,16 +119,6 @@ private:
|
||||
|
||||
void EndTransformFeedback();
|
||||
|
||||
void SetupVertexArrays(BufferBindings& buffer_bindings);
|
||||
|
||||
void SetupIndexBuffer(BufferBindings& buffer_bindings, DrawParameters& params, bool is_indexed);
|
||||
|
||||
/// Setup constant buffers in the graphics pipeline.
|
||||
void SetupGraphicsConstBuffers(const ShaderEntries& entries, std::size_t stage);
|
||||
|
||||
/// Setup global buffers in the graphics pipeline.
|
||||
void SetupGraphicsGlobalBuffers(const ShaderEntries& entries, std::size_t stage);
|
||||
|
||||
/// Setup uniform texels in the graphics pipeline.
|
||||
void SetupGraphicsUniformTexels(const ShaderEntries& entries, std::size_t stage);
|
||||
|
||||
@@ -154,12 +131,6 @@ private:
|
||||
/// Setup images in the graphics pipeline.
|
||||
void SetupGraphicsImages(const ShaderEntries& entries, std::size_t stage);
|
||||
|
||||
/// Setup constant buffers in the compute pipeline.
|
||||
void SetupComputeConstBuffers(const ShaderEntries& entries);
|
||||
|
||||
/// Setup global buffers in the compute pipeline.
|
||||
void SetupComputeGlobalBuffers(const ShaderEntries& entries);
|
||||
|
||||
/// Setup texel buffers in the compute pipeline.
|
||||
void SetupComputeUniformTexels(const ShaderEntries& entries);
|
||||
|
||||
@@ -172,11 +143,6 @@ private:
|
||||
/// Setup images in the compute pipeline.
|
||||
void SetupComputeImages(const ShaderEntries& entries);
|
||||
|
||||
void SetupConstBuffer(const ConstBufferEntry& entry,
|
||||
const Tegra::Engines::ConstBufferInfo& buffer);
|
||||
|
||||
void SetupGlobalBuffer(const GlobalBufferEntry& entry, GPUVAddr address);
|
||||
|
||||
void UpdateViewportsState(Tegra::Engines::Maxwell3D::Regs& regs);
|
||||
void UpdateScissorsState(Tegra::Engines::Maxwell3D::Regs& regs);
|
||||
void UpdateDepthBias(Tegra::Engines::Maxwell3D::Regs& regs);
|
||||
@@ -193,19 +159,6 @@ private:
|
||||
void UpdateStencilOp(Tegra::Engines::Maxwell3D::Regs& regs);
|
||||
void UpdateStencilTestEnable(Tegra::Engines::Maxwell3D::Regs& regs);
|
||||
|
||||
size_t CalculateGraphicsStreamBufferSize(bool is_indexed) const;
|
||||
|
||||
size_t CalculateComputeStreamBufferSize() const;
|
||||
|
||||
size_t CalculateVertexArraysSize() const;
|
||||
|
||||
size_t CalculateIndexBufferSize() const;
|
||||
|
||||
size_t CalculateConstBufferSize(const ConstBufferEntry& entry,
|
||||
const Tegra::Engines::ConstBufferInfo& buffer) const;
|
||||
|
||||
VkBuffer DefaultBuffer();
|
||||
|
||||
Tegra::GPU& gpu;
|
||||
Tegra::MemoryManager& gpu_memory;
|
||||
Tegra::Engines::Maxwell3D& maxwell3d;
|
||||
@@ -217,24 +170,19 @@ private:
|
||||
StateTracker& state_tracker;
|
||||
VKScheduler& scheduler;
|
||||
|
||||
VKStreamBuffer stream_buffer;
|
||||
StagingBufferPool staging_pool;
|
||||
VKDescriptorPool descriptor_pool;
|
||||
VKUpdateDescriptorQueue update_descriptor_queue;
|
||||
BlitImageHelper blit_image;
|
||||
QuadArrayPass quad_array_pass;
|
||||
QuadIndexedPass quad_indexed_pass;
|
||||
Uint8Pass uint8_pass;
|
||||
|
||||
TextureCacheRuntime texture_cache_runtime;
|
||||
TextureCache texture_cache;
|
||||
BufferCacheRuntime buffer_cache_runtime;
|
||||
BufferCache buffer_cache;
|
||||
VKPipelineCache pipeline_cache;
|
||||
VKBufferCache buffer_cache;
|
||||
VKQueryCache query_cache;
|
||||
VKFenceManager fence_manager;
|
||||
|
||||
vk::Buffer default_buffer;
|
||||
MemoryCommit default_buffer_commit;
|
||||
vk::Event wfi_event;
|
||||
VideoCommon::Shader::AsyncShaders async_shaders;
|
||||
|
||||
|
@@ -52,18 +52,6 @@ VKScheduler::~VKScheduler() {
|
||||
worker_thread.join();
|
||||
}
|
||||
|
||||
u64 VKScheduler::CurrentTick() const noexcept {
|
||||
return master_semaphore->CurrentTick();
|
||||
}
|
||||
|
||||
bool VKScheduler::IsFree(u64 tick) const noexcept {
|
||||
return master_semaphore->IsFree(tick);
|
||||
}
|
||||
|
||||
void VKScheduler::Wait(u64 tick) {
|
||||
master_semaphore->Wait(tick);
|
||||
}
|
||||
|
||||
void VKScheduler::Flush(VkSemaphore semaphore) {
|
||||
SubmitExecution(semaphore);
|
||||
AllocateNewContext();
|
||||
@@ -269,7 +257,7 @@ void VKScheduler::EndRenderPass() {
|
||||
cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT |
|
||||
VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT |
|
||||
VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT,
|
||||
VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT, 0, nullptr, nullptr,
|
||||
VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, 0, nullptr, nullptr,
|
||||
vk::Span(barriers.data(), num_images));
|
||||
});
|
||||
state.renderpass = nullptr;
|
||||
|
@@ -12,6 +12,7 @@
|
||||
#include <utility>
|
||||
#include "common/common_types.h"
|
||||
#include "common/threadsafe_queue.h"
|
||||
#include "video_core/renderer_vulkan/vk_master_semaphore.h"
|
||||
#include "video_core/vulkan_common/vulkan_wrapper.h"
|
||||
|
||||
namespace Vulkan {
|
||||
@@ -19,7 +20,6 @@ namespace Vulkan {
|
||||
class CommandPool;
|
||||
class Device;
|
||||
class Framebuffer;
|
||||
class MasterSemaphore;
|
||||
class StateTracker;
|
||||
class VKQueryCache;
|
||||
|
||||
@@ -30,15 +30,6 @@ public:
|
||||
explicit VKScheduler(const Device& device, StateTracker& state_tracker);
|
||||
~VKScheduler();
|
||||
|
||||
/// Returns the current command buffer tick.
|
||||
[[nodiscard]] u64 CurrentTick() const noexcept;
|
||||
|
||||
/// Returns true when a tick has been triggered by the GPU.
|
||||
[[nodiscard]] bool IsFree(u64 tick) const noexcept;
|
||||
|
||||
/// Waits for the given tick to trigger on the GPU.
|
||||
void Wait(u64 tick);
|
||||
|
||||
/// Sends the current execution context to the GPU.
|
||||
void Flush(VkSemaphore semaphore = nullptr);
|
||||
|
||||
@@ -80,6 +71,21 @@ public:
|
||||
(void)chunk->Record(command);
|
||||
}
|
||||
|
||||
/// Returns the current command buffer tick.
|
||||
[[nodiscard]] u64 CurrentTick() const noexcept {
|
||||
return master_semaphore->CurrentTick();
|
||||
}
|
||||
|
||||
/// Returns true when a tick has been triggered by the GPU.
|
||||
[[nodiscard]] bool IsFree(u64 tick) const noexcept {
|
||||
return master_semaphore->IsFree(tick);
|
||||
}
|
||||
|
||||
/// Waits for the given tick to trigger on the GPU.
|
||||
void Wait(u64 tick) {
|
||||
master_semaphore->Wait(tick);
|
||||
}
|
||||
|
||||
/// Returns the master timeline semaphore.
|
||||
[[nodiscard]] MasterSemaphore& GetMasterSemaphore() const noexcept {
|
||||
return *master_semaphore;
|
||||
|
@@ -3124,6 +3124,9 @@ ShaderEntries GenerateShaderEntries(const VideoCommon::Shader::ShaderIR& ir) {
|
||||
entries.attributes.insert(GetGenericAttributeLocation(attribute));
|
||||
}
|
||||
}
|
||||
for (const auto& buffer : entries.const_buffers) {
|
||||
entries.enabled_uniform_buffers |= 1U << buffer.GetIndex();
|
||||
}
|
||||
entries.clip_distances = ir.GetClipDistances();
|
||||
entries.shader_length = ir.GetLength();
|
||||
entries.uses_warps = ir.UsesWarps();
|
||||
|
@@ -39,24 +39,7 @@ private:
|
||||
u32 index{};
|
||||
};
|
||||
|
||||
class GlobalBufferEntry {
|
||||
public:
|
||||
constexpr explicit GlobalBufferEntry(u32 cbuf_index_, u32 cbuf_offset_, bool is_written_)
|
||||
: cbuf_index{cbuf_index_}, cbuf_offset{cbuf_offset_}, is_written{is_written_} {}
|
||||
|
||||
constexpr u32 GetCbufIndex() const {
|
||||
return cbuf_index;
|
||||
}
|
||||
|
||||
constexpr u32 GetCbufOffset() const {
|
||||
return cbuf_offset;
|
||||
}
|
||||
|
||||
constexpr bool IsWritten() const {
|
||||
return is_written;
|
||||
}
|
||||
|
||||
private:
|
||||
struct GlobalBufferEntry {
|
||||
u32 cbuf_index{};
|
||||
u32 cbuf_offset{};
|
||||
bool is_written{};
|
||||
@@ -78,6 +61,7 @@ struct ShaderEntries {
|
||||
std::set<u32> attributes;
|
||||
std::array<bool, Maxwell::NumClipDistances> clip_distances{};
|
||||
std::size_t shader_length{};
|
||||
u32 enabled_uniform_buffers{};
|
||||
bool uses_warps{};
|
||||
};
|
||||
|
||||
|
@@ -30,15 +30,18 @@ using Table = Maxwell3D::DirtyState::Table;
|
||||
using Flags = Maxwell3D::DirtyState::Flags;
|
||||
|
||||
Flags MakeInvalidationFlags() {
|
||||
static constexpr std::array INVALIDATION_FLAGS{
|
||||
static constexpr int INVALIDATION_FLAGS[]{
|
||||
Viewports, Scissors, DepthBias, BlendConstants, DepthBounds,
|
||||
StencilProperties, CullMode, DepthBoundsEnable, DepthTestEnable, DepthWriteEnable,
|
||||
DepthCompareOp, FrontFace, StencilOp, StencilTestEnable,
|
||||
DepthCompareOp, FrontFace, StencilOp, StencilTestEnable, VertexBuffers,
|
||||
};
|
||||
Flags flags{};
|
||||
for (const int flag : INVALIDATION_FLAGS) {
|
||||
flags[flag] = true;
|
||||
}
|
||||
for (int index = VertexBuffer0; index <= VertexBuffer31; ++index) {
|
||||
flags[index] = true;
|
||||
}
|
||||
return flags;
|
||||
}
|
||||
|
||||
@@ -130,7 +133,7 @@ void SetupDirtyStencilTestEnable(Tables& tables) {
|
||||
StateTracker::StateTracker(Tegra::GPU& gpu)
|
||||
: flags{gpu.Maxwell3D().dirty.flags}, invalidation_flags{MakeInvalidationFlags()} {
|
||||
auto& tables = gpu.Maxwell3D().dirty.tables;
|
||||
SetupDirtyRenderTargets(tables);
|
||||
SetupDirtyFlags(tables);
|
||||
SetupDirtyViewports(tables);
|
||||
SetupDirtyScissors(tables);
|
||||
SetupDirtyDepthBias(tables);
|
||||
|
@@ -56,8 +56,11 @@ VkExtent2D ChooseSwapExtent(const VkSurfaceCapabilitiesKHR& capabilities, u32 wi
|
||||
|
||||
} // Anonymous namespace
|
||||
|
||||
VKSwapchain::VKSwapchain(VkSurfaceKHR surface_, const Device& device_, VKScheduler& scheduler_)
|
||||
: surface{surface_}, device{device_}, scheduler{scheduler_} {}
|
||||
VKSwapchain::VKSwapchain(VkSurfaceKHR surface_, const Device& device_, VKScheduler& scheduler_,
|
||||
u32 width, u32 height, bool srgb)
|
||||
: surface{surface_}, device{device_}, scheduler{scheduler_} {
|
||||
Create(width, height, srgb);
|
||||
}
|
||||
|
||||
VKSwapchain::~VKSwapchain() = default;
|
||||
|
||||
|
@@ -20,7 +20,8 @@ class VKScheduler;
|
||||
|
||||
class VKSwapchain {
|
||||
public:
|
||||
explicit VKSwapchain(VkSurfaceKHR surface, const Device& device, VKScheduler& scheduler);
|
||||
explicit VKSwapchain(VkSurfaceKHR surface, const Device& device, VKScheduler& scheduler,
|
||||
u32 width, u32 height, bool srgb);
|
||||
~VKSwapchain();
|
||||
|
||||
/// Creates (or recreates) the swapchain with a given size.
|
||||
|
@@ -410,46 +410,47 @@ constexpr VkBorderColor ConvertBorderColor(const std::array<float, 4>& color) {
|
||||
void CopyBufferToImage(vk::CommandBuffer cmdbuf, VkBuffer src_buffer, VkImage image,
|
||||
VkImageAspectFlags aspect_mask, bool is_initialized,
|
||||
std::span<const VkBufferImageCopy> copies) {
|
||||
static constexpr VkAccessFlags ACCESS_FLAGS = VK_ACCESS_SHADER_WRITE_BIT |
|
||||
VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT |
|
||||
VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT;
|
||||
static constexpr VkAccessFlags WRITE_ACCESS_FLAGS =
|
||||
VK_ACCESS_SHADER_WRITE_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT |
|
||||
VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT;
|
||||
static constexpr VkAccessFlags READ_ACCESS_FLAGS = VK_ACCESS_SHADER_READ_BIT |
|
||||
VK_ACCESS_COLOR_ATTACHMENT_READ_BIT |
|
||||
VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT;
|
||||
const VkImageMemoryBarrier read_barrier{
|
||||
.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
|
||||
.pNext = nullptr,
|
||||
.srcAccessMask = ACCESS_FLAGS,
|
||||
.srcAccessMask = WRITE_ACCESS_FLAGS,
|
||||
.dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
|
||||
.oldLayout = is_initialized ? VK_IMAGE_LAYOUT_GENERAL : VK_IMAGE_LAYOUT_UNDEFINED,
|
||||
.newLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
|
||||
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
|
||||
.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
|
||||
.image = image,
|
||||
.subresourceRange =
|
||||
{
|
||||
.aspectMask = aspect_mask,
|
||||
.baseMipLevel = 0,
|
||||
.levelCount = VK_REMAINING_MIP_LEVELS,
|
||||
.baseArrayLayer = 0,
|
||||
.layerCount = VK_REMAINING_ARRAY_LAYERS,
|
||||
},
|
||||
.subresourceRange{
|
||||
.aspectMask = aspect_mask,
|
||||
.baseMipLevel = 0,
|
||||
.levelCount = VK_REMAINING_MIP_LEVELS,
|
||||
.baseArrayLayer = 0,
|
||||
.layerCount = VK_REMAINING_ARRAY_LAYERS,
|
||||
},
|
||||
};
|
||||
const VkImageMemoryBarrier write_barrier{
|
||||
.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
|
||||
.pNext = nullptr,
|
||||
.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
|
||||
.dstAccessMask = ACCESS_FLAGS,
|
||||
.dstAccessMask = WRITE_ACCESS_FLAGS | READ_ACCESS_FLAGS,
|
||||
.oldLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
|
||||
.newLayout = VK_IMAGE_LAYOUT_GENERAL,
|
||||
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
|
||||
.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
|
||||
.image = image,
|
||||
.subresourceRange =
|
||||
{
|
||||
.aspectMask = aspect_mask,
|
||||
.baseMipLevel = 0,
|
||||
.levelCount = VK_REMAINING_MIP_LEVELS,
|
||||
.baseArrayLayer = 0,
|
||||
.layerCount = VK_REMAINING_ARRAY_LAYERS,
|
||||
},
|
||||
.subresourceRange{
|
||||
.aspectMask = aspect_mask,
|
||||
.baseMipLevel = 0,
|
||||
.levelCount = VK_REMAINING_MIP_LEVELS,
|
||||
.baseArrayLayer = 0,
|
||||
.layerCount = VK_REMAINING_ARRAY_LAYERS,
|
||||
},
|
||||
};
|
||||
cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, 0,
|
||||
read_barrier);
|
||||
@@ -553,20 +554,12 @@ void TextureCacheRuntime::Finish() {
|
||||
scheduler.Finish();
|
||||
}
|
||||
|
||||
ImageBufferMap TextureCacheRuntime::MapUploadBuffer(size_t size) {
|
||||
const auto staging_ref = staging_buffer_pool.Request(size, MemoryUsage::Upload);
|
||||
return {
|
||||
.handle = staging_ref.buffer,
|
||||
.span = staging_ref.mapped_span,
|
||||
};
|
||||
StagingBufferRef TextureCacheRuntime::UploadStagingBuffer(size_t size) {
|
||||
return staging_buffer_pool.Request(size, MemoryUsage::Upload);
|
||||
}
|
||||
|
||||
ImageBufferMap TextureCacheRuntime::MapDownloadBuffer(size_t size) {
|
||||
const auto staging_ref = staging_buffer_pool.Request(size, MemoryUsage::Download);
|
||||
return {
|
||||
.handle = staging_ref.buffer,
|
||||
.span = staging_ref.mapped_span,
|
||||
};
|
||||
StagingBufferRef TextureCacheRuntime::DownloadStagingBuffer(size_t size) {
|
||||
return staging_buffer_pool.Request(size, MemoryUsage::Download);
|
||||
}
|
||||
|
||||
void TextureCacheRuntime::BlitImage(Framebuffer* dst_framebuffer, ImageView& dst, ImageView& src,
|
||||
@@ -738,7 +731,7 @@ void TextureCacheRuntime::CopyImage(Image& dst, Image& src,
|
||||
.srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT |
|
||||
VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT |
|
||||
VK_ACCESS_TRANSFER_WRITE_BIT,
|
||||
.dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
|
||||
.dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT,
|
||||
.oldLayout = VK_IMAGE_LAYOUT_GENERAL,
|
||||
.newLayout = VK_IMAGE_LAYOUT_GENERAL,
|
||||
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
|
||||
@@ -749,12 +742,9 @@ void TextureCacheRuntime::CopyImage(Image& dst, Image& src,
|
||||
VkImageMemoryBarrier{
|
||||
.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
|
||||
.pNext = nullptr,
|
||||
.srcAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT |
|
||||
VK_ACCESS_COLOR_ATTACHMENT_READ_BIT |
|
||||
VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT |
|
||||
VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT |
|
||||
.srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT |
|
||||
VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT |
|
||||
VK_ACCESS_TRANSFER_READ_BIT | VK_ACCESS_TRANSFER_WRITE_BIT,
|
||||
VK_ACCESS_TRANSFER_WRITE_BIT,
|
||||
.dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
|
||||
.oldLayout = VK_IMAGE_LAYOUT_GENERAL,
|
||||
.newLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
|
||||
@@ -812,12 +802,12 @@ Image::Image(TextureCacheRuntime& runtime, const ImageInfo& info_, GPUVAddr gpu_
|
||||
}
|
||||
}
|
||||
|
||||
void Image::UploadMemory(const ImageBufferMap& map, size_t buffer_offset,
|
||||
void Image::UploadMemory(const StagingBufferRef& map, size_t buffer_offset,
|
||||
std::span<const BufferImageCopy> copies) {
|
||||
// TODO: Move this to another API
|
||||
scheduler->RequestOutsideRenderPassOperationContext();
|
||||
std::vector vk_copies = TransformBufferImageCopies(copies, buffer_offset, aspect_mask);
|
||||
const VkBuffer src_buffer = map.handle;
|
||||
const VkBuffer src_buffer = map.buffer;
|
||||
const VkImage vk_image = *image;
|
||||
const VkImageAspectFlags vk_aspect_mask = aspect_mask;
|
||||
const bool is_initialized = std::exchange(initialized, true);
|
||||
@@ -827,12 +817,12 @@ void Image::UploadMemory(const ImageBufferMap& map, size_t buffer_offset,
|
||||
});
|
||||
}
|
||||
|
||||
void Image::UploadMemory(const ImageBufferMap& map, size_t buffer_offset,
|
||||
void Image::UploadMemory(const StagingBufferRef& map, size_t buffer_offset,
|
||||
std::span<const VideoCommon::BufferCopy> copies) {
|
||||
// TODO: Move this to another API
|
||||
scheduler->RequestOutsideRenderPassOperationContext();
|
||||
std::vector vk_copies = TransformBufferCopies(copies, buffer_offset);
|
||||
const VkBuffer src_buffer = map.handle;
|
||||
const VkBuffer src_buffer = map.buffer;
|
||||
const VkBuffer dst_buffer = *buffer;
|
||||
scheduler->Record([src_buffer, dst_buffer, vk_copies](vk::CommandBuffer cmdbuf) {
|
||||
// TODO: Barriers
|
||||
@@ -840,13 +830,58 @@ void Image::UploadMemory(const ImageBufferMap& map, size_t buffer_offset,
|
||||
});
|
||||
}
|
||||
|
||||
void Image::DownloadMemory(const ImageBufferMap& map, size_t buffer_offset,
|
||||
void Image::DownloadMemory(const StagingBufferRef& map, size_t buffer_offset,
|
||||
std::span<const BufferImageCopy> copies) {
|
||||
std::vector vk_copies = TransformBufferImageCopies(copies, buffer_offset, aspect_mask);
|
||||
scheduler->Record([buffer = map.handle, image = *image, aspect_mask = aspect_mask,
|
||||
scheduler->Record([buffer = map.buffer, image = *image, aspect_mask = aspect_mask,
|
||||
vk_copies](vk::CommandBuffer cmdbuf) {
|
||||
// TODO: Barriers
|
||||
cmdbuf.CopyImageToBuffer(image, VK_IMAGE_LAYOUT_GENERAL, buffer, vk_copies);
|
||||
const VkImageMemoryBarrier read_barrier{
|
||||
.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
|
||||
.pNext = nullptr,
|
||||
.srcAccessMask = VK_ACCESS_MEMORY_WRITE_BIT,
|
||||
.dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT,
|
||||
.oldLayout = VK_IMAGE_LAYOUT_GENERAL,
|
||||
.newLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
|
||||
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
|
||||
.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
|
||||
.image = image,
|
||||
.subresourceRange{
|
||||
.aspectMask = aspect_mask,
|
||||
.baseMipLevel = 0,
|
||||
.levelCount = VK_REMAINING_MIP_LEVELS,
|
||||
.baseArrayLayer = 0,
|
||||
.layerCount = VK_REMAINING_ARRAY_LAYERS,
|
||||
},
|
||||
};
|
||||
const VkImageMemoryBarrier image_write_barrier{
|
||||
.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
|
||||
.pNext = nullptr,
|
||||
.srcAccessMask = 0,
|
||||
.dstAccessMask = VK_ACCESS_MEMORY_WRITE_BIT,
|
||||
.oldLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
|
||||
.newLayout = VK_IMAGE_LAYOUT_GENERAL,
|
||||
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
|
||||
.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
|
||||
.image = image,
|
||||
.subresourceRange{
|
||||
.aspectMask = aspect_mask,
|
||||
.baseMipLevel = 0,
|
||||
.levelCount = VK_REMAINING_MIP_LEVELS,
|
||||
.baseArrayLayer = 0,
|
||||
.layerCount = VK_REMAINING_ARRAY_LAYERS,
|
||||
},
|
||||
};
|
||||
const VkMemoryBarrier memory_write_barrier{
|
||||
.sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER,
|
||||
.pNext = nullptr,
|
||||
.srcAccessMask = VK_ACCESS_MEMORY_WRITE_BIT,
|
||||
.dstAccessMask = VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT,
|
||||
};
|
||||
cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT,
|
||||
0, read_barrier);
|
||||
cmdbuf.CopyImageToBuffer(image, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, buffer, vk_copies);
|
||||
cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
|
||||
0, memory_write_barrier, nullptr, image_write_barrier);
|
||||
});
|
||||
}
|
||||
|
||||
@@ -1106,7 +1141,7 @@ Framebuffer::Framebuffer(TextureCacheRuntime& runtime, std::span<ImageView*, NUM
|
||||
.pAttachments = attachments.data(),
|
||||
.width = key.size.width,
|
||||
.height = key.size.height,
|
||||
.layers = static_cast<u32>(num_layers),
|
||||
.layers = static_cast<u32>(std::max(num_layers, 1)),
|
||||
});
|
||||
if (runtime.device.HasDebuggingToolAttached()) {
|
||||
framebuffer.SetObjectNameEXT(VideoCommon::Name(key).c_str());
|
||||
|
@@ -7,6 +7,7 @@
|
||||
#include <compare>
|
||||
#include <span>
|
||||
|
||||
#include "video_core/renderer_vulkan/vk_staging_buffer_pool.h"
|
||||
#include "video_core/texture_cache/texture_cache.h"
|
||||
#include "video_core/vulkan_common/vulkan_memory_allocator.h"
|
||||
#include "video_core/vulkan_common/vulkan_wrapper.h"
|
||||
@@ -53,19 +54,6 @@ struct hash<Vulkan::RenderPassKey> {
|
||||
|
||||
namespace Vulkan {
|
||||
|
||||
struct ImageBufferMap {
|
||||
[[nodiscard]] VkBuffer Handle() const noexcept {
|
||||
return handle;
|
||||
}
|
||||
|
||||
[[nodiscard]] std::span<u8> Span() const noexcept {
|
||||
return span;
|
||||
}
|
||||
|
||||
VkBuffer handle;
|
||||
std::span<u8> span;
|
||||
};
|
||||
|
||||
struct TextureCacheRuntime {
|
||||
const Device& device;
|
||||
VKScheduler& scheduler;
|
||||
@@ -76,9 +64,9 @@ struct TextureCacheRuntime {
|
||||
|
||||
void Finish();
|
||||
|
||||
[[nodiscard]] ImageBufferMap MapUploadBuffer(size_t size);
|
||||
[[nodiscard]] StagingBufferRef UploadStagingBuffer(size_t size);
|
||||
|
||||
[[nodiscard]] ImageBufferMap MapDownloadBuffer(size_t size);
|
||||
[[nodiscard]] StagingBufferRef DownloadStagingBuffer(size_t size);
|
||||
|
||||
void BlitImage(Framebuffer* dst_framebuffer, ImageView& dst, ImageView& src,
|
||||
const std::array<Offset2D, 2>& dst_region,
|
||||
@@ -94,7 +82,7 @@ struct TextureCacheRuntime {
|
||||
return false;
|
||||
}
|
||||
|
||||
void AccelerateImageUpload(Image&, const ImageBufferMap&, size_t,
|
||||
void AccelerateImageUpload(Image&, const StagingBufferRef&, size_t,
|
||||
std::span<const VideoCommon::SwizzleParameters>) {
|
||||
UNREACHABLE();
|
||||
}
|
||||
@@ -112,13 +100,13 @@ public:
|
||||
explicit Image(TextureCacheRuntime&, const VideoCommon::ImageInfo& info, GPUVAddr gpu_addr,
|
||||
VAddr cpu_addr);
|
||||
|
||||
void UploadMemory(const ImageBufferMap& map, size_t buffer_offset,
|
||||
void UploadMemory(const StagingBufferRef& map, size_t buffer_offset,
|
||||
std::span<const VideoCommon::BufferImageCopy> copies);
|
||||
|
||||
void UploadMemory(const ImageBufferMap& map, size_t buffer_offset,
|
||||
void UploadMemory(const StagingBufferRef& map, size_t buffer_offset,
|
||||
std::span<const VideoCommon::BufferCopy> copies);
|
||||
|
||||
void DownloadMemory(const ImageBufferMap& map, size_t buffer_offset,
|
||||
void DownloadMemory(const StagingBufferRef& map, size_t buffer_offset,
|
||||
std::span<const VideoCommon::BufferImageCopy> copies);
|
||||
|
||||
[[nodiscard]] VkImage Handle() const noexcept {
|
||||
|
Reference in New Issue
Block a user