early-access version 1332
This commit is contained in:
@@ -2,98 +2,235 @@
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include <memory>
|
||||
#include <span>
|
||||
|
||||
#include <glad/glad.h>
|
||||
|
||||
#include "common/assert.h"
|
||||
#include "common/microprofile.h"
|
||||
#include "video_core/buffer_cache/buffer_cache.h"
|
||||
#include "video_core/engines/maxwell_3d.h"
|
||||
#include "video_core/rasterizer_interface.h"
|
||||
#include "video_core/renderer_opengl/gl_buffer_cache.h"
|
||||
#include "video_core/renderer_opengl/gl_device.h"
|
||||
#include "video_core/renderer_opengl/gl_rasterizer.h"
|
||||
#include "video_core/renderer_opengl/gl_resource_manager.h"
|
||||
#include "video_core/vulkan_common/vulkan_device.h"
|
||||
#include "video_core/vulkan_common/vulkan_instance.h"
|
||||
#include "video_core/vulkan_common/vulkan_library.h"
|
||||
#include "video_core/vulkan_common/vulkan_memory_allocator.h"
|
||||
|
||||
namespace OpenGL {
|
||||
namespace {
|
||||
/// Descriptor for a storage buffer that is passed to assembly shaders by GPU address.
/// The Bind*StorageBuffer functions in this file fill one of these and hand it to
/// glProgramLocalParametersI4uivNV reinterpreted as GLuint words.
struct BindlessSSBO {
|
||||
// GPU address of the first byte of the bound range (buffer address plus offset)
GLuint64EXT address;
|
||||
// Size of the bound range; callers in this file pass the binding size in bytes
GLsizei length;
|
||||
// Unused word; callers in this file always set it to zero
GLsizei padding;
|
||||
};
|
||||
// The descriptor is uploaded as one 4-component unsigned integer parameter
static_assert(sizeof(BindlessSSBO) == sizeof(GLuint) * 4);
|
||||
|
||||
using Maxwell = Tegra::Engines::Maxwell3D::Regs;
|
||||
/// NV assembly program targets, indexed by the `stage` argument of BindStorageBuffer.
/// NOTE(review): the order presumably matches the graphics stage numbering used by the
/// generic buffer cache (vertex, tess control, tess eval, geometry, fragment) - confirm.
constexpr std::array PROGRAM_LUT{
|
||||
GL_VERTEX_PROGRAM_NV, GL_TESS_CONTROL_PROGRAM_NV, GL_TESS_EVALUATION_PROGRAM_NV,
|
||||
GL_GEOMETRY_PROGRAM_NV, GL_FRAGMENT_PROGRAM_NV,
|
||||
};
|
||||
} // Anonymous namespace
|
||||
|
||||
MICROPROFILE_DEFINE(OpenGL_Buffer_Download, "OpenGL", "Buffer Download", MP_RGB(192, 192, 128));
|
||||
/// Constructs a buffer from null-buffer parameters. The runtime argument is unused and no
/// OpenGL object is created here; the parameters are forwarded to the base class only.
Buffer::Buffer(BufferCacheRuntime&, VideoCommon::NullBufferParams null_params)
|
||||
: VideoCommon::BufferBase<VideoCore::RasterizerInterface>(null_params) {}
|
||||
|
||||
Buffer::Buffer(const Device& device_, VAddr cpu_addr_, std::size_t size_)
|
||||
: BufferBlock{cpu_addr_, size_} {
|
||||
gl_buffer.Create();
|
||||
glNamedBufferData(gl_buffer.handle, static_cast<GLsizeiptr>(size_), nullptr, GL_DYNAMIC_DRAW);
|
||||
if (device_.UseAssemblyShaders() || device_.HasVertexBufferUnifiedMemory()) {
|
||||
glMakeNamedBufferResidentNV(gl_buffer.handle, GL_READ_WRITE);
|
||||
glGetNamedBufferParameterui64vNV(gl_buffer.handle, GL_BUFFER_GPU_ADDRESS_NV, &gpu_address);
|
||||
/// Creates the OpenGL buffer object backing a cached guest buffer.
///
/// The buffer is labelled with its CPU address for debugging. Its storage is either
/// imported from a memory commit (assembly shader path) or allocated by the GL driver.
/// When unified vertex buffers are available, the buffer's GPU address is queried and cached.
///
/// @param runtime      Runtime providing device capabilities and the memory allocator
/// @param rasterizer_  Rasterizer forwarded to the base class
/// @param cpu_addr_    Guest CPU address forwarded to the base class
/// @param size_bytes_  Buffer size in bytes forwarded to the base class
Buffer::Buffer(BufferCacheRuntime& runtime, VideoCore::RasterizerInterface& rasterizer_,
               VAddr cpu_addr_, u64 size_bytes_)
    : VideoCommon::BufferBase<VideoCore::RasterizerInterface>(rasterizer_, cpu_addr_, size_bytes_) {
    buffer.Create();

    const std::string label = fmt::format("Buffer 0x{:x}", CpuAddr());
    glObjectLabel(GL_BUFFER, buffer.handle, static_cast<GLsizei>(label.size()), label.data());

    const bool import_external_memory = runtime.device.UseAssemblyShaders();
    if (!import_external_memory) {
        // Regular path: let the driver allocate the storage
        glNamedBufferData(buffer.handle, SizeBytes(), nullptr, GL_DYNAMIC_DRAW);
    } else {
        // Assembly shader path: back the buffer with the memory committed by CreateMemoryObjects
        CreateMemoryObjects(runtime);
        glNamedBufferStorageMemEXT(buffer.handle, SizeBytes(), memory_commit.ExportOpenGLHandle(),
                                   memory_commit.Offset());
    }

    if (runtime.has_unified_vertex_buffers) {
        glGetNamedBufferParameterui64vNV(buffer.handle, GL_BUFFER_GPU_ADDRESS_NV, &address);
    }
}
|
||||
|
||||
Buffer::~Buffer() = default;
|
||||
|
||||
void Buffer::Upload(std::size_t offset, std::size_t data_size, const u8* data) {
|
||||
glNamedBufferSubData(Handle(), static_cast<GLintptr>(offset),
|
||||
static_cast<GLsizeiptr>(data_size), data);
|
||||
void Buffer::ImmediateUpload(size_t offset, std::span<const u8> data) noexcept {
|
||||
glNamedBufferSubData(buffer.handle, static_cast<GLintptr>(offset),
|
||||
static_cast<GLsizeiptr>(data.size_bytes()), data.data());
|
||||
}
|
||||
|
||||
void Buffer::Download(std::size_t offset, std::size_t data_size, u8* data) {
|
||||
MICROPROFILE_SCOPE(OpenGL_Buffer_Download);
|
||||
const GLsizeiptr gl_size = static_cast<GLsizeiptr>(data_size);
|
||||
const GLintptr gl_offset = static_cast<GLintptr>(offset);
|
||||
if (read_buffer.handle == 0) {
|
||||
read_buffer.Create();
|
||||
glNamedBufferData(read_buffer.handle, static_cast<GLsizeiptr>(Size()), nullptr,
|
||||
GL_STREAM_READ);
|
||||
}
|
||||
glMemoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT);
|
||||
glCopyNamedBufferSubData(gl_buffer.handle, read_buffer.handle, gl_offset, gl_offset, gl_size);
|
||||
glGetNamedBufferSubData(read_buffer.handle, gl_offset, gl_size, data);
|
||||
void Buffer::ImmediateDownload(size_t offset, std::span<u8> data) noexcept {
|
||||
glGetNamedBufferSubData(buffer.handle, static_cast<GLintptr>(offset),
|
||||
static_cast<GLsizeiptr>(data.size_bytes()), data.data());
|
||||
}
|
||||
|
||||
void Buffer::CopyFrom(const Buffer& src, std::size_t src_offset, std::size_t dst_offset,
|
||||
std::size_t copy_size) {
|
||||
glCopyNamedBufferSubData(src.Handle(), Handle(), static_cast<GLintptr>(src_offset),
|
||||
static_cast<GLintptr>(dst_offset), static_cast<GLsizeiptr>(copy_size));
|
||||
}
|
||||
|
||||
OGLBufferCache::OGLBufferCache(VideoCore::RasterizerInterface& rasterizer_,
|
||||
Tegra::MemoryManager& gpu_memory_, Core::Memory::Memory& cpu_memory_,
|
||||
const Device& device_, OGLStreamBuffer& stream_buffer_,
|
||||
StateTracker& state_tracker)
|
||||
: GenericBufferCache{rasterizer_, gpu_memory_, cpu_memory_, stream_buffer_}, device{device_} {
|
||||
if (!device.HasFastBufferSubData()) {
|
||||
/// Ensures the buffer is resident with at least the requested access level.
///
/// The check relies on the numeric ordering of the GL enums:
/// GL_NONE (default) < GL_READ_ONLY < GL_READ_WRITE.
/// Requests that do not raise the current level, and buffers with no GL object, are ignored.
///
/// @param access Requested residency access (GL_READ_ONLY or GL_READ_WRITE)
void Buffer::MakeResident(GLenum access) noexcept {
    const bool has_object = buffer.handle != 0;
    const bool raises_access = access > current_residency_access;
    if (!has_object || !raises_access) {
        return;
    }
    const GLenum previous_access = current_residency_access;
    current_residency_access = access;
    if (previous_access != GL_NONE) {
        // Already resident with a weaker access: drop residency before promoting it
        glMakeNamedBufferNonResidentNV(buffer.handle);
    }
    glMakeNamedBufferResidentNV(buffer.handle, access);
}
|
||||
|
||||
static constexpr GLsizeiptr size = static_cast<GLsizeiptr>(Maxwell::MaxConstBufferSize);
|
||||
glCreateBuffers(static_cast<GLsizei>(std::size(cbufs)), std::data(cbufs));
|
||||
for (const GLuint cbuf : cbufs) {
|
||||
glNamedBufferData(cbuf, size, nullptr, GL_STREAM_DRAW);
|
||||
/// Returns a GL buffer handle whose storage starts `offset` bytes into this buffer.
///
/// Offset zero maps to the main buffer. Other offsets are served from a cache of
/// previously created sub buffers; on a miss a new GL buffer is created over the same
/// memory commit, shifted by `offset`, and remembered for later calls.
///
/// @param offset Byte offset from the start of this buffer
/// @return GL handle of the buffer aliasing the range [offset, SizeBytes())
GLuint Buffer::SubBuffer(u32 offset) {
    if (offset == 0) {
        return buffer.handle;
    }
    for (const auto& cached : subs) {
        if (cached.second == offset) {
            return cached.first.handle;
        }
    }
    // Not cached yet: alias the tail of the memory commit with a new buffer object
    OGLBuffer alias;
    alias.Create();
    glNamedBufferStorageMemEXT(alias.handle, SizeBytes() - offset,
                               memory_commit.ExportOpenGLHandle(), memory_commit.Offset() + offset);
    auto& inserted = subs.emplace_back(std::move(alias), offset);
    return inserted.first.handle;
}
|
||||
|
||||
/// Commits device-local memory sized for this buffer and stores it in `memory_commit`.
///
/// A Vulkan buffer with every usage the cache may need is created only to query its
/// memory requirements; it goes out of scope at the end of this function and is never
/// bound to the committed memory here.
///
/// @param runtime Runtime providing the Vulkan device and memory allocator
void Buffer::CreateMemoryObjects(BufferCacheRuntime& runtime) {
    constexpr VkBufferUsageFlags all_usages =
        VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT |
        VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT |
        VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT |
        VK_BUFFER_USAGE_INDEX_BUFFER_BIT | VK_BUFFER_USAGE_VERTEX_BUFFER_BIT;

    const VkBufferCreateInfo buffer_ci{
        .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
        .pNext = nullptr,
        .flags = 0,
        .size = SizeBytes(),
        .usage = all_usages,
        .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
        .queueFamilyIndexCount = 0,
        .pQueueFamilyIndices = nullptr,
    };

    auto& logical = runtime.vulkan_device->GetLogical();
    auto probe_buffer = logical.CreateBuffer(buffer_ci);
    const VkMemoryRequirements requirements = logical.GetBufferMemoryRequirements(*probe_buffer);

    auto& allocator = runtime.vulkan_memory_allocator;
    memory_commit = allocator->Commit(requirements, Vulkan::MemoryUsage::DeviceLocal);
}
|
||||
|
||||
/// Builds the OpenGL buffer cache runtime.
///
/// Caches device capabilities used on hot paths, queries the vertex attribute limit and
/// allocates one SKIP_CACHE_SIZE-byte buffer per stage and uniform binding for fast
/// uniform uploads. A stream buffer is only created when the device lacks fast
/// glBufferSubData.
///
/// @param device_                  OpenGL device capabilities
/// @param vulkan_device_           Vulkan device used for memory interop
/// @param vulkan_memory_allocator_ Allocator used for memory interop
BufferCacheRuntime::BufferCacheRuntime(const Device& device_, const Vulkan::Device* vulkan_device_,
                                       Vulkan::MemoryAllocator* vulkan_memory_allocator_)
    : device{device_}, vulkan_device{vulkan_device_},
      vulkan_memory_allocator{vulkan_memory_allocator_},
      stream_buffer{device.HasFastBufferSubData() ? std::nullopt
                                                  : std::make_optional<StreamBuffer>()} {
    use_assembly_shaders = device.UseAssemblyShaders();
    has_unified_vertex_buffers = device.HasVertexBufferUnifiedMemory();

    GLint gl_max_attributes;
    glGetIntegerv(GL_MAX_VERTEX_ATTRIBS, &gl_max_attributes);
    max_attributes = static_cast<u32>(gl_max_attributes);

    for (auto& per_stage : fast_uniforms) {
        for (OGLBuffer& uniform : per_stage) {
            uniform.Create();
            glNamedBufferData(uniform.handle, BufferCache::SKIP_CACHE_SIZE, nullptr,
                              GL_STREAM_DRAW);
        }
    }
}
|
||||
|
||||
OGLBufferCache::~OGLBufferCache() {
|
||||
glDeleteBuffers(static_cast<GLsizei>(std::size(cbufs)), std::data(cbufs));
|
||||
/// Performs a list of buffer-to-buffer copies from `src_buffer` into `dst_buffer`.
/// @param dst_buffer Destination buffer
/// @param src_buffer Source buffer
/// @param copies     Regions to copy; each entry carries source offset, destination offset and size
void BufferCacheRuntime::CopyBuffer(Buffer& dst_buffer, Buffer& src_buffer,
                                    std::span<const VideoCommon::BufferCopy> copies) {
    const GLuint read_handle = src_buffer.Handle();
    const GLuint write_handle = dst_buffer.Handle();
    for (const VideoCommon::BufferCopy& region : copies) {
        const GLintptr read_offset = static_cast<GLintptr>(region.src_offset);
        const GLintptr write_offset = static_cast<GLintptr>(region.dst_offset);
        const GLsizeiptr num_bytes = static_cast<GLsizeiptr>(region.size);
        glCopyNamedBufferSubData(read_handle, write_handle, read_offset, write_offset, num_bytes);
    }
}
|
||||
|
||||
std::shared_ptr<Buffer> OGLBufferCache::CreateBlock(VAddr cpu_addr, std::size_t size) {
|
||||
return std::make_shared<Buffer>(device, cpu_addr, size);
|
||||
/// Binds `buffer` as the index buffer.
///
/// Without unified vertex buffers the buffer is bound to GL_ELEMENT_ARRAY_BUFFER and
/// `offset` is stored for later retrieval through IndexOffset(). With unified vertex
/// buffers the buffer is made resident and its GPU address range is bound instead.
///
/// @param buffer Buffer holding the indices
/// @param offset Byte offset of the first index inside `buffer`
/// @param size   Size in bytes of the index range
void BufferCacheRuntime::BindIndexBuffer(Buffer& buffer, u32 offset, u32 size) {
    if (!has_unified_vertex_buffers) {
        glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, buffer.Handle());
        index_buffer_offset = offset;
        return;
    }
    buffer.MakeResident(GL_READ_ONLY);
    const auto range_begin = buffer.HostGpuAddr() + offset;
    glBufferAddressRangeNV(GL_ELEMENT_ARRAY_ADDRESS_NV, 0, range_begin,
                           static_cast<GLsizeiptr>(size));
}
|
||||
|
||||
OGLBufferCache::BufferInfo OGLBufferCache::GetEmptyBuffer(std::size_t) {
|
||||
return {0, 0, 0};
|
||||
/// Binds a range of `buffer` to vertex buffer binding `index`.
///
/// Bindings at or beyond the device's vertex attribute limit are silently ignored.
/// With unified vertex buffers only the stride is set through glBindVertexBuffer and the
/// data is supplied by GPU address; otherwise a regular buffer binding is used.
///
/// @param index  Vertex buffer binding index
/// @param buffer Buffer holding the vertex data
/// @param offset Byte offset of the first vertex inside `buffer`
/// @param size   Size in bytes of the bound range (only used by the unified path)
/// @param stride Distance in bytes between consecutive vertices
void BufferCacheRuntime::BindVertexBuffer(u32 index, Buffer& buffer, u32 offset, u32 size,
                                          u32 stride) {
    if (index >= max_attributes) {
        return;
    }
    const GLsizei gl_stride = static_cast<GLsizei>(stride);
    if (!has_unified_vertex_buffers) {
        glBindVertexBuffer(index, buffer.Handle(), static_cast<GLintptr>(offset), gl_stride);
        return;
    }
    buffer.MakeResident(GL_READ_ONLY);
    glBindVertexBuffer(index, 0, 0, gl_stride);
    glBufferAddressRangeNV(GL_VERTEX_ATTRIB_ARRAY_ADDRESS_NV, index,
                           buffer.HostGpuAddr() + offset, static_cast<GLsizeiptr>(size));
}
|
||||
|
||||
OGLBufferCache::BufferInfo OGLBufferCache::ConstBufferUpload(const void* raw_pointer,
|
||||
std::size_t size) {
|
||||
DEBUG_ASSERT(cbuf_cursor < std::size(cbufs));
|
||||
const GLuint cbuf = cbufs[cbuf_cursor++];
|
||||
void BufferCacheRuntime::BindUniformBuffer(size_t stage, u32 binding_index, Buffer& buffer,
|
||||
u32 offset, u32 size) {
|
||||
if (use_assembly_shaders) {
|
||||
const GLuint sub_buffer = buffer.SubBuffer(offset);
|
||||
glBindBufferRangeNV(PABO_LUT[stage], binding_index, sub_buffer, 0,
|
||||
static_cast<GLsizeiptr>(size));
|
||||
} else {
|
||||
const GLuint base_binding = device.GetBaseBindings(stage).uniform_buffer;
|
||||
const GLuint binding = base_binding + binding_index;
|
||||
glBindBufferRange(GL_UNIFORM_BUFFER, binding, buffer.Handle(),
|
||||
static_cast<GLintptr>(offset), static_cast<GLsizeiptr>(size));
|
||||
}
|
||||
}
|
||||
|
||||
glNamedBufferSubData(cbuf, 0, static_cast<GLsizeiptr>(size), raw_pointer);
|
||||
return {cbuf, 0, 0};
|
||||
/// Binds a range of `buffer` as a compute uniform buffer.
///
/// With assembly shaders the range is bound as a compute program parameter buffer through
/// a sub buffer that starts at `offset`; otherwise it is bound to GL_UNIFORM_BUFFER at
/// `binding_index` directly.
///
/// @param binding_index Uniform buffer slot
/// @param buffer        Buffer holding the uniform data
/// @param offset        Byte offset of the range inside `buffer`
/// @param size          Size in bytes of the range
void BufferCacheRuntime::BindComputeUniformBuffer(u32 binding_index, Buffer& buffer, u32 offset,
                                                  u32 size) {
    if (!use_assembly_shaders) {
        glBindBufferRange(GL_UNIFORM_BUFFER, binding_index, buffer.Handle(),
                          static_cast<GLintptr>(offset), static_cast<GLsizeiptr>(size));
        return;
    }
    glBindBufferRangeNV(GL_COMPUTE_PROGRAM_PARAMETER_BUFFER_NV, binding_index,
                        buffer.SubBuffer(offset), 0, static_cast<GLsizeiptr>(size));
}
|
||||
|
||||
void BufferCacheRuntime::BindStorageBuffer(size_t stage, u32 binding_index, Buffer& buffer,
|
||||
u32 offset, u32 size, bool is_written) {
|
||||
if (use_assembly_shaders) {
|
||||
const BindlessSSBO ssbo{
|
||||
.address = buffer.HostGpuAddr() + offset,
|
||||
.length = static_cast<GLsizei>(size),
|
||||
.padding = 0,
|
||||
};
|
||||
buffer.MakeResident(is_written ? GL_READ_WRITE : GL_READ_ONLY);
|
||||
glProgramLocalParametersI4uivNV(PROGRAM_LUT[stage], binding_index, 1,
|
||||
reinterpret_cast<const GLuint*>(&ssbo));
|
||||
} else {
|
||||
const GLuint base_binding = device.GetBaseBindings(stage).shader_storage_buffer;
|
||||
const GLuint binding = base_binding + binding_index;
|
||||
glBindBufferRange(GL_SHADER_STORAGE_BUFFER, binding, buffer.Handle(),
|
||||
static_cast<GLintptr>(offset), static_cast<GLsizeiptr>(size));
|
||||
}
|
||||
}
|
||||
|
||||
/// Binds a range of `buffer` as a compute storage buffer.
///
/// With assembly shaders the range is passed by GPU address as a BindlessSSBO program
/// local parameter and the buffer is made resident with the access the shader needs.
/// Otherwise the range is bound to GL_SHADER_STORAGE_BUFFER at `binding_index`; an empty
/// range unbinds the slot instead.
///
/// @param binding_index Storage buffer slot
/// @param buffer        Buffer holding the storage data
/// @param offset        Byte offset of the range inside `buffer`
/// @param size          Size in bytes of the range
/// @param is_written    True when the shader writes to the buffer
void BufferCacheRuntime::BindComputeStorageBuffer(u32 binding_index, Buffer& buffer, u32 offset,
                                                  u32 size, bool is_written) {
    if (use_assembly_shaders) {
        const BindlessSSBO descriptor{
            .address = buffer.HostGpuAddr() + offset,
            .length = static_cast<GLsizei>(size),
            .padding = 0,
        };
        const GLenum required_access = is_written ? GL_READ_WRITE : GL_READ_ONLY;
        buffer.MakeResident(required_access);
        glProgramLocalParametersI4uivNV(GL_COMPUTE_PROGRAM_NV, binding_index, 1,
                                        reinterpret_cast<const GLuint*>(&descriptor));
        return;
    }
    if (size != 0) {
        glBindBufferRange(GL_SHADER_STORAGE_BUFFER, binding_index, buffer.Handle(),
                          static_cast<GLintptr>(offset), static_cast<GLsizeiptr>(size));
        return;
    }
    // Empty range: clear the binding rather than binding zero bytes
    glBindBufferRange(GL_SHADER_STORAGE_BUFFER, binding_index, 0, 0, 0);
}
|
||||
|
||||
/// Binds a range of `buffer` to transform feedback binding `index`.
/// @param index  Transform feedback buffer binding index
/// @param buffer Buffer receiving the captured data
/// @param offset Byte offset of the range inside `buffer`
/// @param size   Size in bytes of the range
void BufferCacheRuntime::BindTransformFeedbackBuffer(u32 index, Buffer& buffer, u32 offset,
                                                     u32 size) {
    const GLintptr range_offset = static_cast<GLintptr>(offset);
    const GLsizeiptr range_size = static_cast<GLsizeiptr>(size);
    glBindBufferRange(GL_TRANSFORM_FEEDBACK_BUFFER, index, buffer.Handle(), range_offset,
                      range_size);
}
|
||||
|
||||
} // namespace OpenGL
|
||||
|
@@ -5,79 +5,167 @@
|
||||
#pragma once
|
||||
|
||||
#include <array>
|
||||
#include <memory>
|
||||
#include <span>
|
||||
|
||||
#include "common/alignment.h"
|
||||
#include "common/common_types.h"
|
||||
#include "common/dynamic_library.h"
|
||||
#include "video_core/buffer_cache/buffer_cache.h"
|
||||
#include "video_core/engines/maxwell_3d.h"
|
||||
#include "video_core/rasterizer_interface.h"
|
||||
#include "video_core/renderer_opengl/gl_device.h"
|
||||
#include "video_core/renderer_opengl/gl_resource_manager.h"
|
||||
#include "video_core/renderer_opengl/gl_stream_buffer.h"
|
||||
#include "video_core/vulkan_common/vulkan_device.h"
|
||||
#include "video_core/vulkan_common/vulkan_memory_allocator.h"
|
||||
|
||||
namespace Core {
|
||||
class System;
|
||||
}
|
||||
namespace Vulkan {
|
||||
class Device;
|
||||
class MemoryAllocator;
|
||||
} // namespace Vulkan
|
||||
|
||||
namespace OpenGL {
|
||||
|
||||
class Device;
|
||||
class OGLStreamBuffer;
|
||||
class RasterizerOpenGL;
|
||||
class StateTracker;
|
||||
class BufferCacheRuntime;
|
||||
|
||||
class Buffer : public VideoCommon::BufferBlock {
|
||||
class Buffer : public VideoCommon::BufferBase<VideoCore::RasterizerInterface> {
|
||||
public:
|
||||
explicit Buffer(const Device& device_, VAddr cpu_addr_, std::size_t size_);
|
||||
~Buffer();
|
||||
explicit Buffer(BufferCacheRuntime&, VideoCore::RasterizerInterface& rasterizer, VAddr cpu_addr,
|
||||
u64 size_bytes);
|
||||
explicit Buffer(BufferCacheRuntime&, VideoCommon::NullBufferParams);
|
||||
|
||||
void Upload(std::size_t offset, std::size_t data_size, const u8* data);
|
||||
void ImmediateUpload(size_t offset, std::span<const u8> data) noexcept;
|
||||
|
||||
void Download(std::size_t offset, std::size_t data_size, u8* data);
|
||||
void ImmediateDownload(size_t offset, std::span<u8> data) noexcept;
|
||||
|
||||
void CopyFrom(const Buffer& src, std::size_t src_offset, std::size_t dst_offset,
|
||||
std::size_t copy_size);
|
||||
void MakeResident(GLenum access) noexcept;
|
||||
|
||||
GLuint Handle() const noexcept {
|
||||
return gl_buffer.handle;
|
||||
[[nodiscard]] GLuint SubBuffer(u32 offset);
|
||||
|
||||
[[nodiscard]] GLuint64EXT HostGpuAddr() const noexcept {
|
||||
return address;
|
||||
}
|
||||
|
||||
u64 Address() const noexcept {
|
||||
return gpu_address;
|
||||
[[nodiscard]] GLuint Handle() const noexcept {
|
||||
return buffer.handle;
|
||||
}
|
||||
|
||||
private:
|
||||
OGLBuffer gl_buffer;
|
||||
OGLBuffer read_buffer;
|
||||
u64 gpu_address = 0;
|
||||
void CreateMemoryObjects(BufferCacheRuntime& runtime);
|
||||
|
||||
GLuint64EXT address = 0;
|
||||
Vulkan::MemoryCommit memory_commit;
|
||||
OGLBuffer buffer;
|
||||
GLenum current_residency_access = GL_NONE;
|
||||
std::vector<std::pair<OGLBuffer, u32>> subs;
|
||||
};
|
||||
|
||||
using GenericBufferCache = VideoCommon::BufferCache<Buffer, GLuint, OGLStreamBuffer>;
|
||||
class OGLBufferCache final : public GenericBufferCache {
|
||||
class BufferCacheRuntime {
|
||||
friend Buffer;
|
||||
|
||||
public:
|
||||
explicit OGLBufferCache(VideoCore::RasterizerInterface& rasterizer,
|
||||
Tegra::MemoryManager& gpu_memory, Core::Memory::Memory& cpu_memory,
|
||||
const Device& device, OGLStreamBuffer& stream_buffer,
|
||||
StateTracker& state_tracker);
|
||||
~OGLBufferCache();
|
||||
static constexpr u8 INVALID_BINDING = std::numeric_limits<u8>::max();
|
||||
|
||||
BufferInfo GetEmptyBuffer(std::size_t) override;
|
||||
explicit BufferCacheRuntime(const Device& device_, const Vulkan::Device* vulkan_device_,
|
||||
Vulkan::MemoryAllocator* vulkan_memory_allocator_);
|
||||
|
||||
void Acquire() noexcept {
|
||||
cbuf_cursor = 0;
|
||||
void CopyBuffer(Buffer& dst_buffer, Buffer& src_buffer,
|
||||
std::span<const VideoCommon::BufferCopy> copies);
|
||||
|
||||
void BindIndexBuffer(Buffer& buffer, u32 offset, u32 size);
|
||||
|
||||
void BindVertexBuffer(u32 index, Buffer& buffer, u32 offset, u32 size, u32 stride);
|
||||
|
||||
void BindUniformBuffer(size_t stage, u32 binding_index, Buffer& buffer, u32 offset, u32 size);
|
||||
|
||||
void BindComputeUniformBuffer(u32 binding_index, Buffer& buffer, u32 offset, u32 size);
|
||||
|
||||
void BindStorageBuffer(size_t stage, u32 binding_index, Buffer& buffer, u32 offset, u32 size,
|
||||
bool is_written);
|
||||
|
||||
void BindComputeStorageBuffer(u32 binding_index, Buffer& buffer, u32 offset, u32 size,
|
||||
bool is_written);
|
||||
|
||||
void BindTransformFeedbackBuffer(u32 index, Buffer& buffer, u32 offset, u32 size);
|
||||
|
||||
void BindFastUniformBuffer(size_t stage, u32 binding_index, u32 size) {
|
||||
if (use_assembly_shaders) {
|
||||
const GLuint handle = fast_uniforms[stage][binding_index].handle;
|
||||
const GLsizeiptr gl_size = static_cast<GLsizeiptr>(size);
|
||||
glBindBufferRangeNV(PABO_LUT[stage], binding_index, handle, 0, gl_size);
|
||||
} else {
|
||||
const GLuint base_binding = device.GetBaseBindings(stage).uniform_buffer;
|
||||
const GLuint binding = base_binding + binding_index;
|
||||
glBindBufferRange(GL_UNIFORM_BUFFER, binding,
|
||||
fast_uniforms[stage][binding_index].handle, 0,
|
||||
static_cast<GLsizeiptr>(size));
|
||||
}
|
||||
}
|
||||
|
||||
protected:
|
||||
std::shared_ptr<Buffer> CreateBlock(VAddr cpu_addr, std::size_t size) override;
|
||||
void PushFastUniformBuffer(size_t stage, u32 binding_index, std::span<const u8> data) {
|
||||
if (use_assembly_shaders) {
|
||||
glProgramBufferParametersIuivNV(
|
||||
PABO_LUT[stage], binding_index, 0,
|
||||
static_cast<GLsizei>(data.size_bytes() / sizeof(GLuint)),
|
||||
reinterpret_cast<const GLuint*>(data.data()));
|
||||
} else {
|
||||
glNamedBufferSubData(fast_uniforms[stage][binding_index].handle, 0,
|
||||
static_cast<GLsizeiptr>(data.size_bytes()), data.data());
|
||||
}
|
||||
}
|
||||
|
||||
BufferInfo ConstBufferUpload(const void* raw_pointer, std::size_t size) override;
|
||||
/// Requests `size` bytes from the stream buffer, binds that range as the uniform buffer for
/// (`stage`, `binding_index`) and returns the span the caller should fill.
/// Dereferences `stream_buffer` unconditionally, so it must hold a value when this is called.
/// @param stage         Graphics stage index
/// @param binding_index Uniform buffer slot inside the stage
/// @param size          Number of bytes to reserve and bind
/// @return Span returned by the stream buffer request
std::span<u8> BindMappedUniformBuffer(size_t stage, u32 binding_index, u32 size) noexcept {
    const auto [writable_span, stream_offset] = stream_buffer->Request(static_cast<size_t>(size));
    const GLuint slot = device.GetBaseBindings(stage).uniform_buffer + binding_index;
    const GLintptr range_offset = static_cast<GLintptr>(stream_offset);
    const GLsizeiptr range_size = static_cast<GLsizeiptr>(size);
    glBindBufferRange(GL_UNIFORM_BUFFER, slot, stream_buffer->Handle(), range_offset, range_size);
    return writable_span;
}
|
||||
|
||||
/// Returns the index buffer offset recorded by BindIndexBuffer, encoded as a pointer value.
[[nodiscard]] const GLvoid* IndexOffset() const noexcept {
    const uintptr_t byte_offset = static_cast<uintptr_t>(index_buffer_offset);
    return reinterpret_cast<const GLvoid*>(byte_offset);
}
|
||||
|
||||
/// Forwards the device's HasFastBufferSubData() capability query.
/// The constructor uses the same query to decide whether a stream buffer is created.
[[nodiscard]] bool HasFastBufferSubData() const noexcept {
|
||||
return device.HasFastBufferSubData();
|
||||
}
|
||||
|
||||
private:
|
||||
static constexpr std::size_t NUM_CBUFS = Tegra::Engines::Maxwell3D::Regs::MaxConstBuffers *
|
||||
Tegra::Engines::Maxwell3D::Regs::MaxShaderProgram;
|
||||
static constexpr std::array PABO_LUT{
|
||||
GL_VERTEX_PROGRAM_PARAMETER_BUFFER_NV, GL_TESS_CONTROL_PROGRAM_PARAMETER_BUFFER_NV,
|
||||
GL_TESS_EVALUATION_PROGRAM_PARAMETER_BUFFER_NV, GL_GEOMETRY_PROGRAM_PARAMETER_BUFFER_NV,
|
||||
GL_FRAGMENT_PROGRAM_PARAMETER_BUFFER_NV,
|
||||
};
|
||||
|
||||
const Device& device;
|
||||
const Vulkan::Device* vulkan_device;
|
||||
Vulkan::MemoryAllocator* vulkan_memory_allocator;
|
||||
std::optional<StreamBuffer> stream_buffer;
|
||||
|
||||
std::size_t cbuf_cursor = 0;
|
||||
std::array<GLuint, NUM_CBUFS> cbufs{};
|
||||
u32 max_attributes = 0;
|
||||
|
||||
bool use_assembly_shaders = false;
|
||||
bool has_unified_vertex_buffers = false;
|
||||
|
||||
std::array<std::array<OGLBuffer, VideoCommon::NUM_GRAPHICS_UNIFORM_BUFFERS>,
|
||||
VideoCommon::NUM_STAGES>
|
||||
fast_uniforms;
|
||||
|
||||
u32 index_buffer_offset = 0;
|
||||
};
|
||||
|
||||
/// Compile-time configuration handed to VideoCommon::BufferCache for the OpenGL backend.
/// It names the runtime and buffer types and a set of boolean traits.
/// NOTE(review): the exact meaning of each trait is defined by the generic buffer cache,
/// which is not visible here - consult video_core/buffer_cache/buffer_cache.h.
struct BufferCacheParams {
|
||||
using Runtime = OpenGL::BufferCacheRuntime;
|
||||
using Buffer = OpenGL::Buffer;
|
||||
|
||||
static constexpr bool IS_OPENGL = true;
|
||||
static constexpr bool HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS = true;
|
||||
static constexpr bool HAS_FULL_INDEX_AND_PRIMITIVE_SUPPORT = true;
|
||||
static constexpr bool NEEDS_BIND_UNIFORM_INDEX = true;
|
||||
static constexpr bool NEEDS_BIND_STORAGE_INDEX = true;
|
||||
static constexpr bool USE_MEMORY_MAPS = false;
|
||||
};
|
||||
|
||||
using BufferCache = VideoCommon::BufferCache<BufferCacheParams>;
|
||||
|
||||
} // namespace OpenGL
|
||||
|
@@ -21,9 +21,7 @@
|
||||
#include "video_core/renderer_opengl/gl_resource_manager.h"
|
||||
|
||||
namespace OpenGL {
|
||||
|
||||
namespace {
|
||||
|
||||
// One uniform block is reserved for emulation purposes
|
||||
constexpr u32 ReservedUniformBlocks = 1;
|
||||
|
||||
@@ -198,10 +196,18 @@ bool IsASTCSupported() {
|
||||
return nsight || HasExtension(extensions, "GL_EXT_debug_tool");
|
||||
}
|
||||
|
||||
/// Formats a 16-byte UUID as lowercase hexadecimal text in the 8-4-4-4-12 layout
/// (for example "00112233-4455-6677-8899-aabbccddeeff").
///
/// Every byte is written as exactly two zero-padded hex digits. The previous format string
/// had only fifteen placeholders, so the last byte was silently dropped, and it used
/// unpadded "{:x}", so bytes below 0x10 produced a single digit and made the result
/// ambiguous.
///
/// @param uuid The UUID bytes, in the order they should appear in the string
/// @return The formatted UUID, always 36 characters long
[[nodiscard]] std::string UuidString(std::span<const GLubyte, GL_UUID_SIZE_EXT> uuid) {
    return fmt::format("{:02x}{:02x}{:02x}{:02x}-{:02x}{:02x}-{:02x}{:02x}-{:02x}{:02x}-"
                       "{:02x}{:02x}{:02x}{:02x}{:02x}{:02x}",
                       uuid[0], uuid[1], uuid[2], uuid[3], uuid[4], uuid[5], uuid[6], uuid[7],
                       uuid[8], uuid[9], uuid[10], uuid[11], uuid[12], uuid[13], uuid[14],
                       uuid[15]);
}
|
||||
} // Anonymous namespace
|
||||
|
||||
Device::Device()
|
||||
: max_uniform_buffers{BuildMaxUniformBuffers()}, base_bindings{BuildBaseBindings()} {
|
||||
Device::Device(bool has_vulkan_instance) {
|
||||
if (!GLAD_GL_VERSION_4_3) {
|
||||
LOG_ERROR(Render_OpenGL, "OpenGL 4.3 is not available");
|
||||
throw std::runtime_error{"Insufficient version"};
|
||||
}
|
||||
const std::string_view vendor = reinterpret_cast<const char*>(glGetString(GL_VENDOR));
|
||||
const std::string_view version = reinterpret_cast<const char*>(glGetString(GL_VERSION));
|
||||
const std::vector extensions = GetExtensions();
|
||||
@@ -217,6 +223,9 @@ Device::Device()
|
||||
"Beta driver 443.24 is known to have issues. There might be performance issues.");
|
||||
disable_fast_buffer_sub_data = true;
|
||||
}
|
||||
|
||||
max_uniform_buffers = BuildMaxUniformBuffers();
|
||||
base_bindings = BuildBaseBindings();
|
||||
uniform_buffer_alignment = GetInteger<size_t>(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT);
|
||||
shader_storage_alignment = GetInteger<size_t>(GL_SHADER_STORAGE_BUFFER_OFFSET_ALIGNMENT);
|
||||
max_vertex_attributes = GetInteger<u32>(GL_MAX_VERTEX_ATTRIBS);
|
||||
@@ -243,7 +252,8 @@ Device::Device()
|
||||
|
||||
use_assembly_shaders = Settings::values.use_assembly_shaders.GetValue() &&
|
||||
GLAD_GL_NV_gpu_program5 && GLAD_GL_NV_compute_program5 &&
|
||||
GLAD_GL_NV_transform_feedback && GLAD_GL_NV_transform_feedback2;
|
||||
GLAD_GL_NV_transform_feedback && GLAD_GL_NV_transform_feedback2 &&
|
||||
has_vulkan_instance;
|
||||
|
||||
use_asynchronous_shaders = Settings::values.use_asynchronous_shaders.GetValue();
|
||||
|
||||
|
@@ -10,18 +10,16 @@
|
||||
|
||||
namespace OpenGL {
|
||||
|
||||
static constexpr u32 EmulationUniformBlockBinding = 0;
|
||||
|
||||
class Device final {
|
||||
class Device {
|
||||
public:
|
||||
struct BaseBindings final {
|
||||
struct BaseBindings {
|
||||
u32 uniform_buffer{};
|
||||
u32 shader_storage_buffer{};
|
||||
u32 sampler{};
|
||||
u32 image{};
|
||||
};
|
||||
|
||||
explicit Device();
|
||||
explicit Device(bool has_vulkan_instance);
|
||||
explicit Device(std::nullptr_t);
|
||||
|
||||
u32 GetMaxUniformBuffers(Tegra::Engines::ShaderType shader_type) const noexcept {
|
||||
|
@@ -47,7 +47,7 @@ void GLInnerFence::Wait() {
|
||||
|
||||
FenceManagerOpenGL::FenceManagerOpenGL(VideoCore::RasterizerInterface& rasterizer_,
|
||||
Tegra::GPU& gpu_, TextureCache& texture_cache_,
|
||||
OGLBufferCache& buffer_cache_, QueryCache& query_cache_)
|
||||
BufferCache& buffer_cache_, QueryCache& query_cache_)
|
||||
: GenericFenceManager{rasterizer_, gpu_, texture_cache_, buffer_cache_, query_cache_} {}
|
||||
|
||||
Fence FenceManagerOpenGL::CreateFence(u32 value, bool is_stubbed) {
|
||||
|
@@ -32,14 +32,13 @@ private:
|
||||
};
|
||||
|
||||
using Fence = std::shared_ptr<GLInnerFence>;
|
||||
using GenericFenceManager =
|
||||
VideoCommon::FenceManager<Fence, TextureCache, OGLBufferCache, QueryCache>;
|
||||
using GenericFenceManager = VideoCommon::FenceManager<Fence, TextureCache, BufferCache, QueryCache>;
|
||||
|
||||
class FenceManagerOpenGL final : public GenericFenceManager {
|
||||
public:
|
||||
explicit FenceManagerOpenGL(VideoCore::RasterizerInterface& rasterizer_, Tegra::GPU& gpu_,
|
||||
TextureCache& texture_cache_, OGLBufferCache& buffer_cache_,
|
||||
QueryCache& query_cache_);
|
||||
explicit FenceManagerOpenGL(VideoCore::RasterizerInterface& rasterizer, Tegra::GPU& gpu,
|
||||
TextureCache& texture_cache, BufferCache& buffer_cache,
|
||||
QueryCache& query_cache);
|
||||
|
||||
protected:
|
||||
Fence CreateFence(u32 value, bool is_stubbed) override;
|
||||
|
@@ -44,17 +44,10 @@ using VideoCore::Surface::PixelFormat;
|
||||
using VideoCore::Surface::SurfaceTarget;
|
||||
using VideoCore::Surface::SurfaceType;
|
||||
|
||||
MICROPROFILE_DEFINE(OpenGL_VAO, "OpenGL", "Vertex Format Setup", MP_RGB(128, 128, 192));
|
||||
MICROPROFILE_DEFINE(OpenGL_VB, "OpenGL", "Vertex Buffer Setup", MP_RGB(128, 128, 192));
|
||||
MICROPROFILE_DEFINE(OpenGL_Shader, "OpenGL", "Shader Setup", MP_RGB(128, 128, 192));
|
||||
MICROPROFILE_DEFINE(OpenGL_UBO, "OpenGL", "Const Buffer Setup", MP_RGB(128, 128, 192));
|
||||
MICROPROFILE_DEFINE(OpenGL_Index, "OpenGL", "Index Buffer Setup", MP_RGB(128, 128, 192));
|
||||
MICROPROFILE_DEFINE(OpenGL_Texture, "OpenGL", "Texture Setup", MP_RGB(128, 128, 192));
|
||||
MICROPROFILE_DEFINE(OpenGL_Framebuffer, "OpenGL", "Framebuffer Setup", MP_RGB(128, 128, 192));
|
||||
MICROPROFILE_DEFINE(OpenGL_Drawing, "OpenGL", "Drawing", MP_RGB(128, 128, 192));
|
||||
MICROPROFILE_DEFINE(OpenGL_Clears, "OpenGL", "Clears", MP_RGB(128, 128, 192));
|
||||
MICROPROFILE_DEFINE(OpenGL_Blits, "OpenGL", "Blits", MP_RGB(128, 128, 192));
|
||||
MICROPROFILE_DEFINE(OpenGL_CacheManagement, "OpenGL", "Cache Mgmt", MP_RGB(100, 255, 100));
|
||||
MICROPROFILE_DEFINE(OpenGL_PrimitiveAssembly, "OpenGL", "Prim Asmbl", MP_RGB(255, 100, 100));
|
||||
MICROPROFILE_DEFINE(OpenGL_CacheManagement, "OpenGL", "Cache Management", MP_RGB(100, 255, 100));
|
||||
|
||||
namespace {
|
||||
|
||||
@@ -104,20 +97,6 @@ TextureHandle GetTextureInfo(const Engine& engine, bool via_header_index, const
|
||||
return TextureHandle(engine.AccessConstBuffer32(shader_type, buffer, offset), via_header_index);
|
||||
}
|
||||
|
||||
/// Returns the number of bytes to use for a const buffer.
///
/// Directly addressed entries use the size recorded in the shader entry. Indirectly
/// addressed entries use the size of the bound buffer, clamped to
/// Maxwell::MaxConstBufferSize with a warning when it exceeds that limit.
///
/// @param buffer Bound const buffer information
/// @param entry  Shader const buffer entry
/// @return Size in bytes to upload or bind
std::size_t GetConstBufferSize(const Tegra::Engines::ConstBufferInfo& buffer,
                               const ConstBufferEntry& entry) {
    if (entry.IsIndirect()) {
        if (buffer.size <= Maxwell::MaxConstBufferSize) {
            return buffer.size;
        }
        LOG_WARNING(Render_OpenGL, "Indirect constbuffer size {} exceeds maximum {}", buffer.size,
                    Maxwell::MaxConstBufferSize);
        return Maxwell::MaxConstBufferSize;
    }
    return entry.GetSize();
}
|
||||
|
||||
/// Translates hardware transform feedback indices
|
||||
/// @param location Hardware location
|
||||
/// @return Pair of ARB_transform_feedback3 token stream first and third arguments
|
||||
@@ -150,14 +129,6 @@ void oglEnable(GLenum cap, bool state) {
|
||||
(state ? glEnable : glDisable)(cap);
|
||||
}
|
||||
|
||||
/// Uploads `num_ssbos` bindless storage buffer descriptors as program local parameters of
/// `target`, starting at parameter index zero. Does nothing when the list is empty.
/// @param target    Assembly program target receiving the parameters
/// @param ssbos     Pointer to the first descriptor
/// @param num_ssbos Number of descriptors to upload
void UpdateBindlessSSBOs(GLenum target, const BindlessSSBO* ssbos, size_t num_ssbos) {
    if (num_ssbos != 0) {
        const GLsizei count = static_cast<GLsizei>(num_ssbos);
        const GLuint* const words = reinterpret_cast<const GLuint*>(ssbos);
        glProgramLocalParametersI4uivNV(target, 0, count, words);
    }
}
|
||||
|
||||
ImageViewType ImageViewTypeFromEntry(const SamplerEntry& entry) {
|
||||
if (entry.is_buffer) {
|
||||
return ImageViewType::Buffer;
|
||||
@@ -199,49 +170,35 @@ ImageViewType ImageViewTypeFromEntry(const ImageEntry& entry) {
|
||||
|
||||
RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu_,
|
||||
Core::Memory::Memory& cpu_memory_, const Device& device_,
|
||||
const Vulkan::Device* vulkan_device,
|
||||
Vulkan::MemoryAllocator* vulkan_memory_allocator,
|
||||
ScreenInfo& screen_info_, ProgramManager& program_manager_,
|
||||
StateTracker& state_tracker_)
|
||||
: RasterizerAccelerated(cpu_memory_), gpu(gpu_), maxwell3d(gpu.Maxwell3D()),
|
||||
kepler_compute(gpu.KeplerCompute()), gpu_memory(gpu.MemoryManager()), device(device_),
|
||||
screen_info(screen_info_), program_manager(program_manager_), state_tracker(state_tracker_),
|
||||
stream_buffer(device, state_tracker),
|
||||
texture_cache_runtime(device, program_manager, state_tracker),
|
||||
texture_cache(texture_cache_runtime, *this, maxwell3d, kepler_compute, gpu_memory),
|
||||
buffer_cache_runtime(device, vulkan_device, vulkan_memory_allocator),
|
||||
buffer_cache(*this, maxwell3d, kepler_compute, gpu_memory, cpu_memory_, buffer_cache_runtime),
|
||||
shader_cache(*this, emu_window_, gpu, maxwell3d, kepler_compute, gpu_memory, device),
|
||||
query_cache(*this, maxwell3d, gpu_memory),
|
||||
buffer_cache(*this, gpu_memory, cpu_memory_, device, stream_buffer, state_tracker),
|
||||
fence_manager(*this, gpu, texture_cache, buffer_cache, query_cache),
|
||||
async_shaders(emu_window_) {
|
||||
unified_uniform_buffer.Create();
|
||||
glNamedBufferStorage(unified_uniform_buffer.handle, TOTAL_CONST_BUFFER_BYTES, nullptr, 0);
|
||||
|
||||
if (device.UseAssemblyShaders()) {
|
||||
glCreateBuffers(static_cast<GLsizei>(staging_cbufs.size()), staging_cbufs.data());
|
||||
for (const GLuint cbuf : staging_cbufs) {
|
||||
glNamedBufferStorage(cbuf, static_cast<GLsizeiptr>(Maxwell::MaxConstBufferSize),
|
||||
nullptr, 0);
|
||||
}
|
||||
}
|
||||
if (device.UseAsynchronousShaders()) {
|
||||
async_shaders.AllocateWorkers();
|
||||
}
|
||||
}
|
||||
|
||||
RasterizerOpenGL::~RasterizerOpenGL() {
|
||||
if (device.UseAssemblyShaders()) {
|
||||
glDeleteBuffers(static_cast<GLsizei>(staging_cbufs.size()), staging_cbufs.data());
|
||||
}
|
||||
}
|
||||
RasterizerOpenGL::~RasterizerOpenGL() = default;
|
||||
|
||||
void RasterizerOpenGL::SetupVertexFormat() {
|
||||
void RasterizerOpenGL::SyncVertexFormats() {
|
||||
auto& flags = maxwell3d.dirty.flags;
|
||||
if (!flags[Dirty::VertexFormats]) {
|
||||
return;
|
||||
}
|
||||
flags[Dirty::VertexFormats] = false;
|
||||
|
||||
MICROPROFILE_SCOPE(OpenGL_VAO);
|
||||
|
||||
// Use the vertex array as-is, assumes that the data is formatted correctly for OpenGL. Enables
|
||||
// the first 16 vertex attributes always, as we don't know which ones are actually used until
|
||||
// shader time. Note, Tegra technically supports 32, but we're capping this to 16 for now to
|
||||
@@ -277,55 +234,7 @@ void RasterizerOpenGL::SetupVertexFormat() {
|
||||
}
|
||||
}
|
||||
|
||||
void RasterizerOpenGL::SetupVertexBuffer() {
|
||||
auto& flags = maxwell3d.dirty.flags;
|
||||
if (!flags[Dirty::VertexBuffers]) {
|
||||
return;
|
||||
}
|
||||
flags[Dirty::VertexBuffers] = false;
|
||||
|
||||
MICROPROFILE_SCOPE(OpenGL_VB);
|
||||
|
||||
const bool use_unified_memory = device.HasVertexBufferUnifiedMemory();
|
||||
|
||||
// Upload all guest vertex arrays sequentially to our buffer
|
||||
const auto& regs = maxwell3d.regs;
|
||||
for (std::size_t index = 0; index < NUM_SUPPORTED_VERTEX_BINDINGS; ++index) {
|
||||
if (!flags[Dirty::VertexBuffer0 + index]) {
|
||||
continue;
|
||||
}
|
||||
flags[Dirty::VertexBuffer0 + index] = false;
|
||||
|
||||
const auto& vertex_array = regs.vertex_array[index];
|
||||
if (!vertex_array.IsEnabled()) {
|
||||
continue;
|
||||
}
|
||||
|
||||
const GPUVAddr start = vertex_array.StartAddress();
|
||||
const GPUVAddr end = regs.vertex_array_limit[index].LimitAddress();
|
||||
ASSERT(end >= start);
|
||||
|
||||
const GLuint gl_index = static_cast<GLuint>(index);
|
||||
const u64 size = end - start;
|
||||
if (size == 0) {
|
||||
glBindVertexBuffer(gl_index, 0, 0, vertex_array.stride);
|
||||
if (use_unified_memory) {
|
||||
glBufferAddressRangeNV(GL_VERTEX_ATTRIB_ARRAY_ADDRESS_NV, gl_index, 0, 0);
|
||||
}
|
||||
continue;
|
||||
}
|
||||
const auto info = buffer_cache.UploadMemory(start, size);
|
||||
if (use_unified_memory) {
|
||||
glBindVertexBuffer(gl_index, 0, 0, vertex_array.stride);
|
||||
glBufferAddressRangeNV(GL_VERTEX_ATTRIB_ARRAY_ADDRESS_NV, gl_index,
|
||||
info.address + info.offset, size);
|
||||
} else {
|
||||
glBindVertexBuffer(gl_index, info.handle, info.offset, vertex_array.stride);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void RasterizerOpenGL::SetupVertexInstances() {
|
||||
void RasterizerOpenGL::SyncVertexInstances() {
|
||||
auto& flags = maxwell3d.dirty.flags;
|
||||
if (!flags[Dirty::VertexInstances]) {
|
||||
return;
|
||||
@@ -346,17 +255,7 @@ void RasterizerOpenGL::SetupVertexInstances() {
|
||||
}
|
||||
}
|
||||
|
||||
/// Uploads the guest index array through the buffer cache and binds the returned buffer to
/// GL_ELEMENT_ARRAY_BUFFER.
/// @returns The offset reported by the buffer cache for the uploaded data.
GLintptr RasterizerOpenGL::SetupIndexBuffer() {
    MICROPROFILE_SCOPE(OpenGL_Index);
    const std::size_t index_bytes = CalculateIndexBufferSize();
    const auto upload =
        buffer_cache.UploadMemory(maxwell3d.regs.index_array.IndexStart(), index_bytes);
    glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, upload.handle);
    return upload.offset;
}
|
||||
|
||||
void RasterizerOpenGL::SetupShaders() {
|
||||
MICROPROFILE_SCOPE(OpenGL_Shader);
|
||||
void RasterizerOpenGL::SetupShaders(bool is_indexed) {
|
||||
u32 clip_distances = 0;
|
||||
|
||||
std::array<Shader*, Maxwell::MaxShaderStage> shaders{};
|
||||
@@ -413,11 +312,19 @@ void RasterizerOpenGL::SetupShaders() {
|
||||
const size_t stage = index == 0 ? 0 : index - 1;
|
||||
shaders[stage] = shader;
|
||||
|
||||
SetupDrawConstBuffers(stage, shader);
|
||||
SetupDrawGlobalMemory(stage, shader);
|
||||
SetupDrawTextures(shader, stage);
|
||||
SetupDrawImages(shader, stage);
|
||||
|
||||
buffer_cache.SetEnabledUniformBuffers(stage, shader->GetEntries().enabled_uniform_buffers);
|
||||
|
||||
buffer_cache.UnbindGraphicsStorageBuffers(stage);
|
||||
u32 ssbo_index = 0;
|
||||
for (const auto& buffer : shader->GetEntries().global_memory_entries) {
|
||||
buffer_cache.BindGraphicsStorageBuffer(stage, ssbo_index, buffer.cbuf_index,
|
||||
buffer.cbuf_offset, buffer.is_written);
|
||||
++ssbo_index;
|
||||
}
|
||||
|
||||
// Workaround for Intel drivers.
|
||||
// When a clip distance is enabled but not set in the shader it crops parts of the screen
|
||||
// (sometimes it's half the screen, sometimes three quarters). To avoid this, enable the
|
||||
@@ -433,43 +340,26 @@ void RasterizerOpenGL::SetupShaders() {
|
||||
SyncClipEnabled(clip_distances);
|
||||
maxwell3d.dirty.flags[Dirty::Shaders] = false;
|
||||
|
||||
buffer_cache.UpdateGraphicsBuffers(is_indexed);
|
||||
|
||||
const std::span indices_span(image_view_indices.data(), image_view_indices.size());
|
||||
texture_cache.FillGraphicsImageViews(indices_span, image_view_ids);
|
||||
|
||||
buffer_cache.BindHostGeometryBuffers(is_indexed);
|
||||
|
||||
size_t image_view_index = 0;
|
||||
size_t texture_index = 0;
|
||||
size_t image_index = 0;
|
||||
for (size_t stage = 0; stage < Maxwell::MaxShaderStage; ++stage) {
|
||||
const Shader* const shader = shaders[stage];
|
||||
if (shader) {
|
||||
const auto base = device.GetBaseBindings(stage);
|
||||
BindTextures(shader->GetEntries(), base.sampler, base.image, image_view_index,
|
||||
texture_index, image_index);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
std::size_t RasterizerOpenGL::CalculateVertexArraysSize() const {
|
||||
const auto& regs = maxwell3d.regs;
|
||||
|
||||
std::size_t size = 0;
|
||||
for (u32 index = 0; index < Maxwell::NumVertexArrays; ++index) {
|
||||
if (!regs.vertex_array[index].IsEnabled())
|
||||
if (!shader) {
|
||||
continue;
|
||||
|
||||
const GPUVAddr start = regs.vertex_array[index].StartAddress();
|
||||
const GPUVAddr end = regs.vertex_array_limit[index].LimitAddress();
|
||||
|
||||
size += end - start;
|
||||
ASSERT(end >= start);
|
||||
}
|
||||
buffer_cache.BindHostStageBuffers(stage);
|
||||
const auto& base = device.GetBaseBindings(stage);
|
||||
BindTextures(shader->GetEntries(), base.sampler, base.image, image_view_index,
|
||||
texture_index, image_index);
|
||||
}
|
||||
|
||||
return size;
|
||||
}
|
||||
|
||||
std::size_t RasterizerOpenGL::CalculateIndexBufferSize() const {
|
||||
return static_cast<std::size_t>(maxwell3d.regs.index_array.count) *
|
||||
static_cast<std::size_t>(maxwell3d.regs.index_array.FormatSizeInBytes());
|
||||
}
|
||||
|
||||
void RasterizerOpenGL::LoadDiskResources(u64 title_id, const std::atomic_bool& stop_loading,
|
||||
@@ -478,6 +368,7 @@ void RasterizerOpenGL::LoadDiskResources(u64 title_id, const std::atomic_bool& s
|
||||
}
|
||||
|
||||
void RasterizerOpenGL::Clear() {
|
||||
MICROPROFILE_SCOPE(OpenGL_Clears);
|
||||
if (!maxwell3d.ShouldExecute()) {
|
||||
return;
|
||||
}
|
||||
@@ -528,11 +419,9 @@ void RasterizerOpenGL::Clear() {
|
||||
}
|
||||
UNIMPLEMENTED_IF(regs.clear_flags.viewport);
|
||||
|
||||
{
|
||||
auto lock = texture_cache.AcquireLock();
|
||||
texture_cache.UpdateRenderTargets(true);
|
||||
state_tracker.BindFramebuffer(texture_cache.GetFramebuffer()->Handle());
|
||||
}
|
||||
std::scoped_lock lock{texture_cache.mutex};
|
||||
texture_cache.UpdateRenderTargets(true);
|
||||
state_tracker.BindFramebuffer(texture_cache.GetFramebuffer()->Handle());
|
||||
|
||||
if (use_color) {
|
||||
glClearBufferfv(GL_COLOR, regs.clear_buffers.RT, regs.clear_color);
|
||||
@@ -544,7 +433,6 @@ void RasterizerOpenGL::Clear() {
|
||||
} else if (use_stencil) {
|
||||
glClearBufferiv(GL_STENCIL, 0, &regs.clear_stencil);
|
||||
}
|
||||
|
||||
++num_queued_commands;
|
||||
}
|
||||
|
||||
@@ -553,75 +441,12 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) {
|
||||
|
||||
query_cache.UpdateCounters();
|
||||
|
||||
SyncViewport();
|
||||
SyncRasterizeEnable();
|
||||
SyncPolygonModes();
|
||||
SyncColorMask();
|
||||
SyncFragmentColorClampState();
|
||||
SyncMultiSampleState();
|
||||
SyncDepthTestState();
|
||||
SyncDepthClamp();
|
||||
SyncStencilTestState();
|
||||
SyncBlendState();
|
||||
SyncLogicOpState();
|
||||
SyncCullMode();
|
||||
SyncPrimitiveRestart();
|
||||
SyncScissorTest();
|
||||
SyncPointState();
|
||||
SyncLineState();
|
||||
SyncPolygonOffset();
|
||||
SyncAlphaTest();
|
||||
SyncFramebufferSRGB();
|
||||
|
||||
buffer_cache.Acquire();
|
||||
current_cbuf = 0;
|
||||
|
||||
std::size_t buffer_size = CalculateVertexArraysSize();
|
||||
|
||||
// Add space for index buffer
|
||||
if (is_indexed) {
|
||||
buffer_size = Common::AlignUp(buffer_size, 4) + CalculateIndexBufferSize();
|
||||
}
|
||||
|
||||
// Uniform space for the 5 shader stages
|
||||
buffer_size =
|
||||
Common::AlignUp<std::size_t>(buffer_size, 4) +
|
||||
(sizeof(MaxwellUniformData) + device.GetUniformBufferAlignment()) * Maxwell::MaxShaderStage;
|
||||
|
||||
// Add space for at least 18 constant buffers
|
||||
buffer_size += Maxwell::MaxConstBuffers *
|
||||
(Maxwell::MaxConstBufferSize + device.GetUniformBufferAlignment());
|
||||
|
||||
// Prepare the vertex array.
|
||||
buffer_cache.Map(buffer_size);
|
||||
|
||||
// Prepare vertex array format.
|
||||
SetupVertexFormat();
|
||||
|
||||
// Upload vertex and index data.
|
||||
SetupVertexBuffer();
|
||||
SetupVertexInstances();
|
||||
GLintptr index_buffer_offset = 0;
|
||||
if (is_indexed) {
|
||||
index_buffer_offset = SetupIndexBuffer();
|
||||
}
|
||||
|
||||
// Setup emulation uniform buffer.
|
||||
if (!device.UseAssemblyShaders()) {
|
||||
MaxwellUniformData ubo;
|
||||
ubo.SetFromRegs(maxwell3d);
|
||||
const auto info =
|
||||
buffer_cache.UploadHostMemory(&ubo, sizeof(ubo), device.GetUniformBufferAlignment());
|
||||
glBindBufferRange(GL_UNIFORM_BUFFER, EmulationUniformBlockBinding, info.handle, info.offset,
|
||||
static_cast<GLsizeiptr>(sizeof(ubo)));
|
||||
}
|
||||
SyncState();
|
||||
|
||||
// Setup shaders and their used resources.
|
||||
auto lock = texture_cache.AcquireLock();
|
||||
SetupShaders();
|
||||
std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex};
|
||||
SetupShaders(is_indexed);
|
||||
|
||||
// Signal the buffer cache that we are not going to upload more things.
|
||||
buffer_cache.Unmap();
|
||||
texture_cache.UpdateRenderTargets(false);
|
||||
state_tracker.BindFramebuffer(texture_cache.GetFramebuffer()->Handle());
|
||||
program_manager.BindGraphicsPipeline();
|
||||
@@ -635,7 +460,7 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) {
|
||||
if (is_indexed) {
|
||||
const GLint base_vertex = static_cast<GLint>(maxwell3d.regs.vb_element_base);
|
||||
const GLsizei num_vertices = static_cast<GLsizei>(maxwell3d.regs.index_array.count);
|
||||
const GLvoid* offset = reinterpret_cast<const GLvoid*>(index_buffer_offset);
|
||||
const GLvoid* const offset = buffer_cache_runtime.IndexOffset();
|
||||
const GLenum format = MaxwellToGL::IndexFormat(maxwell3d.regs.index_array.format);
|
||||
if (num_instances == 1 && base_instance == 0 && base_vertex == 0) {
|
||||
glDrawElements(primitive_mode, num_vertices, format, offset);
|
||||
@@ -675,22 +500,22 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) {
|
||||
}
|
||||
|
||||
void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) {
|
||||
buffer_cache.Acquire();
|
||||
current_cbuf = 0;
|
||||
|
||||
Shader* const kernel = shader_cache.GetComputeKernel(code_addr);
|
||||
|
||||
auto lock = texture_cache.AcquireLock();
|
||||
std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex};
|
||||
BindComputeTextures(kernel);
|
||||
|
||||
const size_t buffer_size = Tegra::Engines::KeplerCompute::NumConstBuffers *
|
||||
(Maxwell::MaxConstBufferSize + device.GetUniformBufferAlignment());
|
||||
buffer_cache.Map(buffer_size);
|
||||
|
||||
SetupComputeConstBuffers(kernel);
|
||||
SetupComputeGlobalMemory(kernel);
|
||||
|
||||
buffer_cache.Unmap();
|
||||
const auto& entries = kernel->GetEntries();
|
||||
buffer_cache.SetEnabledComputeUniformBuffers(entries.enabled_uniform_buffers);
|
||||
buffer_cache.UnbindComputeStorageBuffers();
|
||||
u32 ssbo_index = 0;
|
||||
for (const auto& buffer : entries.global_memory_entries) {
|
||||
buffer_cache.BindComputeStorageBuffer(ssbo_index, buffer.cbuf_index, buffer.cbuf_offset,
|
||||
buffer.is_written);
|
||||
++ssbo_index;
|
||||
}
|
||||
buffer_cache.UpdateComputeBuffers();
|
||||
buffer_cache.BindHostComputeBuffers();
|
||||
|
||||
const auto& launch_desc = kepler_compute.launch_description;
|
||||
glDispatchCompute(launch_desc.grid_dim_x, launch_desc.grid_dim_y, launch_desc.grid_dim_z);
|
||||
@@ -706,6 +531,12 @@ void RasterizerOpenGL::Query(GPUVAddr gpu_addr, VideoCore::QueryType type,
|
||||
query_cache.Query(gpu_addr, type, timestamp);
|
||||
}
|
||||
|
||||
/// Forwards a graphics uniform buffer binding to the buffer cache while holding its mutex.
void RasterizerOpenGL::BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr,
                                                 u32 size) {
    std::scoped_lock buffer_guard{buffer_cache.mutex};
    buffer_cache.BindGraphicsUniformBuffer(stage, index, gpu_addr, size);
}
|
||||
|
||||
/// No-op: this implementation has an empty body.
void RasterizerOpenGL::FlushAll() {
    // Intentionally left empty.
}
|
||||
|
||||
void RasterizerOpenGL::FlushRegion(VAddr addr, u64 size) {
|
||||
@@ -714,19 +545,43 @@ void RasterizerOpenGL::FlushRegion(VAddr addr, u64 size) {
|
||||
return;
|
||||
}
|
||||
{
|
||||
auto lock = texture_cache.AcquireLock();
|
||||
std::scoped_lock lock{texture_cache.mutex};
|
||||
texture_cache.DownloadMemory(addr, size);
|
||||
}
|
||||
buffer_cache.FlushRegion(addr, size);
|
||||
{
|
||||
std::scoped_lock lock{buffer_cache.mutex};
|
||||
buffer_cache.DownloadMemory(addr, size);
|
||||
}
|
||||
query_cache.FlushRegion(addr, size);
|
||||
}
|
||||
|
||||
/// Invalidates a CPU address range in the shader, buffer and query caches, leaving the texture
/// cache untouched. Ranges with a null address or zero size are ignored.
void RasterizerOpenGL::InvalidateExceptTextureCache(VAddr addr, u64 size) {
    const bool has_range = addr != 0 && size != 0;
    if (!has_range) {
        return;
    }

    shader_cache.InvalidateRegion(addr, size);
    {
        // The buffer cache is only touched while its mutex is held.
        std::scoped_lock buffer_guard{buffer_cache.mutex};
        buffer_cache.WriteMemory(addr, size);
    }
    query_cache.InvalidateRegion(addr, size);
}
|
||||
|
||||
/// Unmaps a CPU address range from the texture cache while holding the texture cache mutex.
/// Ranges with a null address or zero size are ignored.
void RasterizerOpenGL::InvalidateTextureCache(VAddr addr, u64 size) {
    const bool has_range = addr != 0 && size != 0;
    if (!has_range) {
        return;
    }

    std::scoped_lock texture_guard{texture_cache.mutex};
    texture_cache.UnmapMemory(addr, size);
}
|
||||
|
||||
bool RasterizerOpenGL::MustFlushRegion(VAddr addr, u64 size) {
|
||||
std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex};
|
||||
if (!Settings::IsGPULevelHigh()) {
|
||||
return buffer_cache.MustFlushRegion(addr, size);
|
||||
return buffer_cache.IsRegionGpuModified(addr, size);
|
||||
}
|
||||
return texture_cache.IsRegionGpuModified(addr, size) ||
|
||||
buffer_cache.MustFlushRegion(addr, size);
|
||||
buffer_cache.IsRegionGpuModified(addr, size);
|
||||
}
|
||||
|
||||
void RasterizerOpenGL::InvalidateRegion(VAddr addr, u64 size) {
|
||||
@@ -735,11 +590,14 @@ void RasterizerOpenGL::InvalidateRegion(VAddr addr, u64 size) {
|
||||
return;
|
||||
}
|
||||
{
|
||||
auto lock = texture_cache.AcquireLock();
|
||||
std::scoped_lock lock{texture_cache.mutex};
|
||||
texture_cache.WriteMemory(addr, size);
|
||||
}
|
||||
{
|
||||
std::scoped_lock lock{buffer_cache.mutex};
|
||||
buffer_cache.WriteMemory(addr, size);
|
||||
}
|
||||
shader_cache.InvalidateRegion(addr, size);
|
||||
buffer_cache.InvalidateRegion(addr, size);
|
||||
query_cache.InvalidateRegion(addr, size);
|
||||
}
|
||||
|
||||
@@ -748,26 +606,35 @@ void RasterizerOpenGL::OnCPUWrite(VAddr addr, u64 size) {
|
||||
if (addr == 0 || size == 0) {
|
||||
return;
|
||||
}
|
||||
shader_cache.OnCPUWrite(addr, size);
|
||||
{
|
||||
auto lock = texture_cache.AcquireLock();
|
||||
std::scoped_lock lock{texture_cache.mutex};
|
||||
texture_cache.WriteMemory(addr, size);
|
||||
}
|
||||
shader_cache.OnCPUWrite(addr, size);
|
||||
buffer_cache.OnCPUWrite(addr, size);
|
||||
{
|
||||
std::scoped_lock lock{buffer_cache.mutex};
|
||||
buffer_cache.CachedWriteMemory(addr, size);
|
||||
}
|
||||
}
|
||||
|
||||
void RasterizerOpenGL::SyncGuestHost() {
|
||||
MICROPROFILE_SCOPE(OpenGL_CacheManagement);
|
||||
buffer_cache.SyncGuestHost();
|
||||
shader_cache.SyncGuestHost();
|
||||
{
|
||||
std::scoped_lock lock{buffer_cache.mutex};
|
||||
buffer_cache.FlushCachedWrites();
|
||||
}
|
||||
}
|
||||
|
||||
void RasterizerOpenGL::UnmapMemory(VAddr addr, u64 size) {
|
||||
{
|
||||
auto lock = texture_cache.AcquireLock();
|
||||
std::scoped_lock lock{texture_cache.mutex};
|
||||
texture_cache.UnmapMemory(addr, size);
|
||||
}
|
||||
buffer_cache.OnCPUWrite(addr, size);
|
||||
{
|
||||
std::scoped_lock lock{buffer_cache.mutex};
|
||||
buffer_cache.WriteMemory(addr, size);
|
||||
}
|
||||
shader_cache.OnCPUWrite(addr, size);
|
||||
}
|
||||
|
||||
@@ -802,14 +669,7 @@ void RasterizerOpenGL::FlushAndInvalidateRegion(VAddr addr, u64 size) {
|
||||
}
|
||||
|
||||
void RasterizerOpenGL::WaitForIdle() {
|
||||
// Place a barrier on everything that is not framebuffer related.
|
||||
// This is related to another flag that is not currently implemented.
|
||||
glMemoryBarrier(GL_VERTEX_ATTRIB_ARRAY_BARRIER_BIT | GL_ELEMENT_ARRAY_BARRIER_BIT |
|
||||
GL_UNIFORM_BARRIER_BIT | GL_TEXTURE_FETCH_BARRIER_BIT |
|
||||
GL_SHADER_IMAGE_ACCESS_BARRIER_BIT | GL_COMMAND_BARRIER_BIT |
|
||||
GL_PIXEL_BUFFER_BARRIER_BIT | GL_TEXTURE_UPDATE_BARRIER_BIT |
|
||||
GL_BUFFER_UPDATE_BARRIER_BIT | GL_TRANSFORM_FEEDBACK_BARRIER_BIT |
|
||||
GL_SHADER_STORAGE_BARRIER_BIT | GL_QUERY_BUFFER_BARRIER_BIT);
|
||||
glMemoryBarrier(GL_ALL_BARRIER_BITS);
|
||||
}
|
||||
|
||||
void RasterizerOpenGL::FragmentBarrier() {
|
||||
@@ -834,18 +694,21 @@ void RasterizerOpenGL::TickFrame() {
|
||||
num_queued_commands = 0;
|
||||
|
||||
fence_manager.TickFrame();
|
||||
buffer_cache.TickFrame();
|
||||
{
|
||||
auto lock = texture_cache.AcquireLock();
|
||||
std::scoped_lock lock{texture_cache.mutex};
|
||||
texture_cache.TickFrame();
|
||||
}
|
||||
{
|
||||
std::scoped_lock lock{buffer_cache.mutex};
|
||||
buffer_cache.TickFrame();
|
||||
}
|
||||
}
|
||||
|
||||
bool RasterizerOpenGL::AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Surface& src,
|
||||
const Tegra::Engines::Fermi2D::Surface& dst,
|
||||
const Tegra::Engines::Fermi2D::Config& copy_config) {
|
||||
MICROPROFILE_SCOPE(OpenGL_Blits);
|
||||
auto lock = texture_cache.AcquireLock();
|
||||
std::scoped_lock lock{texture_cache.mutex};
|
||||
texture_cache.BlitImage(dst, src, copy_config);
|
||||
return true;
|
||||
}
|
||||
@@ -857,7 +720,7 @@ bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& config,
|
||||
}
|
||||
MICROPROFILE_SCOPE(OpenGL_CacheManagement);
|
||||
|
||||
auto lock = texture_cache.AcquireLock();
|
||||
std::scoped_lock lock{texture_cache.mutex};
|
||||
ImageView* const image_view{texture_cache.TryFindFramebufferImageView(framebuffer_addr)};
|
||||
if (!image_view) {
|
||||
return false;
|
||||
@@ -924,166 +787,6 @@ void RasterizerOpenGL::BindTextures(const ShaderEntries& entries, GLuint base_te
|
||||
}
|
||||
}
|
||||
|
||||
void RasterizerOpenGL::SetupDrawConstBuffers(std::size_t stage_index, Shader* shader) {
|
||||
static constexpr std::array PARAMETER_LUT{
|
||||
GL_VERTEX_PROGRAM_PARAMETER_BUFFER_NV, GL_TESS_CONTROL_PROGRAM_PARAMETER_BUFFER_NV,
|
||||
GL_TESS_EVALUATION_PROGRAM_PARAMETER_BUFFER_NV, GL_GEOMETRY_PROGRAM_PARAMETER_BUFFER_NV,
|
||||
GL_FRAGMENT_PROGRAM_PARAMETER_BUFFER_NV,
|
||||
};
|
||||
MICROPROFILE_SCOPE(OpenGL_UBO);
|
||||
const auto& stages = maxwell3d.state.shader_stages;
|
||||
const auto& shader_stage = stages[stage_index];
|
||||
const auto& entries = shader->GetEntries();
|
||||
const bool use_unified = entries.use_unified_uniforms;
|
||||
const std::size_t base_unified_offset = stage_index * NUM_CONST_BUFFERS_BYTES_PER_STAGE;
|
||||
|
||||
const auto base_bindings = device.GetBaseBindings(stage_index);
|
||||
u32 binding = device.UseAssemblyShaders() ? 0 : base_bindings.uniform_buffer;
|
||||
for (const auto& entry : entries.const_buffers) {
|
||||
const u32 index = entry.GetIndex();
|
||||
const auto& buffer = shader_stage.const_buffers[index];
|
||||
SetupConstBuffer(PARAMETER_LUT[stage_index], binding, buffer, entry, use_unified,
|
||||
base_unified_offset + index * Maxwell::MaxConstBufferSize);
|
||||
++binding;
|
||||
}
|
||||
if (use_unified) {
|
||||
const u32 index = static_cast<u32>(base_bindings.shader_storage_buffer +
|
||||
entries.global_memory_entries.size());
|
||||
glBindBufferRange(GL_SHADER_STORAGE_BUFFER, index, unified_uniform_buffer.handle,
|
||||
base_unified_offset, NUM_CONST_BUFFERS_BYTES_PER_STAGE);
|
||||
}
|
||||
}
|
||||
|
||||
/// Configures every const buffer used by the compute `kernel`.
/// Each entry is described by a temporary ConstBufferInfo built from the launch description.
/// When the kernel uses unified uniforms, unified_uniform_buffer is also bound as a shader
/// storage buffer.
void RasterizerOpenGL::SetupComputeConstBuffers(Shader* kernel) {
    MICROPROFILE_SCOPE(OpenGL_UBO);

    const auto& launch = kepler_compute.launch_description;
    const auto& entries = kernel->GetEntries();
    const bool unified = entries.use_unified_uniforms;

    u32 binding = 0;
    for (const auto& entry : entries.const_buffers) {
        const auto cbuf_index = entry.GetIndex();
        const auto& config = launch.const_buffer_config[cbuf_index];
        const std::bitset<8> enable_mask = launch.const_buffer_enable_mask.Value();

        Tegra::Engines::ConstBufferInfo cbuf_info;
        cbuf_info.address = config.Address();
        cbuf_info.size = config.size;
        cbuf_info.enabled = enable_mask[cbuf_index];

        SetupConstBuffer(GL_COMPUTE_PROGRAM_PARAMETER_BUFFER_NV, binding, cbuf_info, entry, unified,
                         cbuf_index * Maxwell::MaxConstBufferSize);
        ++binding;
    }

    if (!unified) {
        return;
    }
    // The unified buffer takes the SSBO slot right after the kernel's global memory entries.
    const GLuint ssbo_slot = static_cast<GLuint>(entries.global_memory_entries.size());
    glBindBufferRange(GL_SHADER_STORAGE_BUFFER, ssbo_slot, unified_uniform_buffer.handle, 0,
                      NUM_CONST_BUFFERS_BYTES_PER_STAGE);
}
|
||||
|
||||
/// Uploads and binds a single const buffer.
/// @param stage          NV parameter-buffer target, used on the assembly shader path.
/// @param binding        Binding index to use.
/// @param buffer         Guest const buffer description (address, size, enabled).
/// @param entry          Shader-side const buffer entry.
/// @param use_unified    Copy the data into unified_uniform_buffer instead of binding a UBO.
/// @param unified_offset Destination byte offset inside unified_uniform_buffer.
void RasterizerOpenGL::SetupConstBuffer(GLenum stage, u32 binding,
                                        const Tegra::Engines::ConstBufferInfo& buffer,
                                        const ConstBufferEntry& entry, bool use_unified,
                                        std::size_t unified_offset) {
    if (!buffer.enabled) {
        // A disabled buffer is unbound by binding a null handle.
        if (device.UseAssemblyShaders()) {
            glBindBufferRangeNV(stage, entry.GetIndex(), 0, 0, 0);
        } else {
            glBindBufferRange(GL_UNIFORM_BUFFER, binding, 0, 0, sizeof(float));
        }
        return;
    }

    // Round the size up to a whole number of vec4s, as std140 UBO layout requires.
    const std::size_t size = Common::AlignUp(GetConstBufferSize(buffer, entry), sizeof(GLvec4));

    const bool fast_upload = !use_unified && device.HasFastBufferSubData();
    const std::size_t alignment = use_unified ? 4 : device.GetUniformBufferAlignment();
    const auto info = buffer_cache.UploadMemory(buffer.address, size, alignment, false, fast_upload);

    if (device.UseAssemblyShaders()) {
        UNIMPLEMENTED_IF(use_unified);
        if (info.offset == 0) {
            glBindBufferRangeNV(stage, binding, info.handle, info.offset, size);
            return;
        }
        // Non-zero offsets are copied to the start of a staging buffer, which is bound instead.
        // NOTE(review): presumably the NV binding needs a zero offset here; confirm.
        const GLuint staging_cbuf = staging_cbufs[current_cbuf++];
        glCopyNamedBufferSubData(info.handle, staging_cbuf, info.offset, 0, size);
        glBindBufferRangeNV(stage, binding, staging_cbuf, 0, size);
        return;
    }

    if (!use_unified) {
        glBindBufferRange(GL_UNIFORM_BUFFER, binding, info.handle, info.offset, size);
        return;
    }
    glCopyNamedBufferSubData(info.handle, unified_uniform_buffer.handle, info.offset,
                             unified_offset, size);
}
|
||||
|
||||
void RasterizerOpenGL::SetupDrawGlobalMemory(std::size_t stage_index, Shader* shader) {
|
||||
static constexpr std::array TARGET_LUT = {
|
||||
GL_VERTEX_PROGRAM_NV, GL_TESS_CONTROL_PROGRAM_NV, GL_TESS_EVALUATION_PROGRAM_NV,
|
||||
GL_GEOMETRY_PROGRAM_NV, GL_FRAGMENT_PROGRAM_NV,
|
||||
};
|
||||
const auto& cbufs{maxwell3d.state.shader_stages[stage_index]};
|
||||
const auto& entries{shader->GetEntries().global_memory_entries};
|
||||
|
||||
std::array<BindlessSSBO, 32> ssbos;
|
||||
ASSERT(entries.size() < ssbos.size());
|
||||
|
||||
const bool assembly_shaders = device.UseAssemblyShaders();
|
||||
u32 binding = assembly_shaders ? 0 : device.GetBaseBindings(stage_index).shader_storage_buffer;
|
||||
for (const auto& entry : entries) {
|
||||
const GPUVAddr addr{cbufs.const_buffers[entry.cbuf_index].address + entry.cbuf_offset};
|
||||
const GPUVAddr gpu_addr{gpu_memory.Read<u64>(addr)};
|
||||
const u32 size{gpu_memory.Read<u32>(addr + 8)};
|
||||
SetupGlobalMemory(binding, entry, gpu_addr, size, &ssbos[binding]);
|
||||
++binding;
|
||||
}
|
||||
if (assembly_shaders) {
|
||||
UpdateBindlessSSBOs(TARGET_LUT[stage_index], ssbos.data(), entries.size());
|
||||
}
|
||||
}
|
||||
|
||||
/// Configures the global memory (SSBO) entries used by the compute `kernel`.
/// Each entry's GPU address and size are read from guest memory at the const buffer location
/// the entry names. With assembly shaders the gathered descriptors are handed to
/// UpdateBindlessSSBOs; otherwise each buffer is bound by SetupGlobalMemory.
void RasterizerOpenGL::SetupComputeGlobalMemory(Shader* kernel) {
    const auto& cbufs{kepler_compute.launch_description.const_buffer_config};
    const auto& entries{kernel->GetEntries().global_memory_entries};

    std::array<BindlessSSBO, 32> ssbos;
    ASSERT(entries.size() < ssbos.size());

    u32 binding = 0;
    for (const auto& entry : entries) {
        const GPUVAddr addr{cbufs[entry.cbuf_index].Address() + entry.cbuf_offset};
        const GPUVAddr gpu_addr{gpu_memory.Read<u64>(addr)};
        const u32 size{gpu_memory.Read<u32>(addr + 8)};
        SetupGlobalMemory(binding, entry, gpu_addr, size, &ssbos[binding]);
        ++binding;
    }
    if (device.UseAssemblyShaders()) {
        // Only the first entries.size() descriptors were written above; the rest of the array is
        // uninitialized. Pass the populated count, as SetupDrawGlobalMemory does.
        UpdateBindlessSSBOs(GL_COMPUTE_PROGRAM_NV, ssbos.data(), entries.size());
    }
}
|
||||
|
||||
/// Uploads one global memory region through the buffer cache and exposes it to shaders.
/// @param binding  SSBO binding index, used when assembly shaders are off.
/// @param entry    Shader-side entry; its is_written flag is forwarded to the buffer cache.
/// @param gpu_addr Guest GPU address of the region.
/// @param size     Size of the region in bytes.
/// @param ssbo     Destination descriptor, written only when assembly shaders are in use.
void RasterizerOpenGL::SetupGlobalMemory(u32 binding, const GlobalMemoryEntry& entry,
                                         GPUVAddr gpu_addr, size_t size, BindlessSSBO* ssbo) {
    const size_t alignment{device.GetShaderStorageBufferAlignment()};
    const auto upload = buffer_cache.UploadMemory(gpu_addr, size, alignment, entry.is_written);

    if (!device.UseAssemblyShaders()) {
        glBindBufferRange(GL_SHADER_STORAGE_BUFFER, binding, upload.handle, upload.offset,
                          static_cast<GLsizeiptr>(size));
        return;
    }

    // Assembly shaders take an address/length descriptor instead of a binding point.
    ssbo->address = static_cast<GLuint64EXT>(upload.address + upload.offset);
    ssbo->length = static_cast<GLsizei>(size);
    ssbo->padding = 0;
}
|
||||
|
||||
void RasterizerOpenGL::SetupDrawTextures(const Shader* shader, size_t stage_index) {
|
||||
const bool via_header_index =
|
||||
maxwell3d.regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex;
|
||||
@@ -1131,6 +834,30 @@ void RasterizerOpenGL::SetupComputeImages(const Shader* shader) {
|
||||
}
|
||||
}
|
||||
|
||||
void RasterizerOpenGL::SyncState() {
|
||||
SyncViewport();
|
||||
SyncRasterizeEnable();
|
||||
SyncPolygonModes();
|
||||
SyncColorMask();
|
||||
SyncFragmentColorClampState();
|
||||
SyncMultiSampleState();
|
||||
SyncDepthTestState();
|
||||
SyncDepthClamp();
|
||||
SyncStencilTestState();
|
||||
SyncBlendState();
|
||||
SyncLogicOpState();
|
||||
SyncCullMode();
|
||||
SyncPrimitiveRestart();
|
||||
SyncScissorTest();
|
||||
SyncPointState();
|
||||
SyncLineState();
|
||||
SyncPolygonOffset();
|
||||
SyncAlphaTest();
|
||||
SyncFramebufferSRGB();
|
||||
SyncVertexFormats();
|
||||
SyncVertexInstances();
|
||||
}
|
||||
|
||||
void RasterizerOpenGL::SyncViewport() {
|
||||
auto& flags = maxwell3d.dirty.flags;
|
||||
const auto& regs = maxwell3d.regs;
|
||||
@@ -1166,9 +893,11 @@ void RasterizerOpenGL::SyncViewport() {
|
||||
if (regs.screen_y_control.y_negate != 0) {
|
||||
flip_y = !flip_y;
|
||||
}
|
||||
glClipControl(flip_y ? GL_UPPER_LEFT : GL_LOWER_LEFT,
|
||||
regs.depth_mode == Maxwell::DepthMode::ZeroToOne ? GL_ZERO_TO_ONE
|
||||
: GL_NEGATIVE_ONE_TO_ONE);
|
||||
const bool is_zero_to_one = regs.depth_mode == Maxwell::DepthMode::ZeroToOne;
|
||||
const GLenum origin = flip_y ? GL_UPPER_LEFT : GL_LOWER_LEFT;
|
||||
const GLenum depth = is_zero_to_one ? GL_ZERO_TO_ONE : GL_NEGATIVE_ONE_TO_ONE;
|
||||
state_tracker.ClipControl(origin, depth);
|
||||
state_tracker.SetYNegate(regs.screen_y_control.y_negate != 0);
|
||||
}
|
||||
|
||||
if (dirty_viewport) {
|
||||
@@ -1652,36 +1381,13 @@ void RasterizerOpenGL::BeginTransformFeedback(GLenum primitive_mode) {
|
||||
if (regs.tfb_enabled == 0) {
|
||||
return;
|
||||
}
|
||||
|
||||
if (device.UseAssemblyShaders()) {
|
||||
SyncTransformFeedback();
|
||||
}
|
||||
|
||||
UNIMPLEMENTED_IF(regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::TesselationControl) ||
|
||||
regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::TesselationEval) ||
|
||||
regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::Geometry));
|
||||
|
||||
for (std::size_t index = 0; index < Maxwell::NumTransformFeedbackBuffers; ++index) {
|
||||
const auto& binding = regs.tfb_bindings[index];
|
||||
if (!binding.buffer_enable) {
|
||||
if (enabled_transform_feedback_buffers[index]) {
|
||||
glBindBufferRange(GL_TRANSFORM_FEEDBACK_BUFFER, static_cast<GLuint>(index), 0, 0,
|
||||
0);
|
||||
}
|
||||
enabled_transform_feedback_buffers[index] = false;
|
||||
continue;
|
||||
}
|
||||
enabled_transform_feedback_buffers[index] = true;
|
||||
|
||||
auto& tfb_buffer = transform_feedback_buffers[index];
|
||||
tfb_buffer.Create();
|
||||
|
||||
const GLuint handle = tfb_buffer.handle;
|
||||
const std::size_t size = binding.buffer_size;
|
||||
glNamedBufferData(handle, static_cast<GLsizeiptr>(size), nullptr, GL_STREAM_COPY);
|
||||
glBindBufferRange(GL_TRANSFORM_FEEDBACK_BUFFER, static_cast<GLuint>(index), handle, 0,
|
||||
static_cast<GLsizeiptr>(size));
|
||||
}
|
||||
UNIMPLEMENTED_IF(primitive_mode != GL_POINTS);
|
||||
|
||||
// We may have to call BeginTransformFeedbackNV here since they seem to call different
|
||||
// implementations on Nvidia's driver (the pointer is different) but we are using
|
||||
@@ -1695,23 +1401,7 @@ void RasterizerOpenGL::EndTransformFeedback() {
|
||||
if (regs.tfb_enabled == 0) {
|
||||
return;
|
||||
}
|
||||
|
||||
glEndTransformFeedback();
|
||||
|
||||
for (std::size_t index = 0; index < Maxwell::NumTransformFeedbackBuffers; ++index) {
|
||||
const auto& binding = regs.tfb_bindings[index];
|
||||
if (!binding.buffer_enable) {
|
||||
continue;
|
||||
}
|
||||
UNIMPLEMENTED_IF(binding.buffer_offset != 0);
|
||||
|
||||
const GLuint handle = transform_feedback_buffers[index].handle;
|
||||
const GPUVAddr gpu_addr = binding.Address();
|
||||
const std::size_t size = binding.buffer_size;
|
||||
const auto info = buffer_cache.UploadMemory(gpu_addr, size, 4, true);
|
||||
glCopyNamedBufferSubData(handle, info.handle, 0, info.offset,
|
||||
static_cast<GLsizeiptr>(size));
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace OpenGL
|
||||
|
@@ -30,7 +30,6 @@
|
||||
#include "video_core/renderer_opengl/gl_shader_decompiler.h"
|
||||
#include "video_core/renderer_opengl/gl_shader_manager.h"
|
||||
#include "video_core/renderer_opengl/gl_state_tracker.h"
|
||||
#include "video_core/renderer_opengl/gl_stream_buffer.h"
|
||||
#include "video_core/renderer_opengl/gl_texture_cache.h"
|
||||
#include "video_core/shader/async_shaders.h"
|
||||
#include "video_core/textures/texture.h"
|
||||
@@ -47,6 +46,11 @@ namespace Tegra {
|
||||
class MemoryManager;
|
||||
}
|
||||
|
||||
namespace Vulkan {
|
||||
class Device;
|
||||
class MemoryAllocator;
|
||||
} // namespace Vulkan
|
||||
|
||||
namespace OpenGL {
|
||||
|
||||
struct ScreenInfo;
|
||||
@@ -63,6 +67,8 @@ class RasterizerOpenGL : public VideoCore::RasterizerAccelerated {
|
||||
public:
|
||||
explicit RasterizerOpenGL(Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu_,
|
||||
Core::Memory::Memory& cpu_memory_, const Device& device_,
|
||||
const Vulkan::Device* vulkan_device,
|
||||
Vulkan::MemoryAllocator* vulkan_memory_allocator,
|
||||
ScreenInfo& screen_info_, ProgramManager& program_manager_,
|
||||
StateTracker& state_tracker_);
|
||||
~RasterizerOpenGL() override;
|
||||
@@ -72,8 +78,11 @@ public:
|
||||
void DispatchCompute(GPUVAddr code_addr) override;
|
||||
void ResetCounter(VideoCore::QueryType type) override;
|
||||
void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional<u64> timestamp) override;
|
||||
void BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, u32 size) override;
|
||||
void FlushAll() override;
|
||||
void FlushRegion(VAddr addr, u64 size) override;
|
||||
void InvalidateExceptTextureCache(VAddr addr, u64 size) override;
|
||||
void InvalidateTextureCache(VAddr addr, u64 size) override;
|
||||
bool MustFlushRegion(VAddr addr, u64 size) override;
|
||||
void InvalidateRegion(VAddr addr, u64 size) override;
|
||||
void OnCPUWrite(VAddr addr, u64 size) override;
|
||||
@@ -119,27 +128,6 @@ private:
|
||||
void BindTextures(const ShaderEntries& entries, GLuint base_texture, GLuint base_image,
|
||||
size_t& image_view_index, size_t& texture_index, size_t& image_index);
|
||||
|
||||
/// Configures the current constbuffers to use for the draw command.
|
||||
void SetupDrawConstBuffers(std::size_t stage_index, Shader* shader);
|
||||
|
||||
/// Configures the current constbuffers to use for the kernel invocation.
|
||||
void SetupComputeConstBuffers(Shader* kernel);
|
||||
|
||||
/// Configures a constant buffer.
|
||||
void SetupConstBuffer(GLenum stage, u32 binding, const Tegra::Engines::ConstBufferInfo& buffer,
|
||||
const ConstBufferEntry& entry, bool use_unified,
|
||||
std::size_t unified_offset);
|
||||
|
||||
/// Configures the current global memory entries to use for the draw command.
|
||||
void SetupDrawGlobalMemory(std::size_t stage_index, Shader* shader);
|
||||
|
||||
/// Configures the current global memory entries to use for the kernel invocation.
|
||||
void SetupComputeGlobalMemory(Shader* kernel);
|
||||
|
||||
/// Configures a global memory buffer.
|
||||
void SetupGlobalMemory(u32 binding, const GlobalMemoryEntry& entry, GPUVAddr gpu_addr,
|
||||
size_t size, BindlessSSBO* ssbo);
|
||||
|
||||
/// Configures the current textures to use for the draw command.
|
||||
void SetupDrawTextures(const Shader* shader, size_t stage_index);
|
||||
|
||||
@@ -152,6 +140,9 @@ private:
|
||||
/// Configures images in a compute shader.
|
||||
void SetupComputeImages(const Shader* shader);
|
||||
|
||||
/// Syncs state to match the guest's state
|
||||
void SyncState();
|
||||
|
||||
/// Syncs the viewport and depth range to match the guest state
|
||||
void SyncViewport();
|
||||
|
||||
@@ -215,6 +206,12 @@ private:
|
||||
/// Syncs the framebuffer sRGB state to match the guest state
|
||||
void SyncFramebufferSRGB();
|
||||
|
||||
/// Syncs vertex formats to match the guest state
|
||||
void SyncVertexFormats();
|
||||
|
||||
/// Syncs vertex instances to match the guest state
|
||||
void SyncVertexInstances();
|
||||
|
||||
/// Syncs transform feedback state to match guest state
|
||||
/// @note Only valid on assembly shaders
|
||||
void SyncTransformFeedback();
|
||||
@@ -225,19 +222,7 @@ private:
|
||||
/// End a transform feedback
|
||||
void EndTransformFeedback();
|
||||
|
||||
std::size_t CalculateVertexArraysSize() const;
|
||||
|
||||
std::size_t CalculateIndexBufferSize() const;
|
||||
|
||||
/// Updates the current vertex format
|
||||
void SetupVertexFormat();
|
||||
|
||||
void SetupVertexBuffer();
|
||||
void SetupVertexInstances();
|
||||
|
||||
GLintptr SetupIndexBuffer();
|
||||
|
||||
void SetupShaders();
|
||||
void SetupShaders(bool is_indexed);
|
||||
|
||||
Tegra::GPU& gpu;
|
||||
Tegra::Engines::Maxwell3D& maxwell3d;
|
||||
@@ -249,12 +234,12 @@ private:
|
||||
ProgramManager& program_manager;
|
||||
StateTracker& state_tracker;
|
||||
|
||||
OGLStreamBuffer stream_buffer;
|
||||
TextureCacheRuntime texture_cache_runtime;
|
||||
TextureCache texture_cache;
|
||||
BufferCacheRuntime buffer_cache_runtime;
|
||||
BufferCache buffer_cache;
|
||||
ShaderCacheOpenGL shader_cache;
|
||||
QueryCache query_cache;
|
||||
OGLBufferCache buffer_cache;
|
||||
FenceManagerOpenGL fence_manager;
|
||||
|
||||
VideoCommon::Shader::AsyncShaders async_shaders;
|
||||
@@ -262,20 +247,8 @@ private:
|
||||
boost::container::static_vector<u32, MAX_IMAGE_VIEWS> image_view_indices;
|
||||
std::array<ImageViewId, MAX_IMAGE_VIEWS> image_view_ids;
|
||||
boost::container::static_vector<GLuint, MAX_TEXTURES> sampler_handles;
|
||||
std::array<GLuint, MAX_TEXTURES> texture_handles;
|
||||
std::array<GLuint, MAX_IMAGES> image_handles;
|
||||
|
||||
std::array<OGLBuffer, Tegra::Engines::Maxwell3D::Regs::NumTransformFeedbackBuffers>
|
||||
transform_feedback_buffers;
|
||||
std::bitset<Tegra::Engines::Maxwell3D::Regs::NumTransformFeedbackBuffers>
|
||||
enabled_transform_feedback_buffers;
|
||||
|
||||
static constexpr std::size_t NUM_CONSTANT_BUFFERS =
|
||||
Tegra::Engines::Maxwell3D::Regs::MaxConstBuffers *
|
||||
Tegra::Engines::Maxwell3D::Regs::MaxShaderProgram;
|
||||
std::array<GLuint, NUM_CONSTANT_BUFFERS> staging_cbufs{};
|
||||
std::size_t current_cbuf = 0;
|
||||
OGLBuffer unified_uniform_buffer;
|
||||
std::array<GLuint, MAX_TEXTURES> texture_handles{};
|
||||
std::array<GLuint, MAX_IMAGES> image_handles{};
|
||||
|
||||
/// Number of commands queued to the OpenGL driver. Reset on flush.
|
||||
std::size_t num_queued_commands = 0;
|
||||
|
@@ -171,12 +171,6 @@ void OGLBuffer::Release() {
|
||||
handle = 0;
|
||||
}
|
||||
|
||||
void OGLBuffer::MakeStreamCopy(std::size_t buffer_size) {
|
||||
ASSERT_OR_EXECUTE((handle != 0 && buffer_size != 0), { return; });
|
||||
|
||||
glNamedBufferData(handle, buffer_size, nullptr, GL_STREAM_COPY);
|
||||
}
|
||||
|
||||
void OGLSync::Create() {
|
||||
if (handle != 0)
|
||||
return;
|
||||
|
@@ -234,9 +234,6 @@ public:
|
||||
/// Deletes the internal OpenGL resource
|
||||
void Release();
|
||||
|
||||
// Converts the buffer into a stream copy buffer with a fixed size
|
||||
void MakeStreamCopy(std::size_t buffer_size);
|
||||
|
||||
GLuint handle = 0;
|
||||
};
|
||||
|
||||
|
@@ -63,7 +63,7 @@ using TextureIR = std::variant<TextureOffset, TextureDerivates, TextureArgument>
|
||||
constexpr u32 MAX_CONSTBUFFER_SCALARS = static_cast<u32>(Maxwell::MaxConstBufferSize) / sizeof(u32);
|
||||
constexpr u32 MAX_CONSTBUFFER_ELEMENTS = MAX_CONSTBUFFER_SCALARS / sizeof(u32);
|
||||
|
||||
constexpr std::string_view CommonDeclarations = R"(#define ftoi floatBitsToInt
|
||||
constexpr std::string_view COMMON_DECLARATIONS = R"(#define ftoi floatBitsToInt
|
||||
#define ftou floatBitsToUint
|
||||
#define itof intBitsToFloat
|
||||
#define utof uintBitsToFloat
|
||||
@@ -76,10 +76,6 @@ bvec2 HalfFloatNanComparison(bvec2 comparison, vec2 pair1, vec2 pair2) {{
|
||||
|
||||
const float fswzadd_modifiers_a[] = float[4](-1.0f, 1.0f, -1.0f, 0.0f );
|
||||
const float fswzadd_modifiers_b[] = float[4](-1.0f, -1.0f, 1.0f, -1.0f );
|
||||
|
||||
layout (std140, binding = {}) uniform vs_config {{
|
||||
float y_direction;
|
||||
}};
|
||||
)";
|
||||
|
||||
class ShaderWriter final {
|
||||
@@ -401,13 +397,6 @@ std::string FlowStackTopName(MetaStackClass stack) {
|
||||
return fmt::format("{}_flow_stack_top", GetFlowStackPrefix(stack));
|
||||
}
|
||||
|
||||
bool UseUnifiedUniforms(const Device& device, const ShaderIR& ir, ShaderType stage) {
|
||||
const u32 num_ubos = static_cast<u32>(ir.GetConstantBuffers().size());
|
||||
// We waste one UBO for emulation
|
||||
const u32 num_available_ubos = device.GetMaxUniformBuffers(stage) - 1;
|
||||
return num_ubos > num_available_ubos;
|
||||
}
|
||||
|
||||
struct GenericVaryingDescription {
|
||||
std::string name;
|
||||
u8 first_element = 0;
|
||||
@@ -419,9 +408,8 @@ public:
|
||||
explicit GLSLDecompiler(const Device& device_, const ShaderIR& ir_, const Registry& registry_,
|
||||
ShaderType stage_, std::string_view identifier_,
|
||||
std::string_view suffix_)
|
||||
: device{device_}, ir{ir_}, registry{registry_}, stage{stage_}, identifier{identifier_},
|
||||
suffix{suffix_}, header{ir.GetHeader()}, use_unified_uniforms{
|
||||
UseUnifiedUniforms(device_, ir_, stage_)} {
|
||||
: device{device_}, ir{ir_}, registry{registry_}, stage{stage_},
|
||||
identifier{identifier_}, suffix{suffix_}, header{ir.GetHeader()} {
|
||||
if (stage != ShaderType::Compute) {
|
||||
transform_feedback = BuildTransformFeedback(registry.GetGraphicsInfo());
|
||||
}
|
||||
@@ -515,7 +503,8 @@ private:
|
||||
if (!identifier.empty()) {
|
||||
code.AddLine("// {}", identifier);
|
||||
}
|
||||
code.AddLine("#version 440 {}", ir.UsesLegacyVaryings() ? "compatibility" : "core");
|
||||
const bool use_compatibility = ir.UsesLegacyVaryings() || ir.UsesYNegate();
|
||||
code.AddLine("#version 440 {}", use_compatibility ? "compatibility" : "core");
|
||||
code.AddLine("#extension GL_ARB_separate_shader_objects : enable");
|
||||
if (device.HasShaderBallot()) {
|
||||
code.AddLine("#extension GL_ARB_shader_ballot : require");
|
||||
@@ -541,7 +530,7 @@ private:
|
||||
|
||||
code.AddNewLine();
|
||||
|
||||
code.AddLine(CommonDeclarations, EmulationUniformBlockBinding);
|
||||
code.AddLine(COMMON_DECLARATIONS);
|
||||
}
|
||||
|
||||
void DeclareVertex() {
|
||||
@@ -864,17 +853,6 @@ private:
|
||||
}
|
||||
|
||||
void DeclareConstantBuffers() {
|
||||
if (use_unified_uniforms) {
|
||||
const u32 binding = device.GetBaseBindings(stage).shader_storage_buffer +
|
||||
static_cast<u32>(ir.GetGlobalMemory().size());
|
||||
code.AddLine("layout (std430, binding = {}) readonly buffer UnifiedUniforms {{",
|
||||
binding);
|
||||
code.AddLine(" uint cbufs[];");
|
||||
code.AddLine("}};");
|
||||
code.AddNewLine();
|
||||
return;
|
||||
}
|
||||
|
||||
u32 binding = device.GetBaseBindings(stage).uniform_buffer;
|
||||
for (const auto& [index, info] : ir.GetConstantBuffers()) {
|
||||
const u32 num_elements = Common::AlignUp(info.GetSize(), 4) / 4;
|
||||
@@ -1080,29 +1058,17 @@ private:
|
||||
|
||||
if (const auto cbuf = std::get_if<CbufNode>(&*node)) {
|
||||
const Node offset = cbuf->GetOffset();
|
||||
const u32 base_unified_offset = cbuf->GetIndex() * MAX_CONSTBUFFER_SCALARS;
|
||||
|
||||
if (const auto immediate = std::get_if<ImmediateNode>(&*offset)) {
|
||||
// Direct access
|
||||
const u32 offset_imm = immediate->GetValue();
|
||||
ASSERT_MSG(offset_imm % 4 == 0, "Unaligned cbuf direct access");
|
||||
if (use_unified_uniforms) {
|
||||
return {fmt::format("cbufs[{}]", base_unified_offset + offset_imm / 4),
|
||||
Type::Uint};
|
||||
} else {
|
||||
return {fmt::format("{}[{}][{}]", GetConstBuffer(cbuf->GetIndex()),
|
||||
offset_imm / (4 * 4), (offset_imm / 4) % 4),
|
||||
Type::Uint};
|
||||
}
|
||||
}
|
||||
|
||||
// Indirect access
|
||||
if (use_unified_uniforms) {
|
||||
return {fmt::format("cbufs[{} + ({} >> 2)]", base_unified_offset,
|
||||
Visit(offset).AsUint()),
|
||||
return {fmt::format("{}[{}][{}]", GetConstBuffer(cbuf->GetIndex()),
|
||||
offset_imm / (4 * 4), (offset_imm / 4) % 4),
|
||||
Type::Uint};
|
||||
}
|
||||
|
||||
// Indirect access
|
||||
const std::string final_offset = code.GenerateTemporary();
|
||||
code.AddLine("uint {} = {} >> 2;", final_offset, Visit(offset).AsUint());
|
||||
|
||||
@@ -2292,7 +2258,6 @@ private:
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (header.ps.omap.depth) {
|
||||
// The depth output is always 2 registers after the last color output, and current_reg
|
||||
// already contains one past the last color register.
|
||||
@@ -2336,7 +2301,8 @@ private:
|
||||
}
|
||||
|
||||
/// Emits the expression that reads the Y_NEGATE factor.
/// @return A float expression naming gl_FrontMaterial.ambient.a
Expression YNegate(Operation operation) {
    // Y_NEGATE is mapped to this uniform value
    return {"gl_FrontMaterial.ambient.a", Type::Float};
}
|
||||
|
||||
template <u32 element>
|
||||
@@ -2786,7 +2752,6 @@ private:
|
||||
const std::string_view identifier;
|
||||
const std::string_view suffix;
|
||||
const Header header;
|
||||
const bool use_unified_uniforms;
|
||||
std::unordered_map<u8, VaryingTFB> transform_feedback;
|
||||
|
||||
ShaderWriter code;
|
||||
@@ -3002,8 +2967,10 @@ ShaderEntries MakeEntries(const Device& device, const ShaderIR& ir, ShaderType s
|
||||
// Build a bitmask with one bit per enabled clip distance. The bits have to be OR-ed in:
// a plain assignment would discard every bit written by the previous iterations.
for (std::size_t i = 0; i < std::size(clip_distances); ++i) {
    entries.clip_distances |= (clip_distances[i] ? 1U : 0U) << i;
}
|
||||
for (const auto& buffer : entries.const_buffers) {
|
||||
entries.enabled_uniform_buffers |= 1U << buffer.GetIndex();
|
||||
}
|
||||
entries.shader_length = ir.GetLength();
|
||||
entries.use_unified_uniforms = UseUnifiedUniforms(device, ir, stage);
|
||||
return entries;
|
||||
}
|
||||
|
||||
|
@@ -55,7 +55,7 @@ struct ShaderEntries {
|
||||
std::vector<ImageEntry> images;
|
||||
std::size_t shader_length{};
|
||||
u32 clip_distances{};
|
||||
bool use_unified_uniforms{};
|
||||
u32 enabled_uniform_buffers{};
|
||||
};
|
||||
|
||||
ShaderEntries MakeEntries(const Device& device, const VideoCommon::Shader::ShaderIR& ir,
|
||||
|
@@ -36,16 +36,10 @@ void SetupDirtyColorMasks(Tables& tables) {
|
||||
FillBlock(tables[1], OFF(color_mask), NUM(color_mask), ColorMasks);
|
||||
}
|
||||
|
||||
void SetupDirtyVertexArrays(Tables& tables) {
|
||||
static constexpr std::size_t num_array = 3;
|
||||
void SetupDirtyVertexInstances(Tables& tables) {
|
||||
static constexpr std::size_t instance_base_offset = 3;
|
||||
for (std::size_t i = 0; i < Regs::NumVertexArrays; ++i) {
|
||||
const std::size_t array_offset = OFF(vertex_array) + i * NUM(vertex_array[0]);
|
||||
const std::size_t limit_offset = OFF(vertex_array_limit) + i * NUM(vertex_array_limit[0]);
|
||||
|
||||
FillBlock(tables, array_offset, num_array, VertexBuffer0 + i, VertexBuffers);
|
||||
FillBlock(tables, limit_offset, NUM(vertex_array_limit), VertexBuffer0 + i, VertexBuffers);
|
||||
|
||||
const std::size_t instance_array_offset = array_offset + instance_base_offset;
|
||||
tables[0][instance_array_offset] = static_cast<u8>(VertexInstance0 + i);
|
||||
tables[1][instance_array_offset] = VertexInstances;
|
||||
@@ -217,11 +211,11 @@ void SetupDirtyMisc(Tables& tables) {
|
||||
StateTracker::StateTracker(Tegra::GPU& gpu) : flags{gpu.Maxwell3D().dirty.flags} {
|
||||
auto& dirty = gpu.Maxwell3D().dirty;
|
||||
auto& tables = dirty.tables;
|
||||
SetupDirtyRenderTargets(tables);
|
||||
SetupDirtyFlags(tables);
|
||||
SetupDirtyColorMasks(tables);
|
||||
SetupDirtyViewports(tables);
|
||||
SetupDirtyScissors(tables);
|
||||
SetupDirtyVertexArrays(tables);
|
||||
SetupDirtyVertexInstances(tables);
|
||||
SetupDirtyVertexFormat(tables);
|
||||
SetupDirtyShaders(tables);
|
||||
SetupDirtyPolygonModes(tables);
|
||||
@@ -241,19 +235,6 @@ StateTracker::StateTracker(Tegra::GPU& gpu) : flags{gpu.Maxwell3D().dirty.flags}
|
||||
SetupDirtyClipControl(tables);
|
||||
SetupDirtyDepthClampEnabled(tables);
|
||||
SetupDirtyMisc(tables);
|
||||
|
||||
auto& store = dirty.on_write_stores;
|
||||
store[VertexBuffers] = true;
|
||||
for (std::size_t i = 0; i < Regs::NumVertexArrays; ++i) {
|
||||
store[VertexBuffer0 + i] = true;
|
||||
}
|
||||
}
|
||||
|
||||
/// Flags the vertex buffer state as dirty: the global VertexBuffers flag plus every
/// per-binding flag from VertexBuffer0 through VertexBuffer31.
void StateTracker::InvalidateStreamBuffer() {
    int binding = Dirty::VertexBuffer0;
    while (binding <= Dirty::VertexBuffer31) {
        flags[binding] = true;
        ++binding;
    }
    flags[Dirty::VertexBuffers] = true;
}
|
||||
|
||||
} // namespace OpenGL
|
||||
|
@@ -28,10 +28,6 @@ enum : u8 {
|
||||
VertexFormat0,
|
||||
VertexFormat31 = VertexFormat0 + 31,
|
||||
|
||||
VertexBuffers,
|
||||
VertexBuffer0,
|
||||
VertexBuffer31 = VertexBuffer0 + 31,
|
||||
|
||||
VertexInstances,
|
||||
VertexInstance0,
|
||||
VertexInstance31 = VertexInstance0 + 31,
|
||||
@@ -92,8 +88,6 @@ class StateTracker {
|
||||
public:
|
||||
explicit StateTracker(Tegra::GPU& gpu);
|
||||
|
||||
void InvalidateStreamBuffer();
|
||||
|
||||
void BindIndexBuffer(GLuint new_index_buffer) {
|
||||
if (index_buffer == new_index_buffer) {
|
||||
return;
|
||||
@@ -110,13 +104,32 @@ public:
|
||||
glBindFramebuffer(GL_DRAW_FRAMEBUFFER, framebuffer);
|
||||
}
|
||||
|
||||
/// Sets the clip control origin and depth mode, caching both values so that glClipControl is
/// only called when at least one of them differs from the last value set through here.
void ClipControl(GLenum new_origin, GLenum new_depth) {
    const bool is_unchanged = origin == new_origin && depth == new_depth;
    if (!is_unchanged) {
        origin = new_origin;
        depth = new_depth;
        glClipControl(new_origin, new_depth);
    }
}
|
||||
|
||||
/// Updates the cached Y_NEGATE state and, when it changed, uploads it to the front material
/// ambient color: alpha is -1.0 when negation is enabled and 1.0 otherwise.
void SetYNegate(bool new_y_negate) {
    if (y_negate != new_y_negate) {
        y_negate = new_y_negate;
        // Y_NEGATE is mapped to gl_FrontMaterial.ambient.a
        const float alpha = new_y_negate ? -1.0f : 1.0f;
        const std::array<float, 4> ambient_color{0.0f, 0.0f, 0.0f, alpha};
        glMaterialfv(GL_FRONT, GL_AMBIENT, ambient_color.data());
    }
}
|
||||
|
||||
void NotifyScreenDrawVertexArray() {
|
||||
flags[OpenGL::Dirty::VertexFormats] = true;
|
||||
flags[OpenGL::Dirty::VertexFormat0 + 0] = true;
|
||||
flags[OpenGL::Dirty::VertexFormat0 + 1] = true;
|
||||
|
||||
flags[OpenGL::Dirty::VertexBuffers] = true;
|
||||
flags[OpenGL::Dirty::VertexBuffer0] = true;
|
||||
flags[VideoCommon::Dirty::VertexBuffers] = true;
|
||||
flags[VideoCommon::Dirty::VertexBuffer0] = true;
|
||||
|
||||
flags[OpenGL::Dirty::VertexInstances] = true;
|
||||
flags[OpenGL::Dirty::VertexInstance0 + 0] = true;
|
||||
@@ -202,6 +215,9 @@ private:
|
||||
|
||||
GLuint framebuffer = 0;
|
||||
GLuint index_buffer = 0;
|
||||
GLenum origin = GL_LOWER_LEFT;
|
||||
GLenum depth = GL_NEGATIVE_ONE_TO_ONE;
|
||||
bool y_negate = false;
|
||||
};
|
||||
|
||||
} // namespace OpenGL
|
||||
|
@@ -1,70 +1,64 @@
|
||||
// Copyright 2018 Citra Emulator Project
|
||||
// Copyright 2021 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include <tuple>
|
||||
#include <vector>
|
||||
#include <array>
|
||||
#include <memory>
|
||||
#include <span>
|
||||
|
||||
#include <glad/glad.h>
|
||||
|
||||
#include "common/alignment.h"
|
||||
#include "common/assert.h"
|
||||
#include "common/microprofile.h"
|
||||
#include "video_core/renderer_opengl/gl_device.h"
|
||||
#include "video_core/renderer_opengl/gl_state_tracker.h"
|
||||
#include "video_core/renderer_opengl/gl_stream_buffer.h"
|
||||
|
||||
MICROPROFILE_DEFINE(OpenGL_StreamBuffer, "OpenGL", "Stream Buffer Orphaning",
|
||||
MP_RGB(128, 128, 192));
|
||||
|
||||
namespace OpenGL {
|
||||
|
||||
OGLStreamBuffer::OGLStreamBuffer(const Device& device, StateTracker& state_tracker_)
|
||||
: state_tracker{state_tracker_} {
|
||||
gl_buffer.Create();
|
||||
|
||||
static constexpr GLbitfield flags = GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT;
|
||||
glNamedBufferStorage(gl_buffer.handle, BUFFER_SIZE, nullptr, flags);
|
||||
mapped_ptr = static_cast<u8*>(
|
||||
glMapNamedBufferRange(gl_buffer.handle, 0, BUFFER_SIZE, flags | GL_MAP_FLUSH_EXPLICIT_BIT));
|
||||
|
||||
if (device.UseAssemblyShaders() || device.HasVertexBufferUnifiedMemory()) {
|
||||
glMakeNamedBufferResidentNV(gl_buffer.handle, GL_READ_ONLY);
|
||||
glGetNamedBufferParameterui64vNV(gl_buffer.handle, GL_BUFFER_GPU_ADDRESS_NV, &gpu_address);
|
||||
StreamBuffer::StreamBuffer() {
|
||||
static constexpr GLenum flags = GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT | GL_MAP_COHERENT_BIT;
|
||||
buffer.Create();
|
||||
glObjectLabel(GL_BUFFER, buffer.handle, -1, "Stream Buffer");
|
||||
glNamedBufferStorage(buffer.handle, STREAM_BUFFER_SIZE, nullptr, flags);
|
||||
mapped_pointer =
|
||||
static_cast<u8*>(glMapNamedBufferRange(buffer.handle, 0, STREAM_BUFFER_SIZE, flags));
|
||||
for (OGLSync& sync : fences) {
|
||||
sync.Create();
|
||||
}
|
||||
}
|
||||
|
||||
OGLStreamBuffer::~OGLStreamBuffer() {
|
||||
glUnmapNamedBuffer(gl_buffer.handle);
|
||||
gl_buffer.Release();
|
||||
}
|
||||
|
||||
std::pair<u8*, GLintptr> OGLStreamBuffer::Map(GLsizeiptr size, GLintptr alignment) {
|
||||
ASSERT(size <= BUFFER_SIZE);
|
||||
ASSERT(alignment <= BUFFER_SIZE);
|
||||
mapped_size = size;
|
||||
|
||||
if (alignment > 0) {
|
||||
buffer_pos = Common::AlignUp<std::size_t>(buffer_pos, alignment);
|
||||
std::pair<std::span<u8>, size_t> StreamBuffer::Request(size_t size) noexcept {
|
||||
ASSERT(size < REGION_SIZE);
|
||||
for (size_t region = Region(used_iterator), region_end = Region(iterator); region < region_end;
|
||||
++region) {
|
||||
fences[region].Create();
|
||||
}
|
||||
used_iterator = iterator;
|
||||
|
||||
if (buffer_pos + size > BUFFER_SIZE) {
|
||||
MICROPROFILE_SCOPE(OpenGL_StreamBuffer);
|
||||
glInvalidateBufferData(gl_buffer.handle);
|
||||
state_tracker.InvalidateStreamBuffer();
|
||||
|
||||
buffer_pos = 0;
|
||||
for (size_t region = Region(free_iterator) + 1,
|
||||
region_end = std::min(Region(iterator + size) + 1, NUM_SYNCS);
|
||||
region < region_end; ++region) {
|
||||
glClientWaitSync(fences[region].handle, 0, GL_TIMEOUT_IGNORED);
|
||||
fences[region].Release();
|
||||
}
|
||||
|
||||
return std::make_pair(mapped_ptr + buffer_pos, buffer_pos);
|
||||
}
|
||||
|
||||
void OGLStreamBuffer::Unmap(GLsizeiptr size) {
|
||||
ASSERT(size <= mapped_size);
|
||||
|
||||
if (size > 0) {
|
||||
glFlushMappedNamedBufferRange(gl_buffer.handle, buffer_pos, size);
|
||||
if (iterator + size > free_iterator) {
|
||||
free_iterator = iterator + size;
|
||||
}
|
||||
if (iterator + size > STREAM_BUFFER_SIZE) {
|
||||
for (size_t region = Region(used_iterator); region < NUM_SYNCS; ++region) {
|
||||
fences[region].Create();
|
||||
}
|
||||
used_iterator = 0;
|
||||
iterator = 0;
|
||||
free_iterator = size;
|
||||
|
||||
buffer_pos += size;
|
||||
for (size_t region = 0, region_end = Region(size); region <= region_end; ++region) {
|
||||
glClientWaitSync(fences[region].handle, 0, GL_TIMEOUT_IGNORED);
|
||||
fences[region].Release();
|
||||
}
|
||||
}
|
||||
const size_t offset = iterator;
|
||||
iterator = Common::AlignUp(iterator + size, MAX_ALIGNMENT);
|
||||
return {std::span(mapped_pointer + offset, size), offset};
|
||||
}
|
||||
|
||||
} // namespace OpenGL
|
||||
|
@@ -1,9 +1,12 @@
|
||||
// Copyright 2018 Citra Emulator Project
|
||||
// Copyright 2021 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <array>
|
||||
#include <memory>
|
||||
#include <span>
|
||||
#include <utility>
|
||||
|
||||
#include <glad/glad.h>
|
||||
@@ -13,48 +16,35 @@
|
||||
|
||||
namespace OpenGL {
|
||||
|
||||
class Device;
|
||||
class StateTracker;
|
||||
class StreamBuffer {
|
||||
static constexpr size_t STREAM_BUFFER_SIZE = 64 * 1024 * 1024;
|
||||
static constexpr size_t NUM_SYNCS = 16;
|
||||
static constexpr size_t REGION_SIZE = STREAM_BUFFER_SIZE / NUM_SYNCS;
|
||||
static constexpr size_t MAX_ALIGNMENT = 256;
|
||||
static_assert(STREAM_BUFFER_SIZE % MAX_ALIGNMENT == 0);
|
||||
static_assert(STREAM_BUFFER_SIZE % NUM_SYNCS == 0);
|
||||
static_assert(REGION_SIZE % MAX_ALIGNMENT == 0);
|
||||
|
||||
class OGLStreamBuffer : private NonCopyable {
|
||||
public:
|
||||
explicit OGLStreamBuffer(const Device& device, StateTracker& state_tracker_);
|
||||
~OGLStreamBuffer();
|
||||
explicit StreamBuffer();
|
||||
|
||||
/*
|
||||
* Allocates a linear chunk of memory in the GPU buffer with at least "size" bytes
|
||||
* and the optional alignment requirement.
|
||||
* If the buffer is full, the whole buffer is reallocated which invalidates old chunks.
|
||||
* The return values are the pointer to the new chunk, and the offset within the buffer.
|
||||
* The actual used size must be specified on unmapping the chunk.
|
||||
*/
|
||||
std::pair<u8*, GLintptr> Map(GLsizeiptr size, GLintptr alignment = 0);
|
||||
[[nodiscard]] std::pair<std::span<u8>, size_t> Request(size_t size) noexcept;
|
||||
|
||||
void Unmap(GLsizeiptr size);
|
||||
|
||||
GLuint Handle() const {
|
||||
return gl_buffer.handle;
|
||||
}
|
||||
|
||||
u64 Address() const {
|
||||
return gpu_address;
|
||||
}
|
||||
|
||||
GLsizeiptr Size() const noexcept {
|
||||
return BUFFER_SIZE;
|
||||
[[nodiscard]] GLuint Handle() const noexcept {
|
||||
return buffer.handle;
|
||||
}
|
||||
|
||||
private:
|
||||
static constexpr GLsizeiptr BUFFER_SIZE = 256 * 1024 * 1024;
|
||||
[[nodiscard]] static size_t Region(size_t offset) noexcept {
|
||||
return offset / REGION_SIZE;
|
||||
}
|
||||
|
||||
StateTracker& state_tracker;
|
||||
|
||||
OGLBuffer gl_buffer;
|
||||
|
||||
GLuint64EXT gpu_address = 0;
|
||||
GLintptr buffer_pos = 0;
|
||||
GLsizeiptr mapped_size = 0;
|
||||
u8* mapped_ptr = nullptr;
|
||||
size_t iterator = 0;
|
||||
size_t used_iterator = 0;
|
||||
size_t free_iterator = 0;
|
||||
u8* mapped_pointer = nullptr;
|
||||
OGLBuffer buffer;
|
||||
std::array<OGLSync, NUM_SYNCS> fences;
|
||||
};
|
||||
|
||||
} // namespace OpenGL
|
||||
|
@@ -398,9 +398,6 @@ void AttachTexture(GLuint fbo, GLenum attachment, const ImageView* image_view) {
|
||||
|
||||
} // Anonymous namespace
|
||||
|
||||
ImageBufferMap::ImageBufferMap(GLuint handle_, u8* map, size_t size, OGLSync* sync_)
|
||||
: span(map, size), sync{sync_}, handle{handle_} {}
|
||||
|
||||
ImageBufferMap::~ImageBufferMap() {
|
||||
if (sync) {
|
||||
sync->Create();
|
||||
@@ -487,11 +484,11 @@ void TextureCacheRuntime::Finish() {
|
||||
glFinish();
|
||||
}
|
||||
|
||||
ImageBufferMap TextureCacheRuntime::MapUploadBuffer(size_t size) {
|
||||
ImageBufferMap TextureCacheRuntime::UploadStagingBuffer(size_t size) {
|
||||
return upload_buffers.RequestMap(size, true);
|
||||
}
|
||||
|
||||
ImageBufferMap TextureCacheRuntime::MapDownloadBuffer(size_t size) {
|
||||
ImageBufferMap TextureCacheRuntime::DownloadStagingBuffer(size_t size) {
|
||||
return download_buffers.RequestMap(size, false);
|
||||
}
|
||||
|
||||
@@ -596,7 +593,11 @@ ImageBufferMap TextureCacheRuntime::StagingBuffers::RequestMap(size_t requested_
|
||||
bool insert_fence) {
|
||||
const size_t index = RequestBuffer(requested_size);
|
||||
OGLSync* const sync = insert_fence ? &syncs[index] : nullptr;
|
||||
return ImageBufferMap(buffers[index].handle, maps[index], requested_size, sync);
|
||||
return ImageBufferMap{
|
||||
.mapped_span = std::span(maps[index], requested_size),
|
||||
.sync = sync,
|
||||
.buffer = buffers[index].handle,
|
||||
};
|
||||
}
|
||||
|
||||
size_t TextureCacheRuntime::StagingBuffers::RequestBuffer(size_t requested_size) {
|
||||
@@ -711,7 +712,7 @@ Image::Image(TextureCacheRuntime& runtime, const VideoCommon::ImageInfo& info_,
|
||||
|
||||
void Image::UploadMemory(const ImageBufferMap& map, size_t buffer_offset,
|
||||
std::span<const VideoCommon::BufferImageCopy> copies) {
|
||||
glBindBuffer(GL_PIXEL_UNPACK_BUFFER, map.Handle());
|
||||
glBindBuffer(GL_PIXEL_UNPACK_BUFFER, map.buffer);
|
||||
glFlushMappedBufferRange(GL_PIXEL_UNPACK_BUFFER, buffer_offset, unswizzled_size_bytes);
|
||||
|
||||
glPixelStorei(GL_UNPACK_ALIGNMENT, 1);
|
||||
@@ -735,7 +736,7 @@ void Image::UploadMemory(const ImageBufferMap& map, size_t buffer_offset,
|
||||
void Image::UploadMemory(const ImageBufferMap& map, size_t buffer_offset,
|
||||
std::span<const VideoCommon::BufferCopy> copies) {
|
||||
for (const VideoCommon::BufferCopy& copy : copies) {
|
||||
glCopyNamedBufferSubData(map.Handle(), buffer.handle, copy.src_offset + buffer_offset,
|
||||
glCopyNamedBufferSubData(map.buffer, buffer.handle, copy.src_offset + buffer_offset,
|
||||
copy.dst_offset, copy.size);
|
||||
}
|
||||
}
|
||||
@@ -744,7 +745,7 @@ void Image::DownloadMemory(ImageBufferMap& map, size_t buffer_offset,
|
||||
std::span<const VideoCommon::BufferImageCopy> copies) {
|
||||
glMemoryBarrier(GL_PIXEL_BUFFER_BARRIER_BIT); // TODO: Move this to its own API
|
||||
|
||||
glBindBuffer(GL_PIXEL_PACK_BUFFER, map.Handle());
|
||||
glBindBuffer(GL_PIXEL_PACK_BUFFER, map.buffer);
|
||||
glPixelStorei(GL_PACK_ALIGNMENT, 1);
|
||||
|
||||
u32 current_row_length = std::numeric_limits<u32>::max();
|
||||
|
@@ -31,23 +31,12 @@ using VideoCommon::NUM_RT;
|
||||
using VideoCommon::Offset2D;
|
||||
using VideoCommon::RenderTargets;
|
||||
|
||||
class ImageBufferMap {
|
||||
public:
|
||||
explicit ImageBufferMap(GLuint handle, u8* map, size_t size, OGLSync* sync);
|
||||
struct ImageBufferMap {
|
||||
~ImageBufferMap();
|
||||
|
||||
GLuint Handle() const noexcept {
|
||||
return handle;
|
||||
}
|
||||
|
||||
std::span<u8> Span() const noexcept {
|
||||
return span;
|
||||
}
|
||||
|
||||
private:
|
||||
std::span<u8> span;
|
||||
std::span<u8> mapped_span;
|
||||
OGLSync* sync;
|
||||
GLuint handle;
|
||||
GLuint buffer;
|
||||
};
|
||||
|
||||
struct FormatProperties {
|
||||
@@ -69,9 +58,9 @@ public:
|
||||
|
||||
void Finish();
|
||||
|
||||
ImageBufferMap MapUploadBuffer(size_t size);
|
||||
ImageBufferMap UploadStagingBuffer(size_t size);
|
||||
|
||||
ImageBufferMap MapDownloadBuffer(size_t size);
|
||||
ImageBufferMap DownloadStagingBuffer(size_t size);
|
||||
|
||||
void CopyImage(Image& dst, Image& src, std::span<const VideoCommon::ImageCopy> copies);
|
||||
|
||||
|
@@ -27,11 +27,14 @@
|
||||
#include "video_core/renderer_opengl/gl_shader_manager.h"
|
||||
#include "video_core/renderer_opengl/renderer_opengl.h"
|
||||
#include "video_core/textures/decoders.h"
|
||||
#include "video_core/vulkan_common/vulkan_debug_callback.h"
|
||||
#include "video_core/vulkan_common/vulkan_device.h"
|
||||
#include "video_core/vulkan_common/vulkan_instance.h"
|
||||
#include "video_core/vulkan_common/vulkan_library.h"
|
||||
#include "video_core/vulkan_common/vulkan_memory_allocator.h"
|
||||
|
||||
namespace OpenGL {
|
||||
|
||||
namespace {
|
||||
|
||||
constexpr GLint PositionLocation = 0;
|
||||
constexpr GLint TexCoordLocation = 1;
|
||||
constexpr GLint ModelViewMatrixLocation = 0;
|
||||
@@ -125,14 +128,100 @@ void APIENTRY DebugHandler(GLenum source, GLenum type, GLuint id, GLenum severit
|
||||
}
|
||||
}
|
||||
|
||||
/// Finds the Vulkan physical device that matches the device behind the current OpenGL context.
/// The match is done by comparing the driver and device UUIDs reported by OpenGL with the ones
/// in VkPhysicalDeviceIDProperties of each enumerated physical device.
/// @param instance Vulkan instance whose physical devices are enumerated
/// @return The first physical device whose driver UUID equals the OpenGL driver UUID and whose
///         device UUID equals any of the OpenGL device UUIDs
/// @throws Vulkan::vk::Exception (VK_ERROR_INCOMPATIBLE_DRIVER) when no device matches
Vulkan::vk::PhysicalDevice FindPhysicalDevice(Vulkan::vk::Instance& instance) {
    using namespace Vulkan;
    using UUID = std::array<GLubyte, GL_UUID_SIZE_EXT>;

    // Zero-initialize so a failed query yields an empty list instead of an indeterminate count
    GLint num_device_uuids = 0;
    glGetIntegerv(GL_NUM_DEVICE_UUIDS_EXT, &num_device_uuids);
    const std::size_t uuid_count =
        num_device_uuids > 0 ? static_cast<std::size_t>(num_device_uuids) : 0;
    std::vector<UUID> device_uuids(uuid_count);
    for (std::size_t index = 0; index < uuid_count; ++index) {
        // Each UUID has to be queried with its own index; querying index 0 every time would
        // fill the whole list with copies of the first device UUID
        glGetUnsignedBytei_vEXT(GL_DEVICE_UUID_EXT, static_cast<GLuint>(index),
                                device_uuids[index].data());
    }
    UUID driver_uuid{};
    glGetUnsignedBytevEXT(GL_DRIVER_UUID_EXT, driver_uuid.data());

    for (const VkPhysicalDevice raw_physical_device : instance.EnumeratePhysicalDevices()) {
        VkPhysicalDeviceIDProperties device_id_properties{};
        device_id_properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ID_PROPERTIES;

        // Chain the ID properties so GetProperties2KHR fills in the UUIDs
        VkPhysicalDeviceProperties2KHR properties{
            .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2_KHR,
            .pNext = &device_id_properties,
            .properties{},
        };
        vk::PhysicalDevice physical_device(raw_physical_device, instance.Dispatch());
        physical_device.GetProperties2KHR(properties);
        if (!std::ranges::equal(device_id_properties.driverUUID, driver_uuid)) {
            continue;
        }
        const auto it =
            std::ranges::find_if(device_uuids, [&device_id_properties](const UUID& uuid) {
                return std::ranges::equal(device_id_properties.deviceUUID, uuid);
            });
        if (it != device_uuids.end()) {
            return physical_device;
        }
    }
    throw vk::Exception(VK_ERROR_INCOMPATIBLE_DRIVER);
}
|
||||
} // Anonymous namespace
|
||||
|
||||
struct VulkanObjects {
|
||||
static std::unique_ptr<VulkanObjects> TryCreate() {
|
||||
if (!GLAD_GL_EXT_memory_object) {
|
||||
// Interop is not present
|
||||
return nullptr;
|
||||
}
|
||||
const std::string_view vendor{reinterpret_cast<const char*>(glGetString(GL_VENDOR))};
|
||||
if (vendor == "ATI Technologies Inc.") {
|
||||
// Avoid using GL_EXT_memory_object on AMD, as it makes the GL driver crash
|
||||
return nullptr;
|
||||
}
|
||||
if (!Settings::values.use_assembly_shaders.GetValue()) {
|
||||
// We only need interop when assembly shaders are enabled
|
||||
return nullptr;
|
||||
}
|
||||
#ifdef __linux__
|
||||
LOG_WARNING(Render_OpenGL, "Interop doesn't work on Linux at the moment");
|
||||
return nullptr;
|
||||
#endif
|
||||
try {
|
||||
return std::make_unique<VulkanObjects>();
|
||||
} catch (const Vulkan::vk::Exception& exception) {
|
||||
LOG_ERROR(Render_OpenGL, "Failed to initialize Vulkan objects with error: {}",
|
||||
exception.what());
|
||||
return nullptr;
|
||||
}
|
||||
}
|
||||
|
||||
Common::DynamicLibrary library{Vulkan::OpenLibrary()};
|
||||
Vulkan::vk::InstanceDispatch dld;
|
||||
Vulkan::vk::Instance instance{Vulkan::CreateInstance(library, dld, VK_API_VERSION_1_1)};
|
||||
Vulkan::Device device{*instance, FindPhysicalDevice(instance), nullptr, dld};
|
||||
Vulkan::MemoryAllocator memory_allocator{device, true};
|
||||
};
|
||||
|
||||
/// Constructs the renderer: forwards the window and graphics context to RendererBase, stores the
/// references it is given, tries to create the Vulkan interop objects, and builds the device,
/// state tracker, program manager and rasterizer from them. The body then optionally enables GL
/// debug output, adds the telemetry fields and creates the persistent OpenGL objects.
RendererOpenGL::RendererOpenGL(Core::TelemetrySession& telemetry_session_,
|
||||
Core::Frontend::EmuWindow& emu_window_,
|
||||
Core::Memory::Memory& cpu_memory_, Tegra::GPU& gpu_,
|
||||
std::unique_ptr<Core::Frontend::GraphicsContext> context_)
|
||||
: RendererBase{emu_window_, std::move(context_)}, telemetry_session{telemetry_session_},
|
||||
// NOTE(review): this listing comes from a diff view; the next initializer line looks like the
// pre-change version of the lines that follow it - confirm before treating both as live code.
emu_window{emu_window_}, cpu_memory{cpu_memory_}, gpu{gpu_}, program_manager{device} {}
|
||||
emu_window{emu_window_}, cpu_memory{cpu_memory_}, gpu{gpu_},
|
||||
// device is constructed with whether TryCreate() returned a non-null object
vulkan_objects{VulkanObjects::TryCreate()}, device{vulkan_objects != nullptr},
|
||||
state_tracker{gpu}, program_manager{device},
|
||||
// The rasterizer receives pointers to the Vulkan device and memory allocator, or nullptr for
// both when vulkan_objects is null
rasterizer(emu_window, gpu, cpu_memory, device,
|
||||
vulkan_objects ? &vulkan_objects->device : nullptr,
|
||||
vulkan_objects ? &vulkan_objects->memory_allocator : nullptr, screen_info,
|
||||
program_manager, state_tracker) {
|
||||
// Install the debug callback only when the renderer_debug setting is on and KHR_debug is loaded
if (Settings::values.renderer_debug && GLAD_GL_KHR_debug) {
|
||||
glEnable(GL_DEBUG_OUTPUT);
|
||||
glEnable(GL_DEBUG_OUTPUT_SYNCHRONOUS);
|
||||
glDebugMessageCallback(DebugHandler, nullptr);
|
||||
}
|
||||
AddTelemetryFields();
|
||||
InitOpenGLObjects();
|
||||
}
|
||||
|
||||
RendererOpenGL::~RendererOpenGL() = default;
|
||||
|
||||
@@ -148,7 +237,7 @@ void RendererOpenGL::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {
|
||||
|
||||
++m_current_frame;
|
||||
|
||||
rasterizer->TickFrame();
|
||||
rasterizer.TickFrame();
|
||||
|
||||
context->SwapBuffers();
|
||||
render_window.OnFrameDisplayed();
|
||||
@@ -179,7 +268,7 @@ void RendererOpenGL::LoadFBToScreenInfo(const Tegra::FramebufferConfig& framebuf
|
||||
framebuffer_crop_rect = framebuffer.crop_rect;
|
||||
|
||||
const VAddr framebuffer_addr{framebuffer.address + framebuffer.offset};
|
||||
if (rasterizer->AccelerateDisplay(framebuffer, framebuffer_addr, framebuffer.stride)) {
|
||||
if (rasterizer.AccelerateDisplay(framebuffer, framebuffer_addr, framebuffer.stride)) {
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -267,6 +356,7 @@ void RendererOpenGL::InitOpenGLObjects() {
|
||||
// Enable unified vertex attributes and query vertex buffer address when the driver supports it
|
||||
if (device.HasVertexBufferUnifiedMemory()) {
|
||||
glEnableClientState(GL_VERTEX_ATTRIB_ARRAY_UNIFIED_NV);
|
||||
glEnableClientState(GL_ELEMENT_ARRAY_UNIFIED_NV);
|
||||
|
||||
glMakeNamedBufferResidentNV(vertex_buffer.handle, GL_READ_ONLY);
|
||||
glGetNamedBufferParameterui64vNV(vertex_buffer.handle, GL_BUFFER_GPU_ADDRESS_NV,
|
||||
@@ -289,14 +379,6 @@ void RendererOpenGL::AddTelemetryFields() {
|
||||
telemetry_session.AddField(user_system, "GPU_OpenGL_Version", std::string(gl_version));
|
||||
}
|
||||
|
||||
/// Creates the OpenGL rasterizer the first time it is called; later calls return early because
/// `rasterizer` is already set.
void RendererOpenGL::CreateRasterizer() {
|
||||
// Already created, nothing to do
if (rasterizer) {
|
||||
return;
|
||||
}
|
||||
rasterizer = std::make_unique<RasterizerOpenGL>(emu_window, gpu, cpu_memory, device,
|
||||
screen_info, program_manager, state_tracker);
|
||||
}
|
||||
|
||||
void RendererOpenGL::ConfigureFramebufferTexture(TextureInfo& texture,
|
||||
const Tegra::FramebufferConfig& framebuffer) {
|
||||
texture.width = framebuffer.width;
|
||||
@@ -407,6 +489,7 @@ void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) {
|
||||
|
||||
program_manager.BindHostPipeline(pipeline.handle);
|
||||
|
||||
state_tracker.ClipControl(GL_LOWER_LEFT, GL_ZERO_TO_ONE);
|
||||
glEnable(GL_CULL_FACE);
|
||||
if (screen_info.display_srgb) {
|
||||
glEnable(GL_FRAMEBUFFER_SRGB);
|
||||
@@ -425,7 +508,6 @@ void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) {
|
||||
glCullFace(GL_BACK);
|
||||
glFrontFace(GL_CW);
|
||||
glColorMaski(0, GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE);
|
||||
glClipControl(GL_LOWER_LEFT, GL_ZERO_TO_ONE);
|
||||
glViewportIndexedf(0, 0.0f, 0.0f, static_cast<GLfloat>(layout.width),
|
||||
static_cast<GLfloat>(layout.height));
|
||||
glDepthRangeIndexed(0, 0.0, 0.0);
|
||||
@@ -497,25 +579,4 @@ void RendererOpenGL::RenderScreenshot() {
|
||||
renderer_settings.screenshot_requested = false;
|
||||
}
|
||||
|
||||
/// Performs renderer start-up.
///
/// Debug output is configured and the telemetry fields are added before the OpenGL version is
/// checked, so both happen even when this returns false.
///
/// @return false when GLAD_GL_VERSION_4_3 is not set; true after the OpenGL objects and the
///         rasterizer have been created.
bool RendererOpenGL::Init() {
|
||||
// Install the debug callback only when the renderer_debug setting is on and KHR_debug is loaded
if (Settings::values.renderer_debug && GLAD_GL_KHR_debug) {
|
||||
glEnable(GL_DEBUG_OUTPUT);
|
||||
glEnable(GL_DEBUG_OUTPUT_SYNCHRONOUS);
|
||||
glDebugMessageCallback(DebugHandler, nullptr);
|
||||
}
|
||||
|
|
||||
AddTelemetryFields();
|
||||
|
|
||||
// Bail out before creating any GL objects when the 4.3 feature flag is not set
if (!GLAD_GL_VERSION_4_3) {
|
||||
return false;
|
||||
}
|
||||
|
|
||||
InitOpenGLObjects();
|
||||
CreateRasterizer();
|
||||
|
|
||||
return true;
|
||||
}
|
||||
|
||||
/// Empty override: no work is done here.
void RendererOpenGL::ShutDown() {}
|
||||
|
||||
} // namespace OpenGL
|
||||
|
@@ -10,6 +10,7 @@
|
||||
#include "common/math_util.h"
|
||||
#include "video_core/renderer_base.h"
|
||||
#include "video_core/renderer_opengl/gl_device.h"
|
||||
#include "video_core/renderer_opengl/gl_rasterizer.h"
|
||||
#include "video_core/renderer_opengl/gl_resource_manager.h"
|
||||
#include "video_core/renderer_opengl/gl_shader_manager.h"
|
||||
#include "video_core/renderer_opengl/gl_state_tracker.h"
|
||||
@@ -37,6 +38,8 @@ class GPU;
|
||||
|
||||
namespace OpenGL {
|
||||
|
||||
struct VulkanObjects;
|
||||
|
||||
/// Structure used for storing information about the textures for the Switch screen
|
||||
struct TextureInfo {
|
||||
OGLTexture resource;
|
||||
@@ -63,18 +66,18 @@ public:
|
||||
std::unique_ptr<Core::Frontend::GraphicsContext> context_);
|
||||
~RendererOpenGL() override;
|
||||
|
||||
bool Init() override;
|
||||
void ShutDown() override;
|
||||
void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) override;
|
||||
|
||||
/// Returns a pointer to this renderer's `rasterizer` member.
VideoCore::RasterizerInterface* ReadRasterizer() override {
|
||||
return &rasterizer;
|
||||
}
|
||||
|
||||
private:
|
||||
/// Initializes the OpenGL state and creates persistent objects.
|
||||
void InitOpenGLObjects();
|
||||
|
||||
void AddTelemetryFields();
|
||||
|
||||
void CreateRasterizer();
|
||||
|
||||
void ConfigureFramebufferTexture(TextureInfo& texture,
|
||||
const Tegra::FramebufferConfig& framebuffer);
|
||||
|
||||
@@ -98,8 +101,11 @@ private:
|
||||
Core::Memory::Memory& cpu_memory;
|
||||
Tegra::GPU& gpu;
|
||||
|
||||
const Device device;
|
||||
StateTracker state_tracker{gpu};
|
||||
std::unique_ptr<VulkanObjects> vulkan_objects;
|
||||
Device device;
|
||||
StateTracker state_tracker;
|
||||
ProgramManager program_manager;
|
||||
RasterizerOpenGL rasterizer;
|
||||
|
||||
// OpenGL object IDs
|
||||
OGLSampler present_sampler;
|
||||
@@ -115,9 +121,6 @@ private:
|
||||
/// Display information for Switch screen
|
||||
ScreenInfo screen_info;
|
||||
|
||||
/// Global dummy shader pipeline
|
||||
ProgramManager program_manager;
|
||||
|
||||
/// OpenGL framebuffer data
|
||||
std::vector<u8> gl_framebuffer_data;
|
||||
|
||||
|
@@ -71,7 +71,7 @@ void UtilShaders::BlockLinearUpload2D(Image& image, const ImageBufferMap& map, s
|
||||
static constexpr GLuint BINDING_OUTPUT_IMAGE = 0;
|
||||
|
||||
program_manager.BindHostCompute(block_linear_unswizzle_2d_program.handle);
|
||||
glFlushMappedNamedBufferRange(map.Handle(), buffer_offset, image.guest_size_bytes);
|
||||
glFlushMappedNamedBufferRange(map.buffer, buffer_offset, image.guest_size_bytes);
|
||||
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, BINDING_SWIZZLE_BUFFER, swizzle_table_buffer.handle);
|
||||
|
||||
const GLenum store_format = StoreFormat(BytesPerBlock(image.info.format));
|
||||
@@ -91,8 +91,8 @@ void UtilShaders::BlockLinearUpload2D(Image& image, const ImageBufferMap& map, s
|
||||
glUniform1ui(5, params.x_shift);
|
||||
glUniform1ui(6, params.block_height);
|
||||
glUniform1ui(7, params.block_height_mask);
|
||||
glBindBufferRange(GL_SHADER_STORAGE_BUFFER, BINDING_INPUT_BUFFER, map.Handle(),
|
||||
input_offset, image.guest_size_bytes - swizzle.buffer_offset);
|
||||
glBindBufferRange(GL_SHADER_STORAGE_BUFFER, BINDING_INPUT_BUFFER, map.buffer, input_offset,
|
||||
image.guest_size_bytes - swizzle.buffer_offset);
|
||||
glBindImageTexture(BINDING_OUTPUT_IMAGE, image.Handle(), swizzle.level, GL_TRUE, 0,
|
||||
GL_WRITE_ONLY, store_format);
|
||||
glDispatchCompute(num_dispatches_x, num_dispatches_y, image.info.resources.layers);
|
||||
@@ -108,7 +108,7 @@ void UtilShaders::BlockLinearUpload3D(Image& image, const ImageBufferMap& map, s
|
||||
static constexpr GLuint BINDING_INPUT_BUFFER = 1;
|
||||
static constexpr GLuint BINDING_OUTPUT_IMAGE = 0;
|
||||
|
||||
glFlushMappedNamedBufferRange(map.Handle(), buffer_offset, image.guest_size_bytes);
|
||||
glFlushMappedNamedBufferRange(map.buffer, buffer_offset, image.guest_size_bytes);
|
||||
program_manager.BindHostCompute(block_linear_unswizzle_3d_program.handle);
|
||||
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, BINDING_SWIZZLE_BUFFER, swizzle_table_buffer.handle);
|
||||
|
||||
@@ -132,8 +132,8 @@ void UtilShaders::BlockLinearUpload3D(Image& image, const ImageBufferMap& map, s
|
||||
glUniform1ui(7, params.block_height_mask);
|
||||
glUniform1ui(8, params.block_depth);
|
||||
glUniform1ui(9, params.block_depth_mask);
|
||||
glBindBufferRange(GL_SHADER_STORAGE_BUFFER, BINDING_INPUT_BUFFER, map.Handle(),
|
||||
input_offset, image.guest_size_bytes - swizzle.buffer_offset);
|
||||
glBindBufferRange(GL_SHADER_STORAGE_BUFFER, BINDING_INPUT_BUFFER, map.buffer, input_offset,
|
||||
image.guest_size_bytes - swizzle.buffer_offset);
|
||||
glBindImageTexture(BINDING_OUTPUT_IMAGE, image.Handle(), swizzle.level, GL_TRUE, 0,
|
||||
GL_WRITE_ONLY, store_format);
|
||||
glDispatchCompute(num_dispatches_x, num_dispatches_y, num_dispatches_z);
|
||||
@@ -159,7 +159,7 @@ void UtilShaders::PitchUpload(Image& image, const ImageBufferMap& map, size_t bu
|
||||
"Non-power of two images are not implemented");
|
||||
|
||||
program_manager.BindHostCompute(pitch_unswizzle_program.handle);
|
||||
glFlushMappedNamedBufferRange(map.Handle(), buffer_offset, image.guest_size_bytes);
|
||||
glFlushMappedNamedBufferRange(map.buffer, buffer_offset, image.guest_size_bytes);
|
||||
glUniform2ui(LOC_ORIGIN, 0, 0);
|
||||
glUniform2i(LOC_DESTINATION, 0, 0);
|
||||
glUniform1ui(LOC_BYTES_PER_BLOCK, bytes_per_block);
|
||||
@@ -172,8 +172,8 @@ void UtilShaders::PitchUpload(Image& image, const ImageBufferMap& map, size_t bu
|
||||
const u32 num_dispatches_x = Common::DivCeil(num_tiles.width, WORKGROUP_SIZE.width);
|
||||
const u32 num_dispatches_y = Common::DivCeil(num_tiles.height, WORKGROUP_SIZE.height);
|
||||
|
||||
glBindBufferRange(GL_SHADER_STORAGE_BUFFER, BINDING_INPUT_BUFFER, map.Handle(),
|
||||
input_offset, image.guest_size_bytes - swizzle.buffer_offset);
|
||||
glBindBufferRange(GL_SHADER_STORAGE_BUFFER, BINDING_INPUT_BUFFER, map.buffer, input_offset,
|
||||
image.guest_size_bytes - swizzle.buffer_offset);
|
||||
glDispatchCompute(num_dispatches_x, num_dispatches_y, 1);
|
||||
}
|
||||
program_manager.RestoreGuestCompute();
|
||||
|
@@ -15,9 +15,10 @@
|
||||
namespace OpenGL {
|
||||
|
||||
class Image;
|
||||
class ImageBufferMap;
|
||||
class ProgramManager;
|
||||
|
||||
struct ImageBufferMap;
|
||||
|
||||
class UtilShaders {
|
||||
public:
|
||||
explicit UtilShaders(ProgramManager& program_manager);
|
||||
|
Reference in New Issue
Block a user