early-access version 1332

This commit is contained in:
pineappleEA
2021-01-17 03:19:34 +01:00
parent 233493ad87
commit f70af7672d
126 changed files with 3856 additions and 3241 deletions

View File: src/video_core/renderer_opengl/gl_buffer_cache.cpp

@@ -2,98 +2,235 @@
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <memory>
#include <span>
#include <glad/glad.h>
#include "common/assert.h"
#include "common/microprofile.h"
#include "video_core/buffer_cache/buffer_cache.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/rasterizer_interface.h"
#include "video_core/renderer_opengl/gl_buffer_cache.h"
#include "video_core/renderer_opengl/gl_device.h"
#include "video_core/renderer_opengl/gl_rasterizer.h"
#include "video_core/renderer_opengl/gl_resource_manager.h"
#include "video_core/vulkan_common/vulkan_device.h"
#include "video_core/vulkan_common/vulkan_instance.h"
#include "video_core/vulkan_common/vulkan_library.h"
#include "video_core/vulkan_common/vulkan_memory_allocator.h"
namespace OpenGL {
namespace {
struct BindlessSSBO {
GLuint64EXT address;
GLsizei length;
GLsizei padding;
};
static_assert(sizeof(BindlessSSBO) == sizeof(GLuint) * 4);
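Context note: the padding matters because NV assembly shaders consume a bindless SSBO descriptor as a single uvec4 program parameter, so the struct must be bit-castable to four GLuints. A minimal sketch of the upload this enables (gpu_addr, size, and the binding index are placeholder values, not from this commit):

    const BindlessSSBO ssbo{
        .address = gpu_addr, // obtained via glGetNamedBufferParameterui64vNV
        .length = static_cast<GLsizei>(size),
        .padding = 0,
    };
    // One struct is exactly one uvec4 local parameter, hence the static_assert.
    glProgramLocalParametersI4uivNV(GL_FRAGMENT_PROGRAM_NV, 0, 1,
                                    reinterpret_cast<const GLuint*>(&ssbo));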
using Maxwell = Tegra::Engines::Maxwell3D::Regs;
constexpr std::array PROGRAM_LUT{
GL_VERTEX_PROGRAM_NV, GL_TESS_CONTROL_PROGRAM_NV, GL_TESS_EVALUATION_PROGRAM_NV,
GL_GEOMETRY_PROGRAM_NV, GL_FRAGMENT_PROGRAM_NV,
};
} // Anonymous namespace
MICROPROFILE_DEFINE(OpenGL_Buffer_Download, "OpenGL", "Buffer Download", MP_RGB(192, 192, 128));
Buffer::Buffer(BufferCacheRuntime&, VideoCommon::NullBufferParams null_params)
: VideoCommon::BufferBase<VideoCore::RasterizerInterface>(null_params) {}
Buffer::Buffer(const Device& device_, VAddr cpu_addr_, std::size_t size_)
: BufferBlock{cpu_addr_, size_} {
gl_buffer.Create();
glNamedBufferData(gl_buffer.handle, static_cast<GLsizeiptr>(size_), nullptr, GL_DYNAMIC_DRAW);
if (device_.UseAssemblyShaders() || device_.HasVertexBufferUnifiedMemory()) {
glMakeNamedBufferResidentNV(gl_buffer.handle, GL_READ_WRITE);
glGetNamedBufferParameterui64vNV(gl_buffer.handle, GL_BUFFER_GPU_ADDRESS_NV, &gpu_address);
Buffer::Buffer(BufferCacheRuntime& runtime, VideoCore::RasterizerInterface& rasterizer_,
VAddr cpu_addr_, u64 size_bytes_)
: VideoCommon::BufferBase<VideoCore::RasterizerInterface>(rasterizer_, cpu_addr_, size_bytes_) {
buffer.Create();
const std::string name = fmt::format("Buffer 0x{:x}", CpuAddr());
glObjectLabel(GL_BUFFER, buffer.handle, static_cast<GLsizei>(name.size()), name.data());
if (runtime.device.UseAssemblyShaders()) {
CreateMemoryObjects(runtime);
glNamedBufferStorageMemEXT(buffer.handle, SizeBytes(), memory_commit.ExportOpenGLHandle(),
memory_commit.Offset());
} else {
glNamedBufferData(buffer.handle, SizeBytes(), nullptr, GL_DYNAMIC_DRAW);
}
if (runtime.has_unified_vertex_buffers) {
glGetNamedBufferParameterui64vNV(buffer.handle, GL_BUFFER_GPU_ADDRESS_NV, &address);
}
}
Buffer::~Buffer() = default;
void Buffer::Upload(std::size_t offset, std::size_t data_size, const u8* data) {
glNamedBufferSubData(Handle(), static_cast<GLintptr>(offset),
static_cast<GLsizeiptr>(data_size), data);
void Buffer::ImmediateUpload(size_t offset, std::span<const u8> data) noexcept {
glNamedBufferSubData(buffer.handle, static_cast<GLintptr>(offset),
static_cast<GLsizeiptr>(data.size_bytes()), data.data());
}
void Buffer::Download(std::size_t offset, std::size_t data_size, u8* data) {
MICROPROFILE_SCOPE(OpenGL_Buffer_Download);
const GLsizeiptr gl_size = static_cast<GLsizeiptr>(data_size);
const GLintptr gl_offset = static_cast<GLintptr>(offset);
if (read_buffer.handle == 0) {
read_buffer.Create();
glNamedBufferData(read_buffer.handle, static_cast<GLsizeiptr>(Size()), nullptr,
GL_STREAM_READ);
}
glMemoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT);
glCopyNamedBufferSubData(gl_buffer.handle, read_buffer.handle, gl_offset, gl_offset, gl_size);
glGetNamedBufferSubData(read_buffer.handle, gl_offset, gl_size, data);
void Buffer::ImmediateDownload(size_t offset, std::span<u8> data) noexcept {
glGetNamedBufferSubData(buffer.handle, static_cast<GLintptr>(offset),
static_cast<GLsizeiptr>(data.size_bytes()), data.data());
}
void Buffer::CopyFrom(const Buffer& src, std::size_t src_offset, std::size_t dst_offset,
std::size_t copy_size) {
glCopyNamedBufferSubData(src.Handle(), Handle(), static_cast<GLintptr>(src_offset),
static_cast<GLintptr>(dst_offset), static_cast<GLsizeiptr>(copy_size));
}
OGLBufferCache::OGLBufferCache(VideoCore::RasterizerInterface& rasterizer_,
Tegra::MemoryManager& gpu_memory_, Core::Memory::Memory& cpu_memory_,
const Device& device_, OGLStreamBuffer& stream_buffer_,
StateTracker& state_tracker)
: GenericBufferCache{rasterizer_, gpu_memory_, cpu_memory_, stream_buffer_}, device{device_} {
if (!device.HasFastBufferSubData()) {
void Buffer::MakeResident(GLenum access) noexcept {
// Abuse GLenum's order to exit early
// GL_NONE (default) < GL_READ_ONLY < GL_READ_WRITE
if (access <= current_residency_access || buffer.handle == 0) {
return;
}
if (std::exchange(current_residency_access, access) != GL_NONE) {
// If the buffer is already resident, remove its residency before promoting it
glMakeNamedBufferNonResidentNV(buffer.handle);
}
glMakeNamedBufferResidentNV(buffer.handle, access);
}
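Note: the early exit works because these tokens happen to be numerically ordered (GL_NONE is 0, GL_READ_ONLY is 0x88B8, GL_READ_WRITE is 0x88BA), so residency is only ever promoted, never redundantly re-applied or demoted. A compile-time check one could add to document the assumption (a sketch, not part of the commit):

    static_assert(GL_NONE < GL_READ_ONLY && GL_READ_ONLY < GL_READ_WRITE,
                  "MakeResident's early exit relies on this token ordering");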
static constexpr GLsizeiptr size = static_cast<GLsizeiptr>(Maxwell::MaxConstBufferSize);
glCreateBuffers(static_cast<GLsizei>(std::size(cbufs)), std::data(cbufs));
for (const GLuint cbuf : cbufs) {
glNamedBufferData(cbuf, size, nullptr, GL_STREAM_DRAW);
GLuint Buffer::SubBuffer(u32 offset) {
if (offset == 0) {
return buffer.handle;
}
for (const auto& [sub_buffer, sub_offset] : subs) {
if (sub_offset == offset) {
return sub_buffer.handle;
}
}
OGLBuffer sub_buffer;
sub_buffer.Create();
glNamedBufferStorageMemEXT(sub_buffer.handle, SizeBytes() - offset,
memory_commit.ExportOpenGLHandle(), memory_commit.Offset() + offset);
return subs.emplace_back(std::move(sub_buffer), offset).first.handle;
}
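Note: sub-buffers exist because the assembly-shader parameter-buffer binds in this file always pass 0 as the buffer offset; rather than offsetting at bind time, the runtime aliases the same exported memory starting at `offset` through a second buffer object. Hypothetical usage (uniform_offset and uniform_size are placeholders):

    const GLuint handle = buffer.SubBuffer(uniform_offset);
    glBindBufferRangeNV(GL_VERTEX_PROGRAM_PARAMETER_BUFFER_NV, /*index=*/0, handle,
                        /*offset=*/0, static_cast<GLsizeiptr>(uniform_size));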
void Buffer::CreateMemoryObjects(BufferCacheRuntime& runtime) {
auto& allocator = runtime.vulkan_memory_allocator;
auto& device = runtime.vulkan_device->GetLogical();
auto vulkan_buffer = device.CreateBuffer(VkBufferCreateInfo{
.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
.size = SizeBytes(),
.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT |
VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT |
VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT | VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT |
VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_INDEX_BUFFER_BIT |
VK_BUFFER_USAGE_VERTEX_BUFFER_BIT,
.sharingMode = VK_SHARING_MODE_EXCLUSIVE,
.queueFamilyIndexCount = 0,
.pQueueFamilyIndices = nullptr,
});
const VkMemoryRequirements requirements = device.GetBufferMemoryRequirements(*vulkan_buffer);
memory_commit = allocator->Commit(requirements, Vulkan::MemoryUsage::DeviceLocal);
}
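Note: ExportOpenGLHandle() presumably wraps the GL_EXT_memory_object import of this VkDeviceMemory. A minimal sketch of the POSIX fd flavor of that interop (exported_fd, commit_size, and commit_offset are assumed names; Windows would go through glImportMemoryWin32HandleEXT instead):

    GLuint memory_object = 0;
    glCreateMemoryObjectsEXT(1, &memory_object);
    // The import takes ownership of the fd exported from the Vulkan allocation.
    glImportMemoryFdEXT(memory_object, commit_size, GL_HANDLE_TYPE_OPAQUE_FD_EXT, exported_fd);
    // Back the GL buffer with the imported Vulkan memory at the commit's offset.
    glNamedBufferStorageMemEXT(buffer.handle, commit_size, memory_object, commit_offset);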
BufferCacheRuntime::BufferCacheRuntime(const Device& device_, const Vulkan::Device* vulkan_device_,
Vulkan::MemoryAllocator* vulkan_memory_allocator_)
: device{device_}, vulkan_device{vulkan_device_},
vulkan_memory_allocator{vulkan_memory_allocator_},
stream_buffer{device.HasFastBufferSubData() ? std::nullopt
: std::make_optional<StreamBuffer>()} {
GLint gl_max_attributes;
glGetIntegerv(GL_MAX_VERTEX_ATTRIBS, &gl_max_attributes);
max_attributes = static_cast<u32>(gl_max_attributes);
use_assembly_shaders = device.UseAssemblyShaders();
has_unified_vertex_buffers = device.HasVertexBufferUnifiedMemory();
for (auto& stage_uniforms : fast_uniforms) {
for (OGLBuffer& buffer : stage_uniforms) {
buffer.Create();
glNamedBufferData(buffer.handle, BufferCache::SKIP_CACHE_SIZE, nullptr, GL_STREAM_DRAW);
}
}
}
OGLBufferCache::~OGLBufferCache() {
glDeleteBuffers(static_cast<GLsizei>(std::size(cbufs)), std::data(cbufs));
void BufferCacheRuntime::CopyBuffer(Buffer& dst_buffer, Buffer& src_buffer,
std::span<const VideoCommon::BufferCopy> copies) {
for (const VideoCommon::BufferCopy& copy : copies) {
glCopyNamedBufferSubData(
src_buffer.Handle(), dst_buffer.Handle(), static_cast<GLintptr>(copy.src_offset),
static_cast<GLintptr>(copy.dst_offset), static_cast<GLsizeiptr>(copy.size));
}
}
std::shared_ptr<Buffer> OGLBufferCache::CreateBlock(VAddr cpu_addr, std::size_t size) {
return std::make_shared<Buffer>(device, cpu_addr, size);
void BufferCacheRuntime::BindIndexBuffer(Buffer& buffer, u32 offset, u32 size) {
if (has_unified_vertex_buffers) {
buffer.MakeResident(GL_READ_ONLY);
glBufferAddressRangeNV(GL_ELEMENT_ARRAY_ADDRESS_NV, 0, buffer.HostGpuAddr() + offset,
static_cast<GLsizeiptr>(size));
} else {
glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, buffer.Handle());
index_buffer_offset = offset;
}
}
OGLBufferCache::BufferInfo OGLBufferCache::GetEmptyBuffer(std::size_t) {
return {0, 0, 0};
void BufferCacheRuntime::BindVertexBuffer(u32 index, Buffer& buffer, u32 offset, u32 size,
u32 stride) {
if (index >= max_attributes) {
return;
}
if (has_unified_vertex_buffers) {
buffer.MakeResident(GL_READ_ONLY);
glBindVertexBuffer(index, 0, 0, static_cast<GLsizei>(stride));
glBufferAddressRangeNV(GL_VERTEX_ATTRIB_ARRAY_ADDRESS_NV, index,
buffer.HostGpuAddr() + offset, static_cast<GLsizeiptr>(size));
} else {
glBindVertexBuffer(index, buffer.Handle(), static_cast<GLintptr>(offset),
static_cast<GLsizei>(stride));
}
}
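Note: on the unified path, vertex fetch reads from raw GPU addresses set via glBufferAddressRangeNV, while glBindVertexBuffer with buffer 0 still supplies the stride. For this to take effect, NV_vertex_buffer_unified_memory requires the client states below to be enabled once at initialization (a sketch; this commit assumes that setup happens elsewhere in the renderer):

    glEnableClientState(GL_VERTEX_ATTRIB_ARRAY_UNIFIED_NV);
    glEnableClientState(GL_ELEMENT_ARRAY_UNIFIED_NV);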
OGLBufferCache::BufferInfo OGLBufferCache::ConstBufferUpload(const void* raw_pointer,
std::size_t size) {
DEBUG_ASSERT(cbuf_cursor < std::size(cbufs));
const GLuint cbuf = cbufs[cbuf_cursor++];
void BufferCacheRuntime::BindUniformBuffer(size_t stage, u32 binding_index, Buffer& buffer,
u32 offset, u32 size) {
if (use_assembly_shaders) {
const GLuint sub_buffer = buffer.SubBuffer(offset);
glBindBufferRangeNV(PABO_LUT[stage], binding_index, sub_buffer, 0,
static_cast<GLsizeiptr>(size));
} else {
const GLuint base_binding = device.GetBaseBindings(stage).uniform_buffer;
const GLuint binding = base_binding + binding_index;
glBindBufferRange(GL_UNIFORM_BUFFER, binding, buffer.Handle(),
static_cast<GLintptr>(offset), static_cast<GLsizeiptr>(size));
}
}
glNamedBufferSubData(cbuf, 0, static_cast<GLsizeiptr>(size), raw_pointer);
return {cbuf, 0, 0};
void BufferCacheRuntime::BindComputeUniformBuffer(u32 binding_index, Buffer& buffer, u32 offset,
u32 size) {
if (use_assembly_shaders) {
glBindBufferRangeNV(GL_COMPUTE_PROGRAM_PARAMETER_BUFFER_NV, binding_index,
buffer.SubBuffer(offset), 0, static_cast<GLsizeiptr>(size));
} else {
glBindBufferRange(GL_UNIFORM_BUFFER, binding_index, buffer.Handle(),
static_cast<GLintptr>(offset), static_cast<GLsizeiptr>(size));
}
}
void BufferCacheRuntime::BindStorageBuffer(size_t stage, u32 binding_index, Buffer& buffer,
u32 offset, u32 size, bool is_written) {
if (use_assembly_shaders) {
const BindlessSSBO ssbo{
.address = buffer.HostGpuAddr() + offset,
.length = static_cast<GLsizei>(size),
.padding = 0,
};
buffer.MakeResident(is_written ? GL_READ_WRITE : GL_READ_ONLY);
glProgramLocalParametersI4uivNV(PROGRAM_LUT[stage], binding_index, 1,
reinterpret_cast<const GLuint*>(&ssbo));
} else {
const GLuint base_binding = device.GetBaseBindings(stage).shader_storage_buffer;
const GLuint binding = base_binding + binding_index;
glBindBufferRange(GL_SHADER_STORAGE_BUFFER, binding, buffer.Handle(),
static_cast<GLintptr>(offset), static_cast<GLsizeiptr>(size));
}
}
void BufferCacheRuntime::BindComputeStorageBuffer(u32 binding_index, Buffer& buffer, u32 offset,
u32 size, bool is_written) {
if (use_assembly_shaders) {
const BindlessSSBO ssbo{
.address = buffer.HostGpuAddr() + offset,
.length = static_cast<GLsizei>(size),
.padding = 0,
};
buffer.MakeResident(is_written ? GL_READ_WRITE : GL_READ_ONLY);
glProgramLocalParametersI4uivNV(GL_COMPUTE_PROGRAM_NV, binding_index, 1,
reinterpret_cast<const GLuint*>(&ssbo));
} else if (size == 0) {
glBindBufferRange(GL_SHADER_STORAGE_BUFFER, binding_index, 0, 0, 0);
} else {
glBindBufferRange(GL_SHADER_STORAGE_BUFFER, binding_index, buffer.Handle(),
static_cast<GLintptr>(offset), static_cast<GLsizeiptr>(size));
}
}
void BufferCacheRuntime::BindTransformFeedbackBuffer(u32 index, Buffer& buffer, u32 offset,
u32 size) {
glBindBufferRange(GL_TRANSFORM_FEEDBACK_BUFFER, index, buffer.Handle(),
static_cast<GLintptr>(offset), static_cast<GLsizeiptr>(size));
}
} // namespace OpenGL

View File: src/video_core/renderer_opengl/gl_buffer_cache.h

@@ -5,79 +5,167 @@
#pragma once
#include <array>
#include <memory>
#include <span>
#include "common/alignment.h"
#include "common/common_types.h"
#include "common/dynamic_library.h"
#include "video_core/buffer_cache/buffer_cache.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/rasterizer_interface.h"
#include "video_core/renderer_opengl/gl_device.h"
#include "video_core/renderer_opengl/gl_resource_manager.h"
#include "video_core/renderer_opengl/gl_stream_buffer.h"
#include "video_core/vulkan_common/vulkan_device.h"
#include "video_core/vulkan_common/vulkan_memory_allocator.h"
namespace Core {
class System;
}
namespace Vulkan {
class Device;
class MemoryAllocator;
} // namespace Vulkan
namespace OpenGL {
class Device;
class OGLStreamBuffer;
class RasterizerOpenGL;
class StateTracker;
class BufferCacheRuntime;
class Buffer : public VideoCommon::BufferBlock {
class Buffer : public VideoCommon::BufferBase<VideoCore::RasterizerInterface> {
public:
explicit Buffer(const Device& device_, VAddr cpu_addr_, std::size_t size_);
~Buffer();
explicit Buffer(BufferCacheRuntime&, VideoCore::RasterizerInterface& rasterizer, VAddr cpu_addr,
u64 size_bytes);
explicit Buffer(BufferCacheRuntime&, VideoCommon::NullBufferParams);
void Upload(std::size_t offset, std::size_t data_size, const u8* data);
void ImmediateUpload(size_t offset, std::span<const u8> data) noexcept;
void Download(std::size_t offset, std::size_t data_size, u8* data);
void ImmediateDownload(size_t offset, std::span<u8> data) noexcept;
void CopyFrom(const Buffer& src, std::size_t src_offset, std::size_t dst_offset,
std::size_t copy_size);
void MakeResident(GLenum access) noexcept;
GLuint Handle() const noexcept {
return gl_buffer.handle;
[[nodiscard]] GLuint SubBuffer(u32 offset);
[[nodiscard]] GLuint64EXT HostGpuAddr() const noexcept {
return address;
}
u64 Address() const noexcept {
return gpu_address;
[[nodiscard]] GLuint Handle() const noexcept {
return buffer.handle;
}
private:
OGLBuffer gl_buffer;
OGLBuffer read_buffer;
u64 gpu_address = 0;
void CreateMemoryObjects(BufferCacheRuntime& runtime);
GLuint64EXT address = 0;
Vulkan::MemoryCommit memory_commit;
OGLBuffer buffer;
GLenum current_residency_access = GL_NONE;
std::vector<std::pair<OGLBuffer, u32>> subs;
};
using GenericBufferCache = VideoCommon::BufferCache<Buffer, GLuint, OGLStreamBuffer>;
class OGLBufferCache final : public GenericBufferCache {
class BufferCacheRuntime {
friend Buffer;
public:
explicit OGLBufferCache(VideoCore::RasterizerInterface& rasterizer,
Tegra::MemoryManager& gpu_memory, Core::Memory::Memory& cpu_memory,
const Device& device, OGLStreamBuffer& stream_buffer,
StateTracker& state_tracker);
~OGLBufferCache();
static constexpr u8 INVALID_BINDING = std::numeric_limits<u8>::max();
BufferInfo GetEmptyBuffer(std::size_t) override;
explicit BufferCacheRuntime(const Device& device_, const Vulkan::Device* vulkan_device_,
Vulkan::MemoryAllocator* vulkan_memory_allocator_);
void Acquire() noexcept {
cbuf_cursor = 0;
void CopyBuffer(Buffer& dst_buffer, Buffer& src_buffer,
std::span<const VideoCommon::BufferCopy> copies);
void BindIndexBuffer(Buffer& buffer, u32 offset, u32 size);
void BindVertexBuffer(u32 index, Buffer& buffer, u32 offset, u32 size, u32 stride);
void BindUniformBuffer(size_t stage, u32 binding_index, Buffer& buffer, u32 offset, u32 size);
void BindComputeUniformBuffer(u32 binding_index, Buffer& buffer, u32 offset, u32 size);
void BindStorageBuffer(size_t stage, u32 binding_index, Buffer& buffer, u32 offset, u32 size,
bool is_written);
void BindComputeStorageBuffer(u32 binding_index, Buffer& buffer, u32 offset, u32 size,
bool is_written);
void BindTransformFeedbackBuffer(u32 index, Buffer& buffer, u32 offset, u32 size);
void BindFastUniformBuffer(size_t stage, u32 binding_index, u32 size) {
if (use_assembly_shaders) {
const GLuint handle = fast_uniforms[stage][binding_index].handle;
const GLsizeiptr gl_size = static_cast<GLsizeiptr>(size);
glBindBufferRangeNV(PABO_LUT[stage], binding_index, handle, 0, gl_size);
} else {
const GLuint base_binding = device.GetBaseBindings(stage).uniform_buffer;
const GLuint binding = base_binding + binding_index;
glBindBufferRange(GL_UNIFORM_BUFFER, binding,
fast_uniforms[stage][binding_index].handle, 0,
static_cast<GLsizeiptr>(size));
}
}
protected:
std::shared_ptr<Buffer> CreateBlock(VAddr cpu_addr, std::size_t size) override;
void PushFastUniformBuffer(size_t stage, u32 binding_index, std::span<const u8> data) {
if (use_assembly_shaders) {
glProgramBufferParametersIuivNV(
PABO_LUT[stage], binding_index, 0,
static_cast<GLsizei>(data.size_bytes() / sizeof(GLuint)),
reinterpret_cast<const GLuint*>(data.data()));
} else {
glNamedBufferSubData(fast_uniforms[stage][binding_index].handle, 0,
static_cast<GLsizeiptr>(data.size_bytes()), data.data());
}
}
BufferInfo ConstBufferUpload(const void* raw_pointer, std::size_t size) override;
std::span<u8> BindMappedUniformBuffer(size_t stage, u32 binding_index, u32 size) noexcept {
const auto [mapped_span, offset] = stream_buffer->Request(static_cast<size_t>(size));
const GLuint base_binding = device.GetBaseBindings(stage).uniform_buffer;
const GLuint binding = base_binding + binding_index;
glBindBufferRange(GL_UNIFORM_BUFFER, binding, stream_buffer->Handle(),
static_cast<GLintptr>(offset), static_cast<GLsizeiptr>(size));
return mapped_span;
}
[[nodiscard]] const GLvoid* IndexOffset() const noexcept {
return reinterpret_cast<const GLvoid*>(static_cast<uintptr_t>(index_buffer_offset));
}
[[nodiscard]] bool HasFastBufferSubData() const noexcept {
return device.HasFastBufferSubData();
}
private:
static constexpr std::size_t NUM_CBUFS = Tegra::Engines::Maxwell3D::Regs::MaxConstBuffers *
Tegra::Engines::Maxwell3D::Regs::MaxShaderProgram;
static constexpr std::array PABO_LUT{
GL_VERTEX_PROGRAM_PARAMETER_BUFFER_NV, GL_TESS_CONTROL_PROGRAM_PARAMETER_BUFFER_NV,
GL_TESS_EVALUATION_PROGRAM_PARAMETER_BUFFER_NV, GL_GEOMETRY_PROGRAM_PARAMETER_BUFFER_NV,
GL_FRAGMENT_PROGRAM_PARAMETER_BUFFER_NV,
};
const Device& device;
const Vulkan::Device* vulkan_device;
Vulkan::MemoryAllocator* vulkan_memory_allocator;
std::optional<StreamBuffer> stream_buffer;
std::size_t cbuf_cursor = 0;
std::array<GLuint, NUM_CBUFS> cbufs{};
u32 max_attributes = 0;
bool use_assembly_shaders = false;
bool has_unified_vertex_buffers = false;
std::array<std::array<OGLBuffer, VideoCommon::NUM_GRAPHICS_UNIFORM_BUFFERS>,
VideoCommon::NUM_STAGES>
fast_uniforms;
u32 index_buffer_offset = 0;
};
struct BufferCacheParams {
using Runtime = OpenGL::BufferCacheRuntime;
using Buffer = OpenGL::Buffer;
static constexpr bool IS_OPENGL = true;
static constexpr bool HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS = true;
static constexpr bool HAS_FULL_INDEX_AND_PRIMITIVE_SUPPORT = true;
static constexpr bool NEEDS_BIND_UNIFORM_INDEX = true;
static constexpr bool NEEDS_BIND_STORAGE_INDEX = true;
static constexpr bool USE_MEMORY_MAPS = false;
};
using BufferCache = VideoCommon::BufferCache<BufferCacheParams>;
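Note: these traits let the backend-agnostic VideoCommon::BufferCache specialize behavior at compile time. An illustrative toy of the pattern (not the real template):

    template <class P>
    void NotifyUniformBind(typename P::Runtime& runtime, typename P::Buffer& buffer,
                           size_t stage, u32 binding_index, u32 offset, u32 size) {
        if constexpr (P::NEEDS_BIND_UNIFORM_INDEX) {
            // OpenGL path: the runtime wants the guest binding index.
            runtime.BindUniformBuffer(stage, binding_index, buffer, offset, size);
        }
    }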
} // namespace OpenGL

View File: src/video_core/renderer_opengl/gl_device.cpp

@@ -21,9 +21,7 @@
#include "video_core/renderer_opengl/gl_resource_manager.h"
namespace OpenGL {
namespace {
// One uniform block is reserved for emulation purposes
constexpr u32 ReservedUniformBlocks = 1;
@@ -198,10 +196,18 @@ bool IsASTCSupported() {
return nsight || HasExtension(extensions, "GL_EXT_debug_tool");
}
[[nodiscard]] std::string UuidString(std::span<const GLubyte, GL_UUID_SIZE_EXT> uuid) {
return fmt::format("{:x}{:x}{:x}{:x}-{:x}{:x}-{:x}{:x}-{:x}{:x}-{:x}{:x}{:x}{:x}{:x}", uuid[0],
uuid[1], uuid[2], uuid[3], uuid[4], uuid[5], uuid[6], uuid[7], uuid[8],
uuid[9], uuid[10], uuid[11], uuid[12], uuid[13], uuid[14], uuid[15]);
}
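Note: UuidString supports matching the GL context against a Vulkan physical device for the memory-object interop. A sketch of reading the UUID it formats (assumes GL_EXT_memory_object is present):

    std::array<GLubyte, GL_UUID_SIZE_EXT> device_uuid{};
    glGetUnsignedBytei_vEXT(GL_DEVICE_UUID_EXT, 0, device_uuid.data());
    LOG_INFO(Render_OpenGL, "GL device UUID: {}", UuidString(device_uuid));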
} // Anonymous namespace
Device::Device()
: max_uniform_buffers{BuildMaxUniformBuffers()}, base_bindings{BuildBaseBindings()} {
Device::Device(bool has_vulkan_instance) {
if (!GLAD_GL_VERSION_4_3) {
LOG_ERROR(Render_OpenGL, "OpenGL 4.3 is not available");
throw std::runtime_error{"Insufficient version"};
}
const std::string_view vendor = reinterpret_cast<const char*>(glGetString(GL_VENDOR));
const std::string_view version = reinterpret_cast<const char*>(glGetString(GL_VERSION));
const std::vector extensions = GetExtensions();
@@ -217,6 +223,9 @@ Device::Device()
"Beta driver 443.24 is known to have issues. There might be performance issues.");
disable_fast_buffer_sub_data = true;
}
max_uniform_buffers = BuildMaxUniformBuffers();
base_bindings = BuildBaseBindings();
uniform_buffer_alignment = GetInteger<size_t>(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT);
shader_storage_alignment = GetInteger<size_t>(GL_SHADER_STORAGE_BUFFER_OFFSET_ALIGNMENT);
max_vertex_attributes = GetInteger<u32>(GL_MAX_VERTEX_ATTRIBS);
@@ -243,7 +252,8 @@ Device::Device()
use_assembly_shaders = Settings::values.use_assembly_shaders.GetValue() &&
GLAD_GL_NV_gpu_program5 && GLAD_GL_NV_compute_program5 &&
GLAD_GL_NV_transform_feedback && GLAD_GL_NV_transform_feedback2;
GLAD_GL_NV_transform_feedback && GLAD_GL_NV_transform_feedback2 &&
has_vulkan_instance;
use_asynchronous_shaders = Settings::values.use_asynchronous_shaders.GetValue();

View File: src/video_core/renderer_opengl/gl_device.h

@@ -10,18 +10,16 @@
namespace OpenGL {
static constexpr u32 EmulationUniformBlockBinding = 0;
class Device final {
class Device {
public:
struct BaseBindings final {
struct BaseBindings {
u32 uniform_buffer{};
u32 shader_storage_buffer{};
u32 sampler{};
u32 image{};
};
explicit Device();
explicit Device(bool has_vulkan_instance);
explicit Device(std::nullptr_t);
u32 GetMaxUniformBuffers(Tegra::Engines::ShaderType shader_type) const noexcept {

View File: src/video_core/renderer_opengl/gl_fence_manager.cpp

@@ -47,7 +47,7 @@ void GLInnerFence::Wait() {
FenceManagerOpenGL::FenceManagerOpenGL(VideoCore::RasterizerInterface& rasterizer_,
Tegra::GPU& gpu_, TextureCache& texture_cache_,
OGLBufferCache& buffer_cache_, QueryCache& query_cache_)
BufferCache& buffer_cache_, QueryCache& query_cache_)
: GenericFenceManager{rasterizer_, gpu_, texture_cache_, buffer_cache_, query_cache_} {}
Fence FenceManagerOpenGL::CreateFence(u32 value, bool is_stubbed) {

View File: src/video_core/renderer_opengl/gl_fence_manager.h

@@ -32,14 +32,13 @@ private:
};
using Fence = std::shared_ptr<GLInnerFence>;
using GenericFenceManager =
VideoCommon::FenceManager<Fence, TextureCache, OGLBufferCache, QueryCache>;
using GenericFenceManager = VideoCommon::FenceManager<Fence, TextureCache, BufferCache, QueryCache>;
class FenceManagerOpenGL final : public GenericFenceManager {
public:
explicit FenceManagerOpenGL(VideoCore::RasterizerInterface& rasterizer_, Tegra::GPU& gpu_,
TextureCache& texture_cache_, OGLBufferCache& buffer_cache_,
QueryCache& query_cache_);
explicit FenceManagerOpenGL(VideoCore::RasterizerInterface& rasterizer, Tegra::GPU& gpu,
TextureCache& texture_cache, BufferCache& buffer_cache,
QueryCache& query_cache);
protected:
Fence CreateFence(u32 value, bool is_stubbed) override;

View File: src/video_core/renderer_opengl/gl_rasterizer.cpp

@@ -44,17 +44,10 @@ using VideoCore::Surface::PixelFormat;
using VideoCore::Surface::SurfaceTarget;
using VideoCore::Surface::SurfaceType;
MICROPROFILE_DEFINE(OpenGL_VAO, "OpenGL", "Vertex Format Setup", MP_RGB(128, 128, 192));
MICROPROFILE_DEFINE(OpenGL_VB, "OpenGL", "Vertex Buffer Setup", MP_RGB(128, 128, 192));
MICROPROFILE_DEFINE(OpenGL_Shader, "OpenGL", "Shader Setup", MP_RGB(128, 128, 192));
MICROPROFILE_DEFINE(OpenGL_UBO, "OpenGL", "Const Buffer Setup", MP_RGB(128, 128, 192));
MICROPROFILE_DEFINE(OpenGL_Index, "OpenGL", "Index Buffer Setup", MP_RGB(128, 128, 192));
MICROPROFILE_DEFINE(OpenGL_Texture, "OpenGL", "Texture Setup", MP_RGB(128, 128, 192));
MICROPROFILE_DEFINE(OpenGL_Framebuffer, "OpenGL", "Framebuffer Setup", MP_RGB(128, 128, 192));
MICROPROFILE_DEFINE(OpenGL_Drawing, "OpenGL", "Drawing", MP_RGB(128, 128, 192));
MICROPROFILE_DEFINE(OpenGL_Clears, "OpenGL", "Clears", MP_RGB(128, 128, 192));
MICROPROFILE_DEFINE(OpenGL_Blits, "OpenGL", "Blits", MP_RGB(128, 128, 192));
MICROPROFILE_DEFINE(OpenGL_CacheManagement, "OpenGL", "Cache Mgmt", MP_RGB(100, 255, 100));
MICROPROFILE_DEFINE(OpenGL_PrimitiveAssembly, "OpenGL", "Prim Asmbl", MP_RGB(255, 100, 100));
MICROPROFILE_DEFINE(OpenGL_CacheManagement, "OpenGL", "Cache Management", MP_RGB(100, 255, 100));
namespace {
@@ -104,20 +97,6 @@ TextureHandle GetTextureInfo(const Engine& engine, bool via_header_index, const
return TextureHandle(engine.AccessConstBuffer32(shader_type, buffer, offset), via_header_index);
}
std::size_t GetConstBufferSize(const Tegra::Engines::ConstBufferInfo& buffer,
const ConstBufferEntry& entry) {
if (!entry.IsIndirect()) {
return entry.GetSize();
}
if (buffer.size > Maxwell::MaxConstBufferSize) {
LOG_WARNING(Render_OpenGL, "Indirect constbuffer size {} exceeds maximum {}", buffer.size,
Maxwell::MaxConstBufferSize);
return Maxwell::MaxConstBufferSize;
}
return buffer.size;
}
/// Translates hardware transform feedback indices
/// @param location Hardware location
/// @return Pair of ARB_transform_feedback3 token stream first and third arguments
@@ -150,14 +129,6 @@ void oglEnable(GLenum cap, bool state) {
(state ? glEnable : glDisable)(cap);
}
void UpdateBindlessSSBOs(GLenum target, const BindlessSSBO* ssbos, size_t num_ssbos) {
if (num_ssbos == 0) {
return;
}
glProgramLocalParametersI4uivNV(target, 0, static_cast<GLsizei>(num_ssbos),
reinterpret_cast<const GLuint*>(ssbos));
}
ImageViewType ImageViewTypeFromEntry(const SamplerEntry& entry) {
if (entry.is_buffer) {
return ImageViewType::Buffer;
@@ -199,49 +170,35 @@ ImageViewType ImageViewTypeFromEntry(const ImageEntry& entry) {
RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu_,
Core::Memory::Memory& cpu_memory_, const Device& device_,
const Vulkan::Device* vulkan_device,
Vulkan::MemoryAllocator* vulkan_memory_allocator,
ScreenInfo& screen_info_, ProgramManager& program_manager_,
StateTracker& state_tracker_)
: RasterizerAccelerated(cpu_memory_), gpu(gpu_), maxwell3d(gpu.Maxwell3D()),
kepler_compute(gpu.KeplerCompute()), gpu_memory(gpu.MemoryManager()), device(device_),
screen_info(screen_info_), program_manager(program_manager_), state_tracker(state_tracker_),
stream_buffer(device, state_tracker),
texture_cache_runtime(device, program_manager, state_tracker),
texture_cache(texture_cache_runtime, *this, maxwell3d, kepler_compute, gpu_memory),
buffer_cache_runtime(device, vulkan_device, vulkan_memory_allocator),
buffer_cache(*this, maxwell3d, kepler_compute, gpu_memory, cpu_memory_, buffer_cache_runtime),
shader_cache(*this, emu_window_, gpu, maxwell3d, kepler_compute, gpu_memory, device),
query_cache(*this, maxwell3d, gpu_memory),
buffer_cache(*this, gpu_memory, cpu_memory_, device, stream_buffer, state_tracker),
fence_manager(*this, gpu, texture_cache, buffer_cache, query_cache),
async_shaders(emu_window_) {
unified_uniform_buffer.Create();
glNamedBufferStorage(unified_uniform_buffer.handle, TOTAL_CONST_BUFFER_BYTES, nullptr, 0);
if (device.UseAssemblyShaders()) {
glCreateBuffers(static_cast<GLsizei>(staging_cbufs.size()), staging_cbufs.data());
for (const GLuint cbuf : staging_cbufs) {
glNamedBufferStorage(cbuf, static_cast<GLsizeiptr>(Maxwell::MaxConstBufferSize),
nullptr, 0);
}
}
if (device.UseAsynchronousShaders()) {
async_shaders.AllocateWorkers();
}
}
RasterizerOpenGL::~RasterizerOpenGL() {
if (device.UseAssemblyShaders()) {
glDeleteBuffers(static_cast<GLsizei>(staging_cbufs.size()), staging_cbufs.data());
}
}
RasterizerOpenGL::~RasterizerOpenGL() = default;
void RasterizerOpenGL::SetupVertexFormat() {
void RasterizerOpenGL::SyncVertexFormats() {
auto& flags = maxwell3d.dirty.flags;
if (!flags[Dirty::VertexFormats]) {
return;
}
flags[Dirty::VertexFormats] = false;
MICROPROFILE_SCOPE(OpenGL_VAO);
// Use the vertex array as-is, assumes that the data is formatted correctly for OpenGL. Enables
// the first 16 vertex attributes always, as we don't know which ones are actually used until
// shader time. Note, Tegra technically supports 32, but we're capping this to 16 for now to
@@ -277,55 +234,7 @@ void RasterizerOpenGL::SetupVertexFormat() {
}
}
void RasterizerOpenGL::SetupVertexBuffer() {
auto& flags = maxwell3d.dirty.flags;
if (!flags[Dirty::VertexBuffers]) {
return;
}
flags[Dirty::VertexBuffers] = false;
MICROPROFILE_SCOPE(OpenGL_VB);
const bool use_unified_memory = device.HasVertexBufferUnifiedMemory();
// Upload all guest vertex arrays sequentially to our buffer
const auto& regs = maxwell3d.regs;
for (std::size_t index = 0; index < NUM_SUPPORTED_VERTEX_BINDINGS; ++index) {
if (!flags[Dirty::VertexBuffer0 + index]) {
continue;
}
flags[Dirty::VertexBuffer0 + index] = false;
const auto& vertex_array = regs.vertex_array[index];
if (!vertex_array.IsEnabled()) {
continue;
}
const GPUVAddr start = vertex_array.StartAddress();
const GPUVAddr end = regs.vertex_array_limit[index].LimitAddress();
ASSERT(end >= start);
const GLuint gl_index = static_cast<GLuint>(index);
const u64 size = end - start;
if (size == 0) {
glBindVertexBuffer(gl_index, 0, 0, vertex_array.stride);
if (use_unified_memory) {
glBufferAddressRangeNV(GL_VERTEX_ATTRIB_ARRAY_ADDRESS_NV, gl_index, 0, 0);
}
continue;
}
const auto info = buffer_cache.UploadMemory(start, size);
if (use_unified_memory) {
glBindVertexBuffer(gl_index, 0, 0, vertex_array.stride);
glBufferAddressRangeNV(GL_VERTEX_ATTRIB_ARRAY_ADDRESS_NV, gl_index,
info.address + info.offset, size);
} else {
glBindVertexBuffer(gl_index, info.handle, info.offset, vertex_array.stride);
}
}
}
void RasterizerOpenGL::SetupVertexInstances() {
void RasterizerOpenGL::SyncVertexInstances() {
auto& flags = maxwell3d.dirty.flags;
if (!flags[Dirty::VertexInstances]) {
return;
@@ -346,17 +255,7 @@ void RasterizerOpenGL::SetupVertexInstances() {
}
}
GLintptr RasterizerOpenGL::SetupIndexBuffer() {
MICROPROFILE_SCOPE(OpenGL_Index);
const auto& regs = maxwell3d.regs;
const std::size_t size = CalculateIndexBufferSize();
const auto info = buffer_cache.UploadMemory(regs.index_array.IndexStart(), size);
glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, info.handle);
return info.offset;
}
void RasterizerOpenGL::SetupShaders() {
MICROPROFILE_SCOPE(OpenGL_Shader);
void RasterizerOpenGL::SetupShaders(bool is_indexed) {
u32 clip_distances = 0;
std::array<Shader*, Maxwell::MaxShaderStage> shaders{};
@@ -413,11 +312,19 @@ void RasterizerOpenGL::SetupShaders() {
const size_t stage = index == 0 ? 0 : index - 1;
shaders[stage] = shader;
SetupDrawConstBuffers(stage, shader);
SetupDrawGlobalMemory(stage, shader);
SetupDrawTextures(shader, stage);
SetupDrawImages(shader, stage);
buffer_cache.SetEnabledUniformBuffers(stage, shader->GetEntries().enabled_uniform_buffers);
buffer_cache.UnbindGraphicsStorageBuffers(stage);
u32 ssbo_index = 0;
for (const auto& buffer : shader->GetEntries().global_memory_entries) {
buffer_cache.BindGraphicsStorageBuffer(stage, ssbo_index, buffer.cbuf_index,
buffer.cbuf_offset, buffer.is_written);
++ssbo_index;
}
// Workaround for Intel drivers.
// When a clip distance is enabled but not set in the shader it crops parts of the screen
// (sometimes it's half the screen, sometimes three quarters). To avoid this, enable the
@@ -433,43 +340,26 @@ void RasterizerOpenGL::SetupShaders() {
SyncClipEnabled(clip_distances);
maxwell3d.dirty.flags[Dirty::Shaders] = false;
buffer_cache.UpdateGraphicsBuffers(is_indexed);
const std::span indices_span(image_view_indices.data(), image_view_indices.size());
texture_cache.FillGraphicsImageViews(indices_span, image_view_ids);
buffer_cache.BindHostGeometryBuffers(is_indexed);
size_t image_view_index = 0;
size_t texture_index = 0;
size_t image_index = 0;
for (size_t stage = 0; stage < Maxwell::MaxShaderStage; ++stage) {
const Shader* const shader = shaders[stage];
if (shader) {
const auto base = device.GetBaseBindings(stage);
BindTextures(shader->GetEntries(), base.sampler, base.image, image_view_index,
texture_index, image_index);
}
}
}
std::size_t RasterizerOpenGL::CalculateVertexArraysSize() const {
const auto& regs = maxwell3d.regs;
std::size_t size = 0;
for (u32 index = 0; index < Maxwell::NumVertexArrays; ++index) {
if (!regs.vertex_array[index].IsEnabled())
if (!shader) {
continue;
const GPUVAddr start = regs.vertex_array[index].StartAddress();
const GPUVAddr end = regs.vertex_array_limit[index].LimitAddress();
size += end - start;
ASSERT(end >= start);
}
buffer_cache.BindHostStageBuffers(stage);
const auto& base = device.GetBaseBindings(stage);
BindTextures(shader->GetEntries(), base.sampler, base.image, image_view_index,
texture_index, image_index);
}
return size;
}
std::size_t RasterizerOpenGL::CalculateIndexBufferSize() const {
return static_cast<std::size_t>(maxwell3d.regs.index_array.count) *
static_cast<std::size_t>(maxwell3d.regs.index_array.FormatSizeInBytes());
}
void RasterizerOpenGL::LoadDiskResources(u64 title_id, const std::atomic_bool& stop_loading,
@@ -478,6 +368,7 @@ void RasterizerOpenGL::LoadDiskResources(u64 title_id, const std::atomic_bool& s
}
void RasterizerOpenGL::Clear() {
MICROPROFILE_SCOPE(OpenGL_Clears);
if (!maxwell3d.ShouldExecute()) {
return;
}
@@ -528,11 +419,9 @@ void RasterizerOpenGL::Clear() {
}
UNIMPLEMENTED_IF(regs.clear_flags.viewport);
{
auto lock = texture_cache.AcquireLock();
texture_cache.UpdateRenderTargets(true);
state_tracker.BindFramebuffer(texture_cache.GetFramebuffer()->Handle());
}
std::scoped_lock lock{texture_cache.mutex};
texture_cache.UpdateRenderTargets(true);
state_tracker.BindFramebuffer(texture_cache.GetFramebuffer()->Handle());
if (use_color) {
glClearBufferfv(GL_COLOR, regs.clear_buffers.RT, regs.clear_color);
@@ -544,7 +433,6 @@ void RasterizerOpenGL::Clear() {
} else if (use_stencil) {
glClearBufferiv(GL_STENCIL, 0, &regs.clear_stencil);
}
++num_queued_commands;
}
@@ -553,75 +441,12 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) {
query_cache.UpdateCounters();
SyncViewport();
SyncRasterizeEnable();
SyncPolygonModes();
SyncColorMask();
SyncFragmentColorClampState();
SyncMultiSampleState();
SyncDepthTestState();
SyncDepthClamp();
SyncStencilTestState();
SyncBlendState();
SyncLogicOpState();
SyncCullMode();
SyncPrimitiveRestart();
SyncScissorTest();
SyncPointState();
SyncLineState();
SyncPolygonOffset();
SyncAlphaTest();
SyncFramebufferSRGB();
buffer_cache.Acquire();
current_cbuf = 0;
std::size_t buffer_size = CalculateVertexArraysSize();
// Add space for index buffer
if (is_indexed) {
buffer_size = Common::AlignUp(buffer_size, 4) + CalculateIndexBufferSize();
}
// Uniform space for the 5 shader stages
buffer_size =
Common::AlignUp<std::size_t>(buffer_size, 4) +
(sizeof(MaxwellUniformData) + device.GetUniformBufferAlignment()) * Maxwell::MaxShaderStage;
// Add space for at least 18 constant buffers
buffer_size += Maxwell::MaxConstBuffers *
(Maxwell::MaxConstBufferSize + device.GetUniformBufferAlignment());
// Prepare the vertex array.
buffer_cache.Map(buffer_size);
// Prepare vertex array format.
SetupVertexFormat();
// Upload vertex and index data.
SetupVertexBuffer();
SetupVertexInstances();
GLintptr index_buffer_offset = 0;
if (is_indexed) {
index_buffer_offset = SetupIndexBuffer();
}
// Setup emulation uniform buffer.
if (!device.UseAssemblyShaders()) {
MaxwellUniformData ubo;
ubo.SetFromRegs(maxwell3d);
const auto info =
buffer_cache.UploadHostMemory(&ubo, sizeof(ubo), device.GetUniformBufferAlignment());
glBindBufferRange(GL_UNIFORM_BUFFER, EmulationUniformBlockBinding, info.handle, info.offset,
static_cast<GLsizeiptr>(sizeof(ubo)));
}
SyncState();
// Setup shaders and their used resources.
auto lock = texture_cache.AcquireLock();
SetupShaders();
std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex};
SetupShaders(is_indexed);
// Signal the buffer cache that we are not going to upload more things.
buffer_cache.Unmap();
texture_cache.UpdateRenderTargets(false);
state_tracker.BindFramebuffer(texture_cache.GetFramebuffer()->Handle());
program_manager.BindGraphicsPipeline();
@@ -635,7 +460,7 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) {
if (is_indexed) {
const GLint base_vertex = static_cast<GLint>(maxwell3d.regs.vb_element_base);
const GLsizei num_vertices = static_cast<GLsizei>(maxwell3d.regs.index_array.count);
const GLvoid* offset = reinterpret_cast<const GLvoid*>(index_buffer_offset);
const GLvoid* const offset = buffer_cache_runtime.IndexOffset();
const GLenum format = MaxwellToGL::IndexFormat(maxwell3d.regs.index_array.format);
if (num_instances == 1 && base_instance == 0 && base_vertex == 0) {
glDrawElements(primitive_mode, num_vertices, format, offset);
@@ -675,22 +500,22 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) {
}
void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) {
buffer_cache.Acquire();
current_cbuf = 0;
Shader* const kernel = shader_cache.GetComputeKernel(code_addr);
auto lock = texture_cache.AcquireLock();
std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex};
BindComputeTextures(kernel);
const size_t buffer_size = Tegra::Engines::KeplerCompute::NumConstBuffers *
(Maxwell::MaxConstBufferSize + device.GetUniformBufferAlignment());
buffer_cache.Map(buffer_size);
SetupComputeConstBuffers(kernel);
SetupComputeGlobalMemory(kernel);
buffer_cache.Unmap();
const auto& entries = kernel->GetEntries();
buffer_cache.SetEnabledComputeUniformBuffers(entries.enabled_uniform_buffers);
buffer_cache.UnbindComputeStorageBuffers();
u32 ssbo_index = 0;
for (const auto& buffer : entries.global_memory_entries) {
buffer_cache.BindComputeStorageBuffer(ssbo_index, buffer.cbuf_index, buffer.cbuf_offset,
buffer.is_written);
++ssbo_index;
}
buffer_cache.UpdateComputeBuffers();
buffer_cache.BindHostComputeBuffers();
const auto& launch_desc = kepler_compute.launch_description;
glDispatchCompute(launch_desc.grid_dim_x, launch_desc.grid_dim_y, launch_desc.grid_dim_z);
@@ -706,6 +531,12 @@ void RasterizerOpenGL::Query(GPUVAddr gpu_addr, VideoCore::QueryType type,
query_cache.Query(gpu_addr, type, timestamp);
}
void RasterizerOpenGL::BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr,
u32 size) {
std::scoped_lock lock{buffer_cache.mutex};
buffer_cache.BindGraphicsUniformBuffer(stage, index, gpu_addr, size);
}
void RasterizerOpenGL::FlushAll() {}
void RasterizerOpenGL::FlushRegion(VAddr addr, u64 size) {
@@ -714,19 +545,43 @@ void RasterizerOpenGL::FlushRegion(VAddr addr, u64 size) {
return;
}
{
auto lock = texture_cache.AcquireLock();
std::scoped_lock lock{texture_cache.mutex};
texture_cache.DownloadMemory(addr, size);
}
buffer_cache.FlushRegion(addr, size);
{
std::scoped_lock lock{buffer_cache.mutex};
buffer_cache.DownloadMemory(addr, size);
}
query_cache.FlushRegion(addr, size);
}
void RasterizerOpenGL::InvalidateExceptTextureCache(VAddr addr, u64 size) {
if (addr == 0 || size == 0) {
return;
}
shader_cache.InvalidateRegion(addr, size);
{
std::scoped_lock lock{buffer_cache.mutex};
buffer_cache.WriteMemory(addr, size);
}
query_cache.InvalidateRegion(addr, size);
}
void RasterizerOpenGL::InvalidateTextureCache(VAddr addr, u64 size) {
if (addr == 0 || size == 0) {
return;
}
std::scoped_lock lock{texture_cache.mutex};
texture_cache.UnmapMemory(addr, size);
}
bool RasterizerOpenGL::MustFlushRegion(VAddr addr, u64 size) {
std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex};
if (!Settings::IsGPULevelHigh()) {
return buffer_cache.MustFlushRegion(addr, size);
return buffer_cache.IsRegionGpuModified(addr, size);
}
return texture_cache.IsRegionGpuModified(addr, size) ||
buffer_cache.MustFlushRegion(addr, size);
buffer_cache.IsRegionGpuModified(addr, size);
}
void RasterizerOpenGL::InvalidateRegion(VAddr addr, u64 size) {
@@ -735,11 +590,14 @@ void RasterizerOpenGL::InvalidateRegion(VAddr addr, u64 size) {
return;
}
{
auto lock = texture_cache.AcquireLock();
std::scoped_lock lock{texture_cache.mutex};
texture_cache.WriteMemory(addr, size);
}
{
std::scoped_lock lock{buffer_cache.mutex};
buffer_cache.WriteMemory(addr, size);
}
shader_cache.InvalidateRegion(addr, size);
buffer_cache.InvalidateRegion(addr, size);
query_cache.InvalidateRegion(addr, size);
}
@@ -748,26 +606,35 @@ void RasterizerOpenGL::OnCPUWrite(VAddr addr, u64 size) {
if (addr == 0 || size == 0) {
return;
}
shader_cache.OnCPUWrite(addr, size);
{
auto lock = texture_cache.AcquireLock();
std::scoped_lock lock{texture_cache.mutex};
texture_cache.WriteMemory(addr, size);
}
shader_cache.OnCPUWrite(addr, size);
buffer_cache.OnCPUWrite(addr, size);
{
std::scoped_lock lock{buffer_cache.mutex};
buffer_cache.CachedWriteMemory(addr, size);
}
}
void RasterizerOpenGL::SyncGuestHost() {
MICROPROFILE_SCOPE(OpenGL_CacheManagement);
buffer_cache.SyncGuestHost();
shader_cache.SyncGuestHost();
{
std::scoped_lock lock{buffer_cache.mutex};
buffer_cache.FlushCachedWrites();
}
}
void RasterizerOpenGL::UnmapMemory(VAddr addr, u64 size) {
{
auto lock = texture_cache.AcquireLock();
std::scoped_lock lock{texture_cache.mutex};
texture_cache.UnmapMemory(addr, size);
}
buffer_cache.OnCPUWrite(addr, size);
{
std::scoped_lock lock{buffer_cache.mutex};
buffer_cache.WriteMemory(addr, size);
}
shader_cache.OnCPUWrite(addr, size);
}
@@ -802,14 +669,7 @@ void RasterizerOpenGL::FlushAndInvalidateRegion(VAddr addr, u64 size) {
}
void RasterizerOpenGL::WaitForIdle() {
// Place a barrier on everything that is not framebuffer related.
// This is related to another flag that is not currently implemented.
glMemoryBarrier(GL_VERTEX_ATTRIB_ARRAY_BARRIER_BIT | GL_ELEMENT_ARRAY_BARRIER_BIT |
GL_UNIFORM_BARRIER_BIT | GL_TEXTURE_FETCH_BARRIER_BIT |
GL_SHADER_IMAGE_ACCESS_BARRIER_BIT | GL_COMMAND_BARRIER_BIT |
GL_PIXEL_BUFFER_BARRIER_BIT | GL_TEXTURE_UPDATE_BARRIER_BIT |
GL_BUFFER_UPDATE_BARRIER_BIT | GL_TRANSFORM_FEEDBACK_BARRIER_BIT |
GL_SHADER_STORAGE_BARRIER_BIT | GL_QUERY_BUFFER_BARRIER_BIT);
glMemoryBarrier(GL_ALL_BARRIER_BITS);
}
void RasterizerOpenGL::FragmentBarrier() {
@@ -834,18 +694,21 @@ void RasterizerOpenGL::TickFrame() {
num_queued_commands = 0;
fence_manager.TickFrame();
buffer_cache.TickFrame();
{
auto lock = texture_cache.AcquireLock();
std::scoped_lock lock{texture_cache.mutex};
texture_cache.TickFrame();
}
{
std::scoped_lock lock{buffer_cache.mutex};
buffer_cache.TickFrame();
}
}
bool RasterizerOpenGL::AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Surface& src,
const Tegra::Engines::Fermi2D::Surface& dst,
const Tegra::Engines::Fermi2D::Config& copy_config) {
MICROPROFILE_SCOPE(OpenGL_Blits);
auto lock = texture_cache.AcquireLock();
std::scoped_lock lock{texture_cache.mutex};
texture_cache.BlitImage(dst, src, copy_config);
return true;
}
@@ -857,7 +720,7 @@ bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& config,
}
MICROPROFILE_SCOPE(OpenGL_CacheManagement);
auto lock = texture_cache.AcquireLock();
std::scoped_lock lock{texture_cache.mutex};
ImageView* const image_view{texture_cache.TryFindFramebufferImageView(framebuffer_addr)};
if (!image_view) {
return false;
@@ -924,166 +787,6 @@ void RasterizerOpenGL::BindTextures(const ShaderEntries& entries, GLuint base_te
}
}
void RasterizerOpenGL::SetupDrawConstBuffers(std::size_t stage_index, Shader* shader) {
static constexpr std::array PARAMETER_LUT{
GL_VERTEX_PROGRAM_PARAMETER_BUFFER_NV, GL_TESS_CONTROL_PROGRAM_PARAMETER_BUFFER_NV,
GL_TESS_EVALUATION_PROGRAM_PARAMETER_BUFFER_NV, GL_GEOMETRY_PROGRAM_PARAMETER_BUFFER_NV,
GL_FRAGMENT_PROGRAM_PARAMETER_BUFFER_NV,
};
MICROPROFILE_SCOPE(OpenGL_UBO);
const auto& stages = maxwell3d.state.shader_stages;
const auto& shader_stage = stages[stage_index];
const auto& entries = shader->GetEntries();
const bool use_unified = entries.use_unified_uniforms;
const std::size_t base_unified_offset = stage_index * NUM_CONST_BUFFERS_BYTES_PER_STAGE;
const auto base_bindings = device.GetBaseBindings(stage_index);
u32 binding = device.UseAssemblyShaders() ? 0 : base_bindings.uniform_buffer;
for (const auto& entry : entries.const_buffers) {
const u32 index = entry.GetIndex();
const auto& buffer = shader_stage.const_buffers[index];
SetupConstBuffer(PARAMETER_LUT[stage_index], binding, buffer, entry, use_unified,
base_unified_offset + index * Maxwell::MaxConstBufferSize);
++binding;
}
if (use_unified) {
const u32 index = static_cast<u32>(base_bindings.shader_storage_buffer +
entries.global_memory_entries.size());
glBindBufferRange(GL_SHADER_STORAGE_BUFFER, index, unified_uniform_buffer.handle,
base_unified_offset, NUM_CONST_BUFFERS_BYTES_PER_STAGE);
}
}
void RasterizerOpenGL::SetupComputeConstBuffers(Shader* kernel) {
MICROPROFILE_SCOPE(OpenGL_UBO);
const auto& launch_desc = kepler_compute.launch_description;
const auto& entries = kernel->GetEntries();
const bool use_unified = entries.use_unified_uniforms;
u32 binding = 0;
for (const auto& entry : entries.const_buffers) {
const auto& config = launch_desc.const_buffer_config[entry.GetIndex()];
const std::bitset<8> mask = launch_desc.const_buffer_enable_mask.Value();
Tegra::Engines::ConstBufferInfo buffer;
buffer.address = config.Address();
buffer.size = config.size;
buffer.enabled = mask[entry.GetIndex()];
SetupConstBuffer(GL_COMPUTE_PROGRAM_PARAMETER_BUFFER_NV, binding, buffer, entry,
use_unified, entry.GetIndex() * Maxwell::MaxConstBufferSize);
++binding;
}
if (use_unified) {
const GLuint index = static_cast<GLuint>(entries.global_memory_entries.size());
glBindBufferRange(GL_SHADER_STORAGE_BUFFER, index, unified_uniform_buffer.handle, 0,
NUM_CONST_BUFFERS_BYTES_PER_STAGE);
}
}
void RasterizerOpenGL::SetupConstBuffer(GLenum stage, u32 binding,
const Tegra::Engines::ConstBufferInfo& buffer,
const ConstBufferEntry& entry, bool use_unified,
std::size_t unified_offset) {
if (!buffer.enabled) {
// Set values to zero to unbind buffers
if (device.UseAssemblyShaders()) {
glBindBufferRangeNV(stage, entry.GetIndex(), 0, 0, 0);
} else {
glBindBufferRange(GL_UNIFORM_BUFFER, binding, 0, 0, sizeof(float));
}
return;
}
// Align the actual size so it ends up being a multiple of vec4 to meet the OpenGL std140
// UBO alignment requirements.
const std::size_t size = Common::AlignUp(GetConstBufferSize(buffer, entry), sizeof(GLvec4));
const bool fast_upload = !use_unified && device.HasFastBufferSubData();
const std::size_t alignment = use_unified ? 4 : device.GetUniformBufferAlignment();
const GPUVAddr gpu_addr = buffer.address;
auto info = buffer_cache.UploadMemory(gpu_addr, size, alignment, false, fast_upload);
if (device.UseAssemblyShaders()) {
UNIMPLEMENTED_IF(use_unified);
if (info.offset != 0) {
const GLuint staging_cbuf = staging_cbufs[current_cbuf++];
glCopyNamedBufferSubData(info.handle, staging_cbuf, info.offset, 0, size);
info.handle = staging_cbuf;
info.offset = 0;
}
glBindBufferRangeNV(stage, binding, info.handle, info.offset, size);
return;
}
if (use_unified) {
glCopyNamedBufferSubData(info.handle, unified_uniform_buffer.handle, info.offset,
unified_offset, size);
} else {
glBindBufferRange(GL_UNIFORM_BUFFER, binding, info.handle, info.offset, size);
}
}
void RasterizerOpenGL::SetupDrawGlobalMemory(std::size_t stage_index, Shader* shader) {
static constexpr std::array TARGET_LUT = {
GL_VERTEX_PROGRAM_NV, GL_TESS_CONTROL_PROGRAM_NV, GL_TESS_EVALUATION_PROGRAM_NV,
GL_GEOMETRY_PROGRAM_NV, GL_FRAGMENT_PROGRAM_NV,
};
const auto& cbufs{maxwell3d.state.shader_stages[stage_index]};
const auto& entries{shader->GetEntries().global_memory_entries};
std::array<BindlessSSBO, 32> ssbos;
ASSERT(entries.size() < ssbos.size());
const bool assembly_shaders = device.UseAssemblyShaders();
u32 binding = assembly_shaders ? 0 : device.GetBaseBindings(stage_index).shader_storage_buffer;
for (const auto& entry : entries) {
const GPUVAddr addr{cbufs.const_buffers[entry.cbuf_index].address + entry.cbuf_offset};
const GPUVAddr gpu_addr{gpu_memory.Read<u64>(addr)};
const u32 size{gpu_memory.Read<u32>(addr + 8)};
SetupGlobalMemory(binding, entry, gpu_addr, size, &ssbos[binding]);
++binding;
}
if (assembly_shaders) {
UpdateBindlessSSBOs(TARGET_LUT[stage_index], ssbos.data(), entries.size());
}
}
void RasterizerOpenGL::SetupComputeGlobalMemory(Shader* kernel) {
const auto& cbufs{kepler_compute.launch_description.const_buffer_config};
const auto& entries{kernel->GetEntries().global_memory_entries};
std::array<BindlessSSBO, 32> ssbos;
ASSERT(entries.size() < ssbos.size());
u32 binding = 0;
for (const auto& entry : entries) {
const GPUVAddr addr{cbufs[entry.cbuf_index].Address() + entry.cbuf_offset};
const GPUVAddr gpu_addr{gpu_memory.Read<u64>(addr)};
const u32 size{gpu_memory.Read<u32>(addr + 8)};
SetupGlobalMemory(binding, entry, gpu_addr, size, &ssbos[binding]);
++binding;
}
if (device.UseAssemblyShaders()) {
UpdateBindlessSSBOs(GL_COMPUTE_PROGRAM_NV, ssbos.data(), ssbos.size());
}
}
void RasterizerOpenGL::SetupGlobalMemory(u32 binding, const GlobalMemoryEntry& entry,
GPUVAddr gpu_addr, size_t size, BindlessSSBO* ssbo) {
const size_t alignment{device.GetShaderStorageBufferAlignment()};
const auto info = buffer_cache.UploadMemory(gpu_addr, size, alignment, entry.is_written);
if (device.UseAssemblyShaders()) {
*ssbo = BindlessSSBO{
.address = static_cast<GLuint64EXT>(info.address + info.offset),
.length = static_cast<GLsizei>(size),
.padding = 0,
};
} else {
glBindBufferRange(GL_SHADER_STORAGE_BUFFER, binding, info.handle, info.offset,
static_cast<GLsizeiptr>(size));
}
}
void RasterizerOpenGL::SetupDrawTextures(const Shader* shader, size_t stage_index) {
const bool via_header_index =
maxwell3d.regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex;
@@ -1131,6 +834,30 @@ void RasterizerOpenGL::SetupComputeImages(const Shader* shader) {
}
}
void RasterizerOpenGL::SyncState() {
SyncViewport();
SyncRasterizeEnable();
SyncPolygonModes();
SyncColorMask();
SyncFragmentColorClampState();
SyncMultiSampleState();
SyncDepthTestState();
SyncDepthClamp();
SyncStencilTestState();
SyncBlendState();
SyncLogicOpState();
SyncCullMode();
SyncPrimitiveRestart();
SyncScissorTest();
SyncPointState();
SyncLineState();
SyncPolygonOffset();
SyncAlphaTest();
SyncFramebufferSRGB();
SyncVertexFormats();
SyncVertexInstances();
}
void RasterizerOpenGL::SyncViewport() {
auto& flags = maxwell3d.dirty.flags;
const auto& regs = maxwell3d.regs;
@@ -1166,9 +893,11 @@ void RasterizerOpenGL::SyncViewport() {
if (regs.screen_y_control.y_negate != 0) {
flip_y = !flip_y;
}
glClipControl(flip_y ? GL_UPPER_LEFT : GL_LOWER_LEFT,
regs.depth_mode == Maxwell::DepthMode::ZeroToOne ? GL_ZERO_TO_ONE
: GL_NEGATIVE_ONE_TO_ONE);
const bool is_zero_to_one = regs.depth_mode == Maxwell::DepthMode::ZeroToOne;
const GLenum origin = flip_y ? GL_UPPER_LEFT : GL_LOWER_LEFT;
const GLenum depth = is_zero_to_one ? GL_ZERO_TO_ONE : GL_NEGATIVE_ONE_TO_ONE;
state_tracker.ClipControl(origin, depth);
state_tracker.SetYNegate(regs.screen_y_control.y_negate != 0);
}
if (dirty_viewport) {
@@ -1652,36 +1381,13 @@ void RasterizerOpenGL::BeginTransformFeedback(GLenum primitive_mode) {
if (regs.tfb_enabled == 0) {
return;
}
if (device.UseAssemblyShaders()) {
SyncTransformFeedback();
}
UNIMPLEMENTED_IF(regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::TesselationControl) ||
regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::TesselationEval) ||
regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::Geometry));
for (std::size_t index = 0; index < Maxwell::NumTransformFeedbackBuffers; ++index) {
const auto& binding = regs.tfb_bindings[index];
if (!binding.buffer_enable) {
if (enabled_transform_feedback_buffers[index]) {
glBindBufferRange(GL_TRANSFORM_FEEDBACK_BUFFER, static_cast<GLuint>(index), 0, 0,
0);
}
enabled_transform_feedback_buffers[index] = false;
continue;
}
enabled_transform_feedback_buffers[index] = true;
auto& tfb_buffer = transform_feedback_buffers[index];
tfb_buffer.Create();
const GLuint handle = tfb_buffer.handle;
const std::size_t size = binding.buffer_size;
glNamedBufferData(handle, static_cast<GLsizeiptr>(size), nullptr, GL_STREAM_COPY);
glBindBufferRange(GL_TRANSFORM_FEEDBACK_BUFFER, static_cast<GLuint>(index), handle, 0,
static_cast<GLsizeiptr>(size));
}
UNIMPLEMENTED_IF(primitive_mode != GL_POINTS);
// We may have to call BeginTransformFeedbackNV here since they seem to call different
// implementations on Nvidia's driver (the pointer is different) but we are using
@@ -1695,23 +1401,7 @@ void RasterizerOpenGL::EndTransformFeedback() {
if (regs.tfb_enabled == 0) {
return;
}
glEndTransformFeedback();
for (std::size_t index = 0; index < Maxwell::NumTransformFeedbackBuffers; ++index) {
const auto& binding = regs.tfb_bindings[index];
if (!binding.buffer_enable) {
continue;
}
UNIMPLEMENTED_IF(binding.buffer_offset != 0);
const GLuint handle = transform_feedback_buffers[index].handle;
const GPUVAddr gpu_addr = binding.Address();
const std::size_t size = binding.buffer_size;
const auto info = buffer_cache.UploadMemory(gpu_addr, size, 4, true);
glCopyNamedBufferSubData(handle, info.handle, 0, info.offset,
static_cast<GLsizeiptr>(size));
}
}
} // namespace OpenGL

View File: src/video_core/renderer_opengl/gl_rasterizer.h

@@ -30,7 +30,6 @@
#include "video_core/renderer_opengl/gl_shader_decompiler.h"
#include "video_core/renderer_opengl/gl_shader_manager.h"
#include "video_core/renderer_opengl/gl_state_tracker.h"
#include "video_core/renderer_opengl/gl_stream_buffer.h"
#include "video_core/renderer_opengl/gl_texture_cache.h"
#include "video_core/shader/async_shaders.h"
#include "video_core/textures/texture.h"
@@ -47,6 +46,11 @@ namespace Tegra {
class MemoryManager;
}
namespace Vulkan {
class Device;
class MemoryAllocator;
} // namespace Vulkan
namespace OpenGL {
struct ScreenInfo;
@@ -63,6 +67,8 @@ class RasterizerOpenGL : public VideoCore::RasterizerAccelerated {
public:
explicit RasterizerOpenGL(Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu_,
Core::Memory::Memory& cpu_memory_, const Device& device_,
const Vulkan::Device* vulkan_device,
Vulkan::MemoryAllocator* vulkan_memory_allocator,
ScreenInfo& screen_info_, ProgramManager& program_manager_,
StateTracker& state_tracker_);
~RasterizerOpenGL() override;
@@ -72,8 +78,11 @@ public:
void DispatchCompute(GPUVAddr code_addr) override;
void ResetCounter(VideoCore::QueryType type) override;
void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional<u64> timestamp) override;
void BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, u32 size) override;
void FlushAll() override;
void FlushRegion(VAddr addr, u64 size) override;
void InvalidateExceptTextureCache(VAddr addr, u64 size) override;
void InvalidateTextureCache(VAddr addr, u64 size) override;
bool MustFlushRegion(VAddr addr, u64 size) override;
void InvalidateRegion(VAddr addr, u64 size) override;
void OnCPUWrite(VAddr addr, u64 size) override;
@@ -119,27 +128,6 @@ private:
void BindTextures(const ShaderEntries& entries, GLuint base_texture, GLuint base_image,
size_t& image_view_index, size_t& texture_index, size_t& image_index);
/// Configures the current constbuffers to use for the draw command.
void SetupDrawConstBuffers(std::size_t stage_index, Shader* shader);
/// Configures the current constbuffers to use for the kernel invocation.
void SetupComputeConstBuffers(Shader* kernel);
/// Configures a constant buffer.
void SetupConstBuffer(GLenum stage, u32 binding, const Tegra::Engines::ConstBufferInfo& buffer,
const ConstBufferEntry& entry, bool use_unified,
std::size_t unified_offset);
/// Configures the current global memory entries to use for the draw command.
void SetupDrawGlobalMemory(std::size_t stage_index, Shader* shader);
/// Configures the current global memory entries to use for the kernel invocation.
void SetupComputeGlobalMemory(Shader* kernel);
/// Configures a global memory buffer.
void SetupGlobalMemory(u32 binding, const GlobalMemoryEntry& entry, GPUVAddr gpu_addr,
size_t size, BindlessSSBO* ssbo);
/// Configures the current textures to use for the draw command.
void SetupDrawTextures(const Shader* shader, size_t stage_index);
@@ -152,6 +140,9 @@ private:
/// Configures images in a compute shader.
void SetupComputeImages(const Shader* shader);
/// Syncs state to match guest's
void SyncState();
/// Syncs the viewport and depth range to match the guest state
void SyncViewport();
@@ -215,6 +206,12 @@ private:
/// Syncs the framebuffer sRGB state to match the guest state
void SyncFramebufferSRGB();
/// Syncs vertex formats to match the guest state
void SyncVertexFormats();
/// Syncs vertex instances to match the guest state
void SyncVertexInstances();
/// Syncs transform feedback state to match guest state
/// @note Only valid on assembly shaders
void SyncTransformFeedback();
@@ -225,19 +222,7 @@ private:
/// End a transform feedback
void EndTransformFeedback();
std::size_t CalculateVertexArraysSize() const;
std::size_t CalculateIndexBufferSize() const;
/// Updates the current vertex format
void SetupVertexFormat();
void SetupVertexBuffer();
void SetupVertexInstances();
GLintptr SetupIndexBuffer();
void SetupShaders();
void SetupShaders(bool is_indexed);
Tegra::GPU& gpu;
Tegra::Engines::Maxwell3D& maxwell3d;
@@ -249,12 +234,12 @@ private:
ProgramManager& program_manager;
StateTracker& state_tracker;
OGLStreamBuffer stream_buffer;
TextureCacheRuntime texture_cache_runtime;
TextureCache texture_cache;
BufferCacheRuntime buffer_cache_runtime;
BufferCache buffer_cache;
ShaderCacheOpenGL shader_cache;
QueryCache query_cache;
OGLBufferCache buffer_cache;
FenceManagerOpenGL fence_manager;
VideoCommon::Shader::AsyncShaders async_shaders;
@@ -262,20 +247,8 @@ private:
boost::container::static_vector<u32, MAX_IMAGE_VIEWS> image_view_indices;
std::array<ImageViewId, MAX_IMAGE_VIEWS> image_view_ids;
boost::container::static_vector<GLuint, MAX_TEXTURES> sampler_handles;
std::array<GLuint, MAX_TEXTURES> texture_handles;
std::array<GLuint, MAX_IMAGES> image_handles;
std::array<OGLBuffer, Tegra::Engines::Maxwell3D::Regs::NumTransformFeedbackBuffers>
transform_feedback_buffers;
std::bitset<Tegra::Engines::Maxwell3D::Regs::NumTransformFeedbackBuffers>
enabled_transform_feedback_buffers;
static constexpr std::size_t NUM_CONSTANT_BUFFERS =
Tegra::Engines::Maxwell3D::Regs::MaxConstBuffers *
Tegra::Engines::Maxwell3D::Regs::MaxShaderProgram;
std::array<GLuint, NUM_CONSTANT_BUFFERS> staging_cbufs{};
std::size_t current_cbuf = 0;
OGLBuffer unified_uniform_buffer;
std::array<GLuint, MAX_TEXTURES> texture_handles{};
std::array<GLuint, MAX_IMAGES> image_handles{};
/// Number of commands queued to the OpenGL driver. Reset on flush.
std::size_t num_queued_commands = 0;


@@ -171,12 +171,6 @@ void OGLBuffer::Release() {
handle = 0;
}
void OGLBuffer::MakeStreamCopy(std::size_t buffer_size) {
ASSERT_OR_EXECUTE((handle != 0 && buffer_size != 0), { return; });
glNamedBufferData(handle, buffer_size, nullptr, GL_STREAM_COPY);
}
void OGLSync::Create() {
if (handle != 0)
return;


@@ -234,9 +234,6 @@ public:
/// Deletes the internal OpenGL resource
void Release();
/// Converts the buffer into a stream copy buffer with a fixed size
void MakeStreamCopy(std::size_t buffer_size);
GLuint handle = 0;
};


@@ -63,7 +63,7 @@ using TextureIR = std::variant<TextureOffset, TextureDerivates, TextureArgument>
constexpr u32 MAX_CONSTBUFFER_SCALARS = static_cast<u32>(Maxwell::MaxConstBufferSize) / sizeof(u32);
constexpr u32 MAX_CONSTBUFFER_ELEMENTS = MAX_CONSTBUFFER_SCALARS / sizeof(u32);
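// Worked example (illustrative; assumes Maxwell::MaxConstBufferSize is the
// usual 0x10000 bytes): MAX_CONSTBUFFER_SCALARS = 0x10000 / 4 = 16384 u32
// scalars, and MAX_CONSTBUFFER_ELEMENTS = 16384 / 4 = 4096 vec4 elements,
// since each std140 element packs four scalars.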
constexpr std::string_view CommonDeclarations = R"(#define ftoi floatBitsToInt
constexpr std::string_view COMMON_DECLARATIONS = R"(#define ftoi floatBitsToInt
#define ftou floatBitsToUint
#define itof intBitsToFloat
#define utof uintBitsToFloat
@@ -76,10 +76,6 @@ bvec2 HalfFloatNanComparison(bvec2 comparison, vec2 pair1, vec2 pair2) {{
const float fswzadd_modifiers_a[] = float[4](-1.0f, 1.0f, -1.0f, 0.0f );
const float fswzadd_modifiers_b[] = float[4](-1.0f, -1.0f, 1.0f, -1.0f );
layout (std140, binding = {}) uniform vs_config {{
float y_direction;
}};
)";
class ShaderWriter final {
@@ -401,13 +397,6 @@ std::string FlowStackTopName(MetaStackClass stack) {
return fmt::format("{}_flow_stack_top", GetFlowStackPrefix(stack));
}
bool UseUnifiedUniforms(const Device& device, const ShaderIR& ir, ShaderType stage) {
const u32 num_ubos = static_cast<u32>(ir.GetConstantBuffers().size());
// We waste one UBO for emulation
const u32 num_available_ubos = device.GetMaxUniformBuffers(stage) - 1;
return num_ubos > num_available_ubos;
}
struct GenericVaryingDescription {
std::string name;
u8 first_element = 0;
@@ -419,9 +408,8 @@ public:
explicit GLSLDecompiler(const Device& device_, const ShaderIR& ir_, const Registry& registry_,
ShaderType stage_, std::string_view identifier_,
std::string_view suffix_)
: device{device_}, ir{ir_}, registry{registry_}, stage{stage_}, identifier{identifier_},
suffix{suffix_}, header{ir.GetHeader()}, use_unified_uniforms{
UseUnifiedUniforms(device_, ir_, stage_)} {
: device{device_}, ir{ir_}, registry{registry_}, stage{stage_},
identifier{identifier_}, suffix{suffix_}, header{ir.GetHeader()} {
if (stage != ShaderType::Compute) {
transform_feedback = BuildTransformFeedback(registry.GetGraphicsInfo());
}
@@ -515,7 +503,8 @@ private:
if (!identifier.empty()) {
code.AddLine("// {}", identifier);
}
code.AddLine("#version 440 {}", ir.UsesLegacyVaryings() ? "compatibility" : "core");
const bool use_compatibility = ir.UsesLegacyVaryings() || ir.UsesYNegate();
code.AddLine("#version 440 {}", use_compatibility ? "compatibility" : "core");
code.AddLine("#extension GL_ARB_separate_shader_objects : enable");
if (device.HasShaderBallot()) {
code.AddLine("#extension GL_ARB_shader_ballot : require");
@@ -541,7 +530,7 @@ private:
code.AddNewLine();
code.AddLine(CommonDeclarations, EmulationUniformBlockBinding);
code.AddLine(COMMON_DECLARATIONS);
}
void DeclareVertex() {
@@ -864,17 +853,6 @@ private:
}
void DeclareConstantBuffers() {
if (use_unified_uniforms) {
const u32 binding = device.GetBaseBindings(stage).shader_storage_buffer +
static_cast<u32>(ir.GetGlobalMemory().size());
code.AddLine("layout (std430, binding = {}) readonly buffer UnifiedUniforms {{",
binding);
code.AddLine(" uint cbufs[];");
code.AddLine("}};");
code.AddNewLine();
return;
}
u32 binding = device.GetBaseBindings(stage).uniform_buffer;
for (const auto& [index, info] : ir.GetConstantBuffers()) {
const u32 num_elements = Common::AlignUp(info.GetSize(), 4) / 4;
@@ -1080,29 +1058,17 @@ private:
if (const auto cbuf = std::get_if<CbufNode>(&*node)) {
const Node offset = cbuf->GetOffset();
const u32 base_unified_offset = cbuf->GetIndex() * MAX_CONSTBUFFER_SCALARS;
if (const auto immediate = std::get_if<ImmediateNode>(&*offset)) {
// Direct access
const u32 offset_imm = immediate->GetValue();
ASSERT_MSG(offset_imm % 4 == 0, "Unaligned cbuf direct access");
if (use_unified_uniforms) {
return {fmt::format("cbufs[{}]", base_unified_offset + offset_imm / 4),
Type::Uint};
} else {
return {fmt::format("{}[{}][{}]", GetConstBuffer(cbuf->GetIndex()),
offset_imm / (4 * 4), (offset_imm / 4) % 4),
Type::Uint};
}
}
// Indirect access
if (use_unified_uniforms) {
return {fmt::format("cbufs[{} + ({} >> 2)]", base_unified_offset,
Visit(offset).AsUint()),
return {fmt::format("{}[{}][{}]", GetConstBuffer(cbuf->GetIndex()),
offset_imm / (4 * 4), (offset_imm / 4) % 4),
Type::Uint};
}
// Indirect access
const std::string final_offset = code.GenerateTemporary();
code.AddLine("uint {} = {} >> 2;", final_offset, Visit(offset).AsUint());
@@ -2292,7 +2258,6 @@ private:
}
}
}
if (header.ps.omap.depth) {
// The depth output is always 2 registers after the last color output, and current_reg
// already contains one past the last color register.
@@ -2336,7 +2301,8 @@ private:
}
Expression YNegate(Operation operation) {
return {"y_direction", Type::Float};
// Y_NEGATE is mapped to gl_FrontMaterial.ambient.a by StateTracker::SetYNegate
return {"gl_FrontMaterial.ambient.a", Type::Float};
}
template <u32 element>
@@ -2786,7 +2752,6 @@ private:
const std::string_view identifier;
const std::string_view suffix;
const Header header;
const bool use_unified_uniforms;
std::unordered_map<u8, VaryingTFB> transform_feedback;
ShaderWriter code;
@@ -3002,8 +2967,10 @@ ShaderEntries MakeEntries(const Device& device, const ShaderIR& ir, ShaderType s
for (std::size_t i = 0; i < std::size(clip_distances); ++i) {
entries.clip_distances |= (clip_distances[i] ? 1U : 0U) << i;
}
for (const auto& buffer : entries.const_buffers) {
entries.enabled_uniform_buffers |= 1U << buffer.GetIndex();
}
entries.shader_length = ir.GetLength();
entries.use_unified_uniforms = UseUnifiedUniforms(device, ir, stage);
return entries;
}
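// Hedged sketch (not part of this commit): one way a backend could walk the
// enabled_uniform_buffers mask filled in above; needs #include <bit> (C++20).
// The function and callback names are illustrative.
template <typename Func>
void ForEachEnabledUniformBuffer(u32 mask, Func&& on_index) {
    while (mask != 0) {
        on_index(static_cast<u32>(std::countr_zero(mask))); // lowest enabled index
        mask &= mask - 1;                                   // clear that bit
    }
}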


@@ -55,7 +55,7 @@ struct ShaderEntries {
std::vector<ImageEntry> images;
std::size_t shader_length{};
u32 clip_distances{};
bool use_unified_uniforms{};
u32 enabled_uniform_buffers{};
};
ShaderEntries MakeEntries(const Device& device, const VideoCommon::Shader::ShaderIR& ir,


@@ -36,16 +36,10 @@ void SetupDirtyColorMasks(Tables& tables) {
FillBlock(tables[1], OFF(color_mask), NUM(color_mask), ColorMasks);
}
void SetupDirtyVertexArrays(Tables& tables) {
static constexpr std::size_t num_array = 3;
void SetupDirtyVertexInstances(Tables& tables) {
static constexpr std::size_t instance_base_offset = 3;
for (std::size_t i = 0; i < Regs::NumVertexArrays; ++i) {
const std::size_t array_offset = OFF(vertex_array) + i * NUM(vertex_array[0]);
const std::size_t limit_offset = OFF(vertex_array_limit) + i * NUM(vertex_array_limit[0]);
FillBlock(tables, array_offset, num_array, VertexBuffer0 + i, VertexBuffers);
FillBlock(tables, limit_offset, NUM(vertex_array_limit), VertexBuffer0 + i, VertexBuffers);
const std::size_t instance_array_offset = array_offset + instance_base_offset;
tables[0][instance_array_offset] = static_cast<u8>(VertexInstance0 + i);
tables[1][instance_array_offset] = VertexInstances;
@@ -217,11 +211,11 @@ void SetupDirtyMisc(Tables& tables) {
StateTracker::StateTracker(Tegra::GPU& gpu) : flags{gpu.Maxwell3D().dirty.flags} {
auto& dirty = gpu.Maxwell3D().dirty;
auto& tables = dirty.tables;
SetupDirtyRenderTargets(tables);
SetupDirtyFlags(tables);
SetupDirtyColorMasks(tables);
SetupDirtyViewports(tables);
SetupDirtyScissors(tables);
SetupDirtyVertexArrays(tables);
SetupDirtyVertexInstances(tables);
SetupDirtyVertexFormat(tables);
SetupDirtyShaders(tables);
SetupDirtyPolygonModes(tables);
@@ -241,19 +235,6 @@ StateTracker::StateTracker(Tegra::GPU& gpu) : flags{gpu.Maxwell3D().dirty.flags}
SetupDirtyClipControl(tables);
SetupDirtyDepthClampEnabled(tables);
SetupDirtyMisc(tables);
auto& store = dirty.on_write_stores;
store[VertexBuffers] = true;
for (std::size_t i = 0; i < Regs::NumVertexArrays; ++i) {
store[VertexBuffer0 + i] = true;
}
}
void StateTracker::InvalidateStreamBuffer() {
flags[Dirty::VertexBuffers] = true;
for (int index = Dirty::VertexBuffer0; index <= Dirty::VertexBuffer31; ++index) {
flags[index] = true;
}
}
} // namespace OpenGL


@@ -28,10 +28,6 @@ enum : u8 {
VertexFormat0,
VertexFormat31 = VertexFormat0 + 31,
VertexBuffers,
VertexBuffer0,
VertexBuffer31 = VertexBuffer0 + 31,
VertexInstances,
VertexInstance0,
VertexInstance31 = VertexInstance0 + 31,
@@ -92,8 +88,6 @@ class StateTracker {
public:
explicit StateTracker(Tegra::GPU& gpu);
void InvalidateStreamBuffer();
void BindIndexBuffer(GLuint new_index_buffer) {
if (index_buffer == new_index_buffer) {
return;
@@ -110,13 +104,32 @@ public:
glBindFramebuffer(GL_DRAW_FRAMEBUFFER, framebuffer);
}
void ClipControl(GLenum new_origin, GLenum new_depth) {
if (new_origin == origin && new_depth == depth) {
return;
}
origin = new_origin;
depth = new_depth;
glClipControl(origin, depth);
}
void SetYNegate(bool new_y_negate) {
if (new_y_negate == y_negate) {
return;
}
// Y_NEGATE is mapped to gl_FrontMaterial.ambient.a
y_negate = new_y_negate;
const std::array ambient{0.0f, 0.0f, 0.0f, y_negate ? -1.0f : 1.0f};
glMaterialfv(GL_FRONT, GL_AMBIENT, ambient.data());
}
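// Usage sketch (assumption, mirroring the comment above): callers forward the
// guest's Y_NEGATE register through the tracker, and compatibility-profile
// shaders read the value back as gl_FrontMaterial.ambient.a:
//     state_tracker.SetYNegate(true);  // shaders now see -1.0f
//     state_tracker.SetYNegate(true);  // cached, no redundant glMaterialfv
//     state_tracker.SetYNegate(false); // shaders see 1.0f again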
void NotifyScreenDrawVertexArray() {
flags[OpenGL::Dirty::VertexFormats] = true;
flags[OpenGL::Dirty::VertexFormat0 + 0] = true;
flags[OpenGL::Dirty::VertexFormat0 + 1] = true;
flags[OpenGL::Dirty::VertexBuffers] = true;
flags[OpenGL::Dirty::VertexBuffer0] = true;
flags[VideoCommon::Dirty::VertexBuffers] = true;
flags[VideoCommon::Dirty::VertexBuffer0] = true;
flags[OpenGL::Dirty::VertexInstances] = true;
flags[OpenGL::Dirty::VertexInstance0 + 0] = true;
@@ -202,6 +215,9 @@ private:
GLuint framebuffer = 0;
GLuint index_buffer = 0;
GLenum origin = GL_LOWER_LEFT;
GLenum depth = GL_NEGATIVE_ONE_TO_ONE;
bool y_negate = false;
};
} // namespace OpenGL


@@ -1,70 +1,64 @@
// Copyright 2018 Citra Emulator Project
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <tuple>
#include <vector>
#include <array>
#include <memory>
#include <span>
#include <glad/glad.h>
#include "common/alignment.h"
#include "common/assert.h"
#include "common/microprofile.h"
#include "video_core/renderer_opengl/gl_device.h"
#include "video_core/renderer_opengl/gl_state_tracker.h"
#include "video_core/renderer_opengl/gl_stream_buffer.h"
MICROPROFILE_DEFINE(OpenGL_StreamBuffer, "OpenGL", "Stream Buffer Orphaning",
MP_RGB(128, 128, 192));
namespace OpenGL {
OGLStreamBuffer::OGLStreamBuffer(const Device& device, StateTracker& state_tracker_)
: state_tracker{state_tracker_} {
gl_buffer.Create();
static constexpr GLbitfield flags = GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT;
glNamedBufferStorage(gl_buffer.handle, BUFFER_SIZE, nullptr, flags);
mapped_ptr = static_cast<u8*>(
glMapNamedBufferRange(gl_buffer.handle, 0, BUFFER_SIZE, flags | GL_MAP_FLUSH_EXPLICIT_BIT));
if (device.UseAssemblyShaders() || device.HasVertexBufferUnifiedMemory()) {
glMakeNamedBufferResidentNV(gl_buffer.handle, GL_READ_ONLY);
glGetNamedBufferParameterui64vNV(gl_buffer.handle, GL_BUFFER_GPU_ADDRESS_NV, &gpu_address);
StreamBuffer::StreamBuffer() {
static constexpr GLbitfield flags = GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT | GL_MAP_COHERENT_BIT;
buffer.Create();
glObjectLabel(GL_BUFFER, buffer.handle, -1, "Stream Buffer");
glNamedBufferStorage(buffer.handle, STREAM_BUFFER_SIZE, nullptr, flags);
mapped_pointer =
static_cast<u8*>(glMapNamedBufferRange(buffer.handle, 0, STREAM_BUFFER_SIZE, flags));
for (OGLSync& sync : fences) {
sync.Create();
}
}
OGLStreamBuffer::~OGLStreamBuffer() {
glUnmapNamedBuffer(gl_buffer.handle);
gl_buffer.Release();
}
std::pair<u8*, GLintptr> OGLStreamBuffer::Map(GLsizeiptr size, GLintptr alignment) {
ASSERT(size <= BUFFER_SIZE);
ASSERT(alignment <= BUFFER_SIZE);
mapped_size = size;
if (alignment > 0) {
buffer_pos = Common::AlignUp<std::size_t>(buffer_pos, alignment);
std::pair<std::span<u8>, size_t> StreamBuffer::Request(size_t size) noexcept {
ASSERT(size < REGION_SIZE);
for (size_t region = Region(used_iterator), region_end = Region(iterator); region < region_end;
++region) {
fences[region].Create();
}
used_iterator = iterator;
if (buffer_pos + size > BUFFER_SIZE) {
MICROPROFILE_SCOPE(OpenGL_StreamBuffer);
glInvalidateBufferData(gl_buffer.handle);
state_tracker.InvalidateStreamBuffer();
buffer_pos = 0;
for (size_t region = Region(free_iterator) + 1,
region_end = std::min(Region(iterator + size) + 1, NUM_SYNCS);
region < region_end; ++region) {
glClientWaitSync(fences[region].handle, 0, GL_TIMEOUT_IGNORED);
fences[region].Release();
}
return std::make_pair(mapped_ptr + buffer_pos, buffer_pos);
}
void OGLStreamBuffer::Unmap(GLsizeiptr size) {
ASSERT(size <= mapped_size);
if (size > 0) {
glFlushMappedNamedBufferRange(gl_buffer.handle, buffer_pos, size);
if (iterator + size > free_iterator) {
free_iterator = iterator + size;
}
if (iterator + size > STREAM_BUFFER_SIZE) {
for (size_t region = Region(used_iterator); region < NUM_SYNCS; ++region) {
fences[region].Create();
}
used_iterator = 0;
iterator = 0;
free_iterator = size;
buffer_pos += size;
for (size_t region = 0, region_end = Region(size); region <= region_end; ++region) {
glClientWaitSync(fences[region].handle, 0, GL_TIMEOUT_IGNORED);
fences[region].Release();
}
}
const size_t offset = iterator;
iterator = Common::AlignUp(iterator + size, MAX_ALIGNMENT);
return {std::span(mapped_pointer + offset, size), offset};
}
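// Hedged usage sketch (not part of this commit): a caller copies into the
// returned span through the persistent coherent mapping, then binds at the
// returned offset. "stream_buffer", the binding index, and the stride are
// illustrative assumptions; std::memcpy needs <cstring>.
void UploadVertices(StreamBuffer& stream_buffer, std::span<const u8> vertices) {
    const auto [span, offset] = stream_buffer.Request(vertices.size_bytes());
    std::memcpy(span.data(), vertices.data(), vertices.size_bytes());
    glBindVertexBuffer(0, stream_buffer.Handle(), static_cast<GLintptr>(offset),
                       16); // stride: assumed vec4 layout
}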
} // namespace OpenGL


@@ -1,9 +1,12 @@
// Copyright 2018 Citra Emulator Project
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <array>
#include <memory>
#include <span>
#include <utility>
#include <glad/glad.h>
@@ -13,48 +16,35 @@
namespace OpenGL {
class Device;
class StateTracker;
class StreamBuffer {
static constexpr size_t STREAM_BUFFER_SIZE = 64 * 1024 * 1024;
static constexpr size_t NUM_SYNCS = 16;
static constexpr size_t REGION_SIZE = STREAM_BUFFER_SIZE / NUM_SYNCS;
static constexpr size_t MAX_ALIGNMENT = 256;
static_assert(STREAM_BUFFER_SIZE % MAX_ALIGNMENT == 0);
static_assert(STREAM_BUFFER_SIZE % NUM_SYNCS == 0);
static_assert(REGION_SIZE % MAX_ALIGNMENT == 0);
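// Worked numbers (from the constants above): REGION_SIZE = 64 MiB / 16 = 4 MiB,
// so Region(offset) buckets any offset into one of 16 fence-tracked slices and
// every allocation stays aligned to MAX_ALIGNMENT = 256 bytes.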
class OGLStreamBuffer : private NonCopyable {
public:
explicit OGLStreamBuffer(const Device& device, StateTracker& state_tracker_);
~OGLStreamBuffer();
explicit StreamBuffer();
/*
* Allocates a linear chunk of memory in the GPU buffer with at least "size" bytes
* and the optional alignment requirement.
* If the buffer is full, the whole buffer is reallocated which invalidates old chunks.
* The return values are the pointer to the new chunk, and the offset within the buffer.
* The actual used size must be specified on unmapping the chunk.
*/
std::pair<u8*, GLintptr> Map(GLsizeiptr size, GLintptr alignment = 0);
[[nodiscard]] std::pair<std::span<u8>, size_t> Request(size_t size) noexcept;
void Unmap(GLsizeiptr size);
GLuint Handle() const {
return gl_buffer.handle;
}
u64 Address() const {
return gpu_address;
}
GLsizeiptr Size() const noexcept {
return BUFFER_SIZE;
[[nodiscard]] GLuint Handle() const noexcept {
return buffer.handle;
}
private:
static constexpr GLsizeiptr BUFFER_SIZE = 256 * 1024 * 1024;
[[nodiscard]] static size_t Region(size_t offset) noexcept {
return offset / REGION_SIZE;
}
StateTracker& state_tracker;
OGLBuffer gl_buffer;
GLuint64EXT gpu_address = 0;
GLintptr buffer_pos = 0;
GLsizeiptr mapped_size = 0;
u8* mapped_ptr = nullptr;
size_t iterator = 0;
size_t used_iterator = 0;
size_t free_iterator = 0;
u8* mapped_pointer = nullptr;
OGLBuffer buffer;
std::array<OGLSync, NUM_SYNCS> fences;
};
} // namespace OpenGL


@@ -398,9 +398,6 @@ void AttachTexture(GLuint fbo, GLenum attachment, const ImageView* image_view) {
} // Anonymous namespace
ImageBufferMap::ImageBufferMap(GLuint handle_, u8* map, size_t size, OGLSync* sync_)
: span(map, size), sync{sync_}, handle{handle_} {}
ImageBufferMap::~ImageBufferMap() {
if (sync) {
sync->Create();
@@ -487,11 +484,11 @@ void TextureCacheRuntime::Finish() {
glFinish();
}
ImageBufferMap TextureCacheRuntime::MapUploadBuffer(size_t size) {
ImageBufferMap TextureCacheRuntime::UploadStagingBuffer(size_t size) {
return upload_buffers.RequestMap(size, true);
}
ImageBufferMap TextureCacheRuntime::MapDownloadBuffer(size_t size) {
ImageBufferMap TextureCacheRuntime::DownloadStagingBuffer(size_t size) {
return download_buffers.RequestMap(size, false);
}
@@ -596,7 +593,11 @@ ImageBufferMap TextureCacheRuntime::StagingBuffers::RequestMap(size_t requested_
bool insert_fence) {
const size_t index = RequestBuffer(requested_size);
OGLSync* const sync = insert_fence ? &syncs[index] : nullptr;
return ImageBufferMap(buffers[index].handle, maps[index], requested_size, sync);
return ImageBufferMap{
.mapped_span = std::span(maps[index], requested_size),
.sync = sync,
.buffer = buffers[index].handle,
};
}
size_t TextureCacheRuntime::StagingBuffers::RequestBuffer(size_t requested_size) {
@@ -711,7 +712,7 @@ Image::Image(TextureCacheRuntime& runtime, const VideoCommon::ImageInfo& info_,
void Image::UploadMemory(const ImageBufferMap& map, size_t buffer_offset,
std::span<const VideoCommon::BufferImageCopy> copies) {
glBindBuffer(GL_PIXEL_UNPACK_BUFFER, map.Handle());
glBindBuffer(GL_PIXEL_UNPACK_BUFFER, map.buffer);
glFlushMappedBufferRange(GL_PIXEL_UNPACK_BUFFER, buffer_offset, unswizzled_size_bytes);
glPixelStorei(GL_UNPACK_ALIGNMENT, 1);
@@ -735,7 +736,7 @@ void Image::UploadMemory(const ImageBufferMap& map, size_t buffer_offset,
void Image::UploadMemory(const ImageBufferMap& map, size_t buffer_offset,
std::span<const VideoCommon::BufferCopy> copies) {
for (const VideoCommon::BufferCopy& copy : copies) {
glCopyNamedBufferSubData(map.Handle(), buffer.handle, copy.src_offset + buffer_offset,
glCopyNamedBufferSubData(map.buffer, buffer.handle, copy.src_offset + buffer_offset,
copy.dst_offset, copy.size);
}
}
@@ -744,7 +745,7 @@ void Image::DownloadMemory(ImageBufferMap& map, size_t buffer_offset,
std::span<const VideoCommon::BufferImageCopy> copies) {
glMemoryBarrier(GL_PIXEL_BUFFER_BARRIER_BIT); // TODO: Move this to its own API
glBindBuffer(GL_PIXEL_PACK_BUFFER, map.Handle());
glBindBuffer(GL_PIXEL_PACK_BUFFER, map.buffer);
glPixelStorei(GL_PACK_ALIGNMENT, 1);
u32 current_row_length = std::numeric_limits<u32>::max();


@@ -31,23 +31,12 @@ using VideoCommon::NUM_RT;
using VideoCommon::Offset2D;
using VideoCommon::RenderTargets;
class ImageBufferMap {
public:
explicit ImageBufferMap(GLuint handle, u8* map, size_t size, OGLSync* sync);
struct ImageBufferMap {
~ImageBufferMap();
GLuint Handle() const noexcept {
return handle;
}
std::span<u8> Span() const noexcept {
return span;
}
private:
std::span<u8> span;
std::span<u8> mapped_span;
OGLSync* sync;
GLuint handle;
GLuint buffer;
};
struct FormatProperties {
@@ -69,9 +58,9 @@ public:
void Finish();
ImageBufferMap MapUploadBuffer(size_t size);
ImageBufferMap UploadStagingBuffer(size_t size);
ImageBufferMap MapDownloadBuffer(size_t size);
ImageBufferMap DownloadStagingBuffer(size_t size);
void CopyImage(Image& dst, Image& src, std::span<const VideoCommon::ImageCopy> copies);


@@ -27,11 +27,14 @@
#include "video_core/renderer_opengl/gl_shader_manager.h"
#include "video_core/renderer_opengl/renderer_opengl.h"
#include "video_core/textures/decoders.h"
#include "video_core/vulkan_common/vulkan_debug_callback.h"
#include "video_core/vulkan_common/vulkan_device.h"
#include "video_core/vulkan_common/vulkan_instance.h"
#include "video_core/vulkan_common/vulkan_library.h"
#include "video_core/vulkan_common/vulkan_memory_allocator.h"
namespace OpenGL {
namespace {
constexpr GLint PositionLocation = 0;
constexpr GLint TexCoordLocation = 1;
constexpr GLint ModelViewMatrixLocation = 0;
@@ -125,14 +128,100 @@ void APIENTRY DebugHandler(GLenum source, GLenum type, GLuint id, GLenum severit
}
}
Vulkan::vk::PhysicalDevice FindPhysicalDevice(Vulkan::vk::Instance& instance) {
using namespace Vulkan;
using UUID = std::array<GLubyte, GL_UUID_SIZE_EXT>;
GLint num_device_uuids;
glGetIntegerv(GL_NUM_DEVICE_UUIDS_EXT, &num_device_uuids);
std::vector<UUID> device_uuids(num_device_uuids);
for (GLint index = 0; index < num_device_uuids; ++index) {
glGetUnsignedBytei_vEXT(GL_DEVICE_UUID_EXT, static_cast<GLuint>(index), device_uuids[index].data());
}
UUID driver_uuid;
glGetUnsignedBytevEXT(GL_DRIVER_UUID_EXT, driver_uuid.data());
for (const VkPhysicalDevice raw_physical_device : instance.EnumeratePhysicalDevices()) {
VkPhysicalDeviceIDProperties device_id_properties{};
device_id_properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ID_PROPERTIES;
VkPhysicalDeviceProperties2KHR properties{
.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2_KHR,
.pNext = &device_id_properties,
.properties{},
};
vk::PhysicalDevice physical_device(raw_physical_device, instance.Dispatch());
physical_device.GetProperties2KHR(properties);
if (!std::ranges::equal(device_id_properties.driverUUID, driver_uuid)) {
continue;
}
const auto it =
std::ranges::find_if(device_uuids, [&device_id_properties](const UUID& uuid) {
return std::ranges::equal(device_id_properties.deviceUUID, uuid);
});
if (it != device_uuids.end()) {
return physical_device;
}
}
throw vk::Exception(VK_ERROR_INCOMPATIBLE_DRIVER);
}
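// Note (hedged summary of the routine above): the driver/device UUIDs come
// from the GL_EXT_memory_object interop queries, and matching them against
// VkPhysicalDeviceIDProperties ensures the Vulkan allocations exported to GL
// live on the same physical GPU as the OpenGL context.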
} // Anonymous namespace
struct VulkanObjects {
static std::unique_ptr<VulkanObjects> TryCreate() {
if (!GLAD_GL_EXT_memory_object) {
// Interop is not present
return nullptr;
}
const std::string_view vendor{reinterpret_cast<const char*>(glGetString(GL_VENDOR))};
if (vendor == "ATI Technologies Inc.") {
// Avoid using GL_EXT_memory_object on AMD, as it makes the GL driver crash
return nullptr;
}
if (!Settings::values.use_assembly_shaders.GetValue()) {
// We only need interop when assembly shaders are enabled
return nullptr;
}
#ifdef __linux__
LOG_WARNING(Render_OpenGL, "Interop doesn't work on Linux at the moment");
return nullptr;
#endif
try {
return std::make_unique<VulkanObjects>();
} catch (const Vulkan::vk::Exception& exception) {
LOG_ERROR(Render_OpenGL, "Failed to initialize Vulkan objects with error: {}",
exception.what());
return nullptr;
}
}
Common::DynamicLibrary library{Vulkan::OpenLibrary()};
Vulkan::vk::InstanceDispatch dld;
Vulkan::vk::Instance instance{Vulkan::CreateInstance(library, dld, VK_API_VERSION_1_1)};
Vulkan::Device device{*instance, FindPhysicalDevice(instance), nullptr, dld};
Vulkan::MemoryAllocator memory_allocator{device, true};
};
RendererOpenGL::RendererOpenGL(Core::TelemetrySession& telemetry_session_,
Core::Frontend::EmuWindow& emu_window_,
Core::Memory::Memory& cpu_memory_, Tegra::GPU& gpu_,
std::unique_ptr<Core::Frontend::GraphicsContext> context_)
: RendererBase{emu_window_, std::move(context_)}, telemetry_session{telemetry_session_},
emu_window{emu_window_}, cpu_memory{cpu_memory_}, gpu{gpu_}, program_manager{device} {}
emu_window{emu_window_}, cpu_memory{cpu_memory_}, gpu{gpu_},
vulkan_objects{VulkanObjects::TryCreate()}, device{vulkan_objects != nullptr},
state_tracker{gpu}, program_manager{device},
rasterizer(emu_window, gpu, cpu_memory, device,
vulkan_objects ? &vulkan_objects->device : nullptr,
vulkan_objects ? &vulkan_objects->memory_allocator : nullptr, screen_info,
program_manager, state_tracker) {
if (Settings::values.renderer_debug && GLAD_GL_KHR_debug) {
glEnable(GL_DEBUG_OUTPUT);
glEnable(GL_DEBUG_OUTPUT_SYNCHRONOUS);
glDebugMessageCallback(DebugHandler, nullptr);
}
AddTelemetryFields();
InitOpenGLObjects();
}
RendererOpenGL::~RendererOpenGL() = default;
@@ -148,7 +237,7 @@ void RendererOpenGL::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {
++m_current_frame;
rasterizer->TickFrame();
rasterizer.TickFrame();
context->SwapBuffers();
render_window.OnFrameDisplayed();
@@ -179,7 +268,7 @@ void RendererOpenGL::LoadFBToScreenInfo(const Tegra::FramebufferConfig& framebuf
framebuffer_crop_rect = framebuffer.crop_rect;
const VAddr framebuffer_addr{framebuffer.address + framebuffer.offset};
if (rasterizer->AccelerateDisplay(framebuffer, framebuffer_addr, framebuffer.stride)) {
if (rasterizer.AccelerateDisplay(framebuffer, framebuffer_addr, framebuffer.stride)) {
return;
}
@@ -267,6 +356,7 @@ void RendererOpenGL::InitOpenGLObjects() {
// Enable unified vertex attributes and query vertex buffer address when the driver supports it
if (device.HasVertexBufferUnifiedMemory()) {
glEnableClientState(GL_VERTEX_ATTRIB_ARRAY_UNIFIED_NV);
glEnableClientState(GL_ELEMENT_ARRAY_UNIFIED_NV);
glMakeNamedBufferResidentNV(vertex_buffer.handle, GL_READ_ONLY);
glGetNamedBufferParameterui64vNV(vertex_buffer.handle, GL_BUFFER_GPU_ADDRESS_NV,
@@ -289,14 +379,6 @@ void RendererOpenGL::AddTelemetryFields() {
telemetry_session.AddField(user_system, "GPU_OpenGL_Version", std::string(gl_version));
}
void RendererOpenGL::CreateRasterizer() {
if (rasterizer) {
return;
}
rasterizer = std::make_unique<RasterizerOpenGL>(emu_window, gpu, cpu_memory, device,
screen_info, program_manager, state_tracker);
}
void RendererOpenGL::ConfigureFramebufferTexture(TextureInfo& texture,
const Tegra::FramebufferConfig& framebuffer) {
texture.width = framebuffer.width;
@@ -407,6 +489,7 @@ void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) {
program_manager.BindHostPipeline(pipeline.handle);
state_tracker.ClipControl(GL_LOWER_LEFT, GL_ZERO_TO_ONE);
glEnable(GL_CULL_FACE);
if (screen_info.display_srgb) {
glEnable(GL_FRAMEBUFFER_SRGB);
@@ -425,7 +508,6 @@ void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) {
glCullFace(GL_BACK);
glFrontFace(GL_CW);
glColorMaski(0, GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE);
glClipControl(GL_LOWER_LEFT, GL_ZERO_TO_ONE);
glViewportIndexedf(0, 0.0f, 0.0f, static_cast<GLfloat>(layout.width),
static_cast<GLfloat>(layout.height));
glDepthRangeIndexed(0, 0.0, 0.0);
@@ -497,25 +579,4 @@ void RendererOpenGL::RenderScreenshot() {
renderer_settings.screenshot_requested = false;
}
bool RendererOpenGL::Init() {
if (Settings::values.renderer_debug && GLAD_GL_KHR_debug) {
glEnable(GL_DEBUG_OUTPUT);
glEnable(GL_DEBUG_OUTPUT_SYNCHRONOUS);
glDebugMessageCallback(DebugHandler, nullptr);
}
AddTelemetryFields();
if (!GLAD_GL_VERSION_4_3) {
return false;
}
InitOpenGLObjects();
CreateRasterizer();
return true;
}
void RendererOpenGL::ShutDown() {}
} // namespace OpenGL

View File

@@ -10,6 +10,7 @@
#include "common/math_util.h"
#include "video_core/renderer_base.h"
#include "video_core/renderer_opengl/gl_device.h"
#include "video_core/renderer_opengl/gl_rasterizer.h"
#include "video_core/renderer_opengl/gl_resource_manager.h"
#include "video_core/renderer_opengl/gl_shader_manager.h"
#include "video_core/renderer_opengl/gl_state_tracker.h"
@@ -37,6 +38,8 @@ class GPU;
namespace OpenGL {
struct VulkanObjects;
/// Structure used for storing information about the textures for the Switch screen
struct TextureInfo {
OGLTexture resource;
@@ -63,18 +66,18 @@ public:
std::unique_ptr<Core::Frontend::GraphicsContext> context_);
~RendererOpenGL() override;
bool Init() override;
void ShutDown() override;
void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) override;
VideoCore::RasterizerInterface* ReadRasterizer() override {
return &rasterizer;
}
private:
/// Initializes the OpenGL state and creates persistent objects.
void InitOpenGLObjects();
void AddTelemetryFields();
void CreateRasterizer();
void ConfigureFramebufferTexture(TextureInfo& texture,
const Tegra::FramebufferConfig& framebuffer);
@@ -98,8 +101,11 @@ private:
Core::Memory::Memory& cpu_memory;
Tegra::GPU& gpu;
const Device device;
StateTracker state_tracker{gpu};
std::unique_ptr<VulkanObjects> vulkan_objects;
Device device;
StateTracker state_tracker;
ProgramManager program_manager;
RasterizerOpenGL rasterizer;
// OpenGL object IDs
OGLSampler present_sampler;
@@ -115,9 +121,6 @@ private:
/// Display information for Switch screen
ScreenInfo screen_info;
/// Global dummy shader pipeline
ProgramManager program_manager;
/// OpenGL framebuffer data
std::vector<u8> gl_framebuffer_data;


@@ -71,7 +71,7 @@ void UtilShaders::BlockLinearUpload2D(Image& image, const ImageBufferMap& map, s
static constexpr GLuint BINDING_OUTPUT_IMAGE = 0;
program_manager.BindHostCompute(block_linear_unswizzle_2d_program.handle);
glFlushMappedNamedBufferRange(map.Handle(), buffer_offset, image.guest_size_bytes);
glFlushMappedNamedBufferRange(map.buffer, buffer_offset, image.guest_size_bytes);
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, BINDING_SWIZZLE_BUFFER, swizzle_table_buffer.handle);
const GLenum store_format = StoreFormat(BytesPerBlock(image.info.format));
@@ -91,8 +91,8 @@ void UtilShaders::BlockLinearUpload2D(Image& image, const ImageBufferMap& map, s
glUniform1ui(5, params.x_shift);
glUniform1ui(6, params.block_height);
glUniform1ui(7, params.block_height_mask);
glBindBufferRange(GL_SHADER_STORAGE_BUFFER, BINDING_INPUT_BUFFER, map.Handle(),
input_offset, image.guest_size_bytes - swizzle.buffer_offset);
glBindBufferRange(GL_SHADER_STORAGE_BUFFER, BINDING_INPUT_BUFFER, map.buffer, input_offset,
image.guest_size_bytes - swizzle.buffer_offset);
glBindImageTexture(BINDING_OUTPUT_IMAGE, image.Handle(), swizzle.level, GL_TRUE, 0,
GL_WRITE_ONLY, store_format);
glDispatchCompute(num_dispatches_x, num_dispatches_y, image.info.resources.layers);
@@ -108,7 +108,7 @@ void UtilShaders::BlockLinearUpload3D(Image& image, const ImageBufferMap& map, s
static constexpr GLuint BINDING_INPUT_BUFFER = 1;
static constexpr GLuint BINDING_OUTPUT_IMAGE = 0;
glFlushMappedNamedBufferRange(map.Handle(), buffer_offset, image.guest_size_bytes);
glFlushMappedNamedBufferRange(map.buffer, buffer_offset, image.guest_size_bytes);
program_manager.BindHostCompute(block_linear_unswizzle_3d_program.handle);
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, BINDING_SWIZZLE_BUFFER, swizzle_table_buffer.handle);
@@ -132,8 +132,8 @@ void UtilShaders::BlockLinearUpload3D(Image& image, const ImageBufferMap& map, s
glUniform1ui(7, params.block_height_mask);
glUniform1ui(8, params.block_depth);
glUniform1ui(9, params.block_depth_mask);
glBindBufferRange(GL_SHADER_STORAGE_BUFFER, BINDING_INPUT_BUFFER, map.Handle(),
input_offset, image.guest_size_bytes - swizzle.buffer_offset);
glBindBufferRange(GL_SHADER_STORAGE_BUFFER, BINDING_INPUT_BUFFER, map.buffer, input_offset,
image.guest_size_bytes - swizzle.buffer_offset);
glBindImageTexture(BINDING_OUTPUT_IMAGE, image.Handle(), swizzle.level, GL_TRUE, 0,
GL_WRITE_ONLY, store_format);
glDispatchCompute(num_dispatches_x, num_dispatches_y, num_dispatches_z);
@@ -159,7 +159,7 @@ void UtilShaders::PitchUpload(Image& image, const ImageBufferMap& map, size_t bu
"Non-power of two images are not implemented");
program_manager.BindHostCompute(pitch_unswizzle_program.handle);
glFlushMappedNamedBufferRange(map.Handle(), buffer_offset, image.guest_size_bytes);
glFlushMappedNamedBufferRange(map.buffer, buffer_offset, image.guest_size_bytes);
glUniform2ui(LOC_ORIGIN, 0, 0);
glUniform2i(LOC_DESTINATION, 0, 0);
glUniform1ui(LOC_BYTES_PER_BLOCK, bytes_per_block);
@@ -172,8 +172,8 @@ void UtilShaders::PitchUpload(Image& image, const ImageBufferMap& map, size_t bu
const u32 num_dispatches_x = Common::DivCeil(num_tiles.width, WORKGROUP_SIZE.width);
const u32 num_dispatches_y = Common::DivCeil(num_tiles.height, WORKGROUP_SIZE.height);
glBindBufferRange(GL_SHADER_STORAGE_BUFFER, BINDING_INPUT_BUFFER, map.Handle(),
input_offset, image.guest_size_bytes - swizzle.buffer_offset);
glBindBufferRange(GL_SHADER_STORAGE_BUFFER, BINDING_INPUT_BUFFER, map.buffer, input_offset,
image.guest_size_bytes - swizzle.buffer_offset);
glDispatchCompute(num_dispatches_x, num_dispatches_y, 1);
}
program_manager.RestoreGuestCompute();
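// Worked example (illustrative): with num_tiles.width == 70 and
// WORKGROUP_SIZE.width == 32, Common::DivCeil gives (70 + 31) / 32 = 3
// dispatches along X, so the final workgroup is only partially occupied.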


@@ -15,9 +15,10 @@
namespace OpenGL {
class Image;
class ImageBufferMap;
class ProgramManager;
struct ImageBufferMap;
class UtilShaders {
public:
explicit UtilShaders(ProgramManager& program_manager);