another try

This commit is contained in:
mgthepro
2022-11-05 13:58:44 +01:00
parent 4a9f2bbf2a
commit 9f63fbe700
2002 changed files with 671171 additions and 671092 deletions

View File

@@ -1,291 +1,291 @@
// SPDX-FileCopyrightText: Copyright 2018 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include <algorithm>
#include <span>
#include "shader_recompiler/backend/glasm/emit_glasm.h"
#include "video_core/buffer_cache/buffer_cache.h"
#include "video_core/renderer_opengl/gl_buffer_cache.h"
#include "video_core/renderer_opengl/gl_device.h"
#include "video_core/renderer_opengl/maxwell_to_gl.h"
namespace OpenGL {
namespace {
using VideoCore::Surface::PixelFormat;
struct BindlessSSBO {
GLuint64EXT address;
GLsizei length;
GLsizei padding;
};
static_assert(sizeof(BindlessSSBO) == sizeof(GLuint) * 4);
constexpr std::array PROGRAM_LUT{
GL_VERTEX_PROGRAM_NV, GL_TESS_CONTROL_PROGRAM_NV, GL_TESS_EVALUATION_PROGRAM_NV,
GL_GEOMETRY_PROGRAM_NV, GL_FRAGMENT_PROGRAM_NV,
};
[[nodiscard]] GLenum GetTextureBufferFormat(GLenum gl_format) {
switch (gl_format) {
case GL_RGBA8_SNORM:
return GL_RGBA8;
case GL_R8_SNORM:
return GL_R8;
case GL_RGBA16_SNORM:
return GL_RGBA16;
case GL_R16_SNORM:
return GL_R16;
case GL_RG16_SNORM:
return GL_RG16;
case GL_RG8_SNORM:
return GL_RG8;
default:
return gl_format;
}
}
} // Anonymous namespace
Buffer::Buffer(BufferCacheRuntime&, VideoCommon::NullBufferParams null_params)
: VideoCommon::BufferBase<VideoCore::RasterizerInterface>(null_params) {}
Buffer::Buffer(BufferCacheRuntime& runtime, VideoCore::RasterizerInterface& rasterizer_,
VAddr cpu_addr_, u64 size_bytes_)
: VideoCommon::BufferBase<VideoCore::RasterizerInterface>(rasterizer_, cpu_addr_, size_bytes_) {
buffer.Create();
const std::string name = fmt::format("Buffer 0x{:x}", CpuAddr());
glObjectLabel(GL_BUFFER, buffer.handle, static_cast<GLsizei>(name.size()), name.data());
glNamedBufferData(buffer.handle, SizeBytes(), nullptr, GL_DYNAMIC_DRAW);
if (runtime.has_unified_vertex_buffers) {
glGetNamedBufferParameterui64vNV(buffer.handle, GL_BUFFER_GPU_ADDRESS_NV, &address);
}
}
void Buffer::ImmediateUpload(size_t offset, std::span<const u8> data) noexcept {
glNamedBufferSubData(buffer.handle, static_cast<GLintptr>(offset),
static_cast<GLsizeiptr>(data.size_bytes()), data.data());
}
void Buffer::ImmediateDownload(size_t offset, std::span<u8> data) noexcept {
glGetNamedBufferSubData(buffer.handle, static_cast<GLintptr>(offset),
static_cast<GLsizeiptr>(data.size_bytes()), data.data());
}
void Buffer::MakeResident(GLenum access) noexcept {
// Abuse GLenum's order to exit early
// GL_NONE (default) < GL_READ_ONLY < GL_READ_WRITE
if (access <= current_residency_access || buffer.handle == 0) {
return;
}
if (std::exchange(current_residency_access, access) != GL_NONE) {
// If the buffer is already resident, remove its residency before promoting it
glMakeNamedBufferNonResidentNV(buffer.handle);
}
glMakeNamedBufferResidentNV(buffer.handle, access);
}
GLuint Buffer::View(u32 offset, u32 size, PixelFormat format) {
const auto it{std::ranges::find_if(views, [offset, size, format](const BufferView& view) {
return offset == view.offset && size == view.size && format == view.format;
})};
if (it != views.end()) {
return it->texture.handle;
}
OGLTexture texture;
texture.Create(GL_TEXTURE_BUFFER);
const GLenum gl_format{MaxwellToGL::GetFormatTuple(format).internal_format};
const GLenum texture_format{GetTextureBufferFormat(gl_format)};
if (texture_format != gl_format) {
LOG_WARNING(Render_OpenGL, "Emulating SNORM texture buffer with UNORM.");
}
glTextureBufferRange(texture.handle, texture_format, buffer.handle, offset, size);
views.push_back({
.offset = offset,
.size = size,
.format = format,
.texture = std::move(texture),
});
return views.back().texture.handle;
}
BufferCacheRuntime::BufferCacheRuntime(const Device& device_)
: device{device_}, has_fast_buffer_sub_data{device.HasFastBufferSubData()},
use_assembly_shaders{device.UseAssemblyShaders()},
has_unified_vertex_buffers{device.HasVertexBufferUnifiedMemory()},
stream_buffer{has_fast_buffer_sub_data ? std::nullopt : std::make_optional<StreamBuffer>()} {
GLint gl_max_attributes;
glGetIntegerv(GL_MAX_VERTEX_ATTRIBS, &gl_max_attributes);
max_attributes = static_cast<u32>(gl_max_attributes);
for (auto& stage_uniforms : fast_uniforms) {
for (OGLBuffer& buffer : stage_uniforms) {
buffer.Create();
glNamedBufferData(buffer.handle, BufferCache::DEFAULT_SKIP_CACHE_SIZE, nullptr,
GL_STREAM_DRAW);
}
}
for (auto& stage_uniforms : copy_uniforms) {
for (OGLBuffer& buffer : stage_uniforms) {
buffer.Create();
glNamedBufferData(buffer.handle, 0x10'000, nullptr, GL_STREAM_COPY);
}
}
for (OGLBuffer& buffer : copy_compute_uniforms) {
buffer.Create();
glNamedBufferData(buffer.handle, 0x10'000, nullptr, GL_STREAM_COPY);
}
device_access_memory = [this]() -> u64 {
if (device.CanReportMemoryUsage()) {
return device.GetCurrentDedicatedVideoMemory() + 512_MiB;
}
return 2_GiB; // Return minimum requirements
}();
}
u64 BufferCacheRuntime::GetDeviceMemoryUsage() const {
if (device.CanReportMemoryUsage()) {
return device_access_memory - device.GetCurrentDedicatedVideoMemory();
}
return 2_GiB;
}
void BufferCacheRuntime::CopyBuffer(Buffer& dst_buffer, Buffer& src_buffer,
std::span<const VideoCommon::BufferCopy> copies) {
for (const VideoCommon::BufferCopy& copy : copies) {
glCopyNamedBufferSubData(
src_buffer.Handle(), dst_buffer.Handle(), static_cast<GLintptr>(copy.src_offset),
static_cast<GLintptr>(copy.dst_offset), static_cast<GLsizeiptr>(copy.size));
}
}
void BufferCacheRuntime::ClearBuffer(Buffer& dest_buffer, u32 offset, size_t size, u32 value) {
glClearNamedBufferSubData(dest_buffer.Handle(), GL_R32UI, static_cast<GLintptr>(offset),
static_cast<GLsizeiptr>(size), GL_RED, GL_UNSIGNED_INT, &value);
}
void BufferCacheRuntime::BindIndexBuffer(Buffer& buffer, u32 offset, u32 size) {
if (has_unified_vertex_buffers) {
buffer.MakeResident(GL_READ_ONLY);
glBufferAddressRangeNV(GL_ELEMENT_ARRAY_ADDRESS_NV, 0, buffer.HostGpuAddr() + offset,
static_cast<GLsizeiptr>(Common::AlignUp(size, 4)));
} else {
glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, buffer.Handle());
index_buffer_offset = offset;
}
}
void BufferCacheRuntime::BindVertexBuffer(u32 index, Buffer& buffer, u32 offset, u32 size,
u32 stride) {
if (index >= max_attributes) {
return;
}
if (has_unified_vertex_buffers) {
buffer.MakeResident(GL_READ_ONLY);
glBindVertexBuffer(index, 0, 0, static_cast<GLsizei>(stride));
glBufferAddressRangeNV(GL_VERTEX_ATTRIB_ARRAY_ADDRESS_NV, index,
buffer.HostGpuAddr() + offset, static_cast<GLsizeiptr>(size));
} else {
glBindVertexBuffer(index, buffer.Handle(), static_cast<GLintptr>(offset),
static_cast<GLsizei>(stride));
}
}
void BufferCacheRuntime::BindUniformBuffer(size_t stage, u32 binding_index, Buffer& buffer,
u32 offset, u32 size) {
if (use_assembly_shaders) {
GLuint handle;
if (offset != 0) {
handle = copy_uniforms[stage][binding_index].handle;
glCopyNamedBufferSubData(buffer.Handle(), handle, offset, 0, size);
} else {
handle = buffer.Handle();
}
glBindBufferRangeNV(PABO_LUT[stage], binding_index, handle, 0,
static_cast<GLsizeiptr>(size));
} else {
const GLuint base_binding = graphics_base_uniform_bindings[stage];
const GLuint binding = base_binding + binding_index;
glBindBufferRange(GL_UNIFORM_BUFFER, binding, buffer.Handle(),
static_cast<GLintptr>(offset), static_cast<GLsizeiptr>(size));
}
}
void BufferCacheRuntime::BindComputeUniformBuffer(u32 binding_index, Buffer& buffer, u32 offset,
u32 size) {
if (use_assembly_shaders) {
GLuint handle;
if (offset != 0) {
handle = copy_compute_uniforms[binding_index].handle;
glCopyNamedBufferSubData(buffer.Handle(), handle, offset, 0, size);
} else {
handle = buffer.Handle();
}
glBindBufferRangeNV(GL_COMPUTE_PROGRAM_PARAMETER_BUFFER_NV, binding_index, handle, 0,
static_cast<GLsizeiptr>(size));
} else {
glBindBufferRange(GL_UNIFORM_BUFFER, binding_index, buffer.Handle(),
static_cast<GLintptr>(offset), static_cast<GLsizeiptr>(size));
}
}
void BufferCacheRuntime::BindStorageBuffer(size_t stage, u32 binding_index, Buffer& buffer,
u32 offset, u32 size, bool is_written) {
if (use_storage_buffers) {
const GLuint base_binding = graphics_base_storage_bindings[stage];
const GLuint binding = base_binding + binding_index;
glBindBufferRange(GL_SHADER_STORAGE_BUFFER, binding, buffer.Handle(),
static_cast<GLintptr>(offset), static_cast<GLsizeiptr>(size));
} else {
const BindlessSSBO ssbo{
.address = buffer.HostGpuAddr() + offset,
.length = static_cast<GLsizei>(size),
.padding = 0,
};
buffer.MakeResident(is_written ? GL_READ_WRITE : GL_READ_ONLY);
glProgramLocalParametersI4uivNV(
PROGRAM_LUT[stage],
Shader::Backend::GLASM::PROGRAM_LOCAL_PARAMETER_STORAGE_BUFFER_BASE + binding_index, 1,
reinterpret_cast<const GLuint*>(&ssbo));
}
}
void BufferCacheRuntime::BindComputeStorageBuffer(u32 binding_index, Buffer& buffer, u32 offset,
u32 size, bool is_written) {
if (use_storage_buffers) {
if (size != 0) {
glBindBufferRange(GL_SHADER_STORAGE_BUFFER, binding_index, buffer.Handle(),
static_cast<GLintptr>(offset), static_cast<GLsizeiptr>(size));
} else {
glBindBufferRange(GL_SHADER_STORAGE_BUFFER, binding_index, 0, 0, 0);
}
} else {
const BindlessSSBO ssbo{
.address = buffer.HostGpuAddr() + offset,
.length = static_cast<GLsizei>(size),
.padding = 0,
};
buffer.MakeResident(is_written ? GL_READ_WRITE : GL_READ_ONLY);
glProgramLocalParametersI4uivNV(
GL_COMPUTE_PROGRAM_NV,
Shader::Backend::GLASM::PROGRAM_LOCAL_PARAMETER_STORAGE_BUFFER_BASE + binding_index, 1,
reinterpret_cast<const GLuint*>(&ssbo));
}
}
void BufferCacheRuntime::BindTransformFeedbackBuffer(u32 index, Buffer& buffer, u32 offset,
u32 size) {
glBindBufferRange(GL_TRANSFORM_FEEDBACK_BUFFER, index, buffer.Handle(),
static_cast<GLintptr>(offset), static_cast<GLsizeiptr>(size));
}
void BufferCacheRuntime::BindTextureBuffer(Buffer& buffer, u32 offset, u32 size,
PixelFormat format) {
*texture_handles++ = buffer.View(offset, size, format);
}
void BufferCacheRuntime::BindImageBuffer(Buffer& buffer, u32 offset, u32 size, PixelFormat format) {
*image_handles++ = buffer.View(offset, size, format);
}
} // namespace OpenGL
// SPDX-FileCopyrightText: Copyright 2018 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include <algorithm>
#include <span>
#include "shader_recompiler/backend/glasm/emit_glasm.h"
#include "video_core/buffer_cache/buffer_cache.h"
#include "video_core/renderer_opengl/gl_buffer_cache.h"
#include "video_core/renderer_opengl/gl_device.h"
#include "video_core/renderer_opengl/maxwell_to_gl.h"
namespace OpenGL {
namespace {
using VideoCore::Surface::PixelFormat;
struct BindlessSSBO {
GLuint64EXT address;
GLsizei length;
GLsizei padding;
};
static_assert(sizeof(BindlessSSBO) == sizeof(GLuint) * 4);
constexpr std::array PROGRAM_LUT{
GL_VERTEX_PROGRAM_NV, GL_TESS_CONTROL_PROGRAM_NV, GL_TESS_EVALUATION_PROGRAM_NV,
GL_GEOMETRY_PROGRAM_NV, GL_FRAGMENT_PROGRAM_NV,
};
[[nodiscard]] GLenum GetTextureBufferFormat(GLenum gl_format) {
switch (gl_format) {
case GL_RGBA8_SNORM:
return GL_RGBA8;
case GL_R8_SNORM:
return GL_R8;
case GL_RGBA16_SNORM:
return GL_RGBA16;
case GL_R16_SNORM:
return GL_R16;
case GL_RG16_SNORM:
return GL_RG16;
case GL_RG8_SNORM:
return GL_RG8;
default:
return gl_format;
}
}
} // Anonymous namespace
Buffer::Buffer(BufferCacheRuntime&, VideoCommon::NullBufferParams null_params)
: VideoCommon::BufferBase<VideoCore::RasterizerInterface>(null_params) {}
Buffer::Buffer(BufferCacheRuntime& runtime, VideoCore::RasterizerInterface& rasterizer_,
VAddr cpu_addr_, u64 size_bytes_)
: VideoCommon::BufferBase<VideoCore::RasterizerInterface>(rasterizer_, cpu_addr_, size_bytes_) {
buffer.Create();
const std::string name = fmt::format("Buffer 0x{:x}", CpuAddr());
glObjectLabel(GL_BUFFER, buffer.handle, static_cast<GLsizei>(name.size()), name.data());
glNamedBufferData(buffer.handle, SizeBytes(), nullptr, GL_DYNAMIC_DRAW);
if (runtime.has_unified_vertex_buffers) {
glGetNamedBufferParameterui64vNV(buffer.handle, GL_BUFFER_GPU_ADDRESS_NV, &address);
}
}
void Buffer::ImmediateUpload(size_t offset, std::span<const u8> data) noexcept {
glNamedBufferSubData(buffer.handle, static_cast<GLintptr>(offset),
static_cast<GLsizeiptr>(data.size_bytes()), data.data());
}
void Buffer::ImmediateDownload(size_t offset, std::span<u8> data) noexcept {
glGetNamedBufferSubData(buffer.handle, static_cast<GLintptr>(offset),
static_cast<GLsizeiptr>(data.size_bytes()), data.data());
}
void Buffer::MakeResident(GLenum access) noexcept {
// Abuse GLenum's order to exit early
// GL_NONE (default) < GL_READ_ONLY < GL_READ_WRITE
if (access <= current_residency_access || buffer.handle == 0) {
return;
}
if (std::exchange(current_residency_access, access) != GL_NONE) {
// If the buffer is already resident, remove its residency before promoting it
glMakeNamedBufferNonResidentNV(buffer.handle);
}
glMakeNamedBufferResidentNV(buffer.handle, access);
}
GLuint Buffer::View(u32 offset, u32 size, PixelFormat format) {
const auto it{std::ranges::find_if(views, [offset, size, format](const BufferView& view) {
return offset == view.offset && size == view.size && format == view.format;
})};
if (it != views.end()) {
return it->texture.handle;
}
OGLTexture texture;
texture.Create(GL_TEXTURE_BUFFER);
const GLenum gl_format{MaxwellToGL::GetFormatTuple(format).internal_format};
const GLenum texture_format{GetTextureBufferFormat(gl_format)};
if (texture_format != gl_format) {
LOG_WARNING(Render_OpenGL, "Emulating SNORM texture buffer with UNORM.");
}
glTextureBufferRange(texture.handle, texture_format, buffer.handle, offset, size);
views.push_back({
.offset = offset,
.size = size,
.format = format,
.texture = std::move(texture),
});
return views.back().texture.handle;
}
BufferCacheRuntime::BufferCacheRuntime(const Device& device_)
: device{device_}, has_fast_buffer_sub_data{device.HasFastBufferSubData()},
use_assembly_shaders{device.UseAssemblyShaders()},
has_unified_vertex_buffers{device.HasVertexBufferUnifiedMemory()},
stream_buffer{has_fast_buffer_sub_data ? std::nullopt : std::make_optional<StreamBuffer>()} {
GLint gl_max_attributes;
glGetIntegerv(GL_MAX_VERTEX_ATTRIBS, &gl_max_attributes);
max_attributes = static_cast<u32>(gl_max_attributes);
for (auto& stage_uniforms : fast_uniforms) {
for (OGLBuffer& buffer : stage_uniforms) {
buffer.Create();
glNamedBufferData(buffer.handle, BufferCache::DEFAULT_SKIP_CACHE_SIZE, nullptr,
GL_STREAM_DRAW);
}
}
for (auto& stage_uniforms : copy_uniforms) {
for (OGLBuffer& buffer : stage_uniforms) {
buffer.Create();
glNamedBufferData(buffer.handle, 0x10'000, nullptr, GL_STREAM_COPY);
}
}
for (OGLBuffer& buffer : copy_compute_uniforms) {
buffer.Create();
glNamedBufferData(buffer.handle, 0x10'000, nullptr, GL_STREAM_COPY);
}
device_access_memory = [this]() -> u64 {
if (device.CanReportMemoryUsage()) {
return device.GetCurrentDedicatedVideoMemory() + 512_MiB;
}
return 2_GiB; // Return minimum requirements
}();
}
u64 BufferCacheRuntime::GetDeviceMemoryUsage() const {
if (device.CanReportMemoryUsage()) {
return device_access_memory - device.GetCurrentDedicatedVideoMemory();
}
return 2_GiB;
}
void BufferCacheRuntime::CopyBuffer(Buffer& dst_buffer, Buffer& src_buffer,
std::span<const VideoCommon::BufferCopy> copies) {
for (const VideoCommon::BufferCopy& copy : copies) {
glCopyNamedBufferSubData(
src_buffer.Handle(), dst_buffer.Handle(), static_cast<GLintptr>(copy.src_offset),
static_cast<GLintptr>(copy.dst_offset), static_cast<GLsizeiptr>(copy.size));
}
}
void BufferCacheRuntime::ClearBuffer(Buffer& dest_buffer, u32 offset, size_t size, u32 value) {
glClearNamedBufferSubData(dest_buffer.Handle(), GL_R32UI, static_cast<GLintptr>(offset),
static_cast<GLsizeiptr>(size), GL_RED, GL_UNSIGNED_INT, &value);
}
void BufferCacheRuntime::BindIndexBuffer(Buffer& buffer, u32 offset, u32 size) {
if (has_unified_vertex_buffers) {
buffer.MakeResident(GL_READ_ONLY);
glBufferAddressRangeNV(GL_ELEMENT_ARRAY_ADDRESS_NV, 0, buffer.HostGpuAddr() + offset,
static_cast<GLsizeiptr>(Common::AlignUp(size, 4)));
} else {
glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, buffer.Handle());
index_buffer_offset = offset;
}
}
void BufferCacheRuntime::BindVertexBuffer(u32 index, Buffer& buffer, u32 offset, u32 size,
u32 stride) {
if (index >= max_attributes) {
return;
}
if (has_unified_vertex_buffers) {
buffer.MakeResident(GL_READ_ONLY);
glBindVertexBuffer(index, 0, 0, static_cast<GLsizei>(stride));
glBufferAddressRangeNV(GL_VERTEX_ATTRIB_ARRAY_ADDRESS_NV, index,
buffer.HostGpuAddr() + offset, static_cast<GLsizeiptr>(size));
} else {
glBindVertexBuffer(index, buffer.Handle(), static_cast<GLintptr>(offset),
static_cast<GLsizei>(stride));
}
}
void BufferCacheRuntime::BindUniformBuffer(size_t stage, u32 binding_index, Buffer& buffer,
u32 offset, u32 size) {
if (use_assembly_shaders) {
GLuint handle;
if (offset != 0) {
handle = copy_uniforms[stage][binding_index].handle;
glCopyNamedBufferSubData(buffer.Handle(), handle, offset, 0, size);
} else {
handle = buffer.Handle();
}
glBindBufferRangeNV(PABO_LUT[stage], binding_index, handle, 0,
static_cast<GLsizeiptr>(size));
} else {
const GLuint base_binding = graphics_base_uniform_bindings[stage];
const GLuint binding = base_binding + binding_index;
glBindBufferRange(GL_UNIFORM_BUFFER, binding, buffer.Handle(),
static_cast<GLintptr>(offset), static_cast<GLsizeiptr>(size));
}
}
void BufferCacheRuntime::BindComputeUniformBuffer(u32 binding_index, Buffer& buffer, u32 offset,
u32 size) {
if (use_assembly_shaders) {
GLuint handle;
if (offset != 0) {
handle = copy_compute_uniforms[binding_index].handle;
glCopyNamedBufferSubData(buffer.Handle(), handle, offset, 0, size);
} else {
handle = buffer.Handle();
}
glBindBufferRangeNV(GL_COMPUTE_PROGRAM_PARAMETER_BUFFER_NV, binding_index, handle, 0,
static_cast<GLsizeiptr>(size));
} else {
glBindBufferRange(GL_UNIFORM_BUFFER, binding_index, buffer.Handle(),
static_cast<GLintptr>(offset), static_cast<GLsizeiptr>(size));
}
}
void BufferCacheRuntime::BindStorageBuffer(size_t stage, u32 binding_index, Buffer& buffer,
u32 offset, u32 size, bool is_written) {
if (use_storage_buffers) {
const GLuint base_binding = graphics_base_storage_bindings[stage];
const GLuint binding = base_binding + binding_index;
glBindBufferRange(GL_SHADER_STORAGE_BUFFER, binding, buffer.Handle(),
static_cast<GLintptr>(offset), static_cast<GLsizeiptr>(size));
} else {
const BindlessSSBO ssbo{
.address = buffer.HostGpuAddr() + offset,
.length = static_cast<GLsizei>(size),
.padding = 0,
};
buffer.MakeResident(is_written ? GL_READ_WRITE : GL_READ_ONLY);
glProgramLocalParametersI4uivNV(
PROGRAM_LUT[stage],
Shader::Backend::GLASM::PROGRAM_LOCAL_PARAMETER_STORAGE_BUFFER_BASE + binding_index, 1,
reinterpret_cast<const GLuint*>(&ssbo));
}
}
void BufferCacheRuntime::BindComputeStorageBuffer(u32 binding_index, Buffer& buffer, u32 offset,
u32 size, bool is_written) {
if (use_storage_buffers) {
if (size != 0) {
glBindBufferRange(GL_SHADER_STORAGE_BUFFER, binding_index, buffer.Handle(),
static_cast<GLintptr>(offset), static_cast<GLsizeiptr>(size));
} else {
glBindBufferRange(GL_SHADER_STORAGE_BUFFER, binding_index, 0, 0, 0);
}
} else {
const BindlessSSBO ssbo{
.address = buffer.HostGpuAddr() + offset,
.length = static_cast<GLsizei>(size),
.padding = 0,
};
buffer.MakeResident(is_written ? GL_READ_WRITE : GL_READ_ONLY);
glProgramLocalParametersI4uivNV(
GL_COMPUTE_PROGRAM_NV,
Shader::Backend::GLASM::PROGRAM_LOCAL_PARAMETER_STORAGE_BUFFER_BASE + binding_index, 1,
reinterpret_cast<const GLuint*>(&ssbo));
}
}
void BufferCacheRuntime::BindTransformFeedbackBuffer(u32 index, Buffer& buffer, u32 offset,
u32 size) {
glBindBufferRange(GL_TRANSFORM_FEEDBACK_BUFFER, index, buffer.Handle(),
static_cast<GLintptr>(offset), static_cast<GLsizeiptr>(size));
}
void BufferCacheRuntime::BindTextureBuffer(Buffer& buffer, u32 offset, u32 size,
PixelFormat format) {
*texture_handles++ = buffer.View(offset, size, format);
}
void BufferCacheRuntime::BindImageBuffer(Buffer& buffer, u32 offset, u32 size, PixelFormat format) {
*image_handles++ = buffer.View(offset, size, format);
}
} // namespace OpenGL

View File

@@ -1,215 +1,215 @@
// SPDX-FileCopyrightText: Copyright 2018 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include <array>
#include <span>
#include "common/common_types.h"
#include "video_core/buffer_cache/buffer_cache.h"
#include "video_core/rasterizer_interface.h"
#include "video_core/renderer_opengl/gl_device.h"
#include "video_core/renderer_opengl/gl_resource_manager.h"
#include "video_core/renderer_opengl/gl_stream_buffer.h"
namespace OpenGL {
class BufferCacheRuntime;
class Buffer : public VideoCommon::BufferBase<VideoCore::RasterizerInterface> {
public:
explicit Buffer(BufferCacheRuntime&, VideoCore::RasterizerInterface& rasterizer, VAddr cpu_addr,
u64 size_bytes);
explicit Buffer(BufferCacheRuntime&, VideoCommon::NullBufferParams);
void ImmediateUpload(size_t offset, std::span<const u8> data) noexcept;
void ImmediateDownload(size_t offset, std::span<u8> data) noexcept;
void MakeResident(GLenum access) noexcept;
[[nodiscard]] GLuint View(u32 offset, u32 size, VideoCore::Surface::PixelFormat format);
[[nodiscard]] GLuint64EXT HostGpuAddr() const noexcept {
return address;
}
[[nodiscard]] GLuint Handle() const noexcept {
return buffer.handle;
}
private:
struct BufferView {
u32 offset;
u32 size;
VideoCore::Surface::PixelFormat format;
OGLTexture texture;
};
GLuint64EXT address = 0;
OGLBuffer buffer;
GLenum current_residency_access = GL_NONE;
std::vector<BufferView> views;
};
class BufferCacheRuntime {
friend Buffer;
public:
static constexpr u8 INVALID_BINDING = std::numeric_limits<u8>::max();
explicit BufferCacheRuntime(const Device& device_);
void CopyBuffer(Buffer& dst_buffer, Buffer& src_buffer,
std::span<const VideoCommon::BufferCopy> copies);
void ClearBuffer(Buffer& dest_buffer, u32 offset, size_t size, u32 value);
void BindIndexBuffer(Buffer& buffer, u32 offset, u32 size);
void BindVertexBuffer(u32 index, Buffer& buffer, u32 offset, u32 size, u32 stride);
void BindUniformBuffer(size_t stage, u32 binding_index, Buffer& buffer, u32 offset, u32 size);
void BindComputeUniformBuffer(u32 binding_index, Buffer& buffer, u32 offset, u32 size);
void BindStorageBuffer(size_t stage, u32 binding_index, Buffer& buffer, u32 offset, u32 size,
bool is_written);
void BindComputeStorageBuffer(u32 binding_index, Buffer& buffer, u32 offset, u32 size,
bool is_written);
void BindTransformFeedbackBuffer(u32 index, Buffer& buffer, u32 offset, u32 size);
void BindTextureBuffer(Buffer& buffer, u32 offset, u32 size,
VideoCore::Surface::PixelFormat format);
void BindImageBuffer(Buffer& buffer, u32 offset, u32 size,
VideoCore::Surface::PixelFormat format);
u64 GetDeviceMemoryUsage() const;
void BindFastUniformBuffer(size_t stage, u32 binding_index, u32 size) {
const GLuint handle = fast_uniforms[stage][binding_index].handle;
const GLsizeiptr gl_size = static_cast<GLsizeiptr>(size);
if (use_assembly_shaders) {
glBindBufferRangeNV(PABO_LUT[stage], binding_index, handle, 0, gl_size);
} else {
const GLuint base_binding = graphics_base_uniform_bindings[stage];
const GLuint binding = base_binding + binding_index;
glBindBufferRange(GL_UNIFORM_BUFFER, binding, handle, 0, gl_size);
}
}
void PushFastUniformBuffer(size_t stage, u32 binding_index, std::span<const u8> data) {
if (use_assembly_shaders) {
glProgramBufferParametersIuivNV(
PABO_LUT[stage], binding_index, 0,
static_cast<GLsizei>(data.size_bytes() / sizeof(GLuint)),
reinterpret_cast<const GLuint*>(data.data()));
} else {
glNamedBufferSubData(fast_uniforms[stage][binding_index].handle, 0,
static_cast<GLsizeiptr>(data.size_bytes()), data.data());
}
}
std::span<u8> BindMappedUniformBuffer(size_t stage, u32 binding_index, u32 size) noexcept {
const auto [mapped_span, offset] = stream_buffer->Request(static_cast<size_t>(size));
const GLuint base_binding = graphics_base_uniform_bindings[stage];
const GLuint binding = base_binding + binding_index;
glBindBufferRange(GL_UNIFORM_BUFFER, binding, stream_buffer->Handle(),
static_cast<GLintptr>(offset), static_cast<GLsizeiptr>(size));
return mapped_span;
}
[[nodiscard]] const GLvoid* IndexOffset() const noexcept {
return reinterpret_cast<const GLvoid*>(static_cast<uintptr_t>(index_buffer_offset));
}
[[nodiscard]] bool HasFastBufferSubData() const noexcept {
return has_fast_buffer_sub_data;
}
[[nodiscard]] bool SupportsNonZeroUniformOffset() const noexcept {
return !use_assembly_shaders;
}
void SetBaseUniformBindings(const std::array<GLuint, 5>& bindings) {
graphics_base_uniform_bindings = bindings;
}
void SetBaseStorageBindings(const std::array<GLuint, 5>& bindings) {
graphics_base_storage_bindings = bindings;
}
void SetImagePointers(GLuint* texture_handles_, GLuint* image_handles_) {
texture_handles = texture_handles_;
image_handles = image_handles_;
}
void SetEnableStorageBuffers(bool use_storage_buffers_) {
use_storage_buffers = use_storage_buffers_;
}
u64 GetDeviceLocalMemory() const {
return device_access_memory;
}
bool CanReportMemoryUsage() const {
return device.CanReportMemoryUsage();
}
private:
static constexpr std::array PABO_LUT{
GL_VERTEX_PROGRAM_PARAMETER_BUFFER_NV, GL_TESS_CONTROL_PROGRAM_PARAMETER_BUFFER_NV,
GL_TESS_EVALUATION_PROGRAM_PARAMETER_BUFFER_NV, GL_GEOMETRY_PROGRAM_PARAMETER_BUFFER_NV,
GL_FRAGMENT_PROGRAM_PARAMETER_BUFFER_NV,
};
const Device& device;
bool has_fast_buffer_sub_data = false;
bool use_assembly_shaders = false;
bool has_unified_vertex_buffers = false;
bool use_storage_buffers = false;
u32 max_attributes = 0;
std::array<GLuint, 5> graphics_base_uniform_bindings{};
std::array<GLuint, 5> graphics_base_storage_bindings{};
GLuint* texture_handles = nullptr;
GLuint* image_handles = nullptr;
std::optional<StreamBuffer> stream_buffer;
std::array<std::array<OGLBuffer, VideoCommon::NUM_GRAPHICS_UNIFORM_BUFFERS>,
VideoCommon::NUM_STAGES>
fast_uniforms;
std::array<std::array<OGLBuffer, VideoCommon::NUM_GRAPHICS_UNIFORM_BUFFERS>,
VideoCommon::NUM_STAGES>
copy_uniforms;
std::array<OGLBuffer, VideoCommon::NUM_COMPUTE_UNIFORM_BUFFERS> copy_compute_uniforms;
u32 index_buffer_offset = 0;
u64 device_access_memory;
};
struct BufferCacheParams {
using Runtime = OpenGL::BufferCacheRuntime;
using Buffer = OpenGL::Buffer;
static constexpr bool IS_OPENGL = true;
static constexpr bool HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS = true;
static constexpr bool HAS_FULL_INDEX_AND_PRIMITIVE_SUPPORT = true;
static constexpr bool NEEDS_BIND_UNIFORM_INDEX = true;
static constexpr bool NEEDS_BIND_STORAGE_INDEX = true;
static constexpr bool USE_MEMORY_MAPS = false;
static constexpr bool SEPARATE_IMAGE_BUFFER_BINDINGS = true;
};
using BufferCache = VideoCommon::BufferCache<BufferCacheParams>;
} // namespace OpenGL
// SPDX-FileCopyrightText: Copyright 2018 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include <array>
#include <span>
#include "common/common_types.h"
#include "video_core/buffer_cache/buffer_cache.h"
#include "video_core/rasterizer_interface.h"
#include "video_core/renderer_opengl/gl_device.h"
#include "video_core/renderer_opengl/gl_resource_manager.h"
#include "video_core/renderer_opengl/gl_stream_buffer.h"
namespace OpenGL {
class BufferCacheRuntime;
class Buffer : public VideoCommon::BufferBase<VideoCore::RasterizerInterface> {
public:
explicit Buffer(BufferCacheRuntime&, VideoCore::RasterizerInterface& rasterizer, VAddr cpu_addr,
u64 size_bytes);
explicit Buffer(BufferCacheRuntime&, VideoCommon::NullBufferParams);
void ImmediateUpload(size_t offset, std::span<const u8> data) noexcept;
void ImmediateDownload(size_t offset, std::span<u8> data) noexcept;
void MakeResident(GLenum access) noexcept;
[[nodiscard]] GLuint View(u32 offset, u32 size, VideoCore::Surface::PixelFormat format);
[[nodiscard]] GLuint64EXT HostGpuAddr() const noexcept {
return address;
}
[[nodiscard]] GLuint Handle() const noexcept {
return buffer.handle;
}
private:
struct BufferView {
u32 offset;
u32 size;
VideoCore::Surface::PixelFormat format;
OGLTexture texture;
};
GLuint64EXT address = 0;
OGLBuffer buffer;
GLenum current_residency_access = GL_NONE;
std::vector<BufferView> views;
};
class BufferCacheRuntime {
friend Buffer;
public:
static constexpr u8 INVALID_BINDING = std::numeric_limits<u8>::max();
explicit BufferCacheRuntime(const Device& device_);
void CopyBuffer(Buffer& dst_buffer, Buffer& src_buffer,
std::span<const VideoCommon::BufferCopy> copies);
void ClearBuffer(Buffer& dest_buffer, u32 offset, size_t size, u32 value);
void BindIndexBuffer(Buffer& buffer, u32 offset, u32 size);
void BindVertexBuffer(u32 index, Buffer& buffer, u32 offset, u32 size, u32 stride);
void BindUniformBuffer(size_t stage, u32 binding_index, Buffer& buffer, u32 offset, u32 size);
void BindComputeUniformBuffer(u32 binding_index, Buffer& buffer, u32 offset, u32 size);
void BindStorageBuffer(size_t stage, u32 binding_index, Buffer& buffer, u32 offset, u32 size,
bool is_written);
void BindComputeStorageBuffer(u32 binding_index, Buffer& buffer, u32 offset, u32 size,
bool is_written);
void BindTransformFeedbackBuffer(u32 index, Buffer& buffer, u32 offset, u32 size);
void BindTextureBuffer(Buffer& buffer, u32 offset, u32 size,
VideoCore::Surface::PixelFormat format);
void BindImageBuffer(Buffer& buffer, u32 offset, u32 size,
VideoCore::Surface::PixelFormat format);
u64 GetDeviceMemoryUsage() const;
void BindFastUniformBuffer(size_t stage, u32 binding_index, u32 size) {
const GLuint handle = fast_uniforms[stage][binding_index].handle;
const GLsizeiptr gl_size = static_cast<GLsizeiptr>(size);
if (use_assembly_shaders) {
glBindBufferRangeNV(PABO_LUT[stage], binding_index, handle, 0, gl_size);
} else {
const GLuint base_binding = graphics_base_uniform_bindings[stage];
const GLuint binding = base_binding + binding_index;
glBindBufferRange(GL_UNIFORM_BUFFER, binding, handle, 0, gl_size);
}
}
void PushFastUniformBuffer(size_t stage, u32 binding_index, std::span<const u8> data) {
if (use_assembly_shaders) {
glProgramBufferParametersIuivNV(
PABO_LUT[stage], binding_index, 0,
static_cast<GLsizei>(data.size_bytes() / sizeof(GLuint)),
reinterpret_cast<const GLuint*>(data.data()));
} else {
glNamedBufferSubData(fast_uniforms[stage][binding_index].handle, 0,
static_cast<GLsizeiptr>(data.size_bytes()), data.data());
}
}
std::span<u8> BindMappedUniformBuffer(size_t stage, u32 binding_index, u32 size) noexcept {
const auto [mapped_span, offset] = stream_buffer->Request(static_cast<size_t>(size));
const GLuint base_binding = graphics_base_uniform_bindings[stage];
const GLuint binding = base_binding + binding_index;
glBindBufferRange(GL_UNIFORM_BUFFER, binding, stream_buffer->Handle(),
static_cast<GLintptr>(offset), static_cast<GLsizeiptr>(size));
return mapped_span;
}
[[nodiscard]] const GLvoid* IndexOffset() const noexcept {
return reinterpret_cast<const GLvoid*>(static_cast<uintptr_t>(index_buffer_offset));
}
[[nodiscard]] bool HasFastBufferSubData() const noexcept {
return has_fast_buffer_sub_data;
}
[[nodiscard]] bool SupportsNonZeroUniformOffset() const noexcept {
return !use_assembly_shaders;
}
void SetBaseUniformBindings(const std::array<GLuint, 5>& bindings) {
graphics_base_uniform_bindings = bindings;
}
void SetBaseStorageBindings(const std::array<GLuint, 5>& bindings) {
graphics_base_storage_bindings = bindings;
}
void SetImagePointers(GLuint* texture_handles_, GLuint* image_handles_) {
texture_handles = texture_handles_;
image_handles = image_handles_;
}
void SetEnableStorageBuffers(bool use_storage_buffers_) {
use_storage_buffers = use_storage_buffers_;
}
u64 GetDeviceLocalMemory() const {
return device_access_memory;
}
bool CanReportMemoryUsage() const {
return device.CanReportMemoryUsage();
}
private:
static constexpr std::array PABO_LUT{
GL_VERTEX_PROGRAM_PARAMETER_BUFFER_NV, GL_TESS_CONTROL_PROGRAM_PARAMETER_BUFFER_NV,
GL_TESS_EVALUATION_PROGRAM_PARAMETER_BUFFER_NV, GL_GEOMETRY_PROGRAM_PARAMETER_BUFFER_NV,
GL_FRAGMENT_PROGRAM_PARAMETER_BUFFER_NV,
};
const Device& device;
bool has_fast_buffer_sub_data = false;
bool use_assembly_shaders = false;
bool has_unified_vertex_buffers = false;
bool use_storage_buffers = false;
u32 max_attributes = 0;
std::array<GLuint, 5> graphics_base_uniform_bindings{};
std::array<GLuint, 5> graphics_base_storage_bindings{};
GLuint* texture_handles = nullptr;
GLuint* image_handles = nullptr;
std::optional<StreamBuffer> stream_buffer;
std::array<std::array<OGLBuffer, VideoCommon::NUM_GRAPHICS_UNIFORM_BUFFERS>,
VideoCommon::NUM_STAGES>
fast_uniforms;
std::array<std::array<OGLBuffer, VideoCommon::NUM_GRAPHICS_UNIFORM_BUFFERS>,
VideoCommon::NUM_STAGES>
copy_uniforms;
std::array<OGLBuffer, VideoCommon::NUM_COMPUTE_UNIFORM_BUFFERS> copy_compute_uniforms;
u32 index_buffer_offset = 0;
u64 device_access_memory;
};
struct BufferCacheParams {
using Runtime = OpenGL::BufferCacheRuntime;
using Buffer = OpenGL::Buffer;
static constexpr bool IS_OPENGL = true;
static constexpr bool HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS = true;
static constexpr bool HAS_FULL_INDEX_AND_PRIMITIVE_SUPPORT = true;
static constexpr bool NEEDS_BIND_UNIFORM_INDEX = true;
static constexpr bool NEEDS_BIND_STORAGE_INDEX = true;
static constexpr bool USE_MEMORY_MAPS = false;
static constexpr bool SEPARATE_IMAGE_BUFFER_BINDINGS = true;
};
using BufferCache = VideoCommon::BufferCache<BufferCacheParams>;
} // namespace OpenGL

View File

@@ -1,226 +1,226 @@
// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include <cstring>
#include "common/cityhash.h"
#include "common/settings.h" // for enum class Settings::ShaderBackend
#include "video_core/renderer_opengl/gl_compute_pipeline.h"
#include "video_core/renderer_opengl/gl_shader_manager.h"
#include "video_core/renderer_opengl/gl_shader_util.h"
namespace OpenGL {
using Shader::ImageBufferDescriptor;
using Tegra::Texture::TexturePair;
using VideoCommon::ImageId;
constexpr u32 MAX_TEXTURES = 64;
constexpr u32 MAX_IMAGES = 16;
size_t ComputePipelineKey::Hash() const noexcept {
return static_cast<size_t>(
Common::CityHash64(reinterpret_cast<const char*>(this), sizeof *this));
}
bool ComputePipelineKey::operator==(const ComputePipelineKey& rhs) const noexcept {
return std::memcmp(this, &rhs, sizeof *this) == 0;
}
ComputePipeline::ComputePipeline(const Device& device, TextureCache& texture_cache_,
BufferCache& buffer_cache_, ProgramManager& program_manager_,
const Shader::Info& info_, std::string code,
std::vector<u32> code_v)
: texture_cache{texture_cache_}, buffer_cache{buffer_cache_},
program_manager{program_manager_}, info{info_} {
switch (device.GetShaderBackend()) {
case Settings::ShaderBackend::GLSL:
source_program = CreateProgram(code, GL_COMPUTE_SHADER);
break;
case Settings::ShaderBackend::GLASM:
assembly_program = CompileProgram(code, GL_COMPUTE_PROGRAM_NV);
break;
case Settings::ShaderBackend::SPIRV:
source_program = CreateProgram(code_v, GL_COMPUTE_SHADER);
break;
}
std::copy_n(info.constant_buffer_used_sizes.begin(), uniform_buffer_sizes.size(),
uniform_buffer_sizes.begin());
num_texture_buffers = Shader::NumDescriptors(info.texture_buffer_descriptors);
num_image_buffers = Shader::NumDescriptors(info.image_buffer_descriptors);
const u32 num_textures{num_texture_buffers + Shader::NumDescriptors(info.texture_descriptors)};
ASSERT(num_textures <= MAX_TEXTURES);
const u32 num_images{num_image_buffers + Shader::NumDescriptors(info.image_descriptors)};
ASSERT(num_images <= MAX_IMAGES);
const bool is_glasm{assembly_program.handle != 0};
const u32 num_storage_buffers{Shader::NumDescriptors(info.storage_buffers_descriptors)};
use_storage_buffers =
!is_glasm || num_storage_buffers < device.GetMaxGLASMStorageBufferBlocks();
writes_global_memory = !use_storage_buffers &&
std::ranges::any_of(info.storage_buffers_descriptors,
[](const auto& desc) { return desc.is_written; });
}
void ComputePipeline::Configure() {
buffer_cache.SetComputeUniformBufferState(info.constant_buffer_mask, &uniform_buffer_sizes);
buffer_cache.UnbindComputeStorageBuffers();
size_t ssbo_index{};
for (const auto& desc : info.storage_buffers_descriptors) {
ASSERT(desc.count == 1);
buffer_cache.BindComputeStorageBuffer(ssbo_index, desc.cbuf_index, desc.cbuf_offset,
desc.is_written);
++ssbo_index;
}
texture_cache.SynchronizeComputeDescriptors();
boost::container::static_vector<VideoCommon::ImageViewInOut, MAX_TEXTURES + MAX_IMAGES> views;
std::array<GLuint, MAX_TEXTURES> samplers;
std::array<GLuint, MAX_TEXTURES> textures;
std::array<GLuint, MAX_IMAGES> images;
GLsizei sampler_binding{};
GLsizei texture_binding{};
GLsizei image_binding{};
const auto& qmd{kepler_compute->launch_description};
const auto& cbufs{qmd.const_buffer_config};
const bool via_header_index{qmd.linked_tsc != 0};
const auto read_handle{[&](const auto& desc, u32 index) {
ASSERT(((qmd.const_buffer_enable_mask >> desc.cbuf_index) & 1) != 0);
const u32 index_offset{index << desc.size_shift};
const u32 offset{desc.cbuf_offset + index_offset};
const GPUVAddr addr{cbufs[desc.cbuf_index].Address() + offset};
if constexpr (std::is_same_v<decltype(desc), const Shader::TextureDescriptor&> ||
std::is_same_v<decltype(desc), const Shader::TextureBufferDescriptor&>) {
if (desc.has_secondary) {
ASSERT(((qmd.const_buffer_enable_mask >> desc.secondary_cbuf_index) & 1) != 0);
const u32 secondary_offset{desc.secondary_cbuf_offset + index_offset};
const GPUVAddr separate_addr{cbufs[desc.secondary_cbuf_index].Address() +
secondary_offset};
const u32 lhs_raw{gpu_memory->Read<u32>(addr) << desc.shift_left};
const u32 rhs_raw{gpu_memory->Read<u32>(separate_addr)
<< desc.secondary_shift_left};
return TexturePair(lhs_raw | rhs_raw, via_header_index);
}
}
return TexturePair(gpu_memory->Read<u32>(addr), via_header_index);
}};
const auto add_image{[&](const auto& desc, bool blacklist) {
for (u32 index = 0; index < desc.count; ++index) {
const auto handle{read_handle(desc, index)};
views.push_back({
.index = handle.first,
.blacklist = blacklist,
.id = {},
});
}
}};
for (const auto& desc : info.texture_buffer_descriptors) {
for (u32 index = 0; index < desc.count; ++index) {
const auto handle{read_handle(desc, index)};
views.push_back({handle.first});
samplers[sampler_binding++] = 0;
}
}
for (const auto& desc : info.image_buffer_descriptors) {
add_image(desc, false);
}
for (const auto& desc : info.texture_descriptors) {
for (u32 index = 0; index < desc.count; ++index) {
const auto handle{read_handle(desc, index)};
views.push_back({handle.first});
Sampler* const sampler = texture_cache.GetComputeSampler(handle.second);
samplers[sampler_binding++] = sampler->Handle();
}
}
for (const auto& desc : info.image_descriptors) {
add_image(desc, desc.is_written);
}
texture_cache.FillComputeImageViews(std::span(views.data(), views.size()));
if (assembly_program.handle != 0) {
program_manager.BindComputeAssemblyProgram(assembly_program.handle);
} else {
program_manager.BindComputeProgram(source_program.handle);
}
buffer_cache.UnbindComputeTextureBuffers();
size_t texbuf_index{};
const auto add_buffer{[&](const auto& desc) {
constexpr bool is_image = std::is_same_v<decltype(desc), const ImageBufferDescriptor&>;
for (u32 i = 0; i < desc.count; ++i) {
bool is_written{false};
if constexpr (is_image) {
is_written = desc.is_written;
}
ImageView& image_view{texture_cache.GetImageView(views[texbuf_index].id)};
buffer_cache.BindComputeTextureBuffer(texbuf_index, image_view.GpuAddr(),
image_view.BufferSize(), image_view.format,
is_written, is_image);
++texbuf_index;
}
}};
std::ranges::for_each(info.texture_buffer_descriptors, add_buffer);
std::ranges::for_each(info.image_buffer_descriptors, add_buffer);
buffer_cache.UpdateComputeBuffers();
buffer_cache.runtime.SetEnableStorageBuffers(use_storage_buffers);
buffer_cache.runtime.SetImagePointers(textures.data(), images.data());
buffer_cache.BindHostComputeBuffers();
const VideoCommon::ImageViewInOut* views_it{views.data() + num_texture_buffers +
num_image_buffers};
texture_binding += num_texture_buffers;
image_binding += num_image_buffers;
u32 texture_scaling_mask{};
for (const auto& desc : info.texture_descriptors) {
for (u32 index = 0; index < desc.count; ++index) {
ImageView& image_view{texture_cache.GetImageView((views_it++)->id)};
textures[texture_binding] = image_view.Handle(desc.type);
if (texture_cache.IsRescaling(image_view)) {
texture_scaling_mask |= 1u << texture_binding;
}
++texture_binding;
}
}
u32 image_scaling_mask{};
for (const auto& desc : info.image_descriptors) {
for (u32 index = 0; index < desc.count; ++index) {
ImageView& image_view{texture_cache.GetImageView((views_it++)->id)};
if (desc.is_written) {
texture_cache.MarkModification(image_view.image_id);
}
images[image_binding] = image_view.StorageView(desc.type, desc.format);
if (texture_cache.IsRescaling(image_view)) {
image_scaling_mask |= 1u << image_binding;
}
++image_binding;
}
}
if (info.uses_rescaling_uniform) {
const f32 float_texture_scaling_mask{Common::BitCast<f32>(texture_scaling_mask)};
const f32 float_image_scaling_mask{Common::BitCast<f32>(image_scaling_mask)};
if (assembly_program.handle != 0) {
glProgramLocalParameter4fARB(GL_COMPUTE_PROGRAM_NV, 0, float_texture_scaling_mask,
float_image_scaling_mask, 0.0f, 0.0f);
} else {
glProgramUniform4f(source_program.handle, 0, float_texture_scaling_mask,
float_image_scaling_mask, 0.0f, 0.0f);
}
}
if (texture_binding != 0) {
ASSERT(texture_binding == sampler_binding);
glBindTextures(0, texture_binding, textures.data());
glBindSamplers(0, sampler_binding, samplers.data());
}
if (image_binding != 0) {
glBindImageTextures(0, image_binding, images.data());
}
}
} // namespace OpenGL
// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include <cstring>
#include "common/cityhash.h"
#include "common/settings.h" // for enum class Settings::ShaderBackend
#include "video_core/renderer_opengl/gl_compute_pipeline.h"
#include "video_core/renderer_opengl/gl_shader_manager.h"
#include "video_core/renderer_opengl/gl_shader_util.h"
namespace OpenGL {
using Shader::ImageBufferDescriptor;
using Tegra::Texture::TexturePair;
using VideoCommon::ImageId;
constexpr u32 MAX_TEXTURES = 64;
constexpr u32 MAX_IMAGES = 16;
size_t ComputePipelineKey::Hash() const noexcept {
return static_cast<size_t>(
Common::CityHash64(reinterpret_cast<const char*>(this), sizeof *this));
}
bool ComputePipelineKey::operator==(const ComputePipelineKey& rhs) const noexcept {
return std::memcmp(this, &rhs, sizeof *this) == 0;
}
ComputePipeline::ComputePipeline(const Device& device, TextureCache& texture_cache_,
BufferCache& buffer_cache_, ProgramManager& program_manager_,
const Shader::Info& info_, std::string code,
std::vector<u32> code_v)
: texture_cache{texture_cache_}, buffer_cache{buffer_cache_},
program_manager{program_manager_}, info{info_} {
switch (device.GetShaderBackend()) {
case Settings::ShaderBackend::GLSL:
source_program = CreateProgram(code, GL_COMPUTE_SHADER);
break;
case Settings::ShaderBackend::GLASM:
assembly_program = CompileProgram(code, GL_COMPUTE_PROGRAM_NV);
break;
case Settings::ShaderBackend::SPIRV:
source_program = CreateProgram(code_v, GL_COMPUTE_SHADER);
break;
}
std::copy_n(info.constant_buffer_used_sizes.begin(), uniform_buffer_sizes.size(),
uniform_buffer_sizes.begin());
num_texture_buffers = Shader::NumDescriptors(info.texture_buffer_descriptors);
num_image_buffers = Shader::NumDescriptors(info.image_buffer_descriptors);
const u32 num_textures{num_texture_buffers + Shader::NumDescriptors(info.texture_descriptors)};
ASSERT(num_textures <= MAX_TEXTURES);
const u32 num_images{num_image_buffers + Shader::NumDescriptors(info.image_descriptors)};
ASSERT(num_images <= MAX_IMAGES);
const bool is_glasm{assembly_program.handle != 0};
const u32 num_storage_buffers{Shader::NumDescriptors(info.storage_buffers_descriptors)};
use_storage_buffers =
!is_glasm || num_storage_buffers < device.GetMaxGLASMStorageBufferBlocks();
writes_global_memory = !use_storage_buffers &&
std::ranges::any_of(info.storage_buffers_descriptors,
[](const auto& desc) { return desc.is_written; });
}
void ComputePipeline::Configure() {
buffer_cache.SetComputeUniformBufferState(info.constant_buffer_mask, &uniform_buffer_sizes);
buffer_cache.UnbindComputeStorageBuffers();
size_t ssbo_index{};
for (const auto& desc : info.storage_buffers_descriptors) {
ASSERT(desc.count == 1);
buffer_cache.BindComputeStorageBuffer(ssbo_index, desc.cbuf_index, desc.cbuf_offset,
desc.is_written);
++ssbo_index;
}
texture_cache.SynchronizeComputeDescriptors();
boost::container::static_vector<VideoCommon::ImageViewInOut, MAX_TEXTURES + MAX_IMAGES> views;
std::array<GLuint, MAX_TEXTURES> samplers;
std::array<GLuint, MAX_TEXTURES> textures;
std::array<GLuint, MAX_IMAGES> images;
GLsizei sampler_binding{};
GLsizei texture_binding{};
GLsizei image_binding{};
const auto& qmd{kepler_compute->launch_description};
const auto& cbufs{qmd.const_buffer_config};
const bool via_header_index{qmd.linked_tsc != 0};
const auto read_handle{[&](const auto& desc, u32 index) {
ASSERT(((qmd.const_buffer_enable_mask >> desc.cbuf_index) & 1) != 0);
const u32 index_offset{index << desc.size_shift};
const u32 offset{desc.cbuf_offset + index_offset};
const GPUVAddr addr{cbufs[desc.cbuf_index].Address() + offset};
if constexpr (std::is_same_v<decltype(desc), const Shader::TextureDescriptor&> ||
std::is_same_v<decltype(desc), const Shader::TextureBufferDescriptor&>) {
if (desc.has_secondary) {
ASSERT(((qmd.const_buffer_enable_mask >> desc.secondary_cbuf_index) & 1) != 0);
const u32 secondary_offset{desc.secondary_cbuf_offset + index_offset};
const GPUVAddr separate_addr{cbufs[desc.secondary_cbuf_index].Address() +
secondary_offset};
const u32 lhs_raw{gpu_memory->Read<u32>(addr) << desc.shift_left};
const u32 rhs_raw{gpu_memory->Read<u32>(separate_addr)
<< desc.secondary_shift_left};
return TexturePair(lhs_raw | rhs_raw, via_header_index);
}
}
return TexturePair(gpu_memory->Read<u32>(addr), via_header_index);
}};
const auto add_image{[&](const auto& desc, bool blacklist) {
for (u32 index = 0; index < desc.count; ++index) {
const auto handle{read_handle(desc, index)};
views.push_back({
.index = handle.first,
.blacklist = blacklist,
.id = {},
});
}
}};
for (const auto& desc : info.texture_buffer_descriptors) {
for (u32 index = 0; index < desc.count; ++index) {
const auto handle{read_handle(desc, index)};
views.push_back({handle.first});
samplers[sampler_binding++] = 0;
}
}
for (const auto& desc : info.image_buffer_descriptors) {
add_image(desc, false);
}
for (const auto& desc : info.texture_descriptors) {
for (u32 index = 0; index < desc.count; ++index) {
const auto handle{read_handle(desc, index)};
views.push_back({handle.first});
Sampler* const sampler = texture_cache.GetComputeSampler(handle.second);
samplers[sampler_binding++] = sampler->Handle();
}
}
for (const auto& desc : info.image_descriptors) {
add_image(desc, desc.is_written);
}
texture_cache.FillComputeImageViews(std::span(views.data(), views.size()));
if (assembly_program.handle != 0) {
program_manager.BindComputeAssemblyProgram(assembly_program.handle);
} else {
program_manager.BindComputeProgram(source_program.handle);
}
buffer_cache.UnbindComputeTextureBuffers();
size_t texbuf_index{};
const auto add_buffer{[&](const auto& desc) {
constexpr bool is_image = std::is_same_v<decltype(desc), const ImageBufferDescriptor&>;
for (u32 i = 0; i < desc.count; ++i) {
bool is_written{false};
if constexpr (is_image) {
is_written = desc.is_written;
}
ImageView& image_view{texture_cache.GetImageView(views[texbuf_index].id)};
buffer_cache.BindComputeTextureBuffer(texbuf_index, image_view.GpuAddr(),
image_view.BufferSize(), image_view.format,
is_written, is_image);
++texbuf_index;
}
}};
std::ranges::for_each(info.texture_buffer_descriptors, add_buffer);
std::ranges::for_each(info.image_buffer_descriptors, add_buffer);
buffer_cache.UpdateComputeBuffers();
buffer_cache.runtime.SetEnableStorageBuffers(use_storage_buffers);
buffer_cache.runtime.SetImagePointers(textures.data(), images.data());
buffer_cache.BindHostComputeBuffers();
const VideoCommon::ImageViewInOut* views_it{views.data() + num_texture_buffers +
num_image_buffers};
texture_binding += num_texture_buffers;
image_binding += num_image_buffers;
u32 texture_scaling_mask{};
for (const auto& desc : info.texture_descriptors) {
for (u32 index = 0; index < desc.count; ++index) {
ImageView& image_view{texture_cache.GetImageView((views_it++)->id)};
textures[texture_binding] = image_view.Handle(desc.type);
if (texture_cache.IsRescaling(image_view)) {
texture_scaling_mask |= 1u << texture_binding;
}
++texture_binding;
}
}
u32 image_scaling_mask{};
for (const auto& desc : info.image_descriptors) {
for (u32 index = 0; index < desc.count; ++index) {
ImageView& image_view{texture_cache.GetImageView((views_it++)->id)};
if (desc.is_written) {
texture_cache.MarkModification(image_view.image_id);
}
images[image_binding] = image_view.StorageView(desc.type, desc.format);
if (texture_cache.IsRescaling(image_view)) {
image_scaling_mask |= 1u << image_binding;
}
++image_binding;
}
}
if (info.uses_rescaling_uniform) {
const f32 float_texture_scaling_mask{Common::BitCast<f32>(texture_scaling_mask)};
const f32 float_image_scaling_mask{Common::BitCast<f32>(image_scaling_mask)};
if (assembly_program.handle != 0) {
glProgramLocalParameter4fARB(GL_COMPUTE_PROGRAM_NV, 0, float_texture_scaling_mask,
float_image_scaling_mask, 0.0f, 0.0f);
} else {
glProgramUniform4f(source_program.handle, 0, float_texture_scaling_mask,
float_image_scaling_mask, 0.0f, 0.0f);
}
}
if (texture_binding != 0) {
ASSERT(texture_binding == sampler_binding);
glBindTextures(0, texture_binding, textures.data());
glBindSamplers(0, sampler_binding, samplers.data());
}
if (image_binding != 0) {
glBindImageTextures(0, image_binding, images.data());
}
}
} // namespace OpenGL

View File

@@ -1,95 +1,95 @@
// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include <array>
#include <type_traits>
#include "common/common_types.h"
#include "shader_recompiler/shader_info.h"
#include "video_core/renderer_opengl/gl_buffer_cache.h"
#include "video_core/renderer_opengl/gl_resource_manager.h"
#include "video_core/renderer_opengl/gl_texture_cache.h"
namespace Tegra {
class MemoryManager;
}
namespace Tegra::Engines {
class KeplerCompute;
}
namespace Shader {
struct Info;
}
namespace OpenGL {
class Device;
class ProgramManager;
struct ComputePipelineKey {
u64 unique_hash;
u32 shared_memory_size;
std::array<u32, 3> workgroup_size;
size_t Hash() const noexcept;
bool operator==(const ComputePipelineKey&) const noexcept;
bool operator!=(const ComputePipelineKey& rhs) const noexcept {
return !operator==(rhs);
}
};
static_assert(std::has_unique_object_representations_v<ComputePipelineKey>);
static_assert(std::is_trivially_copyable_v<ComputePipelineKey>);
static_assert(std::is_trivially_constructible_v<ComputePipelineKey>);
class ComputePipeline {
public:
explicit ComputePipeline(const Device& device, TextureCache& texture_cache_,
BufferCache& buffer_cache_, ProgramManager& program_manager_,
const Shader::Info& info_, std::string code, std::vector<u32> code_v);
void Configure();
[[nodiscard]] bool WritesGlobalMemory() const noexcept {
return writes_global_memory;
}
void SetEngine(Tegra::Engines::KeplerCompute* kepler_compute_,
Tegra::MemoryManager* gpu_memory_) {
kepler_compute = kepler_compute_;
gpu_memory = gpu_memory_;
}
private:
TextureCache& texture_cache;
BufferCache& buffer_cache;
Tegra::MemoryManager* gpu_memory;
Tegra::Engines::KeplerCompute* kepler_compute;
ProgramManager& program_manager;
Shader::Info info;
OGLProgram source_program;
OGLAssemblyProgram assembly_program;
VideoCommon::ComputeUniformBufferSizes uniform_buffer_sizes{};
u32 num_texture_buffers{};
u32 num_image_buffers{};
bool use_storage_buffers{};
bool writes_global_memory{};
};
} // namespace OpenGL
namespace std {
template <>
struct hash<OpenGL::ComputePipelineKey> {
size_t operator()(const OpenGL::ComputePipelineKey& k) const noexcept {
return k.Hash();
}
};
} // namespace std
// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include <array>
#include <type_traits>
#include "common/common_types.h"
#include "shader_recompiler/shader_info.h"
#include "video_core/renderer_opengl/gl_buffer_cache.h"
#include "video_core/renderer_opengl/gl_resource_manager.h"
#include "video_core/renderer_opengl/gl_texture_cache.h"
namespace Tegra {
class MemoryManager;
}
namespace Tegra::Engines {
class KeplerCompute;
}
namespace Shader {
struct Info;
}
namespace OpenGL {
class Device;
class ProgramManager;
struct ComputePipelineKey {
u64 unique_hash;
u32 shared_memory_size;
std::array<u32, 3> workgroup_size;
size_t Hash() const noexcept;
bool operator==(const ComputePipelineKey&) const noexcept;
bool operator!=(const ComputePipelineKey& rhs) const noexcept {
return !operator==(rhs);
}
};
static_assert(std::has_unique_object_representations_v<ComputePipelineKey>);
static_assert(std::is_trivially_copyable_v<ComputePipelineKey>);
static_assert(std::is_trivially_constructible_v<ComputePipelineKey>);
class ComputePipeline {
public:
explicit ComputePipeline(const Device& device, TextureCache& texture_cache_,
BufferCache& buffer_cache_, ProgramManager& program_manager_,
const Shader::Info& info_, std::string code, std::vector<u32> code_v);
void Configure();
[[nodiscard]] bool WritesGlobalMemory() const noexcept {
return writes_global_memory;
}
void SetEngine(Tegra::Engines::KeplerCompute* kepler_compute_,
Tegra::MemoryManager* gpu_memory_) {
kepler_compute = kepler_compute_;
gpu_memory = gpu_memory_;
}
private:
TextureCache& texture_cache;
BufferCache& buffer_cache;
Tegra::MemoryManager* gpu_memory;
Tegra::Engines::KeplerCompute* kepler_compute;
ProgramManager& program_manager;
Shader::Info info;
OGLProgram source_program;
OGLAssemblyProgram assembly_program;
VideoCommon::ComputeUniformBufferSizes uniform_buffer_sizes{};
u32 num_texture_buffers{};
u32 num_image_buffers{};
bool use_storage_buffers{};
bool writes_global_memory{};
};
} // namespace OpenGL
namespace std {
template <>
struct hash<OpenGL::ComputePipelineKey> {
size_t operator()(const OpenGL::ComputePipelineKey& k) const noexcept {
return k.Hash();
}
};
} // namespace std

View File

@@ -1,288 +1,288 @@
// SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include <algorithm>
#include <array>
#include <cstddef>
#include <cstdlib>
#include <optional>
#include <span>
#include <stdexcept>
#include <vector>
#include <glad/glad.h>
#include "common/literals.h"
#include "common/logging/log.h"
#include "common/settings.h"
#include "shader_recompiler/stage.h"
#include "video_core/renderer_opengl/gl_device.h"
#include "video_core/renderer_opengl/gl_resource_manager.h"
using namespace Common::Literals;
namespace OpenGL {
namespace {
constexpr std::array LIMIT_UBOS = {
GL_MAX_VERTEX_UNIFORM_BLOCKS, GL_MAX_TESS_CONTROL_UNIFORM_BLOCKS,
GL_MAX_TESS_EVALUATION_UNIFORM_BLOCKS, GL_MAX_GEOMETRY_UNIFORM_BLOCKS,
GL_MAX_FRAGMENT_UNIFORM_BLOCKS, GL_MAX_COMPUTE_UNIFORM_BLOCKS,
};
template <typename T>
T GetInteger(GLenum pname) {
GLint temporary;
glGetIntegerv(pname, &temporary);
return static_cast<T>(temporary);
}
bool TestProgram(const GLchar* glsl) {
const GLuint shader{glCreateShaderProgramv(GL_VERTEX_SHADER, 1, &glsl)};
GLint link_status;
glGetProgramiv(shader, GL_LINK_STATUS, &link_status);
glDeleteProgram(shader);
return link_status == GL_TRUE;
}
std::vector<std::string_view> GetExtensions() {
GLint num_extensions;
glGetIntegerv(GL_NUM_EXTENSIONS, &num_extensions);
std::vector<std::string_view> extensions;
extensions.reserve(num_extensions);
for (GLint index = 0; index < num_extensions; ++index) {
extensions.push_back(
reinterpret_cast<const char*>(glGetStringi(GL_EXTENSIONS, static_cast<GLuint>(index))));
}
return extensions;
}
bool HasExtension(std::span<const std::string_view> extensions, std::string_view extension) {
return std::ranges::find(extensions, extension) != extensions.end();
}
std::array<u32, Shader::MaxStageTypes> BuildMaxUniformBuffers() noexcept {
std::array<u32, Shader::MaxStageTypes> max;
std::ranges::transform(LIMIT_UBOS, max.begin(), &GetInteger<u32>);
return max;
}
bool IsASTCSupported() {
static constexpr std::array targets{
GL_TEXTURE_2D,
GL_TEXTURE_2D_ARRAY,
};
static constexpr std::array formats{
GL_COMPRESSED_RGBA_ASTC_4x4_KHR, GL_COMPRESSED_RGBA_ASTC_5x4_KHR,
GL_COMPRESSED_RGBA_ASTC_5x5_KHR, GL_COMPRESSED_RGBA_ASTC_6x5_KHR,
GL_COMPRESSED_RGBA_ASTC_6x6_KHR, GL_COMPRESSED_RGBA_ASTC_8x5_KHR,
GL_COMPRESSED_RGBA_ASTC_8x6_KHR, GL_COMPRESSED_RGBA_ASTC_8x8_KHR,
GL_COMPRESSED_RGBA_ASTC_10x5_KHR, GL_COMPRESSED_RGBA_ASTC_10x6_KHR,
GL_COMPRESSED_RGBA_ASTC_10x8_KHR, GL_COMPRESSED_RGBA_ASTC_10x10_KHR,
GL_COMPRESSED_RGBA_ASTC_12x10_KHR, GL_COMPRESSED_RGBA_ASTC_12x12_KHR,
GL_COMPRESSED_SRGB8_ALPHA8_ASTC_4x4_KHR, GL_COMPRESSED_SRGB8_ALPHA8_ASTC_5x4_KHR,
GL_COMPRESSED_SRGB8_ALPHA8_ASTC_5x5_KHR, GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x5_KHR,
GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x6_KHR, GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x5_KHR,
GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x6_KHR, GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x8_KHR,
GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x5_KHR, GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x6_KHR,
GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x8_KHR, GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x10_KHR,
GL_COMPRESSED_SRGB8_ALPHA8_ASTC_12x10_KHR, GL_COMPRESSED_SRGB8_ALPHA8_ASTC_12x12_KHR,
};
static constexpr std::array required_support{
GL_VERTEX_TEXTURE, GL_TESS_CONTROL_TEXTURE, GL_TESS_EVALUATION_TEXTURE,
GL_GEOMETRY_TEXTURE, GL_FRAGMENT_TEXTURE, GL_COMPUTE_TEXTURE,
};
for (const GLenum target : targets) {
for (const GLenum format : formats) {
for (const GLenum support : required_support) {
GLint value;
glGetInternalformativ(target, format, support, 1, &value);
if (value != GL_FULL_SUPPORT) {
return false;
}
}
}
}
return true;
}
[[nodiscard]] bool IsDebugToolAttached(std::span<const std::string_view> extensions) {
const bool nsight = std::getenv("NVTX_INJECTION64_PATH") || std::getenv("NSIGHT_LAUNCHED");
return nsight || HasExtension(extensions, "GL_EXT_debug_tool");
}
} // Anonymous namespace
Device::Device() {
if (!GLAD_GL_VERSION_4_6) {
LOG_ERROR(Render_OpenGL, "OpenGL 4.6 is not available");
throw std::runtime_error{"Insufficient version"};
}
vendor_name = reinterpret_cast<const char*>(glGetString(GL_VENDOR));
const std::string_view version = reinterpret_cast<const char*>(glGetString(GL_VERSION));
const std::vector extensions = GetExtensions();
const bool is_nvidia = vendor_name == "NVIDIA Corporation";
const bool is_amd = vendor_name == "ATI Technologies Inc.";
const bool is_intel = vendor_name == "Intel";
#ifdef __unix__
const bool is_linux = true;
#else
const bool is_linux = false;
#endif
bool disable_fast_buffer_sub_data = false;
if (is_nvidia && version == "4.6.0 NVIDIA 443.24") {
LOG_WARNING(
Render_OpenGL,
"Beta driver 443.24 is known to have issues. There might be performance issues.");
disable_fast_buffer_sub_data = true;
}
max_uniform_buffers = BuildMaxUniformBuffers();
uniform_buffer_alignment = GetInteger<size_t>(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT);
shader_storage_alignment = GetInteger<size_t>(GL_SHADER_STORAGE_BUFFER_OFFSET_ALIGNMENT);
max_vertex_attributes = GetInteger<u32>(GL_MAX_VERTEX_ATTRIBS);
max_varyings = GetInteger<u32>(GL_MAX_VARYING_VECTORS);
max_compute_shared_memory_size = GetInteger<u32>(GL_MAX_COMPUTE_SHARED_MEMORY_SIZE);
max_glasm_storage_buffer_blocks = GetInteger<u32>(GL_MAX_VERTEX_SHADER_STORAGE_BLOCKS);
has_warp_intrinsics = GLAD_GL_NV_gpu_shader5 && GLAD_GL_NV_shader_thread_group &&
GLAD_GL_NV_shader_thread_shuffle;
has_shader_ballot = GLAD_GL_ARB_shader_ballot;
has_vertex_viewport_layer = GLAD_GL_ARB_shader_viewport_layer_array;
has_image_load_formatted = HasExtension(extensions, "GL_EXT_shader_image_load_formatted");
has_texture_shadow_lod = HasExtension(extensions, "GL_EXT_texture_shadow_lod");
has_astc = IsASTCSupported();
has_variable_aoffi = TestVariableAoffi();
has_component_indexing_bug = is_amd;
has_precise_bug = TestPreciseBug();
has_broken_texture_view_formats = is_amd || (!is_linux && is_intel);
has_nv_viewport_array2 = GLAD_GL_NV_viewport_array2;
has_derivative_control = GLAD_GL_ARB_derivative_control;
has_vertex_buffer_unified_memory = GLAD_GL_NV_vertex_buffer_unified_memory;
has_debugging_tool_attached = IsDebugToolAttached(extensions);
has_depth_buffer_float = HasExtension(extensions, "GL_NV_depth_buffer_float");
has_geometry_shader_passthrough = GLAD_GL_NV_geometry_shader_passthrough;
has_nv_gpu_shader_5 = GLAD_GL_NV_gpu_shader5;
has_shader_int64 = HasExtension(extensions, "GL_ARB_gpu_shader_int64");
has_amd_shader_half_float = GLAD_GL_AMD_gpu_shader_half_float;
has_sparse_texture_2 = GLAD_GL_ARB_sparse_texture2;
warp_size_potentially_larger_than_guest = !is_nvidia && !is_intel;
need_fastmath_off = is_nvidia;
can_report_memory = GLAD_GL_NVX_gpu_memory_info;
// At the moment of writing this, only Nvidia's driver optimizes BufferSubData on exclusive
// uniform buffers as "push constants"
has_fast_buffer_sub_data = is_nvidia && !disable_fast_buffer_sub_data;
shader_backend = Settings::values.shader_backend.GetValue();
use_assembly_shaders = shader_backend == Settings::ShaderBackend::GLASM &&
GLAD_GL_NV_gpu_program5 && GLAD_GL_NV_compute_program5 &&
GLAD_GL_NV_transform_feedback && GLAD_GL_NV_transform_feedback2;
if (shader_backend == Settings::ShaderBackend::GLASM && !use_assembly_shaders) {
LOG_ERROR(Render_OpenGL, "Assembly shaders enabled but not supported");
shader_backend = Settings::ShaderBackend::GLSL;
}
if (shader_backend == Settings::ShaderBackend::GLSL && is_nvidia) {
const std::string_view driver_version = version.substr(13);
const int version_major =
std::atoi(driver_version.substr(0, driver_version.find(".")).data());
if (version_major >= 495) {
has_cbuf_ftou_bug = true;
has_bool_ref_bug = true;
}
}
// Blocks AMD and Intel OpenGL drivers on Windows from using asynchronous shader compilation.
use_asynchronous_shaders = Settings::values.use_asynchronous_shaders.GetValue() &&
!(is_amd || (is_intel && !is_linux));
use_driver_cache = is_nvidia;
LOG_INFO(Render_OpenGL, "Renderer_VariableAOFFI: {}", has_variable_aoffi);
LOG_INFO(Render_OpenGL, "Renderer_ComponentIndexingBug: {}", has_component_indexing_bug);
LOG_INFO(Render_OpenGL, "Renderer_PreciseBug: {}", has_precise_bug);
LOG_INFO(Render_OpenGL, "Renderer_BrokenTextureViewFormats: {}",
has_broken_texture_view_formats);
if (Settings::values.use_asynchronous_shaders.GetValue() && !use_asynchronous_shaders) {
LOG_WARNING(Render_OpenGL, "Asynchronous shader compilation enabled but not supported");
}
}
std::string Device::GetVendorName() const {
if (vendor_name == "NVIDIA Corporation") {
return "NVIDIA";
}
if (vendor_name == "ATI Technologies Inc.") {
return "AMD";
}
if (vendor_name == "Intel") {
// For Mesa, `Intel` is an overloaded vendor string that could mean crocus or iris.
// Simply return `INTEL` for those as well as the Windows driver.
return "INTEL";
}
if (vendor_name == "Intel Open Source Technology Center") {
return "I965";
}
if (vendor_name == "Mesa Project") {
return "I915";
}
if (vendor_name == "Mesa/X.org") {
// This vendor string is overloaded between llvmpipe, softpipe, and virgl, so just return
// MESA instead of one of those driver names.
return "MESA";
}
if (vendor_name == "AMD") {
return "RADEONSI";
}
if (vendor_name == "nouveau") {
return "NOUVEAU";
}
if (vendor_name == "X.Org") {
return "R600";
}
if (vendor_name == "Collabora Ltd") {
return "ZINK";
}
if (vendor_name == "Intel Corporation") {
return "OPENSWR";
}
if (vendor_name == "Microsoft Corporation") {
return "D3D12";
}
if (vendor_name == "NVIDIA") {
// Mesa's tegra driver reports `NVIDIA`. Only present in this list because the default
// strategy would have returned `NVIDIA` here for this driver, the same result as the
// proprietary driver.
return "TEGRA";
}
return vendor_name;
}
bool Device::TestVariableAoffi() {
return TestProgram(R"(#version 430 core
// This is a unit test, please ignore me on apitrace bug reports.
uniform sampler2D tex;
uniform ivec2 variable_offset;
out vec4 output_attribute;
void main() {
output_attribute = textureOffset(tex, vec2(0), variable_offset);
})");
}
bool Device::TestPreciseBug() {
return !TestProgram(R"(#version 430 core
in vec3 coords;
out float out_value;
uniform sampler2DShadow tex;
void main() {
precise float tmp_value = vec4(texture(tex, coords)).x;
out_value = tmp_value;
})");
}
u64 Device::GetCurrentDedicatedVideoMemory() const {
GLint cur_avail_mem_kb = 0;
glGetIntegerv(GL_GPU_MEMORY_INFO_TOTAL_AVAILABLE_MEMORY_NVX, &cur_avail_mem_kb);
return static_cast<u64>(cur_avail_mem_kb) * 1_KiB;
}
} // namespace OpenGL
// SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include <algorithm>
#include <array>
#include <cstddef>
#include <cstdlib>
#include <optional>
#include <span>
#include <stdexcept>
#include <vector>
#include <glad/glad.h>
#include "common/literals.h"
#include "common/logging/log.h"
#include "common/settings.h"
#include "shader_recompiler/stage.h"
#include "video_core/renderer_opengl/gl_device.h"
#include "video_core/renderer_opengl/gl_resource_manager.h"
using namespace Common::Literals;
namespace OpenGL {
namespace {
constexpr std::array LIMIT_UBOS = {
GL_MAX_VERTEX_UNIFORM_BLOCKS, GL_MAX_TESS_CONTROL_UNIFORM_BLOCKS,
GL_MAX_TESS_EVALUATION_UNIFORM_BLOCKS, GL_MAX_GEOMETRY_UNIFORM_BLOCKS,
GL_MAX_FRAGMENT_UNIFORM_BLOCKS, GL_MAX_COMPUTE_UNIFORM_BLOCKS,
};
template <typename T>
T GetInteger(GLenum pname) {
GLint temporary;
glGetIntegerv(pname, &temporary);
return static_cast<T>(temporary);
}
bool TestProgram(const GLchar* glsl) {
const GLuint shader{glCreateShaderProgramv(GL_VERTEX_SHADER, 1, &glsl)};
GLint link_status;
glGetProgramiv(shader, GL_LINK_STATUS, &link_status);
glDeleteProgram(shader);
return link_status == GL_TRUE;
}
std::vector<std::string_view> GetExtensions() {
GLint num_extensions;
glGetIntegerv(GL_NUM_EXTENSIONS, &num_extensions);
std::vector<std::string_view> extensions;
extensions.reserve(num_extensions);
for (GLint index = 0; index < num_extensions; ++index) {
extensions.push_back(
reinterpret_cast<const char*>(glGetStringi(GL_EXTENSIONS, static_cast<GLuint>(index))));
}
return extensions;
}
bool HasExtension(std::span<const std::string_view> extensions, std::string_view extension) {
return std::ranges::find(extensions, extension) != extensions.end();
}
std::array<u32, Shader::MaxStageTypes> BuildMaxUniformBuffers() noexcept {
std::array<u32, Shader::MaxStageTypes> max;
std::ranges::transform(LIMIT_UBOS, max.begin(), &GetInteger<u32>);
return max;
}
bool IsASTCSupported() {
static constexpr std::array targets{
GL_TEXTURE_2D,
GL_TEXTURE_2D_ARRAY,
};
static constexpr std::array formats{
GL_COMPRESSED_RGBA_ASTC_4x4_KHR, GL_COMPRESSED_RGBA_ASTC_5x4_KHR,
GL_COMPRESSED_RGBA_ASTC_5x5_KHR, GL_COMPRESSED_RGBA_ASTC_6x5_KHR,
GL_COMPRESSED_RGBA_ASTC_6x6_KHR, GL_COMPRESSED_RGBA_ASTC_8x5_KHR,
GL_COMPRESSED_RGBA_ASTC_8x6_KHR, GL_COMPRESSED_RGBA_ASTC_8x8_KHR,
GL_COMPRESSED_RGBA_ASTC_10x5_KHR, GL_COMPRESSED_RGBA_ASTC_10x6_KHR,
GL_COMPRESSED_RGBA_ASTC_10x8_KHR, GL_COMPRESSED_RGBA_ASTC_10x10_KHR,
GL_COMPRESSED_RGBA_ASTC_12x10_KHR, GL_COMPRESSED_RGBA_ASTC_12x12_KHR,
GL_COMPRESSED_SRGB8_ALPHA8_ASTC_4x4_KHR, GL_COMPRESSED_SRGB8_ALPHA8_ASTC_5x4_KHR,
GL_COMPRESSED_SRGB8_ALPHA8_ASTC_5x5_KHR, GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x5_KHR,
GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x6_KHR, GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x5_KHR,
GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x6_KHR, GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x8_KHR,
GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x5_KHR, GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x6_KHR,
GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x8_KHR, GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x10_KHR,
GL_COMPRESSED_SRGB8_ALPHA8_ASTC_12x10_KHR, GL_COMPRESSED_SRGB8_ALPHA8_ASTC_12x12_KHR,
};
static constexpr std::array required_support{
GL_VERTEX_TEXTURE, GL_TESS_CONTROL_TEXTURE, GL_TESS_EVALUATION_TEXTURE,
GL_GEOMETRY_TEXTURE, GL_FRAGMENT_TEXTURE, GL_COMPUTE_TEXTURE,
};
for (const GLenum target : targets) {
for (const GLenum format : formats) {
for (const GLenum support : required_support) {
GLint value;
glGetInternalformativ(target, format, support, 1, &value);
if (value != GL_FULL_SUPPORT) {
return false;
}
}
}
}
return true;
}
[[nodiscard]] bool IsDebugToolAttached(std::span<const std::string_view> extensions) {
const bool nsight = std::getenv("NVTX_INJECTION64_PATH") || std::getenv("NSIGHT_LAUNCHED");
return nsight || HasExtension(extensions, "GL_EXT_debug_tool");
}
} // Anonymous namespace
Device::Device() {
if (!GLAD_GL_VERSION_4_6) {
LOG_ERROR(Render_OpenGL, "OpenGL 4.6 is not available");
throw std::runtime_error{"Insufficient version"};
}
vendor_name = reinterpret_cast<const char*>(glGetString(GL_VENDOR));
const std::string_view version = reinterpret_cast<const char*>(glGetString(GL_VERSION));
const std::vector extensions = GetExtensions();
const bool is_nvidia = vendor_name == "NVIDIA Corporation";
const bool is_amd = vendor_name == "ATI Technologies Inc.";
const bool is_intel = vendor_name == "Intel";
#ifdef __unix__
const bool is_linux = true;
#else
const bool is_linux = false;
#endif
bool disable_fast_buffer_sub_data = false;
if (is_nvidia && version == "4.6.0 NVIDIA 443.24") {
LOG_WARNING(
Render_OpenGL,
"Beta driver 443.24 is known to have issues. There might be performance issues.");
disable_fast_buffer_sub_data = true;
}
max_uniform_buffers = BuildMaxUniformBuffers();
uniform_buffer_alignment = GetInteger<size_t>(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT);
shader_storage_alignment = GetInteger<size_t>(GL_SHADER_STORAGE_BUFFER_OFFSET_ALIGNMENT);
max_vertex_attributes = GetInteger<u32>(GL_MAX_VERTEX_ATTRIBS);
max_varyings = GetInteger<u32>(GL_MAX_VARYING_VECTORS);
max_compute_shared_memory_size = GetInteger<u32>(GL_MAX_COMPUTE_SHARED_MEMORY_SIZE);
max_glasm_storage_buffer_blocks = GetInteger<u32>(GL_MAX_VERTEX_SHADER_STORAGE_BLOCKS);
has_warp_intrinsics = GLAD_GL_NV_gpu_shader5 && GLAD_GL_NV_shader_thread_group &&
GLAD_GL_NV_shader_thread_shuffle;
has_shader_ballot = GLAD_GL_ARB_shader_ballot;
has_vertex_viewport_layer = GLAD_GL_ARB_shader_viewport_layer_array;
has_image_load_formatted = HasExtension(extensions, "GL_EXT_shader_image_load_formatted");
has_texture_shadow_lod = HasExtension(extensions, "GL_EXT_texture_shadow_lod");
has_astc = IsASTCSupported();
has_variable_aoffi = TestVariableAoffi();
has_component_indexing_bug = is_amd;
has_precise_bug = TestPreciseBug();
has_broken_texture_view_formats = is_amd || (!is_linux && is_intel);
has_nv_viewport_array2 = GLAD_GL_NV_viewport_array2;
has_derivative_control = GLAD_GL_ARB_derivative_control;
has_vertex_buffer_unified_memory = GLAD_GL_NV_vertex_buffer_unified_memory;
has_debugging_tool_attached = IsDebugToolAttached(extensions);
has_depth_buffer_float = HasExtension(extensions, "GL_NV_depth_buffer_float");
has_geometry_shader_passthrough = GLAD_GL_NV_geometry_shader_passthrough;
has_nv_gpu_shader_5 = GLAD_GL_NV_gpu_shader5;
has_shader_int64 = HasExtension(extensions, "GL_ARB_gpu_shader_int64");
has_amd_shader_half_float = GLAD_GL_AMD_gpu_shader_half_float;
has_sparse_texture_2 = GLAD_GL_ARB_sparse_texture2;
warp_size_potentially_larger_than_guest = !is_nvidia && !is_intel;
need_fastmath_off = is_nvidia;
can_report_memory = GLAD_GL_NVX_gpu_memory_info;
// At the moment of writing this, only Nvidia's driver optimizes BufferSubData on exclusive
// uniform buffers as "push constants"
has_fast_buffer_sub_data = is_nvidia && !disable_fast_buffer_sub_data;
shader_backend = Settings::values.shader_backend.GetValue();
use_assembly_shaders = shader_backend == Settings::ShaderBackend::GLASM &&
GLAD_GL_NV_gpu_program5 && GLAD_GL_NV_compute_program5 &&
GLAD_GL_NV_transform_feedback && GLAD_GL_NV_transform_feedback2;
if (shader_backend == Settings::ShaderBackend::GLASM && !use_assembly_shaders) {
LOG_ERROR(Render_OpenGL, "Assembly shaders enabled but not supported");
shader_backend = Settings::ShaderBackend::GLSL;
}
if (shader_backend == Settings::ShaderBackend::GLSL && is_nvidia) {
const std::string_view driver_version = version.substr(13);
const int version_major =
std::atoi(driver_version.substr(0, driver_version.find(".")).data());
if (version_major >= 495) {
has_cbuf_ftou_bug = true;
has_bool_ref_bug = true;
}
}
// Blocks AMD and Intel OpenGL drivers on Windows from using asynchronous shader compilation.
use_asynchronous_shaders = Settings::values.use_asynchronous_shaders.GetValue() &&
!(is_amd || (is_intel && !is_linux));
use_driver_cache = is_nvidia;
LOG_INFO(Render_OpenGL, "Renderer_VariableAOFFI: {}", has_variable_aoffi);
LOG_INFO(Render_OpenGL, "Renderer_ComponentIndexingBug: {}", has_component_indexing_bug);
LOG_INFO(Render_OpenGL, "Renderer_PreciseBug: {}", has_precise_bug);
LOG_INFO(Render_OpenGL, "Renderer_BrokenTextureViewFormats: {}",
has_broken_texture_view_formats);
if (Settings::values.use_asynchronous_shaders.GetValue() && !use_asynchronous_shaders) {
LOG_WARNING(Render_OpenGL, "Asynchronous shader compilation enabled but not supported");
}
}
std::string Device::GetVendorName() const {
if (vendor_name == "NVIDIA Corporation") {
return "NVIDIA";
}
if (vendor_name == "ATI Technologies Inc.") {
return "AMD";
}
if (vendor_name == "Intel") {
// For Mesa, `Intel` is an overloaded vendor string that could mean crocus or iris.
// Simply return `INTEL` for those as well as the Windows driver.
return "INTEL";
}
if (vendor_name == "Intel Open Source Technology Center") {
return "I965";
}
if (vendor_name == "Mesa Project") {
return "I915";
}
if (vendor_name == "Mesa/X.org") {
// This vendor string is overloaded between llvmpipe, softpipe, and virgl, so just return
// MESA instead of one of those driver names.
return "MESA";
}
if (vendor_name == "AMD") {
return "RADEONSI";
}
if (vendor_name == "nouveau") {
return "NOUVEAU";
}
if (vendor_name == "X.Org") {
return "R600";
}
if (vendor_name == "Collabora Ltd") {
return "ZINK";
}
if (vendor_name == "Intel Corporation") {
return "OPENSWR";
}
if (vendor_name == "Microsoft Corporation") {
return "D3D12";
}
if (vendor_name == "NVIDIA") {
// Mesa's tegra driver reports `NVIDIA`. Only present in this list because the default
// strategy would have returned `NVIDIA` here for this driver, the same result as the
// proprietary driver.
return "TEGRA";
}
return vendor_name;
}
bool Device::TestVariableAoffi() {
return TestProgram(R"(#version 430 core
// This is a unit test, please ignore me on apitrace bug reports.
uniform sampler2D tex;
uniform ivec2 variable_offset;
out vec4 output_attribute;
void main() {
output_attribute = textureOffset(tex, vec2(0), variable_offset);
})");
}
bool Device::TestPreciseBug() {
return !TestProgram(R"(#version 430 core
in vec3 coords;
out float out_value;
uniform sampler2DShadow tex;
void main() {
precise float tmp_value = vec4(texture(tex, coords)).x;
out_value = tmp_value;
})");
}
u64 Device::GetCurrentDedicatedVideoMemory() const {
GLint cur_avail_mem_kb = 0;
glGetIntegerv(GL_GPU_MEMORY_INFO_TOTAL_AVAILABLE_MEMORY_NVX, &cur_avail_mem_kb);
return static_cast<u64>(cur_avail_mem_kb) * 1_KiB;
}
} // namespace OpenGL

View File

@@ -1,223 +1,223 @@
// SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include <cstddef>
#include "common/common_types.h"
#include "shader_recompiler/stage.h"
namespace Settings {
enum class ShaderBackend : u32;
};
namespace OpenGL {
class Device {
public:
explicit Device();
[[nodiscard]] std::string GetVendorName() const;
u64 GetCurrentDedicatedVideoMemory() const;
u32 GetMaxUniformBuffers(Shader::Stage stage) const noexcept {
return max_uniform_buffers[static_cast<size_t>(stage)];
}
size_t GetUniformBufferAlignment() const {
return uniform_buffer_alignment;
}
size_t GetShaderStorageBufferAlignment() const {
return shader_storage_alignment;
}
u32 GetMaxVertexAttributes() const {
return max_vertex_attributes;
}
u32 GetMaxVaryings() const {
return max_varyings;
}
u32 GetMaxComputeSharedMemorySize() const {
return max_compute_shared_memory_size;
}
u32 GetMaxGLASMStorageBufferBlocks() const {
return max_glasm_storage_buffer_blocks;
}
bool HasWarpIntrinsics() const {
return has_warp_intrinsics;
}
bool HasShaderBallot() const {
return has_shader_ballot;
}
bool HasVertexViewportLayer() const {
return has_vertex_viewport_layer;
}
bool HasImageLoadFormatted() const {
return has_image_load_formatted;
}
bool HasTextureShadowLod() const {
return has_texture_shadow_lod;
}
bool HasVertexBufferUnifiedMemory() const {
return has_vertex_buffer_unified_memory;
}
bool HasASTC() const {
return has_astc;
}
bool HasVariableAoffi() const {
return has_variable_aoffi;
}
bool HasComponentIndexingBug() const {
return has_component_indexing_bug;
}
bool HasPreciseBug() const {
return has_precise_bug;
}
bool HasBrokenTextureViewFormats() const {
return has_broken_texture_view_formats;
}
bool HasFastBufferSubData() const {
return has_fast_buffer_sub_data;
}
bool HasNvViewportArray2() const {
return has_nv_viewport_array2;
}
bool HasDerivativeControl() const {
return has_derivative_control;
}
bool HasDebuggingToolAttached() const {
return has_debugging_tool_attached;
}
bool UseAssemblyShaders() const {
return use_assembly_shaders;
}
bool UseAsynchronousShaders() const {
return use_asynchronous_shaders;
}
bool UseDriverCache() const {
return use_driver_cache;
}
bool HasDepthBufferFloat() const {
return has_depth_buffer_float;
}
bool HasGeometryShaderPassthrough() const {
return has_geometry_shader_passthrough;
}
bool HasNvGpuShader5() const {
return has_nv_gpu_shader_5;
}
bool HasShaderInt64() const {
return has_shader_int64;
}
bool HasAmdShaderHalfFloat() const {
return has_amd_shader_half_float;
}
bool HasSparseTexture2() const {
return has_sparse_texture_2;
}
bool IsWarpSizePotentiallyLargerThanGuest() const {
return warp_size_potentially_larger_than_guest;
}
bool NeedsFastmathOff() const {
return need_fastmath_off;
}
bool HasCbufFtouBug() const {
return has_cbuf_ftou_bug;
}
bool HasBoolRefBug() const {
return has_bool_ref_bug;
}
Settings::ShaderBackend GetShaderBackend() const {
return shader_backend;
}
bool IsAmd() const {
return vendor_name == "ATI Technologies Inc.";
}
bool CanReportMemoryUsage() const {
return can_report_memory;
}
private:
static bool TestVariableAoffi();
static bool TestPreciseBug();
std::array<u32, Shader::MaxStageTypes> max_uniform_buffers{};
size_t uniform_buffer_alignment{};
size_t shader_storage_alignment{};
u32 max_vertex_attributes{};
u32 max_varyings{};
u32 max_compute_shared_memory_size{};
u32 max_glasm_storage_buffer_blocks{};
Settings::ShaderBackend shader_backend{};
bool has_warp_intrinsics{};
bool has_shader_ballot{};
bool has_vertex_viewport_layer{};
bool has_image_load_formatted{};
bool has_texture_shadow_lod{};
bool has_vertex_buffer_unified_memory{};
bool has_astc{};
bool has_variable_aoffi{};
bool has_component_indexing_bug{};
bool has_precise_bug{};
bool has_broken_texture_view_formats{};
bool has_fast_buffer_sub_data{};
bool has_nv_viewport_array2{};
bool has_derivative_control{};
bool has_debugging_tool_attached{};
bool use_assembly_shaders{};
bool use_asynchronous_shaders{};
bool use_driver_cache{};
bool has_depth_buffer_float{};
bool has_geometry_shader_passthrough{};
bool has_nv_gpu_shader_5{};
bool has_shader_int64{};
bool has_amd_shader_half_float{};
bool has_sparse_texture_2{};
bool warp_size_potentially_larger_than_guest{};
bool need_fastmath_off{};
bool has_cbuf_ftou_bug{};
bool has_bool_ref_bug{};
bool can_report_memory{};
std::string vendor_name;
};
} // namespace OpenGL
// SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include <cstddef>
#include "common/common_types.h"
#include "shader_recompiler/stage.h"
namespace Settings {
enum class ShaderBackend : u32;
};
namespace OpenGL {
class Device {
public:
explicit Device();
[[nodiscard]] std::string GetVendorName() const;
u64 GetCurrentDedicatedVideoMemory() const;
u32 GetMaxUniformBuffers(Shader::Stage stage) const noexcept {
return max_uniform_buffers[static_cast<size_t>(stage)];
}
size_t GetUniformBufferAlignment() const {
return uniform_buffer_alignment;
}
size_t GetShaderStorageBufferAlignment() const {
return shader_storage_alignment;
}
u32 GetMaxVertexAttributes() const {
return max_vertex_attributes;
}
u32 GetMaxVaryings() const {
return max_varyings;
}
u32 GetMaxComputeSharedMemorySize() const {
return max_compute_shared_memory_size;
}
u32 GetMaxGLASMStorageBufferBlocks() const {
return max_glasm_storage_buffer_blocks;
}
bool HasWarpIntrinsics() const {
return has_warp_intrinsics;
}
bool HasShaderBallot() const {
return has_shader_ballot;
}
bool HasVertexViewportLayer() const {
return has_vertex_viewport_layer;
}
bool HasImageLoadFormatted() const {
return has_image_load_formatted;
}
bool HasTextureShadowLod() const {
return has_texture_shadow_lod;
}
bool HasVertexBufferUnifiedMemory() const {
return has_vertex_buffer_unified_memory;
}
bool HasASTC() const {
return has_astc;
}
bool HasVariableAoffi() const {
return has_variable_aoffi;
}
bool HasComponentIndexingBug() const {
return has_component_indexing_bug;
}
bool HasPreciseBug() const {
return has_precise_bug;
}
bool HasBrokenTextureViewFormats() const {
return has_broken_texture_view_formats;
}
bool HasFastBufferSubData() const {
return has_fast_buffer_sub_data;
}
bool HasNvViewportArray2() const {
return has_nv_viewport_array2;
}
bool HasDerivativeControl() const {
return has_derivative_control;
}
bool HasDebuggingToolAttached() const {
return has_debugging_tool_attached;
}
bool UseAssemblyShaders() const {
return use_assembly_shaders;
}
bool UseAsynchronousShaders() const {
return use_asynchronous_shaders;
}
bool UseDriverCache() const {
return use_driver_cache;
}
bool HasDepthBufferFloat() const {
return has_depth_buffer_float;
}
bool HasGeometryShaderPassthrough() const {
return has_geometry_shader_passthrough;
}
bool HasNvGpuShader5() const {
return has_nv_gpu_shader_5;
}
bool HasShaderInt64() const {
return has_shader_int64;
}
bool HasAmdShaderHalfFloat() const {
return has_amd_shader_half_float;
}
bool HasSparseTexture2() const {
return has_sparse_texture_2;
}
bool IsWarpSizePotentiallyLargerThanGuest() const {
return warp_size_potentially_larger_than_guest;
}
bool NeedsFastmathOff() const {
return need_fastmath_off;
}
bool HasCbufFtouBug() const {
return has_cbuf_ftou_bug;
}
bool HasBoolRefBug() const {
return has_bool_ref_bug;
}
Settings::ShaderBackend GetShaderBackend() const {
return shader_backend;
}
bool IsAmd() const {
return vendor_name == "ATI Technologies Inc.";
}
bool CanReportMemoryUsage() const {
return can_report_memory;
}
private:
static bool TestVariableAoffi();
static bool TestPreciseBug();
std::array<u32, Shader::MaxStageTypes> max_uniform_buffers{};
size_t uniform_buffer_alignment{};
size_t shader_storage_alignment{};
u32 max_vertex_attributes{};
u32 max_varyings{};
u32 max_compute_shared_memory_size{};
u32 max_glasm_storage_buffer_blocks{};
Settings::ShaderBackend shader_backend{};
bool has_warp_intrinsics{};
bool has_shader_ballot{};
bool has_vertex_viewport_layer{};
bool has_image_load_formatted{};
bool has_texture_shadow_lod{};
bool has_vertex_buffer_unified_memory{};
bool has_astc{};
bool has_variable_aoffi{};
bool has_component_indexing_bug{};
bool has_precise_bug{};
bool has_broken_texture_view_formats{};
bool has_fast_buffer_sub_data{};
bool has_nv_viewport_array2{};
bool has_derivative_control{};
bool has_debugging_tool_attached{};
bool use_assembly_shaders{};
bool use_asynchronous_shaders{};
bool use_driver_cache{};
bool has_depth_buffer_float{};
bool has_geometry_shader_passthrough{};
bool has_nv_gpu_shader_5{};
bool has_shader_int64{};
bool has_amd_shader_half_float{};
bool has_sparse_texture_2{};
bool warp_size_potentially_larger_than_guest{};
bool need_fastmath_off{};
bool has_cbuf_ftou_bug{};
bool has_bool_ref_bug{};
bool can_report_memory{};
std::string vendor_name;
};
} // namespace OpenGL

View File

@@ -1,64 +1,64 @@
// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include "common/assert.h"
#include <glad/glad.h>
#include "video_core/renderer_opengl/gl_buffer_cache.h"
#include "video_core/renderer_opengl/gl_fence_manager.h"
namespace OpenGL {
GLInnerFence::GLInnerFence(bool is_stubbed_) : FenceBase{is_stubbed_} {}
GLInnerFence::~GLInnerFence() = default;
void GLInnerFence::Queue() {
if (is_stubbed) {
return;
}
ASSERT(sync_object.handle == 0);
sync_object.Create();
}
bool GLInnerFence::IsSignaled() const {
if (is_stubbed) {
return true;
}
ASSERT(sync_object.handle != 0);
GLint sync_status;
glGetSynciv(sync_object.handle, GL_SYNC_STATUS, 1, nullptr, &sync_status);
return sync_status == GL_SIGNALED;
}
void GLInnerFence::Wait() {
if (is_stubbed) {
return;
}
ASSERT(sync_object.handle != 0);
glClientWaitSync(sync_object.handle, 0, GL_TIMEOUT_IGNORED);
}
FenceManagerOpenGL::FenceManagerOpenGL(VideoCore::RasterizerInterface& rasterizer_,
Tegra::GPU& gpu_, TextureCache& texture_cache_,
BufferCache& buffer_cache_, QueryCache& query_cache_)
: GenericFenceManager{rasterizer_, gpu_, texture_cache_, buffer_cache_, query_cache_} {}
Fence FenceManagerOpenGL::CreateFence(bool is_stubbed) {
return std::make_shared<GLInnerFence>(is_stubbed);
}
void FenceManagerOpenGL::QueueFence(Fence& fence) {
fence->Queue();
}
bool FenceManagerOpenGL::IsFenceSignaled(Fence& fence) const {
return fence->IsSignaled();
}
void FenceManagerOpenGL::WaitFence(Fence& fence) {
fence->Wait();
}
} // namespace OpenGL
// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include "common/assert.h"
#include <glad/glad.h>
#include "video_core/renderer_opengl/gl_buffer_cache.h"
#include "video_core/renderer_opengl/gl_fence_manager.h"
namespace OpenGL {
GLInnerFence::GLInnerFence(bool is_stubbed_) : FenceBase{is_stubbed_} {}
GLInnerFence::~GLInnerFence() = default;
void GLInnerFence::Queue() {
if (is_stubbed) {
return;
}
ASSERT(sync_object.handle == 0);
sync_object.Create();
}
bool GLInnerFence::IsSignaled() const {
if (is_stubbed) {
return true;
}
ASSERT(sync_object.handle != 0);
GLint sync_status;
glGetSynciv(sync_object.handle, GL_SYNC_STATUS, 1, nullptr, &sync_status);
return sync_status == GL_SIGNALED;
}
void GLInnerFence::Wait() {
if (is_stubbed) {
return;
}
ASSERT(sync_object.handle != 0);
glClientWaitSync(sync_object.handle, 0, GL_TIMEOUT_IGNORED);
}
FenceManagerOpenGL::FenceManagerOpenGL(VideoCore::RasterizerInterface& rasterizer_,
Tegra::GPU& gpu_, TextureCache& texture_cache_,
BufferCache& buffer_cache_, QueryCache& query_cache_)
: GenericFenceManager{rasterizer_, gpu_, texture_cache_, buffer_cache_, query_cache_} {}
Fence FenceManagerOpenGL::CreateFence(bool is_stubbed) {
return std::make_shared<GLInnerFence>(is_stubbed);
}
void FenceManagerOpenGL::QueueFence(Fence& fence) {
fence->Queue();
}
bool FenceManagerOpenGL::IsFenceSignaled(Fence& fence) const {
return fence->IsSignaled();
}
void FenceManagerOpenGL::WaitFence(Fence& fence) {
fence->Wait();
}
} // namespace OpenGL

View File

@@ -1,48 +1,48 @@
// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include <memory>
#include "common/common_types.h"
#include "video_core/fence_manager.h"
#include "video_core/renderer_opengl/gl_buffer_cache.h"
#include "video_core/renderer_opengl/gl_query_cache.h"
#include "video_core/renderer_opengl/gl_resource_manager.h"
#include "video_core/renderer_opengl/gl_texture_cache.h"
namespace OpenGL {
class GLInnerFence : public VideoCommon::FenceBase {
public:
explicit GLInnerFence(bool is_stubbed_);
~GLInnerFence();
void Queue();
bool IsSignaled() const;
void Wait();
private:
OGLSync sync_object;
};
using Fence = std::shared_ptr<GLInnerFence>;
using GenericFenceManager = VideoCommon::FenceManager<Fence, TextureCache, BufferCache, QueryCache>;
class FenceManagerOpenGL final : public GenericFenceManager {
public:
explicit FenceManagerOpenGL(VideoCore::RasterizerInterface& rasterizer, Tegra::GPU& gpu,
TextureCache& texture_cache, BufferCache& buffer_cache,
QueryCache& query_cache);
protected:
Fence CreateFence(bool is_stubbed) override;
void QueueFence(Fence& fence) override;
bool IsFenceSignaled(Fence& fence) const override;
void WaitFence(Fence& fence) override;
};
} // namespace OpenGL
// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include <memory>
#include "common/common_types.h"
#include "video_core/fence_manager.h"
#include "video_core/renderer_opengl/gl_buffer_cache.h"
#include "video_core/renderer_opengl/gl_query_cache.h"
#include "video_core/renderer_opengl/gl_resource_manager.h"
#include "video_core/renderer_opengl/gl_texture_cache.h"
namespace OpenGL {
class GLInnerFence : public VideoCommon::FenceBase {
public:
explicit GLInnerFence(bool is_stubbed_);
~GLInnerFence();
void Queue();
bool IsSignaled() const;
void Wait();
private:
OGLSync sync_object;
};
using Fence = std::shared_ptr<GLInnerFence>;
using GenericFenceManager = VideoCommon::FenceManager<Fence, TextureCache, BufferCache, QueryCache>;
class FenceManagerOpenGL final : public GenericFenceManager {
public:
explicit FenceManagerOpenGL(VideoCore::RasterizerInterface& rasterizer, Tegra::GPU& gpu,
TextureCache& texture_cache, BufferCache& buffer_cache,
QueryCache& query_cache);
protected:
Fence CreateFence(bool is_stubbed) override;
void QueueFence(Fence& fence) override;
bool IsFenceSignaled(Fence& fence) const override;
void WaitFence(Fence& fence) override;
};
} // namespace OpenGL

File diff suppressed because it is too large Load Diff

View File

@@ -1,170 +1,170 @@
// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include <array>
#include <cstring>
#include <type_traits>
#include <utility>
#include "common/bit_field.h"
#include "common/cityhash.h"
#include "common/common_types.h"
#include "shader_recompiler/shader_info.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/renderer_opengl/gl_buffer_cache.h"
#include "video_core/renderer_opengl/gl_resource_manager.h"
#include "video_core/renderer_opengl/gl_texture_cache.h"
#include "video_core/transform_feedback.h"
namespace OpenGL {
namespace ShaderContext {
struct Context;
}
class Device;
class ProgramManager;
using Maxwell = Tegra::Engines::Maxwell3D::Regs;
using ShaderWorker = Common::StatefulThreadWorker<ShaderContext::Context>;
struct GraphicsPipelineKey {
std::array<u64, 6> unique_hashes;
union {
u32 raw;
BitField<0, 1, u32> xfb_enabled;
BitField<1, 1, u32> early_z;
BitField<2, 4, Maxwell::PrimitiveTopology> gs_input_topology;
BitField<6, 2, Maxwell::Tessellation::DomainType> tessellation_primitive;
BitField<8, 2, Maxwell::Tessellation::Spacing> tessellation_spacing;
BitField<10, 1, u32> tessellation_clockwise;
};
std::array<u32, 3> padding;
VideoCommon::TransformFeedbackState xfb_state;
size_t Hash() const noexcept {
return static_cast<size_t>(Common::CityHash64(reinterpret_cast<const char*>(this), Size()));
}
bool operator==(const GraphicsPipelineKey& rhs) const noexcept {
return std::memcmp(this, &rhs, Size()) == 0;
}
bool operator!=(const GraphicsPipelineKey& rhs) const noexcept {
return !operator==(rhs);
}
[[nodiscard]] size_t Size() const noexcept {
if (xfb_enabled != 0) {
return sizeof(GraphicsPipelineKey);
} else {
return offsetof(GraphicsPipelineKey, padding);
}
}
};
static_assert(std::has_unique_object_representations_v<GraphicsPipelineKey>);
static_assert(std::is_trivially_copyable_v<GraphicsPipelineKey>);
static_assert(std::is_trivially_constructible_v<GraphicsPipelineKey>);
class GraphicsPipeline {
public:
explicit GraphicsPipeline(const Device& device, TextureCache& texture_cache_,
BufferCache& buffer_cache_, ProgramManager& program_manager_,
StateTracker& state_tracker_, ShaderWorker* thread_worker,
VideoCore::ShaderNotify* shader_notify,
std::array<std::string, 5> sources,
std::array<std::vector<u32>, 5> sources_spirv,
const std::array<const Shader::Info*, 5>& infos,
const GraphicsPipelineKey& key_);
void Configure(bool is_indexed) {
configure_func(this, is_indexed);
}
void ConfigureTransformFeedback() const {
if (num_xfb_attribs != 0) {
ConfigureTransformFeedbackImpl();
}
}
[[nodiscard]] const GraphicsPipelineKey& Key() const noexcept {
return key;
}
[[nodiscard]] bool WritesGlobalMemory() const noexcept {
return writes_global_memory;
}
[[nodiscard]] bool IsBuilt() noexcept;
template <typename Spec>
static auto MakeConfigureSpecFunc() {
return [](GraphicsPipeline* pipeline, bool is_indexed) {
pipeline->ConfigureImpl<Spec>(is_indexed);
};
}
void SetEngine(Tegra::Engines::Maxwell3D* maxwell3d_, Tegra::MemoryManager* gpu_memory_) {
maxwell3d = maxwell3d_;
gpu_memory = gpu_memory_;
}
private:
template <typename Spec>
void ConfigureImpl(bool is_indexed);
void ConfigureTransformFeedbackImpl() const;
void GenerateTransformFeedbackState();
void WaitForBuild();
TextureCache& texture_cache;
BufferCache& buffer_cache;
Tegra::MemoryManager* gpu_memory;
Tegra::Engines::Maxwell3D* maxwell3d;
ProgramManager& program_manager;
StateTracker& state_tracker;
const GraphicsPipelineKey key;
void (*configure_func)(GraphicsPipeline*, bool){};
std::array<OGLProgram, 5> source_programs;
std::array<OGLAssemblyProgram, 5> assembly_programs;
u32 enabled_stages_mask{};
std::array<Shader::Info, 5> stage_infos{};
std::array<u32, 5> enabled_uniform_buffer_masks{};
VideoCommon::UniformBufferSizes uniform_buffer_sizes{};
std::array<u32, 5> base_uniform_bindings{};
std::array<u32, 5> base_storage_bindings{};
std::array<u32, 5> num_texture_buffers{};
std::array<u32, 5> num_image_buffers{};
bool use_storage_buffers{};
bool writes_global_memory{};
static constexpr std::size_t XFB_ENTRY_STRIDE = 3;
GLsizei num_xfb_attribs{};
GLsizei num_xfb_strides{};
std::array<GLint, 128 * XFB_ENTRY_STRIDE * Maxwell::NumTransformFeedbackBuffers> xfb_attribs{};
std::array<GLint, Maxwell::NumTransformFeedbackBuffers> xfb_streams{};
std::mutex built_mutex;
std::condition_variable built_condvar;
OGLSync built_fence{};
bool is_built{false};
};
} // namespace OpenGL
namespace std {
template <>
struct hash<OpenGL::GraphicsPipelineKey> {
size_t operator()(const OpenGL::GraphicsPipelineKey& k) const noexcept {
return k.Hash();
}
};
} // namespace std
// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include <array>
#include <cstring>
#include <type_traits>
#include <utility>
#include "common/bit_field.h"
#include "common/cityhash.h"
#include "common/common_types.h"
#include "shader_recompiler/shader_info.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/renderer_opengl/gl_buffer_cache.h"
#include "video_core/renderer_opengl/gl_resource_manager.h"
#include "video_core/renderer_opengl/gl_texture_cache.h"
#include "video_core/transform_feedback.h"
namespace OpenGL {
namespace ShaderContext {
struct Context;
}
class Device;
class ProgramManager;
using Maxwell = Tegra::Engines::Maxwell3D::Regs;
using ShaderWorker = Common::StatefulThreadWorker<ShaderContext::Context>;
struct GraphicsPipelineKey {
std::array<u64, 6> unique_hashes;
union {
u32 raw;
BitField<0, 1, u32> xfb_enabled;
BitField<1, 1, u32> early_z;
BitField<2, 4, Maxwell::PrimitiveTopology> gs_input_topology;
BitField<6, 2, Maxwell::Tessellation::DomainType> tessellation_primitive;
BitField<8, 2, Maxwell::Tessellation::Spacing> tessellation_spacing;
BitField<10, 1, u32> tessellation_clockwise;
};
std::array<u32, 3> padding;
VideoCommon::TransformFeedbackState xfb_state;
size_t Hash() const noexcept {
return static_cast<size_t>(Common::CityHash64(reinterpret_cast<const char*>(this), Size()));
}
bool operator==(const GraphicsPipelineKey& rhs) const noexcept {
return std::memcmp(this, &rhs, Size()) == 0;
}
bool operator!=(const GraphicsPipelineKey& rhs) const noexcept {
return !operator==(rhs);
}
[[nodiscard]] size_t Size() const noexcept {
if (xfb_enabled != 0) {
return sizeof(GraphicsPipelineKey);
} else {
return offsetof(GraphicsPipelineKey, padding);
}
}
};
static_assert(std::has_unique_object_representations_v<GraphicsPipelineKey>);
static_assert(std::is_trivially_copyable_v<GraphicsPipelineKey>);
static_assert(std::is_trivially_constructible_v<GraphicsPipelineKey>);
class GraphicsPipeline {
public:
explicit GraphicsPipeline(const Device& device, TextureCache& texture_cache_,
BufferCache& buffer_cache_, ProgramManager& program_manager_,
StateTracker& state_tracker_, ShaderWorker* thread_worker,
VideoCore::ShaderNotify* shader_notify,
std::array<std::string, 5> sources,
std::array<std::vector<u32>, 5> sources_spirv,
const std::array<const Shader::Info*, 5>& infos,
const GraphicsPipelineKey& key_);
void Configure(bool is_indexed) {
configure_func(this, is_indexed);
}
void ConfigureTransformFeedback() const {
if (num_xfb_attribs != 0) {
ConfigureTransformFeedbackImpl();
}
}
[[nodiscard]] const GraphicsPipelineKey& Key() const noexcept {
return key;
}
[[nodiscard]] bool WritesGlobalMemory() const noexcept {
return writes_global_memory;
}
[[nodiscard]] bool IsBuilt() noexcept;
template <typename Spec>
static auto MakeConfigureSpecFunc() {
return [](GraphicsPipeline* pipeline, bool is_indexed) {
pipeline->ConfigureImpl<Spec>(is_indexed);
};
}
void SetEngine(Tegra::Engines::Maxwell3D* maxwell3d_, Tegra::MemoryManager* gpu_memory_) {
maxwell3d = maxwell3d_;
gpu_memory = gpu_memory_;
}
private:
template <typename Spec>
void ConfigureImpl(bool is_indexed);
void ConfigureTransformFeedbackImpl() const;
void GenerateTransformFeedbackState();
void WaitForBuild();
TextureCache& texture_cache;
BufferCache& buffer_cache;
Tegra::MemoryManager* gpu_memory;
Tegra::Engines::Maxwell3D* maxwell3d;
ProgramManager& program_manager;
StateTracker& state_tracker;
const GraphicsPipelineKey key;
void (*configure_func)(GraphicsPipeline*, bool){};
std::array<OGLProgram, 5> source_programs;
std::array<OGLAssemblyProgram, 5> assembly_programs;
u32 enabled_stages_mask{};
std::array<Shader::Info, 5> stage_infos{};
std::array<u32, 5> enabled_uniform_buffer_masks{};
VideoCommon::UniformBufferSizes uniform_buffer_sizes{};
std::array<u32, 5> base_uniform_bindings{};
std::array<u32, 5> base_storage_bindings{};
std::array<u32, 5> num_texture_buffers{};
std::array<u32, 5> num_image_buffers{};
bool use_storage_buffers{};
bool writes_global_memory{};
static constexpr std::size_t XFB_ENTRY_STRIDE = 3;
GLsizei num_xfb_attribs{};
GLsizei num_xfb_strides{};
std::array<GLint, 128 * XFB_ENTRY_STRIDE * Maxwell::NumTransformFeedbackBuffers> xfb_attribs{};
std::array<GLint, Maxwell::NumTransformFeedbackBuffers> xfb_streams{};
std::mutex built_mutex;
std::condition_variable built_condvar;
OGLSync built_fence{};
bool is_built{false};
};
} // namespace OpenGL
namespace std {
template <>
struct hash<OpenGL::GraphicsPipelineKey> {
size_t operator()(const OpenGL::GraphicsPipelineKey& k) const noexcept {
return k.Hash();
}
};
} // namespace std

View File

@@ -1,116 +1,116 @@
// SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include <algorithm>
#include <memory>
#include <utility>
#include <vector>
#include <glad/glad.h>
#include "core/core.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/memory_manager.h"
#include "video_core/renderer_opengl/gl_query_cache.h"
#include "video_core/renderer_opengl/gl_rasterizer.h"
namespace OpenGL {
namespace {
constexpr std::array<GLenum, VideoCore::NumQueryTypes> QueryTargets = {GL_SAMPLES_PASSED};
constexpr GLenum GetTarget(VideoCore::QueryType type) {
return QueryTargets[static_cast<std::size_t>(type)];
}
} // Anonymous namespace
QueryCache::QueryCache(RasterizerOpenGL& rasterizer_)
: QueryCacheBase(rasterizer_), gl_rasterizer{rasterizer_} {}
QueryCache::~QueryCache() = default;
OGLQuery QueryCache::AllocateQuery(VideoCore::QueryType type) {
auto& reserve = query_pools[static_cast<std::size_t>(type)];
OGLQuery query;
if (reserve.empty()) {
query.Create(GetTarget(type));
return query;
}
query = std::move(reserve.back());
reserve.pop_back();
return query;
}
void QueryCache::Reserve(VideoCore::QueryType type, OGLQuery&& query) {
query_pools[static_cast<std::size_t>(type)].push_back(std::move(query));
}
bool QueryCache::AnyCommandQueued() const noexcept {
return gl_rasterizer.AnyCommandQueued();
}
HostCounter::HostCounter(QueryCache& cache_, std::shared_ptr<HostCounter> dependency_,
VideoCore::QueryType type_)
: HostCounterBase{std::move(dependency_)}, cache{cache_}, type{type_}, query{
cache.AllocateQuery(
type)} {
glBeginQuery(GetTarget(type), query.handle);
}
HostCounter::~HostCounter() {
cache.Reserve(type, std::move(query));
}
void HostCounter::EndQuery() {
if (!cache.AnyCommandQueued()) {
// There are chances a query waited on without commands (glDraw, glClear, glDispatch). Not
// having any of these causes a lock. glFlush is considered a command, so we can safely wait
// for this. Insert to the OpenGL command stream a flush.
glFlush();
}
glEndQuery(GetTarget(type));
}
u64 HostCounter::BlockingQuery() const {
GLint64 value;
glGetQueryObjecti64v(query.handle, GL_QUERY_RESULT, &value);
return static_cast<u64>(value);
}
CachedQuery::CachedQuery(QueryCache& cache_, VideoCore::QueryType type_, VAddr cpu_addr_,
u8* host_ptr_)
: CachedQueryBase{cpu_addr_, host_ptr_}, cache{&cache_}, type{type_} {}
CachedQuery::~CachedQuery() = default;
CachedQuery::CachedQuery(CachedQuery&& rhs) noexcept
: CachedQueryBase(std::move(rhs)), cache{rhs.cache}, type{rhs.type} {}
CachedQuery& CachedQuery::operator=(CachedQuery&& rhs) noexcept {
cache = rhs.cache;
type = rhs.type;
CachedQueryBase<HostCounter>::operator=(std::move(rhs));
return *this;
}
void CachedQuery::Flush() {
// Waiting for a query while another query of the same target is enabled locks Nvidia's driver.
// To avoid this disable and re-enable keeping the dependency stream.
// But we only have to do this if we have pending waits to be done.
auto& stream = cache->Stream(type);
const bool slice_counter = WaitPending() && stream.IsEnabled();
if (slice_counter) {
stream.Update(false);
}
VideoCommon::CachedQueryBase<HostCounter>::Flush();
if (slice_counter) {
stream.Update(true);
}
}
} // namespace OpenGL
// SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include <algorithm>
#include <memory>
#include <utility>
#include <vector>
#include <glad/glad.h>
#include "core/core.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/memory_manager.h"
#include "video_core/renderer_opengl/gl_query_cache.h"
#include "video_core/renderer_opengl/gl_rasterizer.h"
namespace OpenGL {
namespace {
constexpr std::array<GLenum, VideoCore::NumQueryTypes> QueryTargets = {GL_SAMPLES_PASSED};
constexpr GLenum GetTarget(VideoCore::QueryType type) {
return QueryTargets[static_cast<std::size_t>(type)];
}
} // Anonymous namespace
QueryCache::QueryCache(RasterizerOpenGL& rasterizer_)
: QueryCacheBase(rasterizer_), gl_rasterizer{rasterizer_} {}
QueryCache::~QueryCache() = default;
OGLQuery QueryCache::AllocateQuery(VideoCore::QueryType type) {
auto& reserve = query_pools[static_cast<std::size_t>(type)];
OGLQuery query;
if (reserve.empty()) {
query.Create(GetTarget(type));
return query;
}
query = std::move(reserve.back());
reserve.pop_back();
return query;
}
void QueryCache::Reserve(VideoCore::QueryType type, OGLQuery&& query) {
query_pools[static_cast<std::size_t>(type)].push_back(std::move(query));
}
bool QueryCache::AnyCommandQueued() const noexcept {
return gl_rasterizer.AnyCommandQueued();
}
HostCounter::HostCounter(QueryCache& cache_, std::shared_ptr<HostCounter> dependency_,
VideoCore::QueryType type_)
: HostCounterBase{std::move(dependency_)}, cache{cache_}, type{type_}, query{
cache.AllocateQuery(
type)} {
glBeginQuery(GetTarget(type), query.handle);
}
HostCounter::~HostCounter() {
cache.Reserve(type, std::move(query));
}
void HostCounter::EndQuery() {
if (!cache.AnyCommandQueued()) {
// There are chances a query waited on without commands (glDraw, glClear, glDispatch). Not
// having any of these causes a lock. glFlush is considered a command, so we can safely wait
// for this. Insert to the OpenGL command stream a flush.
glFlush();
}
glEndQuery(GetTarget(type));
}
u64 HostCounter::BlockingQuery() const {
GLint64 value;
glGetQueryObjecti64v(query.handle, GL_QUERY_RESULT, &value);
return static_cast<u64>(value);
}
CachedQuery::CachedQuery(QueryCache& cache_, VideoCore::QueryType type_, VAddr cpu_addr_,
u8* host_ptr_)
: CachedQueryBase{cpu_addr_, host_ptr_}, cache{&cache_}, type{type_} {}
CachedQuery::~CachedQuery() = default;
CachedQuery::CachedQuery(CachedQuery&& rhs) noexcept
: CachedQueryBase(std::move(rhs)), cache{rhs.cache}, type{rhs.type} {}
CachedQuery& CachedQuery::operator=(CachedQuery&& rhs) noexcept {
cache = rhs.cache;
type = rhs.type;
CachedQueryBase<HostCounter>::operator=(std::move(rhs));
return *this;
}
void CachedQuery::Flush() {
// Waiting for a query while another query of the same target is enabled locks Nvidia's driver.
// To avoid this disable and re-enable keeping the dependency stream.
// But we only have to do this if we have pending waits to be done.
auto& stream = cache->Stream(type);
const bool slice_counter = WaitPending() && stream.IsEnabled();
if (slice_counter) {
stream.Update(false);
}
VideoCommon::CachedQueryBase<HostCounter>::Flush();
if (slice_counter) {
stream.Update(true);
}
}
} // namespace OpenGL

View File

@@ -1,80 +1,80 @@
// SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include <array>
#include <memory>
#include <vector>
#include "common/common_types.h"
#include "video_core/query_cache.h"
#include "video_core/rasterizer_interface.h"
#include "video_core/renderer_opengl/gl_resource_manager.h"
namespace Core {
class System;
}
namespace OpenGL {
class CachedQuery;
class HostCounter;
class QueryCache;
class RasterizerOpenGL;
using CounterStream = VideoCommon::CounterStreamBase<QueryCache, HostCounter>;
class QueryCache final
: public VideoCommon::QueryCacheBase<QueryCache, CachedQuery, CounterStream, HostCounter> {
public:
explicit QueryCache(RasterizerOpenGL& rasterizer_);
~QueryCache();
OGLQuery AllocateQuery(VideoCore::QueryType type);
void Reserve(VideoCore::QueryType type, OGLQuery&& query);
bool AnyCommandQueued() const noexcept;
private:
RasterizerOpenGL& gl_rasterizer;
std::array<std::vector<OGLQuery>, VideoCore::NumQueryTypes> query_pools;
};
class HostCounter final : public VideoCommon::HostCounterBase<QueryCache, HostCounter> {
public:
explicit HostCounter(QueryCache& cache_, std::shared_ptr<HostCounter> dependency_,
VideoCore::QueryType type_);
~HostCounter();
void EndQuery();
private:
u64 BlockingQuery() const override;
QueryCache& cache;
const VideoCore::QueryType type;
OGLQuery query;
};
class CachedQuery final : public VideoCommon::CachedQueryBase<HostCounter> {
public:
explicit CachedQuery(QueryCache& cache_, VideoCore::QueryType type_, VAddr cpu_addr_,
u8* host_ptr_);
~CachedQuery() override;
CachedQuery(CachedQuery&& rhs) noexcept;
CachedQuery& operator=(CachedQuery&& rhs) noexcept;
CachedQuery(const CachedQuery&) = delete;
CachedQuery& operator=(const CachedQuery&) = delete;
void Flush() override;
private:
QueryCache* cache;
VideoCore::QueryType type;
};
} // namespace OpenGL
// SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include <array>
#include <memory>
#include <vector>
#include "common/common_types.h"
#include "video_core/query_cache.h"
#include "video_core/rasterizer_interface.h"
#include "video_core/renderer_opengl/gl_resource_manager.h"
namespace Core {
class System;
}
namespace OpenGL {
class CachedQuery;
class HostCounter;
class QueryCache;
class RasterizerOpenGL;
using CounterStream = VideoCommon::CounterStreamBase<QueryCache, HostCounter>;
class QueryCache final
: public VideoCommon::QueryCacheBase<QueryCache, CachedQuery, CounterStream, HostCounter> {
public:
explicit QueryCache(RasterizerOpenGL& rasterizer_);
~QueryCache();
OGLQuery AllocateQuery(VideoCore::QueryType type);
void Reserve(VideoCore::QueryType type, OGLQuery&& query);
bool AnyCommandQueued() const noexcept;
private:
RasterizerOpenGL& gl_rasterizer;
std::array<std::vector<OGLQuery>, VideoCore::NumQueryTypes> query_pools;
};
class HostCounter final : public VideoCommon::HostCounterBase<QueryCache, HostCounter> {
public:
explicit HostCounter(QueryCache& cache_, std::shared_ptr<HostCounter> dependency_,
VideoCore::QueryType type_);
~HostCounter();
void EndQuery();
private:
u64 BlockingQuery() const override;
QueryCache& cache;
const VideoCore::QueryType type;
OGLQuery query;
};
class CachedQuery final : public VideoCommon::CachedQueryBase<HostCounter> {
public:
explicit CachedQuery(QueryCache& cache_, VideoCore::QueryType type_, VAddr cpu_addr_,
u8* host_ptr_);
~CachedQuery() override;
CachedQuery(CachedQuery&& rhs) noexcept;
CachedQuery& operator=(CachedQuery&& rhs) noexcept;
CachedQuery(const CachedQuery&) = delete;
CachedQuery& operator=(const CachedQuery&) = delete;
void Flush() override;
private:
QueryCache* cache;
VideoCore::QueryType type;
};
} // namespace OpenGL

File diff suppressed because it is too large Load Diff

View File

@@ -1,233 +1,233 @@
// SPDX-FileCopyrightText: 2015 Citra Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include <array>
#include <cstddef>
#include <optional>
#include <boost/container/static_vector.hpp>
#include <glad/glad.h>
#include "common/common_types.h"
#include "video_core/control/channel_state_cache.h"
#include "video_core/engines/maxwell_dma.h"
#include "video_core/rasterizer_accelerated.h"
#include "video_core/rasterizer_interface.h"
#include "video_core/renderer_opengl/gl_buffer_cache.h"
#include "video_core/renderer_opengl/gl_device.h"
#include "video_core/renderer_opengl/gl_fence_manager.h"
#include "video_core/renderer_opengl/gl_query_cache.h"
#include "video_core/renderer_opengl/gl_shader_cache.h"
#include "video_core/renderer_opengl/gl_texture_cache.h"
namespace Core::Memory {
class Memory;
}
namespace Core::Frontend {
class EmuWindow;
}
namespace Tegra {
class MemoryManager;
}
namespace OpenGL {
struct ScreenInfo;
struct ShaderEntries;
struct BindlessSSBO {
GLuint64EXT address;
GLsizei length;
GLsizei padding;
};
static_assert(sizeof(BindlessSSBO) * CHAR_BIT == 128);
class AccelerateDMA : public Tegra::Engines::AccelerateDMAInterface {
public:
explicit AccelerateDMA(BufferCache& buffer_cache);
bool BufferCopy(GPUVAddr src_address, GPUVAddr dest_address, u64 amount) override;
bool BufferClear(GPUVAddr src_address, u64 amount, u32 value) override;
private:
BufferCache& buffer_cache;
};
class RasterizerOpenGL : public VideoCore::RasterizerAccelerated,
protected VideoCommon::ChannelSetupCaches<VideoCommon::ChannelInfo> {
public:
explicit RasterizerOpenGL(Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu_,
Core::Memory::Memory& cpu_memory_, const Device& device_,
ScreenInfo& screen_info_, ProgramManager& program_manager_,
StateTracker& state_tracker_);
~RasterizerOpenGL() override;
void Draw(bool is_indexed, u32 instance_count) override;
void Clear() override;
void DispatchCompute() override;
void ResetCounter(VideoCore::QueryType type) override;
void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional<u64> timestamp) override;
void BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, u32 size) override;
void DisableGraphicsUniformBuffer(size_t stage, u32 index) override;
void FlushAll() override;
void FlushRegion(VAddr addr, u64 size) override;
bool MustFlushRegion(VAddr addr, u64 size) override;
void InvalidateRegion(VAddr addr, u64 size) override;
void OnCPUWrite(VAddr addr, u64 size) override;
void InvalidateGPUCache() override;
void UnmapMemory(VAddr addr, u64 size) override;
void ModifyGPUMemory(size_t as_id, GPUVAddr addr, u64 size) override;
void SignalFence(std::function<void()>&& func) override;
void SyncOperation(std::function<void()>&& func) override;
void SignalSyncPoint(u32 value) override;
void SignalReference() override;
void ReleaseFences() override;
void FlushAndInvalidateRegion(VAddr addr, u64 size) override;
void WaitForIdle() override;
void FragmentBarrier() override;
void TiledCacheBarrier() override;
void FlushCommands() override;
void TickFrame() override;
bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Surface& src,
const Tegra::Engines::Fermi2D::Surface& dst,
const Tegra::Engines::Fermi2D::Config& copy_config) override;
Tegra::Engines::AccelerateDMAInterface& AccessAccelerateDMA() override;
void AccelerateInlineToMemory(GPUVAddr address, size_t copy_size,
std::span<const u8> memory) override;
bool AccelerateDisplay(const Tegra::FramebufferConfig& config, VAddr framebuffer_addr,
u32 pixel_stride) override;
void LoadDiskResources(u64 title_id, std::stop_token stop_loading,
const VideoCore::DiskResourceLoadCallback& callback) override;
/// Returns true when there are commands queued to the OpenGL server.
bool AnyCommandQueued() const {
return num_queued_commands > 0;
}
void InitializeChannel(Tegra::Control::ChannelState& channel) override;
void BindChannel(Tegra::Control::ChannelState& channel) override;
void ReleaseChannel(s32 channel_id) override;
private:
static constexpr size_t MAX_TEXTURES = 192;
static constexpr size_t MAX_IMAGES = 48;
static constexpr size_t MAX_IMAGE_VIEWS = MAX_TEXTURES + MAX_IMAGES;
/// Syncs state to match guest's
void SyncState();
/// Syncs the viewport and depth range to match the guest state
void SyncViewport();
/// Syncs the depth clamp state
void SyncDepthClamp();
/// Syncs the clip enabled status to match the guest state
void SyncClipEnabled(u32 clip_mask);
/// Syncs the clip coefficients to match the guest state
void SyncClipCoef();
/// Syncs the cull mode to match the guest state
void SyncCullMode();
/// Syncs the primitve restart to match the guest state
void SyncPrimitiveRestart();
/// Syncs the depth test state to match the guest state
void SyncDepthTestState();
/// Syncs the stencil test state to match the guest state
void SyncStencilTestState();
/// Syncs the blend state to match the guest state
void SyncBlendState();
/// Syncs the LogicOp state to match the guest state
void SyncLogicOpState();
/// Syncs the the color clamp state
void SyncFragmentColorClampState();
/// Syncs the alpha coverage and alpha to one
void SyncMultiSampleState();
/// Syncs the scissor test state to match the guest state
void SyncScissorTest();
/// Syncs the point state to match the guest state
void SyncPointState();
/// Syncs the line state to match the guest state
void SyncLineState();
/// Syncs the rasterizer enable state to match the guest state
void SyncRasterizeEnable();
/// Syncs polygon modes to match the guest state
void SyncPolygonModes();
/// Syncs Color Mask
void SyncColorMask();
/// Syncs the polygon offsets
void SyncPolygonOffset();
/// Syncs the alpha test state to match the guest state
void SyncAlphaTest();
/// Syncs the framebuffer sRGB state to match the guest state
void SyncFramebufferSRGB();
/// Syncs vertex formats to match the guest state
void SyncVertexFormats();
/// Syncs vertex instances to match the guest state
void SyncVertexInstances();
/// Begin a transform feedback
void BeginTransformFeedback(GraphicsPipeline* pipeline, GLenum primitive_mode);
/// End a transform feedback
void EndTransformFeedback();
void BindInlineIndexBuffer();
Tegra::GPU& gpu;
const Device& device;
ScreenInfo& screen_info;
ProgramManager& program_manager;
StateTracker& state_tracker;
TextureCacheRuntime texture_cache_runtime;
TextureCache texture_cache;
BufferCacheRuntime buffer_cache_runtime;
BufferCache buffer_cache;
ShaderCache shader_cache;
QueryCache query_cache;
AccelerateDMA accelerate_dma;
FenceManagerOpenGL fence_manager;
boost::container::static_vector<u32, MAX_IMAGE_VIEWS> image_view_indices;
std::array<ImageViewId, MAX_IMAGE_VIEWS> image_view_ids;
boost::container::static_vector<GLuint, MAX_TEXTURES> sampler_handles;
std::array<GLuint, MAX_TEXTURES> texture_handles{};
std::array<GLuint, MAX_IMAGES> image_handles{};
/// Number of commands queued to the OpenGL driver. Resetted on flush.
size_t num_queued_commands = 0;
bool has_written_global_memory = false;
u32 last_clip_distance_mask = 0;
};
} // namespace OpenGL
// SPDX-FileCopyrightText: 2015 Citra Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include <array>
#include <cstddef>
#include <optional>
#include <boost/container/static_vector.hpp>
#include <glad/glad.h>
#include "common/common_types.h"
#include "video_core/control/channel_state_cache.h"
#include "video_core/engines/maxwell_dma.h"
#include "video_core/rasterizer_accelerated.h"
#include "video_core/rasterizer_interface.h"
#include "video_core/renderer_opengl/gl_buffer_cache.h"
#include "video_core/renderer_opengl/gl_device.h"
#include "video_core/renderer_opengl/gl_fence_manager.h"
#include "video_core/renderer_opengl/gl_query_cache.h"
#include "video_core/renderer_opengl/gl_shader_cache.h"
#include "video_core/renderer_opengl/gl_texture_cache.h"
namespace Core::Memory {
class Memory;
}
namespace Core::Frontend {
class EmuWindow;
}
namespace Tegra {
class MemoryManager;
}
namespace OpenGL {
struct ScreenInfo;
struct ShaderEntries;
struct BindlessSSBO {
GLuint64EXT address;
GLsizei length;
GLsizei padding;
};
static_assert(sizeof(BindlessSSBO) * CHAR_BIT == 128);
class AccelerateDMA : public Tegra::Engines::AccelerateDMAInterface {
public:
explicit AccelerateDMA(BufferCache& buffer_cache);
bool BufferCopy(GPUVAddr src_address, GPUVAddr dest_address, u64 amount) override;
bool BufferClear(GPUVAddr src_address, u64 amount, u32 value) override;
private:
BufferCache& buffer_cache;
};
class RasterizerOpenGL : public VideoCore::RasterizerAccelerated,
protected VideoCommon::ChannelSetupCaches<VideoCommon::ChannelInfo> {
public:
explicit RasterizerOpenGL(Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu_,
Core::Memory::Memory& cpu_memory_, const Device& device_,
ScreenInfo& screen_info_, ProgramManager& program_manager_,
StateTracker& state_tracker_);
~RasterizerOpenGL() override;
void Draw(bool is_indexed, u32 instance_count) override;
void Clear() override;
void DispatchCompute() override;
void ResetCounter(VideoCore::QueryType type) override;
void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional<u64> timestamp) override;
void BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, u32 size) override;
void DisableGraphicsUniformBuffer(size_t stage, u32 index) override;
void FlushAll() override;
void FlushRegion(VAddr addr, u64 size) override;
bool MustFlushRegion(VAddr addr, u64 size) override;
void InvalidateRegion(VAddr addr, u64 size) override;
void OnCPUWrite(VAddr addr, u64 size) override;
void InvalidateGPUCache() override;
void UnmapMemory(VAddr addr, u64 size) override;
void ModifyGPUMemory(size_t as_id, GPUVAddr addr, u64 size) override;
void SignalFence(std::function<void()>&& func) override;
void SyncOperation(std::function<void()>&& func) override;
void SignalSyncPoint(u32 value) override;
void SignalReference() override;
void ReleaseFences() override;
void FlushAndInvalidateRegion(VAddr addr, u64 size) override;
void WaitForIdle() override;
void FragmentBarrier() override;
void TiledCacheBarrier() override;
void FlushCommands() override;
void TickFrame() override;
bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Surface& src,
const Tegra::Engines::Fermi2D::Surface& dst,
const Tegra::Engines::Fermi2D::Config& copy_config) override;
Tegra::Engines::AccelerateDMAInterface& AccessAccelerateDMA() override;
void AccelerateInlineToMemory(GPUVAddr address, size_t copy_size,
std::span<const u8> memory) override;
bool AccelerateDisplay(const Tegra::FramebufferConfig& config, VAddr framebuffer_addr,
u32 pixel_stride) override;
void LoadDiskResources(u64 title_id, std::stop_token stop_loading,
const VideoCore::DiskResourceLoadCallback& callback) override;
/// Returns true when there are commands queued to the OpenGL server.
bool AnyCommandQueued() const {
return num_queued_commands > 0;
}
void InitializeChannel(Tegra::Control::ChannelState& channel) override;
void BindChannel(Tegra::Control::ChannelState& channel) override;
void ReleaseChannel(s32 channel_id) override;
private:
static constexpr size_t MAX_TEXTURES = 192;
static constexpr size_t MAX_IMAGES = 48;
static constexpr size_t MAX_IMAGE_VIEWS = MAX_TEXTURES + MAX_IMAGES;
/// Syncs state to match guest's
void SyncState();
/// Syncs the viewport and depth range to match the guest state
void SyncViewport();
/// Syncs the depth clamp state
void SyncDepthClamp();
/// Syncs the clip enabled status to match the guest state
void SyncClipEnabled(u32 clip_mask);
/// Syncs the clip coefficients to match the guest state
void SyncClipCoef();
/// Syncs the cull mode to match the guest state
void SyncCullMode();
/// Syncs the primitve restart to match the guest state
void SyncPrimitiveRestart();
/// Syncs the depth test state to match the guest state
void SyncDepthTestState();
/// Syncs the stencil test state to match the guest state
void SyncStencilTestState();
/// Syncs the blend state to match the guest state
void SyncBlendState();
/// Syncs the LogicOp state to match the guest state
void SyncLogicOpState();
/// Syncs the the color clamp state
void SyncFragmentColorClampState();
/// Syncs the alpha coverage and alpha to one
void SyncMultiSampleState();
/// Syncs the scissor test state to match the guest state
void SyncScissorTest();
/// Syncs the point state to match the guest state
void SyncPointState();
/// Syncs the line state to match the guest state
void SyncLineState();
/// Syncs the rasterizer enable state to match the guest state
void SyncRasterizeEnable();
/// Syncs polygon modes to match the guest state
void SyncPolygonModes();
/// Syncs Color Mask
void SyncColorMask();
/// Syncs the polygon offsets
void SyncPolygonOffset();
/// Syncs the alpha test state to match the guest state
void SyncAlphaTest();
/// Syncs the framebuffer sRGB state to match the guest state
void SyncFramebufferSRGB();
/// Syncs vertex formats to match the guest state
void SyncVertexFormats();
/// Syncs vertex instances to match the guest state
void SyncVertexInstances();
/// Begin a transform feedback
void BeginTransformFeedback(GraphicsPipeline* pipeline, GLenum primitive_mode);
/// End a transform feedback
void EndTransformFeedback();
void BindInlineIndexBuffer();
Tegra::GPU& gpu;
const Device& device;
ScreenInfo& screen_info;
ProgramManager& program_manager;
StateTracker& state_tracker;
TextureCacheRuntime texture_cache_runtime;
TextureCache texture_cache;
BufferCacheRuntime buffer_cache_runtime;
BufferCache buffer_cache;
ShaderCache shader_cache;
QueryCache query_cache;
AccelerateDMA accelerate_dma;
FenceManagerOpenGL fence_manager;
boost::container::static_vector<u32, MAX_IMAGE_VIEWS> image_view_indices;
std::array<ImageViewId, MAX_IMAGE_VIEWS> image_view_ids;
boost::container::static_vector<GLuint, MAX_TEXTURES> sampler_handles;
std::array<GLuint, MAX_TEXTURES> texture_handles{};
std::array<GLuint, MAX_IMAGES> image_handles{};
/// Number of commands queued to the OpenGL driver. Resetted on flush.
size_t num_queued_commands = 0;
bool has_written_global_memory = false;
u32 last_clip_distance_mask = 0;
};
} // namespace OpenGL

View File

@@ -1,200 +1,200 @@
// SPDX-FileCopyrightText: 2015 Citra Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include <string_view>
#include <glad/glad.h>
#include "common/microprofile.h"
#include "video_core/renderer_opengl/gl_resource_manager.h"
#include "video_core/renderer_opengl/gl_shader_util.h"
MICROPROFILE_DEFINE(OpenGL_ResourceCreation, "OpenGL", "Resource Creation", MP_RGB(128, 128, 192));
MICROPROFILE_DEFINE(OpenGL_ResourceDeletion, "OpenGL", "Resource Deletion", MP_RGB(128, 128, 192));
namespace OpenGL {
void OGLRenderbuffer::Create() {
if (handle != 0)
return;
MICROPROFILE_SCOPE(OpenGL_ResourceCreation);
glCreateRenderbuffers(1, &handle);
}
void OGLRenderbuffer::Release() {
if (handle == 0)
return;
MICROPROFILE_SCOPE(OpenGL_ResourceDeletion);
glDeleteRenderbuffers(1, &handle);
handle = 0;
}
void OGLTexture::Create(GLenum target) {
if (handle != 0)
return;
MICROPROFILE_SCOPE(OpenGL_ResourceCreation);
glCreateTextures(target, 1, &handle);
}
void OGLTexture::Release() {
if (handle == 0)
return;
MICROPROFILE_SCOPE(OpenGL_ResourceDeletion);
glDeleteTextures(1, &handle);
handle = 0;
}
void OGLTextureView::Create() {
if (handle != 0)
return;
MICROPROFILE_SCOPE(OpenGL_ResourceCreation);
glGenTextures(1, &handle);
}
void OGLTextureView::Release() {
if (handle == 0)
return;
MICROPROFILE_SCOPE(OpenGL_ResourceDeletion);
glDeleteTextures(1, &handle);
handle = 0;
}
void OGLSampler::Create() {
if (handle != 0)
return;
MICROPROFILE_SCOPE(OpenGL_ResourceCreation);
glCreateSamplers(1, &handle);
}
void OGLSampler::Release() {
if (handle == 0)
return;
MICROPROFILE_SCOPE(OpenGL_ResourceDeletion);
glDeleteSamplers(1, &handle);
handle = 0;
}
void OGLShader::Release() {
if (handle == 0)
return;
MICROPROFILE_SCOPE(OpenGL_ResourceDeletion);
glDeleteShader(handle);
handle = 0;
}
void OGLProgram::Release() {
if (handle == 0)
return;
MICROPROFILE_SCOPE(OpenGL_ResourceDeletion);
glDeleteProgram(handle);
handle = 0;
}
void OGLAssemblyProgram::Release() {
if (handle == 0) {
return;
}
MICROPROFILE_SCOPE(OpenGL_ResourceDeletion);
glDeleteProgramsARB(1, &handle);
handle = 0;
}
void OGLPipeline::Create() {
if (handle != 0)
return;
MICROPROFILE_SCOPE(OpenGL_ResourceCreation);
glGenProgramPipelines(1, &handle);
}
void OGLPipeline::Release() {
if (handle == 0)
return;
MICROPROFILE_SCOPE(OpenGL_ResourceDeletion);
glDeleteProgramPipelines(1, &handle);
handle = 0;
}
void OGLBuffer::Create() {
if (handle != 0)
return;
MICROPROFILE_SCOPE(OpenGL_ResourceCreation);
glCreateBuffers(1, &handle);
}
void OGLBuffer::Release() {
if (handle == 0)
return;
MICROPROFILE_SCOPE(OpenGL_ResourceDeletion);
glDeleteBuffers(1, &handle);
handle = 0;
}
void OGLSync::Create() {
if (handle != 0)
return;
// Don't profile here, this one is expected to happen ingame.
handle = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0);
}
void OGLSync::Release() {
if (handle == 0)
return;
// Don't profile here, this one is expected to happen ingame.
glDeleteSync(handle);
handle = 0;
}
void OGLFramebuffer::Create() {
if (handle != 0)
return;
MICROPROFILE_SCOPE(OpenGL_ResourceCreation);
// Bind to READ_FRAMEBUFFER to stop Nvidia's driver from creating an EXT_framebuffer instead of
// a core framebuffer. EXT framebuffer attachments have to match in size and can be shared
// across contexts. yuzu doesn't share framebuffers across contexts and we need attachments with
// mismatching size, this is why core framebuffers are preferred.
glGenFramebuffers(1, &handle);
glBindFramebuffer(GL_READ_FRAMEBUFFER, handle);
}
void OGLFramebuffer::Release() {
if (handle == 0)
return;
MICROPROFILE_SCOPE(OpenGL_ResourceDeletion);
glDeleteFramebuffers(1, &handle);
handle = 0;
}
void OGLQuery::Create(GLenum target) {
if (handle != 0)
return;
MICROPROFILE_SCOPE(OpenGL_ResourceCreation);
glCreateQueries(target, 1, &handle);
}
void OGLQuery::Release() {
if (handle == 0)
return;
MICROPROFILE_SCOPE(OpenGL_ResourceDeletion);
glDeleteQueries(1, &handle);
handle = 0;
}
} // namespace OpenGL
// SPDX-FileCopyrightText: 2015 Citra Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include <string_view>
#include <glad/glad.h>
#include "common/microprofile.h"
#include "video_core/renderer_opengl/gl_resource_manager.h"
#include "video_core/renderer_opengl/gl_shader_util.h"
MICROPROFILE_DEFINE(OpenGL_ResourceCreation, "OpenGL", "Resource Creation", MP_RGB(128, 128, 192));
MICROPROFILE_DEFINE(OpenGL_ResourceDeletion, "OpenGL", "Resource Deletion", MP_RGB(128, 128, 192));
namespace OpenGL {
void OGLRenderbuffer::Create() {
if (handle != 0)
return;
MICROPROFILE_SCOPE(OpenGL_ResourceCreation);
glCreateRenderbuffers(1, &handle);
}
void OGLRenderbuffer::Release() {
if (handle == 0)
return;
MICROPROFILE_SCOPE(OpenGL_ResourceDeletion);
glDeleteRenderbuffers(1, &handle);
handle = 0;
}
void OGLTexture::Create(GLenum target) {
if (handle != 0)
return;
MICROPROFILE_SCOPE(OpenGL_ResourceCreation);
glCreateTextures(target, 1, &handle);
}
void OGLTexture::Release() {
if (handle == 0)
return;
MICROPROFILE_SCOPE(OpenGL_ResourceDeletion);
glDeleteTextures(1, &handle);
handle = 0;
}
void OGLTextureView::Create() {
if (handle != 0)
return;
MICROPROFILE_SCOPE(OpenGL_ResourceCreation);
glGenTextures(1, &handle);
}
void OGLTextureView::Release() {
if (handle == 0)
return;
MICROPROFILE_SCOPE(OpenGL_ResourceDeletion);
glDeleteTextures(1, &handle);
handle = 0;
}
void OGLSampler::Create() {
if (handle != 0)
return;
MICROPROFILE_SCOPE(OpenGL_ResourceCreation);
glCreateSamplers(1, &handle);
}
void OGLSampler::Release() {
if (handle == 0)
return;
MICROPROFILE_SCOPE(OpenGL_ResourceDeletion);
glDeleteSamplers(1, &handle);
handle = 0;
}
void OGLShader::Release() {
if (handle == 0)
return;
MICROPROFILE_SCOPE(OpenGL_ResourceDeletion);
glDeleteShader(handle);
handle = 0;
}
void OGLProgram::Release() {
if (handle == 0)
return;
MICROPROFILE_SCOPE(OpenGL_ResourceDeletion);
glDeleteProgram(handle);
handle = 0;
}
void OGLAssemblyProgram::Release() {
if (handle == 0) {
return;
}
MICROPROFILE_SCOPE(OpenGL_ResourceDeletion);
glDeleteProgramsARB(1, &handle);
handle = 0;
}
void OGLPipeline::Create() {
if (handle != 0)
return;
MICROPROFILE_SCOPE(OpenGL_ResourceCreation);
glGenProgramPipelines(1, &handle);
}
void OGLPipeline::Release() {
if (handle == 0)
return;
MICROPROFILE_SCOPE(OpenGL_ResourceDeletion);
glDeleteProgramPipelines(1, &handle);
handle = 0;
}
void OGLBuffer::Create() {
if (handle != 0)
return;
MICROPROFILE_SCOPE(OpenGL_ResourceCreation);
glCreateBuffers(1, &handle);
}
void OGLBuffer::Release() {
if (handle == 0)
return;
MICROPROFILE_SCOPE(OpenGL_ResourceDeletion);
glDeleteBuffers(1, &handle);
handle = 0;
}
void OGLSync::Create() {
if (handle != 0)
return;
// Don't profile here, this one is expected to happen ingame.
handle = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0);
}
void OGLSync::Release() {
if (handle == 0)
return;
// Don't profile here, this one is expected to happen ingame.
glDeleteSync(handle);
handle = 0;
}
void OGLFramebuffer::Create() {
if (handle != 0)
return;
MICROPROFILE_SCOPE(OpenGL_ResourceCreation);
// Bind to READ_FRAMEBUFFER to stop Nvidia's driver from creating an EXT_framebuffer instead of
// a core framebuffer. EXT framebuffer attachments have to match in size and can be shared
// across contexts. yuzu doesn't share framebuffers across contexts and we need attachments with
// mismatching size, this is why core framebuffers are preferred.
glGenFramebuffers(1, &handle);
glBindFramebuffer(GL_READ_FRAMEBUFFER, handle);
}
void OGLFramebuffer::Release() {
if (handle == 0)
return;
MICROPROFILE_SCOPE(OpenGL_ResourceDeletion);
glDeleteFramebuffers(1, &handle);
handle = 0;
}
void OGLQuery::Create(GLenum target) {
if (handle != 0)
return;
MICROPROFILE_SCOPE(OpenGL_ResourceCreation);
glCreateQueries(target, 1, &handle);
}
void OGLQuery::Release() {
if (handle == 0)
return;
MICROPROFILE_SCOPE(OpenGL_ResourceDeletion);
glDeleteQueries(1, &handle);
handle = 0;
}
} // namespace OpenGL

View File

@@ -1,323 +1,323 @@
// SPDX-FileCopyrightText: 2015 Citra Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include <string_view>
#include <utility>
#include <glad/glad.h>
#include "common/common_funcs.h"
namespace OpenGL {
class OGLRenderbuffer final {
public:
YUZU_NON_COPYABLE(OGLRenderbuffer);
OGLRenderbuffer() = default;
OGLRenderbuffer(OGLRenderbuffer&& o) noexcept : handle(std::exchange(o.handle, 0)) {}
~OGLRenderbuffer() {
Release();
}
OGLRenderbuffer& operator=(OGLRenderbuffer&& o) noexcept {
Release();
handle = std::exchange(o.handle, 0);
return *this;
}
/// Creates a new internal OpenGL resource and stores the handle
void Create();
/// Deletes the internal OpenGL resource
void Release();
GLuint handle = 0;
};
class OGLTexture final {
public:
YUZU_NON_COPYABLE(OGLTexture);
OGLTexture() = default;
OGLTexture(OGLTexture&& o) noexcept : handle(std::exchange(o.handle, 0)) {}
~OGLTexture() {
Release();
}
OGLTexture& operator=(OGLTexture&& o) noexcept {
Release();
handle = std::exchange(o.handle, 0);
return *this;
}
/// Creates a new internal OpenGL resource and stores the handle
void Create(GLenum target);
/// Deletes the internal OpenGL resource
void Release();
GLuint handle = 0;
};
class OGLTextureView final {
public:
YUZU_NON_COPYABLE(OGLTextureView);
OGLTextureView() = default;
OGLTextureView(OGLTextureView&& o) noexcept : handle(std::exchange(o.handle, 0)) {}
~OGLTextureView() {
Release();
}
OGLTextureView& operator=(OGLTextureView&& o) noexcept {
Release();
handle = std::exchange(o.handle, 0);
return *this;
}
/// Creates a new internal OpenGL resource and stores the handle
void Create();
/// Deletes the internal OpenGL resource
void Release();
GLuint handle = 0;
};
class OGLSampler final {
public:
YUZU_NON_COPYABLE(OGLSampler);
OGLSampler() = default;
OGLSampler(OGLSampler&& o) noexcept : handle(std::exchange(o.handle, 0)) {}
~OGLSampler() {
Release();
}
OGLSampler& operator=(OGLSampler&& o) noexcept {
Release();
handle = std::exchange(o.handle, 0);
return *this;
}
/// Creates a new internal OpenGL resource and stores the handle
void Create();
/// Deletes the internal OpenGL resource
void Release();
GLuint handle = 0;
};
class OGLShader final {
public:
YUZU_NON_COPYABLE(OGLShader);
OGLShader() = default;
OGLShader(OGLShader&& o) noexcept : handle(std::exchange(o.handle, 0)) {}
~OGLShader() {
Release();
}
OGLShader& operator=(OGLShader&& o) noexcept {
Release();
handle = std::exchange(o.handle, 0);
return *this;
}
void Release();
GLuint handle = 0;
};
class OGLProgram final {
public:
YUZU_NON_COPYABLE(OGLProgram);
OGLProgram() = default;
OGLProgram(OGLProgram&& o) noexcept : handle(std::exchange(o.handle, 0)) {}
~OGLProgram() {
Release();
}
OGLProgram& operator=(OGLProgram&& o) noexcept {
Release();
handle = std::exchange(o.handle, 0);
return *this;
}
/// Deletes the internal OpenGL resource
void Release();
GLuint handle = 0;
};
class OGLAssemblyProgram final {
public:
YUZU_NON_COPYABLE(OGLAssemblyProgram);
OGLAssemblyProgram() = default;
OGLAssemblyProgram(OGLAssemblyProgram&& o) noexcept : handle(std::exchange(o.handle, 0)) {}
~OGLAssemblyProgram() {
Release();
}
OGLAssemblyProgram& operator=(OGLAssemblyProgram&& o) noexcept {
Release();
handle = std::exchange(o.handle, 0);
return *this;
}
/// Deletes the internal OpenGL resource
void Release();
GLuint handle = 0;
};
class OGLPipeline final {
public:
YUZU_NON_COPYABLE(OGLPipeline);
OGLPipeline() = default;
OGLPipeline(OGLPipeline&& o) noexcept : handle{std::exchange<GLuint>(o.handle, 0)} {}
~OGLPipeline() {
Release();
}
OGLPipeline& operator=(OGLPipeline&& o) noexcept {
handle = std::exchange<GLuint>(o.handle, 0);
return *this;
}
/// Creates a new internal OpenGL resource and stores the handle
void Create();
/// Deletes the internal OpenGL resource
void Release();
GLuint handle = 0;
};
class OGLBuffer final {
public:
YUZU_NON_COPYABLE(OGLBuffer);
OGLBuffer() = default;
OGLBuffer(OGLBuffer&& o) noexcept : handle(std::exchange(o.handle, 0)) {}
~OGLBuffer() {
Release();
}
OGLBuffer& operator=(OGLBuffer&& o) noexcept {
Release();
handle = std::exchange(o.handle, 0);
return *this;
}
/// Creates a new internal OpenGL resource and stores the handle
void Create();
/// Deletes the internal OpenGL resource
void Release();
GLuint handle = 0;
};
class OGLSync final {
public:
YUZU_NON_COPYABLE(OGLSync);
OGLSync() = default;
OGLSync(OGLSync&& o) noexcept : handle(std::exchange(o.handle, nullptr)) {}
~OGLSync() {
Release();
}
OGLSync& operator=(OGLSync&& o) noexcept {
Release();
handle = std::exchange(o.handle, nullptr);
return *this;
}
/// Creates a new internal OpenGL resource and stores the handle
void Create();
/// Deletes the internal OpenGL resource
void Release();
GLsync handle = 0;
};
class OGLFramebuffer final {
public:
YUZU_NON_COPYABLE(OGLFramebuffer);
OGLFramebuffer() = default;
OGLFramebuffer(OGLFramebuffer&& o) noexcept : handle(std::exchange(o.handle, 0)) {}
~OGLFramebuffer() {
Release();
}
OGLFramebuffer& operator=(OGLFramebuffer&& o) noexcept {
Release();
handle = std::exchange(o.handle, 0);
return *this;
}
/// Creates a new internal OpenGL resource and stores the handle
void Create();
/// Deletes the internal OpenGL resource
void Release();
GLuint handle = 0;
};
class OGLQuery final {
public:
YUZU_NON_COPYABLE(OGLQuery);
OGLQuery() = default;
OGLQuery(OGLQuery&& o) noexcept : handle(std::exchange(o.handle, 0)) {}
~OGLQuery() {
Release();
}
OGLQuery& operator=(OGLQuery&& o) noexcept {
Release();
handle = std::exchange(o.handle, 0);
return *this;
}
/// Creates a new internal OpenGL resource and stores the handle
void Create(GLenum target);
/// Deletes the internal OpenGL resource
void Release();
GLuint handle = 0;
};
} // namespace OpenGL
// SPDX-FileCopyrightText: 2015 Citra Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include <string_view>
#include <utility>
#include <glad/glad.h>
#include "common/common_funcs.h"
namespace OpenGL {
class OGLRenderbuffer final {
public:
YUZU_NON_COPYABLE(OGLRenderbuffer);
OGLRenderbuffer() = default;
OGLRenderbuffer(OGLRenderbuffer&& o) noexcept : handle(std::exchange(o.handle, 0)) {}
~OGLRenderbuffer() {
Release();
}
OGLRenderbuffer& operator=(OGLRenderbuffer&& o) noexcept {
Release();
handle = std::exchange(o.handle, 0);
return *this;
}
/// Creates a new internal OpenGL resource and stores the handle
void Create();
/// Deletes the internal OpenGL resource
void Release();
GLuint handle = 0;
};
class OGLTexture final {
public:
YUZU_NON_COPYABLE(OGLTexture);
OGLTexture() = default;
OGLTexture(OGLTexture&& o) noexcept : handle(std::exchange(o.handle, 0)) {}
~OGLTexture() {
Release();
}
OGLTexture& operator=(OGLTexture&& o) noexcept {
Release();
handle = std::exchange(o.handle, 0);
return *this;
}
/// Creates a new internal OpenGL resource and stores the handle
void Create(GLenum target);
/// Deletes the internal OpenGL resource
void Release();
GLuint handle = 0;
};
class OGLTextureView final {
public:
YUZU_NON_COPYABLE(OGLTextureView);
OGLTextureView() = default;
OGLTextureView(OGLTextureView&& o) noexcept : handle(std::exchange(o.handle, 0)) {}
~OGLTextureView() {
Release();
}
OGLTextureView& operator=(OGLTextureView&& o) noexcept {
Release();
handle = std::exchange(o.handle, 0);
return *this;
}
/// Creates a new internal OpenGL resource and stores the handle
void Create();
/// Deletes the internal OpenGL resource
void Release();
GLuint handle = 0;
};
class OGLSampler final {
public:
YUZU_NON_COPYABLE(OGLSampler);
OGLSampler() = default;
OGLSampler(OGLSampler&& o) noexcept : handle(std::exchange(o.handle, 0)) {}
~OGLSampler() {
Release();
}
OGLSampler& operator=(OGLSampler&& o) noexcept {
Release();
handle = std::exchange(o.handle, 0);
return *this;
}
/// Creates a new internal OpenGL resource and stores the handle
void Create();
/// Deletes the internal OpenGL resource
void Release();
GLuint handle = 0;
};
class OGLShader final {
public:
YUZU_NON_COPYABLE(OGLShader);
OGLShader() = default;
OGLShader(OGLShader&& o) noexcept : handle(std::exchange(o.handle, 0)) {}
~OGLShader() {
Release();
}
OGLShader& operator=(OGLShader&& o) noexcept {
Release();
handle = std::exchange(o.handle, 0);
return *this;
}
void Release();
GLuint handle = 0;
};
class OGLProgram final {
public:
YUZU_NON_COPYABLE(OGLProgram);
OGLProgram() = default;
OGLProgram(OGLProgram&& o) noexcept : handle(std::exchange(o.handle, 0)) {}
~OGLProgram() {
Release();
}
OGLProgram& operator=(OGLProgram&& o) noexcept {
Release();
handle = std::exchange(o.handle, 0);
return *this;
}
/// Deletes the internal OpenGL resource
void Release();
GLuint handle = 0;
};
class OGLAssemblyProgram final {
public:
YUZU_NON_COPYABLE(OGLAssemblyProgram);
OGLAssemblyProgram() = default;
OGLAssemblyProgram(OGLAssemblyProgram&& o) noexcept : handle(std::exchange(o.handle, 0)) {}
~OGLAssemblyProgram() {
Release();
}
OGLAssemblyProgram& operator=(OGLAssemblyProgram&& o) noexcept {
Release();
handle = std::exchange(o.handle, 0);
return *this;
}
/// Deletes the internal OpenGL resource
void Release();
GLuint handle = 0;
};
class OGLPipeline final {
public:
YUZU_NON_COPYABLE(OGLPipeline);
OGLPipeline() = default;
OGLPipeline(OGLPipeline&& o) noexcept : handle{std::exchange<GLuint>(o.handle, 0)} {}
~OGLPipeline() {
Release();
}
OGLPipeline& operator=(OGLPipeline&& o) noexcept {
handle = std::exchange<GLuint>(o.handle, 0);
return *this;
}
/// Creates a new internal OpenGL resource and stores the handle
void Create();
/// Deletes the internal OpenGL resource
void Release();
GLuint handle = 0;
};
class OGLBuffer final {
public:
YUZU_NON_COPYABLE(OGLBuffer);
OGLBuffer() = default;
OGLBuffer(OGLBuffer&& o) noexcept : handle(std::exchange(o.handle, 0)) {}
~OGLBuffer() {
Release();
}
OGLBuffer& operator=(OGLBuffer&& o) noexcept {
Release();
handle = std::exchange(o.handle, 0);
return *this;
}
/// Creates a new internal OpenGL resource and stores the handle
void Create();
/// Deletes the internal OpenGL resource
void Release();
GLuint handle = 0;
};
class OGLSync final {
public:
YUZU_NON_COPYABLE(OGLSync);
OGLSync() = default;
OGLSync(OGLSync&& o) noexcept : handle(std::exchange(o.handle, nullptr)) {}
~OGLSync() {
Release();
}
OGLSync& operator=(OGLSync&& o) noexcept {
Release();
handle = std::exchange(o.handle, nullptr);
return *this;
}
/// Creates a new internal OpenGL resource and stores the handle
void Create();
/// Deletes the internal OpenGL resource
void Release();
GLsync handle = 0;
};
class OGLFramebuffer final {
public:
YUZU_NON_COPYABLE(OGLFramebuffer);
OGLFramebuffer() = default;
OGLFramebuffer(OGLFramebuffer&& o) noexcept : handle(std::exchange(o.handle, 0)) {}
~OGLFramebuffer() {
Release();
}
OGLFramebuffer& operator=(OGLFramebuffer&& o) noexcept {
Release();
handle = std::exchange(o.handle, 0);
return *this;
}
/// Creates a new internal OpenGL resource and stores the handle
void Create();
/// Deletes the internal OpenGL resource
void Release();
GLuint handle = 0;
};
class OGLQuery final {
public:
YUZU_NON_COPYABLE(OGLQuery);
OGLQuery() = default;
OGLQuery(OGLQuery&& o) noexcept : handle(std::exchange(o.handle, 0)) {}
~OGLQuery() {
Release();
}
OGLQuery& operator=(OGLQuery&& o) noexcept {
Release();
handle = std::exchange(o.handle, 0);
return *this;
}
/// Creates a new internal OpenGL resource and stores the handle
void Create(GLenum target);
/// Deletes the internal OpenGL resource
void Release();
GLuint handle = 0;
};
} // namespace OpenGL

File diff suppressed because it is too large Load Diff

View File

@@ -1,88 +1,88 @@
// SPDX-FileCopyrightText: Copyright 2018 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include <filesystem>
#include <stop_token>
#include <unordered_map>
#include "common/common_types.h"
#include "common/thread_worker.h"
#include "shader_recompiler/host_translate_info.h"
#include "shader_recompiler/profile.h"
#include "video_core/renderer_opengl/gl_compute_pipeline.h"
#include "video_core/renderer_opengl/gl_graphics_pipeline.h"
#include "video_core/renderer_opengl/gl_shader_context.h"
#include "video_core/shader_cache.h"
namespace Tegra {
class MemoryManager;
}
namespace OpenGL {
class Device;
class ProgramManager;
class RasterizerOpenGL;
using ShaderWorker = Common::StatefulThreadWorker<ShaderContext::Context>;
class ShaderCache : public VideoCommon::ShaderCache {
public:
explicit ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindow& emu_window_,
const Device& device_, TextureCache& texture_cache_,
BufferCache& buffer_cache_, ProgramManager& program_manager_,
StateTracker& state_tracker_, VideoCore::ShaderNotify& shader_notify_);
~ShaderCache();
void LoadDiskResources(u64 title_id, std::stop_token stop_loading,
const VideoCore::DiskResourceLoadCallback& callback);
[[nodiscard]] GraphicsPipeline* CurrentGraphicsPipeline();
[[nodiscard]] ComputePipeline* CurrentComputePipeline();
private:
GraphicsPipeline* CurrentGraphicsPipelineSlowPath();
[[nodiscard]] GraphicsPipeline* BuiltPipeline(GraphicsPipeline* pipeline) const noexcept;
std::unique_ptr<GraphicsPipeline> CreateGraphicsPipeline();
std::unique_ptr<GraphicsPipeline> CreateGraphicsPipeline(
ShaderContext::ShaderPools& pools, const GraphicsPipelineKey& key,
std::span<Shader::Environment* const> envs, bool build_in_parallel);
std::unique_ptr<ComputePipeline> CreateComputePipeline(const ComputePipelineKey& key,
const VideoCommon::ShaderInfo* shader);
std::unique_ptr<ComputePipeline> CreateComputePipeline(ShaderContext::ShaderPools& pools,
const ComputePipelineKey& key,
Shader::Environment& env);
std::unique_ptr<ShaderWorker> CreateWorkers() const;
Core::Frontend::EmuWindow& emu_window;
const Device& device;
TextureCache& texture_cache;
BufferCache& buffer_cache;
ProgramManager& program_manager;
StateTracker& state_tracker;
VideoCore::ShaderNotify& shader_notify;
const bool use_asynchronous_shaders;
GraphicsPipelineKey graphics_key{};
GraphicsPipeline* current_pipeline{};
ShaderContext::ShaderPools main_pools;
std::unordered_map<GraphicsPipelineKey, std::unique_ptr<GraphicsPipeline>> graphics_cache;
std::unordered_map<ComputePipelineKey, std::unique_ptr<ComputePipeline>> compute_cache;
Shader::Profile profile;
Shader::HostTranslateInfo host_info;
std::filesystem::path shader_cache_filename;
std::unique_ptr<ShaderWorker> workers;
};
} // namespace OpenGL
// SPDX-FileCopyrightText: Copyright 2018 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include <filesystem>
#include <stop_token>
#include <unordered_map>
#include "common/common_types.h"
#include "common/thread_worker.h"
#include "shader_recompiler/host_translate_info.h"
#include "shader_recompiler/profile.h"
#include "video_core/renderer_opengl/gl_compute_pipeline.h"
#include "video_core/renderer_opengl/gl_graphics_pipeline.h"
#include "video_core/renderer_opengl/gl_shader_context.h"
#include "video_core/shader_cache.h"
namespace Tegra {
class MemoryManager;
}
namespace OpenGL {
class Device;
class ProgramManager;
class RasterizerOpenGL;
using ShaderWorker = Common::StatefulThreadWorker<ShaderContext::Context>;
class ShaderCache : public VideoCommon::ShaderCache {
public:
explicit ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindow& emu_window_,
const Device& device_, TextureCache& texture_cache_,
BufferCache& buffer_cache_, ProgramManager& program_manager_,
StateTracker& state_tracker_, VideoCore::ShaderNotify& shader_notify_);
~ShaderCache();
void LoadDiskResources(u64 title_id, std::stop_token stop_loading,
const VideoCore::DiskResourceLoadCallback& callback);
[[nodiscard]] GraphicsPipeline* CurrentGraphicsPipeline();
[[nodiscard]] ComputePipeline* CurrentComputePipeline();
private:
GraphicsPipeline* CurrentGraphicsPipelineSlowPath();
[[nodiscard]] GraphicsPipeline* BuiltPipeline(GraphicsPipeline* pipeline) const noexcept;
std::unique_ptr<GraphicsPipeline> CreateGraphicsPipeline();
std::unique_ptr<GraphicsPipeline> CreateGraphicsPipeline(
ShaderContext::ShaderPools& pools, const GraphicsPipelineKey& key,
std::span<Shader::Environment* const> envs, bool build_in_parallel);
std::unique_ptr<ComputePipeline> CreateComputePipeline(const ComputePipelineKey& key,
const VideoCommon::ShaderInfo* shader);
std::unique_ptr<ComputePipeline> CreateComputePipeline(ShaderContext::ShaderPools& pools,
const ComputePipelineKey& key,
Shader::Environment& env);
std::unique_ptr<ShaderWorker> CreateWorkers() const;
Core::Frontend::EmuWindow& emu_window;
const Device& device;
TextureCache& texture_cache;
BufferCache& buffer_cache;
ProgramManager& program_manager;
StateTracker& state_tracker;
VideoCore::ShaderNotify& shader_notify;
const bool use_asynchronous_shaders;
GraphicsPipelineKey graphics_key{};
GraphicsPipeline* current_pipeline{};
ShaderContext::ShaderPools main_pools;
std::unordered_map<GraphicsPipelineKey, std::unique_ptr<GraphicsPipeline>> graphics_cache;
std::unordered_map<ComputePipelineKey, std::unique_ptr<ComputePipeline>> compute_cache;
Shader::Profile profile;
Shader::HostTranslateInfo host_info;
std::filesystem::path shader_cache_filename;
std::unique_ptr<ShaderWorker> workers;
};
} // namespace OpenGL

View File

@@ -1,32 +1,32 @@
// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include "core/frontend/emu_window.h"
#include "shader_recompiler/frontend/ir/basic_block.h"
#include "shader_recompiler/frontend/maxwell/control_flow.h"
namespace OpenGL::ShaderContext {
struct ShaderPools {
void ReleaseContents() {
flow_block.ReleaseContents();
block.ReleaseContents();
inst.ReleaseContents();
}
Shader::ObjectPool<Shader::IR::Inst> inst;
Shader::ObjectPool<Shader::IR::Block> block;
Shader::ObjectPool<Shader::Maxwell::Flow::Block> flow_block;
};
struct Context {
explicit Context(Core::Frontend::EmuWindow& emu_window)
: gl_context{emu_window.CreateSharedContext()}, scoped{*gl_context} {}
std::unique_ptr<Core::Frontend::GraphicsContext> gl_context;
Core::Frontend::GraphicsContext::Scoped scoped;
ShaderPools pools;
};
} // namespace OpenGL::ShaderContext
// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include "core/frontend/emu_window.h"
#include "shader_recompiler/frontend/ir/basic_block.h"
#include "shader_recompiler/frontend/maxwell/control_flow.h"
namespace OpenGL::ShaderContext {
struct ShaderPools {
void ReleaseContents() {
flow_block.ReleaseContents();
block.ReleaseContents();
inst.ReleaseContents();
}
Shader::ObjectPool<Shader::IR::Inst> inst;
Shader::ObjectPool<Shader::IR::Block> block;
Shader::ObjectPool<Shader::Maxwell::Flow::Block> flow_block;
};
struct Context {
explicit Context(Core::Frontend::EmuWindow& emu_window)
: gl_context{emu_window.CreateSharedContext()}, scoped{*gl_context} {}
std::unique_ptr<Core::Frontend::GraphicsContext> gl_context;
Core::Frontend::GraphicsContext::Scoped scoped;
ShaderPools pools;
};
} // namespace OpenGL::ShaderContext

View File

@@ -1,2 +1,2 @@
// SPDX-FileCopyrightText: Copyright 2018 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
// SPDX-FileCopyrightText: Copyright 2018 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later

View File

@@ -1,144 +1,144 @@
// SPDX-FileCopyrightText: Copyright 2018 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include <array>
#include <span>
#include <glad/glad.h>
#include "video_core/renderer_opengl/gl_device.h"
#include "video_core/renderer_opengl/gl_resource_manager.h"
namespace OpenGL {
class ProgramManager {
static constexpr size_t NUM_STAGES = 5;
static constexpr std::array ASSEMBLY_PROGRAM_ENUMS{
GL_VERTEX_PROGRAM_NV, GL_TESS_CONTROL_PROGRAM_NV, GL_TESS_EVALUATION_PROGRAM_NV,
GL_GEOMETRY_PROGRAM_NV, GL_FRAGMENT_PROGRAM_NV,
};
public:
explicit ProgramManager(const Device& device) {
glCreateProgramPipelines(1, &pipeline.handle);
if (device.UseAssemblyShaders()) {
glEnable(GL_COMPUTE_PROGRAM_NV);
}
}
void BindComputeProgram(GLuint program) {
glUseProgram(program);
is_compute_bound = true;
}
void BindComputeAssemblyProgram(GLuint program) {
if (current_assembly_compute_program != program) {
current_assembly_compute_program = program;
glBindProgramARB(GL_COMPUTE_PROGRAM_NV, program);
}
UnbindPipeline();
}
void BindSourcePrograms(std::span<const OGLProgram, NUM_STAGES> programs) {
static constexpr std::array<GLenum, 5> stage_enums{
GL_VERTEX_SHADER_BIT, GL_TESS_CONTROL_SHADER_BIT, GL_TESS_EVALUATION_SHADER_BIT,
GL_GEOMETRY_SHADER_BIT, GL_FRAGMENT_SHADER_BIT,
};
for (size_t stage = 0; stage < NUM_STAGES; ++stage) {
if (current_programs[stage] != programs[stage].handle) {
current_programs[stage] = programs[stage].handle;
glUseProgramStages(pipeline.handle, stage_enums[stage], programs[stage].handle);
}
}
BindPipeline();
}
void BindPresentPrograms(GLuint vertex, GLuint fragment) {
if (current_programs[0] != vertex) {
current_programs[0] = vertex;
glUseProgramStages(pipeline.handle, GL_VERTEX_SHADER_BIT, vertex);
}
if (current_programs[4] != fragment) {
current_programs[4] = fragment;
glUseProgramStages(pipeline.handle, GL_FRAGMENT_SHADER_BIT, fragment);
}
glUseProgramStages(
pipeline.handle,
GL_TESS_CONTROL_SHADER_BIT | GL_TESS_EVALUATION_SHADER_BIT | GL_GEOMETRY_SHADER_BIT, 0);
current_programs[1] = 0;
current_programs[2] = 0;
current_programs[3] = 0;
if (current_stage_mask != 0) {
current_stage_mask = 0;
for (const GLenum program_type : ASSEMBLY_PROGRAM_ENUMS) {
glDisable(program_type);
}
}
BindPipeline();
}
void BindAssemblyPrograms(std::span<const OGLAssemblyProgram, NUM_STAGES> programs,
u32 stage_mask) {
const u32 changed_mask = current_stage_mask ^ stage_mask;
current_stage_mask = stage_mask;
if (changed_mask != 0) {
for (size_t stage = 0; stage < NUM_STAGES; ++stage) {
if (((changed_mask >> stage) & 1) != 0) {
if (((stage_mask >> stage) & 1) != 0) {
glEnable(ASSEMBLY_PROGRAM_ENUMS[stage]);
} else {
glDisable(ASSEMBLY_PROGRAM_ENUMS[stage]);
}
}
}
}
for (size_t stage = 0; stage < NUM_STAGES; ++stage) {
if (current_programs[stage] != programs[stage].handle) {
current_programs[stage] = programs[stage].handle;
glBindProgramARB(ASSEMBLY_PROGRAM_ENUMS[stage], programs[stage].handle);
}
}
UnbindPipeline();
}
void RestoreGuestCompute() {}
private:
void BindPipeline() {
if (!is_pipeline_bound) {
is_pipeline_bound = true;
glBindProgramPipeline(pipeline.handle);
}
UnbindCompute();
}
void UnbindPipeline() {
if (is_pipeline_bound) {
is_pipeline_bound = false;
glBindProgramPipeline(0);
}
UnbindCompute();
}
void UnbindCompute() {
if (is_compute_bound) {
is_compute_bound = false;
glUseProgram(0);
}
}
OGLPipeline pipeline;
bool is_pipeline_bound{};
bool is_compute_bound{};
u32 current_stage_mask = 0;
std::array<GLuint, NUM_STAGES> current_programs{};
GLuint current_assembly_compute_program = 0;
};
} // namespace OpenGL
// SPDX-FileCopyrightText: Copyright 2018 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include <array>
#include <span>
#include <glad/glad.h>
#include "video_core/renderer_opengl/gl_device.h"
#include "video_core/renderer_opengl/gl_resource_manager.h"
namespace OpenGL {
class ProgramManager {
static constexpr size_t NUM_STAGES = 5;
static constexpr std::array ASSEMBLY_PROGRAM_ENUMS{
GL_VERTEX_PROGRAM_NV, GL_TESS_CONTROL_PROGRAM_NV, GL_TESS_EVALUATION_PROGRAM_NV,
GL_GEOMETRY_PROGRAM_NV, GL_FRAGMENT_PROGRAM_NV,
};
public:
explicit ProgramManager(const Device& device) {
glCreateProgramPipelines(1, &pipeline.handle);
if (device.UseAssemblyShaders()) {
glEnable(GL_COMPUTE_PROGRAM_NV);
}
}
void BindComputeProgram(GLuint program) {
glUseProgram(program);
is_compute_bound = true;
}
void BindComputeAssemblyProgram(GLuint program) {
if (current_assembly_compute_program != program) {
current_assembly_compute_program = program;
glBindProgramARB(GL_COMPUTE_PROGRAM_NV, program);
}
UnbindPipeline();
}
void BindSourcePrograms(std::span<const OGLProgram, NUM_STAGES> programs) {
static constexpr std::array<GLenum, 5> stage_enums{
GL_VERTEX_SHADER_BIT, GL_TESS_CONTROL_SHADER_BIT, GL_TESS_EVALUATION_SHADER_BIT,
GL_GEOMETRY_SHADER_BIT, GL_FRAGMENT_SHADER_BIT,
};
for (size_t stage = 0; stage < NUM_STAGES; ++stage) {
if (current_programs[stage] != programs[stage].handle) {
current_programs[stage] = programs[stage].handle;
glUseProgramStages(pipeline.handle, stage_enums[stage], programs[stage].handle);
}
}
BindPipeline();
}
void BindPresentPrograms(GLuint vertex, GLuint fragment) {
if (current_programs[0] != vertex) {
current_programs[0] = vertex;
glUseProgramStages(pipeline.handle, GL_VERTEX_SHADER_BIT, vertex);
}
if (current_programs[4] != fragment) {
current_programs[4] = fragment;
glUseProgramStages(pipeline.handle, GL_FRAGMENT_SHADER_BIT, fragment);
}
glUseProgramStages(
pipeline.handle,
GL_TESS_CONTROL_SHADER_BIT | GL_TESS_EVALUATION_SHADER_BIT | GL_GEOMETRY_SHADER_BIT, 0);
current_programs[1] = 0;
current_programs[2] = 0;
current_programs[3] = 0;
if (current_stage_mask != 0) {
current_stage_mask = 0;
for (const GLenum program_type : ASSEMBLY_PROGRAM_ENUMS) {
glDisable(program_type);
}
}
BindPipeline();
}
void BindAssemblyPrograms(std::span<const OGLAssemblyProgram, NUM_STAGES> programs,
u32 stage_mask) {
const u32 changed_mask = current_stage_mask ^ stage_mask;
current_stage_mask = stage_mask;
if (changed_mask != 0) {
for (size_t stage = 0; stage < NUM_STAGES; ++stage) {
if (((changed_mask >> stage) & 1) != 0) {
if (((stage_mask >> stage) & 1) != 0) {
glEnable(ASSEMBLY_PROGRAM_ENUMS[stage]);
} else {
glDisable(ASSEMBLY_PROGRAM_ENUMS[stage]);
}
}
}
}
for (size_t stage = 0; stage < NUM_STAGES; ++stage) {
if (current_programs[stage] != programs[stage].handle) {
current_programs[stage] = programs[stage].handle;
glBindProgramARB(ASSEMBLY_PROGRAM_ENUMS[stage], programs[stage].handle);
}
}
UnbindPipeline();
}
void RestoreGuestCompute() {}
private:
void BindPipeline() {
if (!is_pipeline_bound) {
is_pipeline_bound = true;
glBindProgramPipeline(pipeline.handle);
}
UnbindCompute();
}
void UnbindPipeline() {
if (is_pipeline_bound) {
is_pipeline_bound = false;
glBindProgramPipeline(0);
}
UnbindCompute();
}
void UnbindCompute() {
if (is_compute_bound) {
is_compute_bound = false;
glUseProgram(0);
}
}
OGLPipeline pipeline;
bool is_pipeline_bound{};
bool is_compute_bound{};
u32 current_stage_mask = 0;
std::array<GLuint, NUM_STAGES> current_programs{};
GLuint current_assembly_compute_program = 0;
};
} // namespace OpenGL

View File

@@ -1,111 +1,111 @@
// SPDX-FileCopyrightText: 2014 Citra Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include <string_view>
#include <vector>
#include <glad/glad.h>
#include "common/logging/log.h"
#include "common/settings.h"
#include "video_core/renderer_opengl/gl_shader_util.h"
namespace OpenGL {
static OGLProgram LinkSeparableProgram(GLuint shader) {
OGLProgram program;
program.handle = glCreateProgram();
glProgramParameteri(program.handle, GL_PROGRAM_SEPARABLE, GL_TRUE);
glAttachShader(program.handle, shader);
glLinkProgram(program.handle);
glDetachShader(program.handle, shader);
if (!Settings::values.renderer_debug) {
return program;
}
GLint link_status{};
glGetProgramiv(program.handle, GL_LINK_STATUS, &link_status);
GLint log_length{};
glGetProgramiv(program.handle, GL_INFO_LOG_LENGTH, &log_length);
if (log_length == 0) {
return program;
}
std::string log(log_length, 0);
glGetProgramInfoLog(program.handle, log_length, nullptr, log.data());
if (link_status == GL_FALSE) {
LOG_ERROR(Render_OpenGL, "{}", log);
} else {
LOG_WARNING(Render_OpenGL, "{}", log);
}
return program;
}
static void LogShader(GLuint shader, std::string_view code = {}) {
GLint shader_status{};
glGetShaderiv(shader, GL_COMPILE_STATUS, &shader_status);
if (shader_status == GL_FALSE) {
LOG_ERROR(Render_OpenGL, "Failed to build shader");
}
GLint log_length{};
glGetShaderiv(shader, GL_INFO_LOG_LENGTH, &log_length);
if (log_length == 0) {
return;
}
std::string log(log_length, 0);
glGetShaderInfoLog(shader, log_length, nullptr, log.data());
if (shader_status == GL_FALSE) {
LOG_ERROR(Render_OpenGL, "{}", log);
if (!code.empty()) {
LOG_INFO(Render_OpenGL, "\n{}", code);
}
} else {
LOG_WARNING(Render_OpenGL, "{}", log);
}
}
OGLProgram CreateProgram(std::string_view code, GLenum stage) {
OGLShader shader;
shader.handle = glCreateShader(stage);
const GLint length = static_cast<GLint>(code.size());
const GLchar* const code_ptr = code.data();
glShaderSource(shader.handle, 1, &code_ptr, &length);
glCompileShader(shader.handle);
if (Settings::values.renderer_debug) {
LogShader(shader.handle, code);
}
return LinkSeparableProgram(shader.handle);
}
OGLProgram CreateProgram(std::span<const u32> code, GLenum stage) {
OGLShader shader;
shader.handle = glCreateShader(stage);
glShaderBinary(1, &shader.handle, GL_SHADER_BINARY_FORMAT_SPIR_V_ARB, code.data(),
static_cast<GLsizei>(code.size_bytes()));
glSpecializeShader(shader.handle, "main", 0, nullptr, nullptr);
if (Settings::values.renderer_debug) {
LogShader(shader.handle);
}
return LinkSeparableProgram(shader.handle);
}
OGLAssemblyProgram CompileProgram(std::string_view code, GLenum target) {
OGLAssemblyProgram program;
glGenProgramsARB(1, &program.handle);
glNamedProgramStringEXT(program.handle, target, GL_PROGRAM_FORMAT_ASCII_ARB,
static_cast<GLsizei>(code.size()), code.data());
if (Settings::values.renderer_debug) {
const auto err = reinterpret_cast<const char*>(glGetString(GL_PROGRAM_ERROR_STRING_NV));
if (err && *err) {
if (std::strstr(err, "error")) {
LOG_CRITICAL(Render_OpenGL, "\n{}", err);
LOG_INFO(Render_OpenGL, "\n{}", code);
} else {
LOG_WARNING(Render_OpenGL, "\n{}", err);
}
}
}
return program;
}
} // namespace OpenGL
// SPDX-FileCopyrightText: 2014 Citra Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include <string_view>
#include <vector>
#include <glad/glad.h>
#include "common/logging/log.h"
#include "common/settings.h"
#include "video_core/renderer_opengl/gl_shader_util.h"
namespace OpenGL {
static OGLProgram LinkSeparableProgram(GLuint shader) {
OGLProgram program;
program.handle = glCreateProgram();
glProgramParameteri(program.handle, GL_PROGRAM_SEPARABLE, GL_TRUE);
glAttachShader(program.handle, shader);
glLinkProgram(program.handle);
glDetachShader(program.handle, shader);
if (!Settings::values.renderer_debug) {
return program;
}
GLint link_status{};
glGetProgramiv(program.handle, GL_LINK_STATUS, &link_status);
GLint log_length{};
glGetProgramiv(program.handle, GL_INFO_LOG_LENGTH, &log_length);
if (log_length == 0) {
return program;
}
std::string log(log_length, 0);
glGetProgramInfoLog(program.handle, log_length, nullptr, log.data());
if (link_status == GL_FALSE) {
LOG_ERROR(Render_OpenGL, "{}", log);
} else {
LOG_WARNING(Render_OpenGL, "{}", log);
}
return program;
}
static void LogShader(GLuint shader, std::string_view code = {}) {
GLint shader_status{};
glGetShaderiv(shader, GL_COMPILE_STATUS, &shader_status);
if (shader_status == GL_FALSE) {
LOG_ERROR(Render_OpenGL, "Failed to build shader");
}
GLint log_length{};
glGetShaderiv(shader, GL_INFO_LOG_LENGTH, &log_length);
if (log_length == 0) {
return;
}
std::string log(log_length, 0);
glGetShaderInfoLog(shader, log_length, nullptr, log.data());
if (shader_status == GL_FALSE) {
LOG_ERROR(Render_OpenGL, "{}", log);
if (!code.empty()) {
LOG_INFO(Render_OpenGL, "\n{}", code);
}
} else {
LOG_WARNING(Render_OpenGL, "{}", log);
}
}
OGLProgram CreateProgram(std::string_view code, GLenum stage) {
OGLShader shader;
shader.handle = glCreateShader(stage);
const GLint length = static_cast<GLint>(code.size());
const GLchar* const code_ptr = code.data();
glShaderSource(shader.handle, 1, &code_ptr, &length);
glCompileShader(shader.handle);
if (Settings::values.renderer_debug) {
LogShader(shader.handle, code);
}
return LinkSeparableProgram(shader.handle);
}
OGLProgram CreateProgram(std::span<const u32> code, GLenum stage) {
OGLShader shader;
shader.handle = glCreateShader(stage);
glShaderBinary(1, &shader.handle, GL_SHADER_BINARY_FORMAT_SPIR_V_ARB, code.data(),
static_cast<GLsizei>(code.size_bytes()));
glSpecializeShader(shader.handle, "main", 0, nullptr, nullptr);
if (Settings::values.renderer_debug) {
LogShader(shader.handle);
}
return LinkSeparableProgram(shader.handle);
}
OGLAssemblyProgram CompileProgram(std::string_view code, GLenum target) {
OGLAssemblyProgram program;
glGenProgramsARB(1, &program.handle);
glNamedProgramStringEXT(program.handle, target, GL_PROGRAM_FORMAT_ASCII_ARB,
static_cast<GLsizei>(code.size()), code.data());
if (Settings::values.renderer_debug) {
const auto err = reinterpret_cast<const char*>(glGetString(GL_PROGRAM_ERROR_STRING_NV));
if (err && *err) {
if (std::strstr(err, "error")) {
LOG_CRITICAL(Render_OpenGL, "\n{}", err);
LOG_INFO(Render_OpenGL, "\n{}", code);
} else {
LOG_WARNING(Render_OpenGL, "\n{}", err);
}
}
}
return program;
}
} // namespace OpenGL

View File

@@ -1,21 +1,21 @@
// SPDX-FileCopyrightText: 2014 Citra Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include <span>
#include <string_view>
#include <glad/glad.h>
#include "video_core/renderer_opengl/gl_resource_manager.h"
namespace OpenGL {
OGLProgram CreateProgram(std::string_view code, GLenum stage);
OGLProgram CreateProgram(std::span<const u32> code, GLenum stage);
OGLAssemblyProgram CompileProgram(std::string_view code, GLenum target);
} // namespace OpenGL
// SPDX-FileCopyrightText: 2014 Citra Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include <span>
#include <string_view>
#include <glad/glad.h>
#include "video_core/renderer_opengl/gl_resource_manager.h"
namespace OpenGL {
OGLProgram CreateProgram(std::string_view code, GLenum stage);
OGLProgram CreateProgram(std::span<const u32> code, GLenum stage);
OGLAssemblyProgram CompileProgram(std::string_view code, GLenum target);
} // namespace OpenGL

View File

@@ -1,243 +1,243 @@
// SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include <algorithm>
#include <array>
#include <cstddef>
#include "common/common_types.h"
#include "core/core.h"
#include "video_core/control/channel_state.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/renderer_opengl/gl_state_tracker.h"
#define OFF(field_name) MAXWELL3D_REG_INDEX(field_name)
#define NUM(field_name) (sizeof(Maxwell3D::Regs::field_name) / (sizeof(u32)))
namespace OpenGL {
namespace {
using namespace Dirty;
using namespace VideoCommon::Dirty;
using Tegra::Engines::Maxwell3D;
using Regs = Maxwell3D::Regs;
using Tables = Maxwell3D::DirtyState::Tables;
using Table = Maxwell3D::DirtyState::Table;
void SetupDirtyColorMasks(Tables& tables) {
tables[0][OFF(color_mask_common)] = ColorMaskCommon;
for (std::size_t rt = 0; rt < Regs::NumRenderTargets; ++rt) {
const std::size_t offset = OFF(color_mask) + rt * NUM(color_mask[0]);
FillBlock(tables[0], offset, NUM(color_mask[0]), ColorMask0 + rt);
}
FillBlock(tables[1], OFF(color_mask), NUM(color_mask), ColorMasks);
}
void SetupDirtyVertexInstances(Tables& tables) {
static constexpr std::size_t instance_base_offset = 3;
for (std::size_t i = 0; i < Regs::NumVertexArrays; ++i) {
const std::size_t array_offset = OFF(vertex_streams) + i * NUM(vertex_streams[0]);
const std::size_t instance_array_offset = array_offset + instance_base_offset;
tables[0][instance_array_offset] = static_cast<u8>(VertexInstance0 + i);
tables[1][instance_array_offset] = VertexInstances;
const std::size_t instance_offset = OFF(vertex_stream_instances) + i;
tables[0][instance_offset] = static_cast<u8>(VertexInstance0 + i);
tables[1][instance_offset] = VertexInstances;
}
}
void SetupDirtyVertexFormat(Tables& tables) {
for (std::size_t i = 0; i < Regs::NumVertexAttributes; ++i) {
const std::size_t offset = OFF(vertex_attrib_format) + i * NUM(vertex_attrib_format[0]);
FillBlock(tables[0], offset, NUM(vertex_attrib_format[0]), VertexFormat0 + i);
}
FillBlock(tables[1], OFF(vertex_attrib_format), Regs::NumVertexAttributes, VertexFormats);
}
void SetupDirtyViewports(Tables& tables) {
for (std::size_t i = 0; i < Regs::NumViewports; ++i) {
const std::size_t transf_offset = OFF(viewport_transform) + i * NUM(viewport_transform[0]);
const std::size_t viewport_offset = OFF(viewports) + i * NUM(viewports[0]);
FillBlock(tables[0], transf_offset, NUM(viewport_transform[0]), Viewport0 + i);
FillBlock(tables[0], viewport_offset, NUM(viewports[0]), Viewport0 + i);
}
FillBlock(tables[1], OFF(viewport_transform), NUM(viewport_transform), Viewports);
FillBlock(tables[1], OFF(viewports), NUM(viewports), Viewports);
tables[0][OFF(viewport_scale_offset_enbled)] = ViewportTransform;
tables[1][OFF(viewport_scale_offset_enbled)] = Viewports;
}
void SetupDirtyScissors(Tables& tables) {
for (std::size_t i = 0; i < Regs::NumViewports; ++i) {
const std::size_t offset = OFF(scissor_test) + i * NUM(scissor_test[0]);
FillBlock(tables[0], offset, NUM(scissor_test[0]), Scissor0 + i);
}
FillBlock(tables[1], OFF(scissor_test), NUM(scissor_test), Scissors);
}
void SetupDirtyPolygonModes(Tables& tables) {
tables[0][OFF(polygon_mode_front)] = PolygonModeFront;
tables[0][OFF(polygon_mode_back)] = PolygonModeBack;
tables[1][OFF(polygon_mode_front)] = PolygonModes;
tables[1][OFF(polygon_mode_back)] = PolygonModes;
tables[0][OFF(fill_via_triangle_mode)] = PolygonModes;
}
void SetupDirtyDepthTest(Tables& tables) {
auto& table = tables[0];
table[OFF(depth_test_enable)] = DepthTest;
table[OFF(depth_write_enabled)] = DepthMask;
table[OFF(depth_test_func)] = DepthTest;
}
void SetupDirtyStencilTest(Tables& tables) {
static constexpr std::array offsets = {
OFF(stencil_enable), OFF(stencil_front_op.func), OFF(stencil_front_ref),
OFF(stencil_front_func_mask), OFF(stencil_front_op.fail), OFF(stencil_front_op.zfail),
OFF(stencil_front_op.zpass), OFF(stencil_front_mask), OFF(stencil_two_side_enable),
OFF(stencil_back_op.func), OFF(stencil_back_ref), OFF(stencil_back_func_mask),
OFF(stencil_back_op.fail), OFF(stencil_back_op.zfail), OFF(stencil_back_op.zpass),
OFF(stencil_back_mask)};
for (const auto offset : offsets) {
tables[0][offset] = StencilTest;
}
}
void SetupDirtyAlphaTest(Tables& tables) {
auto& table = tables[0];
table[OFF(alpha_test_ref)] = AlphaTest;
table[OFF(alpha_test_func)] = AlphaTest;
table[OFF(alpha_test_enabled)] = AlphaTest;
}
void SetupDirtyBlend(Tables& tables) {
FillBlock(tables[0], OFF(blend_color), NUM(blend_color), BlendColor);
tables[0][OFF(blend_per_target_enabled)] = BlendIndependentEnabled;
for (std::size_t i = 0; i < Regs::NumRenderTargets; ++i) {
const std::size_t offset = OFF(blend_per_target) + i * NUM(blend_per_target[0]);
FillBlock(tables[0], offset, NUM(blend_per_target[0]), BlendState0 + i);
tables[0][OFF(blend.enable) + i] = static_cast<u8>(BlendState0 + i);
}
FillBlock(tables[1], OFF(blend_per_target), NUM(blend_per_target), BlendStates);
FillBlock(tables[1], OFF(blend), NUM(blend), BlendStates);
}
void SetupDirtyPrimitiveRestart(Tables& tables) {
FillBlock(tables[0], OFF(primitive_restart), NUM(primitive_restart), PrimitiveRestart);
}
void SetupDirtyPolygonOffset(Tables& tables) {
auto& table = tables[0];
table[OFF(polygon_offset_fill_enable)] = PolygonOffset;
table[OFF(polygon_offset_line_enable)] = PolygonOffset;
table[OFF(polygon_offset_point_enable)] = PolygonOffset;
table[OFF(slope_scale_depth_bias)] = PolygonOffset;
table[OFF(depth_bias)] = PolygonOffset;
table[OFF(depth_bias_clamp)] = PolygonOffset;
}
void SetupDirtyMultisampleControl(Tables& tables) {
FillBlock(tables[0], OFF(anti_alias_alpha_control), NUM(anti_alias_alpha_control),
MultisampleControl);
}
void SetupDirtyRasterizeEnable(Tables& tables) {
tables[0][OFF(rasterize_enable)] = RasterizeEnable;
}
void SetupDirtyFramebufferSRGB(Tables& tables) {
tables[0][OFF(framebuffer_srgb)] = FramebufferSRGB;
}
void SetupDirtyLogicOp(Tables& tables) {
FillBlock(tables[0], OFF(logic_op), NUM(logic_op), LogicOp);
}
void SetupDirtyFragmentClampColor(Tables& tables) {
tables[0][OFF(frag_color_clamp)] = FragmentClampColor;
}
void SetupDirtyPointSize(Tables& tables) {
tables[0][OFF(point_size_attribute)] = PointSize;
tables[0][OFF(point_size)] = PointSize;
tables[0][OFF(point_sprite_enable)] = PointSize;
}
void SetupDirtyLineWidth(Tables& tables) {
tables[0][OFF(line_width_smooth)] = LineWidth;
tables[0][OFF(line_width_aliased)] = LineWidth;
tables[0][OFF(line_anti_alias_enable)] = LineWidth;
}
void SetupDirtyClipControl(Tables& tables) {
auto& table = tables[0];
table[OFF(window_origin)] = ClipControl;
table[OFF(depth_mode)] = ClipControl;
}
void SetupDirtyDepthClampEnabled(Tables& tables) {
tables[0][OFF(viewport_clip_control)] = DepthClampEnabled;
}
void SetupDirtyMisc(Tables& tables) {
auto& table = tables[0];
table[OFF(user_clip_enable)] = ClipDistances;
table[OFF(gl_front_face)] = FrontFace;
table[OFF(gl_cull_test_enabled)] = CullTest;
table[OFF(gl_cull_face)] = CullTest;
}
} // Anonymous namespace
void StateTracker::SetupTables(Tegra::Control::ChannelState& channel_state) {
auto& tables{channel_state.maxwell_3d->dirty.tables};
SetupDirtyFlags(tables);
SetupDirtyColorMasks(tables);
SetupDirtyViewports(tables);
SetupDirtyScissors(tables);
SetupDirtyVertexInstances(tables);
SetupDirtyVertexFormat(tables);
SetupDirtyPolygonModes(tables);
SetupDirtyDepthTest(tables);
SetupDirtyStencilTest(tables);
SetupDirtyAlphaTest(tables);
SetupDirtyBlend(tables);
SetupDirtyPrimitiveRestart(tables);
SetupDirtyPolygonOffset(tables);
SetupDirtyMultisampleControl(tables);
SetupDirtyRasterizeEnable(tables);
SetupDirtyFramebufferSRGB(tables);
SetupDirtyLogicOp(tables);
SetupDirtyFragmentClampColor(tables);
SetupDirtyPointSize(tables);
SetupDirtyLineWidth(tables);
SetupDirtyClipControl(tables);
SetupDirtyDepthClampEnabled(tables);
SetupDirtyMisc(tables);
}
void StateTracker::ChangeChannel(Tegra::Control::ChannelState& channel_state) {
flags = &channel_state.maxwell_3d->dirty.flags;
}
void StateTracker::InvalidateState() {
flags->set();
}
StateTracker::StateTracker() : flags{&default_flags} {}
} // namespace OpenGL
// SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include <algorithm>
#include <array>
#include <cstddef>
#include "common/common_types.h"
#include "core/core.h"
#include "video_core/control/channel_state.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/renderer_opengl/gl_state_tracker.h"
#define OFF(field_name) MAXWELL3D_REG_INDEX(field_name)
#define NUM(field_name) (sizeof(Maxwell3D::Regs::field_name) / (sizeof(u32)))
namespace OpenGL {
namespace {
using namespace Dirty;
using namespace VideoCommon::Dirty;
using Tegra::Engines::Maxwell3D;
using Regs = Maxwell3D::Regs;
using Tables = Maxwell3D::DirtyState::Tables;
using Table = Maxwell3D::DirtyState::Table;
void SetupDirtyColorMasks(Tables& tables) {
tables[0][OFF(color_mask_common)] = ColorMaskCommon;
for (std::size_t rt = 0; rt < Regs::NumRenderTargets; ++rt) {
const std::size_t offset = OFF(color_mask) + rt * NUM(color_mask[0]);
FillBlock(tables[0], offset, NUM(color_mask[0]), ColorMask0 + rt);
}
FillBlock(tables[1], OFF(color_mask), NUM(color_mask), ColorMasks);
}
void SetupDirtyVertexInstances(Tables& tables) {
static constexpr std::size_t instance_base_offset = 3;
for (std::size_t i = 0; i < Regs::NumVertexArrays; ++i) {
const std::size_t array_offset = OFF(vertex_streams) + i * NUM(vertex_streams[0]);
const std::size_t instance_array_offset = array_offset + instance_base_offset;
tables[0][instance_array_offset] = static_cast<u8>(VertexInstance0 + i);
tables[1][instance_array_offset] = VertexInstances;
const std::size_t instance_offset = OFF(vertex_stream_instances) + i;
tables[0][instance_offset] = static_cast<u8>(VertexInstance0 + i);
tables[1][instance_offset] = VertexInstances;
}
}
void SetupDirtyVertexFormat(Tables& tables) {
for (std::size_t i = 0; i < Regs::NumVertexAttributes; ++i) {
const std::size_t offset = OFF(vertex_attrib_format) + i * NUM(vertex_attrib_format[0]);
FillBlock(tables[0], offset, NUM(vertex_attrib_format[0]), VertexFormat0 + i);
}
FillBlock(tables[1], OFF(vertex_attrib_format), Regs::NumVertexAttributes, VertexFormats);
}
void SetupDirtyViewports(Tables& tables) {
for (std::size_t i = 0; i < Regs::NumViewports; ++i) {
const std::size_t transf_offset = OFF(viewport_transform) + i * NUM(viewport_transform[0]);
const std::size_t viewport_offset = OFF(viewports) + i * NUM(viewports[0]);
FillBlock(tables[0], transf_offset, NUM(viewport_transform[0]), Viewport0 + i);
FillBlock(tables[0], viewport_offset, NUM(viewports[0]), Viewport0 + i);
}
FillBlock(tables[1], OFF(viewport_transform), NUM(viewport_transform), Viewports);
FillBlock(tables[1], OFF(viewports), NUM(viewports), Viewports);
tables[0][OFF(viewport_scale_offset_enbled)] = ViewportTransform;
tables[1][OFF(viewport_scale_offset_enbled)] = Viewports;
}
void SetupDirtyScissors(Tables& tables) {
for (std::size_t i = 0; i < Regs::NumViewports; ++i) {
const std::size_t offset = OFF(scissor_test) + i * NUM(scissor_test[0]);
FillBlock(tables[0], offset, NUM(scissor_test[0]), Scissor0 + i);
}
FillBlock(tables[1], OFF(scissor_test), NUM(scissor_test), Scissors);
}
void SetupDirtyPolygonModes(Tables& tables) {
tables[0][OFF(polygon_mode_front)] = PolygonModeFront;
tables[0][OFF(polygon_mode_back)] = PolygonModeBack;
tables[1][OFF(polygon_mode_front)] = PolygonModes;
tables[1][OFF(polygon_mode_back)] = PolygonModes;
tables[0][OFF(fill_via_triangle_mode)] = PolygonModes;
}
void SetupDirtyDepthTest(Tables& tables) {
auto& table = tables[0];
table[OFF(depth_test_enable)] = DepthTest;
table[OFF(depth_write_enabled)] = DepthMask;
table[OFF(depth_test_func)] = DepthTest;
}
void SetupDirtyStencilTest(Tables& tables) {
static constexpr std::array offsets = {
OFF(stencil_enable), OFF(stencil_front_op.func), OFF(stencil_front_ref),
OFF(stencil_front_func_mask), OFF(stencil_front_op.fail), OFF(stencil_front_op.zfail),
OFF(stencil_front_op.zpass), OFF(stencil_front_mask), OFF(stencil_two_side_enable),
OFF(stencil_back_op.func), OFF(stencil_back_ref), OFF(stencil_back_func_mask),
OFF(stencil_back_op.fail), OFF(stencil_back_op.zfail), OFF(stencil_back_op.zpass),
OFF(stencil_back_mask)};
for (const auto offset : offsets) {
tables[0][offset] = StencilTest;
}
}
void SetupDirtyAlphaTest(Tables& tables) {
auto& table = tables[0];
table[OFF(alpha_test_ref)] = AlphaTest;
table[OFF(alpha_test_func)] = AlphaTest;
table[OFF(alpha_test_enabled)] = AlphaTest;
}
void SetupDirtyBlend(Tables& tables) {
FillBlock(tables[0], OFF(blend_color), NUM(blend_color), BlendColor);
tables[0][OFF(blend_per_target_enabled)] = BlendIndependentEnabled;
for (std::size_t i = 0; i < Regs::NumRenderTargets; ++i) {
const std::size_t offset = OFF(blend_per_target) + i * NUM(blend_per_target[0]);
FillBlock(tables[0], offset, NUM(blend_per_target[0]), BlendState0 + i);
tables[0][OFF(blend.enable) + i] = static_cast<u8>(BlendState0 + i);
}
FillBlock(tables[1], OFF(blend_per_target), NUM(blend_per_target), BlendStates);
FillBlock(tables[1], OFF(blend), NUM(blend), BlendStates);
}
void SetupDirtyPrimitiveRestart(Tables& tables) {
FillBlock(tables[0], OFF(primitive_restart), NUM(primitive_restart), PrimitiveRestart);
}
void SetupDirtyPolygonOffset(Tables& tables) {
auto& table = tables[0];
table[OFF(polygon_offset_fill_enable)] = PolygonOffset;
table[OFF(polygon_offset_line_enable)] = PolygonOffset;
table[OFF(polygon_offset_point_enable)] = PolygonOffset;
table[OFF(slope_scale_depth_bias)] = PolygonOffset;
table[OFF(depth_bias)] = PolygonOffset;
table[OFF(depth_bias_clamp)] = PolygonOffset;
}
void SetupDirtyMultisampleControl(Tables& tables) {
FillBlock(tables[0], OFF(anti_alias_alpha_control), NUM(anti_alias_alpha_control),
MultisampleControl);
}
void SetupDirtyRasterizeEnable(Tables& tables) {
tables[0][OFF(rasterize_enable)] = RasterizeEnable;
}
void SetupDirtyFramebufferSRGB(Tables& tables) {
tables[0][OFF(framebuffer_srgb)] = FramebufferSRGB;
}
void SetupDirtyLogicOp(Tables& tables) {
FillBlock(tables[0], OFF(logic_op), NUM(logic_op), LogicOp);
}
void SetupDirtyFragmentClampColor(Tables& tables) {
tables[0][OFF(frag_color_clamp)] = FragmentClampColor;
}
void SetupDirtyPointSize(Tables& tables) {
tables[0][OFF(point_size_attribute)] = PointSize;
tables[0][OFF(point_size)] = PointSize;
tables[0][OFF(point_sprite_enable)] = PointSize;
}
void SetupDirtyLineWidth(Tables& tables) {
tables[0][OFF(line_width_smooth)] = LineWidth;
tables[0][OFF(line_width_aliased)] = LineWidth;
tables[0][OFF(line_anti_alias_enable)] = LineWidth;
}
void SetupDirtyClipControl(Tables& tables) {
auto& table = tables[0];
table[OFF(window_origin)] = ClipControl;
table[OFF(depth_mode)] = ClipControl;
}
void SetupDirtyDepthClampEnabled(Tables& tables) {
tables[0][OFF(viewport_clip_control)] = DepthClampEnabled;
}
void SetupDirtyMisc(Tables& tables) {
auto& table = tables[0];
table[OFF(user_clip_enable)] = ClipDistances;
table[OFF(gl_front_face)] = FrontFace;
table[OFF(gl_cull_test_enabled)] = CullTest;
table[OFF(gl_cull_face)] = CullTest;
}
} // Anonymous namespace
void StateTracker::SetupTables(Tegra::Control::ChannelState& channel_state) {
auto& tables{channel_state.maxwell_3d->dirty.tables};
SetupDirtyFlags(tables);
SetupDirtyColorMasks(tables);
SetupDirtyViewports(tables);
SetupDirtyScissors(tables);
SetupDirtyVertexInstances(tables);
SetupDirtyVertexFormat(tables);
SetupDirtyPolygonModes(tables);
SetupDirtyDepthTest(tables);
SetupDirtyStencilTest(tables);
SetupDirtyAlphaTest(tables);
SetupDirtyBlend(tables);
SetupDirtyPrimitiveRestart(tables);
SetupDirtyPolygonOffset(tables);
SetupDirtyMultisampleControl(tables);
SetupDirtyRasterizeEnable(tables);
SetupDirtyFramebufferSRGB(tables);
SetupDirtyLogicOp(tables);
SetupDirtyFragmentClampColor(tables);
SetupDirtyPointSize(tables);
SetupDirtyLineWidth(tables);
SetupDirtyClipControl(tables);
SetupDirtyDepthClampEnabled(tables);
SetupDirtyMisc(tables);
}
void StateTracker::ChangeChannel(Tegra::Control::ChannelState& channel_state) {
flags = &channel_state.maxwell_3d->dirty.flags;
}
void StateTracker::InvalidateState() {
flags->set();
}
StateTracker::StateTracker() : flags{&default_flags} {}
} // namespace OpenGL

View File

@@ -1,235 +1,235 @@
// SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include <limits>
#include <glad/glad.h>
#include "common/common_types.h"
#include "video_core/dirty_flags.h"
#include "video_core/engines/maxwell_3d.h"
namespace Tegra {
namespace Control {
struct ChannelState;
}
} // namespace Tegra
namespace OpenGL {
namespace Dirty {
enum : u8 {
First = VideoCommon::Dirty::LastCommonEntry,
VertexFormats,
VertexFormat0,
VertexFormat31 = VertexFormat0 + 31,
VertexInstances,
VertexInstance0,
VertexInstance31 = VertexInstance0 + 31,
ViewportTransform,
Viewports,
Viewport0,
Viewport15 = Viewport0 + 15,
Scissors,
Scissor0,
Scissor15 = Scissor0 + 15,
ColorMaskCommon,
ColorMasks,
ColorMask0,
ColorMask7 = ColorMask0 + 7,
BlendColor,
BlendIndependentEnabled,
BlendStates,
BlendState0,
BlendState7 = BlendState0 + 7,
ClipDistances,
PolygonModes,
PolygonModeFront,
PolygonModeBack,
ColorMask,
FrontFace,
CullTest,
DepthMask,
DepthTest,
StencilTest,
AlphaTest,
PrimitiveRestart,
PolygonOffset,
MultisampleControl,
RasterizeEnable,
FramebufferSRGB,
LogicOp,
FragmentClampColor,
PointSize,
LineWidth,
ClipControl,
DepthClampEnabled,
Last
};
static_assert(Last <= std::numeric_limits<u8>::max());
} // namespace Dirty
class StateTracker {
public:
explicit StateTracker();
void BindIndexBuffer(GLuint new_index_buffer) {
if (index_buffer == new_index_buffer) {
return;
}
index_buffer = new_index_buffer;
glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, new_index_buffer);
}
void BindFramebuffer(GLuint new_framebuffer) {
if (framebuffer == new_framebuffer) {
return;
}
framebuffer = new_framebuffer;
glBindFramebuffer(GL_DRAW_FRAMEBUFFER, framebuffer);
}
void ClipControl(GLenum new_origin, GLenum new_depth) {
if (new_origin == origin && new_depth == depth) {
return;
}
origin = new_origin;
depth = new_depth;
glClipControl(origin, depth);
}
void SetYNegate(bool new_y_negate) {
if (new_y_negate == y_negate) {
return;
}
// Y_NEGATE is mapped to gl_FrontMaterial.ambient.a
y_negate = new_y_negate;
const std::array ambient{0.0f, 0.0f, 0.0f, y_negate ? -1.0f : 1.0f};
glMaterialfv(GL_FRONT, GL_AMBIENT, ambient.data());
}
void NotifyScreenDrawVertexArray() {
(*flags)[OpenGL::Dirty::VertexFormats] = true;
(*flags)[OpenGL::Dirty::VertexFormat0 + 0] = true;
(*flags)[OpenGL::Dirty::VertexFormat0 + 1] = true;
(*flags)[VideoCommon::Dirty::VertexBuffers] = true;
(*flags)[VideoCommon::Dirty::VertexBuffer0] = true;
(*flags)[OpenGL::Dirty::VertexInstances] = true;
(*flags)[OpenGL::Dirty::VertexInstance0 + 0] = true;
(*flags)[OpenGL::Dirty::VertexInstance0 + 1] = true;
}
void NotifyPolygonModes() {
(*flags)[OpenGL::Dirty::PolygonModes] = true;
(*flags)[OpenGL::Dirty::PolygonModeFront] = true;
(*flags)[OpenGL::Dirty::PolygonModeBack] = true;
}
void NotifyViewport0() {
(*flags)[OpenGL::Dirty::Viewports] = true;
(*flags)[OpenGL::Dirty::Viewport0] = true;
}
void NotifyScissor0() {
(*flags)[OpenGL::Dirty::Scissors] = true;
(*flags)[OpenGL::Dirty::Scissor0] = true;
}
void NotifyColorMask(size_t index) {
(*flags)[OpenGL::Dirty::ColorMasks] = true;
(*flags)[OpenGL::Dirty::ColorMask0 + index] = true;
}
void NotifyBlend0() {
(*flags)[OpenGL::Dirty::BlendStates] = true;
(*flags)[OpenGL::Dirty::BlendState0] = true;
}
void NotifyFramebuffer() {
(*flags)[VideoCommon::Dirty::RenderTargets] = true;
}
void NotifyFrontFace() {
(*flags)[OpenGL::Dirty::FrontFace] = true;
}
void NotifyCullTest() {
(*flags)[OpenGL::Dirty::CullTest] = true;
}
void NotifyDepthMask() {
(*flags)[OpenGL::Dirty::DepthMask] = true;
}
void NotifyDepthTest() {
(*flags)[OpenGL::Dirty::DepthTest] = true;
}
void NotifyStencilTest() {
(*flags)[OpenGL::Dirty::StencilTest] = true;
}
void NotifyPolygonOffset() {
(*flags)[OpenGL::Dirty::PolygonOffset] = true;
}
void NotifyRasterizeEnable() {
(*flags)[OpenGL::Dirty::RasterizeEnable] = true;
}
void NotifyFramebufferSRGB() {
(*flags)[OpenGL::Dirty::FramebufferSRGB] = true;
}
void NotifyLogicOp() {
(*flags)[OpenGL::Dirty::LogicOp] = true;
}
void NotifyClipControl() {
(*flags)[OpenGL::Dirty::ClipControl] = true;
}
void NotifyAlphaTest() {
(*flags)[OpenGL::Dirty::AlphaTest] = true;
}
void NotifyRange(u8 start, u8 end) {
for (auto flag = start; flag <= end; flag++) {
(*flags)[flag] = true;
}
}
void SetupTables(Tegra::Control::ChannelState& channel_state);
void ChangeChannel(Tegra::Control::ChannelState& channel_state);
void InvalidateState();
private:
Tegra::Engines::Maxwell3D::DirtyState::Flags* flags;
Tegra::Engines::Maxwell3D::DirtyState::Flags default_flags{};
GLuint framebuffer = 0;
GLuint index_buffer = 0;
GLenum origin = GL_LOWER_LEFT;
GLenum depth = GL_NEGATIVE_ONE_TO_ONE;
bool y_negate = false;
};
} // namespace OpenGL
// SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include <limits>
#include <glad/glad.h>
#include "common/common_types.h"
#include "video_core/dirty_flags.h"
#include "video_core/engines/maxwell_3d.h"
namespace Tegra {
namespace Control {
struct ChannelState;
}
} // namespace Tegra
namespace OpenGL {
namespace Dirty {
enum : u8 {
First = VideoCommon::Dirty::LastCommonEntry,
VertexFormats,
VertexFormat0,
VertexFormat31 = VertexFormat0 + 31,
VertexInstances,
VertexInstance0,
VertexInstance31 = VertexInstance0 + 31,
ViewportTransform,
Viewports,
Viewport0,
Viewport15 = Viewport0 + 15,
Scissors,
Scissor0,
Scissor15 = Scissor0 + 15,
ColorMaskCommon,
ColorMasks,
ColorMask0,
ColorMask7 = ColorMask0 + 7,
BlendColor,
BlendIndependentEnabled,
BlendStates,
BlendState0,
BlendState7 = BlendState0 + 7,
ClipDistances,
PolygonModes,
PolygonModeFront,
PolygonModeBack,
ColorMask,
FrontFace,
CullTest,
DepthMask,
DepthTest,
StencilTest,
AlphaTest,
PrimitiveRestart,
PolygonOffset,
MultisampleControl,
RasterizeEnable,
FramebufferSRGB,
LogicOp,
FragmentClampColor,
PointSize,
LineWidth,
ClipControl,
DepthClampEnabled,
Last
};
static_assert(Last <= std::numeric_limits<u8>::max());
} // namespace Dirty
class StateTracker {
public:
explicit StateTracker();
void BindIndexBuffer(GLuint new_index_buffer) {
if (index_buffer == new_index_buffer) {
return;
}
index_buffer = new_index_buffer;
glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, new_index_buffer);
}
void BindFramebuffer(GLuint new_framebuffer) {
if (framebuffer == new_framebuffer) {
return;
}
framebuffer = new_framebuffer;
glBindFramebuffer(GL_DRAW_FRAMEBUFFER, framebuffer);
}
void ClipControl(GLenum new_origin, GLenum new_depth) {
if (new_origin == origin && new_depth == depth) {
return;
}
origin = new_origin;
depth = new_depth;
glClipControl(origin, depth);
}
void SetYNegate(bool new_y_negate) {
if (new_y_negate == y_negate) {
return;
}
// Y_NEGATE is mapped to gl_FrontMaterial.ambient.a
y_negate = new_y_negate;
const std::array ambient{0.0f, 0.0f, 0.0f, y_negate ? -1.0f : 1.0f};
glMaterialfv(GL_FRONT, GL_AMBIENT, ambient.data());
}
void NotifyScreenDrawVertexArray() {
(*flags)[OpenGL::Dirty::VertexFormats] = true;
(*flags)[OpenGL::Dirty::VertexFormat0 + 0] = true;
(*flags)[OpenGL::Dirty::VertexFormat0 + 1] = true;
(*flags)[VideoCommon::Dirty::VertexBuffers] = true;
(*flags)[VideoCommon::Dirty::VertexBuffer0] = true;
(*flags)[OpenGL::Dirty::VertexInstances] = true;
(*flags)[OpenGL::Dirty::VertexInstance0 + 0] = true;
(*flags)[OpenGL::Dirty::VertexInstance0 + 1] = true;
}
void NotifyPolygonModes() {
(*flags)[OpenGL::Dirty::PolygonModes] = true;
(*flags)[OpenGL::Dirty::PolygonModeFront] = true;
(*flags)[OpenGL::Dirty::PolygonModeBack] = true;
}
void NotifyViewport0() {
(*flags)[OpenGL::Dirty::Viewports] = true;
(*flags)[OpenGL::Dirty::Viewport0] = true;
}
void NotifyScissor0() {
(*flags)[OpenGL::Dirty::Scissors] = true;
(*flags)[OpenGL::Dirty::Scissor0] = true;
}
void NotifyColorMask(size_t index) {
(*flags)[OpenGL::Dirty::ColorMasks] = true;
(*flags)[OpenGL::Dirty::ColorMask0 + index] = true;
}
void NotifyBlend0() {
(*flags)[OpenGL::Dirty::BlendStates] = true;
(*flags)[OpenGL::Dirty::BlendState0] = true;
}
void NotifyFramebuffer() {
(*flags)[VideoCommon::Dirty::RenderTargets] = true;
}
void NotifyFrontFace() {
(*flags)[OpenGL::Dirty::FrontFace] = true;
}
void NotifyCullTest() {
(*flags)[OpenGL::Dirty::CullTest] = true;
}
void NotifyDepthMask() {
(*flags)[OpenGL::Dirty::DepthMask] = true;
}
void NotifyDepthTest() {
(*flags)[OpenGL::Dirty::DepthTest] = true;
}
void NotifyStencilTest() {
(*flags)[OpenGL::Dirty::StencilTest] = true;
}
void NotifyPolygonOffset() {
(*flags)[OpenGL::Dirty::PolygonOffset] = true;
}
void NotifyRasterizeEnable() {
(*flags)[OpenGL::Dirty::RasterizeEnable] = true;
}
void NotifyFramebufferSRGB() {
(*flags)[OpenGL::Dirty::FramebufferSRGB] = true;
}
void NotifyLogicOp() {
(*flags)[OpenGL::Dirty::LogicOp] = true;
}
void NotifyClipControl() {
(*flags)[OpenGL::Dirty::ClipControl] = true;
}
void NotifyAlphaTest() {
(*flags)[OpenGL::Dirty::AlphaTest] = true;
}
void NotifyRange(u8 start, u8 end) {
for (auto flag = start; flag <= end; flag++) {
(*flags)[flag] = true;
}
}
void SetupTables(Tegra::Control::ChannelState& channel_state);
void ChangeChannel(Tegra::Control::ChannelState& channel_state);
void InvalidateState();
private:
Tegra::Engines::Maxwell3D::DirtyState::Flags* flags;
Tegra::Engines::Maxwell3D::DirtyState::Flags default_flags{};
GLuint framebuffer = 0;
GLuint index_buffer = 0;
GLenum origin = GL_LOWER_LEFT;
GLenum depth = GL_NEGATIVE_ONE_TO_ONE;
bool y_negate = false;
};
} // namespace OpenGL

View File

@@ -1,63 +1,63 @@
// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include <array>
#include <memory>
#include <span>
#include <glad/glad.h>
#include "common/alignment.h"
#include "common/assert.h"
#include "video_core/renderer_opengl/gl_stream_buffer.h"
namespace OpenGL {
StreamBuffer::StreamBuffer() {
static constexpr GLenum flags = GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT | GL_MAP_COHERENT_BIT;
buffer.Create();
glObjectLabel(GL_BUFFER, buffer.handle, -1, "Stream Buffer");
glNamedBufferStorage(buffer.handle, STREAM_BUFFER_SIZE, nullptr, flags);
mapped_pointer =
static_cast<u8*>(glMapNamedBufferRange(buffer.handle, 0, STREAM_BUFFER_SIZE, flags));
for (OGLSync& sync : fences) {
sync.Create();
}
}
std::pair<std::span<u8>, size_t> StreamBuffer::Request(size_t size) noexcept {
ASSERT(size < REGION_SIZE);
for (size_t region = Region(used_iterator), region_end = Region(iterator); region < region_end;
++region) {
fences[region].Create();
}
used_iterator = iterator;
for (size_t region = Region(free_iterator) + 1,
region_end = std::min(Region(iterator + size) + 1, NUM_SYNCS);
region < region_end; ++region) {
glClientWaitSync(fences[region].handle, 0, GL_TIMEOUT_IGNORED);
fences[region].Release();
}
if (iterator + size >= free_iterator) {
free_iterator = iterator + size;
}
if (iterator + size > STREAM_BUFFER_SIZE) {
for (size_t region = Region(used_iterator); region < NUM_SYNCS; ++region) {
fences[region].Create();
}
used_iterator = 0;
iterator = 0;
free_iterator = size;
for (size_t region = 0, region_end = Region(size); region <= region_end; ++region) {
glClientWaitSync(fences[region].handle, 0, GL_TIMEOUT_IGNORED);
fences[region].Release();
}
}
const size_t offset = iterator;
iterator = Common::AlignUp(iterator + size, MAX_ALIGNMENT);
return {std::span(mapped_pointer + offset, size), offset};
}
} // namespace OpenGL
// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include <array>
#include <memory>
#include <span>
#include <glad/glad.h>
#include "common/alignment.h"
#include "common/assert.h"
#include "video_core/renderer_opengl/gl_stream_buffer.h"
namespace OpenGL {
StreamBuffer::StreamBuffer() {
static constexpr GLenum flags = GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT | GL_MAP_COHERENT_BIT;
buffer.Create();
glObjectLabel(GL_BUFFER, buffer.handle, -1, "Stream Buffer");
glNamedBufferStorage(buffer.handle, STREAM_BUFFER_SIZE, nullptr, flags);
mapped_pointer =
static_cast<u8*>(glMapNamedBufferRange(buffer.handle, 0, STREAM_BUFFER_SIZE, flags));
for (OGLSync& sync : fences) {
sync.Create();
}
}
std::pair<std::span<u8>, size_t> StreamBuffer::Request(size_t size) noexcept {
ASSERT(size < REGION_SIZE);
for (size_t region = Region(used_iterator), region_end = Region(iterator); region < region_end;
++region) {
fences[region].Create();
}
used_iterator = iterator;
for (size_t region = Region(free_iterator) + 1,
region_end = std::min(Region(iterator + size) + 1, NUM_SYNCS);
region < region_end; ++region) {
glClientWaitSync(fences[region].handle, 0, GL_TIMEOUT_IGNORED);
fences[region].Release();
}
if (iterator + size >= free_iterator) {
free_iterator = iterator + size;
}
if (iterator + size > STREAM_BUFFER_SIZE) {
for (size_t region = Region(used_iterator); region < NUM_SYNCS; ++region) {
fences[region].Create();
}
used_iterator = 0;
iterator = 0;
free_iterator = size;
for (size_t region = 0, region_end = Region(size); region <= region_end; ++region) {
glClientWaitSync(fences[region].handle, 0, GL_TIMEOUT_IGNORED);
fences[region].Release();
}
}
const size_t offset = iterator;
iterator = Common::AlignUp(iterator + size, MAX_ALIGNMENT);
return {std::span(mapped_pointer + offset, size), offset};
}
} // namespace OpenGL

View File

@@ -1,51 +1,51 @@
// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include <array>
#include <span>
#include <utility>
#include <glad/glad.h>
#include "common/common_types.h"
#include "common/literals.h"
#include "video_core/renderer_opengl/gl_resource_manager.h"
namespace OpenGL {
using namespace Common::Literals;
class StreamBuffer {
static constexpr size_t STREAM_BUFFER_SIZE = 64_MiB;
static constexpr size_t NUM_SYNCS = 16;
static constexpr size_t REGION_SIZE = STREAM_BUFFER_SIZE / NUM_SYNCS;
static constexpr size_t MAX_ALIGNMENT = 256;
static_assert(STREAM_BUFFER_SIZE % MAX_ALIGNMENT == 0);
static_assert(STREAM_BUFFER_SIZE % NUM_SYNCS == 0);
static_assert(REGION_SIZE % MAX_ALIGNMENT == 0);
public:
explicit StreamBuffer();
[[nodiscard]] std::pair<std::span<u8>, size_t> Request(size_t size) noexcept;
[[nodiscard]] GLuint Handle() const noexcept {
return buffer.handle;
}
private:
[[nodiscard]] static size_t Region(size_t offset) noexcept {
return offset / REGION_SIZE;
}
size_t iterator = 0;
size_t used_iterator = 0;
size_t free_iterator = 0;
u8* mapped_pointer = nullptr;
OGLBuffer buffer;
std::array<OGLSync, NUM_SYNCS> fences;
};
} // namespace OpenGL
// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include <array>
#include <span>
#include <utility>
#include <glad/glad.h>
#include "common/common_types.h"
#include "common/literals.h"
#include "video_core/renderer_opengl/gl_resource_manager.h"
namespace OpenGL {
using namespace Common::Literals;
class StreamBuffer {
static constexpr size_t STREAM_BUFFER_SIZE = 64_MiB;
static constexpr size_t NUM_SYNCS = 16;
static constexpr size_t REGION_SIZE = STREAM_BUFFER_SIZE / NUM_SYNCS;
static constexpr size_t MAX_ALIGNMENT = 256;
static_assert(STREAM_BUFFER_SIZE % MAX_ALIGNMENT == 0);
static_assert(STREAM_BUFFER_SIZE % NUM_SYNCS == 0);
static_assert(REGION_SIZE % MAX_ALIGNMENT == 0);
public:
explicit StreamBuffer();
[[nodiscard]] std::pair<std::span<u8>, size_t> Request(size_t size) noexcept;
[[nodiscard]] GLuint Handle() const noexcept {
return buffer.handle;
}
private:
[[nodiscard]] static size_t Region(size_t offset) noexcept {
return offset / REGION_SIZE;
}
size_t iterator = 0;
size_t used_iterator = 0;
size_t free_iterator = 0;
u8* mapped_pointer = nullptr;
OGLBuffer buffer;
std::array<OGLSync, NUM_SYNCS> fences;
};
} // namespace OpenGL

File diff suppressed because it is too large Load Diff

View File

@@ -1,368 +1,368 @@
// SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include <memory>
#include <span>
#include <glad/glad.h>
#include "shader_recompiler/shader_info.h"
#include "video_core/renderer_opengl/gl_device.h"
#include "video_core/renderer_opengl/gl_resource_manager.h"
#include "video_core/renderer_opengl/util_shaders.h"
#include "video_core/texture_cache/image_view_base.h"
#include "video_core/texture_cache/texture_cache_base.h"
namespace Settings {
struct ResolutionScalingInfo;
}
namespace OpenGL {
class ProgramManager;
class StateTracker;
class Framebuffer;
class Image;
class ImageView;
class Sampler;
using VideoCommon::ImageId;
using VideoCommon::ImageViewId;
using VideoCommon::ImageViewType;
using VideoCommon::NUM_RT;
using VideoCommon::Region2D;
using VideoCommon::RenderTargets;
using VideoCommon::SlotVector;
struct ImageBufferMap {
~ImageBufferMap();
std::span<u8> mapped_span;
size_t offset = 0;
OGLSync* sync;
GLuint buffer;
};
struct FormatProperties {
GLenum compatibility_class;
bool compatibility_by_size;
bool is_compressed;
};
class FormatConversionPass {
public:
explicit FormatConversionPass(UtilShaders& util_shaders);
~FormatConversionPass() = default;
void ConvertImage(Image& dst_image, Image& src_image,
std::span<const VideoCommon::ImageCopy> copies);
private:
UtilShaders& util_shaders;
OGLBuffer intermediate_pbo;
size_t pbo_size{};
};
class TextureCacheRuntime {
friend Framebuffer;
friend Image;
friend ImageView;
friend Sampler;
public:
explicit TextureCacheRuntime(const Device& device, ProgramManager& program_manager,
StateTracker& state_tracker);
~TextureCacheRuntime();
void Finish();
ImageBufferMap UploadStagingBuffer(size_t size);
ImageBufferMap DownloadStagingBuffer(size_t size);
u64 GetDeviceLocalMemory() const {
return device_access_memory;
}
u64 GetDeviceMemoryUsage() const;
bool CanReportMemoryUsage() const {
return device.CanReportMemoryUsage();
}
bool ShouldReinterpret([[maybe_unused]] Image& dst, [[maybe_unused]] Image& src) {
return true;
}
void CopyImage(Image& dst, Image& src, std::span<const VideoCommon::ImageCopy> copies);
void ReinterpretImage(Image& dst, Image& src, std::span<const VideoCommon::ImageCopy> copies);
void ConvertImage(Framebuffer* dst, ImageView& dst_view, ImageView& src_view) {
UNIMPLEMENTED();
}
bool CanImageBeCopied(const Image& dst, const Image& src);
void EmulateCopyImage(Image& dst, Image& src, std::span<const VideoCommon::ImageCopy> copies);
void BlitFramebuffer(Framebuffer* dst, Framebuffer* src, const Region2D& dst_region,
const Region2D& src_region, Tegra::Engines::Fermi2D::Filter filter,
Tegra::Engines::Fermi2D::Operation operation);
void AccelerateImageUpload(Image& image, const ImageBufferMap& map,
std::span<const VideoCommon::SwizzleParameters> swizzles);
void InsertUploadMemoryBarrier();
FormatProperties FormatInfo(VideoCommon::ImageType type, GLenum internal_format) const;
bool HasNativeBgr() const noexcept {
// OpenGL does not have native support for the BGR internal format
return false;
}
bool HasBrokenTextureViewFormats() const noexcept {
return has_broken_texture_view_formats;
}
bool HasNativeASTC() const noexcept;
void TickFrame() {}
StateTracker& GetStateTracker() {
return state_tracker;
}
private:
struct StagingBuffers {
explicit StagingBuffers(GLenum storage_flags_, GLenum map_flags_);
~StagingBuffers();
ImageBufferMap RequestMap(size_t requested_size, bool insert_fence);
size_t RequestBuffer(size_t requested_size);
std::optional<size_t> FindBuffer(size_t requested_size);
std::vector<OGLSync> syncs;
std::vector<OGLBuffer> buffers;
std::vector<u8*> maps;
std::vector<size_t> sizes;
GLenum storage_flags;
GLenum map_flags;
};
const Device& device;
StateTracker& state_tracker;
UtilShaders util_shaders;
FormatConversionPass format_conversion_pass;
std::array<std::unordered_map<GLenum, FormatProperties>, 3> format_properties;
bool has_broken_texture_view_formats = false;
StagingBuffers upload_buffers{GL_MAP_WRITE_BIT, GL_MAP_WRITE_BIT | GL_MAP_FLUSH_EXPLICIT_BIT};
StagingBuffers download_buffers{GL_MAP_READ_BIT | GL_CLIENT_STORAGE_BIT, GL_MAP_READ_BIT};
OGLTexture null_image_1d_array;
OGLTexture null_image_cube_array;
OGLTexture null_image_3d;
OGLTextureView null_image_view_1d;
OGLTextureView null_image_view_2d;
OGLTextureView null_image_view_2d_array;
OGLTextureView null_image_view_cube;
std::array<GLuint, Shader::NUM_TEXTURE_TYPES> null_image_views{};
std::array<OGLFramebuffer, 4> rescale_draw_fbos;
std::array<OGLFramebuffer, 4> rescale_read_fbos;
const Settings::ResolutionScalingInfo& resolution;
u64 device_access_memory;
};
class Image : public VideoCommon::ImageBase {
friend ImageView;
public:
explicit Image(TextureCacheRuntime&, const VideoCommon::ImageInfo& info, GPUVAddr gpu_addr,
VAddr cpu_addr);
explicit Image(const VideoCommon::NullImageParams&);
~Image();
Image(const Image&) = delete;
Image& operator=(const Image&) = delete;
Image(Image&&) = default;
Image& operator=(Image&&) = default;
void UploadMemory(const ImageBufferMap& map,
std::span<const VideoCommon::BufferImageCopy> copies);
void DownloadMemory(ImageBufferMap& map, std::span<const VideoCommon::BufferImageCopy> copies);
GLuint StorageHandle() noexcept;
GLuint Handle() const noexcept {
return current_texture;
}
GLuint GlFormat() const noexcept {
return gl_format;
}
GLuint GlType() const noexcept {
return gl_type;
}
bool ScaleUp(bool ignore = false);
bool ScaleDown(bool ignore = false);
private:
void CopyBufferToImage(const VideoCommon::BufferImageCopy& copy, size_t buffer_offset);
void CopyImageToBuffer(const VideoCommon::BufferImageCopy& copy, size_t buffer_offset);
void Scale(bool up_scale);
OGLTexture texture;
OGLTexture upscaled_backup;
OGLTextureView store_view;
GLenum gl_internal_format = GL_NONE;
GLenum gl_format = GL_NONE;
GLenum gl_type = GL_NONE;
GLsizei gl_num_levels{};
TextureCacheRuntime* runtime{};
GLuint current_texture{};
};
class ImageView : public VideoCommon::ImageViewBase {
friend Image;
public:
explicit ImageView(TextureCacheRuntime&, const VideoCommon::ImageViewInfo&, ImageId, Image&,
const SlotVector<Image>&);
explicit ImageView(TextureCacheRuntime&, const VideoCommon::ImageInfo&,
const VideoCommon::ImageViewInfo&, GPUVAddr);
explicit ImageView(TextureCacheRuntime&, const VideoCommon::ImageInfo& info,
const VideoCommon::ImageViewInfo& view_info);
explicit ImageView(TextureCacheRuntime&, const VideoCommon::NullImageViewParams&);
~ImageView();
ImageView(const ImageView&) = delete;
ImageView& operator=(const ImageView&) = delete;
ImageView(ImageView&&) = default;
ImageView& operator=(ImageView&&) = default;
[[nodiscard]] GLuint StorageView(Shader::TextureType texture_type,
Shader::ImageFormat image_format);
[[nodiscard]] GLuint Handle(Shader::TextureType handle_type) const noexcept {
return views[static_cast<size_t>(handle_type)];
}
[[nodiscard]] GLuint DefaultHandle() const noexcept {
return default_handle;
}
[[nodiscard]] GLenum Format() const noexcept {
return internal_format;
}
[[nodiscard]] GPUVAddr GpuAddr() const noexcept {
return gpu_addr;
}
[[nodiscard]] u32 BufferSize() const noexcept {
return buffer_size;
}
private:
struct StorageViews {
std::array<GLuint, Shader::NUM_TEXTURE_TYPES> signeds{};
std::array<GLuint, Shader::NUM_TEXTURE_TYPES> unsigneds{};
};
void SetupView(Shader::TextureType view_type);
GLuint MakeView(Shader::TextureType view_type, GLenum view_format);
std::array<GLuint, Shader::NUM_TEXTURE_TYPES> views{};
std::vector<OGLTextureView> stored_views;
std::unique_ptr<StorageViews> storage_views;
GLenum internal_format = GL_NONE;
GLuint default_handle = 0;
GPUVAddr gpu_addr = 0;
u32 buffer_size = 0;
GLuint original_texture = 0;
int num_samples = 0;
VideoCommon::SubresourceRange flat_range;
VideoCommon::SubresourceRange full_range;
std::array<u8, 4> swizzle{};
bool set_object_label = false;
bool is_render_target = false;
};
class ImageAlloc : public VideoCommon::ImageAllocBase {};
class Sampler {
public:
explicit Sampler(TextureCacheRuntime&, const Tegra::Texture::TSCEntry&);
GLuint Handle() const noexcept {
return sampler.handle;
}
private:
OGLSampler sampler;
};
class Framebuffer {
public:
explicit Framebuffer(TextureCacheRuntime&, std::span<ImageView*, NUM_RT> color_buffers,
ImageView* depth_buffer, const VideoCommon::RenderTargets& key);
~Framebuffer();
Framebuffer(const Framebuffer&) = delete;
Framebuffer& operator=(const Framebuffer&) = delete;
Framebuffer(Framebuffer&&) = default;
Framebuffer& operator=(Framebuffer&&) = default;
[[nodiscard]] GLuint Handle() const noexcept {
return framebuffer.handle;
}
[[nodiscard]] GLbitfield BufferBits() const noexcept {
return buffer_bits;
}
private:
OGLFramebuffer framebuffer;
GLbitfield buffer_bits = GL_NONE;
};
struct TextureCacheParams {
static constexpr bool ENABLE_VALIDATION = true;
static constexpr bool FRAMEBUFFER_BLITS = true;
static constexpr bool HAS_EMULATED_COPIES = true;
static constexpr bool HAS_DEVICE_MEMORY_INFO = true;
using Runtime = OpenGL::TextureCacheRuntime;
using Image = OpenGL::Image;
using ImageAlloc = OpenGL::ImageAlloc;
using ImageView = OpenGL::ImageView;
using Sampler = OpenGL::Sampler;
using Framebuffer = OpenGL::Framebuffer;
};
using TextureCache = VideoCommon::TextureCache<TextureCacheParams>;
} // namespace OpenGL
// SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include <memory>
#include <span>
#include <glad/glad.h>
#include "shader_recompiler/shader_info.h"
#include "video_core/renderer_opengl/gl_device.h"
#include "video_core/renderer_opengl/gl_resource_manager.h"
#include "video_core/renderer_opengl/util_shaders.h"
#include "video_core/texture_cache/image_view_base.h"
#include "video_core/texture_cache/texture_cache_base.h"
namespace Settings {
struct ResolutionScalingInfo;
}
namespace OpenGL {
class ProgramManager;
class StateTracker;
class Framebuffer;
class Image;
class ImageView;
class Sampler;
using VideoCommon::ImageId;
using VideoCommon::ImageViewId;
using VideoCommon::ImageViewType;
using VideoCommon::NUM_RT;
using VideoCommon::Region2D;
using VideoCommon::RenderTargets;
using VideoCommon::SlotVector;
struct ImageBufferMap {
~ImageBufferMap();
std::span<u8> mapped_span;
size_t offset = 0;
OGLSync* sync;
GLuint buffer;
};
struct FormatProperties {
GLenum compatibility_class;
bool compatibility_by_size;
bool is_compressed;
};
class FormatConversionPass {
public:
explicit FormatConversionPass(UtilShaders& util_shaders);
~FormatConversionPass() = default;
void ConvertImage(Image& dst_image, Image& src_image,
std::span<const VideoCommon::ImageCopy> copies);
private:
UtilShaders& util_shaders;
OGLBuffer intermediate_pbo;
size_t pbo_size{};
};
class TextureCacheRuntime {
friend Framebuffer;
friend Image;
friend ImageView;
friend Sampler;
public:
explicit TextureCacheRuntime(const Device& device, ProgramManager& program_manager,
StateTracker& state_tracker);
~TextureCacheRuntime();
void Finish();
ImageBufferMap UploadStagingBuffer(size_t size);
ImageBufferMap DownloadStagingBuffer(size_t size);
u64 GetDeviceLocalMemory() const {
return device_access_memory;
}
u64 GetDeviceMemoryUsage() const;
bool CanReportMemoryUsage() const {
return device.CanReportMemoryUsage();
}
bool ShouldReinterpret([[maybe_unused]] Image& dst, [[maybe_unused]] Image& src) {
return true;
}
void CopyImage(Image& dst, Image& src, std::span<const VideoCommon::ImageCopy> copies);
void ReinterpretImage(Image& dst, Image& src, std::span<const VideoCommon::ImageCopy> copies);
void ConvertImage(Framebuffer* dst, ImageView& dst_view, ImageView& src_view) {
UNIMPLEMENTED();
}
bool CanImageBeCopied(const Image& dst, const Image& src);
void EmulateCopyImage(Image& dst, Image& src, std::span<const VideoCommon::ImageCopy> copies);
void BlitFramebuffer(Framebuffer* dst, Framebuffer* src, const Region2D& dst_region,
const Region2D& src_region, Tegra::Engines::Fermi2D::Filter filter,
Tegra::Engines::Fermi2D::Operation operation);
void AccelerateImageUpload(Image& image, const ImageBufferMap& map,
std::span<const VideoCommon::SwizzleParameters> swizzles);
void InsertUploadMemoryBarrier();
FormatProperties FormatInfo(VideoCommon::ImageType type, GLenum internal_format) const;
bool HasNativeBgr() const noexcept {
// OpenGL does not have native support for the BGR internal format
return false;
}
bool HasBrokenTextureViewFormats() const noexcept {
return has_broken_texture_view_formats;
}
bool HasNativeASTC() const noexcept;
void TickFrame() {}
StateTracker& GetStateTracker() {
return state_tracker;
}
private:
struct StagingBuffers {
explicit StagingBuffers(GLenum storage_flags_, GLenum map_flags_);
~StagingBuffers();
ImageBufferMap RequestMap(size_t requested_size, bool insert_fence);
size_t RequestBuffer(size_t requested_size);
std::optional<size_t> FindBuffer(size_t requested_size);
std::vector<OGLSync> syncs;
std::vector<OGLBuffer> buffers;
std::vector<u8*> maps;
std::vector<size_t> sizes;
GLenum storage_flags;
GLenum map_flags;
};
const Device& device;
StateTracker& state_tracker;
UtilShaders util_shaders;
FormatConversionPass format_conversion_pass;
std::array<std::unordered_map<GLenum, FormatProperties>, 3> format_properties;
bool has_broken_texture_view_formats = false;
StagingBuffers upload_buffers{GL_MAP_WRITE_BIT, GL_MAP_WRITE_BIT | GL_MAP_FLUSH_EXPLICIT_BIT};
StagingBuffers download_buffers{GL_MAP_READ_BIT | GL_CLIENT_STORAGE_BIT, GL_MAP_READ_BIT};
OGLTexture null_image_1d_array;
OGLTexture null_image_cube_array;
OGLTexture null_image_3d;
OGLTextureView null_image_view_1d;
OGLTextureView null_image_view_2d;
OGLTextureView null_image_view_2d_array;
OGLTextureView null_image_view_cube;
std::array<GLuint, Shader::NUM_TEXTURE_TYPES> null_image_views{};
std::array<OGLFramebuffer, 4> rescale_draw_fbos;
std::array<OGLFramebuffer, 4> rescale_read_fbos;
const Settings::ResolutionScalingInfo& resolution;
u64 device_access_memory;
};
class Image : public VideoCommon::ImageBase {
friend ImageView;
public:
explicit Image(TextureCacheRuntime&, const VideoCommon::ImageInfo& info, GPUVAddr gpu_addr,
VAddr cpu_addr);
explicit Image(const VideoCommon::NullImageParams&);
~Image();
Image(const Image&) = delete;
Image& operator=(const Image&) = delete;
Image(Image&&) = default;
Image& operator=(Image&&) = default;
void UploadMemory(const ImageBufferMap& map,
std::span<const VideoCommon::BufferImageCopy> copies);
void DownloadMemory(ImageBufferMap& map, std::span<const VideoCommon::BufferImageCopy> copies);
GLuint StorageHandle() noexcept;
GLuint Handle() const noexcept {
return current_texture;
}
GLuint GlFormat() const noexcept {
return gl_format;
}
GLuint GlType() const noexcept {
return gl_type;
}
bool ScaleUp(bool ignore = false);
bool ScaleDown(bool ignore = false);
private:
void CopyBufferToImage(const VideoCommon::BufferImageCopy& copy, size_t buffer_offset);
void CopyImageToBuffer(const VideoCommon::BufferImageCopy& copy, size_t buffer_offset);
void Scale(bool up_scale);
OGLTexture texture;
OGLTexture upscaled_backup;
OGLTextureView store_view;
GLenum gl_internal_format = GL_NONE;
GLenum gl_format = GL_NONE;
GLenum gl_type = GL_NONE;
GLsizei gl_num_levels{};
TextureCacheRuntime* runtime{};
GLuint current_texture{};
};
class ImageView : public VideoCommon::ImageViewBase {
friend Image;
public:
explicit ImageView(TextureCacheRuntime&, const VideoCommon::ImageViewInfo&, ImageId, Image&,
const SlotVector<Image>&);
explicit ImageView(TextureCacheRuntime&, const VideoCommon::ImageInfo&,
const VideoCommon::ImageViewInfo&, GPUVAddr);
explicit ImageView(TextureCacheRuntime&, const VideoCommon::ImageInfo& info,
const VideoCommon::ImageViewInfo& view_info);
explicit ImageView(TextureCacheRuntime&, const VideoCommon::NullImageViewParams&);
~ImageView();
ImageView(const ImageView&) = delete;
ImageView& operator=(const ImageView&) = delete;
ImageView(ImageView&&) = default;
ImageView& operator=(ImageView&&) = default;
[[nodiscard]] GLuint StorageView(Shader::TextureType texture_type,
Shader::ImageFormat image_format);
[[nodiscard]] GLuint Handle(Shader::TextureType handle_type) const noexcept {
return views[static_cast<size_t>(handle_type)];
}
[[nodiscard]] GLuint DefaultHandle() const noexcept {
return default_handle;
}
[[nodiscard]] GLenum Format() const noexcept {
return internal_format;
}
[[nodiscard]] GPUVAddr GpuAddr() const noexcept {
return gpu_addr;
}
[[nodiscard]] u32 BufferSize() const noexcept {
return buffer_size;
}
private:
struct StorageViews {
std::array<GLuint, Shader::NUM_TEXTURE_TYPES> signeds{};
std::array<GLuint, Shader::NUM_TEXTURE_TYPES> unsigneds{};
};
void SetupView(Shader::TextureType view_type);
GLuint MakeView(Shader::TextureType view_type, GLenum view_format);
std::array<GLuint, Shader::NUM_TEXTURE_TYPES> views{};
std::vector<OGLTextureView> stored_views;
std::unique_ptr<StorageViews> storage_views;
GLenum internal_format = GL_NONE;
GLuint default_handle = 0;
GPUVAddr gpu_addr = 0;
u32 buffer_size = 0;
GLuint original_texture = 0;
int num_samples = 0;
VideoCommon::SubresourceRange flat_range;
VideoCommon::SubresourceRange full_range;
std::array<u8, 4> swizzle{};
bool set_object_label = false;
bool is_render_target = false;
};
class ImageAlloc : public VideoCommon::ImageAllocBase {};
class Sampler {
public:
explicit Sampler(TextureCacheRuntime&, const Tegra::Texture::TSCEntry&);
GLuint Handle() const noexcept {
return sampler.handle;
}
private:
OGLSampler sampler;
};
class Framebuffer {
public:
explicit Framebuffer(TextureCacheRuntime&, std::span<ImageView*, NUM_RT> color_buffers,
ImageView* depth_buffer, const VideoCommon::RenderTargets& key);
~Framebuffer();
Framebuffer(const Framebuffer&) = delete;
Framebuffer& operator=(const Framebuffer&) = delete;
Framebuffer(Framebuffer&&) = default;
Framebuffer& operator=(Framebuffer&&) = default;
[[nodiscard]] GLuint Handle() const noexcept {
return framebuffer.handle;
}
[[nodiscard]] GLbitfield BufferBits() const noexcept {
return buffer_bits;
}
private:
OGLFramebuffer framebuffer;
GLbitfield buffer_bits = GL_NONE;
};
struct TextureCacheParams {
static constexpr bool ENABLE_VALIDATION = true;
static constexpr bool FRAMEBUFFER_BLITS = true;
static constexpr bool HAS_EMULATED_COPIES = true;
static constexpr bool HAS_DEVICE_MEMORY_INFO = true;
using Runtime = OpenGL::TextureCacheRuntime;
using Image = OpenGL::Image;
using ImageAlloc = OpenGL::ImageAlloc;
using ImageView = OpenGL::ImageView;
using Sampler = OpenGL::Sampler;
using Framebuffer = OpenGL::Framebuffer;
};
using TextureCache = VideoCommon::TextureCache<TextureCacheParams>;
} // namespace OpenGL

View File

@@ -1,9 +1,9 @@
// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include "video_core/renderer_opengl/gl_texture_cache.h"
#include "video_core/texture_cache/texture_cache.h"
namespace VideoCommon {
template class VideoCommon::TextureCache<OpenGL::TextureCacheParams>;
}
// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include "video_core/renderer_opengl/gl_texture_cache.h"
#include "video_core/texture_cache/texture_cache.h"
namespace VideoCommon {
template class VideoCommon::TextureCache<OpenGL::TextureCacheParams>;
}

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -1,143 +1,143 @@
// SPDX-FileCopyrightText: 2014 Citra Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include <vector>
#include <glad/glad.h>
#include "common/common_types.h"
#include "common/math_util.h"
#include "video_core/renderer_base.h"
#include "video_core/renderer_opengl/gl_device.h"
#include "video_core/renderer_opengl/gl_rasterizer.h"
#include "video_core/renderer_opengl/gl_resource_manager.h"
#include "video_core/renderer_opengl/gl_shader_manager.h"
#include "video_core/renderer_opengl/gl_state_tracker.h"
namespace Core {
class System;
class TelemetrySession;
} // namespace Core
namespace Core::Frontend {
class EmuWindow;
}
namespace Core::Memory {
class Memory;
}
namespace Layout {
struct FramebufferLayout;
}
namespace Tegra {
class GPU;
}
namespace OpenGL {
/// Structure used for storing information about the textures for the Switch screen
struct TextureInfo {
OGLTexture resource;
GLsizei width;
GLsizei height;
GLenum gl_format;
GLenum gl_type;
Service::android::PixelFormat pixel_format;
};
/// Structure used for storing information about the display target for the Switch screen
struct ScreenInfo {
GLuint display_texture{};
bool was_accelerated = false;
bool display_srgb{};
const Common::Rectangle<float> display_texcoords{0.0f, 0.0f, 1.0f, 1.0f};
TextureInfo texture;
};
class RendererOpenGL final : public VideoCore::RendererBase {
public:
explicit RendererOpenGL(Core::TelemetrySession& telemetry_session_,
Core::Frontend::EmuWindow& emu_window_,
Core::Memory::Memory& cpu_memory_, Tegra::GPU& gpu_,
std::unique_ptr<Core::Frontend::GraphicsContext> context_);
~RendererOpenGL() override;
void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) override;
VideoCore::RasterizerInterface* ReadRasterizer() override {
return &rasterizer;
}
[[nodiscard]] std::string GetDeviceVendor() const override {
return device.GetVendorName();
}
private:
/// Initializes the OpenGL state and creates persistent objects.
void InitOpenGLObjects();
void AddTelemetryFields();
void ConfigureFramebufferTexture(TextureInfo& texture,
const Tegra::FramebufferConfig& framebuffer);
/// Draws the emulated screens to the emulator window.
void DrawScreen(const Layout::FramebufferLayout& layout);
void RenderScreenshot();
/// Loads framebuffer from emulated memory into the active OpenGL texture.
void LoadFBToScreenInfo(const Tegra::FramebufferConfig& framebuffer);
/// Fills active OpenGL texture with the given RGB color.Since the color is solid, the texture
/// can be 1x1 but will stretch across whatever it's rendered on.
void LoadColorToActiveGLTexture(u8 color_r, u8 color_g, u8 color_b, u8 color_a,
const TextureInfo& texture);
void PrepareRendertarget(const Tegra::FramebufferConfig* framebuffer);
Core::TelemetrySession& telemetry_session;
Core::Frontend::EmuWindow& emu_window;
Core::Memory::Memory& cpu_memory;
Tegra::GPU& gpu;
Device device;
StateTracker state_tracker;
ProgramManager program_manager;
RasterizerOpenGL rasterizer;
// OpenGL object IDs
OGLSampler present_sampler;
OGLSampler present_sampler_nn;
OGLBuffer vertex_buffer;
OGLProgram fxaa_vertex;
OGLProgram fxaa_fragment;
OGLProgram present_vertex;
OGLProgram present_bilinear_fragment;
OGLProgram present_bicubic_fragment;
OGLProgram present_gaussian_fragment;
OGLProgram present_scaleforce_fragment;
OGLFramebuffer screenshot_framebuffer;
// GPU address of the vertex buffer
GLuint64EXT vertex_buffer_address = 0;
/// Display information for Switch screen
ScreenInfo screen_info;
OGLTexture fxaa_texture;
OGLFramebuffer fxaa_framebuffer;
/// OpenGL framebuffer data
std::vector<u8> gl_framebuffer_data;
/// Used for transforming the framebuffer orientation
Service::android::BufferTransformFlags framebuffer_transform_flags{};
Common::Rectangle<int> framebuffer_crop_rect;
u32 framebuffer_width;
u32 framebuffer_height;
};
} // namespace OpenGL
// SPDX-FileCopyrightText: 2014 Citra Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include <vector>
#include <glad/glad.h>
#include "common/common_types.h"
#include "common/math_util.h"
#include "video_core/renderer_base.h"
#include "video_core/renderer_opengl/gl_device.h"
#include "video_core/renderer_opengl/gl_rasterizer.h"
#include "video_core/renderer_opengl/gl_resource_manager.h"
#include "video_core/renderer_opengl/gl_shader_manager.h"
#include "video_core/renderer_opengl/gl_state_tracker.h"
namespace Core {
class System;
class TelemetrySession;
} // namespace Core
namespace Core::Frontend {
class EmuWindow;
}
namespace Core::Memory {
class Memory;
}
namespace Layout {
struct FramebufferLayout;
}
namespace Tegra {
class GPU;
}
namespace OpenGL {
/// Structure used for storing information about the textures for the Switch screen
struct TextureInfo {
OGLTexture resource;
GLsizei width;
GLsizei height;
GLenum gl_format;
GLenum gl_type;
Service::android::PixelFormat pixel_format;
};
/// Structure used for storing information about the display target for the Switch screen
struct ScreenInfo {
GLuint display_texture{};
bool was_accelerated = false;
bool display_srgb{};
const Common::Rectangle<float> display_texcoords{0.0f, 0.0f, 1.0f, 1.0f};
TextureInfo texture;
};
class RendererOpenGL final : public VideoCore::RendererBase {
public:
explicit RendererOpenGL(Core::TelemetrySession& telemetry_session_,
Core::Frontend::EmuWindow& emu_window_,
Core::Memory::Memory& cpu_memory_, Tegra::GPU& gpu_,
std::unique_ptr<Core::Frontend::GraphicsContext> context_);
~RendererOpenGL() override;
void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) override;
VideoCore::RasterizerInterface* ReadRasterizer() override {
return &rasterizer;
}
[[nodiscard]] std::string GetDeviceVendor() const override {
return device.GetVendorName();
}
private:
/// Initializes the OpenGL state and creates persistent objects.
void InitOpenGLObjects();
void AddTelemetryFields();
void ConfigureFramebufferTexture(TextureInfo& texture,
const Tegra::FramebufferConfig& framebuffer);
/// Draws the emulated screens to the emulator window.
void DrawScreen(const Layout::FramebufferLayout& layout);
void RenderScreenshot();
/// Loads framebuffer from emulated memory into the active OpenGL texture.
void LoadFBToScreenInfo(const Tegra::FramebufferConfig& framebuffer);
/// Fills active OpenGL texture with the given RGB color.Since the color is solid, the texture
/// can be 1x1 but will stretch across whatever it's rendered on.
void LoadColorToActiveGLTexture(u8 color_r, u8 color_g, u8 color_b, u8 color_a,
const TextureInfo& texture);
void PrepareRendertarget(const Tegra::FramebufferConfig* framebuffer);
Core::TelemetrySession& telemetry_session;
Core::Frontend::EmuWindow& emu_window;
Core::Memory::Memory& cpu_memory;
Tegra::GPU& gpu;
Device device;
StateTracker state_tracker;
ProgramManager program_manager;
RasterizerOpenGL rasterizer;
// OpenGL object IDs
OGLSampler present_sampler;
OGLSampler present_sampler_nn;
OGLBuffer vertex_buffer;
OGLProgram fxaa_vertex;
OGLProgram fxaa_fragment;
OGLProgram present_vertex;
OGLProgram present_bilinear_fragment;
OGLProgram present_bicubic_fragment;
OGLProgram present_gaussian_fragment;
OGLProgram present_scaleforce_fragment;
OGLFramebuffer screenshot_framebuffer;
// GPU address of the vertex buffer
GLuint64EXT vertex_buffer_address = 0;
/// Display information for Switch screen
ScreenInfo screen_info;
OGLTexture fxaa_texture;
OGLFramebuffer fxaa_framebuffer;
/// OpenGL framebuffer data
std::vector<u8> gl_framebuffer_data;
/// Used for transforming the framebuffer orientation
Service::android::BufferTransformFlags framebuffer_transform_flags{};
Common::Rectangle<int> framebuffer_crop_rect;
u32 framebuffer_width;
u32 framebuffer_height;
};
} // namespace OpenGL

View File

@@ -1,289 +1,289 @@
// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include <span>
#include <string_view>
#include <glad/glad.h>
#include "common/assert.h"
#include "common/common_types.h"
#include "common/div_ceil.h"
#include "video_core/host_shaders/astc_decoder_comp.h"
#include "video_core/host_shaders/block_linear_unswizzle_2d_comp.h"
#include "video_core/host_shaders/block_linear_unswizzle_3d_comp.h"
#include "video_core/host_shaders/opengl_convert_s8d24_comp.h"
#include "video_core/host_shaders/opengl_copy_bc4_comp.h"
#include "video_core/host_shaders/pitch_unswizzle_comp.h"
#include "video_core/renderer_opengl/gl_shader_manager.h"
#include "video_core/renderer_opengl/gl_shader_util.h"
#include "video_core/renderer_opengl/gl_texture_cache.h"
#include "video_core/renderer_opengl/util_shaders.h"
#include "video_core/texture_cache/accelerated_swizzle.h"
#include "video_core/texture_cache/types.h"
#include "video_core/texture_cache/util.h"
#include "video_core/textures/astc.h"
#include "video_core/textures/decoders.h"
namespace OpenGL {
using namespace HostShaders;
using namespace Tegra::Texture::ASTC;
using VideoCommon::Extent2D;
using VideoCommon::Extent3D;
using VideoCommon::ImageCopy;
using VideoCommon::ImageType;
using VideoCommon::SwizzleParameters;
using VideoCommon::Accelerated::MakeBlockLinearSwizzle2DParams;
using VideoCommon::Accelerated::MakeBlockLinearSwizzle3DParams;
using VideoCore::Surface::BytesPerBlock;
namespace {
OGLProgram MakeProgram(std::string_view source) {
return CreateProgram(source, GL_COMPUTE_SHADER);
}
} // Anonymous namespace
UtilShaders::UtilShaders(ProgramManager& program_manager_)
: program_manager{program_manager_}, astc_decoder_program(MakeProgram(ASTC_DECODER_COMP)),
block_linear_unswizzle_2d_program(MakeProgram(BLOCK_LINEAR_UNSWIZZLE_2D_COMP)),
block_linear_unswizzle_3d_program(MakeProgram(BLOCK_LINEAR_UNSWIZZLE_3D_COMP)),
pitch_unswizzle_program(MakeProgram(PITCH_UNSWIZZLE_COMP)),
copy_bc4_program(MakeProgram(OPENGL_COPY_BC4_COMP)),
convert_s8d24_program(MakeProgram(OPENGL_CONVERT_S8D24_COMP)) {
const auto swizzle_table = Tegra::Texture::MakeSwizzleTable();
swizzle_table_buffer.Create();
glNamedBufferStorage(swizzle_table_buffer.handle, sizeof(swizzle_table), &swizzle_table, 0);
}
UtilShaders::~UtilShaders() = default;
void UtilShaders::ASTCDecode(Image& image, const ImageBufferMap& map,
std::span<const VideoCommon::SwizzleParameters> swizzles) {
static constexpr GLuint BINDING_INPUT_BUFFER = 0;
static constexpr GLuint BINDING_OUTPUT_IMAGE = 0;
const Extent2D tile_size{
.width = VideoCore::Surface::DefaultBlockWidth(image.info.format),
.height = VideoCore::Surface::DefaultBlockHeight(image.info.format),
};
program_manager.BindComputeProgram(astc_decoder_program.handle);
glFlushMappedNamedBufferRange(map.buffer, map.offset, image.guest_size_bytes);
glUniform2ui(1, tile_size.width, tile_size.height);
// Ensure buffer data is valid before dispatching
glFlush();
for (const SwizzleParameters& swizzle : swizzles) {
const size_t input_offset = swizzle.buffer_offset + map.offset;
const u32 num_dispatches_x = Common::DivCeil(swizzle.num_tiles.width, 8U);
const u32 num_dispatches_y = Common::DivCeil(swizzle.num_tiles.height, 8U);
const auto params = MakeBlockLinearSwizzle2DParams(swizzle, image.info);
ASSERT(params.origin == (std::array<u32, 3>{0, 0, 0}));
ASSERT(params.destination == (std::array<s32, 3>{0, 0, 0}));
ASSERT(params.bytes_per_block_log2 == 4);
glUniform1ui(2, params.layer_stride);
glUniform1ui(3, params.block_size);
glUniform1ui(4, params.x_shift);
glUniform1ui(5, params.block_height);
glUniform1ui(6, params.block_height_mask);
// ASTC texture data
glBindBufferRange(GL_SHADER_STORAGE_BUFFER, BINDING_INPUT_BUFFER, map.buffer, input_offset,
image.guest_size_bytes - swizzle.buffer_offset);
glBindImageTexture(BINDING_OUTPUT_IMAGE, image.StorageHandle(), swizzle.level, GL_TRUE, 0,
GL_WRITE_ONLY, GL_RGBA8);
glDispatchCompute(num_dispatches_x, num_dispatches_y, image.info.resources.layers);
}
// Precautionary barrier to ensure the compute shader is done decoding prior to texture access.
// GL_TEXTURE_FETCH_BARRIER_BIT and GL_SHADER_IMAGE_ACCESS_BARRIER_BIT are used in a separate
// glMemoryBarrier call by the texture cache runtime
glMemoryBarrier(GL_UNIFORM_BARRIER_BIT | GL_COMMAND_BARRIER_BIT | GL_PIXEL_BUFFER_BARRIER_BIT |
GL_TEXTURE_UPDATE_BARRIER_BIT | GL_BUFFER_UPDATE_BARRIER_BIT |
GL_SHADER_STORAGE_BARRIER_BIT | GL_CLIENT_MAPPED_BUFFER_BARRIER_BIT);
program_manager.RestoreGuestCompute();
}
void UtilShaders::BlockLinearUpload2D(Image& image, const ImageBufferMap& map,
std::span<const SwizzleParameters> swizzles) {
static constexpr Extent3D WORKGROUP_SIZE{32, 32, 1};
static constexpr GLuint BINDING_SWIZZLE_BUFFER = 0;
static constexpr GLuint BINDING_INPUT_BUFFER = 1;
static constexpr GLuint BINDING_OUTPUT_IMAGE = 0;
program_manager.BindComputeProgram(block_linear_unswizzle_2d_program.handle);
glFlushMappedNamedBufferRange(map.buffer, map.offset, image.guest_size_bytes);
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, BINDING_SWIZZLE_BUFFER, swizzle_table_buffer.handle);
const GLenum store_format = StoreFormat(BytesPerBlock(image.info.format));
for (const SwizzleParameters& swizzle : swizzles) {
const Extent3D num_tiles = swizzle.num_tiles;
const size_t input_offset = swizzle.buffer_offset + map.offset;
const u32 num_dispatches_x = Common::DivCeil(num_tiles.width, WORKGROUP_SIZE.width);
const u32 num_dispatches_y = Common::DivCeil(num_tiles.height, WORKGROUP_SIZE.height);
const auto params = MakeBlockLinearSwizzle2DParams(swizzle, image.info);
glUniform3uiv(0, 1, params.origin.data());
glUniform3iv(1, 1, params.destination.data());
glUniform1ui(2, params.bytes_per_block_log2);
glUniform1ui(3, params.layer_stride);
glUniform1ui(4, params.block_size);
glUniform1ui(5, params.x_shift);
glUniform1ui(6, params.block_height);
glUniform1ui(7, params.block_height_mask);
glBindBufferRange(GL_SHADER_STORAGE_BUFFER, BINDING_INPUT_BUFFER, map.buffer, input_offset,
image.guest_size_bytes - swizzle.buffer_offset);
glBindImageTexture(BINDING_OUTPUT_IMAGE, image.StorageHandle(), swizzle.level, GL_TRUE, 0,
GL_WRITE_ONLY, store_format);
glDispatchCompute(num_dispatches_x, num_dispatches_y, image.info.resources.layers);
}
program_manager.RestoreGuestCompute();
}
void UtilShaders::BlockLinearUpload3D(Image& image, const ImageBufferMap& map,
std::span<const SwizzleParameters> swizzles) {
static constexpr Extent3D WORKGROUP_SIZE{16, 8, 8};
static constexpr GLuint BINDING_SWIZZLE_BUFFER = 0;
static constexpr GLuint BINDING_INPUT_BUFFER = 1;
static constexpr GLuint BINDING_OUTPUT_IMAGE = 0;
glFlushMappedNamedBufferRange(map.buffer, map.offset, image.guest_size_bytes);
program_manager.BindComputeProgram(block_linear_unswizzle_3d_program.handle);
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, BINDING_SWIZZLE_BUFFER, swizzle_table_buffer.handle);
const GLenum store_format = StoreFormat(BytesPerBlock(image.info.format));
for (const SwizzleParameters& swizzle : swizzles) {
const Extent3D num_tiles = swizzle.num_tiles;
const size_t input_offset = swizzle.buffer_offset + map.offset;
const u32 num_dispatches_x = Common::DivCeil(num_tiles.width, WORKGROUP_SIZE.width);
const u32 num_dispatches_y = Common::DivCeil(num_tiles.height, WORKGROUP_SIZE.height);
const u32 num_dispatches_z = Common::DivCeil(num_tiles.depth, WORKGROUP_SIZE.depth);
const auto params = MakeBlockLinearSwizzle3DParams(swizzle, image.info);
glUniform3uiv(0, 1, params.origin.data());
glUniform3iv(1, 1, params.destination.data());
glUniform1ui(2, params.bytes_per_block_log2);
glUniform1ui(3, params.slice_size);
glUniform1ui(4, params.block_size);
glUniform1ui(5, params.x_shift);
glUniform1ui(6, params.block_height);
glUniform1ui(7, params.block_height_mask);
glUniform1ui(8, params.block_depth);
glUniform1ui(9, params.block_depth_mask);
glBindBufferRange(GL_SHADER_STORAGE_BUFFER, BINDING_INPUT_BUFFER, map.buffer, input_offset,
image.guest_size_bytes - swizzle.buffer_offset);
glBindImageTexture(BINDING_OUTPUT_IMAGE, image.StorageHandle(), swizzle.level, GL_TRUE, 0,
GL_WRITE_ONLY, store_format);
glDispatchCompute(num_dispatches_x, num_dispatches_y, num_dispatches_z);
}
program_manager.RestoreGuestCompute();
}
void UtilShaders::PitchUpload(Image& image, const ImageBufferMap& map,
std::span<const SwizzleParameters> swizzles) {
static constexpr Extent3D WORKGROUP_SIZE{32, 32, 1};
static constexpr GLuint BINDING_INPUT_BUFFER = 0;
static constexpr GLuint BINDING_OUTPUT_IMAGE = 0;
static constexpr GLuint LOC_ORIGIN = 0;
static constexpr GLuint LOC_DESTINATION = 1;
static constexpr GLuint LOC_BYTES_PER_BLOCK = 2;
static constexpr GLuint LOC_PITCH = 3;
const u32 bytes_per_block = BytesPerBlock(image.info.format);
const GLenum format = StoreFormat(bytes_per_block);
const u32 pitch = image.info.pitch;
UNIMPLEMENTED_IF_MSG(!std::has_single_bit(bytes_per_block),
"Non-power of two images are not implemented");
program_manager.BindComputeProgram(pitch_unswizzle_program.handle);
glFlushMappedNamedBufferRange(map.buffer, map.offset, image.guest_size_bytes);
glUniform2ui(LOC_ORIGIN, 0, 0);
glUniform2i(LOC_DESTINATION, 0, 0);
glUniform1ui(LOC_BYTES_PER_BLOCK, bytes_per_block);
glUniform1ui(LOC_PITCH, pitch);
glBindImageTexture(BINDING_OUTPUT_IMAGE, image.StorageHandle(), 0, GL_FALSE, 0, GL_WRITE_ONLY,
format);
for (const SwizzleParameters& swizzle : swizzles) {
const Extent3D num_tiles = swizzle.num_tiles;
const size_t input_offset = swizzle.buffer_offset + map.offset;
const u32 num_dispatches_x = Common::DivCeil(num_tiles.width, WORKGROUP_SIZE.width);
const u32 num_dispatches_y = Common::DivCeil(num_tiles.height, WORKGROUP_SIZE.height);
glBindBufferRange(GL_SHADER_STORAGE_BUFFER, BINDING_INPUT_BUFFER, map.buffer, input_offset,
image.guest_size_bytes - swizzle.buffer_offset);
glDispatchCompute(num_dispatches_x, num_dispatches_y, 1);
}
program_manager.RestoreGuestCompute();
}
void UtilShaders::CopyBC4(Image& dst_image, Image& src_image, std::span<const ImageCopy> copies) {
static constexpr GLuint BINDING_INPUT_IMAGE = 0;
static constexpr GLuint BINDING_OUTPUT_IMAGE = 1;
static constexpr GLuint LOC_SRC_OFFSET = 0;
static constexpr GLuint LOC_DST_OFFSET = 1;
program_manager.BindComputeProgram(copy_bc4_program.handle);
for (const ImageCopy& copy : copies) {
ASSERT(copy.src_subresource.base_layer == 0);
ASSERT(copy.src_subresource.num_layers == 1);
ASSERT(copy.dst_subresource.base_layer == 0);
ASSERT(copy.dst_subresource.num_layers == 1);
glUniform3ui(LOC_SRC_OFFSET, copy.src_offset.x, copy.src_offset.y, copy.src_offset.z);
glUniform3ui(LOC_DST_OFFSET, copy.dst_offset.x, copy.dst_offset.y, copy.dst_offset.z);
glBindImageTexture(BINDING_INPUT_IMAGE, src_image.StorageHandle(),
copy.src_subresource.base_level, GL_TRUE, 0, GL_READ_ONLY, GL_RG32UI);
glBindImageTexture(BINDING_OUTPUT_IMAGE, dst_image.StorageHandle(),
copy.dst_subresource.base_level, GL_TRUE, 0, GL_WRITE_ONLY, GL_RGBA8UI);
glDispatchCompute(copy.extent.width, copy.extent.height, copy.extent.depth);
}
program_manager.RestoreGuestCompute();
}
void UtilShaders::ConvertS8D24(Image& dst_image, std::span<const ImageCopy> copies) {
static constexpr GLuint BINDING_DESTINATION = 0;
static constexpr GLuint LOC_SIZE = 0;
program_manager.BindComputeProgram(convert_s8d24_program.handle);
for (const ImageCopy& copy : copies) {
ASSERT(copy.src_subresource.base_layer == 0);
ASSERT(copy.src_subresource.num_layers == 1);
ASSERT(copy.dst_subresource.base_layer == 0);
ASSERT(copy.dst_subresource.num_layers == 1);
glUniform3ui(LOC_SIZE, copy.extent.width, copy.extent.height, copy.extent.depth);
glBindImageTexture(BINDING_DESTINATION, dst_image.StorageHandle(),
copy.dst_subresource.base_level, GL_TRUE, 0, GL_READ_WRITE, GL_RGBA8UI);
glDispatchCompute(Common::DivCeil(copy.extent.width, 16u),
Common::DivCeil(copy.extent.height, 8u), copy.extent.depth);
}
program_manager.RestoreGuestCompute();
}
GLenum StoreFormat(u32 bytes_per_block) {
switch (bytes_per_block) {
case 1:
return GL_R8UI;
case 2:
return GL_R16UI;
case 4:
return GL_R32UI;
case 8:
return GL_RG32UI;
case 16:
return GL_RGBA32UI;
}
ASSERT(false);
return GL_R8UI;
}
} // namespace OpenGL
// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include <span>
#include <string_view>
#include <glad/glad.h>
#include "common/assert.h"
#include "common/common_types.h"
#include "common/div_ceil.h"
#include "video_core/host_shaders/astc_decoder_comp.h"
#include "video_core/host_shaders/block_linear_unswizzle_2d_comp.h"
#include "video_core/host_shaders/block_linear_unswizzle_3d_comp.h"
#include "video_core/host_shaders/opengl_convert_s8d24_comp.h"
#include "video_core/host_shaders/opengl_copy_bc4_comp.h"
#include "video_core/host_shaders/pitch_unswizzle_comp.h"
#include "video_core/renderer_opengl/gl_shader_manager.h"
#include "video_core/renderer_opengl/gl_shader_util.h"
#include "video_core/renderer_opengl/gl_texture_cache.h"
#include "video_core/renderer_opengl/util_shaders.h"
#include "video_core/texture_cache/accelerated_swizzle.h"
#include "video_core/texture_cache/types.h"
#include "video_core/texture_cache/util.h"
#include "video_core/textures/astc.h"
#include "video_core/textures/decoders.h"
namespace OpenGL {
using namespace HostShaders;
using namespace Tegra::Texture::ASTC;
using VideoCommon::Extent2D;
using VideoCommon::Extent3D;
using VideoCommon::ImageCopy;
using VideoCommon::ImageType;
using VideoCommon::SwizzleParameters;
using VideoCommon::Accelerated::MakeBlockLinearSwizzle2DParams;
using VideoCommon::Accelerated::MakeBlockLinearSwizzle3DParams;
using VideoCore::Surface::BytesPerBlock;
namespace {
OGLProgram MakeProgram(std::string_view source) {
return CreateProgram(source, GL_COMPUTE_SHADER);
}
} // Anonymous namespace
UtilShaders::UtilShaders(ProgramManager& program_manager_)
: program_manager{program_manager_}, astc_decoder_program(MakeProgram(ASTC_DECODER_COMP)),
block_linear_unswizzle_2d_program(MakeProgram(BLOCK_LINEAR_UNSWIZZLE_2D_COMP)),
block_linear_unswizzle_3d_program(MakeProgram(BLOCK_LINEAR_UNSWIZZLE_3D_COMP)),
pitch_unswizzle_program(MakeProgram(PITCH_UNSWIZZLE_COMP)),
copy_bc4_program(MakeProgram(OPENGL_COPY_BC4_COMP)),
convert_s8d24_program(MakeProgram(OPENGL_CONVERT_S8D24_COMP)) {
const auto swizzle_table = Tegra::Texture::MakeSwizzleTable();
swizzle_table_buffer.Create();
glNamedBufferStorage(swizzle_table_buffer.handle, sizeof(swizzle_table), &swizzle_table, 0);
}
UtilShaders::~UtilShaders() = default;
void UtilShaders::ASTCDecode(Image& image, const ImageBufferMap& map,
std::span<const VideoCommon::SwizzleParameters> swizzles) {
static constexpr GLuint BINDING_INPUT_BUFFER = 0;
static constexpr GLuint BINDING_OUTPUT_IMAGE = 0;
const Extent2D tile_size{
.width = VideoCore::Surface::DefaultBlockWidth(image.info.format),
.height = VideoCore::Surface::DefaultBlockHeight(image.info.format),
};
program_manager.BindComputeProgram(astc_decoder_program.handle);
glFlushMappedNamedBufferRange(map.buffer, map.offset, image.guest_size_bytes);
glUniform2ui(1, tile_size.width, tile_size.height);
// Ensure buffer data is valid before dispatching
glFlush();
for (const SwizzleParameters& swizzle : swizzles) {
const size_t input_offset = swizzle.buffer_offset + map.offset;
const u32 num_dispatches_x = Common::DivCeil(swizzle.num_tiles.width, 8U);
const u32 num_dispatches_y = Common::DivCeil(swizzle.num_tiles.height, 8U);
const auto params = MakeBlockLinearSwizzle2DParams(swizzle, image.info);
ASSERT(params.origin == (std::array<u32, 3>{0, 0, 0}));
ASSERT(params.destination == (std::array<s32, 3>{0, 0, 0}));
ASSERT(params.bytes_per_block_log2 == 4);
glUniform1ui(2, params.layer_stride);
glUniform1ui(3, params.block_size);
glUniform1ui(4, params.x_shift);
glUniform1ui(5, params.block_height);
glUniform1ui(6, params.block_height_mask);
// ASTC texture data
glBindBufferRange(GL_SHADER_STORAGE_BUFFER, BINDING_INPUT_BUFFER, map.buffer, input_offset,
image.guest_size_bytes - swizzle.buffer_offset);
glBindImageTexture(BINDING_OUTPUT_IMAGE, image.StorageHandle(), swizzle.level, GL_TRUE, 0,
GL_WRITE_ONLY, GL_RGBA8);
glDispatchCompute(num_dispatches_x, num_dispatches_y, image.info.resources.layers);
}
// Precautionary barrier to ensure the compute shader is done decoding prior to texture access.
// GL_TEXTURE_FETCH_BARRIER_BIT and GL_SHADER_IMAGE_ACCESS_BARRIER_BIT are used in a separate
// glMemoryBarrier call by the texture cache runtime
glMemoryBarrier(GL_UNIFORM_BARRIER_BIT | GL_COMMAND_BARRIER_BIT | GL_PIXEL_BUFFER_BARRIER_BIT |
GL_TEXTURE_UPDATE_BARRIER_BIT | GL_BUFFER_UPDATE_BARRIER_BIT |
GL_SHADER_STORAGE_BARRIER_BIT | GL_CLIENT_MAPPED_BUFFER_BARRIER_BIT);
program_manager.RestoreGuestCompute();
}
void UtilShaders::BlockLinearUpload2D(Image& image, const ImageBufferMap& map,
std::span<const SwizzleParameters> swizzles) {
static constexpr Extent3D WORKGROUP_SIZE{32, 32, 1};
static constexpr GLuint BINDING_SWIZZLE_BUFFER = 0;
static constexpr GLuint BINDING_INPUT_BUFFER = 1;
static constexpr GLuint BINDING_OUTPUT_IMAGE = 0;
program_manager.BindComputeProgram(block_linear_unswizzle_2d_program.handle);
glFlushMappedNamedBufferRange(map.buffer, map.offset, image.guest_size_bytes);
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, BINDING_SWIZZLE_BUFFER, swizzle_table_buffer.handle);
const GLenum store_format = StoreFormat(BytesPerBlock(image.info.format));
for (const SwizzleParameters& swizzle : swizzles) {
const Extent3D num_tiles = swizzle.num_tiles;
const size_t input_offset = swizzle.buffer_offset + map.offset;
const u32 num_dispatches_x = Common::DivCeil(num_tiles.width, WORKGROUP_SIZE.width);
const u32 num_dispatches_y = Common::DivCeil(num_tiles.height, WORKGROUP_SIZE.height);
const auto params = MakeBlockLinearSwizzle2DParams(swizzle, image.info);
glUniform3uiv(0, 1, params.origin.data());
glUniform3iv(1, 1, params.destination.data());
glUniform1ui(2, params.bytes_per_block_log2);
glUniform1ui(3, params.layer_stride);
glUniform1ui(4, params.block_size);
glUniform1ui(5, params.x_shift);
glUniform1ui(6, params.block_height);
glUniform1ui(7, params.block_height_mask);
glBindBufferRange(GL_SHADER_STORAGE_BUFFER, BINDING_INPUT_BUFFER, map.buffer, input_offset,
image.guest_size_bytes - swizzle.buffer_offset);
glBindImageTexture(BINDING_OUTPUT_IMAGE, image.StorageHandle(), swizzle.level, GL_TRUE, 0,
GL_WRITE_ONLY, store_format);
glDispatchCompute(num_dispatches_x, num_dispatches_y, image.info.resources.layers);
}
program_manager.RestoreGuestCompute();
}
void UtilShaders::BlockLinearUpload3D(Image& image, const ImageBufferMap& map,
std::span<const SwizzleParameters> swizzles) {
static constexpr Extent3D WORKGROUP_SIZE{16, 8, 8};
static constexpr GLuint BINDING_SWIZZLE_BUFFER = 0;
static constexpr GLuint BINDING_INPUT_BUFFER = 1;
static constexpr GLuint BINDING_OUTPUT_IMAGE = 0;
glFlushMappedNamedBufferRange(map.buffer, map.offset, image.guest_size_bytes);
program_manager.BindComputeProgram(block_linear_unswizzle_3d_program.handle);
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, BINDING_SWIZZLE_BUFFER, swizzle_table_buffer.handle);
const GLenum store_format = StoreFormat(BytesPerBlock(image.info.format));
for (const SwizzleParameters& swizzle : swizzles) {
const Extent3D num_tiles = swizzle.num_tiles;
const size_t input_offset = swizzle.buffer_offset + map.offset;
const u32 num_dispatches_x = Common::DivCeil(num_tiles.width, WORKGROUP_SIZE.width);
const u32 num_dispatches_y = Common::DivCeil(num_tiles.height, WORKGROUP_SIZE.height);
const u32 num_dispatches_z = Common::DivCeil(num_tiles.depth, WORKGROUP_SIZE.depth);
const auto params = MakeBlockLinearSwizzle3DParams(swizzle, image.info);
glUniform3uiv(0, 1, params.origin.data());
glUniform3iv(1, 1, params.destination.data());
glUniform1ui(2, params.bytes_per_block_log2);
glUniform1ui(3, params.slice_size);
glUniform1ui(4, params.block_size);
glUniform1ui(5, params.x_shift);
glUniform1ui(6, params.block_height);
glUniform1ui(7, params.block_height_mask);
glUniform1ui(8, params.block_depth);
glUniform1ui(9, params.block_depth_mask);
glBindBufferRange(GL_SHADER_STORAGE_BUFFER, BINDING_INPUT_BUFFER, map.buffer, input_offset,
image.guest_size_bytes - swizzle.buffer_offset);
glBindImageTexture(BINDING_OUTPUT_IMAGE, image.StorageHandle(), swizzle.level, GL_TRUE, 0,
GL_WRITE_ONLY, store_format);
glDispatchCompute(num_dispatches_x, num_dispatches_y, num_dispatches_z);
}
program_manager.RestoreGuestCompute();
}
void UtilShaders::PitchUpload(Image& image, const ImageBufferMap& map,
std::span<const SwizzleParameters> swizzles) {
static constexpr Extent3D WORKGROUP_SIZE{32, 32, 1};
static constexpr GLuint BINDING_INPUT_BUFFER = 0;
static constexpr GLuint BINDING_OUTPUT_IMAGE = 0;
static constexpr GLuint LOC_ORIGIN = 0;
static constexpr GLuint LOC_DESTINATION = 1;
static constexpr GLuint LOC_BYTES_PER_BLOCK = 2;
static constexpr GLuint LOC_PITCH = 3;
const u32 bytes_per_block = BytesPerBlock(image.info.format);
const GLenum format = StoreFormat(bytes_per_block);
const u32 pitch = image.info.pitch;
UNIMPLEMENTED_IF_MSG(!std::has_single_bit(bytes_per_block),
"Non-power of two images are not implemented");
program_manager.BindComputeProgram(pitch_unswizzle_program.handle);
glFlushMappedNamedBufferRange(map.buffer, map.offset, image.guest_size_bytes);
glUniform2ui(LOC_ORIGIN, 0, 0);
glUniform2i(LOC_DESTINATION, 0, 0);
glUniform1ui(LOC_BYTES_PER_BLOCK, bytes_per_block);
glUniform1ui(LOC_PITCH, pitch);
glBindImageTexture(BINDING_OUTPUT_IMAGE, image.StorageHandle(), 0, GL_FALSE, 0, GL_WRITE_ONLY,
format);
for (const SwizzleParameters& swizzle : swizzles) {
const Extent3D num_tiles = swizzle.num_tiles;
const size_t input_offset = swizzle.buffer_offset + map.offset;
const u32 num_dispatches_x = Common::DivCeil(num_tiles.width, WORKGROUP_SIZE.width);
const u32 num_dispatches_y = Common::DivCeil(num_tiles.height, WORKGROUP_SIZE.height);
glBindBufferRange(GL_SHADER_STORAGE_BUFFER, BINDING_INPUT_BUFFER, map.buffer, input_offset,
image.guest_size_bytes - swizzle.buffer_offset);
glDispatchCompute(num_dispatches_x, num_dispatches_y, 1);
}
program_manager.RestoreGuestCompute();
}
void UtilShaders::CopyBC4(Image& dst_image, Image& src_image, std::span<const ImageCopy> copies) {
static constexpr GLuint BINDING_INPUT_IMAGE = 0;
static constexpr GLuint BINDING_OUTPUT_IMAGE = 1;
static constexpr GLuint LOC_SRC_OFFSET = 0;
static constexpr GLuint LOC_DST_OFFSET = 1;
program_manager.BindComputeProgram(copy_bc4_program.handle);
for (const ImageCopy& copy : copies) {
ASSERT(copy.src_subresource.base_layer == 0);
ASSERT(copy.src_subresource.num_layers == 1);
ASSERT(copy.dst_subresource.base_layer == 0);
ASSERT(copy.dst_subresource.num_layers == 1);
glUniform3ui(LOC_SRC_OFFSET, copy.src_offset.x, copy.src_offset.y, copy.src_offset.z);
glUniform3ui(LOC_DST_OFFSET, copy.dst_offset.x, copy.dst_offset.y, copy.dst_offset.z);
glBindImageTexture(BINDING_INPUT_IMAGE, src_image.StorageHandle(),
copy.src_subresource.base_level, GL_TRUE, 0, GL_READ_ONLY, GL_RG32UI);
glBindImageTexture(BINDING_OUTPUT_IMAGE, dst_image.StorageHandle(),
copy.dst_subresource.base_level, GL_TRUE, 0, GL_WRITE_ONLY, GL_RGBA8UI);
glDispatchCompute(copy.extent.width, copy.extent.height, copy.extent.depth);
}
program_manager.RestoreGuestCompute();
}
void UtilShaders::ConvertS8D24(Image& dst_image, std::span<const ImageCopy> copies) {
static constexpr GLuint BINDING_DESTINATION = 0;
static constexpr GLuint LOC_SIZE = 0;
program_manager.BindComputeProgram(convert_s8d24_program.handle);
for (const ImageCopy& copy : copies) {
ASSERT(copy.src_subresource.base_layer == 0);
ASSERT(copy.src_subresource.num_layers == 1);
ASSERT(copy.dst_subresource.base_layer == 0);
ASSERT(copy.dst_subresource.num_layers == 1);
glUniform3ui(LOC_SIZE, copy.extent.width, copy.extent.height, copy.extent.depth);
glBindImageTexture(BINDING_DESTINATION, dst_image.StorageHandle(),
copy.dst_subresource.base_level, GL_TRUE, 0, GL_READ_WRITE, GL_RGBA8UI);
glDispatchCompute(Common::DivCeil(copy.extent.width, 16u),
Common::DivCeil(copy.extent.height, 8u), copy.extent.depth);
}
program_manager.RestoreGuestCompute();
}
GLenum StoreFormat(u32 bytes_per_block) {
switch (bytes_per_block) {
case 1:
return GL_R8UI;
case 2:
return GL_R16UI;
case 4:
return GL_R32UI;
case 8:
return GL_RG32UI;
case 16:
return GL_RGBA32UI;
}
ASSERT(false);
return GL_R8UI;
}
} // namespace OpenGL

View File

@@ -1,58 +1,58 @@
// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include <span>
#include <glad/glad.h>
#include "common/common_types.h"
#include "video_core/renderer_opengl/gl_resource_manager.h"
#include "video_core/texture_cache/types.h"
namespace OpenGL {
class Image;
class ProgramManager;
struct ImageBufferMap;
class UtilShaders {
public:
explicit UtilShaders(ProgramManager& program_manager);
~UtilShaders();
void ASTCDecode(Image& image, const ImageBufferMap& map,
std::span<const VideoCommon::SwizzleParameters> swizzles);
void BlockLinearUpload2D(Image& image, const ImageBufferMap& map,
std::span<const VideoCommon::SwizzleParameters> swizzles);
void BlockLinearUpload3D(Image& image, const ImageBufferMap& map,
std::span<const VideoCommon::SwizzleParameters> swizzles);
void PitchUpload(Image& image, const ImageBufferMap& map,
std::span<const VideoCommon::SwizzleParameters> swizzles);
void CopyBC4(Image& dst_image, Image& src_image,
std::span<const VideoCommon::ImageCopy> copies);
void ConvertS8D24(Image& dst_image, std::span<const VideoCommon::ImageCopy> copies);
private:
ProgramManager& program_manager;
OGLBuffer swizzle_table_buffer;
OGLProgram astc_decoder_program;
OGLProgram block_linear_unswizzle_2d_program;
OGLProgram block_linear_unswizzle_3d_program;
OGLProgram pitch_unswizzle_program;
OGLProgram copy_bc4_program;
OGLProgram convert_s8d24_program;
};
GLenum StoreFormat(u32 bytes_per_block);
} // namespace OpenGL
// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include <span>
#include <glad/glad.h>
#include "common/common_types.h"
#include "video_core/renderer_opengl/gl_resource_manager.h"
#include "video_core/texture_cache/types.h"
namespace OpenGL {
class Image;
class ProgramManager;
struct ImageBufferMap;
class UtilShaders {
public:
explicit UtilShaders(ProgramManager& program_manager);
~UtilShaders();
void ASTCDecode(Image& image, const ImageBufferMap& map,
std::span<const VideoCommon::SwizzleParameters> swizzles);
void BlockLinearUpload2D(Image& image, const ImageBufferMap& map,
std::span<const VideoCommon::SwizzleParameters> swizzles);
void BlockLinearUpload3D(Image& image, const ImageBufferMap& map,
std::span<const VideoCommon::SwizzleParameters> swizzles);
void PitchUpload(Image& image, const ImageBufferMap& map,
std::span<const VideoCommon::SwizzleParameters> swizzles);
void CopyBC4(Image& dst_image, Image& src_image,
std::span<const VideoCommon::ImageCopy> copies);
void ConvertS8D24(Image& dst_image, std::span<const VideoCommon::ImageCopy> copies);
private:
ProgramManager& program_manager;
OGLBuffer swizzle_table_buffer;
OGLProgram astc_decoder_program;
OGLProgram block_linear_unswizzle_2d_program;
OGLProgram block_linear_unswizzle_3d_program;
OGLProgram pitch_unswizzle_program;
OGLProgram copy_bc4_program;
OGLProgram convert_s8d24_program;
};
GLenum StoreFormat(u32 bytes_per_block);
} // namespace OpenGL