early-access version 1255
This commit is contained in:
2124
src/video_core/renderer_opengl/gl_arb_decompiler.cpp
Executable file
2124
src/video_core/renderer_opengl/gl_arb_decompiler.cpp
Executable file
File diff suppressed because it is too large
Load Diff
29
src/video_core/renderer_opengl/gl_arb_decompiler.h
Executable file
29
src/video_core/renderer_opengl/gl_arb_decompiler.h
Executable file
@@ -0,0 +1,29 @@
|
||||
// Copyright 2020 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <string>
|
||||
#include <string_view>
|
||||
|
||||
#include "common/common_types.h"
|
||||
|
||||
namespace Tegra::Engines {
|
||||
enum class ShaderType : u32;
|
||||
}
|
||||
|
||||
namespace VideoCommon::Shader {
|
||||
class ShaderIR;
|
||||
class Registry;
|
||||
} // namespace VideoCommon::Shader
|
||||
|
||||
namespace OpenGL {
|
||||
|
||||
class Device;
|
||||
|
||||
/// Decompiles a shader IR into the text of an assembly shader.
/// NOTE(review): judging by the file name this targets the ARB/NV assembly dialect - confirm.
/// @param device     OpenGL device description.
/// @param ir         Shader intermediate representation to decompile.
/// @param registry   Shader registry associated with the IR.
/// @param stage      Shader stage being decompiled.
/// @param identifier Identifier string for the shader.
/// @return The generated shader source.
std::string DecompileAssemblyShader(const Device& device, const VideoCommon::Shader::ShaderIR& ir,
                                    const VideoCommon::Shader::Registry& registry,
                                    Tegra::Engines::ShaderType stage, std::string_view identifier);
|
||||
|
||||
} // namespace OpenGL
|
99
src/video_core/renderer_opengl/gl_buffer_cache.cpp
Executable file
99
src/video_core/renderer_opengl/gl_buffer_cache.cpp
Executable file
@@ -0,0 +1,99 @@
|
||||
// Copyright 2018 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include <memory>
|
||||
|
||||
#include <glad/glad.h>
|
||||
|
||||
#include "common/assert.h"
|
||||
#include "common/microprofile.h"
|
||||
#include "video_core/buffer_cache/buffer_cache.h"
|
||||
#include "video_core/engines/maxwell_3d.h"
|
||||
#include "video_core/rasterizer_interface.h"
|
||||
#include "video_core/renderer_opengl/gl_buffer_cache.h"
|
||||
#include "video_core/renderer_opengl/gl_device.h"
|
||||
#include "video_core/renderer_opengl/gl_rasterizer.h"
|
||||
#include "video_core/renderer_opengl/gl_resource_manager.h"
|
||||
|
||||
namespace OpenGL {
|
||||
|
||||
using Maxwell = Tegra::Engines::Maxwell3D::Regs;
|
||||
|
||||
MICROPROFILE_DEFINE(OpenGL_Buffer_Download, "OpenGL", "Buffer Download", MP_RGB(192, 192, 128));
|
||||
|
||||
/// Creates the OpenGL buffer backing a cache block of `size_` bytes located at `cpu_addr_`.
///
/// When the device uses assembly shaders or unified vertex buffer memory, the buffer is also made
/// resident and its GPU address is stored so that Address() can return it.
Buffer::Buffer(const Device& device_, VAddr cpu_addr_, std::size_t size_)
    : BufferBlock{cpu_addr_, size_} {
    gl_buffer.Create();

    const auto storage_size = static_cast<GLsizeiptr>(size_);
    glNamedBufferData(gl_buffer.handle, storage_size, nullptr, GL_DYNAMIC_DRAW);

    const bool wants_gpu_address =
        device_.UseAssemblyShaders() || device_.HasVertexBufferUnifiedMemory();
    if (!wants_gpu_address) {
        return;
    }
    glMakeNamedBufferResidentNV(gl_buffer.handle, GL_READ_WRITE);
    glGetNamedBufferParameterui64vNV(gl_buffer.handle, GL_BUFFER_GPU_ADDRESS_NV, &gpu_address);
}

Buffer::~Buffer() = default;
|
||||
|
||||
/// Copies `data_size` bytes from `data` into this buffer, starting at byte `offset`.
void Buffer::Upload(std::size_t offset, std::size_t data_size, const u8* data) {
    const auto dst_offset = static_cast<GLintptr>(offset);
    const auto byte_count = static_cast<GLsizeiptr>(data_size);
    glNamedBufferSubData(Handle(), dst_offset, byte_count, data);
}
|
||||
|
||||
void Buffer::Download(std::size_t offset, std::size_t data_size, u8* data) {
|
||||
MICROPROFILE_SCOPE(OpenGL_Buffer_Download);
|
||||
const GLsizeiptr gl_size = static_cast<GLsizeiptr>(data_size);
|
||||
const GLintptr gl_offset = static_cast<GLintptr>(offset);
|
||||
if (read_buffer.handle == 0) {
|
||||
read_buffer.Create();
|
||||
glNamedBufferData(read_buffer.handle, static_cast<GLsizeiptr>(Size()), nullptr,
|
||||
GL_STREAM_READ);
|
||||
}
|
||||
glMemoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT);
|
||||
glCopyNamedBufferSubData(gl_buffer.handle, read_buffer.handle, gl_offset, gl_offset, gl_size);
|
||||
glGetNamedBufferSubData(read_buffer.handle, gl_offset, gl_size, data);
|
||||
}
|
||||
|
||||
/// Copies `copy_size` bytes from `src` (at `src_offset`) into this buffer (at `dst_offset`).
void Buffer::CopyFrom(const Buffer& src, std::size_t src_offset, std::size_t dst_offset,
                      std::size_t copy_size) {
    const auto read_offset = static_cast<GLintptr>(src_offset);
    const auto write_offset = static_cast<GLintptr>(dst_offset);
    const auto byte_count = static_cast<GLsizeiptr>(copy_size);
    glCopyNamedBufferSubData(src.Handle(), Handle(), read_offset, write_offset, byte_count);
}
|
||||
|
||||
/// Builds the buffer cache. On devices with a fast BufferSubData path, one buffer per constant
/// buffer slot is allocated up front with storage for the largest constant buffer.
OGLBufferCache::OGLBufferCache(VideoCore::RasterizerInterface& rasterizer_,
                               Tegra::MemoryManager& gpu_memory_, Core::Memory::Memory& cpu_memory_,
                               const Device& device_, OGLStreamBuffer& stream_buffer_,
                               StateTracker& state_tracker)
    : GenericBufferCache{rasterizer_, gpu_memory_, cpu_memory_, stream_buffer_}, device{device_} {
    if (device.HasFastBufferSubData()) {
        static constexpr GLsizeiptr cbuf_size =
            static_cast<GLsizeiptr>(Maxwell::MaxConstBufferSize);

        glCreateBuffers(static_cast<GLsizei>(cbufs.size()), cbufs.data());
        for (const GLuint handle : cbufs) {
            glNamedBufferData(handle, cbuf_size, nullptr, GL_STREAM_DRAW);
        }
    }
}
|
||||
|
||||
/// Deletes every handle stored in `cbufs`.
OGLBufferCache::~OGLBufferCache() {
    const auto handle_count = static_cast<GLsizei>(cbufs.size());
    glDeleteBuffers(handle_count, cbufs.data());
}
|
||||
|
||||
/// Allocates a new OpenGL-backed block of `size` bytes for the given CPU address.
std::shared_ptr<Buffer> OGLBufferCache::CreateBlock(VAddr cpu_addr, std::size_t size) {
    std::shared_ptr<Buffer> block = std::make_shared<Buffer>(device, cpu_addr, size);
    return block;
}
|
||||
|
||||
/// Returns a zero-filled BufferInfo; the requested size is ignored.
OGLBufferCache::BufferInfo OGLBufferCache::GetEmptyBuffer(std::size_t) {
    return BufferInfo{0, 0, 0};
}
|
||||
|
||||
/// Uploads `size` bytes from `raw_pointer` into the next unused constant buffer and returns it.
///
/// Buffers are handed out in order through `cbuf_cursor`, which Acquire() resets to zero.
OGLBufferCache::BufferInfo OGLBufferCache::ConstBufferUpload(const void* raw_pointer,
                                                             std::size_t size) {
    DEBUG_ASSERT(cbuf_cursor < cbufs.size());
    const GLuint handle = cbufs[cbuf_cursor];
    ++cbuf_cursor;

    const auto byte_count = static_cast<GLsizeiptr>(size);
    glNamedBufferSubData(handle, 0, byte_count, raw_pointer);
    return BufferInfo{handle, 0, 0};
}
|
||||
|
||||
} // namespace OpenGL
|
83
src/video_core/renderer_opengl/gl_buffer_cache.h
Executable file
83
src/video_core/renderer_opengl/gl_buffer_cache.h
Executable file
@@ -0,0 +1,83 @@
|
||||
// Copyright 2018 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <array>
|
||||
#include <memory>
|
||||
|
||||
#include "common/common_types.h"
|
||||
#include "video_core/buffer_cache/buffer_cache.h"
|
||||
#include "video_core/engines/maxwell_3d.h"
|
||||
#include "video_core/renderer_opengl/gl_resource_manager.h"
|
||||
#include "video_core/renderer_opengl/gl_stream_buffer.h"
|
||||
|
||||
namespace Core {
|
||||
class System;
|
||||
}
|
||||
|
||||
namespace OpenGL {
|
||||
|
||||
class Device;
|
||||
class OGLStreamBuffer;
|
||||
class RasterizerOpenGL;
|
||||
class StateTracker;
|
||||
|
||||
class Buffer : public VideoCommon::BufferBlock {
|
||||
public:
|
||||
explicit Buffer(const Device& device_, VAddr cpu_addr_, std::size_t size_);
|
||||
~Buffer();
|
||||
|
||||
void Upload(std::size_t offset, std::size_t data_size, const u8* data);
|
||||
|
||||
void Download(std::size_t offset, std::size_t data_size, u8* data);
|
||||
|
||||
void CopyFrom(const Buffer& src, std::size_t src_offset, std::size_t dst_offset,
|
||||
std::size_t copy_size);
|
||||
|
||||
GLuint Handle() const noexcept {
|
||||
return gl_buffer.handle;
|
||||
}
|
||||
|
||||
u64 Address() const noexcept {
|
||||
return gpu_address;
|
||||
}
|
||||
|
||||
private:
|
||||
OGLBuffer gl_buffer;
|
||||
OGLBuffer read_buffer;
|
||||
u64 gpu_address = 0;
|
||||
};
|
||||
|
||||
using GenericBufferCache = VideoCommon::BufferCache<Buffer, GLuint, OGLStreamBuffer>;
|
||||
class OGLBufferCache final : public GenericBufferCache {
|
||||
public:
|
||||
explicit OGLBufferCache(VideoCore::RasterizerInterface& rasterizer,
|
||||
Tegra::MemoryManager& gpu_memory, Core::Memory::Memory& cpu_memory,
|
||||
const Device& device, OGLStreamBuffer& stream_buffer,
|
||||
StateTracker& state_tracker);
|
||||
~OGLBufferCache();
|
||||
|
||||
BufferInfo GetEmptyBuffer(std::size_t) override;
|
||||
|
||||
void Acquire() noexcept {
|
||||
cbuf_cursor = 0;
|
||||
}
|
||||
|
||||
protected:
|
||||
std::shared_ptr<Buffer> CreateBlock(VAddr cpu_addr, std::size_t size) override;
|
||||
|
||||
BufferInfo ConstBufferUpload(const void* raw_pointer, std::size_t size) override;
|
||||
|
||||
private:
|
||||
static constexpr std::size_t NUM_CBUFS = Tegra::Engines::Maxwell3D::Regs::MaxConstBuffers *
|
||||
Tegra::Engines::Maxwell3D::Regs::MaxShaderProgram;
|
||||
|
||||
const Device& device;
|
||||
|
||||
std::size_t cbuf_cursor = 0;
|
||||
std::array<GLuint, NUM_CBUFS> cbufs{};
|
||||
};
|
||||
|
||||
} // namespace OpenGL
|
294
src/video_core/renderer_opengl/gl_device.cpp
Executable file
294
src/video_core/renderer_opengl/gl_device.cpp
Executable file
@@ -0,0 +1,294 @@
|
||||
// Copyright 2019 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include <algorithm>
|
||||
#include <array>
|
||||
#include <cstddef>
|
||||
#include <cstdlib>
|
||||
#include <cstring>
|
||||
#include <limits>
|
||||
#include <optional>
|
||||
#include <span>
|
||||
#include <vector>
|
||||
|
||||
#include <glad/glad.h>
|
||||
|
||||
#include "common/logging/log.h"
|
||||
#include "common/scope_exit.h"
|
||||
#include "core/settings.h"
|
||||
#include "video_core/renderer_opengl/gl_device.h"
|
||||
#include "video_core/renderer_opengl/gl_resource_manager.h"
|
||||
|
||||
namespace OpenGL {
|
||||
|
||||
namespace {
|
||||
|
||||
// One uniform block is reserved for emulation purposes
|
||||
constexpr u32 ReservedUniformBlocks = 1;
|
||||
|
||||
constexpr u32 NumStages = 5;
|
||||
|
||||
constexpr std::array LIMIT_UBOS = {
|
||||
GL_MAX_VERTEX_UNIFORM_BLOCKS, GL_MAX_TESS_CONTROL_UNIFORM_BLOCKS,
|
||||
GL_MAX_TESS_EVALUATION_UNIFORM_BLOCKS, GL_MAX_GEOMETRY_UNIFORM_BLOCKS,
|
||||
GL_MAX_FRAGMENT_UNIFORM_BLOCKS, GL_MAX_COMPUTE_UNIFORM_BLOCKS,
|
||||
};
|
||||
constexpr std::array LIMIT_SSBOS = {
|
||||
GL_MAX_VERTEX_SHADER_STORAGE_BLOCKS, GL_MAX_TESS_CONTROL_SHADER_STORAGE_BLOCKS,
|
||||
GL_MAX_TESS_EVALUATION_SHADER_STORAGE_BLOCKS, GL_MAX_GEOMETRY_SHADER_STORAGE_BLOCKS,
|
||||
GL_MAX_FRAGMENT_SHADER_STORAGE_BLOCKS, GL_MAX_COMPUTE_SHADER_STORAGE_BLOCKS,
|
||||
};
|
||||
constexpr std::array LIMIT_SAMPLERS = {
|
||||
GL_MAX_VERTEX_TEXTURE_IMAGE_UNITS,
|
||||
GL_MAX_TESS_CONTROL_TEXTURE_IMAGE_UNITS,
|
||||
GL_MAX_TESS_EVALUATION_TEXTURE_IMAGE_UNITS,
|
||||
GL_MAX_GEOMETRY_TEXTURE_IMAGE_UNITS,
|
||||
GL_MAX_TEXTURE_IMAGE_UNITS,
|
||||
GL_MAX_COMPUTE_TEXTURE_IMAGE_UNITS,
|
||||
};
|
||||
constexpr std::array LIMIT_IMAGES = {
|
||||
GL_MAX_VERTEX_IMAGE_UNIFORMS, GL_MAX_TESS_CONTROL_IMAGE_UNIFORMS,
|
||||
GL_MAX_TESS_EVALUATION_IMAGE_UNIFORMS, GL_MAX_GEOMETRY_IMAGE_UNIFORMS,
|
||||
GL_MAX_FRAGMENT_IMAGE_UNIFORMS, GL_MAX_COMPUTE_IMAGE_UNIFORMS,
|
||||
};
|
||||
|
||||
template <typename T>
|
||||
T GetInteger(GLenum pname) {
|
||||
GLint temporary;
|
||||
glGetIntegerv(pname, &temporary);
|
||||
return static_cast<T>(temporary);
|
||||
}
|
||||
|
||||
bool TestProgram(const GLchar* glsl) {
|
||||
const GLuint shader{glCreateShaderProgramv(GL_VERTEX_SHADER, 1, &glsl)};
|
||||
GLint link_status;
|
||||
glGetProgramiv(shader, GL_LINK_STATUS, &link_status);
|
||||
glDeleteProgram(shader);
|
||||
return link_status == GL_TRUE;
|
||||
}
|
||||
|
||||
std::vector<std::string_view> GetExtensions() {
|
||||
GLint num_extensions;
|
||||
glGetIntegerv(GL_NUM_EXTENSIONS, &num_extensions);
|
||||
std::vector<std::string_view> extensions;
|
||||
extensions.reserve(num_extensions);
|
||||
for (GLint index = 0; index < num_extensions; ++index) {
|
||||
extensions.push_back(
|
||||
reinterpret_cast<const char*>(glGetStringi(GL_EXTENSIONS, static_cast<GLuint>(index))));
|
||||
}
|
||||
return extensions;
|
||||
}
|
||||
|
||||
/// Returns true when `extension` appears in `extensions` (exact match).
bool HasExtension(std::span<const std::string_view> extensions, std::string_view extension) {
    for (const std::string_view candidate : extensions) {
        if (candidate == extension) {
            return true;
        }
    }
    return false;
}
|
||||
|
||||
/// Takes `amount` bindings out of a pool and returns the first index of the extracted range.
///
/// @param base   Next free binding index; advanced by the amount actually taken.
/// @param num    Bindings left in the pool; reduced by the amount actually taken.
/// @param amount Requested count. It is asserted against `num` before any clamping.
/// @param limit  Optional GL limit to query; the request is clamped to its value.
/// @return The value `base` had before the extraction.
u32 Extract(u32& base, u32& num, u32 amount, std::optional<GLenum> limit = {}) {
    ASSERT(num >= amount);
    if (limit.has_value()) {
        const u32 stage_limit = GetInteger<u32>(*limit);
        if (stage_limit < amount) {
            amount = stage_limit;
        }
    }
    num -= amount;

    const u32 first_binding = base;
    base = first_binding + amount;
    return first_binding;
}
|
||||
|
||||
std::array<u32, Tegra::Engines::MaxShaderTypes> BuildMaxUniformBuffers() noexcept {
|
||||
std::array<u32, Tegra::Engines::MaxShaderTypes> max;
|
||||
std::ranges::transform(LIMIT_UBOS, max.begin(),
|
||||
[](GLenum pname) { return GetInteger<u32>(pname); });
|
||||
return max;
|
||||
}
|
||||
|
||||
std::array<Device::BaseBindings, Tegra::Engines::MaxShaderTypes> BuildBaseBindings() noexcept {
|
||||
std::array<Device::BaseBindings, Tegra::Engines::MaxShaderTypes> bindings;
|
||||
|
||||
static constexpr std::array<std::size_t, 5> stage_swizzle{0, 1, 2, 3, 4};
|
||||
const u32 total_ubos = GetInteger<u32>(GL_MAX_UNIFORM_BUFFER_BINDINGS);
|
||||
const u32 total_ssbos = GetInteger<u32>(GL_MAX_SHADER_STORAGE_BUFFER_BINDINGS);
|
||||
const u32 total_samplers = GetInteger<u32>(GL_MAX_COMBINED_TEXTURE_IMAGE_UNITS);
|
||||
|
||||
u32 num_ubos = total_ubos - ReservedUniformBlocks;
|
||||
u32 num_ssbos = total_ssbos;
|
||||
u32 num_samplers = total_samplers;
|
||||
|
||||
u32 base_ubo = ReservedUniformBlocks;
|
||||
u32 base_ssbo = 0;
|
||||
u32 base_samplers = 0;
|
||||
|
||||
for (std::size_t i = 0; i < NumStages; ++i) {
|
||||
const std::size_t stage = stage_swizzle[i];
|
||||
bindings[stage] = {
|
||||
Extract(base_ubo, num_ubos, total_ubos / NumStages, LIMIT_UBOS[stage]),
|
||||
Extract(base_ssbo, num_ssbos, total_ssbos / NumStages, LIMIT_SSBOS[stage]),
|
||||
Extract(base_samplers, num_samplers, total_samplers / NumStages,
|
||||
LIMIT_SAMPLERS[stage])};
|
||||
}
|
||||
|
||||
u32 num_images = GetInteger<u32>(GL_MAX_IMAGE_UNITS);
|
||||
u32 base_images = 0;
|
||||
|
||||
// GL_MAX_IMAGE_UNITS is guaranteed by the spec to have a minimum value of 8.
|
||||
// Due to the limitation of GL_MAX_IMAGE_UNITS, reserve at least 4 image bindings on the
|
||||
// fragment stage, and at least 1 for the rest of the stages.
|
||||
// So far games are observed to use 1 image binding on vertex and 4 on fragment stages.
|
||||
|
||||
// Reserve at least 4 image bindings on the fragment stage.
|
||||
bindings[4].image =
|
||||
Extract(base_images, num_images, std::max(4U, num_images / NumStages), LIMIT_IMAGES[4]);
|
||||
|
||||
// This is guaranteed to be at least 1.
|
||||
const u32 total_extracted_images = num_images / (NumStages - 1);
|
||||
|
||||
// Reserve the other image bindings.
|
||||
for (std::size_t i = 0; i < NumStages; ++i) {
|
||||
const std::size_t stage = stage_swizzle[i];
|
||||
if (stage == 4) {
|
||||
continue;
|
||||
}
|
||||
bindings[stage].image =
|
||||
Extract(base_images, num_images, total_extracted_images, LIMIT_IMAGES[stage]);
|
||||
}
|
||||
|
||||
// Compute doesn't care about any of this.
|
||||
bindings[5] = {0, 0, 0, 0};
|
||||
|
||||
return bindings;
|
||||
}
|
||||
|
||||
bool IsASTCSupported() {
|
||||
static constexpr std::array targets = {GL_TEXTURE_2D, GL_TEXTURE_2D_ARRAY};
|
||||
static constexpr std::array formats = {
|
||||
GL_COMPRESSED_RGBA_ASTC_4x4_KHR, GL_COMPRESSED_RGBA_ASTC_5x4_KHR,
|
||||
GL_COMPRESSED_RGBA_ASTC_5x5_KHR, GL_COMPRESSED_RGBA_ASTC_6x5_KHR,
|
||||
GL_COMPRESSED_RGBA_ASTC_6x6_KHR, GL_COMPRESSED_RGBA_ASTC_8x5_KHR,
|
||||
GL_COMPRESSED_RGBA_ASTC_8x6_KHR, GL_COMPRESSED_RGBA_ASTC_8x8_KHR,
|
||||
GL_COMPRESSED_RGBA_ASTC_10x5_KHR, GL_COMPRESSED_RGBA_ASTC_10x6_KHR,
|
||||
GL_COMPRESSED_RGBA_ASTC_10x8_KHR, GL_COMPRESSED_RGBA_ASTC_10x10_KHR,
|
||||
GL_COMPRESSED_RGBA_ASTC_12x10_KHR, GL_COMPRESSED_RGBA_ASTC_12x12_KHR,
|
||||
GL_COMPRESSED_SRGB8_ALPHA8_ASTC_4x4_KHR, GL_COMPRESSED_SRGB8_ALPHA8_ASTC_5x4_KHR,
|
||||
GL_COMPRESSED_SRGB8_ALPHA8_ASTC_5x5_KHR, GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x5_KHR,
|
||||
GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x6_KHR, GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x5_KHR,
|
||||
GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x6_KHR, GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x8_KHR,
|
||||
GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x5_KHR, GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x6_KHR,
|
||||
GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x8_KHR, GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x10_KHR,
|
||||
GL_COMPRESSED_SRGB8_ALPHA8_ASTC_12x10_KHR, GL_COMPRESSED_SRGB8_ALPHA8_ASTC_12x12_KHR,
|
||||
};
|
||||
static constexpr std::array required_support = {
|
||||
GL_VERTEX_TEXTURE, GL_TESS_CONTROL_TEXTURE, GL_TESS_EVALUATION_TEXTURE,
|
||||
GL_GEOMETRY_TEXTURE, GL_FRAGMENT_TEXTURE, GL_COMPUTE_TEXTURE,
|
||||
};
|
||||
|
||||
for (const GLenum target : targets) {
|
||||
for (const GLenum format : formats) {
|
||||
for (const GLenum support : required_support) {
|
||||
GLint value;
|
||||
glGetInternalformativ(target, format, support, 1, &value);
|
||||
if (value != GL_FULL_SUPPORT) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
/// Returns true when one of the two Nsight environment variables is set or the context exposes
/// GL_EXT_debug_tool.
[[nodiscard]] bool IsDebugToolAttached(std::span<const std::string_view> extensions) {
    if (std::getenv("NVTX_INJECTION64_PATH") != nullptr) {
        return true;
    }
    if (std::getenv("NSIGHT_LAUNCHED") != nullptr) {
        return true;
    }
    return HasExtension(extensions, "GL_EXT_debug_tool");
}
|
||||
|
||||
} // Anonymous namespace
|
||||
|
||||
/// Fills in every capability field from the current OpenGL context and the user settings.
Device::Device()
    : max_uniform_buffers{BuildMaxUniformBuffers()}, base_bindings{BuildBaseBindings()} {
    const std::string_view vendor = reinterpret_cast<const char*>(glGetString(GL_VENDOR));
    const std::string_view version = reinterpret_cast<const char*>(glGetString(GL_VERSION));
    const std::vector extensions = GetExtensions();

    const bool is_nvidia = vendor == "NVIDIA Corporation";
    const bool is_amd = vendor == "ATI Technologies Inc.";

    // This specific beta driver is excluded from the fast BufferSubData path.
    const bool is_known_bad_driver = is_nvidia && version == "4.6.0 NVIDIA 443.24";
    if (is_known_bad_driver) {
        LOG_WARNING(
            Render_OpenGL,
            "Beta driver 443.24 is known to have issues. There might be performance issues.");
    }

    // Limits and alignments
    uniform_buffer_alignment = GetInteger<size_t>(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT);
    shader_storage_alignment = GetInteger<size_t>(GL_SHADER_STORAGE_BUFFER_OFFSET_ALIGNMENT);
    max_vertex_attributes = GetInteger<u32>(GL_MAX_VERTEX_ATTRIBS);
    max_varyings = GetInteger<u32>(GL_MAX_VARYING_VECTORS);
    max_compute_shared_memory_size = GetInteger<u32>(GL_MAX_COMPUTE_SHARED_MEMORY_SIZE);

    // Extension-driven features
    has_warp_intrinsics = GLAD_GL_NV_gpu_shader5 && GLAD_GL_NV_shader_thread_group &&
                          GLAD_GL_NV_shader_thread_shuffle;
    has_shader_ballot = GLAD_GL_ARB_shader_ballot;
    has_vertex_viewport_layer = GLAD_GL_ARB_shader_viewport_layer_array;
    has_image_load_formatted = HasExtension(extensions, "GL_EXT_shader_image_load_formatted");
    has_texture_shadow_lod = HasExtension(extensions, "GL_EXT_texture_shadow_lod");

    // Runtime probes and vendor workarounds
    has_astc = IsASTCSupported();
    has_variable_aoffi = TestVariableAoffi();
    has_component_indexing_bug = is_amd;
    has_precise_bug = TestPreciseBug();
    has_nv_viewport_array2 = GLAD_GL_NV_viewport_array2;
    has_vertex_buffer_unified_memory = GLAD_GL_NV_vertex_buffer_unified_memory;
    has_debugging_tool_attached = IsDebugToolAttached(extensions);

    // At the moment of writing this, only Nvidia's driver optimizes BufferSubData on exclusive
    // uniform buffers as "push constants"
    has_fast_buffer_sub_data = is_nvidia && !is_known_bad_driver;

    // Assembly shaders need both the user setting and all four NV extensions.
    use_assembly_shaders = Settings::values.use_assembly_shaders.GetValue() &&
                           GLAD_GL_NV_gpu_program5 && GLAD_GL_NV_compute_program5 &&
                           GLAD_GL_NV_transform_feedback && GLAD_GL_NV_transform_feedback2;

    use_asynchronous_shaders = Settings::values.use_asynchronous_shaders.GetValue();

    LOG_INFO(Render_OpenGL, "Renderer_VariableAOFFI: {}", has_variable_aoffi);
    LOG_INFO(Render_OpenGL, "Renderer_ComponentIndexingBug: {}", has_component_indexing_bug);
    LOG_INFO(Render_OpenGL, "Renderer_PreciseBug: {}", has_precise_bug);

    if (Settings::values.use_assembly_shaders.GetValue() && !use_assembly_shaders) {
        LOG_ERROR(Render_OpenGL, "Assembly shaders enabled but not supported");
    }
}
|
||||
|
||||
/// Builds a device description from fixed values without making any OpenGL call.
Device::Device(std::nullptr_t) {
    // Limits
    max_uniform_buffers.fill(std::numeric_limits<u32>::max());
    uniform_buffer_alignment = 4;
    shader_storage_alignment = 4;
    max_vertex_attributes = 16;
    max_varyings = 15;
    max_compute_shared_memory_size = 0x10000;

    // Features reported as available; every other flag keeps its default value.
    has_warp_intrinsics = true;
    has_shader_ballot = true;
    has_vertex_viewport_layer = true;
    has_image_load_formatted = true;
    has_texture_shadow_lod = true;
    has_variable_aoffi = true;
}
|
||||
|
||||
// Returns whether a test shader that passes a uniform (non-constant) offset to textureOffset
// links successfully; the result comes straight from TestProgram.
bool Device::TestVariableAoffi() {
|
||||
return TestProgram(R"(#version 430 core
|
||||
// This is a unit test, please ignore me on apitrace bug reports.
|
||||
uniform sampler2D tex;
|
||||
uniform ivec2 variable_offset;
|
||||
out vec4 output_attribute;
|
||||
void main() {
|
||||
output_attribute = textureOffset(tex, vec2(0), variable_offset);
|
||||
})");
|
||||
}
|
||||
|
||||
// Returns true when a test shader that stores a shadow sampler result in a `precise` variable
// fails to link, i.e. the result of TestProgram is negated.
bool Device::TestPreciseBug() {
|
||||
return !TestProgram(R"(#version 430 core
|
||||
in vec3 coords;
|
||||
out float out_value;
|
||||
uniform sampler2DShadow tex;
|
||||
void main() {
|
||||
precise float tmp_value = vec4(texture(tex, coords)).x;
|
||||
out_value = tmp_value;
|
||||
})");
|
||||
}
|
||||
|
||||
} // namespace OpenGL
|
147
src/video_core/renderer_opengl/gl_device.h
Executable file
147
src/video_core/renderer_opengl/gl_device.h
Executable file
@@ -0,0 +1,147 @@
|
||||
// Copyright 2019 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <cstddef>
|
||||
#include "common/common_types.h"
|
||||
#include "video_core/engines/shader_type.h"
|
||||
|
||||
namespace OpenGL {
|
||||
|
||||
static constexpr u32 EmulationUniformBlockBinding = 0;
|
||||
|
||||
class Device final {
|
||||
public:
|
||||
struct BaseBindings final {
|
||||
u32 uniform_buffer{};
|
||||
u32 shader_storage_buffer{};
|
||||
u32 sampler{};
|
||||
u32 image{};
|
||||
};
|
||||
|
||||
explicit Device();
|
||||
explicit Device(std::nullptr_t);
|
||||
|
||||
u32 GetMaxUniformBuffers(Tegra::Engines::ShaderType shader_type) const noexcept {
|
||||
return max_uniform_buffers[static_cast<std::size_t>(shader_type)];
|
||||
}
|
||||
|
||||
const BaseBindings& GetBaseBindings(std::size_t stage_index) const noexcept {
|
||||
return base_bindings[stage_index];
|
||||
}
|
||||
|
||||
const BaseBindings& GetBaseBindings(Tegra::Engines::ShaderType shader_type) const noexcept {
|
||||
return GetBaseBindings(static_cast<std::size_t>(shader_type));
|
||||
}
|
||||
|
||||
size_t GetUniformBufferAlignment() const {
|
||||
return uniform_buffer_alignment;
|
||||
}
|
||||
|
||||
size_t GetShaderStorageBufferAlignment() const {
|
||||
return shader_storage_alignment;
|
||||
}
|
||||
|
||||
u32 GetMaxVertexAttributes() const {
|
||||
return max_vertex_attributes;
|
||||
}
|
||||
|
||||
u32 GetMaxVaryings() const {
|
||||
return max_varyings;
|
||||
}
|
||||
|
||||
u32 GetMaxComputeSharedMemorySize() const {
|
||||
return max_compute_shared_memory_size;
|
||||
}
|
||||
|
||||
bool HasWarpIntrinsics() const {
|
||||
return has_warp_intrinsics;
|
||||
}
|
||||
|
||||
bool HasShaderBallot() const {
|
||||
return has_shader_ballot;
|
||||
}
|
||||
|
||||
bool HasVertexViewportLayer() const {
|
||||
return has_vertex_viewport_layer;
|
||||
}
|
||||
|
||||
bool HasImageLoadFormatted() const {
|
||||
return has_image_load_formatted;
|
||||
}
|
||||
|
||||
bool HasTextureShadowLod() const {
|
||||
return has_texture_shadow_lod;
|
||||
}
|
||||
|
||||
bool HasVertexBufferUnifiedMemory() const {
|
||||
return has_vertex_buffer_unified_memory;
|
||||
}
|
||||
|
||||
bool HasASTC() const {
|
||||
return has_astc;
|
||||
}
|
||||
|
||||
bool HasVariableAoffi() const {
|
||||
return has_variable_aoffi;
|
||||
}
|
||||
|
||||
bool HasComponentIndexingBug() const {
|
||||
return has_component_indexing_bug;
|
||||
}
|
||||
|
||||
bool HasPreciseBug() const {
|
||||
return has_precise_bug;
|
||||
}
|
||||
|
||||
bool HasFastBufferSubData() const {
|
||||
return has_fast_buffer_sub_data;
|
||||
}
|
||||
|
||||
bool HasNvViewportArray2() const {
|
||||
return has_nv_viewport_array2;
|
||||
}
|
||||
|
||||
bool HasDebuggingToolAttached() const {
|
||||
return has_debugging_tool_attached;
|
||||
}
|
||||
|
||||
bool UseAssemblyShaders() const {
|
||||
return use_assembly_shaders;
|
||||
}
|
||||
|
||||
bool UseAsynchronousShaders() const {
|
||||
return use_asynchronous_shaders;
|
||||
}
|
||||
|
||||
private:
|
||||
static bool TestVariableAoffi();
|
||||
static bool TestPreciseBug();
|
||||
|
||||
std::array<u32, Tegra::Engines::MaxShaderTypes> max_uniform_buffers{};
|
||||
std::array<BaseBindings, Tegra::Engines::MaxShaderTypes> base_bindings{};
|
||||
size_t uniform_buffer_alignment{};
|
||||
size_t shader_storage_alignment{};
|
||||
u32 max_vertex_attributes{};
|
||||
u32 max_varyings{};
|
||||
u32 max_compute_shared_memory_size{};
|
||||
bool has_warp_intrinsics{};
|
||||
bool has_shader_ballot{};
|
||||
bool has_vertex_viewport_layer{};
|
||||
bool has_image_load_formatted{};
|
||||
bool has_texture_shadow_lod{};
|
||||
bool has_vertex_buffer_unified_memory{};
|
||||
bool has_astc{};
|
||||
bool has_variable_aoffi{};
|
||||
bool has_component_indexing_bug{};
|
||||
bool has_precise_bug{};
|
||||
bool has_fast_buffer_sub_data{};
|
||||
bool has_nv_viewport_array2{};
|
||||
bool has_debugging_tool_attached{};
|
||||
bool use_assembly_shaders{};
|
||||
bool use_asynchronous_shaders{};
|
||||
};
|
||||
|
||||
} // namespace OpenGL
|
73
src/video_core/renderer_opengl/gl_fence_manager.cpp
Executable file
73
src/video_core/renderer_opengl/gl_fence_manager.cpp
Executable file
@@ -0,0 +1,73 @@
|
||||
// Copyright 2020 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include "common/assert.h"
|
||||
|
||||
#include <glad/glad.h>
|
||||
|
||||
#include "video_core/renderer_opengl/gl_buffer_cache.h"
|
||||
#include "video_core/renderer_opengl/gl_fence_manager.h"
|
||||
|
||||
namespace OpenGL {
|
||||
|
||||
/// Fence that carries only a payload.
GLInnerFence::GLInnerFence(u32 payload_, bool is_stubbed_)
    : FenceBase{payload_, is_stubbed_} {}

/// Fence that carries a GPU address together with its payload.
GLInnerFence::GLInnerFence(GPUVAddr address_, u32 payload_, bool is_stubbed_)
    : FenceBase{address_, payload_, is_stubbed_} {}

GLInnerFence::~GLInnerFence() = default;
|
||||
|
||||
void GLInnerFence::Queue() {
|
||||
if (is_stubbed) {
|
||||
return;
|
||||
}
|
||||
ASSERT(sync_object.handle == 0);
|
||||
sync_object.Create();
|
||||
}
|
||||
|
||||
bool GLInnerFence::IsSignaled() const {
|
||||
if (is_stubbed) {
|
||||
return true;
|
||||
}
|
||||
ASSERT(sync_object.handle != 0);
|
||||
GLsizei length;
|
||||
GLint sync_status;
|
||||
glGetSynciv(sync_object.handle, GL_SYNC_STATUS, sizeof(GLint), &length, &sync_status);
|
||||
return sync_status == GL_SIGNALED;
|
||||
}
|
||||
|
||||
void GLInnerFence::Wait() {
|
||||
if (is_stubbed) {
|
||||
return;
|
||||
}
|
||||
ASSERT(sync_object.handle != 0);
|
||||
glClientWaitSync(sync_object.handle, 0, GL_TIMEOUT_IGNORED);
|
||||
}
|
||||
|
||||
/// Forwards every dependency to the generic fence manager.
FenceManagerOpenGL::FenceManagerOpenGL(VideoCore::RasterizerInterface& rasterizer_,
                                       Tegra::GPU& gpu_, TextureCache& texture_cache_,
                                       OGLBufferCache& buffer_cache_, QueryCache& query_cache_)
    : GenericFenceManager{rasterizer_, gpu_, texture_cache_, buffer_cache_, query_cache_} {}
|
||||
|
||||
/// Creates a fence that carries only a payload value.
Fence FenceManagerOpenGL::CreateFence(u32 value, bool is_stubbed) {
    Fence fence = std::make_shared<GLInnerFence>(value, is_stubbed);
    return fence;
}
|
||||
|
||||
/// Creates a fence that carries a GPU address together with its payload value.
Fence FenceManagerOpenGL::CreateFence(GPUVAddr addr, u32 value, bool is_stubbed) {
    Fence fence = std::make_shared<GLInnerFence>(addr, value, is_stubbed);
    return fence;
}
|
||||
|
||||
/// Queues the given fence.
void FenceManagerOpenGL::QueueFence(Fence& fence) {
    GLInnerFence& inner = *fence;
    inner.Queue();
}
|
||||
|
||||
/// Returns whether the given fence reports itself as signaled.
bool FenceManagerOpenGL::IsFenceSignaled(Fence& fence) const {
    const GLInnerFence& inner = *fence;
    return inner.IsSignaled();
}
|
||||
|
||||
/// Waits on the given fence.
void FenceManagerOpenGL::WaitFence(Fence& fence) {
    GLInnerFence& inner = *fence;
    inner.Wait();
}
|
||||
|
||||
} // namespace OpenGL
|
52
src/video_core/renderer_opengl/gl_fence_manager.h
Executable file
52
src/video_core/renderer_opengl/gl_fence_manager.h
Executable file
@@ -0,0 +1,52 @@
|
||||
// Copyright 2020 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <memory>
|
||||
|
||||
#include "common/common_types.h"
|
||||
#include "video_core/fence_manager.h"
|
||||
#include "video_core/renderer_opengl/gl_buffer_cache.h"
|
||||
#include "video_core/renderer_opengl/gl_query_cache.h"
|
||||
#include "video_core/renderer_opengl/gl_resource_manager.h"
|
||||
#include "video_core/renderer_opengl/gl_texture_cache.h"
|
||||
|
||||
namespace OpenGL {
|
||||
|
||||
class GLInnerFence : public VideoCommon::FenceBase {
|
||||
public:
|
||||
explicit GLInnerFence(u32 payload_, bool is_stubbed_);
|
||||
explicit GLInnerFence(GPUVAddr address_, u32 payload_, bool is_stubbed_);
|
||||
~GLInnerFence();
|
||||
|
||||
void Queue();
|
||||
|
||||
bool IsSignaled() const;
|
||||
|
||||
void Wait();
|
||||
|
||||
private:
|
||||
OGLSync sync_object;
|
||||
};
|
||||
|
||||
using Fence = std::shared_ptr<GLInnerFence>;
|
||||
using GenericFenceManager =
|
||||
VideoCommon::FenceManager<Fence, TextureCache, OGLBufferCache, QueryCache>;
|
||||
|
||||
class FenceManagerOpenGL final : public GenericFenceManager {
|
||||
public:
|
||||
explicit FenceManagerOpenGL(VideoCore::RasterizerInterface& rasterizer_, Tegra::GPU& gpu_,
|
||||
TextureCache& texture_cache_, OGLBufferCache& buffer_cache_,
|
||||
QueryCache& query_cache_);
|
||||
|
||||
protected:
|
||||
Fence CreateFence(u32 value, bool is_stubbed) override;
|
||||
Fence CreateFence(GPUVAddr addr, u32 value, bool is_stubbed) override;
|
||||
void QueueFence(Fence& fence) override;
|
||||
bool IsFenceSignaled(Fence& fence) const override;
|
||||
void WaitFence(Fence& fence) override;
|
||||
};
|
||||
|
||||
} // namespace OpenGL
|
121
src/video_core/renderer_opengl/gl_query_cache.cpp
Executable file
121
src/video_core/renderer_opengl/gl_query_cache.cpp
Executable file
@@ -0,0 +1,121 @@
|
||||
// Copyright 2019 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include <algorithm>
|
||||
#include <cstring>
|
||||
#include <memory>
|
||||
#include <unordered_map>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
#include <glad/glad.h>
|
||||
|
||||
#include "common/assert.h"
|
||||
#include "core/core.h"
|
||||
#include "video_core/engines/maxwell_3d.h"
|
||||
#include "video_core/memory_manager.h"
|
||||
#include "video_core/renderer_opengl/gl_query_cache.h"
|
||||
#include "video_core/renderer_opengl/gl_rasterizer.h"
|
||||
|
||||
namespace OpenGL {
|
||||
|
||||
namespace {
|
||||
|
||||
constexpr std::array<GLenum, VideoCore::NumQueryTypes> QueryTargets = {GL_SAMPLES_PASSED};
|
||||
|
||||
constexpr GLenum GetTarget(VideoCore::QueryType type) {
|
||||
return QueryTargets[static_cast<std::size_t>(type)];
|
||||
}
|
||||
|
||||
} // Anonymous namespace
|
||||
|
||||
/// Forwards the dependencies to the base cache and stores the OpenGL rasterizer in
/// `gl_rasterizer`.
QueryCache::QueryCache(RasterizerOpenGL& rasterizer_, Tegra::Engines::Maxwell3D& maxwell3d_,
                       Tegra::MemoryManager& gpu_memory_)
    : QueryCacheBase(rasterizer_, maxwell3d_, gpu_memory_), gl_rasterizer{rasterizer_} {}

QueryCache::~QueryCache() = default;
|
||||
|
||||
/// Returns a query object for `type`, reusing one from the pool when available and creating a
/// new one otherwise.
OGLQuery QueryCache::AllocateQuery(VideoCore::QueryType type) {
    auto& pool = query_pools[static_cast<std::size_t>(type)];
    if (!pool.empty()) {
        OGLQuery recycled = std::move(pool.back());
        pool.pop_back();
        return recycled;
    }

    OGLQuery fresh;
    fresh.Create(GetTarget(type));
    return fresh;
}
|
||||
|
||||
/// Returns a query object to the pool of its type so AllocateQuery can reuse it later.
void QueryCache::Reserve(VideoCore::QueryType type, OGLQuery&& query) {
    auto& pool = query_pools[static_cast<std::size_t>(type)];
    pool.push_back(std::move(query));
}
|
||||
|
||||
bool QueryCache::AnyCommandQueued() const noexcept {
|
||||
return gl_rasterizer.AnyCommandQueued();
|
||||
}
|
||||
|
||||
/// Obtains a query object from the cache and immediately begins the OpenGL query on it.
HostCounter::HostCounter(QueryCache& cache_, std::shared_ptr<HostCounter> dependency_,
                         VideoCore::QueryType type_)
    : HostCounterBase{std::move(dependency_)}, cache{cache_}, type{type_},
      query{cache.AllocateQuery(type)} {
    const GLenum target = GetTarget(type);
    glBeginQuery(target, query.handle);
}
|
||||
|
||||
/// Gives the query object back to the cache for reuse instead of letting it be deleted.
HostCounter::~HostCounter() {
    OGLQuery& released = query;
    cache.Reserve(type, std::move(released));
}
|
||||
|
||||
void HostCounter::EndQuery() {
|
||||
if (!cache.AnyCommandQueued()) {
|
||||
// There are chances a query waited on without commands (glDraw, glClear, glDispatch). Not
|
||||
// having any of these causes a lock. glFlush is considered a command, so we can safely wait
|
||||
// for this. Insert to the OpenGL command stream a flush.
|
||||
glFlush();
|
||||
}
|
||||
glEndQuery(GetTarget(type));
|
||||
}
|
||||
|
||||
/// Reads the result of this counter's query object (GL_QUERY_RESULT) and returns it as u64.
u64 HostCounter::BlockingQuery() const {
    // Zero-initialised so a defined value is returned even if the GL call raises an error and
    // leaves the output parameter untouched.
    GLint64 value = 0;
    glGetQueryObjecti64v(query.handle, GL_QUERY_RESULT, &value);
    return static_cast<u64>(value);
}
|
||||
|
||||
/// Stores the owning cache (as a pointer) and the query type; the addresses are forwarded to
/// the generic cached-query base.
CachedQuery::CachedQuery(QueryCache& cache_, VideoCore::QueryType type_, VAddr cpu_addr_,
                         u8* host_ptr_)
    : CachedQueryBase{cpu_addr_, host_ptr_},
      cache{&cache_},
      type{type_} {}
|
||||
|
||||
CachedQuery::~CachedQuery() = default;
|
||||
|
||||
/// Move constructor: the base part is moved, the cache pointer and type are copied.
/// rhs.cache and rhs.type are members of CachedQuery itself and are read after rhs has been
/// handed to the base move constructor.
CachedQuery::CachedQuery(CachedQuery&& rhs) noexcept
    : CachedQueryBase(std::move(rhs)),
      cache{rhs.cache},
      type{rhs.type} {}
|
||||
|
||||
/// Move assignment: copies the cache pointer and type, then move-assigns the base part.
CachedQuery& CachedQuery::operator=(CachedQuery&& rhs) noexcept {
    // Derived members first, while rhs is still untouched.
    cache = rhs.cache;
    type = rhs.type;

    // Explicitly invoke the base class move assignment for the remaining state.
    CachedQueryBase<HostCounter>::operator=(std::move(rhs));

    return *this;
}
|
||||
|
||||
void CachedQuery::Flush() {
|
||||
// Waiting for a query while another query of the same target is enabled locks Nvidia's driver.
|
||||
// To avoid this disable and re-enable keeping the dependency stream.
|
||||
// But we only have to do this if we have pending waits to be done.
|
||||
auto& stream = cache->Stream(type);
|
||||
const bool slice_counter = WaitPending() && stream.IsEnabled();
|
||||
if (slice_counter) {
|
||||
stream.Update(false);
|
||||
}
|
||||
|
||||
VideoCommon::CachedQueryBase<HostCounter>::Flush();
|
||||
|
||||
if (slice_counter) {
|
||||
stream.Update(true);
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace OpenGL
|
82
src/video_core/renderer_opengl/gl_query_cache.h
Executable file
82
src/video_core/renderer_opengl/gl_query_cache.h
Executable file
@@ -0,0 +1,82 @@
|
||||
// Copyright 2019 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <array>
|
||||
#include <memory>
|
||||
#include <vector>
|
||||
|
||||
#include "common/common_types.h"
|
||||
#include "video_core/query_cache.h"
|
||||
#include "video_core/rasterizer_interface.h"
|
||||
#include "video_core/renderer_opengl/gl_resource_manager.h"
|
||||
|
||||
namespace Core {
|
||||
class System;
|
||||
}
|
||||
|
||||
namespace OpenGL {
|
||||
|
||||
class CachedQuery;
|
||||
class HostCounter;
|
||||
class QueryCache;
|
||||
class RasterizerOpenGL;
|
||||
|
||||
using CounterStream = VideoCommon::CounterStreamBase<QueryCache, HostCounter>;
|
||||
|
||||
class QueryCache final
|
||||
: public VideoCommon::QueryCacheBase<QueryCache, CachedQuery, CounterStream, HostCounter> {
|
||||
public:
|
||||
explicit QueryCache(RasterizerOpenGL& rasterizer_, Tegra::Engines::Maxwell3D& maxwell3d_,
|
||||
Tegra::MemoryManager& gpu_memory_);
|
||||
~QueryCache();
|
||||
|
||||
OGLQuery AllocateQuery(VideoCore::QueryType type);
|
||||
|
||||
void Reserve(VideoCore::QueryType type, OGLQuery&& query);
|
||||
|
||||
bool AnyCommandQueued() const noexcept;
|
||||
|
||||
private:
|
||||
RasterizerOpenGL& gl_rasterizer;
|
||||
std::array<std::vector<OGLQuery>, VideoCore::NumQueryTypes> query_pools;
|
||||
};
|
||||
|
||||
/// Counter backed by an OpenGL query object.
/// The query object is obtained from the QueryCache on construction and handed back to it on
/// destruction.
class HostCounter final : public VideoCommon::HostCounterBase<QueryCache, HostCounter> {
public:
    explicit HostCounter(QueryCache& cache_, std::shared_ptr<HostCounter> dependency_,
                         VideoCore::QueryType type_);
    ~HostCounter();

    /// Ends the OpenGL query belonging to this counter.
    void EndQuery();

private:
    /// Reads the query result from OpenGL.
    u64 BlockingQuery() const override;

    // Declaration order matters: the constructor initialises `query` through `cache` and `type`.
    QueryCache& cache;
    const VideoCore::QueryType type;
    OGLQuery query;
};
|
||||
|
||||
/// OpenGL cached query. Movable but not copyable.
class CachedQuery final : public VideoCommon::CachedQueryBase<HostCounter> {
public:
    explicit CachedQuery(QueryCache& cache_, VideoCore::QueryType type_, VAddr cpu_addr_,
                         u8* host_ptr_);
    ~CachedQuery() override;

    // Move-only type.
    CachedQuery(CachedQuery&& rhs) noexcept;
    CachedQuery& operator=(CachedQuery&& rhs) noexcept;

    CachedQuery(const CachedQuery&) = delete;
    CachedQuery& operator=(const CachedQuery&) = delete;

    /// Flushes the query; see the definition for the driver workaround applied around it.
    void Flush() override;

private:
    /// Owning cache, held as a pointer so the object stays move-assignable.
    QueryCache* cache;

    /// Query type used to select the counter stream.
    VideoCore::QueryType type;
};
|
||||
|
||||
} // namespace OpenGL
|
1734
src/video_core/renderer_opengl/gl_rasterizer.cpp
Executable file
1734
src/video_core/renderer_opengl/gl_rasterizer.cpp
Executable file
File diff suppressed because it is too large
Load Diff
288
src/video_core/renderer_opengl/gl_rasterizer.h
Executable file
288
src/video_core/renderer_opengl/gl_rasterizer.h
Executable file
@@ -0,0 +1,288 @@
|
||||
// Copyright 2015 Citra Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <array>
|
||||
#include <atomic>
|
||||
#include <cstddef>
|
||||
#include <memory>
|
||||
#include <optional>
|
||||
#include <tuple>
|
||||
#include <utility>
|
||||
|
||||
#include <boost/container/static_vector.hpp>
|
||||
|
||||
#include <glad/glad.h>
|
||||
|
||||
#include "common/common_types.h"
|
||||
#include "video_core/engines/const_buffer_info.h"
|
||||
#include "video_core/engines/maxwell_3d.h"
|
||||
#include "video_core/rasterizer_accelerated.h"
|
||||
#include "video_core/rasterizer_interface.h"
|
||||
#include "video_core/renderer_opengl/gl_buffer_cache.h"
|
||||
#include "video_core/renderer_opengl/gl_device.h"
|
||||
#include "video_core/renderer_opengl/gl_fence_manager.h"
|
||||
#include "video_core/renderer_opengl/gl_query_cache.h"
|
||||
#include "video_core/renderer_opengl/gl_resource_manager.h"
|
||||
#include "video_core/renderer_opengl/gl_shader_cache.h"
|
||||
#include "video_core/renderer_opengl/gl_shader_decompiler.h"
|
||||
#include "video_core/renderer_opengl/gl_shader_manager.h"
|
||||
#include "video_core/renderer_opengl/gl_state_tracker.h"
|
||||
#include "video_core/renderer_opengl/gl_stream_buffer.h"
|
||||
#include "video_core/renderer_opengl/gl_texture_cache.h"
|
||||
#include "video_core/shader/async_shaders.h"
|
||||
#include "video_core/textures/texture.h"
|
||||
|
||||
namespace Core::Memory {
|
||||
class Memory;
|
||||
}
|
||||
|
||||
namespace Core::Frontend {
|
||||
class EmuWindow;
|
||||
}
|
||||
|
||||
namespace Tegra {
|
||||
class MemoryManager;
|
||||
}
|
||||
|
||||
namespace OpenGL {
|
||||
|
||||
struct ScreenInfo;
|
||||
struct ShaderEntries;
|
||||
|
||||
struct BindlessSSBO {
|
||||
GLuint64EXT address;
|
||||
GLsizei length;
|
||||
GLsizei padding;
|
||||
};
|
||||
static_assert(sizeof(BindlessSSBO) * CHAR_BIT == 128);
|
||||
|
||||
class RasterizerOpenGL : public VideoCore::RasterizerAccelerated {
|
||||
public:
|
||||
explicit RasterizerOpenGL(Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu_,
|
||||
Core::Memory::Memory& cpu_memory_, const Device& device_,
|
||||
ScreenInfo& screen_info_, ProgramManager& program_manager_,
|
||||
StateTracker& state_tracker_);
|
||||
~RasterizerOpenGL() override;
|
||||
|
||||
void Draw(bool is_indexed, bool is_instanced) override;
|
||||
void Clear() override;
|
||||
void DispatchCompute(GPUVAddr code_addr) override;
|
||||
void ResetCounter(VideoCore::QueryType type) override;
|
||||
void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional<u64> timestamp) override;
|
||||
void FlushAll() override;
|
||||
void FlushRegion(VAddr addr, u64 size) override;
|
||||
void InvalidateExceptTextureCache(VAddr addr, u64 size) override;
|
||||
void InvalidateTextureCache(VAddr addr, u64 size) override;
|
||||
bool MustFlushRegion(VAddr addr, u64 size) override;
|
||||
void InvalidateRegion(VAddr addr, u64 size) override;
|
||||
void OnCPUWrite(VAddr addr, u64 size) override;
|
||||
void SyncGuestHost() override;
|
||||
void UnmapMemory(VAddr addr, u64 size) override;
|
||||
void SignalSemaphore(GPUVAddr addr, u32 value) override;
|
||||
void SignalSyncPoint(u32 value) override;
|
||||
void ReleaseFences() override;
|
||||
void FlushAndInvalidateRegion(VAddr addr, u64 size) override;
|
||||
void WaitForIdle() override;
|
||||
void FragmentBarrier() override;
|
||||
void TiledCacheBarrier() override;
|
||||
void FlushCommands() override;
|
||||
void TickFrame() override;
|
||||
bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Surface& src,
|
||||
const Tegra::Engines::Fermi2D::Surface& dst,
|
||||
const Tegra::Engines::Fermi2D::Config& copy_config) override;
|
||||
bool AccelerateDisplay(const Tegra::FramebufferConfig& config, VAddr framebuffer_addr,
|
||||
u32 pixel_stride) override;
|
||||
void LoadDiskResources(u64 title_id, const std::atomic_bool& stop_loading,
|
||||
const VideoCore::DiskResourceLoadCallback& callback) override;
|
||||
|
||||
/// Returns true when there are commands queued to the OpenGL server.
|
||||
bool AnyCommandQueued() const {
|
||||
return num_queued_commands > 0;
|
||||
}
|
||||
|
||||
VideoCommon::Shader::AsyncShaders& GetAsyncShaders() {
|
||||
return async_shaders;
|
||||
}
|
||||
|
||||
const VideoCommon::Shader::AsyncShaders& GetAsyncShaders() const {
|
||||
return async_shaders;
|
||||
}
|
||||
|
||||
private:
|
||||
static constexpr size_t MAX_TEXTURES = 192;
|
||||
static constexpr size_t MAX_IMAGES = 48;
|
||||
static constexpr size_t MAX_IMAGE_VIEWS = MAX_TEXTURES + MAX_IMAGES;
|
||||
|
||||
void BindComputeTextures(Shader* kernel);
|
||||
|
||||
void BindTextures(const ShaderEntries& entries, GLuint base_texture, GLuint base_image,
|
||||
size_t& image_view_index, size_t& texture_index, size_t& image_index);
|
||||
|
||||
/// Configures the current constbuffers to use for the draw command.
|
||||
void SetupDrawConstBuffers(std::size_t stage_index, Shader* shader);
|
||||
|
||||
/// Configures the current constbuffers to use for the kernel invocation.
|
||||
void SetupComputeConstBuffers(Shader* kernel);
|
||||
|
||||
/// Configures a constant buffer.
|
||||
void SetupConstBuffer(GLenum stage, u32 binding, const Tegra::Engines::ConstBufferInfo& buffer,
|
||||
const ConstBufferEntry& entry, bool use_unified,
|
||||
std::size_t unified_offset);
|
||||
|
||||
/// Configures the current global memory entries to use for the draw command.
|
||||
void SetupDrawGlobalMemory(std::size_t stage_index, Shader* shader);
|
||||
|
||||
/// Configures the current global memory entries to use for the kernel invocation.
|
||||
void SetupComputeGlobalMemory(Shader* kernel);
|
||||
|
||||
/// Configures a global memory buffer.
|
||||
void SetupGlobalMemory(u32 binding, const GlobalMemoryEntry& entry, GPUVAddr gpu_addr,
|
||||
size_t size, BindlessSSBO* ssbo);
|
||||
|
||||
/// Configures the current textures to use for the draw command.
|
||||
void SetupDrawTextures(const Shader* shader, size_t stage_index);
|
||||
|
||||
/// Configures the textures used in a compute shader.
|
||||
void SetupComputeTextures(const Shader* kernel);
|
||||
|
||||
/// Configures images in a graphics shader.
|
||||
void SetupDrawImages(const Shader* shader, size_t stage_index);
|
||||
|
||||
/// Configures images in a compute shader.
|
||||
void SetupComputeImages(const Shader* shader);
|
||||
|
||||
/// Syncs the viewport and depth range to match the guest state
|
||||
void SyncViewport();
|
||||
|
||||
/// Syncs the depth clamp state
|
||||
void SyncDepthClamp();
|
||||
|
||||
/// Syncs the clip enabled status to match the guest state
|
||||
void SyncClipEnabled(u32 clip_mask);
|
||||
|
||||
/// Syncs the clip coefficients to match the guest state
|
||||
void SyncClipCoef();
|
||||
|
||||
/// Syncs the cull mode to match the guest state
|
||||
void SyncCullMode();
|
||||
|
||||
/// Syncs the primitive restart to match the guest state
|
||||
void SyncPrimitiveRestart();
|
||||
|
||||
/// Syncs the depth test state to match the guest state
|
||||
void SyncDepthTestState();
|
||||
|
||||
/// Syncs the stencil test state to match the guest state
|
||||
void SyncStencilTestState();
|
||||
|
||||
/// Syncs the blend state to match the guest state
|
||||
void SyncBlendState();
|
||||
|
||||
/// Syncs the LogicOp state to match the guest state
|
||||
void SyncLogicOpState();
|
||||
|
||||
/// Syncs the color clamp state
|
||||
void SyncFragmentColorClampState();
|
||||
|
||||
/// Syncs the alpha coverage and alpha to one
|
||||
void SyncMultiSampleState();
|
||||
|
||||
/// Syncs the scissor test state to match the guest state
|
||||
void SyncScissorTest();
|
||||
|
||||
/// Syncs the point state to match the guest state
|
||||
void SyncPointState();
|
||||
|
||||
/// Syncs the line state to match the guest state
|
||||
void SyncLineState();
|
||||
|
||||
/// Syncs the rasterizer enable state to match the guest state
|
||||
void SyncRasterizeEnable();
|
||||
|
||||
/// Syncs polygon modes to match the guest state
|
||||
void SyncPolygonModes();
|
||||
|
||||
/// Syncs Color Mask
|
||||
void SyncColorMask();
|
||||
|
||||
/// Syncs the polygon offsets
|
||||
void SyncPolygonOffset();
|
||||
|
||||
/// Syncs the alpha test state to match the guest state
|
||||
void SyncAlphaTest();
|
||||
|
||||
/// Syncs the framebuffer sRGB state to match the guest state
|
||||
void SyncFramebufferSRGB();
|
||||
|
||||
/// Syncs transform feedback state to match guest state
|
||||
/// @note Only valid on assembly shaders
|
||||
void SyncTransformFeedback();
|
||||
|
||||
/// Begin a transform feedback
|
||||
void BeginTransformFeedback(GLenum primitive_mode);
|
||||
|
||||
/// End a transform feedback
|
||||
void EndTransformFeedback();
|
||||
|
||||
std::size_t CalculateVertexArraysSize() const;
|
||||
|
||||
std::size_t CalculateIndexBufferSize() const;
|
||||
|
||||
/// Updates the current vertex format
|
||||
void SetupVertexFormat();
|
||||
|
||||
void SetupVertexBuffer();
|
||||
void SetupVertexInstances();
|
||||
|
||||
GLintptr SetupIndexBuffer();
|
||||
|
||||
void SetupShaders();
|
||||
|
||||
Tegra::GPU& gpu;
|
||||
Tegra::Engines::Maxwell3D& maxwell3d;
|
||||
Tegra::Engines::KeplerCompute& kepler_compute;
|
||||
Tegra::MemoryManager& gpu_memory;
|
||||
|
||||
const Device& device;
|
||||
ScreenInfo& screen_info;
|
||||
ProgramManager& program_manager;
|
||||
StateTracker& state_tracker;
|
||||
|
||||
OGLStreamBuffer stream_buffer;
|
||||
TextureCacheRuntime texture_cache_runtime;
|
||||
TextureCache texture_cache;
|
||||
ShaderCacheOpenGL shader_cache;
|
||||
QueryCache query_cache;
|
||||
OGLBufferCache buffer_cache;
|
||||
FenceManagerOpenGL fence_manager;
|
||||
|
||||
VideoCommon::Shader::AsyncShaders async_shaders;
|
||||
|
||||
boost::container::static_vector<u32, MAX_IMAGE_VIEWS> image_view_indices;
|
||||
std::array<ImageViewId, MAX_IMAGE_VIEWS> image_view_ids;
|
||||
boost::container::static_vector<GLuint, MAX_TEXTURES> sampler_handles;
|
||||
std::array<GLuint, MAX_TEXTURES> texture_handles;
|
||||
std::array<GLuint, MAX_IMAGES> image_handles;
|
||||
|
||||
std::array<OGLBuffer, Tegra::Engines::Maxwell3D::Regs::NumTransformFeedbackBuffers>
|
||||
transform_feedback_buffers;
|
||||
std::bitset<Tegra::Engines::Maxwell3D::Regs::NumTransformFeedbackBuffers>
|
||||
enabled_transform_feedback_buffers;
|
||||
|
||||
static constexpr std::size_t NUM_CONSTANT_BUFFERS =
|
||||
Tegra::Engines::Maxwell3D::Regs::MaxConstBuffers *
|
||||
Tegra::Engines::Maxwell3D::Regs::MaxShaderProgram;
|
||||
std::array<GLuint, NUM_CONSTANT_BUFFERS> staging_cbufs{};
|
||||
std::size_t current_cbuf = 0;
|
||||
OGLBuffer unified_uniform_buffer;
|
||||
|
||||
/// Number of commands queued to the OpenGL driver. Reset on flush.
|
||||
std::size_t num_queued_commands = 0;
|
||||
|
||||
u32 last_clip_distance_mask = 0;
|
||||
};
|
||||
|
||||
} // namespace OpenGL
|
231
src/video_core/renderer_opengl/gl_resource_manager.cpp
Executable file
231
src/video_core/renderer_opengl/gl_resource_manager.cpp
Executable file
@@ -0,0 +1,231 @@
|
||||
// Copyright 2015 Citra Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include <string_view>
|
||||
#include <utility>
|
||||
#include <glad/glad.h>
|
||||
#include "common/common_types.h"
|
||||
#include "common/microprofile.h"
|
||||
#include "video_core/renderer_opengl/gl_resource_manager.h"
|
||||
#include "video_core/renderer_opengl/gl_shader_util.h"
|
||||
|
||||
MICROPROFILE_DEFINE(OpenGL_ResourceCreation, "OpenGL", "Resource Creation", MP_RGB(128, 128, 192));
|
||||
MICROPROFILE_DEFINE(OpenGL_ResourceDeletion, "OpenGL", "Resource Deletion", MP_RGB(128, 128, 192));
|
||||
|
||||
namespace OpenGL {
|
||||
|
||||
/// Creates the renderbuffer object unless this wrapper already owns one.
void OGLRenderbuffer::Create() {
    if (handle == 0) {
        MICROPROFILE_SCOPE(OpenGL_ResourceCreation);
        glCreateRenderbuffers(1, &handle);
    }
}
|
||||
|
||||
/// Deletes the owned renderbuffer, if any, and resets the handle to 0.
void OGLRenderbuffer::Release() {
    if (handle != 0) {
        MICROPROFILE_SCOPE(OpenGL_ResourceDeletion);
        glDeleteRenderbuffers(1, &handle);
        handle = 0;
    }
}
|
||||
|
||||
/// Creates a texture object for `target` unless this wrapper already owns one.
void OGLTexture::Create(GLenum target) {
    if (handle == 0) {
        MICROPROFILE_SCOPE(OpenGL_ResourceCreation);
        glCreateTextures(target, 1, &handle);
    }
}
|
||||
|
||||
/// Deletes the owned texture, if any, and resets the handle to 0.
void OGLTexture::Release() {
    if (handle != 0) {
        MICROPROFILE_SCOPE(OpenGL_ResourceDeletion);
        glDeleteTextures(1, &handle);
        handle = 0;
    }
}
|
||||
|
||||
/// Generates a texture name for a view unless this wrapper already owns one.
/// Unlike OGLTexture::Create this uses glGenTextures, which takes no target.
// NOTE(review): presumably because the name is later passed to glTextureView, which expects a
// name that has not been given a target yet - confirm against the caller.
void OGLTextureView::Create() {
    if (handle == 0) {
        MICROPROFILE_SCOPE(OpenGL_ResourceCreation);
        glGenTextures(1, &handle);
    }
}
|
||||
|
||||
/// Deletes the owned texture view, if any, and resets the handle to 0.
void OGLTextureView::Release() {
    if (handle != 0) {
        MICROPROFILE_SCOPE(OpenGL_ResourceDeletion);
        glDeleteTextures(1, &handle);
        handle = 0;
    }
}
|
||||
|
||||
/// Creates the sampler object unless this wrapper already owns one.
void OGLSampler::Create() {
    if (handle == 0) {
        MICROPROFILE_SCOPE(OpenGL_ResourceCreation);
        glCreateSamplers(1, &handle);
    }
}
|
||||
|
||||
/// Deletes the owned sampler, if any, and resets the handle to 0.
void OGLSampler::Release() {
    if (handle != 0) {
        MICROPROFILE_SCOPE(OpenGL_ResourceDeletion);
        glDeleteSamplers(1, &handle);
        handle = 0;
    }
}
|
||||
|
||||
/// Loads a shader of the given type from `source` via GLShader::LoadShader.
/// Does nothing when a shader is already owned or when `source` is empty.
void OGLShader::Create(std::string_view source, GLenum type) {
    const bool nothing_to_do = handle != 0 || source.empty();
    if (nothing_to_do) {
        return;
    }

    MICROPROFILE_SCOPE(OpenGL_ResourceCreation);
    handle = GLShader::LoadShader(source, type);
}
|
||||
|
||||
/// Deletes the owned shader, if any, and resets the handle to 0.
void OGLShader::Release() {
    if (handle != 0) {
        MICROPROFILE_SCOPE(OpenGL_ResourceDeletion);
        glDeleteShader(handle);
        handle = 0;
    }
}
|
||||
|
||||
void OGLProgram::CreateFromSource(const char* vert_shader, const char* geo_shader,
|
||||
const char* frag_shader, bool separable_program,
|
||||
bool hint_retrievable) {
|
||||
OGLShader vert, geo, frag;
|
||||
if (vert_shader)
|
||||
vert.Create(vert_shader, GL_VERTEX_SHADER);
|
||||
if (geo_shader)
|
||||
geo.Create(geo_shader, GL_GEOMETRY_SHADER);
|
||||
if (frag_shader)
|
||||
frag.Create(frag_shader, GL_FRAGMENT_SHADER);
|
||||
|
||||
MICROPROFILE_SCOPE(OpenGL_ResourceCreation);
|
||||
Create(separable_program, hint_retrievable, vert.handle, geo.handle, frag.handle);
|
||||
}
|
||||
|
||||
/// Deletes the owned program, if any, and resets the handle to 0.
void OGLProgram::Release() {
    if (handle != 0) {
        MICROPROFILE_SCOPE(OpenGL_ResourceDeletion);
        glDeleteProgram(handle);
        handle = 0;
    }
}
|
||||
|
||||
/// Deletes the owned ARB assembly program, if any, and resets the handle to 0.
void OGLAssemblyProgram::Release() {
    if (handle != 0) {
        MICROPROFILE_SCOPE(OpenGL_ResourceDeletion);
        glDeleteProgramsARB(1, &handle);
        handle = 0;
    }
}
|
||||
|
||||
/// Generates a program pipeline name unless this wrapper already owns one.
void OGLPipeline::Create() {
    if (handle == 0) {
        MICROPROFILE_SCOPE(OpenGL_ResourceCreation);
        glGenProgramPipelines(1, &handle);
    }
}
|
||||
|
||||
/// Deletes the owned program pipeline, if any, and resets the handle to 0.
void OGLPipeline::Release() {
    if (handle != 0) {
        MICROPROFILE_SCOPE(OpenGL_ResourceDeletion);
        glDeleteProgramPipelines(1, &handle);
        handle = 0;
    }
}
|
||||
|
||||
/// Creates the buffer object unless this wrapper already owns one.
void OGLBuffer::Create() {
    if (handle == 0) {
        MICROPROFILE_SCOPE(OpenGL_ResourceCreation);
        glCreateBuffers(1, &handle);
    }
}
|
||||
|
||||
/// Deletes the owned buffer, if any, and resets the handle to 0.
void OGLBuffer::Release() {
    if (handle != 0) {
        MICROPROFILE_SCOPE(OpenGL_ResourceDeletion);
        glDeleteBuffers(1, &handle);
        handle = 0;
    }
}
|
||||
|
||||
void OGLBuffer::MakeStreamCopy(std::size_t buffer_size) {
|
||||
ASSERT_OR_EXECUTE((handle != 0 && buffer_size != 0), { return; });
|
||||
|
||||
glNamedBufferData(handle, buffer_size, nullptr, GL_STREAM_COPY);
|
||||
}
|
||||
|
||||
/// Inserts a GL_SYNC_GPU_COMMANDS_COMPLETE fence unless this wrapper already owns one.
void OGLSync::Create() {
    if (handle == nullptr) {
        // Deliberately not profiled: this is expected to happen during gameplay.
        handle = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0);
    }
}
|
||||
|
||||
/// Deletes the owned sync object, if any, and clears the handle.
void OGLSync::Release() {
    if (handle != nullptr) {
        // Deliberately not profiled: this is expected to happen during gameplay.
        glDeleteSync(handle);
        handle = nullptr;
    }
}
|
||||
|
||||
/// Generates a framebuffer name unless this wrapper already owns one.
void OGLFramebuffer::Create() {
    if (handle == 0) {
        MICROPROFILE_SCOPE(OpenGL_ResourceCreation);
        glGenFramebuffers(1, &handle);
    }
}
|
||||
|
||||
/// Deletes the owned framebuffer, if any, and resets the handle to 0.
void OGLFramebuffer::Release() {
    if (handle != 0) {
        MICROPROFILE_SCOPE(OpenGL_ResourceDeletion);
        glDeleteFramebuffers(1, &handle);
        handle = 0;
    }
}
|
||||
|
||||
/// Creates a query object for `target` unless this wrapper already owns one.
void OGLQuery::Create(GLenum target) {
    if (handle == 0) {
        MICROPROFILE_SCOPE(OpenGL_ResourceCreation);
        glCreateQueries(target, 1, &handle);
    }
}
|
||||
|
||||
/// Deletes the owned query object, if any, and resets the handle to 0.
void OGLQuery::Release() {
    if (handle != 0) {
        MICROPROFILE_SCOPE(OpenGL_ResourceDeletion);
        glDeleteQueries(1, &handle);
        handle = 0;
    }
}
|
||||
|
||||
} // namespace OpenGL
|
317
src/video_core/renderer_opengl/gl_resource_manager.h
Executable file
317
src/video_core/renderer_opengl/gl_resource_manager.h
Executable file
@@ -0,0 +1,317 @@
|
||||
// Copyright 2015 Citra Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <string_view>
|
||||
#include <utility>
|
||||
#include <glad/glad.h>
|
||||
#include "common/common_types.h"
|
||||
#include "video_core/renderer_opengl/gl_shader_util.h"
|
||||
|
||||
namespace OpenGL {
|
||||
|
||||
class OGLRenderbuffer : private NonCopyable {
|
||||
public:
|
||||
OGLRenderbuffer() = default;
|
||||
|
||||
OGLRenderbuffer(OGLRenderbuffer&& o) noexcept : handle(std::exchange(o.handle, 0)) {}
|
||||
|
||||
~OGLRenderbuffer() {
|
||||
Release();
|
||||
}
|
||||
|
||||
OGLRenderbuffer& operator=(OGLRenderbuffer&& o) noexcept {
|
||||
Release();
|
||||
handle = std::exchange(o.handle, 0);
|
||||
return *this;
|
||||
}
|
||||
|
||||
/// Creates a new internal OpenGL resource and stores the handle
|
||||
void Create();
|
||||
|
||||
/// Deletes the internal OpenGL resource
|
||||
void Release();
|
||||
|
||||
GLuint handle = 0;
|
||||
};
|
||||
|
||||
class OGLTexture : private NonCopyable {
|
||||
public:
|
||||
OGLTexture() = default;
|
||||
|
||||
OGLTexture(OGLTexture&& o) noexcept : handle(std::exchange(o.handle, 0)) {}
|
||||
|
||||
~OGLTexture() {
|
||||
Release();
|
||||
}
|
||||
|
||||
OGLTexture& operator=(OGLTexture&& o) noexcept {
|
||||
Release();
|
||||
handle = std::exchange(o.handle, 0);
|
||||
return *this;
|
||||
}
|
||||
|
||||
/// Creates a new internal OpenGL resource and stores the handle
|
||||
void Create(GLenum target);
|
||||
|
||||
/// Deletes the internal OpenGL resource
|
||||
void Release();
|
||||
|
||||
GLuint handle = 0;
|
||||
};
|
||||
|
||||
class OGLTextureView : private NonCopyable {
|
||||
public:
|
||||
OGLTextureView() = default;
|
||||
|
||||
OGLTextureView(OGLTextureView&& o) noexcept : handle(std::exchange(o.handle, 0)) {}
|
||||
|
||||
~OGLTextureView() {
|
||||
Release();
|
||||
}
|
||||
|
||||
OGLTextureView& operator=(OGLTextureView&& o) noexcept {
|
||||
Release();
|
||||
handle = std::exchange(o.handle, 0);
|
||||
return *this;
|
||||
}
|
||||
|
||||
/// Creates a new internal OpenGL resource and stores the handle
|
||||
void Create();
|
||||
|
||||
/// Deletes the internal OpenGL resource
|
||||
void Release();
|
||||
|
||||
GLuint handle = 0;
|
||||
};
|
||||
|
||||
class OGLSampler : private NonCopyable {
|
||||
public:
|
||||
OGLSampler() = default;
|
||||
|
||||
OGLSampler(OGLSampler&& o) noexcept : handle(std::exchange(o.handle, 0)) {}
|
||||
|
||||
~OGLSampler() {
|
||||
Release();
|
||||
}
|
||||
|
||||
OGLSampler& operator=(OGLSampler&& o) noexcept {
|
||||
Release();
|
||||
handle = std::exchange(o.handle, 0);
|
||||
return *this;
|
||||
}
|
||||
|
||||
/// Creates a new internal OpenGL resource and stores the handle
|
||||
void Create();
|
||||
|
||||
/// Deletes the internal OpenGL resource
|
||||
void Release();
|
||||
|
||||
GLuint handle = 0;
|
||||
};
|
||||
|
||||
class OGLShader : private NonCopyable {
|
||||
public:
|
||||
OGLShader() = default;
|
||||
|
||||
OGLShader(OGLShader&& o) noexcept : handle(std::exchange(o.handle, 0)) {}
|
||||
|
||||
~OGLShader() {
|
||||
Release();
|
||||
}
|
||||
|
||||
OGLShader& operator=(OGLShader&& o) noexcept {
|
||||
Release();
|
||||
handle = std::exchange(o.handle, 0);
|
||||
return *this;
|
||||
}
|
||||
|
||||
void Create(std::string_view source, GLenum type);
|
||||
|
||||
void Release();
|
||||
|
||||
GLuint handle = 0;
|
||||
};
|
||||
|
||||
class OGLProgram : private NonCopyable {
|
||||
public:
|
||||
OGLProgram() = default;
|
||||
|
||||
OGLProgram(OGLProgram&& o) noexcept : handle(std::exchange(o.handle, 0)) {}
|
||||
|
||||
~OGLProgram() {
|
||||
Release();
|
||||
}
|
||||
|
||||
OGLProgram& operator=(OGLProgram&& o) noexcept {
|
||||
Release();
|
||||
handle = std::exchange(o.handle, 0);
|
||||
return *this;
|
||||
}
|
||||
|
||||
template <typename... T>
|
||||
void Create(bool separable_program, bool hint_retrievable, T... shaders) {
|
||||
if (handle != 0)
|
||||
return;
|
||||
handle = GLShader::LoadProgram(separable_program, hint_retrievable, shaders...);
|
||||
}
|
||||
|
||||
/// Creates a new internal OpenGL resource and stores the handle
|
||||
void CreateFromSource(const char* vert_shader, const char* geo_shader, const char* frag_shader,
|
||||
bool separable_program = false, bool hint_retrievable = false);
|
||||
|
||||
/// Deletes the internal OpenGL resource
|
||||
void Release();
|
||||
|
||||
GLuint handle = 0;
|
||||
};
|
||||
|
||||
class OGLAssemblyProgram : private NonCopyable {
|
||||
public:
|
||||
OGLAssemblyProgram() = default;
|
||||
|
||||
OGLAssemblyProgram(OGLAssemblyProgram&& o) noexcept : handle(std::exchange(o.handle, 0)) {}
|
||||
|
||||
~OGLAssemblyProgram() {
|
||||
Release();
|
||||
}
|
||||
|
||||
OGLAssemblyProgram& operator=(OGLAssemblyProgram&& o) noexcept {
|
||||
Release();
|
||||
handle = std::exchange(o.handle, 0);
|
||||
return *this;
|
||||
}
|
||||
|
||||
/// Deletes the internal OpenGL resource
|
||||
void Release();
|
||||
|
||||
GLuint handle = 0;
|
||||
};
|
||||
|
||||
class OGLPipeline : private NonCopyable {
|
||||
public:
|
||||
OGLPipeline() = default;
|
||||
OGLPipeline(OGLPipeline&& o) noexcept : handle{std::exchange<GLuint>(o.handle, 0)} {}
|
||||
|
||||
~OGLPipeline() {
|
||||
Release();
|
||||
}
|
||||
OGLPipeline& operator=(OGLPipeline&& o) noexcept {
|
||||
handle = std::exchange<GLuint>(o.handle, 0);
|
||||
return *this;
|
||||
}
|
||||
|
||||
/// Creates a new internal OpenGL resource and stores the handle
|
||||
void Create();
|
||||
|
||||
/// Deletes the internal OpenGL resource
|
||||
void Release();
|
||||
|
||||
GLuint handle = 0;
|
||||
};
|
||||
|
||||
class OGLBuffer : private NonCopyable {
|
||||
public:
|
||||
OGLBuffer() = default;
|
||||
|
||||
OGLBuffer(OGLBuffer&& o) noexcept : handle(std::exchange(o.handle, 0)) {}
|
||||
|
||||
~OGLBuffer() {
|
||||
Release();
|
||||
}
|
||||
|
||||
OGLBuffer& operator=(OGLBuffer&& o) noexcept {
|
||||
Release();
|
||||
handle = std::exchange(o.handle, 0);
|
||||
return *this;
|
||||
}
|
||||
|
||||
/// Creates a new internal OpenGL resource and stores the handle
|
||||
void Create();
|
||||
|
||||
/// Deletes the internal OpenGL resource
|
||||
void Release();
|
||||
|
||||
// Converts the buffer into a stream copy buffer with a fixed size
|
||||
void MakeStreamCopy(std::size_t buffer_size);
|
||||
|
||||
GLuint handle = 0;
|
||||
};
|
||||
|
||||
class OGLSync : private NonCopyable {
|
||||
public:
|
||||
OGLSync() = default;
|
||||
|
||||
OGLSync(OGLSync&& o) noexcept : handle(std::exchange(o.handle, nullptr)) {}
|
||||
|
||||
~OGLSync() {
|
||||
Release();
|
||||
}
|
||||
OGLSync& operator=(OGLSync&& o) noexcept {
|
||||
Release();
|
||||
handle = std::exchange(o.handle, nullptr);
|
||||
return *this;
|
||||
}
|
||||
|
||||
/// Creates a new internal OpenGL resource and stores the handle
|
||||
void Create();
|
||||
|
||||
/// Deletes the internal OpenGL resource
|
||||
void Release();
|
||||
|
||||
GLsync handle = 0;
|
||||
};
|
||||
|
||||
class OGLFramebuffer : private NonCopyable {
|
||||
public:
|
||||
OGLFramebuffer() = default;
|
||||
|
||||
OGLFramebuffer(OGLFramebuffer&& o) noexcept : handle(std::exchange(o.handle, 0)) {}
|
||||
|
||||
~OGLFramebuffer() {
|
||||
Release();
|
||||
}
|
||||
|
||||
OGLFramebuffer& operator=(OGLFramebuffer&& o) noexcept {
|
||||
Release();
|
||||
handle = std::exchange(o.handle, 0);
|
||||
return *this;
|
||||
}
|
||||
|
||||
/// Creates a new internal OpenGL resource and stores the handle
|
||||
void Create();
|
||||
|
||||
/// Deletes the internal OpenGL resource
|
||||
void Release();
|
||||
|
||||
GLuint handle = 0;
|
||||
};
|
||||
|
||||
class OGLQuery : private NonCopyable {
|
||||
public:
|
||||
OGLQuery() = default;
|
||||
|
||||
OGLQuery(OGLQuery&& o) noexcept : handle(std::exchange(o.handle, 0)) {}
|
||||
|
||||
~OGLQuery() {
|
||||
Release();
|
||||
}
|
||||
|
||||
OGLQuery& operator=(OGLQuery&& o) noexcept {
|
||||
Release();
|
||||
handle = std::exchange(o.handle, 0);
|
||||
return *this;
|
||||
}
|
||||
|
||||
/// Creates a new internal OpenGL resource and stores the handle
|
||||
void Create(GLenum target);
|
||||
|
||||
/// Deletes the internal OpenGL resource
|
||||
void Release();
|
||||
|
||||
GLuint handle = 0;
|
||||
};
|
||||
|
||||
} // namespace OpenGL
|
601
src/video_core/renderer_opengl/gl_shader_cache.cpp
Executable file
601
src/video_core/renderer_opengl/gl_shader_cache.cpp
Executable file
@@ -0,0 +1,601 @@
|
||||
// Copyright 2018 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include <atomic>
|
||||
#include <functional>
|
||||
#include <mutex>
|
||||
#include <optional>
|
||||
#include <string>
|
||||
#include <thread>
|
||||
#include <unordered_set>
|
||||
|
||||
#include "common/alignment.h"
|
||||
#include "common/assert.h"
|
||||
#include "common/logging/log.h"
|
||||
#include "common/scope_exit.h"
|
||||
#include "core/core.h"
|
||||
#include "core/frontend/emu_window.h"
|
||||
#include "video_core/engines/kepler_compute.h"
|
||||
#include "video_core/engines/maxwell_3d.h"
|
||||
#include "video_core/engines/shader_type.h"
|
||||
#include "video_core/memory_manager.h"
|
||||
#include "video_core/renderer_opengl/gl_arb_decompiler.h"
|
||||
#include "video_core/renderer_opengl/gl_rasterizer.h"
|
||||
#include "video_core/renderer_opengl/gl_resource_manager.h"
|
||||
#include "video_core/renderer_opengl/gl_shader_cache.h"
|
||||
#include "video_core/renderer_opengl/gl_shader_decompiler.h"
|
||||
#include "video_core/renderer_opengl/gl_shader_disk_cache.h"
|
||||
#include "video_core/renderer_opengl/gl_state_tracker.h"
|
||||
#include "video_core/shader/memory_util.h"
|
||||
#include "video_core/shader/registry.h"
|
||||
#include "video_core/shader/shader_ir.h"
|
||||
#include "video_core/shader_cache.h"
|
||||
#include "video_core/shader_notify.h"
|
||||
|
||||
namespace OpenGL {
|
||||
|
||||
using Tegra::Engines::ShaderType;
|
||||
using VideoCommon::Shader::GetShaderAddress;
|
||||
using VideoCommon::Shader::GetShaderCode;
|
||||
using VideoCommon::Shader::GetUniqueIdentifier;
|
||||
using VideoCommon::Shader::KERNEL_MAIN_OFFSET;
|
||||
using VideoCommon::Shader::ProgramCode;
|
||||
using VideoCommon::Shader::Registry;
|
||||
using VideoCommon::Shader::ShaderIR;
|
||||
using VideoCommon::Shader::STAGE_MAIN_OFFSET;
|
||||
|
||||
namespace {
|
||||
|
||||
constexpr VideoCommon::Shader::CompilerSettings COMPILER_SETTINGS{};
|
||||
|
||||
/// Gets the shader type from a Maxwell program type
|
||||
constexpr GLenum GetGLShaderType(ShaderType shader_type) {
|
||||
switch (shader_type) {
|
||||
case ShaderType::Vertex:
|
||||
return GL_VERTEX_SHADER;
|
||||
case ShaderType::Geometry:
|
||||
return GL_GEOMETRY_SHADER;
|
||||
case ShaderType::Fragment:
|
||||
return GL_FRAGMENT_SHADER;
|
||||
case ShaderType::Compute:
|
||||
return GL_COMPUTE_SHADER;
|
||||
default:
|
||||
return GL_NONE;
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns the two-letter tag used as the prefix of shader identifiers for a stage,
/// or "UNK" when the stage is not one of the known values.
constexpr const char* GetShaderTypeName(ShaderType shader_type) {
    if (shader_type == ShaderType::Vertex) {
        return "VS";
    }
    if (shader_type == ShaderType::TesselationControl) {
        return "HS";
    }
    if (shader_type == ShaderType::TesselationEval) {
        return "DS";
    }
    if (shader_type == ShaderType::Geometry) {
        return "GS";
    }
    if (shader_type == ShaderType::Fragment) {
        return "FS";
    }
    if (shader_type == ShaderType::Compute) {
        return "CS";
    }
    return "UNK";
}
|
||||
|
||||
/// Maps a Maxwell shader program slot to its shader stage. VertexA and VertexB both map
/// to the vertex stage; unknown values yield a value-initialized ShaderType.
constexpr ShaderType GetShaderType(Maxwell::ShaderProgram program_type) {
    using Program = Maxwell::ShaderProgram;

    if (program_type == Program::VertexA || program_type == Program::VertexB) {
        return ShaderType::Vertex;
    }
    if (program_type == Program::TesselationControl) {
        return ShaderType::TesselationControl;
    }
    if (program_type == Program::TesselationEval) {
        return ShaderType::TesselationEval;
    }
    if (program_type == Program::Geometry) {
        return ShaderType::Geometry;
    }
    if (program_type == Program::Fragment) {
        return ShaderType::Fragment;
    }
    return {};
}
|
||||
|
||||
/// Returns the NV assembly program target enum for a shader stage, or a
/// value-initialized GLenum (0) when the stage is not one of the known values.
constexpr GLenum AssemblyEnum(ShaderType shader_type) {
    if (shader_type == ShaderType::Vertex) {
        return GL_VERTEX_PROGRAM_NV;
    }
    if (shader_type == ShaderType::TesselationControl) {
        return GL_TESS_CONTROL_PROGRAM_NV;
    }
    if (shader_type == ShaderType::TesselationEval) {
        return GL_TESS_EVALUATION_PROGRAM_NV;
    }
    if (shader_type == ShaderType::Geometry) {
        return GL_GEOMETRY_PROGRAM_NV;
    }
    if (shader_type == ShaderType::Fragment) {
        return GL_FRAGMENT_PROGRAM_NV;
    }
    if (shader_type == ShaderType::Compute) {
        return GL_COMPUTE_PROGRAM_NV;
    }
    return {};
}
|
||||
|
||||
/// Builds the textual shader identifier: the stage tag followed by the unique
/// identifier printed as 16 upper-case, zero-padded hexadecimal digits.
std::string MakeShaderID(u64 unique_identifier, ShaderType shader_type) {
    const char* const stage_tag = GetShaderTypeName(shader_type);
    return fmt::format("{}{:016X}", stage_tag, unique_identifier);
}
|
||||
|
||||
std::shared_ptr<Registry> MakeRegistry(const ShaderDiskCacheEntry& entry) {
|
||||
const VideoCore::GuestDriverProfile guest_profile{entry.texture_handler_size};
|
||||
const VideoCommon::Shader::SerializedRegistryInfo info{guest_profile, entry.bound_buffer,
|
||||
entry.graphics_info, entry.compute_info};
|
||||
auto registry = std::make_shared<Registry>(entry.type, info);
|
||||
for (const auto& [address, value] : entry.keys) {
|
||||
const auto [buffer, offset] = address;
|
||||
registry->InsertKey(buffer, offset, value);
|
||||
}
|
||||
for (const auto& [offset, sampler] : entry.bound_samplers) {
|
||||
registry->InsertBoundSampler(offset, sampler);
|
||||
}
|
||||
for (const auto& [key, sampler] : entry.bindless_samplers) {
|
||||
const auto [buffer, offset] = key;
|
||||
registry->InsertBindlessSampler(buffer, offset, sampler);
|
||||
}
|
||||
return registry;
|
||||
}
|
||||
|
||||
std::unordered_set<GLenum> GetSupportedFormats() {
|
||||
GLint num_formats;
|
||||
glGetIntegerv(GL_NUM_PROGRAM_BINARY_FORMATS, &num_formats);
|
||||
|
||||
std::vector<GLint> formats(num_formats);
|
||||
glGetIntegerv(GL_PROGRAM_BINARY_FORMATS, formats.data());
|
||||
|
||||
std::unordered_set<GLenum> supported_formats;
|
||||
for (const GLint format : formats) {
|
||||
supported_formats.insert(static_cast<GLenum>(format));
|
||||
}
|
||||
return supported_formats;
|
||||
}
|
||||
|
||||
} // Anonymous namespace
|
||||
|
||||
/// Builds the host program for a decoded shader.
/// When the device uses assembly shaders the IR is decompiled to an ARB assembly program,
/// otherwise it is decompiled to GLSL and turned into a program object.
/// @param device Device queried for the shader backend and passed to the decompilers
/// @param shader_type Stage of the shader
/// @param unique_identifier Identifier used to build the shader's textual ID
/// @param ir Decoded shader
/// @param registry Registry passed to the decompilers
/// @param hint_retrievable Forwarded to the GLSL program creation
/// @return A shared handle holding either the assembly or the source program
ProgramSharedPtr BuildShader(const Device& device, ShaderType shader_type, u64 unique_identifier,
                             const ShaderIR& ir, const Registry& registry, bool hint_retrievable) {
    const std::string shader_id = MakeShaderID(unique_identifier, shader_type);
    LOG_INFO(Render_OpenGL, "{}", shader_id);

    auto result = std::make_shared<ProgramHandle>();

    if (!device.UseAssemblyShaders()) {
        // GLSL path
        const std::string glsl_source =
            DecompileShader(device, ir, registry, shader_type, shader_id);
        OGLShader glsl_shader;
        glsl_shader.Create(glsl_source.c_str(), GetGLShaderType(shader_type));

        result->source_program.Create(true, hint_retrievable, glsl_shader.handle);
        return result;
    }

    // Assembly path
    const std::string assembly =
        DecompileAssemblyShader(device, ir, registry, shader_type, shader_id);

    GLuint& assembly_handle = result->assembly_program.handle;

    // Commented out functions signal OpenGL errors but are compatible with apitrace.
    // Use them only to capture and replay on apitrace.
#if 0
    glGenProgramsNV(1, &assembly_handle);
    glLoadProgramNV(AssemblyEnum(shader_type), assembly_handle,
                    static_cast<GLsizei>(assembly.size()),
                    reinterpret_cast<const GLubyte*>(assembly.data()));
#else
    glGenProgramsARB(1, &assembly_handle);
    glNamedProgramStringEXT(assembly_handle, AssemblyEnum(shader_type),
                            GL_PROGRAM_FORMAT_ASCII_ARB, static_cast<GLsizei>(assembly.size()),
                            assembly.data());
#endif
    // A non-empty error string means the driver rejected the program: log it with the source.
    const auto error_string =
        reinterpret_cast<const char*>(glGetString(GL_PROGRAM_ERROR_STRING_NV));
    if (error_string && *error_string) {
        LOG_CRITICAL(Render_OpenGL, "{}", error_string);
        LOG_INFO(Render_OpenGL, "\n{}", assembly);
    }
    return result;
}
|
||||
|
||||
/// Stores the registry, entries and program, then caches the GL handle to use: the
/// assembly program handle when it is non-zero, otherwise the source program handle.
/// A shader flagged as built is asserted to have a non-zero handle.
Shader::Shader(std::shared_ptr<Registry> registry_, ShaderEntries entries_,
               ProgramSharedPtr program_, bool is_built_)
    : registry{std::move(registry_)}, entries{std::move(entries_)}, program{std::move(program_)},
      is_built{is_built_} {
    const GLuint assembly_handle = program->assembly_program.handle;
    handle = assembly_handle != 0 ? assembly_handle : program->source_program.handle;
    if (is_built) {
        ASSERT(handle != 0);
    }
}
|
||||
|
||||
// Declared in the header and defaulted here, out of line.
Shader::~Shader() = default;
|
||||
|
||||
/// Returns the cached GL handle of this shader.
/// Checks registry->IsConsistent() through DEBUG_ASSERT before returning.
GLuint Shader::GetHandle() const {
|
||||
DEBUG_ASSERT(registry->IsConsistent());
|
||||
return handle;
|
||||
}
|
||||
|
||||
/// Returns the is_built flag: the value passed at construction, or true once one of the
/// Async*Built functions has been called.
bool Shader::IsBuilt() const {
|
||||
return is_built;
|
||||
}
|
||||
|
||||
/// Installs a GLSL program that finished building elsewhere: moves it into the source
/// program slot, refreshes the cached handle from it and marks the shader as built.
void Shader::AsyncOpenGLBuilt(OGLProgram new_program) {
|
||||
program->source_program = std::move(new_program);
|
||||
// Read the handle only after the move-assignment above has installed the new program.
handle = program->source_program.handle;
|
||||
is_built = true;
|
||||
}
|
||||
|
||||
/// Installs an assembly program that finished building elsewhere: moves it into the
/// assembly program slot, refreshes the cached handle from it and marks the shader as built.
void Shader::AsyncGLASMBuilt(OGLAssemblyProgram new_program) {
|
||||
program->assembly_program = std::move(new_program);
|
||||
// Read the handle only after the move-assignment above has installed the new program.
handle = program->assembly_program.handle;
|
||||
is_built = true;
|
||||
}
|
||||
|
||||
std::unique_ptr<Shader> Shader::CreateStageFromMemory(
|
||||
const ShaderParameters& params, Maxwell::ShaderProgram program_type, ProgramCode code,
|
||||
ProgramCode code_b, VideoCommon::Shader::AsyncShaders& async_shaders, VAddr cpu_addr) {
|
||||
const auto shader_type = GetShaderType(program_type);
|
||||
|
||||
auto& gpu = params.gpu;
|
||||
gpu.ShaderNotify().MarkSharderBuilding();
|
||||
|
||||
auto registry = std::make_shared<Registry>(shader_type, gpu.Maxwell3D());
|
||||
if (!async_shaders.IsShaderAsync(gpu) || !params.device.UseAsynchronousShaders()) {
|
||||
const ShaderIR ir(code, STAGE_MAIN_OFFSET, COMPILER_SETTINGS, *registry);
|
||||
// TODO(Rodrigo): Handle VertexA shaders
|
||||
// std::optional<ShaderIR> ir_b;
|
||||
// if (!code_b.empty()) {
|
||||
// ir_b.emplace(code_b, STAGE_MAIN_OFFSET);
|
||||
// }
|
||||
auto program =
|
||||
BuildShader(params.device, shader_type, params.unique_identifier, ir, *registry);
|
||||
ShaderDiskCacheEntry entry;
|
||||
entry.type = shader_type;
|
||||
entry.code = std::move(code);
|
||||
entry.code_b = std::move(code_b);
|
||||
entry.unique_identifier = params.unique_identifier;
|
||||
entry.bound_buffer = registry->GetBoundBuffer();
|
||||
entry.graphics_info = registry->GetGraphicsInfo();
|
||||
entry.keys = registry->GetKeys();
|
||||
entry.bound_samplers = registry->GetBoundSamplers();
|
||||
entry.bindless_samplers = registry->GetBindlessSamplers();
|
||||
params.disk_cache.SaveEntry(std::move(entry));
|
||||
|
||||
gpu.ShaderNotify().MarkShaderComplete();
|
||||
|
||||
return std::unique_ptr<Shader>(new Shader(std::move(registry),
|
||||
MakeEntries(params.device, ir, shader_type),
|
||||
std::move(program), true));
|
||||
} else {
|
||||
// Required for entries
|
||||
const ShaderIR ir(code, STAGE_MAIN_OFFSET, COMPILER_SETTINGS, *registry);
|
||||
auto entries = MakeEntries(params.device, ir, shader_type);
|
||||
|
||||
async_shaders.QueueOpenGLShader(params.device, shader_type, params.unique_identifier,
|
||||
std::move(code), std::move(code_b), STAGE_MAIN_OFFSET,
|
||||
COMPILER_SETTINGS, *registry, cpu_addr);
|
||||
|
||||
auto program = std::make_shared<ProgramHandle>();
|
||||
return std::unique_ptr<Shader>(
|
||||
new Shader(std::move(registry), std::move(entries), std::move(program), false));
|
||||
}
|
||||
}
|
||||
|
||||
std::unique_ptr<Shader> Shader::CreateKernelFromMemory(const ShaderParameters& params,
|
||||
ProgramCode code) {
|
||||
auto& gpu = params.gpu;
|
||||
gpu.ShaderNotify().MarkSharderBuilding();
|
||||
|
||||
auto registry = std::make_shared<Registry>(ShaderType::Compute, params.engine);
|
||||
const ShaderIR ir(code, KERNEL_MAIN_OFFSET, COMPILER_SETTINGS, *registry);
|
||||
const u64 uid = params.unique_identifier;
|
||||
auto program = BuildShader(params.device, ShaderType::Compute, uid, ir, *registry);
|
||||
|
||||
ShaderDiskCacheEntry entry;
|
||||
entry.type = ShaderType::Compute;
|
||||
entry.code = std::move(code);
|
||||
entry.unique_identifier = uid;
|
||||
entry.bound_buffer = registry->GetBoundBuffer();
|
||||
entry.compute_info = registry->GetComputeInfo();
|
||||
entry.keys = registry->GetKeys();
|
||||
entry.bound_samplers = registry->GetBoundSamplers();
|
||||
entry.bindless_samplers = registry->GetBindlessSamplers();
|
||||
params.disk_cache.SaveEntry(std::move(entry));
|
||||
|
||||
gpu.ShaderNotify().MarkShaderComplete();
|
||||
|
||||
return std::unique_ptr<Shader>(new Shader(std::move(registry),
|
||||
MakeEntries(params.device, ir, ShaderType::Compute),
|
||||
std::move(program)));
|
||||
}
|
||||
|
||||
/// Creates a shader from an already loaded precompiled shader by copying its registry
/// pointer, entries and program pointer. `params` is not read by this function.
/// The constructor's is_built argument is left at its declared default.
std::unique_ptr<Shader> Shader::CreateFromCache(const ShaderParameters& params,
|
||||
const PrecompiledShader& precompiled_shader) {
|
||||
return std::unique_ptr<Shader>(new Shader(
|
||||
precompiled_shader.registry, precompiled_shader.entries, precompiled_shader.program));
|
||||
}
|
||||
|
||||
/// Stores references to the window, GPU, GPU memory manager, engines and device, and
/// forwards the rasterizer to the ShaderCache base. No other work is done here.
ShaderCacheOpenGL::ShaderCacheOpenGL(RasterizerOpenGL& rasterizer_,
|
||||
Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu_,
|
||||
Tegra::Engines::Maxwell3D& maxwell3d_,
|
||||
Tegra::Engines::KeplerCompute& kepler_compute_,
|
||||
Tegra::MemoryManager& gpu_memory_, const Device& device_)
|
||||
: ShaderCache{rasterizer_}, emu_window{emu_window_}, gpu{gpu_}, gpu_memory{gpu_memory_},
|
||||
maxwell3d{maxwell3d_}, kepler_compute{kepler_compute_}, device{device_} {}
|
||||
|
||||
// Declared in the header and defaulted here, out of line.
ShaderCacheOpenGL::~ShaderCacheOpenGL() = default;
|
||||
|
||||
/// Loads the shader disk cache of a title and fills the runtime cache from it.
///
/// Transferable entries are split into contiguous ranges that are built on worker threads,
/// each with its own shared graphics context. An entry that has a precompiled binary is
/// loaded from it and compiled from source only if that fails. Afterwards, unless assembly
/// shaders are in use, binaries for entries without a precompiled counterpart are stored.
///
/// @param title_id Title whose cache is bound and loaded
/// @param stop_loading Checked before every entry and after the workers finish; aborts loading
/// @param callback Optional progress callback, invoked with the build stage and counters
void ShaderCacheOpenGL::LoadDiskCache(u64 title_id, const std::atomic_bool& stop_loading,
                                      const VideoCore::DiskResourceLoadCallback& callback) {
    disk_cache.BindTitleID(title_id);
    const std::optional transferable = disk_cache.LoadTransferable();
    if (!transferable) {
        return;
    }

    std::vector<ShaderDiskCachePrecompiled> gl_cache;
    if (!device.UseAssemblyShaders()) {
        // Only load precompiled cache when we are not using assembly shaders
        gl_cache = disk_cache.LoadPrecompiled();
    }
    const auto supported_formats = GetSupportedFormats();

    // Index the precompiled entries by identifier once instead of scanning the whole list
    // for every shader. try_emplace keeps the first entry seen for an identifier, which is
    // the entry a front-to-back linear search would return.
    const auto precompiled_by_id = [&gl_cache] {
        std::unordered_map<u64, const ShaderDiskCachePrecompiled*> index;
        index.reserve(gl_cache.size());
        for (const ShaderDiskCachePrecompiled& precompiled : gl_cache) {
            index.try_emplace(precompiled.unique_identifier, &precompiled);
        }
        return index;
    }();
    const auto find_precompiled =
        [&precompiled_by_id](u64 id) -> const ShaderDiskCachePrecompiled* {
        const auto it = precompiled_by_id.find(id);
        return it != precompiled_by_id.end() ? it->second : nullptr;
    };

    // Inform the frontend about shader build initialization
    if (callback) {
        callback(VideoCore::LoadCallbackStage::Build, 0, transferable->size());
    }

    std::mutex mutex;
    std::size_t built_shaders = 0; // It doesn't have to be atomic since it's used behind a mutex
    std::atomic_bool gl_cache_failed = false;

    const auto worker = [&](Core::Frontend::GraphicsContext* context, std::size_t begin,
                            std::size_t end) {
        const auto scope = context->Acquire();

        for (std::size_t i = begin; i < end; ++i) {
            if (stop_loading) {
                return;
            }
            const auto& entry = (*transferable)[i];
            const u64 uid = entry.unique_identifier;
            const ShaderDiskCachePrecompiled* const precompiled_entry = find_precompiled(uid);

            const bool is_compute = entry.type == ShaderType::Compute;
            const u32 main_offset = is_compute ? KERNEL_MAIN_OFFSET : STAGE_MAIN_OFFSET;
            auto registry = MakeRegistry(entry);
            const ShaderIR ir(entry.code, main_offset, COMPILER_SETTINGS, *registry);

            ProgramSharedPtr program;
            if (precompiled_entry) {
                // If the shader is precompiled, attempt to load it from its stored binary
                program = GeneratePrecompiledProgram(entry, *precompiled_entry, supported_formats);
                if (!program) {
                    gl_cache_failed = true;
                }
            }
            if (!program) {
                // Otherwise compile it from GLSL
                program = BuildShader(device, entry.type, uid, ir, *registry, true);
            }

            PrecompiledShader shader;
            shader.program = std::move(program);
            shader.registry = std::move(registry);
            shader.entries = MakeEntries(device, ir, entry.type);

            // The progress counter and the runtime cache are shared between workers
            std::scoped_lock lock{mutex};
            if (callback) {
                callback(VideoCore::LoadCallbackStage::Build, ++built_shaders,
                         transferable->size());
            }
            runtime_cache.emplace(entry.unique_identifier, std::move(shader));
        }
    };

    const std::size_t num_workers{std::max(1U, std::thread::hardware_concurrency())};
    const std::size_t bucket_size{transferable->size() / num_workers};
    std::vector<std::unique_ptr<Core::Frontend::GraphicsContext>> contexts(num_workers);
    std::vector<std::thread> threads(num_workers);
    for (std::size_t i = 0; i < num_workers; ++i) {
        // The last worker also takes the remainder of the integer division
        const bool is_last_worker = i + 1 == num_workers;
        const std::size_t start{bucket_size * i};
        const std::size_t end{is_last_worker ? transferable->size() : start + bucket_size};

        // On some platforms the shared context has to be created from the GUI thread
        contexts[i] = emu_window.CreateSharedContext();
        threads[i] = std::thread(worker, contexts[i].get(), start, end);
    }
    for (auto& thread : threads) {
        thread.join();
    }

    if (gl_cache_failed) {
        // Invalidate the precompiled cache if a dumped shader was rejected
        disk_cache.InvalidatePrecompiled();
        return;
    }
    if (stop_loading) {
        return;
    }

    if (device.UseAssemblyShaders()) {
        // Don't store precompiled binaries for assembly shaders.
        return;
    }

    // TODO(Rodrigo): Do state tracking for transferable shaders and do a dummy draw
    // before precompiling them

    // Track if precompiled cache was altered to know if we have to serialize the virtual
    // precompiled cache file back to the hard drive
    bool precompiled_cache_altered = false;
    for (std::size_t i = 0; i < transferable->size(); ++i) {
        const u64 id = (*transferable)[i].unique_identifier;
        if (find_precompiled(id) == nullptr) {
            const GLuint program = runtime_cache.at(id).program->source_program.handle;
            disk_cache.SavePrecompiled(id, program);
            precompiled_cache_altered = true;
        }
    }

    if (precompiled_cache_altered) {
        disk_cache.SaveVirtualPrecompiledFile();
    }
}
|
||||
|
||||
/// Tries to create a program from a precompiled binary.
/// @param entry Transferable entry the binary belongs to (not read by this function)
/// @param precompiled_entry Binary format and data to load
/// @param supported_formats Binary formats accepted by the driver
/// @return The loaded program, or a null pointer when the format is unsupported or the
///         driver does not report the program as linked
ProgramSharedPtr ShaderCacheOpenGL::GeneratePrecompiledProgram(
    const ShaderDiskCacheEntry& entry, const ShaderDiskCachePrecompiled& precompiled_entry,
    const std::unordered_set<GLenum>& supported_formats) {
    if (!supported_formats.contains(precompiled_entry.binary_format)) {
        LOG_INFO(Render_OpenGL, "Precompiled cache entry with unsupported format, removing");
        return {};
    }

    auto program = std::make_shared<ProgramHandle>();
    // Write the handle straight into the owning object so that it is owned by `program`
    // on the failure path below as well.
    GLuint& handle = program->source_program.handle;
    handle = glCreateProgram();
    glProgramParameteri(handle, GL_PROGRAM_SEPARABLE, GL_TRUE);
    glProgramBinary(handle, precompiled_entry.binary_format, precompiled_entry.binary.data(),
                    static_cast<GLsizei>(precompiled_entry.binary.size()));

    // Default to failure so that a query which leaves the value untouched is treated as a
    // rejected binary instead of reading an indeterminate value.
    GLint link_status = GL_FALSE;
    glGetProgramiv(handle, GL_LINK_STATUS, &link_status);
    if (link_status == GL_FALSE) {
        LOG_INFO(Render_OpenGL, "Precompiled cache rejected by the driver, removing");
        return {};
    }

    return program;
}
|
||||
|
||||
/// Returns the shader bound to a graphics program slot, creating it if necessary.
///
/// Steps, in order:
///  1. If the shader dirty flag is clear and the last shader returned for this slot is
///     built, return it.
///  2. If asynchronous shaders are enabled, install every completed asynchronous build
///     into its shader and save it to the disk cache.
///  3. Look the shader up by CPU address, or use the null shader when the GPU address does
///     not translate.
///  4. Otherwise create it from the runtime cache or from guest memory and register it.
///
/// @param program Program slot to fetch
/// @param async_shaders Asynchronous shader builder
/// @return The shader for the slot
Shader* ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program,
                                           VideoCommon::Shader::AsyncShaders& async_shaders) {
    const auto program_index = static_cast<std::size_t>(program);

    if (!maxwell3d.dirty.flags[Dirty::Shaders]) {
        Shader* const last_shader = last_shaders[program_index];
        // last_shaders is value-initialized to null pointers, so a slot that was never
        // filled must not be dereferenced.
        if (last_shader != nullptr && last_shader->IsBuilt()) {
            return last_shader;
        }
    }

    const GPUVAddr address{GetShaderAddress(maxwell3d, program)};

    if (device.UseAsynchronousShaders() && async_shaders.HasCompletedWork()) {
        using Backend = VideoCommon::Shader::AsyncShaders::Backend;

        auto completed_work = async_shaders.GetCompletedWork();
        for (auto& work : completed_work) {
            Shader* shader = TryGet(work.cpu_address);
            // Completion is signalled even when the shader is no longer in the cache
            gpu.ShaderNotify().MarkShaderComplete();
            if (shader == nullptr) {
                continue;
            }
            if (work.backend == Backend::OpenGL) {
                shader->AsyncOpenGLBuilt(std::move(work.program.opengl));
            } else if (work.backend == Backend::GLASM) {
                shader->AsyncGLASMBuilt(std::move(work.program.glasm));
            }

            const auto& registry = shader->GetRegistry();

            ShaderDiskCacheEntry entry;
            entry.type = work.shader_type;
            entry.code = std::move(work.code);
            entry.code_b = std::move(work.code_b);
            entry.unique_identifier = work.uid;
            entry.bound_buffer = registry.GetBoundBuffer();
            entry.graphics_info = registry.GetGraphicsInfo();
            entry.keys = registry.GetKeys();
            entry.bound_samplers = registry.GetBoundSamplers();
            entry.bindless_samplers = registry.GetBindlessSamplers();
            disk_cache.SaveEntry(std::move(entry));
        }
    }

    // Look up shader in the cache based on address
    const std::optional<VAddr> cpu_addr{gpu_memory.GpuToCpuAddress(address)};
    if (Shader* const shader{cpu_addr ? TryGet(*cpu_addr) : null_shader.get()}) {
        return last_shaders[program_index] = shader;
    }

    const u8* const host_ptr{gpu_memory.GetPointer(address)};

    // No shader found - create a new one
    ProgramCode code{GetShaderCode(gpu_memory, address, host_ptr, false)};
    ProgramCode code_b;
    if (program == Maxwell::ShaderProgram::VertexA) {
        const GPUVAddr address_b{GetShaderAddress(maxwell3d, Maxwell::ShaderProgram::VertexB)};
        const u8* host_ptr_b = gpu_memory.GetPointer(address_b);
        code_b = GetShaderCode(gpu_memory, address_b, host_ptr_b, false);
    }
    const std::size_t code_size = code.size() * sizeof(u64);

    const u64 unique_identifier = GetUniqueIdentifier(
        GetShaderType(program), program == Maxwell::ShaderProgram::VertexA, code, code_b);

    // cpu_addr may be empty here (that case ends up in null_shader below), so it must not
    // be dereferenced; 0 matches the fallback already passed to CreateStageFromMemory.
    const VAddr cpu_addr_or_zero = cpu_addr.value_or(0);
    const ShaderParameters params{gpu,    maxwell3d,        disk_cache,
                                  device, cpu_addr_or_zero, host_ptr,
                                  unique_identifier};

    std::unique_ptr<Shader> shader;
    const auto found = runtime_cache.find(unique_identifier);
    if (found == runtime_cache.end()) {
        shader = Shader::CreateStageFromMemory(params, program, std::move(code), std::move(code_b),
                                               async_shaders, cpu_addr_or_zero);
    } else {
        shader = Shader::CreateFromCache(params, found->second);
    }

    Shader* const result = shader.get();
    if (cpu_addr) {
        Register(std::move(shader), *cpu_addr, code_size);
    } else {
        null_shader = std::move(shader);
    }

    return last_shaders[program_index] = result;
}
|
||||
|
||||
/// Returns the compute kernel located at a GPU address, creating it if necessary.
///
/// The kernel is looked up by CPU address, or the null kernel is used when the GPU address
/// does not translate. When none exists it is created from the runtime cache or from guest
/// memory and then registered (or stored as the null kernel).
///
/// @param code_addr GPU address of the kernel code
/// @return The kernel shader
Shader* ShaderCacheOpenGL::GetComputeKernel(GPUVAddr code_addr) {
    const std::optional<VAddr> cpu_addr{gpu_memory.GpuToCpuAddress(code_addr)};

    if (Shader* const kernel = cpu_addr ? TryGet(*cpu_addr) : null_kernel.get()) {
        return kernel;
    }

    // No kernel found, create a new one
    const u8* host_ptr{gpu_memory.GetPointer(code_addr)};
    ProgramCode code{GetShaderCode(gpu_memory, code_addr, host_ptr, true)};
    const std::size_t code_size{code.size() * sizeof(u64)};
    const u64 unique_identifier{GetUniqueIdentifier(ShaderType::Compute, false, code)};

    // cpu_addr may be empty here (that case ends up in null_kernel below), so it must not
    // be dereferenced; fall back to 0 instead.
    const VAddr cpu_addr_or_zero = cpu_addr.value_or(0);
    const ShaderParameters params{gpu,    kepler_compute,   disk_cache,
                                  device, cpu_addr_or_zero, host_ptr,
                                  unique_identifier};

    std::unique_ptr<Shader> kernel;
    const auto found = runtime_cache.find(unique_identifier);
    if (found == runtime_cache.end()) {
        kernel = Shader::CreateKernelFromMemory(params, std::move(code));
    } else {
        kernel = Shader::CreateFromCache(params, found->second);
    }

    Shader* const result = kernel.get();
    if (cpu_addr) {
        Register(std::move(kernel), *cpu_addr, code_size);
    } else {
        null_kernel = std::move(kernel);
    }
    return result;
}
|
||||
|
||||
} // namespace OpenGL
|
161
src/video_core/renderer_opengl/gl_shader_cache.h
Executable file
161
src/video_core/renderer_opengl/gl_shader_cache.h
Executable file
@@ -0,0 +1,161 @@
|
||||
// Copyright 2018 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <array>
|
||||
#include <atomic>
|
||||
#include <bitset>
|
||||
#include <memory>
|
||||
#include <string>
|
||||
#include <tuple>
|
||||
#include <unordered_map>
|
||||
#include <unordered_set>
|
||||
#include <vector>
|
||||
|
||||
#include <glad/glad.h>
|
||||
|
||||
#include "common/common_types.h"
|
||||
#include "video_core/engines/shader_type.h"
|
||||
#include "video_core/renderer_opengl/gl_resource_manager.h"
|
||||
#include "video_core/renderer_opengl/gl_shader_decompiler.h"
|
||||
#include "video_core/renderer_opengl/gl_shader_disk_cache.h"
|
||||
#include "video_core/shader/registry.h"
|
||||
#include "video_core/shader/shader_ir.h"
|
||||
#include "video_core/shader_cache.h"
|
||||
|
||||
namespace Tegra {
|
||||
class MemoryManager;
|
||||
}
|
||||
|
||||
namespace Core::Frontend {
|
||||
class EmuWindow;
|
||||
}
|
||||
|
||||
namespace VideoCommon::Shader {
|
||||
class AsyncShaders;
|
||||
}
|
||||
|
||||
namespace OpenGL {
|
||||
|
||||
class Device;
|
||||
class RasterizerOpenGL;
|
||||
|
||||
using Maxwell = Tegra::Engines::Maxwell3D::Regs;
|
||||
|
||||
/// Holds the host program of a shader. BuildShader fills exactly one of the two members
/// depending on whether the device uses assembly shaders.
struct ProgramHandle {
|
||||
/// Program created from GLSL source or from a precompiled binary
OGLProgram source_program;
|
||||
/// Program created from ARB assembly
OGLAssemblyProgram assembly_program;
|
||||
};
|
||||
using ProgramSharedPtr = std::shared_ptr<ProgramHandle>;
|
||||
|
||||
/// Everything needed to create a Shader without decoding guest code again.
/// Instances are stored in the runtime cache keyed by the shader's unique identifier.
struct PrecompiledShader {
|
||||
/// Host program, shared with every Shader created from this entry
ProgramSharedPtr program;
|
||||
/// Registry the shader was decoded with
std::shared_ptr<VideoCommon::Shader::Registry> registry;
|
||||
/// Resource entries produced by MakeEntries for the shader
ShaderEntries entries;
|
||||
};
|
||||
|
||||
/// Bundle of references and values handed to the Shader factory functions.
struct ShaderParameters {
|
||||
/// GPU; used for build notifications and to reach the Maxwell3D engine
Tegra::GPU& gpu;
|
||||
/// Engine the registry of a compute kernel is created from
Tegra::Engines::ConstBufferEngineInterface& engine;
|
||||
/// Disk cache that newly built shaders are saved to
ShaderDiskCacheOpenGL& disk_cache;
|
||||
/// Device passed to the shader build and entry generation functions
const Device& device;
|
||||
/// CPU address of the shader code
VAddr cpu_addr;
|
||||
/// Pointer obtained from the GPU memory manager for the shader's GPU address
const u8* host_ptr;
|
||||
/// Identifier computed from the shader code by GetUniqueIdentifier
u64 unique_identifier;
|
||||
};
|
||||
|
||||
/// Builds the host program for a decoded shader.
/// @param device Device used to select the shader backend
/// @param shader_type Stage of the shader
/// @param unique_identifier Identifier used to build the shader's textual ID
/// @param ir Decoded shader
/// @param registry Registry passed to the decompiler
/// @param hint_retrievable Forwarded to the creation of a GLSL program; defaults to false
/// @return Shared handle holding the built program
ProgramSharedPtr BuildShader(const Device& device, Tegra::Engines::ShaderType shader_type,
|
||||
u64 unique_identifier, const VideoCommon::Shader::ShaderIR& ir,
|
||||
const VideoCommon::Shader::Registry& registry,
|
||||
bool hint_retrievable = false);
|
||||
|
||||
class Shader final {
|
||||
public:
|
||||
~Shader();
|
||||
|
||||
/// Gets the GL program handle for the shader
|
||||
GLuint GetHandle() const;
|
||||
|
||||
bool IsBuilt() const;
|
||||
|
||||
/// Gets the shader entries for the shader
|
||||
const ShaderEntries& GetEntries() const {
|
||||
return entries;
|
||||
}
|
||||
|
||||
const VideoCommon::Shader::Registry& GetRegistry() const {
|
||||
return *registry;
|
||||
}
|
||||
|
||||
/// Mark a OpenGL shader as built
|
||||
void AsyncOpenGLBuilt(OGLProgram new_program);
|
||||
|
||||
/// Mark a GLASM shader as built
|
||||
void AsyncGLASMBuilt(OGLAssemblyProgram new_program);
|
||||
|
||||
static std::unique_ptr<Shader> CreateStageFromMemory(
|
||||
const ShaderParameters& params, Maxwell::ShaderProgram program_type,
|
||||
ProgramCode program_code, ProgramCode program_code_b,
|
||||
VideoCommon::Shader::AsyncShaders& async_shaders, VAddr cpu_addr);
|
||||
|
||||
static std::unique_ptr<Shader> CreateKernelFromMemory(const ShaderParameters& params,
|
||||
ProgramCode code);
|
||||
|
||||
static std::unique_ptr<Shader> CreateFromCache(const ShaderParameters& params,
|
||||
const PrecompiledShader& precompiled_shader);
|
||||
|
||||
private:
|
||||
explicit Shader(std::shared_ptr<VideoCommon::Shader::Registry> registry, ShaderEntries entries,
|
||||
ProgramSharedPtr program, bool is_built_ = true);
|
||||
|
||||
std::shared_ptr<VideoCommon::Shader::Registry> registry;
|
||||
ShaderEntries entries;
|
||||
ProgramSharedPtr program;
|
||||
GLuint handle = 0;
|
||||
bool is_built{};
|
||||
};
|
||||
|
||||
/// OpenGL shader cache: creates, looks up and owns the shaders used by the rasterizer and
/// loads them from the disk cache.
class ShaderCacheOpenGL final : public VideoCommon::ShaderCache<Shader> {
public:
    // `gpu_` is named like its sibling parameters and like the definition.
    explicit ShaderCacheOpenGL(RasterizerOpenGL& rasterizer_,
                               Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu_,
                               Tegra::Engines::Maxwell3D& maxwell3d_,
                               Tegra::Engines::KeplerCompute& kepler_compute_,
                               Tegra::MemoryManager& gpu_memory_, const Device& device_);
    ~ShaderCacheOpenGL() override;

    /// Loads disk cache for the current game
    void LoadDiskCache(u64 title_id, const std::atomic_bool& stop_loading,
                       const VideoCore::DiskResourceLoadCallback& callback);

    /// Gets the current specified shader stage program
    Shader* GetStageProgram(Maxwell::ShaderProgram program,
                            VideoCommon::Shader::AsyncShaders& async_shaders);

    /// Gets a compute kernel in the passed address
    Shader* GetComputeKernel(GPUVAddr code_addr);

private:
    /// Tries to create a program from a precompiled binary; returns null on failure
    ProgramSharedPtr GeneratePrecompiledProgram(
        const ShaderDiskCacheEntry& entry, const ShaderDiskCachePrecompiled& precompiled_entry,
        const std::unordered_set<GLenum>& supported_formats);

    Core::Frontend::EmuWindow& emu_window;
    Tegra::GPU& gpu;
    Tegra::MemoryManager& gpu_memory;
    Tegra::Engines::Maxwell3D& maxwell3d;
    Tegra::Engines::KeplerCompute& kepler_compute;
    const Device& device;

    ShaderDiskCacheOpenGL disk_cache;
    /// Shaders loaded from the disk cache, keyed by unique identifier
    std::unordered_map<u64, PrecompiledShader> runtime_cache;

    /// Shader and kernel used when the GPU address does not translate to a CPU address
    std::unique_ptr<Shader> null_shader;
    std::unique_ptr<Shader> null_kernel;

    /// Last shader returned by GetStageProgram for each program slot
    std::array<Shader*, Maxwell::MaxShaderProgram> last_shaders{};
};
|
||||
|
||||
} // namespace OpenGL
|
3018
src/video_core/renderer_opengl/gl_shader_decompiler.cpp
Executable file
3018
src/video_core/renderer_opengl/gl_shader_decompiler.cpp
Executable file
File diff suppressed because it is too large
Load Diff
69
src/video_core/renderer_opengl/gl_shader_decompiler.h
Executable file
69
src/video_core/renderer_opengl/gl_shader_decompiler.h
Executable file
@@ -0,0 +1,69 @@
|
||||
// Copyright 2018 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <array>
|
||||
#include <string>
|
||||
#include <string_view>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
#include "common/common_types.h"
|
||||
#include "video_core/engines/maxwell_3d.h"
|
||||
#include "video_core/engines/shader_type.h"
|
||||
#include "video_core/shader/registry.h"
|
||||
#include "video_core/shader/shader_ir.h"
|
||||
|
||||
namespace OpenGL {
|
||||
|
||||
class Device;
|
||||
|
||||
using Maxwell = Tegra::Engines::Maxwell3D::Regs;
|
||||
using SamplerEntry = VideoCommon::Shader::SamplerEntry;
|
||||
using ImageEntry = VideoCommon::Shader::ImageEntry;
|
||||
|
||||
/// A constant buffer description extended with an index.
class ConstBufferEntry : public VideoCommon::Shader::ConstBuffer {
|
||||
public:
|
||||
/// Forwards max_offset_ and is_indirect_ to the ConstBuffer base and stores index_
explicit ConstBufferEntry(u32 max_offset_, bool is_indirect_, u32 index_)
|
||||
: ConstBuffer{max_offset_, is_indirect_}, index{index_} {}
|
||||
|
||||
/// Returns the index passed at construction
u32 GetIndex() const {
|
||||
return index;
|
||||
}
|
||||
|
||||
private:
|
||||
u32 index = 0;
|
||||
};
|
||||
|
||||
/// Describes one global memory region used by a shader, identified by a constant buffer
/// index and offset, together with how the shader accesses it.
// NOTE(review): presumably (cbuf_index, cbuf_offset) locates the region's descriptor in
// a constant buffer - confirm against the decompiler.
struct GlobalMemoryEntry {
|
||||
/// Initializes every member from the argument of the same name
constexpr explicit GlobalMemoryEntry(u32 cbuf_index_, u32 cbuf_offset_, bool is_read_,
|
||||
bool is_written_)
|
||||
: cbuf_index{cbuf_index_}, cbuf_offset{cbuf_offset_}, is_read{is_read_}, is_written{
|
||||
is_written_} {}
|
||||
|
||||
u32 cbuf_index = 0;
|
||||
u32 cbuf_offset = 0;
|
||||
bool is_read = false;
|
||||
bool is_written = false;
|
||||
};
|
||||
|
||||
/// Resources and properties of a shader, as produced by MakeEntries.
struct ShaderEntries {
|
||||
/// Constant buffers used by the shader
std::vector<ConstBufferEntry> const_buffers;
|
||||
/// Global memory regions used by the shader
std::vector<GlobalMemoryEntry> global_memory_entries;
|
||||
/// Samplers used by the shader
std::vector<SamplerEntry> samplers;
|
||||
/// Images used by the shader
std::vector<ImageEntry> images;
|
||||
// NOTE(review): unit of shader_length (bytes vs. instructions) is not visible here - confirm
std::size_t shader_length{};
|
||||
// NOTE(review): presumably a bitmask of enabled clip distances - confirm
u32 clip_distances{};
|
||||
bool use_unified_uniforms{};
|
||||
};
|
||||
|
||||
ShaderEntries MakeEntries(const Device& device, const VideoCommon::Shader::ShaderIR& ir,
|
||||
Tegra::Engines::ShaderType stage);
|
||||
|
||||
std::string DecompileShader(const Device& device, const VideoCommon::Shader::ShaderIR& ir,
|
||||
const VideoCommon::Shader::Registry& registry,
|
||||
Tegra::Engines::ShaderType stage, std::string_view identifier,
|
||||
std::string_view suffix = {});
|
||||
|
||||
} // namespace OpenGL
|
483
src/video_core/renderer_opengl/gl_shader_disk_cache.cpp
Executable file
483
src/video_core/renderer_opengl/gl_shader_disk_cache.cpp
Executable file
@@ -0,0 +1,483 @@
|
||||
// Copyright 2019 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include <cstring>
|
||||
|
||||
#include <fmt/format.h>
|
||||
|
||||
#include "common/assert.h"
|
||||
#include "common/common_paths.h"
|
||||
#include "common/common_types.h"
|
||||
#include "common/file_util.h"
|
||||
#include "common/logging/log.h"
|
||||
#include "common/scm_rev.h"
|
||||
#include "common/zstd_compression.h"
|
||||
#include "core/core.h"
|
||||
#include "core/hle/kernel/process.h"
|
||||
#include "core/settings.h"
|
||||
#include "video_core/engines/shader_type.h"
|
||||
#include "video_core/renderer_opengl/gl_shader_cache.h"
|
||||
#include "video_core/renderer_opengl/gl_shader_disk_cache.h"
|
||||
|
||||
namespace OpenGL {
|
||||
|
||||
using Tegra::Engines::ShaderType;
|
||||
using VideoCommon::Shader::BindlessSamplerMap;
|
||||
using VideoCommon::Shader::BoundSamplerMap;
|
||||
using VideoCommon::Shader::KeyMap;
|
||||
|
||||
namespace {
|
||||
|
||||
using VideoCommon::Shader::SeparateSamplerKey;
|
||||
|
||||
using ShaderCacheVersionHash = std::array<u8, 64>;
|
||||
|
||||
struct ConstBufferKey {
|
||||
u32 cbuf = 0;
|
||||
u32 offset = 0;
|
||||
u32 value = 0;
|
||||
};
|
||||
|
||||
struct BoundSamplerEntry {
|
||||
u32 offset = 0;
|
||||
Tegra::Engines::SamplerDescriptor sampler;
|
||||
};
|
||||
|
||||
struct SeparateSamplerEntry {
|
||||
u32 cbuf1 = 0;
|
||||
u32 cbuf2 = 0;
|
||||
u32 offset1 = 0;
|
||||
u32 offset2 = 0;
|
||||
Tegra::Engines::SamplerDescriptor sampler;
|
||||
};
|
||||
|
||||
struct BindlessSamplerEntry {
|
||||
u32 cbuf = 0;
|
||||
u32 offset = 0;
|
||||
Tegra::Engines::SamplerDescriptor sampler;
|
||||
};
|
||||
|
||||
constexpr u32 NativeVersion = 21;
|
||||
|
||||
/// Returns a zero-initialized ShaderCacheVersionHash whose leading bytes are the characters of
/// Common::g_shader_cache_version. Strings longer than the array are truncated to its size.
ShaderCacheVersionHash GetShaderCacheVersionHash() {
    ShaderCacheVersionHash version_hash{};
    const char* const version_string = Common::g_shader_cache_version;
    const std::size_t copy_size = std::min(std::strlen(version_string), version_hash.size());
    std::memcpy(version_hash.data(), version_string, copy_size);
    return version_hash;
}
|
||||
|
||||
} // Anonymous namespace
|
||||
|
||||
ShaderDiskCacheEntry::ShaderDiskCacheEntry() = default;
|
||||
|
||||
ShaderDiskCacheEntry::~ShaderDiskCacheEntry() = default;
|
||||
|
||||
/// Reads one entry from the current position of `file`, in the same field order Save() writes.
/// @param file Open file positioned at the start of an entry.
/// @return true when every field was read completely, false on the first short read.
bool ShaderDiskCacheEntry::Load(Common::FS::IOFile& file) {
    // Shader type, followed by the element counts of both code segments.
    if (file.ReadBytes(&type, sizeof(u32)) != sizeof(u32)) {
        return false;
    }
    u32 code_size;
    u32 code_size_b;
    if (file.ReadBytes(&code_size, sizeof(u32)) != sizeof(u32)) {
        return false;
    }
    if (file.ReadBytes(&code_size_b, sizeof(u32)) != sizeof(u32)) {
        return false;
    }

    code.resize(code_size);
    code_b.resize(code_size_b);
    if (file.ReadArray(code.data(), code_size) != code_size) {
        return false;
    }
    // The second segment is only present in the file when both segments are non-empty.
    if (HasProgramA()) {
        if (file.ReadArray(code_b.data(), code_size_b) != code_size_b) {
            return false;
        }
    }

    // Fixed-size fields and the element counts of the four flattened maps.
    const auto read_one = [&file](auto& value) { return file.ReadArray(&value, 1) == 1; };

    u8 is_texture_handler_size_known;
    u32 texture_handler_size_value;
    u32 num_keys;
    u32 num_bound_samplers;
    u32 num_separate_samplers;
    u32 num_bindless_samplers;
    const bool has_fixed_fields =
        read_one(unique_identifier) && read_one(bound_buffer) &&
        read_one(is_texture_handler_size_known) && read_one(texture_handler_size_value) &&
        read_one(graphics_info) && read_one(compute_info) && read_one(num_keys) &&
        read_one(num_bound_samplers) && read_one(num_separate_samplers) &&
        read_one(num_bindless_samplers);
    if (!has_fixed_fields) {
        return false;
    }
    // The size value is always stored; it is only meaningful when the flag byte is set.
    if (is_texture_handler_size_known) {
        texture_handler_size = texture_handler_size_value;
    }

    // Flattened map contents, read as raw arrays.
    const auto read_all = [&file](auto& elements) {
        return file.ReadArray(elements.data(), elements.size()) == elements.size();
    };

    std::vector<ConstBufferKey> flat_keys(num_keys);
    std::vector<BoundSamplerEntry> flat_bound_samplers(num_bound_samplers);
    std::vector<SeparateSamplerEntry> flat_separate_samplers(num_separate_samplers);
    std::vector<BindlessSamplerEntry> flat_bindless_samplers(num_bindless_samplers);
    const bool has_maps = read_all(flat_keys) && read_all(flat_bound_samplers) &&
                          read_all(flat_separate_samplers) && read_all(flat_bindless_samplers);
    if (!has_maps) {
        return false;
    }

    // Rebuild the associative containers from their flat representation.
    for (const auto& flat : flat_keys) {
        keys.insert({{flat.cbuf, flat.offset}, flat.value});
    }
    for (const auto& flat : flat_bound_samplers) {
        bound_samplers.emplace(flat.offset, flat.sampler);
    }
    for (const auto& flat : flat_separate_samplers) {
        SeparateSamplerKey key;
        key.buffers = {flat.cbuf1, flat.cbuf2};
        key.offsets = {flat.offset1, flat.offset2};
        separate_samplers.emplace(key, flat.sampler);
    }
    for (const auto& flat : flat_bindless_samplers) {
        bindless_samplers.insert({{flat.cbuf, flat.offset}, flat.sampler});
    }

    return true;
}
|
||||
|
||||
/// Writes this entry to `file` in the field order Load() expects.
/// @param file Open file to write into at its current position.
/// @return true when every field was written completely, false on the first failed write.
bool ShaderDiskCacheEntry::Save(Common::FS::IOFile& file) const {
    const auto write_one = [&file](const auto& value) { return file.WriteObject(value) == 1; };
    const auto write_all = [&file](const auto& elements) {
        return file.WriteArray(elements.data(), elements.size()) == elements.size();
    };

    // Shader type, followed by the element counts of both code segments.
    const bool wrote_sizes = write_one(static_cast<u32>(type)) &&
                             write_one(static_cast<u32>(code.size())) &&
                             write_one(static_cast<u32>(code_b.size()));
    if (!wrote_sizes) {
        return false;
    }
    if (!write_all(code)) {
        return false;
    }
    // The second segment is only stored when both segments are non-empty.
    if (HasProgramA()) {
        if (!write_all(code_b)) {
            return false;
        }
    }

    // Fixed-size fields and the element counts of the four flattened maps. The texture handler
    // size is stored as a flag byte plus a value (0 when the optional is empty).
    const bool wrote_fixed_fields =
        write_one(unique_identifier) && write_one(bound_buffer) &&
        write_one(static_cast<u8>(texture_handler_size.has_value())) &&
        write_one(texture_handler_size.value_or(0)) && write_one(graphics_info) &&
        write_one(compute_info) && write_one(static_cast<u32>(keys.size())) &&
        write_one(static_cast<u32>(bound_samplers.size())) &&
        write_one(static_cast<u32>(separate_samplers.size())) &&
        write_one(static_cast<u32>(bindless_samplers.size()));
    if (!wrote_fixed_fields) {
        return false;
    }

    // Flatten every map into a contiguous array so it can be written in one call.
    std::vector<ConstBufferKey> flat_keys;
    flat_keys.reserve(keys.size());
    for (const auto& [location, value] : keys) {
        flat_keys.push_back(ConstBufferKey{location.first, location.second, value});
    }

    std::vector<BoundSamplerEntry> flat_bound_samplers;
    flat_bound_samplers.reserve(bound_samplers.size());
    for (const auto& [offset, descriptor] : bound_samplers) {
        flat_bound_samplers.push_back(BoundSamplerEntry{offset, descriptor});
    }

    std::vector<SeparateSamplerEntry> flat_separate_samplers;
    flat_separate_samplers.reserve(separate_samplers.size());
    for (const auto& [key, descriptor] : separate_samplers) {
        SeparateSamplerEntry flat;
        std::tie(flat.cbuf1, flat.cbuf2) = key.buffers;
        std::tie(flat.offset1, flat.offset2) = key.offsets;
        flat.sampler = descriptor;
        flat_separate_samplers.push_back(flat);
    }

    std::vector<BindlessSamplerEntry> flat_bindless_samplers;
    flat_bindless_samplers.reserve(bindless_samplers.size());
    for (const auto& [location, descriptor] : bindless_samplers) {
        flat_bindless_samplers.push_back(
            BindlessSamplerEntry{location.first, location.second, descriptor});
    }

    return write_all(flat_keys) && write_all(flat_bound_samplers) &&
           write_all(flat_separate_samplers) && write_all(flat_bindless_samplers);
}
|
||||
|
||||
ShaderDiskCacheOpenGL::ShaderDiskCacheOpenGL() = default;
|
||||
|
||||
ShaderDiskCacheOpenGL::~ShaderDiskCacheOpenGL() = default;
|
||||
|
||||
/// Stores the title ID that the path helpers use to build the cache file names.
void ShaderDiskCacheOpenGL::BindTitleID(u64 title_id_) {
    title_id = title_id_;
}
|
||||
|
||||
std::optional<std::vector<ShaderDiskCacheEntry>> ShaderDiskCacheOpenGL::LoadTransferable() {
|
||||
// Skip games without title id
|
||||
const bool has_title_id = title_id != 0;
|
||||
if (!Settings::values.use_disk_shader_cache.GetValue() || !has_title_id) {
|
||||
return std::nullopt;
|
||||
}
|
||||
|
||||
Common::FS::IOFile file(GetTransferablePath(), "rb");
|
||||
if (!file.IsOpen()) {
|
||||
LOG_INFO(Render_OpenGL, "No transferable shader cache found");
|
||||
is_usable = true;
|
||||
return std::nullopt;
|
||||
}
|
||||
|
||||
u32 version{};
|
||||
if (file.ReadBytes(&version, sizeof(version)) != sizeof(version)) {
|
||||
LOG_ERROR(Render_OpenGL, "Failed to get transferable cache version, skipping it");
|
||||
return std::nullopt;
|
||||
}
|
||||
|
||||
if (version < NativeVersion) {
|
||||
LOG_INFO(Render_OpenGL, "Transferable shader cache is old, removing");
|
||||
file.Close();
|
||||
InvalidateTransferable();
|
||||
is_usable = true;
|
||||
return std::nullopt;
|
||||
}
|
||||
if (version > NativeVersion) {
|
||||
LOG_WARNING(Render_OpenGL, "Transferable shader cache was generated with a newer version "
|
||||
"of the emulator, skipping");
|
||||
return std::nullopt;
|
||||
}
|
||||
|
||||
// Version is valid, load the shaders
|
||||
std::vector<ShaderDiskCacheEntry> entries;
|
||||
while (file.Tell() < file.GetSize()) {
|
||||
ShaderDiskCacheEntry& entry = entries.emplace_back();
|
||||
if (!entry.Load(file)) {
|
||||
LOG_ERROR(Render_OpenGL, "Failed to load transferable raw entry, skipping");
|
||||
return std::nullopt;
|
||||
}
|
||||
}
|
||||
|
||||
is_usable = true;
|
||||
return {std::move(entries)};
|
||||
}
|
||||
|
||||
std::vector<ShaderDiskCachePrecompiled> ShaderDiskCacheOpenGL::LoadPrecompiled() {
|
||||
if (!is_usable) {
|
||||
return {};
|
||||
}
|
||||
|
||||
Common::FS::IOFile file(GetPrecompiledPath(), "rb");
|
||||
if (!file.IsOpen()) {
|
||||
LOG_INFO(Render_OpenGL, "No precompiled shader cache found");
|
||||
return {};
|
||||
}
|
||||
|
||||
if (const auto result = LoadPrecompiledFile(file)) {
|
||||
return *result;
|
||||
}
|
||||
|
||||
LOG_INFO(Render_OpenGL, "Failed to load precompiled cache");
|
||||
file.Close();
|
||||
InvalidatePrecompiled();
|
||||
return {};
|
||||
}
|
||||
|
||||
/// Decompresses `file` into the virtual precompiled cache file and parses it.
///
/// The decompressed stream starts with a version hash that must equal
/// GetShaderCacheVersionHash(). It is followed by entries made of a unique identifier, a binary
/// format, a 32-bit binary size and the binary itself.
///
/// @param file Open precompiled cache file.
/// @return The parsed entries, or std::nullopt when the file cannot be read completely, the
///         hash does not match, or an entry is truncated.
std::optional<std::vector<ShaderDiskCachePrecompiled>> ShaderDiskCacheOpenGL::LoadPrecompiledFile(
    Common::FS::IOFile& file) {
    // Read compressed file from disk and decompress to virtual precompiled cache file
    std::vector<u8> compressed(file.GetSize());
    if (file.ReadBytes(compressed.data(), compressed.size()) != compressed.size()) {
        // A short read would hand zero-padded data to the decompressor
        return std::nullopt;
    }
    const std::vector<u8> decompressed = Common::Compression::DecompressDataZSTD(compressed);
    const bool is_staged = SaveArrayToPrecompiled(decompressed.data(), decompressed.size());
    precompiled_cache_virtual_file_offset = 0;
    if (!is_staged) {
        return std::nullopt;
    }

    ShaderCacheVersionHash file_hash{};
    if (!LoadArrayFromPrecompiled(file_hash.data(), file_hash.size())) {
        precompiled_cache_virtual_file_offset = 0;
        return std::nullopt;
    }
    if (GetShaderCacheVersionHash() != file_hash) {
        LOG_INFO(Render_OpenGL, "Precompiled cache is from another version of the emulator");
        precompiled_cache_virtual_file_offset = 0;
        return std::nullopt;
    }

    std::vector<ShaderDiskCachePrecompiled> entries;
    while (precompiled_cache_virtual_file_offset < precompiled_cache_virtual_file.GetSize()) {
        u32 binary_size{};
        auto& entry = entries.emplace_back();
        if (!LoadObjectFromPrecompiled(entry.unique_identifier) ||
            !LoadObjectFromPrecompiled(entry.binary_format) ||
            !LoadObjectFromPrecompiled(binary_size)) {
            return std::nullopt;
        }

        entry.binary.resize(binary_size);
        if (!LoadArrayFromPrecompiled(entry.binary.data(), entry.binary.size())) {
            return std::nullopt;
        }
    }
    return entries;
}
|
||||
|
||||
void ShaderDiskCacheOpenGL::InvalidateTransferable() {
|
||||
if (!Common::FS::Delete(GetTransferablePath())) {
|
||||
LOG_ERROR(Render_OpenGL, "Failed to invalidate transferable file={}",
|
||||
GetTransferablePath());
|
||||
}
|
||||
InvalidatePrecompiled();
|
||||
}
|
||||
|
||||
void ShaderDiskCacheOpenGL::InvalidatePrecompiled() {
|
||||
// Clear virtaul precompiled cache file
|
||||
precompiled_cache_virtual_file.Resize(0);
|
||||
|
||||
if (!Common::FS::Delete(GetPrecompiledPath())) {
|
||||
LOG_ERROR(Render_OpenGL, "Failed to invalidate precompiled file={}", GetPrecompiledPath());
|
||||
}
|
||||
}
|
||||
|
||||
void ShaderDiskCacheOpenGL::SaveEntry(const ShaderDiskCacheEntry& entry) {
|
||||
if (!is_usable) {
|
||||
return;
|
||||
}
|
||||
|
||||
const u64 id = entry.unique_identifier;
|
||||
if (stored_transferable.contains(id)) {
|
||||
// The shader already exists
|
||||
return;
|
||||
}
|
||||
|
||||
Common::FS::IOFile file = AppendTransferableFile();
|
||||
if (!file.IsOpen()) {
|
||||
return;
|
||||
}
|
||||
if (!entry.Save(file)) {
|
||||
LOG_ERROR(Render_OpenGL, "Failed to save raw transferable cache entry, removing");
|
||||
file.Close();
|
||||
InvalidateTransferable();
|
||||
return;
|
||||
}
|
||||
|
||||
stored_transferable.insert(id);
|
||||
}
|
||||
|
||||
/// Queries the binary of `program` from OpenGL and appends it to the virtual precompiled cache
/// file as (unique identifier, binary format, 32-bit size, binary bytes).
///
/// Does nothing when the cache is not usable. If any write fails, the precompiled cache is
/// invalidated.
void ShaderDiskCacheOpenGL::SavePrecompiled(u64 unique_identifier, GLuint program) {
    if (!is_usable) {
        return;
    }

    // TODO(Rodrigo): This is a design smell. I shouldn't be having to manually write the header
    // when writing the dump. This should be done the moment I get access to write to the virtual
    // file.
    if (precompiled_cache_virtual_file.GetSize() == 0) {
        SavePrecompiledHeaderToVirtualPrecompiledCache();
    }

    // Both outputs are zero-initialized so that a GL query which leaves them untouched cannot
    // produce an arbitrary allocation size or format.
    GLint binary_length = 0;
    glGetProgramiv(program, GL_PROGRAM_BINARY_LENGTH, &binary_length);

    GLenum binary_format = 0;
    std::vector<u8> binary(binary_length);
    glGetProgramBinary(program, binary_length, nullptr, &binary_format, binary.data());

    if (!SaveObjectToPrecompiled(unique_identifier) || !SaveObjectToPrecompiled(binary_format) ||
        !SaveObjectToPrecompiled(static_cast<u32>(binary.size())) ||
        !SaveArrayToPrecompiled(binary.data(), binary.size())) {
        LOG_ERROR(Render_OpenGL, "Failed to save binary program file in shader={:016X}, removing",
                  unique_identifier);
        InvalidatePrecompiled();
    }
}
|
||||
|
||||
/// Opens the bound title's transferable cache file in append mode, creating the cache
/// directories first. NativeVersion is written as the header when the file did not exist
/// before or is empty.
/// @return The opened file, or a default-constructed IOFile when a directory could not be
///         created, the file could not be opened, or the header could not be written.
Common::FS::IOFile ShaderDiskCacheOpenGL::AppendTransferableFile() const {
    if (!EnsureDirectories()) {
        return {};
    }

    const std::string path = GetTransferablePath();
    // Must be sampled before opening the file
    const bool existed = Common::FS::Exists(path);

    Common::FS::IOFile file(path, "ab");
    if (!file.IsOpen()) {
        LOG_ERROR(Render_OpenGL, "Failed to open transferable cache in path={}", path);
        return {};
    }

    const bool needs_header = !existed || file.GetSize() == 0;
    if (needs_header && file.WriteObject(NativeVersion) != 1) {
        LOG_ERROR(Render_OpenGL, "Failed to write transferable cache version in path={}",
                  path);
        return {};
    }
    return file;
}
|
||||
|
||||
void ShaderDiskCacheOpenGL::SavePrecompiledHeaderToVirtualPrecompiledCache() {
|
||||
const auto hash{GetShaderCacheVersionHash()};
|
||||
if (!SaveArrayToPrecompiled(hash.data(), hash.size())) {
|
||||
LOG_ERROR(
|
||||
Render_OpenGL,
|
||||
"Failed to write precompiled cache version hash to virtual precompiled cache file");
|
||||
}
|
||||
}
|
||||
|
||||
void ShaderDiskCacheOpenGL::SaveVirtualPrecompiledFile() {
|
||||
precompiled_cache_virtual_file_offset = 0;
|
||||
const std::vector<u8> uncompressed = precompiled_cache_virtual_file.ReadAllBytes();
|
||||
const std::vector<u8> compressed =
|
||||
Common::Compression::CompressDataZSTDDefault(uncompressed.data(), uncompressed.size());
|
||||
|
||||
const auto precompiled_path{GetPrecompiledPath()};
|
||||
Common::FS::IOFile file(precompiled_path, "wb");
|
||||
|
||||
if (!file.IsOpen()) {
|
||||
LOG_ERROR(Render_OpenGL, "Failed to open precompiled cache in path={}", precompiled_path);
|
||||
return;
|
||||
}
|
||||
if (file.WriteBytes(compressed.data(), compressed.size()) != compressed.size()) {
|
||||
LOG_ERROR(Render_OpenGL, "Failed to write precompiled cache version in path={}",
|
||||
precompiled_path);
|
||||
}
|
||||
}
|
||||
|
||||
bool ShaderDiskCacheOpenGL::EnsureDirectories() const {
|
||||
const auto CreateDir = [](const std::string& dir) {
|
||||
if (!Common::FS::CreateDir(dir)) {
|
||||
LOG_ERROR(Render_OpenGL, "Failed to create directory={}", dir);
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
};
|
||||
|
||||
return CreateDir(Common::FS::GetUserPath(Common::FS::UserPath::ShaderDir)) &&
|
||||
CreateDir(GetBaseDir()) && CreateDir(GetTransferableDir()) &&
|
||||
CreateDir(GetPrecompiledDir());
|
||||
}
|
||||
|
||||
std::string ShaderDiskCacheOpenGL::GetTransferablePath() const {
|
||||
return Common::FS::SanitizePath(GetTransferableDir() + DIR_SEP_CHR + GetTitleID() + ".bin");
|
||||
}
|
||||
|
||||
std::string ShaderDiskCacheOpenGL::GetPrecompiledPath() const {
|
||||
return Common::FS::SanitizePath(GetPrecompiledDir() + DIR_SEP_CHR + GetTitleID() + ".bin");
|
||||
}
|
||||
|
||||
std::string ShaderDiskCacheOpenGL::GetTransferableDir() const {
|
||||
return GetBaseDir() + DIR_SEP "transferable";
|
||||
}
|
||||
|
||||
std::string ShaderDiskCacheOpenGL::GetPrecompiledDir() const {
|
||||
return GetBaseDir() + DIR_SEP "precompiled";
|
||||
}
|
||||
|
||||
std::string ShaderDiskCacheOpenGL::GetBaseDir() const {
|
||||
return Common::FS::GetUserPath(Common::FS::UserPath::ShaderDir) + DIR_SEP "opengl";
|
||||
}
|
||||
|
||||
std::string ShaderDiskCacheOpenGL::GetTitleID() const {
|
||||
return fmt::format("{:016X}", title_id);
|
||||
}
|
||||
|
||||
} // namespace OpenGL
|
175
src/video_core/renderer_opengl/gl_shader_disk_cache.h
Executable file
175
src/video_core/renderer_opengl/gl_shader_disk_cache.h
Executable file
@@ -0,0 +1,175 @@
|
||||
// Copyright 2019 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <optional>
|
||||
#include <string>
|
||||
#include <tuple>
|
||||
#include <type_traits>
|
||||
#include <unordered_map>
|
||||
#include <unordered_set>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
#include <glad/glad.h>
|
||||
|
||||
#include "common/assert.h"
|
||||
#include "common/common_types.h"
|
||||
#include "core/file_sys/vfs_vector.h"
|
||||
#include "video_core/engines/shader_type.h"
|
||||
#include "video_core/shader/registry.h"
|
||||
|
||||
namespace Common::FS {
|
||||
class IOFile;
|
||||
}
|
||||
|
||||
namespace OpenGL {
|
||||
|
||||
using ProgramCode = std::vector<u64>;
|
||||
|
||||
/// Describes a shader and how it's used by the guest GPU
|
||||
struct ShaderDiskCacheEntry {
|
||||
ShaderDiskCacheEntry();
|
||||
~ShaderDiskCacheEntry();
|
||||
|
||||
bool Load(Common::FS::IOFile& file);
|
||||
|
||||
bool Save(Common::FS::IOFile& file) const;
|
||||
|
||||
bool HasProgramA() const {
|
||||
return !code.empty() && !code_b.empty();
|
||||
}
|
||||
|
||||
Tegra::Engines::ShaderType type{};
|
||||
ProgramCode code;
|
||||
ProgramCode code_b;
|
||||
|
||||
u64 unique_identifier = 0;
|
||||
std::optional<u32> texture_handler_size;
|
||||
u32 bound_buffer = 0;
|
||||
VideoCommon::Shader::GraphicsInfo graphics_info;
|
||||
VideoCommon::Shader::ComputeInfo compute_info;
|
||||
VideoCommon::Shader::KeyMap keys;
|
||||
VideoCommon::Shader::BoundSamplerMap bound_samplers;
|
||||
VideoCommon::Shader::SeparateSamplerMap separate_samplers;
|
||||
VideoCommon::Shader::BindlessSamplerMap bindless_samplers;
|
||||
};
|
||||
|
||||
/// Contains an OpenGL dumped binary program
|
||||
struct ShaderDiskCachePrecompiled {
|
||||
u64 unique_identifier = 0;
|
||||
GLenum binary_format = 0;
|
||||
std::vector<u8> binary;
|
||||
};
|
||||
|
||||
class ShaderDiskCacheOpenGL {
|
||||
public:
|
||||
explicit ShaderDiskCacheOpenGL();
|
||||
~ShaderDiskCacheOpenGL();
|
||||
|
||||
/// Binds a title ID for all future operations.
|
||||
void BindTitleID(u64 title_id);
|
||||
|
||||
/// Loads transferable cache. If file has a old version or on failure, it deletes the file.
|
||||
std::optional<std::vector<ShaderDiskCacheEntry>> LoadTransferable();
|
||||
|
||||
/// Loads current game's precompiled cache. Invalidates on failure.
|
||||
std::vector<ShaderDiskCachePrecompiled> LoadPrecompiled();
|
||||
|
||||
/// Removes the transferable (and precompiled) cache file.
|
||||
void InvalidateTransferable();
|
||||
|
||||
/// Removes the precompiled cache file and clears virtual precompiled cache file.
|
||||
void InvalidatePrecompiled();
|
||||
|
||||
/// Saves a raw dump to the transferable file. Checks for collisions.
|
||||
void SaveEntry(const ShaderDiskCacheEntry& entry);
|
||||
|
||||
/// Saves a dump entry to the precompiled file. Does not check for collisions.
|
||||
void SavePrecompiled(u64 unique_identifier, GLuint program);
|
||||
|
||||
/// Serializes virtual precompiled shader cache file to real file
|
||||
void SaveVirtualPrecompiledFile();
|
||||
|
||||
private:
|
||||
/// Loads the transferable cache. Returns empty on failure.
|
||||
std::optional<std::vector<ShaderDiskCachePrecompiled>> LoadPrecompiledFile(
|
||||
Common::FS::IOFile& file);
|
||||
|
||||
/// Opens current game's transferable file and write it's header if it doesn't exist
|
||||
Common::FS::IOFile AppendTransferableFile() const;
|
||||
|
||||
/// Save precompiled header to precompiled_cache_in_memory
|
||||
void SavePrecompiledHeaderToVirtualPrecompiledCache();
|
||||
|
||||
/// Create shader disk cache directories. Returns true on success.
|
||||
bool EnsureDirectories() const;
|
||||
|
||||
/// Gets current game's transferable file path
|
||||
std::string GetTransferablePath() const;
|
||||
|
||||
/// Gets current game's precompiled file path
|
||||
std::string GetPrecompiledPath() const;
|
||||
|
||||
/// Get user's transferable directory path
|
||||
std::string GetTransferableDir() const;
|
||||
|
||||
/// Get user's precompiled directory path
|
||||
std::string GetPrecompiledDir() const;
|
||||
|
||||
/// Get user's shader directory path
|
||||
std::string GetBaseDir() const;
|
||||
|
||||
/// Get current game's title id
|
||||
std::string GetTitleID() const;
|
||||
|
||||
template <typename T>
|
||||
bool SaveArrayToPrecompiled(const T* data, std::size_t length) {
|
||||
const std::size_t write_length = precompiled_cache_virtual_file.WriteArray(
|
||||
data, length, precompiled_cache_virtual_file_offset);
|
||||
precompiled_cache_virtual_file_offset += write_length;
|
||||
return write_length == sizeof(T) * length;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
bool LoadArrayFromPrecompiled(T* data, std::size_t length) {
|
||||
const std::size_t read_length = precompiled_cache_virtual_file.ReadArray(
|
||||
data, length, precompiled_cache_virtual_file_offset);
|
||||
precompiled_cache_virtual_file_offset += read_length;
|
||||
return read_length == sizeof(T) * length;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
bool SaveObjectToPrecompiled(const T& object) {
|
||||
return SaveArrayToPrecompiled(&object, 1);
|
||||
}
|
||||
|
||||
bool SaveObjectToPrecompiled(bool object) {
|
||||
const auto value = static_cast<u8>(object);
|
||||
return SaveArrayToPrecompiled(&value, 1);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
bool LoadObjectFromPrecompiled(T& object) {
|
||||
return LoadArrayFromPrecompiled(&object, 1);
|
||||
}
|
||||
|
||||
// Stores whole precompiled cache which will be read from or saved to the precompiled chache
|
||||
// file
|
||||
FileSys::VectorVfsFile precompiled_cache_virtual_file;
|
||||
// Stores the current offset of the precompiled cache file for IO purposes
|
||||
std::size_t precompiled_cache_virtual_file_offset = 0;
|
||||
|
||||
// Stored transferable shaders
|
||||
std::unordered_set<u64> stored_transferable;
|
||||
|
||||
/// Title ID to operate on
|
||||
u64 title_id = 0;
|
||||
|
||||
// The cache has been loaded at boot
|
||||
bool is_usable = false;
|
||||
};
|
||||
|
||||
} // namespace OpenGL
|
149
src/video_core/renderer_opengl/gl_shader_manager.cpp
Executable file
149
src/video_core/renderer_opengl/gl_shader_manager.cpp
Executable file
@@ -0,0 +1,149 @@
|
||||
// Copyright 2018 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include <glad/glad.h>
|
||||
|
||||
#include "common/common_types.h"
|
||||
#include "video_core/engines/maxwell_3d.h"
|
||||
#include "video_core/renderer_opengl/gl_device.h"
|
||||
#include "video_core/renderer_opengl/gl_shader_manager.h"
|
||||
|
||||
namespace OpenGL {
|
||||
|
||||
namespace {
|
||||
|
||||
/// Binds the assembly program `current` to `stage` and keeps the stage's enable state in sync.
///
/// Does nothing when `current` equals `old`. A `current` of zero disables the stage if it was
/// enabled. Any other value enables the stage if needed and binds the program.
/// @param enabled Cached enable state of `stage`, updated to match what was issued to GL.
void BindProgram(GLenum stage, GLuint current, GLuint old, bool& enabled) {
    if (current == old) {
        return;
    }

    const bool should_be_enabled = current != 0;
    if (enabled != should_be_enabled) {
        enabled = should_be_enabled;
        if (should_be_enabled) {
            glEnable(stage);
        } else {
            glDisable(stage);
        }
    }
    if (should_be_enabled) {
        glBindProgramARB(stage, current);
    }
}
|
||||
|
||||
} // Anonymous namespace
|
||||
|
||||
/// Caches whether the device uses assembly shaders. In assembly mode the compute program target
/// is enabled. Otherwise the graphics pipeline object is created and bound.
ProgramManager::ProgramManager(const Device& device)
    : use_assembly_programs{device.UseAssemblyShaders()} {
    if (use_assembly_programs) {
        glEnable(GL_COMPUTE_PROGRAM_NV);
        return;
    }
    graphics_pipeline.Create();
    glBindProgramPipeline(graphics_pipeline.handle);
}
|
||||
|
||||
ProgramManager::~ProgramManager() = default;
|
||||
|
||||
/// Binds a guest compute program: as an assembly program in assembly mode, otherwise through
/// glUseProgram, which also marks the graphics state as no longer bound.
void ProgramManager::BindCompute(GLuint program) {
    if (use_assembly_programs) {
        glBindProgramARB(GL_COMPUTE_PROGRAM_NV, program);
        return;
    }
    is_graphics_bound = false;
    glUseProgram(program);
}
|
||||
|
||||
void ProgramManager::BindGraphicsPipeline() {
|
||||
if (!use_assembly_programs) {
|
||||
UpdateSourcePrograms();
|
||||
}
|
||||
}
|
||||
|
||||
/// Binds a host pipeline object.
///
/// In assembly mode an enabled geometry program stage is disabled first and the remembered
/// geometry program in old_state is cleared. Otherwise, if graphics is not currently bound,
/// the active program is unbound with glUseProgram(0).
void ProgramManager::BindHostPipeline(GLuint pipeline) {
    if (use_assembly_programs && geometry_enabled) {
        geometry_enabled = false;
        old_state.geometry = 0;
        glDisable(GL_GEOMETRY_PROGRAM_NV);
    } else if (!use_assembly_programs && !is_graphics_bound) {
        glUseProgram(0);
    }
    glBindProgramPipeline(pipeline);
}
|
||||
|
||||
void ProgramManager::RestoreGuestPipeline() {
|
||||
if (use_assembly_programs) {
|
||||
glBindProgramPipeline(0);
|
||||
} else {
|
||||
glBindProgramPipeline(graphics_pipeline.handle);
|
||||
}
|
||||
}
|
||||
|
||||
/// Binds a host GLSL compute program with glUseProgram and marks the graphics state as no longer
/// bound. In assembly mode the compute program target is disabled first.
void ProgramManager::BindHostCompute(GLuint program) {
    if (use_assembly_programs) {
        glDisable(GL_COMPUTE_PROGRAM_NV);
    }
    is_graphics_bound = false;
    glUseProgram(program);
}
|
||||
|
||||
void ProgramManager::RestoreGuestCompute() {
|
||||
if (use_assembly_programs) {
|
||||
glEnable(GL_COMPUTE_PROGRAM_NV);
|
||||
glUseProgram(0);
|
||||
}
|
||||
}
|
||||
|
||||
/// Records `program` as the current vertex program. In assembly mode it is also bound right
/// away if it differs from the previously recorded vertex program.
void ProgramManager::UseVertexShader(GLuint program) {
    if (use_assembly_programs) {
        const GLuint previous = current_state.vertex;
        BindProgram(GL_VERTEX_PROGRAM_NV, program, previous, vertex_enabled);
    }
    current_state.vertex = program;
}
|
||||
|
||||
/// Records `program` as the current geometry program. In assembly mode it is also bound right
/// away if it differs from the geometry program that is currently active.
void ProgramManager::UseGeometryShader(GLuint program) {
    if (use_assembly_programs) {
        // The reference must be the geometry program, not the vertex one. BindHostPipeline can
        // disable the geometry stage without updating current_state, so a disabled stage is
        // treated as having no program bound. That guarantees the stage is re-enabled even when
        // the same program is requested again.
        const GLuint previous = geometry_enabled ? current_state.geometry : 0;
        BindProgram(GL_GEOMETRY_PROGRAM_NV, program, previous, geometry_enabled);
    }
    current_state.geometry = program;
}
|
||||
|
||||
/// Records `program` as the current fragment program. In assembly mode it is also bound right
/// away if it differs from the previously recorded fragment program.
void ProgramManager::UseFragmentShader(GLuint program) {
    if (use_assembly_programs) {
        // Compare against the fragment program, mirroring what UseVertexShader does for the
        // vertex stage. Comparing against the vertex program never detects an unchanged
        // fragment program.
        BindProgram(GL_FRAGMENT_PROGRAM_NV, program, current_state.fragment, fragment_enabled);
    }
    current_state.fragment = program;
}
|
||||
|
||||
void ProgramManager::UpdateSourcePrograms() {
|
||||
if (!is_graphics_bound) {
|
||||
is_graphics_bound = true;
|
||||
glUseProgram(0);
|
||||
}
|
||||
|
||||
const GLuint handle = graphics_pipeline.handle;
|
||||
const auto update_state = [handle](GLenum stage, GLuint current, GLuint old) {
|
||||
if (current == old) {
|
||||
return;
|
||||
}
|
||||
glUseProgramStages(handle, stage, current);
|
||||
};
|
||||
update_state(GL_VERTEX_SHADER_BIT, current_state.vertex, old_state.vertex);
|
||||
update_state(GL_GEOMETRY_SHADER_BIT, current_state.geometry, old_state.geometry);
|
||||
update_state(GL_FRAGMENT_SHADER_BIT, current_state.fragment, old_state.fragment);
|
||||
|
||||
old_state = current_state;
|
||||
}
|
||||
|
||||
/// Fills the uniform data from the Maxwell3D registers.
void MaxwellUniformData::SetFromRegs(const Tegra::Engines::Maxwell3D& maxwell) {
    const auto& regs = maxwell.regs;

    // Y_NEGATE controls what value S2R returns for the Y_DIRECTION system value.
    if (regs.screen_y_control.y_negate == 0) {
        y_direction = 1.0f;
    } else {
        y_direction = -1.0f;
    }
}
|
||||
|
||||
} // namespace OpenGL
|
82
src/video_core/renderer_opengl/gl_shader_manager.h
Executable file
82
src/video_core/renderer_opengl/gl_shader_manager.h
Executable file
@@ -0,0 +1,82 @@
|
||||
// Copyright 2018 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <cstddef>
|
||||
|
||||
#include <glad/glad.h>
|
||||
|
||||
#include "video_core/renderer_opengl/gl_resource_manager.h"
|
||||
#include "video_core/renderer_opengl/maxwell_to_gl.h"
|
||||
|
||||
namespace OpenGL {
|
||||
|
||||
class Device;
|
||||
|
||||
/// Uniform structure for the Uniform Buffer Object, all vectors must be 16-byte aligned
|
||||
/// @note Always keep a vec4 at the end. The GL spec is not clear whether the alignment at
|
||||
/// the end of a uniform block is included in UNIFORM_BLOCK_DATA_SIZE or not.
|
||||
/// Not following that rule will cause problems on some AMD drivers.
|
||||
struct alignas(16) MaxwellUniformData {
|
||||
void SetFromRegs(const Tegra::Engines::Maxwell3D& maxwell);
|
||||
|
||||
GLfloat y_direction;
|
||||
};
|
||||
static_assert(sizeof(MaxwellUniformData) == 16, "MaxwellUniformData structure size is incorrect");
|
||||
static_assert(sizeof(MaxwellUniformData) < 16384,
|
||||
"MaxwellUniformData structure must be less than 16kb as per the OpenGL spec");
|
||||
|
||||
class ProgramManager {
|
||||
public:
|
||||
explicit ProgramManager(const Device& device);
|
||||
~ProgramManager();
|
||||
|
||||
/// Binds a compute program
|
||||
void BindCompute(GLuint program);
|
||||
|
||||
/// Updates bound programs.
|
||||
void BindGraphicsPipeline();
|
||||
|
||||
/// Binds an OpenGL pipeline object unsynchronized with the guest state.
|
||||
void BindHostPipeline(GLuint pipeline);
|
||||
|
||||
/// Rewinds BindHostPipeline state changes.
|
||||
void RestoreGuestPipeline();
|
||||
|
||||
/// Binds an OpenGL GLSL program object unsynchronized with the guest state.
|
||||
void BindHostCompute(GLuint program);
|
||||
|
||||
/// Rewinds BindHostCompute state changes.
|
||||
void RestoreGuestCompute();
|
||||
|
||||
void UseVertexShader(GLuint program);
|
||||
void UseGeometryShader(GLuint program);
|
||||
void UseFragmentShader(GLuint program);
|
||||
|
||||
private:
|
||||
struct PipelineState {
|
||||
GLuint vertex = 0;
|
||||
GLuint geometry = 0;
|
||||
GLuint fragment = 0;
|
||||
};
|
||||
|
||||
/// Update GLSL programs.
|
||||
void UpdateSourcePrograms();
|
||||
|
||||
OGLPipeline graphics_pipeline;
|
||||
|
||||
PipelineState current_state;
|
||||
PipelineState old_state;
|
||||
|
||||
bool use_assembly_programs = false;
|
||||
|
||||
bool is_graphics_bound = true;
|
||||
|
||||
bool vertex_enabled = false;
|
||||
bool geometry_enabled = false;
|
||||
bool fragment_enabled = false;
|
||||
};
|
||||
|
||||
} // namespace OpenGL
|
61
src/video_core/renderer_opengl/gl_shader_util.cpp
Executable file
61
src/video_core/renderer_opengl/gl_shader_util.cpp
Executable file
@@ -0,0 +1,61 @@
|
||||
// Copyright 2014 Citra Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include <string_view>
|
||||
#include <vector>
|
||||
#include <glad/glad.h>
|
||||
#include "common/assert.h"
|
||||
#include "common/logging/log.h"
|
||||
#include "video_core/renderer_opengl/gl_shader_util.h"
|
||||
|
||||
namespace OpenGL::GLShader {
|
||||
|
||||
namespace {
|
||||
|
||||
/// Returns a human readable stage name for a GL shader type, used in log messages.
/// Unknown types trigger UNIMPLEMENTED() and yield "unknown".
std::string_view StageDebugName(GLenum type) {
    if (type == GL_VERTEX_SHADER) {
        return "vertex";
    }
    if (type == GL_GEOMETRY_SHADER) {
        return "geometry";
    }
    if (type == GL_FRAGMENT_SHADER) {
        return "fragment";
    }
    if (type == GL_COMPUTE_SHADER) {
        return "compute";
    }
    UNIMPLEMENTED();
    return "unknown";
}
|
||||
|
||||
} // Anonymous namespace
|
||||
|
||||
/// Creates a shader object of the given type, uploads the source and compiles it.
/// The compiler info log is written to the log: as debug output when compilation succeeded,
/// as an error otherwise. The shader handle is returned in both cases.
/// @param source Shader source code; it does not need to be null terminated
/// @param type   Shader stage (e.g. GL_VERTEX_SHADER)
/// @returns Handle of the created shader object
GLuint LoadShader(std::string_view source, GLenum type) {
    const std::string_view debug_type = StageDebugName(type);
    const GLuint shader_id = glCreateShader(type);

    // The explicit length lets us pass a string_view that is not null terminated.
    const GLchar* source_string = source.data();
    const GLint source_length = static_cast<GLint>(source.size());

    glShaderSource(shader_id, 1, &source_string, &source_length);
    LOG_DEBUG(Render_OpenGL, "Compiling {} shader...", debug_type);
    glCompileShader(shader_id);

    // Both values are initialized because a failing query leaves its output untouched.
    GLint result = GL_FALSE;
    GLint info_log_length = 0;
    glGetShaderiv(shader_id, GL_COMPILE_STATUS, &result);
    glGetShaderiv(shader_id, GL_INFO_LOG_LENGTH, &info_log_length);

    // A length of one is just the null terminator, i.e. an empty log.
    if (info_log_length > 1) {
        std::string shader_error(static_cast<std::string::size_type>(info_log_length), ' ');
        GLsizei written = 0;
        glGetShaderInfoLog(shader_id, info_log_length, &written, shader_error.data());
        // Keep only the characters GL reported; this drops the trailing null terminator.
        shader_error.resize(static_cast<std::string::size_type>(written));
        if (result == GL_TRUE) {
            LOG_DEBUG(Render_OpenGL, "{}", shader_error);
        } else {
            LOG_ERROR(Render_OpenGL, "Error compiling {} shader:\n{}", debug_type, shader_error);
        }
    }
    return shader_id;
}
|
||||
|
||||
} // namespace OpenGL::GLShader
|
95
src/video_core/renderer_opengl/gl_shader_util.h
Executable file
95
src/video_core/renderer_opengl/gl_shader_util.h
Executable file
@@ -0,0 +1,95 @@
|
||||
// Copyright 2014 Citra Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <glad/glad.h>
|
||||
#include "common/assert.h"
|
||||
#include "common/logging/log.h"
|
||||
|
||||
namespace OpenGL::GLShader {
|
||||
|
||||
/**
|
||||
* Utility function to log the source code of a list of shaders.
|
||||
* @param shaders The OpenGL shaders whose source we will print.
|
||||
*/
|
||||
template <typename... T>
|
||||
void LogShaderSource(T... shaders) {
|
||||
auto shader_list = {shaders...};
|
||||
|
||||
for (const auto& shader : shader_list) {
|
||||
if (shader == 0)
|
||||
continue;
|
||||
|
||||
GLint source_length;
|
||||
glGetShaderiv(shader, GL_SHADER_SOURCE_LENGTH, &source_length);
|
||||
|
||||
std::string source(source_length, ' ');
|
||||
glGetShaderSource(shader, source_length, nullptr, &source[0]);
|
||||
LOG_INFO(Render_OpenGL, "Shader source {}", source);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Utility function to create and compile an OpenGL GLSL shader
|
||||
* @param source String of the GLSL shader program
|
||||
* @param type Type of the shader (GL_VERTEX_SHADER, GL_GEOMETRY_SHADER, GL_FRAGMENT_SHADER or GL_COMPUTE_SHADER)
|
||||
*/
|
||||
GLuint LoadShader(std::string_view source, GLenum type);
|
||||
|
||||
/**
|
||||
* Utility function to create and compile an OpenGL GLSL shader program (vertex + fragment shader)
|
||||
* @param separable_program whether to create a separable program
|
||||
* @param shaders ID of shaders to attach to the program
|
||||
* @returns Handle of the newly created OpenGL program object
|
||||
*/
|
||||
template <typename... T>
|
||||
GLuint LoadProgram(bool separable_program, bool hint_retrievable, T... shaders) {
|
||||
// Link the program
|
||||
LOG_DEBUG(Render_OpenGL, "Linking program...");
|
||||
|
||||
GLuint program_id = glCreateProgram();
|
||||
|
||||
((shaders == 0 ? (void)0 : glAttachShader(program_id, shaders)), ...);
|
||||
|
||||
if (separable_program) {
|
||||
glProgramParameteri(program_id, GL_PROGRAM_SEPARABLE, GL_TRUE);
|
||||
}
|
||||
if (hint_retrievable) {
|
||||
glProgramParameteri(program_id, GL_PROGRAM_BINARY_RETRIEVABLE_HINT, GL_TRUE);
|
||||
}
|
||||
|
||||
glLinkProgram(program_id);
|
||||
|
||||
// Check the program
|
||||
GLint result = GL_FALSE;
|
||||
GLint info_log_length;
|
||||
glGetProgramiv(program_id, GL_LINK_STATUS, &result);
|
||||
glGetProgramiv(program_id, GL_INFO_LOG_LENGTH, &info_log_length);
|
||||
|
||||
if (info_log_length > 1) {
|
||||
std::string program_error(info_log_length, ' ');
|
||||
glGetProgramInfoLog(program_id, info_log_length, nullptr, &program_error[0]);
|
||||
if (result == GL_TRUE) {
|
||||
LOG_DEBUG(Render_OpenGL, "{}", program_error);
|
||||
} else {
|
||||
LOG_ERROR(Render_OpenGL, "Error linking shader:\n{}", program_error);
|
||||
}
|
||||
}
|
||||
|
||||
if (result == GL_FALSE) {
|
||||
// There was a problem linking the shader, print the source for debugging purposes.
|
||||
LogShaderSource(shaders...);
|
||||
}
|
||||
|
||||
ASSERT_MSG(result == GL_TRUE, "Shader not linked");
|
||||
|
||||
((shaders == 0 ? (void)0 : glDetachShader(program_id, shaders)), ...);
|
||||
|
||||
return program_id;
|
||||
}
|
||||
|
||||
} // namespace OpenGL::GLShader
|
259
src/video_core/renderer_opengl/gl_state_tracker.cpp
Executable file
259
src/video_core/renderer_opengl/gl_state_tracker.cpp
Executable file
@@ -0,0 +1,259 @@
|
||||
// Copyright 2019 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include <algorithm>
|
||||
#include <array>
|
||||
#include <cstddef>
|
||||
|
||||
#include "common/common_types.h"
|
||||
#include "core/core.h"
|
||||
#include "video_core/engines/maxwell_3d.h"
|
||||
#include "video_core/gpu.h"
|
||||
#include "video_core/renderer_opengl/gl_state_tracker.h"
|
||||
|
||||
#define OFF(field_name) MAXWELL3D_REG_INDEX(field_name)
|
||||
#define NUM(field_name) (sizeof(Maxwell3D::Regs::field_name) / (sizeof(u32)))
|
||||
|
||||
namespace OpenGL {
|
||||
|
||||
namespace {
|
||||
|
||||
using namespace Dirty;
|
||||
using namespace VideoCommon::Dirty;
|
||||
using Tegra::Engines::Maxwell3D;
|
||||
using Regs = Maxwell3D::Regs;
|
||||
using Tables = Maxwell3D::DirtyState::Tables;
|
||||
using Table = Maxwell3D::DirtyState::Table;
|
||||
|
||||
/// Maps the color mask registers to their dirty flags: one flag per render target in the
/// first table and the aggregate ColorMasks flag in the second one.
void SetupDirtyColorMasks(Tables& tables) {
    auto& primary = tables[0];
    auto& secondary = tables[1];

    primary[OFF(color_mask_common)] = ColorMaskCommon;

    const std::size_t words_per_mask = NUM(color_mask[0]);
    for (std::size_t index = 0; index < Regs::NumRenderTargets; ++index) {
        const std::size_t first_word = OFF(color_mask) + index * words_per_mask;
        FillBlock(primary, first_word, words_per_mask, ColorMask0 + index);
    }

    FillBlock(secondary, OFF(color_mask), NUM(color_mask), ColorMasks);
}
|
||||
|
||||
/// Maps the vertex array registers to their dirty flags.
/// The first words of each vertex_array entry and the matching vertex_array_limit entry are
/// tracked as VertexBuffer0 + i (aggregate: VertexBuffers). The word at instance_base_offset
/// and the instanced_arrays register are tracked as VertexInstance0 + i (aggregate:
/// VertexInstances).
void SetupDirtyVertexArrays(Tables& tables) {
    // Number of leading words of a vertex_array entry tracked as buffer state.
    static constexpr std::size_t num_array = 3;
    // Word index inside a vertex_array entry tracked as instancing state.
    static constexpr std::size_t instance_base_offset = 3;
    for (std::size_t i = 0; i < Regs::NumVertexArrays; ++i) {
        const std::size_t array_offset = OFF(vertex_array) + i * NUM(vertex_array[0]);
        const std::size_t limit_offset = OFF(vertex_array_limit) + i * NUM(vertex_array_limit[0]);

        FillBlock(tables, array_offset, num_array, VertexBuffer0 + i, VertexBuffers);
        // Only mark the words of this array's own limit entry. Using the size of the whole
        // vertex_array_limit array here would spill into the registers that follow it.
        FillBlock(tables, limit_offset, NUM(vertex_array_limit[0]), VertexBuffer0 + i,
                  VertexBuffers);

        const std::size_t instance_array_offset = array_offset + instance_base_offset;
        tables[0][instance_array_offset] = static_cast<u8>(VertexInstance0 + i);
        tables[1][instance_array_offset] = VertexInstances;

        const std::size_t instance_offset = OFF(instanced_arrays) + i;
        tables[0][instance_offset] = static_cast<u8>(VertexInstance0 + i);
        tables[1][instance_offset] = VertexInstances;
    }
}
|
||||
|
||||
/// Maps each vertex attribute format register to its own flag (first table) and all of them
/// to the aggregate VertexFormats flag (second table).
void SetupDirtyVertexFormat(Tables& tables) {
    const std::size_t words_per_format = NUM(vertex_attrib_format[0]);
    for (std::size_t attrib = 0; attrib < Regs::NumVertexAttributes; ++attrib) {
        const std::size_t first_word = OFF(vertex_attrib_format) + attrib * words_per_format;
        FillBlock(tables[0], first_word, words_per_format, VertexFormat0 + attrib);
    }

    FillBlock(tables[1], OFF(vertex_attrib_format), Regs::NumVertexAttributes, VertexFormats);
}
|
||||
|
||||
/// Maps the viewport and viewport transform registers to per-viewport flags (first table) and
/// to the aggregate Viewports flag (second table). The transform enable register gets its own
/// ViewportTransform flag.
void SetupDirtyViewports(Tables& tables) {
    auto& primary = tables[0];
    auto& secondary = tables[1];

    const std::size_t transform_words = NUM(viewport_transform[0]);
    const std::size_t viewport_words = NUM(viewports[0]);
    for (std::size_t index = 0; index < Regs::NumViewports; ++index) {
        FillBlock(primary, OFF(viewport_transform) + index * transform_words, transform_words,
                  Viewport0 + index);
        FillBlock(primary, OFF(viewports) + index * viewport_words, viewport_words,
                  Viewport0 + index);
    }

    FillBlock(secondary, OFF(viewport_transform), NUM(viewport_transform), Viewports);
    FillBlock(secondary, OFF(viewports), NUM(viewports), Viewports);

    primary[OFF(viewport_transform_enabled)] = ViewportTransform;
    secondary[OFF(viewport_transform_enabled)] = Viewports;
}
|
||||
|
||||
/// Maps the scissor test registers to per-scissor flags (first table) and to the aggregate
/// Scissors flag (second table).
void SetupDirtyScissors(Tables& tables) {
    const std::size_t words_per_scissor = NUM(scissor_test[0]);
    for (std::size_t index = 0; index < Regs::NumViewports; ++index) {
        const std::size_t first_word = OFF(scissor_test) + index * words_per_scissor;
        FillBlock(tables[0], first_word, words_per_scissor, Scissor0 + index);
    }
    FillBlock(tables[1], OFF(scissor_test), NUM(scissor_test), Scissors);
}
|
||||
|
||||
/// Maps every word of all shader program configurations to the Shaders flag.
void SetupDirtyShaders(Tables& tables) {
    const std::size_t first_word = OFF(shader_config[0]);
    const std::size_t num_words = NUM(shader_config[0]) * Regs::MaxShaderProgram;
    FillBlock(tables[0], first_word, num_words, Shaders);
}
|
||||
|
||||
/// Maps the polygon mode registers to their front/back flags (first table) and to the
/// aggregate PolygonModes flag (second table). fill_rectangle maps to PolygonModes directly.
void SetupDirtyPolygonModes(Tables& tables) {
    auto& primary = tables[0];
    auto& secondary = tables[1];

    primary[OFF(polygon_mode_front)] = PolygonModeFront;
    secondary[OFF(polygon_mode_front)] = PolygonModes;

    primary[OFF(polygon_mode_back)] = PolygonModeBack;
    secondary[OFF(polygon_mode_back)] = PolygonModes;

    primary[OFF(fill_rectangle)] = PolygonModes;
}
|
||||
|
||||
/// Maps the depth test enable/function registers to DepthTest and the depth write enable
/// register to DepthMask.
void SetupDirtyDepthTest(Tables& tables) {
    tables[0][OFF(depth_test_enable)] = DepthTest;
    tables[0][OFF(depth_test_func)] = DepthTest;
    tables[0][OFF(depth_write_enabled)] = DepthMask;
}
|
||||
|
||||
/// Maps every stencil related register (enable, two side enable, front and back function,
/// operations and masks) to the single StencilTest flag.
void SetupDirtyStencilTest(Tables& tables) {
    static constexpr std::array stencil_registers = {
        OFF(stencil_enable),
        OFF(stencil_front_func_func),
        OFF(stencil_front_func_ref),
        OFF(stencil_front_func_mask),
        OFF(stencil_front_op_fail),
        OFF(stencil_front_op_zfail),
        OFF(stencil_front_op_zpass),
        OFF(stencil_front_mask),
        OFF(stencil_two_side_enable),
        OFF(stencil_back_func_func),
        OFF(stencil_back_func_ref),
        OFF(stencil_back_func_mask),
        OFF(stencil_back_op_fail),
        OFF(stencil_back_op_zfail),
        OFF(stencil_back_op_zpass),
        OFF(stencil_back_mask),
    };
    auto& table = tables[0];
    for (const auto reg : stencil_registers) {
        table[reg] = StencilTest;
    }
}
|
||||
|
||||
/// Maps the alpha test reference, function and enable registers to the AlphaTest flag.
void SetupDirtyAlphaTest(Tables& tables) {
    static constexpr std::array alpha_registers = {
        OFF(alpha_test_ref),
        OFF(alpha_test_func),
        OFF(alpha_test_enabled),
    };
    for (const auto reg : alpha_registers) {
        tables[0][reg] = AlphaTest;
    }
}
|
||||
|
||||
/// Maps the blend registers to their dirty flags: blend color and the independent blend
/// enable get dedicated flags, per render target state maps to BlendState0 + rt (first table)
/// and everything under blend/independent_blend maps to BlendStates (second table).
void SetupDirtyBlend(Tables& tables) {
    auto& primary = tables[0];
    auto& secondary = tables[1];

    FillBlock(primary, OFF(blend_color), NUM(blend_color), BlendColor);

    primary[OFF(independent_blend_enable)] = BlendIndependentEnabled;

    const std::size_t words_per_target = NUM(independent_blend[0]);
    for (std::size_t rt = 0; rt < Regs::NumRenderTargets; ++rt) {
        const std::size_t first_word = OFF(independent_blend) + rt * words_per_target;
        FillBlock(primary, first_word, words_per_target, BlendState0 + rt);

        // The per target enable registers live in the shared blend block.
        primary[OFF(blend.enable) + rt] = static_cast<u8>(BlendState0 + rt);
    }

    FillBlock(secondary, OFF(independent_blend), NUM(independent_blend), BlendStates);
    FillBlock(secondary, OFF(blend), NUM(blend), BlendStates);
}
|
||||
|
||||
/// Maps every word of the primitive restart registers to the PrimitiveRestart flag.
void SetupDirtyPrimitiveRestart(Tables& tables) {
    const std::size_t first_word = OFF(primitive_restart);
    const std::size_t num_words = NUM(primitive_restart);
    FillBlock(tables[0], first_word, num_words, PrimitiveRestart);
}
|
||||
|
||||
/// Maps the polygon offset enables (fill, line, point), factor, units and clamp registers to
/// the PolygonOffset flag.
void SetupDirtyPolygonOffset(Tables& tables) {
    static constexpr std::array offset_registers = {
        OFF(polygon_offset_fill_enable),
        OFF(polygon_offset_line_enable),
        OFF(polygon_offset_point_enable),
        OFF(polygon_offset_factor),
        OFF(polygon_offset_units),
        OFF(polygon_offset_clamp),
    };
    for (const auto reg : offset_registers) {
        tables[0][reg] = PolygonOffset;
    }
}
|
||||
|
||||
/// Maps every word of the multisample control register to the MultisampleControl flag.
void SetupDirtyMultisampleControl(Tables& tables) {
    const std::size_t first_word = OFF(multisample_control);
    const std::size_t num_words = NUM(multisample_control);
    FillBlock(tables[0], first_word, num_words, MultisampleControl);
}
|
||||
|
||||
/// Maps the rasterize enable register to the RasterizeEnable flag.
void SetupDirtyRasterizeEnable(Tables& tables) {
    auto& table = tables[0];
    table[OFF(rasterize_enable)] = RasterizeEnable;
}
|
||||
|
||||
/// Maps the framebuffer sRGB register to the FramebufferSRGB flag.
void SetupDirtyFramebufferSRGB(Tables& tables) {
    auto& table = tables[0];
    table[OFF(framebuffer_srgb)] = FramebufferSRGB;
}
|
||||
|
||||
/// Maps every word of the logic op registers to the LogicOp flag.
void SetupDirtyLogicOp(Tables& tables) {
    const std::size_t first_word = OFF(logic_op);
    const std::size_t num_words = NUM(logic_op);
    FillBlock(tables[0], first_word, num_words, LogicOp);
}
|
||||
|
||||
/// Maps the fragment color clamp register to the FragmentClampColor flag.
void SetupDirtyFragmentClampColor(Tables& tables) {
    auto& table = tables[0];
    table[OFF(frag_color_clamp)] = FragmentClampColor;
}
|
||||
|
||||
/// Maps the vp_point_size, point_size and point_sprite_enable registers to the PointSize flag.
void SetupDirtyPointSize(Tables& tables) {
    static constexpr std::array point_registers = {
        OFF(vp_point_size),
        OFF(point_size),
        OFF(point_sprite_enable),
    };
    for (const auto reg : point_registers) {
        tables[0][reg] = PointSize;
    }
}
|
||||
|
||||
/// Maps the smooth/aliased line width and line smooth enable registers to the LineWidth flag.
void SetupDirtyLineWidth(Tables& tables) {
    static constexpr std::array line_registers = {
        OFF(line_width_smooth),
        OFF(line_width_aliased),
        OFF(line_smooth_enable),
    };
    for (const auto reg : line_registers) {
        tables[0][reg] = LineWidth;
    }
}
|
||||
|
||||
/// Maps the screen_y_control and depth_mode registers to the ClipControl flag.
void SetupDirtyClipControl(Tables& tables) {
    tables[0][OFF(screen_y_control)] = ClipControl;
    tables[0][OFF(depth_mode)] = ClipControl;
}
|
||||
|
||||
/// Maps the view volume clip control register to the DepthClampEnabled flag.
void SetupDirtyDepthClampEnabled(Tables& tables) {
    auto& table = tables[0];
    table[OFF(view_volume_clip_control)] = DepthClampEnabled;
}
|
||||
|
||||
/// Maps the remaining standalone registers: clip distances, front face and cull test state.
void SetupDirtyMisc(Tables& tables) {
    tables[0][OFF(clip_distance_enabled)] = ClipDistances;

    tables[0][OFF(front_face)] = FrontFace;

    tables[0][OFF(cull_test_enabled)] = CullTest;
    tables[0][OFF(cull_face)] = CullTest;
}
|
||||
|
||||
} // Anonymous namespace
|
||||
|
||||
/// Builds the register-to-dirty-flag tables of the GPU's Maxwell3D engine and keeps a
/// reference to its dirty flags.
/// @param gpu GPU whose Maxwell3D dirty state is configured
StateTracker::StateTracker(Tegra::GPU& gpu) : flags{gpu.Maxwell3D().dirty.flags} {
    auto& dirty_state = gpu.Maxwell3D().dirty;
    auto& dirty_tables = dirty_state.tables;

    // The setup order is kept as is: later calls may overwrite entries written by earlier ones.
    SetupDirtyRenderTargets(dirty_tables);
    SetupDirtyColorMasks(dirty_tables);
    SetupDirtyViewports(dirty_tables);
    SetupDirtyScissors(dirty_tables);
    SetupDirtyVertexArrays(dirty_tables);
    SetupDirtyVertexFormat(dirty_tables);
    SetupDirtyShaders(dirty_tables);
    SetupDirtyPolygonModes(dirty_tables);
    SetupDirtyDepthTest(dirty_tables);
    SetupDirtyStencilTest(dirty_tables);
    SetupDirtyAlphaTest(dirty_tables);
    SetupDirtyBlend(dirty_tables);
    SetupDirtyPrimitiveRestart(dirty_tables);
    SetupDirtyPolygonOffset(dirty_tables);
    SetupDirtyMultisampleControl(dirty_tables);
    SetupDirtyRasterizeEnable(dirty_tables);
    SetupDirtyFramebufferSRGB(dirty_tables);
    SetupDirtyLogicOp(dirty_tables);
    SetupDirtyFragmentClampColor(dirty_tables);
    SetupDirtyPointSize(dirty_tables);
    SetupDirtyLineWidth(dirty_tables);
    SetupDirtyClipControl(dirty_tables);
    SetupDirtyDepthClampEnabled(dirty_tables);
    SetupDirtyMisc(dirty_tables);

    // Enable the "on write" store for the aggregate and per-array vertex buffer flags.
    auto& on_write = dirty_state.on_write_stores;
    on_write[VertexBuffers] = true;
    for (std::size_t array = 0; array < Regs::NumVertexArrays; ++array) {
        on_write[VertexBuffer0 + array] = true;
    }
}
|
||||
|
||||
/// Marks the aggregate vertex buffer flag and every per-array vertex buffer flag as dirty.
void StateTracker::InvalidateStreamBuffer() {
    flags[Dirty::VertexBuffers] = true;

    int flag_index = Dirty::VertexBuffer0;
    while (flag_index <= Dirty::VertexBuffer31) {
        flags[flag_index] = true;
        ++flag_index;
    }
}
|
||||
|
||||
} // namespace OpenGL
|
207
src/video_core/renderer_opengl/gl_state_tracker.h
Executable file
207
src/video_core/renderer_opengl/gl_state_tracker.h
Executable file
@@ -0,0 +1,207 @@
|
||||
// Copyright 2019 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <limits>
|
||||
|
||||
#include <glad/glad.h>
|
||||
|
||||
#include "common/common_types.h"
|
||||
#include "core/core.h"
|
||||
#include "video_core/dirty_flags.h"
|
||||
#include "video_core/engines/maxwell_3d.h"
|
||||
|
||||
namespace Tegra {
|
||||
class GPU;
|
||||
}
|
||||
|
||||
namespace OpenGL {
|
||||
|
||||
namespace Dirty {
|
||||
|
||||
enum : u8 {
|
||||
First = VideoCommon::Dirty::LastCommonEntry,
|
||||
|
||||
VertexFormats,
|
||||
VertexFormat0,
|
||||
VertexFormat31 = VertexFormat0 + 31,
|
||||
|
||||
VertexBuffers,
|
||||
VertexBuffer0,
|
||||
VertexBuffer31 = VertexBuffer0 + 31,
|
||||
|
||||
VertexInstances,
|
||||
VertexInstance0,
|
||||
VertexInstance31 = VertexInstance0 + 31,
|
||||
|
||||
ViewportTransform,
|
||||
Viewports,
|
||||
Viewport0,
|
||||
Viewport15 = Viewport0 + 15,
|
||||
|
||||
Scissors,
|
||||
Scissor0,
|
||||
Scissor15 = Scissor0 + 15,
|
||||
|
||||
ColorMaskCommon,
|
||||
ColorMasks,
|
||||
ColorMask0,
|
||||
ColorMask7 = ColorMask0 + 7,
|
||||
|
||||
BlendColor,
|
||||
BlendIndependentEnabled,
|
||||
BlendStates,
|
||||
BlendState0,
|
||||
BlendState7 = BlendState0 + 7,
|
||||
|
||||
Shaders,
|
||||
ClipDistances,
|
||||
|
||||
PolygonModes,
|
||||
PolygonModeFront,
|
||||
PolygonModeBack,
|
||||
|
||||
ColorMask,
|
||||
FrontFace,
|
||||
CullTest,
|
||||
DepthMask,
|
||||
DepthTest,
|
||||
StencilTest,
|
||||
AlphaTest,
|
||||
PrimitiveRestart,
|
||||
PolygonOffset,
|
||||
MultisampleControl,
|
||||
RasterizeEnable,
|
||||
FramebufferSRGB,
|
||||
LogicOp,
|
||||
FragmentClampColor,
|
||||
PointSize,
|
||||
LineWidth,
|
||||
ClipControl,
|
||||
DepthClampEnabled,
|
||||
|
||||
Last
|
||||
};
|
||||
static_assert(Last <= std::numeric_limits<u8>::max());
|
||||
|
||||
} // namespace Dirty
|
||||
|
||||
class StateTracker {
|
||||
public:
|
||||
explicit StateTracker(Tegra::GPU& gpu);
|
||||
|
||||
void InvalidateStreamBuffer();
|
||||
|
||||
void BindIndexBuffer(GLuint new_index_buffer) {
|
||||
if (index_buffer == new_index_buffer) {
|
||||
return;
|
||||
}
|
||||
index_buffer = new_index_buffer;
|
||||
glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, new_index_buffer);
|
||||
}
|
||||
|
||||
void BindFramebuffer(GLuint new_framebuffer) {
|
||||
if (framebuffer == new_framebuffer) {
|
||||
return;
|
||||
}
|
||||
framebuffer = new_framebuffer;
|
||||
glBindFramebuffer(GL_DRAW_FRAMEBUFFER, framebuffer);
|
||||
}
|
||||
|
||||
void NotifyScreenDrawVertexArray() {
|
||||
flags[OpenGL::Dirty::VertexFormats] = true;
|
||||
flags[OpenGL::Dirty::VertexFormat0 + 0] = true;
|
||||
flags[OpenGL::Dirty::VertexFormat0 + 1] = true;
|
||||
|
||||
flags[OpenGL::Dirty::VertexBuffers] = true;
|
||||
flags[OpenGL::Dirty::VertexBuffer0] = true;
|
||||
|
||||
flags[OpenGL::Dirty::VertexInstances] = true;
|
||||
flags[OpenGL::Dirty::VertexInstance0 + 0] = true;
|
||||
flags[OpenGL::Dirty::VertexInstance0 + 1] = true;
|
||||
}
|
||||
|
||||
void NotifyPolygonModes() {
|
||||
flags[OpenGL::Dirty::PolygonModes] = true;
|
||||
flags[OpenGL::Dirty::PolygonModeFront] = true;
|
||||
flags[OpenGL::Dirty::PolygonModeBack] = true;
|
||||
}
|
||||
|
||||
void NotifyViewport0() {
|
||||
flags[OpenGL::Dirty::Viewports] = true;
|
||||
flags[OpenGL::Dirty::Viewport0] = true;
|
||||
}
|
||||
|
||||
void NotifyScissor0() {
|
||||
flags[OpenGL::Dirty::Scissors] = true;
|
||||
flags[OpenGL::Dirty::Scissor0] = true;
|
||||
}
|
||||
|
||||
void NotifyColorMask(size_t index) {
|
||||
flags[OpenGL::Dirty::ColorMasks] = true;
|
||||
flags[OpenGL::Dirty::ColorMask0 + index] = true;
|
||||
}
|
||||
|
||||
void NotifyBlend0() {
|
||||
flags[OpenGL::Dirty::BlendStates] = true;
|
||||
flags[OpenGL::Dirty::BlendState0] = true;
|
||||
}
|
||||
|
||||
void NotifyFramebuffer() {
|
||||
flags[VideoCommon::Dirty::RenderTargets] = true;
|
||||
}
|
||||
|
||||
void NotifyFrontFace() {
|
||||
flags[OpenGL::Dirty::FrontFace] = true;
|
||||
}
|
||||
|
||||
void NotifyCullTest() {
|
||||
flags[OpenGL::Dirty::CullTest] = true;
|
||||
}
|
||||
|
||||
void NotifyDepthMask() {
|
||||
flags[OpenGL::Dirty::DepthMask] = true;
|
||||
}
|
||||
|
||||
void NotifyDepthTest() {
|
||||
flags[OpenGL::Dirty::DepthTest] = true;
|
||||
}
|
||||
|
||||
void NotifyStencilTest() {
|
||||
flags[OpenGL::Dirty::StencilTest] = true;
|
||||
}
|
||||
|
||||
void NotifyPolygonOffset() {
|
||||
flags[OpenGL::Dirty::PolygonOffset] = true;
|
||||
}
|
||||
|
||||
void NotifyRasterizeEnable() {
|
||||
flags[OpenGL::Dirty::RasterizeEnable] = true;
|
||||
}
|
||||
|
||||
void NotifyFramebufferSRGB() {
|
||||
flags[OpenGL::Dirty::FramebufferSRGB] = true;
|
||||
}
|
||||
|
||||
void NotifyLogicOp() {
|
||||
flags[OpenGL::Dirty::LogicOp] = true;
|
||||
}
|
||||
|
||||
void NotifyClipControl() {
|
||||
flags[OpenGL::Dirty::ClipControl] = true;
|
||||
}
|
||||
|
||||
void NotifyAlphaTest() {
|
||||
flags[OpenGL::Dirty::AlphaTest] = true;
|
||||
}
|
||||
|
||||
private:
|
||||
Tegra::Engines::Maxwell3D::DirtyState::Flags& flags;
|
||||
|
||||
GLuint framebuffer = 0;
|
||||
GLuint index_buffer = 0;
|
||||
};
|
||||
|
||||
} // namespace OpenGL
|
70
src/video_core/renderer_opengl/gl_stream_buffer.cpp
Executable file
70
src/video_core/renderer_opengl/gl_stream_buffer.cpp
Executable file
@@ -0,0 +1,70 @@
|
||||
// Copyright 2018 Citra Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include <tuple>
|
||||
#include <vector>
|
||||
|
||||
#include "common/alignment.h"
|
||||
#include "common/assert.h"
|
||||
#include "common/microprofile.h"
|
||||
#include "video_core/renderer_opengl/gl_device.h"
|
||||
#include "video_core/renderer_opengl/gl_state_tracker.h"
|
||||
#include "video_core/renderer_opengl/gl_stream_buffer.h"
|
||||
|
||||
MICROPROFILE_DEFINE(OpenGL_StreamBuffer, "OpenGL", "Stream Buffer Orphaning",
|
||||
MP_RGB(128, 128, 192));
|
||||
|
||||
namespace OpenGL {
|
||||
|
||||
/// Creates the stream buffer storage and maps the whole buffer persistently for writing.
/// When the device uses assembly shaders or vertex buffer unified memory, the buffer is also
/// made resident and its GPU address is queried.
OGLStreamBuffer::OGLStreamBuffer(const Device& device, StateTracker& state_tracker_)
    : state_tracker{state_tracker_} {
    static constexpr GLbitfield storage_flags = GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT;
    // Writes are flushed explicitly when a mapping is released (see Unmap).
    static constexpr GLbitfield map_flags = storage_flags | GL_MAP_FLUSH_EXPLICIT_BIT;

    gl_buffer.Create();
    glNamedBufferStorage(gl_buffer.handle, BUFFER_SIZE, nullptr, storage_flags);

    void* const mapping = glMapNamedBufferRange(gl_buffer.handle, 0, BUFFER_SIZE, map_flags);
    mapped_ptr = static_cast<u8*>(mapping);

    const bool needs_gpu_address =
        device.UseAssemblyShaders() || device.HasVertexBufferUnifiedMemory();
    if (needs_gpu_address) {
        glMakeNamedBufferResidentNV(gl_buffer.handle, GL_READ_ONLY);
        glGetNamedBufferParameterui64vNV(gl_buffer.handle, GL_BUFFER_GPU_ADDRESS_NV, &gpu_address);
    }
}
|
||||
|
||||
/// Unmaps the persistent mapping and releases the buffer object.
OGLStreamBuffer::~OGLStreamBuffer() {
    // The mapping has to go before the buffer it belongs to.
    glUnmapNamedBuffer(gl_buffer.handle);
    gl_buffer.Release();
}
|
||||
|
||||
/// Reserves a chunk of the stream buffer.
/// @param size      Maximum number of bytes that will be written; must fit in the buffer
/// @param alignment Alignment of the chunk offset; values of zero or less skip the alignment
/// @returns CPU pointer to the chunk and its offset within the buffer
/// When the chunk does not fit in the remaining space, the buffer contents are invalidated,
/// the state tracker is notified and writing restarts at offset zero.
std::pair<u8*, GLintptr> OGLStreamBuffer::Map(GLsizeiptr size, GLintptr alignment) {
    ASSERT(size <= BUFFER_SIZE);
    ASSERT(alignment <= BUFFER_SIZE);
    mapped_size = size;

    if (alignment > 0) {
        buffer_pos = Common::AlignUp<std::size_t>(buffer_pos, alignment);
    }

    const bool is_out_of_space = buffer_pos + size > BUFFER_SIZE;
    if (is_out_of_space) {
        MICROPROFILE_SCOPE(OpenGL_StreamBuffer);
        glInvalidateBufferData(gl_buffer.handle);
        state_tracker.InvalidateStreamBuffer();

        buffer_pos = 0;
    }

    u8* const chunk = mapped_ptr + buffer_pos;
    return std::make_pair(chunk, buffer_pos);
}
|
||||
|
||||
/// Flushes the bytes written into the chunk obtained from Map and advances the write cursor.
/// @param size Number of bytes actually used; must not exceed the size passed to Map
void OGLStreamBuffer::Unmap(GLsizeiptr size) {
    ASSERT(size <= mapped_size);

    const GLintptr chunk_offset = buffer_pos;
    buffer_pos += size;

    if (size > 0) {
        glFlushMappedNamedBufferRange(gl_buffer.handle, chunk_offset, size);
    }
}
|
||||
|
||||
} // namespace OpenGL
|
60
src/video_core/renderer_opengl/gl_stream_buffer.h
Executable file
60
src/video_core/renderer_opengl/gl_stream_buffer.h
Executable file
@@ -0,0 +1,60 @@
|
||||
// Copyright 2018 Citra Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <utility>
|
||||
|
||||
#include <glad/glad.h>
|
||||
|
||||
#include "common/common_types.h"
|
||||
#include "video_core/renderer_opengl/gl_resource_manager.h"
|
||||
|
||||
namespace OpenGL {
|
||||
|
||||
class Device;
|
||||
class StateTracker;
|
||||
|
||||
class OGLStreamBuffer : private NonCopyable {
|
||||
public:
|
||||
explicit OGLStreamBuffer(const Device& device, StateTracker& state_tracker_);
|
||||
~OGLStreamBuffer();
|
||||
|
||||
/*
|
||||
* Allocates a linear chunk of memory in the GPU buffer with at least "size" bytes
|
||||
* and the optional alignment requirement.
|
||||
* If the buffer is full, the whole buffer is reallocated which invalidates old chunks.
|
||||
* The return values are the pointer to the new chunk, and the offset within the buffer.
|
||||
* The actual used size must be specified on unmapping the chunk.
|
||||
*/
|
||||
std::pair<u8*, GLintptr> Map(GLsizeiptr size, GLintptr alignment = 0);
|
||||
|
||||
void Unmap(GLsizeiptr size);
|
||||
|
||||
GLuint Handle() const {
|
||||
return gl_buffer.handle;
|
||||
}
|
||||
|
||||
u64 Address() const {
|
||||
return gpu_address;
|
||||
}
|
||||
|
||||
GLsizeiptr Size() const noexcept {
|
||||
return BUFFER_SIZE;
|
||||
}
|
||||
|
||||
private:
|
||||
static constexpr GLsizeiptr BUFFER_SIZE = 256 * 1024 * 1024;
|
||||
|
||||
StateTracker& state_tracker;
|
||||
|
||||
OGLBuffer gl_buffer;
|
||||
|
||||
GLuint64EXT gpu_address = 0;
|
||||
GLintptr buffer_pos = 0;
|
||||
GLsizeiptr mapped_size = 0;
|
||||
u8* mapped_ptr = nullptr;
|
||||
};
|
||||
|
||||
} // namespace OpenGL
|
1061
src/video_core/renderer_opengl/gl_texture_cache.cpp
Executable file
1061
src/video_core/renderer_opengl/gl_texture_cache.cpp
Executable file
File diff suppressed because it is too large
Load Diff
250
src/video_core/renderer_opengl/gl_texture_cache.h
Executable file
250
src/video_core/renderer_opengl/gl_texture_cache.h
Executable file
@@ -0,0 +1,250 @@
|
||||
// Copyright 2019 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <memory>
|
||||
#include <span>
|
||||
|
||||
#include <glad/glad.h>
|
||||
|
||||
#include "video_core/renderer_opengl/gl_resource_manager.h"
|
||||
#include "video_core/renderer_opengl/util_shaders.h"
|
||||
#include "video_core/texture_cache/texture_cache.h"
|
||||
|
||||
namespace OpenGL {
|
||||
|
||||
class Device;
|
||||
class ProgramManager;
|
||||
class StateTracker;
|
||||
|
||||
class Framebuffer;
|
||||
class Image;
|
||||
class ImageView;
|
||||
class Sampler;
|
||||
|
||||
using VideoCommon::ImageId;
|
||||
using VideoCommon::ImageViewId;
|
||||
using VideoCommon::ImageViewType;
|
||||
using VideoCommon::NUM_RT;
|
||||
using VideoCommon::Offset2D;
|
||||
using VideoCommon::RenderTargets;
|
||||
|
||||
class ImageBufferMap {
|
||||
public:
|
||||
explicit ImageBufferMap(GLuint handle, u8* map, size_t size, OGLSync* sync);
|
||||
~ImageBufferMap();
|
||||
|
||||
GLuint Handle() const noexcept {
|
||||
return handle;
|
||||
}
|
||||
|
||||
std::span<u8> Span() const noexcept {
|
||||
return span;
|
||||
}
|
||||
|
||||
private:
|
||||
std::span<u8> span;
|
||||
OGLSync* sync;
|
||||
GLuint handle;
|
||||
};
|
||||
|
||||
struct FormatProperties {
|
||||
GLenum compatibility_class;
|
||||
bool compatibility_by_size;
|
||||
bool is_compressed;
|
||||
};
|
||||
|
||||
class TextureCacheRuntime {
|
||||
friend Framebuffer;
|
||||
friend Image;
|
||||
friend ImageView;
|
||||
friend Sampler;
|
||||
|
||||
public:
|
||||
explicit TextureCacheRuntime(const Device& device, ProgramManager& program_manager,
|
||||
StateTracker& state_tracker);
|
||||
~TextureCacheRuntime();
|
||||
|
||||
void Finish();
|
||||
|
||||
ImageBufferMap MapUploadBuffer(size_t size);
|
||||
|
||||
ImageBufferMap MapDownloadBuffer(size_t size);
|
||||
|
||||
void CopyImage(Image& dst, Image& src, std::span<const VideoCommon::ImageCopy> copies);
|
||||
|
||||
void ConvertImage(Framebuffer* dst, ImageView& dst_view, ImageView& src_view) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
bool CanImageBeCopied(const Image& dst, const Image& src);
|
||||
|
||||
void EmulateCopyImage(Image& dst, Image& src, std::span<const VideoCommon::ImageCopy> copies);
|
||||
|
||||
void BlitFramebuffer(Framebuffer* dst, Framebuffer* src,
|
||||
const std::array<Offset2D, 2>& dst_region,
|
||||
const std::array<Offset2D, 2>& src_region,
|
||||
Tegra::Engines::Fermi2D::Filter filter,
|
||||
Tegra::Engines::Fermi2D::Operation operation);
|
||||
|
||||
void AccelerateImageUpload(Image& image, const ImageBufferMap& map, size_t buffer_offset,
|
||||
std::span<const VideoCommon::SwizzleParameters> swizzles);
|
||||
|
||||
void InsertUploadMemoryBarrier();
|
||||
|
||||
FormatProperties FormatInfo(VideoCommon::ImageType type, GLenum internal_format) const;
|
||||
|
||||
private:
|
||||
struct StagingBuffers {
|
||||
explicit StagingBuffers(GLenum storage_flags_, GLenum map_flags_);
|
||||
~StagingBuffers();
|
||||
|
||||
ImageBufferMap RequestMap(size_t requested_size, bool insert_fence);
|
||||
|
||||
size_t RequestBuffer(size_t requested_size);
|
||||
|
||||
std::optional<size_t> FindBuffer(size_t requested_size);
|
||||
|
||||
std::vector<OGLSync> syncs;
|
||||
std::vector<OGLBuffer> buffers;
|
||||
std::vector<u8*> maps;
|
||||
std::vector<size_t> sizes;
|
||||
GLenum storage_flags;
|
||||
GLenum map_flags;
|
||||
};
|
||||
|
||||
const Device& device;
|
||||
StateTracker& state_tracker;
|
||||
UtilShaders util_shaders;
|
||||
|
||||
std::array<std::unordered_map<GLenum, FormatProperties>, 3> format_properties;
|
||||
|
||||
StagingBuffers upload_buffers{GL_MAP_WRITE_BIT, GL_MAP_WRITE_BIT | GL_MAP_FLUSH_EXPLICIT_BIT};
|
||||
StagingBuffers download_buffers{GL_MAP_READ_BIT, GL_MAP_READ_BIT};
|
||||
|
||||
OGLTexture null_image_1d_array;
|
||||
OGLTexture null_image_cube_array;
|
||||
OGLTexture null_image_3d;
|
||||
OGLTexture null_image_rect;
|
||||
OGLTextureView null_image_view_1d;
|
||||
OGLTextureView null_image_view_2d;
|
||||
OGLTextureView null_image_view_2d_array;
|
||||
OGLTextureView null_image_view_cube;
|
||||
|
||||
std::array<GLuint, VideoCommon::NUM_IMAGE_VIEW_TYPES> null_image_views;
|
||||
};
|
||||
|
||||
class Image : public VideoCommon::ImageBase {
|
||||
friend ImageView;
|
||||
|
||||
public:
|
||||
explicit Image(TextureCacheRuntime&, const VideoCommon::ImageInfo& info, GPUVAddr gpu_addr,
|
||||
VAddr cpu_addr);
|
||||
|
||||
void UploadMemory(const ImageBufferMap& map, size_t buffer_offset,
|
||||
std::span<const VideoCommon::BufferImageCopy> copies);
|
||||
|
||||
void UploadMemory(const ImageBufferMap& map, size_t buffer_offset,
|
||||
std::span<const VideoCommon::BufferCopy> copies);
|
||||
|
||||
void DownloadMemory(ImageBufferMap& map, size_t buffer_offset,
|
||||
std::span<const VideoCommon::BufferImageCopy> copies);
|
||||
|
||||
GLuint Handle() const noexcept {
|
||||
return texture.handle;
|
||||
}
|
||||
|
||||
private:
|
||||
void CopyBufferToImage(const VideoCommon::BufferImageCopy& copy, size_t buffer_offset);
|
||||
|
||||
void CopyImageToBuffer(const VideoCommon::BufferImageCopy& copy, size_t buffer_offset);
|
||||
|
||||
OGLTexture texture;
|
||||
OGLTextureView store_view;
|
||||
OGLBuffer buffer;
|
||||
GLenum gl_internal_format = GL_NONE;
|
||||
GLenum gl_store_format = GL_NONE;
|
||||
GLenum gl_format = GL_NONE;
|
||||
GLenum gl_type = GL_NONE;
|
||||
};
|
||||
|
||||
class ImageView : public VideoCommon::ImageViewBase {
|
||||
friend Image;
|
||||
|
||||
public:
|
||||
explicit ImageView(TextureCacheRuntime&, const VideoCommon::ImageViewInfo&, ImageId, Image&);
|
||||
explicit ImageView(TextureCacheRuntime&, const VideoCommon::NullImageParams&);
|
||||
|
||||
[[nodiscard]] GLuint Handle(ImageViewType query_type) const noexcept {
|
||||
return views[static_cast<size_t>(query_type)];
|
||||
}
|
||||
|
||||
[[nodiscard]] GLuint DefaultHandle() const noexcept {
|
||||
return default_handle;
|
||||
}
|
||||
|
||||
[[nodiscard]] GLenum Format() const noexcept {
|
||||
return internal_format;
|
||||
}
|
||||
|
||||
private:
|
||||
void SetupView(const Device& device, Image& image, ImageViewType view_type, GLuint handle,
|
||||
const VideoCommon::ImageViewInfo& info,
|
||||
VideoCommon::SubresourceRange view_range);
|
||||
|
||||
std::array<GLuint, VideoCommon::NUM_IMAGE_VIEW_TYPES> views{};
|
||||
std::vector<OGLTextureView> stored_views;
|
||||
GLuint default_handle = 0;
|
||||
GLenum internal_format = GL_NONE;
|
||||
};
|
||||
|
||||
// OpenGL image allocation type; adds nothing on top of VideoCommon::ImageAllocBase.
class ImageAlloc : public VideoCommon::ImageAllocBase {};
|
||||
|
||||
class Sampler {
|
||||
public:
|
||||
explicit Sampler(TextureCacheRuntime&, const Tegra::Texture::TSCEntry&);
|
||||
|
||||
GLuint Handle() const noexcept {
|
||||
return sampler.handle;
|
||||
}
|
||||
|
||||
private:
|
||||
OGLSampler sampler;
|
||||
};
|
||||
|
||||
// OpenGL framebuffer object built from a set of color image views and an optional depth view.
class Framebuffer {
|
||||
public:
|
||||
explicit Framebuffer(TextureCacheRuntime&, std::span<ImageView*, NUM_RT> color_buffers,
|
||||
ImageView* depth_buffer, const VideoCommon::RenderTargets& key);
|
||||
|
||||
// Returns the OpenGL name of the framebuffer object.
[[nodiscard]] GLuint Handle() const noexcept {
|
||||
return framebuffer.handle;
|
||||
}
|
||||
|
||||
// Returns the stored buffer bit mask (GL_NONE until assigned).
[[nodiscard]] GLbitfield BufferBits() const noexcept {
|
||||
return buffer_bits;
|
||||
}
|
||||
|
||||
private:
|
||||
OGLFramebuffer framebuffer;
|
||||
GLbitfield buffer_bits = GL_NONE;
|
||||
};
|
||||
|
||||
// Compile-time configuration handed to VideoCommon::TextureCache: feature switches plus the
// OpenGL types the generic cache should use.
struct TextureCacheParams {
|
||||
static constexpr bool ENABLE_VALIDATION = true;
|
||||
static constexpr bool FRAMEBUFFER_BLITS = true;
|
||||
static constexpr bool HAS_EMULATED_COPIES = true;
|
||||
|
||||
// Backend types plugged into the generic texture cache.
using Runtime = OpenGL::TextureCacheRuntime;
|
||||
using Image = OpenGL::Image;
|
||||
using ImageAlloc = OpenGL::ImageAlloc;
|
||||
using ImageView = OpenGL::ImageView;
|
||||
using Sampler = OpenGL::Sampler;
|
||||
using Framebuffer = OpenGL::Framebuffer;
|
||||
};
|
||||
|
||||
// The generic texture cache instantiated with the OpenGL parameter set.
using TextureCache = VideoCommon::TextureCache<TextureCacheParams>;
|
||||
|
||||
} // namespace OpenGL
|
497
src/video_core/renderer_opengl/maxwell_to_gl.h
Executable file
497
src/video_core/renderer_opengl/maxwell_to_gl.h
Executable file
@@ -0,0 +1,497 @@
|
||||
// Copyright 2018 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <array>
|
||||
#include <glad/glad.h>
|
||||
#include "common/common_types.h"
|
||||
#include "common/logging/log.h"
|
||||
#include "video_core/engines/maxwell_3d.h"
|
||||
|
||||
namespace OpenGL {
|
||||
|
||||
// Fixed-size float vectors of 2, 3 and 4 components.
using GLvec2 = std::array<GLfloat, 2>;
|
||||
using GLvec3 = std::array<GLfloat, 3>;
|
||||
using GLvec4 = std::array<GLfloat, 4>;
|
||||
|
||||
// Fixed-size unsigned integer vectors of 2, 3 and 4 components.
using GLuvec2 = std::array<GLuint, 2>;
|
||||
using GLuvec3 = std::array<GLuint, 3>;
|
||||
using GLuvec4 = std::array<GLuint, 4>;
|
||||
|
||||
namespace MaxwellToGL {
|
||||
|
||||
// Shorthand for the Maxwell 3D register definitions used by every converter below.
using Maxwell = Tegra::Engines::Maxwell3D::Regs;
|
||||
|
||||
/// Maps the component type and size of a Maxwell vertex attribute to the matching OpenGL data
/// type enumerator. Combinations that are not handled are reported through UNIMPLEMENTED_MSG
/// and produce a value-initialized GLenum.
inline GLenum VertexFormat(Maxwell::VertexAttribute attrib) {
    using Type = Maxwell::VertexAttribute::Type;
    using Size = Maxwell::VertexAttribute::Size;

    switch (attrib.type) {
    // Unsigned integer storage
    case Type::UnsignedNorm:
    case Type::UnsignedScaled:
    case Type::UnsignedInt:
        switch (attrib.size) {
        case Size::Size_8:
        case Size::Size_8_8:
        case Size::Size_8_8_8:
        case Size::Size_8_8_8_8:
            return GL_UNSIGNED_BYTE;
        case Size::Size_16:
        case Size::Size_16_16:
        case Size::Size_16_16_16:
        case Size::Size_16_16_16_16:
            return GL_UNSIGNED_SHORT;
        case Size::Size_32:
        case Size::Size_32_32:
        case Size::Size_32_32_32:
        case Size::Size_32_32_32_32:
            return GL_UNSIGNED_INT;
        case Size::Size_10_10_10_2:
            return GL_UNSIGNED_INT_2_10_10_10_REV;
        default:
            break;
        }
        break;
    // Signed integer storage
    case Type::SignedNorm:
    case Type::SignedScaled:
    case Type::SignedInt:
        switch (attrib.size) {
        case Size::Size_8:
        case Size::Size_8_8:
        case Size::Size_8_8_8:
        case Size::Size_8_8_8_8:
            return GL_BYTE;
        case Size::Size_16:
        case Size::Size_16_16:
        case Size::Size_16_16_16:
        case Size::Size_16_16_16_16:
            return GL_SHORT;
        case Size::Size_32:
        case Size::Size_32_32:
        case Size::Size_32_32_32:
        case Size::Size_32_32_32_32:
            return GL_INT;
        case Size::Size_10_10_10_2:
            return GL_INT_2_10_10_10_REV;
        default:
            break;
        }
        break;
    // Floating point storage
    case Type::Float:
        switch (attrib.size) {
        case Size::Size_16:
        case Size::Size_16_16:
        case Size::Size_16_16_16:
        case Size::Size_16_16_16_16:
            return GL_HALF_FLOAT;
        case Size::Size_32:
        case Size::Size_32_32:
        case Size::Size_32_32_32:
        case Size::Size_32_32_32_32:
            return GL_FLOAT;
        default:
            break;
        }
        break;
    }

    // No return above matched this type/size pair.
    UNIMPLEMENTED_MSG("Unimplemented vertex format of type={} and size={}", attrib.TypeString(),
                      attrib.SizeString());
    return {};
}
|
||||
|
||||
/// Maps a Maxwell index buffer format to the OpenGL index type. Any other value trips
/// UNREACHABLE_MSG and yields a value-initialized GLenum.
inline GLenum IndexFormat(Maxwell::IndexFormat index_format) {
    using Format = Maxwell::IndexFormat;

    switch (index_format) {
    case Format::UnsignedByte:
        return GL_UNSIGNED_BYTE;
    case Format::UnsignedShort:
        return GL_UNSIGNED_SHORT;
    case Format::UnsignedInt:
        return GL_UNSIGNED_INT;
    }

    UNREACHABLE_MSG("Invalid index_format={}", index_format);
    return {};
}
|
||||
|
||||
/// Maps a Maxwell primitive topology to the OpenGL primitive mode. Any other value trips
/// UNREACHABLE_MSG and falls back to GL_POINTS.
inline GLenum PrimitiveTopology(Maxwell::PrimitiveTopology topology) {
    using Topology = Maxwell::PrimitiveTopology;

    switch (topology) {
    case Topology::Points:
        return GL_POINTS;
    case Topology::Lines:
        return GL_LINES;
    case Topology::LineLoop:
        return GL_LINE_LOOP;
    case Topology::LineStrip:
        return GL_LINE_STRIP;
    case Topology::Triangles:
        return GL_TRIANGLES;
    case Topology::TriangleStrip:
        return GL_TRIANGLE_STRIP;
    case Topology::TriangleFan:
        return GL_TRIANGLE_FAN;
    case Topology::Quads:
        return GL_QUADS;
    case Topology::QuadStrip:
        return GL_QUAD_STRIP;
    case Topology::Polygon:
        return GL_POLYGON;
    case Topology::LinesAdjacency:
        return GL_LINES_ADJACENCY;
    case Topology::LineStripAdjacency:
        return GL_LINE_STRIP_ADJACENCY;
    case Topology::TrianglesAdjacency:
        return GL_TRIANGLES_ADJACENCY;
    case Topology::TriangleStripAdjacency:
        return GL_TRIANGLE_STRIP_ADJACENCY;
    case Topology::Patches:
        return GL_PATCHES;
    }

    UNREACHABLE_MSG("Invalid topology={}", topology);
    return GL_POINTS;
}
|
||||
|
||||
/// Combines a texture filter and a mipmap filter into the single OpenGL filter enumerator.
/// Any combination outside Nearest/Linear x None/Nearest/Linear trips UNREACHABLE_MSG and
/// falls back to GL_NEAREST.
inline GLenum TextureFilterMode(Tegra::Texture::TextureFilter filter_mode,
                                Tegra::Texture::TextureMipmapFilter mipmap_filter_mode) {
    using Filter = Tegra::Texture::TextureFilter;
    using Mipmap = Tegra::Texture::TextureMipmapFilter;

    const bool nearest = filter_mode == Filter::Nearest;
    const bool linear = filter_mode == Filter::Linear;

    // Only the two known base filters have a mapping; pick the variant by mipmap filter.
    if (nearest || linear) {
        switch (mipmap_filter_mode) {
        case Mipmap::None:
            return nearest ? GL_NEAREST : GL_LINEAR;
        case Mipmap::Nearest:
            return nearest ? GL_NEAREST_MIPMAP_NEAREST : GL_LINEAR_MIPMAP_NEAREST;
        case Mipmap::Linear:
            return nearest ? GL_NEAREST_MIPMAP_LINEAR : GL_LINEAR_MIPMAP_LINEAR;
        }
    }

    UNREACHABLE_MSG("Invalid texture filter mode={} and mipmap filter mode={}", filter_mode,
                    mipmap_filter_mode);
    return GL_NEAREST;
}
|
||||
|
||||
/// Maps a guest texture wrap mode to the OpenGL wrap enumerator.
///
/// The two mirror-once modes that need GL_EXT_texture_mirror_clamp fall back to
/// GL_MIRROR_CLAMP_TO_EDGE when the extension is not exposed by the driver. Any value that is
/// not handled is reported through UNIMPLEMENTED_MSG and falls back to GL_REPEAT.
inline GLenum WrapMode(Tegra::Texture::WrapMode wrap_mode) {
    switch (wrap_mode) {
    case Tegra::Texture::WrapMode::Wrap:
        return GL_REPEAT;
    case Tegra::Texture::WrapMode::Mirror:
        return GL_MIRRORED_REPEAT;
    case Tegra::Texture::WrapMode::ClampToEdge:
        return GL_CLAMP_TO_EDGE;
    case Tegra::Texture::WrapMode::Border:
        return GL_CLAMP_TO_BORDER;
    case Tegra::Texture::WrapMode::Clamp:
        return GL_CLAMP;
    case Tegra::Texture::WrapMode::MirrorOnceClampToEdge:
        return GL_MIRROR_CLAMP_TO_EDGE;
    case Tegra::Texture::WrapMode::MirrorOnceBorder:
        // Test the loader's runtime flag. GL_EXT_texture_mirror_clamp is the header's
        // compile-time macro, so testing it would never reach the fallback.
        if (GLAD_GL_EXT_texture_mirror_clamp) {
            return GL_MIRROR_CLAMP_TO_BORDER_EXT;
        } else {
            return GL_MIRROR_CLAMP_TO_EDGE;
        }
    case Tegra::Texture::WrapMode::MirrorOnceClampOGL:
        // Same runtime check as above.
        if (GLAD_GL_EXT_texture_mirror_clamp) {
            return GL_MIRROR_CLAMP_EXT;
        } else {
            return GL_MIRROR_CLAMP_TO_EDGE;
        }
    }
    UNIMPLEMENTED_MSG("Unimplemented texture wrap mode={}", wrap_mode);
    return GL_REPEAT;
}
|
||||
|
||||
/// Maps a texture depth compare function to the OpenGL comparison enumerator. Unhandled values
/// are reported through UNIMPLEMENTED_MSG and fall back to GL_GREATER.
inline GLenum DepthCompareFunc(Tegra::Texture::DepthCompareFunc func) {
    using Func = Tegra::Texture::DepthCompareFunc;

    switch (func) {
    case Func::Never:
        return GL_NEVER;
    case Func::Less:
        return GL_LESS;
    case Func::LessEqual:
        return GL_LEQUAL;
    case Func::Equal:
        return GL_EQUAL;
    case Func::NotEqual:
        return GL_NOTEQUAL;
    case Func::Greater:
        return GL_GREATER;
    case Func::GreaterEqual:
        return GL_GEQUAL;
    case Func::Always:
        return GL_ALWAYS;
    }

    UNIMPLEMENTED_MSG("Unimplemented texture depth compare function={}", func);
    return GL_GREATER;
}
|
||||
|
||||
/// Maps a Maxwell blend equation (either the plain or the "GL" enumerator of each pair) to the
/// OpenGL blend equation. Unhandled values are reported through UNIMPLEMENTED_MSG and fall back
/// to GL_FUNC_ADD.
inline GLenum BlendEquation(Maxwell::Blend::Equation equation) {
    using Equation = Maxwell::Blend::Equation;

    switch (equation) {
    case Equation::Add:
    case Equation::AddGL:
        return GL_FUNC_ADD;
    case Equation::Subtract:
    case Equation::SubtractGL:
        return GL_FUNC_SUBTRACT;
    case Equation::ReverseSubtract:
    case Equation::ReverseSubtractGL:
        return GL_FUNC_REVERSE_SUBTRACT;
    case Equation::Min:
    case Equation::MinGL:
        return GL_MIN;
    case Equation::Max:
    case Equation::MaxGL:
        return GL_MAX;
    }

    UNIMPLEMENTED_MSG("Unimplemented blend equation={}", equation);
    return GL_FUNC_ADD;
}
|
||||
|
||||
/// Maps a Maxwell blend factor (either the plain or the "GL" enumerator of each pair) to the
/// OpenGL blend factor. Unhandled values are reported through UNIMPLEMENTED_MSG and fall back
/// to GL_ZERO.
inline GLenum BlendFunc(Maxwell::Blend::Factor factor) {
    using Factor = Maxwell::Blend::Factor;

    switch (factor) {
    case Factor::Zero:
    case Factor::ZeroGL:
        return GL_ZERO;
    case Factor::One:
    case Factor::OneGL:
        return GL_ONE;
    case Factor::SourceColor:
    case Factor::SourceColorGL:
        return GL_SRC_COLOR;
    case Factor::OneMinusSourceColor:
    case Factor::OneMinusSourceColorGL:
        return GL_ONE_MINUS_SRC_COLOR;
    case Factor::SourceAlpha:
    case Factor::SourceAlphaGL:
        return GL_SRC_ALPHA;
    case Factor::OneMinusSourceAlpha:
    case Factor::OneMinusSourceAlphaGL:
        return GL_ONE_MINUS_SRC_ALPHA;
    case Factor::DestAlpha:
    case Factor::DestAlphaGL:
        return GL_DST_ALPHA;
    case Factor::OneMinusDestAlpha:
    case Factor::OneMinusDestAlphaGL:
        return GL_ONE_MINUS_DST_ALPHA;
    case Factor::DestColor:
    case Factor::DestColorGL:
        return GL_DST_COLOR;
    case Factor::OneMinusDestColor:
    case Factor::OneMinusDestColorGL:
        return GL_ONE_MINUS_DST_COLOR;
    case Factor::SourceAlphaSaturate:
    case Factor::SourceAlphaSaturateGL:
        return GL_SRC_ALPHA_SATURATE;
    case Factor::Source1Color:
    case Factor::Source1ColorGL:
        return GL_SRC1_COLOR;
    case Factor::OneMinusSource1Color:
    case Factor::OneMinusSource1ColorGL:
        return GL_ONE_MINUS_SRC1_COLOR;
    case Factor::Source1Alpha:
    case Factor::Source1AlphaGL:
        return GL_SRC1_ALPHA;
    case Factor::OneMinusSource1Alpha:
    case Factor::OneMinusSource1AlphaGL:
        return GL_ONE_MINUS_SRC1_ALPHA;
    case Factor::ConstantColor:
    case Factor::ConstantColorGL:
        return GL_CONSTANT_COLOR;
    case Factor::OneMinusConstantColor:
    case Factor::OneMinusConstantColorGL:
        return GL_ONE_MINUS_CONSTANT_COLOR;
    case Factor::ConstantAlpha:
    case Factor::ConstantAlphaGL:
        return GL_CONSTANT_ALPHA;
    case Factor::OneMinusConstantAlpha:
    case Factor::OneMinusConstantAlphaGL:
        return GL_ONE_MINUS_CONSTANT_ALPHA;
    }

    UNIMPLEMENTED_MSG("Unimplemented blend factor={}", factor);
    return GL_ZERO;
}
|
||||
|
||||
/// Maps a texture swizzle source to the OpenGL swizzle enumerator; both "one" sources map to
/// GL_ONE. Unhandled values are reported through UNIMPLEMENTED_MSG and fall back to GL_ZERO.
inline GLenum SwizzleSource(Tegra::Texture::SwizzleSource source) {
    using Source = Tegra::Texture::SwizzleSource;

    switch (source) {
    case Source::Zero:
        return GL_ZERO;
    case Source::R:
        return GL_RED;
    case Source::G:
        return GL_GREEN;
    case Source::B:
        return GL_BLUE;
    case Source::A:
        return GL_ALPHA;
    case Source::OneInt:
    case Source::OneFloat:
        return GL_ONE;
    }

    UNIMPLEMENTED_MSG("Unimplemented swizzle source={}", source);
    return GL_ZERO;
}
|
||||
|
||||
/// Maps a Maxwell comparison operation (either the current or the "Old" enumerator of each
/// pair) to the OpenGL comparison function. Unhandled values are reported through
/// UNIMPLEMENTED_MSG and fall back to GL_ALWAYS.
inline GLenum ComparisonOp(Maxwell::ComparisonOp comparison) {
    using Op = Maxwell::ComparisonOp;

    switch (comparison) {
    case Op::Never:
    case Op::NeverOld:
        return GL_NEVER;
    case Op::Less:
    case Op::LessOld:
        return GL_LESS;
    case Op::Equal:
    case Op::EqualOld:
        return GL_EQUAL;
    case Op::LessEqual:
    case Op::LessEqualOld:
        return GL_LEQUAL;
    case Op::Greater:
    case Op::GreaterOld:
        return GL_GREATER;
    case Op::NotEqual:
    case Op::NotEqualOld:
        return GL_NOTEQUAL;
    case Op::GreaterEqual:
    case Op::GreaterEqualOld:
        return GL_GEQUAL;
    case Op::Always:
    case Op::AlwaysOld:
        return GL_ALWAYS;
    }

    UNIMPLEMENTED_MSG("Unimplemented comparison op={}", comparison);
    return GL_ALWAYS;
}
|
||||
|
||||
/// Maps a Maxwell stencil operation (either the plain or the "OGL" enumerator of each pair) to
/// the OpenGL stencil operation. Unhandled values are reported through UNIMPLEMENTED_MSG and
/// fall back to GL_KEEP.
inline GLenum StencilOp(Maxwell::StencilOp stencil) {
    using Op = Maxwell::StencilOp;

    switch (stencil) {
    case Op::Keep:
    case Op::KeepOGL:
        return GL_KEEP;
    case Op::Zero:
    case Op::ZeroOGL:
        return GL_ZERO;
    case Op::Replace:
    case Op::ReplaceOGL:
        return GL_REPLACE;
    case Op::Incr:
    case Op::IncrOGL:
        return GL_INCR;
    case Op::Decr:
    case Op::DecrOGL:
        return GL_DECR;
    case Op::Invert:
    case Op::InvertOGL:
        return GL_INVERT;
    case Op::IncrWrap:
    case Op::IncrWrapOGL:
        return GL_INCR_WRAP;
    case Op::DecrWrap:
    case Op::DecrWrapOGL:
        return GL_DECR_WRAP;
    }

    UNIMPLEMENTED_MSG("Unimplemented stencil op={}", stencil);
    return GL_KEEP;
}
|
||||
|
||||
/// Maps the Maxwell front face winding to GL_CW or GL_CCW. Any other value is reported through
/// UNIMPLEMENTED_MSG and falls back to GL_CCW.
inline GLenum FrontFace(Maxwell::FrontFace front_face) {
    if (front_face == Maxwell::FrontFace::ClockWise) {
        return GL_CW;
    }
    if (front_face == Maxwell::FrontFace::CounterClockWise) {
        return GL_CCW;
    }

    UNIMPLEMENTED_MSG("Unimplemented front face cull={}", front_face);
    return GL_CCW;
}
|
||||
|
||||
/// Maps the Maxwell cull face selection to the OpenGL face enumerator. Any other value is
/// reported through UNIMPLEMENTED_MSG and falls back to GL_BACK.
inline GLenum CullFace(Maxwell::CullFace cull_face) {
    if (cull_face == Maxwell::CullFace::Front) {
        return GL_FRONT;
    }
    if (cull_face == Maxwell::CullFace::Back) {
        return GL_BACK;
    }
    if (cull_face == Maxwell::CullFace::FrontAndBack) {
        return GL_FRONT_AND_BACK;
    }

    UNIMPLEMENTED_MSG("Unimplemented cull face={}", cull_face);
    return GL_BACK;
}
|
||||
|
||||
/// Maps a Maxwell logic operation to the OpenGL logic op enumerator. Unhandled values are
/// reported through UNIMPLEMENTED_MSG and fall back to GL_COPY.
inline GLenum LogicOp(Maxwell::LogicOperation operation) {
    using Op = Maxwell::LogicOperation;

    switch (operation) {
    case Op::Clear:
        return GL_CLEAR;
    case Op::And:
        return GL_AND;
    case Op::AndReverse:
        return GL_AND_REVERSE;
    case Op::Copy:
        return GL_COPY;
    case Op::AndInverted:
        return GL_AND_INVERTED;
    case Op::NoOp:
        return GL_NOOP;
    case Op::Xor:
        return GL_XOR;
    case Op::Or:
        return GL_OR;
    case Op::Nor:
        return GL_NOR;
    case Op::Equiv:
        return GL_EQUIV;
    case Op::Invert:
        return GL_INVERT;
    case Op::OrReverse:
        return GL_OR_REVERSE;
    case Op::CopyInverted:
        return GL_COPY_INVERTED;
    case Op::OrInverted:
        return GL_OR_INVERTED;
    case Op::Nand:
        return GL_NAND;
    case Op::Set:
        return GL_SET;
    }

    UNIMPLEMENTED_MSG("Unimplemented logic operation={}", operation);
    return GL_COPY;
}
|
||||
|
||||
/// Maps the Maxwell polygon mode to GL_POINT, GL_LINE or GL_FILL. Any other value trips
/// UNREACHABLE_MSG and falls back to GL_FILL.
inline GLenum PolygonMode(Maxwell::PolygonMode polygon_mode) {
    using Mode = Maxwell::PolygonMode;

    switch (polygon_mode) {
    case Mode::Point:
        return GL_POINT;
    case Mode::Line:
        return GL_LINE;
    case Mode::Fill:
        return GL_FILL;
    }

    UNREACHABLE_MSG("Invalid polygon mode={}", polygon_mode);
    return GL_FILL;
}
|
||||
|
||||
/// Maps a sampler reduction filter to the OpenGL reduction mode. Any other value trips
/// UNREACHABLE_MSG and falls back to GL_WEIGHTED_AVERAGE_ARB.
inline GLenum ReductionFilter(Tegra::Texture::SamplerReduction filter) {
    using Reduction = Tegra::Texture::SamplerReduction;

    switch (filter) {
    case Reduction::WeightedAverage:
        return GL_WEIGHTED_AVERAGE_ARB;
    case Reduction::Min:
        return GL_MIN;
    case Reduction::Max:
        return GL_MAX;
    }

    UNREACHABLE_MSG("Invalid reduction filter={}", static_cast<int>(filter));
    return GL_WEIGHTED_AVERAGE_ARB;
}
|
||||
|
||||
/// Converts a Maxwell viewport swizzle to the NV_viewport_swizzle enumerator.
inline GLenum ViewportSwizzle(Maxwell::ViewportSwizzle swizzle) {
    // The guest enumeration follows the same order as the GL enumerators, so the register
    // value is used directly as an offset from the first one.
    const auto offset = static_cast<GLenum>(swizzle);
    return GL_VIEWPORT_SWIZZLE_POSITIVE_X_NV + offset;
}
|
||||
|
||||
} // namespace MaxwellToGL
|
||||
} // namespace OpenGL
|
521
src/video_core/renderer_opengl/renderer_opengl.cpp
Executable file
521
src/video_core/renderer_opengl/renderer_opengl.cpp
Executable file
@@ -0,0 +1,521 @@
|
||||
// Copyright 2014 Citra Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include <algorithm>
|
||||
#include <cstddef>
|
||||
#include <cstdlib>
|
||||
#include <cstring>
|
||||
#include <memory>
|
||||
|
||||
#include <glad/glad.h>
|
||||
|
||||
#include "common/assert.h"
|
||||
#include "common/logging/log.h"
|
||||
#include "common/microprofile.h"
|
||||
#include "common/telemetry.h"
|
||||
#include "core/core.h"
|
||||
#include "core/core_timing.h"
|
||||
#include "core/frontend/emu_window.h"
|
||||
#include "core/memory.h"
|
||||
#include "core/perf_stats.h"
|
||||
#include "core/settings.h"
|
||||
#include "core/telemetry_session.h"
|
||||
#include "video_core/host_shaders/opengl_present_frag.h"
|
||||
#include "video_core/host_shaders/opengl_present_vert.h"
|
||||
#include "video_core/renderer_opengl/gl_rasterizer.h"
|
||||
#include "video_core/renderer_opengl/gl_shader_manager.h"
|
||||
#include "video_core/renderer_opengl/renderer_opengl.h"
|
||||
#include "video_core/textures/decoders.h"
|
||||
|
||||
namespace OpenGL {
|
||||
|
||||
namespace {
|
||||
|
||||
constexpr GLint PositionLocation = 0;
|
||||
constexpr GLint TexCoordLocation = 1;
|
||||
constexpr GLint ModelViewMatrixLocation = 0;
|
||||
|
||||
// One vertex of the screen rectangle: a 2D position and a 2D texture coordinate.
struct ScreenRectVertex {
|
||||
// The integer position is converted to GLfloat; u and v are stored unchanged.
constexpr ScreenRectVertex(u32 x, u32 y, GLfloat u, GLfloat v)
|
||||
: position{{static_cast<GLfloat>(x), static_cast<GLfloat>(y)}}, tex_coord{{u, v}} {}
|
||||
|
||||
std::array<GLfloat, 2> position;
|
||||
std::array<GLfloat, 2> tex_coord;
|
||||
};
|
||||
|
||||
/**
|
||||
* Defines a 1:1 pixel ortographic projection matrix with (0,0) on the top-left
|
||||
* corner and (width, height) on the lower-bottom.
|
||||
*
|
||||
* The projection part of the matrix is trivial, hence these operations are represented
|
||||
* by a 3x2 matrix.
|
||||
*/
|
||||
std::array<GLfloat, 3 * 2> MakeOrthographicMatrix(float width, float height) {
|
||||
std::array<GLfloat, 3 * 2> matrix; // Laid out in column-major order
|
||||
|
||||
// clang-format off
|
||||
matrix[0] = 2.f / width; matrix[2] = 0.f; matrix[4] = -1.f;
|
||||
matrix[1] = 0.f; matrix[3] = -2.f / height; matrix[5] = 1.f;
|
||||
// Last matrix row is implicitly assumed to be [0, 0, 1].
|
||||
// clang-format on
|
||||
|
||||
return matrix;
|
||||
}
|
||||
|
||||
/// Returns a printable name for an OpenGL debug message source. Unknown values trip
/// UNREACHABLE() and yield "Unknown source".
const char* GetSource(GLenum source) {
    switch (source) {
    case GL_DEBUG_SOURCE_API:
        return "API";
    case GL_DEBUG_SOURCE_WINDOW_SYSTEM:
        return "WINDOW_SYSTEM";
    case GL_DEBUG_SOURCE_SHADER_COMPILER:
        return "SHADER_COMPILER";
    case GL_DEBUG_SOURCE_THIRD_PARTY:
        return "THIRD_PARTY";
    case GL_DEBUG_SOURCE_APPLICATION:
        return "APPLICATION";
    case GL_DEBUG_SOURCE_OTHER:
        return "OTHER";
    default:
        break;
    }

    // Not one of the sources handled above.
    UNREACHABLE();
    return "Unknown source";
}
|
||||
|
||||
/// Returns a printable name for an OpenGL debug message type. Unknown values trip
/// UNREACHABLE() and yield "Unknown type".
const char* GetType(GLenum type) {
    switch (type) {
    case GL_DEBUG_TYPE_ERROR:
        return "ERROR";
    case GL_DEBUG_TYPE_DEPRECATED_BEHAVIOR:
        return "DEPRECATED_BEHAVIOR";
    case GL_DEBUG_TYPE_UNDEFINED_BEHAVIOR:
        return "UNDEFINED_BEHAVIOR";
    case GL_DEBUG_TYPE_PORTABILITY:
        return "PORTABILITY";
    case GL_DEBUG_TYPE_PERFORMANCE:
        return "PERFORMANCE";
    case GL_DEBUG_TYPE_OTHER:
        return "OTHER";
    case GL_DEBUG_TYPE_MARKER:
        return "MARKER";
    default:
        break;
    }

    // Not one of the types handled above.
    UNREACHABLE();
    return "Unknown type";
}
|
||||
|
||||
/// OpenGL debug message callback: forwards the message to the logger at a level chosen from
/// the message severity (high -> critical, medium -> warning, low/notification -> debug).
/// Other severities are not logged. `length` and `user_param` are unused.
void APIENTRY DebugHandler(GLenum source, GLenum type, GLuint id, GLenum severity, GLsizei length,
                           const GLchar* message, const void* user_param) {
    const char message_format[] = "{} {} {}: {}";
    // Resolved up front, before the severity is inspected.
    const char* const source_name = GetSource(source);
    const char* const type_name = GetType(type);

    if (severity == GL_DEBUG_SEVERITY_HIGH) {
        LOG_CRITICAL(Render_OpenGL, message_format, source_name, type_name, id, message);
    } else if (severity == GL_DEBUG_SEVERITY_MEDIUM) {
        LOG_WARNING(Render_OpenGL, message_format, source_name, type_name, id, message);
    } else if (severity == GL_DEBUG_SEVERITY_NOTIFICATION || severity == GL_DEBUG_SEVERITY_LOW) {
        LOG_DEBUG(Render_OpenGL, message_format, source_name, type_name, id, message);
    }
}
|
||||
|
||||
} // Anonymous namespace
|
||||
|
||||
// Constructs the renderer: forwards the window and graphics context to RendererBase and binds
// the references to the telemetry session, window, CPU memory and GPU. The program manager is
// constructed from the `device` member.
RendererOpenGL::RendererOpenGL(Core::TelemetrySession& telemetry_session_,
|
||||
Core::Frontend::EmuWindow& emu_window_,
|
||||
Core::Memory::Memory& cpu_memory_, Tegra::GPU& gpu_,
|
||||
std::unique_ptr<Core::Frontend::GraphicsContext> context_)
|
||||
: RendererBase{emu_window_, std::move(context_)}, telemetry_session{telemetry_session_},
|
||||
emu_window{emu_window_}, cpu_memory{cpu_memory_}, gpu{gpu_}, program_manager{device} {}
|
||||
|
||||
// Defaulted here in the source file rather than in the class declaration.
RendererOpenGL::~RendererOpenGL() = default;
|
||||
|
||||
// Presents one frame: loads the guest framebuffer, renders a screenshot pass, draws the screen
// to framebuffer 0 and swaps the context buffers. Does nothing when no framebuffer is given.
void RendererOpenGL::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {
|
||||
if (!framebuffer) {
|
||||
return;
|
||||
}
|
||||
PrepareRendertarget(framebuffer);
|
||||
RenderScreenshot();
|
||||
|
||||
// Bind framebuffer 0 before drawing the screen with the window's layout.
state_tracker.BindFramebuffer(0);
|
||||
DrawScreen(emu_window.GetFramebufferLayout());
|
||||
|
||||
++m_current_frame;
|
||||
|
||||
rasterizer->TickFrame();
|
||||
|
||||
context->SwapBuffers();
|
||||
render_window.OnFrameDisplayed();
|
||||
}
|
||||
|
||||
/// Makes the screen texture match the given framebuffer configuration and then loads the
/// framebuffer contents into it. Does nothing when `framebuffer` is null.
void RendererOpenGL::PrepareRendertarget(const Tegra::FramebufferConfig* framebuffer) {
    if (framebuffer == nullptr) {
        return;
    }

    auto& texture = screen_info.texture;
    const bool size_changed = texture.width != static_cast<GLsizei>(framebuffer->width) ||
                              texture.height != static_cast<GLsizei>(framebuffer->height);
    const bool format_changed = texture.pixel_format != framebuffer->pixel_format;

    // Reconfigure the texture when its size or pixel format no longer matches, or when no
    // framebuffer data has been stored yet. This is expected to be rare, so it should not be a
    // performance problem.
    if (size_changed || format_changed || gl_framebuffer_data.empty()) {
        ConfigureFramebufferTexture(texture, *framebuffer);
    }

    // Load the framebuffer contents into the screen info
    LoadFBToScreenInfo(*framebuffer);
}
|
||||
|
||||
/// Loads the guest framebuffer into the screen info.
///
/// The transform flags and crop rectangle are always recorded. If the rasterizer can
/// accelerate the display nothing else is done; otherwise the guest memory is flushed,
/// unswizzled into `gl_framebuffer_data` and uploaded to the screen texture.
void RendererOpenGL::LoadFBToScreenInfo(const Tegra::FramebufferConfig& framebuffer) {
    // Framebuffer orientation handling
    framebuffer_transform_flags = framebuffer.transform_flags;
    framebuffer_crop_rect = framebuffer.crop_rect;

    const VAddr framebuffer_addr{framebuffer.address + framebuffer.offset};
    if (rasterizer->AccelerateDisplay(framebuffer, framebuffer_addr, framebuffer.stride)) {
        return;
    }

    // Reset the screen info's display texture to its own permanent texture
    screen_info.display_texture = screen_info.texture.resource.handle;

    const auto pixel_format{
        VideoCore::Surface::PixelFormatFromGPUPixelFormat(framebuffer.pixel_format)};
    const u32 bytes_per_pixel{VideoCore::Surface::BytesPerBlock(pixel_format)};
    // Widen before multiplying so the product is computed in 64 bits and cannot wrap in a
    // narrower type before being stored.
    const u64 size_in_bytes{static_cast<u64>(framebuffer.stride) * framebuffer.height *
                            bytes_per_pixel};
    u8* const host_ptr{cpu_memory.GetPointer(framebuffer_addr)};
    rasterizer->FlushRegion(ToCacheAddr(host_ptr), size_in_bytes);

    // TODO(Rodrigo): Read this from HLE
    constexpr u32 block_height_log2 = 4;
    Tegra::Texture::UnswizzleTexture(gl_framebuffer_data, std::span<u8>(host_ptr, size_in_bytes),
                                     bytes_per_pixel, framebuffer.width, framebuffer.height, 1,
                                     block_height_log2, 0);

    // Upload from client memory, using the framebuffer stride as the unpack row length
    glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0);
    glPixelStorei(GL_UNPACK_ROW_LENGTH, static_cast<GLint>(framebuffer.stride));

    // Update existing texture
    // TODO: Test what happens on hardware when you change the framebuffer dimensions so that
    //       they differ from the LCD resolution.
    // TODO: Applications could theoretically crash yuzu here by specifying too large
    //       framebuffer sizes. We should make sure that this cannot happen.
    glTextureSubImage2D(screen_info.texture.resource.handle, 0, 0, 0, framebuffer.width,
                        framebuffer.height, screen_info.texture.gl_format,
                        screen_info.texture.gl_type, gl_framebuffer_data.data());

    // Restore the default unpack row length
    glPixelStorei(GL_UNPACK_ROW_LENGTH, 0);
}
|
||||
|
||||
/// Clears mip level 0 of the given texture to a single RGBA color.
void RendererOpenGL::LoadColorToActiveGLTexture(u8 color_r, u8 color_g, u8 color_b, u8 color_a,
                                                const TextureInfo& texture) {
    // With GL_RGBA / GL_UNSIGNED_BYTE the components are read from memory in R, G, B, A order,
    // so the bytes must be laid out in that order.
    const u8 framebuffer_data[4] = {color_r, color_g, color_b, color_a};
    glClearTexImage(texture.resource.handle, 0, GL_RGBA, GL_UNSIGNED_BYTE, framebuffer_data);
}
|
||||
|
||||
void RendererOpenGL::InitOpenGLObjects() {
|
||||
glClearColor(Settings::values.bg_red.GetValue(), Settings::values.bg_green.GetValue(),
|
||||
Settings::values.bg_blue.GetValue(), 0.0f);
|
||||
|
||||
// Create shader programs
|
||||
OGLShader vertex_shader;
|
||||
vertex_shader.Create(HostShaders::OPENGL_PRESENT_VERT, GL_VERTEX_SHADER);
|
||||
|
||||
OGLShader fragment_shader;
|
||||
fragment_shader.Create(HostShaders::OPENGL_PRESENT_FRAG, GL_FRAGMENT_SHADER);
|
||||
|
||||
vertex_program.Create(true, false, vertex_shader.handle);
|
||||
fragment_program.Create(true, false, fragment_shader.handle);
|
||||
|
||||
pipeline.Create();
|
||||
glUseProgramStages(pipeline.handle, GL_VERTEX_SHADER_BIT, vertex_program.handle);
|
||||
glUseProgramStages(pipeline.handle, GL_FRAGMENT_SHADER_BIT, fragment_program.handle);
|
||||
|
||||
// Generate presentation sampler
|
||||
present_sampler.Create();
|
||||
glSamplerParameteri(present_sampler.handle, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
|
||||
|
||||
// Generate VBO handle for drawing
|
||||
vertex_buffer.Create();
|
||||
|
||||
// Attach vertex data to VAO
|
||||
glNamedBufferData(vertex_buffer.handle, sizeof(ScreenRectVertex) * 4, nullptr, GL_STREAM_DRAW);
|
||||
|
||||
// Allocate textures for the screen
|
||||
screen_info.texture.resource.Create(GL_TEXTURE_2D);
|
||||
|
||||
const GLuint texture = screen_info.texture.resource.handle;
|
||||
glTextureStorage2D(texture, 1, GL_RGBA8, 1, 1);
|
||||
|
||||
screen_info.display_texture = screen_info.texture.resource.handle;
|
||||
|
||||
// Clear screen to black
|
||||
LoadColorToActiveGLTexture(0, 0, 0, 0, screen_info.texture);
|
||||
|
||||
// Enable seamless cubemaps when per texture parameters are not available
|
||||
if (!GLAD_GL_ARB_seamless_cubemap_per_texture && !GLAD_GL_AMD_seamless_cubemap_per_texture) {
|
||||
glEnable(GL_TEXTURE_CUBE_MAP_SEAMLESS);
|
||||
}
|
||||
|
||||
// Enable unified vertex attributes and query vertex buffer address when the driver supports it
|
||||
if (device.HasVertexBufferUnifiedMemory()) {
|
||||
glEnableClientState(GL_VERTEX_ATTRIB_ARRAY_UNIFIED_NV);
|
||||
|
||||
glMakeNamedBufferResidentNV(vertex_buffer.handle, GL_READ_ONLY);
|
||||
glGetNamedBufferParameterui64vNV(vertex_buffer.handle, GL_BUFFER_GPU_ADDRESS_NV,
|
||||
&vertex_buffer_address);
|
||||
}
|
||||
}
|
||||
|
||||
void RendererOpenGL::AddTelemetryFields() {
|
||||
const char* const gl_version{reinterpret_cast<char const*>(glGetString(GL_VERSION))};
|
||||
const char* const gpu_vendor{reinterpret_cast<char const*>(glGetString(GL_VENDOR))};
|
||||
const char* const gpu_model{reinterpret_cast<char const*>(glGetString(GL_RENDERER))};
|
||||
|
||||
LOG_INFO(Render_OpenGL, "GL_VERSION: {}", gl_version);
|
||||
LOG_INFO(Render_OpenGL, "GL_VENDOR: {}", gpu_vendor);
|
||||
LOG_INFO(Render_OpenGL, "GL_RENDERER: {}", gpu_model);
|
||||
|
||||
constexpr auto user_system = Common::Telemetry::FieldType::UserSystem;
|
||||
telemetry_session.AddField(user_system, "GPU_Vendor", std::string(gpu_vendor));
|
||||
telemetry_session.AddField(user_system, "GPU_Model", std::string(gpu_model));
|
||||
telemetry_session.AddField(user_system, "GPU_OpenGL_Version", std::string(gl_version));
|
||||
}
|
||||
|
||||
/// Creates the OpenGL rasterizer the first time it is called; does nothing when a
/// rasterizer already exists.
void RendererOpenGL::CreateRasterizer() {
    if (!rasterizer) {
        rasterizer = std::make_unique<RasterizerOpenGL>(emu_window, gpu, cpu_memory, device,
                                                        screen_info, program_manager,
                                                        state_tracker);
    }
}
|
||||
|
||||
/// Reconfigures `texture` to match the guest framebuffer: copies its dimensions and pixel
/// format, resizes the CPU staging buffer, selects the matching OpenGL formats and
/// re-creates the texture storage.
/// @param texture     Texture description to update; its GL resource is released and re-created.
/// @param framebuffer Guest framebuffer configuration to match.
void RendererOpenGL::ConfigureFramebufferTexture(TextureInfo& texture,
                                                 const Tegra::FramebufferConfig& framebuffer) {
    texture.width = framebuffer.width;
    texture.height = framebuffer.height;
    texture.pixel_format = framebuffer.pixel_format;

    // Size the staging buffer for one full frame at this format
    const u32 bytes_per_pixel{VideoCore::Surface::BytesPerBlock(
        VideoCore::Surface::PixelFormatFromGPUPixelFormat(framebuffer.pixel_format))};
    gl_framebuffer_data.resize(texture.width * texture.height * bytes_per_pixel);

    // A8B8G8R8_UNORM and any unrecognized format use the RGBA8 configuration below; only
    // RGB565_UNORM needs different values. Unknown formats are accepted silently:
    // UNIMPLEMENTED_MSG("Unknown framebuffer pixel format: {}",
    //                   static_cast<u32>(framebuffer.pixel_format));
    GLint internal_format = GL_RGBA8;
    texture.gl_format = GL_RGBA;
    texture.gl_type = GL_UNSIGNED_INT_8_8_8_8_REV;
    if (framebuffer.pixel_format == Tegra::FramebufferConfig::PixelFormat::RGB565_UNORM) {
        internal_format = GL_RGB565;
        texture.gl_format = GL_RGB;
        texture.gl_type = GL_UNSIGNED_SHORT_5_6_5;
    }

    // Re-create the texture with storage of the new size and format
    texture.resource.Release();
    texture.resource.Create(GL_TEXTURE_2D);
    glTextureStorage2D(texture.resource.handle, 1, internal_format, texture.width, texture.height);
}
|
||||
|
||||
/// Draws the display texture as a textured rectangle covering `layout.screen` into the
/// currently bound draw framebuffer, using the presentation pipeline.
/// Overrides a fixed set of GL state for the draw and notifies the state tracker about it;
/// the guest pipeline is restored before returning.
/// @param layout Window/screen layout that provides the viewport size and screen rectangle.
void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) {
    using TransformFlags = Tegra::FramebufferConfig::TransformFlags;

    if (renderer_settings.set_background_color) {
        // Update background color before drawing
        glClearColor(Settings::values.bg_red.GetValue(), Settings::values.bg_green.GetValue(),
                     Settings::values.bg_blue.GetValue(), 0.0f);
    }

    // Set projection matrix
    const auto viewport_width = static_cast<float>(layout.width);
    const auto viewport_height = static_cast<float>(layout.height);
    const std::array ortho_matrix = MakeOrthographicMatrix(viewport_width, viewport_height);
    glProgramUniformMatrix3x2fv(vertex_program.handle, ModelViewMatrixLocation, 1, GL_FALSE,
                                std::data(ortho_matrix));

    const auto& coords = screen_info.display_texcoords;
    auto coord_begin = coords.left;
    auto coord_end = coords.right;
    if (framebuffer_transform_flags == TransformFlags::FlipV) {
        // Flip the framebuffer vertically
        coord_begin = coords.right;
        coord_end = coords.left;
    } else if (framebuffer_transform_flags != TransformFlags::Unset) {
        // Other transformations are unsupported
        LOG_CRITICAL(Render_OpenGL, "Unsupported framebuffer_transform_flags={}",
                     framebuffer_transform_flags);
        UNIMPLEMENTED();
    }

    ASSERT_MSG(framebuffer_crop_rect.top == 0, "Unimplemented");
    ASSERT_MSG(framebuffer_crop_rect.left == 0, "Unimplemented");

    // Scale the output by the crop width/height. This is commonly used with 1280x720 rendering
    // (e.g. handheld mode) on a 1920x1080 framebuffer.
    const auto crop_width = framebuffer_crop_rect.GetWidth();
    const auto crop_height = framebuffer_crop_rect.GetHeight();
    const f32 scale_u = crop_width > 0 ? static_cast<f32>(crop_width) /
                                             static_cast<f32>(screen_info.texture.width)
                                       : 1.f;
    const f32 scale_v = crop_height > 0 ? static_cast<f32>(crop_height) /
                                              static_cast<f32>(screen_info.texture.height)
                                        : 1.f;

    // Texture coordinates shared between the four corners
    const auto u_near = coords.top * scale_u;
    const auto u_far = coords.bottom * scale_u;
    const auto v_near = coord_begin * scale_v;
    const auto v_far = coord_end * scale_v;

    const auto& screen = layout.screen;
    const std::array vertices = {
        ScreenRectVertex(screen.left, screen.top, u_near, v_near),
        ScreenRectVertex(screen.right, screen.top, u_far, v_near),
        ScreenRectVertex(screen.left, screen.bottom, u_near, v_far),
        ScreenRectVertex(screen.right, screen.bottom, u_far, v_far),
    };
    glNamedBufferSubData(vertex_buffer.handle, 0, sizeof(vertices), std::data(vertices));

    // TODO: Signal state tracker about these changes
    state_tracker.NotifyScreenDrawVertexArray();
    state_tracker.NotifyPolygonModes();
    state_tracker.NotifyViewport0();
    state_tracker.NotifyScissor0();
    state_tracker.NotifyColorMask(0);
    state_tracker.NotifyBlend0();
    state_tracker.NotifyFramebuffer();
    state_tracker.NotifyFrontFace();
    state_tracker.NotifyCullTest();
    state_tracker.NotifyDepthTest();
    state_tracker.NotifyStencilTest();
    state_tracker.NotifyPolygonOffset();
    state_tracker.NotifyRasterizeEnable();
    state_tracker.NotifyFramebufferSRGB();
    state_tracker.NotifyLogicOp();
    state_tracker.NotifyClipControl();
    state_tracker.NotifyAlphaTest();

    program_manager.BindHostPipeline(pipeline.handle);

    // Fixed-function state for the presentation draw
    glEnable(GL_CULL_FACE);
    if (screen_info.display_srgb) {
        glEnable(GL_FRAMEBUFFER_SRGB);
    } else {
        glDisable(GL_FRAMEBUFFER_SRGB);
    }
    glDisable(GL_COLOR_LOGIC_OP);
    glDisable(GL_DEPTH_TEST);
    glDisable(GL_STENCIL_TEST);
    glDisable(GL_POLYGON_OFFSET_FILL);
    glDisable(GL_RASTERIZER_DISCARD);
    glDisable(GL_ALPHA_TEST);
    glDisablei(GL_BLEND, 0);
    glDisablei(GL_SCISSOR_TEST, 0);
    glPolygonMode(GL_FRONT_AND_BACK, GL_FILL);
    glCullFace(GL_BACK);
    glFrontFace(GL_CW);
    glColorMaski(0, GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE);
    glClipControl(GL_LOWER_LEFT, GL_ZERO_TO_ONE);
    glViewportIndexedf(0, 0.0f, 0.0f, static_cast<GLfloat>(layout.width),
                       static_cast<GLfloat>(layout.height));
    glDepthRangeIndexed(0, 0.0, 0.0);

    // Vertex layout: position and texture coordinate, both two floats, sourced from binding 0
    glEnableVertexAttribArray(PositionLocation);
    glEnableVertexAttribArray(TexCoordLocation);
    glVertexAttribDivisor(PositionLocation, 0);
    glVertexAttribDivisor(TexCoordLocation, 0);
    glVertexAttribFormat(PositionLocation, 2, GL_FLOAT, GL_FALSE,
                         offsetof(ScreenRectVertex, position));
    glVertexAttribFormat(TexCoordLocation, 2, GL_FLOAT, GL_FALSE,
                         offsetof(ScreenRectVertex, tex_coord));
    glVertexAttribBinding(PositionLocation, 0);
    glVertexAttribBinding(TexCoordLocation, 0);
    if (device.HasVertexBufferUnifiedMemory()) {
        // Unified memory path: bind no buffer object and supply the GPU address range instead
        glBindVertexBuffer(0, 0, 0, sizeof(ScreenRectVertex));
        glBufferAddressRangeNV(GL_VERTEX_ATTRIB_ARRAY_ADDRESS_NV, 0, vertex_buffer_address,
                               sizeof(vertices));
    } else {
        glBindVertexBuffer(0, vertex_buffer.handle, 0, sizeof(ScreenRectVertex));
    }

    glBindTextureUnit(0, screen_info.display_texture);
    glBindSampler(0, present_sampler.handle);

    glClear(GL_COLOR_BUFFER_BIT);
    glDrawArrays(GL_TRIANGLE_STRIP, 0, 4);

    program_manager.RestoreGuestPipeline();
}
|
||||
|
||||
void RendererOpenGL::RenderScreenshot() {
|
||||
if (!renderer_settings.screenshot_requested) {
|
||||
return;
|
||||
}
|
||||
|
||||
GLint old_read_fb;
|
||||
GLint old_draw_fb;
|
||||
glGetIntegerv(GL_READ_FRAMEBUFFER_BINDING, &old_read_fb);
|
||||
glGetIntegerv(GL_DRAW_FRAMEBUFFER_BINDING, &old_draw_fb);
|
||||
|
||||
// Draw the current frame to the screenshot framebuffer
|
||||
screenshot_framebuffer.Create();
|
||||
glBindFramebuffer(GL_FRAMEBUFFER, screenshot_framebuffer.handle);
|
||||
|
||||
const Layout::FramebufferLayout layout{renderer_settings.screenshot_framebuffer_layout};
|
||||
|
||||
GLuint renderbuffer;
|
||||
glGenRenderbuffers(1, &renderbuffer);
|
||||
glBindRenderbuffer(GL_RENDERBUFFER, renderbuffer);
|
||||
glRenderbufferStorage(GL_RENDERBUFFER, screen_info.display_srgb ? GL_SRGB8 : GL_RGB8,
|
||||
layout.width, layout.height);
|
||||
glFramebufferRenderbuffer(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_RENDERBUFFER, renderbuffer);
|
||||
|
||||
DrawScreen(layout);
|
||||
|
||||
glBindBuffer(GL_PIXEL_PACK_BUFFER, 0);
|
||||
glPixelStorei(GL_PACK_ROW_LENGTH, 0);
|
||||
glReadPixels(0, 0, layout.width, layout.height, GL_BGRA, GL_UNSIGNED_INT_8_8_8_8_REV,
|
||||
renderer_settings.screenshot_bits);
|
||||
|
||||
screenshot_framebuffer.Release();
|
||||
glDeleteRenderbuffers(1, &renderbuffer);
|
||||
|
||||
glBindFramebuffer(GL_READ_FRAMEBUFFER, old_read_fb);
|
||||
glBindFramebuffer(GL_DRAW_FRAMEBUFFER, old_draw_fb);
|
||||
|
||||
renderer_settings.screenshot_complete_callback();
|
||||
renderer_settings.screenshot_requested = false;
|
||||
}
|
||||
|
||||
bool RendererOpenGL::Init() {
|
||||
if (Settings::values.renderer_debug && GLAD_GL_KHR_debug) {
|
||||
glEnable(GL_DEBUG_OUTPUT);
|
||||
glEnable(GL_DEBUG_OUTPUT_SYNCHRONOUS);
|
||||
glDebugMessageCallback(DebugHandler, nullptr);
|
||||
}
|
||||
|
||||
AddTelemetryFields();
|
||||
|
||||
if (!GLAD_GL_VERSION_4_3) {
|
||||
return false;
|
||||
}
|
||||
|
||||
InitOpenGLObjects();
|
||||
CreateRasterizer();
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/// Shuts the renderer down. Intentionally empty: this function performs no work.
void RendererOpenGL::ShutDown() {}
|
||||
|
||||
} // namespace OpenGL
|
129
src/video_core/renderer_opengl/renderer_opengl.h
Executable file
129
src/video_core/renderer_opengl/renderer_opengl.h
Executable file
@@ -0,0 +1,129 @@
|
||||
// Copyright 2014 Citra Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <vector>
|
||||
#include <glad/glad.h>
|
||||
#include "common/common_types.h"
|
||||
#include "common/math_util.h"
|
||||
#include "video_core/renderer_base.h"
|
||||
#include "video_core/renderer_opengl/gl_device.h"
|
||||
#include "video_core/renderer_opengl/gl_resource_manager.h"
|
||||
#include "video_core/renderer_opengl/gl_shader_manager.h"
|
||||
#include "video_core/renderer_opengl/gl_state_tracker.h"
|
||||
|
||||
namespace Core {
|
||||
class System;
|
||||
class TelemetrySession;
|
||||
} // namespace Core
|
||||
|
||||
namespace Core::Frontend {
|
||||
class EmuWindow;
|
||||
}
|
||||
|
||||
namespace Core::Memory {
|
||||
class Memory;
|
||||
}
|
||||
|
||||
namespace Layout {
|
||||
struct FramebufferLayout;
|
||||
}
|
||||
|
||||
namespace Tegra {
|
||||
class GPU;
|
||||
}
|
||||
|
||||
namespace OpenGL {
|
||||
|
||||
/// Structure used for storing information about the textures for the Switch screen
|
||||
struct TextureInfo {
|
||||
OGLTexture resource;
|
||||
GLsizei width;
|
||||
GLsizei height;
|
||||
GLenum gl_format;
|
||||
GLenum gl_type;
|
||||
Tegra::FramebufferConfig::PixelFormat pixel_format;
|
||||
};
|
||||
|
||||
/// Structure used for storing information about the display target for the Switch screen
|
||||
struct ScreenInfo {
|
||||
GLuint display_texture{};
|
||||
bool display_srgb{};
|
||||
const Common::Rectangle<float> display_texcoords{0.0f, 0.0f, 1.0f, 1.0f};
|
||||
TextureInfo texture;
|
||||
};
|
||||
|
||||
class RendererOpenGL final : public VideoCore::RendererBase {
|
||||
public:
|
||||
explicit RendererOpenGL(Core::TelemetrySession& telemetry_session_,
|
||||
Core::Frontend::EmuWindow& emu_window_,
|
||||
Core::Memory::Memory& cpu_memory_, Tegra::GPU& gpu_,
|
||||
std::unique_ptr<Core::Frontend::GraphicsContext> context_);
|
||||
~RendererOpenGL() override;
|
||||
|
||||
bool Init() override;
|
||||
void ShutDown() override;
|
||||
void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) override;
|
||||
|
||||
private:
|
||||
/// Initializes the OpenGL state and creates persistent objects.
|
||||
void InitOpenGLObjects();
|
||||
|
||||
void AddTelemetryFields();
|
||||
|
||||
void CreateRasterizer();
|
||||
|
||||
void ConfigureFramebufferTexture(TextureInfo& texture,
|
||||
const Tegra::FramebufferConfig& framebuffer);
|
||||
|
||||
/// Draws the emulated screens to the emulator window.
|
||||
void DrawScreen(const Layout::FramebufferLayout& layout);
|
||||
|
||||
void RenderScreenshot();
|
||||
|
||||
/// Loads framebuffer from emulated memory into the active OpenGL texture.
|
||||
void LoadFBToScreenInfo(const Tegra::FramebufferConfig& framebuffer);
|
||||
|
||||
/// Fills active OpenGL texture with the given RGB color.Since the color is solid, the texture
|
||||
/// can be 1x1 but will stretch across whatever it's rendered on.
|
||||
void LoadColorToActiveGLTexture(u8 color_r, u8 color_g, u8 color_b, u8 color_a,
|
||||
const TextureInfo& texture);
|
||||
|
||||
void PrepareRendertarget(const Tegra::FramebufferConfig* framebuffer);
|
||||
|
||||
Core::TelemetrySession& telemetry_session;
|
||||
Core::Frontend::EmuWindow& emu_window;
|
||||
Core::Memory::Memory& cpu_memory;
|
||||
Tegra::GPU& gpu;
|
||||
|
||||
const Device device;
|
||||
StateTracker state_tracker{gpu};
|
||||
|
||||
// OpenGL object IDs
|
||||
OGLSampler present_sampler;
|
||||
OGLBuffer vertex_buffer;
|
||||
OGLProgram vertex_program;
|
||||
OGLProgram fragment_program;
|
||||
OGLPipeline pipeline;
|
||||
OGLFramebuffer screenshot_framebuffer;
|
||||
|
||||
// GPU address of the vertex buffer
|
||||
GLuint64EXT vertex_buffer_address = 0;
|
||||
|
||||
/// Display information for Switch screen
|
||||
ScreenInfo screen_info;
|
||||
|
||||
/// Global dummy shader pipeline
|
||||
ProgramManager program_manager;
|
||||
|
||||
/// OpenGL framebuffer data
|
||||
std::vector<u8> gl_framebuffer_data;
|
||||
|
||||
/// Used for transforming the framebuffer orientation
|
||||
Tegra::FramebufferConfig::TransformFlags framebuffer_transform_flags{};
|
||||
Common::Rectangle<int> framebuffer_crop_rect;
|
||||
};
|
||||
|
||||
} // namespace OpenGL
|
270
src/video_core/renderer_opengl/util_shaders.cpp
Executable file
270
src/video_core/renderer_opengl/util_shaders.cpp
Executable file
@@ -0,0 +1,270 @@
|
||||
// Copyright 2020 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include <bit>
|
||||
|
||||
#include <glad/glad.h>
|
||||
|
||||
#include "common/alignment.h"
|
||||
#include "common/assert.h"
|
||||
#include "common/common_types.h"
|
||||
#include "common/div_ceil.h"
|
||||
#include "video_core/host_shaders/block_linear_unswizzle_2d_comp.h"
|
||||
#include "video_core/host_shaders/block_linear_unswizzle_3d_comp.h"
|
||||
#include "video_core/host_shaders/opengl_copy_bc4_comp.h"
|
||||
#include "video_core/host_shaders/pitch_unswizzle_comp.h"
|
||||
#include "video_core/renderer_opengl/gl_resource_manager.h"
|
||||
#include "video_core/renderer_opengl/gl_shader_manager.h"
|
||||
#include "video_core/renderer_opengl/gl_texture_cache.h"
|
||||
#include "video_core/renderer_opengl/util_shaders.h"
|
||||
#include "video_core/surface.h"
|
||||
#include "video_core/texture_cache/types.h"
|
||||
#include "video_core/texture_cache/util.h"
|
||||
#include "video_core/textures/decoders.h"
|
||||
|
||||
namespace OpenGL {
|
||||
|
||||
using namespace HostShaders;
|
||||
|
||||
using Tegra::Texture::GOB_SIZE_SHIFT;
|
||||
using Tegra::Texture::GOB_SIZE_X;
|
||||
using Tegra::Texture::GOB_SIZE_X_SHIFT;
|
||||
using Tegra::Texture::GOB_SIZE_Y_SHIFT;
|
||||
using VideoCommon::Extent3D;
|
||||
using VideoCommon::ImageCopy;
|
||||
using VideoCommon::ImageType;
|
||||
using VideoCommon::SwizzleParameters;
|
||||
using VideoCore::Surface::BytesPerBlock;
|
||||
|
||||
namespace {
|
||||
|
||||
/// Builds a program object from a single compute shader compiled from `source`.
OGLProgram MakeProgram(std::string_view source) {
    OGLShader compute_stage;
    compute_stage.Create(source, GL_COMPUTE_SHADER);

    OGLProgram result;
    result.Create(true, false, compute_stage.handle);
    return result;
}
|
||||
|
||||
} // Anonymous namespace
|
||||
|
||||
UtilShaders::UtilShaders(ProgramManager& program_manager_)
|
||||
: program_manager{program_manager_},
|
||||
block_linear_unswizzle_2d_program(MakeProgram(BLOCK_LINEAR_UNSWIZZLE_2D_COMP)),
|
||||
block_linear_unswizzle_3d_program(MakeProgram(BLOCK_LINEAR_UNSWIZZLE_3D_COMP)),
|
||||
pitch_unswizzle_program(MakeProgram(PITCH_UNSWIZZLE_COMP)),
|
||||
copy_bc4_program(MakeProgram(OPENGL_COPY_BC4_COMP)) {
|
||||
const auto swizzle_table = Tegra::Texture::MakeSwizzleTable();
|
||||
swizzle_table_buffer.Create();
|
||||
glNamedBufferStorage(swizzle_table_buffer.handle, sizeof(swizzle_table), &swizzle_table, 0);
|
||||
}
|
||||
|
||||
/// Destructor, defaulted here in the source file rather than in the header.
UtilShaders::~UtilShaders() = default;
|
||||
|
||||
/// Unswizzles block-linear 2D image data on the GPU. For every entry in `swizzles`, binds
/// the matching range of the staging buffer and the destination mip level, then dispatches
/// the 2D unswizzle compute program over all layers of the image.
/// @param image         Destination image.
/// @param map           Staging buffer that holds the swizzled guest data.
/// @param buffer_offset Offset of the image data inside `map`.
/// @param swizzles      Per-level swizzle parameters.
void UtilShaders::BlockLinearUpload2D(Image& image, const ImageBufferMap& map, size_t buffer_offset,
                                      std::span<const SwizzleParameters> swizzles) {
    static constexpr Extent3D WORKGROUP_SIZE{32, 32, 1};

    // Shader resource bindings
    static constexpr GLuint BINDING_SWIZZLE_BUFFER = 0;
    static constexpr GLuint BINDING_INPUT_BUFFER = 1;
    static constexpr GLuint BINDING_OUTPUT_IMAGE = 0;

    // Uniform locations
    static constexpr GLuint LOC_ORIGIN = 0;
    static constexpr GLuint LOC_DESTINATION = 1;
    static constexpr GLuint LOC_BYTES_PER_BLOCK = 2;
    static constexpr GLuint LOC_LAYER_STRIDE = 3;
    static constexpr GLuint LOC_BLOCK_SIZE = 4;
    static constexpr GLuint LOC_X_SHIFT = 5;
    static constexpr GLuint LOC_BLOCK_HEIGHT = 6;
    static constexpr GLuint LOC_BLOCK_HEIGHT_MASK = 7;

    const u32 bytes_per_block = BytesPerBlock(image.info.format);
    // The shader receives the block size as a shift amount
    const u32 bytes_per_block_log2 = std::countr_zero(bytes_per_block);

    program_manager.BindHostCompute(block_linear_unswizzle_2d_program.handle);
    glFlushMappedNamedBufferRange(map.Handle(), buffer_offset, image.guest_size_bytes);
    glBindBufferBase(GL_SHADER_STORAGE_BUFFER, BINDING_SWIZZLE_BUFFER, swizzle_table_buffer.handle);

    // Uniforms shared by every level
    glUniform3ui(LOC_ORIGIN, 0, 0, 0);     // TODO
    glUniform3i(LOC_DESTINATION, 0, 0, 0); // TODO
    glUniform1ui(LOC_BYTES_PER_BLOCK, bytes_per_block_log2);
    glUniform1ui(LOC_LAYER_STRIDE, image.info.layer_stride);

    for (const SwizzleParameters& params : swizzles) {
        const Extent3D block = params.block;
        const Extent3D tiles = params.num_tiles;
        const size_t input_offset = params.buffer_offset + buffer_offset;

        const u32 groups_x = Common::DivCeil(tiles.width, WORKGROUP_SIZE.width);
        const u32 groups_y = Common::DivCeil(tiles.height, WORKGROUP_SIZE.height);

        // Row stride in bytes, aligned as required for this level
        const u32 stride_alignment = CalculateLevelStrideAlignment(image.info, params.level);
        const u32 stride = Common::AlignBits(tiles.width, stride_alignment) * bytes_per_block;

        const u32 x_shift = GOB_SIZE_SHIFT + block.height + block.depth;
        const u32 gobs_in_x = (stride + GOB_SIZE_X - 1) >> GOB_SIZE_X_SHIFT;
        const u32 block_size = gobs_in_x << x_shift;
        const u32 block_height_mask = (1U << block.height) - 1;

        glUniform1ui(LOC_BLOCK_SIZE, block_size);
        glUniform1ui(LOC_X_SHIFT, x_shift);
        glUniform1ui(LOC_BLOCK_HEIGHT, block.height);
        glUniform1ui(LOC_BLOCK_HEIGHT_MASK, block_height_mask);
        glBindBufferRange(GL_SHADER_STORAGE_BUFFER, BINDING_INPUT_BUFFER, map.Handle(),
                          input_offset, image.guest_size_bytes - params.buffer_offset);
        glBindImageTexture(BINDING_OUTPUT_IMAGE, image.Handle(), params.level, GL_TRUE, 0,
                           GL_WRITE_ONLY, StoreFormat(bytes_per_block));
        glDispatchCompute(groups_x, groups_y, image.info.resources.layers);
    }
    program_manager.RestoreGuestCompute();
}
|
||||
|
||||
/// Unswizzles block-linear 3D image data on the GPU. For every entry in `swizzles`, binds
/// the matching range of the staging buffer and the destination mip level, then dispatches
/// the 3D unswizzle compute program over the level's width, height and depth.
/// @param image         Destination image.
/// @param map           Staging buffer that holds the swizzled guest data.
/// @param buffer_offset Offset of the image data inside `map`.
/// @param swizzles      Per-level swizzle parameters.
void UtilShaders::BlockLinearUpload3D(Image& image, const ImageBufferMap& map, size_t buffer_offset,
                                      std::span<const SwizzleParameters> swizzles) {
    static constexpr Extent3D WORKGROUP_SIZE{16, 8, 8};

    // Shader resource bindings
    static constexpr GLuint BINDING_SWIZZLE_BUFFER = 0;
    static constexpr GLuint BINDING_INPUT_BUFFER = 1;
    static constexpr GLuint BINDING_OUTPUT_IMAGE = 0;

    // Uniform locations
    static constexpr GLuint LOC_ORIGIN = 0;
    static constexpr GLuint LOC_DESTINATION = 1;
    static constexpr GLuint LOC_BYTES_PER_BLOCK = 2;
    static constexpr GLuint LOC_SLICE_SIZE = 3;
    static constexpr GLuint LOC_BLOCK_SIZE = 4;
    static constexpr GLuint LOC_X_SHIFT = 5;
    static constexpr GLuint LOC_BLOCK_HEIGHT = 6;
    static constexpr GLuint LOC_BLOCK_HEIGHT_MASK = 7;
    static constexpr GLuint LOC_BLOCK_DEPTH = 8;
    static constexpr GLuint LOC_BLOCK_DEPTH_MASK = 9;

    const u32 bytes_per_block = BytesPerBlock(image.info.format);
    // The shader receives the block size as a shift amount
    const u32 bytes_per_block_log2 = std::countr_zero(bytes_per_block);

    glFlushMappedNamedBufferRange(map.Handle(), buffer_offset, image.guest_size_bytes);
    program_manager.BindHostCompute(block_linear_unswizzle_3d_program.handle);
    glBindBufferBase(GL_SHADER_STORAGE_BUFFER, BINDING_SWIZZLE_BUFFER, swizzle_table_buffer.handle);

    // Uniforms shared by every level
    glUniform3ui(LOC_ORIGIN, 0, 0, 0);     // TODO
    glUniform3i(LOC_DESTINATION, 0, 0, 0); // TODO
    glUniform1ui(LOC_BYTES_PER_BLOCK, bytes_per_block_log2);

    for (const SwizzleParameters& params : swizzles) {
        const Extent3D block = params.block;
        const Extent3D tiles = params.num_tiles;
        const size_t input_offset = params.buffer_offset + buffer_offset;

        const u32 groups_x = Common::DivCeil(tiles.width, WORKGROUP_SIZE.width);
        const u32 groups_y = Common::DivCeil(tiles.height, WORKGROUP_SIZE.height);
        const u32 groups_z = Common::DivCeil(tiles.depth, WORKGROUP_SIZE.depth);

        // Row stride in bytes, aligned as required for this level
        const u32 stride_alignment = CalculateLevelStrideAlignment(image.info, params.level);
        const u32 stride = Common::AlignBits(tiles.width, stride_alignment) * bytes_per_block;

        const u32 x_shift = GOB_SIZE_SHIFT + block.height + block.depth;
        const u32 gobs_in_x = (stride + GOB_SIZE_X - 1) >> GOB_SIZE_X_SHIFT;
        const u32 block_size = gobs_in_x << x_shift;
        const u32 slice_size =
            Common::DivCeilLog2(tiles.height, block.height + GOB_SIZE_Y_SHIFT) * block_size;

        const u32 block_height_mask = (1U << block.height) - 1;
        const u32 block_depth_mask = (1U << block.depth) - 1;

        glUniform1ui(LOC_SLICE_SIZE, slice_size);
        glUniform1ui(LOC_BLOCK_SIZE, block_size);
        glUniform1ui(LOC_X_SHIFT, x_shift);
        glUniform1ui(LOC_BLOCK_HEIGHT, block.height);
        glUniform1ui(LOC_BLOCK_HEIGHT_MASK, block_height_mask);
        glUniform1ui(LOC_BLOCK_DEPTH, block.depth);
        glUniform1ui(LOC_BLOCK_DEPTH_MASK, block_depth_mask);

        glBindBufferRange(GL_SHADER_STORAGE_BUFFER, BINDING_INPUT_BUFFER, map.Handle(),
                          input_offset, image.guest_size_bytes - params.buffer_offset);
        glBindImageTexture(BINDING_OUTPUT_IMAGE, image.Handle(), params.level, GL_TRUE, 0,
                           GL_WRITE_ONLY, StoreFormat(bytes_per_block));

        glDispatchCompute(groups_x, groups_y, groups_z);
    }
    program_manager.RestoreGuestCompute();
}
|
||||
|
||||
/// Uploads pitch-linear image data with the pitch unswizzle compute program. The output
/// image is bound once at level 0; for every entry in `swizzles` the matching range of the
/// staging buffer is bound and one dispatch covering the entry's tiles is issued.
/// Block sizes that are not a power of two are reported as unimplemented.
/// @param image         Destination image.
/// @param map           Staging buffer that holds the guest data.
/// @param buffer_offset Offset of the image data inside `map`.
/// @param swizzles      Per-level upload parameters.
void UtilShaders::PitchUpload(Image& image, const ImageBufferMap& map, size_t buffer_offset,
                              std::span<const SwizzleParameters> swizzles) {
    static constexpr Extent3D WORKGROUP_SIZE{32, 32, 1};

    // Shader resource bindings
    static constexpr GLuint BINDING_INPUT_BUFFER = 0;
    static constexpr GLuint BINDING_OUTPUT_IMAGE = 0;

    // Uniform locations
    static constexpr GLuint LOC_ORIGIN = 0;
    static constexpr GLuint LOC_DESTINATION = 1;
    static constexpr GLuint LOC_BYTES_PER_BLOCK = 2;
    static constexpr GLuint LOC_PITCH = 3;

    const u32 bytes_per_block = BytesPerBlock(image.info.format);
    const GLenum store_format = StoreFormat(bytes_per_block);
    const u32 row_pitch = image.info.pitch;

    UNIMPLEMENTED_IF_MSG(!std::has_single_bit(bytes_per_block),
                         "Non-power of two images are not implemented");

    program_manager.BindHostCompute(pitch_unswizzle_program.handle);
    glFlushMappedNamedBufferRange(map.Handle(), buffer_offset, image.guest_size_bytes);
    glUniform2ui(LOC_ORIGIN, 0, 0);     // TODO
    glUniform2i(LOC_DESTINATION, 0, 0); // TODO
    glUniform1ui(LOC_BYTES_PER_BLOCK, bytes_per_block);
    glUniform1ui(LOC_PITCH, row_pitch);
    glBindImageTexture(BINDING_OUTPUT_IMAGE, image.Handle(), 0, GL_FALSE, 0, GL_WRITE_ONLY,
                       store_format);

    for (const SwizzleParameters& params : swizzles) {
        const Extent3D tiles = params.num_tiles;
        const size_t input_offset = params.buffer_offset + buffer_offset;

        // Round the tile counts up to whole workgroups
        const u32 groups_x =
            Common::AlignUp(tiles.width, WORKGROUP_SIZE.width) / WORKGROUP_SIZE.width;
        const u32 groups_y =
            Common::AlignUp(tiles.height, WORKGROUP_SIZE.height) / WORKGROUP_SIZE.height;

        glBindBufferRange(GL_SHADER_STORAGE_BUFFER, BINDING_INPUT_BUFFER, map.Handle(),
                          input_offset, image.guest_size_bytes - params.buffer_offset);
        glDispatchCompute(groups_x, groups_y, 1);
    }
    program_manager.RestoreGuestCompute();
}
|
||||
|
||||
/// Runs the BC4 copy compute program once per entry in `copies`, reading from `src_image`
/// (bound as RG32UI) and writing to `dst_image` (bound as RGBA8UI).
/// Each copy is asserted to cover exactly one layer starting at layer 0 on both sides.
/// @param dst_image Image written by the shader.
/// @param src_image Image read by the shader.
/// @param copies    Regions to copy.
void UtilShaders::CopyBC4(Image& dst_image, Image& src_image, std::span<const ImageCopy> copies) {
    // Shader resource bindings
    static constexpr GLuint BINDING_INPUT_IMAGE = 0;
    static constexpr GLuint BINDING_OUTPUT_IMAGE = 1;

    // Uniform locations
    static constexpr GLuint LOC_SRC_OFFSET = 0;
    static constexpr GLuint LOC_DST_OFFSET = 1;

    program_manager.BindHostCompute(copy_bc4_program.handle);

    for (const ImageCopy& region : copies) {
        const auto& src = region.src_subresource;
        const auto& dst = region.dst_subresource;

        ASSERT(region.src_subresource.base_layer == 0);
        ASSERT(region.src_subresource.num_layers == 1);
        ASSERT(region.dst_subresource.base_layer == 0);
        ASSERT(region.dst_subresource.num_layers == 1);

        glUniform3ui(LOC_SRC_OFFSET, region.src_offset.x, region.src_offset.y,
                     region.src_offset.z);
        glUniform3ui(LOC_DST_OFFSET, region.dst_offset.x, region.dst_offset.y,
                     region.dst_offset.z);
        glBindImageTexture(BINDING_INPUT_IMAGE, src_image.Handle(), src.base_level, GL_FALSE, 0,
                           GL_READ_ONLY, GL_RG32UI);
        glBindImageTexture(BINDING_OUTPUT_IMAGE, dst_image.Handle(), dst.base_level, GL_FALSE, 0,
                           GL_WRITE_ONLY, GL_RGBA8UI);
        // One workgroup per element of the copy extent
        glDispatchCompute(region.extent.width, region.extent.height, region.extent.depth);
    }
    program_manager.RestoreGuestCompute();
}
|
||||
|
||||
/// Maps a block size in bytes to the unsigned-integer image format of the same size.
/// @param bytes_per_block Block size in bytes; 1, 2, 4, 8 and 16 are supported.
/// @return The matching GL format. Any other size hits UNREACHABLE() and falls back to
///         GL_R8UI.
GLenum StoreFormat(u32 bytes_per_block) {
    switch (bytes_per_block) {
    case 1:
        return GL_R8UI;
    case 2:
        return GL_R16UI;
    case 4:
        return GL_R32UI;
    case 8:
        return GL_RG32UI;
    case 16:
        return GL_RGBA32UI;
    default:
        UNREACHABLE();
        return GL_R8UI;
    }
}
|
||||
|
||||
} // namespace OpenGL
|
51
src/video_core/renderer_opengl/util_shaders.h
Executable file
51
src/video_core/renderer_opengl/util_shaders.h
Executable file
@@ -0,0 +1,51 @@
|
||||
// Copyright 2020 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <span>
|
||||
|
||||
#include <glad/glad.h>
|
||||
|
||||
#include "common/common_types.h"
|
||||
#include "video_core/renderer_opengl/gl_resource_manager.h"
|
||||
#include "video_core/texture_cache/types.h"
|
||||
|
||||
namespace OpenGL {
|
||||
|
||||
class Image;
|
||||
class ImageBufferMap;
|
||||
class ProgramManager;
|
||||
|
||||
class UtilShaders {
|
||||
public:
|
||||
explicit UtilShaders(ProgramManager& program_manager);
|
||||
~UtilShaders();
|
||||
|
||||
void BlockLinearUpload2D(Image& image, const ImageBufferMap& map, size_t buffer_offset,
|
||||
std::span<const VideoCommon::SwizzleParameters> swizzles);
|
||||
|
||||
void BlockLinearUpload3D(Image& image, const ImageBufferMap& map, size_t buffer_offset,
|
||||
std::span<const VideoCommon::SwizzleParameters> swizzles);
|
||||
|
||||
void PitchUpload(Image& image, const ImageBufferMap& map, size_t buffer_offset,
|
||||
std::span<const VideoCommon::SwizzleParameters> swizzles);
|
||||
|
||||
void CopyBC4(Image& dst_image, Image& src_image,
|
||||
std::span<const VideoCommon::ImageCopy> copies);
|
||||
|
||||
private:
|
||||
ProgramManager& program_manager;
|
||||
|
||||
OGLBuffer swizzle_table_buffer;
|
||||
|
||||
OGLProgram block_linear_unswizzle_2d_program;
|
||||
OGLProgram block_linear_unswizzle_3d_program;
|
||||
OGLProgram pitch_unswizzle_program;
|
||||
OGLProgram copy_bc4_program;
|
||||
};
|
||||
|
||||
/// Returns the unsigned-integer GL image format whose size matches `bytes_per_block`
/// (1 -> GL_R8UI, 2 -> GL_R16UI, 4 -> GL_R32UI, 8 -> GL_RG32UI, 16 -> GL_RGBA32UI).
GLenum StoreFormat(u32 bytes_per_block);
|
||||
|
||||
} // namespace OpenGL
|
Reference in New Issue
Block a user