early-access version 1255

This commit is contained in:
pineappleEA
2020-12-28 15:15:37 +00:00
parent 84b39492d1
commit 78b48028e1
6254 changed files with 1868140 additions and 0 deletions

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,29 @@
// Copyright 2020 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <string>
#include <string_view>
#include "common/common_types.h"
namespace Tegra::Engines {
enum class ShaderType : u32;
}
namespace VideoCommon::Shader {
class ShaderIR;
class Registry;
} // namespace VideoCommon::Shader
namespace OpenGL {
class Device;
std::string DecompileAssemblyShader(const Device& device, const VideoCommon::Shader::ShaderIR& ir,
const VideoCommon::Shader::Registry& registry,
Tegra::Engines::ShaderType stage, std::string_view identifier);
} // namespace OpenGL

View File

@@ -0,0 +1,99 @@
// Copyright 2018 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <memory>
#include <glad/glad.h>
#include "common/assert.h"
#include "common/microprofile.h"
#include "video_core/buffer_cache/buffer_cache.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/rasterizer_interface.h"
#include "video_core/renderer_opengl/gl_buffer_cache.h"
#include "video_core/renderer_opengl/gl_device.h"
#include "video_core/renderer_opengl/gl_rasterizer.h"
#include "video_core/renderer_opengl/gl_resource_manager.h"
namespace OpenGL {
using Maxwell = Tegra::Engines::Maxwell3D::Regs;
MICROPROFILE_DEFINE(OpenGL_Buffer_Download, "OpenGL", "Buffer Download", MP_RGB(192, 192, 128));
Buffer::Buffer(const Device& device_, VAddr cpu_addr_, std::size_t size_)
: BufferBlock{cpu_addr_, size_} {
gl_buffer.Create();
glNamedBufferData(gl_buffer.handle, static_cast<GLsizeiptr>(size_), nullptr, GL_DYNAMIC_DRAW);
if (device_.UseAssemblyShaders() || device_.HasVertexBufferUnifiedMemory()) {
glMakeNamedBufferResidentNV(gl_buffer.handle, GL_READ_WRITE);
glGetNamedBufferParameterui64vNV(gl_buffer.handle, GL_BUFFER_GPU_ADDRESS_NV, &gpu_address);
}
}
Buffer::~Buffer() = default;
void Buffer::Upload(std::size_t offset, std::size_t data_size, const u8* data) {
glNamedBufferSubData(Handle(), static_cast<GLintptr>(offset),
static_cast<GLsizeiptr>(data_size), data);
}
void Buffer::Download(std::size_t offset, std::size_t data_size, u8* data) {
MICROPROFILE_SCOPE(OpenGL_Buffer_Download);
const GLsizeiptr gl_size = static_cast<GLsizeiptr>(data_size);
const GLintptr gl_offset = static_cast<GLintptr>(offset);
if (read_buffer.handle == 0) {
read_buffer.Create();
glNamedBufferData(read_buffer.handle, static_cast<GLsizeiptr>(Size()), nullptr,
GL_STREAM_READ);
}
glMemoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT);
glCopyNamedBufferSubData(gl_buffer.handle, read_buffer.handle, gl_offset, gl_offset, gl_size);
glGetNamedBufferSubData(read_buffer.handle, gl_offset, gl_size, data);
}
void Buffer::CopyFrom(const Buffer& src, std::size_t src_offset, std::size_t dst_offset,
std::size_t copy_size) {
glCopyNamedBufferSubData(src.Handle(), Handle(), static_cast<GLintptr>(src_offset),
static_cast<GLintptr>(dst_offset), static_cast<GLsizeiptr>(copy_size));
}
OGLBufferCache::OGLBufferCache(VideoCore::RasterizerInterface& rasterizer_,
Tegra::MemoryManager& gpu_memory_, Core::Memory::Memory& cpu_memory_,
const Device& device_, OGLStreamBuffer& stream_buffer_,
StateTracker& state_tracker)
: GenericBufferCache{rasterizer_, gpu_memory_, cpu_memory_, stream_buffer_}, device{device_} {
if (!device.HasFastBufferSubData()) {
return;
}
static constexpr GLsizeiptr size = static_cast<GLsizeiptr>(Maxwell::MaxConstBufferSize);
glCreateBuffers(static_cast<GLsizei>(std::size(cbufs)), std::data(cbufs));
for (const GLuint cbuf : cbufs) {
glNamedBufferData(cbuf, size, nullptr, GL_STREAM_DRAW);
}
}
OGLBufferCache::~OGLBufferCache() {
glDeleteBuffers(static_cast<GLsizei>(std::size(cbufs)), std::data(cbufs));
}
std::shared_ptr<Buffer> OGLBufferCache::CreateBlock(VAddr cpu_addr, std::size_t size) {
return std::make_shared<Buffer>(device, cpu_addr, size);
}
OGLBufferCache::BufferInfo OGLBufferCache::GetEmptyBuffer(std::size_t) {
return {0, 0, 0};
}
OGLBufferCache::BufferInfo OGLBufferCache::ConstBufferUpload(const void* raw_pointer,
std::size_t size) {
DEBUG_ASSERT(cbuf_cursor < std::size(cbufs));
const GLuint cbuf = cbufs[cbuf_cursor++];
glNamedBufferSubData(cbuf, 0, static_cast<GLsizeiptr>(size), raw_pointer);
return {cbuf, 0, 0};
}
} // namespace OpenGL

View File

@@ -0,0 +1,83 @@
// Copyright 2018 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <array>
#include <memory>
#include "common/common_types.h"
#include "video_core/buffer_cache/buffer_cache.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/renderer_opengl/gl_resource_manager.h"
#include "video_core/renderer_opengl/gl_stream_buffer.h"
namespace Core {
class System;
}
namespace OpenGL {
class Device;
class OGLStreamBuffer;
class RasterizerOpenGL;
class StateTracker;
class Buffer : public VideoCommon::BufferBlock {
public:
explicit Buffer(const Device& device_, VAddr cpu_addr_, std::size_t size_);
~Buffer();
void Upload(std::size_t offset, std::size_t data_size, const u8* data);
void Download(std::size_t offset, std::size_t data_size, u8* data);
void CopyFrom(const Buffer& src, std::size_t src_offset, std::size_t dst_offset,
std::size_t copy_size);
GLuint Handle() const noexcept {
return gl_buffer.handle;
}
u64 Address() const noexcept {
return gpu_address;
}
private:
OGLBuffer gl_buffer;
OGLBuffer read_buffer;
u64 gpu_address = 0;
};
using GenericBufferCache = VideoCommon::BufferCache<Buffer, GLuint, OGLStreamBuffer>;
class OGLBufferCache final : public GenericBufferCache {
public:
explicit OGLBufferCache(VideoCore::RasterizerInterface& rasterizer,
Tegra::MemoryManager& gpu_memory, Core::Memory::Memory& cpu_memory,
const Device& device, OGLStreamBuffer& stream_buffer,
StateTracker& state_tracker);
~OGLBufferCache();
BufferInfo GetEmptyBuffer(std::size_t) override;
void Acquire() noexcept {
cbuf_cursor = 0;
}
protected:
std::shared_ptr<Buffer> CreateBlock(VAddr cpu_addr, std::size_t size) override;
BufferInfo ConstBufferUpload(const void* raw_pointer, std::size_t size) override;
private:
static constexpr std::size_t NUM_CBUFS = Tegra::Engines::Maxwell3D::Regs::MaxConstBuffers *
Tegra::Engines::Maxwell3D::Regs::MaxShaderProgram;
const Device& device;
std::size_t cbuf_cursor = 0;
std::array<GLuint, NUM_CBUFS> cbufs{};
};
} // namespace OpenGL

View File

@@ -0,0 +1,294 @@
// Copyright 2019 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <algorithm>
#include <array>
#include <cstddef>
#include <cstdlib>
#include <cstring>
#include <limits>
#include <optional>
#include <span>
#include <vector>
#include <glad/glad.h>
#include "common/logging/log.h"
#include "common/scope_exit.h"
#include "core/settings.h"
#include "video_core/renderer_opengl/gl_device.h"
#include "video_core/renderer_opengl/gl_resource_manager.h"
namespace OpenGL {
namespace {
// One uniform block is reserved for emulation purposes
constexpr u32 ReservedUniformBlocks = 1;
constexpr u32 NumStages = 5;
constexpr std::array LIMIT_UBOS = {
GL_MAX_VERTEX_UNIFORM_BLOCKS, GL_MAX_TESS_CONTROL_UNIFORM_BLOCKS,
GL_MAX_TESS_EVALUATION_UNIFORM_BLOCKS, GL_MAX_GEOMETRY_UNIFORM_BLOCKS,
GL_MAX_FRAGMENT_UNIFORM_BLOCKS, GL_MAX_COMPUTE_UNIFORM_BLOCKS,
};
constexpr std::array LIMIT_SSBOS = {
GL_MAX_VERTEX_SHADER_STORAGE_BLOCKS, GL_MAX_TESS_CONTROL_SHADER_STORAGE_BLOCKS,
GL_MAX_TESS_EVALUATION_SHADER_STORAGE_BLOCKS, GL_MAX_GEOMETRY_SHADER_STORAGE_BLOCKS,
GL_MAX_FRAGMENT_SHADER_STORAGE_BLOCKS, GL_MAX_COMPUTE_SHADER_STORAGE_BLOCKS,
};
constexpr std::array LIMIT_SAMPLERS = {
GL_MAX_VERTEX_TEXTURE_IMAGE_UNITS,
GL_MAX_TESS_CONTROL_TEXTURE_IMAGE_UNITS,
GL_MAX_TESS_EVALUATION_TEXTURE_IMAGE_UNITS,
GL_MAX_GEOMETRY_TEXTURE_IMAGE_UNITS,
GL_MAX_TEXTURE_IMAGE_UNITS,
GL_MAX_COMPUTE_TEXTURE_IMAGE_UNITS,
};
constexpr std::array LIMIT_IMAGES = {
GL_MAX_VERTEX_IMAGE_UNIFORMS, GL_MAX_TESS_CONTROL_IMAGE_UNIFORMS,
GL_MAX_TESS_EVALUATION_IMAGE_UNIFORMS, GL_MAX_GEOMETRY_IMAGE_UNIFORMS,
GL_MAX_FRAGMENT_IMAGE_UNIFORMS, GL_MAX_COMPUTE_IMAGE_UNIFORMS,
};
template <typename T>
T GetInteger(GLenum pname) {
GLint temporary;
glGetIntegerv(pname, &temporary);
return static_cast<T>(temporary);
}
bool TestProgram(const GLchar* glsl) {
const GLuint shader{glCreateShaderProgramv(GL_VERTEX_SHADER, 1, &glsl)};
GLint link_status;
glGetProgramiv(shader, GL_LINK_STATUS, &link_status);
glDeleteProgram(shader);
return link_status == GL_TRUE;
}
std::vector<std::string_view> GetExtensions() {
GLint num_extensions;
glGetIntegerv(GL_NUM_EXTENSIONS, &num_extensions);
std::vector<std::string_view> extensions;
extensions.reserve(num_extensions);
for (GLint index = 0; index < num_extensions; ++index) {
extensions.push_back(
reinterpret_cast<const char*>(glGetStringi(GL_EXTENSIONS, static_cast<GLuint>(index))));
}
return extensions;
}
bool HasExtension(std::span<const std::string_view> extensions, std::string_view extension) {
return std::ranges::find(extensions, extension) != extensions.end();
}
u32 Extract(u32& base, u32& num, u32 amount, std::optional<GLenum> limit = {}) {
ASSERT(num >= amount);
if (limit) {
amount = std::min(amount, GetInteger<u32>(*limit));
}
num -= amount;
return std::exchange(base, base + amount);
}
std::array<u32, Tegra::Engines::MaxShaderTypes> BuildMaxUniformBuffers() noexcept {
std::array<u32, Tegra::Engines::MaxShaderTypes> max;
std::ranges::transform(LIMIT_UBOS, max.begin(),
[](GLenum pname) { return GetInteger<u32>(pname); });
return max;
}
std::array<Device::BaseBindings, Tegra::Engines::MaxShaderTypes> BuildBaseBindings() noexcept {
std::array<Device::BaseBindings, Tegra::Engines::MaxShaderTypes> bindings;
static constexpr std::array<std::size_t, 5> stage_swizzle{0, 1, 2, 3, 4};
const u32 total_ubos = GetInteger<u32>(GL_MAX_UNIFORM_BUFFER_BINDINGS);
const u32 total_ssbos = GetInteger<u32>(GL_MAX_SHADER_STORAGE_BUFFER_BINDINGS);
const u32 total_samplers = GetInteger<u32>(GL_MAX_COMBINED_TEXTURE_IMAGE_UNITS);
u32 num_ubos = total_ubos - ReservedUniformBlocks;
u32 num_ssbos = total_ssbos;
u32 num_samplers = total_samplers;
u32 base_ubo = ReservedUniformBlocks;
u32 base_ssbo = 0;
u32 base_samplers = 0;
for (std::size_t i = 0; i < NumStages; ++i) {
const std::size_t stage = stage_swizzle[i];
bindings[stage] = {
Extract(base_ubo, num_ubos, total_ubos / NumStages, LIMIT_UBOS[stage]),
Extract(base_ssbo, num_ssbos, total_ssbos / NumStages, LIMIT_SSBOS[stage]),
Extract(base_samplers, num_samplers, total_samplers / NumStages,
LIMIT_SAMPLERS[stage])};
}
u32 num_images = GetInteger<u32>(GL_MAX_IMAGE_UNITS);
u32 base_images = 0;
// GL_MAX_IMAGE_UNITS is guaranteed by the spec to have a minimum value of 8.
// Due to the limitation of GL_MAX_IMAGE_UNITS, reserve at least 4 image bindings on the
// fragment stage, and at least 1 for the rest of the stages.
// So far games are observed to use 1 image binding on vertex and 4 on fragment stages.
// Reserve at least 4 image bindings on the fragment stage.
bindings[4].image =
Extract(base_images, num_images, std::max(4U, num_images / NumStages), LIMIT_IMAGES[4]);
// This is guaranteed to be at least 1.
const u32 total_extracted_images = num_images / (NumStages - 1);
// Reserve the other image bindings.
for (std::size_t i = 0; i < NumStages; ++i) {
const std::size_t stage = stage_swizzle[i];
if (stage == 4) {
continue;
}
bindings[stage].image =
Extract(base_images, num_images, total_extracted_images, LIMIT_IMAGES[stage]);
}
// Compute doesn't care about any of this.
bindings[5] = {0, 0, 0, 0};
return bindings;
}
bool IsASTCSupported() {
static constexpr std::array targets = {GL_TEXTURE_2D, GL_TEXTURE_2D_ARRAY};
static constexpr std::array formats = {
GL_COMPRESSED_RGBA_ASTC_4x4_KHR, GL_COMPRESSED_RGBA_ASTC_5x4_KHR,
GL_COMPRESSED_RGBA_ASTC_5x5_KHR, GL_COMPRESSED_RGBA_ASTC_6x5_KHR,
GL_COMPRESSED_RGBA_ASTC_6x6_KHR, GL_COMPRESSED_RGBA_ASTC_8x5_KHR,
GL_COMPRESSED_RGBA_ASTC_8x6_KHR, GL_COMPRESSED_RGBA_ASTC_8x8_KHR,
GL_COMPRESSED_RGBA_ASTC_10x5_KHR, GL_COMPRESSED_RGBA_ASTC_10x6_KHR,
GL_COMPRESSED_RGBA_ASTC_10x8_KHR, GL_COMPRESSED_RGBA_ASTC_10x10_KHR,
GL_COMPRESSED_RGBA_ASTC_12x10_KHR, GL_COMPRESSED_RGBA_ASTC_12x12_KHR,
GL_COMPRESSED_SRGB8_ALPHA8_ASTC_4x4_KHR, GL_COMPRESSED_SRGB8_ALPHA8_ASTC_5x4_KHR,
GL_COMPRESSED_SRGB8_ALPHA8_ASTC_5x5_KHR, GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x5_KHR,
GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x6_KHR, GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x5_KHR,
GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x6_KHR, GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x8_KHR,
GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x5_KHR, GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x6_KHR,
GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x8_KHR, GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x10_KHR,
GL_COMPRESSED_SRGB8_ALPHA8_ASTC_12x10_KHR, GL_COMPRESSED_SRGB8_ALPHA8_ASTC_12x12_KHR,
};
static constexpr std::array required_support = {
GL_VERTEX_TEXTURE, GL_TESS_CONTROL_TEXTURE, GL_TESS_EVALUATION_TEXTURE,
GL_GEOMETRY_TEXTURE, GL_FRAGMENT_TEXTURE, GL_COMPUTE_TEXTURE,
};
for (const GLenum target : targets) {
for (const GLenum format : formats) {
for (const GLenum support : required_support) {
GLint value;
glGetInternalformativ(target, format, support, 1, &value);
if (value != GL_FULL_SUPPORT) {
return false;
}
}
}
}
return true;
}
[[nodiscard]] bool IsDebugToolAttached(std::span<const std::string_view> extensions) {
const bool nsight = std::getenv("NVTX_INJECTION64_PATH") || std::getenv("NSIGHT_LAUNCHED");
return nsight || HasExtension(extensions, "GL_EXT_debug_tool");
}
} // Anonymous namespace
Device::Device()
: max_uniform_buffers{BuildMaxUniformBuffers()}, base_bindings{BuildBaseBindings()} {
const std::string_view vendor = reinterpret_cast<const char*>(glGetString(GL_VENDOR));
const std::string_view version = reinterpret_cast<const char*>(glGetString(GL_VERSION));
const std::vector extensions = GetExtensions();
const bool is_nvidia = vendor == "NVIDIA Corporation";
const bool is_amd = vendor == "ATI Technologies Inc.";
bool disable_fast_buffer_sub_data = false;
if (is_nvidia && version == "4.6.0 NVIDIA 443.24") {
LOG_WARNING(
Render_OpenGL,
"Beta driver 443.24 is known to have issues. There might be performance issues.");
disable_fast_buffer_sub_data = true;
}
uniform_buffer_alignment = GetInteger<size_t>(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT);
shader_storage_alignment = GetInteger<size_t>(GL_SHADER_STORAGE_BUFFER_OFFSET_ALIGNMENT);
max_vertex_attributes = GetInteger<u32>(GL_MAX_VERTEX_ATTRIBS);
max_varyings = GetInteger<u32>(GL_MAX_VARYING_VECTORS);
max_compute_shared_memory_size = GetInteger<u32>(GL_MAX_COMPUTE_SHARED_MEMORY_SIZE);
has_warp_intrinsics = GLAD_GL_NV_gpu_shader5 && GLAD_GL_NV_shader_thread_group &&
GLAD_GL_NV_shader_thread_shuffle;
has_shader_ballot = GLAD_GL_ARB_shader_ballot;
has_vertex_viewport_layer = GLAD_GL_ARB_shader_viewport_layer_array;
has_image_load_formatted = HasExtension(extensions, "GL_EXT_shader_image_load_formatted");
has_texture_shadow_lod = HasExtension(extensions, "GL_EXT_texture_shadow_lod");
has_astc = IsASTCSupported();
has_variable_aoffi = TestVariableAoffi();
has_component_indexing_bug = is_amd;
has_precise_bug = TestPreciseBug();
has_nv_viewport_array2 = GLAD_GL_NV_viewport_array2;
has_vertex_buffer_unified_memory = GLAD_GL_NV_vertex_buffer_unified_memory;
has_debugging_tool_attached = IsDebugToolAttached(extensions);
// At the moment of writing this, only Nvidia's driver optimizes BufferSubData on exclusive
// uniform buffers as "push constants"
has_fast_buffer_sub_data = is_nvidia && !disable_fast_buffer_sub_data;
use_assembly_shaders = Settings::values.use_assembly_shaders.GetValue() &&
GLAD_GL_NV_gpu_program5 && GLAD_GL_NV_compute_program5 &&
GLAD_GL_NV_transform_feedback && GLAD_GL_NV_transform_feedback2;
use_asynchronous_shaders = Settings::values.use_asynchronous_shaders.GetValue();
LOG_INFO(Render_OpenGL, "Renderer_VariableAOFFI: {}", has_variable_aoffi);
LOG_INFO(Render_OpenGL, "Renderer_ComponentIndexingBug: {}", has_component_indexing_bug);
LOG_INFO(Render_OpenGL, "Renderer_PreciseBug: {}", has_precise_bug);
if (Settings::values.use_assembly_shaders.GetValue() && !use_assembly_shaders) {
LOG_ERROR(Render_OpenGL, "Assembly shaders enabled but not supported");
}
}
Device::Device(std::nullptr_t) {
max_uniform_buffers.fill(std::numeric_limits<u32>::max());
uniform_buffer_alignment = 4;
shader_storage_alignment = 4;
max_vertex_attributes = 16;
max_varyings = 15;
max_compute_shared_memory_size = 0x10000;
has_warp_intrinsics = true;
has_shader_ballot = true;
has_vertex_viewport_layer = true;
has_image_load_formatted = true;
has_texture_shadow_lod = true;
has_variable_aoffi = true;
}
bool Device::TestVariableAoffi() {
return TestProgram(R"(#version 430 core
// This is a unit test, please ignore me on apitrace bug reports.
uniform sampler2D tex;
uniform ivec2 variable_offset;
out vec4 output_attribute;
void main() {
output_attribute = textureOffset(tex, vec2(0), variable_offset);
})");
}
bool Device::TestPreciseBug() {
return !TestProgram(R"(#version 430 core
in vec3 coords;
out float out_value;
uniform sampler2DShadow tex;
void main() {
precise float tmp_value = vec4(texture(tex, coords)).x;
out_value = tmp_value;
})");
}
} // namespace OpenGL

View File

@@ -0,0 +1,147 @@
// Copyright 2019 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <cstddef>
#include "common/common_types.h"
#include "video_core/engines/shader_type.h"
namespace OpenGL {
static constexpr u32 EmulationUniformBlockBinding = 0;
class Device final {
public:
struct BaseBindings final {
u32 uniform_buffer{};
u32 shader_storage_buffer{};
u32 sampler{};
u32 image{};
};
explicit Device();
explicit Device(std::nullptr_t);
u32 GetMaxUniformBuffers(Tegra::Engines::ShaderType shader_type) const noexcept {
return max_uniform_buffers[static_cast<std::size_t>(shader_type)];
}
const BaseBindings& GetBaseBindings(std::size_t stage_index) const noexcept {
return base_bindings[stage_index];
}
const BaseBindings& GetBaseBindings(Tegra::Engines::ShaderType shader_type) const noexcept {
return GetBaseBindings(static_cast<std::size_t>(shader_type));
}
size_t GetUniformBufferAlignment() const {
return uniform_buffer_alignment;
}
size_t GetShaderStorageBufferAlignment() const {
return shader_storage_alignment;
}
u32 GetMaxVertexAttributes() const {
return max_vertex_attributes;
}
u32 GetMaxVaryings() const {
return max_varyings;
}
u32 GetMaxComputeSharedMemorySize() const {
return max_compute_shared_memory_size;
}
bool HasWarpIntrinsics() const {
return has_warp_intrinsics;
}
bool HasShaderBallot() const {
return has_shader_ballot;
}
bool HasVertexViewportLayer() const {
return has_vertex_viewport_layer;
}
bool HasImageLoadFormatted() const {
return has_image_load_formatted;
}
bool HasTextureShadowLod() const {
return has_texture_shadow_lod;
}
bool HasVertexBufferUnifiedMemory() const {
return has_vertex_buffer_unified_memory;
}
bool HasASTC() const {
return has_astc;
}
bool HasVariableAoffi() const {
return has_variable_aoffi;
}
bool HasComponentIndexingBug() const {
return has_component_indexing_bug;
}
bool HasPreciseBug() const {
return has_precise_bug;
}
bool HasFastBufferSubData() const {
return has_fast_buffer_sub_data;
}
bool HasNvViewportArray2() const {
return has_nv_viewport_array2;
}
bool HasDebuggingToolAttached() const {
return has_debugging_tool_attached;
}
bool UseAssemblyShaders() const {
return use_assembly_shaders;
}
bool UseAsynchronousShaders() const {
return use_asynchronous_shaders;
}
private:
static bool TestVariableAoffi();
static bool TestPreciseBug();
std::array<u32, Tegra::Engines::MaxShaderTypes> max_uniform_buffers{};
std::array<BaseBindings, Tegra::Engines::MaxShaderTypes> base_bindings{};
size_t uniform_buffer_alignment{};
size_t shader_storage_alignment{};
u32 max_vertex_attributes{};
u32 max_varyings{};
u32 max_compute_shared_memory_size{};
bool has_warp_intrinsics{};
bool has_shader_ballot{};
bool has_vertex_viewport_layer{};
bool has_image_load_formatted{};
bool has_texture_shadow_lod{};
bool has_vertex_buffer_unified_memory{};
bool has_astc{};
bool has_variable_aoffi{};
bool has_component_indexing_bug{};
bool has_precise_bug{};
bool has_fast_buffer_sub_data{};
bool has_nv_viewport_array2{};
bool has_debugging_tool_attached{};
bool use_assembly_shaders{};
bool use_asynchronous_shaders{};
};
} // namespace OpenGL

View File

@@ -0,0 +1,73 @@
// Copyright 2020 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "common/assert.h"
#include <glad/glad.h>
#include "video_core/renderer_opengl/gl_buffer_cache.h"
#include "video_core/renderer_opengl/gl_fence_manager.h"
namespace OpenGL {
GLInnerFence::GLInnerFence(u32 payload_, bool is_stubbed_) : FenceBase{payload_, is_stubbed_} {}
GLInnerFence::GLInnerFence(GPUVAddr address_, u32 payload_, bool is_stubbed_)
: FenceBase{address_, payload_, is_stubbed_} {}
GLInnerFence::~GLInnerFence() = default;
void GLInnerFence::Queue() {
if (is_stubbed) {
return;
}
ASSERT(sync_object.handle == 0);
sync_object.Create();
}
bool GLInnerFence::IsSignaled() const {
if (is_stubbed) {
return true;
}
ASSERT(sync_object.handle != 0);
GLsizei length;
GLint sync_status;
glGetSynciv(sync_object.handle, GL_SYNC_STATUS, sizeof(GLint), &length, &sync_status);
return sync_status == GL_SIGNALED;
}
void GLInnerFence::Wait() {
if (is_stubbed) {
return;
}
ASSERT(sync_object.handle != 0);
glClientWaitSync(sync_object.handle, 0, GL_TIMEOUT_IGNORED);
}
FenceManagerOpenGL::FenceManagerOpenGL(VideoCore::RasterizerInterface& rasterizer_,
Tegra::GPU& gpu_, TextureCache& texture_cache_,
OGLBufferCache& buffer_cache_, QueryCache& query_cache_)
: GenericFenceManager{rasterizer_, gpu_, texture_cache_, buffer_cache_, query_cache_} {}
Fence FenceManagerOpenGL::CreateFence(u32 value, bool is_stubbed) {
return std::make_shared<GLInnerFence>(value, is_stubbed);
}
Fence FenceManagerOpenGL::CreateFence(GPUVAddr addr, u32 value, bool is_stubbed) {
return std::make_shared<GLInnerFence>(addr, value, is_stubbed);
}
void FenceManagerOpenGL::QueueFence(Fence& fence) {
fence->Queue();
}
bool FenceManagerOpenGL::IsFenceSignaled(Fence& fence) const {
return fence->IsSignaled();
}
void FenceManagerOpenGL::WaitFence(Fence& fence) {
fence->Wait();
}
} // namespace OpenGL

View File

@@ -0,0 +1,52 @@
// Copyright 2020 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <memory>
#include "common/common_types.h"
#include "video_core/fence_manager.h"
#include "video_core/renderer_opengl/gl_buffer_cache.h"
#include "video_core/renderer_opengl/gl_query_cache.h"
#include "video_core/renderer_opengl/gl_resource_manager.h"
#include "video_core/renderer_opengl/gl_texture_cache.h"
namespace OpenGL {
class GLInnerFence : public VideoCommon::FenceBase {
public:
explicit GLInnerFence(u32 payload_, bool is_stubbed_);
explicit GLInnerFence(GPUVAddr address_, u32 payload_, bool is_stubbed_);
~GLInnerFence();
void Queue();
bool IsSignaled() const;
void Wait();
private:
OGLSync sync_object;
};
using Fence = std::shared_ptr<GLInnerFence>;
using GenericFenceManager =
VideoCommon::FenceManager<Fence, TextureCache, OGLBufferCache, QueryCache>;
class FenceManagerOpenGL final : public GenericFenceManager {
public:
explicit FenceManagerOpenGL(VideoCore::RasterizerInterface& rasterizer_, Tegra::GPU& gpu_,
TextureCache& texture_cache_, OGLBufferCache& buffer_cache_,
QueryCache& query_cache_);
protected:
Fence CreateFence(u32 value, bool is_stubbed) override;
Fence CreateFence(GPUVAddr addr, u32 value, bool is_stubbed) override;
void QueueFence(Fence& fence) override;
bool IsFenceSignaled(Fence& fence) const override;
void WaitFence(Fence& fence) override;
};
} // namespace OpenGL

View File

@@ -0,0 +1,121 @@
// Copyright 2019 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <algorithm>
#include <cstring>
#include <memory>
#include <unordered_map>
#include <utility>
#include <vector>
#include <glad/glad.h>
#include "common/assert.h"
#include "core/core.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/memory_manager.h"
#include "video_core/renderer_opengl/gl_query_cache.h"
#include "video_core/renderer_opengl/gl_rasterizer.h"
namespace OpenGL {
namespace {
constexpr std::array<GLenum, VideoCore::NumQueryTypes> QueryTargets = {GL_SAMPLES_PASSED};
constexpr GLenum GetTarget(VideoCore::QueryType type) {
return QueryTargets[static_cast<std::size_t>(type)];
}
} // Anonymous namespace
QueryCache::QueryCache(RasterizerOpenGL& rasterizer_, Tegra::Engines::Maxwell3D& maxwell3d_,
Tegra::MemoryManager& gpu_memory_)
: QueryCacheBase(rasterizer_, maxwell3d_, gpu_memory_), gl_rasterizer{rasterizer_} {}
QueryCache::~QueryCache() = default;
OGLQuery QueryCache::AllocateQuery(VideoCore::QueryType type) {
auto& reserve = query_pools[static_cast<std::size_t>(type)];
OGLQuery query;
if (reserve.empty()) {
query.Create(GetTarget(type));
return query;
}
query = std::move(reserve.back());
reserve.pop_back();
return query;
}
void QueryCache::Reserve(VideoCore::QueryType type, OGLQuery&& query) {
query_pools[static_cast<std::size_t>(type)].push_back(std::move(query));
}
bool QueryCache::AnyCommandQueued() const noexcept {
return gl_rasterizer.AnyCommandQueued();
}
HostCounter::HostCounter(QueryCache& cache_, std::shared_ptr<HostCounter> dependency_,
VideoCore::QueryType type_)
: HostCounterBase{std::move(dependency_)}, cache{cache_}, type{type_}, query{
cache.AllocateQuery(
type)} {
glBeginQuery(GetTarget(type), query.handle);
}
HostCounter::~HostCounter() {
cache.Reserve(type, std::move(query));
}
void HostCounter::EndQuery() {
if (!cache.AnyCommandQueued()) {
// There are chances a query waited on without commands (glDraw, glClear, glDispatch). Not
// having any of these causes a lock. glFlush is considered a command, so we can safely wait
// for this. Insert to the OpenGL command stream a flush.
glFlush();
}
glEndQuery(GetTarget(type));
}
u64 HostCounter::BlockingQuery() const {
GLint64 value;
glGetQueryObjecti64v(query.handle, GL_QUERY_RESULT, &value);
return static_cast<u64>(value);
}
CachedQuery::CachedQuery(QueryCache& cache_, VideoCore::QueryType type_, VAddr cpu_addr_,
u8* host_ptr_)
: CachedQueryBase{cpu_addr_, host_ptr_}, cache{&cache_}, type{type_} {}
CachedQuery::~CachedQuery() = default;
CachedQuery::CachedQuery(CachedQuery&& rhs) noexcept
: CachedQueryBase(std::move(rhs)), cache{rhs.cache}, type{rhs.type} {}
CachedQuery& CachedQuery::operator=(CachedQuery&& rhs) noexcept {
cache = rhs.cache;
type = rhs.type;
CachedQueryBase<HostCounter>::operator=(std::move(rhs));
return *this;
}
void CachedQuery::Flush() {
// Waiting for a query while another query of the same target is enabled locks Nvidia's driver.
// To avoid this disable and re-enable keeping the dependency stream.
// But we only have to do this if we have pending waits to be done.
auto& stream = cache->Stream(type);
const bool slice_counter = WaitPending() && stream.IsEnabled();
if (slice_counter) {
stream.Update(false);
}
VideoCommon::CachedQueryBase<HostCounter>::Flush();
if (slice_counter) {
stream.Update(true);
}
}
} // namespace OpenGL

View File

@@ -0,0 +1,82 @@
// Copyright 2019 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <array>
#include <memory>
#include <vector>
#include "common/common_types.h"
#include "video_core/query_cache.h"
#include "video_core/rasterizer_interface.h"
#include "video_core/renderer_opengl/gl_resource_manager.h"
namespace Core {
class System;
}
namespace OpenGL {
class CachedQuery;
class HostCounter;
class QueryCache;
class RasterizerOpenGL;
using CounterStream = VideoCommon::CounterStreamBase<QueryCache, HostCounter>;
class QueryCache final
: public VideoCommon::QueryCacheBase<QueryCache, CachedQuery, CounterStream, HostCounter> {
public:
explicit QueryCache(RasterizerOpenGL& rasterizer_, Tegra::Engines::Maxwell3D& maxwell3d_,
Tegra::MemoryManager& gpu_memory_);
~QueryCache();
OGLQuery AllocateQuery(VideoCore::QueryType type);
void Reserve(VideoCore::QueryType type, OGLQuery&& query);
bool AnyCommandQueued() const noexcept;
private:
RasterizerOpenGL& gl_rasterizer;
std::array<std::vector<OGLQuery>, VideoCore::NumQueryTypes> query_pools;
};
class HostCounter final : public VideoCommon::HostCounterBase<QueryCache, HostCounter> {
public:
explicit HostCounter(QueryCache& cache_, std::shared_ptr<HostCounter> dependency_,
VideoCore::QueryType type_);
~HostCounter();
void EndQuery();
private:
u64 BlockingQuery() const override;
QueryCache& cache;
const VideoCore::QueryType type;
OGLQuery query;
};
class CachedQuery final : public VideoCommon::CachedQueryBase<HostCounter> {
public:
explicit CachedQuery(QueryCache& cache_, VideoCore::QueryType type_, VAddr cpu_addr_,
u8* host_ptr_);
~CachedQuery() override;
CachedQuery(CachedQuery&& rhs) noexcept;
CachedQuery& operator=(CachedQuery&& rhs) noexcept;
CachedQuery(const CachedQuery&) = delete;
CachedQuery& operator=(const CachedQuery&) = delete;
void Flush() override;
private:
QueryCache* cache;
VideoCore::QueryType type;
};
} // namespace OpenGL

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,288 @@
// Copyright 2015 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <array>
#include <atomic>
#include <cstddef>
#include <memory>
#include <optional>
#include <tuple>
#include <utility>
#include <boost/container/static_vector.hpp>
#include <glad/glad.h>
#include "common/common_types.h"
#include "video_core/engines/const_buffer_info.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/rasterizer_accelerated.h"
#include "video_core/rasterizer_interface.h"
#include "video_core/renderer_opengl/gl_buffer_cache.h"
#include "video_core/renderer_opengl/gl_device.h"
#include "video_core/renderer_opengl/gl_fence_manager.h"
#include "video_core/renderer_opengl/gl_query_cache.h"
#include "video_core/renderer_opengl/gl_resource_manager.h"
#include "video_core/renderer_opengl/gl_shader_cache.h"
#include "video_core/renderer_opengl/gl_shader_decompiler.h"
#include "video_core/renderer_opengl/gl_shader_manager.h"
#include "video_core/renderer_opengl/gl_state_tracker.h"
#include "video_core/renderer_opengl/gl_stream_buffer.h"
#include "video_core/renderer_opengl/gl_texture_cache.h"
#include "video_core/shader/async_shaders.h"
#include "video_core/textures/texture.h"
namespace Core::Memory {
class Memory;
}
namespace Core::Frontend {
class EmuWindow;
}
namespace Tegra {
class MemoryManager;
}
namespace OpenGL {
struct ScreenInfo;
struct ShaderEntries;
struct BindlessSSBO {
GLuint64EXT address;
GLsizei length;
GLsizei padding;
};
static_assert(sizeof(BindlessSSBO) * CHAR_BIT == 128);
class RasterizerOpenGL : public VideoCore::RasterizerAccelerated {
public:
explicit RasterizerOpenGL(Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu_,
Core::Memory::Memory& cpu_memory_, const Device& device_,
ScreenInfo& screen_info_, ProgramManager& program_manager_,
StateTracker& state_tracker_);
~RasterizerOpenGL() override;
void Draw(bool is_indexed, bool is_instanced) override;
void Clear() override;
void DispatchCompute(GPUVAddr code_addr) override;
void ResetCounter(VideoCore::QueryType type) override;
void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional<u64> timestamp) override;
void FlushAll() override;
void FlushRegion(VAddr addr, u64 size) override;
void InvalidateExceptTextureCache(VAddr addr, u64 size) override;
void InvalidateTextureCache(VAddr addr, u64 size) override;
bool MustFlushRegion(VAddr addr, u64 size) override;
void InvalidateRegion(VAddr addr, u64 size) override;
void OnCPUWrite(VAddr addr, u64 size) override;
void SyncGuestHost() override;
void UnmapMemory(VAddr addr, u64 size) override;
void SignalSemaphore(GPUVAddr addr, u32 value) override;
void SignalSyncPoint(u32 value) override;
void ReleaseFences() override;
void FlushAndInvalidateRegion(VAddr addr, u64 size) override;
void WaitForIdle() override;
void FragmentBarrier() override;
void TiledCacheBarrier() override;
void FlushCommands() override;
void TickFrame() override;
bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Surface& src,
const Tegra::Engines::Fermi2D::Surface& dst,
const Tegra::Engines::Fermi2D::Config& copy_config) override;
bool AccelerateDisplay(const Tegra::FramebufferConfig& config, VAddr framebuffer_addr,
u32 pixel_stride) override;
void LoadDiskResources(u64 title_id, const std::atomic_bool& stop_loading,
const VideoCore::DiskResourceLoadCallback& callback) override;
/// Returns true when there are commands queued to the OpenGL server.
bool AnyCommandQueued() const {
return num_queued_commands > 0;
}
VideoCommon::Shader::AsyncShaders& GetAsyncShaders() {
return async_shaders;
}
const VideoCommon::Shader::AsyncShaders& GetAsyncShaders() const {
return async_shaders;
}
private:
static constexpr size_t MAX_TEXTURES = 192;
static constexpr size_t MAX_IMAGES = 48;
static constexpr size_t MAX_IMAGE_VIEWS = MAX_TEXTURES + MAX_IMAGES;
void BindComputeTextures(Shader* kernel);
void BindTextures(const ShaderEntries& entries, GLuint base_texture, GLuint base_image,
size_t& image_view_index, size_t& texture_index, size_t& image_index);
/// Configures the current constbuffers to use for the draw command.
void SetupDrawConstBuffers(std::size_t stage_index, Shader* shader);
/// Configures the current constbuffers to use for the kernel invocation.
void SetupComputeConstBuffers(Shader* kernel);
/// Configures a constant buffer.
void SetupConstBuffer(GLenum stage, u32 binding, const Tegra::Engines::ConstBufferInfo& buffer,
const ConstBufferEntry& entry, bool use_unified,
std::size_t unified_offset);
/// Configures the current global memory entries to use for the draw command.
void SetupDrawGlobalMemory(std::size_t stage_index, Shader* shader);
/// Configures the current global memory entries to use for the kernel invocation.
void SetupComputeGlobalMemory(Shader* kernel);
/// Configures a global memory buffer.
void SetupGlobalMemory(u32 binding, const GlobalMemoryEntry& entry, GPUVAddr gpu_addr,
size_t size, BindlessSSBO* ssbo);
/// Configures the current textures to use for the draw command.
void SetupDrawTextures(const Shader* shader, size_t stage_index);
/// Configures the textures used in a compute shader.
void SetupComputeTextures(const Shader* kernel);
/// Configures images in a graphics shader.
void SetupDrawImages(const Shader* shader, size_t stage_index);
/// Configures images in a compute shader.
void SetupComputeImages(const Shader* shader);
/// Syncs the viewport and depth range to match the guest state
void SyncViewport();
/// Syncs the depth clamp state
void SyncDepthClamp();
/// Syncs the clip enabled status to match the guest state
void SyncClipEnabled(u32 clip_mask);
/// Syncs the clip coefficients to match the guest state
void SyncClipCoef();
/// Syncs the cull mode to match the guest state
void SyncCullMode();
/// Syncs the primitve restart to match the guest state
void SyncPrimitiveRestart();
/// Syncs the depth test state to match the guest state
void SyncDepthTestState();
/// Syncs the stencil test state to match the guest state
void SyncStencilTestState();
/// Syncs the blend state to match the guest state
void SyncBlendState();
/// Syncs the LogicOp state to match the guest state
void SyncLogicOpState();
/// Syncs the the color clamp state
void SyncFragmentColorClampState();
/// Syncs the alpha coverage and alpha to one
void SyncMultiSampleState();
/// Syncs the scissor test state to match the guest state
void SyncScissorTest();
/// Syncs the point state to match the guest state
void SyncPointState();
/// Syncs the line state to match the guest state
void SyncLineState();
/// Syncs the rasterizer enable state to match the guest state
void SyncRasterizeEnable();
/// Syncs polygon modes to match the guest state
void SyncPolygonModes();
/// Syncs Color Mask
void SyncColorMask();
/// Syncs the polygon offsets
void SyncPolygonOffset();
/// Syncs the alpha test state to match the guest state
void SyncAlphaTest();
/// Syncs the framebuffer sRGB state to match the guest state
void SyncFramebufferSRGB();
/// Syncs transform feedback state to match guest state
/// @note Only valid on assembly shaders
void SyncTransformFeedback();
/// Begin a transform feedback
void BeginTransformFeedback(GLenum primitive_mode);
/// End a transform feedback
void EndTransformFeedback();
std::size_t CalculateVertexArraysSize() const;
std::size_t CalculateIndexBufferSize() const;
/// Updates the current vertex format
void SetupVertexFormat();
void SetupVertexBuffer();
void SetupVertexInstances();
GLintptr SetupIndexBuffer();
void SetupShaders();
Tegra::GPU& gpu;
Tegra::Engines::Maxwell3D& maxwell3d;
Tegra::Engines::KeplerCompute& kepler_compute;
Tegra::MemoryManager& gpu_memory;
const Device& device;
ScreenInfo& screen_info;
ProgramManager& program_manager;
StateTracker& state_tracker;
OGLStreamBuffer stream_buffer;
TextureCacheRuntime texture_cache_runtime;
TextureCache texture_cache;
ShaderCacheOpenGL shader_cache;
QueryCache query_cache;
OGLBufferCache buffer_cache;
FenceManagerOpenGL fence_manager;
VideoCommon::Shader::AsyncShaders async_shaders;
boost::container::static_vector<u32, MAX_IMAGE_VIEWS> image_view_indices;
std::array<ImageViewId, MAX_IMAGE_VIEWS> image_view_ids;
boost::container::static_vector<GLuint, MAX_TEXTURES> sampler_handles;
std::array<GLuint, MAX_TEXTURES> texture_handles;
std::array<GLuint, MAX_IMAGES> image_handles;
std::array<OGLBuffer, Tegra::Engines::Maxwell3D::Regs::NumTransformFeedbackBuffers>
transform_feedback_buffers;
std::bitset<Tegra::Engines::Maxwell3D::Regs::NumTransformFeedbackBuffers>
enabled_transform_feedback_buffers;
static constexpr std::size_t NUM_CONSTANT_BUFFERS =
Tegra::Engines::Maxwell3D::Regs::MaxConstBuffers *
Tegra::Engines::Maxwell3D::Regs::MaxShaderProgram;
std::array<GLuint, NUM_CONSTANT_BUFFERS> staging_cbufs{};
std::size_t current_cbuf = 0;
OGLBuffer unified_uniform_buffer;
/// Number of commands queued to the OpenGL driver. Resetted on flush.
std::size_t num_queued_commands = 0;
u32 last_clip_distance_mask = 0;
};
} // namespace OpenGL

View File

@@ -0,0 +1,231 @@
// Copyright 2015 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <string_view>
#include <utility>
#include <glad/glad.h>
#include "common/common_types.h"
#include "common/microprofile.h"
#include "video_core/renderer_opengl/gl_resource_manager.h"
#include "video_core/renderer_opengl/gl_shader_util.h"
MICROPROFILE_DEFINE(OpenGL_ResourceCreation, "OpenGL", "Resource Creation", MP_RGB(128, 128, 192));
MICROPROFILE_DEFINE(OpenGL_ResourceDeletion, "OpenGL", "Resource Deletion", MP_RGB(128, 128, 192));
namespace OpenGL {
void OGLRenderbuffer::Create() {
if (handle != 0)
return;
MICROPROFILE_SCOPE(OpenGL_ResourceCreation);
glCreateRenderbuffers(1, &handle);
}
void OGLRenderbuffer::Release() {
if (handle == 0)
return;
MICROPROFILE_SCOPE(OpenGL_ResourceDeletion);
glDeleteRenderbuffers(1, &handle);
handle = 0;
}
void OGLTexture::Create(GLenum target) {
if (handle != 0)
return;
MICROPROFILE_SCOPE(OpenGL_ResourceCreation);
glCreateTextures(target, 1, &handle);
}
void OGLTexture::Release() {
if (handle == 0)
return;
MICROPROFILE_SCOPE(OpenGL_ResourceDeletion);
glDeleteTextures(1, &handle);
handle = 0;
}
void OGLTextureView::Create() {
if (handle != 0)
return;
MICROPROFILE_SCOPE(OpenGL_ResourceCreation);
glGenTextures(1, &handle);
}
void OGLTextureView::Release() {
if (handle == 0)
return;
MICROPROFILE_SCOPE(OpenGL_ResourceDeletion);
glDeleteTextures(1, &handle);
handle = 0;
}
void OGLSampler::Create() {
if (handle != 0)
return;
MICROPROFILE_SCOPE(OpenGL_ResourceCreation);
glCreateSamplers(1, &handle);
}
void OGLSampler::Release() {
if (handle == 0)
return;
MICROPROFILE_SCOPE(OpenGL_ResourceDeletion);
glDeleteSamplers(1, &handle);
handle = 0;
}
void OGLShader::Create(std::string_view source, GLenum type) {
if (handle != 0) {
return;
}
if (source.empty()) {
return;
}
MICROPROFILE_SCOPE(OpenGL_ResourceCreation);
handle = GLShader::LoadShader(source, type);
}
void OGLShader::Release() {
if (handle == 0)
return;
MICROPROFILE_SCOPE(OpenGL_ResourceDeletion);
glDeleteShader(handle);
handle = 0;
}
void OGLProgram::CreateFromSource(const char* vert_shader, const char* geo_shader,
const char* frag_shader, bool separable_program,
bool hint_retrievable) {
OGLShader vert, geo, frag;
if (vert_shader)
vert.Create(vert_shader, GL_VERTEX_SHADER);
if (geo_shader)
geo.Create(geo_shader, GL_GEOMETRY_SHADER);
if (frag_shader)
frag.Create(frag_shader, GL_FRAGMENT_SHADER);
MICROPROFILE_SCOPE(OpenGL_ResourceCreation);
Create(separable_program, hint_retrievable, vert.handle, geo.handle, frag.handle);
}
void OGLProgram::Release() {
if (handle == 0)
return;
MICROPROFILE_SCOPE(OpenGL_ResourceDeletion);
glDeleteProgram(handle);
handle = 0;
}
void OGLAssemblyProgram::Release() {
if (handle == 0) {
return;
}
MICROPROFILE_SCOPE(OpenGL_ResourceDeletion);
glDeleteProgramsARB(1, &handle);
handle = 0;
}
void OGLPipeline::Create() {
if (handle != 0)
return;
MICROPROFILE_SCOPE(OpenGL_ResourceCreation);
glGenProgramPipelines(1, &handle);
}
void OGLPipeline::Release() {
if (handle == 0)
return;
MICROPROFILE_SCOPE(OpenGL_ResourceDeletion);
glDeleteProgramPipelines(1, &handle);
handle = 0;
}
void OGLBuffer::Create() {
if (handle != 0)
return;
MICROPROFILE_SCOPE(OpenGL_ResourceCreation);
glCreateBuffers(1, &handle);
}
void OGLBuffer::Release() {
if (handle == 0)
return;
MICROPROFILE_SCOPE(OpenGL_ResourceDeletion);
glDeleteBuffers(1, &handle);
handle = 0;
}
void OGLBuffer::MakeStreamCopy(std::size_t buffer_size) {
ASSERT_OR_EXECUTE((handle != 0 && buffer_size != 0), { return; });
glNamedBufferData(handle, buffer_size, nullptr, GL_STREAM_COPY);
}
void OGLSync::Create() {
if (handle != 0)
return;
// Don't profile here, this one is expected to happen ingame.
handle = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0);
}
void OGLSync::Release() {
if (handle == 0)
return;
// Don't profile here, this one is expected to happen ingame.
glDeleteSync(handle);
handle = 0;
}
void OGLFramebuffer::Create() {
if (handle != 0)
return;
MICROPROFILE_SCOPE(OpenGL_ResourceCreation);
glGenFramebuffers(1, &handle);
}
void OGLFramebuffer::Release() {
if (handle == 0)
return;
MICROPROFILE_SCOPE(OpenGL_ResourceDeletion);
glDeleteFramebuffers(1, &handle);
handle = 0;
}
void OGLQuery::Create(GLenum target) {
if (handle != 0)
return;
MICROPROFILE_SCOPE(OpenGL_ResourceCreation);
glCreateQueries(target, 1, &handle);
}
void OGLQuery::Release() {
if (handle == 0)
return;
MICROPROFILE_SCOPE(OpenGL_ResourceDeletion);
glDeleteQueries(1, &handle);
handle = 0;
}
} // namespace OpenGL

View File

@@ -0,0 +1,317 @@
// Copyright 2015 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <string_view>
#include <utility>
#include <glad/glad.h>
#include "common/common_types.h"
#include "video_core/renderer_opengl/gl_shader_util.h"
namespace OpenGL {
class OGLRenderbuffer : private NonCopyable {
public:
OGLRenderbuffer() = default;
OGLRenderbuffer(OGLRenderbuffer&& o) noexcept : handle(std::exchange(o.handle, 0)) {}
~OGLRenderbuffer() {
Release();
}
OGLRenderbuffer& operator=(OGLRenderbuffer&& o) noexcept {
Release();
handle = std::exchange(o.handle, 0);
return *this;
}
/// Creates a new internal OpenGL resource and stores the handle
void Create();
/// Deletes the internal OpenGL resource
void Release();
GLuint handle = 0;
};
class OGLTexture : private NonCopyable {
public:
OGLTexture() = default;
OGLTexture(OGLTexture&& o) noexcept : handle(std::exchange(o.handle, 0)) {}
~OGLTexture() {
Release();
}
OGLTexture& operator=(OGLTexture&& o) noexcept {
Release();
handle = std::exchange(o.handle, 0);
return *this;
}
/// Creates a new internal OpenGL resource and stores the handle
void Create(GLenum target);
/// Deletes the internal OpenGL resource
void Release();
GLuint handle = 0;
};
class OGLTextureView : private NonCopyable {
public:
OGLTextureView() = default;
OGLTextureView(OGLTextureView&& o) noexcept : handle(std::exchange(o.handle, 0)) {}
~OGLTextureView() {
Release();
}
OGLTextureView& operator=(OGLTextureView&& o) noexcept {
Release();
handle = std::exchange(o.handle, 0);
return *this;
}
/// Creates a new internal OpenGL resource and stores the handle
void Create();
/// Deletes the internal OpenGL resource
void Release();
GLuint handle = 0;
};
class OGLSampler : private NonCopyable {
public:
OGLSampler() = default;
OGLSampler(OGLSampler&& o) noexcept : handle(std::exchange(o.handle, 0)) {}
~OGLSampler() {
Release();
}
OGLSampler& operator=(OGLSampler&& o) noexcept {
Release();
handle = std::exchange(o.handle, 0);
return *this;
}
/// Creates a new internal OpenGL resource and stores the handle
void Create();
/// Deletes the internal OpenGL resource
void Release();
GLuint handle = 0;
};
class OGLShader : private NonCopyable {
public:
OGLShader() = default;
OGLShader(OGLShader&& o) noexcept : handle(std::exchange(o.handle, 0)) {}
~OGLShader() {
Release();
}
OGLShader& operator=(OGLShader&& o) noexcept {
Release();
handle = std::exchange(o.handle, 0);
return *this;
}
void Create(std::string_view source, GLenum type);
void Release();
GLuint handle = 0;
};
class OGLProgram : private NonCopyable {
public:
OGLProgram() = default;
OGLProgram(OGLProgram&& o) noexcept : handle(std::exchange(o.handle, 0)) {}
~OGLProgram() {
Release();
}
OGLProgram& operator=(OGLProgram&& o) noexcept {
Release();
handle = std::exchange(o.handle, 0);
return *this;
}
template <typename... T>
void Create(bool separable_program, bool hint_retrievable, T... shaders) {
if (handle != 0)
return;
handle = GLShader::LoadProgram(separable_program, hint_retrievable, shaders...);
}
/// Creates a new internal OpenGL resource and stores the handle
void CreateFromSource(const char* vert_shader, const char* geo_shader, const char* frag_shader,
bool separable_program = false, bool hint_retrievable = false);
/// Deletes the internal OpenGL resource
void Release();
GLuint handle = 0;
};
class OGLAssemblyProgram : private NonCopyable {
public:
OGLAssemblyProgram() = default;
OGLAssemblyProgram(OGLAssemblyProgram&& o) noexcept : handle(std::exchange(o.handle, 0)) {}
~OGLAssemblyProgram() {
Release();
}
OGLAssemblyProgram& operator=(OGLAssemblyProgram&& o) noexcept {
Release();
handle = std::exchange(o.handle, 0);
return *this;
}
/// Deletes the internal OpenGL resource
void Release();
GLuint handle = 0;
};
class OGLPipeline : private NonCopyable {
public:
OGLPipeline() = default;
OGLPipeline(OGLPipeline&& o) noexcept : handle{std::exchange<GLuint>(o.handle, 0)} {}
~OGLPipeline() {
Release();
}
OGLPipeline& operator=(OGLPipeline&& o) noexcept {
handle = std::exchange<GLuint>(o.handle, 0);
return *this;
}
/// Creates a new internal OpenGL resource and stores the handle
void Create();
/// Deletes the internal OpenGL resource
void Release();
GLuint handle = 0;
};
class OGLBuffer : private NonCopyable {
public:
OGLBuffer() = default;
OGLBuffer(OGLBuffer&& o) noexcept : handle(std::exchange(o.handle, 0)) {}
~OGLBuffer() {
Release();
}
OGLBuffer& operator=(OGLBuffer&& o) noexcept {
Release();
handle = std::exchange(o.handle, 0);
return *this;
}
/// Creates a new internal OpenGL resource and stores the handle
void Create();
/// Deletes the internal OpenGL resource
void Release();
// Converts the buffer into a stream copy buffer with a fixed size
void MakeStreamCopy(std::size_t buffer_size);
GLuint handle = 0;
};
class OGLSync : private NonCopyable {
public:
OGLSync() = default;
OGLSync(OGLSync&& o) noexcept : handle(std::exchange(o.handle, nullptr)) {}
~OGLSync() {
Release();
}
OGLSync& operator=(OGLSync&& o) noexcept {
Release();
handle = std::exchange(o.handle, nullptr);
return *this;
}
/// Creates a new internal OpenGL resource and stores the handle
void Create();
/// Deletes the internal OpenGL resource
void Release();
GLsync handle = 0;
};
class OGLFramebuffer : private NonCopyable {
public:
OGLFramebuffer() = default;
OGLFramebuffer(OGLFramebuffer&& o) noexcept : handle(std::exchange(o.handle, 0)) {}
~OGLFramebuffer() {
Release();
}
OGLFramebuffer& operator=(OGLFramebuffer&& o) noexcept {
Release();
handle = std::exchange(o.handle, 0);
return *this;
}
/// Creates a new internal OpenGL resource and stores the handle
void Create();
/// Deletes the internal OpenGL resource
void Release();
GLuint handle = 0;
};
class OGLQuery : private NonCopyable {
public:
OGLQuery() = default;
OGLQuery(OGLQuery&& o) noexcept : handle(std::exchange(o.handle, 0)) {}
~OGLQuery() {
Release();
}
OGLQuery& operator=(OGLQuery&& o) noexcept {
Release();
handle = std::exchange(o.handle, 0);
return *this;
}
/// Creates a new internal OpenGL resource and stores the handle
void Create(GLenum target);
/// Deletes the internal OpenGL resource
void Release();
GLuint handle = 0;
};
} // namespace OpenGL

View File

@@ -0,0 +1,601 @@
// Copyright 2018 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <atomic>
#include <functional>
#include <mutex>
#include <optional>
#include <string>
#include <thread>
#include <unordered_set>
#include "common/alignment.h"
#include "common/assert.h"
#include "common/logging/log.h"
#include "common/scope_exit.h"
#include "core/core.h"
#include "core/frontend/emu_window.h"
#include "video_core/engines/kepler_compute.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/engines/shader_type.h"
#include "video_core/memory_manager.h"
#include "video_core/renderer_opengl/gl_arb_decompiler.h"
#include "video_core/renderer_opengl/gl_rasterizer.h"
#include "video_core/renderer_opengl/gl_resource_manager.h"
#include "video_core/renderer_opengl/gl_shader_cache.h"
#include "video_core/renderer_opengl/gl_shader_decompiler.h"
#include "video_core/renderer_opengl/gl_shader_disk_cache.h"
#include "video_core/renderer_opengl/gl_state_tracker.h"
#include "video_core/shader/memory_util.h"
#include "video_core/shader/registry.h"
#include "video_core/shader/shader_ir.h"
#include "video_core/shader_cache.h"
#include "video_core/shader_notify.h"
namespace OpenGL {
using Tegra::Engines::ShaderType;
using VideoCommon::Shader::GetShaderAddress;
using VideoCommon::Shader::GetShaderCode;
using VideoCommon::Shader::GetUniqueIdentifier;
using VideoCommon::Shader::KERNEL_MAIN_OFFSET;
using VideoCommon::Shader::ProgramCode;
using VideoCommon::Shader::Registry;
using VideoCommon::Shader::ShaderIR;
using VideoCommon::Shader::STAGE_MAIN_OFFSET;
namespace {
constexpr VideoCommon::Shader::CompilerSettings COMPILER_SETTINGS{};
/// Gets the shader type from a Maxwell program type
constexpr GLenum GetGLShaderType(ShaderType shader_type) {
switch (shader_type) {
case ShaderType::Vertex:
return GL_VERTEX_SHADER;
case ShaderType::Geometry:
return GL_GEOMETRY_SHADER;
case ShaderType::Fragment:
return GL_FRAGMENT_SHADER;
case ShaderType::Compute:
return GL_COMPUTE_SHADER;
default:
return GL_NONE;
}
}
constexpr const char* GetShaderTypeName(ShaderType shader_type) {
switch (shader_type) {
case ShaderType::Vertex:
return "VS";
case ShaderType::TesselationControl:
return "HS";
case ShaderType::TesselationEval:
return "DS";
case ShaderType::Geometry:
return "GS";
case ShaderType::Fragment:
return "FS";
case ShaderType::Compute:
return "CS";
}
return "UNK";
}
constexpr ShaderType GetShaderType(Maxwell::ShaderProgram program_type) {
switch (program_type) {
case Maxwell::ShaderProgram::VertexA:
case Maxwell::ShaderProgram::VertexB:
return ShaderType::Vertex;
case Maxwell::ShaderProgram::TesselationControl:
return ShaderType::TesselationControl;
case Maxwell::ShaderProgram::TesselationEval:
return ShaderType::TesselationEval;
case Maxwell::ShaderProgram::Geometry:
return ShaderType::Geometry;
case Maxwell::ShaderProgram::Fragment:
return ShaderType::Fragment;
}
return {};
}
constexpr GLenum AssemblyEnum(ShaderType shader_type) {
switch (shader_type) {
case ShaderType::Vertex:
return GL_VERTEX_PROGRAM_NV;
case ShaderType::TesselationControl:
return GL_TESS_CONTROL_PROGRAM_NV;
case ShaderType::TesselationEval:
return GL_TESS_EVALUATION_PROGRAM_NV;
case ShaderType::Geometry:
return GL_GEOMETRY_PROGRAM_NV;
case ShaderType::Fragment:
return GL_FRAGMENT_PROGRAM_NV;
case ShaderType::Compute:
return GL_COMPUTE_PROGRAM_NV;
}
return {};
}
std::string MakeShaderID(u64 unique_identifier, ShaderType shader_type) {
return fmt::format("{}{:016X}", GetShaderTypeName(shader_type), unique_identifier);
}
std::shared_ptr<Registry> MakeRegistry(const ShaderDiskCacheEntry& entry) {
const VideoCore::GuestDriverProfile guest_profile{entry.texture_handler_size};
const VideoCommon::Shader::SerializedRegistryInfo info{guest_profile, entry.bound_buffer,
entry.graphics_info, entry.compute_info};
auto registry = std::make_shared<Registry>(entry.type, info);
for (const auto& [address, value] : entry.keys) {
const auto [buffer, offset] = address;
registry->InsertKey(buffer, offset, value);
}
for (const auto& [offset, sampler] : entry.bound_samplers) {
registry->InsertBoundSampler(offset, sampler);
}
for (const auto& [key, sampler] : entry.bindless_samplers) {
const auto [buffer, offset] = key;
registry->InsertBindlessSampler(buffer, offset, sampler);
}
return registry;
}
std::unordered_set<GLenum> GetSupportedFormats() {
GLint num_formats;
glGetIntegerv(GL_NUM_PROGRAM_BINARY_FORMATS, &num_formats);
std::vector<GLint> formats(num_formats);
glGetIntegerv(GL_PROGRAM_BINARY_FORMATS, formats.data());
std::unordered_set<GLenum> supported_formats;
for (const GLint format : formats) {
supported_formats.insert(static_cast<GLenum>(format));
}
return supported_formats;
}
} // Anonymous namespace
ProgramSharedPtr BuildShader(const Device& device, ShaderType shader_type, u64 unique_identifier,
const ShaderIR& ir, const Registry& registry, bool hint_retrievable) {
const std::string shader_id = MakeShaderID(unique_identifier, shader_type);
LOG_INFO(Render_OpenGL, "{}", shader_id);
auto program = std::make_shared<ProgramHandle>();
if (device.UseAssemblyShaders()) {
const std::string arb =
DecompileAssemblyShader(device, ir, registry, shader_type, shader_id);
GLuint& arb_prog = program->assembly_program.handle;
// Commented out functions signal OpenGL errors but are compatible with apitrace.
// Use them only to capture and replay on apitrace.
#if 0
glGenProgramsNV(1, &arb_prog);
glLoadProgramNV(AssemblyEnum(shader_type), arb_prog, static_cast<GLsizei>(arb.size()),
reinterpret_cast<const GLubyte*>(arb.data()));
#else
glGenProgramsARB(1, &arb_prog);
glNamedProgramStringEXT(arb_prog, AssemblyEnum(shader_type), GL_PROGRAM_FORMAT_ASCII_ARB,
static_cast<GLsizei>(arb.size()), arb.data());
#endif
const auto err = reinterpret_cast<const char*>(glGetString(GL_PROGRAM_ERROR_STRING_NV));
if (err && *err) {
LOG_CRITICAL(Render_OpenGL, "{}", err);
LOG_INFO(Render_OpenGL, "\n{}", arb);
}
} else {
const std::string glsl = DecompileShader(device, ir, registry, shader_type, shader_id);
OGLShader shader;
shader.Create(glsl.c_str(), GetGLShaderType(shader_type));
program->source_program.Create(true, hint_retrievable, shader.handle);
}
return program;
}
Shader::Shader(std::shared_ptr<Registry> registry_, ShaderEntries entries_,
ProgramSharedPtr program_, bool is_built_)
: registry{std::move(registry_)}, entries{std::move(entries_)}, program{std::move(program_)},
is_built{is_built_} {
handle = program->assembly_program.handle;
if (handle == 0) {
handle = program->source_program.handle;
}
if (is_built) {
ASSERT(handle != 0);
}
}
Shader::~Shader() = default;
GLuint Shader::GetHandle() const {
DEBUG_ASSERT(registry->IsConsistent());
return handle;
}
bool Shader::IsBuilt() const {
return is_built;
}
void Shader::AsyncOpenGLBuilt(OGLProgram new_program) {
program->source_program = std::move(new_program);
handle = program->source_program.handle;
is_built = true;
}
void Shader::AsyncGLASMBuilt(OGLAssemblyProgram new_program) {
program->assembly_program = std::move(new_program);
handle = program->assembly_program.handle;
is_built = true;
}
std::unique_ptr<Shader> Shader::CreateStageFromMemory(
const ShaderParameters& params, Maxwell::ShaderProgram program_type, ProgramCode code,
ProgramCode code_b, VideoCommon::Shader::AsyncShaders& async_shaders, VAddr cpu_addr) {
const auto shader_type = GetShaderType(program_type);
auto& gpu = params.gpu;
gpu.ShaderNotify().MarkSharderBuilding();
auto registry = std::make_shared<Registry>(shader_type, gpu.Maxwell3D());
if (!async_shaders.IsShaderAsync(gpu) || !params.device.UseAsynchronousShaders()) {
const ShaderIR ir(code, STAGE_MAIN_OFFSET, COMPILER_SETTINGS, *registry);
// TODO(Rodrigo): Handle VertexA shaders
// std::optional<ShaderIR> ir_b;
// if (!code_b.empty()) {
// ir_b.emplace(code_b, STAGE_MAIN_OFFSET);
// }
auto program =
BuildShader(params.device, shader_type, params.unique_identifier, ir, *registry);
ShaderDiskCacheEntry entry;
entry.type = shader_type;
entry.code = std::move(code);
entry.code_b = std::move(code_b);
entry.unique_identifier = params.unique_identifier;
entry.bound_buffer = registry->GetBoundBuffer();
entry.graphics_info = registry->GetGraphicsInfo();
entry.keys = registry->GetKeys();
entry.bound_samplers = registry->GetBoundSamplers();
entry.bindless_samplers = registry->GetBindlessSamplers();
params.disk_cache.SaveEntry(std::move(entry));
gpu.ShaderNotify().MarkShaderComplete();
return std::unique_ptr<Shader>(new Shader(std::move(registry),
MakeEntries(params.device, ir, shader_type),
std::move(program), true));
} else {
// Required for entries
const ShaderIR ir(code, STAGE_MAIN_OFFSET, COMPILER_SETTINGS, *registry);
auto entries = MakeEntries(params.device, ir, shader_type);
async_shaders.QueueOpenGLShader(params.device, shader_type, params.unique_identifier,
std::move(code), std::move(code_b), STAGE_MAIN_OFFSET,
COMPILER_SETTINGS, *registry, cpu_addr);
auto program = std::make_shared<ProgramHandle>();
return std::unique_ptr<Shader>(
new Shader(std::move(registry), std::move(entries), std::move(program), false));
}
}
std::unique_ptr<Shader> Shader::CreateKernelFromMemory(const ShaderParameters& params,
ProgramCode code) {
auto& gpu = params.gpu;
gpu.ShaderNotify().MarkSharderBuilding();
auto registry = std::make_shared<Registry>(ShaderType::Compute, params.engine);
const ShaderIR ir(code, KERNEL_MAIN_OFFSET, COMPILER_SETTINGS, *registry);
const u64 uid = params.unique_identifier;
auto program = BuildShader(params.device, ShaderType::Compute, uid, ir, *registry);
ShaderDiskCacheEntry entry;
entry.type = ShaderType::Compute;
entry.code = std::move(code);
entry.unique_identifier = uid;
entry.bound_buffer = registry->GetBoundBuffer();
entry.compute_info = registry->GetComputeInfo();
entry.keys = registry->GetKeys();
entry.bound_samplers = registry->GetBoundSamplers();
entry.bindless_samplers = registry->GetBindlessSamplers();
params.disk_cache.SaveEntry(std::move(entry));
gpu.ShaderNotify().MarkShaderComplete();
return std::unique_ptr<Shader>(new Shader(std::move(registry),
MakeEntries(params.device, ir, ShaderType::Compute),
std::move(program)));
}
std::unique_ptr<Shader> Shader::CreateFromCache(const ShaderParameters& params,
const PrecompiledShader& precompiled_shader) {
return std::unique_ptr<Shader>(new Shader(
precompiled_shader.registry, precompiled_shader.entries, precompiled_shader.program));
}
ShaderCacheOpenGL::ShaderCacheOpenGL(RasterizerOpenGL& rasterizer_,
Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu_,
Tegra::Engines::Maxwell3D& maxwell3d_,
Tegra::Engines::KeplerCompute& kepler_compute_,
Tegra::MemoryManager& gpu_memory_, const Device& device_)
: ShaderCache{rasterizer_}, emu_window{emu_window_}, gpu{gpu_}, gpu_memory{gpu_memory_},
maxwell3d{maxwell3d_}, kepler_compute{kepler_compute_}, device{device_} {}
ShaderCacheOpenGL::~ShaderCacheOpenGL() = default;
void ShaderCacheOpenGL::LoadDiskCache(u64 title_id, const std::atomic_bool& stop_loading,
const VideoCore::DiskResourceLoadCallback& callback) {
disk_cache.BindTitleID(title_id);
const std::optional transferable = disk_cache.LoadTransferable();
if (!transferable) {
return;
}
std::vector<ShaderDiskCachePrecompiled> gl_cache;
if (!device.UseAssemblyShaders()) {
// Only load precompiled cache when we are not using assembly shaders
gl_cache = disk_cache.LoadPrecompiled();
}
const auto supported_formats = GetSupportedFormats();
// Track if precompiled cache was altered during loading to know if we have to
// serialize the virtual precompiled cache file back to the hard drive
bool precompiled_cache_altered = false;
// Inform the frontend about shader build initialization
if (callback) {
callback(VideoCore::LoadCallbackStage::Build, 0, transferable->size());
}
std::mutex mutex;
std::size_t built_shaders = 0; // It doesn't have be atomic since it's used behind a mutex
std::atomic_bool gl_cache_failed = false;
const auto find_precompiled = [&gl_cache](u64 id) {
return std::find_if(gl_cache.begin(), gl_cache.end(),
[id](const auto& entry) { return entry.unique_identifier == id; });
};
const auto worker = [&](Core::Frontend::GraphicsContext* context, std::size_t begin,
std::size_t end) {
const auto scope = context->Acquire();
for (std::size_t i = begin; i < end; ++i) {
if (stop_loading) {
return;
}
const auto& entry = (*transferable)[i];
const u64 uid = entry.unique_identifier;
const auto it = find_precompiled(uid);
const auto precompiled_entry = it != gl_cache.end() ? &*it : nullptr;
const bool is_compute = entry.type == ShaderType::Compute;
const u32 main_offset = is_compute ? KERNEL_MAIN_OFFSET : STAGE_MAIN_OFFSET;
auto registry = MakeRegistry(entry);
const ShaderIR ir(entry.code, main_offset, COMPILER_SETTINGS, *registry);
ProgramSharedPtr program;
if (precompiled_entry) {
// If the shader is precompiled, attempt to load it with
program = GeneratePrecompiledProgram(entry, *precompiled_entry, supported_formats);
if (!program) {
gl_cache_failed = true;
}
}
if (!program) {
// Otherwise compile it from GLSL
program = BuildShader(device, entry.type, uid, ir, *registry, true);
}
PrecompiledShader shader;
shader.program = std::move(program);
shader.registry = std::move(registry);
shader.entries = MakeEntries(device, ir, entry.type);
std::scoped_lock lock{mutex};
if (callback) {
callback(VideoCore::LoadCallbackStage::Build, ++built_shaders,
transferable->size());
}
runtime_cache.emplace(entry.unique_identifier, std::move(shader));
}
};
const std::size_t num_workers{std::max(1U, std::thread::hardware_concurrency())};
const std::size_t bucket_size{transferable->size() / num_workers};
std::vector<std::unique_ptr<Core::Frontend::GraphicsContext>> contexts(num_workers);
std::vector<std::thread> threads(num_workers);
for (std::size_t i = 0; i < num_workers; ++i) {
const bool is_last_worker = i + 1 == num_workers;
const std::size_t start{bucket_size * i};
const std::size_t end{is_last_worker ? transferable->size() : start + bucket_size};
// On some platforms the shared context has to be created from the GUI thread
contexts[i] = emu_window.CreateSharedContext();
threads[i] = std::thread(worker, contexts[i].get(), start, end);
}
for (auto& thread : threads) {
thread.join();
}
if (gl_cache_failed) {
// Invalidate the precompiled cache if a shader dumped shader was rejected
disk_cache.InvalidatePrecompiled();
precompiled_cache_altered = true;
return;
}
if (stop_loading) {
return;
}
if (device.UseAssemblyShaders()) {
// Don't store precompiled binaries for assembly shaders.
return;
}
// TODO(Rodrigo): Do state tracking for transferable shaders and do a dummy draw
// before precompiling them
for (std::size_t i = 0; i < transferable->size(); ++i) {
const u64 id = (*transferable)[i].unique_identifier;
const auto it = find_precompiled(id);
if (it == gl_cache.end()) {
const GLuint program = runtime_cache.at(id).program->source_program.handle;
disk_cache.SavePrecompiled(id, program);
precompiled_cache_altered = true;
}
}
if (precompiled_cache_altered) {
disk_cache.SaveVirtualPrecompiledFile();
}
}
ProgramSharedPtr ShaderCacheOpenGL::GeneratePrecompiledProgram(
const ShaderDiskCacheEntry& entry, const ShaderDiskCachePrecompiled& precompiled_entry,
const std::unordered_set<GLenum>& supported_formats) {
if (!supported_formats.contains(precompiled_entry.binary_format)) {
LOG_INFO(Render_OpenGL, "Precompiled cache entry with unsupported format, removing");
return {};
}
auto program = std::make_shared<ProgramHandle>();
GLuint& handle = program->source_program.handle;
handle = glCreateProgram();
glProgramParameteri(handle, GL_PROGRAM_SEPARABLE, GL_TRUE);
glProgramBinary(handle, precompiled_entry.binary_format, precompiled_entry.binary.data(),
static_cast<GLsizei>(precompiled_entry.binary.size()));
GLint link_status;
glGetProgramiv(handle, GL_LINK_STATUS, &link_status);
if (link_status == GL_FALSE) {
LOG_INFO(Render_OpenGL, "Precompiled cache rejected by the driver, removing");
return {};
}
return program;
}
Shader* ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program,
VideoCommon::Shader::AsyncShaders& async_shaders) {
if (!maxwell3d.dirty.flags[Dirty::Shaders]) {
auto* last_shader = last_shaders[static_cast<std::size_t>(program)];
if (last_shader->IsBuilt()) {
return last_shader;
}
}
const GPUVAddr address{GetShaderAddress(maxwell3d, program)};
if (device.UseAsynchronousShaders() && async_shaders.HasCompletedWork()) {
auto completed_work = async_shaders.GetCompletedWork();
for (auto& work : completed_work) {
Shader* shader = TryGet(work.cpu_address);
gpu.ShaderNotify().MarkShaderComplete();
if (shader == nullptr) {
continue;
}
using namespace VideoCommon::Shader;
if (work.backend == AsyncShaders::Backend::OpenGL) {
shader->AsyncOpenGLBuilt(std::move(work.program.opengl));
} else if (work.backend == AsyncShaders::Backend::GLASM) {
shader->AsyncGLASMBuilt(std::move(work.program.glasm));
}
auto& registry = shader->GetRegistry();
ShaderDiskCacheEntry entry;
entry.type = work.shader_type;
entry.code = std::move(work.code);
entry.code_b = std::move(work.code_b);
entry.unique_identifier = work.uid;
entry.bound_buffer = registry.GetBoundBuffer();
entry.graphics_info = registry.GetGraphicsInfo();
entry.keys = registry.GetKeys();
entry.bound_samplers = registry.GetBoundSamplers();
entry.bindless_samplers = registry.GetBindlessSamplers();
disk_cache.SaveEntry(std::move(entry));
}
}
// Look up shader in the cache based on address
const std::optional<VAddr> cpu_addr{gpu_memory.GpuToCpuAddress(address)};
if (Shader* const shader{cpu_addr ? TryGet(*cpu_addr) : null_shader.get()}) {
return last_shaders[static_cast<std::size_t>(program)] = shader;
}
const u8* const host_ptr{gpu_memory.GetPointer(address)};
// No shader found - create a new one
ProgramCode code{GetShaderCode(gpu_memory, address, host_ptr, false)};
ProgramCode code_b;
if (program == Maxwell::ShaderProgram::VertexA) {
const GPUVAddr address_b{GetShaderAddress(maxwell3d, Maxwell::ShaderProgram::VertexB)};
const u8* host_ptr_b = gpu_memory.GetPointer(address_b);
code_b = GetShaderCode(gpu_memory, address_b, host_ptr_b, false);
}
const std::size_t code_size = code.size() * sizeof(u64);
const u64 unique_identifier = GetUniqueIdentifier(
GetShaderType(program), program == Maxwell::ShaderProgram::VertexA, code, code_b);
const ShaderParameters params{gpu, maxwell3d, disk_cache, device,
*cpu_addr, host_ptr, unique_identifier};
std::unique_ptr<Shader> shader;
const auto found = runtime_cache.find(unique_identifier);
if (found == runtime_cache.end()) {
shader = Shader::CreateStageFromMemory(params, program, std::move(code), std::move(code_b),
async_shaders, cpu_addr.value_or(0));
} else {
shader = Shader::CreateFromCache(params, found->second);
}
Shader* const result = shader.get();
if (cpu_addr) {
Register(std::move(shader), *cpu_addr, code_size);
} else {
null_shader = std::move(shader);
}
return last_shaders[static_cast<std::size_t>(program)] = result;
}
Shader* ShaderCacheOpenGL::GetComputeKernel(GPUVAddr code_addr) {
const std::optional<VAddr> cpu_addr{gpu_memory.GpuToCpuAddress(code_addr)};
if (Shader* const kernel = cpu_addr ? TryGet(*cpu_addr) : null_kernel.get()) {
return kernel;
}
// No kernel found, create a new one
const u8* host_ptr{gpu_memory.GetPointer(code_addr)};
ProgramCode code{GetShaderCode(gpu_memory, code_addr, host_ptr, true)};
const std::size_t code_size{code.size() * sizeof(u64)};
const u64 unique_identifier{GetUniqueIdentifier(ShaderType::Compute, false, code)};
const ShaderParameters params{gpu, kepler_compute, disk_cache, device,
*cpu_addr, host_ptr, unique_identifier};
std::unique_ptr<Shader> kernel;
const auto found = runtime_cache.find(unique_identifier);
if (found == runtime_cache.end()) {
kernel = Shader::CreateKernelFromMemory(params, std::move(code));
} else {
kernel = Shader::CreateFromCache(params, found->second);
}
Shader* const result = kernel.get();
if (cpu_addr) {
Register(std::move(kernel), *cpu_addr, code_size);
} else {
null_kernel = std::move(kernel);
}
return result;
}
} // namespace OpenGL

View File

@@ -0,0 +1,161 @@
// Copyright 2018 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <array>
#include <atomic>
#include <bitset>
#include <memory>
#include <string>
#include <tuple>
#include <unordered_map>
#include <unordered_set>
#include <vector>
#include <glad/glad.h>
#include "common/common_types.h"
#include "video_core/engines/shader_type.h"
#include "video_core/renderer_opengl/gl_resource_manager.h"
#include "video_core/renderer_opengl/gl_shader_decompiler.h"
#include "video_core/renderer_opengl/gl_shader_disk_cache.h"
#include "video_core/shader/registry.h"
#include "video_core/shader/shader_ir.h"
#include "video_core/shader_cache.h"
namespace Tegra {
class MemoryManager;
}
namespace Core::Frontend {
class EmuWindow;
}
namespace VideoCommon::Shader {
class AsyncShaders;
}
namespace OpenGL {
class Device;
class RasterizerOpenGL;
using Maxwell = Tegra::Engines::Maxwell3D::Regs;
struct ProgramHandle {
OGLProgram source_program;
OGLAssemblyProgram assembly_program;
};
using ProgramSharedPtr = std::shared_ptr<ProgramHandle>;
struct PrecompiledShader {
ProgramSharedPtr program;
std::shared_ptr<VideoCommon::Shader::Registry> registry;
ShaderEntries entries;
};
struct ShaderParameters {
Tegra::GPU& gpu;
Tegra::Engines::ConstBufferEngineInterface& engine;
ShaderDiskCacheOpenGL& disk_cache;
const Device& device;
VAddr cpu_addr;
const u8* host_ptr;
u64 unique_identifier;
};
ProgramSharedPtr BuildShader(const Device& device, Tegra::Engines::ShaderType shader_type,
u64 unique_identifier, const VideoCommon::Shader::ShaderIR& ir,
const VideoCommon::Shader::Registry& registry,
bool hint_retrievable = false);
class Shader final {
public:
~Shader();
/// Gets the GL program handle for the shader
GLuint GetHandle() const;
bool IsBuilt() const;
/// Gets the shader entries for the shader
const ShaderEntries& GetEntries() const {
return entries;
}
const VideoCommon::Shader::Registry& GetRegistry() const {
return *registry;
}
/// Mark a OpenGL shader as built
void AsyncOpenGLBuilt(OGLProgram new_program);
/// Mark a GLASM shader as built
void AsyncGLASMBuilt(OGLAssemblyProgram new_program);
static std::unique_ptr<Shader> CreateStageFromMemory(
const ShaderParameters& params, Maxwell::ShaderProgram program_type,
ProgramCode program_code, ProgramCode program_code_b,
VideoCommon::Shader::AsyncShaders& async_shaders, VAddr cpu_addr);
static std::unique_ptr<Shader> CreateKernelFromMemory(const ShaderParameters& params,
ProgramCode code);
static std::unique_ptr<Shader> CreateFromCache(const ShaderParameters& params,
const PrecompiledShader& precompiled_shader);
private:
explicit Shader(std::shared_ptr<VideoCommon::Shader::Registry> registry, ShaderEntries entries,
ProgramSharedPtr program, bool is_built_ = true);
std::shared_ptr<VideoCommon::Shader::Registry> registry;
ShaderEntries entries;
ProgramSharedPtr program;
GLuint handle = 0;
bool is_built{};
};
class ShaderCacheOpenGL final : public VideoCommon::ShaderCache<Shader> {
public:
explicit ShaderCacheOpenGL(RasterizerOpenGL& rasterizer_,
Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu,
Tegra::Engines::Maxwell3D& maxwell3d_,
Tegra::Engines::KeplerCompute& kepler_compute_,
Tegra::MemoryManager& gpu_memory_, const Device& device_);
~ShaderCacheOpenGL() override;
/// Loads disk cache for the current game
void LoadDiskCache(u64 title_id, const std::atomic_bool& stop_loading,
const VideoCore::DiskResourceLoadCallback& callback);
/// Gets the current specified shader stage program
Shader* GetStageProgram(Maxwell::ShaderProgram program,
VideoCommon::Shader::AsyncShaders& async_shaders);
/// Gets a compute kernel in the passed address
Shader* GetComputeKernel(GPUVAddr code_addr);
private:
ProgramSharedPtr GeneratePrecompiledProgram(
const ShaderDiskCacheEntry& entry, const ShaderDiskCachePrecompiled& precompiled_entry,
const std::unordered_set<GLenum>& supported_formats);
Core::Frontend::EmuWindow& emu_window;
Tegra::GPU& gpu;
Tegra::MemoryManager& gpu_memory;
Tegra::Engines::Maxwell3D& maxwell3d;
Tegra::Engines::KeplerCompute& kepler_compute;
const Device& device;
ShaderDiskCacheOpenGL disk_cache;
std::unordered_map<u64, PrecompiledShader> runtime_cache;
std::unique_ptr<Shader> null_shader;
std::unique_ptr<Shader> null_kernel;
std::array<Shader*, Maxwell::MaxShaderProgram> last_shaders{};
};
} // namespace OpenGL

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,69 @@
// Copyright 2018 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <array>
#include <string>
#include <string_view>
#include <utility>
#include <vector>
#include "common/common_types.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/engines/shader_type.h"
#include "video_core/shader/registry.h"
#include "video_core/shader/shader_ir.h"
namespace OpenGL {
class Device;
using Maxwell = Tegra::Engines::Maxwell3D::Regs;
using SamplerEntry = VideoCommon::Shader::SamplerEntry;
using ImageEntry = VideoCommon::Shader::ImageEntry;
class ConstBufferEntry : public VideoCommon::Shader::ConstBuffer {
public:
explicit ConstBufferEntry(u32 max_offset_, bool is_indirect_, u32 index_)
: ConstBuffer{max_offset_, is_indirect_}, index{index_} {}
u32 GetIndex() const {
return index;
}
private:
u32 index = 0;
};
struct GlobalMemoryEntry {
constexpr explicit GlobalMemoryEntry(u32 cbuf_index_, u32 cbuf_offset_, bool is_read_,
bool is_written_)
: cbuf_index{cbuf_index_}, cbuf_offset{cbuf_offset_}, is_read{is_read_}, is_written{
is_written_} {}
u32 cbuf_index = 0;
u32 cbuf_offset = 0;
bool is_read = false;
bool is_written = false;
};
struct ShaderEntries {
std::vector<ConstBufferEntry> const_buffers;
std::vector<GlobalMemoryEntry> global_memory_entries;
std::vector<SamplerEntry> samplers;
std::vector<ImageEntry> images;
std::size_t shader_length{};
u32 clip_distances{};
bool use_unified_uniforms{};
};
ShaderEntries MakeEntries(const Device& device, const VideoCommon::Shader::ShaderIR& ir,
Tegra::Engines::ShaderType stage);
std::string DecompileShader(const Device& device, const VideoCommon::Shader::ShaderIR& ir,
const VideoCommon::Shader::Registry& registry,
Tegra::Engines::ShaderType stage, std::string_view identifier,
std::string_view suffix = {});
} // namespace OpenGL

View File

@@ -0,0 +1,483 @@
// Copyright 2019 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <cstring>
#include <fmt/format.h>
#include "common/assert.h"
#include "common/common_paths.h"
#include "common/common_types.h"
#include "common/file_util.h"
#include "common/logging/log.h"
#include "common/scm_rev.h"
#include "common/zstd_compression.h"
#include "core/core.h"
#include "core/hle/kernel/process.h"
#include "core/settings.h"
#include "video_core/engines/shader_type.h"
#include "video_core/renderer_opengl/gl_shader_cache.h"
#include "video_core/renderer_opengl/gl_shader_disk_cache.h"
namespace OpenGL {
using Tegra::Engines::ShaderType;
using VideoCommon::Shader::BindlessSamplerMap;
using VideoCommon::Shader::BoundSamplerMap;
using VideoCommon::Shader::KeyMap;
namespace {
using VideoCommon::Shader::SeparateSamplerKey;
using ShaderCacheVersionHash = std::array<u8, 64>;
struct ConstBufferKey {
u32 cbuf = 0;
u32 offset = 0;
u32 value = 0;
};
struct BoundSamplerEntry {
u32 offset = 0;
Tegra::Engines::SamplerDescriptor sampler;
};
struct SeparateSamplerEntry {
u32 cbuf1 = 0;
u32 cbuf2 = 0;
u32 offset1 = 0;
u32 offset2 = 0;
Tegra::Engines::SamplerDescriptor sampler;
};
struct BindlessSamplerEntry {
u32 cbuf = 0;
u32 offset = 0;
Tegra::Engines::SamplerDescriptor sampler;
};
constexpr u32 NativeVersion = 21;
ShaderCacheVersionHash GetShaderCacheVersionHash() {
ShaderCacheVersionHash hash{};
const std::size_t length = std::min(std::strlen(Common::g_shader_cache_version), hash.size());
std::memcpy(hash.data(), Common::g_shader_cache_version, length);
return hash;
}
} // Anonymous namespace
ShaderDiskCacheEntry::ShaderDiskCacheEntry() = default;
ShaderDiskCacheEntry::~ShaderDiskCacheEntry() = default;
bool ShaderDiskCacheEntry::Load(Common::FS::IOFile& file) {
if (file.ReadBytes(&type, sizeof(u32)) != sizeof(u32)) {
return false;
}
u32 code_size;
u32 code_size_b;
if (file.ReadBytes(&code_size, sizeof(u32)) != sizeof(u32) ||
file.ReadBytes(&code_size_b, sizeof(u32)) != sizeof(u32)) {
return false;
}
code.resize(code_size);
code_b.resize(code_size_b);
if (file.ReadArray(code.data(), code_size) != code_size) {
return false;
}
if (HasProgramA() && file.ReadArray(code_b.data(), code_size_b) != code_size_b) {
return false;
}
u8 is_texture_handler_size_known;
u32 texture_handler_size_value;
u32 num_keys;
u32 num_bound_samplers;
u32 num_separate_samplers;
u32 num_bindless_samplers;
if (file.ReadArray(&unique_identifier, 1) != 1 || file.ReadArray(&bound_buffer, 1) != 1 ||
file.ReadArray(&is_texture_handler_size_known, 1) != 1 ||
file.ReadArray(&texture_handler_size_value, 1) != 1 ||
file.ReadArray(&graphics_info, 1) != 1 || file.ReadArray(&compute_info, 1) != 1 ||
file.ReadArray(&num_keys, 1) != 1 || file.ReadArray(&num_bound_samplers, 1) != 1 ||
file.ReadArray(&num_separate_samplers, 1) != 1 ||
file.ReadArray(&num_bindless_samplers, 1) != 1) {
return false;
}
if (is_texture_handler_size_known) {
texture_handler_size = texture_handler_size_value;
}
std::vector<ConstBufferKey> flat_keys(num_keys);
std::vector<BoundSamplerEntry> flat_bound_samplers(num_bound_samplers);
std::vector<SeparateSamplerEntry> flat_separate_samplers(num_separate_samplers);
std::vector<BindlessSamplerEntry> flat_bindless_samplers(num_bindless_samplers);
if (file.ReadArray(flat_keys.data(), flat_keys.size()) != flat_keys.size() ||
file.ReadArray(flat_bound_samplers.data(), flat_bound_samplers.size()) !=
flat_bound_samplers.size() ||
file.ReadArray(flat_separate_samplers.data(), flat_separate_samplers.size()) !=
flat_separate_samplers.size() ||
file.ReadArray(flat_bindless_samplers.data(), flat_bindless_samplers.size()) !=
flat_bindless_samplers.size()) {
return false;
}
for (const auto& entry : flat_keys) {
keys.insert({{entry.cbuf, entry.offset}, entry.value});
}
for (const auto& entry : flat_bound_samplers) {
bound_samplers.emplace(entry.offset, entry.sampler);
}
for (const auto& entry : flat_separate_samplers) {
SeparateSamplerKey key;
key.buffers = {entry.cbuf1, entry.cbuf2};
key.offsets = {entry.offset1, entry.offset2};
separate_samplers.emplace(key, entry.sampler);
}
for (const auto& entry : flat_bindless_samplers) {
bindless_samplers.insert({{entry.cbuf, entry.offset}, entry.sampler});
}
return true;
}
bool ShaderDiskCacheEntry::Save(Common::FS::IOFile& file) const {
if (file.WriteObject(static_cast<u32>(type)) != 1 ||
file.WriteObject(static_cast<u32>(code.size())) != 1 ||
file.WriteObject(static_cast<u32>(code_b.size())) != 1) {
return false;
}
if (file.WriteArray(code.data(), code.size()) != code.size()) {
return false;
}
if (HasProgramA() && file.WriteArray(code_b.data(), code_b.size()) != code_b.size()) {
return false;
}
if (file.WriteObject(unique_identifier) != 1 || file.WriteObject(bound_buffer) != 1 ||
file.WriteObject(static_cast<u8>(texture_handler_size.has_value())) != 1 ||
file.WriteObject(texture_handler_size.value_or(0)) != 1 ||
file.WriteObject(graphics_info) != 1 || file.WriteObject(compute_info) != 1 ||
file.WriteObject(static_cast<u32>(keys.size())) != 1 ||
file.WriteObject(static_cast<u32>(bound_samplers.size())) != 1 ||
file.WriteObject(static_cast<u32>(separate_samplers.size())) != 1 ||
file.WriteObject(static_cast<u32>(bindless_samplers.size())) != 1) {
return false;
}
std::vector<ConstBufferKey> flat_keys;
flat_keys.reserve(keys.size());
for (const auto& [address, value] : keys) {
flat_keys.push_back(ConstBufferKey{address.first, address.second, value});
}
std::vector<BoundSamplerEntry> flat_bound_samplers;
flat_bound_samplers.reserve(bound_samplers.size());
for (const auto& [address, sampler] : bound_samplers) {
flat_bound_samplers.push_back(BoundSamplerEntry{address, sampler});
}
std::vector<SeparateSamplerEntry> flat_separate_samplers;
flat_separate_samplers.reserve(separate_samplers.size());
for (const auto& [key, sampler] : separate_samplers) {
SeparateSamplerEntry entry;
std::tie(entry.cbuf1, entry.cbuf2) = key.buffers;
std::tie(entry.offset1, entry.offset2) = key.offsets;
entry.sampler = sampler;
flat_separate_samplers.push_back(entry);
}
std::vector<BindlessSamplerEntry> flat_bindless_samplers;
flat_bindless_samplers.reserve(bindless_samplers.size());
for (const auto& [address, sampler] : bindless_samplers) {
flat_bindless_samplers.push_back(
BindlessSamplerEntry{address.first, address.second, sampler});
}
return file.WriteArray(flat_keys.data(), flat_keys.size()) == flat_keys.size() &&
file.WriteArray(flat_bound_samplers.data(), flat_bound_samplers.size()) ==
flat_bound_samplers.size() &&
file.WriteArray(flat_separate_samplers.data(), flat_separate_samplers.size()) ==
flat_separate_samplers.size() &&
file.WriteArray(flat_bindless_samplers.data(), flat_bindless_samplers.size()) ==
flat_bindless_samplers.size();
}
ShaderDiskCacheOpenGL::ShaderDiskCacheOpenGL() = default;
ShaderDiskCacheOpenGL::~ShaderDiskCacheOpenGL() = default;
void ShaderDiskCacheOpenGL::BindTitleID(u64 title_id_) {
title_id = title_id_;
}
std::optional<std::vector<ShaderDiskCacheEntry>> ShaderDiskCacheOpenGL::LoadTransferable() {
// Skip games without title id
const bool has_title_id = title_id != 0;
if (!Settings::values.use_disk_shader_cache.GetValue() || !has_title_id) {
return std::nullopt;
}
Common::FS::IOFile file(GetTransferablePath(), "rb");
if (!file.IsOpen()) {
LOG_INFO(Render_OpenGL, "No transferable shader cache found");
is_usable = true;
return std::nullopt;
}
u32 version{};
if (file.ReadBytes(&version, sizeof(version)) != sizeof(version)) {
LOG_ERROR(Render_OpenGL, "Failed to get transferable cache version, skipping it");
return std::nullopt;
}
if (version < NativeVersion) {
LOG_INFO(Render_OpenGL, "Transferable shader cache is old, removing");
file.Close();
InvalidateTransferable();
is_usable = true;
return std::nullopt;
}
if (version > NativeVersion) {
LOG_WARNING(Render_OpenGL, "Transferable shader cache was generated with a newer version "
"of the emulator, skipping");
return std::nullopt;
}
// Version is valid, load the shaders
std::vector<ShaderDiskCacheEntry> entries;
while (file.Tell() < file.GetSize()) {
ShaderDiskCacheEntry& entry = entries.emplace_back();
if (!entry.Load(file)) {
LOG_ERROR(Render_OpenGL, "Failed to load transferable raw entry, skipping");
return std::nullopt;
}
}
is_usable = true;
return {std::move(entries)};
}
std::vector<ShaderDiskCachePrecompiled> ShaderDiskCacheOpenGL::LoadPrecompiled() {
if (!is_usable) {
return {};
}
Common::FS::IOFile file(GetPrecompiledPath(), "rb");
if (!file.IsOpen()) {
LOG_INFO(Render_OpenGL, "No precompiled shader cache found");
return {};
}
if (const auto result = LoadPrecompiledFile(file)) {
return *result;
}
LOG_INFO(Render_OpenGL, "Failed to load precompiled cache");
file.Close();
InvalidatePrecompiled();
return {};
}
std::optional<std::vector<ShaderDiskCachePrecompiled>> ShaderDiskCacheOpenGL::LoadPrecompiledFile(
Common::FS::IOFile& file) {
// Read compressed file from disk and decompress to virtual precompiled cache file
std::vector<u8> compressed(file.GetSize());
file.ReadBytes(compressed.data(), compressed.size());
const std::vector<u8> decompressed = Common::Compression::DecompressDataZSTD(compressed);
SaveArrayToPrecompiled(decompressed.data(), decompressed.size());
precompiled_cache_virtual_file_offset = 0;
ShaderCacheVersionHash file_hash{};
if (!LoadArrayFromPrecompiled(file_hash.data(), file_hash.size())) {
precompiled_cache_virtual_file_offset = 0;
return std::nullopt;
}
if (GetShaderCacheVersionHash() != file_hash) {
LOG_INFO(Render_OpenGL, "Precompiled cache is from another version of the emulator");
precompiled_cache_virtual_file_offset = 0;
return std::nullopt;
}
std::vector<ShaderDiskCachePrecompiled> entries;
while (precompiled_cache_virtual_file_offset < precompiled_cache_virtual_file.GetSize()) {
u32 binary_size;
auto& entry = entries.emplace_back();
if (!LoadObjectFromPrecompiled(entry.unique_identifier) ||
!LoadObjectFromPrecompiled(entry.binary_format) ||
!LoadObjectFromPrecompiled(binary_size)) {
return std::nullopt;
}
entry.binary.resize(binary_size);
if (!LoadArrayFromPrecompiled(entry.binary.data(), entry.binary.size())) {
return std::nullopt;
}
}
return entries;
}
void ShaderDiskCacheOpenGL::InvalidateTransferable() {
if (!Common::FS::Delete(GetTransferablePath())) {
LOG_ERROR(Render_OpenGL, "Failed to invalidate transferable file={}",
GetTransferablePath());
}
InvalidatePrecompiled();
}
void ShaderDiskCacheOpenGL::InvalidatePrecompiled() {
// Clear virtaul precompiled cache file
precompiled_cache_virtual_file.Resize(0);
if (!Common::FS::Delete(GetPrecompiledPath())) {
LOG_ERROR(Render_OpenGL, "Failed to invalidate precompiled file={}", GetPrecompiledPath());
}
}
void ShaderDiskCacheOpenGL::SaveEntry(const ShaderDiskCacheEntry& entry) {
if (!is_usable) {
return;
}
const u64 id = entry.unique_identifier;
if (stored_transferable.contains(id)) {
// The shader already exists
return;
}
Common::FS::IOFile file = AppendTransferableFile();
if (!file.IsOpen()) {
return;
}
if (!entry.Save(file)) {
LOG_ERROR(Render_OpenGL, "Failed to save raw transferable cache entry, removing");
file.Close();
InvalidateTransferable();
return;
}
stored_transferable.insert(id);
}
void ShaderDiskCacheOpenGL::SavePrecompiled(u64 unique_identifier, GLuint program) {
if (!is_usable) {
return;
}
// TODO(Rodrigo): This is a design smell. I shouldn't be having to manually write the header
// when writing the dump. This should be done the moment I get access to write to the virtual
// file.
if (precompiled_cache_virtual_file.GetSize() == 0) {
SavePrecompiledHeaderToVirtualPrecompiledCache();
}
GLint binary_length;
glGetProgramiv(program, GL_PROGRAM_BINARY_LENGTH, &binary_length);
GLenum binary_format;
std::vector<u8> binary(binary_length);
glGetProgramBinary(program, binary_length, nullptr, &binary_format, binary.data());
if (!SaveObjectToPrecompiled(unique_identifier) || !SaveObjectToPrecompiled(binary_format) ||
!SaveObjectToPrecompiled(static_cast<u32>(binary.size())) ||
!SaveArrayToPrecompiled(binary.data(), binary.size())) {
LOG_ERROR(Render_OpenGL, "Failed to save binary program file in shader={:016X}, removing",
unique_identifier);
InvalidatePrecompiled();
}
}
Common::FS::IOFile ShaderDiskCacheOpenGL::AppendTransferableFile() const {
if (!EnsureDirectories()) {
return {};
}
const auto transferable_path{GetTransferablePath()};
const bool existed = Common::FS::Exists(transferable_path);
Common::FS::IOFile file(transferable_path, "ab");
if (!file.IsOpen()) {
LOG_ERROR(Render_OpenGL, "Failed to open transferable cache in path={}", transferable_path);
return {};
}
if (!existed || file.GetSize() == 0) {
// If the file didn't exist, write its version
if (file.WriteObject(NativeVersion) != 1) {
LOG_ERROR(Render_OpenGL, "Failed to write transferable cache version in path={}",
transferable_path);
return {};
}
}
return file;
}
void ShaderDiskCacheOpenGL::SavePrecompiledHeaderToVirtualPrecompiledCache() {
const auto hash{GetShaderCacheVersionHash()};
if (!SaveArrayToPrecompiled(hash.data(), hash.size())) {
LOG_ERROR(
Render_OpenGL,
"Failed to write precompiled cache version hash to virtual precompiled cache file");
}
}
void ShaderDiskCacheOpenGL::SaveVirtualPrecompiledFile() {
precompiled_cache_virtual_file_offset = 0;
const std::vector<u8> uncompressed = precompiled_cache_virtual_file.ReadAllBytes();
const std::vector<u8> compressed =
Common::Compression::CompressDataZSTDDefault(uncompressed.data(), uncompressed.size());
const auto precompiled_path{GetPrecompiledPath()};
Common::FS::IOFile file(precompiled_path, "wb");
if (!file.IsOpen()) {
LOG_ERROR(Render_OpenGL, "Failed to open precompiled cache in path={}", precompiled_path);
return;
}
if (file.WriteBytes(compressed.data(), compressed.size()) != compressed.size()) {
LOG_ERROR(Render_OpenGL, "Failed to write precompiled cache version in path={}",
precompiled_path);
}
}
bool ShaderDiskCacheOpenGL::EnsureDirectories() const {
const auto CreateDir = [](const std::string& dir) {
if (!Common::FS::CreateDir(dir)) {
LOG_ERROR(Render_OpenGL, "Failed to create directory={}", dir);
return false;
}
return true;
};
return CreateDir(Common::FS::GetUserPath(Common::FS::UserPath::ShaderDir)) &&
CreateDir(GetBaseDir()) && CreateDir(GetTransferableDir()) &&
CreateDir(GetPrecompiledDir());
}
std::string ShaderDiskCacheOpenGL::GetTransferablePath() const {
return Common::FS::SanitizePath(GetTransferableDir() + DIR_SEP_CHR + GetTitleID() + ".bin");
}
std::string ShaderDiskCacheOpenGL::GetPrecompiledPath() const {
return Common::FS::SanitizePath(GetPrecompiledDir() + DIR_SEP_CHR + GetTitleID() + ".bin");
}
std::string ShaderDiskCacheOpenGL::GetTransferableDir() const {
return GetBaseDir() + DIR_SEP "transferable";
}
std::string ShaderDiskCacheOpenGL::GetPrecompiledDir() const {
return GetBaseDir() + DIR_SEP "precompiled";
}
std::string ShaderDiskCacheOpenGL::GetBaseDir() const {
return Common::FS::GetUserPath(Common::FS::UserPath::ShaderDir) + DIR_SEP "opengl";
}
std::string ShaderDiskCacheOpenGL::GetTitleID() const {
return fmt::format("{:016X}", title_id);
}
} // namespace OpenGL

View File

@@ -0,0 +1,175 @@
// Copyright 2019 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <optional>
#include <string>
#include <tuple>
#include <type_traits>
#include <unordered_map>
#include <unordered_set>
#include <utility>
#include <vector>
#include <glad/glad.h>
#include "common/assert.h"
#include "common/common_types.h"
#include "core/file_sys/vfs_vector.h"
#include "video_core/engines/shader_type.h"
#include "video_core/shader/registry.h"
namespace Common::FS {
class IOFile;
}
namespace OpenGL {
using ProgramCode = std::vector<u64>;
/// Describes a shader and how it's used by the guest GPU
struct ShaderDiskCacheEntry {
ShaderDiskCacheEntry();
~ShaderDiskCacheEntry();
bool Load(Common::FS::IOFile& file);
bool Save(Common::FS::IOFile& file) const;
bool HasProgramA() const {
return !code.empty() && !code_b.empty();
}
Tegra::Engines::ShaderType type{};
ProgramCode code;
ProgramCode code_b;
u64 unique_identifier = 0;
std::optional<u32> texture_handler_size;
u32 bound_buffer = 0;
VideoCommon::Shader::GraphicsInfo graphics_info;
VideoCommon::Shader::ComputeInfo compute_info;
VideoCommon::Shader::KeyMap keys;
VideoCommon::Shader::BoundSamplerMap bound_samplers;
VideoCommon::Shader::SeparateSamplerMap separate_samplers;
VideoCommon::Shader::BindlessSamplerMap bindless_samplers;
};
/// Contains an OpenGL dumped binary program
struct ShaderDiskCachePrecompiled {
u64 unique_identifier = 0;
GLenum binary_format = 0;
std::vector<u8> binary;
};
class ShaderDiskCacheOpenGL {
public:
explicit ShaderDiskCacheOpenGL();
~ShaderDiskCacheOpenGL();
/// Binds a title ID for all future operations.
void BindTitleID(u64 title_id);
/// Loads transferable cache. If file has a old version or on failure, it deletes the file.
std::optional<std::vector<ShaderDiskCacheEntry>> LoadTransferable();
/// Loads current game's precompiled cache. Invalidates on failure.
std::vector<ShaderDiskCachePrecompiled> LoadPrecompiled();
/// Removes the transferable (and precompiled) cache file.
void InvalidateTransferable();
/// Removes the precompiled cache file and clears virtual precompiled cache file.
void InvalidatePrecompiled();
/// Saves a raw dump to the transferable file. Checks for collisions.
void SaveEntry(const ShaderDiskCacheEntry& entry);
/// Saves a dump entry to the precompiled file. Does not check for collisions.
void SavePrecompiled(u64 unique_identifier, GLuint program);
/// Serializes virtual precompiled shader cache file to real file
void SaveVirtualPrecompiledFile();
private:
/// Loads the transferable cache. Returns empty on failure.
std::optional<std::vector<ShaderDiskCachePrecompiled>> LoadPrecompiledFile(
Common::FS::IOFile& file);
/// Opens current game's transferable file and write it's header if it doesn't exist
Common::FS::IOFile AppendTransferableFile() const;
/// Save precompiled header to precompiled_cache_in_memory
void SavePrecompiledHeaderToVirtualPrecompiledCache();
/// Create shader disk cache directories. Returns true on success.
bool EnsureDirectories() const;
/// Gets current game's transferable file path
std::string GetTransferablePath() const;
/// Gets current game's precompiled file path
std::string GetPrecompiledPath() const;
/// Get user's transferable directory path
std::string GetTransferableDir() const;
/// Get user's precompiled directory path
std::string GetPrecompiledDir() const;
/// Get user's shader directory path
std::string GetBaseDir() const;
/// Get current game's title id
std::string GetTitleID() const;
template <typename T>
bool SaveArrayToPrecompiled(const T* data, std::size_t length) {
const std::size_t write_length = precompiled_cache_virtual_file.WriteArray(
data, length, precompiled_cache_virtual_file_offset);
precompiled_cache_virtual_file_offset += write_length;
return write_length == sizeof(T) * length;
}
template <typename T>
bool LoadArrayFromPrecompiled(T* data, std::size_t length) {
const std::size_t read_length = precompiled_cache_virtual_file.ReadArray(
data, length, precompiled_cache_virtual_file_offset);
precompiled_cache_virtual_file_offset += read_length;
return read_length == sizeof(T) * length;
}
template <typename T>
bool SaveObjectToPrecompiled(const T& object) {
return SaveArrayToPrecompiled(&object, 1);
}
bool SaveObjectToPrecompiled(bool object) {
const auto value = static_cast<u8>(object);
return SaveArrayToPrecompiled(&value, 1);
}
template <typename T>
bool LoadObjectFromPrecompiled(T& object) {
return LoadArrayFromPrecompiled(&object, 1);
}
// Stores whole precompiled cache which will be read from or saved to the precompiled chache
// file
FileSys::VectorVfsFile precompiled_cache_virtual_file;
// Stores the current offset of the precompiled cache file for IO purposes
std::size_t precompiled_cache_virtual_file_offset = 0;
// Stored transferable shaders
std::unordered_set<u64> stored_transferable;
/// Title ID to operate on
u64 title_id = 0;
// The cache has been loaded at boot
bool is_usable = false;
};
} // namespace OpenGL

View File

@@ -0,0 +1,149 @@
// Copyright 2018 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <glad/glad.h>
#include "common/common_types.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/renderer_opengl/gl_device.h"
#include "video_core/renderer_opengl/gl_shader_manager.h"
namespace OpenGL {
namespace {
void BindProgram(GLenum stage, GLuint current, GLuint old, bool& enabled) {
if (current == old) {
return;
}
if (current == 0) {
if (enabled) {
enabled = false;
glDisable(stage);
}
return;
}
if (!enabled) {
enabled = true;
glEnable(stage);
}
glBindProgramARB(stage, current);
}
} // Anonymous namespace
ProgramManager::ProgramManager(const Device& device)
: use_assembly_programs{device.UseAssemblyShaders()} {
if (use_assembly_programs) {
glEnable(GL_COMPUTE_PROGRAM_NV);
} else {
graphics_pipeline.Create();
glBindProgramPipeline(graphics_pipeline.handle);
}
}
ProgramManager::~ProgramManager() = default;
void ProgramManager::BindCompute(GLuint program) {
if (use_assembly_programs) {
glBindProgramARB(GL_COMPUTE_PROGRAM_NV, program);
} else {
is_graphics_bound = false;
glUseProgram(program);
}
}
void ProgramManager::BindGraphicsPipeline() {
if (!use_assembly_programs) {
UpdateSourcePrograms();
}
}
void ProgramManager::BindHostPipeline(GLuint pipeline) {
if (use_assembly_programs) {
if (geometry_enabled) {
geometry_enabled = false;
old_state.geometry = 0;
glDisable(GL_GEOMETRY_PROGRAM_NV);
}
} else {
if (!is_graphics_bound) {
glUseProgram(0);
}
}
glBindProgramPipeline(pipeline);
}
void ProgramManager::RestoreGuestPipeline() {
if (use_assembly_programs) {
glBindProgramPipeline(0);
} else {
glBindProgramPipeline(graphics_pipeline.handle);
}
}
void ProgramManager::BindHostCompute(GLuint program) {
if (use_assembly_programs) {
glDisable(GL_COMPUTE_PROGRAM_NV);
}
glUseProgram(program);
is_graphics_bound = false;
}
void ProgramManager::RestoreGuestCompute() {
if (use_assembly_programs) {
glEnable(GL_COMPUTE_PROGRAM_NV);
glUseProgram(0);
}
}
void ProgramManager::UseVertexShader(GLuint program) {
if (use_assembly_programs) {
BindProgram(GL_VERTEX_PROGRAM_NV, program, current_state.vertex, vertex_enabled);
}
current_state.vertex = program;
}
void ProgramManager::UseGeometryShader(GLuint program) {
if (use_assembly_programs) {
BindProgram(GL_GEOMETRY_PROGRAM_NV, program, current_state.vertex, geometry_enabled);
}
current_state.geometry = program;
}
void ProgramManager::UseFragmentShader(GLuint program) {
if (use_assembly_programs) {
BindProgram(GL_FRAGMENT_PROGRAM_NV, program, current_state.vertex, fragment_enabled);
}
current_state.fragment = program;
}
void ProgramManager::UpdateSourcePrograms() {
if (!is_graphics_bound) {
is_graphics_bound = true;
glUseProgram(0);
}
const GLuint handle = graphics_pipeline.handle;
const auto update_state = [handle](GLenum stage, GLuint current, GLuint old) {
if (current == old) {
return;
}
glUseProgramStages(handle, stage, current);
};
update_state(GL_VERTEX_SHADER_BIT, current_state.vertex, old_state.vertex);
update_state(GL_GEOMETRY_SHADER_BIT, current_state.geometry, old_state.geometry);
update_state(GL_FRAGMENT_SHADER_BIT, current_state.fragment, old_state.fragment);
old_state = current_state;
}
void MaxwellUniformData::SetFromRegs(const Tegra::Engines::Maxwell3D& maxwell) {
const auto& regs = maxwell.regs;
// Y_NEGATE controls what value S2R returns for the Y_DIRECTION system value.
y_direction = regs.screen_y_control.y_negate == 0 ? 1.0f : -1.0f;
}
} // namespace OpenGL

View File

@@ -0,0 +1,82 @@
// Copyright 2018 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <cstddef>
#include <glad/glad.h>
#include "video_core/renderer_opengl/gl_resource_manager.h"
#include "video_core/renderer_opengl/maxwell_to_gl.h"
namespace OpenGL {
class Device;
/// Uniform structure for the Uniform Buffer Object, all vectors must be 16-byte aligned
/// @note Always keep a vec4 at the end. The GL spec is not clear whether the alignment at
/// the end of a uniform block is included in UNIFORM_BLOCK_DATA_SIZE or not.
/// Not following that rule will cause problems on some AMD drivers.
struct alignas(16) MaxwellUniformData {
void SetFromRegs(const Tegra::Engines::Maxwell3D& maxwell);
GLfloat y_direction;
};
static_assert(sizeof(MaxwellUniformData) == 16, "MaxwellUniformData structure size is incorrect");
static_assert(sizeof(MaxwellUniformData) < 16384,
"MaxwellUniformData structure must be less than 16kb as per the OpenGL spec");
class ProgramManager {
public:
explicit ProgramManager(const Device& device);
~ProgramManager();
/// Binds a compute program
void BindCompute(GLuint program);
/// Updates bound programs.
void BindGraphicsPipeline();
/// Binds an OpenGL pipeline object unsynchronized with the guest state.
void BindHostPipeline(GLuint pipeline);
/// Rewinds BindHostPipeline state changes.
void RestoreGuestPipeline();
/// Binds an OpenGL GLSL program object unsynchronized with the guest state.
void BindHostCompute(GLuint program);
/// Rewinds BindHostCompute state changes.
void RestoreGuestCompute();
void UseVertexShader(GLuint program);
void UseGeometryShader(GLuint program);
void UseFragmentShader(GLuint program);
private:
struct PipelineState {
GLuint vertex = 0;
GLuint geometry = 0;
GLuint fragment = 0;
};
/// Update GLSL programs.
void UpdateSourcePrograms();
OGLPipeline graphics_pipeline;
PipelineState current_state;
PipelineState old_state;
bool use_assembly_programs = false;
bool is_graphics_bound = true;
bool vertex_enabled = false;
bool geometry_enabled = false;
bool fragment_enabled = false;
};
} // namespace OpenGL

View File

@@ -0,0 +1,61 @@
// Copyright 2014 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <string_view>
#include <vector>
#include <glad/glad.h>
#include "common/assert.h"
#include "common/logging/log.h"
#include "video_core/renderer_opengl/gl_shader_util.h"
namespace OpenGL::GLShader {
namespace {
std::string_view StageDebugName(GLenum type) {
switch (type) {
case GL_VERTEX_SHADER:
return "vertex";
case GL_GEOMETRY_SHADER:
return "geometry";
case GL_FRAGMENT_SHADER:
return "fragment";
case GL_COMPUTE_SHADER:
return "compute";
}
UNIMPLEMENTED();
return "unknown";
}
} // Anonymous namespace
GLuint LoadShader(std::string_view source, GLenum type) {
const std::string_view debug_type = StageDebugName(type);
const GLuint shader_id = glCreateShader(type);
const GLchar* source_string = source.data();
const GLint source_length = static_cast<GLint>(source.size());
glShaderSource(shader_id, 1, &source_string, &source_length);
LOG_DEBUG(Render_OpenGL, "Compiling {} shader...", debug_type);
glCompileShader(shader_id);
GLint result = GL_FALSE;
GLint info_log_length;
glGetShaderiv(shader_id, GL_COMPILE_STATUS, &result);
glGetShaderiv(shader_id, GL_INFO_LOG_LENGTH, &info_log_length);
if (info_log_length > 1) {
std::string shader_error(info_log_length, ' ');
glGetShaderInfoLog(shader_id, info_log_length, nullptr, &shader_error[0]);
if (result == GL_TRUE) {
LOG_DEBUG(Render_OpenGL, "{}", shader_error);
} else {
LOG_ERROR(Render_OpenGL, "Error compiling {} shader:\n{}", debug_type, shader_error);
}
}
return shader_id;
}
} // namespace OpenGL::GLShader

View File

@@ -0,0 +1,95 @@
// Copyright 2014 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <string>
#include <vector>
#include <glad/glad.h>
#include "common/assert.h"
#include "common/logging/log.h"
namespace OpenGL::GLShader {
/**
* Utility function to log the source code of a list of shaders.
* @param shaders The OpenGL shaders whose source we will print.
*/
template <typename... T>
void LogShaderSource(T... shaders) {
auto shader_list = {shaders...};
for (const auto& shader : shader_list) {
if (shader == 0)
continue;
GLint source_length;
glGetShaderiv(shader, GL_SHADER_SOURCE_LENGTH, &source_length);
std::string source(source_length, ' ');
glGetShaderSource(shader, source_length, nullptr, &source[0]);
LOG_INFO(Render_OpenGL, "Shader source {}", source);
}
}
/**
* Utility function to create and compile an OpenGL GLSL shader
* @param source String of the GLSL shader program
* @param type Type of the shader (GL_VERTEX_SHADER, GL_GEOMETRY_SHADER or GL_FRAGMENT_SHADER)
*/
GLuint LoadShader(std::string_view source, GLenum type);
/**
* Utility function to create and compile an OpenGL GLSL shader program (vertex + fragment shader)
* @param separable_program whether to create a separable program
* @param shaders ID of shaders to attach to the program
* @returns Handle of the newly created OpenGL program object
*/
template <typename... T>
GLuint LoadProgram(bool separable_program, bool hint_retrievable, T... shaders) {
// Link the program
LOG_DEBUG(Render_OpenGL, "Linking program...");
GLuint program_id = glCreateProgram();
((shaders == 0 ? (void)0 : glAttachShader(program_id, shaders)), ...);
if (separable_program) {
glProgramParameteri(program_id, GL_PROGRAM_SEPARABLE, GL_TRUE);
}
if (hint_retrievable) {
glProgramParameteri(program_id, GL_PROGRAM_BINARY_RETRIEVABLE_HINT, GL_TRUE);
}
glLinkProgram(program_id);
// Check the program
GLint result = GL_FALSE;
GLint info_log_length;
glGetProgramiv(program_id, GL_LINK_STATUS, &result);
glGetProgramiv(program_id, GL_INFO_LOG_LENGTH, &info_log_length);
if (info_log_length > 1) {
std::string program_error(info_log_length, ' ');
glGetProgramInfoLog(program_id, info_log_length, nullptr, &program_error[0]);
if (result == GL_TRUE) {
LOG_DEBUG(Render_OpenGL, "{}", program_error);
} else {
LOG_ERROR(Render_OpenGL, "Error linking shader:\n{}", program_error);
}
}
if (result == GL_FALSE) {
// There was a problem linking the shader, print the source for debugging purposes.
LogShaderSource(shaders...);
}
ASSERT_MSG(result == GL_TRUE, "Shader not linked");
((shaders == 0 ? (void)0 : glDetachShader(program_id, shaders)), ...);
return program_id;
}
} // namespace OpenGL::GLShader

View File

@@ -0,0 +1,259 @@
// Copyright 2019 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <algorithm>
#include <array>
#include <cstddef>
#include "common/common_types.h"
#include "core/core.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/gpu.h"
#include "video_core/renderer_opengl/gl_state_tracker.h"
#define OFF(field_name) MAXWELL3D_REG_INDEX(field_name)
#define NUM(field_name) (sizeof(Maxwell3D::Regs::field_name) / (sizeof(u32)))
namespace OpenGL {
namespace {
using namespace Dirty;
using namespace VideoCommon::Dirty;
using Tegra::Engines::Maxwell3D;
using Regs = Maxwell3D::Regs;
using Tables = Maxwell3D::DirtyState::Tables;
using Table = Maxwell3D::DirtyState::Table;
void SetupDirtyColorMasks(Tables& tables) {
tables[0][OFF(color_mask_common)] = ColorMaskCommon;
for (std::size_t rt = 0; rt < Regs::NumRenderTargets; ++rt) {
const std::size_t offset = OFF(color_mask) + rt * NUM(color_mask[0]);
FillBlock(tables[0], offset, NUM(color_mask[0]), ColorMask0 + rt);
}
FillBlock(tables[1], OFF(color_mask), NUM(color_mask), ColorMasks);
}
void SetupDirtyVertexArrays(Tables& tables) {
static constexpr std::size_t num_array = 3;
static constexpr std::size_t instance_base_offset = 3;
for (std::size_t i = 0; i < Regs::NumVertexArrays; ++i) {
const std::size_t array_offset = OFF(vertex_array) + i * NUM(vertex_array[0]);
const std::size_t limit_offset = OFF(vertex_array_limit) + i * NUM(vertex_array_limit[0]);
FillBlock(tables, array_offset, num_array, VertexBuffer0 + i, VertexBuffers);
FillBlock(tables, limit_offset, NUM(vertex_array_limit), VertexBuffer0 + i, VertexBuffers);
const std::size_t instance_array_offset = array_offset + instance_base_offset;
tables[0][instance_array_offset] = static_cast<u8>(VertexInstance0 + i);
tables[1][instance_array_offset] = VertexInstances;
const std::size_t instance_offset = OFF(instanced_arrays) + i;
tables[0][instance_offset] = static_cast<u8>(VertexInstance0 + i);
tables[1][instance_offset] = VertexInstances;
}
}
void SetupDirtyVertexFormat(Tables& tables) {
for (std::size_t i = 0; i < Regs::NumVertexAttributes; ++i) {
const std::size_t offset = OFF(vertex_attrib_format) + i * NUM(vertex_attrib_format[0]);
FillBlock(tables[0], offset, NUM(vertex_attrib_format[0]), VertexFormat0 + i);
}
FillBlock(tables[1], OFF(vertex_attrib_format), Regs::NumVertexAttributes, VertexFormats);
}
void SetupDirtyViewports(Tables& tables) {
for (std::size_t i = 0; i < Regs::NumViewports; ++i) {
const std::size_t transf_offset = OFF(viewport_transform) + i * NUM(viewport_transform[0]);
const std::size_t viewport_offset = OFF(viewports) + i * NUM(viewports[0]);
FillBlock(tables[0], transf_offset, NUM(viewport_transform[0]), Viewport0 + i);
FillBlock(tables[0], viewport_offset, NUM(viewports[0]), Viewport0 + i);
}
FillBlock(tables[1], OFF(viewport_transform), NUM(viewport_transform), Viewports);
FillBlock(tables[1], OFF(viewports), NUM(viewports), Viewports);
tables[0][OFF(viewport_transform_enabled)] = ViewportTransform;
tables[1][OFF(viewport_transform_enabled)] = Viewports;
}
void SetupDirtyScissors(Tables& tables) {
for (std::size_t i = 0; i < Regs::NumViewports; ++i) {
const std::size_t offset = OFF(scissor_test) + i * NUM(scissor_test[0]);
FillBlock(tables[0], offset, NUM(scissor_test[0]), Scissor0 + i);
}
FillBlock(tables[1], OFF(scissor_test), NUM(scissor_test), Scissors);
}
void SetupDirtyShaders(Tables& tables) {
FillBlock(tables[0], OFF(shader_config[0]), NUM(shader_config[0]) * Regs::MaxShaderProgram,
Shaders);
}
void SetupDirtyPolygonModes(Tables& tables) {
tables[0][OFF(polygon_mode_front)] = PolygonModeFront;
tables[0][OFF(polygon_mode_back)] = PolygonModeBack;
tables[1][OFF(polygon_mode_front)] = PolygonModes;
tables[1][OFF(polygon_mode_back)] = PolygonModes;
tables[0][OFF(fill_rectangle)] = PolygonModes;
}
void SetupDirtyDepthTest(Tables& tables) {
auto& table = tables[0];
table[OFF(depth_test_enable)] = DepthTest;
table[OFF(depth_write_enabled)] = DepthMask;
table[OFF(depth_test_func)] = DepthTest;
}
void SetupDirtyStencilTest(Tables& tables) {
static constexpr std::array offsets = {
OFF(stencil_enable), OFF(stencil_front_func_func), OFF(stencil_front_func_ref),
OFF(stencil_front_func_mask), OFF(stencil_front_op_fail), OFF(stencil_front_op_zfail),
OFF(stencil_front_op_zpass), OFF(stencil_front_mask), OFF(stencil_two_side_enable),
OFF(stencil_back_func_func), OFF(stencil_back_func_ref), OFF(stencil_back_func_mask),
OFF(stencil_back_op_fail), OFF(stencil_back_op_zfail), OFF(stencil_back_op_zpass),
OFF(stencil_back_mask)};
for (const auto offset : offsets) {
tables[0][offset] = StencilTest;
}
}
void SetupDirtyAlphaTest(Tables& tables) {
auto& table = tables[0];
table[OFF(alpha_test_ref)] = AlphaTest;
table[OFF(alpha_test_func)] = AlphaTest;
table[OFF(alpha_test_enabled)] = AlphaTest;
}
void SetupDirtyBlend(Tables& tables) {
FillBlock(tables[0], OFF(blend_color), NUM(blend_color), BlendColor);
tables[0][OFF(independent_blend_enable)] = BlendIndependentEnabled;
for (std::size_t i = 0; i < Regs::NumRenderTargets; ++i) {
const std::size_t offset = OFF(independent_blend) + i * NUM(independent_blend[0]);
FillBlock(tables[0], offset, NUM(independent_blend[0]), BlendState0 + i);
tables[0][OFF(blend.enable) + i] = static_cast<u8>(BlendState0 + i);
}
FillBlock(tables[1], OFF(independent_blend), NUM(independent_blend), BlendStates);
FillBlock(tables[1], OFF(blend), NUM(blend), BlendStates);
}
void SetupDirtyPrimitiveRestart(Tables& tables) {
FillBlock(tables[0], OFF(primitive_restart), NUM(primitive_restart), PrimitiveRestart);
}
void SetupDirtyPolygonOffset(Tables& tables) {
auto& table = tables[0];
table[OFF(polygon_offset_fill_enable)] = PolygonOffset;
table[OFF(polygon_offset_line_enable)] = PolygonOffset;
table[OFF(polygon_offset_point_enable)] = PolygonOffset;
table[OFF(polygon_offset_factor)] = PolygonOffset;
table[OFF(polygon_offset_units)] = PolygonOffset;
table[OFF(polygon_offset_clamp)] = PolygonOffset;
}
void SetupDirtyMultisampleControl(Tables& tables) {
FillBlock(tables[0], OFF(multisample_control), NUM(multisample_control), MultisampleControl);
}
void SetupDirtyRasterizeEnable(Tables& tables) {
tables[0][OFF(rasterize_enable)] = RasterizeEnable;
}
void SetupDirtyFramebufferSRGB(Tables& tables) {
tables[0][OFF(framebuffer_srgb)] = FramebufferSRGB;
}
void SetupDirtyLogicOp(Tables& tables) {
FillBlock(tables[0], OFF(logic_op), NUM(logic_op), LogicOp);
}
void SetupDirtyFragmentClampColor(Tables& tables) {
tables[0][OFF(frag_color_clamp)] = FragmentClampColor;
}
void SetupDirtyPointSize(Tables& tables) {
tables[0][OFF(vp_point_size)] = PointSize;
tables[0][OFF(point_size)] = PointSize;
tables[0][OFF(point_sprite_enable)] = PointSize;
}
void SetupDirtyLineWidth(Tables& tables) {
tables[0][OFF(line_width_smooth)] = LineWidth;
tables[0][OFF(line_width_aliased)] = LineWidth;
tables[0][OFF(line_smooth_enable)] = LineWidth;
}
void SetupDirtyClipControl(Tables& tables) {
auto& table = tables[0];
table[OFF(screen_y_control)] = ClipControl;
table[OFF(depth_mode)] = ClipControl;
}
void SetupDirtyDepthClampEnabled(Tables& tables) {
tables[0][OFF(view_volume_clip_control)] = DepthClampEnabled;
}
void SetupDirtyMisc(Tables& tables) {
auto& table = tables[0];
table[OFF(clip_distance_enabled)] = ClipDistances;
table[OFF(front_face)] = FrontFace;
table[OFF(cull_test_enabled)] = CullTest;
table[OFF(cull_face)] = CullTest;
}
} // Anonymous namespace
StateTracker::StateTracker(Tegra::GPU& gpu) : flags{gpu.Maxwell3D().dirty.flags} {
auto& dirty = gpu.Maxwell3D().dirty;
auto& tables = dirty.tables;
SetupDirtyRenderTargets(tables);
SetupDirtyColorMasks(tables);
SetupDirtyViewports(tables);
SetupDirtyScissors(tables);
SetupDirtyVertexArrays(tables);
SetupDirtyVertexFormat(tables);
SetupDirtyShaders(tables);
SetupDirtyPolygonModes(tables);
SetupDirtyDepthTest(tables);
SetupDirtyStencilTest(tables);
SetupDirtyAlphaTest(tables);
SetupDirtyBlend(tables);
SetupDirtyPrimitiveRestart(tables);
SetupDirtyPolygonOffset(tables);
SetupDirtyMultisampleControl(tables);
SetupDirtyRasterizeEnable(tables);
SetupDirtyFramebufferSRGB(tables);
SetupDirtyLogicOp(tables);
SetupDirtyFragmentClampColor(tables);
SetupDirtyPointSize(tables);
SetupDirtyLineWidth(tables);
SetupDirtyClipControl(tables);
SetupDirtyDepthClampEnabled(tables);
SetupDirtyMisc(tables);
auto& store = dirty.on_write_stores;
store[VertexBuffers] = true;
for (std::size_t i = 0; i < Regs::NumVertexArrays; ++i) {
store[VertexBuffer0 + i] = true;
}
}
void StateTracker::InvalidateStreamBuffer() {
flags[Dirty::VertexBuffers] = true;
for (int index = Dirty::VertexBuffer0; index <= Dirty::VertexBuffer31; ++index) {
flags[index] = true;
}
}
} // namespace OpenGL

View File

@@ -0,0 +1,207 @@
// Copyright 2019 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <limits>
#include <glad/glad.h>
#include "common/common_types.h"
#include "core/core.h"
#include "video_core/dirty_flags.h"
#include "video_core/engines/maxwell_3d.h"
namespace Tegra {
class GPU;
}
namespace OpenGL {
namespace Dirty {
enum : u8 {
First = VideoCommon::Dirty::LastCommonEntry,
VertexFormats,
VertexFormat0,
VertexFormat31 = VertexFormat0 + 31,
VertexBuffers,
VertexBuffer0,
VertexBuffer31 = VertexBuffer0 + 31,
VertexInstances,
VertexInstance0,
VertexInstance31 = VertexInstance0 + 31,
ViewportTransform,
Viewports,
Viewport0,
Viewport15 = Viewport0 + 15,
Scissors,
Scissor0,
Scissor15 = Scissor0 + 15,
ColorMaskCommon,
ColorMasks,
ColorMask0,
ColorMask7 = ColorMask0 + 7,
BlendColor,
BlendIndependentEnabled,
BlendStates,
BlendState0,
BlendState7 = BlendState0 + 7,
Shaders,
ClipDistances,
PolygonModes,
PolygonModeFront,
PolygonModeBack,
ColorMask,
FrontFace,
CullTest,
DepthMask,
DepthTest,
StencilTest,
AlphaTest,
PrimitiveRestart,
PolygonOffset,
MultisampleControl,
RasterizeEnable,
FramebufferSRGB,
LogicOp,
FragmentClampColor,
PointSize,
LineWidth,
ClipControl,
DepthClampEnabled,
Last
};
static_assert(Last <= std::numeric_limits<u8>::max());
} // namespace Dirty
class StateTracker {
public:
explicit StateTracker(Tegra::GPU& gpu);
void InvalidateStreamBuffer();
void BindIndexBuffer(GLuint new_index_buffer) {
if (index_buffer == new_index_buffer) {
return;
}
index_buffer = new_index_buffer;
glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, new_index_buffer);
}
void BindFramebuffer(GLuint new_framebuffer) {
if (framebuffer == new_framebuffer) {
return;
}
framebuffer = new_framebuffer;
glBindFramebuffer(GL_DRAW_FRAMEBUFFER, framebuffer);
}
void NotifyScreenDrawVertexArray() {
flags[OpenGL::Dirty::VertexFormats] = true;
flags[OpenGL::Dirty::VertexFormat0 + 0] = true;
flags[OpenGL::Dirty::VertexFormat0 + 1] = true;
flags[OpenGL::Dirty::VertexBuffers] = true;
flags[OpenGL::Dirty::VertexBuffer0] = true;
flags[OpenGL::Dirty::VertexInstances] = true;
flags[OpenGL::Dirty::VertexInstance0 + 0] = true;
flags[OpenGL::Dirty::VertexInstance0 + 1] = true;
}
void NotifyPolygonModes() {
flags[OpenGL::Dirty::PolygonModes] = true;
flags[OpenGL::Dirty::PolygonModeFront] = true;
flags[OpenGL::Dirty::PolygonModeBack] = true;
}
void NotifyViewport0() {
flags[OpenGL::Dirty::Viewports] = true;
flags[OpenGL::Dirty::Viewport0] = true;
}
void NotifyScissor0() {
flags[OpenGL::Dirty::Scissors] = true;
flags[OpenGL::Dirty::Scissor0] = true;
}
void NotifyColorMask(size_t index) {
flags[OpenGL::Dirty::ColorMasks] = true;
flags[OpenGL::Dirty::ColorMask0 + index] = true;
}
void NotifyBlend0() {
flags[OpenGL::Dirty::BlendStates] = true;
flags[OpenGL::Dirty::BlendState0] = true;
}
void NotifyFramebuffer() {
flags[VideoCommon::Dirty::RenderTargets] = true;
}
void NotifyFrontFace() {
flags[OpenGL::Dirty::FrontFace] = true;
}
void NotifyCullTest() {
flags[OpenGL::Dirty::CullTest] = true;
}
void NotifyDepthMask() {
flags[OpenGL::Dirty::DepthMask] = true;
}
void NotifyDepthTest() {
flags[OpenGL::Dirty::DepthTest] = true;
}
void NotifyStencilTest() {
flags[OpenGL::Dirty::StencilTest] = true;
}
void NotifyPolygonOffset() {
flags[OpenGL::Dirty::PolygonOffset] = true;
}
void NotifyRasterizeEnable() {
flags[OpenGL::Dirty::RasterizeEnable] = true;
}
void NotifyFramebufferSRGB() {
flags[OpenGL::Dirty::FramebufferSRGB] = true;
}
void NotifyLogicOp() {
flags[OpenGL::Dirty::LogicOp] = true;
}
void NotifyClipControl() {
flags[OpenGL::Dirty::ClipControl] = true;
}
void NotifyAlphaTest() {
flags[OpenGL::Dirty::AlphaTest] = true;
}
private:
Tegra::Engines::Maxwell3D::DirtyState::Flags& flags;
GLuint framebuffer = 0;
GLuint index_buffer = 0;
};
} // namespace OpenGL

View File

@@ -0,0 +1,70 @@
// Copyright 2018 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <tuple>
#include <vector>
#include "common/alignment.h"
#include "common/assert.h"
#include "common/microprofile.h"
#include "video_core/renderer_opengl/gl_device.h"
#include "video_core/renderer_opengl/gl_state_tracker.h"
#include "video_core/renderer_opengl/gl_stream_buffer.h"
MICROPROFILE_DEFINE(OpenGL_StreamBuffer, "OpenGL", "Stream Buffer Orphaning",
MP_RGB(128, 128, 192));
namespace OpenGL {
OGLStreamBuffer::OGLStreamBuffer(const Device& device, StateTracker& state_tracker_)
: state_tracker{state_tracker_} {
gl_buffer.Create();
static constexpr GLbitfield flags = GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT;
glNamedBufferStorage(gl_buffer.handle, BUFFER_SIZE, nullptr, flags);
mapped_ptr = static_cast<u8*>(
glMapNamedBufferRange(gl_buffer.handle, 0, BUFFER_SIZE, flags | GL_MAP_FLUSH_EXPLICIT_BIT));
if (device.UseAssemblyShaders() || device.HasVertexBufferUnifiedMemory()) {
glMakeNamedBufferResidentNV(gl_buffer.handle, GL_READ_ONLY);
glGetNamedBufferParameterui64vNV(gl_buffer.handle, GL_BUFFER_GPU_ADDRESS_NV, &gpu_address);
}
}
OGLStreamBuffer::~OGLStreamBuffer() {
glUnmapNamedBuffer(gl_buffer.handle);
gl_buffer.Release();
}
std::pair<u8*, GLintptr> OGLStreamBuffer::Map(GLsizeiptr size, GLintptr alignment) {
ASSERT(size <= BUFFER_SIZE);
ASSERT(alignment <= BUFFER_SIZE);
mapped_size = size;
if (alignment > 0) {
buffer_pos = Common::AlignUp<std::size_t>(buffer_pos, alignment);
}
if (buffer_pos + size > BUFFER_SIZE) {
MICROPROFILE_SCOPE(OpenGL_StreamBuffer);
glInvalidateBufferData(gl_buffer.handle);
state_tracker.InvalidateStreamBuffer();
buffer_pos = 0;
}
return std::make_pair(mapped_ptr + buffer_pos, buffer_pos);
}
void OGLStreamBuffer::Unmap(GLsizeiptr size) {
ASSERT(size <= mapped_size);
if (size > 0) {
glFlushMappedNamedBufferRange(gl_buffer.handle, buffer_pos, size);
}
buffer_pos += size;
}
} // namespace OpenGL

View File

@@ -0,0 +1,60 @@
// Copyright 2018 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <utility>
#include <glad/glad.h>
#include "common/common_types.h"
#include "video_core/renderer_opengl/gl_resource_manager.h"
namespace OpenGL {
class Device;
class StateTracker;
class OGLStreamBuffer : private NonCopyable {
public:
explicit OGLStreamBuffer(const Device& device, StateTracker& state_tracker_);
~OGLStreamBuffer();
/*
* Allocates a linear chunk of memory in the GPU buffer with at least "size" bytes
* and the optional alignment requirement.
* If the buffer is full, the whole buffer is reallocated which invalidates old chunks.
* The return values are the pointer to the new chunk, and the offset within the buffer.
* The actual used size must be specified on unmapping the chunk.
*/
std::pair<u8*, GLintptr> Map(GLsizeiptr size, GLintptr alignment = 0);
void Unmap(GLsizeiptr size);
GLuint Handle() const {
return gl_buffer.handle;
}
u64 Address() const {
return gpu_address;
}
GLsizeiptr Size() const noexcept {
return BUFFER_SIZE;
}
private:
static constexpr GLsizeiptr BUFFER_SIZE = 256 * 1024 * 1024;
StateTracker& state_tracker;
OGLBuffer gl_buffer;
GLuint64EXT gpu_address = 0;
GLintptr buffer_pos = 0;
GLsizeiptr mapped_size = 0;
u8* mapped_ptr = nullptr;
};
} // namespace OpenGL

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,250 @@
// Copyright 2019 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <memory>
#include <span>
#include <glad/glad.h>
#include "video_core/renderer_opengl/gl_resource_manager.h"
#include "video_core/renderer_opengl/util_shaders.h"
#include "video_core/texture_cache/texture_cache.h"
namespace OpenGL {
class Device;
class ProgramManager;
class StateTracker;
class Framebuffer;
class Image;
class ImageView;
class Sampler;
using VideoCommon::ImageId;
using VideoCommon::ImageViewId;
using VideoCommon::ImageViewType;
using VideoCommon::NUM_RT;
using VideoCommon::Offset2D;
using VideoCommon::RenderTargets;
class ImageBufferMap {
public:
explicit ImageBufferMap(GLuint handle, u8* map, size_t size, OGLSync* sync);
~ImageBufferMap();
GLuint Handle() const noexcept {
return handle;
}
std::span<u8> Span() const noexcept {
return span;
}
private:
std::span<u8> span;
OGLSync* sync;
GLuint handle;
};
struct FormatProperties {
GLenum compatibility_class;
bool compatibility_by_size;
bool is_compressed;
};
class TextureCacheRuntime {
friend Framebuffer;
friend Image;
friend ImageView;
friend Sampler;
public:
explicit TextureCacheRuntime(const Device& device, ProgramManager& program_manager,
StateTracker& state_tracker);
~TextureCacheRuntime();
void Finish();
ImageBufferMap MapUploadBuffer(size_t size);
ImageBufferMap MapDownloadBuffer(size_t size);
void CopyImage(Image& dst, Image& src, std::span<const VideoCommon::ImageCopy> copies);
void ConvertImage(Framebuffer* dst, ImageView& dst_view, ImageView& src_view) {
UNIMPLEMENTED();
}
bool CanImageBeCopied(const Image& dst, const Image& src);
void EmulateCopyImage(Image& dst, Image& src, std::span<const VideoCommon::ImageCopy> copies);
void BlitFramebuffer(Framebuffer* dst, Framebuffer* src,
const std::array<Offset2D, 2>& dst_region,
const std::array<Offset2D, 2>& src_region,
Tegra::Engines::Fermi2D::Filter filter,
Tegra::Engines::Fermi2D::Operation operation);
void AccelerateImageUpload(Image& image, const ImageBufferMap& map, size_t buffer_offset,
std::span<const VideoCommon::SwizzleParameters> swizzles);
void InsertUploadMemoryBarrier();
FormatProperties FormatInfo(VideoCommon::ImageType type, GLenum internal_format) const;
private:
struct StagingBuffers {
explicit StagingBuffers(GLenum storage_flags_, GLenum map_flags_);
~StagingBuffers();
ImageBufferMap RequestMap(size_t requested_size, bool insert_fence);
size_t RequestBuffer(size_t requested_size);
std::optional<size_t> FindBuffer(size_t requested_size);
std::vector<OGLSync> syncs;
std::vector<OGLBuffer> buffers;
std::vector<u8*> maps;
std::vector<size_t> sizes;
GLenum storage_flags;
GLenum map_flags;
};
const Device& device;
StateTracker& state_tracker;
UtilShaders util_shaders;
std::array<std::unordered_map<GLenum, FormatProperties>, 3> format_properties;
StagingBuffers upload_buffers{GL_MAP_WRITE_BIT, GL_MAP_WRITE_BIT | GL_MAP_FLUSH_EXPLICIT_BIT};
StagingBuffers download_buffers{GL_MAP_READ_BIT, GL_MAP_READ_BIT};
OGLTexture null_image_1d_array;
OGLTexture null_image_cube_array;
OGLTexture null_image_3d;
OGLTexture null_image_rect;
OGLTextureView null_image_view_1d;
OGLTextureView null_image_view_2d;
OGLTextureView null_image_view_2d_array;
OGLTextureView null_image_view_cube;
std::array<GLuint, VideoCommon::NUM_IMAGE_VIEW_TYPES> null_image_views;
};
class Image : public VideoCommon::ImageBase {
friend ImageView;
public:
explicit Image(TextureCacheRuntime&, const VideoCommon::ImageInfo& info, GPUVAddr gpu_addr,
VAddr cpu_addr);
void UploadMemory(const ImageBufferMap& map, size_t buffer_offset,
std::span<const VideoCommon::BufferImageCopy> copies);
void UploadMemory(const ImageBufferMap& map, size_t buffer_offset,
std::span<const VideoCommon::BufferCopy> copies);
void DownloadMemory(ImageBufferMap& map, size_t buffer_offset,
std::span<const VideoCommon::BufferImageCopy> copies);
GLuint Handle() const noexcept {
return texture.handle;
}
private:
void CopyBufferToImage(const VideoCommon::BufferImageCopy& copy, size_t buffer_offset);
void CopyImageToBuffer(const VideoCommon::BufferImageCopy& copy, size_t buffer_offset);
OGLTexture texture;
OGLTextureView store_view;
OGLBuffer buffer;
GLenum gl_internal_format = GL_NONE;
GLenum gl_store_format = GL_NONE;
GLenum gl_format = GL_NONE;
GLenum gl_type = GL_NONE;
};
class ImageView : public VideoCommon::ImageViewBase {
friend Image;
public:
explicit ImageView(TextureCacheRuntime&, const VideoCommon::ImageViewInfo&, ImageId, Image&);
explicit ImageView(TextureCacheRuntime&, const VideoCommon::NullImageParams&);
[[nodiscard]] GLuint Handle(ImageViewType query_type) const noexcept {
return views[static_cast<size_t>(query_type)];
}
[[nodiscard]] GLuint DefaultHandle() const noexcept {
return default_handle;
}
[[nodiscard]] GLenum Format() const noexcept {
return internal_format;
}
private:
void SetupView(const Device& device, Image& image, ImageViewType view_type, GLuint handle,
const VideoCommon::ImageViewInfo& info,
VideoCommon::SubresourceRange view_range);
std::array<GLuint, VideoCommon::NUM_IMAGE_VIEW_TYPES> views{};
std::vector<OGLTextureView> stored_views;
GLuint default_handle = 0;
GLenum internal_format = GL_NONE;
};
class ImageAlloc : public VideoCommon::ImageAllocBase {};
class Sampler {
public:
explicit Sampler(TextureCacheRuntime&, const Tegra::Texture::TSCEntry&);
GLuint Handle() const noexcept {
return sampler.handle;
}
private:
OGLSampler sampler;
};
class Framebuffer {
public:
explicit Framebuffer(TextureCacheRuntime&, std::span<ImageView*, NUM_RT> color_buffers,
ImageView* depth_buffer, const VideoCommon::RenderTargets& key);
[[nodiscard]] GLuint Handle() const noexcept {
return framebuffer.handle;
}
[[nodiscard]] GLbitfield BufferBits() const noexcept {
return buffer_bits;
}
private:
OGLFramebuffer framebuffer;
GLbitfield buffer_bits = GL_NONE;
};
struct TextureCacheParams {
static constexpr bool ENABLE_VALIDATION = true;
static constexpr bool FRAMEBUFFER_BLITS = true;
static constexpr bool HAS_EMULATED_COPIES = true;
using Runtime = OpenGL::TextureCacheRuntime;
using Image = OpenGL::Image;
using ImageAlloc = OpenGL::ImageAlloc;
using ImageView = OpenGL::ImageView;
using Sampler = OpenGL::Sampler;
using Framebuffer = OpenGL::Framebuffer;
};
using TextureCache = VideoCommon::TextureCache<TextureCacheParams>;
} // namespace OpenGL

View File

@@ -0,0 +1,497 @@
// Copyright 2018 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <array>
#include <glad/glad.h>
#include "common/common_types.h"
#include "common/logging/log.h"
#include "video_core/engines/maxwell_3d.h"
namespace OpenGL {
using GLvec2 = std::array<GLfloat, 2>;
using GLvec3 = std::array<GLfloat, 3>;
using GLvec4 = std::array<GLfloat, 4>;
using GLuvec2 = std::array<GLuint, 2>;
using GLuvec3 = std::array<GLuint, 3>;
using GLuvec4 = std::array<GLuint, 4>;
namespace MaxwellToGL {
using Maxwell = Tegra::Engines::Maxwell3D::Regs;
inline GLenum VertexFormat(Maxwell::VertexAttribute attrib) {
switch (attrib.type) {
case Maxwell::VertexAttribute::Type::UnsignedNorm:
case Maxwell::VertexAttribute::Type::UnsignedScaled:
case Maxwell::VertexAttribute::Type::UnsignedInt:
switch (attrib.size) {
case Maxwell::VertexAttribute::Size::Size_8:
case Maxwell::VertexAttribute::Size::Size_8_8:
case Maxwell::VertexAttribute::Size::Size_8_8_8:
case Maxwell::VertexAttribute::Size::Size_8_8_8_8:
return GL_UNSIGNED_BYTE;
case Maxwell::VertexAttribute::Size::Size_16:
case Maxwell::VertexAttribute::Size::Size_16_16:
case Maxwell::VertexAttribute::Size::Size_16_16_16:
case Maxwell::VertexAttribute::Size::Size_16_16_16_16:
return GL_UNSIGNED_SHORT;
case Maxwell::VertexAttribute::Size::Size_32:
case Maxwell::VertexAttribute::Size::Size_32_32:
case Maxwell::VertexAttribute::Size::Size_32_32_32:
case Maxwell::VertexAttribute::Size::Size_32_32_32_32:
return GL_UNSIGNED_INT;
case Maxwell::VertexAttribute::Size::Size_10_10_10_2:
return GL_UNSIGNED_INT_2_10_10_10_REV;
default:
break;
}
break;
case Maxwell::VertexAttribute::Type::SignedNorm:
case Maxwell::VertexAttribute::Type::SignedScaled:
case Maxwell::VertexAttribute::Type::SignedInt:
switch (attrib.size) {
case Maxwell::VertexAttribute::Size::Size_8:
case Maxwell::VertexAttribute::Size::Size_8_8:
case Maxwell::VertexAttribute::Size::Size_8_8_8:
case Maxwell::VertexAttribute::Size::Size_8_8_8_8:
return GL_BYTE;
case Maxwell::VertexAttribute::Size::Size_16:
case Maxwell::VertexAttribute::Size::Size_16_16:
case Maxwell::VertexAttribute::Size::Size_16_16_16:
case Maxwell::VertexAttribute::Size::Size_16_16_16_16:
return GL_SHORT;
case Maxwell::VertexAttribute::Size::Size_32:
case Maxwell::VertexAttribute::Size::Size_32_32:
case Maxwell::VertexAttribute::Size::Size_32_32_32:
case Maxwell::VertexAttribute::Size::Size_32_32_32_32:
return GL_INT;
case Maxwell::VertexAttribute::Size::Size_10_10_10_2:
return GL_INT_2_10_10_10_REV;
default:
break;
}
break;
case Maxwell::VertexAttribute::Type::Float:
switch (attrib.size) {
case Maxwell::VertexAttribute::Size::Size_16:
case Maxwell::VertexAttribute::Size::Size_16_16:
case Maxwell::VertexAttribute::Size::Size_16_16_16:
case Maxwell::VertexAttribute::Size::Size_16_16_16_16:
return GL_HALF_FLOAT;
case Maxwell::VertexAttribute::Size::Size_32:
case Maxwell::VertexAttribute::Size::Size_32_32:
case Maxwell::VertexAttribute::Size::Size_32_32_32:
case Maxwell::VertexAttribute::Size::Size_32_32_32_32:
return GL_FLOAT;
default:
break;
}
break;
}
UNIMPLEMENTED_MSG("Unimplemented vertex format of type={} and size={}", attrib.TypeString(),
attrib.SizeString());
return {};
}
inline GLenum IndexFormat(Maxwell::IndexFormat index_format) {
switch (index_format) {
case Maxwell::IndexFormat::UnsignedByte:
return GL_UNSIGNED_BYTE;
case Maxwell::IndexFormat::UnsignedShort:
return GL_UNSIGNED_SHORT;
case Maxwell::IndexFormat::UnsignedInt:
return GL_UNSIGNED_INT;
}
UNREACHABLE_MSG("Invalid index_format={}", index_format);
return {};
}
inline GLenum PrimitiveTopology(Maxwell::PrimitiveTopology topology) {
switch (topology) {
case Maxwell::PrimitiveTopology::Points:
return GL_POINTS;
case Maxwell::PrimitiveTopology::Lines:
return GL_LINES;
case Maxwell::PrimitiveTopology::LineLoop:
return GL_LINE_LOOP;
case Maxwell::PrimitiveTopology::LineStrip:
return GL_LINE_STRIP;
case Maxwell::PrimitiveTopology::Triangles:
return GL_TRIANGLES;
case Maxwell::PrimitiveTopology::TriangleStrip:
return GL_TRIANGLE_STRIP;
case Maxwell::PrimitiveTopology::TriangleFan:
return GL_TRIANGLE_FAN;
case Maxwell::PrimitiveTopology::Quads:
return GL_QUADS;
case Maxwell::PrimitiveTopology::QuadStrip:
return GL_QUAD_STRIP;
case Maxwell::PrimitiveTopology::Polygon:
return GL_POLYGON;
case Maxwell::PrimitiveTopology::LinesAdjacency:
return GL_LINES_ADJACENCY;
case Maxwell::PrimitiveTopology::LineStripAdjacency:
return GL_LINE_STRIP_ADJACENCY;
case Maxwell::PrimitiveTopology::TrianglesAdjacency:
return GL_TRIANGLES_ADJACENCY;
case Maxwell::PrimitiveTopology::TriangleStripAdjacency:
return GL_TRIANGLE_STRIP_ADJACENCY;
case Maxwell::PrimitiveTopology::Patches:
return GL_PATCHES;
}
UNREACHABLE_MSG("Invalid topology={}", topology);
return GL_POINTS;
}
inline GLenum TextureFilterMode(Tegra::Texture::TextureFilter filter_mode,
Tegra::Texture::TextureMipmapFilter mipmap_filter_mode) {
switch (filter_mode) {
case Tegra::Texture::TextureFilter::Nearest:
switch (mipmap_filter_mode) {
case Tegra::Texture::TextureMipmapFilter::None:
return GL_NEAREST;
case Tegra::Texture::TextureMipmapFilter::Nearest:
return GL_NEAREST_MIPMAP_NEAREST;
case Tegra::Texture::TextureMipmapFilter::Linear:
return GL_NEAREST_MIPMAP_LINEAR;
}
break;
case Tegra::Texture::TextureFilter::Linear:
switch (mipmap_filter_mode) {
case Tegra::Texture::TextureMipmapFilter::None:
return GL_LINEAR;
case Tegra::Texture::TextureMipmapFilter::Nearest:
return GL_LINEAR_MIPMAP_NEAREST;
case Tegra::Texture::TextureMipmapFilter::Linear:
return GL_LINEAR_MIPMAP_LINEAR;
}
break;
}
UNREACHABLE_MSG("Invalid texture filter mode={} and mipmap filter mode={}", filter_mode,
mipmap_filter_mode);
return GL_NEAREST;
}
inline GLenum WrapMode(Tegra::Texture::WrapMode wrap_mode) {
switch (wrap_mode) {
case Tegra::Texture::WrapMode::Wrap:
return GL_REPEAT;
case Tegra::Texture::WrapMode::Mirror:
return GL_MIRRORED_REPEAT;
case Tegra::Texture::WrapMode::ClampToEdge:
return GL_CLAMP_TO_EDGE;
case Tegra::Texture::WrapMode::Border:
return GL_CLAMP_TO_BORDER;
case Tegra::Texture::WrapMode::Clamp:
return GL_CLAMP;
case Tegra::Texture::WrapMode::MirrorOnceClampToEdge:
return GL_MIRROR_CLAMP_TO_EDGE;
case Tegra::Texture::WrapMode::MirrorOnceBorder:
if (GL_EXT_texture_mirror_clamp) {
return GL_MIRROR_CLAMP_TO_BORDER_EXT;
} else {
return GL_MIRROR_CLAMP_TO_EDGE;
}
case Tegra::Texture::WrapMode::MirrorOnceClampOGL:
if (GL_EXT_texture_mirror_clamp) {
return GL_MIRROR_CLAMP_EXT;
} else {
return GL_MIRROR_CLAMP_TO_EDGE;
}
}
UNIMPLEMENTED_MSG("Unimplemented texture wrap mode={}", wrap_mode);
return GL_REPEAT;
}
inline GLenum DepthCompareFunc(Tegra::Texture::DepthCompareFunc func) {
switch (func) {
case Tegra::Texture::DepthCompareFunc::Never:
return GL_NEVER;
case Tegra::Texture::DepthCompareFunc::Less:
return GL_LESS;
case Tegra::Texture::DepthCompareFunc::LessEqual:
return GL_LEQUAL;
case Tegra::Texture::DepthCompareFunc::Equal:
return GL_EQUAL;
case Tegra::Texture::DepthCompareFunc::NotEqual:
return GL_NOTEQUAL;
case Tegra::Texture::DepthCompareFunc::Greater:
return GL_GREATER;
case Tegra::Texture::DepthCompareFunc::GreaterEqual:
return GL_GEQUAL;
case Tegra::Texture::DepthCompareFunc::Always:
return GL_ALWAYS;
}
UNIMPLEMENTED_MSG("Unimplemented texture depth compare function={}", func);
return GL_GREATER;
}
inline GLenum BlendEquation(Maxwell::Blend::Equation equation) {
switch (equation) {
case Maxwell::Blend::Equation::Add:
case Maxwell::Blend::Equation::AddGL:
return GL_FUNC_ADD;
case Maxwell::Blend::Equation::Subtract:
case Maxwell::Blend::Equation::SubtractGL:
return GL_FUNC_SUBTRACT;
case Maxwell::Blend::Equation::ReverseSubtract:
case Maxwell::Blend::Equation::ReverseSubtractGL:
return GL_FUNC_REVERSE_SUBTRACT;
case Maxwell::Blend::Equation::Min:
case Maxwell::Blend::Equation::MinGL:
return GL_MIN;
case Maxwell::Blend::Equation::Max:
case Maxwell::Blend::Equation::MaxGL:
return GL_MAX;
}
UNIMPLEMENTED_MSG("Unimplemented blend equation={}", equation);
return GL_FUNC_ADD;
}
inline GLenum BlendFunc(Maxwell::Blend::Factor factor) {
switch (factor) {
case Maxwell::Blend::Factor::Zero:
case Maxwell::Blend::Factor::ZeroGL:
return GL_ZERO;
case Maxwell::Blend::Factor::One:
case Maxwell::Blend::Factor::OneGL:
return GL_ONE;
case Maxwell::Blend::Factor::SourceColor:
case Maxwell::Blend::Factor::SourceColorGL:
return GL_SRC_COLOR;
case Maxwell::Blend::Factor::OneMinusSourceColor:
case Maxwell::Blend::Factor::OneMinusSourceColorGL:
return GL_ONE_MINUS_SRC_COLOR;
case Maxwell::Blend::Factor::SourceAlpha:
case Maxwell::Blend::Factor::SourceAlphaGL:
return GL_SRC_ALPHA;
case Maxwell::Blend::Factor::OneMinusSourceAlpha:
case Maxwell::Blend::Factor::OneMinusSourceAlphaGL:
return GL_ONE_MINUS_SRC_ALPHA;
case Maxwell::Blend::Factor::DestAlpha:
case Maxwell::Blend::Factor::DestAlphaGL:
return GL_DST_ALPHA;
case Maxwell::Blend::Factor::OneMinusDestAlpha:
case Maxwell::Blend::Factor::OneMinusDestAlphaGL:
return GL_ONE_MINUS_DST_ALPHA;
case Maxwell::Blend::Factor::DestColor:
case Maxwell::Blend::Factor::DestColorGL:
return GL_DST_COLOR;
case Maxwell::Blend::Factor::OneMinusDestColor:
case Maxwell::Blend::Factor::OneMinusDestColorGL:
return GL_ONE_MINUS_DST_COLOR;
case Maxwell::Blend::Factor::SourceAlphaSaturate:
case Maxwell::Blend::Factor::SourceAlphaSaturateGL:
return GL_SRC_ALPHA_SATURATE;
case Maxwell::Blend::Factor::Source1Color:
case Maxwell::Blend::Factor::Source1ColorGL:
return GL_SRC1_COLOR;
case Maxwell::Blend::Factor::OneMinusSource1Color:
case Maxwell::Blend::Factor::OneMinusSource1ColorGL:
return GL_ONE_MINUS_SRC1_COLOR;
case Maxwell::Blend::Factor::Source1Alpha:
case Maxwell::Blend::Factor::Source1AlphaGL:
return GL_SRC1_ALPHA;
case Maxwell::Blend::Factor::OneMinusSource1Alpha:
case Maxwell::Blend::Factor::OneMinusSource1AlphaGL:
return GL_ONE_MINUS_SRC1_ALPHA;
case Maxwell::Blend::Factor::ConstantColor:
case Maxwell::Blend::Factor::ConstantColorGL:
return GL_CONSTANT_COLOR;
case Maxwell::Blend::Factor::OneMinusConstantColor:
case Maxwell::Blend::Factor::OneMinusConstantColorGL:
return GL_ONE_MINUS_CONSTANT_COLOR;
case Maxwell::Blend::Factor::ConstantAlpha:
case Maxwell::Blend::Factor::ConstantAlphaGL:
return GL_CONSTANT_ALPHA;
case Maxwell::Blend::Factor::OneMinusConstantAlpha:
case Maxwell::Blend::Factor::OneMinusConstantAlphaGL:
return GL_ONE_MINUS_CONSTANT_ALPHA;
}
UNIMPLEMENTED_MSG("Unimplemented blend factor={}", factor);
return GL_ZERO;
}
inline GLenum SwizzleSource(Tegra::Texture::SwizzleSource source) {
switch (source) {
case Tegra::Texture::SwizzleSource::Zero:
return GL_ZERO;
case Tegra::Texture::SwizzleSource::R:
return GL_RED;
case Tegra::Texture::SwizzleSource::G:
return GL_GREEN;
case Tegra::Texture::SwizzleSource::B:
return GL_BLUE;
case Tegra::Texture::SwizzleSource::A:
return GL_ALPHA;
case Tegra::Texture::SwizzleSource::OneInt:
case Tegra::Texture::SwizzleSource::OneFloat:
return GL_ONE;
}
UNIMPLEMENTED_MSG("Unimplemented swizzle source={}", source);
return GL_ZERO;
}
inline GLenum ComparisonOp(Maxwell::ComparisonOp comparison) {
switch (comparison) {
case Maxwell::ComparisonOp::Never:
case Maxwell::ComparisonOp::NeverOld:
return GL_NEVER;
case Maxwell::ComparisonOp::Less:
case Maxwell::ComparisonOp::LessOld:
return GL_LESS;
case Maxwell::ComparisonOp::Equal:
case Maxwell::ComparisonOp::EqualOld:
return GL_EQUAL;
case Maxwell::ComparisonOp::LessEqual:
case Maxwell::ComparisonOp::LessEqualOld:
return GL_LEQUAL;
case Maxwell::ComparisonOp::Greater:
case Maxwell::ComparisonOp::GreaterOld:
return GL_GREATER;
case Maxwell::ComparisonOp::NotEqual:
case Maxwell::ComparisonOp::NotEqualOld:
return GL_NOTEQUAL;
case Maxwell::ComparisonOp::GreaterEqual:
case Maxwell::ComparisonOp::GreaterEqualOld:
return GL_GEQUAL;
case Maxwell::ComparisonOp::Always:
case Maxwell::ComparisonOp::AlwaysOld:
return GL_ALWAYS;
}
UNIMPLEMENTED_MSG("Unimplemented comparison op={}", comparison);
return GL_ALWAYS;
}
inline GLenum StencilOp(Maxwell::StencilOp stencil) {
switch (stencil) {
case Maxwell::StencilOp::Keep:
case Maxwell::StencilOp::KeepOGL:
return GL_KEEP;
case Maxwell::StencilOp::Zero:
case Maxwell::StencilOp::ZeroOGL:
return GL_ZERO;
case Maxwell::StencilOp::Replace:
case Maxwell::StencilOp::ReplaceOGL:
return GL_REPLACE;
case Maxwell::StencilOp::Incr:
case Maxwell::StencilOp::IncrOGL:
return GL_INCR;
case Maxwell::StencilOp::Decr:
case Maxwell::StencilOp::DecrOGL:
return GL_DECR;
case Maxwell::StencilOp::Invert:
case Maxwell::StencilOp::InvertOGL:
return GL_INVERT;
case Maxwell::StencilOp::IncrWrap:
case Maxwell::StencilOp::IncrWrapOGL:
return GL_INCR_WRAP;
case Maxwell::StencilOp::DecrWrap:
case Maxwell::StencilOp::DecrWrapOGL:
return GL_DECR_WRAP;
}
UNIMPLEMENTED_MSG("Unimplemented stencil op={}", stencil);
return GL_KEEP;
}
inline GLenum FrontFace(Maxwell::FrontFace front_face) {
switch (front_face) {
case Maxwell::FrontFace::ClockWise:
return GL_CW;
case Maxwell::FrontFace::CounterClockWise:
return GL_CCW;
}
UNIMPLEMENTED_MSG("Unimplemented front face cull={}", front_face);
return GL_CCW;
}
inline GLenum CullFace(Maxwell::CullFace cull_face) {
switch (cull_face) {
case Maxwell::CullFace::Front:
return GL_FRONT;
case Maxwell::CullFace::Back:
return GL_BACK;
case Maxwell::CullFace::FrontAndBack:
return GL_FRONT_AND_BACK;
}
UNIMPLEMENTED_MSG("Unimplemented cull face={}", cull_face);
return GL_BACK;
}
inline GLenum LogicOp(Maxwell::LogicOperation operation) {
switch (operation) {
case Maxwell::LogicOperation::Clear:
return GL_CLEAR;
case Maxwell::LogicOperation::And:
return GL_AND;
case Maxwell::LogicOperation::AndReverse:
return GL_AND_REVERSE;
case Maxwell::LogicOperation::Copy:
return GL_COPY;
case Maxwell::LogicOperation::AndInverted:
return GL_AND_INVERTED;
case Maxwell::LogicOperation::NoOp:
return GL_NOOP;
case Maxwell::LogicOperation::Xor:
return GL_XOR;
case Maxwell::LogicOperation::Or:
return GL_OR;
case Maxwell::LogicOperation::Nor:
return GL_NOR;
case Maxwell::LogicOperation::Equiv:
return GL_EQUIV;
case Maxwell::LogicOperation::Invert:
return GL_INVERT;
case Maxwell::LogicOperation::OrReverse:
return GL_OR_REVERSE;
case Maxwell::LogicOperation::CopyInverted:
return GL_COPY_INVERTED;
case Maxwell::LogicOperation::OrInverted:
return GL_OR_INVERTED;
case Maxwell::LogicOperation::Nand:
return GL_NAND;
case Maxwell::LogicOperation::Set:
return GL_SET;
}
UNIMPLEMENTED_MSG("Unimplemented logic operation={}", operation);
return GL_COPY;
}
inline GLenum PolygonMode(Maxwell::PolygonMode polygon_mode) {
switch (polygon_mode) {
case Maxwell::PolygonMode::Point:
return GL_POINT;
case Maxwell::PolygonMode::Line:
return GL_LINE;
case Maxwell::PolygonMode::Fill:
return GL_FILL;
}
UNREACHABLE_MSG("Invalid polygon mode={}", polygon_mode);
return GL_FILL;
}
inline GLenum ReductionFilter(Tegra::Texture::SamplerReduction filter) {
switch (filter) {
case Tegra::Texture::SamplerReduction::WeightedAverage:
return GL_WEIGHTED_AVERAGE_ARB;
case Tegra::Texture::SamplerReduction::Min:
return GL_MIN;
case Tegra::Texture::SamplerReduction::Max:
return GL_MAX;
}
UNREACHABLE_MSG("Invalid reduction filter={}", static_cast<int>(filter));
return GL_WEIGHTED_AVERAGE_ARB;
}
inline GLenum ViewportSwizzle(Maxwell::ViewportSwizzle swizzle) {
// Enumeration order matches register order. We can convert it arithmetically.
return GL_VIEWPORT_SWIZZLE_POSITIVE_X_NV + static_cast<GLenum>(swizzle);
}
} // namespace MaxwellToGL
} // namespace OpenGL

View File

@@ -0,0 +1,521 @@
// Copyright 2014 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <algorithm>
#include <cstddef>
#include <cstdlib>
#include <cstring>
#include <memory>
#include <glad/glad.h>
#include "common/assert.h"
#include "common/logging/log.h"
#include "common/microprofile.h"
#include "common/telemetry.h"
#include "core/core.h"
#include "core/core_timing.h"
#include "core/frontend/emu_window.h"
#include "core/memory.h"
#include "core/perf_stats.h"
#include "core/settings.h"
#include "core/telemetry_session.h"
#include "video_core/host_shaders/opengl_present_frag.h"
#include "video_core/host_shaders/opengl_present_vert.h"
#include "video_core/renderer_opengl/gl_rasterizer.h"
#include "video_core/renderer_opengl/gl_shader_manager.h"
#include "video_core/renderer_opengl/renderer_opengl.h"
#include "video_core/textures/decoders.h"
namespace OpenGL {
namespace {
constexpr GLint PositionLocation = 0;
constexpr GLint TexCoordLocation = 1;
constexpr GLint ModelViewMatrixLocation = 0;
struct ScreenRectVertex {
constexpr ScreenRectVertex(u32 x, u32 y, GLfloat u, GLfloat v)
: position{{static_cast<GLfloat>(x), static_cast<GLfloat>(y)}}, tex_coord{{u, v}} {}
std::array<GLfloat, 2> position;
std::array<GLfloat, 2> tex_coord;
};
/**
* Defines a 1:1 pixel ortographic projection matrix with (0,0) on the top-left
* corner and (width, height) on the lower-bottom.
*
* The projection part of the matrix is trivial, hence these operations are represented
* by a 3x2 matrix.
*/
std::array<GLfloat, 3 * 2> MakeOrthographicMatrix(float width, float height) {
std::array<GLfloat, 3 * 2> matrix; // Laid out in column-major order
// clang-format off
matrix[0] = 2.f / width; matrix[2] = 0.f; matrix[4] = -1.f;
matrix[1] = 0.f; matrix[3] = -2.f / height; matrix[5] = 1.f;
// Last matrix row is implicitly assumed to be [0, 0, 1].
// clang-format on
return matrix;
}
const char* GetSource(GLenum source) {
switch (source) {
case GL_DEBUG_SOURCE_API:
return "API";
case GL_DEBUG_SOURCE_WINDOW_SYSTEM:
return "WINDOW_SYSTEM";
case GL_DEBUG_SOURCE_SHADER_COMPILER:
return "SHADER_COMPILER";
case GL_DEBUG_SOURCE_THIRD_PARTY:
return "THIRD_PARTY";
case GL_DEBUG_SOURCE_APPLICATION:
return "APPLICATION";
case GL_DEBUG_SOURCE_OTHER:
return "OTHER";
default:
UNREACHABLE();
return "Unknown source";
}
}
const char* GetType(GLenum type) {
switch (type) {
case GL_DEBUG_TYPE_ERROR:
return "ERROR";
case GL_DEBUG_TYPE_DEPRECATED_BEHAVIOR:
return "DEPRECATED_BEHAVIOR";
case GL_DEBUG_TYPE_UNDEFINED_BEHAVIOR:
return "UNDEFINED_BEHAVIOR";
case GL_DEBUG_TYPE_PORTABILITY:
return "PORTABILITY";
case GL_DEBUG_TYPE_PERFORMANCE:
return "PERFORMANCE";
case GL_DEBUG_TYPE_OTHER:
return "OTHER";
case GL_DEBUG_TYPE_MARKER:
return "MARKER";
default:
UNREACHABLE();
return "Unknown type";
}
}
void APIENTRY DebugHandler(GLenum source, GLenum type, GLuint id, GLenum severity, GLsizei length,
const GLchar* message, const void* user_param) {
const char format[] = "{} {} {}: {}";
const char* const str_source = GetSource(source);
const char* const str_type = GetType(type);
switch (severity) {
case GL_DEBUG_SEVERITY_HIGH:
LOG_CRITICAL(Render_OpenGL, format, str_source, str_type, id, message);
break;
case GL_DEBUG_SEVERITY_MEDIUM:
LOG_WARNING(Render_OpenGL, format, str_source, str_type, id, message);
break;
case GL_DEBUG_SEVERITY_NOTIFICATION:
case GL_DEBUG_SEVERITY_LOW:
LOG_DEBUG(Render_OpenGL, format, str_source, str_type, id, message);
break;
}
}
} // Anonymous namespace
RendererOpenGL::RendererOpenGL(Core::TelemetrySession& telemetry_session_,
Core::Frontend::EmuWindow& emu_window_,
Core::Memory::Memory& cpu_memory_, Tegra::GPU& gpu_,
std::unique_ptr<Core::Frontend::GraphicsContext> context_)
: RendererBase{emu_window_, std::move(context_)}, telemetry_session{telemetry_session_},
emu_window{emu_window_}, cpu_memory{cpu_memory_}, gpu{gpu_}, program_manager{device} {}
RendererOpenGL::~RendererOpenGL() = default;
void RendererOpenGL::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {
if (!framebuffer) {
return;
}
PrepareRendertarget(framebuffer);
RenderScreenshot();
state_tracker.BindFramebuffer(0);
DrawScreen(emu_window.GetFramebufferLayout());
++m_current_frame;
rasterizer->TickFrame();
context->SwapBuffers();
render_window.OnFrameDisplayed();
}
void RendererOpenGL::PrepareRendertarget(const Tegra::FramebufferConfig* framebuffer) {
if (!framebuffer) {
return;
}
// If framebuffer is provided, reload it from memory to a texture
if (screen_info.texture.width != static_cast<GLsizei>(framebuffer->width) ||
screen_info.texture.height != static_cast<GLsizei>(framebuffer->height) ||
screen_info.texture.pixel_format != framebuffer->pixel_format ||
gl_framebuffer_data.empty()) {
// Reallocate texture if the framebuffer size has changed.
// This is expected to not happen very often and hence should not be a
// performance problem.
ConfigureFramebufferTexture(screen_info.texture, *framebuffer);
}
// Load the framebuffer from memory, draw it to the screen, and swap buffers
LoadFBToScreenInfo(*framebuffer);
}
void RendererOpenGL::LoadFBToScreenInfo(const Tegra::FramebufferConfig& framebuffer) {
// Framebuffer orientation handling
framebuffer_transform_flags = framebuffer.transform_flags;
framebuffer_crop_rect = framebuffer.crop_rect;
const VAddr framebuffer_addr{framebuffer.address + framebuffer.offset};
if (rasterizer->AccelerateDisplay(framebuffer, framebuffer_addr, framebuffer.stride)) {
return;
}
// Reset the screen info's display texture to its own permanent texture
screen_info.display_texture = screen_info.texture.resource.handle;
const auto pixel_format{
VideoCore::Surface::PixelFormatFromGPUPixelFormat(framebuffer.pixel_format)};
const u32 bytes_per_pixel{VideoCore::Surface::BytesPerBlock(pixel_format)};
const u64 size_in_bytes{framebuffer.stride * framebuffer.height * bytes_per_pixel};
u8* const host_ptr{cpu_memory.GetPointer(framebuffer_addr)};
rasterizer->FlushRegion(ToCacheAddr(host_ptr), size_in_bytes);
// TODO(Rodrigo): Read this from HLE
constexpr u32 block_height_log2 = 4;
Tegra::Texture::UnswizzleTexture(gl_framebuffer_data, std::span<u8>(host_ptr, size_in_bytes),
bytes_per_pixel, framebuffer.width, framebuffer.height, 1,
block_height_log2, 0);
glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0);
glPixelStorei(GL_UNPACK_ROW_LENGTH, static_cast<GLint>(framebuffer.stride));
// Update existing texture
// TODO: Test what happens on hardware when you change the framebuffer dimensions so that
// they differ from the LCD resolution.
// TODO: Applications could theoretically crash yuzu here by specifying too large
// framebuffer sizes. We should make sure that this cannot happen.
glTextureSubImage2D(screen_info.texture.resource.handle, 0, 0, 0, framebuffer.width,
framebuffer.height, screen_info.texture.gl_format,
screen_info.texture.gl_type, gl_framebuffer_data.data());
glPixelStorei(GL_UNPACK_ROW_LENGTH, 0);
}
void RendererOpenGL::LoadColorToActiveGLTexture(u8 color_r, u8 color_g, u8 color_b, u8 color_a,
const TextureInfo& texture) {
const u8 framebuffer_data[4] = {color_a, color_b, color_g, color_r};
glClearTexImage(texture.resource.handle, 0, GL_RGBA, GL_UNSIGNED_BYTE, framebuffer_data);
}
void RendererOpenGL::InitOpenGLObjects() {
glClearColor(Settings::values.bg_red.GetValue(), Settings::values.bg_green.GetValue(),
Settings::values.bg_blue.GetValue(), 0.0f);
// Create shader programs
OGLShader vertex_shader;
vertex_shader.Create(HostShaders::OPENGL_PRESENT_VERT, GL_VERTEX_SHADER);
OGLShader fragment_shader;
fragment_shader.Create(HostShaders::OPENGL_PRESENT_FRAG, GL_FRAGMENT_SHADER);
vertex_program.Create(true, false, vertex_shader.handle);
fragment_program.Create(true, false, fragment_shader.handle);
pipeline.Create();
glUseProgramStages(pipeline.handle, GL_VERTEX_SHADER_BIT, vertex_program.handle);
glUseProgramStages(pipeline.handle, GL_FRAGMENT_SHADER_BIT, fragment_program.handle);
// Generate presentation sampler
present_sampler.Create();
glSamplerParameteri(present_sampler.handle, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
// Generate VBO handle for drawing
vertex_buffer.Create();
// Attach vertex data to VAO
glNamedBufferData(vertex_buffer.handle, sizeof(ScreenRectVertex) * 4, nullptr, GL_STREAM_DRAW);
// Allocate textures for the screen
screen_info.texture.resource.Create(GL_TEXTURE_2D);
const GLuint texture = screen_info.texture.resource.handle;
glTextureStorage2D(texture, 1, GL_RGBA8, 1, 1);
screen_info.display_texture = screen_info.texture.resource.handle;
// Clear screen to black
LoadColorToActiveGLTexture(0, 0, 0, 0, screen_info.texture);
// Enable seamless cubemaps when per texture parameters are not available
if (!GLAD_GL_ARB_seamless_cubemap_per_texture && !GLAD_GL_AMD_seamless_cubemap_per_texture) {
glEnable(GL_TEXTURE_CUBE_MAP_SEAMLESS);
}
// Enable unified vertex attributes and query vertex buffer address when the driver supports it
if (device.HasVertexBufferUnifiedMemory()) {
glEnableClientState(GL_VERTEX_ATTRIB_ARRAY_UNIFIED_NV);
glMakeNamedBufferResidentNV(vertex_buffer.handle, GL_READ_ONLY);
glGetNamedBufferParameterui64vNV(vertex_buffer.handle, GL_BUFFER_GPU_ADDRESS_NV,
&vertex_buffer_address);
}
}
void RendererOpenGL::AddTelemetryFields() {
const char* const gl_version{reinterpret_cast<char const*>(glGetString(GL_VERSION))};
const char* const gpu_vendor{reinterpret_cast<char const*>(glGetString(GL_VENDOR))};
const char* const gpu_model{reinterpret_cast<char const*>(glGetString(GL_RENDERER))};
LOG_INFO(Render_OpenGL, "GL_VERSION: {}", gl_version);
LOG_INFO(Render_OpenGL, "GL_VENDOR: {}", gpu_vendor);
LOG_INFO(Render_OpenGL, "GL_RENDERER: {}", gpu_model);
constexpr auto user_system = Common::Telemetry::FieldType::UserSystem;
telemetry_session.AddField(user_system, "GPU_Vendor", std::string(gpu_vendor));
telemetry_session.AddField(user_system, "GPU_Model", std::string(gpu_model));
telemetry_session.AddField(user_system, "GPU_OpenGL_Version", std::string(gl_version));
}
void RendererOpenGL::CreateRasterizer() {
if (rasterizer) {
return;
}
rasterizer = std::make_unique<RasterizerOpenGL>(emu_window, gpu, cpu_memory, device,
screen_info, program_manager, state_tracker);
}
void RendererOpenGL::ConfigureFramebufferTexture(TextureInfo& texture,
const Tegra::FramebufferConfig& framebuffer) {
texture.width = framebuffer.width;
texture.height = framebuffer.height;
texture.pixel_format = framebuffer.pixel_format;
const auto pixel_format{
VideoCore::Surface::PixelFormatFromGPUPixelFormat(framebuffer.pixel_format)};
const u32 bytes_per_pixel{VideoCore::Surface::BytesPerBlock(pixel_format)};
gl_framebuffer_data.resize(texture.width * texture.height * bytes_per_pixel);
GLint internal_format;
switch (framebuffer.pixel_format) {
case Tegra::FramebufferConfig::PixelFormat::A8B8G8R8_UNORM:
internal_format = GL_RGBA8;
texture.gl_format = GL_RGBA;
texture.gl_type = GL_UNSIGNED_INT_8_8_8_8_REV;
break;
case Tegra::FramebufferConfig::PixelFormat::RGB565_UNORM:
internal_format = GL_RGB565;
texture.gl_format = GL_RGB;
texture.gl_type = GL_UNSIGNED_SHORT_5_6_5;
break;
default:
internal_format = GL_RGBA8;
texture.gl_format = GL_RGBA;
texture.gl_type = GL_UNSIGNED_INT_8_8_8_8_REV;
// UNIMPLEMENTED_MSG("Unknown framebuffer pixel format: {}",
// static_cast<u32>(framebuffer.pixel_format));
}
texture.resource.Release();
texture.resource.Create(GL_TEXTURE_2D);
glTextureStorage2D(texture.resource.handle, 1, internal_format, texture.width, texture.height);
}
void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) {
if (renderer_settings.set_background_color) {
// Update background color before drawing
glClearColor(Settings::values.bg_red.GetValue(), Settings::values.bg_green.GetValue(),
Settings::values.bg_blue.GetValue(), 0.0f);
}
// Set projection matrix
const std::array ortho_matrix =
MakeOrthographicMatrix(static_cast<float>(layout.width), static_cast<float>(layout.height));
glProgramUniformMatrix3x2fv(vertex_program.handle, ModelViewMatrixLocation, 1, GL_FALSE,
std::data(ortho_matrix));
const auto& texcoords = screen_info.display_texcoords;
auto left = texcoords.left;
auto right = texcoords.right;
if (framebuffer_transform_flags != Tegra::FramebufferConfig::TransformFlags::Unset) {
if (framebuffer_transform_flags == Tegra::FramebufferConfig::TransformFlags::FlipV) {
// Flip the framebuffer vertically
left = texcoords.right;
right = texcoords.left;
} else {
// Other transformations are unsupported
LOG_CRITICAL(Render_OpenGL, "Unsupported framebuffer_transform_flags={}",
framebuffer_transform_flags);
UNIMPLEMENTED();
}
}
ASSERT_MSG(framebuffer_crop_rect.top == 0, "Unimplemented");
ASSERT_MSG(framebuffer_crop_rect.left == 0, "Unimplemented");
// Scale the output by the crop width/height. This is commonly used with 1280x720 rendering
// (e.g. handheld mode) on a 1920x1080 framebuffer.
f32 scale_u = 1.f, scale_v = 1.f;
if (framebuffer_crop_rect.GetWidth() > 0) {
scale_u = static_cast<f32>(framebuffer_crop_rect.GetWidth()) /
static_cast<f32>(screen_info.texture.width);
}
if (framebuffer_crop_rect.GetHeight() > 0) {
scale_v = static_cast<f32>(framebuffer_crop_rect.GetHeight()) /
static_cast<f32>(screen_info.texture.height);
}
const auto& screen = layout.screen;
const std::array vertices = {
ScreenRectVertex(screen.left, screen.top, texcoords.top * scale_u, left * scale_v),
ScreenRectVertex(screen.right, screen.top, texcoords.bottom * scale_u, left * scale_v),
ScreenRectVertex(screen.left, screen.bottom, texcoords.top * scale_u, right * scale_v),
ScreenRectVertex(screen.right, screen.bottom, texcoords.bottom * scale_u, right * scale_v),
};
glNamedBufferSubData(vertex_buffer.handle, 0, sizeof(vertices), std::data(vertices));
// TODO: Signal state tracker about these changes
state_tracker.NotifyScreenDrawVertexArray();
state_tracker.NotifyPolygonModes();
state_tracker.NotifyViewport0();
state_tracker.NotifyScissor0();
state_tracker.NotifyColorMask(0);
state_tracker.NotifyBlend0();
state_tracker.NotifyFramebuffer();
state_tracker.NotifyFrontFace();
state_tracker.NotifyCullTest();
state_tracker.NotifyDepthTest();
state_tracker.NotifyStencilTest();
state_tracker.NotifyPolygonOffset();
state_tracker.NotifyRasterizeEnable();
state_tracker.NotifyFramebufferSRGB();
state_tracker.NotifyLogicOp();
state_tracker.NotifyClipControl();
state_tracker.NotifyAlphaTest();
program_manager.BindHostPipeline(pipeline.handle);
glEnable(GL_CULL_FACE);
if (screen_info.display_srgb) {
glEnable(GL_FRAMEBUFFER_SRGB);
} else {
glDisable(GL_FRAMEBUFFER_SRGB);
}
glDisable(GL_COLOR_LOGIC_OP);
glDisable(GL_DEPTH_TEST);
glDisable(GL_STENCIL_TEST);
glDisable(GL_POLYGON_OFFSET_FILL);
glDisable(GL_RASTERIZER_DISCARD);
glDisable(GL_ALPHA_TEST);
glDisablei(GL_BLEND, 0);
glDisablei(GL_SCISSOR_TEST, 0);
glPolygonMode(GL_FRONT_AND_BACK, GL_FILL);
glCullFace(GL_BACK);
glFrontFace(GL_CW);
glColorMaski(0, GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE);
glClipControl(GL_LOWER_LEFT, GL_ZERO_TO_ONE);
glViewportIndexedf(0, 0.0f, 0.0f, static_cast<GLfloat>(layout.width),
static_cast<GLfloat>(layout.height));
glDepthRangeIndexed(0, 0.0, 0.0);
glEnableVertexAttribArray(PositionLocation);
glEnableVertexAttribArray(TexCoordLocation);
glVertexAttribDivisor(PositionLocation, 0);
glVertexAttribDivisor(TexCoordLocation, 0);
glVertexAttribFormat(PositionLocation, 2, GL_FLOAT, GL_FALSE,
offsetof(ScreenRectVertex, position));
glVertexAttribFormat(TexCoordLocation, 2, GL_FLOAT, GL_FALSE,
offsetof(ScreenRectVertex, tex_coord));
glVertexAttribBinding(PositionLocation, 0);
glVertexAttribBinding(TexCoordLocation, 0);
if (device.HasVertexBufferUnifiedMemory()) {
glBindVertexBuffer(0, 0, 0, sizeof(ScreenRectVertex));
glBufferAddressRangeNV(GL_VERTEX_ATTRIB_ARRAY_ADDRESS_NV, 0, vertex_buffer_address,
sizeof(vertices));
} else {
glBindVertexBuffer(0, vertex_buffer.handle, 0, sizeof(ScreenRectVertex));
}
glBindTextureUnit(0, screen_info.display_texture);
glBindSampler(0, present_sampler.handle);
glClear(GL_COLOR_BUFFER_BIT);
glDrawArrays(GL_TRIANGLE_STRIP, 0, 4);
program_manager.RestoreGuestPipeline();
}
void RendererOpenGL::RenderScreenshot() {
if (!renderer_settings.screenshot_requested) {
return;
}
GLint old_read_fb;
GLint old_draw_fb;
glGetIntegerv(GL_READ_FRAMEBUFFER_BINDING, &old_read_fb);
glGetIntegerv(GL_DRAW_FRAMEBUFFER_BINDING, &old_draw_fb);
// Draw the current frame to the screenshot framebuffer
screenshot_framebuffer.Create();
glBindFramebuffer(GL_FRAMEBUFFER, screenshot_framebuffer.handle);
const Layout::FramebufferLayout layout{renderer_settings.screenshot_framebuffer_layout};
GLuint renderbuffer;
glGenRenderbuffers(1, &renderbuffer);
glBindRenderbuffer(GL_RENDERBUFFER, renderbuffer);
glRenderbufferStorage(GL_RENDERBUFFER, screen_info.display_srgb ? GL_SRGB8 : GL_RGB8,
layout.width, layout.height);
glFramebufferRenderbuffer(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_RENDERBUFFER, renderbuffer);
DrawScreen(layout);
glBindBuffer(GL_PIXEL_PACK_BUFFER, 0);
glPixelStorei(GL_PACK_ROW_LENGTH, 0);
glReadPixels(0, 0, layout.width, layout.height, GL_BGRA, GL_UNSIGNED_INT_8_8_8_8_REV,
renderer_settings.screenshot_bits);
screenshot_framebuffer.Release();
glDeleteRenderbuffers(1, &renderbuffer);
glBindFramebuffer(GL_READ_FRAMEBUFFER, old_read_fb);
glBindFramebuffer(GL_DRAW_FRAMEBUFFER, old_draw_fb);
renderer_settings.screenshot_complete_callback();
renderer_settings.screenshot_requested = false;
}
bool RendererOpenGL::Init() {
if (Settings::values.renderer_debug && GLAD_GL_KHR_debug) {
glEnable(GL_DEBUG_OUTPUT);
glEnable(GL_DEBUG_OUTPUT_SYNCHRONOUS);
glDebugMessageCallback(DebugHandler, nullptr);
}
AddTelemetryFields();
if (!GLAD_GL_VERSION_4_3) {
return false;
}
InitOpenGLObjects();
CreateRasterizer();
return true;
}
void RendererOpenGL::ShutDown() {}
} // namespace OpenGL

View File

@@ -0,0 +1,129 @@
// Copyright 2014 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <vector>
#include <glad/glad.h>
#include "common/common_types.h"
#include "common/math_util.h"
#include "video_core/renderer_base.h"
#include "video_core/renderer_opengl/gl_device.h"
#include "video_core/renderer_opengl/gl_resource_manager.h"
#include "video_core/renderer_opengl/gl_shader_manager.h"
#include "video_core/renderer_opengl/gl_state_tracker.h"
namespace Core {
class System;
class TelemetrySession;
} // namespace Core
namespace Core::Frontend {
class EmuWindow;
}
namespace Core::Memory {
class Memory;
}
namespace Layout {
struct FramebufferLayout;
}
namespace Tegra {
class GPU;
}
namespace OpenGL {
/// Structure used for storing information about the textures for the Switch screen
struct TextureInfo {
OGLTexture resource;
GLsizei width;
GLsizei height;
GLenum gl_format;
GLenum gl_type;
Tegra::FramebufferConfig::PixelFormat pixel_format;
};
/// Structure used for storing information about the display target for the Switch screen
struct ScreenInfo {
GLuint display_texture{};
bool display_srgb{};
const Common::Rectangle<float> display_texcoords{0.0f, 0.0f, 1.0f, 1.0f};
TextureInfo texture;
};
class RendererOpenGL final : public VideoCore::RendererBase {
public:
explicit RendererOpenGL(Core::TelemetrySession& telemetry_session_,
Core::Frontend::EmuWindow& emu_window_,
Core::Memory::Memory& cpu_memory_, Tegra::GPU& gpu_,
std::unique_ptr<Core::Frontend::GraphicsContext> context_);
~RendererOpenGL() override;
bool Init() override;
void ShutDown() override;
void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) override;
private:
/// Initializes the OpenGL state and creates persistent objects.
void InitOpenGLObjects();
void AddTelemetryFields();
void CreateRasterizer();
void ConfigureFramebufferTexture(TextureInfo& texture,
const Tegra::FramebufferConfig& framebuffer);
/// Draws the emulated screens to the emulator window.
void DrawScreen(const Layout::FramebufferLayout& layout);
void RenderScreenshot();
/// Loads framebuffer from emulated memory into the active OpenGL texture.
void LoadFBToScreenInfo(const Tegra::FramebufferConfig& framebuffer);
/// Fills active OpenGL texture with the given RGB color.Since the color is solid, the texture
/// can be 1x1 but will stretch across whatever it's rendered on.
void LoadColorToActiveGLTexture(u8 color_r, u8 color_g, u8 color_b, u8 color_a,
const TextureInfo& texture);
void PrepareRendertarget(const Tegra::FramebufferConfig* framebuffer);
Core::TelemetrySession& telemetry_session;
Core::Frontend::EmuWindow& emu_window;
Core::Memory::Memory& cpu_memory;
Tegra::GPU& gpu;
const Device device;
StateTracker state_tracker{gpu};
// OpenGL object IDs
OGLSampler present_sampler;
OGLBuffer vertex_buffer;
OGLProgram vertex_program;
OGLProgram fragment_program;
OGLPipeline pipeline;
OGLFramebuffer screenshot_framebuffer;
// GPU address of the vertex buffer
GLuint64EXT vertex_buffer_address = 0;
/// Display information for Switch screen
ScreenInfo screen_info;
/// Global dummy shader pipeline
ProgramManager program_manager;
/// OpenGL framebuffer data
std::vector<u8> gl_framebuffer_data;
/// Used for transforming the framebuffer orientation
Tegra::FramebufferConfig::TransformFlags framebuffer_transform_flags{};
Common::Rectangle<int> framebuffer_crop_rect;
};
} // namespace OpenGL

View File

@@ -0,0 +1,270 @@
// Copyright 2020 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <bit>
#include <glad/glad.h>
#include "common/alignment.h"
#include "common/assert.h"
#include "common/common_types.h"
#include "common/div_ceil.h"
#include "video_core/host_shaders/block_linear_unswizzle_2d_comp.h"
#include "video_core/host_shaders/block_linear_unswizzle_3d_comp.h"
#include "video_core/host_shaders/opengl_copy_bc4_comp.h"
#include "video_core/host_shaders/pitch_unswizzle_comp.h"
#include "video_core/renderer_opengl/gl_resource_manager.h"
#include "video_core/renderer_opengl/gl_shader_manager.h"
#include "video_core/renderer_opengl/gl_texture_cache.h"
#include "video_core/renderer_opengl/util_shaders.h"
#include "video_core/surface.h"
#include "video_core/texture_cache/types.h"
#include "video_core/texture_cache/util.h"
#include "video_core/textures/decoders.h"
namespace OpenGL {
using namespace HostShaders;
using Tegra::Texture::GOB_SIZE_SHIFT;
using Tegra::Texture::GOB_SIZE_X;
using Tegra::Texture::GOB_SIZE_X_SHIFT;
using Tegra::Texture::GOB_SIZE_Y_SHIFT;
using VideoCommon::Extent3D;
using VideoCommon::ImageCopy;
using VideoCommon::ImageType;
using VideoCommon::SwizzleParameters;
using VideoCore::Surface::BytesPerBlock;
namespace {
OGLProgram MakeProgram(std::string_view source) {
OGLShader shader;
shader.Create(source, GL_COMPUTE_SHADER);
OGLProgram program;
program.Create(true, false, shader.handle);
return program;
}
} // Anonymous namespace
UtilShaders::UtilShaders(ProgramManager& program_manager_)
: program_manager{program_manager_},
block_linear_unswizzle_2d_program(MakeProgram(BLOCK_LINEAR_UNSWIZZLE_2D_COMP)),
block_linear_unswizzle_3d_program(MakeProgram(BLOCK_LINEAR_UNSWIZZLE_3D_COMP)),
pitch_unswizzle_program(MakeProgram(PITCH_UNSWIZZLE_COMP)),
copy_bc4_program(MakeProgram(OPENGL_COPY_BC4_COMP)) {
const auto swizzle_table = Tegra::Texture::MakeSwizzleTable();
swizzle_table_buffer.Create();
glNamedBufferStorage(swizzle_table_buffer.handle, sizeof(swizzle_table), &swizzle_table, 0);
}
UtilShaders::~UtilShaders() = default;
void UtilShaders::BlockLinearUpload2D(Image& image, const ImageBufferMap& map, size_t buffer_offset,
std::span<const SwizzleParameters> swizzles) {
static constexpr Extent3D WORKGROUP_SIZE{32, 32, 1};
static constexpr GLuint BINDING_SWIZZLE_BUFFER = 0;
static constexpr GLuint BINDING_INPUT_BUFFER = 1;
static constexpr GLuint BINDING_OUTPUT_IMAGE = 0;
static constexpr GLuint LOC_ORIGIN = 0;
static constexpr GLuint LOC_DESTINATION = 1;
static constexpr GLuint LOC_BYTES_PER_BLOCK = 2;
static constexpr GLuint LOC_LAYER_STRIDE = 3;
static constexpr GLuint LOC_BLOCK_SIZE = 4;
static constexpr GLuint LOC_X_SHIFT = 5;
static constexpr GLuint LOC_BLOCK_HEIGHT = 6;
static constexpr GLuint LOC_BLOCK_HEIGHT_MASK = 7;
const u32 bytes_per_block = BytesPerBlock(image.info.format);
const u32 bytes_per_block_log2 = std::countr_zero(bytes_per_block);
program_manager.BindHostCompute(block_linear_unswizzle_2d_program.handle);
glFlushMappedNamedBufferRange(map.Handle(), buffer_offset, image.guest_size_bytes);
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, BINDING_SWIZZLE_BUFFER, swizzle_table_buffer.handle);
glUniform3ui(LOC_ORIGIN, 0, 0, 0); // TODO
glUniform3i(LOC_DESTINATION, 0, 0, 0); // TODO
glUniform1ui(LOC_BYTES_PER_BLOCK, bytes_per_block_log2);
glUniform1ui(LOC_LAYER_STRIDE, image.info.layer_stride);
for (const SwizzleParameters& swizzle : swizzles) {
const Extent3D block = swizzle.block;
const Extent3D num_tiles = swizzle.num_tiles;
const size_t offset = swizzle.buffer_offset + buffer_offset;
const u32 num_dispatches_x = Common::DivCeil(num_tiles.width, WORKGROUP_SIZE.width);
const u32 num_dispatches_y = Common::DivCeil(num_tiles.height, WORKGROUP_SIZE.height);
const u32 stride_alignment = CalculateLevelStrideAlignment(image.info, swizzle.level);
const u32 stride = Common::AlignBits(num_tiles.width, stride_alignment) * bytes_per_block;
const u32 gobs_in_x = (stride + GOB_SIZE_X - 1) >> GOB_SIZE_X_SHIFT;
const u32 block_size = gobs_in_x << (GOB_SIZE_SHIFT + block.height + block.depth);
const u32 block_height_mask = (1U << block.height) - 1;
const u32 x_shift = GOB_SIZE_SHIFT + block.height + block.depth;
glUniform1ui(LOC_BLOCK_SIZE, block_size);
glUniform1ui(LOC_X_SHIFT, x_shift);
glUniform1ui(LOC_BLOCK_HEIGHT, block.height);
glUniform1ui(LOC_BLOCK_HEIGHT_MASK, block_height_mask);
glBindBufferRange(GL_SHADER_STORAGE_BUFFER, BINDING_INPUT_BUFFER, map.Handle(), offset,
image.guest_size_bytes - swizzle.buffer_offset);
glBindImageTexture(BINDING_OUTPUT_IMAGE, image.Handle(), swizzle.level, GL_TRUE, 0,
GL_WRITE_ONLY, StoreFormat(bytes_per_block));
glDispatchCompute(num_dispatches_x, num_dispatches_y, image.info.resources.layers);
}
program_manager.RestoreGuestCompute();
}
void UtilShaders::BlockLinearUpload3D(Image& image, const ImageBufferMap& map, size_t buffer_offset,
std::span<const SwizzleParameters> swizzles) {
static constexpr Extent3D WORKGROUP_SIZE{16, 8, 8};
static constexpr GLuint BINDING_SWIZZLE_BUFFER = 0;
static constexpr GLuint BINDING_INPUT_BUFFER = 1;
static constexpr GLuint BINDING_OUTPUT_IMAGE = 0;
static constexpr GLuint LOC_ORIGIN = 0;
static constexpr GLuint LOC_DESTINATION = 1;
static constexpr GLuint LOC_BYTES_PER_BLOCK = 2;
static constexpr GLuint SLICE_SIZE_LOC = 3;
static constexpr GLuint LOC_BLOCK_SIZE = 4;
static constexpr GLuint LOC_X_SHIFT = 5;
static constexpr GLuint LOC_BLOCK_HEIGHT = 6;
static constexpr GLuint LOC_BLOCK_HEIGHT_MASK = 7;
static constexpr GLuint BLOCK_DEPTH_LOC = 8;
static constexpr GLuint BLOCK_DEPTH_MASK_LOC = 9;
const u32 bytes_per_block = BytesPerBlock(image.info.format);
const u32 bytes_per_block_log2 = std::countr_zero(bytes_per_block);
glFlushMappedNamedBufferRange(map.Handle(), buffer_offset, image.guest_size_bytes);
program_manager.BindHostCompute(block_linear_unswizzle_3d_program.handle);
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, BINDING_SWIZZLE_BUFFER, swizzle_table_buffer.handle);
glUniform3ui(LOC_ORIGIN, 0, 0, 0); // TODO
glUniform3i(LOC_DESTINATION, 0, 0, 0); // TODO
glUniform1ui(LOC_BYTES_PER_BLOCK, bytes_per_block_log2);
for (const SwizzleParameters& swizzle : swizzles) {
const Extent3D block = swizzle.block;
const Extent3D num_tiles = swizzle.num_tiles;
const size_t offset = swizzle.buffer_offset + buffer_offset;
const u32 num_dispatches_x = Common::DivCeil(num_tiles.width, WORKGROUP_SIZE.width);
const u32 num_dispatches_y = Common::DivCeil(num_tiles.height, WORKGROUP_SIZE.height);
const u32 num_dispatches_z = Common::DivCeil(num_tiles.depth, WORKGROUP_SIZE.depth);
const u32 stride_alignment = CalculateLevelStrideAlignment(image.info, swizzle.level);
const u32 stride = Common::AlignBits(num_tiles.width, stride_alignment) * bytes_per_block;
const u32 gobs_in_x = (stride + GOB_SIZE_X - 1) >> GOB_SIZE_X_SHIFT;
const u32 block_size = gobs_in_x << (GOB_SIZE_SHIFT + block.height + block.depth);
const u32 slice_size =
Common::DivCeilLog2(num_tiles.height, block.height + GOB_SIZE_Y_SHIFT) * block_size;
const u32 block_height_mask = (1U << block.height) - 1;
const u32 block_depth_mask = (1U << block.depth) - 1;
const u32 x_shift = GOB_SIZE_SHIFT + block.height + block.depth;
glUniform1ui(SLICE_SIZE_LOC, slice_size);
glUniform1ui(LOC_BLOCK_SIZE, block_size);
glUniform1ui(LOC_X_SHIFT, x_shift);
glUniform1ui(LOC_BLOCK_HEIGHT, block.height);
glUniform1ui(LOC_BLOCK_HEIGHT_MASK, block_height_mask);
glUniform1ui(BLOCK_DEPTH_LOC, block.depth);
glUniform1ui(BLOCK_DEPTH_MASK_LOC, block_depth_mask);
glBindBufferRange(GL_SHADER_STORAGE_BUFFER, BINDING_INPUT_BUFFER, map.Handle(), offset,
image.guest_size_bytes - swizzle.buffer_offset);
glBindImageTexture(BINDING_OUTPUT_IMAGE, image.Handle(), swizzle.level, GL_TRUE, 0,
GL_WRITE_ONLY, StoreFormat(bytes_per_block));
glDispatchCompute(num_dispatches_x, num_dispatches_y, num_dispatches_z);
}
program_manager.RestoreGuestCompute();
}
void UtilShaders::PitchUpload(Image& image, const ImageBufferMap& map, size_t buffer_offset,
std::span<const SwizzleParameters> swizzles) {
static constexpr Extent3D WORKGROUP_SIZE{32, 32, 1};
static constexpr GLuint BINDING_INPUT_BUFFER = 0;
static constexpr GLuint BINDING_OUTPUT_IMAGE = 0;
static constexpr GLuint LOC_ORIGIN = 0;
static constexpr GLuint LOC_DESTINATION = 1;
static constexpr GLuint LOC_BYTES_PER_BLOCK = 2;
static constexpr GLuint LOC_PITCH = 3;
const u32 bytes_per_block = BytesPerBlock(image.info.format);
const GLenum format = StoreFormat(bytes_per_block);
const u32 pitch = image.info.pitch;
UNIMPLEMENTED_IF_MSG(!std::has_single_bit(bytes_per_block),
"Non-power of two images are not implemented");
program_manager.BindHostCompute(pitch_unswizzle_program.handle);
glFlushMappedNamedBufferRange(map.Handle(), buffer_offset, image.guest_size_bytes);
glUniform2ui(LOC_ORIGIN, 0, 0); // TODO
glUniform2i(LOC_DESTINATION, 0, 0); // TODO
glUniform1ui(LOC_BYTES_PER_BLOCK, bytes_per_block);
glUniform1ui(LOC_PITCH, pitch);
glBindImageTexture(BINDING_OUTPUT_IMAGE, image.Handle(), 0, GL_FALSE, 0, GL_WRITE_ONLY, format);
for (const SwizzleParameters& swizzle : swizzles) {
const Extent3D num_tiles = swizzle.num_tiles;
const size_t offset = swizzle.buffer_offset + buffer_offset;
const u32 aligned_width = Common::AlignUp(num_tiles.width, WORKGROUP_SIZE.width);
const u32 aligned_height = Common::AlignUp(num_tiles.height, WORKGROUP_SIZE.height);
const u32 num_dispatches_x = aligned_width / WORKGROUP_SIZE.width;
const u32 num_dispatches_y = aligned_height / WORKGROUP_SIZE.height;
glBindBufferRange(GL_SHADER_STORAGE_BUFFER, BINDING_INPUT_BUFFER, map.Handle(), offset,
image.guest_size_bytes - swizzle.buffer_offset);
glDispatchCompute(num_dispatches_x, num_dispatches_y, 1);
}
program_manager.RestoreGuestCompute();
}
void UtilShaders::CopyBC4(Image& dst_image, Image& src_image, std::span<const ImageCopy> copies) {
static constexpr GLuint BINDING_INPUT_IMAGE = 0;
static constexpr GLuint BINDING_OUTPUT_IMAGE = 1;
static constexpr GLuint LOC_SRC_OFFSET = 0;
static constexpr GLuint LOC_DST_OFFSET = 1;
program_manager.BindHostCompute(copy_bc4_program.handle);
for (const ImageCopy& copy : copies) {
ASSERT(copy.src_subresource.base_layer == 0);
ASSERT(copy.src_subresource.num_layers == 1);
ASSERT(copy.dst_subresource.base_layer == 0);
ASSERT(copy.dst_subresource.num_layers == 1);
glUniform3ui(LOC_SRC_OFFSET, copy.src_offset.x, copy.src_offset.y, copy.src_offset.z);
glUniform3ui(LOC_DST_OFFSET, copy.dst_offset.x, copy.dst_offset.y, copy.dst_offset.z);
glBindImageTexture(BINDING_INPUT_IMAGE, src_image.Handle(), copy.src_subresource.base_level,
GL_FALSE, 0, GL_READ_ONLY, GL_RG32UI);
glBindImageTexture(BINDING_OUTPUT_IMAGE, dst_image.Handle(),
copy.dst_subresource.base_level, GL_FALSE, 0, GL_WRITE_ONLY, GL_RGBA8UI);
glDispatchCompute(copy.extent.width, copy.extent.height, copy.extent.depth);
}
program_manager.RestoreGuestCompute();
}
GLenum StoreFormat(u32 bytes_per_block) {
switch (bytes_per_block) {
case 1:
return GL_R8UI;
case 2:
return GL_R16UI;
case 4:
return GL_R32UI;
case 8:
return GL_RG32UI;
case 16:
return GL_RGBA32UI;
}
UNREACHABLE();
return GL_R8UI;
}
} // namespace OpenGL

View File

@@ -0,0 +1,51 @@
// Copyright 2020 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <span>
#include <glad/glad.h>
#include "common/common_types.h"
#include "video_core/renderer_opengl/gl_resource_manager.h"
#include "video_core/texture_cache/types.h"
namespace OpenGL {
class Image;
class ImageBufferMap;
class ProgramManager;
class UtilShaders {
public:
explicit UtilShaders(ProgramManager& program_manager);
~UtilShaders();
void BlockLinearUpload2D(Image& image, const ImageBufferMap& map, size_t buffer_offset,
std::span<const VideoCommon::SwizzleParameters> swizzles);
void BlockLinearUpload3D(Image& image, const ImageBufferMap& map, size_t buffer_offset,
std::span<const VideoCommon::SwizzleParameters> swizzles);
void PitchUpload(Image& image, const ImageBufferMap& map, size_t buffer_offset,
std::span<const VideoCommon::SwizzleParameters> swizzles);
void CopyBC4(Image& dst_image, Image& src_image,
std::span<const VideoCommon::ImageCopy> copies);
private:
ProgramManager& program_manager;
OGLBuffer swizzle_table_buffer;
OGLProgram block_linear_unswizzle_2d_program;
OGLProgram block_linear_unswizzle_3d_program;
OGLProgram pitch_unswizzle_program;
OGLProgram copy_bc4_program;
};
GLenum StoreFormat(u32 bytes_per_block);
} // namespace OpenGL