yuzu/src/video_core/renderer_opengl/util_shaders.cpp

352 lines
16 KiB
C++
Raw Normal View History

2020-12-28 19:15:37 +04:00
// Copyright 2020 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
2020-12-29 13:54:52 +04:00
#include <span>
#include <string_view>
2020-12-28 19:15:37 +04:00
#include <glad/glad.h>
#include "common/assert.h"
#include "common/common_types.h"
#include "common/div_ceil.h"
2021-02-14 06:46:35 +04:00
#include "video_core/host_shaders/astc_decoder_comp.h"
2020-12-28 19:15:37 +04:00
#include "video_core/host_shaders/block_linear_unswizzle_2d_comp.h"
#include "video_core/host_shaders/block_linear_unswizzle_3d_comp.h"
#include "video_core/host_shaders/opengl_copy_bc4_comp.h"
2021-02-13 23:52:45 +04:00
#include "video_core/host_shaders/opengl_copy_bgra_comp.h"
2020-12-28 19:15:37 +04:00
#include "video_core/host_shaders/pitch_unswizzle_comp.h"
#include "video_core/renderer_opengl/gl_shader_manager.h"
2021-07-10 01:54:15 +04:00
#include "video_core/renderer_opengl/gl_shader_util.h"
2020-12-28 19:15:37 +04:00
#include "video_core/renderer_opengl/gl_texture_cache.h"
#include "video_core/renderer_opengl/util_shaders.h"
2020-12-29 13:54:52 +04:00
#include "video_core/texture_cache/accelerated_swizzle.h"
2020-12-28 19:15:37 +04:00
#include "video_core/texture_cache/types.h"
#include "video_core/texture_cache/util.h"
2021-02-14 06:46:35 +04:00
#include "video_core/textures/astc.h"
2020-12-28 19:15:37 +04:00
#include "video_core/textures/decoders.h"
namespace OpenGL {
using namespace HostShaders;
2021-02-14 06:46:35 +04:00
using namespace Tegra::Texture::ASTC;
2020-12-28 19:15:37 +04:00
2021-02-14 06:46:35 +04:00
using VideoCommon::Extent2D;
2020-12-28 19:15:37 +04:00
using VideoCommon::Extent3D;
using VideoCommon::ImageCopy;
using VideoCommon::ImageType;
using VideoCommon::SwizzleParameters;
2020-12-29 13:54:52 +04:00
using VideoCommon::Accelerated::MakeBlockLinearSwizzle2DParams;
using VideoCommon::Accelerated::MakeBlockLinearSwizzle3DParams;
2020-12-28 19:15:37 +04:00
using VideoCore::Surface::BytesPerBlock;
namespace {
/// Builds an OpenGL program from `source` by forwarding it to CreateProgram with the
/// GL_COMPUTE_SHADER stage.
OGLProgram MakeProgram(std::string_view source) {
    OGLProgram compute_program = CreateProgram(source, GL_COMPUTE_SHADER);
    return compute_program;
}
2021-03-05 12:48:55 +04:00
/// Returns width * height * source layer count for `copy`.
///
/// Each factor is widened to size_t *before* multiplying. The previous form multiplied in the
/// operands' own type and cast only the final product, so a large enough copy could wrap around
/// before being widened (assuming the extent/layer fields are 32-bit - TODO confirm).
size_t NumPixelsInCopy(const VideoCommon::ImageCopy& copy) {
    const auto width = static_cast<size_t>(copy.extent.width);
    const auto height = static_cast<size_t>(copy.extent.height);
    const auto layers = static_cast<size_t>(copy.src_subresource.num_layers);
    return width * height * layers;
}
2020-12-28 19:15:37 +04:00
} // Anonymous namespace
/// Builds every utility compute program and uploads the two constant lookup buffers
/// (the swizzle table and the ASTC encoding values) used by the dispatch helpers.
UtilShaders::UtilShaders(ProgramManager& program_manager_)
    : program_manager{program_manager_}, astc_decoder_program(MakeProgram(ASTC_DECODER_COMP)),
      block_linear_unswizzle_2d_program(MakeProgram(BLOCK_LINEAR_UNSWIZZLE_2D_COMP)),
      block_linear_unswizzle_3d_program(MakeProgram(BLOCK_LINEAR_UNSWIZZLE_3D_COMP)),
      pitch_unswizzle_program(MakeProgram(PITCH_UNSWIZZLE_COMP)),
      copy_bgra_program(MakeProgram(OPENGL_COPY_BGRA_COMP)),
      copy_bc4_program(MakeProgram(OPENGL_COPY_BC4_COMP)) {
    // Both storages are created with no flags set
    constexpr GLbitfield no_storage_flags = 0;

    // Swizzle table buffer
    const auto swizzle_table = Tegra::Texture::MakeSwizzleTable();
    swizzle_table_buffer.Create();
    glNamedBufferStorage(swizzle_table_buffer.handle, sizeof(swizzle_table), &swizzle_table,
                         no_storage_flags);

    // ASTC encoding values buffer
    astc_buffer.Create();
    glNamedBufferStorage(astc_buffer.handle, sizeof(ASTC_ENCODINGS_VALUES), &ASTC_ENCODINGS_VALUES,
                         no_storage_flags);
}
// Defaulted destructor: no explicit teardown is written here.
UtilShaders::~UtilShaders() = default;
2021-02-14 06:46:35 +04:00
/// Runs the ASTC decoder compute program once per entry of `swizzles`.
/// Input data is read from `map.buffer` (starting at `map.offset` plus the per-swizzle offset)
/// and the result is written through an RGBA8 image binding of `image.StorageHandle()`.
void UtilShaders::ASTCDecode(Image& image, const ImageBufferMap& map,
                             std::span<const VideoCommon::SwizzleParameters> swizzles) {
    static constexpr GLuint BINDING_SWIZZLE_BUFFER = 0;
    static constexpr GLuint BINDING_INPUT_BUFFER = 1;
    static constexpr GLuint BINDING_ENC_BUFFER = 2;
    static constexpr GLuint BINDING_OUTPUT_IMAGE = 0;
    // Divisor applied to the tile counts to obtain the dispatch counts on X and Y
    static constexpr u32 TILES_PER_DISPATCH = 32U;

    const auto pixel_format = image.info.format;
    const Extent2D tile_size{
        .width = VideoCore::Surface::DefaultBlockWidth(pixel_format),
        .height = VideoCore::Surface::DefaultBlockHeight(pixel_format),
    };

    program_manager.BindComputeProgram(astc_decoder_program.handle);
    glBindBufferBase(GL_SHADER_STORAGE_BUFFER, BINDING_SWIZZLE_BUFFER, swizzle_table_buffer.handle);
    glBindBufferBase(GL_SHADER_STORAGE_BUFFER, BINDING_ENC_BUFFER, astc_buffer.handle);

    glFlushMappedNamedBufferRange(map.buffer, map.offset, image.guest_size_bytes);
    glUniform2ui(1, tile_size.width, tile_size.height);

    // Ensure buffer data is valid before dispatching
    glFlush();

    for (const SwizzleParameters& swizzle : swizzles) {
        const size_t input_offset = swizzle.buffer_offset + map.offset;
        const auto input_size = image.guest_size_bytes - swizzle.buffer_offset;
        const u32 groups_x = Common::DivCeil(swizzle.num_tiles.width, TILES_PER_DISPATCH);
        const u32 groups_y = Common::DivCeil(swizzle.num_tiles.height, TILES_PER_DISPATCH);

        const auto params = MakeBlockLinearSwizzle2DParams(swizzle, image.info);
        // Origin and destination are not uploaded as uniforms here; they are asserted to be zero
        ASSERT(params.origin == (std::array<u32, 3>{0, 0, 0}));
        ASSERT(params.destination == (std::array<s32, 3>{0, 0, 0}));

        glUniform1ui(2, params.bytes_per_block_log2);
        glUniform1ui(3, params.layer_stride);
        glUniform1ui(4, params.block_size);
        glUniform1ui(5, params.x_shift);
        glUniform1ui(6, params.block_height);
        glUniform1ui(7, params.block_height_mask);

        glBindImageTexture(BINDING_OUTPUT_IMAGE, image.StorageHandle(), swizzle.level, GL_TRUE, 0,
                           GL_WRITE_ONLY, GL_RGBA8);
        // ASTC texture data
        glBindBufferRange(GL_SHADER_STORAGE_BUFFER, BINDING_INPUT_BUFFER, map.buffer, input_offset,
                          input_size);

        glDispatchCompute(groups_x, groups_y, image.info.resources.layers);
    }

    // Precautionary barrier to ensure the compute shader is done decoding prior to texture access.
    // GL_TEXTURE_FETCH_BARRIER_BIT and GL_SHADER_IMAGE_ACCESS_BARRIER_BIT are used in a separate
    // glMemoryBarrier call by the texture cache runtime
    constexpr GLbitfield barrier_bits =
        GL_UNIFORM_BARRIER_BIT | GL_COMMAND_BARRIER_BIT | GL_PIXEL_BUFFER_BARRIER_BIT |
        GL_TEXTURE_UPDATE_BARRIER_BIT | GL_BUFFER_UPDATE_BARRIER_BIT |
        GL_SHADER_STORAGE_BARRIER_BIT | GL_CLIENT_MAPPED_BUFFER_BARRIER_BIT;
    glMemoryBarrier(barrier_bits);

    program_manager.RestoreGuestCompute();
}
2021-01-18 03:31:15 +04:00
void UtilShaders::BlockLinearUpload2D(Image& image, const ImageBufferMap& map,
2020-12-28 19:15:37 +04:00
std::span<const SwizzleParameters> swizzles) {
static constexpr Extent3D WORKGROUP_SIZE{32, 32, 1};
static constexpr GLuint BINDING_SWIZZLE_BUFFER = 0;
static constexpr GLuint BINDING_INPUT_BUFFER = 1;
static constexpr GLuint BINDING_OUTPUT_IMAGE = 0;
2021-07-15 04:45:51 +04:00
program_manager.BindComputeProgram(block_linear_unswizzle_2d_program.handle);
2021-01-18 03:31:15 +04:00
glFlushMappedNamedBufferRange(map.buffer, map.offset, image.guest_size_bytes);
2020-12-28 19:15:37 +04:00
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, BINDING_SWIZZLE_BUFFER, swizzle_table_buffer.handle);
2020-12-29 13:54:52 +04:00
const GLenum store_format = StoreFormat(BytesPerBlock(image.info.format));
2020-12-28 19:15:37 +04:00
for (const SwizzleParameters& swizzle : swizzles) {
const Extent3D num_tiles = swizzle.num_tiles;
2021-01-18 03:31:15 +04:00
const size_t input_offset = swizzle.buffer_offset + map.offset;
2020-12-28 19:15:37 +04:00
const u32 num_dispatches_x = Common::DivCeil(num_tiles.width, WORKGROUP_SIZE.width);
const u32 num_dispatches_y = Common::DivCeil(num_tiles.height, WORKGROUP_SIZE.height);
2020-12-29 13:54:52 +04:00
const auto params = MakeBlockLinearSwizzle2DParams(swizzle, image.info);
glUniform3uiv(0, 1, params.origin.data());
glUniform3iv(1, 1, params.destination.data());
glUniform1ui(2, params.bytes_per_block_log2);
glUniform1ui(3, params.layer_stride);
glUniform1ui(4, params.block_size);
glUniform1ui(5, params.x_shift);
glUniform1ui(6, params.block_height);
glUniform1ui(7, params.block_height_mask);
2021-01-17 06:19:34 +04:00
glBindBufferRange(GL_SHADER_STORAGE_BUFFER, BINDING_INPUT_BUFFER, map.buffer, input_offset,
image.guest_size_bytes - swizzle.buffer_offset);
2021-02-13 23:52:45 +04:00
glBindImageTexture(BINDING_OUTPUT_IMAGE, image.StorageHandle(), swizzle.level, GL_TRUE, 0,
2020-12-29 13:54:52 +04:00
GL_WRITE_ONLY, store_format);
2020-12-28 19:15:37 +04:00
glDispatchCompute(num_dispatches_x, num_dispatches_y, image.info.resources.layers);
}
program_manager.RestoreGuestCompute();
}
2021-01-18 03:31:15 +04:00
void UtilShaders::BlockLinearUpload3D(Image& image, const ImageBufferMap& map,
2020-12-28 19:15:37 +04:00
std::span<const SwizzleParameters> swizzles) {
static constexpr Extent3D WORKGROUP_SIZE{16, 8, 8};
static constexpr GLuint BINDING_SWIZZLE_BUFFER = 0;
static constexpr GLuint BINDING_INPUT_BUFFER = 1;
static constexpr GLuint BINDING_OUTPUT_IMAGE = 0;
2021-01-18 03:31:15 +04:00
glFlushMappedNamedBufferRange(map.buffer, map.offset, image.guest_size_bytes);
2021-07-15 04:45:51 +04:00
program_manager.BindComputeProgram(block_linear_unswizzle_3d_program.handle);
2020-12-28 19:15:37 +04:00
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, BINDING_SWIZZLE_BUFFER, swizzle_table_buffer.handle);
2020-12-29 13:54:52 +04:00
const GLenum store_format = StoreFormat(BytesPerBlock(image.info.format));
2020-12-28 19:15:37 +04:00
for (const SwizzleParameters& swizzle : swizzles) {
const Extent3D num_tiles = swizzle.num_tiles;
2021-01-18 03:31:15 +04:00
const size_t input_offset = swizzle.buffer_offset + map.offset;
2020-12-28 19:15:37 +04:00
const u32 num_dispatches_x = Common::DivCeil(num_tiles.width, WORKGROUP_SIZE.width);
const u32 num_dispatches_y = Common::DivCeil(num_tiles.height, WORKGROUP_SIZE.height);
const u32 num_dispatches_z = Common::DivCeil(num_tiles.depth, WORKGROUP_SIZE.depth);
2020-12-29 13:54:52 +04:00
const auto params = MakeBlockLinearSwizzle3DParams(swizzle, image.info);
glUniform3uiv(0, 1, params.origin.data());
glUniform3iv(1, 1, params.destination.data());
glUniform1ui(2, params.bytes_per_block_log2);
glUniform1ui(3, params.slice_size);
glUniform1ui(4, params.block_size);
glUniform1ui(5, params.x_shift);
glUniform1ui(6, params.block_height);
glUniform1ui(7, params.block_height_mask);
glUniform1ui(8, params.block_depth);
glUniform1ui(9, params.block_depth_mask);
2021-01-17 06:19:34 +04:00
glBindBufferRange(GL_SHADER_STORAGE_BUFFER, BINDING_INPUT_BUFFER, map.buffer, input_offset,
image.guest_size_bytes - swizzle.buffer_offset);
2021-02-13 23:52:45 +04:00
glBindImageTexture(BINDING_OUTPUT_IMAGE, image.StorageHandle(), swizzle.level, GL_TRUE, 0,
2020-12-29 13:54:52 +04:00
GL_WRITE_ONLY, store_format);
2020-12-28 19:15:37 +04:00
glDispatchCompute(num_dispatches_x, num_dispatches_y, num_dispatches_z);
}
program_manager.RestoreGuestCompute();
}
2021-01-18 03:31:15 +04:00
void UtilShaders::PitchUpload(Image& image, const ImageBufferMap& map,
2020-12-28 19:15:37 +04:00
std::span<const SwizzleParameters> swizzles) {
static constexpr Extent3D WORKGROUP_SIZE{32, 32, 1};
static constexpr GLuint BINDING_INPUT_BUFFER = 0;
static constexpr GLuint BINDING_OUTPUT_IMAGE = 0;
static constexpr GLuint LOC_ORIGIN = 0;
static constexpr GLuint LOC_DESTINATION = 1;
static constexpr GLuint LOC_BYTES_PER_BLOCK = 2;
static constexpr GLuint LOC_PITCH = 3;
const u32 bytes_per_block = BytesPerBlock(image.info.format);
const GLenum format = StoreFormat(bytes_per_block);
const u32 pitch = image.info.pitch;
UNIMPLEMENTED_IF_MSG(!std::has_single_bit(bytes_per_block),
"Non-power of two images are not implemented");
2021-07-15 04:45:51 +04:00
program_manager.BindComputeProgram(pitch_unswizzle_program.handle);
2021-01-18 03:31:15 +04:00
glFlushMappedNamedBufferRange(map.buffer, map.offset, image.guest_size_bytes);
2020-12-29 13:54:52 +04:00
glUniform2ui(LOC_ORIGIN, 0, 0);
glUniform2i(LOC_DESTINATION, 0, 0);
2020-12-28 19:15:37 +04:00
glUniform1ui(LOC_BYTES_PER_BLOCK, bytes_per_block);
glUniform1ui(LOC_PITCH, pitch);
2021-02-13 23:52:45 +04:00
glBindImageTexture(BINDING_OUTPUT_IMAGE, image.StorageHandle(), 0, GL_FALSE, 0, GL_WRITE_ONLY,
format);
2020-12-28 19:15:37 +04:00
for (const SwizzleParameters& swizzle : swizzles) {
const Extent3D num_tiles = swizzle.num_tiles;
2021-01-18 03:31:15 +04:00
const size_t input_offset = swizzle.buffer_offset + map.offset;
2020-12-28 19:15:37 +04:00
2020-12-29 13:54:52 +04:00
const u32 num_dispatches_x = Common::DivCeil(num_tiles.width, WORKGROUP_SIZE.width);
const u32 num_dispatches_y = Common::DivCeil(num_tiles.height, WORKGROUP_SIZE.height);
2020-12-28 19:15:37 +04:00
2021-01-17 06:19:34 +04:00
glBindBufferRange(GL_SHADER_STORAGE_BUFFER, BINDING_INPUT_BUFFER, map.buffer, input_offset,
image.guest_size_bytes - swizzle.buffer_offset);
2020-12-28 19:15:37 +04:00
glDispatchCompute(num_dispatches_x, num_dispatches_y, 1);
}
program_manager.RestoreGuestCompute();
}
/// Runs the BC4 copy compute program for each entry of `copies`, binding `src_image` as an
/// RG32UI read-only image and `dst_image` as an RGBA8UI write-only image.
/// Every copy is asserted to address exactly one layer starting at base layer 0.
void UtilShaders::CopyBC4(Image& dst_image, Image& src_image, std::span<const ImageCopy> copies) {
    static constexpr GLuint BINDING_INPUT_IMAGE = 0;
    static constexpr GLuint BINDING_OUTPUT_IMAGE = 1;
    static constexpr GLuint LOC_SRC_OFFSET = 0;
    static constexpr GLuint LOC_DST_OFFSET = 1;

    program_manager.BindComputeProgram(copy_bc4_program.handle);

    for (const ImageCopy& copy : copies) {
        ASSERT(copy.src_subresource.base_layer == 0);
        ASSERT(copy.src_subresource.num_layers == 1);
        ASSERT(copy.dst_subresource.base_layer == 0);
        ASSERT(copy.dst_subresource.num_layers == 1);

        const auto& src_offset = copy.src_offset;
        const auto& dst_offset = copy.dst_offset;
        glUniform3ui(LOC_SRC_OFFSET, src_offset.x, src_offset.y, src_offset.z);
        glUniform3ui(LOC_DST_OFFSET, dst_offset.x, dst_offset.y, dst_offset.z);

        glBindImageTexture(BINDING_INPUT_IMAGE, src_image.StorageHandle(),
                           copy.src_subresource.base_level, GL_TRUE, 0, GL_READ_ONLY, GL_RG32UI);
        glBindImageTexture(BINDING_OUTPUT_IMAGE, dst_image.StorageHandle(),
                           copy.dst_subresource.base_level, GL_TRUE, 0, GL_WRITE_ONLY, GL_RGBA8UI);

        // The copy extent is passed directly as the dispatch group counts
        const auto& extent = copy.extent;
        glDispatchCompute(extent.width, extent.height, extent.depth);
    }

    program_manager.RestoreGuestCompute();
}
2021-02-13 23:52:45 +04:00
/// Copies between `src_image` and `dst_image`, selecting the path from the destination's
/// bytes-per-block: 2 goes through `bgr_copy_pass`, 4 goes through the BGRA compute program.
/// Any other block size is treated as unreachable. All copies are asserted to have zero offsets.
void UtilShaders::CopyBGR(Image& dst_image, Image& src_image,
                          std::span<const VideoCommon::ImageCopy> copies) {
    static constexpr GLuint BINDING_INPUT_IMAGE = 0;
    static constexpr GLuint BINDING_OUTPUT_IMAGE = 1;
    static constexpr VideoCommon::Offset3D zero_offset{0, 0, 0};

    const u32 bytes_per_block = BytesPerBlock(dst_image.info.format);

    if (bytes_per_block == 2) {
        // BGR565 copy
        for (const ImageCopy& copy : copies) {
            ASSERT(copy.src_offset == zero_offset);
            ASSERT(copy.dst_offset == zero_offset);
            bgr_copy_pass.Execute(dst_image, src_image, copy);
        }
    } else if (bytes_per_block == 4) {
        // BGRA8 copy
        constexpr GLenum FORMAT = GL_RGBA8;
        program_manager.BindComputeProgram(copy_bgra_program.handle);
        for (const ImageCopy& copy : copies) {
            ASSERT(copy.src_offset == zero_offset);
            ASSERT(copy.dst_offset == zero_offset);
            glBindImageTexture(BINDING_INPUT_IMAGE, src_image.StorageHandle(),
                               copy.src_subresource.base_level, GL_FALSE, 0, GL_READ_ONLY, FORMAT);
            glBindImageTexture(BINDING_OUTPUT_IMAGE, dst_image.StorageHandle(),
                               copy.dst_subresource.base_level, GL_FALSE, 0, GL_WRITE_ONLY, FORMAT);
            // The copy extent is passed directly as the dispatch group counts
            const auto& extent = copy.extent;
            glDispatchCompute(extent.width, extent.height, extent.depth);
        }
        program_manager.RestoreGuestCompute();
    } else {
        UNREACHABLE();
    }
}
2020-12-28 19:15:37 +04:00
/// Maps a block size in bytes (1, 2, 4, 8 or 16) to an unsigned-integer image format of the
/// same size. Any other value hits UNREACHABLE and falls back to GL_R8UI.
GLenum StoreFormat(u32 bytes_per_block) {
    switch (bytes_per_block) {
    case 1:
        return GL_R8UI;
    case 2:
        return GL_R16UI;
    case 4:
        return GL_R32UI;
    case 8:
        return GL_RG32UI;
    case 16:
        return GL_RGBA32UI;
    default:
        UNREACHABLE();
        return GL_R8UI;
    }
}
2021-02-15 23:21:49 +04:00
void Bgr565CopyPass::Execute(const Image& dst_image, const Image& src_image,
const ImageCopy& copy) {
if (CopyBufferCreationNeeded(copy)) {
2021-02-28 08:08:39 +04:00
CreateNewCopyBuffer(copy, GL_TEXTURE_2D_ARRAY, GL_RGB565);
2021-02-15 23:21:49 +04:00
}
// Copy from source to PBO
glPixelStorei(GL_PACK_ALIGNMENT, 1);
glPixelStorei(GL_PACK_ROW_LENGTH, copy.extent.width);
2021-03-05 12:48:55 +04:00
glBindBuffer(GL_PIXEL_PACK_BUFFER, bgr16_pbo.handle);
2021-02-15 23:21:49 +04:00
glGetTextureSubImage(src_image.Handle(), 0, 0, 0, 0, copy.extent.width, copy.extent.height,
copy.src_subresource.num_layers, GL_RGB, GL_UNSIGNED_SHORT_5_6_5,
2021-03-05 12:48:55 +04:00
static_cast<GLsizei>(bgr16_pbo_size), nullptr);
2021-02-15 23:21:49 +04:00
2021-02-28 08:08:39 +04:00
// Copy from PBO to destination in reverse order
2021-02-15 23:21:49 +04:00
glPixelStorei(GL_UNPACK_ALIGNMENT, 1);
glPixelStorei(GL_UNPACK_ROW_LENGTH, copy.extent.width);
2021-03-05 12:48:55 +04:00
glBindBuffer(GL_PIXEL_UNPACK_BUFFER, bgr16_pbo.handle);
2021-02-15 23:21:49 +04:00
glTextureSubImage3D(dst_image.Handle(), 0, 0, 0, 0, copy.extent.width, copy.extent.height,
2021-03-05 12:48:55 +04:00
copy.dst_subresource.num_layers, GL_RGB, GL_UNSIGNED_SHORT_5_6_5_REV,
nullptr);
2021-02-15 23:21:49 +04:00
}
bool Bgr565CopyPass::CopyBufferCreationNeeded(const ImageCopy& copy) {
2021-03-05 12:48:55 +04:00
return bgr16_pbo_size < NumPixelsInCopy(copy) * sizeof(u16);
2021-02-15 23:21:49 +04:00
}
void Bgr565CopyPass::CreateNewCopyBuffer(const ImageCopy& copy, GLenum target, GLuint format) {
bgr16_pbo.Create();
2021-03-05 12:48:55 +04:00
bgr16_pbo_size = NumPixelsInCopy(copy) * sizeof(u16);
glNamedBufferData(bgr16_pbo.handle, bgr16_pbo_size, nullptr, GL_STREAM_COPY);
2021-02-15 23:21:49 +04:00
}
2020-12-28 19:15:37 +04:00
} // namespace OpenGL