early-access version 1259

This commit is contained in:
pineappleEA
2020-12-29 09:54:52 +00:00
parent c7d8d0947d
commit db53458619
12 changed files with 239 additions and 111 deletions

View File

@@ -225,6 +225,8 @@ add_library(video_core STATIC
shader/transform_feedback.h
surface.cpp
surface.h
texture_cache/accelerated_swizzle.cpp
texture_cache/accelerated_swizzle.h
texture_cache/decode_bc4.cpp
texture_cache/decode_bc4.h
texture_cache/descriptor_table.h

View File

@@ -3,10 +3,11 @@
// Refer to the license.txt file included.
#include <bit>
#include <span>
#include <string_view>
#include <glad/glad.h>
#include "common/alignment.h"
#include "common/assert.h"
#include "common/common_types.h"
#include "common/div_ceil.h"
@@ -19,6 +20,7 @@
#include "video_core/renderer_opengl/gl_texture_cache.h"
#include "video_core/renderer_opengl/util_shaders.h"
#include "video_core/surface.h"
#include "video_core/texture_cache/accelerated_swizzle.h"
#include "video_core/texture_cache/types.h"
#include "video_core/texture_cache/util.h"
#include "video_core/textures/decoders.h"
@@ -27,14 +29,12 @@ namespace OpenGL {
using namespace HostShaders;
using Tegra::Texture::GOB_SIZE_SHIFT;
using Tegra::Texture::GOB_SIZE_X;
using Tegra::Texture::GOB_SIZE_X_SHIFT;
using Tegra::Texture::GOB_SIZE_Y_SHIFT;
using VideoCommon::Extent3D;
using VideoCommon::ImageCopy;
using VideoCommon::ImageType;
using VideoCommon::SwizzleParameters;
using VideoCommon::Accelerated::MakeBlockLinearSwizzle2DParams;
using VideoCommon::Accelerated::MakeBlockLinearSwizzle3DParams;
using VideoCore::Surface::BytesPerBlock;
namespace {
@@ -69,50 +69,32 @@ void UtilShaders::BlockLinearUpload2D(Image& image, const ImageBufferMap& map, s
static constexpr GLuint BINDING_SWIZZLE_BUFFER = 0;
static constexpr GLuint BINDING_INPUT_BUFFER = 1;
static constexpr GLuint BINDING_OUTPUT_IMAGE = 0;
static constexpr GLuint LOC_ORIGIN = 0;
static constexpr GLuint LOC_DESTINATION = 1;
static constexpr GLuint LOC_BYTES_PER_BLOCK = 2;
static constexpr GLuint LOC_LAYER_STRIDE = 3;
static constexpr GLuint LOC_BLOCK_SIZE = 4;
static constexpr GLuint LOC_X_SHIFT = 5;
static constexpr GLuint LOC_BLOCK_HEIGHT = 6;
static constexpr GLuint LOC_BLOCK_HEIGHT_MASK = 7;
const u32 bytes_per_block = BytesPerBlock(image.info.format);
const u32 bytes_per_block_log2 = std::countr_zero(bytes_per_block);
program_manager.BindHostCompute(block_linear_unswizzle_2d_program.handle);
glFlushMappedNamedBufferRange(map.Handle(), buffer_offset, image.guest_size_bytes);
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, BINDING_SWIZZLE_BUFFER, swizzle_table_buffer.handle);
glUniform3ui(LOC_ORIGIN, 0, 0, 0); // TODO
glUniform3i(LOC_DESTINATION, 0, 0, 0); // TODO
glUniform1ui(LOC_BYTES_PER_BLOCK, bytes_per_block_log2);
glUniform1ui(LOC_LAYER_STRIDE, image.info.layer_stride);
const GLenum store_format = StoreFormat(BytesPerBlock(image.info.format));
for (const SwizzleParameters& swizzle : swizzles) {
const Extent3D block = swizzle.block;
const Extent3D num_tiles = swizzle.num_tiles;
const size_t offset = swizzle.buffer_offset + buffer_offset;
const size_t input_offset = swizzle.buffer_offset + buffer_offset;
const u32 num_dispatches_x = Common::DivCeil(num_tiles.width, WORKGROUP_SIZE.width);
const u32 num_dispatches_y = Common::DivCeil(num_tiles.height, WORKGROUP_SIZE.height);
const u32 stride_alignment = CalculateLevelStrideAlignment(image.info, swizzle.level);
const u32 stride = Common::AlignBits(num_tiles.width, stride_alignment) * bytes_per_block;
const u32 gobs_in_x = (stride + GOB_SIZE_X - 1) >> GOB_SIZE_X_SHIFT;
const u32 block_size = gobs_in_x << (GOB_SIZE_SHIFT + block.height + block.depth);
const u32 block_height_mask = (1U << block.height) - 1;
const u32 x_shift = GOB_SIZE_SHIFT + block.height + block.depth;
glUniform1ui(LOC_BLOCK_SIZE, block_size);
glUniform1ui(LOC_X_SHIFT, x_shift);
glUniform1ui(LOC_BLOCK_HEIGHT, block.height);
glUniform1ui(LOC_BLOCK_HEIGHT_MASK, block_height_mask);
glBindBufferRange(GL_SHADER_STORAGE_BUFFER, BINDING_INPUT_BUFFER, map.Handle(), offset,
image.guest_size_bytes - swizzle.buffer_offset);
const auto params = MakeBlockLinearSwizzle2DParams(swizzle, image.info);
glUniform3uiv(0, 1, params.origin.data());
glUniform3iv(1, 1, params.destination.data());
glUniform1ui(2, params.bytes_per_block_log2);
glUniform1ui(3, params.layer_stride);
glUniform1ui(4, params.block_size);
glUniform1ui(5, params.x_shift);
glUniform1ui(6, params.block_height);
glUniform1ui(7, params.block_height_mask);
glBindBufferRange(GL_SHADER_STORAGE_BUFFER, BINDING_INPUT_BUFFER, map.Handle(),
input_offset, image.guest_size_bytes - swizzle.buffer_offset);
glBindImageTexture(BINDING_OUTPUT_IMAGE, image.Handle(), swizzle.level, GL_TRUE, 0,
GL_WRITE_ONLY, StoreFormat(bytes_per_block));
GL_WRITE_ONLY, store_format);
glDispatchCompute(num_dispatches_x, num_dispatches_y, image.info.resources.layers);
}
program_manager.RestoreGuestCompute();
@@ -126,60 +108,35 @@ void UtilShaders::BlockLinearUpload3D(Image& image, const ImageBufferMap& map, s
static constexpr GLuint BINDING_INPUT_BUFFER = 1;
static constexpr GLuint BINDING_OUTPUT_IMAGE = 0;
static constexpr GLuint LOC_ORIGIN = 0;
static constexpr GLuint LOC_DESTINATION = 1;
static constexpr GLuint LOC_BYTES_PER_BLOCK = 2;
static constexpr GLuint SLICE_SIZE_LOC = 3;
static constexpr GLuint LOC_BLOCK_SIZE = 4;
static constexpr GLuint LOC_X_SHIFT = 5;
static constexpr GLuint LOC_BLOCK_HEIGHT = 6;
static constexpr GLuint LOC_BLOCK_HEIGHT_MASK = 7;
static constexpr GLuint BLOCK_DEPTH_LOC = 8;
static constexpr GLuint BLOCK_DEPTH_MASK_LOC = 9;
const u32 bytes_per_block = BytesPerBlock(image.info.format);
const u32 bytes_per_block_log2 = std::countr_zero(bytes_per_block);
glFlushMappedNamedBufferRange(map.Handle(), buffer_offset, image.guest_size_bytes);
program_manager.BindHostCompute(block_linear_unswizzle_3d_program.handle);
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, BINDING_SWIZZLE_BUFFER, swizzle_table_buffer.handle);
glUniform3ui(LOC_ORIGIN, 0, 0, 0); // TODO
glUniform3i(LOC_DESTINATION, 0, 0, 0); // TODO
glUniform1ui(LOC_BYTES_PER_BLOCK, bytes_per_block_log2);
const GLenum store_format = StoreFormat(BytesPerBlock(image.info.format));
for (const SwizzleParameters& swizzle : swizzles) {
const Extent3D block = swizzle.block;
const Extent3D num_tiles = swizzle.num_tiles;
const size_t offset = swizzle.buffer_offset + buffer_offset;
const size_t input_offset = swizzle.buffer_offset + buffer_offset;
const u32 num_dispatches_x = Common::DivCeil(num_tiles.width, WORKGROUP_SIZE.width);
const u32 num_dispatches_y = Common::DivCeil(num_tiles.height, WORKGROUP_SIZE.height);
const u32 num_dispatches_z = Common::DivCeil(num_tiles.depth, WORKGROUP_SIZE.depth);
const u32 stride_alignment = CalculateLevelStrideAlignment(image.info, swizzle.level);
const u32 stride = Common::AlignBits(num_tiles.width, stride_alignment) * bytes_per_block;
const u32 gobs_in_x = (stride + GOB_SIZE_X - 1) >> GOB_SIZE_X_SHIFT;
const u32 block_size = gobs_in_x << (GOB_SIZE_SHIFT + block.height + block.depth);
const u32 slice_size =
Common::DivCeilLog2(num_tiles.height, block.height + GOB_SIZE_Y_SHIFT) * block_size;
const u32 block_height_mask = (1U << block.height) - 1;
const u32 block_depth_mask = (1U << block.depth) - 1;
const u32 x_shift = GOB_SIZE_SHIFT + block.height + block.depth;
glUniform1ui(SLICE_SIZE_LOC, slice_size);
glUniform1ui(LOC_BLOCK_SIZE, block_size);
glUniform1ui(LOC_X_SHIFT, x_shift);
glUniform1ui(LOC_BLOCK_HEIGHT, block.height);
glUniform1ui(LOC_BLOCK_HEIGHT_MASK, block_height_mask);
glUniform1ui(BLOCK_DEPTH_LOC, block.depth);
glUniform1ui(BLOCK_DEPTH_MASK_LOC, block_depth_mask);
glBindBufferRange(GL_SHADER_STORAGE_BUFFER, BINDING_INPUT_BUFFER, map.Handle(), offset,
image.guest_size_bytes - swizzle.buffer_offset);
const auto params = MakeBlockLinearSwizzle3DParams(swizzle, image.info);
glUniform3uiv(0, 1, params.origin.data());
glUniform3iv(1, 1, params.destination.data());
glUniform1ui(2, params.bytes_per_block_log2);
glUniform1ui(3, params.slice_size);
glUniform1ui(4, params.block_size);
glUniform1ui(5, params.x_shift);
glUniform1ui(6, params.block_height);
glUniform1ui(7, params.block_height_mask);
glUniform1ui(8, params.block_depth);
glUniform1ui(9, params.block_depth_mask);
glBindBufferRange(GL_SHADER_STORAGE_BUFFER, BINDING_INPUT_BUFFER, map.Handle(),
input_offset, image.guest_size_bytes - swizzle.buffer_offset);
glBindImageTexture(BINDING_OUTPUT_IMAGE, image.Handle(), swizzle.level, GL_TRUE, 0,
GL_WRITE_ONLY, StoreFormat(bytes_per_block));
GL_WRITE_ONLY, store_format);
glDispatchCompute(num_dispatches_x, num_dispatches_y, num_dispatches_z);
}
program_manager.RestoreGuestCompute();
@@ -204,22 +161,20 @@ void UtilShaders::PitchUpload(Image& image, const ImageBufferMap& map, size_t bu
program_manager.BindHostCompute(pitch_unswizzle_program.handle);
glFlushMappedNamedBufferRange(map.Handle(), buffer_offset, image.guest_size_bytes);
glUniform2ui(LOC_ORIGIN, 0, 0); // TODO
glUniform2i(LOC_DESTINATION, 0, 0); // TODO
glUniform2ui(LOC_ORIGIN, 0, 0);
glUniform2i(LOC_DESTINATION, 0, 0);
glUniform1ui(LOC_BYTES_PER_BLOCK, bytes_per_block);
glUniform1ui(LOC_PITCH, pitch);
glBindImageTexture(BINDING_OUTPUT_IMAGE, image.Handle(), 0, GL_FALSE, 0, GL_WRITE_ONLY, format);
for (const SwizzleParameters& swizzle : swizzles) {
const Extent3D num_tiles = swizzle.num_tiles;
const size_t offset = swizzle.buffer_offset + buffer_offset;
const size_t input_offset = swizzle.buffer_offset + buffer_offset;
const u32 aligned_width = Common::AlignUp(num_tiles.width, WORKGROUP_SIZE.width);
const u32 aligned_height = Common::AlignUp(num_tiles.height, WORKGROUP_SIZE.height);
const u32 num_dispatches_x = aligned_width / WORKGROUP_SIZE.width;
const u32 num_dispatches_y = aligned_height / WORKGROUP_SIZE.height;
const u32 num_dispatches_x = Common::DivCeil(num_tiles.width, WORKGROUP_SIZE.width);
const u32 num_dispatches_y = Common::DivCeil(num_tiles.height, WORKGROUP_SIZE.height);
glBindBufferRange(GL_SHADER_STORAGE_BUFFER, BINDING_INPUT_BUFFER, map.Handle(), offset,
image.guest_size_bytes - swizzle.buffer_offset);
glBindBufferRange(GL_SHADER_STORAGE_BUFFER, BINDING_INPUT_BUFFER, map.Handle(),
input_offset, image.guest_size_bytes - swizzle.buffer_offset);
glDispatchCompute(num_dispatches_x, num_dispatches_y, 1);
}
program_manager.RestoreGuestCompute();

View File

@@ -0,0 +1,70 @@
// Copyright 2020 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <array>
#include <bit>
#include "common/alignment.h"
#include "common/common_types.h"
#include "common/div_ceil.h"
#include "video_core/surface.h"
#include "video_core/texture_cache/accelerated_swizzle.h"
#include "video_core/texture_cache/util.h"
#include "video_core/textures/decoders.h"
namespace VideoCommon::Accelerated {
using Tegra::Texture::GOB_SIZE_SHIFT;
using Tegra::Texture::GOB_SIZE_X;
using Tegra::Texture::GOB_SIZE_X_SHIFT;
using Tegra::Texture::GOB_SIZE_Y_SHIFT;
using VideoCore::Surface::BytesPerBlock;
/// Builds the parameter set for a 2D block linear swizzle of one swizzle entry.
/// @param swizzle Entry supplying the block extent, the tile count and the level.
/// @param info    Image description supplying the format and the layer stride.
/// @return Parameters with origin and destination set to zero.
BlockLinearSwizzle2DParams MakeBlockLinearSwizzle2DParams(const SwizzleParameters& swizzle,
                                                          const ImageInfo& info) {
    const u32 bpb = BytesPerBlock(info.format);
    const u32 alignment = CalculateLevelStrideAlignment(info, swizzle.level);
    // Bytes per row once the tile width has been aligned for this level
    const u32 row_bytes = Common::AlignBits(swizzle.num_tiles.width, alignment) * bpb;
    const u32 gobs_per_row = Common::DivCeilLog2(row_bytes, GOB_SIZE_X_SHIFT);
    // Shared shift: it is both the x shift and the scale applied to the GOB count
    const u32 block_shift = GOB_SIZE_SHIFT + swizzle.block.height + swizzle.block.depth;
    const u32 height_mask = (1U << swizzle.block.height) - 1;
    return BlockLinearSwizzle2DParams{
        .origin = {0, 0, 0},
        .destination = {0, 0, 0},
        .bytes_per_block_log2 = static_cast<u32>(std::countr_zero(bpb)),
        .layer_stride = info.layer_stride,
        .block_size = gobs_per_row << block_shift,
        .x_shift = block_shift,
        .block_height = swizzle.block.height,
        .block_height_mask = height_mask,
    };
}
/// Builds the parameter set for a 3D block linear swizzle of one swizzle entry.
/// @param swizzle Entry supplying the block extent, the tile count and the level.
/// @param info    Image description supplying the format.
/// @return Parameters with origin and destination set to zero.
BlockLinearSwizzle3DParams MakeBlockLinearSwizzle3DParams(const SwizzleParameters& swizzle,
                                                          const ImageInfo& info) {
    const Extent3D block = swizzle.block;
    const Extent3D num_tiles = swizzle.num_tiles;
    const u32 bytes_per_block = BytesPerBlock(info.format);
    const u32 stride_alignment = CalculateLevelStrideAlignment(info, swizzle.level);
    const u32 stride = Common::AlignBits(num_tiles.width, stride_alignment) * bytes_per_block;
    // Round up to whole GOBs with the same helper the 2D variant uses
    const u32 gobs_in_x = Common::DivCeilLog2(stride, GOB_SIZE_X_SHIFT);
    // Computed once: used both as the x shift and to scale the GOB count into a block size
    const u32 x_shift = GOB_SIZE_SHIFT + block.height + block.depth;
    const u32 block_size = gobs_in_x << x_shift;
    // Rows of blocks in the level (rounded up) times the size of one row of blocks
    const u32 slice_size =
        Common::DivCeilLog2(num_tiles.height, block.height + GOB_SIZE_Y_SHIFT) * block_size;
    return BlockLinearSwizzle3DParams{
        .origin{0, 0, 0},
        .destination{0, 0, 0},
        .bytes_per_block_log2 = static_cast<u32>(std::countr_zero(bytes_per_block)),
        .slice_size = slice_size,
        .block_size = block_size,
        .x_shift = x_shift,
        .block_height = block.height,
        .block_height_mask = (1U << block.height) - 1,
        .block_depth = block.depth,
        .block_depth_mask = (1U << block.depth) - 1,
    };
}
} // namespace VideoCommon::Accelerated

View File

@@ -0,0 +1,45 @@
// Copyright 2020 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <array>
#include "common/common_types.h"
#include "video_core/texture_cache/image_info.h"
#include "video_core/texture_cache/types.h"
namespace VideoCommon::Accelerated {
/// Parameter set produced by MakeBlockLinearSwizzle2DParams.
/// The OpenGL 2D upload path passes these fields, in declaration order, to uniform
/// locations 0 through 7.
struct BlockLinearSwizzle2DParams {
/// Currently always {0, 0, 0}
std::array<u32, 3> origin;
/// Currently always {0, 0, 0}
std::array<s32, 3> destination;
/// Number of trailing zero bits of the format's bytes per block
u32 bytes_per_block_log2;
/// Copied from ImageInfo::layer_stride
u32 layer_stride;
/// GOBs per aligned row, shifted left by x_shift
u32 block_size;
/// GOB_SIZE_SHIFT + block height + block depth
u32 x_shift;
/// Height member of the swizzle block extent, used as a shift amount by block_height_mask
u32 block_height;
/// (1 << block_height) - 1
u32 block_height_mask;
};
/// Parameter set produced by MakeBlockLinearSwizzle3DParams.
/// The OpenGL 3D upload path passes these fields, in declaration order, to uniform
/// locations 0 through 9.
struct BlockLinearSwizzle3DParams {
/// Currently always {0, 0, 0}
std::array<u32, 3> origin;
/// Currently always {0, 0, 0}
std::array<s32, 3> destination;
/// Number of trailing zero bits of the format's bytes per block
u32 bytes_per_block_log2;
/// block_size multiplied by the rounded-up number of block rows in the level
u32 slice_size;
/// GOBs per aligned row, shifted left by x_shift
u32 block_size;
/// GOB_SIZE_SHIFT + block height + block depth
u32 x_shift;
/// Height member of the swizzle block extent, used as a shift amount by block_height_mask
u32 block_height;
/// (1 << block_height) - 1
u32 block_height_mask;
/// Depth member of the swizzle block extent, used as a shift amount by block_depth_mask
u32 block_depth;
/// (1 << block_depth) - 1
u32 block_depth_mask;
};
/// Computes the 2D block linear swizzle parameters for one swizzle entry of an image.
/// @param swizzle Entry supplying the block extent, the tile count and the level.
/// @param info    Image description supplying the format and the layer stride.
/// @return The filled parameter set; origin and destination are zero.
[[nodiscard]] BlockLinearSwizzle2DParams MakeBlockLinearSwizzle2DParams(
const SwizzleParameters& swizzle, const ImageInfo& info);
/// Computes the 3D block linear swizzle parameters for one swizzle entry of an image.
/// @param swizzle Entry supplying the block extent, the tile count and the level.
/// @param info    Image description supplying the format.
/// @return The filled parameter set; origin and destination are zero.
[[nodiscard]] BlockLinearSwizzle3DParams MakeBlockLinearSwizzle3DParams(
const SwizzleParameters& swizzle, const ImageInfo& info);
} // namespace VideoCommon::Accelerated

View File

@@ -19,7 +19,6 @@
namespace Tegra::Texture {
namespace {
/**
* This table represents the internal swizzle of a gob, in format 16 bytes x 2 sector packing.
* Calculates the offset of an (x, y) position within a swizzled texture.
@@ -41,11 +40,15 @@ constexpr SwizzleTable SWIZZLE_TABLE = MakeSwizzleTableConst();
template <bool TO_LINEAR>
void Swizzle(std::span<u8> output, std::span<const u8> input, u32 bytes_per_pixel, u32 width,
u32 height, u32 depth, u32 block_height, u32 block_depth, u32 stride_alignment) {
static constexpr u32 origin_x = 0; // TODO
static constexpr u32 origin_y = 0; // TODO
static constexpr u32 origin_z = 0; // TODO
// The origin of the transformation can be configured here. It is left as zero because the
// current API doesn't expose it.
static constexpr u32 origin_x = 0;
static constexpr u32 origin_y = 0;
static constexpr u32 origin_z = 0;
const u32 pitch = width * bytes_per_pixel; // TODO
// A custom pitch could be configured here.
// As it's not exposed, 'width * bpp' is the expected pitch.
const u32 pitch = width * bytes_per_pixel;
const u32 stride = Common::AlignBits(width, stride_alignment) * bytes_per_pixel;
const u32 gobs_in_x = Common::DivCeilLog2(stride, GOB_SIZE_X_SHIFT);
@@ -86,7 +89,6 @@ void Swizzle(std::span<u8> output, std::span<const u8> input, u32 bytes_per_pixe
}
}
}
} // Anonymous namespace
SwizzleTable MakeSwizzleTable() {