From db534586190049b56d9d054c4d4637eb0bbc5b12 Mon Sep 17 00:00:00 2001 From: pineappleEA Date: Tue, 29 Dec 2020 09:54:52 +0000 Subject: [PATCH] early-access version 1259 --- README.md | 2 +- src/core/hle/kernel/kernel.cpp | 21 ++- src/core/hle/kernel/kernel.h | 17 +++ src/core/hle/kernel/server_session.cpp | 13 +- src/core/hle/kernel/server_session.h | 2 +- src/core/hle/kernel/service_thread.cpp | 28 ++-- src/core/hle/kernel/service_thread.h | 3 +- src/video_core/CMakeLists.txt | 2 + .../renderer_opengl/util_shaders.cpp | 133 ++++++------------ .../texture_cache/accelerated_swizzle.cpp | 70 +++++++++ .../texture_cache/accelerated_swizzle.h | 45 ++++++ src/video_core/textures/decoders.cpp | 14 +- 12 files changed, 239 insertions(+), 111 deletions(-) create mode 100755 src/video_core/texture_cache/accelerated_swizzle.cpp create mode 100755 src/video_core/texture_cache/accelerated_swizzle.h diff --git a/README.md b/README.md index b96f190b9..cb4930f1a 100755 --- a/README.md +++ b/README.md @@ -1,7 +1,7 @@ yuzu emulator early access ============= -This is the source code for early-access 1258. +This is the source code for early-access 1259. 
## Legal Notice diff --git a/src/core/hle/kernel/kernel.cpp b/src/core/hle/kernel/kernel.cpp index 312c64c17..5f917686f 100755 --- a/src/core/hle/kernel/kernel.cpp +++ b/src/core/hle/kernel/kernel.cpp @@ -8,7 +8,7 @@ #include #include #include -#include +#include #include #include "common/assert.h" @@ -35,6 +35,7 @@ #include "core/hle/kernel/physical_core.h" #include "core/hle/kernel/process.h" #include "core/hle/kernel/resource_limit.h" +#include "core/hle/kernel/service_thread.h" #include "core/hle/kernel/shared_memory.h" #include "core/hle/kernel/synchronization.h" #include "core/hle/kernel/thread.h" @@ -107,6 +108,9 @@ struct KernelCore::Impl { std::fill(register_host_thread_keys.begin(), register_host_thread_keys.end(), std::thread::id{}); std::fill(register_host_thread_values.begin(), register_host_thread_values.end(), 0); + + // Ensures all service threads gracefully shutdown + service_threads.clear(); } void InitializePhysicalCores() { @@ -345,6 +349,9 @@ struct KernelCore::Impl { std::shared_ptr irs_shared_mem; std::shared_ptr time_shared_mem; + // Threads used for services + std::unordered_set> service_threads; + std::array, Core::Hardware::NUM_CPU_CORES> suspend_threads{}; std::array interrupts{}; std::array, Core::Hardware::NUM_CPU_CORES> schedulers{}; @@ -639,4 +646,16 @@ void KernelCore::ExitSVCProfile() { MicroProfileLeave(MICROPROFILE_TOKEN(Kernel_SVC), impl->svc_ticks[core]); } +std::weak_ptr KernelCore::CreateServiceThread(const std::string& name) { + auto service_thread = std::make_shared(*this, 1, name); + impl->service_threads.emplace(service_thread); + return service_thread; +} + +void KernelCore::ReleaseServiceThread(std::weak_ptr service_thread) { + if (auto strong_ptr = service_thread.lock()) { + impl->service_threads.erase(strong_ptr); + } +} + } // namespace Kernel diff --git a/src/core/hle/kernel/kernel.h b/src/core/hle/kernel/kernel.h index 5846c3f39..e3169f5a7 100755 --- a/src/core/hle/kernel/kernel.h +++ b/src/core/hle/kernel/kernel.h 
@@ -42,6 +42,7 @@ class Process; class ResourceLimit; class KScheduler; class SharedMemory; +class ServiceThread; class Synchronization; class Thread; class TimeManager; @@ -227,6 +228,22 @@ public: void ExitSVCProfile(); + /** + * Creates an HLE service thread, which is used to execute service routines asynchronously. + * While these are allocated per ServerSession, these need to be owned and managed outside of + * ServerSession to avoid a circular dependency. + * @param name String name for the ServerSession creating this thread, used for debug purposes. + * @returns A weak pointer to the newly created service thread. + */ + std::weak_ptr CreateServiceThread(const std::string& name); + + /** + * Releases an HLE service thread, instructing KernelCore to free it. This should be called when + * the ServerSession associated with the thread is destroyed. + * @param service_thread Service thread to release. + */ + void ReleaseServiceThread(std::weak_ptr service_thread); + private: friend class Object; friend class Process; diff --git a/src/core/hle/kernel/server_session.cpp b/src/core/hle/kernel/server_session.cpp index ed42452ff..b40fe3916 100755 --- a/src/core/hle/kernel/server_session.cpp +++ b/src/core/hle/kernel/server_session.cpp @@ -25,7 +25,10 @@ namespace Kernel { ServerSession::ServerSession(KernelCore& kernel) : SynchronizationObject{kernel} {} -ServerSession::~ServerSession() = default; + +ServerSession::~ServerSession() { + kernel.ReleaseServiceThread(service_thread); +} ResultVal> ServerSession::Create(KernelCore& kernel, std::shared_ptr parent, @@ -34,7 +37,7 @@ ResultVal> ServerSession::Create(KernelCore& kern session->name = std::move(name); session->parent = std::move(parent); - session->service_thread = std::make_unique(kernel, 1); + session->service_thread = kernel.CreateServiceThread(session->name); return MakeResult(std::move(session)); } @@ -139,7 +142,11 @@ ResultCode ServerSession::QueueSyncRequest(std::shared_ptr thread, std::make_shared(kernel, 
memory, SharedFrom(this), std::move(thread)); context->PopulateFromIncomingCommandBuffer(kernel.CurrentProcess()->GetHandleTable(), cmd_buf); - service_thread->QueueSyncRequest(*this, std::move(context)); + + if (auto strong_ptr = service_thread.lock()) { + strong_ptr->QueueSyncRequest(*this, std::move(context)); + return RESULT_SUCCESS; + } return RESULT_SUCCESS; } diff --git a/src/core/hle/kernel/server_session.h b/src/core/hle/kernel/server_session.h index 8466b03e6..e8d1d99ea 100755 --- a/src/core/hle/kernel/server_session.h +++ b/src/core/hle/kernel/server_session.h @@ -167,7 +167,7 @@ private: std::string name; /// Thread to dispatch service requests - std::unique_ptr service_thread; + std::weak_ptr service_thread; }; } // namespace Kernel diff --git a/src/core/hle/kernel/service_thread.cpp b/src/core/hle/kernel/service_thread.cpp index 4ceb7e56a..1c134777f 100755 --- a/src/core/hle/kernel/service_thread.cpp +++ b/src/core/hle/kernel/service_thread.cpp @@ -11,6 +11,7 @@ #include "common/assert.h" #include "common/scope_exit.h" +#include "common/thread.h" #include "core/core.h" #include "core/hle/kernel/kernel.h" #include "core/hle/kernel/server_session.h" @@ -22,7 +23,7 @@ namespace Kernel { class ServiceThread::Impl final { public: - explicit Impl(KernelCore& kernel, std::size_t num_threads); + explicit Impl(KernelCore& kernel, std::size_t num_threads, const std::string& name); ~Impl(); void QueueSyncRequest(ServerSession& session, std::shared_ptr&& context); @@ -32,12 +33,16 @@ private: std::queue> requests; std::mutex queue_mutex; std::condition_variable condition; + const std::string service_name; bool stop{}; }; -ServiceThread::Impl::Impl(KernelCore& kernel, std::size_t num_threads) { +ServiceThread::Impl::Impl(KernelCore& kernel, std::size_t num_threads, const std::string& name) + : service_name{name} { for (std::size_t i = 0; i < num_threads; ++i) - threads.emplace_back([&] { + threads.emplace_back([this, &kernel] { + 
Common::SetCurrentThreadName(std::string{"Hle_" + service_name}.c_str()); + // Wait for first request before trying to acquire a render context { std::unique_lock lock{queue_mutex}; @@ -52,7 +57,7 @@ ServiceThread::Impl::Impl(KernelCore& kernel, std::size_t num_threads) { { std::unique_lock lock{queue_mutex}; condition.wait(lock, [this] { return stop || !requests.empty(); }); - if (stop && requests.empty()) { + if (stop || requests.empty()) { return; } task = std::move(requests.front()); @@ -68,9 +73,14 @@ void ServiceThread::Impl::QueueSyncRequest(ServerSession& session, std::shared_ptr&& context) { { std::unique_lock lock{queue_mutex}; - requests.emplace([session{SharedFrom(&session)}, context{std::move(context)}]() { - session->CompleteSyncRequest(*context); - return; + + // ServerSession owns the service thread, so we cannot capture a strong pointer here in the + // event that the ServerSession is terminated. + std::weak_ptr weak_ptr{SharedFrom(&session)}; + requests.emplace([weak_ptr, context{std::move(context)}]() { + if (auto strong_ptr = weak_ptr.lock()) { + strong_ptr->CompleteSyncRequest(*context); + } }); } condition.notify_one(); @@ -87,8 +97,8 @@ ServiceThread::Impl::~Impl() { } } -ServiceThread::ServiceThread(KernelCore& kernel, std::size_t num_threads) - : impl{std::make_unique(kernel, num_threads)} {} +ServiceThread::ServiceThread(KernelCore& kernel, std::size_t num_threads, const std::string& name) + : impl{std::make_unique(kernel, num_threads, name)} {} ServiceThread::~ServiceThread() = default; diff --git a/src/core/hle/kernel/service_thread.h b/src/core/hle/kernel/service_thread.h index 91ad7ae85..025ab8fb5 100755 --- a/src/core/hle/kernel/service_thread.h +++ b/src/core/hle/kernel/service_thread.h @@ -5,6 +5,7 @@ #pragma once #include +#include namespace Kernel { @@ -14,7 +15,7 @@ class ServerSession; class ServiceThread final { public: - explicit ServiceThread(KernelCore& kernel, std::size_t num_threads); + explicit ServiceThread(KernelCore& 
kernel, std::size_t num_threads, const std::string& name); ~ServiceThread(); void QueueSyncRequest(ServerSession& session, std::shared_ptr&& context); diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index dd6e98c7a..b133992e0 100755 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -225,6 +225,8 @@ add_library(video_core STATIC shader/transform_feedback.h surface.cpp surface.h + texture_cache/accelerated_swizzle.cpp + texture_cache/accelerated_swizzle.h texture_cache/decode_bc4.cpp texture_cache/decode_bc4.h texture_cache/descriptor_table.h diff --git a/src/video_core/renderer_opengl/util_shaders.cpp b/src/video_core/renderer_opengl/util_shaders.cpp index f53ffec09..0a55d1266 100755 --- a/src/video_core/renderer_opengl/util_shaders.cpp +++ b/src/video_core/renderer_opengl/util_shaders.cpp @@ -3,10 +3,11 @@ // Refer to the license.txt file included. #include +#include +#include #include -#include "common/alignment.h" #include "common/assert.h" #include "common/common_types.h" #include "common/div_ceil.h" @@ -19,6 +20,7 @@ #include "video_core/renderer_opengl/gl_texture_cache.h" #include "video_core/renderer_opengl/util_shaders.h" #include "video_core/surface.h" +#include "video_core/texture_cache/accelerated_swizzle.h" #include "video_core/texture_cache/types.h" #include "video_core/texture_cache/util.h" #include "video_core/textures/decoders.h" @@ -27,14 +29,12 @@ namespace OpenGL { using namespace HostShaders; -using Tegra::Texture::GOB_SIZE_SHIFT; -using Tegra::Texture::GOB_SIZE_X; -using Tegra::Texture::GOB_SIZE_X_SHIFT; -using Tegra::Texture::GOB_SIZE_Y_SHIFT; using VideoCommon::Extent3D; using VideoCommon::ImageCopy; using VideoCommon::ImageType; using VideoCommon::SwizzleParameters; +using VideoCommon::Accelerated::MakeBlockLinearSwizzle2DParams; +using VideoCommon::Accelerated::MakeBlockLinearSwizzle3DParams; using VideoCore::Surface::BytesPerBlock; namespace { @@ -69,50 +69,32 @@ void 
UtilShaders::BlockLinearUpload2D(Image& image, const ImageBufferMap& map, s static constexpr GLuint BINDING_SWIZZLE_BUFFER = 0; static constexpr GLuint BINDING_INPUT_BUFFER = 1; static constexpr GLuint BINDING_OUTPUT_IMAGE = 0; - static constexpr GLuint LOC_ORIGIN = 0; - static constexpr GLuint LOC_DESTINATION = 1; - static constexpr GLuint LOC_BYTES_PER_BLOCK = 2; - static constexpr GLuint LOC_LAYER_STRIDE = 3; - static constexpr GLuint LOC_BLOCK_SIZE = 4; - static constexpr GLuint LOC_X_SHIFT = 5; - static constexpr GLuint LOC_BLOCK_HEIGHT = 6; - static constexpr GLuint LOC_BLOCK_HEIGHT_MASK = 7; - - const u32 bytes_per_block = BytesPerBlock(image.info.format); - const u32 bytes_per_block_log2 = std::countr_zero(bytes_per_block); program_manager.BindHostCompute(block_linear_unswizzle_2d_program.handle); glFlushMappedNamedBufferRange(map.Handle(), buffer_offset, image.guest_size_bytes); glBindBufferBase(GL_SHADER_STORAGE_BUFFER, BINDING_SWIZZLE_BUFFER, swizzle_table_buffer.handle); - glUniform3ui(LOC_ORIGIN, 0, 0, 0); // TODO - glUniform3i(LOC_DESTINATION, 0, 0, 0); // TODO - glUniform1ui(LOC_BYTES_PER_BLOCK, bytes_per_block_log2); - glUniform1ui(LOC_LAYER_STRIDE, image.info.layer_stride); + + const GLenum store_format = StoreFormat(BytesPerBlock(image.info.format)); for (const SwizzleParameters& swizzle : swizzles) { - const Extent3D block = swizzle.block; const Extent3D num_tiles = swizzle.num_tiles; - const size_t offset = swizzle.buffer_offset + buffer_offset; + const size_t input_offset = swizzle.buffer_offset + buffer_offset; const u32 num_dispatches_x = Common::DivCeil(num_tiles.width, WORKGROUP_SIZE.width); const u32 num_dispatches_y = Common::DivCeil(num_tiles.height, WORKGROUP_SIZE.height); - const u32 stride_alignment = CalculateLevelStrideAlignment(image.info, swizzle.level); - const u32 stride = Common::AlignBits(num_tiles.width, stride_alignment) * bytes_per_block; - - const u32 gobs_in_x = (stride + GOB_SIZE_X - 1) >> GOB_SIZE_X_SHIFT; - const u32 
block_size = gobs_in_x << (GOB_SIZE_SHIFT + block.height + block.depth); - - const u32 block_height_mask = (1U << block.height) - 1; - const u32 x_shift = GOB_SIZE_SHIFT + block.height + block.depth; - - glUniform1ui(LOC_BLOCK_SIZE, block_size); - glUniform1ui(LOC_X_SHIFT, x_shift); - glUniform1ui(LOC_BLOCK_HEIGHT, block.height); - glUniform1ui(LOC_BLOCK_HEIGHT_MASK, block_height_mask); - glBindBufferRange(GL_SHADER_STORAGE_BUFFER, BINDING_INPUT_BUFFER, map.Handle(), offset, - image.guest_size_bytes - swizzle.buffer_offset); + const auto params = MakeBlockLinearSwizzle2DParams(swizzle, image.info); + glUniform3uiv(0, 1, params.origin.data()); + glUniform3iv(1, 1, params.destination.data()); + glUniform1ui(2, params.bytes_per_block_log2); + glUniform1ui(3, params.layer_stride); + glUniform1ui(4, params.block_size); + glUniform1ui(5, params.x_shift); + glUniform1ui(6, params.block_height); + glUniform1ui(7, params.block_height_mask); + glBindBufferRange(GL_SHADER_STORAGE_BUFFER, BINDING_INPUT_BUFFER, map.Handle(), + input_offset, image.guest_size_bytes - swizzle.buffer_offset); glBindImageTexture(BINDING_OUTPUT_IMAGE, image.Handle(), swizzle.level, GL_TRUE, 0, - GL_WRITE_ONLY, StoreFormat(bytes_per_block)); + GL_WRITE_ONLY, store_format); glDispatchCompute(num_dispatches_x, num_dispatches_y, image.info.resources.layers); } program_manager.RestoreGuestCompute(); @@ -126,60 +108,35 @@ void UtilShaders::BlockLinearUpload3D(Image& image, const ImageBufferMap& map, s static constexpr GLuint BINDING_INPUT_BUFFER = 1; static constexpr GLuint BINDING_OUTPUT_IMAGE = 0; - static constexpr GLuint LOC_ORIGIN = 0; - static constexpr GLuint LOC_DESTINATION = 1; - static constexpr GLuint LOC_BYTES_PER_BLOCK = 2; - static constexpr GLuint SLICE_SIZE_LOC = 3; - static constexpr GLuint LOC_BLOCK_SIZE = 4; - static constexpr GLuint LOC_X_SHIFT = 5; - static constexpr GLuint LOC_BLOCK_HEIGHT = 6; - static constexpr GLuint LOC_BLOCK_HEIGHT_MASK = 7; - static constexpr GLuint 
BLOCK_DEPTH_LOC = 8; - static constexpr GLuint BLOCK_DEPTH_MASK_LOC = 9; - - const u32 bytes_per_block = BytesPerBlock(image.info.format); - const u32 bytes_per_block_log2 = std::countr_zero(bytes_per_block); - glFlushMappedNamedBufferRange(map.Handle(), buffer_offset, image.guest_size_bytes); program_manager.BindHostCompute(block_linear_unswizzle_3d_program.handle); glBindBufferBase(GL_SHADER_STORAGE_BUFFER, BINDING_SWIZZLE_BUFFER, swizzle_table_buffer.handle); - glUniform3ui(LOC_ORIGIN, 0, 0, 0); // TODO - glUniform3i(LOC_DESTINATION, 0, 0, 0); // TODO - glUniform1ui(LOC_BYTES_PER_BLOCK, bytes_per_block_log2); + + const GLenum store_format = StoreFormat(BytesPerBlock(image.info.format)); for (const SwizzleParameters& swizzle : swizzles) { const Extent3D block = swizzle.block; const Extent3D num_tiles = swizzle.num_tiles; - const size_t offset = swizzle.buffer_offset + buffer_offset; + const size_t input_offset = swizzle.buffer_offset + buffer_offset; const u32 num_dispatches_x = Common::DivCeil(num_tiles.width, WORKGROUP_SIZE.width); const u32 num_dispatches_y = Common::DivCeil(num_tiles.height, WORKGROUP_SIZE.height); const u32 num_dispatches_z = Common::DivCeil(num_tiles.depth, WORKGROUP_SIZE.depth); - const u32 stride_alignment = CalculateLevelStrideAlignment(image.info, swizzle.level); - const u32 stride = Common::AlignBits(num_tiles.width, stride_alignment) * bytes_per_block; - - const u32 gobs_in_x = (stride + GOB_SIZE_X - 1) >> GOB_SIZE_X_SHIFT; - const u32 block_size = gobs_in_x << (GOB_SIZE_SHIFT + block.height + block.depth); - const u32 slice_size = - Common::DivCeilLog2(num_tiles.height, block.height + GOB_SIZE_Y_SHIFT) * block_size; - - const u32 block_height_mask = (1U << block.height) - 1; - const u32 block_depth_mask = (1U << block.depth) - 1; - const u32 x_shift = GOB_SIZE_SHIFT + block.height + block.depth; - - glUniform1ui(SLICE_SIZE_LOC, slice_size); - glUniform1ui(LOC_BLOCK_SIZE, block_size); - glUniform1ui(LOC_X_SHIFT, x_shift); - 
glUniform1ui(LOC_BLOCK_HEIGHT, block.height); - glUniform1ui(LOC_BLOCK_HEIGHT_MASK, block_height_mask); - glUniform1ui(BLOCK_DEPTH_LOC, block.depth); - glUniform1ui(BLOCK_DEPTH_MASK_LOC, block_depth_mask); - - glBindBufferRange(GL_SHADER_STORAGE_BUFFER, BINDING_INPUT_BUFFER, map.Handle(), offset, - image.guest_size_bytes - swizzle.buffer_offset); + const auto params = MakeBlockLinearSwizzle3DParams(swizzle, image.info); + glUniform3uiv(0, 1, params.origin.data()); + glUniform3iv(1, 1, params.destination.data()); + glUniform1ui(2, params.bytes_per_block_log2); + glUniform1ui(3, params.slice_size); + glUniform1ui(4, params.block_size); + glUniform1ui(5, params.x_shift); + glUniform1ui(6, params.block_height); + glUniform1ui(7, params.block_height_mask); + glUniform1ui(8, params.block_depth); + glUniform1ui(9, params.block_depth_mask); + glBindBufferRange(GL_SHADER_STORAGE_BUFFER, BINDING_INPUT_BUFFER, map.Handle(), + input_offset, image.guest_size_bytes - swizzle.buffer_offset); glBindImageTexture(BINDING_OUTPUT_IMAGE, image.Handle(), swizzle.level, GL_TRUE, 0, - GL_WRITE_ONLY, StoreFormat(bytes_per_block)); - + GL_WRITE_ONLY, store_format); glDispatchCompute(num_dispatches_x, num_dispatches_y, num_dispatches_z); } program_manager.RestoreGuestCompute(); @@ -204,22 +161,20 @@ void UtilShaders::PitchUpload(Image& image, const ImageBufferMap& map, size_t bu program_manager.BindHostCompute(pitch_unswizzle_program.handle); glFlushMappedNamedBufferRange(map.Handle(), buffer_offset, image.guest_size_bytes); - glUniform2ui(LOC_ORIGIN, 0, 0); // TODO - glUniform2i(LOC_DESTINATION, 0, 0); // TODO + glUniform2ui(LOC_ORIGIN, 0, 0); + glUniform2i(LOC_DESTINATION, 0, 0); glUniform1ui(LOC_BYTES_PER_BLOCK, bytes_per_block); glUniform1ui(LOC_PITCH, pitch); glBindImageTexture(BINDING_OUTPUT_IMAGE, image.Handle(), 0, GL_FALSE, 0, GL_WRITE_ONLY, format); for (const SwizzleParameters& swizzle : swizzles) { const Extent3D num_tiles = swizzle.num_tiles; - const size_t offset = 
swizzle.buffer_offset + buffer_offset; + const size_t input_offset = swizzle.buffer_offset + buffer_offset; - const u32 aligned_width = Common::AlignUp(num_tiles.width, WORKGROUP_SIZE.width); - const u32 aligned_height = Common::AlignUp(num_tiles.height, WORKGROUP_SIZE.height); - const u32 num_dispatches_x = aligned_width / WORKGROUP_SIZE.width; - const u32 num_dispatches_y = aligned_height / WORKGROUP_SIZE.height; + const u32 num_dispatches_x = Common::DivCeil(num_tiles.width, WORKGROUP_SIZE.width); + const u32 num_dispatches_y = Common::DivCeil(num_tiles.height, WORKGROUP_SIZE.height); - glBindBufferRange(GL_SHADER_STORAGE_BUFFER, BINDING_INPUT_BUFFER, map.Handle(), offset, - image.guest_size_bytes - swizzle.buffer_offset); + glBindBufferRange(GL_SHADER_STORAGE_BUFFER, BINDING_INPUT_BUFFER, map.Handle(), + input_offset, image.guest_size_bytes - swizzle.buffer_offset); glDispatchCompute(num_dispatches_x, num_dispatches_y, 1); } program_manager.RestoreGuestCompute(); diff --git a/src/video_core/texture_cache/accelerated_swizzle.cpp b/src/video_core/texture_cache/accelerated_swizzle.cpp new file mode 100755 index 000000000..a4fc1184b --- /dev/null +++ b/src/video_core/texture_cache/accelerated_swizzle.cpp @@ -0,0 +1,70 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#include +#include + +#include "common/alignment.h" +#include "common/common_types.h" +#include "common/div_ceil.h" +#include "video_core/surface.h" +#include "video_core/texture_cache/accelerated_swizzle.h" +#include "video_core/texture_cache/util.h" +#include "video_core/textures/decoders.h" + +namespace VideoCommon::Accelerated { + +using Tegra::Texture::GOB_SIZE_SHIFT; +using Tegra::Texture::GOB_SIZE_X; +using Tegra::Texture::GOB_SIZE_X_SHIFT; +using Tegra::Texture::GOB_SIZE_Y_SHIFT; +using VideoCore::Surface::BytesPerBlock; + +BlockLinearSwizzle2DParams MakeBlockLinearSwizzle2DParams(const SwizzleParameters& swizzle, + const ImageInfo& info) { + const Extent3D block = swizzle.block; + const Extent3D num_tiles = swizzle.num_tiles; + const u32 bytes_per_block = BytesPerBlock(info.format); + const u32 stride_alignment = CalculateLevelStrideAlignment(info, swizzle.level); + const u32 stride = Common::AlignBits(num_tiles.width, stride_alignment) * bytes_per_block; + const u32 gobs_in_x = Common::DivCeilLog2(stride, GOB_SIZE_X_SHIFT); + return BlockLinearSwizzle2DParams{ + .origin{0, 0, 0}, + .destination{0, 0, 0}, + .bytes_per_block_log2 = static_cast(std::countr_zero(bytes_per_block)), + .layer_stride = info.layer_stride, + .block_size = gobs_in_x << (GOB_SIZE_SHIFT + block.height + block.depth), + .x_shift = GOB_SIZE_SHIFT + block.height + block.depth, + .block_height = block.height, + .block_height_mask = (1U << block.height) - 1, + }; +} + +BlockLinearSwizzle3DParams MakeBlockLinearSwizzle3DParams(const SwizzleParameters& swizzle, + const ImageInfo& info) { + const Extent3D block = swizzle.block; + const Extent3D num_tiles = swizzle.num_tiles; + const u32 bytes_per_block = BytesPerBlock(info.format); + const u32 stride_alignment = CalculateLevelStrideAlignment(info, swizzle.level); + const u32 stride = Common::AlignBits(num_tiles.width, stride_alignment) * bytes_per_block; + + const u32 gobs_in_x = (stride + GOB_SIZE_X - 1) >> GOB_SIZE_X_SHIFT; + const u32 
block_size = gobs_in_x << (GOB_SIZE_SHIFT + block.height + block.depth); + const u32 slice_size = + Common::DivCeilLog2(num_tiles.height, block.height + GOB_SIZE_Y_SHIFT) * block_size; + return BlockLinearSwizzle3DParams{ + .origin{0, 0, 0}, + .destination{0, 0, 0}, + .bytes_per_block_log2 = static_cast(std::countr_zero(bytes_per_block)), + .slice_size = slice_size, + .block_size = block_size, + .x_shift = GOB_SIZE_SHIFT + block.height + block.depth, + .block_height = block.height, + .block_height_mask = (1U << block.height) - 1, + .block_depth = block.depth, + .block_depth_mask = (1U << block.depth) - 1, + }; +} + +} // namespace VideoCommon::Accelerated \ No newline at end of file diff --git a/src/video_core/texture_cache/accelerated_swizzle.h b/src/video_core/texture_cache/accelerated_swizzle.h new file mode 100755 index 000000000..6ec5c78c4 --- /dev/null +++ b/src/video_core/texture_cache/accelerated_swizzle.h @@ -0,0 +1,45 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#pragma once + +#include + +#include "common/common_types.h" +#include "video_core/texture_cache/image_info.h" +#include "video_core/texture_cache/types.h" + +namespace VideoCommon::Accelerated { + +struct BlockLinearSwizzle2DParams { + std::array origin; + std::array destination; + u32 bytes_per_block_log2; + u32 layer_stride; + u32 block_size; + u32 x_shift; + u32 block_height; + u32 block_height_mask; +}; + +struct BlockLinearSwizzle3DParams { + std::array origin; + std::array destination; + u32 bytes_per_block_log2; + u32 slice_size; + u32 block_size; + u32 x_shift; + u32 block_height; + u32 block_height_mask; + u32 block_depth; + u32 block_depth_mask; +}; + +[[nodiscard]] BlockLinearSwizzle2DParams MakeBlockLinearSwizzle2DParams( + const SwizzleParameters& swizzle, const ImageInfo& info); + +[[nodiscard]] BlockLinearSwizzle3DParams MakeBlockLinearSwizzle3DParams( + const SwizzleParameters& swizzle, const ImageInfo& info); + +} // namespace VideoCommon::Accelerated diff --git a/src/video_core/textures/decoders.cpp b/src/video_core/textures/decoders.cpp index 51240ae3f..9f5181318 100755 --- a/src/video_core/textures/decoders.cpp +++ b/src/video_core/textures/decoders.cpp @@ -19,7 +19,6 @@ namespace Tegra::Texture { namespace { - /** * This table represents the internal swizzle of a gob, in format 16 bytes x 2 sector packing. * Calculates the offset of an (x, y) position within a swizzled texture. @@ -41,11 +40,15 @@ constexpr SwizzleTable SWIZZLE_TABLE = MakeSwizzleTableConst(); template void Swizzle(std::span output, std::span input, u32 bytes_per_pixel, u32 width, u32 height, u32 depth, u32 block_height, u32 block_depth, u32 stride_alignment) { - static constexpr u32 origin_x = 0; // TODO - static constexpr u32 origin_y = 0; // TODO - static constexpr u32 origin_z = 0; // TODO + // The origin of the transformation can be configured here, leave it as zero as the current API + // doesn't expose it. 
+ static constexpr u32 origin_x = 0; + static constexpr u32 origin_y = 0; + static constexpr u32 origin_z = 0; - const u32 pitch = width * bytes_per_pixel; // TODO + // A custom pitch could be configured here. + // As it's not exposed, 'width * bpp' is the expected pitch. + const u32 pitch = width * bytes_per_pixel; const u32 stride = Common::AlignBits(width, stride_alignment) * bytes_per_pixel; const u32 gobs_in_x = Common::DivCeilLog2(stride, GOB_SIZE_X_SHIFT); @@ -86,7 +89,6 @@ void Swizzle(std::span output, std::span input, u32 bytes_per_pixe } } } - } // Anonymous namespace SwizzleTable MakeSwizzleTable() {