early-access version 1259
This commit is contained in:
		| @@ -1,7 +1,7 @@ | ||||
| yuzu emulator early access | ||||
| ============= | ||||
|  | ||||
| This is the source code for early-access 1258. | ||||
| This is the source code for early-access 1259. | ||||
|  | ||||
| ## Legal Notice | ||||
|  | ||||
|   | ||||
| @@ -8,7 +8,7 @@ | ||||
| #include <functional> | ||||
| #include <memory> | ||||
| #include <thread> | ||||
| #include <unordered_map> | ||||
| #include <unordered_set> | ||||
| #include <utility> | ||||
|  | ||||
| #include "common/assert.h" | ||||
| @@ -35,6 +35,7 @@ | ||||
| #include "core/hle/kernel/physical_core.h" | ||||
| #include "core/hle/kernel/process.h" | ||||
| #include "core/hle/kernel/resource_limit.h" | ||||
| #include "core/hle/kernel/service_thread.h" | ||||
| #include "core/hle/kernel/shared_memory.h" | ||||
| #include "core/hle/kernel/synchronization.h" | ||||
| #include "core/hle/kernel/thread.h" | ||||
| @@ -107,6 +108,9 @@ struct KernelCore::Impl { | ||||
|         std::fill(register_host_thread_keys.begin(), register_host_thread_keys.end(), | ||||
|                   std::thread::id{}); | ||||
|         std::fill(register_host_thread_values.begin(), register_host_thread_values.end(), 0); | ||||
|  | ||||
|         // Ensures all service threads shut down gracefully | ||||
|         service_threads.clear(); | ||||
|     } | ||||
|  | ||||
|     void InitializePhysicalCores() { | ||||
| @@ -345,6 +349,9 @@ struct KernelCore::Impl { | ||||
|     std::shared_ptr<Kernel::SharedMemory> irs_shared_mem; | ||||
|     std::shared_ptr<Kernel::SharedMemory> time_shared_mem; | ||||
|  | ||||
|     // Threads used for services | ||||
|     std::unordered_set<std::shared_ptr<Kernel::ServiceThread>> service_threads; | ||||
|  | ||||
|     std::array<std::shared_ptr<Thread>, Core::Hardware::NUM_CPU_CORES> suspend_threads{}; | ||||
|     std::array<Core::CPUInterruptHandler, Core::Hardware::NUM_CPU_CORES> interrupts{}; | ||||
|     std::array<std::unique_ptr<Kernel::KScheduler>, Core::Hardware::NUM_CPU_CORES> schedulers{}; | ||||
| @@ -639,4 +646,16 @@ void KernelCore::ExitSVCProfile() { | ||||
|     MicroProfileLeave(MICROPROFILE_TOKEN(Kernel_SVC), impl->svc_ticks[core]); | ||||
| } | ||||
|  | ||||
| std::weak_ptr<Kernel::ServiceThread> KernelCore::CreateServiceThread(const std::string& name) { | ||||
|     auto service_thread = std::make_shared<Kernel::ServiceThread>(*this, 1, name); | ||||
|     impl->service_threads.emplace(service_thread); | ||||
|     return service_thread; | ||||
| } | ||||
|  | ||||
| void KernelCore::ReleaseServiceThread(std::weak_ptr<Kernel::ServiceThread> service_thread) { | ||||
|     if (auto strong_ptr = service_thread.lock()) { | ||||
|         impl->service_threads.erase(strong_ptr); | ||||
|     } | ||||
| } | ||||
|  | ||||
| } // namespace Kernel | ||||
|   | ||||
| @@ -42,6 +42,7 @@ class Process; | ||||
| class ResourceLimit; | ||||
| class KScheduler; | ||||
| class SharedMemory; | ||||
| class ServiceThread; | ||||
| class Synchronization; | ||||
| class Thread; | ||||
| class TimeManager; | ||||
| @@ -227,6 +228,22 @@ public: | ||||
|  | ||||
|     void ExitSVCProfile(); | ||||
|  | ||||
|     /** | ||||
|      * Creates an HLE service thread, which is used to execute service routines asynchronously. | ||||
|      * While these are allocated per ServerSession, these need to be owned and managed outside of | ||||
|      * ServerSession to avoid a circular dependency. | ||||
|      * @param name String name for the ServerSession creating this thread, used for debug purposes. | ||||
|      * @returns A weak pointer to the newly created service thread. | ||||
|      */ | ||||
|     std::weak_ptr<Kernel::ServiceThread> CreateServiceThread(const std::string& name); | ||||
|  | ||||
|     /** | ||||
|      * Releases an HLE service thread, instructing KernelCore to free it. This should be called | ||||
|      * when the ServerSession associated with the thread is destroyed. | ||||
|      * @param service_thread Service thread to release. | ||||
|      */ | ||||
|     void ReleaseServiceThread(std::weak_ptr<Kernel::ServiceThread> service_thread); | ||||
|  | ||||
| private: | ||||
|     friend class Object; | ||||
|     friend class Process; | ||||
|   | ||||
| @@ -25,7 +25,10 @@ | ||||
| namespace Kernel { | ||||
|  | ||||
| ServerSession::ServerSession(KernelCore& kernel) : SynchronizationObject{kernel} {} | ||||
| ServerSession::~ServerSession() = default; | ||||
|  | ||||
| ServerSession::~ServerSession() { | ||||
|     kernel.ReleaseServiceThread(service_thread); | ||||
| } | ||||
|  | ||||
| ResultVal<std::shared_ptr<ServerSession>> ServerSession::Create(KernelCore& kernel, | ||||
|                                                                 std::shared_ptr<Session> parent, | ||||
| @@ -34,7 +37,7 @@ ResultVal<std::shared_ptr<ServerSession>> ServerSession::Create(KernelCore& kern | ||||
|  | ||||
|     session->name = std::move(name); | ||||
|     session->parent = std::move(parent); | ||||
|     session->service_thread = std::make_unique<ServiceThread>(kernel, 1); | ||||
|     session->service_thread = kernel.CreateServiceThread(session->name); | ||||
|  | ||||
|     return MakeResult(std::move(session)); | ||||
| } | ||||
| @@ -139,7 +142,11 @@ ResultCode ServerSession::QueueSyncRequest(std::shared_ptr<Thread> thread, | ||||
|         std::make_shared<HLERequestContext>(kernel, memory, SharedFrom(this), std::move(thread)); | ||||
|  | ||||
|     context->PopulateFromIncomingCommandBuffer(kernel.CurrentProcess()->GetHandleTable(), cmd_buf); | ||||
|     service_thread->QueueSyncRequest(*this, std::move(context)); | ||||
|  | ||||
|     if (auto strong_ptr = service_thread.lock()) { | ||||
|         strong_ptr->QueueSyncRequest(*this, std::move(context)); | ||||
|         return RESULT_SUCCESS; | ||||
|     } | ||||
|  | ||||
|     return RESULT_SUCCESS; | ||||
| } | ||||
|   | ||||
| @@ -167,7 +167,7 @@ private: | ||||
|     std::string name; | ||||
|  | ||||
|     /// Thread to dispatch service requests | ||||
|     std::unique_ptr<ServiceThread> service_thread; | ||||
|     std::weak_ptr<ServiceThread> service_thread; | ||||
| }; | ||||
|  | ||||
| } // namespace Kernel | ||||
|   | ||||
| @@ -11,6 +11,7 @@ | ||||
|  | ||||
| #include "common/assert.h" | ||||
| #include "common/scope_exit.h" | ||||
| #include "common/thread.h" | ||||
| #include "core/core.h" | ||||
| #include "core/hle/kernel/kernel.h" | ||||
| #include "core/hle/kernel/server_session.h" | ||||
| @@ -22,7 +23,7 @@ namespace Kernel { | ||||
|  | ||||
| class ServiceThread::Impl final { | ||||
| public: | ||||
|     explicit Impl(KernelCore& kernel, std::size_t num_threads); | ||||
|     explicit Impl(KernelCore& kernel, std::size_t num_threads, const std::string& name); | ||||
|     ~Impl(); | ||||
|  | ||||
|     void QueueSyncRequest(ServerSession& session, std::shared_ptr<HLERequestContext>&& context); | ||||
| @@ -32,12 +33,16 @@ private: | ||||
|     std::queue<std::function<void()>> requests; | ||||
|     std::mutex queue_mutex; | ||||
|     std::condition_variable condition; | ||||
|     const std::string service_name; | ||||
|     bool stop{}; | ||||
| }; | ||||
|  | ||||
| ServiceThread::Impl::Impl(KernelCore& kernel, std::size_t num_threads) { | ||||
| ServiceThread::Impl::Impl(KernelCore& kernel, std::size_t num_threads, const std::string& name) | ||||
|     : service_name{name} { | ||||
|     for (std::size_t i = 0; i < num_threads; ++i) | ||||
|         threads.emplace_back([&] { | ||||
|         threads.emplace_back([this, &kernel] { | ||||
|             Common::SetCurrentThreadName(std::string{"Hle_" + service_name}.c_str()); | ||||
|  | ||||
|             // Wait for first request before trying to acquire a render context | ||||
|             { | ||||
|                 std::unique_lock lock{queue_mutex}; | ||||
| @@ -52,7 +57,7 @@ ServiceThread::Impl::Impl(KernelCore& kernel, std::size_t num_threads) { | ||||
|                 { | ||||
|                     std::unique_lock lock{queue_mutex}; | ||||
|                     condition.wait(lock, [this] { return stop || !requests.empty(); }); | ||||
|                     if (stop && requests.empty()) { | ||||
|                     if (stop || requests.empty()) { | ||||
|                         return; | ||||
|                     } | ||||
|                     task = std::move(requests.front()); | ||||
| @@ -68,9 +73,14 @@ void ServiceThread::Impl::QueueSyncRequest(ServerSession& session, | ||||
|                                            std::shared_ptr<HLERequestContext>&& context) { | ||||
|     { | ||||
|         std::unique_lock lock{queue_mutex}; | ||||
|         requests.emplace([session{SharedFrom(&session)}, context{std::move(context)}]() { | ||||
|             session->CompleteSyncRequest(*context); | ||||
|             return; | ||||
|  | ||||
|         // ServerSession owns the service thread, so we cannot capture a strong pointer here in the | ||||
|         // event that the ServerSession is terminated. | ||||
|         std::weak_ptr<ServerSession> weak_ptr{SharedFrom(&session)}; | ||||
|         requests.emplace([weak_ptr, context{std::move(context)}]() { | ||||
|             if (auto strong_ptr = weak_ptr.lock()) { | ||||
|                 strong_ptr->CompleteSyncRequest(*context); | ||||
|             } | ||||
|         }); | ||||
|     } | ||||
|     condition.notify_one(); | ||||
| @@ -87,8 +97,8 @@ ServiceThread::Impl::~Impl() { | ||||
|     } | ||||
| } | ||||
|  | ||||
| ServiceThread::ServiceThread(KernelCore& kernel, std::size_t num_threads) | ||||
|     : impl{std::make_unique<Impl>(kernel, num_threads)} {} | ||||
| ServiceThread::ServiceThread(KernelCore& kernel, std::size_t num_threads, const std::string& name) | ||||
|     : impl{std::make_unique<Impl>(kernel, num_threads, name)} {} | ||||
|  | ||||
| ServiceThread::~ServiceThread() = default; | ||||
|  | ||||
|   | ||||
| @@ -5,6 +5,7 @@ | ||||
| #pragma once | ||||
|  | ||||
| #include <memory> | ||||
| #include <string> | ||||
|  | ||||
| namespace Kernel { | ||||
|  | ||||
| @@ -14,7 +15,7 @@ class ServerSession; | ||||
|  | ||||
| class ServiceThread final { | ||||
| public: | ||||
|     explicit ServiceThread(KernelCore& kernel, std::size_t num_threads); | ||||
|     explicit ServiceThread(KernelCore& kernel, std::size_t num_threads, const std::string& name); | ||||
|     ~ServiceThread(); | ||||
|  | ||||
|     void QueueSyncRequest(ServerSession& session, std::shared_ptr<HLERequestContext>&& context); | ||||
|   | ||||
| @@ -225,6 +225,8 @@ add_library(video_core STATIC | ||||
|     shader/transform_feedback.h | ||||
|     surface.cpp | ||||
|     surface.h | ||||
|     texture_cache/accelerated_swizzle.cpp | ||||
|     texture_cache/accelerated_swizzle.h | ||||
|     texture_cache/decode_bc4.cpp | ||||
|     texture_cache/decode_bc4.h | ||||
|     texture_cache/descriptor_table.h | ||||
|   | ||||
| @@ -3,10 +3,11 @@ | ||||
| // Refer to the license.txt file included. | ||||
|  | ||||
| #include <bit> | ||||
| #include <span> | ||||
| #include <string_view> | ||||
|  | ||||
| #include <glad/glad.h> | ||||
|  | ||||
| #include "common/alignment.h" | ||||
| #include "common/assert.h" | ||||
| #include "common/common_types.h" | ||||
| #include "common/div_ceil.h" | ||||
| @@ -19,6 +20,7 @@ | ||||
| #include "video_core/renderer_opengl/gl_texture_cache.h" | ||||
| #include "video_core/renderer_opengl/util_shaders.h" | ||||
| #include "video_core/surface.h" | ||||
| #include "video_core/texture_cache/accelerated_swizzle.h" | ||||
| #include "video_core/texture_cache/types.h" | ||||
| #include "video_core/texture_cache/util.h" | ||||
| #include "video_core/textures/decoders.h" | ||||
| @@ -27,14 +29,12 @@ namespace OpenGL { | ||||
|  | ||||
| using namespace HostShaders; | ||||
|  | ||||
| using Tegra::Texture::GOB_SIZE_SHIFT; | ||||
| using Tegra::Texture::GOB_SIZE_X; | ||||
| using Tegra::Texture::GOB_SIZE_X_SHIFT; | ||||
| using Tegra::Texture::GOB_SIZE_Y_SHIFT; | ||||
| using VideoCommon::Extent3D; | ||||
| using VideoCommon::ImageCopy; | ||||
| using VideoCommon::ImageType; | ||||
| using VideoCommon::SwizzleParameters; | ||||
| using VideoCommon::Accelerated::MakeBlockLinearSwizzle2DParams; | ||||
| using VideoCommon::Accelerated::MakeBlockLinearSwizzle3DParams; | ||||
| using VideoCore::Surface::BytesPerBlock; | ||||
|  | ||||
| namespace { | ||||
| @@ -69,50 +69,32 @@ void UtilShaders::BlockLinearUpload2D(Image& image, const ImageBufferMap& map, s | ||||
|     static constexpr GLuint BINDING_SWIZZLE_BUFFER = 0; | ||||
|     static constexpr GLuint BINDING_INPUT_BUFFER = 1; | ||||
|     static constexpr GLuint BINDING_OUTPUT_IMAGE = 0; | ||||
|     static constexpr GLuint LOC_ORIGIN = 0; | ||||
|     static constexpr GLuint LOC_DESTINATION = 1; | ||||
|     static constexpr GLuint LOC_BYTES_PER_BLOCK = 2; | ||||
|     static constexpr GLuint LOC_LAYER_STRIDE = 3; | ||||
|     static constexpr GLuint LOC_BLOCK_SIZE = 4; | ||||
|     static constexpr GLuint LOC_X_SHIFT = 5; | ||||
|     static constexpr GLuint LOC_BLOCK_HEIGHT = 6; | ||||
|     static constexpr GLuint LOC_BLOCK_HEIGHT_MASK = 7; | ||||
|  | ||||
|     const u32 bytes_per_block = BytesPerBlock(image.info.format); | ||||
|     const u32 bytes_per_block_log2 = std::countr_zero(bytes_per_block); | ||||
|  | ||||
|     program_manager.BindHostCompute(block_linear_unswizzle_2d_program.handle); | ||||
|     glFlushMappedNamedBufferRange(map.Handle(), buffer_offset, image.guest_size_bytes); | ||||
|     glBindBufferBase(GL_SHADER_STORAGE_BUFFER, BINDING_SWIZZLE_BUFFER, swizzle_table_buffer.handle); | ||||
|     glUniform3ui(LOC_ORIGIN, 0, 0, 0);     // TODO | ||||
|     glUniform3i(LOC_DESTINATION, 0, 0, 0); // TODO | ||||
|     glUniform1ui(LOC_BYTES_PER_BLOCK, bytes_per_block_log2); | ||||
|     glUniform1ui(LOC_LAYER_STRIDE, image.info.layer_stride); | ||||
|  | ||||
|     const GLenum store_format = StoreFormat(BytesPerBlock(image.info.format)); | ||||
|     for (const SwizzleParameters& swizzle : swizzles) { | ||||
|         const Extent3D block = swizzle.block; | ||||
|         const Extent3D num_tiles = swizzle.num_tiles; | ||||
|         const size_t offset = swizzle.buffer_offset + buffer_offset; | ||||
|         const size_t input_offset = swizzle.buffer_offset + buffer_offset; | ||||
|  | ||||
|         const u32 num_dispatches_x = Common::DivCeil(num_tiles.width, WORKGROUP_SIZE.width); | ||||
|         const u32 num_dispatches_y = Common::DivCeil(num_tiles.height, WORKGROUP_SIZE.height); | ||||
|  | ||||
|         const u32 stride_alignment = CalculateLevelStrideAlignment(image.info, swizzle.level); | ||||
|         const u32 stride = Common::AlignBits(num_tiles.width, stride_alignment) * bytes_per_block; | ||||
|  | ||||
|         const u32 gobs_in_x = (stride + GOB_SIZE_X - 1) >> GOB_SIZE_X_SHIFT; | ||||
|         const u32 block_size = gobs_in_x << (GOB_SIZE_SHIFT + block.height + block.depth); | ||||
|  | ||||
|         const u32 block_height_mask = (1U << block.height) - 1; | ||||
|         const u32 x_shift = GOB_SIZE_SHIFT + block.height + block.depth; | ||||
|  | ||||
|         glUniform1ui(LOC_BLOCK_SIZE, block_size); | ||||
|         glUniform1ui(LOC_X_SHIFT, x_shift); | ||||
|         glUniform1ui(LOC_BLOCK_HEIGHT, block.height); | ||||
|         glUniform1ui(LOC_BLOCK_HEIGHT_MASK, block_height_mask); | ||||
|         glBindBufferRange(GL_SHADER_STORAGE_BUFFER, BINDING_INPUT_BUFFER, map.Handle(), offset, | ||||
|                           image.guest_size_bytes - swizzle.buffer_offset); | ||||
|         const auto params = MakeBlockLinearSwizzle2DParams(swizzle, image.info); | ||||
|         glUniform3uiv(0, 1, params.origin.data()); | ||||
|         glUniform3iv(1, 1, params.destination.data()); | ||||
|         glUniform1ui(2, params.bytes_per_block_log2); | ||||
|         glUniform1ui(3, params.layer_stride); | ||||
|         glUniform1ui(4, params.block_size); | ||||
|         glUniform1ui(5, params.x_shift); | ||||
|         glUniform1ui(6, params.block_height); | ||||
|         glUniform1ui(7, params.block_height_mask); | ||||
|         glBindBufferRange(GL_SHADER_STORAGE_BUFFER, BINDING_INPUT_BUFFER, map.Handle(), | ||||
|                           input_offset, image.guest_size_bytes - swizzle.buffer_offset); | ||||
|         glBindImageTexture(BINDING_OUTPUT_IMAGE, image.Handle(), swizzle.level, GL_TRUE, 0, | ||||
|                            GL_WRITE_ONLY, StoreFormat(bytes_per_block)); | ||||
|                            GL_WRITE_ONLY, store_format); | ||||
|         glDispatchCompute(num_dispatches_x, num_dispatches_y, image.info.resources.layers); | ||||
|     } | ||||
|     program_manager.RestoreGuestCompute(); | ||||
| @@ -126,60 +108,35 @@ void UtilShaders::BlockLinearUpload3D(Image& image, const ImageBufferMap& map, s | ||||
|     static constexpr GLuint BINDING_INPUT_BUFFER = 1; | ||||
|     static constexpr GLuint BINDING_OUTPUT_IMAGE = 0; | ||||
|  | ||||
|     static constexpr GLuint LOC_ORIGIN = 0; | ||||
|     static constexpr GLuint LOC_DESTINATION = 1; | ||||
|     static constexpr GLuint LOC_BYTES_PER_BLOCK = 2; | ||||
|     static constexpr GLuint SLICE_SIZE_LOC = 3; | ||||
|     static constexpr GLuint LOC_BLOCK_SIZE = 4; | ||||
|     static constexpr GLuint LOC_X_SHIFT = 5; | ||||
|     static constexpr GLuint LOC_BLOCK_HEIGHT = 6; | ||||
|     static constexpr GLuint LOC_BLOCK_HEIGHT_MASK = 7; | ||||
|     static constexpr GLuint BLOCK_DEPTH_LOC = 8; | ||||
|     static constexpr GLuint BLOCK_DEPTH_MASK_LOC = 9; | ||||
|  | ||||
|     const u32 bytes_per_block = BytesPerBlock(image.info.format); | ||||
|     const u32 bytes_per_block_log2 = std::countr_zero(bytes_per_block); | ||||
|  | ||||
|     glFlushMappedNamedBufferRange(map.Handle(), buffer_offset, image.guest_size_bytes); | ||||
|     program_manager.BindHostCompute(block_linear_unswizzle_3d_program.handle); | ||||
|     glBindBufferBase(GL_SHADER_STORAGE_BUFFER, BINDING_SWIZZLE_BUFFER, swizzle_table_buffer.handle); | ||||
|     glUniform3ui(LOC_ORIGIN, 0, 0, 0);     // TODO | ||||
|     glUniform3i(LOC_DESTINATION, 0, 0, 0); // TODO | ||||
|     glUniform1ui(LOC_BYTES_PER_BLOCK, bytes_per_block_log2); | ||||
|  | ||||
|     const GLenum store_format = StoreFormat(BytesPerBlock(image.info.format)); | ||||
|     for (const SwizzleParameters& swizzle : swizzles) { | ||||
|         const Extent3D block = swizzle.block; | ||||
|         const Extent3D num_tiles = swizzle.num_tiles; | ||||
|         const size_t offset = swizzle.buffer_offset + buffer_offset; | ||||
|         const size_t input_offset = swizzle.buffer_offset + buffer_offset; | ||||
|  | ||||
|         const u32 num_dispatches_x = Common::DivCeil(num_tiles.width, WORKGROUP_SIZE.width); | ||||
|         const u32 num_dispatches_y = Common::DivCeil(num_tiles.height, WORKGROUP_SIZE.height); | ||||
|         const u32 num_dispatches_z = Common::DivCeil(num_tiles.depth, WORKGROUP_SIZE.depth); | ||||
|  | ||||
|         const u32 stride_alignment = CalculateLevelStrideAlignment(image.info, swizzle.level); | ||||
|         const u32 stride = Common::AlignBits(num_tiles.width, stride_alignment) * bytes_per_block; | ||||
|  | ||||
|         const u32 gobs_in_x = (stride + GOB_SIZE_X - 1) >> GOB_SIZE_X_SHIFT; | ||||
|         const u32 block_size = gobs_in_x << (GOB_SIZE_SHIFT + block.height + block.depth); | ||||
|         const u32 slice_size = | ||||
|             Common::DivCeilLog2(num_tiles.height, block.height + GOB_SIZE_Y_SHIFT) * block_size; | ||||
|  | ||||
|         const u32 block_height_mask = (1U << block.height) - 1; | ||||
|         const u32 block_depth_mask = (1U << block.depth) - 1; | ||||
|         const u32 x_shift = GOB_SIZE_SHIFT + block.height + block.depth; | ||||
|  | ||||
|         glUniform1ui(SLICE_SIZE_LOC, slice_size); | ||||
|         glUniform1ui(LOC_BLOCK_SIZE, block_size); | ||||
|         glUniform1ui(LOC_X_SHIFT, x_shift); | ||||
|         glUniform1ui(LOC_BLOCK_HEIGHT, block.height); | ||||
|         glUniform1ui(LOC_BLOCK_HEIGHT_MASK, block_height_mask); | ||||
|         glUniform1ui(BLOCK_DEPTH_LOC, block.depth); | ||||
|         glUniform1ui(BLOCK_DEPTH_MASK_LOC, block_depth_mask); | ||||
|  | ||||
|         glBindBufferRange(GL_SHADER_STORAGE_BUFFER, BINDING_INPUT_BUFFER, map.Handle(), offset, | ||||
|                           image.guest_size_bytes - swizzle.buffer_offset); | ||||
|         const auto params = MakeBlockLinearSwizzle3DParams(swizzle, image.info); | ||||
|         glUniform3uiv(0, 1, params.origin.data()); | ||||
|         glUniform3iv(1, 1, params.destination.data()); | ||||
|         glUniform1ui(2, params.bytes_per_block_log2); | ||||
|         glUniform1ui(3, params.slice_size); | ||||
|         glUniform1ui(4, params.block_size); | ||||
|         glUniform1ui(5, params.x_shift); | ||||
|         glUniform1ui(6, params.block_height); | ||||
|         glUniform1ui(7, params.block_height_mask); | ||||
|         glUniform1ui(8, params.block_depth); | ||||
|         glUniform1ui(9, params.block_depth_mask); | ||||
|         glBindBufferRange(GL_SHADER_STORAGE_BUFFER, BINDING_INPUT_BUFFER, map.Handle(), | ||||
|                           input_offset, image.guest_size_bytes - swizzle.buffer_offset); | ||||
|         glBindImageTexture(BINDING_OUTPUT_IMAGE, image.Handle(), swizzle.level, GL_TRUE, 0, | ||||
|                            GL_WRITE_ONLY, StoreFormat(bytes_per_block)); | ||||
|  | ||||
|                            GL_WRITE_ONLY, store_format); | ||||
|         glDispatchCompute(num_dispatches_x, num_dispatches_y, num_dispatches_z); | ||||
|     } | ||||
|     program_manager.RestoreGuestCompute(); | ||||
| @@ -204,22 +161,20 @@ void UtilShaders::PitchUpload(Image& image, const ImageBufferMap& map, size_t bu | ||||
|  | ||||
|     program_manager.BindHostCompute(pitch_unswizzle_program.handle); | ||||
|     glFlushMappedNamedBufferRange(map.Handle(), buffer_offset, image.guest_size_bytes); | ||||
|     glUniform2ui(LOC_ORIGIN, 0, 0);     // TODO | ||||
|     glUniform2i(LOC_DESTINATION, 0, 0); // TODO | ||||
|     glUniform2ui(LOC_ORIGIN, 0, 0); | ||||
|     glUniform2i(LOC_DESTINATION, 0, 0); | ||||
|     glUniform1ui(LOC_BYTES_PER_BLOCK, bytes_per_block); | ||||
|     glUniform1ui(LOC_PITCH, pitch); | ||||
|     glBindImageTexture(BINDING_OUTPUT_IMAGE, image.Handle(), 0, GL_FALSE, 0, GL_WRITE_ONLY, format); | ||||
|     for (const SwizzleParameters& swizzle : swizzles) { | ||||
|         const Extent3D num_tiles = swizzle.num_tiles; | ||||
|         const size_t offset = swizzle.buffer_offset + buffer_offset; | ||||
|         const size_t input_offset = swizzle.buffer_offset + buffer_offset; | ||||
|  | ||||
|         const u32 aligned_width = Common::AlignUp(num_tiles.width, WORKGROUP_SIZE.width); | ||||
|         const u32 aligned_height = Common::AlignUp(num_tiles.height, WORKGROUP_SIZE.height); | ||||
|         const u32 num_dispatches_x = aligned_width / WORKGROUP_SIZE.width; | ||||
|         const u32 num_dispatches_y = aligned_height / WORKGROUP_SIZE.height; | ||||
|         const u32 num_dispatches_x = Common::DivCeil(num_tiles.width, WORKGROUP_SIZE.width); | ||||
|         const u32 num_dispatches_y = Common::DivCeil(num_tiles.height, WORKGROUP_SIZE.height); | ||||
|  | ||||
|         glBindBufferRange(GL_SHADER_STORAGE_BUFFER, BINDING_INPUT_BUFFER, map.Handle(), offset, | ||||
|                           image.guest_size_bytes - swizzle.buffer_offset); | ||||
|         glBindBufferRange(GL_SHADER_STORAGE_BUFFER, BINDING_INPUT_BUFFER, map.Handle(), | ||||
|                           input_offset, image.guest_size_bytes - swizzle.buffer_offset); | ||||
|         glDispatchCompute(num_dispatches_x, num_dispatches_y, 1); | ||||
|     } | ||||
|     program_manager.RestoreGuestCompute(); | ||||
|   | ||||
							
								
								
									
										70
									
								
								src/video_core/texture_cache/accelerated_swizzle.cpp
									
									
									
									
									
										Executable file
									
								
							
							
						
						
									
										70
									
								
								src/video_core/texture_cache/accelerated_swizzle.cpp
									
									
									
									
									
										Executable file
									
								
							| @@ -0,0 +1,70 @@ | ||||
| // Copyright 2020 yuzu Emulator Project | ||||
| // Licensed under GPLv2 or any later version | ||||
| // Refer to the license.txt file included. | ||||
|  | ||||
| #include <array> | ||||
| #include <bit> | ||||
|  | ||||
| #include "common/alignment.h" | ||||
| #include "common/common_types.h" | ||||
| #include "common/div_ceil.h" | ||||
| #include "video_core/surface.h" | ||||
| #include "video_core/texture_cache/accelerated_swizzle.h" | ||||
| #include "video_core/texture_cache/util.h" | ||||
| #include "video_core/textures/decoders.h" | ||||
|  | ||||
| namespace VideoCommon::Accelerated { | ||||
|  | ||||
| using Tegra::Texture::GOB_SIZE_SHIFT; | ||||
| using Tegra::Texture::GOB_SIZE_X; | ||||
| using Tegra::Texture::GOB_SIZE_X_SHIFT; | ||||
| using Tegra::Texture::GOB_SIZE_Y_SHIFT; | ||||
| using VideoCore::Surface::BytesPerBlock; | ||||
|  | ||||
| BlockLinearSwizzle2DParams MakeBlockLinearSwizzle2DParams(const SwizzleParameters& swizzle, | ||||
|                                                           const ImageInfo& info) { | ||||
|     const Extent3D block = swizzle.block; | ||||
|     const Extent3D num_tiles = swizzle.num_tiles; | ||||
|     const u32 bytes_per_block = BytesPerBlock(info.format); | ||||
|     const u32 stride_alignment = CalculateLevelStrideAlignment(info, swizzle.level); | ||||
|     const u32 stride = Common::AlignBits(num_tiles.width, stride_alignment) * bytes_per_block; | ||||
|     const u32 gobs_in_x = Common::DivCeilLog2(stride, GOB_SIZE_X_SHIFT); | ||||
|     return BlockLinearSwizzle2DParams{ | ||||
|         .origin{0, 0, 0}, | ||||
|         .destination{0, 0, 0}, | ||||
|         .bytes_per_block_log2 = static_cast<u32>(std::countr_zero(bytes_per_block)), | ||||
|         .layer_stride = info.layer_stride, | ||||
|         .block_size = gobs_in_x << (GOB_SIZE_SHIFT + block.height + block.depth), | ||||
|         .x_shift = GOB_SIZE_SHIFT + block.height + block.depth, | ||||
|         .block_height = block.height, | ||||
|         .block_height_mask = (1U << block.height) - 1, | ||||
|     }; | ||||
| } | ||||
|  | ||||
| BlockLinearSwizzle3DParams MakeBlockLinearSwizzle3DParams(const SwizzleParameters& swizzle, | ||||
|                                                           const ImageInfo& info) { | ||||
|     const Extent3D block = swizzle.block; | ||||
|     const Extent3D num_tiles = swizzle.num_tiles; | ||||
|     const u32 bytes_per_block = BytesPerBlock(info.format); | ||||
|     const u32 stride_alignment = CalculateLevelStrideAlignment(info, swizzle.level); | ||||
|     const u32 stride = Common::AlignBits(num_tiles.width, stride_alignment) * bytes_per_block; | ||||
|  | ||||
|     const u32 gobs_in_x = (stride + GOB_SIZE_X - 1) >> GOB_SIZE_X_SHIFT; | ||||
|     const u32 block_size = gobs_in_x << (GOB_SIZE_SHIFT + block.height + block.depth); | ||||
|     const u32 slice_size = | ||||
|         Common::DivCeilLog2(num_tiles.height, block.height + GOB_SIZE_Y_SHIFT) * block_size; | ||||
|     return BlockLinearSwizzle3DParams{ | ||||
|         .origin{0, 0, 0}, | ||||
|         .destination{0, 0, 0}, | ||||
|         .bytes_per_block_log2 = static_cast<u32>(std::countr_zero(bytes_per_block)), | ||||
|         .slice_size = slice_size, | ||||
|         .block_size = block_size, | ||||
|         .x_shift = GOB_SIZE_SHIFT + block.height + block.depth, | ||||
|         .block_height = block.height, | ||||
|         .block_height_mask = (1U << block.height) - 1, | ||||
|         .block_depth = block.depth, | ||||
|         .block_depth_mask = (1U << block.depth) - 1, | ||||
|     }; | ||||
| } | ||||
|  | ||||
| } // namespace VideoCommon::Accelerated | ||||
							
								
								
									
										45
									
								
								src/video_core/texture_cache/accelerated_swizzle.h
									
									
									
									
									
										Executable file
									
								
							
							
						
						
									
										45
									
								
								src/video_core/texture_cache/accelerated_swizzle.h
									
									
									
									
									
										Executable file
									
								
							| @@ -0,0 +1,45 @@ | ||||
| // Copyright 2020 yuzu Emulator Project | ||||
| // Licensed under GPLv2 or any later version | ||||
| // Refer to the license.txt file included. | ||||
|  | ||||
| #pragma once | ||||
|  | ||||
| #include <array> | ||||
|  | ||||
| #include "common/common_types.h" | ||||
| #include "video_core/texture_cache/image_info.h" | ||||
| #include "video_core/texture_cache/types.h" | ||||
|  | ||||
| namespace VideoCommon::Accelerated { | ||||
|  | ||||
| struct BlockLinearSwizzle2DParams { | ||||
|     std::array<u32, 3> origin; | ||||
|     std::array<s32, 3> destination; | ||||
|     u32 bytes_per_block_log2; | ||||
|     u32 layer_stride; | ||||
|     u32 block_size; | ||||
|     u32 x_shift; | ||||
|     u32 block_height; | ||||
|     u32 block_height_mask; | ||||
| }; | ||||
|  | ||||
| struct BlockLinearSwizzle3DParams { | ||||
|     std::array<u32, 3> origin; | ||||
|     std::array<s32, 3> destination; | ||||
|     u32 bytes_per_block_log2; | ||||
|     u32 slice_size; | ||||
|     u32 block_size; | ||||
|     u32 x_shift; | ||||
|     u32 block_height; | ||||
|     u32 block_height_mask; | ||||
|     u32 block_depth; | ||||
|     u32 block_depth_mask; | ||||
| }; | ||||
|  | ||||
| [[nodiscard]] BlockLinearSwizzle2DParams MakeBlockLinearSwizzle2DParams( | ||||
|     const SwizzleParameters& swizzle, const ImageInfo& info); | ||||
|  | ||||
| [[nodiscard]] BlockLinearSwizzle3DParams MakeBlockLinearSwizzle3DParams( | ||||
|     const SwizzleParameters& swizzle, const ImageInfo& info); | ||||
|  | ||||
| } // namespace VideoCommon::Accelerated | ||||
| @@ -19,7 +19,6 @@ | ||||
| namespace Tegra::Texture { | ||||
|  | ||||
| namespace { | ||||
|  | ||||
| /** | ||||
|  * This table represents the internal swizzle of a gob, in format 16 bytes x 2 sector packing. | ||||
|  * Calculates the offset of an (x, y) position within a swizzled texture. | ||||
| @@ -41,11 +40,15 @@ constexpr SwizzleTable SWIZZLE_TABLE = MakeSwizzleTableConst(); | ||||
| template <bool TO_LINEAR> | ||||
| void Swizzle(std::span<u8> output, std::span<const u8> input, u32 bytes_per_pixel, u32 width, | ||||
|              u32 height, u32 depth, u32 block_height, u32 block_depth, u32 stride_alignment) { | ||||
|     static constexpr u32 origin_x = 0; // TODO | ||||
|     static constexpr u32 origin_y = 0; // TODO | ||||
|     static constexpr u32 origin_z = 0; // TODO | ||||
|     // The origin of the transformation can be configured here. Leave it as zero, as the current | ||||
|     // API doesn't expose it. | ||||
|     static constexpr u32 origin_x = 0; | ||||
|     static constexpr u32 origin_y = 0; | ||||
|     static constexpr u32 origin_z = 0; | ||||
|  | ||||
|     const u32 pitch = width * bytes_per_pixel; // TODO | ||||
|     // A custom pitch could be configured here. | ||||
|     // As it's not exposed, 'width * bpp' is the expected pitch. | ||||
|     const u32 pitch = width * bytes_per_pixel; | ||||
|     const u32 stride = Common::AlignBits(width, stride_alignment) * bytes_per_pixel; | ||||
|  | ||||
|     const u32 gobs_in_x = Common::DivCeilLog2(stride, GOB_SIZE_X_SHIFT); | ||||
| @@ -86,7 +89,6 @@ void Swizzle(std::span<u8> output, std::span<const u8> input, u32 bytes_per_pixe | ||||
|         } | ||||
|     } | ||||
| } | ||||
|  | ||||
| } // Anonymous namespace | ||||
|  | ||||
| SwizzleTable MakeSwizzleTable() { | ||||
|   | ||||
		Reference in New Issue
	
	Block a user