early-access version 4155

2024-02-21 02:52:05 +01:00
parent 64bd48fad5
commit b86c4f1e66
24 changed files with 160 additions and 298 deletions
--- a/src/video_core/control/channel_state.h
+++ b/src/video_core/control/channel_state.h
@@ -45,12 +45,6 @@ struct ChannelState {
    void BindRasterizer(VideoCore::RasterizerInterface* rasterizer);

    s32 bind_id = -1;
-    /// Scheduling info
-    u32 syncpoint_id = 0xFFFF;
-    u32 priority = 0;
-    u32 timeslice = 0;
-    u32 timeout = 0;
-
    /// 3D engine
    std::unique_ptr<Engines::Maxwell3D> maxwell_3d;
    /// 2D engine
--- a/src/video_core/control/scheduler.cpp
+++ b/src/video_core/control/scheduler.cpp
@@ -1,245 +1,32 @@
 // SPDX-FileCopyrightText: 2021 yuzu Emulator Project
 // SPDX-License-Identifier: GPL-3.0-or-later

-#include <atomic>
-#include <deque>
-#include <map>
 #include <memory>
-#include <mutex>
-#include <unordered_map>
-#include <utility>

 #include "common/assert.h"
-#include "common/fiber.h"
+#include "video_core/control/channel_state.h"
 #include "video_core/control/scheduler.h"
-#include "video_core/dma_pusher.h"
 #include "video_core/gpu.h"

 namespace Tegra::Control {
-
-struct GPFifoContext {
-    bool is_active;
-    bool is_running;
-    std::shared_ptr<Common::Fiber> context;
-    std::deque<CommandList> pending_work;
-    std::mutex guard;
-    s32 bind_id;
-    std::shared_ptr<ChannelState> info;
-    size_t yield_count;
-    size_t scheduled_count;
-};
-
-struct Scheduler::SchedulerImpl {
-    // Fifos
-    std::map<u32, std::list<size_t>, std::greater<u32>> schedule_priority_queue;
-    std::unordered_map<s32, size_t> channel_gpfifo_ids;
-    std::deque<GPFifoContext> gpfifos;
-    std::deque<size_t> free_fifos;
-
-    // Scheduling
-    std::mutex scheduling_guard;
-    std::shared_ptr<Common::Fiber> master_control;
-    bool must_reschedule{};
-    GPFifoContext* current_fifo{};
-};
-
-Scheduler::Scheduler(GPU& gpu_) : gpu{gpu_} {
-    impl = std::make_unique<SchedulerImpl>();
-}
+Scheduler::Scheduler(GPU& gpu_) : gpu{gpu_} {}

 Scheduler::~Scheduler() = default;

-void Scheduler::Init() {
-    impl->master_control = Common::Fiber::ThreadToFiber();
-}
-
-void Scheduler::Resume() {
-    while (UpdateHighestPriorityChannel()) {
-        impl->current_fifo->scheduled_count++;
-        Common::Fiber::YieldTo(impl->master_control, *impl->current_fifo->context);
-    }
-}
-
-bool Scheduler::UpdateHighestPriorityChannel() {
-    std::scoped_lock lk(impl->scheduling_guard);
-
-    // Clear needs to schedule state.
-    impl->must_reschedule = false;
-
-    // By default, we don't have a channel to schedule.
-    impl->current_fifo = nullptr;
-
-    // Check each level to see if we can schedule.
-    for (auto& level : impl->schedule_priority_queue) {
-        if (ScheduleLevel(level.second)) {
-            return true;
-        }
-    }
-
-    // Nothing to schedule.
-    return false;
-}
-
-bool Scheduler::ScheduleLevel(std::list<size_t>& queue) {
-    bool found_anything = false;
-    size_t min_schedule_count = std::numeric_limits<size_t>::max();
-    for (auto id : queue) {
-        auto& fifo = impl->gpfifos[id];
-        std::scoped_lock lk(fifo.guard);
-
-        // With no pending work and nothing running, this channel can't be scheduled.
-        if (fifo.pending_work.empty() && !fifo.is_running) {
-            continue;
-        }
-        // Prioritize channels at current priority which have been run the least.
-        if (fifo.scheduled_count > min_schedule_count) {
-            continue;
-        }
-
-        // Try not to select the same channel we just yielded from.
-        if (fifo.scheduled_count < fifo.yield_count) {
-            fifo.scheduled_count++;
-            continue;
-        }
-
-        // Update best selection.
-        min_schedule_count = fifo.scheduled_count;
-        impl->current_fifo = &fifo;
-        found_anything = true;
-    }
-    return found_anything;
-}
-
-void Scheduler::ChangePriority(s32 channel_id, u32 new_priority) {
-    std::scoped_lock lk(impl->scheduling_guard);
-    // Ensure we are tracking this channel.
-    auto fifo_it = impl->channel_gpfifo_ids.find(channel_id);
-    if (fifo_it == impl->channel_gpfifo_ids.end()) {
-        return;
-    }
-
-    // Get the fifo and update its priority.
-    const size_t fifo_id = fifo_it->second;
-    auto& fifo = impl->gpfifos[fifo_id];
-    const auto old_priority = std::exchange(fifo.info->priority, new_priority);
-
-    // Create the new level if needed.
-    impl->schedule_priority_queue.try_emplace(new_priority);
-
-    // Remove the old level and add to the new level.
-    impl->schedule_priority_queue[new_priority].push_back(fifo_id);
-    impl->schedule_priority_queue[old_priority].remove_if(
-        [fifo_id](size_t id) { return id == fifo_id; });
-}
-
-void Scheduler::Yield() {
-    ASSERT(impl->current_fifo != nullptr);
-
-    // Set yield count higher
-    impl->current_fifo->yield_count = impl->current_fifo->scheduled_count + 1;
-    Common::Fiber::YieldTo(impl->current_fifo->context, *impl->master_control);
-    gpu.BindChannel(impl->current_fifo->bind_id);
-}
-
-void Scheduler::CheckStatus() {
-    {
-        std::unique_lock lk(impl->scheduling_guard);
-        // If no reschedule is needed, don't transfer control
-        if (!impl->must_reschedule) {
-            return;
-        }
-    }
-    // Transfer control to the scheduler
-    Common::Fiber::YieldTo(impl->current_fifo->context, *impl->master_control);
-    gpu.BindChannel(impl->current_fifo->bind_id);
-}
-
 void Scheduler::Push(s32 channel, CommandList&& entries) {
-    std::scoped_lock lk(impl->scheduling_guard);
-    // Get and ensure we have this channel.
-    auto it = impl->channel_gpfifo_ids.find(channel);
-    ASSERT(it != impl->channel_gpfifo_ids.end());
-    auto gpfifo_id = it->second;
-    auto& fifo = impl->gpfifos[gpfifo_id];
-    // Add the new new work to the channel.
-    {
-        std::scoped_lock lk2(fifo.guard);
-        fifo.pending_work.emplace_back(std::move(entries));
-    }
-
-    // If the current running FIFO is null or the one being pushed to then
-    // just return
-    if (impl->current_fifo == nullptr || impl->current_fifo == &fifo) {
-        return;
-    }
-
-    // If the current fifo has higher or equal priority to the current fifo then return
-    if (impl->current_fifo->info->priority >= fifo.info->priority) {
-        return;
-    }
-    // Mark scheduler update as required.
-    impl->must_reschedule = true;
-}
-
-void Scheduler::ChannelLoop(size_t gpfifo_id, s32 channel_id) {
-    auto& fifo = impl->gpfifos[gpfifo_id];
-    auto* channel_state = fifo.info.get();
-    const auto SendToPuller = [&] {
-        std::scoped_lock lk(fifo.guard);
-        if (fifo.pending_work.empty()) {
-            // Stop if no work available.
-            fifo.is_running = false;
-            return false;
-        }
-        // Otherwise, send work to puller and mark as running.
-        CommandList&& entries = std::move(fifo.pending_work.front());
-        channel_state->dma_pusher->Push(std::move(entries));
-        fifo.pending_work.pop_front();
-        fifo.is_running = true;
-        // Succeed.
-        return true;
-    };
-    // Inform the GPU about the current channel.
-    gpu.BindChannel(channel_id);
-    while (true) {
-        while (SendToPuller()) {
-            // Execute.
-            channel_state->dma_pusher->DispatchCalls();
-            // Reschedule.
-            CheckStatus();
-        }
-        // Return to host execution when all work is completed.
-        Common::Fiber::YieldTo(fifo.context, *impl->master_control);
-        // Inform the GPU about the current channel.
-        gpu.BindChannel(channel_id);
-    }
+    std::unique_lock lk(scheduling_guard);
+    auto it = channels.find(channel);
+    ASSERT(it != channels.end());
+    auto channel_state = it->second;
+    gpu.BindChannel(channel_state->bind_id);
+    channel_state->dma_pusher->Push(std::move(entries));
+    channel_state->dma_pusher->DispatchCalls();
 }

 void Scheduler::DeclareChannel(std::shared_ptr<ChannelState> new_channel) {
    s32 channel = new_channel->bind_id;
-    std::unique_lock lk(impl->scheduling_guard);
-
-    size_t new_fifo_id;
-    if (!impl->free_fifos.empty()) {
-        new_fifo_id = impl->free_fifos.front();
-        impl->free_fifos.pop_front();
-    } else {
-        new_fifo_id = impl->gpfifos.size();
-        impl->gpfifos.emplace_back();
-    }
-    auto& new_fifo = impl->gpfifos[new_fifo_id];
-    impl->channel_gpfifo_ids[channel] = new_fifo_id;
-    new_fifo.is_active = true;
-    new_fifo.bind_id = channel;
-    new_fifo.pending_work.clear();
-    new_fifo.info = new_channel;
-    new_fifo.scheduled_count = 0;
-    new_fifo.yield_count = 0;
-    new_fifo.is_running = false;
-    impl->schedule_priority_queue.try_emplace(new_channel->priority);
-    impl->schedule_priority_queue[new_channel->priority].push_back(new_fifo_id);
-    std::function<void()> callback = std::bind(&Scheduler::ChannelLoop, this, new_fifo_id, channel);
-    new_fifo.context = std::make_shared<Common::Fiber>(std::move(callback));
+    std::unique_lock lk(scheduling_guard);
+    channels.emplace(channel, new_channel);
 }

 } // namespace Tegra::Control
--- a/src/video_core/control/scheduler.h
+++ b/src/video_core/control/scheduler.h
@@ -3,11 +3,10 @@

 #pragma once

-#include <list>
 #include <memory>
+#include <mutex>
+#include <unordered_map>

-#include "common/common_types.h"
-#include "video_core/control/channel_state.h"
 #include "video_core/dma_pusher.h"

 namespace Tegra {
@@ -23,27 +22,13 @@ public:
    explicit Scheduler(GPU& gpu_);
    ~Scheduler();

-    void Init();
-
-    void Resume();
-
-    void Yield();
-
    void Push(s32 channel, CommandList&& entries);

    void DeclareChannel(std::shared_ptr<ChannelState> new_channel);

-    void ChangePriority(s32 channel_id, u32 new_priority);
-
 private:
-    void ChannelLoop(size_t gpfifo_id, s32 channel_id);
-    bool ScheduleLevel(std::list<size_t>& queue);
-    void CheckStatus();
-    bool UpdateHighestPriorityChannel();
-
-    struct SchedulerImpl;
-    std::unique_ptr<SchedulerImpl> impl;
-
+    std::unordered_map<s32, std::shared_ptr<ChannelState>> channels;
+    std::mutex scheduling_guard;
    GPU& gpu;
 };

--- a/src/video_core/engines/puller.cpp
+++ b/src/video_core/engines/puller.cpp
@@ -6,7 +6,6 @@
 #include "common/settings.h"
 #include "core/core.h"
 #include "video_core/control/channel_state.h"
-#include "video_core/control/scheduler.h"
 #include "video_core/dma_pusher.h"
 #include "video_core/engines/fermi_2d.h"
 #include "video_core/engines/kepler_compute.h"
@@ -15,8 +14,6 @@
 #include "video_core/engines/maxwell_dma.h"
 #include "video_core/engines/puller.h"
 #include "video_core/gpu.h"
-#include "video_core/host1x/host1x.h"
-#include "video_core/host1x/syncpoint_manager.h"
 #include "video_core/memory_manager.h"
 #include "video_core/rasterizer_interface.h"

@@ -63,14 +60,11 @@ void Puller::ProcessBindMethod(const MethodCall& method_call) {
 }

 void Puller::ProcessFenceActionMethod() {
-    auto& syncpoint_manager = gpu.Host1x().GetSyncpointManager();
    switch (regs.fence_action.op) {
    case Puller::FenceOperation::Acquire:
-        while (regs.fence_value >
-               syncpoint_manager.GetGuestSyncpointValue(regs.fence_action.syncpoint_id)) {
-            rasterizer->ReleaseFences();
-            gpu.Scheduler().Yield();
-        }
+        // TODO: Channel scheduling pending, so this acquire does not yet wait for the fence:
+        // WaitFence(regs.fence_action.syncpoint_id, regs.fence_value);
+        rasterizer->ReleaseFences();
        break;
    case Puller::FenceOperation::Increment:
        rasterizer->SignalSyncPoint(regs.fence_action.syncpoint_id);
--- a/src/video_core/gpu.cpp
+++ b/src/video_core/gpu.cpp
@@ -387,14 +387,6 @@ std::shared_ptr<Control::ChannelState> GPU::AllocateChannel() {
    return impl->AllocateChannel();
 }

-Tegra::Control::Scheduler& GPU::Scheduler() {
-    return *impl->scheduler;
-}
-
-const Tegra::Control::Scheduler& GPU::Scheduler() const {
-    return *impl->scheduler;
-}
-
 void GPU::InitChannel(Control::ChannelState& to_init) {
    impl->InitChannel(to_init);
 }
--- a/src/video_core/gpu.h
+++ b/src/video_core/gpu.h
@@ -124,8 +124,7 @@ class KeplerCompute;

 namespace Control {
 struct ChannelState;
-class Scheduler;
-} // namespace Control
+}

 namespace Host1x {
 class Host1x;
@@ -205,12 +204,6 @@ public:
    /// Returns a const reference to the shader notifier.
    [[nodiscard]] const VideoCore::ShaderNotify& ShaderNotify() const;

-    /// Returns GPU Channel Scheduler.
-    [[nodiscard]] Tegra::Control::Scheduler& Scheduler();
-
-    /// Returns GPU Channel Scheduler.
-    [[nodiscard]] const Tegra::Control::Scheduler& Scheduler() const;
-
    [[nodiscard]] u64 GetTicks() const;

    [[nodiscard]] bool IsAsync() const;
--- a/src/video_core/gpu_thread.cpp
+++ b/src/video_core/gpu_thread.cpp
@@ -36,15 +36,13 @@ static void RunThread(std::stop_token stop_token, Core::System& system,

    CommandDataContainer next;

-    scheduler.Init();
-
    while (!stop_token.stop_requested()) {
        state.queue.PopWait(next, stop_token);
        if (stop_token.stop_requested()) {
            break;
        }
-        if (std::holds_alternative<SubmitListCommand>(next.data)) {
-            scheduler.Resume();
+        if (auto* submit_list = std::get_if<SubmitListCommand>(&next.data)) {
+            scheduler.Push(submit_list->channel, std::move(submit_list->entries));
        } else if (std::holds_alternative<GPUTickCommand>(next.data)) {
            system.GPU().TickWork();
        } else if (const auto* flush = std::get_if<FlushRegionCommand>(&next.data)) {
@@ -71,16 +69,14 @@ ThreadManager::~ThreadManager() = default;

 void ThreadManager::StartThread(VideoCore::RendererBase& renderer,
                                Core::Frontend::GraphicsContext& context,
-                                Tegra::Control::Scheduler& scheduler_) {
+                                Tegra::Control::Scheduler& scheduler) {
    rasterizer = renderer.ReadRasterizer();
-    scheduler = &scheduler_;
    thread = std::jthread(RunThread, std::ref(system), std::ref(renderer), std::ref(context),
-                          std::ref(scheduler_), std::ref(state));
+                          std::ref(scheduler), std::ref(state));
 }

 void ThreadManager::SubmitList(s32 channel, Tegra::CommandList&& entries) {
-    scheduler->Push(channel, std::move(entries));
-    PushCommand(SubmitListCommand());
+    PushCommand(SubmitListCommand(channel, std::move(entries)));
 }

 void ThreadManager::FlushRegion(DAddr addr, u64 size) {
--- a/src/video_core/gpu_thread.h
+++ b/src/video_core/gpu_thread.h
@@ -36,7 +36,13 @@ class RendererBase;
 namespace VideoCommon::GPUThread {

 /// Command to signal to the GPU thread that a command list is ready for processing
-struct SubmitListCommand final {};
+struct SubmitListCommand final {
+    explicit SubmitListCommand(s32 channel_, Tegra::CommandList&& entries_)
+        : channel{channel_}, entries{std::move(entries_)} {}
+
+    s32 channel;
+    Tegra::CommandList entries;
+};

 /// Command to signal to the GPU thread to flush a region
 struct FlushRegionCommand final {
@@ -118,7 +124,6 @@ public:
 private:
    /// Pushes a command to be executed by the GPU thread
    u64 PushCommand(CommandData&& command_data, bool block = false);
-    Tegra::Control::Scheduler* scheduler;

    Core::System& system;
    const bool is_async;
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
@@ -352,6 +352,7 @@ PipelineCache::PipelineCache(Tegra::MaxwellDeviceMemoryManager& device_memory_,
        .support_native_ndc = device.IsExtDepthClipControlSupported(),
        .support_scaled_attributes = !device.MustEmulateScaledFormats(),
        .support_multi_viewport = device.SupportsMultiViewport(),
+        .support_geometry_streams = device.AreTransformFeedbackGeometryStreamsSupported(),

        .warp_size_potentially_larger_than_guest = device.IsWarpSizePotentiallyBiggerThanGuest(),

--- a/src/video_core/vulkan_common/vulkan_device.h
+++ b/src/video_core/vulkan_common/vulkan_device.h
@@ -489,6 +489,11 @@ public:
        return extensions.transform_feedback;
    }

+    /// Returns true if the device supports the transform feedback geometryStreams feature.
+    bool AreTransformFeedbackGeometryStreamsSupported() const {
+        return features.transform_feedback.geometryStreams;
+    }
+
    /// Returns true if the device supports VK_EXT_custom_border_color.
    bool IsExtCustomBorderColorSupported() const {
        return extensions.custom_border_color;