early-access version 4118

2024-02-06 18:55:25 +01:00
parent 9e61037e01
commit e0f3149c76
20 changed files with 411 additions and 713 deletions
--- a/src/video_core/control/scheduler.cpp
+++ b/src/video_core/control/scheduler.cpp
@@ -13,102 +13,20 @@ Scheduler::Scheduler(GPU& gpu_) : gpu{gpu_} {}

 Scheduler::~Scheduler() = default;

-void Scheduler::Init() {
-    master_control = Common::Fiber::ThreadToFiber();
-}
-
-void Scheduler::Resume() {
-    bool nothing_pending;
-    do {
-        nothing_pending = true;
-        current_fifo = nullptr;
-        {
-            std::unique_lock lk(scheduling_guard);
-            size_t num_iters = gpfifos.size();
-            for (size_t i = 0; i < num_iters; i++) {
-                size_t current_id = (current_fifo_rotation_id + i) % gpfifos.size();
-                auto& fifo = gpfifos[current_id];
-                if (!fifo.is_active) {
-                    continue;
-                }
-                std::scoped_lock lk2(fifo.guard);
-                if (!fifo.pending_work.empty() || fifo.working.load(std::memory_order_acquire)) {
-                    current_fifo = &fifo;
-                    current_fifo_rotation_id = current_id;
-                    nothing_pending = false;
-                    break;
-                }
-            }
-        }
-        if (current_fifo) {
-            Common::Fiber::YieldTo(master_control, *current_fifo->context);
-            current_fifo = nullptr;
-        }
-    } while (!nothing_pending);
-}
-
-void Scheduler::Yield() {
-    ASSERT(current_fifo != nullptr);
-    Common::Fiber::YieldTo(current_fifo->context, *master_control);
-    gpu.BindChannel(current_fifo->bind_id);
-}
-
 void Scheduler::Push(s32 channel, CommandList&& entries) {
    std::unique_lock lk(scheduling_guard);
-    auto it = channel_gpfifo_ids.find(channel);
-    ASSERT(it != channel_gpfifo_ids.end());
-    auto gpfifo_id = it->second;
-    auto& fifo = gpfifos[gpfifo_id];
-    {
-        std::scoped_lock lk2(fifo.guard);
-        fifo.pending_work.emplace_back(std::move(entries));
-    }
-}
-
-void Scheduler::ChannelLoop(size_t gpfifo_id, s32 channel_id) {
-    gpu.BindChannel(channel_id);
-    auto& fifo = gpfifos[gpfifo_id];
-    while (true) {
-        auto* channel_state = channels[channel_id].get();
-        fifo.guard.lock();
-        while (!fifo.pending_work.empty()) {
-            {
-
-                fifo.working.store(true, std::memory_order_release);
-                CommandList&& entries = std::move(fifo.pending_work.front());
-                channel_state->dma_pusher->Push(std::move(entries));
-                fifo.pending_work.pop_front();
-            }
-            fifo.guard.unlock();
-            channel_state->dma_pusher->DispatchCalls();
-            fifo.guard.lock();
-        }
-        fifo.working.store(false, std::memory_order_relaxed);
-        fifo.guard.unlock();
-        Common::Fiber::YieldTo(fifo.context, *master_control);
-        gpu.BindChannel(channel_id);
-    }
+    const auto it = channels.find(channel);
+    ASSERT(it != channels.end());
+    const auto& channel_state = it->second; // Borrowed: scheduling_guard is held until return.
+    gpu.BindChannel(channel_state->bind_id);
+    channel_state->dma_pusher->Push(std::move(entries));
+    channel_state->dma_pusher->DispatchCalls();
 }

 void Scheduler::DeclareChannel(std::shared_ptr<ChannelState> new_channel) {
    s32 channel = new_channel->bind_id;
    std::unique_lock lk(scheduling_guard);
    channels.emplace(channel, new_channel);
-    size_t new_fifo_id;
-    if (!free_fifos.empty()) {
-        new_fifo_id = free_fifos.front();
-        free_fifos.pop_front();
-    } else {
-        new_fifo_id = gpfifos.size();
-        gpfifos.emplace_back();
-    }
-    auto& new_fifo = gpfifos[new_fifo_id];
-    channel_gpfifo_ids[channel] = new_fifo_id;
-    new_fifo.is_active = true;
-    new_fifo.bind_id = channel;
-    new_fifo.pending_work.clear();
-    std::function<void()> callback = std::bind(&Scheduler::ChannelLoop, this, new_fifo_id, channel);
-    new_fifo.context = std::make_shared<Common::Fiber>(std::move(callback));
 }

 } // namespace Tegra::Control
--- a/src/video_core/control/scheduler.h
+++ b/src/video_core/control/scheduler.h
@@ -3,13 +3,10 @@

 #pragma once

-#include <atomic>
-#include <deque>
 #include <memory>
 #include <mutex>
 #include <unordered_map>

-#include "common/fiber.h"
 #include "video_core/dma_pusher.h"

 namespace Tegra {
@@ -25,36 +22,14 @@ public:
    explicit Scheduler(GPU& gpu_);
    ~Scheduler();

-    void Init();
-
-    void Resume();
-
-    void Yield();
-
    void Push(s32 channel, CommandList&& entries);

    void DeclareChannel(std::shared_ptr<ChannelState> new_channel);

 private:
-    void ChannelLoop(size_t gpfifo_id, s32 channel_id);
-
    std::unordered_map<s32, std::shared_ptr<ChannelState>> channels;
-    std::unordered_map<s32, size_t> channel_gpfifo_ids;
    std::mutex scheduling_guard;
-    std::shared_ptr<Common::Fiber> master_control;
-    struct GPFifoContext {
-        bool is_active;
-        std::shared_ptr<Common::Fiber> context;
-        std::deque<CommandList> pending_work;
-        std::atomic<bool> working{};
-        std::mutex guard;
-        s32 bind_id;
-    };
-    std::deque<GPFifoContext> gpfifos;
-    std::deque<size_t> free_fifos;
    GPU& gpu;
-    size_t current_fifo_rotation_id{};
-    GPFifoContext* current_fifo{};
 };

 } // namespace Control
--- a/src/video_core/engines/puller.cpp
+++ b/src/video_core/engines/puller.cpp
@@ -6,7 +6,6 @@
 #include "common/settings.h"
 #include "core/core.h"
 #include "video_core/control/channel_state.h"
-#include "video_core/control/scheduler.h"
 #include "video_core/dma_pusher.h"
 #include "video_core/engines/fermi_2d.h"
 #include "video_core/engines/kepler_compute.h"
@@ -15,8 +14,6 @@
 #include "video_core/engines/maxwell_dma.h"
 #include "video_core/engines/puller.h"
 #include "video_core/gpu.h"
-#include "video_core/host1x/host1x.h"
-#include "video_core/host1x/syncpoint_manager.h"
 #include "video_core/memory_manager.h"
 #include "video_core/rasterizer_interface.h"

@@ -63,14 +60,11 @@ void Puller::ProcessBindMethod(const MethodCall& method_call) {
 }

 void Puller::ProcessFenceActionMethod() {
-    auto& syncpoint_manager = gpu.Host1x().GetSyncpointManager();
    switch (regs.fence_action.op) {
    case Puller::FenceOperation::Acquire:
-        while (regs.fence_value >
-               syncpoint_manager.GetGuestSyncpointValue(regs.fence_action.syncpoint_id)) {
-            rasterizer->ReleaseFences();
-            gpu.Scheduler().Yield();
-        }
+        // UNIMPLEMENTED_MSG("Channel Scheduling pending.");
+        // WaitFence(regs.fence_action.syncpoint_id, regs.fence_value);
+        rasterizer->ReleaseFences();
        break;
    case Puller::FenceOperation::Increment:
        rasterizer->SignalSyncPoint(regs.fence_action.syncpoint_id);
--- a/src/video_core/gpu.cpp
+++ b/src/video_core/gpu.cpp
@@ -387,14 +387,6 @@ std::shared_ptr<Control::ChannelState> GPU::AllocateChannel() {
    return impl->AllocateChannel();
 }

-Tegra::Control::Scheduler& GPU::Scheduler() {
-    return *impl->scheduler;
-}
-
-const Tegra::Control::Scheduler& GPU::Scheduler() const {
-    return *impl->scheduler;
-}
-
 void GPU::InitChannel(Control::ChannelState& to_init) {
    impl->InitChannel(to_init);
 }
--- a/src/video_core/gpu.h
+++ b/src/video_core/gpu.h
@@ -124,8 +124,7 @@ class KeplerCompute;

 namespace Control {
 struct ChannelState;
-class Scheduler;
-} // namespace Control
+}

 namespace Host1x {
 class Host1x;
@@ -205,12 +204,6 @@ public:
    /// Returns a const reference to the shader notifier.
    [[nodiscard]] const VideoCore::ShaderNotify& ShaderNotify() const;

-    /// Returns GPU Channel Scheduler.
-    [[nodiscard]] Tegra::Control::Scheduler& Scheduler();
-
-    /// Returns GPU Channel Scheduler.
-    [[nodiscard]] const Tegra::Control::Scheduler& Scheduler() const;
-
    [[nodiscard]] u64 GetTicks() const;

    [[nodiscard]] bool IsAsync() const;
--- a/src/video_core/gpu_thread.cpp
+++ b/src/video_core/gpu_thread.cpp
@@ -34,15 +34,13 @@ static void RunThread(std::stop_token stop_token, Core::System& system,

    CommandDataContainer next;

-    scheduler.Init();
-
    while (!stop_token.stop_requested()) {
        state.queue.PopWait(next, stop_token);
        if (stop_token.stop_requested()) {
            break;
        }
-        if (std::holds_alternative<SubmitListCommand>(next.data)) {
-            scheduler.Resume();
+        if (auto* submit_list = std::get_if<SubmitListCommand>(&next.data)) {
+            scheduler.Push(submit_list->channel, std::move(submit_list->entries));
        } else if (std::holds_alternative<GPUTickCommand>(next.data)) {
            system.GPU().TickWork();
        } else if (const auto* flush = std::get_if<FlushRegionCommand>(&next.data)) {
@@ -69,16 +67,14 @@ ThreadManager::~ThreadManager() = default;

 void ThreadManager::StartThread(VideoCore::RendererBase& renderer,
                                Core::Frontend::GraphicsContext& context,
-                                Tegra::Control::Scheduler& scheduler_) {
+                                Tegra::Control::Scheduler& scheduler) {
    rasterizer = renderer.ReadRasterizer();
-    scheduler = &scheduler_;
    thread = std::jthread(RunThread, std::ref(system), std::ref(renderer), std::ref(context),
-                          std::ref(scheduler_), std::ref(state));
+                          std::ref(scheduler), std::ref(state));
 }

 void ThreadManager::SubmitList(s32 channel, Tegra::CommandList&& entries) {
-    scheduler->Push(channel, std::move(entries));
-    PushCommand(SubmitListCommand());
+    PushCommand(SubmitListCommand(channel, std::move(entries)));
 }

 void ThreadManager::FlushRegion(DAddr addr, u64 size) {
--- a/src/video_core/gpu_thread.h
+++ b/src/video_core/gpu_thread.h
@@ -36,7 +36,13 @@ class RendererBase;
 namespace VideoCommon::GPUThread {

 /// Command to signal to the GPU thread that a command list is ready for processing
-struct SubmitListCommand final {};
+struct SubmitListCommand final {
+    explicit SubmitListCommand(s32 channel_, Tegra::CommandList&& entries_)
+        : channel{channel_}, entries{std::move(entries_)} {}
+
+    s32 channel;                ///< Channel id the GPU thread passes to Scheduler::Push
+    Tegra::CommandList entries; ///< Command list; moved into Scheduler::Push by the GPU thread
+};

 /// Command to signal to the GPU thread to flush a region
 struct FlushRegionCommand final {
@@ -118,7 +124,6 @@ public:
 private:
    /// Pushes a command to be executed by the GPU thread
    u64 PushCommand(CommandData&& command_data, bool block = false);
-    Tegra::Control::Scheduler* scheduler;

    Core::System& system;
    const bool is_async;
--- a/src/video_core/host1x/host1x.cpp
+++ b/src/video_core/host1x/host1x.cpp
@@ -10,7 +10,7 @@ namespace Tegra::Host1x {

 Host1x::Host1x(Core::System& system_)
    : system{system_}, syncpoint_manager{},
-      memory_manager(system.DeviceMemory()), gmmu_manager{system, memory_manager, 32, 0, 12},
+      memory_manager(system.DeviceMemory()), gmmu_manager{system, memory_manager, 32, 12},
      allocator{std::make_unique<Common::FlatAllocator<u32, 0, 32>>(1 << 12)} {}

 Host1x::~Host1x() = default;
--- a/src/video_core/memory_manager.cpp
+++ b/src/video_core/memory_manager.cpp
@@ -22,12 +22,11 @@ using Tegra::Memory::GuestMemoryFlags;
 std::atomic<size_t> MemoryManager::unique_identifier_generator{};

 MemoryManager::MemoryManager(Core::System& system_, MaxwellDeviceMemoryManager& memory_,
-                             u64 address_space_bits_, GPUVAddr split_address_, u64 big_page_bits_,
-                             u64 page_bits_)
+                             u64 address_space_bits_, u64 big_page_bits_, u64 page_bits_)
    : system{system_}, memory{memory_}, address_space_bits{address_space_bits_},
-      split_address{split_address_}, page_bits{page_bits_}, big_page_bits{big_page_bits_},
-      entries{}, big_entries{}, page_table{address_space_bits, address_space_bits + page_bits - 38,
-                                           page_bits != big_page_bits ? page_bits : 0},
+      page_bits{page_bits_}, big_page_bits{big_page_bits_}, entries{}, big_entries{},
+      page_table{address_space_bits, address_space_bits + page_bits - 38,
+                 page_bits != big_page_bits ? page_bits : 0},
      kind_map{PTEKind::INVALID}, unique_identifier{unique_identifier_generator.fetch_add(
                                      1, std::memory_order_acq_rel)},
      accumulator{std::make_unique<VideoCommon::InvalidationAccumulator>()} {
@@ -49,10 +48,10 @@ MemoryManager::MemoryManager(Core::System& system_, MaxwellDeviceMemoryManager&
    entries.resize(page_table_size / 32, 0);
 }

-MemoryManager::MemoryManager(Core::System& system_, u64 address_space_bits_,
-                             GPUVAddr split_address_, u64 big_page_bits_, u64 page_bits_)
-    : MemoryManager(system_, system_.Host1x().MemoryManager(), address_space_bits_, split_address_,
-                    big_page_bits_, page_bits_) {}
+MemoryManager::MemoryManager(Core::System& system_, u64 address_space_bits_, u64 big_page_bits_,
+                             u64 page_bits_)
+    : MemoryManager(system_, system_.Host1x().MemoryManager(), address_space_bits_, big_page_bits_,
+                    page_bits_) {}

 MemoryManager::~MemoryManager() = default;

--- a/src/video_core/memory_manager.h
+++ b/src/video_core/memory_manager.h
@@ -36,11 +36,10 @@ namespace Tegra {
 class MemoryManager final {
 public:
    explicit MemoryManager(Core::System& system_, u64 address_space_bits_ = 40,
-                           GPUVAddr split_address = 1ULL << 34, u64 big_page_bits_ = 16,
-                           u64 page_bits_ = 12);
-    explicit MemoryManager(Core::System& system_, MaxwellDeviceMemoryManager& memory_,
-                           u64 address_space_bits_ = 40, GPUVAddr split_address = 1ULL << 34,
                           u64 big_page_bits_ = 16, u64 page_bits_ = 12);
+    explicit MemoryManager(Core::System& system_, MaxwellDeviceMemoryManager& memory_,
+                           u64 address_space_bits_ = 40, u64 big_page_bits_ = 16,
+                           u64 page_bits_ = 12);
    ~MemoryManager();

    static constexpr bool HAS_FLUSH_INVALIDATION = true;
@@ -195,7 +194,6 @@ private:
    MaxwellDeviceMemoryManager& memory;

    const u64 address_space_bits;
-    GPUVAddr split_address;
    const u64 page_bits;
    u64 address_space_size;
    u64 page_size;