early-access version 4118
@@ -13,102 +13,20 @@ Scheduler::Scheduler(GPU& gpu_) : gpu{gpu_} {}

Scheduler::~Scheduler() = default;

void Scheduler::Init() {
    master_control = Common::Fiber::ThreadToFiber();
}

void Scheduler::Resume() {
    bool nothing_pending;
    do {
        nothing_pending = true;
        current_fifo = nullptr;
        {
            std::unique_lock lk(scheduling_guard);
            size_t num_iters = gpfifos.size();
            for (size_t i = 0; i < num_iters; i++) {
                size_t current_id = (current_fifo_rotation_id + i) % gpfifos.size();
                auto& fifo = gpfifos[current_id];
                if (!fifo.is_active) {
                    continue;
                }
                std::scoped_lock lk2(fifo.guard);
                if (!fifo.pending_work.empty() || fifo.working.load(std::memory_order_acquire)) {
                    current_fifo = &fifo;
                    current_fifo_rotation_id = current_id;
                    nothing_pending = false;
                    break;
                }
            }
        }
        if (current_fifo) {
            Common::Fiber::YieldTo(master_control, *current_fifo->context);
            current_fifo = nullptr;
        }
    } while (!nothing_pending);
}

void Scheduler::Yield() {
    ASSERT(current_fifo != nullptr);
    Common::Fiber::YieldTo(current_fifo->context, *master_control);
    gpu.BindChannel(current_fifo->bind_id);
}

void Scheduler::Push(s32 channel, CommandList&& entries) {
    std::unique_lock lk(scheduling_guard);
    auto it = channel_gpfifo_ids.find(channel);
    ASSERT(it != channel_gpfifo_ids.end());
    auto gpfifo_id = it->second;
    auto& fifo = gpfifos[gpfifo_id];
    {
        std::scoped_lock lk2(fifo.guard);
        fifo.pending_work.emplace_back(std::move(entries));
    }
}

void Scheduler::ChannelLoop(size_t gpfifo_id, s32 channel_id) {
    gpu.BindChannel(channel_id);
    auto& fifo = gpfifos[gpfifo_id];
    while (true) {
        auto* channel_state = channels[channel_id].get();
        fifo.guard.lock();
        while (!fifo.pending_work.empty()) {
            {
                fifo.working.store(true, std::memory_order_release);
                CommandList&& entries = std::move(fifo.pending_work.front());
                channel_state->dma_pusher->Push(std::move(entries));
                fifo.pending_work.pop_front();
            }
            fifo.guard.unlock();
            channel_state->dma_pusher->DispatchCalls();
            fifo.guard.lock();
        }
        fifo.working.store(false, std::memory_order_relaxed);
        fifo.guard.unlock();
        Common::Fiber::YieldTo(fifo.context, *master_control);
        gpu.BindChannel(channel_id);
    }
    auto it = channels.find(channel);
    ASSERT(it != channels.end());
    auto channel_state = it->second;
    gpu.BindChannel(channel_state->bind_id);
    channel_state->dma_pusher->Push(std::move(entries));
    channel_state->dma_pusher->DispatchCalls();
}

void Scheduler::DeclareChannel(std::shared_ptr<ChannelState> new_channel) {
    s32 channel = new_channel->bind_id;
    std::unique_lock lk(scheduling_guard);
    channels.emplace(channel, new_channel);
    size_t new_fifo_id;
    if (!free_fifos.empty()) {
        new_fifo_id = free_fifos.front();
        free_fifos.pop_front();
    } else {
        new_fifo_id = gpfifos.size();
        gpfifos.emplace_back();
    }
    auto& new_fifo = gpfifos[new_fifo_id];
    channel_gpfifo_ids[channel] = new_fifo_id;
    new_fifo.is_active = true;
    new_fifo.bind_id = channel;
    new_fifo.pending_work.clear();
    std::function<void()> callback = std::bind(&Scheduler::ChannelLoop, this, new_fifo_id, channel);
    new_fifo.context = std::make_shared<Common::Fiber>(std::move(callback));
}

} // namespace Tegra::Control

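For orientation, here is a minimal sketch of how the fiber-based Scheduler API in this hunk fits together from a caller's point of view. The helper below is hypothetical and not part of the commit; it assumes Scheduler::Init() has already been called on the current thread (as the GPU thread does before entering its loop).

#include <memory>
#include <utility>

#include "video_core/control/channel_state.h"
#include "video_core/control/scheduler.h"

namespace Example {

// Hypothetical helper: register a channel's GPFIFO, queue one command list,
// then let the scheduler round-robin all active fibers until no work remains.
void SubmitOnce(Tegra::Control::Scheduler& scheduler,
                std::shared_ptr<Tegra::Control::ChannelState> channel,
                Tegra::CommandList&& entries) {
    scheduler.DeclareChannel(channel);                    // allocates or reuses a GPFifoContext and its fiber
    scheduler.Push(channel->bind_id, std::move(entries)); // enqueue only; nothing is dispatched yet
    scheduler.Resume();                                   // yields to each pending fifo's ChannelLoop in turn
}

} // namespace Example
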
@@ -3,13 +3,10 @@

#pragma once

#include <atomic>
#include <deque>
#include <memory>
#include <mutex>
#include <unordered_map>

#include "common/fiber.h"
#include "video_core/dma_pusher.h"

namespace Tegra {
@@ -25,36 +22,14 @@ public:
    explicit Scheduler(GPU& gpu_);
    ~Scheduler();

    void Init();

    void Resume();

    void Yield();

    void Push(s32 channel, CommandList&& entries);

    void DeclareChannel(std::shared_ptr<ChannelState> new_channel);

private:
    void ChannelLoop(size_t gpfifo_id, s32 channel_id);

    std::unordered_map<s32, std::shared_ptr<ChannelState>> channels;
    std::unordered_map<s32, size_t> channel_gpfifo_ids;
    std::mutex scheduling_guard;
    std::shared_ptr<Common::Fiber> master_control;
    struct GPFifoContext {
        bool is_active;
        std::shared_ptr<Common::Fiber> context;
        std::deque<CommandList> pending_work;
        std::atomic<bool> working{};
        std::mutex guard;
        s32 bind_id;
    };
    std::deque<GPFifoContext> gpfifos;
    std::deque<size_t> free_fifos;
    GPU& gpu;
    size_t current_fifo_rotation_id{};
    GPFifoContext* current_fifo{};
};

} // namespace Control

@@ -6,7 +6,6 @@
#include "common/settings.h"
#include "core/core.h"
#include "video_core/control/channel_state.h"
#include "video_core/control/scheduler.h"
#include "video_core/dma_pusher.h"
#include "video_core/engines/fermi_2d.h"
#include "video_core/engines/kepler_compute.h"
@@ -15,8 +14,6 @@
#include "video_core/engines/maxwell_dma.h"
#include "video_core/engines/puller.h"
#include "video_core/gpu.h"
#include "video_core/host1x/host1x.h"
#include "video_core/host1x/syncpoint_manager.h"
#include "video_core/memory_manager.h"
#include "video_core/rasterizer_interface.h"

@@ -63,14 +60,11 @@ void Puller::ProcessBindMethod(const MethodCall& method_call) {
}

void Puller::ProcessFenceActionMethod() {
    auto& syncpoint_manager = gpu.Host1x().GetSyncpointManager();
    switch (regs.fence_action.op) {
    case Puller::FenceOperation::Acquire:
        while (regs.fence_value >
               syncpoint_manager.GetGuestSyncpointValue(regs.fence_action.syncpoint_id)) {
            rasterizer->ReleaseFences();
            gpu.Scheduler().Yield();
        }
        // UNIMPLEMENTED_MSG("Channel Scheduling pending.");
        // WaitFence(regs.fence_action.syncpoint_id, regs.fence_value);
        rasterizer->ReleaseFences();
        break;
    case Puller::FenceOperation::Increment:
        rasterizer->SignalSyncPoint(regs.fence_action.syncpoint_id);

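The Acquire case in this hunk shows the cooperative-wait pattern used with the channel scheduler: when a fence has not yet been reached, the current fiber releases host fences and yields to the scheduler's master fiber instead of blocking the GPU thread. A rough sketch of that loop in isolation (the helper name is hypothetical, and the ReleaseFences() call is omitted for brevity):

// Hypothetical helper: cooperatively wait until a guest syncpoint reaches a target value.
void WaitForSyncpoint(Tegra::GPU& gpu, u32 syncpoint_id, u32 target_value) {
    auto& syncpoint_manager = gpu.Host1x().GetSyncpointManager();
    while (target_value > syncpoint_manager.GetGuestSyncpointValue(syncpoint_id)) {
        gpu.Scheduler().Yield(); // other channels run until the scheduler rotates back here
    }
}
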
@@ -387,14 +387,6 @@ std::shared_ptr<Control::ChannelState> GPU::AllocateChannel() {
    return impl->AllocateChannel();
}

Tegra::Control::Scheduler& GPU::Scheduler() {
    return *impl->scheduler;
}

const Tegra::Control::Scheduler& GPU::Scheduler() const {
    return *impl->scheduler;
}

void GPU::InitChannel(Control::ChannelState& to_init) {
    impl->InitChannel(to_init);
}

@@ -124,8 +124,7 @@ class KeplerCompute;

namespace Control {
struct ChannelState;
class Scheduler;
} // namespace Control
}

namespace Host1x {
class Host1x;
@@ -205,12 +204,6 @@ public:
    /// Returns a const reference to the shader notifier.
    [[nodiscard]] const VideoCore::ShaderNotify& ShaderNotify() const;

    /// Returns GPU Channel Scheduler.
    [[nodiscard]] Tegra::Control::Scheduler& Scheduler();

    /// Returns GPU Channel Scheduler.
    [[nodiscard]] const Tegra::Control::Scheduler& Scheduler() const;

    [[nodiscard]] u64 GetTicks() const;

    [[nodiscard]] bool IsAsync() const;

@@ -34,15 +34,13 @@ static void RunThread(std::stop_token stop_token, Core::System& system,

    CommandDataContainer next;

    scheduler.Init();

    while (!stop_token.stop_requested()) {
        state.queue.PopWait(next, stop_token);
        if (stop_token.stop_requested()) {
            break;
        }
        if (std::holds_alternative<SubmitListCommand>(next.data)) {
            scheduler.Resume();
        if (auto* submit_list = std::get_if<SubmitListCommand>(&next.data)) {
            scheduler.Push(submit_list->channel, std::move(submit_list->entries));
        } else if (std::holds_alternative<GPUTickCommand>(next.data)) {
            system.GPU().TickWork();
        } else if (const auto* flush = std::get_if<FlushRegionCommand>(&next.data)) {

@@ -69,16 +67,14 @@ ThreadManager::~ThreadManager() = default;

void ThreadManager::StartThread(VideoCore::RendererBase& renderer,
                                Core::Frontend::GraphicsContext& context,
                                Tegra::Control::Scheduler& scheduler_) {
                                Tegra::Control::Scheduler& scheduler) {
    rasterizer = renderer.ReadRasterizer();
    scheduler = &scheduler_;
    thread = std::jthread(RunThread, std::ref(system), std::ref(renderer), std::ref(context),
                          std::ref(scheduler_), std::ref(state));
                          std::ref(scheduler), std::ref(state));
}

void ThreadManager::SubmitList(s32 channel, Tegra::CommandList&& entries) {
    scheduler->Push(channel, std::move(entries));
    PushCommand(SubmitListCommand());
    PushCommand(SubmitListCommand(channel, std::move(entries)));
}

void ThreadManager::FlushRegion(DAddr addr, u64 size) {

@@ -36,7 +36,13 @@ class RendererBase;
namespace VideoCommon::GPUThread {

/// Command to signal to the GPU thread that a command list is ready for processing
struct SubmitListCommand final {};
struct SubmitListCommand final {
    explicit SubmitListCommand(s32 channel_, Tegra::CommandList&& entries_)
        : channel{channel_}, entries{std::move(entries_)} {}

    s32 channel;
    Tegra::CommandList entries;
};

/// Command to signal to the GPU thread to flush a region
struct FlushRegionCommand final {
@@ -118,7 +124,6 @@ public:
private:
    /// Pushes a command to be executed by the GPU thread
    u64 PushCommand(CommandData&& command_data, bool block = false);
    Tegra::Control::Scheduler* scheduler;

    Core::System& system;
    const bool is_async;

@@ -10,7 +10,7 @@ namespace Tegra::Host1x {

Host1x::Host1x(Core::System& system_)
    : system{system_}, syncpoint_manager{},
      memory_manager(system.DeviceMemory()), gmmu_manager{system, memory_manager, 32, 0, 12},
      memory_manager(system.DeviceMemory()), gmmu_manager{system, memory_manager, 32, 12},
      allocator{std::make_unique<Common::FlatAllocator<u32, 0, 32>>(1 << 12)} {}

Host1x::~Host1x() = default;

@@ -22,12 +22,11 @@ using Tegra::Memory::GuestMemoryFlags;
std::atomic<size_t> MemoryManager::unique_identifier_generator{};

MemoryManager::MemoryManager(Core::System& system_, MaxwellDeviceMemoryManager& memory_,
                             u64 address_space_bits_, GPUVAddr split_address_, u64 big_page_bits_,
                             u64 page_bits_)
                             u64 address_space_bits_, u64 big_page_bits_, u64 page_bits_)
    : system{system_}, memory{memory_}, address_space_bits{address_space_bits_},
      split_address{split_address_}, page_bits{page_bits_}, big_page_bits{big_page_bits_},
      entries{}, big_entries{}, page_table{address_space_bits, address_space_bits + page_bits - 38,
                                           page_bits != big_page_bits ? page_bits : 0},
      page_bits{page_bits_}, big_page_bits{big_page_bits_}, entries{}, big_entries{},
      page_table{address_space_bits, address_space_bits + page_bits - 38,
                 page_bits != big_page_bits ? page_bits : 0},
      kind_map{PTEKind::INVALID}, unique_identifier{unique_identifier_generator.fetch_add(
                                      1, std::memory_order_acq_rel)},
      accumulator{std::make_unique<VideoCommon::InvalidationAccumulator>()} {
@@ -49,10 +48,10 @@ MemoryManager::MemoryManager(Core::System& system_, MaxwellDeviceMemoryManager&
    entries.resize(page_table_size / 32, 0);
}

MemoryManager::MemoryManager(Core::System& system_, u64 address_space_bits_,
                             GPUVAddr split_address_, u64 big_page_bits_, u64 page_bits_)
    : MemoryManager(system_, system_.Host1x().MemoryManager(), address_space_bits_, split_address_,
                    big_page_bits_, page_bits_) {}
MemoryManager::MemoryManager(Core::System& system_, u64 address_space_bits_, u64 big_page_bits_,
                             u64 page_bits_)
    : MemoryManager(system_, system_.Host1x().MemoryManager(), address_space_bits_, big_page_bits_,
                    page_bits_) {}

MemoryManager::~MemoryManager() = default;

@@ -36,11 +36,10 @@ namespace Tegra {
class MemoryManager final {
public:
    explicit MemoryManager(Core::System& system_, u64 address_space_bits_ = 40,
                           GPUVAddr split_address = 1ULL << 34, u64 big_page_bits_ = 16,
                           u64 page_bits_ = 12);
    explicit MemoryManager(Core::System& system_, MaxwellDeviceMemoryManager& memory_,
                           u64 address_space_bits_ = 40, GPUVAddr split_address = 1ULL << 34,
                           u64 big_page_bits_ = 16, u64 page_bits_ = 12);
    explicit MemoryManager(Core::System& system_, MaxwellDeviceMemoryManager& memory_,
                           u64 address_space_bits_ = 40, u64 big_page_bits_ = 16,
                           u64 page_bits_ = 12);
    ~MemoryManager();

    static constexpr bool HAS_FLUSH_INVALIDATION = true;
@@ -195,7 +194,6 @@ private:
    MaxwellDeviceMemoryManager& memory;

    const u64 address_space_bits;
    GPUVAddr split_address;
    const u64 page_bits;
    u64 address_space_size;
    u64 page_size;