early-access version 4104

This commit is contained in:
pineappleEA
2024-01-31 19:54:14 +01:00
parent 6a0b9484c1
commit 7e95a06b53
43 changed files with 2918 additions and 1033 deletions

View File

@@ -5,6 +5,7 @@
#include <array>
#include <atomic>
#include <bit>
#include <deque>
#include <memory>
#include <mutex>
@@ -42,6 +43,8 @@ public:
DeviceMemoryManager(const DeviceMemory& device_memory);
~DeviceMemoryManager();
static constexpr bool HAS_FLUSH_INVALIDATION = true;
void BindInterface(DeviceInterface* device_inter);
DAddr Allocate(size_t size);
@@ -181,24 +184,28 @@ private:
}
Common::VirtualBuffer<VAddr> cpu_backing_address;
static constexpr size_t subentries = 8 / sizeof(u8);
using CounterType = u8;
using CounterAtomicType = std::atomic_uint8_t;
static constexpr size_t subentries = 8 / sizeof(CounterType);
static constexpr size_t subentries_mask = subentries - 1;
static constexpr size_t subentries_shift =
std::countr_zero(sizeof(u64)) - std::countr_zero(sizeof(CounterType));
class CounterEntry final {
public:
CounterEntry() = default;
std::atomic_uint8_t& Count(std::size_t page) {
CounterAtomicType& Count(std::size_t page) {
return values[page & subentries_mask];
}
const std::atomic_uint8_t& Count(std::size_t page) const {
const CounterAtomicType& Count(std::size_t page) const {
return values[page & subentries_mask];
}
private:
std::array<std::atomic_uint8_t, subentries> values{};
std::array<CounterAtomicType, subentries> values{};
};
static_assert(sizeof(CounterEntry) == subentries * sizeof(u8),
static_assert(sizeof(CounterEntry) == subentries * sizeof(CounterType),
"CounterEntry should be 8 bytes!");
static constexpr size_t num_counter_entries =

View File

@@ -213,8 +213,8 @@ void DeviceMemoryManager<Traits>::Free(DAddr start, size_t size) {
}
template <typename Traits>
void DeviceMemoryManager<Traits>::Map(DAddr address, VAddr virtual_address, size_t size,
Asid asid, bool track) {
void DeviceMemoryManager<Traits>::Map(DAddr address, VAddr virtual_address, size_t size, Asid asid,
bool track) {
Core::Memory::Memory* process_memory = registered_processes[asid.id];
size_t start_page_d = address >> Memory::YUZU_PAGEBITS;
size_t num_pages = Common::AlignUp(size, Memory::YUZU_PAGESIZE) >> Memory::YUZU_PAGEBITS;
@@ -519,22 +519,36 @@ void DeviceMemoryManager<Traits>::UpdatePagesCachedCount(DAddr addr, size_t size
const size_t page_end = Common::DivCeil(addr + size, Memory::YUZU_PAGESIZE);
size_t page = addr >> Memory::YUZU_PAGEBITS;
auto [asid, base_vaddress] = ExtractCPUBacking(page);
size_t vpage = base_vaddress >> Memory::YUZU_PAGEBITS;
auto* memory_device_inter = registered_processes[asid.id];
const auto release_pending = [&] {
if (uncache_bytes > 0) {
MarkRegionCaching(memory_device_inter, uncache_begin << Memory::YUZU_PAGEBITS,
uncache_bytes, false);
uncache_bytes = 0;
}
if (cache_bytes > 0) {
MarkRegionCaching(memory_device_inter, cache_begin << Memory::YUZU_PAGEBITS,
cache_bytes, true);
cache_bytes = 0;
}
};
for (; page != page_end; ++page) {
std::atomic_uint8_t& count = cached_pages->at(page >> 3).Count(page);
CounterAtomicType& count = cached_pages->at(page >> subentries_shift).Count(page);
auto [asid_2, vpage] = ExtractCPUBacking(page);
vpage >>= Memory::YUZU_PAGEBITS;
if (delta > 0) {
ASSERT_MSG(count.load(std::memory_order::relaxed) < std::numeric_limits<u8>::max(),
"Count may overflow!");
} else if (delta < 0) {
ASSERT_MSG(count.load(std::memory_order::relaxed) > 0, "Count may underflow!");
} else {
ASSERT_MSG(false, "Delta must be non-zero!");
if (vpage == 0) [[unlikely]] {
release_pending();
continue;
}
if (asid.id != asid_2.id) [[unlikely]] {
release_pending();
memory_device_inter = registered_processes[asid_2.id];
}
// Adds or subtracts 1, as count is a unsigned 8-bit value
count.fetch_add(static_cast<u8>(delta), std::memory_order_release);
count.fetch_add(static_cast<CounterType>(delta), std::memory_order_release);
// Assume delta is either -1 or 1
if (count.load(std::memory_order::relaxed) == 0) {
@@ -553,20 +567,12 @@ void DeviceMemoryManager<Traits>::UpdatePagesCachedCount(DAddr addr, size_t size
}
cache_bytes += Memory::YUZU_PAGESIZE;
} else if (cache_bytes > 0) {
MarkRegionCaching(memory_device_inter, cache_begin << Memory::YUZU_PAGEBITS, cache_bytes,
true);
MarkRegionCaching(memory_device_inter, cache_begin << Memory::YUZU_PAGEBITS,
cache_bytes, true);
cache_bytes = 0;
}
vpage++;
}
if (uncache_bytes > 0) {
MarkRegionCaching(memory_device_inter, uncache_begin << Memory::YUZU_PAGEBITS, uncache_bytes,
false);
}
if (cache_bytes > 0) {
MarkRegionCaching(memory_device_inter, cache_begin << Memory::YUZU_PAGEBITS, cache_bytes,
true);
}
release_pending();
}
} // namespace Core

View File

@@ -44,15 +44,32 @@ public:
GuestMemory() = delete;
explicit GuestMemory(M& memory, u64 addr, std::size_t size,
Common::ScratchBuffer<T>* backup = nullptr)
: m_memory{memory}, m_addr{addr}, m_size{size} {
: m_memory{&memory}, m_addr{addr}, m_size{size} {
static_assert(FLAGS & GuestMemoryFlags::Read || FLAGS & GuestMemoryFlags::Write);
if constexpr (FLAGS & GuestMemoryFlags::Read) {
if constexpr (!(FLAGS & GuestMemoryFlags::Read)) {
if (!this->TrySetSpan()) {
if (backup) {
backup->resize_destructive(this->size());
m_data_span = *backup;
m_span_valid = true;
m_is_data_copy = true;
} else {
m_data_copy.resize(this->size());
m_data_span = std::span(m_data_copy);
m_span_valid = true;
m_is_data_copy = true;
}
}
} else if constexpr (FLAGS & GuestMemoryFlags::Read) {
Read(addr, size, backup);
}
}
~GuestMemory() = default;
GuestMemory(GuestMemory&& rhs) = default;
GuestMemory& operator=(GuestMemory&& rhs) = default;
T* data() noexcept {
return m_data_span.data();
}
@@ -109,8 +126,8 @@ public:
}
if (this->TrySetSpan()) {
if constexpr (FLAGS & GuestMemoryFlags::Safe) {
m_memory.FlushRegion(m_addr, this->size_bytes());
if constexpr (FLAGS & GuestMemoryFlags::Safe && M::HAS_FLUSH_INVALIDATION) {
m_memory->FlushRegion(m_addr, this->size_bytes());
}
} else {
if (backup) {
@@ -123,9 +140,9 @@ public:
m_is_data_copy = true;
m_span_valid = true;
if constexpr (FLAGS & GuestMemoryFlags::Safe) {
m_memory.ReadBlock(m_addr, this->data(), this->size_bytes());
m_memory->ReadBlock(m_addr, this->data(), this->size_bytes());
} else {
m_memory.ReadBlockUnsafe(m_addr, this->data(), this->size_bytes());
m_memory->ReadBlockUnsafe(m_addr, this->data(), this->size_bytes());
}
}
return m_data_span;
@@ -133,18 +150,19 @@ public:
void Write(std::span<T> write_data) noexcept {
if constexpr (FLAGS & GuestMemoryFlags::Cached) {
m_memory.WriteBlockCached(m_addr, write_data.data(), this->size_bytes());
m_memory->WriteBlockCached(m_addr, write_data.data(), this->size_bytes());
} else if constexpr (FLAGS & GuestMemoryFlags::Safe) {
m_memory.WriteBlock(m_addr, write_data.data(), this->size_bytes());
m_memory->WriteBlock(m_addr, write_data.data(), this->size_bytes());
} else {
m_memory.WriteBlockUnsafe(m_addr, write_data.data(), this->size_bytes());
m_memory->WriteBlockUnsafe(m_addr, write_data.data(), this->size_bytes());
}
}
bool TrySetSpan() noexcept {
if (u8* ptr = m_memory.GetSpan(m_addr, this->size_bytes()); ptr) {
if (u8* ptr = m_memory->GetSpan(m_addr, this->size_bytes()); ptr) {
m_data_span = {reinterpret_cast<T*>(ptr), this->size()};
m_span_valid = true;
m_is_data_copy = false;
return true;
}
return false;
@@ -159,7 +177,7 @@ protected:
return m_addr_changed;
}
M& m_memory;
M* m_memory;
u64 m_addr{};
size_t m_size{};
std::span<T> m_data_span{};
@@ -175,17 +193,7 @@ public:
GuestMemoryScoped() = delete;
explicit GuestMemoryScoped(M& memory, u64 addr, std::size_t size,
Common::ScratchBuffer<T>* backup = nullptr)
: GuestMemory<M, T, FLAGS>(memory, addr, size, backup) {
if constexpr (!(FLAGS & GuestMemoryFlags::Read)) {
if (!this->TrySetSpan()) {
if (backup) {
this->m_data_span = *backup;
this->m_span_valid = true;
this->m_is_data_copy = true;
}
}
}
}
: GuestMemory<M, T, FLAGS>(memory, addr, size, backup) {}
~GuestMemoryScoped() {
if constexpr (FLAGS & GuestMemoryFlags::Write) {
@@ -196,15 +204,17 @@ public:
if (this->AddressChanged() || this->IsDataCopy()) {
ASSERT(this->m_span_valid);
if constexpr (FLAGS & GuestMemoryFlags::Cached) {
this->m_memory.WriteBlockCached(this->m_addr, this->data(), this->size_bytes());
this->m_memory->WriteBlockCached(this->m_addr, this->data(),
this->size_bytes());
} else if constexpr (FLAGS & GuestMemoryFlags::Safe) {
this->m_memory.WriteBlock(this->m_addr, this->data(), this->size_bytes());
this->m_memory->WriteBlock(this->m_addr, this->data(), this->size_bytes());
} else {
this->m_memory.WriteBlockUnsafe(this->m_addr, this->data(), this->size_bytes());
this->m_memory->WriteBlockUnsafe(this->m_addr, this->data(),
this->size_bytes());
}
} else if constexpr ((FLAGS & GuestMemoryFlags::Safe) ||
(FLAGS & GuestMemoryFlags::Cached)) {
this->m_memory.InvalidateRegion(this->m_addr, this->size_bytes());
this->m_memory->InvalidateRegion(this->m_addr, this->size_bytes());
}
}
}

View File

@@ -202,7 +202,7 @@ void AOC_U::ListAddOnContent(HLERequestContext& ctx) {
LOG_DEBUG(Service_AOC, "called with offset={}, count={}, process_id={}", offset, count,
process_id);
const auto current = system.GetApplicationProcessProgramID();
const auto current = FileSys::GetBaseTitleID(system.GetApplicationProcessProgramID());
std::vector<u32> out;
const auto& disabled = Settings::values.disabled_addons[current];

View File

@@ -67,10 +67,7 @@ public:
const SyncpointManager& GetSyncpointManager() const;
struct Host1xDeviceFileData {
std::unordered_map<DeviceFD, u32> fd_to_id{};
std::deque<u32> syncpts_accumulated{};
u32 nvdec_next_id{};
u32 vic_next_id{};
};
Host1xDeviceFileData& Host1xDeviceFile();

View File

@@ -8,6 +8,7 @@
#include "core/hle/service/nvdrv/core/container.h"
#include "core/hle/service/nvdrv/devices/ioctl_serialization.h"
#include "core/hle/service/nvdrv/devices/nvhost_nvdec.h"
#include "video_core/host1x/host1x.h"
#include "video_core/renderer_base.h"
namespace Service::Nvidia::Devices {
@@ -21,13 +22,8 @@ NvResult nvhost_nvdec::Ioctl1(DeviceFD fd, Ioctl command, std::span<const u8> in
switch (command.group) {
case 0x0:
switch (command.cmd) {
case 0x1: {
auto& host1x_file = core.Host1xDeviceFile();
if (!host1x_file.fd_to_id.contains(fd)) {
host1x_file.fd_to_id[fd] = host1x_file.nvdec_next_id++;
}
case 0x1:
return WrapFixedVariable(this, &nvhost_nvdec::Submit, input, output, fd);
}
case 0x2:
return WrapFixed(this, &nvhost_nvdec::GetSyncpoint, input, output);
case 0x3:
@@ -72,15 +68,12 @@ void nvhost_nvdec::OnOpen(NvCore::SessionId session_id, DeviceFD fd) {
LOG_INFO(Service_NVDRV, "NVDEC video stream started");
system.SetNVDECActive(true);
sessions[fd] = session_id;
host1x.StartDevice(fd, Tegra::Host1x::ChannelType::NvDec, channel_syncpoint);
}
void nvhost_nvdec::OnClose(DeviceFD fd) {
LOG_INFO(Service_NVDRV, "NVDEC video stream ended");
auto& host1x_file = core.Host1xDeviceFile();
const auto iter = host1x_file.fd_to_id.find(fd);
if (iter != host1x_file.fd_to_id.end()) {
system.GPU().ClearCdmaInstance(iter->second);
}
host1x.StopDevice(fd, Tegra::Host1x::ChannelType::NvDec);
system.SetNVDECActive(false);
auto it = sessions.find(fd);
if (it != sessions.end()) {

View File

@@ -55,8 +55,9 @@ std::size_t WriteVectors(std::span<u8> dst, const std::vector<T>& src, std::size
nvhost_nvdec_common::nvhost_nvdec_common(Core::System& system_, NvCore::Container& core_,
NvCore::ChannelType channel_type_)
: nvdevice{system_}, core{core_}, syncpoint_manager{core.GetSyncpointManager()},
nvmap{core.GetNvMapFile()}, channel_type{channel_type_} {
: nvdevice{system_}, host1x{system_.Host1x()}, core{core_},
syncpoint_manager{core.GetSyncpointManager()}, nvmap{core.GetNvMapFile()},
channel_type{channel_type_} {
auto& syncpts_accumulated = core.Host1xDeviceFile().syncpts_accumulated;
if (syncpts_accumulated.empty()) {
channel_syncpoint = syncpoint_manager.AllocateSyncpoint(false);
@@ -95,24 +96,24 @@ NvResult nvhost_nvdec_common::Submit(IoctlSubmit& params, std::span<u8> data, De
offset += SliceVectors(data, syncpt_increments, params.syncpoint_count, offset);
offset += SliceVectors(data, fence_thresholds, params.fence_count, offset);
auto& gpu = system.GPU();
auto* session = core.GetSession(sessions[fd]);
if (gpu.UseNvdec()) {
for (std::size_t i = 0; i < syncpt_increments.size(); i++) {
const SyncptIncr& syncpt_incr = syncpt_increments[i];
fence_thresholds[i] =
syncpoint_manager.IncrementSyncpointMaxExt(syncpt_incr.id, syncpt_incr.increments);
}
for (std::size_t i = 0; i < syncpt_increments.size(); i++) {
const SyncptIncr& syncpt_incr = syncpt_increments[i];
fence_thresholds[i] =
syncpoint_manager.IncrementSyncpointMaxExt(syncpt_incr.id, syncpt_incr.increments);
}
for (const auto& cmd_buffer : command_buffers) {
const auto object = nvmap.GetHandle(cmd_buffer.memory_id);
ASSERT_OR_EXECUTE(object, return NvResult::InvalidState;);
Tegra::ChCommandHeaderList cmdlist(cmd_buffer.word_count);
session->process->GetMemory().ReadBlock(object->address + cmd_buffer.offset, cmdlist.data(),
cmdlist.size() * sizeof(u32));
gpu.PushCommandBuffer(core.Host1xDeviceFile().fd_to_id[fd], cmdlist);
Core::Memory::CpuGuestMemory<Tegra::ChCommandHeader,
Core::Memory::GuestMemoryFlags::SafeRead>
cmdlist(session->process->GetMemory(), object->address + cmd_buffer.offset,
cmd_buffer.word_count);
host1x.PushEntries(fd, std::move(cmdlist));
}
// Some games expect command_buffers to be written back
offset = 0;
offset += WriteVectors(data, command_buffers, offset);

View File

@@ -119,6 +119,7 @@ protected:
Kernel::KEvent* QueryEvent(u32 event_id) override;
Tegra::Host1x::Host1x& host1x;
u32 channel_syncpoint;
s32_le nvmap_fd{};
u32_le submit_timeout{};

View File

@@ -7,6 +7,7 @@
#include "core/hle/service/nvdrv/core/container.h"
#include "core/hle/service/nvdrv/devices/ioctl_serialization.h"
#include "core/hle/service/nvdrv/devices/nvhost_vic.h"
#include "video_core/host1x/host1x.h"
#include "video_core/renderer_base.h"
namespace Service::Nvidia::Devices {
@@ -21,13 +22,8 @@ NvResult nvhost_vic::Ioctl1(DeviceFD fd, Ioctl command, std::span<const u8> inpu
switch (command.group) {
case 0x0:
switch (command.cmd) {
case 0x1: {
auto& host1x_file = core.Host1xDeviceFile();
if (!host1x_file.fd_to_id.contains(fd)) {
host1x_file.fd_to_id[fd] = host1x_file.vic_next_id++;
}
case 0x1:
return WrapFixedVariable(this, &nvhost_vic::Submit, input, output, fd);
}
case 0x2:
return WrapFixed(this, &nvhost_vic::GetSyncpoint, input, output);
case 0x3:
@@ -70,14 +66,11 @@ NvResult nvhost_vic::Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> inpu
void nvhost_vic::OnOpen(NvCore::SessionId session_id, DeviceFD fd) {
sessions[fd] = session_id;
host1x.StartDevice(fd, Tegra::Host1x::ChannelType::VIC, channel_syncpoint);
}
void nvhost_vic::OnClose(DeviceFD fd) {
auto& host1x_file = core.Host1xDeviceFile();
const auto iter = host1x_file.fd_to_id.find(fd);
if (iter != host1x_file.fd_to_id.end()) {
system.GPU().ClearCdmaInstance(iter->second);
}
host1x.StopDevice(fd, Tegra::Host1x::ChannelType::VIC);
sessions.erase(fd);
}

View File

@@ -1091,20 +1091,6 @@ bool Memory::InvalidateNCE(Common::ProcessAddress vaddr, size_t size) {
[&] { rasterizer = true; });
if (rasterizer) {
impl->InvalidateGPUMemory(ptr, size);
const auto type = impl->current_page_table->pointers[vaddr >> YUZU_PAGEBITS].Type();
if (type == Common::PageType::RasterizerCachedMemory) {
// Check if device mapped. If not, this bugged and we can unmark.
DAddr addr{};
Common::ScratchBuffer<u32> buffer;
impl->gpu_device_memory->ApplyOpOnPointer(ptr, buffer,
[&](DAddr address) { addr = address; });
if (addr == 0) {
LOG_ERROR(HW_Memory, "Fixing unmapped cached region {:#x}", GetInteger(vaddr));
impl->RasterizerMarkRegionCached(GetInteger(vaddr), size, false);
}
}
}
#ifdef __linux__

View File

@@ -64,6 +64,8 @@ public:
Memory(Memory&&) = default;
Memory& operator=(Memory&&) = delete;
static constexpr bool HAS_FLUSH_INVALIDATION = false;
/**
* Resets the state of the Memory system.
*/