From 7bd2421ff926733c4633d0e581e0561f0290dbc8 Mon Sep 17 00:00:00 2001
From: pineappleEA <pineaea@gmail.com>
Date: Wed, 20 Dec 2023 02:37:09 +0100
Subject: [PATCH] early-access version 4028

---
 README.md                                     |    2 +-
 src/core/arm/arm_interface.cpp                |    2 +-
 src/core/arm/arm_interface.h                  |    2 +-
 src/core/arm/debug.cpp                        |   14 +-
 src/core/arm/debug.h                          |    6 +-
 src/core/arm/dynarmic/arm_dynarmic_32.cpp     |    6 +-
 src/core/arm/dynarmic/arm_dynarmic_32.h       |    2 +-
 src/core/arm/dynarmic/arm_dynarmic_64.cpp     |    6 +-
 src/core/arm/dynarmic/arm_dynarmic_64.h       |    2 +-
 src/core/core.cpp                             |   73 +-
 src/core/core.h                               |   13 -
 src/core/file_sys/program_metadata.cpp        |    6 +-
 src/core/file_sys/program_metadata.h          |   13 +-
 src/core/hle/kernel/k_address_arbiter.cpp     |   19 +-
 src/core/hle/kernel/k_client_port.cpp         |    5 +-
 src/core/hle/kernel/k_condition_variable.cpp  |    8 +-
 src/core/hle/kernel/k_handle_table.h          |    8 +-
 src/core/hle/kernel/k_process.cpp             |   50 +-
 src/core/hle/kernel/k_process.h               |   17 +-
 src/core/hle/kernel/k_server_session.cpp      | 1619 +++++++++++++----
 src/core/hle/kernel/k_server_session.h        |   15 +-
 src/core/hle/kernel/k_session.cpp             |    3 +-
 src/core/hle/kernel/k_thread.cpp              |    3 +-
 src/core/hle/kernel/k_thread.h                |    6 +-
 src/core/hle/kernel/kernel.cpp                |   34 -
 src/core/hle/kernel/kernel.h                  |    7 -
 src/core/hle/kernel/message_buffer.h          |   20 +-
 src/core/hle/kernel/svc/svc_info.cpp          |    1 -
 src/core/hle/kernel/svc/svc_ipc.cpp           |    6 +-
 src/core/hle/kernel/svc_results.h             |    2 +
 src/core/hle/service/fatal/fatal.cpp          |    2 +-
 src/core/hle/service/ipc_helpers.h            |    4 +-
 src/core/hle/service/server_manager.cpp       |   21 +-
 src/core/hle/service/set/set_sys.cpp          |   12 +-
 src/core/hle/service/set/set_sys.h            |    1 +
 src/core/hle/service/sm/sm.cpp                |   36 +-
 src/core/hle/service/sm/sm.h                  |    8 +-
 src/core/hle/service/sm/sm_controller.cpp     |    7 +-
 .../loader/deconstructed_rom_directory.cpp    |    7 +-
 src/core/memory.cpp                           |   28 +-
 src/video_core/engines/maxwell_3d.cpp         |   24 +-
 src/video_core/query_cache.h                  |   39 +-
 .../renderer_opengl/gl_buffer_cache.cpp       |   45 +-
 .../renderer_opengl/gl_buffer_cache.h         |    1 +
 src/video_core/renderer_opengl/gl_device.cpp  |    1 +
 src/video_core/renderer_opengl/gl_device.h    |    5 +
 .../renderer_opengl/gl_query_cache.cpp        |   23 +-
 .../renderer_opengl/gl_rasterizer.cpp         |   57 +-
 .../renderer_opengl/gl_rasterizer.h           |    3 +
 .../renderer_opengl/renderer_opengl.cpp       |   16 +-
 .../renderer_vulkan/vk_rasterizer.cpp         |    4 +
 51 files changed, 1669 insertions(+), 645 deletions(-)

diff --git a/README.md b/README.md
index 01eab00f3..592664766 100755
--- a/README.md
+++ b/README.md
@@ -1,7 +1,7 @@
 yuzu emulator early access
 =============
 
-This is the source code for early-access 4027.
+This is the source code for early-access 4028.
 
 ## Legal Notice
 
diff --git a/src/core/arm/arm_interface.cpp b/src/core/arm/arm_interface.cpp
index 509cb964a..177a79fd7 100755
--- a/src/core/arm/arm_interface.cpp
+++ b/src/core/arm/arm_interface.cpp
@@ -9,7 +9,7 @@
 
 namespace Core {
 
-void ArmInterface::LogBacktrace(const Kernel::KProcess* process) const {
+void ArmInterface::LogBacktrace(Kernel::KProcess* process) const {
     Kernel::Svc::ThreadContext ctx;
     this->GetContext(ctx);
 
diff --git a/src/core/arm/arm_interface.h b/src/core/arm/arm_interface.h
index 885078ce1..02bad948b 100755
--- a/src/core/arm/arm_interface.h
+++ b/src/core/arm/arm_interface.h
@@ -95,7 +95,7 @@ public:
     virtual void SignalInterrupt(Kernel::KThread* thread) = 0;
 
     // Stack trace generation.
-    void LogBacktrace(const Kernel::KProcess* process) const;
+    void LogBacktrace(Kernel::KProcess* process) const;
 
     // Debug functionality.
     virtual const Kernel::DebugWatchpoint* HaltedWatchpoint() const = 0;
diff --git a/src/core/arm/debug.cpp b/src/core/arm/debug.cpp
index af1c34bc3..854509463 100755
--- a/src/core/arm/debug.cpp
+++ b/src/core/arm/debug.cpp
@@ -79,7 +79,7 @@ constexpr std::array<u64, 2> SegmentBases{
     0x7100000000ULL,
 };
 
-void SymbolicateBacktrace(const Kernel::KProcess* process, std::vector<BacktraceEntry>& out) {
+void SymbolicateBacktrace(Kernel::KProcess* process, std::vector<BacktraceEntry>& out) {
     auto modules = FindModules(process);
 
     const bool is_64 = process->Is64Bit();
@@ -118,7 +118,7 @@ void SymbolicateBacktrace(const Kernel::KProcess* process, std::vector<Backtrace
     }
 }
 
-std::vector<BacktraceEntry> GetAArch64Backtrace(const Kernel::KProcess* process,
+std::vector<BacktraceEntry> GetAArch64Backtrace(Kernel::KProcess* process,
                                                 const Kernel::Svc::ThreadContext& ctx) {
     std::vector<BacktraceEntry> out;
     auto& memory = process->GetMemory();
@@ -144,7 +144,7 @@ std::vector<BacktraceEntry> GetAArch64Backtrace(const Kernel::KProcess* process,
     return out;
 }
 
-std::vector<BacktraceEntry> GetAArch32Backtrace(const Kernel::KProcess* process,
+std::vector<BacktraceEntry> GetAArch32Backtrace(Kernel::KProcess* process,
                                                 const Kernel::Svc::ThreadContext& ctx) {
     std::vector<BacktraceEntry> out;
     auto& memory = process->GetMemory();
@@ -173,7 +173,7 @@ std::vector<BacktraceEntry> GetAArch32Backtrace(const Kernel::KProcess* process,
 } // namespace
 
 std::optional<std::string> GetThreadName(const Kernel::KThread* thread) {
-    const auto* process = thread->GetOwnerProcess();
+    auto* process = thread->GetOwnerProcess();
     if (process->Is64Bit()) {
         return GetNameFromThreadType64(process->GetMemory(), *thread);
     } else {
@@ -248,7 +248,7 @@ Kernel::KProcessAddress GetModuleEnd(const Kernel::KProcess* process,
     return cur_addr - 1;
 }
 
-Loader::AppLoader::Modules FindModules(const Kernel::KProcess* process) {
+Loader::AppLoader::Modules FindModules(Kernel::KProcess* process) {
     Loader::AppLoader::Modules modules;
 
     auto& page_table = process->GetPageTable();
@@ -312,7 +312,7 @@ Loader::AppLoader::Modules FindModules(const Kernel::KProcess* process) {
     return modules;
 }
 
-Kernel::KProcessAddress FindMainModuleEntrypoint(const Kernel::KProcess* process) {
+Kernel::KProcessAddress FindMainModuleEntrypoint(Kernel::KProcess* process) {
     // Do we have any loaded executable sections?
     auto modules = FindModules(process);
 
@@ -337,7 +337,7 @@ void InvalidateInstructionCacheRange(const Kernel::KProcess* process, u64 addres
     }
 }
 
-std::vector<BacktraceEntry> GetBacktraceFromContext(const Kernel::KProcess* process,
+std::vector<BacktraceEntry> GetBacktraceFromContext(Kernel::KProcess* process,
                                                     const Kernel::Svc::ThreadContext& ctx) {
     if (process->Is64Bit()) {
         return GetAArch64Backtrace(process, ctx);
diff --git a/src/core/arm/debug.h b/src/core/arm/debug.h
index c542633db..3cd671365 100755
--- a/src/core/arm/debug.h
+++ b/src/core/arm/debug.h
@@ -14,9 +14,9 @@ std::optional<std::string> GetThreadName(const Kernel::KThread* thread);
 std::string_view GetThreadWaitReason(const Kernel::KThread* thread);
 std::string GetThreadState(const Kernel::KThread* thread);
 
-Loader::AppLoader::Modules FindModules(const Kernel::KProcess* process);
+Loader::AppLoader::Modules FindModules(Kernel::KProcess* process);
 Kernel::KProcessAddress GetModuleEnd(const Kernel::KProcess* process, Kernel::KProcessAddress base);
-Kernel::KProcessAddress FindMainModuleEntrypoint(const Kernel::KProcess* process);
+Kernel::KProcessAddress FindMainModuleEntrypoint(Kernel::KProcess* process);
 
 void InvalidateInstructionCacheRange(const Kernel::KProcess* process, u64 address, u64 size);
 
@@ -28,7 +28,7 @@ struct BacktraceEntry {
     std::string name;
 };
 
-std::vector<BacktraceEntry> GetBacktraceFromContext(const Kernel::KProcess* process,
+std::vector<BacktraceEntry> GetBacktraceFromContext(Kernel::KProcess* process,
                                                     const Kernel::Svc::ThreadContext& ctx);
 std::vector<BacktraceEntry> GetBacktrace(const Kernel::KThread* thread);
 
diff --git a/src/core/arm/dynarmic/arm_dynarmic_32.cpp b/src/core/arm/dynarmic/arm_dynarmic_32.cpp
index 9031b3bdb..079ec51fe 100755
--- a/src/core/arm/dynarmic/arm_dynarmic_32.cpp
+++ b/src/core/arm/dynarmic/arm_dynarmic_32.cpp
@@ -15,7 +15,7 @@ using namespace Common::Literals;
 
 class DynarmicCallbacks32 : public Dynarmic::A32::UserCallbacks {
 public:
-    explicit DynarmicCallbacks32(ArmDynarmic32& parent, const Kernel::KProcess* process)
+    explicit DynarmicCallbacks32(ArmDynarmic32& parent, Kernel::KProcess* process)
         : m_parent{parent}, m_memory(process->GetMemory()),
           m_process(process), m_debugger_enabled{parent.m_system.DebuggerEnabled()},
           m_check_memory_access{m_debugger_enabled ||
@@ -169,7 +169,7 @@ public:
 
     ArmDynarmic32& m_parent;
     Core::Memory::Memory& m_memory;
-    const Kernel::KProcess* m_process{};
+    Kernel::KProcess* m_process{};
     const bool m_debugger_enabled{};
     const bool m_check_memory_access{};
     static constexpr u64 MinimumRunCycles = 10000U;
@@ -370,7 +370,7 @@ void ArmDynarmic32::RewindBreakpointInstruction() {
     this->SetContext(m_breakpoint_context);
 }
 
-ArmDynarmic32::ArmDynarmic32(System& system, bool uses_wall_clock, const Kernel::KProcess* process,
+ArmDynarmic32::ArmDynarmic32(System& system, bool uses_wall_clock, Kernel::KProcess* process,
                              DynarmicExclusiveMonitor& exclusive_monitor, std::size_t core_index)
     : ArmInterface{uses_wall_clock}, m_system{system}, m_exclusive_monitor{exclusive_monitor},
       m_cb(std::make_unique<DynarmicCallbacks32>(*this, process)),
diff --git a/src/core/arm/dynarmic/arm_dynarmic_32.h b/src/core/arm/dynarmic/arm_dynarmic_32.h
index 0eb55a7b6..44464dcbb 100755
--- a/src/core/arm/dynarmic/arm_dynarmic_32.h
+++ b/src/core/arm/dynarmic/arm_dynarmic_32.h
@@ -20,7 +20,7 @@ class System;
 
 class ArmDynarmic32 final : public ArmInterface {
 public:
-    ArmDynarmic32(System& system, bool uses_wall_clock, const Kernel::KProcess* process,
+    ArmDynarmic32(System& system, bool uses_wall_clock, Kernel::KProcess* process,
                   DynarmicExclusiveMonitor& exclusive_monitor, std::size_t core_index);
     ~ArmDynarmic32() override;
 
diff --git a/src/core/arm/dynarmic/arm_dynarmic_64.cpp b/src/core/arm/dynarmic/arm_dynarmic_64.cpp
index 9792cc6ac..ac1db0195 100755
--- a/src/core/arm/dynarmic/arm_dynarmic_64.cpp
+++ b/src/core/arm/dynarmic/arm_dynarmic_64.cpp
@@ -15,7 +15,7 @@ using namespace Common::Literals;
 
 class DynarmicCallbacks64 : public Dynarmic::A64::UserCallbacks {
 public:
-    explicit DynarmicCallbacks64(ArmDynarmic64& parent, const Kernel::KProcess* process)
+    explicit DynarmicCallbacks64(ArmDynarmic64& parent, Kernel::KProcess* process)
         : m_parent{parent}, m_memory(process->GetMemory()),
           m_process(process), m_debugger_enabled{parent.m_system.DebuggerEnabled()},
           m_check_memory_access{m_debugger_enabled ||
@@ -216,7 +216,7 @@ public:
     Core::Memory::Memory& m_memory;
     u64 m_tpidrro_el0{};
     u64 m_tpidr_el0{};
-    const Kernel::KProcess* m_process{};
+    Kernel::KProcess* m_process{};
     const bool m_debugger_enabled{};
     const bool m_check_memory_access{};
     static constexpr u64 MinimumRunCycles = 10000U;
@@ -399,7 +399,7 @@ void ArmDynarmic64::RewindBreakpointInstruction() {
     this->SetContext(m_breakpoint_context);
 }
 
-ArmDynarmic64::ArmDynarmic64(System& system, bool uses_wall_clock, const Kernel::KProcess* process,
+ArmDynarmic64::ArmDynarmic64(System& system, bool uses_wall_clock, Kernel::KProcess* process,
                              DynarmicExclusiveMonitor& exclusive_monitor, std::size_t core_index)
     : ArmInterface{uses_wall_clock}, m_system{system}, m_exclusive_monitor{exclusive_monitor},
       m_cb(std::make_unique<DynarmicCallbacks64>(*this, process)), m_core_index{core_index} {
diff --git a/src/core/arm/dynarmic/arm_dynarmic_64.h b/src/core/arm/dynarmic/arm_dynarmic_64.h
index da1b6c2ca..2c9878734 100755
--- a/src/core/arm/dynarmic/arm_dynarmic_64.h
+++ b/src/core/arm/dynarmic/arm_dynarmic_64.h
@@ -25,7 +25,7 @@ class System;
 
 class ArmDynarmic64 final : public ArmInterface {
 public:
-    ArmDynarmic64(System& system, bool uses_wall_clock, const Kernel::KProcess* process,
+    ArmDynarmic64(System& system, bool uses_wall_clock, Kernel::KProcess* process,
                   DynarmicExclusiveMonitor& exclusive_monitor, std::size_t core_index);
     ~ArmDynarmic64() override;
 
diff --git a/src/core/core.cpp b/src/core/core.cpp
index 296c6e671..42c3eb0c8 100755
--- a/src/core/core.cpp
+++ b/src/core/core.cpp
@@ -28,7 +28,6 @@
 #include "core/file_sys/savedata_factory.h"
 #include "core/file_sys/vfs_concat.h"
 #include "core/file_sys/vfs_real.h"
-#include "core/gpu_dirty_memory_manager.h"
 #include "core/hid/hid_core.h"
 #include "core/hle/kernel/k_memory_manager.h"
 #include "core/hle/kernel/k_process.h"
@@ -130,11 +129,8 @@ FileSys::VirtualFile GetGameFileFromPath(const FileSys::VirtualFilesystem& vfs,
 
 struct System::Impl {
     explicit Impl(System& system)
-        : kernel{system}, fs_controller{system}, memory{system}, hid_core{}, room_network{},
-          cpu_manager{system}, reporter{system}, applet_manager{system}, profile_manager{},
-          time_manager{system}, gpu_dirty_memory_write_manager{} {
-        memory.SetGPUDirtyManagers(gpu_dirty_memory_write_manager);
-    }
+        : kernel{system}, fs_controller{system}, hid_core{}, room_network{}, cpu_manager{system},
+          reporter{system}, applet_manager{system}, profile_manager{}, time_manager{system} {}
 
     void Initialize(System& system) {
         device_memory = std::make_unique<Core::DeviceMemory>();
@@ -241,17 +237,17 @@ struct System::Impl {
         debugger = std::make_unique<Debugger>(system, port);
     }
 
-    SystemResultStatus SetupForApplicationProcess(System& system, Frontend::EmuWindow& emu_window) {
+    void InitializeKernel(System& system) {
         LOG_DEBUG(Core, "initialized OK");
 
         // Setting changes may require a full system reinitialization (e.g., disabling multicore).
         ReinitializeIfNecessary(system);
 
-        memory.SetGPUDirtyManagers(gpu_dirty_memory_write_manager);
-
         kernel.Initialize();
         cpu_manager.Initialize();
+    }
 
+    SystemResultStatus SetupForApplicationProcess(System& system, Frontend::EmuWindow& emu_window) {
         /// Reset all glue registrations
         arp_manager.ResetAll();
 
@@ -300,17 +296,9 @@ struct System::Impl {
             return SystemResultStatus::ErrorGetLoader;
         }
 
-        SystemResultStatus init_result{SetupForApplicationProcess(system, emu_window)};
-        if (init_result != SystemResultStatus::Success) {
-            LOG_CRITICAL(Core, "Failed to initialize system (Error {})!",
-                         static_cast<int>(init_result));
-            ShutdownMainProcess();
-            return init_result;
-        }
+        InitializeKernel(system);
 
-        telemetry_session->AddInitialInfo(*app_loader, fs_controller, *content_provider);
-
-        // Create the process.
+        // Create the application process.
         auto main_process = Kernel::KProcess::Create(system.Kernel());
         Kernel::KProcess::Register(system.Kernel(), main_process);
         kernel.AppendNewProcess(main_process);
@@ -323,7 +311,18 @@ struct System::Impl {
             return static_cast<SystemResultStatus>(
                 static_cast<u32>(SystemResultStatus::ErrorLoader) + static_cast<u32>(load_result));
         }
+
+        // Set up the rest of the system.
+        SystemResultStatus init_result{SetupForApplicationProcess(system, emu_window)};
+        if (init_result != SystemResultStatus::Success) {
+            LOG_CRITICAL(Core, "Failed to initialize system (Error {})!",
+                         static_cast<int>(init_result));
+            ShutdownMainProcess();
+            return init_result;
+        }
+
         AddGlueRegistrationForProcess(*app_loader, *main_process);
+        telemetry_session->AddInitialInfo(*app_loader, fs_controller, *content_provider);
 
         // Initialize cheat engine
         if (cheat_engine) {
@@ -426,7 +425,6 @@ struct System::Impl {
         cpu_manager.Shutdown();
         debugger.reset();
         kernel.Shutdown();
-        memory.Reset();
         Network::RestartSocketOperations();
 
         if (auto room_member = room_network.GetRoomMember().lock()) {
@@ -507,7 +505,6 @@ struct System::Impl {
     std::unique_ptr<Tegra::Host1x::Host1x> host1x_core;
     std::unique_ptr<Core::DeviceMemory> device_memory;
     std::unique_ptr<AudioCore::AudioCore> audio_core;
-    Core::Memory::Memory memory;
     Core::HID::HIDCore hid_core;
     Network::RoomNetwork room_network;
 
@@ -567,9 +564,6 @@ struct System::Impl {
     std::array<u64, Core::Hardware::NUM_CPU_CORES> dynarmic_ticks{};
     std::array<MicroProfileToken, Core::Hardware::NUM_CPU_CORES> microprofile_cpu{};
 
-    std::array<Core::GPUDirtyMemoryManager, Core::Hardware::NUM_CPU_CORES>
-        gpu_dirty_memory_write_manager{};
-
     std::deque<std::vector<u8>> user_channel;
 };
 
@@ -652,29 +646,12 @@ void System::PrepareReschedule(const u32 core_index) {
     impl->kernel.PrepareReschedule(core_index);
 }
 
-Core::GPUDirtyMemoryManager& System::CurrentGPUDirtyMemoryManager() {
-    const std::size_t core = impl->kernel.GetCurrentHostThreadID();
-    return impl->gpu_dirty_memory_write_manager[core < Core::Hardware::NUM_CPU_CORES
-                                                    ? core
-                                                    : Core::Hardware::NUM_CPU_CORES - 1];
-}
-
-/// Provides a constant reference to the current gou dirty memory manager.
-const Core::GPUDirtyMemoryManager& System::CurrentGPUDirtyMemoryManager() const {
-    const std::size_t core = impl->kernel.GetCurrentHostThreadID();
-    return impl->gpu_dirty_memory_write_manager[core < Core::Hardware::NUM_CPU_CORES
-                                                    ? core
-                                                    : Core::Hardware::NUM_CPU_CORES - 1];
-}
-
 size_t System::GetCurrentHostThreadID() const {
     return impl->kernel.GetCurrentHostThreadID();
 }
 
 void System::GatherGPUDirtyMemory(std::function<void(VAddr, size_t)>& callback) {
-    for (auto& manager : impl->gpu_dirty_memory_write_manager) {
-        manager.Gather(callback);
-    }
+    return this->ApplicationProcess()->GatherGPUDirtyMemory(callback);
 }
 
 PerfStatsResults System::GetAndResetPerfStats() {
@@ -723,20 +700,12 @@ const Kernel::KProcess* System::ApplicationProcess() const {
     return impl->kernel.ApplicationProcess();
 }
 
-ExclusiveMonitor& System::Monitor() {
-    return impl->kernel.GetExclusiveMonitor();
-}
-
-const ExclusiveMonitor& System::Monitor() const {
-    return impl->kernel.GetExclusiveMonitor();
-}
-
 Memory::Memory& System::ApplicationMemory() {
-    return impl->memory;
+    return impl->kernel.ApplicationProcess()->GetMemory();
 }
 
 const Core::Memory::Memory& System::ApplicationMemory() const {
-    return impl->memory;
+    return impl->kernel.ApplicationProcess()->GetMemory();
 }
 
 Tegra::GPU& System::GPU() {
diff --git a/src/core/core.h b/src/core/core.h
index 5fba2d739..9b5fa1c61 100755
--- a/src/core/core.h
+++ b/src/core/core.h
@@ -116,7 +116,6 @@ class CpuManager;
 class Debugger;
 class DeviceMemory;
 class ExclusiveMonitor;
-class GPUDirtyMemoryManager;
 class PerfStats;
 class Reporter;
 class SpeedLimiter;
@@ -225,12 +224,6 @@ public:
     /// Prepare the core emulation for a reschedule
     void PrepareReschedule(u32 core_index);
 
-    /// Provides a reference to the gou dirty memory manager.
-    [[nodiscard]] Core::GPUDirtyMemoryManager& CurrentGPUDirtyMemoryManager();
-
-    /// Provides a constant reference to the current gou dirty memory manager.
-    [[nodiscard]] const Core::GPUDirtyMemoryManager& CurrentGPUDirtyMemoryManager() const;
-
     void GatherGPUDirtyMemory(std::function<void(VAddr, size_t)>& callback);
 
     [[nodiscard]] size_t GetCurrentHostThreadID() const;
@@ -250,12 +243,6 @@ public:
     /// Gets a const reference to the underlying CPU manager
     [[nodiscard]] const CpuManager& GetCpuManager() const;
 
-    /// Gets a reference to the exclusive monitor
-    [[nodiscard]] ExclusiveMonitor& Monitor();
-
-    /// Gets a constant reference to the exclusive monitor
-    [[nodiscard]] const ExclusiveMonitor& Monitor() const;
-
     /// Gets a mutable reference to the system memory instance.
     [[nodiscard]] Core::Memory::Memory& ApplicationMemory();
 
diff --git a/src/core/file_sys/program_metadata.cpp b/src/core/file_sys/program_metadata.cpp
index 3ec6e360a..382fb418f 100755
--- a/src/core/file_sys/program_metadata.cpp
+++ b/src/core/file_sys/program_metadata.cpp
@@ -166,6 +166,10 @@ u32 ProgramMetadata::GetSystemResourceSize() const {
     return npdm_header.system_resource_size;
 }
 
+PoolPartition ProgramMetadata::GetPoolPartition() const {
+    return acid_header.pool_partition;
+}
+
 const ProgramMetadata::KernelCapabilityDescriptors& ProgramMetadata::GetKernelCapabilities() const {
     return aci_kernel_capabilities;
 }
@@ -201,7 +205,7 @@ void ProgramMetadata::Print() const {
     // Begin ACID printing (potential perms, signed)
     LOG_DEBUG(Service_FS, "Magic:                  {:.4}", acid_header.magic.data());
     LOG_DEBUG(Service_FS, "Flags:                  0x{:02X}", acid_header.flags);
-    LOG_DEBUG(Service_FS, " > Is Retail:           {}", acid_header.is_retail ? "YES" : "NO");
+    LOG_DEBUG(Service_FS, " > Is Retail:           {}", acid_header.production_flag ? "YES" : "NO");
     LOG_DEBUG(Service_FS, "Title ID Min:           0x{:016X}", acid_header.title_id_min);
     LOG_DEBUG(Service_FS, "Title ID Max:           0x{:016X}", acid_header.title_id_max);
     LOG_DEBUG(Service_FS, "Filesystem Access:      0x{:016X}\n", acid_file_access.permissions);
diff --git a/src/core/file_sys/program_metadata.h b/src/core/file_sys/program_metadata.h
index b3cd379c2..70d87ff99 100755
--- a/src/core/file_sys/program_metadata.h
+++ b/src/core/file_sys/program_metadata.h
@@ -34,6 +34,13 @@ enum class ProgramFilePermission : u64 {
     Everything = 1ULL << 63,
 };
 
+enum class PoolPartition : u32 {
+    Application = 0,
+    Applet = 1,
+    System = 2,
+    SystemNonSecure = 3,
+};
+
 /**
  * Helper which implements an interface to parse Program Description Metadata (NPDM)
  * Data can either be loaded from a file path or with data and an offset into it.
@@ -72,6 +79,7 @@ public:
     u64 GetTitleID() const;
     u64 GetFilesystemPermissions() const;
     u32 GetSystemResourceSize() const;
+    PoolPartition GetPoolPartition() const;
     const KernelCapabilityDescriptors& GetKernelCapabilities() const;
     const std::array<u8, 0x10>& GetName() const {
         return npdm_header.application_name;
@@ -116,8 +124,9 @@ private:
         union {
             u32 flags;
 
-            BitField<0, 1, u32> is_retail;
-            BitField<1, 31, u32> flags_unk;
+            BitField<0, 1, u32> production_flag;
+            BitField<1, 1, u32> unqualified_approval;
+            BitField<2, 4, PoolPartition> pool_partition;
         };
         u64_le title_id_min;
         u64_le title_id_max;
diff --git a/src/core/hle/kernel/k_address_arbiter.cpp b/src/core/hle/kernel/k_address_arbiter.cpp
index 18dd2cb45..4b6ee9260 100755
--- a/src/core/hle/kernel/k_address_arbiter.cpp
+++ b/src/core/hle/kernel/k_address_arbiter.cpp
@@ -4,6 +4,7 @@
 #include "core/arm/exclusive_monitor.h"
 #include "core/core.h"
 #include "core/hle/kernel/k_address_arbiter.h"
+#include "core/hle/kernel/k_process.h"
 #include "core/hle/kernel/k_scheduler.h"
 #include "core/hle/kernel/k_scoped_scheduler_lock_and_sleep.h"
 #include "core/hle/kernel/k_thread.h"
@@ -26,9 +27,9 @@ bool ReadFromUser(KernelCore& kernel, s32* out, KProcessAddress address) {
     return true;
 }
 
-bool DecrementIfLessThan(Core::System& system, s32* out, KProcessAddress address, s32 value) {
-    auto& monitor = system.Monitor();
-    const auto current_core = system.Kernel().CurrentPhysicalCoreIndex();
+bool DecrementIfLessThan(KernelCore& kernel, s32* out, KProcessAddress address, s32 value) {
+    auto& monitor = GetCurrentProcess(kernel).GetExclusiveMonitor();
+    const auto current_core = kernel.CurrentPhysicalCoreIndex();
 
     // NOTE: If scheduler lock is not held here, interrupt disable is required.
     // KScopedInterruptDisable di;
@@ -66,10 +67,10 @@ bool DecrementIfLessThan(Core::System& system, s32* out, KProcessAddress address
     return true;
 }
 
-bool UpdateIfEqual(Core::System& system, s32* out, KProcessAddress address, s32 value,
+bool UpdateIfEqual(KernelCore& kernel, s32* out, KProcessAddress address, s32 value,
                    s32 new_value) {
-    auto& monitor = system.Monitor();
-    const auto current_core = system.Kernel().CurrentPhysicalCoreIndex();
+    auto& monitor = GetCurrentProcess(kernel).GetExclusiveMonitor();
+    const auto current_core = kernel.CurrentPhysicalCoreIndex();
 
     // NOTE: If scheduler lock is not held here, interrupt disable is required.
     // KScopedInterruptDisable di;
@@ -159,7 +160,7 @@ Result KAddressArbiter::SignalAndIncrementIfEqual(uint64_t addr, s32 value, s32
 
         // Check the userspace value.
         s32 user_value{};
-        R_UNLESS(UpdateIfEqual(m_system, std::addressof(user_value), addr, value, value + 1),
+        R_UNLESS(UpdateIfEqual(m_kernel, std::addressof(user_value), addr, value, value + 1),
                  ResultInvalidCurrentMemory);
         R_UNLESS(user_value == value, ResultInvalidState);
 
@@ -219,7 +220,7 @@ Result KAddressArbiter::SignalAndModifyByWaitingCountIfEqual(uint64_t addr, s32
         s32 user_value{};
         bool succeeded{};
         if (value != new_value) {
-            succeeded = UpdateIfEqual(m_system, std::addressof(user_value), addr, value, new_value);
+            succeeded = UpdateIfEqual(m_kernel, std::addressof(user_value), addr, value, new_value);
         } else {
             succeeded = ReadFromUser(m_kernel, std::addressof(user_value), addr);
         }
@@ -262,7 +263,7 @@ Result KAddressArbiter::WaitIfLessThan(uint64_t addr, s32 value, bool decrement,
         s32 user_value{};
         bool succeeded{};
         if (decrement) {
-            succeeded = DecrementIfLessThan(m_system, std::addressof(user_value), addr, value);
+            succeeded = DecrementIfLessThan(m_kernel, std::addressof(user_value), addr, value);
         } else {
             succeeded = ReadFromUser(m_kernel, std::addressof(user_value), addr);
         }
diff --git a/src/core/hle/kernel/k_client_port.cpp b/src/core/hle/kernel/k_client_port.cpp
index 3d3e2e403..c059fc9d1 100755
--- a/src/core/hle/kernel/k_client_port.cpp
+++ b/src/core/hle/kernel/k_client_port.cpp
@@ -58,9 +58,8 @@ Result KClientPort::CreateSession(KClientSession** out) {
     KSession* session{};
 
     // Reserve a new session from the resource limit.
-    //! FIXME: we are reserving this from the wrong resource limit!
-    KScopedResourceReservation session_reservation(
-        m_kernel.ApplicationProcess()->GetResourceLimit(), LimitableResource::SessionCountMax);
+    KScopedResourceReservation session_reservation(GetCurrentProcessPointer(m_kernel),
+                                                   LimitableResource::SessionCountMax);
     R_UNLESS(session_reservation.Succeeded(), ResultLimitReached);
 
     // Allocate a session normally.
diff --git a/src/core/hle/kernel/k_condition_variable.cpp b/src/core/hle/kernel/k_condition_variable.cpp
index 6784c9bfd..97dcf805d 100755
--- a/src/core/hle/kernel/k_condition_variable.cpp
+++ b/src/core/hle/kernel/k_condition_variable.cpp
@@ -28,10 +28,10 @@ bool WriteToUser(KernelCore& kernel, KProcessAddress address, const u32* p) {
     return true;
 }
 
-bool UpdateLockAtomic(Core::System& system, u32* out, KProcessAddress address, u32 if_zero,
+bool UpdateLockAtomic(KernelCore& kernel, u32* out, KProcessAddress address, u32 if_zero,
                       u32 new_orr_mask) {
-    auto& monitor = system.Monitor();
-    const auto current_core = system.Kernel().CurrentPhysicalCoreIndex();
+    auto& monitor = GetCurrentProcess(kernel).GetExclusiveMonitor();
+    const auto current_core = kernel.CurrentPhysicalCoreIndex();
 
     u32 expected{};
 
@@ -208,7 +208,7 @@ void KConditionVariable::SignalImpl(KThread* thread) {
         // TODO(bunnei): We should call CanAccessAtomic(..) here.
         can_access = true;
         if (can_access) [[likely]] {
-            UpdateLockAtomic(m_system, std::addressof(prev_tag), address, own_tag,
+            UpdateLockAtomic(m_kernel, std::addressof(prev_tag), address, own_tag,
                              Svc::HandleWaitMask);
         }
     }
diff --git a/src/core/hle/kernel/k_handle_table.h b/src/core/hle/kernel/k_handle_table.h
index cfa02259b..fa8aaea71 100755
--- a/src/core/hle/kernel/k_handle_table.h
+++ b/src/core/hle/kernel/k_handle_table.h
@@ -30,7 +30,7 @@ public:
 public:
     explicit KHandleTable(KernelCore& kernel) : m_kernel(kernel) {}
 
-    Result Initialize(s32 size) {
+    Result Initialize(KProcess* owner, s32 size) {
         // Check that the table size is valid.
         R_UNLESS(size <= static_cast<s32>(MaxTableSize), ResultOutOfMemory);
 
@@ -44,6 +44,7 @@ public:
         m_next_linear_id = MinLinearId;
         m_count = 0;
         m_free_head_index = -1;
+        m_owner = owner;
 
         // Free all entries.
         for (s32 i = 0; i < static_cast<s32>(m_table_size); ++i) {
@@ -90,8 +91,8 @@ public:
         // Handle pseudo-handles.
         if constexpr (std::derived_from<KProcess, T>) {
             if (handle == Svc::PseudoHandle::CurrentProcess) {
-                //! FIXME: this is the wrong process!
-                auto* const cur_process = m_kernel.ApplicationProcess();
+                // TODO: this should resolve to the current process, not the table's owner
+                auto* const cur_process = m_owner;
                 ASSERT(cur_process != nullptr);
                 return cur_process;
             }
@@ -301,6 +302,7 @@ private:
 
 private:
     KernelCore& m_kernel;
+    KProcess* m_owner{};
     std::array<EntryInfo, MaxTableSize> m_entry_infos{};
     std::array<KAutoObject*, MaxTableSize> m_objects{};
     mutable KSpinLock m_lock;
diff --git a/src/core/hle/kernel/k_process.cpp b/src/core/hle/kernel/k_process.cpp
index f5fdb505a..7cb170a2f 100755
--- a/src/core/hle/kernel/k_process.cpp
+++ b/src/core/hle/kernel/k_process.cpp
@@ -306,12 +306,16 @@ Result KProcess::Initialize(const Svc::CreateProcessParameter& params, const KPa
             False(params.flags & Svc::CreateProcessFlag::DisableDeviceAddressSpaceMerge);
         R_TRY(m_page_table.Initialize(as_type, enable_aslr, enable_das_merge, !enable_aslr, pool,
                                       params.code_address, params.code_num_pages * PageSize,
-                                      m_system_resource, res_limit, this->GetMemory(), 0));
+                                      m_system_resource, res_limit, m_memory, 0));
     }
     ON_RESULT_FAILURE_2 {
         m_page_table.Finalize();
     };
 
+    // Ensure our memory is initialized.
+    m_memory.SetCurrentPageTable(*this);
+    m_memory.SetGPUDirtyManagers(m_dirty_memory_managers);
+
     // Ensure we can insert the code region.
     R_UNLESS(m_page_table.CanContain(params.code_address, params.code_num_pages * PageSize,
                                      KMemoryState::Code),
@@ -399,12 +403,16 @@ Result KProcess::Initialize(const Svc::CreateProcessParameter& params,
             False(params.flags & Svc::CreateProcessFlag::DisableDeviceAddressSpaceMerge);
         R_TRY(m_page_table.Initialize(as_type, enable_aslr, enable_das_merge, !enable_aslr, pool,
                                       params.code_address, code_size, m_system_resource, res_limit,
-                                      this->GetMemory(), aslr_space_start));
+                                      m_memory, aslr_space_start));
     }
     ON_RESULT_FAILURE_2 {
         m_page_table.Finalize();
     };
 
+    // Ensure our memory is initialized.
+    m_memory.SetCurrentPageTable(*this);
+    m_memory.SetGPUDirtyManagers(m_dirty_memory_managers);
+
     // Ensure we can insert the code region.
     R_UNLESS(m_page_table.CanContain(params.code_address, code_size, KMemoryState::Code),
              ResultInvalidMemoryRegion);
@@ -1094,8 +1102,7 @@ void KProcess::UnpinThread(KThread* thread) {
 
 Result KProcess::GetThreadList(s32* out_num_threads, KProcessAddress out_thread_ids,
                                s32 max_out_count) {
-    // TODO: use current memory reference
-    auto& memory = m_kernel.System().ApplicationMemory();
+    auto& memory = this->GetMemory();
 
     // Lock the list.
     KScopedLightLock lk(m_list_lock);
@@ -1128,14 +1135,15 @@ void KProcess::Switch(KProcess* cur_process, KProcess* next_process) {}
 KProcess::KProcess(KernelCore& kernel)
     : KAutoObjectWithSlabHeapAndContainer(kernel), m_page_table{kernel}, m_state_lock{kernel},
       m_list_lock{kernel}, m_cond_var{kernel.System()}, m_address_arbiter{kernel.System()},
-      m_handle_table{kernel} {}
+      m_handle_table{kernel}, m_dirty_memory_managers{},
+      m_exclusive_monitor{}, m_memory{kernel.System()} {}
 KProcess::~KProcess() = default;
 
 Result KProcess::LoadFromMetadata(const FileSys::ProgramMetadata& metadata, std::size_t code_size,
                                   KProcessAddress aslr_space_start, bool is_hbl) {
     // Create a resource limit for the process.
-    const auto physical_memory_size =
-        m_kernel.MemoryManager().GetSize(Kernel::KMemoryManager::Pool::Application);
+    const auto pool = static_cast<KMemoryManager::Pool>(metadata.GetPoolPartition());
+    const auto physical_memory_size = m_kernel.MemoryManager().GetSize(pool);
     auto* res_limit =
         Kernel::CreateResourceLimitForProcess(m_kernel.System(), physical_memory_size);
 
@@ -1146,8 +1154,10 @@ Result KProcess::LoadFromMetadata(const FileSys::ProgramMetadata& metadata, std:
     Svc::CreateProcessFlag flag{};
     u64 code_address{};
 
-    // We are an application.
-    flag |= Svc::CreateProcessFlag::IsApplication;
+    // Determine if we are an application.
+    if (pool == KMemoryManager::Pool::Application) {
+        flag |= Svc::CreateProcessFlag::IsApplication;
+    }
 
     // If we are 64-bit, create as such.
     if (metadata.Is64BitProgram()) {
@@ -1196,8 +1206,8 @@ Result KProcess::LoadFromMetadata(const FileSys::ProgramMetadata& metadata, std:
     std::memcpy(params.name.data(), name.data(), sizeof(params.name));
 
     // Initialize for application process.
-    R_TRY(this->Initialize(params, metadata.GetKernelCapabilities(), res_limit,
-                           KMemoryManager::Pool::Application, aslr_space_start));
+    R_TRY(this->Initialize(params, metadata.GetKernelCapabilities(), res_limit, pool,
+                           aslr_space_start));
 
     // Assign remaining properties.
     m_is_hbl = is_hbl;
@@ -1223,7 +1233,7 @@ void KProcess::LoadModule(CodeSet code_set, KProcessAddress base_addr) {
     ReprotectSegment(code_set.DataSegment(), Svc::MemoryPermission::ReadWrite);
 
 #ifdef HAS_NCE
-    if (Settings::IsNceEnabled()) {
+    if (this->IsApplication() && Settings::IsNceEnabled()) {
         auto& buffer = m_kernel.System().DeviceMemory().buffer;
         const auto& code = code_set.CodeSegment();
         const auto& patch = code_set.PatchSegment();
@@ -1235,10 +1245,11 @@ void KProcess::LoadModule(CodeSet code_set, KProcessAddress base_addr) {
 }
 
 void KProcess::InitializeInterfaces() {
-    this->GetMemory().SetCurrentPageTable(*this);
+    m_exclusive_monitor =
+        Core::MakeExclusiveMonitor(this->GetMemory(), Core::Hardware::NUM_CPU_CORES);
 
 #ifdef HAS_NCE
-    if (this->Is64Bit() && Settings::IsNceEnabled()) {
+    if (this->IsApplication() && Settings::IsNceEnabled()) {
         for (size_t i = 0; i < Core::Hardware::NUM_CPU_CORES; i++) {
             m_arm_interfaces[i] = std::make_unique<Core::ArmNce>(m_kernel.System(), true, i);
         }
@@ -1248,13 +1259,13 @@ void KProcess::InitializeInterfaces() {
         for (size_t i = 0; i < Core::Hardware::NUM_CPU_CORES; i++) {
             m_arm_interfaces[i] = std::make_unique<Core::ArmDynarmic64>(
                 m_kernel.System(), m_kernel.IsMulticore(), this,
-                static_cast<Core::DynarmicExclusiveMonitor&>(m_kernel.GetExclusiveMonitor()), i);
+                static_cast<Core::DynarmicExclusiveMonitor&>(*m_exclusive_monitor), i);
         }
     } else {
         for (size_t i = 0; i < Core::Hardware::NUM_CPU_CORES; i++) {
             m_arm_interfaces[i] = std::make_unique<Core::ArmDynarmic32>(
                 m_kernel.System(), m_kernel.IsMulticore(), this,
-                static_cast<Core::DynarmicExclusiveMonitor&>(m_kernel.GetExclusiveMonitor()), i);
+                static_cast<Core::DynarmicExclusiveMonitor&>(*m_exclusive_monitor), i);
         }
     }
 }
@@ -1305,9 +1316,10 @@ bool KProcess::RemoveWatchpoint(KProcessAddress addr, u64 size, DebugWatchpointT
     return true;
 }
 
-Core::Memory::Memory& KProcess::GetMemory() const {
-    // TODO: per-process memory
-    return m_kernel.System().ApplicationMemory();
+void KProcess::GatherGPUDirtyMemory(std::function<void(VAddr, size_t)>& callback) {
+    for (auto& manager : m_dirty_memory_managers) {
+        manager.Gather(callback);
+    }
 }
 
 } // namespace Kernel
diff --git a/src/core/hle/kernel/k_process.h b/src/core/hle/kernel/k_process.h
index a231a3b6a..c953011d4 100755
--- a/src/core/hle/kernel/k_process.h
+++ b/src/core/hle/kernel/k_process.h
@@ -7,6 +7,7 @@
 
 #include "core/arm/arm_interface.h"
 #include "core/file_sys/program_metadata.h"
+#include "core/gpu_dirty_memory_manager.h"
 #include "core/hle/kernel/code_set.h"
 #include "core/hle/kernel/k_address_arbiter.h"
 #include "core/hle/kernel/k_capabilities.h"
@@ -17,6 +18,7 @@
 #include "core/hle/kernel/k_system_resource.h"
 #include "core/hle/kernel/k_thread.h"
 #include "core/hle/kernel/k_thread_local_page.h"
+#include "core/memory.h"
 
 namespace Kernel {
 
@@ -126,6 +128,9 @@ private:
 #ifdef HAS_NCE
     std::unordered_map<u64, u64> m_post_handlers{};
 #endif
+    std::array<Core::GPUDirtyMemoryManager, Core::Hardware::NUM_CPU_CORES> m_dirty_memory_managers;
+    std::unique_ptr<Core::ExclusiveMonitor> m_exclusive_monitor;
+    Core::Memory::Memory m_memory;
 
 private:
     Result StartTermination();
@@ -502,7 +507,15 @@ public:
 
     void InitializeInterfaces();
 
-    Core::Memory::Memory& GetMemory() const;
+    Core::Memory::Memory& GetMemory() {
+        return m_memory;
+    }
+
+    void GatherGPUDirtyMemory(std::function<void(VAddr, size_t)>& callback);
+
+    Core::ExclusiveMonitor& GetExclusiveMonitor() const {
+        return *m_exclusive_monitor;
+    }
 
 public:
     // Overridden parent functions.
@@ -539,7 +552,7 @@ private:
 
     Result InitializeHandleTable(s32 size) {
         // Try to initialize the handle table.
-        R_TRY(m_handle_table.Initialize(size));
+        R_TRY(m_handle_table.Initialize(this, size));
 
         // We succeeded, so note that we did.
         m_is_handle_table_initialized = true;
diff --git a/src/core/hle/kernel/k_server_session.cpp b/src/core/hle/kernel/k_server_session.cpp
index c02beeac9..f6ab2b025 100755
--- a/src/core/hle/kernel/k_server_session.cpp
+++ b/src/core/hle/kernel/k_server_session.cpp
@@ -8,6 +8,7 @@
 #include "common/common_types.h"
 #include "common/logging/log.h"
 #include "common/scope_exit.h"
+#include "common/scratch_buffer.h"
 #include "core/core.h"
 #include "core/core_timing.h"
 #include "core/hle/kernel/k_client_port.h"
@@ -29,12 +30,138 @@ namespace Kernel {
 
 namespace {
 
+constexpr inline size_t PointerTransferBufferAlignment = 0x10;
+constexpr inline size_t ReceiveListDataSize =
+    MessageBuffer::MessageHeader::ReceiveListCountType_CountMax *
+    MessageBuffer::ReceiveListEntry::GetDataSize() / sizeof(u32);
+
+using ThreadQueueImplForKServerSessionRequest = KThreadQueue;
+
+class ReceiveList {
+public:
+    static constexpr int GetEntryCount(const MessageBuffer::MessageHeader& header) {
+        const auto count = header.GetReceiveListCount();
+        switch (count) {
+        case MessageBuffer::MessageHeader::ReceiveListCountType_None:
+            return 0;
+        case MessageBuffer::MessageHeader::ReceiveListCountType_ToMessageBuffer:
+            return 0;
+        case MessageBuffer::MessageHeader::ReceiveListCountType_ToSingleBuffer:
+            return 1;
+        default:
+            return count - MessageBuffer::MessageHeader::ReceiveListCountType_CountOffset;
+        }
+    }
+
+    explicit ReceiveList(const u32* dst_msg, uint64_t dst_address,
+                         KProcessPageTable& dst_page_table,
+                         const MessageBuffer::MessageHeader& dst_header,
+                         const MessageBuffer::SpecialHeader& dst_special_header, size_t msg_size,
+                         size_t out_offset, s32 dst_recv_list_idx, bool is_tls) {
+        m_recv_list_count = dst_header.GetReceiveListCount();
+        m_msg_buffer_end = dst_address + sizeof(u32) * out_offset;
+        m_msg_buffer_space_end = dst_address + msg_size;
+
+        // NOTE: Nintendo calculates the receive list index here using the special header.
+        // We pre-calculate it in the caller, and pass it as a parameter.
+        (void)dst_special_header;
+
+        const u32* recv_list = dst_msg + dst_recv_list_idx;
+        const auto entry_count = GetEntryCount(dst_header);
+
+        if (is_tls) {
+            // Messages from TLS to TLS are contained within one page.
+            std::memcpy(m_data.data(), recv_list,
+                        entry_count * MessageBuffer::ReceiveListEntry::GetDataSize());
+        } else {
+            // If any buffer is not from TLS, perform a normal read instead.
+            uint64_t cur_addr = dst_address + dst_recv_list_idx * sizeof(u32);
+            dst_page_table.GetMemory().ReadBlock(
+                cur_addr, m_data.data(),
+                entry_count * MessageBuffer::ReceiveListEntry::GetDataSize());
+        }
+    }
+
+    bool IsIndex() const {
+        return m_recv_list_count >
+               static_cast<s32>(MessageBuffer::MessageHeader::ReceiveListCountType_CountOffset);
+    }
+
+    bool IsToMessageBuffer() const {
+        return m_recv_list_count ==
+               MessageBuffer::MessageHeader::ReceiveListCountType_ToMessageBuffer;
+    }
+
+    void GetBuffer(uint64_t& out, size_t size, int& key) const {
+        switch (m_recv_list_count) {
+        case MessageBuffer::MessageHeader::ReceiveListCountType_None: {
+            out = 0;
+            break;
+        }
+        case MessageBuffer::MessageHeader::ReceiveListCountType_ToMessageBuffer: {
+            const uint64_t buf =
+                Common::AlignUp(m_msg_buffer_end + key, PointerTransferBufferAlignment);
+
+            if ((buf < buf + size) && (buf + size <= m_msg_buffer_space_end)) {
+                out = buf;
+                key = static_cast<int>(buf + size - m_msg_buffer_end);
+            } else {
+                out = 0;
+            }
+            break;
+        }
+        case MessageBuffer::MessageHeader::ReceiveListCountType_ToSingleBuffer: {
+            const MessageBuffer::ReceiveListEntry entry(m_data[0], m_data[1]);
+            const uint64_t buf =
+                Common::AlignUp(entry.GetAddress() + key, PointerTransferBufferAlignment);
+
+            const uint64_t entry_addr = entry.GetAddress();
+            const size_t entry_size = entry.GetSize();
+
+            if ((buf < buf + size) && (entry_addr < entry_addr + entry_size) &&
+                (buf + size <= entry_addr + entry_size)) {
+                out = buf;
+                key = static_cast<int>(buf + size - entry_addr);
+            } else {
+                out = 0;
+            }
+            break;
+        }
+        default: {
+            if (key < m_recv_list_count -
+                          static_cast<s32>(
+                              MessageBuffer::MessageHeader::ReceiveListCountType_CountOffset)) {
+                const MessageBuffer::ReceiveListEntry entry(m_data[2 * key + 0],
+                                                            m_data[2 * key + 1]);
+
+                const uintptr_t entry_addr = entry.GetAddress();
+                const size_t entry_size = entry.GetSize();
+
+                if ((entry_addr < entry_addr + entry_size) && (entry_size >= size)) {
+                    out = entry_addr;
+                }
+            } else {
+                out = 0;
+            }
+            break;
+        }
+        }
+    }
+
+private:
+    std::array<u32, ReceiveListDataSize> m_data;
+    s32 m_recv_list_count;
+    uint64_t m_msg_buffer_end;
+    uint64_t m_msg_buffer_space_end;
+};
+
 template <bool MoveHandleAllowed>
-Result ProcessMessageSpecialData(KProcess& dst_process, KProcess& src_process, KThread& src_thread,
-                                 MessageBuffer& dst_msg, const MessageBuffer& src_msg,
-                                 MessageBuffer::SpecialHeader& src_special_header) {
+Result ProcessMessageSpecialData(s32& offset, KProcess& dst_process, KProcess& src_process,
+                                 KThread& src_thread, const MessageBuffer& dst_msg,
+                                 const MessageBuffer& src_msg,
+                                 const MessageBuffer::SpecialHeader& src_special_header) {
     // Copy the special header to the destination.
-    s32 offset = dst_msg.Set(src_special_header);
+    offset = dst_msg.Set(src_special_header);
 
     // Copy the process ID.
     if (src_special_header.GetHasProcessId()) {
@@ -110,6 +237,102 @@ Result ProcessMessageSpecialData(KProcess& dst_process, KProcess& src_process, K
     R_RETURN(result);
 }
 
+Result ProcessReceiveMessagePointerDescriptors(int& offset, int& pointer_key,
+                                               KProcessPageTable& dst_page_table,
+                                               KProcessPageTable& src_page_table,
+                                               const MessageBuffer& dst_msg,
+                                               const MessageBuffer& src_msg,
+                                               const ReceiveList& dst_recv_list, bool dst_user) {
+    // Get the offset at the start of processing.
+    const int cur_offset = offset;
+
+    // Get the pointer desc.
+    MessageBuffer::PointerDescriptor src_desc(src_msg, cur_offset);
+    offset += static_cast<int>(MessageBuffer::PointerDescriptor::GetDataSize() / sizeof(u32));
+
+    // Extract address/size.
+    const uint64_t src_pointer = src_desc.GetAddress();
+    const size_t recv_size = src_desc.GetSize();
+    uint64_t recv_pointer = 0;
+
+    // Process the buffer, if it has a size.
+    if (recv_size > 0) {
+        // If using indexing, set index.
+        if (dst_recv_list.IsIndex()) {
+            pointer_key = src_desc.GetIndex();
+        }
+
+        // Get the buffer.
+        dst_recv_list.GetBuffer(recv_pointer, recv_size, pointer_key);
+        R_UNLESS(recv_pointer != 0, ResultOutOfResource);
+
+        // Perform the pointer data copy.
+        if (dst_user) {
+            R_TRY(src_page_table.CopyMemoryFromHeapToHeapWithoutCheckDestination(
+                dst_page_table, recv_pointer, recv_size, KMemoryState::FlagReferenceCounted,
+                KMemoryState::FlagReferenceCounted,
+                KMemoryPermission::NotMapped | KMemoryPermission::KernelReadWrite,
+                KMemoryAttribute::Uncached | KMemoryAttribute::Locked, KMemoryAttribute::Locked,
+                src_pointer, KMemoryState::FlagLinearMapped, KMemoryState::FlagLinearMapped,
+                KMemoryPermission::UserRead, KMemoryAttribute::Uncached, KMemoryAttribute::None));
+        } else {
+            R_TRY(src_page_table.CopyMemoryFromLinearToUser(
+                recv_pointer, recv_size, src_pointer, KMemoryState::FlagLinearMapped,
+                KMemoryState::FlagLinearMapped, KMemoryPermission::UserRead,
+                KMemoryAttribute::Uncached, KMemoryAttribute::None));
+        }
+    }
+
+    // Set the output descriptor.
+    dst_msg.Set(cur_offset, MessageBuffer::PointerDescriptor(reinterpret_cast<void*>(recv_pointer),
+                                                             recv_size, src_desc.GetIndex()));
+
+    R_SUCCEED();
+}
+
+constexpr Result GetMapAliasMemoryState(KMemoryState& out,
+                                        MessageBuffer::MapAliasDescriptor::Attribute attr) {
+    switch (attr) {
+    case MessageBuffer::MapAliasDescriptor::Attribute::Ipc:
+        out = KMemoryState::Ipc;
+        break;
+    case MessageBuffer::MapAliasDescriptor::Attribute::NonSecureIpc:
+        out = KMemoryState::NonSecureIpc;
+        break;
+    case MessageBuffer::MapAliasDescriptor::Attribute::NonDeviceIpc:
+        out = KMemoryState::NonDeviceIpc;
+        break;
+    default:
+        R_THROW(ResultInvalidCombination);
+    }
+
+    R_SUCCEED();
+}
+
+constexpr Result GetMapAliasTestStateAndAttributeMask(KMemoryState& out_state,
+                                                      KMemoryAttribute& out_attr_mask,
+                                                      KMemoryState state) {
+    switch (state) {
+    case KMemoryState::Ipc:
+        out_state = KMemoryState::FlagCanUseIpc;
+        out_attr_mask =
+            KMemoryAttribute::Uncached | KMemoryAttribute::DeviceShared | KMemoryAttribute::Locked;
+        break;
+    case KMemoryState::NonSecureIpc:
+        out_state = KMemoryState::FlagCanUseNonSecureIpc;
+        out_attr_mask = KMemoryAttribute::Uncached | KMemoryAttribute::Locked;
+        break;
+    case KMemoryState::NonDeviceIpc:
+        out_state = KMemoryState::FlagCanUseNonDeviceIpc;
+        out_attr_mask = KMemoryAttribute::Uncached | KMemoryAttribute::Locked;
+        break;
+    default:
+        R_THROW(ResultInvalidCombination);
+    }
+
+    R_SUCCEED();
+}
+
 void CleanupSpecialData(KProcess& dst_process, u32* dst_msg_ptr, size_t dst_buffer_size) {
     // Parse the message.
     const MessageBuffer dst_msg(dst_msg_ptr, dst_buffer_size);
@@ -144,9 +367,714 @@ void CleanupSpecialData(KProcess& dst_process, u32* dst_msg_ptr, size_t dst_buff
     }
 }
 
-} // namespace
+Result CleanupServerHandles(KernelCore& kernel, uint64_t message, size_t buffer_size,
+                            KPhysicalAddress message_paddr) {
+    // The server is assumed to be the current thread.
+    KThread& thread = GetCurrentThread(kernel);
 
-using ThreadQueueImplForKServerSessionRequest = KThreadQueue;
+    // Get the linear message pointer.
+    u32* msg_ptr;
+    if (message) {
+        msg_ptr = kernel.System().DeviceMemory().GetPointer<u32>(message_paddr);
+    } else {
+        msg_ptr = GetCurrentMemory(kernel).GetPointer<u32>(thread.GetTlsAddress());
+        buffer_size = MessageBufferSize;
+        message = GetInteger(thread.GetTlsAddress());
+    }
+
+    // Parse the message.
+    const MessageBuffer msg(msg_ptr, buffer_size);
+    const MessageBuffer::MessageHeader header(msg);
+    const MessageBuffer::SpecialHeader special_header(msg, header);
+
+    // Check that the size is big enough.
+    R_UNLESS(MessageBuffer::GetMessageBufferSize(header, special_header) <= buffer_size,
+             ResultInvalidCombination);
+
+    // If there's a special header, there may be move handles we need to close.
+    if (header.GetHasSpecialHeader()) {
+        // Determine the offset to the start of handles.
+        auto offset = msg.GetSpecialDataIndex(header, special_header);
+        if (special_header.GetHasProcessId()) {
+            offset += static_cast<int>(sizeof(u64) / sizeof(u32));
+        }
+        if (auto copy_count = special_header.GetCopyHandleCount(); copy_count > 0) {
+            offset += static_cast<int>((sizeof(Svc::Handle) * copy_count) / sizeof(u32));
+        }
+
+        // Get the handle table.
+        auto& handle_table = thread.GetOwnerProcess()->GetHandleTable();
+
+        // Close the handles.
+        for (auto i = 0; i < special_header.GetMoveHandleCount(); ++i) {
+            handle_table.Remove(msg.GetHandle(offset));
+            offset += static_cast<int>(sizeof(Svc::Handle) / sizeof(u32));
+        }
+    }
+
+    R_SUCCEED();
+}
+
+Result CleanupServerMap(KSessionRequest* request, KProcess* server_process) {
+    // If there's no server process, there's nothing to clean up.
+    R_SUCCEED_IF(server_process == nullptr);
+
+    // Get the page table.
+    auto& server_page_table = server_process->GetPageTable();
+
+    // Cleanup Send mappings.
+    for (size_t i = 0; i < request->GetSendCount(); ++i) {
+        R_TRY(server_page_table.CleanupForIpcServer(request->GetSendServerAddress(i),
+                                                    request->GetSendSize(i),
+                                                    request->GetSendMemoryState(i)));
+    }
+
+    // Cleanup Receive mappings.
+    for (size_t i = 0; i < request->GetReceiveCount(); ++i) {
+        R_TRY(server_page_table.CleanupForIpcServer(request->GetReceiveServerAddress(i),
+                                                    request->GetReceiveSize(i),
+                                                    request->GetReceiveMemoryState(i)));
+    }
+
+    // Cleanup Exchange mappings.
+    for (size_t i = 0; i < request->GetExchangeCount(); ++i) {
+        R_TRY(server_page_table.CleanupForIpcServer(request->GetExchangeServerAddress(i),
+                                                    request->GetExchangeSize(i),
+                                                    request->GetExchangeMemoryState(i)));
+    }
+
+    R_SUCCEED();
+}
+
+Result CleanupClientMap(KSessionRequest* request, KProcessPageTable* client_page_table) {
+    // If there's no client page table, there's nothing to clean up.
+    R_SUCCEED_IF(client_page_table == nullptr);
+
+    // Cleanup Send mappings.
+    for (size_t i = 0; i < request->GetSendCount(); ++i) {
+        R_TRY(client_page_table->CleanupForIpcClient(request->GetSendClientAddress(i),
+                                                     request->GetSendSize(i),
+                                                     request->GetSendMemoryState(i)));
+    }
+
+    // Cleanup Receive mappings.
+    for (size_t i = 0; i < request->GetReceiveCount(); ++i) {
+        R_TRY(client_page_table->CleanupForIpcClient(request->GetReceiveClientAddress(i),
+                                                     request->GetReceiveSize(i),
+                                                     request->GetReceiveMemoryState(i)));
+    }
+
+    // Cleanup Exchange mappings.
+    for (size_t i = 0; i < request->GetExchangeCount(); ++i) {
+        R_TRY(client_page_table->CleanupForIpcClient(request->GetExchangeClientAddress(i),
+                                                     request->GetExchangeSize(i),
+                                                     request->GetExchangeMemoryState(i)));
+    }
+
+    R_SUCCEED();
+}
+
+Result CleanupMap(KSessionRequest* request, KProcess* server_process,
+                  KProcessPageTable* client_page_table) {
+    // Cleanup the server map.
+    R_TRY(CleanupServerMap(request, server_process));
+
+    // Cleanup the client map.
+    R_TRY(CleanupClientMap(request, client_page_table));
+
+    R_SUCCEED();
+}
+
+Result ProcessReceiveMessageMapAliasDescriptors(int& offset, KProcessPageTable& dst_page_table,
+                                                KProcessPageTable& src_page_table,
+                                                const MessageBuffer& dst_msg,
+                                                const MessageBuffer& src_msg,
+                                                KSessionRequest* request, KMemoryPermission perm,
+                                                bool send) {
+    // Get the offset at the start of processing.
+    const int cur_offset = offset;
+
+    // Get the map alias descriptor.
+    MessageBuffer::MapAliasDescriptor src_desc(src_msg, cur_offset);
+    offset += static_cast<int>(MessageBuffer::MapAliasDescriptor::GetDataSize() / sizeof(u32));
+
+    // Extract address/size.
+    const KProcessAddress src_address = src_desc.GetAddress();
+    const size_t size = src_desc.GetSize();
+    KProcessAddress dst_address = 0;
+
+    // Determine the result memory state.
+    KMemoryState dst_state;
+    R_TRY(GetMapAliasMemoryState(dst_state, src_desc.GetAttribute()));
+
+    // Process the buffer, if it has a size.
+    if (size > 0) {
+        // Set up the source pages for ipc.
+        R_TRY(dst_page_table.SetupForIpc(std::addressof(dst_address), size, src_address,
+                                         src_page_table, perm, dst_state, send));
+
+        // Ensure that we clean up on failure.
+        ON_RESULT_FAILURE {
+            dst_page_table.CleanupForIpcServer(dst_address, size, dst_state);
+            src_page_table.CleanupForIpcClient(src_address, size, dst_state);
+        };
+
+        // Push the appropriate mapping.
+        if (perm == KMemoryPermission::UserRead) {
+            R_TRY(request->PushSend(src_address, dst_address, size, dst_state));
+        } else if (send) {
+            R_TRY(request->PushExchange(src_address, dst_address, size, dst_state));
+        } else {
+            R_TRY(request->PushReceive(src_address, dst_address, size, dst_state));
+        }
+    }
+
+    // Set the output descriptor.
+    dst_msg.Set(cur_offset,
+                MessageBuffer::MapAliasDescriptor(reinterpret_cast<void*>(GetInteger(dst_address)),
+                                                  size, src_desc.GetAttribute()));
+
+    R_SUCCEED();
+}
+
+Result ReceiveMessage(KernelCore& kernel, bool& recv_list_broken, uint64_t dst_message_buffer,
+                      size_t dst_buffer_size, KPhysicalAddress dst_message_paddr,
+                      KThread& src_thread, uint64_t src_message_buffer, size_t src_buffer_size,
+                      KServerSession* session, KSessionRequest* request) {
+    // Prepare variables for receive.
+    KThread& dst_thread = GetCurrentThread(kernel);
+    KProcess& dst_process = *(dst_thread.GetOwnerProcess());
+    KProcess& src_process = *(src_thread.GetOwnerProcess());
+    auto& dst_page_table = dst_process.GetPageTable();
+    auto& src_page_table = src_process.GetPageTable();
+
+    // NOTE: Session is used only for debugging, and so may go unused.
+    (void)session;
+
+    // The receive list is initially not broken.
+    recv_list_broken = false;
+
+    // Set the server process for the request.
+    request->SetServerProcess(std::addressof(dst_process));
+
+    // Determine the message buffers.
+    u32 *dst_msg_ptr, *src_msg_ptr;
+    bool dst_user, src_user;
+
+    if (dst_message_buffer) {
+        dst_msg_ptr = kernel.System().DeviceMemory().GetPointer<u32>(dst_message_paddr);
+        dst_user = true;
+    } else {
+        dst_msg_ptr = dst_page_table.GetMemory().GetPointer<u32>(dst_thread.GetTlsAddress());
+        dst_buffer_size = MessageBufferSize;
+        dst_message_buffer = GetInteger(dst_thread.GetTlsAddress());
+        dst_user = false;
+    }
+
+    if (src_message_buffer) {
+        // NOTE: Nintendo calls GetPhysicalAddress here and does not check its result.
+        src_msg_ptr = src_page_table.GetMemory().GetPointer<u32>(src_message_buffer);
+        src_user = true;
+    } else {
+        src_msg_ptr = src_page_table.GetMemory().GetPointer<u32>(src_thread.GetTlsAddress());
+        src_buffer_size = MessageBufferSize;
+        src_message_buffer = GetInteger(src_thread.GetTlsAddress());
+        src_user = false;
+    }
+
+    // Parse the headers.
+    const MessageBuffer dst_msg(dst_msg_ptr, dst_buffer_size);
+    const MessageBuffer src_msg(src_msg_ptr, src_buffer_size);
+    const MessageBuffer::MessageHeader dst_header(dst_msg);
+    const MessageBuffer::MessageHeader src_header(src_msg);
+    const MessageBuffer::SpecialHeader dst_special_header(dst_msg, dst_header);
+    const MessageBuffer::SpecialHeader src_special_header(src_msg, src_header);
+
+    // Get the end of the source message.
+    const size_t src_end_offset =
+        MessageBuffer::GetRawDataIndex(src_header, src_special_header) + src_header.GetRawCount();
+
+    // Ensure that the headers fit.
+    R_UNLESS(MessageBuffer::GetMessageBufferSize(dst_header, dst_special_header) <= dst_buffer_size,
+             ResultInvalidCombination);
+    R_UNLESS(MessageBuffer::GetMessageBufferSize(src_header, src_special_header) <= src_buffer_size,
+             ResultInvalidCombination);
+
+    // Ensure the receive list offset is after the end of raw data.
+    if (dst_header.GetReceiveListOffset()) {
+        R_UNLESS(dst_header.GetReceiveListOffset() >=
+                     MessageBuffer::GetRawDataIndex(dst_header, dst_special_header) +
+                         dst_header.GetRawCount(),
+                 ResultInvalidCombination);
+    }
+
+    // Ensure that the destination buffer is big enough to receive the source.
+    R_UNLESS(dst_buffer_size >= src_end_offset * sizeof(u32), ResultMessageTooLarge);
+
+    // Get the receive list.
+    const s32 dst_recv_list_idx =
+        MessageBuffer::GetReceiveListIndex(dst_header, dst_special_header);
+    ReceiveList dst_recv_list(dst_msg_ptr, dst_message_buffer, dst_page_table, dst_header,
+                              dst_special_header, dst_buffer_size, src_end_offset,
+                              dst_recv_list_idx, !dst_user);
+
+    // Ensure that the source special header isn't invalid.
+    const bool src_has_special_header = src_header.GetHasSpecialHeader();
+    if (src_has_special_header) {
+        // Sending move handles from client -> server is not allowed.
+        R_UNLESS(src_special_header.GetMoveHandleCount() == 0, ResultInvalidCombination);
+    }
+
+    // Prepare for further processing.
+    int pointer_key = 0;
+    int offset = dst_msg.Set(src_header);
+
+    // Set up a guard to make sure that we end up in a clean state on error.
+    ON_RESULT_FAILURE {
+        // Cleanup mappings.
+        CleanupMap(request, std::addressof(dst_process), std::addressof(src_page_table));
+
+        // Cleanup special data.
+        if (src_header.GetHasSpecialHeader()) {
+            CleanupSpecialData(dst_process, dst_msg_ptr, dst_buffer_size);
+        }
+
+        // Cleanup the header if the receive list isn't broken.
+        if (!recv_list_broken) {
+            dst_msg.Set(dst_header);
+            if (dst_header.GetHasSpecialHeader()) {
+                dst_msg.Set(dst_special_header);
+            }
+        }
+    };
+
+    // Process any special data.
+    if (src_header.GetHasSpecialHeader()) {
+        // After we process, make sure we track whether the receive list is broken.
+        SCOPE_EXIT({
+            if (offset > dst_recv_list_idx) {
+                recv_list_broken = true;
+            }
+        });
+
+        // Process special data.
+        R_TRY(ProcessMessageSpecialData<false>(offset, dst_process, src_process, src_thread,
+                                               dst_msg, src_msg, src_special_header));
+    }
+
+    // Process any pointer buffers.
+    for (auto i = 0; i < src_header.GetPointerCount(); ++i) {
+        // After we process, make sure we track whether the receive list is broken.
+        SCOPE_EXIT({
+            if (offset > dst_recv_list_idx) {
+                recv_list_broken = true;
+            }
+        });
+
+        R_TRY(ProcessReceiveMessagePointerDescriptors(
+            offset, pointer_key, dst_page_table, src_page_table, dst_msg, src_msg, dst_recv_list,
+            dst_user && dst_header.GetReceiveListCount() ==
+                            MessageBuffer::MessageHeader::ReceiveListCountType_ToMessageBuffer));
+    }
+
+    // Process any map alias buffers.
+    for (auto i = 0; i < src_header.GetMapAliasCount(); ++i) {
+        // After we process, make sure we track whether the receive list is broken.
+        SCOPE_EXIT({
+            if (offset > dst_recv_list_idx) {
+                recv_list_broken = true;
+            }
+        });
+
+        // We process in order send, recv, exch. Buffers after send (recv/exch) are ReadWrite.
+        const KMemoryPermission perm = (i >= src_header.GetSendCount())
+                                           ? KMemoryPermission::UserReadWrite
+                                           : KMemoryPermission::UserRead;
+
+        // Buffer is send if it is send or exch.
+        const bool send = (i < src_header.GetSendCount()) ||
+                          (i >= src_header.GetSendCount() + src_header.GetReceiveCount());
+
+        R_TRY(ProcessReceiveMessageMapAliasDescriptors(offset, dst_page_table, src_page_table,
+                                                       dst_msg, src_msg, request, perm, send));
+    }
+
+    // Process any raw data.
+    if (const auto raw_count = src_header.GetRawCount(); raw_count != 0) {
+        // After we process, make sure we track whether the receive list is broken.
+        SCOPE_EXIT({
+            if (offset + raw_count > dst_recv_list_idx) {
+                recv_list_broken = true;
+            }
+        });
+
+        // Get the offset and size.
+        const size_t offset_words = offset * sizeof(u32);
+        const size_t raw_size = raw_count * sizeof(u32);
+
+        if (!dst_user && !src_user) {
+            // Fast case is TLS -> TLS, do raw memcpy if we can.
+            std::memcpy(dst_msg_ptr + offset, src_msg_ptr + offset, raw_size);
+        } else if (dst_user) {
+            // Determine how much fast size we can copy.
+            const size_t max_fast_size = std::min<size_t>(offset_words + raw_size, PageSize);
+            const size_t fast_size = max_fast_size - offset_words;
+
+            // Determine source state; if user buffer, we require heap, and otherwise only linear
+            // mapped (to enable tls use).
+            const auto src_state =
+                src_user ? KMemoryState::FlagReferenceCounted : KMemoryState::FlagLinearMapped;
+
+            // Determine the source permission. User buffer should be unmapped + read, TLS should be
+            // user readable.
+            const KMemoryPermission src_perm = static_cast<KMemoryPermission>(
+                src_user ? KMemoryPermission::NotMapped | KMemoryPermission::KernelRead
+                         : KMemoryPermission::UserRead);
+
+            // Perform the fast part of the copy.
+            R_TRY(src_page_table.CopyMemoryFromLinearToKernel(
+                dst_msg_ptr + offset, fast_size, src_message_buffer + offset_words, src_state,
+                src_state, src_perm, KMemoryAttribute::Uncached, KMemoryAttribute::None));
+
+            // If the fast part of the copy didn't get everything, perform the slow part of the
+            // copy.
+            if (fast_size < raw_size) {
+                R_TRY(src_page_table.CopyMemoryFromHeapToHeap(
+                    dst_page_table, dst_message_buffer + max_fast_size, raw_size - fast_size,
+                    KMemoryState::FlagReferenceCounted, KMemoryState::FlagReferenceCounted,
+                    KMemoryPermission::NotMapped | KMemoryPermission::KernelReadWrite,
+                    KMemoryAttribute::Uncached | KMemoryAttribute::Locked, KMemoryAttribute::Locked,
+                    src_message_buffer + max_fast_size, src_state, src_state, src_perm,
+                    KMemoryAttribute::Uncached, KMemoryAttribute::None));
+            }
+        } else /* if (src_user) */ {
+            // The source is a user buffer, so it should be unmapped + readable.
+            constexpr KMemoryPermission SourcePermission = static_cast<KMemoryPermission>(
+                KMemoryPermission::NotMapped | KMemoryPermission::KernelRead);
+
+            // Copy the memory.
+            R_TRY(src_page_table.CopyMemoryFromLinearToUser(
+                dst_message_buffer + offset_words, raw_size, src_message_buffer + offset_words,
+                KMemoryState::FlagReferenceCounted, KMemoryState::FlagReferenceCounted,
+                SourcePermission, KMemoryAttribute::Uncached, KMemoryAttribute::None));
+        }
+    }
+
+    // We succeeded!
+    R_SUCCEED();
+}
+
+Result ProcessSendMessageReceiveMapping(KProcessPageTable& src_page_table,
+                                        KProcessPageTable& dst_page_table,
+                                        KProcessAddress client_address,
+                                        KProcessAddress server_address, size_t size,
+                                        KMemoryState src_state) {
+    // Zero-sized buffers require no copying, so succeed immediately.
+    R_SUCCEED_IF(size == 0);
+
+    // Derive the memory state and attribute mask to test from the source memory state.
+    KMemoryState test_state;
+    KMemoryAttribute test_attr_mask;
+    R_TRY(GetMapAliasTestStateAndAttributeMask(test_state, test_attr_mask, src_state));
+
+    // Determine buffer extents: page-aligned outer bounds and fully page-covered inner bounds.
+    KProcessAddress aligned_dst_start = Common::AlignDown(GetInteger(client_address), PageSize);
+    KProcessAddress aligned_dst_end = Common::AlignUp(GetInteger(client_address) + size, PageSize);
+    KProcessAddress mapping_dst_start = Common::AlignUp(GetInteger(client_address), PageSize);
+    KProcessAddress mapping_dst_end =
+        Common::AlignDown(GetInteger(client_address) + size, PageSize);
+
+    KProcessAddress mapping_src_end =
+        Common::AlignDown(GetInteger(server_address) + size, PageSize);
+
+    // If the start is unaligned, copy the leading partial page (at most `size` bytes).
+    if (aligned_dst_start != mapping_dst_start) {
+        ASSERT(client_address < mapping_dst_start);
+        const size_t copy_size = std::min<size_t>(size, mapping_dst_start - client_address);
+        R_TRY(dst_page_table.CopyMemoryFromUserToLinear(
+            client_address, copy_size, test_state, test_state, KMemoryPermission::UserReadWrite,
+            test_attr_mask, KMemoryAttribute::None, server_address));
+    }
+
+    // If the end is unaligned and was not already covered by the head copy, copy the tail.
+    if (mapping_dst_end < aligned_dst_end &&
+        (aligned_dst_start == mapping_dst_start || aligned_dst_start < mapping_dst_end)) {
+        const size_t copy_size = client_address + size - mapping_dst_end;
+        R_TRY(dst_page_table.CopyMemoryFromUserToLinear(
+            mapping_dst_end, copy_size, test_state, test_state, KMemoryPermission::UserReadWrite,
+            test_attr_mask, KMemoryAttribute::None, mapping_src_end));
+    }
+
+    R_SUCCEED();
+}
+
+Result ProcessSendMessagePointerDescriptors(int& offset, int& pointer_key,
+                                            KProcessPageTable& src_page_table,
+                                            KProcessPageTable& dst_page_table,
+                                            const MessageBuffer& dst_msg,
+                                            const MessageBuffer& src_msg,
+                                            const ReceiveList& dst_recv_list, bool dst_user) {
+    // Remember where this descriptor starts; the output descriptor is written there.
+    const int cur_offset = offset;
+
+    // Read the source pointer descriptor and advance the offset past it (in u32 words).
+    MessageBuffer::PointerDescriptor src_desc(src_msg, cur_offset);
+    offset += static_cast<int>(MessageBuffer::PointerDescriptor::GetDataSize() / sizeof(u32));
+
+    // Extract the source address and size; the receive pointer stays zero for empty buffers.
+    const uint64_t src_pointer = src_desc.GetAddress();
+    const size_t recv_size = src_desc.GetSize();
+    uint64_t recv_pointer = 0;
+
+    // Only non-empty buffers need a destination buffer and a data copy.
+    if (recv_size > 0) {
+        // When the receive list is indexed, the descriptor's index is used as the key.
+        if (dst_recv_list.IsIndex()) {
+            pointer_key = src_desc.GetIndex();
+        }
+
+        // Ask the receive list for a buffer; a zero pointer is reported as out of resource.
+        dst_recv_list.GetBuffer(recv_pointer, recv_size, pointer_key);
+        R_UNLESS(recv_pointer != 0, ResultOutOfResource);
+
+        // Copy the pointer data; the state/permission to check depend on the destination kind.
+        const bool dst_heap = dst_user && dst_recv_list.IsToMessageBuffer();
+        const auto dst_state =
+            dst_heap ? KMemoryState::FlagReferenceCounted : KMemoryState::FlagLinearMapped;
+        const KMemoryPermission dst_perm =
+            dst_heap ? KMemoryPermission::NotMapped | KMemoryPermission::KernelReadWrite
+                     : KMemoryPermission::UserReadWrite;
+        R_TRY(dst_page_table.CopyMemoryFromUserToLinear(
+            recv_pointer, recv_size, dst_state, dst_state, dst_perm, KMemoryAttribute::Uncached,
+            KMemoryAttribute::None, src_pointer));
+    }
+
+    // Write the output descriptor (receive pointer, size, source index) to the destination.
+    dst_msg.Set(cur_offset, MessageBuffer::PointerDescriptor(reinterpret_cast<void*>(recv_pointer),
+                                                             recv_size, src_desc.GetIndex()));
+
+    R_SUCCEED();
+}
+
+Result SendMessage(KernelCore& kernel, uint64_t src_message_buffer, size_t src_buffer_size,
+                   KPhysicalAddress src_message_paddr, KThread& dst_thread,
+                   uint64_t dst_message_buffer, size_t dst_buffer_size, KServerSession* session,
+                   KSessionRequest* request) {
+    // The sender is the current thread; resolve both processes and their page tables.
+    KThread& src_thread = GetCurrentThread(kernel);
+    KProcess& dst_process = *(dst_thread.GetOwnerProcess());
+    KProcess& src_process = *(src_thread.GetOwnerProcess());
+    auto& dst_page_table = dst_process.GetPageTable();
+    auto& src_page_table = src_process.GetPageTable();
+
+    // NOTE: Session is used only for debugging, and so may go unused.
+    (void)session;
+
+    // Determine the message buffers: a nonzero address is a user buffer, zero selects TLS.
+    u32 *dst_msg_ptr, *src_msg_ptr;
+    bool dst_user, src_user;
+
+    if (dst_message_buffer) {
+        // NOTE: The pointer lookup result is unchecked (Nintendo does not check it either).
+        dst_msg_ptr = dst_page_table.GetMemory().GetPointer<u32>(dst_message_buffer);
+        dst_user = true;
+    } else {
+        dst_msg_ptr = dst_page_table.GetMemory().GetPointer<u32>(dst_thread.GetTlsAddress());
+        dst_buffer_size = MessageBufferSize;
+        dst_message_buffer = GetInteger(dst_thread.GetTlsAddress());
+        dst_user = false;
+    }
+
+    if (src_message_buffer) {
+        src_msg_ptr = src_page_table.GetMemory().GetPointer<u32>(src_message_buffer);
+        src_user = true;
+    } else {
+        src_msg_ptr = src_page_table.GetMemory().GetPointer<u32>(src_thread.GetTlsAddress());
+        src_buffer_size = MessageBufferSize;
+        src_message_buffer = GetInteger(src_thread.GetTlsAddress());
+        src_user = false;
+    }
+
+    // Parse the headers.
+    const MessageBuffer dst_msg(dst_msg_ptr, dst_buffer_size);
+    const MessageBuffer src_msg(src_msg_ptr, src_buffer_size);
+    const MessageBuffer::MessageHeader dst_header(dst_msg);
+    const MessageBuffer::MessageHeader src_header(src_msg);
+    const MessageBuffer::SpecialHeader dst_special_header(dst_msg, dst_header);
+    const MessageBuffer::SpecialHeader src_special_header(src_msg, src_header);
+
+    // Get the end of the source message, in u32 words (raw data index plus raw count).
+    const size_t src_end_offset =
+        MessageBuffer::GetRawDataIndex(src_header, src_special_header) + src_header.GetRawCount();
+
+    // Declare variables for processing.
+    int offset = 0;
+    int pointer_key = 0;
+    bool processed_special_data = false;
+
+    // Send the message.
+    {
+        // Make sure that we end up in a clean state on error.
+        ON_RESULT_FAILURE {
+            // Cleanup special data if processing reached it, otherwise the server handles.
+            if (processed_special_data) {
+                if (src_header.GetHasSpecialHeader()) {
+                    CleanupSpecialData(dst_process, dst_msg_ptr, dst_buffer_size);
+                }
+            } else {
+                CleanupServerHandles(kernel, src_user ? src_message_buffer : 0, src_buffer_size,
+                                     src_message_paddr);
+            }
+
+            // Cleanup mappings.
+            CleanupMap(request, std::addressof(src_process), std::addressof(dst_page_table));
+        };
+
+        // Ensure that the headers fit.
+        R_UNLESS(MessageBuffer::GetMessageBufferSize(src_header, src_special_header) <=
+                     src_buffer_size,
+                 ResultInvalidCombination);
+        R_UNLESS(MessageBuffer::GetMessageBufferSize(dst_header, dst_special_header) <=
+                     dst_buffer_size,
+                 ResultInvalidCombination);
+
+        // Ensure the receive list offset is after the end of raw data.
+        if (dst_header.GetReceiveListOffset()) {
+            R_UNLESS(dst_header.GetReceiveListOffset() >=
+                         MessageBuffer::GetRawDataIndex(dst_header, dst_special_header) +
+                             dst_header.GetRawCount(),
+                     ResultInvalidCombination);
+        }
+
+        // Ensure that the destination buffer is big enough to receive the source.
+        R_UNLESS(dst_buffer_size >= src_end_offset * sizeof(u32), ResultMessageTooLarge);
+
+        // Replies must have no send, receive, or exchange buffers.
+        R_UNLESS(src_header.GetSendCount() == 0, ResultInvalidCombination);
+        R_UNLESS(src_header.GetReceiveCount() == 0, ResultInvalidCombination);
+        R_UNLESS(src_header.GetExchangeCount() == 0, ResultInvalidCombination);
+
+        // Get the receive list.
+        const s32 dst_recv_list_idx =
+            MessageBuffer::GetReceiveListIndex(dst_header, dst_special_header);
+        ReceiveList dst_recv_list(dst_msg_ptr, dst_message_buffer, dst_page_table, dst_header,
+                                  dst_special_header, dst_buffer_size, src_end_offset,
+                                  dst_recv_list_idx, !dst_user);
+
+        // Handle any receive buffers recorded on the request.
+        for (size_t i = 0; i < request->GetReceiveCount(); ++i) {
+            R_TRY(ProcessSendMessageReceiveMapping(
+                src_page_table, dst_page_table, request->GetReceiveClientAddress(i),
+                request->GetReceiveServerAddress(i), request->GetReceiveSize(i),
+                request->GetReceiveMemoryState(i)));
+        }
+
+        // Handle any exchange buffers recorded on the request.
+        for (size_t i = 0; i < request->GetExchangeCount(); ++i) {
+            R_TRY(ProcessSendMessageReceiveMapping(
+                src_page_table, dst_page_table, request->GetExchangeClientAddress(i),
+                request->GetExchangeServerAddress(i), request->GetExchangeSize(i),
+                request->GetExchangeMemoryState(i)));
+        }
+
+        // Set the header; the returned offset is where processing continues.
+        offset = dst_msg.Set(src_header);
+
+        // Process any special data.
+        ASSERT(GetCurrentThreadPointer(kernel) == std::addressof(src_thread));
+        processed_special_data = true;
+        if (src_header.GetHasSpecialHeader()) {
+            R_TRY(ProcessMessageSpecialData<true>(offset, dst_process, src_process, src_thread,
+                                                  dst_msg, src_msg, src_special_header));
+        }
+
+        // Process any pointer buffers.
+        for (auto i = 0; i < src_header.GetPointerCount(); ++i) {
+            R_TRY(ProcessSendMessagePointerDescriptors(
+                offset, pointer_key, src_page_table, dst_page_table, dst_msg, src_msg,
+                dst_recv_list,
+                dst_user &&
+                    dst_header.GetReceiveListCount() ==
+                        MessageBuffer::MessageHeader::ReceiveListCountType_ToMessageBuffer));
+        }
+
+        // Write empty map alias descriptors in place of any the source header declares.
+        for (auto i = 0; i < src_header.GetMapAliasCount(); ++i) {
+            offset = dst_msg.Set(offset, MessageBuffer::MapAliasDescriptor());
+        }
+
+        // Process any raw data.
+        if (const auto raw_count = src_header.GetRawCount(); raw_count != 0) {
+            // Get the offset and size in bytes (offset_words is a byte offset despite its name).
+            const size_t offset_words = offset * sizeof(u32);
+            const size_t raw_size = raw_count * sizeof(u32);
+
+            if (!dst_user && !src_user) {
+                // Fast case is TLS -> TLS, do raw memcpy if we can.
+                std::memcpy(dst_msg_ptr + offset, src_msg_ptr + offset, raw_size);
+            } else if (src_user) {
+                // The fast copy covers only the raw data within the first PageSize bytes.
+                const size_t max_fast_size = std::min<size_t>(offset_words + raw_size, PageSize);
+                const size_t fast_size = max_fast_size - offset_words;
+
+                // Determine dst state; if user buffer, we require heap, and otherwise only linear
+                // mapped (to enable tls use).
+                const auto dst_state =
+                    dst_user ? KMemoryState::FlagReferenceCounted : KMemoryState::FlagLinearMapped;
+
+                // Determine the dst permission. User buffer should be unmapped + kernel
+                // read/write, TLS should be user read/write.
+                const KMemoryPermission dst_perm =
+                    dst_user ? KMemoryPermission::NotMapped | KMemoryPermission::KernelReadWrite
+                             : KMemoryPermission::UserReadWrite;
+
+                // Perform the fast part of the copy.
+                R_TRY(dst_page_table.CopyMemoryFromKernelToLinear(
+                    dst_message_buffer + offset_words, fast_size, dst_state, dst_state, dst_perm,
+                    KMemoryAttribute::Uncached, KMemoryAttribute::None, src_msg_ptr + offset));
+
+                // If the fast part of the copy didn't get everything, perform the slow part of the
+                // copy.
+                if (fast_size < raw_size) {
+                    R_TRY(dst_page_table.CopyMemoryFromHeapToHeap(
+                        dst_page_table, dst_message_buffer + max_fast_size, raw_size - fast_size,
+                        dst_state, dst_state, dst_perm, KMemoryAttribute::Uncached,
+                        KMemoryAttribute::None, src_message_buffer + max_fast_size,
+                        KMemoryState::FlagReferenceCounted, KMemoryState::FlagReferenceCounted,
+                        KMemoryPermission::NotMapped | KMemoryPermission::KernelRead,
+                        KMemoryAttribute::Uncached | KMemoryAttribute::Locked,
+                        KMemoryAttribute::Locked));
+                }
+            } else /* if (dst_user) */ {
+                // The destination is a user buffer, so it should be unmapped + kernel read/write.
+                constexpr KMemoryPermission DestinationPermission =
+                    KMemoryPermission::NotMapped | KMemoryPermission::KernelReadWrite;
+
+                // Copy the memory.
+                R_TRY(dst_page_table.CopyMemoryFromUserToLinear(
+                    dst_message_buffer + offset_words, raw_size, KMemoryState::FlagReferenceCounted,
+                    KMemoryState::FlagReferenceCounted, DestinationPermission,
+                    KMemoryAttribute::Uncached, KMemoryAttribute::None,
+                    src_message_buffer + offset_words));
+            }
+        }
+    }
+
+    // Perform (and validate) any remaining cleanup.
+    R_RETURN(CleanupMap(request, std::addressof(src_process), std::addressof(dst_page_table)));
+}
+
+void ReplyAsyncError(KProcess* to_process, uint64_t to_msg_buf, size_t to_msg_buf_size,
+                     Result result) {
+    // Resolve the message buffer address through the target process's memory.
+    auto& process_memory = to_process->GetMemory();
+    u32* const buffer_words = process_memory.GetPointer<u32>(to_msg_buf);
+
+    // Wrap the buffer and store the async result in it.
+    MessageBuffer(buffer_words, to_msg_buf_size).SetAsyncResult(result);
+}
+
+} // namespace
 
 KServerSession::KServerSession(KernelCore& kernel)
     : KSynchronizationObject{kernel}, m_lock{m_kernel} {}
@@ -161,6 +1089,354 @@ void KServerSession::Destroy() {
     m_parent->Close();
 }
 
+Result KServerSession::ReceiveRequest(uintptr_t server_message, uintptr_t server_buffer_size,
+                                      KPhysicalAddress server_message_paddr,
+                                      std::shared_ptr<Service::HLERequestContext>* out_context,
+                                      std::weak_ptr<Service::SessionRequestManager> manager) {
+    // Lock the session.
+    KScopedLightLock lk{m_lock};
+
+    // Get the request and client thread.
+    KSessionRequest* request;
+    KThread* client_thread;
+
+    {
+        KScopedSchedulerLock sl{m_kernel};
+
+        // Ensure that the client side of the session has not been closed.
+        R_UNLESS(!m_parent->IsClientClosed(), ResultSessionClosed);
+
+        // Ensure we aren't already servicing a request.
+        R_UNLESS(m_current_request == nullptr, ResultNotFound);
+
+        // Ensure we have a request to service.
+        R_UNLESS(!m_request_list.empty(), ResultNotFound);
+
+        // Pop the first request from the list.
+        request = std::addressof(m_request_list.front());
+        m_request_list.pop_front();
+
+        // Get the thread for the request.
+        client_thread = request->GetThread();
+        R_UNLESS(client_thread != nullptr, ResultSessionClosed);
+
+        // Open a reference to the client thread; it is closed again when this function exits.
+        client_thread->Open();
+    }
+
+    SCOPE_EXIT({ client_thread->Close(); });
+
+    // Set the request as our current.
+    m_current_request = request;
+
+    // Get the client message address and size.
+    uint64_t client_message = request->GetAddress();
+    size_t client_buffer_size = request->GetSize();
+    bool recv_list_broken = false;
+
+    // Receive the message.
+    Result result = ResultSuccess;
+
+    if (out_context != nullptr) {
+        // HLE request: build a request context directly from the client's command buffer.
+        if (!client_message) {
+            client_message = GetInteger(client_thread->GetTlsAddress());
+        }
+        Core::Memory::Memory& memory{client_thread->GetOwnerProcess()->GetMemory()};
+        u32* cmd_buf{reinterpret_cast<u32*>(memory.GetPointer(client_message))};
+        *out_context =
+            std::make_shared<Service::HLERequestContext>(m_kernel, memory, this, client_thread);
+        (*out_context)->SetSessionRequestManager(manager);
+        (*out_context)
+            ->PopulateFromIncomingCommandBuffer(*client_thread->GetOwnerProcess(), cmd_buf);
+        // We succeeded.
+        R_SUCCEED();
+    } else {
+        result = ReceiveMessage(m_kernel, recv_list_broken, server_message, server_buffer_size,
+                                server_message_paddr, *client_thread, client_message,
+                                client_buffer_size, this, request);
+    }
+
+    // Handle cleanup on receive failure.
+    if (R_FAILED(result)) {
+        // Cache the result to return it to the client.
+        const Result result_for_client = result;
+
+        // Clear the current request, and notify if further requests are still queued.
+        {
+            KScopedSchedulerLock sl(m_kernel);
+            ASSERT(m_current_request == request);
+            m_current_request = nullptr;
+            if (!m_request_list.empty()) {
+                this->NotifyAvailable();
+            }
+        }
+
+        // Reply to the client.
+        {
+            // After we reply, close our reference to the request.
+            SCOPE_EXIT({ request->Close(); });
+
+            // Get the event to check whether the request is async.
+            if (KEvent* event = request->GetEvent(); event != nullptr) {
+                // The client sent an async request.
+                KProcess* client = client_thread->GetOwnerProcess();
+                auto& client_pt = client->GetPageTable();
+
+                // Send the async result (always a failure here, as this is the failure path).
+                if (R_FAILED(result_for_client)) {
+                    ReplyAsyncError(client, client_message, client_buffer_size, result_for_client);
+                }
+
+                // Unlock the client buffer.
+                // NOTE: Nintendo does not check the result of this.
+                client_pt.UnlockForIpcUserBuffer(client_message, client_buffer_size);
+
+                // Signal the event.
+                event->Signal();
+            } else {
+                // End the client thread's wait.
+                KScopedSchedulerLock sl(m_kernel);
+
+                if (!client_thread->IsTerminationRequested()) {
+                    client_thread->EndWait(result_for_client);
+                }
+            }
+        }
+
+        // Set the server result; the original failure was already reported to the client.
+        if (recv_list_broken) {
+            result = ResultReceiveListBroken;
+        } else {
+            result = ResultNotFound;
+        }
+    }
+
+    R_RETURN(result);
+}
+
+Result KServerSession::SendReply(uintptr_t server_message, uintptr_t server_buffer_size,
+                                 KPhysicalAddress server_message_paddr, bool is_hle) {
+    // Lock the session.
+    KScopedLightLock lk{m_lock};
+
+    // Get the request.
+    KSessionRequest* request;
+    {
+        KScopedSchedulerLock sl{m_kernel};
+
+        // Get the current request; replying without one is an invalid state.
+        request = m_current_request;
+        R_UNLESS(request != nullptr, ResultInvalidState);
+
+        // Clear the current request, since we're processing it.
+        m_current_request = nullptr;
+        if (!m_request_list.empty()) {
+            this->NotifyAvailable();
+        }
+    }
+
+    // Close reference to the request once we're done processing it.
+    SCOPE_EXIT({ request->Close(); });
+
+    // Extract relevant information from the request.
+    const uint64_t client_message = request->GetAddress();
+    const size_t client_buffer_size = request->GetSize();
+    KThread* client_thread = request->GetThread();
+    KEvent* event = request->GetEvent();
+
+    // We count as closed if the request has no client thread or the client side is closed.
+    const bool closed = (client_thread == nullptr || m_parent->IsClientClosed());
+
+    Result result = ResultSuccess;
+    if (!closed) {
+        // If we're not closed, send the reply.
+        if (is_hle) {
+            // HLE servers write directly to a pointer to the thread command buffer. Therefore
+            // the reply has already been written in this case.
+        } else {
+            result = SendMessage(m_kernel, server_message, server_buffer_size, server_message_paddr,
+                                 *client_thread, client_message, client_buffer_size, this, request);
+        }
+    } else if (!is_hle) {
+        // We're closed and not HLE, so clean up handles and mappings instead of replying.
+        KProcess* server_process = request->GetServerProcess();
+        KProcess* client_process =
+            (client_thread != nullptr) ? client_thread->GetOwnerProcess() : nullptr;
+        KProcessPageTable* client_page_table =
+            (client_process != nullptr) ? std::addressof(client_process->GetPageTable()) : nullptr;
+
+        // Cleanup server handles.
+        result = CleanupServerHandles(m_kernel, server_message, server_buffer_size,
+                                      server_message_paddr);
+
+        // Cleanup mappings.
+        Result cleanup_map_result = CleanupMap(request, server_process, client_page_table);
+
+        // If we successfully cleaned up handles, use the map cleanup result as our result.
+        if (R_SUCCEEDED(result)) {
+            result = cleanup_map_result;
+        }
+    }
+
+    // Select a result for the client; the server itself only sees SessionClosed or Success.
+    Result client_result = result;
+    if (closed && R_SUCCEEDED(result)) {
+        result = ResultSessionClosed;
+        client_result = ResultSessionClosed;
+    } else {
+        result = ResultSuccess;
+    }
+
+    // If there's a client thread, update it (via its event if async, else by ending its wait).
+    if (client_thread != nullptr) {
+        if (event != nullptr) {
+            // Get the client process/page table.
+            KProcess* client_process = client_thread->GetOwnerProcess();
+            KProcessPageTable* client_page_table = std::addressof(client_process->GetPageTable());
+
+            // If we need to, reply with an async error.
+            if (R_FAILED(client_result)) {
+                ReplyAsyncError(client_process, client_message, client_buffer_size, client_result);
+            }
+
+            // Unlock the client buffer.
+            // NOTE: Nintendo does not check the result of this.
+            client_page_table->UnlockForIpcUserBuffer(client_message, client_buffer_size);
+
+            // Signal the event.
+            event->Signal();
+        } else {
+            // End the client thread's wait.
+            KScopedSchedulerLock sl{m_kernel};
+
+            if (!client_thread->IsTerminationRequested()) {
+                client_thread->EndWait(client_result);
+            }
+        }
+    }
+
+    R_RETURN(result);
+}
+
+Result KServerSession::OnRequest(KSessionRequest* request) {
+    // Create the wait queue used if the current thread has to wait for a synchronous reply.
+    ThreadQueueImplForKServerSessionRequest wait_queue{m_kernel};
+
+    {
+        // Lock the scheduler.
+        KScopedSchedulerLock sl{m_kernel};
+
+        // Ensure that the server side of the session has not been closed.
+        R_UNLESS(!m_parent->IsServerClosed(), ResultSessionClosed);
+
+        // Check that we're not terminating.
+        R_UNLESS(!GetCurrentThread(m_kernel).IsTerminationRequested(), ResultTerminationRequested);
+
+        // Remember whether the list was empty before this request is added.
+        const bool was_empty = m_request_list.empty();
+
+        // Open a reference to the request and add it to the list.
+        request->Open();
+        m_request_list.push_back(*request);
+
+        // If we were empty, signal.
+        if (was_empty) {
+            this->NotifyAvailable();
+        }
+
+        // If we have a request event, this is asynchronous, and we don't need to wait.
+        R_SUCCEED_IF(request->GetEvent() != nullptr);
+
+        // This is a synchronous request, so we should wait for our request to complete.
+        GetCurrentThread(m_kernel).SetWaitReasonForDebugging(ThreadWaitReasonForDebugging::IPC);
+        GetCurrentThread(m_kernel).BeginWait(std::addressof(wait_queue));
+    }
+
+    return GetCurrentThread(m_kernel).GetWaitResult();
+}
+
+bool KServerSession::IsSignaled() const {
+    ASSERT(KScheduler::IsSchedulerLockedByCurrentThread(m_kernel));
+
+    // The session is signaled either unconditionally, because the client side has been
+    // closed, or because a request is queued while no request is currently being handled.
+    const bool is_idle = m_current_request == nullptr;
+    if (!m_parent->IsClientClosed()) {
+        return is_idle && !m_request_list.empty();
+    }
+    return true;
+}
+
+void KServerSession::CleanupRequests() {
+    KScopedLightLock lk(m_lock);
+
+    // Clean up the current request and any pending requests, one per loop iteration.
+    while (true) {
+        // Get the next request.
+        KSessionRequest* request = nullptr;
+        {
+            KScopedSchedulerLock sl{m_kernel};
+
+            if (m_current_request) {
+                // Choose the current request if we have one.
+                request = m_current_request;
+                m_current_request = nullptr;
+            } else if (!m_request_list.empty()) {
+                // Pop the request from the front of the list.
+                request = std::addressof(m_request_list.front());
+                m_request_list.pop_front();
+            }
+        }
+
+        // If there's no request, we're done.
+        if (request == nullptr) {
+            break;
+        }
+
+        // Close a reference to the request once it's cleaned up.
+        SCOPE_EXIT({ request->Close(); });
+
+        // Extract relevant information from the request.
+        const uint64_t client_message = request->GetAddress();
+        const size_t client_buffer_size = request->GetSize();
+        KThread* client_thread = request->GetThread();
+        KEvent* event = request->GetEvent();
+
+        KProcess* server_process = request->GetServerProcess();
+        KProcess* client_process =
+            (client_thread != nullptr) ? client_thread->GetOwnerProcess() : nullptr;
+        KProcessPageTable* client_page_table =
+            (client_process != nullptr) ? std::addressof(client_process->GetPageTable()) : nullptr;
+
+        // Cleanup the mappings; the result is only reported to async clients below.
+        Result result = CleanupMap(request, server_process, client_page_table);
+
+        // If there's a client thread, update it.
+        if (client_thread != nullptr) {
+            if (event != nullptr) {
+                // Reply async with SessionClosed, or with the map cleanup failure if it failed.
+                ReplyAsyncError(client_process, client_message, client_buffer_size,
+                                (R_SUCCEEDED(result) ? ResultSessionClosed : result));
+
+                // Unlock the client buffer.
+                // NOTE: Nintendo does not check the result of this.
+                client_page_table->UnlockForIpcUserBuffer(client_message, client_buffer_size);
+
+                // Signal the event.
+                event->Signal();
+            } else {
+                // End the client thread's wait; sync clients always receive SessionClosed.
+                KScopedSchedulerLock sl{m_kernel};
+
+                if (!client_thread->IsTerminationRequested()) {
+                    client_thread->EndWait(ResultSessionClosed);
+                }
+            }
+        }
+    }
+}
+
 void KServerSession::OnClientClosed() {
     KScopedLightLock lk{m_lock};
 
@@ -233,17 +1509,17 @@ void KServerSession::OnClientClosed() {
             ASSERT(request->GetReceiveCount() == 0);
             ASSERT(request->GetExchangeCount() == 0);
 
-            // // Get the process and page table.
-            // KProcess *client_process = thread->GetOwnerProcess();
-            // auto& client_pt = client_process->GetPageTable();
+            // Get the thread's owner process and that process's page table.
+            KProcess* client_process = thread->GetOwnerProcess();
+            auto& client_pt = client_process->GetPageTable();
 
-            // // Reply to the request.
-            // ReplyAsyncError(client_process, request->GetAddress(), request->GetSize(),
-            //                 ResultSessionClosed);
+            // Reply to the request asynchronously, reporting ResultSessionClosed.
+            ReplyAsyncError(client_process, request->GetAddress(), request->GetSize(),
+                            ResultSessionClosed);
 
-            // // Unlock the buffer.
-            // // NOTE: Nintendo does not check the result of this.
-            // client_pt.UnlockForIpcUserBuffer(request->GetAddress(), request->GetSize());
+            // Unlock the request's buffer (its address/size) in the client page table.
+            // NOTE: Nintendo does not check the result of this.
+            client_pt.UnlockForIpcUserBuffer(request->GetAddress(), request->GetSize());
 
             // Signal the event.
             event->Signal();
@@ -254,317 +1530,4 @@ void KServerSession::OnClientClosed() {
     this->NotifyAvailable(ResultSessionClosed);
 }
 
-bool KServerSession::IsSignaled() const {
-    ASSERT(KScheduler::IsSchedulerLockedByCurrentThread(m_kernel));
-
-    // If the client is closed, we're always signaled.
-    if (m_parent->IsClientClosed()) {
-        return true;
-    }
-
-    // Otherwise, we're signaled if we have a request and aren't handling one.
-    return !m_request_list.empty() && m_current_request == nullptr;
-}
-
-Result KServerSession::OnRequest(KSessionRequest* request) {
-    // Create the wait queue.
-    ThreadQueueImplForKServerSessionRequest wait_queue{m_kernel};
-
-    {
-        // Lock the scheduler.
-        KScopedSchedulerLock sl{m_kernel};
-
-        // Ensure that we can handle new requests.
-        R_UNLESS(!m_parent->IsServerClosed(), ResultSessionClosed);
-
-        // Check that we're not terminating.
-        R_UNLESS(!GetCurrentThread(m_kernel).IsTerminationRequested(), ResultTerminationRequested);
-
-        // Get whether we're empty.
-        const bool was_empty = m_request_list.empty();
-
-        // Add the request to the list.
-        request->Open();
-        m_request_list.push_back(*request);
-
-        // If we were empty, signal.
-        if (was_empty) {
-            this->NotifyAvailable();
-        }
-
-        // If we have a request event, this is asynchronous, and we don't need to wait.
-        R_SUCCEED_IF(request->GetEvent() != nullptr);
-
-        // This is a synchronous request, so we should wait for our request to complete.
-        GetCurrentThread(m_kernel).SetWaitReasonForDebugging(ThreadWaitReasonForDebugging::IPC);
-        GetCurrentThread(m_kernel).BeginWait(std::addressof(wait_queue));
-    }
-
-    return GetCurrentThread(m_kernel).GetWaitResult();
-}
-
-Result KServerSession::SendReply(bool is_hle) {
-    // Lock the session.
-    KScopedLightLock lk{m_lock};
-
-    // Get the request.
-    KSessionRequest* request;
-    {
-        KScopedSchedulerLock sl{m_kernel};
-
-        // Get the current request.
-        request = m_current_request;
-        R_UNLESS(request != nullptr, ResultInvalidState);
-
-        // Clear the current request, since we're processing it.
-        m_current_request = nullptr;
-        if (!m_request_list.empty()) {
-            this->NotifyAvailable();
-        }
-    }
-
-    // Close reference to the request once we're done processing it.
-    SCOPE_EXIT({ request->Close(); });
-
-    // Extract relevant information from the request.
-    const uintptr_t client_message = request->GetAddress();
-    const size_t client_buffer_size = request->GetSize();
-    KThread* client_thread = request->GetThread();
-    KEvent* event = request->GetEvent();
-
-    // Check whether we're closed.
-    const bool closed = (client_thread == nullptr || m_parent->IsClientClosed());
-
-    Result result = ResultSuccess;
-    if (!closed) {
-        // If we're not closed, send the reply.
-        if (is_hle) {
-            // HLE servers write directly to a pointer to the thread command buffer. Therefore
-            // the reply has already been written in this case.
-        } else {
-            Core::Memory::Memory& memory{client_thread->GetOwnerProcess()->GetMemory()};
-            KThread* server_thread = GetCurrentThreadPointer(m_kernel);
-            KProcess& src_process = *client_thread->GetOwnerProcess();
-            KProcess& dst_process = *server_thread->GetOwnerProcess();
-            UNIMPLEMENTED_IF(server_thread->GetOwnerProcess() != client_thread->GetOwnerProcess());
-
-            auto* src_msg_buffer = memory.GetPointer<u32>(server_thread->GetTlsAddress());
-            auto* dst_msg_buffer = memory.GetPointer<u32>(client_message);
-            std::memcpy(dst_msg_buffer, src_msg_buffer, client_buffer_size);
-
-            // Translate special header ad-hoc.
-            MessageBuffer src_msg(src_msg_buffer, client_buffer_size);
-            MessageBuffer::MessageHeader src_header(src_msg);
-            MessageBuffer::SpecialHeader src_special_header(src_msg, src_header);
-            if (src_header.GetHasSpecialHeader()) {
-                MessageBuffer dst_msg(dst_msg_buffer, client_buffer_size);
-                result = ProcessMessageSpecialData<true>(dst_process, src_process, *server_thread,
-                                                         dst_msg, src_msg, src_special_header);
-                if (R_FAILED(result)) {
-                    CleanupSpecialData(dst_process, dst_msg_buffer, client_buffer_size);
-                }
-            }
-        }
-    } else {
-        result = ResultSessionClosed;
-    }
-
-    // Select a result for the client.
-    Result client_result = result;
-    if (closed && R_SUCCEEDED(result)) {
-        result = ResultSessionClosed;
-        client_result = ResultSessionClosed;
-    } else {
-        result = ResultSuccess;
-    }
-
-    // If there's a client thread, update it.
-    if (client_thread != nullptr) {
-        if (event != nullptr) {
-            // // Get the client process/page table.
-            // KProcess *client_process             = client_thread->GetOwnerProcess();
-            // KProcessPageTable *client_page_table = std::addressof(client_process->PageTable());
-
-            // // If we need to, reply with an async error.
-            // if (R_FAILED(client_result)) {
-            //     ReplyAsyncError(client_process, client_message, client_buffer_size,
-            //     client_result);
-            // }
-
-            // // Unlock the client buffer.
-            // // NOTE: Nintendo does not check the result of this.
-            // client_page_table->UnlockForIpcUserBuffer(client_message, client_buffer_size);
-
-            // Signal the event.
-            event->Signal();
-        } else {
-            // End the client thread's wait.
-            KScopedSchedulerLock sl{m_kernel};
-
-            if (!client_thread->IsTerminationRequested()) {
-                client_thread->EndWait(client_result);
-            }
-        }
-    }
-
-    R_RETURN(result);
-}
-
-Result KServerSession::ReceiveRequest(std::shared_ptr<Service::HLERequestContext>* out_context,
-                                      std::weak_ptr<Service::SessionRequestManager> manager) {
-    // Lock the session.
-    KScopedLightLock lk{m_lock};
-
-    // Get the request and client thread.
-    KSessionRequest* request;
-    KThread* client_thread;
-
-    {
-        KScopedSchedulerLock sl{m_kernel};
-
-        // Ensure that we can service the request.
-        R_UNLESS(!m_parent->IsClientClosed(), ResultSessionClosed);
-
-        // Ensure we aren't already servicing a request.
-        R_UNLESS(m_current_request == nullptr, ResultNotFound);
-
-        // Ensure we have a request to service.
-        R_UNLESS(!m_request_list.empty(), ResultNotFound);
-
-        // Pop the first request from the list.
-        request = std::addressof(m_request_list.front());
-        m_request_list.pop_front();
-
-        // Get the thread for the request.
-        client_thread = request->GetThread();
-        R_UNLESS(client_thread != nullptr, ResultSessionClosed);
-
-        // Open the client thread.
-        client_thread->Open();
-    }
-
-    SCOPE_EXIT({ client_thread->Close(); });
-
-    // Set the request as our current.
-    m_current_request = request;
-
-    // Get the client address.
-    uintptr_t client_message = request->GetAddress();
-    size_t client_buffer_size = request->GetSize();
-    // bool recv_list_broken = false;
-
-    if (!client_message) {
-        client_message = GetInteger(client_thread->GetTlsAddress());
-        client_buffer_size = MessageBufferSize;
-    }
-
-    // Receive the message.
-    Core::Memory::Memory& memory{client_thread->GetOwnerProcess()->GetMemory()};
-    if (out_context != nullptr) {
-        // HLE request.
-        u32* cmd_buf{reinterpret_cast<u32*>(memory.GetPointer(client_message))};
-        *out_context =
-            std::make_shared<Service::HLERequestContext>(m_kernel, memory, this, client_thread);
-        (*out_context)->SetSessionRequestManager(manager);
-        (*out_context)
-            ->PopulateFromIncomingCommandBuffer(*client_thread->GetOwnerProcess(), cmd_buf);
-    } else {
-        KThread* server_thread = GetCurrentThreadPointer(m_kernel);
-        KProcess& src_process = *client_thread->GetOwnerProcess();
-        KProcess& dst_process = *server_thread->GetOwnerProcess();
-        UNIMPLEMENTED_IF(client_thread->GetOwnerProcess() != server_thread->GetOwnerProcess());
-
-        auto* src_msg_buffer = memory.GetPointer<u32>(client_message);
-        auto* dst_msg_buffer = memory.GetPointer<u32>(server_thread->GetTlsAddress());
-        std::memcpy(dst_msg_buffer, src_msg_buffer, client_buffer_size);
-
-        // Translate special header ad-hoc.
-        // TODO: fix this mess
-        MessageBuffer src_msg(src_msg_buffer, client_buffer_size);
-        MessageBuffer::MessageHeader src_header(src_msg);
-        MessageBuffer::SpecialHeader src_special_header(src_msg, src_header);
-        if (src_header.GetHasSpecialHeader()) {
-            MessageBuffer dst_msg(dst_msg_buffer, client_buffer_size);
-            Result res = ProcessMessageSpecialData<false>(dst_process, src_process, *client_thread,
-                                                          dst_msg, src_msg, src_special_header);
-            if (R_FAILED(res)) {
-                CleanupSpecialData(dst_process, dst_msg_buffer, client_buffer_size);
-            }
-        }
-    }
-
-    // We succeeded.
-    R_SUCCEED();
-}
-
-void KServerSession::CleanupRequests() {
-    KScopedLightLock lk(m_lock);
-
-    // Clean up any pending requests.
-    while (true) {
-        // Get the next request.
-        KSessionRequest* request = nullptr;
-        {
-            KScopedSchedulerLock sl{m_kernel};
-
-            if (m_current_request) {
-                // Choose the current request if we have one.
-                request = m_current_request;
-                m_current_request = nullptr;
-            } else if (!m_request_list.empty()) {
-                // Pop the request from the front of the list.
-                request = std::addressof(m_request_list.front());
-                m_request_list.pop_front();
-            }
-        }
-
-        // If there's no request, we're done.
-        if (request == nullptr) {
-            break;
-        }
-
-        // Close a reference to the request once it's cleaned up.
-        SCOPE_EXIT({ request->Close(); });
-
-        // Extract relevant information from the request.
-        // const uintptr_t client_message  = request->GetAddress();
-        // const size_t client_buffer_size = request->GetSize();
-        KThread* client_thread = request->GetThread();
-        KEvent* event = request->GetEvent();
-
-        // KProcess *server_process             = request->GetServerProcess();
-        // KProcess *client_process             = (client_thread != nullptr) ?
-        //                                         client_thread->GetOwnerProcess() : nullptr;
-        // KProcessPageTable *client_page_table = (client_process != nullptr) ?
-        //                                         std::addressof(client_process->GetPageTable())
-        //                                         : nullptr;
-
-        // Cleanup the mappings.
-        // Result result = CleanupMap(request, server_process, client_page_table);
-
-        // If there's a client thread, update it.
-        if (client_thread != nullptr) {
-            if (event != nullptr) {
-                // // We need to reply async.
-                // ReplyAsyncError(client_process, client_message, client_buffer_size,
-                //                 (R_SUCCEEDED(result) ? ResultSessionClosed : result));
-
-                // // Unlock the client buffer.
-                // NOTE: Nintendo does not check the result of this.
-                // client_page_table->UnlockForIpcUserBuffer(client_message, client_buffer_size);
-
-                // Signal the event.
-                event->Signal();
-            } else {
-                // End the client thread's wait.
-                KScopedSchedulerLock sl{m_kernel};
-
-                if (!client_thread->IsTerminationRequested()) {
-                    client_thread->EndWait(ResultSessionClosed);
-                }
-            }
-        }
-    }
-}
-
 } // namespace Kernel
diff --git a/src/core/hle/kernel/k_server_session.h b/src/core/hle/kernel/k_server_session.h
index 6f9ce4261..09165b266 100755
--- a/src/core/hle/kernel/k_server_session.h
+++ b/src/core/hle/kernel/k_server_session.h
@@ -49,14 +49,21 @@ public:
     bool IsSignaled() const override;
     void OnClientClosed();
 
-    /// TODO: flesh these out to match the real kernel
     Result OnRequest(KSessionRequest* request);
-    Result SendReply(bool is_hle = false);
-    Result ReceiveRequest(std::shared_ptr<Service::HLERequestContext>* out_context = nullptr,
+    Result SendReply(uintptr_t server_message, uintptr_t server_buffer_size,
+                     KPhysicalAddress server_message_paddr, bool is_hle = false);
+    Result ReceiveRequest(uintptr_t server_message, uintptr_t server_buffer_size,
+                          KPhysicalAddress server_message_paddr,
+                          std::shared_ptr<Service::HLERequestContext>* out_context = nullptr,
                           std::weak_ptr<Service::SessionRequestManager> manager = {});
 
     Result SendReplyHLE() {
-        return SendReply(true);
+        R_RETURN(this->SendReply(0, 0, 0, true)); // Zero message/size/paddr, is_hle = true.
+    }
+
+    Result ReceiveRequestHLE(std::shared_ptr<Service::HLERequestContext>* out_context,
+                             std::weak_ptr<Service::SessionRequestManager> manager) {
+        R_RETURN(this->ReceiveRequest(0, 0, 0, out_context, manager)); // Zero message/size/paddr.
     }
 
 private:
diff --git a/src/core/hle/kernel/k_session.cpp b/src/core/hle/kernel/k_session.cpp
index 7f9b86809..117840915 100755
--- a/src/core/hle/kernel/k_session.cpp
+++ b/src/core/hle/kernel/k_session.cpp
@@ -33,8 +33,7 @@ void KSession::Initialize(KClientPort* client_port, uintptr_t name) {
     m_name = name;
 
     // Set our owner process.
-    //! FIXME: this is the wrong process!
-    m_process = m_kernel.ApplicationProcess();
+    m_process = GetCurrentProcessPointer(m_kernel);
     m_process->Open();
 
     // Set our port.
diff --git a/src/core/hle/kernel/k_thread.cpp b/src/core/hle/kernel/k_thread.cpp
index 82ff480bc..1eaebb362 100755
--- a/src/core/hle/kernel/k_thread.cpp
+++ b/src/core/hle/kernel/k_thread.cpp
@@ -1422,8 +1422,7 @@ s32 GetCurrentCoreId(KernelCore& kernel) {
 }
 
 Core::Memory::Memory& GetCurrentMemory(KernelCore& kernel) {
-    // TODO: per-process memory
-    return kernel.System().ApplicationMemory();
+    return GetCurrentProcess(kernel).GetMemory();
 }
 
 KScopedDisableDispatch::~KScopedDisableDispatch() {
diff --git a/src/core/hle/kernel/k_thread.h b/src/core/hle/kernel/k_thread.h
index ea263d53f..c2a9e9831 100755
--- a/src/core/hle/kernel/k_thread.h
+++ b/src/core/hle/kernel/k_thread.h
@@ -314,11 +314,7 @@ public:
         m_current_core_id = core;
     }
 
-    KProcess* GetOwnerProcess() {
-        return m_parent;
-    }
-
-    const KProcess* GetOwnerProcess() const {
+    KProcess* GetOwnerProcess() const { // Const-callable, yet yields a mutable KProcess*.
         return m_parent;
     }
 
diff --git a/src/core/hle/kernel/kernel.cpp b/src/core/hle/kernel/kernel.cpp
index 8a6c29f09..fd905276d 100755
--- a/src/core/hle/kernel/kernel.cpp
+++ b/src/core/hle/kernel/kernel.cpp
@@ -68,8 +68,6 @@ struct KernelCore::Impl {
 
         global_object_list_container = std::make_unique<KAutoObjectWithListContainer>(kernel);
         global_scheduler_context = std::make_unique<Kernel::GlobalSchedulerContext>(kernel);
-        global_handle_table = std::make_unique<Kernel::KHandleTable>(kernel);
-        global_handle_table->Initialize(KHandleTable::MaxTableSize);
 
         is_phantom_mode_for_singlecore = false;
 
@@ -121,13 +119,8 @@ struct KernelCore::Impl {
         next_user_process_id = KProcess::ProcessIdMin;
         next_thread_id = 1;
 
-        global_handle_table->Finalize();
-        global_handle_table.reset();
-
         preemption_event = nullptr;
 
-        exclusive_monitor.reset();
-
         // Cleanup persistent kernel objects
         auto CleanupObject = [](KAutoObject* obj) {
             if (obj) {
@@ -191,8 +184,6 @@ struct KernelCore::Impl {
     }
 
     void InitializePhysicalCores() {
-        exclusive_monitor =
-            Core::MakeExclusiveMonitor(system.ApplicationMemory(), Core::Hardware::NUM_CPU_CORES);
         for (u32 i = 0; i < Core::Hardware::NUM_CPU_CORES; i++) {
             const s32 core{static_cast<s32>(i)};
 
@@ -791,10 +782,6 @@ struct KernelCore::Impl {
 
     std::shared_ptr<Core::Timing::EventType> preemption_event;
 
-    // This is the kernel's handle table or supervisor handle table which
-    // stores all the objects in place.
-    std::unique_ptr<KHandleTable> global_handle_table;
-
     std::unique_ptr<KAutoObjectWithListContainer> global_object_list_container;
 
     std::unique_ptr<KObjectNameGlobalData> object_name_global_data;
@@ -805,7 +792,6 @@ struct KernelCore::Impl {
     std::mutex server_lock;
     std::vector<std::unique_ptr<Service::ServerManager>> server_managers;
 
-    std::unique_ptr<Core::ExclusiveMonitor> exclusive_monitor;
     std::array<std::unique_ptr<Kernel::PhysicalCore>, Core::Hardware::NUM_CPU_CORES> cores;
 
     // Next host thead ID to use, 0-3 IDs represent core threads, >3 represent others
@@ -882,10 +868,6 @@ KResourceLimit* KernelCore::GetSystemResourceLimit() {
     return impl->system_resource_limit;
 }
 
-KScopedAutoObject<KThread> KernelCore::RetrieveThreadFromGlobalHandleTable(Handle handle) const {
-    return impl->global_handle_table->GetObject<KThread>(handle);
-}
-
 void KernelCore::AppendNewProcess(KProcess* process) {
     impl->process_list.push_back(process);
 }
@@ -959,14 +941,6 @@ Kernel::KHardwareTimer& KernelCore::HardwareTimer() {
     return *impl->hardware_timer;
 }
 
-Core::ExclusiveMonitor& KernelCore::GetExclusiveMonitor() {
-    return *impl->exclusive_monitor;
-}
-
-const Core::ExclusiveMonitor& KernelCore::GetExclusiveMonitor() const {
-    return *impl->exclusive_monitor;
-}
-
 KAutoObjectWithListContainer& KernelCore::ObjectListContainer() {
     return *impl->global_object_list_container;
 }
@@ -1030,14 +1004,6 @@ u64 KernelCore::CreateNewUserProcessID() {
     return impl->next_user_process_id++;
 }
 
-KHandleTable& KernelCore::GlobalHandleTable() {
-    return *impl->global_handle_table;
-}
-
-const KHandleTable& KernelCore::GlobalHandleTable() const {
-    return *impl->global_handle_table;
-}
-
 void KernelCore::RegisterCoreThread(std::size_t core_id) {
     impl->RegisterCoreThread(core_id);
 }
diff --git a/src/core/hle/kernel/kernel.h b/src/core/hle/kernel/kernel.h
index 3de4942a2..775783b95 100755
--- a/src/core/hle/kernel/kernel.h
+++ b/src/core/hle/kernel/kernel.h
@@ -116,9 +116,6 @@ public:
     /// Retrieves a shared pointer to the system resource limit instance.
     KResourceLimit* GetSystemResourceLimit();
 
-    /// Retrieves a shared pointer to a Thread instance within the thread wakeup handle table.
-    KScopedAutoObject<KThread> RetrieveThreadFromGlobalHandleTable(Handle handle) const;
-
     /// Adds the given shared pointer to an internal list of active processes.
     void AppendNewProcess(KProcess* process);
 
@@ -170,10 +167,6 @@ public:
     /// Stops execution of 'id' core, in order to reschedule a new thread.
     void PrepareReschedule(std::size_t id);
 
-    Core::ExclusiveMonitor& GetExclusiveMonitor();
-
-    const Core::ExclusiveMonitor& GetExclusiveMonitor() const;
-
     KAutoObjectWithListContainer& ObjectListContainer();
 
     const KAutoObjectWithListContainer& ObjectListContainer() const;
diff --git a/src/core/hle/kernel/message_buffer.h b/src/core/hle/kernel/message_buffer.h
index 75b275310..d528a9bb3 100755
--- a/src/core/hle/kernel/message_buffer.h
+++ b/src/core/hle/kernel/message_buffer.h
@@ -18,13 +18,13 @@ public:
         static constexpr inline u64 NullTag = 0;
 
     public:
-        enum class ReceiveListCountType : u32 {
-            None = 0,
-            ToMessageBuffer = 1,
-            ToSingleBuffer = 2,
+        enum ReceiveListCountType : u32 {
+            ReceiveListCountType_None = 0,
+            ReceiveListCountType_ToMessageBuffer = 1,
+            ReceiveListCountType_ToSingleBuffer = 2,
 
-            CountOffset = 2,
-            CountMax = 13,
+            ReceiveListCountType_CountOffset = 2,
+            ReceiveListCountType_CountMax = 13,
         };
 
     private:
@@ -591,16 +591,16 @@ public:
         // Add the size of the receive list.
         const auto count = hdr.GetReceiveListCount();
         switch (count) {
-        case MessageHeader::ReceiveListCountType::None:
+        case MessageHeader::ReceiveListCountType_None:
             break;
-        case MessageHeader::ReceiveListCountType::ToMessageBuffer:
+        case MessageHeader::ReceiveListCountType_ToMessageBuffer:
             break;
-        case MessageHeader::ReceiveListCountType::ToSingleBuffer:
+        case MessageHeader::ReceiveListCountType_ToSingleBuffer:
             msg_size += ReceiveListEntry::GetDataSize();
             break;
         default:
             msg_size += (static_cast<s32>(count) -
-                         static_cast<s32>(MessageHeader::ReceiveListCountType::CountOffset)) *
+                         static_cast<s32>(MessageHeader::ReceiveListCountType_CountOffset)) *
                         ReceiveListEntry::GetDataSize();
             break;
         }
diff --git a/src/core/hle/kernel/svc/svc_info.cpp b/src/core/hle/kernel/svc/svc_info.cpp
index ada998772..231e4d0e1 100755
--- a/src/core/hle/kernel/svc/svc_info.cpp
+++ b/src/core/hle/kernel/svc/svc_info.cpp
@@ -118,7 +118,6 @@ Result GetInfo(Core::System& system, u64* result, InfoType info_id_type, Handle
             R_SUCCEED();
 
         case InfoType::IsApplication:
-            LOG_WARNING(Kernel_SVC, "(STUBBED) Assuming process is application");
             *result = process->IsApplication();
             R_SUCCEED();
 
diff --git a/src/core/hle/kernel/svc/svc_ipc.cpp b/src/core/hle/kernel/svc/svc_ipc.cpp
index 47a3e7bb0..85cc4f561 100755
--- a/src/core/hle/kernel/svc/svc_ipc.cpp
+++ b/src/core/hle/kernel/svc/svc_ipc.cpp
@@ -48,8 +48,7 @@ Result ReplyAndReceiveImpl(KernelCore& kernel, int32_t* out_index, uintptr_t mes
         };
 
         // Send the reply.
-        R_TRY(session->SendReply());
-        // R_TRY(session->SendReply(message, buffer_size, message_paddr));
+        R_TRY(session->SendReply(message, buffer_size, message_paddr));
     }
 
     // Receive a message.
@@ -85,8 +84,7 @@ Result ReplyAndReceiveImpl(KernelCore& kernel, int32_t* out_index, uintptr_t mes
             if (R_SUCCEEDED(result)) {
                 KServerSession* session = objs[index]->DynamicCast<KServerSession*>();
                 if (session != nullptr) {
-                    // result = session->ReceiveRequest(message, buffer_size, message_paddr);
-                    result = session->ReceiveRequest();
+                    result = session->ReceiveRequest(message, buffer_size, message_paddr);
                     if (ResultNotFound == result) {
                         continue;
                     }
diff --git a/src/core/hle/kernel/svc_results.h b/src/core/hle/kernel/svc_results.h
index 264ac68fd..37d4f11c2 100755
--- a/src/core/hle/kernel/svc_results.h
+++ b/src/core/hle/kernel/svc_results.h
@@ -38,7 +38,9 @@ constexpr Result ResultInvalidState{ErrorModule::Kernel, 125};
 constexpr Result ResultReservedUsed{ErrorModule::Kernel, 126};
 constexpr Result ResultPortClosed{ErrorModule::Kernel, 131};
 constexpr Result ResultLimitReached{ErrorModule::Kernel, 132};
+constexpr Result ResultReceiveListBroken{ErrorModule::Kernel, 258};
 constexpr Result ResultOutOfAddressSpace{ErrorModule::Kernel, 259};
+constexpr Result ResultMessageTooLarge{ErrorModule::Kernel, 260};
 constexpr Result ResultInvalidId{ErrorModule::Kernel, 519};
 
 } // namespace Kernel
diff --git a/src/core/hle/service/fatal/fatal.cpp b/src/core/hle/service/fatal/fatal.cpp
index 49fdbbf47..f545456ca 100755
--- a/src/core/hle/service/fatal/fatal.cpp
+++ b/src/core/hle/service/fatal/fatal.cpp
@@ -89,7 +89,7 @@ static void GenerateErrorReport(Core::System& system, Result error_code, const F
         crash_report += fmt::format("    ESR:                         {:016x}\n", info.esr);
         crash_report += fmt::format("    FAR:                         {:016x}\n", info.far);
         crash_report += "\nBacktrace:\n";
-        for (size_t i = 0; i < info.backtrace_size; i++) {
+        for (u32 i = 0; i < std::min<u32>(info.backtrace_size, 32); i++) {
             crash_report +=
                 fmt::format("    Backtrace[{:02d}]:               {:016x}\n", i, info.backtrace[i]);
         }
diff --git a/src/core/hle/service/ipc_helpers.h b/src/core/hle/service/ipc_helpers.h
index 0e222362e..4b02872fb 100755
--- a/src/core/hle/service/ipc_helpers.h
+++ b/src/core/hle/service/ipc_helpers.h
@@ -151,8 +151,8 @@ public:
         if (manager->IsDomain()) {
             context->AddDomainObject(std::move(iface));
         } else {
-            kernel.ApplicationProcess()->GetResourceLimit()->Reserve(
-                Kernel::LimitableResource::SessionCountMax, 1);
+            ASSERT(Kernel::GetCurrentProcess(kernel).GetResourceLimit()->Reserve(
+                Kernel::LimitableResource::SessionCountMax, 1));
 
             auto* session = Kernel::KSession::Create(kernel);
             session->Initialize(nullptr, 0);
diff --git a/src/core/hle/service/server_manager.cpp b/src/core/hle/service/server_manager.cpp
index 6808247a9..15edb23e0 100755
--- a/src/core/hle/service/server_manager.cpp
+++ b/src/core/hle/service/server_manager.cpp
@@ -47,7 +47,7 @@ ServerManager::~ServerManager() {
     m_stopped.Wait();
     m_threads.clear();
 
-    // Clean up ports.
+    // Clean up server ports.
     for (const auto& [port, handler] : m_ports) {
         port->Close();
     }
@@ -97,22 +97,15 @@ Result ServerManager::RegisterNamedService(const std::string& service_name,
                                            u32 max_sessions) {
     ASSERT(m_sessions.size() + m_ports.size() < MaximumWaitObjects);
 
-    // Add the new server to sm:.
-    ASSERT(R_SUCCEEDED(
-        m_system.ServiceManager().RegisterService(service_name, max_sessions, handler_factory)));
-
-    // Get the registered port.
-    Kernel::KPort* port{};
-    ASSERT(
-        R_SUCCEEDED(m_system.ServiceManager().GetServicePort(std::addressof(port), service_name)));
-
-    // Open a new reference to the server port.
-    port->GetServerPort().Open();
+    // Add the new server to sm: and get the moved server port.
+    Kernel::KServerPort* server_port{};
+    R_ASSERT(m_system.ServiceManager().RegisterService(std::addressof(server_port), service_name,
+                                                       max_sessions, handler_factory));
 
     // Begin tracking the server port.
     {
         std::scoped_lock ll{m_list_mutex};
-        m_ports.emplace(std::addressof(port->GetServerPort()), std::move(handler_factory));
+        m_ports.emplace(server_port, std::move(handler_factory));
     }
 
     // Signal the wakeup event.
@@ -372,7 +365,7 @@ Result ServerManager::OnSessionEvent(Kernel::KServerSession* session,
 
     // Try to receive a message.
     std::shared_ptr<HLERequestContext> context;
-    rc = session->ReceiveRequest(&context, manager);
+    rc = session->ReceiveRequestHLE(&context, manager);
 
     // If the session has been closed, we're done.
     if (rc == Kernel::ResultSessionClosed) {
diff --git a/src/core/hle/service/set/set_sys.cpp b/src/core/hle/service/set/set_sys.cpp
index 4d24bedf5..d0c52b67a 100755
--- a/src/core/hle/service/set/set_sys.cpp
+++ b/src/core/hle/service/set/set_sys.cpp
@@ -507,6 +507,14 @@ void SET_SYS::SetTvSettings(HLERequestContext& ctx) {
     rb.Push(ResultSuccess);
 }
 
+void SET_SYS::GetDebugModeFlag(HLERequestContext& ctx) { // Replies with a fixed flag value.
+    LOG_DEBUG(Service_SET, "called");
+
+    IPC::ResponseBuilder rb{ctx, 3};
+    rb.Push(ResultSuccess); // This handler always reports success.
+    rb.Push<u32>(0);        // Flag value is hard-coded to 0.
+}
+
 void SET_SYS::GetQuestFlag(HLERequestContext& ctx) {
     LOG_WARNING(Service_SET, "(STUBBED) called");
 
@@ -926,7 +934,7 @@ SET_SYS::SET_SYS(Core::System& system_) : ServiceFramework{system_, "set:sys"},
         {59, &SET_SYS::SetNetworkSystemClockContext, "SetNetworkSystemClockContext"},
         {60, &SET_SYS::IsUserSystemClockAutomaticCorrectionEnabled, "IsUserSystemClockAutomaticCorrectionEnabled"},
         {61, &SET_SYS::SetUserSystemClockAutomaticCorrectionEnabled, "SetUserSystemClockAutomaticCorrectionEnabled"},
-        {62, nullptr, "GetDebugModeFlag"},
+        {62, &SET_SYS::GetDebugModeFlag, "GetDebugModeFlag"},
         {63, &SET_SYS::GetPrimaryAlbumStorage, "GetPrimaryAlbumStorage"},
         {64, nullptr, "SetPrimaryAlbumStorage"},
         {65, nullptr, "GetUsb30EnableFlag"},
@@ -1143,6 +1151,8 @@ void SET_SYS::StoreSettings() {
 }
 
 void SET_SYS::StoreSettingsThreadFunc(std::stop_token stop_token) {
+    Common::SetCurrentThreadName("SettingsStore");
+
     while (Common::StoppableTimedWait(stop_token, std::chrono::minutes(1))) {
         std::scoped_lock l{m_save_needed_mutex};
         if (!std::exchange(m_save_needed, false)) {
diff --git a/src/core/hle/service/set/set_sys.h b/src/core/hle/service/set/set_sys.h
index c6ead2521..12f0637e2 100755
--- a/src/core/hle/service/set/set_sys.h
+++ b/src/core/hle/service/set/set_sys.h
@@ -98,6 +98,7 @@ private:
     void GetSettingsItemValue(HLERequestContext& ctx);
     void GetTvSettings(HLERequestContext& ctx);
     void SetTvSettings(HLERequestContext& ctx);
+    void GetDebugModeFlag(HLERequestContext& ctx);
     void GetQuestFlag(HLERequestContext& ctx);
     void GetDeviceTimeZoneLocationName(HLERequestContext& ctx);
     void SetDeviceTimeZoneLocationName(HLERequestContext& ctx);
diff --git a/src/core/hle/service/sm/sm.cpp b/src/core/hle/service/sm/sm.cpp
index 6f39dba78..eb35b368d 100755
--- a/src/core/hle/service/sm/sm.cpp
+++ b/src/core/hle/service/sm/sm.cpp
@@ -29,8 +29,7 @@ ServiceManager::ServiceManager(Kernel::KernelCore& kernel_) : kernel{kernel_} {
 
 ServiceManager::~ServiceManager() {
     for (auto& [name, port] : service_ports) {
-        port->GetClientPort().Close();
-        port->GetServerPort().Close();
+        port->Close();
     }
 
     if (deferral_event) {
@@ -50,8 +49,8 @@ static Result ValidateServiceName(const std::string& name) {
     return ResultSuccess;
 }
 
-Result ServiceManager::RegisterService(std::string name, u32 max_sessions,
-                                       SessionRequestHandlerFactory handler) {
+Result ServiceManager::RegisterService(Kernel::KServerPort** out_server_port, std::string name,
+                                       u32 max_sessions, SessionRequestHandlerFactory handler) {
     R_TRY(ValidateServiceName(name));
 
     std::scoped_lock lk{lock};
@@ -66,13 +65,17 @@ Result ServiceManager::RegisterService(std::string name, u32 max_sessions,
     // Register the port.
     Kernel::KPort::Register(kernel, port);
 
-    service_ports.emplace(name, port);
+    service_ports.emplace(name, std::addressof(port->GetClientPort()));
     registered_services.emplace(name, handler);
     if (deferral_event) {
         deferral_event->Signal();
     }
 
-    return ResultSuccess;
+    // Set our output.
+    *out_server_port = std::addressof(port->GetServerPort());
+
+    // We succeeded.
+    R_SUCCEED();
 }
 
 Result ServiceManager::UnregisterService(const std::string& name) {
@@ -91,7 +94,8 @@ Result ServiceManager::UnregisterService(const std::string& name) {
     return ResultSuccess;
 }
 
-Result ServiceManager::GetServicePort(Kernel::KPort** out_port, const std::string& name) {
+Result ServiceManager::GetServicePort(Kernel::KClientPort** out_client_port,
+                                      const std::string& name) {
     R_TRY(ValidateServiceName(name));
 
     std::scoped_lock lk{lock};
@@ -101,7 +105,7 @@ Result ServiceManager::GetServicePort(Kernel::KPort** out_port, const std::strin
         return Service::SM::ResultNotRegistered;
     }
 
-    *out_port = it->second;
+    *out_client_port = it->second;
     return ResultSuccess;
 }
 
@@ -172,8 +176,8 @@ Result SM::GetServiceImpl(Kernel::KClientSession** out_client_session, HLEReques
     std::string name(PopServiceName(rp));
 
     // Find the named port.
-    Kernel::KPort* port{};
-    auto port_result = service_manager.GetServicePort(&port, name);
+    Kernel::KClientPort* client_port{};
+    auto port_result = service_manager.GetServicePort(&client_port, name);
     if (port_result == Service::SM::ResultInvalidServiceName) {
         LOG_ERROR(Service_SM, "Invalid service name '{}'", name);
         return Service::SM::ResultInvalidServiceName;
@@ -187,7 +191,7 @@ Result SM::GetServiceImpl(Kernel::KClientSession** out_client_session, HLEReques
 
     // Create a new session.
     Kernel::KClientSession* session{};
-    if (const auto result = port->GetClientPort().CreateSession(&session); result.IsError()) {
+    if (const auto result = client_port->CreateSession(&session); result.IsError()) {
         LOG_ERROR(Service_SM, "called service={} -> error 0x{:08X}", name, result.raw);
         return result;
     }
@@ -221,7 +225,9 @@ void SM::RegisterServiceImpl(HLERequestContext& ctx, std::string name, u32 max_s
     LOG_DEBUG(Service_SM, "called with name={}, max_session_count={}, is_light={}", name,
               max_session_count, is_light);
 
-    if (const auto result = service_manager.RegisterService(name, max_session_count, nullptr);
+    Kernel::KServerPort* server_port{};
+    if (const auto result = service_manager.RegisterService(std::addressof(server_port), name,
+                                                            max_session_count, nullptr);
         result.IsError()) {
         LOG_ERROR(Service_SM, "failed to register service with error_code={:08X}", result.raw);
         IPC::ResponseBuilder rb{ctx, 2};
@@ -229,13 +235,9 @@ void SM::RegisterServiceImpl(HLERequestContext& ctx, std::string name, u32 max_s
         return;
     }
 
-    auto* port = Kernel::KPort::Create(kernel);
-    port->Initialize(ServerSessionCountMax, is_light, 0);
-    SCOPE_EXIT({ port->GetClientPort().Close(); });
-
     IPC::ResponseBuilder rb{ctx, 2, 0, 1, IPC::ResponseBuilder::Flags::AlwaysMoveHandles};
     rb.Push(ResultSuccess);
-    rb.PushMoveObjects(port->GetServerPort());
+    rb.PushMoveObjects(server_port);
 }
 
 void SM::UnregisterService(HLERequestContext& ctx) {
diff --git a/src/core/hle/service/sm/sm.h b/src/core/hle/service/sm/sm.h
index e74fe3c94..d17813405 100755
--- a/src/core/hle/service/sm/sm.h
+++ b/src/core/hle/service/sm/sm.h
@@ -56,10 +56,10 @@ public:
     explicit ServiceManager(Kernel::KernelCore& kernel_);
     ~ServiceManager();
 
-    Result RegisterService(std::string name, u32 max_sessions,
-                           SessionRequestHandlerFactory handler_factory);
+    Result RegisterService(Kernel::KServerPort** out_server_port, std::string name,
+                           u32 max_sessions, SessionRequestHandlerFactory handler_factory);
     Result UnregisterService(const std::string& name);
-    Result GetServicePort(Kernel::KPort** out_port, const std::string& name);
+    Result GetServicePort(Kernel::KClientPort** out_client_port, const std::string& name);
 
     template <Common::DerivedFrom<SessionRequestHandler> T>
     std::shared_ptr<T> GetService(const std::string& service_name) const {
@@ -84,7 +84,7 @@ private:
     /// Map of registered services, retrieved using GetServicePort.
     std::mutex lock;
     std::unordered_map<std::string, SessionRequestHandlerFactory> registered_services;
-    std::unordered_map<std::string, Kernel::KPort*> service_ports;
+    std::unordered_map<std::string, Kernel::KClientPort*> service_ports;
 
     /// Kernel context
     Kernel::KernelCore& kernel;
diff --git a/src/core/hle/service/sm/sm_controller.cpp b/src/core/hle/service/sm/sm_controller.cpp
index 1e833c3da..9f506014b 100755
--- a/src/core/hle/service/sm/sm_controller.cpp
+++ b/src/core/hle/service/sm/sm_controller.cpp
@@ -28,7 +28,6 @@ void Controller::ConvertCurrentObjectToDomain(HLERequestContext& ctx) {
 void Controller::CloneCurrentObject(HLERequestContext& ctx) {
     LOG_DEBUG(Service, "called");
 
-    auto& process = *ctx.GetThread().GetOwnerProcess();
     auto session_manager = ctx.GetManager();
 
     // FIXME: this is duplicated from the SVC, it should just call it instead
@@ -36,11 +35,11 @@ void Controller::CloneCurrentObject(HLERequestContext& ctx) {
 
     // Reserve a new session from the process resource limit.
     Kernel::KScopedResourceReservation session_reservation(
-        &process, Kernel::LimitableResource::SessionCountMax);
+        Kernel::GetCurrentProcessPointer(kernel), Kernel::LimitableResource::SessionCountMax);
     ASSERT(session_reservation.Succeeded());
 
     // Create the session.
-    Kernel::KSession* session = Kernel::KSession::Create(system.Kernel());
+    Kernel::KSession* session = Kernel::KSession::Create(kernel);
     ASSERT(session != nullptr);
 
     // Initialize the session.
@@ -50,7 +49,7 @@ void Controller::CloneCurrentObject(HLERequestContext& ctx) {
     session_reservation.Commit();
 
     // Register the session.
-    Kernel::KSession::Register(system.Kernel(), session);
+    Kernel::KSession::Register(kernel, session);
 
     // Register with server manager.
     session_manager->GetServerManager().RegisterSession(&session->GetServerSession(),
diff --git a/src/core/loader/deconstructed_rom_directory.cpp b/src/core/loader/deconstructed_rom_directory.cpp
index d1746692a..6e51372d3 100755
--- a/src/core/loader/deconstructed_rom_directory.cpp
+++ b/src/core/loader/deconstructed_rom_directory.cpp
@@ -129,9 +129,10 @@ AppLoader_DeconstructedRomDirectory::LoadResult AppLoader_DeconstructedRomDirect
     }
     metadata.Print();
 
-    // Enable NCE only for programs with 39-bit address space.
+    // Enable NCE only for applications with 39-bit address space.
     const bool is_39bit =
         metadata.GetAddressSpaceType() == FileSys::ProgramAddressSpaceType::Is39Bit;
+    const bool is_application = metadata.GetPoolPartition() == FileSys::PoolPartition::Application;
     Settings::SetNceEnabled(is_39bit);
 
     const std::array static_modules = {"rtld",    "main",    "subsdk0", "subsdk1", "subsdk2",
@@ -147,7 +148,7 @@ AppLoader_DeconstructedRomDirectory::LoadResult AppLoader_DeconstructedRomDirect
 
     const auto GetPatcher = [&](size_t i) -> Core::NCE::Patcher* {
 #ifdef HAS_NCE
-        if (Settings::IsNceEnabled()) {
+        if (is_application && Settings::IsNceEnabled()) {
             return &module_patchers[i];
         }
 #endif
@@ -175,7 +176,7 @@ AppLoader_DeconstructedRomDirectory::LoadResult AppLoader_DeconstructedRomDirect
 
     // Enable direct memory mapping in case of NCE.
     const u64 fastmem_base = [&]() -> size_t {
-        if (Settings::IsNceEnabled()) {
+        if (is_application && Settings::IsNceEnabled()) {
             auto& buffer = system.DeviceMemory().buffer;
             buffer.EnableDirectMappedAddress();
             return reinterpret_cast<u64>(buffer.VirtualBasePointer());
diff --git a/src/core/memory.cpp b/src/core/memory.cpp
index f41d5266c..8545517b3 100755
--- a/src/core/memory.cpp
+++ b/src/core/memory.cpp
@@ -45,7 +45,13 @@ struct Memory::Impl {
 
     void SetCurrentPageTable(Kernel::KProcess& process) {
         current_page_table = &process.GetPageTable().GetImpl();
-        current_page_table->fastmem_arena = system.DeviceMemory().buffer.VirtualBasePointer();
+
+        if (std::addressof(process) == system.ApplicationProcess() &&
+            Settings::IsFastmemEnabled()) {
+            current_page_table->fastmem_arena = system.DeviceMemory().buffer.VirtualBasePointer();
+        } else { // Any other process, or fastmem disabled in settings.
+            current_page_table->fastmem_arena = nullptr; // Tested by the map/unmap/protect paths.
+        }
     }
 
     void MapMemoryRegion(Common::PageTable& page_table, Common::ProcessAddress base, u64 size,
@@ -57,7 +63,7 @@ struct Memory::Impl {
         MapPages(page_table, base / YUZU_PAGESIZE, size / YUZU_PAGESIZE, target,
                  Common::PageType::Memory);
 
-        if (Settings::IsFastmemEnabled()) {
+        if (current_page_table->fastmem_arena) {
             system.DeviceMemory().buffer.Map(GetInteger(base),
                                              GetInteger(target) - DramMemoryMap::Base, size, perms);
         }
@@ -69,7 +75,7 @@ struct Memory::Impl {
         MapPages(page_table, base / YUZU_PAGESIZE, size / YUZU_PAGESIZE, 0,
                  Common::PageType::Unmapped);
 
-        if (Settings::IsFastmemEnabled()) {
+        if (current_page_table->fastmem_arena) {
             system.DeviceMemory().buffer.Unmap(GetInteger(base), size);
         }
     }
@@ -79,7 +85,7 @@ struct Memory::Impl {
         ASSERT_MSG((size & YUZU_PAGEMASK) == 0, "non-page aligned size: {:016X}", size);
         ASSERT_MSG((vaddr & YUZU_PAGEMASK) == 0, "non-page aligned base: {:016X}", vaddr);
 
-        if (!Settings::IsFastmemEnabled()) {
+        if (!current_page_table->fastmem_arena) {
             return;
         }
 
@@ -88,11 +94,6 @@ struct Memory::Impl {
         const bool is_x =
             True(perms & Common::MemoryPermission::Execute) && Settings::IsNceEnabled();
 
-        if (!current_page_table) {
-            system.DeviceMemory().buffer.Protect(vaddr, size, is_r, is_w, is_x);
-            return;
-        }
-
         u64 protect_bytes{};
         u64 protect_begin{};
         for (u64 addr = vaddr; addr < vaddr + size; addr += YUZU_PAGESIZE) {
@@ -239,7 +240,7 @@ struct Memory::Impl {
 
     bool WalkBlock(const Common::ProcessAddress addr, const std::size_t size, auto on_unmapped,
                    auto on_memory, auto on_rasterizer, auto increment) {
-        const auto& page_table = system.ApplicationProcess()->GetPageTable().GetImpl();
+        const auto& page_table = *current_page_table;
         std::size_t remaining_size = size;
         std::size_t page_index = addr >> YUZU_PAGEBITS;
         std::size_t page_offset = addr & YUZU_PAGEMASK;
@@ -484,7 +485,7 @@ struct Memory::Impl {
             return;
         }
 
-        if (Settings::IsFastmemEnabled()) {
+        if (current_page_table->fastmem_arena) {
             system.DeviceMemory().buffer.Protect(vaddr, size, !debug, !debug);
         }
 
@@ -541,7 +542,7 @@ struct Memory::Impl {
             return;
         }
 
-        if (Settings::IsFastmemEnabled()) {
+        if (current_page_table->fastmem_arena) {
             const bool is_read_enable =
                 !Settings::values.use_reactive_flushing.GetValue() || !cached;
             system.DeviceMemory().buffer.Protect(vaddr, size, is_read_enable, !cached);
@@ -886,8 +887,7 @@ void Memory::ProtectRegion(Common::PageTable& page_table, Common::ProcessAddress
 }
 
 bool Memory::IsValidVirtualAddress(const Common::ProcessAddress vaddr) const {
-    const Kernel::KProcess& process = *system.ApplicationProcess();
-    const auto& page_table = process.GetPageTable().GetImpl();
+    const auto& page_table = *impl->current_page_table;
     const size_t page = vaddr >> YUZU_PAGEBITS;
     if (page >= page_table.pointers.size()) {
         return false;
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp
index 80d043ffe..67a28aeba 100755
--- a/src/video_core/engines/maxwell_3d.cpp
+++ b/src/video_core/engines/maxwell_3d.cpp
@@ -586,14 +586,22 @@ void Maxwell3D::ProcessQueryCondition() {
 }
 
 void Maxwell3D::ProcessCounterReset() {
-    switch (regs.clear_report_value) {
-    case Regs::ClearReport::ZPassPixelCount:
-        rasterizer->ResetCounter(VideoCommon::QueryType::ZPassPixelCount64);
-        break;
-    default:
-        LOG_DEBUG(Render_OpenGL, "Unimplemented counter reset={}", regs.clear_report_value);
-        break;
-    }
+    const auto query_type = [clear_report = regs.clear_report_value]() {
+        switch (clear_report) { // Translate the clear-report register into a query type.
+        case Tegra::Engines::Maxwell3D::Regs::ClearReport::ZPassPixelCount:
+            return VideoCommon::QueryType::ZPassPixelCount64;
+        case Tegra::Engines::Maxwell3D::Regs::ClearReport::StreamingPrimitivesSucceeded:
+            return VideoCommon::QueryType::StreamingPrimitivesSucceeded;
+        case Tegra::Engines::Maxwell3D::Regs::ClearReport::PrimitivesGenerated:
+            return VideoCommon::QueryType::PrimitivesGenerated;
+        case Tegra::Engines::Maxwell3D::Regs::ClearReport::VtgPrimitivesOut:
+            return VideoCommon::QueryType::VtgPrimitivesOut;
+        default:
+            LOG_DEBUG(HW_GPU, "Unimplemented counter reset={}", clear_report);
+            return VideoCommon::QueryType::Payload; // Fallback; still forwarded to ResetCounter.
+        }
+    }(); // Immediately invoked so query_type can be const.
+    rasterizer->ResetCounter(query_type);
 }
 
 void Maxwell3D::ProcessSyncPoint() {
diff --git a/src/video_core/query_cache.h b/src/video_core/query_cache.h
index 450a75296..1c1239b41 100755
--- a/src/video_core/query_cache.h
+++ b/src/video_core/query_cache.h
@@ -28,8 +28,11 @@
 namespace VideoCore {
 enum class QueryType {
     SamplesPassed,
+    PrimitivesGenerated,
+    TfbPrimitivesWritten,
+    Count, // Sentinel: must stay last, NumQueryTypes is derived from it.
 };
-constexpr std::size_t NumQueryTypes = 1;
+constexpr std::size_t NumQueryTypes = static_cast<size_t>(QueryType::Count);
 } // namespace VideoCore
 
 namespace VideoCommon {
@@ -44,15 +47,6 @@ public:
     explicit CounterStreamBase(QueryCache& cache_, VideoCore::QueryType type_)
         : cache{cache_}, type{type_} {}
 
-    /// Updates the state of the stream, enabling or disabling as needed.
-    void Update(bool enabled) {
-        if (enabled) {
-            Enable();
-        } else {
-            Disable();
-        }
-    }
-
     /// Resets the stream to zero. It doesn't disable the query after resetting.
     void Reset() {
         if (current) {
@@ -80,7 +74,6 @@ public:
         return current != nullptr;
     }
 
-private:
     /// Enables the stream.
     void Enable() {
         if (current) {
@@ -97,6 +90,7 @@ private:
         last = std::exchange(current, nullptr);
     }
 
+private:
     QueryCache& cache;
     const VideoCore::QueryType type;
 
@@ -112,8 +106,14 @@ public:
         : rasterizer{rasterizer_},
           // Use reinterpret_cast instead of static_cast as workaround for
           // UBSan bug (https://github.com/llvm/llvm-project/issues/59060)
-          cpu_memory{cpu_memory_}, streams{{CounterStream{reinterpret_cast<QueryCache&>(*this),
-                                                          VideoCore::QueryType::SamplesPassed}}} {
+          cpu_memory{cpu_memory_}, streams{{
+                                       {CounterStream{reinterpret_cast<QueryCache&>(*this),
+                                                      VideoCore::QueryType::SamplesPassed}},
+                                       {CounterStream{reinterpret_cast<QueryCache&>(*this),
+                                                      VideoCore::QueryType::PrimitivesGenerated}},
+                                       {CounterStream{reinterpret_cast<QueryCache&>(*this),
+                                                      VideoCore::QueryType::TfbPrimitivesWritten}},
+                                   }} {
         (void)slot_async_jobs.insert(); // Null value
     }
 
@@ -157,12 +157,11 @@ public:
         AsyncFlushQuery(query, timestamp, lock);
     }
 
-    /// Updates counters from GPU state. Expected to be called once per draw, clear or dispatch.
-    void UpdateCounters() {
+    /// Enables every counter stream owned by this cache while holding the cache mutex.
+    void EnableCounters() {
         std::unique_lock lock{mutex};
-        if (maxwell3d) {
-            const auto& regs = maxwell3d->regs;
-            Stream(VideoCore::QueryType::SamplesPassed).Update(regs.zpass_pixel_count_enable);
+        for (auto& stream : streams) {
+            stream.Enable();
         }
     }
 
@@ -176,7 +175,7 @@ public:
     void DisableStreams() {
         std::unique_lock lock{mutex};
         for (auto& stream : streams) {
-            stream.Update(false);
+            stream.Disable();
         }
     }
 
@@ -353,7 +352,7 @@ private:
 
     std::shared_ptr<std::vector<AsyncJobId>> uncommitted_flushes{};
     std::list<std::shared_ptr<std::vector<AsyncJobId>>> committed_flushes;
-};
+};
 
 template <class QueryCache, class HostCounter>
 class HostCounterBase {
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.cpp b/src/video_core/renderer_opengl/gl_buffer_cache.cpp
index f4bd099f5..ba28087de 100755
--- a/src/video_core/renderer_opengl/gl_buffer_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_buffer_cache.cpp
@@ -58,6 +58,9 @@ Buffer::Buffer(BufferCacheRuntime& runtime, VideoCore::RasterizerInterface& rast
         glObjectLabel(GL_BUFFER, buffer.handle, static_cast<GLsizei>(name.size()), name.data());
     }
     glNamedBufferData(buffer.handle, SizeBytes(), nullptr, GL_DYNAMIC_DRAW);
+    if (runtime.has_unified_vertex_buffers) {
+        glGetNamedBufferParameterui64vNV(buffer.handle, GL_BUFFER_GPU_ADDRESS_NV, &address);
+    }
 }
 
 void Buffer::ImmediateUpload(size_t offset, std::span<const u8> data) noexcept {
@@ -109,6 +112,7 @@ BufferCacheRuntime::BufferCacheRuntime(const Device& device_,
     : device{device_}, staging_buffer_pool{staging_buffer_pool_},
       has_fast_buffer_sub_data{device.HasFastBufferSubData()},
       use_assembly_shaders{device.UseAssemblyShaders()},
+      has_unified_vertex_buffers{device.HasVertexBufferUnifiedMemory()},
       stream_buffer{has_fast_buffer_sub_data ? std::nullopt : std::make_optional<StreamBuffer>()} {
     GLint gl_max_attributes;
     glGetIntegerv(GL_MAX_VERTEX_ATTRIBS, &gl_max_attributes);
@@ -210,8 +214,14 @@ void BufferCacheRuntime::ClearBuffer(Buffer& dest_buffer, u32 offset, size_t siz
 }
 
 void BufferCacheRuntime::BindIndexBuffer(Buffer& buffer, u32 offset, u32 size) {
-    glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, buffer.Handle());
-    index_buffer_offset = offset;
+    if (has_unified_vertex_buffers) { // Bind by GPU address (offset folded into the address).
+        buffer.MakeResident(GL_READ_ONLY);
+        glBufferAddressRangeNV(GL_ELEMENT_ARRAY_ADDRESS_NV, 0, buffer.HostGpuAddr() + offset,
+                               static_cast<GLsizeiptr>(Common::AlignUp(size, 4)));
+    } else { // Bind by handle; NOTE(review): only this path updates index_buffer_offset.
+        glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, buffer.Handle());
+        index_buffer_offset = offset;
+    }
 }
 
 void BufferCacheRuntime::BindVertexBuffer(u32 index, Buffer& buffer, u32 offset, u32 size,
@@ -219,8 +229,15 @@ void BufferCacheRuntime::BindVertexBuffer(u32 index, Buffer& buffer, u32 offset,
     if (index >= max_attributes) {
         return;
     }
-    glBindVertexBuffer(index, buffer.Handle(), static_cast<GLintptr>(offset),
-                       static_cast<GLsizei>(stride));
+    if (has_unified_vertex_buffers) {
+        buffer.MakeResident(GL_READ_ONLY);
+        glBindVertexBuffer(index, 0, 0, static_cast<GLsizei>(stride));
+        glBufferAddressRangeNV(GL_VERTEX_ATTRIB_ARRAY_ADDRESS_NV, index,
+                               buffer.HostGpuAddr() + offset, static_cast<GLsizeiptr>(size));
+    } else {
+        glBindVertexBuffer(index, buffer.Handle(), static_cast<GLintptr>(offset),
+                           static_cast<GLsizei>(stride));
+    }
 }
 
 void BufferCacheRuntime::BindVertexBuffers(VideoCommon::HostBindings<Buffer>& bindings) {
@@ -233,9 +250,23 @@ void BufferCacheRuntime::BindVertexBuffers(VideoCommon::HostBindings<Buffer>& bi
                            [](u64 stride) { return static_cast<GLsizei>(stride); });
     const u32 count =
         std::min(static_cast<u32>(bindings.buffers.size()), max_attributes - bindings.min_index);
-    glBindVertexBuffers(bindings.min_index, static_cast<GLsizei>(count), buffer_handles.data(),
-                        reinterpret_cast<const GLintptr*>(bindings.offsets.data()),
-                        buffer_strides.data());
+    if (has_unified_vertex_buffers) {
+        for (u32 index = 0; index < count; ++index) {
+            Buffer& buffer = *bindings.buffers[index];
+            buffer.MakeResident(GL_READ_ONLY);
+            glBufferAddressRangeNV(GL_VERTEX_ATTRIB_ARRAY_ADDRESS_NV, bindings.min_index + index,
+                                   buffer.HostGpuAddr() + bindings.offsets[index],
+                                   static_cast<GLsizeiptr>(bindings.sizes[index]));
+        }
+        static constexpr std::array<size_t, 32> ZEROS{};
+        glBindVertexBuffers(bindings.min_index, static_cast<GLsizei>(count),
+                            reinterpret_cast<const GLuint*>(ZEROS.data()),
+                            reinterpret_cast<const GLintptr*>(ZEROS.data()), buffer_strides.data());
+    } else {
+        glBindVertexBuffers(bindings.min_index, static_cast<GLsizei>(count), buffer_handles.data(),
+                            reinterpret_cast<const GLintptr*>(bindings.offsets.data()),
+                            buffer_strides.data());
+    }
 }
 
 void BufferCacheRuntime::BindUniformBuffer(size_t stage, u32 binding_index, Buffer& buffer,
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.h b/src/video_core/renderer_opengl/gl_buffer_cache.h
index 2ee9c4ea2..affcaff04 100755
--- a/src/video_core/renderer_opengl/gl_buffer_cache.h
+++ b/src/video_core/renderer_opengl/gl_buffer_cache.h
@@ -209,6 +209,7 @@ private:
 
     bool has_fast_buffer_sub_data = false;
     bool use_assembly_shaders = false;
+    bool has_unified_vertex_buffers = false;
 
     bool use_storage_buffers = false;
 
diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp
index 8c78a50c9..3d13c695f 100755
--- a/src/video_core/renderer_opengl/gl_device.cpp
+++ b/src/video_core/renderer_opengl/gl_device.cpp
@@ -200,6 +200,7 @@ Device::Device(Core::Frontend::EmuWindow& emu_window) {
     has_broken_texture_view_formats = is_amd || (!is_linux && is_intel);
     has_nv_viewport_array2 = GLAD_GL_NV_viewport_array2;
     has_derivative_control = GLAD_GL_ARB_derivative_control;
+    has_vertex_buffer_unified_memory = GLAD_GL_NV_vertex_buffer_unified_memory;
     has_debugging_tool_attached = IsDebugToolAttached(extensions);
     has_depth_buffer_float = HasExtension(extensions, "GL_NV_depth_buffer_float");
     has_geometry_shader_passthrough = GLAD_GL_NV_geometry_shader_passthrough;
diff --git a/src/video_core/renderer_opengl/gl_device.h b/src/video_core/renderer_opengl/gl_device.h
index 652f02ac8..1c0a63446 100755
--- a/src/video_core/renderer_opengl/gl_device.h
+++ b/src/video_core/renderer_opengl/gl_device.h
@@ -72,6 +72,10 @@ public:
         return has_texture_shadow_lod;
     }
 
+    bool HasVertexBufferUnifiedMemory() const {
+        return has_vertex_buffer_unified_memory;
+    }
+
     bool HasASTC() const {
         return has_astc;
     }
@@ -211,6 +215,7 @@ private:
     bool has_vertex_viewport_layer{};
     bool has_image_load_formatted{};
     bool has_texture_shadow_lod{};
+    bool has_vertex_buffer_unified_memory{};
     bool has_astc{};
     bool has_variable_aoffi{};
     bool has_component_indexing_bug{};
diff --git a/src/video_core/renderer_opengl/gl_query_cache.cpp b/src/video_core/renderer_opengl/gl_query_cache.cpp
index 251bcb140..bfd57c31a 100755
--- a/src/video_core/renderer_opengl/gl_query_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_query_cache.cpp
@@ -18,16 +18,27 @@ namespace OpenGL {
 
 namespace {
 
-constexpr std::array<GLenum, VideoCore::NumQueryTypes> QueryTargets = {GL_SAMPLES_PASSED};
-
 constexpr GLenum GetTarget(VideoCore::QueryType type) {
-    return QueryTargets[static_cast<std::size_t>(type)];
+    switch (type) { // Translate the cache's query type into the matching GL query target.
+    case VideoCore::QueryType::SamplesPassed:
+        return GL_SAMPLES_PASSED;
+    case VideoCore::QueryType::PrimitivesGenerated:
+        return GL_PRIMITIVES_GENERATED;
+    case VideoCore::QueryType::TfbPrimitivesWritten:
+        return GL_TRANSFORM_FEEDBACK_PRIMITIVES_WRITTEN;
+    default:
+        break;
+    }
+    UNIMPLEMENTED_MSG("Query type {}", type);
+    return 0; // Only reached for a type without a case above.
 }
 
 } // Anonymous namespace
 
 QueryCache::QueryCache(RasterizerOpenGL& rasterizer_, Core::Memory::Memory& cpu_memory_)
-    : QueryCacheLegacy(rasterizer_, cpu_memory_), gl_rasterizer{rasterizer_} {}
+    : QueryCacheLegacy(rasterizer_, cpu_memory_), gl_rasterizer{rasterizer_} {
+    EnableCounters(); // Counters are switched on once here, at construction time.
+}
 
 QueryCache::~QueryCache() = default;
 
@@ -103,13 +114,13 @@ u64 CachedQuery::Flush([[maybe_unused]] bool async) {
     auto& stream = cache->Stream(type);
     const bool slice_counter = WaitPending() && stream.IsEnabled();
     if (slice_counter) {
-        stream.Update(false);
+        stream.Disable();
     }
 
     auto result = VideoCommon::CachedQueryBase<HostCounter>::Flush();
 
     if (slice_counter) {
-        stream.Update(true);
+        stream.Enable();
     }
 
     return result;
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index d541d7236..c03f5b230 100755
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -51,6 +51,22 @@ constexpr size_t NUM_SUPPORTED_VERTEX_ATTRIBUTES = 16;
 void oglEnable(GLenum cap, bool state) {
     (state ? glEnable : glDisable)(cap);
 }
+
+std::optional<VideoCore::QueryType> MaxwellToVideoCoreQuery(VideoCommon::QueryType type) {
+    switch (type) { // Map a guest query type onto the host query cache's type, if one exists.
+    case VideoCommon::QueryType::PrimitivesGenerated:
+    case VideoCommon::QueryType::VtgPrimitivesOut: // Shares the PrimitivesGenerated mapping.
+        return VideoCore::QueryType::PrimitivesGenerated;
+    case VideoCommon::QueryType::ZPassPixelCount64:
+        return VideoCore::QueryType::SamplesPassed;
+    case VideoCommon::QueryType::StreamingPrimitivesSucceeded:
+        // case VideoCommon::QueryType::StreamingByteCount:
+        // TODO: StreamingByteCount = StreamingPrimitivesSucceeded * num_verts * vert_stride
+        return VideoCore::QueryType::TfbPrimitivesWritten;
+    default: // Every other guest type has no mapping; callers must handle the empty result.
+        return std::nullopt;
+    }
+}
 } // Anonymous namespace
 
 RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu_,
@@ -162,14 +178,18 @@ void RasterizerOpenGL::Clear(u32 layer_count) {
         SyncFramebufferSRGB();
     }
     if (regs.clear_surface.Z) {
-        ASSERT_MSG(regs.zeta_enable != 0, "Tried to clear Z but buffer is not enabled!");
+        if (regs.zeta_enable == 0) { // Log (instead of asserting) when no zeta buffer is bound.
+            LOG_DEBUG(Render_OpenGL, "Tried to clear Z but buffer is not enabled!");
+        }
         use_depth = true;
 
         state_tracker.NotifyDepthMask();
         glDepthMask(GL_TRUE);
     }
     if (regs.clear_surface.S) {
-        ASSERT_MSG(regs.zeta_enable, "Tried to clear stencil but buffer is not enabled!");
+        if (!regs.zeta_enable) { // Same diagnostic for stencil clears.
+            LOG_DEBUG(Render_OpenGL, "Tried to clear stencil but buffer is not enabled!");
+        }
         use_stencil = true;
     }
 
@@ -212,7 +232,6 @@ void RasterizerOpenGL::PrepareDraw(bool is_indexed, Func&& draw_func) {
 
     SCOPE_EXIT({ gpu.TickWork(); });
     gpu_memory->FlushCaching();
-    query_cache.UpdateCounters();
 
     GraphicsPipeline* const pipeline{shader_cache.CurrentGraphicsPipeline()};
     if (!pipeline) {
@@ -330,7 +349,6 @@ void RasterizerOpenGL::DrawTexture() {
     MICROPROFILE_SCOPE(OpenGL_Drawing);
 
     SCOPE_EXIT({ gpu.TickWork(); });
-    query_cache.UpdateCounters();
 
     texture_cache.SynchronizeGraphicsDescriptors();
     texture_cache.UpdateRenderTargets(false);
@@ -397,21 +415,28 @@ void RasterizerOpenGL::DispatchCompute() {
 }
 
 void RasterizerOpenGL::ResetCounter(VideoCommon::QueryType type) {
-    if (type == VideoCommon::QueryType::ZPassPixelCount64) {
-        query_cache.ResetCounter(VideoCore::QueryType::SamplesPassed);
+    const auto query_cache_type = MaxwellToVideoCoreQuery(type);
+    if (!query_cache_type.has_value()) { // No query cache type maps to this guest type.
+        UNIMPLEMENTED_MSG("Reset query type: {}", type);
+        return;
     }
+    query_cache.ResetCounter(*query_cache_type);
 }
 
 void RasterizerOpenGL::Query(GPUVAddr gpu_addr, VideoCommon::QueryType type,
                              VideoCommon::QueryPropertiesFlags flags, u32 payload, u32 subreport) {
-    if (type == VideoCommon::QueryType::ZPassPixelCount64) {
-        if (True(flags & VideoCommon::QueryPropertiesFlags::HasTimeout)) {
-            query_cache.Query(gpu_addr, VideoCore::QueryType::SamplesPassed, {gpu.GetTicks()});
-        } else {
-            query_cache.Query(gpu_addr, VideoCore::QueryType::SamplesPassed, std::nullopt);
-        }
-        return;
+    const auto query_cache_type = MaxwellToVideoCoreQuery(type);
+    if (!query_cache_type.has_value()) { // Types without a cache mapping use the fallback.
+        return QueryFallback(gpu_addr, type, flags, payload, subreport);
     }
+    const bool has_timeout = True(flags & VideoCommon::QueryPropertiesFlags::HasTimeout);
+    const auto timestamp = has_timeout ? std::optional<u64>{gpu.GetTicks()} : std::nullopt;
+    query_cache.Query(gpu_addr, *query_cache_type, timestamp); // Timestamp only if HasTimeout.
+}
+
+void RasterizerOpenGL::QueryFallback(GPUVAddr gpu_addr, VideoCommon::QueryType type,
+                                     VideoCommon::QueryPropertiesFlags flags, u32 payload,
+                                     u32 subreport) {
     if (type != VideoCommon::QueryType::Payload) {
         payload = 1u;
     }
@@ -1294,15 +1319,13 @@ void RasterizerOpenGL::BeginTransformFeedback(GraphicsPipeline* program, GLenum
     program->ConfigureTransformFeedback();
 
     UNIMPLEMENTED_IF(regs.IsShaderConfigEnabled(Maxwell::ShaderType::TessellationInit) ||
-                     regs.IsShaderConfigEnabled(Maxwell::ShaderType::Tessellation) ||
-                     regs.IsShaderConfigEnabled(Maxwell::ShaderType::Geometry));
-    UNIMPLEMENTED_IF(primitive_mode != GL_POINTS);
+                     regs.IsShaderConfigEnabled(Maxwell::ShaderType::Tessellation));
 
     // We may have to call BeginTransformFeedbackNV here since they seem to call different
     // implementations on Nvidia's driver (the pointer is different) but we are using
     // ARB_transform_feedback3 features with NV_transform_feedback interactions and the ARB
     // extension doesn't define BeginTransformFeedback (without NV) interactions. It just works.
-    glBeginTransformFeedback(GL_POINTS);
+    glBeginTransformFeedback(primitive_mode);
 }
 
 void RasterizerOpenGL::EndTransformFeedback() {
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h
index d7f1481d2..15d371215 100755
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -225,6 +225,9 @@ private:
     /// End a transform feedback
     void EndTransformFeedback();
 
+    void QueryFallback(GPUVAddr gpu_addr, VideoCommon::QueryType type,
+                       VideoCommon::QueryPropertiesFlags flags, u32 payload, u32 subreport);
+
     Tegra::GPU& gpu;
 
     const Device& device;
diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp
index 692f14661..12873423c 100755
--- a/src/video_core/renderer_opengl/renderer_opengl.cpp
+++ b/src/video_core/renderer_opengl/renderer_opengl.cpp
@@ -168,6 +168,14 @@ RendererOpenGL::RendererOpenGL(Core::TelemetrySession& telemetry_session_,
     if (!GLAD_GL_ARB_seamless_cubemap_per_texture && !GLAD_GL_AMD_seamless_cubemap_per_texture) {
         glEnable(GL_TEXTURE_CUBE_MAP_SEAMLESS);
     }
+    // Enable unified vertex attributes and query vertex buffer address when the driver supports it
+    if (device.HasVertexBufferUnifiedMemory()) {
+        glEnableClientState(GL_VERTEX_ATTRIB_ARRAY_UNIFIED_NV);
+        glEnableClientState(GL_ELEMENT_ARRAY_UNIFIED_NV);
+        glMakeNamedBufferResidentNV(vertex_buffer.handle, GL_READ_ONLY);
+        glGetNamedBufferParameterui64vNV(vertex_buffer.handle, GL_BUFFER_GPU_ADDRESS_NV,
+                                         &vertex_buffer_address);
+    }
 }
 
 RendererOpenGL::~RendererOpenGL() = default;
@@ -667,7 +675,13 @@ void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) {
                          offsetof(ScreenRectVertex, tex_coord));
     glVertexAttribBinding(PositionLocation, 0);
     glVertexAttribBinding(TexCoordLocation, 0);
-    glBindVertexBuffer(0, vertex_buffer.handle, 0, sizeof(ScreenRectVertex));
+    if (device.HasVertexBufferUnifiedMemory()) {
+        glBindVertexBuffer(0, 0, 0, sizeof(ScreenRectVertex));
+        glBufferAddressRangeNV(GL_VERTEX_ATTRIB_ARRAY_ADDRESS_NV, 0, vertex_buffer_address,
+                               sizeof(vertices));
+    } else {
+        glBindVertexBuffer(0, vertex_buffer.handle, 0, sizeof(ScreenRectVertex));
+    }
 
     if (Settings::values.scaling_filter.GetValue() != Settings::ScalingFilter::NearestNeighbor) {
         glBindSampler(0, present_sampler.handle);
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
index ace107139..e1e26bac5 100755
--- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
@@ -485,6 +485,10 @@ void RasterizerVulkan::DispatchCompute() {
 }
 
 void RasterizerVulkan::ResetCounter(VideoCommon::QueryType type) {
+    if (type != VideoCommon::QueryType::ZPassPixelCount64) {
+        LOG_DEBUG(Render_Vulkan, "Unimplemented counter reset={}", type);
+        return;
+    }
     query_cache.CounterReset(type);
 }