early-access version 4000
This commit is contained in:
		| @@ -1,7 +1,7 @@ | ||||
| yuzu emulator early access | ||||
| ============= | ||||
|  | ||||
| This is the source code for early-access 3999. | ||||
| This is the source code for early-access 4000. | ||||
|  | ||||
| ## Legal Notice | ||||
|  | ||||
|   | ||||
							
								
								
									
										2
									
								
								externals/CMakeLists.txt
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										2
									
								
								externals/CMakeLists.txt
									
									
									
									
										vendored
									
									
								
							| @@ -193,7 +193,7 @@ if (ANDROID) | ||||
|    endif() | ||||
| endif() | ||||
|  | ||||
| if (CMAKE_SYSTEM_NAME STREQUAL "Linux") | ||||
| if (UNIX AND NOT APPLE) | ||||
|     add_subdirectory(gamemode) | ||||
| endif() | ||||
|  | ||||
|   | ||||
| @@ -182,7 +182,7 @@ if(ANDROID) | ||||
|     ) | ||||
| endif() | ||||
|  | ||||
| if (CMAKE_SYSTEM_NAME STREQUAL "Linux") | ||||
| if (UNIX AND NOT APPLE) | ||||
|   target_sources(common PRIVATE | ||||
|     linux/gamemode.cpp | ||||
|     linux/gamemode.h | ||||
|   | ||||
| @@ -935,8 +935,8 @@ if (HAS_NCE) | ||||
|         arm/nce/arm_nce.h | ||||
|         arm/nce/arm_nce.s | ||||
|         arm/nce/guest_context.h | ||||
|         arm/nce/patch.cpp | ||||
|         arm/nce/patch.h | ||||
|         arm/nce/patcher.cpp | ||||
|         arm/nce/patcher.h | ||||
|         arm/nce/instructions.h | ||||
|     ) | ||||
|     target_link_libraries(core PRIVATE merry::oaknut) | ||||
|   | ||||
| @@ -6,7 +6,7 @@ | ||||
|  | ||||
| #include "common/signal_chain.h" | ||||
| #include "core/arm/nce/arm_nce.h" | ||||
| #include "core/arm/nce/patch.h" | ||||
| #include "core/arm/nce/patcher.h" | ||||
| #include "core/core.h" | ||||
| #include "core/memory.h" | ||||
|  | ||||
|   | ||||
							
								
								
									
										474
									
								
								src/core/arm/nce/patcher.cpp
									
									
									
									
									
										Executable file
									
								
							
							
						
						
									
										474
									
								
								src/core/arm/nce/patcher.cpp
									
									
									
									
									
										Executable file
									
								
							| @@ -0,0 +1,474 @@ | ||||
| // SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project | ||||
| // SPDX-License-Identifier: GPL-2.0-or-later | ||||
|  | ||||
| #include "common/arm64/native_clock.h" | ||||
| #include "common/bit_cast.h" | ||||
| #include "common/literals.h" | ||||
| #include "core/arm/nce/arm_nce.h" | ||||
| #include "core/arm/nce/guest_context.h" | ||||
| #include "core/arm/nce/instructions.h" | ||||
| #include "core/arm/nce/patcher.h" | ||||
| #include "core/core.h" | ||||
| #include "core/core_timing.h" | ||||
| #include "core/hle/kernel/svc.h" | ||||
|  | ||||
| namespace Core::NCE { | ||||
|  | ||||
| using namespace Common::Literals; | ||||
| using namespace oaknut::util; | ||||
|  | ||||
| using NativeExecutionParameters = Kernel::KThread::NativeExecutionParameters; | ||||
|  | ||||
| constexpr size_t MaxRelativeBranch = 128_MiB; | ||||
| constexpr u32 ModuleCodeIndex = 0x24 / sizeof(u32); | ||||
|  | ||||
| // The code generator is bound to m_patch_instructions, which the class declares ahead of `c` | ||||
| // so that the vector is already constructed when the generator receives it. | ||||
| Patcher::Patcher() | ||||
|     : c(m_patch_instructions) { | ||||
| } | ||||
|  | ||||
| Patcher::~Patcher() = default; | ||||
|  | ||||
| // Scans the text segment of `program_image` and emits a handler into the patch section for | ||||
| // each instruction that must be emulated (SVC, selected MRS/MSR accesses), recording the | ||||
| // relocations that RelocateAndCopy resolves afterwards. Exclusive accesses are only recorded. | ||||
| // The patch mode is chosen from the image size at the end. | ||||
| void Patcher::PatchText(const Kernel::PhysicalMemory& program_image, | ||||
|                         const Kernel::CodeSet::Segment& code) { | ||||
|     // Emit the shared save/load context routines first and bind their labels. | ||||
|     c.l(m_save_context); | ||||
|     WriteSaveContext(); | ||||
|     c.l(m_load_context); | ||||
|     WriteLoadContext(); | ||||
|  | ||||
|     // View the text segment as a read-only sequence of 32-bit instruction words. | ||||
|     const auto segment = std::span{program_image}.subspan(code.offset, code.size); | ||||
|     const std::span<const u32> words{reinterpret_cast<const u32*>(segment.data()), | ||||
|                                      segment.size() / sizeof(u32)}; | ||||
|     const u32 word_count = static_cast<u32>(words.size()); | ||||
|  | ||||
|     // Words before ModuleCodeIndex are not examined. | ||||
|     for (u32 index = ModuleCodeIndex; index < word_count; ++index) { | ||||
|         const u32 inst = words[index]; | ||||
|  | ||||
|         // Registers a module-to-patch branch for the current word (at the current patch | ||||
|         // offset) and yields the byte offset of the following word, which is where the | ||||
|         // emitted handler resumes guest execution. | ||||
|         const auto RedirectToPatch = [this, index]() -> uintptr_t { | ||||
|             const uintptr_t here = index * sizeof(u32); | ||||
|             this->BranchToPatch(here); | ||||
|             return here + sizeof(u32); | ||||
|         }; | ||||
|  | ||||
|         // SVC | ||||
|         if (auto svc = SVC{inst}; svc.Verify()) { | ||||
|             const uintptr_t resume = RedirectToPatch(); | ||||
|             WriteSvcTrampoline(resume, svc.GetValue()); | ||||
|             continue; | ||||
|         } | ||||
|  | ||||
|         // MRS: decode once and dispatch on the system register being read. | ||||
|         if (auto mrs = MRS{inst}; mrs.Verify()) { | ||||
|             const auto sysreg = mrs.GetSystemReg(); | ||||
|             const bool reads_tpidrro = sysreg == TpidrroEl0; | ||||
|  | ||||
|             // MRS Xn, TPIDR_EL0 | ||||
|             // MRS Xn, TPIDRRO_EL0 | ||||
|             if (reads_tpidrro || sysreg == TpidrEl0) { | ||||
|                 const auto src_reg = reads_tpidrro ? oaknut::SystemReg::TPIDRRO_EL0 | ||||
|                                                    : oaknut::SystemReg::TPIDR_EL0; | ||||
|                 const auto dest_reg = oaknut::XReg{static_cast<int>(mrs.GetRt())}; | ||||
|                 const uintptr_t resume = RedirectToPatch(); | ||||
|                 WriteMrsHandler(resume, dest_reg, src_reg); | ||||
|                 continue; | ||||
|             } | ||||
|  | ||||
|             // MRS Xn, CNTPCT_EL0 | ||||
|             if (sysreg == CntpctEl0) { | ||||
|                 const auto dest_reg = oaknut::XReg{static_cast<int>(mrs.GetRt())}; | ||||
|                 const uintptr_t resume = RedirectToPatch(); | ||||
|                 WriteCntpctHandler(resume, dest_reg); | ||||
|                 continue; | ||||
|             } | ||||
|  | ||||
|             // MRS Xn, CNTFRQ_EL0 is not handled. | ||||
|             if (sysreg == CntfrqEl0) { | ||||
|                 UNREACHABLE(); | ||||
|             } | ||||
|         } | ||||
|  | ||||
|         // MSR TPIDR_EL0, Xn | ||||
|         if (auto msr = MSR{inst}; msr.Verify() && msr.GetSystemReg() == TpidrEl0) { | ||||
|             const auto src_reg = oaknut::XReg{static_cast<int>(msr.GetRt())}; | ||||
|             const uintptr_t resume = RedirectToPatch(); | ||||
|             WriteMsrHandler(resume, src_reg); | ||||
|             continue; | ||||
|         } | ||||
|  | ||||
|         // Exclusive accesses are rewritten in place later; remember the word index only. | ||||
|         if (auto exclusive = Exclusive{inst}; exclusive.Verify()) { | ||||
|             m_exclusives.push_back(index); | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     // Choose the patch placement used by the final relocation step: images larger than | ||||
|     // MaxRelativeBranch get the patch section before .text, all others after .data. | ||||
|     if (program_image.size() > MaxRelativeBranch) { | ||||
|         this->mode = PatchMode::PreText; | ||||
|     } else { | ||||
|         this->mode = PatchMode::PostData; | ||||
|     } | ||||
| } | ||||
|  | ||||
| // Resolves every relocation recorded by PatchText against the final layout, reports the | ||||
| // post-SVC trampolines through `out_trampolines`, converts recorded exclusives to their ordered | ||||
| // form, and copies the patch section into `program_image` (at its start in PreText mode, | ||||
| // appended after a resize otherwise). | ||||
| void Patcher::RelocateAndCopy(Common::ProcessAddress load_base, | ||||
|                               const Kernel::CodeSet::Segment& code, | ||||
|                               Kernel::PhysicalMemory& program_image, | ||||
|                               EntryTrampolines* out_trampolines) { | ||||
|     const bool pre_text = mode == PatchMode::PreText; | ||||
|     const size_t patch_size = GetSectionSize(); | ||||
|     const size_t image_size = program_image.size(); | ||||
|  | ||||
|     // Absolute addresses of the module text and of the patch section for the chosen mode. | ||||
|     const auto load_addr = GetInteger(load_base); | ||||
|     const auto text_base = pre_text ? load_addr + patch_size : load_addr; | ||||
|     const auto patch_base = pre_text ? load_addr : load_addr + image_size; | ||||
|  | ||||
|     // Writable view of the text segment as 32-bit instruction words. | ||||
|     const auto segment = std::span{program_image}.subspan(code.offset, code.size); | ||||
|     const std::span<u32> words{reinterpret_cast<u32*>(segment.data()), | ||||
|                                segment.size() / sizeof(u32)}; | ||||
|     u32* const patch_words = m_patch_instructions.data(); | ||||
|  | ||||
|     // Module -> patch: overwrite the patched instruction with a relative branch. | ||||
|     for (const Relocation& rel : m_branch_to_patch_relocations) { | ||||
|         oaknut::CodeGenerator rc{words.data() + rel.module_offset / sizeof(u32)}; | ||||
|         rc.B(pre_text ? rel.patch_offset - patch_size - rel.module_offset | ||||
|                       : image_size - rel.module_offset + rel.patch_offset); | ||||
|     } | ||||
|  | ||||
|     // Patch -> module: fill in the placeholder word reserved by BranchToModule. | ||||
|     for (const Relocation& rel : m_branch_to_module_relocations) { | ||||
|         oaknut::CodeGenerator rc{patch_words + rel.patch_offset / sizeof(u32)}; | ||||
|         rc.B(pre_text ? patch_size - rel.patch_offset + rel.module_offset | ||||
|                       : rel.module_offset - image_size - rel.patch_offset); | ||||
|     } | ||||
|  | ||||
|     // Fill in the 64-bit PC constants reserved by WriteModulePc. | ||||
|     for (const Relocation& rel : m_write_module_pc_relocations) { | ||||
|         oaknut::CodeGenerator rc{patch_words + rel.patch_offset / sizeof(u32)}; | ||||
|         rc.dx(text_base + rel.module_offset); | ||||
|     } | ||||
|  | ||||
|     // Report each post-SVC trampoline: module return address -> trampoline address. | ||||
|     for (const Trampoline& entry : m_trampolines) { | ||||
|         out_trampolines->insert( | ||||
|             {text_base + entry.module_offset, patch_base + entry.patch_offset}); | ||||
|     } | ||||
|  | ||||
|     // Cortex-A57 seems to treat all exclusives as ordered, but newer processors do not. | ||||
|     // Rewrite the recorded exclusives as ordered accesses to preserve this assumption. | ||||
|     for (const ModuleTextAddress word_index : m_exclusives) { | ||||
|         u32& word = words[word_index]; | ||||
|         auto exclusive = Exclusive{word}; | ||||
|         word = exclusive.AsOrdered(); | ||||
|     } | ||||
|  | ||||
|     // Copy the patch section into the program image. In PostData mode the image is grown | ||||
|     // first, so the destination pointer is only taken after the resize. | ||||
|     size_t copy_offset = 0; | ||||
|     if (!pre_text) { | ||||
|         program_image.resize(image_size + patch_size); | ||||
|         copy_offset = image_size; | ||||
|     } | ||||
|     std::memcpy(program_image.data() + copy_offset, m_patch_instructions.data(), | ||||
|                 m_patch_instructions.size() * sizeof(u32)); | ||||
| } | ||||
|  | ||||
| // Returns the byte size of the emitted patch instructions, aligned up to YUZU_PAGESIZE. | ||||
| size_t Patcher::GetSectionSize() const noexcept { | ||||
|     const size_t emitted_bytes = m_patch_instructions.size() * sizeof(u32); | ||||
|     return Common::AlignUp(emitted_bytes, Core::Memory::YUZU_PAGESIZE); | ||||
| } | ||||
|  | ||||
| // Emits the shared routine that restores guest state from the GuestContext: FPSR, FPCR, NZCV, | ||||
| // Q0-Q31 and X0-X29. It is reached with BL after the caller has pushed a 16-byte frame; X30 is | ||||
| // left for the calling patch code to restore. | ||||
| void Patcher::WriteLoadContext() { | ||||
|     // Locate both register arrays through offsetof so the emitted offsets follow the | ||||
|     // GuestContext layout instead of assuming cpu_registers sits at offset zero. | ||||
|     static constexpr size_t GPR_OFF = offsetof(GuestContext, cpu_registers); | ||||
|     static constexpr size_t VEC_OFF = offsetof(GuestContext, vector_registers); | ||||
|  | ||||
|     // This function was called, which modifies X30, so use that as a scratch register. | ||||
|     // SP contains the guest X30, so save our return X30 to SP + 8, since we have allocated 16 bytes | ||||
|     // of stack. | ||||
|     c.STR(X30, SP, 8); | ||||
|     c.MRS(X30, oaknut::SystemReg::TPIDR_EL0); | ||||
|     c.LDR(X30, X30, offsetof(NativeExecutionParameters, native_context)); | ||||
|  | ||||
|     // Load system registers. X0 is free to use here because it is reloaded below. | ||||
|     c.LDR(W0, X30, offsetof(GuestContext, fpsr)); | ||||
|     c.MSR(oaknut::SystemReg::FPSR, X0); | ||||
|     c.LDR(W0, X30, offsetof(GuestContext, fpcr)); | ||||
|     c.MSR(oaknut::SystemReg::FPCR, X0); | ||||
|     c.LDR(W0, X30, offsetof(GuestContext, nzcv)); | ||||
|     c.MSR(oaknut::SystemReg::NZCV, X0); | ||||
|  | ||||
|     // Load all vector registers, two at a time. | ||||
|     for (int i = 0; i <= 30; i += 2) { | ||||
|         c.LDP(oaknut::QReg{i}, oaknut::QReg{i + 1}, X30, VEC_OFF + 16 * i); | ||||
|     } | ||||
|  | ||||
|     // Load all general-purpose registers except X30, two at a time. | ||||
|     for (int i = 0; i <= 28; i += 2) { | ||||
|         c.LDP(oaknut::XReg{i}, oaknut::XReg{i + 1}, X30, GPR_OFF + sizeof(u64) * i); | ||||
|     } | ||||
|  | ||||
|     // Reload our return X30 from the stack and return. | ||||
|     // The patch code will reload the guest X30 for us. | ||||
|     c.LDR(X30, SP, 8); | ||||
|     c.RET(); | ||||
| } | ||||
|  | ||||
| // Emits the shared routine that stores guest state into the GuestContext: X0-X30, SP, Q0-Q31, | ||||
| // FPSR, FPCR and NZCV. It is reached with BL after the caller has pushed the guest X30 in a | ||||
| // 16-byte frame, so on entry SP is 16 bytes below the guest SP. | ||||
| void Patcher::WriteSaveContext() { | ||||
|     // Locate both register arrays through offsetof so the emitted offsets follow the | ||||
|     // GuestContext layout instead of assuming cpu_registers sits at offset zero. | ||||
|     static constexpr size_t GPR_OFF = offsetof(GuestContext, cpu_registers); | ||||
|     static constexpr size_t VEC_OFF = offsetof(GuestContext, vector_registers); | ||||
|  | ||||
|     // This function was called, which modifies X30, so use that as a scratch register. | ||||
|     // SP contains the guest X30, so save our X30 to SP + 8, since we have allocated 16 bytes of | ||||
|     // stack. | ||||
|     c.STR(X30, SP, 8); | ||||
|     c.MRS(X30, oaknut::SystemReg::TPIDR_EL0); | ||||
|     c.LDR(X30, X30, offsetof(NativeExecutionParameters, native_context)); | ||||
|  | ||||
|     // Store all general-purpose registers except X30, two at a time. | ||||
|     for (int i = 0; i <= 28; i += 2) { | ||||
|         c.STP(oaknut::XReg{i}, oaknut::XReg{i + 1}, X30, GPR_OFF + sizeof(u64) * i); | ||||
|     } | ||||
|  | ||||
|     // Store all vector registers, two at a time. | ||||
|     for (int i = 0; i <= 30; i += 2) { | ||||
|         c.STP(oaknut::QReg{i}, oaknut::QReg{i + 1}, X30, VEC_OFF + 16 * i); | ||||
|     } | ||||
|  | ||||
|     // Store guest system registers, X30 and SP, using X0 as a scratch register. | ||||
|     // X0 is pushed in a second 16-byte frame, so the guest X30 pushed by the caller is at | ||||
|     // SP + 16 and the guest SP is SP + 32. | ||||
|     c.STR(X0, SP, PRE_INDEXED, -16); | ||||
|     c.LDR(X0, SP, 16); | ||||
|     c.STR(X0, X30, GPR_OFF + sizeof(u64) * 30); | ||||
|     c.ADD(X0, SP, 32); | ||||
|     c.STR(X0, X30, offsetof(GuestContext, sp)); | ||||
|     c.MRS(X0, oaknut::SystemReg::FPSR); | ||||
|     c.STR(W0, X30, offsetof(GuestContext, fpsr)); | ||||
|     c.MRS(X0, oaknut::SystemReg::FPCR); | ||||
|     c.STR(W0, X30, offsetof(GuestContext, fpcr)); | ||||
|     c.MRS(X0, oaknut::SystemReg::NZCV); | ||||
|     c.STR(W0, X30, offsetof(GuestContext, nzcv)); | ||||
|     c.LDR(X0, SP, POST_INDEXED, 16); | ||||
|  | ||||
|     // Reload our return X30 from the stack, and return. | ||||
|     c.LDR(X30, SP, 8); | ||||
|     c.RET(); | ||||
| } | ||||
|  | ||||
| void Patcher::WriteSvcTrampoline(ModuleDestLabel module_dest, u32 svc_id) { | ||||
|     // Emits the handler for one SVC. Guest state is about to be saved, so lock the context. | ||||
|     this->LockContext(); | ||||
|  | ||||
|     // Push guest X30, call the shared save-context routine, then pop the frame again. | ||||
|     // This saves all registers except PC, but we know PC at patch time. | ||||
|     c.STR(X30, SP, PRE_INDEXED, -16); | ||||
|     c.BL(m_save_context); | ||||
|     c.LDR(X30, SP, POST_INDEXED, 16); | ||||
|  | ||||
|     // Now that we've saved all registers, we can use any registers as scratch. | ||||
|     // Store the post-SVC PC into GuestContext::pc; it is read from the pc_after_svc literal. | ||||
|     oaknut::Label pc_after_svc; | ||||
|     c.MRS(X1, oaknut::SystemReg::TPIDR_EL0); | ||||
|     c.LDR(X1, X1, offsetof(NativeExecutionParameters, native_context)); | ||||
|     c.LDR(X2, pc_after_svc); | ||||
|     c.STR(X2, X1, offsetof(GuestContext, pc)); | ||||
|  | ||||
|     // Store the SVC number in GuestContext::svc_swi, to be executed when we return. | ||||
|     c.MOV(X2, svc_id); | ||||
|     c.STR(W2, X1, offsetof(GuestContext, svc_swi)); | ||||
|  | ||||
|     // We are calling a SVC. Exchange esr_el1 with zero in an exclusive loop; old value in X0. | ||||
|     static_assert(std::is_same_v<std::underlying_type_t<HaltReason>, u64>); | ||||
|     oaknut::Label retry; | ||||
|     c.ADD(X2, X1, offsetof(GuestContext, esr_el1)); | ||||
|     c.l(retry); | ||||
|     c.LDAXR(X0, X2); | ||||
|     c.STLXR(W3, XZR, X2); | ||||
|     c.CBNZ(W3, retry); | ||||
|  | ||||
|     // Add the "calling SVC" flag. Since this is X0, this is now our return value. | ||||
|     c.ORR(X0, X0, static_cast<u64>(HaltReason::SupervisorCall)); | ||||
|  | ||||
|     // Offset the GuestContext pointer to the HostContext member. | ||||
|     // LDP/STP has a limited range of [-512, 504] which we can't reach otherwise. | ||||
|     // NB: Due to this, all offsets below are from the start of HostContext. | ||||
|     c.ADD(X1, X1, offsetof(GuestContext, host_ctx)); | ||||
|  | ||||
|     // Reload host SP and TPIDR_EL0 with one LDP; the assert checks that they are adjacent. | ||||
|     static_assert(offsetof(HostContext, host_sp) + 8 == offsetof(HostContext, host_tpidr_el0)); | ||||
|     c.LDP(X2, X3, X1, offsetof(HostContext, host_sp)); | ||||
|     c.MOV(SP, X2); | ||||
|     c.MSR(oaknut::SystemReg::TPIDR_EL0, X3); | ||||
|  | ||||
|     // Load callee-saved host registers (X19-X30, Q8-Q15) and return to host. | ||||
|     static constexpr size_t HOST_REGS_OFF = offsetof(HostContext, host_saved_regs); | ||||
|     static constexpr size_t HOST_VREGS_OFF = offsetof(HostContext, host_saved_vregs); | ||||
|     c.LDP(X19, X20, X1, HOST_REGS_OFF); | ||||
|     c.LDP(X21, X22, X1, HOST_REGS_OFF + 2 * sizeof(u64)); | ||||
|     c.LDP(X23, X24, X1, HOST_REGS_OFF + 4 * sizeof(u64)); | ||||
|     c.LDP(X25, X26, X1, HOST_REGS_OFF + 6 * sizeof(u64)); | ||||
|     c.LDP(X27, X28, X1, HOST_REGS_OFF + 8 * sizeof(u64)); | ||||
|     c.LDP(X29, X30, X1, HOST_REGS_OFF + 10 * sizeof(u64)); | ||||
|     c.LDP(Q8, Q9, X1, HOST_VREGS_OFF); | ||||
|     c.LDP(Q10, Q11, X1, HOST_VREGS_OFF + 2 * sizeof(u128)); | ||||
|     c.LDP(Q12, Q13, X1, HOST_VREGS_OFF + 4 * sizeof(u128)); | ||||
|     c.LDP(Q14, Q15, X1, HOST_VREGS_OFF + 6 * sizeof(u128)); | ||||
|     c.RET(); | ||||
|  | ||||
|     // Record the post-SVC trampoline (current patch offset, keyed by module_dest). The code | ||||
|     // emitted from here on restores guest state and jumps back to the guest. | ||||
|     m_trampolines.push_back({c.offset(), module_dest}); | ||||
|  | ||||
|     // Host called this location. Save the return address (to the X30 slot of host_saved_regs) | ||||
|     // so we can unwind the stack properly when jumping back. | ||||
|     c.MRS(X2, oaknut::SystemReg::TPIDR_EL0); | ||||
|     c.LDR(X2, X2, offsetof(NativeExecutionParameters, native_context)); | ||||
|     c.ADD(X0, X2, offsetof(GuestContext, host_ctx)); | ||||
|     c.STR(X30, X0, offsetof(HostContext, host_saved_regs) + 11 * sizeof(u64)); | ||||
|  | ||||
|     // Reload all guest registers except X30 and PC. | ||||
|     // The load-context routine also expects 16 bytes of stack already allocated. | ||||
|     c.STR(X30, SP, PRE_INDEXED, -16); | ||||
|     c.BL(m_load_context); | ||||
|     c.LDR(X30, SP, POST_INDEXED, 16); | ||||
|  | ||||
|     // Use X1 as a scratch register to restore guest X30; X1 is saved and restored around it. | ||||
|     c.STR(X1, SP, PRE_INDEXED, -16); | ||||
|     c.MRS(X1, oaknut::SystemReg::TPIDR_EL0); | ||||
|     c.LDR(X1, X1, offsetof(NativeExecutionParameters, native_context)); | ||||
|     c.LDR(X30, X1, offsetof(GuestContext, cpu_registers) + sizeof(u64) * 30); | ||||
|     c.LDR(X1, SP, POST_INDEXED, 16); | ||||
|  | ||||
|     // Guest state is fully restored, so unlock the context. | ||||
|     this->UnlockContext(); | ||||
|  | ||||
|     // Jump back to the instruction after the emulated SVC. | ||||
|     this->BranchToModule(module_dest); | ||||
|  | ||||
|     // Literal holding the PC after the SVC; bound to pc_after_svc and filled in at relocation. | ||||
|     c.l(pc_after_svc); | ||||
|     this->WriteModulePc(module_dest); | ||||
| } | ||||
|  | ||||
| // Emits the handler for `MRS dest_reg, TPIDR_EL0/TPIDRRO_EL0`: the emulated value is read | ||||
| // from the NativeExecutionParameters block addressed by the real TPIDR_EL0, using dest_reg | ||||
| // itself as the only temporary. | ||||
| void Patcher::WriteMrsHandler(ModuleDestLabel module_dest, oaknut::XReg dest_reg, | ||||
|                               oaknut::SystemReg src_reg) { | ||||
|     // Select which emulated TLS register the guest asked for. | ||||
|     const size_t tls_field_offset = src_reg == oaknut::SystemReg::TPIDRRO_EL0 | ||||
|                                         ? offsetof(NativeExecutionParameters, tpidrro_el0) | ||||
|                                         : offsetof(NativeExecutionParameters, tpidr_el0); | ||||
|  | ||||
|     // Fetch the parameter block pointer, then replace it with the emulated register value. | ||||
|     c.MRS(dest_reg, oaknut::SystemReg::TPIDR_EL0); | ||||
|     c.LDR(dest_reg, dest_reg, tls_field_offset); | ||||
|  | ||||
|     // Resume the guest at the instruction following the emulated MRS. | ||||
|     this->BranchToModule(module_dest); | ||||
| } | ||||
|  | ||||
| // Emits the handler for `MSR TPIDR_EL0, src_reg`: the guest value is written to | ||||
| // NativeExecutionParameters::tpidr_el0 instead of the real system register. | ||||
| void Patcher::WriteMsrHandler(ModuleDestLabel module_dest, oaknut::XReg src_reg) { | ||||
|     // Pick a temporary that does not alias the source register: X0, or X1 when the source | ||||
|     // is X0. | ||||
|     const bool source_is_x0 = src_reg.index() == 0; | ||||
|     const oaknut::XReg temp = source_is_x0 ? X1 : X0; | ||||
|  | ||||
|     // Preserve the temporary's guest value on the stack. | ||||
|     c.STR(temp, SP, PRE_INDEXED, -16); | ||||
|  | ||||
|     // Store the guest value into the parameter block addressed by the real TPIDR_EL0. | ||||
|     c.MRS(temp, oaknut::SystemReg::TPIDR_EL0); | ||||
|     c.STR(src_reg, temp, offsetof(NativeExecutionParameters, tpidr_el0)); | ||||
|  | ||||
|     // Bring the temporary's guest value back and release the stack slot. | ||||
|     c.LDR(temp, SP, POST_INDEXED, 16); | ||||
|  | ||||
|     // Resume the guest at the instruction following the emulated MSR. | ||||
|     this->BranchToModule(module_dest); | ||||
| } | ||||
|  | ||||
| // Emits the handler for `MRS dest_reg, CNTPCT_EL0`: reads CNTVCT_EL0 and scales it by the | ||||
| // guest CNTFRQ factor, whose two 64-bit halves are embedded as literals after the handler. | ||||
| void Patcher::WriteCntpctHandler(ModuleDestLabel module_dest, oaknut::XReg dest_reg) { | ||||
|     static Common::Arm64::NativeClock clock{}; | ||||
|     const auto raw_factor = | ||||
|         Common::BitCast<std::array<u64, 2>>(clock.GetGuestCNTFRQFactor()); | ||||
|  | ||||
|     // Use X0/X1 as temporaries unless the destination is one of them; then use X2/X3. | ||||
|     const auto dest_index = dest_reg.index(); | ||||
|     const bool dest_is_x0_or_x1 = dest_index == 0 || dest_index == 1; | ||||
|     const oaknut::XReg temp_lo = dest_is_x0_or_x1 ? X2 : X0; | ||||
|     const oaknut::XReg temp_hi = dest_is_x0_or_x1 ? X3 : X1; | ||||
|  | ||||
|     oaknut::Label factor_lo; | ||||
|     oaknut::Label factor_hi; | ||||
|  | ||||
|     // Preserve the guest values of both temporaries. | ||||
|     c.STP(temp_lo, temp_hi, SP, PRE_INDEXED, -16); | ||||
|  | ||||
|     // Read the host counter into the destination. | ||||
|     c.MRS(dest_reg, oaknut::SystemReg::CNTVCT_EL0); | ||||
|  | ||||
|     // Fetch both halves of the scaling factor from the literals emitted below. | ||||
|     c.LDR(temp_lo, factor_lo); | ||||
|     c.LDR(temp_hi, factor_hi); | ||||
|  | ||||
|     // temp_lo = upper 64 bits of (counter * low half). | ||||
|     c.UMULH(temp_lo, dest_reg, temp_lo); | ||||
|  | ||||
|     // dest = counter * high half + temp_lo. | ||||
|     c.MADD(dest_reg, dest_reg, temp_hi, temp_lo); | ||||
|  | ||||
|     // Bring back the guest values of both temporaries. | ||||
|     c.LDP(temp_lo, temp_hi, SP, POST_INDEXED, 16); | ||||
|  | ||||
|     // Resume the guest at the instruction following the emulated MRS. | ||||
|     this->BranchToModule(module_dest); | ||||
|  | ||||
|     // Literal pool: low half of the factor, then high half. | ||||
|     c.l(factor_lo); | ||||
|     c.dx(raw_factor[0]); | ||||
|     c.l(factor_hi); | ||||
|     c.dx(raw_factor[1]); | ||||
| } | ||||
|  | ||||
| // Emits a spin loop that acquires NativeExecutionParameters::lock. X0 and X1 are used as | ||||
| // temporaries and are saved and restored around the loop. | ||||
| void Patcher::LockContext() { | ||||
|     // The loop below takes the lock by storing WZR, so the locked state must be zero. | ||||
|     static_assert(SpinLockLocked == 0); | ||||
|  | ||||
|     oaknut::Label spin; | ||||
|  | ||||
|     // Preserve the guest values of X0 and X1. | ||||
|     c.STP(X0, X1, SP, PRE_INDEXED, -16); | ||||
|  | ||||
|     // Each attempt starts by clearing the exclusive monitor and recomputing the lock address. | ||||
|     c.l(spin); | ||||
|     c.CLREX(); | ||||
|     c.MRS(X0, oaknut::SystemReg::TPIDR_EL0); | ||||
|     c.ADD(X0, X0, offsetof(NativeExecutionParameters, lock)); | ||||
|  | ||||
|     // Exclusive load of the lock word with acquire ordering. | ||||
|     c.LDAXR(W1, X0); | ||||
|  | ||||
|     // Zero means the lock is already held: try again. | ||||
|     c.CBZ(W1, spin); | ||||
|  | ||||
|     // Exclusive store of SpinLockLocked with relaxed ordering; W1 receives the status. | ||||
|     c.STXR(W1, WZR, X0); | ||||
|  | ||||
|     // A non-zero status means the store did not succeed: try again. | ||||
|     c.CBNZ(W1, spin); | ||||
|  | ||||
|     // Lock acquired. Bring back the guest values of X0 and X1. | ||||
|     c.LDP(X0, X1, SP, POST_INDEXED, 16); | ||||
| } | ||||
|  | ||||
| // Emits code that releases NativeExecutionParameters::lock by storing SpinLockUnlocked with | ||||
| // release ordering. X0 and X1 are used as temporaries and are saved and restored. | ||||
| void Patcher::UnlockContext() { | ||||
|     // Preserve the guest values of X0 and X1. | ||||
|     c.STP(X0, X1, SP, PRE_INDEXED, -16); | ||||
|  | ||||
|     // X0 = address of the lock word inside the parameter block. | ||||
|     c.MRS(X0, oaknut::SystemReg::TPIDR_EL0); | ||||
|     c.ADD(X0, X0, offsetof(NativeExecutionParameters, lock)); | ||||
|  | ||||
|     // W1 = the unlocked value. | ||||
|     c.MOV(W1, SpinLockUnlocked); | ||||
|  | ||||
|     // Publish the unlocked value with a store-release. | ||||
|     c.STLR(W1, X0); | ||||
|  | ||||
|     // Bring back the guest values of X0 and X1. | ||||
|     c.LDP(X0, X1, SP, POST_INDEXED, 16); | ||||
| } | ||||
|  | ||||
| } // namespace Core::NCE | ||||
							
								
								
									
										98
									
								
								src/core/arm/nce/patcher.h
									
									
									
									
									
										Executable file
									
								
							
							
						
						
									
										98
									
								
								src/core/arm/nce/patcher.h
									
									
									
									
									
										Executable file
									
								
							| @@ -0,0 +1,98 @@ | ||||
| // SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project | ||||
| // SPDX-License-Identifier: GPL-2.0-or-later | ||||
|  | ||||
| #pragma once | ||||
|  | ||||
| #include <span> | ||||
| #include <unordered_map> | ||||
| #include <vector> | ||||
| #include <oaknut/code_block.hpp> | ||||
| #include <oaknut/oaknut.hpp> | ||||
|  | ||||
| #include "common/common_types.h" | ||||
| #include "core/hle/kernel/code_set.h" | ||||
| #include "core/hle/kernel/k_typed_address.h" | ||||
| #include "core/hle/kernel/physical_memory.h" | ||||
|  | ||||
| namespace Core::NCE { | ||||
|  | ||||
| /// Placement of the generated patch section relative to the program image. | ||||
| enum class PatchMode : u32 { | ||||
|     None = 0,     ///< No placement selected (initial state of a Patcher) | ||||
|     PreText = 1,  ///< Patch section is inserted before .text | ||||
|     PostData = 2, ///< Patch section is inserted after .data | ||||
| }; | ||||
|  | ||||
| /// Address within the module text. | ||||
| using ModuleTextAddress = u64; | ||||
| /// Address within the patch section. | ||||
| using PatchTextAddress = u64; | ||||
| /// Maps a module text address to the patch-section address of its entry trampoline. | ||||
| using EntryTrampolines = std::unordered_map<ModuleTextAddress, PatchTextAddress>; | ||||
|  | ||||
| /// Generates a patch section for a module's text segment and relocates the module and the | ||||
| /// patch section against each other once the final layout is known. | ||||
| class Patcher { | ||||
| public: | ||||
|     explicit Patcher(); | ||||
|     ~Patcher(); | ||||
|  | ||||
|     /// Scans the `code` segment of `program_image` and emits the patch handlers. | ||||
|     void PatchText(const Kernel::PhysicalMemory& program_image, | ||||
|                    const Kernel::CodeSet::Segment& code); | ||||
|  | ||||
|     /// Applies the recorded relocations, fills `out_trampolines`, and copies the patch | ||||
|     /// section into `program_image`. | ||||
|     void RelocateAndCopy(Common::ProcessAddress load_base, const Kernel::CodeSet::Segment& code, | ||||
|                          Kernel::PhysicalMemory& program_image, EntryTrampolines* out_trampolines); | ||||
|  | ||||
|     /// Size in bytes of the patch section, rounded up to a whole page. | ||||
|     size_t GetSectionSize() const noexcept; | ||||
|  | ||||
|     /// Current patch placement; PatchMode::None until PatchText has selected one. | ||||
|     [[nodiscard]] PatchMode GetPatchMode() const noexcept { return mode; } | ||||
|  | ||||
| private: | ||||
|     /// Byte offset into the module text that a handler returns to. | ||||
|     using ModuleDestLabel = uintptr_t; | ||||
|  | ||||
|     /// Pairs a location in the patch section with a location in the module text. | ||||
|     struct Relocation { | ||||
|         ptrdiff_t patch_offset;  ///< Offset in bytes from the start of the patch section. | ||||
|         uintptr_t module_offset; ///< Offset in bytes from the start of the text section. | ||||
|     }; | ||||
|  | ||||
|     /// Post-SVC re-entry point: where it lives in the patch section and where it returns to. | ||||
|     struct Trampoline { | ||||
|         ptrdiff_t patch_offset; | ||||
|         uintptr_t module_offset; | ||||
|     }; | ||||
|  | ||||
|     // Emitters for the shared routines and the per-instruction handlers. | ||||
|     void WriteSaveContext(); | ||||
|     void WriteLoadContext(); | ||||
|     void LockContext(); | ||||
|     void UnlockContext(); | ||||
|     void WriteSvcTrampoline(ModuleDestLabel module_dest, u32 svc_id); | ||||
|     void WriteMrsHandler(ModuleDestLabel module_dest, oaknut::XReg dest_reg, | ||||
|                          oaknut::SystemReg src_reg); | ||||
|     void WriteMsrHandler(ModuleDestLabel module_dest, oaknut::XReg src_reg); | ||||
|     void WriteCntpctHandler(ModuleDestLabel module_dest, oaknut::XReg dest_reg); | ||||
|  | ||||
|     /// Records that the module instruction at `module_dest` must branch to the current patch | ||||
|     /// offset. Nothing is emitted. | ||||
|     void BranchToPatch(uintptr_t module_dest) { | ||||
|         m_branch_to_patch_relocations.push_back({c.offset(), module_dest}); | ||||
|     } | ||||
|  | ||||
|     /// Reserves one instruction word that is later rewritten as a branch to `module_dest`. | ||||
|     void BranchToModule(uintptr_t module_dest) { | ||||
|         m_branch_to_module_relocations.push_back({c.offset(), module_dest}); | ||||
|         c.dw(0); | ||||
|     } | ||||
|  | ||||
|     /// Reserves a 64-bit literal that is later rewritten with the address of `module_dest`. | ||||
|     void WriteModulePc(uintptr_t module_dest) { | ||||
|         m_write_module_pc_relocations.push_back({c.offset(), module_dest}); | ||||
|         c.dx(0); | ||||
|     } | ||||
|  | ||||
|     // List of patch instructions we have generated. Declared before `c`, which is | ||||
|     // constructed from it. | ||||
|     std::vector<u32> m_patch_instructions{}; | ||||
|  | ||||
|     oaknut::VectorCodeGenerator c; | ||||
|     std::vector<Trampoline> m_trampolines; | ||||
|     std::vector<Relocation> m_branch_to_patch_relocations{}; | ||||
|     std::vector<Relocation> m_branch_to_module_relocations{}; | ||||
|     std::vector<Relocation> m_write_module_pc_relocations{}; | ||||
|     std::vector<ModuleTextAddress> m_exclusives{}; | ||||
|     oaknut::Label m_save_context{}; | ||||
|     oaknut::Label m_load_context{}; | ||||
|     PatchMode mode{PatchMode::None}; | ||||
| }; | ||||
|  | ||||
| } // namespace Core::NCE | ||||
| @@ -16,7 +16,7 @@ | ||||
| #include "core/loader/nso.h" | ||||
|  | ||||
| #ifdef HAS_NCE | ||||
| #include "core/arm/nce/patch.h" | ||||
| #include "core/arm/nce/patcher.h" | ||||
| #endif | ||||
|  | ||||
| namespace Loader { | ||||
|   | ||||
| @@ -23,7 +23,7 @@ | ||||
| #include "core/memory.h" | ||||
|  | ||||
| #ifdef HAS_NCE | ||||
| #include "core/arm/nce/patch.h" | ||||
| #include "core/arm/nce/patcher.h" | ||||
| #endif | ||||
|  | ||||
| namespace Loader { | ||||
|   | ||||
| @@ -21,7 +21,7 @@ | ||||
| #include "core/memory.h" | ||||
|  | ||||
| #ifdef HAS_NCE | ||||
| #include "core/arm/nce/patch.h" | ||||
| #include "core/arm/nce/patcher.h" | ||||
| #endif | ||||
|  | ||||
| namespace Loader { | ||||
|   | ||||
| @@ -146,8 +146,12 @@ StagingBufferMap BufferCacheRuntime::UploadStagingBuffer(size_t size) { | ||||
|     return staging_buffer_pool.RequestUploadBuffer(size); | ||||
| } | ||||
|  | ||||
| StagingBufferMap BufferCacheRuntime::DownloadStagingBuffer(size_t size) { | ||||
|     return staging_buffer_pool.RequestDownloadBuffer(size); | ||||
| StagingBufferMap BufferCacheRuntime::DownloadStagingBuffer(size_t size, bool deferred) { | ||||
|     return staging_buffer_pool.RequestDownloadBuffer(size, deferred); | ||||
| } | ||||
|  | ||||
| void BufferCacheRuntime::FreeDeferredStagingBuffer(StagingBufferMap& buffer) { | ||||
|     staging_buffer_pool.FreeDeferredStagingBuffer(buffer); | ||||
| } | ||||
|  | ||||
| u64 BufferCacheRuntime::GetDeviceMemoryUsage() const { | ||||
|   | ||||
| @@ -66,7 +66,9 @@ public: | ||||
|  | ||||
|     [[nodiscard]] StagingBufferMap UploadStagingBuffer(size_t size); | ||||
|  | ||||
|     [[nodiscard]] StagingBufferMap DownloadStagingBuffer(size_t size); | ||||
|     [[nodiscard]] StagingBufferMap DownloadStagingBuffer(size_t size, bool deferred = false); | ||||
|  | ||||
|     void FreeDeferredStagingBuffer(StagingBufferMap& buffer); | ||||
|  | ||||
|     bool CanReorderUpload(const Buffer&, std::span<const VideoCommon::BufferCopy>) { | ||||
|         return false; | ||||
| @@ -246,7 +248,7 @@ struct BufferCacheParams { | ||||
|     static constexpr bool NEEDS_BIND_STORAGE_INDEX = true; | ||||
|     static constexpr bool USE_MEMORY_MAPS = true; | ||||
|     static constexpr bool SEPARATE_IMAGE_BUFFER_BINDINGS = true; | ||||
|     static constexpr bool IMPLEMENTS_ASYNC_DOWNLOADS = false; | ||||
|     static constexpr bool IMPLEMENTS_ASYNC_DOWNLOADS = true; | ||||
|  | ||||
|     // TODO: Investigate why OpenGL seems to perform worse with persistently mapped buffer uploads | ||||
|     static constexpr bool USE_MEMORY_MAPS_FOR_UPLOADS = false; | ||||
|   | ||||
| @@ -28,63 +28,69 @@ StagingBuffers::StagingBuffers(GLenum storage_flags_, GLenum map_flags_) | ||||
|  | ||||
| StagingBuffers::~StagingBuffers() = default; | ||||
|  | ||||
| StagingBufferMap StagingBuffers::RequestMap(size_t requested_size, bool insert_fence) { /* NOTE(review): rendered diff without plus/minus markers; rows using syncs, sync_indices, maps and buffers appear to be the removed version, rows using allocs the added one. */ | ||||
| StagingBufferMap StagingBuffers::RequestMap(size_t requested_size, bool insert_fence, | ||||
|                                             bool deferred) { /* Obtains a staging buffer index from RequestBuffer and returns a StagingBufferMap describing requested_size bytes of it. */ | ||||
|     MICROPROFILE_SCOPE(OpenGL_BufferRequest); | ||||
|  | ||||
|     const size_t index = RequestBuffer(requested_size); | ||||
|     OGLSync* const sync = insert_fence ? &syncs[index] : nullptr; | ||||
|     sync_indices[index] = insert_fence ? ++current_sync_index : 0; | ||||
|     OGLSync* const sync = insert_fence ? &allocs[index].sync : nullptr; /* Null when no fence was requested. NOTE(review): points into the allocs vector, which RequestBuffer can grow via emplace_back; confirm the pointer is not kept across later requests. */ | ||||
|     allocs[index].sync_index = insert_fence ? ++current_sync_index : 0; /* Fenced requests take the next counter value; unfenced ones store 0. */ | ||||
|     allocs[index].deferred = deferred; /* While set, FindBuffer skips this allocation. */ | ||||
|     return StagingBufferMap{ | ||||
|         .mapped_span = std::span(maps[index], requested_size), | ||||
|         .mapped_span = std::span(allocs[index].map, requested_size), /* Spans requested_size bytes; the allocation itself may be larger. */ | ||||
|         .sync = sync, | ||||
|         .buffer = buffers[index].handle, | ||||
|         .buffer = allocs[index].buffer.handle, | ||||
|         .index = index, /* Index into allocs; this is what FreeDeferredStagingBuffer expects back. */ | ||||
|     }; | ||||
| } | ||||
|  | ||||
| void StagingBuffers::FreeDeferredStagingBuffer(size_t index) { /* Releases a deferred allocation by clearing the flag that makes FindBuffer skip allocs[index]. */ | ||||
|     ASSERT(allocs[index].deferred); /* The allocation is expected to be marked deferred when this is called. */ | ||||
|     allocs[index].deferred = false; | ||||
| } | ||||
|  | ||||
| size_t StagingBuffers::RequestBuffer(size_t requested_size) { /* Returns the index of a staging buffer for requested_size bytes, reusing one found by FindBuffer or creating a new one. NOTE(review): rendered diff without plus/minus markers; rows using buffers, maps, syncs, sync_indices and sizes appear to be the removed version, rows using alloc and allocs the added one. */ | ||||
|     if (const std::optional<size_t> index = FindBuffer(requested_size); index) { /* Reuse path: FindBuffer produced an index. */ | ||||
|         return *index; | ||||
|     } | ||||
|  | ||||
|     OGLBuffer& buffer = buffers.emplace_back(); | ||||
|     buffer.Create(); | ||||
|     StagingBufferAlloc alloc; /* sync_index and deferred are not assigned in this function; RequestMap sets them right after this call returns. */ | ||||
|     alloc.buffer.Create(); | ||||
|     const auto next_pow2_size = Common::NextPow2(requested_size); /* Presumably rounds up to the next power of two, judging by the name; confirm against Common::NextPow2. */ | ||||
|     glNamedBufferStorage(buffer.handle, next_pow2_size, nullptr, | ||||
|     glNamedBufferStorage(alloc.buffer.handle, next_pow2_size, nullptr, | ||||
|                          storage_flags | GL_MAP_PERSISTENT_BIT); /* GL_MAP_PERSISTENT_BIT is added to the configured storage flags. */ | ||||
|     maps.push_back(static_cast<u8*>(glMapNamedBufferRange(buffer.handle, 0, next_pow2_size, | ||||
|                                                           map_flags | GL_MAP_PERSISTENT_BIT))); | ||||
|     syncs.emplace_back(); | ||||
|     sync_indices.emplace_back(); | ||||
|     sizes.push_back(next_pow2_size); | ||||
|  | ||||
|     ASSERT(syncs.size() == buffers.size() && buffers.size() == maps.size() && | ||||
|            maps.size() == sizes.size()); | ||||
|  | ||||
|     return buffers.size() - 1; | ||||
|     alloc.map = static_cast<u8*>(glMapNamedBufferRange(alloc.buffer.handle, 0, next_pow2_size, | ||||
|                                                        map_flags | GL_MAP_PERSISTENT_BIT)); /* Maps the whole storage once and keeps the pointer. NOTE(review): the result is not checked for null here. */ | ||||
|     alloc.size = next_pow2_size; | ||||
|     allocs.emplace_back(std::move(alloc)); | ||||
|     return allocs.size() - 1; /* Index of the element just appended. */ | ||||
| } | ||||
|  | ||||
| std::optional<size_t> StagingBuffers::FindBuffer(size_t requested_size) { | ||||
|     size_t known_unsignaled_index = current_sync_index + 1; | ||||
|     size_t smallest_buffer = std::numeric_limits<size_t>::max(); | ||||
|     std::optional<size_t> found; | ||||
|     const size_t num_buffers = sizes.size(); | ||||
|     const size_t num_buffers = allocs.size(); | ||||
|     for (size_t index = 0; index < num_buffers; ++index) { | ||||
|         const size_t buffer_size = sizes[index]; | ||||
|         StagingBufferAlloc& alloc = allocs[index]; | ||||
|         const size_t buffer_size = alloc.size; | ||||
|         if (buffer_size < requested_size || buffer_size >= smallest_buffer) { | ||||
|             continue; | ||||
|         } | ||||
|         if (syncs[index].handle != 0) { | ||||
|             if (sync_indices[index] >= known_unsignaled_index) { | ||||
|         if (alloc.deferred) { | ||||
|             continue; | ||||
|         } | ||||
|         if (alloc.sync.handle != 0) { | ||||
|             if (alloc.sync_index >= known_unsignaled_index) { | ||||
|                 // This fence is later than a fence that is known to not be signaled | ||||
|                 continue; | ||||
|             } | ||||
|             if (!syncs[index].IsSignaled()) { | ||||
|             if (!alloc.sync.IsSignaled()) { | ||||
|                 // Since this fence hasn't been signaled, it's safe to assume all later | ||||
|                 // fences haven't been signaled either | ||||
|                 known_unsignaled_index = std::min(known_unsignaled_index, sync_indices[index]); | ||||
|                 known_unsignaled_index = std::min(known_unsignaled_index, alloc.sync_index); | ||||
|                 continue; | ||||
|             } | ||||
|             syncs[index].Release(); | ||||
|             alloc.sync.Release(); | ||||
|         } | ||||
|         smallest_buffer = buffer_size; | ||||
|         found = index; | ||||
| @@ -143,8 +149,12 @@ StagingBufferMap StagingBufferPool::RequestUploadBuffer(size_t size) { | ||||
|     return upload_buffers.RequestMap(size, true); | ||||
| } | ||||
|  | ||||
| StagingBufferMap StagingBufferPool::RequestDownloadBuffer(size_t size) { /* NOTE(review): rendered diff without plus/minus markers; this row and the next appear to be the removed one-argument version. */ | ||||
|     return download_buffers.RequestMap(size, false); | ||||
| StagingBufferMap StagingBufferPool::RequestDownloadBuffer(size_t size, bool deferred) { /* Requests a map of size bytes from download_buffers, forwarding the deferred flag. */ | ||||
|     return download_buffers.RequestMap(size, false, deferred); /* The false argument is insert_fence, so the returned map carries a null sync. */ | ||||
| } | ||||
|  | ||||
| void StagingBufferPool::FreeDeferredStagingBuffer(StagingBufferMap& buffer) { /* Releases a deferred download buffer; only buffer.index is read. */ | ||||
|     download_buffers.FreeDeferredStagingBuffer(buffer.index); /* Deferred maps are only requested from download_buffers in the code shown, hence no upload counterpart. */ | ||||
| } | ||||
|  | ||||
| } // namespace OpenGL | ||||
|   | ||||
| @@ -26,23 +26,30 @@ struct StagingBufferMap { | ||||
|     size_t offset = 0; | ||||
|     OGLSync* sync; | ||||
|     GLuint buffer; | ||||
|     size_t index; | ||||
| }; | ||||
|  | ||||
| struct StagingBuffers { | ||||
|     explicit StagingBuffers(GLenum storage_flags_, GLenum map_flags_); | ||||
|     ~StagingBuffers(); | ||||
|  | ||||
|     StagingBufferMap RequestMap(size_t requested_size, bool insert_fence); | ||||
|     StagingBufferMap RequestMap(size_t requested_size, bool insert_fence, bool deferred = false); | ||||
|  | ||||
|     void FreeDeferredStagingBuffer(size_t index); | ||||
|  | ||||
|     size_t RequestBuffer(size_t requested_size); | ||||
|  | ||||
|     std::optional<size_t> FindBuffer(size_t requested_size); | ||||
|  | ||||
|     std::vector<OGLSync> syncs; | ||||
|     std::vector<OGLBuffer> buffers; | ||||
|     std::vector<u8*> maps; | ||||
|     std::vector<size_t> sizes; | ||||
|     std::vector<size_t> sync_indices; | ||||
|     struct StagingBufferAlloc { /* Per-buffer record stored in allocs; groups what the separate syncs, buffers, maps, sizes and sync_indices vectors hold. NOTE(review): no default member initializers; RequestBuffer fills buffer, map and size, and RequestMap assigns sync_index and deferred. */ | ||||
|         OGLSync sync; /* Handed out by RequestMap when insert_fence is set; FindBuffer tests and releases it. */ | ||||
|         OGLBuffer buffer; /* GL buffer object created in RequestBuffer. */ | ||||
|         u8* map; /* Pointer returned by glMapNamedBufferRange for this buffer. */ | ||||
|         size_t size; /* Allocated size, i.e. Common::NextPow2 of the size requested at creation. */ | ||||
|         size_t sync_index; /* Value of current_sync_index taken by the latest fenced request, or 0 if that request had no fence. */ | ||||
|         bool deferred; /* While true FindBuffer skips this entry; cleared by FreeDeferredStagingBuffer. */ | ||||
|     }; | ||||
|     std::vector<StagingBufferAlloc> allocs; | ||||
|     GLenum storage_flags; | ||||
|     GLenum map_flags; | ||||
|     size_t current_sync_index = 0; | ||||
| @@ -85,7 +92,8 @@ public: | ||||
|     ~StagingBufferPool() = default; | ||||
|  | ||||
|     StagingBufferMap RequestUploadBuffer(size_t size); | ||||
|     StagingBufferMap RequestDownloadBuffer(size_t size); | ||||
|     StagingBufferMap RequestDownloadBuffer(size_t size, bool deferred = false); | ||||
|     void FreeDeferredStagingBuffer(StagingBufferMap& buffer); | ||||
|  | ||||
| private: | ||||
|     StagingBuffers upload_buffers{GL_MAP_WRITE_BIT, GL_MAP_WRITE_BIT | GL_MAP_FLUSH_EXPLICIT_BIT}; | ||||
|   | ||||
| @@ -557,8 +557,12 @@ StagingBufferMap TextureCacheRuntime::UploadStagingBuffer(size_t size) { | ||||
|     return staging_buffer_pool.RequestUploadBuffer(size); | ||||
| } | ||||
|  | ||||
| StagingBufferMap TextureCacheRuntime::DownloadStagingBuffer(size_t size) { /* NOTE(review): rendered diff without plus/minus markers; this row and the next appear to be the removed one-argument version. */ | ||||
|     return staging_buffer_pool.RequestDownloadBuffer(size); | ||||
| StagingBufferMap TextureCacheRuntime::DownloadStagingBuffer(size_t size, bool deferred) { /* Thin forwarder: asks staging_buffer_pool for a download buffer of size bytes. */ | ||||
|     return staging_buffer_pool.RequestDownloadBuffer(size, deferred); /* deferred is passed through unchanged. */ | ||||
| } | ||||
|  | ||||
| void TextureCacheRuntime::FreeDeferredStagingBuffer(StagingBufferMap& buffer) { /* Thin forwarder: hands the map back to staging_buffer_pool for release. */ | ||||
|     staging_buffer_pool.FreeDeferredStagingBuffer(buffer); | ||||
| } | ||||
|  | ||||
| u64 TextureCacheRuntime::GetDeviceMemoryUsage() const { | ||||
|   | ||||
| @@ -74,7 +74,9 @@ public: | ||||
|  | ||||
|     StagingBufferMap UploadStagingBuffer(size_t size); | ||||
|  | ||||
|     StagingBufferMap DownloadStagingBuffer(size_t size); | ||||
|     StagingBufferMap DownloadStagingBuffer(size_t size, bool deferred = false); | ||||
|  | ||||
|     void FreeDeferredStagingBuffer(StagingBufferMap& buffer); | ||||
|  | ||||
|     u64 GetDeviceLocalMemory() const { | ||||
|         return device_access_memory; | ||||
| @@ -359,7 +361,7 @@ struct TextureCacheParams { | ||||
|     static constexpr bool FRAMEBUFFER_BLITS = true; | ||||
|     static constexpr bool HAS_EMULATED_COPIES = true; | ||||
|     static constexpr bool HAS_DEVICE_MEMORY_INFO = true; | ||||
|     static constexpr bool IMPLEMENTS_ASYNC_DOWNLOADS = false; | ||||
|     static constexpr bool IMPLEMENTS_ASYNC_DOWNLOADS = true; | ||||
|  | ||||
|     using Runtime = OpenGL::TextureCacheRuntime; | ||||
|     using Image = OpenGL::Image; | ||||
| @@ -367,7 +369,7 @@ struct TextureCacheParams { | ||||
|     using ImageView = OpenGL::ImageView; | ||||
|     using Sampler = OpenGL::Sampler; | ||||
|     using Framebuffer = OpenGL::Framebuffer; | ||||
|     using AsyncBuffer = u32; | ||||
|     using AsyncBuffer = OpenGL::StagingBufferMap; | ||||
|     using BufferType = GLuint; | ||||
| }; | ||||
|  | ||||
|   | ||||
| @@ -892,10 +892,6 @@ void RasterizerVulkan::UpdateDynamicStates() { | ||||
|         UpdateFrontFace(regs); | ||||
|         UpdateStencilOp(regs); | ||||
|  | ||||
|         if (device.IsExtVertexInputDynamicStateSupported()) { | ||||
|             UpdateVertexInput(regs); | ||||
|         } | ||||
|  | ||||
|         if (state_tracker.TouchStateEnable()) { | ||||
|             UpdateDepthBoundsTestEnable(regs); | ||||
|             UpdateDepthTestEnable(regs); | ||||
| @@ -918,6 +914,9 @@ void RasterizerVulkan::UpdateDynamicStates() { | ||||
|             UpdateBlending(regs); | ||||
|         } | ||||
|     } | ||||
|     if (device.IsExtVertexInputDynamicStateSupported()) { | ||||
|         UpdateVertexInput(regs); | ||||
|     } | ||||
| } | ||||
|  | ||||
| void RasterizerVulkan::HandleTransformFeedback() { | ||||
|   | ||||
| @@ -519,10 +519,6 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR | ||||
|         LOG_WARNING(Render_Vulkan, "ARM drivers have broken VK_EXT_extended_dynamic_state"); | ||||
|         RemoveExtensionFeature(extensions.extended_dynamic_state, features.extended_dynamic_state, | ||||
|                                VK_EXT_EXTENDED_DYNAMIC_STATE_EXTENSION_NAME); | ||||
|  | ||||
|         LOG_WARNING(Render_Vulkan, "ARM drivers have broken VK_EXT_extended_dynamic_state2"); | ||||
|         RemoveExtensionFeature(extensions.extended_dynamic_state2, features.extended_dynamic_state2, | ||||
|                                VK_EXT_EXTENDED_DYNAMIC_STATE_2_EXTENSION_NAME); | ||||
|     } | ||||
|  | ||||
|     if (is_nvidia) { | ||||
| @@ -611,17 +607,12 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR | ||||
|         } | ||||
|     } | ||||
|     if (extensions.vertex_input_dynamic_state && is_qualcomm) { | ||||
|         const u32 version = (properties.properties.driverVersion << 3) >> 3; | ||||
|         if (version >= VK_MAKE_API_VERSION(0, 0, 676, 0) && | ||||
|             version < VK_MAKE_API_VERSION(0, 0, 680, 0)) { | ||||
|             // Qualcomm Adreno 7xx drivers do not properly support vertex_input_dynamic_state. | ||||
|             LOG_WARNING( | ||||
|                 Render_Vulkan, | ||||
|                 "Qualcomm Adreno 7xx drivers have broken VK_EXT_vertex_input_dynamic_state"); | ||||
|             RemoveExtensionFeature(extensions.vertex_input_dynamic_state, | ||||
|                                    features.vertex_input_dynamic_state, | ||||
|                                    VK_EXT_VERTEX_INPUT_DYNAMIC_STATE_EXTENSION_NAME); | ||||
|         } | ||||
|         // Qualcomm drivers do not properly support vertex_input_dynamic_state. | ||||
|         LOG_WARNING(Render_Vulkan, | ||||
|                     "Qualcomm drivers have broken VK_EXT_vertex_input_dynamic_state"); | ||||
|         RemoveExtensionFeature(extensions.vertex_input_dynamic_state, | ||||
|                                features.vertex_input_dynamic_state, | ||||
|                                VK_EXT_VERTEX_INPUT_DYNAMIC_STATE_EXTENSION_NAME); | ||||
|     } | ||||
|  | ||||
|     sets_per_pool = 64; | ||||
| @@ -704,6 +695,22 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR | ||||
|             std::min(properties.properties.limits.maxVertexInputBindings, 16U); | ||||
|     } | ||||
|  | ||||
|     if (!extensions.extended_dynamic_state && extensions.extended_dynamic_state2) { | ||||
|         LOG_INFO(Render_Vulkan, | ||||
|                  "Removing extendedDynamicState2 due to missing extendedDynamicState"); | ||||
|         RemoveExtensionFeature(extensions.extended_dynamic_state2, features.extended_dynamic_state2, | ||||
|                                VK_EXT_EXTENDED_DYNAMIC_STATE_2_EXTENSION_NAME); | ||||
|     } | ||||
|  | ||||
|     if (!extensions.extended_dynamic_state2 && extensions.extended_dynamic_state3) { | ||||
|         LOG_INFO(Render_Vulkan, | ||||
|                  "Removing extendedDynamicState3 due to missing extendedDynamicState2"); | ||||
|         RemoveExtensionFeature(extensions.extended_dynamic_state3, features.extended_dynamic_state3, | ||||
|                                VK_EXT_EXTENDED_DYNAMIC_STATE_3_EXTENSION_NAME); | ||||
|         dynamic_state3_blending = false; | ||||
|         dynamic_state3_enables = false; | ||||
|     } | ||||
|  | ||||
|     logical = vk::Device::Create(physical, queue_cis, ExtensionListForVulkan(loaded_extensions), | ||||
|                                  first_next, dld); | ||||
|  | ||||
|   | ||||
		Reference in New Issue
	
	Block a user