early-access version 2447
This commit is contained in:
		| @@ -1,7 +1,7 @@ | ||||
| yuzu emulator early access | ||||
| ============= | ||||
|  | ||||
| This is the source code for early-access 2445. | ||||
| This is the source code for early-access 2447. | ||||
|  | ||||
| ## Legal Notice | ||||
|  | ||||
|   | ||||
| @@ -132,6 +132,8 @@ public: | ||||
|  | ||||
|     void DownloadMemory(VAddr cpu_addr, u64 size); | ||||
|  | ||||
|     bool InlineMemory(VAddr dest_address, size_t copy_size, std::span<u8> inlined_buffer); | ||||
|  | ||||
|     void BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, u32 size); | ||||
|  | ||||
|     void DisableGraphicsUniformBuffer(size_t stage, u32 index); | ||||
| @@ -834,6 +836,8 @@ void BufferCache<P>::CommitAsyncFlushesHigh() { | ||||
|         return; | ||||
|     } | ||||
|     MICROPROFILE_SCOPE(GPU_DownloadMemory); | ||||
|     const bool is_accuracy_normal = | ||||
|         Settings::values.gpu_accuracy.GetValue() == Settings::GPUAccuracy::Normal; | ||||
|  | ||||
|     boost::container::small_vector<std::pair<BufferCopy, BufferId>, 1> downloads; | ||||
|     u64 total_size_bytes = 0; | ||||
| @@ -845,6 +849,9 @@ void BufferCache<P>::CommitAsyncFlushesHigh() { | ||||
|             ForEachBufferInRange(cpu_addr, size, [&](BufferId buffer_id, Buffer& buffer) { | ||||
|                 buffer.ForEachDownloadRangeAndClear( | ||||
|                     cpu_addr, size, [&](u64 range_offset, u64 range_size) { | ||||
|                         if (is_accuracy_normal) { | ||||
|                             return; | ||||
|                         } | ||||
|                         const VAddr buffer_addr = buffer.CpuAddr(); | ||||
|                         const auto add_download = [&](VAddr start, VAddr end) { | ||||
|                             const u64 new_offset = start - buffer_addr; | ||||
| @@ -1443,10 +1450,8 @@ void BufferCache<P>::MarkWrittenBuffer(BufferId buffer_id, VAddr cpu_addr, u32 s | ||||
|     const IntervalType base_interval{cpu_addr, cpu_addr + size}; | ||||
|     common_ranges.add(base_interval); | ||||
|  | ||||
|     const bool is_accuracy_high = | ||||
|         Settings::values.gpu_accuracy.GetValue() == Settings::GPUAccuracy::High; | ||||
|     const bool is_async = Settings::values.use_asynchronous_gpu_emulation.GetValue(); | ||||
|     if (!is_async && !is_accuracy_high) { | ||||
|     if (!is_async) { | ||||
|         return; | ||||
|     } | ||||
|     uncommitted_ranges.add(base_interval); | ||||
| @@ -1669,6 +1674,41 @@ void BufferCache<P>::MappedUploadMemory(Buffer& buffer, u64 total_size_bytes, | ||||
|     runtime.CopyBuffer(buffer, upload_staging.buffer, copies); | ||||
| } | ||||
|  | ||||
| template <class P> | ||||
| bool BufferCache<P>::InlineMemory(VAddr dest_address, size_t copy_size, | ||||
|                                   std::span<u8> inlined_buffer) { | ||||
|     const bool is_dirty = IsRegionRegistered(dest_address, copy_size); | ||||
|     if (!is_dirty) { | ||||
|         return false; | ||||
|     } | ||||
|     if (!IsRegionGpuModified(dest_address, copy_size)) { | ||||
|         return false; | ||||
|     } | ||||
|  | ||||
|     const IntervalType subtract_interval{dest_address, dest_address + copy_size}; | ||||
|     ClearDownload(subtract_interval); | ||||
|  | ||||
|     BufferId buffer_id = FindBuffer(dest_address, static_cast<u32>(copy_size)); | ||||
|     auto& buffer = slot_buffers[buffer_id]; | ||||
|     SynchronizeBuffer(buffer, dest_address, static_cast<u32>(copy_size)); | ||||
|  | ||||
|     if constexpr (USE_MEMORY_MAPS) { | ||||
|         std::array copies{BufferCopy{ | ||||
|             .src_offset = 0, | ||||
|             .dst_offset = buffer.Offset(dest_address), | ||||
|             .size = copy_size, | ||||
|         }}; | ||||
|         auto upload_staging = runtime.UploadStagingBuffer(copy_size); | ||||
|         u8* const src_pointer = upload_staging.mapped_span.data(); | ||||
|         std::memcpy(src_pointer, inlined_buffer.data(), copy_size); | ||||
|         runtime.CopyBuffer(buffer, upload_staging.buffer, copies); | ||||
|     } else { | ||||
|         buffer.ImmediateUpload(buffer.Offset(dest_address), inlined_buffer); | ||||
|     } | ||||
|  | ||||
|     return true; | ||||
| } | ||||
|  | ||||
| template <class P> | ||||
| void BufferCache<P>::DownloadBufferMemory(Buffer& buffer) { | ||||
|     DownloadBufferMemory(buffer, buffer.CpuAddr(), buffer.SizeBytes()); | ||||
|   | ||||
| @@ -7,6 +7,7 @@ | ||||
| #include "common/assert.h" | ||||
| #include "video_core/engines/engine_upload.h" | ||||
| #include "video_core/memory_manager.h" | ||||
| #include "video_core/rasterizer_interface.h" | ||||
| #include "video_core/textures/decoders.h" | ||||
|  | ||||
| namespace Tegra::Engines::Upload { | ||||
| @@ -16,6 +17,10 @@ State::State(MemoryManager& memory_manager_, Registers& regs_) | ||||
|  | ||||
| State::~State() = default; | ||||
|  | ||||
| void State::BindRasterizer(VideoCore::RasterizerInterface* rasterizer_) { | ||||
|     rasterizer = rasterizer_; | ||||
| } | ||||
|  | ||||
| void State::ProcessExec(const bool is_linear_) { | ||||
|     write_offset = 0; | ||||
|     copy_size = regs.line_length_in * regs.line_count; | ||||
| @@ -32,8 +37,7 @@ void State::ProcessData(const u32 data, const bool is_last_call) { | ||||
|     } | ||||
|     const GPUVAddr address{regs.dest.Address()}; | ||||
|     if (is_linear) { | ||||
|         memory_manager.FlushRegion(address, copy_size); | ||||
|         memory_manager.WriteBlock(address, inner_buffer.data(), copy_size); | ||||
|         rasterizer->AccelerateInline2Memory(address, copy_size, inner_buffer); | ||||
|     } else { | ||||
|         UNIMPLEMENTED_IF(regs.dest.z != 0); | ||||
|         UNIMPLEMENTED_IF(regs.dest.depth != 1); | ||||
|   | ||||
| @@ -12,6 +12,10 @@ namespace Tegra { | ||||
| class MemoryManager; | ||||
| } | ||||
|  | ||||
| namespace VideoCore { | ||||
| class RasterizerInterface; | ||||
| } | ||||
|  | ||||
| namespace Tegra::Engines::Upload { | ||||
|  | ||||
| struct Registers { | ||||
| @@ -60,6 +64,9 @@ public: | ||||
|     void ProcessExec(bool is_linear_); | ||||
|     void ProcessData(u32 data, bool is_last_call); | ||||
|  | ||||
|     /// Binds a rasterizer to this engine. | ||||
|     void BindRasterizer(VideoCore::RasterizerInterface* rasterizer); | ||||
|  | ||||
| private: | ||||
|     u32 write_offset = 0; | ||||
|     u32 copy_size = 0; | ||||
| @@ -68,6 +75,7 @@ private: | ||||
|     bool is_linear = false; | ||||
|     Registers& regs; | ||||
|     MemoryManager& memory_manager; | ||||
|     VideoCore::RasterizerInterface* rasterizer = nullptr; | ||||
| }; | ||||
|  | ||||
| } // namespace Tegra::Engines::Upload | ||||
|   | ||||
| @@ -22,6 +22,7 @@ KeplerCompute::~KeplerCompute() = default; | ||||
|  | ||||
| void KeplerCompute::BindRasterizer(VideoCore::RasterizerInterface* rasterizer_) { | ||||
|     rasterizer = rasterizer_; | ||||
|     upload_state.BindRasterizer(rasterizer); | ||||
| } | ||||
|  | ||||
| void KeplerCompute::CallMethod(u32 method, u32 method_argument, bool is_last_call) { | ||||
|   | ||||
| @@ -19,6 +19,10 @@ KeplerMemory::KeplerMemory(Core::System& system_, MemoryManager& memory_manager) | ||||
|  | ||||
| KeplerMemory::~KeplerMemory() = default; | ||||
|  | ||||
| void KeplerMemory::BindRasterizer(VideoCore::RasterizerInterface* rasterizer_) { | ||||
|     upload_state.BindRasterizer(rasterizer_); | ||||
| } | ||||
|  | ||||
| void KeplerMemory::CallMethod(u32 method, u32 method_argument, bool is_last_call) { | ||||
|     ASSERT_MSG(method < Regs::NUM_REGS, | ||||
|                "Invalid KeplerMemory register, increase the size of the Regs structure"); | ||||
|   | ||||
| @@ -22,6 +22,10 @@ namespace Tegra { | ||||
| class MemoryManager; | ||||
| } | ||||
|  | ||||
| namespace VideoCore { | ||||
| class RasterizerInterface; | ||||
| } | ||||
|  | ||||
| namespace Tegra::Engines { | ||||
|  | ||||
| /** | ||||
| @@ -38,6 +42,9 @@ public: | ||||
|     explicit KeplerMemory(Core::System& system_, MemoryManager& memory_manager); | ||||
|     ~KeplerMemory() override; | ||||
|  | ||||
|     /// Binds a rasterizer to this engine. | ||||
|     void BindRasterizer(VideoCore::RasterizerInterface* rasterizer); | ||||
|  | ||||
|     /// Write the value to the register identified by method. | ||||
|     void CallMethod(u32 method, u32 method_argument, bool is_last_call) override; | ||||
|  | ||||
|   | ||||
| @@ -31,6 +31,7 @@ Maxwell3D::~Maxwell3D() = default; | ||||
|  | ||||
| void Maxwell3D::BindRasterizer(VideoCore::RasterizerInterface* rasterizer_) { | ||||
|     rasterizer = rasterizer_; | ||||
|     upload_state.BindRasterizer(rasterizer_); | ||||
| } | ||||
|  | ||||
| void Maxwell3D::InitializeRegisterDefaults() { | ||||
|   | ||||
| @@ -1557,7 +1557,8 @@ private: | ||||
|  | ||||
|     static constexpr u32 null_cb_data = 0xFFFFFFFF; | ||||
|     struct CBDataState { | ||||
|         std::array<std::array<u32, 0x4000>, 16> buffer; | ||||
|         static constexpr size_t inline_size = 0x8000; | ||||
|         std::array<std::array<u32, inline_size>, 16> buffer; | ||||
|         u32 current{null_cb_data}; | ||||
|         u32 id{null_cb_data}; | ||||
|         u32 start_pos{}; | ||||
|   | ||||
| @@ -59,6 +59,7 @@ struct GPU::Impl { | ||||
|         maxwell_3d->BindRasterizer(rasterizer); | ||||
|         fermi_2d->BindRasterizer(rasterizer); | ||||
|         kepler_compute->BindRasterizer(rasterizer); | ||||
|         kepler_memory->BindRasterizer(rasterizer); | ||||
|         maxwell_dma->BindRasterizer(rasterizer); | ||||
|     } | ||||
|  | ||||
| @@ -502,8 +503,15 @@ struct GPU::Impl { | ||||
|         case BufferMethods::SemaphoreAddressHigh: | ||||
|         case BufferMethods::SemaphoreAddressLow: | ||||
|         case BufferMethods::SemaphoreSequence: | ||||
|         case BufferMethods::UnkCacheFlush: | ||||
|         case BufferMethods::WrcacheFlush: | ||||
|             break; | ||||
|         case BufferMethods::UnkCacheFlush: { | ||||
|             rasterizer->SyncGuestHost(); | ||||
|             break; | ||||
|         } | ||||
|         case BufferMethods::WrcacheFlush: { | ||||
|             rasterizer->SignalReference(); | ||||
|             break; | ||||
|         } | ||||
|         case BufferMethods::FenceValue: | ||||
|             break; | ||||
|         case BufferMethods::RefCnt: | ||||
|   | ||||
| @@ -123,6 +123,9 @@ public: | ||||
|  | ||||
|     [[nodiscard]] virtual Tegra::Engines::AccelerateDMAInterface& AccessAccelerateDMA() = 0; | ||||
|  | ||||
|     virtual void AccelerateInline2Memory(GPUVAddr address, size_t copy_size, | ||||
|                                          std::span<u8> memory) = 0; | ||||
|  | ||||
|     /// Attempt to use a faster method to display the framebuffer to screen | ||||
|     [[nodiscard]] virtual bool AccelerateDisplay(const Tegra::FramebufferConfig& config, | ||||
|                                                  VAddr framebuffer_addr, u32 pixel_stride) { | ||||
|   | ||||
| @@ -484,6 +484,28 @@ Tegra::Engines::AccelerateDMAInterface& RasterizerOpenGL::AccessAccelerateDMA() | ||||
|     return accelerate_dma; | ||||
| } | ||||
|  | ||||
| void RasterizerOpenGL::AccelerateInline2Memory(GPUVAddr address, size_t copy_size, | ||||
|                                                std::span<u8> memory) { | ||||
|     auto cpu_addr = gpu_memory.GpuToCpuAddress(address); | ||||
|     if (!cpu_addr) [[unlikely]] { | ||||
|         gpu_memory.WriteBlock(address, memory.data(), copy_size); | ||||
|         return; | ||||
|     } | ||||
|     gpu_memory.WriteBlockUnsafe(address, memory.data(), copy_size); | ||||
|     { | ||||
|         std::unique_lock<std::mutex> lock{buffer_cache.mutex}; | ||||
|         if (!buffer_cache.InlineMemory(*cpu_addr, copy_size, memory)) { | ||||
|             buffer_cache.WriteMemory(*cpu_addr, copy_size); | ||||
|         } | ||||
|     } | ||||
|     { | ||||
|         std::scoped_lock lock_texture{texture_cache.mutex}; | ||||
|         texture_cache.WriteMemory(*cpu_addr, copy_size); | ||||
|     } | ||||
|     shader_cache.InvalidateRegion(*cpu_addr, copy_size); | ||||
|     query_cache.InvalidateRegion(*cpu_addr, copy_size); | ||||
| } | ||||
|  | ||||
| bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& config, | ||||
|                                          VAddr framebuffer_addr, u32 pixel_stride) { | ||||
|     if (framebuffer_addr == 0) { | ||||
|   | ||||
| @@ -106,6 +106,7 @@ public: | ||||
|                                const Tegra::Engines::Fermi2D::Surface& dst, | ||||
|                                const Tegra::Engines::Fermi2D::Config& copy_config) override; | ||||
|     Tegra::Engines::AccelerateDMAInterface& AccessAccelerateDMA() override; | ||||
|     void AccelerateInline2Memory(GPUVAddr address, size_t copy_size, std::span<u8> memory) override; | ||||
|     bool AccelerateDisplay(const Tegra::FramebufferConfig& config, VAddr framebuffer_addr, | ||||
|                            u32 pixel_stride) override; | ||||
|     void LoadDiskResources(u64 title_id, std::stop_token stop_loading, | ||||
|   | ||||
| @@ -548,6 +548,28 @@ Tegra::Engines::AccelerateDMAInterface& RasterizerVulkan::AccessAccelerateDMA() | ||||
|     return accelerate_dma; | ||||
| } | ||||
|  | ||||
| void RasterizerVulkan::AccelerateInline2Memory(GPUVAddr address, size_t copy_size, | ||||
|                                                std::span<u8> memory) { | ||||
|     auto cpu_addr = gpu_memory.GpuToCpuAddress(address); | ||||
|     if (!cpu_addr) [[unlikely]] { | ||||
|         gpu_memory.WriteBlock(address, memory.data(), copy_size); | ||||
|         return; | ||||
|     } | ||||
|     gpu_memory.WriteBlockUnsafe(address, memory.data(), copy_size); | ||||
|     { | ||||
|         std::unique_lock<std::mutex> lock{buffer_cache.mutex}; | ||||
|         if (!buffer_cache.InlineMemory(*cpu_addr, copy_size, memory)) { | ||||
|             buffer_cache.WriteMemory(*cpu_addr, copy_size); | ||||
|         } | ||||
|     } | ||||
|     { | ||||
|         std::scoped_lock lock_texture{texture_cache.mutex}; | ||||
|         texture_cache.WriteMemory(*cpu_addr, copy_size); | ||||
|     } | ||||
|     pipeline_cache.InvalidateRegion(*cpu_addr, copy_size); | ||||
|     query_cache.InvalidateRegion(*cpu_addr, copy_size); | ||||
| } | ||||
|  | ||||
| bool RasterizerVulkan::AccelerateDisplay(const Tegra::FramebufferConfig& config, | ||||
|                                          VAddr framebuffer_addr, u32 pixel_stride) { | ||||
|     if (!framebuffer_addr) { | ||||
|   | ||||
| @@ -99,6 +99,7 @@ public: | ||||
|                                const Tegra::Engines::Fermi2D::Surface& dst, | ||||
|                                const Tegra::Engines::Fermi2D::Config& copy_config) override; | ||||
|     Tegra::Engines::AccelerateDMAInterface& AccessAccelerateDMA() override; | ||||
|     void AccelerateInline2Memory(GPUVAddr address, size_t copy_size, std::span<u8> memory) override; | ||||
|     bool AccelerateDisplay(const Tegra::FramebufferConfig& config, VAddr framebuffer_addr, | ||||
|                            u32 pixel_stride) override; | ||||
|     void LoadDiskResources(u64 title_id, std::stop_token stop_loading, | ||||
|   | ||||
		Reference in New Issue
	
	Block a user