From 61ffc1086878ab664018f1f8e9b38d01e3827dce Mon Sep 17 00:00:00 2001 From: pineappleEA Date: Sat, 11 Feb 2023 20:06:54 +0100 Subject: [PATCH] early-access version 3385 --- README.md | 2 +- .../renderer/command/effect/aux_.cpp | 76 ++++++++++++++++--- src/video_core/gpu_thread.cpp | 3 +- src/video_core/gpu_thread.h | 4 +- src/video_core/host1x/vic.cpp | 14 ++-- 5 files changed, 74 insertions(+), 25 deletions(-) diff --git a/README.md b/README.md index 48cdd1564..a507b0303 100755 --- a/README.md +++ b/README.md @@ -1,7 +1,7 @@ yuzu emulator early access ============= -This is the source code for early-access 3384. +This is the source code for early-access 3385. ## Legal Notice diff --git a/src/audio_core/renderer/command/effect/aux_.cpp b/src/audio_core/renderer/command/effect/aux_.cpp index 32873ec49..e487feae0 100755 --- a/src/audio_core/renderer/command/effect/aux_.cpp +++ b/src/audio_core/renderer/command/effect/aux_.cpp @@ -20,10 +20,24 @@ static void ResetAuxBufferDsp(Core::Memory::Memory& memory, const CpuAddr aux_in return; } - auto info{reinterpret_cast(memory.GetPointer(aux_info))}; - info->read_offset = 0; - info->write_offset = 0; - info->total_sample_count = 0; + AuxInfo::AuxInfoDsp info{}; + auto info_ptr{&info}; + bool host_safe{(aux_info & Core::Memory::YUZU_PAGEMASK) <= + (Core::Memory::YUZU_PAGESIZE - sizeof(AuxInfo::AuxInfoDsp))}; + + if (host_safe) [[likely]] { + info_ptr = memory.GetPointer(aux_info); + } else { + memory.ReadBlockUnsafe(aux_info, info_ptr, sizeof(AuxInfo::AuxInfoDsp)); + } + + info_ptr->read_offset = 0; + info_ptr->write_offset = 0; + info_ptr->total_sample_count = 0; + + if (!host_safe) [[unlikely]] { + memory.WriteBlockUnsafe(aux_info, info_ptr, sizeof(AuxInfo::AuxInfoDsp)); + } } /** @@ -71,9 +85,18 @@ static u32 WriteAuxBufferDsp(Core::Memory::Memory& memory, CpuAddr send_info_, return 0; } - auto send_info{reinterpret_cast(memory.GetPointer(send_info_))}; + AuxInfo::AuxInfoDsp send_info{}; + auto send_ptr = &send_info; + bool host_safe = (send_info_ & Core::Memory::YUZU_PAGEMASK) <= + (Core::Memory::YUZU_PAGESIZE - sizeof(AuxInfo::AuxInfoDsp)); - u32 target_write_offset{send_info->write_offset + write_offset}; + if (host_safe) [[likely]] { + send_ptr = memory.GetPointer(send_info_); + } else { + memory.ReadBlockUnsafe(send_info_, send_ptr, sizeof(AuxInfo::AuxInfoDsp)); + } + + u32 target_write_offset{send_ptr->write_offset + write_offset}; if (target_write_offset > count_max) { return 0; } @@ -82,7 +105,13 @@ static u32 WriteAuxBufferDsp(Core::Memory::Memory& memory, CpuAddr send_info_, u32 read_pos{0}; while (write_count > 0) { u32 to_write{std::min(count_max - target_write_offset, write_count)}; - if (to_write) { + const auto write_addr = send_buffer + target_write_offset * sizeof(s32); + bool write_safe{(write_addr & Core::Memory::YUZU_PAGEMASK) <= + (Core::Memory::YUZU_PAGESIZE - (write_addr + to_write * sizeof(s32)))}; + if (write_safe) [[likely]] { + auto ptr = memory.GetPointer(write_addr); + std::memcpy(ptr, &input[read_pos], to_write * sizeof(s32)); + } else { memory.WriteBlockUnsafe(send_buffer + target_write_offset * sizeof(s32), &input[read_pos], to_write * sizeof(s32)); } @@ -92,7 +121,11 @@ static u32 WriteAuxBufferDsp(Core::Memory::Memory& memory, CpuAddr send_info_, } if (update_count) { - send_info->write_offset = (send_info->write_offset + update_count) % count_max; + send_ptr->write_offset = (send_ptr->write_offset + update_count) % count_max; + } + + if (!host_safe) [[unlikely]] { + memory.WriteBlockUnsafe(send_info_, send_ptr, sizeof(AuxInfo::AuxInfoDsp)); } return write_count_; @@ -140,9 +173,18 @@ static u32 ReadAuxBufferDsp(Core::Memory::Memory& memory, CpuAddr return_info_, return 0; } - auto return_info{reinterpret_cast(memory.GetPointer(return_info_))}; + AuxInfo::AuxInfoDsp return_info{}; + auto return_ptr = &return_info; + bool host_safe = (return_info_ & Core::Memory::YUZU_PAGEMASK) <= + (Core::Memory::YUZU_PAGESIZE - sizeof(AuxInfo::AuxInfoDsp)); - u32 target_read_offset{return_info->read_offset + read_offset}; + if (host_safe) [[likely]] { + return_ptr = memory.GetPointer(return_info_); + } else { + memory.ReadBlockUnsafe(return_info_, return_ptr, sizeof(AuxInfo::AuxInfoDsp)); + } + + u32 target_read_offset{return_ptr->read_offset + read_offset}; if (target_read_offset > count_max) { return 0; } @@ -151,7 +193,13 @@ static u32 ReadAuxBufferDsp(Core::Memory::Memory& memory, CpuAddr return_info_, u32 write_pos{0}; while (read_count > 0) { u32 to_read{std::min(count_max - target_read_offset, read_count)}; - if (to_read) { + const auto read_addr = return_buffer + target_read_offset * sizeof(s32); + bool read_safe{(read_addr & Core::Memory::YUZU_PAGEMASK) <= + (Core::Memory::YUZU_PAGESIZE - (read_addr + to_read * sizeof(s32)))}; + if (read_safe) [[likely]] { + auto ptr = memory.GetPointer(read_addr); + std::memcpy(&output[write_pos], ptr, to_read * sizeof(s32)); + } else { memory.ReadBlockUnsafe(return_buffer + target_read_offset * sizeof(s32), &output[write_pos], to_read * sizeof(s32)); } @@ -161,7 +209,11 @@ static u32 ReadAuxBufferDsp(Core::Memory::Memory& memory, CpuAddr return_info_, } if (update_count) { - return_info->read_offset = (return_info->read_offset + update_count) % count_max; + return_ptr->read_offset = (return_ptr->read_offset + update_count) % count_max; + } + + if (!host_safe) [[unlikely]] { + memory.WriteBlockUnsafe(return_info_, return_ptr, sizeof(AuxInfo::AuxInfoDsp)); } return read_count_; diff --git a/src/video_core/gpu_thread.cpp b/src/video_core/gpu_thread.cpp index 33f9acf6c..036aebe0c 100755 --- a/src/video_core/gpu_thread.cpp +++ b/src/video_core/gpu_thread.cpp @@ -32,7 +32,8 @@ static void RunThread(std::stop_token stop_token, Core::System& system, VideoCore::RasterizerInterface* const rasterizer = renderer.ReadRasterizer(); while (!stop_token.stop_requested()) { - CommandDataContainer next = state.queue.PopWait(stop_token); + CommandDataContainer next; + state.queue.Pop(next, stop_token); if (stop_token.stop_requested()) { break; } diff --git a/src/video_core/gpu_thread.h b/src/video_core/gpu_thread.h index f27debf31..ceaed6abc 100755 --- a/src/video_core/gpu_thread.h +++ b/src/video_core/gpu_thread.h @@ -10,8 +10,8 @@ #include #include +#include "common/bounded_threadsafe_queue.h" #include "common/polyfill_thread.h" -#include "common/threadsafe_queue.h" #include "video_core/framebuffer_config.h" namespace Tegra { @@ -97,7 +97,7 @@ struct CommandDataContainer { /// Struct used to synchronize the GPU thread struct SynchState final { - using CommandQueue = Common::MPSCQueue; + using CommandQueue = Common::MPSCQueue; std::mutex write_lock; CommandQueue queue; u64 last_fence{}; diff --git a/src/video_core/host1x/vic.cpp b/src/video_core/host1x/vic.cpp index cfa129743..281d7d276 100755 --- a/src/video_core/host1x/vic.cpp +++ b/src/video_core/host1x/vic.cpp @@ -189,9 +189,7 @@ void Vic::WriteYUVFrame(const AVFrame* frame, const VicConfig& config) { for (std::size_t y = 0; y < frame_height; ++y) { const std::size_t src = y * stride; const std::size_t dst = y * aligned_width; - for (std::size_t x = 0; x < frame_width; ++x) { - luma_buffer[dst + x] = luma_src[src + x]; - } + std::memcpy(luma_buffer.data() + dst, luma_src + src, frame_width); } host1x.MemoryManager().WriteBlock(output_surface_luma_address, luma_buffer.data(), luma_buffer.size()); @@ -205,15 +203,15 @@ void Vic::WriteYUVFrame(const AVFrame* frame, const VicConfig& config) { // Frame from FFmpeg software // Populate chroma buffer from both channels with interleaving. const std::size_t half_width = frame_width / 2; + u8* chroma_buffer_data = chroma_buffer.data(); const u8* chroma_b_src = frame->data[1]; const u8* chroma_r_src = frame->data[2]; for (std::size_t y = 0; y < half_height; ++y) { const std::size_t src = y * half_stride; const std::size_t dst = y * aligned_width; - for (std::size_t x = 0; x < half_width; ++x) { - chroma_buffer[dst + x * 2] = chroma_b_src[src + x]; - chroma_buffer[dst + x * 2 + 1] = chroma_r_src[src + x]; + chroma_buffer_data[dst + x * 2] = chroma_b_src[src + x]; + chroma_buffer_data[dst + x * 2 + 1] = chroma_r_src[src + x]; } } break; @@ -225,9 +223,7 @@ void Vic::WriteYUVFrame(const AVFrame* frame, const VicConfig& config) { for (std::size_t y = 0; y < half_height; ++y) { const std::size_t src = y * stride; const std::size_t dst = y * aligned_width; - for (std::size_t x = 0; x < frame_width; ++x) { - chroma_buffer[dst + x] = chroma_src[src + x]; - } + std::memcpy(chroma_buffer.data() + dst, chroma_src + src, frame_width); } break; }