From d2ffa9ae83b66585ddb8b618e9560c087cf818b9 Mon Sep 17 00:00:00 2001 From: pineappleEA Date: Sat, 26 Jun 2021 10:15:46 +0200 Subject: [PATCH] early-access version 1829 --- README.md | 2 +- .../nvdrv/devices/nvhost_nvdec_common.cpp | 65 +++++++++---------- src/video_core/CMakeLists.txt | 1 + src/video_core/buffer_cache/buffer_cache.h | 11 ++-- src/video_core/command_classes/codecs/codec.h | 8 +++ src/video_core/command_classes/vic.cpp | 25 +++++-- .../renderer_vulkan/vk_buffer_cache.cpp | 5 +- src/video_core/texture_cache/texture_cache.h | 6 +- .../vulkan_common/vulkan_device.cpp | 4 +- src/video_core/vulkan_common/vulkan_device.h | 33 +++++----- 10 files changed, 93 insertions(+), 67 deletions(-) diff --git a/README.md b/README.md index 6bfee0c15..dc64ff2a1 100755 --- a/README.md +++ b/README.md @@ -1,7 +1,7 @@ yuzu emulator early access ============= -This is the source code for early-access 1826. +This is the source code for early-access 1829. ## Legal Notice diff --git a/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.cpp b/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.cpp index e57074b0f..1375689e7 100755 --- a/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.cpp +++ b/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.cpp @@ -19,29 +19,26 @@ namespace Service::Nvidia::Devices { namespace { -// Copies count amount of type T from the input vector into the dst vector. -// Returns the number of bytes written into dst. +// Splice vectors will copy count amount of type T from the input vector into the dst vector. template std::size_t SpliceVectors(const std::vector& input, std::vector& dst, std::size_t count, std::size_t offset) { - if (dst.empty()) { - return 0; + if (!dst.empty()) { + std::memcpy(dst.data(), input.data() + offset, count * sizeof(T)); } - const size_t bytes_copied = count * sizeof(T); - std::memcpy(dst.data(), input.data() + offset, bytes_copied); - return bytes_copied; + return 0; } -// Writes the data in src to an offset into the dst vector. The offset is specified in bytes -// Returns the number of bytes written into dst. +// Write vectors will write data to the output buffer template std::size_t WriteVectors(std::vector& dst, const std::vector& src, std::size_t offset) { if (src.empty()) { return 0; + } else { + std::memcpy(dst.data() + offset, src.data(), src.size() * sizeof(T)); + offset += src.size() * sizeof(T); + return offset; } - const size_t bytes_copied = src.size() * sizeof(T); - std::memcpy(dst.data() + offset, src.data(), bytes_copied); - return bytes_copied; } } // Anonymous namespace @@ -65,6 +62,7 @@ NvResult nvhost_nvdec_common::Submit(const std::vector& input, std::vector command_buffers(params.cmd_buffer_count); std::vector relocs(params.relocation_count); std::vector reloc_shifts(params.relocation_count); @@ -73,13 +71,12 @@ NvResult nvhost_nvdec_common::Submit(const std::vector& input, std::vector fences(params.fence_count); // Splice input into their respective buffers - std::size_t offset = sizeof(IoctlSubmit); - offset += SpliceVectors(input, command_buffers, params.cmd_buffer_count, offset); - offset += SpliceVectors(input, relocs, params.relocation_count, offset); - offset += SpliceVectors(input, reloc_shifts, params.relocation_count, offset); - offset += SpliceVectors(input, syncpt_increments, params.syncpoint_count, offset); - offset += SpliceVectors(input, wait_checks, params.syncpoint_count, offset); - offset += SpliceVectors(input, fences, params.fence_count, offset); + offset = SpliceVectors(input, command_buffers, params.cmd_buffer_count, offset); + offset = SpliceVectors(input, relocs, params.relocation_count, offset); + offset = SpliceVectors(input, reloc_shifts, params.relocation_count, offset); + offset = SpliceVectors(input, syncpt_increments, params.syncpoint_count, offset); + offset = SpliceVectors(input, wait_checks, params.syncpoint_count, offset); + offset = SpliceVectors(input, fences, params.fence_count, offset); auto& gpu = system.GPU(); if (gpu.UseNvdec()) { @@ -91,7 +88,7 @@ NvResult nvhost_nvdec_common::Submit(const std::vector& input, std::vectorGetObject(cmd_buffer.memory_id); + auto object = nvmap_dev->GetObject(cmd_buffer.memory_id); ASSERT_OR_EXECUTE(object, return NvResult::InvalidState;); const auto map = FindBufferMap(object->dma_map_addr); if (!map) { @@ -105,19 +102,21 @@ NvResult nvhost_nvdec_common::Submit(const std::vector& input, std::vector& input, std::vecto auto& gpu = system.GPU(); - for (auto& cmd_buffer : cmd_buffer_handles) { - auto object{nvmap_dev->GetObject(cmd_buffer.map_handle)}; + for (auto& cmf_buff : cmd_buffer_handles) { + auto object{nvmap_dev->GetObject(cmf_buff.map_handle)}; if (!object) { - LOG_ERROR(Service_NVDRV, "invalid cmd_buffer nvmap_handle={:X}", cmd_buffer.map_handle); + LOG_ERROR(Service_NVDRV, "invalid cmd_buffer nvmap_handle={:X}", cmf_buff.map_handle); std::memcpy(output.data(), ¶ms, output.size()); return NvResult::InvalidState; } @@ -171,7 +170,7 @@ NvResult nvhost_nvdec_common::MapBuffer(const std::vector& input, std::vecto if (!object->dma_map_addr) { LOG_ERROR(Service_NVDRV, "failed to map size={}", object->size); } else { - cmd_buffer.map_address = object->dma_map_addr; + cmf_buff.map_address = object->dma_map_addr; AddBufferMap(object->dma_map_addr, object->size, object->addr, object->status == nvmap::Object::Status::Allocated); } @@ -191,10 +190,10 @@ NvResult nvhost_nvdec_common::UnmapBuffer(const std::vector& input, std::vec auto& gpu = system.GPU(); - for (auto& cmd_buffer : cmd_buffer_handles) { - const auto object{nvmap_dev->GetObject(cmd_buffer.map_handle)}; + for (auto& cmf_buff : cmd_buffer_handles) { + const auto object{nvmap_dev->GetObject(cmf_buff.map_handle)}; if (!object) { - LOG_ERROR(Service_NVDRV, "invalid cmd_buffer nvmap_handle={:X}", cmd_buffer.map_handle); + LOG_ERROR(Service_NVDRV, "invalid cmd_buffer nvmap_handle={:X}", cmf_buff.map_handle); std::memcpy(output.data(), ¶ms, output.size()); return NvResult::InvalidState; } diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index f9454bbaa..e31eb30c0 100755 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -293,6 +293,7 @@ endif() if (MSVC) target_compile_options(video_core PRIVATE /we4267 # 'var' : conversion from 'size_t' to 'type', possible loss of data + /we4244 # 'var' : conversion from integer to 'type', possible loss of data /we4456 # Declaration of 'identifier' hides previous local declaration /we4457 # Declaration of 'identifier' hides function parameter /we4458 # Declaration of 'identifier' hides class member diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index 9d726a6fb..cad7f902d 100755 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h @@ -99,7 +99,7 @@ class BufferCache { }; public: - static constexpr u32 DEFAULT_SKIP_CACHE_SIZE = 4_KiB; + static constexpr u32 DEFAULT_SKIP_CACHE_SIZE = static_cast(4_KiB); explicit BufferCache(VideoCore::RasterizerInterface& rasterizer_, Tegra::Engines::Maxwell3D& maxwell3d_, @@ -109,8 +109,6 @@ public: void TickFrame(); - void RunGarbageCollector(); - void WriteMemory(VAddr cpu_addr, u64 size); void CachedWriteMemory(VAddr cpu_addr, u64 size); @@ -197,6 +195,8 @@ private: ((cpu_addr + size) & ~Core::Memory::PAGE_MASK); } + void RunGarbageCollector(); + void BindHostIndexBuffer(); void BindHostVertexBuffers(); @@ -416,8 +416,9 @@ void BufferCache

::CachedWriteMemory(VAddr cpu_addr, u64 size) { template void BufferCache

::DownloadMemory(VAddr cpu_addr, u64 size) { - ForEachBufferInRange(cpu_addr, size, - [&](BufferId, Buffer& buffer) { DownloadBufferMemory(buffer); }); + ForEachBufferInRange(cpu_addr, size, [&](BufferId, Buffer& buffer) { + DownloadBufferMemory(buffer, cpu_addr, size); + }); } template diff --git a/src/video_core/command_classes/codecs/codec.h b/src/video_core/command_classes/codecs/codec.h index 8a2a6c360..3e135a2a6 100755 --- a/src/video_core/command_classes/codecs/codec.h +++ b/src/video_core/command_classes/codecs/codec.h @@ -14,10 +14,18 @@ extern "C" { #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wconversion" #endif +#ifdef _MSC_VER +#pragma warning(push) +#pragma warning(disable : 4242) // conversion from 'type' to 'type', possible loss of data +#pragma warning(disable : 4244) // conversion from 'type' to 'type', possible loss of data +#endif #include #if defined(__GNUC__) || defined(__clang__) #pragma GCC diagnostic pop #endif +#ifdef _MSC_VER +#pragma warning(pop) +#endif } namespace Tegra { diff --git a/src/video_core/command_classes/vic.cpp b/src/video_core/command_classes/vic.cpp index 0a8b82f2b..5faf8c0f1 100755 --- a/src/video_core/command_classes/vic.cpp +++ b/src/video_core/command_classes/vic.cpp @@ -3,7 +3,28 @@ // Refer to the license.txt file included. #include + +extern "C" { +#if defined(__GNUC__) || defined(__clang__) +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wconversion" +#endif +#ifdef _MSC_VER +#pragma warning(disable : 4244) // conversion from 'type' to 'type', possible loss of data +#pragma warning(push) +#endif +#include +#if defined(__GNUC__) || defined(__clang__) +#pragma GCC diagnostic pop +#endif +#ifdef _MSC_VER +#pragma warning(pop) +#endif +} + #include "common/assert.h" +#include "common/logging/log.h" + #include "video_core/command_classes/nvdec.h" #include "video_core/command_classes/vic.h" #include "video_core/engines/maxwell_3d.h" @@ -11,10 +32,6 @@ #include "video_core/memory_manager.h" #include "video_core/textures/decoders.h" -extern "C" { -#include -} - namespace Tegra { Vic::Vic(GPU& gpu_, std::shared_ptr nvdec_processor_) diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp index 8cb65e588..0df4e1a1c 100755 --- a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp @@ -55,8 +55,9 @@ size_t BytesPerIndex(VkIndexType index_type) { template std::array MakeQuadIndices(u32 quad, u32 first) { std::array indices{0, 1, 2, 0, 2, 3}; - std::ranges::transform(indices, indices.begin(), - [quad, first](u32 index) { return first + index + quad * 4; }); + for (T& index : indices) { + index = static_cast(first + index + quad * 4); + } return indices; } } // Anonymous namespace diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 512267b76..512832a42 100755 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -111,9 +111,6 @@ public: /// Notify the cache that a new frame has been queued void TickFrame(); - /// Runs the Garbage Collector. - void RunGarbageCollector(); - /// Return a constant reference to the given image view id [[nodiscard]] const ImageView& GetImageView(ImageViewId id) const noexcept; @@ -226,6 +223,9 @@ private: } } + /// Runs the Garbage Collector. + void RunGarbageCollector(); + /// Fills image_view_ids in the image views in indices void FillImageViews(DescriptorTable& table, std::span cached_image_view_ids, std::span indices, diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp index 16a2d68e8..f214510da 100755 --- a/src/video_core/vulkan_common/vulkan_device.cpp +++ b/src/video_core/vulkan_common/vulkan_device.cpp @@ -842,9 +842,9 @@ void Device::CollectTelemetryParameters() { void Device::CollectPhysicalMemoryInfo() { const auto mem_properties = physical.GetMemoryProperties(); - const std::size_t num_properties = mem_properties.memoryHeapCount; + const size_t num_properties = mem_properties.memoryHeapCount; device_access_memory = 0; - for (std::size_t element = 0; element < num_properties; element++) { + for (size_t element = 0; element < num_properties; ++element) { if ((mem_properties.memoryHeaps[element].flags & VK_MEMORY_HEAP_DEVICE_LOCAL_BIT) != 0) { device_access_memory += mem_properties.memoryHeaps[element].size; } diff --git a/src/video_core/vulkan_common/vulkan_device.h b/src/video_core/vulkan_common/vulkan_device.h index 705c07e3e..96c0f8c60 100755 --- a/src/video_core/vulkan_common/vulkan_device.h +++ b/src/video_core/vulkan_common/vulkan_device.h @@ -267,21 +267,22 @@ private: bool IsFormatSupported(VkFormat wanted_format, VkFormatFeatureFlags wanted_usage, FormatType format_type) const; - VkInstance instance; ///< Vulkan instance. - vk::DeviceDispatch dld; ///< Device function pointers. - vk::PhysicalDevice physical; ///< Physical device. - VkPhysicalDeviceProperties properties; ///< Device properties. - vk::Device logical; ///< Logical device. - vk::Queue graphics_queue; ///< Main graphics queue. - vk::Queue present_queue; ///< Main present queue. - u32 instance_version{}; ///< Vulkan onstance version. - u32 graphics_family{}; ///< Main graphics queue family index. - u32 present_family{}; ///< Main present queue family index. - VkDriverIdKHR driver_id{}; ///< Driver ID. - VkShaderStageFlags guest_warp_stages{}; ///< Stages where the guest warp size can be forced.ed - bool is_optimal_astc_supported{}; ///< Support for native ASTC. - bool is_float16_supported{}; ///< Support for float16 arithmetics. - bool is_warp_potentially_bigger{}; ///< Host warp size can be bigger than guest. + VkInstance instance; ///< Vulkan instance. + vk::DeviceDispatch dld; ///< Device function pointers. + vk::PhysicalDevice physical; ///< Physical device. + VkPhysicalDeviceProperties properties; ///< Device properties. + vk::Device logical; ///< Logical device. + vk::Queue graphics_queue; ///< Main graphics queue. + vk::Queue present_queue; ///< Main present queue. + u32 instance_version{}; ///< Vulkan onstance version. + u32 graphics_family{}; ///< Main graphics queue family index. + u32 present_family{}; ///< Main present queue family index. + VkDriverIdKHR driver_id{}; ///< Driver ID. + VkShaderStageFlags guest_warp_stages{}; ///< Stages where the guest warp size can be forced. + u64 device_access_memory{}; ///< Total size of device local memory in bytes. + bool is_optimal_astc_supported{}; ///< Support for native ASTC. + bool is_float16_supported{}; ///< Support for float16 arithmetics. + bool is_warp_potentially_bigger{}; ///< Host warp size can be bigger than guest. bool is_formatless_image_load_supported{}; ///< Support for shader image read without format. bool is_shader_storage_image_multisample{}; ///< Support for image operations on MSAA images. bool is_blit_depth_stencil_supported{}; ///< Support for blitting from and to depth stencil. @@ -312,8 +313,6 @@ private: /// Nsight Aftermath GPU crash tracker std::unique_ptr nsight_aftermath_tracker; - - u64 device_access_memory; }; } // namespace Vulkan