early-access version 1794

This commit is contained in:
pineappleEA
2021-06-18 16:07:15 +02:00
parent d7e99270a3
commit b57607168a
15 changed files with 133 additions and 56 deletions

@@ -18,11 +18,15 @@
 #include <algorithm>
 #include <cassert>
 #include <cstring>
+#if __cpp_lib_parallel_algorithm
+#include <execution>
+#endif
 #include <span>
+#include <vector>
 #include <boost/container/static_vector.hpp>
 #include "common/alignment.h"
 #include "common/common_types.h"
 #include "video_core/textures/astc.h"
@@ -1548,30 +1552,87 @@ static void DecompressBlock(std::span<const u8, 16> inBuf, const u32 blockWidth,
 void Decompress(std::span<const uint8_t> data, uint32_t width, uint32_t height, uint32_t depth,
                 uint32_t block_width, uint32_t block_height, std::span<uint8_t> output) {
-    u32 block_index = 0;
-    std::size_t depth_offset = 0;
-    for (u32 z = 0; z < depth; z++) {
-        for (u32 y = 0; y < height; y += block_height) {
-            for (u32 x = 0; x < width; x += block_width) {
-                const std::span<const u8, 16> blockPtr{data.subspan(block_index * 16, 16)};
-                // Blocks can be at most 12x12
-                std::array<u32, 12 * 12> uncompData;
-                DecompressBlock(blockPtr, block_width, block_height, uncompData);
-                u32 decompWidth = std::min(block_width, width - x);
-                u32 decompHeight = std::min(block_height, height - y);
-                const std::span<u8> outRow = output.subspan(depth_offset + (y * width + x) * 4);
-                for (u32 jj = 0; jj < decompHeight; jj++) {
-                    std::memcpy(outRow.data() + jj * width * 4,
-                                uncompData.data() + jj * block_width, decompWidth * 4);
-                }
-                ++block_index;
-            }
-        }
-        depth_offset += height * width * 4;
-    }
+    struct ASTCStrideInfo {
+        u32 z{};
+        u32 index{};
+    };
+
+    const u32 rows = Common::DivideUp(height, block_height);
+    const u32 cols = Common::DivideUp(width, block_width);
+
+    // Reserve, not size-construct: emplace_back below must not append after
+    // num_strides default-constructed entries.
+    const u32 num_strides = depth * rows;
+    std::vector<ASTCStrideInfo> astc_strides;
+    astc_strides.reserve(num_strides);
+
+    for (u32 z = 0; z < depth; ++z) {
+        for (u32 index = 0; index < rows; ++index) {
+            astc_strides.emplace_back(ASTCStrideInfo{
+                .z{z},
+                .index{index},
+            });
+        }
+    }
+
+    auto decompress_stride = [&](const ASTCStrideInfo& stride) {
+        const u32 y = stride.index * block_height;
+        const u32 depth_offset = stride.z * height * width * 4;
+        for (u32 x_index = 0; x_index < cols; ++x_index) {
+            // Include the depth slice when indexing the compressed blocks.
+            const u32 block_index = (stride.z * rows + stride.index) * cols + x_index;
+            const u32 x = x_index * block_width;
+            const std::span<const u8, 16> blockPtr{data.subspan(block_index * 16, 16)};
+            // Blocks can be at most 12x12
+            std::array<u32, 12 * 12> uncompData;
+            DecompressBlock(blockPtr, block_width, block_height, uncompData);
+            const u32 decompWidth = std::min(block_width, width - x);
+            const u32 decompHeight = std::min(block_height, height - y);
+            const std::span<u8> outRow = output.subspan(depth_offset + (y * width + x) * 4);
+            for (u32 h = 0; h < decompHeight; ++h) {
+                std::memcpy(outRow.data() + h * width * 4, uncompData.data() + h * block_width,
+                            decompWidth * 4);
+            }
+        }
+    };
+#if __cpp_lib_parallel_algorithm
+    std::for_each(std::execution::par, astc_strides.cbegin(), astc_strides.cend(),
+                  decompress_stride);
+#else
+    std::for_each(astc_strides.cbegin(), astc_strides.cend(), decompress_stride);
+#endif
 }
 } // namespace Tegra::Texture::ASTC
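To make the work split above concrete: each `ASTCStrideInfo` names one row of blocks within one depth slice, so strides write disjoint output rows and can run without synchronization. A standalone sketch of the arithmetic, with illustrative texture sizes that are not from this commit and a local `DivideUp` mirroring the ceiling division of `Common::DivideUp`:

```cpp
#include <cstdint>

// Ceiling division, equivalent to the Common::DivideUp used above.
constexpr uint32_t DivideUp(uint32_t x, uint32_t y) {
    return (x + y - 1) / y;
}

int main() {
    // Illustrative only: a 100x60x2 texture compressed with 12x12 ASTC blocks.
    const uint32_t width = 100, height = 60, depth = 2;
    const uint32_t block_width = 12, block_height = 12;

    const uint32_t rows = DivideUp(height, block_height); // 5 block rows
    const uint32_t cols = DivideUp(width, block_width);   // 9 block columns
    const uint32_t num_strides = depth * rows;            // 10 independent strides

    // Each stride decompresses `cols` blocks into its own output rows, which
    // is what makes the std::for_each(std::execution::par, ...) above safe.
    return static_cast<int>(num_strides);
}
```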

@@ -12,6 +12,14 @@ VkBool32 Callback(VkDebugUtilsMessageSeverityFlagBitsEXT severity,
                    VkDebugUtilsMessageTypeFlagsEXT type,
                    const VkDebugUtilsMessengerCallbackDataEXT* data,
                    [[maybe_unused]] void* user_data) {
+    // Skip logging known false-positive validation errors
+    switch (static_cast<u32>(data->messageIdNumber)) {
+    case 0x682a878au: // VUID-vkCmdBindVertexBuffers2EXT-pBuffers-parameter
+    case 0x99fb7dfdu: // UNASSIGNED-RequiredParameter (vkCmdBindVertexBuffers2EXT pBuffers[0])
+        return VK_FALSE;
+    default:
+        break;
+    }
     const std::string_view message{data->pMessage};
     if (severity & VK_DEBUG_UTILS_MESSAGE_SEVERITY_ERROR_BIT_EXT) {
         LOG_CRITICAL(Render_Vulkan, "{}", message);
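For context on where this filter sits: both `pMessage` and the `messageIdNumber` the new switch matches on arrive in the `VkDebugUtilsMessengerCallbackDataEXT` that the validation layers pass to the callback, and the callback is registered through `VkDebugUtilsMessengerCreateInfoEXT`. A minimal registration sketch, assuming an instance created with `VK_EXT_debug_utils` enabled; the function name and the severity/type masks are illustrative, not from this commit:

```cpp
#include <vulkan/vulkan.h>

// Registers `Callback` (the function in the diff above) on an instance that
// was created with the VK_EXT_debug_utils extension enabled.
VkResult RegisterMessenger(VkInstance instance, VkDebugUtilsMessengerEXT* messenger) {
    const VkDebugUtilsMessengerCreateInfoEXT create_info{
        .sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_MESSENGER_CREATE_INFO_EXT,
        .messageSeverity = VK_DEBUG_UTILS_MESSAGE_SEVERITY_WARNING_BIT_EXT |
                           VK_DEBUG_UTILS_MESSAGE_SEVERITY_ERROR_BIT_EXT,
        .messageType = VK_DEBUG_UTILS_MESSAGE_TYPE_GENERAL_BIT_EXT |
                       VK_DEBUG_UTILS_MESSAGE_TYPE_VALIDATION_BIT_EXT |
                       VK_DEBUG_UTILS_MESSAGE_TYPE_PERFORMANCE_BIT_EXT,
        .pfnUserCallback = Callback,
    };
    // vkCreateDebugUtilsMessengerEXT is an extension entry point and must be
    // loaded through vkGetInstanceProcAddr.
    const auto create = reinterpret_cast<PFN_vkCreateDebugUtilsMessengerEXT>(
        vkGetInstanceProcAddr(instance, "vkCreateDebugUtilsMessengerEXT"));
    if (create == nullptr) {
        return VK_ERROR_EXTENSION_NOT_PRESENT;
    }
    return create(instance, &create_info, nullptr, messenger);
}
```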

@@ -821,13 +821,11 @@ void Device::CollectTelemetryParameters() {
 void Device::CollectPhysicalMemoryInfo() {
     const auto mem_properties = physical.GetMemoryProperties();
-    const std::size_t num_properties = mem_properties.memoryTypeCount;
+    const std::size_t num_properties = mem_properties.memoryHeapCount;
     device_access_memory = 0;
     for (std::size_t element = 0; element < num_properties; element++) {
-        if ((mem_properties.memoryTypes[element].propertyFlags &
-             VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT) != 0) {
-            const std::size_t heap_index = mem_properties.memoryTypes[element].heapIndex;
-            device_access_memory += mem_properties.memoryHeaps[heap_index].size;
+        if ((mem_properties.memoryHeaps[element].flags & VK_MEMORY_HEAP_DEVICE_LOCAL_BIT) != 0) {
+            device_access_memory += mem_properties.memoryHeaps[element].size;
         }
     }
 }
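The heap-based rewrite above fixes an accounting subtlety: several entries of `memoryTypes[]` may share one heap through their `heapIndex`, so summing `memoryHeaps[heap_index].size` once per device-local *type* can count the same heap several times, whereas iterating the `memoryHeapCount` heaps directly and testing `VK_MEMORY_HEAP_DEVICE_LOCAL_BIT` counts each heap exactly once. A standalone sketch of the corrected query (the function name is illustrative):

```cpp
#include <vulkan/vulkan.h>

// Sums the size of every device-local heap exactly once. Iterating
// memoryHeaps[] avoids the double counting that iterating memoryTypes[]
// causes when multiple memory types share one heap.
VkDeviceSize QueryDeviceLocalMemory(VkPhysicalDevice physical_device) {
    VkPhysicalDeviceMemoryProperties props{};
    vkGetPhysicalDeviceMemoryProperties(physical_device, &props);

    VkDeviceSize total = 0;
    for (uint32_t heap = 0; heap < props.memoryHeapCount; ++heap) {
        if ((props.memoryHeaps[heap].flags & VK_MEMORY_HEAP_DEVICE_LOCAL_BIT) != 0) {
            total += props.memoryHeaps[heap].size;
        }
    }
    return total;
}
```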