early-access version 4039

2023-12-30 11:00:24 +01:00
parent b89b2c20a4
commit 28e97ec962
8 changed files with 44 additions and 24 deletions
--- a/README.md
+++ b/README.md
@@ -1,7 +1,7 @@
 yuzu emulator early access
 =============

-This is the source code for early-access 4037.
+This is the source code for early-access 4039.

 ## Legal Notice

--- a/src/common/heap_tracker.cpp
+++ b/src/common/heap_tracker.cpp
@@ -1,7 +1,7 @@
 // SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
 // SPDX-License-Identifier: GPL-2.0-or-later

-#include <algorithm>
+#include <fstream>
 #include <vector>

 #include "common/heap_tracker.h"
@@ -11,11 +11,25 @@ namespace Common {

 namespace {

-constexpr s64 MaxResidentMapCount = 0x8000;
+s64 GetMaxPermissibleResidentMapCount() {
+    // Fallback value; 65530 is the documented Linux default for vm.max_map_count.
+    s64 value = 65530;
+
+    // Try to read the map limit; value keeps the fallback if the file cannot be opened.
+    std::ifstream s("/proc/sys/vm/max_map_count");
+    s >> value;
+
+    // Log the value we ended up with, for debugging.
+    LOG_INFO(HW_Memory, "Current maximum map count: {}", value);
+
+    // Reserve 20000 maps for other code and split inaccuracy; clamp so the result is never < 0.
+    return std::max<s64>(value - 20000, 0);
+}

 } // namespace

-HeapTracker::HeapTracker(Common::HostMemory& buffer) : m_buffer(buffer) {}
+HeapTracker::HeapTracker(Common::HostMemory& buffer)
+    : m_buffer(buffer), m_max_resident_map_count(GetMaxPermissibleResidentMapCount()) {}
 HeapTracker::~HeapTracker() = default;

 void HeapTracker::Map(size_t virtual_offset, size_t host_offset, size_t length,
@@ -74,8 +88,8 @@ void HeapTracker::Unmap(size_t virtual_offset, size_t size, bool is_separate_hea
            }

            // Erase from map.
-            it = m_mappings.erase(it);
            ASSERT(--m_map_count >= 0);
+            it = m_mappings.erase(it);

            // Free the item.
            delete item;
@@ -94,8 +108,8 @@ void HeapTracker::Protect(size_t virtual_offset, size_t size, MemoryPermission p
    this->SplitHeapMap(virtual_offset, size);

    // Declare tracking variables.
+    const VAddr end = virtual_offset + size;
    VAddr cur = virtual_offset;
-    VAddr end = virtual_offset + size;

    while (cur < end) {
        VAddr next = cur;
@@ -167,7 +181,7 @@ bool HeapTracker::DeferredMapSeparateHeap(size_t virtual_offset) {
        it->tick = m_tick++;

        // Check if we need to rebuild.
-        if (m_resident_map_count > MaxResidentMapCount) {
+        if (m_resident_map_count > m_max_resident_map_count) {
            rebuild_required = true;
        }

@@ -193,8 +207,12 @@ void HeapTracker::RebuildSeparateHeapAddressSpace() {

    ASSERT(!m_resident_mappings.empty());

-    // Unmap so we have at least 4 maps available.
-    const size_t desired_count = std::min(m_resident_map_count, MaxResidentMapCount - 4);
+    // Dump half of the mappings.
+    //
+    // Despite being worse in theory, this has proven to be better in practice than more
+    // regularly dumping a smaller amount, because it significantly reduces average case
+    // lock contention.
+    const size_t desired_count = std::min(m_resident_map_count, m_max_resident_map_count) / 2;
    const size_t evict_count = m_resident_map_count - desired_count;
    auto it = m_resident_mappings.begin();

@@ -247,8 +265,8 @@ void HeapTracker::SplitHeapMapLocked(VAddr offset) {

    // If resident, also insert into resident map.
    if (right->is_resident) {
-        m_resident_mappings.insert(*right);
        m_resident_map_count++;
+        m_resident_mappings.insert(*right);
    }
 }

--- a/src/common/heap_tracker.h
+++ b/src/common/heap_tracker.h
@@ -86,6 +86,7 @@ private:

 private:
    Common::HostMemory& m_buffer;
+    const s64 m_max_resident_map_count;

    std::shared_mutex m_rebuild_lock{};
    std::mutex m_lock{};
--- a/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp
+++ b/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp
@@ -449,7 +449,7 @@ void EmitImageGatherDref(EmitContext& ctx, IR::Inst& inst, const IR::Value& inde
 }

 void EmitImageFetch(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
-                    std::string_view coords, std::string_view offset, std::string_view lod,
+                    std::string_view coords, const IR::Value& offset, std::string_view lod,
                    std::string_view ms) {
    const auto info{inst.Flags<IR::TextureInstInfo>()};
    if (info.has_bias) {
@@ -470,9 +470,9 @@ void EmitImageFetch(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
        const auto int_coords{CoordsCastToInt(coords, info)};
        if (!ms.empty()) {
            ctx.Add("{}=texelFetch({},{},int({}));", texel, texture, int_coords, ms);
-        } else if (!offset.empty()) {
+        } else if (!offset.IsEmpty()) {
            ctx.Add("{}=texelFetchOffset({},{},int({}),{});", texel, texture, int_coords, lod,
-                    CoordsCastToInt(offset, info));
+                    GetOffsetVec(ctx, offset));
        } else {
            if (info.type == TextureType::Buffer) {
                ctx.Add("{}=texelFetch({},int({}));", texel, texture, coords);
@@ -485,10 +485,10 @@ void EmitImageFetch(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
    if (!ms.empty()) {
        throw NotImplementedException("EmitImageFetch Sparse MSAA samples");
    }
-    if (!offset.empty()) {
+    if (!offset.IsEmpty()) {
        ctx.AddU1("{}=sparseTexelsResidentARB(sparseTexelFetchOffsetARB({},{},int({}),{},{}));",
-                  *sparse_inst, texture, CastToIntVec(coords, info), lod,
-                  CastToIntVec(offset, info), texel);
+                  *sparse_inst, texture, CastToIntVec(coords, info), lod, GetOffsetVec(ctx, offset),
+                  texel);
    } else {
        ctx.AddU1("{}=sparseTexelsResidentARB(sparseTexelFetchARB({},{},int({}),{}));",
                  *sparse_inst, texture, CastToIntVec(coords, info), lod, texel);
--- a/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h
+++ b/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h
@@ -651,7 +651,7 @@ void EmitImageGatherDref(EmitContext& ctx, IR::Inst& inst, const IR::Value& inde
                         std::string_view coords, const IR::Value& offset, const IR::Value& offset2,
                         std::string_view dref);
 void EmitImageFetch(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
-                    std::string_view coords, std::string_view offset, std::string_view lod,
+                    std::string_view coords, const IR::Value& offset, std::string_view lod,
                    std::string_view ms);
 void EmitImageQueryDimensions(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
                              std::string_view lod, const IR::Value& skip_mips);
--- a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp
+++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp
@@ -1440,7 +1440,7 @@ void EmitContext::DefineInputs(const IR::Program& program) {
        if (profile.support_vertex_instance_id) {
            instance_id = DefineInput(*this, U32[1], true, spv::BuiltIn::InstanceId);
            if (loads[IR::Attribute::BaseInstance]) {
-                base_instance = DefineInput(*this, U32[1], true, spv::BuiltIn::BaseVertex);
+                base_instance = DefineInput(*this, U32[1], true, spv::BuiltIn::BaseInstance);
            }
        } else {
            instance_index = DefineInput(*this, U32[1], true, spv::BuiltIn::InstanceIndex);
--- a/src/video_core/renderer_opengl/gl_device.cpp
+++ b/src/video_core/renderer_opengl/gl_device.cpp
@@ -195,9 +195,9 @@ Device::Device(Core::Frontend::EmuWindow& emu_window) {
    has_texture_shadow_lod = HasExtension(extensions, "GL_EXT_texture_shadow_lod");
    has_astc = !has_slow_software_astc && IsASTCSupported();
    has_variable_aoffi = TestVariableAoffi();
-    has_component_indexing_bug = is_amd;
+    has_component_indexing_bug = false;
    has_precise_bug = TestPreciseBug();
-    has_broken_texture_view_formats = is_amd || (!is_linux && is_intel);
+    has_broken_texture_view_formats = (!is_linux && is_intel);
    has_nv_viewport_array2 = GLAD_GL_NV_viewport_array2;
    has_derivative_control = GLAD_GL_ARB_derivative_control;
    has_vertex_buffer_unified_memory = GLAD_GL_NV_vertex_buffer_unified_memory;
@@ -238,10 +238,11 @@ Device::Device(Core::Frontend::EmuWindow& emu_window) {
    has_lmem_perf_bug = is_nvidia;

    strict_context_required = emu_window.StrictContextRequired();
-    // Blocks AMD and Intel OpenGL drivers on Windows from using asynchronous shader compilation.
+    // Blocks Intel OpenGL drivers on Windows from using asynchronous shader compilation.
    // Blocks EGL on Wayland from using asynchronous shader compilation.
-    use_asynchronous_shaders = Settings::values.use_asynchronous_shaders.GetValue() &&
-                               !(is_amd || (is_intel && !is_linux)) && !strict_context_required;
+    const bool blacklist_async_shaders = (is_intel && !is_linux) || strict_context_required;
+    use_asynchronous_shaders =
+        Settings::values.use_asynchronous_shaders.GetValue() && !blacklist_async_shaders;
    use_driver_cache = is_nvidia;
    supports_conditional_barriers = !is_intel;

--- a/src/yuzu/configuration/shared_translation.cpp
+++ b/src/yuzu/configuration/shared_translation.cpp
@@ -228,7 +228,7 @@ std::unique_ptr<ComboboxTranslationMap> ComboboxEnumeration(QWidget* parent) {
         {
             PAIR(ShaderBackend, Glsl, tr("GLSL")),
             PAIR(ShaderBackend, Glasm, tr("GLASM (Assembly Shaders, NVIDIA Only)")),
-             PAIR(ShaderBackend, SpirV, tr("SPIR-V (Experimental, Mesa Only)")),
+             PAIR(ShaderBackend, SpirV, tr("SPIR-V (Experimental, AMD/Mesa Only)")),
         }});
    translations->insert({Settings::EnumMetadata<Settings::GpuAccuracy>::Index(),
                          {