From 0f64bad6bd839144a01a4b0416c75a2d47917390 Mon Sep 17 00:00:00 2001 From: pineappleEA Date: Tue, 3 Jan 2023 22:43:57 +0100 Subject: [PATCH] early-access version 3275 --- README.md | 2 +- src/core/hid/emulated_controller.cpp | 20 +-- src/core/hle/service/hid/irs.cpp | 4 +- src/input_common/helpers/joycon_driver.cpp | 30 +++-- src/input_common/helpers/joycon_driver.h | 1 + .../joycon_protocol/common_protocol.cpp | 4 +- src/shader_recompiler/environment.h | 5 + .../ir_opt/constant_propagation_pass.cpp | 30 ++++- src/video_core/engines/draw_manager.cpp | 13 +- src/video_core/engines/draw_manager.h | 2 +- src/video_core/engines/maxwell_3d.cpp | 3 +- src/video_core/engines/maxwell_3d.h | 23 ++-- src/video_core/macro/macro_hle.cpp | 38 ++++-- .../renderer_opengl/gl_shader_cache.cpp | 2 +- .../renderer_opengl/gl_texture_cache.h | 2 + .../renderer_vulkan/vk_pipeline_cache.cpp | 2 +- .../renderer_vulkan/vk_rasterizer.cpp | 73 ++++++---- .../vk_staging_buffer_pool.cpp | 19 ++- .../renderer_vulkan/vk_state_tracker.cpp | 22 +-- .../renderer_vulkan/vk_state_tracker.h | 22 +++ .../renderer_vulkan/vk_texture_cache.cpp | 8 +- .../renderer_vulkan/vk_texture_cache.h | 6 +- src/video_core/shader_environment.cpp | 9 +- src/video_core/texture_cache/texture_cache.h | 126 +++++++++++++----- .../texture_cache/texture_cache_base.h | 8 +- .../vulkan_common/vulkan_device.cpp | 36 ++--- 26 files changed, 350 insertions(+), 160 deletions(-) diff --git a/README.md b/README.md index 5c2935b7a..dc9abc7ff 100755 --- a/README.md +++ b/README.md @@ -1,7 +1,7 @@ yuzu emulator early access ============= -This is the source code for early-access 3274. +This is the source code for early-access 3275. ## Legal Notice diff --git a/src/core/hid/emulated_controller.cpp b/src/core/hid/emulated_controller.cpp index 4cf3dc26a..305ba9551 100755 --- a/src/core/hid/emulated_controller.cpp +++ b/src/core/hid/emulated_controller.cpp @@ -11,6 +11,11 @@ namespace Core::HID { constexpr s32 HID_JOYSTICK_MAX = 0x7fff; constexpr s32 HID_TRIGGER_MAX = 0x7fff; +// Use a common UUID for TAS and Virtual Gamepad +constexpr Common::UUID TAS_UUID = + Common::UUID{{0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x7, 0xA5, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}}; +constexpr Common::UUID VIRTUAL_UUID = + Common::UUID{{0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x7, 0xFF, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}}; EmulatedController::EmulatedController(NpadIdType npad_id_type_) : npad_id_type(npad_id_type_) {} @@ -392,10 +397,6 @@ void EmulatedController::ReloadInput() { nfc_devices[index]->ForceUpdate(); } - // Use a common UUID for TAS - static constexpr Common::UUID TAS_UUID = Common::UUID{ - {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x7, 0xA5, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}}; - // Register TAS devices. No need to force update for (std::size_t index = 0; index < tas_button_devices.size(); ++index) { if (!tas_button_devices[index]) { @@ -421,10 +422,6 @@ void EmulatedController::ReloadInput() { }); } - // Use a common UUID for Virtual Gamepad - static constexpr Common::UUID VIRTUAL_UUID = Common::UUID{ - {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x7, 0xFF, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}}; - // Register virtual devices. No need to force update for (std::size_t index = 0; index < virtual_button_devices.size(); ++index) { if (!virtual_button_devices[index]) { @@ -842,7 +839,12 @@ void EmulatedController::SetStick(const Common::Input::CallbackStatus& callback, // Only read stick values that have the same uuid or are over the threshold to avoid flapping if (controller.stick_values[index].uuid != uuid) { - if (!stick_value.down && !stick_value.up && !stick_value.left && !stick_value.right) { + const bool is_tas = uuid == TAS_UUID; + if (is_tas && stick_value.x.value == 0 && stick_value.y.value == 0) { + return; + } + if (!is_tas && !stick_value.down && !stick_value.up && !stick_value.left && + !stick_value.right) { return; } } diff --git a/src/core/hle/service/hid/irs.cpp b/src/core/hle/service/hid/irs.cpp index b9052e497..01e3f63fc 100755 --- a/src/core/hle/service/hid/irs.cpp +++ b/src/core/hle/service/hid/irs.cpp @@ -74,8 +74,6 @@ void IRS::DeactivateIrsensor(Kernel::HLERequestContext& ctx) { LOG_WARNING(Service_IRS, "(STUBBED) called, applet_resource_user_id={}", applet_resource_user_id); - npad_device->SetPollingMode(Common::Input::PollingMode::Active); - IPC::ResponseBuilder rb{ctx, 2}; rb.Push(ResultSuccess); } @@ -514,7 +512,7 @@ void IRS::StopImageProcessorAsync(Kernel::HLERequestContext& ctx) { auto result = IsIrCameraHandleValid(parameters.camera_handle); if (result.IsSuccess()) { // TODO: Stop image processor async - npad_device->SetPollingMode(Common::Input::PollingMode::IR); + npad_device->SetPollingMode(Common::Input::PollingMode::Active); result = ResultSuccess; } diff --git a/src/input_common/helpers/joycon_driver.cpp b/src/input_common/helpers/joycon_driver.cpp index f6bb67e90..fdc883aba 100755 --- a/src/input_common/helpers/joycon_driver.cpp +++ b/src/input_common/helpers/joycon_driver.cpp @@ -264,6 +264,16 @@ DriverResult JoyconDriver::SetPollingMode() { irs_protocol->DisableIrs(); } + if (nfc_protocol->IsEnabled()) { + amiibo_detected = false; + nfc_protocol->DisableNfc(); + } + + if (ring_protocol->IsEnabled()) { + ring_connected = false; + ring_protocol->DisableRingCon(); + } + if (irs_enabled && supported_features.irs) { auto result = irs_protocol->EnableIrs(); if (result == DriverResult::Success) { @@ -274,11 +284,6 @@ DriverResult JoyconDriver::SetPollingMode() { LOG_ERROR(Input, "Error enabling IRS"); } - if (nfc_protocol->IsEnabled()) { - amiibo_detected = false; - nfc_protocol->DisableNfc(); - } - if (nfc_enabled && supported_features.nfc) { auto result = nfc_protocol->EnableNfc(); if (result == DriverResult::Success) { @@ -292,11 +297,6 @@ DriverResult JoyconDriver::SetPollingMode() { LOG_ERROR(Input, "Error enabling NFC"); } - if (ring_protocol->IsEnabled()) { - ring_connected = false; - ring_protocol->DisableRingCon(); - } - if (hidbus_enabled && supported_features.hidbus) { auto result = ring_protocol->EnableRingCon(); if (result == DriverResult::Success) { @@ -428,6 +428,12 @@ DriverResult JoyconDriver::SetPasiveMode() { } DriverResult JoyconDriver::SetActiveMode() { + if (is_ring_disabled_by_irs) { + is_ring_disabled_by_irs = false; + SetActiveMode(); + return SetRingConMode(); + } + std::scoped_lock lock{mutex}; motion_enabled = true; hidbus_enabled = false; @@ -444,6 +450,10 @@ DriverResult JoyconDriver::SetIrMode() { return DriverResult::NotSupported; } + if (ring_connected) { + is_ring_disabled_by_irs = true; + } + motion_enabled = false; hidbus_enabled = false; nfc_enabled = false; diff --git a/src/input_common/helpers/joycon_driver.h b/src/input_common/helpers/joycon_driver.h index f79eb4ee3..403a3a58c 100755 --- a/src/input_common/helpers/joycon_driver.h +++ b/src/input_common/helpers/joycon_driver.h @@ -108,6 +108,7 @@ private: bool starlink_connected{}; bool ring_connected{}; bool amiibo_detected{}; + bool is_ring_disabled_by_irs{}; // Harware configuration u8 leds{}; diff --git a/src/input_common/helpers/joycon_protocol/common_protocol.cpp b/src/input_common/helpers/joycon_protocol/common_protocol.cpp index a329db107..153a3908c 100755 --- a/src/input_common/helpers/joycon_protocol/common_protocol.cpp +++ b/src/input_common/helpers/joycon_protocol/common_protocol.cpp @@ -74,8 +74,8 @@ DriverResult JoyconCommonProtocol::SendData(std::span buffer) { } DriverResult JoyconCommonProtocol::GetSubCommandResponse(SubCommand sc, std::vector& output) { - constexpr int timeout_mili = 100; - constexpr int MaxTries = 10; + constexpr int timeout_mili = 66; + constexpr int MaxTries = 15; int tries = 0; output.resize(MaxSubCommandResponseSize); diff --git a/src/shader_recompiler/environment.h b/src/shader_recompiler/environment.h index 9a5b39a0f..130385a72 100755 --- a/src/shader_recompiler/environment.h +++ b/src/shader_recompiler/environment.h @@ -57,11 +57,16 @@ public: return start_address; } + [[nodiscard]] bool IsPropietaryDriver() const noexcept { + return is_propietary_driver; + } + protected: ProgramHeader sph{}; std::array gp_passthrough_mask{}; Stage stage{}; u32 start_address{}; + bool is_propietary_driver{}; }; } // namespace Shader diff --git a/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp index 3ade96c87..07dbc99a6 100755 --- a/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp +++ b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp @@ -677,6 +677,30 @@ void FoldConstBuffer(Environment& env, IR::Block& block, IR::Inst& inst) { } } +void FoldDriverConstBuffer(Environment& env, IR::Block& block, IR::Inst& inst, u32 which_bank, + u32 offset_start = 0, u32 offset_end = std::numeric_limits::max()) { + const IR::Value bank{inst.Arg(0)}; + const IR::Value offset{inst.Arg(1)}; + if (!bank.IsImmediate() || !offset.IsImmediate()) { + return; + } + const auto bank_value = bank.U32(); + if (bank_value != which_bank) { + return; + } + const auto offset_value = offset.U32(); + if (offset_value < offset_start || offset_value >= offset_end) { + return; + } + IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)}; + if (inst.GetOpcode() == IR::Opcode::GetCbufU32) { + inst.ReplaceUsesWith(IR::Value{env.ReadCbufValue(bank_value, offset_value)}); + } else { + inst.ReplaceUsesWith( + IR::Value{Common::BitCast(env.ReadCbufValue(bank_value, offset_value))}); + } +} + void ConstantPropagation(Environment& env, IR::Block& block, IR::Inst& inst) { switch (inst.GetOpcode()) { case IR::Opcode::GetRegister: @@ -825,13 +849,17 @@ void ConstantPropagation(Environment& env, IR::Block& block, IR::Inst& inst) { case IR::Opcode::GetCbufF32: case IR::Opcode::GetCbufU32: if (env.HasHLEMacroState()) { - return FoldConstBuffer(env, block, inst); + FoldConstBuffer(env, block, inst); + } + if (env.IsPropietaryDriver()) { + FoldDriverConstBuffer(env, block, inst, 1); } break; default: break; } } + } // Anonymous namespace void ConstantPropagationPass(Environment& env, IR::Program& program) { diff --git a/src/video_core/engines/draw_manager.cpp b/src/video_core/engines/draw_manager.cpp index feea89c0e..2437121ce 100755 --- a/src/video_core/engines/draw_manager.cpp +++ b/src/video_core/engines/draw_manager.cpp @@ -94,7 +94,7 @@ void DrawManager::DrawIndex(PrimitiveTopology topology, u32 index_first, u32 ind void DrawManager::DrawArrayIndirect(PrimitiveTopology topology) { draw_state.topology = topology; - ProcessDrawIndirect(true); + ProcessDrawIndirect(); } void DrawManager::DrawIndexedIndirect(PrimitiveTopology topology, u32 index_first, @@ -105,7 +105,7 @@ void DrawManager::DrawIndexedIndirect(PrimitiveTopology topology, u32 index_firs draw_state.index_buffer.first = index_first; draw_state.index_buffer.count = index_count; - ProcessDrawIndirect(true); + ProcessDrawIndirect(); } void DrawManager::SetInlineIndexBuffer(u32 index) { @@ -216,9 +216,12 @@ void DrawManager::ProcessDraw(bool draw_indexed, u32 instance_count) { } } -void DrawManager::ProcessDrawIndirect(bool draw_indexed) { - LOG_TRACE(HW_GPU, "called, topology={}, count={}", draw_state.topology, - draw_indexed ? draw_state.index_buffer.count : draw_state.vertex_buffer.count); +void DrawManager::ProcessDrawIndirect() { + LOG_TRACE( + HW_GPU, + "called, topology={}, is_indexed={}, includes_count={}, buffer_size={}, max_draw_count={}", + draw_state.topology, indirect_state.is_indexed, indirect_state.include_count, + indirect_state.buffer_size, indirect_state.max_draw_counts); UpdateTopology(); diff --git a/src/video_core/engines/draw_manager.h b/src/video_core/engines/draw_manager.h index 49a4fca48..58d1b2d59 100755 --- a/src/video_core/engines/draw_manager.h +++ b/src/video_core/engines/draw_manager.h @@ -85,7 +85,7 @@ private: void ProcessDraw(bool draw_indexed, u32 instance_count); - void ProcessDrawIndirect(bool draw_indexed); + void ProcessDrawIndirect(); Maxwell3D* maxwell3d{}; State draw_state{}; diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp index 4887df47b..21ba880ac 100755 --- a/src/video_core/engines/maxwell_3d.cpp +++ b/src/video_core/engines/maxwell_3d.cpp @@ -685,7 +685,8 @@ u32 Maxwell3D::GetRegisterValue(u32 method) const { return regs.reg_array[method]; } -void Maxwell3D::setHLEReplacementName(u32 bank, u32 offset, HLEReplaceName name) { +void Maxwell3D::SetHLEReplacementAttributeType(u32 bank, u32 offset, + HLEReplacementAttributeType name) { const u64 key = (static_cast(bank) << 32) | offset; replace_table.emplace(key, name); } diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index cf2298e97..ef8fb9b48 100755 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h @@ -2711,7 +2711,7 @@ public: u32 post_z_pixel_imask; ///< 0x0F1C INSERT_PADDING_BYTES_NOINIT(0x20); ConstantColorRendering const_color_rendering; ///< 0x0F40 - s32 stencil_back_ref; ///< 0x0F54 + u32 stencil_back_ref; ///< 0x0F54 u32 stencil_back_mask; ///< 0x0F58 u32 stencil_back_func_mask; ///< 0x0F5C INSERT_PADDING_BYTES_NOINIT(0x14); @@ -2835,9 +2835,9 @@ public: Blend blend; ///< 0x133C u32 stencil_enable; ///< 0x1380 StencilOp stencil_front_op; ///< 0x1384 - s32 stencil_front_ref; ///< 0x1394 - s32 stencil_front_func_mask; ///< 0x1398 - s32 stencil_front_mask; ///< 0x139C + u32 stencil_front_ref; ///< 0x1394 + u32 stencil_front_func_mask; ///< 0x1398 + u32 stencil_front_mask; ///< 0x139C INSERT_PADDING_BYTES_NOINIT(0x4); u32 draw_auto_start_byte_count; ///< 0x13A4 PsSaturate frag_color_clamp; ///< 0x13A8 @@ -3031,14 +3031,14 @@ public: EngineHint engine_state{EngineHint::None}; - enum class HLEReplaceName : u32 { + enum class HLEReplacementAttributeType : u32 { BaseVertex = 0x0, BaseInstance = 0x1, }; - void setHLEReplacementName(u32 bank, u32 offset, HLEReplaceName name); + void SetHLEReplacementAttributeType(u32 bank, u32 offset, HLEReplacementAttributeType name); - std::unordered_map replace_table; + std::unordered_map replace_table; static_assert(sizeof(Regs) == Regs::NUM_REGS * sizeof(u32), "Maxwell3D Regs has wrong size"); static_assert(std::is_trivially_copyable_v, "Maxwell3D Regs must be trivially copyable"); @@ -3089,7 +3089,7 @@ public: std::vector inline_index_draw_indexes; - GPUVAddr getMacroAddress(size_t index) const { + GPUVAddr GetMacroAddress(size_t index) const { return macro_addresses[index]; } @@ -3100,7 +3100,7 @@ public: RefreshParametersImpl(); } - bool AnyParametersDirty() { + bool AnyParametersDirty() const { return current_macro_dirty; } @@ -3196,11 +3196,6 @@ private: bool execute_on{true}; - std::array draw_command{}; - std::vector deferred_draw_method; - enum class DrawMode : u32 { General = 0, Instance, InlineIndex }; - DrawMode draw_mode{DrawMode::General}; - bool draw_indexed{}; std::vector> macro_segments; std::vector macro_addresses; bool current_macro_dirty{}; diff --git a/src/video_core/macro/macro_hle.cpp b/src/video_core/macro/macro_hle.cpp index a3d55a2dc..740ebee64 100755 --- a/src/video_core/macro/macro_hle.cpp +++ b/src/video_core/macro/macro_hle.cpp @@ -81,14 +81,15 @@ public: params.is_indexed = false; params.include_count = false; params.count_start_address = 0; - params.indirect_start_address = maxwell3d.getMacroAddress(1); + params.indirect_start_address = maxwell3d.GetMacroAddress(1); params.buffer_size = 4 * sizeof(u32); params.max_draw_counts = 1; params.stride = 0; if (extended) { maxwell3d.engine_state = Maxwell::EngineHint::OnHLEMacro; - maxwell3d.setHLEReplacementName(0, 0x640, Maxwell::HLEReplaceName::BaseInstance); + maxwell3d.SetHLEReplacementAttributeType( + 0, 0x640, Maxwell::HLEReplacementAttributeType::BaseInstance); } maxwell3d.draw_manager->DrawArrayIndirect(topology); @@ -125,7 +126,8 @@ private: if (extended) { maxwell3d.regs.global_base_instance_index = base_instance; maxwell3d.engine_state = Maxwell::EngineHint::OnHLEMacro; - maxwell3d.setHLEReplacementName(0, 0x640, Maxwell::HLEReplaceName::BaseInstance); + maxwell3d.SetHLEReplacementAttributeType( + 0, 0x640, Maxwell::HLEReplacementAttributeType::BaseInstance); } maxwell3d.draw_manager->DrawArray(topology, vertex_first, vertex_count, base_instance, @@ -160,13 +162,15 @@ public: maxwell3d.regs.global_base_instance_index = base_instance; maxwell3d.dirty.flags[VideoCommon::Dirty::IndexBuffer] = true; maxwell3d.engine_state = Maxwell::EngineHint::OnHLEMacro; - maxwell3d.setHLEReplacementName(0, 0x640, Maxwell::HLEReplaceName::BaseVertex); - maxwell3d.setHLEReplacementName(0, 0x644, Maxwell::HLEReplaceName::BaseInstance); + maxwell3d.SetHLEReplacementAttributeType(0, 0x640, + Maxwell::HLEReplacementAttributeType::BaseVertex); + maxwell3d.SetHLEReplacementAttributeType( + 0, 0x644, Maxwell::HLEReplacementAttributeType::BaseInstance); auto& params = maxwell3d.draw_manager->GetIndirectParams(); params.is_indexed = true; params.include_count = false; params.count_start_address = 0; - params.indirect_start_address = maxwell3d.getMacroAddress(1); + params.indirect_start_address = maxwell3d.GetMacroAddress(1); params.buffer_size = 5 * sizeof(u32); params.max_draw_counts = 1; params.stride = 0; @@ -190,8 +194,10 @@ private: maxwell3d.regs.global_base_instance_index = base_instance; maxwell3d.dirty.flags[VideoCommon::Dirty::IndexBuffer] = true; maxwell3d.engine_state = Maxwell::EngineHint::OnHLEMacro; - maxwell3d.setHLEReplacementName(0, 0x640, Maxwell::HLEReplaceName::BaseVertex); - maxwell3d.setHLEReplacementName(0, 0x644, Maxwell::HLEReplaceName::BaseInstance); + maxwell3d.SetHLEReplacementAttributeType(0, 0x640, + Maxwell::HLEReplacementAttributeType::BaseVertex); + maxwell3d.SetHLEReplacementAttributeType( + 0, 0x644, Maxwell::HLEReplacementAttributeType::BaseInstance); maxwell3d.draw_manager->DrawIndex( static_cast(parameters[0]), @@ -253,15 +259,17 @@ public: auto& params = maxwell3d.draw_manager->GetIndirectParams(); params.is_indexed = true; params.include_count = true; - params.count_start_address = maxwell3d.getMacroAddress(4); - params.indirect_start_address = maxwell3d.getMacroAddress(5); + params.count_start_address = maxwell3d.GetMacroAddress(4); + params.indirect_start_address = maxwell3d.GetMacroAddress(5); params.buffer_size = stride * draw_count; params.max_draw_counts = draw_count; params.stride = stride; maxwell3d.dirty.flags[VideoCommon::Dirty::IndexBuffer] = true; maxwell3d.engine_state = Maxwell::EngineHint::OnHLEMacro; - maxwell3d.setHLEReplacementName(0, 0x640, Maxwell::HLEReplaceName::BaseVertex); - maxwell3d.setHLEReplacementName(0, 0x644, Maxwell::HLEReplaceName::BaseInstance); + maxwell3d.SetHLEReplacementAttributeType(0, 0x640, + Maxwell::HLEReplacementAttributeType::BaseVertex); + maxwell3d.SetHLEReplacementAttributeType( + 0, 0x644, Maxwell::HLEReplacementAttributeType::BaseInstance); maxwell3d.draw_manager->DrawIndexedIndirect(topology, 0, estimate); maxwell3d.engine_state = Maxwell::EngineHint::None; maxwell3d.replace_table.clear(); @@ -298,8 +306,10 @@ private: const u32 base_instance = parameters[base + 4]; maxwell3d.regs.vertex_id_base = base_vertex; maxwell3d.engine_state = Maxwell::EngineHint::OnHLEMacro; - maxwell3d.setHLEReplacementName(0, 0x640, Maxwell::HLEReplaceName::BaseVertex); - maxwell3d.setHLEReplacementName(0, 0x644, Maxwell::HLEReplaceName::BaseInstance); + maxwell3d.SetHLEReplacementAttributeType( + 0, 0x640, Maxwell::HLEReplacementAttributeType::BaseVertex); + maxwell3d.SetHLEReplacementAttributeType( + 0, 0x644, Maxwell::HLEReplacementAttributeType::BaseInstance); maxwell3d.dirty.flags[VideoCommon::Dirty::IndexBuffer] = true; maxwell3d.draw_manager->DrawIndex(topology, parameters[base + 2], parameters[base], base_vertex, base_instance, parameters[base + 1]); diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 9d5d5a849..de2a83c87 100755 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -51,7 +51,7 @@ using VideoCommon::LoadPipelines; using VideoCommon::SerializePipeline; using Context = ShaderContext::Context; -constexpr u32 CACHE_VERSION = 8; +constexpr u32 CACHE_VERSION = 9; template auto MakeSpan(Container& container) { diff --git a/src/video_core/renderer_opengl/gl_texture_cache.h b/src/video_core/renderer_opengl/gl_texture_cache.h index 6c54e4827..cbf1669cf 100755 --- a/src/video_core/renderer_opengl/gl_texture_cache.h +++ b/src/video_core/renderer_opengl/gl_texture_cache.h @@ -354,6 +354,7 @@ struct TextureCacheParams { static constexpr bool FRAMEBUFFER_BLITS = true; static constexpr bool HAS_EMULATED_COPIES = true; static constexpr bool HAS_DEVICE_MEMORY_INFO = true; + static constexpr bool IMPLEMENTS_ASYNC_DOWNLOADS = false; using Runtime = OpenGL::TextureCacheRuntime; using Image = OpenGL::Image; @@ -361,6 +362,7 @@ struct TextureCacheParams { using ImageView = OpenGL::ImageView; using Sampler = OpenGL::Sampler; using Framebuffer = OpenGL::Framebuffer; + using AsyncBuffer = u32; }; using TextureCache = VideoCommon::TextureCache; diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 8fb2e90bc..4bf4bda2a 100755 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -54,7 +54,7 @@ using VideoCommon::FileEnvironment; using VideoCommon::GenericEnvironment; using VideoCommon::GraphicsEnvironment; -constexpr u32 CACHE_VERSION = 9; +constexpr u32 CACHE_VERSION = 10; template auto MakeSpan(Container& container) { diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index ed4161f32..5e0929d49 100755 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -886,30 +886,52 @@ void RasterizerVulkan::UpdateStencilFaces(Tegra::Engines::Maxwell3D::Regs& regs) if (!state_tracker.TouchStencilProperties()) { return; } - if (regs.stencil_two_side_enable) { - // Separate values per face - scheduler.Record( - [front_ref = regs.stencil_front_ref, front_write_mask = regs.stencil_front_mask, - front_test_mask = regs.stencil_front_func_mask, back_ref = regs.stencil_back_ref, - back_write_mask = regs.stencil_back_mask, - back_test_mask = regs.stencil_back_func_mask](vk::CommandBuffer cmdbuf) { - // Front face - cmdbuf.SetStencilReference(VK_STENCIL_FACE_FRONT_BIT, front_ref); - cmdbuf.SetStencilWriteMask(VK_STENCIL_FACE_FRONT_BIT, front_write_mask); - cmdbuf.SetStencilCompareMask(VK_STENCIL_FACE_FRONT_BIT, front_test_mask); - - // Back face + bool update_references = state_tracker.TouchStencilReference(); + bool update_write_mask = state_tracker.TouchStencilWriteMask(); + bool update_compare_masks = state_tracker.TouchStencilCompare(); + if (state_tracker.TouchStencilSide(regs.stencil_two_side_enable != 0)) { + update_references = true; + update_write_mask = true; + update_compare_masks = true; + } + if (update_references) { + scheduler.Record([front_ref = regs.stencil_front_ref, back_ref = regs.stencil_back_ref, + two_sided = regs.stencil_two_side_enable](vk::CommandBuffer cmdbuf) { + const bool set_back = two_sided && front_ref != back_ref; + // Front face + cmdbuf.SetStencilReference( + set_back ? VK_STENCIL_FACE_FRONT_BIT : VK_STENCIL_FACE_FRONT_AND_BACK, front_ref); + if (set_back) { cmdbuf.SetStencilReference(VK_STENCIL_FACE_BACK_BIT, back_ref); + } + }); + } + if (update_write_mask) { + scheduler.Record([front_write_mask = regs.stencil_front_mask, + back_write_mask = regs.stencil_back_mask, + two_sided = regs.stencil_two_side_enable](vk::CommandBuffer cmdbuf) { + const bool set_back = two_sided && front_write_mask != back_write_mask; + // Front face + cmdbuf.SetStencilWriteMask(set_back ? VK_STENCIL_FACE_FRONT_BIT + : VK_STENCIL_FACE_FRONT_AND_BACK, + front_write_mask); + if (set_back) { cmdbuf.SetStencilWriteMask(VK_STENCIL_FACE_BACK_BIT, back_write_mask); + } + }); + } + if (update_compare_masks) { + scheduler.Record([front_test_mask = regs.stencil_front_func_mask, + back_test_mask = regs.stencil_back_func_mask, + two_sided = regs.stencil_two_side_enable](vk::CommandBuffer cmdbuf) { + const bool set_back = two_sided && front_test_mask != back_test_mask; + // Front face + cmdbuf.SetStencilCompareMask(set_back ? VK_STENCIL_FACE_FRONT_BIT + : VK_STENCIL_FACE_FRONT_AND_BACK, + front_test_mask); + if (set_back) { cmdbuf.SetStencilCompareMask(VK_STENCIL_FACE_BACK_BIT, back_test_mask); - }); - } else { - // Front face defines both faces - scheduler.Record([ref = regs.stencil_front_ref, write_mask = regs.stencil_front_mask, - test_mask = regs.stencil_front_func_mask](vk::CommandBuffer cmdbuf) { - cmdbuf.SetStencilReference(VK_STENCIL_FACE_FRONT_AND_BACK, ref); - cmdbuf.SetStencilWriteMask(VK_STENCIL_FACE_FRONT_AND_BACK, write_mask); - cmdbuf.SetStencilCompareMask(VK_STENCIL_FACE_FRONT_AND_BACK, test_mask); + } }); } } @@ -990,7 +1012,7 @@ void RasterizerVulkan::UpdateDepthBiasEnable(Tegra::Engines::Maxwell3D::Regs& re constexpr size_t POINT = 0; constexpr size_t LINE = 1; constexpr size_t POLYGON = 2; - constexpr std::array POLYGON_OFFSET_ENABLE_LUT = { + static constexpr std::array POLYGON_OFFSET_ENABLE_LUT = { POINT, // Points LINE, // Lines LINE, // LineLoop @@ -1099,13 +1121,12 @@ void RasterizerVulkan::UpdateStencilOp(Tegra::Engines::Maxwell3D::Regs& regs) { } void RasterizerVulkan::UpdateLogicOp(Tegra::Engines::Maxwell3D::Regs& regs) { - if (!regs.logic_op.enable) { - return; - } if (!state_tracker.TouchLogicOp()) { return; } - auto op = static_cast(static_cast(regs.logic_op.op) - 0x1500); + const auto op_value = static_cast(regs.logic_op.op); + auto op = op_value >= 0x1500 && op_value < 0x1510 ? static_cast(op_value - 0x1500) + : VK_LOGIC_OP_NO_OP; scheduler.Record([op](vk::CommandBuffer cmdbuf) { cmdbuf.SetLogicOpEXT(op); }); } diff --git a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp index 0e44706ac..fa20035de 100755 --- a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp +++ b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp @@ -30,7 +30,8 @@ constexpr VkDeviceSize MAX_STREAM_BUFFER_REQUEST_SIZE = 8_MiB; constexpr VkDeviceSize STREAM_BUFFER_SIZE = 128_MiB; constexpr VkDeviceSize REGION_SIZE = STREAM_BUFFER_SIZE / StagingBufferPool::NUM_SYNCS; -constexpr VkMemoryPropertyFlags HOST_FLAGS = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT; +constexpr VkMemoryPropertyFlags HOST_FLAGS = + VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT; constexpr VkMemoryPropertyFlags STREAM_FLAGS = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | HOST_FLAGS; bool IsStreamHeap(VkMemoryHeap heap) noexcept { @@ -92,7 +93,9 @@ StagingBufferPool::StagingBufferPool(const Device& device_, MemoryAllocator& mem .pNext = nullptr, .flags = 0, .size = STREAM_BUFFER_SIZE, - .usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT, + .usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | + VK_BUFFER_USAGE_INDEX_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | + VK_BUFFER_USAGE_TRANSFORM_FEEDBACK_BUFFER_BIT_EXT, .sharingMode = VK_SHARING_MODE_EXCLUSIVE, .queueFamilyIndexCount = 0, .pQueueFamilyIndices = nullptr, @@ -244,19 +247,15 @@ std::optional StagingBufferPool::TryGetReservedBuffer(size_t s StagingBufferRef StagingBufferPool::CreateStagingBuffer(size_t size, MemoryUsage usage, bool deferred) { const u32 log2 = Common::Log2Ceil64(size); - VkBufferUsageFlags usage_flags{}; - if (usage == MemoryUsage::Upload) { - usage_flags |= VK_BUFFER_USAGE_TRANSFER_SRC_BIT; - } - if (usage == MemoryUsage::Download) { - usage_flags |= VK_BUFFER_USAGE_TRANSFER_DST_BIT; - } vk::Buffer buffer = device.GetLogical().CreateBuffer({ .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, .pNext = nullptr, .flags = 0, .size = 1ULL << log2, - .usage = usage_flags, + .usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT | + VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | + VK_BUFFER_USAGE_INDEX_BUFFER_BIT | VK_BUFFER_USAGE_VERTEX_BUFFER_BIT | + VK_BUFFER_USAGE_TRANSFORM_FEEDBACK_BUFFER_BIT_EXT, .sharingMode = VK_SHARING_MODE_EXCLUSIVE, .queueFamilyIndexCount = 0, .pQueueFamilyIndices = nullptr, diff --git a/src/video_core/renderer_vulkan/vk_state_tracker.cpp b/src/video_core/renderer_vulkan/vk_state_tracker.cpp index d437d4908..e436ce40c 100755 --- a/src/video_core/renderer_vulkan/vk_state_tracker.cpp +++ b/src/video_core/renderer_vulkan/vk_state_tracker.cpp @@ -33,6 +33,9 @@ Flags MakeInvalidationFlags() { BlendConstants, DepthBounds, StencilProperties, + StencilReference, + StencilWriteMask, + StencilCompare, LineWidth, CullMode, DepthBoundsEnable, @@ -99,14 +102,17 @@ void SetupDirtyDepthBounds(Tables& tables) { } void SetupDirtyStencilProperties(Tables& tables) { - auto& table = tables[0]; - table[OFF(stencil_two_side_enable)] = StencilProperties; - table[OFF(stencil_front_ref)] = StencilProperties; - table[OFF(stencil_front_mask)] = StencilProperties; - table[OFF(stencil_front_func_mask)] = StencilProperties; - table[OFF(stencil_back_ref)] = StencilProperties; - table[OFF(stencil_back_mask)] = StencilProperties; - table[OFF(stencil_back_func_mask)] = StencilProperties; + const auto setup = [&](size_t position, u8 flag) { + tables[0][position] = flag; + tables[1][position] = StencilProperties; + }; + tables[0][OFF(stencil_two_side_enable)] = StencilProperties; + setup(OFF(stencil_front_ref), StencilReference); + setup(OFF(stencil_front_mask), StencilWriteMask); + setup(OFF(stencil_front_func_mask), StencilCompare); + setup(OFF(stencil_back_ref), StencilReference); + setup(OFF(stencil_back_mask), StencilWriteMask); + setup(OFF(stencil_back_func_mask), StencilCompare); } void SetupDirtyLineWidth(Tables& tables) { diff --git a/src/video_core/renderer_vulkan/vk_state_tracker.h b/src/video_core/renderer_vulkan/vk_state_tracker.h index c25ddab54..ba3d5ebaa 100755 --- a/src/video_core/renderer_vulkan/vk_state_tracker.h +++ b/src/video_core/renderer_vulkan/vk_state_tracker.h @@ -35,6 +35,9 @@ enum : u8 { BlendConstants, DepthBounds, StencilProperties, + StencilReference, + StencilWriteMask, + StencilCompare, LineWidth, CullMode, @@ -113,6 +116,24 @@ public: return Exchange(Dirty::StencilProperties, false); } + bool TouchStencilReference() { + return Exchange(Dirty::StencilReference, false); + } + + bool TouchStencilWriteMask() { + return Exchange(Dirty::StencilWriteMask, false); + } + + bool TouchStencilCompare() { + return Exchange(Dirty::StencilCompare, false); + } + + bool TouchStencilSide(bool two_sided_stencil_new) { + bool result = two_sided_stencil != two_sided_stencil_new; + two_sided_stencil = two_sided_stencil_new; + return result; + } + bool TouchLineWidth() const { return Exchange(Dirty::LineWidth, false); } @@ -218,6 +239,7 @@ private: Tegra::Engines::Maxwell3D::DirtyState::Flags default_flags; Tegra::Engines::Maxwell3D::DirtyState::Flags invalidation_flags; Maxwell::PrimitiveTopology current_topology = INVALID_TOPOLOGY; + bool two_sided_stencil = false; }; } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp index 8bb25eb38..ad3696e95 100755 --- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp @@ -812,8 +812,12 @@ StagingBufferRef TextureCacheRuntime::UploadStagingBuffer(size_t size) { return staging_buffer_pool.Request(size, MemoryUsage::Upload); } -StagingBufferRef TextureCacheRuntime::DownloadStagingBuffer(size_t size) { - return staging_buffer_pool.Request(size, MemoryUsage::Download); +StagingBufferRef TextureCacheRuntime::DownloadStagingBuffer(size_t size, bool deferred) { + return staging_buffer_pool.Request(size, MemoryUsage::Download, deferred); +} + +void TextureCacheRuntime::FreeDeferredStagingBuffer(StagingBufferRef& ref) { + staging_buffer_pool.FreeDeferred(ref); } bool TextureCacheRuntime::ShouldReinterpret(Image& dst, Image& src) { diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.h b/src/video_core/renderer_vulkan/vk_texture_cache.h index d86a52386..277692216 100755 --- a/src/video_core/renderer_vulkan/vk_texture_cache.h +++ b/src/video_core/renderer_vulkan/vk_texture_cache.h @@ -51,7 +51,9 @@ public: StagingBufferRef UploadStagingBuffer(size_t size); - StagingBufferRef DownloadStagingBuffer(size_t size); + StagingBufferRef DownloadStagingBuffer(size_t size, bool deferred = false); + + void FreeDeferredStagingBuffer(StagingBufferRef& ref); void TickFrame(); @@ -347,6 +349,7 @@ struct TextureCacheParams { static constexpr bool FRAMEBUFFER_BLITS = false; static constexpr bool HAS_EMULATED_COPIES = false; static constexpr bool HAS_DEVICE_MEMORY_INFO = true; + static constexpr bool IMPLEMENTS_ASYNC_DOWNLOADS = true; using Runtime = Vulkan::TextureCacheRuntime; using Image = Vulkan::Image; @@ -354,6 +357,7 @@ struct TextureCacheParams { using ImageView = Vulkan::ImageView; using Sampler = Vulkan::Sampler; using Framebuffer = Vulkan::Framebuffer; + using AsyncBuffer = Vulkan::StagingBufferRef; }; using TextureCache = VideoCommon::TextureCache; diff --git a/src/video_core/shader_environment.cpp b/src/video_core/shader_environment.cpp index 96dedeef5..9c276cff9 100755 --- a/src/video_core/shader_environment.cpp +++ b/src/video_core/shader_environment.cpp @@ -325,6 +325,7 @@ GraphicsEnvironment::GraphicsEnvironment(Tegra::Engines::Maxwell3D& maxwell3d_, ASSERT(local_size <= std::numeric_limits::max()); local_memory_size = static_cast(local_size) + sph.common3.shader_local_memory_crs_size; texture_bound = maxwell3d->regs.bindless_texture_const_buffer_slot; + is_propietary_driver = texture_bound == 2; has_hle_engine_state = maxwell3d->engine_state == Tegra::Engines::Maxwell3D::EngineHint::OnHLEMacro; } @@ -350,11 +351,11 @@ std::optional GraphicsEnvironment::GetReplaceConstBuffe if (it == maxwell3d->replace_table.end()) { return std::nullopt; } - const auto converted_value = [](Tegra::Engines::Maxwell3D::HLEReplaceName name) { + const auto converted_value = [](Tegra::Engines::Maxwell3D::HLEReplacementAttributeType name) { switch (name) { - case Tegra::Engines::Maxwell3D::HLEReplaceName::BaseVertex: + case Tegra::Engines::Maxwell3D::HLEReplacementAttributeType::BaseVertex: return Shader::ReplaceConstant::BaseVertex; - case Tegra::Engines::Maxwell3D::HLEReplaceName::BaseInstance: + case Tegra::Engines::Maxwell3D::HLEReplacementAttributeType::BaseInstance: return Shader::ReplaceConstant::BaseInstance; default: UNREACHABLE(); @@ -399,6 +400,7 @@ ComputeEnvironment::ComputeEnvironment(Tegra::Engines::KeplerCompute& kepler_com stage = Shader::Stage::Compute; local_memory_size = qmd.local_pos_alloc + qmd.local_crs_alloc; texture_bound = kepler_compute->regs.tex_cb_index; + is_propietary_driver = texture_bound == 2; shared_memory_size = qmd.shared_alloc; workgroup_size = {qmd.block_dim_x, qmd.block_dim_y, qmd.block_dim_z}; } @@ -498,6 +500,7 @@ void FileEnvironment::Deserialize(std::ifstream& file) { file.read(reinterpret_cast(&gp_passthrough_mask), sizeof(gp_passthrough_mask)); } } + is_propietary_driver = texture_bound == 2; } void FileEnvironment::Dump(u64 hash) { diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 01035ba1d..d05a7a582 100755 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -646,7 +646,28 @@ bool TextureCache

::ShouldWaitAsyncFlushes() const noexcept { template void TextureCache

::CommitAsyncFlushes() { // This is intentionally passing the value by copy - committed_downloads.push(uncommitted_downloads); + if constexpr (IMPLEMENTS_ASYNC_DOWNLOADS) { + const std::span download_ids = uncommitted_downloads; + if (download_ids.empty()) { + committed_downloads.emplace_back(std::move(uncommitted_downloads)); + uncommitted_downloads.clear(); + async_buffers.emplace_back(std::optional{}); + return; + } + size_t total_size_bytes = 0; + for (const ImageId image_id : download_ids) { + total_size_bytes += slot_images[image_id].unswizzled_size_bytes; + } + auto download_map = runtime.DownloadStagingBuffer(total_size_bytes, true); + for (const ImageId image_id : download_ids) { + Image& image = slot_images[image_id]; + const auto copies = FullDownloadCopies(image.info); + image.DownloadMemory(download_map, copies); + download_map.offset += Common::AlignUp(image.unswizzled_size_bytes, 64); + } + async_buffers.emplace_back(download_map); + } + committed_downloads.emplace_back(std::move(uncommitted_downloads)); uncommitted_downloads.clear(); } @@ -655,37 +676,58 @@ void TextureCache

::PopAsyncFlushes() { if (committed_downloads.empty()) { return; } - const std::span download_ids = committed_downloads.front(); - if (download_ids.empty()) { - committed_downloads.pop(); - return; + if constexpr (IMPLEMENTS_ASYNC_DOWNLOADS) { + const std::span download_ids = committed_downloads.front(); + if (download_ids.empty()) { + committed_downloads.pop_front(); + async_buffers.pop_front(); + return; + } + auto download_map = *async_buffers.front(); + std::span download_span = download_map.mapped_span; + for (size_t i = download_ids.size(); i > 0; i--) { + const ImageBase& image = slot_images[download_ids[i - 1]]; + const auto copies = FullDownloadCopies(image.info); + download_map.offset -= Common::AlignUp(image.unswizzled_size_bytes, 64); + std::span download_span_alt = download_span.subspan(download_map.offset); + SwizzleImage(*gpu_memory, image.gpu_addr, image.info, copies, download_span_alt, + swizzle_data_buffer); + } + runtime.FreeDeferredStagingBuffer(download_map); + committed_downloads.pop_front(); + async_buffers.pop_front(); + } else { + const std::span download_ids = committed_downloads.front(); + if (download_ids.empty()) { + committed_downloads.pop_front(); + return; + } + size_t total_size_bytes = 0; + for (const ImageId image_id : download_ids) { + total_size_bytes += slot_images[image_id].unswizzled_size_bytes; + } + auto download_map = runtime.DownloadStagingBuffer(total_size_bytes); + const size_t original_offset = download_map.offset; + for (const ImageId image_id : download_ids) { + Image& image = slot_images[image_id]; + const auto copies = FullDownloadCopies(image.info); + image.DownloadMemory(download_map, copies); + download_map.offset += image.unswizzled_size_bytes; + } + // Wait for downloads to finish + runtime.Finish(); + download_map.offset = original_offset; + std::span download_span = download_map.mapped_span; + for (const ImageId image_id : download_ids) { + const ImageBase& image = slot_images[image_id]; + const auto copies = FullDownloadCopies(image.info); + SwizzleImage(*gpu_memory, image.gpu_addr, image.info, copies, download_span, + swizzle_data_buffer); + download_map.offset += image.unswizzled_size_bytes; + download_span = download_span.subspan(image.unswizzled_size_bytes); + } + committed_downloads.pop_front(); } - size_t total_size_bytes = 0; - for (const ImageId image_id : download_ids) { - total_size_bytes += slot_images[image_id].unswizzled_size_bytes; - } - auto download_map = runtime.DownloadStagingBuffer(total_size_bytes); - const size_t original_offset = download_map.offset; - for (const ImageId image_id : download_ids) { - Image& image = slot_images[image_id]; - const auto copies = FullDownloadCopies(image.info); - image.DownloadMemory(download_map, copies); - download_map.offset += image.unswizzled_size_bytes; - } - // Wait for downloads to finish - runtime.Finish(); - - download_map.offset = original_offset; - std::span download_span = download_map.mapped_span; - for (const ImageId image_id : download_ids) { - const ImageBase& image = slot_images[image_id]; - const auto copies = FullDownloadCopies(image.info); - SwizzleImage(*gpu_memory, image.gpu_addr, image.info, copies, download_span, - swizzle_data_buffer); - download_map.offset += image.unswizzled_size_bytes; - download_span = download_span.subspan(image.unswizzled_size_bytes); - } - committed_downloads.pop(); } template @@ -1475,6 +1517,27 @@ void TextureCache

::ForEachSparseSegment(ImageBase& image, Func&& func) { } } +template +void TextureCache

::BubbleUpImages(VAddr cpu_addr, size_t size) { + ForEachCPUPage(cpu_addr, size, [this](u64 page) { + const auto it = page_table.find(page); + if (it == page_table.end()) { + return; + } + std::vector& map_vector = it->second; + for (size_t i = 1; i < map_vector.size(); i++) { + ImageMapView& bottom_map = slot_map_views[map_vector[i - 1]]; + ImageMapView& top_map = slot_map_views[map_vector[i]]; + if (slot_images[bottom_map.image_id].modification_tick < + slot_images[top_map.image_id].modification_tick) { + std::swap(map_vector[i - 1], map_vector[i]); + } else { + return; + } + } + }); +} + template ImageViewId TextureCache

::FindOrEmplaceImageView(ImageId image_id, const ImageViewInfo& info) { Image& image = slot_images[image_id]; @@ -1788,6 +1851,7 @@ template void TextureCache

::MarkModification(ImageBase& image) noexcept { image.flags |= ImageFlagBits::GpuModified; image.modification_tick = ++modification_tick; + BubbleUpImages(image.cpu_addr, image.guest_size_bytes); } template diff --git a/src/video_core/texture_cache/texture_cache_base.h b/src/video_core/texture_cache/texture_cache_base.h index c2ea36761..bceacfc35 100755 --- a/src/video_core/texture_cache/texture_cache_base.h +++ b/src/video_core/texture_cache/texture_cache_base.h @@ -92,6 +92,8 @@ class TextureCache : public VideoCommon::ChannelSetupCaches::max()}; @@ -106,6 +108,7 @@ class TextureCache : public VideoCommon::ChannelSetupCaches void ForEachSparseSegment(ImageBase& image, Func&& func); + void BubbleUpImages(VAddr cpu_addr, size_t size); + /// Find or create an image view in the given image with the passed parameters [[nodiscard]] ImageViewId FindOrEmplaceImageView(ImageId image_id, const ImageViewInfo& info); @@ -403,7 +408,8 @@ private: // TODO: This data structure is not optimal and it should be reworked std::vector uncommitted_downloads; - std::queue> committed_downloads; + std::deque> committed_downloads; + std::deque> async_buffers; struct LRUItemParams { using ObjectType = ImageId; diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp index b41612773..c114942d3 100755 --- a/src/video_core/vulkan_common/vulkan_device.cpp +++ b/src/video_core/vulkan_common/vulkan_device.cpp @@ -756,20 +756,23 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR CollectToolingInfo(); if (driver_id == VK_DRIVER_ID_NVIDIA_PROPRIETARY_KHR) { - const auto arch = GetNvidiaArchitecture(physical, supported_extensions); - switch (arch) { - case NvidiaArchitecture::AmpereOrNewer: - LOG_WARNING(Render_Vulkan, "Blacklisting Ampere devices from float16 math"); - is_float16_supported = false; - break; - case NvidiaArchitecture::Turing: - break; - case NvidiaArchitecture::VoltaOrOlder: - LOG_WARNING(Render_Vulkan, "Blacklisting Volta and older from VK_KHR_push_descriptor"); - khr_push_descriptor = false; - break; - } const u32 nv_major_version = (properties.driverVersion >> 22) & 0x3ff; + if (nv_major_version < 527) { + const auto arch = GetNvidiaArchitecture(physical, supported_extensions); + switch (arch) { + case NvidiaArchitecture::AmpereOrNewer: + LOG_WARNING(Render_Vulkan, "Blacklisting Ampere devices from float16 math"); + is_float16_supported = false; + break; + case NvidiaArchitecture::Turing: + break; + case NvidiaArchitecture::VoltaOrOlder: + LOG_WARNING(Render_Vulkan, + "Blacklisting Volta and older from VK_KHR_push_descriptor"); + khr_push_descriptor = false; + break; + } + } if (nv_major_version >= 510) { LOG_WARNING(Render_Vulkan, "NVIDIA Drivers >= 510 do not support MSAA image blits"); cant_blit_msaa = true; @@ -834,8 +837,11 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR const bool is_intel_windows = driver_id == VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS; const bool is_intel_anv = driver_id == VK_DRIVER_ID_INTEL_OPEN_SOURCE_MESA; if (ext_vertex_input_dynamic_state && is_intel_windows) { - LOG_WARNING(Render_Vulkan, "Blacklisting Intel for VK_EXT_vertex_input_dynamic_state"); - ext_vertex_input_dynamic_state = false; + const u32 version = (properties.driverVersion << 3) >> 3; + if (version < VK_MAKE_API_VERSION(27, 20, 100, 0)) { + LOG_WARNING(Render_Vulkan, "Blacklisting Intel for VK_EXT_vertex_input_dynamic_state"); + ext_vertex_input_dynamic_state = false; + } } if (is_float16_supported && is_intel_windows) { // Intel's compiler crashes when using fp16 on Astral Chain, disable it for the time being.