early-access version 3275
This commit is contained in:
parent
4b11185ae8
commit
0f64bad6bd
@ -1,7 +1,7 @@
|
||||
yuzu emulator early access
|
||||
=============
|
||||
|
||||
This is the source code for early-access 3274.
|
||||
This is the source code for early-access 3275.
|
||||
|
||||
## Legal Notice
|
||||
|
||||
|
@ -11,6 +11,11 @@
|
||||
namespace Core::HID {
|
||||
constexpr s32 HID_JOYSTICK_MAX = 0x7fff;
|
||||
constexpr s32 HID_TRIGGER_MAX = 0x7fff;
|
||||
// Use a common UUID for TAS and Virtual Gamepad
|
||||
constexpr Common::UUID TAS_UUID =
|
||||
Common::UUID{{0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x7, 0xA5, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}};
|
||||
constexpr Common::UUID VIRTUAL_UUID =
|
||||
Common::UUID{{0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x7, 0xFF, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}};
|
||||
|
||||
EmulatedController::EmulatedController(NpadIdType npad_id_type_) : npad_id_type(npad_id_type_) {}
|
||||
|
||||
@ -392,10 +397,6 @@ void EmulatedController::ReloadInput() {
|
||||
nfc_devices[index]->ForceUpdate();
|
||||
}
|
||||
|
||||
// Use a common UUID for TAS
|
||||
static constexpr Common::UUID TAS_UUID = Common::UUID{
|
||||
{0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x7, 0xA5, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}};
|
||||
|
||||
// Register TAS devices. No need to force update
|
||||
for (std::size_t index = 0; index < tas_button_devices.size(); ++index) {
|
||||
if (!tas_button_devices[index]) {
|
||||
@ -421,10 +422,6 @@ void EmulatedController::ReloadInput() {
|
||||
});
|
||||
}
|
||||
|
||||
// Use a common UUID for Virtual Gamepad
|
||||
static constexpr Common::UUID VIRTUAL_UUID = Common::UUID{
|
||||
{0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x7, 0xFF, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}};
|
||||
|
||||
// Register virtual devices. No need to force update
|
||||
for (std::size_t index = 0; index < virtual_button_devices.size(); ++index) {
|
||||
if (!virtual_button_devices[index]) {
|
||||
@ -842,7 +839,12 @@ void EmulatedController::SetStick(const Common::Input::CallbackStatus& callback,
|
||||
|
||||
// Only read stick values that have the same uuid or are over the threshold to avoid flapping
|
||||
if (controller.stick_values[index].uuid != uuid) {
|
||||
if (!stick_value.down && !stick_value.up && !stick_value.left && !stick_value.right) {
|
||||
const bool is_tas = uuid == TAS_UUID;
|
||||
if (is_tas && stick_value.x.value == 0 && stick_value.y.value == 0) {
|
||||
return;
|
||||
}
|
||||
if (!is_tas && !stick_value.down && !stick_value.up && !stick_value.left &&
|
||||
!stick_value.right) {
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
@ -74,8 +74,6 @@ void IRS::DeactivateIrsensor(Kernel::HLERequestContext& ctx) {
|
||||
LOG_WARNING(Service_IRS, "(STUBBED) called, applet_resource_user_id={}",
|
||||
applet_resource_user_id);
|
||||
|
||||
npad_device->SetPollingMode(Common::Input::PollingMode::Active);
|
||||
|
||||
IPC::ResponseBuilder rb{ctx, 2};
|
||||
rb.Push(ResultSuccess);
|
||||
}
|
||||
@ -514,7 +512,7 @@ void IRS::StopImageProcessorAsync(Kernel::HLERequestContext& ctx) {
|
||||
auto result = IsIrCameraHandleValid(parameters.camera_handle);
|
||||
if (result.IsSuccess()) {
|
||||
// TODO: Stop image processor async
|
||||
npad_device->SetPollingMode(Common::Input::PollingMode::IR);
|
||||
npad_device->SetPollingMode(Common::Input::PollingMode::Active);
|
||||
result = ResultSuccess;
|
||||
}
|
||||
|
||||
|
@ -264,6 +264,16 @@ DriverResult JoyconDriver::SetPollingMode() {
|
||||
irs_protocol->DisableIrs();
|
||||
}
|
||||
|
||||
if (nfc_protocol->IsEnabled()) {
|
||||
amiibo_detected = false;
|
||||
nfc_protocol->DisableNfc();
|
||||
}
|
||||
|
||||
if (ring_protocol->IsEnabled()) {
|
||||
ring_connected = false;
|
||||
ring_protocol->DisableRingCon();
|
||||
}
|
||||
|
||||
if (irs_enabled && supported_features.irs) {
|
||||
auto result = irs_protocol->EnableIrs();
|
||||
if (result == DriverResult::Success) {
|
||||
@ -274,11 +284,6 @@ DriverResult JoyconDriver::SetPollingMode() {
|
||||
LOG_ERROR(Input, "Error enabling IRS");
|
||||
}
|
||||
|
||||
if (nfc_protocol->IsEnabled()) {
|
||||
amiibo_detected = false;
|
||||
nfc_protocol->DisableNfc();
|
||||
}
|
||||
|
||||
if (nfc_enabled && supported_features.nfc) {
|
||||
auto result = nfc_protocol->EnableNfc();
|
||||
if (result == DriverResult::Success) {
|
||||
@ -292,11 +297,6 @@ DriverResult JoyconDriver::SetPollingMode() {
|
||||
LOG_ERROR(Input, "Error enabling NFC");
|
||||
}
|
||||
|
||||
if (ring_protocol->IsEnabled()) {
|
||||
ring_connected = false;
|
||||
ring_protocol->DisableRingCon();
|
||||
}
|
||||
|
||||
if (hidbus_enabled && supported_features.hidbus) {
|
||||
auto result = ring_protocol->EnableRingCon();
|
||||
if (result == DriverResult::Success) {
|
||||
@ -428,6 +428,12 @@ DriverResult JoyconDriver::SetPasiveMode() {
|
||||
}
|
||||
|
||||
DriverResult JoyconDriver::SetActiveMode() {
|
||||
if (is_ring_disabled_by_irs) {
|
||||
is_ring_disabled_by_irs = false;
|
||||
SetActiveMode();
|
||||
return SetRingConMode();
|
||||
}
|
||||
|
||||
std::scoped_lock lock{mutex};
|
||||
motion_enabled = true;
|
||||
hidbus_enabled = false;
|
||||
@ -444,6 +450,10 @@ DriverResult JoyconDriver::SetIrMode() {
|
||||
return DriverResult::NotSupported;
|
||||
}
|
||||
|
||||
if (ring_connected) {
|
||||
is_ring_disabled_by_irs = true;
|
||||
}
|
||||
|
||||
motion_enabled = false;
|
||||
hidbus_enabled = false;
|
||||
nfc_enabled = false;
|
||||
|
@ -108,6 +108,7 @@ private:
|
||||
bool starlink_connected{};
|
||||
bool ring_connected{};
|
||||
bool amiibo_detected{};
|
||||
bool is_ring_disabled_by_irs{};
|
||||
|
||||
// Harware configuration
|
||||
u8 leds{};
|
||||
|
@ -74,8 +74,8 @@ DriverResult JoyconCommonProtocol::SendData(std::span<const u8> buffer) {
|
||||
}
|
||||
|
||||
DriverResult JoyconCommonProtocol::GetSubCommandResponse(SubCommand sc, std::vector<u8>& output) {
|
||||
constexpr int timeout_mili = 100;
|
||||
constexpr int MaxTries = 10;
|
||||
constexpr int timeout_mili = 66;
|
||||
constexpr int MaxTries = 15;
|
||||
int tries = 0;
|
||||
output.resize(MaxSubCommandResponseSize);
|
||||
|
||||
|
@ -57,11 +57,16 @@ public:
|
||||
return start_address;
|
||||
}
|
||||
|
||||
[[nodiscard]] bool IsPropietaryDriver() const noexcept {
|
||||
return is_propietary_driver;
|
||||
}
|
||||
|
||||
protected:
|
||||
ProgramHeader sph{};
|
||||
std::array<u32, 8> gp_passthrough_mask{};
|
||||
Stage stage{};
|
||||
u32 start_address{};
|
||||
bool is_propietary_driver{};
|
||||
};
|
||||
|
||||
} // namespace Shader
|
||||
|
@ -677,6 +677,30 @@ void FoldConstBuffer(Environment& env, IR::Block& block, IR::Inst& inst) {
|
||||
}
|
||||
}
|
||||
|
||||
void FoldDriverConstBuffer(Environment& env, IR::Block& block, IR::Inst& inst, u32 which_bank,
|
||||
u32 offset_start = 0, u32 offset_end = std::numeric_limits<u16>::max()) {
|
||||
const IR::Value bank{inst.Arg(0)};
|
||||
const IR::Value offset{inst.Arg(1)};
|
||||
if (!bank.IsImmediate() || !offset.IsImmediate()) {
|
||||
return;
|
||||
}
|
||||
const auto bank_value = bank.U32();
|
||||
if (bank_value != which_bank) {
|
||||
return;
|
||||
}
|
||||
const auto offset_value = offset.U32();
|
||||
if (offset_value < offset_start || offset_value >= offset_end) {
|
||||
return;
|
||||
}
|
||||
IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
|
||||
if (inst.GetOpcode() == IR::Opcode::GetCbufU32) {
|
||||
inst.ReplaceUsesWith(IR::Value{env.ReadCbufValue(bank_value, offset_value)});
|
||||
} else {
|
||||
inst.ReplaceUsesWith(
|
||||
IR::Value{Common::BitCast<f32>(env.ReadCbufValue(bank_value, offset_value))});
|
||||
}
|
||||
}
|
||||
|
||||
void ConstantPropagation(Environment& env, IR::Block& block, IR::Inst& inst) {
|
||||
switch (inst.GetOpcode()) {
|
||||
case IR::Opcode::GetRegister:
|
||||
@ -825,13 +849,17 @@ void ConstantPropagation(Environment& env, IR::Block& block, IR::Inst& inst) {
|
||||
case IR::Opcode::GetCbufF32:
|
||||
case IR::Opcode::GetCbufU32:
|
||||
if (env.HasHLEMacroState()) {
|
||||
return FoldConstBuffer(env, block, inst);
|
||||
FoldConstBuffer(env, block, inst);
|
||||
}
|
||||
if (env.IsPropietaryDriver()) {
|
||||
FoldDriverConstBuffer(env, block, inst, 1);
|
||||
}
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
} // Anonymous namespace
|
||||
|
||||
void ConstantPropagationPass(Environment& env, IR::Program& program) {
|
||||
|
@ -94,7 +94,7 @@ void DrawManager::DrawIndex(PrimitiveTopology topology, u32 index_first, u32 ind
|
||||
void DrawManager::DrawArrayIndirect(PrimitiveTopology topology) {
|
||||
draw_state.topology = topology;
|
||||
|
||||
ProcessDrawIndirect(true);
|
||||
ProcessDrawIndirect();
|
||||
}
|
||||
|
||||
void DrawManager::DrawIndexedIndirect(PrimitiveTopology topology, u32 index_first,
|
||||
@ -105,7 +105,7 @@ void DrawManager::DrawIndexedIndirect(PrimitiveTopology topology, u32 index_firs
|
||||
draw_state.index_buffer.first = index_first;
|
||||
draw_state.index_buffer.count = index_count;
|
||||
|
||||
ProcessDrawIndirect(true);
|
||||
ProcessDrawIndirect();
|
||||
}
|
||||
|
||||
void DrawManager::SetInlineIndexBuffer(u32 index) {
|
||||
@ -216,9 +216,12 @@ void DrawManager::ProcessDraw(bool draw_indexed, u32 instance_count) {
|
||||
}
|
||||
}
|
||||
|
||||
void DrawManager::ProcessDrawIndirect(bool draw_indexed) {
|
||||
LOG_TRACE(HW_GPU, "called, topology={}, count={}", draw_state.topology,
|
||||
draw_indexed ? draw_state.index_buffer.count : draw_state.vertex_buffer.count);
|
||||
void DrawManager::ProcessDrawIndirect() {
|
||||
LOG_TRACE(
|
||||
HW_GPU,
|
||||
"called, topology={}, is_indexed={}, includes_count={}, buffer_size={}, max_draw_count={}",
|
||||
draw_state.topology, indirect_state.is_indexed, indirect_state.include_count,
|
||||
indirect_state.buffer_size, indirect_state.max_draw_counts);
|
||||
|
||||
UpdateTopology();
|
||||
|
||||
|
@ -85,7 +85,7 @@ private:
|
||||
|
||||
void ProcessDraw(bool draw_indexed, u32 instance_count);
|
||||
|
||||
void ProcessDrawIndirect(bool draw_indexed);
|
||||
void ProcessDrawIndirect();
|
||||
|
||||
Maxwell3D* maxwell3d{};
|
||||
State draw_state{};
|
||||
|
@ -685,7 +685,8 @@ u32 Maxwell3D::GetRegisterValue(u32 method) const {
|
||||
return regs.reg_array[method];
|
||||
}
|
||||
|
||||
void Maxwell3D::setHLEReplacementName(u32 bank, u32 offset, HLEReplaceName name) {
|
||||
void Maxwell3D::SetHLEReplacementAttributeType(u32 bank, u32 offset,
|
||||
HLEReplacementAttributeType name) {
|
||||
const u64 key = (static_cast<u64>(bank) << 32) | offset;
|
||||
replace_table.emplace(key, name);
|
||||
}
|
||||
|
@ -2711,7 +2711,7 @@ public:
|
||||
u32 post_z_pixel_imask; ///< 0x0F1C
|
||||
INSERT_PADDING_BYTES_NOINIT(0x20);
|
||||
ConstantColorRendering const_color_rendering; ///< 0x0F40
|
||||
s32 stencil_back_ref; ///< 0x0F54
|
||||
u32 stencil_back_ref; ///< 0x0F54
|
||||
u32 stencil_back_mask; ///< 0x0F58
|
||||
u32 stencil_back_func_mask; ///< 0x0F5C
|
||||
INSERT_PADDING_BYTES_NOINIT(0x14);
|
||||
@ -2835,9 +2835,9 @@ public:
|
||||
Blend blend; ///< 0x133C
|
||||
u32 stencil_enable; ///< 0x1380
|
||||
StencilOp stencil_front_op; ///< 0x1384
|
||||
s32 stencil_front_ref; ///< 0x1394
|
||||
s32 stencil_front_func_mask; ///< 0x1398
|
||||
s32 stencil_front_mask; ///< 0x139C
|
||||
u32 stencil_front_ref; ///< 0x1394
|
||||
u32 stencil_front_func_mask; ///< 0x1398
|
||||
u32 stencil_front_mask; ///< 0x139C
|
||||
INSERT_PADDING_BYTES_NOINIT(0x4);
|
||||
u32 draw_auto_start_byte_count; ///< 0x13A4
|
||||
PsSaturate frag_color_clamp; ///< 0x13A8
|
||||
@ -3031,14 +3031,14 @@ public:
|
||||
|
||||
EngineHint engine_state{EngineHint::None};
|
||||
|
||||
enum class HLEReplaceName : u32 {
|
||||
enum class HLEReplacementAttributeType : u32 {
|
||||
BaseVertex = 0x0,
|
||||
BaseInstance = 0x1,
|
||||
};
|
||||
|
||||
void setHLEReplacementName(u32 bank, u32 offset, HLEReplaceName name);
|
||||
void SetHLEReplacementAttributeType(u32 bank, u32 offset, HLEReplacementAttributeType name);
|
||||
|
||||
std::unordered_map<u64, HLEReplaceName> replace_table;
|
||||
std::unordered_map<u64, HLEReplacementAttributeType> replace_table;
|
||||
|
||||
static_assert(sizeof(Regs) == Regs::NUM_REGS * sizeof(u32), "Maxwell3D Regs has wrong size");
|
||||
static_assert(std::is_trivially_copyable_v<Regs>, "Maxwell3D Regs must be trivially copyable");
|
||||
@ -3089,7 +3089,7 @@ public:
|
||||
|
||||
std::vector<u8> inline_index_draw_indexes;
|
||||
|
||||
GPUVAddr getMacroAddress(size_t index) const {
|
||||
GPUVAddr GetMacroAddress(size_t index) const {
|
||||
return macro_addresses[index];
|
||||
}
|
||||
|
||||
@ -3100,7 +3100,7 @@ public:
|
||||
RefreshParametersImpl();
|
||||
}
|
||||
|
||||
bool AnyParametersDirty() {
|
||||
bool AnyParametersDirty() const {
|
||||
return current_macro_dirty;
|
||||
}
|
||||
|
||||
@ -3196,11 +3196,6 @@ private:
|
||||
|
||||
bool execute_on{true};
|
||||
|
||||
std::array<bool, Regs::NUM_REGS> draw_command{};
|
||||
std::vector<u32> deferred_draw_method;
|
||||
enum class DrawMode : u32 { General = 0, Instance, InlineIndex };
|
||||
DrawMode draw_mode{DrawMode::General};
|
||||
bool draw_indexed{};
|
||||
std::vector<std::pair<GPUVAddr, size_t>> macro_segments;
|
||||
std::vector<GPUVAddr> macro_addresses;
|
||||
bool current_macro_dirty{};
|
||||
|
@ -81,14 +81,15 @@ public:
|
||||
params.is_indexed = false;
|
||||
params.include_count = false;
|
||||
params.count_start_address = 0;
|
||||
params.indirect_start_address = maxwell3d.getMacroAddress(1);
|
||||
params.indirect_start_address = maxwell3d.GetMacroAddress(1);
|
||||
params.buffer_size = 4 * sizeof(u32);
|
||||
params.max_draw_counts = 1;
|
||||
params.stride = 0;
|
||||
|
||||
if (extended) {
|
||||
maxwell3d.engine_state = Maxwell::EngineHint::OnHLEMacro;
|
||||
maxwell3d.setHLEReplacementName(0, 0x640, Maxwell::HLEReplaceName::BaseInstance);
|
||||
maxwell3d.SetHLEReplacementAttributeType(
|
||||
0, 0x640, Maxwell::HLEReplacementAttributeType::BaseInstance);
|
||||
}
|
||||
|
||||
maxwell3d.draw_manager->DrawArrayIndirect(topology);
|
||||
@ -125,7 +126,8 @@ private:
|
||||
if (extended) {
|
||||
maxwell3d.regs.global_base_instance_index = base_instance;
|
||||
maxwell3d.engine_state = Maxwell::EngineHint::OnHLEMacro;
|
||||
maxwell3d.setHLEReplacementName(0, 0x640, Maxwell::HLEReplaceName::BaseInstance);
|
||||
maxwell3d.SetHLEReplacementAttributeType(
|
||||
0, 0x640, Maxwell::HLEReplacementAttributeType::BaseInstance);
|
||||
}
|
||||
|
||||
maxwell3d.draw_manager->DrawArray(topology, vertex_first, vertex_count, base_instance,
|
||||
@ -160,13 +162,15 @@ public:
|
||||
maxwell3d.regs.global_base_instance_index = base_instance;
|
||||
maxwell3d.dirty.flags[VideoCommon::Dirty::IndexBuffer] = true;
|
||||
maxwell3d.engine_state = Maxwell::EngineHint::OnHLEMacro;
|
||||
maxwell3d.setHLEReplacementName(0, 0x640, Maxwell::HLEReplaceName::BaseVertex);
|
||||
maxwell3d.setHLEReplacementName(0, 0x644, Maxwell::HLEReplaceName::BaseInstance);
|
||||
maxwell3d.SetHLEReplacementAttributeType(0, 0x640,
|
||||
Maxwell::HLEReplacementAttributeType::BaseVertex);
|
||||
maxwell3d.SetHLEReplacementAttributeType(
|
||||
0, 0x644, Maxwell::HLEReplacementAttributeType::BaseInstance);
|
||||
auto& params = maxwell3d.draw_manager->GetIndirectParams();
|
||||
params.is_indexed = true;
|
||||
params.include_count = false;
|
||||
params.count_start_address = 0;
|
||||
params.indirect_start_address = maxwell3d.getMacroAddress(1);
|
||||
params.indirect_start_address = maxwell3d.GetMacroAddress(1);
|
||||
params.buffer_size = 5 * sizeof(u32);
|
||||
params.max_draw_counts = 1;
|
||||
params.stride = 0;
|
||||
@ -190,8 +194,10 @@ private:
|
||||
maxwell3d.regs.global_base_instance_index = base_instance;
|
||||
maxwell3d.dirty.flags[VideoCommon::Dirty::IndexBuffer] = true;
|
||||
maxwell3d.engine_state = Maxwell::EngineHint::OnHLEMacro;
|
||||
maxwell3d.setHLEReplacementName(0, 0x640, Maxwell::HLEReplaceName::BaseVertex);
|
||||
maxwell3d.setHLEReplacementName(0, 0x644, Maxwell::HLEReplaceName::BaseInstance);
|
||||
maxwell3d.SetHLEReplacementAttributeType(0, 0x640,
|
||||
Maxwell::HLEReplacementAttributeType::BaseVertex);
|
||||
maxwell3d.SetHLEReplacementAttributeType(
|
||||
0, 0x644, Maxwell::HLEReplacementAttributeType::BaseInstance);
|
||||
|
||||
maxwell3d.draw_manager->DrawIndex(
|
||||
static_cast<Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology>(parameters[0]),
|
||||
@ -253,15 +259,17 @@ public:
|
||||
auto& params = maxwell3d.draw_manager->GetIndirectParams();
|
||||
params.is_indexed = true;
|
||||
params.include_count = true;
|
||||
params.count_start_address = maxwell3d.getMacroAddress(4);
|
||||
params.indirect_start_address = maxwell3d.getMacroAddress(5);
|
||||
params.count_start_address = maxwell3d.GetMacroAddress(4);
|
||||
params.indirect_start_address = maxwell3d.GetMacroAddress(5);
|
||||
params.buffer_size = stride * draw_count;
|
||||
params.max_draw_counts = draw_count;
|
||||
params.stride = stride;
|
||||
maxwell3d.dirty.flags[VideoCommon::Dirty::IndexBuffer] = true;
|
||||
maxwell3d.engine_state = Maxwell::EngineHint::OnHLEMacro;
|
||||
maxwell3d.setHLEReplacementName(0, 0x640, Maxwell::HLEReplaceName::BaseVertex);
|
||||
maxwell3d.setHLEReplacementName(0, 0x644, Maxwell::HLEReplaceName::BaseInstance);
|
||||
maxwell3d.SetHLEReplacementAttributeType(0, 0x640,
|
||||
Maxwell::HLEReplacementAttributeType::BaseVertex);
|
||||
maxwell3d.SetHLEReplacementAttributeType(
|
||||
0, 0x644, Maxwell::HLEReplacementAttributeType::BaseInstance);
|
||||
maxwell3d.draw_manager->DrawIndexedIndirect(topology, 0, estimate);
|
||||
maxwell3d.engine_state = Maxwell::EngineHint::None;
|
||||
maxwell3d.replace_table.clear();
|
||||
@ -298,8 +306,10 @@ private:
|
||||
const u32 base_instance = parameters[base + 4];
|
||||
maxwell3d.regs.vertex_id_base = base_vertex;
|
||||
maxwell3d.engine_state = Maxwell::EngineHint::OnHLEMacro;
|
||||
maxwell3d.setHLEReplacementName(0, 0x640, Maxwell::HLEReplaceName::BaseVertex);
|
||||
maxwell3d.setHLEReplacementName(0, 0x644, Maxwell::HLEReplaceName::BaseInstance);
|
||||
maxwell3d.SetHLEReplacementAttributeType(
|
||||
0, 0x640, Maxwell::HLEReplacementAttributeType::BaseVertex);
|
||||
maxwell3d.SetHLEReplacementAttributeType(
|
||||
0, 0x644, Maxwell::HLEReplacementAttributeType::BaseInstance);
|
||||
maxwell3d.dirty.flags[VideoCommon::Dirty::IndexBuffer] = true;
|
||||
maxwell3d.draw_manager->DrawIndex(topology, parameters[base + 2], parameters[base],
|
||||
base_vertex, base_instance, parameters[base + 1]);
|
||||
|
@ -51,7 +51,7 @@ using VideoCommon::LoadPipelines;
|
||||
using VideoCommon::SerializePipeline;
|
||||
using Context = ShaderContext::Context;
|
||||
|
||||
constexpr u32 CACHE_VERSION = 8;
|
||||
constexpr u32 CACHE_VERSION = 9;
|
||||
|
||||
template <typename Container>
|
||||
auto MakeSpan(Container& container) {
|
||||
|
@ -354,6 +354,7 @@ struct TextureCacheParams {
|
||||
static constexpr bool FRAMEBUFFER_BLITS = true;
|
||||
static constexpr bool HAS_EMULATED_COPIES = true;
|
||||
static constexpr bool HAS_DEVICE_MEMORY_INFO = true;
|
||||
static constexpr bool IMPLEMENTS_ASYNC_DOWNLOADS = false;
|
||||
|
||||
using Runtime = OpenGL::TextureCacheRuntime;
|
||||
using Image = OpenGL::Image;
|
||||
@ -361,6 +362,7 @@ struct TextureCacheParams {
|
||||
using ImageView = OpenGL::ImageView;
|
||||
using Sampler = OpenGL::Sampler;
|
||||
using Framebuffer = OpenGL::Framebuffer;
|
||||
using AsyncBuffer = u32;
|
||||
};
|
||||
|
||||
using TextureCache = VideoCommon::TextureCache<TextureCacheParams>;
|
||||
|
@ -54,7 +54,7 @@ using VideoCommon::FileEnvironment;
|
||||
using VideoCommon::GenericEnvironment;
|
||||
using VideoCommon::GraphicsEnvironment;
|
||||
|
||||
constexpr u32 CACHE_VERSION = 9;
|
||||
constexpr u32 CACHE_VERSION = 10;
|
||||
|
||||
template <typename Container>
|
||||
auto MakeSpan(Container& container) {
|
||||
|
@ -886,30 +886,52 @@ void RasterizerVulkan::UpdateStencilFaces(Tegra::Engines::Maxwell3D::Regs& regs)
|
||||
if (!state_tracker.TouchStencilProperties()) {
|
||||
return;
|
||||
}
|
||||
if (regs.stencil_two_side_enable) {
|
||||
// Separate values per face
|
||||
scheduler.Record(
|
||||
[front_ref = regs.stencil_front_ref, front_write_mask = regs.stencil_front_mask,
|
||||
front_test_mask = regs.stencil_front_func_mask, back_ref = regs.stencil_back_ref,
|
||||
back_write_mask = regs.stencil_back_mask,
|
||||
back_test_mask = regs.stencil_back_func_mask](vk::CommandBuffer cmdbuf) {
|
||||
// Front face
|
||||
cmdbuf.SetStencilReference(VK_STENCIL_FACE_FRONT_BIT, front_ref);
|
||||
cmdbuf.SetStencilWriteMask(VK_STENCIL_FACE_FRONT_BIT, front_write_mask);
|
||||
cmdbuf.SetStencilCompareMask(VK_STENCIL_FACE_FRONT_BIT, front_test_mask);
|
||||
|
||||
// Back face
|
||||
bool update_references = state_tracker.TouchStencilReference();
|
||||
bool update_write_mask = state_tracker.TouchStencilWriteMask();
|
||||
bool update_compare_masks = state_tracker.TouchStencilCompare();
|
||||
if (state_tracker.TouchStencilSide(regs.stencil_two_side_enable != 0)) {
|
||||
update_references = true;
|
||||
update_write_mask = true;
|
||||
update_compare_masks = true;
|
||||
}
|
||||
if (update_references) {
|
||||
scheduler.Record([front_ref = regs.stencil_front_ref, back_ref = regs.stencil_back_ref,
|
||||
two_sided = regs.stencil_two_side_enable](vk::CommandBuffer cmdbuf) {
|
||||
const bool set_back = two_sided && front_ref != back_ref;
|
||||
// Front face
|
||||
cmdbuf.SetStencilReference(
|
||||
set_back ? VK_STENCIL_FACE_FRONT_BIT : VK_STENCIL_FACE_FRONT_AND_BACK, front_ref);
|
||||
if (set_back) {
|
||||
cmdbuf.SetStencilReference(VK_STENCIL_FACE_BACK_BIT, back_ref);
|
||||
}
|
||||
});
|
||||
}
|
||||
if (update_write_mask) {
|
||||
scheduler.Record([front_write_mask = regs.stencil_front_mask,
|
||||
back_write_mask = regs.stencil_back_mask,
|
||||
two_sided = regs.stencil_two_side_enable](vk::CommandBuffer cmdbuf) {
|
||||
const bool set_back = two_sided && front_write_mask != back_write_mask;
|
||||
// Front face
|
||||
cmdbuf.SetStencilWriteMask(set_back ? VK_STENCIL_FACE_FRONT_BIT
|
||||
: VK_STENCIL_FACE_FRONT_AND_BACK,
|
||||
front_write_mask);
|
||||
if (set_back) {
|
||||
cmdbuf.SetStencilWriteMask(VK_STENCIL_FACE_BACK_BIT, back_write_mask);
|
||||
}
|
||||
});
|
||||
}
|
||||
if (update_compare_masks) {
|
||||
scheduler.Record([front_test_mask = regs.stencil_front_func_mask,
|
||||
back_test_mask = regs.stencil_back_func_mask,
|
||||
two_sided = regs.stencil_two_side_enable](vk::CommandBuffer cmdbuf) {
|
||||
const bool set_back = two_sided && front_test_mask != back_test_mask;
|
||||
// Front face
|
||||
cmdbuf.SetStencilCompareMask(set_back ? VK_STENCIL_FACE_FRONT_BIT
|
||||
: VK_STENCIL_FACE_FRONT_AND_BACK,
|
||||
front_test_mask);
|
||||
if (set_back) {
|
||||
cmdbuf.SetStencilCompareMask(VK_STENCIL_FACE_BACK_BIT, back_test_mask);
|
||||
});
|
||||
} else {
|
||||
// Front face defines both faces
|
||||
scheduler.Record([ref = regs.stencil_front_ref, write_mask = regs.stencil_front_mask,
|
||||
test_mask = regs.stencil_front_func_mask](vk::CommandBuffer cmdbuf) {
|
||||
cmdbuf.SetStencilReference(VK_STENCIL_FACE_FRONT_AND_BACK, ref);
|
||||
cmdbuf.SetStencilWriteMask(VK_STENCIL_FACE_FRONT_AND_BACK, write_mask);
|
||||
cmdbuf.SetStencilCompareMask(VK_STENCIL_FACE_FRONT_AND_BACK, test_mask);
|
||||
}
|
||||
});
|
||||
}
|
||||
}
|
||||
@ -990,7 +1012,7 @@ void RasterizerVulkan::UpdateDepthBiasEnable(Tegra::Engines::Maxwell3D::Regs& re
|
||||
constexpr size_t POINT = 0;
|
||||
constexpr size_t LINE = 1;
|
||||
constexpr size_t POLYGON = 2;
|
||||
constexpr std::array POLYGON_OFFSET_ENABLE_LUT = {
|
||||
static constexpr std::array POLYGON_OFFSET_ENABLE_LUT = {
|
||||
POINT, // Points
|
||||
LINE, // Lines
|
||||
LINE, // LineLoop
|
||||
@ -1099,13 +1121,12 @@ void RasterizerVulkan::UpdateStencilOp(Tegra::Engines::Maxwell3D::Regs& regs) {
|
||||
}
|
||||
|
||||
void RasterizerVulkan::UpdateLogicOp(Tegra::Engines::Maxwell3D::Regs& regs) {
|
||||
if (!regs.logic_op.enable) {
|
||||
return;
|
||||
}
|
||||
if (!state_tracker.TouchLogicOp()) {
|
||||
return;
|
||||
}
|
||||
auto op = static_cast<VkLogicOp>(static_cast<u32>(regs.logic_op.op) - 0x1500);
|
||||
const auto op_value = static_cast<u32>(regs.logic_op.op);
|
||||
auto op = op_value >= 0x1500 && op_value < 0x1510 ? static_cast<VkLogicOp>(op_value - 0x1500)
|
||||
: VK_LOGIC_OP_NO_OP;
|
||||
scheduler.Record([op](vk::CommandBuffer cmdbuf) { cmdbuf.SetLogicOpEXT(op); });
|
||||
}
|
||||
|
||||
|
@ -30,7 +30,8 @@ constexpr VkDeviceSize MAX_STREAM_BUFFER_REQUEST_SIZE = 8_MiB;
|
||||
constexpr VkDeviceSize STREAM_BUFFER_SIZE = 128_MiB;
|
||||
constexpr VkDeviceSize REGION_SIZE = STREAM_BUFFER_SIZE / StagingBufferPool::NUM_SYNCS;
|
||||
|
||||
constexpr VkMemoryPropertyFlags HOST_FLAGS = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT;
|
||||
constexpr VkMemoryPropertyFlags HOST_FLAGS =
|
||||
VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT;
|
||||
constexpr VkMemoryPropertyFlags STREAM_FLAGS = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | HOST_FLAGS;
|
||||
|
||||
bool IsStreamHeap(VkMemoryHeap heap) noexcept {
|
||||
@ -92,7 +93,9 @@ StagingBufferPool::StagingBufferPool(const Device& device_, MemoryAllocator& mem
|
||||
.pNext = nullptr,
|
||||
.flags = 0,
|
||||
.size = STREAM_BUFFER_SIZE,
|
||||
.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT,
|
||||
.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT |
|
||||
VK_BUFFER_USAGE_INDEX_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT |
|
||||
VK_BUFFER_USAGE_TRANSFORM_FEEDBACK_BUFFER_BIT_EXT,
|
||||
.sharingMode = VK_SHARING_MODE_EXCLUSIVE,
|
||||
.queueFamilyIndexCount = 0,
|
||||
.pQueueFamilyIndices = nullptr,
|
||||
@ -244,19 +247,15 @@ std::optional<StagingBufferRef> StagingBufferPool::TryGetReservedBuffer(size_t s
|
||||
StagingBufferRef StagingBufferPool::CreateStagingBuffer(size_t size, MemoryUsage usage,
|
||||
bool deferred) {
|
||||
const u32 log2 = Common::Log2Ceil64(size);
|
||||
VkBufferUsageFlags usage_flags{};
|
||||
if (usage == MemoryUsage::Upload) {
|
||||
usage_flags |= VK_BUFFER_USAGE_TRANSFER_SRC_BIT;
|
||||
}
|
||||
if (usage == MemoryUsage::Download) {
|
||||
usage_flags |= VK_BUFFER_USAGE_TRANSFER_DST_BIT;
|
||||
}
|
||||
vk::Buffer buffer = device.GetLogical().CreateBuffer({
|
||||
.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
|
||||
.pNext = nullptr,
|
||||
.flags = 0,
|
||||
.size = 1ULL << log2,
|
||||
.usage = usage_flags,
|
||||
.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT |
|
||||
VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT |
|
||||
VK_BUFFER_USAGE_INDEX_BUFFER_BIT | VK_BUFFER_USAGE_VERTEX_BUFFER_BIT |
|
||||
VK_BUFFER_USAGE_TRANSFORM_FEEDBACK_BUFFER_BIT_EXT,
|
||||
.sharingMode = VK_SHARING_MODE_EXCLUSIVE,
|
||||
.queueFamilyIndexCount = 0,
|
||||
.pQueueFamilyIndices = nullptr,
|
||||
|
@ -33,6 +33,9 @@ Flags MakeInvalidationFlags() {
|
||||
BlendConstants,
|
||||
DepthBounds,
|
||||
StencilProperties,
|
||||
StencilReference,
|
||||
StencilWriteMask,
|
||||
StencilCompare,
|
||||
LineWidth,
|
||||
CullMode,
|
||||
DepthBoundsEnable,
|
||||
@ -99,14 +102,17 @@ void SetupDirtyDepthBounds(Tables& tables) {
|
||||
}
|
||||
|
||||
void SetupDirtyStencilProperties(Tables& tables) {
|
||||
auto& table = tables[0];
|
||||
table[OFF(stencil_two_side_enable)] = StencilProperties;
|
||||
table[OFF(stencil_front_ref)] = StencilProperties;
|
||||
table[OFF(stencil_front_mask)] = StencilProperties;
|
||||
table[OFF(stencil_front_func_mask)] = StencilProperties;
|
||||
table[OFF(stencil_back_ref)] = StencilProperties;
|
||||
table[OFF(stencil_back_mask)] = StencilProperties;
|
||||
table[OFF(stencil_back_func_mask)] = StencilProperties;
|
||||
const auto setup = [&](size_t position, u8 flag) {
|
||||
tables[0][position] = flag;
|
||||
tables[1][position] = StencilProperties;
|
||||
};
|
||||
tables[0][OFF(stencil_two_side_enable)] = StencilProperties;
|
||||
setup(OFF(stencil_front_ref), StencilReference);
|
||||
setup(OFF(stencil_front_mask), StencilWriteMask);
|
||||
setup(OFF(stencil_front_func_mask), StencilCompare);
|
||||
setup(OFF(stencil_back_ref), StencilReference);
|
||||
setup(OFF(stencil_back_mask), StencilWriteMask);
|
||||
setup(OFF(stencil_back_func_mask), StencilCompare);
|
||||
}
|
||||
|
||||
void SetupDirtyLineWidth(Tables& tables) {
|
||||
|
@ -35,6 +35,9 @@ enum : u8 {
|
||||
BlendConstants,
|
||||
DepthBounds,
|
||||
StencilProperties,
|
||||
StencilReference,
|
||||
StencilWriteMask,
|
||||
StencilCompare,
|
||||
LineWidth,
|
||||
|
||||
CullMode,
|
||||
@ -113,6 +116,24 @@ public:
|
||||
return Exchange(Dirty::StencilProperties, false);
|
||||
}
|
||||
|
||||
bool TouchStencilReference() {
|
||||
return Exchange(Dirty::StencilReference, false);
|
||||
}
|
||||
|
||||
bool TouchStencilWriteMask() {
|
||||
return Exchange(Dirty::StencilWriteMask, false);
|
||||
}
|
||||
|
||||
bool TouchStencilCompare() {
|
||||
return Exchange(Dirty::StencilCompare, false);
|
||||
}
|
||||
|
||||
bool TouchStencilSide(bool two_sided_stencil_new) {
|
||||
bool result = two_sided_stencil != two_sided_stencil_new;
|
||||
two_sided_stencil = two_sided_stencil_new;
|
||||
return result;
|
||||
}
|
||||
|
||||
bool TouchLineWidth() const {
|
||||
return Exchange(Dirty::LineWidth, false);
|
||||
}
|
||||
@ -218,6 +239,7 @@ private:
|
||||
Tegra::Engines::Maxwell3D::DirtyState::Flags default_flags;
|
||||
Tegra::Engines::Maxwell3D::DirtyState::Flags invalidation_flags;
|
||||
Maxwell::PrimitiveTopology current_topology = INVALID_TOPOLOGY;
|
||||
bool two_sided_stencil = false;
|
||||
};
|
||||
|
||||
} // namespace Vulkan
|
||||
|
@ -812,8 +812,12 @@ StagingBufferRef TextureCacheRuntime::UploadStagingBuffer(size_t size) {
|
||||
return staging_buffer_pool.Request(size, MemoryUsage::Upload);
|
||||
}
|
||||
|
||||
StagingBufferRef TextureCacheRuntime::DownloadStagingBuffer(size_t size) {
|
||||
return staging_buffer_pool.Request(size, MemoryUsage::Download);
|
||||
StagingBufferRef TextureCacheRuntime::DownloadStagingBuffer(size_t size, bool deferred) {
|
||||
return staging_buffer_pool.Request(size, MemoryUsage::Download, deferred);
|
||||
}
|
||||
|
||||
void TextureCacheRuntime::FreeDeferredStagingBuffer(StagingBufferRef& ref) {
|
||||
staging_buffer_pool.FreeDeferred(ref);
|
||||
}
|
||||
|
||||
bool TextureCacheRuntime::ShouldReinterpret(Image& dst, Image& src) {
|
||||
|
@ -51,7 +51,9 @@ public:
|
||||
|
||||
StagingBufferRef UploadStagingBuffer(size_t size);
|
||||
|
||||
StagingBufferRef DownloadStagingBuffer(size_t size);
|
||||
StagingBufferRef DownloadStagingBuffer(size_t size, bool deferred = false);
|
||||
|
||||
void FreeDeferredStagingBuffer(StagingBufferRef& ref);
|
||||
|
||||
void TickFrame();
|
||||
|
||||
@ -347,6 +349,7 @@ struct TextureCacheParams {
|
||||
static constexpr bool FRAMEBUFFER_BLITS = false;
|
||||
static constexpr bool HAS_EMULATED_COPIES = false;
|
||||
static constexpr bool HAS_DEVICE_MEMORY_INFO = true;
|
||||
static constexpr bool IMPLEMENTS_ASYNC_DOWNLOADS = true;
|
||||
|
||||
using Runtime = Vulkan::TextureCacheRuntime;
|
||||
using Image = Vulkan::Image;
|
||||
@ -354,6 +357,7 @@ struct TextureCacheParams {
|
||||
using ImageView = Vulkan::ImageView;
|
||||
using Sampler = Vulkan::Sampler;
|
||||
using Framebuffer = Vulkan::Framebuffer;
|
||||
using AsyncBuffer = Vulkan::StagingBufferRef;
|
||||
};
|
||||
|
||||
using TextureCache = VideoCommon::TextureCache<TextureCacheParams>;
|
||||
|
@ -325,6 +325,7 @@ GraphicsEnvironment::GraphicsEnvironment(Tegra::Engines::Maxwell3D& maxwell3d_,
|
||||
ASSERT(local_size <= std::numeric_limits<u32>::max());
|
||||
local_memory_size = static_cast<u32>(local_size) + sph.common3.shader_local_memory_crs_size;
|
||||
texture_bound = maxwell3d->regs.bindless_texture_const_buffer_slot;
|
||||
is_propietary_driver = texture_bound == 2;
|
||||
has_hle_engine_state =
|
||||
maxwell3d->engine_state == Tegra::Engines::Maxwell3D::EngineHint::OnHLEMacro;
|
||||
}
|
||||
@ -350,11 +351,11 @@ std::optional<Shader::ReplaceConstant> GraphicsEnvironment::GetReplaceConstBuffe
|
||||
if (it == maxwell3d->replace_table.end()) {
|
||||
return std::nullopt;
|
||||
}
|
||||
const auto converted_value = [](Tegra::Engines::Maxwell3D::HLEReplaceName name) {
|
||||
const auto converted_value = [](Tegra::Engines::Maxwell3D::HLEReplacementAttributeType name) {
|
||||
switch (name) {
|
||||
case Tegra::Engines::Maxwell3D::HLEReplaceName::BaseVertex:
|
||||
case Tegra::Engines::Maxwell3D::HLEReplacementAttributeType::BaseVertex:
|
||||
return Shader::ReplaceConstant::BaseVertex;
|
||||
case Tegra::Engines::Maxwell3D::HLEReplaceName::BaseInstance:
|
||||
case Tegra::Engines::Maxwell3D::HLEReplacementAttributeType::BaseInstance:
|
||||
return Shader::ReplaceConstant::BaseInstance;
|
||||
default:
|
||||
UNREACHABLE();
|
||||
@ -399,6 +400,7 @@ ComputeEnvironment::ComputeEnvironment(Tegra::Engines::KeplerCompute& kepler_com
|
||||
stage = Shader::Stage::Compute;
|
||||
local_memory_size = qmd.local_pos_alloc + qmd.local_crs_alloc;
|
||||
texture_bound = kepler_compute->regs.tex_cb_index;
|
||||
is_propietary_driver = texture_bound == 2;
|
||||
shared_memory_size = qmd.shared_alloc;
|
||||
workgroup_size = {qmd.block_dim_x, qmd.block_dim_y, qmd.block_dim_z};
|
||||
}
|
||||
@ -498,6 +500,7 @@ void FileEnvironment::Deserialize(std::ifstream& file) {
|
||||
file.read(reinterpret_cast<char*>(&gp_passthrough_mask), sizeof(gp_passthrough_mask));
|
||||
}
|
||||
}
|
||||
is_propietary_driver = texture_bound == 2;
|
||||
}
|
||||
|
||||
void FileEnvironment::Dump(u64 hash) {
|
||||
|
@ -646,7 +646,28 @@ bool TextureCache<P>::ShouldWaitAsyncFlushes() const noexcept {
|
||||
template <class P>
|
||||
void TextureCache<P>::CommitAsyncFlushes() {
|
||||
// This is intentionally passing the value by copy
|
||||
committed_downloads.push(uncommitted_downloads);
|
||||
if constexpr (IMPLEMENTS_ASYNC_DOWNLOADS) {
|
||||
const std::span<const ImageId> download_ids = uncommitted_downloads;
|
||||
if (download_ids.empty()) {
|
||||
committed_downloads.emplace_back(std::move(uncommitted_downloads));
|
||||
uncommitted_downloads.clear();
|
||||
async_buffers.emplace_back(std::optional<AsyncBuffer>{});
|
||||
return;
|
||||
}
|
||||
size_t total_size_bytes = 0;
|
||||
for (const ImageId image_id : download_ids) {
|
||||
total_size_bytes += slot_images[image_id].unswizzled_size_bytes;
|
||||
}
|
||||
auto download_map = runtime.DownloadStagingBuffer(total_size_bytes, true);
|
||||
for (const ImageId image_id : download_ids) {
|
||||
Image& image = slot_images[image_id];
|
||||
const auto copies = FullDownloadCopies(image.info);
|
||||
image.DownloadMemory(download_map, copies);
|
||||
download_map.offset += Common::AlignUp(image.unswizzled_size_bytes, 64);
|
||||
}
|
||||
async_buffers.emplace_back(download_map);
|
||||
}
|
||||
committed_downloads.emplace_back(std::move(uncommitted_downloads));
|
||||
uncommitted_downloads.clear();
|
||||
}
|
||||
|
||||
@ -655,37 +676,58 @@ void TextureCache<P>::PopAsyncFlushes() {
|
||||
if (committed_downloads.empty()) {
|
||||
return;
|
||||
}
|
||||
const std::span<const ImageId> download_ids = committed_downloads.front();
|
||||
if (download_ids.empty()) {
|
||||
committed_downloads.pop();
|
||||
return;
|
||||
if constexpr (IMPLEMENTS_ASYNC_DOWNLOADS) {
|
||||
const std::span<const ImageId> download_ids = committed_downloads.front();
|
||||
if (download_ids.empty()) {
|
||||
committed_downloads.pop_front();
|
||||
async_buffers.pop_front();
|
||||
return;
|
||||
}
|
||||
auto download_map = *async_buffers.front();
|
||||
std::span<u8> download_span = download_map.mapped_span;
|
||||
for (size_t i = download_ids.size(); i > 0; i--) {
|
||||
const ImageBase& image = slot_images[download_ids[i - 1]];
|
||||
const auto copies = FullDownloadCopies(image.info);
|
||||
download_map.offset -= Common::AlignUp(image.unswizzled_size_bytes, 64);
|
||||
std::span<u8> download_span_alt = download_span.subspan(download_map.offset);
|
||||
SwizzleImage(*gpu_memory, image.gpu_addr, image.info, copies, download_span_alt,
|
||||
swizzle_data_buffer);
|
||||
}
|
||||
runtime.FreeDeferredStagingBuffer(download_map);
|
||||
committed_downloads.pop_front();
|
||||
async_buffers.pop_front();
|
||||
} else {
|
||||
const std::span<const ImageId> download_ids = committed_downloads.front();
|
||||
if (download_ids.empty()) {
|
||||
committed_downloads.pop_front();
|
||||
return;
|
||||
}
|
||||
size_t total_size_bytes = 0;
|
||||
for (const ImageId image_id : download_ids) {
|
||||
total_size_bytes += slot_images[image_id].unswizzled_size_bytes;
|
||||
}
|
||||
auto download_map = runtime.DownloadStagingBuffer(total_size_bytes);
|
||||
const size_t original_offset = download_map.offset;
|
||||
for (const ImageId image_id : download_ids) {
|
||||
Image& image = slot_images[image_id];
|
||||
const auto copies = FullDownloadCopies(image.info);
|
||||
image.DownloadMemory(download_map, copies);
|
||||
download_map.offset += image.unswizzled_size_bytes;
|
||||
}
|
||||
// Wait for downloads to finish
|
||||
runtime.Finish();
|
||||
download_map.offset = original_offset;
|
||||
std::span<u8> download_span = download_map.mapped_span;
|
||||
for (const ImageId image_id : download_ids) {
|
||||
const ImageBase& image = slot_images[image_id];
|
||||
const auto copies = FullDownloadCopies(image.info);
|
||||
SwizzleImage(*gpu_memory, image.gpu_addr, image.info, copies, download_span,
|
||||
swizzle_data_buffer);
|
||||
download_map.offset += image.unswizzled_size_bytes;
|
||||
download_span = download_span.subspan(image.unswizzled_size_bytes);
|
||||
}
|
||||
committed_downloads.pop_front();
|
||||
}
|
||||
size_t total_size_bytes = 0;
|
||||
for (const ImageId image_id : download_ids) {
|
||||
total_size_bytes += slot_images[image_id].unswizzled_size_bytes;
|
||||
}
|
||||
auto download_map = runtime.DownloadStagingBuffer(total_size_bytes);
|
||||
const size_t original_offset = download_map.offset;
|
||||
for (const ImageId image_id : download_ids) {
|
||||
Image& image = slot_images[image_id];
|
||||
const auto copies = FullDownloadCopies(image.info);
|
||||
image.DownloadMemory(download_map, copies);
|
||||
download_map.offset += image.unswizzled_size_bytes;
|
||||
}
|
||||
// Wait for downloads to finish
|
||||
runtime.Finish();
|
||||
|
||||
download_map.offset = original_offset;
|
||||
std::span<u8> download_span = download_map.mapped_span;
|
||||
for (const ImageId image_id : download_ids) {
|
||||
const ImageBase& image = slot_images[image_id];
|
||||
const auto copies = FullDownloadCopies(image.info);
|
||||
SwizzleImage(*gpu_memory, image.gpu_addr, image.info, copies, download_span,
|
||||
swizzle_data_buffer);
|
||||
download_map.offset += image.unswizzled_size_bytes;
|
||||
download_span = download_span.subspan(image.unswizzled_size_bytes);
|
||||
}
|
||||
committed_downloads.pop();
|
||||
}
|
||||
|
||||
template <class P>
|
||||
@ -1475,6 +1517,27 @@ void TextureCache<P>::ForEachSparseSegment(ImageBase& image, Func&& func) {
|
||||
}
|
||||
}
|
||||
|
||||
template <class P>
|
||||
void TextureCache<P>::BubbleUpImages(VAddr cpu_addr, size_t size) {
|
||||
ForEachCPUPage(cpu_addr, size, [this](u64 page) {
|
||||
const auto it = page_table.find(page);
|
||||
if (it == page_table.end()) {
|
||||
return;
|
||||
}
|
||||
std::vector<ImageMapId>& map_vector = it->second;
|
||||
for (size_t i = 1; i < map_vector.size(); i++) {
|
||||
ImageMapView& bottom_map = slot_map_views[map_vector[i - 1]];
|
||||
ImageMapView& top_map = slot_map_views[map_vector[i]];
|
||||
if (slot_images[bottom_map.image_id].modification_tick <
|
||||
slot_images[top_map.image_id].modification_tick) {
|
||||
std::swap(map_vector[i - 1], map_vector[i]);
|
||||
} else {
|
||||
return;
|
||||
}
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
template <class P>
|
||||
ImageViewId TextureCache<P>::FindOrEmplaceImageView(ImageId image_id, const ImageViewInfo& info) {
|
||||
Image& image = slot_images[image_id];
|
||||
@ -1788,6 +1851,7 @@ template <class P>
|
||||
void TextureCache<P>::MarkModification(ImageBase& image) noexcept {
|
||||
image.flags |= ImageFlagBits::GpuModified;
|
||||
image.modification_tick = ++modification_tick;
|
||||
BubbleUpImages(image.cpu_addr, image.guest_size_bytes);
|
||||
}
|
||||
|
||||
template <class P>
|
||||
|
@ -92,6 +92,8 @@ class TextureCache : public VideoCommon::ChannelSetupCaches<TextureCacheChannelI
|
||||
static constexpr bool HAS_EMULATED_COPIES = P::HAS_EMULATED_COPIES;
|
||||
/// True when the API can provide info about the memory of the device.
|
||||
static constexpr bool HAS_DEVICE_MEMORY_INFO = P::HAS_DEVICE_MEMORY_INFO;
|
||||
/// True when the API can do asynchronous texture downloads.
|
||||
static constexpr bool IMPLEMENTS_ASYNC_DOWNLOADS = P::IMPLEMENTS_ASYNC_DOWNLOADS;
|
||||
|
||||
static constexpr size_t UNSET_CHANNEL{std::numeric_limits<size_t>::max()};
|
||||
|
||||
@ -106,6 +108,7 @@ class TextureCache : public VideoCommon::ChannelSetupCaches<TextureCacheChannelI
|
||||
using ImageView = typename P::ImageView;
|
||||
using Sampler = typename P::Sampler;
|
||||
using Framebuffer = typename P::Framebuffer;
|
||||
using AsyncBuffer = typename P::AsyncBuffer;
|
||||
|
||||
struct BlitImages {
|
||||
ImageId dst_id;
|
||||
@ -316,6 +319,8 @@ private:
|
||||
template <typename Func>
|
||||
void ForEachSparseSegment(ImageBase& image, Func&& func);
|
||||
|
||||
void BubbleUpImages(VAddr cpu_addr, size_t size);
|
||||
|
||||
/// Find or create an image view in the given image with the passed parameters
|
||||
[[nodiscard]] ImageViewId FindOrEmplaceImageView(ImageId image_id, const ImageViewInfo& info);
|
||||
|
||||
@ -403,7 +408,8 @@ private:
|
||||
|
||||
// TODO: This data structure is not optimal and it should be reworked
|
||||
std::vector<ImageId> uncommitted_downloads;
|
||||
std::queue<std::vector<ImageId>> committed_downloads;
|
||||
std::deque<std::vector<ImageId>> committed_downloads;
|
||||
std::deque<std::optional<AsyncBuffer>> async_buffers;
|
||||
|
||||
struct LRUItemParams {
|
||||
using ObjectType = ImageId;
|
||||
|
@ -756,20 +756,23 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR
|
||||
CollectToolingInfo();
|
||||
|
||||
if (driver_id == VK_DRIVER_ID_NVIDIA_PROPRIETARY_KHR) {
|
||||
const auto arch = GetNvidiaArchitecture(physical, supported_extensions);
|
||||
switch (arch) {
|
||||
case NvidiaArchitecture::AmpereOrNewer:
|
||||
LOG_WARNING(Render_Vulkan, "Blacklisting Ampere devices from float16 math");
|
||||
is_float16_supported = false;
|
||||
break;
|
||||
case NvidiaArchitecture::Turing:
|
||||
break;
|
||||
case NvidiaArchitecture::VoltaOrOlder:
|
||||
LOG_WARNING(Render_Vulkan, "Blacklisting Volta and older from VK_KHR_push_descriptor");
|
||||
khr_push_descriptor = false;
|
||||
break;
|
||||
}
|
||||
const u32 nv_major_version = (properties.driverVersion >> 22) & 0x3ff;
|
||||
if (nv_major_version < 527) {
|
||||
const auto arch = GetNvidiaArchitecture(physical, supported_extensions);
|
||||
switch (arch) {
|
||||
case NvidiaArchitecture::AmpereOrNewer:
|
||||
LOG_WARNING(Render_Vulkan, "Blacklisting Ampere devices from float16 math");
|
||||
is_float16_supported = false;
|
||||
break;
|
||||
case NvidiaArchitecture::Turing:
|
||||
break;
|
||||
case NvidiaArchitecture::VoltaOrOlder:
|
||||
LOG_WARNING(Render_Vulkan,
|
||||
"Blacklisting Volta and older from VK_KHR_push_descriptor");
|
||||
khr_push_descriptor = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (nv_major_version >= 510) {
|
||||
LOG_WARNING(Render_Vulkan, "NVIDIA Drivers >= 510 do not support MSAA image blits");
|
||||
cant_blit_msaa = true;
|
||||
@ -834,8 +837,11 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR
|
||||
const bool is_intel_windows = driver_id == VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS;
|
||||
const bool is_intel_anv = driver_id == VK_DRIVER_ID_INTEL_OPEN_SOURCE_MESA;
|
||||
if (ext_vertex_input_dynamic_state && is_intel_windows) {
|
||||
LOG_WARNING(Render_Vulkan, "Blacklisting Intel for VK_EXT_vertex_input_dynamic_state");
|
||||
ext_vertex_input_dynamic_state = false;
|
||||
const u32 version = (properties.driverVersion << 3) >> 3;
|
||||
if (version < VK_MAKE_API_VERSION(27, 20, 100, 0)) {
|
||||
LOG_WARNING(Render_Vulkan, "Blacklisting Intel for VK_EXT_vertex_input_dynamic_state");
|
||||
ext_vertex_input_dynamic_state = false;
|
||||
}
|
||||
}
|
||||
if (is_float16_supported && is_intel_windows) {
|
||||
// Intel's compiler crashes when using fp16 on Astral Chain, disable it for the time being.
|
||||
|
Loading…
Reference in New Issue
Block a user