another try
File diff suppressed because it is too large
File diff suppressed because it is too large
@@ -1,25 +1,25 @@
// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later

#include "shader_recompiler/frontend/ir/basic_block.h"
#include "shader_recompiler/frontend/ir/value.h"
#include "shader_recompiler/ir_opt/passes.h"

namespace Shader::Optimization {

void DeadCodeEliminationPass(IR::Program& program) {
    // We iterate over the instructions in reverse order.
    // This is because removing an instruction reduces the number of uses for earlier instructions.
    for (IR::Block* const block : program.post_order_blocks) {
        auto it{block->end()};
        while (it != block->begin()) {
            --it;
            if (!it->HasUses() && !it->MayHaveSideEffects()) {
                it->Invalidate();
                it = block->Instructions().erase(it);
            }
        }
    }
}

} // namespace Shader::Optimization
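Note (illustration, not part of the commit): the pass relies on a single reverse sweep being enough to cascade, because erasing a dead instruction only lowers the use counts of its arguments, which sit earlier in the block and are visited next. A minimal self-contained sketch of the same idea, with invented types:

#include <list>
#include <vector>

struct Inst {
    std::vector<Inst*> args; // instructions this one reads
    int uses = 0;            // number of instructions reading this one
    bool side_effects = false;
};

void DeadCodeElimination(std::list<Inst>& block) {
    // Reverse sweep: erasing an instruction decrements the use counts of its
    // arguments, which may make earlier instructions newly dead on this pass.
    for (auto it = block.end(); it != block.begin();) {
        --it;
        if (it->uses == 0 && !it->side_effects) {
            for (Inst* arg : it->args) {
                --arg->uses;
            }
            it = block.erase(it);
        }
    }
}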
@@ -1,29 +1,29 @@
// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later

#include "shader_recompiler/frontend/ir/ir_emitter.h"
#include "shader_recompiler/ir_opt/passes.h"

namespace Shader::Optimization {

void VertexATransformPass(IR::Program& program) {
    for (IR::Block* const block : program.blocks) {
        for (IR::Inst& inst : block->Instructions()) {
            if (inst.GetOpcode() == IR::Opcode::Epilogue) {
                return inst.Invalidate();
            }
        }
    }
}

void VertexBTransformPass(IR::Program& program) {
    for (IR::Block* const block : program.blocks) {
        for (IR::Inst& inst : block->Instructions()) {
            if (inst.GetOpcode() == IR::Opcode::Prologue) {
                return inst.Invalidate();
            }
        }
    }
}

} // namespace Shader::Optimization
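Note (illustration, not part of the commit): each transform removes the first matching boundary marker, presumably so a merged dual-vertex stream falls through from program A's body into program B's without hitting A's Epilogue or B's Prologue. The same "drop first marker, then stop" shape, with invented types:

#include <vector>

enum class Op { Prologue, Body, Epilogue };

// Erase exactly one marker and stop, mirroring the early `return inst.Invalidate()`.
void RemoveFirst(std::vector<Op>& insts, Op marker) {
    for (auto it = insts.begin(); it != insts.end(); ++it) {
        if (*it == marker) {
            insts.erase(it);
            return;
        }
    }
}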
File diff suppressed because it is too large
@@ -1,37 +1,37 @@
// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later

#include <vector>

#include "shader_recompiler/frontend/ir/basic_block.h"
#include "shader_recompiler/frontend/ir/value.h"
#include "shader_recompiler/ir_opt/passes.h"

namespace Shader::Optimization {

void IdentityRemovalPass(IR::Program& program) {
    std::vector<IR::Inst*> to_invalidate;
    for (IR::Block* const block : program.blocks) {
        for (auto inst = block->begin(); inst != block->end();) {
            const size_t num_args{inst->NumArgs()};
            for (size_t i = 0; i < num_args; ++i) {
                IR::Value arg;
                while ((arg = inst->Arg(i)).IsIdentity()) {
                    inst->SetArg(i, arg.Inst()->Arg(0));
                }
            }
            if (inst->GetOpcode() == IR::Opcode::Identity ||
                inst->GetOpcode() == IR::Opcode::Void) {
                to_invalidate.push_back(&*inst);
                inst = block->Instructions().erase(inst);
            } else {
                ++inst;
            }
        }
    }
    for (IR::Inst* const inst : to_invalidate) {
        inst->Invalidate();
    }
}

} // namespace Shader::Optimization
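Note (illustration, not part of the commit): the argument-rewriting loop chases chains of Identity instructions, Identity(Identity(x)) and deeper, down to the real producer before the Identity nodes themselves are erased. A standalone sketch of that chain-collapsing step, with invented types:

struct Node {
    bool is_identity = false;
    Node* forwarded = nullptr; // what Arg(0) points at when is_identity is set
};

// Follow Identity links until a real producer is reached, like the
// `while ((arg = inst->Arg(i)).IsIdentity())` loop in the pass.
Node* Resolve(Node* node) {
    while (node->is_identity) {
        node = node->forwarded;
    }
    return node;
}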
@@ -1,139 +1,139 @@
// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later

#include "shader_recompiler/frontend/ir/value.h"
#include "shader_recompiler/ir_opt/passes.h"

namespace Shader::Optimization {
namespace {
IR::Opcode Replace(IR::Opcode op) {
    switch (op) {
    case IR::Opcode::FPAbs16:
        return IR::Opcode::FPAbs32;
    case IR::Opcode::FPAdd16:
        return IR::Opcode::FPAdd32;
    case IR::Opcode::FPCeil16:
        return IR::Opcode::FPCeil32;
    case IR::Opcode::FPFloor16:
        return IR::Opcode::FPFloor32;
    case IR::Opcode::FPFma16:
        return IR::Opcode::FPFma32;
    case IR::Opcode::FPMul16:
        return IR::Opcode::FPMul32;
    case IR::Opcode::FPNeg16:
        return IR::Opcode::FPNeg32;
    case IR::Opcode::FPRoundEven16:
        return IR::Opcode::FPRoundEven32;
    case IR::Opcode::FPSaturate16:
        return IR::Opcode::FPSaturate32;
    case IR::Opcode::FPClamp16:
        return IR::Opcode::FPClamp32;
    case IR::Opcode::FPTrunc16:
        return IR::Opcode::FPTrunc32;
    case IR::Opcode::CompositeConstructF16x2:
        return IR::Opcode::CompositeConstructF32x2;
    case IR::Opcode::CompositeConstructF16x3:
        return IR::Opcode::CompositeConstructF32x3;
    case IR::Opcode::CompositeConstructF16x4:
        return IR::Opcode::CompositeConstructF32x4;
    case IR::Opcode::CompositeExtractF16x2:
        return IR::Opcode::CompositeExtractF32x2;
    case IR::Opcode::CompositeExtractF16x3:
        return IR::Opcode::CompositeExtractF32x3;
    case IR::Opcode::CompositeExtractF16x4:
        return IR::Opcode::CompositeExtractF32x4;
    case IR::Opcode::CompositeInsertF16x2:
        return IR::Opcode::CompositeInsertF32x2;
    case IR::Opcode::CompositeInsertF16x3:
        return IR::Opcode::CompositeInsertF32x3;
    case IR::Opcode::CompositeInsertF16x4:
        return IR::Opcode::CompositeInsertF32x4;
    case IR::Opcode::FPOrdEqual16:
        return IR::Opcode::FPOrdEqual32;
    case IR::Opcode::FPUnordEqual16:
        return IR::Opcode::FPUnordEqual32;
    case IR::Opcode::FPOrdNotEqual16:
        return IR::Opcode::FPOrdNotEqual32;
    case IR::Opcode::FPUnordNotEqual16:
        return IR::Opcode::FPUnordNotEqual32;
    case IR::Opcode::FPOrdLessThan16:
        return IR::Opcode::FPOrdLessThan32;
    case IR::Opcode::FPUnordLessThan16:
        return IR::Opcode::FPUnordLessThan32;
    case IR::Opcode::FPOrdGreaterThan16:
        return IR::Opcode::FPOrdGreaterThan32;
    case IR::Opcode::FPUnordGreaterThan16:
        return IR::Opcode::FPUnordGreaterThan32;
    case IR::Opcode::FPOrdLessThanEqual16:
        return IR::Opcode::FPOrdLessThanEqual32;
    case IR::Opcode::FPUnordLessThanEqual16:
        return IR::Opcode::FPUnordLessThanEqual32;
    case IR::Opcode::FPOrdGreaterThanEqual16:
        return IR::Opcode::FPOrdGreaterThanEqual32;
    case IR::Opcode::FPUnordGreaterThanEqual16:
        return IR::Opcode::FPUnordGreaterThanEqual32;
    case IR::Opcode::FPIsNan16:
        return IR::Opcode::FPIsNan32;
    case IR::Opcode::ConvertS16F16:
        return IR::Opcode::ConvertS16F32;
    case IR::Opcode::ConvertS32F16:
        return IR::Opcode::ConvertS32F32;
    case IR::Opcode::ConvertS64F16:
        return IR::Opcode::ConvertS64F32;
    case IR::Opcode::ConvertU16F16:
        return IR::Opcode::ConvertU16F32;
    case IR::Opcode::ConvertU32F16:
        return IR::Opcode::ConvertU32F32;
    case IR::Opcode::ConvertU64F16:
        return IR::Opcode::ConvertU64F32;
    case IR::Opcode::PackFloat2x16:
        return IR::Opcode::PackHalf2x16;
    case IR::Opcode::UnpackFloat2x16:
        return IR::Opcode::UnpackHalf2x16;
    case IR::Opcode::ConvertF32F16:
        return IR::Opcode::Identity;
    case IR::Opcode::ConvertF16F32:
        return IR::Opcode::Identity;
    case IR::Opcode::ConvertF16S8:
        return IR::Opcode::ConvertF32S8;
    case IR::Opcode::ConvertF16S16:
        return IR::Opcode::ConvertF32S16;
    case IR::Opcode::ConvertF16S32:
        return IR::Opcode::ConvertF32S32;
    case IR::Opcode::ConvertF16S64:
        return IR::Opcode::ConvertF32S64;
    case IR::Opcode::ConvertF16U8:
        return IR::Opcode::ConvertF32U8;
    case IR::Opcode::ConvertF16U16:
        return IR::Opcode::ConvertF32U16;
    case IR::Opcode::ConvertF16U32:
        return IR::Opcode::ConvertF32U32;
    case IR::Opcode::ConvertF16U64:
        return IR::Opcode::ConvertF32U64;
    case IR::Opcode::GlobalAtomicAddF16x2:
        return IR::Opcode::GlobalAtomicAddF32x2;
    case IR::Opcode::StorageAtomicAddF16x2:
        return IR::Opcode::StorageAtomicAddF32x2;
    case IR::Opcode::GlobalAtomicMinF16x2:
        return IR::Opcode::GlobalAtomicMinF32x2;
    case IR::Opcode::StorageAtomicMinF16x2:
        return IR::Opcode::StorageAtomicMinF32x2;
    case IR::Opcode::GlobalAtomicMaxF16x2:
        return IR::Opcode::GlobalAtomicMaxF32x2;
    case IR::Opcode::StorageAtomicMaxF16x2:
        return IR::Opcode::StorageAtomicMaxF32x2;
    default:
        return op;
    }
}
} // Anonymous namespace

void LowerFp16ToFp32(IR::Program& program) {
    for (IR::Block* const block : program.blocks) {
        for (IR::Inst& inst : block->Instructions()) {
            inst.ReplaceOpcode(Replace(inst.GetOpcode()));
        }
    }
}

} // namespace Shader::Optimization
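Note (illustration, not part of the commit): LowerFp16ToFp32 is a pure opcode-to-opcode map; operand slots are untouched because each 16-bit opcode is replaced by an arity-compatible 32-bit counterpart. The conversions between f16 and f32 degenerate to Identity because, after lowering, both sides are already represented as f32; IdentityRemovalPass then erases them. A toy version of the same shape, with an invented enum:

enum class Op { FPAdd16, FPAdd32, ConvertF16F32, ConvertF32F16, Identity };

Op LowerHalf(Op op) {
    switch (op) {
    case Op::FPAdd16:
        return Op::FPAdd32; // same arity, wider type
    case Op::ConvertF16F32:
    case Op::ConvertF32F16:
        return Op::Identity; // both representations are f32 after lowering
    default:
        return op;
    }
}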
@@ -1,237 +1,237 @@
// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later

#include <utility>

#include "shader_recompiler/exception.h"
#include "shader_recompiler/frontend/ir/basic_block.h"
#include "shader_recompiler/frontend/ir/ir_emitter.h"
#include "shader_recompiler/frontend/ir/program.h"
#include "shader_recompiler/frontend/ir/value.h"
#include "shader_recompiler/ir_opt/passes.h"

namespace Shader::Optimization {
namespace {
std::pair<IR::U32, IR::U32> Unpack(IR::IREmitter& ir, const IR::Value& packed) {
    if (packed.IsImmediate()) {
        const u64 value{packed.U64()};
        return {
            ir.Imm32(static_cast<u32>(value)),
            ir.Imm32(static_cast<u32>(value >> 32)),
        };
    } else {
        return std::pair<IR::U32, IR::U32>{
            ir.CompositeExtract(packed, 0u),
            ir.CompositeExtract(packed, 1u),
        };
    }
}

void IAdd64To32(IR::Block& block, IR::Inst& inst) {
    if (inst.HasAssociatedPseudoOperation()) {
        throw NotImplementedException("IAdd64 emulation with pseudo instructions");
    }
    IR::IREmitter ir(block, IR::Block::InstructionList::s_iterator_to(inst));
    const auto [a_lo, a_hi]{Unpack(ir, inst.Arg(0))};
    const auto [b_lo, b_hi]{Unpack(ir, inst.Arg(1))};

    const IR::U32 ret_lo{ir.IAdd(a_lo, b_lo)};
    const IR::U32 carry{ir.Select(ir.GetCarryFromOp(ret_lo), ir.Imm32(1u), ir.Imm32(0u))};

    const IR::U32 ret_hi{ir.IAdd(ir.IAdd(a_hi, b_hi), carry)};
    inst.ReplaceUsesWith(ir.CompositeConstruct(ret_lo, ret_hi));
}

void ISub64To32(IR::Block& block, IR::Inst& inst) {
    if (inst.HasAssociatedPseudoOperation()) {
        throw NotImplementedException("ISub64 emulation with pseudo instructions");
    }
    IR::IREmitter ir(block, IR::Block::InstructionList::s_iterator_to(inst));
    const auto [a_lo, a_hi]{Unpack(ir, inst.Arg(0))};
    const auto [b_lo, b_hi]{Unpack(ir, inst.Arg(1))};

    const IR::U32 ret_lo{ir.ISub(a_lo, b_lo)};
    const IR::U1 underflow{ir.IGreaterThan(ret_lo, a_lo, false)};
    const IR::U32 underflow_bit{ir.Select(underflow, ir.Imm32(1u), ir.Imm32(0u))};

    const IR::U32 ret_hi{ir.ISub(ir.ISub(a_hi, b_hi), underflow_bit)};
    inst.ReplaceUsesWith(ir.CompositeConstruct(ret_lo, ret_hi));
}

void INeg64To32(IR::Block& block, IR::Inst& inst) {
    if (inst.HasAssociatedPseudoOperation()) {
        throw NotImplementedException("INeg64 emulation with pseudo instructions");
    }
    IR::IREmitter ir(block, IR::Block::InstructionList::s_iterator_to(inst));
    auto [lo, hi]{Unpack(ir, inst.Arg(0))};
    lo = ir.BitwiseNot(lo);
    hi = ir.BitwiseNot(hi);

    lo = ir.IAdd(lo, ir.Imm32(1));

    const IR::U32 carry{ir.Select(ir.GetCarryFromOp(lo), ir.Imm32(1u), ir.Imm32(0u))};
    hi = ir.IAdd(hi, carry);

    inst.ReplaceUsesWith(ir.CompositeConstruct(lo, hi));
}

void ShiftLeftLogical64To32(IR::Block& block, IR::Inst& inst) {
    if (inst.HasAssociatedPseudoOperation()) {
        throw NotImplementedException("ShiftLeftLogical64 emulation with pseudo instructions");
    }
    IR::IREmitter ir(block, IR::Block::InstructionList::s_iterator_to(inst));
    const auto [lo, hi]{Unpack(ir, inst.Arg(0))};
    const IR::U32 shift{inst.Arg(1)};

    const IR::U32 shifted_lo{ir.ShiftLeftLogical(lo, shift)};
    const IR::U32 shifted_hi{ir.ShiftLeftLogical(hi, shift)};

    const IR::U32 inv_shift{ir.ISub(shift, ir.Imm32(32))};
    const IR::U1 is_long{ir.IGreaterThanEqual(inv_shift, ir.Imm32(0), true)};
    const IR::U1 is_zero{ir.IEqual(shift, ir.Imm32(0))};

    const IR::U32 long_ret_lo{ir.Imm32(0)};
    const IR::U32 long_ret_hi{ir.ShiftLeftLogical(lo, inv_shift)};

    const IR::U32 shift_complement{ir.ISub(ir.Imm32(32), shift)};
    const IR::U32 lo_extract{ir.BitFieldExtract(lo, shift_complement, shift, false)};
    const IR::U32 short_ret_lo{shifted_lo};
    const IR::U32 short_ret_hi{ir.BitwiseOr(shifted_hi, lo_extract)};

    const IR::U32 zero_ret_lo{lo};
    const IR::U32 zero_ret_hi{hi};

    const IR::U32 non_zero_lo{ir.Select(is_long, long_ret_lo, short_ret_lo)};
    const IR::U32 non_zero_hi{ir.Select(is_long, long_ret_hi, short_ret_hi)};

    const IR::U32 ret_lo{ir.Select(is_zero, zero_ret_lo, non_zero_lo)};
    const IR::U32 ret_hi{ir.Select(is_zero, zero_ret_hi, non_zero_hi)};
    inst.ReplaceUsesWith(ir.CompositeConstruct(ret_lo, ret_hi));
}

void ShiftRightLogical64To32(IR::Block& block, IR::Inst& inst) {
    if (inst.HasAssociatedPseudoOperation()) {
        throw NotImplementedException("ShiftRightLogical64 emulation with pseudo instructions");
    }
    IR::IREmitter ir(block, IR::Block::InstructionList::s_iterator_to(inst));
    const auto [lo, hi]{Unpack(ir, inst.Arg(0))};
    const IR::U32 shift{inst.Arg(1)};

    const IR::U32 shifted_lo{ir.ShiftRightLogical(lo, shift)};
    const IR::U32 shifted_hi{ir.ShiftRightLogical(hi, shift)};

    const IR::U32 inv_shift{ir.ISub(shift, ir.Imm32(32))};
    const IR::U1 is_long{ir.IGreaterThanEqual(inv_shift, ir.Imm32(0), true)};
    const IR::U1 is_zero{ir.IEqual(shift, ir.Imm32(0))};

    const IR::U32 long_ret_hi{ir.Imm32(0)};
    const IR::U32 long_ret_lo{ir.ShiftRightLogical(hi, inv_shift)};

    const IR::U32 shift_complement{ir.ISub(ir.Imm32(32), shift)};
    const IR::U32 short_hi_extract{ir.BitFieldExtract(hi, ir.Imm32(0), shift)};
    const IR::U32 short_ret_hi{shifted_hi};
    const IR::U32 short_ret_lo{
        ir.BitFieldInsert(shifted_lo, short_hi_extract, shift_complement, shift)};

    const IR::U32 zero_ret_lo{lo};
    const IR::U32 zero_ret_hi{hi};

    const IR::U32 non_zero_lo{ir.Select(is_long, long_ret_lo, short_ret_lo)};
    const IR::U32 non_zero_hi{ir.Select(is_long, long_ret_hi, short_ret_hi)};

    const IR::U32 ret_lo{ir.Select(is_zero, zero_ret_lo, non_zero_lo)};
    const IR::U32 ret_hi{ir.Select(is_zero, zero_ret_hi, non_zero_hi)};
    inst.ReplaceUsesWith(ir.CompositeConstruct(ret_lo, ret_hi));
}

void ShiftRightArithmetic64To32(IR::Block& block, IR::Inst& inst) {
    if (inst.HasAssociatedPseudoOperation()) {
        throw NotImplementedException("ShiftRightArithmetic64 emulation with pseudo instructions");
    }
    IR::IREmitter ir(block, IR::Block::InstructionList::s_iterator_to(inst));
    const auto [lo, hi]{Unpack(ir, inst.Arg(0))};
    const IR::U32 shift{inst.Arg(1)};

    const IR::U32 shifted_lo{ir.ShiftRightLogical(lo, shift)};
    const IR::U32 shifted_hi{ir.ShiftRightArithmetic(hi, shift)};

    const IR::U32 sign_extension{ir.ShiftRightArithmetic(hi, ir.Imm32(31))};

    const IR::U32 inv_shift{ir.ISub(shift, ir.Imm32(32))};
    const IR::U1 is_long{ir.IGreaterThanEqual(inv_shift, ir.Imm32(0), true)};
    const IR::U1 is_zero{ir.IEqual(shift, ir.Imm32(0))};

    const IR::U32 long_ret_hi{sign_extension};
    const IR::U32 long_ret_lo{ir.ShiftRightArithmetic(hi, inv_shift)};

    const IR::U32 shift_complement{ir.ISub(ir.Imm32(32), shift)};
    const IR::U32 short_hi_extract(ir.BitFieldExtract(hi, ir.Imm32(0), shift));
    const IR::U32 short_ret_hi{shifted_hi};
    const IR::U32 short_ret_lo{
        ir.BitFieldInsert(shifted_lo, short_hi_extract, shift_complement, shift)};

    const IR::U32 zero_ret_lo{lo};
    const IR::U32 zero_ret_hi{hi};

    const IR::U32 non_zero_lo{ir.Select(is_long, long_ret_lo, short_ret_lo)};
    const IR::U32 non_zero_hi{ir.Select(is_long, long_ret_hi, short_ret_hi)};

    const IR::U32 ret_lo{ir.Select(is_zero, zero_ret_lo, non_zero_lo)};
    const IR::U32 ret_hi{ir.Select(is_zero, zero_ret_hi, non_zero_hi)};
    inst.ReplaceUsesWith(ir.CompositeConstruct(ret_lo, ret_hi));
}

void Lower(IR::Block& block, IR::Inst& inst) {
    switch (inst.GetOpcode()) {
    case IR::Opcode::PackUint2x32:
    case IR::Opcode::UnpackUint2x32:
        return inst.ReplaceOpcode(IR::Opcode::Identity);
    case IR::Opcode::IAdd64:
        return IAdd64To32(block, inst);
    case IR::Opcode::ISub64:
        return ISub64To32(block, inst);
    case IR::Opcode::INeg64:
        return INeg64To32(block, inst);
    case IR::Opcode::ShiftLeftLogical64:
        return ShiftLeftLogical64To32(block, inst);
    case IR::Opcode::ShiftRightLogical64:
        return ShiftRightLogical64To32(block, inst);
    case IR::Opcode::ShiftRightArithmetic64:
        return ShiftRightArithmetic64To32(block, inst);
    case IR::Opcode::SharedAtomicExchange64:
        return inst.ReplaceOpcode(IR::Opcode::SharedAtomicExchange32x2);
    case IR::Opcode::GlobalAtomicIAdd64:
        return inst.ReplaceOpcode(IR::Opcode::GlobalAtomicIAdd32x2);
    case IR::Opcode::GlobalAtomicSMin64:
        return inst.ReplaceOpcode(IR::Opcode::GlobalAtomicSMin32x2);
    case IR::Opcode::GlobalAtomicUMin64:
        return inst.ReplaceOpcode(IR::Opcode::GlobalAtomicUMin32x2);
    case IR::Opcode::GlobalAtomicSMax64:
        return inst.ReplaceOpcode(IR::Opcode::GlobalAtomicSMax32x2);
    case IR::Opcode::GlobalAtomicUMax64:
        return inst.ReplaceOpcode(IR::Opcode::GlobalAtomicUMax32x2);
    case IR::Opcode::GlobalAtomicAnd64:
        return inst.ReplaceOpcode(IR::Opcode::GlobalAtomicAnd32x2);
    case IR::Opcode::GlobalAtomicOr64:
        return inst.ReplaceOpcode(IR::Opcode::GlobalAtomicOr32x2);
    case IR::Opcode::GlobalAtomicXor64:
        return inst.ReplaceOpcode(IR::Opcode::GlobalAtomicXor32x2);
    case IR::Opcode::GlobalAtomicExchange64:
        return inst.ReplaceOpcode(IR::Opcode::GlobalAtomicExchange32x2);
    default:
        break;
    }
}
} // Anonymous namespace

void LowerInt64ToInt32(IR::Program& program) {
    const auto end{program.post_order_blocks.rend()};
    for (auto it = program.post_order_blocks.rbegin(); it != end; ++it) {
        IR::Block* const block{*it};
        for (IR::Inst& inst : block->Instructions()) {
            Lower(*block, inst);
        }
    }
}

} // namespace Shader::Optimization
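Note (illustration, not part of the commit): the pass represents each 64-bit value as a (lo, hi) pair of 32-bit words, and the Select-based is_zero/is_long/short paths exist because the IR is branchless, so every candidate is computed and the right one is selected. The same arithmetic written as plain C++, shift assumed below 64:

#include <cstdint>
#include <utility>

using Pair = std::pair<uint32_t, uint32_t>; // {lo, hi}

Pair Add64(Pair a, Pair b) {
    const uint32_t lo = a.first + b.first;
    const uint32_t carry = lo < a.first ? 1u : 0u; // unsigned wrap means a carry-out
    return {lo, a.second + b.second + carry};
}

Pair Shl64(Pair v, uint32_t shift) {
    if (shift == 0) {
        return v; // the is_zero case: value untouched
    }
    if (shift >= 32) {
        return {0u, v.first << (shift - 32)}; // the is_long case: lo moves wholly into hi
    }
    // The short case: bits shifted out of lo are OR'd into hi.
    return {v.first << shift, (v.second << shift) | (v.first >> (32 - shift))};
}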
@@ -1,29 +1,29 @@
// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later

#pragma once

#include "shader_recompiler/environment.h"
#include "shader_recompiler/frontend/ir/program.h"

namespace Shader::Optimization {

void CollectShaderInfoPass(Environment& env, IR::Program& program);
void ConstantPropagationPass(IR::Program& program);
void DeadCodeEliminationPass(IR::Program& program);
void GlobalMemoryToStorageBufferPass(IR::Program& program);
void IdentityRemovalPass(IR::Program& program);
void LowerFp16ToFp32(IR::Program& program);
void LowerInt64ToInt32(IR::Program& program);
void RescalingPass(IR::Program& program);
void SsaRewritePass(IR::Program& program);
void TexturePass(Environment& env, IR::Program& program);
void VerificationPass(const IR::Program& program);

// Dual Vertex
void VertexATransformPass(IR::Program& program);
void VertexBTransformPass(IR::Program& program);
void JoinTextureInfo(Info& base, Info& source);
void JoinStorageInfo(Info& base, Info& source);

} // namespace Shader::Optimization
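Note (illustration, not part of the commit): this header only declares the passes; the invocation order is decided by the caller. A hypothetical driver, with an invented function name and ordering, showing how a backend without 64-bit integer support might chain them:

#include "shader_recompiler/ir_opt/passes.h"

void OptimizeForHostWithoutInt64(Shader::Environment& env, Shader::IR::Program& program) {
    using namespace Shader::Optimization;
    SsaRewritePass(program);
    ConstantPropagationPass(program);
    LowerInt64ToInt32(program);   // rewrites 64-bit ops into pairs of 32-bit ops
    IdentityRemovalPass(program); // collapses Identity chains the lowering introduced
    DeadCodeEliminationPass(program);
    CollectShaderInfoPass(env, program);
}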
@@ -1,355 +1,355 @@
// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later

#include "common/settings.h"
#include "shader_recompiler/environment.h"
#include "shader_recompiler/frontend/ir/ir_emitter.h"
#include "shader_recompiler/frontend/ir/modifiers.h"
#include "shader_recompiler/frontend/ir/program.h"
#include "shader_recompiler/frontend/ir/value.h"
#include "shader_recompiler/ir_opt/passes.h"
#include "shader_recompiler/shader_info.h"

namespace Shader::Optimization {
namespace {
[[nodiscard]] bool IsTextureTypeRescalable(TextureType type) {
    switch (type) {
    case TextureType::Color2D:
    case TextureType::ColorArray2D:
    case TextureType::Color2DRect:
        return true;
    case TextureType::Color1D:
    case TextureType::ColorArray1D:
    case TextureType::Color3D:
    case TextureType::ColorCube:
    case TextureType::ColorArrayCube:
    case TextureType::Buffer:
        break;
    }
    return false;
}

void VisitMark(IR::Block& block, IR::Inst& inst) {
    switch (inst.GetOpcode()) {
    case IR::Opcode::ShuffleIndex:
    case IR::Opcode::ShuffleUp:
    case IR::Opcode::ShuffleDown:
    case IR::Opcode::ShuffleButterfly: {
        const IR::Value shfl_arg{inst.Arg(0)};
        if (shfl_arg.IsImmediate()) {
            break;
        }
        const IR::Inst* const arg_inst{shfl_arg.InstRecursive()};
        if (arg_inst->GetOpcode() != IR::Opcode::BitCastU32F32) {
            break;
        }
        const IR::Value bitcast_arg{arg_inst->Arg(0)};
        if (bitcast_arg.IsImmediate()) {
            break;
        }
        IR::Inst* const bitcast_inst{bitcast_arg.InstRecursive()};
        bool must_patch_outside = false;
        if (bitcast_inst->GetOpcode() == IR::Opcode::GetAttribute) {
            const IR::Attribute attr{bitcast_inst->Arg(0).Attribute()};
            switch (attr) {
            case IR::Attribute::PositionX:
            case IR::Attribute::PositionY:
                bitcast_inst->SetFlags<u32>(0xDEADBEEF);
                must_patch_outside = true;
                break;
            default:
                break;
            }
        }
        if (must_patch_outside) {
            const auto it{IR::Block::InstructionList::s_iterator_to(inst)};
            IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
            const IR::F32 new_inst{&*block.PrependNewInst(it, inst)};
            const IR::F32 up_factor{ir.FPRecip(ir.ResolutionDownFactor())};
            const IR::Value converted{ir.FPMul(new_inst, up_factor)};
            inst.ReplaceUsesWith(converted);
        }
        break;
    }
    default:
        break;
    }
}

void PatchFragCoord(IR::Block& block, IR::Inst& inst) {
    IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
    const IR::F32 down_factor{ir.ResolutionDownFactor()};
    const IR::F32 frag_coord{ir.GetAttribute(inst.Arg(0).Attribute())};
    const IR::F32 downscaled_frag_coord{ir.FPMul(frag_coord, down_factor)};
    inst.ReplaceUsesWith(downscaled_frag_coord);
}

void PatchPointSize(IR::Block& block, IR::Inst& inst) {
    IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
    const IR::F32 point_value{inst.Arg(1)};
    const IR::F32 up_factor{ir.FPRecip(ir.ResolutionDownFactor())};
    const IR::F32 upscaled_point_value{ir.FPMul(point_value, up_factor)};
    inst.SetArg(1, upscaled_point_value);
}

[[nodiscard]] IR::U32 Scale(IR::IREmitter& ir, const IR::U1& is_scaled, const IR::U32& value) {
    IR::U32 scaled_value{value};
    if (const u32 up_scale = Settings::values.resolution_info.up_scale; up_scale != 1) {
        scaled_value = ir.IMul(scaled_value, ir.Imm32(up_scale));
    }
    if (const u32 down_shift = Settings::values.resolution_info.down_shift; down_shift != 0) {
        scaled_value = ir.ShiftRightArithmetic(scaled_value, ir.Imm32(down_shift));
    }
    return IR::U32{ir.Select(is_scaled, scaled_value, value)};
}

[[nodiscard]] IR::U32 SubScale(IR::IREmitter& ir, const IR::U1& is_scaled, const IR::U32& value,
                               const IR::Attribute attrib) {
    const IR::F32 up_factor{ir.Imm32(Settings::values.resolution_info.up_factor)};
    const IR::F32 base{ir.FPMul(ir.ConvertUToF(32, 32, value), up_factor)};
    const IR::F32 frag_coord{ir.GetAttribute(attrib)};
    const IR::F32 down_factor{ir.Imm32(Settings::values.resolution_info.down_factor)};
    const IR::F32 floor{ir.FPMul(up_factor, ir.FPFloor(ir.FPMul(frag_coord, down_factor)))};
    const IR::F16F32F64 deviation{ir.FPAdd(base, ir.FPAdd(frag_coord, ir.FPNeg(floor)))};
    return IR::U32{ir.Select(is_scaled, ir.ConvertFToU(32, deviation), value)};
}

[[nodiscard]] IR::U32 DownScale(IR::IREmitter& ir, const IR::U1& is_scaled, const IR::U32& value) {
    IR::U32 scaled_value{value};
    if (const u32 down_shift = Settings::values.resolution_info.down_shift; down_shift != 0) {
        scaled_value = ir.ShiftLeftLogical(scaled_value, ir.Imm32(down_shift));
    }
    if (const u32 up_scale = Settings::values.resolution_info.up_scale; up_scale != 1) {
        scaled_value = ir.IDiv(scaled_value, ir.Imm32(up_scale));
    }
    return IR::U32{ir.Select(is_scaled, scaled_value, value)};
}

void PatchImageQueryDimensions(IR::Block& block, IR::Inst& inst) {
    const auto it{IR::Block::InstructionList::s_iterator_to(inst)};
    IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
    const auto info{inst.Flags<IR::TextureInstInfo>()};
    const IR::U1 is_scaled{ir.IsTextureScaled(ir.Imm32(info.descriptor_index))};
    switch (info.type) {
    case TextureType::Color2D:
    case TextureType::ColorArray2D:
    case TextureType::Color2DRect: {
        const IR::Value new_inst{&*block.PrependNewInst(it, inst)};
        const IR::U32 width{DownScale(ir, is_scaled, IR::U32{ir.CompositeExtract(new_inst, 0)})};
        const IR::U32 height{DownScale(ir, is_scaled, IR::U32{ir.CompositeExtract(new_inst, 1)})};
        const IR::Value replacement{ir.CompositeConstruct(
            width, height, ir.CompositeExtract(new_inst, 2), ir.CompositeExtract(new_inst, 3))};
        inst.ReplaceUsesWith(replacement);
        break;
    }
    case TextureType::Color1D:
    case TextureType::ColorArray1D:
    case TextureType::Color3D:
    case TextureType::ColorCube:
    case TextureType::ColorArrayCube:
    case TextureType::Buffer:
        // Nothing to patch here
        break;
    }
}

void ScaleIntegerComposite(IR::IREmitter& ir, IR::Inst& inst, const IR::U1& is_scaled,
                           size_t index) {
    const IR::Value composite{inst.Arg(index)};
    if (composite.IsEmpty()) {
        return;
    }
    const auto info{inst.Flags<IR::TextureInstInfo>()};
    const IR::U32 x{Scale(ir, is_scaled, IR::U32{ir.CompositeExtract(composite, 0)})};
    const IR::U32 y{Scale(ir, is_scaled, IR::U32{ir.CompositeExtract(composite, 1)})};
    switch (info.type) {
    case TextureType::Color2D:
    case TextureType::Color2DRect:
        inst.SetArg(index, ir.CompositeConstruct(x, y));
        break;
    case TextureType::ColorArray2D: {
        const IR::U32 z{ir.CompositeExtract(composite, 2)};
        inst.SetArg(index, ir.CompositeConstruct(x, y, z));
        break;
    }
    case TextureType::Color1D:
    case TextureType::ColorArray1D:
    case TextureType::Color3D:
    case TextureType::ColorCube:
    case TextureType::ColorArrayCube:
    case TextureType::Buffer:
        // Nothing to patch here
        break;
    }
}

void ScaleIntegerOffsetComposite(IR::IREmitter& ir, IR::Inst& inst, const IR::U1& is_scaled,
                                 size_t index) {
    const IR::Value composite{inst.Arg(index)};
    if (composite.IsEmpty()) {
        return;
    }
    const auto info{inst.Flags<IR::TextureInstInfo>()};
    const IR::U32 x{Scale(ir, is_scaled, IR::U32{ir.CompositeExtract(composite, 0)})};
    const IR::U32 y{Scale(ir, is_scaled, IR::U32{ir.CompositeExtract(composite, 1)})};
    switch (info.type) {
    case TextureType::ColorArray2D:
    case TextureType::Color2D:
    case TextureType::Color2DRect:
        inst.SetArg(index, ir.CompositeConstruct(x, y));
        break;
    case TextureType::Color1D:
    case TextureType::ColorArray1D:
    case TextureType::Color3D:
    case TextureType::ColorCube:
    case TextureType::ColorArrayCube:
    case TextureType::Buffer:
        // Nothing to patch here
        break;
    }
}

void SubScaleCoord(IR::IREmitter& ir, IR::Inst& inst, const IR::U1& is_scaled) {
    const auto info{inst.Flags<IR::TextureInstInfo>()};
    const IR::Value coord{inst.Arg(1)};
    const IR::U32 coord_x{ir.CompositeExtract(coord, 0)};
    const IR::U32 coord_y{ir.CompositeExtract(coord, 1)};

    const IR::U32 scaled_x{SubScale(ir, is_scaled, coord_x, IR::Attribute::PositionX)};
    const IR::U32 scaled_y{SubScale(ir, is_scaled, coord_y, IR::Attribute::PositionY)};
    switch (info.type) {
    case TextureType::Color2D:
    case TextureType::Color2DRect:
        inst.SetArg(1, ir.CompositeConstruct(scaled_x, scaled_y));
        break;
    case TextureType::ColorArray2D: {
        const IR::U32 z{ir.CompositeExtract(coord, 2)};
        inst.SetArg(1, ir.CompositeConstruct(scaled_x, scaled_y, z));
        break;
    }
    case TextureType::Color1D:
    case TextureType::ColorArray1D:
    case TextureType::Color3D:
    case TextureType::ColorCube:
    case TextureType::ColorArrayCube:
    case TextureType::Buffer:
        // Nothing to patch here
        break;
    }
}

void SubScaleImageFetch(IR::Block& block, IR::Inst& inst) {
    IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
    const auto info{inst.Flags<IR::TextureInstInfo>()};
    if (!IsTextureTypeRescalable(info.type)) {
        return;
    }
    const IR::U1 is_scaled{ir.IsTextureScaled(ir.Imm32(info.descriptor_index))};
    SubScaleCoord(ir, inst, is_scaled);
    // Scale ImageFetch offset
    ScaleIntegerOffsetComposite(ir, inst, is_scaled, 2);
}

void SubScaleImageRead(IR::Block& block, IR::Inst& inst) {
    IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
    const auto info{inst.Flags<IR::TextureInstInfo>()};
    if (!IsTextureTypeRescalable(info.type)) {
        return;
    }
    const IR::U1 is_scaled{ir.IsImageScaled(ir.Imm32(info.descriptor_index))};
    SubScaleCoord(ir, inst, is_scaled);
}

void PatchImageFetch(IR::Block& block, IR::Inst& inst) {
    IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
    const auto info{inst.Flags<IR::TextureInstInfo>()};
    if (!IsTextureTypeRescalable(info.type)) {
        return;
    }
    const IR::U1 is_scaled{ir.IsTextureScaled(ir.Imm32(info.descriptor_index))};
    ScaleIntegerComposite(ir, inst, is_scaled, 1);
    // Scale ImageFetch offset
    ScaleIntegerOffsetComposite(ir, inst, is_scaled, 2);
}

void PatchImageRead(IR::Block& block, IR::Inst& inst) {
    IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
    const auto info{inst.Flags<IR::TextureInstInfo>()};
    if (!IsTextureTypeRescalable(info.type)) {
        return;
    }
    const IR::U1 is_scaled{ir.IsImageScaled(ir.Imm32(info.descriptor_index))};
    ScaleIntegerComposite(ir, inst, is_scaled, 1);
}

void Visit(const IR::Program& program, IR::Block& block, IR::Inst& inst) {
    const bool is_fragment_shader{program.stage == Stage::Fragment};
    switch (inst.GetOpcode()) {
    case IR::Opcode::GetAttribute: {
        const IR::Attribute attr{inst.Arg(0).Attribute()};
        switch (attr) {
        case IR::Attribute::PositionX:
        case IR::Attribute::PositionY:
            if (is_fragment_shader && inst.Flags<u32>() != 0xDEADBEEF) {
                PatchFragCoord(block, inst);
            }
            break;
        default:
            break;
        }
        break;
    }
    case IR::Opcode::SetAttribute: {
        const IR::Attribute attr{inst.Arg(0).Attribute()};
        switch (attr) {
        case IR::Attribute::PointSize:
            if (inst.Flags<u32>() != 0xDEADBEEF) {
                PatchPointSize(block, inst);
            }
            break;
        default:
            break;
        }
        break;
    }
    case IR::Opcode::ImageQueryDimensions:
        PatchImageQueryDimensions(block, inst);
        break;
    case IR::Opcode::ImageFetch:
        if (is_fragment_shader) {
            SubScaleImageFetch(block, inst);
        } else {
            PatchImageFetch(block, inst);
        }
        break;
    case IR::Opcode::ImageRead:
        if (is_fragment_shader) {
            SubScaleImageRead(block, inst);
        } else {
            PatchImageRead(block, inst);
        }
        break;
    default:
        break;
    }
}
} // Anonymous namespace

void RescalingPass(IR::Program& program) {
    const bool is_fragment_shader{program.stage == Stage::Fragment};
    if (is_fragment_shader) {
        for (IR::Block* const block : program.post_order_blocks) {
            for (IR::Inst& inst : block->Instructions()) {
                VisitMark(*block, inst);
            }
        }
    }
    for (IR::Block* const block : program.post_order_blocks) {
        for (IR::Inst& inst : block->Instructions()) {
            Visit(program, *block, inst);
        }
    }
}

} // namespace Shader::Optimization
|
||||
scaled_value = ir.ShiftLeftLogical(scaled_value, ir.Imm32(down_shift));
|
||||
}
|
||||
if (const u32 up_scale = Settings::values.resolution_info.up_scale; up_scale != 1) {
|
||||
scaled_value = ir.IDiv(scaled_value, ir.Imm32(up_scale));
|
||||
}
|
||||
return IR::U32{ir.Select(is_scaled, scaled_value, value)};
|
||||
}
|
||||
|
||||
void PatchImageQueryDimensions(IR::Block& block, IR::Inst& inst) {
|
||||
const auto it{IR::Block::InstructionList::s_iterator_to(inst)};
|
||||
IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
|
||||
const auto info{inst.Flags<IR::TextureInstInfo>()};
|
||||
const IR::U1 is_scaled{ir.IsTextureScaled(ir.Imm32(info.descriptor_index))};
|
||||
switch (info.type) {
|
||||
case TextureType::Color2D:
|
||||
case TextureType::ColorArray2D:
|
||||
case TextureType::Color2DRect: {
|
||||
const IR::Value new_inst{&*block.PrependNewInst(it, inst)};
|
||||
const IR::U32 width{DownScale(ir, is_scaled, IR::U32{ir.CompositeExtract(new_inst, 0)})};
|
||||
const IR::U32 height{DownScale(ir, is_scaled, IR::U32{ir.CompositeExtract(new_inst, 1)})};
|
||||
const IR::Value replacement{ir.CompositeConstruct(
|
||||
width, height, ir.CompositeExtract(new_inst, 2), ir.CompositeExtract(new_inst, 3))};
|
||||
inst.ReplaceUsesWith(replacement);
|
||||
break;
|
||||
}
|
||||
case TextureType::Color1D:
|
||||
case TextureType::ColorArray1D:
|
||||
case TextureType::Color3D:
|
||||
case TextureType::ColorCube:
|
||||
case TextureType::ColorArrayCube:
|
||||
case TextureType::Buffer:
|
||||
// Nothing to patch here
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
void ScaleIntegerComposite(IR::IREmitter& ir, IR::Inst& inst, const IR::U1& is_scaled,
|
||||
size_t index) {
|
||||
const IR::Value composite{inst.Arg(index)};
|
||||
if (composite.IsEmpty()) {
|
||||
return;
|
||||
}
|
||||
const auto info{inst.Flags<IR::TextureInstInfo>()};
|
||||
const IR::U32 x{Scale(ir, is_scaled, IR::U32{ir.CompositeExtract(composite, 0)})};
|
||||
const IR::U32 y{Scale(ir, is_scaled, IR::U32{ir.CompositeExtract(composite, 1)})};
|
||||
switch (info.type) {
|
||||
case TextureType::Color2D:
|
||||
case TextureType::Color2DRect:
|
||||
inst.SetArg(index, ir.CompositeConstruct(x, y));
|
||||
break;
|
||||
case TextureType::ColorArray2D: {
|
||||
const IR::U32 z{ir.CompositeExtract(composite, 2)};
|
||||
inst.SetArg(index, ir.CompositeConstruct(x, y, z));
|
||||
break;
|
||||
}
|
||||
case TextureType::Color1D:
|
||||
case TextureType::ColorArray1D:
|
||||
case TextureType::Color3D:
|
||||
case TextureType::ColorCube:
|
||||
case TextureType::ColorArrayCube:
|
||||
case TextureType::Buffer:
|
||||
// Nothing to patch here
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
void ScaleIntegerOffsetComposite(IR::IREmitter& ir, IR::Inst& inst, const IR::U1& is_scaled,
|
||||
size_t index) {
|
||||
const IR::Value composite{inst.Arg(index)};
|
||||
if (composite.IsEmpty()) {
|
||||
return;
|
||||
}
|
||||
const auto info{inst.Flags<IR::TextureInstInfo>()};
|
||||
const IR::U32 x{Scale(ir, is_scaled, IR::U32{ir.CompositeExtract(composite, 0)})};
|
||||
const IR::U32 y{Scale(ir, is_scaled, IR::U32{ir.CompositeExtract(composite, 1)})};
|
||||
switch (info.type) {
|
||||
case TextureType::ColorArray2D:
|
||||
case TextureType::Color2D:
|
||||
case TextureType::Color2DRect:
|
||||
inst.SetArg(index, ir.CompositeConstruct(x, y));
|
||||
break;
|
||||
case TextureType::Color1D:
|
||||
case TextureType::ColorArray1D:
|
||||
case TextureType::Color3D:
|
||||
case TextureType::ColorCube:
|
||||
case TextureType::ColorArrayCube:
|
||||
case TextureType::Buffer:
|
||||
// Nothing to patch here
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
void SubScaleCoord(IR::IREmitter& ir, IR::Inst& inst, const IR::U1& is_scaled) {
|
||||
const auto info{inst.Flags<IR::TextureInstInfo>()};
|
||||
const IR::Value coord{inst.Arg(1)};
|
||||
const IR::U32 coord_x{ir.CompositeExtract(coord, 0)};
|
||||
const IR::U32 coord_y{ir.CompositeExtract(coord, 1)};
|
||||
|
||||
const IR::U32 scaled_x{SubScale(ir, is_scaled, coord_x, IR::Attribute::PositionX)};
|
||||
const IR::U32 scaled_y{SubScale(ir, is_scaled, coord_y, IR::Attribute::PositionY)};
|
||||
switch (info.type) {
|
||||
case TextureType::Color2D:
|
||||
case TextureType::Color2DRect:
|
||||
inst.SetArg(1, ir.CompositeConstruct(scaled_x, scaled_y));
|
||||
break;
|
||||
case TextureType::ColorArray2D: {
|
||||
const IR::U32 z{ir.CompositeExtract(coord, 2)};
|
||||
inst.SetArg(1, ir.CompositeConstruct(scaled_x, scaled_y, z));
|
||||
break;
|
||||
}
|
||||
case TextureType::Color1D:
|
||||
case TextureType::ColorArray1D:
|
||||
case TextureType::Color3D:
|
||||
case TextureType::ColorCube:
|
||||
case TextureType::ColorArrayCube:
|
||||
case TextureType::Buffer:
|
||||
// Nothing to patch here
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
void SubScaleImageFetch(IR::Block& block, IR::Inst& inst) {
|
||||
IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
|
||||
const auto info{inst.Flags<IR::TextureInstInfo>()};
|
||||
if (!IsTextureTypeRescalable(info.type)) {
|
||||
return;
|
||||
}
|
||||
const IR::U1 is_scaled{ir.IsTextureScaled(ir.Imm32(info.descriptor_index))};
|
||||
SubScaleCoord(ir, inst, is_scaled);
|
||||
// Scale ImageFetch offset
|
||||
ScaleIntegerOffsetComposite(ir, inst, is_scaled, 2);
|
||||
}
|
||||
|
||||
void SubScaleImageRead(IR::Block& block, IR::Inst& inst) {
|
||||
IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
|
||||
const auto info{inst.Flags<IR::TextureInstInfo>()};
|
||||
if (!IsTextureTypeRescalable(info.type)) {
|
||||
return;
|
||||
}
|
||||
const IR::U1 is_scaled{ir.IsImageScaled(ir.Imm32(info.descriptor_index))};
|
||||
SubScaleCoord(ir, inst, is_scaled);
|
||||
}
|
||||
|
||||
void PatchImageFetch(IR::Block& block, IR::Inst& inst) {
|
||||
IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
|
||||
const auto info{inst.Flags<IR::TextureInstInfo>()};
|
||||
if (!IsTextureTypeRescalable(info.type)) {
|
||||
return;
|
||||
}
|
||||
const IR::U1 is_scaled{ir.IsTextureScaled(ir.Imm32(info.descriptor_index))};
|
||||
ScaleIntegerComposite(ir, inst, is_scaled, 1);
|
||||
// Scale ImageFetch offset
|
||||
ScaleIntegerOffsetComposite(ir, inst, is_scaled, 2);
|
||||
}
|
||||
|
||||
void PatchImageRead(IR::Block& block, IR::Inst& inst) {
|
||||
IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
|
||||
const auto info{inst.Flags<IR::TextureInstInfo>()};
|
||||
if (!IsTextureTypeRescalable(info.type)) {
|
||||
return;
|
||||
}
|
||||
const IR::U1 is_scaled{ir.IsImageScaled(ir.Imm32(info.descriptor_index))};
|
||||
ScaleIntegerComposite(ir, inst, is_scaled, 1);
|
||||
}
|
||||
|
||||
void Visit(const IR::Program& program, IR::Block& block, IR::Inst& inst) {
|
||||
const bool is_fragment_shader{program.stage == Stage::Fragment};
|
||||
switch (inst.GetOpcode()) {
|
||||
case IR::Opcode::GetAttribute: {
|
||||
const IR::Attribute attr{inst.Arg(0).Attribute()};
|
||||
switch (attr) {
|
||||
case IR::Attribute::PositionX:
|
||||
case IR::Attribute::PositionY:
|
||||
if (is_fragment_shader && inst.Flags<u32>() != 0xDEADBEEF) {
|
||||
PatchFragCoord(block, inst);
|
||||
}
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
break;
|
||||
}
|
||||
case IR::Opcode::SetAttribute: {
|
||||
const IR::Attribute attr{inst.Arg(0).Attribute()};
|
||||
switch (attr) {
|
||||
case IR::Attribute::PointSize:
|
||||
if (inst.Flags<u32>() != 0xDEADBEEF) {
|
||||
PatchPointSize(block, inst);
|
||||
}
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
break;
|
||||
}
|
||||
case IR::Opcode::ImageQueryDimensions:
|
||||
PatchImageQueryDimensions(block, inst);
|
||||
break;
|
||||
case IR::Opcode::ImageFetch:
|
||||
if (is_fragment_shader) {
|
||||
SubScaleImageFetch(block, inst);
|
||||
} else {
|
||||
PatchImageFetch(block, inst);
|
||||
}
|
||||
break;
|
||||
case IR::Opcode::ImageRead:
|
||||
if (is_fragment_shader) {
|
||||
SubScaleImageRead(block, inst);
|
||||
} else {
|
||||
PatchImageRead(block, inst);
|
||||
}
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
} // Anonymous namespace
|
||||
|
||||
void RescalingPass(IR::Program& program) {
|
||||
const bool is_fragment_shader{program.stage == Stage::Fragment};
|
||||
if (is_fragment_shader) {
|
||||
for (IR::Block* const block : program.post_order_blocks) {
|
||||
for (IR::Inst& inst : block->Instructions()) {
|
||||
VisitMark(*block, inst);
|
||||
}
|
||||
}
|
||||
}
|
||||
for (IR::Block* const block : program.post_order_blocks) {
|
||||
for (IR::Inst& inst : block->Instructions()) {
|
||||
Visit(program, *block, inst);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace Shader::Optimization
|
||||
|
||||
@@ -1,412 +1,412 @@
// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later

// This file implements the SSA rewriting algorithm proposed in
//
//      Simple and Efficient Construction of Static Single Assignment Form.
//      Braun M., Buchwald S., Hack S., Leißa R., Mallon C., Zwinkau A. (2013)
//      In: Jhala R., De Bosschere K. (eds)
//      Compiler Construction. CC 2013.
//      Lecture Notes in Computer Science, vol 7791.
//      Springer, Berlin, Heidelberg
//
//      https://link.springer.com/chapter/10.1007/978-3-642-37051-9_6
//

#include <algorithm>
#include <array>
#include <deque>
#include <span>
#include <variant>
#include <vector>

#include <boost/container/flat_map.hpp>
#include <boost/container/small_vector.hpp>

#include "shader_recompiler/frontend/ir/basic_block.h"
#include "shader_recompiler/frontend/ir/opcodes.h"
#include "shader_recompiler/frontend/ir/pred.h"
#include "shader_recompiler/frontend/ir/reg.h"
#include "shader_recompiler/frontend/ir/value.h"
#include "shader_recompiler/ir_opt/passes.h"

namespace Shader::Optimization {
namespace {
struct FlagTag {
    auto operator<=>(const FlagTag&) const noexcept = default;
};
struct ZeroFlagTag : FlagTag {};
struct SignFlagTag : FlagTag {};
struct CarryFlagTag : FlagTag {};
struct OverflowFlagTag : FlagTag {};

struct GotoVariable : FlagTag {
    GotoVariable() = default;
    explicit GotoVariable(u32 index_) : index{index_} {}

    auto operator<=>(const GotoVariable&) const noexcept = default;

    u32 index;
};

struct IndirectBranchVariable {
    auto operator<=>(const IndirectBranchVariable&) const noexcept = default;
};

using Variant = std::variant<IR::Reg, IR::Pred, ZeroFlagTag, SignFlagTag, CarryFlagTag,
                             OverflowFlagTag, GotoVariable, IndirectBranchVariable>;
using ValueMap = boost::container::flat_map<IR::Block*, IR::Value>;

struct DefTable {
    const IR::Value& Def(IR::Block* block, IR::Reg variable) {
        return block->SsaRegValue(variable);
    }
    void SetDef(IR::Block* block, IR::Reg variable, const IR::Value& value) {
        block->SetSsaRegValue(variable, value);
    }

    const IR::Value& Def(IR::Block* block, IR::Pred variable) {
        return preds[IR::PredIndex(variable)][block];
    }
    void SetDef(IR::Block* block, IR::Pred variable, const IR::Value& value) {
        preds[IR::PredIndex(variable)].insert_or_assign(block, value);
    }

    const IR::Value& Def(IR::Block* block, GotoVariable variable) {
        return goto_vars[variable.index][block];
    }
    void SetDef(IR::Block* block, GotoVariable variable, const IR::Value& value) {
        goto_vars[variable.index].insert_or_assign(block, value);
    }

    const IR::Value& Def(IR::Block* block, IndirectBranchVariable) {
        return indirect_branch_var[block];
    }
    void SetDef(IR::Block* block, IndirectBranchVariable, const IR::Value& value) {
        indirect_branch_var.insert_or_assign(block, value);
    }

    const IR::Value& Def(IR::Block* block, ZeroFlagTag) {
        return zero_flag[block];
    }
    void SetDef(IR::Block* block, ZeroFlagTag, const IR::Value& value) {
        zero_flag.insert_or_assign(block, value);
    }

    const IR::Value& Def(IR::Block* block, SignFlagTag) {
        return sign_flag[block];
    }
    void SetDef(IR::Block* block, SignFlagTag, const IR::Value& value) {
        sign_flag.insert_or_assign(block, value);
    }

    const IR::Value& Def(IR::Block* block, CarryFlagTag) {
        return carry_flag[block];
    }
    void SetDef(IR::Block* block, CarryFlagTag, const IR::Value& value) {
        carry_flag.insert_or_assign(block, value);
    }

    const IR::Value& Def(IR::Block* block, OverflowFlagTag) {
        return overflow_flag[block];
    }
    void SetDef(IR::Block* block, OverflowFlagTag, const IR::Value& value) {
        overflow_flag.insert_or_assign(block, value);
    }

    std::array<ValueMap, IR::NUM_USER_PREDS> preds;
    boost::container::flat_map<u32, ValueMap> goto_vars;
    ValueMap indirect_branch_var;
    ValueMap zero_flag;
    ValueMap sign_flag;
    ValueMap carry_flag;
    ValueMap overflow_flag;
};

IR::Opcode UndefOpcode(IR::Reg) noexcept {
    return IR::Opcode::UndefU32;
}

IR::Opcode UndefOpcode(IR::Pred) noexcept {
    return IR::Opcode::UndefU1;
}

IR::Opcode UndefOpcode(const FlagTag&) noexcept {
    return IR::Opcode::UndefU1;
}

IR::Opcode UndefOpcode(IndirectBranchVariable) noexcept {
    return IR::Opcode::UndefU32;
}

enum class Status {
    Start,
    SetValue,
    PreparePhiArgument,
    PushPhiArgument,
};

template <typename Type>
struct ReadState {
    ReadState(IR::Block* block_) : block{block_} {}
    ReadState() = default;

    IR::Block* block{};
    IR::Value result{};
    IR::Inst* phi{};
    IR::Block* const* pred_it{};
    IR::Block* const* pred_end{};
    Status pc{Status::Start};
};

class Pass {
public:
    template <typename Type>
    void WriteVariable(Type variable, IR::Block* block, const IR::Value& value) {
        current_def.SetDef(block, variable, value);
    }

    template <typename Type>
    IR::Value ReadVariable(Type variable, IR::Block* root_block) {
        boost::container::small_vector<ReadState<Type>, 64> stack{
            ReadState<Type>(nullptr),
            ReadState<Type>(root_block),
        };
        const auto prepare_phi_operand{[&] {
            if (stack.back().pred_it == stack.back().pred_end) {
                IR::Inst* const phi{stack.back().phi};
                IR::Block* const block{stack.back().block};
                const IR::Value result{TryRemoveTrivialPhi(*phi, block, UndefOpcode(variable))};
                stack.pop_back();
                stack.back().result = result;
                WriteVariable(variable, block, result);
            } else {
                IR::Block* const imm_pred{*stack.back().pred_it};
                stack.back().pc = Status::PushPhiArgument;
                stack.emplace_back(imm_pred);
            }
        }};
        do {
            IR::Block* const block{stack.back().block};
            switch (stack.back().pc) {
            case Status::Start: {
                if (const IR::Value& def = current_def.Def(block, variable); !def.IsEmpty()) {
                    stack.back().result = def;
                } else if (!block->IsSsaSealed()) {
                    // Incomplete CFG
                    IR::Inst* phi{&*block->PrependNewInst(block->begin(), IR::Opcode::Phi)};
                    phi->SetFlags(IR::TypeOf(UndefOpcode(variable)));

                    incomplete_phis[block].insert_or_assign(variable, phi);
                    stack.back().result = IR::Value{&*phi};
                } else if (const std::span imm_preds = block->ImmPredecessors();
                           imm_preds.size() == 1) {
                    // Optimize the common case of one predecessor: no phi needed
                    stack.back().pc = Status::SetValue;
                    stack.emplace_back(imm_preds.front());
                    break;
                } else {
                    // Break potential cycles with operandless phi
                    IR::Inst* const phi{&*block->PrependNewInst(block->begin(), IR::Opcode::Phi)};
                    phi->SetFlags(IR::TypeOf(UndefOpcode(variable)));

                    WriteVariable(variable, block, IR::Value{phi});

                    stack.back().phi = phi;
                    stack.back().pred_it = imm_preds.data();
                    stack.back().pred_end = imm_preds.data() + imm_preds.size();
                    prepare_phi_operand();
                    break;
                }
            }
                [[fallthrough]];
            case Status::SetValue: {
                const IR::Value result{stack.back().result};
                WriteVariable(variable, block, result);
                stack.pop_back();
                stack.back().result = result;
                break;
            }
            case Status::PushPhiArgument: {
                IR::Inst* const phi{stack.back().phi};
                phi->AddPhiOperand(*stack.back().pred_it, stack.back().result);
                ++stack.back().pred_it;
            }
                [[fallthrough]];
            case Status::PreparePhiArgument:
                prepare_phi_operand();
                break;
            }
        } while (stack.size() > 1);
        return stack.back().result;
    }

    void SealBlock(IR::Block* block) {
        const auto it{incomplete_phis.find(block)};
        if (it != incomplete_phis.end()) {
            for (auto& pair : it->second) {
                auto& variant{pair.first};
                auto& phi{pair.second};
                std::visit([&](auto& variable) { AddPhiOperands(variable, *phi, block); }, variant);
            }
        }
        block->SsaSeal();
    }

private:
    template <typename Type>
    IR::Value AddPhiOperands(Type variable, IR::Inst& phi, IR::Block* block) {
        for (IR::Block* const imm_pred : block->ImmPredecessors()) {
            phi.AddPhiOperand(imm_pred, ReadVariable(variable, imm_pred));
        }
        return TryRemoveTrivialPhi(phi, block, UndefOpcode(variable));
    }

    IR::Value TryRemoveTrivialPhi(IR::Inst& phi, IR::Block* block, IR::Opcode undef_opcode) {
        IR::Value same;
        const size_t num_args{phi.NumArgs()};
        for (size_t arg_index = 0; arg_index < num_args; ++arg_index) {
            const IR::Value& op{phi.Arg(arg_index)};
            if (op.Resolve() == same.Resolve() || op == IR::Value{&phi}) {
                // Unique value or self-reference
                continue;
            }
            if (!same.IsEmpty()) {
                // The phi merges at least two values: not trivial
                return IR::Value{&phi};
            }
            same = op;
        }
        // Remove the phi node from the block, it will be reinserted
        IR::Block::InstructionList& list{block->Instructions()};
        list.erase(IR::Block::InstructionList::s_iterator_to(phi));

        // Find the first non-phi instruction and use it as an insertion point
        IR::Block::iterator reinsert_point{std::ranges::find_if_not(list, IR::IsPhi)};
        if (same.IsEmpty()) {
            // The phi is unreachable or in the start block
            // Insert an undefined instruction and make it the phi node replacement
            // The "phi" node reinsertion point is specified after this instruction
            reinsert_point = block->PrependNewInst(reinsert_point, undef_opcode);
            same = IR::Value{&*reinsert_point};
            ++reinsert_point;
        }
        // Reinsert the phi node and reroute all its uses to the "same" value
        list.insert(reinsert_point, phi);
        phi.ReplaceUsesWith(same);
        // TODO: Try to recursively remove all phi users, which might have become trivial
        return same;
    }

    boost::container::flat_map<IR::Block*, boost::container::flat_map<Variant, IR::Inst*>>
        incomplete_phis;
    DefTable current_def;
};

void VisitInst(Pass& pass, IR::Block* block, IR::Inst& inst) {
    switch (inst.GetOpcode()) {
    case IR::Opcode::SetRegister:
        if (const IR::Reg reg{inst.Arg(0).Reg()}; reg != IR::Reg::RZ) {
            pass.WriteVariable(reg, block, inst.Arg(1));
        }
        break;
    case IR::Opcode::SetPred:
        if (const IR::Pred pred{inst.Arg(0).Pred()}; pred != IR::Pred::PT) {
            pass.WriteVariable(pred, block, inst.Arg(1));
        }
        break;
    case IR::Opcode::SetGotoVariable:
        pass.WriteVariable(GotoVariable{inst.Arg(0).U32()}, block, inst.Arg(1));
        break;
    case IR::Opcode::SetIndirectBranchVariable:
        pass.WriteVariable(IndirectBranchVariable{}, block, inst.Arg(0));
        break;
    case IR::Opcode::SetZFlag:
        pass.WriteVariable(ZeroFlagTag{}, block, inst.Arg(0));
        break;
    case IR::Opcode::SetSFlag:
        pass.WriteVariable(SignFlagTag{}, block, inst.Arg(0));
        break;
    case IR::Opcode::SetCFlag:
        pass.WriteVariable(CarryFlagTag{}, block, inst.Arg(0));
        break;
    case IR::Opcode::SetOFlag:
        pass.WriteVariable(OverflowFlagTag{}, block, inst.Arg(0));
        break;
    case IR::Opcode::GetRegister:
        if (const IR::Reg reg{inst.Arg(0).Reg()}; reg != IR::Reg::RZ) {
            inst.ReplaceUsesWith(pass.ReadVariable(reg, block));
        }
        break;
    case IR::Opcode::GetPred:
        if (const IR::Pred pred{inst.Arg(0).Pred()}; pred != IR::Pred::PT) {
            inst.ReplaceUsesWith(pass.ReadVariable(pred, block));
        }
        break;
    case IR::Opcode::GetGotoVariable:
        inst.ReplaceUsesWith(pass.ReadVariable(GotoVariable{inst.Arg(0).U32()}, block));
        break;
    case IR::Opcode::GetIndirectBranchVariable:
        inst.ReplaceUsesWith(pass.ReadVariable(IndirectBranchVariable{}, block));
        break;
    case IR::Opcode::GetZFlag:
        inst.ReplaceUsesWith(pass.ReadVariable(ZeroFlagTag{}, block));
        break;
    case IR::Opcode::GetSFlag:
        inst.ReplaceUsesWith(pass.ReadVariable(SignFlagTag{}, block));
        break;
    case IR::Opcode::GetCFlag:
        inst.ReplaceUsesWith(pass.ReadVariable(CarryFlagTag{}, block));
        break;
    case IR::Opcode::GetOFlag:
        inst.ReplaceUsesWith(pass.ReadVariable(OverflowFlagTag{}, block));
        break;
    default:
        break;
    }
}

void VisitBlock(Pass& pass, IR::Block* block) {
    for (IR::Inst& inst : block->Instructions()) {
        VisitInst(pass, block, inst);
    }
    pass.SealBlock(block);
}

IR::Type GetConcreteType(IR::Inst* inst) {
    std::deque<IR::Inst*> queue;
    queue.push_back(inst);
    while (!queue.empty()) {
        IR::Inst* current = queue.front();
        queue.pop_front();
        const size_t num_args{current->NumArgs()};
        for (size_t i = 0; i < num_args; ++i) {
            const auto set_type = current->Arg(i).Type();
            if (set_type != IR::Type::Opaque) {
                return set_type;
            }
            if (!current->Arg(i).IsImmediate()) {
                queue.push_back(current->Arg(i).Inst());
            }
        }
    }
    return IR::Type::Opaque;
}
} // Anonymous namespace

void SsaRewritePass(IR::Program& program) {
    Pass pass;
    const auto end{program.post_order_blocks.rend()};
    for (auto block = program.post_order_blocks.rbegin(); block != end; ++block) {
        VisitBlock(pass, *block);
    }
    for (auto block = program.post_order_blocks.rbegin(); block != end; ++block) {
        for (IR::Inst& inst : (*block)->Instructions()) {
            if (inst.GetOpcode() == IR::Opcode::Phi) {
                if (inst.Type() == IR::Type::Opaque) {
                    inst.SetFlags(GetConcreteType(&inst));
                }
                inst.OrderPhiArgs();
            }
        }
    }
}

} // namespace Shader::Optimization
File diff suppressed because it is too large
Load Diff
@@ -1,97 +1,97 @@
// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later

#include <map>
#include <set>

#include "shader_recompiler/exception.h"
#include "shader_recompiler/frontend/ir/basic_block.h"
#include "shader_recompiler/frontend/ir/value.h"
#include "shader_recompiler/ir_opt/passes.h"

namespace Shader::Optimization {

static void ValidateTypes(const IR::Program& program) {
    for (const auto& block : program.blocks) {
        for (const IR::Inst& inst : *block) {
            if (inst.GetOpcode() == IR::Opcode::Phi) {
                // Skip validation on phi nodes
                continue;
            }
            const size_t num_args{inst.NumArgs()};
            for (size_t i = 0; i < num_args; ++i) {
                const IR::Type t1{inst.Arg(i).Type()};
                const IR::Type t2{IR::ArgTypeOf(inst.GetOpcode(), i)};
                if (!IR::AreTypesCompatible(t1, t2)) {
                    throw LogicError("Invalid types in block:\n{}", IR::DumpBlock(*block));
                }
            }
        }
    }
}

static void ValidateUses(const IR::Program& program) {
    std::map<IR::Inst*, int> actual_uses;
    for (const auto& block : program.blocks) {
        for (const IR::Inst& inst : *block) {
            const size_t num_args{inst.NumArgs()};
            for (size_t i = 0; i < num_args; ++i) {
                const IR::Value arg{inst.Arg(i)};
                if (!arg.IsImmediate()) {
                    ++actual_uses[arg.Inst()];
                }
            }
        }
    }
    for (const auto& [inst, uses] : actual_uses) {
        if (inst->UseCount() != uses) {
            throw LogicError("Invalid uses in block: {}", IR::DumpProgram(program));
        }
    }
}

static void ValidateForwardDeclarations(const IR::Program& program) {
    std::set<const IR::Inst*> definitions;
    for (const IR::Block* const block : program.blocks) {
        for (const IR::Inst& inst : *block) {
            definitions.emplace(&inst);
            if (inst.GetOpcode() == IR::Opcode::Phi) {
                // Phi nodes can have forward declarations
                continue;
            }
            const size_t num_args{inst.NumArgs()};
            for (size_t arg = 0; arg < num_args; ++arg) {
                if (inst.Arg(arg).IsImmediate()) {
                    continue;
                }
                if (!definitions.contains(inst.Arg(arg).Inst())) {
                    throw LogicError("Forward declaration in block: {}", IR::DumpBlock(*block));
                }
            }
        }
    }
}

static void ValidatePhiNodes(const IR::Program& program) {
    for (const IR::Block* const block : program.blocks) {
        bool no_more_phis{false};
        for (const IR::Inst& inst : *block) {
            if (inst.GetOpcode() == IR::Opcode::Phi) {
                if (no_more_phis) {
                    throw LogicError("Interleaved phi nodes: {}", IR::DumpBlock(*block));
                }
            } else {
                no_more_phis = true;
            }
        }
    }
}

void VerificationPass(const IR::Program& program) {
    ValidateTypes(program);
    ValidateUses(program);
    ValidateForwardDeclarations(program);
    ValidatePhiNodes(program);
}

} // namespace Shader::Optimization