another try

This commit is contained in:
mgthepro
2022-11-05 13:58:44 +01:00
parent 4a9f2bbf2a
commit 9f63fbe700
2002 changed files with 671171 additions and 671092 deletions

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -1,25 +1,25 @@
// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include "shader_recompiler/frontend/ir/basic_block.h"
#include "shader_recompiler/frontend/ir/value.h"
#include "shader_recompiler/ir_opt/passes.h"
namespace Shader::Optimization {

void DeadCodeEliminationPass(IR::Program& program) {
    // Walk each block backwards: erasing a dead instruction drops the use
    // counts of the instructions that feed it, which can make those earlier
    // instructions dead as well by the time we reach them.
    for (IR::Block* const block : program.post_order_blocks) {
        for (auto it = block->end(); it != block->begin();) {
            --it;
            if (it->HasUses() || it->MayHaveSideEffects()) {
                continue;
            }
            it->Invalidate();
            it = block->Instructions().erase(it);
        }
    }
}

} // namespace Shader::Optimization
// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include "shader_recompiler/frontend/ir/basic_block.h"
#include "shader_recompiler/frontend/ir/value.h"
#include "shader_recompiler/ir_opt/passes.h"
namespace Shader::Optimization {

// Removes instructions that have no uses and no side effects.
void DeadCodeEliminationPass(IR::Program& program) {
    // We iterate over the instructions in reverse order.
    // This is because removing an instruction reduces the number of uses for earlier instructions.
    for (IR::Block* const block : program.post_order_blocks) {
        auto it{block->end()};
        while (it != block->begin()) {
            --it;
            if (!it->HasUses() && !it->MayHaveSideEffects()) {
                it->Invalidate();
                // erase() returns the iterator after the removed element; the
                // next iteration of the loop steps back from it.
                it = block->Instructions().erase(it);
            }
        }
    }
}

} // namespace Shader::Optimization

View File

@@ -1,29 +1,29 @@
// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include "shader_recompiler/frontend/ir/ir_emitter.h"
#include "shader_recompiler/ir_opt/passes.h"
namespace Shader::Optimization {

// Invalidates the first Epilogue instruction found, then stops scanning.
void VertexATransformPass(IR::Program& program) {
    for (IR::Block* const block : program.blocks) {
        for (IR::Inst& inst : block->Instructions()) {
            if (inst.GetOpcode() != IR::Opcode::Epilogue) {
                continue;
            }
            inst.Invalidate();
            return;
        }
    }
}

// Invalidates the first Prologue instruction found, then stops scanning.
void VertexBTransformPass(IR::Program& program) {
    for (IR::Block* const block : program.blocks) {
        for (IR::Inst& inst : block->Instructions()) {
            if (inst.GetOpcode() != IR::Opcode::Prologue) {
                continue;
            }
            inst.Invalidate();
            return;
        }
    }
}

} // namespace Shader::Optimization
// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include "shader_recompiler/frontend/ir/ir_emitter.h"
#include "shader_recompiler/ir_opt/passes.h"
namespace Shader::Optimization {

// Invalidates the first Epilogue instruction in the program and stops.
// NOTE(review): presumably part of merging a dual-vertex (A + B) pair — the A
// half loses its epilogue so control can continue into B; confirm with callers.
void VertexATransformPass(IR::Program& program) {
    for (IR::Block* const block : program.blocks) {
        for (IR::Inst& inst : block->Instructions()) {
            if (inst.GetOpcode() == IR::Opcode::Epilogue) {
                // Invalidate() returns void; the return merely exits early.
                return inst.Invalidate();
            }
        }
    }
}

// Invalidates the first Prologue instruction in the program and stops.
void VertexBTransformPass(IR::Program& program) {
    for (IR::Block* const block : program.blocks) {
        for (IR::Inst& inst : block->Instructions()) {
            if (inst.GetOpcode() == IR::Opcode::Prologue) {
                return inst.Invalidate();
            }
        }
    }
}

} // namespace Shader::Optimization

View File

@@ -1,37 +1,37 @@
// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include <vector>
#include "shader_recompiler/frontend/ir/basic_block.h"
#include "shader_recompiler/frontend/ir/value.h"
#include "shader_recompiler/ir_opt/passes.h"
namespace Shader::Optimization {

void IdentityRemovalPass(IR::Program& program) {
    // Dead Identity/Void instructions are unlinked from their blocks first and
    // only invalidated at the very end, once no argument can reference them.
    std::vector<IR::Inst*> dead_insts;
    for (IR::Block* const block : program.blocks) {
        auto it{block->begin()};
        while (it != block->end()) {
            // Collapse chains of identities so every argument points at the
            // value that actually produces it.
            const size_t arg_count{it->NumArgs()};
            for (size_t arg_index = 0; arg_index < arg_count; ++arg_index) {
                for (IR::Value value = it->Arg(arg_index); value.IsIdentity();
                     value = it->Arg(arg_index)) {
                    it->SetArg(arg_index, value.Inst()->Arg(0));
                }
            }
            const IR::Opcode opcode{it->GetOpcode()};
            if (opcode != IR::Opcode::Identity && opcode != IR::Opcode::Void) {
                ++it;
                continue;
            }
            dead_insts.push_back(&*it);
            it = block->Instructions().erase(it);
        }
    }
    for (IR::Inst* const dead_inst : dead_insts) {
        dead_inst->Invalidate();
    }
}

} // namespace Shader::Optimization
// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include <vector>
#include "shader_recompiler/frontend/ir/basic_block.h"
#include "shader_recompiler/frontend/ir/value.h"
#include "shader_recompiler/ir_opt/passes.h"
namespace Shader::Optimization {

// Replaces uses of Identity instructions with the value they forward, then
// erases the now-unreferenced Identity/Void instructions.
void IdentityRemovalPass(IR::Program& program) {
    // Invalidation is deferred until all blocks have been rewritten, so that
    // arguments still referencing a pending-removal instruction stay valid.
    std::vector<IR::Inst*> to_invalidate;
    for (IR::Block* const block : program.blocks) {
        for (auto inst = block->begin(); inst != block->end();) {
            // Follow identity chains until each argument is a concrete value.
            const size_t num_args{inst->NumArgs()};
            for (size_t i = 0; i < num_args; ++i) {
                IR::Value arg;
                while ((arg = inst->Arg(i)).IsIdentity()) {
                    inst->SetArg(i, arg.Inst()->Arg(0));
                }
            }
            if (inst->GetOpcode() == IR::Opcode::Identity ||
                inst->GetOpcode() == IR::Opcode::Void) {
                // Unlink now, invalidate later (see note above the vector).
                to_invalidate.push_back(&*inst);
                inst = block->Instructions().erase(inst);
            } else {
                ++inst;
            }
        }
    }
    for (IR::Inst* const inst : to_invalidate) {
        inst->Invalidate();
    }
}

} // namespace Shader::Optimization

View File

@@ -1,139 +1,139 @@
// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include "shader_recompiler/frontend/ir/value.h"
#include "shader_recompiler/ir_opt/passes.h"
namespace Shader::Optimization {
namespace {
// Maps each 16-bit floating-point opcode to its 32-bit equivalent.
// Opcodes without a 16-bit form fall through to the default case and are
// returned unchanged.
IR::Opcode Replace(IR::Opcode op) {
    switch (op) {
    // Arithmetic and rounding.
    case IR::Opcode::FPAbs16:
        return IR::Opcode::FPAbs32;
    case IR::Opcode::FPAdd16:
        return IR::Opcode::FPAdd32;
    case IR::Opcode::FPCeil16:
        return IR::Opcode::FPCeil32;
    case IR::Opcode::FPFloor16:
        return IR::Opcode::FPFloor32;
    case IR::Opcode::FPFma16:
        return IR::Opcode::FPFma32;
    case IR::Opcode::FPMul16:
        return IR::Opcode::FPMul32;
    case IR::Opcode::FPNeg16:
        return IR::Opcode::FPNeg32;
    case IR::Opcode::FPRoundEven16:
        return IR::Opcode::FPRoundEven32;
    case IR::Opcode::FPSaturate16:
        return IR::Opcode::FPSaturate32;
    case IR::Opcode::FPClamp16:
        return IR::Opcode::FPClamp32;
    case IR::Opcode::FPTrunc16:
        return IR::Opcode::FPTrunc32;
    // Composite construction, extraction and insertion on f16 vectors.
    case IR::Opcode::CompositeConstructF16x2:
        return IR::Opcode::CompositeConstructF32x2;
    case IR::Opcode::CompositeConstructF16x3:
        return IR::Opcode::CompositeConstructF32x3;
    case IR::Opcode::CompositeConstructF16x4:
        return IR::Opcode::CompositeConstructF32x4;
    case IR::Opcode::CompositeExtractF16x2:
        return IR::Opcode::CompositeExtractF32x2;
    case IR::Opcode::CompositeExtractF16x3:
        return IR::Opcode::CompositeExtractF32x3;
    case IR::Opcode::CompositeExtractF16x4:
        return IR::Opcode::CompositeExtractF32x4;
    case IR::Opcode::CompositeInsertF16x2:
        return IR::Opcode::CompositeInsertF32x2;
    case IR::Opcode::CompositeInsertF16x3:
        return IR::Opcode::CompositeInsertF32x3;
    case IR::Opcode::CompositeInsertF16x4:
        return IR::Opcode::CompositeInsertF32x4;
    // Comparisons (ordered and unordered variants).
    case IR::Opcode::FPOrdEqual16:
        return IR::Opcode::FPOrdEqual32;
    case IR::Opcode::FPUnordEqual16:
        return IR::Opcode::FPUnordEqual32;
    case IR::Opcode::FPOrdNotEqual16:
        return IR::Opcode::FPOrdNotEqual32;
    case IR::Opcode::FPUnordNotEqual16:
        return IR::Opcode::FPUnordNotEqual32;
    case IR::Opcode::FPOrdLessThan16:
        return IR::Opcode::FPOrdLessThan32;
    case IR::Opcode::FPUnordLessThan16:
        return IR::Opcode::FPUnordLessThan32;
    case IR::Opcode::FPOrdGreaterThan16:
        return IR::Opcode::FPOrdGreaterThan32;
    case IR::Opcode::FPUnordGreaterThan16:
        return IR::Opcode::FPUnordGreaterThan32;
    case IR::Opcode::FPOrdLessThanEqual16:
        return IR::Opcode::FPOrdLessThanEqual32;
    case IR::Opcode::FPUnordLessThanEqual16:
        return IR::Opcode::FPUnordLessThanEqual32;
    case IR::Opcode::FPOrdGreaterThanEqual16:
        return IR::Opcode::FPOrdGreaterThanEqual32;
    case IR::Opcode::FPUnordGreaterThanEqual16:
        return IR::Opcode::FPUnordGreaterThanEqual32;
    case IR::Opcode::FPIsNan16:
        return IR::Opcode::FPIsNan32;
    // Float-to-integer conversions.
    case IR::Opcode::ConvertS16F16:
        return IR::Opcode::ConvertS16F32;
    case IR::Opcode::ConvertS32F16:
        return IR::Opcode::ConvertS32F32;
    case IR::Opcode::ConvertS64F16:
        return IR::Opcode::ConvertS64F32;
    case IR::Opcode::ConvertU16F16:
        return IR::Opcode::ConvertU16F32;
    case IR::Opcode::ConvertU32F16:
        return IR::Opcode::ConvertU32F32;
    case IR::Opcode::ConvertU64F16:
        return IR::Opcode::ConvertU64F32;
    // Packed f16x2 values become the generic half2x16 packing.
    case IR::Opcode::PackFloat2x16:
        return IR::Opcode::PackHalf2x16;
    case IR::Opcode::UnpackFloat2x16:
        return IR::Opcode::UnpackHalf2x16;
    // With every value held as f32, f16<->f32 conversions are no-ops.
    case IR::Opcode::ConvertF32F16:
        return IR::Opcode::Identity;
    case IR::Opcode::ConvertF16F32:
        return IR::Opcode::Identity;
    // Integer-to-float conversions.
    case IR::Opcode::ConvertF16S8:
        return IR::Opcode::ConvertF32S8;
    case IR::Opcode::ConvertF16S16:
        return IR::Opcode::ConvertF32S16;
    case IR::Opcode::ConvertF16S32:
        return IR::Opcode::ConvertF32S32;
    case IR::Opcode::ConvertF16S64:
        return IR::Opcode::ConvertF32S64;
    case IR::Opcode::ConvertF16U8:
        return IR::Opcode::ConvertF32U8;
    case IR::Opcode::ConvertF16U16:
        return IR::Opcode::ConvertF32U16;
    case IR::Opcode::ConvertF16U32:
        return IR::Opcode::ConvertF32U32;
    case IR::Opcode::ConvertF16U64:
        return IR::Opcode::ConvertF32U64;
    // f16x2 atomics map to their f32x2 counterparts.
    case IR::Opcode::GlobalAtomicAddF16x2:
        return IR::Opcode::GlobalAtomicAddF32x2;
    case IR::Opcode::StorageAtomicAddF16x2:
        return IR::Opcode::StorageAtomicAddF32x2;
    case IR::Opcode::GlobalAtomicMinF16x2:
        return IR::Opcode::GlobalAtomicMinF32x2;
    case IR::Opcode::StorageAtomicMinF16x2:
        return IR::Opcode::StorageAtomicMinF32x2;
    case IR::Opcode::GlobalAtomicMaxF16x2:
        return IR::Opcode::GlobalAtomicMaxF32x2;
    case IR::Opcode::StorageAtomicMaxF16x2:
        return IR::Opcode::StorageAtomicMaxF32x2;
    default:
        return op;
    }
}
} // Anonymous namespace

// Rewrites every instruction's opcode from its 16-bit floating-point form to
// the 32-bit form, for hosts without native fp16 support.
void LowerFp16ToFp32(IR::Program& program) {
    for (IR::Block* const block : program.blocks) {
        for (IR::Inst& inst : block->Instructions()) {
            inst.ReplaceOpcode(Replace(inst.GetOpcode()));
        }
    }
}
} // namespace Shader::Optimization
// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include "shader_recompiler/frontend/ir/value.h"
#include "shader_recompiler/ir_opt/passes.h"
namespace Shader::Optimization {
namespace {
// Maps each 16-bit floating-point opcode to its 32-bit equivalent.
// Opcodes without a 16-bit form fall through to the default case and are
// returned unchanged.
IR::Opcode Replace(IR::Opcode op) {
    switch (op) {
    // Arithmetic and rounding.
    case IR::Opcode::FPAbs16:
        return IR::Opcode::FPAbs32;
    case IR::Opcode::FPAdd16:
        return IR::Opcode::FPAdd32;
    case IR::Opcode::FPCeil16:
        return IR::Opcode::FPCeil32;
    case IR::Opcode::FPFloor16:
        return IR::Opcode::FPFloor32;
    case IR::Opcode::FPFma16:
        return IR::Opcode::FPFma32;
    case IR::Opcode::FPMul16:
        return IR::Opcode::FPMul32;
    case IR::Opcode::FPNeg16:
        return IR::Opcode::FPNeg32;
    case IR::Opcode::FPRoundEven16:
        return IR::Opcode::FPRoundEven32;
    case IR::Opcode::FPSaturate16:
        return IR::Opcode::FPSaturate32;
    case IR::Opcode::FPClamp16:
        return IR::Opcode::FPClamp32;
    case IR::Opcode::FPTrunc16:
        return IR::Opcode::FPTrunc32;
    // Composite construction, extraction and insertion on f16 vectors.
    case IR::Opcode::CompositeConstructF16x2:
        return IR::Opcode::CompositeConstructF32x2;
    case IR::Opcode::CompositeConstructF16x3:
        return IR::Opcode::CompositeConstructF32x3;
    case IR::Opcode::CompositeConstructF16x4:
        return IR::Opcode::CompositeConstructF32x4;
    case IR::Opcode::CompositeExtractF16x2:
        return IR::Opcode::CompositeExtractF32x2;
    case IR::Opcode::CompositeExtractF16x3:
        return IR::Opcode::CompositeExtractF32x3;
    case IR::Opcode::CompositeExtractF16x4:
        return IR::Opcode::CompositeExtractF32x4;
    case IR::Opcode::CompositeInsertF16x2:
        return IR::Opcode::CompositeInsertF32x2;
    case IR::Opcode::CompositeInsertF16x3:
        return IR::Opcode::CompositeInsertF32x3;
    case IR::Opcode::CompositeInsertF16x4:
        return IR::Opcode::CompositeInsertF32x4;
    // Comparisons (ordered and unordered variants).
    case IR::Opcode::FPOrdEqual16:
        return IR::Opcode::FPOrdEqual32;
    case IR::Opcode::FPUnordEqual16:
        return IR::Opcode::FPUnordEqual32;
    case IR::Opcode::FPOrdNotEqual16:
        return IR::Opcode::FPOrdNotEqual32;
    case IR::Opcode::FPUnordNotEqual16:
        return IR::Opcode::FPUnordNotEqual32;
    case IR::Opcode::FPOrdLessThan16:
        return IR::Opcode::FPOrdLessThan32;
    case IR::Opcode::FPUnordLessThan16:
        return IR::Opcode::FPUnordLessThan32;
    case IR::Opcode::FPOrdGreaterThan16:
        return IR::Opcode::FPOrdGreaterThan32;
    case IR::Opcode::FPUnordGreaterThan16:
        return IR::Opcode::FPUnordGreaterThan32;
    case IR::Opcode::FPOrdLessThanEqual16:
        return IR::Opcode::FPOrdLessThanEqual32;
    case IR::Opcode::FPUnordLessThanEqual16:
        return IR::Opcode::FPUnordLessThanEqual32;
    case IR::Opcode::FPOrdGreaterThanEqual16:
        return IR::Opcode::FPOrdGreaterThanEqual32;
    case IR::Opcode::FPUnordGreaterThanEqual16:
        return IR::Opcode::FPUnordGreaterThanEqual32;
    case IR::Opcode::FPIsNan16:
        return IR::Opcode::FPIsNan32;
    // Float-to-integer conversions.
    case IR::Opcode::ConvertS16F16:
        return IR::Opcode::ConvertS16F32;
    case IR::Opcode::ConvertS32F16:
        return IR::Opcode::ConvertS32F32;
    case IR::Opcode::ConvertS64F16:
        return IR::Opcode::ConvertS64F32;
    case IR::Opcode::ConvertU16F16:
        return IR::Opcode::ConvertU16F32;
    case IR::Opcode::ConvertU32F16:
        return IR::Opcode::ConvertU32F32;
    case IR::Opcode::ConvertU64F16:
        return IR::Opcode::ConvertU64F32;
    // Packed f16x2 values become the generic half2x16 packing.
    case IR::Opcode::PackFloat2x16:
        return IR::Opcode::PackHalf2x16;
    case IR::Opcode::UnpackFloat2x16:
        return IR::Opcode::UnpackHalf2x16;
    // With every value held as f32, f16<->f32 conversions are no-ops.
    case IR::Opcode::ConvertF32F16:
        return IR::Opcode::Identity;
    case IR::Opcode::ConvertF16F32:
        return IR::Opcode::Identity;
    // Integer-to-float conversions.
    case IR::Opcode::ConvertF16S8:
        return IR::Opcode::ConvertF32S8;
    case IR::Opcode::ConvertF16S16:
        return IR::Opcode::ConvertF32S16;
    case IR::Opcode::ConvertF16S32:
        return IR::Opcode::ConvertF32S32;
    case IR::Opcode::ConvertF16S64:
        return IR::Opcode::ConvertF32S64;
    case IR::Opcode::ConvertF16U8:
        return IR::Opcode::ConvertF32U8;
    case IR::Opcode::ConvertF16U16:
        return IR::Opcode::ConvertF32U16;
    case IR::Opcode::ConvertF16U32:
        return IR::Opcode::ConvertF32U32;
    case IR::Opcode::ConvertF16U64:
        return IR::Opcode::ConvertF32U64;
    // f16x2 atomics map to their f32x2 counterparts.
    case IR::Opcode::GlobalAtomicAddF16x2:
        return IR::Opcode::GlobalAtomicAddF32x2;
    case IR::Opcode::StorageAtomicAddF16x2:
        return IR::Opcode::StorageAtomicAddF32x2;
    case IR::Opcode::GlobalAtomicMinF16x2:
        return IR::Opcode::GlobalAtomicMinF32x2;
    case IR::Opcode::StorageAtomicMinF16x2:
        return IR::Opcode::StorageAtomicMinF32x2;
    case IR::Opcode::GlobalAtomicMaxF16x2:
        return IR::Opcode::GlobalAtomicMaxF32x2;
    case IR::Opcode::StorageAtomicMaxF16x2:
        return IR::Opcode::StorageAtomicMaxF32x2;
    default:
        return op;
    }
}
} // Anonymous namespace

// Rewrites every instruction's opcode from its 16-bit floating-point form to
// the 32-bit form, for hosts without native fp16 support.
void LowerFp16ToFp32(IR::Program& program) {
    for (IR::Block* const block : program.blocks) {
        for (IR::Inst& inst : block->Instructions()) {
            inst.ReplaceOpcode(Replace(inst.GetOpcode()));
        }
    }
}
} // namespace Shader::Optimization

View File

@@ -1,237 +1,237 @@
// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include <utility>
#include "shader_recompiler/exception.h"
#include "shader_recompiler/frontend/ir/basic_block.h"
#include "shader_recompiler/frontend/ir/ir_emitter.h"
#include "shader_recompiler/frontend/ir/program.h"
#include "shader_recompiler/frontend/ir/value.h"
#include "shader_recompiler/ir_opt/passes.h"
namespace Shader::Optimization {
namespace {
// Splits a 64-bit value into its {low, high} 32-bit halves.
// Immediates are split at compile time; any other value is taken apart with
// CompositeExtract (assumes it is represented as a 32x2 composite — the
// invariant this pass establishes for all 64-bit producers).
std::pair<IR::U32, IR::U32> Unpack(IR::IREmitter& ir, const IR::Value& packed) {
    if (packed.IsImmediate()) {
        const u64 value{packed.U64()};
        return {
            ir.Imm32(static_cast<u32>(value)),
            ir.Imm32(static_cast<u32>(value >> 32)),
        };
    } else {
        return std::pair<IR::U32, IR::U32>{
            ir.CompositeExtract(packed, 0u),
            ir.CompositeExtract(packed, 1u),
        };
    }
}

// Emulates IAdd64 with two 32-bit adds, propagating the low half's carry
// into the high half.
void IAdd64To32(IR::Block& block, IR::Inst& inst) {
    if (inst.HasAssociatedPseudoOperation()) {
        throw NotImplementedException("IAdd64 emulation with pseudo instructions");
    }
    IR::IREmitter ir(block, IR::Block::InstructionList::s_iterator_to(inst));
    const auto [a_lo, a_hi]{Unpack(ir, inst.Arg(0))};
    const auto [b_lo, b_hi]{Unpack(ir, inst.Arg(1))};

    const IR::U32 ret_lo{ir.IAdd(a_lo, b_lo)};
    const IR::U32 carry{ir.Select(ir.GetCarryFromOp(ret_lo), ir.Imm32(1u), ir.Imm32(0u))};

    const IR::U32 ret_hi{ir.IAdd(ir.IAdd(a_hi, b_hi), carry)};
    inst.ReplaceUsesWith(ir.CompositeConstruct(ret_lo, ret_hi));
}

// Emulates ISub64 with two 32-bit subtractions, propagating the borrow of the
// low half into the high half.
void ISub64To32(IR::Block& block, IR::Inst& inst) {
    if (inst.HasAssociatedPseudoOperation()) {
        throw NotImplementedException("ISub64 emulation with pseudo instructions");
    }
    IR::IREmitter ir(block, IR::Block::InstructionList::s_iterator_to(inst));
    const auto [a_lo, a_hi]{Unpack(ir, inst.Arg(0))};
    const auto [b_lo, b_hi]{Unpack(ir, inst.Arg(1))};

    const IR::U32 ret_lo{ir.ISub(a_lo, b_lo)};
    // Unsigned wrap-around: a borrow occurred iff the result exceeds the
    // minuend (the trailing 'false' appears to select an unsigned compare —
    // consistent with the 'true' used for signed compares below).
    const IR::U1 underflow{ir.IGreaterThan(ret_lo, a_lo, false)};
    const IR::U32 underflow_bit{ir.Select(underflow, ir.Imm32(1u), ir.Imm32(0u))};

    const IR::U32 ret_hi{ir.ISub(ir.ISub(a_hi, b_hi), underflow_bit)};
    inst.ReplaceUsesWith(ir.CompositeConstruct(ret_lo, ret_hi));
}

// Emulates INeg64 as two's complement: invert both halves, add one to the low
// half, and propagate the carry into the high half.
void INeg64To32(IR::Block& block, IR::Inst& inst) {
    if (inst.HasAssociatedPseudoOperation()) {
        throw NotImplementedException("INeg64 emulation with pseudo instructions");
    }
    IR::IREmitter ir(block, IR::Block::InstructionList::s_iterator_to(inst));
    auto [lo, hi]{Unpack(ir, inst.Arg(0))};
    lo = ir.BitwiseNot(lo);
    hi = ir.BitwiseNot(hi);

    lo = ir.IAdd(lo, ir.Imm32(1));

    const IR::U32 carry{ir.Select(ir.GetCarryFromOp(lo), ir.Imm32(1u), ir.Imm32(0u))};
    hi = ir.IAdd(hi, carry);

    inst.ReplaceUsesWith(ir.CompositeConstruct(lo, hi));
}

// Emulates ShiftLeftLogical64 on two 32-bit halves. Three cases are built and
// selected between: shift == 0 (input unchanged), shift >= 32 ("long": low is
// zero, high is lo << (shift - 32)), and 0 < shift < 32 ("short": the top
// 'shift' bits of lo spill into the shifted high half).
void ShiftLeftLogical64To32(IR::Block& block, IR::Inst& inst) {
    if (inst.HasAssociatedPseudoOperation()) {
        throw NotImplementedException("ShiftLeftLogical64 emulation with pseudo instructions");
    }
    IR::IREmitter ir(block, IR::Block::InstructionList::s_iterator_to(inst));
    const auto [lo, hi]{Unpack(ir, inst.Arg(0))};
    const IR::U32 shift{inst.Arg(1)};

    const IR::U32 shifted_lo{ir.ShiftLeftLogical(lo, shift)};
    const IR::U32 shifted_hi{ir.ShiftLeftLogical(hi, shift)};

    const IR::U32 inv_shift{ir.ISub(shift, ir.Imm32(32))};
    const IR::U1 is_long{ir.IGreaterThanEqual(inv_shift, ir.Imm32(0), true)};
    const IR::U1 is_zero{ir.IEqual(shift, ir.Imm32(0))};

    const IR::U32 long_ret_lo{ir.Imm32(0)};
    const IR::U32 long_ret_hi{ir.ShiftLeftLogical(lo, inv_shift)};

    // Bits of lo that cross the 32-bit boundary: the top 'shift' bits.
    const IR::U32 shift_complement{ir.ISub(ir.Imm32(32), shift)};
    const IR::U32 lo_extract{ir.BitFieldExtract(lo, shift_complement, shift, false)};
    const IR::U32 short_ret_lo{shifted_lo};
    const IR::U32 short_ret_hi{ir.BitwiseOr(shifted_hi, lo_extract)};

    const IR::U32 zero_ret_lo{lo};
    const IR::U32 zero_ret_hi{hi};

    const IR::U32 non_zero_lo{ir.Select(is_long, long_ret_lo, short_ret_lo)};
    const IR::U32 non_zero_hi{ir.Select(is_long, long_ret_hi, short_ret_hi)};

    const IR::U32 ret_lo{ir.Select(is_zero, zero_ret_lo, non_zero_lo)};
    const IR::U32 ret_hi{ir.Select(is_zero, zero_ret_hi, non_zero_hi)};
    inst.ReplaceUsesWith(ir.CompositeConstruct(ret_lo, ret_hi));
}

// Emulates ShiftRightLogical64 on two 32-bit halves; mirror image of
// ShiftLeftLogical64To32 (bits of hi spill into the shifted low half).
void ShiftRightLogical64To32(IR::Block& block, IR::Inst& inst) {
    if (inst.HasAssociatedPseudoOperation()) {
        throw NotImplementedException("ShiftRightLogical64 emulation with pseudo instructions");
    }
    IR::IREmitter ir(block, IR::Block::InstructionList::s_iterator_to(inst));
    const auto [lo, hi]{Unpack(ir, inst.Arg(0))};
    const IR::U32 shift{inst.Arg(1)};

    const IR::U32 shifted_lo{ir.ShiftRightLogical(lo, shift)};
    const IR::U32 shifted_hi{ir.ShiftRightLogical(hi, shift)};

    const IR::U32 inv_shift{ir.ISub(shift, ir.Imm32(32))};
    const IR::U1 is_long{ir.IGreaterThanEqual(inv_shift, ir.Imm32(0), true)};
    const IR::U1 is_zero{ir.IEqual(shift, ir.Imm32(0))};

    const IR::U32 long_ret_hi{ir.Imm32(0)};
    const IR::U32 long_ret_lo{ir.ShiftRightLogical(hi, inv_shift)};

    // The bottom 'shift' bits of hi become the top bits of the low half.
    const IR::U32 shift_complement{ir.ISub(ir.Imm32(32), shift)};
    const IR::U32 short_hi_extract{ir.BitFieldExtract(hi, ir.Imm32(0), shift)};
    const IR::U32 short_ret_hi{shifted_hi};
    const IR::U32 short_ret_lo{
        ir.BitFieldInsert(shifted_lo, short_hi_extract, shift_complement, shift)};

    const IR::U32 zero_ret_lo{lo};
    const IR::U32 zero_ret_hi{hi};

    const IR::U32 non_zero_lo{ir.Select(is_long, long_ret_lo, short_ret_lo)};
    const IR::U32 non_zero_hi{ir.Select(is_long, long_ret_hi, short_ret_hi)};

    const IR::U32 ret_lo{ir.Select(is_zero, zero_ret_lo, non_zero_lo)};
    const IR::U32 ret_hi{ir.Select(is_zero, zero_ret_hi, non_zero_hi)};
    inst.ReplaceUsesWith(ir.CompositeConstruct(ret_lo, ret_hi));
}

// Emulates ShiftRightArithmetic64: like the logical variant, but the high
// half shifts arithmetically and the long path fills with the sign extension.
void ShiftRightArithmetic64To32(IR::Block& block, IR::Inst& inst) {
    if (inst.HasAssociatedPseudoOperation()) {
        throw NotImplementedException("ShiftRightArithmetic64 emulation with pseudo instructions");
    }
    IR::IREmitter ir(block, IR::Block::InstructionList::s_iterator_to(inst));
    const auto [lo, hi]{Unpack(ir, inst.Arg(0))};
    const IR::U32 shift{inst.Arg(1)};

    const IR::U32 shifted_lo{ir.ShiftRightLogical(lo, shift)};
    const IR::U32 shifted_hi{ir.ShiftRightArithmetic(hi, shift)};

    // All sign bits: 0 or 0xFFFFFFFF depending on the sign of hi.
    const IR::U32 sign_extension{ir.ShiftRightArithmetic(hi, ir.Imm32(31))};

    const IR::U32 inv_shift{ir.ISub(shift, ir.Imm32(32))};
    const IR::U1 is_long{ir.IGreaterThanEqual(inv_shift, ir.Imm32(0), true)};
    const IR::U1 is_zero{ir.IEqual(shift, ir.Imm32(0))};

    const IR::U32 long_ret_hi{sign_extension};
    const IR::U32 long_ret_lo{ir.ShiftRightArithmetic(hi, inv_shift)};

    const IR::U32 shift_complement{ir.ISub(ir.Imm32(32), shift)};
    // Brace-initialized for consistency with ShiftRightLogical64To32 (was
    // parenthesized, the only such initializer in this file).
    const IR::U32 short_hi_extract{ir.BitFieldExtract(hi, ir.Imm32(0), shift)};
    const IR::U32 short_ret_hi{shifted_hi};
    const IR::U32 short_ret_lo{
        ir.BitFieldInsert(shifted_lo, short_hi_extract, shift_complement, shift)};

    const IR::U32 zero_ret_lo{lo};
    const IR::U32 zero_ret_hi{hi};

    const IR::U32 non_zero_lo{ir.Select(is_long, long_ret_lo, short_ret_lo)};
    const IR::U32 non_zero_hi{ir.Select(is_long, long_ret_hi, short_ret_hi)};

    const IR::U32 ret_lo{ir.Select(is_zero, zero_ret_lo, non_zero_lo)};
    const IR::U32 ret_hi{ir.Select(is_zero, zero_ret_hi, non_zero_hi)};
    inst.ReplaceUsesWith(ir.CompositeConstruct(ret_lo, ret_hi));
}

// Dispatches one instruction: 64-bit ops are either rewritten in place to
// their 32x2 opcode or expanded by the helpers above. Other ops are untouched.
void Lower(IR::Block& block, IR::Inst& inst) {
    switch (inst.GetOpcode()) {
    case IR::Opcode::PackUint2x32:
    case IR::Opcode::UnpackUint2x32:
        // 64-bit values are now 32x2 composites, so pack/unpack are no-ops.
        return inst.ReplaceOpcode(IR::Opcode::Identity);
    case IR::Opcode::IAdd64:
        return IAdd64To32(block, inst);
    case IR::Opcode::ISub64:
        return ISub64To32(block, inst);
    case IR::Opcode::INeg64:
        return INeg64To32(block, inst);
    case IR::Opcode::ShiftLeftLogical64:
        return ShiftLeftLogical64To32(block, inst);
    case IR::Opcode::ShiftRightLogical64:
        return ShiftRightLogical64To32(block, inst);
    case IR::Opcode::ShiftRightArithmetic64:
        return ShiftRightArithmetic64To32(block, inst);
    case IR::Opcode::SharedAtomicExchange64:
        return inst.ReplaceOpcode(IR::Opcode::SharedAtomicExchange32x2);
    case IR::Opcode::GlobalAtomicIAdd64:
        return inst.ReplaceOpcode(IR::Opcode::GlobalAtomicIAdd32x2);
    case IR::Opcode::GlobalAtomicSMin64:
        return inst.ReplaceOpcode(IR::Opcode::GlobalAtomicSMin32x2);
    case IR::Opcode::GlobalAtomicUMin64:
        return inst.ReplaceOpcode(IR::Opcode::GlobalAtomicUMin32x2);
    case IR::Opcode::GlobalAtomicSMax64:
        return inst.ReplaceOpcode(IR::Opcode::GlobalAtomicSMax32x2);
    case IR::Opcode::GlobalAtomicUMax64:
        return inst.ReplaceOpcode(IR::Opcode::GlobalAtomicUMax32x2);
    case IR::Opcode::GlobalAtomicAnd64:
        return inst.ReplaceOpcode(IR::Opcode::GlobalAtomicAnd32x2);
    case IR::Opcode::GlobalAtomicOr64:
        return inst.ReplaceOpcode(IR::Opcode::GlobalAtomicOr32x2);
    case IR::Opcode::GlobalAtomicXor64:
        return inst.ReplaceOpcode(IR::Opcode::GlobalAtomicXor32x2);
    case IR::Opcode::GlobalAtomicExchange64:
        return inst.ReplaceOpcode(IR::Opcode::GlobalAtomicExchange32x2);
    default:
        break;
    }
}
} // Anonymous namespace

// Rewrites all 64-bit integer operations into pairs of 32-bit operations, for
// hosts without native int64 support. Blocks are visited in reverse
// post-order (producers before consumers).
void LowerInt64ToInt32(IR::Program& program) {
    const auto end{program.post_order_blocks.rend()};
    for (auto it = program.post_order_blocks.rbegin(); it != end; ++it) {
        IR::Block* const block{*it};
        for (IR::Inst& inst : block->Instructions()) {
            Lower(*block, inst);
        }
    }
}
} // namespace Shader::Optimization
// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include <utility>
#include "shader_recompiler/exception.h"
#include "shader_recompiler/frontend/ir/basic_block.h"
#include "shader_recompiler/frontend/ir/ir_emitter.h"
#include "shader_recompiler/frontend/ir/program.h"
#include "shader_recompiler/frontend/ir/value.h"
#include "shader_recompiler/ir_opt/passes.h"
namespace Shader::Optimization {
namespace {
std::pair<IR::U32, IR::U32> Unpack(IR::IREmitter& ir, const IR::Value& packed) {
if (packed.IsImmediate()) {
const u64 value{packed.U64()};
return {
ir.Imm32(static_cast<u32>(value)),
ir.Imm32(static_cast<u32>(value >> 32)),
};
} else {
return std::pair<IR::U32, IR::U32>{
ir.CompositeExtract(packed, 0u),
ir.CompositeExtract(packed, 1u),
};
}
}
void IAdd64To32(IR::Block& block, IR::Inst& inst) {
if (inst.HasAssociatedPseudoOperation()) {
throw NotImplementedException("IAdd64 emulation with pseudo instructions");
}
IR::IREmitter ir(block, IR::Block::InstructionList::s_iterator_to(inst));
const auto [a_lo, a_hi]{Unpack(ir, inst.Arg(0))};
const auto [b_lo, b_hi]{Unpack(ir, inst.Arg(1))};
const IR::U32 ret_lo{ir.IAdd(a_lo, b_lo)};
const IR::U32 carry{ir.Select(ir.GetCarryFromOp(ret_lo), ir.Imm32(1u), ir.Imm32(0u))};
const IR::U32 ret_hi{ir.IAdd(ir.IAdd(a_hi, b_hi), carry)};
inst.ReplaceUsesWith(ir.CompositeConstruct(ret_lo, ret_hi));
}
void ISub64To32(IR::Block& block, IR::Inst& inst) {
if (inst.HasAssociatedPseudoOperation()) {
throw NotImplementedException("ISub64 emulation with pseudo instructions");
}
IR::IREmitter ir(block, IR::Block::InstructionList::s_iterator_to(inst));
const auto [a_lo, a_hi]{Unpack(ir, inst.Arg(0))};
const auto [b_lo, b_hi]{Unpack(ir, inst.Arg(1))};
const IR::U32 ret_lo{ir.ISub(a_lo, b_lo)};
const IR::U1 underflow{ir.IGreaterThan(ret_lo, a_lo, false)};
const IR::U32 underflow_bit{ir.Select(underflow, ir.Imm32(1u), ir.Imm32(0u))};
const IR::U32 ret_hi{ir.ISub(ir.ISub(a_hi, b_hi), underflow_bit)};
inst.ReplaceUsesWith(ir.CompositeConstruct(ret_lo, ret_hi));
}
void INeg64To32(IR::Block& block, IR::Inst& inst) {
if (inst.HasAssociatedPseudoOperation()) {
throw NotImplementedException("INeg64 emulation with pseudo instructions");
}
IR::IREmitter ir(block, IR::Block::InstructionList::s_iterator_to(inst));
auto [lo, hi]{Unpack(ir, inst.Arg(0))};
lo = ir.BitwiseNot(lo);
hi = ir.BitwiseNot(hi);
lo = ir.IAdd(lo, ir.Imm32(1));
const IR::U32 carry{ir.Select(ir.GetCarryFromOp(lo), ir.Imm32(1u), ir.Imm32(0u))};
hi = ir.IAdd(hi, carry);
inst.ReplaceUsesWith(ir.CompositeConstruct(lo, hi));
}
void ShiftLeftLogical64To32(IR::Block& block, IR::Inst& inst) {
if (inst.HasAssociatedPseudoOperation()) {
throw NotImplementedException("ShiftLeftLogical64 emulation with pseudo instructions");
}
IR::IREmitter ir(block, IR::Block::InstructionList::s_iterator_to(inst));
const auto [lo, hi]{Unpack(ir, inst.Arg(0))};
const IR::U32 shift{inst.Arg(1)};
const IR::U32 shifted_lo{ir.ShiftLeftLogical(lo, shift)};
const IR::U32 shifted_hi{ir.ShiftLeftLogical(hi, shift)};
const IR::U32 inv_shift{ir.ISub(shift, ir.Imm32(32))};
const IR::U1 is_long{ir.IGreaterThanEqual(inv_shift, ir.Imm32(0), true)};
const IR::U1 is_zero{ir.IEqual(shift, ir.Imm32(0))};
const IR::U32 long_ret_lo{ir.Imm32(0)};
const IR::U32 long_ret_hi{ir.ShiftLeftLogical(lo, inv_shift)};
const IR::U32 shift_complement{ir.ISub(ir.Imm32(32), shift)};
const IR::U32 lo_extract{ir.BitFieldExtract(lo, shift_complement, shift, false)};
const IR::U32 short_ret_lo{shifted_lo};
const IR::U32 short_ret_hi{ir.BitwiseOr(shifted_hi, lo_extract)};
const IR::U32 zero_ret_lo{lo};
const IR::U32 zero_ret_hi{hi};
const IR::U32 non_zero_lo{ir.Select(is_long, long_ret_lo, short_ret_lo)};
const IR::U32 non_zero_hi{ir.Select(is_long, long_ret_hi, short_ret_hi)};
const IR::U32 ret_lo{ir.Select(is_zero, zero_ret_lo, non_zero_lo)};
const IR::U32 ret_hi{ir.Select(is_zero, zero_ret_hi, non_zero_hi)};
inst.ReplaceUsesWith(ir.CompositeConstruct(ret_lo, ret_hi));
}
void ShiftRightLogical64To32(IR::Block& block, IR::Inst& inst) {
if (inst.HasAssociatedPseudoOperation()) {
throw NotImplementedException("ShiftRightLogical64 emulation with pseudo instructions");
}
IR::IREmitter ir(block, IR::Block::InstructionList::s_iterator_to(inst));
const auto [lo, hi]{Unpack(ir, inst.Arg(0))};
const IR::U32 shift{inst.Arg(1)};
const IR::U32 shifted_lo{ir.ShiftRightLogical(lo, shift)};
const IR::U32 shifted_hi{ir.ShiftRightLogical(hi, shift)};
const IR::U32 inv_shift{ir.ISub(shift, ir.Imm32(32))};
const IR::U1 is_long{ir.IGreaterThanEqual(inv_shift, ir.Imm32(0), true)};
const IR::U1 is_zero{ir.IEqual(shift, ir.Imm32(0))};
const IR::U32 long_ret_hi{ir.Imm32(0)};
const IR::U32 long_ret_lo{ir.ShiftRightLogical(hi, inv_shift)};
const IR::U32 shift_complement{ir.ISub(ir.Imm32(32), shift)};
const IR::U32 short_hi_extract{ir.BitFieldExtract(hi, ir.Imm32(0), shift)};
const IR::U32 short_ret_hi{shifted_hi};
const IR::U32 short_ret_lo{
ir.BitFieldInsert(shifted_lo, short_hi_extract, shift_complement, shift)};
const IR::U32 zero_ret_lo{lo};
const IR::U32 zero_ret_hi{hi};
const IR::U32 non_zero_lo{ir.Select(is_long, long_ret_lo, short_ret_lo)};
const IR::U32 non_zero_hi{ir.Select(is_long, long_ret_hi, short_ret_hi)};
const IR::U32 ret_lo{ir.Select(is_zero, zero_ret_lo, non_zero_lo)};
const IR::U32 ret_hi{ir.Select(is_zero, zero_ret_hi, non_zero_hi)};
inst.ReplaceUsesWith(ir.CompositeConstruct(ret_lo, ret_hi));
}
void ShiftRightArithmetic64To32(IR::Block& block, IR::Inst& inst) {
if (inst.HasAssociatedPseudoOperation()) {
throw NotImplementedException("ShiftRightArithmetic64 emulation with pseudo instructions");
}
IR::IREmitter ir(block, IR::Block::InstructionList::s_iterator_to(inst));
const auto [lo, hi]{Unpack(ir, inst.Arg(0))};
const IR::U32 shift{inst.Arg(1)};
const IR::U32 shifted_lo{ir.ShiftRightLogical(lo, shift)};
const IR::U32 shifted_hi{ir.ShiftRightArithmetic(hi, shift)};
const IR::U32 sign_extension{ir.ShiftRightArithmetic(hi, ir.Imm32(31))};
const IR::U32 inv_shift{ir.ISub(shift, ir.Imm32(32))};
const IR::U1 is_long{ir.IGreaterThanEqual(inv_shift, ir.Imm32(0), true)};
const IR::U1 is_zero{ir.IEqual(shift, ir.Imm32(0))};
const IR::U32 long_ret_hi{sign_extension};
const IR::U32 long_ret_lo{ir.ShiftRightArithmetic(hi, inv_shift)};
const IR::U32 shift_complement{ir.ISub(ir.Imm32(32), shift)};
const IR::U32 short_hi_extract(ir.BitFieldExtract(hi, ir.Imm32(0), shift));
const IR::U32 short_ret_hi{shifted_hi};
const IR::U32 short_ret_lo{
ir.BitFieldInsert(shifted_lo, short_hi_extract, shift_complement, shift)};
const IR::U32 zero_ret_lo{lo};
const IR::U32 zero_ret_hi{hi};
const IR::U32 non_zero_lo{ir.Select(is_long, long_ret_lo, short_ret_lo)};
const IR::U32 non_zero_hi{ir.Select(is_long, long_ret_hi, short_ret_hi)};
const IR::U32 ret_lo{ir.Select(is_zero, zero_ret_lo, non_zero_lo)};
const IR::U32 ret_hi{ir.Select(is_zero, zero_ret_hi, non_zero_hi)};
inst.ReplaceUsesWith(ir.CompositeConstruct(ret_lo, ret_hi));
}
void Lower(IR::Block& block, IR::Inst& inst) {
switch (inst.GetOpcode()) {
case IR::Opcode::PackUint2x32:
case IR::Opcode::UnpackUint2x32:
return inst.ReplaceOpcode(IR::Opcode::Identity);
case IR::Opcode::IAdd64:
return IAdd64To32(block, inst);
case IR::Opcode::ISub64:
return ISub64To32(block, inst);
case IR::Opcode::INeg64:
return INeg64To32(block, inst);
case IR::Opcode::ShiftLeftLogical64:
return ShiftLeftLogical64To32(block, inst);
case IR::Opcode::ShiftRightLogical64:
return ShiftRightLogical64To32(block, inst);
case IR::Opcode::ShiftRightArithmetic64:
return ShiftRightArithmetic64To32(block, inst);
case IR::Opcode::SharedAtomicExchange64:
return inst.ReplaceOpcode(IR::Opcode::SharedAtomicExchange32x2);
case IR::Opcode::GlobalAtomicIAdd64:
return inst.ReplaceOpcode(IR::Opcode::GlobalAtomicIAdd32x2);
case IR::Opcode::GlobalAtomicSMin64:
return inst.ReplaceOpcode(IR::Opcode::GlobalAtomicSMin32x2);
case IR::Opcode::GlobalAtomicUMin64:
return inst.ReplaceOpcode(IR::Opcode::GlobalAtomicUMin32x2);
case IR::Opcode::GlobalAtomicSMax64:
return inst.ReplaceOpcode(IR::Opcode::GlobalAtomicSMax32x2);
case IR::Opcode::GlobalAtomicUMax64:
return inst.ReplaceOpcode(IR::Opcode::GlobalAtomicUMax32x2);
case IR::Opcode::GlobalAtomicAnd64:
return inst.ReplaceOpcode(IR::Opcode::GlobalAtomicAnd32x2);
case IR::Opcode::GlobalAtomicOr64:
return inst.ReplaceOpcode(IR::Opcode::GlobalAtomicOr32x2);
case IR::Opcode::GlobalAtomicXor64:
return inst.ReplaceOpcode(IR::Opcode::GlobalAtomicXor32x2);
case IR::Opcode::GlobalAtomicExchange64:
return inst.ReplaceOpcode(IR::Opcode::GlobalAtomicExchange32x2);
default:
break;
}
}
} // Anonymous namespace
// Pass entry point: walks every block in reverse post order and lowers each
// 64-bit integer instruction to an equivalent 32-bit-pair sequence.
void LowerInt64ToInt32(IR::Program& program) {
    auto& blocks{program.post_order_blocks};
    for (size_t index = blocks.size(); index > 0; --index) {
        IR::Block* const block{blocks[index - 1]};
        for (IR::Inst& inst : block->Instructions()) {
            Lower(*block, inst);
        }
    }
}
} // namespace Shader::Optimization

View File

@@ -1,29 +1,29 @@
// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include "shader_recompiler/environment.h"
#include "shader_recompiler/frontend/ir/program.h"
namespace Shader::Optimization {
// NOTE(review): summaries below are inferred from pass names and the visible
// pass implementations; confirm details against each pass's .cpp file.
// Gathers the program's resource/feature usage into its shader info.
void CollectShaderInfoPass(Environment& env, IR::Program& program);
// Folds operations whose operands are known at compile time.
void ConstantPropagationPass(IR::Program& program);
// Deletes instructions with no uses and no side effects, iterating in
// reverse so each removal can expose further dead instructions.
void DeadCodeEliminationPass(IR::Program& program);
// Replaces raw global memory accesses with storage buffer accesses.
void GlobalMemoryToStorageBufferPass(IR::Program& program);
// Removes Identity instructions, forwarding their uses to the source value.
void IdentityRemovalPass(IR::Program& program);
// Emulates 16-bit float operations with 32-bit floats.
void LowerFp16ToFp32(IR::Program& program);
// Emulates 64-bit integer operations with pairs of 32-bit operations.
void LowerInt64ToInt32(IR::Program& program);
// Patches coordinates, frag coord and point size for resolution rescaling.
void RescalingPass(IR::Program& program);
// Rewrites register, predicate and flag accesses into SSA form.
void SsaRewritePass(IR::Program& program);
// Resolves texture instructions against descriptors via the environment.
void TexturePass(Environment& env, IR::Program& program);
// Validates IR invariants (debugging aid).
void VerificationPass(const IR::Program& program);
// Dual Vertex
// Transform passes for the two halves of a dual-vertex program.
void VertexATransformPass(IR::Program& program);
void VertexBTransformPass(IR::Program& program);
// Merge descriptor usage info of both halves into 'base'.
void JoinTextureInfo(Info& base, Info& source);
void JoinStorageInfo(Info& base, Info& source);
} // namespace Shader::Optimization
// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include "shader_recompiler/environment.h"
#include "shader_recompiler/frontend/ir/program.h"
namespace Shader::Optimization {
// NOTE(review): summaries below are inferred from pass names and the visible
// pass implementations; confirm details against each pass's .cpp file.
// Gathers the program's resource/feature usage into its shader info.
void CollectShaderInfoPass(Environment& env, IR::Program& program);
// Folds operations whose operands are known at compile time.
void ConstantPropagationPass(IR::Program& program);
// Deletes instructions with no uses and no side effects, iterating in
// reverse so each removal can expose further dead instructions.
void DeadCodeEliminationPass(IR::Program& program);
// Replaces raw global memory accesses with storage buffer accesses.
void GlobalMemoryToStorageBufferPass(IR::Program& program);
// Removes Identity instructions, forwarding their uses to the source value.
void IdentityRemovalPass(IR::Program& program);
// Emulates 16-bit float operations with 32-bit floats.
void LowerFp16ToFp32(IR::Program& program);
// Emulates 64-bit integer operations with pairs of 32-bit operations.
void LowerInt64ToInt32(IR::Program& program);
// Patches coordinates, frag coord and point size for resolution rescaling.
void RescalingPass(IR::Program& program);
// Rewrites register, predicate and flag accesses into SSA form.
void SsaRewritePass(IR::Program& program);
// Resolves texture instructions against descriptors via the environment.
void TexturePass(Environment& env, IR::Program& program);
// Validates IR invariants (debugging aid).
void VerificationPass(const IR::Program& program);
// Dual Vertex
// Transform passes for the two halves of a dual-vertex program.
void VertexATransformPass(IR::Program& program);
void VertexBTransformPass(IR::Program& program);
// Merge descriptor usage info of both halves into 'base'.
void JoinTextureInfo(Info& base, Info& source);
void JoinStorageInfo(Info& base, Info& source);
} // namespace Shader::Optimization

View File

@@ -1,355 +1,355 @@
// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include "common/settings.h"
#include "shader_recompiler/environment.h"
#include "shader_recompiler/frontend/ir/ir_emitter.h"
#include "shader_recompiler/frontend/ir/modifiers.h"
#include "shader_recompiler/frontend/ir/program.h"
#include "shader_recompiler/frontend/ir/value.h"
#include "shader_recompiler/ir_opt/passes.h"
#include "shader_recompiler/shader_info.h"
namespace Shader::Optimization {
namespace {
// Only the 2D color texture flavours are affected by resolution rescaling;
// every other dimensionality keeps its original coordinates.
[[nodiscard]] bool IsTextureTypeRescalable(TextureType type) {
    return type == TextureType::Color2D || type == TextureType::ColorArray2D ||
           type == TextureType::Color2DRect;
}
// Handles position attribute reads whose bitcast value feeds a subgroup
// shuffle. The attribute read is tagged with the 0xDEADBEEF flag so that
// Visit() skips the regular frag-coord down-scale patch -- presumably so all
// invocations shuffle the raw (unscaled) value -- and the shuffle result is
// re-scaled here instead.
void VisitMark(IR::Block& block, IR::Inst& inst) {
    switch (inst.GetOpcode()) {
    case IR::Opcode::ShuffleIndex:
    case IR::Opcode::ShuffleUp:
    case IR::Opcode::ShuffleDown:
    case IR::Opcode::ShuffleButterfly: {
        // Only interested in shuffles of values produced by an instruction
        const IR::Value shfl_arg{inst.Arg(0)};
        if (shfl_arg.IsImmediate()) {
            break;
        }
        // The shuffled value must be a float bitcast of an integer source
        const IR::Inst* const arg_inst{shfl_arg.InstRecursive()};
        if (arg_inst->GetOpcode() != IR::Opcode::BitCastU32F32) {
            break;
        }
        const IR::Value bitcast_arg{arg_inst->Arg(0)};
        if (bitcast_arg.IsImmediate()) {
            break;
        }
        IR::Inst* const bitcast_inst{bitcast_arg.InstRecursive()};
        bool must_patch_outside = false;
        if (bitcast_inst->GetOpcode() == IR::Opcode::GetAttribute) {
            const IR::Attribute attr{bitcast_inst->Arg(0).Attribute()};
            switch (attr) {
            case IR::Attribute::PositionX:
            case IR::Attribute::PositionY:
                // Tag the read so PatchFragCoord is not applied to it
                bitcast_inst->SetFlags<u32>(0xDEADBEEF);
                must_patch_outside = true;
                break;
            default:
                break;
            }
        }
        if (must_patch_outside) {
            // Clone the shuffle ahead of itself, then replace the original
            // with clone * (1 / down_factor) to restore the scaled domain
            const auto it{IR::Block::InstructionList::s_iterator_to(inst)};
            IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
            const IR::F32 new_inst{&*block.PrependNewInst(it, inst)};
            const IR::F32 up_factor{ir.FPRecip(ir.ResolutionDownFactor())};
            const IR::Value converted{ir.FPMul(new_inst, up_factor)};
            inst.ReplaceUsesWith(converted);
        }
        break;
    }
    default:
        break;
    }
}
// Replaces a frag-coord attribute read with the same read multiplied by the
// resolution down factor, moving it into the guest's coordinate space.
void PatchFragCoord(IR::Block& block, IR::Inst& inst) {
    IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
    const IR::F32 scale{ir.ResolutionDownFactor()};
    const IR::F32 raw_coord{ir.GetAttribute(inst.Arg(0).Attribute())};
    inst.ReplaceUsesWith(ir.FPMul(raw_coord, scale));
}
// Multiplies the written point size by 1 / down_factor so points keep their
// on-screen size when rendering at a scaled resolution.
void PatchPointSize(IR::Block& block, IR::Inst& inst) {
    IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
    const IR::F32 original_size{inst.Arg(1)};
    const IR::F32 scale{ir.FPRecip(ir.ResolutionDownFactor())};
    inst.SetArg(1, ir.FPMul(original_size, scale));
}
// Converts an integer texel coordinate from guest to host resolution:
// (value * up_scale) >> down_shift, selected only when 'is_scaled' holds.
[[nodiscard]] IR::U32 Scale(IR::IREmitter& ir, const IR::U1& is_scaled, const IR::U32& value) {
    const u32 up_scale{Settings::values.resolution_info.up_scale};
    const u32 down_shift{Settings::values.resolution_info.down_shift};
    IR::U32 result{value};
    if (up_scale != 1) {
        result = ir.IMul(result, ir.Imm32(up_scale));
    }
    if (down_shift != 0) {
        result = ir.ShiftRightArithmetic(result, ir.Imm32(down_shift));
    }
    return IR::U32{ir.Select(is_scaled, result, value)};
}
// Scales a fragment-shader texel coordinate while preserving the fragment's
// sub-texel position within its guest-resolution texel:
//   result = value * up_factor + (frag_coord - up_factor * floor(frag_coord * down_factor))
[[nodiscard]] IR::U32 SubScale(IR::IREmitter& ir, const IR::U1& is_scaled, const IR::U32& value,
                               const IR::Attribute attrib) {
    const IR::F32 up_factor{ir.Imm32(Settings::values.resolution_info.up_factor)};
    // Coordinate scaled up to host resolution
    const IR::F32 base{ir.FPMul(ir.ConvertUToF(32, 32, value), up_factor)};
    const IR::F32 frag_coord{ir.GetAttribute(attrib)};
    const IR::F32 down_factor{ir.Imm32(Settings::values.resolution_info.down_factor)};
    // Frag coord snapped to its guest texel, re-expressed at host resolution
    const IR::F32 floor{ir.FPMul(up_factor, ir.FPFloor(ir.FPMul(frag_coord, down_factor)))};
    // Deviation: fractional position of this fragment inside the guest texel
    const IR::F16F32F64 deviation{ir.FPAdd(base, ir.FPAdd(frag_coord, ir.FPNeg(floor)))};
    return IR::U32{ir.Select(is_scaled, ir.ConvertFToU(32, deviation), value)};
}
// Inverse of Scale(): converts a host-resolution size back to guest
// resolution via (value << down_shift) / up_scale.
[[nodiscard]] IR::U32 DownScale(IR::IREmitter& ir, const IR::U1& is_scaled, const IR::U32& value) {
    const u32 down_shift{Settings::values.resolution_info.down_shift};
    const u32 up_scale{Settings::values.resolution_info.up_scale};
    IR::U32 result{value};
    if (down_shift != 0) {
        result = ir.ShiftLeftLogical(result, ir.Imm32(down_shift));
    }
    if (up_scale != 1) {
        result = ir.IDiv(result, ir.Imm32(up_scale));
    }
    return IR::U32{ir.Select(is_scaled, result, value)};
}
// Rewrites ImageQueryDimensions for rescalable texture types: the query is
// cloned ahead of itself (the clone reports host/scaled dimensions) and the
// width/height components are converted back to guest resolution.
void PatchImageQueryDimensions(IR::Block& block, IR::Inst& inst) {
    const auto it{IR::Block::InstructionList::s_iterator_to(inst)};
    IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
    const auto info{inst.Flags<IR::TextureInstInfo>()};
    const IR::U1 is_scaled{ir.IsTextureScaled(ir.Imm32(info.descriptor_index))};
    switch (info.type) {
    case TextureType::Color2D:
    case TextureType::ColorArray2D:
    case TextureType::Color2DRect: {
        // Clone of the original query instruction
        const IR::Value new_inst{&*block.PrependNewInst(it, inst)};
        const IR::U32 width{DownScale(ir, is_scaled, IR::U32{ir.CompositeExtract(new_inst, 0)})};
        const IR::U32 height{DownScale(ir, is_scaled, IR::U32{ir.CompositeExtract(new_inst, 1)})};
        // Components 2 and 3 (depth/mip count -- confirm layout against the
        // emitter) are passed through unmodified
        const IR::Value replacement{ir.CompositeConstruct(
            width, height, ir.CompositeExtract(new_inst, 2), ir.CompositeExtract(new_inst, 3))};
        inst.ReplaceUsesWith(replacement);
        break;
    }
    case TextureType::Color1D:
    case TextureType::ColorArray1D:
    case TextureType::Color3D:
    case TextureType::ColorCube:
    case TextureType::ColorArrayCube:
    case TextureType::Buffer:
        // Nothing to patch here
        break;
    }
}
// Scales the x/y components of the integer coordinate composite stored in
// argument 'index', leaving the array layer (when present) untouched.
void ScaleIntegerComposite(IR::IREmitter& ir, IR::Inst& inst, const IR::U1& is_scaled,
                           size_t index) {
    const IR::Value coords{inst.Arg(index)};
    if (coords.IsEmpty()) {
        return;
    }
    const auto info{inst.Flags<IR::TextureInstInfo>()};
    const IR::U32 scaled_x{Scale(ir, is_scaled, IR::U32{ir.CompositeExtract(coords, 0)})};
    const IR::U32 scaled_y{Scale(ir, is_scaled, IR::U32{ir.CompositeExtract(coords, 1)})};
    switch (info.type) {
    case TextureType::Color2D:
    case TextureType::Color2DRect:
        inst.SetArg(index, ir.CompositeConstruct(scaled_x, scaled_y));
        break;
    case TextureType::ColorArray2D: {
        // The layer index is not a spatial coordinate: never scaled
        const IR::U32 layer{ir.CompositeExtract(coords, 2)};
        inst.SetArg(index, ir.CompositeConstruct(scaled_x, scaled_y, layer));
        break;
    }
    case TextureType::Color1D:
    case TextureType::ColorArray1D:
    case TextureType::Color3D:
    case TextureType::ColorCube:
    case TextureType::ColorArrayCube:
    case TextureType::Buffer:
        // Nothing to patch here
        break;
    }
}
// Like ScaleIntegerComposite, but for offset composites, which carry no
// array layer: every rescalable 2D flavour takes the two-component path.
void ScaleIntegerOffsetComposite(IR::IREmitter& ir, IR::Inst& inst, const IR::U1& is_scaled,
                                 size_t index) {
    const IR::Value offsets{inst.Arg(index)};
    if (offsets.IsEmpty()) {
        return;
    }
    const auto info{inst.Flags<IR::TextureInstInfo>()};
    const IR::U32 scaled_x{Scale(ir, is_scaled, IR::U32{ir.CompositeExtract(offsets, 0)})};
    const IR::U32 scaled_y{Scale(ir, is_scaled, IR::U32{ir.CompositeExtract(offsets, 1)})};
    switch (info.type) {
    case TextureType::ColorArray2D:
    case TextureType::Color2D:
    case TextureType::Color2DRect:
        inst.SetArg(index, ir.CompositeConstruct(scaled_x, scaled_y));
        break;
    case TextureType::Color1D:
    case TextureType::ColorArray1D:
    case TextureType::Color3D:
    case TextureType::ColorCube:
    case TextureType::ColorArrayCube:
    case TextureType::Buffer:
        // Nothing to patch here
        break;
    }
}
// Applies SubScale to the x/y components of a fetch/read coordinate
// (argument 1), preserving the array layer when present.
void SubScaleCoord(IR::IREmitter& ir, IR::Inst& inst, const IR::U1& is_scaled) {
    const auto info{inst.Flags<IR::TextureInstInfo>()};
    const IR::Value coords{inst.Arg(1)};
    const IR::U32 raw_x{ir.CompositeExtract(coords, 0)};
    const IR::U32 raw_y{ir.CompositeExtract(coords, 1)};
    const IR::U32 sub_x{SubScale(ir, is_scaled, raw_x, IR::Attribute::PositionX)};
    const IR::U32 sub_y{SubScale(ir, is_scaled, raw_y, IR::Attribute::PositionY)};
    switch (info.type) {
    case TextureType::Color2D:
    case TextureType::Color2DRect:
        inst.SetArg(1, ir.CompositeConstruct(sub_x, sub_y));
        break;
    case TextureType::ColorArray2D: {
        // Array layer passes through unscaled
        const IR::U32 layer{ir.CompositeExtract(coords, 2)};
        inst.SetArg(1, ir.CompositeConstruct(sub_x, sub_y, layer));
        break;
    }
    case TextureType::Color1D:
    case TextureType::ColorArray1D:
    case TextureType::Color3D:
    case TextureType::ColorCube:
    case TextureType::ColorArrayCube:
    case TextureType::Buffer:
        // Nothing to patch here
        break;
    }
}
// Fragment-shader ImageFetch: sub-texel-accurate coordinate scaling plus
// plain integer scaling of the fetch offset (argument 2).
void SubScaleImageFetch(IR::Block& block, IR::Inst& inst) {
    IR::IREmitter emit{block, IR::Block::InstructionList::s_iterator_to(inst)};
    const auto info{inst.Flags<IR::TextureInstInfo>()};
    if (!IsTextureTypeRescalable(info.type)) {
        return;
    }
    const IR::U1 is_scaled{emit.IsTextureScaled(emit.Imm32(info.descriptor_index))};
    SubScaleCoord(emit, inst, is_scaled);
    // Scale ImageFetch offset
    ScaleIntegerOffsetComposite(emit, inst, is_scaled, 2);
}
// Fragment-shader ImageRead: sub-texel-accurate coordinate scaling.
void SubScaleImageRead(IR::Block& block, IR::Inst& inst) {
    IR::IREmitter emit{block, IR::Block::InstructionList::s_iterator_to(inst)};
    const auto info{inst.Flags<IR::TextureInstInfo>()};
    if (!IsTextureTypeRescalable(info.type)) {
        return;
    }
    const IR::U1 is_scaled{emit.IsImageScaled(emit.Imm32(info.descriptor_index))};
    SubScaleCoord(emit, inst, is_scaled);
}
// Non-fragment ImageFetch: integer coordinate scaling (no frag coord exists
// to recover sub-texel precision) plus offset scaling.
void PatchImageFetch(IR::Block& block, IR::Inst& inst) {
    IR::IREmitter emit{block, IR::Block::InstructionList::s_iterator_to(inst)};
    const auto info{inst.Flags<IR::TextureInstInfo>()};
    if (!IsTextureTypeRescalable(info.type)) {
        return;
    }
    const IR::U1 is_scaled{emit.IsTextureScaled(emit.Imm32(info.descriptor_index))};
    ScaleIntegerComposite(emit, inst, is_scaled, 1);
    // Scale ImageFetch offset
    ScaleIntegerOffsetComposite(emit, inst, is_scaled, 2);
}
// Non-fragment ImageRead: integer coordinate scaling only.
void PatchImageRead(IR::Block& block, IR::Inst& inst) {
    IR::IREmitter emit{block, IR::Block::InstructionList::s_iterator_to(inst)};
    const auto info{inst.Flags<IR::TextureInstInfo>()};
    if (!IsTextureTypeRescalable(info.type)) {
        return;
    }
    const IR::U1 is_scaled{emit.IsImageScaled(emit.Imm32(info.descriptor_index))};
    ScaleIntegerComposite(emit, inst, is_scaled, 1);
}
// Per-instruction driver of the rescaling pass. Fragment shaders use the
// sub-texel-accurate variants; other stages use plain integer scaling.
void Visit(const IR::Program& program, IR::Block& block, IR::Inst& inst) {
    const bool is_fragment_shader{program.stage == Stage::Fragment};
    switch (inst.GetOpcode()) {
    case IR::Opcode::GetAttribute: {
        const IR::Attribute attr{inst.Arg(0).Attribute()};
        switch (attr) {
        case IR::Attribute::PositionX:
        case IR::Attribute::PositionY:
            // Reads tagged 0xDEADBEEF by VisitMark are patched at the
            // shuffle site instead and must not be down-scaled here
            if (is_fragment_shader && inst.Flags<u32>() != 0xDEADBEEF) {
                PatchFragCoord(block, inst);
            }
            break;
        default:
            break;
        }
        break;
    }
    case IR::Opcode::SetAttribute: {
        const IR::Attribute attr{inst.Arg(0).Attribute()};
        switch (attr) {
        case IR::Attribute::PointSize:
            // Same tag check as above guards against double patching
            if (inst.Flags<u32>() != 0xDEADBEEF) {
                PatchPointSize(block, inst);
            }
            break;
        default:
            break;
        }
        break;
    }
    case IR::Opcode::ImageQueryDimensions:
        PatchImageQueryDimensions(block, inst);
        break;
    case IR::Opcode::ImageFetch:
        if (is_fragment_shader) {
            SubScaleImageFetch(block, inst);
        } else {
            PatchImageFetch(block, inst);
        }
        break;
    case IR::Opcode::ImageRead:
        if (is_fragment_shader) {
            SubScaleImageRead(block, inst);
        } else {
            PatchImageRead(block, inst);
        }
        break;
    default:
        break;
    }
}
} // Anonymous namespace
// Pass entry point. Fragment shaders get a first marking sweep so that
// frag-coord reads feeding subgroup shuffles are tagged before the main
// patching sweep runs.
void RescalingPass(IR::Program& program) {
    if (program.stage == Stage::Fragment) {
        for (IR::Block* const block : program.post_order_blocks) {
            for (IR::Inst& inst : block->Instructions()) {
                VisitMark(*block, inst);
            }
        }
    }
    for (IR::Block* const block : program.post_order_blocks) {
        for (IR::Inst& inst : block->Instructions()) {
            Visit(program, *block, inst);
        }
    }
}
} // namespace Shader::Optimization
// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include "common/settings.h"
#include "shader_recompiler/environment.h"
#include "shader_recompiler/frontend/ir/ir_emitter.h"
#include "shader_recompiler/frontend/ir/modifiers.h"
#include "shader_recompiler/frontend/ir/program.h"
#include "shader_recompiler/frontend/ir/value.h"
#include "shader_recompiler/ir_opt/passes.h"
#include "shader_recompiler/shader_info.h"
namespace Shader::Optimization {
namespace {
// True for the 2D texture flavours whose coordinates participate in
// resolution rescaling.
[[nodiscard]] bool IsTextureTypeRescalable(TextureType type) {
    switch (type) {
    case TextureType::Color2D:
    case TextureType::ColorArray2D:
    case TextureType::Color2DRect:
        return true;
    case TextureType::Color1D:
    case TextureType::ColorArray1D:
    case TextureType::Color3D:
    case TextureType::ColorCube:
    case TextureType::ColorArrayCube:
    case TextureType::Buffer:
        break;
    }
    return false;
}
// Tags frag-coord reads that feed subgroup shuffles (flag 0xDEADBEEF) so
// they are shuffled unscaled; the shuffle result is re-scaled afterwards.
void VisitMark(IR::Block& block, IR::Inst& inst) {
    switch (inst.GetOpcode()) {
    case IR::Opcode::ShuffleIndex:
    case IR::Opcode::ShuffleUp:
    case IR::Opcode::ShuffleDown:
    case IR::Opcode::ShuffleButterfly: {
        const IR::Value shfl_arg{inst.Arg(0)};
        if (shfl_arg.IsImmediate()) {
            break;
        }
        const IR::Inst* const arg_inst{shfl_arg.InstRecursive()};
        if (arg_inst->GetOpcode() != IR::Opcode::BitCastU32F32) {
            break;
        }
        const IR::Value bitcast_arg{arg_inst->Arg(0)};
        if (bitcast_arg.IsImmediate()) {
            break;
        }
        IR::Inst* const bitcast_inst{bitcast_arg.InstRecursive()};
        bool must_patch_outside = false;
        if (bitcast_inst->GetOpcode() == IR::Opcode::GetAttribute) {
            const IR::Attribute attr{bitcast_inst->Arg(0).Attribute()};
            switch (attr) {
            case IR::Attribute::PositionX:
            case IR::Attribute::PositionY:
                // Mark the read so the regular frag-coord patch skips it
                bitcast_inst->SetFlags<u32>(0xDEADBEEF);
                must_patch_outside = true;
                break;
            default:
                break;
            }
        }
        if (must_patch_outside) {
            // Clone the shuffle, then replace it with clone * (1/down_factor)
            const auto it{IR::Block::InstructionList::s_iterator_to(inst)};
            IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
            const IR::F32 new_inst{&*block.PrependNewInst(it, inst)};
            const IR::F32 up_factor{ir.FPRecip(ir.ResolutionDownFactor())};
            const IR::Value converted{ir.FPMul(new_inst, up_factor)};
            inst.ReplaceUsesWith(converted);
        }
        break;
    }
    default:
        break;
    }
}
// Multiplies a frag-coord read by the down factor (host -> guest space)
void PatchFragCoord(IR::Block& block, IR::Inst& inst) {
    IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
    const IR::F32 down_factor{ir.ResolutionDownFactor()};
    const IR::F32 frag_coord{ir.GetAttribute(inst.Arg(0).Attribute())};
    const IR::F32 downscaled_frag_coord{ir.FPMul(frag_coord, down_factor)};
    inst.ReplaceUsesWith(downscaled_frag_coord);
}
// Multiplies the written point size by 1/down_factor
void PatchPointSize(IR::Block& block, IR::Inst& inst) {
    IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
    const IR::F32 point_value{inst.Arg(1)};
    const IR::F32 up_factor{ir.FPRecip(ir.ResolutionDownFactor())};
    const IR::F32 upscaled_point_value{ir.FPMul(point_value, up_factor)};
    inst.SetArg(1, upscaled_point_value);
}
// Guest -> host integer coordinate: (value * up_scale) >> down_shift
[[nodiscard]] IR::U32 Scale(IR::IREmitter& ir, const IR::U1& is_scaled, const IR::U32& value) {
    IR::U32 scaled_value{value};
    if (const u32 up_scale = Settings::values.resolution_info.up_scale; up_scale != 1) {
        scaled_value = ir.IMul(scaled_value, ir.Imm32(up_scale));
    }
    if (const u32 down_shift = Settings::values.resolution_info.down_shift; down_shift != 0) {
        scaled_value = ir.ShiftRightArithmetic(scaled_value, ir.Imm32(down_shift));
    }
    return IR::U32{ir.Select(is_scaled, scaled_value, value)};
}
// Guest -> host coordinate preserving the fragment's sub-texel position
[[nodiscard]] IR::U32 SubScale(IR::IREmitter& ir, const IR::U1& is_scaled, const IR::U32& value,
                               const IR::Attribute attrib) {
    const IR::F32 up_factor{ir.Imm32(Settings::values.resolution_info.up_factor)};
    const IR::F32 base{ir.FPMul(ir.ConvertUToF(32, 32, value), up_factor)};
    const IR::F32 frag_coord{ir.GetAttribute(attrib)};
    const IR::F32 down_factor{ir.Imm32(Settings::values.resolution_info.down_factor)};
    const IR::F32 floor{ir.FPMul(up_factor, ir.FPFloor(ir.FPMul(frag_coord, down_factor)))};
    const IR::F16F32F64 deviation{ir.FPAdd(base, ir.FPAdd(frag_coord, ir.FPNeg(floor)))};
    return IR::U32{ir.Select(is_scaled, ir.ConvertFToU(32, deviation), value)};
}
// Host -> guest size: (value << down_shift) / up_scale (inverse of Scale)
[[nodiscard]] IR::U32 DownScale(IR::IREmitter& ir, const IR::U1& is_scaled, const IR::U32& value) {
    IR::U32 scaled_value{value};
    if (const u32 down_shift = Settings::values.resolution_info.down_shift; down_shift != 0) {
        scaled_value = ir.ShiftLeftLogical(scaled_value, ir.Imm32(down_shift));
    }
    if (const u32 up_scale = Settings::values.resolution_info.up_scale; up_scale != 1) {
        scaled_value = ir.IDiv(scaled_value, ir.Imm32(up_scale));
    }
    return IR::U32{ir.Select(is_scaled, scaled_value, value)};
}
// Clones an ImageQueryDimensions and converts width/height back to guest
// resolution; the remaining components pass through untouched.
void PatchImageQueryDimensions(IR::Block& block, IR::Inst& inst) {
    const auto it{IR::Block::InstructionList::s_iterator_to(inst)};
    IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
    const auto info{inst.Flags<IR::TextureInstInfo>()};
    const IR::U1 is_scaled{ir.IsTextureScaled(ir.Imm32(info.descriptor_index))};
    switch (info.type) {
    case TextureType::Color2D:
    case TextureType::ColorArray2D:
    case TextureType::Color2DRect: {
        const IR::Value new_inst{&*block.PrependNewInst(it, inst)};
        const IR::U32 width{DownScale(ir, is_scaled, IR::U32{ir.CompositeExtract(new_inst, 0)})};
        const IR::U32 height{DownScale(ir, is_scaled, IR::U32{ir.CompositeExtract(new_inst, 1)})};
        const IR::Value replacement{ir.CompositeConstruct(
            width, height, ir.CompositeExtract(new_inst, 2), ir.CompositeExtract(new_inst, 3))};
        inst.ReplaceUsesWith(replacement);
        break;
    }
    case TextureType::Color1D:
    case TextureType::ColorArray1D:
    case TextureType::Color3D:
    case TextureType::ColorCube:
    case TextureType::ColorArrayCube:
    case TextureType::Buffer:
        // Nothing to patch here
        break;
    }
}
// Scales x/y of an integer coordinate composite, keeping the array layer
void ScaleIntegerComposite(IR::IREmitter& ir, IR::Inst& inst, const IR::U1& is_scaled,
                           size_t index) {
    const IR::Value composite{inst.Arg(index)};
    if (composite.IsEmpty()) {
        return;
    }
    const auto info{inst.Flags<IR::TextureInstInfo>()};
    const IR::U32 x{Scale(ir, is_scaled, IR::U32{ir.CompositeExtract(composite, 0)})};
    const IR::U32 y{Scale(ir, is_scaled, IR::U32{ir.CompositeExtract(composite, 1)})};
    switch (info.type) {
    case TextureType::Color2D:
    case TextureType::Color2DRect:
        inst.SetArg(index, ir.CompositeConstruct(x, y));
        break;
    case TextureType::ColorArray2D: {
        const IR::U32 z{ir.CompositeExtract(composite, 2)};
        inst.SetArg(index, ir.CompositeConstruct(x, y, z));
        break;
    }
    case TextureType::Color1D:
    case TextureType::ColorArray1D:
    case TextureType::Color3D:
    case TextureType::ColorCube:
    case TextureType::ColorArrayCube:
    case TextureType::Buffer:
        // Nothing to patch here
        break;
    }
}
// Scales x/y of an offset composite; offsets never carry an array layer
void ScaleIntegerOffsetComposite(IR::IREmitter& ir, IR::Inst& inst, const IR::U1& is_scaled,
                                 size_t index) {
    const IR::Value composite{inst.Arg(index)};
    if (composite.IsEmpty()) {
        return;
    }
    const auto info{inst.Flags<IR::TextureInstInfo>()};
    const IR::U32 x{Scale(ir, is_scaled, IR::U32{ir.CompositeExtract(composite, 0)})};
    const IR::U32 y{Scale(ir, is_scaled, IR::U32{ir.CompositeExtract(composite, 1)})};
    switch (info.type) {
    case TextureType::ColorArray2D:
    case TextureType::Color2D:
    case TextureType::Color2DRect:
        inst.SetArg(index, ir.CompositeConstruct(x, y));
        break;
    case TextureType::Color1D:
    case TextureType::ColorArray1D:
    case TextureType::Color3D:
    case TextureType::ColorCube:
    case TextureType::ColorArrayCube:
    case TextureType::Buffer:
        // Nothing to patch here
        break;
    }
}
// Applies SubScale to the x/y of a coordinate (argument 1)
void SubScaleCoord(IR::IREmitter& ir, IR::Inst& inst, const IR::U1& is_scaled) {
    const auto info{inst.Flags<IR::TextureInstInfo>()};
    const IR::Value coord{inst.Arg(1)};
    const IR::U32 coord_x{ir.CompositeExtract(coord, 0)};
    const IR::U32 coord_y{ir.CompositeExtract(coord, 1)};
    const IR::U32 scaled_x{SubScale(ir, is_scaled, coord_x, IR::Attribute::PositionX)};
    const IR::U32 scaled_y{SubScale(ir, is_scaled, coord_y, IR::Attribute::PositionY)};
    switch (info.type) {
    case TextureType::Color2D:
    case TextureType::Color2DRect:
        inst.SetArg(1, ir.CompositeConstruct(scaled_x, scaled_y));
        break;
    case TextureType::ColorArray2D: {
        const IR::U32 z{ir.CompositeExtract(coord, 2)};
        inst.SetArg(1, ir.CompositeConstruct(scaled_x, scaled_y, z));
        break;
    }
    case TextureType::Color1D:
    case TextureType::ColorArray1D:
    case TextureType::Color3D:
    case TextureType::ColorCube:
    case TextureType::ColorArrayCube:
    case TextureType::Buffer:
        // Nothing to patch here
        break;
    }
}
// Fragment-stage ImageFetch: sub-texel coordinate scaling + offset scaling
void SubScaleImageFetch(IR::Block& block, IR::Inst& inst) {
    IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
    const auto info{inst.Flags<IR::TextureInstInfo>()};
    if (!IsTextureTypeRescalable(info.type)) {
        return;
    }
    const IR::U1 is_scaled{ir.IsTextureScaled(ir.Imm32(info.descriptor_index))};
    SubScaleCoord(ir, inst, is_scaled);
    // Scale ImageFetch offset
    ScaleIntegerOffsetComposite(ir, inst, is_scaled, 2);
}
// Fragment-stage ImageRead: sub-texel coordinate scaling
void SubScaleImageRead(IR::Block& block, IR::Inst& inst) {
    IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
    const auto info{inst.Flags<IR::TextureInstInfo>()};
    if (!IsTextureTypeRescalable(info.type)) {
        return;
    }
    const IR::U1 is_scaled{ir.IsImageScaled(ir.Imm32(info.descriptor_index))};
    SubScaleCoord(ir, inst, is_scaled);
}
// Non-fragment ImageFetch: plain integer coordinate + offset scaling
void PatchImageFetch(IR::Block& block, IR::Inst& inst) {
    IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
    const auto info{inst.Flags<IR::TextureInstInfo>()};
    if (!IsTextureTypeRescalable(info.type)) {
        return;
    }
    const IR::U1 is_scaled{ir.IsTextureScaled(ir.Imm32(info.descriptor_index))};
    ScaleIntegerComposite(ir, inst, is_scaled, 1);
    // Scale ImageFetch offset
    ScaleIntegerOffsetComposite(ir, inst, is_scaled, 2);
}
// Non-fragment ImageRead: plain integer coordinate scaling
void PatchImageRead(IR::Block& block, IR::Inst& inst) {
    IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
    const auto info{inst.Flags<IR::TextureInstInfo>()};
    if (!IsTextureTypeRescalable(info.type)) {
        return;
    }
    const IR::U1 is_scaled{ir.IsImageScaled(ir.Imm32(info.descriptor_index))};
    ScaleIntegerComposite(ir, inst, is_scaled, 1);
}
// Per-instruction driver; fragment shaders use the sub-texel variants
void Visit(const IR::Program& program, IR::Block& block, IR::Inst& inst) {
    const bool is_fragment_shader{program.stage == Stage::Fragment};
    switch (inst.GetOpcode()) {
    case IR::Opcode::GetAttribute: {
        const IR::Attribute attr{inst.Arg(0).Attribute()};
        switch (attr) {
        case IR::Attribute::PositionX:
        case IR::Attribute::PositionY:
            // Reads tagged by VisitMark are patched at the shuffle site
            if (is_fragment_shader && inst.Flags<u32>() != 0xDEADBEEF) {
                PatchFragCoord(block, inst);
            }
            break;
        default:
            break;
        }
        break;
    }
    case IR::Opcode::SetAttribute: {
        const IR::Attribute attr{inst.Arg(0).Attribute()};
        switch (attr) {
        case IR::Attribute::PointSize:
            if (inst.Flags<u32>() != 0xDEADBEEF) {
                PatchPointSize(block, inst);
            }
            break;
        default:
            break;
        }
        break;
    }
    case IR::Opcode::ImageQueryDimensions:
        PatchImageQueryDimensions(block, inst);
        break;
    case IR::Opcode::ImageFetch:
        if (is_fragment_shader) {
            SubScaleImageFetch(block, inst);
        } else {
            PatchImageFetch(block, inst);
        }
        break;
    case IR::Opcode::ImageRead:
        if (is_fragment_shader) {
            SubScaleImageRead(block, inst);
        } else {
            PatchImageRead(block, inst);
        }
        break;
    default:
        break;
    }
}
} // Anonymous namespace
// Pass entry point: fragment shaders get a marking sweep before patching
void RescalingPass(IR::Program& program) {
    const bool is_fragment_shader{program.stage == Stage::Fragment};
    if (is_fragment_shader) {
        for (IR::Block* const block : program.post_order_blocks) {
            for (IR::Inst& inst : block->Instructions()) {
                VisitMark(*block, inst);
            }
        }
    }
    for (IR::Block* const block : program.post_order_blocks) {
        for (IR::Inst& inst : block->Instructions()) {
            Visit(program, *block, inst);
        }
    }
}
} // namespace Shader::Optimization

View File

@@ -1,412 +1,412 @@
// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
// This file implements the SSA rewriting algorithm proposed in
//
// Simple and Efficient Construction of Static Single Assignment Form.
// Braun M., Buchwald S., Hack S., Leiba R., Mallon C., Zwinkau A. (2013)
// In: Jhala R., De Bosschere K. (eds)
// Compiler Construction. CC 2013.
// Lecture Notes in Computer Science, vol 7791.
// Springer, Berlin, Heidelberg
//
// https://link.springer.com/chapter/10.1007/978-3-642-37051-9_6
//
#include <deque>
#include <span>
#include <variant>
#include <vector>
#include <boost/container/flat_map.hpp>
#include "shader_recompiler/frontend/ir/basic_block.h"
#include "shader_recompiler/frontend/ir/opcodes.h"
#include "shader_recompiler/frontend/ir/pred.h"
#include "shader_recompiler/frontend/ir/reg.h"
#include "shader_recompiler/frontend/ir/value.h"
#include "shader_recompiler/ir_opt/passes.h"
namespace Shader::Optimization {
namespace {
// Empty tag type standing in for a guest ALU flag; the defaulted comparison
// lets flag tags be used as keys inside the Variant alias below.
struct FlagTag {
    auto operator<=>(const FlagTag&) const noexcept = default;
};
// One tag per tracked condition-code flag
struct ZeroFlagTag : FlagTag {};
struct SignFlagTag : FlagTag {};
struct CarryFlagTag : FlagTag {};
struct OverflowFlagTag : FlagTag {};
// Synthetic variable introduced by goto elimination; 'index' identifies
// which goto variable is meant.
struct GotoVariable : FlagTag {
    GotoVariable() = default;
    explicit GotoVariable(u32 index_) : index{index_} {}
    auto operator<=>(const GotoVariable&) const noexcept = default;
    u32 index;
};
// Synthetic variable holding the destination of an indirect branch
struct IndirectBranchVariable {
    auto operator<=>(const IndirectBranchVariable&) const noexcept = default;
};
// Any variable kind the SSA rewrite tracks: registers, predicates, flags
// and the synthetic variables above.
using Variant = std::variant<IR::Reg, IR::Pred, ZeroFlagTag, SignFlagTag, CarryFlagTag,
                             OverflowFlagTag, GotoVariable, IndirectBranchVariable>;
// Per-block current SSA definition of a single variable
using ValueMap = boost::container::flat_map<IR::Block*, IR::Value>;
// Maps (block, variable) -> current SSA definition. Register definitions are
// stored inline on the block itself; every other variable kind lives in a
// flat_map keyed by block. Note that the lookup overloads use operator[],
// which default-inserts an empty IR::Value when no definition exists -- an
// empty value signals "not yet defined in this block" to the reader.
struct DefTable {
    // Registers: delegated to per-block storage
    const IR::Value& Def(IR::Block* block, IR::Reg variable) {
        return block->SsaRegValue(variable);
    }
    void SetDef(IR::Block* block, IR::Reg variable, const IR::Value& value) {
        block->SetSsaRegValue(variable, value);
    }
    // Predicates: one ValueMap per user predicate register
    const IR::Value& Def(IR::Block* block, IR::Pred variable) {
        return preds[IR::PredIndex(variable)][block];
    }
    void SetDef(IR::Block* block, IR::Pred variable, const IR::Value& value) {
        preds[IR::PredIndex(variable)].insert_or_assign(block, value);
    }
    // Goto-elimination variables, keyed by their index
    const IR::Value& Def(IR::Block* block, GotoVariable variable) {
        return goto_vars[variable.index][block];
    }
    void SetDef(IR::Block* block, GotoVariable variable, const IR::Value& value) {
        goto_vars[variable.index].insert_or_assign(block, value);
    }
    // Indirect branch target variable (one per program)
    const IR::Value& Def(IR::Block* block, IndirectBranchVariable) {
        return indirect_branch_var[block];
    }
    void SetDef(IR::Block* block, IndirectBranchVariable, const IR::Value& value) {
        indirect_branch_var.insert_or_assign(block, value);
    }
    // Condition-code flags, one map each
    const IR::Value& Def(IR::Block* block, ZeroFlagTag) {
        return zero_flag[block];
    }
    void SetDef(IR::Block* block, ZeroFlagTag, const IR::Value& value) {
        zero_flag.insert_or_assign(block, value);
    }
    const IR::Value& Def(IR::Block* block, SignFlagTag) {
        return sign_flag[block];
    }
    void SetDef(IR::Block* block, SignFlagTag, const IR::Value& value) {
        sign_flag.insert_or_assign(block, value);
    }
    const IR::Value& Def(IR::Block* block, CarryFlagTag) {
        return carry_flag[block];
    }
    void SetDef(IR::Block* block, CarryFlagTag, const IR::Value& value) {
        carry_flag.insert_or_assign(block, value);
    }
    const IR::Value& Def(IR::Block* block, OverflowFlagTag) {
        return overflow_flag[block];
    }
    void SetDef(IR::Block* block, OverflowFlagTag, const IR::Value& value) {
        overflow_flag.insert_or_assign(block, value);
    }
    std::array<ValueMap, IR::NUM_USER_PREDS> preds;
    boost::container::flat_map<u32, ValueMap> goto_vars;
    ValueMap indirect_branch_var;
    ValueMap zero_flag;
    ValueMap sign_flag;
    ValueMap carry_flag;
    ValueMap overflow_flag;
};
// Opcode used to materialize an undefined value of each variable's type:
// registers and branch targets are 32-bit, predicates and flags are 1-bit.
IR::Opcode UndefOpcode(IR::Reg) noexcept {
    return IR::Opcode::UndefU32;
}
IR::Opcode UndefOpcode(IR::Pred) noexcept {
    return IR::Opcode::UndefU1;
}
IR::Opcode UndefOpcode(const FlagTag&) noexcept {
    return IR::Opcode::UndefU1;
}
IR::Opcode UndefOpcode(IndirectBranchVariable) noexcept {
    return IR::Opcode::UndefU32;
}
// Resume points for the explicit-stack (non-recursive) variable lookup
enum class Status {
    Start,
    SetValue,
    PreparePhiArgument,
    PushPhiArgument,
};
// One stack frame of the iterative ReadVariable state machine
template <typename Type>
struct ReadState {
    ReadState(IR::Block* block_) : block{block_} {}
    ReadState() = default;
    // Block being visited
    IR::Block* block{};
    // Value produced for this frame
    IR::Value result{};
    // Phi node under construction, if any
    IR::Inst* phi{};
    // Iteration range over the block's immediate predecessors
    IR::Block* const* pred_it{};
    IR::Block* const* pred_end{};
    // Where to resume when this frame regains control
    Status pc{Status::Start};
};
class Pass {
public:
template <typename Type>
void WriteVariable(Type variable, IR::Block* block, const IR::Value& value) {
current_def.SetDef(block, variable, value);
}
template <typename Type>
IR::Value ReadVariable(Type variable, IR::Block* root_block) {
boost::container::small_vector<ReadState<Type>, 64> stack{
ReadState<Type>(nullptr),
ReadState<Type>(root_block),
};
const auto prepare_phi_operand{[&] {
if (stack.back().pred_it == stack.back().pred_end) {
IR::Inst* const phi{stack.back().phi};
IR::Block* const block{stack.back().block};
const IR::Value result{TryRemoveTrivialPhi(*phi, block, UndefOpcode(variable))};
stack.pop_back();
stack.back().result = result;
WriteVariable(variable, block, result);
} else {
IR::Block* const imm_pred{*stack.back().pred_it};
stack.back().pc = Status::PushPhiArgument;
stack.emplace_back(imm_pred);
}
}};
do {
IR::Block* const block{stack.back().block};
switch (stack.back().pc) {
case Status::Start: {
if (const IR::Value& def = current_def.Def(block, variable); !def.IsEmpty()) {
stack.back().result = def;
} else if (!block->IsSsaSealed()) {
// Incomplete CFG
IR::Inst* phi{&*block->PrependNewInst(block->begin(), IR::Opcode::Phi)};
phi->SetFlags(IR::TypeOf(UndefOpcode(variable)));
incomplete_phis[block].insert_or_assign(variable, phi);
stack.back().result = IR::Value{&*phi};
} else if (const std::span imm_preds = block->ImmPredecessors();
imm_preds.size() == 1) {
// Optimize the common case of one predecessor: no phi needed
stack.back().pc = Status::SetValue;
stack.emplace_back(imm_preds.front());
break;
} else {
// Break potential cycles with operandless phi
IR::Inst* const phi{&*block->PrependNewInst(block->begin(), IR::Opcode::Phi)};
phi->SetFlags(IR::TypeOf(UndefOpcode(variable)));
WriteVariable(variable, block, IR::Value{phi});
stack.back().phi = phi;
stack.back().pred_it = imm_preds.data();
stack.back().pred_end = imm_preds.data() + imm_preds.size();
prepare_phi_operand();
break;
}
}
[[fallthrough]];
case Status::SetValue: {
const IR::Value result{stack.back().result};
WriteVariable(variable, block, result);
stack.pop_back();
stack.back().result = result;
break;
}
case Status::PushPhiArgument: {
IR::Inst* const phi{stack.back().phi};
phi->AddPhiOperand(*stack.back().pred_it, stack.back().result);
++stack.back().pred_it;
}
[[fallthrough]];
case Status::PreparePhiArgument:
prepare_phi_operand();
break;
}
} while (stack.size() > 1);
return stack.back().result;
}
    /// Marks the block's predecessor list as final. Phi nodes that were created
    /// while the block was unsealed (incomplete CFG) are completed here with one
    /// operand per immediate predecessor, then the block is flagged as sealed.
    void SealBlock(IR::Block* block) {
        const auto it{incomplete_phis.find(block)};
        if (it != incomplete_phis.end()) {
            for (auto& pair : it->second) {
                auto& variant{pair.first};
                auto& phi{pair.second};
                // Dispatch on the concrete variable kind stored in the variant
                std::visit([&](auto& variable) { AddPhiOperands(variable, *phi, block); }, variant);
            }
        }
        block->SsaSeal();
    }
private:
    /// Adds one operand to the phi per immediate predecessor of the block by
    /// reading the variable's reaching definition in each predecessor, then
    /// attempts to simplify the now-complete phi.
    template <typename Type>
    IR::Value AddPhiOperands(Type variable, IR::Inst& phi, IR::Block* block) {
        for (IR::Block* const imm_pred : block->ImmPredecessors()) {
            phi.AddPhiOperand(imm_pred, ReadVariable(variable, imm_pred));
        }
        return TryRemoveTrivialPhi(phi, block, UndefOpcode(variable));
    }
    /// tryRemoveTrivialPhi from Braun et al.: a phi whose operands all resolve to
    /// a single unique value (ignoring self-references) is trivial and all its
    /// uses are rerouted to that value. When no value is found at all, an undef
    /// instruction of the matching type becomes the replacement.
    /// Returns the replacement value, or the phi itself when it is not trivial.
    IR::Value TryRemoveTrivialPhi(IR::Inst& phi, IR::Block* block, IR::Opcode undef_opcode) {
        IR::Value same;
        const size_t num_args{phi.NumArgs()};
        for (size_t arg_index = 0; arg_index < num_args; ++arg_index) {
            const IR::Value& op{phi.Arg(arg_index)};
            if (op.Resolve() == same.Resolve() || op == IR::Value{&phi}) {
                // Unique value or self-reference
                continue;
            }
            if (!same.IsEmpty()) {
                // The phi merges at least two values: not trivial
                return IR::Value{&phi};
            }
            same = op;
        }
        // Remove the phi node from the block, it will be reinserted
        IR::Block::InstructionList& list{block->Instructions()};
        list.erase(IR::Block::InstructionList::s_iterator_to(phi));
        // Find the first non-phi instruction and use it as an insertion point
        IR::Block::iterator reinsert_point{std::ranges::find_if_not(list, IR::IsPhi)};
        if (same.IsEmpty()) {
            // The phi is unreachable or in the start block
            // Insert an undefined instruction and make it the phi node replacement
            // The "phi" node reinsertion point is specified after this instruction
            reinsert_point = block->PrependNewInst(reinsert_point, undef_opcode);
            same = IR::Value{&*reinsert_point};
            ++reinsert_point;
        }
        // Reinsert the phi node and reroute all its uses to the "same" value
        list.insert(reinsert_point, phi);
        phi.ReplaceUsesWith(same);
        // TODO: Try to recursively remove all phi users, which might have become trivial
        return same;
    }
boost::container::flat_map<IR::Block*, boost::container::flat_map<Variant, IR::Inst*>>
incomplete_phis;
DefTable current_def;
};
// Rewrites one Get/Set pseudo-instruction:
// - Set* opcodes record the stored value as the variable's current definition.
// - Get* opcodes are replaced with the variable's reaching definition.
// Writes to register RZ and predicate PT are dropped, and reads of them are
// left untouched.
void VisitInst(Pass& pass, IR::Block* block, IR::Inst& inst) {
    switch (inst.GetOpcode()) {
    case IR::Opcode::SetRegister:
        if (const IR::Reg reg{inst.Arg(0).Reg()}; reg != IR::Reg::RZ) {
            pass.WriteVariable(reg, block, inst.Arg(1));
        }
        break;
    case IR::Opcode::SetPred:
        if (const IR::Pred pred{inst.Arg(0).Pred()}; pred != IR::Pred::PT) {
            pass.WriteVariable(pred, block, inst.Arg(1));
        }
        break;
    case IR::Opcode::SetGotoVariable:
        pass.WriteVariable(GotoVariable{inst.Arg(0).U32()}, block, inst.Arg(1));
        break;
    case IR::Opcode::SetIndirectBranchVariable:
        pass.WriteVariable(IndirectBranchVariable{}, block, inst.Arg(0));
        break;
    case IR::Opcode::SetZFlag:
        pass.WriteVariable(ZeroFlagTag{}, block, inst.Arg(0));
        break;
    case IR::Opcode::SetSFlag:
        pass.WriteVariable(SignFlagTag{}, block, inst.Arg(0));
        break;
    case IR::Opcode::SetCFlag:
        pass.WriteVariable(CarryFlagTag{}, block, inst.Arg(0));
        break;
    case IR::Opcode::SetOFlag:
        pass.WriteVariable(OverflowFlagTag{}, block, inst.Arg(0));
        break;
    case IR::Opcode::GetRegister:
        if (const IR::Reg reg{inst.Arg(0).Reg()}; reg != IR::Reg::RZ) {
            inst.ReplaceUsesWith(pass.ReadVariable(reg, block));
        }
        break;
    case IR::Opcode::GetPred:
        if (const IR::Pred pred{inst.Arg(0).Pred()}; pred != IR::Pred::PT) {
            inst.ReplaceUsesWith(pass.ReadVariable(pred, block));
        }
        break;
    case IR::Opcode::GetGotoVariable:
        inst.ReplaceUsesWith(pass.ReadVariable(GotoVariable{inst.Arg(0).U32()}, block));
        break;
    case IR::Opcode::GetIndirectBranchVariable:
        inst.ReplaceUsesWith(pass.ReadVariable(IndirectBranchVariable{}, block));
        break;
    case IR::Opcode::GetZFlag:
        inst.ReplaceUsesWith(pass.ReadVariable(ZeroFlagTag{}, block));
        break;
    case IR::Opcode::GetSFlag:
        inst.ReplaceUsesWith(pass.ReadVariable(SignFlagTag{}, block));
        break;
    case IR::Opcode::GetCFlag:
        inst.ReplaceUsesWith(pass.ReadVariable(CarryFlagTag{}, block));
        break;
    case IR::Opcode::GetOFlag:
        inst.ReplaceUsesWith(pass.ReadVariable(OverflowFlagTag{}, block));
        break;
    default:
        break;
    }
}
// Rewrites every instruction in the block, then seals it so later reads can
// rely on its final predecessor list.
void VisitBlock(Pass& pass, IR::Block* block) {
    auto& instructions{block->Instructions()};
    for (auto inst_it = instructions.begin(); inst_it != instructions.end(); ++inst_it) {
        VisitInst(pass, block, *inst_it);
    }
    pass.SealBlock(block);
}
// Breadth-first search through an instruction's (transitive) arguments for the
// first one carrying a non-opaque type. Returns Opaque when none is found.
IR::Type GetConcreteType(IR::Inst* inst) {
    std::deque<IR::Inst*> worklist;
    worklist.push_back(inst);
    while (!worklist.empty()) {
        IR::Inst* const current{worklist.front()};
        worklist.pop_front();
        const size_t num_args{current->NumArgs()};
        for (size_t arg_index = 0; arg_index < num_args; ++arg_index) {
            const IR::Value arg{current->Arg(arg_index)};
            if (const IR::Type arg_type{arg.Type()}; arg_type != IR::Type::Opaque) {
                return arg_type;
            }
            if (!arg.IsImmediate()) {
                // Opaque producer: inspect its arguments later (FIFO keeps BFS order)
                worklist.push_back(arg.Inst());
            }
        }
    }
    return IR::Type::Opaque;
}
} // Anonymous namespace
// Entry point: rewrites the whole program into SSA form, then gives every phi
// a concrete type and a canonical operand order.
void SsaRewritePass(IR::Program& program) {
    Pass pass;
    auto& blocks{program.post_order_blocks};
    // First pass: visit blocks in reverse post order and rewrite Get/Set ops
    for (auto block_it = blocks.rbegin(); block_it != blocks.rend(); ++block_it) {
        VisitBlock(pass, *block_it);
    }
    // Second pass: patch up the phi nodes created along the way
    for (auto block_it = blocks.rbegin(); block_it != blocks.rend(); ++block_it) {
        for (IR::Inst& inst : (*block_it)->Instructions()) {
            if (inst.GetOpcode() != IR::Opcode::Phi) {
                continue;
            }
            if (inst.Type() == IR::Type::Opaque) {
                inst.SetFlags(GetConcreteType(&inst));
            }
            inst.OrderPhiArgs();
        }
    }
}
} // namespace Shader::Optimization
// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
// This file implements the SSA rewriting algorithm proposed in
//
// Simple and Efficient Construction of Static Single Assignment Form.
// Braun M., Buchwald S., Hack S., Leiba R., Mallon C., Zwinkau A. (2013)
// In: Jhala R., De Bosschere K. (eds)
// Compiler Construction. CC 2013.
// Lecture Notes in Computer Science, vol 7791.
// Springer, Berlin, Heidelberg
//
// https://link.springer.com/chapter/10.1007/978-3-642-37051-9_6
//
#include <deque>
#include <span>
#include <variant>
#include <vector>
#include <boost/container/flat_map.hpp>
#include "shader_recompiler/frontend/ir/basic_block.h"
#include "shader_recompiler/frontend/ir/opcodes.h"
#include "shader_recompiler/frontend/ir/pred.h"
#include "shader_recompiler/frontend/ir/reg.h"
#include "shader_recompiler/frontend/ir/value.h"
#include "shader_recompiler/ir_opt/passes.h"
namespace Shader::Optimization {
namespace {
// Tag types naming the non-register variables tracked by this pass. They are
// empty and totally ordered so they can serve as flat_map and variant keys.
struct FlagTag {
    auto operator<=>(const FlagTag&) const noexcept = default;
};
struct ZeroFlagTag : FlagTag {};
struct SignFlagTag : FlagTag {};
struct CarryFlagTag : FlagTag {};
struct OverflowFlagTag : FlagTag {};
// Goto helper variable, identified by the index of the goto it belongs to
struct GotoVariable : FlagTag {
    GotoVariable() = default;
    explicit GotoVariable(u32 index_) : index{index_} {}
    auto operator<=>(const GotoVariable&) const noexcept = default;
    u32 index;
};
struct IndirectBranchVariable {
    auto operator<=>(const IndirectBranchVariable&) const noexcept = default;
};
// Any variable kind the pass can rewrite
using Variant = std::variant<IR::Reg, IR::Pred, ZeroFlagTag, SignFlagTag, CarryFlagTag,
                             OverflowFlagTag, GotoVariable, IndirectBranchVariable>;
// Per-variable map from a block to the latest value defined in that block
using ValueMap = boost::container::flat_map<IR::Block*, IR::Value>;
// Current-definition table: stores, per variable kind, the latest SSA value
// written in each block. Register definitions live inside the IR block itself;
// every other kind uses a block-keyed map owned by this struct.
// Note: the non-register Def() overloads use operator[], which default-inserts
// an empty IR::Value; callers treat an empty value as "no definition here".
struct DefTable {
    // General-purpose registers (delegated to the block)
    const IR::Value& Def(IR::Block* block, IR::Reg variable) {
        return block->SsaRegValue(variable);
    }
    void SetDef(IR::Block* block, IR::Reg variable, const IR::Value& value) {
        block->SetSsaRegValue(variable, value);
    }
    // Predicate registers, one map per predicate index
    const IR::Value& Def(IR::Block* block, IR::Pred variable) {
        return preds[IR::PredIndex(variable)][block];
    }
    void SetDef(IR::Block* block, IR::Pred variable, const IR::Value& value) {
        preds[IR::PredIndex(variable)].insert_or_assign(block, value);
    }
    // Goto helper variables, one map per goto index
    const IR::Value& Def(IR::Block* block, GotoVariable variable) {
        return goto_vars[variable.index][block];
    }
    void SetDef(IR::Block* block, GotoVariable variable, const IR::Value& value) {
        goto_vars[variable.index].insert_or_assign(block, value);
    }
    // Indirect branch helper variable
    const IR::Value& Def(IR::Block* block, IndirectBranchVariable) {
        return indirect_branch_var[block];
    }
    void SetDef(IR::Block* block, IndirectBranchVariable, const IR::Value& value) {
        indirect_branch_var.insert_or_assign(block, value);
    }
    // Condition-code flags
    const IR::Value& Def(IR::Block* block, ZeroFlagTag) {
        return zero_flag[block];
    }
    void SetDef(IR::Block* block, ZeroFlagTag, const IR::Value& value) {
        zero_flag.insert_or_assign(block, value);
    }
    const IR::Value& Def(IR::Block* block, SignFlagTag) {
        return sign_flag[block];
    }
    void SetDef(IR::Block* block, SignFlagTag, const IR::Value& value) {
        sign_flag.insert_or_assign(block, value);
    }
    const IR::Value& Def(IR::Block* block, CarryFlagTag) {
        return carry_flag[block];
    }
    void SetDef(IR::Block* block, CarryFlagTag, const IR::Value& value) {
        carry_flag.insert_or_assign(block, value);
    }
    const IR::Value& Def(IR::Block* block, OverflowFlagTag) {
        return overflow_flag[block];
    }
    void SetDef(IR::Block* block, OverflowFlagTag, const IR::Value& value) {
        overflow_flag.insert_or_assign(block, value);
    }

    std::array<ValueMap, IR::NUM_USER_PREDS> preds;
    boost::container::flat_map<u32, ValueMap> goto_vars;
    ValueMap indirect_branch_var;
    ValueMap zero_flag;
    ValueMap sign_flag;
    ValueMap carry_flag;
    ValueMap overflow_flag;
};
// Opcode used to materialize an undefined value for each variable kind:
// 32-bit for registers and branch variables, 1-bit for predicates and flags.
IR::Opcode UndefOpcode(IR::Reg) noexcept {
    return IR::Opcode::UndefU32;
}
IR::Opcode UndefOpcode(IR::Pred) noexcept {
    return IR::Opcode::UndefU1;
}
IR::Opcode UndefOpcode(const FlagTag&) noexcept {
    return IR::Opcode::UndefU1;
}
IR::Opcode UndefOpcode(IndirectBranchVariable) noexcept {
    return IR::Opcode::UndefU32;
}
// Resume points for the explicit-stack state machine in Pass::ReadVariable;
// each ReadState frame records where to continue when it becomes the top.
enum class Status {
    Start,
    SetValue,
    PreparePhiArgument,
    PushPhiArgument,
};
// One frame of the explicit traversal stack used by Pass::ReadVariable.
// The constructor is explicit to prevent accidental implicit conversion from
// IR::Block*; all construction sites build frames directly or via emplace.
template <typename Type>
struct ReadState {
    explicit ReadState(IR::Block* block_) : block{block_} {}
    ReadState() = default;

    IR::Block* block{};           // Block this frame is reading the variable from
    IR::Value result{};           // Value handed back by the finished child frame
    IR::Inst* phi{};              // Phi node currently being filled, if any
    IR::Block* const* pred_it{};  // Next immediate predecessor to visit
    IR::Block* const* pred_end{}; // End of the predecessor range
    Status pc{Status::Start};     // Resume point inside ReadVariable
};
// SSA construction driver implementing the algorithm by Braun et al. (see the
// header comment of this file). Definitions are recorded with WriteVariable;
// reads are resolved with ReadVariable, which inserts phi nodes on demand.
class Pass {
public:
    /// Records 'value' as the current definition of 'variable' in 'block'
    template <typename Type>
    void WriteVariable(Type variable, IR::Block* block, const IR::Value& value) {
        current_def.SetDef(block, variable, value);
    }

    /// Returns the reaching definition of 'variable' at 'root_block', creating
    /// phi nodes where control flow merges. The paper's recursive
    /// readVariableRecursive is flattened into an explicit stack of ReadState
    /// frames that are resumed through their 'pc' field.
    template <typename Type>
    IR::Value ReadVariable(Type variable, IR::Block* root_block) {
        // Bottom sentinel frame receives the final result of the traversal
        boost::container::small_vector<ReadState<Type>, 64> stack{
            ReadState<Type>(nullptr),
            ReadState<Type>(root_block),
        };
        // Pushes the next predecessor as a child frame, or, once every
        // predecessor has been consumed, finalizes the phi and pops the frame
        const auto prepare_phi_operand{[&] {
            if (stack.back().pred_it == stack.back().pred_end) {
                IR::Inst* const phi{stack.back().phi};
                IR::Block* const block{stack.back().block};
                const IR::Value result{TryRemoveTrivialPhi(*phi, block, UndefOpcode(variable))};
                stack.pop_back();
                stack.back().result = result;
                WriteVariable(variable, block, result);
            } else {
                IR::Block* const imm_pred{*stack.back().pred_it};
                stack.back().pc = Status::PushPhiArgument;
                stack.emplace_back(imm_pred);
            }
        }};
        do {
            IR::Block* const block{stack.back().block};
            switch (stack.back().pc) {
            case Status::Start: {
                // Local definition in this block wins outright
                if (const IR::Value& def = current_def.Def(block, variable); !def.IsEmpty()) {
                    stack.back().result = def;
                } else if (!block->IsSsaSealed()) {
                    // Incomplete CFG
                    IR::Inst* phi{&*block->PrependNewInst(block->begin(), IR::Opcode::Phi)};
                    phi->SetFlags(IR::TypeOf(UndefOpcode(variable)));
                    incomplete_phis[block].insert_or_assign(variable, phi);
                    stack.back().result = IR::Value{&*phi};
                } else if (const std::span imm_preds = block->ImmPredecessors();
                           imm_preds.size() == 1) {
                    // Optimize the common case of one predecessor: no phi needed
                    stack.back().pc = Status::SetValue;
                    stack.emplace_back(imm_preds.front());
                    break;
                } else {
                    // Break potential cycles with operandless phi
                    IR::Inst* const phi{&*block->PrependNewInst(block->begin(), IR::Opcode::Phi)};
                    phi->SetFlags(IR::TypeOf(UndefOpcode(variable)));
                    WriteVariable(variable, block, IR::Value{phi});
                    stack.back().phi = phi;
                    stack.back().pred_it = imm_preds.data();
                    stack.back().pred_end = imm_preds.data() + imm_preds.size();
                    prepare_phi_operand();
                    break;
                }
            }
                [[fallthrough]];
            case Status::SetValue: {
                // Child frame finished: cache its value here and propagate down
                const IR::Value result{stack.back().result};
                WriteVariable(variable, block, result);
                stack.pop_back();
                stack.back().result = result;
                break;
            }
            case Status::PushPhiArgument: {
                // Child frame resolved one predecessor: attach it to the phi
                IR::Inst* const phi{stack.back().phi};
                phi->AddPhiOperand(*stack.back().pred_it, stack.back().result);
                ++stack.back().pred_it;
            }
                [[fallthrough]];
            case Status::PreparePhiArgument:
                prepare_phi_operand();
                break;
            }
        } while (stack.size() > 1);
        return stack.back().result;
    }

    /// Marks the block's predecessor list as final and completes any phi nodes
    /// created while the block was unsealed
    void SealBlock(IR::Block* block) {
        const auto it{incomplete_phis.find(block)};
        if (it != incomplete_phis.end()) {
            for (auto& pair : it->second) {
                auto& variant{pair.first};
                auto& phi{pair.second};
                std::visit([&](auto& variable) { AddPhiOperands(variable, *phi, block); }, variant);
            }
        }
        block->SsaSeal();
    }

private:
    /// Adds one operand per immediate predecessor to the phi, then attempts to
    /// simplify the now-complete phi
    template <typename Type>
    IR::Value AddPhiOperands(Type variable, IR::Inst& phi, IR::Block* block) {
        for (IR::Block* const imm_pred : block->ImmPredecessors()) {
            phi.AddPhiOperand(imm_pred, ReadVariable(variable, imm_pred));
        }
        return TryRemoveTrivialPhi(phi, block, UndefOpcode(variable));
    }

    /// tryRemoveTrivialPhi from Braun et al.: replaces a phi that references
    /// only one unique value (besides itself) with that value. Returns the
    /// replacement value, or the phi itself when it is not trivial.
    IR::Value TryRemoveTrivialPhi(IR::Inst& phi, IR::Block* block, IR::Opcode undef_opcode) {
        IR::Value same;
        const size_t num_args{phi.NumArgs()};
        for (size_t arg_index = 0; arg_index < num_args; ++arg_index) {
            const IR::Value& op{phi.Arg(arg_index)};
            if (op.Resolve() == same.Resolve() || op == IR::Value{&phi}) {
                // Unique value or self-reference
                continue;
            }
            if (!same.IsEmpty()) {
                // The phi merges at least two values: not trivial
                return IR::Value{&phi};
            }
            same = op;
        }
        // Remove the phi node from the block, it will be reinserted
        IR::Block::InstructionList& list{block->Instructions()};
        list.erase(IR::Block::InstructionList::s_iterator_to(phi));
        // Find the first non-phi instruction and use it as an insertion point
        IR::Block::iterator reinsert_point{std::ranges::find_if_not(list, IR::IsPhi)};
        if (same.IsEmpty()) {
            // The phi is unreachable or in the start block
            // Insert an undefined instruction and make it the phi node replacement
            // The "phi" node reinsertion point is specified after this instruction
            reinsert_point = block->PrependNewInst(reinsert_point, undef_opcode);
            same = IR::Value{&*reinsert_point};
            ++reinsert_point;
        }
        // Reinsert the phi node and reroute all its uses to the "same" value
        list.insert(reinsert_point, phi);
        phi.ReplaceUsesWith(same);
        // TODO: Try to recursively remove all phi users, which might have become trivial
        return same;
    }

    // Phis created in unsealed blocks, completed when the block is sealed
    boost::container::flat_map<IR::Block*, boost::container::flat_map<Variant, IR::Inst*>>
        incomplete_phis;
    // Latest definition of every variable, per block
    DefTable current_def;
};
// Rewrites one Get/Set pseudo-instruction:
// - Set* opcodes record the stored value as the variable's current definition.
// - Get* opcodes are replaced with the variable's reaching definition.
// Writes to register RZ and predicate PT are dropped, and reads of them are
// left untouched.
void VisitInst(Pass& pass, IR::Block* block, IR::Inst& inst) {
    switch (inst.GetOpcode()) {
    case IR::Opcode::SetRegister:
        if (const IR::Reg reg{inst.Arg(0).Reg()}; reg != IR::Reg::RZ) {
            pass.WriteVariable(reg, block, inst.Arg(1));
        }
        break;
    case IR::Opcode::SetPred:
        if (const IR::Pred pred{inst.Arg(0).Pred()}; pred != IR::Pred::PT) {
            pass.WriteVariable(pred, block, inst.Arg(1));
        }
        break;
    case IR::Opcode::SetGotoVariable:
        pass.WriteVariable(GotoVariable{inst.Arg(0).U32()}, block, inst.Arg(1));
        break;
    case IR::Opcode::SetIndirectBranchVariable:
        pass.WriteVariable(IndirectBranchVariable{}, block, inst.Arg(0));
        break;
    case IR::Opcode::SetZFlag:
        pass.WriteVariable(ZeroFlagTag{}, block, inst.Arg(0));
        break;
    case IR::Opcode::SetSFlag:
        pass.WriteVariable(SignFlagTag{}, block, inst.Arg(0));
        break;
    case IR::Opcode::SetCFlag:
        pass.WriteVariable(CarryFlagTag{}, block, inst.Arg(0));
        break;
    case IR::Opcode::SetOFlag:
        pass.WriteVariable(OverflowFlagTag{}, block, inst.Arg(0));
        break;
    case IR::Opcode::GetRegister:
        if (const IR::Reg reg{inst.Arg(0).Reg()}; reg != IR::Reg::RZ) {
            inst.ReplaceUsesWith(pass.ReadVariable(reg, block));
        }
        break;
    case IR::Opcode::GetPred:
        if (const IR::Pred pred{inst.Arg(0).Pred()}; pred != IR::Pred::PT) {
            inst.ReplaceUsesWith(pass.ReadVariable(pred, block));
        }
        break;
    case IR::Opcode::GetGotoVariable:
        inst.ReplaceUsesWith(pass.ReadVariable(GotoVariable{inst.Arg(0).U32()}, block));
        break;
    case IR::Opcode::GetIndirectBranchVariable:
        inst.ReplaceUsesWith(pass.ReadVariable(IndirectBranchVariable{}, block));
        break;
    case IR::Opcode::GetZFlag:
        inst.ReplaceUsesWith(pass.ReadVariable(ZeroFlagTag{}, block));
        break;
    case IR::Opcode::GetSFlag:
        inst.ReplaceUsesWith(pass.ReadVariable(SignFlagTag{}, block));
        break;
    case IR::Opcode::GetCFlag:
        inst.ReplaceUsesWith(pass.ReadVariable(CarryFlagTag{}, block));
        break;
    case IR::Opcode::GetOFlag:
        inst.ReplaceUsesWith(pass.ReadVariable(OverflowFlagTag{}, block));
        break;
    default:
        break;
    }
}
// Rewrites every instruction in the block, then seals it so later reads can
// rely on its final predecessor list.
void VisitBlock(Pass& pass, IR::Block* block) {
    auto& instructions{block->Instructions()};
    for (auto inst_it = instructions.begin(); inst_it != instructions.end(); ++inst_it) {
        VisitInst(pass, block, *inst_it);
    }
    pass.SealBlock(block);
}
// Breadth-first search through an instruction's (transitive) arguments for the
// first one carrying a non-opaque type. Returns Opaque when none is found.
IR::Type GetConcreteType(IR::Inst* inst) {
    std::deque<IR::Inst*> worklist;
    worklist.push_back(inst);
    while (!worklist.empty()) {
        IR::Inst* const current{worklist.front()};
        worklist.pop_front();
        const size_t num_args{current->NumArgs()};
        for (size_t arg_index = 0; arg_index < num_args; ++arg_index) {
            const IR::Value arg{current->Arg(arg_index)};
            if (const IR::Type arg_type{arg.Type()}; arg_type != IR::Type::Opaque) {
                return arg_type;
            }
            if (!arg.IsImmediate()) {
                // Opaque producer: inspect its arguments later (FIFO keeps BFS order)
                worklist.push_back(arg.Inst());
            }
        }
    }
    return IR::Type::Opaque;
}
} // Anonymous namespace
// Entry point: rewrites the whole program into SSA form, then gives every phi
// a concrete type and a canonical operand order.
void SsaRewritePass(IR::Program& program) {
    Pass pass;
    auto& blocks{program.post_order_blocks};
    // First pass: visit blocks in reverse post order and rewrite Get/Set ops
    for (auto block_it = blocks.rbegin(); block_it != blocks.rend(); ++block_it) {
        VisitBlock(pass, *block_it);
    }
    // Second pass: patch up the phi nodes created along the way
    for (auto block_it = blocks.rbegin(); block_it != blocks.rend(); ++block_it) {
        for (IR::Inst& inst : (*block_it)->Instructions()) {
            if (inst.GetOpcode() != IR::Opcode::Phi) {
                continue;
            }
            if (inst.Type() == IR::Type::Opaque) {
                inst.SetFlags(GetConcreteType(&inst));
            }
            inst.OrderPhiArgs();
        }
    }
}
} // namespace Shader::Optimization

File diff suppressed because it is too large Load Diff

View File

@@ -1,97 +1,97 @@
// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include <map>
#include <set>
#include "shader_recompiler/exception.h"
#include "shader_recompiler/frontend/ir/basic_block.h"
#include "shader_recompiler/frontend/ir/value.h"
#include "shader_recompiler/ir_opt/passes.h"
namespace Shader::Optimization {
// Throws LogicError if any non-phi instruction has an argument whose type is
// incompatible with the type its opcode expects at that position.
static void ValidateTypes(const IR::Program& program) {
    for (const auto& block : program.blocks) {
        for (const IR::Inst& inst : *block) {
            if (inst.GetOpcode() == IR::Opcode::Phi) {
                // Skip validation on phi nodes
                continue;
            }
            const size_t num_args{inst.NumArgs()};
            for (size_t i = 0; i < num_args; ++i) {
                const IR::Type t1{inst.Arg(i).Type()};
                const IR::Type t2{IR::ArgTypeOf(inst.GetOpcode(), i)};
                if (!IR::AreTypesCompatible(t1, t2)) {
                    throw LogicError("Invalid types in block:\n{}", IR::DumpBlock(*block));
                }
            }
        }
    }
}
// Recounts how many times each instruction appears as an argument across the
// whole program and throws LogicError if any count disagrees with the
// instruction's cached UseCount().
static void ValidateUses(const IR::Program& program) {
    std::map<IR::Inst*, int> actual_uses;
    for (const auto& block : program.blocks) {
        for (const IR::Inst& inst : *block) {
            const size_t num_args{inst.NumArgs()};
            for (size_t i = 0; i < num_args; ++i) {
                const IR::Value arg{inst.Arg(i)};
                if (!arg.IsImmediate()) {
                    ++actual_uses[arg.Inst()];
                }
            }
        }
    }
    for (const auto& [inst, uses] : actual_uses) {
        if (inst->UseCount() != uses) {
            throw LogicError("Invalid uses in block: {}", IR::DumpProgram(program));
        }
    }
}
// Throws LogicError if a non-phi instruction consumes a value that has not
// been defined earlier in program order. Phi operands are exempt because they
// may legally reference later definitions (loop back-edges).
static void ValidateForwardDeclarations(const IR::Program& program) {
    std::set<const IR::Inst*> definitions;
    for (const IR::Block* const block : program.blocks) {
        for (const IR::Inst& inst : *block) {
            definitions.emplace(&inst);
            if (inst.GetOpcode() == IR::Opcode::Phi) {
                // Phi nodes can have forward declarations
                continue;
            }
            const size_t num_args{inst.NumArgs()};
            for (size_t arg = 0; arg < num_args; ++arg) {
                if (inst.Arg(arg).IsImmediate()) {
                    continue;
                }
                if (!definitions.contains(inst.Arg(arg).Inst())) {
                    throw LogicError("Forward declaration in block: {}", IR::DumpBlock(*block));
                }
            }
        }
    }
}
// Throws LogicError unless every phi node in a block appears in one contiguous
// run at the top of the block (no phi after the first non-phi instruction).
static void ValidatePhiNodes(const IR::Program& program) {
    for (const IR::Block* const block : program.blocks) {
        bool no_more_phis{false};
        for (const IR::Inst& inst : *block) {
            if (inst.GetOpcode() == IR::Opcode::Phi) {
                if (no_more_phis) {
                    throw LogicError("Interleaved phi nodes: {}", IR::DumpBlock(*block));
                }
            } else {
                no_more_phis = true;
            }
        }
    }
}
// Runs all IR consistency checks in a fixed order; throws LogicError on the
// first violation found.
void VerificationPass(const IR::Program& program) {
    ValidateTypes(program);
    ValidateUses(program);
    ValidateForwardDeclarations(program);
    ValidatePhiNodes(program);
}
} // namespace Shader::Optimization
// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include <map>
#include <set>
#include "shader_recompiler/exception.h"
#include "shader_recompiler/frontend/ir/basic_block.h"
#include "shader_recompiler/frontend/ir/value.h"
#include "shader_recompiler/ir_opt/passes.h"
namespace Shader::Optimization {
// Throws LogicError if any non-phi instruction has an argument whose type is
// incompatible with the type its opcode expects at that position.
static void ValidateTypes(const IR::Program& program) {
    for (const auto& block : program.blocks) {
        for (const IR::Inst& inst : *block) {
            if (inst.GetOpcode() == IR::Opcode::Phi) {
                // Skip validation on phi nodes
                continue;
            }
            const size_t num_args{inst.NumArgs()};
            for (size_t i = 0; i < num_args; ++i) {
                const IR::Type t1{inst.Arg(i).Type()};
                const IR::Type t2{IR::ArgTypeOf(inst.GetOpcode(), i)};
                if (!IR::AreTypesCompatible(t1, t2)) {
                    throw LogicError("Invalid types in block:\n{}", IR::DumpBlock(*block));
                }
            }
        }
    }
}
// Recounts how many times each instruction appears as an argument across the
// whole program and throws LogicError if any count disagrees with the
// instruction's cached UseCount().
static void ValidateUses(const IR::Program& program) {
    std::map<IR::Inst*, int> actual_uses;
    for (const auto& block : program.blocks) {
        for (const IR::Inst& inst : *block) {
            const size_t num_args{inst.NumArgs()};
            for (size_t i = 0; i < num_args; ++i) {
                const IR::Value arg{inst.Arg(i)};
                if (!arg.IsImmediate()) {
                    ++actual_uses[arg.Inst()];
                }
            }
        }
    }
    for (const auto& [inst, uses] : actual_uses) {
        if (inst->UseCount() != uses) {
            throw LogicError("Invalid uses in block: {}", IR::DumpProgram(program));
        }
    }
}
// Throws LogicError if a non-phi instruction consumes a value that has not
// been defined earlier in program order. Phi operands are exempt because they
// may legally reference later definitions (loop back-edges).
static void ValidateForwardDeclarations(const IR::Program& program) {
    std::set<const IR::Inst*> definitions;
    for (const IR::Block* const block : program.blocks) {
        for (const IR::Inst& inst : *block) {
            definitions.emplace(&inst);
            if (inst.GetOpcode() == IR::Opcode::Phi) {
                // Phi nodes can have forward declarations
                continue;
            }
            const size_t num_args{inst.NumArgs()};
            for (size_t arg = 0; arg < num_args; ++arg) {
                if (inst.Arg(arg).IsImmediate()) {
                    continue;
                }
                if (!definitions.contains(inst.Arg(arg).Inst())) {
                    throw LogicError("Forward declaration in block: {}", IR::DumpBlock(*block));
                }
            }
        }
    }
}
// Throws LogicError unless every phi node in a block appears in one contiguous
// run at the top of the block (no phi after the first non-phi instruction).
static void ValidatePhiNodes(const IR::Program& program) {
    for (const IR::Block* const block : program.blocks) {
        bool no_more_phis{false};
        for (const IR::Inst& inst : *block) {
            if (inst.GetOpcode() == IR::Opcode::Phi) {
                if (no_more_phis) {
                    throw LogicError("Interleaved phi nodes: {}", IR::DumpBlock(*block));
                }
            } else {
                no_more_phis = true;
            }
        }
    }
}
// Runs all IR consistency checks in a fixed order; throws LogicError on the
// first violation found.
void VerificationPass(const IR::Program& program) {
    ValidateTypes(program);
    ValidateUses(program);
    ValidateForwardDeclarations(program);
    ValidatePhiNodes(program);
}
} // namespace Shader::Optimization