early-access version 1866
This commit is contained in:
@@ -0,0 +1,214 @@
|
||||
// Copyright 2021 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include "common/bit_field.h"
|
||||
#include "common/common_types.h"
|
||||
#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
|
||||
|
||||
namespace Shader::Maxwell {
|
||||
namespace {
|
||||
// Atomic operation selector. It is decoded straight out of the `op` bit field of the
// ATOM and RED encodings in this file, so the enumerator values must keep this order.
enum class AtomOp : u64 {
    ADD = 0,
    MIN = 1,
    MAX = 2,
    INC = 3,
    DEC = 4,
    AND = 5,
    OR = 6,
    XOR = 7,
    EXCH = 8,
    SAFEADD = 9,
};
|
||||
|
||||
// Operand type selector decoded from the `size` bit field of ATOM and RED.
// Values are raw encoding values; do not reorder.
enum class AtomSize : u64 {
    U32 = 0,
    S32 = 1,
    U64 = 2,
    F32 = 3,
    F16x2 = 4,
    S64 = 5,
};
|
||||
|
||||
// Emits the global-memory integer atomic selected by `op` at `offset` with operand `op_b`
// and returns the value produced by the emitter.
// `is_signed` is only forwarded to the MIN and MAX variants.
// Throws NotImplementedException for any operation without a case below (e.g. SAFEADD).
IR::U32U64 ApplyIntegerAtomOp(IR::IREmitter& ir, const IR::U32U64& offset, const IR::U32U64& op_b,
                              AtomOp op, bool is_signed) {
    switch (op) {
    case AtomOp::ADD: {
        return ir.GlobalAtomicIAdd(offset, op_b);
    }
    case AtomOp::MIN: {
        return ir.GlobalAtomicIMin(offset, op_b, is_signed);
    }
    case AtomOp::MAX: {
        return ir.GlobalAtomicIMax(offset, op_b, is_signed);
    }
    case AtomOp::INC: {
        return ir.GlobalAtomicInc(offset, op_b);
    }
    case AtomOp::DEC: {
        return ir.GlobalAtomicDec(offset, op_b);
    }
    case AtomOp::AND: {
        return ir.GlobalAtomicAnd(offset, op_b);
    }
    case AtomOp::OR: {
        return ir.GlobalAtomicOr(offset, op_b);
    }
    case AtomOp::XOR: {
        return ir.GlobalAtomicXor(offset, op_b);
    }
    case AtomOp::EXCH: {
        return ir.GlobalAtomicExchange(offset, op_b);
    }
    default:
        break;
    }
    throw NotImplementedException("Integer Atom Operation {}", op);
}
|
||||
|
||||
// Emits the global-memory floating-point atomic selected by `op`.
// ADD picks the F32 variant when `size` is F32 and the F16x2 variant otherwise;
// MIN and MAX always use the F16x2 variants. Any other operation throws
// NotImplementedException.
IR::Value ApplyFpAtomOp(IR::IREmitter& ir, const IR::U64& offset, const IR::Value& op_b, AtomOp op,
                        AtomSize size) {
    // Control words handed to the emitter for the packed-half and single-precision paths.
    static constexpr IR::FpControl half2_control{
        .no_contraction = false,
        .rounding = IR::FpRounding::RN,
        .fmz_mode = IR::FmzMode::DontCare,
    };
    static constexpr IR::FpControl single_control{
        .no_contraction = false,
        .rounding = IR::FpRounding::RN,
        .fmz_mode = IR::FmzMode::FTZ,
    };
    switch (op) {
    case AtomOp::ADD:
        if (size == AtomSize::F32) {
            return ir.GlobalAtomicF32Add(offset, op_b, single_control);
        }
        return ir.GlobalAtomicF16x2Add(offset, op_b, half2_control);
    case AtomOp::MIN:
        return ir.GlobalAtomicF16x2Min(offset, op_b, half2_control);
    case AtomOp::MAX:
        return ir.GlobalAtomicF16x2Max(offset, op_b, half2_control);
    default:
        break;
    }
    throw NotImplementedException("FP Atom Operation {}", op);
}
|
||||
|
||||
// Builds the 64-bit address operand shared by ATOM and RED: the value of the address
// register plus the immediate encoded in bits [28, 48).
// When the `e` bit is clear the 32-bit register is widened with UConvert; otherwise the
// register is read through L(). With RZ as address register the immediate is taken
// unsigned, otherwise it is sign-extended.
IR::U64 AtomOffset(TranslatorVisitor& v, u64 insn) {
    union {
        u64 raw;
        BitField<8, 8, IR::Reg> addr_reg;
        BitField<28, 20, s64> addr_offset;
        BitField<28, 20, u64> rz_addr_offset;
        BitField<48, 1, u64> e;
    } const mem{insn};

    const IR::U64 base{[&]() -> IR::U64 {
        if (mem.e != 0) {
            return v.L(mem.addr_reg);
        }
        return v.ir.UConvert(64, v.X(mem.addr_reg));
    }()};

    // When RZ is used, the address is an absolute address
    const bool is_absolute{mem.addr_reg == IR::Reg::RZ};
    const u64 immediate{is_absolute ? static_cast<u64>(mem.rz_addr_offset.Value())
                                    : static_cast<u64>(mem.addr_offset.Value())};
    return v.ir.IAdd(base, v.ir.Imm64(immediate));
}
|
||||
|
||||
// Returns true when `op` is treated as not applicable to operands of type `size`.
// GlobalAtomic uses this to fall back to a plain load instead of an atomic.
// U32 (and any unlisted size) reports every operation as applicable.
bool AtomOpNotApplicable(AtomSize size, AtomOp op) {
    // TODO: SAFEADD
    const bool is_min_or_max{op == AtomOp::MIN || op == AtomOp::MAX};
    const bool is_inc_or_dec{op == AtomOp::INC || op == AtomOp::DEC};
    switch (size) {
    case AtomSize::S32:
    case AtomSize::U64:
        return is_inc_or_dec;
    case AtomSize::S64:
        return !is_min_or_max;
    case AtomSize::F32:
        return op != AtomOp::ADD;
    case AtomSize::F16x2:
        return op != AtomOp::ADD && !is_min_or_max;
    default:
        return false;
    }
}
|
||||
|
||||
// Emits a plain global load of the width implied by `size`: LoadGlobal32 for the 32-bit
// types, LoadGlobal64 packed with PackUint2x32 for U64/S64.
// Throws NotImplementedException for any other size value.
IR::U32U64 LoadGlobal(IR::IREmitter& ir, const IR::U64& offset, AtomSize size) {
    switch (size) {
    case AtomSize::U64:
    case AtomSize::S64:
        return ir.PackUint2x32(ir.LoadGlobal64(offset));
    case AtomSize::U32:
    case AtomSize::S32:
    case AtomSize::F32:
    case AtomSize::F16x2:
        return ir.LoadGlobal32(offset);
    default:
        break;
    }
    throw NotImplementedException("Atom Size {}", size);
}
|
||||
|
||||
// Writes `result` to `dest_reg` using the register accessor that matches `size`:
// X() for U32/S32/F16x2, L() for U64/S64 and F() for F32.
// Any other size value is silently ignored.
void StoreResult(TranslatorVisitor& v, IR::Reg dest_reg, const IR::Value& result, AtomSize size) {
    switch (size) {
    case AtomSize::U32:
    case AtomSize::S32:
    case AtomSize::F16x2:
        v.X(dest_reg, IR::U32{result});
        break;
    case AtomSize::U64:
    case AtomSize::S64:
        v.L(dest_reg, IR::U64{result});
        break;
    case AtomSize::F32:
        v.F(dest_reg, IR::F32{result});
        break;
    default:
        break;
    }
}
|
||||
|
||||
// Reads the operand register with the accessor that matches `size` and dispatches to the
// integer or floating-point atomic helper. The signed flag passed to the integer helper
// is true for S32 and S64 only.
// Throws NotImplementedException for an unknown size.
IR::Value ApplyAtomOp(TranslatorVisitor& v, IR::Reg operand_reg, const IR::U64& offset,
                      AtomSize size, AtomOp op) {
    switch (size) {
    case AtomSize::U32:
        return ApplyIntegerAtomOp(v.ir, offset, v.X(operand_reg), op, false);
    case AtomSize::S32:
        return ApplyIntegerAtomOp(v.ir, offset, v.X(operand_reg), op, true);
    case AtomSize::U64:
        return ApplyIntegerAtomOp(v.ir, offset, v.L(operand_reg), op, false);
    case AtomSize::S64:
        return ApplyIntegerAtomOp(v.ir, offset, v.L(operand_reg), op, true);
    case AtomSize::F32:
        return ApplyFpAtomOp(v.ir, offset, v.F(operand_reg), op, size);
    case AtomSize::F16x2:
        // The packed halves are unpacked from the 32-bit register before the atomic.
        return ApplyFpAtomOp(v.ir, offset, v.ir.UnpackFloat2x16(v.X(operand_reg)), op, size);
    default:
        break;
    }
    throw NotImplementedException("Atom Size {}", size);
}
|
||||
|
||||
// Common body of the global atomic instructions.
// Emits the atomic selected by (`size`, `op`), or a plain load of the same width when
// AtomOpNotApplicable() rejects the combination, and stores the resulting value to
// `dest_reg` only when `write_dest` is set.
void GlobalAtomic(TranslatorVisitor& v, IR::Reg dest_reg, IR::Reg operand_reg,
                  const IR::U64& offset, AtomSize size, AtomOp op, bool write_dest) {
    IR::Value value;
    if (!AtomOpNotApplicable(size, op)) {
        value = ApplyAtomOp(v, operand_reg, offset, size, op);
    } else {
        value = LoadGlobal(v.ir, offset, size);
    }
    if (!write_dest) {
        return;
    }
    StoreResult(v, dest_reg, value, size);
}
|
||||
} // Anonymous namespace
|
||||
|
||||
// Translates ATOM: decodes destination, operand, size and operation from `insn`, builds
// the address with AtomOffset() and emits the atomic, writing the result to the
// destination register.
void TranslatorVisitor::ATOM(u64 insn) {
    union {
        u64 raw;
        BitField<0, 8, IR::Reg> dest_reg;
        BitField<20, 8, IR::Reg> operand_reg;
        BitField<49, 3, AtomSize> size;
        BitField<52, 4, AtomOp> op;
    } const encoding{insn};

    const IR::U64 address{AtomOffset(*this, insn)};
    constexpr bool write_dest{true};
    GlobalAtomic(*this, encoding.dest_reg, encoding.operand_reg, address, encoding.size,
                 encoding.op, write_dest);
}
|
||||
|
||||
// Translates RED: decodes operand, size and operation from `insn`, builds the address
// with AtomOffset() and emits the atomic.
// The encoding decoded here has no destination register field, so the result is not
// stored: `write_dest` is false and the RZ destination is only a placeholder.
// NOTE(review): this relies on the emitted atomic being kept even though its result is
// unused (i.e. the IR treats it as having side effects) - confirm against the IR passes.
void TranslatorVisitor::RED(u64 insn) {
    union {
        u64 raw;
        BitField<0, 8, IR::Reg> operand_reg;
        BitField<20, 3, AtomSize> size;
        BitField<23, 3, AtomOp> op;
    } const red{insn};

    const IR::U64 offset{AtomOffset(*this, insn)};
    constexpr bool write_dest{false};
    GlobalAtomic(*this, IR::Reg::RZ, red.operand_reg, offset, red.size, red.op, write_dest);
}
|
||||
|
||||
} // namespace Shader::Maxwell
|
@@ -0,0 +1,110 @@
|
||||
// Copyright 2021 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include "common/bit_field.h"
|
||||
#include "common/common_types.h"
|
||||
#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
|
||||
|
||||
namespace Shader::Maxwell {
|
||||
namespace {
|
||||
// Atomic operation selector decoded from the `op` bit field of ATOMS.
// Values are raw encoding values; do not reorder.
enum class AtomOp : u64 {
    ADD = 0,
    MIN = 1,
    MAX = 2,
    INC = 3,
    DEC = 4,
    AND = 5,
    OR = 6,
    XOR = 7,
    EXCH = 8,
};
|
||||
|
||||
// Operand type selector decoded from the 2-bit `size` field of ATOMS.
// Values are raw encoding values; do not reorder.
enum class AtomsSize : u64 {
    U32 = 0,
    S32 = 1,
    U64 = 2,
};
|
||||
|
||||
// Emits the shared-memory atomic selected by `op` at `offset` with operand `op_b` and
// returns the value produced by the emitter.
// `is_signed` is only forwarded to the MIN and MAX variants.
// Throws NotImplementedException for an operation without a case below.
IR::U32U64 ApplyAtomsOp(IR::IREmitter& ir, const IR::U32& offset, const IR::U32U64& op_b, AtomOp op,
                        bool is_signed) {
    switch (op) {
    case AtomOp::ADD: {
        return ir.SharedAtomicIAdd(offset, op_b);
    }
    case AtomOp::MIN: {
        return ir.SharedAtomicIMin(offset, op_b, is_signed);
    }
    case AtomOp::MAX: {
        return ir.SharedAtomicIMax(offset, op_b, is_signed);
    }
    case AtomOp::INC: {
        return ir.SharedAtomicInc(offset, op_b);
    }
    case AtomOp::DEC: {
        return ir.SharedAtomicDec(offset, op_b);
    }
    case AtomOp::AND: {
        return ir.SharedAtomicAnd(offset, op_b);
    }
    case AtomOp::OR: {
        return ir.SharedAtomicOr(offset, op_b);
    }
    case AtomOp::XOR: {
        return ir.SharedAtomicXor(offset, op_b);
    }
    case AtomOp::EXCH: {
        return ir.SharedAtomicExchange(offset, op_b);
    }
    default:
        break;
    }
    throw NotImplementedException("Integer Atoms Operation {}", op);
}
|
||||
|
||||
// Builds the 32-bit offset operand of ATOMS from the immediate in bits [30, 52),
// shifted left by two.
// With RZ as offset register the immediate is taken unsigned and used on its own;
// otherwise it is sign-extended and added to the register value.
IR::U32 AtomsOffset(TranslatorVisitor& v, u64 insn) {
    union {
        u64 raw;
        BitField<8, 8, IR::Reg> offset_reg;
        BitField<30, 22, u64> absolute_offset;
        BitField<30, 22, s64> relative_offset;
    } const encoding{insn};

    if (encoding.offset_reg != IR::Reg::RZ) {
        const s32 relative{static_cast<s32>(encoding.relative_offset << 2)};
        return v.ir.IAdd(v.X(encoding.offset_reg), v.ir.Imm32(relative));
    }
    return v.ir.Imm32(static_cast<u32>(encoding.absolute_offset << 2));
}
|
||||
|
||||
// Writes `result` to `dest_reg`: through X() for the 32-bit sizes and through L() for U64.
// Any other size value is silently ignored.
void StoreResult(TranslatorVisitor& v, IR::Reg dest_reg, const IR::Value& result, AtomsSize size) {
    switch (size) {
    case AtomsSize::U32:
    case AtomsSize::S32:
        v.X(dest_reg, IR::U32{result});
        break;
    case AtomsSize::U64:
        v.L(dest_reg, IR::U64{result});
        break;
    default:
        break;
    }
}
|
||||
} // Anonymous namespace
|
||||
|
||||
// Translates ATOMS (shared-memory atomic).
// 64-bit operands are only accepted for EXCH; every other 64-bit operation throws
// NotImplementedException before any IR is emitted. The result of the atomic is written
// to the destination register.
void TranslatorVisitor::ATOMS(u64 insn) {
    union {
        u64 raw;
        BitField<0, 8, IR::Reg> dest_reg;
        BitField<8, 8, IR::Reg> addr_reg;
        BitField<20, 8, IR::Reg> src_reg_b;
        BitField<28, 2, AtomsSize> size;
        BitField<52, 4, AtomOp> op;
    } const atoms{insn};

    const bool is_64_bit{atoms.size == AtomsSize::U64};
    if (is_64_bit && atoms.op != AtomOp::EXCH) {
        throw NotImplementedException("64-bit Atoms Operation {}", atoms.op.Value());
    }
    const bool is_signed{atoms.size == AtomsSize::S32};
    const IR::U32 offset{AtomsOffset(*this, insn)};

    // The operand is read as a 64-bit register pair or a single 32-bit register.
    IR::Value result;
    if (!is_64_bit) {
        result = ApplyAtomsOp(ir, offset, X(atoms.src_reg_b), atoms.op, is_signed);
    } else {
        result = ApplyAtomsOp(ir, offset, L(atoms.src_reg_b), atoms.op, is_signed);
    }
    StoreResult(*this, atoms.dest_reg, result, atoms.size);
}
|
||||
|
||||
} // namespace Shader::Maxwell
|
@@ -0,0 +1,35 @@
|
||||
// Copyright 2021 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include "common/bit_field.h"
|
||||
#include "common/common_types.h"
|
||||
#include "shader_recompiler/frontend/maxwell/opcodes.h"
|
||||
#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
|
||||
|
||||
namespace Shader::Maxwell {
|
||||
|
||||
namespace {
// Width selector decoded from the 2-bit `bitsize` field of AL2P.
// Kept in an anonymous namespace so this file-local type has internal linkage and cannot
// clash with an unrelated `BitSize` defined in another translation unit.
enum class BitSize : u64 {
    B32,
    B64,
    B96,
    B128,
};
} // Anonymous namespace
|
||||
|
||||
// Translates AL2P for the 32-bit case: the destination register receives the indexing
// register plus the signed 11-bit immediate offset.
// Any bit size other than B32 throws NotImplementedException.
void TranslatorVisitor::AL2P(u64 inst) {
    union {
        u64 raw;
        BitField<0, 8, IR::Reg> result_register;
        BitField<8, 8, IR::Reg> indexing_register;
        BitField<20, 11, s64> offset;
        BitField<47, 2, BitSize> bitsize;
    } const al2p{inst};

    if (al2p.bitsize != BitSize::B32) {
        throw NotImplementedException("BitSize {}", al2p.bitsize.Value());
    }
    const u32 raw_offset{static_cast<u32>(al2p.offset.Value())};
    const IR::U32 immediate{ir.Imm32(raw_offset)};
    const IR::U32 sum{ir.IAdd(X(al2p.indexing_register), immediate)};
    X(al2p.result_register, sum);
}
|
||||
|
||||
} // namespace Shader::Maxwell
|
96
src/shader_recompiler/frontend/maxwell/translate/impl/barrier_operations.cpp
Executable file
96
src/shader_recompiler/frontend/maxwell/translate/impl/barrier_operations.cpp
Executable file
@@ -0,0 +1,96 @@
|
||||
// Copyright 2021 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include "common/bit_field.h"
|
||||
#include "common/common_types.h"
|
||||
#include "shader_recompiler/frontend/ir/modifiers.h"
|
||||
#include "shader_recompiler/frontend/maxwell/opcodes.h"
|
||||
#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
|
||||
|
||||
namespace Shader::Maxwell {
|
||||
namespace {
|
||||
// Seems to be in CUDA terminology.
|
||||
// Scope selector decoded from the 2-bit `scope` field of MEMBAR.
// Values are raw encoding values; do not reorder.
enum class LocalScope : u64 {
    CTA = 0,
    GL = 1,
    SYS = 2,
    VC = 3,
};
|
||||
} // Anonymous namespace
|
||||
|
||||
// Translates MEMBAR: a CTA scope becomes a workgroup memory barrier, every other scope
// becomes a device memory barrier.
void TranslatorVisitor::MEMBAR(u64 inst) {
    union {
        u64 raw;
        BitField<8, 2, LocalScope> scope;
    } const membar{inst};

    if (membar.scope != LocalScope::CTA) {
        ir.DeviceMemoryBarrier();
        return;
    }
    ir.WorkgroupMemoryBarrier();
}
|
||||
|
||||
// Translates DEPBAR. No IR is emitted for this instruction.
void TranslatorVisitor::DEPBAR() {
    // DEPBAR is a no-op
}
|
||||
|
||||
// Translates BAR. Only the Sync mode with both inputs given as zero immediates is
// supported, and it is emitted as a plain barrier; everything else throws
// NotImplementedException.
void TranslatorVisitor::BAR(u64 insn) {
    enum class Mode {
        RedPopc,
        Scan,
        RedAnd,
        RedOr,
        Sync,
        Arrive,
    };
    union {
        u64 raw;
        BitField<43, 1, u64> is_a_imm;
        BitField<44, 1, u64> is_b_imm;
        BitField<8, 8, u64> imm_a;
        BitField<20, 12, u64> imm_b;
        BitField<42, 1, u64> neg_pred;
        BitField<39, 3, IR::Pred> pred;
    } const bar{insn};

    // The mode is identified by a fixed set of encoding bits rather than a contiguous field.
    const u64 mode_bits{insn & 0x0000009B00000000ULL};
    const Mode mode{[mode_bits] {
        switch (mode_bits) {
        case 0x0000000200000000ULL:
            return Mode::RedPopc;
        case 0x0000000300000000ULL:
            return Mode::Scan;
        case 0x0000000A00000000ULL:
            return Mode::RedAnd;
        case 0x0000001200000000ULL:
            return Mode::RedOr;
        case 0x0000008000000000ULL:
            return Mode::Sync;
        case 0x0000008100000000ULL:
            return Mode::Arrive;
        }
        throw NotImplementedException("Invalid encoding");
    }()};
    if (mode != Mode::Sync) {
        throw NotImplementedException("BAR mode {}", mode);
    }

    // Input A must be the immediate zero.
    if (bar.is_a_imm == 0) {
        throw NotImplementedException("Non-immediate input A");
    }
    if (bar.imm_a != 0) {
        throw NotImplementedException("Non-zero input A");
    }
    // Input B must be the immediate zero.
    if (bar.is_b_imm == 0) {
        throw NotImplementedException("Non-immediate input B");
    }
    if (bar.imm_b != 0) {
        throw NotImplementedException("Non-zero input B");
    }
    // NOTE(review): this only rejects a negated predicate other than PT; a non-negated
    // P0..P6 or a negated PT passes through. If every non-true predicate should be
    // rejected the condition would need `||` - confirm the intent.
    const bool negated{bar.neg_pred != 0};
    if (bar.pred != IR::Pred::PT && negated) {
        throw NotImplementedException("Non-true input predicate");
    }
    ir.Barrier();
}
|
||||
|
||||
} // namespace Shader::Maxwell
|
74
src/shader_recompiler/frontend/maxwell/translate/impl/bitfield_extract.cpp
Executable file
74
src/shader_recompiler/frontend/maxwell/translate/impl/bitfield_extract.cpp
Executable file
@@ -0,0 +1,74 @@
|
||||
// Copyright 2021 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include "common/bit_field.h"
|
||||
#include "common/common_types.h"
|
||||
#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
|
||||
|
||||
namespace Shader::Maxwell {
|
||||
namespace {
|
||||
// Shared implementation of the BFE variants.
// `src` supplies the field position in bits [0, 8) and the field length in bits [8, 16);
// the value to extract from is read from the register in bits [8, 16) of `insn`,
// optionally bit-reversed first. The raw BitFieldExtract result is then patched for the
// edge cases handled below, written to the destination register and, when the cc bit is
// set, reflected in the Z and S flags (C and O are reset).
void BFE(TranslatorVisitor& v, u64 insn, const IR::U32& src) {
    union {
        u64 raw;
        BitField<0, 8, IR::Reg> dest_reg;
        BitField<8, 8, IR::Reg> offset_reg;
        BitField<40, 1, u64> brev;
        BitField<47, 1, u64> cc;
        BitField<48, 1, u64> is_signed;
    } const bfe{insn};
    const bool sign_extend{bfe.is_signed != 0};

    const IR::U32 position{v.ir.BitFieldExtract(src, v.ir.Imm32(0), v.ir.Imm32(8), false)};
    const IR::U32 length{v.ir.BitFieldExtract(src, v.ir.Imm32(8), v.ir.Imm32(8), false)};

    // Common constants
    const IR::U32 zero{v.ir.Imm32(0)};
    const IR::U32 one{v.ir.Imm32(1)};
    const IR::U32 word_bits{v.ir.Imm32(32)};

    // Edge case conditions
    const IR::U1 length_is_zero{v.ir.IEqual(length, zero)};
    const IR::U1 field_reaches_top{
        v.ir.IGreaterThanEqual(v.ir.IAdd(position, length), word_bits, false)};
    const IR::U1 position_out_of_word{v.ir.IGreaterThanEqual(position, word_bits, false)};

    IR::U32 input{v.X(bfe.offset_reg)};
    if (bfe.brev != 0) {
        input = v.ir.BitReverse(input);
    }
    IR::U32 extracted{v.ir.BitFieldExtract(input, position, length, sign_extend)};
    if (sign_extend) {
        const IR::U1 input_negative{v.ir.ILessThan(input, zero, true)};
        const IR::U32 sign_fill{v.ir.Select(input_negative, v.ir.Imm32(-1), zero)};
        const IR::U32 top_bit{v.ir.BitFieldExtract(input, v.ir.Imm32(31), one, false)};
        // Position at or past bit 32: the result is the sign of the input replicated.
        extracted = IR::U32{v.ir.Select(position_out_of_word, sign_fill, extracted)};
        // Field reaching bit 32 or beyond: bit 31 of the result is taken from the input.
        const IR::U32 with_top_bit{v.ir.BitFieldInsert(extracted, top_bit, v.ir.Imm32(31), one)};
        extracted = IR::U32{v.ir.Select(field_reaches_top, with_top_bit, extracted)};
    }
    // A zero-length field always yields zero.
    extracted = IR::U32{v.ir.Select(length_is_zero, zero, extracted)};

    v.X(bfe.dest_reg, extracted);

    if (bfe.cc == 0) {
        return;
    }
    v.SetZFlag(v.ir.IEqual(extracted, zero));
    v.SetSFlag(v.ir.ILessThan(extracted, zero, true));
    v.ResetCFlag();
    v.ResetOFlag();
}
|
||||
} // Anonymous namespace
|
||||
|
||||
// BFE with the position/length operand fetched through GetReg20().
void TranslatorVisitor::BFE_reg(u64 insn) {
    const auto position_and_length = GetReg20(insn);
    BFE(*this, insn, position_and_length);
}
|
||||
|
||||
// BFE with the position/length operand fetched through GetCbuf().
void TranslatorVisitor::BFE_cbuf(u64 insn) {
    const auto position_and_length = GetCbuf(insn);
    BFE(*this, insn, position_and_length);
}
|
||||
|
||||
// BFE with the position/length operand fetched through GetImm20().
void TranslatorVisitor::BFE_imm(u64 insn) {
    const auto position_and_length = GetImm20(insn);
    BFE(*this, insn, position_and_length);
}
|
||||
|
||||
} // namespace Shader::Maxwell
|
62
src/shader_recompiler/frontend/maxwell/translate/impl/bitfield_insert.cpp
Executable file
62
src/shader_recompiler/frontend/maxwell/translate/impl/bitfield_insert.cpp
Executable file
@@ -0,0 +1,62 @@
|
||||
// Copyright 2021 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include "common/bit_field.h"
|
||||
#include "common/common_types.h"
|
||||
#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
|
||||
|
||||
namespace Shader::Maxwell {
|
||||
namespace {
|
||||
// Shared implementation of the BFI variants.
// `src_a` supplies the insert position in bits [0, 8) and the bit count in bits [8, 16);
// the bits to insert come from the register in bits [8, 16) of `insn` and are inserted
// into `base`. A count above 32 is replaced by the bits remaining after the position, and
// a position of 32 or more leaves `base` unchanged. The result is written to the
// destination register and, when the cc bit is set, reflected in the Z and S flags
// (C and O are reset).
void BFI(TranslatorVisitor& v, u64 insn, const IR::U32& src_a, const IR::U32& base) {
    union {
        u64 raw;
        BitField<0, 8, IR::Reg> dest_reg;
        BitField<8, 8, IR::Reg> insert_reg;
        BitField<47, 1, u64> cc;
    } const bfi{insn};

    const IR::U32 zero{v.ir.Imm32(0)};
    const IR::U32 position{v.ir.BitFieldExtract(src_a, zero, v.ir.Imm32(8), false)};
    const IR::U32 raw_count{v.ir.BitFieldExtract(src_a, v.ir.Imm32(8), v.ir.Imm32(8), false)};
    const IR::U32 word_bits{v.ir.Imm32(32)};

    // Edge case conditions
    const IR::U1 position_out_of_word{v.ir.IGreaterThanEqual(position, word_bits, false)};
    const IR::U1 count_too_large{v.ir.IGreaterThan(raw_count, word_bits, false)};

    const IR::U32 bits_left{v.ir.ISub(word_bits, position)};
    const IR::U32 count{v.ir.Select(count_too_large, bits_left, raw_count)};

    const IR::U32 insert_bits{v.X(bfi.insert_reg)};
    IR::U32 merged{v.ir.BitFieldInsert(base, insert_bits, position, count)};
    merged = IR::U32{v.ir.Select(position_out_of_word, base, merged)};

    v.X(bfi.dest_reg, merged);
    if (bfi.cc == 0) {
        return;
    }
    v.SetZFlag(v.ir.IEqual(merged, zero));
    v.SetSFlag(v.ir.ILessThan(merged, zero, true));
    v.ResetCFlag();
    v.ResetOFlag();
}
|
||||
} // Anonymous namespace
|
||||
|
||||
// BFI with position/count from GetReg20() and the base value from GetReg39().
void TranslatorVisitor::BFI_reg(u64 insn) {
    const auto position_and_count = GetReg20(insn);
    const auto base = GetReg39(insn);
    BFI(*this, insn, position_and_count, base);
}
|
||||
|
||||
// BFI with position/count from GetReg39() and the base value from GetCbuf().
void TranslatorVisitor::BFI_rc(u64 insn) {
    const auto position_and_count = GetReg39(insn);
    const auto base = GetCbuf(insn);
    BFI(*this, insn, position_and_count, base);
}
|
||||
|
||||
// BFI with position/count from GetCbuf() and the base value from GetReg39().
void TranslatorVisitor::BFI_cr(u64 insn) {
    const auto position_and_count = GetCbuf(insn);
    const auto base = GetReg39(insn);
    BFI(*this, insn, position_and_count, base);
}
|
||||
|
||||
// BFI with position/count from GetImm20() and the base value from GetReg39().
void TranslatorVisitor::BFI_imm(u64 insn) {
    const auto position_and_count = GetImm20(insn);
    const auto base = GetReg39(insn);
    BFI(*this, insn, position_and_count, base);
}
|
||||
|
||||
} // namespace Shader::Maxwell
|
36
src/shader_recompiler/frontend/maxwell/translate/impl/branch_indirect.cpp
Executable file
36
src/shader_recompiler/frontend/maxwell/translate/impl/branch_indirect.cpp
Executable file
@@ -0,0 +1,36 @@
|
||||
// Copyright 2021 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include "common/bit_field.h"
|
||||
#include "common/common_types.h"
|
||||
#include "shader_recompiler/exception.h"
|
||||
#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
|
||||
|
||||
namespace Shader::Maxwell {
|
||||
namespace {
|
||||
// Rejects the indirect-branch encodings that are not implemented: throws
// NotImplementedException when the constant buffer mode bit (bit 5) or the LMT bit
// (bit 6) is set. Returns normally otherwise.
void Check(u64 insn) {
    union {
        u64 raw;
        BitField<5, 1, u64> cbuf_mode;
        BitField<6, 1, u64> lmt;
    } const encoding{insn};

    const bool uses_cbuf_mode{encoding.cbuf_mode != 0};
    if (uses_cbuf_mode) {
        throw NotImplementedException("Constant buffer mode");
    }
    const bool uses_lmt{encoding.lmt != 0};
    if (uses_lmt) {
        throw NotImplementedException("LMT");
    }
}
|
||||
} // Anonymous namespace
|
||||
|
||||
// Translates BRX. Only validates the encoding through Check(); nothing else is done here.
void TranslatorVisitor::BRX(u64 insn) {
    Check(insn);
}
|
||||
|
||||
// Translates JMX. Only validates the encoding through Check(); nothing else is done here.
void TranslatorVisitor::JMX(u64 insn) {
    Check(insn);
}
|
||||
|
||||
} // namespace Shader::Maxwell
|
57
src/shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h
Executable file
57
src/shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h
Executable file
@@ -0,0 +1,57 @@
|
||||
// Copyright 2021 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "common/common_types.h"
|
||||
#include "shader_recompiler/exception.h"
|
||||
#include "shader_recompiler/frontend/ir/modifiers.h"
|
||||
#include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h"
|
||||
|
||||
namespace Shader::Maxwell {
|
||||
|
||||
// Floating-point rounding selector as it appears in instruction encodings.
// Converted to the IR enumeration by CastFpRounding(). Values must keep this order.
enum class FpRounding : u64 {
    RN = 0,
    RM = 1,
    RP = 2,
    RZ = 3,
};
|
||||
|
||||
// FMZ/FTZ mode selector as it appears in instruction encodings.
// Converted to the IR enumeration by CastFmzMode(). Values must keep this order.
enum class FmzMode : u64 {
    None = 0,
    FTZ = 1,
    FMZ = 2,
    INVALIDFMZ3 = 3,
};
|
||||
|
||||
// Maps the encoded rounding mode onto the IR enumerator of the same name.
// Throws NotImplementedException for a value outside the four known modes.
inline IR::FpRounding CastFpRounding(FpRounding fp_rounding) {
    switch (fp_rounding) {
    case FpRounding::RN: {
        return IR::FpRounding::RN;
    }
    case FpRounding::RM: {
        return IR::FpRounding::RM;
    }
    case FpRounding::RP: {
        return IR::FpRounding::RP;
    }
    case FpRounding::RZ: {
        return IR::FpRounding::RZ;
    }
    }
    throw NotImplementedException("Invalid floating-point rounding {}", fp_rounding);
}
|
||||
|
||||
// Maps the encoded FMZ mode onto the IR enumeration. None and FTZ map to themselves and
// FMZ maps to FTZ. INVALIDFMZ3 and any unknown value throw NotImplementedException.
inline IR::FmzMode CastFmzMode(FmzMode fmz_mode) {
    switch (fmz_mode) {
    case FmzMode::None: {
        return IR::FmzMode::None;
    }
    case FmzMode::FTZ: {
        return IR::FmzMode::FTZ;
    }
    case FmzMode::FMZ: {
        // FMZ is manually handled in the instruction
        return IR::FmzMode::FTZ;
    }
    case FmzMode::INVALIDFMZ3: {
        break;
    }
    }
    throw NotImplementedException("Invalid FMZ mode {}", fmz_mode);
}
|
||||
|
||||
} // namespace Shader::Maxwell
|
110
src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.cpp
Executable file
110
src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.cpp
Executable file
@@ -0,0 +1,110 @@
|
||||
// Copyright 2021 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h"
|
||||
|
||||
namespace Shader::Maxwell {
|
||||
// Emits the integer comparison selected by `compare_op` between `operand_1` and
// `operand_2`. False and True yield constant predicates; `is_signed` is forwarded to the
// ordering comparisons only. Throws NotImplementedException for an unknown operation.
IR::U1 IntegerCompare(IR::IREmitter& ir, const IR::U32& operand_1, const IR::U32& operand_2,
                      CompareOp compare_op, bool is_signed) {
    switch (compare_op) {
    // Constant results
    case CompareOp::False:
        return ir.Imm1(false);
    case CompareOp::True:
        return ir.Imm1(true);
    // Sign-independent comparisons
    case CompareOp::Equal:
        return ir.IEqual(operand_1, operand_2);
    case CompareOp::NotEqual:
        return ir.INotEqual(operand_1, operand_2);
    // Ordering comparisons
    case CompareOp::LessThan:
        return ir.ILessThan(operand_1, operand_2, is_signed);
    case CompareOp::LessThanEqual:
        return ir.ILessThanEqual(operand_1, operand_2, is_signed);
    case CompareOp::GreaterThan:
        return ir.IGreaterThan(operand_1, operand_2, is_signed);
    case CompareOp::GreaterThanEqual:
        return ir.IGreaterThanEqual(operand_1, operand_2, is_signed);
    default:
        break;
    }
    throw NotImplementedException("Invalid compare op {}", compare_op);
}
|
||||
|
||||
// Combines two predicates with the logical operation selected by `bop` (AND, OR or XOR).
// Throws NotImplementedException for any other value.
IR::U1 PredicateCombine(IR::IREmitter& ir, const IR::U1& predicate_1, const IR::U1& predicate_2,
                        BooleanOp bop) {
    switch (bop) {
    case BooleanOp::AND: {
        return ir.LogicalAnd(predicate_1, predicate_2);
    }
    case BooleanOp::OR: {
        return ir.LogicalOr(predicate_1, predicate_2);
    }
    case BooleanOp::XOR: {
        return ir.LogicalXor(predicate_1, predicate_2);
    }
    default:
        break;
    }
    throw NotImplementedException("Invalid bop {}", bop);
}
|
||||
|
||||
// Derives a predicate from `result` according to `op`: constant false/true, or a
// comparison of `result` against zero. Throws NotImplementedException for an unknown
// operation.
IR::U1 PredicateOperation(IR::IREmitter& ir, const IR::U32& result, PredicateOp op) {
    switch (op) {
    case PredicateOp::False: {
        return ir.Imm1(false);
    }
    case PredicateOp::True: {
        return ir.Imm1(true);
    }
    case PredicateOp::Zero: {
        return ir.IEqual(result, ir.Imm32(0));
    }
    case PredicateOp::NonZero: {
        return ir.INotEqual(result, ir.Imm32(0));
    }
    default:
        break;
    }
    throw NotImplementedException("Invalid Predicate operation {}", op);
}
|
||||
|
||||
// Returns false for the U-suffixed comparison operations (LTU, EQU, LEU, GTU, NEU, GEU)
// and true for every other operation.
bool IsCompareOpOrdered(FPCompareOp op) {
    switch (op) {
    case FPCompareOp::LTU:
    case FPCompareOp::EQU:
    case FPCompareOp::LEU:
    case FPCompareOp::GTU:
    case FPCompareOp::NEU:
    case FPCompareOp::GEU:
        return false;
    default:
        break;
    }
    return true;
}
|
||||
|
||||
// Emits the floating-point comparison selected by `compare_op`.
// Each relational operation and its U-suffixed twin share one emitter call; the two
// differ only in the `ordered` flag obtained from IsCompareOpOrdered(). F and T yield
// constant predicates, NUM and Nan map to FPOrdered/FPUnordered.
// Throws NotImplementedException for an unknown operation.
IR::U1 FloatingPointCompare(IR::IREmitter& ir, const IR::F16F32F64& operand_1,
                            const IR::F16F32F64& operand_2, FPCompareOp compare_op,
                            IR::FpControl control) {
    const bool is_ordered{IsCompareOpOrdered(compare_op)};
    switch (compare_op) {
    // Constant results
    case FPCompareOp::F:
        return ir.Imm1(false);
    case FPCompareOp::T:
        return ir.Imm1(true);
    // NaN classification
    case FPCompareOp::NUM:
        return ir.FPOrdered(operand_1, operand_2);
    case FPCompareOp::Nan:
        return ir.FPUnordered(operand_1, operand_2);
    // Relational operations
    case FPCompareOp::LT:
    case FPCompareOp::LTU:
        return ir.FPLessThan(operand_1, operand_2, control, is_ordered);
    case FPCompareOp::EQ:
    case FPCompareOp::EQU:
        return ir.FPEqual(operand_1, operand_2, control, is_ordered);
    case FPCompareOp::LE:
    case FPCompareOp::LEU:
        return ir.FPLessThanEqual(operand_1, operand_2, control, is_ordered);
    case FPCompareOp::GT:
    case FPCompareOp::GTU:
        return ir.FPGreaterThan(operand_1, operand_2, control, is_ordered);
    case FPCompareOp::NE:
    case FPCompareOp::NEU:
        return ir.FPNotEqual(operand_1, operand_2, control, is_ordered);
    case FPCompareOp::GE:
    case FPCompareOp::GEU:
        return ir.FPGreaterThanEqual(operand_1, operand_2, control, is_ordered);
    default:
        break;
    }
    throw NotImplementedException("Invalid FP compare op {}", compare_op);
}
|
||||
} // namespace Shader::Maxwell
|
24
src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h
Executable file
24
src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h
Executable file
@@ -0,0 +1,24 @@
|
||||
// Copyright 2021 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "common/common_types.h"
|
||||
#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
|
||||
|
||||
namespace Shader::Maxwell {
|
||||
/// Emits the integer comparison selected by compare_op between the two operands.
/// is_signed selects signed or unsigned ordering comparisons.
[[nodiscard]] IR::U1 IntegerCompare(IR::IREmitter& ir, const IR::U32& operand_1,
                                    const IR::U32& operand_2, CompareOp compare_op,
                                    bool is_signed);
|
||||
|
||||
/// Combines two predicates with the boolean operation selected by bop.
[[nodiscard]] IR::U1 PredicateCombine(IR::IREmitter& ir, const IR::U1& predicate_1,
                                      const IR::U1& predicate_2, BooleanOp bop);
|
||||
|
||||
/// Derives a predicate from result according to op (constant, or a test against zero).
[[nodiscard]] IR::U1 PredicateOperation(IR::IREmitter& ir, const IR::U32& result, PredicateOp op);
|
||||
|
||||
/// Returns false for the U-suffixed comparison operations and true for all others.
[[nodiscard]] bool IsCompareOpOrdered(FPCompareOp op);
|
||||
|
||||
/// Emits the floating-point comparison selected by compare_op between the two operands.
/// control is forwarded to the relational comparisons and defaults to a
/// value-initialized IR::FpControl.
[[nodiscard]] IR::U1 FloatingPointCompare(IR::IREmitter& ir,
                                          const IR::F16F32F64& operand_1,
                                          const IR::F16F32F64& operand_2,
                                          FPCompareOp compare_op,
                                          IR::FpControl control = {});
|
||||
} // namespace Shader::Maxwell
|
66
src/shader_recompiler/frontend/maxwell/translate/impl/condition_code_set.cpp
Executable file
66
src/shader_recompiler/frontend/maxwell/translate/impl/condition_code_set.cpp
Executable file
@@ -0,0 +1,66 @@
|
||||
// Copyright 2021 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include "common/bit_field.h"
|
||||
#include "common/common_types.h"
|
||||
#include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h"
|
||||
#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
|
||||
|
||||
namespace Shader::Maxwell {
|
||||
|
||||
// Translates CSET: combines the flow test result with a (possibly negated) predicate and
// writes either a pass value or zero to the destination register. The pass value is
// 0xFFFFFFFF when the bf bit is clear and 0x3f800000 when it is set.
// When the cc bit is set, Z reflects a zero result, S is reset in bf mode and otherwise
// set for a non-zero result, and O and C are reset.
void TranslatorVisitor::CSET(u64 insn) {
    union {
        u64 raw;
        BitField<0, 8, IR::Reg> dest_reg;
        BitField<8, 5, IR::FlowTest> cc_test;
        BitField<39, 3, IR::Pred> bop_pred;
        BitField<42, 1, u64> neg_bop_pred;
        BitField<44, 1, u64> bf;
        BitField<45, 2, BooleanOp> bop;
        BitField<47, 1, u64> cc;
    } const cset{insn};
    const bool float_result{cset.bf != 0};

    const IR::U32 all_ones{ir.Imm32(-1)};
    const IR::U32 float_one{ir.Imm32(0x3f800000)};
    const IR::U32 zero{ir.Imm32(0)};
    const IR::U32 pass_value{float_result ? float_one : all_ones};

    const IR::U1 flow_test{ir.GetFlowTestResult(cset.cc_test)};
    const IR::U1 predicate{ir.GetPred(cset.bop_pred, cset.neg_bop_pred != 0)};
    const IR::U1 condition{PredicateCombine(ir, flow_test, predicate, cset.bop)};
    const IR::U32 result{ir.Select(condition, pass_value, zero)};
    X(cset.dest_reg, result);

    if (cset.cc == 0) {
        return;
    }
    const IR::U1 result_is_zero{ir.IEqual(result, zero)};
    SetZFlag(result_is_zero);
    if (float_result) {
        ResetSFlag();
    } else {
        SetSFlag(ir.LogicalNot(result_is_zero));
    }
    ResetOFlag();
    ResetCFlag();
}
|
||||
|
||||
// Translates CSETP: combines the flow test result with a (possibly negated) predicate and
// writes two destination predicates. Predicate A receives the combination with the flow
// test result, predicate B the combination with its logical negation.
void TranslatorVisitor::CSETP(u64 insn) {
    union {
        u64 raw;
        BitField<0, 3, IR::Pred> dest_pred_b;
        BitField<3, 3, IR::Pred> dest_pred_a;
        BitField<8, 5, IR::FlowTest> cc_test;
        BitField<39, 3, IR::Pred> bop_pred;
        BitField<42, 1, u64> neg_bop_pred;
        BitField<45, 2, BooleanOp> bop;
    } const csetp{insn};

    const BooleanOp combine_op{csetp.bop};
    const bool negate_predicate{csetp.neg_bop_pred != 0};
    const IR::U1 predicate{ir.GetPred(csetp.bop_pred, negate_predicate)};
    const IR::U1 flow_test{ir.GetFlowTestResult(csetp.cc_test)};

    const IR::U1 value_a{PredicateCombine(ir, flow_test, predicate, combine_op)};
    const IR::U1 value_b{PredicateCombine(ir, ir.LogicalNot(flow_test), predicate, combine_op)};
    ir.SetPred(csetp.dest_pred_a, value_a);
    ir.SetPred(csetp.dest_pred_b, value_b);
}
|
||||
|
||||
} // namespace Shader::Maxwell
|
55
src/shader_recompiler/frontend/maxwell/translate/impl/double_add.cpp
Executable file
55
src/shader_recompiler/frontend/maxwell/translate/impl/double_add.cpp
Executable file
@@ -0,0 +1,55 @@
|
||||
// Copyright 2021 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include "common/common_types.h"
|
||||
#include "shader_recompiler/exception.h"
|
||||
#include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h"
|
||||
#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
|
||||
|
||||
namespace Shader::Maxwell {
|
||||
namespace {
|
||||
|
||||
// Shared DADD translation. Reads operand A through v.D, applies the encoded abs/neg modifiers
// to both operands, adds them with the encoded rounding mode and stores the sum through v.D.
// src_b is provided by the caller (register, constant buffer or immediate variant).
// Throws NotImplementedException when the CC bit is set.
void DADD(TranslatorVisitor& v, u64 insn, const IR::F64& src_b) {
    union {
        u64 raw;
        BitField<0, 8, IR::Reg> dest_reg;
        BitField<8, 8, IR::Reg> src_a_reg;
        BitField<39, 2, FpRounding> fp_rounding;
        BitField<45, 1, u64> neg_b;
        BitField<46, 1, u64> abs_a;
        BitField<47, 1, u64> cc;
        BitField<48, 1, u64> neg_a;
        BitField<49, 1, u64> abs_b;
    } const fields{insn};

    if (fields.cc != 0) {
        throw NotImplementedException("DADD CC");
    }

    const bool take_abs_a{fields.abs_a != 0};
    const bool negate_a{fields.neg_a != 0};
    const bool take_abs_b{fields.abs_b != 0};
    const bool negate_b{fields.neg_b != 0};

    const IR::F64 raw_a{v.D(fields.src_a_reg)};
    const IR::F64 lhs{v.ir.FPAbsNeg(raw_a, take_abs_a, negate_a)};
    const IR::F64 rhs{v.ir.FPAbsNeg(src_b, take_abs_b, negate_b)};

    const IR::FpControl fp_control{
        .no_contraction = true,
        .rounding = CastFpRounding(fields.fp_rounding),
        .fmz_mode = IR::FmzMode::None,
    };
    const IR::F64 sum{v.ir.FPAdd(lhs, rhs, fp_control)};
    v.D(fields.dest_reg, sum);
}
|
||||
} // Anonymous namespace
|
||||
|
||||
// DADD variant whose second operand is obtained with GetDoubleReg20.
void TranslatorVisitor::DADD_reg(u64 insn) {
    const IR::F64 src_b{GetDoubleReg20(insn)};
    DADD(*this, insn, src_b);
}
|
||||
|
||||
// DADD variant whose second operand is obtained with GetDoubleCbuf.
void TranslatorVisitor::DADD_cbuf(u64 insn) {
    const IR::F64 src_b{GetDoubleCbuf(insn)};
    DADD(*this, insn, src_b);
}
|
||||
|
||||
// DADD variant whose second operand is obtained with GetDoubleImm20.
void TranslatorVisitor::DADD_imm(u64 insn) {
    const IR::F64 src_b{GetDoubleImm20(insn)};
    DADD(*this, insn, src_b);
}
|
||||
|
||||
} // namespace Shader::Maxwell
|
@@ -0,0 +1,72 @@
|
||||
// Copyright 2021 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include "common/bit_field.h"
|
||||
#include "common/common_types.h"
|
||||
#include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h"
|
||||
#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
|
||||
|
||||
namespace Shader::Maxwell {
|
||||
namespace {
|
||||
// Shared DSET translation. Compares two F64 operands (after abs/neg modifiers), combines the
// comparison with a predicate using the encoded boolean operation and writes a 32-bit result:
// zero when the combination is false, otherwise all ones (bf == 0) or 0x3f800000, the bit
// pattern of 1.0f (bf != 0). When the CC bit is set the Z/S/C/O flags are updated as well.
void DSET(TranslatorVisitor& v, u64 insn, const IR::F64& src_b) {
    union {
        u64 raw;
        BitField<0, 8, IR::Reg> dest_reg;
        BitField<8, 8, IR::Reg> src_a_reg;
        BitField<39, 3, IR::Pred> pred;
        BitField<42, 1, u64> neg_pred;
        BitField<43, 1, u64> negate_a;
        BitField<44, 1, u64> abs_b;
        BitField<45, 2, BooleanOp> bop;
        BitField<47, 1, u64> cc;
        BitField<48, 4, FPCompareOp> compare_op;
        BitField<52, 1, u64> bf;
        BitField<53, 1, u64> negate_b;
        BitField<54, 1, u64> abs_a;
    } const fields{insn};

    const bool float_result{fields.bf != 0};

    const IR::F64 raw_a{v.D(fields.src_a_reg)};
    const IR::F64 lhs{v.ir.FPAbsNeg(raw_a, fields.abs_a != 0, fields.negate_a != 0)};
    const IR::F64 rhs{v.ir.FPAbsNeg(src_b, fields.abs_b != 0, fields.negate_b != 0)};

    // The predicate is negated with an explicit LogicalNot only when the encoding asks for it.
    const IR::U1 raw_pred{v.ir.GetPred(fields.pred)};
    const IR::U1 operand_pred{fields.neg_pred != 0 ? v.ir.LogicalNot(raw_pred) : raw_pred};

    const FPCompareOp compare_op{fields.compare_op};
    const BooleanOp combine_op{fields.bop};
    const IR::U1 comparison{FloatingPointCompare(v.ir, lhs, rhs, compare_op)};
    const IR::U1 combined{PredicateCombine(v.ir, comparison, operand_pred, combine_op)};

    const IR::U32 one_mask{v.ir.Imm32(-1)};
    const IR::U32 fp_one{v.ir.Imm32(0x3f800000)};
    const IR::U32 zero{v.ir.Imm32(0)};
    const IR::U32 value_if_true{float_result ? fp_one : one_mask};
    const IR::U32 result{v.ir.Select(combined, value_if_true, zero)};

    v.X(fields.dest_reg, result);

    if (fields.cc == 0) {
        return;
    }
    // Flag update: Z mirrors "result == 0"; S is cleared for the float form and otherwise set
    // whenever the result is non-zero; C and O are always cleared.
    const IR::U1 is_zero{v.ir.IEqual(result, zero)};
    v.SetZFlag(is_zero);
    if (float_result) {
        v.ResetSFlag();
    } else {
        v.SetSFlag(v.ir.LogicalNot(is_zero));
    }
    v.ResetCFlag();
    v.ResetOFlag();
}
|
||||
} // Anonymous namespace
|
||||
|
||||
// DSET variant whose second operand is obtained with GetDoubleReg20.
void TranslatorVisitor::DSET_reg(u64 insn) {
    const IR::F64 src_b{GetDoubleReg20(insn)};
    DSET(*this, insn, src_b);
}
|
||||
|
||||
// DSET variant whose second operand is obtained with GetDoubleCbuf.
void TranslatorVisitor::DSET_cbuf(u64 insn) {
    const IR::F64 src_b{GetDoubleCbuf(insn)};
    DSET(*this, insn, src_b);
}
|
||||
|
||||
// DSET variant whose second operand is obtained with GetDoubleImm20.
void TranslatorVisitor::DSET_imm(u64 insn) {
    const IR::F64 src_b{GetDoubleImm20(insn)};
    DSET(*this, insn, src_b);
}
|
||||
|
||||
} // namespace Shader::Maxwell
|
@@ -0,0 +1,58 @@
|
||||
// Copyright 2021 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include "common/common_types.h"
|
||||
#include "shader_recompiler/exception.h"
|
||||
#include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h"
|
||||
#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
|
||||
|
||||
namespace Shader::Maxwell {
|
||||
namespace {
|
||||
|
||||
// Shared DFMA translation. Computes FPFma(a, b, c) on F64 operands, where b and c may each be
// negated by the encoding, using the encoded rounding mode, and stores the result through v.D.
// src_b and src_c are provided by the caller. Throws NotImplementedException when CC is set.
void DFMA(TranslatorVisitor& v, u64 insn, const IR::F64& src_b, const IR::F64& src_c) {
    union {
        u64 raw;
        BitField<0, 8, IR::Reg> dest_reg;
        BitField<8, 8, IR::Reg> src_a_reg;
        BitField<50, 2, FpRounding> fp_rounding;
        BitField<47, 1, u64> cc;
        BitField<48, 1, u64> neg_b;
        BitField<49, 1, u64> neg_c;
    } const fields{insn};

    if (fields.cc != 0) {
        throw NotImplementedException("DFMA CC");
    }

    const bool negate_b{fields.neg_b != 0};
    const bool negate_c{fields.neg_c != 0};

    const IR::F64 multiplicand{v.D(fields.src_a_reg)};
    // Only negation is encoded for B and C; the abs argument is always false.
    const IR::F64 multiplier{v.ir.FPAbsNeg(src_b, false, negate_b)};
    const IR::F64 addend{v.ir.FPAbsNeg(src_c, false, negate_c)};

    const IR::FpControl fp_control{
        .no_contraction = true,
        .rounding = CastFpRounding(fields.fp_rounding),
        .fmz_mode = IR::FmzMode::None,
    };
    const IR::F64 fused{v.ir.FPFma(multiplicand, multiplier, addend, fp_control)};
    v.D(fields.dest_reg, fused);
}
|
||||
} // Anonymous namespace
|
||||
|
||||
// DFMA variant: operand B from GetDoubleReg20, operand C from GetDoubleReg39.
// Both fetches stay inside the call expression so their evaluation order is left as it was.
void TranslatorVisitor::DFMA_reg(u64 insn) {
    DFMA(*this, insn,
         GetDoubleReg20(insn),
         GetDoubleReg39(insn));
}
|
||||
|
||||
// DFMA variant: operand B from GetDoubleCbuf, operand C from GetDoubleReg39.
// Both fetches stay inside the call expression so their evaluation order is left as it was.
void TranslatorVisitor::DFMA_cr(u64 insn) {
    DFMA(*this, insn,
         GetDoubleCbuf(insn),
         GetDoubleReg39(insn));
}
|
||||
|
||||
// DFMA variant: operand B from GetDoubleReg39, operand C from GetDoubleCbuf.
// Both fetches stay inside the call expression so their evaluation order is left as it was.
void TranslatorVisitor::DFMA_rc(u64 insn) {
    DFMA(*this, insn,
         GetDoubleReg39(insn),
         GetDoubleCbuf(insn));
}
|
||||
|
||||
// DFMA variant: operand B from GetDoubleImm20, operand C from GetDoubleReg39.
// Both fetches stay inside the call expression so their evaluation order is left as it was.
void TranslatorVisitor::DFMA_imm(u64 insn) {
    DFMA(*this, insn,
         GetDoubleImm20(insn),
         GetDoubleReg39(insn));
}
|
||||
|
||||
} // namespace Shader::Maxwell
|
55
src/shader_recompiler/frontend/maxwell/translate/impl/double_min_max.cpp
Executable file
55
src/shader_recompiler/frontend/maxwell/translate/impl/double_min_max.cpp
Executable file
@@ -0,0 +1,55 @@
|
||||
// Copyright 2021 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include "common/bit_field.h"
|
||||
#include "common/common_types.h"
|
||||
#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
|
||||
|
||||
namespace Shader::Maxwell {
|
||||
namespace {
|
||||
// Shared DMNMX translation. Computes both FPMax and FPMin of the two F64 operands (after
// abs/neg modifiers) and selects one of them with a predicate: the minimum when the predicate
// is true and the maximum otherwise, with the two swapped when neg_pred is set.
// Throws NotImplementedException when the CC bit is set.
void DMNMX(TranslatorVisitor& v, u64 insn, const IR::F64& src_b) {
    union {
        u64 raw;
        BitField<0, 8, IR::Reg> dest_reg;
        BitField<8, 8, IR::Reg> src_a_reg;
        BitField<39, 3, IR::Pred> pred;
        BitField<42, 1, u64> neg_pred;
        BitField<45, 1, u64> negate_b;
        BitField<46, 1, u64> abs_a;
        BitField<47, 1, u64> cc;
        BitField<48, 1, u64> negate_a;
        BitField<49, 1, u64> abs_b;
    } const fields{insn};

    if (fields.cc != 0) {
        throw NotImplementedException("DMNMX CC");
    }

    const IR::U1 selector{v.ir.GetPred(fields.pred)};

    const IR::F64 raw_a{v.D(fields.src_a_reg)};
    const IR::F64 lhs{v.ir.FPAbsNeg(raw_a, fields.abs_a != 0, fields.negate_a != 0)};
    const IR::F64 rhs{v.ir.FPAbsNeg(src_b, fields.abs_b != 0, fields.negate_b != 0)};

    // The maximum is emitted before the minimum, as in the original sequence.
    const IR::F64 maximum{v.ir.FPMax(lhs, rhs)};
    const IR::F64 minimum{v.ir.FPMin(lhs, rhs)};

    // A negated predicate is modelled by exchanging the two Select operands instead of
    // negating the predicate value itself.
    const bool exchange{fields.neg_pred != 0};
    const IR::F64 when_true{exchange ? maximum : minimum};
    const IR::F64 when_false{exchange ? minimum : maximum};

    v.D(fields.dest_reg, IR::F64{v.ir.Select(selector, when_true, when_false)});
}
|
||||
} // Anonymous namespace
|
||||
|
||||
// DMNMX variant whose second operand is obtained with GetDoubleReg20.
void TranslatorVisitor::DMNMX_reg(u64 insn) {
    const IR::F64 src_b{GetDoubleReg20(insn)};
    DMNMX(*this, insn, src_b);
}
|
||||
|
||||
// DMNMX variant whose second operand is obtained with GetDoubleCbuf.
void TranslatorVisitor::DMNMX_cbuf(u64 insn) {
    const IR::F64 src_b{GetDoubleCbuf(insn)};
    DMNMX(*this, insn, src_b);
}
|
||||
|
||||
// DMNMX variant whose second operand is obtained with GetDoubleImm20.
void TranslatorVisitor::DMNMX_imm(u64 insn) {
    const IR::F64 src_b{GetDoubleImm20(insn)};
    DMNMX(*this, insn, src_b);
}
|
||||
|
||||
} // namespace Shader::Maxwell
|
50
src/shader_recompiler/frontend/maxwell/translate/impl/double_multiply.cpp
Executable file
50
src/shader_recompiler/frontend/maxwell/translate/impl/double_multiply.cpp
Executable file
@@ -0,0 +1,50 @@
|
||||
// Copyright 2021 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include "common/common_types.h"
|
||||
#include "shader_recompiler/exception.h"
|
||||
#include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h"
|
||||
#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
|
||||
|
||||
namespace Shader::Maxwell {
|
||||
namespace {
|
||||
|
||||
// Shared DMUL translation. Multiplies operand A (optionally negated) by src_b as F64 values
// using the encoded rounding mode and stores the product through v.D.
// Throws NotImplementedException when the CC bit is set.
void DMUL(TranslatorVisitor& v, u64 insn, const IR::F64& src_b) {
    union {
        u64 raw;
        BitField<0, 8, IR::Reg> dest_reg;
        BitField<8, 8, IR::Reg> src_a_reg;
        BitField<39, 2, FpRounding> fp_rounding;
        BitField<47, 1, u64> cc;
        BitField<48, 1, u64> neg;
    } const fields{insn};

    if (fields.cc != 0) {
        throw NotImplementedException("DMUL CC");
    }

    const bool negate{fields.neg != 0};
    const IR::F64 raw_a{v.D(fields.src_a_reg)};
    // The single negate bit is applied to operand A; no abs modifier is encoded.
    const IR::F64 lhs{v.ir.FPAbsNeg(raw_a, false, negate)};

    const IR::FpControl fp_control{
        .no_contraction = true,
        .rounding = CastFpRounding(fields.fp_rounding),
        .fmz_mode = IR::FmzMode::None,
    };
    const IR::F64 product{v.ir.FPMul(lhs, src_b, fp_control)};
    v.D(fields.dest_reg, product);
}
|
||||
} // Anonymous namespace
|
||||
|
||||
// DMUL variant whose second operand is obtained with GetDoubleReg20.
void TranslatorVisitor::DMUL_reg(u64 insn) {
    const IR::F64 src_b{GetDoubleReg20(insn)};
    DMUL(*this, insn, src_b);
}
|
||||
|
||||
// DMUL variant whose second operand is obtained with GetDoubleCbuf.
void TranslatorVisitor::DMUL_cbuf(u64 insn) {
    const IR::F64 src_b{GetDoubleCbuf(insn)};
    DMUL(*this, insn, src_b);
}
|
||||
|
||||
// DMUL variant whose second operand is obtained with GetDoubleImm20.
void TranslatorVisitor::DMUL_imm(u64 insn) {
    const IR::F64 src_b{GetDoubleImm20(insn)};
    DMUL(*this, insn, src_b);
}
|
||||
|
||||
} // namespace Shader::Maxwell
|
@@ -0,0 +1,54 @@
|
||||
// Copyright 2021 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include "common/bit_field.h"
|
||||
#include "common/common_types.h"
|
||||
#include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h"
|
||||
#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
|
||||
|
||||
namespace Shader::Maxwell {
|
||||
namespace {
|
||||
// Shared DSETP translation. Compares two F64 operands (after abs/neg modifiers), combines the
// comparison with a predicate operand and writes the outcome to dest_pred_a. dest_pred_b
// receives the same combination computed from the logically negated comparison.
void DSETP(TranslatorVisitor& v, u64 insn, const IR::F64& src_b) {
    union {
        u64 raw;
        BitField<0, 3, IR::Pred> dest_pred_b;
        BitField<3, 3, IR::Pred> dest_pred_a;
        BitField<6, 1, u64> negate_b;
        BitField<7, 1, u64> abs_a;
        BitField<8, 8, IR::Reg> src_a_reg;
        BitField<39, 3, IR::Pred> bop_pred;
        BitField<42, 1, u64> neg_bop_pred;
        BitField<43, 1, u64> negate_a;
        BitField<44, 1, u64> abs_b;
        BitField<45, 2, BooleanOp> bop;
        BitField<48, 4, FPCompareOp> compare_op;
    } const fields{insn};

    const IR::F64 raw_a{v.D(fields.src_a_reg)};
    const IR::F64 lhs{v.ir.FPAbsNeg(raw_a, fields.abs_a != 0, fields.negate_a != 0)};
    const IR::F64 rhs{v.ir.FPAbsNeg(src_b, fields.abs_b != 0, fields.negate_b != 0)};

    const BooleanOp combine_op{fields.bop};
    const FPCompareOp compare_op{fields.compare_op};

    // The comparison is emitted before the predicate operand is read.
    const IR::U1 comparison{FloatingPointCompare(v.ir, lhs, rhs, compare_op)};
    const IR::U1 operand{v.ir.GetPred(fields.bop_pred, fields.neg_bop_pred != 0)};

    const IR::U1 direct{PredicateCombine(v.ir, comparison, operand, combine_op)};
    const IR::U1 inverted_comparison{v.ir.LogicalNot(comparison)};
    const IR::U1 inverse{PredicateCombine(v.ir, inverted_comparison, operand, combine_op)};

    v.ir.SetPred(fields.dest_pred_a, direct);
    v.ir.SetPred(fields.dest_pred_b, inverse);
}
|
||||
} // Anonymous namespace
|
||||
|
||||
// DSETP variant whose second operand is obtained with GetDoubleReg20.
void TranslatorVisitor::DSETP_reg(u64 insn) {
    const IR::F64 src_b{GetDoubleReg20(insn)};
    DSETP(*this, insn, src_b);
}
|
||||
|
||||
// DSETP variant whose second operand is obtained with GetDoubleCbuf.
void TranslatorVisitor::DSETP_cbuf(u64 insn) {
    const IR::F64 src_b{GetDoubleCbuf(insn)};
    DSETP(*this, insn, src_b);
}
|
||||
|
||||
// DSETP variant whose second operand is obtained with GetDoubleImm20.
void TranslatorVisitor::DSETP_imm(u64 insn) {
    const IR::F64 src_b{GetDoubleImm20(insn)};
    DSETP(*this, insn, src_b);
}
|
||||
|
||||
} // namespace Shader::Maxwell
|
43
src/shader_recompiler/frontend/maxwell/translate/impl/exit_program.cpp
Executable file
43
src/shader_recompiler/frontend/maxwell/translate/impl/exit_program.cpp
Executable file
@@ -0,0 +1,43 @@
|
||||
// Copyright 2021 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include "common/common_types.h"
|
||||
#include "shader_recompiler/exception.h"
|
||||
#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
|
||||
|
||||
namespace Shader::Maxwell {
|
||||
namespace {
|
||||
// Emits the fragment outputs on program exit. Walks 8 render targets x 4 components and, for
// every component enabled in the program header, stores consecutive registers (starting at R0)
// as fragment colours. Afterwards the optional sample mask is read from the next register and
// the optional depth from the register after that one.
void ExitFragment(TranslatorVisitor& v) {
    constexpr u32 num_render_targets{8};
    constexpr u32 num_components{4};

    const ProgramHeader sph{v.env.SPH()};
    IR::Reg next_reg{IR::Reg::R0};

    for (u32 target = 0; target < num_render_targets; ++target) {
        const std::array<bool, 4> enabled{sph.ps.EnabledOutputComponents(target)};
        for (u32 channel = 0; channel < num_components; ++channel) {
            if (enabled[channel]) {
                v.ir.SetFragColor(target, channel, v.F(next_reg));
                // Registers are consumed only by enabled components.
                ++next_reg;
            }
        }
    }

    const bool writes_sample_mask{sph.ps.omap.sample_mask != 0};
    const bool writes_depth{sph.ps.omap.depth != 0};
    if (writes_sample_mask) {
        v.ir.SetSampleMask(v.X(next_reg));
    }
    if (writes_depth) {
        // Depth always comes from the register after the sample-mask slot, whether or not a
        // sample mask was written.
        v.ir.SetFragDepth(v.F(next_reg + 1));
    }
}
|
||||
} // Anonymous namespace
|
||||
|
||||
void TranslatorVisitor::EXIT() {
|
||||
switch (env.ShaderStage()) {
|
||||
case Stage::Fragment:
|
||||
ExitFragment(*this);
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace Shader::Maxwell
|
47
src/shader_recompiler/frontend/maxwell/translate/impl/find_leading_one.cpp
Executable file
47
src/shader_recompiler/frontend/maxwell/translate/impl/find_leading_one.cpp
Executable file
@@ -0,0 +1,47 @@
|
||||
// Copyright 2021 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include "common/bit_field.h"
|
||||
#include "common/common_types.h"
|
||||
#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
|
||||
|
||||
namespace Shader::Maxwell {
|
||||
namespace {
|
||||
// Shared FLO translation. Optionally inverts the source bits, finds the most significant bit
// with FindSMsb or FindUMsb depending on is_signed, and, when the shift bit is set, XORs the
// found index with 31 unless the search returned -1. The result is stored through v.X.
// Throws NotImplementedException when the CC bit is set.
void FLO(TranslatorVisitor& v, u64 insn, IR::U32 src) {
    union {
        u64 raw;
        BitField<0, 8, IR::Reg> dest_reg;
        BitField<40, 1, u64> tilde;
        BitField<41, 1, u64> shift;
        BitField<47, 1, u64> cc;
        BitField<48, 1, u64> is_signed;
    } const fields{insn};

    if (fields.cc != 0) {
        throw NotImplementedException("CC");
    }

    const bool invert_source{fields.tilde != 0};
    const bool signed_search{fields.is_signed != 0};
    const bool flip_index{fields.shift != 0};

    if (invert_source) {
        src = v.ir.BitwiseNot(src);
    }

    const IR::U32 msb_index{signed_search ? v.ir.FindSMsb(src) : v.ir.FindUMsb(src)};
    if (!flip_index) {
        v.X(fields.dest_reg, msb_index);
        return;
    }

    // -1 is passed through unchanged; any other index is XORed with 31.
    const IR::U1 not_found{v.ir.IEqual(msb_index, v.ir.Imm32(-1))};
    const IR::U32 flipped{v.ir.BitwiseXor(msb_index, v.ir.Imm32(31))};
    const IR::U32 result{v.ir.Select(not_found, msb_index, flipped)};
    v.X(fields.dest_reg, result);
}
|
||||
} // Anonymous namespace
|
||||
|
||||
// FLO variant whose source is obtained with GetReg20.
void TranslatorVisitor::FLO_reg(u64 insn) {
    const IR::U32 src{GetReg20(insn)};
    FLO(*this, insn, src);
}
|
||||
|
||||
// FLO variant whose source is obtained with GetCbuf.
void TranslatorVisitor::FLO_cbuf(u64 insn) {
    const IR::U32 src{GetCbuf(insn)};
    FLO(*this, insn, src);
}
|
||||
|
||||
// FLO variant whose source is obtained with GetImm20.
void TranslatorVisitor::FLO_imm(u64 insn) {
    const IR::U32 src{GetImm20(insn)};
    FLO(*this, insn, src);
}
|
||||
} // namespace Shader::Maxwell
|
82
src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_add.cpp
Executable file
82
src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_add.cpp
Executable file
@@ -0,0 +1,82 @@
|
||||
// Copyright 2021 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include "common/common_types.h"
|
||||
#include "shader_recompiler/exception.h"
|
||||
#include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h"
|
||||
#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
|
||||
|
||||
namespace Shader::Maxwell {
|
||||
namespace {
|
||||
// Fully-decoded FADD translation. Only the destination and operand-A registers are read from
// insn; every modifier is passed in by the caller. Adds the two F32 operands (after abs/neg
// modifiers) with the requested rounding and flush mode, optionally saturates the sum and
// stores it through v.F. Throws NotImplementedException when cc is requested.
void FADD(TranslatorVisitor& v, u64 insn, bool sat, bool cc, bool ftz, FpRounding fp_rounding,
          const IR::F32& src_b, bool abs_a, bool neg_a, bool abs_b, bool neg_b) {
    union {
        u64 raw;
        BitField<0, 8, IR::Reg> dest_reg;
        BitField<8, 8, IR::Reg> src_a;
    } const fields{insn};

    if (cc) {
        throw NotImplementedException("FADD CC");
    }

    const IR::F32 raw_a{v.F(fields.src_a)};
    const IR::F32 lhs{v.ir.FPAbsNeg(raw_a, abs_a, neg_a)};
    const IR::F32 rhs{v.ir.FPAbsNeg(src_b, abs_b, neg_b)};

    const IR::FmzMode flush_mode{ftz ? IR::FmzMode::FTZ : IR::FmzMode::None};
    const IR::FpControl fp_control{
        .no_contraction = true,
        .rounding = CastFpRounding(fp_rounding),
        .fmz_mode = flush_mode,
    };

    const IR::F32 sum{v.ir.FPAdd(lhs, rhs, fp_control)};
    const IR::F32 result{sat ? IR::F32{v.ir.FPSaturate(sum)} : sum};
    v.F(fields.dest_reg, result);
}
|
||||
|
||||
// Decodes the modifier bits shared by the reg/cbuf/imm FADD encodings and forwards them to the
// fully-decoded FADD overload together with the caller-provided second operand.
void FADD(TranslatorVisitor& v, u64 insn, const IR::F32& src_b) {
    union {
        u64 raw;
        BitField<39, 2, FpRounding> fp_rounding;
        BitField<44, 1, u64> ftz;
        BitField<45, 1, u64> neg_b;
        BitField<46, 1, u64> abs_a;
        BitField<47, 1, u64> cc;
        BitField<48, 1, u64> neg_a;
        BitField<49, 1, u64> abs_b;
        BitField<50, 1, u64> sat;
    } const fields{insn};

    const bool saturate{fields.sat != 0};
    const bool update_cc{fields.cc != 0};
    const bool flush_to_zero{fields.ftz != 0};
    const FpRounding rounding{fields.fp_rounding};
    const bool take_abs_a{fields.abs_a != 0};
    const bool negate_a{fields.neg_a != 0};
    const bool take_abs_b{fields.abs_b != 0};
    const bool negate_b{fields.neg_b != 0};

    FADD(v, insn, saturate, update_cc, flush_to_zero, rounding, src_b, take_abs_a, negate_a,
         take_abs_b, negate_b);
}
|
||||
} // Anonymous namespace
|
||||
|
||||
// FADD variant whose second operand is obtained with GetFloatReg20.
void TranslatorVisitor::FADD_reg(u64 insn) {
    const IR::F32 src_b{GetFloatReg20(insn)};
    FADD(*this, insn, src_b);
}
|
||||
|
||||
// FADD variant whose second operand is obtained with GetFloatCbuf.
void TranslatorVisitor::FADD_cbuf(u64 insn) {
    const IR::F32 src_b{GetFloatCbuf(insn)};
    FADD(*this, insn, src_b);
}
|
||||
|
||||
// FADD variant whose second operand is obtained with GetFloatImm20.
void TranslatorVisitor::FADD_imm(u64 insn) {
    const IR::F32 src_b{GetFloatImm20(insn)};
    FADD(*this, insn, src_b);
}
|
||||
|
||||
// Translates FADD32I. The modifier bits sit at different positions than in the other FADD
// encodings, the second operand comes from GetFloatImm32, saturation is never requested and
// the rounding mode is fixed to FpRounding::RN.
void TranslatorVisitor::FADD32I(u64 insn) {
    union {
        u64 raw;
        BitField<55, 1, u64> ftz;
        BitField<56, 1, u64> neg_a;
        BitField<54, 1, u64> abs_a;
        BitField<52, 1, u64> cc;
        BitField<53, 1, u64> neg_b;
        BitField<57, 1, u64> abs_b;
    } const fields{insn};

    const bool update_cc{fields.cc != 0};
    const bool flush_to_zero{fields.ftz != 0};
    const bool take_abs_a{fields.abs_a != 0};
    const bool negate_a{fields.neg_a != 0};
    const bool take_abs_b{fields.abs_b != 0};
    const bool negate_b{fields.neg_b != 0};

    const IR::F32 src_b{GetFloatImm32(insn)};
    FADD(*this, insn, false, update_cc, flush_to_zero, FpRounding::RN, src_b, take_abs_a,
         negate_a, take_abs_b, negate_b);
}
|
||||
|
||||
} // namespace Shader::Maxwell
|
@@ -0,0 +1,55 @@
|
||||
// Copyright 2021 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include "common/bit_field.h"
|
||||
#include "common/common_types.h"
|
||||
#include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h"
|
||||
#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
|
||||
|
||||
namespace Shader::Maxwell {
|
||||
namespace {
|
||||
// Shared FCMP translation. Compares `operand` against 0.0f with the encoded comparison and
// flush mode, then stores either the register named by bits 8..15 (comparison true) or src_a
// (comparison false) into the destination register.
void FCMP(TranslatorVisitor& v, u64 insn, const IR::U32& src_a, const IR::F32& operand) {
    union {
        u64 raw;
        BitField<0, 8, IR::Reg> dest_reg;
        BitField<8, 8, IR::Reg> src_reg;
        BitField<47, 1, u64> ftz;
        BitField<48, 4, FPCompareOp> compare_op;
    } const fields{insn};

    const IR::F32 zero{v.ir.Imm32(0.0f)};
    const bool flush_to_zero{fields.ftz != 0};
    const IR::FpControl fp_control{
        .fmz_mode = (flush_to_zero ? IR::FmzMode::FTZ : IR::FmzMode::None),
    };

    // The comparison is emitted before the pass-through register is read.
    const IR::U1 condition{
        FloatingPointCompare(v.ir, operand, zero, fields.compare_op, fp_control)};
    const IR::U32 value_if_true{v.X(fields.src_reg)};
    const IR::U32 selected{v.ir.Select(condition, value_if_true, src_a)};

    v.X(fields.dest_reg, selected);
}
|
||||
} // Anonymous namespace
|
||||
|
||||
// FCMP variant: fallback value from GetReg20, compared operand from GetFloatReg39.
// Both fetches stay inside the call expression so their evaluation order is left as it was.
void TranslatorVisitor::FCMP_reg(u64 insn) {
    FCMP(*this, insn,
         GetReg20(insn),
         GetFloatReg39(insn));
}
|
||||
|
||||
// FCMP variant: fallback value from GetReg39, compared operand from GetFloatCbuf.
// Both fetches stay inside the call expression so their evaluation order is left as it was.
void TranslatorVisitor::FCMP_rc(u64 insn) {
    FCMP(*this, insn,
         GetReg39(insn),
         GetFloatCbuf(insn));
}
|
||||
|
||||
// FCMP variant: fallback value from GetCbuf, compared operand from GetFloatReg39.
// Both fetches stay inside the call expression so their evaluation order is left as it was.
void TranslatorVisitor::FCMP_cr(u64 insn) {
    FCMP(*this, insn,
         GetCbuf(insn),
         GetFloatReg39(insn));
}
|
||||
|
||||
// FCMP variant with an immediate fallback value. The 32-bit immediate is assembled from the
// 19-bit field at bit 20 shifted left by 12, with bit 31 set when is_negative is set; the
// compared operand comes from GetFloatReg39.
void TranslatorVisitor::FCMP_imm(u64 insn) {
    union {
        u64 raw;
        BitField<20, 19, u64> value;
        BitField<56, 1, u64> is_negative;
    } const fields{insn};

    const u32 magnitude_bits{static_cast<u32>(fields.value) << 12};
    const u32 sign_bit{fields.is_negative != 0 ? (1U << 31) : 0};
    const u32 immediate_bits{magnitude_bits | sign_bit};

    FCMP(*this, insn, ir.Imm32(immediate_bits), GetFloatReg39(insn));
}
|
||||
|
||||
} // namespace Shader::Maxwell
|
@@ -0,0 +1,78 @@
|
||||
// Copyright 2021 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include "common/bit_field.h"
|
||||
#include "common/common_types.h"
|
||||
#include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h"
|
||||
#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
|
||||
|
||||
namespace Shader::Maxwell {
|
||||
namespace {
|
||||
// Shared FSET translation. Compares two F32 operands (after abs/neg modifiers, honouring the
// FTZ bit), combines the comparison with a predicate using the encoded boolean operation and
// writes a 32-bit result: zero when the combination is false, otherwise all ones (bf == 0) or
// 0x3f800000, the bit pattern of 1.0f (bf != 0). When CC is set the Z/S/C/O flags are updated.
void FSET(TranslatorVisitor& v, u64 insn, const IR::F32& src_b) {
    union {
        u64 raw;
        BitField<0, 8, IR::Reg> dest_reg;
        BitField<8, 8, IR::Reg> src_a_reg;
        BitField<39, 3, IR::Pred> pred;
        BitField<42, 1, u64> neg_pred;
        BitField<43, 1, u64> negate_a;
        BitField<44, 1, u64> abs_b;
        BitField<45, 2, BooleanOp> bop;
        BitField<47, 1, u64> cc;
        BitField<48, 4, FPCompareOp> compare_op;
        BitField<52, 1, u64> bf;
        BitField<53, 1, u64> negate_b;
        BitField<54, 1, u64> abs_a;
        BitField<55, 1, u64> ftz;
    } const fields{insn};

    const bool float_result{fields.bf != 0};

    const IR::F32 raw_a{v.F(fields.src_a_reg)};
    const IR::F32 lhs{v.ir.FPAbsNeg(raw_a, fields.abs_a != 0, fields.negate_a != 0)};
    const IR::F32 rhs{v.ir.FPAbsNeg(src_b, fields.abs_b != 0, fields.negate_b != 0)};

    const IR::FmzMode flush_mode{fields.ftz != 0 ? IR::FmzMode::FTZ : IR::FmzMode::None};
    const IR::FpControl fp_control{
        .no_contraction = false,
        .rounding = IR::FpRounding::DontCare,
        .fmz_mode = flush_mode,
    };

    // The predicate is negated with an explicit LogicalNot only when the encoding asks for it.
    const IR::U1 raw_pred{v.ir.GetPred(fields.pred)};
    const IR::U1 operand_pred{fields.neg_pred != 0 ? v.ir.LogicalNot(raw_pred) : raw_pred};

    const FPCompareOp compare_op{fields.compare_op};
    const BooleanOp combine_op{fields.bop};
    const IR::U1 comparison{FloatingPointCompare(v.ir, lhs, rhs, compare_op, fp_control)};
    const IR::U1 combined{PredicateCombine(v.ir, comparison, operand_pred, combine_op)};

    const IR::U32 one_mask{v.ir.Imm32(-1)};
    const IR::U32 fp_one{v.ir.Imm32(0x3f800000)};
    const IR::U32 zero{v.ir.Imm32(0)};
    const IR::U32 value_if_true{float_result ? fp_one : one_mask};
    const IR::U32 result{v.ir.Select(combined, value_if_true, zero)};

    v.X(fields.dest_reg, result);

    if (fields.cc == 0) {
        return;
    }
    // Flag update: Z mirrors "result == 0"; S is cleared for the float form and otherwise set
    // whenever the result is non-zero; C and O are always cleared.
    const IR::U1 is_zero{v.ir.IEqual(result, zero)};
    v.SetZFlag(is_zero);
    if (float_result) {
        v.ResetSFlag();
    } else {
        v.SetSFlag(v.ir.LogicalNot(is_zero));
    }
    v.ResetCFlag();
    v.ResetOFlag();
}
|
||||
} // Anonymous namespace
|
||||
|
||||
// FSET variant whose second operand is obtained with GetFloatReg20.
void TranslatorVisitor::FSET_reg(u64 insn) {
    const IR::F32 src_b{GetFloatReg20(insn)};
    FSET(*this, insn, src_b);
}
|
||||
|
||||
// FSET variant whose second operand is obtained with GetFloatCbuf.
void TranslatorVisitor::FSET_cbuf(u64 insn) {
    const IR::F32 src_b{GetFloatCbuf(insn)};
    FSET(*this, insn, src_b);
}
|
||||
|
||||
// FSET variant whose second operand is obtained with GetFloatImm20.
void TranslatorVisitor::FSET_imm(u64 insn) {
    const IR::F32 src_b{GetFloatImm20(insn)};
    FSET(*this, insn, src_b);
}
|
||||
|
||||
} // namespace Shader::Maxwell
|
@@ -0,0 +1,214 @@
|
||||
// Copyright 2021 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h"
|
||||
#include "shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.h"
|
||||
|
||||
namespace Shader::Maxwell {
|
||||
namespace {
|
||||
// Floating-point operand width as stored in the 2-bit src_size / dst_size instruction fields.
// Value 0 has no enumerator; the switches over this type treat it as invalid.
enum class FloatFormat : u64 {
    F16 = 1, // handled as a 16-bit float
    F32 = 2, // handled as a 32-bit float
    F64 = 3, // handled as a 64-bit float
};
|
||||
|
||||
// Rounding operation applied by F2F when source and destination formats are equal. Values are
// compared against the 4-bit rounding_op field after masking it with 0x0B.
enum class RoundingOp : u64 {
    None = 0,   // value is passed through (via an add with zero)
    Pass = 3,   // treated exactly like None
    Round = 8,  // FPRoundEven
    Floor = 9,  // FPFloor
    Ceil = 10,  // FPCeil
    Trunc = 11, // FPTrunc
};
|
||||
|
||||
// Returns the bit width (16, 32 or 64) of a FloatFormat.
// Throws NotImplementedException for any value without an enumerator.
[[nodiscard]] u32 WidthSize(FloatFormat width) {
    if (width == FloatFormat::F16) {
        return 16;
    }
    if (width == FloatFormat::F32) {
        return 32;
    }
    if (width == FloatFormat::F64) {
        return 64;
    }
    throw NotImplementedException("Invalid width {}", width);
}
|
||||
|
||||
// Shared F2F translation: converts or rounds a floating-point operand.
//
//  - src_a is the already-fetched source operand; `abs` is decoded by the caller, while the
//    negate bit is read here. Both are applied with FPAbsNeg first.
//  - When source and destination formats differ, the value is converted with FPConvert using
//    the 2-bit rounding field.
//  - When they are equal, the 4-bit rounding operation selects pass-through, round-to-even,
//    floor, ceil or trunc.
//  - FTZ and saturation are honoured only when neither format is F64.
//  - The result is written by destination format: F16 is packed with a zero upper half through
//    v.X, F32 goes through v.F and F64 through v.D.
//
// Throws NotImplementedException when CC is set, for an unknown rounding operation, or for an
// invalid destination format.
void F2F(TranslatorVisitor& v, u64 insn, const IR::F16F32F64& src_a, bool abs) {
    union {
        u64 insn;
        BitField<0, 8, IR::Reg> dest_reg;
        BitField<44, 1, u64> ftz;
        BitField<45, 1, u64> neg;
        BitField<47, 1, u64> cc;
        BitField<50, 1, u64> sat;
        // rounding_op and rounding overlap: the low two bits of the 4-bit operation double as
        // the FP rounding mode used for format conversions.
        BitField<39, 4, u64> rounding_op;
        BitField<39, 2, FpRounding> rounding;
        BitField<10, 2, FloatFormat> src_size;
        BitField<8, 2, FloatFormat> dst_size;

        [[nodiscard]] RoundingOp RoundingOperation() const {
            // Bit 2 of the field is ignored when classifying the operation.
            constexpr u64 rounding_mask = 0x0B;
            return static_cast<RoundingOp>(rounding_op.Value() & rounding_mask);
        }
    } const f2f{insn};

    if (f2f.cc != 0) {
        throw NotImplementedException("F2F CC");
    }

    IR::F16F32F64 input{v.ir.FPAbsNeg(src_a, abs, f2f.neg != 0)};

    const bool any_fp64{f2f.src_size == FloatFormat::F64 || f2f.dst_size == FloatFormat::F64};
    IR::FpControl fp_control{
        .no_contraction = false,
        .rounding = IR::FpRounding::DontCare,
        .fmz_mode = (f2f.ftz != 0 && !any_fp64 ? IR::FmzMode::FTZ : IR::FmzMode::None),
    };
    if (f2f.src_size != f2f.dst_size) {
        // Format change: a real conversion with the encoded rounding mode.
        fp_control.rounding = CastFpRounding(f2f.rounding);
        input = v.ir.FPConvert(WidthSize(f2f.dst_size), input, fp_control);
    } else {
        switch (f2f.RoundingOperation()) {
        case RoundingOp::None:
        case RoundingOp::Pass:
            // Make sure NANs are handled properly
            switch (f2f.src_size) {
            case FloatFormat::F16:
                input = v.ir.FPAdd(input, v.ir.FPConvert(16, v.ir.Imm32(0.0f)), fp_control);
                break;
            case FloatFormat::F32:
                input = v.ir.FPAdd(input, v.ir.Imm32(0.0f), fp_control);
                break;
            case FloatFormat::F64:
                input = v.ir.FPAdd(input, v.ir.Imm64(0.0), fp_control);
                break;
            }
            break;
        case RoundingOp::Round:
            input = v.ir.FPRoundEven(input, fp_control);
            break;
        case RoundingOp::Floor:
            input = v.ir.FPFloor(input, fp_control);
            break;
        case RoundingOp::Ceil:
            input = v.ir.FPCeil(input, fp_control);
            break;
        case RoundingOp::Trunc:
            input = v.ir.FPTrunc(input, fp_control);
            break;
        default:
            // Report the full 4-bit operation field that this switch dispatches on, not the
            // overlapping 2-bit rounding-mode field.
            throw NotImplementedException("Unimplemented rounding mode {}",
                                          f2f.rounding_op.Value());
        }
    }
    if (f2f.sat != 0 && !any_fp64) {
        input = v.ir.FPSaturate(input);
    }

    switch (f2f.dst_size) {
    case FloatFormat::F16: {
        // The F16 result is paired with a zero half to form the 32-bit register value.
        const IR::F16 imm{v.ir.FPConvert(16, v.ir.Imm32(0.0f))};
        v.X(f2f.dest_reg, v.ir.PackFloat2x16(v.ir.CompositeConstruct(input, imm)));
        break;
    }
    case FloatFormat::F32:
        v.F(f2f.dest_reg, input);
        break;
    case FloatFormat::F64:
        v.D(f2f.dest_reg, input);
        break;
    default:
        throw NotImplementedException("Invalid dest format {}", f2f.dst_size.Value());
    }
}
|
||||
} // Anonymous namespace
|
||||
|
||||
// F2F with a register source. Fetches the operand according to the source format: for F16 one
// half of the register selected by the `selector` bit, for F32 GetFloatReg20, for F64
// GetDoubleReg20. The abs bit is decoded here and the rest is handled by the shared F2F helper.
// Throws NotImplementedException for an invalid source format.
void TranslatorVisitor::F2F_reg(u64 insn) {
    union {
        u64 insn;
        BitField<49, 1, u64> abs;
        BitField<10, 2, FloatFormat> src_size;
        BitField<41, 1, u64> selector;
    } const f2f{insn};

    IR::F16F32F64 src_a;
    switch (f2f.src_size) {
    case FloatFormat::F16: {
        auto [lhs_a, rhs_a]{Extract(ir, GetReg20(insn), Swizzle::H1_H0)};
        src_a = f2f.selector != 0 ? rhs_a : lhs_a;
        break;
    }
    case FloatFormat::F32:
        src_a = GetFloatReg20(insn);
        break;
    case FloatFormat::F64:
        src_a = GetDoubleReg20(insn);
        break;
    default:
        // The value being rejected is the source size, so say so in the message.
        throw NotImplementedException("Invalid source format {}", f2f.src_size.Value());
    }
    F2F(*this, insn, src_a, f2f.abs != 0);
}
|
||||
|
||||
// F2F with a constant-buffer source. Fetches the operand according to the source format: for
// F16 one half of the GetCbuf word selected by the `selector` bit, for F32 GetFloatCbuf, for
// F64 GetDoubleCbuf. The abs bit is decoded here and the rest is handled by the shared F2F
// helper. Throws NotImplementedException for an invalid source format.
void TranslatorVisitor::F2F_cbuf(u64 insn) {
    union {
        u64 insn;
        BitField<49, 1, u64> abs;
        BitField<10, 2, FloatFormat> src_size;
        BitField<41, 1, u64> selector;
    } const f2f{insn};

    IR::F16F32F64 src_a;
    switch (f2f.src_size) {
    case FloatFormat::F16: {
        auto [lhs_a, rhs_a]{Extract(ir, GetCbuf(insn), Swizzle::H1_H0)};
        src_a = f2f.selector != 0 ? rhs_a : lhs_a;
        break;
    }
    case FloatFormat::F32:
        src_a = GetFloatCbuf(insn);
        break;
    case FloatFormat::F64:
        src_a = GetDoubleCbuf(insn);
        break;
    default:
        // The value being rejected is the source size, so say so in the message.
        throw NotImplementedException("Invalid source format {}", f2f.src_size.Value());
    }
    F2F(*this, insn, src_a, f2f.abs != 0);
}
|
||||
|
||||
void TranslatorVisitor::F2F_imm([[maybe_unused]] u64 insn) {
|
||||
union {
|
||||
u64 insn;
|
||||
BitField<49, 1, u64> abs;
|
||||
BitField<10, 2, FloatFormat> src_size;
|
||||
BitField<41, 1, u64> selector;
|
||||
BitField<20, 19, u64> imm;
|
||||
BitField<56, 1, u64> imm_neg;
|
||||
} const f2f{insn};
|
||||
|
||||
IR::F16F32F64 src_a;
|
||||
switch (f2f.src_size) {
|
||||
case FloatFormat::F16: {
|
||||
const u32 imm{static_cast<u32>(f2f.imm & 0x0000ffff)};
|
||||
const IR::Value vector{ir.UnpackFloat2x16(ir.Imm32(imm | (imm << 16)))};
|
||||
src_a = IR::F16{ir.CompositeExtract(vector, f2f.selector != 0 ? 0 : 1)};
|
||||
if (f2f.imm_neg != 0) {
|
||||
throw NotImplementedException("Neg bit on F16");
|
||||
}
|
||||
break;
|
||||
}
|
||||
case FloatFormat::F32:
|
||||
src_a = GetFloatImm20(insn);
|
||||
break;
|
||||
case FloatFormat::F64:
|
||||
src_a = GetDoubleImm20(insn);
|
||||
break;
|
||||
default:
|
||||
throw NotImplementedException("Invalid dest format {}", f2f.src_size.Value());
|
||||
}
|
||||
F2F(*this, insn, src_a, f2f.abs != 0);
|
||||
}
|
||||
|
||||
} // namespace Shader::Maxwell
|
@@ -0,0 +1,253 @@
|
||||
// Copyright 2021 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include <limits>
|
||||
|
||||
#include "common/common_types.h"
|
||||
#include "shader_recompiler/exception.h"
|
||||
#include "shader_recompiler/frontend/maxwell/opcodes.h"
|
||||
#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
|
||||
|
||||
namespace Shader::Maxwell {
|
||||
namespace {
|
||||
// Integer destination format, decoded from the 2-bit dest_format field of F2I.
enum class DestFormat : u64 {
    Invalid = 0,
    I16 = 1,
    I32 = 2,
    I64 = 3,
};
|
||||
// Floating-point source format, decoded from the 2-bit src_format field of F2I.
enum class SrcFormat : u64 {
    Invalid = 0,
    F16 = 1,
    F32 = 2,
    F64 = 3,
};
|
||||
/// Rounding applied to the floating-point source before the integer conversion.
enum class Rounding : u64 {
    Round = 0, // Mapped to FPRoundEven by TranslateF2I
    Floor = 1,
    Ceil = 2,
    Trunc = 3,
};
|
||||
|
||||
union F2I {
|
||||
u64 raw;
|
||||
BitField<0, 8, IR::Reg> dest_reg;
|
||||
BitField<8, 2, DestFormat> dest_format;
|
||||
BitField<10, 2, SrcFormat> src_format;
|
||||
BitField<12, 1, u64> is_signed;
|
||||
BitField<39, 2, Rounding> rounding;
|
||||
BitField<41, 1, u64> half;
|
||||
BitField<44, 1, u64> ftz;
|
||||
BitField<45, 1, u64> abs;
|
||||
BitField<47, 1, u64> cc;
|
||||
BitField<49, 1, u64> neg;
|
||||
};
|
||||
|
||||
/// Returns the width in bits of an F2I destination format.
/// Throws NotImplementedException for any value other than I16, I32 or I64.
size_t BitSize(DestFormat dest_format) {
    switch (dest_format) {
    case DestFormat::I16:
        return 16;
    case DestFormat::I32:
        return 32;
    case DestFormat::I64:
        return 64;
    default:
        break;
    }
    throw NotImplementedException("Invalid destination format {}", dest_format);
}
|
||||
|
||||
std::pair<f64, f64> ClampBounds(DestFormat format, bool is_signed) {
|
||||
if (is_signed) {
|
||||
switch (format) {
|
||||
case DestFormat::I16:
|
||||
return {static_cast<f64>(std::numeric_limits<s16>::max()),
|
||||
static_cast<f64>(std::numeric_limits<s16>::min())};
|
||||
case DestFormat::I32:
|
||||
return {static_cast<f64>(std::numeric_limits<s32>::max()),
|
||||
static_cast<f64>(std::numeric_limits<s32>::min())};
|
||||
case DestFormat::I64:
|
||||
return {static_cast<f64>(std::numeric_limits<s64>::max()),
|
||||
static_cast<f64>(std::numeric_limits<s64>::min())};
|
||||
default:
|
||||
break;
|
||||
}
|
||||
} else {
|
||||
switch (format) {
|
||||
case DestFormat::I16:
|
||||
return {static_cast<f64>(std::numeric_limits<u16>::max()),
|
||||
static_cast<f64>(std::numeric_limits<u16>::min())};
|
||||
case DestFormat::I32:
|
||||
return {static_cast<f64>(std::numeric_limits<u32>::max()),
|
||||
static_cast<f64>(std::numeric_limits<u32>::min())};
|
||||
case DestFormat::I64:
|
||||
return {static_cast<f64>(std::numeric_limits<u64>::max()),
|
||||
static_cast<f64>(std::numeric_limits<u64>::min())};
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
throw NotImplementedException("Invalid destination format {}", format);
|
||||
}
|
||||
|
||||
/// Builds the F64 source operand of F2I from a constant buffer reference.
///
/// Only one 32-bit word is read, at byte offset `offset * 4 + 4`; it becomes the second element
/// of the packed pair and the first element is the constant zero.
///
/// Throws NotImplementedException when the binding is 18 or above, when the offset is negative
/// or at least 0x4000, or when the offset is odd.
IR::F64 UnpackCbuf(TranslatorVisitor& v, u64 insn) {
    union {
        u64 raw;
        BitField<20, 14, s64> offset;
        BitField<34, 5, u64> binding;
    } const cbuf{insn};

    // Validate the encoding before emitting any IR.
    if (cbuf.binding >= 18) {
        throw NotImplementedException("Out of bounds constant buffer binding {}", cbuf.binding);
    }
    if (cbuf.offset >= 0x4'000 || cbuf.offset < 0) {
        throw NotImplementedException("Out of bounds constant buffer offset {}", cbuf.offset * 4);
    }
    if (cbuf.offset % 2 != 0) {
        throw NotImplementedException("Unaligned F64 constant buffer offset {}", cbuf.offset * 4);
    }

    const IR::U32 binding_index{v.ir.Imm32(static_cast<u32>(cbuf.binding))};
    const IR::U32 word_address{v.ir.Imm32(static_cast<u32>(cbuf.offset) * 4 + 4)};
    const IR::U32 loaded_word{v.ir.GetCbuf(binding_index, word_address)};
    const IR::Value packed_pair{v.ir.CompositeConstruct(v.ir.Imm32(0U), loaded_word)};
    return v.ir.PackDouble2x32(packed_pair);
}
|
||||
|
||||
/// Emits IR converting the floating-point value `src_a` to an integer (F2I).
///
/// Steps, in emission order:
///  1. apply the ABS/NEG source modifiers;
///  2. round according to the instruction's rounding field;
///  3. clamp to the range of the destination integer type (see ClampBounds);
///  4. convert to a 32-bit result, or 64-bit for I64 destinations;
///  5. patch the result for NaN inputs;
///  6. write the destination register (pair for 64-bit results).
///
/// @param v     Visitor owning the IR emitter and register accessors.
/// @param insn  Raw instruction word, decoded through the F2I union.
/// @param src_a Source operand already fetched by the caller, before ABS/NEG.
///
/// Throws NotImplementedException for an invalid rounding, source or destination format, and
/// when the CC bit is set.
void TranslateF2I(TranslatorVisitor& v, u64 insn, const IR::F16F32F64& src_a) {
    // F2I is used to convert from a floating point value to an integer
    const F2I f2i{insn};

    // The FTZ bit is only honoured when the source is neither F16 nor F64 and the destination
    // is not I64; in every other combination the denormal mode is left as DontCare.
    const bool denorm_cares{f2i.src_format != SrcFormat::F16 && f2i.src_format != SrcFormat::F64 &&
                            f2i.dest_format != DestFormat::I64};
    IR::FmzMode fmz_mode{IR::FmzMode::DontCare};
    if (denorm_cares) {
        fmz_mode = f2i.ftz != 0 ? IR::FmzMode::FTZ : IR::FmzMode::None;
    }
    const IR::FpControl fp_control{
        .no_contraction = true,
        .rounding = IR::FpRounding::DontCare,
        .fmz_mode = fmz_mode,
    };
    const IR::F16F32F64 op_a{v.ir.FPAbsNeg(src_a, f2i.abs != 0, f2i.neg != 0)};
    const IR::F16F32F64 rounded_value{[&] {
        switch (f2i.rounding) {
        case Rounding::Round:
            return v.ir.FPRoundEven(op_a, fp_control);
        case Rounding::Floor:
            return v.ir.FPFloor(op_a, fp_control);
        case Rounding::Ceil:
            return v.ir.FPCeil(op_a, fp_control);
        case Rounding::Trunc:
            return v.ir.FPTrunc(op_a, fp_control);
        default:
            throw NotImplementedException("Invalid F2I rounding {}", f2i.rounding.Value());
        }
    }()};
    const bool is_signed{f2i.is_signed != 0};
    // ClampBounds returns {max, min}, in that order.
    const auto [max_bound, min_bound] = ClampBounds(f2i.dest_format, is_signed);

    // The clamp limits must be expressed in the same floating-point width as the source.
    IR::F16F32F64 intermediate;
    switch (f2i.src_format) {
    case SrcFormat::F16: {
        const IR::F16 max_val{v.ir.FPConvert(16, v.ir.Imm32(static_cast<f32>(max_bound)))};
        const IR::F16 min_val{v.ir.FPConvert(16, v.ir.Imm32(static_cast<f32>(min_bound)))};
        intermediate = v.ir.FPClamp(rounded_value, min_val, max_val);
        break;
    }
    case SrcFormat::F32: {
        const IR::F32 max_val{v.ir.Imm32(static_cast<f32>(max_bound))};
        const IR::F32 min_val{v.ir.Imm32(static_cast<f32>(min_bound))};
        intermediate = v.ir.FPClamp(rounded_value, min_val, max_val);
        break;
    }
    case SrcFormat::F64: {
        const IR::F64 max_val{v.ir.Imm64(max_bound)};
        const IR::F64 min_val{v.ir.Imm64(min_bound)};
        intermediate = v.ir.FPClamp(rounded_value, min_val, max_val);
        break;
    }
    default:
        // This switch is over the source format, so that is the value worth reporting.
        throw NotImplementedException("Invalid F2I source format {}", f2i.src_format.Value());
    }

    // Results narrower than 32 bits are still produced as 32-bit values.
    const size_t bitsize{std::max<size_t>(32, BitSize(f2i.dest_format))};
    IR::U16U32U64 result{v.ir.ConvertFToI(bitsize, is_signed, intermediate)};

    // NaN handling. When exactly one of "source is F64" / "destination is I64" holds, a NaN
    // input yields the value with only the top bit set (for I32 and I64 destinations).
    bool handled_special_case = false;
    const bool special_nan_cases =
        (f2i.src_format == SrcFormat::F64) != (f2i.dest_format == DestFormat::I64);
    if (special_nan_cases) {
        if (f2i.dest_format == DestFormat::I32) {
            handled_special_case = true;
            result = IR::U32{v.ir.Select(v.ir.FPIsNan(op_a), v.ir.Imm32(0x8000'0000U), result)};
        } else if (f2i.dest_format == DestFormat::I64) {
            handled_special_case = true;
            result = IR::U64{
                v.ir.Select(v.ir.FPIsNan(op_a), v.ir.Imm64(0x8000'0000'0000'0000UL), result)};
        }
    }
    // Otherwise a signed conversion of NaN yields zero; unsigned results are left untouched.
    if (!handled_special_case && is_signed) {
        if (bitsize != 64) {
            result = IR::U32{v.ir.Select(v.ir.FPIsNan(op_a), v.ir.Imm32(0U), result)};
        } else {
            result = IR::U64{v.ir.Select(v.ir.FPIsNan(op_a), v.ir.Imm64(u64{0}), result)};
        }
    }

    if (bitsize == 64) {
        v.L(f2i.dest_reg, result);
    } else {
        v.X(f2i.dest_reg, result);
    }

    if (f2i.cc != 0) {
        throw NotImplementedException("F2I CC");
    }
}
|
||||
} // Anonymous namespace
|
||||
|
||||
/// Translates F2I with a register source operand.
/// F16 sources are extracted from the packed register using the `half` field, F32 sources are
/// read directly, and F64 sources are packed from the register and its successor.
/// Throws NotImplementedException for any other source format.
void TranslatorVisitor::F2I_reg(u64 insn) {
    union {
        u64 raw;
        F2I base;
        BitField<20, 8, IR::Reg> src_reg;
    } const f2i{insn};

    IR::F16F32F64 source;
    switch (f2i.base.src_format) {
    case SrcFormat::F16:
        source = IR::F16{ir.CompositeExtract(ir.UnpackFloat2x16(X(f2i.src_reg)), f2i.base.half)};
        break;
    case SrcFormat::F32:
        source = F(f2i.src_reg);
        break;
    case SrcFormat::F64:
        source = ir.PackDouble2x32(ir.CompositeConstruct(X(f2i.src_reg), X(f2i.src_reg + 1)));
        break;
    default:
        throw NotImplementedException("Invalid F2I source format {}",
                                      f2i.base.src_format.Value());
    }
    TranslateF2I(*this, insn, source);
}
|
||||
|
||||
/// Translates F2I with a constant buffer source operand.
/// F16 sources are extracted from the loaded word using the `half` field, F32 sources use the
/// float constant buffer accessor, and F64 sources go through UnpackCbuf.
/// Throws NotImplementedException for any other source format.
void TranslatorVisitor::F2I_cbuf(u64 insn) {
    const F2I f2i{insn};

    IR::F16F32F64 source;
    switch (f2i.src_format) {
    case SrcFormat::F16:
        source = IR::F16{ir.CompositeExtract(ir.UnpackFloat2x16(GetCbuf(insn)), f2i.half)};
        break;
    case SrcFormat::F32:
        source = GetFloatCbuf(insn);
        break;
    case SrcFormat::F64:
        source = UnpackCbuf(*this, insn);
        break;
    default:
        throw NotImplementedException("Invalid F2I source format {}", f2i.src_format.Value());
    }
    TranslateF2I(*this, insn, source);
}
|
||||
|
||||
/// F2I with an immediate source is not implemented; always throws NotImplementedException.
void TranslatorVisitor::F2I_imm(u64 /*insn*/) {
    throw NotImplementedException("{}", Opcode::F2I_imm);
}
|
||||
|
||||
} // namespace Shader::Maxwell
|
@@ -0,0 +1,94 @@
|
||||
// Copyright 2021 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include "common/common_types.h"
|
||||
#include "shader_recompiler/exception.h"
|
||||
#include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h"
|
||||
#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
|
||||
|
||||
namespace Shader::Maxwell {
|
||||
namespace {
|
||||
/// Emits IR for a single-precision fused multiply-add with every modifier passed explicitly.
///
/// The first factor is read from the register in bits 8..15 of `insn`; `src_b` is the second
/// factor and `src_c` the addend. Each operand may be negated. The result is optionally
/// saturated and written to the register in bits 0..7.
///
/// Throws NotImplementedException when `cc` is set.
void FFMA(TranslatorVisitor& v, u64 insn, const IR::F32& src_b, const IR::F32& src_c, bool neg_a,
          bool neg_b, bool neg_c, bool sat, bool cc, FmzMode fmz_mode, FpRounding fp_rounding) {
    union {
        u64 raw;
        BitField<0, 8, IR::Reg> dest_reg;
        BitField<8, 8, IR::Reg> src_a;
    } const ffma{insn};

    if (cc) {
        throw NotImplementedException("FFMA CC");
    }

    const IR::F32 factor_a{v.ir.FPAbsNeg(v.F(ffma.src_a), false, neg_a)};
    const IR::F32 factor_b{v.ir.FPAbsNeg(src_b, false, neg_b)};
    const IR::F32 addend{v.ir.FPAbsNeg(src_c, false, neg_c)};

    const IR::FpControl control{
        .no_contraction = true,
        .rounding = CastFpRounding(fp_rounding),
        .fmz_mode = CastFmzMode(fmz_mode),
    };
    IR::F32 result{v.ir.FPFma(factor_a, factor_b, addend, control)};

    const bool needs_fmz_fixup{!sat && fmz_mode == FmzMode::FMZ};
    if (needs_fmz_fixup) {
        // Do not implement FMZ if SAT is enabled, as it does the logic for us.
        // On D3D9 mode, anything * 0 is zero, even NAN and infinity, so the FMA collapses to
        // the addend whenever either factor is zero.
        const IR::F32 zero{v.ir.Imm32(0.0f)};
        const IR::U1 a_is_zero{v.ir.FPEqual(factor_a, zero)};
        const IR::U1 b_is_zero{v.ir.FPEqual(factor_b, zero)};
        const IR::U1 product_is_zero{v.ir.LogicalOr(a_is_zero, b_is_zero)};
        result = IR::F32{v.ir.Select(product_is_zero, addend, result)};
    }
    if (sat) {
        result = v.ir.FPSaturate(result);
    }
    v.F(ffma.dest_reg, result);
}
|
||||
|
||||
/// Decodes the modifier bits shared by the reg/rc/cr/imm FFMA encodings and forwards to the
/// fully parameterized FFMA. These encodings never negate the first factor.
void FFMA(TranslatorVisitor& v, u64 insn, const IR::F32& src_b, const IR::F32& src_c) {
    union {
        u64 raw;
        BitField<47, 1, u64> cc;
        BitField<48, 1, u64> neg_b;
        BitField<49, 1, u64> neg_c;
        BitField<50, 1, u64> sat;
        BitField<51, 2, FpRounding> fp_rounding;
        BitField<53, 2, FmzMode> fmz_mode;
    } const ffma{insn};

    const bool negate_b{ffma.neg_b != 0};
    const bool negate_c{ffma.neg_c != 0};
    const bool saturate{ffma.sat != 0};
    const bool writes_cc{ffma.cc != 0};
    FFMA(v, insn, src_b, src_c, false, negate_b, negate_c, saturate, writes_cc, ffma.fmz_mode,
         ffma.fp_rounding);
}
|
||||
} // Anonymous namespace
|
||||
|
||||
/// FFMA, register form: second factor from the register at bit 20, addend from the register at
/// bit 39.
void TranslatorVisitor::FFMA_reg(u64 insn) {
    const IR::F32 factor_b{GetFloatReg20(insn)};
    const IR::F32 addend{GetFloatReg39(insn)};
    FFMA(*this, insn, factor_b, addend);
}
|
||||
|
||||
/// FFMA, register/constant-buffer form: second factor from the register at bit 39, addend from
/// the constant buffer.
void TranslatorVisitor::FFMA_rc(u64 insn) {
    const IR::F32 factor_b{GetFloatReg39(insn)};
    const IR::F32 addend{GetFloatCbuf(insn)};
    FFMA(*this, insn, factor_b, addend);
}
|
||||
|
||||
/// FFMA, constant-buffer/register form: second factor from the constant buffer, addend from the
/// register at bit 39.
void TranslatorVisitor::FFMA_cr(u64 insn) {
    const IR::F32 factor_b{GetFloatCbuf(insn)};
    const IR::F32 addend{GetFloatReg39(insn)};
    FFMA(*this, insn, factor_b, addend);
}
|
||||
|
||||
/// FFMA, immediate form: second factor from the 20-bit float immediate, addend from the
/// register at bit 39.
void TranslatorVisitor::FFMA_imm(u64 insn) {
    const IR::F32 factor_b{GetFloatImm20(insn)};
    const IR::F32 addend{GetFloatReg39(insn)};
    FFMA(*this, insn, factor_b, addend);
}
|
||||
|
||||
/// FFMA with a full 32-bit float immediate as the second factor.
/// The addend is read from the same register field that names the destination, the second
/// factor is never negated, and rounding is fixed to RN.
void TranslatorVisitor::FFMA32I(u64 insn) {
    union {
        u64 raw;
        BitField<0, 8, IR::Reg> src_c; // FFMA32I mirrors the destination and addition register
        BitField<52, 1, u64> cc;
        BitField<53, 2, FmzMode> fmz_mode;
        BitField<55, 1, u64> sat;
        BitField<56, 1, u64> neg_a;
        BitField<57, 1, u64> neg_c;
    } const ffma32i{insn};

    const bool negate_a{ffma32i.neg_a != 0};
    const bool negate_c{ffma32i.neg_c != 0};
    const bool saturate{ffma32i.sat != 0};
    const bool writes_cc{ffma32i.cc != 0};
    FFMA(*this, insn, GetFloatImm32(insn), F(ffma32i.src_c), negate_a, false, negate_c, saturate,
         writes_cc, ffma32i.fmz_mode, FpRounding::RN);
}
|
||||
|
||||
} // namespace Shader::Maxwell
|
@@ -0,0 +1,62 @@
|
||||
// Copyright 2021 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include "common/bit_field.h"
|
||||
#include "common/common_types.h"
|
||||
#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
|
||||
|
||||
namespace Shader::Maxwell {
|
||||
namespace {
|
||||
/// Emits IR for FMNMX: writes either the minimum or the maximum of the two operands.
///
/// Both FPMax and FPMin are emitted and a Select on the instruction's predicate picks one:
/// the minimum when the predicate is true, the maximum otherwise. The `neg_pred` bit swaps
/// these two roles.
///
/// Throws NotImplementedException when the CC bit is set.
void FMNMX(TranslatorVisitor& v, u64 insn, const IR::F32& src_b) {
    union {
        u64 insn;
        BitField<0, 8, IR::Reg> dest_reg;
        BitField<8, 8, IR::Reg> src_a_reg;
        BitField<39, 3, IR::Pred> pred;
        BitField<42, 1, u64> neg_pred;
        BitField<44, 1, u64> ftz;
        BitField<45, 1, u64> negate_b;
        BitField<46, 1, u64> abs_a;
        BitField<47, 1, u64> cc;
        BitField<48, 1, u64> negate_a;
        BitField<49, 1, u64> abs_b;
    } const fmnmx{insn};

    if (fmnmx.cc != 0) {
        throw NotImplementedException("FMNMX CC");
    }

    const IR::U1 selector{v.ir.GetPred(fmnmx.pred)};
    const IR::F32 lhs{v.ir.FPAbsNeg(v.F(fmnmx.src_a_reg), fmnmx.abs_a != 0, fmnmx.negate_a != 0)};
    const IR::F32 rhs{v.ir.FPAbsNeg(src_b, fmnmx.abs_b != 0, fmnmx.negate_b != 0)};

    const IR::FpControl fp_control{
        .no_contraction = false,
        .rounding = IR::FpRounding::DontCare,
        .fmz_mode = (fmnmx.ftz != 0 ? IR::FmzMode::FTZ : IR::FmzMode::None),
    };
    const IR::F32 maximum{v.ir.FPMax(lhs, rhs, fp_control)};
    const IR::F32 minimum{v.ir.FPMin(lhs, rhs, fp_control)};

    // A negated predicate exchanges which value is taken on each side of the select.
    const bool swapped{fmnmx.neg_pred != 0};
    const IR::F32 when_true{swapped ? maximum : minimum};
    const IR::F32 when_false{swapped ? minimum : maximum};

    v.F(fmnmx.dest_reg, IR::F32{v.ir.Select(selector, when_true, when_false)});
}
|
||||
} // Anonymous namespace
|
||||
|
||||
/// FMNMX with the second operand read from the register at bit 20.
void TranslatorVisitor::FMNMX_reg(u64 insn) {
    const IR::F32 operand_b{GetFloatReg20(insn)};
    FMNMX(*this, insn, operand_b);
}
|
||||
|
||||
/// FMNMX with the second operand read from a constant buffer.
void TranslatorVisitor::FMNMX_cbuf(u64 insn) {
    const IR::F32 operand_b{GetFloatCbuf(insn)};
    FMNMX(*this, insn, operand_b);
}
|
||||
|
||||
/// FMNMX with the second operand taken from the 20-bit float immediate.
void TranslatorVisitor::FMNMX_imm(u64 insn) {
    const IR::F32 operand_b{GetFloatImm20(insn)};
    FMNMX(*this, insn, operand_b);
}
|
||||
|
||||
} // namespace Shader::Maxwell
|
@@ -0,0 +1,71 @@
|
||||
// Copyright 2021 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include "common/bit_field.h"
|
||||
#include "common/common_types.h"
|
||||
#include "shader_recompiler/exception.h"
|
||||
#include "shader_recompiler/frontend/maxwell/opcodes.h"
|
||||
#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
|
||||
|
||||
namespace Shader::Maxwell {
|
||||
namespace {
|
||||
/// Special function selected by the MUFU operation field.
enum class Operation : u64 {
    Cos,    // 0
    Sin,    // 1
    Ex2,    // 2: Base 2 exponent
    Lg2,    // 3: Base 2 logarithm
    Rcp,    // 4: Reciprocal
    Rsq,    // 5: Reciprocal square root
    Rcp64H, // 6: 64-bit reciprocal
    Rsq64H, // 7: 64-bit reciprocal square root
    Sqrt,   // 8
};
|
||||
} // Anonymous namespace
|
||||
|
||||
/// Translates MUFU, the multi-function unit instruction (see Operation for the list).
///
/// The source register is read as F32, ABS/NEG are applied, the selected function is emitted,
/// the result is optionally saturated and then written to the destination register.
///
/// Throws NotImplementedException for RCP64H, RSQ64H and any unknown operation value.
void TranslatorVisitor::MUFU(u64 insn) {
    union {
        u64 raw;
        BitField<0, 8, IR::Reg> dest_reg;
        BitField<8, 8, IR::Reg> src_reg;
        BitField<20, 4, Operation> operation;
        BitField<46, 1, u64> abs;
        BitField<48, 1, u64> neg;
        BitField<50, 1, u64> sat;
    } const mufu{insn};

    const IR::F32 operand{ir.FPAbsNeg(F(mufu.src_reg), mufu.abs != 0, mufu.neg != 0)};

    IR::F32 result;
    switch (mufu.operation) {
    case Operation::Cos:
        result = ir.FPCos(operand);
        break;
    case Operation::Sin:
        result = ir.FPSin(operand);
        break;
    case Operation::Ex2:
        result = ir.FPExp2(operand);
        break;
    case Operation::Lg2:
        result = ir.FPLog2(operand);
        break;
    case Operation::Rcp:
        result = ir.FPRecip(operand);
        break;
    case Operation::Rsq:
        result = ir.FPRecipSqrt(operand);
        break;
    case Operation::Rcp64H:
        throw NotImplementedException("MUFU.RCP64H");
    case Operation::Rsq64H:
        throw NotImplementedException("MUFU.RSQ64H");
    case Operation::Sqrt:
        result = ir.FPSqrt(operand);
        break;
    default:
        throw NotImplementedException("Invalid MUFU operation {}", mufu.operation.Value());
    }

    if (mufu.sat != 0) {
        result = ir.FPSaturate(result);
    }
    F(mufu.dest_reg, result);
}
|
||||
|
||||
} // namespace Shader::Maxwell
|
@@ -0,0 +1,127 @@
|
||||
// Copyright 2021 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include "common/bit_field.h"
|
||||
#include "common/common_types.h"
|
||||
#include "shader_recompiler/frontend/ir/ir_emitter.h"
|
||||
#include "shader_recompiler/frontend/ir/modifiers.h"
|
||||
#include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h"
|
||||
#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
|
||||
|
||||
namespace Shader::Maxwell {
|
||||
namespace {
|
||||
/// Pre-multiplication scale selected by the FMUL scale field (see ScaleFactor for the values).
enum class Scale : u64 {
    None = 0,
    D2 = 1, // Divide by 2
    D4 = 2, // Divide by 4
    D8 = 3, // Divide by 8
    M8 = 4, // Multiply by 8
    M4 = 5, // Multiply by 4
    M2 = 6, // Multiply by 2
    INVALIDSCALE37 = 7,
};
|
||||
|
||||
/// Returns the multiplier corresponding to an FMUL scale modifier.
/// Throws NotImplementedException for Scale::INVALIDSCALE37.
float ScaleFactor(Scale scale) {
    switch (scale) {
    case Scale::None:
        return 1.0f;
    case Scale::D2:
        return 0.5f;
    case Scale::D4:
        return 0.25f;
    case Scale::D8:
        return 0.125f;
    case Scale::M8:
        return 8.0f;
    case Scale::M4:
        return 4.0f;
    case Scale::M2:
        return 2.0f;
    case Scale::INVALIDSCALE37:
        // Falls out of the switch to the throw below.
        break;
    }
    throw NotImplementedException("Invalid FMUL scale {}", scale);
}
|
||||
|
||||
/// Emits IR for a single-precision multiply with every modifier passed explicitly.
///
/// The first factor is read from the register in bits 8..15 of `insn` and, when a scale is
/// requested, pre-multiplied by ScaleFactor(scale). `src_b` is optionally negated. The product
/// is optionally saturated and written to the register in bits 0..7.
///
/// Throws NotImplementedException when `cc` is set, or when a scale is combined with a
/// denormal mode other than FTZ or a rounding other than RN.
void FMUL(TranslatorVisitor& v, u64 insn, const IR::F32& src_b, FmzMode fmz_mode,
          FpRounding fp_rounding, Scale scale, bool sat, bool cc, bool neg_b) {
    union {
        u64 raw;
        BitField<0, 8, IR::Reg> dest_reg;
        BitField<8, 8, IR::Reg> src_a;
    } const fmul{insn};

    if (cc) {
        throw NotImplementedException("FMUL CC");
    }

    IR::F32 factor_a{v.F(fmul.src_a)};
    if (scale != Scale::None) {
        // NOTE(review): the guard tests FTZ while the message says "non-FMZ" - confirm which of
        // the two reflects the intended restriction.
        const bool supported{fmz_mode == FmzMode::FTZ && fp_rounding == FpRounding::RN};
        if (!supported) {
            throw NotImplementedException("FMUL scale with non-FMZ or non-RN modifiers");
        }
        factor_a = v.ir.FPMul(factor_a, v.ir.Imm32(ScaleFactor(scale)));
    }
    const IR::F32 factor_b{v.ir.FPAbsNeg(src_b, false, neg_b)};

    const IR::FpControl control{
        .no_contraction = true,
        .rounding = CastFpRounding(fp_rounding),
        .fmz_mode = CastFmzMode(fmz_mode),
    };
    IR::F32 product{v.ir.FPMul(factor_a, factor_b, control)};

    const bool needs_fmz_fixup{!sat && fmz_mode == FmzMode::FMZ};
    if (needs_fmz_fixup) {
        // Do not implement FMZ if SAT is enabled, as it does the logic for us.
        // On D3D9 mode, anything * 0 is zero, even NAN and infinity
        const IR::F32 zero{v.ir.Imm32(0.0f)};
        const IR::U1 a_is_zero{v.ir.FPEqual(factor_a, zero)};
        const IR::U1 b_is_zero{v.ir.FPEqual(factor_b, zero)};
        const IR::U1 either_zero{v.ir.LogicalOr(a_is_zero, b_is_zero)};
        product = IR::F32{v.ir.Select(either_zero, zero, product)};
    }
    if (sat) {
        product = v.ir.FPSaturate(product);
    }
    v.F(fmul.dest_reg, product);
}
|
||||
|
||||
/// Decodes the modifier bits shared by the reg/cbuf/imm FMUL encodings and forwards to the
/// fully parameterized FMUL.
void FMUL(TranslatorVisitor& v, u64 insn, const IR::F32& src_b) {
    union {
        u64 raw;
        BitField<39, 2, FpRounding> fp_rounding;
        BitField<41, 3, Scale> scale;
        BitField<44, 2, FmzMode> fmz;
        BitField<47, 1, u64> cc;
        BitField<48, 1, u64> neg_b;
        BitField<50, 1, u64> sat;
    } const fmul{insn};

    const bool saturate{fmul.sat != 0};
    const bool writes_cc{fmul.cc != 0};
    const bool negate_b{fmul.neg_b != 0};
    FMUL(v, insn, src_b, fmul.fmz, fmul.fp_rounding, fmul.scale, saturate, writes_cc, negate_b);
}
|
||||
} // Anonymous namespace
|
||||
|
||||
/// FMUL with the second factor read from the register at bit 20.
void TranslatorVisitor::FMUL_reg(u64 insn) {
    const IR::F32 factor_b{GetFloatReg20(insn)};
    FMUL(*this, insn, factor_b);
}
|
||||
|
||||
/// FMUL with the second factor read from a constant buffer.
void TranslatorVisitor::FMUL_cbuf(u64 insn) {
    const IR::F32 factor_b{GetFloatCbuf(insn)};
    FMUL(*this, insn, factor_b);
}
|
||||
|
||||
/// FMUL with the second factor taken from the 20-bit float immediate.
void TranslatorVisitor::FMUL_imm(u64 insn) {
    const IR::F32 factor_b{GetFloatImm20(insn)};
    FMUL(*this, insn, factor_b);
}
|
||||
|
||||
/// FMUL with a full 32-bit float immediate as the second factor.
/// This form has no scale, never negates the immediate and always rounds with RN.
void TranslatorVisitor::FMUL32I(u64 insn) {
    union {
        u64 raw;
        BitField<52, 1, u64> cc;
        BitField<53, 2, FmzMode> fmz;
        BitField<55, 1, u64> sat;
    } const fmul32i{insn};

    const bool saturate{fmul32i.sat != 0};
    const bool writes_cc{fmul32i.cc != 0};
    FMUL(*this, insn, GetFloatImm32(insn), fmul32i.fmz, FpRounding::RN, Scale::None, saturate,
         writes_cc, false);
}
|
||||
|
||||
} // namespace Shader::Maxwell
|
@@ -0,0 +1,41 @@
|
||||
// Copyright 2021 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include "common/bit_field.h"
|
||||
#include "common/common_types.h"
|
||||
#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
|
||||
|
||||
namespace Shader::Maxwell {
|
||||
namespace {
|
||||
/// Value of the one-bit RRO mode field.
enum class Mode : u64 {
    SINCOS = 0,
    EX2 = 1,
};
|
||||
|
||||
/// Emits IR for RRO: the destination receives the source with only ABS/NEG applied.
/// The `mode` field is decoded but not consulted by this implementation.
void RRO(TranslatorVisitor& v, u64 insn, const IR::F32& src) {
    union {
        u64 raw;
        BitField<0, 8, IR::Reg> dest_reg;
        BitField<39, 1, Mode> mode;
        BitField<45, 1, u64> neg;
        BitField<49, 1, u64> abs;
    } const rro{insn};

    const bool take_abs{rro.abs != 0};
    const bool negate{rro.neg != 0};
    const IR::F32 value{v.ir.FPAbsNeg(src, take_abs, negate)};
    v.F(rro.dest_reg, value);
}
|
||||
} // Anonymous namespace
|
||||
|
||||
/// RRO with the source read from the register at bit 20.
void TranslatorVisitor::RRO_reg(u64 insn) {
    const IR::F32 source{GetFloatReg20(insn)};
    RRO(*this, insn, source);
}
|
||||
|
||||
/// RRO with the source read from a constant buffer.
void TranslatorVisitor::RRO_cbuf(u64 insn) {
    const IR::F32 source{GetFloatCbuf(insn)};
    RRO(*this, insn, source);
}
|
||||
|
||||
/// RRO with an immediate source is not implemented; always throws NotImplementedException.
void TranslatorVisitor::RRO_imm(u64 /*insn*/) {
    throw NotImplementedException("RRO (imm)");
}
|
||||
|
||||
} // namespace Shader::Maxwell
|
@@ -0,0 +1,60 @@
|
||||
// Copyright 2021 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include "common/bit_field.h"
|
||||
#include "common/common_types.h"
|
||||
#include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h"
|
||||
#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
|
||||
|
||||
namespace Shader::Maxwell {
|
||||
namespace {
|
||||
/// Emits IR for FSETP: a floating-point comparison that writes two predicates.
///
/// `dest_pred_a` receives the comparison combined with the (optionally negated) `bop_pred`
/// through the boolean operation; `dest_pred_b` receives the same combination applied to the
/// logical negation of the comparison.
void FSETP(TranslatorVisitor& v, u64 insn, const IR::F32& src_b) {
    union {
        u64 insn;
        BitField<0, 3, IR::Pred> dest_pred_b;
        BitField<3, 3, IR::Pred> dest_pred_a;
        BitField<6, 1, u64> negate_b;
        BitField<7, 1, u64> abs_a;
        BitField<8, 8, IR::Reg> src_a_reg;
        BitField<39, 3, IR::Pred> bop_pred;
        BitField<42, 1, u64> neg_bop_pred;
        BitField<43, 1, u64> negate_a;
        BitField<44, 1, u64> abs_b;
        BitField<45, 2, BooleanOp> bop;
        BitField<47, 1, u64> ftz;
        BitField<48, 4, FPCompareOp> compare_op;
    } const fsetp{insn};

    const IR::F32 lhs{v.ir.FPAbsNeg(v.F(fsetp.src_a_reg), fsetp.abs_a != 0, fsetp.negate_a != 0)};
    const IR::F32 rhs{v.ir.FPAbsNeg(src_b, fsetp.abs_b != 0, fsetp.negate_b != 0)};
    const IR::FpControl fp_control{
        .no_contraction = false,
        .rounding = IR::FpRounding::DontCare,
        .fmz_mode = (fsetp.ftz != 0 ? IR::FmzMode::FTZ : IR::FmzMode::None),
    };

    const BooleanOp combine_op{fsetp.bop};
    const FPCompareOp compare_op{fsetp.compare_op};
    const IR::U1 compared{FloatingPointCompare(v.ir, lhs, rhs, compare_op, fp_control)};
    const IR::U1 extra_pred{v.ir.GetPred(fsetp.bop_pred, fsetp.neg_bop_pred != 0)};

    const IR::U1 value_a{PredicateCombine(v.ir, compared, extra_pred, combine_op)};
    const IR::U1 not_compared{v.ir.LogicalNot(compared)};
    const IR::U1 value_b{PredicateCombine(v.ir, not_compared, extra_pred, combine_op)};

    v.ir.SetPred(fsetp.dest_pred_a, value_a);
    v.ir.SetPred(fsetp.dest_pred_b, value_b);
}
|
||||
} // Anonymous namespace
|
||||
|
||||
/// FSETP with the second operand read from the register at bit 20.
void TranslatorVisitor::FSETP_reg(u64 insn) {
    const IR::F32 operand_b{GetFloatReg20(insn)};
    FSETP(*this, insn, operand_b);
}
|
||||
|
||||
/// FSETP with the second operand read from a constant buffer.
void TranslatorVisitor::FSETP_cbuf(u64 insn) {
    const IR::F32 operand_b{GetFloatCbuf(insn)};
    FSETP(*this, insn, operand_b);
}
|
||||
|
||||
/// FSETP with the second operand taken from the 20-bit float immediate.
void TranslatorVisitor::FSETP_imm(u64 insn) {
    const IR::F32 operand_b{GetFloatImm20(insn)};
    FSETP(*this, insn, operand_b);
}
|
||||
|
||||
} // namespace Shader::Maxwell
|
@@ -0,0 +1,44 @@
|
||||
// Copyright 2021 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include "common/common_types.h"
|
||||
#include "shader_recompiler/exception.h"
|
||||
#include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h"
|
||||
#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
|
||||
|
||||
namespace Shader::Maxwell {
|
||||
/// Translates FSWZADD by emitting an FSwizzleAdd of the two register operands with the 8-bit
/// swizzle taken from the instruction, then writing the destination register.
///
/// Throws NotImplementedException when the NDV bit is set (before any IR is emitted) or when
/// the CC bit is set (after the destination has been written).
void TranslatorVisitor::FSWZADD(u64 insn) {
    union {
        u64 raw;
        BitField<0, 8, IR::Reg> dest_reg;
        BitField<28, 8, u64> swizzle;
        BitField<38, 1, u64> ndv;
        BitField<39, 2, FpRounding> round;
        BitField<44, 1, u64> ftz;
        BitField<47, 1, u64> cc;
    } const fswzadd{insn};

    if (fswzadd.ndv != 0) {
        throw NotImplementedException("FSWZADD NDV");
    }

    const IR::F32 operand_a{GetFloatReg8(insn)};
    const IR::F32 operand_b{GetFloatReg20(insn)};
    const IR::U32 swizzle_mask{ir.Imm32(static_cast<u32>(fswzadd.swizzle))};

    const IR::FmzMode denormal_mode{fswzadd.ftz != 0 ? IR::FmzMode::FTZ : IR::FmzMode::None};
    const IR::FpControl control{
        .no_contraction = false,
        .rounding = CastFpRounding(fswzadd.round),
        .fmz_mode = denormal_mode,
    };

    F(fswzadd.dest_reg, ir.FSwizzleAdd(operand_a, operand_b, swizzle_mask, control));

    if (fswzadd.cc != 0) {
        throw NotImplementedException("FSWZADD CC");
    }
}
|
||||
|
||||
} // namespace Shader::Maxwell
|
@@ -0,0 +1,125 @@
|
||||
// Copyright 2021 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include "shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.h"
|
||||
|
||||
namespace Shader::Maxwell {
|
||||
namespace {
|
||||
/// Emits IR for a packed half-precision add with every modifier passed explicitly.
///
/// Both operands are split into two elements via Extract using their swizzles. If the element
/// types of the two operands differ, any F16 elements are widened to 32 bits for the addition
/// and both sums are converted back to 16 bits afterwards. ABS/NEG are applied per operand,
/// the sums are optionally saturated, and MergeResult combines them into the destination.
void HADD2(TranslatorVisitor& v, u64 insn, Merge merge, bool ftz, bool sat, bool abs_a, bool neg_a,
           Swizzle swizzle_a, bool abs_b, bool neg_b, Swizzle swizzle_b, const IR::U32& src_b) {
    union {
        u64 raw;
        BitField<0, 8, IR::Reg> dest_reg;
        BitField<8, 8, IR::Reg> src_a;
    } const hadd2{insn};

    auto [a_first, a_second] = Extract(v.ir, v.X(hadd2.src_a), swizzle_a);
    auto [b_first, b_second] = Extract(v.ir, src_b, swizzle_b);

    // Widens an element pair to 32 bits when it currently holds F16 values.
    const auto widen_if_half = [&v](auto& first, auto& second) {
        if (first.Type() == IR::Type::F16) {
            first = v.ir.FPConvert(32, first);
            second = v.ir.FPConvert(32, second);
        }
    };
    const bool mixed_types{a_first.Type() != b_first.Type()};
    if (mixed_types) {
        widen_if_half(a_first, a_second);
        widen_if_half(b_first, b_second);
    }

    a_first = v.ir.FPAbsNeg(a_first, abs_a, neg_a);
    a_second = v.ir.FPAbsNeg(a_second, abs_a, neg_a);

    b_first = v.ir.FPAbsNeg(b_first, abs_b, neg_b);
    b_second = v.ir.FPAbsNeg(b_second, abs_b, neg_b);

    const IR::FpControl control{
        .no_contraction = true,
        .rounding = IR::FpRounding::DontCare,
        .fmz_mode = (ftz ? IR::FmzMode::FTZ : IR::FmzMode::None),
    };
    IR::F16F32F64 sum_first{v.ir.FPAdd(a_first, b_first, control)};
    IR::F16F32F64 sum_second{v.ir.FPAdd(a_second, b_second, control)};
    if (sat) {
        sum_first = v.ir.FPSaturate(sum_first);
        sum_second = v.ir.FPSaturate(sum_second);
    }
    if (mixed_types) {
        // Undo the widening so the merged result is built from 16-bit values.
        sum_first = v.ir.FPConvert(16, sum_first);
        sum_second = v.ir.FPConvert(16, sum_second);
    }
    v.X(hadd2.dest_reg, MergeResult(v.ir, hadd2.dest_reg, sum_first, sum_second, merge));
}
|
||||
|
||||
/// Decodes the merge mode and the operand-A modifiers shared by the reg/cbuf/imm HADD2
/// encodings and forwards to the fully parameterized HADD2.
void HADD2(TranslatorVisitor& v, u64 insn, bool sat, bool abs_b, bool neg_b, Swizzle swizzle_b,
           const IR::U32& src_b) {
    union {
        u64 raw;
        BitField<49, 2, Merge> merge;
        BitField<39, 1, u64> ftz;
        BitField<43, 1, u64> neg_a;
        BitField<44, 1, u64> abs_a;
        BitField<47, 2, Swizzle> swizzle_a;
    } const hadd2{insn};

    const bool flush{hadd2.ftz != 0};
    const bool take_abs_a{hadd2.abs_a != 0};
    const bool negate_a{hadd2.neg_a != 0};
    HADD2(v, insn, hadd2.merge, flush, sat, take_abs_a, negate_a, hadd2.swizzle_a, abs_b, neg_b,
          swizzle_b, src_b);
}
|
||||
} // Anonymous namespace
|
||||
|
||||
/// HADD2, register form: operand B comes from the register at bit 20 and carries its own
/// swizzle and ABS/NEG modifiers.
void TranslatorVisitor::HADD2_reg(u64 insn) {
    union {
        u64 raw;
        BitField<32, 1, u64> sat;
        BitField<31, 1, u64> neg_b;
        BitField<30, 1, u64> abs_b;
        BitField<28, 2, Swizzle> swizzle_b;
    } const hadd2{insn};

    const bool saturate{hadd2.sat != 0};
    const bool take_abs_b{hadd2.abs_b != 0};
    const bool negate_b{hadd2.neg_b != 0};
    HADD2(*this, insn, saturate, take_abs_b, negate_b, hadd2.swizzle_b, GetReg20(insn));
}
|
||||
|
||||
/// HADD2, constant buffer form: operand B is loaded from a constant buffer and always uses the
/// F32 swizzle.
void TranslatorVisitor::HADD2_cbuf(u64 insn) {
    union {
        u64 raw;
        BitField<52, 1, u64> sat;
        BitField<56, 1, u64> neg_b;
        BitField<54, 1, u64> abs_b;
    } const hadd2{insn};

    const bool saturate{hadd2.sat != 0};
    const bool take_abs_b{hadd2.abs_b != 0};
    const bool negate_b{hadd2.neg_b != 0};
    HADD2(*this, insn, saturate, take_abs_b, negate_b, Swizzle::F32, GetCbuf(insn));
}
|
||||
|
||||
/// HADD2, immediate form: operand B is a packed pair assembled from two 9-bit fields and two
/// sign bits of the instruction.
///
/// Bit placement in the 32-bit immediate: `low` occupies bits 6..14 with `neg_low` at bit 15,
/// `high` occupies bits 22..30 with `neg_high` at bit 31.
void TranslatorVisitor::HADD2_imm(u64 insn) {
    union {
        u64 raw;
        BitField<52, 1, u64> sat;
        BitField<56, 1, u64> neg_high;
        BitField<30, 9, u64> high;
        BitField<29, 1, u64> neg_low;
        BitField<20, 9, u64> low;
    } const hadd2{insn};

    const u32 low_bits{static_cast<u32>(hadd2.low << 6)};
    const u32 low_sign{hadd2.neg_low != 0 ? 0x0000'8000U : 0U};
    const u32 high_bits{static_cast<u32>(hadd2.high << 22)};
    const u32 high_sign{hadd2.neg_high != 0 ? 0x8000'0000U : 0U};
    const u32 packed{low_bits | low_sign | high_bits | high_sign};

    HADD2(*this, insn, hadd2.sat != 0, false, false, Swizzle::H1_H0, ir.Imm32(packed));
}
|
||||
|
||||
/// HADD2 with a full 32-bit immediate as operand B.
/// The merge mode is fixed to H1_H0, operand A supports only NEG, and the immediate is used
/// unmodified with the H1_H0 swizzle.
void TranslatorVisitor::HADD2_32I(u64 insn) {
    union {
        u64 raw;
        BitField<55, 1, u64> ftz;
        BitField<52, 1, u64> sat;
        BitField<56, 1, u64> neg_a;
        BitField<53, 2, Swizzle> swizzle_a;
        BitField<20, 32, u64> imm32;
    } const hadd2{insn};

    const IR::U32 operand_b{ir.Imm32(static_cast<u32>(hadd2.imm32))};
    const bool flush{hadd2.ftz != 0};
    const bool saturate{hadd2.sat != 0};
    const bool negate_a{hadd2.neg_a != 0};
    HADD2(*this, insn, Merge::H1_H0, flush, saturate, false, negate_a, hadd2.swizzle_a, false,
          false, Swizzle::H1_H0, operand_b);
}
|
||||
} // namespace Shader::Maxwell
|
@@ -0,0 +1,169 @@
|
||||
// Copyright 2021 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include "shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.h"
|
||||
|
||||
namespace Shader::Maxwell {
|
||||
namespace {
|
||||
/// Emits IR for a packed half-precision fused multiply-add with every modifier passed
/// explicitly.
///
/// The three operands are split into two elements each via Extract. If their element types are
/// not all equal, any F16 elements are widened to 32 bits for the computation and both results
/// are converted back to 16 bits afterwards. Operands B and C may be negated. With FMZ
/// precision and no saturation, a zero factor makes the result equal to the addend.
void HFMA2(TranslatorVisitor& v, u64 insn, Merge merge, Swizzle swizzle_a, bool neg_b, bool neg_c,
           Swizzle swizzle_b, Swizzle swizzle_c, const IR::U32& src_b, const IR::U32& src_c,
           bool sat, HalfPrecision precision) {
    union {
        u64 raw;
        BitField<0, 8, IR::Reg> dest_reg;
        BitField<8, 8, IR::Reg> src_a;
    } const hfma2{insn};

    auto [a_first, a_second] = Extract(v.ir, v.X(hfma2.src_a), swizzle_a);
    auto [b_first, b_second] = Extract(v.ir, src_b, swizzle_b);
    auto [c_first, c_second] = Extract(v.ir, src_c, swizzle_c);

    // Widens an element pair to 32 bits when it currently holds F16 values.
    const auto widen_if_half = [&v](auto& first, auto& second) {
        if (first.Type() == IR::Type::F16) {
            first = v.ir.FPConvert(32, first);
            second = v.ir.FPConvert(32, second);
        }
    };
    const bool mixed_types{a_first.Type() != b_first.Type() || a_first.Type() != c_first.Type()};
    if (mixed_types) {
        widen_if_half(a_first, a_second);
        widen_if_half(b_first, b_second);
        widen_if_half(c_first, c_second);
    }

    b_first = v.ir.FPAbsNeg(b_first, false, neg_b);
    b_second = v.ir.FPAbsNeg(b_second, false, neg_b);

    c_first = v.ir.FPAbsNeg(c_first, false, neg_c);
    c_second = v.ir.FPAbsNeg(c_second, false, neg_c);

    const IR::FpControl control{
        .no_contraction = true,
        .rounding = IR::FpRounding::DontCare,
        .fmz_mode = HalfPrecision2FmzMode(precision),
    };
    IR::F16F32F64 fma_first{v.ir.FPFma(a_first, b_first, c_first, control)};
    IR::F16F32F64 fma_second{v.ir.FPFma(a_second, b_second, c_second, control)};

    if (precision == HalfPrecision::FMZ && !sat) {
        // Do not implement FMZ if SAT is enabled, as it does the logic for us.
        // On D3D9 mode, anything * 0 is zero, even NAN and infinity
        // NOTE(review): `zero` is an F32 immediate while the operands may still be F16 when no
        // widening took place - confirm FPEqual accepts that combination.
        const IR::F32 zero{v.ir.Imm32(0.0f)};
        const auto addend_if_zero_factor = [&v, &zero](const auto& factor_a, const auto& factor_b,
                                                       const auto& addend, IR::F16F32F64& fma) {
            const IR::U1 a_is_zero{v.ir.FPEqual(factor_a, zero)};
            const IR::U1 b_is_zero{v.ir.FPEqual(factor_b, zero)};
            const IR::U1 either_zero{v.ir.LogicalOr(a_is_zero, b_is_zero)};
            fma = IR::F16F32F64{v.ir.Select(either_zero, addend, fma)};
        };
        addend_if_zero_factor(a_first, b_first, c_first, fma_first);
        addend_if_zero_factor(a_second, b_second, c_second, fma_second);
    }
    if (sat) {
        fma_first = v.ir.FPSaturate(fma_first);
        fma_second = v.ir.FPSaturate(fma_second);
    }
    if (mixed_types) {
        // Undo the widening so the merged result is built from 16-bit values.
        fma_first = v.ir.FPConvert(16, fma_first);
        fma_second = v.ir.FPConvert(16, fma_second);
    }
    v.X(hfma2.dest_reg, MergeResult(v.ir, hfma2.dest_reg, fma_first, fma_second, merge));
}
|
||||
|
||||
// Convenience overload: decodes operand A's swizzle (bits 47-48) and the merge mode
// (bits 49-50) from the instruction, then forwards everything to the core HFMA2 emitter.
void HFMA2(TranslatorVisitor& v, u64 insn, bool neg_b, bool neg_c, Swizzle swizzle_b,
           Swizzle swizzle_c, const IR::U32& src_b, const IR::U32& src_c, bool sat,
           HalfPrecision precision) {
    union {
        u64 raw;
        BitField<49, 2, Merge> merge;
        BitField<47, 2, Swizzle> swizzle_a;
    } const common{insn};

    const Merge merge = common.merge;
    const Swizzle swizzle_a = common.swizzle_a;
    HFMA2(v, insn, merge, swizzle_a, neg_b, neg_c, swizzle_b, swizzle_c, src_b, src_c, sat,
          precision);
}
|
||||
} // Anonymous namespace
|
||||
|
||||
// HFMA2, register form: operand B is read through GetReg20 and operand C through GetReg39.
// Both carry their own swizzle and negate bit.
void TranslatorVisitor::HFMA2_reg(u64 insn) {
    union {
        u64 raw;
        BitField<28, 2, Swizzle> swizzle_b;
        BitField<30, 1, u64> neg_c;
        BitField<31, 1, u64> neg_b;
        BitField<32, 1, u64> saturate;
        BitField<35, 2, Swizzle> swizzle_c;
        BitField<37, 2, HalfPrecision> precision;
    } const fields{insn};

    const bool negate_b{fields.neg_b != 0};
    const bool negate_c{fields.neg_c != 0};
    const bool saturate{fields.saturate != 0};
    HFMA2(*this, insn, negate_b, negate_c, fields.swizzle_b, fields.swizzle_c, GetReg20(insn),
          GetReg39(insn), saturate, fields.precision);
}
|
||||
|
||||
// HFMA2, register/constant-buffer form: operand B is read through GetReg39 (with a decoded
// swizzle) and operand C comes from the constant buffer, always extracted as F32.
void TranslatorVisitor::HFMA2_rc(u64 insn) {
    union {
        u64 raw;
        BitField<51, 1, u64> neg_c;
        BitField<52, 1, u64> saturate;
        BitField<53, 2, Swizzle> swizzle_b;
        BitField<56, 1, u64> neg_b;
        BitField<57, 2, HalfPrecision> precision;
    } const fields{insn};

    const bool negate_b{fields.neg_b != 0};
    const bool negate_c{fields.neg_c != 0};
    const bool saturate{fields.saturate != 0};
    HFMA2(*this, insn, negate_b, negate_c, fields.swizzle_b, Swizzle::F32, GetReg39(insn),
          GetCbuf(insn), saturate, fields.precision);
}
|
||||
|
||||
// HFMA2, constant-buffer/register form: operand B comes from the constant buffer, always
// extracted as F32, and operand C is read through GetReg39 with a decoded swizzle.
void TranslatorVisitor::HFMA2_cr(u64 insn) {
    union {
        u64 raw;
        BitField<51, 1, u64> neg_c;
        BitField<52, 1, u64> saturate;
        BitField<53, 2, Swizzle> swizzle_c;
        BitField<56, 1, u64> neg_b;
        BitField<57, 2, HalfPrecision> precision;
    } const fields{insn};

    const bool negate_b{fields.neg_b != 0};
    const bool negate_c{fields.neg_c != 0};
    const bool saturate{fields.saturate != 0};
    HFMA2(*this, insn, negate_b, negate_c, Swizzle::F32, fields.swizzle_c, GetCbuf(insn),
          GetReg39(insn), saturate, fields.precision);
}
|
||||
|
||||
// HFMA2, immediate form: operand B is a packed pair of half floats rebuilt from two 9-bit
// fields plus one sign bit each; operand C is read through GetReg39.
void TranslatorVisitor::HFMA2_imm(u64 insn) {
    union {
        u64 raw;
        BitField<20, 9, u64> low;
        BitField<29, 1, u64> neg_low;
        BitField<30, 9, u64> high;
        BitField<51, 1, u64> neg_c;
        BitField<52, 1, u64> saturate;
        BitField<53, 2, Swizzle> swizzle_c;
        BitField<56, 1, u64> neg_high;
        BitField<57, 2, HalfPrecision> precision;
    } const fields{insn};

    // Each 9-bit payload lands in bits 6-14 of its 16-bit half; the sign goes to bit 15.
    const u32 low_payload{static_cast<u32>(fields.low) << 6};
    const u32 low_sign{fields.neg_low != 0 ? 0x0000'8000u : 0u};
    const u32 high_payload{static_cast<u32>(fields.high) << 22};
    const u32 high_sign{fields.neg_high != 0 ? 0x8000'0000u : 0u};
    const u32 imm{low_payload | low_sign | high_payload | high_sign};

    const bool negate_c{fields.neg_c != 0};
    const bool saturate{fields.saturate != 0};
    HFMA2(*this, insn, false, negate_c, Swizzle::H1_H0, fields.swizzle_c, ir.Imm32(imm),
          GetReg39(insn), saturate, fields.precision);
}
|
||||
|
||||
// HFMA2, 32-bit immediate form: operand B is the raw 32-bit immediate in bits 20-51 and
// operand C is the register in bits 0-7. The merge mode is fixed to H1_H0 and saturation
// is never requested.
void TranslatorVisitor::HFMA2_32I(u64 insn) {
    union {
        u64 raw;
        BitField<0, 8, IR::Reg> src_c;
        BitField<20, 32, u64> imm32;
        BitField<52, 1, u64> neg_c;
        BitField<53, 2, Swizzle> swizzle_a;
        BitField<55, 2, HalfPrecision> precision;
    } const fields{insn};

    const u32 packed_halves{static_cast<u32>(fields.imm32)};
    const bool negate_c{fields.neg_c != 0};
    HFMA2(*this, insn, Merge::H1_H0, fields.swizzle_a, false, negate_c, Swizzle::H1_H0,
          Swizzle::H1_H0, ir.Imm32(packed_halves), X(fields.src_c), false, fields.precision);
}
|
||||
|
||||
} // namespace Shader::Maxwell
|
@@ -0,0 +1,62 @@
|
||||
// Copyright 2021 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include "shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.h"
|
||||
|
||||
namespace Shader::Maxwell {
|
||||
|
||||
// Maps the instruction's half-precision mode onto the IR's FMZ mode. Any value that is not
// one of the named enumerators yields IR::FmzMode::DontCare.
IR::FmzMode HalfPrecision2FmzMode(HalfPrecision precision) {
    if (precision == HalfPrecision::None) {
        return IR::FmzMode::None;
    }
    if (precision == HalfPrecision::FTZ) {
        return IR::FmzMode::FTZ;
    }
    if (precision == HalfPrecision::FMZ) {
        return IR::FmzMode::FMZ;
    }
    return IR::FmzMode::DontCare;
}
|
||||
|
||||
// Splits a 32-bit register value into the two lanes an H* instruction operates on.
//   H1_H0: components 0 and 1 of the unpacked half2
//   H0_H0: component 0 in both lanes
//   H1_H1: component 1 in both lanes
//   F32:   the whole value bit-cast to F32, in both lanes
// Throws InvalidArgument for any other swizzle value.
std::pair<IR::F16F32F64, IR::F16F32F64> Extract(IR::IREmitter& ir, IR::U32 value, Swizzle swizzle) {
    // Unpacks the value and replicates one half component into both lanes.
    const auto broadcast_half{[&](size_t component) -> std::pair<IR::F16F32F64, IR::F16F32F64> {
        const IR::F16 scalar{ir.CompositeExtract(ir.UnpackFloat2x16(value), component)};
        return {scalar, scalar};
    }};

    switch (swizzle) {
    case Swizzle::H1_H0: {
        const IR::Value halves{ir.UnpackFloat2x16(value)};
        const IR::F16 first{ir.CompositeExtract(halves, 0)};
        const IR::F16 second{ir.CompositeExtract(halves, 1)};
        return {first, second};
    }
    case Swizzle::H0_H0:
        return broadcast_half(0);
    case Swizzle::H1_H1:
        return broadcast_half(1);
    case Swizzle::F32: {
        const IR::F32 full{ir.BitCast<IR::F32>(value)};
        return {full, full};
    }
    }
    throw InvalidArgument("Invalid swizzle {}", swizzle);
}
|
||||
|
||||
// Combines the two result lanes into the 32-bit value written back to the destination.
//   H1_H0:  packs (lhs, rhs) as a half2
//   F32:    converts lhs to 32 bits and bit-casts it; rhs is ignored
//   MRG_H0: replaces component 0 of the current destination contents with lhs
//   MRG_H1: replaces component 1 of the current destination contents with rhs
// Throws InvalidArgument for any other merge value.
IR::U32 MergeResult(IR::IREmitter& ir, IR::Reg dest, const IR::F16& lhs, const IR::F16& rhs,
                    Merge merge) {
    switch (merge) {
    case Merge::H1_H0: {
        const IR::Value pair{ir.CompositeConstruct(lhs, rhs)};
        return ir.PackFloat2x16(pair);
    }
    case Merge::F32: {
        const auto widened{ir.FPConvert(32, lhs)};
        return ir.BitCast<IR::U32, IR::F32>(widened);
    }
    case Merge::MRG_H0:
    case Merge::MRG_H1: {
        // Read the existing register so the untouched half is preserved.
        const IR::Value previous{ir.UnpackFloat2x16(ir.GetReg(dest))};
        const bool low_half{merge == Merge::MRG_H0};
        const IR::F16 replacement{ir.FPConvert(16, low_half ? lhs : rhs)};
        const size_t slot = low_half ? 0 : 1;
        return ir.PackFloat2x16(ir.CompositeInsert(previous, replacement, slot));
    }
    }
    throw InvalidArgument("Invalid merge {}", merge);
}
|
||||
|
||||
} // namespace Shader::Maxwell
|
@@ -0,0 +1,42 @@
|
||||
// Copyright 2021 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "common/common_types.h"
|
||||
#include "shader_recompiler/exception.h"
|
||||
#include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h"
|
||||
#include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h"
|
||||
#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
|
||||
|
||||
namespace Shader::Maxwell {
|
||||
|
||||
// How the two result lanes of a half-float instruction are written back (see MergeResult).
enum class Merge : u64 {
    H1_H0 = 0,  // Pack both lanes as a half2
    F32 = 1,    // Write the first lane converted to a 32-bit float
    MRG_H0 = 2, // Replace only component 0 of the destination
    MRG_H1 = 3, // Replace only component 1 of the destination
};
|
||||
|
||||
// How a 32-bit source is split into the two operand lanes (see Extract).
enum class Swizzle : u64 {
    H1_H0 = 0, // Components 0 and 1 of the unpacked half2
    F32 = 1,   // The whole value as one 32-bit float, used for both lanes
    H0_H0 = 2, // Component 0 in both lanes
    H1_H1 = 3, // Component 1 in both lanes
};
|
||||
|
||||
// Precision mode field of half-float instructions; translated to an IR::FmzMode by
// HalfPrecision2FmzMode.
enum class HalfPrecision : u64 {
    None = 0b00,
    FTZ = 0b01,
    FMZ = 0b10,
};
|
||||
|
||||
IR::FmzMode HalfPrecision2FmzMode(HalfPrecision precision);
|
||||
|
||||
std::pair<IR::F16F32F64, IR::F16F32F64> Extract(IR::IREmitter& ir, IR::U32 value, Swizzle swizzle);
|
||||
|
||||
IR::U32 MergeResult(IR::IREmitter& ir, IR::Reg dest, const IR::F16& lhs, const IR::F16& rhs,
|
||||
Merge merge);
|
||||
|
||||
} // namespace Shader::Maxwell
|
@@ -0,0 +1,143 @@
|
||||
// Copyright 2021 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include "shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.h"
|
||||
|
||||
namespace Shader::Maxwell {
|
||||
namespace {
|
||||
// Core HMUL2 emitter: multiplies operands A and B lane by lane and writes the merged result
// to the destination register (bits 0-7). Operand A comes from the register in bits 8-15.
// If the two operands were extracted with different types, F16 operands are widened to F32
// for the multiply and the products are narrowed back to F16 afterwards.
void HMUL2(TranslatorVisitor& v, u64 insn, Merge merge, bool sat, bool abs_a, bool neg_a,
           Swizzle swizzle_a, bool abs_b, bool neg_b, Swizzle swizzle_b, const IR::U32& src_b,
           HalfPrecision precision) {
    union {
        u64 raw;
        BitField<0, 8, IR::Reg> dest_reg;
        BitField<8, 8, IR::Reg> src_a;
    } const fields{insn};

    // Widens a lane pair to F32 if it was extracted as F16 (only the first lane is inspected).
    const auto widen{[&v](IR::F16F32F64& first, IR::F16F32F64& second) {
        if (first.Type() == IR::Type::F16) {
            first = v.ir.FPConvert(32, first);
            second = v.ir.FPConvert(32, second);
        }
    }};

    auto [lhs_a, rhs_a]{Extract(v.ir, v.X(fields.src_a), swizzle_a)};
    auto [lhs_b, rhs_b]{Extract(v.ir, src_b, swizzle_b)};

    const bool promotion{lhs_a.Type() != lhs_b.Type()};
    if (promotion) {
        widen(lhs_a, rhs_a);
        widen(lhs_b, rhs_b);
    }

    lhs_a = v.ir.FPAbsNeg(lhs_a, abs_a, neg_a);
    rhs_a = v.ir.FPAbsNeg(rhs_a, abs_a, neg_a);

    lhs_b = v.ir.FPAbsNeg(lhs_b, abs_b, neg_b);
    rhs_b = v.ir.FPAbsNeg(rhs_b, abs_b, neg_b);

    const IR::FpControl fp_control{
        .no_contraction = true,
        .rounding = IR::FpRounding::DontCare,
        .fmz_mode = HalfPrecision2FmzMode(precision),
    };
    IR::F16F32F64 lhs{v.ir.FPMul(lhs_a, lhs_b, fp_control)};
    IR::F16F32F64 rhs{v.ir.FPMul(rhs_a, rhs_b, fp_control)};

    if (precision == HalfPrecision::FMZ && !sat) {
        // FMZ is skipped when SAT is enabled, as saturation does the logic for us.
        // On D3D9 mode, anything * 0 is zero, even NAN and infinity, so the product is
        // forced to zero whenever either operand compares equal to zero.
        // NOTE(review): zero is an F32 immediate; confirm FPEqual/Select accept it when the
        // operands were not promoted and are still F16.
        const IR::F32 zero{v.ir.Imm32(0.0f)};
        const auto zero_if_any_zero{[&](const IR::F16F32F64& a, const IR::F16F32F64& b,
                                        IR::F16F32F64& product) {
            const IR::U1 a_is_zero{v.ir.FPEqual(a, zero)};
            const IR::U1 b_is_zero{v.ir.FPEqual(b, zero)};
            const IR::U1 either_zero{v.ir.LogicalOr(a_is_zero, b_is_zero)};
            product = IR::F16F32F64{v.ir.Select(either_zero, zero, product)};
        }};
        zero_if_any_zero(lhs_a, lhs_b, lhs);
        zero_if_any_zero(rhs_a, rhs_b, rhs);
    }
    if (sat) {
        lhs = v.ir.FPSaturate(lhs);
        rhs = v.ir.FPSaturate(rhs);
    }
    if (promotion) {
        // Undo the widening so the result can be merged as half floats.
        lhs = v.ir.FPConvert(16, lhs);
        rhs = v.ir.FPConvert(16, rhs);
    }
    v.X(fields.dest_reg, MergeResult(v.ir, fields.dest_reg, lhs, rhs, merge));
}
|
||||
|
||||
// Convenience overload: decodes the precision mode (bits 39-40), operand A's swizzle
// (bits 47-48) and the merge mode (bits 49-50), then forwards to the core HMUL2 emitter.
void HMUL2(TranslatorVisitor& v, u64 insn, bool sat, bool abs_a, bool neg_a, bool abs_b, bool neg_b,
           Swizzle swizzle_b, const IR::U32& src_b) {
    union {
        u64 raw;
        BitField<39, 2, HalfPrecision> precision;
        BitField<47, 2, Swizzle> swizzle_a;
        BitField<49, 2, Merge> merge;
    } const common{insn};

    const Merge merge = common.merge;
    const Swizzle swizzle_a = common.swizzle_a;
    const HalfPrecision precision = common.precision;
    HMUL2(v, insn, merge, sat, abs_a, neg_a, swizzle_a, abs_b, neg_b, swizzle_b, src_b,
          precision);
}
|
||||
} // Anonymous namespace
|
||||
|
||||
// HMUL2, register form: operand B is read through GetReg20 with its own swizzle, abs and
// negate bits. Operand A is never negated in this form.
void TranslatorVisitor::HMUL2_reg(u64 insn) {
    union {
        u64 raw;
        BitField<28, 2, Swizzle> swizzle_b;
        BitField<30, 1, u64> abs_b;
        BitField<31, 1, u64> neg_b;
        BitField<32, 1, u64> sat;
        BitField<44, 1, u64> abs_a;
    } const fields{insn};

    const bool saturate{fields.sat != 0};
    const bool absolute_a{fields.abs_a != 0};
    const bool absolute_b{fields.abs_b != 0};
    const bool negate_b{fields.neg_b != 0};
    HMUL2(*this, insn, saturate, absolute_a, false, absolute_b, negate_b, fields.swizzle_b,
          GetReg20(insn));
}
|
||||
|
||||
// HMUL2, constant-buffer form: operand B comes from the constant buffer, extracted as F32,
// and is never negated in this form.
void TranslatorVisitor::HMUL2_cbuf(u64 insn) {
    union {
        u64 raw;
        BitField<43, 1, u64> neg_a;
        BitField<44, 1, u64> abs_a;
        BitField<52, 1, u64> sat;
        BitField<54, 1, u64> abs_b;
    } const fields{insn};

    const bool saturate{fields.sat != 0};
    const bool absolute_a{fields.abs_a != 0};
    const bool negate_a{fields.neg_a != 0};
    const bool absolute_b{fields.abs_b != 0};
    HMUL2(*this, insn, saturate, absolute_a, negate_a, absolute_b, false, Swizzle::F32,
          GetCbuf(insn));
}
|
||||
|
||||
// HMUL2, immediate form: operand B is a packed pair of half floats rebuilt from two 9-bit
// fields plus one sign bit each. No abs/neg modifier is applied to B.
void TranslatorVisitor::HMUL2_imm(u64 insn) {
    union {
        u64 raw;
        BitField<20, 9, u64> low;
        BitField<29, 1, u64> neg_low;
        BitField<30, 9, u64> high;
        BitField<43, 1, u64> neg_a;
        BitField<44, 1, u64> abs_a;
        BitField<52, 1, u64> sat;
        BitField<56, 1, u64> neg_high;
    } const fields{insn};

    // Each 9-bit payload lands in bits 6-14 of its 16-bit half; the sign goes to bit 15.
    const u32 low_payload{static_cast<u32>(fields.low) << 6};
    const u32 low_sign{fields.neg_low != 0 ? 0x0000'8000u : 0u};
    const u32 high_payload{static_cast<u32>(fields.high) << 22};
    const u32 high_sign{fields.neg_high != 0 ? 0x8000'0000u : 0u};
    const u32 imm{low_payload | low_sign | high_payload | high_sign};

    const bool saturate{fields.sat != 0};
    const bool absolute_a{fields.abs_a != 0};
    const bool negate_a{fields.neg_a != 0};
    HMUL2(*this, insn, saturate, absolute_a, negate_a, false, false, Swizzle::H1_H0,
          ir.Imm32(imm));
}
|
||||
|
||||
// HMUL2, 32-bit immediate form: operand B is the raw 32-bit immediate in bits 20-51. The
// merge mode is fixed to H1_H0 and no abs/neg modifiers are applied to either operand.
void TranslatorVisitor::HMUL2_32I(u64 insn) {
    union {
        u64 raw;
        BitField<20, 32, u64> imm32;
        BitField<52, 1, u64> sat;
        BitField<53, 2, Swizzle> swizzle_a;
        BitField<55, 2, HalfPrecision> precision;
    } const fields{insn};

    const u32 packed_halves{static_cast<u32>(fields.imm32)};
    const bool saturate{fields.sat != 0};
    HMUL2(*this, insn, Merge::H1_H0, saturate, false, false, fields.swizzle_a, false, false,
          Swizzle::H1_H0, ir.Imm32(packed_halves), fields.precision);
}
|
||||
|
||||
} // namespace Shader::Maxwell
|
@@ -0,0 +1,117 @@
|
||||
// Copyright 2021 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include "shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.h"
|
||||
|
||||
namespace Shader::Maxwell {
|
||||
namespace {
|
||||
// Core HSET2 emitter: compares operands A and B lane by lane, combines each comparison with
// a (possibly negated) predicate through the decoded boolean op, and writes a 32-bit mask
// to the destination register. A passing lane contributes 0x3c00 (the IEEE binary16
// encoding of 1.0) when bf is set, or 0xffff otherwise; a failing lane contributes zero.
void HSET2(TranslatorVisitor& v, u64 insn, const IR::U32& src_b, bool bf, bool ftz, bool neg_b,
           bool abs_b, FPCompareOp compare_op, Swizzle swizzle_b) {
    union {
        u64 raw;
        BitField<0, 8, IR::Reg> dest_reg;
        BitField<8, 8, IR::Reg> src_a_reg;
        BitField<39, 3, IR::Pred> pred;
        BitField<42, 1, u64> neg_pred;
        BitField<43, 1, u64> neg_a;
        BitField<44, 1, u64> abs_a;
        BitField<45, 2, BooleanOp> bop;
        BitField<47, 2, Swizzle> swizzle_a;
    } const fields{insn};

    // Widens a lane pair to F32 if it was extracted as F16 (only the first lane is inspected).
    const auto widen{[&v](IR::F16F32F64& first, IR::F16F32F64& second) {
        if (first.Type() == IR::Type::F16) {
            first = v.ir.FPConvert(32, first);
            second = v.ir.FPConvert(32, second);
        }
    }};

    auto [lhs_a, rhs_a]{Extract(v.ir, v.X(fields.src_a_reg), fields.swizzle_a)};
    auto [lhs_b, rhs_b]{Extract(v.ir, src_b, swizzle_b)};

    // Compare in a common type when the operands were extracted differently.
    if (lhs_a.Type() != lhs_b.Type()) {
        widen(lhs_a, rhs_a);
        widen(lhs_b, rhs_b);
    }

    const bool abs_a{fields.abs_a != 0};
    const bool neg_a{fields.neg_a != 0};
    lhs_a = v.ir.FPAbsNeg(lhs_a, abs_a, neg_a);
    rhs_a = v.ir.FPAbsNeg(rhs_a, abs_a, neg_a);

    lhs_b = v.ir.FPAbsNeg(lhs_b, abs_b, neg_b);
    rhs_b = v.ir.FPAbsNeg(rhs_b, abs_b, neg_b);

    const IR::FpControl control{
        .no_contraction = false,
        .rounding = IR::FpRounding::DontCare,
        .fmz_mode = (ftz ? IR::FmzMode::FTZ : IR::FmzMode::None),
    };

    IR::U1 pred{v.ir.GetPred(fields.pred)};
    if (fields.neg_pred != 0) {
        pred = v.ir.LogicalNot(pred);
    }
    const IR::U1 lhs_compare{FloatingPointCompare(v.ir, lhs_a, lhs_b, compare_op, control)};
    const IR::U1 rhs_compare{FloatingPointCompare(v.ir, rhs_a, rhs_b, compare_op, control)};
    const IR::U1 lhs_pass{PredicateCombine(v.ir, lhs_compare, pred, fields.bop)};
    const IR::U1 rhs_pass{PredicateCombine(v.ir, rhs_compare, pred, fields.bop)};

    // The first lane occupies the low 16 bits of the result, the second lane the high 16.
    const u32 pass_bits = bf ? 0x3c00 : 0xffff;
    const IR::U32 lhs_pass_value{v.ir.Imm32(pass_bits)};
    const IR::U32 rhs_pass_value{v.ir.Imm32(pass_bits << 16)};
    const IR::U32 fail_value{v.ir.Imm32(0)};
    const IR::U32 lhs_bits{v.ir.Select(lhs_pass, lhs_pass_value, fail_value)};
    const IR::U32 rhs_bits{v.ir.Select(rhs_pass, rhs_pass_value, fail_value)};

    v.X(fields.dest_reg, IR::U32{v.ir.BitwiseOr(lhs_bits, rhs_bits)});
}
|
||||
} // Anonymous namespace
|
||||
|
||||
// HSET2, register form: operand B is read through GetReg20 with its own swizzle, abs and
// negate bits.
void TranslatorVisitor::HSET2_reg(u64 insn) {
    union {
        u64 raw;
        BitField<28, 2, Swizzle> swizzle_b;
        BitField<30, 1, u64> abs_b;
        BitField<31, 1, u64> neg_b;
        BitField<35, 4, FPCompareOp> compare_op;
        BitField<49, 1, u64> bf;
        BitField<50, 1, u64> ftz;
    } const fields{insn};

    const bool bf{fields.bf != 0};
    const bool ftz{fields.ftz != 0};
    const bool negate_b{fields.neg_b != 0};
    const bool absolute_b{fields.abs_b != 0};
    HSET2(*this, insn, GetReg20(insn), bf, ftz, negate_b, absolute_b, fields.compare_op,
          fields.swizzle_b);
}
|
||||
|
||||
// HSET2, constant-buffer form: operand B comes from the constant buffer, extracted as F32.
// Only a negate modifier is decoded for B; abs is never applied.
void TranslatorVisitor::HSET2_cbuf(u64 insn) {
    union {
        u64 raw;
        BitField<49, 4, FPCompareOp> compare_op;
        BitField<53, 1, u64> bf;
        BitField<54, 1, u64> ftz;
        BitField<56, 1, u64> neg_b;
    } const fields{insn};

    const bool bf{fields.bf != 0};
    const bool ftz{fields.ftz != 0};
    const bool negate_b{fields.neg_b != 0};
    HSET2(*this, insn, GetCbuf(insn), bf, ftz, negate_b, false, fields.compare_op,
          Swizzle::F32);
}
|
||||
|
||||
// HSET2, immediate form: operand B is a packed pair of half floats rebuilt from two 9-bit
// fields plus one sign bit each. No abs/neg modifier is applied to B.
void TranslatorVisitor::HSET2_imm(u64 insn) {
    union {
        u64 raw;
        BitField<20, 9, u64> low;
        BitField<29, 1, u64> neg_low;
        BitField<30, 9, u64> high;
        BitField<49, 4, FPCompareOp> compare_op;
        BitField<53, 1, u64> bf;
        BitField<54, 1, u64> ftz;
        BitField<56, 1, u64> neg_high;
    } const fields{insn};

    // Each 9-bit payload lands in bits 6-14 of its 16-bit half; the sign goes to bit 15.
    const u32 low_payload{static_cast<u32>(fields.low) << 6};
    const u32 low_sign{fields.neg_low != 0 ? 0x0000'8000u : 0u};
    const u32 high_payload{static_cast<u32>(fields.high) << 22};
    const u32 high_sign{fields.neg_high != 0 ? 0x8000'0000u : 0u};
    const u32 imm{low_payload | low_sign | high_payload | high_sign};

    const bool bf{fields.bf != 0};
    const bool ftz{fields.ftz != 0};
    HSET2(*this, insn, ir.Imm32(imm), bf, ftz, false, false, fields.compare_op,
          Swizzle::H1_H0);
}
|
||||
|
||||
} // namespace Shader::Maxwell
|
@@ -0,0 +1,118 @@
|
||||
// Copyright 2021 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include "shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.h"
|
||||
|
||||
namespace Shader::Maxwell {
|
||||
namespace {
|
||||
// Core HSETP2 emitter: compares operands A and B lane by lane, combines each comparison with
// a (possibly negated) predicate through the decoded boolean op, and stores the outcome in
// two destination predicates. With h_and set, predicate A receives the AND of both lanes and
// predicate B its negation; otherwise A receives the first lane and B the second.
void HSETP2(TranslatorVisitor& v, u64 insn, const IR::U32& src_b, bool neg_b, bool abs_b,
            Swizzle swizzle_b, FPCompareOp compare_op, bool h_and) {
    union {
        u64 raw;
        BitField<0, 3, IR::Pred> dest_pred_b;
        BitField<3, 3, IR::Pred> dest_pred_a;
        BitField<6, 1, u64> ftz;
        BitField<8, 8, IR::Reg> src_a_reg;
        BitField<39, 3, IR::Pred> pred;
        BitField<42, 1, u64> neg_pred;
        BitField<43, 1, u64> neg_a;
        BitField<44, 1, u64> abs_a;
        BitField<45, 2, BooleanOp> bop;
        BitField<47, 2, Swizzle> swizzle_a;
    } const fields{insn};

    // Widens a lane pair to F32 if it was extracted as F16 (only the first lane is inspected).
    const auto widen{[&v](IR::F16F32F64& first, IR::F16F32F64& second) {
        if (first.Type() == IR::Type::F16) {
            first = v.ir.FPConvert(32, first);
            second = v.ir.FPConvert(32, second);
        }
    }};

    auto [lhs_a, rhs_a]{Extract(v.ir, v.X(fields.src_a_reg), fields.swizzle_a)};
    auto [lhs_b, rhs_b]{Extract(v.ir, src_b, swizzle_b)};

    // Compare in a common type when the operands were extracted differently.
    if (lhs_a.Type() != lhs_b.Type()) {
        widen(lhs_a, rhs_a);
        widen(lhs_b, rhs_b);
    }

    const bool abs_a{fields.abs_a != 0};
    const bool neg_a{fields.neg_a != 0};
    lhs_a = v.ir.FPAbsNeg(lhs_a, abs_a, neg_a);
    rhs_a = v.ir.FPAbsNeg(rhs_a, abs_a, neg_a);

    lhs_b = v.ir.FPAbsNeg(lhs_b, abs_b, neg_b);
    rhs_b = v.ir.FPAbsNeg(rhs_b, abs_b, neg_b);

    const IR::FpControl control{
        .no_contraction = false,
        .rounding = IR::FpRounding::DontCare,
        .fmz_mode = (fields.ftz != 0 ? IR::FmzMode::FTZ : IR::FmzMode::None),
    };

    IR::U1 pred{v.ir.GetPred(fields.pred)};
    if (fields.neg_pred != 0) {
        pred = v.ir.LogicalNot(pred);
    }
    const IR::U1 lhs_compare{FloatingPointCompare(v.ir, lhs_a, lhs_b, compare_op, control)};
    const IR::U1 rhs_compare{FloatingPointCompare(v.ir, rhs_a, rhs_b, compare_op, control)};
    const IR::U1 lhs_pass{PredicateCombine(v.ir, lhs_compare, pred, fields.bop)};
    const IR::U1 rhs_pass{PredicateCombine(v.ir, rhs_compare, pred, fields.bop)};

    if (!h_and) {
        v.ir.SetPred(fields.dest_pred_a, lhs_pass);
        v.ir.SetPred(fields.dest_pred_b, rhs_pass);
        return;
    }
    const auto both_pass = v.ir.LogicalAnd(lhs_pass, rhs_pass);
    v.ir.SetPred(fields.dest_pred_a, both_pass);
    v.ir.SetPred(fields.dest_pred_b, v.ir.LogicalNot(both_pass));
}
|
||||
} // Anonymous namespace
|
||||
|
||||
// HSETP2, register form: operand B is read through GetReg20 with its own swizzle, abs and
// negate bits.
void TranslatorVisitor::HSETP2_reg(u64 insn) {
    union {
        u64 raw;
        BitField<28, 2, Swizzle> swizzle_b;
        BitField<30, 1, u64> abs_b;
        BitField<31, 1, u64> neg_b;
        BitField<35, 4, FPCompareOp> compare_op;
        BitField<49, 1, u64> h_and;
    } const fields{insn};

    const bool negate_b{fields.neg_b != 0};
    const bool absolute_b{fields.abs_b != 0};
    const bool h_and{fields.h_and != 0};
    HSETP2(*this, insn, GetReg20(insn), negate_b, absolute_b, fields.swizzle_b,
           fields.compare_op, h_and);
}
|
||||
|
||||
// HSETP2, constant-buffer form: operand B comes from the constant buffer, extracted as F32,
// with abs and negate bits decoded from the instruction.
void TranslatorVisitor::HSETP2_cbuf(u64 insn) {
    union {
        u64 raw;
        BitField<49, 4, FPCompareOp> compare_op;
        BitField<53, 1, u64> h_and;
        BitField<54, 1, u64> abs_b;
        BitField<56, 1, u64> neg_b;
    } const fields{insn};

    const bool negate_b{fields.neg_b != 0};
    const bool absolute_b{fields.abs_b != 0};
    const bool h_and{fields.h_and != 0};
    HSETP2(*this, insn, GetCbuf(insn), negate_b, absolute_b, Swizzle::F32, fields.compare_op,
           h_and);
}
|
||||
|
||||
// HSETP2, immediate form: operand B is a packed pair of half floats rebuilt from two 9-bit
// fields plus one sign bit each. No abs/neg modifier is applied to B.
void TranslatorVisitor::HSETP2_imm(u64 insn) {
    union {
        u64 raw;
        BitField<20, 9, u64> low;
        BitField<29, 1, u64> neg_low;
        BitField<30, 9, u64> high;
        BitField<49, 4, FPCompareOp> compare_op;
        BitField<53, 1, u64> h_and;
        // NOTE(review): decoded but never read; the shared HSETP2 helper takes FTZ from
        // bit 6 instead. Confirm which bit is correct for the immediate encoding.
        BitField<54, 1, u64> ftz;
        BitField<56, 1, u64> neg_high;
    } const fields{insn};

    // Each 9-bit payload lands in bits 6-14 of its 16-bit half; the sign goes to bit 15.
    const u32 low_payload{static_cast<u32>(fields.low) << 6};
    const u32 low_sign{fields.neg_low != 0 ? 0x0000'8000u : 0u};
    const u32 high_payload{static_cast<u32>(fields.high) << 22};
    const u32 high_sign{fields.neg_high != 0 ? 0x8000'0000u : 0u};
    const u32 imm{low_payload | low_sign | high_payload | high_sign};

    const bool h_and{fields.h_and != 0};
    HSETP2(*this, insn, ir.Imm32(imm), false, false, Swizzle::H1_H0, fields.compare_op, h_and);
}
|
||||
|
||||
} // namespace Shader::Maxwell
|
272
src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp
Executable file
272
src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp
Executable file
@@ -0,0 +1,272 @@
|
||||
// Copyright 2021 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include "common/bit_field.h"
|
||||
#include "shader_recompiler/frontend/ir/ir_emitter.h"
|
||||
#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
|
||||
|
||||
namespace Shader::Maxwell {
|
||||
namespace {
|
||||
// Produces the low 32 bits of a 64-bit constant-buffer read: the word at `offset` for an
// aligned access, or an immediate zero when the access is flagged as unaligned.
[[nodiscard]] IR::U32 CbufLowerBits(IR::IREmitter& ir, bool unaligned, const IR::U32& binding,
                                    u32 offset) {
    if (!unaligned) {
        return ir.GetCbuf(binding, IR::U32{IR::Value{offset}});
    }
    return ir.Imm32(0);
}
|
||||
} // Anonymous namespace
|
||||
|
||||
// Reads a register as a 32-bit value.
IR::U32 TranslatorVisitor::X(IR::Reg reg) {
    return ir.GetReg(reg);
}
|
||||
|
||||
// Reads the register pair (reg, reg + 1) as one packed 64-bit integer.
// Throws NotImplementedException when reg is not aligned to 2.
IR::U64 TranslatorVisitor::L(IR::Reg reg) {
    const bool aligned{IR::IsAligned(reg, 2)};
    if (!aligned) {
        throw NotImplementedException("Unaligned source register {}", reg);
    }
    const auto words{ir.CompositeConstruct(X(reg), X(reg + 1))};
    return IR::U64{ir.PackUint2x32(words)};
}
|
||||
|
||||
// Reads a register and reinterprets its bits as a 32-bit float.
IR::F32 TranslatorVisitor::F(IR::Reg reg) {
    const IR::U32 bits{X(reg)};
    return ir.BitCast<IR::F32>(bits);
}
|
||||
|
||||
// Reads the register pair (reg, reg + 1) as one packed 64-bit float.
// Throws NotImplementedException when reg is not aligned to 2.
IR::F64 TranslatorVisitor::D(IR::Reg reg) {
    const bool aligned{IR::IsAligned(reg, 2)};
    if (!aligned) {
        throw NotImplementedException("Unaligned source register {}", reg);
    }
    const auto words{ir.CompositeConstruct(X(reg), X(reg + 1))};
    return IR::F64{ir.PackDouble2x32(words)};
}
|
||||
|
||||
// Writes a 32-bit value to a register.
void TranslatorVisitor::X(IR::Reg dest_reg, const IR::U32& value) {
    ir.SetReg(dest_reg, value);
}
|
||||
|
||||
// Writes a 64-bit integer to the register pair (dest_reg, dest_reg + 1): component 0 of the
// unpacked value goes to dest_reg and component 1 to dest_reg + 1.
// Throws NotImplementedException when dest_reg is not aligned to 2.
void TranslatorVisitor::L(IR::Reg dest_reg, const IR::U64& value) {
    const bool aligned{IR::IsAligned(dest_reg, 2)};
    if (!aligned) {
        throw NotImplementedException("Unaligned destination register {}", dest_reg);
    }
    const IR::Value words{ir.UnpackUint2x32(value)};
    int index = 0;
    while (index < 2) {
        const IR::U32 word{ir.CompositeExtract(words, static_cast<size_t>(index))};
        X(dest_reg + index, word);
        ++index;
    }
}
|
||||
|
||||
// Writes a 32-bit float to a register by bit-casting it to U32 first.
void TranslatorVisitor::F(IR::Reg dest_reg, const IR::F32& value) {
    const IR::U32 bits{ir.BitCast<IR::U32>(value)};
    X(dest_reg, bits);
}
|
||||
|
||||
// Writes a 64-bit float to the register pair (dest_reg, dest_reg + 1): component 0 of the
// unpacked value goes to dest_reg and component 1 to dest_reg + 1.
// Throws NotImplementedException when dest_reg is not aligned to 2.
void TranslatorVisitor::D(IR::Reg dest_reg, const IR::F64& value) {
    const bool aligned{IR::IsAligned(dest_reg, 2)};
    if (!aligned) {
        throw NotImplementedException("Unaligned destination register {}", dest_reg);
    }
    const IR::Value words{ir.UnpackDouble2x32(value)};
    int index = 0;
    while (index < 2) {
        const IR::U32 word{ir.CompositeExtract(words, static_cast<size_t>(index))};
        X(dest_reg + index, word);
        ++index;
    }
}
|
||||
|
||||
// Reads the register whose index is encoded in instruction bits 8-15.
IR::U32 TranslatorVisitor::GetReg8(u64 insn) {
    union {
        u64 raw;
        BitField<8, 8, IR::Reg> index;
    } const operand{insn};
    const IR::Reg source = operand.index;
    return X(source);
}
|
||||
|
||||
// Reads the register whose index is encoded in instruction bits 20-27.
IR::U32 TranslatorVisitor::GetReg20(u64 insn) {
    union {
        u64 raw;
        BitField<20, 8, IR::Reg> index;
    } const operand{insn};
    const IR::Reg source = operand.index;
    return X(source);
}
|
||||
|
||||
// Reads the register whose index is encoded in instruction bits 39-46.
IR::U32 TranslatorVisitor::GetReg39(u64 insn) {
    union {
        u64 raw;
        BitField<39, 8, IR::Reg> index;
    } const operand{insn};
    const IR::Reg source = operand.index;
    return X(source);
}
|
||||
|
||||
// Same as GetReg8, with the register bits reinterpreted as a 32-bit float.
IR::F32 TranslatorVisitor::GetFloatReg8(u64 insn) {
    const IR::U32 bits{GetReg8(insn)};
    return ir.BitCast<IR::F32>(bits);
}
|
||||
|
||||
// Same as GetReg20, with the register bits reinterpreted as a 32-bit float.
IR::F32 TranslatorVisitor::GetFloatReg20(u64 insn) {
    const IR::U32 bits{GetReg20(insn)};
    return ir.BitCast<IR::F32>(bits);
}
|
||||
|
||||
// Same as GetReg39, with the register bits reinterpreted as a 32-bit float.
IR::F32 TranslatorVisitor::GetFloatReg39(u64 insn) {
    const IR::U32 bits{GetReg39(insn)};
    return ir.BitCast<IR::F32>(bits);
}
|
||||
|
||||
// Reads, as a 64-bit float, the register pair starting at the index in instruction bits
// 20-27. Alignment is checked by D().
IR::F64 TranslatorVisitor::GetDoubleReg20(u64 insn) {
    union {
        u64 raw;
        BitField<20, 8, IR::Reg> index;
    } const operand{insn};
    const IR::Reg first = operand.index;
    return D(first);
}
|
||||
|
||||
// Reads, as a 64-bit float, the register pair starting at the index in instruction bits
// 39-46. Alignment is checked by D().
IR::F64 TranslatorVisitor::GetDoubleReg39(u64 insn) {
    union {
        u64 raw;
        BitField<39, 8, IR::Reg> index;
    } const operand{insn};
    const IR::Reg first = operand.index;
    return D(first);
}
|
||||
|
||||
// Decodes a constant-buffer operand into immediate (binding, byte offset) values. The
// encoded offset (bits 20-33) is multiplied by 4 to obtain the byte offset; the binding
// index lives in bits 34-38.
// Throws NotImplementedException for bindings >= 18 or offsets >= 0x10'000.
static std::pair<IR::U32, IR::U32> CbufAddr(u64 insn) {
    union {
        u64 raw;
        BitField<20, 14, u64> offset;
        BitField<34, 5, u64> binding;
    } const operand{insn};

    const bool binding_in_range{operand.binding < 18};
    if (!binding_in_range) {
        throw NotImplementedException("Out of bounds constant buffer binding {}", operand.binding);
    }
    // NOTE(review): with a field declared 14 bits wide this check looks unreachable; it is
    // kept as a defensive guard.
    const bool offset_in_range{operand.offset < 0x10'000};
    if (!offset_in_range) {
        throw NotImplementedException("Out of bounds constant buffer offset {}", operand.offset);
    }
    const IR::Value binding_imm{static_cast<u32>(operand.binding)};
    const IR::Value byte_offset_imm{static_cast<u32>(operand.offset) * 4};
    return {IR::U32{binding_imm}, IR::U32{byte_offset_imm}};
}
|
||||
|
||||
// Reads the 32-bit constant-buffer word addressed by the instruction's cbuf operand.
IR::U32 TranslatorVisitor::GetCbuf(u64 insn) {
    const auto [cbuf_binding, cbuf_offset]{CbufAddr(insn)};
    return ir.GetCbuf(cbuf_binding, cbuf_offset);
}
|
||||
|
||||
// Reads the constant-buffer word addressed by the instruction's cbuf operand as a float.
IR::F32 TranslatorVisitor::GetFloatCbuf(u64 insn) {
    const auto [cbuf_binding, cbuf_offset]{CbufAddr(insn)};
    return ir.GetFloatCbuf(cbuf_binding, cbuf_offset);
}
|
||||
|
||||
// Reads a 64-bit float from the constant buffer. The upper word is always fetched from the
// address with bit 2 set; the lower word comes from CbufLowerBits, which returns zero when
// instruction bit 20 flags the access as unaligned.
IR::F64 TranslatorVisitor::GetDoubleCbuf(u64 insn) {
    union {
        u64 raw;
        BitField<20, 1, u64> unaligned;
    } const flags{insn};

    const auto [binding, offset_value]{CbufAddr(insn)};
    const bool unaligned{flags.unaligned != 0};
    const u32 offset{offset_value.U32()};

    // An aligned access first snaps the offset down to an 8-byte boundary.
    const u32 base{unaligned ? offset : (offset & ~7u)};
    const IR::Value upper_addr{base | 4u};

    const IR::U32 upper_word{ir.GetCbuf(binding, IR::U32{upper_addr})};
    const IR::U32 lower_word{CbufLowerBits(ir, unaligned, binding, offset)};
    return ir.PackDouble2x32(ir.CompositeConstruct(lower_word, upper_word));
}
|
||||
|
||||
// Reads two consecutive constant-buffer words (offset and offset + 4) and packs them into
// one 64-bit integer, lower word first.
// Throws NotImplementedException when instruction bit 20 flags the access as unaligned.
IR::U64 TranslatorVisitor::GetPackedCbuf(u64 insn) {
    union {
        u64 raw;
        BitField<20, 1, u64> unaligned;
    } const flags{insn};

    const bool unaligned{flags.unaligned != 0};
    if (unaligned) {
        throw NotImplementedException("Unaligned packed constant buffer read");
    }
    const auto [binding, lower_offset]{CbufAddr(insn)};
    const u32 upper_byte_offset{lower_offset.U32() + 4};
    const IR::U32 upper_offset{ir.Imm32(upper_byte_offset)};
    const IR::U32 lower_word{ir.GetCbuf(binding, lower_offset)};
    const IR::U32 upper_word{ir.GetCbuf(binding, upper_offset)};
    return ir.PackUint2x32(ir.CompositeConstruct(lower_word, upper_word));
}
|
||||
|
||||
// Decodes the integer immediate made of a 19-bit value (bits 20-38) and a separate sign bit
// (bit 56). When the sign bit is set, the result is the value minus 2^19.
IR::U32 TranslatorVisitor::GetImm20(u64 insn) {
    union {
        u64 raw;
        BitField<20, 19, u64> value;
        BitField<56, 1, u64> is_negative;
    } const fields{insn};

    if (fields.is_negative == 0) {
        return ir.Imm32(static_cast<u32>(fields.value));
    }
    const s64 magnitude{static_cast<s64>(fields.value)};
    const s64 sign_extended{-(1LL << 19) + magnitude};
    return ir.Imm32(static_cast<s32>(sign_extended));
}
|
||||
|
||||
// Decodes a 32-bit float immediate: the 19-bit field (bits 20-38) is shifted left by 12 and
// bit 56 supplies bit 31 of the resulting pattern.
IR::F32 TranslatorVisitor::GetFloatImm20(u64 insn) {
    union {
        u64 raw;
        BitField<20, 19, u64> value;
        BitField<56, 1, u64> is_negative;
    } const fields{insn};

    const u32 payload{static_cast<u32>(fields.value) << 12};
    const u32 sign{static_cast<u32>(fields.is_negative != 0 ? (1ULL << 31) : 0)};
    const u32 bits{payload | sign};
    return ir.Imm32(Common::BitCast<f32>(bits));
}
|
||||
|
||||
// Decodes a 64-bit float immediate: the 19-bit field (bits 20-38) is shifted left by 44 and
// bit 56 supplies bit 63 of the resulting pattern.
IR::F64 TranslatorVisitor::GetDoubleImm20(u64 insn) {
    union {
        u64 raw;
        BitField<20, 19, u64> value;
        BitField<56, 1, u64> is_negative;
    } const fields{insn};

    const u64 payload{static_cast<u64>(fields.value) << 44};
    const u64 sign{fields.is_negative != 0 ? (1ULL << 63) : 0};
    const u64 bits{payload | sign};
    return ir.Imm64(Common::BitCast<f64>(bits));
}
|
||||
|
||||
// Builds a 64-bit immediate whose upper 32 bits hold the GetImm20 value and whose lower
// 32 bits are zero.
IR::U64 TranslatorVisitor::GetPackedImm20(u64 insn) {
    const s64 imm20{GetImm20(insn).U32()};
    const s64 in_upper_word{imm20 << 32};
    return ir.Imm64(static_cast<u64>(in_upper_word));
}
|
||||
|
||||
// Returns the 32-bit immediate stored in instruction bits 20-51.
IR::U32 TranslatorVisitor::GetImm32(u64 insn) {
    union {
        u64 raw;
        BitField<20, 32, u64> value;
    } const fields{insn};
    const u32 bits{static_cast<u32>(fields.value)};
    return ir.Imm32(bits);
}
|
||||
|
||||
// Returns the 32-bit immediate stored in instruction bits 20-51, reinterpreted as a float.
IR::F32 TranslatorVisitor::GetFloatImm32(u64 insn) {
    union {
        u64 raw;
        BitField<20, 32, u64> value;
    } const fields{insn};
    const u32 bits{static_cast<u32>(fields.value)};
    return ir.Imm32(Common::BitCast<f32>(bits));
}
|
||||
|
||||
// Forwards `value` to the IR emitter's Z flag setter.
void TranslatorVisitor::SetZFlag(const IR::U1& value) {
    ir.SetZFlag(value);
}
|
||||
|
||||
// Forwards `value` to the IR emitter's S flag setter.
void TranslatorVisitor::SetSFlag(const IR::U1& value) {
    ir.SetSFlag(value);
}
|
||||
|
||||
// Forwards `value` to the IR emitter's C flag setter.
void TranslatorVisitor::SetCFlag(const IR::U1& value) {
    ir.SetCFlag(value);
}
|
||||
|
||||
// Forwards `value` to the IR emitter's O flag setter.
void TranslatorVisitor::SetOFlag(const IR::U1& value) {
    ir.SetOFlag(value);
}
|
||||
|
||||
void TranslatorVisitor::ResetZero() {
|
||||
SetZFlag(ir.Imm1(false));
|
||||
}
|
||||
|
||||
void TranslatorVisitor::ResetSFlag() {
|
||||
SetSFlag(ir.Imm1(false));
|
||||
}
|
||||
|
||||
void TranslatorVisitor::ResetCFlag() {
|
||||
SetCFlag(ir.Imm1(false));
|
||||
}
|
||||
|
||||
void TranslatorVisitor::ResetOFlag() {
|
||||
SetOFlag(ir.Imm1(false));
|
||||
}
|
||||
|
||||
} // namespace Shader::Maxwell
|
387
src/shader_recompiler/frontend/maxwell/translate/impl/impl.h
Executable file
387
src/shader_recompiler/frontend/maxwell/translate/impl/impl.h
Executable file
@@ -0,0 +1,387 @@
|
||||
// Copyright 2021 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "shader_recompiler/environment.h"
|
||||
#include "shader_recompiler/frontend/ir/basic_block.h"
|
||||
#include "shader_recompiler/frontend/ir/ir_emitter.h"
|
||||
#include "shader_recompiler/frontend/maxwell/instruction.h"
|
||||
|
||||
namespace Shader::Maxwell {
|
||||
|
||||
// Integer comparison selector. Values are spelled out because the enum is used as the type of
// instruction bit fields (e.g. BitField<49, 3, CompareOp>), so the numbering must not drift.
enum class CompareOp : u64 {
    False = 0,
    LessThan = 1,
    Equal = 2,
    LessThanEqual = 3,
    GreaterThan = 4,
    NotEqual = 5,
    GreaterThanEqual = 6,
    True = 7,
};
|
||||
|
||||
// Boolean combiner selector. Used as the type of instruction bit fields
// (e.g. BitField<45, 2, BooleanOp>), hence the explicit numbering.
enum class BooleanOp : u64 {
    AND = 0,
    OR = 1,
    XOR = 2,
};
|
||||
|
||||
// Predicate operation selector, numbered explicitly in declaration order.
enum class PredicateOp : u64 {
    False = 0,
    True = 1,
    Zero = 2,
    NonZero = 3,
};
|
||||
|
||||
// Floating point comparison selector, numbered explicitly in declaration order.
// NOTE(review): the U-suffixed entries are presumably the unordered variants - confirm against
// the code that consumes this enum.
enum class FPCompareOp : u64 {
    F = 0,
    LT = 1,
    EQ = 2,
    LE = 3,
    GT = 4,
    NE = 5,
    GE = 6,
    NUM = 7,
    Nan = 8,
    LTU = 9,
    EQU = 10,
    LEU = 11,
    GTU = 12,
    NEU = 13,
    GEU = 14,
    T = 15,
};
|
||||
|
||||
class TranslatorVisitor {
|
||||
public:
|
||||
explicit TranslatorVisitor(Environment& env_, IR::Block& block) : env{env_}, ir(block) {}
|
||||
|
||||
Environment& env;
|
||||
IR::IREmitter ir;
|
||||
|
||||
void AL2P(u64 insn);
|
||||
void ALD(u64 insn);
|
||||
void AST(u64 insn);
|
||||
void ATOM_cas(u64 insn);
|
||||
void ATOM(u64 insn);
|
||||
void ATOMS_cas(u64 insn);
|
||||
void ATOMS(u64 insn);
|
||||
void B2R(u64 insn);
|
||||
void BAR(u64 insn);
|
||||
void BFE_reg(u64 insn);
|
||||
void BFE_cbuf(u64 insn);
|
||||
void BFE_imm(u64 insn);
|
||||
void BFI_reg(u64 insn);
|
||||
void BFI_rc(u64 insn);
|
||||
void BFI_cr(u64 insn);
|
||||
void BFI_imm(u64 insn);
|
||||
void BPT(u64 insn);
|
||||
void BRA(u64 insn);
|
||||
void BRK(u64 insn);
|
||||
void BRX(u64 insn);
|
||||
void CAL();
|
||||
void CCTL(u64 insn);
|
||||
void CCTLL(u64 insn);
|
||||
void CONT(u64 insn);
|
||||
void CS2R(u64 insn);
|
||||
void CSET(u64 insn);
|
||||
void CSETP(u64 insn);
|
||||
void DADD_reg(u64 insn);
|
||||
void DADD_cbuf(u64 insn);
|
||||
void DADD_imm(u64 insn);
|
||||
void DEPBAR();
|
||||
void DFMA_reg(u64 insn);
|
||||
void DFMA_rc(u64 insn);
|
||||
void DFMA_cr(u64 insn);
|
||||
void DFMA_imm(u64 insn);
|
||||
void DMNMX_reg(u64 insn);
|
||||
void DMNMX_cbuf(u64 insn);
|
||||
void DMNMX_imm(u64 insn);
|
||||
void DMUL_reg(u64 insn);
|
||||
void DMUL_cbuf(u64 insn);
|
||||
void DMUL_imm(u64 insn);
|
||||
void DSET_reg(u64 insn);
|
||||
void DSET_cbuf(u64 insn);
|
||||
void DSET_imm(u64 insn);
|
||||
void DSETP_reg(u64 insn);
|
||||
void DSETP_cbuf(u64 insn);
|
||||
void DSETP_imm(u64 insn);
|
||||
void EXIT();
|
||||
void F2F_reg(u64 insn);
|
||||
void F2F_cbuf(u64 insn);
|
||||
void F2F_imm(u64 insn);
|
||||
void F2I_reg(u64 insn);
|
||||
void F2I_cbuf(u64 insn);
|
||||
void F2I_imm(u64 insn);
|
||||
void FADD_reg(u64 insn);
|
||||
void FADD_cbuf(u64 insn);
|
||||
void FADD_imm(u64 insn);
|
||||
void FADD32I(u64 insn);
|
||||
void FCHK_reg(u64 insn);
|
||||
void FCHK_cbuf(u64 insn);
|
||||
void FCHK_imm(u64 insn);
|
||||
void FCMP_reg(u64 insn);
|
||||
void FCMP_rc(u64 insn);
|
||||
void FCMP_cr(u64 insn);
|
||||
void FCMP_imm(u64 insn);
|
||||
void FFMA_reg(u64 insn);
|
||||
void FFMA_rc(u64 insn);
|
||||
void FFMA_cr(u64 insn);
|
||||
void FFMA_imm(u64 insn);
|
||||
void FFMA32I(u64 insn);
|
||||
void FLO_reg(u64 insn);
|
||||
void FLO_cbuf(u64 insn);
|
||||
void FLO_imm(u64 insn);
|
||||
void FMNMX_reg(u64 insn);
|
||||
void FMNMX_cbuf(u64 insn);
|
||||
void FMNMX_imm(u64 insn);
|
||||
void FMUL_reg(u64 insn);
|
||||
void FMUL_cbuf(u64 insn);
|
||||
void FMUL_imm(u64 insn);
|
||||
void FMUL32I(u64 insn);
|
||||
void FSET_reg(u64 insn);
|
||||
void FSET_cbuf(u64 insn);
|
||||
void FSET_imm(u64 insn);
|
||||
void FSETP_reg(u64 insn);
|
||||
void FSETP_cbuf(u64 insn);
|
||||
void FSETP_imm(u64 insn);
|
||||
void FSWZADD(u64 insn);
|
||||
void GETCRSPTR(u64 insn);
|
||||
void GETLMEMBASE(u64 insn);
|
||||
void HADD2_reg(u64 insn);
|
||||
void HADD2_cbuf(u64 insn);
|
||||
void HADD2_imm(u64 insn);
|
||||
void HADD2_32I(u64 insn);
|
||||
void HFMA2_reg(u64 insn);
|
||||
void HFMA2_rc(u64 insn);
|
||||
void HFMA2_cr(u64 insn);
|
||||
void HFMA2_imm(u64 insn);
|
||||
void HFMA2_32I(u64 insn);
|
||||
void HMUL2_reg(u64 insn);
|
||||
void HMUL2_cbuf(u64 insn);
|
||||
void HMUL2_imm(u64 insn);
|
||||
void HMUL2_32I(u64 insn);
|
||||
void HSET2_reg(u64 insn);
|
||||
void HSET2_cbuf(u64 insn);
|
||||
void HSET2_imm(u64 insn);
|
||||
void HSETP2_reg(u64 insn);
|
||||
void HSETP2_cbuf(u64 insn);
|
||||
void HSETP2_imm(u64 insn);
|
||||
void I2F_reg(u64 insn);
|
||||
void I2F_cbuf(u64 insn);
|
||||
void I2F_imm(u64 insn);
|
||||
void I2I_reg(u64 insn);
|
||||
void I2I_cbuf(u64 insn);
|
||||
void I2I_imm(u64 insn);
|
||||
void IADD_reg(u64 insn);
|
||||
void IADD_cbuf(u64 insn);
|
||||
void IADD_imm(u64 insn);
|
||||
void IADD3_reg(u64 insn);
|
||||
void IADD3_cbuf(u64 insn);
|
||||
void IADD3_imm(u64 insn);
|
||||
void IADD32I(u64 insn);
|
||||
void ICMP_reg(u64 insn);
|
||||
void ICMP_rc(u64 insn);
|
||||
void ICMP_cr(u64 insn);
|
||||
void ICMP_imm(u64 insn);
|
||||
void IDE(u64 insn);
|
||||
void IDP_reg(u64 insn);
|
||||
void IDP_imm(u64 insn);
|
||||
void IMAD_reg(u64 insn);
|
||||
void IMAD_rc(u64 insn);
|
||||
void IMAD_cr(u64 insn);
|
||||
void IMAD_imm(u64 insn);
|
||||
void IMAD32I(u64 insn);
|
||||
void IMADSP_reg(u64 insn);
|
||||
void IMADSP_rc(u64 insn);
|
||||
void IMADSP_cr(u64 insn);
|
||||
void IMADSP_imm(u64 insn);
|
||||
void IMNMX_reg(u64 insn);
|
||||
void IMNMX_cbuf(u64 insn);
|
||||
void IMNMX_imm(u64 insn);
|
||||
void IMUL_reg(u64 insn);
|
||||
void IMUL_cbuf(u64 insn);
|
||||
void IMUL_imm(u64 insn);
|
||||
void IMUL32I(u64 insn);
|
||||
void IPA(u64 insn);
|
||||
void ISBERD(u64 insn);
|
||||
void ISCADD_reg(u64 insn);
|
||||
void ISCADD_cbuf(u64 insn);
|
||||
void ISCADD_imm(u64 insn);
|
||||
void ISCADD32I(u64 insn);
|
||||
void ISET_reg(u64 insn);
|
||||
void ISET_cbuf(u64 insn);
|
||||
void ISET_imm(u64 insn);
|
||||
void ISETP_reg(u64 insn);
|
||||
void ISETP_cbuf(u64 insn);
|
||||
void ISETP_imm(u64 insn);
|
||||
void JCAL(u64 insn);
|
||||
void JMP(u64 insn);
|
||||
void JMX(u64 insn);
|
||||
void KIL();
|
||||
void LD(u64 insn);
|
||||
void LDC(u64 insn);
|
||||
void LDG(u64 insn);
|
||||
void LDL(u64 insn);
|
||||
void LDS(u64 insn);
|
||||
void LEA_hi_reg(u64 insn);
|
||||
void LEA_hi_cbuf(u64 insn);
|
||||
void LEA_lo_reg(u64 insn);
|
||||
void LEA_lo_cbuf(u64 insn);
|
||||
void LEA_lo_imm(u64 insn);
|
||||
void LEPC(u64 insn);
|
||||
void LONGJMP(u64 insn);
|
||||
void LOP_reg(u64 insn);
|
||||
void LOP_cbuf(u64 insn);
|
||||
void LOP_imm(u64 insn);
|
||||
void LOP3_reg(u64 insn);
|
||||
void LOP3_cbuf(u64 insn);
|
||||
void LOP3_imm(u64 insn);
|
||||
void LOP32I(u64 insn);
|
||||
void MEMBAR(u64 insn);
|
||||
void MOV_reg(u64 insn);
|
||||
void MOV_cbuf(u64 insn);
|
||||
void MOV_imm(u64 insn);
|
||||
void MOV32I(u64 insn);
|
||||
void MUFU(u64 insn);
|
||||
void NOP(u64 insn);
|
||||
void OUT_reg(u64 insn);
|
||||
void OUT_cbuf(u64 insn);
|
||||
void OUT_imm(u64 insn);
|
||||
void P2R_reg(u64 insn);
|
||||
void P2R_cbuf(u64 insn);
|
||||
void P2R_imm(u64 insn);
|
||||
void PBK();
|
||||
void PCNT();
|
||||
void PEXIT(u64 insn);
|
||||
void PIXLD(u64 insn);
|
||||
void PLONGJMP(u64 insn);
|
||||
void POPC_reg(u64 insn);
|
||||
void POPC_cbuf(u64 insn);
|
||||
void POPC_imm(u64 insn);
|
||||
void PRET(u64 insn);
|
||||
void PRMT_reg(u64 insn);
|
||||
void PRMT_rc(u64 insn);
|
||||
void PRMT_cr(u64 insn);
|
||||
void PRMT_imm(u64 insn);
|
||||
void PSET(u64 insn);
|
||||
void PSETP(u64 insn);
|
||||
void R2B(u64 insn);
|
||||
void R2P_reg(u64 insn);
|
||||
void R2P_cbuf(u64 insn);
|
||||
void R2P_imm(u64 insn);
|
||||
void RAM(u64 insn);
|
||||
void RED(u64 insn);
|
||||
void RET(u64 insn);
|
||||
void RRO_reg(u64 insn);
|
||||
void RRO_cbuf(u64 insn);
|
||||
void RRO_imm(u64 insn);
|
||||
void RTT(u64 insn);
|
||||
void S2R(u64 insn);
|
||||
void SAM(u64 insn);
|
||||
void SEL_reg(u64 insn);
|
||||
void SEL_cbuf(u64 insn);
|
||||
void SEL_imm(u64 insn);
|
||||
void SETCRSPTR(u64 insn);
|
||||
void SETLMEMBASE(u64 insn);
|
||||
void SHF_l_reg(u64 insn);
|
||||
void SHF_l_imm(u64 insn);
|
||||
void SHF_r_reg(u64 insn);
|
||||
void SHF_r_imm(u64 insn);
|
||||
void SHFL(u64 insn);
|
||||
void SHL_reg(u64 insn);
|
||||
void SHL_cbuf(u64 insn);
|
||||
void SHL_imm(u64 insn);
|
||||
void SHR_reg(u64 insn);
|
||||
void SHR_cbuf(u64 insn);
|
||||
void SHR_imm(u64 insn);
|
||||
void SSY();
|
||||
void ST(u64 insn);
|
||||
void STG(u64 insn);
|
||||
void STL(u64 insn);
|
||||
void STP(u64 insn);
|
||||
void STS(u64 insn);
|
||||
void SUATOM(u64 insn);
|
||||
void SUATOM_cas(u64 insn);
|
||||
void SULD(u64 insn);
|
||||
void SURED(u64 insn);
|
||||
void SUST(u64 insn);
|
||||
void SYNC(u64 insn);
|
||||
void TEX(u64 insn);
|
||||
void TEX_b(u64 insn);
|
||||
void TEXS(u64 insn);
|
||||
void TLD(u64 insn);
|
||||
void TLD_b(u64 insn);
|
||||
void TLD4(u64 insn);
|
||||
void TLD4_b(u64 insn);
|
||||
void TLD4S(u64 insn);
|
||||
void TLDS(u64 insn);
|
||||
void TMML(u64 insn);
|
||||
void TMML_b(u64 insn);
|
||||
void TXA(u64 insn);
|
||||
void TXD(u64 insn);
|
||||
void TXD_b(u64 insn);
|
||||
void TXQ(u64 insn);
|
||||
void TXQ_b(u64 insn);
|
||||
void VABSDIFF(u64 insn);
|
||||
void VABSDIFF4(u64 insn);
|
||||
void VADD(u64 insn);
|
||||
void VMAD(u64 insn);
|
||||
void VMNMX(u64 insn);
|
||||
void VOTE(u64 insn);
|
||||
void VOTE_vtg(u64 insn);
|
||||
void VSET(u64 insn);
|
||||
void VSETP(u64 insn);
|
||||
void VSHL(u64 insn);
|
||||
void VSHR(u64 insn);
|
||||
void XMAD_reg(u64 insn);
|
||||
void XMAD_rc(u64 insn);
|
||||
void XMAD_cr(u64 insn);
|
||||
void XMAD_imm(u64 insn);
|
||||
|
||||
[[nodiscard]] IR::U32 X(IR::Reg reg);
|
||||
[[nodiscard]] IR::U64 L(IR::Reg reg);
|
||||
[[nodiscard]] IR::F32 F(IR::Reg reg);
|
||||
[[nodiscard]] IR::F64 D(IR::Reg reg);
|
||||
|
||||
void X(IR::Reg dest_reg, const IR::U32& value);
|
||||
void L(IR::Reg dest_reg, const IR::U64& value);
|
||||
void F(IR::Reg dest_reg, const IR::F32& value);
|
||||
void D(IR::Reg dest_reg, const IR::F64& value);
|
||||
|
||||
[[nodiscard]] IR::U32 GetReg8(u64 insn);
|
||||
[[nodiscard]] IR::U32 GetReg20(u64 insn);
|
||||
[[nodiscard]] IR::U32 GetReg39(u64 insn);
|
||||
[[nodiscard]] IR::F32 GetFloatReg8(u64 insn);
|
||||
[[nodiscard]] IR::F32 GetFloatReg20(u64 insn);
|
||||
[[nodiscard]] IR::F32 GetFloatReg39(u64 insn);
|
||||
[[nodiscard]] IR::F64 GetDoubleReg20(u64 insn);
|
||||
[[nodiscard]] IR::F64 GetDoubleReg39(u64 insn);
|
||||
|
||||
[[nodiscard]] IR::U32 GetCbuf(u64 insn);
|
||||
[[nodiscard]] IR::F32 GetFloatCbuf(u64 insn);
|
||||
[[nodiscard]] IR::F64 GetDoubleCbuf(u64 insn);
|
||||
[[nodiscard]] IR::U64 GetPackedCbuf(u64 insn);
|
||||
|
||||
[[nodiscard]] IR::U32 GetImm20(u64 insn);
|
||||
[[nodiscard]] IR::F32 GetFloatImm20(u64 insn);
|
||||
[[nodiscard]] IR::F64 GetDoubleImm20(u64 insn);
|
||||
[[nodiscard]] IR::U64 GetPackedImm20(u64 insn);
|
||||
|
||||
[[nodiscard]] IR::U32 GetImm32(u64 insn);
|
||||
[[nodiscard]] IR::F32 GetFloatImm32(u64 insn);
|
||||
|
||||
void SetZFlag(const IR::U1& value);
|
||||
void SetSFlag(const IR::U1& value);
|
||||
void SetCFlag(const IR::U1& value);
|
||||
void SetOFlag(const IR::U1& value);
|
||||
|
||||
void ResetZero();
|
||||
void ResetSFlag();
|
||||
void ResetCFlag();
|
||||
void ResetOFlag();
|
||||
};
|
||||
|
||||
} // namespace Shader::Maxwell
|
105
src/shader_recompiler/frontend/maxwell/translate/impl/integer_add.cpp
Executable file
105
src/shader_recompiler/frontend/maxwell/translate/impl/integer_add.cpp
Executable file
@@ -0,0 +1,105 @@
|
||||
// Copyright 2021 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include "common/bit_field.h"
|
||||
#include "common/common_types.h"
|
||||
#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
|
||||
|
||||
namespace Shader::Maxwell {
|
||||
namespace {
|
||||
// Generic IADD body shared by all encodings.
// Reads operand A from the register in bits [8, 16), adds `op_b` to it and writes the sum to
// the register in bits [0, 8).
//   neg_a - negate operand A before the addition
//   po    - add one to the sum
//   sat   - not implemented, always throws
//   x     - add the current C flag to the sum
//   cc    - update Z/S/C/O from the sum (throws when combined with po or x)
void IADD(TranslatorVisitor& v, u64 insn, const IR::U32 op_b, bool neg_a, bool po, bool sat, bool x,
          bool cc) {
    union {
        u64 raw;
        BitField<0, 8, IR::Reg> dest_reg;
        BitField<8, 8, IR::Reg> src_a;
    } const fields{insn};

    // Modifier combinations rejected before anything is emitted
    if (sat) {
        throw NotImplementedException("IADD SAT");
    }
    if (x && po) {
        throw NotImplementedException("IADD X+PO");
    }

    // Left hand side, optionally negated
    IR::U32 lhs{v.X(fields.src_a)};
    if (neg_a) {
        lhs = v.ir.INeg(lhs);
    }

    IR::U32 sum{v.ir.IAdd(lhs, op_b)};
    if (x) {
        // Fold the incoming carry flag into the sum as 0 or 1
        const IR::U32 carry_in{v.ir.Select(v.ir.GetCFlag(), v.ir.Imm32(1), v.ir.Imm32(0))};
        sum = v.ir.IAdd(sum, carry_in);
    }
    if (po) {
        // .PO adds one to the result
        sum = v.ir.IAdd(sum, v.ir.Imm32(1));
    }

    if (cc) {
        // TODO: Does this grab the result pre-PO or after?
        if (po) {
            throw NotImplementedException("IADD CC+PO");
        }
        // TODO: How does CC behave when X is set?
        if (x) {
            throw NotImplementedException("IADD X+CC");
        }
        // Flags are derived from the operation that produced the sum
        v.SetZFlag(v.ir.GetZeroFromOp(sum));
        v.SetSFlag(v.ir.GetSignFromOp(sum));
        v.SetCFlag(v.ir.GetCarryFromOp(sum));
        v.SetOFlag(v.ir.GetOverflowFromOp(sum));
    }

    v.X(fields.dest_reg, sum);
}
|
||||
|
||||
// Decodes the modifier bits used by the register, constant buffer and immediate forms of IADD
// and forwards to the generic implementation.
//
// Bits 48 and 49 are the individual negate bits for operands B and A. When both are set
// (three_for_po == 3) the pair selects .PO instead, so neither operand may be negated in that
// case. Previously only neg_b was masked and operand A was still negated under .PO; IADD32I
// already masks neg_a the same way.
void IADD(TranslatorVisitor& v, u64 insn, IR::U32 op_b) {
    union {
        u64 insn;
        BitField<43, 1, u64> x;
        BitField<47, 1, u64> cc;
        BitField<48, 2, u64> three_for_po;
        BitField<48, 1, u64> neg_b;
        BitField<49, 1, u64> neg_a;
        BitField<50, 1, u64> sat;
    } const iadd{insn};

    const bool po{iadd.three_for_po == 3};
    const bool neg_a{!po && iadd.neg_a != 0};
    const bool neg_b{!po && iadd.neg_b != 0};
    if (neg_b) {
        op_b = v.ir.INeg(op_b);
    }
    IADD(v, insn, op_b, neg_a, po, iadd.sat != 0, iadd.x != 0, iadd.cc != 0);
}
|
||||
} // Anonymous namespace
|
||||
|
||||
// IADD with operand B obtained through GetReg20.
void TranslatorVisitor::IADD_reg(u64 insn) {
    const IR::U32 op_b{GetReg20(insn)};
    IADD(*this, insn, op_b);
}
|
||||
|
||||
// IADD with operand B obtained through GetCbuf.
void TranslatorVisitor::IADD_cbuf(u64 insn) {
    const IR::U32 op_b{GetCbuf(insn)};
    IADD(*this, insn, op_b);
}
|
||||
|
||||
// IADD with operand B obtained through GetImm20.
void TranslatorVisitor::IADD_imm(u64 insn) {
    const IR::U32 op_b{GetImm20(insn)};
    IADD(*this, insn, op_b);
}
|
||||
|
||||
// IADD with a 32-bit immediate as operand B. This encoding keeps its modifier bits at
// different positions than the other IADD forms, so they are decoded here.
void TranslatorVisitor::IADD32I(u64 insn) {
    union {
        u64 raw;
        BitField<52, 1, u64> cc;
        BitField<53, 1, u64> x;
        BitField<54, 1, u64> sat;
        BitField<55, 2, u64> three_for_po;
        BitField<56, 1, u64> neg_a;
    } const bits{insn};

    // Both bits of three_for_po set selects .PO; the neg_a bit overlaps it and is ignored then
    const bool plus_one{bits.three_for_po == 3};
    const bool negate_a{!plus_one && bits.neg_a != 0};
    const bool saturate{bits.sat != 0};
    const bool extended{bits.x != 0};
    const bool write_cc{bits.cc != 0};
    IADD(*this, insn, GetImm32(insn), negate_a, plus_one, saturate, extended, write_cc);
}
|
||||
|
||||
} // namespace Shader::Maxwell
|
@@ -0,0 +1,122 @@
|
||||
// Copyright 2021 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include "common/bit_field.h"
|
||||
#include "common/common_types.h"
|
||||
#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
|
||||
|
||||
namespace Shader::Maxwell {
|
||||
namespace {
|
||||
// Shift applied to the first partial sum of IADD3. Decoded from a 2-bit instruction field,
// hence the explicit numbering.
enum class Shift : u64 {
    None = 0,
    Right = 1,
    Left = 2,
};
|
||||
// Part of a 32-bit operand used by IADD3. Decoded from a 2-bit instruction field, hence the
// explicit numbering.
enum class Half : u64 {
    All = 0,
    Lower = 1,
    Upper = 2,
};
|
||||
|
||||
// Selects the requested part of `value`: the whole word, or an unsigned 16-bit extract starting
// at bit 0 (Lower) or bit 16 (Upper). Throws on any other selector.
[[nodiscard]] IR::U32 IntegerHalf(IR::IREmitter& ir, const IR::U32& value, Half half) {
    if (half == Half::All) {
        return value;
    }
    if (half != Half::Lower && half != Half::Upper) {
        throw NotImplementedException("Invalid half");
    }
    constexpr bool is_signed{false};
    const int first_bit{half == Half::Lower ? 0 : 16};
    return ir.BitFieldExtract(value, ir.Imm32(first_bit), ir.Imm32(16), is_signed);
}
|
||||
|
||||
// Applies the IADD3 shift modifier to `value`: no shift, or a 16-bit logical shift left/right.
// Throws on any other selector.
[[nodiscard]] IR::U32 IntegerShift(IR::IREmitter& ir, const IR::U32& value, Shift shift) {
    if (shift == Shift::None) {
        return value;
    }
    if (shift == Shift::Left) {
        return ir.ShiftLeftLogical(value, ir.Imm32(16));
    }
    if (shift != Shift::Right) {
        throw NotImplementedException("Invalid shift");
    }
    // 33-bit RS IADD3 edge case: when the operation that produced `value` carried out, the lost
    // bit is re-inserted at bit 16 of the shifted result
    const IR::U1 carried{ir.GetCarryFromOp(value)};
    const IR::U32 high_half{ir.ShiftRightLogical(value, ir.Imm32(16))};
    return IR::U32{ir.Select(carried, ir.IAdd(high_half, ir.Imm32(0x10000)), high_half)};
}
|
||||
|
||||
// Three input add: dest = shift(op_a + op_b [+ C flag]) + op_c, with an optional negation of
// each operand taken from the instruction word.
// Throws for X combined with a right shift and for X combined with CC.
void IADD3(TranslatorVisitor& v, u64 insn, IR::U32 op_a, IR::U32 op_b, IR::U32 op_c,
           Shift shift = Shift::None) {
    union {
        u64 insn;
        BitField<0, 8, IR::Reg> dest_reg;
        BitField<47, 1, u64> cc;
        BitField<48, 1, u64> x;
        BitField<49, 1, u64> neg_c;
        BitField<50, 1, u64> neg_b;
        BitField<51, 1, u64> neg_a;
    } const bits{insn};

    const bool extended{bits.x != 0};

    // Per-operand negation
    if (bits.neg_a != 0) {
        op_a = v.ir.INeg(op_a);
    }
    if (bits.neg_b != 0) {
        op_b = v.ir.INeg(op_b);
    }
    if (bits.neg_c != 0) {
        op_c = v.ir.INeg(op_c);
    }

    // First partial sum, optionally including the incoming carry
    IR::U32 partial{v.ir.IAdd(op_a, op_b)};
    if (extended) {
        // TODO: How does RS behave when X is set?
        if (shift == Shift::Right) {
            throw NotImplementedException("IADD3 X+RS");
        }
        const IR::U32 carry_in{v.ir.Select(v.ir.GetCFlag(), v.ir.Imm32(1), v.ir.Imm32(0))};
        partial = v.ir.IAdd(partial, carry_in);
    }

    const IR::U32 shifted{IntegerShift(v.ir, partial, shift)};
    const IR::U32 total{v.ir.IAdd(shifted, op_c)};
    v.X(bits.dest_reg, total);

    if (bits.cc == 0) {
        return;
    }
    // TODO: How does CC behave when X is set?
    if (extended) {
        throw NotImplementedException("IADD3 X+CC");
    }
    v.SetZFlag(v.ir.GetZeroFromOp(total));
    v.SetSFlag(v.ir.GetSignFromOp(total));
    v.SetCFlag(v.ir.GetCarryFromOp(total));
    // Overflow is reported when either the final add overflowed or the first partial sum
    // compares below operand A as an unsigned value
    const IR::U1 partial_wrapped{v.ir.ILessThan(partial, op_a, false)};
    v.SetOFlag(v.ir.LogicalOr(v.ir.GetOverflowFromOp(total), partial_wrapped));
}
|
||||
} // Anonymous namespace
|
||||
|
||||
// Register form of IADD3. Only this form decodes the shift and the per-operand half selectors.
void TranslatorVisitor::IADD3_reg(u64 insn) {
    union {
        u64 insn;
        BitField<37, 2, Shift> shift;
        BitField<35, 2, Half> half_a;
        BitField<33, 2, Half> half_b;
        BitField<31, 2, Half> half_c;
    } const bits{insn};

    // Operands are read and narrowed in A, B, C order
    const IR::U32 first{IntegerHalf(ir, GetReg8(insn), bits.half_a)};
    const IR::U32 second{IntegerHalf(ir, GetReg20(insn), bits.half_b)};
    const IR::U32 third{IntegerHalf(ir, GetReg39(insn), bits.half_c)};
    IADD3(*this, insn, first, second, third, bits.shift);
}
|
||||
|
||||
// Constant buffer form of IADD3; no shift or half selection is applied.
void TranslatorVisitor::IADD3_cbuf(u64 insn) {
    IADD3(*this, insn,
          /*op_a=*/GetReg8(insn),
          /*op_b=*/GetCbuf(insn),
          /*op_c=*/GetReg39(insn));
}
|
||||
|
||||
// Immediate form of IADD3; no shift or half selection is applied.
void TranslatorVisitor::IADD3_imm(u64 insn) {
    IADD3(*this, insn,
          /*op_a=*/GetReg8(insn),
          /*op_b=*/GetImm20(insn),
          /*op_c=*/GetReg39(insn));
}
|
||||
|
||||
} // namespace Shader::Maxwell
|
48
src/shader_recompiler/frontend/maxwell/translate/impl/integer_compare.cpp
Executable file
48
src/shader_recompiler/frontend/maxwell/translate/impl/integer_compare.cpp
Executable file
@@ -0,0 +1,48 @@
|
||||
// Copyright 2021 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include "common/bit_field.h"
|
||||
#include "common/common_types.h"
|
||||
#include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h"
|
||||
#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
|
||||
|
||||
namespace Shader::Maxwell {
|
||||
namespace {
|
||||
// Integer compare-and-select: compares `operand` against zero and writes either the register in
// bits [8, 16) (comparison true) or `src_a` (comparison false) to the destination register.
void ICMP(TranslatorVisitor& v, u64 insn, const IR::U32& src_a, const IR::U32& operand) {
    union {
        u64 insn;
        BitField<0, 8, IR::Reg> dest_reg;
        BitField<8, 8, IR::Reg> src_reg;
        BitField<48, 1, u64> is_signed;
        BitField<49, 3, CompareOp> compare_op;
    } const bits{insn};

    const bool signed_compare{bits.is_signed != 0};
    const IR::U32 zero{v.ir.Imm32(0)};
    const IR::U1 condition{
        IntegerCompare(v.ir, operand, zero, bits.compare_op, signed_compare)};

    const IR::U32 when_true{v.X(bits.src_reg)};
    const IR::U32 selected{v.ir.Select(condition, when_true, src_a)};
    v.X(bits.dest_reg, selected);
}
|
||||
} // Anonymous namespace
|
||||
|
||||
// ICMP, register/register form.
void TranslatorVisitor::ICMP_reg(u64 insn) {
    ICMP(*this, insn,
         /*src_a=*/GetReg20(insn),
         /*operand=*/GetReg39(insn));
}
|
||||
|
||||
// ICMP, register/constant buffer form: the constant buffer value is the compared operand.
void TranslatorVisitor::ICMP_rc(u64 insn) {
    ICMP(*this, insn,
         /*src_a=*/GetReg39(insn),
         /*operand=*/GetCbuf(insn));
}
|
||||
|
||||
// ICMP, constant buffer/register form: the register value is the compared operand.
void TranslatorVisitor::ICMP_cr(u64 insn) {
    ICMP(*this, insn,
         /*src_a=*/GetCbuf(insn),
         /*operand=*/GetReg39(insn));
}
|
||||
|
||||
// ICMP, immediate form: the register value is the compared operand.
void TranslatorVisitor::ICMP_imm(u64 insn) {
    ICMP(*this, insn,
         /*src_a=*/GetImm20(insn),
         /*operand=*/GetReg39(insn));
}
|
||||
|
||||
} // namespace Shader::Maxwell
|
@@ -0,0 +1,123 @@
|
||||
// Copyright 2021 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include "common/bit_field.h"
|
||||
#include "common/common_types.h"
|
||||
#include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h"
|
||||
#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
|
||||
|
||||
namespace Shader::Maxwell {
|
||||
namespace {
|
||||
// Comparison used when the X modifier is set. It evaluates
//     diff = operand_1 + ~operand_2 + C
// and derives the result from the sign/zero state of `diff` together with the current Z flag.
// For unsigned comparisons the signed test on `diff` is inverted whenever the two operands
// differ in their top bit.
IR::U1 ExtendedIntegerCompare(IR::IREmitter& ir, const IR::U32& operand_1, const IR::U32& operand_2,
                              CompareOp compare_op, bool is_signed) {
    const IR::U32 zero{ir.Imm32(0)};
    const IR::U32 carry_in{ir.Select(ir.GetCFlag(), ir.Imm32(1), zero)};
    const IR::U1 prev_zero{ir.GetZFlag()};
    const IR::U32 diff{ir.IAdd(ir.IAdd(operand_1, ir.BitwiseNot(operand_2)), carry_in)};
    const IR::U1 invert{is_signed ? ir.Imm1(false)
                                  : ir.LogicalXor(ir.ILessThan(operand_1, zero, true),
                                                  ir.ILessThan(operand_2, zero, true))};

    // (diff == 0) && flag
    const auto diff_is_zero_and{[&ir, &diff, &zero](const IR::U1& flag) {
        return ir.LogicalAnd(ir.IEqual(diff, zero), flag);
    }};

    switch (compare_op) {
    case CompareOp::False:
        return ir.Imm1(false);
    case CompareOp::LessThan:
        return IR::U1{ir.Select(invert, ir.IGreaterThanEqual(diff, zero, true),
                                ir.ILessThan(diff, zero, true))};
    case CompareOp::Equal:
        return diff_is_zero_and(prev_zero);
    case CompareOp::LessThanEqual: {
        const IR::U1 strict{ir.Select(invert, ir.IGreaterThanEqual(diff, zero, true),
                                      ir.ILessThan(diff, zero, true))};
        return ir.LogicalOr(strict, diff_is_zero_and(prev_zero));
    }
    case CompareOp::GreaterThan: {
        const IR::U1 strict{ir.Select(invert, ir.ILessThanEqual(diff, zero, true),
                                      ir.IGreaterThan(diff, zero, true))};
        const IR::U1 prev_nonzero{ir.LogicalNot(prev_zero)};
        return ir.LogicalOr(strict, diff_is_zero_and(prev_nonzero));
    }
    case CompareOp::NotEqual:
        return ir.LogicalOr(ir.INotEqual(diff, zero),
                            ir.LogicalAnd(ir.IEqual(diff, zero), ir.LogicalNot(prev_zero)));
    case CompareOp::GreaterThanEqual: {
        const IR::U1 loose{ir.Select(invert, ir.ILessThan(diff, zero, true),
                                     ir.IGreaterThanEqual(diff, zero, true))};
        return ir.LogicalOr(loose, diff_is_zero_and(prev_zero));
    }
    case CompareOp::True:
        return ir.Imm1(true);
    default:
        throw NotImplementedException("Invalid compare op {}", compare_op);
    }
}
|
||||
|
||||
// Picks the comparison used by ISET: the extended variant when `x` is set, the plain integer
// comparison otherwise.
IR::U1 IsetCompare(IR::IREmitter& ir, const IR::U32& operand_1, const IR::U32& operand_2,
                   CompareOp compare_op, bool is_signed, bool x) {
    if (x) {
        return ExtendedIntegerCompare(ir, operand_1, operand_2, compare_op, is_signed);
    }
    return IntegerCompare(ir, operand_1, operand_2, compare_op, is_signed);
}
|
||||
|
||||
// Integer compare-and-set: compares the register in bits [8, 16) with `src_b`, combines the
// outcome with a (possibly negated) predicate, and writes either a "true" pattern or zero to the
// destination register. With BF clear the pattern is all ones, with BF set it is 0x3f800000.
// Throws when CC and X are both requested.
void ISET(TranslatorVisitor& v, u64 insn, const IR::U32& src_b) {
    union {
        u64 insn;
        BitField<0, 8, IR::Reg> dest_reg;
        BitField<8, 8, IR::Reg> src_reg;
        BitField<39, 3, IR::Pred> pred;
        BitField<42, 1, u64> neg_pred;
        BitField<43, 1, u64> x;
        BitField<44, 1, u64> bf;
        BitField<45, 2, BooleanOp> bop;
        BitField<47, 1, u64> cc;
        BitField<48, 1, u64> is_signed;
        BitField<49, 3, CompareOp> compare_op;
    } const bits{insn};

    const bool signed_compare{bits.is_signed != 0};
    const bool extended{bits.x != 0};
    const bool float_result{bits.bf != 0};

    const IR::U32 lhs{v.X(bits.src_reg)};
    const IR::U32 zero{v.ir.Imm32(0)};
    const IR::U1 compared{
        IsetCompare(v.ir, lhs, src_b, bits.compare_op, signed_compare, extended)};

    IR::U1 predicate{v.ir.GetPred(bits.pred)};
    if (bits.neg_pred != 0) {
        predicate = v.ir.LogicalNot(predicate);
    }
    const IR::U1 combined{PredicateCombine(v.ir, compared, predicate, bits.bop)};

    const IR::U32 one_mask{v.ir.Imm32(-1)};
    const IR::U32 fp_one{v.ir.Imm32(0x3f800000)};
    const IR::U32 true_pattern{float_result ? fp_one : one_mask};
    const IR::U32 output{v.ir.Select(combined, true_pattern, zero)};
    v.X(bits.dest_reg, output);

    if (bits.cc == 0) {
        return;
    }
    if (extended) {
        throw NotImplementedException("ISET.CC + X");
    }
    const IR::U1 output_is_zero{v.ir.IEqual(output, zero)};
    v.SetZFlag(output_is_zero);
    if (float_result) {
        v.ResetSFlag();
    } else {
        // The all-ones pattern sets the sign flag whenever the output is non-zero
        v.SetSFlag(v.ir.LogicalNot(output_is_zero));
    }
    v.ResetCFlag();
    v.ResetOFlag();
}
|
||||
} // Anonymous namespace
|
||||
|
||||
// ISET with the second operand obtained through GetReg20.
void TranslatorVisitor::ISET_reg(u64 insn) {
    const IR::U32 src_b{GetReg20(insn)};
    ISET(*this, insn, src_b);
}
|
||||
|
||||
// ISET with the second operand obtained through GetCbuf.
void TranslatorVisitor::ISET_cbuf(u64 insn) {
    const IR::U32 src_b{GetCbuf(insn)};
    ISET(*this, insn, src_b);
}
|
||||
|
||||
// ISET with the second operand obtained through GetImm20.
void TranslatorVisitor::ISET_imm(u64 insn) {
    const IR::U32 src_b{GetImm20(insn)};
    ISET(*this, insn, src_b);
}
|
||||
|
||||
} // namespace Shader::Maxwell
|
@@ -0,0 +1,180 @@
|
||||
// Copyright 2021 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include "common/bit_field.h"
|
||||
#include "common/common_types.h"
|
||||
#include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h"
|
||||
#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
|
||||
|
||||
namespace Shader::Maxwell {
|
||||
namespace {
|
||||
// Destination float width of I2F, as stored in instruction bits [8, 10).
// Value 0 has no enumerator.
enum class FloatFormat : u64 {
    F16 = 1, // 16-bit destination
    F32 = 2, // 32-bit destination
    F64 = 3, // 64-bit destination
};
|
||||
|
||||
// Source integer width of I2F, as stored in instruction bits [10, 12).
// Signedness is carried by a separate bit of the encoding.
enum class IntFormat : u64 {
    U8 = 0,  // 8-bit source
    U16 = 1, // 16-bit source
    U32 = 2, // 32-bit source
    U64 = 3, // 64-bit source
};
|
||||
|
||||
union Encoding {
|
||||
u64 raw;
|
||||
BitField<0, 8, IR::Reg> dest_reg;
|
||||
BitField<8, 2, FloatFormat> float_format;
|
||||
BitField<10, 2, IntFormat> int_format;
|
||||
BitField<13, 1, u64> is_signed;
|
||||
BitField<39, 2, FpRounding> fp_rounding;
|
||||
BitField<41, 2, u64> selector;
|
||||
BitField<47, 1, u64> cc;
|
||||
BitField<45, 1, u64> neg;
|
||||
BitField<49, 1, u64> abs;
|
||||
};
|
||||
|
||||
// True when the instruction selects the 64-bit integer source format.
bool Is64(u64 insn) {
    const Encoding encoding{insn};
    return encoding.int_format == IntFormat::U64;
}
|
||||
|
||||
// Width in bits of the given float format. Throws for values without an enumerator.
int BitSize(FloatFormat format) {
    if (format == FloatFormat::F16) {
        return 16;
    }
    if (format == FloatFormat::F32) {
        return 32;
    }
    if (format == FloatFormat::F64) {
        return 64;
    }
    throw NotImplementedException("Invalid float format {}", format);
}
|
||||
|
||||
// Absolute value for sources narrower than 32 bits, computed as (value + mask) ^ mask with
// mask = value >> (bitsize - 1) (arithmetic shift). A value equal to -(1 << (bitsize - 1)) is
// returned unchanged.
IR::U32 SmallAbs(TranslatorVisitor& v, const IR::U32& value, int bitsize) {
    const int sign_position{bitsize - 1};
    const IR::U32 lowest{v.ir.Imm32(-(1 << sign_position))};
    const IR::U32 sign_mask{v.ir.ShiftRightArithmetic(value, v.ir.Imm32(sign_position))};
    const IR::U32 magnitude{v.ir.BitwiseXor(v.ir.IAdd(value, sign_mask), sign_mask)};
    const IR::U1 is_lowest{v.ir.IEqual(value, lowest)};
    return IR::U32{v.ir.Select(is_lowest, value, magnitude)};
}
|
||||
|
||||
// Integer to float conversion.
//   1. Narrow/abs the source according to the integer format and selector.
//   2. Convert to the destination float width with the encoded rounding mode.
//   3. Apply the negate modifier.
//   4. Store the result in the layout required by the destination format.
// Throws for CC, for unsupported selectors, for an unaligned 64-bit destination and for an
// invalid float format.
void I2F(TranslatorVisitor& v, u64 insn, IR::U32U64 src) {
    const Encoding enc{insn};
    if (enc.cc != 0) {
        throw NotImplementedException("I2F CC");
    }
    const bool is_signed{enc.is_signed != 0};
    const bool take_abs{enc.abs != 0};
    const bool wide_source{enc.int_format == IntFormat::U64};

    // Step 1: source preparation
    int src_bitsize{};
    switch (enc.int_format) {
    case IntFormat::U8:
        // The selector picks one of the four bytes
        src = v.ir.BitFieldExtract(src, v.ir.Imm32(static_cast<u32>(enc.selector) * 8),
                                   v.ir.Imm32(8), is_signed);
        if (take_abs) {
            src = SmallAbs(v, src, 8);
        }
        src_bitsize = 8;
        break;
    case IntFormat::U16:
        // Only selectors 0 and 2 are accepted for 16-bit sources
        if (enc.selector == 1 || enc.selector == 3) {
            throw NotImplementedException("Invalid U16 selector {}", enc.selector.Value());
        }
        src = v.ir.BitFieldExtract(src, v.ir.Imm32(static_cast<u32>(enc.selector) * 8),
                                   v.ir.Imm32(16), is_signed);
        if (take_abs) {
            src = SmallAbs(v, src, 16);
        }
        src_bitsize = 16;
        break;
    case IntFormat::U32:
    case IntFormat::U64:
        if (enc.selector != 0) {
            throw NotImplementedException("Unexpected selector {}", enc.selector.Value());
        }
        if (take_abs && is_signed) {
            src = v.ir.IAbs(src);
        }
        src_bitsize = wide_source ? 64 : 32;
        break;
    }

    // Step 2: conversion. Narrow sources were widened to 32 bits above.
    const int conversion_src_bitsize{wide_source ? 64 : 32};
    const int dst_bitsize{BitSize(enc.float_format)};
    const IR::FpControl fp_control{
        .no_contraction = false,
        .rounding = CastFpRounding(enc.fp_rounding),
        .fmz_mode = IR::FmzMode::DontCare,
    };
    auto value{v.ir.ConvertIToF(dst_bitsize, conversion_src_bitsize, is_signed, src, fp_control)};

    // Step 3: negate modifier
    if (enc.neg != 0) {
        if (take_abs || !is_signed) {
            // The input is treated as positive here, negate unconditionally
            value = v.ir.FPNeg(value);
        } else {
            // Only negate if the input isn't the lowest value of its width
            IR::U1 is_least;
            if (src_bitsize == 64) {
                is_least = v.ir.IEqual(src, v.ir.Imm64(std::numeric_limits<s64>::min()));
            } else if (src_bitsize == 32) {
                is_least = v.ir.IEqual(src, v.ir.Imm32(std::numeric_limits<s32>::min()));
            } else {
                const IR::U32 least_value{v.ir.Imm32(-(1 << (src_bitsize - 1)))};
                is_least = v.ir.IEqual(src, least_value);
            }
            value = IR::F16F32F64{v.ir.Select(is_least, value, v.ir.FPNeg(value))};
        }
    }

    // Step 4: store
    switch (enc.float_format) {
    case FloatFormat::F16: {
        // The half result is packed together with a zero half into one 32-bit register
        const IR::F16 zero{v.ir.FPConvert(16, v.ir.Imm32(0.0f))};
        v.X(enc.dest_reg, v.ir.PackFloat2x16(v.ir.CompositeConstruct(value, zero)));
        break;
    }
    case FloatFormat::F32:
        v.F(enc.dest_reg, value);
        break;
    case FloatFormat::F64: {
        if (!IR::IsAligned(enc.dest_reg, 2)) {
            throw NotImplementedException("Unaligned destination {}", enc.dest_reg.Value());
        }
        // The double is split into two words written to consecutive registers
        const IR::Value vector{v.ir.UnpackDouble2x32(value)};
        for (int i = 0; i < 2; ++i) {
            v.X(enc.dest_reg + i, IR::U32{v.ir.CompositeExtract(vector, static_cast<size_t>(i))});
        }
        break;
    }
    default:
        throw NotImplementedException("Invalid float format {}", enc.float_format.Value());
    }
}
|
||||
} // Anonymous namespace
|
||||
|
||||
// I2F with a register source operand.
// When Is64(insn) holds, the register encoded in bits [27:20] and the register after it are
// packed into one 64-bit value; otherwise the 32-bit register operand is forwarded as-is.
void TranslatorVisitor::I2F_reg(u64 insn) {
    if (!Is64(insn)) {
        I2F(*this, insn, GetReg20(insn));
        return;
    }
    union {
        u64 raw;
        BitField<20, 8, IR::Reg> reg;
    } const encoding{insn};
    const IR::Value reg_pair{
        ir.CompositeConstruct(ir.GetReg(encoding.reg), ir.GetReg(encoding.reg + 1))};
    I2F(*this, insn, ir.PackUint2x32(reg_pair));
}
|
||||
|
||||
// I2F with a constant-buffer source operand; uses the packed accessor when Is64(insn) holds.
void TranslatorVisitor::I2F_cbuf(u64 insn) {
    if (!Is64(insn)) {
        I2F(*this, insn, GetCbuf(insn));
        return;
    }
    I2F(*this, insn, GetPackedCbuf(insn));
}
|
||||
|
||||
// I2F with an immediate source operand; uses the packed accessor when Is64(insn) holds.
void TranslatorVisitor::I2F_imm(u64 insn) {
    if (!Is64(insn)) {
        I2F(*this, insn, GetImm20(insn));
        return;
    }
    I2F(*this, insn, GetPackedImm20(insn));
}
|
||||
|
||||
} // namespace Shader::Maxwell
|
@@ -0,0 +1,82 @@
|
||||
// Copyright 2021 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include "common/bit_field.h"
|
||||
#include "common/common_types.h"
|
||||
#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
|
||||
|
||||
namespace Shader::Maxwell {
|
||||
namespace {
|
||||
// Encodings of the two-bit SHF maximum-shift field.
enum class MaxShift : u64 {
    U32 = 0,
    Undefined = 1, // Rejected by SHF with NotImplementedException
    U64 = 2,
    S64 = 3,
};
|
||||
|
||||
// Shifts a packed 64-bit value by safe_shift.
// Left shifts are always logical; right shifts are arithmetic when is_signed is set and logical
// otherwise.
IR::U64 PackedShift(IR::IREmitter& ir, const IR::U64& packed_int, const IR::U32& safe_shift,
                    bool right_shift, bool is_signed) {
    if (right_shift) {
        if (is_signed) {
            return ir.ShiftRightArithmetic(packed_int, safe_shift);
        } else {
            return ir.ShiftRightLogical(packed_int, safe_shift);
        }
    }
    return ir.ShiftLeftLogical(packed_int, safe_shift);
}
|
||||
|
||||
// Funnel shift: packs a low word (source register A) and high_bits into a 64-bit value, shifts
// it, and stores one 32-bit half of the shifted value in the destination register.
//   shift       - shift amount operand
//   high_bits   - upper 32 bits of the packed value
//   right_shift - true: shift right and keep the low word; false: shift left and keep the high word
// Throws NotImplementedException for CC, X mode and the undefined MaxShift encoding.
void SHF(TranslatorVisitor& v, u64 insn, const IR::U32& shift, const IR::U32& high_bits,
         bool right_shift) {
    union {
        u64 insn;
        BitField<0, 8, IR::Reg> dest_reg;
        // Source register A is encoded in bits [15:8], like the first source register of the
        // other integer translators. Decoding it from bits [7:0] would alias the destination.
        BitField<8, 8, IR::Reg> lo_bits_reg;
        BitField<37, 2, MaxShift> max_shift;
        BitField<47, 1, u64> cc;
        BitField<48, 2, u64> x_mode;
        BitField<50, 1, u64> wrap;
    } const shf{insn};

    if (shf.cc != 0) {
        throw NotImplementedException("SHF CC");
    }
    if (shf.x_mode != 0) {
        throw NotImplementedException("SHF X Mode");
    }
    if (shf.max_shift == MaxShift::Undefined) {
        throw NotImplementedException("SHF Use of undefined MaxShift value");
    }
    const IR::U32 low_bits{v.X(shf.lo_bits_reg)};
    const IR::U64 packed_int{v.ir.PackUint2x32(v.ir.CompositeConstruct(low_bits, high_bits))};

    const bool is_32bit{shf.max_shift == MaxShift::U32};
    // Wrapping reduces the shift modulo the operand width, so the mask must be width - 1
    // (31 or 63). Deriving it from the clamp limit would give 62 for the 64-bit modes and
    // silently drop the lowest bit of the shift amount.
    // Clamping keeps the existing limits: 32 for U32, 63 for the 64-bit modes.
    const IR::U32 safe_shift{shf.wrap != 0
                                 ? v.ir.BitwiseAnd(shift, v.ir.Imm32(is_32bit ? 31 : 63))
                                 : v.ir.UMin(shift, v.ir.Imm32(is_32bit ? 32 : 63))};

    const bool is_signed{shf.max_shift == MaxShift::S64};
    const IR::U64 shifted_value{PackedShift(v.ir, packed_int, safe_shift, right_shift, is_signed)};
    const IR::Value unpacked_value{v.ir.UnpackUint2x32(shifted_value)};

    // Right shifts produce the low word, left shifts the high word
    const IR::U32 result{v.ir.CompositeExtract(unpacked_value, right_shift ? 0 : 1)};
    v.X(shf.dest_reg, result);
}
|
||||
} // Anonymous namespace
|
||||
|
||||
// SHF left shift with a register shift amount.
void TranslatorVisitor::SHF_l_reg(u64 insn) {
    constexpr bool is_right_shift{false};
    SHF(*this, insn, GetReg20(insn), GetReg39(insn), is_right_shift);
}
|
||||
|
||||
// SHF left shift with an immediate shift amount.
void TranslatorVisitor::SHF_l_imm(u64 insn) {
    constexpr bool is_right_shift{false};
    SHF(*this, insn, GetImm20(insn), GetReg39(insn), is_right_shift);
}
|
||||
|
||||
// SHF right shift with a register shift amount.
void TranslatorVisitor::SHF_r_reg(u64 insn) {
    constexpr bool is_right_shift{true};
    SHF(*this, insn, GetReg20(insn), GetReg39(insn), is_right_shift);
}
|
||||
|
||||
// SHF right shift with an immediate shift amount.
void TranslatorVisitor::SHF_r_imm(u64 insn) {
    constexpr bool is_right_shift{true};
    SHF(*this, insn, GetImm20(insn), GetReg39(insn), is_right_shift);
}
|
||||
|
||||
} // namespace Shader::Maxwell
|
@@ -0,0 +1,64 @@
|
||||
// Copyright 2021 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include "common/bit_field.h"
|
||||
#include "common/common_types.h"
|
||||
#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
|
||||
|
||||
namespace Shader::Maxwell {
|
||||
namespace {
|
||||
// Integer minimum/maximum: computes both min and max of source register A and op_b (signed or
// unsigned per bit 48) and selects between them with a predicate.
// With neg_pred clear the predicate picks min when true and max when false; neg_pred swaps them.
// Throws NotImplementedException when CC or a non-zero mode is requested.
void IMNMX(TranslatorVisitor& v, u64 insn, const IR::U32& op_b) {
    union {
        u64 insn;
        BitField<0, 8, IR::Reg> dest_reg;
        BitField<8, 8, IR::Reg> src_reg;
        BitField<39, 3, IR::Pred> pred;
        BitField<42, 1, u64> neg_pred;
        BitField<43, 2, u64> mode;
        BitField<47, 1, u64> cc;
        BitField<48, 1, u64> is_signed;
    } const imnmx{insn};

    if (imnmx.cc != 0) {
        throw NotImplementedException("IMNMX CC");
    }
    if (imnmx.mode != 0) {
        throw NotImplementedException("IMNMX.MODE");
    }

    const IR::U1 pred{v.ir.GetPred(imnmx.pred)};
    const IR::U32 op_a{v.X(imnmx.src_reg)};

    const bool is_signed{imnmx.is_signed != 0};
    const IR::U32 minimum{is_signed ? IR::U32{v.ir.SMin(op_a, op_b)}
                                    : IR::U32{v.ir.UMin(op_a, op_b)}};
    const IR::U32 maximum{is_signed ? IR::U32{v.ir.SMax(op_a, op_b)}
                                    : IR::U32{v.ir.UMax(op_a, op_b)}};

    // A negated predicate exchanges the roles of the two candidates
    const bool swap_candidates{imnmx.neg_pred != 0};
    const IR::U32& when_true{swap_candidates ? maximum : minimum};
    const IR::U32& when_false{swap_candidates ? minimum : maximum};

    const IR::U32 result{v.ir.Select(pred, when_true, when_false)};
    v.X(imnmx.dest_reg, result);
}
|
||||
} // Anonymous namespace
|
||||
|
||||
// IMNMX with a register second operand.
void TranslatorVisitor::IMNMX_reg(u64 insn) {
    const IR::U32 operand_b{GetReg20(insn)};
    IMNMX(*this, insn, operand_b);
}
|
||||
|
||||
// IMNMX with a constant-buffer second operand.
void TranslatorVisitor::IMNMX_cbuf(u64 insn) {
    const IR::U32 operand_b{GetCbuf(insn)};
    IMNMX(*this, insn, operand_b);
}
|
||||
|
||||
// IMNMX with an immediate second operand.
void TranslatorVisitor::IMNMX_imm(u64 insn) {
    const IR::U32 operand_b{GetImm20(insn)};
    IMNMX(*this, insn, operand_b);
}
|
||||
|
||||
} // namespace Shader::Maxwell
|
36
src/shader_recompiler/frontend/maxwell/translate/impl/integer_popcount.cpp
Executable file
36
src/shader_recompiler/frontend/maxwell/translate/impl/integer_popcount.cpp
Executable file
@@ -0,0 +1,36 @@
|
||||
// Copyright 2021 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include "common/bit_field.h"
|
||||
#include "common/common_types.h"
|
||||
#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
|
||||
|
||||
namespace Shader::Maxwell {
|
||||
namespace {
|
||||
// Population count: writes BitCount of src to the destination register. When the tilde bit
// (bit 40) is set, src is bitwise-inverted before counting.
void POPC(TranslatorVisitor& v, u64 insn, const IR::U32& src) {
    union {
        u64 raw;
        BitField<0, 8, IR::Reg> dest_reg;
        BitField<40, 1, u64> tilde;
    } const popc{insn};

    IR::U32 counted{src};
    if (popc.tilde != 0) {
        counted = v.ir.BitwiseNot(src);
    }
    const IR::U32 bit_count{v.ir.BitCount(counted)};
    v.X(popc.dest_reg, bit_count);
}
|
||||
} // Anonymous namespace
|
||||
|
||||
// POPC with a register source.
void TranslatorVisitor::POPC_reg(u64 insn) {
    const IR::U32 source{GetReg20(insn)};
    POPC(*this, insn, source);
}
|
||||
|
||||
// POPC with a constant-buffer source.
void TranslatorVisitor::POPC_cbuf(u64 insn) {
    const IR::U32 source{GetCbuf(insn)};
    POPC(*this, insn, source);
}
|
||||
|
||||
// POPC with an immediate source.
void TranslatorVisitor::POPC_imm(u64 insn) {
    const IR::U32 source{GetImm20(insn)};
    POPC(*this, insn, source);
}
|
||||
|
||||
} // namespace Shader::Maxwell
|
86
src/shader_recompiler/frontend/maxwell/translate/impl/integer_scaled_add.cpp
Executable file
86
src/shader_recompiler/frontend/maxwell/translate/impl/integer_scaled_add.cpp
Executable file
@@ -0,0 +1,86 @@
|
||||
// Copyright 2021 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include "common/bit_field.h"
|
||||
#include "common/common_types.h"
|
||||
#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
|
||||
|
||||
namespace Shader::Maxwell {
|
||||
namespace {
|
||||
// Scaled integer add: dest = (op_a << scale_imm) + op_b.
//   neg_a / neg_b - negate the respective operand; when both are set they mean "plus one" (PO)
//                   instead, and op_b is incremented rather than either operand being negated
//   cc            - also update the Z, S, C and O flags from the result
void ISCADD(TranslatorVisitor& v, u64 insn, IR::U32 op_b, bool cc, bool neg_a, bool neg_b,
            u64 scale_imm) {
    union {
        u64 raw;
        BitField<0, 8, IR::Reg> dest_reg;
        BitField<8, 8, IR::Reg> op_a;
    } const iscadd{insn};

    const bool plus_one{neg_a && neg_b};
    IR::U32 op_a{v.X(iscadd.op_a)};
    if (!plus_one) {
        // Without PO the two bits are plain negation modifiers
        if (neg_a) {
            op_a = v.ir.INeg(op_a);
        }
        if (neg_b) {
            op_b = v.ir.INeg(op_b);
        }
    } else {
        // PO adds one to the second operand
        op_b = v.ir.IAdd(op_b, v.ir.Imm32(1));
    }

    // Operand modifiers are applied before A is scaled
    const IR::U32 shift_amount{v.ir.Imm32(static_cast<u32>(scale_imm))};
    const IR::U32 scaled_a{v.ir.ShiftLeftLogical(op_a, shift_amount)};

    const IR::U32 result{v.ir.IAdd(scaled_a, op_b)};
    v.X(iscadd.dest_reg, result);

    if (!cc) {
        return;
    }
    v.SetZFlag(v.ir.GetZeroFromOp(result));
    v.SetSFlag(v.ir.GetSignFromOp(result));
    const IR::U1 carry{v.ir.GetCarryFromOp(result)};
    const IR::U1 overflow{v.ir.GetOverflowFromOp(result)};
    if (plus_one) {
        // With PO the increment of op_b contributes to carry and overflow as well
        v.SetCFlag(v.ir.LogicalOr(carry, v.ir.GetCarryFromOp(op_b)));
        v.SetOFlag(v.ir.LogicalOr(overflow, v.ir.GetOverflowFromOp(op_b)));
    } else {
        v.SetCFlag(carry);
        v.SetOFlag(overflow);
    }
}
|
||||
|
||||
// Decodes the CC, negate and scale fields shared by the reg/cbuf/imm encodings and forwards to
// the full ISCADD implementation.
void ISCADD(TranslatorVisitor& v, u64 insn, IR::U32 op_b) {
    union {
        u64 raw;
        BitField<47, 1, u64> cc;
        BitField<48, 1, u64> neg_b;
        BitField<49, 1, u64> neg_a;
        BitField<39, 5, u64> scale;
    } const iscadd{insn};

    const bool update_cc{iscadd.cc != 0};
    const bool negate_a{iscadd.neg_a != 0};
    const bool negate_b{iscadd.neg_b != 0};
    ISCADD(v, insn, op_b, update_cc, negate_a, negate_b, iscadd.scale);
}
|
||||
|
||||
} // Anonymous namespace
|
||||
|
||||
// ISCADD with a register second operand.
void TranslatorVisitor::ISCADD_reg(u64 insn) {
    const IR::U32 operand_b{GetReg20(insn)};
    ISCADD(*this, insn, operand_b);
}
|
||||
|
||||
// ISCADD with a constant-buffer second operand.
void TranslatorVisitor::ISCADD_cbuf(u64 insn) {
    const IR::U32 operand_b{GetCbuf(insn)};
    ISCADD(*this, insn, operand_b);
}
|
||||
|
||||
// ISCADD with a 20-bit immediate second operand.
void TranslatorVisitor::ISCADD_imm(u64 insn) {
    const IR::U32 operand_b{GetImm20(insn)};
    ISCADD(*this, insn, operand_b);
}
|
||||
|
||||
// ISCADD with a 32-bit immediate. This encoding has its own CC and scale fields and never
// negates either operand.
void TranslatorVisitor::ISCADD32I(u64 insn) {
    union {
        u64 raw;
        BitField<52, 1, u64> cc;
        BitField<53, 5, u64> scale;
    } const iscadd{insn};

    const bool update_cc{iscadd.cc != 0};
    ISCADD(*this, insn, GetImm32(insn), update_cc, false, false, iscadd.scale);
}
|
||||
|
||||
} // namespace Shader::Maxwell
|
@@ -0,0 +1,49 @@
|
||||
// Copyright 2021 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include "common/bit_field.h"
|
||||
#include "common/common_types.h"
|
||||
#include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h"
|
||||
#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
|
||||
|
||||
namespace Shader::Maxwell {
|
||||
namespace {
|
||||
// Integer compare-and-set-predicate: compares source register A with op_b, combines the
// comparison (and its negation) with a third predicate through the boolean operation, and writes
// the two outcomes to dest_pred_a and dest_pred_b respectively.
void ISETP(TranslatorVisitor& v, u64 insn, const IR::U32& op_b) {
    union {
        u64 raw;
        BitField<0, 3, IR::Pred> dest_pred_b;
        BitField<3, 3, IR::Pred> dest_pred_a;
        BitField<8, 8, IR::Reg> src_reg_a;
        BitField<39, 3, IR::Pred> bop_pred;
        BitField<42, 1, u64> neg_bop_pred;
        BitField<45, 2, BooleanOp> bop;
        BitField<48, 1, u64> is_signed;
        BitField<49, 3, CompareOp> compare_op;
    } const isetp{insn};

    const BooleanOp combine_op{isetp.bop};
    const CompareOp cmp_op{isetp.compare_op};
    const bool is_signed{isetp.is_signed != 0};
    const bool negate_bop_pred{isetp.neg_bop_pred != 0};

    const IR::U32 op_a{v.X(isetp.src_reg_a)};
    const IR::U1 comparison{IntegerCompare(v.ir, op_a, op_b, cmp_op, is_signed)};
    const IR::U1 bop_pred{v.ir.GetPred(isetp.bop_pred, negate_bop_pred)};

    // Predicate A receives the comparison, predicate B its logical negation
    const IR::U1 result_a{PredicateCombine(v.ir, comparison, bop_pred, combine_op)};
    const IR::U1 inverse_comparison{v.ir.LogicalNot(comparison)};
    const IR::U1 result_b{PredicateCombine(v.ir, inverse_comparison, bop_pred, combine_op)};
    v.ir.SetPred(isetp.dest_pred_a, result_a);
    v.ir.SetPred(isetp.dest_pred_b, result_b);
}
|
||||
} // Anonymous namespace
|
||||
|
||||
// ISETP with a register second operand.
void TranslatorVisitor::ISETP_reg(u64 insn) {
    const IR::U32 operand_b{GetReg20(insn)};
    ISETP(*this, insn, operand_b);
}
|
||||
|
||||
// ISETP with a constant-buffer second operand.
void TranslatorVisitor::ISETP_cbuf(u64 insn) {
    const IR::U32 operand_b{GetCbuf(insn)};
    ISETP(*this, insn, operand_b);
}
|
||||
|
||||
// ISETP with an immediate second operand.
void TranslatorVisitor::ISETP_imm(u64 insn) {
    const IR::U32 operand_b{GetImm20(insn)};
    ISETP(*this, insn, operand_b);
}
|
||||
|
||||
} // namespace Shader::Maxwell
|
71
src/shader_recompiler/frontend/maxwell/translate/impl/integer_shift_left.cpp
Executable file
71
src/shader_recompiler/frontend/maxwell/translate/impl/integer_shift_left.cpp
Executable file
@@ -0,0 +1,71 @@
|
||||
// Copyright 2021 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include "common/bit_field.h"
|
||||
#include "common/common_types.h"
|
||||
#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
|
||||
|
||||
namespace Shader::Maxwell {
|
||||
namespace {
|
||||
// Logical shift left of source register A by unsafe_shift.
// With .W (bit 39) the shift amount is wrapped to [0, 31]; without it, any shift of 32 or more
// produces zero. Throws NotImplementedException for the X and CC modifiers.
void SHL(TranslatorVisitor& v, u64 insn, const IR::U32& unsafe_shift) {
    union {
        u64 insn;
        BitField<0, 8, IR::Reg> dest_reg;
        BitField<8, 8, IR::Reg> src_reg_a;
        BitField<39, 1, u64> w;
        BitField<43, 1, u64> x;
        BitField<47, 1, u64> cc;
    } const shl{insn};

    if (shl.x != 0) {
        throw NotImplementedException("SHL.X");
    }
    if (shl.cc != 0) {
        throw NotImplementedException("SHL.CC");
    }
    const IR::U32 base{v.X(shl.src_reg_a)};

    if (shl.w != 0) {
        // .W wraps the shift amount, which we reproduce by masking it ourselves
        const IR::U32 wrapped_shift{v.ir.BitwiseAnd(unsafe_shift, v.ir.Imm32(31))};
        const IR::U32 wrapped_result{v.ir.ShiftLeftLogical(base, wrapped_shift)};
        v.X(shl.dest_reg, wrapped_result);
        return;
    }

    // Without .W the shift amount is clamped to [0, 32], and a shift of 32 yields zero.
    // The out-of-range shift is still emitted and then discarded by the select below. That is
    // allowed because both backends define the *result* as undefined rather than the behavior:
    //
    // SPIR-V, OpShiftLeftLogical:
    // https://www.khronos.org/registry/spir-v/specs/unified1/SPIRV.html#OpShiftLeftLogical
    // "Shift is treated as unsigned. The resulting value is undefined if Shift is greater than
    // or equal to the bit width of the components of Base."
    //
    // GLASM, NV_gpu_program4:
    // https://www.khronos.org/registry/OpenGL/extensions/NV/NV_gpu_program4.txt
    // "The results of a shift operation ("<<") are undefined if the value of the second operand
    // is negative, or greater than or equal to the number of bits in the first operand."
    const IR::U1 in_range{v.ir.ILessThan(unsafe_shift, v.ir.Imm32(32), false)};
    const IR::U32 unchecked_result{v.ir.ShiftLeftLogical(base, unsafe_shift)};
    const IR::U32 clamped_result{v.ir.Select(in_range, unchecked_result, v.ir.Imm32(0))};
    v.X(shl.dest_reg, clamped_result);
}
|
||||
} // Anonymous namespace
|
||||
|
||||
// SHL with a register shift amount.
void TranslatorVisitor::SHL_reg(u64 insn) {
    const IR::U32 shift_amount{GetReg20(insn)};
    SHL(*this, insn, shift_amount);
}
|
||||
|
||||
// SHL with a constant-buffer shift amount.
void TranslatorVisitor::SHL_cbuf(u64 insn) {
    const IR::U32 shift_amount{GetCbuf(insn)};
    SHL(*this, insn, shift_amount);
}
|
||||
|
||||
// SHL with an immediate shift amount.
void TranslatorVisitor::SHL_imm(u64 insn) {
    const IR::U32 shift_amount{GetImm20(insn)};
    SHL(*this, insn, shift_amount);
}
|
||||
|
||||
} // namespace Shader::Maxwell
|
@@ -0,0 +1,66 @@
|
||||
// Copyright 2021 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include "common/bit_field.h"
|
||||
#include "common/common_types.h"
|
||||
#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
|
||||
|
||||
namespace Shader::Maxwell {
|
||||
namespace {
|
||||
// Shift right of source register A by `shift`.
//   bit 39 (is_wrapped) - wrap the shift amount to [0, 31] instead of clamping it
//   bit 40 (brev)       - bit-reverse the source before shifting
//   bit 48 (is_signed)  - arithmetic instead of logical shift
// Without wrapping, a shift of 32 or more yields the fully shifted-out value: zero for logical
// shifts, and the sign fill (0 or -1) of the source for arithmetic shifts.
// Throws NotImplementedException for the X mode and CC modifiers.
void SHR(TranslatorVisitor& v, u64 insn, const IR::U32& shift) {
    union {
        u64 insn;
        BitField<0, 8, IR::Reg> dest_reg;
        BitField<8, 8, IR::Reg> src_reg_a;
        BitField<39, 1, u64> is_wrapped;
        BitField<40, 1, u64> brev;
        BitField<43, 1, u64> xmode;
        BitField<47, 1, u64> cc;
        BitField<48, 1, u64> is_signed;
    } const shr{insn};

    if (shr.xmode != 0) {
        throw NotImplementedException("SHR.XMODE");
    }
    if (shr.cc != 0) {
        throw NotImplementedException("SHR.CC");
    }

    IR::U32 base{v.X(shr.src_reg_a)};
    if (shr.brev == 1) {
        base = v.ir.BitReverse(base);
    }
    const bool is_signed{shr.is_signed == 1};
    IR::U32 result;
    const IR::U32 safe_shift = shr.is_wrapped == 0 ? shift : v.ir.BitwiseAnd(shift, v.ir.Imm32(31));
    if (is_signed) {
        result = IR::U32{v.ir.ShiftRightArithmetic(base, safe_shift)};
    } else {
        result = IR::U32{v.ir.ShiftRightLogical(base, safe_shift)};
    }

    if (shr.is_wrapped == 0) {
        const IR::U32 zero{v.ir.Imm32(0)};
        const IR::U32 safe_bits{v.ir.Imm32(32)};

        const IR::U1 is_safe{v.ir.ILessThan(shift, safe_bits, false)};
        // The replacement for an out-of-range shift must be derived from the source operand.
        // `result` cannot be used: the backends leave it undefined when shift >= 32, so its
        // sign is meaningless. A logical shift always shifts everything out to zero; only an
        // arithmetic shift of a negative source fills with ones.
        IR::U32 clamped_value{zero};
        if (is_signed) {
            const IR::U1 is_negative{v.ir.ILessThan(base, zero, true)};
            clamped_value = IR::U32{v.ir.Select(is_negative, v.ir.Imm32(-1), zero)};
        }
        result = IR::U32{v.ir.Select(is_safe, result, clamped_value)};
    }
    v.X(shr.dest_reg, result);
}
|
||||
} // Anonymous namespace
|
||||
|
||||
// SHR with a register shift amount.
void TranslatorVisitor::SHR_reg(u64 insn) {
    const IR::U32 shift_amount{GetReg20(insn)};
    SHR(*this, insn, shift_amount);
}
|
||||
|
||||
// SHR with a constant-buffer shift amount.
void TranslatorVisitor::SHR_cbuf(u64 insn) {
    const IR::U32 shift_amount{GetCbuf(insn)};
    SHR(*this, insn, shift_amount);
}
|
||||
|
||||
// SHR with an immediate shift amount.
void TranslatorVisitor::SHR_imm(u64 insn) {
    const IR::U32 shift_amount{GetImm20(insn)};
    SHR(*this, insn, shift_amount);
}
|
||||
} // namespace Shader::Maxwell
|
@@ -0,0 +1,135 @@
|
||||
// Copyright 2021 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include "common/bit_field.h"
|
||||
#include "common/common_types.h"
|
||||
#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
|
||||
|
||||
namespace Shader::Maxwell {
|
||||
namespace {
|
||||
// How XMAD derives its addend (operand C) from the third source.
enum class SelectMode : u64 {
    Default = 0, // Use the source unchanged
    CLO = 1,     // Lower 16 bits of the source, zero-extended
    CHI = 2,     // Upper 16 bits of the source, zero-extended
    CSFU = 3,    // Not implemented
    CBCC = 4,    // (src_b << 16) + source
};
|
||||
|
||||
// Selects which 16-bit half of a 32-bit word is used.
enum class Half : u64 {
    H0 = 0, // Least-significant bits (15:0)
    H1 = 1, // Most-significant bits (31:16)
};
|
||||
|
||||
// Extracts the selected 16-bit half of src as a bit field starting at bit 0 (H0) or bit 16 (H1);
// is_signed is forwarded to BitFieldExtract.
IR::U32 ExtractHalf(TranslatorVisitor& v, const IR::U32& src, Half half, bool is_signed) {
    const IR::U32 half_width{v.ir.Imm32(16)};
    if (half == Half::H1) {
        return v.ir.BitFieldExtract(src, v.ir.Imm32(16), half_width, is_signed);
    }
    return v.ir.BitFieldExtract(src, v.ir.Imm32(0), half_width, is_signed);
}
|
||||
|
||||
void XMAD(TranslatorVisitor& v, u64 insn, const IR::U32& src_b, const IR::U32& src_c,
|
||||
SelectMode select_mode, Half half_b, bool psl, bool mrg, bool x) {
|
||||
union {
|
||||
u64 raw;
|
||||
BitField<0, 8, IR::Reg> dest_reg;
|
||||
BitField<8, 8, IR::Reg> src_reg_a;
|
||||
BitField<47, 1, u64> cc;
|
||||
BitField<48, 1, u64> is_a_signed;
|
||||
BitField<49, 1, u64> is_b_signed;
|
||||
BitField<53, 1, Half> half_a;
|
||||
} const xmad{insn};
|
||||
|
||||
if (x) {
|
||||
throw NotImplementedException("XMAD X");
|
||||
}
|
||||
const IR::U32 op_a{ExtractHalf(v, v.X(xmad.src_reg_a), xmad.half_a, xmad.is_a_signed != 0)};
|
||||
const IR::U32 op_b{ExtractHalf(v, src_b, half_b, xmad.is_b_signed != 0)};
|
||||
|
||||
IR::U32 product{v.ir.IMul(op_a, op_b)};
|
||||
if (psl) {
|
||||
// .PSL shifts the product 16 bits
|
||||
product = v.ir.ShiftLeftLogical(product, v.ir.Imm32(16));
|
||||
}
|
||||
const IR::U32 op_c{[&]() -> IR::U32 {
|
||||
switch (select_mode) {
|
||||
case SelectMode::Default:
|
||||
return src_c;
|
||||
case SelectMode::CLO:
|
||||
return ExtractHalf(v, src_c, Half::H0, false);
|
||||
case SelectMode::CHI:
|
||||
return ExtractHalf(v, src_c, Half::H1, false);
|
||||
case SelectMode::CBCC:
|
||||
return v.ir.IAdd(v.ir.ShiftLeftLogical(src_b, v.ir.Imm32(16)), src_c);
|
||||
case SelectMode::CSFU:
|
||||
throw NotImplementedException("XMAD CSFU");
|
||||
}
|
||||
throw NotImplementedException("Invalid XMAD select mode {}", select_mode);
|
||||
}()};
|
||||
IR::U32 result{v.ir.IAdd(product, op_c)};
|
||||
if (mrg) {
|
||||
// .MRG inserts src_b [15:0] into result's [31:16].
|
||||
const IR::U32 lsb_b{ExtractHalf(v, src_b, Half::H0, false)};
|
||||
result = v.ir.BitFieldInsert(result, lsb_b, v.ir.Imm32(16), v.ir.Imm32(16));
|
||||
}
|
||||
if (xmad.cc) {
|
||||
throw NotImplementedException("XMAD CC");
|
||||
}
|
||||
// Store result
|
||||
v.X(xmad.dest_reg, result);
|
||||
}
|
||||
} // Anonymous namespace
|
||||
|
||||
// XMAD with register B and register C operands.
void TranslatorVisitor::XMAD_reg(u64 insn) {
    union {
        u64 raw;
        BitField<35, 1, Half> half_b;
        BitField<36, 1, u64> psl;
        BitField<37, 1, u64> mrg;
        BitField<38, 1, u64> x;
        BitField<50, 3, SelectMode> select_mode;
    } const xmad{insn};

    const bool use_psl{xmad.psl != 0};
    const bool use_mrg{xmad.mrg != 0};
    const bool use_x{xmad.x != 0};
    XMAD(*this, insn, GetReg20(insn), GetReg39(insn), xmad.select_mode, xmad.half_b, use_psl,
         use_mrg, use_x);
}
|
||||
|
||||
// XMAD with a register B operand and a constant-buffer C operand. PSL and MRG are never applied
// in this encoding.
void TranslatorVisitor::XMAD_rc(u64 insn) {
    union {
        u64 raw;
        BitField<50, 2, SelectMode> select_mode;
        BitField<52, 1, Half> half_b;
        BitField<54, 1, u64> x;
    } const xmad{insn};

    const bool use_x{xmad.x != 0};
    XMAD(*this, insn, GetReg39(insn), GetCbuf(insn), xmad.select_mode, xmad.half_b, false, false,
         use_x);
}
|
||||
|
||||
// XMAD with a constant-buffer B operand and a register C operand.
void TranslatorVisitor::XMAD_cr(u64 insn) {
    union {
        u64 raw;
        BitField<50, 2, SelectMode> select_mode;
        BitField<52, 1, Half> half_b;
        BitField<54, 1, u64> x;
        BitField<55, 1, u64> psl;
        BitField<56, 1, u64> mrg;
    } const xmad{insn};

    const bool use_psl{xmad.psl != 0};
    const bool use_mrg{xmad.mrg != 0};
    const bool use_x{xmad.x != 0};
    XMAD(*this, insn, GetCbuf(insn), GetReg39(insn), xmad.select_mode, xmad.half_b, use_psl,
         use_mrg, use_x);
}
|
||||
|
||||
// XMAD with a 16-bit immediate B operand (always taken as the low half) and a register C operand.
void TranslatorVisitor::XMAD_imm(u64 insn) {
    union {
        u64 raw;
        BitField<20, 16, u64> src_b;
        BitField<36, 1, u64> psl;
        BitField<37, 1, u64> mrg;
        BitField<38, 1, u64> x;
        BitField<50, 3, SelectMode> select_mode;
    } const xmad{insn};

    const bool use_psl{xmad.psl != 0};
    const bool use_mrg{xmad.mrg != 0};
    const bool use_x{xmad.x != 0};
    XMAD(*this, insn, ir.Imm32(static_cast<u32>(xmad.src_b)), GetReg39(insn), xmad.select_mode,
         Half::H0, use_psl, use_mrg, use_x);
}
|
||||
|
||||
} // namespace Shader::Maxwell
|
@@ -0,0 +1,126 @@
|
||||
// Copyright 2021 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include "common/bit_field.h"
|
||||
#include "common/common_types.h"
|
||||
#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
|
||||
|
||||
namespace Shader::Maxwell {
|
||||
namespace {
|
||||
// Integer operand widths used by the I2I source and destination format fields.
enum class IntegerWidth : u64 {
    Byte = 0,  // 8 bits
    Short = 1, // 16 bits
    Word = 2,  // 32 bits
};
|
||||
|
||||
// Returns the bit count of the given width (8, 16 or 32) as an immediate.
// Throws NotImplementedException for values outside the enumeration.
[[nodiscard]] IR::U32 WidthSize(IR::IREmitter& ir, IntegerWidth width) {
    if (width == IntegerWidth::Byte) {
        return ir.Imm32(8);
    }
    if (width == IntegerWidth::Short) {
        return ir.Imm32(16);
    }
    if (width == IntegerWidth::Word) {
        return ir.Imm32(32);
    }
    throw NotImplementedException("Invalid width {}", width);
}
|
||||
|
||||
// Non-saturating conversion: keeps the low dst_width bits of src via an unsigned bit-field
// extract starting at bit 0.
[[nodiscard]] IR::U32 ConvertInteger(IR::IREmitter& ir, const IR::U32& src,
                                     IntegerWidth dst_width) {
    const IR::U32 first_bit{ir.Imm32(0)};
    const IR::U32 bit_count{WidthSize(ir, dst_width)};
    constexpr bool is_signed{false};
    return ir.BitFieldExtract(src, first_bit, bit_count, is_signed);
}
|
||||
|
||||
// Saturating conversion of src to the destination width.
// The upper bound is the destination's maximum (signed or unsigned). The lower bound is the
// signed minimum only when both source and destination are signed, and zero otherwise.
// A signed source going to an unsigned destination is first raised to at least zero.
// Throws NotImplementedException for widths outside the enumeration.
[[nodiscard]] IR::U32 SaturateInteger(IR::IREmitter& ir, const IR::U32& src, IntegerWidth dst_width,
                                      bool dst_signed, bool src_signed) {
    const bool both_signed{dst_signed && src_signed};
    const IR::U32 zero{ir.Imm32(0)};
    IR::U32 lower_bound{zero};
    IR::U32 upper_bound{};
    switch (dst_width) {
    case IntegerWidth::Byte:
        if (both_signed) {
            lower_bound = ir.Imm32(0xffffff80);
        }
        upper_bound = dst_signed ? ir.Imm32(0x7f) : ir.Imm32(0xff);
        break;
    case IntegerWidth::Short:
        if (both_signed) {
            lower_bound = ir.Imm32(0xffff8000);
        }
        upper_bound = dst_signed ? ir.Imm32(0x7fff) : ir.Imm32(0xffff);
        break;
    case IntegerWidth::Word:
        if (both_signed) {
            lower_bound = ir.Imm32(0x80000000);
        }
        upper_bound = dst_signed ? ir.Imm32(0x7fffffff) : ir.Imm32(0xffffffff);
        break;
    default:
        throw NotImplementedException("Invalid width {}", dst_width);
    }

    IR::U32 value{src};
    if (!dst_signed && src_signed) {
        // Negative inputs saturate to zero for unsigned destinations
        value = ir.SMax(zero, src);
    }
    if (both_signed) {
        return ir.SClamp(value, lower_bound, upper_bound);
    }
    return ir.UClamp(value, lower_bound, upper_bound);
}
|
||||
|
||||
// Integer-to-integer conversion.
// Extracts a source field of src_fmt width from src_a at byte offset `selector`, optionally
// applies abs and neg, then converts it to dst_fmt either with saturation or by truncation.
// With CC set, Z and S are taken from the result and C and O are reset.
// Throws NotImplementedException for selector values rejected for 16- and 32-bit sources.
void I2I(TranslatorVisitor& v, u64 insn, const IR::U32& src_a) {
    union {
        u64 insn;
        BitField<0, 8, IR::Reg> dest_reg;
        BitField<8, 2, IntegerWidth> dst_fmt;
        BitField<12, 1, u64> dst_fmt_sign;
        BitField<10, 2, IntegerWidth> src_fmt;
        BitField<13, 1, u64> src_fmt_sign;
        BitField<41, 3, u64> selector;
        BitField<45, 1, u64> neg;
        BitField<47, 1, u64> cc;
        BitField<49, 1, u64> abs;
        BitField<50, 1, u64> sat;
    } const i2i{insn};

    // 16-bit sources reject selectors 1 and 3
    if (i2i.src_fmt == IntegerWidth::Short && (i2i.selector == 1 || i2i.selector == 3)) {
        throw NotImplementedException("16-bit source format incompatible with selector {}",
                                      i2i.selector);
    }
    // 32-bit sources only accept selector 0
    if (i2i.src_fmt == IntegerWidth::Word && i2i.selector != 0) {
        throw NotImplementedException("32-bit source format incompatible with selector {}",
                                      i2i.selector);
    }

    const s32 byte_index{static_cast<s32>(i2i.selector)};
    const IR::U32 bit_offset{v.ir.Imm32(byte_index * 8)};
    const IR::U32 bit_count{WidthSize(v.ir, i2i.src_fmt)};
    const bool src_signed{i2i.src_fmt_sign != 0};
    const bool dst_signed{i2i.dst_fmt_sign != 0};
    const bool saturate{i2i.sat != 0};

    IR::U32 value{v.ir.BitFieldExtract(src_a, bit_offset, bit_count, src_signed)};
    if (i2i.abs != 0) {
        value = v.ir.IAbs(value);
    }
    if (i2i.neg != 0) {
        value = v.ir.INeg(value);
    }

    IR::U32 result;
    if (saturate) {
        result = SaturateInteger(v.ir, value, i2i.dst_fmt, dst_signed, src_signed);
    } else {
        result = ConvertInteger(v.ir, value, i2i.dst_fmt);
    }
    v.X(i2i.dest_reg, result);

    if (i2i.cc == 0) {
        return;
    }
    v.SetZFlag(v.ir.GetZeroFromOp(result));
    v.SetSFlag(v.ir.GetSignFromOp(result));
    v.ResetCFlag();
    v.ResetOFlag();
}
|
||||
} // Anonymous namespace
|
||||
|
||||
// I2I with a register source.
void TranslatorVisitor::I2I_reg(u64 insn) {
    const IR::U32 source{GetReg20(insn)};
    I2I(*this, insn, source);
}
|
||||
|
||||
// I2I with a constant-buffer source.
void TranslatorVisitor::I2I_cbuf(u64 insn) {
    const IR::U32 source{GetCbuf(insn)};
    I2I(*this, insn, source);
}
|
||||
|
||||
// I2I with an immediate source.
void TranslatorVisitor::I2I_imm(u64 insn) {
    const IR::U32 source{GetImm20(insn)};
    I2I(*this, insn, source);
}
|
||||
|
||||
} // namespace Shader::Maxwell
|
@@ -0,0 +1,53 @@
|
||||
// Copyright 2021 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include "common/bit_field.h"
|
||||
#include "common/common_types.h"
|
||||
#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
|
||||
|
||||
namespace Shader::Maxwell {
|
||||
namespace {
|
||||
// Encodings of the two-bit ISBERD mode field. Only Default is accepted by the translator.
enum class Mode : u64 {
    Default = 0,
    Patch = 1,
    Prim = 2,
    Attr = 3,
};
|
||||
|
||||
// Encodings of the two-bit ISBERD shift field. Only Default is accepted by the translator.
enum class Shift : u64 {
    Default = 0,
    U16 = 1,
    B32 = 2,
};
|
||||
|
||||
} // Anonymous namespace
|
||||
|
||||
// ISBERD stub: after rejecting every unsupported modifier, it logs a warning and copies the
// source register to the destination register unchanged.
// Throws NotImplementedException for SKEW, O, and any non-default mode or shift.
void TranslatorVisitor::ISBERD(u64 insn) {
    union {
        u64 raw;
        BitField<0, 8, IR::Reg> dest_reg;
        BitField<8, 8, IR::Reg> src_reg;
        BitField<31, 1, u64> skew;
        BitField<32, 1, u64> o;
        BitField<33, 2, Mode> mode;
        BitField<47, 2, Shift> shift;
    } const isberd{insn};

    const bool has_skew{isberd.skew != 0};
    if (has_skew) {
        throw NotImplementedException("SKEW");
    }
    const bool has_o{isberd.o != 0};
    if (has_o) {
        throw NotImplementedException("O");
    }
    if (isberd.mode != Mode::Default) {
        throw NotImplementedException("Mode {}", isberd.mode.Value());
    }
    if (isberd.shift != Shift::Default) {
        throw NotImplementedException("Shift {}", isberd.shift.Value());
    }
    LOG_WARNING(Shader, "(STUBBED) called");
    const IR::U32 passthrough{X(isberd.src_reg)};
    X(isberd.dest_reg, passthrough);
}
|
||||
|
||||
} // namespace Shader::Maxwell
|
62
src/shader_recompiler/frontend/maxwell/translate/impl/load_constant.cpp
Executable file
62
src/shader_recompiler/frontend/maxwell/translate/impl/load_constant.cpp
Executable file
@@ -0,0 +1,62 @@
|
||||
// Copyright 2021 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include "common/bit_field.h"
|
||||
#include "common/common_types.h"
|
||||
#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
|
||||
#include "shader_recompiler/frontend/maxwell/translate/impl/load_constant.h"
|
||||
|
||||
namespace Shader::Maxwell {
|
||||
using namespace LDC;
|
||||
namespace {
|
||||
// Resolves the (constant buffer index, byte offset) pair for an LDC access.
// Only Mode::Default is supported: the index is the immediate index and the offset is reg + imm.
// Any other mode throws NotImplementedException.
std::pair<IR::U32, IR::U32> Slot(IR::IREmitter& ir, Mode mode, const IR::U32& imm_index,
                                 const IR::U32& reg, const IR::U32& imm) {
    if (mode != Mode::Default) {
        throw NotImplementedException("Mode {}", mode);
    }
    return {imm_index, ir.IAdd(reg, imm)};
}
|
||||
} // Anonymous namespace
|
||||
|
||||
// Load from constant buffer.
// Sizes up to 32 bits load one value into the destination register (the S8/S16 variants request
// a signed load). B64 loads a two-component vector into the destination register pair and
// requires an even-aligned destination.
// Throws NotImplementedException for unsupported modes, unaligned B64 destinations and invalid
// sizes.
void TranslatorVisitor::LDC(u64 insn) {
    const Encoding ldc{insn};
    const IR::U32 imm_index{ir.Imm32(static_cast<u32>(ldc.index))};
    const IR::U32 reg{X(ldc.src_reg)};
    const IR::U32 imm{ir.Imm32(static_cast<s32>(ldc.offset))};
    const std::pair<IR::U32, IR::U32> slot{Slot(ir, ldc.mode, imm_index, reg, imm)};
    const IR::U32& index{slot.first};
    const IR::U32& offset{slot.second};

    IR::U32 loaded;
    switch (ldc.size) {
    case Size::U8:
        loaded = IR::U32{ir.GetCbuf(index, offset, 8, false)};
        break;
    case Size::S8:
        loaded = IR::U32{ir.GetCbuf(index, offset, 8, true)};
        break;
    case Size::U16:
        loaded = IR::U32{ir.GetCbuf(index, offset, 16, false)};
        break;
    case Size::S16:
        loaded = IR::U32{ir.GetCbuf(index, offset, 16, true)};
        break;
    case Size::B32:
        loaded = IR::U32{ir.GetCbuf(index, offset, 32, false)};
        break;
    case Size::B64: {
        if (!IR::IsAligned(ldc.dest_reg, 2)) {
            throw NotImplementedException("Unaligned destination register");
        }
        // The 64-bit load yields a vector whose two elements go to consecutive registers
        const IR::Value vector{ir.GetCbuf(index, offset, 64, false)};
        for (int element = 0; element < 2; ++element) {
            X(ldc.dest_reg + element,
              IR::U32{ir.CompositeExtract(vector, static_cast<size_t>(element))});
        }
        return;
    }
    default:
        throw NotImplementedException("Invalid size {}", ldc.size.Value());
    }
    X(ldc.dest_reg, loaded);
}
|
||||
|
||||
} // namespace Shader::Maxwell
|
39
src/shader_recompiler/frontend/maxwell/translate/impl/load_constant.h
Executable file
39
src/shader_recompiler/frontend/maxwell/translate/impl/load_constant.h
Executable file
@@ -0,0 +1,39 @@
|
||||
// Copyright 2021 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "common/bit_field.h"
|
||||
#include "common/common_types.h"
|
||||
#include "shader_recompiler/frontend/ir/reg.h"
|
||||
|
||||
namespace Shader::Maxwell::LDC {
|
||||
|
||||
// Encodings of the two-bit LDC addressing mode field.
enum class Mode : u64 {
    Default = 0,
    IL = 1,
    IS = 2,
    ISL = 3,
};
|
||||
|
||||
// Encodings of the three-bit LDC access size field.
enum class Size : u64 {
    U8 = 0,
    S8 = 1,
    U16 = 2,
    S16 = 3,
    B32 = 4,
    B64 = 5,
};
|
||||
|
||||
union Encoding {
|
||||
u64 raw;
|
||||
BitField<0, 8, IR::Reg> dest_reg;
|
||||
BitField<8, 8, IR::Reg> src_reg;
|
||||
BitField<20, 16, s64> offset;
|
||||
BitField<36, 5, u64> index;
|
||||
BitField<44, 2, Mode> mode;
|
||||
BitField<48, 3, Size> size;
|
||||
};
|
||||
|
||||
} // namespace Shader::Maxwell::LDC
|
108
src/shader_recompiler/frontend/maxwell/translate/impl/load_effective_address.cpp
Executable file
108
src/shader_recompiler/frontend/maxwell/translate/impl/load_effective_address.cpp
Executable file
@@ -0,0 +1,108 @@
|
||||
// Copyright 2021 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include "common/bit_field.h"
|
||||
#include "common/common_types.h"
|
||||
#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
|
||||
|
||||
namespace Shader::Maxwell {
|
||||
namespace {
|
||||
// Shared implementation of the LEA.HI variants.
//
// Packs (offset_lo, offset_hi) into a 64-bit value, optionally negates it, shifts it right
// logically by (32 - scale), takes element 0 of the unpacked result (the slot offset_lo was packed
// into) and adds it to `base`. The sum is written to the destination register.
//
// v         - translator used to access registers and emit IR
// insn      - raw instruction word (destination/offset-low registers, CC bit, predicate)
// base      - value the scaled offset is added to
// offset_hi - second component of the packed 64-bit offset
// scale     - scale amount decoded by the caller
// neg       - negate the packed offset before shifting
// x         - X flag decoded by the caller (unsupported)
//
// Throws NotImplementedException for X, a predicate other than PT, or CC.
void LEA_hi(TranslatorVisitor& v, u64 insn, const IR::U32& base, IR::U32 offset_hi, u64 scale,
            bool neg, bool x) {
    union {
        u64 raw;
        BitField<0, 8, IR::Reg> dest_reg;
        BitField<8, 8, IR::Reg> offset_lo_reg;
        BitField<47, 1, u64> cc;
        BitField<48, 3, IR::Pred> pred;
    } const fields{insn};

    // Reject every unsupported modifier before emitting any IR.
    if (x) {
        throw NotImplementedException("LEA.HI X");
    }
    if (fields.pred != IR::Pred::PT) {
        throw NotImplementedException("LEA.HI Pred");
    }
    if (fields.cc != 0) {
        throw NotImplementedException("LEA.HI CC");
    }

    const IR::U32 low_word{v.X(fields.offset_lo_reg)};
    const IR::Value word_pair{v.ir.CompositeConstruct(low_word, offset_hi)};
    IR::U64 wide_offset{v.ir.PackUint2x32(word_pair)};
    if (neg) {
        wide_offset = IR::U64{v.ir.INeg(wide_offset)};
    }

    const s32 shift_amount{32 - static_cast<s32>(scale)};
    const IR::U64 shifted{v.ir.ShiftRightLogical(wide_offset, v.ir.Imm32(shift_amount))};
    const IR::Value shifted_words{v.ir.UnpackUint2x32(shifted)};
    const IR::U32 addend{v.ir.CompositeExtract(shifted_words, 0)};

    IR::U32 sum{v.ir.IAdd(base, addend)};
    v.X(fields.dest_reg, sum);
}
|
||||
|
||||
// Shared implementation of the LEA.LO variants.
//
// Reads the offset register, optionally negates it, shifts it left by the encoded scale and adds
// `base`; the sum is written to the destination register.
//
// v    - translator used to access registers and emit IR
// insn - raw instruction word (registers, scale, neg, X, CC and predicate fields)
// base - value the scaled offset is added to
//
// Throws NotImplementedException for X, a predicate other than PT, or CC.
void LEA_lo(TranslatorVisitor& v, u64 insn, const IR::U32& base) {
    union {
        u64 raw;
        BitField<0, 8, IR::Reg> dest_reg;
        BitField<8, 8, IR::Reg> offset_lo_reg;
        BitField<39, 5, u64> scale;
        BitField<45, 1, u64> neg;
        BitField<46, 1, u64> x;
        BitField<47, 1, u64> cc;
        BitField<48, 3, IR::Pred> pred;
    } const fields{insn};

    // Reject every unsupported modifier before emitting any IR.
    if (fields.x != 0) {
        throw NotImplementedException("LEA.LO X");
    }
    if (fields.pred != IR::Pred::PT) {
        throw NotImplementedException("LEA.LO Pred");
    }
    if (fields.cc != 0) {
        throw NotImplementedException("LEA.LO CC");
    }

    IR::U32 offset{v.X(fields.offset_lo_reg)};
    const s32 shift_amount{static_cast<s32>(fields.scale)};
    if (fields.neg != 0) {
        offset = IR::U32{v.ir.INeg(offset)};
    }
    const IR::U32 shifted{v.ir.ShiftLeftLogical(offset, v.ir.Imm32(shift_amount))};

    IR::U32 sum{v.ir.IAdd(base, shifted)};
    v.X(fields.dest_reg, sum);
}
|
||||
} // Anonymous namespace
|
||||
|
||||
// LEA.HI, register form: decodes scale/neg/X from bits 28-32, 37 and 38 and forwards to LEA_hi
// with the base taken from GetReg20 and the high offset word from GetReg39.
void TranslatorVisitor::LEA_hi_reg(u64 insn) {
    union {
        u64 raw;
        BitField<28, 5, u64> scale;
        BitField<37, 1, u64> neg;
        BitField<38, 1, u64> x;
    } const fields{insn};

    const u64 scale{fields.scale.Value()};
    const bool negate{fields.neg != 0};
    const bool extended{fields.x != 0};
    LEA_hi(*this, insn, GetReg20(insn), GetReg39(insn), scale, negate, extended);
}
|
||||
|
||||
// LEA.HI, constant-buffer form: decodes scale/neg/X from bits 51-55, 56 and 57 and forwards to
// LEA_hi with the base taken from GetCbuf and the high offset word from GetReg39.
void TranslatorVisitor::LEA_hi_cbuf(u64 insn) {
    union {
        u64 raw;
        BitField<51, 5, u64> scale;
        BitField<56, 1, u64> neg;
        BitField<57, 1, u64> x;
    } const fields{insn};

    const u64 scale{fields.scale.Value()};
    const bool negate{fields.neg != 0};
    const bool extended{fields.x != 0};
    LEA_hi(*this, insn, GetCbuf(insn), GetReg39(insn), scale, negate, extended);
}
|
||||
|
||||
// LEA.LO, register form: the base operand comes from GetReg20.
void TranslatorVisitor::LEA_lo_reg(u64 insn) {
    const IR::U32 base{GetReg20(insn)};
    LEA_lo(*this, insn, base);
}
|
||||
|
||||
// LEA.LO, constant-buffer form: the base operand comes from GetCbuf.
void TranslatorVisitor::LEA_lo_cbuf(u64 insn) {
    const IR::U32 base{GetCbuf(insn)};
    LEA_lo(*this, insn, base);
}
|
||||
|
||||
// LEA.LO, immediate form: the base operand comes from GetImm20.
void TranslatorVisitor::LEA_lo_imm(u64 insn) {
    const IR::U32 base{GetImm20(insn)};
    LEA_lo(*this, insn, base);
}
|
||||
|
||||
} // namespace Shader::Maxwell
|
196
src/shader_recompiler/frontend/maxwell/translate/impl/load_store_attribute.cpp
Executable file
196
src/shader_recompiler/frontend/maxwell/translate/impl/load_store_attribute.cpp
Executable file
@@ -0,0 +1,196 @@
|
||||
// Copyright 2021 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include "common/bit_field.h"
|
||||
#include "common/common_types.h"
|
||||
#include "shader_recompiler/exception.h"
|
||||
#include "shader_recompiler/frontend/ir/ir_emitter.h"
|
||||
#include "shader_recompiler/frontend/maxwell/opcodes.h"
|
||||
#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
|
||||
|
||||
namespace Shader::Maxwell {
|
||||
namespace {
|
||||
// Two-bit access-size field of ALD/AST. NumElements() maps each value to a count of 1..4.
enum class Size : u64 {
    B32 = 0,
    B64 = 1,
    B96 = 2,
    B128 = 3,
};
|
||||
|
||||
// Two-bit interpolation-mode field of IPA (bits 54-55).
// Only Multiply is acted upon by the IPA translator in this file.
enum class InterpolationMode : u64 {
    Pass = 0,
    Multiply = 1,
    Constant = 2,
    Sc = 3,
};
|
||||
|
||||
// Sample-mode field of IPA (bits 52-53). Decoded but not consulted by the IPA translator here.
enum class SampleMode : u64 {
    Default = 0,
    Centroid = 1,
    Offset = 2,
};
|
||||
|
||||
// Returns how many consecutive elements an access of the given size covers
// (B32 -> 1, B64 -> 2, B96 -> 3, B128 -> 4).
// Throws InvalidArgument for any value outside the enumeration.
u32 NumElements(Size size) {
    if (size == Size::B32) {
        return 1;
    }
    if (size == Size::B64) {
        return 2;
    }
    if (size == Size::B96) {
        return 3;
    }
    if (size == Size::B128) {
        return 4;
    }
    throw InvalidArgument("Invalid size {}", size);
}
|
||||
|
||||
template <typename F>
|
||||
void HandleIndexed(TranslatorVisitor& v, IR::Reg index_reg, u32 num_elements, F&& f) {
|
||||
const IR::U32 index_value{v.X(index_reg)};
|
||||
for (u32 element = 0; element < num_elements; ++element) {
|
||||
const IR::U32 final_offset{
|
||||
element == 0 ? index_value : IR::U32{v.ir.IAdd(index_value, v.ir.Imm32(element * 4U))}};
|
||||
f(element, final_offset);
|
||||
}
|
||||
}
|
||||
|
||||
} // Anonymous namespace
|
||||
|
||||
// Translates ALD: loads 1-4 attribute (or patch) values into consecutive destination registers.
//
// With index register RZ the elements are addressed statically from the absolute offset; with
// any other index register they are read through GetAttributeIndexed.
//
// Throws NotImplementedException when the absolute offset is not a multiple of 4, or when an
// indexed access targets a patch.
void TranslatorVisitor::ALD(u64 insn) {
    union {
        u64 raw;
        BitField<0, 8, IR::Reg> dest_reg;
        BitField<8, 8, IR::Reg> index_reg;
        BitField<20, 10, u64> absolute_offset;
        BitField<20, 11, s64> relative_offset;
        BitField<39, 8, IR::Reg> vertex_reg;
        BitField<32, 1, u64> o;
        BitField<31, 1, u64> patch;
        BitField<47, 2, Size> size;
    } const fields{insn};

    const u64 attr_offset{fields.absolute_offset.Value()};
    if (attr_offset % 4 != 0) {
        throw NotImplementedException("Unaligned absolute offset {}", attr_offset);
    }
    const IR::U32 vertex{X(fields.vertex_reg)};
    const u32 count{NumElements(fields.size)};
    const bool is_patch{fields.patch != 0};

    if (fields.index_reg != IR::Reg::RZ) {
        // Indexed path: only regular attributes are supported.
        if (is_patch) {
            throw NotImplementedException("Indirect patch read");
        }
        HandleIndexed(*this, fields.index_reg, count, [&](u32 element, IR::U32 final_offset) {
            F(fields.dest_reg + element, ir.GetAttributeIndexed(final_offset, vertex));
        });
        return;
    }

    // Static path: the offset divided by 4 selects the first attribute/patch slot.
    const u64 first_slot{attr_offset / 4};
    for (u32 element = 0; element < count; ++element) {
        if (is_patch) {
            const IR::Patch patch{first_slot + element};
            F(fields.dest_reg + element, ir.GetPatch(patch));
        } else {
            const IR::Attribute attr{first_slot + element};
            F(fields.dest_reg + element, ir.GetAttribute(attr, vertex));
        }
    }
}
|
||||
|
||||
// Translates AST: stores 1-4 consecutive source registers into attributes (or patches).
//
// Only the statically addressed form (index register RZ) is implemented. Indexed stores are
// rejected up front, so no indexed code path exists below: the previous trailing
// "index_reg == RZ" test and the indexed/patch fallback after it could never execute.
//
// Throws NotImplementedException for an indexed store or when the absolute offset is not a
// multiple of 4.
void TranslatorVisitor::AST(u64 insn) {
    union {
        u64 raw;
        BitField<0, 8, IR::Reg> src_reg;
        BitField<8, 8, IR::Reg> index_reg;
        BitField<20, 10, u64> absolute_offset;
        BitField<20, 11, s64> relative_offset;
        BitField<31, 1, u64> patch;
        BitField<39, 8, IR::Reg> vertex_reg;
        BitField<47, 2, Size> size;
    } const ast{insn};

    if (ast.index_reg != IR::Reg::RZ) {
        throw NotImplementedException("Indexed store");
    }
    const u64 offset{ast.absolute_offset.Value()};
    if (offset % 4 != 0) {
        throw NotImplementedException("Unaligned absolute offset {}", offset);
    }
    const IR::U32 vertex{X(ast.vertex_reg)};
    const u32 num_elements{NumElements(ast.size)};
    const bool is_patch{ast.patch != 0};

    // The offset divided by 4 selects the first attribute/patch slot; each following element
    // goes to the next slot and comes from the next source register.
    for (u32 element = 0; element < num_elements; ++element) {
        if (is_patch) {
            const IR::Patch patch{offset / 4 + element};
            ir.SetPatch(patch, F(ast.src_reg + element));
        } else {
            const IR::Attribute attr{offset / 4 + element};
            ir.SetAttribute(attr, F(ast.src_reg + element), vertex);
        }
    }
}
|
||||
|
||||
// Translates IPA, the instruction used to read varyings from a fragment shader.
// gl_FragCoord is mapped to the gl_Position attribute.
// It yields unknown results when used outside of the fragment shader stage.
//
// Steps, in emission order:
//   1. read the attribute (indexed or direct),
//   2. for generic attributes flagged Perspective in the program header, multiply by PositionW,
//   3. in Multiply interpolation mode, multiply by the multiplier register,
//   4. with .SAT, saturate the value,
//   5. write the result to the destination register.
//
// Throws NotImplementedException for IPA.SAT on FrontFace.
void TranslatorVisitor::IPA(u64 insn) {
    union {
        u64 raw;
        BitField<0, 8, IR::Reg> dest_reg;
        BitField<8, 8, IR::Reg> index_reg;
        BitField<20, 8, IR::Reg> multiplier;
        BitField<30, 8, IR::Attribute> attribute;
        BitField<38, 1, u64> idx;
        BitField<51, 1, u64> sat;
        BitField<52, 2, SampleMode> sample_mode;
        BitField<54, 2, InterpolationMode> interpolation_mode;
    } const ipa{insn};

    // Indexed IPAs are used for indexed varyings.
    // For example:
    //
    // in vec4 colors[4];
    // uniform int idx;
    // void main() {
    //     gl_FragColor = colors[idx];
    // }
    const bool is_indexed{ipa.idx != 0 && ipa.index_reg != IR::Reg::RZ};
    const IR::Attribute attribute{ipa.attribute};
    IR::F32 value{[&]() -> IR::F32 {
        if (is_indexed) {
            return ir.GetAttributeIndexed(X(ipa.index_reg));
        }
        return ir.GetAttribute(attribute);
    }()};

    if (IR::IsGeneric(attribute)) {
        const ProgramHeader& sph{env.SPH()};
        const u32 attr_index{IR::GenericAttributeIndex(attribute)};
        const u32 element{static_cast<u32>(attribute) % 4};
        const auto input_map{sph.ps.GenericInputMap(attr_index)};
        if (input_map[element] == Shader::PixelImap::Perspective) {
            const IR::F32 position_w{ir.GetAttribute(IR::Attribute::PositionW)};
            value = ir.FPMul(value, position_w);
        }
    }

    const bool multiplies{ipa.interpolation_mode == InterpolationMode::Multiply};
    if (multiplies) {
        value = ir.FPMul(value, F(ipa.multiplier));
    }

    // Saturated IPAs are generally generated out of clamped varyings.
    // For example: clamp(some_varying, 0.0, 1.0)
    if (ipa.sat != 0) {
        if (attribute == IR::Attribute::FrontFace) {
            throw NotImplementedException("IPA.SAT on FrontFace");
        }
        value = ir.FPSaturate(value);
    }

    F(ipa.dest_reg, value);
}
|
||||
|
||||
} // namespace Shader::Maxwell
|
@@ -0,0 +1,201 @@
|
||||
// Copyright 2021 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include "common/bit_field.h"
|
||||
#include "common/common_types.h"
|
||||
#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
|
||||
|
||||
namespace Shader::Maxwell {
|
||||
namespace {
|
||||
// Three-bit access-size field of LDL/LDS/STL/STS. GetSize() converts each value to a
// (bit width, signedness) pair.
enum class Size : u64 {
    U8 = 0,
    S8 = 1,
    U16 = 2,
    S16 = 3,
    B32 = 4,
    B64 = 5,
    B128 = 6,
};
|
||||
|
||||
// Computes the memory offset encoded in a local/shared load-store instruction.
//
// When the offset register is RZ, the 24-bit field is used as an unsigned immediate. Otherwise
// the same field is sign-extended and added to the register value.
IR::U32 Offset(TranslatorVisitor& v, u64 insn) {
    union {
        u64 raw;
        BitField<8, 8, IR::Reg> offset_reg;
        BitField<20, 24, u64> absolute_offset;
        BitField<20, 24, s64> relative_offset;
    } const fields{insn};

    const bool is_absolute{fields.offset_reg == IR::Reg::RZ};
    if (is_absolute) {
        return v.ir.Imm32(static_cast<u32>(fields.absolute_offset));
    }
    const s32 displacement{static_cast<s32>(fields.relative_offset.Value())};
    const IR::U32 base{v.X(fields.offset_reg)};
    return v.ir.IAdd(base, v.ir.Imm32(displacement));
}
|
||||
|
||||
// Returns {offset / 4, offset} for a local/shared load-store instruction.
//
// An immediate offset is divided at translation time (unsigned division); a dynamic offset is
// shifted right arithmetically by two bits in the emitted IR.
std::pair<IR::U32, IR::U32> WordOffset(TranslatorVisitor& v, u64 insn) {
    const IR::U32 offset{Offset(v, insn)};
    const IR::U32 word_index{offset.IsImmediate()
                                 ? v.ir.Imm32(offset.U32() / 4)
                                 : IR::U32{v.ir.ShiftRightArithmetic(offset, v.ir.Imm32(2))}};
    return {word_index, offset};
}
|
||||
|
||||
// Decodes the size field (bits 48-50) into {bit width, is_signed}.
// Only S8 and S16 report `true` for signedness.
// Throws NotImplementedException for a value outside the Size enumeration.
std::pair<int, bool> GetSize(u64 insn) {
    union {
        u64 raw;
        BitField<48, 3, Size> size;
    } const encoding{insn};

    const Size size{encoding.size.Value()};
    switch (size) {
    case Size::U8:
    case Size::S8:
        return {8, size == Size::S8};
    case Size::U16:
    case Size::S16:
        return {16, size == Size::S16};
    case Size::B32:
        return {32, false};
    case Size::B64:
        return {64, false};
    case Size::B128:
        return {128, false};
    default:
        throw NotImplementedException("Invalid size {}", encoding.size.Value());
    }
}
|
||||
|
||||
// Extracts the register encoded in bits 0-7 of the instruction word.
IR::Reg Reg(u64 insn) {
    union {
        u64 raw;
        BitField<0, 8, IR::Reg> reg;
    } const fields{insn};

    const IR::Reg decoded{fields.reg};
    return decoded;
}
|
||||
|
||||
// Emits (offset << 3) & 24, i.e. 8 times the two low bits of `offset`: the bit position of the
// addressed byte inside its 32-bit word.
IR::U32 ByteOffset(IR::IREmitter& ir, const IR::U32& offset) {
    const IR::U32 bit_position{ir.ShiftLeftLogical(offset, ir.Imm32(3))};
    return ir.BitwiseAnd(bit_position, ir.Imm32(24));
}
|
||||
|
||||
// Emits (offset << 3) & 16, i.e. 0 or 16 depending on bit 1 of `offset`: the bit position of the
// addressed 16-bit half inside its 32-bit word.
IR::U32 ShortOffset(IR::IREmitter& ir, const IR::U32& offset) {
    const IR::U32 bit_position{ir.ShiftLeftLogical(offset, ir.Imm32(3))};
    return ir.BitwiseAnd(bit_position, ir.Imm32(16));
}
|
||||
} // Anonymous namespace
|
||||
|
||||
// Translates LDL (load from local memory).
//
// 8/16-bit loads read the containing 32-bit word and extract the addressed field, sign-extending
// when the size is signed. 32/64/128-bit loads read one word per destination register.
//
// Throws NotImplementedException when a 32/64/128-bit load targets a destination register that
// is not aligned to the number of words loaded.
void TranslatorVisitor::LDL(u64 insn) {
    const auto [word_offset, offset]{WordOffset(*this, insn)};
    const IR::Reg dest{Reg(insn)};
    const auto [bit_size, is_signed]{GetSize(insn)};

    if (bit_size == 8 || bit_size == 16) {
        const IR::U32 bit{bit_size == 8 ? ByteOffset(ir, offset) : ShortOffset(ir, offset)};
        const IR::U32 word{ir.LoadLocal(word_offset)};
        X(dest, ir.BitFieldExtract(word, bit, ir.Imm32(bit_size), is_signed));
        return;
    }
    if (bit_size != 32 && bit_size != 64 && bit_size != 128) {
        return;
    }

    const int num_words{bit_size / 32};
    if (!IR::IsAligned(dest, static_cast<size_t>(num_words))) {
        throw NotImplementedException("Unaligned destination register {}", dest);
    }
    // The first word uses the offset as-is; the following ones add their word index to it.
    X(dest, ir.LoadLocal(word_offset));
    for (int word = 1; word < num_words; ++word) {
        const IR::U32 address{ir.IAdd(word_offset, ir.Imm32(word))};
        X(dest + word, ir.LoadLocal(address));
    }
}
|
||||
|
||||
// Translates LDS (load from shared memory).
//
// The load itself is emitted first. 8/16/32-bit results go to a single register; 64/128-bit
// results are split element by element into consecutive registers.
//
// Throws NotImplementedException when a 64/128-bit load targets a destination register that is
// not aligned to the number of words loaded.
void TranslatorVisitor::LDS(u64 insn) {
    const IR::U32 offset{Offset(*this, insn)};
    const IR::Reg dest{Reg(insn)};
    const auto [bit_size, is_signed]{GetSize(insn)};
    const IR::Value loaded{ir.LoadShared(bit_size, is_signed, offset)};

    const bool fits_one_register{bit_size == 8 || bit_size == 16 || bit_size == 32};
    if (fits_one_register) {
        X(dest, IR::U32{loaded});
        return;
    }
    if (bit_size != 64 && bit_size != 128) {
        return;
    }

    const int num_words{bit_size / 32};
    if (!IR::IsAligned(dest, static_cast<size_t>(num_words))) {
        throw NotImplementedException("Unaligned destination register {}", dest);
    }
    for (int word = 0; word < num_words; ++word) {
        const IR::U32 component{ir.CompositeExtract(loaded, static_cast<size_t>(word))};
        X(dest + word, component);
    }
}
|
||||
|
||||
// Translates STL (store to local memory).
//
// 8/16-bit stores are read-modify-write: the containing word is loaded, the source bits are
// inserted at the addressed position and the word is written back. 32/64/128-bit stores write
// one word per source register.
//
// Throws NotImplementedException when a 32/64/128-bit store uses a source register that is not
// aligned to the number of words stored.
void TranslatorVisitor::STL(u64 insn) {
    const auto [word_offset, offset]{WordOffset(*this, insn)};
    const IR::Reg reg{Reg(insn)};
    const IR::U32 src{X(reg)};
    const int bit_size{GetSize(insn).first};

    if (bit_size == 8 || bit_size == 16) {
        const IR::U32 bit{bit_size == 8 ? ByteOffset(ir, offset) : ShortOffset(ir, offset)};
        const IR::U32 old_word{ir.LoadLocal(word_offset)};
        const IR::U32 new_word{ir.BitFieldInsert(old_word, src, bit, ir.Imm32(bit_size))};
        ir.WriteLocal(word_offset, new_word);
        return;
    }
    if (bit_size != 32 && bit_size != 64 && bit_size != 128) {
        return;
    }

    const int num_words{bit_size / 32};
    if (!IR::IsAligned(reg, static_cast<size_t>(num_words))) {
        throw NotImplementedException("Unaligned source register");
    }
    // The first word uses the offset as-is; the following ones add their word index to it.
    ir.WriteLocal(word_offset, src);
    for (int word = 1; word < num_words; ++word) {
        ir.WriteLocal(ir.IAdd(word_offset, ir.Imm32(word)), X(reg + word));
    }
}
|
||||
|
||||
// Translates STS (store to shared memory).
//
// 8/16/32-bit stores write a single register. 64-bit stores write a two-register vector and
// 128-bit stores a four-register vector.
//
// Throws NotImplementedException when a 64-bit store uses a source register not aligned to 2, or
// a 128-bit store uses one not aligned to 4.
void TranslatorVisitor::STS(u64 insn) {
    const IR::U32 offset{Offset(*this, insn)};
    const IR::Reg reg{Reg(insn)};
    const int bit_size{GetSize(insn).first};
    switch (bit_size) {
    case 8:
    case 16:
    case 32:
        ir.WriteShared(bit_size, offset, X(reg));
        break;
    case 64:
        if (!IR::IsAligned(reg, 2)) {
            throw NotImplementedException("Unaligned source register {}", reg);
        }
        ir.WriteShared(64, offset, ir.CompositeConstruct(X(reg), X(reg + 1)));
        break;
    case 128: {
        // Four registers are consumed, so the base register must be aligned to 4. This matches
        // the bit_size / 32 alignment required by LDS, LDL and STL in this file (it was 2).
        if (!IR::IsAligned(reg, 4)) {
            throw NotImplementedException("Unaligned source register {}", reg);
        }
        const IR::Value vector{ir.CompositeConstruct(X(reg), X(reg + 1), X(reg + 2), X(reg + 3))};
        ir.WriteShared(128, offset, vector);
        break;
    }
    }
}
|
||||
|
||||
} // namespace Shader::Maxwell
|
184
src/shader_recompiler/frontend/maxwell/translate/impl/load_store_memory.cpp
Executable file
184
src/shader_recompiler/frontend/maxwell/translate/impl/load_store_memory.cpp
Executable file
@@ -0,0 +1,184 @@
|
||||
// Copyright 2021 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include "common/bit_field.h"
|
||||
#include "common/common_types.h"
|
||||
#include "shader_recompiler/exception.h"
|
||||
#include "shader_recompiler/frontend/maxwell/opcodes.h"
|
||||
#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
|
||||
|
||||
namespace Shader::Maxwell {
|
||||
namespace {
|
||||
// Three-bit size field of LDG (bits 48-50).
enum class LoadSize : u64 {
    U8 = 0,   // Zero-extend
    S8 = 1,   // Sign-extend
    U16 = 2,  // Zero-extend
    S16 = 3,  // Sign-extend
    B32 = 4,
    B64 = 5,
    B128 = 6,
    U128 = 7, // ??? (handled exactly like B128 by LDG)
};
|
||||
|
||||
// Three-bit size field of STG (bits 48-50). The field can also hold 7, which has no enumerator.
enum class StoreSize : u64 {
    U8 = 0,  // Zero-extend
    S8 = 1,  // Sign-extend
    U16 = 2, // Zero-extend
    S16 = 3, // Sign-extend
    B32 = 4,
    B64 = 5,
    B128 = 6,
};
|
||||
|
||||
// See Table 27 in https://docs.nvidia.com/cuda/parallel-thread-execution/index.html
|
||||
enum class LoadCache : u64 {
|
||||
CA, // Cache at all levels, likely to be accessed again
|
||||
CG, // Cache at global level (cache in L2 and below, not L1)
|
||||
CI, // ???
|
||||
CV, // Don't cache and fetch again (consider cached system memory lines stale, fetch again)
|
||||
};
|
||||
|
||||
// See Table 28 in https://docs.nvidia.com/cuda/parallel-thread-execution/index.html
|
||||
enum class StoreCache : u64 {
|
||||
WB, // Cache write-back all coherent levels
|
||||
CG, // Cache at global level
|
||||
CS, // Cache streaming, likely to be accessed once
|
||||
WT, // Cache write-through (to system memory)
|
||||
};
|
||||
|
||||
// Builds the 64-bit global-memory address used by LDG/STG.
//
// Without .E the address register is a 32-bit pointer that gets zero-extended. With .E an
// aligned register pair is packed into a 64-bit pointer. The 24-bit offset field is then added:
// unsigned when the address register is RZ (absolute addressing), sign-extended otherwise.
//
// Throws NotImplementedException when .E is used with an address register not aligned to 2.
IR::U64 Address(TranslatorVisitor& v, u64 insn) {
    union {
        u64 raw;
        BitField<8, 8, IR::Reg> addr_reg;
        BitField<20, 24, s64> addr_offset;
        BitField<20, 24, u64> rz_addr_offset;
        BitField<45, 1, u64> e;
    } const mem{insn};

    const bool is_extended{mem.e != 0};
    if (is_extended && !IR::IsAligned(mem.addr_reg, 2)) {
        throw NotImplementedException("Unaligned address register");
    }
    const IR::U64 base{
        is_extended
            // Pack two registers to build the 64-bit address
            ? IR::U64{v.ir.PackUint2x32(
                  v.ir.CompositeConstruct(v.X(mem.addr_reg), v.X(mem.addr_reg + 1)))}
            // LDG/STG without .E uses a 32-bit pointer, zero-extend it
            : IR::U64{v.ir.UConvert(64, v.X(mem.addr_reg))}};

    // When RZ is used, the address is an absolute address
    const bool is_absolute{mem.addr_reg == IR::Reg::RZ};
    const u64 displacement{is_absolute ? static_cast<u64>(mem.rz_addr_offset.Value())
                                       : static_cast<u64>(mem.addr_offset.Value())};
    return v.ir.IAdd(base, v.ir.Imm64(displacement));
}
|
||||
} // Anonymous namespace
|
||||
|
||||
// Translates LDG, which loads global memory into registers.
//
// The address is computed first. Scalar sizes go to a single register; 64/128-bit loads are
// split into 2/4 consecutive registers.
//
// Throws NotImplementedException when a 64/128-bit load targets a destination register not
// aligned to 2/4, or for a size value with no case.
void TranslatorVisitor::LDG(u64 insn) {
    union {
        u64 raw;
        BitField<0, 8, IR::Reg> dest_reg;
        BitField<46, 2, LoadCache> cache;
        BitField<48, 3, LoadSize> size;
    } const ldg{insn};

    // Pointer to load data from
    const IR::U64 address{Address(*this, insn)};
    const IR::Reg dest_reg{ldg.dest_reg};

    // Writes the first `count` components of `vector` to dest_reg, dest_reg + 1, ...
    const auto scatter = [&](const IR::Value& vector, int count) {
        for (int i = 0; i < count; ++i) {
            X(dest_reg + i, IR::U32{ir.CompositeExtract(vector, static_cast<size_t>(i))});
        }
    };

    switch (ldg.size) {
    case LoadSize::U8:
        X(dest_reg, ir.LoadGlobalU8(address));
        return;
    case LoadSize::S8:
        X(dest_reg, ir.LoadGlobalS8(address));
        return;
    case LoadSize::U16:
        X(dest_reg, ir.LoadGlobalU16(address));
        return;
    case LoadSize::S16:
        X(dest_reg, ir.LoadGlobalS16(address));
        return;
    case LoadSize::B32:
        X(dest_reg, ir.LoadGlobal32(address));
        return;
    case LoadSize::B64: {
        if (!IR::IsAligned(dest_reg, 2)) {
            throw NotImplementedException("Unaligned data registers");
        }
        const IR::Value vector{ir.LoadGlobal64(address)};
        scatter(vector, 2);
        return;
    }
    case LoadSize::B128:
    case LoadSize::U128: {
        if (!IR::IsAligned(dest_reg, 4)) {
            throw NotImplementedException("Unaligned data registers");
        }
        const IR::Value vector{ir.LoadGlobal128(address)};
        scatter(vector, 4);
        return;
    }
    default:
        throw NotImplementedException("Invalid LDG size {}", ldg.size.Value());
    }
}
|
||||
|
||||
// Translates STG, which stores registers into global memory.
//
// The address is computed first. Scalar sizes store a single register; 64/128-bit stores build
// a vector from 2/4 consecutive registers.
//
// Throws NotImplementedException when a 64/128-bit store uses a data register not aligned to
// 2/4, or when the 3-bit size field holds a value with no StoreSize enumerator. The latter
// previously fell through the switch and silently emitted no store; it now fails like LDG does.
void TranslatorVisitor::STG(u64 insn) {
    union {
        u64 raw;
        BitField<0, 8, IR::Reg> data_reg;
        BitField<46, 2, StoreCache> cache;
        BitField<48, 3, StoreSize> size;
    } const stg{insn};

    // Pointer to store data into
    const IR::U64 address{Address(*this, insn)};
    const IR::Reg data_reg{stg.data_reg};
    switch (stg.size) {
    case StoreSize::U8:
        ir.WriteGlobalU8(address, X(data_reg));
        break;
    case StoreSize::S8:
        ir.WriteGlobalS8(address, X(data_reg));
        break;
    case StoreSize::U16:
        ir.WriteGlobalU16(address, X(data_reg));
        break;
    case StoreSize::S16:
        ir.WriteGlobalS16(address, X(data_reg));
        break;
    case StoreSize::B32:
        ir.WriteGlobal32(address, X(data_reg));
        break;
    case StoreSize::B64: {
        if (!IR::IsAligned(data_reg, 2)) {
            throw NotImplementedException("Unaligned data registers");
        }
        const IR::Value vector{ir.CompositeConstruct(X(data_reg), X(data_reg + 1))};
        ir.WriteGlobal64(address, vector);
        break;
    }
    case StoreSize::B128: {
        // Braced so the local declaration stays legal now that a `default` label follows.
        if (!IR::IsAligned(data_reg, 4)) {
            throw NotImplementedException("Unaligned data registers");
        }
        const IR::Value vector{
            ir.CompositeConstruct(X(data_reg), X(data_reg + 1), X(data_reg + 2), X(data_reg + 3))};
        ir.WriteGlobal128(address, vector);
        break;
    }
    default:
        throw NotImplementedException("Invalid STG size {}", stg.size.Value());
    }
}
|
||||
|
||||
} // namespace Shader::Maxwell
|
116
src/shader_recompiler/frontend/maxwell/translate/impl/logic_operation.cpp
Executable file
116
src/shader_recompiler/frontend/maxwell/translate/impl/logic_operation.cpp
Executable file
@@ -0,0 +1,116 @@
|
||||
// Copyright 2021 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include "common/bit_field.h"
|
||||
#include "common/common_types.h"
|
||||
#include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h"
|
||||
#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
|
||||
|
||||
namespace Shader::Maxwell {
|
||||
namespace {
|
||||
// Two-bit operation selector of LOP/LOP32I; see LogicalOperation() for what each value emits.
enum class LogicalOp : u64 {
    AND = 0,
    OR = 1,
    XOR = 2,
    PASS_B = 3,
};
|
||||
|
||||
// Emits the bitwise operation selected by `op` on the two operands and returns its result.
// PASS_B emits nothing and returns `operand_2` unchanged.
// Throws NotImplementedException for a value outside the LogicalOp enumeration.
[[nodiscard]] IR::U32 LogicalOperation(IR::IREmitter& ir, const IR::U32& operand_1,
                                       const IR::U32& operand_2, LogicalOp op) {
    if (op == LogicalOp::AND) {
        return ir.BitwiseAnd(operand_1, operand_2);
    }
    if (op == LogicalOp::OR) {
        return ir.BitwiseOr(operand_1, operand_2);
    }
    if (op == LogicalOp::XOR) {
        return ir.BitwiseXor(operand_1, operand_2);
    }
    if (op == LogicalOp::PASS_B) {
        return operand_2;
    }
    throw NotImplementedException("Invalid Logical operation {}", op);
}
|
||||
|
||||
// Core of every LOP variant.
//
// Reads operand A from the source register, optionally inverts A and/or B, applies `bit_op`,
// optionally derives a predicate from the result, optionally updates the condition flags, and
// writes the result to the destination register.
//
// v         - translator used to access registers/flags and emit IR
// insn      - raw instruction word (destination and source registers)
// op_b      - second operand, already decoded by the caller
// x         - X flag (unsupported)
// cc        - update Z/S from the result and reset C/O
// inv_a     - bitwise-invert operand A first
// inv_b     - bitwise-invert operand B first
// bit_op    - logical operation to apply
// pred_op   - when set, predicate operation whose outcome is stored in `dest_pred`
// dest_pred - predicate written when `pred_op` is set
//
// Throws NotImplementedException when `x` is set.
void LOP(TranslatorVisitor& v, u64 insn, IR::U32 op_b, bool x, bool cc, bool inv_a, bool inv_b,
         LogicalOp bit_op, std::optional<PredicateOp> pred_op = std::nullopt,
         IR::Pred dest_pred = IR::Pred::PT) {
    union {
        u64 raw;
        BitField<0, 8, IR::Reg> dest_reg;
        BitField<8, 8, IR::Reg> src_reg;
    } const fields{insn};

    if (x) {
        throw NotImplementedException("X");
    }

    const IR::U32 src_a{v.X(fields.src_reg)};
    const IR::U32 op_a{inv_a ? v.ir.BitwiseNot(src_a) : src_a};
    if (inv_b) {
        op_b = v.ir.BitwiseNot(op_b);
    }

    const IR::U32 result{LogicalOperation(v.ir, op_a, op_b, bit_op)};
    if (pred_op.has_value()) {
        const IR::U1 pred_value{PredicateOperation(v.ir, result, pred_op.value())};
        v.ir.SetPred(dest_pred, pred_value);
    }

    if (cc) {
        if (bit_op != LogicalOp::PASS_B) {
            v.SetZFlag(v.ir.GetZeroFromOp(result));
            v.SetSFlag(v.ir.GetSignFromOp(result));
        } else {
            // PASS_B emits no operation, so the flags are computed from the value directly.
            v.SetZFlag(v.ir.IEqual(result, v.ir.Imm32(0)));
            v.SetSFlag(v.ir.ILessThan(result, v.ir.Imm32(0), true));
        }
        v.ResetCFlag();
        v.ResetOFlag();
    }
    v.X(fields.dest_reg, result);
}
|
||||
|
||||
// Decodes the modifier fields shared by LOP_reg/LOP_cbuf/LOP_imm and forwards to the core LOP.
void LOP(TranslatorVisitor& v, u64 insn, const IR::U32& op_b) {
    union {
        u64 raw;
        BitField<39, 1, u64> inv_a;
        BitField<40, 1, u64> inv_b;
        BitField<41, 2, LogicalOp> bit_op;
        BitField<43, 1, u64> x;
        BitField<44, 2, PredicateOp> pred_op;
        BitField<47, 1, u64> cc;
        BitField<48, 3, IR::Pred> dest_pred;
    } const fields{insn};

    const bool extended{fields.x != 0};
    const bool sets_cc{fields.cc != 0};
    const bool invert_a{fields.inv_a != 0};
    const bool invert_b{fields.inv_b != 0};
    LOP(v, insn, op_b, extended, sets_cc, invert_a, invert_b, fields.bit_op.Value(),
        fields.pred_op.Value(), fields.dest_pred.Value());
}
|
||||
} // Anonymous namespace
|
||||
|
||||
// LOP, register form: operand B comes from GetReg20.
void TranslatorVisitor::LOP_reg(u64 insn) {
    const IR::U32 op_b{GetReg20(insn)};
    LOP(*this, insn, op_b);
}
|
||||
|
||||
// LOP, constant-buffer form: operand B comes from GetCbuf.
void TranslatorVisitor::LOP_cbuf(u64 insn) {
    const IR::U32 op_b{GetCbuf(insn)};
    LOP(*this, insn, op_b);
}
|
||||
|
||||
// LOP, immediate form: operand B comes from GetImm20.
void TranslatorVisitor::LOP_imm(u64 insn) {
    const IR::U32 op_b{GetImm20(insn)};
    LOP(*this, insn, op_b);
}
|
||||
|
||||
// LOP32I: LOP with a 32-bit immediate operand B (GetImm32). Its modifier bits live at different
// positions than in LOP, and no predicate operation is passed on.
void TranslatorVisitor::LOP32I(u64 insn) {
    union {
        u64 raw;
        BitField<52, 1, u64> cc;
        BitField<53, 2, LogicalOp> bit_op;
        BitField<55, 1, u64> inv_a;
        BitField<56, 1, u64> inv_b;
        BitField<57, 1, u64> x;
    } const fields{insn};

    const bool extended{fields.x != 0};
    const bool sets_cc{fields.cc != 0};
    const bool invert_a{fields.inv_a != 0};
    const bool invert_b{fields.inv_b != 0};
    LOP(*this, insn, GetImm32(insn), extended, sets_cc, invert_a, invert_b,
        fields.bit_op.Value());
}
|
||||
} // namespace Shader::Maxwell
|
@@ -0,0 +1,122 @@
|
||||
// Copyright 2021 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include "common/bit_field.h"
|
||||
#include "common/common_types.h"
|
||||
#include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h"
|
||||
#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
|
||||
|
||||
namespace Shader::Maxwell {
|
||||
namespace {
|
||||
// https://forums.developer.nvidia.com/t/reverse-lut-for-lop3-lut/110651
|
||||
// Emulate GPU's LOP3.LUT (three-input logic op with 8-bit truth table)
|
||||
IR::U32 ApplyLUT(IR::IREmitter& ir, const IR::U32& a, const IR::U32& b, const IR::U32& c,
|
||||
u64 ttbl) {
|
||||
IR::U32 r{ir.Imm32(0)};
|
||||
const IR::U32 not_a{ir.BitwiseNot(a)};
|
||||
const IR::U32 not_b{ir.BitwiseNot(b)};
|
||||
const IR::U32 not_c{ir.BitwiseNot(c)};
|
||||
if (ttbl & 0x01) {
|
||||
// r |= ~a & ~b & ~c;
|
||||
const auto lhs{ir.BitwiseAnd(not_a, not_b)};
|
||||
const auto rhs{ir.BitwiseAnd(lhs, not_c)};
|
||||
r = ir.BitwiseOr(r, rhs);
|
||||
}
|
||||
if (ttbl & 0x02) {
|
||||
// r |= ~a & ~b & c;
|
||||
const auto lhs{ir.BitwiseAnd(not_a, not_b)};
|
||||
const auto rhs{ir.BitwiseAnd(lhs, c)};
|
||||
r = ir.BitwiseOr(r, rhs);
|
||||
}
|
||||
if (ttbl & 0x04) {
|
||||
// r |= ~a & b & ~c;
|
||||
const auto lhs{ir.BitwiseAnd(not_a, b)};
|
||||
const auto rhs{ir.BitwiseAnd(lhs, not_c)};
|
||||
r = ir.BitwiseOr(r, rhs);
|
||||
}
|
||||
if (ttbl & 0x08) {
|
||||
// r |= ~a & b & c;
|
||||
const auto lhs{ir.BitwiseAnd(not_a, b)};
|
||||
const auto rhs{ir.BitwiseAnd(lhs, c)};
|
||||
r = ir.BitwiseOr(r, rhs);
|
||||
}
|
||||
if (ttbl & 0x10) {
|
||||
// r |= a & ~b & ~c;
|
||||
const auto lhs{ir.BitwiseAnd(a, not_b)};
|
||||
const auto rhs{ir.BitwiseAnd(lhs, not_c)};
|
||||
r = ir.BitwiseOr(r, rhs);
|
||||
}
|
||||
if (ttbl & 0x20) {
|
||||
// r |= a & ~b & c;
|
||||
const auto lhs{ir.BitwiseAnd(a, not_b)};
|
||||
const auto rhs{ir.BitwiseAnd(lhs, c)};
|
||||
r = ir.BitwiseOr(r, rhs);
|
||||
}
|
||||
if (ttbl & 0x40) {
|
||||
// r |= a & b & ~c;
|
||||
const auto lhs{ir.BitwiseAnd(a, b)};
|
||||
const auto rhs{ir.BitwiseAnd(lhs, not_c)};
|
||||
r = ir.BitwiseOr(r, rhs);
|
||||
}
|
||||
if (ttbl & 0x80) {
|
||||
// r |= a & b & c;
|
||||
const auto lhs{ir.BitwiseAnd(a, b)};
|
||||
const auto rhs{ir.BitwiseAnd(lhs, c)};
|
||||
r = ir.BitwiseOr(r, rhs);
|
||||
}
|
||||
return r;
|
||||
}
|
||||
|
||||
// Shared implementation of the LOP3 variants.
//
// Reads operand A from the source register, applies the truth table `lut` to (A, op_b, op_c),
// writes the result to the destination register and also returns it to the caller.
//
// Throws NotImplementedException when the CC bit (bit 47) is set.
IR::U32 LOP3(TranslatorVisitor& v, u64 insn, const IR::U32& op_b, const IR::U32& op_c, u64 lut) {
    union {
        u64 raw;
        BitField<0, 8, IR::Reg> dest_reg;
        BitField<8, 8, IR::Reg> src_reg;
        BitField<47, 1, u64> cc;
    } const fields{insn};

    const bool sets_cc{fields.cc != 0};
    if (sets_cc) {
        throw NotImplementedException("LOP3 CC");
    }

    const IR::U32 op_a{v.X(fields.src_reg)};
    const IR::U32 combined{ApplyLUT(v.ir, op_a, op_b, op_c, lut)};
    v.X(fields.dest_reg, combined);
    return combined;
}
|
||||
|
||||
// Extracts the 8-bit truth table stored in bits 48-55 of the instruction word.
u64 GetLut48(u64 insn) {
    union {
        u64 raw;
        BitField<48, 8, u64> table;
    } const fields{insn};
    return fields.table.Value();
}
|
||||
} // Anonymous namespace
|
||||
|
||||
// LOP3, register form: operands B and C come from GetReg20/GetReg39 and the truth table from
// bits 28-35. After the operation, the encoded predicate operation is applied to the result and
// stored in the encoded predicate.
//
// Throws NotImplementedException when the X bit (bit 38) is set.
void TranslatorVisitor::LOP3_reg(u64 insn) {
    union {
        u64 raw;
        BitField<28, 8, u64> lut;
        BitField<36, 2, PredicateOp> pred_op;
        BitField<38, 1, u64> x;
        BitField<48, 3, IR::Pred> pred;
    } const fields{insn};

    const bool extended{fields.x != 0};
    if (extended) {
        throw NotImplementedException("LOP3 X");
    }
    const IR::U32 result{LOP3(*this, insn, GetReg20(insn), GetReg39(insn), fields.lut)};
    const IR::U1 pred_value{PredicateOperation(ir, result, fields.pred_op)};
    ir.SetPred(fields.pred, pred_value);
}
|
||||
|
||||
// LOP3, constant-buffer form: operand B from GetCbuf, operand C from GetReg39, truth table from
// bits 48-55. The value returned by LOP3 is not used.
void TranslatorVisitor::LOP3_cbuf(u64 insn) {
    const u64 truth_table{GetLut48(insn)};
    LOP3(*this, insn, GetCbuf(insn), GetReg39(insn), truth_table);
}
|
||||
|
||||
// LOP3, immediate form: operand B from GetImm20, operand C from GetReg39, truth table from bits
// 48-55. The value returned by LOP3 is not used.
void TranslatorVisitor::LOP3_imm(u64 insn) {
    const u64 truth_table{GetLut48(insn)};
    LOP3(*this, insn, GetImm20(insn), GetReg39(insn), truth_table);
}
|
||||
} // namespace Shader::Maxwell
|
@@ -0,0 +1,66 @@
|
||||
// Copyright 2021 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include "common/bit_field.h"
|
||||
#include "shader_recompiler/exception.h"
|
||||
#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
|
||||
|
||||
namespace Shader::Maxwell {
|
||||
namespace {
|
||||
// Source selector decoded from bit 40 of P2R: PR reads predicate registers, CC reads the
// Z/S/C/O flags.
enum class Mode : u64 {
    PR = 0,
    CC = 1,
};
|
||||
} // Anonymous namespace
|
||||
|
||||
// The register form of P2R is not translated; always throws NotImplementedException.
void TranslatorVisitor::P2R_reg(u64 /*insn*/) {
    throw NotImplementedException("P2R (reg)");
}
|
||||
|
||||
// The constant-buffer form of P2R is not translated; always throws NotImplementedException.
void TranslatorVisitor::P2R_cbuf(u64 /*insn*/) {
    throw NotImplementedException("P2R (cbuf)");
}
|
||||
|
||||
// Immediate form of P2R. For every bit set in the low bits of the immediate mask (7 items in PR
// mode, 4 in CC mode) the matching predicate or flag is sampled and placed at bit
// (index + byte_selector * 8) of the result. The bits of the source register covered by the
// shifted mask are cleared first, the sampled bits are OR-ed in, and the result is written to the
// destination register.
void TranslatorVisitor::P2R_imm(u64 insn) {
    union {
        u64 raw;
        BitField<0, 8, IR::Reg> dest_reg;
        BitField<8, 8, IR::Reg> src;
        BitField<40, 1, Mode> mode;
        BitField<41, 2, u64> byte_selector;
    } const p2r{insn};

    const u32 mask{GetImm20(insn).U32()};
    const bool pr_mode{p2r.mode == Mode::PR};
    const u32 num_items{pr_mode ? 7U : 4U};
    const u32 offset{static_cast<u32>(p2r.byte_selector) * 8};

    // Fetches the boolean backing item `item`: a predicate in PR mode, otherwise one of the
    // Z, S, C, O flags (in that order).
    const auto sample{[this, pr_mode](u32 item) -> IR::U1 {
        if (pr_mode) {
            return ir.GetPred(IR::Pred{item});
        }
        switch (item) {
        case 0:
            return ir.GetZFlag();
        case 1:
            return ir.GetSFlag();
        case 2:
            return ir.GetCFlag();
        case 3:
            return ir.GetOFlag();
        }
        throw LogicError("Unreachable P2R index");
    }};

    IR::U32 insert{ir.Imm32(0)};
    for (u32 index = 0; index < num_items; ++index) {
        const bool selected{((mask >> index) & 1) != 0};
        if (selected) {
            const IR::U1 cond{sample(index)};
            const IR::U32 bit{ir.Select(cond, ir.Imm32(1U << (index + offset)), ir.Imm32(0))};
            insert = ir.BitwiseOr(insert, bit);
        }
    }

    const IR::U32 masked_out{ir.BitwiseAnd(X(p2r.src), ir.Imm32(~(mask << offset)))};
    X(p2r.dest_reg, ir.BitwiseOr(masked_out, insert));
}
|
||||
|
||||
} // namespace Shader::Maxwell
|
44
src/shader_recompiler/frontend/maxwell/translate/impl/move_register.cpp
Executable file
44
src/shader_recompiler/frontend/maxwell/translate/impl/move_register.cpp
Executable file
@@ -0,0 +1,44 @@
|
||||
// Copyright 2021 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include "common/bit_field.h"
|
||||
#include "common/common_types.h"
|
||||
#include "shader_recompiler/exception.h"
|
||||
#include "shader_recompiler/frontend/maxwell/opcodes.h"
|
||||
#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
|
||||
|
||||
namespace Shader::Maxwell {
|
||||
namespace {
|
||||
// Shared MOV translation: copies src into the register encoded at bits [0,8).
// The write mask is read from bits [39,43), or from bits [12,16) when is_mov32i is set; any mask
// other than 0xf throws NotImplementedException.
void MOV(TranslatorVisitor& v, u64 insn, const IR::U32& src, bool is_mov32i = false) {
    union {
        u64 raw;
        BitField<0, 8, IR::Reg> dest_reg;
        BitField<39, 4, u64> mask;
        BitField<12, 4, u64> mov32i_mask;
    } const mov{insn};

    const u64 write_mask{is_mov32i ? mov.mov32i_mask.Value() : mov.mask.Value()};
    const bool full_write{write_mask == 0xf};
    if (!full_write) {
        throw NotImplementedException("Non-full move mask");
    }
    v.X(mov.dest_reg, src);
}
|
||||
} // Anonymous namespace
|
||||
|
||||
// Register form of MOV: the source value comes from GetReg20.
void TranslatorVisitor::MOV_reg(u64 insn) {
    MOV(*this, insn, GetReg20(insn));
}
|
||||
|
||||
// Constant-buffer form of MOV: the source value comes from GetCbuf.
void TranslatorVisitor::MOV_cbuf(u64 insn) {
    MOV(*this, insn, GetCbuf(insn));
}
|
||||
|
||||
// Immediate form of MOV: the source value comes from GetImm20.
void TranslatorVisitor::MOV_imm(u64 insn) {
    MOV(*this, insn, GetImm20(insn));
}
|
||||
|
||||
// MOV32I: the source value comes from GetImm32 and the write mask is taken from the MOV32I
// position of the encoding.
void TranslatorVisitor::MOV32I(u64 insn) {
    constexpr bool is_mov32i{true};
    MOV(*this, insn, GetImm32(insn), is_mov32i);
}
|
||||
|
||||
} // namespace Shader::Maxwell
|
@@ -0,0 +1,71 @@
|
||||
// Copyright 2021 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include "common/bit_field.h"
|
||||
#include "shader_recompiler/exception.h"
|
||||
#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
|
||||
|
||||
namespace Shader::Maxwell {
|
||||
namespace {
|
||||
// Destination selector decoded from bit 40 of R2P: PR writes predicate registers, CC writes the
// Z/S/C/O flags.
enum class Mode : u64 {
    PR = 0,
    CC = 1,
};
|
||||
|
||||
// Updates one condition flag selected by index (0 = Z, 1 = S, 2 = C, 3 = O).
// The flag keeps its current value when inv_mask_bit is true and takes src_bit otherwise.
// Any other index throws LogicError.
void SetFlag(IR::IREmitter& ir, const IR::U1& inv_mask_bit, const IR::U1& src_bit, u32 index) {
    switch (index) {
    case 0: {
        const IR::U1 current{ir.GetZFlag()};
        ir.SetZFlag(IR::U1{ir.Select(inv_mask_bit, current, src_bit)});
        return;
    }
    case 1: {
        const IR::U1 current{ir.GetSFlag()};
        ir.SetSFlag(IR::U1{ir.Select(inv_mask_bit, current, src_bit)});
        return;
    }
    case 2: {
        const IR::U1 current{ir.GetCFlag()};
        ir.SetCFlag(IR::U1{ir.Select(inv_mask_bit, current, src_bit)});
        return;
    }
    case 3: {
        const IR::U1 current{ir.GetOFlag()};
        ir.SetOFlag(IR::U1{ir.Select(inv_mask_bit, current, src_bit)});
        return;
    }
    default:
        throw LogicError("Unreachable R2P index");
    }
}
|
||||
|
||||
// Shared R2P translation. For each item (7 predicates in PR mode, 4 flags in CC mode) one bit is
// extracted from the source register at (byte_selector * 8 + index) and one bit from the mask at
// `index`. Where the mask bit is zero the predicate/flag keeps its value; otherwise it receives
// the source bit.
void R2P(TranslatorVisitor& v, u64 insn, const IR::U32& mask) {
    union {
        u64 raw;
        BitField<8, 8, IR::Reg> src_reg;
        BitField<40, 1, Mode> mode;
        BitField<41, 2, u64> byte_selector;
    } const r2p{insn};

    IR::IREmitter& ir{v.ir};
    const IR::U32 source{v.X(r2p.src_reg)};
    const IR::U32 one_bit{ir.Imm32(1)};
    const bool pr_mode{r2p.mode == Mode::PR};
    const u32 item_count{pr_mode ? 7U : 4U};
    const u32 first_bit{static_cast<u32>(r2p.byte_selector) * 8};

    for (u32 index = 0; index < item_count; ++index) {
        const IR::U32 source_pos{ir.Imm32(first_bit + index)};
        const IR::U1 source_is_zero{
            ir.GetZeroFromOp(ir.BitFieldExtract(source, source_pos, one_bit, false))};
        const IR::U1 source_bit{ir.LogicalNot(source_is_zero)};

        const IR::U32 mask_field{ir.BitFieldExtract(mask, ir.Imm32(index), one_bit, false)};
        const IR::U1 keep_old{ir.GetZeroFromOp(mask_field)};

        if (!pr_mode) {
            SetFlag(ir, keep_old, source_bit, index);
            continue;
        }
        const IR::Pred pred{index};
        ir.SetPred(pred, IR::U1{ir.Select(keep_old, ir.GetPred(pred), source_bit)});
    }
}
|
||||
} // Anonymous namespace
|
||||
|
||||
// Register form of R2P: the mask comes from GetReg20.
void TranslatorVisitor::R2P_reg(u64 insn) {
    R2P(*this, insn, GetReg20(insn));
}
|
||||
|
||||
// Constant-buffer form of R2P: the mask comes from GetCbuf.
void TranslatorVisitor::R2P_cbuf(u64 insn) {
    R2P(*this, insn, GetCbuf(insn));
}
|
||||
|
||||
// Immediate form of R2P: the mask comes from GetImm20.
void TranslatorVisitor::R2P_imm(u64 insn) {
    R2P(*this, insn, GetImm20(insn));
}
|
||||
|
||||
} // namespace Shader::Maxwell
|
181
src/shader_recompiler/frontend/maxwell/translate/impl/move_special_register.cpp
Executable file
181
src/shader_recompiler/frontend/maxwell/translate/impl/move_special_register.cpp
Executable file
@@ -0,0 +1,181 @@
|
||||
// Copyright 2021 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include "common/bit_field.h"
|
||||
#include "common/common_types.h"
|
||||
#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
|
||||
|
||||
namespace Shader::Maxwell {
|
||||
namespace {
|
||||
// Special register identifiers as decoded from the 8-bit field at bits [20,28) of S2R.
// Entries named SRnn have no symbolic name in this table. Values are contiguous from 0 to 99.
enum class SpecialRegister : u64 {
    // 0 - 15
    SR_LANEID = 0,
    SR_CLOCK = 1,
    SR_VIRTCFG = 2,
    SR_VIRTID = 3,
    SR_PM0 = 4,
    SR_PM1 = 5,
    SR_PM2 = 6,
    SR_PM3 = 7,
    SR_PM4 = 8,
    SR_PM5 = 9,
    SR_PM6 = 10,
    SR_PM7 = 11,
    SR12 = 12,
    SR13 = 13,
    SR14 = 14,
    SR_ORDERING_TICKET = 15,

    // 16 - 31
    SR_PRIM_TYPE = 16,
    SR_INVOCATION_ID = 17,
    SR_Y_DIRECTION = 18,
    SR_THREAD_KILL = 19,
    SM_SHADER_TYPE = 20,
    SR_DIRECTCBEWRITEADDRESSLOW = 21,
    SR_DIRECTCBEWRITEADDRESSHIGH = 22,
    SR_DIRECTCBEWRITEENABLE = 23,
    SR_MACHINE_ID_0 = 24,
    SR_MACHINE_ID_1 = 25,
    SR_MACHINE_ID_2 = 26,
    SR_MACHINE_ID_3 = 27,
    SR_AFFINITY = 28,
    SR_INVOCATION_INFO = 29,
    SR_WSCALEFACTOR_XY = 30,
    SR_WSCALEFACTOR_Z = 31,

    // 32 - 47
    SR_TID = 32,
    SR_TID_X = 33,
    SR_TID_Y = 34,
    SR_TID_Z = 35,
    SR_CTA_PARAM = 36,
    SR_CTAID_X = 37,
    SR_CTAID_Y = 38,
    SR_CTAID_Z = 39,
    SR_NTID = 40,
    SR_CirQueueIncrMinusOne = 41,
    SR_NLATC = 42,
    SR43 = 43,
    SR_SM_SPA_VERSION = 44,
    SR_MULTIPASSSHADERINFO = 45,
    SR_LWINHI = 46,
    SR_SWINHI = 47,

    // 48 - 63
    SR_SWINLO = 48,
    SR_SWINSZ = 49,
    SR_SMEMSZ = 50,
    SR_SMEMBANKS = 51,
    SR_LWINLO = 52,
    SR_LWINSZ = 53,
    SR_LMEMLOSZ = 54,
    SR_LMEMHIOFF = 55,
    SR_EQMASK = 56,
    SR_LTMASK = 57,
    SR_LEMASK = 58,
    SR_GTMASK = 59,
    SR_GEMASK = 60,
    SR_REGALLOC = 61,
    SR_BARRIERALLOC = 62,
    SR63 = 63,

    // 64 - 79
    SR_GLOBALERRORSTATUS = 64,
    SR65 = 65,
    SR_WARPERRORSTATUS = 66,
    SR_WARPERRORSTATUSCLEAR = 67,
    SR68 = 68,
    SR69 = 69,
    SR70 = 70,
    SR71 = 71,
    SR_PM_HI0 = 72,
    SR_PM_HI1 = 73,
    SR_PM_HI2 = 74,
    SR_PM_HI3 = 75,
    SR_PM_HI4 = 76,
    SR_PM_HI5 = 77,
    SR_PM_HI6 = 78,
    SR_PM_HI7 = 79,

    // 80 - 99
    SR_CLOCKLO = 80,
    SR_CLOCKHI = 81,
    SR_GLOBALTIMERLO = 82,
    SR_GLOBALTIMERHI = 83,
    SR84 = 84,
    SR85 = 85,
    SR86 = 86,
    SR87 = 87,
    SR88 = 88,
    SR89 = 89,
    SR90 = 90,
    SR91 = 91,
    SR92 = 92,
    SR93 = 93,
    SR94 = 94,
    SR95 = 95,
    SR_HWTASKID = 96,
    SR_CIRCULARQUEUEENTRYINDEX = 97,
    SR_CIRCULARQUEUEENTRYADDRESSLOW = 98,
    SR_CIRCULARQUEUEENTRYADDRESSHIGH = 99,
};
|
||||
|
||||
// Produces the IR value for reading a special register.
// Registers that are stubbed return a constant and log a warning; registers without a case here
// throw NotImplementedException.
[[nodiscard]] IR::U32 Read(IR::IREmitter& ir, SpecialRegister special_register) {
    switch (special_register) {
    // Thread and workgroup identifiers
    case SpecialRegister::SR_TID: {
        // Packs the three components of LocalInvocationId into a single word with two
        // BitFieldInsert operations.
        const IR::Value local_id{ir.LocalInvocationId()};
        return ir.BitFieldInsert(
            ir.BitFieldInsert(IR::U32{ir.CompositeExtract(local_id, 0)},
                              IR::U32{ir.CompositeExtract(local_id, 1)}, ir.Imm32(16),
                              ir.Imm32(8)),
            IR::U32{ir.CompositeExtract(local_id, 2)}, ir.Imm32(26), ir.Imm32(6));
    }
    case SpecialRegister::SR_TID_X:
        return ir.LocalInvocationIdX();
    case SpecialRegister::SR_TID_Y:
        return ir.LocalInvocationIdY();
    case SpecialRegister::SR_TID_Z:
        return ir.LocalInvocationIdZ();
    case SpecialRegister::SR_CTAID_X:
        return ir.WorkgroupIdX();
    case SpecialRegister::SR_CTAID_Y:
        return ir.WorkgroupIdY();
    case SpecialRegister::SR_CTAID_Z:
        return ir.WorkgroupIdZ();
    case SpecialRegister::SR_INVOCATION_ID:
        return ir.InvocationId();

    // Subgroup queries
    case SpecialRegister::SR_LANEID:
        return ir.LaneId();
    case SpecialRegister::SR_EQMASK:
        return ir.SubgroupEqMask();
    case SpecialRegister::SR_LTMASK:
        return ir.SubgroupLtMask();
    case SpecialRegister::SR_LEMASK:
        return ir.SubgroupLeMask();
    case SpecialRegister::SR_GTMASK:
        return ir.SubgroupGtMask();
    case SpecialRegister::SR_GEMASK:
        return ir.SubgroupGeMask();

    // Values computed from emitter state
    case SpecialRegister::SR_THREAD_KILL: {
        // All ones for helper invocations, zero otherwise.
        const IR::U1 is_helper{ir.IsHelperInvocation()};
        return IR::U32{ir.Select(is_helper, ir.Imm32(-1), ir.Imm32(0))};
    }
    case SpecialRegister::SR_Y_DIRECTION:
        return ir.BitCast<IR::U32>(ir.YDirection());

    // Stubbed registers
    case SpecialRegister::SR_INVOCATION_INFO:
        LOG_WARNING(Shader, "(STUBBED) SR_INVOCATION_INFO");
        return ir.Imm32(0x00ff'0000);
    case SpecialRegister::SR_WSCALEFACTOR_XY:
        LOG_WARNING(Shader, "(STUBBED) SR_WSCALEFACTOR_XY");
        return ir.Imm32(Common::BitCast<u32>(1.0f));
    case SpecialRegister::SR_WSCALEFACTOR_Z:
        LOG_WARNING(Shader, "(STUBBED) SR_WSCALEFACTOR_Z");
        return ir.Imm32(Common::BitCast<u32>(1.0f));
    case SpecialRegister::SR_AFFINITY:
        LOG_WARNING(Shader, "(STUBBED) SR_AFFINITY");
        return ir.Imm32(0); // This is the default value hardware returns.

    default:
        throw NotImplementedException("S2R special register {}", special_register);
    }
}
|
||||
} // Anonymous namespace
|
||||
|
||||
// S2R: reads the special register encoded at bits [20,28) and stores its value in the register
// encoded at bits [0,8).
void TranslatorVisitor::S2R(u64 insn) {
    union {
        u64 raw;
        BitField<0, 8, IR::Reg> dest_reg;
        BitField<20, 8, SpecialRegister> src_reg;
    } const fields{insn};

    const IR::U32 value{Read(ir, fields.src_reg)};
    X(fields.dest_reg, value);
}
|
||||
|
||||
} // namespace Shader::Maxwell
|
283
src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp
Executable file
283
src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp
Executable file
@@ -0,0 +1,283 @@
|
||||
// Copyright 2021 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include "common/common_types.h"
|
||||
#include "shader_recompiler/exception.h"
|
||||
#include "shader_recompiler/frontend/maxwell/opcodes.h"
|
||||
#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
|
||||
|
||||
namespace Shader::Maxwell {
|
||||
|
||||
[[noreturn]] static void ThrowNotImplemented(Opcode opcode) {
|
||||
throw NotImplementedException("Instruction {} is not implemented", opcode);
|
||||
}
|
||||
|
||||
// ATOM_cas is not translated; always throws NotImplementedException.
void TranslatorVisitor::ATOM_cas(u64 /*insn*/) {
    ThrowNotImplemented(Opcode::ATOM_cas);
}
|
||||
|
||||
// ATOMS_cas is not translated; always throws NotImplementedException.
void TranslatorVisitor::ATOMS_cas(u64 /*insn*/) {
    ThrowNotImplemented(Opcode::ATOMS_cas);
}
|
||||
|
||||
// B2R is not translated; always throws NotImplementedException.
void TranslatorVisitor::B2R(u64 /*insn*/) {
    ThrowNotImplemented(Opcode::B2R);
}
|
||||
|
||||
// BPT is not translated; always throws NotImplementedException.
void TranslatorVisitor::BPT(u64 /*insn*/) {
    ThrowNotImplemented(Opcode::BPT);
}
|
||||
|
||||
// BRA is not translated by this visitor; always throws NotImplementedException.
void TranslatorVisitor::BRA(u64 /*insn*/) {
    ThrowNotImplemented(Opcode::BRA);
}
|
||||
|
||||
// BRK is not translated by this visitor; always throws NotImplementedException.
void TranslatorVisitor::BRK(u64 /*insn*/) {
    ThrowNotImplemented(Opcode::BRK);
}
|
||||
|
||||
// Intentionally emits no IR.
void TranslatorVisitor::CAL() {
    // CAL is a no-op
}
|
||||
|
||||
// CCTL is not translated; always throws NotImplementedException.
void TranslatorVisitor::CCTL(u64 /*insn*/) {
    ThrowNotImplemented(Opcode::CCTL);
}
|
||||
|
||||
// CCTLL is not translated; always throws NotImplementedException.
void TranslatorVisitor::CCTLL(u64 /*insn*/) {
    ThrowNotImplemented(Opcode::CCTLL);
}
|
||||
|
||||
// CONT is not translated by this visitor; always throws NotImplementedException.
void TranslatorVisitor::CONT(u64 /*insn*/) {
    ThrowNotImplemented(Opcode::CONT);
}
|
||||
|
||||
// CS2R is not translated; always throws NotImplementedException.
void TranslatorVisitor::CS2R(u64 /*insn*/) {
    ThrowNotImplemented(Opcode::CS2R);
}
|
||||
|
||||
// FCHK (register form) is not translated; always throws NotImplementedException.
void TranslatorVisitor::FCHK_reg(u64 /*insn*/) {
    ThrowNotImplemented(Opcode::FCHK_reg);
}
|
||||
|
||||
// FCHK (constant-buffer form) is not translated; always throws NotImplementedException.
void TranslatorVisitor::FCHK_cbuf(u64 /*insn*/) {
    ThrowNotImplemented(Opcode::FCHK_cbuf);
}
|
||||
|
||||
// FCHK (immediate form) is not translated; always throws NotImplementedException.
void TranslatorVisitor::FCHK_imm(u64 /*insn*/) {
    ThrowNotImplemented(Opcode::FCHK_imm);
}
|
||||
|
||||
// GETCRSPTR is not translated; always throws NotImplementedException.
void TranslatorVisitor::GETCRSPTR(u64 /*insn*/) {
    ThrowNotImplemented(Opcode::GETCRSPTR);
}
|
||||
|
||||
// GETLMEMBASE is not translated; always throws NotImplementedException.
void TranslatorVisitor::GETLMEMBASE(u64 /*insn*/) {
    ThrowNotImplemented(Opcode::GETLMEMBASE);
}
|
||||
|
||||
// IDE is not translated; always throws NotImplementedException.
void TranslatorVisitor::IDE(u64 /*insn*/) {
    ThrowNotImplemented(Opcode::IDE);
}
|
||||
|
||||
// IDP (register form) is not translated; always throws NotImplementedException.
void TranslatorVisitor::IDP_reg(u64 /*insn*/) {
    ThrowNotImplemented(Opcode::IDP_reg);
}
|
||||
|
||||
// IDP (immediate form) is not translated; always throws NotImplementedException.
void TranslatorVisitor::IDP_imm(u64 /*insn*/) {
    ThrowNotImplemented(Opcode::IDP_imm);
}
|
||||
|
||||
// IMAD_reg is not translated; always throws NotImplementedException.
void TranslatorVisitor::IMAD_reg(u64 /*insn*/) {
    ThrowNotImplemented(Opcode::IMAD_reg);
}
|
||||
|
||||
// IMAD_rc is not translated; always throws NotImplementedException.
void TranslatorVisitor::IMAD_rc(u64 /*insn*/) {
    ThrowNotImplemented(Opcode::IMAD_rc);
}
|
||||
|
||||
// IMAD_cr is not translated; always throws NotImplementedException.
void TranslatorVisitor::IMAD_cr(u64 /*insn*/) {
    ThrowNotImplemented(Opcode::IMAD_cr);
}
|
||||
|
||||
// IMAD_imm is not translated; always throws NotImplementedException.
void TranslatorVisitor::IMAD_imm(u64 /*insn*/) {
    ThrowNotImplemented(Opcode::IMAD_imm);
}
|
||||
|
||||
// IMAD32I is not translated; always throws NotImplementedException.
void TranslatorVisitor::IMAD32I(u64 /*insn*/) {
    ThrowNotImplemented(Opcode::IMAD32I);
}
|
||||
|
||||
// IMADSP_reg is not translated; always throws NotImplementedException.
void TranslatorVisitor::IMADSP_reg(u64 /*insn*/) {
    ThrowNotImplemented(Opcode::IMADSP_reg);
}
|
||||
|
||||
// IMADSP_rc is not translated; always throws NotImplementedException.
void TranslatorVisitor::IMADSP_rc(u64 /*insn*/) {
    ThrowNotImplemented(Opcode::IMADSP_rc);
}
|
||||
|
||||
// IMADSP_cr is not translated; always throws NotImplementedException.
void TranslatorVisitor::IMADSP_cr(u64 /*insn*/) {
    ThrowNotImplemented(Opcode::IMADSP_cr);
}
|
||||
|
||||
// IMADSP_imm is not translated; always throws NotImplementedException.
void TranslatorVisitor::IMADSP_imm(u64 /*insn*/) {
    ThrowNotImplemented(Opcode::IMADSP_imm);
}
|
||||
|
||||
// IMUL_reg is not translated; always throws NotImplementedException.
void TranslatorVisitor::IMUL_reg(u64 /*insn*/) {
    ThrowNotImplemented(Opcode::IMUL_reg);
}
|
||||
|
||||
// IMUL_cbuf is not translated; always throws NotImplementedException.
void TranslatorVisitor::IMUL_cbuf(u64 /*insn*/) {
    ThrowNotImplemented(Opcode::IMUL_cbuf);
}
|
||||
|
||||
// IMUL_imm is not translated; always throws NotImplementedException.
void TranslatorVisitor::IMUL_imm(u64 /*insn*/) {
    ThrowNotImplemented(Opcode::IMUL_imm);
}
|
||||
|
||||
// IMUL32I is not translated; always throws NotImplementedException.
void TranslatorVisitor::IMUL32I(u64 /*insn*/) {
    ThrowNotImplemented(Opcode::IMUL32I);
}
|
||||
|
||||
// JCAL is not translated; always throws NotImplementedException.
void TranslatorVisitor::JCAL(u64 /*insn*/) {
    ThrowNotImplemented(Opcode::JCAL);
}
|
||||
|
||||
// JMP is not translated; always throws NotImplementedException.
void TranslatorVisitor::JMP(u64 /*insn*/) {
    ThrowNotImplemented(Opcode::JMP);
}
|
||||
|
||||
// Intentionally emits no IR.
void TranslatorVisitor::KIL() {
    // KIL is a no-op
}
|
||||
|
||||
// LD is not translated; always throws NotImplementedException.
void TranslatorVisitor::LD(u64 /*insn*/) {
    ThrowNotImplemented(Opcode::LD);
}
|
||||
|
||||
// LEPC is not translated; always throws NotImplementedException.
void TranslatorVisitor::LEPC(u64 /*insn*/) {
    ThrowNotImplemented(Opcode::LEPC);
}
|
||||
|
||||
// LONGJMP is not translated; always throws NotImplementedException.
void TranslatorVisitor::LONGJMP(u64 /*insn*/) {
    ThrowNotImplemented(Opcode::LONGJMP);
}
|
||||
|
||||
// Intentionally emits no IR; the instruction word is ignored.
void TranslatorVisitor::NOP(u64 /*insn*/) {
    // NOP is No-Op.
}
|
||||
|
||||
// Intentionally emits no IR.
void TranslatorVisitor::PBK() {
    // PBK is a no-op
}
|
||||
|
||||
// Intentionally emits no IR.
void TranslatorVisitor::PCNT() {
    // PCNT is a no-op
}
|
||||
|
||||
// PEXIT is not translated; always throws NotImplementedException.
void TranslatorVisitor::PEXIT(u64 /*insn*/) {
    ThrowNotImplemented(Opcode::PEXIT);
}
|
||||
|
||||
// PLONGJMP is not translated; always throws NotImplementedException.
void TranslatorVisitor::PLONGJMP(u64 /*insn*/) {
    ThrowNotImplemented(Opcode::PLONGJMP);
}
|
||||
|
||||
// PRET is not translated; always throws NotImplementedException.
void TranslatorVisitor::PRET(u64 /*insn*/) {
    ThrowNotImplemented(Opcode::PRET);
}
|
||||
|
||||
// PRMT_reg is not translated; always throws NotImplementedException.
void TranslatorVisitor::PRMT_reg(u64 /*insn*/) {
    ThrowNotImplemented(Opcode::PRMT_reg);
}
|
||||
|
||||
// PRMT_rc is not translated; always throws NotImplementedException.
void TranslatorVisitor::PRMT_rc(u64 /*insn*/) {
    ThrowNotImplemented(Opcode::PRMT_rc);
}
|
||||
|
||||
// PRMT_cr is not translated; always throws NotImplementedException.
void TranslatorVisitor::PRMT_cr(u64 /*insn*/) {
    ThrowNotImplemented(Opcode::PRMT_cr);
}
|
||||
|
||||
// PRMT_imm is not translated; always throws NotImplementedException.
void TranslatorVisitor::PRMT_imm(u64 /*insn*/) {
    ThrowNotImplemented(Opcode::PRMT_imm);
}
|
||||
|
||||
// R2B is not translated; always throws NotImplementedException.
void TranslatorVisitor::R2B(u64 /*insn*/) {
    ThrowNotImplemented(Opcode::R2B);
}
|
||||
|
||||
// RAM is not translated; always throws NotImplementedException.
void TranslatorVisitor::RAM(u64 /*insn*/) {
    ThrowNotImplemented(Opcode::RAM);
}
|
||||
|
||||
// RET is not translated; always throws NotImplementedException.
void TranslatorVisitor::RET(u64 /*insn*/) {
    ThrowNotImplemented(Opcode::RET);
}
|
||||
|
||||
// RTT is not translated; always throws NotImplementedException.
void TranslatorVisitor::RTT(u64 /*insn*/) {
    ThrowNotImplemented(Opcode::RTT);
}
|
||||
|
||||
// SAM is not translated; always throws NotImplementedException.
void TranslatorVisitor::SAM(u64 /*insn*/) {
    ThrowNotImplemented(Opcode::SAM);
}
|
||||
|
||||
// SETCRSPTR is not translated; always throws NotImplementedException.
void TranslatorVisitor::SETCRSPTR(u64 /*insn*/) {
    ThrowNotImplemented(Opcode::SETCRSPTR);
}
|
||||
|
||||
// SETLMEMBASE is not translated; always throws NotImplementedException.
void TranslatorVisitor::SETLMEMBASE(u64 /*insn*/) {
    ThrowNotImplemented(Opcode::SETLMEMBASE);
}
|
||||
|
||||
// Intentionally emits no IR.
void TranslatorVisitor::SSY() {
    // SSY is a no-op
}
|
||||
|
||||
// ST is not translated; always throws NotImplementedException.
void TranslatorVisitor::ST(u64 /*insn*/) {
    ThrowNotImplemented(Opcode::ST);
}
|
||||
|
||||
// STP is not translated; always throws NotImplementedException.
void TranslatorVisitor::STP(u64 /*insn*/) {
    ThrowNotImplemented(Opcode::STP);
}
|
||||
|
||||
// SUATOM_cas is not translated; always throws NotImplementedException.
void TranslatorVisitor::SUATOM_cas(u64 /*insn*/) {
    ThrowNotImplemented(Opcode::SUATOM_cas);
}
|
||||
|
||||
// SYNC is not translated by this visitor; always throws NotImplementedException.
void TranslatorVisitor::SYNC(u64 /*insn*/) {
    ThrowNotImplemented(Opcode::SYNC);
}
|
||||
|
||||
// TXA is not translated; always throws NotImplementedException.
void TranslatorVisitor::TXA(u64 /*insn*/) {
    ThrowNotImplemented(Opcode::TXA);
}
|
||||
|
||||
// VABSDIFF is not translated; always throws NotImplementedException.
void TranslatorVisitor::VABSDIFF(u64 /*insn*/) {
    ThrowNotImplemented(Opcode::VABSDIFF);
}
|
||||
|
||||
// VABSDIFF4 is not translated; always throws NotImplementedException.
void TranslatorVisitor::VABSDIFF4(u64 /*insn*/) {
    ThrowNotImplemented(Opcode::VABSDIFF4);
}
|
||||
|
||||
// VADD is not translated; always throws NotImplementedException.
void TranslatorVisitor::VADD(u64 /*insn*/) {
    ThrowNotImplemented(Opcode::VADD);
}
|
||||
|
||||
// VSET is not translated; always throws NotImplementedException.
void TranslatorVisitor::VSET(u64 /*insn*/) {
    ThrowNotImplemented(Opcode::VSET);
}
|
||||
// VSHL is not translated; always throws NotImplementedException.
void TranslatorVisitor::VSHL(u64 /*insn*/) {
    ThrowNotImplemented(Opcode::VSHL);
}
|
||||
|
||||
// VSHR is not translated; always throws NotImplementedException.
void TranslatorVisitor::VSHR(u64 /*insn*/) {
    ThrowNotImplemented(Opcode::VSHR);
}
|
||||
|
||||
} // namespace Shader::Maxwell
|
45
src/shader_recompiler/frontend/maxwell/translate/impl/output_geometry.cpp
Executable file
45
src/shader_recompiler/frontend/maxwell/translate/impl/output_geometry.cpp
Executable file
@@ -0,0 +1,45 @@
|
||||
// Copyright 2021 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include "common/bit_field.h"
|
||||
#include "common/common_types.h"
|
||||
#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
|
||||
|
||||
namespace Shader::Maxwell {
|
||||
namespace {
|
||||
// Shared OUT translation. The stream index is reduced to its two low bits, then EmitVertex and/or
// EndPrimitive are issued on that stream depending on bits 39 (emit) and 40 (cut). The destination
// register is always written with zero.
void OUT(TranslatorVisitor& v, u64 insn, IR::U32 stream_index) {
    union {
        u64 raw;
        BitField<0, 8, IR::Reg> dest_reg;
        BitField<8, 8, IR::Reg> output_reg; // Not needed on host
        BitField<39, 1, u64> emit;
        BitField<40, 1, u64> cut;
    } const fields{insn};

    const IR::U32 stream{v.ir.BitwiseAnd(stream_index, v.ir.Imm32(0b11))};
    const bool emits_vertex{fields.emit != 0};
    const bool ends_primitive{fields.cut != 0};

    if (emits_vertex) {
        v.ir.EmitVertex(stream);
    }
    if (ends_primitive) {
        v.ir.EndPrimitive(stream);
    }
    // Host doesn't need the output register, but we can write to it to avoid undefined reads
    v.X(fields.dest_reg, v.ir.Imm32(0));
}
|
||||
} // Anonymous namespace
|
||||
|
||||
// Register form of OUT: the stream index comes from GetReg20.
void TranslatorVisitor::OUT_reg(u64 insn) {
    OUT(*this, insn, GetReg20(insn));
}
|
||||
|
||||
// Constant-buffer form of OUT: the stream index comes from GetCbuf.
void TranslatorVisitor::OUT_cbuf(u64 insn) {
    OUT(*this, insn, GetCbuf(insn));
}
|
||||
|
||||
// Immediate form of OUT: the stream index comes from GetImm20.
void TranslatorVisitor::OUT_imm(u64 insn) {
    OUT(*this, insn, GetImm20(insn));
}
|
||||
|
||||
} // namespace Shader::Maxwell
|
46
src/shader_recompiler/frontend/maxwell/translate/impl/pixel_load.cpp
Executable file
46
src/shader_recompiler/frontend/maxwell/translate/impl/pixel_load.cpp
Executable file
@@ -0,0 +1,46 @@
|
||||
// Copyright 2021 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include "common/bit_field.h"
|
||||
#include "common/common_types.h"
|
||||
#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
|
||||
|
||||
namespace Shader::Maxwell {
|
||||
namespace {
|
||||
// PIXLD mode, decoded from the 3-bit field at bits [31,34). Only MyIndex is translated.
enum class Mode : u64 {
    Default = 0,
    CovMask = 1,
    Covered = 2,
    Offset = 3,
    CentroidOffset = 4,
    MyIndex = 5,
};
|
||||
} // Anonymous namespace
|
||||
|
||||
// PIXLD: only Mode::MyIndex is supported, which stores SampleId in the destination register.
// Throws NotImplementedException when the destination predicate is not PT, when the address
// register is not RZ or the address offset is non-zero, or for any other mode.
void TranslatorVisitor::PIXLD(u64 insn) {
    union {
        u64 raw;
        BitField<31, 3, Mode> mode;
        BitField<0, 8, IR::Reg> dest_reg;
        BitField<8, 8, IR::Reg> addr_reg;
        BitField<20, 8, s64> addr_offset;
        BitField<45, 3, IR::Pred> dest_pred;
    } const pixld{insn};

    if (pixld.dest_pred != IR::Pred::PT) {
        throw NotImplementedException("Destination predicate");
    }
    const bool has_address{pixld.addr_reg != IR::Reg::RZ || pixld.addr_offset != 0};
    if (has_address) {
        throw NotImplementedException("Non-zero source register");
    }
    if (pixld.mode != Mode::MyIndex) {
        throw NotImplementedException("Mode {}", pixld.mode.Value());
    }
    X(pixld.dest_reg, ir.SampleId());
}
|
||||
|
||||
} // namespace Shader::Maxwell
|
@@ -0,0 +1,38 @@
|
||||
// Copyright 2021 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include "common/bit_field.h"
|
||||
#include "common/common_types.h"
|
||||
#include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h"
|
||||
#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
|
||||
|
||||
namespace Shader::Maxwell {
|
||||
// PSETP: combines three (optionally negated) source predicates with two boolean operations and
// writes two destination predicates:
//   dest_pred_a = (a bop_1 b) bop_2 c
//   dest_pred_b = (!a bop_1 b) bop_2 c
void TranslatorVisitor::PSETP(u64 insn) {
    union {
        u64 raw;
        BitField<0, 3, IR::Pred> dest_pred_b;
        BitField<3, 3, IR::Pred> dest_pred_a;
        BitField<12, 3, IR::Pred> pred_a;
        BitField<15, 1, u64> neg_pred_a;
        BitField<24, 2, BooleanOp> bop_1;
        BitField<29, 3, IR::Pred> pred_b;
        BitField<32, 1, u64> neg_pred_b;
        BitField<39, 3, IR::Pred> pred_c;
        BitField<42, 1, u64> neg_pred_c;
        BitField<45, 2, BooleanOp> bop_2;
    } const psetp{insn};

    const IR::U1 a{ir.GetPred(psetp.pred_a, psetp.neg_pred_a != 0)};
    const IR::U1 b{ir.GetPred(psetp.pred_b, psetp.neg_pred_b != 0)};
    const IR::U1 c{ir.GetPred(psetp.pred_c, psetp.neg_pred_c != 0)};

    // First stage: a and its negation are each combined with b.
    const IR::U1 first_a{PredicateCombine(ir, a, b, psetp.bop_1)};
    const IR::U1 not_a{ir.LogicalNot(a)};
    const IR::U1 first_b{PredicateCombine(ir, not_a, b, psetp.bop_1)};

    // Second stage: both intermediate values are combined with c.
    const IR::U1 final_a{PredicateCombine(ir, first_a, c, psetp.bop_2)};
    const IR::U1 final_b{PredicateCombine(ir, first_b, c, psetp.bop_2)};

    ir.SetPred(psetp.dest_pred_a, final_a);
    ir.SetPred(psetp.dest_pred_b, final_b);
}
|
||||
} // namespace Shader::Maxwell
|
@@ -0,0 +1,53 @@
|
||||
// Copyright 2021 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include "common/bit_field.h"
|
||||
#include "common/common_types.h"
|
||||
#include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h"
|
||||
#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
|
||||
|
||||
namespace Shader::Maxwell {
|
||||
// PSET: evaluates (a bop_1 b) bop_2 c over three (optionally negated) predicates and writes a
// register: zero when false; when true, 0x3f800000 if the BF bit (44) is set, otherwise -1.
// When the CC bit (47) is set the flags are updated too: Z reflects a zero result, S is reset in
// BF mode and otherwise set to "result is non-zero", O and C are reset.
void TranslatorVisitor::PSET(u64 insn) {
    union {
        u64 raw;
        BitField<0, 8, IR::Reg> dest_reg;
        BitField<12, 3, IR::Pred> pred_a;
        BitField<15, 1, u64> neg_pred_a;
        BitField<24, 2, BooleanOp> bop_1;
        BitField<29, 3, IR::Pred> pred_b;
        BitField<32, 1, u64> neg_pred_b;
        BitField<39, 3, IR::Pred> pred_c;
        BitField<42, 1, u64> neg_pred_c;
        BitField<44, 1, u64> bf;
        BitField<45, 2, BooleanOp> bop_2;
        BitField<47, 1, u64> cc;
    } const pset{insn};

    const bool float_result{pset.bf != 0};
    const bool update_flags{pset.cc != 0};

    const IR::U1 a{ir.GetPred(pset.pred_a, pset.neg_pred_a != 0)};
    const IR::U1 b{ir.GetPred(pset.pred_b, pset.neg_pred_b != 0)};
    const IR::U1 c{ir.GetPred(pset.pred_c, pset.neg_pred_c != 0)};

    const IR::U1 first{PredicateCombine(ir, a, b, pset.bop_1)};
    const IR::U1 condition{PredicateCombine(ir, first, c, pset.bop_2)};

    const IR::U32 when_true{float_result ? ir.Imm32(0x3f800000) : ir.Imm32(-1)};
    const IR::U32 zero{ir.Imm32(0)};
    const IR::U32 result{ir.Select(condition, when_true, zero)};
    X(pset.dest_reg, result);

    if (!update_flags) {
        return;
    }
    const IR::U1 is_zero{ir.IEqual(result, zero)};
    SetZFlag(is_zero);
    if (float_result) {
        ResetSFlag();
    } else {
        SetSFlag(ir.LogicalNot(is_zero));
    }
    ResetOFlag();
    ResetCFlag();
}
|
||||
|
||||
} // namespace Shader::Maxwell
|
@@ -0,0 +1,44 @@
|
||||
// Copyright 2021 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include "common/bit_field.h"
|
||||
#include "common/common_types.h"
|
||||
#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
|
||||
|
||||
namespace Shader::Maxwell {
|
||||
namespace {
|
||||
|
||||
// Shared SEL translation: picks between the register encoded at bits [8,16) and src according to
// the predicate at bits [39,42). When bit 42 (neg_pred) is set the two operands trade places, so
// src is chosen when the predicate is true.
void SEL(TranslatorVisitor& v, u64 insn, const IR::U32& src) {
    union {
        u64 raw;
        BitField<0, 8, IR::Reg> dest_reg;
        BitField<8, 8, IR::Reg> src_reg;
        BitField<39, 3, IR::Pred> pred;
        BitField<42, 1, u64> neg_pred;
    } const sel{insn};

    const IR::U1 condition{v.ir.GetPred(sel.pred)};
    const IR::U32 reg_operand{v.X(sel.src_reg)};

    // Negation is expressed by choosing which operand goes in the "true" slot.
    const bool negated{sel.neg_pred != 0};
    const IR::U32& when_true{negated ? src : reg_operand};
    const IR::U32& when_false{negated ? reg_operand : src};

    const IR::U32 chosen{v.ir.Select(condition, when_true, when_false)};
    v.X(sel.dest_reg, chosen);
}
|
||||
} // Anonymous namespace
|
||||
|
||||
// Register form of SEL: the second operand comes from GetReg20.
void TranslatorVisitor::SEL_reg(u64 insn) {
    SEL(*this, insn, GetReg20(insn));
}
|
||||
|
||||
// Constant-buffer form of SEL: the second operand comes from GetCbuf.
void TranslatorVisitor::SEL_cbuf(u64 insn) {
    SEL(*this, insn, GetCbuf(insn));
}
|
||||
|
||||
// Immediate form of SEL: the second operand comes from GetImm20.
void TranslatorVisitor::SEL_imm(u64 insn) {
    SEL(*this, insn, GetImm20(insn));
}
|
||||
} // namespace Shader::Maxwell
|
@@ -0,0 +1,208 @@
|
||||
// Copyright 2021 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include <array>
|
||||
#include <bit>
|
||||
|
||||
#include "common/bit_field.h"
|
||||
#include "common/common_types.h"
|
||||
#include "shader_recompiler/frontend/ir/modifiers.h"
|
||||
#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
|
||||
|
||||
namespace Shader::Maxwell {
|
||||
namespace {
|
||||
// Surface type, decoded from the 3-bit field at bits [33,36) of SUATOM.
enum class Type : u64 {
    _1D = 0,
    BUFFER_1D = 1,
    ARRAY_1D = 2,
    _2D = 3,
    ARRAY_2D = 4,
    _3D = 5,
};
|
||||
|
||||
// Operand size/format, decoded from the 3-bit field at bits [51,54) of SUATOM.
// Only U32, S32 and SD32 are accepted by ImageAtomOp.
enum class Size : u64 {
    U32 = 0,
    S32 = 1,
    U64 = 2,
    S64 = 3,
    F32FTZRN = 4,
    F16x2FTZRN = 5,
    SD32 = 6,
    SD64 = 7,
};
|
||||
|
||||
// Atomic operation selector, decoded from the 4-bit field at bits [29,33) of SUATOM.
enum class AtomicOp : u64 {
    ADD = 0,
    MIN = 1,
    MAX = 2,
    INC = 3,
    DEC = 4,
    AND = 5,
    OR = 6,
    XOR = 7,
    EXCH = 8,
};
|
||||
|
||||
// Clamp mode, decoded from the 2-bit field at bits [49,51) of SUATOM.
// Only IGN is accepted by ImageAtomOp.
enum class Clamp : u64 {
    IGN = 0,
    Default = 1,
    TRAP = 2,
};
|
||||
|
||||
// Maps the encoded surface type onto the corresponding TextureType.
// Values outside the enumerators throw NotImplementedException.
TextureType GetType(Type type) {
    // No default label: every enumerator is listed, anything else falls out of the switch.
    switch (type) {
    case Type::_1D:
        return TextureType::Color1D;
    case Type::_2D:
        return TextureType::Color2D;
    case Type::_3D:
        return TextureType::Color3D;
    case Type::ARRAY_1D:
        return TextureType::ColorArray1D;
    case Type::ARRAY_2D:
        return TextureType::ColorArray2D;
    case Type::BUFFER_1D:
        return TextureType::Buffer;
    }
    throw NotImplementedException("Invalid type {}", type);
}
|
||||
|
||||
// Builds the coordinate value for an image atomic from consecutive registers starting at reg:
// one register for 1D and buffer surfaces, a two-component composite for 2D and a three-component
// composite for 3D. Array types (and any other value) are not supported and throw
// NotImplementedException.
IR::Value MakeCoords(TranslatorVisitor& v, IR::Reg reg, Type type) {
    switch (type) {
    case Type::_1D:
    case Type::BUFFER_1D:
        return v.X(reg);
    case Type::_2D:
        return v.ir.CompositeConstruct(v.X(reg), v.X(reg + 1));
    case Type::_3D:
        return v.ir.CompositeConstruct(v.X(reg), v.X(reg + 1), v.X(reg + 2));
    default:
        break;
    }
    throw NotImplementedException("Invalid type {}", type);
}
|
||||
|
||||
// Emits the image atomic matching op on the given handle and coordinates and returns the value
// produced by the emitter. is_signed is forwarded only to the MIN and MAX operations.
// Operations without a case here throw NotImplementedException.
IR::Value ApplyAtomicOp(IR::IREmitter& ir, const IR::U32& handle, const IR::Value& coords,
                        const IR::Value& op_b, IR::TextureInstInfo info, AtomicOp op,
                        bool is_signed) {
    switch (op) {
    // Arithmetic
    case AtomicOp::ADD:
        return ir.ImageAtomicIAdd(handle, coords, op_b, info);
    case AtomicOp::INC:
        return ir.ImageAtomicInc(handle, coords, op_b, info);
    case AtomicOp::DEC:
        return ir.ImageAtomicDec(handle, coords, op_b, info);
    // Signedness-aware comparisons
    case AtomicOp::MIN:
        return ir.ImageAtomicIMin(handle, coords, op_b, is_signed, info);
    case AtomicOp::MAX:
        return ir.ImageAtomicIMax(handle, coords, op_b, is_signed, info);
    // Bitwise
    case AtomicOp::AND:
        return ir.ImageAtomicAnd(handle, coords, op_b, info);
    case AtomicOp::OR:
        return ir.ImageAtomicOr(handle, coords, op_b, info);
    case AtomicOp::XOR:
        return ir.ImageAtomicXor(handle, coords, op_b, info);
    // Exchange
    case AtomicOp::EXCH:
        return ir.ImageAtomicExchange(handle, coords, op_b, info);
    default:
        throw NotImplementedException("Atomic Operation {}", op);
    }
}
|
||||
|
||||
// Returns the image format used for an atomic of the given size. U32, S32 and SD32 all map to
// R32_UINT; every other size throws NotImplementedException.
ImageFormat Format(Size size) {
    const bool is_32_bit_int{size == Size::U32 || size == Size::S32 || size == Size::SD32};
    if (is_32_bit_int) {
        return ImageFormat::R32_UINT;
    }
    throw NotImplementedException("Invalid size {}", size);
}
|
||||
|
||||
// True for the sizes U32, S32 and SD32; false for every other size.
bool IsSizeInt32(Size size) {
    return size == Size::U32 || size == Size::S32 || size == Size::SD32;
}
|
||||
|
||||
// Shared implementation of the surface atomic instructions.
//
// Rejects any clamp mode other than IGN and any size for which IsSizeInt32 is
// false, then emits the atomic selected by `op` on the image addressed by
// either the bindless register or the bound offset. The produced value is
// stored in `dest_reg` only when `write_result` is set.
void ImageAtomOp(TranslatorVisitor& v, IR::Reg dest_reg, IR::Reg operand_reg, IR::Reg coord_reg,
                 IR::Reg bindless_reg, AtomicOp op, Clamp clamp, Size size, Type type,
                 u64 bound_offset, bool is_bindless, bool write_result) {
    if (clamp != Clamp::IGN) {
        throw NotImplementedException("Clamp {}", clamp);
    }
    if (!IsSizeInt32(size)) {
        throw NotImplementedException("Size {}", size);
    }
    const bool is_signed{size == Size::S32};
    const ImageFormat image_format{Format(size)};
    const TextureType texture_type{GetType(type)};
    const IR::Value coords{MakeCoords(v, coord_reg, type)};

    // Bound images are addressed by an immediate built from bound_offset * 4.
    const IR::U32 handle{is_bindless ? v.X(bindless_reg)
                                     : v.ir.Imm32(static_cast<u32>(bound_offset * 4))};

    IR::TextureInstInfo info{};
    info.type.Assign(texture_type);
    info.image_format.Assign(image_format);

    // TODO: float/64-bit operand
    const IR::Value operand{v.X(operand_reg)};
    const IR::Value result{ApplyAtomicOp(v.ir, handle, coords, operand, info, op, is_signed)};

    if (!write_result) {
        return;
    }
    v.X(dest_reg, IR::U32{result});
}
|
||||
} // Anonymous namespace
|
||||
|
||||
// Decodes SUATOM and forwards its fields to ImageAtomOp, requesting that the
// result of the atomic be written back to the destination register.
void TranslatorVisitor::SUATOM(u64 insn) {
    // Fields are listed by ascending bit position; `raw` must stay first
    // because the union is initialized from `insn` through it.
    union {
        u64 raw;
        BitField<0, 8, IR::Reg> dest_reg;
        BitField<8, 8, IR::Reg> coord_reg;
        BitField<20, 8, IR::Reg> operand_reg;
        BitField<29, 4, AtomicOp> op;
        BitField<33, 3, Type> type;
        BitField<36, 13, u64> bound_offset;    // !is_bindless
        BitField<39, 8, IR::Reg> bindless_reg; // is_bindless
        BitField<49, 2, Clamp> clamp;
        BitField<51, 3, Size> size;
        BitField<54, 1, u64> is_bindless;
    } const fields{insn};

    const bool is_bindless{fields.is_bindless != 0};
    constexpr bool write_result{true};
    ImageAtomOp(*this, fields.dest_reg, fields.operand_reg, fields.coord_reg, fields.bindless_reg,
                fields.op, fields.clamp, fields.size, fields.type, fields.bound_offset,
                is_bindless, write_result);
}
|
||||
|
||||
// Decodes SURED and forwards its fields to ImageAtomOp. No result is written
// back: RZ is passed as the destination and write_result is false.
void TranslatorVisitor::SURED(u64 insn) {
    // TODO: confirm offsets
    // Fields are listed by ascending bit position; `raw` must stay first
    // because the union is initialized from `insn` through it.
    union {
        u64 raw;
        BitField<0, 8, IR::Reg> operand_reg;
        BitField<8, 8, IR::Reg> coord_reg;
        BitField<20, 3, Size> size;
        BitField<21, 3, AtomicOp> op;
        BitField<33, 3, Type> type;
        BitField<36, 13, u64> bound_offset;    // is_bound
        BitField<39, 8, IR::Reg> bindless_reg; // !is_bound
        BitField<49, 2, Clamp> clamp;
        BitField<51, 1, u64> is_bound;
    } const fields{insn};

    // The encoding carries an "is bound" bit; ImageAtomOp expects the inverse.
    const bool is_bindless{fields.is_bound == 0};
    constexpr bool write_result{false};
    ImageAtomOp(*this, IR::Reg::RZ, fields.operand_reg, fields.coord_reg, fields.bindless_reg,
                fields.op, fields.clamp, fields.size, fields.type, fields.bound_offset,
                is_bindless, write_result);
}
|
||||
|
||||
} // namespace Shader::Maxwell
|
281
src/shader_recompiler/frontend/maxwell/translate/impl/surface_load_store.cpp
Executable file
281
src/shader_recompiler/frontend/maxwell/translate/impl/surface_load_store.cpp
Executable file
@@ -0,0 +1,281 @@
|
||||
// Copyright 2021 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include <array>
|
||||
#include <bit>
|
||||
|
||||
#include "common/bit_field.h"
|
||||
#include "common/common_types.h"
|
||||
#include "shader_recompiler/frontend/ir/modifiers.h"
|
||||
#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
|
||||
|
||||
namespace Shader::Maxwell {
|
||||
namespace {
|
||||
// Surface type selector; decoded from the 3-bit `type` field of SULD/SUST.
enum class Type : u64 {
    _1D = 0,
    BUFFER_1D = 1,
    ARRAY_1D = 2,
    _2D = 3,
    ARRAY_2D = 4,
    _3D = 5,
};
|
||||
|
||||
// One bit per color component.
constexpr unsigned R{1U << 0};
constexpr unsigned G{1U << 1};
constexpr unsigned B{1U << 2};
constexpr unsigned A{1U << 3};

// Component mask for each value of the 4-bit swizzle field (index = encoding).
constexpr std::array<unsigned, 16> MASK{
    0U,            // 0
    R,             // 1
    G,             // 2
    R | G,         // 3
    B,             // 4
    R | B,         // 5
    G | B,         // 6
    R | G | B,     // 7
    A,             // 8
    R | A,         // 9
    G | A,         // 10
    R | G | A,     // 11
    B | A,         // 12
    R | B | A,     // 13
    G | B | A,     // 14
    R | G | B | A, // 15
};
|
||||
|
||||
// Access size; decoded from the 3-bit `size` field of SULD/SUST (.D form).
enum class Size : u64 {
    U8 = 0,
    S8 = 1,
    U16 = 2,
    S16 = 3,
    B32 = 4,
    B64 = 5,
    B128 = 6,
};
|
||||
|
||||
// Clamp mode; decoded from the 2-bit `clamp` field. SULD and SUST only accept IGN.
enum class Clamp : u64 {
    IGN = 0,
    Default = 1,
    TRAP = 2,
};
|
||||
|
||||
// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#cache-operators
|
||||
enum class LoadCache : u64 {
|
||||
CA, // Cache at all levels, likely to be accessed again
|
||||
CG, // Cache at global level (L2 and below, not L1)
|
||||
CI, // ???
|
||||
CV, // Don't cache and fetch again (volatile)
|
||||
};
|
||||
|
||||
// Store cache operator; decoded from the 2-bit `cache` field of SUST.
enum class StoreCache : u64 {
    WB = 0, // Cache write-back all coherent levels
    CG = 1, // Cache at global level (L2 and below, not L1)
    CS = 2, // Cache streaming, likely to be accessed once
    WT = 3, // Cache write-through (to system memory, volatile?)
};
|
||||
|
||||
// Returns the image format that corresponds to a surface access size.
// Throws NotImplementedException for values outside the Size enumeration.
ImageFormat Format(Size size) {
    // Indexed by the numeric value of Size, so entries must follow the
    // declaration order of the enumeration (U8 .. B128).
    static constexpr std::array formats{
        ImageFormat::R8_UINT,           // U8
        ImageFormat::R8_SINT,           // S8
        ImageFormat::R16_UINT,          // U16
        ImageFormat::R16_SINT,          // S16
        ImageFormat::R32_UINT,          // B32
        ImageFormat::R32G32_UINT,       // B64
        ImageFormat::R32G32B32A32_UINT, // B128
    };
    const auto index{static_cast<std::size_t>(size)};
    if (index >= formats.size()) {
        throw NotImplementedException("Invalid size {}", size);
    }
    return formats[index];
}
|
||||
|
||||
// Number of 32-bit registers covered by an access of the given size:
// 4 for B128, 2 for B64 and 1 for everything up to B32.
int SizeInRegs(Size size) {
    switch (size) {
    case Size::B128:
        return 4;
    case Size::B64:
        return 2;
    case Size::B32:
    case Size::S16:
    case Size::U16:
    case Size::S8:
    case Size::U8:
        return 1;
    }
    throw NotImplementedException("Invalid size {}", size);
}
|
||||
|
||||
// Translates the instruction's surface type into the IR texture type.
// Throws NotImplementedException for values outside the Type enumeration.
TextureType GetType(Type type) {
    if (type == Type::_1D) {
        return TextureType::Color1D;
    }
    if (type == Type::BUFFER_1D) {
        return TextureType::Buffer;
    }
    if (type == Type::ARRAY_1D) {
        return TextureType::ColorArray1D;
    }
    if (type == Type::_2D) {
        return TextureType::Color2D;
    }
    if (type == Type::ARRAY_2D) {
        return TextureType::ColorArray2D;
    }
    if (type == Type::_3D) {
        return TextureType::Color3D;
    }
    throw NotImplementedException("Invalid type {}", type);
}
|
||||
|
||||
// Builds the coordinate operand for a surface access from consecutive
// registers starting at `reg`. Array types take their last component from the
// low 16 bits of the register that follows the spatial coordinates.
IR::Value MakeCoords(TranslatorVisitor& v, IR::Reg reg, Type type) {
    // Extracts bits [0, 16) of the register `offset` slots after `reg`.
    const auto layer{[&v, reg](int offset) {
        return v.ir.BitFieldExtract(v.X(reg + offset), v.ir.Imm32(0), v.ir.Imm32(16));
    }};
    switch (type) {
    // Single register
    case Type::_1D:
    case Type::BUFFER_1D:
        return v.X(reg);
    // Non-array composites
    case Type::_2D:
        return v.ir.CompositeConstruct(v.X(reg), v.X(reg + 1));
    case Type::_3D:
        return v.ir.CompositeConstruct(v.X(reg), v.X(reg + 1), v.X(reg + 2));
    // Array composites
    case Type::ARRAY_1D:
        return v.ir.CompositeConstruct(v.X(reg), layer(1));
    case Type::ARRAY_2D:
        return v.ir.CompositeConstruct(v.X(reg), v.X(reg + 1), layer(2));
    }
    throw NotImplementedException("Invalid type {}", type);
}
|
||||
|
||||
// Looks up the component mask for a swizzle encoding.
// Encoding 0 and anything past the end of MASK are rejected.
unsigned SwizzleMask(u64 swizzle) {
    const bool is_valid{swizzle != 0 && swizzle < MASK.size()};
    if (is_valid) {
        return MASK[swizzle];
    }
    throw NotImplementedException("Invalid swizzle {}", swizzle);
}
|
||||
|
||||
// Builds a four-component color from `num_regs` consecutive registers starting
// at `reg`; the remaining components are filled with an immediate zero.
// Callers in this file pass 1, 2 or 4 for `num_regs`.
IR::Value MakeColor(IR::IREmitter& ir, IR::Reg reg, int num_regs) {
    std::array<IR::U32, 4> components;
    for (int index = 0; index < 4; ++index) {
        if (index < num_regs) {
            components[index] = ir.GetReg(reg + index);
        } else {
            components[index] = ir.Imm32(0);
        }
    }
    return ir.CompositeConstruct(components[0], components[1], components[2], components[3]);
}
|
||||
} // Anonymous namespace
|
||||
|
||||
// Translates SULD (surface load).
//
// Only Clamp::IGN and the CA/CG cache operators are accepted, and .BA is not
// supported in the typed (.D) form. The typed form writes SizeInRegs(size)
// consecutive registers; the other form writes one register per component
// selected by the swizzle mask.
void TranslatorVisitor::SULD(u64 insn) {
    // Fields are listed by ascending bit position; `raw` must stay first
    // because the union is initialized from `insn` through it.
    union {
        u64 raw;
        BitField<0, 8, IR::Reg> dest_reg;
        BitField<8, 8, IR::Reg> coord_reg;
        BitField<20, 3, Size> size;   // .D
        BitField<20, 4, u64> swizzle; // .P
        BitField<23, 1, u64> ba;
        BitField<24, 2, LoadCache> cache;
        BitField<33, 3, Type> type;
        BitField<36, 13, u64> bound_offset;    // is_bound
        BitField<39, 8, IR::Reg> bindless_reg; // !is_bound
        BitField<49, 2, Clamp> clamp;
        BitField<51, 1, u64> is_bound;
        BitField<52, 1, u64> d;
    } const suld{insn};

    // Reject modes that are not implemented before emitting anything.
    if (suld.clamp != Clamp::IGN) {
        throw NotImplementedException("Clamp {}", suld.clamp.Value());
    }
    const bool cache_supported{suld.cache == LoadCache::CA || suld.cache == LoadCache::CG};
    if (!cache_supported) {
        throw NotImplementedException("Cache {}", suld.cache.Value());
    }
    const bool is_typed{suld.d != 0};
    if (is_typed && suld.ba != 0) {
        throw NotImplementedException("BA");
    }

    const ImageFormat image_format{is_typed ? Format(suld.size) : ImageFormat::Typeless};
    const TextureType texture_type{GetType(suld.type)};
    const IR::Value coords{MakeCoords(*this, suld.coord_reg, suld.type)};
    const IR::U32 handle{suld.is_bound != 0 ? ir.Imm32(static_cast<u32>(suld.bound_offset * 4))
                                            : X(suld.bindless_reg)};
    IR::TextureInstInfo info{};
    info.type.Assign(texture_type);
    info.image_format.Assign(image_format);

    const IR::Value texel{ir.ImageRead(handle, coords, info)};
    IR::Reg dest_reg{suld.dest_reg};
    if (is_typed) {
        const int num_regs{SizeInRegs(suld.size)};
        for (int i = 0; i < num_regs; ++i) {
            X(dest_reg + i, IR::U32{ir.CompositeExtract(texel, i)});
        }
        return;
    }

    const unsigned mask{SwizzleMask(suld.swizzle)};
    const int bits{std::popcount(mask)};
    // Three selected components are checked against a four-register alignment.
    if (!IR::IsAligned(dest_reg, bits == 3 ? 4 : bits)) {
        throw NotImplementedException("Unaligned destination register");
    }
    for (unsigned component = 0; component < 4; ++component) {
        const bool is_selected{((mask >> component) & 1) != 0};
        if (is_selected) {
            X(dest_reg, IR::U32{ir.CompositeExtract(texel, component)});
            ++dest_reg;
        }
    }
}
|
||||
|
||||
// Translates SUST (surface store).
//
// Only Clamp::IGN and the WB/CG cache operators are accepted, and .BA is not
// supported in the typed (.D) form. The typed form stores SizeInRegs(size)
// registers (zero-padded to four components); the other form requires a full
// RGBA swizzle mask and stores four registers.
void TranslatorVisitor::SUST(u64 insn) {
    // Fields are listed by ascending bit position; `raw` must stay first
    // because the union is initialized from `insn` through it.
    union {
        u64 raw;
        BitField<0, 8, IR::Reg> data_reg;
        BitField<8, 8, IR::Reg> coord_reg;
        BitField<20, 3, Size> size;   // .D
        BitField<20, 4, u64> swizzle; // .P
        BitField<23, 1, u64> ba;
        BitField<24, 2, StoreCache> cache;
        BitField<33, 3, Type> type;
        BitField<36, 13, u64> bound_offset;    // is_bound
        BitField<39, 8, IR::Reg> bindless_reg; // !is_bound
        BitField<49, 2, Clamp> clamp;
        BitField<51, 1, u64> is_bound;
        BitField<52, 1, u64> d;
    } const sust{insn};

    // Reject modes that are not implemented before emitting anything.
    if (sust.clamp != Clamp::IGN) {
        throw NotImplementedException("Clamp {}", sust.clamp.Value());
    }
    const bool cache_supported{sust.cache == StoreCache::WB || sust.cache == StoreCache::CG};
    if (!cache_supported) {
        throw NotImplementedException("Cache {}", sust.cache.Value());
    }
    const bool is_typed{sust.d != 0};
    if (is_typed && sust.ba != 0) {
        throw NotImplementedException("BA");
    }

    const ImageFormat image_format{is_typed ? Format(sust.size) : ImageFormat::Typeless};
    const TextureType texture_type{GetType(sust.type)};
    const IR::Value coords{MakeCoords(*this, sust.coord_reg, sust.type)};
    const IR::U32 handle{sust.is_bound != 0 ? ir.Imm32(static_cast<u32>(sust.bound_offset * 4))
                                            : X(sust.bindless_reg)};
    IR::TextureInstInfo info{};
    info.type.Assign(texture_type);
    info.image_format.Assign(image_format);

    // Decide how many data registers feed the color before building it.
    int num_regs{4};
    if (is_typed) {
        num_regs = SizeInRegs(sust.size);
    } else if (SwizzleMask(sust.swizzle) != 0xf) {
        throw NotImplementedException("Non-full mask");
    }
    const IR::Value color{MakeColor(ir, sust.data_reg, num_regs)};
    ir.ImageWrite(handle, coords, color, info);
}
|
||||
|
||||
} // namespace Shader::Maxwell
|
236
src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch.cpp
Executable file
236
src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch.cpp
Executable file
@@ -0,0 +1,236 @@
|
||||
// Copyright 2021 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include <optional>
|
||||
|
||||
#include "common/bit_field.h"
|
||||
#include "common/common_types.h"
|
||||
#include "shader_recompiler/frontend/ir/modifiers.h"
|
||||
#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
|
||||
|
||||
namespace Shader::Maxwell {
|
||||
namespace {
|
||||
// LOD/bias mode; decoded from the 3-bit `blod` field of TEX and TEX_b.
// Encodings 4 and 5 are rejected by MakeLod.
enum class Blod : u64 {
    None = 0,
    LZ = 1,
    LB = 2,
    LL = 3,
    INVALIDBLOD4 = 4,
    INVALIDBLOD5 = 5,
    LBA = 6,
    LLA = 7,
};
|
||||
|
||||
// Texture type as encoded in the 3-bit `type` field of the texture instruction.
enum class TextureType : u64 {
    _1D = 0,
    ARRAY_1D = 1,
    _2D = 2,
    ARRAY_2D = 3,
    _3D = 4,
    ARRAY_3D = 5,
    CUBE = 6,
    ARRAY_CUBE = 7,
};
|
||||
|
||||
// Translates the instruction's texture type into the IR texture type.
// 3D array textures are not implemented and raise NotImplementedException.
Shader::TextureType GetType(TextureType type) {
    if (type == TextureType::ARRAY_3D) {
        throw NotImplementedException("3D array texture type");
    }
    switch (type) {
    // Plain textures
    case TextureType::_1D:
        return Shader::TextureType::Color1D;
    case TextureType::_2D:
        return Shader::TextureType::Color2D;
    case TextureType::_3D:
        return Shader::TextureType::Color3D;
    case TextureType::CUBE:
        return Shader::TextureType::ColorCube;
    // Array textures
    case TextureType::ARRAY_1D:
        return Shader::TextureType::ColorArray1D;
    case TextureType::ARRAY_2D:
        return Shader::TextureType::ColorArray2D;
    case TextureType::ARRAY_CUBE:
        return Shader::TextureType::ColorArrayCube;
    case TextureType::ARRAY_3D:
        // Already rejected by the guard above.
        break;
    }
    throw NotImplementedException("Invalid texture type {}", type);
}
|
||||
|
||||
// Builds the floating-point coordinate operand from consecutive registers
// starting at `reg`. For array types the layer is read from `reg` itself
// (converted from an unsigned integer) and the spatial coordinates follow it.
IR::Value MakeCoords(TranslatorVisitor& v, IR::Reg reg, TextureType type) {
    const auto layer{[&v, reg]() -> IR::F32 { return v.ir.ConvertUToF(32, 16, v.X(reg)); }};
    if (type == TextureType::ARRAY_3D) {
        throw NotImplementedException("3D array texture type");
    }
    switch (type) {
    // Non-array: coordinates start at `reg`.
    case TextureType::_1D:
        return v.F(reg);
    case TextureType::_2D:
        return v.ir.CompositeConstruct(v.F(reg), v.F(reg + 1));
    case TextureType::_3D:
    case TextureType::CUBE:
        return v.ir.CompositeConstruct(v.F(reg), v.F(reg + 1), v.F(reg + 2));
    // Array: coordinates start at `reg + 1`, layer goes last.
    case TextureType::ARRAY_1D:
        return v.ir.CompositeConstruct(v.F(reg + 1), layer());
    case TextureType::ARRAY_2D:
        return v.ir.CompositeConstruct(v.F(reg + 1), v.F(reg + 2), layer());
    case TextureType::ARRAY_CUBE:
        return v.ir.CompositeConstruct(v.F(reg + 1), v.F(reg + 2), v.F(reg + 3), layer());
    case TextureType::ARRAY_3D:
        // Already rejected by the guard above.
        break;
    }
    throw NotImplementedException("Invalid texture type {}", type);
}
|
||||
|
||||
// Produces the LOD/bias operand for the given mode.
// None and LZ yield an immediate 0.0f. LB, LL, LBA and LLA read the value from
// `reg` and advance `reg` past it. The two invalid encodings are rejected.
IR::F32 MakeLod(TranslatorVisitor& v, IR::Reg& reg, Blod blod) {
    switch (blod) {
    case Blod::INVALIDBLOD4:
    case Blod::INVALIDBLOD5:
        break;
    case Blod::None:
    case Blod::LZ:
        return v.ir.Imm32(0.0f);
    case Blod::LB:
    case Blod::LBA:
    case Blod::LL:
    case Blod::LLA:
        return v.F(reg++);
    }
    throw NotImplementedException("Invalid blod {}", blod);
}
|
||||
|
||||
// Reads the packed texel offset from `reg` (advancing `reg`) and unpacks one
// sign-extended 4-bit field per dimension. Cube textures do not accept offsets.
IR::Value MakeOffset(TranslatorVisitor& v, IR::Reg& reg, TextureType type) {
    const IR::U32 packed{v.X(reg++)};
    // Sign-extending extract of the 4-bit field number `index`.
    const auto field{[&v, &packed](int index) {
        return v.ir.BitFieldExtract(packed, v.ir.Imm32(index * 4), v.ir.Imm32(4), true);
    }};
    switch (type) {
    case TextureType::CUBE:
    case TextureType::ARRAY_CUBE:
        throw NotImplementedException("Illegal offset on CUBE sample");
    case TextureType::_1D:
    case TextureType::ARRAY_1D:
        return field(0);
    case TextureType::_2D:
    case TextureType::ARRAY_2D:
        return v.ir.CompositeConstruct(field(0), field(1));
    case TextureType::_3D:
    case TextureType::ARRAY_3D:
        return v.ir.CompositeConstruct(field(0), field(1), field(2));
    }
    throw NotImplementedException("Invalid texture type {}", type);
}
|
||||
|
||||
// True for the modes that are sampled with an explicit LOD: LL, LLA and LZ.
bool HasExplicitLod(Blod blod) {
    return blod == Blod::LL || blod == Blod::LLA || blod == Blod::LZ;
}
|
||||
|
||||
// Shared implementation of TEX and TEX_b.
//
// `cbuf_offset` holds the immediate handle for the bound form; when it is
// empty the handle is read from the first meta register. The remaining meta
// registers are consumed in order: LOD/bias, packed offset (aoffi), depth
// reference (dc). LOD clamp (`lc`) is not implemented.
void Impl(TranslatorVisitor& v, u64 insn, bool aoffi, Blod blod, bool lc,
          std::optional<u32> cbuf_offset) {
    // Fields are listed by ascending bit position; `raw` must stay first
    // because the union is initialized from `insn` through it.
    union {
        u64 raw;
        BitField<0, 8, IR::Reg> dest_reg;
        BitField<8, 8, IR::Reg> coord_reg;
        BitField<20, 8, IR::Reg> meta_reg;
        BitField<28, 3, TextureType> type;
        BitField<31, 4, u64> mask;
        BitField<35, 1, u64> ndv;
        BitField<49, 1, u64> nodep;
        BitField<50, 1, u64> dc;
        BitField<51, 3, IR::Pred> sparse_pred;
    } const tex{insn};

    if (lc) {
        throw NotImplementedException("LC");
    }
    const bool is_depth{tex.dc != 0};
    const bool has_bias{blod == Blod::LB || blod == Blod::LBA};
    const bool explicit_lod{HasExplicitLod(blod)};

    const IR::Value coords{MakeCoords(v, tex.coord_reg, tex.type)};

    // Operands are pulled from the meta registers strictly in this order.
    IR::Reg meta_reg{tex.meta_reg};
    IR::Value handle;
    if (cbuf_offset.has_value()) {
        handle = v.ir.Imm32(*cbuf_offset);
    } else {
        handle = v.X(meta_reg++);
    }
    const IR::F32 lod{MakeLod(v, meta_reg, blod)};
    IR::Value offset;
    if (aoffi) {
        offset = MakeOffset(v, meta_reg, tex.type);
    }
    IR::F32 dref;
    if (is_depth) {
        dref = v.F(meta_reg++);
    }
    // Never assigned: the LC path is rejected above.
    IR::F32 lod_clamp;

    IR::TextureInstInfo info{};
    info.type.Assign(GetType(tex.type));
    info.is_depth.Assign(is_depth ? 1 : 0);
    info.has_bias.Assign(has_bias ? 1 : 0);
    info.has_lod_clamp.Assign(lc ? 1 : 0);

    const IR::Value sample{[&]() -> IR::Value {
        if (is_depth) {
            if (explicit_lod) {
                return v.ir.ImageSampleDrefExplicitLod(handle, coords, dref, lod, offset, info);
            }
            return v.ir.ImageSampleDrefImplicitLod(handle, coords, dref, lod, offset, lod_clamp,
                                                   info);
        }
        if (explicit_lod) {
            return v.ir.ImageSampleExplicitLod(handle, coords, lod, offset, info);
        }
        return v.ir.ImageSampleImplicitLod(handle, coords, lod, offset, lod_clamp, info);
    }()};

    // Enabled components are written to consecutive destination registers.
    IR::Reg dest_reg{tex.dest_reg};
    for (int element = 0; element < 4; ++element) {
        const bool is_enabled{((tex.mask >> element) & 1) != 0};
        if (!is_enabled) {
            continue;
        }
        IR::F32 value;
        if (is_depth) {
            // Depth compares yield a scalar: replicate it, with 1.0f in the last slot.
            value = element < 3 ? IR::F32{sample} : v.ir.Imm32(1.0f);
        } else {
            value = IR::F32{v.ir.CompositeExtract(sample, static_cast<size_t>(element))};
        }
        v.F(dest_reg, value);
        ++dest_reg;
    }
    if (tex.sparse_pred != IR::Pred::PT) {
        v.ir.SetPred(tex.sparse_pred, v.ir.LogicalNot(v.ir.GetSparseFromOp(sample)));
    }
}
|
||||
} // Anonymous namespace
|
||||
|
||||
// TEX with a bound handle: decodes the modifier bits and passes
// cbuf_offset * 4 to Impl as the immediate handle.
void TranslatorVisitor::TEX(u64 insn) {
    union {
        u64 raw;
        BitField<36, 13, u64> cbuf_offset;
        BitField<54, 1, u64> aoffi;
        BitField<55, 3, Blod> blod;
        BitField<58, 1, u64> lc;
    } const fields{insn};

    const bool has_aoffi{fields.aoffi != 0};
    const bool has_lod_clamp{fields.lc != 0};
    const auto handle_offset{static_cast<u32>(fields.cbuf_offset * 4)};
    Impl(*this, insn, has_aoffi, fields.blod, has_lod_clamp, handle_offset);
}
|
||||
|
||||
// TEX_b: decodes the modifier bits and calls Impl without an immediate
// handle, so the handle is taken from the meta register instead.
void TranslatorVisitor::TEX_b(u64 insn) {
    union {
        u64 raw;
        BitField<36, 1, u64> aoffi;
        BitField<37, 3, Blod> blod;
        BitField<40, 1, u64> lc;
    } const fields{insn};

    const bool has_aoffi{fields.aoffi != 0};
    const bool has_lod_clamp{fields.lc != 0};
    Impl(*this, insn, has_aoffi, fields.blod, has_lod_clamp, std::nullopt);
}
|
||||
|
||||
} // namespace Shader::Maxwell
|
266
src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch_swizzled.cpp
Executable file
266
src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch_swizzled.cpp
Executable file
@@ -0,0 +1,266 @@
|
||||
// Copyright 2021 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include <utility>
|
||||
|
||||
#include "common/bit_field.h"
|
||||
#include "common/common_types.h"
|
||||
#include "shader_recompiler/frontend/ir/modifiers.h"
|
||||
#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
|
||||
|
||||
namespace Shader::Maxwell {
|
||||
namespace {
|
||||
// Result precision; decoded from the 1-bit `precision` field of Encoding.
enum class Precision : u64 {
    F16 = 0,
    F32 = 1,
};
|
||||
|
||||
union Encoding {
|
||||
u64 raw;
|
||||
BitField<59, 1, Precision> precision;
|
||||
BitField<53, 4, u64> encoding;
|
||||
BitField<49, 1, u64> nodep;
|
||||
BitField<28, 8, IR::Reg> dest_reg_b;
|
||||
BitField<0, 8, IR::Reg> dest_reg_a;
|
||||
BitField<8, 8, IR::Reg> src_reg_a;
|
||||
BitField<20, 8, IR::Reg> src_reg_b;
|
||||
BitField<36, 13, u64> cbuf_offset;
|
||||
BitField<50, 3, u64> swizzle;
|
||||
};
|
||||
|
||||
// One bit per color component.
constexpr unsigned R{1U << 0};
constexpr unsigned G{1U << 1};
constexpr unsigned B{1U << 2};
constexpr unsigned A{1U << 3};

// Component masks selected by the swizzle field when the second destination
// register is RZ (see Swizzle).
constexpr std::array<unsigned, 8> RG_LUT{
    R,     // 0
    G,     // 1
    B,     // 2
    A,     // 3
    R | G, // 4
    R | A, // 5
    G | A, // 6
    B | A, // 7
};

// Component masks selected by the swizzle field when a second destination
// register is present (see Swizzle).
constexpr std::array<unsigned, 5> RGBA_LUT{
    R | G | B,     // 0
    R | G | A,     // 1
    R | B | A,     // 2
    G | B | A,     // 3
    R | G | B | A, // 4
};
|
||||
|
||||
// Throws NotImplementedException unless `reg` satisfies IR::IsAligned for the
// requested alignment.
void CheckAlignment(IR::Reg reg, size_t alignment) {
    if (IR::IsAligned(reg, alignment)) {
        return;
    }
    throw NotImplementedException("Unaligned source register {}", reg);
}
|
||||
|
||||
template <typename... Args>
|
||||
IR::Value Composite(TranslatorVisitor& v, Args... regs) {
|
||||
return v.ir.CompositeConstruct(v.F(regs)...);
|
||||
}
|
||||
|
||||
// Converts the low 16 bits of `value`, taken as an unsigned integer, to a
// 32-bit float. Used for the array layer coordinate.
IR::F32 ReadArray(TranslatorVisitor& v, const IR::U32& value) {
    const IR::U32 layer_bits{v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(16))};
    return v.ir.ConvertUToF(32, 16, layer_bits);
}
|
||||
|
||||
// Emits the sampling instruction selected by the 4-bit `encoding` field.
//
// Each encoding fixes the texture type, the LOD mode and whether a depth
// compare is performed, and with them how src_reg_a / src_reg_b are
// interpreted. Encodings that read a register pair check that the first
// register of the pair is 2-aligned before emitting anything.
IR::Value Sample(TranslatorVisitor& v, u64 insn) {
    const Encoding texs{insn};
    const IR::U32 handle{v.ir.Imm32(static_cast<u32>(texs.cbuf_offset * 4))};
    const IR::F32 zero{v.ir.Imm32(0.0f)};
    const IR::Reg reg_a{texs.src_reg_a};
    const IR::Reg reg_b{texs.src_reg_b};
    IR::TextureInstInfo info{};
    if (texs.precision == Precision::F16) {
        info.relaxed_precision.Assign(1);
    }

    // Coordinate builders shared by several encodings.
    const auto xy_split{[&] { return Composite(v, reg_a, reg_b); }};
    const auto xy_pair{[&] { return Composite(v, reg_a, reg_a + 1); }};
    const auto xyz{[&] { return Composite(v, reg_a, reg_a + 1, reg_b); }};
    const auto xy_layer{[&] {
        return v.ir.CompositeConstruct(v.F(reg_a + 1), v.F(reg_b), ReadArray(v, v.X(reg_a)));
    }};

    switch (texs.encoding) {
    case 0: // 1D.LZ
        info.type.Assign(TextureType::Color1D);
        return v.ir.ImageSampleExplicitLod(handle, v.F(reg_a), zero, {}, info);
    case 1: // 2D
        info.type.Assign(TextureType::Color2D);
        return v.ir.ImageSampleImplicitLod(handle, xy_split(), {}, {}, {}, info);
    case 2: // 2D.LZ
        info.type.Assign(TextureType::Color2D);
        return v.ir.ImageSampleExplicitLod(handle, xy_split(), zero, {}, info);
    case 3: // 2D.LL
        CheckAlignment(reg_a, 2);
        info.type.Assign(TextureType::Color2D);
        return v.ir.ImageSampleExplicitLod(handle, xy_pair(), v.F(reg_b), {}, info);
    case 4: // 2D.DC
        CheckAlignment(reg_a, 2);
        info.type.Assign(TextureType::Color2D);
        info.is_depth.Assign(1);
        return v.ir.ImageSampleDrefImplicitLod(handle, xy_pair(), v.F(reg_b), {}, {}, {}, info);
    case 5: // 2D.LL.DC
        CheckAlignment(reg_a, 2);
        CheckAlignment(reg_b, 2);
        info.type.Assign(TextureType::Color2D);
        info.is_depth.Assign(1);
        return v.ir.ImageSampleDrefExplicitLod(handle, xy_pair(), v.F(reg_b + 1), v.F(reg_b), {},
                                               info);
    case 6: // 2D.LZ.DC
        CheckAlignment(reg_a, 2);
        info.type.Assign(TextureType::Color2D);
        info.is_depth.Assign(1);
        return v.ir.ImageSampleDrefExplicitLod(handle, xy_pair(), v.F(reg_b), zero, {}, info);
    case 7: // ARRAY_2D
        CheckAlignment(reg_a, 2);
        info.type.Assign(TextureType::ColorArray2D);
        return v.ir.ImageSampleImplicitLod(handle, xy_layer(), {}, {}, {}, info);
    case 8: // ARRAY_2D.LZ
        CheckAlignment(reg_a, 2);
        info.type.Assign(TextureType::ColorArray2D);
        return v.ir.ImageSampleExplicitLod(handle, xy_layer(), zero, {}, info);
    case 9: // ARRAY_2D.LZ.DC
        CheckAlignment(reg_a, 2);
        CheckAlignment(reg_b, 2);
        info.type.Assign(TextureType::ColorArray2D);
        info.is_depth.Assign(1);
        return v.ir.ImageSampleDrefExplicitLod(handle, xy_layer(), v.F(reg_b + 1), zero, {}, info);
    case 10: // 3D
        CheckAlignment(reg_a, 2);
        info.type.Assign(TextureType::Color3D);
        return v.ir.ImageSampleImplicitLod(handle, xyz(), {}, {}, {}, info);
    case 11: // 3D.LZ
        CheckAlignment(reg_a, 2);
        info.type.Assign(TextureType::Color3D);
        return v.ir.ImageSampleExplicitLod(handle, xyz(), zero, {}, info);
    case 12: // CUBE
        CheckAlignment(reg_a, 2);
        info.type.Assign(TextureType::ColorCube);
        return v.ir.ImageSampleImplicitLod(handle, xyz(), {}, {}, {}, info);
    case 13: // CUBE.LL
        CheckAlignment(reg_a, 2);
        CheckAlignment(reg_b, 2);
        info.type.Assign(TextureType::ColorCube);
        return v.ir.ImageSampleExplicitLod(handle, xyz(), v.F(reg_b + 1), {}, info);
    default:
        throw NotImplementedException("Illegal encoding {}", texs.encoding.Value());
    }
}
|
||||
|
||||
// Returns the component mask selected by the instruction's swizzle field.
// RG_LUT is used when the second destination register is RZ, RGBA_LUT
// otherwise; an index past the end of the chosen table is rejected.
unsigned Swizzle(u64 insn) {
    const Encoding texs{insn};
    const size_t index{texs.swizzle};
    const bool single_dest{texs.dest_reg_b == IR::Reg::RZ};
    if (single_dest) {
        if (index < RG_LUT.size()) {
            return RG_LUT[index];
        }
        throw NotImplementedException("Illegal RG encoding {}", index);
    }
    if (index < RGBA_LUT.size()) {
        return RGBA_LUT[index];
    }
    throw NotImplementedException("Illegal RGBA encoding {}", index);
}
|
||||
|
||||
// Returns component `component` of a sample result.
// A scalar F32 sample (the depth-compare case) is returned as-is for
// components 0-2 and yields 1.0f for component 3; composite samples are
// indexed directly.
IR::F32 Extract(TranslatorVisitor& v, const IR::Value& sample, unsigned component) {
    if (sample.Type() != IR::Type::F32) {
        return IR::F32{v.ir.CompositeExtract(sample, component)};
    }
    if (component == 3) {
        return v.ir.Imm32(1.0f);
    }
    return IR::F32{sample};
}
|
||||
|
||||
// Picks the destination register for the `index`-th stored 32-bit component:
// 0 and 1 map to dest_reg_a and dest_reg_a + 1, 2 and 3 map to dest_reg_b and
// dest_reg_b + 1. The second register of a pair requires a 2-aligned base.
IR::Reg RegStoreComponent32(u64 insn, unsigned index) {
    const Encoding texs{insn};
    if (index > 3) {
        throw LogicError("Invalid store index {}", index);
    }
    const IR::Reg base{index < 2 ? texs.dest_reg_a.Value() : texs.dest_reg_b.Value()};
    const bool is_first_of_pair{index % 2 == 0};
    if (is_first_of_pair) {
        return base;
    }
    CheckAlignment(base, 2);
    return base + 1;
}
|
||||
|
||||
// Writes every component selected by the swizzle mask as a 32-bit float, in
// component order, to the registers chosen by RegStoreComponent32.
void Store32(TranslatorVisitor& v, u64 insn, const IR::Value& sample) {
    const unsigned mask{Swizzle(insn)};
    unsigned stored{0};
    for (unsigned component = 0; component < 4; ++component) {
        const bool is_selected{((mask >> component) & 1) != 0};
        if (is_selected) {
            const IR::Reg dest{RegStoreComponent32(insn, stored)};
            v.F(dest, Extract(v, sample, component));
            ++stored;
        }
    }
}
|
||||
|
||||
// Packs two floats into one 32-bit register via PackHalf2x16, with `lhs` as
// the first element of the pair and `rhs` as the second.
IR::U32 Pack(TranslatorVisitor& v, const IR::F32& lhs, const IR::F32& rhs) {
    const IR::Value pair{v.ir.CompositeConstruct(lhs, rhs)};
    return v.ir.PackHalf2x16(pair);
}
|
||||
|
||||
// Writes the components selected by the swizzle mask as packed half floats.
// The first two selected components go to dest_reg_a and the next two to
// dest_reg_b; an unpaired component is packed together with zero.
void Store16(TranslatorVisitor& v, u64 insn, const IR::Value& sample) {
    const unsigned mask{Swizzle(insn)};
    std::array<IR::F32, 4> selected;
    unsigned count{0};
    for (unsigned component = 0; component < 4; ++component) {
        const bool is_selected{((mask >> component) & 1) != 0};
        if (is_selected) {
            selected[count] = Extract(v, sample, component);
            ++count;
        }
    }
    const IR::F32 zero{v.ir.Imm32(0.0f)};
    const Encoding texs{insn};

    // `count` cannot exceed 4 because the loop above runs four times.
    if (count == 0) {
        return;
    }
    if (count == 1) {
        v.X(texs.dest_reg_a, Pack(v, selected[0], zero));
        return;
    }
    v.X(texs.dest_reg_a, Pack(v, selected[0], selected[1]));
    if (count == 3) {
        v.X(texs.dest_reg_b, Pack(v, selected[2], zero));
    } else if (count == 4) {
        v.X(texs.dest_reg_b, Pack(v, selected[2], selected[3]));
    }
}
|
||||
} // Anonymous namespace
|
||||
|
||||
// Samples the texture and stores the selected components, either as 32-bit
// floats or as packed halves depending on the precision bit.
void TranslatorVisitor::TEXS(u64 insn) {
    const IR::Value sample{Sample(*this, insn)};
    const bool is_half{Encoding{insn}.precision != Precision::F32};
    if (is_half) {
        Store16(*this, insn, sample);
        return;
    }
    Store32(*this, insn, sample);
}
|
||||
|
||||
} // namespace Shader::Maxwell
|
208
src/shader_recompiler/frontend/maxwell/translate/impl/texture_gather.cpp
Executable file
208
src/shader_recompiler/frontend/maxwell/translate/impl/texture_gather.cpp
Executable file
@@ -0,0 +1,208 @@
|
||||
// Copyright 2021 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include <optional>
|
||||
|
||||
#include "common/bit_field.h"
|
||||
#include "common/common_types.h"
|
||||
#include "shader_recompiler/frontend/ir/modifiers.h"
|
||||
#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
|
||||
|
||||
namespace Shader::Maxwell {
|
||||
namespace {
|
||||
|
||||
// Texture type as encoded in the instruction; translated by GetType below.
enum class TextureType : u64 {
    _1D = 0,
    ARRAY_1D = 1,
    _2D = 2,
    ARRAY_2D = 3,
    _3D = 4,
    ARRAY_3D = 5,
    CUBE = 6,
    ARRAY_CUBE = 7,
};
|
||||
|
||||
// Offset mode of the gather instruction.
enum class OffsetType : u64 {
    None = 0,
    AOFFI = 1,
    PTP = 2,
    Invalid = 3,
};
|
||||
|
||||
// Color component selector; enumerators take the values 0 (R) through 3 (A).
enum class ComponentType : u64 {
    R,
    G,
    B,
    A,
};
|
||||
|
||||
// Translates the instruction's texture type into the IR texture type.
// 3D array textures are not implemented and raise NotImplementedException.
Shader::TextureType GetType(TextureType type) {
    if (type == TextureType::_1D) {
        return Shader::TextureType::Color1D;
    }
    if (type == TextureType::ARRAY_1D) {
        return Shader::TextureType::ColorArray1D;
    }
    if (type == TextureType::_2D) {
        return Shader::TextureType::Color2D;
    }
    if (type == TextureType::ARRAY_2D) {
        return Shader::TextureType::ColorArray2D;
    }
    if (type == TextureType::_3D) {
        return Shader::TextureType::Color3D;
    }
    if (type == TextureType::ARRAY_3D) {
        throw NotImplementedException("3D array texture type");
    }
    if (type == TextureType::CUBE) {
        return Shader::TextureType::ColorCube;
    }
    if (type == TextureType::ARRAY_CUBE) {
        return Shader::TextureType::ColorArrayCube;
    }
    throw NotImplementedException("Invalid texture type {}", type);
}
|
||||
|
||||
// Builds the floating-point coordinate operand from consecutive registers
// starting at `reg`. For array types the layer is read from `reg` itself
// (converted from an unsigned integer) and the spatial coordinates follow it.
IR::Value MakeCoords(TranslatorVisitor& v, IR::Reg reg, TextureType type) {
    const auto array_layer{[&v, reg]() -> IR::F32 { return v.ir.ConvertUToF(32, 16, v.X(reg)); }};
    switch (type) {
    case TextureType::ARRAY_3D:
        throw NotImplementedException("3D array texture type");
    // Array: coordinates start at `reg + 1`, layer goes last.
    case TextureType::ARRAY_1D:
        return v.ir.CompositeConstruct(v.F(reg + 1), array_layer());
    case TextureType::ARRAY_2D:
        return v.ir.CompositeConstruct(v.F(reg + 1), v.F(reg + 2), array_layer());
    case TextureType::ARRAY_CUBE:
        return v.ir.CompositeConstruct(v.F(reg + 1), v.F(reg + 2), v.F(reg + 3), array_layer());
    // Non-array: coordinates start at `reg`.
    case TextureType::_1D:
        return v.F(reg);
    case TextureType::_2D:
        return v.ir.CompositeConstruct(v.F(reg), v.F(reg + 1));
    case TextureType::_3D:
    case TextureType::CUBE:
        return v.ir.CompositeConstruct(v.F(reg), v.F(reg + 1), v.F(reg + 2));
    }
    throw NotImplementedException("Invalid texture type {}", type);
}
|
||||
|
||||
// Reads one register (post-incrementing reg) and unpacks the AOFFI texel offsets stored in it.
// Each offset is a sign-extended 6-bit field; the fields start at bits 0, 8 and 16.
// Returns a scalar for 1D types and a 2- or 3-component composite for 2D/3D types.
// The register is consumed even when the type is rejected.
// Throws NotImplementedException for cube types and for values outside the enum.
IR::Value MakeOffset(TranslatorVisitor& v, IR::Reg& reg, TextureType type) {
    const IR::U32 packed{v.X(reg++)};
    const auto field{[&](int first_bit) {
        return v.ir.BitFieldExtract(packed, v.ir.Imm32(first_bit), v.ir.Imm32(6), true);
    }};
    switch (type) {
    case TextureType::_1D:
    case TextureType::ARRAY_1D:
        return field(0);
    case TextureType::_2D:
    case TextureType::ARRAY_2D:
        return v.ir.CompositeConstruct(field(0), field(8));
    case TextureType::_3D:
    case TextureType::ARRAY_3D:
        return v.ir.CompositeConstruct(field(0), field(8), field(16));
    case TextureType::CUBE:
    case TextureType::ARRAY_CUBE:
        throw NotImplementedException("Illegal offset on CUBE sample");
    }
    throw NotImplementedException("Invalid texture type {}", type);
}
|
||||
|
||||
// Reads two consecutive registers (advancing reg by two) and unpacks each of them into a
// 4-component composite of sign-extended 6-bit fields located at bits 0, 8, 16 and 24.
// The first element of the pair comes from the first register read.
std::pair<IR::Value, IR::Value> MakeOffsetPTP(TranslatorVisitor& v, IR::Reg& reg) {
    const IR::U32 first_word{v.X(reg++)};
    const IR::U32 second_word{v.X(reg++)};
    const auto unpack{[&v](const IR::U32& word) {
        const IR::U32 width{v.ir.Imm32(6)};
        return v.ir.CompositeConstruct(v.ir.BitFieldExtract(word, v.ir.Imm32(0), width, true),
                                       v.ir.BitFieldExtract(word, v.ir.Imm32(8), width, true),
                                       v.ir.BitFieldExtract(word, v.ir.Imm32(16), width, true),
                                       v.ir.BitFieldExtract(word, v.ir.Imm32(24), width, true));
    }};
    // Unpack in register order before building the pair.
    const IR::Value first_offsets{unpack(first_word)};
    const IR::Value second_offsets{unpack(second_word)};
    return {first_offsets, second_offsets};
}
|
||||
|
||||
// Shared translation of TLD4 and TLD4_b.
//
// v              Visitor used to read/write registers and emit IR.
// insn           Raw instruction word.
// component_type Component to gather, stored in the instruction info.
// offset_type    Texel offset mode; anything other than None/AOFFI/PTP throws.
// is_bindless    When true the handle is read from the first meta register, otherwise it is
//                the immediate cbuf_offset * 4.
//
// Meta registers are consumed in this order: handle (bindless only), offsets, depth reference.
void Impl(TranslatorVisitor& v, u64 insn, ComponentType component_type, OffsetType offset_type,
          bool is_bindless) {
    union {
        u64 raw;
        BitField<0, 8, IR::Reg> dest_reg;
        BitField<8, 8, IR::Reg> coord_reg;
        BitField<20, 8, IR::Reg> meta_reg;
        BitField<28, 3, TextureType> type;
        BitField<31, 4, u64> mask;
        BitField<35, 1, u64> ndv;
        BitField<36, 13, u64> cbuf_offset;
        BitField<49, 1, u64> nodep;
        BitField<50, 1, u64> dc;
        BitField<51, 3, IR::Pred> sparse_pred;
    } const tld4{insn};
    const bool is_depth_compare{tld4.dc != 0};

    const IR::Value coords{MakeCoords(v, tld4.coord_reg, tld4.type)};

    IR::Reg meta_reg{tld4.meta_reg};
    IR::Value handle;
    if (is_bindless) {
        handle = v.X(meta_reg++);
    } else {
        handle = v.ir.Imm32(static_cast<u32>(tld4.cbuf_offset.Value() * 4));
    }

    IR::Value offset;
    IR::Value offset2;
    switch (offset_type) {
    case OffsetType::None:
        break;
    case OffsetType::AOFFI:
        offset = MakeOffset(v, meta_reg, tld4.type);
        break;
    case OffsetType::PTP:
        std::tie(offset, offset2) = MakeOffsetPTP(v, meta_reg);
        break;
    default:
        throw NotImplementedException("Invalid offset type {}", offset_type);
    }

    IR::F32 dref;
    if (is_depth_compare) {
        dref = v.F(meta_reg++);
    }

    IR::TextureInstInfo info{};
    info.type.Assign(GetType(tld4.type));
    info.is_depth.Assign(is_depth_compare ? 1 : 0);
    info.gather_component.Assign(static_cast<u32>(component_type));
    const IR::Value sample{
        is_depth_compare ? v.ir.ImageGatherDref(handle, coords, offset, offset2, dref, info)
                         : v.ir.ImageGather(handle, coords, offset, offset2, info)};

    // Enabled mask bits are written to consecutive destination registers.
    IR::Reg dest_reg{tld4.dest_reg};
    for (size_t element = 0; element < 4; ++element) {
        if (((tld4.mask >> element) & 1) != 0) {
            v.F(dest_reg, IR::F32{v.ir.CompositeExtract(sample, element)});
            ++dest_reg;
        }
    }
    if (tld4.sparse_pred != IR::Pred::PT) {
        v.ir.SetPred(tld4.sparse_pred, v.ir.LogicalNot(v.ir.GetSparseFromOp(sample)));
    }
}
|
||||
} // Anonymous namespace
|
||||
|
||||
// TLD4 with the texture handle taken from the instruction's constant buffer offset.
// Offset mode lives in bits 54-55 and the gathered component in bits 56-57.
void TranslatorVisitor::TLD4(u64 insn) {
    union {
        u64 raw;
        BitField<54, 2, OffsetType> offset;
        BitField<56, 2, ComponentType> component;
    } const fields{insn};
    constexpr bool is_bindless{false};
    Impl(*this, insn, fields.component, fields.offset, is_bindless);
}
|
||||
|
||||
// Bindless TLD4: the texture handle is read from a register.
// Offset mode lives in bits 36-37 and the gathered component in bits 38-39.
void TranslatorVisitor::TLD4_b(u64 insn) {
    union {
        u64 raw;
        BitField<36, 2, OffsetType> offset;
        BitField<38, 2, ComponentType> component;
    } const fields{insn};
    constexpr bool is_bindless{true};
    Impl(*this, insn, fields.component, fields.offset, is_bindless);
}
|
||||
|
||||
} // namespace Shader::Maxwell
|
@@ -0,0 +1,134 @@
|
||||
// Copyright 2021 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include <utility>
|
||||
|
||||
#include "common/bit_field.h"
|
||||
#include "common/common_types.h"
|
||||
#include "shader_recompiler/frontend/ir/modifiers.h"
|
||||
#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
|
||||
|
||||
namespace Shader::Maxwell {
|
||||
namespace {
|
||||
// Result precision of TLD4S, decoded from a single bit of the instruction word.
enum class Precision : u64 {
    F32 = 0,
    F16 = 1,
};
|
||||
|
||||
// Component gathered by TLD4S. The numeric value is forwarded as-is to
// IR::TextureInstInfo::gather_component, so the order R, G, B, A (0..3) must not change.
enum class ComponentType : u64 {
    R,
    G,
    B,
    A,
};
|
||||
|
||||
union Encoding {
|
||||
u64 raw;
|
||||
BitField<55, 1, Precision> precision;
|
||||
BitField<52, 2, ComponentType> component_type;
|
||||
BitField<51, 1, u64> aoffi;
|
||||
BitField<50, 1, u64> dc;
|
||||
BitField<49, 1, u64> nodep;
|
||||
BitField<28, 8, IR::Reg> dest_reg_b;
|
||||
BitField<0, 8, IR::Reg> dest_reg_a;
|
||||
BitField<8, 8, IR::Reg> src_reg_a;
|
||||
BitField<20, 8, IR::Reg> src_reg_b;
|
||||
BitField<36, 13, u64> cbuf_offset;
|
||||
};
|
||||
|
||||
// Throws NotImplementedException unless IR::IsAligned(reg, alignment) holds.
void CheckAlignment(IR::Reg reg, size_t alignment) {
    if (IR::IsAligned(reg, alignment)) {
        return;
    }
    throw NotImplementedException("Unaligned source register {}", reg);
}
|
||||
|
||||
// Unpacks a 2D texel offset from reg: two sign-extended 6-bit fields at bits 0 and 8.
IR::Value MakeOffset(TranslatorVisitor& v, IR::Reg reg) {
    const IR::U32 packed{v.X(reg)};
    const auto field{[&](int first_bit) {
        return v.ir.BitFieldExtract(packed, v.ir.Imm32(first_bit), v.ir.Imm32(6), true);
    }};
    return v.ir.CompositeConstruct(field(0), field(8));
}
|
||||
|
||||
// Emits the gather for a TLD4S instruction and returns the IR sample value.
// The texture is always treated as Color2D and the handle is the immediate cbuf_offset * 4.
//
// Register usage by (aoffi, dc):
//   neither : coords = (reg_a, reg_b)
//   dc only : coords = (reg_a, reg_a + 1), dref = reg_b
//   aoffi   : coords = (reg_a, reg_a + 1), offset = reg_b
//   both    : as aoffi, plus dref = reg_b + 1
// Whenever a register pair is read, its base register must be 2-aligned, otherwise
// CheckAlignment throws.
IR::Value Sample(TranslatorVisitor& v, u64 insn) {
    const Encoding tld4s{insn};
    const bool has_offset{tld4s.aoffi != 0};
    const bool is_depth_compare{tld4s.dc != 0};
    const IR::U32 handle{v.ir.Imm32(static_cast<u32>(tld4s.cbuf_offset * 4))};
    const IR::Reg reg_a{tld4s.src_reg_a};
    const IR::Reg reg_b{tld4s.src_reg_b};

    IR::TextureInstInfo info{};
    if (tld4s.precision == Precision::F16) {
        info.relaxed_precision.Assign(1);
    }
    info.gather_component.Assign(static_cast<u32>(tld4s.component_type.Value()));
    info.type.Assign(Shader::TextureType::Color2D);
    info.is_depth.Assign(is_depth_compare ? 1 : 0);

    if (!has_offset && !is_depth_compare) {
        const IR::Value coords{v.ir.CompositeConstruct(v.F(reg_a), v.F(reg_b))};
        return v.ir.ImageGather(handle, coords, {}, {}, info);
    }

    // Every remaining variant reads both coordinates from the reg_a pair.
    CheckAlignment(reg_a, 2);
    const IR::Value coords{v.ir.CompositeConstruct(v.F(reg_a), v.F(reg_a + 1))};
    if (!has_offset) {
        const IR::F32 dref{v.F(reg_b)};
        return v.ir.ImageGatherDref(handle, coords, {}, {}, dref, info);
    }

    const IR::Value offset{MakeOffset(v, reg_b)};
    if (!is_depth_compare) {
        return v.ir.ImageGather(handle, coords, offset, {}, info);
    }
    CheckAlignment(reg_b, 2);
    const IR::F32 dref{v.F(reg_b + 1)};
    return v.ir.ImageGatherDref(handle, coords, offset, {}, dref, info);
}
|
||||
|
||||
// Maps a 32-bit result slot (0..3) to its destination register:
// slots 0/1 go to dest_reg_a / dest_reg_a + 1, slots 2/3 to dest_reg_b / dest_reg_b + 1.
// The second register of a pair is only handed out when the pair's base is 2-aligned.
// Throws LogicError for any other index.
IR::Reg RegStoreComponent32(u64 insn, size_t index) {
    if (index > 3) {
        throw LogicError("Invalid store index {}", index);
    }
    const Encoding tlds4{insn};
    const IR::Reg first_pair{tlds4.dest_reg_a};
    const IR::Reg second_pair{tlds4.dest_reg_b};
    const IR::Reg base{index < 2 ? first_pair : second_pair};
    if (index % 2 == 0) {
        return base;
    }
    CheckAlignment(base, 2);
    return base + 1;
}
|
||||
|
||||
// Writes all four components of sample as 32-bit floats to the registers selected by
// RegStoreComponent32.
void Store32(TranslatorVisitor& v, u64 insn, const IR::Value& sample) {
    size_t slot{0};
    while (slot < 4) {
        const IR::Reg target{RegStoreComponent32(insn, slot)};
        v.F(target, IR::F32{v.ir.CompositeExtract(sample, slot)});
        ++slot;
    }
}
|
||||
|
||||
// Packs two floats into a single 32-bit value through PackHalf2x16; lhs is the first
// component of the packed pair.
IR::U32 Pack(TranslatorVisitor& v, const IR::F32& lhs, const IR::F32& rhs) {
    const IR::Value pair{v.ir.CompositeConstruct(lhs, rhs)};
    return v.ir.PackHalf2x16(pair);
}
|
||||
|
||||
// Writes the four components of sample as packed half pairs:
// components 0/1 go to dest_reg_a and components 2/3 to dest_reg_b.
void Store16(TranslatorVisitor& v, u64 insn, const IR::Value& sample) {
    const auto component{[&](size_t index) {
        return IR::F32{v.ir.CompositeExtract(sample, index)};
    }};
    // Extract everything first, then pack and store.
    const IR::F32 first{component(0)};
    const IR::F32 second{component(1)};
    const IR::F32 third{component(2)};
    const IR::F32 fourth{component(3)};

    const Encoding tld4s{insn};
    v.X(tld4s.dest_reg_a, Pack(v, first, second));
    v.X(tld4s.dest_reg_b, Pack(v, third, fourth));
}
|
||||
} // Anonymous namespace
|
||||
|
||||
// Translates TLD4S: emits the gather, then stores the result either as four 32-bit floats
// or as two packed half pairs, depending on the precision bit.
void TranslatorVisitor::TLD4S(u64 insn) {
    const IR::Value sample{Sample(*this, insn)};
    const Encoding tld4s{insn};
    // The precision field is one bit wide, so F16 is the only alternative to F32.
    if (tld4s.precision == Precision::F16) {
        Store16(*this, insn, sample);
    } else {
        Store32(*this, insn, sample);
    }
}
|
||||
|
||||
} // namespace Shader::Maxwell
|
182
src/shader_recompiler/frontend/maxwell/translate/impl/texture_gradient.cpp
Executable file
182
src/shader_recompiler/frontend/maxwell/translate/impl/texture_gradient.cpp
Executable file
@@ -0,0 +1,182 @@
|
||||
// Copyright 2021 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include <optional>
|
||||
|
||||
#include "common/bit_field.h"
|
||||
#include "common/common_types.h"
|
||||
#include "shader_recompiler/frontend/ir/modifiers.h"
|
||||
#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
|
||||
|
||||
namespace Shader::Maxwell {
|
||||
namespace {
|
||||
|
||||
// Texture type selector of TXD. Impl decodes it from a 3-bit instruction field, so the
// enumerators cover every encodable value.
enum class TextureType : u64 {
    _1D = 0,
    ARRAY_1D = 1,
    _2D = 2,
    ARRAY_2D = 3,
    _3D = 4,
    ARRAY_3D = 5,
    CUBE = 6,
    ARRAY_CUBE = 7,
};
|
||||
|
||||
// Translates the instruction's texture type into the shader-wide texture type.
// Throws NotImplementedException for 3D array textures and for values outside the enum.
Shader::TextureType GetType(TextureType type) {
    if (type == TextureType::ARRAY_3D) {
        throw NotImplementedException("3D array texture type");
    }
    switch (type) {
    case TextureType::_1D:
        return Shader::TextureType::Color1D;
    case TextureType::_2D:
        return Shader::TextureType::Color2D;
    case TextureType::_3D:
        return Shader::TextureType::Color3D;
    case TextureType::CUBE:
        return Shader::TextureType::ColorCube;
    case TextureType::ARRAY_1D:
        return Shader::TextureType::ColorArray1D;
    case TextureType::ARRAY_2D:
        return Shader::TextureType::ColorArray2D;
    case TextureType::ARRAY_CUBE:
        return Shader::TextureType::ColorArrayCube;
    case TextureType::ARRAY_3D:
        // Already rejected by the guard above.
        break;
    }
    throw NotImplementedException("Invalid texture type {}", type);
}
|
||||
|
||||
// Unpacks a 2D texel offset from reg: two adjacent sign-extended 4-bit fields.
// The first field starts at bit 12 when a LOD clamp is present and at bit 16 otherwise.
IR::Value MakeOffset(TranslatorVisitor& v, IR::Reg reg, bool has_lod_clamp) {
    const IR::U32 packed{v.X(reg)};
    const u32 first_bit{has_lod_clamp ? 12U : 16U};
    const auto field{[&](u32 start) {
        return v.ir.BitFieldExtract(packed, v.ir.Imm32(start), v.ir.Imm32(4), true);
    }};
    return v.ir.CompositeConstruct(field(first_bit), field(first_bit + 4));
}
|
||||
|
||||
// Shared translation of TXD and TXD_b (texture sample with explicit derivatives).
//
// v           Visitor used to read/write registers and emit IR.
// insn        Raw instruction word.
// is_bindless When true the handle is read from the first coordinate register (and the
//             coordinates start one register later), otherwise it is the immediate
//             cbuf_offset * 4.
//
// Only 1D, 1D array, 2D and 2D array textures are handled; every other type throws
// NotImplementedException, as does the LC (LOD clamp) modifier.
void Impl(TranslatorVisitor& v, u64 insn, bool is_bindless) {
    union {
        u64 raw;
        BitField<49, 1, u64> nodep;
        BitField<35, 1, u64> aoffi;
        BitField<50, 1, u64> lc;
        BitField<51, 3, IR::Pred> sparse_pred;
        BitField<0, 8, IR::Reg> dest_reg;
        BitField<8, 8, IR::Reg> coord_reg;
        BitField<20, 8, IR::Reg> derivate_reg;
        BitField<28, 3, TextureType> type;
        BitField<31, 4, u64> mask;
        BitField<36, 13, u64> cbuf_offset;
    } const txd{insn};

    const bool has_lod_clamp = txd.lc != 0;
    if (has_lod_clamp) {
        throw NotImplementedException("TXD.LC - CLAMP is not implemented");
    }

    IR::Value coords;
    u32 num_derivates{};
    IR::Reg base_reg{txd.coord_reg};
    // Register following the coordinates; it packs the array index, offsets and LOD clamp.
    IR::Reg last_reg;
    IR::Value handle;
    if (is_bindless) {
        handle = v.X(base_reg++);
    } else {
        handle = v.ir.Imm32(static_cast<u32>(txd.cbuf_offset.Value() * 4));
    }

    // Reads the array index from the low bits of last_reg, so last_reg must be assigned
    // before this is called.
    const auto read_array{[&]() -> IR::F32 {
        const IR::U32 base{v.ir.Imm32(0)};
        const IR::U32 count{v.ir.Imm32(has_lod_clamp ? 12 : 16)};
        const IR::U32 array_index{v.ir.BitFieldExtract(v.X(last_reg), base, count)};
        return v.ir.ConvertUToF(32, 16, array_index);
    }};
    switch (txd.type) {
    case TextureType::_1D: {
        coords = v.F(base_reg);
        num_derivates = 1;
        last_reg = base_reg + 1;
        break;
    }
    case TextureType::ARRAY_1D: {
        last_reg = base_reg + 1;
        coords = v.ir.CompositeConstruct(v.F(base_reg), read_array());
        num_derivates = 1;
        break;
    }
    case TextureType::_2D: {
        last_reg = base_reg + 2;
        coords = v.ir.CompositeConstruct(v.F(base_reg), v.F(base_reg + 1));
        num_derivates = 2;
        break;
    }
    case TextureType::ARRAY_2D: {
        last_reg = base_reg + 2;
        coords = v.ir.CompositeConstruct(v.F(base_reg), v.F(base_reg + 1), read_array());
        num_derivates = 2;
        break;
    }
    default:
        throw NotImplementedException("Invalid texture type");
    }

    // Two derivative values are read per coordinate dimension.
    const IR::Reg derivate_reg{txd.derivate_reg};
    IR::Value derivates;
    switch (num_derivates) {
    case 1: {
        derivates = v.ir.CompositeConstruct(v.F(derivate_reg), v.F(derivate_reg + 1));
        break;
    }
    case 2: {
        derivates = v.ir.CompositeConstruct(v.F(derivate_reg), v.F(derivate_reg + 1),
                                            v.F(derivate_reg + 2), v.F(derivate_reg + 3));
        break;
    }
    default:
        throw NotImplementedException("Invalid texture type");
    }

    IR::Value offset;
    if (txd.aoffi != 0) {
        offset = MakeOffset(v, last_reg, has_lod_clamp);
    }

    IR::F32 lod_clamp;
    if (has_lod_clamp) {
        // Lod Clamp is a Fixed Point 4.8, we need to transform it to float.
        // To convert a fixed point value: float(value) / float(1 << fixed_point);
        // in this case the fixed_point is 8, so scale by 1/256 (exactly representable).
        // The previous code multiplied by 256 instead. This path is unreachable until
        // TXD.LC stops throwing above.
        const IR::F32 conv4_8fixp_f{v.ir.Imm32(1.0f / static_cast<f32>(1U << 8))};
        const IR::F32 fixp_lc{v.ir.ConvertUToF(
            32, 16, v.ir.BitFieldExtract(v.X(last_reg), v.ir.Imm32(20), v.ir.Imm32(12)))};
        lod_clamp = v.ir.FPMul(fixp_lc, conv4_8fixp_f);
    }

    IR::TextureInstInfo info{};
    info.type.Assign(GetType(txd.type));
    info.num_derivates.Assign(num_derivates);
    info.has_lod_clamp.Assign(has_lod_clamp ? 1 : 0);
    const IR::Value sample{v.ir.ImageGradient(handle, coords, derivates, offset, lod_clamp, info)};

    // Enabled mask bits are written to consecutive destination registers.
    IR::Reg dest_reg{txd.dest_reg};
    for (size_t element = 0; element < 4; ++element) {
        if (((txd.mask >> element) & 1) == 0) {
            continue;
        }
        v.F(dest_reg, IR::F32{v.ir.CompositeExtract(sample, element)});
        ++dest_reg;
    }
    if (txd.sparse_pred != IR::Pred::PT) {
        v.ir.SetPred(txd.sparse_pred, v.ir.LogicalNot(v.ir.GetSparseFromOp(sample)));
    }
}
|
||||
} // Anonymous namespace
|
||||
|
||||
// TXD with the texture handle taken from the instruction's constant buffer offset.
void TranslatorVisitor::TXD(u64 insn) {
    constexpr bool is_bindless{false};
    Impl(*this, insn, is_bindless);
}
|
||||
|
||||
// Bindless TXD: the texture handle is read from a register.
void TranslatorVisitor::TXD_b(u64 insn) {
    constexpr bool is_bindless{true};
    Impl(*this, insn, is_bindless);
}
|
||||
|
||||
} // namespace Shader::Maxwell
|
165
src/shader_recompiler/frontend/maxwell/translate/impl/texture_load.cpp
Executable file
165
src/shader_recompiler/frontend/maxwell/translate/impl/texture_load.cpp
Executable file
@@ -0,0 +1,165 @@
|
||||
// Copyright 2021 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include <optional>
|
||||
|
||||
#include "common/bit_field.h"
|
||||
#include "common/common_types.h"
|
||||
#include "shader_recompiler/frontend/ir/modifiers.h"
|
||||
#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
|
||||
|
||||
namespace Shader::Maxwell {
|
||||
namespace {
|
||||
|
||||
// Texture type selector of TLD. Impl decodes it from a 3-bit instruction field, so the
// enumerators cover every encodable value.
enum class TextureType : u64 {
    _1D = 0,
    ARRAY_1D = 1,
    _2D = 2,
    ARRAY_2D = 3,
    _3D = 4,
    ARRAY_3D = 5,
    CUBE = 6,
    ARRAY_CUBE = 7,
};
|
||||
|
||||
// Translates the instruction's texture type into the shader-wide texture type.
// Throws NotImplementedException for 3D array textures and for values outside the enum.
Shader::TextureType GetType(TextureType type) {
    if (type == TextureType::ARRAY_3D) {
        throw NotImplementedException("3D array texture type");
    }
    switch (type) {
    case TextureType::_1D:
        return Shader::TextureType::Color1D;
    case TextureType::_2D:
        return Shader::TextureType::Color2D;
    case TextureType::_3D:
        return Shader::TextureType::Color3D;
    case TextureType::CUBE:
        return Shader::TextureType::ColorCube;
    case TextureType::ARRAY_1D:
        return Shader::TextureType::ColorArray1D;
    case TextureType::ARRAY_2D:
        return Shader::TextureType::ColorArray2D;
    case TextureType::ARRAY_CUBE:
        return Shader::TextureType::ColorArrayCube;
    case TextureType::ARRAY_3D:
        // Already rejected by the guard above.
        break;
    }
    throw NotImplementedException("Invalid texture type {}", type);
}
|
||||
|
||||
// Builds the integer coordinate operand of a texel fetch from consecutive registers
// starting at reg. Non-array types read their coordinates from reg onwards. Array types read
// the coordinates from reg + 1 onwards and append, as the last component, the low 16 bits
// of reg itself.
// Throws NotImplementedException for 3D array textures and for values outside the enum.
IR::Value MakeCoords(TranslatorVisitor& v, IR::Reg reg, TextureType type) {
    const auto layer{[&]() -> IR::U32 {
        return v.ir.BitFieldExtract(v.X(reg), v.ir.Imm32(0), v.ir.Imm32(16));
    }};
    switch (type) {
    case TextureType::_1D:
        return v.X(reg);
    case TextureType::_2D:
        return v.ir.CompositeConstruct(v.X(reg), v.X(reg + 1));
    case TextureType::_3D:
    case TextureType::CUBE:
        return v.ir.CompositeConstruct(v.X(reg), v.X(reg + 1), v.X(reg + 2));
    case TextureType::ARRAY_1D:
        return v.ir.CompositeConstruct(v.X(reg + 1), layer());
    case TextureType::ARRAY_2D:
        return v.ir.CompositeConstruct(v.X(reg + 1), v.X(reg + 2), layer());
    case TextureType::ARRAY_CUBE:
        return v.ir.CompositeConstruct(v.X(reg + 1), v.X(reg + 2), v.X(reg + 3), layer());
    case TextureType::ARRAY_3D:
        throw NotImplementedException("3D array texture type");
    }
    throw NotImplementedException("Invalid texture type {}", type);
}
|
||||
|
||||
// Reads one register (post-incrementing reg) and unpacks the AOFFI texel offsets stored in it.
// Each offset is a sign-extended 4-bit field; the fields start at bits 0, 4 and 8.
// Returns a scalar for 1D types and a 2- or 3-component composite for 2D/3D types.
// The register is consumed even when the type is rejected.
// Throws NotImplementedException for cube types and for values outside the enum.
IR::Value MakeOffset(TranslatorVisitor& v, IR::Reg& reg, TextureType type) {
    const IR::U32 packed{v.X(reg++)};
    const auto field{[&](int first_bit) {
        return v.ir.BitFieldExtract(packed, v.ir.Imm32(first_bit), v.ir.Imm32(4), true);
    }};
    switch (type) {
    case TextureType::_1D:
    case TextureType::ARRAY_1D:
        return field(0);
    case TextureType::_2D:
    case TextureType::ARRAY_2D:
        return v.ir.CompositeConstruct(field(0), field(4));
    case TextureType::_3D:
    case TextureType::ARRAY_3D:
        return v.ir.CompositeConstruct(field(0), field(4), field(8));
    case TextureType::CUBE:
    case TextureType::ARRAY_CUBE:
        throw NotImplementedException("Illegal offset on CUBE sample");
    }
    throw NotImplementedException("Invalid texture type {}", type);
}
|
||||
|
||||
// Shared translation of TLD and TLD_b (texel fetch).
//
// v           Visitor used to read/write registers and emit IR.
// insn        Raw instruction word.
// is_bindless When true the handle is read from the first meta register, otherwise it is
//             the immediate cbuf_offset * 4.
//
// Meta registers are consumed in this order: handle (bindless only), LOD, AOFFI offsets,
// multisample index. Without the LOD flag an immediate LOD of 0 is used.
// Throws NotImplementedException when the CL (clamp) modifier is set.
void Impl(TranslatorVisitor& v, u64 insn, bool is_bindless) {
    union {
        u64 raw;
        BitField<49, 1, u64> nodep;
        BitField<55, 1, u64> lod;
        BitField<50, 1, u64> multisample;
        BitField<35, 1, u64> aoffi;
        BitField<54, 1, u64> clamp;
        BitField<51, 3, IR::Pred> sparse_pred;
        BitField<0, 8, IR::Reg> dest_reg;
        BitField<8, 8, IR::Reg> coord_reg;
        BitField<20, 8, IR::Reg> meta_reg;
        BitField<28, 3, TextureType> type;
        BitField<31, 4, u64> mask;
        BitField<36, 13, u64> cbuf_offset;
    } const tld{insn};

    const IR::Value coords{MakeCoords(v, tld.coord_reg, tld.type)};

    IR::Reg meta_reg{tld.meta_reg};
    IR::Value handle;
    IR::Value offset;
    IR::U32 lod;
    IR::U32 multisample;
    if (is_bindless) {
        handle = v.X(meta_reg++);
    } else {
        handle = v.ir.Imm32(static_cast<u32>(tld.cbuf_offset.Value() * 4));
    }
    if (tld.lod != 0) {
        lod = v.X(meta_reg++);
    } else {
        lod = v.ir.Imm32(0U);
    }
    if (tld.aoffi != 0) {
        offset = MakeOffset(v, meta_reg, tld.type);
    }
    if (tld.multisample != 0) {
        multisample = v.X(meta_reg++);
    }
    if (tld.clamp != 0) {
        // Message typo fixed ("implmented" -> "implemented").
        throw NotImplementedException("TLD.CL - CLAMP is not implemented");
    }
    IR::TextureInstInfo info{};
    info.type.Assign(GetType(tld.type));
    const IR::Value sample{v.ir.ImageFetch(handle, coords, offset, lod, multisample, info)};

    // Enabled mask bits are written to consecutive destination registers.
    IR::Reg dest_reg{tld.dest_reg};
    for (size_t element = 0; element < 4; ++element) {
        if (((tld.mask >> element) & 1) == 0) {
            continue;
        }
        v.F(dest_reg, IR::F32{v.ir.CompositeExtract(sample, element)});
        ++dest_reg;
    }
    if (tld.sparse_pred != IR::Pred::PT) {
        v.ir.SetPred(tld.sparse_pred, v.ir.LogicalNot(v.ir.GetSparseFromOp(sample)));
    }
}
|
||||
} // Anonymous namespace
|
||||
|
||||
// TLD with the texture handle taken from the instruction's constant buffer offset.
void TranslatorVisitor::TLD(u64 insn) {
    constexpr bool is_bindless{false};
    Impl(*this, insn, is_bindless);
}
|
||||
|
||||
// Bindless TLD: the texture handle is read from a register.
void TranslatorVisitor::TLD_b(u64 insn) {
    constexpr bool is_bindless{true};
    Impl(*this, insn, is_bindless);
}
|
||||
|
||||
} // namespace Shader::Maxwell
|
242
src/shader_recompiler/frontend/maxwell/translate/impl/texture_load_swizzled.cpp
Executable file
242
src/shader_recompiler/frontend/maxwell/translate/impl/texture_load_swizzled.cpp
Executable file
@@ -0,0 +1,242 @@
|
||||
// Copyright 2021 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include <array>
|
||||
|
||||
#include "common/bit_field.h"
|
||||
#include "common/common_types.h"
|
||||
#include "shader_recompiler/frontend/ir/modifiers.h"
|
||||
#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
|
||||
|
||||
namespace Shader::Maxwell {
|
||||
namespace {
|
||||
// Result precision of TLDS, decoded from a single bit of the instruction word.
enum class Precision : u64 {
    F16 = 0,
    F32 = 1,
};
|
||||
|
||||
// Component write-mask bits: bit n set means component n of the sample is stored.
constexpr unsigned R = 1U << 0;
constexpr unsigned G = 1U << 1;
constexpr unsigned B = 1U << 2;
constexpr unsigned A = 1U << 3;

// Swizzle encoding -> write mask, used when only the first destination pair is written
// (dest_reg_b is RZ).
constexpr std::array<unsigned, 8> RG_LUT{
    R,     // 0
    G,     // 1
    B,     // 2
    A,     // 3
    R | G, // 4
    R | A, // 5
    G | A, // 6
    B | A, // 7
};

// Swizzle encoding -> write mask, used when both destination pairs are written.
constexpr std::array<unsigned, 5> RGBA_LUT{
    R | G | B,     // 0
    R | G | A,     // 1
    R | B | A,     // 2
    G | B | A,     // 3
    R | G | B | A, // 4
};
|
||||
|
||||
union Encoding {
|
||||
u64 raw;
|
||||
BitField<59, 1, Precision> precision;
|
||||
BitField<54, 1, u64> aoffi;
|
||||
BitField<53, 1, u64> lod;
|
||||
BitField<55, 1, u64> ms;
|
||||
BitField<49, 1, u64> nodep;
|
||||
BitField<28, 8, IR::Reg> dest_reg_b;
|
||||
BitField<0, 8, IR::Reg> dest_reg_a;
|
||||
BitField<8, 8, IR::Reg> src_reg_a;
|
||||
BitField<20, 8, IR::Reg> src_reg_b;
|
||||
BitField<36, 13, u64> cbuf_offset;
|
||||
BitField<50, 3, u64> swizzle;
|
||||
BitField<53, 4, u64> encoding;
|
||||
};
|
||||
|
||||
// Throws NotImplementedException unless IR::IsAligned(reg, alignment) holds.
void CheckAlignment(IR::Reg reg, size_t alignment) {
    if (IR::IsAligned(reg, alignment)) {
        return;
    }
    throw NotImplementedException("Unaligned source register {}", reg);
}
|
||||
|
||||
// Unpacks a 2D texel offset from reg: two sign-extended 4-bit fields at bits 0 and 4.
IR::Value MakeOffset(TranslatorVisitor& v, IR::Reg reg) {
    const IR::U32 packed{v.X(reg)};
    const auto field{[&](int first_bit) {
        return v.ir.BitFieldExtract(packed, v.ir.Imm32(first_bit), v.ir.Imm32(4), true);
    }};
    return v.ir.CompositeConstruct(field(0), field(4));
}
|
||||
|
||||
// Emits the texel fetch for a TLDS instruction and returns the IR sample value.
// The 4-bit `encoding` field selects the texture type and how src_reg_a / src_reg_b are
// interpreted (see the per-case comments). The LOD defaults to an immediate 0 and the handle
// is the immediate cbuf_offset * 4.
// Throws NotImplementedException for unlisted encodings and, through CheckAlignment, when a
// register pair does not start on a 2-aligned register.
IR::Value Sample(TranslatorVisitor& v, u64 insn) {
    const Encoding tlds{insn};
    const IR::U32 handle{v.ir.Imm32(static_cast<u32>(tlds.cbuf_offset * 4))};
    const IR::Reg reg_a{tlds.src_reg_a};
    const IR::Reg reg_b{tlds.src_reg_b};
    // Integer (x, y) taken from reg_a and the register right after it.
    const auto xy_from_pair_a{[&]() -> IR::Value {
        return v.ir.CompositeConstruct(v.X(reg_a), v.X(reg_a + 1));
    }};

    IR::Value coords;
    IR::Value offsets;
    IR::U32 lod{v.ir.Imm32(0U)};
    IR::U32 multisample;
    Shader::TextureType texture_type{};
    const u64 encoding{tlds.encoding.Value()};
    switch (encoding) {
    case 0: // 1D: x = reg_a
        texture_type = Shader::TextureType::Color1D;
        coords = v.X(reg_a);
        break;
    case 1: // 1D with LOD: x = reg_a, lod = reg_b
        texture_type = Shader::TextureType::Color1D;
        coords = v.X(reg_a);
        lod = v.X(reg_b);
        break;
    case 2: // 2D: x = reg_a, y = reg_b
        texture_type = Shader::TextureType::Color2D;
        coords = v.ir.CompositeConstruct(v.X(reg_a), v.X(reg_b));
        break;
    case 4: // 2D with offset: (x, y) = reg_a pair, offset = reg_b
        CheckAlignment(reg_a, 2);
        texture_type = Shader::TextureType::Color2D;
        coords = xy_from_pair_a();
        offsets = MakeOffset(v, reg_b);
        break;
    case 5: // 2D with LOD: (x, y) = reg_a pair, lod = reg_b
        CheckAlignment(reg_a, 2);
        texture_type = Shader::TextureType::Color2D;
        coords = xy_from_pair_a();
        lod = v.X(reg_b);
        break;
    case 6: // 2D multisample: (x, y) = reg_a pair, sample index = reg_b
        CheckAlignment(reg_a, 2);
        texture_type = Shader::TextureType::Color2D;
        coords = xy_from_pair_a();
        multisample = v.X(reg_b);
        break;
    case 7: // 3D: (x, y) = reg_a pair, z = reg_b
        CheckAlignment(reg_a, 2);
        texture_type = Shader::TextureType::Color3D;
        coords = v.ir.CompositeConstruct(v.X(reg_a), v.X(reg_a + 1), v.X(reg_b));
        break;
    case 8: { // 2D array: layer = low 16 bits of reg_a, (x, y) = reg_b pair
        CheckAlignment(reg_b, 2);
        const IR::U32 layer{v.ir.BitFieldExtract(v.X(reg_a), v.ir.Imm32(0), v.ir.Imm32(16))};
        texture_type = Shader::TextureType::ColorArray2D;
        coords = v.ir.CompositeConstruct(v.X(reg_b), v.X(reg_b + 1), layer);
        break;
    }
    case 12: // 2D with LOD and offset: (x, y) = reg_a pair, lod = reg_b, offset = reg_b + 1
        CheckAlignment(reg_a, 2);
        CheckAlignment(reg_b, 2);
        texture_type = Shader::TextureType::Color2D;
        coords = xy_from_pair_a();
        lod = v.X(reg_b);
        offsets = MakeOffset(v, reg_b + 1);
        break;
    default:
        throw NotImplementedException("Illegal encoding {}", encoding);
    }

    IR::TextureInstInfo info{};
    info.type.Assign(texture_type);
    if (tlds.precision == Precision::F16) {
        info.relaxed_precision.Assign(1);
    }
    return v.ir.ImageFetch(handle, coords, offsets, lod, multisample, info);
}
|
||||
|
||||
// Returns the component write mask (a combination of R/G/B/A bits) selected by the
// instruction's swizzle field. RG_LUT is used when dest_reg_b is RZ, RGBA_LUT otherwise.
// Throws NotImplementedException when the encoding has no entry in the selected table.
unsigned Swizzle(u64 insn) {
    const Encoding tlds{insn};
    const size_t index{tlds.swizzle};
    const bool single_destination{tlds.dest_reg_b == IR::Reg::RZ};
    if (single_destination) {
        if (index < RG_LUT.size()) {
            return RG_LUT[index];
        }
        throw NotImplementedException("Illegal RG encoding {}", index);
    }
    if (index < RGBA_LUT.size()) {
        return RGBA_LUT[index];
    }
    throw NotImplementedException("Illegal RGBA encoding {}", index);
}
|
||||
|
||||
// Returns component number `component` of sample as an F32 value.
IR::F32 Extract(TranslatorVisitor& v, const IR::Value& sample, unsigned component) {
    const IR::F32 element{v.ir.CompositeExtract(sample, component)};
    return element;
}
|
||||
|
||||
// Maps a 32-bit result slot (0..3) to its destination register:
// slots 0/1 go to dest_reg_a / dest_reg_a + 1, slots 2/3 to dest_reg_b / dest_reg_b + 1.
// The second register of a pair is only handed out when the pair's base is 2-aligned.
// Throws LogicError for any other index.
IR::Reg RegStoreComponent32(u64 insn, unsigned index) {
    if (index > 3) {
        throw LogicError("Invalid store index {}", index);
    }
    const Encoding tlds{insn};
    const IR::Reg first_pair{tlds.dest_reg_a};
    const IR::Reg second_pair{tlds.dest_reg_b};
    const IR::Reg base{index < 2 ? first_pair : second_pair};
    if (index % 2 == 0) {
        return base;
    }
    CheckAlignment(base, 2);
    return base + 1;
}
|
||||
|
||||
// Stores the components selected by the swizzle mask as 32-bit floats. Selected components
// are written in ascending order to consecutive result slots (see RegStoreComponent32).
void Store32(TranslatorVisitor& v, u64 insn, const IR::Value& sample) {
    const unsigned mask{Swizzle(insn)};
    unsigned next_slot{0};
    for (unsigned component = 0; component < 4; ++component) {
        if (((mask >> component) & 1) != 0) {
            const IR::Reg target{RegStoreComponent32(insn, next_slot)};
            v.F(target, Extract(v, sample, component));
            ++next_slot;
        }
    }
}
|
||||
|
||||
// Packs two floats into a single 32-bit value through PackHalf2x16; lhs is the first
// component of the packed pair.
IR::U32 Pack(TranslatorVisitor& v, const IR::F32& lhs, const IR::F32& rhs) {
    const IR::Value pair{v.ir.CompositeConstruct(lhs, rhs)};
    return v.ir.PackHalf2x16(pair);
}
|
||||
|
||||
// Stores the components selected by the swizzle mask as packed halves.
// Selected components are compacted in ascending order. The first two go to dest_reg_a and
// the next two to dest_reg_b. A missing second half of a pair is filled with 0.0, and
// dest_reg_b is left untouched when at most two components are selected.
void Store16(TranslatorVisitor& v, u64 insn, const IR::Value& sample) {
    const unsigned mask{Swizzle(insn)};
    std::array<IR::F32, 4> selected;
    unsigned count{0};
    for (unsigned component = 0; component < 4; ++component) {
        if (((mask >> component) & 1) != 0) {
            selected[count] = Extract(v, sample, component);
            ++count;
        }
    }
    if (count == 0) {
        return;
    }
    const IR::F32 zero{v.ir.Imm32(0.0f)};
    // Selected component at position `index`, or zero padding past the end.
    const auto half{[&](unsigned index) { return index < count ? selected[index] : zero; }};

    const Encoding tlds{insn};
    v.X(tlds.dest_reg_a, Pack(v, selected[0], half(1)));
    if (count > 2) {
        v.X(tlds.dest_reg_b, Pack(v, selected[2], half(3)));
    }
}
|
||||
} // Anonymous namespace
|
||||
|
||||
// Translates TLDS: emits the texel fetch, then stores the swizzled result either as 32-bit
// floats or as packed halves, depending on the precision bit.
void TranslatorVisitor::TLDS(u64 insn) {
    const IR::Value sample{Sample(*this, insn)};
    const Encoding tlds{insn};
    // The precision field is one bit wide, so F16 is the only alternative to F32.
    if (tlds.precision == Precision::F16) {
        Store16(*this, insn, sample);
    } else {
        Store32(*this, insn, sample);
    }
}
|
||||
} // namespace Shader::Maxwell
|
131
src/shader_recompiler/frontend/maxwell/translate/impl/texture_mipmap_level.cpp
Executable file
131
src/shader_recompiler/frontend/maxwell/translate/impl/texture_mipmap_level.cpp
Executable file
@@ -0,0 +1,131 @@
|
||||
// Copyright 2021 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include <optional>
|
||||
|
||||
#include "common/bit_field.h"
|
||||
#include "common/common_types.h"
|
||||
#include "shader_recompiler/frontend/ir/modifiers.h"
|
||||
#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
|
||||
|
||||
namespace Shader::Maxwell {
|
||||
namespace {
|
||||
|
||||
// Texture type as encoded in the instruction (decoded from a 3-bit field by Impl below).
enum class TextureType : u64 {
    _1D = 0,
    ARRAY_1D = 1,
    _2D = 2,
    ARRAY_2D = 3,
    _3D = 4,
    ARRAY_3D = 5,
    CUBE = 6,
    ARRAY_CUBE = 7,
};
|
||||
|
||||
// Maps the instruction-level texture type to the recompiler's Shader::TextureType.
// Throws NotImplementedException for 3D arrays and for any value outside the enumeration.
Shader::TextureType GetType(TextureType type) {
    // 3D arrays are the only enumerated type without a translation.
    if (type == TextureType::ARRAY_3D) {
        throw NotImplementedException("3D array texture type");
    }
    switch (type) {
    case TextureType::_1D:
        return Shader::TextureType::Color1D;
    case TextureType::_2D:
        return Shader::TextureType::Color2D;
    case TextureType::_3D:
        return Shader::TextureType::Color3D;
    case TextureType::CUBE:
        return Shader::TextureType::ColorCube;
    case TextureType::ARRAY_1D:
        return Shader::TextureType::ColorArray1D;
    case TextureType::ARRAY_2D:
        return Shader::TextureType::ColorArray2D;
    case TextureType::ARRAY_CUBE:
        return Shader::TextureType::ColorArrayCube;
    default:
        break;
    }
    throw NotImplementedException("Invalid texture type {}", type);
}
|
||||
|
||||
// Builds the coordinate operand for a LOD query from consecutive registers starting at `reg`.
// For array types the first register is skipped: the ISA reads an array component there, but
// it is not needed on high level shading languages, so that information is dropped.
// Throws NotImplementedException for 3D arrays and for values outside the enumeration.
IR::Value MakeCoords(TranslatorVisitor& v, IR::Reg reg, TextureType type) {
    switch (type) {
    // Non-array types: coordinates start at `reg`.
    case TextureType::_1D:
        return v.F(reg);
    case TextureType::_2D:
        return v.ir.CompositeConstruct(v.F(reg), v.F(reg + 1));
    case TextureType::_3D:
    case TextureType::CUBE:
        return v.ir.CompositeConstruct(v.F(reg), v.F(reg + 1), v.F(reg + 2));
    // Array types: coordinates start one register later.
    case TextureType::ARRAY_1D:
        return v.F(reg + 1);
    case TextureType::ARRAY_2D:
        return v.ir.CompositeConstruct(v.F(reg + 1), v.F(reg + 2));
    case TextureType::ARRAY_CUBE:
        return v.ir.CompositeConstruct(v.F(reg + 1), v.F(reg + 2), v.F(reg + 3));
    case TextureType::ARRAY_3D:
        throw NotImplementedException("3D array texture type");
    }
    throw NotImplementedException("Invalid texture type {}", type);
}
|
||||
|
||||
// Shared translation of TMML and TMML_b (texture LOD query).
//  v           - translator whose IR emitter and register file are used
//  insn        - raw 64-bit instruction word
//  is_bindless - when true the texture handle is read from meta_reg; otherwise it is the
//                immediate cbuf_offset * 4
// Throws NotImplementedException when either of the two upper mask bits is set, and whatever
// MakeCoords/GetType throw for unsupported texture types.
void Impl(TranslatorVisitor& v, u64 insn, bool is_bindless) {
    union {
        u64 raw;
        BitField<49, 1, u64> nodep;
        BitField<35, 1, u64> ndv;
        BitField<0, 8, IR::Reg> dest_reg;
        BitField<8, 8, IR::Reg> coord_reg;
        BitField<20, 8, IR::Reg> meta_reg;
        BitField<28, 3, TextureType> type;
        BitField<31, 4, u64> mask;
        BitField<36, 13, u64> cbuf_offset;
    } const tmml{insn};

    // Only the two lowest result components are supported.
    if ((tmml.mask & 0b1100) != 0) {
        throw NotImplementedException("TMML BA results are not implemented");
    }
    const IR::Value coords{MakeCoords(v, tmml.coord_reg, tmml.type)};

    IR::U32 handle;
    IR::Reg meta_reg{tmml.meta_reg};
    if (is_bindless) {
        handle = v.X(meta_reg++);
    } else {
        handle = v.ir.Imm32(static_cast<u32>(tmml.cbuf_offset.Value() * 4));
    }
    IR::TextureInstInfo info{};
    info.type.Assign(GetType(tmml.type));
    const IR::Value sample{v.ir.ImageQueryLod(handle, coords, info)};

    // Enabled components are written to consecutive registers starting at dest_reg.
    IR::Reg dest_reg{tmml.dest_reg};
    for (size_t element = 0; element < 4; ++element) {
        if (((tmml.mask >> element) & 1) == 0) {
            continue;
        }
        IR::F32 value{v.ir.CompositeExtract(sample, element)};
        if (element < 2) {
            // Component 0 is converted as unsigned 32-bit, component 1 as signed 16-bit;
            // both are then shifted left by 8 before being stored as integers.
            IR::U32 casted_value;
            if (element == 0) {
                casted_value = v.ir.ConvertFToU(32, value);
            } else {
                casted_value = v.ir.ConvertFToS(16, value);
            }
            v.X(dest_reg, v.ir.ShiftLeftLogical(casted_value, v.ir.Imm32(8)));
        } else {
            // Not reachable today: mask bits 2 and 3 are rejected above.
            v.F(dest_reg, value);
        }
        ++dest_reg;
    }
}
|
||||
} // Anonymous namespace
|
||||
|
||||
// TMML: texture LOD query using the bound (constant buffer offset) handle form.
void TranslatorVisitor::TMML(u64 insn) {
    constexpr bool is_bindless{false};
    Impl(*this, insn, is_bindless);
}
|
||||
|
||||
// TMML_b: texture LOD query using the bindless (register) handle form.
void TranslatorVisitor::TMML_b(u64 insn) {
    constexpr bool is_bindless{true};
    Impl(*this, insn, is_bindless);
}
|
||||
|
||||
} // namespace Shader::Maxwell
|
76
src/shader_recompiler/frontend/maxwell/translate/impl/texture_query.cpp
Executable file
76
src/shader_recompiler/frontend/maxwell/translate/impl/texture_query.cpp
Executable file
@@ -0,0 +1,76 @@
|
||||
// Copyright 2021 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include <optional>
|
||||
|
||||
#include "common/bit_field.h"
|
||||
#include "common/common_types.h"
|
||||
#include "shader_recompiler/frontend/ir/modifiers.h"
|
||||
#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
|
||||
|
||||
namespace Shader::Maxwell {
|
||||
namespace {
|
||||
// Query mode as encoded in the instruction (decoded from a 3-bit field by Impl below).
enum class Mode : u64 {
    Dimension = 0b001,
    TextureType = 0b010,
    SamplePos = 0b101,
};
|
||||
|
||||
// Emits the texture query selected by `mode`.
// Only Mode::Dimension is translated: the LOD is read from `src_reg` and passed to
// ImageQueryDimension. Every other mode throws NotImplementedException.
IR::Value Query(TranslatorVisitor& v, const IR::U32& handle, Mode mode, IR::Reg src_reg) {
    if (mode != Mode::Dimension) {
        throw NotImplementedException("Mode {}", mode);
    }
    const IR::U32 lod{v.X(src_reg)};
    return v.ir.ImageQueryDimension(handle, lod);
}
|
||||
|
||||
// Shared translation of TXQ and TXQ_b (texture query).
//  cbuf_offset - when it holds a value, that value is the texture handle as an immediate;
//                when empty, the handle is read from src_reg and the query operand is taken
//                from the register that follows it.
// The components enabled in `mask` are written to consecutive registers from dest_reg.
void Impl(TranslatorVisitor& v, u64 insn, std::optional<u32> cbuf_offset) {
    union {
        u64 raw;
        BitField<49, 1, u64> nodep;
        BitField<0, 8, IR::Reg> dest_reg;
        BitField<8, 8, IR::Reg> src_reg;
        BitField<22, 3, Mode> mode;
        BitField<31, 4, u64> mask;
    } const txq{insn};

    const bool has_bound_handle{cbuf_offset.has_value()};
    IR::Reg operand_reg{txq.src_reg};
    const IR::U32 handle{has_bound_handle ? v.ir.Imm32(*cbuf_offset) : v.X(operand_reg)};
    if (!has_bound_handle) {
        // The handle consumed the first source register.
        ++operand_reg;
    }
    const IR::Value query{Query(v, handle, txq.mode, operand_reg)};

    IR::Reg output_reg{txq.dest_reg};
    for (size_t component = 0; component < 4; ++component) {
        const bool is_requested{((txq.mask >> component) & 1) != 0};
        if (!is_requested) {
            continue;
        }
        v.X(output_reg, IR::U32{v.ir.CompositeExtract(query, component)});
        ++output_reg;
    }
}
|
||||
} // Anonymous namespace
|
||||
|
||||
// TXQ: texture query whose handle is the immediate cbuf_offset field multiplied by 4.
void TranslatorVisitor::TXQ(u64 insn) {
    union {
        u64 raw;
        BitField<36, 13, u64> cbuf_offset;
    } const txq{insn};

    const u32 handle_offset{static_cast<u32>(txq.cbuf_offset * 4)};
    Impl(*this, insn, handle_offset);
}
|
||||
|
||||
// TXQ_b: texture query without an immediate handle; Impl reads the handle from a register.
void TranslatorVisitor::TXQ_b(u64 insn) {
    const std::optional<u32> no_immediate_handle{};
    Impl(*this, insn, no_immediate_handle);
}
|
||||
|
||||
} // namespace Shader::Maxwell
|
30
src/shader_recompiler/frontend/maxwell/translate/impl/video_helper.cpp
Executable file
30
src/shader_recompiler/frontend/maxwell/translate/impl/video_helper.cpp
Executable file
@@ -0,0 +1,30 @@
|
||||
// Copyright 2021 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include "shader_recompiler/exception.h"
|
||||
#include "shader_recompiler/frontend/maxwell/translate/impl/video_helper.h"
|
||||
|
||||
namespace Shader::Maxwell {
|
||||
|
||||
// Extracts the lane of `value` chosen by `selector` for the given operand width.
//  Byte / Unknown - 8 bits starting at bit selector * 8
//  Short          - 16 bits starting at bit selector * 16
//  Word           - `value` is returned unchanged
// `is_signed` is forwarded to the bit field extraction.
// Throws NotImplementedException for any other width.
IR::U32 ExtractVideoOperandValue(IR::IREmitter& ir, const IR::U32& value, VideoWidth width,
                                 u32 selector, bool is_signed) {
    int lane_bits{};
    switch (width) {
    case VideoWidth::Word:
        return value;
    case VideoWidth::Byte:
    case VideoWidth::Unknown:
        lane_bits = 8;
        break;
    case VideoWidth::Short:
        lane_bits = 16;
        break;
    default:
        throw NotImplementedException("Unknown VideoWidth {}", width);
    }
    return ir.BitFieldExtract(value, ir.Imm32(selector * lane_bits), ir.Imm32(lane_bits),
                              is_signed);
}
|
||||
|
||||
// Returns the effective width of a video source operand.
// Immediates must be in 16-bit format, so they always yield VideoWidth::Short;
// otherwise the encoded `width` is returned as is.
VideoWidth GetVideoSourceWidth(VideoWidth width, bool is_immediate) {
    if (is_immediate) {
        return VideoWidth::Short;
    }
    return width;
}
|
||||
|
||||
} // namespace Shader::Maxwell
|
23
src/shader_recompiler/frontend/maxwell/translate/impl/video_helper.h
Executable file
23
src/shader_recompiler/frontend/maxwell/translate/impl/video_helper.h
Executable file
@@ -0,0 +1,23 @@
|
||||
// Copyright 2021 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "common/common_types.h"
|
||||
#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
|
||||
|
||||
namespace Shader::Maxwell {
|
||||
// Operand width of a video instruction source, as encoded in the instruction.
enum class VideoWidth : u64 {
    Byte = 0,
    Unknown = 1,
    Short = 2,
    Word = 3,
};
|
||||
|
||||
[[nodiscard]] IR::U32 ExtractVideoOperandValue(IR::IREmitter& ir, const IR::U32& value,
|
||||
VideoWidth width, u32 selector, bool is_signed);
|
||||
|
||||
[[nodiscard]] VideoWidth GetVideoSourceWidth(VideoWidth width, bool is_immediate);
|
||||
|
||||
} // namespace Shader::Maxwell
|
@@ -0,0 +1,92 @@
|
||||
// Copyright 2021 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include "common/common_types.h"
|
||||
#include "shader_recompiler/exception.h"
|
||||
#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
|
||||
#include "shader_recompiler/frontend/maxwell/translate/impl/video_helper.h"
|
||||
|
||||
namespace Shader::Maxwell {
|
||||
namespace {
|
||||
// Second-stage operation of VMNMX, as encoded in the instruction (3-bit field).
enum class VideoMinMaxOps : u64 {
    MRG_16H = 0,
    MRG_16L = 1,
    MRG_8B0 = 2,
    MRG_8B2 = 3,
    ACC = 4,
    MIN = 5,
    MAX = 6,
};
|
||||
|
||||
// Applies the second-stage VMNMX operation to `lhs` and `rhs`.
// Only MIN and MAX are translated; every other operation throws NotImplementedException.
[[nodiscard]] IR::U32 ApplyVideoMinMaxOp(IR::IREmitter& ir, const IR::U32& lhs, const IR::U32& rhs,
                                         VideoMinMaxOps op, bool is_signed) {
    if (op == VideoMinMaxOps::MIN) {
        return ir.IMin(lhs, rhs, is_signed);
    }
    if (op == VideoMinMaxOps::MAX) {
        return ir.IMax(lhs, rhs, is_signed);
    }
    throw NotImplementedException("VMNMX op {}", op);
}
|
||||
} // Anonymous namespace
|
||||
|
||||
// VMNMX: video minimum/maximum.
// Computes min or max (chosen by `mx`) of operands A and B, then combines that result with
// operand C using the encoded second-stage operation, and writes it to dest_reg.
// Throws NotImplementedException for CC, SAT, selectors other than 2, and a source A width
// other than Word.
void TranslatorVisitor::VMNMX(u64 insn) {
    union {
        u64 raw;
        BitField<0, 8, IR::Reg> dest_reg;
        BitField<20, 16, u64> src_b_imm;
        BitField<28, 2, u64> src_b_selector;
        BitField<29, 2, VideoWidth> src_b_width;
        BitField<36, 2, u64> src_a_selector;
        BitField<37, 2, VideoWidth> src_a_width;
        BitField<47, 1, u64> cc;
        BitField<48, 1, u64> src_a_sign;
        BitField<49, 1, u64> src_b_sign;
        BitField<50, 1, u64> is_src_b_reg;
        BitField<51, 3, VideoMinMaxOps> op;
        BitField<54, 1, u64> dest_sign;
        BitField<55, 1, u64> sat;
        BitField<56, 1, u64> mx;
    } const vmnmx{insn};

    // Reject the encodings that are not translated.
    if (vmnmx.cc != 0) {
        throw NotImplementedException("VMNMX CC");
    }
    if (vmnmx.sat != 0) {
        throw NotImplementedException("VMNMX SAT");
    }
    // Selectors were shown to default to 2 in unit tests
    if (vmnmx.src_a_selector != 2) {
        throw NotImplementedException("VMNMX Selector {}", vmnmx.src_a_selector.Value());
    }
    if (vmnmx.src_b_selector != 2) {
        throw NotImplementedException("VMNMX Selector {}", vmnmx.src_b_selector.Value());
    }
    if (vmnmx.src_a_width != VideoWidth::Word) {
        throw NotImplementedException("VMNMX Source Width {}", vmnmx.src_a_width.Value());
    }

    // Fetch the three sources; B is an immediate when the register flag is clear.
    const bool b_is_immediate{vmnmx.is_src_b_reg == 0};
    const IR::U32 raw_a{GetReg8(insn)};
    IR::U32 raw_b;
    if (b_is_immediate) {
        raw_b = ir.Imm32(static_cast<u32>(vmnmx.src_b_imm));
    } else {
        raw_b = GetReg20(insn);
    }
    const IR::U32 raw_c{GetReg39(insn)};

    const bool a_is_signed{vmnmx.src_a_sign != 0};
    const bool b_is_signed{vmnmx.src_b_sign != 0};
    const VideoWidth width_a{vmnmx.src_a_width};
    const VideoWidth width_b{GetVideoSourceWidth(vmnmx.src_b_width, b_is_immediate)};
    const IR::U32 operand_a{ExtractVideoOperandValue(ir, raw_a, width_a, 0, a_is_signed)};
    const IR::U32 operand_b{ExtractVideoOperandValue(ir, raw_b, width_b, 0, b_is_signed)};

    // First operation's sign is only dependent on operand b's sign
    const bool first_stage_signed{b_is_signed};
    IR::U32 first_stage;
    if (vmnmx.mx != 0) {
        first_stage = ir.IMax(operand_a, operand_b, first_stage_signed);
    } else {
        first_stage = ir.IMin(operand_a, operand_b, first_stage_signed);
    }
    const bool result_is_signed{vmnmx.dest_sign != 0};
    X(vmnmx.dest_reg, ApplyVideoMinMaxOp(ir, first_stage, raw_c, vmnmx.op, result_is_signed));
}
|
||||
|
||||
} // namespace Shader::Maxwell
|
64
src/shader_recompiler/frontend/maxwell/translate/impl/video_multiply_add.cpp
Executable file
64
src/shader_recompiler/frontend/maxwell/translate/impl/video_multiply_add.cpp
Executable file
@@ -0,0 +1,64 @@
|
||||
// Copyright 2021 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include "common/common_types.h"
|
||||
#include "shader_recompiler/exception.h"
|
||||
#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
|
||||
#include "shader_recompiler/frontend/maxwell/translate/impl/video_helper.h"
|
||||
|
||||
namespace Shader::Maxwell {
|
||||
// VMAD: video multiply-add. Writes (A * B) + C to dest_reg, where A and B are the lanes of
// the sources selected by their width and selector fields.
// Throws NotImplementedException for CC, SAT, SCALE, and any negation of A or C
// (both negated is reported as "VMAD PO", a single one as "VMAD NEG").
void TranslatorVisitor::VMAD(u64 insn) {
    union {
        u64 raw;
        BitField<0, 8, IR::Reg> dest_reg;
        BitField<20, 16, u64> src_b_imm;
        BitField<28, 2, u64> src_b_selector;
        BitField<29, 2, VideoWidth> src_b_width;
        BitField<36, 2, u64> src_a_selector;
        BitField<37, 2, VideoWidth> src_a_width;
        BitField<47, 1, u64> cc;
        BitField<48, 1, u64> src_a_sign;
        BitField<49, 1, u64> src_b_sign;
        BitField<50, 1, u64> is_src_b_reg;
        BitField<51, 2, u64> scale;
        BitField<53, 1, u64> src_c_neg;
        BitField<54, 1, u64> src_a_neg;
        BitField<55, 1, u64> sat;
    } const vmad{insn};

    // Reject the encodings that are not translated.
    if (vmad.cc != 0) {
        throw NotImplementedException("VMAD CC");
    }
    if (vmad.sat != 0) {
        throw NotImplementedException("VMAD SAT");
    }
    if (vmad.scale != 0) {
        throw NotImplementedException("VMAD SCALE");
    }
    const bool negates_a{vmad.src_a_neg != 0};
    const bool negates_c{vmad.src_c_neg != 0};
    if (negates_a && negates_c) {
        throw NotImplementedException("VMAD PO");
    }
    if (negates_a || negates_c) {
        throw NotImplementedException("VMAD NEG");
    }

    // Fetch the three sources; B is an immediate when the register flag is clear.
    const bool b_is_immediate{vmad.is_src_b_reg == 0};
    const IR::U32 raw_a{GetReg8(insn)};
    IR::U32 raw_b;
    if (b_is_immediate) {
        raw_b = ir.Imm32(static_cast<u32>(vmad.src_b_imm));
    } else {
        raw_b = GetReg20(insn);
    }
    const IR::U32 addend{GetReg39(insn)};

    const u32 selector_a{static_cast<u32>(vmad.src_a_selector)};
    // Immediate values can't have a selector
    u32 selector_b{0U};
    if (!b_is_immediate) {
        selector_b = static_cast<u32>(vmad.src_b_selector);
    }
    const VideoWidth width_a{vmad.src_a_width};
    const VideoWidth width_b{GetVideoSourceWidth(vmad.src_b_width, b_is_immediate)};

    const bool a_is_signed{vmad.src_a_sign != 0};
    const bool b_is_signed{vmad.src_b_sign != 0};
    const IR::U32 factor_a{ExtractVideoOperandValue(ir, raw_a, width_a, selector_a, a_is_signed)};
    const IR::U32 factor_b{ExtractVideoOperandValue(ir, raw_b, width_b, selector_b, b_is_signed)};

    X(vmad.dest_reg, ir.IAdd(ir.IMul(factor_a, factor_b), addend));
}
|
||||
|
||||
} // namespace Shader::Maxwell
|
@@ -0,0 +1,92 @@
|
||||
// Copyright 2021 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include "common/common_types.h"
|
||||
#include "shader_recompiler/exception.h"
|
||||
#include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h"
|
||||
#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
|
||||
#include "shader_recompiler/frontend/maxwell/translate/impl/video_helper.h"
|
||||
|
||||
namespace Shader::Maxwell {
|
||||
namespace {
|
||||
// Comparison operation of VSETP, as encoded in the instruction (5-bit field).
enum class VsetpCompareOp : u64 {
    False = 0,
    LessThan = 1,
    Equal = 2,
    LessThanEqual = 3,
    GreaterThan = 16,
    NotEqual = 17,
    GreaterThanEqual = 18,
    True = 19,
};
|
||||
|
||||
// Translates a VSETP comparison encoding into the shared CompareOp enumeration.
// Throws NotImplementedException for encodings that are not listed in the table.
CompareOp VsetpToShaderCompareOp(VsetpCompareOp op) {
    struct Mapping {
        VsetpCompareOp encoded;
        CompareOp translated;
    };
    constexpr Mapping table[]{
        {VsetpCompareOp::False, CompareOp::False},
        {VsetpCompareOp::LessThan, CompareOp::LessThan},
        {VsetpCompareOp::Equal, CompareOp::Equal},
        {VsetpCompareOp::LessThanEqual, CompareOp::LessThanEqual},
        {VsetpCompareOp::GreaterThan, CompareOp::GreaterThan},
        {VsetpCompareOp::NotEqual, CompareOp::NotEqual},
        {VsetpCompareOp::GreaterThanEqual, CompareOp::GreaterThanEqual},
        {VsetpCompareOp::True, CompareOp::True},
    };
    for (const Mapping& entry : table) {
        if (entry.encoded == op) {
            return entry.translated;
        }
    }
    throw NotImplementedException("Invalid compare op {}", op);
}
|
||||
} // Anonymous namespace
|
||||
|
||||
// VSETP: video set predicate.
// Compares the selected lanes of operands A and B, combines the comparison (and its negation)
// with bop_pred using the encoded boolean operation, and writes the two results to
// dest_pred_a and dest_pred_b respectively.
// Throws NotImplementedException (via VsetpToShaderCompareOp / ExtractVideoOperandValue) for
// unsupported compare encodings or widths.
void TranslatorVisitor::VSETP(u64 insn) {
    // NOTE(review): compare_op (bits 43-47) and bop (bits 45-46) overlap, as do the selector and
    // width fields of each source; confirm the intended layout against a hardware reference.
    union {
        u64 raw;
        BitField<0, 3, IR::Pred> dest_pred_b;
        BitField<3, 3, IR::Pred> dest_pred_a;
        BitField<20, 16, u64> src_b_imm;
        BitField<28, 2, u64> src_b_selector;
        BitField<29, 2, VideoWidth> src_b_width;
        BitField<36, 2, u64> src_a_selector;
        BitField<37, 2, VideoWidth> src_a_width;
        BitField<39, 3, IR::Pred> bop_pred;
        BitField<42, 1, u64> neg_bop_pred;
        BitField<43, 5, VsetpCompareOp> compare_op;
        BitField<45, 2, BooleanOp> bop;
        BitField<48, 1, u64> src_a_sign;
        BitField<49, 1, u64> src_b_sign;
        BitField<50, 1, u64> is_src_b_reg;
    } const vsetp{insn};

    const bool is_b_imm{vsetp.is_src_b_reg == 0};
    const IR::U32 src_a{GetReg8(insn)};
    const IR::U32 src_b{is_b_imm ? ir.Imm32(static_cast<u32>(vsetp.src_b_imm)) : GetReg20(insn)};

    const u32 a_selector{static_cast<u32>(vsetp.src_a_selector)};
    // Immediate values can't have a selector: the selector bits (28-29) lie inside the 16-bit
    // immediate field (bits 20-35), so reading them would shift the extraction by immediate data.
    const u32 b_selector{is_b_imm ? 0U : static_cast<u32>(vsetp.src_b_selector)};
    const VideoWidth a_width{vsetp.src_a_width};
    const VideoWidth b_width{GetVideoSourceWidth(vsetp.src_b_width, is_b_imm)};

    const bool src_a_signed{vsetp.src_a_sign != 0};
    const bool src_b_signed{vsetp.src_b_sign != 0};
    const IR::U32 op_a{ExtractVideoOperandValue(ir, src_a, a_width, a_selector, src_a_signed)};
    const IR::U32 op_b{ExtractVideoOperandValue(ir, src_b, b_width, b_selector, src_b_signed)};

    // Compare operation's sign is only dependent on operand b's sign
    const bool compare_signed{src_b_signed};
    const CompareOp compare_op{VsetpToShaderCompareOp(vsetp.compare_op)};
    const IR::U1 comparison{IntegerCompare(ir, op_a, op_b, compare_op, compare_signed)};
    const IR::U1 bop_pred{ir.GetPred(vsetp.bop_pred, vsetp.neg_bop_pred != 0)};
    // Predicate A receives the comparison, predicate B its logical negation.
    const IR::U1 result_a{PredicateCombine(ir, comparison, bop_pred, vsetp.bop)};
    const IR::U1 result_b{PredicateCombine(ir, ir.LogicalNot(comparison), bop_pred, vsetp.bop)};
    ir.SetPred(vsetp.dest_pred_a, result_a);
    ir.SetPred(vsetp.dest_pred_b, result_b);
}
|
||||
|
||||
} // namespace Shader::Maxwell
|
54
src/shader_recompiler/frontend/maxwell/translate/impl/vote.cpp
Executable file
54
src/shader_recompiler/frontend/maxwell/translate/impl/vote.cpp
Executable file
@@ -0,0 +1,54 @@
|
||||
// Copyright 2021 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include "common/bit_field.h"
|
||||
#include "common/common_types.h"
|
||||
#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
|
||||
|
||||
namespace Shader::Maxwell {
|
||||
namespace {
|
||||
// Vote operation as encoded in the instruction (2-bit field).
enum class VoteOp : u64 {
    ALL = 0,
    ANY = 1,
    EQ = 2,
};
|
||||
|
||||
// Emits the vote selected by `vote_op` over `pred`.
// Throws NotImplementedException for encodings other than ALL, ANY and EQ.
[[nodiscard]] IR::U1 VoteOperation(IR::IREmitter& ir, const IR::U1& pred, VoteOp vote_op) {
    if (vote_op == VoteOp::ALL) {
        return ir.VoteAll(pred);
    }
    if (vote_op == VoteOp::ANY) {
        return ir.VoteAny(pred);
    }
    if (vote_op == VoteOp::EQ) {
        return ir.VoteEqual(pred);
    }
    throw NotImplementedException("Invalid VOTE op {}", vote_op);
}
|
||||
|
||||
// Translates VOTE: reads pred_a (optionally negated), stores the vote result in pred_b and
// the subgroup ballot of the same predicate in dest_reg.
void Vote(TranslatorVisitor& v, u64 insn) {
    union {
        u64 insn;
        BitField<0, 8, IR::Reg> dest_reg;
        BitField<39, 3, IR::Pred> pred_a;
        BitField<42, 1, u64> neg_pred_a;
        BitField<45, 3, IR::Pred> pred_b;
        BitField<48, 2, VoteOp> vote_op;
    } const vote{insn};

    const bool is_negated{vote.neg_pred_a != 0};
    const IR::U1 input_pred{v.ir.GetPred(vote.pred_a, is_negated)};
    const IR::U1 vote_result{VoteOperation(v.ir, input_pred, vote.vote_op)};
    v.ir.SetPred(vote.pred_b, vote_result);
    v.X(vote.dest_reg, v.ir.SubgroupBallot(input_pred));
}
|
||||
} // Anonymous namespace
|
||||
|
||||
// VOTE: forwards to the file-local Vote() translation.
void TranslatorVisitor::VOTE(u64 insn) {
    TranslatorVisitor& visitor{*this};
    Vote(visitor, insn);
}
|
||||
|
||||
void TranslatorVisitor::VOTE_vtg(u64) {
|
||||
LOG_WARNING(Shader, "(STUBBED) called");
|
||||
}
|
||||
|
||||
} // namespace Shader::Maxwell
|
69
src/shader_recompiler/frontend/maxwell/translate/impl/warp_shuffle.cpp
Executable file
69
src/shader_recompiler/frontend/maxwell/translate/impl/warp_shuffle.cpp
Executable file
@@ -0,0 +1,69 @@
|
||||
// Copyright 2021 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include <optional>
|
||||
|
||||
#include "common/bit_field.h"
|
||||
#include "common/common_types.h"
|
||||
#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
|
||||
|
||||
namespace Shader::Maxwell {
|
||||
namespace {
|
||||
// Shuffle mode as encoded in the instruction (2-bit field).
enum class ShuffleMode : u64 {
    IDX = 0,
    UP = 1,
    DOWN = 2,
    BFLY = 3,
};
|
||||
|
||||
// Emits the shuffle selected by `shfl_op`.
// `mask` is split into two 5-bit fields before dispatch: bits 0-4 are passed as the clamp and
// bits 8-12 as the segmentation mask.
// Throws NotImplementedException for an unknown mode.
[[nodiscard]] IR::U32 ShuffleOperation(IR::IREmitter& ir, const IR::U32& value,
                                       const IR::U32& index, const IR::U32& mask,
                                       ShuffleMode shfl_op) {
    const IR::U32 clamp_bits{ir.BitFieldExtract(mask, ir.Imm32(0), ir.Imm32(5))};
    const IR::U32 segmentation{ir.BitFieldExtract(mask, ir.Imm32(8), ir.Imm32(5))};
    if (shfl_op == ShuffleMode::IDX) {
        return ir.ShuffleIndex(value, index, clamp_bits, segmentation);
    }
    if (shfl_op == ShuffleMode::UP) {
        return ir.ShuffleUp(value, index, clamp_bits, segmentation);
    }
    if (shfl_op == ShuffleMode::DOWN) {
        return ir.ShuffleDown(value, index, clamp_bits, segmentation);
    }
    if (shfl_op == ShuffleMode::BFLY) {
        return ir.ShuffleButterfly(value, index, clamp_bits, segmentation);
    }
    throw NotImplementedException("Invalid SHFL op {}", shfl_op);
}
|
||||
|
||||
// Translates the register part of SHFL: shuffles src_reg with the given `index` and `mask`,
// stores the in-bounds flag of the operation in `pred` and the shuffled value in dest_reg.
void Shuffle(TranslatorVisitor& v, u64 insn, const IR::U32& index, const IR::U32& mask) {
    union {
        u64 insn;
        BitField<0, 8, IR::Reg> dest_reg;
        BitField<8, 8, IR::Reg> src_reg;
        BitField<30, 2, ShuffleMode> mode;
        BitField<48, 3, IR::Pred> pred;
    } const shfl{insn};

    const IR::U32 source{v.X(shfl.src_reg)};
    const IR::U32 shuffled{ShuffleOperation(v.ir, source, index, mask, shfl.mode)};
    const IR::U1 in_bounds{v.ir.GetInBoundsFromOp(shuffled)};
    v.ir.SetPred(shfl.pred, in_bounds);
    v.X(shfl.dest_reg, shuffled);
}
|
||||
} // Anonymous namespace
|
||||
|
||||
// SHFL: resolves the index and mask operands, each either an immediate (when its flag is set)
// or a register, and forwards them to Shuffle().
void TranslatorVisitor::SHFL(u64 insn) {
    union {
        u64 insn;
        BitField<20, 5, u64> src_a_imm;
        BitField<28, 1, u64> src_a_flag;
        BitField<29, 1, u64> src_b_flag;
        BitField<34, 13, u64> src_b_imm;
    } const flags{insn};

    IR::U32 index;
    if (flags.src_a_flag != 0) {
        index = ir.Imm32(static_cast<u32>(flags.src_a_imm));
    } else {
        index = GetReg20(insn);
    }
    IR::U32 mask;
    if (flags.src_b_flag != 0) {
        mask = ir.Imm32(static_cast<u32>(flags.src_b_imm));
    } else {
        mask = GetReg39(insn);
    }
    Shuffle(*this, insn, index, mask);
}
|
||||
|
||||
} // namespace Shader::Maxwell
|
Reference in New Issue
Block a user