early-access version 1866

This commit is contained in:
pineappleEA
2021-07-09 23:54:15 +02:00
parent 335eeff822
commit 7d21887d40
469 changed files with 201995 additions and 78488 deletions

View File

@@ -0,0 +1,214 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "common/bit_field.h"
#include "common/common_types.h"
#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
namespace Shader::Maxwell {
namespace {
// Operation selector for the global-memory atomic instructions in this file;
// it is the type of the `op` bit field decoded by ATOM and RED.
// Enumerators take their implicit values in declaration order, starting at 0.
enum class AtomOp : u64 {
ADD,
MIN,
MAX,
INC,
DEC,
AND,
OR,
XOR,
EXCH,
// No dedicated handling yet; see the TODO in AtomOpNotApplicable.
SAFEADD,
};
// Operand type selector for the global-memory atomics; it is the type of the
// `size` bit field decoded by ATOM and RED. Enumerators take their implicit
// values in declaration order, starting at 0.
enum class AtomSize : u64 {
U32,
S32,
U64,
F32,
// Operand is read as a 32-bit register and unpacked with UnpackFloat2x16.
F16x2,
S64,
};
// Emits the global-memory integer atomic selected by `op`, using `offset` as
// the address operand and `op_b` as the value operand, and returns what the
// emitter produced. `is_signed` is only forwarded to the MIN and MAX atomics.
// Throws NotImplementedException for operations without a case below.
IR::U32U64 ApplyIntegerAtomOp(IR::IREmitter& ir, const IR::U32U64& offset, const IR::U32U64& op_b,
                              AtomOp op, bool is_signed) {
    switch (op) {
    // Arithmetic
    case AtomOp::ADD:
        return ir.GlobalAtomicIAdd(offset, op_b);
    case AtomOp::INC:
        return ir.GlobalAtomicInc(offset, op_b);
    case AtomOp::DEC:
        return ir.GlobalAtomicDec(offset, op_b);
    // Signedness-aware comparisons
    case AtomOp::MIN:
        return ir.GlobalAtomicIMin(offset, op_b, is_signed);
    case AtomOp::MAX:
        return ir.GlobalAtomicIMax(offset, op_b, is_signed);
    // Bitwise
    case AtomOp::AND:
        return ir.GlobalAtomicAnd(offset, op_b);
    case AtomOp::OR:
        return ir.GlobalAtomicOr(offset, op_b);
    case AtomOp::XOR:
        return ir.GlobalAtomicXor(offset, op_b);
    // Swap
    case AtomOp::EXCH:
        return ir.GlobalAtomicExchange(offset, op_b);
    default:
        break;
    }
    throw NotImplementedException("Integer Atom Operation {}", op);
}
// Emits the global-memory floating-point atomic selected by `op`.
// ADD picks the F32 variant when `size` is AtomSize::F32 and the F16x2 variant
// otherwise; MIN and MAX always use the F16x2 variants.
// Throws NotImplementedException for every other operation.
IR::Value ApplyFpAtomOp(IR::IREmitter& ir, const IR::U64& offset, const IR::Value& op_b, AtomOp op,
                        AtomSize size) {
    // Both control blocks use RN rounding and differ only in their FMZ mode.
    static constexpr IR::FpControl f16_control{
        .no_contraction{false},
        .rounding{IR::FpRounding::RN},
        .fmz_mode{IR::FmzMode::DontCare},
    };
    static constexpr IR::FpControl f32_control{
        .no_contraction{false},
        .rounding{IR::FpRounding::RN},
        .fmz_mode{IR::FmzMode::FTZ},
    };
    if (op == AtomOp::ADD) {
        if (size == AtomSize::F32) {
            return ir.GlobalAtomicF32Add(offset, op_b, f32_control);
        }
        return ir.GlobalAtomicF16x2Add(offset, op_b, f16_control);
    }
    if (op == AtomOp::MIN) {
        return ir.GlobalAtomicF16x2Min(offset, op_b, f16_control);
    }
    if (op == AtomOp::MAX) {
        return ir.GlobalAtomicF16x2Max(offset, op_b, f16_control);
    }
    throw NotImplementedException("FP Atom Operation {}", op);
}
// Computes the 64-bit address operand shared by ATOM and RED: the value of the
// address register plus the immediate offset encoded in the instruction.
IR::U64 AtomOffset(TranslatorVisitor& v, u64 insn) {
    union {
        u64 raw;
        BitField<8, 8, IR::Reg> addr_reg;
        BitField<28, 20, s64> addr_offset;
        BitField<28, 20, u64> rz_addr_offset;
        BitField<48, 1, u64> e;
    } const mem{insn};
    // With the `e` bit set the register is read as a 64-bit value; otherwise
    // the 32-bit register value is converted to 64 bits.
    const IR::U64 base_address{[&]() -> IR::U64 {
        if (mem.e != 0) {
            return v.L(mem.addr_reg);
        }
        return v.ir.UConvert(64, v.X(mem.addr_reg));
    }()};
    // When RZ is used, the address is an absolute address, so the immediate is
    // taken from the unsigned view of the field instead of the signed one.
    const bool is_absolute{mem.addr_reg == IR::Reg::RZ};
    const u64 immediate{is_absolute ? static_cast<u64>(mem.rz_addr_offset.Value())
                                    : static_cast<u64>(mem.addr_offset.Value())};
    return v.ir.IAdd(base_address, v.ir.Imm64(immediate));
}
// Returns true when the (size, op) pair is treated as not applicable.
// GlobalAtomic emits a plain load instead of an atomic for such pairs.
bool AtomOpNotApplicable(AtomSize size, AtomOp op) {
    // TODO: SAFEADD
    switch (size) {
    case AtomSize::F16x2:
        // Only ADD, MIN and MAX are applicable
        return op != AtomOp::ADD && op != AtomOp::MIN && op != AtomOp::MAX;
    case AtomSize::F32:
        // Only ADD is applicable
        return op != AtomOp::ADD;
    case AtomSize::S64:
        // Only MIN and MAX are applicable
        return op != AtomOp::MIN && op != AtomOp::MAX;
    case AtomSize::S32:
    case AtomSize::U64:
        // Everything but INC and DEC is applicable
        return op == AtomOp::INC || op == AtomOp::DEC;
    default:
        return false;
    }
}
// Emits a plain global-memory load whose width matches `size`: a 32-bit load
// for U32/S32/F32/F16x2, and a 64-bit load packed with PackUint2x32 for
// U64/S64. Throws NotImplementedException for any other size value.
IR::U32U64 LoadGlobal(IR::IREmitter& ir, const IR::U64& offset, AtomSize size) {
    switch (size) {
    case AtomSize::U64:
    case AtomSize::S64:
        return ir.PackUint2x32(ir.LoadGlobal64(offset));
    case AtomSize::U32:
    case AtomSize::S32:
    case AtomSize::F32:
    case AtomSize::F16x2:
        return ir.LoadGlobal32(offset);
    default:
        break;
    }
    throw NotImplementedException("Atom Size {}", size);
}
// Writes `result` to `dest_reg` through the register accessor matching `size`:
// X for U32/S32/F16x2, L for U64/S64 and F for F32.
// Any other size value leaves the destination untouched.
void StoreResult(TranslatorVisitor& v, IR::Reg dest_reg, const IR::Value& result, AtomSize size) {
    switch (size) {
    case AtomSize::F32:
        v.F(dest_reg, IR::F32{result});
        return;
    case AtomSize::U64:
    case AtomSize::S64:
        v.L(dest_reg, IR::U64{result});
        return;
    case AtomSize::U32:
    case AtomSize::S32:
    case AtomSize::F16x2:
        v.X(dest_reg, IR::U32{result});
        return;
    default:
        return;
    }
}
// Reads the operand register with the accessor matching `size` and dispatches
// to the integer or floating-point atomic helper.
// Throws NotImplementedException for size values without a case below.
IR::Value ApplyAtomOp(TranslatorVisitor& v, IR::Reg operand_reg, const IR::U64& offset,
                      AtomSize size, AtomOp op) {
    switch (size) {
    case AtomSize::U32:
    case AtomSize::S32: {
        const bool is_signed{size == AtomSize::S32};
        return ApplyIntegerAtomOp(v.ir, offset, v.X(operand_reg), op, is_signed);
    }
    case AtomSize::U64:
    case AtomSize::S64: {
        const bool is_signed{size == AtomSize::S64};
        return ApplyIntegerAtomOp(v.ir, offset, v.L(operand_reg), op, is_signed);
    }
    case AtomSize::F32:
        return ApplyFpAtomOp(v.ir, offset, v.F(operand_reg), op, size);
    case AtomSize::F16x2: {
        // The packed operand lives in one 32-bit register and is unpacked first
        const IR::Value unpacked{v.ir.UnpackFloat2x16(v.X(operand_reg))};
        return ApplyFpAtomOp(v.ir, offset, unpacked, op, size);
    }
    default:
        break;
    }
    throw NotImplementedException("Atom Size {}", size);
}
// Shared implementation of ATOM and RED. Emits the atomic selected by
// (size, op), or a plain load when AtomOpNotApplicable reports the pair as not
// applicable, and stores the produced value to `dest_reg` only when
// `write_dest` is set.
void GlobalAtomic(TranslatorVisitor& v, IR::Reg dest_reg, IR::Reg operand_reg,
                  const IR::U64& offset, AtomSize size, AtomOp op, bool write_dest) {
    const IR::Value produced{[&]() -> IR::Value {
        if (AtomOpNotApplicable(size, op)) {
            return LoadGlobal(v.ir, offset, size);
        }
        return ApplyAtomOp(v, operand_reg, offset, size, op);
    }()};
    if (!write_dest) {
        return;
    }
    StoreResult(v, dest_reg, produced, size);
}
} // Anonymous namespace
// Translates ATOM: decodes the destination register, operand register, operand
// size and operation from `insn`, computes the address with AtomOffset, and
// emits the operation through GlobalAtomic with the result written to dest_reg.
void TranslatorVisitor::ATOM(u64 insn) {
union {
u64 raw;
BitField<0, 8, IR::Reg> dest_reg;
BitField<20, 8, IR::Reg> operand_reg;
BitField<49, 3, AtomSize> size;
BitField<52, 4, AtomOp> op;
} const atom{insn};
const IR::U64 offset{AtomOffset(*this, insn)};
// write_dest = true: ATOM stores the value produced by the atomic
GlobalAtomic(*this, atom.dest_reg, atom.operand_reg, offset, atom.size, atom.op, true);
}
// Translates RED: decodes the operand register, operand size and operation
// from `insn`, computes the address with AtomOffset, and emits the operation
// through GlobalAtomic without writing any result register.
void TranslatorVisitor::RED(u64 insn) {
    union {
        u64 raw;
        BitField<0, 8, IR::Reg> operand_reg;
        BitField<20, 3, AtomSize> size;
        BitField<23, 3, AtomOp> op;
    } const red{insn};
    const IR::U64 offset{AtomOffset(*this, insn)};
    // The RED encoding decoded above has no destination register field, so the
    // value produced by the operation is discarded (write_dest = false) instead
    // of being stored to RZ. RZ is only passed to satisfy the signature.
    GlobalAtomic(*this, IR::Reg::RZ, red.operand_reg, offset, red.size, red.op, false);
}
} // namespace Shader::Maxwell

View File

@@ -0,0 +1,110 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "common/bit_field.h"
#include "common/common_types.h"
#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
namespace Shader::Maxwell {
namespace {
// Operation selector for the shared-memory atomic instruction (ATOMS); it is
// the type of the `op` bit field decoded there. Enumerators take their implicit
// values in declaration order, starting at 0.
enum class AtomOp : u64 {
ADD,
MIN,
MAX,
INC,
DEC,
AND,
OR,
XOR,
EXCH,
};
// Operand size selector for ATOMS; it is the type of the `size` bit field
// decoded there. ATOMS only accepts U64 together with AtomOp::EXCH.
enum class AtomsSize : u64 {
U32,
S32,
U64,
};
// Emits the shared-memory atomic selected by `op`, using `offset` as the
// address operand and `op_b` as the value operand, and returns what the emitter
// produced. `is_signed` is only forwarded to the MIN and MAX atomics.
// Throws NotImplementedException for operations without a case below.
IR::U32U64 ApplyAtomsOp(IR::IREmitter& ir, const IR::U32& offset, const IR::U32U64& op_b, AtomOp op,
                        bool is_signed) {
    switch (op) {
    // Arithmetic
    case AtomOp::ADD:
        return ir.SharedAtomicIAdd(offset, op_b);
    case AtomOp::INC:
        return ir.SharedAtomicInc(offset, op_b);
    case AtomOp::DEC:
        return ir.SharedAtomicDec(offset, op_b);
    // Signedness-aware comparisons
    case AtomOp::MIN:
        return ir.SharedAtomicIMin(offset, op_b, is_signed);
    case AtomOp::MAX:
        return ir.SharedAtomicIMax(offset, op_b, is_signed);
    // Bitwise
    case AtomOp::AND:
        return ir.SharedAtomicAnd(offset, op_b);
    case AtomOp::OR:
        return ir.SharedAtomicOr(offset, op_b);
    case AtomOp::XOR:
        return ir.SharedAtomicXor(offset, op_b);
    // Swap
    case AtomOp::EXCH:
        return ir.SharedAtomicExchange(offset, op_b);
    default:
        break;
    }
    throw NotImplementedException("Integer Atoms Operation {}", op);
}
// Computes the 32-bit address operand for ATOMS. The encoded immediate is
// shifted left by two; with RZ as the offset register it is used on its own
// (unsigned view), otherwise its signed view is added to the register value.
IR::U32 AtomsOffset(TranslatorVisitor& v, u64 insn) {
    union {
        u64 raw;
        BitField<8, 8, IR::Reg> offset_reg;
        BitField<30, 22, u64> absolute_offset;
        BitField<30, 22, s64> relative_offset;
    } const fields{insn};
    if (fields.offset_reg == IR::Reg::RZ) {
        return v.ir.Imm32(static_cast<u32>(fields.absolute_offset << 2));
    }
    const s32 displacement{static_cast<s32>(fields.relative_offset << 2)};
    return v.ir.IAdd(v.X(fields.offset_reg), v.ir.Imm32(displacement));
}
// Writes `result` to `dest_reg` through the register accessor matching `size`:
// X for U32/S32 and L for U64. Any other size value leaves the destination
// untouched.
void StoreResult(TranslatorVisitor& v, IR::Reg dest_reg, const IR::Value& result, AtomsSize size) {
    switch (size) {
    case AtomsSize::U64:
        v.L(dest_reg, IR::U64{result});
        return;
    case AtomsSize::U32:
    case AtomsSize::S32:
        v.X(dest_reg, IR::U32{result});
        return;
    default:
        return;
    }
}
} // Anonymous namespace
// Translates ATOMS (shared-memory atomic): decodes the registers, size and
// operation, computes the address with AtomsOffset, emits the atomic and stores
// its result to the destination register.
// Throws NotImplementedException for a 64-bit size with any operation but EXCH.
void TranslatorVisitor::ATOMS(u64 insn) {
    union {
        u64 raw;
        BitField<0, 8, IR::Reg> dest_reg;
        BitField<8, 8, IR::Reg> addr_reg;
        BitField<20, 8, IR::Reg> src_reg_b;
        BitField<28, 2, AtomsSize> size;
        BitField<52, 4, AtomOp> op;
    } const atoms{insn};
    const bool is_64_bit{atoms.size == AtomsSize::U64};
    if (is_64_bit && atoms.op != AtomOp::EXCH) {
        throw NotImplementedException("64-bit Atoms Operation {}", atoms.op.Value());
    }
    const bool is_signed{atoms.size == AtomsSize::S32};
    const IR::U32 offset{AtomsOffset(*this, insn)};
    // The operand is read as a 64-bit register value for U64 and as a 32-bit one otherwise
    const IR::Value result{
        is_64_bit ? ApplyAtomsOp(ir, offset, L(atoms.src_reg_b), atoms.op, is_signed)
                  : ApplyAtomsOp(ir, offset, X(atoms.src_reg_b), atoms.op, is_signed)};
    StoreResult(*this, atoms.dest_reg, result, atoms.size);
}
} // namespace Shader::Maxwell

View File

@@ -0,0 +1,35 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "common/bit_field.h"
#include "common/common_types.h"
#include "shader_recompiler/frontend/maxwell/opcodes.h"
#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
namespace Shader::Maxwell {
namespace {
// Width selector; it is the type of the `bitsize` bit field decoded by AL2P,
// which only accepts B32.
// Kept in an anonymous namespace, like the other file-local enums of the
// translator, so that it has internal linkage and cannot clash with an
// identically named type defined in another translation unit.
enum class BitSize : u64 {
    B32,
    B64,
    B96,
    B128,
};
} // Anonymous namespace
// Translates AL2P: adds the encoded immediate offset to the indexing register
// and writes the sum to the result register.
// Throws NotImplementedException for any bit size other than B32.
void TranslatorVisitor::AL2P(u64 inst) {
    union {
        u64 raw;
        BitField<0, 8, IR::Reg> result_register;
        BitField<8, 8, IR::Reg> indexing_register;
        BitField<20, 11, s64> offset;
        BitField<47, 2, BitSize> bitsize;
    } const encoding{inst};
    if (encoding.bitsize != BitSize::B32) {
        throw NotImplementedException("BitSize {}", encoding.bitsize.Value());
    }
    // The signed immediate is reinterpreted as a 32-bit unsigned value
    const u32 raw_offset{static_cast<u32>(encoding.offset.Value())};
    const IR::U32 immediate{ir.Imm32(raw_offset)};
    const IR::U32 sum{ir.IAdd(X(encoding.indexing_register), immediate)};
    X(encoding.result_register, sum);
}
} // namespace Shader::Maxwell

View File

@@ -0,0 +1,96 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "common/bit_field.h"
#include "common/common_types.h"
#include "shader_recompiler/frontend/ir/modifiers.h"
#include "shader_recompiler/frontend/maxwell/opcodes.h"
#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
namespace Shader::Maxwell {
namespace {
// Scope selector decoded by MEMBAR: CTA selects a workgroup memory barrier,
// every other value selects a device memory barrier.
// Seems to be in CUDA terminology.
enum class LocalScope : u64 {
CTA,
GL,
SYS,
VC,
};
} // Anonymous namespace
// Translates MEMBAR: emits a workgroup memory barrier for the CTA scope and a
// device memory barrier for every other scope.
void TranslatorVisitor::MEMBAR(u64 inst) {
    union {
        u64 raw;
        BitField<8, 2, LocalScope> scope;
    } const encoding{inst};
    const bool is_workgroup_scope{encoding.scope == LocalScope::CTA};
    if (!is_workgroup_scope) {
        ir.DeviceMemoryBarrier();
        return;
    }
    ir.WorkgroupMemoryBarrier();
}
// Translates DEPBAR: intentionally emits nothing.
void TranslatorVisitor::DEPBAR() {
// DEPBAR is a no-op
}
// Translates BAR. Only the Sync mode with both inputs encoded as zero
// immediates is supported, in which case a single barrier is emitted; every
// other form throws NotImplementedException.
void TranslatorVisitor::BAR(u64 insn) {
// Barrier modes distinguished by the encoding
enum class Mode {
RedPopc,
Scan,
RedAnd,
RedOr,
Sync,
Arrive,
};
union {
u64 raw;
BitField<43, 1, u64> is_a_imm;
BitField<44, 1, u64> is_b_imm;
BitField<8, 8, u64> imm_a;
BitField<20, 12, u64> imm_b;
BitField<42, 1, u64> neg_pred;
BitField<39, 3, IR::Pred> pred;
} const bar{insn};
// The mode is identified by matching the masked instruction word against
// fixed bit patterns; an unmatched pattern is reported as an invalid encoding.
const Mode mode{[insn] {
switch (insn & 0x0000009B00000000ULL) {
case 0x0000000200000000ULL:
return Mode::RedPopc;
case 0x0000000300000000ULL:
return Mode::Scan;
case 0x0000000A00000000ULL:
return Mode::RedAnd;
case 0x0000001200000000ULL:
return Mode::RedOr;
case 0x0000008000000000ULL:
return Mode::Sync;
case 0x0000008100000000ULL:
return Mode::Arrive;
}
throw NotImplementedException("Invalid encoding");
}()};
if (mode != Mode::Sync) {
throw NotImplementedException("BAR mode {}", mode);
}
// Input A must be an immediate equal to zero
if (bar.is_a_imm == 0) {
throw NotImplementedException("Non-immediate input A");
}
if (bar.imm_a != 0) {
throw NotImplementedException("Non-zero input A");
}
// Input B must be an immediate equal to zero
if (bar.is_b_imm == 0) {
throw NotImplementedException("Non-immediate input B");
}
if (bar.imm_b != 0) {
throw NotImplementedException("Non-zero input B");
}
// NOTE(review): this only rejects a negated predicate other than PT. A
// non-negated predicate other than PT, and a negated PT, are both accepted and
// then ignored. Confirm whether `||` was intended, or whether the predicate is
// irrelevant for the Sync mode.
if (bar.pred != IR::Pred::PT && bar.neg_pred != 0) {
throw NotImplementedException("Non-true input predicate");
}
ir.Barrier();
}
} // namespace Shader::Maxwell

View File

@@ -0,0 +1,74 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "common/bit_field.h"
#include "common/common_types.h"
#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
namespace Shader::Maxwell {
namespace {
// Shared implementation of the BFE (bit field extract) variants.
// `src` packs the extraction parameters: its lowest 8 bits are the bit offset
// and the following 8 bits are the bit count. The value to extract from is read
// from the register encoded in the instruction, optionally bit-reversed, and
// the result is written to the destination register. When the cc bit is set,
// the Z and S flags are derived from the result and the C and O flags are reset.
void BFE(TranslatorVisitor& v, u64 insn, const IR::U32& src) {
union {
u64 insn;
BitField<0, 8, IR::Reg> dest_reg;
BitField<8, 8, IR::Reg> offset_reg;
BitField<40, 1, u64> brev;
BitField<47, 1, u64> cc;
BitField<48, 1, u64> is_signed;
} const bfe{insn};
const IR::U32 offset{v.ir.BitFieldExtract(src, v.ir.Imm32(0), v.ir.Imm32(8), false)};
const IR::U32 count{v.ir.BitFieldExtract(src, v.ir.Imm32(8), v.ir.Imm32(8), false)};
// Common constants
const IR::U32 zero{v.ir.Imm32(0)};
const IR::U32 one{v.ir.Imm32(1)};
const IR::U32 max_size{v.ir.Imm32(32)};
// Edge case conditions
// zero_count: nothing to extract
// exceed_count: offset + count reaches or passes bit 32
// replicate: offset itself is 32 or more
const IR::U1 zero_count{v.ir.IEqual(count, zero)};
const IR::U1 exceed_count{v.ir.IGreaterThanEqual(v.ir.IAdd(offset, count), max_size, false)};
const IR::U1 replicate{v.ir.IGreaterThanEqual(offset, max_size, false)};
// Despite its name, offset_reg is the register holding the value to extract from
IR::U32 base{v.X(bfe.offset_reg)};
if (bfe.brev != 0) {
base = v.ir.BitReverse(base);
}
IR::U32 result{v.ir.BitFieldExtract(base, offset, count, bfe.is_signed != 0)};
// The edge cases below are only patched up for signed extraction
if (bfe.is_signed != 0) {
const IR::U1 is_negative{v.ir.ILessThan(base, zero, true)};
// All ones when the base is negative, zero otherwise
const IR::U32 replicated_bit{v.ir.Select(is_negative, v.ir.Imm32(-1), zero)};
// Bit 31 of the base
const IR::U32 exceed_bit{v.ir.BitFieldExtract(base, v.ir.Imm32(31), one, false)};
// Replicate condition
result = IR::U32{v.ir.Select(replicate, replicated_bit, result)};
// Exceeding condition
const IR::U32 exceed_result{v.ir.BitFieldInsert(result, exceed_bit, v.ir.Imm32(31), one)};
result = IR::U32{v.ir.Select(exceed_count, exceed_result, result)};
}
// Zero count condition
result = IR::U32{v.ir.Select(zero_count, zero, result)};
v.X(bfe.dest_reg, result);
if (bfe.cc != 0) {
v.SetZFlag(v.ir.IEqual(result, zero));
v.SetSFlag(v.ir.ILessThan(result, zero, true));
v.ResetCFlag();
v.ResetOFlag();
}
}
} // Anonymous namespace
// BFE with the packed offset/count operand obtained through GetReg20.
void TranslatorVisitor::BFE_reg(u64 insn) {
BFE(*this, insn, GetReg20(insn));
}
// BFE with the packed offset/count operand obtained through GetCbuf.
void TranslatorVisitor::BFE_cbuf(u64 insn) {
BFE(*this, insn, GetCbuf(insn));
}
// BFE with the packed offset/count operand obtained through GetImm20.
void TranslatorVisitor::BFE_imm(u64 insn) {
BFE(*this, insn, GetImm20(insn));
}
} // namespace Shader::Maxwell

View File

@@ -0,0 +1,62 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "common/bit_field.h"
#include "common/common_types.h"
#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
namespace Shader::Maxwell {
namespace {
// Shared implementation of the BFI (bit field insert) variants.
// `src_a` packs the insertion parameters: its lowest 8 bits are the bit offset
// and the following 8 bits are the bit count. Bits from the register encoded in
// the instruction are inserted into `base` and the result is written to the
// destination register. When the cc bit is set, the Z and S flags are derived
// from the result and the C and O flags are reset.
void BFI(TranslatorVisitor& v, u64 insn, const IR::U32& src_a, const IR::U32& base) {
union {
u64 insn;
BitField<0, 8, IR::Reg> dest_reg;
BitField<8, 8, IR::Reg> insert_reg;
BitField<47, 1, u64> cc;
} const bfi{insn};
const IR::U32 zero{v.ir.Imm32(0)};
const IR::U32 offset{v.ir.BitFieldExtract(src_a, zero, v.ir.Imm32(8), false)};
const IR::U32 unsafe_count{v.ir.BitFieldExtract(src_a, v.ir.Imm32(8), v.ir.Imm32(8), false)};
const IR::U32 max_size{v.ir.Imm32(32)};
// Edge case conditions
const IR::U1 exceed_offset{v.ir.IGreaterThanEqual(offset, max_size, false)};
const IR::U1 exceed_count{v.ir.IGreaterThan(unsafe_count, max_size, false)};
// A count above 32 is replaced by the number of bits left after the offset
const IR::U32 remaining_size{v.ir.ISub(max_size, offset)};
const IR::U32 safe_count{v.ir.Select(exceed_count, remaining_size, unsafe_count)};
const IR::U32 insert{v.X(bfi.insert_reg)};
IR::U32 result{v.ir.BitFieldInsert(base, insert, offset, safe_count)};
// An offset of 32 or more leaves the base unchanged
result = IR::U32{v.ir.Select(exceed_offset, base, result)};
v.X(bfi.dest_reg, result);
if (bfi.cc != 0) {
v.SetZFlag(v.ir.IEqual(result, zero));
v.SetSFlag(v.ir.ILessThan(result, zero, true));
v.ResetCFlag();
v.ResetOFlag();
}
}
} // Anonymous namespace
// BFI with the packed offset/count from GetReg20 and the base from GetReg39.
void TranslatorVisitor::BFI_reg(u64 insn) {
BFI(*this, insn, GetReg20(insn), GetReg39(insn));
}
// BFI with the packed offset/count from GetReg39 and the base from GetCbuf.
void TranslatorVisitor::BFI_rc(u64 insn) {
BFI(*this, insn, GetReg39(insn), GetCbuf(insn));
}
// BFI with the packed offset/count from GetCbuf and the base from GetReg39.
void TranslatorVisitor::BFI_cr(u64 insn) {
BFI(*this, insn, GetCbuf(insn), GetReg39(insn));
}
// BFI with the packed offset/count from GetImm20 and the base from GetReg39.
void TranslatorVisitor::BFI_imm(u64 insn) {
BFI(*this, insn, GetImm20(insn), GetReg39(insn));
}
} // namespace Shader::Maxwell

View File

@@ -0,0 +1,36 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "common/bit_field.h"
#include "common/common_types.h"
#include "shader_recompiler/exception.h"
#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
namespace Shader::Maxwell {
namespace {
// Validates the modifier bits shared by BRX and JMX and throws
// NotImplementedException when the constant buffer mode or LMT bit is set.
void Check(u64 insn) {
    union {
        u64 raw;
        BitField<5, 1, u64> cbuf_mode;
        BitField<6, 1, u64> lmt;
    } const modifiers{insn};
    const bool uses_cbuf{modifiers.cbuf_mode != 0};
    if (uses_cbuf) {
        throw NotImplementedException("Constant buffer mode");
    }
    const bool uses_lmt{modifiers.lmt != 0};
    if (uses_lmt) {
        throw NotImplementedException("LMT");
    }
}
} // Anonymous namespace
// BRX: only validates the encoding through Check; nothing is emitted here.
void TranslatorVisitor::BRX(u64 insn) {
Check(insn);
}
// JMX: only validates the encoding through Check; nothing is emitted here.
void TranslatorVisitor::JMX(u64 insn) {
Check(insn);
}
} // namespace Shader::Maxwell

View File

@@ -0,0 +1,57 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include "common/common_types.h"
#include "shader_recompiler/exception.h"
#include "shader_recompiler/frontend/ir/modifiers.h"
#include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h"
namespace Shader::Maxwell {
// Floating-point rounding mode as encoded in instructions. Each enumerator maps
// to the IR::FpRounding enumerator of the same name in CastFpRounding.
// Presumably nearest / toward -inf / toward +inf / toward zero -- TODO confirm.
enum class FpRounding : u64 {
RN,
RM,
RP,
RZ,
};
// FMZ mode as encoded in instructions; converted to IR::FmzMode by CastFmzMode.
enum class FmzMode : u64 {
None,
FTZ,
FMZ,
// Fourth encodable value; rejected by CastFmzMode
INVALIDFMZ3,
};
// Converts an encoded rounding mode to the IR rounding mode of the same name.
// Throws NotImplementedException for a value outside the four enumerators.
inline IR::FpRounding CastFpRounding(FpRounding fp_rounding) {
switch (fp_rounding) {
case FpRounding::RN:
return IR::FpRounding::RN;
case FpRounding::RM:
return IR::FpRounding::RM;
case FpRounding::RP:
return IR::FpRounding::RP;
case FpRounding::RZ:
return IR::FpRounding::RZ;
}
// Only reached when the value matches no enumerator
throw NotImplementedException("Invalid floating-point rounding {}", fp_rounding);
}
// Converts an encoded FMZ mode to the IR FMZ mode. None maps to None, while
// FTZ and FMZ both map to FTZ.
// Throws NotImplementedException for INVALIDFMZ3 and any unknown value.
inline IR::FmzMode CastFmzMode(FmzMode fmz_mode) {
    switch (fmz_mode) {
    case FmzMode::None:
        return IR::FmzMode::None;
    case FmzMode::FTZ:
    case FmzMode::FMZ:
        // FMZ is manually handled in the instruction, so it shares the FTZ mapping
        return IR::FmzMode::FTZ;
    case FmzMode::INVALIDFMZ3:
        break;
    }
    throw NotImplementedException("Invalid FMZ mode {}", fmz_mode);
}
} // namespace Shader::Maxwell

View File

@@ -0,0 +1,110 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h"
namespace Shader::Maxwell {
// Emits the integer comparison selected by `compare_op` between the two
// operands. False and True yield constant predicates; `is_signed` is forwarded
// to the ordering comparisons only.
// Throws NotImplementedException for an unknown comparison.
IR::U1 IntegerCompare(IR::IREmitter& ir, const IR::U32& operand_1, const IR::U32& operand_2,
                      CompareOp compare_op, bool is_signed) {
    switch (compare_op) {
    // Constant results
    case CompareOp::False:
        return ir.Imm1(false);
    case CompareOp::True:
        return ir.Imm1(true);
    // Equality, independent of signedness
    case CompareOp::Equal:
        return ir.IEqual(operand_1, operand_2);
    case CompareOp::NotEqual:
        return ir.INotEqual(operand_1, operand_2);
    // Ordering, dependent on signedness
    case CompareOp::LessThan:
        return ir.ILessThan(operand_1, operand_2, is_signed);
    case CompareOp::LessThanEqual:
        return ir.ILessThanEqual(operand_1, operand_2, is_signed);
    case CompareOp::GreaterThan:
        return ir.IGreaterThan(operand_1, operand_2, is_signed);
    case CompareOp::GreaterThanEqual:
        return ir.IGreaterThanEqual(operand_1, operand_2, is_signed);
    default:
        break;
    }
    throw NotImplementedException("Invalid compare op {}", compare_op);
}
// Combines two predicates with the logical operation selected by `bop`.
// Throws NotImplementedException for an unknown operation.
IR::U1 PredicateCombine(IR::IREmitter& ir, const IR::U1& predicate_1, const IR::U1& predicate_2,
                        BooleanOp bop) {
    if (bop == BooleanOp::AND) {
        return ir.LogicalAnd(predicate_1, predicate_2);
    }
    if (bop == BooleanOp::OR) {
        return ir.LogicalOr(predicate_1, predicate_2);
    }
    if (bop == BooleanOp::XOR) {
        return ir.LogicalXor(predicate_1, predicate_2);
    }
    throw NotImplementedException("Invalid bop {}", bop);
}
// Derives a predicate from `result` according to `op`: a constant for False
// and True, or a comparison of `result` against zero for Zero and NonZero.
// Throws NotImplementedException for an unknown operation.
IR::U1 PredicateOperation(IR::IREmitter& ir, const IR::U32& result, PredicateOp op) {
    switch (op) {
    case PredicateOp::Zero:
        return ir.IEqual(result, ir.Imm32(0));
    case PredicateOp::NonZero:
        return ir.INotEqual(result, ir.Imm32(0));
    case PredicateOp::True:
        return ir.Imm1(true);
    case PredicateOp::False:
        return ir.Imm1(false);
    default:
        break;
    }
    throw NotImplementedException("Invalid Predicate operation {}", op);
}
// Returns false for the U-suffixed comparisons (LTU, EQU, LEU, GTU, NEU, GEU)
// and true for every other comparison.
bool IsCompareOpOrdered(FPCompareOp op) {
    const bool is_unordered_variant{op == FPCompareOp::LTU || op == FPCompareOp::EQU ||
                                    op == FPCompareOp::LEU || op == FPCompareOp::GTU ||
                                    op == FPCompareOp::NEU || op == FPCompareOp::GEU};
    return !is_unordered_variant;
}
// Emits the floating-point comparison selected by `compare_op`.
// Each relational comparison and its U-suffixed variant share one emitter call;
// they differ only in the `ordered` flag, which IsCompareOpOrdered reports as
// false for the U-suffixed variants. F and T yield constant predicates, NUM and
// Nan map to the ordered/unordered tests (which take no `control`).
// Throws NotImplementedException for an unknown comparison.
IR::U1 FloatingPointCompare(IR::IREmitter& ir, const IR::F16F32F64& operand_1,
const IR::F16F32F64& operand_2, FPCompareOp compare_op,
IR::FpControl control) {
const bool ordered{IsCompareOpOrdered(compare_op)};
switch (compare_op) {
case FPCompareOp::F:
return ir.Imm1(false);
case FPCompareOp::LT:
case FPCompareOp::LTU:
return ir.FPLessThan(operand_1, operand_2, control, ordered);
case FPCompareOp::EQ:
case FPCompareOp::EQU:
return ir.FPEqual(operand_1, operand_2, control, ordered);
case FPCompareOp::LE:
case FPCompareOp::LEU:
return ir.FPLessThanEqual(operand_1, operand_2, control, ordered);
case FPCompareOp::GT:
case FPCompareOp::GTU:
return ir.FPGreaterThan(operand_1, operand_2, control, ordered);
case FPCompareOp::NE:
case FPCompareOp::NEU:
return ir.FPNotEqual(operand_1, operand_2, control, ordered);
case FPCompareOp::GE:
case FPCompareOp::GEU:
return ir.FPGreaterThanEqual(operand_1, operand_2, control, ordered);
case FPCompareOp::NUM:
return ir.FPOrdered(operand_1, operand_2);
case FPCompareOp::Nan:
return ir.FPUnordered(operand_1, operand_2);
case FPCompareOp::T:
return ir.Imm1(true);
default:
throw NotImplementedException("Invalid FP compare op {}", compare_op);
}
}
} // namespace Shader::Maxwell

View File

@@ -0,0 +1,24 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include "common/common_types.h"
#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
namespace Shader::Maxwell {
// Emits the integer comparison selected by compare_op; is_signed applies to
// the ordering comparisons. Throws NotImplementedException for an unknown op.
[[nodiscard]] IR::U1 IntegerCompare(IR::IREmitter& ir, const IR::U32& operand_1,
const IR::U32& operand_2, CompareOp compare_op, bool is_signed);
// Combines two predicates with AND, OR or XOR as selected by bop.
// Throws NotImplementedException for an unknown op.
[[nodiscard]] IR::U1 PredicateCombine(IR::IREmitter& ir, const IR::U1& predicate_1,
const IR::U1& predicate_2, BooleanOp bop);
// Derives a predicate from result: a constant, or a comparison against zero.
// Throws NotImplementedException for an unknown op.
[[nodiscard]] IR::U1 PredicateOperation(IR::IREmitter& ir, const IR::U32& result, PredicateOp op);
// Returns false for the U-suffixed floating-point comparisons, true otherwise.
[[nodiscard]] bool IsCompareOpOrdered(FPCompareOp op);
// Emits the floating-point comparison selected by compare_op; control defaults
// to a value-initialized IR::FpControl.
// Throws NotImplementedException for an unknown op.
[[nodiscard]] IR::U1 FloatingPointCompare(IR::IREmitter& ir, const IR::F16F32F64& operand_1,
const IR::F16F32F64& operand_2, FPCompareOp compare_op,
IR::FpControl control = {});
} // namespace Shader::Maxwell

View File

@@ -0,0 +1,66 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "common/bit_field.h"
#include "common/common_types.h"
#include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h"
#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
namespace Shader::Maxwell {
// Translates CSET: evaluates a flow test, combines it with a (possibly negated)
// predicate using the encoded boolean operation, and writes either a "pass"
// value or zero to the destination register. When the cc bit is set the
// condition flags are updated from the written value.
void TranslatorVisitor::CSET(u64 insn) {
union {
u64 raw;
BitField<0, 8, IR::Reg> dest_reg;
BitField<8, 5, IR::FlowTest> cc_test;
BitField<39, 3, IR::Pred> bop_pred;
BitField<42, 1, u64> neg_bop_pred;
BitField<44, 1, u64> bf;
BitField<45, 2, BooleanOp> bop;
BitField<47, 1, u64> cc;
} const cset{insn};
const IR::U32 one_mask{ir.Imm32(-1)};
// 0x3f800000 is the IEEE-754 single-precision bit pattern of 1.0
const IR::U32 fp_one{ir.Imm32(0x3f800000)};
const IR::U32 zero{ir.Imm32(0)};
// bf selects the float 1.0 pattern as the pass value instead of all ones
const IR::U32 pass_result{cset.bf == 0 ? one_mask : fp_one};
const IR::U1 cc_test_result{ir.GetFlowTestResult(cset.cc_test)};
const IR::U1 bop_pred{ir.GetPred(cset.bop_pred, cset.neg_bop_pred != 0)};
const IR::U1 pred_result{PredicateCombine(ir, cc_test_result, bop_pred, cset.bop)};
const IR::U32 result{ir.Select(pred_result, pass_result, zero)};
X(cset.dest_reg, result);
if (cset.cc != 0) {
const IR::U1 is_zero{ir.IEqual(result, zero)};
SetZFlag(is_zero);
// With bf the sign flag is always reset; otherwise it is set exactly when
// the result is non-zero.
if (cset.bf != 0) {
ResetSFlag();
} else {
SetSFlag(ir.LogicalNot(is_zero));
}
ResetOFlag();
ResetCFlag();
}
}
// Translates CSETP: evaluates a flow test and writes two predicates. The first
// combines the test result with the (possibly negated) input predicate, the
// second combines the inverted test result with that same predicate.
void TranslatorVisitor::CSETP(u64 insn) {
    union {
        u64 raw;
        BitField<0, 3, IR::Pred> dest_pred_b;
        BitField<3, 3, IR::Pred> dest_pred_a;
        BitField<8, 5, IR::FlowTest> cc_test;
        BitField<39, 3, IR::Pred> bop_pred;
        BitField<42, 1, u64> neg_bop_pred;
        BitField<45, 2, BooleanOp> bop;
    } const csetp{insn};
    const BooleanOp combine_op{csetp.bop};
    const bool negate_input{csetp.neg_bop_pred != 0};
    const IR::U1 input_pred{ir.GetPred(csetp.bop_pred, negate_input)};
    const IR::U1 test{ir.GetFlowTestResult(csetp.cc_test)};
    const IR::U1 pred_a{PredicateCombine(ir, test, input_pred, combine_op)};
    const IR::U1 inverted_test{ir.LogicalNot(test)};
    const IR::U1 pred_b{PredicateCombine(ir, inverted_test, input_pred, combine_op)};
    ir.SetPred(csetp.dest_pred_a, pred_a);
    ir.SetPred(csetp.dest_pred_b, pred_b);
}
} // namespace Shader::Maxwell

View File

@@ -0,0 +1,55 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "common/common_types.h"
#include "shader_recompiler/exception.h"
#include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h"
#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
namespace Shader::Maxwell {
namespace {
// Shared implementation of the DADD (double-precision add) variants.
// Applies the encoded abs/neg modifiers to both operands, adds them with the
// encoded rounding mode and writes the sum to the destination register.
// Throws NotImplementedException when the cc bit is set.
void DADD(TranslatorVisitor& v, u64 insn, const IR::F64& src_b) {
union {
u64 raw;
BitField<0, 8, IR::Reg> dest_reg;
BitField<8, 8, IR::Reg> src_a_reg;
BitField<39, 2, FpRounding> fp_rounding;
BitField<45, 1, u64> neg_b;
BitField<46, 1, u64> abs_a;
BitField<47, 1, u64> cc;
BitField<48, 1, u64> neg_a;
BitField<49, 1, u64> abs_b;
} const dadd{insn};
if (dadd.cc != 0) {
throw NotImplementedException("DADD CC");
}
const IR::F64 src_a{v.D(dadd.src_a_reg)};
const IR::F64 op_a{v.ir.FPAbsNeg(src_a, dadd.abs_a != 0, dadd.neg_a != 0)};
const IR::F64 op_b{v.ir.FPAbsNeg(src_b, dadd.abs_b != 0, dadd.neg_b != 0)};
// Contraction is disabled and no FMZ mode is requested for this operation
const IR::FpControl control{
.no_contraction = true,
.rounding = CastFpRounding(dadd.fp_rounding),
.fmz_mode = IR::FmzMode::None,
};
v.D(dadd.dest_reg, v.ir.FPAdd(op_a, op_b, control));
}
} // Anonymous namespace
// DADD with the second operand obtained through GetDoubleReg20.
void TranslatorVisitor::DADD_reg(u64 insn) {
DADD(*this, insn, GetDoubleReg20(insn));
}
// DADD with the second operand obtained through GetDoubleCbuf.
void TranslatorVisitor::DADD_cbuf(u64 insn) {
DADD(*this, insn, GetDoubleCbuf(insn));
}
// DADD with the second operand obtained through GetDoubleImm20.
void TranslatorVisitor::DADD_imm(u64 insn) {
DADD(*this, insn, GetDoubleImm20(insn));
}
} // namespace Shader::Maxwell

View File

@@ -0,0 +1,72 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "common/bit_field.h"
#include "common/common_types.h"
#include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h"
#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
namespace Shader::Maxwell {
namespace {
// Shared implementation of the DSET (double-precision compare and set)
// variants. Compares the two modified operands, combines the comparison with a
// (possibly negated) predicate using the encoded boolean operation, and writes
// either a "pass" value or zero to the destination register. When the cc bit is
// set the condition flags are updated from the written value.
void DSET(TranslatorVisitor& v, u64 insn, const IR::F64& src_b) {
union {
u64 insn;
BitField<0, 8, IR::Reg> dest_reg;
BitField<8, 8, IR::Reg> src_a_reg;
BitField<39, 3, IR::Pred> pred;
BitField<42, 1, u64> neg_pred;
BitField<43, 1, u64> negate_a;
BitField<44, 1, u64> abs_b;
BitField<45, 2, BooleanOp> bop;
BitField<47, 1, u64> cc;
BitField<48, 4, FPCompareOp> compare_op;
BitField<52, 1, u64> bf;
BitField<53, 1, u64> negate_b;
BitField<54, 1, u64> abs_a;
} const dset{insn};
const IR::F64 op_a{v.ir.FPAbsNeg(v.D(dset.src_a_reg), dset.abs_a != 0, dset.negate_a != 0)};
const IR::F64 op_b{v.ir.FPAbsNeg(src_b, dset.abs_b != 0, dset.negate_b != 0)};
// Input predicate, inverted when neg_pred is set
IR::U1 pred{v.ir.GetPred(dset.pred)};
if (dset.neg_pred != 0) {
pred = v.ir.LogicalNot(pred);
}
// The comparison uses the default (value-initialized) FpControl
const IR::U1 cmp_result{FloatingPointCompare(v.ir, op_a, op_b, dset.compare_op)};
const IR::U1 bop_result{PredicateCombine(v.ir, cmp_result, pred, dset.bop)};
const IR::U32 one_mask{v.ir.Imm32(-1)};
// 0x3f800000 is the IEEE-754 single-precision bit pattern of 1.0
const IR::U32 fp_one{v.ir.Imm32(0x3f800000)};
const IR::U32 zero{v.ir.Imm32(0)};
// bf selects the float 1.0 pattern as the pass value instead of all ones
const IR::U32 pass_result{dset.bf == 0 ? one_mask : fp_one};
const IR::U32 result{v.ir.Select(bop_result, pass_result, zero)};
v.X(dset.dest_reg, result);
if (dset.cc != 0) {
const IR::U1 is_zero{v.ir.IEqual(result, zero)};
v.SetZFlag(is_zero);
// With bf the sign flag is always reset; otherwise it is set exactly when
// the result is non-zero.
if (dset.bf != 0) {
v.ResetSFlag();
} else {
v.SetSFlag(v.ir.LogicalNot(is_zero));
}
v.ResetCFlag();
v.ResetOFlag();
}
}
} // Anonymous namespace
// DSET with the second operand obtained through GetDoubleReg20.
void TranslatorVisitor::DSET_reg(u64 insn) {
DSET(*this, insn, GetDoubleReg20(insn));
}
// DSET with the second operand obtained through GetDoubleCbuf.
void TranslatorVisitor::DSET_cbuf(u64 insn) {
DSET(*this, insn, GetDoubleCbuf(insn));
}
// DSET with the second operand obtained through GetDoubleImm20.
void TranslatorVisitor::DSET_imm(u64 insn) {
DSET(*this, insn, GetDoubleImm20(insn));
}
} // namespace Shader::Maxwell

View File

@@ -0,0 +1,58 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "common/common_types.h"
#include "shader_recompiler/exception.h"
#include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h"
#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
namespace Shader::Maxwell {
namespace {
// Shared implementation of the DFMA (double-precision fused multiply-add)
// variants. Applies the encoded negations to the second and third operands,
// emits the fused multiply-add with the encoded rounding mode and writes the
// result to the destination register.
// Throws NotImplementedException when the cc bit is set.
void DFMA(TranslatorVisitor& v, u64 insn, const IR::F64& src_b, const IR::F64& src_c) {
union {
u64 raw;
BitField<0, 8, IR::Reg> dest_reg;
BitField<8, 8, IR::Reg> src_a_reg;
BitField<50, 2, FpRounding> fp_rounding;
BitField<47, 1, u64> cc;
BitField<48, 1, u64> neg_b;
BitField<49, 1, u64> neg_c;
} const dfma{insn};
if (dfma.cc != 0) {
throw NotImplementedException("DFMA CC");
}
// The first operand has no modifier bits in this encoding and is used as read
const IR::F64 src_a{v.D(dfma.src_a_reg)};
const IR::F64 op_b{v.ir.FPAbsNeg(src_b, false, dfma.neg_b != 0)};
const IR::F64 op_c{v.ir.FPAbsNeg(src_c, false, dfma.neg_c != 0)};
// Contraction is disabled and no FMZ mode is requested for this operation
const IR::FpControl control{
.no_contraction = true,
.rounding = CastFpRounding(dfma.fp_rounding),
.fmz_mode = IR::FmzMode::None,
};
v.D(dfma.dest_reg, v.ir.FPFma(src_a, op_b, op_c, control));
}
} // Anonymous namespace
// DFMA with operand B from GetDoubleReg20 and operand C from GetDoubleReg39.
void TranslatorVisitor::DFMA_reg(u64 insn) {
DFMA(*this, insn, GetDoubleReg20(insn), GetDoubleReg39(insn));
}
// DFMA with operand B from GetDoubleCbuf and operand C from GetDoubleReg39.
void TranslatorVisitor::DFMA_cr(u64 insn) {
DFMA(*this, insn, GetDoubleCbuf(insn), GetDoubleReg39(insn));
}
// DFMA with operand B from GetDoubleReg39 and operand C from GetDoubleCbuf.
void TranslatorVisitor::DFMA_rc(u64 insn) {
DFMA(*this, insn, GetDoubleReg39(insn), GetDoubleCbuf(insn));
}
// DFMA with operand B from GetDoubleImm20 and operand C from GetDoubleReg39.
void TranslatorVisitor::DFMA_imm(u64 insn) {
DFMA(*this, insn, GetDoubleImm20(insn), GetDoubleReg39(insn));
}
} // namespace Shader::Maxwell

View File

@@ -0,0 +1,55 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "common/bit_field.h"
#include "common/common_types.h"
#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
namespace Shader::Maxwell {
namespace {
// Shared implementation of the DMNMX (double-precision min/max) variants.
// Computes both the maximum and the minimum of the modified operands and
// selects between them with the encoded predicate: the minimum on a true
// predicate and the maximum otherwise, with the roles swapped when neg_pred is
// set. Throws NotImplementedException when the cc bit is set.
void DMNMX(TranslatorVisitor& v, u64 insn, const IR::F64& src_b) {
    union {
        u64 insn;
        BitField<0, 8, IR::Reg> dest_reg;
        BitField<8, 8, IR::Reg> src_a_reg;
        BitField<39, 3, IR::Pred> pred;
        BitField<42, 1, u64> neg_pred;
        BitField<45, 1, u64> negate_b;
        BitField<46, 1, u64> abs_a;
        BitField<47, 1, u64> cc;
        BitField<48, 1, u64> negate_a;
        BitField<49, 1, u64> abs_b;
    } const dmnmx{insn};
    if (dmnmx.cc != 0) {
        throw NotImplementedException("DMNMX CC");
    }
    const IR::U1 selector{v.ir.GetPred(dmnmx.pred)};
    const IR::F64 op_a{v.ir.FPAbsNeg(v.D(dmnmx.src_a_reg), dmnmx.abs_a != 0, dmnmx.negate_a != 0)};
    const IR::F64 op_b{v.ir.FPAbsNeg(src_b, dmnmx.abs_b != 0, dmnmx.negate_b != 0)};
    const IR::F64 maximum{v.ir.FPMax(op_a, op_b)};
    const IR::F64 minimum{v.ir.FPMin(op_a, op_b)};
    // A negated predicate is handled by exchanging which value each branch picks
    const bool swap_roles{dmnmx.neg_pred != 0};
    const IR::F64 when_true{swap_roles ? maximum : minimum};
    const IR::F64 when_false{swap_roles ? minimum : maximum};
    v.D(dmnmx.dest_reg, IR::F64{v.ir.Select(selector, when_true, when_false)});
}
} // Anonymous namespace
// DMNMX with the second operand obtained through GetDoubleReg20.
void TranslatorVisitor::DMNMX_reg(u64 insn) {
DMNMX(*this, insn, GetDoubleReg20(insn));
}
// DMNMX with the second operand obtained through GetDoubleCbuf.
void TranslatorVisitor::DMNMX_cbuf(u64 insn) {
DMNMX(*this, insn, GetDoubleCbuf(insn));
}
// DMNMX with the second operand obtained through GetDoubleImm20.
void TranslatorVisitor::DMNMX_imm(u64 insn) {
DMNMX(*this, insn, GetDoubleImm20(insn));
}
} // namespace Shader::Maxwell

View File

@@ -0,0 +1,50 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "common/common_types.h"
#include "shader_recompiler/exception.h"
#include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h"
#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
namespace Shader::Maxwell {
namespace {
// Shared implementation of the DMUL (double-precision multiply) variants.
// Optionally negates the first operand, multiplies it by `src_b` with the
// encoded rounding mode and writes the product to the destination register.
// Throws NotImplementedException when the cc bit is set.
void DMUL(TranslatorVisitor& v, u64 insn, const IR::F64& src_b) {
    union {
        u64 raw;
        BitField<0, 8, IR::Reg> dest_reg;
        BitField<8, 8, IR::Reg> src_a_reg;
        BitField<39, 2, FpRounding> fp_rounding;
        BitField<47, 1, u64> cc;
        BitField<48, 1, u64> neg;
    } const dmul{insn};
    if (dmul.cc != 0) {
        throw NotImplementedException("DMUL CC");
    }
    // The single negation bit is applied to the first operand only
    const bool negate_a{dmul.neg != 0};
    const IR::F64 op_a{v.ir.FPAbsNeg(v.D(dmul.src_a_reg), false, negate_a)};
    // Contraction is disabled and no FMZ mode is requested for this operation
    const IR::FpControl fp_control{
        .no_contraction = true,
        .rounding = CastFpRounding(dmul.fp_rounding),
        .fmz_mode = IR::FmzMode::None,
    };
    const IR::F64 product{v.ir.FPMul(op_a, src_b, fp_control)};
    v.D(dmul.dest_reg, product);
}
} // Anonymous namespace
// DMUL variant whose second operand is fetched with GetDoubleReg20.
void TranslatorVisitor::DMUL_reg(u64 insn) {
    const IR::F64 src_b{GetDoubleReg20(insn)};
    DMUL(*this, insn, src_b);
}
// DMUL variant whose second operand is fetched with GetDoubleCbuf.
void TranslatorVisitor::DMUL_cbuf(u64 insn) {
    const IR::F64 src_b{GetDoubleCbuf(insn)};
    DMUL(*this, insn, src_b);
}
// DMUL variant whose second operand is fetched with GetDoubleImm20.
void TranslatorVisitor::DMUL_imm(u64 insn) {
    const IR::F64 src_b{GetDoubleImm20(insn)};
    DMUL(*this, insn, src_b);
}
} // namespace Shader::Maxwell

View File

@@ -0,0 +1,54 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "common/bit_field.h"
#include "common/common_types.h"
#include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h"
#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
namespace Shader::Maxwell {
namespace {
// Translates DSETP: compares two F64 operands and writes two predicates. Predicate A
// receives the comparison combined with the boolean-op predicate; predicate B receives
// the inverted comparison combined with the same predicate.
void DSETP(TranslatorVisitor& v, u64 insn, const IR::F64& src_b) {
    union {
        u64 raw;
        BitField<0, 3, IR::Pred> dest_pred_b;
        BitField<3, 3, IR::Pred> dest_pred_a;
        BitField<6, 1, u64> negate_b;
        BitField<7, 1, u64> abs_a;
        BitField<8, 8, IR::Reg> src_a_reg;
        BitField<39, 3, IR::Pred> bop_pred;
        BitField<42, 1, u64> neg_bop_pred;
        BitField<43, 1, u64> negate_a;
        BitField<44, 1, u64> abs_b;
        BitField<45, 2, BooleanOp> bop;
        BitField<48, 4, FPCompareOp> compare_op;
    } const enc{insn};

    const IR::F64 lhs{v.ir.FPAbsNeg(v.D(enc.src_a_reg), enc.abs_a != 0, enc.negate_a != 0)};
    const IR::F64 rhs{v.ir.FPAbsNeg(src_b, enc.abs_b != 0, enc.negate_b != 0)};

    const BooleanOp combine_op{enc.bop};
    const FPCompareOp cmp_op{enc.compare_op};
    const IR::U1 cmp{FloatingPointCompare(v.ir, lhs, rhs, cmp_op)};
    const IR::U1 extra_pred{v.ir.GetPred(enc.bop_pred, enc.neg_bop_pred != 0)};

    const IR::U1 value_a{PredicateCombine(v.ir, cmp, extra_pred, combine_op)};
    const IR::U1 inverted_cmp{v.ir.LogicalNot(cmp)};
    const IR::U1 value_b{PredicateCombine(v.ir, inverted_cmp, extra_pred, combine_op)};

    v.ir.SetPred(enc.dest_pred_a, value_a);
    v.ir.SetPred(enc.dest_pred_b, value_b);
}
} // Anonymous namespace
// DSETP variant whose second operand is fetched with GetDoubleReg20.
void TranslatorVisitor::DSETP_reg(u64 insn) {
    const IR::F64 src_b{GetDoubleReg20(insn)};
    DSETP(*this, insn, src_b);
}
// DSETP variant whose second operand is fetched with GetDoubleCbuf.
void TranslatorVisitor::DSETP_cbuf(u64 insn) {
    const IR::F64 src_b{GetDoubleCbuf(insn)};
    DSETP(*this, insn, src_b);
}
// DSETP variant whose second operand is fetched with GetDoubleImm20.
void TranslatorVisitor::DSETP_imm(u64 insn) {
    const IR::F64 src_b{GetDoubleImm20(insn)};
    DSETP(*this, insn, src_b);
}
} // namespace Shader::Maxwell

View File

@@ -0,0 +1,43 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "common/common_types.h"
#include "shader_recompiler/exception.h"
#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
namespace Shader::Maxwell {
namespace {
// Emits the fragment-stage outputs on EXIT. Registers are consumed sequentially starting
// at R0: one per enabled colour component of each of the 8 render targets, after which
// the sample mask and depth are read if the shader program header enables them.
void ExitFragment(TranslatorVisitor& v) {
    const ProgramHeader sph{v.env.SPH()};
    IR::Reg next_reg{IR::Reg::R0};
    for (u32 rt = 0; rt < 8; ++rt) {
        const std::array<bool, 4> enabled{sph.ps.EnabledOutputComponents(rt)};
        for (u32 channel = 0; channel < 4; ++channel) {
            if (enabled[channel]) {
                v.ir.SetFragColor(rt, channel, v.F(next_reg));
                ++next_reg;
            }
        }
    }
    // The sample mask is read from the register right after the last colour component...
    if (sph.ps.omap.sample_mask != 0) {
        v.ir.SetSampleMask(v.X(next_reg));
    }
    // ...and depth always from the register after that, whether or not a mask was read.
    if (sph.ps.omap.depth != 0) {
        v.ir.SetFragDepth(v.F(next_reg + 1));
    }
}
} // Anonymous namespace
// Translates EXIT. Only the fragment stage emits anything here; every other stage is a
// no-op.
void TranslatorVisitor::EXIT() {
    if (env.ShaderStage() == Stage::Fragment) {
        ExitFragment(*this);
    }
}
} // namespace Shader::Maxwell

View File

@@ -0,0 +1,47 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "common/bit_field.h"
#include "common/common_types.h"
#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
namespace Shader::Maxwell {
namespace {
// Translates FLO: searches `src` for its most significant bit (signed or unsigned search,
// per the is_signed bit) and writes the result to the destination register.
//
// - tilde: the source is bitwise-inverted before the search.
// - shift: a result that is not -1 is XORed with 31; a result equal to -1 (treated here
//   as "not found") is passed through unchanged.
//
// Throws NotImplementedException when the CC bit is set.
void FLO(TranslatorVisitor& v, u64 insn, IR::U32 src) {
    union {
        u64 insn;
        BitField<0, 8, IR::Reg> dest_reg;
        BitField<40, 1, u64> tilde;
        BitField<41, 1, u64> shift;
        BitField<47, 1, u64> cc;
        BitField<48, 1, u64> is_signed;
    } const flo{insn};

    if (flo.cc != 0) {
        // Name the opcode so the diagnostic matches the other translators in this directory.
        throw NotImplementedException("FLO CC");
    }
    if (flo.tilde != 0) {
        src = v.ir.BitwiseNot(src);
    }
    IR::U32 result{flo.is_signed != 0 ? v.ir.FindSMsb(src) : v.ir.FindUMsb(src)};
    if (flo.shift != 0) {
        const IR::U1 not_found{v.ir.IEqual(result, v.ir.Imm32(-1))};
        result = IR::U32{v.ir.Select(not_found, result, v.ir.BitwiseXor(result, v.ir.Imm32(31)))};
    }
    v.X(flo.dest_reg, result);
}
} // Anonymous namespace
// FLO variant whose source is fetched with GetReg20.
void TranslatorVisitor::FLO_reg(u64 insn) {
    const IR::U32 src{GetReg20(insn)};
    FLO(*this, insn, src);
}
// FLO variant whose source is fetched with GetCbuf.
void TranslatorVisitor::FLO_cbuf(u64 insn) {
    const IR::U32 src{GetCbuf(insn)};
    FLO(*this, insn, src);
}
// FLO variant whose source is fetched with GetImm20.
void TranslatorVisitor::FLO_imm(u64 insn) {
    const IR::U32 src{GetImm20(insn)};
    FLO(*this, insn, src);
}
} // namespace Shader::Maxwell

View File

@@ -0,0 +1,82 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "common/common_types.h"
#include "shader_recompiler/exception.h"
#include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h"
#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
namespace Shader::Maxwell {
namespace {
// Shared FADD implementation. All modifiers are supplied by the caller because the
// regular and 32-bit-immediate encodings keep them at different bit positions; only the
// destination and source-A registers are decoded here.
void FADD(TranslatorVisitor& v, u64 insn, bool sat, bool cc, bool ftz, FpRounding fp_rounding,
          const IR::F32& src_b, bool abs_a, bool neg_a, bool abs_b, bool neg_b) {
    union {
        u64 raw;
        BitField<0, 8, IR::Reg> dest_reg;
        BitField<8, 8, IR::Reg> src_a;
    } const regs{insn};

    if (cc) {
        throw NotImplementedException("FADD CC");
    }
    const IR::F32 lhs{v.ir.FPAbsNeg(v.F(regs.src_a), abs_a, neg_a)};
    const IR::F32 rhs{v.ir.FPAbsNeg(src_b, abs_b, neg_b)};
    const IR::FmzMode denorm_mode{ftz ? IR::FmzMode::FTZ : IR::FmzMode::None};
    const IR::FpControl fp_control{
        .no_contraction = true,
        .rounding = CastFpRounding(fp_rounding),
        .fmz_mode = denorm_mode,
    };
    IR::F32 sum{v.ir.FPAdd(lhs, rhs, fp_control)};
    if (sat) {
        sum = v.ir.FPSaturate(sum);
    }
    v.F(regs.dest_reg, sum);
}
// Decodes the modifier bits of the regular FADD encoding and forwards to the shared
// implementation.
void FADD(TranslatorVisitor& v, u64 insn, const IR::F32& src_b) {
    union {
        u64 raw;
        BitField<39, 2, FpRounding> fp_rounding;
        BitField<44, 1, u64> ftz;
        BitField<45, 1, u64> neg_b;
        BitField<46, 1, u64> abs_a;
        BitField<47, 1, u64> cc;
        BitField<48, 1, u64> neg_a;
        BitField<49, 1, u64> abs_b;
        BitField<50, 1, u64> sat;
    } const enc{insn};

    const bool saturate{enc.sat != 0};
    const bool writes_cc{enc.cc != 0};
    const bool flush{enc.ftz != 0};
    const bool abs_a{enc.abs_a != 0};
    const bool neg_a{enc.neg_a != 0};
    const bool abs_b{enc.abs_b != 0};
    const bool neg_b{enc.neg_b != 0};
    FADD(v, insn, saturate, writes_cc, flush, enc.fp_rounding, src_b, abs_a, neg_a, abs_b, neg_b);
}
} // Anonymous namespace
// FADD variant whose second operand is fetched with GetFloatReg20.
void TranslatorVisitor::FADD_reg(u64 insn) {
    const IR::F32 src_b{GetFloatReg20(insn)};
    FADD(*this, insn, src_b);
}
// FADD variant whose second operand is fetched with GetFloatCbuf.
void TranslatorVisitor::FADD_cbuf(u64 insn) {
    const IR::F32 src_b{GetFloatCbuf(insn)};
    FADD(*this, insn, src_b);
}
// FADD variant whose second operand is fetched with GetFloatImm20.
void TranslatorVisitor::FADD_imm(u64 insn) {
    const IR::F32 src_b{GetFloatImm20(insn)};
    FADD(*this, insn, src_b);
}
// FADD with a 32-bit immediate operand. This encoding keeps its modifier bits at
// different positions than the regular form; saturation is passed as off and the
// rounding mode is always RN.
void TranslatorVisitor::FADD32I(u64 insn) {
    union {
        u64 raw;
        BitField<55, 1, u64> ftz;
        BitField<56, 1, u64> neg_a;
        BitField<54, 1, u64> abs_a;
        BitField<52, 1, u64> cc;
        BitField<53, 1, u64> neg_b;
        BitField<57, 1, u64> abs_b;
    } const enc{insn};

    const bool writes_cc{enc.cc != 0};
    const bool flush{enc.ftz != 0};
    const bool abs_a{enc.abs_a != 0};
    const bool neg_a{enc.neg_a != 0};
    const bool abs_b{enc.abs_b != 0};
    const bool neg_b{enc.neg_b != 0};
    FADD(*this, insn, false, writes_cc, flush, FpRounding::RN, GetFloatImm32(insn), abs_a, neg_a,
         abs_b, neg_b);
}
} // namespace Shader::Maxwell

View File

@@ -0,0 +1,55 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "common/bit_field.h"
#include "common/common_types.h"
#include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h"
#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
namespace Shader::Maxwell {
namespace {
// Translates FCMP: compares `operand` against 0.0f with the encoded comparison and uses
// the outcome to Select between the value of the encoded source register and `src_a`;
// the selected value is written to the destination register.
void FCMP(TranslatorVisitor& v, u64 insn, const IR::U32& src_a, const IR::F32& operand) {
    union {
        u64 raw;
        BitField<0, 8, IR::Reg> dest_reg;
        BitField<8, 8, IR::Reg> src_reg;
        BitField<47, 1, u64> ftz;
        BitField<48, 4, FPCompareOp> compare_op;
    } const enc{insn};

    const IR::F32 zero{v.ir.Imm32(0.0f)};
    const IR::FpControl fp_control{
        .fmz_mode = (enc.ftz != 0 ? IR::FmzMode::FTZ : IR::FmzMode::None),
    };
    const IR::U1 passed{FloatingPointCompare(v.ir, operand, zero, enc.compare_op, fp_control)};
    const IR::U32 reg_value{v.X(enc.src_reg)};
    const IR::U32 selected{v.ir.Select(passed, reg_value, src_a)};
    v.X(enc.dest_reg, selected);
}
} // Anonymous namespace
// FCMP variant: src_a is fetched with GetReg20 and the compared operand with
// GetFloatReg39. Both fetches stay inside the call's argument list, so their relative
// evaluation order is left to the compiler.
void TranslatorVisitor::FCMP_reg(u64 insn) {
FCMP(*this, insn, GetReg20(insn), GetFloatReg39(insn));
}
// FCMP variant: src_a is fetched with GetReg39 and the compared operand with
// GetFloatCbuf.
void TranslatorVisitor::FCMP_rc(u64 insn) {
FCMP(*this, insn, GetReg39(insn), GetFloatCbuf(insn));
}
// FCMP variant: src_a is fetched with GetCbuf and the compared operand with
// GetFloatReg39.
void TranslatorVisitor::FCMP_cr(u64 insn) {
FCMP(*this, insn, GetCbuf(insn), GetFloatReg39(insn));
}
// FCMP variant with an immediate src_a. The 19-bit immediate field is shifted left by 12
// and the separate sign bit is placed in bit 31 to form the 32-bit value.
void TranslatorVisitor::FCMP_imm(u64 insn) {
    union {
        u64 raw;
        BitField<20, 19, u64> value;
        BitField<56, 1, u64> is_negative;
    } const enc{insn};

    const u32 sign{enc.is_negative != 0 ? (1U << 31) : 0};
    const u32 magnitude{static_cast<u32>(enc.value) << 12};
    FCMP(*this, insn, ir.Imm32(magnitude | sign), GetFloatReg39(insn));
}
} // namespace Shader::Maxwell

View File

@@ -0,0 +1,78 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "common/bit_field.h"
#include "common/common_types.h"
#include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h"
#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
namespace Shader::Maxwell {
namespace {
// Translates FSET: compares two F32 operands, combines the comparison with a predicate
// and writes a 32-bit value to the destination: zero on failure, otherwise either -1
// (bf == 0) or 0x3f800000 (bf != 0). When CC is set the Z/S/C/O flags are updated too.
void FSET(TranslatorVisitor& v, u64 insn, const IR::F32& src_b) {
    union {
        u64 raw;
        BitField<0, 8, IR::Reg> dest_reg;
        BitField<8, 8, IR::Reg> src_a_reg;
        BitField<39, 3, IR::Pred> pred;
        BitField<42, 1, u64> neg_pred;
        BitField<43, 1, u64> negate_a;
        BitField<44, 1, u64> abs_b;
        BitField<45, 2, BooleanOp> bop;
        BitField<47, 1, u64> cc;
        BitField<48, 4, FPCompareOp> compare_op;
        BitField<52, 1, u64> bf;
        BitField<53, 1, u64> negate_b;
        BitField<54, 1, u64> abs_a;
        BitField<55, 1, u64> ftz;
    } const enc{insn};

    const IR::F32 lhs{v.ir.FPAbsNeg(v.F(enc.src_a_reg), enc.abs_a != 0, enc.negate_a != 0)};
    const IR::F32 rhs{v.ir.FPAbsNeg(src_b, enc.abs_b != 0, enc.negate_b != 0)};
    const IR::FpControl fp_control{
        .no_contraction = false,
        .rounding = IR::FpRounding::DontCare,
        .fmz_mode = (enc.ftz != 0 ? IR::FmzMode::FTZ : IR::FmzMode::None),
    };

    IR::U1 combine_pred{v.ir.GetPred(enc.pred)};
    if (enc.neg_pred != 0) {
        combine_pred = v.ir.LogicalNot(combine_pred);
    }
    const IR::U1 comparison{FloatingPointCompare(v.ir, lhs, rhs, enc.compare_op, fp_control)};
    const IR::U1 passed{PredicateCombine(v.ir, comparison, combine_pred, enc.bop)};

    const IR::U32 all_ones{v.ir.Imm32(-1)};
    const IR::U32 float_one{v.ir.Imm32(0x3f800000)};
    const IR::U32 zero{v.ir.Imm32(0)};
    const IR::U32 pass_value{enc.bf != 0 ? float_one : all_ones};
    const IR::U32 result{v.ir.Select(passed, pass_value, zero)};
    v.X(enc.dest_reg, result);

    if (enc.cc == 0) {
        return;
    }
    const IR::U1 is_zero{v.ir.IEqual(result, zero)};
    v.SetZFlag(is_zero);
    // The sign flag is only set in the integer-mask mode; in the bf mode it is cleared.
    if (enc.bf == 0) {
        v.SetSFlag(v.ir.LogicalNot(is_zero));
    } else {
        v.ResetSFlag();
    }
    v.ResetCFlag();
    v.ResetOFlag();
}
} // Anonymous namespace
// FSET variant whose second operand is fetched with GetFloatReg20.
void TranslatorVisitor::FSET_reg(u64 insn) {
    const IR::F32 src_b{GetFloatReg20(insn)};
    FSET(*this, insn, src_b);
}
// FSET variant whose second operand is fetched with GetFloatCbuf.
void TranslatorVisitor::FSET_cbuf(u64 insn) {
    const IR::F32 src_b{GetFloatCbuf(insn)};
    FSET(*this, insn, src_b);
}
// FSET variant whose second operand is fetched with GetFloatImm20.
void TranslatorVisitor::FSET_imm(u64 insn) {
    const IR::F32 src_b{GetFloatImm20(insn)};
    FSET(*this, insn, src_b);
}
} // namespace Shader::Maxwell

View File

@@ -0,0 +1,214 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h"
#include "shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.h"
namespace Shader::Maxwell {
namespace {
// Floating-point operand width as encoded in the 2-bit src_size / dst_size fields of F2F.
// The value 0 has no enumerator; the switches over this type treat it as invalid.
enum class FloatFormat : u64 {
F16 = 1,
F32 = 2,
F64 = 3,
};
// Rounding operation applied by F2F when source and destination widths are equal.
// Values are compared against the 4-bit rounding_op field after masking it with 0x0B.
enum class RoundingOp : u64 {
None = 0,
Pass = 3,
Round = 8,
Floor = 9,
Ceil = 10,
Trunc = 11,
};
// Returns the bit width (16, 32 or 64) of a float format.
// Throws NotImplementedException for any value without an enumerator.
[[nodiscard]] u32 WidthSize(FloatFormat width) {
    if (width == FloatFormat::F16) {
        return 16;
    }
    if (width == FloatFormat::F32) {
        return 32;
    }
    if (width == FloatFormat::F64) {
        return 64;
    }
    throw NotImplementedException("Invalid width {}", width);
}
// Translates F2F (float to float). `src_a` is the already-fetched source value and `abs`
// the absolute-value modifier decoded by the caller.
//
// - When source and destination widths differ, the value is converted with the encoded
//   rounding mode.
// - When they are equal, the rounding operation (round/floor/ceil/trunc) is applied, or
//   for None/Pass a zero is added to the value.
//
// Saturation and FTZ are ignored whenever either side is F64.
// Throws NotImplementedException for CC, unknown rounding operations and invalid
// destination formats.
void F2F(TranslatorVisitor& v, u64 insn, const IR::F16F32F64& src_a, bool abs) {
union {
u64 insn;
BitField<0, 8, IR::Reg> dest_reg;
BitField<44, 1, u64> ftz;
BitField<45, 1, u64> neg;
BitField<47, 1, u64> cc;
BitField<50, 1, u64> sat;
// rounding_op and rounding overlap: the same bits are read as a 4-bit operation when
// the widths match and as a 2-bit rounding mode when a conversion is needed.
BitField<39, 4, u64> rounding_op;
BitField<39, 2, FpRounding> rounding;
BitField<10, 2, FloatFormat> src_size;
BitField<8, 2, FloatFormat> dst_size;
// Masks out bit 2 of rounding_op (mask 0x0B) before interpreting it as a RoundingOp.
[[nodiscard]] RoundingOp RoundingOperation() const {
constexpr u64 rounding_mask = 0x0B;
return static_cast<RoundingOp>(rounding_op.Value() & rounding_mask);
}
} const f2f{insn};
if (f2f.cc != 0) {
throw NotImplementedException("F2F CC");
}
IR::F16F32F64 input{v.ir.FPAbsNeg(src_a, abs, f2f.neg != 0)};
const bool any_fp64{f2f.src_size == FloatFormat::F64 || f2f.dst_size == FloatFormat::F64};
IR::FpControl fp_control{
.no_contraction = false,
.rounding = IR::FpRounding::DontCare,
.fmz_mode = (f2f.ftz != 0 && !any_fp64 ? IR::FmzMode::FTZ : IR::FmzMode::None),
};
if (f2f.src_size != f2f.dst_size) {
// Width change: the rounding mode only matters for this conversion.
fp_control.rounding = CastFpRounding(f2f.rounding);
input = v.ir.FPConvert(WidthSize(f2f.dst_size), input, fp_control);
} else {
switch (f2f.RoundingOperation()) {
case RoundingOp::None:
case RoundingOp::Pass:
// Make sure NANs are handled properly
// (done by adding a zero of the matching width; a src_size with no case label
// leaves the input untouched here)
switch (f2f.src_size) {
case FloatFormat::F16:
input = v.ir.FPAdd(input, v.ir.FPConvert(16, v.ir.Imm32(0.0f)), fp_control);
break;
case FloatFormat::F32:
input = v.ir.FPAdd(input, v.ir.Imm32(0.0f), fp_control);
break;
case FloatFormat::F64:
input = v.ir.FPAdd(input, v.ir.Imm64(0.0), fp_control);
break;
}
break;
case RoundingOp::Round:
input = v.ir.FPRoundEven(input, fp_control);
break;
case RoundingOp::Floor:
input = v.ir.FPFloor(input, fp_control);
break;
case RoundingOp::Ceil:
input = v.ir.FPCeil(input, fp_control);
break;
case RoundingOp::Trunc:
input = v.ir.FPTrunc(input, fp_control);
break;
default:
throw NotImplementedException("Unimplemented rounding mode {}", f2f.rounding.Value());
}
}
if (f2f.sat != 0 && !any_fp64) {
input = v.ir.FPSaturate(input);
}
switch (f2f.dst_size) {
case FloatFormat::F16: {
// An F16 result is packed together with a zero half into one 32-bit register.
const IR::F16 imm{v.ir.FPConvert(16, v.ir.Imm32(0.0f))};
v.X(f2f.dest_reg, v.ir.PackFloat2x16(v.ir.CompositeConstruct(input, imm)));
break;
}
case FloatFormat::F32:
v.F(f2f.dest_reg, input);
break;
case FloatFormat::F64:
v.D(f2f.dest_reg, input);
break;
default:
throw NotImplementedException("Invalid dest format {}", f2f.dst_size.Value());
}
}
} // Anonymous namespace
// F2F with a register source. Fetches the source according to the encoded source width
// (for F16 the selector bit picks one of the two halves returned by Extract) and
// forwards to the shared F2F implementation.
// Throws NotImplementedException when the source width field has no valid format.
void TranslatorVisitor::F2F_reg(u64 insn) {
    union {
        u64 insn;
        BitField<49, 1, u64> abs;
        BitField<10, 2, FloatFormat> src_size;
        BitField<41, 1, u64> selector;
    } const f2f{insn};

    IR::F16F32F64 src_a;
    switch (f2f.src_size) {
    case FloatFormat::F16: {
        auto [lhs_a, rhs_a]{Extract(ir, GetReg20(insn), Swizzle::H1_H0)};
        src_a = f2f.selector != 0 ? rhs_a : lhs_a;
        break;
    }
    case FloatFormat::F32:
        src_a = GetFloatReg20(insn);
        break;
    case FloatFormat::F64:
        src_a = GetDoubleReg20(insn);
        break;
    default:
        // This switch validates the *source* width, so report it as such.
        throw NotImplementedException("Invalid source format {}", f2f.src_size.Value());
    }
    F2F(*this, insn, src_a, f2f.abs != 0);
}
// F2F with a constant-buffer source. Fetches the source according to the encoded source
// width (for F16 the selector bit picks one of the two halves returned by Extract) and
// forwards to the shared F2F implementation.
// Throws NotImplementedException when the source width field has no valid format.
void TranslatorVisitor::F2F_cbuf(u64 insn) {
    union {
        u64 insn;
        BitField<49, 1, u64> abs;
        BitField<10, 2, FloatFormat> src_size;
        BitField<41, 1, u64> selector;
    } const f2f{insn};

    IR::F16F32F64 src_a;
    switch (f2f.src_size) {
    case FloatFormat::F16: {
        auto [lhs_a, rhs_a]{Extract(ir, GetCbuf(insn), Swizzle::H1_H0)};
        src_a = f2f.selector != 0 ? rhs_a : lhs_a;
        break;
    }
    case FloatFormat::F32:
        src_a = GetFloatCbuf(insn);
        break;
    case FloatFormat::F64:
        src_a = GetDoubleCbuf(insn);
        break;
    default:
        // This switch validates the *source* width, so report it as such.
        throw NotImplementedException("Invalid source format {}", f2f.src_size.Value());
    }
    F2F(*this, insn, src_a, f2f.abs != 0);
}
void TranslatorVisitor::F2F_imm([[maybe_unused]] u64 insn) {
union {
u64 insn;
BitField<49, 1, u64> abs;
BitField<10, 2, FloatFormat> src_size;
BitField<41, 1, u64> selector;
BitField<20, 19, u64> imm;
BitField<56, 1, u64> imm_neg;
} const f2f{insn};
IR::F16F32F64 src_a;
switch (f2f.src_size) {
case FloatFormat::F16: {
const u32 imm{static_cast<u32>(f2f.imm & 0x0000ffff)};
const IR::Value vector{ir.UnpackFloat2x16(ir.Imm32(imm | (imm << 16)))};
src_a = IR::F16{ir.CompositeExtract(vector, f2f.selector != 0 ? 0 : 1)};
if (f2f.imm_neg != 0) {
throw NotImplementedException("Neg bit on F16");
}
break;
}
case FloatFormat::F32:
src_a = GetFloatImm20(insn);
break;
case FloatFormat::F64:
src_a = GetDoubleImm20(insn);
break;
default:
throw NotImplementedException("Invalid dest format {}", f2f.src_size.Value());
}
F2F(*this, insn, src_a, f2f.abs != 0);
}
} // namespace Shader::Maxwell

View File

@@ -0,0 +1,253 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <limits>
#include "common/common_types.h"
#include "shader_recompiler/exception.h"
#include "shader_recompiler/frontend/maxwell/opcodes.h"
#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
namespace Shader::Maxwell {
namespace {
// Destination integer width of F2I, decoded from the 2-bit field at bit 8.
enum class DestFormat : u64 {
    Invalid = 0,
    I16 = 1,
    I32 = 2,
    I64 = 3,
};
// Source float width of F2I, decoded from the 2-bit field at bit 10.
enum class SrcFormat : u64 {
    Invalid = 0,
    F16 = 1,
    F32 = 2,
    F64 = 3,
};
// Rounding applied by F2I before the integer conversion, decoded from the 2-bit field at
// bit 39.
enum class Rounding : u64 {
    Round = 0,
    Floor = 1,
    Ceil = 2,
    Trunc = 3,
};
// Bit layout of the F2I instruction fields shared by all operand variants.
// Initialised from the raw 64-bit instruction word; every BitField views the same bits.
union F2I {
u64 raw;
BitField<0, 8, IR::Reg> dest_reg;
BitField<8, 2, DestFormat> dest_format;
BitField<10, 2, SrcFormat> src_format;
BitField<12, 1, u64> is_signed;
BitField<39, 2, Rounding> rounding;
// Index passed to CompositeExtract when the source is an unpacked F16 pair.
BitField<41, 1, u64> half;
BitField<44, 1, u64> ftz;
BitField<45, 1, u64> abs;
BitField<47, 1, u64> cc;
BitField<49, 1, u64> neg;
};
// Returns the bit width (16, 32 or 64) of an F2I destination format.
// Throws NotImplementedException for any other value.
size_t BitSize(DestFormat dest_format) {
    if (dest_format == DestFormat::I16) {
        return 16;
    }
    if (dest_format == DestFormat::I32) {
        return 32;
    }
    if (dest_format == DestFormat::I64) {
        return 64;
    }
    throw NotImplementedException("Invalid destination format {}", dest_format);
}
// Returns the representable range of the destination integer type as {max, min}, both
// converted to f64. Note the order: the maximum comes first.
// Throws NotImplementedException for a destination format other than I16/I32/I64.
std::pair<f64, f64> ClampBounds(DestFormat format, bool is_signed) {
    // Builds the {max, min} pair for the integer type of `tag`.
    const auto limits_of = [](auto tag) {
        using Limits = std::numeric_limits<decltype(tag)>;
        return std::pair<f64, f64>{static_cast<f64>(Limits::max()),
                                   static_cast<f64>(Limits::min())};
    };
    switch (format) {
    case DestFormat::I16:
        return is_signed ? limits_of(s16{}) : limits_of(u16{});
    case DestFormat::I32:
        return is_signed ? limits_of(s32{}) : limits_of(u32{});
    case DestFormat::I64:
        return is_signed ? limits_of(s64{}) : limits_of(u64{});
    default:
        break;
    }
    throw NotImplementedException("Invalid destination format {}", format);
}
// Builds an F64 from a constant-buffer operand: a single 32-bit word is read at byte
// offset `offset * 4 + 4` and used as the second element of the packed pair, with the
// first element set to zero.
// Throws NotImplementedException for an out-of-range binding or offset, or for an odd
// (unaligned) word offset.
IR::F64 UnpackCbuf(TranslatorVisitor& v, u64 insn) {
    union {
        u64 raw;
        BitField<20, 14, s64> offset;
        BitField<34, 5, u64> binding;
    } const enc{insn};

    if (enc.binding >= 18) {
        throw NotImplementedException("Out of bounds constant buffer binding {}", enc.binding);
    }
    if (enc.offset < 0 || enc.offset >= 0x4'000) {
        throw NotImplementedException("Out of bounds constant buffer offset {}", enc.offset * 4);
    }
    if (enc.offset % 2 != 0) {
        throw NotImplementedException("Unaligned F64 constant buffer offset {}", enc.offset * 4);
    }
    const u32 binding_index{static_cast<u32>(enc.binding)};
    const u32 word_byte_offset{static_cast<u32>(enc.offset) * 4 + 4};

    const IR::U32 binding{v.ir.Imm32(binding_index)};
    const IR::U32 byte_offset{v.ir.Imm32(word_byte_offset)};
    const IR::U32 loaded_word{v.ir.GetCbuf(binding, byte_offset)};
    const IR::Value packed_pair{v.ir.CompositeConstruct(v.ir.Imm32(0U), loaded_word)};
    return v.ir.PackDouble2x32(packed_pair);
}
// Translates F2I (float to integer) for an already-fetched source operand.
//
// Steps: apply abs/neg, round with the encoded rounding, clamp to the destination
// integer range, convert, then patch the result for NaN inputs:
// - when exactly one of "source is F64" / "destination is I64" holds, a NaN input
//   yields 0x8000'0000 (I32) or 0x8000'0000'0000'0000 (I64);
// - otherwise, for signed conversions, a NaN input yields zero.
//
// Results narrower than 64 bits are written with v.X, 64-bit ones with v.L.
// Throws NotImplementedException for invalid formats and, after emitting the
// conversion, when the CC bit is set.
void TranslateF2I(TranslatorVisitor& v, u64 insn, const IR::F16F32F64& src_a) {
    // F2I is used to convert from a floating point value to an integer
    const F2I f2i{insn};

    // The FTZ bit is only honoured for F32 sources with a non-64-bit destination.
    const bool denorm_cares{f2i.src_format != SrcFormat::F16 && f2i.src_format != SrcFormat::F64 &&
                            f2i.dest_format != DestFormat::I64};
    IR::FmzMode fmz_mode{IR::FmzMode::DontCare};
    if (denorm_cares) {
        fmz_mode = f2i.ftz != 0 ? IR::FmzMode::FTZ : IR::FmzMode::None;
    }
    const IR::FpControl fp_control{
        .no_contraction = true,
        .rounding = IR::FpRounding::DontCare,
        .fmz_mode = fmz_mode,
    };
    const IR::F16F32F64 op_a{v.ir.FPAbsNeg(src_a, f2i.abs != 0, f2i.neg != 0)};
    const IR::F16F32F64 rounded_value{[&] {
        switch (f2i.rounding) {
        case Rounding::Round:
            return v.ir.FPRoundEven(op_a, fp_control);
        case Rounding::Floor:
            return v.ir.FPFloor(op_a, fp_control);
        case Rounding::Ceil:
            return v.ir.FPCeil(op_a, fp_control);
        case Rounding::Trunc:
            return v.ir.FPTrunc(op_a, fp_control);
        default:
            throw NotImplementedException("Invalid F2I rounding {}", f2i.rounding.Value());
        }
    }()};

    const bool is_signed{f2i.is_signed != 0};
    // ClampBounds returns {max, min}, in that order.
    const auto [max_bound, min_bound] = ClampBounds(f2i.dest_format, is_signed);

    // Clamp in the precision of the source so the bounds have the same type as the value.
    IR::F16F32F64 intermediate;
    switch (f2i.src_format) {
    case SrcFormat::F16: {
        const IR::F16 max_val{v.ir.FPConvert(16, v.ir.Imm32(static_cast<f32>(max_bound)))};
        const IR::F16 min_val{v.ir.FPConvert(16, v.ir.Imm32(static_cast<f32>(min_bound)))};
        intermediate = v.ir.FPClamp(rounded_value, min_val, max_val);
        break;
    }
    case SrcFormat::F32: {
        const IR::F32 max_val{v.ir.Imm32(static_cast<f32>(max_bound))};
        const IR::F32 min_val{v.ir.Imm32(static_cast<f32>(min_bound))};
        intermediate = v.ir.FPClamp(rounded_value, min_val, max_val);
        break;
    }
    case SrcFormat::F64: {
        const IR::F64 max_val{v.ir.Imm64(max_bound)};
        const IR::F64 min_val{v.ir.Imm64(min_bound)};
        intermediate = v.ir.FPClamp(rounded_value, min_val, max_val);
        break;
    }
    default:
        // This switch validates the *source* format, so report that field.
        throw NotImplementedException("Invalid source format {}", f2i.src_format.Value());
    }

    // Conversions are performed at 32 bits minimum, even for a 16-bit destination.
    const size_t bitsize{std::max<size_t>(32, BitSize(f2i.dest_format))};
    IR::U16U32U64 result{v.ir.ConvertFToI(bitsize, is_signed, intermediate)};

    bool handled_special_case = false;
    const bool special_nan_cases =
        (f2i.src_format == SrcFormat::F64) != (f2i.dest_format == DestFormat::I64);
    if (special_nan_cases) {
        if (f2i.dest_format == DestFormat::I32) {
            handled_special_case = true;
            result = IR::U32{v.ir.Select(v.ir.FPIsNan(op_a), v.ir.Imm32(0x8000'0000U), result)};
        } else if (f2i.dest_format == DestFormat::I64) {
            handled_special_case = true;
            result = IR::U64{
                v.ir.Select(v.ir.FPIsNan(op_a), v.ir.Imm64(0x8000'0000'0000'0000UL), result)};
        }
    }
    if (!handled_special_case && is_signed) {
        if (bitsize != 64) {
            result = IR::U32{v.ir.Select(v.ir.FPIsNan(op_a), v.ir.Imm32(0U), result)};
        } else {
            result = IR::U64{v.ir.Select(v.ir.FPIsNan(op_a), v.ir.Imm64(u64{0}), result)};
        }
    }

    if (bitsize == 64) {
        v.L(f2i.dest_reg, result);
    } else {
        v.X(f2i.dest_reg, result);
    }
    if (f2i.cc != 0) {
        throw NotImplementedException("F2I CC");
    }
}
} // Anonymous namespace
// F2I with a register source. The source is read according to the encoded source
// format: one half of an unpacked F16 pair, a single F32 register, or two consecutive
// registers packed into an F64.
// Throws NotImplementedException for an invalid source format.
void TranslatorVisitor::F2I_reg(u64 insn) {
    union {
        u64 raw;
        F2I base;
        BitField<20, 8, IR::Reg> src_reg;
    } const enc{insn};

    IR::F16F32F64 operand;
    switch (enc.base.src_format) {
    case SrcFormat::F16:
        operand = IR::F16{ir.CompositeExtract(ir.UnpackFloat2x16(X(enc.src_reg)), enc.base.half)};
        break;
    case SrcFormat::F32:
        operand = F(enc.src_reg);
        break;
    case SrcFormat::F64:
        operand = ir.PackDouble2x32(ir.CompositeConstruct(X(enc.src_reg), X(enc.src_reg + 1)));
        break;
    default:
        throw NotImplementedException("Invalid F2I source format {}",
                                      enc.base.src_format.Value());
    }
    TranslateF2I(*this, insn, operand);
}
// F2I with a constant-buffer source. The source is read according to the encoded source
// format: one half of an unpacked F16 pair, an F32 via GetFloatCbuf, or an F64 via
// UnpackCbuf.
// Throws NotImplementedException for an invalid source format.
void TranslatorVisitor::F2I_cbuf(u64 insn) {
    const F2I enc{insn};

    IR::F16F32F64 operand;
    switch (enc.src_format) {
    case SrcFormat::F16:
        operand = IR::F16{ir.CompositeExtract(ir.UnpackFloat2x16(GetCbuf(insn)), enc.half)};
        break;
    case SrcFormat::F32:
        operand = GetFloatCbuf(insn);
        break;
    case SrcFormat::F64:
        operand = UnpackCbuf(*this, insn);
        break;
    default:
        throw NotImplementedException("Invalid F2I source format {}", enc.src_format.Value());
    }
    TranslateF2I(*this, insn, operand);
}
void TranslatorVisitor::F2I_imm(u64) {
throw NotImplementedException("{}", Opcode::F2I_imm);
}
} // namespace Shader::Maxwell

View File

@@ -0,0 +1,94 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "common/common_types.h"
#include "shader_recompiler/exception.h"
#include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h"
#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
namespace Shader::Maxwell {
namespace {
// Shared FFMA implementation (a * b + c). All modifiers are supplied by the caller; only
// the destination and source-A registers are decoded here.
// Throws NotImplementedException when `cc` is set.
void FFMA(TranslatorVisitor& v, u64 insn, const IR::F32& src_b, const IR::F32& src_c, bool neg_a,
          bool neg_b, bool neg_c, bool sat, bool cc, FmzMode fmz_mode, FpRounding fp_rounding) {
    union {
        u64 raw;
        BitField<0, 8, IR::Reg> dest_reg;
        BitField<8, 8, IR::Reg> src_a;
    } const regs{insn};

    if (cc) {
        throw NotImplementedException("FFMA CC");
    }
    const IR::F32 factor_a{v.ir.FPAbsNeg(v.F(regs.src_a), false, neg_a)};
    const IR::F32 factor_b{v.ir.FPAbsNeg(src_b, false, neg_b)};
    const IR::F32 addend{v.ir.FPAbsNeg(src_c, false, neg_c)};
    const IR::FpControl control{
        .no_contraction = true,
        .rounding = CastFpRounding(fp_rounding),
        .fmz_mode = CastFmzMode(fmz_mode),
    };
    IR::F32 result{v.ir.FPFma(factor_a, factor_b, addend, control)};

    // FMZ is emulated only without SAT; with SAT the saturation already covers it.
    // In that mode a zero factor forces the product to zero (even for NaN/infinity),
    // so the result is just the addend.
    const bool emulate_fmz{fmz_mode == FmzMode::FMZ && !sat};
    if (emulate_fmz) {
        const IR::F32 zero{v.ir.Imm32(0.0f)};
        const IR::U1 a_is_zero{v.ir.FPEqual(factor_a, zero)};
        const IR::U1 b_is_zero{v.ir.FPEqual(factor_b, zero)};
        const IR::U1 either_zero{v.ir.LogicalOr(a_is_zero, b_is_zero)};
        result = IR::F32{v.ir.Select(either_zero, addend, result)};
    }
    if (sat) {
        result = v.ir.FPSaturate(result);
    }
    v.F(regs.dest_reg, result);
}
// Decodes the modifier bits of the regular FFMA encoding and forwards to the shared
// implementation. Operand A is passed as not negated in this encoding.
void FFMA(TranslatorVisitor& v, u64 insn, const IR::F32& src_b, const IR::F32& src_c) {
    union {
        u64 raw;
        BitField<47, 1, u64> cc;
        BitField<48, 1, u64> neg_b;
        BitField<49, 1, u64> neg_c;
        BitField<50, 1, u64> sat;
        BitField<51, 2, FpRounding> fp_rounding;
        BitField<53, 2, FmzMode> fmz_mode;
    } const enc{insn};

    const bool neg_b{enc.neg_b != 0};
    const bool neg_c{enc.neg_c != 0};
    const bool saturate{enc.sat != 0};
    const bool writes_cc{enc.cc != 0};
    FFMA(v, insn, src_b, src_c, false, neg_b, neg_c, saturate, writes_cc, enc.fmz_mode,
         enc.fp_rounding);
}
} // Anonymous namespace
// FFMA variant: operand B is fetched with GetFloatReg20 and operand C with
// GetFloatReg39.
void TranslatorVisitor::FFMA_reg(u64 insn) {
FFMA(*this, insn, GetFloatReg20(insn), GetFloatReg39(insn));
}
// FFMA variant: operand B is fetched with GetFloatReg39 and operand C with
// GetFloatCbuf.
void TranslatorVisitor::FFMA_rc(u64 insn) {
FFMA(*this, insn, GetFloatReg39(insn), GetFloatCbuf(insn));
}
// FFMA variant: operand B is fetched with GetFloatCbuf and operand C with
// GetFloatReg39.
void TranslatorVisitor::FFMA_cr(u64 insn) {
FFMA(*this, insn, GetFloatCbuf(insn), GetFloatReg39(insn));
}
// FFMA variant: operand B is fetched with GetFloatImm20 and operand C with
// GetFloatReg39.
void TranslatorVisitor::FFMA_imm(u64 insn) {
FFMA(*this, insn, GetFloatImm20(insn), GetFloatReg39(insn));
}
// FFMA with a 32-bit immediate as operand B. The addend is read from the register
// encoded in bits 0-7, the same bits the shared implementation decodes as the
// destination. Operand B is passed as not negated and rounding is always RN.
void TranslatorVisitor::FFMA32I(u64 insn) {
    union {
        u64 raw;
        BitField<0, 8, IR::Reg> src_c; // FFMA32I mirrors the destination and addition register
        BitField<52, 1, u64> cc;
        BitField<53, 2, FmzMode> fmz_mode;
        BitField<55, 1, u64> sat;
        BitField<56, 1, u64> neg_a;
        BitField<57, 1, u64> neg_c;
    } const enc{insn};

    const bool neg_a{enc.neg_a != 0};
    const bool neg_c{enc.neg_c != 0};
    const bool saturate{enc.sat != 0};
    const bool writes_cc{enc.cc != 0};
    FFMA(*this, insn, GetFloatImm32(insn), F(enc.src_c), neg_a, false, neg_c, saturate, writes_cc,
         enc.fmz_mode, FpRounding::RN);
}
} // namespace Shader::Maxwell

View File

@@ -0,0 +1,62 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "common/bit_field.h"
#include "common/common_types.h"
#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
namespace Shader::Maxwell {
namespace {
// Translates FMNMX: computes both the F32 minimum and maximum of the two operands and
// stores the one picked by the instruction predicate into the destination.
void FMNMX(TranslatorVisitor& v, u64 insn, const IR::F32& src_b) {
    union {
        u64 raw;
        BitField<0, 8, IR::Reg> dest_reg;
        BitField<8, 8, IR::Reg> src_a_reg;
        BitField<39, 3, IR::Pred> pred;
        BitField<42, 1, u64> neg_pred;
        BitField<44, 1, u64> ftz;
        BitField<45, 1, u64> negate_b;
        BitField<46, 1, u64> abs_a;
        BitField<47, 1, u64> cc;
        BitField<48, 1, u64> negate_a;
        BitField<49, 1, u64> abs_b;
    } const enc{insn};

    if (enc.cc != 0) {
        throw NotImplementedException("FMNMX CC");
    }
    const IR::U1 selector{v.ir.GetPred(enc.pred)};
    const IR::F32 lhs{v.ir.FPAbsNeg(v.F(enc.src_a_reg), enc.abs_a != 0, enc.negate_a != 0)};
    const IR::F32 rhs{v.ir.FPAbsNeg(src_b, enc.abs_b != 0, enc.negate_b != 0)};
    const IR::FpControl fp_control{
        .no_contraction = false,
        .rounding = IR::FpRounding::DontCare,
        .fmz_mode = (enc.ftz != 0 ? IR::FmzMode::FTZ : IR::FmzMode::None),
    };
    const IR::F32 maximum{v.ir.FPMax(lhs, rhs, fp_control)};
    const IR::F32 minimum{v.ir.FPMin(lhs, rhs, fp_control)};

    // A negated predicate exchanges which of the two results each Select slot receives.
    const bool exchanged{enc.neg_pred != 0};
    const IR::F32& first_choice = exchanged ? maximum : minimum;
    const IR::F32& second_choice = exchanged ? minimum : maximum;
    v.F(enc.dest_reg, IR::F32{v.ir.Select(selector, first_choice, second_choice)});
}
} // Anonymous namespace
// FMNMX variant whose second operand is fetched with GetFloatReg20.
void TranslatorVisitor::FMNMX_reg(u64 insn) {
    const IR::F32 src_b{GetFloatReg20(insn)};
    FMNMX(*this, insn, src_b);
}
// FMNMX variant whose second operand is fetched with GetFloatCbuf.
void TranslatorVisitor::FMNMX_cbuf(u64 insn) {
    const IR::F32 src_b{GetFloatCbuf(insn)};
    FMNMX(*this, insn, src_b);
}
// FMNMX variant whose second operand is fetched with GetFloatImm20.
void TranslatorVisitor::FMNMX_imm(u64 insn) {
    const IR::F32 src_b{GetFloatImm20(insn)};
    FMNMX(*this, insn, src_b);
}
} // namespace Shader::Maxwell

View File

@@ -0,0 +1,71 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "common/bit_field.h"
#include "common/common_types.h"
#include "shader_recompiler/exception.h"
#include "shader_recompiler/frontend/maxwell/opcodes.h"
#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
namespace Shader::Maxwell {
namespace {
// Special function selected by MUFU, decoded from the 4-bit field at bit 20.
// Values 9-15 have no enumerator; MUFU rejects them as invalid.
enum class Operation : u64 {
Cos = 0, // Cosine
Sin = 1, // Sine
Ex2 = 2, // Base 2 exponent
Lg2 = 3, // Base 2 logarithm
Rcp = 4, // Reciprocal
Rsq = 5, // Reciprocal square root
Rcp64H = 6, // 64-bit reciprocal
Rsq64H = 7, // 64-bit reciprocal square root
Sqrt = 8, // Square root
};
} // Anonymous namespace
// Translates MUFU, the multi-function unit instruction: applies one special function
// (see Operation) to an F32 source with optional abs/neg, optionally saturates the
// result and writes it to the destination register.
// Throws NotImplementedException for the 64-bit variants and for unknown operations.
void TranslatorVisitor::MUFU(u64 insn) {
    union {
        u64 raw;
        BitField<0, 8, IR::Reg> dest_reg;
        BitField<8, 8, IR::Reg> src_reg;
        BitField<20, 4, Operation> operation;
        BitField<46, 1, u64> abs;
        BitField<48, 1, u64> neg;
        BitField<50, 1, u64> sat;
    } const enc{insn};

    const IR::F32 operand{ir.FPAbsNeg(F(enc.src_reg), enc.abs != 0, enc.neg != 0)};
    IR::F32 result;
    switch (enc.operation) {
    case Operation::Cos:
        result = ir.FPCos(operand);
        break;
    case Operation::Sin:
        result = ir.FPSin(operand);
        break;
    case Operation::Ex2:
        result = ir.FPExp2(operand);
        break;
    case Operation::Lg2:
        result = ir.FPLog2(operand);
        break;
    case Operation::Rcp:
        result = ir.FPRecip(operand);
        break;
    case Operation::Rsq:
        result = ir.FPRecipSqrt(operand);
        break;
    case Operation::Rcp64H:
        throw NotImplementedException("MUFU.RCP64H");
    case Operation::Rsq64H:
        throw NotImplementedException("MUFU.RSQ64H");
    case Operation::Sqrt:
        result = ir.FPSqrt(operand);
        break;
    default:
        throw NotImplementedException("Invalid MUFU operation {}", enc.operation.Value());
    }
    if (enc.sat != 0) {
        result = ir.FPSaturate(result);
    }
    F(enc.dest_reg, result);
}
} // namespace Shader::Maxwell

View File

@@ -0,0 +1,127 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "common/bit_field.h"
#include "common/common_types.h"
#include "shader_recompiler/frontend/ir/ir_emitter.h"
#include "shader_recompiler/frontend/ir/modifiers.h"
#include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h"
#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
namespace Shader::Maxwell {
namespace {
// Operand scale modifier of FMUL, decoded from the 3-bit field at bit 41.
// D* divide and M* multiply by the given power of two (see ScaleFactor).
enum class Scale : u64 {
    None = 0,
    D2 = 1,
    D4 = 2,
    D8 = 3,
    M8 = 4,
    M4 = 5,
    M2 = 6,
    INVALIDSCALE37 = 7,
};
// Returns the multiplier associated with an FMUL scale modifier.
// Throws NotImplementedException for INVALIDSCALE37 (or any other out-of-table value).
float ScaleFactor(Scale scale) {
    // Indexed by the numeric value of Scale: None, D2, D4, D8, M8, M4, M2.
    static constexpr float factors[]{
        1.0f, 1.0f / 2.0f, 1.0f / 4.0f, 1.0f / 8.0f, 8.0f, 4.0f, 2.0f,
    };
    constexpr u64 num_factors{sizeof(factors) / sizeof(factors[0])};

    const u64 index{static_cast<u64>(scale)};
    if (index < num_factors) {
        return factors[index];
    }
    throw NotImplementedException("Invalid FMUL scale {}", scale);
}
// Shared FMUL implementation. All modifiers are supplied by the caller; only the
// destination and source-A registers are decoded here.
//
// A scale other than None pre-multiplies operand A by ScaleFactor(scale) and is only
// supported together with FTZ and RN.
// Throws NotImplementedException when `cc` is set or for an unsupported scale combination.
void FMUL(TranslatorVisitor& v, u64 insn, const IR::F32& src_b, FmzMode fmz_mode,
          FpRounding fp_rounding, Scale scale, bool sat, bool cc, bool neg_b) {
    union {
        u64 raw;
        BitField<0, 8, IR::Reg> dest_reg;
        BitField<8, 8, IR::Reg> src_a;
    } const fmul{insn};

    if (cc) {
        throw NotImplementedException("FMUL CC");
    }
    IR::F32 op_a{v.F(fmul.src_a)};
    if (scale != Scale::None) {
        if (fmz_mode != FmzMode::FTZ || fp_rounding != FpRounding::RN) {
            // The message names the mode the condition actually tests (FTZ).
            throw NotImplementedException("FMUL scale with non-FTZ or non-RN modifiers");
        }
        op_a = v.ir.FPMul(op_a, v.ir.Imm32(ScaleFactor(scale)));
    }
    const IR::F32 op_b{v.ir.FPAbsNeg(src_b, false, neg_b)};
    const IR::FpControl fp_control{
        .no_contraction = true,
        .rounding = CastFpRounding(fp_rounding),
        .fmz_mode = CastFmzMode(fmz_mode),
    };
    IR::F32 value{v.ir.FPMul(op_a, op_b, fp_control)};
    if (fmz_mode == FmzMode::FMZ && !sat) {
        // Do not implement FMZ if SAT is enabled, as it does the logic for us.
        // On D3D9 mode, anything * 0 is zero, even NAN and infinity
        const IR::F32 zero{v.ir.Imm32(0.0f)};
        const IR::U1 zero_a{v.ir.FPEqual(op_a, zero)};
        const IR::U1 zero_b{v.ir.FPEqual(op_b, zero)};
        const IR::U1 any_zero{v.ir.LogicalOr(zero_a, zero_b)};
        value = IR::F32{v.ir.Select(any_zero, zero, value)};
    }
    if (sat) {
        value = v.ir.FPSaturate(value);
    }
    v.F(fmul.dest_reg, value);
}
// Decodes the modifier bits shared by the register, constant buffer and 20-bit immediate
// encodings of FMUL, then forwards to the full emitter above.
void FMUL(TranslatorVisitor& v, u64 insn, const IR::F32& src_b) {
    union {
        u64 raw;
        BitField<39, 2, FpRounding> fp_rounding;
        BitField<41, 3, Scale> scale;
        BitField<44, 2, FmzMode> fmz;
        BitField<47, 1, u64> cc;
        BitField<48, 1, u64> neg_b;
        BitField<50, 1, u64> sat;
    } const bits{insn};

    const bool saturate{bits.sat != 0};
    const bool write_cc{bits.cc != 0};
    const bool negate_b{bits.neg_b != 0};
    FMUL(v, insn, src_b, bits.fmz, bits.fp_rounding, bits.scale, saturate, write_cc, negate_b);
}
} // Anonymous namespace
// FMUL with operand B read through GetFloatReg20.
void TranslatorVisitor::FMUL_reg(u64 insn) {
    const IR::F32 src_b{GetFloatReg20(insn)};
    FMUL(*this, insn, src_b);
}
// FMUL with operand B read through GetFloatCbuf.
void TranslatorVisitor::FMUL_cbuf(u64 insn) {
    const IR::F32 src_b{GetFloatCbuf(insn)};
    FMUL(*this, insn, src_b);
}
// FMUL with operand B read through GetFloatImm20.
void TranslatorVisitor::FMUL_imm(u64 insn) {
    const IR::F32 src_b{GetFloatImm20(insn)};
    FMUL(*this, insn, src_b);
}
// FMUL with a 32-bit immediate operand B. This encoding has its own CC/FMZ/SAT bit positions
// and always uses RN rounding, no scale and no negation of B.
void TranslatorVisitor::FMUL32I(u64 insn) {
    union {
        u64 raw;
        BitField<52, 1, u64> cc;
        BitField<53, 2, FmzMode> fmz;
        BitField<55, 1, u64> sat;
    } const bits{insn};

    const bool saturate{bits.sat != 0};
    const bool write_cc{bits.cc != 0};
    FMUL(*this, insn, GetFloatImm32(insn), bits.fmz, FpRounding::RN, Scale::None, saturate,
         write_cc, false);
}
} // namespace Shader::Maxwell

View File

@@ -0,0 +1,41 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "common/bit_field.h"
#include "common/common_types.h"
#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
namespace Shader::Maxwell {
namespace {
// Values of the 1-bit RRO mode field.
enum class Mode : u64 {
    SINCOS = 0,
    EX2 = 1,
};
// Translates RRO by writing the source operand, with its abs/neg modifiers applied, to the
// destination register. The mode field is decoded but does not affect the emitted IR.
void RRO(TranslatorVisitor& v, u64 insn, const IR::F32& src) {
    union {
        u64 raw;
        BitField<0, 8, IR::Reg> dest_reg;
        BitField<39, 1, Mode> mode;
        BitField<45, 1, u64> neg;
        BitField<49, 1, u64> abs;
    } const bits{insn};

    const bool take_abs{bits.abs != 0};
    const bool negate{bits.neg != 0};
    const IR::F32 result{v.ir.FPAbsNeg(src, take_abs, negate)};
    v.F(bits.dest_reg, result);
}
} // Anonymous namespace
// RRO with the source read through GetFloatReg20.
void TranslatorVisitor::RRO_reg(u64 insn) {
    const IR::F32 src{GetFloatReg20(insn)};
    RRO(*this, insn, src);
}
// RRO with the source read through GetFloatCbuf.
void TranslatorVisitor::RRO_cbuf(u64 insn) {
    const IR::F32 src{GetFloatCbuf(insn)};
    RRO(*this, insn, src);
}
void TranslatorVisitor::RRO_imm(u64) {
throw NotImplementedException("RRO (imm)");
}
} // namespace Shader::Maxwell

View File

@@ -0,0 +1,60 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "common/bit_field.h"
#include "common/common_types.h"
#include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h"
#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
namespace Shader::Maxwell {
namespace {
// Translates FSETP: compares two F32 operands, combines the comparison with a third predicate
// and writes two destination predicates.
//   dest_pred_a = bop(compare(a, b), bop_pred)
//   dest_pred_b = bop(!compare(a, b), bop_pred)
// src_b is the already decoded second operand; operand A comes from the register in bits [8, 16).
void FSETP(TranslatorVisitor& v, u64 insn, const IR::F32& src_b) {
    union {
        u64 insn;
        BitField<0, 3, IR::Pred> dest_pred_b;
        BitField<3, 3, IR::Pred> dest_pred_a;
        BitField<6, 1, u64> negate_b;
        BitField<7, 1, u64> abs_a;
        BitField<8, 8, IR::Reg> src_a_reg;
        BitField<39, 3, IR::Pred> bop_pred;
        BitField<42, 1, u64> neg_bop_pred;
        BitField<43, 1, u64> negate_a;
        BitField<44, 1, u64> abs_b;
        BitField<45, 2, BooleanOp> bop;
        BitField<47, 1, u64> ftz;
        BitField<48, 4, FPCompareOp> compare_op;
    } const bits{insn};

    // Operands with their abs/neg modifiers applied.
    const IR::F32 raw_a{v.F(bits.src_a_reg)};
    const IR::F32 lhs{v.ir.FPAbsNeg(raw_a, bits.abs_a != 0, bits.negate_a != 0)};
    const IR::F32 rhs = v.ir.FPAbsNeg(src_b, bits.abs_b != 0, bits.negate_b != 0);

    const bool flush{bits.ftz != 0};
    const IR::FpControl control{
        .no_contraction = false,
        .rounding = IR::FpRounding::DontCare,
        .fmz_mode = (flush ? IR::FmzMode::FTZ : IR::FmzMode::None),
    };
    const FPCompareOp compare_op{bits.compare_op};
    const BooleanOp combine_op{bits.bop};

    const IR::U1 matched{FloatingPointCompare(v.ir, lhs, rhs, compare_op, control)};
    const IR::U1 extra_pred{v.ir.GetPred(bits.bop_pred, bits.neg_bop_pred != 0)};
    const IR::U1 out_a{PredicateCombine(v.ir, matched, extra_pred, combine_op)};
    // The second destination receives the same combination applied to the inverted comparison.
    const IR::U1 not_matched{v.ir.LogicalNot(matched)};
    const IR::U1 out_b{PredicateCombine(v.ir, not_matched, extra_pred, combine_op)};

    v.ir.SetPred(bits.dest_pred_a, out_a);
    v.ir.SetPred(bits.dest_pred_b, out_b);
}
} // Anonymous namespace
// FSETP with operand B read through GetFloatReg20.
void TranslatorVisitor::FSETP_reg(u64 insn) {
    const IR::F32 src_b{GetFloatReg20(insn)};
    FSETP(*this, insn, src_b);
}
// FSETP with operand B read through GetFloatCbuf.
void TranslatorVisitor::FSETP_cbuf(u64 insn) {
    const IR::F32 src_b{GetFloatCbuf(insn)};
    FSETP(*this, insn, src_b);
}
// FSETP with operand B read through GetFloatImm20.
void TranslatorVisitor::FSETP_imm(u64 insn) {
    const IR::F32 src_b{GetFloatImm20(insn)};
    FSETP(*this, insn, src_b);
}
} // namespace Shader::Maxwell

View File

@@ -0,0 +1,44 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "common/common_types.h"
#include "shader_recompiler/exception.h"
#include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h"
#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
namespace Shader::Maxwell {
// Translates FSWZADD: a swizzled add of the registers selected by GetFloatReg8 and
// GetFloatReg20, using the 8-bit swizzle immediate in bits [28, 36). The result is written to
// the register in bits [0, 8).
// Throws NotImplementedException when the NDV or CC modifiers are set.
void TranslatorVisitor::FSWZADD(u64 insn) {
    union {
        u64 raw;
        BitField<0, 8, IR::Reg> dest_reg;
        BitField<28, 8, u64> swizzle;
        BitField<38, 1, u64> ndv;
        BitField<39, 2, FpRounding> round;
        BitField<44, 1, u64> ftz;
        BitField<47, 1, u64> cc;
    } const fswzadd{insn};

    // Reject every unsupported modifier up front so nothing is translated for an instruction
    // that is going to be refused anyway. NDV is still reported first when both are set.
    if (fswzadd.ndv != 0) {
        throw NotImplementedException("FSWZADD NDV");
    }
    if (fswzadd.cc != 0) {
        throw NotImplementedException("FSWZADD CC");
    }

    const IR::F32 src_a{GetFloatReg8(insn)};
    const IR::F32 src_b{GetFloatReg20(insn)};
    const IR::U32 swizzle{ir.Imm32(static_cast<u32>(fswzadd.swizzle))};

    const IR::FpControl fp_control{
        .no_contraction = false,
        .rounding = CastFpRounding(fswzadd.round),
        .fmz_mode = (fswzadd.ftz != 0 ? IR::FmzMode::FTZ : IR::FmzMode::None),
    };
    const IR::F32 result{ir.FSwizzleAdd(src_a, src_b, swizzle, fp_control)};
    F(fswzadd.dest_reg, result);
}
} // namespace Shader::Maxwell

View File

@@ -0,0 +1,125 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.h"
namespace Shader::Maxwell {
namespace {
// Emits the IR for HADD2: a two-lane floating point add.
// Operand A is the register in bits [8, 16) and operand B is src_b; each is split into two
// lanes by Extract according to its swizzle. abs/neg modifiers apply to both lanes of an operand.
// The two sums are packed by MergeResult and written to the register in bits [0, 8).
void HADD2(TranslatorVisitor& v, u64 insn, Merge merge, bool ftz, bool sat, bool abs_a, bool neg_a,
           Swizzle swizzle_a, bool abs_b, bool neg_b, Swizzle swizzle_b, const IR::U32& src_b) {
    union {
        u64 raw;
        BitField<0, 8, IR::Reg> dest_reg;
        BitField<8, 8, IR::Reg> src_a;
    } const bits{insn};

    auto [a0, a1]{Extract(v.ir, v.X(bits.src_a), swizzle_a)};
    auto [b0, b1]{Extract(v.ir, src_b, swizzle_b)};

    // When the operands disagree on width, widen whichever one is still F16.
    const bool mixed_width{a0.Type() != b0.Type()};
    if (mixed_width && a0.Type() == IR::Type::F16) {
        a0 = v.ir.FPConvert(32, a0);
        a1 = v.ir.FPConvert(32, a1);
    }
    if (mixed_width && b0.Type() == IR::Type::F16) {
        b0 = v.ir.FPConvert(32, b0);
        b1 = v.ir.FPConvert(32, b1);
    }

    a0 = v.ir.FPAbsNeg(a0, abs_a, neg_a);
    a1 = v.ir.FPAbsNeg(a1, abs_a, neg_a);
    b0 = v.ir.FPAbsNeg(b0, abs_b, neg_b);
    b1 = v.ir.FPAbsNeg(b1, abs_b, neg_b);

    const IR::FpControl fp_control{
        .no_contraction = true,
        .rounding = IR::FpRounding::DontCare,
        .fmz_mode = (ftz ? IR::FmzMode::FTZ : IR::FmzMode::None),
    };
    IR::F16F32F64 sum0{v.ir.FPAdd(a0, b0, fp_control)};
    IR::F16F32F64 sum1{v.ir.FPAdd(a1, b1, fp_control)};
    if (sat) {
        sum0 = v.ir.FPSaturate(sum0);
        sum1 = v.ir.FPSaturate(sum1);
    }
    // Results computed at the widened width are narrowed back before packing.
    // NOTE(review): when both swizzles select F32 the operands already match, so no narrowing
    // happens and F32 results reach MergeResult (which takes IR::F16) - confirm this is intended.
    if (mixed_width) {
        sum0 = v.ir.FPConvert(16, sum0);
        sum1 = v.ir.FPConvert(16, sum1);
    }
    const IR::U32 packed{MergeResult(v.ir, bits.dest_reg, sum0, sum1, merge)};
    v.X(bits.dest_reg, packed);
}
// Decodes the operand A modifiers, FTZ and merge mode shared by the register, constant buffer
// and immediate encodings of HADD2, then forwards to the full emitter above.
void HADD2(TranslatorVisitor& v, u64 insn, bool sat, bool abs_b, bool neg_b, Swizzle swizzle_b,
           const IR::U32& src_b) {
    union {
        u64 raw;
        BitField<39, 1, u64> ftz;
        BitField<43, 1, u64> neg_a;
        BitField<44, 1, u64> abs_a;
        BitField<47, 2, Swizzle> swizzle_a;
        BitField<49, 2, Merge> merge;
    } const bits{insn};

    const bool ftz{bits.ftz != 0};
    const bool abs_a{bits.abs_a != 0};
    const bool neg_a{bits.neg_a != 0};
    HADD2(v, insn, bits.merge, ftz, sat, abs_a, neg_a, bits.swizzle_a, abs_b, neg_b, swizzle_b,
          src_b);
}
} // Anonymous namespace
// HADD2 with operand B read through GetReg20; B has its own abs/neg/swizzle bits.
void TranslatorVisitor::HADD2_reg(u64 insn) {
    union {
        u64 raw;
        BitField<28, 2, Swizzle> swizzle_b;
        BitField<30, 1, u64> abs_b;
        BitField<31, 1, u64> neg_b;
        BitField<32, 1, u64> sat;
    } const bits{insn};

    const bool sat{bits.sat != 0};
    const bool abs_b{bits.abs_b != 0};
    const bool neg_b{bits.neg_b != 0};
    const IR::U32 src_b{GetReg20(insn)};
    HADD2(*this, insn, sat, abs_b, neg_b, bits.swizzle_b, src_b);
}
// HADD2 with operand B read through GetCbuf; B is always treated with the F32 swizzle.
void TranslatorVisitor::HADD2_cbuf(u64 insn) {
    union {
        u64 raw;
        BitField<52, 1, u64> sat;
        BitField<54, 1, u64> abs_b;
        BitField<56, 1, u64> neg_b;
    } const bits{insn};

    const bool sat{bits.sat != 0};
    const bool abs_b{bits.abs_b != 0};
    const bool neg_b{bits.neg_b != 0};
    const IR::U32 src_b{GetCbuf(insn)};
    HADD2(*this, insn, sat, abs_b, neg_b, Swizzle::F32, src_b);
}
// HADD2 with a packed immediate operand B.
// Each 16-bit half of the immediate is built from a 9-bit field placed at bits [6, 15) of the
// half plus a separate sign bit at bit 15; the low six bits of each half stay zero.
void TranslatorVisitor::HADD2_imm(u64 insn) {
    union {
        u64 raw;
        BitField<20, 9, u64> low;
        BitField<29, 1, u64> neg_low;
        BitField<30, 9, u64> high;
        BitField<52, 1, u64> sat;
        BitField<56, 1, u64> neg_high;
    } const bits{insn};

    const u32 low_half{static_cast<u32>(bits.low << 6) | (bits.neg_low != 0 ? 0x8000u : 0u)};
    const u32 high_half{static_cast<u32>(bits.high << 22) |
                        (bits.neg_high != 0 ? 0x8000'0000u : 0u)};
    const u32 imm{low_half | high_half};
    HADD2(*this, insn, bits.sat != 0, false, false, Swizzle::H1_H0, ir.Imm32(imm));
}
// HADD2 with a raw 32-bit immediate operand B (bits [20, 52)), taken as two packed halves.
// This encoding always merges as H1_H0 and has no abs modifier for A and no modifiers for B.
void TranslatorVisitor::HADD2_32I(u64 insn) {
    union {
        u64 raw;
        BitField<20, 32, u64> imm32;
        BitField<52, 1, u64> sat;
        BitField<53, 2, Swizzle> swizzle_a;
        BitField<55, 1, u64> ftz;
        BitField<56, 1, u64> neg_a;
    } const bits{insn};

    const bool ftz{bits.ftz != 0};
    const bool sat{bits.sat != 0};
    const bool neg_a{bits.neg_a != 0};
    const u32 imm{static_cast<u32>(bits.imm32)};
    HADD2(*this, insn, Merge::H1_H0, ftz, sat, false, neg_a, bits.swizzle_a, false, false,
          Swizzle::H1_H0, ir.Imm32(imm));
}
} // namespace Shader::Maxwell

View File

@@ -0,0 +1,169 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.h"
namespace Shader::Maxwell {
namespace {
// Emits the IR for HFMA2: a two-lane fused multiply-add, a * b + c per lane.
// Operand A is the register in bits [8, 16); B and C are supplied by the caller. Each operand
// is split into two lanes by Extract according to its swizzle. Only B and C can be negated.
// The two results are packed by MergeResult and written to the register in bits [0, 8).
void HFMA2(TranslatorVisitor& v, u64 insn, Merge merge, Swizzle swizzle_a, bool neg_b, bool neg_c,
           Swizzle swizzle_b, Swizzle swizzle_c, const IR::U32& src_b, const IR::U32& src_c,
           bool sat, HalfPrecision precision) {
    union {
        u64 raw;
        BitField<0, 8, IR::Reg> dest_reg;
        BitField<8, 8, IR::Reg> src_a;
    } const bits{insn};

    auto [a0, a1]{Extract(v.ir, v.X(bits.src_a), swizzle_a)};
    auto [b0, b1]{Extract(v.ir, src_b, swizzle_b)};
    auto [c0, c1]{Extract(v.ir, src_c, swizzle_c)};

    // When the three operands do not share one width, widen every F16 operand to F32.
    const bool mixed_width{a0.Type() != b0.Type() || a0.Type() != c0.Type()};
    if (mixed_width && a0.Type() == IR::Type::F16) {
        a0 = v.ir.FPConvert(32, a0);
        a1 = v.ir.FPConvert(32, a1);
    }
    if (mixed_width && b0.Type() == IR::Type::F16) {
        b0 = v.ir.FPConvert(32, b0);
        b1 = v.ir.FPConvert(32, b1);
    }
    if (mixed_width && c0.Type() == IR::Type::F16) {
        c0 = v.ir.FPConvert(32, c0);
        c1 = v.ir.FPConvert(32, c1);
    }

    b0 = v.ir.FPAbsNeg(b0, false, neg_b);
    b1 = v.ir.FPAbsNeg(b1, false, neg_b);
    c0 = v.ir.FPAbsNeg(c0, false, neg_c);
    c1 = v.ir.FPAbsNeg(c1, false, neg_c);

    const IR::FpControl fp_control{
        .no_contraction = true,
        .rounding = IR::FpRounding::DontCare,
        .fmz_mode = HalfPrecision2FmzMode(precision),
    };
    IR::F16F32F64 fma0{v.ir.FPFma(a0, b0, c0, fp_control)};
    IR::F16F32F64 fma1{v.ir.FPFma(a1, b1, c1, fp_control)};

    const bool emulate_fmz{precision == HalfPrecision::FMZ && !sat};
    if (emulate_fmz) {
        // Do not implement FMZ if SAT is enabled, as it does the logic for us.
        // On D3D9 mode, anything * 0 is zero, even NAN and infinity, so a lane whose product
        // has a zero factor yields just its addend.
        // NOTE(review): `zero` is an F32 immediate while the lanes may still be F16 when no
        // widening happened - confirm FPEqual accepts that combination.
        const IR::F32 zero{v.ir.Imm32(0.0f)};
        const IR::U1 a0_is_zero{v.ir.FPEqual(a0, zero)};
        const IR::U1 b0_is_zero{v.ir.FPEqual(b0, zero)};
        const IR::U1 lane0_zero{v.ir.LogicalOr(a0_is_zero, b0_is_zero)};
        fma0 = IR::F16F32F64{v.ir.Select(lane0_zero, c0, fma0)};

        const IR::U1 a1_is_zero{v.ir.FPEqual(a1, zero)};
        const IR::U1 b1_is_zero{v.ir.FPEqual(b1, zero)};
        const IR::U1 lane1_zero{v.ir.LogicalOr(a1_is_zero, b1_is_zero)};
        fma1 = IR::F16F32F64{v.ir.Select(lane1_zero, c1, fma1)};
    }
    if (sat) {
        fma0 = v.ir.FPSaturate(fma0);
        fma1 = v.ir.FPSaturate(fma1);
    }
    // Results computed at the widened width are narrowed back before packing.
    // NOTE(review): if all three swizzles select F32 nothing is narrowed here and F32 results
    // reach MergeResult (which takes IR::F16) - confirm this is intended.
    if (mixed_width) {
        fma0 = v.ir.FPConvert(16, fma0);
        fma1 = v.ir.FPConvert(16, fma1);
    }
    const IR::U32 packed{MergeResult(v.ir, bits.dest_reg, fma0, fma1, merge)};
    v.X(bits.dest_reg, packed);
}
// Decodes the operand A swizzle and the merge mode shared by most HFMA2 encodings, then
// forwards to the full emitter above.
void HFMA2(TranslatorVisitor& v, u64 insn, bool neg_b, bool neg_c, Swizzle swizzle_b,
           Swizzle swizzle_c, const IR::U32& src_b, const IR::U32& src_c, bool sat,
           HalfPrecision precision) {
    union {
        u64 raw;
        BitField<47, 2, Swizzle> swizzle_a;
        BitField<49, 2, Merge> merge;
    } const bits{insn};

    const Merge merge{bits.merge};
    const Swizzle swizzle_a{bits.swizzle_a};
    HFMA2(v, insn, merge, swizzle_a, neg_b, neg_c, swizzle_b, swizzle_c, src_b, src_c, sat,
          precision);
}
} // Anonymous namespace
// HFMA2 with B read through GetReg20 and C read through GetReg39.
void TranslatorVisitor::HFMA2_reg(u64 insn) {
    union {
        u64 raw;
        BitField<28, 2, Swizzle> swizzle_b;
        BitField<30, 1, u64> neg_c;
        BitField<31, 1, u64> neg_b;
        BitField<32, 1, u64> saturate;
        BitField<35, 2, Swizzle> swizzle_c;
        BitField<37, 2, HalfPrecision> precision;
    } const bits{insn};

    const bool neg_b{bits.neg_b != 0};
    const bool neg_c{bits.neg_c != 0};
    const bool sat{bits.saturate != 0};
    HFMA2(*this, insn, neg_b, neg_c, bits.swizzle_b, bits.swizzle_c, GetReg20(insn),
          GetReg39(insn), sat, bits.precision);
}
// HFMA2 with B read through GetReg39 and C read through GetCbuf (C uses the F32 swizzle).
void TranslatorVisitor::HFMA2_rc(u64 insn) {
    union {
        u64 raw;
        BitField<51, 1, u64> neg_c;
        BitField<52, 1, u64> saturate;
        BitField<53, 2, Swizzle> swizzle_b;
        BitField<56, 1, u64> neg_b;
        BitField<57, 2, HalfPrecision> precision;
    } const bits{insn};

    const bool neg_b{bits.neg_b != 0};
    const bool neg_c{bits.neg_c != 0};
    const bool sat{bits.saturate != 0};
    HFMA2(*this, insn, neg_b, neg_c, bits.swizzle_b, Swizzle::F32, GetReg39(insn), GetCbuf(insn),
          sat, bits.precision);
}
// HFMA2 with B read through GetCbuf (B uses the F32 swizzle) and C read through GetReg39.
void TranslatorVisitor::HFMA2_cr(u64 insn) {
    union {
        u64 raw;
        BitField<51, 1, u64> neg_c;
        BitField<52, 1, u64> saturate;
        BitField<53, 2, Swizzle> swizzle_c;
        BitField<56, 1, u64> neg_b;
        BitField<57, 2, HalfPrecision> precision;
    } const bits{insn};

    const bool neg_b{bits.neg_b != 0};
    const bool neg_c{bits.neg_c != 0};
    const bool sat{bits.saturate != 0};
    HFMA2(*this, insn, neg_b, neg_c, Swizzle::F32, bits.swizzle_c, GetCbuf(insn), GetReg39(insn),
          sat, bits.precision);
}
// HFMA2 with a packed immediate operand B and C read through GetReg39.
// Each 16-bit half of the immediate is built from a 9-bit field placed at bits [6, 15) of the
// half plus a separate sign bit at bit 15; the low six bits of each half stay zero.
void TranslatorVisitor::HFMA2_imm(u64 insn) {
    union {
        u64 raw;
        BitField<20, 9, u64> low;
        BitField<29, 1, u64> neg_low;
        BitField<30, 9, u64> high;
        BitField<51, 1, u64> neg_c;
        BitField<52, 1, u64> saturate;
        BitField<53, 2, Swizzle> swizzle_c;
        BitField<56, 1, u64> neg_high;
        BitField<57, 2, HalfPrecision> precision;
    } const bits{insn};

    const u32 low_half{static_cast<u32>(bits.low << 6) | (bits.neg_low != 0 ? 0x8000u : 0u)};
    const u32 high_half{static_cast<u32>(bits.high << 22) |
                        (bits.neg_high != 0 ? 0x8000'0000u : 0u)};
    const u32 imm{low_half | high_half};

    const bool neg_c{bits.neg_c != 0};
    const bool sat{bits.saturate != 0};
    HFMA2(*this, insn, false, neg_c, Swizzle::H1_H0, bits.swizzle_c, ir.Imm32(imm),
          GetReg39(insn), sat, bits.precision);
}
// HFMA2 with a raw 32-bit immediate operand B (bits [20, 52)).
// Operand C is read from the register in bits [0, 8), the same field the emitter uses as the
// destination. This encoding always merges as H1_H0 and never saturates.
void TranslatorVisitor::HFMA2_32I(u64 insn) {
    union {
        u64 raw;
        BitField<0, 8, IR::Reg> src_c;
        BitField<20, 32, u64> imm32;
        BitField<52, 1, u64> neg_c;
        BitField<53, 2, Swizzle> swizzle_a;
        BitField<55, 2, HalfPrecision> precision;
    } const bits{insn};

    const bool neg_c{bits.neg_c != 0};
    const u32 imm{static_cast<u32>(bits.imm32)};
    HFMA2(*this, insn, Merge::H1_H0, bits.swizzle_a, false, neg_c, Swizzle::H1_H0, Swizzle::H1_H0,
          ir.Imm32(imm), X(bits.src_c), false, bits.precision);
}
} // namespace Shader::Maxwell

View File

@@ -0,0 +1,62 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.h"
namespace Shader::Maxwell {
// Maps a half-float precision modifier to the matching IR::FmzMode.
// Values outside the named enumerators map to IR::FmzMode::DontCare.
IR::FmzMode HalfPrecision2FmzMode(HalfPrecision precision) {
    if (precision == HalfPrecision::None) {
        return IR::FmzMode::None;
    }
    if (precision == HalfPrecision::FTZ) {
        return IR::FmzMode::FTZ;
    }
    if (precision == HalfPrecision::FMZ) {
        return IR::FmzMode::FMZ;
    }
    return IR::FmzMode::DontCare;
}
// Splits a 32-bit value into the two lanes selected by `swizzle`.
//   H1_H0 - element 0 and element 1 of the unpacked half pair
//   H0_H0 - element 0 in both lanes
//   H1_H1 - element 1 in both lanes
//   F32   - the whole value bit-cast to F32, in both lanes
// Throws InvalidArgument for any other swizzle value.
std::pair<IR::F16F32F64, IR::F16F32F64> Extract(IR::IREmitter& ir, IR::U32 value, Swizzle swizzle) {
    switch (swizzle) {
    case Swizzle::H1_H0: {
        const IR::Value halves{ir.UnpackFloat2x16(value)};
        const IR::F16 first{ir.CompositeExtract(halves, 0)};
        const IR::F16 second{ir.CompositeExtract(halves, 1)};
        return {first, second};
    }
    case Swizzle::H0_H0: {
        const IR::Value halves{ir.UnpackFloat2x16(value)};
        const IR::F16 first{ir.CompositeExtract(halves, 0)};
        return {first, first};
    }
    case Swizzle::H1_H1: {
        const IR::Value halves{ir.UnpackFloat2x16(value)};
        const IR::F16 second{ir.CompositeExtract(halves, 1)};
        return {second, second};
    }
    case Swizzle::F32: {
        const IR::F32 whole{ir.BitCast<IR::F32>(value)};
        return {whole, whole};
    }
    }
    throw InvalidArgument("Invalid swizzle {}", swizzle);
}
// Packs the two result lanes into the 32-bit value to store in `dest`, according to `merge`.
//   H1_H0  - both lanes packed as a half pair
//   F32    - `lhs` widened to F32 and bit-cast; `rhs` is unused
//   MRG_H0 - current contents of `dest` with element 0 replaced by `lhs`
//   MRG_H1 - current contents of `dest` with element 1 replaced by `rhs`
// Throws InvalidArgument for any other merge value.
IR::U32 MergeResult(IR::IREmitter& ir, IR::Reg dest, const IR::F16& lhs, const IR::F16& rhs,
                    Merge merge) {
    switch (merge) {
    case Merge::H1_H0:
        return ir.PackFloat2x16(ir.CompositeConstruct(lhs, rhs));
    case Merge::F32: {
        const IR::F32 widened{ir.FPConvert(32, lhs)};
        return ir.BitCast<IR::U32, IR::F32>(widened);
    }
    case Merge::MRG_H0:
    case Merge::MRG_H1: {
        // Read-modify-write: only one half of the destination is replaced.
        const IR::Value current{ir.UnpackFloat2x16(ir.GetReg(dest))};
        const bool replace_first{merge == Merge::MRG_H0};
        const IR::F16 replacement{ir.FPConvert(16, replace_first ? lhs : rhs)};
        return ir.PackFloat2x16(ir.CompositeInsert(current, replacement, replace_first ? 0 : 1));
    }
    }
    throw InvalidArgument("Invalid merge {}", merge);
}
} // namespace Shader::Maxwell

View File

@@ -0,0 +1,42 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include "common/common_types.h"
#include "shader_recompiler/exception.h"
#include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h"
#include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h"
#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
namespace Shader::Maxwell {
// How the two result lanes of a half-float instruction are written back (see MergeResult).
enum class Merge : u64 {
    H1_H0 = 0,  // Both lanes packed as a half pair
    F32 = 1,    // First lane widened to a single F32
    MRG_H0 = 2, // Replace only element 0 of the destination
    MRG_H1 = 3, // Replace only element 1 of the destination
};
// How a 32-bit source operand is split into two lanes (see Extract).
enum class Swizzle : u64 {
    H1_H0 = 0, // Element 0 and element 1 of the half pair
    F32 = 1,   // Whole value as one F32, used for both lanes
    H0_H0 = 2, // Element 0 in both lanes
    H1_H1 = 3, // Element 1 in both lanes
};
// Precision modifier of half-float instructions; converted to IR::FmzMode by
// HalfPrecision2FmzMode.
enum class HalfPrecision : u64 {
    None = 0x0,
    FTZ = 0x1,
    FMZ = 0x2,
};
// Maps a half-float precision modifier to the corresponding IR::FmzMode.
IR::FmzMode HalfPrecision2FmzMode(HalfPrecision precision);
// Splits a 32-bit value into two float lanes according to the given swizzle.
std::pair<IR::F16F32F64, IR::F16F32F64> Extract(IR::IREmitter& ir, IR::U32 value, Swizzle swizzle);
// Packs two result lanes into the 32-bit value to store in `dest` according to the merge mode.
IR::U32 MergeResult(IR::IREmitter& ir, IR::Reg dest, const IR::F16& lhs, const IR::F16& rhs,
Merge merge);
} // namespace Shader::Maxwell

View File

@@ -0,0 +1,143 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.h"
namespace Shader::Maxwell {
namespace {
// Emits the IR for HMUL2: a two-lane floating point multiply.
// Operand A is the register in bits [8, 16) and operand B is src_b; each is split into two
// lanes by Extract according to its swizzle. abs/neg modifiers apply to both lanes of an operand.
// The two products are packed by MergeResult and written to the register in bits [0, 8).
void HMUL2(TranslatorVisitor& v, u64 insn, Merge merge, bool sat, bool abs_a, bool neg_a,
           Swizzle swizzle_a, bool abs_b, bool neg_b, Swizzle swizzle_b, const IR::U32& src_b,
           HalfPrecision precision) {
    union {
        u64 raw;
        BitField<0, 8, IR::Reg> dest_reg;
        BitField<8, 8, IR::Reg> src_a;
    } const bits{insn};

    auto [a0, a1]{Extract(v.ir, v.X(bits.src_a), swizzle_a)};
    auto [b0, b1]{Extract(v.ir, src_b, swizzle_b)};

    // When the operands disagree on width, widen whichever one is still F16.
    const bool mixed_width{a0.Type() != b0.Type()};
    if (mixed_width && a0.Type() == IR::Type::F16) {
        a0 = v.ir.FPConvert(32, a0);
        a1 = v.ir.FPConvert(32, a1);
    }
    if (mixed_width && b0.Type() == IR::Type::F16) {
        b0 = v.ir.FPConvert(32, b0);
        b1 = v.ir.FPConvert(32, b1);
    }

    a0 = v.ir.FPAbsNeg(a0, abs_a, neg_a);
    a1 = v.ir.FPAbsNeg(a1, abs_a, neg_a);
    b0 = v.ir.FPAbsNeg(b0, abs_b, neg_b);
    b1 = v.ir.FPAbsNeg(b1, abs_b, neg_b);

    const IR::FpControl fp_control{
        .no_contraction = true,
        .rounding = IR::FpRounding::DontCare,
        .fmz_mode = HalfPrecision2FmzMode(precision),
    };
    IR::F16F32F64 product0{v.ir.FPMul(a0, b0, fp_control)};
    IR::F16F32F64 product1{v.ir.FPMul(a1, b1, fp_control)};

    const bool emulate_fmz{precision == HalfPrecision::FMZ && !sat};
    if (emulate_fmz) {
        // Do not implement FMZ if SAT is enabled, as it does the logic for us.
        // On D3D9 mode, anything * 0 is zero, even NAN and infinity
        // NOTE(review): `zero` is an F32 immediate while the lanes may still be F16 when no
        // widening happened - confirm FPEqual/Select accept that combination.
        const IR::F32 zero{v.ir.Imm32(0.0f)};
        const IR::U1 a0_is_zero{v.ir.FPEqual(a0, zero)};
        const IR::U1 b0_is_zero{v.ir.FPEqual(b0, zero)};
        const IR::U1 lane0_zero{v.ir.LogicalOr(a0_is_zero, b0_is_zero)};
        product0 = IR::F16F32F64{v.ir.Select(lane0_zero, zero, product0)};

        const IR::U1 a1_is_zero{v.ir.FPEqual(a1, zero)};
        const IR::U1 b1_is_zero{v.ir.FPEqual(b1, zero)};
        const IR::U1 lane1_zero{v.ir.LogicalOr(a1_is_zero, b1_is_zero)};
        product1 = IR::F16F32F64{v.ir.Select(lane1_zero, zero, product1)};
    }
    if (sat) {
        product0 = v.ir.FPSaturate(product0);
        product1 = v.ir.FPSaturate(product1);
    }
    // Results computed at the widened width are narrowed back before packing.
    // NOTE(review): when both swizzles select F32 nothing is narrowed here and F32 results
    // reach MergeResult (which takes IR::F16) - confirm this is intended.
    if (mixed_width) {
        product0 = v.ir.FPConvert(16, product0);
        product1 = v.ir.FPConvert(16, product1);
    }
    const IR::U32 packed{MergeResult(v.ir, bits.dest_reg, product0, product1, merge)};
    v.X(bits.dest_reg, packed);
}
// Decodes the merge mode, operand A swizzle and precision shared by the register, constant
// buffer and immediate encodings of HMUL2, then forwards to the full emitter above.
void HMUL2(TranslatorVisitor& v, u64 insn, bool sat, bool abs_a, bool neg_a, bool abs_b, bool neg_b,
           Swizzle swizzle_b, const IR::U32& src_b) {
    union {
        u64 raw;
        BitField<39, 2, HalfPrecision> precision;
        BitField<47, 2, Swizzle> swizzle_a;
        BitField<49, 2, Merge> merge;
    } const bits{insn};

    const Merge merge{bits.merge};
    const Swizzle swizzle_a{bits.swizzle_a};
    const HalfPrecision precision{bits.precision};
    HMUL2(v, insn, merge, sat, abs_a, neg_a, swizzle_a, abs_b, neg_b, swizzle_b, src_b, precision);
}
} // Anonymous namespace
// HMUL2 with operand B read through GetReg20. Operand A has only an abs modifier here;
// its negate flag is always passed as false.
void TranslatorVisitor::HMUL2_reg(u64 insn) {
    union {
        u64 raw;
        BitField<28, 2, Swizzle> swizzle_b;
        BitField<30, 1, u64> abs_b;
        BitField<31, 1, u64> neg_b;
        BitField<32, 1, u64> sat;
        BitField<44, 1, u64> abs_a;
    } const bits{insn};

    const bool sat{bits.sat != 0};
    const bool abs_a{bits.abs_a != 0};
    const bool abs_b{bits.abs_b != 0};
    const bool neg_b{bits.neg_b != 0};
    const IR::U32 src_b{GetReg20(insn)};
    HMUL2(*this, insn, sat, abs_a, false, abs_b, neg_b, bits.swizzle_b, src_b);
}
// HMUL2 with operand B read through GetCbuf. B uses the F32 swizzle and has only an abs
// modifier; its negate flag is always passed as false.
void TranslatorVisitor::HMUL2_cbuf(u64 insn) {
    union {
        u64 raw;
        BitField<43, 1, u64> neg_a;
        BitField<44, 1, u64> abs_a;
        BitField<52, 1, u64> sat;
        BitField<54, 1, u64> abs_b;
    } const bits{insn};

    const bool sat{bits.sat != 0};
    const bool abs_a{bits.abs_a != 0};
    const bool neg_a{bits.neg_a != 0};
    const bool abs_b{bits.abs_b != 0};
    const IR::U32 src_b{GetCbuf(insn)};
    HMUL2(*this, insn, sat, abs_a, neg_a, abs_b, false, Swizzle::F32, src_b);
}
// HMUL2 with a packed immediate operand B.
// Each 16-bit half of the immediate is built from a 9-bit field placed at bits [6, 15) of the
// half plus a separate sign bit at bit 15; the low six bits of each half stay zero.
void TranslatorVisitor::HMUL2_imm(u64 insn) {
    union {
        u64 raw;
        BitField<20, 9, u64> low;
        BitField<29, 1, u64> neg_low;
        BitField<30, 9, u64> high;
        BitField<43, 1, u64> neg_a;
        BitField<44, 1, u64> abs_a;
        BitField<52, 1, u64> sat;
        BitField<56, 1, u64> neg_high;
    } const bits{insn};

    const u32 low_half{static_cast<u32>(bits.low << 6) | (bits.neg_low != 0 ? 0x8000u : 0u)};
    const u32 high_half{static_cast<u32>(bits.high << 22) |
                        (bits.neg_high != 0 ? 0x8000'0000u : 0u)};
    const u32 imm{low_half | high_half};

    const bool sat{bits.sat != 0};
    const bool abs_a{bits.abs_a != 0};
    const bool neg_a{bits.neg_a != 0};
    HMUL2(*this, insn, sat, abs_a, neg_a, false, false, Swizzle::H1_H0, ir.Imm32(imm));
}
// HMUL2 with a raw 32-bit immediate operand B (bits [20, 52)), taken as two packed halves.
// This encoding always merges as H1_H0 and has no abs/neg modifiers for either operand.
void TranslatorVisitor::HMUL2_32I(u64 insn) {
    union {
        u64 raw;
        BitField<20, 32, u64> imm32;
        BitField<52, 1, u64> sat;
        BitField<53, 2, Swizzle> swizzle_a;
        BitField<55, 2, HalfPrecision> precision;
    } const bits{insn};

    const bool sat{bits.sat != 0};
    const u32 imm{static_cast<u32>(bits.imm32)};
    HMUL2(*this, insn, Merge::H1_H0, sat, false, false, bits.swizzle_a, false, false,
          Swizzle::H1_H0, ir.Imm32(imm), bits.precision);
}
} // namespace Shader::Maxwell

View File

@@ -0,0 +1,117 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.h"
namespace Shader::Maxwell {
namespace {
// Emits the IR for HSET2: a two-lane floating point compare whose per-lane results are written
// to a register as two 16-bit masks.
// Each lane's comparison is combined with a (possibly negated) predicate using the boolean op
// in bits [45, 47). A passing lane stores 0x3c00 (binary16 1.0) when `bf` is set and 0xffff
// otherwise; a failing lane stores 0. The first lane fills bits [0, 16), the second [16, 32).
void HSET2(TranslatorVisitor& v, u64 insn, const IR::U32& src_b, bool bf, bool ftz, bool neg_b,
           bool abs_b, FPCompareOp compare_op, Swizzle swizzle_b) {
    union {
        u64 insn;
        BitField<0, 8, IR::Reg> dest_reg;
        BitField<8, 8, IR::Reg> src_a_reg;
        BitField<39, 3, IR::Pred> pred;
        BitField<42, 1, u64> neg_pred;
        BitField<43, 1, u64> neg_a;
        BitField<44, 1, u64> abs_a;
        BitField<45, 2, BooleanOp> bop;
        BitField<47, 2, Swizzle> swizzle_a;
    } const bits{insn};

    auto [a0, a1]{Extract(v.ir, v.X(bits.src_a_reg), bits.swizzle_a)};
    auto [b0, b1]{Extract(v.ir, src_b, swizzle_b)};

    // When the operands disagree on width, widen whichever one is still F16.
    const bool mixed_width{a0.Type() != b0.Type()};
    if (mixed_width && a0.Type() == IR::Type::F16) {
        a0 = v.ir.FPConvert(32, a0);
        a1 = v.ir.FPConvert(32, a1);
    }
    if (mixed_width && b0.Type() == IR::Type::F16) {
        b0 = v.ir.FPConvert(32, b0);
        b1 = v.ir.FPConvert(32, b1);
    }

    const bool abs_a{bits.abs_a != 0};
    const bool neg_a{bits.neg_a != 0};
    a0 = v.ir.FPAbsNeg(a0, abs_a, neg_a);
    a1 = v.ir.FPAbsNeg(a1, abs_a, neg_a);
    b0 = v.ir.FPAbsNeg(b0, abs_b, neg_b);
    b1 = v.ir.FPAbsNeg(b1, abs_b, neg_b);

    const IR::FpControl control{
        .no_contraction = false,
        .rounding = IR::FpRounding::DontCare,
        .fmz_mode = (ftz ? IR::FmzMode::FTZ : IR::FmzMode::None),
    };

    IR::U1 pred{v.ir.GetPred(bits.pred)};
    if (bits.neg_pred != 0) {
        pred = v.ir.LogicalNot(pred);
    }

    const BooleanOp combine_op{bits.bop};
    const IR::U1 cmp0{FloatingPointCompare(v.ir, a0, b0, compare_op, control)};
    const IR::U1 cmp1{FloatingPointCompare(v.ir, a1, b1, compare_op, control)};
    const IR::U1 pass0{PredicateCombine(v.ir, cmp0, pred, combine_op)};
    const IR::U1 pass1{PredicateCombine(v.ir, cmp1, pred, combine_op)};

    const u32 lane_true{bf ? 0x3c00u : 0xffffu};
    const IR::U32 true0{v.ir.Imm32(lane_true)};
    const IR::U32 true1{v.ir.Imm32(lane_true << 16)};
    const IR::U32 lane_false{v.ir.Imm32(0)};
    const IR::U32 mask0{v.ir.Select(pass0, true0, lane_false)};
    const IR::U32 mask1{v.ir.Select(pass1, true1, lane_false)};
    v.X(bits.dest_reg, IR::U32{v.ir.BitwiseOr(mask0, mask1)});
}
} // Anonymous namespace
// HSET2 with operand B read through GetReg20; B has its own abs/neg/swizzle bits.
void TranslatorVisitor::HSET2_reg(u64 insn) {
    union {
        u64 insn;
        BitField<28, 2, Swizzle> swizzle_b;
        BitField<30, 1, u64> abs_b;
        BitField<31, 1, u64> neg_b;
        BitField<35, 4, FPCompareOp> compare_op;
        BitField<49, 1, u64> bf;
        BitField<50, 1, u64> ftz;
    } const bits{insn};

    const bool bf{bits.bf != 0};
    const bool ftz{bits.ftz != 0};
    const bool neg_b{bits.neg_b != 0};
    const bool abs_b{bits.abs_b != 0};
    const IR::U32 src_b{GetReg20(insn)};
    HSET2(*this, insn, src_b, bf, ftz, neg_b, abs_b, bits.compare_op, bits.swizzle_b);
}
// HSET2 with operand B read through GetCbuf. B uses the F32 swizzle and its abs flag is
// always passed as false.
void TranslatorVisitor::HSET2_cbuf(u64 insn) {
    union {
        u64 insn;
        BitField<49, 4, FPCompareOp> compare_op;
        BitField<53, 1, u64> bf;
        BitField<54, 1, u64> ftz;
        BitField<56, 1, u64> neg_b;
    } const bits{insn};

    const bool bf{bits.bf != 0};
    const bool ftz{bits.ftz != 0};
    const bool neg_b{bits.neg_b != 0};
    const IR::U32 src_b{GetCbuf(insn)};
    HSET2(*this, insn, src_b, bf, ftz, neg_b, false, bits.compare_op, Swizzle::F32);
}
// HSET2 with a packed immediate operand B.
// Each 16-bit half of the immediate is built from a 9-bit field placed at bits [6, 15) of the
// half plus a separate sign bit at bit 15; the low six bits of each half stay zero.
void TranslatorVisitor::HSET2_imm(u64 insn) {
    union {
        u64 insn;
        BitField<20, 9, u64> low;
        BitField<29, 1, u64> neg_low;
        BitField<30, 9, u64> high;
        BitField<49, 4, FPCompareOp> compare_op;
        BitField<53, 1, u64> bf;
        BitField<54, 1, u64> ftz;
        BitField<56, 1, u64> neg_high;
    } const bits{insn};

    const u32 low_half{static_cast<u32>(bits.low << 6) | (bits.neg_low != 0 ? 0x8000u : 0u)};
    const u32 high_half{static_cast<u32>(bits.high << 22) |
                        (bits.neg_high != 0 ? 0x8000'0000u : 0u)};
    const u32 imm{low_half | high_half};

    const bool bf{bits.bf != 0};
    const bool ftz{bits.ftz != 0};
    HSET2(*this, insn, ir.Imm32(imm), bf, ftz, false, false, bits.compare_op, Swizzle::H1_H0);
}
} // namespace Shader::Maxwell

View File

@@ -0,0 +1,118 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.h"
namespace Shader::Maxwell {
namespace {
// Emits the IR for HSETP2: a two-lane floating point compare that writes two predicates.
// Each lane's comparison is combined with a (possibly negated) predicate using the boolean op
// in bits [45, 47).
//   h_and set   - dest_pred_a = lane0 && lane1, dest_pred_b = !(lane0 && lane1)
//   h_and clear - dest_pred_a = lane0,          dest_pred_b = lane1
void HSETP2(TranslatorVisitor& v, u64 insn, const IR::U32& src_b, bool neg_b, bool abs_b,
            Swizzle swizzle_b, FPCompareOp compare_op, bool h_and) {
    union {
        u64 insn;
        BitField<0, 3, IR::Pred> dest_pred_b;
        BitField<3, 3, IR::Pred> dest_pred_a;
        BitField<6, 1, u64> ftz;
        BitField<8, 8, IR::Reg> src_a_reg;
        BitField<39, 3, IR::Pred> pred;
        BitField<42, 1, u64> neg_pred;
        BitField<43, 1, u64> neg_a;
        BitField<44, 1, u64> abs_a;
        BitField<45, 2, BooleanOp> bop;
        BitField<47, 2, Swizzle> swizzle_a;
    } const bits{insn};

    auto [a0, a1]{Extract(v.ir, v.X(bits.src_a_reg), bits.swizzle_a)};
    auto [b0, b1]{Extract(v.ir, src_b, swizzle_b)};

    // When the operands disagree on width, widen whichever one is still F16.
    const bool mixed_width{a0.Type() != b0.Type()};
    if (mixed_width && a0.Type() == IR::Type::F16) {
        a0 = v.ir.FPConvert(32, a0);
        a1 = v.ir.FPConvert(32, a1);
    }
    if (mixed_width && b0.Type() == IR::Type::F16) {
        b0 = v.ir.FPConvert(32, b0);
        b1 = v.ir.FPConvert(32, b1);
    }

    const bool abs_a{bits.abs_a != 0};
    const bool neg_a{bits.neg_a != 0};
    a0 = v.ir.FPAbsNeg(a0, abs_a, neg_a);
    a1 = v.ir.FPAbsNeg(a1, abs_a, neg_a);
    b0 = v.ir.FPAbsNeg(b0, abs_b, neg_b);
    b1 = v.ir.FPAbsNeg(b1, abs_b, neg_b);

    const bool flush{bits.ftz != 0};
    const IR::FpControl control{
        .no_contraction = false,
        .rounding = IR::FpRounding::DontCare,
        .fmz_mode = (flush ? IR::FmzMode::FTZ : IR::FmzMode::None),
    };

    IR::U1 pred{v.ir.GetPred(bits.pred)};
    if (bits.neg_pred != 0) {
        pred = v.ir.LogicalNot(pred);
    }

    const BooleanOp combine_op{bits.bop};
    const IR::U1 cmp0{FloatingPointCompare(v.ir, a0, b0, compare_op, control)};
    const IR::U1 cmp1{FloatingPointCompare(v.ir, a1, b1, compare_op, control)};
    const IR::U1 pass0{PredicateCombine(v.ir, cmp0, pred, combine_op)};
    const IR::U1 pass1{PredicateCombine(v.ir, cmp1, pred, combine_op)};

    if (!h_and) {
        // One predicate per lane.
        v.ir.SetPred(bits.dest_pred_a, pass0);
        v.ir.SetPred(bits.dest_pred_b, pass1);
        return;
    }
    // Both lanes must pass; the second predicate receives the complement.
    const IR::U1 both{v.ir.LogicalAnd(pass0, pass1)};
    v.ir.SetPred(bits.dest_pred_a, both);
    v.ir.SetPred(bits.dest_pred_b, v.ir.LogicalNot(both));
}
} // Anonymous namespace
// HSETP2 with operand B read through GetReg20; B has its own abs/neg/swizzle bits.
void TranslatorVisitor::HSETP2_reg(u64 insn) {
    union {
        u64 insn;
        BitField<28, 2, Swizzle> swizzle_b;
        BitField<30, 1, u64> abs_b;
        BitField<31, 1, u64> neg_b;
        BitField<35, 4, FPCompareOp> compare_op;
        BitField<49, 1, u64> h_and;
    } const bits{insn};

    const bool neg_b{bits.neg_b != 0};
    const bool abs_b{bits.abs_b != 0};
    const bool h_and{bits.h_and != 0};
    const IR::U32 src_b{GetReg20(insn)};
    HSETP2(*this, insn, src_b, neg_b, abs_b, bits.swizzle_b, bits.compare_op, h_and);
}
// HSETP2 with operand B read through GetCbuf; B always uses the F32 swizzle.
void TranslatorVisitor::HSETP2_cbuf(u64 insn) {
    union {
        u64 insn;
        BitField<49, 4, FPCompareOp> compare_op;
        BitField<53, 1, u64> h_and;
        BitField<54, 1, u64> abs_b;
        BitField<56, 1, u64> neg_b;
    } const bits{insn};

    const bool neg_b{bits.neg_b != 0};
    const bool abs_b{bits.abs_b != 0};
    const bool h_and{bits.h_and != 0};
    const IR::U32 src_b{GetCbuf(insn)};
    HSETP2(*this, insn, src_b, neg_b, abs_b, Swizzle::F32, bits.compare_op, h_and);
}
// HSETP2 with a packed immediate operand B.
// Each 16-bit half of the immediate is built from a 9-bit field placed at bits [6, 15) of the
// half plus a separate sign bit at bit 15; the low six bits of each half stay zero.
void TranslatorVisitor::HSETP2_imm(u64 insn) {
    union {
        u64 insn;
        BitField<20, 9, u64> low;
        BitField<29, 1, u64> neg_low;
        BitField<30, 9, u64> high;
        BitField<49, 4, FPCompareOp> compare_op;
        BitField<53, 1, u64> h_and;
        // NOTE(review): decoded but never forwarded; the shared emitter reads FTZ from bit 6.
        BitField<54, 1, u64> ftz;
        BitField<56, 1, u64> neg_high;
    } const bits{insn};

    const u32 low_half{static_cast<u32>(bits.low << 6) | (bits.neg_low != 0 ? 0x8000u : 0u)};
    const u32 high_half{static_cast<u32>(bits.high << 22) |
                        (bits.neg_high != 0 ? 0x8000'0000u : 0u)};
    const u32 imm{low_half | high_half};

    const bool h_and{bits.h_and != 0};
    HSETP2(*this, insn, ir.Imm32(imm), false, false, Swizzle::H1_H0, bits.compare_op, h_and);
}
} // namespace Shader::Maxwell

View File

@@ -0,0 +1,272 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "common/bit_field.h"
#include "shader_recompiler/frontend/ir/ir_emitter.h"
#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
namespace Shader::Maxwell {
namespace {
// Returns the constant buffer word at (binding, offset), or an immediate zero when the access
// is flagged as unaligned.
[[nodiscard]] IR::U32 CbufLowerBits(IR::IREmitter& ir, bool unaligned, const IR::U32& binding,
                                    u32 offset) {
    if (!unaligned) {
        const IR::U32 imm_offset{IR::Value{offset}};
        return ir.GetCbuf(binding, imm_offset);
    }
    return ir.Imm32(0);
}
} // Anonymous namespace
// Reads a register as a 32-bit unsigned value.
IR::U32 TranslatorVisitor::X(IR::Reg reg) {
    const IR::U32 value{ir.GetReg(reg)};
    return value;
}
// Reads the register pair (reg, reg + 1) as one 64-bit unsigned value.
// Throws NotImplementedException when reg is not aligned to 2.
IR::U64 TranslatorVisitor::L(IR::Reg reg) {
    if (IR::IsAligned(reg, 2)) {
        return IR::U64{ir.PackUint2x32(ir.CompositeConstruct(X(reg), X(reg + 1)))};
    }
    throw NotImplementedException("Unaligned source register {}", reg);
}
// Reads a register and reinterprets its 32 bits as F32.
IR::F32 TranslatorVisitor::F(IR::Reg reg) {
    const IR::U32 raw_bits{X(reg)};
    return ir.BitCast<IR::F32>(raw_bits);
}
// Reads the register pair (reg, reg + 1) as one F64 value.
// Throws NotImplementedException when reg is not aligned to 2.
IR::F64 TranslatorVisitor::D(IR::Reg reg) {
    if (IR::IsAligned(reg, 2)) {
        return IR::F64{ir.PackDouble2x32(ir.CompositeConstruct(X(reg), X(reg + 1)))};
    }
    throw NotImplementedException("Unaligned source register {}", reg);
}
// Writes a 32-bit unsigned value to a register.
void TranslatorVisitor::X(IR::Reg dest_reg, const IR::U32& value) {
    IR::IREmitter& emitter{ir};
    emitter.SetReg(dest_reg, value);
}
// Writes a 64-bit unsigned value to the register pair (dest_reg, dest_reg + 1).
// Throws NotImplementedException when dest_reg is not aligned to 2.
void TranslatorVisitor::L(IR::Reg dest_reg, const IR::U64& value) {
    const bool aligned{IR::IsAligned(dest_reg, 2)};
    if (!aligned) {
        throw NotImplementedException("Unaligned destination register {}", dest_reg);
    }
    const IR::Value halves{ir.UnpackUint2x32(value)};
    // Element 0 goes to dest_reg, element 1 to the following register.
    for (size_t half = 0; half < 2; ++half) {
        const IR::U32 word{ir.CompositeExtract(halves, half)};
        X(dest_reg + static_cast<int>(half), word);
    }
}
// Writes an F32 value to a register by reinterpreting its bits as a 32-bit unsigned value.
void TranslatorVisitor::F(IR::Reg dest_reg, const IR::F32& value) {
    const IR::U32 raw_bits{ir.BitCast<IR::U32>(value)};
    X(dest_reg, raw_bits);
}
// Writes an F64 value to the register pair (dest_reg, dest_reg + 1).
// Throws NotImplementedException when dest_reg is not aligned to 2.
void TranslatorVisitor::D(IR::Reg dest_reg, const IR::F64& value) {
    const bool aligned{IR::IsAligned(dest_reg, 2)};
    if (!aligned) {
        throw NotImplementedException("Unaligned destination register {}", dest_reg);
    }
    const IR::Value halves{ir.UnpackDouble2x32(value)};
    // Element 0 goes to dest_reg, element 1 to the following register.
    for (size_t half = 0; half < 2; ++half) {
        const IR::U32 word{ir.CompositeExtract(halves, half)};
        X(dest_reg + static_cast<int>(half), word);
    }
}
// Reads the register whose index is encoded in bits 8-15 of the instruction.
IR::U32 TranslatorVisitor::GetReg8(u64 insn) {
    union {
        u64 raw;
        BitField<8, 8, IR::Reg> index;
    } const fields{insn};
    const IR::Reg source{fields.index.Value()};
    return X(source);
}

// Reads the register whose index is encoded in bits 20-27 of the instruction.
IR::U32 TranslatorVisitor::GetReg20(u64 insn) {
    union {
        u64 raw;
        BitField<20, 8, IR::Reg> index;
    } const fields{insn};
    const IR::Reg source{fields.index.Value()};
    return X(source);
}

// Reads the register whose index is encoded in bits 39-46 of the instruction.
IR::U32 TranslatorVisitor::GetReg39(u64 insn) {
    union {
        u64 raw;
        BitField<39, 8, IR::Reg> index;
    } const fields{insn};
    const IR::Reg source{fields.index.Value()};
    return X(source);
}
// Same register as GetReg8, with its bits reinterpreted as a 32-bit float.
IR::F32 TranslatorVisitor::GetFloatReg8(u64 insn) {
    const IR::U32 bits{GetReg8(insn)};
    return ir.BitCast<IR::F32>(bits);
}

// Same register as GetReg20, with its bits reinterpreted as a 32-bit float.
IR::F32 TranslatorVisitor::GetFloatReg20(u64 insn) {
    const IR::U32 bits{GetReg20(insn)};
    return ir.BitCast<IR::F32>(bits);
}

// Same register as GetReg39, with its bits reinterpreted as a 32-bit float.
IR::F32 TranslatorVisitor::GetFloatReg39(u64 insn) {
    const IR::U32 bits{GetReg39(insn)};
    return ir.BitCast<IR::F32>(bits);
}
// Reads a 64-bit float from the register pair whose first index is encoded in bits 20-27.
IR::F64 TranslatorVisitor::GetDoubleReg20(u64 insn) {
    union {
        u64 raw;
        BitField<20, 8, IR::Reg> index;
    } const fields{insn};
    const IR::Reg first{fields.index.Value()};
    return D(first);
}

// Reads a 64-bit float from the register pair whose first index is encoded in bits 39-46.
IR::F64 TranslatorVisitor::GetDoubleReg39(u64 insn) {
    union {
        u64 raw;
        BitField<39, 8, IR::Reg> index;
    } const fields{insn};
    const IR::Reg first{fields.index.Value()};
    return D(first);
}
// Decodes the constant buffer operand of an instruction.
// Returns {binding, byte offset} as immediates; the encoded offset (bits 20-33) is multiplied by
// four to obtain the byte offset and the binding comes from bits 34-38.
// Throws NotImplementedException for bindings of 18 and above.
static std::pair<IR::U32, IR::U32> CbufAddr(u64 insn) {
    union {
        u64 raw;
        BitField<20, 14, u64> offset;
        BitField<34, 5, u64> binding;
    } const fields{insn};

    const bool binding_in_range{fields.binding < 18};
    if (!binding_in_range) {
        throw NotImplementedException("Out of bounds constant buffer binding {}", fields.binding);
    }
    // NOTE(review): a 14-bit field tops out at 0x3fff, so this bound looks unreachable as written
    const bool offset_in_range{fields.offset < 0x10'000};
    if (!offset_in_range) {
        throw NotImplementedException("Out of bounds constant buffer offset {}", fields.offset);
    }
    const u32 binding_index{static_cast<u32>(fields.binding)};
    const u32 byte_offset{static_cast<u32>(fields.offset) * 4};
    const IR::Value binding_imm{binding_index};
    const IR::Value offset_imm{byte_offset};
    return {IR::U32{binding_imm}, IR::U32{offset_imm}};
}
// Reads the 32-bit integer addressed by the instruction's constant buffer operand.
IR::U32 TranslatorVisitor::GetCbuf(u64 insn) {
    const std::pair<IR::U32, IR::U32> addr{CbufAddr(insn)};
    return ir.GetCbuf(addr.first, addr.second);
}

// Reads the 32-bit float addressed by the instruction's constant buffer operand.
IR::F32 TranslatorVisitor::GetFloatCbuf(u64 insn) {
    const std::pair<IR::U32, IR::U32> addr{CbufAddr(insn)};
    return ir.GetFloatCbuf(addr.first, addr.second);
}
// Builds a 64-bit float from the instruction's constant buffer operand.
// The second (upper) component is always read from the buffer at an offset with bit 2 set; when
// bit 20 of the instruction is clear the offset is first rounded down to a multiple of eight.
// The first (lower) component is supplied by CbufLowerBits.
IR::F64 TranslatorVisitor::GetDoubleCbuf(u64 insn) {
    union {
        u64 raw;
        BitField<20, 1, u64> unaligned;
    } const fields{insn};

    const std::pair<IR::U32, IR::U32> addr{CbufAddr(insn)};
    const IR::U32& binding{addr.first};
    const u32 offset{addr.second.U32()};
    const bool is_unaligned{fields.unaligned != 0};

    const u32 base_offset{is_unaligned ? offset : (offset & ~7u)};
    const IR::U32 upper_offset{IR::Value{base_offset | 4u}};

    const IR::U32 upper_word{ir.GetCbuf(binding, upper_offset)};
    const IR::U32 lower_word{CbufLowerBits(ir, is_unaligned, binding, offset)};
    return ir.PackDouble2x32(ir.CompositeConstruct(lower_word, upper_word));
}
// Reads two consecutive 32-bit words from the constant buffer operand and packs them into a
// 64-bit integer (word at the decoded offset first, word at offset + 4 second).
// Throws NotImplementedException when bit 20 of the instruction is set.
IR::U64 TranslatorVisitor::GetPackedCbuf(u64 insn) {
    union {
        u64 raw;
        BitField<20, 1, u64> unaligned;
    } const fields{insn};

    const bool is_unaligned{fields.unaligned != 0};
    if (is_unaligned) {
        throw NotImplementedException("Unaligned packed constant buffer read");
    }
    const std::pair<IR::U32, IR::U32> addr{CbufAddr(insn)};
    const IR::U32& binding{addr.first};
    const IR::U32& first_offset{addr.second};
    const IR::U32 second_offset{ir.Imm32(first_offset.U32() + 4)};

    const IR::U32 first_word{ir.GetCbuf(binding, first_offset)};
    const IR::U32 second_word{ir.GetCbuf(binding, second_offset)};
    return ir.PackUint2x32(ir.CompositeConstruct(first_word, second_word));
}
// Decodes the 19-bit immediate in bits 20-38 together with the sign flag in bit 56.
// With the sign flag set the result is (value - 2^19); otherwise it is the raw field value.
IR::U32 TranslatorVisitor::GetImm20(u64 insn) {
    union {
        u64 raw;
        BitField<20, 19, u64> value;
        BitField<56, 1, u64> is_negative;
    } const fields{insn};

    if (fields.is_negative == 0) {
        return ir.Imm32(static_cast<u32>(fields.value));
    }
    const s64 magnitude{static_cast<s64>(fields.value)};
    return ir.Imm32(static_cast<s32>(magnitude - (1LL << 19)));
}
// Decodes a 32-bit float immediate: the 19-bit field in bits 20-38 supplies bits 12-30 of the
// float's bit pattern and bit 56 of the instruction supplies bit 31.
IR::F32 TranslatorVisitor::GetFloatImm20(u64 insn) {
    union {
        u64 raw;
        BitField<20, 19, u64> value;
        BitField<56, 1, u64> is_negative;
    } const fields{insn};

    const u32 upper_bits{static_cast<u32>(fields.value) << 12};
    const u32 sign_bit{fields.is_negative != 0 ? 0x8000'0000u : 0u};
    const u32 pattern{upper_bits | sign_bit};
    return ir.Imm32(Common::BitCast<f32>(pattern));
}
// Decodes a 64-bit float immediate: the 19-bit field in bits 20-38 supplies bits 44-62 of the
// double's bit pattern and bit 56 of the instruction supplies bit 63.
IR::F64 TranslatorVisitor::GetDoubleImm20(u64 insn) {
    union {
        u64 raw;
        BitField<20, 19, u64> value;
        BitField<56, 1, u64> is_negative;
    } const fields{insn};

    const u64 upper_bits{static_cast<u64>(fields.value) << 44};
    const u64 sign_bit{fields.is_negative != 0 ? (u64{1} << 63) : u64{0}};
    const u64 pattern{upper_bits | sign_bit};
    return ir.Imm64(Common::BitCast<f64>(pattern));
}
// Produces a 64-bit immediate whose upper 32 bits hold the value decoded by GetImm20 and whose
// lower 32 bits are zero.
IR::U64 TranslatorVisitor::GetPackedImm20(u64 insn) {
    const u32 imm20{GetImm20(insn).U32()};
    // Widen without sign extension before moving the word into the upper half
    const s64 widened{imm20};
    const s64 shifted{widened << 32};
    return ir.Imm64(static_cast<u64>(shifted));
}
// Decodes the 32-bit immediate stored in bits 20-51 of the instruction as an integer.
IR::U32 TranslatorVisitor::GetImm32(u64 insn) {
    union {
        u64 raw;
        BitField<20, 32, u64> value;
    } const fields{insn};
    const u32 bits{static_cast<u32>(fields.value)};
    return ir.Imm32(bits);
}

// Decodes the 32-bit immediate stored in bits 20-51 of the instruction as a float bit pattern.
IR::F32 TranslatorVisitor::GetFloatImm32(u64 insn) {
    union {
        u64 raw;
        BitField<20, 32, u64> value;
    } const fields{insn};
    const u32 bits{static_cast<u32>(fields.value)};
    return ir.Imm32(Common::BitCast<f32>(bits));
}
void TranslatorVisitor::SetZFlag(const IR::U1& value) {
ir.SetZFlag(value);
}
void TranslatorVisitor::SetSFlag(const IR::U1& value) {
ir.SetSFlag(value);
}
void TranslatorVisitor::SetCFlag(const IR::U1& value) {
ir.SetCFlag(value);
}
void TranslatorVisitor::SetOFlag(const IR::U1& value) {
ir.SetOFlag(value);
}
void TranslatorVisitor::ResetZero() {
SetZFlag(ir.Imm1(false));
}
void TranslatorVisitor::ResetSFlag() {
SetSFlag(ir.Imm1(false));
}
void TranslatorVisitor::ResetCFlag() {
SetCFlag(ir.Imm1(false));
}
void TranslatorVisitor::ResetOFlag() {
SetOFlag(ir.Imm1(false));
}
} // namespace Shader::Maxwell

View File

@@ -0,0 +1,387 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include "shader_recompiler/environment.h"
#include "shader_recompiler/frontend/ir/basic_block.h"
#include "shader_recompiler/frontend/ir/ir_emitter.h"
#include "shader_recompiler/frontend/maxwell/instruction.h"
namespace Shader::Maxwell {
// Integer comparison selector; the numeric values are what instruction bit fields decode to.
enum class CompareOp : u64 {
    False = 0,
    LessThan = 1,
    Equal = 2,
    LessThanEqual = 3,
    GreaterThan = 4,
    NotEqual = 5,
    GreaterThanEqual = 6,
    True = 7,
};
// Boolean operation selector; the numeric values are what instruction bit fields decode to.
enum class BooleanOp : u64 {
    AND = 0,
    OR = 1,
    XOR = 2,
};
// Predicate operation selector; the numeric values are what instruction bit fields decode to.
enum class PredicateOp : u64 {
    False = 0,
    True = 1,
    Zero = 2,
    NonZero = 3,
};
// Floating-point comparison selector; the numeric values are what instruction bit fields decode
// to. NOTE(review): the trailing-U variants presumably denote "unordered" comparisons - confirm.
enum class FPCompareOp : u64 {
    F = 0,
    LT = 1,
    EQ = 2,
    LE = 3,
    GT = 4,
    NE = 5,
    GE = 6,
    NUM = 7,
    Nan = 8,
    LTU = 9,
    EQU = 10,
    LEU = 11,
    GTU = 12,
    NEU = 13,
    GEU = 14,
    T = 15,
};
// Visitor used while translating Maxwell instructions into IR.
// It exposes one handler per instruction encoding plus a set of operand decoding helpers that
// those handlers share. Most handlers receive the raw 64-bit instruction word.
class TranslatorVisitor {
public:
// Binds the visitor to an environment and to the IR block the emitter is constructed with.
explicit TranslatorVisitor(Environment& env_, IR::Block& block) : env{env_}, ir(block) {}
// Environment passed at construction (held by reference).
Environment& env;
// IR emitter constructed from the block passed at construction.
IR::IREmitter ir;
// Instruction handlers, one per opcode encoding. Suffixes name the operand form being decoded
// (for example _reg, _cbuf, _imm, _rc, _cr).
void AL2P(u64 insn);
void ALD(u64 insn);
void AST(u64 insn);
void ATOM_cas(u64 insn);
void ATOM(u64 insn);
void ATOMS_cas(u64 insn);
void ATOMS(u64 insn);
void B2R(u64 insn);
void BAR(u64 insn);
void BFE_reg(u64 insn);
void BFE_cbuf(u64 insn);
void BFE_imm(u64 insn);
void BFI_reg(u64 insn);
void BFI_rc(u64 insn);
void BFI_cr(u64 insn);
void BFI_imm(u64 insn);
void BPT(u64 insn);
void BRA(u64 insn);
void BRK(u64 insn);
void BRX(u64 insn);
void CAL();
void CCTL(u64 insn);
void CCTLL(u64 insn);
void CONT(u64 insn);
void CS2R(u64 insn);
void CSET(u64 insn);
void CSETP(u64 insn);
void DADD_reg(u64 insn);
void DADD_cbuf(u64 insn);
void DADD_imm(u64 insn);
void DEPBAR();
void DFMA_reg(u64 insn);
void DFMA_rc(u64 insn);
void DFMA_cr(u64 insn);
void DFMA_imm(u64 insn);
void DMNMX_reg(u64 insn);
void DMNMX_cbuf(u64 insn);
void DMNMX_imm(u64 insn);
void DMUL_reg(u64 insn);
void DMUL_cbuf(u64 insn);
void DMUL_imm(u64 insn);
void DSET_reg(u64 insn);
void DSET_cbuf(u64 insn);
void DSET_imm(u64 insn);
void DSETP_reg(u64 insn);
void DSETP_cbuf(u64 insn);
void DSETP_imm(u64 insn);
void EXIT();
void F2F_reg(u64 insn);
void F2F_cbuf(u64 insn);
void F2F_imm(u64 insn);
void F2I_reg(u64 insn);
void F2I_cbuf(u64 insn);
void F2I_imm(u64 insn);
void FADD_reg(u64 insn);
void FADD_cbuf(u64 insn);
void FADD_imm(u64 insn);
void FADD32I(u64 insn);
void FCHK_reg(u64 insn);
void FCHK_cbuf(u64 insn);
void FCHK_imm(u64 insn);
void FCMP_reg(u64 insn);
void FCMP_rc(u64 insn);
void FCMP_cr(u64 insn);
void FCMP_imm(u64 insn);
void FFMA_reg(u64 insn);
void FFMA_rc(u64 insn);
void FFMA_cr(u64 insn);
void FFMA_imm(u64 insn);
void FFMA32I(u64 insn);
void FLO_reg(u64 insn);
void FLO_cbuf(u64 insn);
void FLO_imm(u64 insn);
void FMNMX_reg(u64 insn);
void FMNMX_cbuf(u64 insn);
void FMNMX_imm(u64 insn);
void FMUL_reg(u64 insn);
void FMUL_cbuf(u64 insn);
void FMUL_imm(u64 insn);
void FMUL32I(u64 insn);
void FSET_reg(u64 insn);
void FSET_cbuf(u64 insn);
void FSET_imm(u64 insn);
void FSETP_reg(u64 insn);
void FSETP_cbuf(u64 insn);
void FSETP_imm(u64 insn);
void FSWZADD(u64 insn);
void GETCRSPTR(u64 insn);
void GETLMEMBASE(u64 insn);
void HADD2_reg(u64 insn);
void HADD2_cbuf(u64 insn);
void HADD2_imm(u64 insn);
void HADD2_32I(u64 insn);
void HFMA2_reg(u64 insn);
void HFMA2_rc(u64 insn);
void HFMA2_cr(u64 insn);
void HFMA2_imm(u64 insn);
void HFMA2_32I(u64 insn);
void HMUL2_reg(u64 insn);
void HMUL2_cbuf(u64 insn);
void HMUL2_imm(u64 insn);
void HMUL2_32I(u64 insn);
void HSET2_reg(u64 insn);
void HSET2_cbuf(u64 insn);
void HSET2_imm(u64 insn);
void HSETP2_reg(u64 insn);
void HSETP2_cbuf(u64 insn);
void HSETP2_imm(u64 insn);
void I2F_reg(u64 insn);
void I2F_cbuf(u64 insn);
void I2F_imm(u64 insn);
void I2I_reg(u64 insn);
void I2I_cbuf(u64 insn);
void I2I_imm(u64 insn);
void IADD_reg(u64 insn);
void IADD_cbuf(u64 insn);
void IADD_imm(u64 insn);
void IADD3_reg(u64 insn);
void IADD3_cbuf(u64 insn);
void IADD3_imm(u64 insn);
void IADD32I(u64 insn);
void ICMP_reg(u64 insn);
void ICMP_rc(u64 insn);
void ICMP_cr(u64 insn);
void ICMP_imm(u64 insn);
void IDE(u64 insn);
void IDP_reg(u64 insn);
void IDP_imm(u64 insn);
void IMAD_reg(u64 insn);
void IMAD_rc(u64 insn);
void IMAD_cr(u64 insn);
void IMAD_imm(u64 insn);
void IMAD32I(u64 insn);
void IMADSP_reg(u64 insn);
void IMADSP_rc(u64 insn);
void IMADSP_cr(u64 insn);
void IMADSP_imm(u64 insn);
void IMNMX_reg(u64 insn);
void IMNMX_cbuf(u64 insn);
void IMNMX_imm(u64 insn);
void IMUL_reg(u64 insn);
void IMUL_cbuf(u64 insn);
void IMUL_imm(u64 insn);
void IMUL32I(u64 insn);
void IPA(u64 insn);
void ISBERD(u64 insn);
void ISCADD_reg(u64 insn);
void ISCADD_cbuf(u64 insn);
void ISCADD_imm(u64 insn);
void ISCADD32I(u64 insn);
void ISET_reg(u64 insn);
void ISET_cbuf(u64 insn);
void ISET_imm(u64 insn);
void ISETP_reg(u64 insn);
void ISETP_cbuf(u64 insn);
void ISETP_imm(u64 insn);
void JCAL(u64 insn);
void JMP(u64 insn);
void JMX(u64 insn);
void KIL();
void LD(u64 insn);
void LDC(u64 insn);
void LDG(u64 insn);
void LDL(u64 insn);
void LDS(u64 insn);
void LEA_hi_reg(u64 insn);
void LEA_hi_cbuf(u64 insn);
void LEA_lo_reg(u64 insn);
void LEA_lo_cbuf(u64 insn);
void LEA_lo_imm(u64 insn);
void LEPC(u64 insn);
void LONGJMP(u64 insn);
void LOP_reg(u64 insn);
void LOP_cbuf(u64 insn);
void LOP_imm(u64 insn);
void LOP3_reg(u64 insn);
void LOP3_cbuf(u64 insn);
void LOP3_imm(u64 insn);
void LOP32I(u64 insn);
void MEMBAR(u64 insn);
void MOV_reg(u64 insn);
void MOV_cbuf(u64 insn);
void MOV_imm(u64 insn);
void MOV32I(u64 insn);
void MUFU(u64 insn);
void NOP(u64 insn);
void OUT_reg(u64 insn);
void OUT_cbuf(u64 insn);
void OUT_imm(u64 insn);
void P2R_reg(u64 insn);
void P2R_cbuf(u64 insn);
void P2R_imm(u64 insn);
void PBK();
void PCNT();
void PEXIT(u64 insn);
void PIXLD(u64 insn);
void PLONGJMP(u64 insn);
void POPC_reg(u64 insn);
void POPC_cbuf(u64 insn);
void POPC_imm(u64 insn);
void PRET(u64 insn);
void PRMT_reg(u64 insn);
void PRMT_rc(u64 insn);
void PRMT_cr(u64 insn);
void PRMT_imm(u64 insn);
void PSET(u64 insn);
void PSETP(u64 insn);
void R2B(u64 insn);
void R2P_reg(u64 insn);
void R2P_cbuf(u64 insn);
void R2P_imm(u64 insn);
void RAM(u64 insn);
void RED(u64 insn);
void RET(u64 insn);
void RRO_reg(u64 insn);
void RRO_cbuf(u64 insn);
void RRO_imm(u64 insn);
void RTT(u64 insn);
void S2R(u64 insn);
void SAM(u64 insn);
void SEL_reg(u64 insn);
void SEL_cbuf(u64 insn);
void SEL_imm(u64 insn);
void SETCRSPTR(u64 insn);
void SETLMEMBASE(u64 insn);
void SHF_l_reg(u64 insn);
void SHF_l_imm(u64 insn);
void SHF_r_reg(u64 insn);
void SHF_r_imm(u64 insn);
void SHFL(u64 insn);
void SHL_reg(u64 insn);
void SHL_cbuf(u64 insn);
void SHL_imm(u64 insn);
void SHR_reg(u64 insn);
void SHR_cbuf(u64 insn);
void SHR_imm(u64 insn);
void SSY();
void ST(u64 insn);
void STG(u64 insn);
void STL(u64 insn);
void STP(u64 insn);
void STS(u64 insn);
void SUATOM(u64 insn);
void SUATOM_cas(u64 insn);
void SULD(u64 insn);
void SURED(u64 insn);
void SUST(u64 insn);
void SYNC(u64 insn);
void TEX(u64 insn);
void TEX_b(u64 insn);
void TEXS(u64 insn);
void TLD(u64 insn);
void TLD_b(u64 insn);
void TLD4(u64 insn);
void TLD4_b(u64 insn);
void TLD4S(u64 insn);
void TLDS(u64 insn);
void TMML(u64 insn);
void TMML_b(u64 insn);
void TXA(u64 insn);
void TXD(u64 insn);
void TXD_b(u64 insn);
void TXQ(u64 insn);
void TXQ_b(u64 insn);
void VABSDIFF(u64 insn);
void VABSDIFF4(u64 insn);
void VADD(u64 insn);
void VMAD(u64 insn);
void VMNMX(u64 insn);
void VOTE(u64 insn);
void VOTE_vtg(u64 insn);
void VSET(u64 insn);
void VSETP(u64 insn);
void VSHL(u64 insn);
void VSHR(u64 insn);
void XMAD_reg(u64 insn);
void XMAD_rc(u64 insn);
void XMAD_cr(u64 insn);
void XMAD_imm(u64 insn);
// Register reads: X = 32-bit integer, L = 64-bit integer, F = 32-bit float, D = 64-bit float.
[[nodiscard]] IR::U32 X(IR::Reg reg);
[[nodiscard]] IR::U64 L(IR::Reg reg);
[[nodiscard]] IR::F32 F(IR::Reg reg);
[[nodiscard]] IR::F64 D(IR::Reg reg);
// Register writes, using the same letter convention as the reads above.
void X(IR::Reg dest_reg, const IR::U32& value);
void L(IR::Reg dest_reg, const IR::U64& value);
void F(IR::Reg dest_reg, const IR::F32& value);
void D(IR::Reg dest_reg, const IR::F64& value);
// Source register operands; the numeric suffix is the bit position of the register index
// inside the instruction word.
[[nodiscard]] IR::U32 GetReg8(u64 insn);
[[nodiscard]] IR::U32 GetReg20(u64 insn);
[[nodiscard]] IR::U32 GetReg39(u64 insn);
[[nodiscard]] IR::F32 GetFloatReg8(u64 insn);
[[nodiscard]] IR::F32 GetFloatReg20(u64 insn);
[[nodiscard]] IR::F32 GetFloatReg39(u64 insn);
[[nodiscard]] IR::F64 GetDoubleReg20(u64 insn);
[[nodiscard]] IR::F64 GetDoubleReg39(u64 insn);
// Constant buffer operands.
[[nodiscard]] IR::U32 GetCbuf(u64 insn);
[[nodiscard]] IR::F32 GetFloatCbuf(u64 insn);
[[nodiscard]] IR::F64 GetDoubleCbuf(u64 insn);
[[nodiscard]] IR::U64 GetPackedCbuf(u64 insn);
// Immediate operands.
[[nodiscard]] IR::U32 GetImm20(u64 insn);
[[nodiscard]] IR::F32 GetFloatImm20(u64 insn);
[[nodiscard]] IR::F64 GetDoubleImm20(u64 insn);
[[nodiscard]] IR::U64 GetPackedImm20(u64 insn);
[[nodiscard]] IR::U32 GetImm32(u64 insn);
[[nodiscard]] IR::F32 GetFloatImm32(u64 insn);
// Flag writes.
void SetZFlag(const IR::U1& value);
void SetSFlag(const IR::U1& value);
void SetCFlag(const IR::U1& value);
void SetOFlag(const IR::U1& value);
// Flag resets; each one writes false to the corresponding flag (ResetZero targets the Z flag).
void ResetZero();
void ResetSFlag();
void ResetCFlag();
void ResetOFlag();
};
} // namespace Shader::Maxwell

View File

@@ -0,0 +1,105 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "common/bit_field.h"
#include "common/common_types.h"
#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
namespace Shader::Maxwell {
namespace {
// Shared implementation of 32-bit integer addition.
// Operand A is read from the register in bits 8-15 and the result is written to the register in
// bits 0-7. Callers decode the modifier flags from their own encodings:
//   neg_a - negate operand A
//   po    - add one to the result (".PO")
//   sat   - saturation (not implemented)
//   x     - add the C flag as carry-in
//   cc    - update the Z/S/C/O flags from the result
void IADD(TranslatorVisitor& v, u64 insn, const IR::U32 op_b, bool neg_a, bool po, bool sat, bool x,
          bool cc) {
    union {
        u64 raw;
        BitField<0, 8, IR::Reg> dest_reg;
        BitField<8, 8, IR::Reg> src_a;
    } const fields{insn};

    if (sat) {
        throw NotImplementedException("IADD SAT");
    }
    if (x && po) {
        throw NotImplementedException("IADD X+PO");
    }
    // Fetch operand A and apply the optional negation
    const IR::U32 raw_a{v.X(fields.src_a)};
    const IR::U32 op_a{neg_a ? IR::U32{v.ir.INeg(raw_a)} : raw_a};

    // Base sum, followed by the optional carry-in and plus-one adjustments
    IR::U32 sum{v.ir.IAdd(op_a, op_b)};
    if (x) {
        const IR::U1 carry_flag{v.ir.GetCFlag()};
        const IR::U32 carry_in{v.ir.Select(carry_flag, v.ir.Imm32(1), v.ir.Imm32(0))};
        sum = v.ir.IAdd(sum, carry_in);
    }
    if (po) {
        sum = v.ir.IAdd(sum, v.ir.Imm32(1));
    }
    if (cc) {
        // TODO: Does this grab the result pre-PO or after?
        if (po) {
            throw NotImplementedException("IADD CC+PO");
        }
        // TODO: How does CC behave when X is set?
        if (x) {
            throw NotImplementedException("IADD X+CC");
        }
        // Flags are derived from the final addition
        v.SetZFlag(v.ir.GetZeroFromOp(sum));
        v.SetSFlag(v.ir.GetSignFromOp(sum));
        v.SetCFlag(v.ir.GetCarryFromOp(sum));
        v.SetOFlag(v.ir.GetOverflowFromOp(sum));
    }
    v.X(fields.dest_reg, sum);
}
// Decodes the modifier bits shared by the register, constant buffer and 20-bit immediate forms
// of IADD and forwards to the common implementation.
// Bits 48 and 49 are the per-operand negate flags, but when both are set they select the ".PO"
// (plus one) mode instead, so neither operand may be negated in that case.
void IADD(TranslatorVisitor& v, u64 insn, IR::U32 op_b) {
    union {
        u64 insn;
        BitField<43, 1, u64> x;
        BitField<47, 1, u64> cc;
        BitField<48, 2, u64> three_for_po;
        BitField<48, 1, u64> neg_b;
        BitField<49, 1, u64> neg_a;
        BitField<50, 1, u64> sat;
    } const iadd{insn};

    const bool po{iadd.three_for_po == 3};
    // The negate bits only keep their individual meaning outside of .PO mode
    const bool neg_a{!po && iadd.neg_a != 0};
    const bool neg_b{!po && iadd.neg_b != 0};
    if (neg_b) {
        op_b = v.ir.INeg(op_b);
    }
    IADD(v, insn, op_b, neg_a, po, iadd.sat != 0, iadd.x != 0, iadd.cc != 0);
}
} // Anonymous namespace
// IADD with operand B read from the register in bits 20-27.
void TranslatorVisitor::IADD_reg(u64 insn) {
    const IR::U32 op_b{GetReg20(insn)};
    IADD(*this, insn, op_b);
}

// IADD with operand B read from a constant buffer.
void TranslatorVisitor::IADD_cbuf(u64 insn) {
    const IR::U32 op_b{GetCbuf(insn)};
    IADD(*this, insn, op_b);
}

// IADD with operand B taken from the 20-bit immediate.
void TranslatorVisitor::IADD_imm(u64 insn) {
    const IR::U32 op_b{GetImm20(insn)};
    IADD(*this, insn, op_b);
}
// IADD with a 32-bit immediate as operand B.
// Bits 55-56 select ".PO" when both are set; bit 56 alone negates operand A.
void TranslatorVisitor::IADD32I(u64 insn) {
    union {
        u64 raw;
        BitField<52, 1, u64> cc;
        BitField<53, 1, u64> x;
        BitField<54, 1, u64> sat;
        BitField<55, 2, u64> three_for_po;
        BitField<56, 1, u64> neg_a;
    } const fields{insn};

    const bool plus_one{fields.three_for_po == 3};
    // The negate bit is part of the .PO selector, so ignore it in that mode
    const bool negate_a{!plus_one && fields.neg_a != 0};
    const bool saturate{fields.sat != 0};
    const bool carry_in{fields.x != 0};
    const bool write_flags{fields.cc != 0};
    const IR::U32 immediate{GetImm32(insn)};
    IADD(*this, insn, immediate, negate_a, plus_one, saturate, carry_in, write_flags);
}
} // namespace Shader::Maxwell

View File

@@ -0,0 +1,122 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "common/bit_field.h"
#include "common/common_types.h"
#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
namespace Shader::Maxwell {
namespace {
// Shift applied to the intermediate sum of IADD3; values match the 2-bit instruction field.
enum class Shift : u64 {
    None = 0,
    Right = 1,
    Left = 2,
};
// Selects which part of a 32-bit operand IADD3 uses; values match the 2-bit instruction field.
enum class Half : u64 {
    All = 0,
    Lower = 1,
    Upper = 2,
};
// Returns the selected part of a 32-bit value: the whole value, or an unsigned 16-bit field
// extracted at bit 0 (Lower) or bit 16 (Upper).
// Throws NotImplementedException for any other selector.
[[nodiscard]] IR::U32 IntegerHalf(IR::IREmitter& ir, const IR::U32& value, Half half) {
    constexpr bool is_signed{false};
    if (half == Half::All) {
        return value;
    }
    if (half == Half::Lower) {
        return ir.BitFieldExtract(value, ir.Imm32(0), ir.Imm32(16), is_signed);
    }
    if (half == Half::Upper) {
        return ir.BitFieldExtract(value, ir.Imm32(16), ir.Imm32(16), is_signed);
    }
    throw NotImplementedException("Invalid half");
}
// Applies the IADD3 shift mode to a value: no shift, logical right shift by 16, or logical left
// shift by 16. Throws NotImplementedException for any other selector.
[[nodiscard]] IR::U32 IntegerShift(IR::IREmitter& ir, const IR::U32& value, Shift shift) {
    if (shift == Shift::None) {
        return value;
    }
    if (shift == Shift::Right) {
        // 33-bit RS IADD3 edge case: when the operation that produced value carried out,
        // 0x10000 is added to the shifted result
        const IR::U1 carried_out{ir.GetCarryFromOp(value)};
        const IR::U32 shifted{ir.ShiftRightLogical(value, ir.Imm32(16))};
        const IR::U32 with_carry{ir.IAdd(shifted, ir.Imm32(0x10000))};
        return IR::U32{ir.Select(carried_out, with_carry, shifted)};
    }
    if (shift == Shift::Left) {
        return ir.ShiftLeftLogical(value, ir.Imm32(16));
    }
    throw NotImplementedException("Invalid shift");
}
// Shared implementation of the three-input integer add.
// Computes shift(a + b [+ carry]) + c, where each operand may be negated through bits 49-51,
// writes the result to the register in bits 0-7 and optionally updates the flags (bit 47).
void IADD3(TranslatorVisitor& v, u64 insn, IR::U32 op_a, IR::U32 op_b, IR::U32 op_c,
           Shift shift = Shift::None) {
    union {
        u64 insn;
        BitField<0, 8, IR::Reg> dest_reg;
        BitField<47, 1, u64> cc;
        BitField<48, 1, u64> x;
        BitField<49, 1, u64> neg_c;
        BitField<50, 1, u64> neg_b;
        BitField<51, 1, u64> neg_a;
    } const fields{insn};

    const bool use_carry_in{fields.x != 0};

    // Per-operand negation
    if (fields.neg_a != 0) {
        op_a = v.ir.INeg(op_a);
    }
    if (fields.neg_b != 0) {
        op_b = v.ir.INeg(op_b);
    }
    if (fields.neg_c != 0) {
        op_c = v.ir.INeg(op_c);
    }

    // First addition, with the optional carry-in
    IR::U32 partial_sum{v.ir.IAdd(op_a, op_b)};
    if (use_carry_in) {
        // TODO: How does RS behave when X is set?
        if (shift == Shift::Right) {
            throw NotImplementedException("IADD3 X+RS");
        }
        const IR::U1 carry_flag{v.ir.GetCFlag()};
        const IR::U32 carry_in{v.ir.Select(carry_flag, v.ir.Imm32(1), v.ir.Imm32(0))};
        partial_sum = v.ir.IAdd(partial_sum, carry_in);
    }

    // Shift the partial sum, then add the third operand
    const IR::U32 shifted_sum{IntegerShift(v.ir, partial_sum, shift)};
    const IR::U32 result{v.ir.IAdd(shifted_sum, op_c)};
    v.X(fields.dest_reg, result);

    if (fields.cc == 0) {
        return;
    }
    // TODO: How does CC behave when X is set?
    if (use_carry_in) {
        throw NotImplementedException("IADD3 X+CC");
    }
    v.SetZFlag(v.ir.GetZeroFromOp(result));
    v.SetSFlag(v.ir.GetSignFromOp(result));
    v.SetCFlag(v.ir.GetCarryFromOp(result));
    // The overflow flag also accounts for the first addition: it is raised when the partial sum
    // compares below operand A (presumably an unsigned comparison - confirm)
    const IR::U1 first_add_overflow{v.ir.ILessThan(partial_sum, op_a, false)};
    v.SetOFlag(v.ir.LogicalOr(v.ir.GetOverflowFromOp(result), first_add_overflow));
}
} // Anonymous namespace
// Register form of IADD3. Each operand has its own half selector (bits 31-36) and the partial
// sum has a shift mode (bits 37-38).
void TranslatorVisitor::IADD3_reg(u64 insn) {
    union {
        u64 insn;
        BitField<37, 2, Shift> shift;
        BitField<35, 2, Half> half_a;
        BitField<33, 2, Half> half_b;
        BitField<31, 2, Half> half_c;
    } const fields{insn};

    const IR::U32 op_a{IntegerHalf(ir, GetReg8(insn), fields.half_a)};
    const IR::U32 op_b{IntegerHalf(ir, GetReg20(insn), fields.half_b)};
    const IR::U32 op_c{IntegerHalf(ir, GetReg39(insn), fields.half_c)};
    const Shift shift_mode{fields.shift.Value()};
    IADD3(*this, insn, op_a, op_b, op_c, shift_mode);
}
// Constant buffer form of IADD3: operands are the register in bits 8-15, the constant buffer
// value and the register in bits 39-46. No half selection or shift is applied.
void TranslatorVisitor::IADD3_cbuf(u64 insn) {
IADD3(*this, insn, GetReg8(insn), GetCbuf(insn), GetReg39(insn));
}
// Immediate form of IADD3: operands are the register in bits 8-15, the 20-bit immediate and the
// register in bits 39-46. No half selection or shift is applied.
void TranslatorVisitor::IADD3_imm(u64 insn) {
IADD3(*this, insn, GetReg8(insn), GetImm20(insn), GetReg39(insn));
}
} // namespace Shader::Maxwell

View File

@@ -0,0 +1,48 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "common/bit_field.h"
#include "common/common_types.h"
#include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h"
#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
namespace Shader::Maxwell {
namespace {
// Shared implementation of ICMP.
// Compares operand against zero using the comparison in bits 49-51 (signedness from bit 48).
// When the comparison holds the destination receives the register in bits 8-15, otherwise src_a.
void ICMP(TranslatorVisitor& v, u64 insn, const IR::U32& src_a, const IR::U32& operand) {
    union {
        u64 insn;
        BitField<0, 8, IR::Reg> dest_reg;
        BitField<8, 8, IR::Reg> src_reg;
        BitField<48, 1, u64> is_signed;
        BitField<49, 3, CompareOp> compare_op;
    } const fields{insn};

    const bool signed_compare{fields.is_signed != 0};
    const IR::U32 zero{v.ir.Imm32(0)};
    const IR::U1 condition{
        IntegerCompare(v.ir, operand, zero, fields.compare_op, signed_compare)};

    const IR::U32 value_if_true{v.X(fields.src_reg)};
    const IR::U32 selected{v.ir.Select(condition, value_if_true, src_a)};
    v.X(fields.dest_reg, selected);
}
} // Anonymous namespace
// ICMP, register form: the fallback value is the register in bits 20-27 and the compared operand
// is the register in bits 39-46.
void TranslatorVisitor::ICMP_reg(u64 insn) {
ICMP(*this, insn, GetReg20(insn), GetReg39(insn));
}
// ICMP, register/cbuf form: the fallback value is the register in bits 39-46 and the compared
// operand comes from the constant buffer.
void TranslatorVisitor::ICMP_rc(u64 insn) {
ICMP(*this, insn, GetReg39(insn), GetCbuf(insn));
}
// ICMP, cbuf/register form: the fallback value comes from the constant buffer and the compared
// operand is the register in bits 39-46.
void TranslatorVisitor::ICMP_cr(u64 insn) {
ICMP(*this, insn, GetCbuf(insn), GetReg39(insn));
}
// ICMP, immediate form: the fallback value is the 20-bit immediate and the compared operand is
// the register in bits 39-46.
void TranslatorVisitor::ICMP_imm(u64 insn) {
ICMP(*this, insn, GetImm20(insn), GetReg39(insn));
}
} // namespace Shader::Maxwell

View File

@@ -0,0 +1,123 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "common/bit_field.h"
#include "common/common_types.h"
#include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h"
#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
namespace Shader::Maxwell {
namespace {
// Integer comparison used when the X (extended) modifier is set: it takes the current C and Z
// flags into account.
// The comparison is evaluated on intermediate = operand_1 + ~operand_2 + carry, which in two's
// complement equals operand_1 - operand_2 - 1 + carry.
// Throws NotImplementedException for a compare_op outside the enumerated values.
IR::U1 ExtendedIntegerCompare(IR::IREmitter& ir, const IR::U32& operand_1, const IR::U32& operand_2,
CompareOp compare_op, bool is_signed) {
const IR::U32 zero{ir.Imm32(0)};
// C flag as a 0/1 integer
const IR::U32 carry{ir.Select(ir.GetCFlag(), ir.Imm32(1), zero)};
const IR::U1 z_flag{ir.GetZFlag()};
const IR::U32 intermediate{ir.IAdd(ir.IAdd(operand_1, ir.BitwiseNot(operand_2)), carry)};
// For unsigned comparisons the signed test on the intermediate is inverted whenever exactly
// one of the operands has its top bit set (tested as "signed less than zero")
const IR::U1 flip_logic{is_signed ? ir.Imm1(false)
: ir.LogicalXor(ir.ILessThan(operand_1, zero, true),
ir.ILessThan(operand_2, zero, true))};
switch (compare_op) {
case CompareOp::False:
return ir.Imm1(false);
case CompareOp::LessThan:
return IR::U1{ir.Select(flip_logic, ir.IGreaterThanEqual(intermediate, zero, true),
ir.ILessThan(intermediate, zero, true))};
case CompareOp::Equal:
// Equal only when this word is zero and the incoming Z flag is set as well
return ir.LogicalAnd(ir.IEqual(intermediate, zero), z_flag);
case CompareOp::LessThanEqual: {
const IR::U1 base_cmp{ir.Select(flip_logic, ir.IGreaterThanEqual(intermediate, zero, true),
ir.ILessThan(intermediate, zero, true))};
return ir.LogicalOr(base_cmp, ir.LogicalAnd(ir.IEqual(intermediate, zero), z_flag));
}
case CompareOp::GreaterThan: {
const IR::U1 base_cmp{ir.Select(flip_logic, ir.ILessThanEqual(intermediate, zero, true),
ir.IGreaterThan(intermediate, zero, true))};
const IR::U1 not_z{ir.LogicalNot(z_flag)};
return ir.LogicalOr(base_cmp, ir.LogicalAnd(ir.IEqual(intermediate, zero), not_z));
}
case CompareOp::NotEqual:
return ir.LogicalOr(ir.INotEqual(intermediate, zero),
ir.LogicalAnd(ir.IEqual(intermediate, zero), ir.LogicalNot(z_flag)));
case CompareOp::GreaterThanEqual: {
const IR::U1 base_cmp{ir.Select(flip_logic, ir.ILessThan(intermediate, zero, true),
ir.IGreaterThanEqual(intermediate, zero, true))};
return ir.LogicalOr(base_cmp, ir.LogicalAnd(ir.IEqual(intermediate, zero), z_flag));
}
case CompareOp::True:
return ir.Imm1(true);
default:
throw NotImplementedException("Invalid compare op {}", compare_op);
}
}
// Dispatches to the flag-aware comparison when the X modifier is set and to the plain integer
// comparison otherwise.
IR::U1 IsetCompare(IR::IREmitter& ir, const IR::U32& operand_1, const IR::U32& operand_2,
                   CompareOp compare_op, bool is_signed, bool x) {
    if (x) {
        return ExtendedIntegerCompare(ir, operand_1, operand_2, compare_op, is_signed);
    }
    return IntegerCompare(ir, operand_1, operand_2, compare_op, is_signed);
}
// Shared implementation of ISET.
// Compares the register in bits 8-15 against src_b, combines the outcome with a (possibly
// negated) predicate and writes either a "true" pattern or zero to the destination register.
// The "true" pattern is 0x3f800000 when the BF bit (44) is set and all ones otherwise.
void ISET(TranslatorVisitor& v, u64 insn, const IR::U32& src_b) {
    union {
        u64 insn;
        BitField<0, 8, IR::Reg> dest_reg;
        BitField<8, 8, IR::Reg> src_reg;
        BitField<39, 3, IR::Pred> pred;
        BitField<42, 1, u64> neg_pred;
        BitField<43, 1, u64> x;
        BitField<44, 1, u64> bf;
        BitField<45, 2, BooleanOp> bop;
        BitField<47, 1, u64> cc;
        BitField<48, 1, u64> is_signed;
        BitField<49, 3, CompareOp> compare_op;
    } const fields{insn};

    const IR::U32 src_a{v.X(fields.src_reg)};
    const bool signed_compare{fields.is_signed != 0};
    const bool extended{fields.x != 0};
    const bool float_result{fields.bf != 0};
    const IR::U32 zero{v.ir.Imm32(0)};

    // Comparison, then combination with the predicate operand
    const IR::U1 comparison{
        IsetCompare(v.ir, src_a, src_b, fields.compare_op, signed_compare, extended)};
    const IR::U1 raw_pred{v.ir.GetPred(fields.pred)};
    const IR::U1 pred{fields.neg_pred != 0 ? v.ir.LogicalNot(raw_pred) : raw_pred};
    const IR::U1 combined{PredicateCombine(v.ir, comparison, pred, fields.bop)};

    const IR::U32 true_pattern{float_result ? v.ir.Imm32(0x3f800000) : v.ir.Imm32(-1)};
    const IR::U32 result{v.ir.Select(combined, true_pattern, zero)};
    v.X(fields.dest_reg, result);

    if (fields.cc == 0) {
        return;
    }
    if (extended) {
        throw NotImplementedException("ISET.CC + X");
    }
    const IR::U1 is_zero{v.ir.IEqual(result, zero)};
    v.SetZFlag(is_zero);
    // With an integer result the sign flag follows "result is non-zero"; with BF it is cleared
    if (float_result) {
        v.ResetSFlag();
    } else {
        v.SetSFlag(v.ir.LogicalNot(is_zero));
    }
    v.ResetCFlag();
    v.ResetOFlag();
}
} // Anonymous namespace
// ISET with the second operand read from the register in bits 20-27.
void TranslatorVisitor::ISET_reg(u64 insn) {
    const IR::U32 src_b{GetReg20(insn)};
    ISET(*this, insn, src_b);
}

// ISET with the second operand read from a constant buffer.
void TranslatorVisitor::ISET_cbuf(u64 insn) {
    const IR::U32 src_b{GetCbuf(insn)};
    ISET(*this, insn, src_b);
}

// ISET with the second operand taken from the 20-bit immediate.
void TranslatorVisitor::ISET_imm(u64 insn) {
    const IR::U32 src_b{GetImm20(insn)};
    ISET(*this, insn, src_b);
}
} // namespace Shader::Maxwell

View File

@@ -0,0 +1,180 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "common/bit_field.h"
#include "common/common_types.h"
#include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h"
#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
namespace Shader::Maxwell {
namespace {
// Destination float format of I2F as encoded in its 2-bit field; zero has no enumerator.
enum class FloatFormat : u64 {
    F16 = 1,
    F32,
    F64,
};
// Source integer width of I2F as encoded in its 2-bit field.
enum class IntFormat : u64 {
    U8,
    U16,
    U32,
    U64,
};
// Bit layout of the I2F fields that are common to all operand forms, listed by bit position.
union Encoding {
    u64 raw;
    BitField<0, 8, IR::Reg> dest_reg;           // Destination register
    BitField<8, 2, FloatFormat> float_format;   // Destination float format
    BitField<10, 2, IntFormat> int_format;      // Source integer width
    BitField<13, 1, u64> is_signed;             // Source is signed
    BitField<39, 2, FpRounding> fp_rounding;    // Rounding mode of the conversion
    BitField<41, 2, u64> selector;              // Byte selector for 8/16-bit sources
    BitField<45, 1, u64> neg;                   // Negate
    BitField<47, 1, u64> cc;                    // Update condition codes
    BitField<49, 1, u64> abs;                   // Absolute value
};
// Tells whether the instruction's source integer format is the 64-bit one.
bool Is64(u64 insn) {
    const Encoding encoding{insn};
    return encoding.int_format == IntFormat::U64;
}
// Width in bits of a float format. Throws NotImplementedException for values without an
// enumerator.
int BitSize(FloatFormat format) {
    if (format == FloatFormat::F16) {
        return 16;
    }
    if (format == FloatFormat::F32) {
        return 32;
    }
    if (format == FloatFormat::F64) {
        return 64;
    }
    throw NotImplementedException("Invalid float format {}", format);
}
// Absolute value of an integer narrower than 32 bits that is held in a 32-bit value.
// Uses the (value + mask) ^ mask form, where mask is value arithmetically shifted right by
// bitsize - 1. The most negative value of the given width, -(1 << (bitsize - 1)), is returned
// unchanged.
IR::U32 SmallAbs(TranslatorVisitor& v, const IR::U32& value, int bitsize) {
    const int sign_position{bitsize - 1};
    const IR::U32 most_negative{v.ir.Imm32(-(1 << sign_position))};
    const IR::U32 sign_mask{v.ir.ShiftRightArithmetic(value, v.ir.Imm32(sign_position))};
    const IR::U32 biased{v.ir.IAdd(value, sign_mask)};
    const IR::U32 magnitude{v.ir.BitwiseXor(biased, sign_mask)};
    const IR::U1 is_most_negative{v.ir.IEqual(value, most_negative)};
    return IR::U32{v.ir.Select(is_most_negative, value, magnitude)};
}
// Shared implementation of the integer to float conversion.
// Steps: narrow/abs the source according to the integer format, convert it with the encoded
// rounding mode, apply the optional negation and store the result in the encoded float format.
// Throws NotImplementedException for CC, unsupported selectors, invalid float formats and
// unaligned 64-bit destinations.
void I2F(TranslatorVisitor& v, u64 insn, IR::U32U64 src) {
const Encoding i2f{insn};
if (i2f.cc != 0) {
throw NotImplementedException("I2F CC");
}
const bool is_signed{i2f.is_signed != 0};
int src_bitsize{};
switch (i2f.int_format) {
case IntFormat::U8:
// Extract the byte chosen by the selector (bit offset = selector * 8)
src = v.ir.BitFieldExtract(src, v.ir.Imm32(static_cast<u32>(i2f.selector) * 8),
v.ir.Imm32(8), is_signed);
// NOTE(review): unlike the 32/64-bit path below, abs is applied here even when the source
// is unsigned - confirm this is intended
if (i2f.abs != 0) {
src = SmallAbs(v, src, 8);
}
src_bitsize = 8;
break;
case IntFormat::U16:
// Only selectors 0 and 2 are accepted for 16-bit sources
if (i2f.selector == 1 || i2f.selector == 3) {
throw NotImplementedException("Invalid U16 selector {}", i2f.selector.Value());
}
src = v.ir.BitFieldExtract(src, v.ir.Imm32(static_cast<u32>(i2f.selector) * 8),
v.ir.Imm32(16), is_signed);
// NOTE(review): same remark as the U8 case regarding abs on unsigned sources
if (i2f.abs != 0) {
src = SmallAbs(v, src, 16);
}
src_bitsize = 16;
break;
case IntFormat::U32:
case IntFormat::U64:
if (i2f.selector != 0) {
throw NotImplementedException("Unexpected selector {}", i2f.selector.Value());
}
if (i2f.abs != 0 && is_signed) {
src = v.ir.IAbs(src);
}
src_bitsize = i2f.int_format == IntFormat::U64 ? 64 : 32;
break;
}
// 8 and 16-bit sources have been widened into a 32-bit value, so they convert as 32-bit
const int conversion_src_bitsize{i2f.int_format == IntFormat::U64 ? 64 : 32};
const int dst_bitsize{BitSize(i2f.float_format)};
const IR::FpControl fp_control{
.no_contraction = false,
.rounding = CastFpRounding(i2f.fp_rounding),
.fmz_mode = IR::FmzMode::DontCare,
};
auto value{v.ir.ConvertIToF(dst_bitsize, conversion_src_bitsize, is_signed, src, fp_control)};
if (i2f.neg != 0) {
if (i2f.abs != 0 || !is_signed) {
// We know the value is positive
value = v.ir.FPNeg(value);
} else {
// Only negate if the input isn't the lowest value
IR::U1 is_least;
if (src_bitsize == 64) {
is_least = v.ir.IEqual(src, v.ir.Imm64(std::numeric_limits<s64>::min()));
} else if (src_bitsize == 32) {
is_least = v.ir.IEqual(src, v.ir.Imm32(std::numeric_limits<s32>::min()));
} else {
const IR::U32 least_value{v.ir.Imm32(-(1 << (src_bitsize - 1)))};
is_least = v.ir.IEqual(src, least_value);
}
value = IR::F16F32F64{v.ir.Select(is_least, value, v.ir.FPNeg(value))};
}
}
switch (i2f.float_format) {
case FloatFormat::F16: {
// The half result is packed together with a zero half into one 32-bit register
const IR::F16 zero{v.ir.FPConvert(16, v.ir.Imm32(0.0f))};
v.X(i2f.dest_reg, v.ir.PackFloat2x16(v.ir.CompositeConstruct(value, zero)));
break;
}
case FloatFormat::F32:
v.F(i2f.dest_reg, value);
break;
case FloatFormat::F64: {
// A double occupies two consecutive registers starting at an aligned index
if (!IR::IsAligned(i2f.dest_reg, 2)) {
throw NotImplementedException("Unaligned destination {}", i2f.dest_reg.Value());
}
const IR::Value vector{v.ir.UnpackDouble2x32(value)};
for (int i = 0; i < 2; ++i) {
v.X(i2f.dest_reg + i, IR::U32{v.ir.CompositeExtract(vector, static_cast<size_t>(i))});
}
break;
}
default:
throw NotImplementedException("Invalid float format {}", i2f.float_format.Value());
}
}
} // Anonymous namespace
// Register form of I2F. A 64-bit source is assembled from the register in bits 20-27 and the
// register following it; narrower sources use the single register.
void TranslatorVisitor::I2F_reg(u64 insn) {
    if (!Is64(insn)) {
        I2F(*this, insn, GetReg20(insn));
        return;
    }
    union {
        u64 raw;
        BitField<20, 8, IR::Reg> reg;
    } const fields{insn};
    const IR::Value words{
        ir.CompositeConstruct(ir.GetReg(fields.reg), ir.GetReg(fields.reg + 1))};
    I2F(*this, insn, ir.PackUint2x32(words));
}
// Constant buffer form of I2F; 64-bit sources are read as a packed pair of words.
void TranslatorVisitor::I2F_cbuf(u64 insn) {
    if (!Is64(insn)) {
        I2F(*this, insn, GetCbuf(insn));
        return;
    }
    I2F(*this, insn, GetPackedCbuf(insn));
}

// Immediate form of I2F; 64-bit sources use the packed 20-bit immediate.
void TranslatorVisitor::I2F_imm(u64 insn) {
    if (!Is64(insn)) {
        I2F(*this, insn, GetImm20(insn));
        return;
    }
    I2F(*this, insn, GetPackedImm20(insn));
}
} // namespace Shader::Maxwell

View File

@@ -0,0 +1,82 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "common/bit_field.h"
#include "common/common_types.h"
#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
namespace Shader::Maxwell {
namespace {
// Operand width/signedness selector of SHF; values match the 2-bit instruction field.
enum class MaxShift : u64 {
    U32 = 0,
    Undefined = 1,
    U64 = 2,
    S64 = 3,
};
// Shifts a 64-bit value: left shifts are always logical, right shifts are arithmetic when
// is_signed is set and logical otherwise.
IR::U64 PackedShift(IR::IREmitter& ir, const IR::U64& packed_int, const IR::U32& safe_shift,
                    bool right_shift, bool is_signed) {
    if (right_shift) {
        if (is_signed) {
            return ir.ShiftRightArithmetic(packed_int, safe_shift);
        }
        return ir.ShiftRightLogical(packed_int, safe_shift);
    }
    return ir.ShiftLeftLogical(packed_int, safe_shift);
}
// Shared implementation of the funnel shift.
// The register in bits 8-15 (low word) and high_bits (high word) are packed into a 64-bit value,
// shifted by a sanitized amount, and one word of the outcome is written to the destination:
// the low word for right shifts, the high word for left shifts.
// The shift amount is either wrapped (masked) or clamped depending on bit 50.
// Throws NotImplementedException for CC, any X mode and the undefined MaxShift encoding.
void SHF(TranslatorVisitor& v, u64 insn, const IR::U32& shift, const IR::U32& high_bits,
         bool right_shift) {
    union {
        u64 insn;
        BitField<0, 8, IR::Reg> dest_reg;
        // First source register; it lives in bits 8-15 like every other first source operand
        BitField<8, 8, IR::Reg> lo_bits_reg;
        BitField<37, 2, MaxShift> max_shift;
        BitField<47, 1, u64> cc;
        BitField<48, 2, u64> x_mode;
        BitField<50, 1, u64> wrap;
    } const shf{insn};

    if (shf.cc != 0) {
        throw NotImplementedException("SHF CC");
    }
    if (shf.x_mode != 0) {
        throw NotImplementedException("SHF X Mode");
    }
    if (shf.max_shift == MaxShift::Undefined) {
        throw NotImplementedException("SHF Use of undefined MaxShift value");
    }
    const IR::U32 low_bits{v.X(shf.lo_bits_reg)};
    const IR::U64 packed_int{v.ir.PackUint2x32(v.ir.CompositeConstruct(low_bits, high_bits))};

    // Clamp limit and wrap mask are distinct: wrapping keeps the low 5 (32-bit mode) or 6
    // (64-bit modes) bits of the shift amount, so the mask must be 31 or 63 respectively
    const bool is_32_bit{shf.max_shift == MaxShift::U32};
    const IR::U32 max_shift{v.ir.Imm32(is_32_bit ? 32 : 63)};
    const IR::U32 wrap_mask{v.ir.Imm32(is_32_bit ? 31 : 63)};
    const IR::U32 safe_shift{shf.wrap != 0 ? v.ir.BitwiseAnd(shift, wrap_mask)
                                           : v.ir.UMin(shift, max_shift)};

    const bool is_signed{shf.max_shift == MaxShift::S64};
    const IR::U64 shifted_value{PackedShift(v.ir, packed_int, safe_shift, right_shift, is_signed)};
    const IR::Value unpacked_value{v.ir.UnpackUint2x32(shifted_value)};

    const IR::U32 result{v.ir.CompositeExtract(unpacked_value, right_shift ? 0 : 1)};
    v.X(shf.dest_reg, result);
}
} // Anonymous namespace
// Left funnel shift: shift amount from GetReg20, high word from GetReg39.
void TranslatorVisitor::SHF_l_reg(u64 insn) {
SHF(*this, insn, GetReg20(insn), GetReg39(insn), false);
}
// Left funnel shift: shift amount from GetImm20, high word from GetReg39.
void TranslatorVisitor::SHF_l_imm(u64 insn) {
SHF(*this, insn, GetImm20(insn), GetReg39(insn), false);
}
// Right funnel shift: shift amount from GetReg20, high word from GetReg39.
void TranslatorVisitor::SHF_r_reg(u64 insn) {
SHF(*this, insn, GetReg20(insn), GetReg39(insn), true);
}
// Right funnel shift: shift amount from GetImm20, high word from GetReg39.
void TranslatorVisitor::SHF_r_imm(u64 insn) {
SHF(*this, insn, GetImm20(insn), GetReg39(insn), true);
}
} // namespace Shader::Maxwell

View File

@@ -0,0 +1,64 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "common/bit_field.h"
#include "common/common_types.h"
#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
namespace Shader::Maxwell {
namespace {
// Translates IMNMX: computes both the minimum and the maximum of the source register and op_b
// (signed or unsigned depending on the is_signed bit) and selects one of them with a predicate.
// With neg_pred clear, a true predicate selects the minimum; with neg_pred set the two
// candidates trade places.
//
// Throws NotImplementedException when CC or a non-zero mode is requested.
void IMNMX(TranslatorVisitor& v, u64 insn, const IR::U32& op_b) {
    union {
        u64 insn;
        BitField<0, 8, IR::Reg> dest_reg;
        BitField<8, 8, IR::Reg> src_reg;
        BitField<39, 3, IR::Pred> pred;
        BitField<42, 1, u64> neg_pred;
        BitField<43, 2, u64> mode;
        BitField<47, 1, u64> cc;
        BitField<48, 1, u64> is_signed;
    } const imnmx{insn};
    if (imnmx.cc != 0) {
        throw NotImplementedException("IMNMX CC");
    }
    if (imnmx.mode != 0) {
        throw NotImplementedException("IMNMX.MODE");
    }
    const IR::U1 selector{v.ir.GetPred(imnmx.pred)};
    const IR::U32 op_a{v.X(imnmx.src_reg)};

    const bool is_signed{imnmx.is_signed != 0};
    const IR::U32 smallest{is_signed ? IR::U32{v.ir.SMin(op_a, op_b)}
                                     : IR::U32{v.ir.UMin(op_a, op_b)}};
    const IR::U32 largest{is_signed ? IR::U32{v.ir.SMax(op_a, op_b)}
                                    : IR::U32{v.ir.UMax(op_a, op_b)}};

    // A negated predicate is expressed by exchanging the two Select operands
    const bool is_negated{imnmx.neg_pred != 0};
    const IR::U32& when_true{is_negated ? largest : smallest};
    const IR::U32& when_false{is_negated ? smallest : largest};
    const IR::U32 result{v.ir.Select(selector, when_true, when_false)};
    v.X(imnmx.dest_reg, result);
}
} // Anonymous namespace
// IMNMX with the second operand taken from GetReg20.
void TranslatorVisitor::IMNMX_reg(u64 insn) {
IMNMX(*this, insn, GetReg20(insn));
}
// IMNMX with the second operand taken from GetCbuf.
void TranslatorVisitor::IMNMX_cbuf(u64 insn) {
IMNMX(*this, insn, GetCbuf(insn));
}
// IMNMX with the second operand taken from GetImm20.
void TranslatorVisitor::IMNMX_imm(u64 insn) {
IMNMX(*this, insn, GetImm20(insn));
}
} // namespace Shader::Maxwell

View File

@@ -0,0 +1,36 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "common/bit_field.h"
#include "common/common_types.h"
#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
namespace Shader::Maxwell {
namespace {
// Translates POPC: stores the bit count of src in the destination register. When the tilde bit
// is set, src is bitwise inverted before counting.
void POPC(TranslatorVisitor& v, u64 insn, const IR::U32& src) {
    union {
        u64 raw;
        BitField<0, 8, IR::Reg> dest_reg;
        BitField<40, 1, u64> tilde;
    } const popc{insn};

    IR::U32 counted{src};
    if (popc.tilde != 0) {
        counted = v.ir.BitwiseNot(src);
    }
    const IR::U32 bit_count = v.ir.BitCount(counted);
    v.X(popc.dest_reg, bit_count);
}
} // Anonymous namespace
// POPC with the source operand taken from GetReg20.
void TranslatorVisitor::POPC_reg(u64 insn) {
POPC(*this, insn, GetReg20(insn));
}
// POPC with the source operand taken from GetCbuf.
void TranslatorVisitor::POPC_cbuf(u64 insn) {
POPC(*this, insn, GetCbuf(insn));
}
// POPC with the source operand taken from GetImm20.
void TranslatorVisitor::POPC_imm(u64 insn) {
POPC(*this, insn, GetImm20(insn));
}
} // namespace Shader::Maxwell

View File

@@ -0,0 +1,86 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "common/bit_field.h"
#include "common/common_types.h"
#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
namespace Shader::Maxwell {
namespace {
// Translates ISCADD: dest = (op_a << scale_imm) + op_b.
//
// neg_a and neg_b negate the respective operand, except when both are set: that combination is
// the "PO" (plus one) mode, where op_b is incremented by one and nothing is negated.
// When cc is set the Z, S, C and O flags are written from the result; in PO mode the carry and
// overflow of the op_b increment are OR-ed into C and O.
void ISCADD(TranslatorVisitor& v, u64 insn, IR::U32 op_b, bool cc, bool neg_a, bool neg_b,
            u64 scale_imm) {
    union {
        u64 raw;
        BitField<0, 8, IR::Reg> dest_reg;
        BitField<8, 8, IR::Reg> op_a;
    } const iscadd{insn};

    const bool plus_one{neg_a && neg_b};
    IR::U32 op_a{v.X(iscadd.op_a)};
    if (plus_one) {
        // PO mode: add one to B instead of negating anything
        op_b = v.ir.IAdd(op_b, v.ir.Imm32(1));
    } else if (neg_a) {
        // Outside of PO mode at most one of the two negation bits can be set
        op_a = v.ir.INeg(op_a);
    } else if (neg_b) {
        op_b = v.ir.INeg(op_b);
    }

    // Scale A after the operand modifiers have been applied, then accumulate B
    const IR::U32 shift_amount{v.ir.Imm32(static_cast<u32>(scale_imm))};
    const IR::U32 shifted_a{v.ir.ShiftLeftLogical(op_a, shift_amount)};
    const IR::U32 sum{v.ir.IAdd(shifted_a, op_b)};
    v.X(iscadd.dest_reg, sum);

    if (!cc) {
        return;
    }
    v.SetZFlag(v.ir.GetZeroFromOp(sum));
    v.SetSFlag(v.ir.GetSignFromOp(sum));
    const IR::U1 sum_carry{v.ir.GetCarryFromOp(sum)};
    const IR::U1 sum_overflow{v.ir.GetOverflowFromOp(sum)};
    if (plus_one) {
        v.SetCFlag(v.ir.LogicalOr(sum_carry, v.ir.GetCarryFromOp(op_b)));
        v.SetOFlag(v.ir.LogicalOr(sum_overflow, v.ir.GetOverflowFromOp(op_b)));
    } else {
        v.SetCFlag(sum_carry);
        v.SetOFlag(sum_overflow);
    }
}
// Decodes the modifier bits shared by the reg/cbuf/imm ISCADD encodings (CC, both negation bits
// and the 5-bit scale) and forwards them to the generic ISCADD implementation.
void ISCADD(TranslatorVisitor& v, u64 insn, IR::U32 op_b) {
    union {
        u64 raw;
        BitField<47, 1, u64> cc;
        BitField<48, 1, u64> neg_b;
        BitField<49, 1, u64> neg_a;
        BitField<39, 5, u64> scale;
    } const fields{insn};

    const bool sets_flags{fields.cc != 0};
    const bool negates_a{fields.neg_a != 0};
    const bool negates_b{fields.neg_b != 0};
    ISCADD(v, insn, op_b, sets_flags, negates_a, negates_b, fields.scale);
}
} // Anonymous namespace
// ISCADD with operand B taken from GetReg20.
void TranslatorVisitor::ISCADD_reg(u64 insn) {
ISCADD(*this, insn, GetReg20(insn));
}
// ISCADD with operand B taken from GetCbuf.
void TranslatorVisitor::ISCADD_cbuf(u64 insn) {
ISCADD(*this, insn, GetCbuf(insn));
}
// ISCADD with operand B taken from GetImm20.
void TranslatorVisitor::ISCADD_imm(u64 insn) {
ISCADD(*this, insn, GetImm20(insn));
}
// ISCADD with a 32-bit immediate operand B. This encoding keeps CC at bit 52 and the scale at
// bits 53-57, and never negates either operand.
void TranslatorVisitor::ISCADD32I(u64 insn) {
    union {
        u64 raw;
        BitField<52, 1, u64> cc;
        BitField<53, 5, u64> scale;
    } const fields{insn};

    const bool sets_flags{fields.cc != 0};
    ISCADD(*this, insn, GetImm32(insn), sets_flags, false, false, fields.scale);
}
} // namespace Shader::Maxwell

View File

@@ -0,0 +1,49 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "common/bit_field.h"
#include "common/common_types.h"
#include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h"
#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
namespace Shader::Maxwell {
namespace {
// Translates ISETP: compares the source register with op_b and writes two predicates.
// dest_pred_a receives the comparison combined with the (optionally negated) bop predicate;
// dest_pred_b receives the inverted comparison combined with the same predicate.
void ISETP(TranslatorVisitor& v, u64 insn, const IR::U32& op_b) {
    union {
        u64 raw;
        BitField<0, 3, IR::Pred> dest_pred_b;
        BitField<3, 3, IR::Pred> dest_pred_a;
        BitField<8, 8, IR::Reg> src_reg_a;
        BitField<39, 3, IR::Pred> bop_pred;
        BitField<42, 1, u64> neg_bop_pred;
        BitField<45, 2, BooleanOp> bop;
        BitField<48, 1, u64> is_signed;
        BitField<49, 3, CompareOp> compare_op;
    } const isetp{insn};

    const BooleanOp combine_op{isetp.bop};
    const CompareOp cmp_op{isetp.compare_op};
    const bool is_signed{isetp.is_signed != 0};
    const bool negate_pred{isetp.neg_bop_pred != 0};

    const IR::U32 op_a{v.X(isetp.src_reg_a)};
    const IR::U1 cmp{IntegerCompare(v.ir, op_a, op_b, cmp_op, is_signed)};
    const IR::U1 extra_pred{v.ir.GetPred(isetp.bop_pred, negate_pred)};

    const IR::U1 value_a{PredicateCombine(v.ir, cmp, extra_pred, combine_op)};
    const IR::U1 inverted_cmp{v.ir.LogicalNot(cmp)};
    const IR::U1 value_b{PredicateCombine(v.ir, inverted_cmp, extra_pred, combine_op)};

    v.ir.SetPred(isetp.dest_pred_a, value_a);
    v.ir.SetPred(isetp.dest_pred_b, value_b);
}
} // Anonymous namespace
// ISETP with operand B taken from GetReg20.
void TranslatorVisitor::ISETP_reg(u64 insn) {
ISETP(*this, insn, GetReg20(insn));
}
// ISETP with operand B taken from GetCbuf.
void TranslatorVisitor::ISETP_cbuf(u64 insn) {
ISETP(*this, insn, GetCbuf(insn));
}
// ISETP with operand B taken from GetImm20.
void TranslatorVisitor::ISETP_imm(u64 insn) {
ISETP(*this, insn, GetImm20(insn));
}
} // namespace Shader::Maxwell

View File

@@ -0,0 +1,71 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "common/bit_field.h"
#include "common/common_types.h"
#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
namespace Shader::Maxwell {
namespace {
// Translates SHL (logical shift left of the source register by unsafe_shift).
//
// With .W the shift amount is wrapped to five bits. Without .W any shift amount that is not
// below 32 (unsigned comparison) produces zero.
//
// Throws NotImplementedException for .X and .CC.
void SHL(TranslatorVisitor& v, u64 insn, const IR::U32& unsafe_shift) {
    union {
        u64 insn;
        BitField<0, 8, IR::Reg> dest_reg;
        BitField<8, 8, IR::Reg> src_reg_a;
        BitField<39, 1, u64> w;
        BitField<43, 1, u64> x;
        BitField<47, 1, u64> cc;
    } const shl{insn};
    if (shl.x != 0) {
        throw NotImplementedException("SHL.X");
    }
    if (shl.cc != 0) {
        throw NotImplementedException("SHL.CC");
    }
    const IR::U32 value{v.X(shl.src_reg_a)};

    if (shl.w != 0) {
        // .W wraps the shift amount, which is emulated by masking it ourselves
        const IR::U32 wrapped_shift{v.ir.BitwiseAnd(unsafe_shift, v.ir.Imm32(31))};
        const IR::U32 wrapped_result{v.ir.ShiftLeftLogical(value, wrapped_shift)};
        v.X(shl.dest_reg, wrapped_result);
        return;
    }

    // Without .W the shift amount is clamped between 0 and 32, where a shift of 32 evaluates
    // to zero. The out of bounds shift can be emitted unconditionally and discarded afterwards
    // because both backends only make its *result* undefined, not its behavior:
    //
    // SPIR-V: https://www.khronos.org/registry/spir-v/specs/unified1/SPIRV.html#OpShiftLeftLogical
    // "Shift is treated as unsigned. The resulting value is undefined if Shift is greater than
    //  or equal to the bit width of the components of Base."
    //
    // GLASM: https://www.khronos.org/registry/OpenGL/extensions/NV/NV_gpu_program4.txt
    // "The results of a shift operation ("<<") are undefined if the value of the second operand
    //  is negative, or greater than or equal to the number of bits in the first operand."
    const IR::U1 in_range{v.ir.ILessThan(unsafe_shift, v.ir.Imm32(32), false)};
    const IR::U32 raw_result{v.ir.ShiftLeftLogical(value, unsafe_shift)};
    const IR::U32 clamped_result{v.ir.Select(in_range, raw_result, v.ir.Imm32(0))};
    v.X(shl.dest_reg, clamped_result);
}
} // Anonymous namespace
// SHL with the shift amount taken from GetReg20.
void TranslatorVisitor::SHL_reg(u64 insn) {
SHL(*this, insn, GetReg20(insn));
}
// SHL with the shift amount taken from GetCbuf.
void TranslatorVisitor::SHL_cbuf(u64 insn) {
SHL(*this, insn, GetCbuf(insn));
}
// SHL with the shift amount taken from GetImm20.
void TranslatorVisitor::SHL_imm(u64 insn) {
SHL(*this, insn, GetImm20(insn));
}
} // namespace Shader::Maxwell

View File

@@ -0,0 +1,66 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "common/bit_field.h"
#include "common/common_types.h"
#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
namespace Shader::Maxwell {
namespace {
// Translates SHR (shift right of the source register by shift).
//
// The source is optionally bit-reversed first (brev). The shift is arithmetic when is_signed is
// set and logical otherwise. With is_wrapped set the shift amount is masked to five bits.
// Without it, shift amounts that are not below 32 (unsigned comparison) are clamped: the result
// becomes all sign bits of the shifted operand for arithmetic shifts and zero for logical ones.
//
// Throws NotImplementedException for XMODE and CC.
void SHR(TranslatorVisitor& v, u64 insn, const IR::U32& shift) {
    union {
        u64 insn;
        BitField<0, 8, IR::Reg> dest_reg;
        BitField<8, 8, IR::Reg> src_reg_a;
        BitField<39, 1, u64> is_wrapped;
        BitField<40, 1, u64> brev;
        BitField<43, 1, u64> xmode;
        BitField<47, 1, u64> cc;
        BitField<48, 1, u64> is_signed;
    } const shr{insn};
    if (shr.xmode != 0) {
        throw NotImplementedException("SHR.XMODE");
    }
    if (shr.cc != 0) {
        throw NotImplementedException("SHR.CC");
    }
    IR::U32 base{v.X(shr.src_reg_a)};
    if (shr.brev == 1) {
        base = v.ir.BitReverse(base);
    }
    const bool is_signed{shr.is_signed == 1};
    IR::U32 result;
    const IR::U32 safe_shift = shr.is_wrapped == 0 ? shift : v.ir.BitwiseAnd(shift, v.ir.Imm32(31));
    if (is_signed) {
        result = IR::U32{v.ir.ShiftRightArithmetic(base, safe_shift)};
    } else {
        result = IR::U32{v.ir.ShiftRightLogical(base, safe_shift)};
    }
    if (shr.is_wrapped == 0) {
        const IR::U32 zero{v.ir.Imm32(0)};
        const IR::U32 safe_bits{v.ir.Imm32(32)};
        const IR::U1 is_safe{v.ir.ILessThan(shift, safe_bits, false)};
        // The value of an out of range shift is undefined on the host, so the replacement must
        // be derived from the operand being shifted rather than from the shifted result.
        // Logical shifts always clamp to zero; only arithmetic shifts fill with the sign bit.
        IR::U32 clamped_value{zero};
        if (is_signed) {
            const IR::U1 is_negative{v.ir.ILessThan(base, zero, true)};
            clamped_value = IR::U32{v.ir.Select(is_negative, v.ir.Imm32(-1), zero)};
        }
        result = IR::U32{v.ir.Select(is_safe, result, clamped_value)};
    }
    v.X(shr.dest_reg, result);
}
} // Anonymous namespace
// SHR with the shift amount taken from GetReg20.
void TranslatorVisitor::SHR_reg(u64 insn) {
SHR(*this, insn, GetReg20(insn));
}
// SHR with the shift amount taken from GetCbuf.
void TranslatorVisitor::SHR_cbuf(u64 insn) {
SHR(*this, insn, GetCbuf(insn));
}
// SHR with the shift amount taken from GetImm20.
void TranslatorVisitor::SHR_imm(u64 insn) {
SHR(*this, insn, GetImm20(insn));
}
} // namespace Shader::Maxwell

View File

@@ -0,0 +1,135 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "common/bit_field.h"
#include "common/common_types.h"
#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
namespace Shader::Maxwell {
namespace {
// Selects how XMAD derives its addend from operand C (see the switch inside XMAD).
enum class SelectMode : u64 {
Default, // Operand C is used unmodified
CLO, // Bits 15:0 of operand C, zero extended
CHI, // Bits 31:16 of operand C, zero extended
CSFU, // Not implemented, XMAD throws
CBCC, // (operand B << 16) + operand C
};
// Identifies which 16-bit half of a 32-bit operand ExtractHalf reads.
enum class Half : u64 {
H0, // Least-significant bits (15:0)
H1, // Most-significant bits (31:16)
};
// Extracts one 16-bit half of src with a bit field extract starting at bit 0 (H0) or bit 16
// (H1). is_signed is forwarded to the extract operation.
IR::U32 ExtractHalf(TranslatorVisitor& v, const IR::U32& src, Half half, bool is_signed) {
    const s32 first_bit{half == Half::H1 ? 16 : 0};
    const IR::U32 bit_offset{v.ir.Imm32(first_bit)};
    const IR::U32 bit_count{v.ir.Imm32(16)};
    return v.ir.BitFieldExtract(src, bit_offset, bit_count, is_signed);
}
// Translates XMAD: multiplies one 16-bit half of source A by one 16-bit half of src_b and adds
// an addend derived from src_c according to select_mode.
//
// psl shifts the product left by 16 bits before the addition. mrg inserts bits 15:0 of src_b
// into bits 31:16 of the final result.
//
// Throws NotImplementedException for X, the CSFU select mode, unknown select modes and CC.
void XMAD(TranslatorVisitor& v, u64 insn, const IR::U32& src_b, const IR::U32& src_c,
          SelectMode select_mode, Half half_b, bool psl, bool mrg, bool x) {
    union {
        u64 raw;
        BitField<0, 8, IR::Reg> dest_reg;
        BitField<8, 8, IR::Reg> src_reg_a;
        BitField<47, 1, u64> cc;
        BitField<48, 1, u64> is_a_signed;
        BitField<49, 1, u64> is_b_signed;
        BitField<53, 1, Half> half_a;
    } const xmad{insn};
    if (x) {
        throw NotImplementedException("XMAD X");
    }
    const IR::U32 factor_a{
        ExtractHalf(v, v.X(xmad.src_reg_a), xmad.half_a, xmad.is_a_signed != 0)};
    const IR::U32 factor_b{ExtractHalf(v, src_b, half_b, xmad.is_b_signed != 0)};

    IR::U32 product{v.ir.IMul(factor_a, factor_b)};
    if (psl) {
        // .PSL shifts the product 16 bits
        product = v.ir.ShiftLeftLogical(product, v.ir.Imm32(16));
    }

    IR::U32 addend;
    switch (select_mode) {
    case SelectMode::Default:
        addend = src_c;
        break;
    case SelectMode::CLO:
        addend = ExtractHalf(v, src_c, Half::H0, false);
        break;
    case SelectMode::CHI:
        addend = ExtractHalf(v, src_c, Half::H1, false);
        break;
    case SelectMode::CBCC:
        addend = v.ir.IAdd(v.ir.ShiftLeftLogical(src_b, v.ir.Imm32(16)), src_c);
        break;
    case SelectMode::CSFU:
        throw NotImplementedException("XMAD CSFU");
    default:
        throw NotImplementedException("Invalid XMAD select mode {}", select_mode);
    }

    IR::U32 result{v.ir.IAdd(product, addend)};
    if (mrg) {
        // .MRG inserts src_b [15:0] into result's [31:16].
        const IR::U32 low_half_b{ExtractHalf(v, src_b, Half::H0, false)};
        result = v.ir.BitFieldInsert(result, low_half_b, v.ir.Imm32(16), v.ir.Imm32(16));
    }
    if (xmad.cc) {
        throw NotImplementedException("XMAD CC");
    }
    v.X(xmad.dest_reg, result);
}
} // Anonymous namespace
// XMAD with operand B from GetReg20 and operand C from GetReg39.
// This encoding carries the B half selector at bit 35, PSL/MRG/X at bits 36-38 and a three-bit
// select mode at bits 50-52.
void TranslatorVisitor::XMAD_reg(u64 insn) {
union {
u64 raw;
BitField<35, 1, Half> half_b;
BitField<36, 1, u64> psl;
BitField<37, 1, u64> mrg;
BitField<38, 1, u64> x;
BitField<50, 3, SelectMode> select_mode;
} const xmad{insn};
XMAD(*this, insn, GetReg20(insn), GetReg39(insn), xmad.select_mode, xmad.half_b, xmad.psl != 0,
xmad.mrg != 0, xmad.x != 0);
}
// XMAD with operand B from GetReg39 and operand C from GetCbuf.
// Only a two-bit select mode is decoded here, and PSL and MRG are always passed as disabled.
void TranslatorVisitor::XMAD_rc(u64 insn) {
union {
u64 raw;
BitField<50, 2, SelectMode> select_mode;
BitField<52, 1, Half> half_b;
BitField<54, 1, u64> x;
} const xmad{insn};
XMAD(*this, insn, GetReg39(insn), GetCbuf(insn), xmad.select_mode, xmad.half_b, false, false,
xmad.x != 0);
}
// XMAD with operand B from GetCbuf and operand C from GetReg39.
// This encoding decodes a two-bit select mode and keeps X/PSL/MRG at bits 54-56.
void TranslatorVisitor::XMAD_cr(u64 insn) {
union {
u64 raw;
BitField<50, 2, SelectMode> select_mode;
BitField<52, 1, Half> half_b;
BitField<54, 1, u64> x;
BitField<55, 1, u64> psl;
BitField<56, 1, u64> mrg;
} const xmad{insn};
XMAD(*this, insn, GetCbuf(insn), GetReg39(insn), xmad.select_mode, xmad.half_b, xmad.psl != 0,
xmad.mrg != 0, xmad.x != 0);
}
// XMAD with a 16-bit immediate operand B (bits 20-35) and operand C from GetReg39.
// The immediate is zero extended to 32 bits and always read through its low half (Half::H0).
void TranslatorVisitor::XMAD_imm(u64 insn) {
union {
u64 raw;
BitField<20, 16, u64> src_b;
BitField<36, 1, u64> psl;
BitField<37, 1, u64> mrg;
BitField<38, 1, u64> x;
BitField<50, 3, SelectMode> select_mode;
} const xmad{insn};
XMAD(*this, insn, ir.Imm32(static_cast<u32>(xmad.src_b)), GetReg39(insn), xmad.select_mode,
Half::H0, xmad.psl != 0, xmad.mrg != 0, xmad.x != 0);
}
} // namespace Shader::Maxwell

View File

@@ -0,0 +1,126 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "common/bit_field.h"
#include "common/common_types.h"
#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
namespace Shader::Maxwell {
namespace {
// Integer format widths used by I2I; WidthSize maps them to their bit counts.
enum class IntegerWidth : u64 {
Byte, // 8 bits
Short, // 16 bits
Word, // 32 bits
};
// Returns the number of bits of an integer width as a 32-bit immediate.
// Throws NotImplementedException for values outside of Byte, Short and Word.
[[nodiscard]] IR::U32 WidthSize(IR::IREmitter& ir, IntegerWidth width) {
    s32 num_bits{};
    switch (width) {
    case IntegerWidth::Byte:
        num_bits = 8;
        break;
    case IntegerWidth::Short:
        num_bits = 16;
        break;
    case IntegerWidth::Word:
        num_bits = 32;
        break;
    default:
        throw NotImplementedException("Invalid width {}", width);
    }
    return ir.Imm32(num_bits);
}
// Non-saturating conversion: keeps the low dst_width bits of src using an unsigned bit field
// extract starting at bit zero.
[[nodiscard]] IR::U32 ConvertInteger(IR::IREmitter& ir, const IR::U32& src,
                                     IntegerWidth dst_width) {
    const IR::U32 first_bit{ir.Imm32(0)};
    const IR::U32 num_bits{WidthSize(ir, dst_width)};
    return ir.BitFieldExtract(src, first_bit, num_bits, false);
}
// Saturating conversion of src to the destination format.
//
// The upper bound is the largest value of the destination format. The lower bound is the most
// negative value of the destination format only when both source and destination are signed;
// in every other case it is zero. A signed source converted to an unsigned destination is first
// raised to zero with a signed maximum. The final clamp is signed only when both formats are
// signed and unsigned otherwise.
//
// Throws NotImplementedException for an invalid dst_width.
[[nodiscard]] IR::U32 SaturateInteger(IR::IREmitter& ir, const IR::U32& src, IntegerWidth dst_width,
                                      bool dst_signed, bool src_signed) {
    const bool both_signed{dst_signed && src_signed};
    const IR::U32 zero{ir.Imm32(0)};
    IR::U32 lower_bound{zero};
    IR::U32 upper_bound{};
    switch (dst_width) {
    case IntegerWidth::Byte:
        if (both_signed) {
            lower_bound = ir.Imm32(0xffffff80);
        }
        upper_bound = dst_signed ? ir.Imm32(0x7f) : ir.Imm32(0xff);
        break;
    case IntegerWidth::Short:
        if (both_signed) {
            lower_bound = ir.Imm32(0xffff8000);
        }
        upper_bound = dst_signed ? ir.Imm32(0x7fff) : ir.Imm32(0xffff);
        break;
    case IntegerWidth::Word:
        if (both_signed) {
            lower_bound = ir.Imm32(0x80000000);
        }
        upper_bound = dst_signed ? ir.Imm32(0x7fffffff) : ir.Imm32(0xffffffff);
        break;
    default:
        throw NotImplementedException("Invalid width {}", dst_width);
    }

    IR::U32 clamped_input{src};
    if (src_signed && !dst_signed) {
        // Negative inputs saturate to zero when the destination is unsigned
        clamped_input = ir.SMax(zero, src);
    }
    if (both_signed) {
        return ir.SClamp(clamped_input, lower_bound, upper_bound);
    }
    return ir.UClamp(clamped_input, lower_bound, upper_bound);
}
// Translates I2I (integer to integer conversion).
//
// The source value is extracted from src_a starting at bit selector * 8 with the width of the
// source format, then the abs and neg modifiers are applied in that order. The value is finally
// either saturated or truncated to the destination format and stored in the destination
// register. With CC set, Z and S are written from the result and C and O are reset.
//
// Throws NotImplementedException for selectors that do not fit the source format.
void I2I(TranslatorVisitor& v, u64 insn, const IR::U32& src_a) {
    union {
        u64 insn;
        BitField<0, 8, IR::Reg> dest_reg;
        BitField<8, 2, IntegerWidth> dst_fmt;
        BitField<12, 1, u64> dst_fmt_sign;
        BitField<10, 2, IntegerWidth> src_fmt;
        BitField<13, 1, u64> src_fmt_sign;
        BitField<41, 3, u64> selector;
        BitField<45, 1, u64> neg;
        BitField<47, 1, u64> cc;
        BitField<49, 1, u64> abs;
        BitField<50, 1, u64> sat;
    } const i2i{insn};

    const bool is_odd_byte_selector{i2i.selector == 1 || i2i.selector == 3};
    if (i2i.src_fmt == IntegerWidth::Short && is_odd_byte_selector) {
        throw NotImplementedException("16-bit source format incompatible with selector {}",
                                      i2i.selector);
    }
    if (i2i.src_fmt == IntegerWidth::Word && i2i.selector != 0) {
        throw NotImplementedException("32-bit source format incompatible with selector {}",
                                      i2i.selector);
    }

    const bool is_src_signed{i2i.src_fmt_sign != 0};
    const bool is_dst_signed{i2i.dst_fmt_sign != 0};

    // The selector addresses the source in units of bytes
    const s32 byte_index{static_cast<s32>(i2i.selector)};
    const IR::U32 first_bit{v.ir.Imm32(byte_index * 8)};
    const IR::U32 num_bits{WidthSize(v.ir, i2i.src_fmt)};

    IR::U32 value{v.ir.BitFieldExtract(src_a, first_bit, num_bits, is_src_signed)};
    if (i2i.abs != 0) {
        value = v.ir.IAbs(value);
    }
    if (i2i.neg != 0) {
        value = v.ir.INeg(value);
    }

    IR::U32 result;
    if (i2i.sat != 0) {
        result = SaturateInteger(v.ir, value, i2i.dst_fmt, is_dst_signed, is_src_signed);
    } else {
        result = ConvertInteger(v.ir, value, i2i.dst_fmt);
    }
    v.X(i2i.dest_reg, result);

    if (i2i.cc == 0) {
        return;
    }
    v.SetZFlag(v.ir.GetZeroFromOp(result));
    v.SetSFlag(v.ir.GetSignFromOp(result));
    v.ResetCFlag();
    v.ResetOFlag();
}
} // Anonymous namespace
// I2I with the source operand taken from GetReg20.
void TranslatorVisitor::I2I_reg(u64 insn) {
I2I(*this, insn, GetReg20(insn));
}
// I2I with the source operand taken from GetCbuf.
void TranslatorVisitor::I2I_cbuf(u64 insn) {
I2I(*this, insn, GetCbuf(insn));
}
// I2I with the source operand taken from GetImm20.
void TranslatorVisitor::I2I_imm(u64 insn) {
I2I(*this, insn, GetImm20(insn));
}
} // namespace Shader::Maxwell

View File

@@ -0,0 +1,53 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "common/bit_field.h"
#include "common/common_types.h"
#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
namespace Shader::Maxwell {
namespace {
// Values of the two-bit ISBERD mode field. ISBERD below only accepts Default.
enum class Mode : u64 {
Default,
Patch,
Prim,
Attr,
};
// Values of the two-bit ISBERD shift field. ISBERD below only accepts Default.
enum class Shift : u64 {
Default,
U16,
B32,
};
} // Anonymous namespace
// Stubbed translation of ISBERD: after logging a warning, the source register is copied to the
// destination register unchanged.
// Throws NotImplementedException when SKEW or O is set, or when a non-default mode or shift is
// requested. The checks run in that order, so the first offending field decides the message.
void TranslatorVisitor::ISBERD(u64 insn) {
union {
u64 raw;
BitField<0, 8, IR::Reg> dest_reg;
BitField<8, 8, IR::Reg> src_reg;
BitField<31, 1, u64> skew;
BitField<32, 1, u64> o;
BitField<33, 2, Mode> mode;
BitField<47, 2, Shift> shift;
} const isberd{insn};
if (isberd.skew != 0) {
throw NotImplementedException("SKEW");
}
if (isberd.o != 0) {
throw NotImplementedException("O");
}
if (isberd.mode != Mode::Default) {
throw NotImplementedException("Mode {}", isberd.mode.Value());
}
if (isberd.shift != Shift::Default) {
throw NotImplementedException("Shift {}", isberd.shift.Value());
}
LOG_WARNING(Shader, "(STUBBED) called");
// Plain register move; no internal stage buffer is actually read
X(isberd.dest_reg, X(isberd.src_reg));
}
} // namespace Shader::Maxwell

View File

@@ -0,0 +1,62 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "common/bit_field.h"
#include "common/common_types.h"
#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
#include "shader_recompiler/frontend/maxwell/translate/impl/load_constant.h"
namespace Shader::Maxwell {
using namespace LDC;
namespace {
// Computes the (constant buffer index, offset) pair addressed by an LDC instruction.
// Only Mode::Default is supported: the index is the immediate index and the offset is the sum
// of the register and the immediate offset. Any other mode throws NotImplementedException.
std::pair<IR::U32, IR::U32> Slot(IR::IREmitter& ir, Mode mode, const IR::U32& imm_index,
                                 const IR::U32& reg, const IR::U32& imm) {
    if (mode != Mode::Default) {
        throw NotImplementedException("Mode {}", mode);
    }
    return {imm_index, ir.IAdd(reg, imm)};
}
} // Anonymous namespace
// Translates LDC (load from constant buffer).
//
// Sizes up to 32 bits load a single value into the destination register, sign extending for
// the S8 and S16 sizes. B64 loads a two component vector into two consecutive registers and
// requires the destination register to be aligned to two.
//
// Throws NotImplementedException for unsupported addressing modes (see Slot), an unaligned
// 64-bit destination and invalid sizes.
void TranslatorVisitor::LDC(u64 insn) {
    const Encoding ldc{insn};
    const IR::U32 imm_index{ir.Imm32(static_cast<u32>(ldc.index))};
    const IR::U32 reg{X(ldc.src_reg)};
    const IR::U32 imm{ir.Imm32(static_cast<s32>(ldc.offset))};
    const auto [index, offset]{Slot(ir, ldc.mode, imm_index, reg, imm)};

    if (ldc.size == Size::B64) {
        if (!IR::IsAligned(ldc.dest_reg, 2)) {
            throw NotImplementedException("Unaligned destination register");
        }
        const IR::Value vector{ir.GetCbuf(index, offset, 64, false)};
        for (int i = 0; i < 2; ++i) {
            X(ldc.dest_reg + i, IR::U32{ir.CompositeExtract(vector, static_cast<size_t>(i))});
        }
        return;
    }

    // Every remaining size is a scalar load that only differs in width and signedness
    int bit_size{};
    bool is_signed{};
    switch (ldc.size) {
    case Size::U8:
        bit_size = 8;
        is_signed = false;
        break;
    case Size::S8:
        bit_size = 8;
        is_signed = true;
        break;
    case Size::U16:
        bit_size = 16;
        is_signed = false;
        break;
    case Size::S16:
        bit_size = 16;
        is_signed = true;
        break;
    case Size::B32:
        bit_size = 32;
        is_signed = false;
        break;
    default:
        throw NotImplementedException("Invalid size {}", ldc.size.Value());
    }
    X(ldc.dest_reg, IR::U32{ir.GetCbuf(index, offset, bit_size, is_signed)});
}
} // namespace Shader::Maxwell

View File

@@ -0,0 +1,39 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include "common/bit_field.h"
#include "common/common_types.h"
#include "shader_recompiler/frontend/ir/reg.h"
namespace Shader::Maxwell::LDC {
// Values of the two-bit addressing mode field of the LDC encoding (see Encoding::mode).
enum class Mode : u64 {
Default,
IL,
IS,
ISL,
};
// Values of the three-bit load size field of the LDC encoding (see Encoding::size).
enum class Size : u64 {
U8,
S8,
U16,
S16,
B32,
B64,
};
// Bit layout of an LDC instruction word.
union Encoding {
u64 raw; // Whole 64-bit instruction
BitField<0, 8, IR::Reg> dest_reg; // Destination register
BitField<8, 8, IR::Reg> src_reg; // Source register
BitField<20, 16, s64> offset; // Signed 16-bit immediate offset
BitField<36, 5, u64> index; // Five-bit immediate index
BitField<44, 2, Mode> mode; // Addressing mode
BitField<48, 3, Size> size; // Load size
};
} // namespace Shader::Maxwell::LDC

View File

@@ -0,0 +1,108 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "common/bit_field.h"
#include "common/common_types.h"
#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
namespace Shader::Maxwell {
namespace {
// Translates LEA.HI: builds a 64-bit offset from the offset register (low word) and offset_hi
// (high word), optionally negates it, shifts it right logically by (32 - scale) and adds the low
// word of the shifted value to base.
//
// Throws NotImplementedException for X, a predicate other than PT and CC.
void LEA_hi(TranslatorVisitor& v, u64 insn, const IR::U32& base, IR::U32 offset_hi, u64 scale,
            bool neg, bool x) {
    union {
        u64 insn;
        BitField<0, 8, IR::Reg> dest_reg;
        BitField<8, 8, IR::Reg> offset_lo_reg;
        BitField<47, 1, u64> cc;
        BitField<48, 3, IR::Pred> pred;
    } const lea{insn};
    if (x) {
        throw NotImplementedException("LEA.HI X");
    }
    if (lea.pred != IR::Pred::PT) {
        throw NotImplementedException("LEA.HI Pred");
    }
    if (lea.cc != 0) {
        throw NotImplementedException("LEA.HI CC");
    }

    const IR::U32 offset_lo{v.X(lea.offset_lo_reg)};
    const IR::Value offset_words{v.ir.CompositeConstruct(offset_lo, offset_hi)};
    IR::U64 wide_offset{v.ir.PackUint2x32(offset_words)};
    if (neg) {
        wide_offset = IR::U64{v.ir.INeg(wide_offset)};
    }

    const IR::U32 shift_amount{v.ir.Imm32(32 - static_cast<s32>(scale))};
    const IR::U64 shifted_offset{v.ir.ShiftRightLogical(wide_offset, shift_amount)};
    const IR::Value shifted_words{v.ir.UnpackUint2x32(shifted_offset)};
    const IR::U32 shifted_low_word{v.ir.CompositeExtract(shifted_words, 0)};

    const IR::U32 address{v.ir.IAdd(base, shifted_low_word)};
    v.X(lea.dest_reg, address);
}
// Translates LEA.LO: dest = base + (offset << scale), where the offset register is negated
// first when the neg bit is set.
//
// Throws NotImplementedException for X, a predicate other than PT and CC.
void LEA_lo(TranslatorVisitor& v, u64 insn, const IR::U32& base) {
    union {
        u64 insn;
        BitField<0, 8, IR::Reg> dest_reg;
        BitField<8, 8, IR::Reg> offset_lo_reg;
        BitField<39, 5, u64> scale;
        BitField<45, 1, u64> neg;
        BitField<46, 1, u64> x;
        BitField<47, 1, u64> cc;
        BitField<48, 3, IR::Pred> pred;
    } const lea{insn};
    if (lea.x != 0) {
        throw NotImplementedException("LEA.LO X");
    }
    if (lea.pred != IR::Pred::PT) {
        throw NotImplementedException("LEA.LO Pred");
    }
    if (lea.cc != 0) {
        throw NotImplementedException("LEA.LO CC");
    }

    IR::U32 offset{v.X(lea.offset_lo_reg)};
    if (lea.neg != 0) {
        offset = IR::U32{v.ir.INeg(offset)};
    }
    const IR::U32 shift_amount{v.ir.Imm32(static_cast<s32>(lea.scale))};
    const IR::U32 shifted_offset{v.ir.ShiftLeftLogical(offset, shift_amount)};

    const IR::U32 address{v.ir.IAdd(base, shifted_offset)};
    v.X(lea.dest_reg, address);
}
} // Anonymous namespace
// LEA.HI with the base from GetReg20 and the high offset word from GetReg39.
// This encoding keeps the scale at bits 28-32, neg at bit 37 and X at bit 38.
void TranslatorVisitor::LEA_hi_reg(u64 insn) {
union {
u64 insn;
BitField<28, 5, u64> scale;
BitField<37, 1, u64> neg;
BitField<38, 1, u64> x;
} const lea{insn};
LEA_hi(*this, insn, GetReg20(insn), GetReg39(insn), lea.scale, lea.neg != 0, lea.x != 0);
}
// LEA.HI with the base from GetCbuf and the high offset word from GetReg39.
// This encoding keeps the scale at bits 51-55, neg at bit 56 and X at bit 57.
void TranslatorVisitor::LEA_hi_cbuf(u64 insn) {
union {
u64 insn;
BitField<51, 5, u64> scale;
BitField<56, 1, u64> neg;
BitField<57, 1, u64> x;
} const lea{insn};
LEA_hi(*this, insn, GetCbuf(insn), GetReg39(insn), lea.scale, lea.neg != 0, lea.x != 0);
}
// LEA.LO with the base taken from GetReg20.
void TranslatorVisitor::LEA_lo_reg(u64 insn) {
LEA_lo(*this, insn, GetReg20(insn));
}
// LEA.LO with the base taken from GetCbuf.
void TranslatorVisitor::LEA_lo_cbuf(u64 insn) {
LEA_lo(*this, insn, GetCbuf(insn));
}
// LEA.LO with the base taken from GetImm20.
void TranslatorVisitor::LEA_lo_imm(u64 insn) {
LEA_lo(*this, insn, GetImm20(insn));
}
} // namespace Shader::Maxwell

View File

@@ -0,0 +1,196 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "common/bit_field.h"
#include "common/common_types.h"
#include "shader_recompiler/exception.h"
#include "shader_recompiler/frontend/ir/ir_emitter.h"
#include "shader_recompiler/frontend/maxwell/opcodes.h"
#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
namespace Shader::Maxwell {
namespace {
// Access size of ALD/AST. NumElements maps each value to its number of 32-bit elements.
enum class Size : u64 {
B32, // 1 element
B64, // 2 elements
B96, // 3 elements
B128, // 4 elements
};
// Values of the two-bit IPA interpolation mode field.
// IPA below only gives special treatment to Multiply.
enum class InterpolationMode : u64 {
Pass,
Multiply, // The attribute is multiplied by the multiplier register
Constant,
Sc,
};
// Values of the two-bit IPA sample mode field. IPA below decodes the field but never reads it.
enum class SampleMode : u64 {
Default,
Centroid,
Offset,
};
// Returns how many 32-bit elements an access of the given size covers.
// Throws InvalidArgument for values outside of the Size enumeration.
u32 NumElements(Size size) {
    u32 count{};
    switch (size) {
    case Size::B32:
        count = 1;
        break;
    case Size::B64:
        count = 2;
        break;
    case Size::B96:
        count = 3;
        break;
    case Size::B128:
        count = 4;
        break;
    default:
        throw InvalidArgument("Invalid size {}", size);
    }
    return count;
}
// Invokes f(element, offset) once per element of an indexed attribute access.
// The offset of element zero is the value of index_reg itself; every following element adds
// element * 4 to it.
template <typename F>
void HandleIndexed(TranslatorVisitor& v, IR::Reg index_reg, u32 num_elements, F&& f) {
    const IR::U32 first_offset{v.X(index_reg)};
    for (u32 element = 0; element < num_elements; ++element) {
        if (element == 0) {
            // No addition is emitted for the first element
            f(element, first_offset);
            continue;
        }
        const IR::U32 element_offset{v.ir.IAdd(first_offset, v.ir.Imm32(element * 4U))};
        f(element, element_offset);
    }
}
} // Anonymous namespace
// Translates ALD (attribute load).
//
// With RZ as index register, the absolute offset selects the first attribute (or patch when the
// patch bit is set) and one value per element is loaded into consecutive destination registers.
// With any other index register the attributes are read through GetAttributeIndexed.
//
// Throws NotImplementedException for absolute offsets that are not a multiple of four and for
// indexed patch reads.
void TranslatorVisitor::ALD(u64 insn) {
union {
u64 raw;
BitField<0, 8, IR::Reg> dest_reg;
BitField<8, 8, IR::Reg> index_reg;
BitField<20, 10, u64> absolute_offset;
BitField<20, 11, s64> relative_offset;
BitField<39, 8, IR::Reg> vertex_reg;
BitField<32, 1, u64> o;
BitField<31, 1, u64> patch;
BitField<47, 2, Size> size;
} const ald{insn};
// The alignment check is performed even when the indexed path below ignores the offset
const u64 offset{ald.absolute_offset.Value()};
if (offset % 4 != 0) {
throw NotImplementedException("Unaligned absolute offset {}", offset);
}
const IR::U32 vertex{X(ald.vertex_reg)};
const u32 num_elements{NumElements(ald.size)};
if (ald.index_reg == IR::Reg::RZ) {
// Direct access: offset / 4 is the index of the first attribute or patch
for (u32 element = 0; element < num_elements; ++element) {
if (ald.patch != 0) {
const IR::Patch patch{offset / 4 + element};
F(ald.dest_reg + element, ir.GetPatch(patch));
} else {
const IR::Attribute attr{offset / 4 + element};
F(ald.dest_reg + element, ir.GetAttribute(attr, vertex));
}
}
return;
}
if (ald.patch != 0) {
throw NotImplementedException("Indirect patch read");
}
// Indexed access: the offset of each element is computed from the index register
HandleIndexed(*this, ald.index_reg, num_elements, [&](u32 element, IR::U32 final_offset) {
F(ald.dest_reg + element, ir.GetAttributeIndexed(final_offset, vertex));
});
}
// Translates AST (attribute store).
//
// The absolute offset selects the first attribute (or patch when the patch bit is set) and one
// value per element is stored from consecutive source registers.
//
// Throws NotImplementedException for stores that use an index register other than RZ and for
// absolute offsets that are not a multiple of four.
void TranslatorVisitor::AST(u64 insn) {
union {
u64 raw;
BitField<0, 8, IR::Reg> src_reg;
BitField<8, 8, IR::Reg> index_reg;
BitField<20, 10, u64> absolute_offset;
BitField<20, 11, s64> relative_offset;
BitField<31, 1, u64> patch;
BitField<39, 8, IR::Reg> vertex_reg;
BitField<47, 2, Size> size;
} const ast{insn};
if (ast.index_reg != IR::Reg::RZ) {
throw NotImplementedException("Indexed store");
}
const u64 offset{ast.absolute_offset.Value()};
if (offset % 4 != 0) {
throw NotImplementedException("Unaligned absolute offset {}", offset);
}
const IR::U32 vertex{X(ast.vertex_reg)};
const u32 num_elements{NumElements(ast.size)};
if (ast.index_reg == IR::Reg::RZ) {
// Direct access: offset / 4 is the index of the first attribute or patch
for (u32 element = 0; element < num_elements; ++element) {
if (ast.patch != 0) {
const IR::Patch patch{offset / 4 + element};
ir.SetPatch(patch, F(ast.src_reg + element));
} else {
const IR::Attribute attr{offset / 4 + element};
ir.SetAttribute(attr, F(ast.src_reg + element), vertex);
}
}
return;
}
// NOTE(review): indexed stores are already rejected at the top of this function, so the code
// below looks unreachable. Confirm whether the early throw or this tail is the intended one.
if (ast.patch != 0) {
throw NotImplementedException("Indexed tessellation patch store");
}
HandleIndexed(*this, ast.index_reg, num_elements, [&](u32 element, IR::U32 final_offset) {
ir.SetAttributeIndexed(final_offset, F(ast.src_reg + element), vertex);
});
}
// Translates IPA (attribute read with interpolation modifiers).
//
// The attribute is read either directly or, for indexed reads, through the index register.
// Generic attributes whose input map entry is Perspective are multiplied by PositionW.
// The Multiply interpolation mode multiplies by the multiplier register and the SAT bit
// saturates the value before it is written to the destination register.
//
// Throws NotImplementedException for a saturated read of FrontFace.
void TranslatorVisitor::IPA(u64 insn) {
// IPA is the instruction used to read varyings from a fragment shader.
// gl_FragCoord is mapped to the gl_Position attribute.
// It yields unknown results when used outside of the fragment shader stage.
union {
u64 raw;
BitField<0, 8, IR::Reg> dest_reg;
BitField<8, 8, IR::Reg> index_reg;
BitField<20, 8, IR::Reg> multiplier;
BitField<30, 8, IR::Attribute> attribute;
BitField<38, 1, u64> idx;
BitField<51, 1, u64> sat;
BitField<52, 2, SampleMode> sample_mode;
BitField<54, 2, InterpolationMode> interpolation_mode;
} const ipa{insn};
// Indexed IPAs are used for indexed varyings.
// For example:
//
// in vec4 colors[4];
// uniform int idx;
// void main() {
// gl_FragColor = colors[idx];
// }
// A read only counts as indexed when the idx bit is set and the index register is not RZ
const bool is_indexed{ipa.idx != 0 && ipa.index_reg != IR::Reg::RZ};
const IR::Attribute attribute{ipa.attribute};
IR::F32 value{is_indexed ? ir.GetAttributeIndexed(X(ipa.index_reg))
: ir.GetAttribute(attribute)};
if (IR::IsGeneric(attribute)) {
// The shader program header describes how each generic input element is interpolated
const ProgramHeader& sph{env.SPH()};
const u32 attr_index{IR::GenericAttributeIndex(attribute)};
const u32 element{static_cast<u32>(attribute) % 4};
const std::array input_map{sph.ps.GenericInputMap(attr_index)};
const bool is_perspective{input_map[element] == Shader::PixelImap::Perspective};
if (is_perspective) {
const IR::F32 position_w{ir.GetAttribute(IR::Attribute::PositionW)};
value = ir.FPMul(value, position_w);
}
}
if (ipa.interpolation_mode == InterpolationMode::Multiply) {
value = ir.FPMul(value, F(ipa.multiplier));
}
// Saturated IPAs are generally generated out of clamped varyings.
// For example: clamp(some_varying, 0.0, 1.0)
const bool is_saturated{ipa.sat != 0};
if (is_saturated) {
if (attribute == IR::Attribute::FrontFace) {
throw NotImplementedException("IPA.SAT on FrontFace");
}
value = ir.FPSaturate(value);
}
F(ipa.dest_reg, value);
}
} // namespace Shader::Maxwell

View File

@@ -0,0 +1,201 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "common/bit_field.h"
#include "common/common_types.h"
#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
namespace Shader::Maxwell {
namespace {
// Values of the three-bit access size field; GetSize maps them to a bit count and signedness.
enum class Size : u64 {
U8, // 8 bits, unsigned
S8, // 8 bits, signed
U16, // 16 bits, unsigned
S16, // 16 bits, signed
B32, // 32 bits
B64, // 64 bits
B128, // 128 bits
};
// Computes the offset addressed by the instruction.
// With RZ as offset register the 24-bit field is used as an unsigned absolute offset and the
// result is an immediate. Otherwise the field is read as a signed value and added to the
// offset register.
IR::U32 Offset(TranslatorVisitor& v, u64 insn) {
    union {
        u64 raw;
        BitField<8, 8, IR::Reg> offset_reg;
        BitField<20, 24, u64> absolute_offset;
        BitField<20, 24, s64> relative_offset;
    } const encoding{insn};

    if (encoding.offset_reg != IR::Reg::RZ) {
        const s32 displacement{static_cast<s32>(encoding.relative_offset.Value())};
        const IR::U32 base{v.X(encoding.offset_reg)};
        return v.ir.IAdd(base, v.ir.Imm32(displacement));
    }
    return v.ir.Imm32(static_cast<u32>(encoding.absolute_offset));
}
std::pair<IR::U32, IR::U32> WordOffset(TranslatorVisitor& v, u64 insn) {
    // Returns {offset in 32-bit words, offset in bytes}
    const IR::U32 byte_offset{Offset(v, insn)};
    if (!byte_offset.IsImmediate()) {
        // Dynamic offset: emit the division by four as a shift
        return {v.ir.ShiftRightArithmetic(byte_offset, v.ir.Imm32(2)), byte_offset};
    }
    // Constant offset: fold the division at translation time
    return {v.ir.Imm32(byte_offset.U32() / 4), byte_offset};
}
std::pair<int, bool> GetSize(u64 insn) {
    // Decodes bits [48, 51) into {access width in bits, whether the access is signed}
    union {
        u64 raw;
        BitField<48, 3, Size> size;
    } const fields{insn};

    const Size width{fields.size.Value()};
    switch (width) {
    case Size::U8:
    case Size::S8:
        return {8, width == Size::S8};
    case Size::U16:
    case Size::S16:
        return {16, width == Size::S16};
    case Size::B32:
        return {32, false};
    case Size::B64:
        return {64, false};
    case Size::B128:
        return {128, false};
    default:
        throw NotImplementedException("Invalid size {}", width);
    }
}
IR::Reg Reg(u64 insn) {
    // The data register lives in the low eight bits of the instruction
    union {
        u64 raw;
        BitField<0, 8, IR::Reg> reg;
    } const fields{insn};
    return fields.reg.Value();
}
IR::U32 ByteOffset(IR::IREmitter& ir, const IR::U32& offset) {
    // (offset * 8) & 24: bit position of the addressed byte inside its 32-bit word
    const IR::U32 bit_position{ir.ShiftLeftLogical(offset, ir.Imm32(3))};
    return ir.BitwiseAnd(bit_position, ir.Imm32(24));
}
IR::U32 ShortOffset(IR::IREmitter& ir, const IR::U32& offset) {
    // (offset * 8) & 16: bit position of the addressed 16-bit half inside its 32-bit word
    const IR::U32 bit_position{ir.ShiftLeftLogical(offset, ir.Imm32(3))};
    return ir.BitwiseAnd(bit_position, ir.Imm32(16));
}
} // Anonymous namespace
void TranslatorVisitor::LDL(u64 insn) {
    // Loads from local memory, which is accessed one 32-bit word at a time.
    const auto [word_offset, offset]{WordOffset(*this, insn)};
    const IR::Reg dest{Reg(insn)};
    const auto [bit_size, is_signed]{GetSize(insn)};
    switch (bit_size) {
    case 8:
    case 16: {
        // Sub-word access: load the containing word and extract the addressed field
        const IR::U32 bit{bit_size == 8 ? ByteOffset(ir, offset) : ShortOffset(ir, offset)};
        const IR::U32 word{ir.LoadLocal(word_offset)};
        X(dest, ir.BitFieldExtract(word, bit, ir.Imm32(bit_size), is_signed));
        break;
    }
    case 32:
    case 64:
    case 128: {
        // One register per 32-bit word, starting at an aligned destination register
        const int num_words{bit_size / 32};
        if (!IR::IsAligned(dest, static_cast<size_t>(num_words))) {
            throw NotImplementedException("Unaligned destination register {}", dest);
        }
        X(dest, ir.LoadLocal(word_offset));
        for (int word = 1; word < num_words; ++word) {
            X(dest + word, ir.LoadLocal(ir.IAdd(word_offset, ir.Imm32(word))));
        }
        break;
    }
    }
}
void TranslatorVisitor::LDS(u64 insn) {
    // Loads from shared memory; the IR load takes the access width and signedness directly.
    const IR::U32 offset{Offset(*this, insn)};
    const IR::Reg dest{Reg(insn)};
    const auto [bit_size, is_signed]{GetSize(insn)};
    const IR::Value loaded{ir.LoadShared(bit_size, is_signed, offset)};
    switch (bit_size) {
    case 8:
    case 16:
    case 32:
        // Scalar result fits a single register
        X(dest, IR::U32{loaded});
        break;
    case 64:
    case 128: {
        // Vector result: one component per consecutive register
        const int num_words{bit_size / 32};
        if (!IR::IsAligned(dest, static_cast<size_t>(num_words))) {
            throw NotImplementedException("Unaligned destination register {}", dest);
        }
        for (int word = 0; word < num_words; ++word) {
            const IR::U32 component{ir.CompositeExtract(loaded, static_cast<size_t>(word))};
            X(dest + word, component);
        }
        break;
    }
    }
}
void TranslatorVisitor::STL(u64 insn) {
    // Stores to local memory, which is accessed one 32-bit word at a time.
    const auto [word_offset, offset]{WordOffset(*this, insn)};
    const IR::Reg reg{Reg(insn)};
    const IR::U32 src{X(reg)};
    const int bit_size{GetSize(insn).first};
    switch (bit_size) {
    case 8:
    case 16: {
        // Sub-word access: read-modify-write the containing word
        const IR::U32 bit{bit_size == 8 ? ByteOffset(ir, offset) : ShortOffset(ir, offset)};
        const IR::U32 previous{ir.LoadLocal(word_offset)};
        const IR::U32 merged{ir.BitFieldInsert(previous, src, bit, ir.Imm32(bit_size))};
        ir.WriteLocal(word_offset, merged);
        break;
    }
    case 32:
    case 64:
    case 128: {
        // One register per 32-bit word, starting at an aligned source register
        const int num_words{bit_size / 32};
        if (!IR::IsAligned(reg, static_cast<size_t>(num_words))) {
            throw NotImplementedException("Unaligned source register");
        }
        ir.WriteLocal(word_offset, src);
        for (int word = 1; word < num_words; ++word) {
            ir.WriteLocal(ir.IAdd(word_offset, ir.Imm32(word)), X(reg + word));
        }
        break;
    }
    }
}
void TranslatorVisitor::STS(u64 insn) {
    // Stores one, two or four consecutive registers to shared memory at the decoded offset.
    // Throws NotImplementedException when a multi-register store starts at a register that
    // is not aligned to the number of registers it covers.
    const IR::U32 offset{Offset(*this, insn)};
    const IR::Reg reg{Reg(insn)};
    const int bit_size{GetSize(insn).first};
    switch (bit_size) {
    case 8:
    case 16:
    case 32:
        ir.WriteShared(bit_size, offset, X(reg));
        break;
    case 64:
        if (!IR::IsAligned(reg, 2)) {
            throw NotImplementedException("Unaligned source register {}", reg);
        }
        ir.WriteShared(64, offset, ir.CompositeConstruct(X(reg), X(reg + 1)));
        break;
    case 128: {
        // A 128-bit store reads reg..reg+3, so the base register must be aligned to four
        // registers, matching the 128-bit paths of LDS, LDL and STL.
        if (!IR::IsAligned(reg, 4)) {
            throw NotImplementedException("Unaligned source register {}", reg);
        }
        const IR::Value vector{ir.CompositeConstruct(X(reg), X(reg + 1), X(reg + 2), X(reg + 3))};
        ir.WriteShared(128, offset, vector);
        break;
    }
    }
}
} // namespace Shader::Maxwell

View File

@@ -0,0 +1,184 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "common/bit_field.h"
#include "common/common_types.h"
#include "shader_recompiler/exception.h"
#include "shader_recompiler/frontend/maxwell/opcodes.h"
#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
namespace Shader::Maxwell {
namespace {
// Access size of LDG, decoded from bits [48, 51) of the instruction.
enum class LoadSize : u64 {
U8, // Zero-extend
S8, // Sign-extend
U16, // Zero-extend
S16, // Sign-extend
B32,
B64,
B128,
U128, // ??? (LDG handles it exactly like B128)
};
// Access size of STG, decoded from bits [48, 51) of the instruction.
// NOTE(review): the previous "Zero-extend"/"Sign-extend" remarks looked copied from LoadSize;
// STG forwards each value to the matching WriteGlobal* emitter.
enum class StoreSize : u64 {
U8, // Stored with WriteGlobalU8
S8, // Stored with WriteGlobalS8
U16, // Stored with WriteGlobalU16
S16, // Stored with WriteGlobalS16
B32,
B64,
B128,
};
// See Table 27 in https://docs.nvidia.com/cuda/parallel-thread-execution/index.html
// Decoded by LDG from bits [46, 48); the translation in this file does not read it.
enum class LoadCache : u64 {
CA, // Cache at all levels, likely to be accessed again
CG, // Cache at global level (cache in L2 and below, not L1)
CI, // ???
CV, // Don't cache and fetch again (consider cached system memory lines stale, fetch again)
};
// See Table 28 in https://docs.nvidia.com/cuda/parallel-thread-execution/index.html
// Decoded by STG from bits [46, 48); the translation in this file does not read it.
enum class StoreCache : u64 {
WB, // Cache write-back all coherent levels
CG, // Cache at global level
CS, // Cache streaming, likely to be accessed once
WT, // Cache write-through (to system memory)
};
IR::U64 Address(TranslatorVisitor& v, u64 insn) {
    // Builds the 64-bit global address used by LDG/STG: base register(s) plus immediate.
    union {
        u64 raw;
        BitField<8, 8, IR::Reg> addr_reg;
        BitField<20, 24, s64> addr_offset;
        BitField<20, 24, u64> rz_addr_offset;
        BitField<45, 1, u64> e;
    } const mem{insn};

    const bool is_extended{mem.e != 0};
    const IR::U64 base{[&]() -> IR::U64 {
        if (is_extended) {
            // .E form: two consecutive registers are packed into the 64-bit pointer
            if (!IR::IsAligned(mem.addr_reg, 2)) {
                throw NotImplementedException("Unaligned address register");
            }
            return v.ir.PackUint2x32(
                v.ir.CompositeConstruct(v.X(mem.addr_reg), v.X(mem.addr_reg + 1)));
        }
        // Without .E the pointer is a single 32-bit register, widened to 64 bits
        return v.ir.UConvert(64, v.X(mem.addr_reg));
    }()};

    // With RZ as base the immediate is an absolute (unsigned) address, otherwise it is a
    // signed displacement
    const bool is_absolute{mem.addr_reg == IR::Reg::RZ};
    const u64 displacement{is_absolute ? static_cast<u64>(mem.rz_addr_offset.Value())
                                       : static_cast<u64>(mem.addr_offset.Value())};
    return v.ir.IAdd(base, v.ir.Imm64(displacement));
}
} // Anonymous namespace
void TranslatorVisitor::LDG(u64 insn) {
    // Reads global memory into one, two or four registers depending on the encoded size.
    union {
        u64 raw;
        BitField<0, 8, IR::Reg> dest_reg;
        BitField<46, 2, LoadCache> cache;
        BitField<48, 3, LoadSize> size;
    } const ldg{insn};

    // Address the data is fetched from
    const IR::U64 address{Address(*this, insn)};
    const IR::Reg dest_reg{ldg.dest_reg};

    // Multi-register loads need the first register aligned to the register count
    const auto require_alignment{[&](size_t num_regs) {
        if (!IR::IsAligned(dest_reg, num_regs)) {
            throw NotImplementedException("Unaligned data registers");
        }
    }};
    // Writes each component of a loaded vector to consecutive registers
    const auto scatter{[&](const IR::Value& vector, int count) {
        for (int i = 0; i < count; ++i) {
            X(dest_reg + i, IR::U32{ir.CompositeExtract(vector, static_cast<size_t>(i))});
        }
    }};

    switch (ldg.size) {
    case LoadSize::U8:
        X(dest_reg, ir.LoadGlobalU8(address));
        break;
    case LoadSize::S8:
        X(dest_reg, ir.LoadGlobalS8(address));
        break;
    case LoadSize::U16:
        X(dest_reg, ir.LoadGlobalU16(address));
        break;
    case LoadSize::S16:
        X(dest_reg, ir.LoadGlobalS16(address));
        break;
    case LoadSize::B32:
        X(dest_reg, ir.LoadGlobal32(address));
        break;
    case LoadSize::B64:
        require_alignment(2);
        scatter(ir.LoadGlobal64(address), 2);
        break;
    case LoadSize::B128:
    case LoadSize::U128:
        require_alignment(4);
        scatter(ir.LoadGlobal128(address), 4);
        break;
    default:
        throw NotImplementedException("Invalid LDG size {}", ldg.size.Value());
    }
}
void TranslatorVisitor::STG(u64 insn) {
    // Writes one, two or four registers to global memory depending on the encoded size.
    union {
        u64 raw;
        BitField<0, 8, IR::Reg> data_reg;
        BitField<46, 2, StoreCache> cache;
        BitField<48, 3, StoreSize> size;
    } const stg{insn};

    // Address the data is written to
    const IR::U64 address{Address(*this, insn)};
    const IR::Reg data_reg{stg.data_reg};

    // Multi-register stores need the first register aligned to the register count
    const auto require_alignment{[&](size_t num_regs) {
        if (!IR::IsAligned(data_reg, num_regs)) {
            throw NotImplementedException("Unaligned data registers");
        }
    }};

    switch (stg.size) {
    case StoreSize::U8:
        ir.WriteGlobalU8(address, X(data_reg));
        break;
    case StoreSize::S8:
        ir.WriteGlobalS8(address, X(data_reg));
        break;
    case StoreSize::U16:
        ir.WriteGlobalU16(address, X(data_reg));
        break;
    case StoreSize::S16:
        ir.WriteGlobalS16(address, X(data_reg));
        break;
    case StoreSize::B32:
        ir.WriteGlobal32(address, X(data_reg));
        break;
    case StoreSize::B64: {
        require_alignment(2);
        const IR::Value pair{ir.CompositeConstruct(X(data_reg), X(data_reg + 1))};
        ir.WriteGlobal64(address, pair);
        break;
    }
    case StoreSize::B128: {
        require_alignment(4);
        const IR::Value quad{
            ir.CompositeConstruct(X(data_reg), X(data_reg + 1), X(data_reg + 2), X(data_reg + 3))};
        ir.WriteGlobal128(address, quad);
        break;
    }
    }
}
} // namespace Shader::Maxwell

View File

@@ -0,0 +1,116 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "common/bit_field.h"
#include "common/common_types.h"
#include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h"
#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
namespace Shader::Maxwell {
namespace {
// Two-operand bitwise operation selected by the LOP family of instructions.
enum class LogicalOp : u64 {
AND,
OR,
XOR,
PASS_B, // Result is the second operand; the first operand is ignored
};
[[nodiscard]] IR::U32 LogicalOperation(IR::IREmitter& ir, const IR::U32& operand_1,
                                       const IR::U32& operand_2, LogicalOp op) {
    // Emits the bitwise operation selected by op; PASS_B forwards the second operand as is.
    if (op == LogicalOp::AND) {
        return ir.BitwiseAnd(operand_1, operand_2);
    }
    if (op == LogicalOp::OR) {
        return ir.BitwiseOr(operand_1, operand_2);
    }
    if (op == LogicalOp::XOR) {
        return ir.BitwiseXor(operand_1, operand_2);
    }
    if (op == LogicalOp::PASS_B) {
        return operand_2;
    }
    throw NotImplementedException("Invalid Logical operation {}", op);
}
void LOP(TranslatorVisitor& v, u64 insn, IR::U32 op_b, bool x, bool cc, bool inv_a, bool inv_b,
         LogicalOp bit_op, std::optional<PredicateOp> pred_op = std::nullopt,
         IR::Pred dest_pred = IR::Pred::PT) {
    // Shared implementation of the LOP variants: optionally inverts the operands, applies
    // bit_op, and optionally updates a predicate and the condition-code flags.
    union {
        u64 insn;
        BitField<0, 8, IR::Reg> dest_reg;
        BitField<8, 8, IR::Reg> src_reg;
    } const regs{insn};

    if (x) {
        throw NotImplementedException("X");
    }

    IR::U32 op_a{v.X(regs.src_reg)};
    if (inv_a) {
        op_a = v.ir.BitwiseNot(op_a);
    }
    if (inv_b) {
        op_b = v.ir.BitwiseNot(op_b);
    }
    const IR::U32 result{LogicalOperation(v.ir, op_a, op_b, bit_op)};

    if (pred_op.has_value()) {
        v.ir.SetPred(dest_pred, PredicateOperation(v.ir, result, pred_op.value()));
    }

    if (cc) {
        if (bit_op != LogicalOp::PASS_B) {
            // The result comes from an emitted operation, so its flags can be queried
            v.SetZFlag(v.ir.GetZeroFromOp(result));
            v.SetSFlag(v.ir.GetSignFromOp(result));
        } else {
            // PASS_B emits no operation; derive the flags with explicit comparisons
            v.SetZFlag(v.ir.IEqual(result, v.ir.Imm32(0)));
            v.SetSFlag(v.ir.ILessThan(result, v.ir.Imm32(0), true));
        }
        v.ResetCFlag();
        v.ResetOFlag();
    }
    v.X(regs.dest_reg, result);
}
void LOP(TranslatorVisitor& v, u64 insn, const IR::U32& op_b) {
    // Decodes the modifiers common to LOP_reg/LOP_cbuf/LOP_imm and forwards to the full LOP.
    union {
        u64 insn;
        BitField<39, 1, u64> inv_a;
        BitField<40, 1, u64> inv_b;
        BitField<41, 2, LogicalOp> bit_op;
        BitField<43, 1, u64> x;
        BitField<44, 2, PredicateOp> pred_op;
        BitField<47, 1, u64> cc;
        BitField<48, 3, IR::Pred> dest_pred;
    } const fields{insn};

    const bool extended{fields.x != 0};
    const bool sets_cc{fields.cc != 0};
    const bool invert_a{fields.inv_a != 0};
    const bool invert_b{fields.inv_b != 0};
    LOP(v, insn, op_b, extended, sets_cc, invert_a, invert_b, fields.bit_op, fields.pred_op,
        fields.dest_pred);
}
} // Anonymous namespace
// LOP with the second operand read from a register.
void TranslatorVisitor::LOP_reg(u64 insn) {
LOP(*this, insn, GetReg20(insn));
}
// LOP with the second operand read from a constant buffer.
void TranslatorVisitor::LOP_cbuf(u64 insn) {
LOP(*this, insn, GetCbuf(insn));
}
// LOP with the second operand taken from the immediate returned by GetImm20.
void TranslatorVisitor::LOP_imm(u64 insn) {
LOP(*this, insn, GetImm20(insn));
}
void TranslatorVisitor::LOP32I(u64 insn) {
    // LOP with a 32-bit immediate; this encoding carries no predicate operation, so the
    // defaults of the shared LOP helper are used for it.
    union {
        u64 raw;
        BitField<53, 2, LogicalOp> bit_op;
        BitField<57, 1, u64> x;
        BitField<52, 1, u64> cc;
        BitField<55, 1, u64> inv_a;
        BitField<56, 1, u64> inv_b;
    } const fields{insn};

    const bool extended{fields.x != 0};
    const bool sets_cc{fields.cc != 0};
    const bool invert_a{fields.inv_a != 0};
    const bool invert_b{fields.inv_b != 0};
    LOP(*this, insn, GetImm32(insn), extended, sets_cc, invert_a, invert_b, fields.bit_op);
}
} // namespace Shader::Maxwell

View File

@@ -0,0 +1,122 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "common/bit_field.h"
#include "common/common_types.h"
#include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h"
#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
namespace Shader::Maxwell {
namespace {
// https://forums.developer.nvidia.com/t/reverse-lut-for-lop3-lut/110651
// Emulate GPU's LOP3.LUT (three-input logic op with 8-bit truth table)
IR::U32 ApplyLUT(IR::IREmitter& ir, const IR::U32& a, const IR::U32& b, const IR::U32& c,
                 u64 ttbl) {
    // Builds the result as an OR of the minterms enabled in the truth table. Row index bits
    // select the plain or inverted input: bit 2 -> a, bit 1 -> b, bit 0 -> c.
    IR::U32 result{ir.Imm32(0)};
    const IR::U32 not_a{ir.BitwiseNot(a)};
    const IR::U32 not_b{ir.BitwiseNot(b)};
    const IR::U32 not_c{ir.BitwiseNot(c)};
    for (u64 row = 0; row < 8; ++row) {
        if ((ttbl & (u64{1} << row)) == 0) {
            continue;
        }
        const IR::U32& term_a{(row & 4) != 0 ? a : not_a};
        const IR::U32& term_b{(row & 2) != 0 ? b : not_b};
        const IR::U32& term_c{(row & 1) != 0 ? c : not_c};
        // result |= term_a & term_b & term_c;
        const IR::U32 ab{ir.BitwiseAnd(term_a, term_b)};
        const IR::U32 minterm{ir.BitwiseAnd(ab, term_c)};
        result = ir.BitwiseOr(result, minterm);
    }
    return result;
}
IR::U32 LOP3(TranslatorVisitor& v, u64 insn, const IR::U32& op_b, const IR::U32& op_c, u64 lut) {
    // Applies the three-input truth table to (src register, op_b, op_c), stores the value in
    // the destination register and also returns it to the caller.
    union {
        u64 insn;
        BitField<0, 8, IR::Reg> dest_reg;
        BitField<8, 8, IR::Reg> src_reg;
        BitField<47, 1, u64> cc;
    } const fields{insn};

    const bool sets_cc{fields.cc != 0};
    if (sets_cc) {
        throw NotImplementedException("LOP3 CC");
    }
    const IR::U32 op_a{v.X(fields.src_reg)};
    const IR::U32 value{ApplyLUT(v.ir, op_a, op_b, op_c, lut)};
    v.X(fields.dest_reg, value);
    return value;
}
u64 GetLut48(u64 insn) {
    // Truth table stored in bits [48, 56) of the instruction
    union {
        u64 raw;
        BitField<48, 8, u64> lut;
    } const fields{insn};
    return fields.lut.Value();
}
} // Anonymous namespace
void TranslatorVisitor::LOP3_reg(u64 insn) {
    // Register form of LOP3: the truth table sits in bits [28, 36) and the result also
    // drives a destination predicate.
    union {
        u64 insn;
        BitField<28, 8, u64> lut;
        BitField<38, 1, u64> x;
        BitField<36, 2, PredicateOp> pred_op;
        BitField<48, 3, IR::Pred> pred;
    } const fields{insn};

    const bool extended{fields.x != 0};
    if (extended) {
        throw NotImplementedException("LOP3 X");
    }
    const IR::U32 value{LOP3(*this, insn, GetReg20(insn), GetReg39(insn), fields.lut)};
    ir.SetPred(fields.pred, PredicateOperation(ir, value, fields.pred_op));
}
// LOP3 with the second operand from a constant buffer; the truth table comes from GetLut48.
void TranslatorVisitor::LOP3_cbuf(u64 insn) {
LOP3(*this, insn, GetCbuf(insn), GetReg39(insn), GetLut48(insn));
}
// LOP3 with an immediate second operand; the truth table comes from GetLut48.
void TranslatorVisitor::LOP3_imm(u64 insn) {
LOP3(*this, insn, GetImm20(insn), GetReg39(insn), GetLut48(insn));
}
} // namespace Shader::Maxwell

View File

@@ -0,0 +1,66 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "common/bit_field.h"
#include "shader_recompiler/exception.h"
#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
namespace Shader::Maxwell {
namespace {
// Source selector for P2R.
enum class Mode : u64 {
PR, // Read predicate registers
CC, // Read the Z, S, C and O condition flags
};
} // Anonymous namespace
// Register-mask form of P2R: not implemented, always throws.
void TranslatorVisitor::P2R_reg(u64) {
throw NotImplementedException("P2R (reg)");
}
// Constant-buffer-mask form of P2R: not implemented, always throws.
void TranslatorVisitor::P2R_cbuf(u64) {
throw NotImplementedException("P2R (cbuf)");
}
void TranslatorVisitor::P2R_imm(u64 insn) {
    // Copies the predicates (or condition flags) selected by the immediate mask into one
    // byte of the source register value and writes the merged value to the destination.
    union {
        u64 raw;
        BitField<0, 8, IR::Reg> dest_reg;
        BitField<8, 8, IR::Reg> src;
        BitField<40, 1, Mode> mode;
        BitField<41, 2, u64> byte_selector;
    } const p2r{insn};

    const u32 mask{GetImm20(insn).U32()};
    const bool from_predicates{p2r.mode == Mode::PR};
    const u32 source_count{from_predicates ? 7U : 4U};
    const u32 shift{static_cast<u32>(p2r.byte_selector) * 8};

    // Reads the requested predicate or flag as a one-bit value
    const auto read_source{[&](u32 index) {
        if (from_predicates) {
            return ir.GetPred(IR::Pred{index});
        }
        switch (index) {
        case 0:
            return ir.GetZFlag();
        case 1:
            return ir.GetSFlag();
        case 2:
            return ir.GetCFlag();
        case 3:
            return ir.GetOFlag();
        }
        throw LogicError("Unreachable P2R index");
    }};

    IR::U32 insert{ir.Imm32(0)};
    for (u32 index = 0; index < source_count; ++index) {
        const bool selected{((mask >> index) & 1) != 0};
        if (selected) {
            const IR::U1 cond{read_source(index)};
            const IR::U32 bit{ir.Select(cond, ir.Imm32(1U << (index + shift)), ir.Imm32(0))};
            insert = ir.BitwiseOr(insert, bit);
        }
    }
    // Clear the masked bits of the source before merging the freshly read ones
    const IR::U32 preserved{ir.BitwiseAnd(X(p2r.src), ir.Imm32(~(mask << shift)))};
    X(p2r.dest_reg, ir.BitwiseOr(preserved, insert));
}
} // namespace Shader::Maxwell

View File

@@ -0,0 +1,44 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "common/bit_field.h"
#include "common/common_types.h"
#include "shader_recompiler/exception.h"
#include "shader_recompiler/frontend/maxwell/opcodes.h"
#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
namespace Shader::Maxwell {
namespace {
void MOV(TranslatorVisitor& v, u64 insn, const IR::U32& src, bool is_mov32i = false) {
    // Copies src into the destination register. Only the full write mask (0xf) is supported;
    // MOV32I stores its mask at a different bit position than the other forms.
    union {
        u64 raw;
        BitField<0, 8, IR::Reg> dest_reg;
        BitField<39, 4, u64> mask;
        BitField<12, 4, u64> mov32i_mask;
    } const mov{insn};

    const u64 write_mask{is_mov32i ? mov.mov32i_mask.Value() : mov.mask.Value()};
    const bool is_full_mask{write_mask == 0xf};
    if (!is_full_mask) {
        throw NotImplementedException("Non-full move mask");
    }
    v.X(mov.dest_reg, src);
}
} // Anonymous namespace
// MOV with a register source.
void TranslatorVisitor::MOV_reg(u64 insn) {
MOV(*this, insn, GetReg20(insn));
}
// MOV with a constant buffer source.
void TranslatorVisitor::MOV_cbuf(u64 insn) {
MOV(*this, insn, GetCbuf(insn));
}
// MOV with the immediate returned by GetImm20 as source.
void TranslatorVisitor::MOV_imm(u64 insn) {
MOV(*this, insn, GetImm20(insn));
}
// MOV with a 32-bit immediate source; tells MOV to read the MOV32I mask field.
void TranslatorVisitor::MOV32I(u64 insn) {
MOV(*this, insn, GetImm32(insn), true);
}
} // namespace Shader::Maxwell

View File

@@ -0,0 +1,71 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "common/bit_field.h"
#include "shader_recompiler/exception.h"
#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
namespace Shader::Maxwell {
namespace {
// Destination selector for R2P.
enum class Mode : u64 {
PR, // Write predicate registers
CC, // Write the Z, S, C and O condition flags
};
void SetFlag(IR::IREmitter& ir, const IR::U1& inv_mask_bit, const IR::U1& src_bit, u32 index) {
    // Updates flag number index (0=Z, 1=S, 2=C, 3=O): keeps the current flag when
    // inv_mask_bit is set, otherwise takes src_bit.
    switch (index) {
    case 0: {
        const IR::U1 merged{ir.Select(inv_mask_bit, ir.GetZFlag(), src_bit)};
        ir.SetZFlag(merged);
        break;
    }
    case 1: {
        const IR::U1 merged{ir.Select(inv_mask_bit, ir.GetSFlag(), src_bit)};
        ir.SetSFlag(merged);
        break;
    }
    case 2: {
        const IR::U1 merged{ir.Select(inv_mask_bit, ir.GetCFlag(), src_bit)};
        ir.SetCFlag(merged);
        break;
    }
    case 3: {
        const IR::U1 merged{ir.Select(inv_mask_bit, ir.GetOFlag(), src_bit)};
        ir.SetOFlag(merged);
        break;
    }
    default:
        throw LogicError("Unreachable R2P index");
    }
}
void R2P(TranslatorVisitor& v, u64 insn, const IR::U32& mask) {
    // Copies bits of one byte of the source register into predicates (or condition flags).
    // Targets whose mask bit is clear keep their current value.
    union {
        u64 raw;
        BitField<8, 8, IR::Reg> src_reg;
        BitField<40, 1, Mode> mode;
        BitField<41, 2, u64> byte_selector;
    } const r2p{insn};

    const IR::U32 source{v.X(r2p.src_reg)};
    const IR::U32 one_bit{v.ir.Imm32(1)};
    const bool to_predicates{r2p.mode == Mode::PR};
    const u32 target_count{to_predicates ? 7U : 4U};
    const u32 first_bit{static_cast<u32>(r2p.byte_selector) * 8};

    for (u32 index = 0; index < target_count; ++index) {
        // Bit of the source register that feeds this target
        const IR::U32 bit_position{v.ir.Imm32(first_bit + index)};
        const IR::U32 source_bfe{v.ir.BitFieldExtract(source, bit_position, one_bit, false)};
        const IR::U1 src_zero{v.ir.GetZeroFromOp(source_bfe)};
        const IR::U1 src_bit{v.ir.LogicalNot(src_zero)};
        // True when the mask leaves this target untouched
        const IR::U32 mask_bfe{v.ir.BitFieldExtract(mask, v.ir.Imm32(index), one_bit, false)};
        const IR::U1 inv_mask_bit{v.ir.GetZeroFromOp(mask_bfe)};

        if (!to_predicates) {
            SetFlag(v.ir, inv_mask_bit, src_bit, index);
            continue;
        }
        const IR::Pred pred{index};
        v.ir.SetPred(pred, IR::U1{v.ir.Select(inv_mask_bit, v.ir.GetPred(pred), src_bit)});
    }
}
} // Anonymous namespace
// R2P with the mask read from a register.
void TranslatorVisitor::R2P_reg(u64 insn) {
R2P(*this, insn, GetReg20(insn));
}
// R2P with the mask read from a constant buffer.
void TranslatorVisitor::R2P_cbuf(u64 insn) {
R2P(*this, insn, GetCbuf(insn));
}
// R2P with the mask taken from the immediate returned by GetImm20.
void TranslatorVisitor::R2P_imm(u64 insn) {
R2P(*this, insn, GetImm20(insn));
}
} // namespace Shader::Maxwell

View File

@@ -0,0 +1,181 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "common/bit_field.h"
#include "common/common_types.h"
#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
namespace Shader::Maxwell {
namespace {
// Special register indices as encoded in the 8-bit source field of S2R. Only the subset
// handled by Read() is translated; any other value makes Read() throw.
// NOTE(review): SM_SHADER_TYPE breaks the SR_ prefix used by the other named entries; it
// looks like a typo for SR_SHADER_TYPE - confirm before renaming.
enum class SpecialRegister : u64 {
SR_LANEID = 0,
SR_CLOCK = 1,
SR_VIRTCFG = 2,
SR_VIRTID = 3,
SR_PM0 = 4,
SR_PM1 = 5,
SR_PM2 = 6,
SR_PM3 = 7,
SR_PM4 = 8,
SR_PM5 = 9,
SR_PM6 = 10,
SR_PM7 = 11,
SR12 = 12,
SR13 = 13,
SR14 = 14,
SR_ORDERING_TICKET = 15,
SR_PRIM_TYPE = 16,
SR_INVOCATION_ID = 17,
SR_Y_DIRECTION = 18,
SR_THREAD_KILL = 19,
SM_SHADER_TYPE = 20,
SR_DIRECTCBEWRITEADDRESSLOW = 21,
SR_DIRECTCBEWRITEADDRESSHIGH = 22,
SR_DIRECTCBEWRITEENABLE = 23,
SR_MACHINE_ID_0 = 24,
SR_MACHINE_ID_1 = 25,
SR_MACHINE_ID_2 = 26,
SR_MACHINE_ID_3 = 27,
SR_AFFINITY = 28,
SR_INVOCATION_INFO = 29,
SR_WSCALEFACTOR_XY = 30,
SR_WSCALEFACTOR_Z = 31,
SR_TID = 32,
SR_TID_X = 33,
SR_TID_Y = 34,
SR_TID_Z = 35,
SR_CTA_PARAM = 36,
SR_CTAID_X = 37,
SR_CTAID_Y = 38,
SR_CTAID_Z = 39,
SR_NTID = 40,
SR_CirQueueIncrMinusOne = 41,
SR_NLATC = 42,
SR43 = 43,
SR_SM_SPA_VERSION = 44,
SR_MULTIPASSSHADERINFO = 45,
SR_LWINHI = 46,
SR_SWINHI = 47,
SR_SWINLO = 48,
SR_SWINSZ = 49,
SR_SMEMSZ = 50,
SR_SMEMBANKS = 51,
SR_LWINLO = 52,
SR_LWINSZ = 53,
SR_LMEMLOSZ = 54,
SR_LMEMHIOFF = 55,
SR_EQMASK = 56,
SR_LTMASK = 57,
SR_LEMASK = 58,
SR_GTMASK = 59,
SR_GEMASK = 60,
SR_REGALLOC = 61,
SR_BARRIERALLOC = 62,
SR63 = 63,
SR_GLOBALERRORSTATUS = 64,
SR65 = 65,
SR_WARPERRORSTATUS = 66,
SR_WARPERRORSTATUSCLEAR = 67,
SR68 = 68,
SR69 = 69,
SR70 = 70,
SR71 = 71,
SR_PM_HI0 = 72,
SR_PM_HI1 = 73,
SR_PM_HI2 = 74,
SR_PM_HI3 = 75,
SR_PM_HI4 = 76,
SR_PM_HI5 = 77,
SR_PM_HI6 = 78,
SR_PM_HI7 = 79,
SR_CLOCKLO = 80,
SR_CLOCKHI = 81,
SR_GLOBALTIMERLO = 82,
SR_GLOBALTIMERHI = 83,
SR84 = 84,
SR85 = 85,
SR86 = 86,
SR87 = 87,
SR88 = 88,
SR89 = 89,
SR90 = 90,
SR91 = 91,
SR92 = 92,
SR93 = 93,
SR94 = 94,
SR95 = 95,
SR_HWTASKID = 96,
SR_CIRCULARQUEUEENTRYINDEX = 97,
SR_CIRCULARQUEUEENTRYADDRESSLOW = 98,
SR_CIRCULARQUEUEENTRYADDRESSHIGH = 99,
};
// Emits the IR that produces the value of a special register.
// Registers marked (STUBBED) log a warning and return a fixed constant; registers without a
// case throw NotImplementedException.
[[nodiscard]] IR::U32 Read(IR::IREmitter& ir, SpecialRegister special_register) {
switch (special_register) {
case SpecialRegister::SR_INVOCATION_ID:
return ir.InvocationId();
case SpecialRegister::SR_THREAD_KILL:
// All ones for helper invocations, zero otherwise
return IR::U32{ir.Select(ir.IsHelperInvocation(), ir.Imm32(-1), ir.Imm32(0))};
case SpecialRegister::SR_INVOCATION_INFO:
LOG_WARNING(Shader, "(STUBBED) SR_INVOCATION_INFO");
return ir.Imm32(0x00ff'0000);
case SpecialRegister::SR_TID: {
// Packed thread id: the first component is the base value, the second is inserted at
// bit 16 (8 bits wide) and the third at bit 26 (6 bits wide)
const IR::Value tid{ir.LocalInvocationId()};
return ir.BitFieldInsert(ir.BitFieldInsert(IR::U32{ir.CompositeExtract(tid, 0)},
IR::U32{ir.CompositeExtract(tid, 1)},
ir.Imm32(16), ir.Imm32(8)),
IR::U32{ir.CompositeExtract(tid, 2)}, ir.Imm32(26), ir.Imm32(6));
}
case SpecialRegister::SR_TID_X:
return ir.LocalInvocationIdX();
case SpecialRegister::SR_TID_Y:
return ir.LocalInvocationIdY();
case SpecialRegister::SR_TID_Z:
return ir.LocalInvocationIdZ();
case SpecialRegister::SR_CTAID_X:
return ir.WorkgroupIdX();
case SpecialRegister::SR_CTAID_Y:
return ir.WorkgroupIdY();
case SpecialRegister::SR_CTAID_Z:
return ir.WorkgroupIdZ();
case SpecialRegister::SR_WSCALEFACTOR_XY:
LOG_WARNING(Shader, "(STUBBED) SR_WSCALEFACTOR_XY");
// Bit pattern of the float 1.0
return ir.Imm32(Common::BitCast<u32>(1.0f));
case SpecialRegister::SR_WSCALEFACTOR_Z:
LOG_WARNING(Shader, "(STUBBED) SR_WSCALEFACTOR_Z");
// Bit pattern of the float 1.0
return ir.Imm32(Common::BitCast<u32>(1.0f));
case SpecialRegister::SR_LANEID:
return ir.LaneId();
case SpecialRegister::SR_EQMASK:
return ir.SubgroupEqMask();
case SpecialRegister::SR_LTMASK:
return ir.SubgroupLtMask();
case SpecialRegister::SR_LEMASK:
return ir.SubgroupLeMask();
case SpecialRegister::SR_GTMASK:
return ir.SubgroupGtMask();
case SpecialRegister::SR_GEMASK:
return ir.SubgroupGeMask();
case SpecialRegister::SR_Y_DIRECTION:
// The value is reinterpreted as a 32-bit integer, not converted
return ir.BitCast<IR::U32>(ir.YDirection());
case SpecialRegister::SR_AFFINITY:
LOG_WARNING(Shader, "(STUBBED) SR_AFFINITY");
return ir.Imm32(0); // This is the default value hardware returns.
default:
throw NotImplementedException("S2R special register {}", special_register);
}
}
} // Anonymous namespace
void TranslatorVisitor::S2R(u64 insn) {
    // Moves the special register selected by bits [20, 28) into the destination register.
    union {
        u64 raw;
        BitField<0, 8, IR::Reg> dest_reg;
        BitField<20, 8, SpecialRegister> src_reg;
    } const fields{insn};

    const IR::U32 value{Read(ir, fields.src_reg)};
    X(fields.dest_reg, value);
}
} // namespace Shader::Maxwell

View File

@@ -0,0 +1,283 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "common/common_types.h"
#include "shader_recompiler/exception.h"
#include "shader_recompiler/frontend/maxwell/opcodes.h"
#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
namespace Shader::Maxwell {
// Throws NotImplementedException with the opcode formatted into the message.
[[noreturn]] static void ThrowNotImplemented(Opcode opcode) {
throw NotImplementedException("Instruction {} is not implemented", opcode);
}
// Every handler below either throws through ThrowNotImplemented or has an empty body
// (CAL, KIL, NOP, PBK, PCNT and SSY).
// NOTE(review): the empty handlers presumably rely on another stage to give these
// instructions their effect - confirm before changing them.
void TranslatorVisitor::ATOM_cas(u64) {
ThrowNotImplemented(Opcode::ATOM_cas);
}
void TranslatorVisitor::ATOMS_cas(u64) {
ThrowNotImplemented(Opcode::ATOMS_cas);
}
void TranslatorVisitor::B2R(u64) {
ThrowNotImplemented(Opcode::B2R);
}
void TranslatorVisitor::BPT(u64) {
ThrowNotImplemented(Opcode::BPT);
}
void TranslatorVisitor::BRA(u64) {
ThrowNotImplemented(Opcode::BRA);
}
void TranslatorVisitor::BRK(u64) {
ThrowNotImplemented(Opcode::BRK);
}
void TranslatorVisitor::CAL() {
// CAL is a no-op
}
void TranslatorVisitor::CCTL(u64) {
ThrowNotImplemented(Opcode::CCTL);
}
void TranslatorVisitor::CCTLL(u64) {
ThrowNotImplemented(Opcode::CCTLL);
}
void TranslatorVisitor::CONT(u64) {
ThrowNotImplemented(Opcode::CONT);
}
void TranslatorVisitor::CS2R(u64) {
ThrowNotImplemented(Opcode::CS2R);
}
void TranslatorVisitor::FCHK_reg(u64) {
ThrowNotImplemented(Opcode::FCHK_reg);
}
void TranslatorVisitor::FCHK_cbuf(u64) {
ThrowNotImplemented(Opcode::FCHK_cbuf);
}
void TranslatorVisitor::FCHK_imm(u64) {
ThrowNotImplemented(Opcode::FCHK_imm);
}
void TranslatorVisitor::GETCRSPTR(u64) {
ThrowNotImplemented(Opcode::GETCRSPTR);
}
void TranslatorVisitor::GETLMEMBASE(u64) {
ThrowNotImplemented(Opcode::GETLMEMBASE);
}
void TranslatorVisitor::IDE(u64) {
ThrowNotImplemented(Opcode::IDE);
}
void TranslatorVisitor::IDP_reg(u64) {
ThrowNotImplemented(Opcode::IDP_reg);
}
void TranslatorVisitor::IDP_imm(u64) {
ThrowNotImplemented(Opcode::IDP_imm);
}
void TranslatorVisitor::IMAD_reg(u64) {
ThrowNotImplemented(Opcode::IMAD_reg);
}
void TranslatorVisitor::IMAD_rc(u64) {
ThrowNotImplemented(Opcode::IMAD_rc);
}
void TranslatorVisitor::IMAD_cr(u64) {
ThrowNotImplemented(Opcode::IMAD_cr);
}
void TranslatorVisitor::IMAD_imm(u64) {
ThrowNotImplemented(Opcode::IMAD_imm);
}
void TranslatorVisitor::IMAD32I(u64) {
ThrowNotImplemented(Opcode::IMAD32I);
}
void TranslatorVisitor::IMADSP_reg(u64) {
ThrowNotImplemented(Opcode::IMADSP_reg);
}
void TranslatorVisitor::IMADSP_rc(u64) {
ThrowNotImplemented(Opcode::IMADSP_rc);
}
void TranslatorVisitor::IMADSP_cr(u64) {
ThrowNotImplemented(Opcode::IMADSP_cr);
}
void TranslatorVisitor::IMADSP_imm(u64) {
ThrowNotImplemented(Opcode::IMADSP_imm);
}
void TranslatorVisitor::IMUL_reg(u64) {
ThrowNotImplemented(Opcode::IMUL_reg);
}
void TranslatorVisitor::IMUL_cbuf(u64) {
ThrowNotImplemented(Opcode::IMUL_cbuf);
}
void TranslatorVisitor::IMUL_imm(u64) {
ThrowNotImplemented(Opcode::IMUL_imm);
}
void TranslatorVisitor::IMUL32I(u64) {
ThrowNotImplemented(Opcode::IMUL32I);
}
void TranslatorVisitor::JCAL(u64) {
ThrowNotImplemented(Opcode::JCAL);
}
void TranslatorVisitor::JMP(u64) {
ThrowNotImplemented(Opcode::JMP);
}
void TranslatorVisitor::KIL() {
// KIL is a no-op
}
void TranslatorVisitor::LD(u64) {
ThrowNotImplemented(Opcode::LD);
}
void TranslatorVisitor::LEPC(u64) {
ThrowNotImplemented(Opcode::LEPC);
}
void TranslatorVisitor::LONGJMP(u64) {
ThrowNotImplemented(Opcode::LONGJMP);
}
void TranslatorVisitor::NOP(u64) {
// NOP is No-Op.
}
void TranslatorVisitor::PBK() {
// PBK is a no-op
}
void TranslatorVisitor::PCNT() {
// PCNT is a no-op
}
void TranslatorVisitor::PEXIT(u64) {
ThrowNotImplemented(Opcode::PEXIT);
}
void TranslatorVisitor::PLONGJMP(u64) {
ThrowNotImplemented(Opcode::PLONGJMP);
}
void TranslatorVisitor::PRET(u64) {
ThrowNotImplemented(Opcode::PRET);
}
void TranslatorVisitor::PRMT_reg(u64) {
ThrowNotImplemented(Opcode::PRMT_reg);
}
void TranslatorVisitor::PRMT_rc(u64) {
ThrowNotImplemented(Opcode::PRMT_rc);
}
void TranslatorVisitor::PRMT_cr(u64) {
ThrowNotImplemented(Opcode::PRMT_cr);
}
void TranslatorVisitor::PRMT_imm(u64) {
ThrowNotImplemented(Opcode::PRMT_imm);
}
void TranslatorVisitor::R2B(u64) {
ThrowNotImplemented(Opcode::R2B);
}
void TranslatorVisitor::RAM(u64) {
ThrowNotImplemented(Opcode::RAM);
}
void TranslatorVisitor::RET(u64) {
ThrowNotImplemented(Opcode::RET);
}
void TranslatorVisitor::RTT(u64) {
ThrowNotImplemented(Opcode::RTT);
}
void TranslatorVisitor::SAM(u64) {
ThrowNotImplemented(Opcode::SAM);
}
void TranslatorVisitor::SETCRSPTR(u64) {
ThrowNotImplemented(Opcode::SETCRSPTR);
}
void TranslatorVisitor::SETLMEMBASE(u64) {
ThrowNotImplemented(Opcode::SETLMEMBASE);
}
void TranslatorVisitor::SSY() {
// SSY is a no-op
}
void TranslatorVisitor::ST(u64) {
ThrowNotImplemented(Opcode::ST);
}
void TranslatorVisitor::STP(u64) {
ThrowNotImplemented(Opcode::STP);
}
void TranslatorVisitor::SUATOM_cas(u64) {
ThrowNotImplemented(Opcode::SUATOM_cas);
}
void TranslatorVisitor::SYNC(u64) {
ThrowNotImplemented(Opcode::SYNC);
}
void TranslatorVisitor::TXA(u64) {
ThrowNotImplemented(Opcode::TXA);
}
void TranslatorVisitor::VABSDIFF(u64) {
ThrowNotImplemented(Opcode::VABSDIFF);
}
void TranslatorVisitor::VABSDIFF4(u64) {
ThrowNotImplemented(Opcode::VABSDIFF4);
}
void TranslatorVisitor::VADD(u64) {
ThrowNotImplemented(Opcode::VADD);
}
void TranslatorVisitor::VSET(u64) {
ThrowNotImplemented(Opcode::VSET);
}
void TranslatorVisitor::VSHL(u64) {
ThrowNotImplemented(Opcode::VSHL);
}
void TranslatorVisitor::VSHR(u64) {
ThrowNotImplemented(Opcode::VSHR);
}
} // namespace Shader::Maxwell

View File

@@ -0,0 +1,45 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "common/bit_field.h"
#include "common/common_types.h"
#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
namespace Shader::Maxwell {
namespace {
void OUT(TranslatorVisitor& v, u64 insn, IR::U32 stream_index) {
    // Emits a vertex and/or ends the current primitive on the given stream. Only the two low
    // bits of the stream index are used.
    union {
        u64 raw;
        BitField<0, 8, IR::Reg> dest_reg;
        BitField<8, 8, IR::Reg> output_reg; // Not needed on host
        BitField<39, 1, u64> emit;
        BitField<40, 1, u64> cut;
    } const out{insn};

    const bool emits_vertex{out.emit != 0};
    const bool cuts_primitive{out.cut != 0};

    stream_index = v.ir.BitwiseAnd(stream_index, v.ir.Imm32(0b11));
    if (emits_vertex) {
        v.ir.EmitVertex(stream_index);
    }
    if (cuts_primitive) {
        v.ir.EndPrimitive(stream_index);
    }
    // The host has no use for the output register; it is zeroed so that later reads of it
    // are not undefined
    v.X(out.dest_reg, v.ir.Imm32(0));
}
} // Anonymous namespace
// OUT with the stream index read from a register.
void TranslatorVisitor::OUT_reg(u64 insn) {
OUT(*this, insn, GetReg20(insn));
}
// OUT with the stream index read from a constant buffer.
void TranslatorVisitor::OUT_cbuf(u64 insn) {
OUT(*this, insn, GetCbuf(insn));
}
// OUT with the stream index taken from the immediate returned by GetImm20.
void TranslatorVisitor::OUT_imm(u64 insn) {
OUT(*this, insn, GetImm20(insn));
}
} // namespace Shader::Maxwell

View File

@@ -0,0 +1,46 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "common/bit_field.h"
#include "common/common_types.h"
#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
namespace Shader::Maxwell {
namespace {
// Values of the 3-bit mode field (bits 31..33) decoded by PIXLD. Enumerators are numbered from
// zero in declaration order; PIXLD currently translates MyIndex only.
enum class Mode : u64 {
Default,
CovMask,
Covered,
Offset,
CentroidOffset,
MyIndex,
};
} // Anonymous namespace
// Translates PIXLD. Only the MyIndex mode is handled, and only when the destination predicate
// is PT and the address operand is RZ with a zero offset; anything else throws
// NotImplementedException. For MyIndex the destination register receives ir.SampleId().
void TranslatorVisitor::PIXLD(u64 insn) {
    union {
        u64 raw;
        BitField<31, 3, Mode> mode;
        BitField<0, 8, IR::Reg> dest_reg;
        BitField<8, 8, IR::Reg> addr_reg;
        BitField<20, 8, s64> addr_offset;
        BitField<45, 3, IR::Pred> dest_pred;
    } const fields{insn};

    if (fields.dest_pred != IR::Pred::PT) {
        throw NotImplementedException("Destination predicate");
    }
    const bool has_zero_address{fields.addr_reg == IR::Reg::RZ && fields.addr_offset == 0};
    if (!has_zero_address) {
        throw NotImplementedException("Non-zero source register");
    }
    if (fields.mode != Mode::MyIndex) {
        throw NotImplementedException("Mode {}", fields.mode.Value());
    }
    X(fields.dest_reg, ir.SampleId());
}
} // namespace Shader::Maxwell

View File

@@ -0,0 +1,38 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "common/bit_field.h"
#include "common/common_types.h"
#include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h"
#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
namespace Shader::Maxwell {
// Translates PSETP: combines three (optionally negated) source predicates and writes two
// destination predicates.
//   dest_pred_a = ( pred_a bop_1 pred_b) bop_2 pred_c
//   dest_pred_b = (!pred_a bop_1 pred_b) bop_2 pred_c
// Both results are computed before either destination is written.
void TranslatorVisitor::PSETP(u64 insn) {
    union {
        u64 raw;
        BitField<0, 3, IR::Pred> dest_pred_b;
        BitField<3, 3, IR::Pred> dest_pred_a;
        BitField<12, 3, IR::Pred> pred_a;
        BitField<15, 1, u64> neg_pred_a;
        BitField<24, 2, BooleanOp> bop_1;
        BitField<29, 3, IR::Pred> pred_b;
        BitField<32, 1, u64> neg_pred_b;
        BitField<39, 3, IR::Pred> pred_c;
        BitField<42, 1, u64> neg_pred_c;
        BitField<45, 2, BooleanOp> bop_2;
    } const fields{insn};

    // Source predicates, with their negate bits applied
    const IR::U1 src_a{ir.GetPred(fields.pred_a, fields.neg_pred_a != 0)};
    const IR::U1 src_b{ir.GetPred(fields.pred_b, fields.neg_pred_b != 0)};
    const IR::U1 src_c{ir.GetPred(fields.pred_c, fields.neg_pred_c != 0)};

    // First stage: A (and its complement) against B
    const IR::U1 stage_one{PredicateCombine(ir, src_a, src_b, fields.bop_1)};
    const IR::U1 inverted_a{ir.LogicalNot(src_a)};
    const IR::U1 stage_one_inverted{PredicateCombine(ir, inverted_a, src_b, fields.bop_1)};

    // Second stage: fold in C
    const IR::U1 out_a{PredicateCombine(ir, stage_one, src_c, fields.bop_2)};
    const IR::U1 out_b{PredicateCombine(ir, stage_one_inverted, src_c, fields.bop_2)};

    ir.SetPred(fields.dest_pred_a, out_a);
    ir.SetPred(fields.dest_pred_b, out_b);
}
} // namespace Shader::Maxwell

View File

@@ -0,0 +1,53 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "common/bit_field.h"
#include "common/common_types.h"
#include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h"
#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
namespace Shader::Maxwell {
// Translates PSET: evaluates (pred_a bop_1 pred_b) bop_2 pred_c and materialises the boolean in
// a register. A true result is written as 0x3f800000 when the bf bit is set and as -1 otherwise;
// a false result is written as 0. When the cc bit is set the Z/S/O/C flags are updated as well.
void TranslatorVisitor::PSET(u64 insn) {
    union {
        u64 raw;
        BitField<0, 8, IR::Reg> dest_reg;
        BitField<12, 3, IR::Pred> pred_a;
        BitField<15, 1, u64> neg_pred_a;
        BitField<24, 2, BooleanOp> bop_1;
        BitField<29, 3, IR::Pred> pred_b;
        BitField<32, 1, u64> neg_pred_b;
        BitField<39, 3, IR::Pred> pred_c;
        BitField<42, 1, u64> neg_pred_c;
        BitField<44, 1, u64> bf;
        BitField<45, 2, BooleanOp> bop_2;
        BitField<47, 1, u64> cc;
    } const fields{insn};

    const IR::U1 src_a{ir.GetPred(fields.pred_a, fields.neg_pred_a != 0)};
    const IR::U1 src_b{ir.GetPred(fields.pred_b, fields.neg_pred_b != 0)};
    const IR::U1 src_c{ir.GetPred(fields.pred_c, fields.neg_pred_c != 0)};
    const IR::U1 stage_one{PredicateCombine(ir, src_a, src_b, fields.bop_1)};
    const IR::U1 condition{PredicateCombine(ir, stage_one, src_c, fields.bop_2)};

    const bool float_result{fields.bf != 0};
    const IR::U32 true_result{float_result ? ir.Imm32(0x3f800000) : ir.Imm32(-1)};
    const IR::U32 zero{ir.Imm32(0)};
    const IR::U32 result{ir.Select(condition, true_result, zero)};
    X(fields.dest_reg, result);

    if (fields.cc == 0) {
        return;
    }
    // Condition code update
    const IR::U1 is_zero{ir.IEqual(result, zero)};
    SetZFlag(is_zero);
    if (float_result) {
        ResetSFlag();
    } else {
        SetSFlag(ir.LogicalNot(is_zero));
    }
    ResetOFlag();
    ResetCFlag();
}
} // namespace Shader::Maxwell

View File

@@ -0,0 +1,44 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "common/bit_field.h"
#include "common/common_types.h"
#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
namespace Shader::Maxwell {
namespace {
// Common body of the SEL variants: writes to dest_reg either the src_reg value or `src`,
// chosen by a predicate. With neg_pred clear the register value is picked when the predicate is
// true; with neg_pred set the two operands trade places.
void SEL(TranslatorVisitor& v, u64 insn, const IR::U32& src) {
    union {
        u64 raw;
        BitField<0, 8, IR::Reg> dest_reg;
        BitField<8, 8, IR::Reg> src_reg;
        BitField<39, 3, IR::Pred> pred;
        BitField<42, 1, u64> neg_pred;
    } const fields{insn};

    const IR::U1 condition = v.ir.GetPred(fields.pred);
    const IR::U32 reg_value{v.X(fields.src_reg)};
    const bool swapped{fields.neg_pred != 0};
    // Choose the operand order up front instead of swapping afterwards
    const IR::U32& if_true = swapped ? src : reg_value;
    const IR::U32& if_false = swapped ? reg_value : src;
    const IR::U32 selected{v.ir.Select(condition, if_true, if_false)};
    v.X(fields.dest_reg, selected);
}
} // Anonymous namespace
// SEL variant whose second operand is obtained through GetReg20.
void TranslatorVisitor::SEL_reg(u64 insn) {
    const IR::U32 operand{GetReg20(insn)};
    SEL(*this, insn, operand);
}
// SEL variant whose second operand is obtained through GetCbuf.
void TranslatorVisitor::SEL_cbuf(u64 insn) {
    const IR::U32 operand{GetCbuf(insn)};
    SEL(*this, insn, operand);
}
// SEL variant whose second operand is obtained through GetImm20.
void TranslatorVisitor::SEL_imm(u64 insn) {
    const IR::U32 operand{GetImm20(insn)};
    SEL(*this, insn, operand);
}
} // namespace Shader::Maxwell

View File

@@ -0,0 +1,208 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <array>
#include <bit>
#include "common/bit_field.h"
#include "common/common_types.h"
#include "shader_recompiler/frontend/ir/modifiers.h"
#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
namespace Shader::Maxwell {
namespace {
// Surface type selector decoded from the 3-bit `type` field (bits 33..35) of SUATOM and SURED.
// Enumerators are numbered from zero in declaration order.
enum class Type : u64 {
_1D,
BUFFER_1D,
ARRAY_1D,
_2D,
ARRAY_2D,
_3D,
};
// Operand size selector decoded from a 3-bit field of SUATOM and SURED. Only U32, S32 and SD32
// are accepted by IsSizeInt32; ImageAtomOp rejects every other value.
enum class Size : u64 {
U32,
S32,
U64,
S64,
F32FTZRN,
F16x2FTZRN,
SD32,
SD64,
};
// Atomic operation selector decoded by SUATOM and SURED; ApplyAtomicOp maps each enumerator to
// the matching ImageAtomic* emitter call.
enum class AtomicOp : u64 {
ADD,
MIN,
MAX,
INC,
DEC,
AND,
OR,
XOR,
EXCH,
};
// Clamp mode decoded from the 2-bit field at bits 49..50. ImageAtomOp only accepts IGN and
// throws for the other values.
enum class Clamp : u64 {
IGN,
Default,
TRAP,
};
// Maps the instruction's surface type to the shader-level TextureType.
// Throws NotImplementedException when `type` is none of the declared enumerators.
TextureType GetType(Type type) {
    // clang-format off
    switch (type) {
    case Type::_1D:       return TextureType::Color1D;
    case Type::BUFFER_1D: return TextureType::Buffer;
    case Type::ARRAY_1D:  return TextureType::ColorArray1D;
    case Type::_2D:       return TextureType::Color2D;
    case Type::ARRAY_2D:  return TextureType::ColorArray2D;
    case Type::_3D:       return TextureType::Color3D;
    }
    // clang-format on
    throw NotImplementedException("Invalid type {}", type);
}
// Builds the coordinate operand for a surface atomic from consecutive registers starting at
// `reg`: one register for 1D and buffer surfaces, two for 2D and three for 3D.
// Array surface types (ARRAY_1D, ARRAY_2D) are not handled here; they, and any value outside the
// enum, throw NotImplementedException.
IR::Value MakeCoords(TranslatorVisitor& v, IR::Reg reg, Type type) {
    switch (type) {
    case Type::_1D:
    case Type::BUFFER_1D:
        return v.X(reg);
    case Type::_2D:
        return v.ir.CompositeConstruct(v.X(reg), v.X(reg + 1));
    case Type::_3D:
        return v.ir.CompositeConstruct(v.X(reg), v.X(reg + 1), v.X(reg + 2));
    default:
        break;
    }
    throw NotImplementedException("Invalid type {}", type);
}
// Emits the image atomic selected by `op` on the image `handle` at `coords` with operand `op_b`
// and returns the emitter's result. `is_signed` is forwarded to the MIN and MAX variants only.
// Throws NotImplementedException for an operation without a matching case.
IR::Value ApplyAtomicOp(IR::IREmitter& ir, const IR::U32& handle, const IR::Value& coords,
                        const IR::Value& op_b, IR::TextureInstInfo info, AtomicOp op,
                        bool is_signed) {
    // clang-format off
    switch (op) {
    case AtomicOp::ADD:  return ir.ImageAtomicIAdd(handle, coords, op_b, info);
    case AtomicOp::MIN:  return ir.ImageAtomicIMin(handle, coords, op_b, is_signed, info);
    case AtomicOp::MAX:  return ir.ImageAtomicIMax(handle, coords, op_b, is_signed, info);
    case AtomicOp::INC:  return ir.ImageAtomicInc(handle, coords, op_b, info);
    case AtomicOp::DEC:  return ir.ImageAtomicDec(handle, coords, op_b, info);
    case AtomicOp::AND:  return ir.ImageAtomicAnd(handle, coords, op_b, info);
    case AtomicOp::OR:   return ir.ImageAtomicOr(handle, coords, op_b, info);
    case AtomicOp::XOR:  return ir.ImageAtomicXor(handle, coords, op_b, info);
    case AtomicOp::EXCH: return ir.ImageAtomicExchange(handle, coords, op_b, info);
    default:
        throw NotImplementedException("Atomic Operation {}", op);
    }
    // clang-format on
}
// Image format used for a surface atomic of the given size. The three 32-bit integer sizes all
// map to R32_UINT; every other size throws NotImplementedException.
ImageFormat Format(Size size) {
    const bool is_int32{size == Size::U32 || size == Size::S32 || size == Size::SD32};
    if (is_int32) {
        return ImageFormat::R32_UINT;
    }
    throw NotImplementedException("Invalid size {}", size);
}
// True for U32, S32 and SD32; false for every other size.
bool IsSizeInt32(Size size) {
    return size == Size::U32 || size == Size::S32 || size == Size::SD32;
}
// Shared implementation of SUATOM and SURED.
// dest_reg:      register receiving the atomic's result (written only when write_result is set)
// operand_reg:   register holding the atomic operand
// coord_reg:     first coordinate register, see MakeCoords
// bindless_reg:  register holding the image handle when is_bindless is set
// bound_offset:  multiplied by 4 and used as an immediate handle when is_bindless is clear
// Throws NotImplementedException for any clamp other than IGN and for sizes that are not one of
// the 32-bit integer sizes.
void ImageAtomOp(TranslatorVisitor& v, IR::Reg dest_reg, IR::Reg operand_reg, IR::Reg coord_reg,
                 IR::Reg bindless_reg, AtomicOp op, Clamp clamp, Size size, Type type,
                 u64 bound_offset, bool is_bindless, bool write_result) {
    if (clamp != Clamp::IGN) {
        throw NotImplementedException("Clamp {}", clamp);
    }
    if (!IsSizeInt32(size)) {
        throw NotImplementedException("Size {}", size);
    }
    const bool is_signed{size == Size::S32};
    const ImageFormat format{Format(size)};
    const TextureType tex_type{GetType(type)};
    const IR::Value coords{MakeCoords(v, coord_reg, type)};

    IR::U32 handle;
    if (is_bindless) {
        handle = v.X(bindless_reg);
    } else {
        handle = v.ir.Imm32(static_cast<u32>(bound_offset * 4));
    }

    IR::TextureInstInfo info{};
    info.type.Assign(tex_type);
    info.image_format.Assign(format);

    // TODO: float/64-bit operand
    const IR::Value operand{v.X(operand_reg)};
    const IR::Value previous{ApplyAtomicOp(v.ir, handle, coords, operand, info, op, is_signed)};
    if (!write_result) {
        return;
    }
    v.X(dest_reg, IR::U32{previous});
}
} // Anonymous namespace
// Translates SUATOM: decodes the instruction fields and forwards them to ImageAtomOp, asking
// for the atomic's result to be written to dest_reg.
void TranslatorVisitor::SUATOM(u64 insn) {
    union {
        u64 raw;
        BitField<54, 1, u64> is_bindless;
        BitField<29, 4, AtomicOp> op;
        BitField<33, 3, Type> type;
        BitField<51, 3, Size> size;
        BitField<49, 2, Clamp> clamp;
        BitField<0, 8, IR::Reg> dest_reg;
        BitField<8, 8, IR::Reg> coord_reg;
        BitField<20, 8, IR::Reg> operand_reg;
        BitField<36, 13, u64> bound_offset;    // !is_bindless
        BitField<39, 8, IR::Reg> bindless_reg; // is_bindless
    } const fields{insn};

    const bool uses_bindless_handle{fields.is_bindless != 0};
    constexpr bool store_result{true};
    ImageAtomOp(*this, fields.dest_reg, fields.operand_reg, fields.coord_reg, fields.bindless_reg,
                fields.op, fields.clamp, fields.size, fields.type, fields.bound_offset,
                uses_bindless_handle, store_result);
}
// Translates SURED: same path as SUATOM through ImageAtomOp, but no result register is written
// (RZ is passed as destination). The handle is bindless when the is_bound bit is clear.
void TranslatorVisitor::SURED(u64 insn) {
    // TODO: confirm offsets
    union {
        u64 raw;
        BitField<51, 1, u64> is_bound;
        BitField<21, 3, AtomicOp> op;
        BitField<33, 3, Type> type;
        BitField<20, 3, Size> size;
        BitField<49, 2, Clamp> clamp;
        BitField<0, 8, IR::Reg> operand_reg;
        BitField<8, 8, IR::Reg> coord_reg;
        BitField<36, 13, u64> bound_offset;    // is_bound
        BitField<39, 8, IR::Reg> bindless_reg; // !is_bound
    } const fields{insn};

    const bool uses_bindless_handle{fields.is_bound == 0};
    constexpr bool store_result{false};
    ImageAtomOp(*this, IR::Reg::RZ, fields.operand_reg, fields.coord_reg, fields.bindless_reg,
                fields.op, fields.clamp, fields.size, fields.type, fields.bound_offset,
                uses_bindless_handle, store_result);
}
} // namespace Shader::Maxwell

View File

@@ -0,0 +1,281 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <array>
#include <bit>
#include "common/bit_field.h"
#include "common/common_types.h"
#include "shader_recompiler/frontend/ir/modifiers.h"
#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
namespace Shader::Maxwell {
namespace {
// Surface type selector decoded from the 3-bit `type` field (bits 33..35) of SULD and SUST.
// Enumerators are numbered from zero in declaration order.
enum class Type : u64 {
_1D,
BUFFER_1D,
ARRAY_1D,
_2D,
ARRAY_2D,
_3D,
};
// One bit per color component.
constexpr unsigned R = 0b0001;
constexpr unsigned G = 0b0010;
constexpr unsigned B = 0b0100;
constexpr unsigned A = 0b1000;

// Component mask for each 4-bit swizzle encoding. With the bit assignment above, entry i has
// exactly the bits of i set.
constexpr std::array<unsigned, 16> MASK{
    0U,            R,         G,         G | R,     //
    B,             B | R,     B | G,     B | G | R, //
    A,             A | R,     A | G,     A | G | R, //
    A | B,         A | B | R, A | B | G, A | B | G | R,
};
// Access size selector decoded from a 3-bit field of SULD and SUST. It is consulted only when
// the instruction's `d` bit is set; Format and SizeInRegs map it to an image format and a
// register count.
enum class Size : u64 {
U8,
S8,
U16,
S16,
B32,
B64,
B128,
};
// Clamp mode decoded from the 2-bit field at bits 49..50. SULD and SUST only accept IGN and
// throw for the other values.
enum class Clamp : u64 {
IGN,
Default,
TRAP,
};
// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#cache-operators
// Cache operator of a surface load (2-bit field at bits 24..25). SULD accepts CA and CG and
// throws for CI and CV.
enum class LoadCache : u64 {
CA, // Cache at all levels, likely to be accessed again
CG, // Cache at global level (L2 and below, not L1)
CI, // ???
CV, // Don't cache and fetch again (volatile)
};
// Cache operator of a surface store (2-bit field at bits 24..25). SUST accepts WB and CG and
// throws for CS and WT.
enum class StoreCache : u64 {
WB, // Cache write-back all coherent levels
CG, // Cache at global level (L2 and below, not L1)
CS, // Cache streaming, likely to be accessed once
WT, // Cache write-through (to system memory, volatile?)
};
// Image format corresponding to a typed surface access of the given size.
// Throws NotImplementedException when `size` is none of the declared enumerators.
ImageFormat Format(Size size) {
    // clang-format off
    switch (size) {
    case Size::U8:   return ImageFormat::R8_UINT;
    case Size::S8:   return ImageFormat::R8_SINT;
    case Size::U16:  return ImageFormat::R16_UINT;
    case Size::S16:  return ImageFormat::R16_SINT;
    case Size::B32:  return ImageFormat::R32_UINT;
    case Size::B64:  return ImageFormat::R32G32_UINT;
    case Size::B128: return ImageFormat::R32G32B32A32_UINT;
    }
    // clang-format on
    throw NotImplementedException("Invalid size {}", size);
}
// Number of 32-bit registers a typed access of the given size occupies: 1 for everything up to
// B32, 2 for B64 and 4 for B128.
// Throws NotImplementedException when `size` is none of the declared enumerators.
int SizeInRegs(Size size) {
    // clang-format off
    switch (size) {
    case Size::U8: case Size::S8: case Size::U16: case Size::S16: case Size::B32:
        return 1;
    case Size::B64:  return 2;
    case Size::B128: return 4;
    }
    // clang-format on
    throw NotImplementedException("Invalid size {}", size);
}
// Maps the instruction's surface type to the shader-level TextureType.
// Throws NotImplementedException when `type` is none of the declared enumerators.
TextureType GetType(Type type) {
    // clang-format off
    switch (type) {
    case Type::_1D:       return TextureType::Color1D;
    case Type::BUFFER_1D: return TextureType::Buffer;
    case Type::ARRAY_1D:  return TextureType::ColorArray1D;
    case Type::_2D:       return TextureType::Color2D;
    case Type::ARRAY_2D:  return TextureType::ColorArray2D;
    case Type::_3D:       return TextureType::Color3D;
    }
    // clang-format on
    throw NotImplementedException("Invalid type {}", type);
}
// Builds the coordinate operand for a surface access from consecutive registers starting at
// `reg`. For array types the layer comes last and is taken from the low 16 bits of its register.
// Throws NotImplementedException when `type` is none of the declared enumerators.
IR::Value MakeCoords(TranslatorVisitor& v, IR::Reg reg, Type type) {
    // Low 16 bits of the register `offset` slots after `reg`
    const auto layer{[&](int offset) {
        return v.ir.BitFieldExtract(v.X(reg + offset), v.ir.Imm32(0), v.ir.Imm32(16));
    }};
    // clang-format off
    switch (type) {
    case Type::_1D:
    case Type::BUFFER_1D: return v.X(reg);
    case Type::ARRAY_1D:  return v.ir.CompositeConstruct(v.X(reg), layer(1));
    case Type::_2D:       return v.ir.CompositeConstruct(v.X(reg), v.X(reg + 1));
    case Type::ARRAY_2D:  return v.ir.CompositeConstruct(v.X(reg), v.X(reg + 1), layer(2));
    case Type::_3D:       return v.ir.CompositeConstruct(v.X(reg), v.X(reg + 1), v.X(reg + 2));
    }
    // clang-format on
    throw NotImplementedException("Invalid type {}", type);
}
// Looks up the component mask for a swizzle encoding in MASK.
// Encoding 0 and encodings past the end of the table throw NotImplementedException.
unsigned SwizzleMask(u64 swizzle) {
    const bool is_valid{swizzle != 0 && swizzle < MASK.size()};
    if (is_valid) {
        return MASK[swizzle];
    }
    throw NotImplementedException("Invalid swizzle {}", swizzle);
}
// Builds a four-component color: the first `num_regs` components are read from consecutive
// registers starting at `reg`, the remaining ones are the immediate 0.
// `num_regs` is expected to lie in [0, 4].
IR::Value MakeColor(IR::IREmitter& ir, IR::Reg reg, int num_regs) {
    std::array<IR::U32, 4> components;
    for (int index = 0; index < 4; ++index) {
        components[index] = index < num_regs ? ir.GetReg(reg + index) : ir.Imm32(0);
    }
    const auto& [red, green, blue, alpha] = components;
    return ir.CompositeConstruct(red, green, blue, alpha);
}
} // Anonymous namespace
// Translates SULD (surface load).
// Typed loads (`d` bit set) use the format given by `size` and write SizeInRegs(size) consecutive
// registers. Untyped loads use ImageFormat::Typeless and write one register per component
// selected by the swizzle mask, in component order.
// Throws NotImplementedException for clamps other than IGN, cache modes other than CA/CG, typed
// loads with the `ba` bit, and untyped loads whose destination register is not aligned.
void TranslatorVisitor::SULD(u64 insn) {
    union {
        u64 raw;
        BitField<51, 1, u64> is_bound;
        BitField<52, 1, u64> d;
        BitField<23, 1, u64> ba;
        BitField<33, 3, Type> type;
        BitField<24, 2, LoadCache> cache;
        BitField<20, 3, Size> size;            // .D
        BitField<20, 4, u64> swizzle;          // .P
        BitField<49, 2, Clamp> clamp;
        BitField<0, 8, IR::Reg> dest_reg;
        BitField<8, 8, IR::Reg> coord_reg;
        BitField<36, 13, u64> bound_offset;    // is_bound
        BitField<39, 8, IR::Reg> bindless_reg; // !is_bound
    } const fields{insn};

    if (fields.clamp != Clamp::IGN) {
        throw NotImplementedException("Clamp {}", fields.clamp.Value());
    }
    const bool cache_supported{fields.cache == LoadCache::CA || fields.cache == LoadCache::CG};
    if (!cache_supported) {
        throw NotImplementedException("Cache {}", fields.cache.Value());
    }
    const bool is_typed{fields.d != 0};
    if (is_typed && fields.ba != 0) {
        throw NotImplementedException("BA");
    }

    const ImageFormat format{is_typed ? Format(fields.size) : ImageFormat::Typeless};
    const TextureType tex_type{GetType(fields.type)};
    const IR::Value coords{MakeCoords(*this, fields.coord_reg, fields.type)};
    const IR::U32 handle{fields.is_bound != 0
                             ? ir.Imm32(static_cast<u32>(fields.bound_offset * 4))
                             : X(fields.bindless_reg)};
    IR::TextureInstInfo info{};
    info.type.Assign(tex_type);
    info.image_format.Assign(format);
    const IR::Value texel{ir.ImageRead(handle, coords, info)};

    IR::Reg dest_reg{fields.dest_reg};
    if (is_typed) {
        const int num_regs{SizeInRegs(fields.size)};
        for (int element = 0; element < num_regs; ++element) {
            X(dest_reg + element, IR::U32{ir.CompositeExtract(texel, element)});
        }
        return;
    }

    const unsigned mask{SwizzleMask(fields.swizzle)};
    const int bits{std::popcount(mask)};
    // Three-component writes need the same alignment as four-component ones
    if (!IR::IsAligned(dest_reg, bits == 3 ? 4 : bits)) {
        throw NotImplementedException("Unaligned destination register");
    }
    for (unsigned component = 0; component < 4; ++component) {
        const bool is_selected{((mask >> component) & 1) != 0};
        if (is_selected) {
            X(dest_reg, IR::U32{ir.CompositeExtract(texel, component)});
            ++dest_reg;
        }
    }
}
// Translates SUST (surface store).
// Typed stores (`d` bit set) read SizeInRegs(size) registers starting at data_reg; untyped stores
// require the full RGBA swizzle mask and read four registers. Missing components are zero.
// Throws NotImplementedException for clamps other than IGN, cache modes other than WB/CG, typed
// stores with the `ba` bit, and untyped stores with a partial mask.
void TranslatorVisitor::SUST(u64 insn) {
    union {
        u64 raw;
        BitField<51, 1, u64> is_bound;
        BitField<52, 1, u64> d;
        BitField<23, 1, u64> ba;
        BitField<33, 3, Type> type;
        BitField<24, 2, StoreCache> cache;
        BitField<20, 3, Size> size;            // .D
        BitField<20, 4, u64> swizzle;          // .P
        BitField<49, 2, Clamp> clamp;
        BitField<0, 8, IR::Reg> data_reg;
        BitField<8, 8, IR::Reg> coord_reg;
        BitField<36, 13, u64> bound_offset;    // is_bound
        BitField<39, 8, IR::Reg> bindless_reg; // !is_bound
    } const fields{insn};

    if (fields.clamp != Clamp::IGN) {
        throw NotImplementedException("Clamp {}", fields.clamp.Value());
    }
    const bool cache_supported{fields.cache == StoreCache::WB || fields.cache == StoreCache::CG};
    if (!cache_supported) {
        throw NotImplementedException("Cache {}", fields.cache.Value());
    }
    const bool is_typed{fields.d != 0};
    if (is_typed && fields.ba != 0) {
        throw NotImplementedException("BA");
    }

    const ImageFormat format{is_typed ? Format(fields.size) : ImageFormat::Typeless};
    const TextureType tex_type{GetType(fields.type)};
    const IR::Value coords{MakeCoords(*this, fields.coord_reg, fields.type)};
    const IR::U32 handle{fields.is_bound != 0
                             ? ir.Imm32(static_cast<u32>(fields.bound_offset * 4))
                             : X(fields.bindless_reg)};
    IR::TextureInstInfo info{};
    info.type.Assign(tex_type);
    info.image_format.Assign(format);

    int num_regs{4};
    if (is_typed) {
        num_regs = SizeInRegs(fields.size);
    } else if (SwizzleMask(fields.swizzle) != 0xf) {
        throw NotImplementedException("Non-full mask");
    }
    const IR::Value color{MakeColor(ir, fields.data_reg, num_regs)};
    ir.ImageWrite(handle, coords, color, info);
}
} // namespace Shader::Maxwell

View File

@@ -0,0 +1,236 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <optional>
#include "common/bit_field.h"
#include "common/common_types.h"
#include "shader_recompiler/frontend/ir/modifiers.h"
#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
namespace Shader::Maxwell {
namespace {
// Level-of-detail mode decoded from a 3-bit field of TEX / TEX_b. MakeLod rejects the two
// INVALIDBLOD values; HasExplicitLod reports LL, LLA and LZ as explicit-LOD modes.
enum class Blod : u64 {
None,
LZ,
LB,
LL,
INVALIDBLOD4,
INVALIDBLOD5,
LBA,
LLA,
};
// Texture type as encoded in the instruction (3-bit field at bits 28..30 in Impl). GetType
// converts it to Shader::TextureType; ARRAY_3D is not supported.
enum class TextureType : u64 {
_1D,
ARRAY_1D,
_2D,
ARRAY_2D,
_3D,
ARRAY_3D,
CUBE,
ARRAY_CUBE,
};
// Converts the instruction-encoded texture type to Shader::TextureType.
// Throws NotImplementedException for ARRAY_3D and for values outside the enum.
Shader::TextureType GetType(TextureType type) {
    // clang-format off
    switch (type) {
    case TextureType::_1D:        return Shader::TextureType::Color1D;
    case TextureType::ARRAY_1D:   return Shader::TextureType::ColorArray1D;
    case TextureType::_2D:        return Shader::TextureType::Color2D;
    case TextureType::ARRAY_2D:   return Shader::TextureType::ColorArray2D;
    case TextureType::_3D:        return Shader::TextureType::Color3D;
    case TextureType::ARRAY_3D:   throw NotImplementedException("3D array texture type");
    case TextureType::CUBE:       return Shader::TextureType::ColorCube;
    case TextureType::ARRAY_CUBE: return Shader::TextureType::ColorArrayCube;
    }
    // clang-format on
    throw NotImplementedException("Invalid texture type {}", type);
}
// Builds the floating-point coordinate operand for a TEX sample.
// Non-array types read consecutive registers starting at `reg`. Array types keep the layer in
// `reg` itself (converted from unsigned to float) and the spatial coordinates in the registers
// that follow; the layer becomes the last component.
// Throws NotImplementedException for ARRAY_3D and for values outside the enum.
IR::Value MakeCoords(TranslatorVisitor& v, IR::Reg reg, TextureType type) {
    const auto layer{[&]() -> IR::F32 { return v.ir.ConvertUToF(32, 16, v.X(reg)); }};
    // clang-format off
    switch (type) {
    case TextureType::_1D:
        return v.F(reg);
    case TextureType::ARRAY_1D:
        return v.ir.CompositeConstruct(v.F(reg + 1), layer());
    case TextureType::_2D:
        return v.ir.CompositeConstruct(v.F(reg), v.F(reg + 1));
    case TextureType::ARRAY_2D:
        return v.ir.CompositeConstruct(v.F(reg + 1), v.F(reg + 2), layer());
    case TextureType::_3D:
        return v.ir.CompositeConstruct(v.F(reg), v.F(reg + 1), v.F(reg + 2));
    case TextureType::ARRAY_3D:
        throw NotImplementedException("3D array texture type");
    case TextureType::CUBE:
        return v.ir.CompositeConstruct(v.F(reg), v.F(reg + 1), v.F(reg + 2));
    case TextureType::ARRAY_CUBE:
        return v.ir.CompositeConstruct(v.F(reg + 1), v.F(reg + 2), v.F(reg + 3), layer());
    }
    // clang-format on
    throw NotImplementedException("Invalid texture type {}", type);
}
// Produces the LOD / bias operand for the given mode.
// None and LZ yield the immediate 0.0f and leave `reg` untouched. LB, LL, LBA and LLA read the
// value from `reg` and advance `reg` by one. The two invalid encodings throw
// NotImplementedException.
IR::F32 MakeLod(TranslatorVisitor& v, IR::Reg& reg, Blod blod) {
    switch (blod) {
    case Blod::None:
    case Blod::LZ:
        return v.ir.Imm32(0.0f);
    case Blod::LB:
    case Blod::LL:
    case Blod::LBA:
    case Blod::LLA:
        return v.F(reg++);
    case Blod::INVALIDBLOD4:
    case Blod::INVALIDBLOD5:
        break;
    }
    throw NotImplementedException("Invalid blod {}", blod);
}
// Reads the packed texel offset from `reg` (advancing `reg` by one) and unpacks it into one
// signed 4-bit field per dimension, at bit positions 0, 4 and 8.
// Cube types throw NotImplementedException; the register is consumed before that check.
IR::Value MakeOffset(TranslatorVisitor& v, IR::Reg& reg, TextureType type) {
    const IR::U32 packed{v.X(reg++)};
    // Sign-extended 4-bit field starting at bit `first_bit`
    const auto field{[&](int first_bit) {
        return v.ir.BitFieldExtract(packed, v.ir.Imm32(first_bit), v.ir.Imm32(4), true);
    }};
    switch (type) {
    case TextureType::_1D:
    case TextureType::ARRAY_1D:
        return field(0);
    case TextureType::_2D:
    case TextureType::ARRAY_2D:
        return v.ir.CompositeConstruct(field(0), field(4));
    case TextureType::_3D:
    case TextureType::ARRAY_3D:
        return v.ir.CompositeConstruct(field(0), field(4), field(8));
    case TextureType::CUBE:
    case TextureType::ARRAY_CUBE:
        throw NotImplementedException("Illegal offset on CUBE sample");
    }
    throw NotImplementedException("Invalid texture type {}", type);
}
// True for the modes that sample with an explicit LOD: LL, LLA and LZ.
bool HasExplicitLod(Blod blod) {
    return blod == Blod::LL || blod == Blod::LLA || blod == Blod::LZ;
}
// Shared translation of TEX and TEX_b.
// v:           visitor receiving the generated IR
// insn:        raw instruction word; the fields common to both variants are decoded here
// aoffi:       when set, a packed texel offset is read from the meta register sequence
// blod:        LOD mode, see MakeLod / HasExplicitLod
// lc:          LOD clamp flag; not implemented, throws NotImplementedException when set
// cbuf_offset: when present it is used as an immediate texture handle; otherwise the handle is
//              read from the first meta register
// The meta register is consumed strictly in this order: handle (only without cbuf_offset), LOD
// or bias (LB/LL/LBA/LLA), offset (aoffi), depth reference (dc).
void Impl(TranslatorVisitor& v, u64 insn, bool aoffi, Blod blod, bool lc,
std::optional<u32> cbuf_offset) {
union {
u64 raw;
BitField<35, 1, u64> ndv;
BitField<49, 1, u64> nodep;
BitField<50, 1, u64> dc;
BitField<51, 3, IR::Pred> sparse_pred;
BitField<0, 8, IR::Reg> dest_reg;
BitField<8, 8, IR::Reg> coord_reg;
BitField<20, 8, IR::Reg> meta_reg;
BitField<28, 3, TextureType> type;
BitField<31, 4, u64> mask;
} const tex{insn};
if (lc) {
throw NotImplementedException("LC");
}
const IR::Value coords{MakeCoords(v, tex.coord_reg, tex.type)};
IR::Reg meta_reg{tex.meta_reg};
IR::Value handle;
IR::Value offset;
IR::F32 dref;
// Stays default-constructed: the lc path throws above
IR::F32 lod_clamp;
if (cbuf_offset) {
handle = v.ir.Imm32(*cbuf_offset);
} else {
handle = v.X(meta_reg++);
}
const IR::F32 lod{MakeLod(v, meta_reg, blod)};
if (aoffi) {
offset = MakeOffset(v, meta_reg, tex.type);
}
if (tex.dc != 0) {
dref = v.F(meta_reg++);
}
IR::TextureInstInfo info{};
info.type.Assign(GetType(tex.type));
info.is_depth.Assign(tex.dc != 0 ? 1 : 0);
info.has_bias.Assign(blod == Blod::LB || blod == Blod::LBA ? 1 : 0);
info.has_lod_clamp.Assign(lc ? 1 : 0);
// Pick one of the four sample opcodes from (depth compare?, explicit LOD?)
const IR::Value sample{[&]() -> IR::Value {
if (tex.dc == 0) {
if (HasExplicitLod(blod)) {
return v.ir.ImageSampleExplicitLod(handle, coords, lod, offset, info);
} else {
return v.ir.ImageSampleImplicitLod(handle, coords, lod, offset, lod_clamp, info);
}
}
if (HasExplicitLod(blod)) {
return v.ir.ImageSampleDrefExplicitLod(handle, coords, dref, lod, offset, info);
} else {
return v.ir.ImageSampleDrefImplicitLod(handle, coords, dref, lod, offset, lod_clamp,
info);
}
}()};
// Write one destination register per component enabled in the mask, packed consecutively
IR::Reg dest_reg{tex.dest_reg};
for (int element = 0; element < 4; ++element) {
if (((tex.mask >> element) & 1) == 0) {
continue;
}
IR::F32 value;
if (tex.dc != 0) {
// Depth-compare results are used as a scalar: it fills the first three components and
// the fourth is 1.0f
value = element < 3 ? IR::F32{sample} : v.ir.Imm32(1.0f);
} else {
value = IR::F32{v.ir.CompositeExtract(sample, static_cast<size_t>(element))};
}
v.F(dest_reg, value);
++dest_reg;
}
if (tex.sparse_pred != IR::Pred::PT) {
// The predicate receives the logical negation of GetSparseFromOp(sample)
v.ir.SetPred(tex.sparse_pred, v.ir.LogicalNot(v.ir.GetSparseFromOp(sample)));
}
}
} // Anonymous namespace
// Translates TEX: decodes the variant-specific fields and calls Impl with the texture handle
// given as an immediate (the 13-bit cbuf_offset field times 4).
void TranslatorVisitor::TEX(u64 insn) {
    union {
        u64 raw;
        BitField<54, 1, u64> aoffi;
        BitField<55, 3, Blod> blod;
        BitField<58, 1, u64> lc;
        BitField<36, 13, u64> cbuf_offset;
    } const fields{insn};

    const bool has_offset{fields.aoffi != 0};
    const bool has_lod_clamp{fields.lc != 0};
    const u32 handle_offset{static_cast<u32>(fields.cbuf_offset * 4)};
    Impl(*this, insn, has_offset, fields.blod, has_lod_clamp, handle_offset);
}
// Translates TEX_b: decodes the variant-specific fields and calls Impl without an immediate
// handle, so Impl reads the handle from the meta register.
void TranslatorVisitor::TEX_b(u64 insn) {
    union {
        u64 raw;
        BitField<36, 1, u64> aoffi;
        BitField<37, 3, Blod> blod;
        BitField<40, 1, u64> lc;
    } const fields{insn};

    const bool has_offset{fields.aoffi != 0};
    const bool has_lod_clamp{fields.lc != 0};
    Impl(*this, insn, has_offset, fields.blod, has_lod_clamp, std::nullopt);
}
} // namespace Shader::Maxwell

View File

@@ -0,0 +1,266 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <utility>
#include "common/bit_field.h"
#include "common/common_types.h"
#include "shader_recompiler/frontend/ir/modifiers.h"
#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
namespace Shader::Maxwell {
namespace {
// Result precision of TEXS, decoded from bit 59. TEXS stores results with Store32 for F32 and
// with Store16 otherwise.
enum class Precision : u64 {
F16,
F32,
};
// Bit layout of the TEXS instruction word. The helpers in this file each re-decode the raw
// instruction through Encoding{insn}.
union Encoding {
u64 raw;
BitField<59, 1, Precision> precision;
// Selects the texture type / LOD / depth-compare combination, see the switch in Sample
BitField<53, 4, u64> encoding;
BitField<49, 1, u64> nodep;
// Second destination register; RZ selects the RG_LUT table in Swizzle
BitField<28, 8, IR::Reg> dest_reg_b;
BitField<0, 8, IR::Reg> dest_reg_a;
BitField<8, 8, IR::Reg> src_reg_a;
BitField<20, 8, IR::Reg> src_reg_b;
// Multiplied by 4 to form the immediate texture handle in Sample
BitField<36, 13, u64> cbuf_offset;
// Index into RG_LUT or RGBA_LUT, see Swizzle
BitField<50, 3, u64> swizzle;
};
// One bit per color component.
constexpr unsigned R = 1U << 0;
constexpr unsigned G = 1U << 1;
constexpr unsigned B = 1U << 2;
constexpr unsigned A = 1U << 3;

// Component masks selectable when only the first destination register is used.
constexpr std::array<unsigned, 8> RG_LUT{
    R,     G,     B,     A,     //
    R | G, R | A, G | A, B | A, //
};

// Component masks selectable when both destination registers are used.
constexpr std::array<unsigned, 5> RGBA_LUT{
    R | G | B,     //
    R | G | A,     //
    R | B | A,     //
    G | B | A,     //
    R | G | B | A, //
};
// Throws NotImplementedException unless IR::IsAligned(reg, alignment) holds.
void CheckAlignment(IR::Reg reg, size_t alignment) {
    if (IR::IsAligned(reg, alignment)) {
        return;
    }
    throw NotImplementedException("Unaligned source register {}", reg);
}
// Reads every register in `regs` as a float through v.F and packs the values, in argument
// order, into a composite via CompositeConstruct.
template <typename... Args>
IR::Value Composite(TranslatorVisitor& v, Args... regs) {
return v.ir.CompositeConstruct(v.F(regs)...);
}
// Extracts the low 16 bits of `value` and converts them from unsigned integer to float.
IR::F32 ReadArray(TranslatorVisitor& v, const IR::U32& value) {
    const IR::U32 low_half{v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(16))};
    return v.ir.ConvertUToF(32, 16, low_half);
}
// Emits the texture sample for a TEXS instruction and returns the sampled value.
// The texture handle is always the immediate cbuf_offset * 4. The 4-bit `encoding` field picks
// the texture type, the LOD mode and whether a depth compare is done; it also determines how the
// two source registers (and their successors) are split between coordinates, array layer, LOD
// and depth reference. Encodings that read reg + 1 require that register pair to be aligned.
// F16 precision only sets info.relaxed_precision; the sampling itself is unchanged.
// Throws NotImplementedException for encodings above 13 and for unaligned register pairs.
IR::Value Sample(TranslatorVisitor& v, u64 insn) {
const Encoding texs{insn};
const IR::U32 handle{v.ir.Imm32(static_cast<u32>(texs.cbuf_offset * 4))};
const IR::F32 zero{v.ir.Imm32(0.0f)};
const IR::Reg reg_a{texs.src_reg_a};
const IR::Reg reg_b{texs.src_reg_b};
IR::TextureInstInfo info{};
if (texs.precision == Precision::F16) {
info.relaxed_precision.Assign(1);
}
switch (texs.encoding) {
case 0: // 1D.LZ
info.type.Assign(TextureType::Color1D);
return v.ir.ImageSampleExplicitLod(handle, v.F(reg_a), zero, {}, info);
case 1: // 2D
info.type.Assign(TextureType::Color2D);
return v.ir.ImageSampleImplicitLod(handle, Composite(v, reg_a, reg_b), {}, {}, {}, info);
case 2: // 2D.LZ
info.type.Assign(TextureType::Color2D);
return v.ir.ImageSampleExplicitLod(handle, Composite(v, reg_a, reg_b), zero, {}, info);
case 3: // 2D.LL
// Coordinates in reg_a, reg_a + 1; LOD in reg_b
CheckAlignment(reg_a, 2);
info.type.Assign(TextureType::Color2D);
return v.ir.ImageSampleExplicitLod(handle, Composite(v, reg_a, reg_a + 1), v.F(reg_b), {},
info);
case 4: // 2D.DC
// Coordinates in reg_a, reg_a + 1; depth reference in reg_b
CheckAlignment(reg_a, 2);
info.type.Assign(TextureType::Color2D);
info.is_depth.Assign(1);
return v.ir.ImageSampleDrefImplicitLod(handle, Composite(v, reg_a, reg_a + 1), v.F(reg_b),
{}, {}, {}, info);
case 5: // 2D.LL.DC
// Coordinates in reg_a, reg_a + 1; LOD in reg_b; depth reference in reg_b + 1
CheckAlignment(reg_a, 2);
CheckAlignment(reg_b, 2);
info.type.Assign(TextureType::Color2D);
info.is_depth.Assign(1);
return v.ir.ImageSampleDrefExplicitLod(handle, Composite(v, reg_a, reg_a + 1),
v.F(reg_b + 1), v.F(reg_b), {}, info);
case 6: // 2D.LZ.DC
CheckAlignment(reg_a, 2);
info.type.Assign(TextureType::Color2D);
info.is_depth.Assign(1);
return v.ir.ImageSampleDrefExplicitLod(handle, Composite(v, reg_a, reg_a + 1), v.F(reg_b),
zero, {}, info);
case 7: // ARRAY_2D
// Layer in reg_a; coordinates in reg_a + 1 and reg_b
CheckAlignment(reg_a, 2);
info.type.Assign(TextureType::ColorArray2D);
return v.ir.ImageSampleImplicitLod(
handle, v.ir.CompositeConstruct(v.F(reg_a + 1), v.F(reg_b), ReadArray(v, v.X(reg_a))),
{}, {}, {}, info);
case 8: // ARRAY_2D.LZ
CheckAlignment(reg_a, 2);
info.type.Assign(TextureType::ColorArray2D);
return v.ir.ImageSampleExplicitLod(
handle, v.ir.CompositeConstruct(v.F(reg_a + 1), v.F(reg_b), ReadArray(v, v.X(reg_a))),
zero, {}, info);
case 9: // ARRAY_2D.LZ.DC
// Layer in reg_a; coordinates in reg_a + 1 and reg_b; depth reference in reg_b + 1
CheckAlignment(reg_a, 2);
CheckAlignment(reg_b, 2);
info.type.Assign(TextureType::ColorArray2D);
info.is_depth.Assign(1);
return v.ir.ImageSampleDrefExplicitLod(
handle, v.ir.CompositeConstruct(v.F(reg_a + 1), v.F(reg_b), ReadArray(v, v.X(reg_a))),
v.F(reg_b + 1), zero, {}, info);
case 10: // 3D
// Coordinates in reg_a, reg_a + 1 and reg_b
CheckAlignment(reg_a, 2);
info.type.Assign(TextureType::Color3D);
return v.ir.ImageSampleImplicitLod(handle, Composite(v, reg_a, reg_a + 1, reg_b), {}, {},
{}, info);
case 11: // 3D.LZ
CheckAlignment(reg_a, 2);
info.type.Assign(TextureType::Color3D);
return v.ir.ImageSampleExplicitLod(handle, Composite(v, reg_a, reg_a + 1, reg_b), zero, {},
info);
case 12: // CUBE
CheckAlignment(reg_a, 2);
info.type.Assign(TextureType::ColorCube);
return v.ir.ImageSampleImplicitLod(handle, Composite(v, reg_a, reg_a + 1, reg_b), {}, {},
{}, info);
case 13: // CUBE.LL
// Coordinates in reg_a, reg_a + 1 and reg_b; LOD in reg_b + 1
CheckAlignment(reg_a, 2);
CheckAlignment(reg_b, 2);
info.type.Assign(TextureType::ColorCube);
return v.ir.ImageSampleExplicitLod(handle, Composite(v, reg_a, reg_a + 1, reg_b),
v.F(reg_b + 1), {}, info);
default:
throw NotImplementedException("Illegal encoding {}", texs.encoding.Value());
}
}
// Returns the component mask selected by the instruction's swizzle field. RG_LUT is used when
// the second destination register is RZ, RGBA_LUT otherwise.
// Throws NotImplementedException when the field indexes past the end of the chosen table.
unsigned Swizzle(u64 insn) {
    const Encoding texs{insn};
    const size_t encoding{texs.swizzle};
    const bool single_dest{texs.dest_reg_b == IR::Reg::RZ};
    if (single_dest) {
        if (encoding < RG_LUT.size()) {
            return RG_LUT[encoding];
        }
        throw NotImplementedException("Illegal RG encoding {}", encoding);
    }
    if (encoding < RGBA_LUT.size()) {
        return RGBA_LUT[encoding];
    }
    throw NotImplementedException("Illegal RGBA encoding {}", encoding);
}
// Returns component `component` of a sample result.
// A sample whose type is F32 is treated as a depth-compare scalar: components 0..2 return the
// scalar itself and component 3 returns 1.0f. Any other sample is a composite and the component
// is extracted from it.
IR::F32 Extract(TranslatorVisitor& v, const IR::Value& sample, unsigned component) {
    if (sample.Type() != IR::Type::F32) {
        return IR::F32{v.ir.CompositeExtract(sample, component)};
    }
    if (component == 3) {
        return v.ir.Imm32(1.0f);
    }
    return IR::F32{sample};
}
// Destination register for the index-th stored 32-bit component:
//   0 -> dest_reg_a, 1 -> dest_reg_a + 1, 2 -> dest_reg_b, 3 -> dest_reg_b + 1.
// The odd slots require their base register to be aligned to 2 (checked via CheckAlignment).
// Throws LogicError for an index above 3.
IR::Reg RegStoreComponent32(u64 insn, unsigned index) {
    if (index > 3) {
        throw LogicError("Invalid store index {}", index);
    }
    const Encoding texs{insn};
    const IR::Reg base{index < 2 ? texs.dest_reg_a.Value() : texs.dest_reg_b.Value()};
    const bool is_second_of_pair{(index & 1) != 0};
    if (!is_second_of_pair) {
        return base;
    }
    CheckAlignment(base, 2);
    return base + 1;
}
// Writes the components selected by the swizzle mask as 32-bit floats. Selected components are
// stored in component order into the registers given by RegStoreComponent32(0), (1), ...
void Store32(TranslatorVisitor& v, u64 insn, const IR::Value& sample) {
    const unsigned mask{Swizzle(insn)};
    unsigned slot{0};
    for (unsigned component = 0; component < 4; ++component) {
        const bool is_selected{((mask >> component) & 1) != 0};
        if (is_selected) {
            const IR::Reg target{RegStoreComponent32(insn, slot)};
            v.F(target, Extract(v, sample, component));
            ++slot;
        }
    }
}
// Packs two floats into one 32-bit value through PackHalf2x16, `lhs` first.
IR::U32 Pack(TranslatorVisitor& v, const IR::F32& lhs, const IR::F32& rhs) {
    const IR::Value pair{v.ir.CompositeConstruct(lhs, rhs)};
    return v.ir.PackHalf2x16(pair);
}
// Writes the components selected by the swizzle mask as packed half floats.
// The selected components are gathered in component order; the first two go to dest_reg_a and
// the third and fourth, if any, to dest_reg_b. A missing second half of a pair is 0.0f.
void Store16(TranslatorVisitor& v, u64 insn, const IR::Value& sample) {
    const unsigned mask{Swizzle(insn)};
    std::array<IR::F32, 4> gathered;
    unsigned count{0};
    for (unsigned component = 0; component < 4; ++component) {
        const bool is_selected{((mask >> component) & 1) != 0};
        if (is_selected) {
            gathered[count] = Extract(v, sample, component);
            ++count;
        }
    }
    const IR::F32 zero{v.ir.Imm32(0.0f)};
    const Encoding texs{insn};
    if (count == 0) {
        return;
    }
    // First register: components 0 and 1
    const IR::F32& low_hi{count >= 2 ? gathered[1] : zero};
    v.X(texs.dest_reg_a, Pack(v, gathered[0], low_hi));
    if (count < 3) {
        return;
    }
    // Second register: components 2 and 3
    const IR::F32& high_hi{count >= 4 ? gathered[3] : zero};
    v.X(texs.dest_reg_b, Pack(v, gathered[2], high_hi));
}
} // Anonymous namespace
// Translates TEXS: samples the texture, then stores the result as 32-bit floats (F32 precision)
// or as packed half floats (F16 precision).
void TranslatorVisitor::TEXS(u64 insn) {
    const IR::Value sample{Sample(*this, insn)};
    const bool full_precision{Encoding{insn}.precision == Precision::F32};
    if (!full_precision) {
        Store16(*this, insn, sample);
        return;
    }
    Store32(*this, insn, sample);
}
} // namespace Shader::Maxwell

View File

@@ -0,0 +1,208 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <optional>
#include "common/bit_field.h"
#include "common/common_types.h"
#include "shader_recompiler/frontend/ir/modifiers.h"
#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
namespace Shader::Maxwell {
namespace {
// Texture type as encoded in the instruction. GetType converts it to Shader::TextureType;
// ARRAY_3D is not supported. Enumerators are numbered from zero in declaration order.
enum class TextureType : u64 {
_1D,
ARRAY_1D,
_2D,
ARRAY_2D,
_3D,
ARRAY_3D,
CUBE,
ARRAY_CUBE,
};
// Offset mode selector; enumerators take consecutive values starting at None = 0.
enum class OffsetType : u64 {
None = 0,
AOFFI,
PTP,
Invalid,
};
// Color component selector, with R, G, B and A numbered 0 to 3.
enum class ComponentType : u64 {
R = 0,
G = 1,
B = 2,
A = 3,
};
// Converts the instruction-encoded texture type to Shader::TextureType.
// Throws NotImplementedException for ARRAY_3D and for values outside the enum.
Shader::TextureType GetType(TextureType type) {
    // clang-format off
    switch (type) {
    case TextureType::_1D:        return Shader::TextureType::Color1D;
    case TextureType::ARRAY_1D:   return Shader::TextureType::ColorArray1D;
    case TextureType::_2D:        return Shader::TextureType::Color2D;
    case TextureType::ARRAY_2D:   return Shader::TextureType::ColorArray2D;
    case TextureType::_3D:        return Shader::TextureType::Color3D;
    case TextureType::ARRAY_3D:   throw NotImplementedException("3D array texture type");
    case TextureType::CUBE:       return Shader::TextureType::ColorCube;
    case TextureType::ARRAY_CUBE: return Shader::TextureType::ColorArrayCube;
    }
    // clang-format on
    throw NotImplementedException("Invalid texture type {}", type);
}
// Builds the floating-point coordinate operand.
// Non-array types read consecutive registers starting at `reg`. Array types keep the layer in
// `reg` itself (converted from unsigned to float) and the spatial coordinates in the registers
// that follow; the layer becomes the last component.
// Throws NotImplementedException for ARRAY_3D and for values outside the enum.
IR::Value MakeCoords(TranslatorVisitor& v, IR::Reg reg, TextureType type) {
    const auto layer{[&]() -> IR::F32 { return v.ir.ConvertUToF(32, 16, v.X(reg)); }};
    // clang-format off
    switch (type) {
    case TextureType::_1D:
        return v.F(reg);
    case TextureType::ARRAY_1D:
        return v.ir.CompositeConstruct(v.F(reg + 1), layer());
    case TextureType::_2D:
        return v.ir.CompositeConstruct(v.F(reg), v.F(reg + 1));
    case TextureType::ARRAY_2D:
        return v.ir.CompositeConstruct(v.F(reg + 1), v.F(reg + 2), layer());
    case TextureType::_3D:
        return v.ir.CompositeConstruct(v.F(reg), v.F(reg + 1), v.F(reg + 2));
    case TextureType::ARRAY_3D:
        throw NotImplementedException("3D array texture type");
    case TextureType::CUBE:
        return v.ir.CompositeConstruct(v.F(reg), v.F(reg + 1), v.F(reg + 2));
    case TextureType::ARRAY_CUBE:
        return v.ir.CompositeConstruct(v.F(reg + 1), v.F(reg + 2), v.F(reg + 3), layer());
    }
    // clang-format on
    throw NotImplementedException("Invalid texture type {}", type);
}
// Reads one register (advancing `reg`) and unpacks signed 6-bit offsets from it.
// Offsets sit at bit 0, 8 and 16; how many are used depends on the texture type.
// Cube types are rejected after the register has been consumed.
IR::Value MakeOffset(TranslatorVisitor& v, IR::Reg& reg, TextureType type) {
    const IR::U32 packed{v.X(reg++)};
    const auto field{[&v, &packed](int first_bit) {
        return v.ir.BitFieldExtract(packed, v.ir.Imm32(first_bit), v.ir.Imm32(6), true);
    }};
    switch (type) {
    case TextureType::_1D:
    case TextureType::ARRAY_1D:
        return field(0);
    case TextureType::_2D:
    case TextureType::ARRAY_2D: {
        const auto x{field(0)};
        const auto y{field(8)};
        return v.ir.CompositeConstruct(x, y);
    }
    case TextureType::_3D:
    case TextureType::ARRAY_3D: {
        const auto x{field(0)};
        const auto y{field(8)};
        const auto z{field(16)};
        return v.ir.CompositeConstruct(x, y, z);
    }
    case TextureType::CUBE:
    case TextureType::ARRAY_CUBE:
        throw NotImplementedException("Illegal offset on CUBE sample");
    }
    throw NotImplementedException("Invalid texture type {}", type);
}
// Reads two consecutive registers (advancing `reg` twice) and unpacks each of them
// into a 4-component vector of signed 6-bit values taken at bits 0, 8, 16 and 24.
std::pair<IR::Value, IR::Value> MakeOffsetPTP(TranslatorVisitor& v, IR::Reg& reg) {
    const IR::U32 first_word{v.X(reg++)};
    const IR::U32 second_word{v.X(reg++)};
    const IR::U32 width{v.ir.Imm32(6)};
    const auto unpack{[&v, &width](const IR::U32& word) {
        const auto lane{[&](int first_bit) {
            return v.ir.BitFieldExtract(word, v.ir.Imm32(first_bit), width, true);
        }};
        const auto a{lane(0)};
        const auto b{lane(8)};
        const auto c{lane(16)};
        const auto d{lane(24)};
        return v.ir.CompositeConstruct(a, b, c, d);
    }};
    return {unpack(first_word), unpack(second_word)};
}
// Shared translation for TLD4 and TLD4_b.
// Decodes the operands, emits an image gather (with a depth reference when the dc bit
// is set), writes the components enabled in the mask to consecutive destination
// registers and, unless the sparse predicate is PT, stores the negated sparse result.
void Impl(TranslatorVisitor& v, u64 insn, ComponentType component_type, OffsetType offset_type,
          bool is_bindless) {
    union {
        u64 raw;
        BitField<0, 8, IR::Reg> dest_reg;
        BitField<8, 8, IR::Reg> coord_reg;
        BitField<20, 8, IR::Reg> meta_reg;
        BitField<28, 3, TextureType> type;
        BitField<31, 4, u64> mask;
        BitField<35, 1, u64> ndv;
        BitField<36, 13, u64> cbuf_offset;
        BitField<49, 1, u64> nodep;
        BitField<50, 1, u64> dc;
        BitField<51, 3, IR::Pred> sparse_pred;
    } const tld4{insn};

    const bool has_dref{tld4.dc != 0};
    const IR::Value coords{MakeCoords(v, tld4.coord_reg, tld4.type)};
    IR::Reg meta_reg{tld4.meta_reg};

    // The bindless form consumes the first meta register as the handle.
    IR::Value handle;
    if (is_bindless) {
        handle = v.X(meta_reg++);
    } else {
        handle = v.ir.Imm32(static_cast<u32>(tld4.cbuf_offset.Value() * 4));
    }

    // Optional offsets follow the handle in the meta registers.
    IR::Value offset;
    IR::Value offset2;
    if (offset_type == OffsetType::AOFFI) {
        offset = MakeOffset(v, meta_reg, tld4.type);
    } else if (offset_type == OffsetType::PTP) {
        const auto ptp = MakeOffsetPTP(v, meta_reg);
        offset = ptp.first;
        offset2 = ptp.second;
    } else if (offset_type != OffsetType::None) {
        throw NotImplementedException("Invalid offset type {}", offset_type);
    }

    // The depth reference, when present, is the last meta operand.
    IR::F32 dref;
    if (has_dref) {
        dref = v.F(meta_reg++);
    }

    IR::TextureInstInfo info{};
    info.type.Assign(GetType(tld4.type));
    info.is_depth.Assign(has_dref ? 1 : 0);
    info.gather_component.Assign(static_cast<u32>(component_type));

    const IR::Value sample{has_dref
                               ? v.ir.ImageGatherDref(handle, coords, offset, offset2, dref, info)
                               : v.ir.ImageGather(handle, coords, offset, offset2, info)};

    // Only enabled components are written; destinations are packed without gaps.
    IR::Reg dest_reg{tld4.dest_reg};
    for (size_t element = 0; element < 4; ++element) {
        if (((tld4.mask >> element) & 1) != 0) {
            v.F(dest_reg, IR::F32{v.ir.CompositeExtract(sample, element)});
            ++dest_reg;
        }
    }
    if (tld4.sparse_pred != IR::Pred::PT) {
        v.ir.SetPred(tld4.sparse_pred, v.ir.LogicalNot(v.ir.GetSparseFromOp(sample)));
    }
}
} // Anonymous namespace
// TLD4: component and offset mode live at bits 56 and 54; the handle is not bindless.
void TranslatorVisitor::TLD4(u64 insn) {
    union {
        u64 raw;
        BitField<54, 2, OffsetType> offset;
        BitField<56, 2, ComponentType> component;
    } const fields{insn};
    const ComponentType component{fields.component};
    const OffsetType offset{fields.offset};
    Impl(*this, insn, component, offset, false);
}
// TLD4_b: bindless form; component and offset mode live at bits 38 and 36.
void TranslatorVisitor::TLD4_b(u64 insn) {
    union {
        u64 raw;
        BitField<36, 2, OffsetType> offset;
        BitField<38, 2, ComponentType> component;
    } const fields{insn};
    const ComponentType component{fields.component};
    const OffsetType offset{fields.offset};
    Impl(*this, insn, component, offset, true);
}
} // namespace Shader::Maxwell

View File

@@ -0,0 +1,134 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <utility>
#include "common/bit_field.h"
#include "common/common_types.h"
#include "shader_recompiler/frontend/ir/modifiers.h"
#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
namespace Shader::Maxwell {
namespace {
// Result precision decoded from bit 55 of the TLD4S encoding.
enum class Precision : u64 {
    F32 = 0,
    F16 = 1,
};
// Component selector for the gather; its numeric value (0..3) is stored verbatim
// in TextureInstInfo::gather_component.
enum class ComponentType : u64 {
    R, // 0
    G, // 1
    B, // 2
    A, // 3
};
// Bit layout of a TLD4S instruction word.
// `raw` must remain the first member: `Encoding{insn}` aggregate-initializes it.
union Encoding {
    u64 raw;
    BitField<0, 8, IR::Reg> dest_reg_a;
    BitField<8, 8, IR::Reg> src_reg_a;
    BitField<20, 8, IR::Reg> src_reg_b;
    BitField<28, 8, IR::Reg> dest_reg_b;
    BitField<36, 13, u64> cbuf_offset;
    BitField<49, 1, u64> nodep;
    BitField<50, 1, u64> dc;
    BitField<51, 1, u64> aoffi;
    BitField<52, 2, ComponentType> component_type;
    BitField<55, 1, Precision> precision;
};
// Throws NotImplementedException unless `reg` satisfies IR::IsAligned for `alignment`.
void CheckAlignment(IR::Reg reg, size_t alignment) {
    if (IR::IsAligned(reg, alignment)) {
        return;
    }
    throw NotImplementedException("Unaligned source register {}", reg);
}
// Unpacks two signed 6-bit offsets (bits 0..5 and 8..13) from `reg` into a 2-vector.
IR::Value MakeOffset(TranslatorVisitor& v, IR::Reg reg) {
    const IR::U32 packed{v.X(reg)};
    const auto x{v.ir.BitFieldExtract(packed, v.ir.Imm32(0), v.ir.Imm32(6), true)};
    const auto y{v.ir.BitFieldExtract(packed, v.ir.Imm32(8), v.ir.Imm32(6), true)};
    return v.ir.CompositeConstruct(x, y);
}
// Emits the 2D gather for TLD4S.
// Operand layout depends on the aoffi and dc bits:
//   neither : coords = (reg_a, reg_b)
//   dc only : coords = (reg_a, reg_a + 1), dref = reg_b
//   aoffi   : coords = (reg_a, reg_a + 1), offset packed in reg_b, dref (if dc) = reg_b + 1
IR::Value Sample(TranslatorVisitor& v, u64 insn) {
    const Encoding tld4s{insn};
    const bool has_offset{tld4s.aoffi != 0};
    const bool has_dref{tld4s.dc != 0};
    const IR::U32 handle{v.ir.Imm32(static_cast<u32>(tld4s.cbuf_offset * 4))};
    const IR::Reg reg_a{tld4s.src_reg_a};
    const IR::Reg reg_b{tld4s.src_reg_b};

    IR::TextureInstInfo info{};
    if (tld4s.precision == Precision::F16) {
        info.relaxed_precision.Assign(1);
    }
    info.gather_component.Assign(static_cast<u32>(tld4s.component_type.Value()));
    info.type.Assign(Shader::TextureType::Color2D);
    info.is_depth.Assign(has_dref ? 1 : 0);

    if (!has_offset && !has_dref) {
        const IR::Value coords{v.ir.CompositeConstruct(v.F(reg_a), v.F(reg_b))};
        return v.ir.ImageGather(handle, coords, {}, {}, info);
    }

    // Every remaining form reads both coordinates from the reg_a pair.
    CheckAlignment(reg_a, 2);
    const IR::Value coords{v.ir.CompositeConstruct(v.F(reg_a), v.F(reg_a + 1))};
    IR::Value offset;
    if (has_offset) {
        offset = MakeOffset(v, reg_b);
    }
    if (!has_dref) {
        return v.ir.ImageGather(handle, coords, offset, {}, info);
    }

    IR::F32 dref;
    if (has_offset) {
        // reg_b already holds the offset, so the reference sits in the next register.
        CheckAlignment(reg_b, 2);
        dref = v.F(reg_b + 1);
    } else {
        dref = v.F(reg_b);
    }
    return v.ir.ImageGatherDref(handle, coords, offset, {}, dref, info);
}
// Returns the destination register for 32-bit component `index` (0..3):
// 0/1 map to dest_reg_a and its successor, 2/3 to dest_reg_b and its successor.
// The successor is only used after the base passes CheckAlignment(base, 2).
// Any other index raises LogicError.
IR::Reg RegStoreComponent32(u64 insn, size_t index) {
    const Encoding tlds4{insn};
    if (index > 3) {
        throw LogicError("Invalid store index {}", index);
    }
    const IR::Reg base{index < 2 ? tlds4.dest_reg_a.Value() : tlds4.dest_reg_b.Value()};
    if (index % 2 == 0) {
        return base;
    }
    CheckAlignment(base, 2);
    return base + 1;
}
// Writes all four components of `sample` as 32-bit floats to the registers chosen by
// RegStoreComponent32.
void Store32(TranslatorVisitor& v, u64 insn, const IR::Value& sample) {
    size_t component{0};
    while (component < 4) {
        const IR::Reg target{RegStoreComponent32(insn, component)};
        const IR::F32 value{v.ir.CompositeExtract(sample, component)};
        v.F(target, value);
        ++component;
    }
}
// Packs two 32-bit floats into one 32-bit word via PackHalf2x16 (lhs first, rhs second).
IR::U32 Pack(TranslatorVisitor& v, const IR::F32& lhs, const IR::F32& rhs) {
    const auto pair{v.ir.CompositeConstruct(lhs, rhs)};
    return v.ir.PackHalf2x16(pair);
}
// Writes all four components of `sample` as packed halves: components 0/1 go to
// dest_reg_a and components 2/3 to dest_reg_b.
void Store16(TranslatorVisitor& v, u64 insn, const IR::Value& sample) {
    const auto lane{[&v, &sample](size_t index) {
        return IR::F32{v.ir.CompositeExtract(sample, index)};
    }};
    // Extract everything first so the extracts precede the packs.
    const IR::F32 first{lane(0)};
    const IR::F32 second{lane(1)};
    const IR::F32 third{lane(2)};
    const IR::F32 fourth{lane(3)};
    const Encoding tld4s{insn};
    v.X(tld4s.dest_reg_a, Pack(v, first, second));
    v.X(tld4s.dest_reg_b, Pack(v, third, fourth));
}
} // Anonymous namespace
// TLD4S: gather once, then write back in the layout selected by the precision field.
void TranslatorVisitor::TLD4S(u64 insn) {
    const IR::Value texel{Sample(*this, insn)};
    const Encoding tld4s{insn};
    if (tld4s.precision != Precision::F32) {
        Store16(*this, insn, texel);
        return;
    }
    Store32(*this, insn, texel);
}
} // namespace Shader::Maxwell

View File

@@ -0,0 +1,182 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <optional>
#include "common/bit_field.h"
#include "common/common_types.h"
#include "shader_recompiler/frontend/ir/modifiers.h"
#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
namespace Shader::Maxwell {
namespace {
// Texture type selector decoded from the 3-bit "type" field of the instruction.
enum class TextureType : u64 {
    _1D = 0,
    ARRAY_1D = 1,
    _2D = 2,
    ARRAY_2D = 3,
    _3D = 4,
    ARRAY_3D = 5,
    CUBE = 6,
    ARRAY_CUBE = 7,
};
// Translates the instruction-level texture type into the shader-wide texture type.
// Throws NotImplementedException for 3D arrays and for values outside the enumeration.
Shader::TextureType GetType(TextureType type) {
    using Result = Shader::TextureType;
    switch (type) {
    case TextureType::ARRAY_3D:
        throw NotImplementedException("3D array texture type");
    case TextureType::_1D:
        return Result::Color1D;
    case TextureType::_2D:
        return Result::Color2D;
    case TextureType::_3D:
        return Result::Color3D;
    case TextureType::CUBE:
        return Result::ColorCube;
    case TextureType::ARRAY_1D:
        return Result::ColorArray1D;
    case TextureType::ARRAY_2D:
        return Result::ColorArray2D;
    case TextureType::ARRAY_CUBE:
        return Result::ColorArrayCube;
    }
    throw NotImplementedException("Invalid texture type {}", type);
}
// Unpacks two adjacent signed 4-bit offsets from `reg`. The first one starts at
// bit 12 when a LOD clamp is present and at bit 16 otherwise.
IR::Value MakeOffset(TranslatorVisitor& v, IR::Reg reg, bool has_lod_clamp) {
    const IR::U32 packed{v.X(reg)};
    const u32 first_bit{has_lod_clamp ? 12U : 16U};
    const auto x{v.ir.BitFieldExtract(packed, v.ir.Imm32(first_bit), v.ir.Imm32(4), true)};
    const auto y{v.ir.BitFieldExtract(packed, v.ir.Imm32(first_bit + 4), v.ir.Imm32(4), true)};
    return v.ir.CompositeConstruct(x, y);
}
// Shared translation for TXD and TXD_b (sampling with explicit derivatives).
//
// v           - translator whose IR emitter receives the generated instructions
// insn        - raw instruction word
// is_bindless - when true the handle is read from the first coordinate register and the
//               coordinates start one register later; otherwise the handle is the
//               constant buffer offset field multiplied by 4
//
// Only 1D, 1D array, 2D and 2D array textures are handled; everything else, and the
// TXD.LC (LOD clamp) form, throws NotImplementedException.
void Impl(TranslatorVisitor& v, u64 insn, bool is_bindless) {
    union {
        u64 raw;
        BitField<49, 1, u64> nodep;
        BitField<35, 1, u64> aoffi;
        BitField<50, 1, u64> lc;
        BitField<51, 3, IR::Pred> sparse_pred;
        BitField<0, 8, IR::Reg> dest_reg;
        BitField<8, 8, IR::Reg> coord_reg;
        BitField<20, 8, IR::Reg> derivate_reg;
        BitField<28, 3, TextureType> type;
        BitField<31, 4, u64> mask;
        BitField<36, 13, u64> cbuf_offset;
    } const txd{insn};

    const bool has_lod_clamp = txd.lc != 0;
    if (has_lod_clamp) {
        throw NotImplementedException("TXD.LC - CLAMP is not implemented");
    }

    IR::Value coords;
    u32 num_derivates{};
    IR::Reg base_reg{txd.coord_reg};
    // Register following the coordinates; it packs the array index, offsets and LOD clamp.
    IR::Reg last_reg;
    IR::Value handle;
    if (is_bindless) {
        handle = v.X(base_reg++);
    } else {
        handle = v.ir.Imm32(static_cast<u32>(txd.cbuf_offset.Value() * 4));
    }

    // Reads the array index from the low bits of last_reg. last_reg is captured by
    // reference, so it must be assigned before this lambda is invoked.
    const auto read_array{[&]() -> IR::F32 {
        const IR::U32 base{v.ir.Imm32(0)};
        const IR::U32 count{v.ir.Imm32(has_lod_clamp ? 12 : 16)};
        const IR::U32 array_index{v.ir.BitFieldExtract(v.X(last_reg), base, count)};
        return v.ir.ConvertUToF(32, 16, array_index);
    }};
    switch (txd.type) {
    case TextureType::_1D: {
        coords = v.F(base_reg);
        num_derivates = 1;
        last_reg = base_reg + 1;
        break;
    }
    case TextureType::ARRAY_1D: {
        last_reg = base_reg + 1;
        coords = v.ir.CompositeConstruct(v.F(base_reg), read_array());
        num_derivates = 1;
        break;
    }
    case TextureType::_2D: {
        last_reg = base_reg + 2;
        coords = v.ir.CompositeConstruct(v.F(base_reg), v.F(base_reg + 1));
        num_derivates = 2;
        break;
    }
    case TextureType::ARRAY_2D: {
        last_reg = base_reg + 2;
        coords = v.ir.CompositeConstruct(v.F(base_reg), v.F(base_reg + 1), read_array());
        num_derivates = 2;
        break;
    }
    default:
        throw NotImplementedException("Invalid texture type");
    }

    // Two derivative values are read per coordinate dimension.
    const IR::Reg derivate_reg{txd.derivate_reg};
    IR::Value derivates;
    switch (num_derivates) {
    case 1: {
        derivates = v.ir.CompositeConstruct(v.F(derivate_reg), v.F(derivate_reg + 1));
        break;
    }
    case 2: {
        derivates = v.ir.CompositeConstruct(v.F(derivate_reg), v.F(derivate_reg + 1),
                                            v.F(derivate_reg + 2), v.F(derivate_reg + 3));
        break;
    }
    default:
        throw NotImplementedException("Invalid texture type");
    }

    IR::Value offset;
    if (txd.aoffi != 0) {
        offset = MakeOffset(v, last_reg, has_lod_clamp);
    }

    IR::F32 lod_clamp;
    if (has_lod_clamp) {
        // The LOD clamp is 4.8 fixed point: float(value) / float(1 << 8).
        // The division is expressed as a multiplication by the exact reciprocal 1/256.
        // (Currently unreachable: TXD.LC is rejected at the top of this function.)
        const IR::F32 fixp_to_float{v.ir.Imm32(1.0f / static_cast<f32>(1U << 8))};
        const IR::F32 fixp_lc{v.ir.ConvertUToF(
            32, 16, v.ir.BitFieldExtract(v.X(last_reg), v.ir.Imm32(20), v.ir.Imm32(12)))};
        lod_clamp = v.ir.FPMul(fixp_lc, fixp_to_float);
    }

    IR::TextureInstInfo info{};
    info.type.Assign(GetType(txd.type));
    info.num_derivates.Assign(num_derivates);
    info.has_lod_clamp.Assign(has_lod_clamp ? 1 : 0);
    const IR::Value sample{v.ir.ImageGradient(handle, coords, derivates, offset, lod_clamp, info)};

    // Only components enabled in the mask are written; destinations are packed.
    IR::Reg dest_reg{txd.dest_reg};
    for (size_t element = 0; element < 4; ++element) {
        if (((txd.mask >> element) & 1) == 0) {
            continue;
        }
        v.F(dest_reg, IR::F32{v.ir.CompositeExtract(sample, element)});
        ++dest_reg;
    }
    if (txd.sparse_pred != IR::Pred::PT) {
        v.ir.SetPred(txd.sparse_pred, v.ir.LogicalNot(v.ir.GetSparseFromOp(sample)));
    }
}
} // Anonymous namespace
// TXD: non-bindless form of the shared implementation.
void TranslatorVisitor::TXD(u64 insn) {
    constexpr bool is_bindless{false};
    Impl(*this, insn, is_bindless);
}
// TXD_b: bindless form of the shared implementation.
void TranslatorVisitor::TXD_b(u64 insn) {
    constexpr bool is_bindless{true};
    Impl(*this, insn, is_bindless);
}
} // namespace Shader::Maxwell

View File

@@ -0,0 +1,165 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <optional>
#include "common/bit_field.h"
#include "common/common_types.h"
#include "shader_recompiler/frontend/ir/modifiers.h"
#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
namespace Shader::Maxwell {
namespace {
// Texture type selector decoded from the 3-bit "type" field of the instruction.
enum class TextureType : u64 {
    _1D = 0,
    ARRAY_1D = 1,
    _2D = 2,
    ARRAY_2D = 3,
    _3D = 4,
    ARRAY_3D = 5,
    CUBE = 6,
    ARRAY_CUBE = 7,
};
// Translates the instruction-level texture type into the shader-wide texture type.
// Throws NotImplementedException for 3D arrays and for values outside the enumeration.
Shader::TextureType GetType(TextureType type) {
    using Result = Shader::TextureType;
    switch (type) {
    case TextureType::ARRAY_3D:
        throw NotImplementedException("3D array texture type");
    case TextureType::_1D:
        return Result::Color1D;
    case TextureType::_2D:
        return Result::Color2D;
    case TextureType::_3D:
        return Result::Color3D;
    case TextureType::CUBE:
        return Result::ColorCube;
    case TextureType::ARRAY_1D:
        return Result::ColorArray1D;
    case TextureType::ARRAY_2D:
        return Result::ColorArray2D;
    case TextureType::ARRAY_CUBE:
        return Result::ColorArrayCube;
    }
    throw NotImplementedException("Invalid texture type {}", type);
}
// Builds the integer coordinate operand for a texel fetch.
// For array types the low 16 bits of the first register are the array index (placed
// last in the composite) and the coordinates start at reg + 1.
IR::Value MakeCoords(TranslatorVisitor& v, IR::Reg reg, TextureType type) {
    const auto layer{[&v, reg]() -> IR::U32 {
        return v.ir.BitFieldExtract(v.X(reg), v.ir.Imm32(0), v.ir.Imm32(16));
    }};
    switch (type) {
    case TextureType::_1D:
        return v.X(reg);
    case TextureType::ARRAY_1D: {
        const IR::U32 x{v.X(reg + 1)};
        return v.ir.CompositeConstruct(x, layer());
    }
    case TextureType::_2D: {
        const IR::U32 x{v.X(reg)};
        const IR::U32 y{v.X(reg + 1)};
        return v.ir.CompositeConstruct(x, y);
    }
    case TextureType::ARRAY_2D: {
        const IR::U32 x{v.X(reg + 1)};
        const IR::U32 y{v.X(reg + 2)};
        return v.ir.CompositeConstruct(x, y, layer());
    }
    case TextureType::_3D:
    case TextureType::CUBE: {
        // Both types read three consecutive coordinates.
        const IR::U32 x{v.X(reg)};
        const IR::U32 y{v.X(reg + 1)};
        const IR::U32 z{v.X(reg + 2)};
        return v.ir.CompositeConstruct(x, y, z);
    }
    case TextureType::ARRAY_3D:
        throw NotImplementedException("3D array texture type");
    case TextureType::ARRAY_CUBE: {
        const IR::U32 x{v.X(reg + 1)};
        const IR::U32 y{v.X(reg + 2)};
        const IR::U32 z{v.X(reg + 3)};
        return v.ir.CompositeConstruct(x, y, z, layer());
    }
    }
    throw NotImplementedException("Invalid texture type {}", type);
}
// Reads one register (advancing `reg`) and unpacks signed 4-bit offsets from it.
// Offsets sit at bit 0, 4 and 8; how many are used depends on the texture type.
// Cube types are rejected after the register has been consumed.
IR::Value MakeOffset(TranslatorVisitor& v, IR::Reg& reg, TextureType type) {
    const IR::U32 packed{v.X(reg++)};
    const auto field{[&v, &packed](int first_bit) {
        return v.ir.BitFieldExtract(packed, v.ir.Imm32(first_bit), v.ir.Imm32(4), true);
    }};
    switch (type) {
    case TextureType::_1D:
    case TextureType::ARRAY_1D:
        return field(0);
    case TextureType::_2D:
    case TextureType::ARRAY_2D: {
        const auto x{field(0)};
        const auto y{field(4)};
        return v.ir.CompositeConstruct(x, y);
    }
    case TextureType::_3D:
    case TextureType::ARRAY_3D: {
        const auto x{field(0)};
        const auto y{field(4)};
        const auto z{field(8)};
        return v.ir.CompositeConstruct(x, y, z);
    }
    case TextureType::CUBE:
    case TextureType::ARRAY_CUBE:
        throw NotImplementedException("Illegal offset on CUBE sample");
    }
    throw NotImplementedException("Invalid texture type {}", type);
}
// Shared translation for TLD and TLD_b (texel fetch with integer coordinates).
//
// v           - translator whose IR emitter receives the generated instructions
// insn        - raw instruction word
// is_bindless - when true the handle is read from the first meta register; otherwise it
//               is the constant buffer offset field multiplied by 4
//
// Meta operands are consumed in this order when present: handle (bindless only), LOD,
// packed offset, multisample index. The CL (clamp) form throws NotImplementedException.
void Impl(TranslatorVisitor& v, u64 insn, bool is_bindless) {
    union {
        u64 raw;
        BitField<49, 1, u64> nodep;
        BitField<55, 1, u64> lod;
        BitField<50, 1, u64> multisample;
        BitField<35, 1, u64> aoffi;
        BitField<54, 1, u64> clamp;
        BitField<51, 3, IR::Pred> sparse_pred;
        BitField<0, 8, IR::Reg> dest_reg;
        BitField<8, 8, IR::Reg> coord_reg;
        BitField<20, 8, IR::Reg> meta_reg;
        BitField<28, 3, TextureType> type;
        BitField<31, 4, u64> mask;
        BitField<36, 13, u64> cbuf_offset;
    } const tld{insn};

    const IR::Value coords{MakeCoords(v, tld.coord_reg, tld.type)};
    IR::Reg meta_reg{tld.meta_reg};
    IR::Value handle;
    IR::Value offset;
    IR::U32 lod;
    IR::U32 multisample;
    if (is_bindless) {
        handle = v.X(meta_reg++);
    } else {
        handle = v.ir.Imm32(static_cast<u32>(tld.cbuf_offset.Value() * 4));
    }
    // Without an explicit LOD operand, level 0 is fetched.
    if (tld.lod != 0) {
        lod = v.X(meta_reg++);
    } else {
        lod = v.ir.Imm32(0U);
    }
    if (tld.aoffi != 0) {
        offset = MakeOffset(v, meta_reg, tld.type);
    }
    if (tld.multisample != 0) {
        multisample = v.X(meta_reg++);
    }
    if (tld.clamp != 0) {
        throw NotImplementedException("TLD.CL - CLAMP is not implemented");
    }

    IR::TextureInstInfo info{};
    info.type.Assign(GetType(tld.type));
    const IR::Value sample{v.ir.ImageFetch(handle, coords, offset, lod, multisample, info)};

    // Only components enabled in the mask are written; destinations are packed.
    IR::Reg dest_reg{tld.dest_reg};
    for (size_t element = 0; element < 4; ++element) {
        if (((tld.mask >> element) & 1) == 0) {
            continue;
        }
        v.F(dest_reg, IR::F32{v.ir.CompositeExtract(sample, element)});
        ++dest_reg;
    }
    if (tld.sparse_pred != IR::Pred::PT) {
        v.ir.SetPred(tld.sparse_pred, v.ir.LogicalNot(v.ir.GetSparseFromOp(sample)));
    }
}
} // Anonymous namespace
// TLD: non-bindless form of the shared implementation.
void TranslatorVisitor::TLD(u64 insn) {
    constexpr bool is_bindless{false};
    Impl(*this, insn, is_bindless);
}
// TLD_b: bindless form of the shared implementation.
void TranslatorVisitor::TLD_b(u64 insn) {
    constexpr bool is_bindless{true};
    Impl(*this, insn, is_bindless);
}
} // namespace Shader::Maxwell

View File

@@ -0,0 +1,242 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <array>
#include "common/bit_field.h"
#include "common/common_types.h"
#include "shader_recompiler/frontend/ir/modifiers.h"
#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
namespace Shader::Maxwell {
namespace {
// Result precision decoded from bit 59 of the TLDS encoding.
enum class Precision : u64 {
    F16 = 0,
    F32 = 1,
};
// One bit per colour component; the tables below combine them into write masks.
constexpr unsigned R = 1U << 0;
constexpr unsigned G = 1U << 1;
constexpr unsigned B = 1U << 2;
constexpr unsigned A = 1U << 3;

// Component masks indexed by the swizzle field when dest_reg_b is RZ.
constexpr std::array<unsigned, 8> RG_LUT{
    R,
    G,
    B,
    A,
    R | G,
    R | A,
    G | A,
    B | A,
};

// Component masks indexed by the swizzle field when dest_reg_b is a real register.
constexpr std::array<unsigned, 5> RGBA_LUT{
    R | G | B,
    R | G | A,
    R | B | A,
    G | B | A,
    R | G | B | A,
};
// Bit layout of a TLDS instruction word.
// `raw` must remain the first member: `Encoding{insn}` aggregate-initializes it.
// `encoding` (bits 53..56) overlaps the individual lod/aoffi/ms flags and is what
// Sample switches on.
union Encoding {
    u64 raw;
    BitField<0, 8, IR::Reg> dest_reg_a;
    BitField<8, 8, IR::Reg> src_reg_a;
    BitField<20, 8, IR::Reg> src_reg_b;
    BitField<28, 8, IR::Reg> dest_reg_b;
    BitField<36, 13, u64> cbuf_offset;
    BitField<49, 1, u64> nodep;
    BitField<50, 3, u64> swizzle;
    BitField<53, 1, u64> lod;
    BitField<54, 1, u64> aoffi;
    BitField<55, 1, u64> ms;
    BitField<53, 4, u64> encoding;
    BitField<59, 1, Precision> precision;
};
// Throws NotImplementedException unless `reg` satisfies IR::IsAligned for `alignment`.
void CheckAlignment(IR::Reg reg, size_t alignment) {
    if (IR::IsAligned(reg, alignment)) {
        return;
    }
    throw NotImplementedException("Unaligned source register {}", reg);
}
// Unpacks two signed 4-bit offsets (bits 0..3 and 4..7) from `reg` into a 2-vector.
IR::Value MakeOffset(TranslatorVisitor& v, IR::Reg reg) {
    const IR::U32 packed{v.X(reg)};
    const auto x{v.ir.BitFieldExtract(packed, v.ir.Imm32(0), v.ir.Imm32(4), true)};
    const auto y{v.ir.BitFieldExtract(packed, v.ir.Imm32(4), v.ir.Imm32(4), true)};
    return v.ir.CompositeConstruct(x, y);
}
// Emits the texel fetch for TLDS. The 4-bit `encoding` field selects the texture type
// and which of LOD / offset / multisample index are read from reg_a and reg_b.
// Encodings without a case below throw NotImplementedException.
IR::Value Sample(TranslatorVisitor& v, u64 insn) {
    const Encoding tlds{insn};
    const IR::U32 handle{v.ir.Imm32(static_cast<u32>(tlds.cbuf_offset * 4))};
    const IR::Reg reg_a{tlds.src_reg_a};
    const IR::Reg reg_b{tlds.src_reg_b};
    // 2D coordinates held in the register pair starting at reg_a.
    const auto pair_at_a{
        [&] { return v.ir.CompositeConstruct(v.X(reg_a), v.X(reg_a + 1)); }};

    IR::Value coords;
    IR::U32 lod{v.ir.Imm32(0U)};
    IR::Value offsets;
    IR::U32 multisample;
    Shader::TextureType texture_type{};
    switch (tlds.encoding) {
    case 0:
        // 1D
        texture_type = Shader::TextureType::Color1D;
        coords = v.X(reg_a);
        break;
    case 1:
        // 1D with explicit LOD
        texture_type = Shader::TextureType::Color1D;
        coords = v.X(reg_a);
        lod = v.X(reg_b);
        break;
    case 2:
        // 2D, one coordinate per source register
        texture_type = Shader::TextureType::Color2D;
        coords = v.ir.CompositeConstruct(v.X(reg_a), v.X(reg_b));
        break;
    case 4:
        // 2D with packed offset
        CheckAlignment(reg_a, 2);
        texture_type = Shader::TextureType::Color2D;
        coords = pair_at_a();
        offsets = MakeOffset(v, reg_b);
        break;
    case 5:
        // 2D with explicit LOD
        CheckAlignment(reg_a, 2);
        texture_type = Shader::TextureType::Color2D;
        coords = pair_at_a();
        lod = v.X(reg_b);
        break;
    case 6:
        // 2D with multisample index
        CheckAlignment(reg_a, 2);
        texture_type = Shader::TextureType::Color2D;
        coords = pair_at_a();
        multisample = v.X(reg_b);
        break;
    case 7:
        // 3D, third coordinate in reg_b
        CheckAlignment(reg_a, 2);
        texture_type = Shader::TextureType::Color3D;
        coords = v.ir.CompositeConstruct(v.X(reg_a), v.X(reg_a + 1), v.X(reg_b));
        break;
    case 8: {
        // 2D array: index in the low 16 bits of reg_a, coordinates in the reg_b pair
        CheckAlignment(reg_b, 2);
        const IR::U32 layer{v.ir.BitFieldExtract(v.X(reg_a), v.ir.Imm32(0), v.ir.Imm32(16))};
        texture_type = Shader::TextureType::ColorArray2D;
        coords = v.ir.CompositeConstruct(v.X(reg_b), v.X(reg_b + 1), layer);
        break;
    }
    case 12:
        // 2D with explicit LOD (reg_b) and packed offset (reg_b + 1)
        CheckAlignment(reg_a, 2);
        CheckAlignment(reg_b, 2);
        texture_type = Shader::TextureType::Color2D;
        coords = pair_at_a();
        lod = v.X(reg_b);
        offsets = MakeOffset(v, reg_b + 1);
        break;
    default:
        throw NotImplementedException("Illegal encoding {}", tlds.encoding.Value());
    }

    IR::TextureInstInfo info{};
    if (tlds.precision == Precision::F16) {
        info.relaxed_precision.Assign(1);
    }
    info.type.Assign(texture_type);
    return v.ir.ImageFetch(handle, coords, offsets, lod, multisample, info);
}
// Returns the component write mask for the instruction. RG_LUT is used when dest_reg_b
// is RZ and RGBA_LUT otherwise; an index past the chosen table throws
// NotImplementedException.
unsigned Swizzle(u64 insn) {
    const Encoding tlds{insn};
    const size_t encoding{tlds.swizzle};
    const bool single_dest{tlds.dest_reg_b == IR::Reg::RZ};
    if (!single_dest) {
        if (encoding < RGBA_LUT.size()) {
            return RGBA_LUT[encoding];
        }
        throw NotImplementedException("Illegal RGBA encoding {}", encoding);
    }
    if (encoding < RG_LUT.size()) {
        return RG_LUT[encoding];
    }
    throw NotImplementedException("Illegal RG encoding {}", encoding);
}
// Pulls one component out of the fetched composite as a 32-bit float value.
IR::F32 Extract(TranslatorVisitor& v, const IR::Value& sample, unsigned component) {
    const IR::F32 element{v.ir.CompositeExtract(sample, component)};
    return element;
}
// Returns the destination register for the `index`-th stored 32-bit value (0..3):
// 0/1 map to dest_reg_a and its successor, 2/3 to dest_reg_b and its successor.
// The successor is only used after the base passes CheckAlignment(base, 2).
// Any other index raises LogicError.
IR::Reg RegStoreComponent32(u64 insn, unsigned index) {
    const Encoding tlds{insn};
    if (index > 3) {
        throw LogicError("Invalid store index {}", index);
    }
    const IR::Reg base{index < 2 ? tlds.dest_reg_a.Value() : tlds.dest_reg_b.Value()};
    if (index % 2 == 0) {
        return base;
    }
    CheckAlignment(base, 2);
    return base + 1;
}
// Writes the components enabled by the swizzle mask as 32-bit floats. Stored values
// are packed: the n-th enabled component goes to the n-th destination slot.
void Store32(TranslatorVisitor& v, u64 insn, const IR::Value& sample) {
    const unsigned mask{Swizzle(insn)};
    unsigned slot{0};
    for (unsigned component = 0; component < 4; ++component) {
        const bool enabled{((mask >> component) & 1) != 0};
        if (enabled) {
            const IR::Reg target{RegStoreComponent32(insn, slot)};
            v.F(target, Extract(v, sample, component));
            ++slot;
        }
    }
}
// Packs two 32-bit floats into one 32-bit word via PackHalf2x16 (lhs first, rhs second).
IR::U32 Pack(TranslatorVisitor& v, const IR::F32& lhs, const IR::F32& rhs) {
    const auto pair{v.ir.CompositeConstruct(lhs, rhs)};
    return v.ir.PackHalf2x16(pair);
}
// Writes the components enabled by the swizzle mask as packed halves.
// The first two enabled components go to dest_reg_a and the next two to dest_reg_b;
// an odd trailing component is paired with zero. dest_reg_b is written only when at
// least three components are enabled.
void Store16(TranslatorVisitor& v, u64 insn, const IR::Value& sample) {
    const unsigned mask{Swizzle(insn)};
    std::array<IR::F32, 4> selected;
    unsigned count{0};
    for (unsigned component = 0; component < 4; ++component) {
        if (((mask >> component) & 1) != 0) {
            selected[count] = Extract(v, sample, component);
            ++count;
        }
    }
    const IR::F32 zero{v.ir.Imm32(0.0f)};
    const Encoding tlds{insn};
    if (count == 0) {
        return;
    }
    const IR::F32& second = count >= 2 ? selected[1] : zero;
    v.X(tlds.dest_reg_a, Pack(v, selected[0], second));
    if (count >= 3) {
        const IR::F32& fourth = count >= 4 ? selected[3] : zero;
        v.X(tlds.dest_reg_b, Pack(v, selected[2], fourth));
    }
}
} // Anonymous namespace
// TLDS: fetch once, then write back in the layout selected by the precision field.
void TranslatorVisitor::TLDS(u64 insn) {
    const IR::Value texel{Sample(*this, insn)};
    const Encoding tlds{insn};
    if (tlds.precision != Precision::F32) {
        Store16(*this, insn, texel);
        return;
    }
    Store32(*this, insn, texel);
}
} // namespace Shader::Maxwell

View File

@@ -0,0 +1,131 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <optional>
#include "common/bit_field.h"
#include "common/common_types.h"
#include "shader_recompiler/frontend/ir/modifiers.h"
#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
namespace Shader::Maxwell {
namespace {
// Texture type selector decoded from the 3-bit "type" field of the instruction.
enum class TextureType : u64 {
    _1D = 0,
    ARRAY_1D = 1,
    _2D = 2,
    ARRAY_2D = 3,
    _3D = 4,
    ARRAY_3D = 5,
    CUBE = 6,
    ARRAY_CUBE = 7,
};
// Translates the instruction-level texture type into the shader-wide texture type.
// Throws NotImplementedException for 3D arrays and for values outside the enumeration.
Shader::TextureType GetType(TextureType type) {
    using Result = Shader::TextureType;
    switch (type) {
    case TextureType::ARRAY_3D:
        throw NotImplementedException("3D array texture type");
    case TextureType::_1D:
        return Result::Color1D;
    case TextureType::_2D:
        return Result::Color2D;
    case TextureType::_3D:
        return Result::Color3D;
    case TextureType::CUBE:
        return Result::ColorCube;
    case TextureType::ARRAY_1D:
        return Result::ColorArray1D;
    case TextureType::ARRAY_2D:
        return Result::ColorArray2D;
    case TextureType::ARRAY_CUBE:
        return Result::ColorArrayCube;
    }
    throw NotImplementedException("Invalid texture type {}", type);
}
// Builds the floating-point coordinate operand for the LOD query.
// Array types keep their array index in the first register; it is deliberately skipped
// (coordinates start at reg + 1) because the query does not use it.
IR::Value MakeCoords(TranslatorVisitor& v, IR::Reg reg, TextureType type) {
    switch (type) {
    case TextureType::_1D:
        return v.F(reg);
    case TextureType::ARRAY_1D:
        return v.F(reg + 1);
    case TextureType::_2D: {
        const IR::F32 x{v.F(reg)};
        const IR::F32 y{v.F(reg + 1)};
        return v.ir.CompositeConstruct(x, y);
    }
    case TextureType::ARRAY_2D: {
        const IR::F32 x{v.F(reg + 1)};
        const IR::F32 y{v.F(reg + 2)};
        return v.ir.CompositeConstruct(x, y);
    }
    case TextureType::_3D:
    case TextureType::CUBE: {
        // Both types read three consecutive coordinates.
        const IR::F32 x{v.F(reg)};
        const IR::F32 y{v.F(reg + 1)};
        const IR::F32 z{v.F(reg + 2)};
        return v.ir.CompositeConstruct(x, y, z);
    }
    case TextureType::ARRAY_3D:
        throw NotImplementedException("3D array texture type");
    case TextureType::ARRAY_CUBE: {
        const IR::F32 x{v.F(reg + 1)};
        const IR::F32 y{v.F(reg + 2)};
        const IR::F32 z{v.F(reg + 3)};
        return v.ir.CompositeConstruct(x, y, z);
    }
    }
    throw NotImplementedException("Invalid texture type {}", type);
}
// Shared translation for TMML and TMML_b (LOD query).
//
// v           - translator whose IR emitter receives the generated instructions
// insn        - raw instruction word
// is_bindless - when true the handle is read from the meta register; otherwise it is the
//               constant buffer offset field multiplied by 4
//
// Requesting either of the two upper mask components throws NotImplementedException.
void Impl(TranslatorVisitor& v, u64 insn, bool is_bindless) {
    union {
        u64 raw;
        BitField<49, 1, u64> nodep;
        BitField<35, 1, u64> ndv;
        BitField<0, 8, IR::Reg> dest_reg;
        BitField<8, 8, IR::Reg> coord_reg;
        BitField<20, 8, IR::Reg> meta_reg;
        BitField<28, 3, TextureType> type;
        BitField<31, 4, u64> mask;
        BitField<36, 13, u64> cbuf_offset;
    } const tmml{insn};

    if ((tmml.mask & 0b1100) != 0) {
        throw NotImplementedException("TMML BA results are not implemented");
    }
    const IR::Value coords{MakeCoords(v, tmml.coord_reg, tmml.type)};

    IR::U32 handle;
    IR::Reg meta_reg{tmml.meta_reg};
    if (is_bindless) {
        handle = v.X(meta_reg++);
    } else {
        handle = v.ir.Imm32(static_cast<u32>(tmml.cbuf_offset.Value() * 4));
    }

    IR::TextureInstInfo info{};
    info.type.Assign(GetType(tmml.type));
    const IR::Value sample{v.ir.ImageQueryLod(handle, coords, info)};

    // Only components enabled in the mask are written; destinations are packed.
    IR::Reg dest_reg{tmml.dest_reg};
    for (size_t element = 0; element < 4; ++element) {
        if (((tmml.mask >> element) & 1) == 0) {
            continue;
        }
        IR::F32 value{v.ir.CompositeExtract(sample, element)};
        if (element < 2) {
            // The first two results are converted to integers (element 0 unsigned,
            // element 1 signed) and stored shifted left by 8 bits.
            IR::U32 casted_value;
            if (element == 0) {
                casted_value = v.ir.ConvertFToU(32, value);
            } else {
                casted_value = v.ir.ConvertFToS(16, value);
            }
            v.X(dest_reg, v.ir.ShiftLeftLogical(casted_value, v.ir.Imm32(8)));
        } else {
            v.F(dest_reg, value);
        }
        ++dest_reg;
    }
}
} // Anonymous namespace
// TMML: non-bindless form of the shared implementation.
void TranslatorVisitor::TMML(u64 insn) {
    constexpr bool is_bindless{false};
    Impl(*this, insn, is_bindless);
}
// TMML_b: bindless form of the shared implementation.
void TranslatorVisitor::TMML_b(u64 insn) {
    constexpr bool is_bindless{true};
    Impl(*this, insn, is_bindless);
}
} // namespace Shader::Maxwell

View File

@@ -0,0 +1,76 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <optional>
#include "common/bit_field.h"
#include "common/common_types.h"
#include "shader_recompiler/frontend/ir/modifiers.h"
#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
namespace Shader::Maxwell {
namespace {
// Query mode decoded from the 3-bit field at bit 22 of TXQ / TXQ_b.
// Only Dimension is translated; Query() throws NotImplementedException for the rest.
enum class Mode : u64 {
Dimension = 1,
TextureType = 2,
SamplePos = 5,
};
// Emits the image query for `mode`. Only Mode::Dimension is supported: it reads the LOD
// from `src_reg` and queries the image dimensions. Every other mode throws
// NotImplementedException.
IR::Value Query(TranslatorVisitor& v, const IR::U32& handle, Mode mode, IR::Reg src_reg) {
    if (mode != Mode::Dimension) {
        throw NotImplementedException("Mode {}", mode);
    }
    const IR::U32 lod{v.X(src_reg)};
    return v.ir.ImageQueryDimension(handle, lod);
}
// Shared translation for TXQ and TXQ_b.
// When `cbuf_offset` holds a value it is used as the handle; otherwise the handle is
// read from the source register and the query operand is taken from the next one.
// Query results enabled in the mask are written to consecutive destination registers.
void Impl(TranslatorVisitor& v, u64 insn, std::optional<u32> cbuf_offset) {
    union {
        u64 raw;
        BitField<0, 8, IR::Reg> dest_reg;
        BitField<8, 8, IR::Reg> src_reg;
        BitField<22, 3, Mode> mode;
        BitField<31, 4, u64> mask;
        BitField<49, 1, u64> nodep;
    } const txq{insn};

    IR::Reg src_reg{txq.src_reg};
    IR::U32 handle;
    if (!cbuf_offset.has_value()) {
        handle = v.X(src_reg);
        ++src_reg;
    } else {
        handle = v.ir.Imm32(*cbuf_offset);
    }
    const IR::Value query{Query(v, handle, txq.mode, src_reg)};

    IR::Reg dest_reg{txq.dest_reg};
    for (int element = 0; element < 4; ++element) {
        const bool enabled{((txq.mask >> element) & 1) != 0};
        if (enabled) {
            v.X(dest_reg, IR::U32{v.ir.CompositeExtract(query, static_cast<size_t>(element))});
            ++dest_reg;
        }
    }
}
} // Anonymous namespace
// TXQ: the handle is the 13-bit constant buffer offset field multiplied by 4.
void TranslatorVisitor::TXQ(u64 insn) {
    union {
        u64 raw;
        BitField<36, 13, u64> cbuf_offset;
    } const txq{insn};
    const u32 scaled_offset{static_cast<u32>(txq.cbuf_offset * 4)};
    Impl(*this, insn, scaled_offset);
}
// TXQ_b: bindless form; no constant buffer offset is supplied, so Impl reads the handle
// from a register.
void TranslatorVisitor::TXQ_b(u64 insn) {
    const std::optional<u32> no_cbuf_offset{};
    Impl(*this, insn, no_cbuf_offset);
}
} // namespace Shader::Maxwell

View File

@@ -0,0 +1,30 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "shader_recompiler/exception.h"
#include "shader_recompiler/frontend/maxwell/translate/impl/video_helper.h"
namespace Shader::Maxwell {
// Extracts the sub-word of `value` chosen by `width` and `selector`.
// Byte and Unknown take 8 bits at selector * 8, Short takes 16 bits at selector * 16,
// and Word returns `value` unchanged. `is_signed` is forwarded to the bit-field extract.
// Any other width throws NotImplementedException.
IR::U32 ExtractVideoOperandValue(IR::IREmitter& ir, const IR::U32& value, VideoWidth width,
                                 u32 selector, bool is_signed) {
    int bits{};
    switch (width) {
    case VideoWidth::Word:
        return value;
    case VideoWidth::Byte:
    case VideoWidth::Unknown:
        bits = 8;
        break;
    case VideoWidth::Short:
        bits = 16;
        break;
    default:
        throw NotImplementedException("Unknown VideoWidth {}", width);
    }
    return ir.BitFieldExtract(value, ir.Imm32(selector * bits), ir.Imm32(bits), is_signed);
}
// Returns the effective operand width: immediates are always 16-bit (Short), any other
// operand keeps the width passed in.
VideoWidth GetVideoSourceWidth(VideoWidth width, bool is_immediate) {
    if (is_immediate) {
        return VideoWidth::Short;
    }
    return width;
}
} // namespace Shader::Maxwell

View File

@@ -0,0 +1,23 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include "common/common_types.h"
#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
namespace Shader::Maxwell {
// Operand width selector used by the video instructions.
enum class VideoWidth : u64 {
    Byte = 0,
    Unknown = 1, // ExtractVideoOperandValue treats this like Byte
    Short = 2,
    Word = 3,
};
// Extracts the part of `value` selected by `width` and `selector`; `is_signed` selects
// a signed extract.
[[nodiscard]] IR::U32 ExtractVideoOperandValue(IR::IREmitter& ir, const IR::U32& value,
VideoWidth width, u32 selector, bool is_signed);
// Returns the width to use for a source operand, given whether it is an immediate.
[[nodiscard]] VideoWidth GetVideoSourceWidth(VideoWidth width, bool is_immediate);
} // namespace Shader::Maxwell

View File

@@ -0,0 +1,92 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "common/common_types.h"
#include "shader_recompiler/exception.h"
#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
#include "shader_recompiler/frontend/maxwell/translate/impl/video_helper.h"
namespace Shader::Maxwell {
namespace {
// Secondary operation of VMNMX, decoded from the instruction's 3-bit op field.
// Only MIN and MAX are translated; the remaining values raise NotImplementedException.
enum class VideoMinMaxOps : u64 {
    MRG_16H = 0,
    MRG_16L = 1,
    MRG_8B0 = 2,
    MRG_8B2 = 3,
    ACC = 4,
    MIN = 5,
    MAX = 6,
};
// Applies the secondary VMNMX operation to `lhs` and `rhs`.
// MIN and MAX emit an integer min/max with the requested signedness; every other
// operation throws NotImplementedException.
[[nodiscard]] IR::U32 ApplyVideoMinMaxOp(IR::IREmitter& ir, const IR::U32& lhs, const IR::U32& rhs,
                                         VideoMinMaxOps op, bool is_signed) {
    if (op == VideoMinMaxOps::MIN) {
        return ir.IMin(lhs, rhs, is_signed);
    }
    if (op == VideoMinMaxOps::MAX) {
        return ir.IMax(lhs, rhs, is_signed);
    }
    throw NotImplementedException("VMNMX op {}", op);
}
} // Anonymous namespace
// Translates VMNMX: computes min or max (chosen by the mx bit) of operands A and B, then
// combines that result with register C using the secondary operation in the op field.
// CC, SAT, selectors other than 2 and non-Word A widths are rejected as not implemented.
void TranslatorVisitor::VMNMX(u64 insn) {
    union {
        u64 raw;
        BitField<0, 8, IR::Reg> dest_reg;
        BitField<20, 16, u64> src_b_imm;
        BitField<28, 2, u64> src_b_selector;
        BitField<29, 2, VideoWidth> src_b_width;
        BitField<36, 2, u64> src_a_selector;
        BitField<37, 2, VideoWidth> src_a_width;
        BitField<47, 1, u64> cc;
        BitField<48, 1, u64> src_a_sign;
        BitField<49, 1, u64> src_b_sign;
        BitField<50, 1, u64> is_src_b_reg;
        BitField<51, 3, VideoMinMaxOps> op;
        BitField<54, 1, u64> dest_sign;
        BitField<55, 1, u64> sat;
        BitField<56, 1, u64> mx;
    } const fields{insn};

    // Unsupported modifiers
    if (fields.cc != 0) {
        throw NotImplementedException("VMNMX CC");
    }
    if (fields.sat != 0) {
        throw NotImplementedException("VMNMX SAT");
    }

    // Selectors were shown to default to 2 in unit tests
    // NOTE(review): the B selector bits (28-29) lie inside src_b_imm (20-35), so this check
    // also fires on immediate operands depending on the immediate's value - confirm intended.
    const u64 selector_a{fields.src_a_selector.Value()};
    const u64 selector_b{fields.src_b_selector.Value()};
    if (selector_a != 2) {
        throw NotImplementedException("VMNMX Selector {}", selector_a);
    }
    if (selector_b != 2) {
        throw NotImplementedException("VMNMX Selector {}", selector_b);
    }
    if (fields.src_a_width != VideoWidth::Word) {
        throw NotImplementedException("VMNMX Source Width {}", fields.src_a_width.Value());
    }

    // Fetch raw operands; B is an immediate when the register flag is clear
    const bool b_is_immediate{fields.is_src_b_reg == 0};
    const IR::U32 raw_a{GetReg8(insn)};
    const IR::U32 raw_b{b_is_immediate ? ir.Imm32(static_cast<u32>(fields.src_b_imm))
                                       : GetReg20(insn)};
    const IR::U32 raw_c{GetReg39(insn)};

    // Narrow the operands to their encoded widths (lane 0 is always used here)
    const VideoWidth width_a{fields.src_a_width};
    const VideoWidth width_b{GetVideoSourceWidth(fields.src_b_width, b_is_immediate)};
    const bool a_is_signed{fields.src_a_sign != 0};
    const bool b_is_signed{fields.src_b_sign != 0};
    const IR::U32 operand_a{ExtractVideoOperandValue(ir, raw_a, width_a, 0, a_is_signed)};
    const IR::U32 operand_b{ExtractVideoOperandValue(ir, raw_b, width_b, 0, b_is_signed)};

    // First operation's sign is only dependent on operand b's sign
    const bool first_is_signed{b_is_signed};
    const bool take_maximum{fields.mx != 0};
    const IR::U32 first_result{take_maximum ? ir.IMax(operand_a, operand_b, first_is_signed)
                                            : ir.IMin(operand_a, operand_b, first_is_signed)};

    // Second operation combines the first result with C using the destination sign
    const bool dest_is_signed{fields.dest_sign != 0};
    X(fields.dest_reg, ApplyVideoMinMaxOp(ir, first_result, raw_c, fields.op, dest_is_signed));
}
} // namespace Shader::Maxwell

View File

@@ -0,0 +1,64 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "common/common_types.h"
#include "shader_recompiler/exception.h"
#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
#include "shader_recompiler/frontend/maxwell/translate/impl/video_helper.h"
namespace Shader::Maxwell {
// Translates VMAD: dest = A * B + C, where A and B are lanes extracted from their source
// operands according to the encoded width, selector and sign.
// CC, SAT, SCALE and the negate/PO modifiers are rejected as not implemented.
void TranslatorVisitor::VMAD(u64 insn) {
    union {
        u64 raw;
        BitField<0, 8, IR::Reg> dest_reg;
        BitField<20, 16, u64> src_b_imm;
        BitField<28, 2, u64> src_b_selector;
        BitField<29, 2, VideoWidth> src_b_width;
        BitField<36, 2, u64> src_a_selector;
        BitField<37, 2, VideoWidth> src_a_width;
        BitField<47, 1, u64> cc;
        BitField<48, 1, u64> src_a_sign;
        BitField<49, 1, u64> src_b_sign;
        BitField<50, 1, u64> is_src_b_reg;
        BitField<51, 2, u64> scale;
        BitField<53, 1, u64> src_c_neg;
        BitField<54, 1, u64> src_a_neg;
        BitField<55, 1, u64> sat;
    } const fields{insn};

    // Unsupported modifiers
    if (fields.cc != 0) {
        throw NotImplementedException("VMAD CC");
    }
    if (fields.sat != 0) {
        throw NotImplementedException("VMAD SAT");
    }
    if (fields.scale != 0) {
        throw NotImplementedException("VMAD SCALE");
    }
    // Both negate bits set is reported as PO, a single one as NEG
    const bool negate_a{fields.src_a_neg != 0};
    const bool negate_c{fields.src_c_neg != 0};
    if (negate_a && negate_c) {
        throw NotImplementedException("VMAD PO");
    }
    if (negate_a || negate_c) {
        throw NotImplementedException("VMAD NEG");
    }

    // Fetch raw operands; B is an immediate when the register flag is clear
    const bool b_is_immediate{fields.is_src_b_reg == 0};
    const IR::U32 raw_a{GetReg8(insn)};
    const IR::U32 raw_b{b_is_immediate ? ir.Imm32(static_cast<u32>(fields.src_b_imm))
                                       : GetReg20(insn)};
    const IR::U32 raw_c{GetReg39(insn)};

    const u32 selector_a{static_cast<u32>(fields.src_a_selector)};
    // Immediate values can't have a selector
    const u32 selector_b{b_is_immediate ? 0U : static_cast<u32>(fields.src_b_selector)};
    const VideoWidth width_a{fields.src_a_width};
    const VideoWidth width_b{GetVideoSourceWidth(fields.src_b_width, b_is_immediate)};
    const bool a_is_signed{fields.src_a_sign != 0};
    const bool b_is_signed{fields.src_b_sign != 0};

    const IR::U32 operand_a{ExtractVideoOperandValue(ir, raw_a, width_a, selector_a, a_is_signed)};
    const IR::U32 operand_b{ExtractVideoOperandValue(ir, raw_b, width_b, selector_b, b_is_signed)};

    const IR::U32 product{ir.IMul(operand_a, operand_b)};
    X(fields.dest_reg, ir.IAdd(product, raw_c));
}
} // namespace Shader::Maxwell

View File

@@ -0,0 +1,92 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "common/common_types.h"
#include "shader_recompiler/exception.h"
#include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h"
#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
#include "shader_recompiler/frontend/maxwell/translate/impl/video_helper.h"
namespace Shader::Maxwell {
namespace {
// Comparison encoded in VSETP's 5-bit compare field.
// The values are not contiguous: the second group of operations starts at 16.
enum class VsetpCompareOp : u64 {
    False = 0,
    LessThan = 1,
    Equal = 2,
    LessThanEqual = 3,
    GreaterThan = 16,
    NotEqual = 17,
    GreaterThanEqual = 18,
    True = 19,
};
// Maps a VSETP comparison encoding to the shared CompareOp enumeration.
// Throws NotImplementedException when `op` is not one of the known encodings.
CompareOp VsetpToShaderCompareOp(VsetpCompareOp op) {
    struct Mapping {
        VsetpCompareOp encoded;
        CompareOp translated;
    };
    static constexpr Mapping mappings[]{
        {VsetpCompareOp::False, CompareOp::False},
        {VsetpCompareOp::LessThan, CompareOp::LessThan},
        {VsetpCompareOp::Equal, CompareOp::Equal},
        {VsetpCompareOp::LessThanEqual, CompareOp::LessThanEqual},
        {VsetpCompareOp::GreaterThan, CompareOp::GreaterThan},
        {VsetpCompareOp::NotEqual, CompareOp::NotEqual},
        {VsetpCompareOp::GreaterThanEqual, CompareOp::GreaterThanEqual},
        {VsetpCompareOp::True, CompareOp::True},
    };
    for (const Mapping& entry : mappings) {
        if (entry.encoded == op) {
            return entry.translated;
        }
    }
    throw NotImplementedException("Invalid compare op {}", op);
}
} // Anonymous namespace
// Translates VSETP: compares lanes extracted from operands A and B, combines the comparison
// (and its negation) with a third predicate using the encoded boolean operation, and writes
// the two results to dest_pred_a and dest_pred_b respectively.
void TranslatorVisitor::VSETP(u64 insn) {
    union {
        u64 raw;
        BitField<0, 3, IR::Pred> dest_pred_b;
        BitField<3, 3, IR::Pred> dest_pred_a;
        BitField<20, 16, u64> src_b_imm;
        BitField<28, 2, u64> src_b_selector;
        BitField<29, 2, VideoWidth> src_b_width;
        BitField<36, 2, u64> src_a_selector;
        BitField<37, 2, VideoWidth> src_a_width;
        BitField<39, 3, IR::Pred> bop_pred;
        BitField<42, 1, u64> neg_bop_pred;
        // NOTE(review): compare_op (5 bits from bit 43) overlaps bop (2 bits from bit 45).
        // A non-zero bop presumably yields a compare_op value outside VsetpCompareOp and ends
        // in the throwing default of VsetpToShaderCompareOp - confirm against the encoding.
        BitField<43, 5, VsetpCompareOp> compare_op;
        BitField<45, 2, BooleanOp> bop;
        BitField<48, 1, u64> src_a_sign;
        BitField<49, 1, u64> src_b_sign;
        BitField<50, 1, u64> is_src_b_reg;
    } const vsetp{insn};

    // Operand B is a 16-bit immediate when the register flag is clear
    const bool is_b_imm{vsetp.is_src_b_reg == 0};
    const IR::U32 src_a{GetReg8(insn)};
    const IR::U32 src_b{is_b_imm ? ir.Imm32(static_cast<u32>(vsetp.src_b_imm)) : GetReg20(insn)};

    const u32 a_selector{static_cast<u32>(vsetp.src_a_selector)};
    // Immediate values can't have a selector: the selector bits (28-29) are part of the
    // immediate field (20-35), so reading them would select a lane from immediate payload.
    const u32 b_selector{is_b_imm ? 0U : static_cast<u32>(vsetp.src_b_selector)};
    const VideoWidth a_width{vsetp.src_a_width};
    const VideoWidth b_width{GetVideoSourceWidth(vsetp.src_b_width, is_b_imm)};

    const bool src_a_signed{vsetp.src_a_sign != 0};
    const bool src_b_signed{vsetp.src_b_sign != 0};
    const IR::U32 op_a{ExtractVideoOperandValue(ir, src_a, a_width, a_selector, src_a_signed)};
    const IR::U32 op_b{ExtractVideoOperandValue(ir, src_b, b_width, b_selector, src_b_signed)};

    // Compare operation's sign is only dependent on operand b's sign
    const bool compare_signed{src_b_signed};
    const CompareOp compare_op{VsetpToShaderCompareOp(vsetp.compare_op)};
    const IR::U1 comparison{IntegerCompare(ir, op_a, op_b, compare_op, compare_signed)};

    // dest_pred_a receives the combined comparison, dest_pred_b the combined negation
    const IR::U1 bop_pred{ir.GetPred(vsetp.bop_pred, vsetp.neg_bop_pred != 0)};
    const IR::U1 result_a{PredicateCombine(ir, comparison, bop_pred, vsetp.bop)};
    const IR::U1 result_b{PredicateCombine(ir, ir.LogicalNot(comparison), bop_pred, vsetp.bop)};
    ir.SetPred(vsetp.dest_pred_a, result_a);
    ir.SetPred(vsetp.dest_pred_b, result_b);
}
} // namespace Shader::Maxwell

View File

@@ -0,0 +1,54 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "common/bit_field.h"
#include "common/common_types.h"
#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
namespace Shader::Maxwell {
namespace {
// Vote mode decoded from the VOTE instruction's 2-bit op field.
enum class VoteOp : u64 {
    ALL = 0,
    ANY = 1,
    EQ = 2,
};
// Emits the vote IR operation matching `vote_op` on `pred`.
// Throws NotImplementedException when `vote_op` is not ALL, ANY or EQ.
[[nodiscard]] IR::U1 VoteOperation(IR::IREmitter& ir, const IR::U1& pred, VoteOp vote_op) {
    if (vote_op == VoteOp::ALL) {
        return ir.VoteAll(pred);
    }
    if (vote_op == VoteOp::ANY) {
        return ir.VoteAny(pred);
    }
    if (vote_op == VoteOp::EQ) {
        return ir.VoteEqual(pred);
    }
    throw NotImplementedException("Invalid VOTE op {}", vote_op);
}
// Decodes a VOTE instruction: writes the vote result of the (optionally negated) source
// predicate to pred_b and the subgroup ballot of that predicate to the destination register.
void Vote(TranslatorVisitor& v, u64 insn) {
    union {
        u64 insn;
        BitField<0, 8, IR::Reg> dest_reg;
        BitField<39, 3, IR::Pred> pred_a;
        BitField<42, 1, u64> neg_pred_a;
        BitField<45, 3, IR::Pred> pred_b;
        BitField<48, 2, VoteOp> vote_op;
    } const fields{insn};

    const bool negate_source{fields.neg_pred_a != 0};
    const IR::U1 source_pred{v.ir.GetPred(fields.pred_a, negate_source)};

    // Predicate output first, then the ballot mask
    const IR::U1 vote_result{VoteOperation(v.ir, source_pred, fields.vote_op)};
    v.ir.SetPred(fields.pred_b, vote_result);
    v.X(fields.dest_reg, v.ir.SubgroupBallot(source_pred));
}
} // Anonymous namespace
// Visitor entry point for VOTE; forwards to the file-local Vote helper.
void TranslatorVisitor::VOTE(u64 insn) {
    return Vote(*this, insn);
}
void TranslatorVisitor::VOTE_vtg(u64) {
LOG_WARNING(Shader, "(STUBBED) called");
}
} // namespace Shader::Maxwell

View File

@@ -0,0 +1,69 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <optional>
#include "common/bit_field.h"
#include "common/common_types.h"
#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
namespace Shader::Maxwell {
namespace {
// Shuffle mode decoded from the SHFL instruction's 2-bit mode field.
enum class ShuffleMode : u64 {
    IDX = 0,
    UP = 1,
    DOWN = 2,
    BFLY = 3,
};
// Emits the shuffle IR operation matching `shfl_op`.
// The clamp is taken from bits 0-4 of `mask` and the segment mask from bits 8-12.
// Throws NotImplementedException when `shfl_op` is not a known mode.
[[nodiscard]] IR::U32 ShuffleOperation(IR::IREmitter& ir, const IR::U32& value,
                                       const IR::U32& index, const IR::U32& mask,
                                       ShuffleMode shfl_op) {
    const IR::U32 field_width{ir.Imm32(5)};
    const IR::U32 clamp{ir.BitFieldExtract(mask, ir.Imm32(0), field_width)};
    const IR::U32 segment_mask{ir.BitFieldExtract(mask, ir.Imm32(8), field_width)};

    if (shfl_op == ShuffleMode::IDX) {
        return ir.ShuffleIndex(value, index, clamp, segment_mask);
    }
    if (shfl_op == ShuffleMode::UP) {
        return ir.ShuffleUp(value, index, clamp, segment_mask);
    }
    if (shfl_op == ShuffleMode::DOWN) {
        return ir.ShuffleDown(value, index, clamp, segment_mask);
    }
    if (shfl_op == ShuffleMode::BFLY) {
        return ir.ShuffleButterfly(value, index, clamp, segment_mask);
    }
    throw NotImplementedException("Invalid SHFL op {}", shfl_op);
}
// Shuffles the source register using `index` and `mask`, stores the shuffled value in the
// destination register and the in-bounds flag of the operation in the encoded predicate.
void Shuffle(TranslatorVisitor& v, u64 insn, const IR::U32& index, const IR::U32& mask) {
    union {
        u64 insn;
        BitField<0, 8, IR::Reg> dest_reg;
        BitField<8, 8, IR::Reg> src_reg;
        BitField<30, 2, ShuffleMode> mode;
        BitField<48, 3, IR::Pred> pred;
    } const fields{insn};

    const IR::U32 source{v.X(fields.src_reg)};
    const IR::U32 shuffled{ShuffleOperation(v.ir, source, index, mask, fields.mode)};

    // Predicate is written before the destination register
    v.ir.SetPred(fields.pred, v.ir.GetInBoundsFromOp(shuffled));
    v.X(fields.dest_reg, shuffled);
}
} // Anonymous namespace
// Visitor entry point for SHFL. Each of the two operands (index and mask) is either an
// immediate from the instruction or a register, selected by its own flag bit.
void TranslatorVisitor::SHFL(u64 insn) {
    union {
        u64 insn;
        BitField<20, 5, u64> src_a_imm;
        BitField<28, 1, u64> src_a_flag;
        BitField<29, 1, u64> src_b_flag;
        BitField<34, 13, u64> src_b_imm;
    } const encoding{insn};

    const bool index_is_immediate{encoding.src_a_flag != 0};
    const bool mask_is_immediate{encoding.src_b_flag != 0};

    const IR::U32 index{index_is_immediate ? ir.Imm32(static_cast<u32>(encoding.src_a_imm))
                                           : GetReg20(insn)};
    const IR::U32 mask{mask_is_immediate ? ir.Imm32(static_cast<u32>(encoding.src_b_imm))
                                         : GetReg39(insn)};
    Shuffle(*this, insn, index, mask);
}
} // namespace Shader::Maxwell