early-access version 1866

This commit is contained in:
pineappleEA
2021-07-09 23:54:15 +02:00
parent 335eeff822
commit 7d21887d40
469 changed files with 201995 additions and 78488 deletions

View File

@@ -0,0 +1,154 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <string_view>
#include "shader_recompiler/backend/bindings.h"
#include "shader_recompiler/backend/glasm/emit_context.h"
#include "shader_recompiler/frontend/ir/program.h"
#include "shader_recompiler/profile.h"
#include "shader_recompiler/runtime_info.h"
namespace Shader::Backend::GLASM {
namespace {
std::string_view InterpDecorator(Interpolation interp) {
switch (interp) {
case Interpolation::Smooth:
return "";
case Interpolation::Flat:
return "FLAT ";
case Interpolation::NoPerspective:
return "NOPERSPECTIVE ";
}
throw InvalidArgument("Invalid interpolation {}", interp);
}
bool IsInputArray(Stage stage) {
return stage == Stage::Geometry || stage == Stage::TessellationControl ||
stage == Stage::TessellationEval;
}
} // Anonymous namespace
EmitContext::EmitContext(IR::Program& program, Bindings& bindings, const Profile& profile_,
const RuntimeInfo& runtime_info_)
: info{program.info}, profile{profile_}, runtime_info{runtime_info_} {
// FIXME: Temporary partial implementation
u32 cbuf_index{};
for (const auto& desc : info.constant_buffer_descriptors) {
if (desc.count != 1) {
throw NotImplementedException("Constant buffer descriptor array");
}
Add("CBUFFER c{}[]={{program.buffer[{}]}};", desc.index, cbuf_index);
++cbuf_index;
}
u32 ssbo_index{};
for (const auto& desc : info.storage_buffers_descriptors) {
if (desc.count != 1) {
throw NotImplementedException("Storage buffer descriptor array");
}
if (runtime_info.glasm_use_storage_buffers) {
Add("STORAGE ssbo{}[]={{program.storage[{}]}};", ssbo_index, bindings.storage_buffer);
++bindings.storage_buffer;
++ssbo_index;
}
}
if (!runtime_info.glasm_use_storage_buffers) {
if (const size_t num = info.storage_buffers_descriptors.size(); num > 0) {
Add("PARAM c[{}]={{program.local[0..{}]}};", num, num - 1);
}
}
stage = program.stage;
switch (program.stage) {
case Stage::VertexA:
case Stage::VertexB:
stage_name = "vertex";
attrib_name = "vertex";
break;
case Stage::TessellationControl:
case Stage::TessellationEval:
stage_name = "primitive";
attrib_name = "primitive";
break;
case Stage::Geometry:
stage_name = "primitive";
attrib_name = "vertex";
break;
case Stage::Fragment:
stage_name = "fragment";
attrib_name = "fragment";
break;
case Stage::Compute:
stage_name = "invocation";
break;
}
const std::string_view attr_stage{stage == Stage::Fragment ? "fragment" : "vertex"};
const VaryingState loads{info.loads.mask | info.passthrough.mask};
for (size_t index = 0; index < IR::NUM_GENERICS; ++index) {
if (loads.Generic(index)) {
Add("{}ATTRIB in_attr{}[]={{{}.attrib[{}..{}]}};",
InterpDecorator(info.interpolation[index]), index, attr_stage, index, index);
}
}
if (IsInputArray(stage) && loads.AnyComponent(IR::Attribute::PositionX)) {
Add("ATTRIB vertex_position=vertex.position;");
}
if (info.uses_invocation_id) {
Add("ATTRIB primitive_invocation=primitive.invocation;");
}
if (info.stores_tess_level_outer) {
Add("OUTPUT result_patch_tessouter[]={{result.patch.tessouter[0..3]}};");
}
if (info.stores_tess_level_inner) {
Add("OUTPUT result_patch_tessinner[]={{result.patch.tessinner[0..1]}};");
}
if (info.stores.ClipDistances()) {
Add("OUTPUT result_clip[]={{result.clip[0..7]}};");
}
for (size_t index = 0; index < info.uses_patches.size(); ++index) {
if (!info.uses_patches[index]) {
continue;
}
if (stage == Stage::TessellationControl) {
Add("OUTPUT result_patch_attrib{}[]={{result.patch.attrib[{}..{}]}};"
"ATTRIB primitive_out_patch_attrib{}[]={{primitive.out.patch.attrib[{}..{}]}};",
index, index, index, index, index, index);
} else {
Add("ATTRIB primitive_patch_attrib{}[]={{primitive.patch.attrib[{}..{}]}};", index,
index, index);
}
}
if (stage == Stage::Fragment) {
Add("OUTPUT frag_color0=result.color;");
for (size_t index = 1; index < info.stores_frag_color.size(); ++index) {
Add("OUTPUT frag_color{}=result.color[{}];", index, index);
}
}
for (size_t index = 0; index < IR::NUM_GENERICS; ++index) {
if (info.stores.Generic(index)) {
Add("OUTPUT out_attr{}[]={{result.attrib[{}..{}]}};", index, index, index);
}
}
image_buffer_bindings.reserve(info.image_buffer_descriptors.size());
for (const auto& desc : info.image_buffer_descriptors) {
image_buffer_bindings.push_back(bindings.image);
bindings.image += desc.count;
}
image_bindings.reserve(info.image_descriptors.size());
for (const auto& desc : info.image_descriptors) {
image_bindings.push_back(bindings.image);
bindings.image += desc.count;
}
texture_buffer_bindings.reserve(info.texture_buffer_descriptors.size());
for (const auto& desc : info.texture_buffer_descriptors) {
texture_buffer_bindings.push_back(bindings.texture);
bindings.texture += desc.count;
}
texture_bindings.reserve(info.texture_descriptors.size());
for (const auto& desc : info.texture_descriptors) {
texture_bindings.push_back(bindings.texture);
bindings.texture += desc.count;
}
}
} // namespace Shader::Backend::GLASM

View File

@@ -0,0 +1,80 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <string>
#include <utility>
#include <vector>
#include <fmt/format.h>
#include "shader_recompiler/backend/glasm/reg_alloc.h"
#include "shader_recompiler/stage.h"
namespace Shader {
struct Info;
struct Profile;
struct RuntimeInfo;
} // namespace Shader
namespace Shader::Backend {
struct Bindings;
}
namespace Shader::IR {
class Inst;
struct Program;
} // namespace Shader::IR
namespace Shader::Backend::GLASM {
class EmitContext {
public:
explicit EmitContext(IR::Program& program, Bindings& bindings, const Profile& profile_,
const RuntimeInfo& runtime_info_);
template <typename... Args>
void Add(const char* format_str, IR::Inst& inst, Args&&... args) {
code += fmt::format(fmt::runtime(format_str), reg_alloc.Define(inst),
std::forward<Args>(args)...);
// TODO: Remove this
code += '\n';
}
template <typename... Args>
void LongAdd(const char* format_str, IR::Inst& inst, Args&&... args) {
code += fmt::format(fmt::runtime(format_str), reg_alloc.LongDefine(inst),
std::forward<Args>(args)...);
// TODO: Remove this
code += '\n';
}
template <typename... Args>
void Add(const char* format_str, Args&&... args) {
code += fmt::format(fmt::runtime(format_str), std::forward<Args>(args)...);
// TODO: Remove this
code += '\n';
}
std::string code;
RegAlloc reg_alloc{*this};
const Info& info;
const Profile& profile;
const RuntimeInfo& runtime_info;
std::vector<u32> texture_buffer_bindings;
std::vector<u32> image_buffer_bindings;
std::vector<u32> texture_bindings;
std::vector<u32> image_bindings;
Stage stage{};
std::string_view stage_name = "invalid";
std::string_view attrib_name = "invalid";
u32 num_safety_loop_vars{};
bool uses_y_direction{};
};
} // namespace Shader::Backend::GLASM

View File

@@ -0,0 +1,489 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <ranges>
#include <string>
#include <tuple>
#include "common/div_ceil.h"
#include "common/settings.h"
#include "shader_recompiler/backend/bindings.h"
#include "shader_recompiler/backend/glasm/emit_context.h"
#include "shader_recompiler/backend/glasm/emit_glasm.h"
#include "shader_recompiler/backend/glasm/emit_glasm_instructions.h"
#include "shader_recompiler/frontend/ir/ir_emitter.h"
#include "shader_recompiler/frontend/ir/program.h"
#include "shader_recompiler/profile.h"
#include "shader_recompiler/runtime_info.h"
namespace Shader::Backend::GLASM {
namespace {
template <class Func>
struct FuncTraits {};
template <class ReturnType_, class... Args>
struct FuncTraits<ReturnType_ (*)(Args...)> {
using ReturnType = ReturnType_;
static constexpr size_t NUM_ARGS = sizeof...(Args);
template <size_t I>
using ArgType = std::tuple_element_t<I, std::tuple<Args...>>;
};
template <typename T>
struct Identity {
Identity(T data_) : data{data_} {}
T Extract() {
return data;
}
T data;
};
template <bool scalar>
class RegWrapper {
public:
RegWrapper(EmitContext& ctx, const IR::Value& ir_value) : reg_alloc{ctx.reg_alloc} {
const Value value{reg_alloc.Peek(ir_value)};
if (value.type == Type::Register) {
inst = ir_value.InstRecursive();
reg = Register{value};
} else {
reg = value.type == Type::U64 ? reg_alloc.AllocLongReg() : reg_alloc.AllocReg();
}
switch (value.type) {
case Type::Register:
case Type::Void:
break;
case Type::U32:
ctx.Add("MOV.U {}.x,{};", reg, value.imm_u32);
break;
case Type::U64:
ctx.Add("MOV.U64 {}.x,{};", reg, value.imm_u64);
break;
}
}
auto Extract() {
if (inst) {
reg_alloc.Unref(*inst);
} else {
reg_alloc.FreeReg(reg);
}
return std::conditional_t<scalar, ScalarRegister, Register>{Value{reg}};
}
private:
RegAlloc& reg_alloc;
IR::Inst* inst{};
Register reg{};
};
template <typename ArgType>
class ValueWrapper {
public:
ValueWrapper(EmitContext& ctx, const IR::Value& ir_value_)
: reg_alloc{ctx.reg_alloc}, ir_value{ir_value_}, value{reg_alloc.Peek(ir_value)} {}
ArgType Extract() {
if (!ir_value.IsImmediate()) {
reg_alloc.Unref(*ir_value.InstRecursive());
}
return value;
}
private:
RegAlloc& reg_alloc;
const IR::Value& ir_value;
ArgType value;
};
template <typename ArgType>
auto Arg(EmitContext& ctx, const IR::Value& arg) {
if constexpr (std::is_same_v<ArgType, Register>) {
return RegWrapper<false>{ctx, arg};
} else if constexpr (std::is_same_v<ArgType, ScalarRegister>) {
return RegWrapper<true>{ctx, arg};
} else if constexpr (std::is_base_of_v<Value, ArgType>) {
return ValueWrapper<ArgType>{ctx, arg};
} else if constexpr (std::is_same_v<ArgType, const IR::Value&>) {
return Identity<const IR::Value&>{arg};
} else if constexpr (std::is_same_v<ArgType, u32>) {
return Identity{arg.U32()};
} else if constexpr (std::is_same_v<ArgType, IR::Attribute>) {
return Identity{arg.Attribute()};
} else if constexpr (std::is_same_v<ArgType, IR::Patch>) {
return Identity{arg.Patch()};
} else if constexpr (std::is_same_v<ArgType, IR::Reg>) {
return Identity{arg.Reg()};
}
}
template <auto func, bool is_first_arg_inst>
struct InvokeCall {
template <typename... Args>
InvokeCall(EmitContext& ctx, IR::Inst* inst, Args&&... args) {
if constexpr (is_first_arg_inst) {
func(ctx, *inst, args.Extract()...);
} else {
func(ctx, args.Extract()...);
}
}
};
template <auto func, bool is_first_arg_inst, size_t... I>
void Invoke(EmitContext& ctx, IR::Inst* inst, std::index_sequence<I...>) {
using Traits = FuncTraits<decltype(func)>;
if constexpr (is_first_arg_inst) {
InvokeCall<func, is_first_arg_inst>{
ctx, inst, Arg<typename Traits::template ArgType<I + 2>>(ctx, inst->Arg(I))...};
} else {
InvokeCall<func, is_first_arg_inst>{
ctx, inst, Arg<typename Traits::template ArgType<I + 1>>(ctx, inst->Arg(I))...};
}
}
template <auto func>
void Invoke(EmitContext& ctx, IR::Inst* inst) {
using Traits = FuncTraits<decltype(func)>;
static_assert(Traits::NUM_ARGS >= 1, "Insufficient arguments");
if constexpr (Traits::NUM_ARGS == 1) {
Invoke<func, false>(ctx, inst, std::make_index_sequence<0>{});
} else {
using FirstArgType = typename Traits::template ArgType<1>;
static constexpr bool is_first_arg_inst = std::is_same_v<FirstArgType, IR::Inst&>;
using Indices = std::make_index_sequence<Traits::NUM_ARGS - (is_first_arg_inst ? 2 : 1)>;
Invoke<func, is_first_arg_inst>(ctx, inst, Indices{});
}
}
void EmitInst(EmitContext& ctx, IR::Inst* inst) {
switch (inst->GetOpcode()) {
#define OPCODE(name, result_type, ...) \
case IR::Opcode::name: \
return Invoke<&Emit##name>(ctx, inst);
#include "shader_recompiler/frontend/ir/opcodes.inc"
#undef OPCODE
}
throw LogicError("Invalid opcode {}", inst->GetOpcode());
}
bool IsReference(IR::Inst& inst) {
return inst.GetOpcode() == IR::Opcode::Reference;
}
void PrecolorInst(IR::Inst& phi) {
// Insert phi moves before references to avoid overwritting other phis
const size_t num_args{phi.NumArgs()};
for (size_t i = 0; i < num_args; ++i) {
IR::Block& phi_block{*phi.PhiBlock(i)};
auto it{std::find_if_not(phi_block.rbegin(), phi_block.rend(), IsReference).base()};
IR::IREmitter ir{phi_block, it};
const IR::Value arg{phi.Arg(i)};
if (arg.IsImmediate()) {
ir.PhiMove(phi, arg);
} else {
ir.PhiMove(phi, IR::Value{&RegAlloc::AliasInst(*arg.Inst())});
}
}
for (size_t i = 0; i < num_args; ++i) {
IR::IREmitter{*phi.PhiBlock(i)}.Reference(IR::Value{&phi});
}
}
void Precolor(const IR::Program& program) {
for (IR::Block* const block : program.blocks) {
for (IR::Inst& phi : block->Instructions() | std::views::take_while(IR::IsPhi)) {
PrecolorInst(phi);
}
}
}
void EmitCode(EmitContext& ctx, const IR::Program& program) {
const auto eval{
[&](const IR::U1& cond) { return ScalarS32{ctx.reg_alloc.Consume(IR::Value{cond})}; }};
for (const IR::AbstractSyntaxNode& node : program.syntax_list) {
switch (node.type) {
case IR::AbstractSyntaxNode::Type::Block:
for (IR::Inst& inst : node.data.block->Instructions()) {
EmitInst(ctx, &inst);
}
break;
case IR::AbstractSyntaxNode::Type::If:
ctx.Add("MOV.S.CC RC,{};"
"IF NE.x;",
eval(node.data.if_node.cond));
break;
case IR::AbstractSyntaxNode::Type::EndIf:
ctx.Add("ENDIF;");
break;
case IR::AbstractSyntaxNode::Type::Loop:
ctx.Add("REP;");
break;
case IR::AbstractSyntaxNode::Type::Repeat:
if (!Settings::values.disable_shader_loop_safety_checks) {
const u32 loop_index{ctx.num_safety_loop_vars++};
const u32 vector_index{loop_index / 4};
const char component{"xyzw"[loop_index % 4]};
ctx.Add("SUB.S.CC loop{}.{},loop{}.{},1;"
"BRK(LT.{});",
vector_index, component, vector_index, component, component);
}
if (node.data.repeat.cond.IsImmediate()) {
if (node.data.repeat.cond.U1()) {
ctx.Add("ENDREP;");
} else {
ctx.Add("BRK;"
"ENDREP;");
}
} else {
ctx.Add("MOV.S.CC RC,{};"
"BRK(EQ.x);"
"ENDREP;",
eval(node.data.repeat.cond));
}
break;
case IR::AbstractSyntaxNode::Type::Break:
if (node.data.break_node.cond.IsImmediate()) {
if (node.data.break_node.cond.U1()) {
ctx.Add("BRK;");
}
} else {
ctx.Add("MOV.S.CC RC,{};"
"BRK (NE.x);",
eval(node.data.break_node.cond));
}
break;
case IR::AbstractSyntaxNode::Type::Return:
case IR::AbstractSyntaxNode::Type::Unreachable:
ctx.Add("RET;");
break;
}
}
if (!ctx.reg_alloc.IsEmpty()) {
LOG_WARNING(Shader_GLASM, "Register leak after generating code");
}
}
void SetupOptions(const IR::Program& program, const Profile& profile,
const RuntimeInfo& runtime_info, std::string& header) {
const Info& info{program.info};
const Stage stage{program.stage};
// TODO: Track the shared atomic ops
header += "OPTION NV_internal;"
"OPTION NV_shader_storage_buffer;"
"OPTION NV_gpu_program_fp64;";
if (info.uses_int64_bit_atomics) {
header += "OPTION NV_shader_atomic_int64;";
}
if (info.uses_atomic_f32_add) {
header += "OPTION NV_shader_atomic_float;";
}
if (info.uses_atomic_f16x2_add || info.uses_atomic_f16x2_min || info.uses_atomic_f16x2_max) {
header += "OPTION NV_shader_atomic_fp16_vector;";
}
if (info.uses_subgroup_invocation_id || info.uses_subgroup_mask || info.uses_subgroup_vote ||
info.uses_fswzadd) {
header += "OPTION NV_shader_thread_group;";
}
if (info.uses_subgroup_shuffles) {
header += "OPTION NV_shader_thread_shuffle;";
}
if (info.uses_sparse_residency) {
header += "OPTION EXT_sparse_texture2;";
}
const bool stores_viewport_layer{info.stores[IR::Attribute::ViewportIndex] ||
info.stores[IR::Attribute::Layer]};
if ((stage != Stage::Geometry && stores_viewport_layer) ||
info.stores[IR::Attribute::ViewportMask]) {
if (profile.support_viewport_index_layer_non_geometry) {
header += "OPTION NV_viewport_array2;";
}
}
if (program.is_geometry_passthrough && profile.support_geometry_shader_passthrough) {
header += "OPTION NV_geometry_shader_passthrough;";
}
if (info.uses_typeless_image_reads && profile.support_typeless_image_loads) {
header += "OPTION EXT_shader_image_load_formatted;";
}
if (profile.support_derivative_control) {
header += "OPTION ARB_derivative_control;";
}
if (stage == Stage::Fragment && runtime_info.force_early_z != 0) {
header += "OPTION NV_early_fragment_tests;";
}
if (stage == Stage::Fragment) {
header += "OPTION ARB_draw_buffers;";
}
}
std::string_view StageHeader(Stage stage) {
switch (stage) {
case Stage::VertexA:
case Stage::VertexB:
return "!!NVvp5.0\n";
case Stage::TessellationControl:
return "!!NVtcp5.0\n";
case Stage::TessellationEval:
return "!!NVtep5.0\n";
case Stage::Geometry:
return "!!NVgp5.0\n";
case Stage::Fragment:
return "!!NVfp5.0\n";
case Stage::Compute:
return "!!NVcp5.0\n";
}
throw InvalidArgument("Invalid stage {}", stage);
}
std::string_view InputPrimitive(InputTopology topology) {
switch (topology) {
case InputTopology::Points:
return "POINTS";
case InputTopology::Lines:
return "LINES";
case InputTopology::LinesAdjacency:
return "LINESS_ADJACENCY";
case InputTopology::Triangles:
return "TRIANGLES";
case InputTopology::TrianglesAdjacency:
return "TRIANGLES_ADJACENCY";
}
throw InvalidArgument("Invalid input topology {}", topology);
}
std::string_view OutputPrimitive(OutputTopology topology) {
switch (topology) {
case OutputTopology::PointList:
return "POINTS";
case OutputTopology::LineStrip:
return "LINE_STRIP";
case OutputTopology::TriangleStrip:
return "TRIANGLE_STRIP";
}
throw InvalidArgument("Invalid output topology {}", topology);
}
std::string_view GetTessMode(TessPrimitive primitive) {
switch (primitive) {
case TessPrimitive::Triangles:
return "TRIANGLES";
case TessPrimitive::Quads:
return "QUADS";
case TessPrimitive::Isolines:
return "ISOLINES";
}
throw InvalidArgument("Invalid tessellation primitive {}", primitive);
}
std::string_view GetTessSpacing(TessSpacing spacing) {
switch (spacing) {
case TessSpacing::Equal:
return "EQUAL";
case TessSpacing::FractionalOdd:
return "FRACTIONAL_ODD";
case TessSpacing::FractionalEven:
return "FRACTIONAL_EVEN";
}
throw InvalidArgument("Invalid tessellation spacing {}", spacing);
}
} // Anonymous namespace
std::string EmitGLASM(const Profile& profile, const RuntimeInfo& runtime_info, IR::Program& program,
Bindings& bindings) {
EmitContext ctx{program, bindings, profile, runtime_info};
Precolor(program);
EmitCode(ctx, program);
std::string header{StageHeader(program.stage)};
SetupOptions(program, profile, runtime_info, header);
switch (program.stage) {
case Stage::TessellationControl:
header += fmt::format("VERTICES_OUT {};", program.invocations);
break;
case Stage::TessellationEval:
header += fmt::format("TESS_MODE {};"
"TESS_SPACING {};"
"TESS_VERTEX_ORDER {};",
GetTessMode(runtime_info.tess_primitive),
GetTessSpacing(runtime_info.tess_spacing),
runtime_info.tess_clockwise ? "CW" : "CCW");
break;
case Stage::Geometry:
header += fmt::format("PRIMITIVE_IN {};", InputPrimitive(runtime_info.input_topology));
if (program.is_geometry_passthrough) {
if (profile.support_geometry_shader_passthrough) {
for (size_t index = 0; index < IR::NUM_GENERICS; ++index) {
if (program.info.passthrough.Generic(index)) {
header += fmt::format("PASSTHROUGH result.attrib[{}];", index);
}
}
if (program.info.passthrough.AnyComponent(IR::Attribute::PositionX)) {
header += "PASSTHROUGH result.position;";
}
} else {
LOG_WARNING(Shader_GLASM, "Passthrough geometry program used but not supported");
}
} else {
header +=
fmt::format("VERTICES_OUT {};"
"PRIMITIVE_OUT {};",
program.output_vertices, OutputPrimitive(program.output_topology));
}
break;
case Stage::Compute:
header += fmt::format("GROUP_SIZE {} {} {};", program.workgroup_size[0],
program.workgroup_size[1], program.workgroup_size[2]);
break;
default:
break;
}
if (program.shared_memory_size > 0) {
header += fmt::format("SHARED_MEMORY {};", program.shared_memory_size);
header += fmt::format("SHARED shared_mem[]={{program.sharedmem}};");
}
header += "TEMP ";
for (size_t index = 0; index < ctx.reg_alloc.NumUsedRegisters(); ++index) {
header += fmt::format("R{},", index);
}
if (program.local_memory_size > 0) {
header += fmt::format("lmem[{}],", program.local_memory_size);
}
if (program.info.uses_fswzadd) {
header += "FSWZA[4],FSWZB[4],";
}
const u32 num_safety_loop_vectors{Common::DivCeil(ctx.num_safety_loop_vars, 4u)};
for (u32 index = 0; index < num_safety_loop_vectors; ++index) {
header += fmt::format("loop{},", index);
}
header += "RC;"
"LONG TEMP ";
for (size_t index = 0; index < ctx.reg_alloc.NumUsedLongRegisters(); ++index) {
header += fmt::format("D{},", index);
}
header += "DC;";
if (program.info.uses_fswzadd) {
header += "MOV.F FSWZA[0],-1;"
"MOV.F FSWZA[1],1;"
"MOV.F FSWZA[2],-1;"
"MOV.F FSWZA[3],0;"
"MOV.F FSWZB[0],-1;"
"MOV.F FSWZB[1],-1;"
"MOV.F FSWZB[2],1;"
"MOV.F FSWZB[3],-1;";
}
for (u32 index = 0; index < num_safety_loop_vectors; ++index) {
header += fmt::format("MOV.S loop{},{{0x2000,0x2000,0x2000,0x2000}};", index);
}
if (ctx.uses_y_direction) {
header += "PARAM y_direction[1]={state.material.front.ambient};";
}
ctx.code.insert(0, header);
ctx.code += "END";
return ctx.code;
}
} // namespace Shader::Backend::GLASM

View File

@@ -0,0 +1,25 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <string>
#include "shader_recompiler/backend/bindings.h"
#include "shader_recompiler/frontend/ir/program.h"
#include "shader_recompiler/profile.h"
#include "shader_recompiler/runtime_info.h"
namespace Shader::Backend::GLASM {
[[nodiscard]] std::string EmitGLASM(const Profile& profile, const RuntimeInfo& runtime_info,
IR::Program& program, Bindings& bindings);
[[nodiscard]] inline std::string EmitGLASM(const Profile& profile, const RuntimeInfo& runtime_info,
IR::Program& program) {
Bindings binding;
return EmitGLASM(profile, runtime_info, program, binding);
}
} // namespace Shader::Backend::GLASM

View File

@@ -0,0 +1,91 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "shader_recompiler/backend/glasm/emit_context.h"
#include "shader_recompiler/backend/glasm/emit_glasm_instructions.h"
#include "shader_recompiler/frontend/ir/value.h"
namespace Shader::Backend::GLASM {
static void Alias(IR::Inst& inst, const IR::Value& value) {
if (value.IsImmediate()) {
return;
}
IR::Inst& value_inst{RegAlloc::AliasInst(*value.Inst())};
value_inst.DestructiveAddUsage(inst.UseCount());
value_inst.DestructiveRemoveUsage();
inst.SetDefinition(value_inst.Definition<Id>());
}
void EmitIdentity(EmitContext&, IR::Inst& inst, const IR::Value& value) {
Alias(inst, value);
}
void EmitConditionRef(EmitContext& ctx, IR::Inst& inst, const IR::Value& value) {
// Fake one usage to get a real register out of the condition
inst.DestructiveAddUsage(1);
const Register ret{ctx.reg_alloc.Define(inst)};
const ScalarS32 input{ctx.reg_alloc.Consume(value)};
if (ret != input) {
ctx.Add("MOV.S {},{};", ret, input);
}
}
void EmitBitCastU16F16(EmitContext&, IR::Inst& inst, const IR::Value& value) {
Alias(inst, value);
}
void EmitBitCastU32F32(EmitContext&, IR::Inst& inst, const IR::Value& value) {
Alias(inst, value);
}
void EmitBitCastU64F64(EmitContext&, IR::Inst& inst, const IR::Value& value) {
Alias(inst, value);
}
void EmitBitCastF16U16(EmitContext&, IR::Inst& inst, const IR::Value& value) {
Alias(inst, value);
}
void EmitBitCastF32U32(EmitContext&, IR::Inst& inst, const IR::Value& value) {
Alias(inst, value);
}
void EmitBitCastF64U64(EmitContext&, IR::Inst& inst, const IR::Value& value) {
Alias(inst, value);
}
void EmitPackUint2x32(EmitContext& ctx, IR::Inst& inst, Register value) {
ctx.LongAdd("PK64.U {}.x,{};", inst, value);
}
void EmitUnpackUint2x32(EmitContext& ctx, IR::Inst& inst, Register value) {
ctx.Add("UP64.U {}.xy,{}.x;", inst, value);
}
void EmitPackFloat2x16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register value) {
throw NotImplementedException("GLASM instruction");
}
void EmitUnpackFloat2x16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register value) {
throw NotImplementedException("GLASM instruction");
}
void EmitPackHalf2x16(EmitContext& ctx, IR::Inst& inst, Register value) {
ctx.Add("PK2H {}.x,{};", inst, value);
}
void EmitUnpackHalf2x16(EmitContext& ctx, IR::Inst& inst, Register value) {
ctx.Add("UP2H {}.xy,{}.x;", inst, value);
}
void EmitPackDouble2x32(EmitContext& ctx, IR::Inst& inst, Register value) {
ctx.LongAdd("PK64 {}.x,{};", inst, value);
}
void EmitUnpackDouble2x32(EmitContext& ctx, IR::Inst& inst, Register value) {
ctx.Add("UP64 {}.xy,{}.x;", inst, value);
}
} // namespace Shader::Backend::GLASM

View File

@@ -0,0 +1,244 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "shader_recompiler/backend/glasm/emit_context.h"
#include "shader_recompiler/backend/glasm/emit_glasm_instructions.h"
#include "shader_recompiler/frontend/ir/value.h"
namespace Shader::Backend::GLASM {
namespace {
template <auto read_imm, char type, typename... Values>
void CompositeConstruct(EmitContext& ctx, IR::Inst& inst, Values&&... elements) {
const Register ret{ctx.reg_alloc.Define(inst)};
if (std::ranges::any_of(std::array{elements...},
[](const IR::Value& value) { return value.IsImmediate(); })) {
using Type = std::invoke_result_t<decltype(read_imm), IR::Value>;
const std::array<Type, 4> values{(elements.IsImmediate() ? (elements.*read_imm)() : 0)...};
ctx.Add("MOV.{} {},{{{},{},{},{}}};", type, ret, fmt::to_string(values[0]),
fmt::to_string(values[1]), fmt::to_string(values[2]), fmt::to_string(values[3]));
}
size_t index{};
for (const IR::Value& element : {elements...}) {
if (!element.IsImmediate()) {
const ScalarU32 value{ctx.reg_alloc.Consume(element)};
ctx.Add("MOV.{} {}.{},{};", type, ret, "xyzw"[index], value);
}
++index;
}
}
void CompositeExtract(EmitContext& ctx, IR::Inst& inst, Register composite, u32 index, char type) {
const Register ret{ctx.reg_alloc.Define(inst)};
if (ret == composite && index == 0) {
// No need to do anything here, the source and destination are the same register
return;
}
ctx.Add("MOV.{} {}.x,{}.{};", type, ret, composite, "xyzw"[index]);
}
template <typename ObjectType>
void CompositeInsert(EmitContext& ctx, IR::Inst& inst, Register composite, ObjectType object,
u32 index, char type) {
const Register ret{ctx.reg_alloc.Define(inst)};
const char swizzle{"xyzw"[index]};
if (ret != composite && ret == object) {
// The object is aliased with the return value, so we have to use a temporary to insert
ctx.Add("MOV.{} RC,{};"
"MOV.{} RC.{},{};"
"MOV.{} {},RC;",
type, composite, type, swizzle, object, type, ret);
} else if (ret != composite) {
// The input composite is not aliased with the return value so we have to copy it before
// hand. But the insert object is not aliased with the return value, so we don't have to
// worry about that
ctx.Add("MOV.{} {},{};"
"MOV.{} {}.{},{};",
type, ret, composite, type, ret, swizzle, object);
} else {
// The return value is alised so we can just insert the object, it doesn't matter if it's
// aliased
ctx.Add("MOV.{} {}.{},{};", type, ret, swizzle, object);
}
}
} // Anonymous namespace
void EmitCompositeConstructU32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& e1,
const IR::Value& e2) {
CompositeConstruct<&IR::Value::U32, 'U'>(ctx, inst, e1, e2);
}
void EmitCompositeConstructU32x3(EmitContext& ctx, IR::Inst& inst, const IR::Value& e1,
const IR::Value& e2, const IR::Value& e3) {
CompositeConstruct<&IR::Value::U32, 'U'>(ctx, inst, e1, e2, e3);
}
void EmitCompositeConstructU32x4(EmitContext& ctx, IR::Inst& inst, const IR::Value& e1,
const IR::Value& e2, const IR::Value& e3, const IR::Value& e4) {
CompositeConstruct<&IR::Value::U32, 'U'>(ctx, inst, e1, e2, e3, e4);
}
void EmitCompositeExtractU32x2(EmitContext& ctx, IR::Inst& inst, Register composite, u32 index) {
CompositeExtract(ctx, inst, composite, index, 'U');
}
void EmitCompositeExtractU32x3(EmitContext& ctx, IR::Inst& inst, Register composite, u32 index) {
CompositeExtract(ctx, inst, composite, index, 'U');
}
void EmitCompositeExtractU32x4(EmitContext& ctx, IR::Inst& inst, Register composite, u32 index) {
CompositeExtract(ctx, inst, composite, index, 'U');
}
void EmitCompositeInsertU32x2([[maybe_unused]] EmitContext& ctx,
[[maybe_unused]] Register composite,
[[maybe_unused]] ScalarU32 object, [[maybe_unused]] u32 index) {
throw NotImplementedException("GLASM instruction");
}
void EmitCompositeInsertU32x3([[maybe_unused]] EmitContext& ctx,
[[maybe_unused]] Register composite,
[[maybe_unused]] ScalarU32 object, [[maybe_unused]] u32 index) {
throw NotImplementedException("GLASM instruction");
}
void EmitCompositeInsertU32x4([[maybe_unused]] EmitContext& ctx,
[[maybe_unused]] Register composite,
[[maybe_unused]] ScalarU32 object, [[maybe_unused]] u32 index) {
throw NotImplementedException("GLASM instruction");
}
void EmitCompositeConstructF16x2([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register e1,
[[maybe_unused]] Register e2) {
throw NotImplementedException("GLASM instruction");
}
void EmitCompositeConstructF16x3([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register e1,
[[maybe_unused]] Register e2, [[maybe_unused]] Register e3) {
throw NotImplementedException("GLASM instruction");
}
void EmitCompositeConstructF16x4([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register e1,
[[maybe_unused]] Register e2, [[maybe_unused]] Register e3,
[[maybe_unused]] Register e4) {
throw NotImplementedException("GLASM instruction");
}
void EmitCompositeExtractF16x2([[maybe_unused]] EmitContext& ctx,
[[maybe_unused]] Register composite, [[maybe_unused]] u32 index) {
throw NotImplementedException("GLASM instruction");
}
void EmitCompositeExtractF16x3([[maybe_unused]] EmitContext& ctx,
[[maybe_unused]] Register composite, [[maybe_unused]] u32 index) {
throw NotImplementedException("GLASM instruction");
}
void EmitCompositeExtractF16x4([[maybe_unused]] EmitContext& ctx,
[[maybe_unused]] Register composite, [[maybe_unused]] u32 index) {
throw NotImplementedException("GLASM instruction");
}
void EmitCompositeInsertF16x2([[maybe_unused]] EmitContext& ctx,
[[maybe_unused]] Register composite, [[maybe_unused]] Register object,
[[maybe_unused]] u32 index) {
throw NotImplementedException("GLASM instruction");
}
void EmitCompositeInsertF16x3([[maybe_unused]] EmitContext& ctx,
[[maybe_unused]] Register composite, [[maybe_unused]] Register object,
[[maybe_unused]] u32 index) {
throw NotImplementedException("GLASM instruction");
}
void EmitCompositeInsertF16x4([[maybe_unused]] EmitContext& ctx,
[[maybe_unused]] Register composite, [[maybe_unused]] Register object,
[[maybe_unused]] u32 index) {
throw NotImplementedException("GLASM instruction");
}
void EmitCompositeConstructF32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& e1,
const IR::Value& e2) {
CompositeConstruct<&IR::Value::F32, 'F'>(ctx, inst, e1, e2);
}
void EmitCompositeConstructF32x3(EmitContext& ctx, IR::Inst& inst, const IR::Value& e1,
const IR::Value& e2, const IR::Value& e3) {
CompositeConstruct<&IR::Value::F32, 'F'>(ctx, inst, e1, e2, e3);
}
void EmitCompositeConstructF32x4(EmitContext& ctx, IR::Inst& inst, const IR::Value& e1,
const IR::Value& e2, const IR::Value& e3, const IR::Value& e4) {
CompositeConstruct<&IR::Value::F32, 'F'>(ctx, inst, e1, e2, e3, e4);
}
void EmitCompositeExtractF32x2(EmitContext& ctx, IR::Inst& inst, Register composite, u32 index) {
CompositeExtract(ctx, inst, composite, index, 'F');
}
void EmitCompositeExtractF32x3(EmitContext& ctx, IR::Inst& inst, Register composite, u32 index) {
CompositeExtract(ctx, inst, composite, index, 'F');
}
void EmitCompositeExtractF32x4(EmitContext& ctx, IR::Inst& inst, Register composite, u32 index) {
CompositeExtract(ctx, inst, composite, index, 'F');
}
void EmitCompositeInsertF32x2(EmitContext& ctx, IR::Inst& inst, Register composite,
ScalarF32 object, u32 index) {
CompositeInsert(ctx, inst, composite, object, index, 'F');
}
void EmitCompositeInsertF32x3(EmitContext& ctx, IR::Inst& inst, Register composite,
ScalarF32 object, u32 index) {
CompositeInsert(ctx, inst, composite, object, index, 'F');
}
void EmitCompositeInsertF32x4(EmitContext& ctx, IR::Inst& inst, Register composite,
ScalarF32 object, u32 index) {
CompositeInsert(ctx, inst, composite, object, index, 'F');
}
void EmitCompositeConstructF64x2([[maybe_unused]] EmitContext& ctx) {
throw NotImplementedException("GLASM instruction");
}
void EmitCompositeConstructF64x3([[maybe_unused]] EmitContext& ctx) {
throw NotImplementedException("GLASM instruction");
}
void EmitCompositeConstructF64x4([[maybe_unused]] EmitContext& ctx) {
throw NotImplementedException("GLASM instruction");
}
void EmitCompositeExtractF64x2([[maybe_unused]] EmitContext& ctx) {
throw NotImplementedException("GLASM instruction");
}
void EmitCompositeExtractF64x3([[maybe_unused]] EmitContext& ctx) {
throw NotImplementedException("GLASM instruction");
}
void EmitCompositeExtractF64x4([[maybe_unused]] EmitContext& ctx) {
throw NotImplementedException("GLASM instruction");
}
void EmitCompositeInsertF64x2([[maybe_unused]] EmitContext& ctx,
[[maybe_unused]] Register composite, [[maybe_unused]] Register object,
[[maybe_unused]] u32 index) {
throw NotImplementedException("GLASM instruction");
}
void EmitCompositeInsertF64x3([[maybe_unused]] EmitContext& ctx,
[[maybe_unused]] Register composite, [[maybe_unused]] Register object,
[[maybe_unused]] u32 index) {
throw NotImplementedException("GLASM instruction");
}
void EmitCompositeInsertF64x4([[maybe_unused]] EmitContext& ctx,
[[maybe_unused]] Register composite, [[maybe_unused]] Register object,
[[maybe_unused]] u32 index) {
throw NotImplementedException("GLASM instruction");
}
} // namespace Shader::Backend::GLASM

View File

@@ -0,0 +1,346 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <string_view>
#include "shader_recompiler/backend/glasm/emit_context.h"
#include "shader_recompiler/backend/glasm/emit_glasm_instructions.h"
#include "shader_recompiler/frontend/ir/value.h"
#include "shader_recompiler/profile.h"
#include "shader_recompiler/shader_info.h"
namespace Shader::Backend::GLASM {
namespace {
void GetCbuf(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, ScalarU32 offset,
std::string_view size) {
if (!binding.IsImmediate()) {
throw NotImplementedException("Indirect constant buffer loading");
}
const Register ret{ctx.reg_alloc.Define(inst)};
if (offset.type == Type::U32) {
// Avoid reading arrays out of bounds, matching hardware's behavior
if (offset.imm_u32 >= 0x10'000) {
ctx.Add("MOV.S {},0;", ret);
return;
}
}
ctx.Add("LDC.{} {},c{}[{}];", size, ret, binding.U32(), offset);
}
bool IsInputArray(Stage stage) {
return stage == Stage::Geometry || stage == Stage::TessellationControl ||
stage == Stage::TessellationEval;
}
std::string VertexIndex(EmitContext& ctx, ScalarU32 vertex) {
return IsInputArray(ctx.stage) ? fmt::format("[{}]", vertex) : "";
}
u32 TexCoordIndex(IR::Attribute attr) {
return (static_cast<u32>(attr) - static_cast<u32>(IR::Attribute::FixedFncTexture0S)) / 4;
}
} // Anonymous namespace
void EmitGetCbufU8(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, ScalarU32 offset) {
GetCbuf(ctx, inst, binding, offset, "U8");
}
void EmitGetCbufS8(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, ScalarU32 offset) {
GetCbuf(ctx, inst, binding, offset, "S8");
}
void EmitGetCbufU16(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, ScalarU32 offset) {
GetCbuf(ctx, inst, binding, offset, "U16");
}
void EmitGetCbufS16(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, ScalarU32 offset) {
GetCbuf(ctx, inst, binding, offset, "S16");
}
void EmitGetCbufU32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, ScalarU32 offset) {
GetCbuf(ctx, inst, binding, offset, "U32");
}
void EmitGetCbufF32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, ScalarU32 offset) {
GetCbuf(ctx, inst, binding, offset, "F32");
}
void EmitGetCbufU32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
ScalarU32 offset) {
GetCbuf(ctx, inst, binding, offset, "U32X2");
}
void EmitGetAttribute(EmitContext& ctx, IR::Inst& inst, IR::Attribute attr, ScalarU32 vertex) {
const u32 element{static_cast<u32>(attr) % 4};
const char swizzle{"xyzw"[element]};
if (IR::IsGeneric(attr)) {
const u32 index{IR::GenericAttributeIndex(attr)};
ctx.Add("MOV.F {}.x,in_attr{}{}[0].{};", inst, index, VertexIndex(ctx, vertex), swizzle);
return;
}
if (attr >= IR::Attribute::FixedFncTexture0S && attr <= IR::Attribute::FixedFncTexture9Q) {
const u32 index{TexCoordIndex(attr)};
ctx.Add("MOV.F {}.x,{}.texcoord[{}].{};", inst, ctx.attrib_name, index, swizzle);
return;
}
switch (attr) {
case IR::Attribute::PrimitiveId:
ctx.Add("MOV.S {}.x,primitive.id;", inst);
break;
case IR::Attribute::PositionX:
case IR::Attribute::PositionY:
case IR::Attribute::PositionZ:
case IR::Attribute::PositionW:
if (IsInputArray(ctx.stage)) {
ctx.Add("MOV.F {}.x,vertex_position{}.{};", inst, VertexIndex(ctx, vertex), swizzle);
} else {
ctx.Add("MOV.F {}.x,{}.position.{};", inst, ctx.attrib_name, swizzle);
}
break;
case IR::Attribute::ColorFrontDiffuseR:
case IR::Attribute::ColorFrontDiffuseG:
case IR::Attribute::ColorFrontDiffuseB:
case IR::Attribute::ColorFrontDiffuseA:
ctx.Add("MOV.F {}.x,{}.color.{};", inst, ctx.attrib_name, swizzle);
break;
case IR::Attribute::PointSpriteS:
case IR::Attribute::PointSpriteT:
ctx.Add("MOV.F {}.x,{}.pointcoord.{};", inst, ctx.attrib_name, swizzle);
break;
case IR::Attribute::TessellationEvaluationPointU:
case IR::Attribute::TessellationEvaluationPointV:
ctx.Add("MOV.F {}.x,vertex.tesscoord.{};", inst, swizzle);
break;
case IR::Attribute::InstanceId:
ctx.Add("MOV.S {}.x,{}.instance;", inst, ctx.attrib_name);
break;
case IR::Attribute::VertexId:
ctx.Add("MOV.S {}.x,{}.id;", inst, ctx.attrib_name);
break;
case IR::Attribute::FrontFace:
ctx.Add("CMP.S {}.x,{}.facing.x,0,-1;", inst, ctx.attrib_name);
break;
default:
throw NotImplementedException("Get attribute {}", attr);
}
}
void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, ScalarF32 value,
[[maybe_unused]] ScalarU32 vertex) {
const u32 element{static_cast<u32>(attr) % 4};
const char swizzle{"xyzw"[element]};
if (IR::IsGeneric(attr)) {
const u32 index{IR::GenericAttributeIndex(attr)};
ctx.Add("MOV.F out_attr{}[0].{},{};", index, swizzle, value);
return;
}
if (attr >= IR::Attribute::FixedFncTexture0S && attr <= IR::Attribute::FixedFncTexture9R) {
const u32 index{TexCoordIndex(attr)};
ctx.Add("MOV.F result.texcoord[{}].{},{};", index, swizzle, value);
return;
}
switch (attr) {
case IR::Attribute::Layer:
if (ctx.stage == Stage::Geometry || ctx.profile.support_viewport_index_layer_non_geometry) {
ctx.Add("MOV.F result.layer.x,{};", value);
} else {
LOG_WARNING(Shader_GLASM,
"Layer stored outside of geometry shader not supported by device");
}
break;
case IR::Attribute::ViewportIndex:
if (ctx.stage == Stage::Geometry || ctx.profile.support_viewport_index_layer_non_geometry) {
ctx.Add("MOV.F result.viewport.x,{};", value);
} else {
LOG_WARNING(Shader_GLASM,
"Viewport stored outside of geometry shader not supported by device");
}
break;
case IR::Attribute::ViewportMask:
// NV_viewport_array2 is required to access result.viewportmask, regardless of shader stage.
if (ctx.profile.support_viewport_index_layer_non_geometry) {
ctx.Add("MOV.F result.viewportmask[0].x,{};", value);
} else {
LOG_WARNING(Shader_GLASM, "Device does not support storing to ViewportMask");
}
break;
case IR::Attribute::PointSize:
ctx.Add("MOV.F result.pointsize.x,{};", value);
break;
case IR::Attribute::PositionX:
case IR::Attribute::PositionY:
case IR::Attribute::PositionZ:
case IR::Attribute::PositionW:
ctx.Add("MOV.F result.position.{},{};", swizzle, value);
break;
case IR::Attribute::ColorFrontDiffuseR:
case IR::Attribute::ColorFrontDiffuseG:
case IR::Attribute::ColorFrontDiffuseB:
case IR::Attribute::ColorFrontDiffuseA:
ctx.Add("MOV.F result.color.{},{};", swizzle, value);
break;
case IR::Attribute::ColorFrontSpecularR:
case IR::Attribute::ColorFrontSpecularG:
case IR::Attribute::ColorFrontSpecularB:
case IR::Attribute::ColorFrontSpecularA:
ctx.Add("MOV.F result.color.secondary.{},{};", swizzle, value);
break;
case IR::Attribute::ColorBackDiffuseR:
case IR::Attribute::ColorBackDiffuseG:
case IR::Attribute::ColorBackDiffuseB:
case IR::Attribute::ColorBackDiffuseA:
ctx.Add("MOV.F result.color.back.{},{};", swizzle, value);
break;
case IR::Attribute::ColorBackSpecularR:
case IR::Attribute::ColorBackSpecularG:
case IR::Attribute::ColorBackSpecularB:
case IR::Attribute::ColorBackSpecularA:
ctx.Add("MOV.F result.color.back.secondary.{},{};", swizzle, value);
break;
case IR::Attribute::FogCoordinate:
ctx.Add("MOV.F result.fogcoord.x,{};", value);
break;
case IR::Attribute::ClipDistance0:
case IR::Attribute::ClipDistance1:
case IR::Attribute::ClipDistance2:
case IR::Attribute::ClipDistance3:
case IR::Attribute::ClipDistance4:
case IR::Attribute::ClipDistance5:
case IR::Attribute::ClipDistance6:
case IR::Attribute::ClipDistance7: {
const u32 index{static_cast<u32>(attr) - static_cast<u32>(IR::Attribute::ClipDistance0)};
ctx.Add("MOV.F result.clip[{}].x,{};", index, value);
break;
}
default:
throw NotImplementedException("Set attribute {}", attr);
}
}
void EmitGetAttributeIndexed(EmitContext& ctx, IR::Inst& inst, ScalarS32 offset, ScalarU32 vertex) {
// RC.x = base_index
// RC.y = masked_index
// RC.z = compare_index
ctx.Add("SHR.S RC.x,{},2;"
"AND.S RC.y,RC.x,3;"
"SHR.S RC.z,{},4;",
offset, offset);
const Register ret{ctx.reg_alloc.Define(inst)};
u32 num_endifs{};
const auto read{[&](u32 compare_index, const std::array<std::string, 4>& values) {
++num_endifs;
ctx.Add("SEQ.S.CC RC.w,RC.z,{};" // compare_index
"IF NE.w;"
// X
"SEQ.S.CC RC.w,RC.y,0;"
"IF NE.w;"
"MOV {}.x,{};"
"ELSE;"
// Y
"SEQ.S.CC RC.w,RC.y,1;"
"IF NE.w;"
"MOV {}.x,{};"
"ELSE;"
// Z
"SEQ.S.CC RC.w,RC.y,2;"
"IF NE.w;"
"MOV {}.x,{};"
"ELSE;"
// W
"MOV {}.x,{};"
"ENDIF;"
"ENDIF;"
"ENDIF;"
"ELSE;",
compare_index, ret, values[0], ret, values[1], ret, values[2], ret, values[3]);
}};
const auto read_swizzled{[&](u32 compare_index, std::string_view value) {
const std::array values{fmt::format("{}.x", value), fmt::format("{}.y", value),
fmt::format("{}.z", value), fmt::format("{}.w", value)};
read(compare_index, values);
}};
if (ctx.info.loads.AnyComponent(IR::Attribute::PositionX)) {
const u32 index{static_cast<u32>(IR::Attribute::PositionX)};
if (IsInputArray(ctx.stage)) {
read_swizzled(index, fmt::format("vertex_position{}", VertexIndex(ctx, vertex)));
} else {
read_swizzled(index, fmt::format("{}.position", ctx.attrib_name));
}
}
for (u32 index = 0; index < static_cast<u32>(IR::NUM_GENERICS); ++index) {
if (!ctx.info.loads.Generic(index)) {
continue;
}
read_swizzled(index, fmt::format("in_attr{}{}[0]", index, VertexIndex(ctx, vertex)));
}
for (u32 i = 0; i < num_endifs; ++i) {
ctx.Add("ENDIF;");
}
}
void EmitSetAttributeIndexed([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] ScalarU32 offset,
[[maybe_unused]] ScalarF32 value, [[maybe_unused]] ScalarU32 vertex) {
throw NotImplementedException("GLASM instruction");
}
void EmitGetPatch(EmitContext& ctx, IR::Inst& inst, IR::Patch patch) {
if (!IR::IsGeneric(patch)) {
throw NotImplementedException("Non-generic patch load");
}
const u32 index{IR::GenericPatchIndex(patch)};
const u32 element{IR::GenericPatchElement(patch)};
const char swizzle{"xyzw"[element]};
const std::string_view out{ctx.stage == Stage::TessellationControl ? ".out" : ""};
ctx.Add("MOV.F {},primitive{}.patch.attrib[{}].{};", inst, out, index, swizzle);
}
void EmitSetPatch(EmitContext& ctx, IR::Patch patch, ScalarF32 value) {
if (IR::IsGeneric(patch)) {
const u32 index{IR::GenericPatchIndex(patch)};
const u32 element{IR::GenericPatchElement(patch)};
ctx.Add("MOV.F result.patch.attrib[{}].{},{};", index, "xyzw"[element], value);
return;
}
switch (patch) {
case IR::Patch::TessellationLodLeft:
case IR::Patch::TessellationLodRight:
case IR::Patch::TessellationLodTop:
case IR::Patch::TessellationLodBottom: {
const u32 index{static_cast<u32>(patch) - u32(IR::Patch::TessellationLodLeft)};
ctx.Add("MOV.F result.patch.tessouter[{}].x,{};", index, value);
break;
}
case IR::Patch::TessellationLodInteriorU:
ctx.Add("MOV.F result.patch.tessinner[0].x,{};", value);
break;
case IR::Patch::TessellationLodInteriorV:
ctx.Add("MOV.F result.patch.tessinner[1].x,{};", value);
break;
default:
throw NotImplementedException("Patch {}", patch);
}
}
void EmitSetFragColor(EmitContext& ctx, u32 index, u32 component, ScalarF32 value) {
ctx.Add("MOV.F frag_color{}.{},{};", index, "xyzw"[component], value);
}
void EmitSetSampleMask(EmitContext& ctx, ScalarS32 value) {
ctx.Add("MOV.S result.samplemask.x,{};", value);
}
void EmitSetFragDepth(EmitContext& ctx, ScalarF32 value) {
ctx.Add("MOV.F result.depth.z,{};", value);
}
void EmitLoadLocal(EmitContext& ctx, IR::Inst& inst, ScalarU32 word_offset) {
ctx.Add("MOV.U {},lmem[{}].x;", inst, word_offset);
}
void EmitWriteLocal(EmitContext& ctx, ScalarU32 word_offset, ScalarU32 value) {
ctx.Add("MOV.U lmem[{}].x,{};", word_offset, value);
}
} // namespace Shader::Backend::GLASM

View File

@@ -0,0 +1,231 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <string_view>
#include "shader_recompiler/backend/glasm/emit_context.h"
#include "shader_recompiler/backend/glasm/emit_glasm_instructions.h"
#include "shader_recompiler/frontend/ir/modifiers.h"
#include "shader_recompiler/frontend/ir/value.h"
namespace Shader::Backend::GLASM {
namespace {
std::string_view FpRounding(IR::FpRounding fp_rounding) {
switch (fp_rounding) {
case IR::FpRounding::DontCare:
return "";
case IR::FpRounding::RN:
return ".ROUND";
case IR::FpRounding::RZ:
return ".TRUNC";
case IR::FpRounding::RM:
return ".FLR";
case IR::FpRounding::RP:
return ".CEIL";
}
throw InvalidArgument("Invalid floating-point rounding {}", fp_rounding);
}
template <typename InputType>
void Convert(EmitContext& ctx, IR::Inst& inst, InputType value, std::string_view dest,
std::string_view src, bool is_long_result) {
const std::string_view fp_rounding{FpRounding(inst.Flags<IR::FpControl>().rounding)};
const auto ret{is_long_result ? ctx.reg_alloc.LongDefine(inst) : ctx.reg_alloc.Define(inst)};
ctx.Add("CVT.{}.{}{} {}.x,{};", dest, src, fp_rounding, ret, value);
}
} // Anonymous namespace
void EmitConvertS16F16(EmitContext& ctx, IR::Inst& inst, Register value) {
Convert(ctx, inst, value, "S16", "F16", false);
}
void EmitConvertS16F32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value) {
Convert(ctx, inst, value, "S16", "F32", false);
}
void EmitConvertS16F64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value) {
Convert(ctx, inst, value, "S16", "F64", false);
}
void EmitConvertS32F16(EmitContext& ctx, IR::Inst& inst, Register value) {
Convert(ctx, inst, value, "S32", "F16", false);
}
void EmitConvertS32F32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value) {
Convert(ctx, inst, value, "S32", "F32", false);
}
void EmitConvertS32F64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value) {
Convert(ctx, inst, value, "S32", "F64", false);
}
void EmitConvertS64F16(EmitContext& ctx, IR::Inst& inst, Register value) {
Convert(ctx, inst, value, "S64", "F16", true);
}
void EmitConvertS64F32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value) {
Convert(ctx, inst, value, "S64", "F32", true);
}
void EmitConvertS64F64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value) {
Convert(ctx, inst, value, "S64", "F64", true);
}
void EmitConvertU16F16(EmitContext& ctx, IR::Inst& inst, Register value) {
Convert(ctx, inst, value, "U16", "F16", false);
}
void EmitConvertU16F32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value) {
Convert(ctx, inst, value, "U16", "F32", false);
}
void EmitConvertU16F64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value) {
Convert(ctx, inst, value, "U16", "F64", false);
}
void EmitConvertU32F16(EmitContext& ctx, IR::Inst& inst, Register value) {
Convert(ctx, inst, value, "U32", "F16", false);
}
void EmitConvertU32F32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value) {
Convert(ctx, inst, value, "U32", "F32", false);
}
void EmitConvertU32F64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value) {
Convert(ctx, inst, value, "U32", "F64", false);
}
void EmitConvertU64F16(EmitContext& ctx, IR::Inst& inst, Register value) {
Convert(ctx, inst, value, "U64", "F16", true);
}
void EmitConvertU64F32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value) {
Convert(ctx, inst, value, "U64", "F32", true);
}
void EmitConvertU64F64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value) {
Convert(ctx, inst, value, "U64", "F64", true);
}
void EmitConvertU64U32(EmitContext& ctx, IR::Inst& inst, ScalarU32 value) {
Convert(ctx, inst, value, "U64", "U32", true);
}
void EmitConvertU32U64(EmitContext& ctx, IR::Inst& inst, Register value) {
Convert(ctx, inst, value, "U32", "U64", false);
}
void EmitConvertF16F32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value) {
Convert(ctx, inst, value, "F16", "F32", false);
}
void EmitConvertF32F16(EmitContext& ctx, IR::Inst& inst, Register value) {
Convert(ctx, inst, value, "F32", "F16", false);
}
void EmitConvertF32F64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value) {
Convert(ctx, inst, value, "F32", "F64", false);
}
void EmitConvertF64F32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value) {
Convert(ctx, inst, value, "F64", "F32", true);
}
void EmitConvertF16S8(EmitContext& ctx, IR::Inst& inst, Register value) {
Convert(ctx, inst, value, "F16", "S8", false);
}
void EmitConvertF16S16(EmitContext& ctx, IR::Inst& inst, Register value) {
Convert(ctx, inst, value, "F16", "S16", false);
}
void EmitConvertF16S32(EmitContext& ctx, IR::Inst& inst, ScalarS32 value) {
Convert(ctx, inst, value, "F16", "S32", false);
}
void EmitConvertF16S64(EmitContext& ctx, IR::Inst& inst, Register value) {
Convert(ctx, inst, value, "F16", "S64", false);
}
void EmitConvertF16U8(EmitContext& ctx, IR::Inst& inst, Register value) {
Convert(ctx, inst, value, "F16", "U8", false);
}
void EmitConvertF16U16(EmitContext& ctx, IR::Inst& inst, Register value) {
Convert(ctx, inst, value, "F16", "U16", false);
}
void EmitConvertF16U32(EmitContext& ctx, IR::Inst& inst, ScalarU32 value) {
Convert(ctx, inst, value, "F16", "U32", false);
}
void EmitConvertF16U64(EmitContext& ctx, IR::Inst& inst, Register value) {
Convert(ctx, inst, value, "F16", "U64", false);
}
void EmitConvertF32S8(EmitContext& ctx, IR::Inst& inst, Register value) {
Convert(ctx, inst, value, "F32", "S8", false);
}
void EmitConvertF32S16(EmitContext& ctx, IR::Inst& inst, Register value) {
Convert(ctx, inst, value, "F32", "S16", false);
}
void EmitConvertF32S32(EmitContext& ctx, IR::Inst& inst, ScalarS32 value) {
Convert(ctx, inst, value, "F32", "S32", false);
}
void EmitConvertF32S64(EmitContext& ctx, IR::Inst& inst, Register value) {
Convert(ctx, inst, value, "F32", "S64", false);
}
void EmitConvertF32U8(EmitContext& ctx, IR::Inst& inst, Register value) {
Convert(ctx, inst, value, "F32", "U8", false);
}
void EmitConvertF32U16(EmitContext& ctx, IR::Inst& inst, Register value) {
Convert(ctx, inst, value, "F32", "U16", false);
}
void EmitConvertF32U32(EmitContext& ctx, IR::Inst& inst, ScalarU32 value) {
Convert(ctx, inst, value, "F32", "U32", false);
}
void EmitConvertF32U64(EmitContext& ctx, IR::Inst& inst, Register value) {
Convert(ctx, inst, value, "F32", "U64", false);
}
void EmitConvertF64S8(EmitContext& ctx, IR::Inst& inst, Register value) {
Convert(ctx, inst, value, "F64", "S8", true);
}
void EmitConvertF64S16(EmitContext& ctx, IR::Inst& inst, Register value) {
Convert(ctx, inst, value, "F64", "S16", true);
}
void EmitConvertF64S32(EmitContext& ctx, IR::Inst& inst, ScalarS32 value) {
Convert(ctx, inst, value, "F64", "S32", true);
}
void EmitConvertF64S64(EmitContext& ctx, IR::Inst& inst, Register value) {
Convert(ctx, inst, value, "F64", "S64", true);
}
void EmitConvertF64U8(EmitContext& ctx, IR::Inst& inst, Register value) {
Convert(ctx, inst, value, "F64", "U8", true);
}
void EmitConvertF64U16(EmitContext& ctx, IR::Inst& inst, Register value) {
Convert(ctx, inst, value, "F64", "U16", true);
}
void EmitConvertF64U32(EmitContext& ctx, IR::Inst& inst, ScalarU32 value) {
Convert(ctx, inst, value, "F64", "U32", true);
}
void EmitConvertF64U64(EmitContext& ctx, IR::Inst& inst, Register value) {
Convert(ctx, inst, value, "F64", "U64", true);
}
} // namespace Shader::Backend::GLASM

View File

@@ -0,0 +1,414 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <string_view>
#include "shader_recompiler/backend/glasm/emit_context.h"
#include "shader_recompiler/backend/glasm/emit_glasm_instructions.h"
#include "shader_recompiler/frontend/ir/modifiers.h"
#include "shader_recompiler/frontend/ir/value.h"
namespace Shader::Backend::GLASM {
namespace {
template <typename InputType>
void Compare(EmitContext& ctx, IR::Inst& inst, InputType lhs, InputType rhs, std::string_view op,
std::string_view type, bool ordered, bool inequality = false) {
const Register ret{ctx.reg_alloc.Define(inst)};
ctx.Add("{}.{} RC.x,{},{};", op, type, lhs, rhs);
if (ordered && inequality) {
ctx.Add("SEQ.{} RC.y,{},{};"
"SEQ.{} RC.z,{},{};"
"AND.U RC.x,RC.x,RC.y;"
"AND.U RC.x,RC.x,RC.z;"
"SNE.S {}.x,RC.x,0;",
type, lhs, lhs, type, rhs, rhs, ret);
} else if (ordered) {
ctx.Add("SNE.S {}.x,RC.x,0;", ret);
} else {
ctx.Add("SNE.{} RC.y,{},{};"
"SNE.{} RC.z,{},{};"
"OR.U RC.x,RC.x,RC.y;"
"OR.U RC.x,RC.x,RC.z;"
"SNE.S {}.x,RC.x,0;",
type, lhs, lhs, type, rhs, rhs, ret);
}
}
template <typename InputType>
void Clamp(EmitContext& ctx, Register ret, InputType value, InputType min_value,
InputType max_value, std::string_view type) {
// Call MAX first to properly clamp nan to min_value instead
ctx.Add("MAX.{} RC.x,{},{};"
"MIN.{} {}.x,RC.x,{};",
type, min_value, value, type, ret, max_value);
}
std::string_view Precise(IR::Inst& inst) {
const bool precise{inst.Flags<IR::FpControl>().no_contraction};
return precise ? ".PREC" : "";
}
} // Anonymous namespace
void EmitFPAbs16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
[[maybe_unused]] Register value) {
throw NotImplementedException("GLASM instruction");
}
void EmitFPAbs32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value) {
ctx.Add("MOV.F {}.x,|{}|;", inst, value);
}
void EmitFPAbs64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value) {
ctx.LongAdd("MOV.F64 {}.x,|{}|;", inst, value);
}
void EmitFPAdd16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
[[maybe_unused]] Register a, [[maybe_unused]] Register b) {
throw NotImplementedException("GLASM instruction");
}
void EmitFPAdd32(EmitContext& ctx, IR::Inst& inst, ScalarF32 a, ScalarF32 b) {
ctx.Add("ADD.F{} {}.x,{},{};", Precise(inst), ctx.reg_alloc.Define(inst), a, b);
}
void EmitFPAdd64(EmitContext& ctx, IR::Inst& inst, ScalarF64 a, ScalarF64 b) {
ctx.Add("ADD.F64{} {}.x,{},{};", Precise(inst), ctx.reg_alloc.LongDefine(inst), a, b);
}
void EmitFPFma16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
[[maybe_unused]] Register a, [[maybe_unused]] Register b,
[[maybe_unused]] Register c) {
throw NotImplementedException("GLASM instruction");
}
void EmitFPFma32(EmitContext& ctx, IR::Inst& inst, ScalarF32 a, ScalarF32 b, ScalarF32 c) {
ctx.Add("MAD.F{} {}.x,{},{},{};", Precise(inst), ctx.reg_alloc.Define(inst), a, b, c);
}
void EmitFPFma64(EmitContext& ctx, IR::Inst& inst, ScalarF64 a, ScalarF64 b, ScalarF64 c) {
ctx.Add("MAD.F64{} {}.x,{},{},{};", Precise(inst), ctx.reg_alloc.LongDefine(inst), a, b, c);
}
void EmitFPMax32(EmitContext& ctx, IR::Inst& inst, ScalarF32 a, ScalarF32 b) {
ctx.Add("MAX.F {}.x,{},{};", inst, a, b);
}
void EmitFPMax64(EmitContext& ctx, IR::Inst& inst, ScalarF64 a, ScalarF64 b) {
ctx.LongAdd("MAX.F64 {}.x,{},{};", inst, a, b);
}
void EmitFPMin32(EmitContext& ctx, IR::Inst& inst, ScalarF32 a, ScalarF32 b) {
ctx.Add("MIN.F {}.x,{},{};", inst, a, b);
}
void EmitFPMin64(EmitContext& ctx, IR::Inst& inst, ScalarF64 a, ScalarF64 b) {
ctx.LongAdd("MIN.F64 {}.x,{},{};", inst, a, b);
}
void EmitFPMul16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
[[maybe_unused]] Register a, [[maybe_unused]] Register b) {
throw NotImplementedException("GLASM instruction");
}
void EmitFPMul32(EmitContext& ctx, IR::Inst& inst, ScalarF32 a, ScalarF32 b) {
ctx.Add("MUL.F{} {}.x,{},{};", Precise(inst), ctx.reg_alloc.Define(inst), a, b);
}
void EmitFPMul64(EmitContext& ctx, IR::Inst& inst, ScalarF64 a, ScalarF64 b) {
ctx.Add("MUL.F64{} {}.x,{},{};", Precise(inst), ctx.reg_alloc.LongDefine(inst), a, b);
}
void EmitFPNeg16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register value) {
throw NotImplementedException("GLASM instruction");
}
void EmitFPNeg32(EmitContext& ctx, IR::Inst& inst, ScalarRegister value) {
ctx.Add("MOV.F {}.x,-{};", inst, value);
}
void EmitFPNeg64(EmitContext& ctx, IR::Inst& inst, Register value) {
ctx.LongAdd("MOV.F64 {}.x,-{};", inst, value);
}
void EmitFPSin(EmitContext& ctx, IR::Inst& inst, ScalarF32 value) {
ctx.Add("SIN {}.x,{};", inst, value);
}
void EmitFPCos(EmitContext& ctx, IR::Inst& inst, ScalarF32 value) {
ctx.Add("COS {}.x,{};", inst, value);
}
void EmitFPExp2(EmitContext& ctx, IR::Inst& inst, ScalarF32 value) {
ctx.Add("EX2 {}.x,{};", inst, value);
}
void EmitFPLog2(EmitContext& ctx, IR::Inst& inst, ScalarF32 value) {
ctx.Add("LG2 {}.x,{};", inst, value);
}
void EmitFPRecip32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value) {
ctx.Add("RCP {}.x,{};", inst, value);
}
void EmitFPRecip64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register value) {
throw NotImplementedException("GLASM instruction");
}
void EmitFPRecipSqrt32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value) {
ctx.Add("RSQ {}.x,{};", inst, value);
}
void EmitFPRecipSqrt64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register value) {
throw NotImplementedException("GLASM instruction");
}
void EmitFPSqrt(EmitContext& ctx, IR::Inst& inst, ScalarF32 value) {
const Register ret{ctx.reg_alloc.Define(inst)};
ctx.Add("RSQ RC.x,{};RCP {}.x,RC.x;", value, ret);
}
void EmitFPSaturate16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register value) {
throw NotImplementedException("GLASM instruction");
}
void EmitFPSaturate32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value) {
ctx.Add("MOV.F.SAT {}.x,{};", inst, value);
}
void EmitFPSaturate64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register value) {
throw NotImplementedException("GLASM instruction");
}
void EmitFPClamp16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register value,
[[maybe_unused]] Register min_value, [[maybe_unused]] Register max_value) {
throw NotImplementedException("GLASM instruction");
}
void EmitFPClamp32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value, ScalarF32 min_value,
ScalarF32 max_value) {
Clamp(ctx, ctx.reg_alloc.Define(inst), value, min_value, max_value, "F");
}
void EmitFPClamp64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value, ScalarF64 min_value,
ScalarF64 max_value) {
Clamp(ctx, ctx.reg_alloc.LongDefine(inst), value, min_value, max_value, "F64");
}
void EmitFPRoundEven16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register value) {
throw NotImplementedException("GLASM instruction");
}
void EmitFPRoundEven32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value) {
ctx.Add("ROUND.F {}.x,{};", inst, value);
}
void EmitFPRoundEven64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value) {
ctx.LongAdd("ROUND.F64 {}.x,{};", inst, value);
}
void EmitFPFloor16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register value) {
throw NotImplementedException("GLASM instruction");
}
void EmitFPFloor32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value) {
ctx.Add("FLR.F {}.x,{};", inst, value);
}
void EmitFPFloor64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value) {
ctx.LongAdd("FLR.F64 {}.x,{};", inst, value);
}
void EmitFPCeil16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register value) {
throw NotImplementedException("GLASM instruction");
}
void EmitFPCeil32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value) {
ctx.Add("CEIL.F {}.x,{};", inst, value);
}
void EmitFPCeil64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value) {
ctx.LongAdd("CEIL.F64 {}.x,{};", inst, value);
}
void EmitFPTrunc16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register value) {
throw NotImplementedException("GLASM instruction");
}
void EmitFPTrunc32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value) {
ctx.Add("TRUNC.F {}.x,{};", inst, value);
}
void EmitFPTrunc64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value) {
ctx.LongAdd("TRUNC.F64 {}.x,{};", inst, value);
}
void EmitFPOrdEqual16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register lhs,
[[maybe_unused]] Register rhs) {
throw NotImplementedException("GLASM instruction");
}
void EmitFPOrdEqual32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs) {
Compare(ctx, inst, lhs, rhs, "SEQ", "F", true);
}
void EmitFPOrdEqual64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs) {
Compare(ctx, inst, lhs, rhs, "SEQ", "F64", true);
}
void EmitFPUnordEqual16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register lhs,
[[maybe_unused]] Register rhs) {
throw NotImplementedException("GLASM instruction");
}
void EmitFPUnordEqual32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs) {
Compare(ctx, inst, lhs, rhs, "SEQ", "F", false);
}
void EmitFPUnordEqual64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs) {
Compare(ctx, inst, lhs, rhs, "SEQ", "F64", false);
}
void EmitFPOrdNotEqual16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register lhs,
[[maybe_unused]] Register rhs) {
throw NotImplementedException("GLASM instruction");
}
void EmitFPOrdNotEqual32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs) {
Compare(ctx, inst, lhs, rhs, "SNE", "F", true, true);
}
void EmitFPOrdNotEqual64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs) {
Compare(ctx, inst, lhs, rhs, "SNE", "F64", true, true);
}
void EmitFPUnordNotEqual16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register lhs,
[[maybe_unused]] Register rhs) {
throw NotImplementedException("GLASM instruction");
}
void EmitFPUnordNotEqual32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs) {
Compare(ctx, inst, lhs, rhs, "SNE", "F", false, true);
}
void EmitFPUnordNotEqual64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs) {
Compare(ctx, inst, lhs, rhs, "SNE", "F64", false, true);
}
void EmitFPOrdLessThan16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register lhs,
[[maybe_unused]] Register rhs) {
throw NotImplementedException("GLASM instruction");
}
void EmitFPOrdLessThan32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs) {
Compare(ctx, inst, lhs, rhs, "SLT", "F", true);
}
void EmitFPOrdLessThan64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs) {
Compare(ctx, inst, lhs, rhs, "SLT", "F64", true);
}
void EmitFPUnordLessThan16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register lhs,
[[maybe_unused]] Register rhs) {
throw NotImplementedException("GLASM instruction");
}
void EmitFPUnordLessThan32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs) {
Compare(ctx, inst, lhs, rhs, "SLT", "F", false);
}
void EmitFPUnordLessThan64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs) {
Compare(ctx, inst, lhs, rhs, "SLT", "F64", false);
}
void EmitFPOrdGreaterThan16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register lhs,
[[maybe_unused]] Register rhs) {
throw NotImplementedException("GLASM instruction");
}
void EmitFPOrdGreaterThan32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs) {
Compare(ctx, inst, lhs, rhs, "SGT", "F", true);
}
void EmitFPOrdGreaterThan64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs) {
Compare(ctx, inst, lhs, rhs, "SGT", "F64", true);
}
void EmitFPUnordGreaterThan16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register lhs,
[[maybe_unused]] Register rhs) {
throw NotImplementedException("GLASM instruction");
}
void EmitFPUnordGreaterThan32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs) {
Compare(ctx, inst, lhs, rhs, "SGT", "F", false);
}
void EmitFPUnordGreaterThan64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs) {
Compare(ctx, inst, lhs, rhs, "SGT", "F64", false);
}
void EmitFPOrdLessThanEqual16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register lhs,
[[maybe_unused]] Register rhs) {
throw NotImplementedException("GLASM instruction");
}
void EmitFPOrdLessThanEqual32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs) {
Compare(ctx, inst, lhs, rhs, "SLE", "F", true);
}
void EmitFPOrdLessThanEqual64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs) {
Compare(ctx, inst, lhs, rhs, "SLE", "F64", true);
}
void EmitFPUnordLessThanEqual16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register lhs,
[[maybe_unused]] Register rhs) {
throw NotImplementedException("GLASM instruction");
}
void EmitFPUnordLessThanEqual32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs) {
Compare(ctx, inst, lhs, rhs, "SLE", "F", false);
}
void EmitFPUnordLessThanEqual64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs) {
Compare(ctx, inst, lhs, rhs, "SLE", "F64", false);
}
void EmitFPOrdGreaterThanEqual16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register lhs,
[[maybe_unused]] Register rhs) {
throw NotImplementedException("GLASM instruction");
}
void EmitFPOrdGreaterThanEqual32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs) {
Compare(ctx, inst, lhs, rhs, "SGE", "F", true);
}
void EmitFPOrdGreaterThanEqual64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs) {
Compare(ctx, inst, lhs, rhs, "SGE", "F64", true);
}
void EmitFPUnordGreaterThanEqual16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register lhs,
[[maybe_unused]] Register rhs) {
throw NotImplementedException("GLASM instruction");
}
void EmitFPUnordGreaterThanEqual32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs) {
Compare(ctx, inst, lhs, rhs, "SGE", "F", false);
}
void EmitFPUnordGreaterThanEqual64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs) {
Compare(ctx, inst, lhs, rhs, "SGE", "F64", false);
}
void EmitFPIsNan16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register value) {
throw NotImplementedException("GLASM instruction");
}
void EmitFPIsNan32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value) {
Compare(ctx, inst, value, value, "SNE", "F", true, false);
}
void EmitFPIsNan64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value) {
Compare(ctx, inst, value, value, "SNE", "F64", true, false);
}
} // namespace Shader::Backend::GLASM

View File

@@ -0,0 +1,850 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <utility>
#include "shader_recompiler/backend/glasm/emit_context.h"
#include "shader_recompiler/backend/glasm/emit_glasm_instructions.h"
#include "shader_recompiler/frontend/ir/modifiers.h"
#include "shader_recompiler/frontend/ir/value.h"
namespace Shader::Backend::GLASM {
namespace {
struct ScopedRegister {
ScopedRegister() = default;
ScopedRegister(RegAlloc& reg_alloc_) : reg_alloc{&reg_alloc_}, reg{reg_alloc->AllocReg()} {}
~ScopedRegister() {
if (reg_alloc) {
reg_alloc->FreeReg(reg);
}
}
ScopedRegister& operator=(ScopedRegister&& rhs) noexcept {
if (reg_alloc) {
reg_alloc->FreeReg(reg);
}
reg_alloc = std::exchange(rhs.reg_alloc, nullptr);
reg = rhs.reg;
return *this;
}
ScopedRegister(ScopedRegister&& rhs) noexcept
: reg_alloc{std::exchange(rhs.reg_alloc, nullptr)}, reg{rhs.reg} {}
ScopedRegister& operator=(const ScopedRegister&) = delete;
ScopedRegister(const ScopedRegister&) = delete;
RegAlloc* reg_alloc{};
Register reg;
};
std::string Texture(EmitContext& ctx, IR::TextureInstInfo info,
[[maybe_unused]] const IR::Value& index) {
// FIXME: indexed reads
if (info.type == TextureType::Buffer) {
return fmt::format("texture[{}]", ctx.texture_buffer_bindings.at(info.descriptor_index));
} else {
return fmt::format("texture[{}]", ctx.texture_bindings.at(info.descriptor_index));
}
}
std::string Image(EmitContext& ctx, IR::TextureInstInfo info,
[[maybe_unused]] const IR::Value& index) {
// FIXME: indexed reads
if (info.type == TextureType::Buffer) {
return fmt::format("image[{}]", ctx.image_buffer_bindings.at(info.descriptor_index));
} else {
return fmt::format("image[{}]", ctx.image_bindings.at(info.descriptor_index));
}
}
std::string_view TextureType(IR::TextureInstInfo info) {
if (info.is_depth) {
switch (info.type) {
case TextureType::Color1D:
return "SHADOW1D";
case TextureType::ColorArray1D:
return "SHADOWARRAY1D";
case TextureType::Color2D:
return "SHADOW2D";
case TextureType::ColorArray2D:
return "SHADOWARRAY2D";
case TextureType::Color3D:
return "SHADOW3D";
case TextureType::ColorCube:
return "SHADOWCUBE";
case TextureType::ColorArrayCube:
return "SHADOWARRAYCUBE";
case TextureType::Buffer:
return "SHADOWBUFFER";
}
} else {
switch (info.type) {
case TextureType::Color1D:
return "1D";
case TextureType::ColorArray1D:
return "ARRAY1D";
case TextureType::Color2D:
return "2D";
case TextureType::ColorArray2D:
return "ARRAY2D";
case TextureType::Color3D:
return "3D";
case TextureType::ColorCube:
return "CUBE";
case TextureType::ColorArrayCube:
return "ARRAYCUBE";
case TextureType::Buffer:
return "BUFFER";
}
}
throw InvalidArgument("Invalid texture type {}", info.type.Value());
}
std::string Offset(EmitContext& ctx, const IR::Value& offset) {
if (offset.IsEmpty()) {
return "";
}
return fmt::format(",offset({})", Register{ctx.reg_alloc.Consume(offset)});
}
std::pair<ScopedRegister, ScopedRegister> AllocOffsetsRegs(EmitContext& ctx,
const IR::Value& offset2) {
if (offset2.IsEmpty()) {
return {};
} else {
return {ctx.reg_alloc, ctx.reg_alloc};
}
}
void SwizzleOffsets(EmitContext& ctx, Register off_x, Register off_y, const IR::Value& offset1,
const IR::Value& offset2) {
const Register offsets_a{ctx.reg_alloc.Consume(offset1)};
const Register offsets_b{ctx.reg_alloc.Consume(offset2)};
// Input swizzle: [XYXY] [XYXY]
// Output swizzle: [XXXX] [YYYY]
ctx.Add("MOV {}.x,{}.x;"
"MOV {}.y,{}.z;"
"MOV {}.z,{}.x;"
"MOV {}.w,{}.z;"
"MOV {}.x,{}.y;"
"MOV {}.y,{}.w;"
"MOV {}.z,{}.y;"
"MOV {}.w,{}.w;",
off_x, offsets_a, off_x, offsets_a, off_x, offsets_b, off_x, offsets_b, off_y,
offsets_a, off_y, offsets_a, off_y, offsets_b, off_y, offsets_b);
}
std::string GradOffset(const IR::Value& offset) {
if (offset.IsImmediate()) {
LOG_WARNING(Shader_GLASM, "Gradient offset is a scalar immediate");
return "";
}
IR::Inst* const vector{offset.InstRecursive()};
if (!vector->AreAllArgsImmediates()) {
LOG_WARNING(Shader_GLASM, "Gradient offset vector is not immediate");
return "";
}
switch (vector->NumArgs()) {
case 1:
return fmt::format(",({})", static_cast<s32>(vector->Arg(0).U32()));
case 2:
return fmt::format(",({},{})", static_cast<s32>(vector->Arg(0).U32()),
static_cast<s32>(vector->Arg(1).U32()));
default:
throw LogicError("Invalid number of gradient offsets {}", vector->NumArgs());
}
}
std::pair<std::string, ScopedRegister> Coord(EmitContext& ctx, const IR::Value& coord) {
if (coord.IsImmediate()) {
ScopedRegister scoped_reg(ctx.reg_alloc);
ctx.Add("MOV.U {}.x,{};", scoped_reg.reg, ScalarU32{ctx.reg_alloc.Consume(coord)});
return {fmt::to_string(scoped_reg.reg), std::move(scoped_reg)};
}
std::string coord_vec{fmt::to_string(Register{ctx.reg_alloc.Consume(coord)})};
if (coord.InstRecursive()->HasUses()) {
// Move non-dead coords to a separate register, although this should never happen because
// vectors are only assembled for immediate texture instructions
ctx.Add("MOV.F RC,{};", coord_vec);
coord_vec = "RC";
}
return {std::move(coord_vec), ScopedRegister{}};
}
void StoreSparse(EmitContext& ctx, IR::Inst* sparse_inst) {
if (!sparse_inst) {
return;
}
const Register sparse_ret{ctx.reg_alloc.Define(*sparse_inst)};
ctx.Add("MOV.S {},-1;"
"MOV.S {}(NONRESIDENT),0;",
sparse_ret, sparse_ret);
}
std::string_view FormatStorage(ImageFormat format) {
switch (format) {
case ImageFormat::Typeless:
return "U";
case ImageFormat::R8_UINT:
return "U8";
case ImageFormat::R8_SINT:
return "S8";
case ImageFormat::R16_UINT:
return "U16";
case ImageFormat::R16_SINT:
return "S16";
case ImageFormat::R32_UINT:
return "U32";
case ImageFormat::R32G32_UINT:
return "U32X2";
case ImageFormat::R32G32B32A32_UINT:
return "U32X4";
}
throw InvalidArgument("Invalid image format {}", format);
}
template <typename T>
void ImageAtomic(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord, T value,
std::string_view op) {
const auto info{inst.Flags<IR::TextureInstInfo>()};
const std::string_view type{TextureType(info)};
const std::string image{Image(ctx, info, index)};
const Register ret{ctx.reg_alloc.Define(inst)};
ctx.Add("ATOMIM.{} {},{},{},{},{};", op, ret, value, coord, image, type);
}
IR::Inst* PrepareSparse(IR::Inst& inst) {
const auto sparse_inst{inst.GetAssociatedPseudoOperation(IR::Opcode::GetSparseFromOp)};
if (sparse_inst) {
sparse_inst->Invalidate();
}
return sparse_inst;
}
} // Anonymous namespace
void EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
const IR::Value& coord, Register bias_lc, const IR::Value& offset) {
const auto info{inst.Flags<IR::TextureInstInfo>()};
const auto sparse_inst{PrepareSparse(inst)};
const std::string_view sparse_mod{sparse_inst ? ".SPARSE" : ""};
const std::string_view lod_clamp_mod{info.has_lod_clamp ? ".LODCLAMP" : ""};
const std::string_view type{TextureType(info)};
const std::string texture{Texture(ctx, info, index)};
const std::string offset_vec{Offset(ctx, offset)};
const auto [coord_vec, coord_alloc]{Coord(ctx, coord)};
const Register ret{ctx.reg_alloc.Define(inst)};
if (info.has_bias) {
if (info.type == TextureType::ColorArrayCube) {
ctx.Add("TXB.F{}{} {},{},{},{},ARRAYCUBE{};", lod_clamp_mod, sparse_mod, ret, coord_vec,
bias_lc, texture, offset_vec);
} else {
if (info.has_lod_clamp) {
ctx.Add("MOV.F {}.w,{}.x;"
"TXB.F.LODCLAMP{} {},{},{}.y,{},{}{};",
coord_vec, bias_lc, sparse_mod, ret, coord_vec, bias_lc, texture, type,
offset_vec);
} else {
ctx.Add("MOV.F {}.w,{}.x;"
"TXB.F{} {},{},{},{}{};",
coord_vec, bias_lc, sparse_mod, ret, coord_vec, texture, type, offset_vec);
}
}
} else {
if (info.has_lod_clamp && info.type == TextureType::ColorArrayCube) {
ctx.Add("TEX.F.LODCLAMP{} {},{},{},{},ARRAYCUBE{};", sparse_mod, ret, coord_vec,
bias_lc, texture, offset_vec);
} else {
ctx.Add("TEX.F{}{} {},{},{},{}{};", lod_clamp_mod, sparse_mod, ret, coord_vec, texture,
type, offset_vec);
}
}
StoreSparse(ctx, sparse_inst);
}
void EmitImageSampleExplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
const IR::Value& coord, ScalarF32 lod, const IR::Value& offset) {
const auto info{inst.Flags<IR::TextureInstInfo>()};
const auto sparse_inst{PrepareSparse(inst)};
const std::string_view sparse_mod{sparse_inst ? ".SPARSE" : ""};
const std::string_view type{TextureType(info)};
const std::string texture{Texture(ctx, info, index)};
const std::string offset_vec{Offset(ctx, offset)};
const auto [coord_vec, coord_alloc]{Coord(ctx, coord)};
const Register ret{ctx.reg_alloc.Define(inst)};
if (info.type == TextureType::ColorArrayCube) {
ctx.Add("TXL.F{} {},{},{},{},ARRAYCUBE{};", sparse_mod, ret, coord_vec, lod, texture,
offset_vec);
} else {
ctx.Add("MOV.F {}.w,{};"
"TXL.F{} {},{},{},{}{};",
coord_vec, lod, sparse_mod, ret, coord_vec, texture, type, offset_vec);
}
StoreSparse(ctx, sparse_inst);
}
void EmitImageSampleDrefImplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
const IR::Value& coord, const IR::Value& dref,
const IR::Value& bias_lc, const IR::Value& offset) {
// Allocate early to avoid aliases
const auto info{inst.Flags<IR::TextureInstInfo>()};
ScopedRegister staging;
if (info.type == TextureType::ColorArrayCube) {
staging = ScopedRegister{ctx.reg_alloc};
}
const ScalarF32 dref_val{ctx.reg_alloc.Consume(dref)};
const Register bias_lc_vec{ctx.reg_alloc.Consume(bias_lc)};
const auto sparse_inst{PrepareSparse(inst)};
const std::string_view sparse_mod{sparse_inst ? ".SPARSE" : ""};
const std::string_view type{TextureType(info)};
const std::string texture{Texture(ctx, info, index)};
const std::string offset_vec{Offset(ctx, offset)};
const auto [coord_vec, coord_alloc]{Coord(ctx, coord)};
const Register ret{ctx.reg_alloc.Define(inst)};
if (info.has_bias) {
if (info.has_lod_clamp) {
switch (info.type) {
case TextureType::Color1D:
case TextureType::ColorArray1D:
case TextureType::Color2D:
ctx.Add("MOV.F {}.z,{};"
"MOV.F {}.w,{}.x;"
"TXB.F.LODCLAMP{} {},{},{}.y,{},{}{};",
coord_vec, dref_val, coord_vec, bias_lc_vec, sparse_mod, ret, coord_vec,
bias_lc_vec, texture, type, offset_vec);
break;
case TextureType::ColorArray2D:
case TextureType::ColorCube:
ctx.Add("MOV.F {}.w,{};"
"TXB.F.LODCLAMP{} {},{},{},{},{}{};",
coord_vec, dref_val, sparse_mod, ret, coord_vec, bias_lc_vec, texture, type,
offset_vec);
break;
default:
throw NotImplementedException("Invalid type {} with bias and lod clamp",
info.type.Value());
}
} else {
switch (info.type) {
case TextureType::Color1D:
case TextureType::ColorArray1D:
case TextureType::Color2D:
ctx.Add("MOV.F {}.z,{};"
"MOV.F {}.w,{}.x;"
"TXB.F{} {},{},{},{}{};",
coord_vec, dref_val, coord_vec, bias_lc_vec, sparse_mod, ret, coord_vec,
texture, type, offset_vec);
break;
case TextureType::ColorArray2D:
case TextureType::ColorCube:
ctx.Add("MOV.F {}.w,{};"
"TXB.F{} {},{},{},{},{}{};",
coord_vec, dref_val, sparse_mod, ret, coord_vec, bias_lc_vec, texture, type,
offset_vec);
break;
case TextureType::ColorArrayCube:
ctx.Add("MOV.F {}.x,{};"
"MOV.F {}.y,{}.x;"
"TXB.F{} {},{},{},{},{}{};",
staging.reg, dref_val, staging.reg, bias_lc_vec, sparse_mod, ret, coord_vec,
staging.reg, texture, type, offset_vec);
break;
default:
throw NotImplementedException("Invalid type {}", info.type.Value());
}
}
} else {
if (info.has_lod_clamp) {
if (info.type != TextureType::ColorArrayCube) {
const bool w_swizzle{info.type == TextureType::ColorArray2D ||
info.type == TextureType::ColorCube};
const char dref_swizzle{w_swizzle ? 'w' : 'z'};
ctx.Add("MOV.F {}.{},{};"
"TEX.F.LODCLAMP{} {},{},{},{},{}{};",
coord_vec, dref_swizzle, dref_val, sparse_mod, ret, coord_vec, bias_lc_vec,
texture, type, offset_vec);
} else {
ctx.Add("MOV.F {}.x,{};"
"MOV.F {}.y,{};"
"TEX.F.LODCLAMP{} {},{},{},{},{}{};",
staging.reg, dref_val, staging.reg, bias_lc_vec, sparse_mod, ret, coord_vec,
staging.reg, texture, type, offset_vec);
}
} else {
if (info.type != TextureType::ColorArrayCube) {
const bool w_swizzle{info.type == TextureType::ColorArray2D ||
info.type == TextureType::ColorCube};
const char dref_swizzle{w_swizzle ? 'w' : 'z'};
ctx.Add("MOV.F {}.{},{};"
"TEX.F{} {},{},{},{}{};",
coord_vec, dref_swizzle, dref_val, sparse_mod, ret, coord_vec, texture,
type, offset_vec);
} else {
ctx.Add("TEX.F{} {},{},{},{},{}{};", sparse_mod, ret, coord_vec, dref_val, texture,
type, offset_vec);
}
}
}
StoreSparse(ctx, sparse_inst);
}
void EmitImageSampleDrefExplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
const IR::Value& coord, const IR::Value& dref,
const IR::Value& lod, const IR::Value& offset) {
// Allocate early to avoid aliases
const auto info{inst.Flags<IR::TextureInstInfo>()};
ScopedRegister staging;
if (info.type == TextureType::ColorArrayCube) {
staging = ScopedRegister{ctx.reg_alloc};
}
const ScalarF32 dref_val{ctx.reg_alloc.Consume(dref)};
const ScalarF32 lod_val{ctx.reg_alloc.Consume(lod)};
const auto sparse_inst{PrepareSparse(inst)};
const std::string_view sparse_mod{sparse_inst ? ".SPARSE" : ""};
const std::string_view type{TextureType(info)};
const std::string texture{Texture(ctx, info, index)};
const std::string offset_vec{Offset(ctx, offset)};
const auto [coord_vec, coord_alloc]{Coord(ctx, coord)};
const Register ret{ctx.reg_alloc.Define(inst)};
switch (info.type) {
case TextureType::Color1D:
case TextureType::ColorArray1D:
case TextureType::Color2D:
ctx.Add("MOV.F {}.z,{};"
"MOV.F {}.w,{};"
"TXL.F{} {},{},{},{}{};",
coord_vec, dref_val, coord_vec, lod_val, sparse_mod, ret, coord_vec, texture, type,
offset_vec);
break;
case TextureType::ColorArray2D:
case TextureType::ColorCube:
ctx.Add("MOV.F {}.w,{};"
"TXL.F{} {},{},{},{},{}{};",
coord_vec, dref_val, sparse_mod, ret, coord_vec, lod_val, texture, type,
offset_vec);
break;
case TextureType::ColorArrayCube:
ctx.Add("MOV.F {}.x,{};"
"MOV.F {}.y,{};"
"TXL.F{} {},{},{},{},{}{};",
staging.reg, dref_val, staging.reg, lod_val, sparse_mod, ret, coord_vec,
staging.reg, texture, type, offset_vec);
break;
default:
throw NotImplementedException("Invalid type {}", info.type.Value());
}
StoreSparse(ctx, sparse_inst);
}
void EmitImageGather(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
const IR::Value& coord, const IR::Value& offset, const IR::Value& offset2) {
// Allocate offsets early so they don't overwrite any consumed register
const auto [off_x, off_y]{AllocOffsetsRegs(ctx, offset2)};
const auto info{inst.Flags<IR::TextureInstInfo>()};
const char comp{"xyzw"[info.gather_component]};
const auto sparse_inst{PrepareSparse(inst)};
const std::string_view sparse_mod{sparse_inst ? ".SPARSE" : ""};
const std::string_view type{TextureType(info)};
const std::string texture{Texture(ctx, info, index)};
const Register coord_vec{ctx.reg_alloc.Consume(coord)};
const Register ret{ctx.reg_alloc.Define(inst)};
if (offset2.IsEmpty()) {
const std::string offset_vec{Offset(ctx, offset)};
ctx.Add("TXG.F{} {},{},{}.{},{}{};", sparse_mod, ret, coord_vec, texture, comp, type,
offset_vec);
} else {
SwizzleOffsets(ctx, off_x.reg, off_y.reg, offset, offset2);
ctx.Add("TXGO.F{} {},{},{},{},{}.{},{};", sparse_mod, ret, coord_vec, off_x.reg, off_y.reg,
texture, comp, type);
}
StoreSparse(ctx, sparse_inst);
}
void EmitImageGatherDref(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
const IR::Value& coord, const IR::Value& offset, const IR::Value& offset2,
const IR::Value& dref) {
// FIXME: This instruction is not working as expected
// Allocate offsets early so they don't overwrite any consumed register
const auto [off_x, off_y]{AllocOffsetsRegs(ctx, offset2)};
const auto info{inst.Flags<IR::TextureInstInfo>()};
const auto sparse_inst{PrepareSparse(inst)};
const std::string_view sparse_mod{sparse_inst ? ".SPARSE" : ""};
const std::string_view type{TextureType(info)};
const std::string texture{Texture(ctx, info, index)};
const Register coord_vec{ctx.reg_alloc.Consume(coord)};
const ScalarF32 dref_value{ctx.reg_alloc.Consume(dref)};
const Register ret{ctx.reg_alloc.Define(inst)};
std::string args;
switch (info.type) {
case TextureType::Color2D:
ctx.Add("MOV.F {}.z,{};", coord_vec, dref_value);
args = fmt::to_string(coord_vec);
break;
case TextureType::ColorArray2D:
case TextureType::ColorCube:
ctx.Add("MOV.F {}.w,{};", coord_vec, dref_value);
args = fmt::to_string(coord_vec);
break;
case TextureType::ColorArrayCube:
args = fmt::format("{},{}", coord_vec, dref_value);
break;
default:
throw NotImplementedException("Invalid type {}", info.type.Value());
}
if (offset2.IsEmpty()) {
const std::string offset_vec{Offset(ctx, offset)};
ctx.Add("TXG.F{} {},{},{},{}{};", sparse_mod, ret, args, texture, type, offset_vec);
} else {
SwizzleOffsets(ctx, off_x.reg, off_y.reg, offset, offset2);
ctx.Add("TXGO.F{} {},{},{},{},{},{};", sparse_mod, ret, args, off_x.reg, off_y.reg, texture,
type);
}
StoreSparse(ctx, sparse_inst);
}
void EmitImageFetch(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
const IR::Value& coord, const IR::Value& offset, ScalarS32 lod, ScalarS32 ms) {
const auto info{inst.Flags<IR::TextureInstInfo>()};
const auto sparse_inst{PrepareSparse(inst)};
const std::string_view sparse_mod{sparse_inst ? ".SPARSE" : ""};
const std::string_view type{TextureType(info)};
const std::string texture{Texture(ctx, info, index)};
const std::string offset_vec{Offset(ctx, offset)};
const auto [coord_vec, coord_alloc]{Coord(ctx, coord)};
const Register ret{ctx.reg_alloc.Define(inst)};
if (info.type == TextureType::Buffer) {
ctx.Add("TXF.F{} {},{},{},{}{};", sparse_mod, ret, coord_vec, texture, type, offset_vec);
} else if (ms.type != Type::Void) {
ctx.Add("MOV.S {}.w,{};"
"TXFMS.F{} {},{},{},{}{};",
coord_vec, ms, sparse_mod, ret, coord_vec, texture, type, offset_vec);
} else {
ctx.Add("MOV.S {}.w,{};"
"TXF.F{} {},{},{},{}{};",
coord_vec, lod, sparse_mod, ret, coord_vec, texture, type, offset_vec);
}
StoreSparse(ctx, sparse_inst);
}
void EmitImageQueryDimensions(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
ScalarS32 lod) {
const auto info{inst.Flags<IR::TextureInstInfo>()};
const std::string texture{Texture(ctx, info, index)};
const std::string_view type{TextureType(info)};
ctx.Add("TXQ {},{},{},{};", inst, lod, texture, type);
}
void EmitImageQueryLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord) {
const auto info{inst.Flags<IR::TextureInstInfo>()};
const std::string texture{Texture(ctx, info, index)};
const std::string_view type{TextureType(info)};
ctx.Add("LOD.F {},{},{},{};", inst, coord, texture, type);
}
void EmitImageGradient(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
const IR::Value& coord, const IR::Value& derivatives,
const IR::Value& offset, const IR::Value& lod_clamp) {
const auto info{inst.Flags<IR::TextureInstInfo>()};
ScopedRegister dpdx, dpdy;
const bool multi_component{info.num_derivates > 1 || info.has_lod_clamp};
if (multi_component) {
// Allocate this early to avoid aliasing other registers
dpdx = ScopedRegister{ctx.reg_alloc};
dpdy = ScopedRegister{ctx.reg_alloc};
}
const auto sparse_inst{PrepareSparse(inst)};
const std::string_view sparse_mod{sparse_inst ? ".SPARSE" : ""};
const std::string_view type{TextureType(info)};
const std::string texture{Texture(ctx, info, index)};
const std::string offset_vec{GradOffset(offset)};
const Register coord_vec{ctx.reg_alloc.Consume(coord)};
const Register derivatives_vec{ctx.reg_alloc.Consume(derivatives)};
const Register ret{ctx.reg_alloc.Define(inst)};
if (multi_component) {
ctx.Add("MOV.F {}.x,{}.x;"
"MOV.F {}.y,{}.z;"
"MOV.F {}.x,{}.y;"
"MOV.F {}.y,{}.w;",
dpdx.reg, derivatives_vec, dpdx.reg, derivatives_vec, dpdy.reg, derivatives_vec,
dpdy.reg, derivatives_vec);
if (info.has_lod_clamp) {
const ScalarF32 lod_clamp_value{ctx.reg_alloc.Consume(lod_clamp)};
ctx.Add("MOV.F {}.w,{};"
"TXD.F.LODCLAMP{} {},{},{},{},{},{}{};",
dpdy.reg, lod_clamp_value, sparse_mod, ret, coord_vec, dpdx.reg, dpdy.reg,
texture, type, offset_vec);
} else {
ctx.Add("TXD.F{} {},{},{},{},{},{}{};", sparse_mod, ret, coord_vec, dpdx.reg, dpdy.reg,
texture, type, offset_vec);
}
} else {
ctx.Add("TXD.F{} {},{},{}.x,{}.y,{},{}{};", sparse_mod, ret, coord_vec, derivatives_vec,
derivatives_vec, texture, type, offset_vec);
}
StoreSparse(ctx, sparse_inst);
}
void EmitImageRead(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord) {
const auto info{inst.Flags<IR::TextureInstInfo>()};
const auto sparse_inst{PrepareSparse(inst)};
const std::string_view format{FormatStorage(info.image_format)};
const std::string_view sparse_mod{sparse_inst ? ".SPARSE" : ""};
const std::string_view type{TextureType(info)};
const std::string image{Image(ctx, info, index)};
const Register ret{ctx.reg_alloc.Define(inst)};
ctx.Add("LOADIM.{}{} {},{},{},{};", format, sparse_mod, ret, coord, image, type);
StoreSparse(ctx, sparse_inst);
}
void EmitImageWrite(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord,
Register color) {
const auto info{inst.Flags<IR::TextureInstInfo>()};
const std::string_view format{FormatStorage(info.image_format)};
const std::string_view type{TextureType(info)};
const std::string image{Image(ctx, info, index)};
ctx.Add("STOREIM.{} {},{},{},{};", format, image, color, coord, type);
}
void EmitImageAtomicIAdd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord,
ScalarU32 value) {
ImageAtomic(ctx, inst, index, coord, value, "ADD.U32");
}
void EmitImageAtomicSMin32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord,
ScalarS32 value) {
ImageAtomic(ctx, inst, index, coord, value, "MIN.S32");
}
void EmitImageAtomicUMin32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord,
ScalarU32 value) {
ImageAtomic(ctx, inst, index, coord, value, "MIN.U32");
}
void EmitImageAtomicSMax32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord,
ScalarS32 value) {
ImageAtomic(ctx, inst, index, coord, value, "MAX.S32");
}
void EmitImageAtomicUMax32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord,
ScalarU32 value) {
ImageAtomic(ctx, inst, index, coord, value, "MAX.U32");
}
void EmitImageAtomicInc32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord,
ScalarU32 value) {
ImageAtomic(ctx, inst, index, coord, value, "IWRAP.U32");
}
void EmitImageAtomicDec32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord,
ScalarU32 value) {
ImageAtomic(ctx, inst, index, coord, value, "DWRAP.U32");
}
void EmitImageAtomicAnd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord,
ScalarU32 value) {
ImageAtomic(ctx, inst, index, coord, value, "AND.U32");
}
void EmitImageAtomicOr32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord,
ScalarU32 value) {
ImageAtomic(ctx, inst, index, coord, value, "OR.U32");
}
void EmitImageAtomicXor32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord,
ScalarU32 value) {
ImageAtomic(ctx, inst, index, coord, value, "XOR.U32");
}
void EmitImageAtomicExchange32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
Register coord, ScalarU32 value) {
ImageAtomic(ctx, inst, index, coord, value, "EXCH.U32");
}
void EmitBindlessImageSampleImplicitLod(EmitContext&) {
throw LogicError("Unreachable instruction");
}
void EmitBindlessImageSampleExplicitLod(EmitContext&) {
throw LogicError("Unreachable instruction");
}
void EmitBindlessImageSampleDrefImplicitLod(EmitContext&) {
throw LogicError("Unreachable instruction");
}
void EmitBindlessImageSampleDrefExplicitLod(EmitContext&) {
throw LogicError("Unreachable instruction");
}
void EmitBindlessImageGather(EmitContext&) {
throw LogicError("Unreachable instruction");
}
void EmitBindlessImageGatherDref(EmitContext&) {
throw LogicError("Unreachable instruction");
}
void EmitBindlessImageFetch(EmitContext&) {
throw LogicError("Unreachable instruction");
}
void EmitBindlessImageQueryDimensions(EmitContext&) {
throw LogicError("Unreachable instruction");
}
void EmitBindlessImageQueryLod(EmitContext&) {
throw LogicError("Unreachable instruction");
}
void EmitBindlessImageGradient(EmitContext&) {
throw LogicError("Unreachable instruction");
}
void EmitBindlessImageRead(EmitContext&) {
throw LogicError("Unreachable instruction");
}
void EmitBindlessImageWrite(EmitContext&) {
throw LogicError("Unreachable instruction");
}
void EmitBoundImageSampleImplicitLod(EmitContext&) {
throw LogicError("Unreachable instruction");
}
void EmitBoundImageSampleExplicitLod(EmitContext&) {
throw LogicError("Unreachable instruction");
}
void EmitBoundImageSampleDrefImplicitLod(EmitContext&) {
throw LogicError("Unreachable instruction");
}
void EmitBoundImageSampleDrefExplicitLod(EmitContext&) {
throw LogicError("Unreachable instruction");
}
void EmitBoundImageGather(EmitContext&) {
throw LogicError("Unreachable instruction");
}
void EmitBoundImageGatherDref(EmitContext&) {
throw LogicError("Unreachable instruction");
}
void EmitBoundImageFetch(EmitContext&) {
throw LogicError("Unreachable instruction");
}
void EmitBoundImageQueryDimensions(EmitContext&) {
throw LogicError("Unreachable instruction");
}
void EmitBoundImageQueryLod(EmitContext&) {
throw LogicError("Unreachable instruction");
}
void EmitBoundImageGradient(EmitContext&) {
throw LogicError("Unreachable instruction");
}
void EmitBoundImageRead(EmitContext&) {
throw LogicError("Unreachable instruction");
}
void EmitBoundImageWrite(EmitContext&) {
throw LogicError("Unreachable instruction");
}
void EmitBindlessImageAtomicIAdd32(EmitContext&) {
throw LogicError("Unreachable instruction");
}
void EmitBindlessImageAtomicSMin32(EmitContext&) {
throw LogicError("Unreachable instruction");
}
void EmitBindlessImageAtomicUMin32(EmitContext&) {
throw LogicError("Unreachable instruction");
}
void EmitBindlessImageAtomicSMax32(EmitContext&) {
throw LogicError("Unreachable instruction");
}
void EmitBindlessImageAtomicUMax32(EmitContext&) {
throw LogicError("Unreachable instruction");
}
void EmitBindlessImageAtomicInc32(EmitContext&) {
throw LogicError("Unreachable instruction");
}
void EmitBindlessImageAtomicDec32(EmitContext&) {
throw LogicError("Unreachable instruction");
}
void EmitBindlessImageAtomicAnd32(EmitContext&) {
throw LogicError("Unreachable instruction");
}
void EmitBindlessImageAtomicOr32(EmitContext&) {
throw LogicError("Unreachable instruction");
}
void EmitBindlessImageAtomicXor32(EmitContext&) {
throw LogicError("Unreachable instruction");
}
void EmitBindlessImageAtomicExchange32(EmitContext&) {
throw LogicError("Unreachable instruction");
}
void EmitBoundImageAtomicIAdd32(EmitContext&) {
throw LogicError("Unreachable instruction");
}
void EmitBoundImageAtomicSMin32(EmitContext&) {
throw LogicError("Unreachable instruction");
}
void EmitBoundImageAtomicUMin32(EmitContext&) {
throw LogicError("Unreachable instruction");
}
void EmitBoundImageAtomicSMax32(EmitContext&) {
throw LogicError("Unreachable instruction");
}
void EmitBoundImageAtomicUMax32(EmitContext&) {
throw LogicError("Unreachable instruction");
}
void EmitBoundImageAtomicInc32(EmitContext&) {
throw LogicError("Unreachable instruction");
}
void EmitBoundImageAtomicDec32(EmitContext&) {
throw LogicError("Unreachable instruction");
}
void EmitBoundImageAtomicAnd32(EmitContext&) {
throw LogicError("Unreachable instruction");
}
void EmitBoundImageAtomicOr32(EmitContext&) {
throw LogicError("Unreachable instruction");
}
void EmitBoundImageAtomicXor32(EmitContext&) {
throw LogicError("Unreachable instruction");
}
void EmitBoundImageAtomicExchange32(EmitContext&) {
throw LogicError("Unreachable instruction");
}
} // namespace Shader::Backend::GLASM

View File

@@ -0,0 +1,625 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include "common/common_types.h"
#include "shader_recompiler/backend/glasm/reg_alloc.h"
namespace Shader::IR {
enum class Attribute : u64;
enum class Patch : u64;
class Inst;
class Value;
} // namespace Shader::IR
namespace Shader::Backend::GLASM {
class EmitContext;
// Microinstruction emitters
void EmitPhi(EmitContext& ctx, IR::Inst& inst);
void EmitVoid(EmitContext& ctx);
void EmitIdentity(EmitContext& ctx, IR::Inst& inst, const IR::Value& value);
void EmitConditionRef(EmitContext& ctx, IR::Inst& inst, const IR::Value& value);
void EmitReference(EmitContext&, const IR::Value& value);
void EmitPhiMove(EmitContext& ctx, const IR::Value& phi, const IR::Value& value);
void EmitJoin(EmitContext& ctx);
void EmitDemoteToHelperInvocation(EmitContext& ctx);
void EmitBarrier(EmitContext& ctx);
void EmitWorkgroupMemoryBarrier(EmitContext& ctx);
void EmitDeviceMemoryBarrier(EmitContext& ctx);
void EmitPrologue(EmitContext& ctx);
void EmitEpilogue(EmitContext& ctx);
void EmitEmitVertex(EmitContext& ctx, ScalarS32 stream);
void EmitEndPrimitive(EmitContext& ctx, const IR::Value& stream);
void EmitGetRegister(EmitContext& ctx);
void EmitSetRegister(EmitContext& ctx);
void EmitGetPred(EmitContext& ctx);
void EmitSetPred(EmitContext& ctx);
void EmitSetGotoVariable(EmitContext& ctx);
void EmitGetGotoVariable(EmitContext& ctx);
void EmitSetIndirectBranchVariable(EmitContext& ctx);
void EmitGetIndirectBranchVariable(EmitContext& ctx);
void EmitGetCbufU8(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, ScalarU32 offset);
void EmitGetCbufS8(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, ScalarU32 offset);
void EmitGetCbufU16(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, ScalarU32 offset);
void EmitGetCbufS16(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, ScalarU32 offset);
void EmitGetCbufU32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, ScalarU32 offset);
void EmitGetCbufF32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, ScalarU32 offset);
void EmitGetCbufU32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, ScalarU32 offset);
void EmitGetAttribute(EmitContext& ctx, IR::Inst& inst, IR::Attribute attr, ScalarU32 vertex);
void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, ScalarF32 value, ScalarU32 vertex);
void EmitGetAttributeIndexed(EmitContext& ctx, IR::Inst& inst, ScalarS32 offset, ScalarU32 vertex);
void EmitSetAttributeIndexed(EmitContext& ctx, ScalarU32 offset, ScalarF32 value, ScalarU32 vertex);
void EmitGetPatch(EmitContext& ctx, IR::Inst& inst, IR::Patch patch);
void EmitSetPatch(EmitContext& ctx, IR::Patch patch, ScalarF32 value);
void EmitSetFragColor(EmitContext& ctx, u32 index, u32 component, ScalarF32 value);
void EmitSetSampleMask(EmitContext& ctx, ScalarS32 value);
void EmitSetFragDepth(EmitContext& ctx, ScalarF32 value);
void EmitGetZFlag(EmitContext& ctx);
void EmitGetSFlag(EmitContext& ctx);
void EmitGetCFlag(EmitContext& ctx);
void EmitGetOFlag(EmitContext& ctx);
void EmitSetZFlag(EmitContext& ctx);
void EmitSetSFlag(EmitContext& ctx);
void EmitSetCFlag(EmitContext& ctx);
void EmitSetOFlag(EmitContext& ctx);
void EmitWorkgroupId(EmitContext& ctx, IR::Inst& inst);
void EmitLocalInvocationId(EmitContext& ctx, IR::Inst& inst);
void EmitInvocationId(EmitContext& ctx, IR::Inst& inst);
void EmitSampleId(EmitContext& ctx, IR::Inst& inst);
void EmitIsHelperInvocation(EmitContext& ctx, IR::Inst& inst);
void EmitYDirection(EmitContext& ctx, IR::Inst& inst);
void EmitLoadLocal(EmitContext& ctx, IR::Inst& inst, ScalarU32 word_offset);
void EmitWriteLocal(EmitContext& ctx, ScalarU32 word_offset, ScalarU32 value);
void EmitUndefU1(EmitContext& ctx, IR::Inst& inst);
void EmitUndefU8(EmitContext& ctx, IR::Inst& inst);
void EmitUndefU16(EmitContext& ctx, IR::Inst& inst);
void EmitUndefU32(EmitContext& ctx, IR::Inst& inst);
void EmitUndefU64(EmitContext& ctx, IR::Inst& inst);
void EmitLoadGlobalU8(EmitContext& ctx, IR::Inst& inst, Register address);
void EmitLoadGlobalS8(EmitContext& ctx, IR::Inst& inst, Register address);
void EmitLoadGlobalU16(EmitContext& ctx, IR::Inst& inst, Register address);
void EmitLoadGlobalS16(EmitContext& ctx, IR::Inst& inst, Register address);
void EmitLoadGlobal32(EmitContext& ctx, IR::Inst& inst, Register address);
void EmitLoadGlobal64(EmitContext& ctx, IR::Inst& inst, Register address);
void EmitLoadGlobal128(EmitContext& ctx, IR::Inst& inst, Register address);
void EmitWriteGlobalU8(EmitContext& ctx, Register address, Register value);
void EmitWriteGlobalS8(EmitContext& ctx, Register address, Register value);
void EmitWriteGlobalU16(EmitContext& ctx, Register address, Register value);
void EmitWriteGlobalS16(EmitContext& ctx, Register address, Register value);
void EmitWriteGlobal32(EmitContext& ctx, Register address, ScalarU32 value);
void EmitWriteGlobal64(EmitContext& ctx, Register address, Register value);
void EmitWriteGlobal128(EmitContext& ctx, Register address, Register value);
void EmitLoadStorageU8(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
ScalarU32 offset);
void EmitLoadStorageS8(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
ScalarU32 offset);
void EmitLoadStorageU16(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
ScalarU32 offset);
void EmitLoadStorageS16(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
ScalarU32 offset);
void EmitLoadStorage32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
ScalarU32 offset);
void EmitLoadStorage64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
ScalarU32 offset);
void EmitLoadStorage128(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
ScalarU32 offset);
void EmitWriteStorageU8(EmitContext& ctx, const IR::Value& binding, ScalarU32 offset,
ScalarU32 value);
void EmitWriteStorageS8(EmitContext& ctx, const IR::Value& binding, ScalarU32 offset,
ScalarS32 value);
void EmitWriteStorageU16(EmitContext& ctx, const IR::Value& binding, ScalarU32 offset,
ScalarU32 value);
void EmitWriteStorageS16(EmitContext& ctx, const IR::Value& binding, ScalarU32 offset,
ScalarS32 value);
void EmitWriteStorage32(EmitContext& ctx, const IR::Value& binding, ScalarU32 offset,
ScalarU32 value);
void EmitWriteStorage64(EmitContext& ctx, const IR::Value& binding, ScalarU32 offset,
Register value);
void EmitWriteStorage128(EmitContext& ctx, const IR::Value& binding, ScalarU32 offset,
Register value);
void EmitLoadSharedU8(EmitContext& ctx, IR::Inst& inst, ScalarU32 offset);
void EmitLoadSharedS8(EmitContext& ctx, IR::Inst& inst, ScalarU32 offset);
void EmitLoadSharedU16(EmitContext& ctx, IR::Inst& inst, ScalarU32 offset);
void EmitLoadSharedS16(EmitContext& ctx, IR::Inst& inst, ScalarU32 offset);
void EmitLoadSharedU32(EmitContext& ctx, IR::Inst& inst, ScalarU32 offset);
void EmitLoadSharedU64(EmitContext& ctx, IR::Inst& inst, ScalarU32 offset);
void EmitLoadSharedU128(EmitContext& ctx, IR::Inst& inst, ScalarU32 offset);
void EmitWriteSharedU8(EmitContext& ctx, ScalarU32 offset, ScalarU32 value);
void EmitWriteSharedU16(EmitContext& ctx, ScalarU32 offset, ScalarU32 value);
void EmitWriteSharedU32(EmitContext& ctx, ScalarU32 offset, ScalarU32 value);
void EmitWriteSharedU64(EmitContext& ctx, ScalarU32 offset, Register value);
void EmitWriteSharedU128(EmitContext& ctx, ScalarU32 offset, Register value);
void EmitCompositeConstructU32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& e1,
const IR::Value& e2);
void EmitCompositeConstructU32x3(EmitContext& ctx, IR::Inst& inst, const IR::Value& e1,
const IR::Value& e2, const IR::Value& e3);
void EmitCompositeConstructU32x4(EmitContext& ctx, IR::Inst& inst, const IR::Value& e1,
const IR::Value& e2, const IR::Value& e3, const IR::Value& e4);
void EmitCompositeExtractU32x2(EmitContext& ctx, IR::Inst& inst, Register composite, u32 index);
void EmitCompositeExtractU32x3(EmitContext& ctx, IR::Inst& inst, Register composite, u32 index);
void EmitCompositeExtractU32x4(EmitContext& ctx, IR::Inst& inst, Register composite, u32 index);
void EmitCompositeInsertU32x2(EmitContext& ctx, Register composite, ScalarU32 object, u32 index);
void EmitCompositeInsertU32x3(EmitContext& ctx, Register composite, ScalarU32 object, u32 index);
void EmitCompositeInsertU32x4(EmitContext& ctx, Register composite, ScalarU32 object, u32 index);
void EmitCompositeConstructF16x2(EmitContext& ctx, Register e1, Register e2);
void EmitCompositeConstructF16x3(EmitContext& ctx, Register e1, Register e2, Register e3);
void EmitCompositeConstructF16x4(EmitContext& ctx, Register e1, Register e2, Register e3,
Register e4);
void EmitCompositeExtractF16x2(EmitContext& ctx, Register composite, u32 index);
void EmitCompositeExtractF16x3(EmitContext& ctx, Register composite, u32 index);
void EmitCompositeExtractF16x4(EmitContext& ctx, Register composite, u32 index);
void EmitCompositeInsertF16x2(EmitContext& ctx, Register composite, Register object, u32 index);
void EmitCompositeInsertF16x3(EmitContext& ctx, Register composite, Register object, u32 index);
void EmitCompositeInsertF16x4(EmitContext& ctx, Register composite, Register object, u32 index);
void EmitCompositeConstructF32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& e1,
const IR::Value& e2);
void EmitCompositeConstructF32x3(EmitContext& ctx, IR::Inst& inst, const IR::Value& e1,
const IR::Value& e2, const IR::Value& e3);
void EmitCompositeConstructF32x4(EmitContext& ctx, IR::Inst& inst, const IR::Value& e1,
const IR::Value& e2, const IR::Value& e3, const IR::Value& e4);
void EmitCompositeExtractF32x2(EmitContext& ctx, IR::Inst& inst, Register composite, u32 index);
void EmitCompositeExtractF32x3(EmitContext& ctx, IR::Inst& inst, Register composite, u32 index);
void EmitCompositeExtractF32x4(EmitContext& ctx, IR::Inst& inst, Register composite, u32 index);
void EmitCompositeInsertF32x2(EmitContext& ctx, IR::Inst& inst, Register composite,
ScalarF32 object, u32 index);
void EmitCompositeInsertF32x3(EmitContext& ctx, IR::Inst& inst, Register composite,
ScalarF32 object, u32 index);
void EmitCompositeInsertF32x4(EmitContext& ctx, IR::Inst& inst, Register composite,
ScalarF32 object, u32 index);
void EmitCompositeConstructF64x2(EmitContext& ctx);
void EmitCompositeConstructF64x3(EmitContext& ctx);
void EmitCompositeConstructF64x4(EmitContext& ctx);
void EmitCompositeExtractF64x2(EmitContext& ctx);
void EmitCompositeExtractF64x3(EmitContext& ctx);
void EmitCompositeExtractF64x4(EmitContext& ctx);
void EmitCompositeInsertF64x2(EmitContext& ctx, Register composite, Register object, u32 index);
void EmitCompositeInsertF64x3(EmitContext& ctx, Register composite, Register object, u32 index);
void EmitCompositeInsertF64x4(EmitContext& ctx, Register composite, Register object, u32 index);
void EmitSelectU1(EmitContext& ctx, IR::Inst& inst, ScalarS32 cond, ScalarS32 true_value,
ScalarS32 false_value);
void EmitSelectU8(EmitContext& ctx, ScalarS32 cond, ScalarS32 true_value, ScalarS32 false_value);
void EmitSelectU16(EmitContext& ctx, ScalarS32 cond, ScalarS32 true_value, ScalarS32 false_value);
void EmitSelectU32(EmitContext& ctx, IR::Inst& inst, ScalarS32 cond, ScalarS32 true_value,
ScalarS32 false_value);
void EmitSelectU64(EmitContext& ctx, IR::Inst& inst, ScalarS32 cond, Register true_value,
Register false_value);
void EmitSelectF16(EmitContext& ctx, ScalarS32 cond, Register true_value, Register false_value);
void EmitSelectF32(EmitContext& ctx, IR::Inst& inst, ScalarS32 cond, ScalarS32 true_value,
ScalarS32 false_value);
void EmitSelectF64(EmitContext& ctx, ScalarS32 cond, Register true_value, Register false_value);
void EmitBitCastU16F16(EmitContext& ctx, IR::Inst& inst, const IR::Value& value);
void EmitBitCastU32F32(EmitContext& ctx, IR::Inst& inst, const IR::Value& value);
void EmitBitCastU64F64(EmitContext& ctx, IR::Inst& inst, const IR::Value& value);
void EmitBitCastF16U16(EmitContext& ctx, IR::Inst& inst, const IR::Value& value);
void EmitBitCastF32U32(EmitContext& ctx, IR::Inst& inst, const IR::Value& value);
void EmitBitCastF64U64(EmitContext& ctx, IR::Inst& inst, const IR::Value& value);
void EmitPackUint2x32(EmitContext& ctx, IR::Inst& inst, Register value);
void EmitUnpackUint2x32(EmitContext& ctx, IR::Inst& inst, Register value);
void EmitPackFloat2x16(EmitContext& ctx, Register value);
void EmitUnpackFloat2x16(EmitContext& ctx, Register value);
void EmitPackHalf2x16(EmitContext& ctx, IR::Inst& inst, Register value);
void EmitUnpackHalf2x16(EmitContext& ctx, IR::Inst& inst, Register value);
void EmitPackDouble2x32(EmitContext& ctx, IR::Inst& inst, Register value);
void EmitUnpackDouble2x32(EmitContext& ctx, IR::Inst& inst, Register value);
void EmitGetZeroFromOp(EmitContext& ctx);
void EmitGetSignFromOp(EmitContext& ctx);
void EmitGetCarryFromOp(EmitContext& ctx);
void EmitGetOverflowFromOp(EmitContext& ctx);
void EmitGetSparseFromOp(EmitContext& ctx);
void EmitGetInBoundsFromOp(EmitContext& ctx);
void EmitFPAbs16(EmitContext& ctx, IR::Inst& inst, Register value);
void EmitFPAbs32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value);
void EmitFPAbs64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value);
void EmitFPAdd16(EmitContext& ctx, IR::Inst& inst, Register a, Register b);
void EmitFPAdd32(EmitContext& ctx, IR::Inst& inst, ScalarF32 a, ScalarF32 b);
void EmitFPAdd64(EmitContext& ctx, IR::Inst& inst, ScalarF64 a, ScalarF64 b);
void EmitFPFma16(EmitContext& ctx, IR::Inst& inst, Register a, Register b, Register c);
void EmitFPFma32(EmitContext& ctx, IR::Inst& inst, ScalarF32 a, ScalarF32 b, ScalarF32 c);
void EmitFPFma64(EmitContext& ctx, IR::Inst& inst, ScalarF64 a, ScalarF64 b, ScalarF64 c);
void EmitFPMax32(EmitContext& ctx, IR::Inst& inst, ScalarF32 a, ScalarF32 b);
void EmitFPMax64(EmitContext& ctx, IR::Inst& inst, ScalarF64 a, ScalarF64 b);
void EmitFPMin32(EmitContext& ctx, IR::Inst& inst, ScalarF32 a, ScalarF32 b);
void EmitFPMin64(EmitContext& ctx, IR::Inst& inst, ScalarF64 a, ScalarF64 b);
void EmitFPMul16(EmitContext& ctx, IR::Inst& inst, Register a, Register b);
void EmitFPMul32(EmitContext& ctx, IR::Inst& inst, ScalarF32 a, ScalarF32 b);
void EmitFPMul64(EmitContext& ctx, IR::Inst& inst, ScalarF64 a, ScalarF64 b);
void EmitFPNeg16(EmitContext& ctx, Register value);
void EmitFPNeg32(EmitContext& ctx, IR::Inst& inst, ScalarRegister value);
void EmitFPNeg64(EmitContext& ctx, IR::Inst& inst, Register value);
void EmitFPSin(EmitContext& ctx, IR::Inst& inst, ScalarF32 value);
void EmitFPCos(EmitContext& ctx, IR::Inst& inst, ScalarF32 value);
void EmitFPExp2(EmitContext& ctx, IR::Inst& inst, ScalarF32 value);
void EmitFPLog2(EmitContext& ctx, IR::Inst& inst, ScalarF32 value);
void EmitFPRecip32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value);
void EmitFPRecip64(EmitContext& ctx, Register value);
void EmitFPRecipSqrt32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value);
void EmitFPRecipSqrt64(EmitContext& ctx, Register value);
void EmitFPSqrt(EmitContext& ctx, IR::Inst& inst, ScalarF32 value);
void EmitFPSaturate16(EmitContext& ctx, Register value);
void EmitFPSaturate32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value);
void EmitFPSaturate64(EmitContext& ctx, Register value);
void EmitFPClamp16(EmitContext& ctx, Register value, Register min_value, Register max_value);
void EmitFPClamp32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value, ScalarF32 min_value,
ScalarF32 max_value);
void EmitFPClamp64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value, ScalarF64 min_value,
ScalarF64 max_value);
void EmitFPRoundEven16(EmitContext& ctx, Register value);
void EmitFPRoundEven32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value);
void EmitFPRoundEven64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value);
void EmitFPFloor16(EmitContext& ctx, Register value);
void EmitFPFloor32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value);
void EmitFPFloor64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value);
void EmitFPCeil16(EmitContext& ctx, Register value);
void EmitFPCeil32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value);
void EmitFPCeil64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value);
void EmitFPTrunc16(EmitContext& ctx, Register value);
void EmitFPTrunc32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value);
void EmitFPTrunc64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value);
void EmitFPOrdEqual16(EmitContext& ctx, Register lhs, Register rhs);
void EmitFPOrdEqual32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs);
void EmitFPOrdEqual64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs);
void EmitFPUnordEqual16(EmitContext& ctx, Register lhs, Register rhs);
void EmitFPUnordEqual32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs);
void EmitFPUnordEqual64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs);
void EmitFPOrdNotEqual16(EmitContext& ctx, Register lhs, Register rhs);
void EmitFPOrdNotEqual32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs);
void EmitFPOrdNotEqual64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs);
void EmitFPUnordNotEqual16(EmitContext& ctx, Register lhs, Register rhs);
void EmitFPUnordNotEqual32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs);
void EmitFPUnordNotEqual64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs);
void EmitFPOrdLessThan16(EmitContext& ctx, Register lhs, Register rhs);
void EmitFPOrdLessThan32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs);
void EmitFPOrdLessThan64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs);
void EmitFPUnordLessThan16(EmitContext& ctx, Register lhs, Register rhs);
void EmitFPUnordLessThan32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs);
void EmitFPUnordLessThan64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs);
void EmitFPOrdGreaterThan16(EmitContext& ctx, Register lhs, Register rhs);
void EmitFPOrdGreaterThan32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs);
void EmitFPOrdGreaterThan64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs);
void EmitFPUnordGreaterThan16(EmitContext& ctx, Register lhs, Register rhs);
void EmitFPUnordGreaterThan32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs);
void EmitFPUnordGreaterThan64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs);
void EmitFPOrdLessThanEqual16(EmitContext& ctx, Register lhs, Register rhs);
void EmitFPOrdLessThanEqual32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs);
void EmitFPOrdLessThanEqual64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs);
void EmitFPUnordLessThanEqual16(EmitContext& ctx, Register lhs, Register rhs);
void EmitFPUnordLessThanEqual32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs);
void EmitFPUnordLessThanEqual64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs);
void EmitFPOrdGreaterThanEqual16(EmitContext& ctx, Register lhs, Register rhs);
void EmitFPOrdGreaterThanEqual32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs);
void EmitFPOrdGreaterThanEqual64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs);
void EmitFPUnordGreaterThanEqual16(EmitContext& ctx, Register lhs, Register rhs);
void EmitFPUnordGreaterThanEqual32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs);
void EmitFPUnordGreaterThanEqual64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs);
void EmitFPIsNan16(EmitContext& ctx, Register value);
void EmitFPIsNan32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value);
void EmitFPIsNan64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value);
void EmitIAdd32(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b);
void EmitIAdd64(EmitContext& ctx, IR::Inst& inst, Register a, Register b);
void EmitISub32(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b);
void EmitISub64(EmitContext& ctx, IR::Inst& inst, Register a, Register b);
void EmitIMul32(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b);
void EmitINeg32(EmitContext& ctx, IR::Inst& inst, ScalarS32 value);
void EmitINeg64(EmitContext& ctx, IR::Inst& inst, Register value);
void EmitIAbs32(EmitContext& ctx, IR::Inst& inst, ScalarS32 value);
void EmitShiftLeftLogical32(EmitContext& ctx, IR::Inst& inst, ScalarU32 base, ScalarU32 shift);
void EmitShiftLeftLogical64(EmitContext& ctx, IR::Inst& inst, ScalarRegister base, ScalarU32 shift);
void EmitShiftRightLogical32(EmitContext& ctx, IR::Inst& inst, ScalarU32 base, ScalarU32 shift);
void EmitShiftRightLogical64(EmitContext& ctx, IR::Inst& inst, ScalarRegister base,
ScalarU32 shift);
void EmitShiftRightArithmetic32(EmitContext& ctx, IR::Inst& inst, ScalarS32 base, ScalarS32 shift);
void EmitShiftRightArithmetic64(EmitContext& ctx, IR::Inst& inst, ScalarRegister base,
ScalarS32 shift);
void EmitBitwiseAnd32(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b);
void EmitBitwiseOr32(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b);
void EmitBitwiseXor32(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b);
void EmitBitFieldInsert(EmitContext& ctx, IR::Inst& inst, ScalarS32 base, ScalarS32 insert,
ScalarS32 offset, ScalarS32 count);
void EmitBitFieldSExtract(EmitContext& ctx, IR::Inst& inst, ScalarS32 base, ScalarS32 offset,
ScalarS32 count);
void EmitBitFieldUExtract(EmitContext& ctx, IR::Inst& inst, ScalarU32 base, ScalarU32 offset,
ScalarU32 count);
void EmitBitReverse32(EmitContext& ctx, IR::Inst& inst, ScalarS32 value);
void EmitBitCount32(EmitContext& ctx, IR::Inst& inst, ScalarS32 value);
void EmitBitwiseNot32(EmitContext& ctx, IR::Inst& inst, ScalarS32 value);
void EmitFindSMsb32(EmitContext& ctx, IR::Inst& inst, ScalarS32 value);
void EmitFindUMsb32(EmitContext& ctx, IR::Inst& inst, ScalarU32 value);
void EmitSMin32(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b);
void EmitUMin32(EmitContext& ctx, IR::Inst& inst, ScalarU32 a, ScalarU32 b);
void EmitSMax32(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b);
void EmitUMax32(EmitContext& ctx, IR::Inst& inst, ScalarU32 a, ScalarU32 b);
void EmitSClamp32(EmitContext& ctx, IR::Inst& inst, ScalarS32 value, ScalarS32 min, ScalarS32 max);
void EmitUClamp32(EmitContext& ctx, IR::Inst& inst, ScalarU32 value, ScalarU32 min, ScalarU32 max);
void EmitSLessThan(EmitContext& ctx, IR::Inst& inst, ScalarS32 lhs, ScalarS32 rhs);
void EmitULessThan(EmitContext& ctx, IR::Inst& inst, ScalarU32 lhs, ScalarU32 rhs);
void EmitIEqual(EmitContext& ctx, IR::Inst& inst, ScalarS32 lhs, ScalarS32 rhs);
void EmitSLessThanEqual(EmitContext& ctx, IR::Inst& inst, ScalarS32 lhs, ScalarS32 rhs);
void EmitULessThanEqual(EmitContext& ctx, IR::Inst& inst, ScalarU32 lhs, ScalarU32 rhs);
void EmitSGreaterThan(EmitContext& ctx, IR::Inst& inst, ScalarS32 lhs, ScalarS32 rhs);
void EmitUGreaterThan(EmitContext& ctx, IR::Inst& inst, ScalarU32 lhs, ScalarU32 rhs);
void EmitINotEqual(EmitContext& ctx, IR::Inst& inst, ScalarS32 lhs, ScalarS32 rhs);
void EmitSGreaterThanEqual(EmitContext& ctx, IR::Inst& inst, ScalarS32 lhs, ScalarS32 rhs);
void EmitUGreaterThanEqual(EmitContext& ctx, IR::Inst& inst, ScalarU32 lhs, ScalarU32 rhs);
void EmitSharedAtomicIAdd32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset,
ScalarU32 value);
void EmitSharedAtomicSMin32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset,
ScalarS32 value);
void EmitSharedAtomicUMin32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset,
ScalarU32 value);
void EmitSharedAtomicSMax32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset,
ScalarS32 value);
void EmitSharedAtomicUMax32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset,
ScalarU32 value);
void EmitSharedAtomicInc32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset,
ScalarU32 value);
void EmitSharedAtomicDec32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset,
ScalarU32 value);
void EmitSharedAtomicAnd32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset,
ScalarU32 value);
void EmitSharedAtomicOr32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset,
ScalarU32 value);
void EmitSharedAtomicXor32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset,
ScalarU32 value);
void EmitSharedAtomicExchange32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset,
ScalarU32 value);
void EmitSharedAtomicExchange64(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset,
Register value);
void EmitStorageAtomicIAdd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
ScalarU32 offset, ScalarU32 value);
void EmitStorageAtomicSMin32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
ScalarU32 offset, ScalarS32 value);
void EmitStorageAtomicUMin32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
ScalarU32 offset, ScalarU32 value);
void EmitStorageAtomicSMax32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
ScalarU32 offset, ScalarS32 value);
void EmitStorageAtomicUMax32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
ScalarU32 offset, ScalarU32 value);
void EmitStorageAtomicInc32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
ScalarU32 offset, ScalarU32 value);
void EmitStorageAtomicDec32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
ScalarU32 offset, ScalarU32 value);
void EmitStorageAtomicAnd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
ScalarU32 offset, ScalarU32 value);
void EmitStorageAtomicOr32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
ScalarU32 offset, ScalarU32 value);
void EmitStorageAtomicXor32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
ScalarU32 offset, ScalarU32 value);
void EmitStorageAtomicExchange32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
ScalarU32 offset, ScalarU32 value);
void EmitStorageAtomicIAdd64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
ScalarU32 offset, Register value);
void EmitStorageAtomicSMin64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
ScalarU32 offset, Register value);
void EmitStorageAtomicUMin64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
ScalarU32 offset, Register value);
void EmitStorageAtomicSMax64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
ScalarU32 offset, Register value);
void EmitStorageAtomicUMax64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
ScalarU32 offset, Register value);
void EmitStorageAtomicAnd64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
ScalarU32 offset, Register value);
void EmitStorageAtomicOr64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
ScalarU32 offset, Register value);
void EmitStorageAtomicXor64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
ScalarU32 offset, Register value);
void EmitStorageAtomicExchange64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
ScalarU32 offset, Register value);
void EmitStorageAtomicAddF32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
ScalarU32 offset, ScalarF32 value);
void EmitStorageAtomicAddF16x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
ScalarU32 offset, Register value);
void EmitStorageAtomicAddF32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
ScalarU32 offset, Register value);
void EmitStorageAtomicMinF16x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
ScalarU32 offset, Register value);
void EmitStorageAtomicMinF32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
ScalarU32 offset, Register value);
void EmitStorageAtomicMaxF16x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
ScalarU32 offset, Register value);
void EmitStorageAtomicMaxF32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
ScalarU32 offset, Register value);
void EmitGlobalAtomicIAdd32(EmitContext& ctx);
void EmitGlobalAtomicSMin32(EmitContext& ctx);
void EmitGlobalAtomicUMin32(EmitContext& ctx);
void EmitGlobalAtomicSMax32(EmitContext& ctx);
void EmitGlobalAtomicUMax32(EmitContext& ctx);
void EmitGlobalAtomicInc32(EmitContext& ctx);
void EmitGlobalAtomicDec32(EmitContext& ctx);
void EmitGlobalAtomicAnd32(EmitContext& ctx);
void EmitGlobalAtomicOr32(EmitContext& ctx);
void EmitGlobalAtomicXor32(EmitContext& ctx);
void EmitGlobalAtomicExchange32(EmitContext& ctx);
void EmitGlobalAtomicIAdd64(EmitContext& ctx);
void EmitGlobalAtomicSMin64(EmitContext& ctx);
void EmitGlobalAtomicUMin64(EmitContext& ctx);
void EmitGlobalAtomicSMax64(EmitContext& ctx);
void EmitGlobalAtomicUMax64(EmitContext& ctx);
void EmitGlobalAtomicInc64(EmitContext& ctx);
void EmitGlobalAtomicDec64(EmitContext& ctx);
void EmitGlobalAtomicAnd64(EmitContext& ctx);
void EmitGlobalAtomicOr64(EmitContext& ctx);
void EmitGlobalAtomicXor64(EmitContext& ctx);
void EmitGlobalAtomicExchange64(EmitContext& ctx);
void EmitGlobalAtomicAddF32(EmitContext& ctx);
void EmitGlobalAtomicAddF16x2(EmitContext& ctx);
void EmitGlobalAtomicAddF32x2(EmitContext& ctx);
void EmitGlobalAtomicMinF16x2(EmitContext& ctx);
void EmitGlobalAtomicMinF32x2(EmitContext& ctx);
void EmitGlobalAtomicMaxF16x2(EmitContext& ctx);
void EmitGlobalAtomicMaxF32x2(EmitContext& ctx);
void EmitLogicalOr(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b);
void EmitLogicalAnd(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b);
void EmitLogicalXor(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b);
void EmitLogicalNot(EmitContext& ctx, IR::Inst& inst, ScalarS32 value);
void EmitConvertS16F16(EmitContext& ctx, IR::Inst& inst, Register value);
void EmitConvertS16F32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value);
void EmitConvertS16F64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value);
void EmitConvertS32F16(EmitContext& ctx, IR::Inst& inst, Register value);
void EmitConvertS32F32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value);
void EmitConvertS32F64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value);
void EmitConvertS64F16(EmitContext& ctx, IR::Inst& inst, Register value);
void EmitConvertS64F32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value);
void EmitConvertS64F64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value);
void EmitConvertU16F16(EmitContext& ctx, IR::Inst& inst, Register value);
void EmitConvertU16F32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value);
void EmitConvertU16F64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value);
void EmitConvertU32F16(EmitContext& ctx, IR::Inst& inst, Register value);
void EmitConvertU32F32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value);
void EmitConvertU32F64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value);
void EmitConvertU64F16(EmitContext& ctx, IR::Inst& inst, Register value);
void EmitConvertU64F32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value);
void EmitConvertU64F64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value);
void EmitConvertU64U32(EmitContext& ctx, IR::Inst& inst, ScalarU32 value);
void EmitConvertU32U64(EmitContext& ctx, IR::Inst& inst, Register value);
void EmitConvertF16F32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value);
void EmitConvertF32F16(EmitContext& ctx, IR::Inst& inst, Register value);
void EmitConvertF32F64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value);
void EmitConvertF64F32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value);
void EmitConvertF16S8(EmitContext& ctx, IR::Inst& inst, Register value);
void EmitConvertF16S16(EmitContext& ctx, IR::Inst& inst, Register value);
void EmitConvertF16S32(EmitContext& ctx, IR::Inst& inst, ScalarS32 value);
void EmitConvertF16S64(EmitContext& ctx, IR::Inst& inst, Register value);
void EmitConvertF16U8(EmitContext& ctx, IR::Inst& inst, Register value);
void EmitConvertF16U16(EmitContext& ctx, IR::Inst& inst, Register value);
void EmitConvertF16U32(EmitContext& ctx, IR::Inst& inst, ScalarU32 value);
void EmitConvertF16U64(EmitContext& ctx, IR::Inst& inst, Register value);
void EmitConvertF32S8(EmitContext& ctx, IR::Inst& inst, Register value);
void EmitConvertF32S16(EmitContext& ctx, IR::Inst& inst, Register value);
void EmitConvertF32S32(EmitContext& ctx, IR::Inst& inst, ScalarS32 value);
void EmitConvertF32S64(EmitContext& ctx, IR::Inst& inst, Register value);
void EmitConvertF32U8(EmitContext& ctx, IR::Inst& inst, Register value);
void EmitConvertF32U16(EmitContext& ctx, IR::Inst& inst, Register value);
void EmitConvertF32U32(EmitContext& ctx, IR::Inst& inst, ScalarU32 value);
void EmitConvertF32U64(EmitContext& ctx, IR::Inst& inst, Register value);
void EmitConvertF64S8(EmitContext& ctx, IR::Inst& inst, Register value);
void EmitConvertF64S16(EmitContext& ctx, IR::Inst& inst, Register value);
void EmitConvertF64S32(EmitContext& ctx, IR::Inst& inst, ScalarS32 value);
void EmitConvertF64S64(EmitContext& ctx, IR::Inst& inst, Register value);
void EmitConvertF64U8(EmitContext& ctx, IR::Inst& inst, Register value);
void EmitConvertF64U16(EmitContext& ctx, IR::Inst& inst, Register value);
void EmitConvertF64U32(EmitContext& ctx, IR::Inst& inst, ScalarU32 value);
void EmitConvertF64U64(EmitContext& ctx, IR::Inst& inst, Register value);
void EmitBindlessImageSampleImplicitLod(EmitContext&);
void EmitBindlessImageSampleExplicitLod(EmitContext&);
void EmitBindlessImageSampleDrefImplicitLod(EmitContext&);
void EmitBindlessImageSampleDrefExplicitLod(EmitContext&);
void EmitBindlessImageGather(EmitContext&);
void EmitBindlessImageGatherDref(EmitContext&);
void EmitBindlessImageFetch(EmitContext&);
void EmitBindlessImageQueryDimensions(EmitContext&);
void EmitBindlessImageQueryLod(EmitContext&);
void EmitBindlessImageGradient(EmitContext&);
void EmitBindlessImageRead(EmitContext&);
void EmitBindlessImageWrite(EmitContext&);
void EmitBoundImageSampleImplicitLod(EmitContext&);
void EmitBoundImageSampleExplicitLod(EmitContext&);
void EmitBoundImageSampleDrefImplicitLod(EmitContext&);
void EmitBoundImageSampleDrefExplicitLod(EmitContext&);
void EmitBoundImageGather(EmitContext&);
void EmitBoundImageGatherDref(EmitContext&);
void EmitBoundImageFetch(EmitContext&);
void EmitBoundImageQueryDimensions(EmitContext&);
void EmitBoundImageQueryLod(EmitContext&);
void EmitBoundImageGradient(EmitContext&);
void EmitBoundImageRead(EmitContext&);
void EmitBoundImageWrite(EmitContext&);
void EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
const IR::Value& coord, Register bias_lc, const IR::Value& offset);
void EmitImageSampleExplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
const IR::Value& coord, ScalarF32 lod, const IR::Value& offset);
void EmitImageSampleDrefImplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
const IR::Value& coord, const IR::Value& dref,
const IR::Value& bias_lc, const IR::Value& offset);
void EmitImageSampleDrefExplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
const IR::Value& coord, const IR::Value& dref,
const IR::Value& lod, const IR::Value& offset);
void EmitImageGather(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
const IR::Value& coord, const IR::Value& offset, const IR::Value& offset2);
void EmitImageGatherDref(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
const IR::Value& coord, const IR::Value& offset, const IR::Value& offset2,
const IR::Value& dref);
void EmitImageFetch(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
const IR::Value& coord, const IR::Value& offset, ScalarS32 lod, ScalarS32 ms);
void EmitImageQueryDimensions(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
ScalarS32 lod);
void EmitImageQueryLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord);
void EmitImageGradient(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
const IR::Value& coord, const IR::Value& derivatives,
const IR::Value& offset, const IR::Value& lod_clamp);
void EmitImageRead(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord);
void EmitImageWrite(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord,
Register color);
void EmitBindlessImageAtomicIAdd32(EmitContext&);
void EmitBindlessImageAtomicSMin32(EmitContext&);
void EmitBindlessImageAtomicUMin32(EmitContext&);
void EmitBindlessImageAtomicSMax32(EmitContext&);
void EmitBindlessImageAtomicUMax32(EmitContext&);
void EmitBindlessImageAtomicInc32(EmitContext&);
void EmitBindlessImageAtomicDec32(EmitContext&);
void EmitBindlessImageAtomicAnd32(EmitContext&);
void EmitBindlessImageAtomicOr32(EmitContext&);
void EmitBindlessImageAtomicXor32(EmitContext&);
void EmitBindlessImageAtomicExchange32(EmitContext&);
void EmitBoundImageAtomicIAdd32(EmitContext&);
void EmitBoundImageAtomicSMin32(EmitContext&);
void EmitBoundImageAtomicUMin32(EmitContext&);
void EmitBoundImageAtomicSMax32(EmitContext&);
void EmitBoundImageAtomicUMax32(EmitContext&);
void EmitBoundImageAtomicInc32(EmitContext&);
void EmitBoundImageAtomicDec32(EmitContext&);
void EmitBoundImageAtomicAnd32(EmitContext&);
void EmitBoundImageAtomicOr32(EmitContext&);
void EmitBoundImageAtomicXor32(EmitContext&);
void EmitBoundImageAtomicExchange32(EmitContext&);
void EmitImageAtomicIAdd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord,
ScalarU32 value);
void EmitImageAtomicSMin32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord,
ScalarS32 value);
void EmitImageAtomicUMin32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord,
ScalarU32 value);
void EmitImageAtomicSMax32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord,
ScalarS32 value);
void EmitImageAtomicUMax32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord,
ScalarU32 value);
void EmitImageAtomicInc32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord,
ScalarU32 value);
void EmitImageAtomicDec32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord,
ScalarU32 value);
void EmitImageAtomicAnd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord,
ScalarU32 value);
void EmitImageAtomicOr32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord,
ScalarU32 value);
void EmitImageAtomicXor32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord,
ScalarU32 value);
void EmitImageAtomicExchange32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
Register coord, ScalarU32 value);
void EmitLaneId(EmitContext& ctx, IR::Inst& inst);
void EmitVoteAll(EmitContext& ctx, IR::Inst& inst, ScalarS32 pred);
void EmitVoteAny(EmitContext& ctx, IR::Inst& inst, ScalarS32 pred);
void EmitVoteEqual(EmitContext& ctx, IR::Inst& inst, ScalarS32 pred);
void EmitSubgroupBallot(EmitContext& ctx, IR::Inst& inst, ScalarS32 pred);
void EmitSubgroupEqMask(EmitContext& ctx, IR::Inst& inst);
void EmitSubgroupLtMask(EmitContext& ctx, IR::Inst& inst);
void EmitSubgroupLeMask(EmitContext& ctx, IR::Inst& inst);
void EmitSubgroupGtMask(EmitContext& ctx, IR::Inst& inst);
void EmitSubgroupGeMask(EmitContext& ctx, IR::Inst& inst);
void EmitShuffleIndex(EmitContext& ctx, IR::Inst& inst, ScalarU32 value, ScalarU32 index,
const IR::Value& clamp, const IR::Value& segmentation_mask);
void EmitShuffleUp(EmitContext& ctx, IR::Inst& inst, ScalarU32 value, ScalarU32 index,
const IR::Value& clamp, const IR::Value& segmentation_mask);
void EmitShuffleDown(EmitContext& ctx, IR::Inst& inst, ScalarU32 value, ScalarU32 index,
const IR::Value& clamp, const IR::Value& segmentation_mask);
void EmitShuffleButterfly(EmitContext& ctx, IR::Inst& inst, ScalarU32 value, ScalarU32 index,
const IR::Value& clamp, const IR::Value& segmentation_mask);
void EmitFSwizzleAdd(EmitContext& ctx, IR::Inst& inst, ScalarF32 op_a, ScalarF32 op_b,
ScalarU32 swizzle);
void EmitDPdxFine(EmitContext& ctx, IR::Inst& inst, ScalarF32 op_a);
void EmitDPdyFine(EmitContext& ctx, IR::Inst& inst, ScalarF32 op_a);
void EmitDPdxCoarse(EmitContext& ctx, IR::Inst& inst, ScalarF32 op_a);
void EmitDPdyCoarse(EmitContext& ctx, IR::Inst& inst, ScalarF32 op_a);
} // namespace Shader::Backend::GLASM

View File

@@ -0,0 +1,294 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "shader_recompiler/backend/glasm/emit_context.h"
#include "shader_recompiler/backend/glasm/emit_glasm_instructions.h"
#include "shader_recompiler/frontend/ir/value.h"
namespace Shader::Backend::GLASM {
namespace {
void BitwiseLogicalOp(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b,
std::string_view lop) {
const auto zero = inst.GetAssociatedPseudoOperation(IR::Opcode::GetZeroFromOp);
const auto sign = inst.GetAssociatedPseudoOperation(IR::Opcode::GetSignFromOp);
if (zero) {
zero->Invalidate();
}
if (sign) {
sign->Invalidate();
}
if (zero || sign) {
ctx.reg_alloc.InvalidateConditionCodes();
}
const auto ret{ctx.reg_alloc.Define(inst)};
ctx.Add("{}.S {}.x,{},{};", lop, ret, a, b);
if (zero) {
ctx.Add("SEQ.S {},{},0;", *zero, ret);
}
if (sign) {
ctx.Add("SLT.S {},{},0;", *sign, ret);
}
}
} // Anonymous namespace
void EmitIAdd32(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b) {
const std::array flags{
inst.GetAssociatedPseudoOperation(IR::Opcode::GetZeroFromOp),
inst.GetAssociatedPseudoOperation(IR::Opcode::GetSignFromOp),
inst.GetAssociatedPseudoOperation(IR::Opcode::GetCarryFromOp),
inst.GetAssociatedPseudoOperation(IR::Opcode::GetOverflowFromOp),
};
for (IR::Inst* const flag_inst : flags) {
if (flag_inst) {
flag_inst->Invalidate();
}
}
const bool cc{inst.HasAssociatedPseudoOperation()};
const std::string_view cc_mod{cc ? ".CC" : ""};
if (cc) {
ctx.reg_alloc.InvalidateConditionCodes();
}
const auto ret{ctx.reg_alloc.Define(inst)};
ctx.Add("ADD.S{} {}.x,{},{};", cc_mod, ret, a, b);
if (!cc) {
return;
}
static constexpr std::array<std::string_view, 4> masks{"", "SF", "CF", "OF"};
for (size_t flag_index = 0; flag_index < flags.size(); ++flag_index) {
if (!flags[flag_index]) {
continue;
}
const auto flag_ret{ctx.reg_alloc.Define(*flags[flag_index])};
if (flag_index == 0) {
ctx.Add("SEQ.S {}.x,{}.x,0;", flag_ret, ret);
} else {
// We could use conditional execution here, but it's broken on Nvidia's compiler
ctx.Add("IF {}.x;"
"MOV.S {}.x,-1;"
"ELSE;"
"MOV.S {}.x,0;"
"ENDIF;",
masks[flag_index], flag_ret, flag_ret);
}
}
}
void EmitIAdd64(EmitContext& ctx, IR::Inst& inst, Register a, Register b) {
ctx.LongAdd("ADD.S64 {}.x,{}.x,{}.x;", inst, a, b);
}
void EmitISub32(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b) {
ctx.Add("SUB.S {}.x,{},{};", inst, a, b);
}
void EmitISub64(EmitContext& ctx, IR::Inst& inst, Register a, Register b) {
ctx.LongAdd("SUB.S64 {}.x,{}.x,{}.x;", inst, a, b);
}
void EmitIMul32(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b) {
ctx.Add("MUL.S {}.x,{},{};", inst, a, b);
}
void EmitINeg32(EmitContext& ctx, IR::Inst& inst, ScalarS32 value) {
if (value.type != Type::Register && static_cast<s32>(value.imm_u32) < 0) {
ctx.Add("MOV.S {},{};", inst, -static_cast<s32>(value.imm_u32));
} else {
ctx.Add("MOV.S {},-{};", inst, value);
}
}
void EmitINeg64(EmitContext& ctx, IR::Inst& inst, Register value) {
ctx.LongAdd("MOV.S64 {},-{};", inst, value);
}
void EmitIAbs32(EmitContext& ctx, IR::Inst& inst, ScalarS32 value) {
ctx.Add("ABS.S {},{};", inst, value);
}
void EmitShiftLeftLogical32(EmitContext& ctx, IR::Inst& inst, ScalarU32 base, ScalarU32 shift) {
ctx.Add("SHL.U {}.x,{},{};", inst, base, shift);
}
void EmitShiftLeftLogical64(EmitContext& ctx, IR::Inst& inst, ScalarRegister base,
ScalarU32 shift) {
ctx.LongAdd("SHL.U64 {}.x,{},{};", inst, base, shift);
}
void EmitShiftRightLogical32(EmitContext& ctx, IR::Inst& inst, ScalarU32 base, ScalarU32 shift) {
ctx.Add("SHR.U {}.x,{},{};", inst, base, shift);
}
void EmitShiftRightLogical64(EmitContext& ctx, IR::Inst& inst, ScalarRegister base,
ScalarU32 shift) {
ctx.LongAdd("SHR.U64 {}.x,{},{};", inst, base, shift);
}
void EmitShiftRightArithmetic32(EmitContext& ctx, IR::Inst& inst, ScalarS32 base, ScalarS32 shift) {
ctx.Add("SHR.S {}.x,{},{};", inst, base, shift);
}
void EmitShiftRightArithmetic64(EmitContext& ctx, IR::Inst& inst, ScalarRegister base,
ScalarS32 shift) {
ctx.LongAdd("SHR.S64 {}.x,{},{};", inst, base, shift);
}
void EmitBitwiseAnd32(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b) {
BitwiseLogicalOp(ctx, inst, a, b, "AND");
}
void EmitBitwiseOr32(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b) {
BitwiseLogicalOp(ctx, inst, a, b, "OR");
}
void EmitBitwiseXor32(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b) {
BitwiseLogicalOp(ctx, inst, a, b, "XOR");
}
void EmitBitFieldInsert(EmitContext& ctx, IR::Inst& inst, ScalarS32 base, ScalarS32 insert,
ScalarS32 offset, ScalarS32 count) {
const Register ret{ctx.reg_alloc.Define(inst)};
if (count.type != Type::Register && offset.type != Type::Register) {
ctx.Add("BFI.S {},{{{},{},0,0}},{},{};", ret, count, offset, insert, base);
} else {
ctx.Add("MOV.S RC.x,{};"
"MOV.S RC.y,{};"
"BFI.S {},RC,{},{};",
count, offset, ret, insert, base);
}
}
void EmitBitFieldSExtract(EmitContext& ctx, IR::Inst& inst, ScalarS32 base, ScalarS32 offset,
ScalarS32 count) {
const Register ret{ctx.reg_alloc.Define(inst)};
if (count.type != Type::Register && offset.type != Type::Register) {
ctx.Add("BFE.S {},{{{},{},0,0}},{};", ret, count, offset, base);
} else {
ctx.Add("MOV.S RC.x,{};"
"MOV.S RC.y,{};"
"BFE.S {},RC,{};",
count, offset, ret, base);
}
}
void EmitBitFieldUExtract(EmitContext& ctx, IR::Inst& inst, ScalarU32 base, ScalarU32 offset,
ScalarU32 count) {
const auto zero = inst.GetAssociatedPseudoOperation(IR::Opcode::GetZeroFromOp);
const auto sign = inst.GetAssociatedPseudoOperation(IR::Opcode::GetSignFromOp);
if (zero) {
zero->Invalidate();
}
if (sign) {
sign->Invalidate();
}
if (zero || sign) {
ctx.reg_alloc.InvalidateConditionCodes();
}
const Register ret{ctx.reg_alloc.Define(inst)};
if (count.type != Type::Register && offset.type != Type::Register) {
ctx.Add("BFE.U {},{{{},{},0,0}},{};", ret, count, offset, base);
} else {
ctx.Add("MOV.U RC.x,{};"
"MOV.U RC.y,{};"
"BFE.U {},RC,{};",
count, offset, ret, base);
}
if (zero) {
ctx.Add("SEQ.S {},{},0;", *zero, ret);
}
if (sign) {
ctx.Add("SLT.S {},{},0;", *sign, ret);
}
}
void EmitBitReverse32(EmitContext& ctx, IR::Inst& inst, ScalarS32 value) {
ctx.Add("BFR {},{};", inst, value);
}
void EmitBitCount32(EmitContext& ctx, IR::Inst& inst, ScalarS32 value) {
ctx.Add("BTC {},{};", inst, value);
}
void EmitBitwiseNot32(EmitContext& ctx, IR::Inst& inst, ScalarS32 value) {
ctx.Add("NOT.S {},{};", inst, value);
}
void EmitFindSMsb32(EmitContext& ctx, IR::Inst& inst, ScalarS32 value) {
ctx.Add("BTFM.S {},{};", inst, value);
}
void EmitFindUMsb32(EmitContext& ctx, IR::Inst& inst, ScalarU32 value) {
ctx.Add("BTFM.U {},{};", inst, value);
}
void EmitSMin32(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b) {
ctx.Add("MIN.S {},{},{};", inst, a, b);
}
void EmitUMin32(EmitContext& ctx, IR::Inst& inst, ScalarU32 a, ScalarU32 b) {
ctx.Add("MIN.U {},{},{};", inst, a, b);
}
void EmitSMax32(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b) {
ctx.Add("MAX.S {},{},{};", inst, a, b);
}
void EmitUMax32(EmitContext& ctx, IR::Inst& inst, ScalarU32 a, ScalarU32 b) {
ctx.Add("MAX.U {},{},{};", inst, a, b);
}
void EmitSClamp32(EmitContext& ctx, IR::Inst& inst, ScalarS32 value, ScalarS32 min, ScalarS32 max) {
const Register ret{ctx.reg_alloc.Define(inst)};
ctx.Add("MIN.S RC.x,{},{};"
"MAX.S {}.x,RC.x,{};",
max, value, ret, min);
}
void EmitUClamp32(EmitContext& ctx, IR::Inst& inst, ScalarU32 value, ScalarU32 min, ScalarU32 max) {
const Register ret{ctx.reg_alloc.Define(inst)};
ctx.Add("MIN.U RC.x,{},{};"
"MAX.U {}.x,RC.x,{};",
max, value, ret, min);
}
void EmitSLessThan(EmitContext& ctx, IR::Inst& inst, ScalarS32 lhs, ScalarS32 rhs) {
ctx.Add("SLT.S {}.x,{},{};", inst, lhs, rhs);
}
void EmitULessThan(EmitContext& ctx, IR::Inst& inst, ScalarU32 lhs, ScalarU32 rhs) {
ctx.Add("SLT.U {}.x,{},{};", inst, lhs, rhs);
}
void EmitIEqual(EmitContext& ctx, IR::Inst& inst, ScalarS32 lhs, ScalarS32 rhs) {
ctx.Add("SEQ.S {}.x,{},{};", inst, lhs, rhs);
}
void EmitSLessThanEqual(EmitContext& ctx, IR::Inst& inst, ScalarS32 lhs, ScalarS32 rhs) {
ctx.Add("SLE.S {}.x,{},{};", inst, lhs, rhs);
}
void EmitULessThanEqual(EmitContext& ctx, IR::Inst& inst, ScalarU32 lhs, ScalarU32 rhs) {
ctx.Add("SLE.U {}.x,{},{};", inst, lhs, rhs);
}
void EmitSGreaterThan(EmitContext& ctx, IR::Inst& inst, ScalarS32 lhs, ScalarS32 rhs) {
ctx.Add("SGT.S {}.x,{},{};", inst, lhs, rhs);
}
void EmitUGreaterThan(EmitContext& ctx, IR::Inst& inst, ScalarU32 lhs, ScalarU32 rhs) {
ctx.Add("SGT.U {}.x,{},{};", inst, lhs, rhs);
}
void EmitINotEqual(EmitContext& ctx, IR::Inst& inst, ScalarS32 lhs, ScalarS32 rhs) {
ctx.Add("SNE.U {}.x,{},{};", inst, lhs, rhs);
}
void EmitSGreaterThanEqual(EmitContext& ctx, IR::Inst& inst, ScalarS32 lhs, ScalarS32 rhs) {
ctx.Add("SGE.S {}.x,{},{};", inst, lhs, rhs);
}
void EmitUGreaterThanEqual(EmitContext& ctx, IR::Inst& inst, ScalarU32 lhs, ScalarU32 rhs) {
ctx.Add("SGE.U {}.x,{},{};", inst, lhs, rhs);
}
} // namespace Shader::Backend::GLASM

View File

@@ -0,0 +1,568 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <string_view>
#include "shader_recompiler/backend/glasm/emit_context.h"
#include "shader_recompiler/backend/glasm/emit_glasm_instructions.h"
#include "shader_recompiler/frontend/ir/program.h"
#include "shader_recompiler/frontend/ir/value.h"
#include "shader_recompiler/runtime_info.h"
namespace Shader::Backend::GLASM {
namespace {
void StorageOp(EmitContext& ctx, const IR::Value& binding, ScalarU32 offset,
std::string_view then_expr, std::string_view else_expr = {}) {
// Operate on bindless SSBO, call the expression with bounds checking
// address = c[binding].xy
// length = c[binding].z
const u32 sb_binding{binding.U32()};
ctx.Add("PK64.U DC,c[{}];" // pointer = address
"CVT.U64.U32 DC.z,{};" // offset = uint64_t(offset)
"ADD.U64 DC.x,DC.x,DC.z;" // pointer += offset
"SLT.U.CC RC.x,{},c[{}].z;", // cc = offset < length
sb_binding, offset, offset, sb_binding);
if (else_expr.empty()) {
ctx.Add("IF NE.x;{}ENDIF;", then_expr);
} else {
ctx.Add("IF NE.x;{}ELSE;{}ENDIF;", then_expr, else_expr);
}
}
void GlobalStorageOp(EmitContext& ctx, Register address, bool pointer_based, std::string_view expr,
std::string_view else_expr = {}) {
const size_t num_buffers{ctx.info.storage_buffers_descriptors.size()};
for (size_t index = 0; index < num_buffers; ++index) {
if (!ctx.info.nvn_buffer_used[index]) {
continue;
}
const auto& ssbo{ctx.info.storage_buffers_descriptors[index]};
ctx.Add("LDC.U64 DC.x,c{}[{}];" // ssbo_addr
"LDC.U32 RC.x,c{}[{}];" // ssbo_size_u32
"CVT.U64.U32 DC.y,RC.x;" // ssbo_size = ssbo_size_u32
"ADD.U64 DC.y,DC.y,DC.x;" // ssbo_end = ssbo_addr + ssbo_size
"SGE.U64 RC.x,{}.x,DC.x;" // a = input_addr >= ssbo_addr ? -1 : 0
"SLT.U64 RC.y,{}.x,DC.y;" // b = input_addr < ssbo_end ? -1 : 0
"AND.U.CC RC.x,RC.x,RC.y;" // cond = a && b
"IF NE.x;" // if cond
"SUB.U64 DC.x,{}.x,DC.x;", // offset = input_addr - ssbo_addr
ssbo.cbuf_index, ssbo.cbuf_offset, ssbo.cbuf_index, ssbo.cbuf_offset + 8, address,
address, address);
if (pointer_based) {
ctx.Add("PK64.U DC.y,c[{}];" // host_ssbo = cbuf
"ADD.U64 DC.x,DC.x,DC.y;" // host_addr = host_ssbo + offset
"{}"
"ELSE;",
index, expr);
} else {
ctx.Add("CVT.U32.U64 RC.x,DC.x;"
"{},ssbo{}[RC.x];"
"ELSE;",
expr, index);
}
}
if (!else_expr.empty()) {
ctx.Add("{}", else_expr);
}
const size_t num_used_buffers{ctx.info.nvn_buffer_used.count()};
for (size_t index = 0; index < num_used_buffers; ++index) {
ctx.Add("ENDIF;");
}
}
template <typename ValueType>
void Write(EmitContext& ctx, const IR::Value& binding, ScalarU32 offset, ValueType value,
std::string_view size) {
if (ctx.runtime_info.glasm_use_storage_buffers) {
ctx.Add("STB.{} {},ssbo{}[{}];", size, value, binding.U32(), offset);
} else {
StorageOp(ctx, binding, offset, fmt::format("STORE.{} {},DC.x;", size, value));
}
}
void Load(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, ScalarU32 offset,
std::string_view size) {
const Register ret{ctx.reg_alloc.Define(inst)};
if (ctx.runtime_info.glasm_use_storage_buffers) {
ctx.Add("LDB.{} {},ssbo{}[{}];", size, ret, binding.U32(), offset);
} else {
StorageOp(ctx, binding, offset, fmt::format("LOAD.{} {},DC.x;", size, ret),
fmt::format("MOV.U {},{{0,0,0,0}};", ret));
}
}
template <typename ValueType>
void GlobalWrite(EmitContext& ctx, Register address, ValueType value, std::string_view size) {
if (ctx.runtime_info.glasm_use_storage_buffers) {
GlobalStorageOp(ctx, address, false, fmt::format("STB.{} {}", size, value));
} else {
GlobalStorageOp(ctx, address, true, fmt::format("STORE.{} {},DC.x;", size, value));
}
}
void GlobalLoad(EmitContext& ctx, IR::Inst& inst, Register address, std::string_view size) {
const Register ret{ctx.reg_alloc.Define(inst)};
if (ctx.runtime_info.glasm_use_storage_buffers) {
GlobalStorageOp(ctx, address, false, fmt::format("LDB.{} {}", size, ret));
} else {
GlobalStorageOp(ctx, address, true, fmt::format("LOAD.{} {},DC.x;", size, ret),
fmt::format("MOV.S {},0;", ret));
}
}
template <typename ValueType>
void Atom(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, ScalarU32 offset,
ValueType value, std::string_view operation, std::string_view size) {
const Register ret{ctx.reg_alloc.Define(inst)};
if (ctx.runtime_info.glasm_use_storage_buffers) {
ctx.Add("ATOMB.{}.{} {},{},ssbo{}[{}];", operation, size, ret, value, binding.U32(),
offset);
} else {
StorageOp(ctx, binding, offset,
fmt::format("ATOM.{}.{} {},{},DC.x;", operation, size, ret, value));
}
}
} // Anonymous namespace
void EmitLoadGlobalU8(EmitContext& ctx, IR::Inst& inst, Register address) {
GlobalLoad(ctx, inst, address, "U8");
}
void EmitLoadGlobalS8(EmitContext& ctx, IR::Inst& inst, Register address) {
GlobalLoad(ctx, inst, address, "S8");
}
void EmitLoadGlobalU16(EmitContext& ctx, IR::Inst& inst, Register address) {
GlobalLoad(ctx, inst, address, "U16");
}
void EmitLoadGlobalS16(EmitContext& ctx, IR::Inst& inst, Register address) {
GlobalLoad(ctx, inst, address, "S16");
}
void EmitLoadGlobal32(EmitContext& ctx, IR::Inst& inst, Register address) {
GlobalLoad(ctx, inst, address, "U32");
}
void EmitLoadGlobal64(EmitContext& ctx, IR::Inst& inst, Register address) {
GlobalLoad(ctx, inst, address, "U32X2");
}
void EmitLoadGlobal128(EmitContext& ctx, IR::Inst& inst, Register address) {
GlobalLoad(ctx, inst, address, "U32X4");
}
void EmitWriteGlobalU8(EmitContext& ctx, Register address, Register value) {
GlobalWrite(ctx, address, value, "U8");
}
void EmitWriteGlobalS8(EmitContext& ctx, Register address, Register value) {
GlobalWrite(ctx, address, value, "S8");
}
void EmitWriteGlobalU16(EmitContext& ctx, Register address, Register value) {
GlobalWrite(ctx, address, value, "U16");
}
void EmitWriteGlobalS16(EmitContext& ctx, Register address, Register value) {
GlobalWrite(ctx, address, value, "S16");
}
void EmitWriteGlobal32(EmitContext& ctx, Register address, ScalarU32 value) {
GlobalWrite(ctx, address, value, "U32");
}
void EmitWriteGlobal64(EmitContext& ctx, Register address, Register value) {
GlobalWrite(ctx, address, value, "U32X2");
}
void EmitWriteGlobal128(EmitContext& ctx, Register address, Register value) {
GlobalWrite(ctx, address, value, "U32X4");
}
void EmitLoadStorageU8(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
ScalarU32 offset) {
Load(ctx, inst, binding, offset, "U8");
}
void EmitLoadStorageS8(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
ScalarU32 offset) {
Load(ctx, inst, binding, offset, "S8");
}
void EmitLoadStorageU16(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
ScalarU32 offset) {
Load(ctx, inst, binding, offset, "U16");
}
void EmitLoadStorageS16(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
ScalarU32 offset) {
Load(ctx, inst, binding, offset, "S16");
}
void EmitLoadStorage32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
ScalarU32 offset) {
Load(ctx, inst, binding, offset, "U32");
}
void EmitLoadStorage64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
ScalarU32 offset) {
Load(ctx, inst, binding, offset, "U32X2");
}
void EmitLoadStorage128(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
ScalarU32 offset) {
Load(ctx, inst, binding, offset, "U32X4");
}
void EmitWriteStorageU8(EmitContext& ctx, const IR::Value& binding, ScalarU32 offset,
ScalarU32 value) {
Write(ctx, binding, offset, value, "U8");
}
void EmitWriteStorageS8(EmitContext& ctx, const IR::Value& binding, ScalarU32 offset,
ScalarS32 value) {
Write(ctx, binding, offset, value, "S8");
}
void EmitWriteStorageU16(EmitContext& ctx, const IR::Value& binding, ScalarU32 offset,
ScalarU32 value) {
Write(ctx, binding, offset, value, "U16");
}
void EmitWriteStorageS16(EmitContext& ctx, const IR::Value& binding, ScalarU32 offset,
ScalarS32 value) {
Write(ctx, binding, offset, value, "S16");
}
void EmitWriteStorage32(EmitContext& ctx, const IR::Value& binding, ScalarU32 offset,
ScalarU32 value) {
Write(ctx, binding, offset, value, "U32");
}
void EmitWriteStorage64(EmitContext& ctx, const IR::Value& binding, ScalarU32 offset,
Register value) {
Write(ctx, binding, offset, value, "U32X2");
}
void EmitWriteStorage128(EmitContext& ctx, const IR::Value& binding, ScalarU32 offset,
Register value) {
Write(ctx, binding, offset, value, "U32X4");
}
void EmitSharedAtomicIAdd32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset,
ScalarU32 value) {
ctx.Add("ATOMS.ADD.U32 {},{},shared_mem[{}];", inst, value, pointer_offset);
}
void EmitSharedAtomicSMin32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset,
ScalarS32 value) {
ctx.Add("ATOMS.MIN.S32 {},{},shared_mem[{}];", inst, value, pointer_offset);
}
void EmitSharedAtomicUMin32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset,
ScalarU32 value) {
ctx.Add("ATOMS.MIN.U32 {},{},shared_mem[{}];", inst, value, pointer_offset);
}
void EmitSharedAtomicSMax32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset,
ScalarS32 value) {
ctx.Add("ATOMS.MAX.S32 {},{},shared_mem[{}];", inst, value, pointer_offset);
}
void EmitSharedAtomicUMax32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset,
ScalarU32 value) {
ctx.Add("ATOMS.MAX.U32 {},{},shared_mem[{}];", inst, value, pointer_offset);
}
void EmitSharedAtomicInc32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset,
ScalarU32 value) {
ctx.Add("ATOMS.IWRAP.U32 {},{},shared_mem[{}];", inst, value, pointer_offset);
}
void EmitSharedAtomicDec32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset,
ScalarU32 value) {
ctx.Add("ATOMS.DWRAP.U32 {},{},shared_mem[{}];", inst, value, pointer_offset);
}
void EmitSharedAtomicAnd32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset,
ScalarU32 value) {
ctx.Add("ATOMS.AND.U32 {},{},shared_mem[{}];", inst, value, pointer_offset);
}
void EmitSharedAtomicOr32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset,
ScalarU32 value) {
ctx.Add("ATOMS.OR.U32 {},{},shared_mem[{}];", inst, value, pointer_offset);
}
void EmitSharedAtomicXor32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset,
ScalarU32 value) {
ctx.Add("ATOMS.XOR.U32 {},{},shared_mem[{}];", inst, value, pointer_offset);
}
void EmitSharedAtomicExchange32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset,
ScalarU32 value) {
ctx.Add("ATOMS.EXCH.U32 {},{},shared_mem[{}];", inst, value, pointer_offset);
}
void EmitSharedAtomicExchange64(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset,
Register value) {
ctx.LongAdd("ATOMS.EXCH.U64 {}.x,{},shared_mem[{}];", inst, value, pointer_offset);
}
void EmitStorageAtomicIAdd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
ScalarU32 offset, ScalarU32 value) {
Atom(ctx, inst, binding, offset, value, "ADD", "U32");
}
void EmitStorageAtomicSMin32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
ScalarU32 offset, ScalarS32 value) {
Atom(ctx, inst, binding, offset, value, "MIN", "S32");
}
void EmitStorageAtomicUMin32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
ScalarU32 offset, ScalarU32 value) {
Atom(ctx, inst, binding, offset, value, "MIN", "U32");
}
void EmitStorageAtomicSMax32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
ScalarU32 offset, ScalarS32 value) {
Atom(ctx, inst, binding, offset, value, "MAX", "S32");
}
void EmitStorageAtomicUMax32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
ScalarU32 offset, ScalarU32 value) {
Atom(ctx, inst, binding, offset, value, "MAX", "U32");
}
void EmitStorageAtomicInc32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
ScalarU32 offset, ScalarU32 value) {
Atom(ctx, inst, binding, offset, value, "IWRAP", "U32");
}
void EmitStorageAtomicDec32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
ScalarU32 offset, ScalarU32 value) {
Atom(ctx, inst, binding, offset, value, "DWRAP", "U32");
}
void EmitStorageAtomicAnd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
ScalarU32 offset, ScalarU32 value) {
Atom(ctx, inst, binding, offset, value, "AND", "U32");
}
void EmitStorageAtomicOr32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
ScalarU32 offset, ScalarU32 value) {
Atom(ctx, inst, binding, offset, value, "OR", "U32");
}
void EmitStorageAtomicXor32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
ScalarU32 offset, ScalarU32 value) {
Atom(ctx, inst, binding, offset, value, "XOR", "U32");
}
void EmitStorageAtomicExchange32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
ScalarU32 offset, ScalarU32 value) {
Atom(ctx, inst, binding, offset, value, "EXCH", "U32");
}
void EmitStorageAtomicIAdd64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
ScalarU32 offset, Register value) {
Atom(ctx, inst, binding, offset, value, "ADD", "U64");
}
void EmitStorageAtomicSMin64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
ScalarU32 offset, Register value) {
Atom(ctx, inst, binding, offset, value, "MIN", "S64");
}
void EmitStorageAtomicUMin64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
ScalarU32 offset, Register value) {
Atom(ctx, inst, binding, offset, value, "MIN", "U64");
}
void EmitStorageAtomicSMax64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
ScalarU32 offset, Register value) {
Atom(ctx, inst, binding, offset, value, "MAX", "S64");
}
void EmitStorageAtomicUMax64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
ScalarU32 offset, Register value) {
Atom(ctx, inst, binding, offset, value, "MAX", "U64");
}
void EmitStorageAtomicAnd64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
ScalarU32 offset, Register value) {
Atom(ctx, inst, binding, offset, value, "AND", "U64");
}
void EmitStorageAtomicOr64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
ScalarU32 offset, Register value) {
Atom(ctx, inst, binding, offset, value, "OR", "U64");
}
void EmitStorageAtomicXor64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
ScalarU32 offset, Register value) {
Atom(ctx, inst, binding, offset, value, "XOR", "U64");
}
void EmitStorageAtomicExchange64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
ScalarU32 offset, Register value) {
Atom(ctx, inst, binding, offset, value, "EXCH", "U64");
}
void EmitStorageAtomicAddF32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
ScalarU32 offset, ScalarF32 value) {
Atom(ctx, inst, binding, offset, value, "ADD", "F32");
}
void EmitStorageAtomicAddF16x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
ScalarU32 offset, Register value) {
Atom(ctx, inst, binding, offset, value, "ADD", "F16x2");
}
void EmitStorageAtomicAddF32x2([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
[[maybe_unused]] const IR::Value& binding,
[[maybe_unused]] ScalarU32 offset, [[maybe_unused]] Register value) {
throw NotImplementedException("GLASM instruction");
}
void EmitStorageAtomicMinF16x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
ScalarU32 offset, Register value) {
Atom(ctx, inst, binding, offset, value, "MIN", "F16x2");
}
void EmitStorageAtomicMinF32x2([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
[[maybe_unused]] const IR::Value& binding,
[[maybe_unused]] ScalarU32 offset, [[maybe_unused]] Register value) {
throw NotImplementedException("GLASM instruction");
}
void EmitStorageAtomicMaxF16x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
ScalarU32 offset, Register value) {
Atom(ctx, inst, binding, offset, value, "MAX", "F16x2");
}
void EmitStorageAtomicMaxF32x2([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
[[maybe_unused]] const IR::Value& binding,
[[maybe_unused]] ScalarU32 offset, [[maybe_unused]] Register value) {
throw NotImplementedException("GLASM instruction");
}
void EmitGlobalAtomicIAdd32(EmitContext&) {
throw NotImplementedException("GLASM instruction");
}
void EmitGlobalAtomicSMin32(EmitContext&) {
throw NotImplementedException("GLASM instruction");
}
void EmitGlobalAtomicUMin32(EmitContext&) {
throw NotImplementedException("GLASM instruction");
}
void EmitGlobalAtomicSMax32(EmitContext&) {
throw NotImplementedException("GLASM instruction");
}
void EmitGlobalAtomicUMax32(EmitContext&) {
throw NotImplementedException("GLASM instruction");
}
void EmitGlobalAtomicInc32(EmitContext&) {
throw NotImplementedException("GLASM instruction");
}
void EmitGlobalAtomicDec32(EmitContext&) {
throw NotImplementedException("GLASM instruction");
}
void EmitGlobalAtomicAnd32(EmitContext&) {
throw NotImplementedException("GLASM instruction");
}
void EmitGlobalAtomicOr32(EmitContext&) {
throw NotImplementedException("GLASM instruction");
}
void EmitGlobalAtomicXor32(EmitContext&) {
throw NotImplementedException("GLASM instruction");
}
void EmitGlobalAtomicExchange32(EmitContext&) {
throw NotImplementedException("GLASM instruction");
}
void EmitGlobalAtomicIAdd64(EmitContext&) {
throw NotImplementedException("GLASM instruction");
}
void EmitGlobalAtomicSMin64(EmitContext&) {
throw NotImplementedException("GLASM instruction");
}
void EmitGlobalAtomicUMin64(EmitContext&) {
throw NotImplementedException("GLASM instruction");
}
void EmitGlobalAtomicSMax64(EmitContext&) {
throw NotImplementedException("GLASM instruction");
}
void EmitGlobalAtomicUMax64(EmitContext&) {
throw NotImplementedException("GLASM instruction");
}
void EmitGlobalAtomicInc64(EmitContext&) {
throw NotImplementedException("GLASM instruction");
}
void EmitGlobalAtomicDec64(EmitContext&) {
throw NotImplementedException("GLASM instruction");
}
void EmitGlobalAtomicAnd64(EmitContext&) {
throw NotImplementedException("GLASM instruction");
}
void EmitGlobalAtomicOr64(EmitContext&) {
throw NotImplementedException("GLASM instruction");
}
void EmitGlobalAtomicXor64(EmitContext&) {
throw NotImplementedException("GLASM instruction");
}
void EmitGlobalAtomicExchange64(EmitContext&) {
throw NotImplementedException("GLASM instruction");
}
void EmitGlobalAtomicAddF32(EmitContext&) {
throw NotImplementedException("GLASM instruction");
}
void EmitGlobalAtomicAddF16x2(EmitContext&) {
throw NotImplementedException("GLASM instruction");
}
void EmitGlobalAtomicAddF32x2(EmitContext&) {
throw NotImplementedException("GLASM instruction");
}
void EmitGlobalAtomicMinF16x2(EmitContext&) {
throw NotImplementedException("GLASM instruction");
}
void EmitGlobalAtomicMinF32x2(EmitContext&) {
throw NotImplementedException("GLASM instruction");
}
void EmitGlobalAtomicMaxF16x2(EmitContext&) {
throw NotImplementedException("GLASM instruction");
}
void EmitGlobalAtomicMaxF32x2(EmitContext&) {
throw NotImplementedException("GLASM instruction");
}
} // namespace Shader::Backend::GLASM

View File

@@ -0,0 +1,273 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <string_view>
#include "shader_recompiler/backend/glasm/emit_context.h"
#include "shader_recompiler/backend/glasm/emit_glasm_instructions.h"
#include "shader_recompiler/frontend/ir/program.h"
#include "shader_recompiler/frontend/ir/value.h"
#ifdef _MSC_VER
#pragma warning(disable : 4100)
#endif
namespace Shader::Backend::GLASM {
#define NotImplemented() throw NotImplementedException("GLASM instruction {}", __LINE__)
static void DefinePhi(EmitContext& ctx, IR::Inst& phi) {
switch (phi.Arg(0).Type()) {
case IR::Type::U1:
case IR::Type::U32:
case IR::Type::F32:
ctx.reg_alloc.Define(phi);
break;
case IR::Type::U64:
case IR::Type::F64:
ctx.reg_alloc.LongDefine(phi);
break;
default:
throw NotImplementedException("Phi node type {}", phi.Type());
}
}
void EmitPhi(EmitContext& ctx, IR::Inst& phi) {
const size_t num_args{phi.NumArgs()};
for (size_t i = 0; i < num_args; ++i) {
ctx.reg_alloc.Consume(phi.Arg(i));
}
if (!phi.Definition<Id>().is_valid) {
// The phi node wasn't forward defined
DefinePhi(ctx, phi);
}
}
void EmitVoid(EmitContext&) {}
void EmitReference(EmitContext& ctx, const IR::Value& value) {
ctx.reg_alloc.Consume(value);
}
void EmitPhiMove(EmitContext& ctx, const IR::Value& phi_value, const IR::Value& value) {
IR::Inst& phi{RegAlloc::AliasInst(*phi_value.Inst())};
if (!phi.Definition<Id>().is_valid) {
// The phi node wasn't forward defined
DefinePhi(ctx, phi);
}
const Register phi_reg{ctx.reg_alloc.Consume(IR::Value{&phi})};
const Value eval_value{ctx.reg_alloc.Consume(value)};
if (phi_reg == eval_value) {
return;
}
switch (phi.Flags<IR::Type>()) {
case IR::Type::U1:
case IR::Type::U32:
case IR::Type::F32:
ctx.Add("MOV.S {}.x,{};", phi_reg, ScalarS32{eval_value});
break;
case IR::Type::U64:
case IR::Type::F64:
ctx.Add("MOV.U64 {}.x,{};", phi_reg, ScalarRegister{eval_value});
break;
default:
throw NotImplementedException("Phi node type {}", phi.Type());
}
}
void EmitJoin(EmitContext& ctx) {
NotImplemented();
}
void EmitDemoteToHelperInvocation(EmitContext& ctx) {
ctx.Add("KIL TR.x;");
}
void EmitBarrier(EmitContext& ctx) {
ctx.Add("BAR;");
}
void EmitWorkgroupMemoryBarrier(EmitContext& ctx) {
ctx.Add("MEMBAR.CTA;");
}
void EmitDeviceMemoryBarrier(EmitContext& ctx) {
ctx.Add("MEMBAR;");
}
void EmitPrologue(EmitContext& ctx) {
// TODO
}
void EmitEpilogue(EmitContext& ctx) {
// TODO
}
void EmitEmitVertex(EmitContext& ctx, ScalarS32 stream) {
if (stream.type == Type::U32 && stream.imm_u32 == 0) {
ctx.Add("EMIT;");
} else {
ctx.Add("EMITS {};", stream);
}
}
void EmitEndPrimitive(EmitContext& ctx, const IR::Value& stream) {
if (!stream.IsImmediate()) {
LOG_WARNING(Shader_GLASM, "Stream is not immediate");
}
ctx.reg_alloc.Consume(stream);
ctx.Add("ENDPRIM;");
}
void EmitGetRegister(EmitContext& ctx) {
NotImplemented();
}
void EmitSetRegister(EmitContext& ctx) {
NotImplemented();
}
void EmitGetPred(EmitContext& ctx) {
NotImplemented();
}
void EmitSetPred(EmitContext& ctx) {
NotImplemented();
}
void EmitSetGotoVariable(EmitContext& ctx) {
NotImplemented();
}
void EmitGetGotoVariable(EmitContext& ctx) {
NotImplemented();
}
void EmitSetIndirectBranchVariable(EmitContext& ctx) {
NotImplemented();
}
void EmitGetIndirectBranchVariable(EmitContext& ctx) {
NotImplemented();
}
void EmitGetZFlag(EmitContext& ctx) {
NotImplemented();
}
void EmitGetSFlag(EmitContext& ctx) {
NotImplemented();
}
void EmitGetCFlag(EmitContext& ctx) {
NotImplemented();
}
void EmitGetOFlag(EmitContext& ctx) {
NotImplemented();
}
void EmitSetZFlag(EmitContext& ctx) {
NotImplemented();
}
void EmitSetSFlag(EmitContext& ctx) {
NotImplemented();
}
void EmitSetCFlag(EmitContext& ctx) {
NotImplemented();
}
void EmitSetOFlag(EmitContext& ctx) {
NotImplemented();
}
void EmitWorkgroupId(EmitContext& ctx, IR::Inst& inst) {
ctx.Add("MOV.S {},invocation.groupid;", inst);
}
void EmitLocalInvocationId(EmitContext& ctx, IR::Inst& inst) {
ctx.Add("MOV.S {},invocation.localid;", inst);
}
void EmitInvocationId(EmitContext& ctx, IR::Inst& inst) {
ctx.Add("MOV.S {}.x,primitive_invocation.x;", inst);
}
void EmitSampleId(EmitContext& ctx, IR::Inst& inst) {
ctx.Add("MOV.S {}.x,fragment.sampleid.x;", inst);
}
void EmitIsHelperInvocation(EmitContext& ctx, IR::Inst& inst) {
ctx.Add("MOV.S {}.x,fragment.helperthread.x;", inst);
}
void EmitYDirection(EmitContext& ctx, IR::Inst& inst) {
ctx.uses_y_direction = true;
ctx.Add("MOV.F {}.x,y_direction[0].w;", inst);
}
void EmitUndefU1(EmitContext& ctx, IR::Inst& inst) {
ctx.Add("MOV.S {}.x,0;", inst);
}
void EmitUndefU8(EmitContext& ctx, IR::Inst& inst) {
ctx.Add("MOV.S {}.x,0;", inst);
}
void EmitUndefU16(EmitContext& ctx, IR::Inst& inst) {
ctx.Add("MOV.S {}.x,0;", inst);
}
void EmitUndefU32(EmitContext& ctx, IR::Inst& inst) {
ctx.Add("MOV.S {}.x,0;", inst);
}
void EmitUndefU64(EmitContext& ctx, IR::Inst& inst) {
ctx.LongAdd("MOV.S64 {}.x,0;", inst);
}
void EmitGetZeroFromOp(EmitContext& ctx) {
NotImplemented();
}
void EmitGetSignFromOp(EmitContext& ctx) {
NotImplemented();
}
void EmitGetCarryFromOp(EmitContext& ctx) {
NotImplemented();
}
void EmitGetOverflowFromOp(EmitContext& ctx) {
NotImplemented();
}
void EmitGetSparseFromOp(EmitContext& ctx) {
NotImplemented();
}
void EmitGetInBoundsFromOp(EmitContext& ctx) {
NotImplemented();
}
void EmitLogicalOr(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b) {
ctx.Add("OR.S {},{},{};", inst, a, b);
}
void EmitLogicalAnd(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b) {
ctx.Add("AND.S {},{},{};", inst, a, b);
}
void EmitLogicalXor(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b) {
ctx.Add("XOR.S {},{},{};", inst, a, b);
}
void EmitLogicalNot(EmitContext& ctx, IR::Inst& inst, ScalarS32 value) {
ctx.Add("SEQ.S {},{},0;", inst, value);
}
} // namespace Shader::Backend::GLASM

View File

@@ -0,0 +1,67 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "shader_recompiler/backend/glasm/emit_context.h"
#include "shader_recompiler/backend/glasm/emit_glasm_instructions.h"
#include "shader_recompiler/frontend/ir/value.h"
namespace Shader::Backend::GLASM {
void EmitSelectU1(EmitContext& ctx, IR::Inst& inst, ScalarS32 cond, ScalarS32 true_value,
ScalarS32 false_value) {
ctx.Add("CMP.S {},{},{},{};", inst, cond, true_value, false_value);
}
void EmitSelectU8([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] ScalarS32 cond,
[[maybe_unused]] ScalarS32 true_value, [[maybe_unused]] ScalarS32 false_value) {
throw NotImplementedException("GLASM instruction");
}
void EmitSelectU16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] ScalarS32 cond,
[[maybe_unused]] ScalarS32 true_value, [[maybe_unused]] ScalarS32 false_value) {
throw NotImplementedException("GLASM instruction");
}
void EmitSelectU32(EmitContext& ctx, IR::Inst& inst, ScalarS32 cond, ScalarS32 true_value,
ScalarS32 false_value) {
ctx.Add("CMP.S {},{},{},{};", inst, cond, true_value, false_value);
}
void EmitSelectU64(EmitContext& ctx, IR::Inst& inst, ScalarS32 cond, Register true_value,
Register false_value) {
ctx.reg_alloc.InvalidateConditionCodes();
const Register ret{ctx.reg_alloc.LongDefine(inst)};
if (ret == true_value) {
ctx.Add("MOV.S.CC RC.x,{};"
"MOV.U64 {}.x(EQ.x),{};",
cond, ret, false_value);
} else if (ret == false_value) {
ctx.Add("MOV.S.CC RC.x,{};"
"MOV.U64 {}.x(NE.x),{};",
cond, ret, true_value);
} else {
ctx.Add("MOV.S.CC RC.x,{};"
"MOV.U64 {}.x,{};"
"MOV.U64 {}.x(NE.x),{};",
cond, ret, false_value, ret, true_value);
}
}
void EmitSelectF16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] ScalarS32 cond,
[[maybe_unused]] Register true_value, [[maybe_unused]] Register false_value) {
throw NotImplementedException("GLASM instruction");
}
void EmitSelectF32(EmitContext& ctx, IR::Inst& inst, ScalarS32 cond, ScalarS32 true_value,
ScalarS32 false_value) {
ctx.Add("CMP.S {},{},{},{};", inst, cond, true_value, false_value);
}
void EmitSelectF64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] ScalarS32 cond,
[[maybe_unused]] Register true_value, [[maybe_unused]] Register false_value) {
throw NotImplementedException("GLASM instruction");
}
} // namespace Shader::Backend::GLASM

View File

@@ -0,0 +1,58 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "shader_recompiler/backend/glasm/emit_context.h"
#include "shader_recompiler/backend/glasm/emit_glasm_instructions.h"
#include "shader_recompiler/frontend/ir/value.h"
namespace Shader::Backend::GLASM {
void EmitLoadSharedU8(EmitContext& ctx, IR::Inst& inst, ScalarU32 offset) {
ctx.Add("LDS.U8 {},shared_mem[{}];", inst, offset);
}
void EmitLoadSharedS8(EmitContext& ctx, IR::Inst& inst, ScalarU32 offset) {
ctx.Add("LDS.S8 {},shared_mem[{}];", inst, offset);
}
void EmitLoadSharedU16(EmitContext& ctx, IR::Inst& inst, ScalarU32 offset) {
ctx.Add("LDS.U16 {},shared_mem[{}];", inst, offset);
}
void EmitLoadSharedS16(EmitContext& ctx, IR::Inst& inst, ScalarU32 offset) {
ctx.Add("LDS.S16 {},shared_mem[{}];", inst, offset);
}
void EmitLoadSharedU32(EmitContext& ctx, IR::Inst& inst, ScalarU32 offset) {
ctx.Add("LDS.U32 {},shared_mem[{}];", inst, offset);
}
void EmitLoadSharedU64(EmitContext& ctx, IR::Inst& inst, ScalarU32 offset) {
ctx.Add("LDS.U32X2 {},shared_mem[{}];", inst, offset);
}
void EmitLoadSharedU128(EmitContext& ctx, IR::Inst& inst, ScalarU32 offset) {
ctx.Add("LDS.U32X4 {},shared_mem[{}];", inst, offset);
}
void EmitWriteSharedU8(EmitContext& ctx, ScalarU32 offset, ScalarU32 value) {
ctx.Add("STS.U8 {},shared_mem[{}];", value, offset);
}
void EmitWriteSharedU16(EmitContext& ctx, ScalarU32 offset, ScalarU32 value) {
ctx.Add("STS.U16 {},shared_mem[{}];", value, offset);
}
void EmitWriteSharedU32(EmitContext& ctx, ScalarU32 offset, ScalarU32 value) {
ctx.Add("STS.U32 {},shared_mem[{}];", value, offset);
}
void EmitWriteSharedU64(EmitContext& ctx, ScalarU32 offset, Register value) {
ctx.Add("STS.U32X2 {},shared_mem[{}];", value, offset);
}
void EmitWriteSharedU128(EmitContext& ctx, ScalarU32 offset, Register value) {
ctx.Add("STS.U32X4 {},shared_mem[{}];", value, offset);
}
} // namespace Shader::Backend::GLASM

View File

@@ -0,0 +1,150 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "shader_recompiler/backend/glasm/emit_context.h"
#include "shader_recompiler/backend/glasm/emit_glasm_instructions.h"
#include "shader_recompiler/frontend/ir/value.h"
#include "shader_recompiler/profile.h"
namespace Shader::Backend::GLASM {
void EmitLaneId(EmitContext& ctx, IR::Inst& inst) {
ctx.Add("MOV.S {}.x,{}.threadid;", inst, ctx.stage_name);
}
void EmitVoteAll(EmitContext& ctx, IR::Inst& inst, ScalarS32 pred) {
ctx.Add("TGALL.S {}.x,{};", inst, pred);
}
void EmitVoteAny(EmitContext& ctx, IR::Inst& inst, ScalarS32 pred) {
ctx.Add("TGANY.S {}.x,{};", inst, pred);
}
void EmitVoteEqual(EmitContext& ctx, IR::Inst& inst, ScalarS32 pred) {
ctx.Add("TGEQ.S {}.x,{};", inst, pred);
}
void EmitSubgroupBallot(EmitContext& ctx, IR::Inst& inst, ScalarS32 pred) {
ctx.Add("TGBALLOT {}.x,{};", inst, pred);
}
void EmitSubgroupEqMask(EmitContext& ctx, IR::Inst& inst) {
ctx.Add("MOV.U {},{}.threadeqmask;", inst, ctx.stage_name);
}
void EmitSubgroupLtMask(EmitContext& ctx, IR::Inst& inst) {
ctx.Add("MOV.U {},{}.threadltmask;", inst, ctx.stage_name);
}
void EmitSubgroupLeMask(EmitContext& ctx, IR::Inst& inst) {
ctx.Add("MOV.U {},{}.threadlemask;", inst, ctx.stage_name);
}
void EmitSubgroupGtMask(EmitContext& ctx, IR::Inst& inst) {
ctx.Add("MOV.U {},{}.threadgtmask;", inst, ctx.stage_name);
}
void EmitSubgroupGeMask(EmitContext& ctx, IR::Inst& inst) {
ctx.Add("MOV.U {},{}.threadgemask;", inst, ctx.stage_name);
}
static void Shuffle(EmitContext& ctx, IR::Inst& inst, ScalarU32 value, ScalarU32 index,
const IR::Value& clamp, const IR::Value& segmentation_mask,
std::string_view op) {
IR::Inst* const in_bounds{inst.GetAssociatedPseudoOperation(IR::Opcode::GetInBoundsFromOp)};
if (in_bounds) {
in_bounds->Invalidate();
}
std::string mask;
if (clamp.IsImmediate() && segmentation_mask.IsImmediate()) {
mask = fmt::to_string(clamp.U32() | (segmentation_mask.U32() << 8));
} else {
mask = "RC";
ctx.Add("BFI.U RC.x,{{5,8,0,0}},{},{};",
ScalarU32{ctx.reg_alloc.Consume(segmentation_mask)},
ScalarU32{ctx.reg_alloc.Consume(clamp)});
}
const Register value_ret{ctx.reg_alloc.Define(inst)};
if (in_bounds) {
const Register bounds_ret{ctx.reg_alloc.Define(*in_bounds)};
ctx.Add("SHF{}.U {},{},{},{};"
"MOV.U {}.x,{}.y;",
op, bounds_ret, value, index, mask, value_ret, bounds_ret);
} else {
ctx.Add("SHF{}.U {},{},{},{};"
"MOV.U {}.x,{}.y;",
op, value_ret, value, index, mask, value_ret, value_ret);
}
}
void EmitShuffleIndex(EmitContext& ctx, IR::Inst& inst, ScalarU32 value, ScalarU32 index,
const IR::Value& clamp, const IR::Value& segmentation_mask) {
Shuffle(ctx, inst, value, index, clamp, segmentation_mask, "IDX");
}
void EmitShuffleUp(EmitContext& ctx, IR::Inst& inst, ScalarU32 value, ScalarU32 index,
const IR::Value& clamp, const IR::Value& segmentation_mask) {
Shuffle(ctx, inst, value, index, clamp, segmentation_mask, "UP");
}
void EmitShuffleDown(EmitContext& ctx, IR::Inst& inst, ScalarU32 value, ScalarU32 index,
const IR::Value& clamp, const IR::Value& segmentation_mask) {
Shuffle(ctx, inst, value, index, clamp, segmentation_mask, "DOWN");
}
void EmitShuffleButterfly(EmitContext& ctx, IR::Inst& inst, ScalarU32 value, ScalarU32 index,
const IR::Value& clamp, const IR::Value& segmentation_mask) {
Shuffle(ctx, inst, value, index, clamp, segmentation_mask, "XOR");
}
void EmitFSwizzleAdd(EmitContext& ctx, IR::Inst& inst, ScalarF32 op_a, ScalarF32 op_b,
ScalarU32 swizzle) {
const auto ret{ctx.reg_alloc.Define(inst)};
ctx.Add("AND.U RC.z,{}.threadid,3;"
"SHL.U RC.z,RC.z,1;"
"SHR.U RC.z,{},RC.z;"
"AND.U RC.z,RC.z,3;"
"MUL.F RC.x,{},FSWZA[RC.z];"
"MUL.F RC.y,{},FSWZB[RC.z];"
"ADD.F {}.x,RC.x,RC.y;",
ctx.stage_name, swizzle, op_a, op_b, ret);
}
void EmitDPdxFine(EmitContext& ctx, IR::Inst& inst, ScalarF32 p) {
if (ctx.profile.support_derivative_control) {
ctx.Add("DDX.FINE {}.x,{};", inst, p);
} else {
LOG_WARNING(Shader_GLASM, "Fine derivatives not supported by device");
ctx.Add("DDX {}.x,{};", inst, p);
}
}
void EmitDPdyFine(EmitContext& ctx, IR::Inst& inst, ScalarF32 p) {
if (ctx.profile.support_derivative_control) {
ctx.Add("DDY.FINE {}.x,{};", inst, p);
} else {
LOG_WARNING(Shader_GLASM, "Fine derivatives not supported by device");
ctx.Add("DDY {}.x,{};", inst, p);
}
}
void EmitDPdxCoarse(EmitContext& ctx, IR::Inst& inst, ScalarF32 p) {
if (ctx.profile.support_derivative_control) {
ctx.Add("DDX.COARSE {}.x,{};", inst, p);
} else {
LOG_WARNING(Shader_GLASM, "Coarse derivatives not supported by device");
ctx.Add("DDX {}.x,{};", inst, p);
}
}
void EmitDPdyCoarse(EmitContext& ctx, IR::Inst& inst, ScalarF32 p) {
if (ctx.profile.support_derivative_control) {
ctx.Add("DDY.COARSE {}.x,{};", inst, p);
} else {
LOG_WARNING(Shader_GLASM, "Coarse derivatives not supported by device");
ctx.Add("DDY {}.x,{};", inst, p);
}
}
} // namespace Shader::Backend::GLASM

View File

@@ -0,0 +1,186 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <string>
#include <fmt/format.h>
#include "shader_recompiler/backend/glasm/emit_context.h"
#include "shader_recompiler/backend/glasm/reg_alloc.h"
#include "shader_recompiler/exception.h"
#include "shader_recompiler/frontend/ir/value.h"
namespace Shader::Backend::GLASM {
Register RegAlloc::Define(IR::Inst& inst) {
return Define(inst, false);
}
Register RegAlloc::LongDefine(IR::Inst& inst) {
return Define(inst, true);
}
Value RegAlloc::Peek(const IR::Value& value) {
if (value.IsImmediate()) {
return MakeImm(value);
} else {
return PeekInst(*value.Inst());
}
}
Value RegAlloc::Consume(const IR::Value& value) {
if (value.IsImmediate()) {
return MakeImm(value);
} else {
return ConsumeInst(*value.Inst());
}
}
void RegAlloc::Unref(IR::Inst& inst) {
IR::Inst& value_inst{AliasInst(inst)};
value_inst.DestructiveRemoveUsage();
if (!value_inst.HasUses()) {
Free(value_inst.Definition<Id>());
}
}
Register RegAlloc::AllocReg() {
Register ret;
ret.type = Type::Register;
ret.id = Alloc(false);
return ret;
}
Register RegAlloc::AllocLongReg() {
Register ret;
ret.type = Type::Register;
ret.id = Alloc(true);
return ret;
}
void RegAlloc::FreeReg(Register reg) {
Free(reg.id);
}
Value RegAlloc::MakeImm(const IR::Value& value) {
Value ret;
switch (value.Type()) {
case IR::Type::Void:
ret.type = Type::Void;
break;
case IR::Type::U1:
ret.type = Type::U32;
ret.imm_u32 = value.U1() ? 0xffffffff : 0;
break;
case IR::Type::U32:
ret.type = Type::U32;
ret.imm_u32 = value.U32();
break;
case IR::Type::F32:
ret.type = Type::U32;
ret.imm_u32 = Common::BitCast<u32>(value.F32());
break;
case IR::Type::U64:
ret.type = Type::U64;
ret.imm_u64 = value.U64();
break;
case IR::Type::F64:
ret.type = Type::U64;
ret.imm_u64 = Common::BitCast<u64>(value.F64());
break;
default:
throw NotImplementedException("Immediate type {}", value.Type());
}
return ret;
}
Register RegAlloc::Define(IR::Inst& inst, bool is_long) {
if (inst.HasUses()) {
inst.SetDefinition<Id>(Alloc(is_long));
} else {
Id id{};
id.is_long.Assign(is_long ? 1 : 0);
id.is_null.Assign(1);
inst.SetDefinition<Id>(id);
}
return Register{PeekInst(inst)};
}
Value RegAlloc::PeekInst(IR::Inst& inst) {
Value ret;
ret.type = Type::Register;
ret.id = inst.Definition<Id>();
return ret;
}
Value RegAlloc::ConsumeInst(IR::Inst& inst) {
Unref(inst);
return PeekInst(inst);
}
Id RegAlloc::Alloc(bool is_long) {
size_t& num_regs{is_long ? num_used_long_registers : num_used_registers};
std::bitset<NUM_REGS>& use{is_long ? long_register_use : register_use};
if (num_used_registers + num_used_long_registers < NUM_REGS) {
for (size_t reg = 0; reg < NUM_REGS; ++reg) {
if (use[reg]) {
continue;
}
num_regs = std::max(num_regs, reg + 1);
use[reg] = true;
Id ret{};
ret.is_valid.Assign(1);
ret.is_long.Assign(is_long ? 1 : 0);
ret.is_spill.Assign(0);
ret.is_condition_code.Assign(0);
ret.is_null.Assign(0);
ret.index.Assign(static_cast<u32>(reg));
return ret;
}
}
throw NotImplementedException("Register spilling");
}
void RegAlloc::Free(Id id) {
if (id.is_valid == 0) {
throw LogicError("Freeing invalid register");
}
if (id.is_spill != 0) {
throw NotImplementedException("Free spill");
}
if (id.is_long != 0) {
long_register_use[id.index] = false;
} else {
register_use[id.index] = false;
}
}
/*static*/ bool RegAlloc::IsAliased(const IR::Inst& inst) {
switch (inst.GetOpcode()) {
case IR::Opcode::Identity:
case IR::Opcode::BitCastU16F16:
case IR::Opcode::BitCastU32F32:
case IR::Opcode::BitCastU64F64:
case IR::Opcode::BitCastF16U16:
case IR::Opcode::BitCastF32U32:
case IR::Opcode::BitCastF64U64:
return true;
default:
return false;
}
}
/*static*/ IR::Inst& RegAlloc::AliasInst(IR::Inst& inst) {
IR::Inst* it{&inst};
while (IsAliased(*it)) {
const IR::Value arg{it->Arg(0)};
if (arg.IsImmediate()) {
break;
}
it = arg.InstRecursive();
}
return *it;
}
} // namespace Shader::Backend::GLASM

View File

@@ -0,0 +1,304 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <bitset>
#include <fmt/format.h>
#include "common/bit_cast.h"
#include "common/bit_field.h"
#include "common/common_types.h"
#include "shader_recompiler/exception.h"
namespace Shader::IR {
class Inst;
class Value;
} // namespace Shader::IR
namespace Shader::Backend::GLASM {
class EmitContext;
enum class Type : u32 {
Void,
Register,
U32,
U64,
};
struct Id {
union {
u32 raw;
BitField<0, 1, u32> is_valid;
BitField<1, 1, u32> is_long;
BitField<2, 1, u32> is_spill;
BitField<3, 1, u32> is_condition_code;
BitField<4, 1, u32> is_null;
BitField<5, 27, u32> index;
};
bool operator==(Id rhs) const noexcept {
return raw == rhs.raw;
}
bool operator!=(Id rhs) const noexcept {
return !operator==(rhs);
}
};
static_assert(sizeof(Id) == sizeof(u32));
struct Value {
Type type;
union {
Id id;
u32 imm_u32;
u64 imm_u64;
};
bool operator==(const Value& rhs) const noexcept {
if (type != rhs.type) {
return false;
}
switch (type) {
case Type::Void:
return true;
case Type::Register:
return id == rhs.id;
case Type::U32:
return imm_u32 == rhs.imm_u32;
case Type::U64:
return imm_u64 == rhs.imm_u64;
}
return false;
}
bool operator!=(const Value& rhs) const noexcept {
return !operator==(rhs);
}
};
struct Register : Value {};
struct ScalarRegister : Value {};
struct ScalarU32 : Value {};
struct ScalarS32 : Value {};
struct ScalarF32 : Value {};
struct ScalarF64 : Value {};
class RegAlloc {
public:
RegAlloc(EmitContext& ctx_) : ctx{ctx_} {}
Register Define(IR::Inst& inst);
Register LongDefine(IR::Inst& inst);
[[nodiscard]] Value Peek(const IR::Value& value);
Value Consume(const IR::Value& value);
void Unref(IR::Inst& inst);
[[nodiscard]] Register AllocReg();
[[nodiscard]] Register AllocLongReg();
void FreeReg(Register reg);
void InvalidateConditionCodes() {
// This does nothing for now
}
[[nodiscard]] size_t NumUsedRegisters() const noexcept {
return num_used_registers;
}
[[nodiscard]] size_t NumUsedLongRegisters() const noexcept {
return num_used_long_registers;
}
[[nodiscard]] bool IsEmpty() const noexcept {
return register_use.none() && long_register_use.none();
}
/// Returns true if the instruction is expected to be aliased to another
static bool IsAliased(const IR::Inst& inst);
/// Returns the underlying value out of an alias sequence
static IR::Inst& AliasInst(IR::Inst& inst);
private:
static constexpr size_t NUM_REGS = 4096;
static constexpr size_t NUM_ELEMENTS = 4;
Value MakeImm(const IR::Value& value);
Register Define(IR::Inst& inst, bool is_long);
Value PeekInst(IR::Inst& inst);
Value ConsumeInst(IR::Inst& inst);
Id Alloc(bool is_long);
void Free(Id id);
EmitContext& ctx;
size_t num_used_registers{};
size_t num_used_long_registers{};
std::bitset<NUM_REGS> register_use{};
std::bitset<NUM_REGS> long_register_use{};
};
template <bool scalar, typename FormatContext>
auto FormatTo(FormatContext& ctx, Id id) {
if (id.is_condition_code != 0) {
throw NotImplementedException("Condition code emission");
}
if (id.is_spill != 0) {
throw NotImplementedException("Spill emission");
}
if constexpr (scalar) {
if (id.is_null != 0) {
return fmt::format_to(ctx.out(), "{}", id.is_long != 0 ? "DC.x" : "RC.x");
}
if (id.is_long != 0) {
return fmt::format_to(ctx.out(), "D{}.x", id.index.Value());
} else {
return fmt::format_to(ctx.out(), "R{}.x", id.index.Value());
}
} else {
if (id.is_null != 0) {
return fmt::format_to(ctx.out(), "{}", id.is_long != 0 ? "DC" : "RC");
}
if (id.is_long != 0) {
return fmt::format_to(ctx.out(), "D{}", id.index.Value());
} else {
return fmt::format_to(ctx.out(), "R{}", id.index.Value());
}
}
}
} // namespace Shader::Backend::GLASM
template <>
struct fmt::formatter<Shader::Backend::GLASM::Id> {
constexpr auto parse(format_parse_context& ctx) {
return ctx.begin();
}
template <typename FormatContext>
auto format(Shader::Backend::GLASM::Id id, FormatContext& ctx) {
return Shader::Backend::GLASM::FormatTo<true>(ctx, id);
}
};
template <>
struct fmt::formatter<Shader::Backend::GLASM::Register> {
constexpr auto parse(format_parse_context& ctx) {
return ctx.begin();
}
template <typename FormatContext>
auto format(const Shader::Backend::GLASM::Register& value, FormatContext& ctx) {
if (value.type != Shader::Backend::GLASM::Type::Register) {
throw Shader::InvalidArgument("Register value type is not register");
}
return Shader::Backend::GLASM::FormatTo<false>(ctx, value.id);
}
};
template <>
struct fmt::formatter<Shader::Backend::GLASM::ScalarRegister> {
constexpr auto parse(format_parse_context& ctx) {
return ctx.begin();
}
template <typename FormatContext>
auto format(const Shader::Backend::GLASM::ScalarRegister& value, FormatContext& ctx) {
if (value.type != Shader::Backend::GLASM::Type::Register) {
throw Shader::InvalidArgument("Register value type is not register");
}
return Shader::Backend::GLASM::FormatTo<true>(ctx, value.id);
}
};
template <>
struct fmt::formatter<Shader::Backend::GLASM::ScalarU32> {
constexpr auto parse(format_parse_context& ctx) {
return ctx.begin();
}
template <typename FormatContext>
auto format(const Shader::Backend::GLASM::ScalarU32& value, FormatContext& ctx) {
switch (value.type) {
case Shader::Backend::GLASM::Type::Void:
break;
case Shader::Backend::GLASM::Type::Register:
return Shader::Backend::GLASM::FormatTo<true>(ctx, value.id);
case Shader::Backend::GLASM::Type::U32:
return fmt::format_to(ctx.out(), "{}", value.imm_u32);
case Shader::Backend::GLASM::Type::U64:
break;
}
throw Shader::InvalidArgument("Invalid value type {}", value.type);
}
};
template <>
struct fmt::formatter<Shader::Backend::GLASM::ScalarS32> {
constexpr auto parse(format_parse_context& ctx) {
return ctx.begin();
}
template <typename FormatContext>
auto format(const Shader::Backend::GLASM::ScalarS32& value, FormatContext& ctx) {
switch (value.type) {
case Shader::Backend::GLASM::Type::Void:
break;
case Shader::Backend::GLASM::Type::Register:
return Shader::Backend::GLASM::FormatTo<true>(ctx, value.id);
case Shader::Backend::GLASM::Type::U32:
return fmt::format_to(ctx.out(), "{}", static_cast<s32>(value.imm_u32));
case Shader::Backend::GLASM::Type::U64:
break;
}
throw Shader::InvalidArgument("Invalid value type {}", value.type);
}
};
template <>
struct fmt::formatter<Shader::Backend::GLASM::ScalarF32> {
constexpr auto parse(format_parse_context& ctx) {
return ctx.begin();
}
template <typename FormatContext>
auto format(const Shader::Backend::GLASM::ScalarF32& value, FormatContext& ctx) {
switch (value.type) {
case Shader::Backend::GLASM::Type::Void:
break;
case Shader::Backend::GLASM::Type::Register:
return Shader::Backend::GLASM::FormatTo<true>(ctx, value.id);
case Shader::Backend::GLASM::Type::U32:
return fmt::format_to(ctx.out(), "{}", Common::BitCast<f32>(value.imm_u32));
case Shader::Backend::GLASM::Type::U64:
break;
}
throw Shader::InvalidArgument("Invalid value type {}", value.type);
}
};
template <>
struct fmt::formatter<Shader::Backend::GLASM::ScalarF64> {
constexpr auto parse(format_parse_context& ctx) {
return ctx.begin();
}
template <typename FormatContext>
auto format(const Shader::Backend::GLASM::ScalarF64& value, FormatContext& ctx) {
switch (value.type) {
case Shader::Backend::GLASM::Type::Void:
break;
case Shader::Backend::GLASM::Type::Register:
return Shader::Backend::GLASM::FormatTo<true>(ctx, value.id);
case Shader::Backend::GLASM::Type::U32:
break;
case Shader::Backend::GLASM::Type::U64:
return format_to(ctx.out(), "{}", Common::BitCast<f64>(value.imm_u64));
}
throw Shader::InvalidArgument("Invalid value type {}", value.type);
}
};