early-access version 1866

This commit is contained in:
pineappleEA
2021-07-09 23:54:15 +02:00
parent 335eeff822
commit 7d21887d40
469 changed files with 201995 additions and 78488 deletions

View File

@@ -0,0 +1,58 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <vector>
#include "shader_recompiler/frontend/ir/value.h"
namespace Shader::IR {
class Block;
// Node of the structured control-flow tree rebuilt from the unstructured CFG.
// 'type' selects which member of the 'data' union is valid to read.
struct AbstractSyntaxNode {
    enum class Type {
        Block,       // Plain basic block, no structured control flow of its own
        If,          // Conditional construct; data.if_node is active
        EndIf,       // Merge point closing an If; data.end_if is active
        Loop,        // Loop construct; data.loop is active
        Repeat,      // Conditional back-edge of a loop; data.repeat is active
        Break,       // Conditional break out of a construct; data.break_node is active
        Return,      // Shader return; no data member is used
        Unreachable, // Unreachable control flow; no data member is used
    };
    union Data {
        Block* block; // Valid for Type::Block
        struct {
            U1 cond;      // Condition guarding the body
            Block* body;  // Entry block of the conditional body
            Block* merge; // Block where control flow rejoins
        } if_node;
        struct {
            Block* merge; // Merge block of the matching If
        } end_if;
        struct {
            Block* body;           // Entry block of the loop body
            Block* continue_block; // Block evaluating the loop continuation
            Block* merge;          // Block executed after leaving the loop
        } loop;
        struct {
            U1 cond;            // Condition to repeat the loop
            Block* loop_header; // Jump target when repeating
            Block* merge;       // Jump target when exiting the loop
        } repeat;
        struct {
            U1 cond;      // Condition to break
            Block* merge; // Jump target when breaking
            Block* skip;  // Jump target when not breaking
        } break_node;
    };
    Data data{};
    Type type{};
};
// A shader's whole structured control flow as a flat list of nodes.
using AbstractSyntaxList = std::vector<AbstractSyntaxNode>;
} // namespace Shader::IR

View File

@@ -0,0 +1,454 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <fmt/format.h>
#include "shader_recompiler/exception.h"
#include "shader_recompiler/frontend/ir/attribute.h"
namespace Shader::IR {
/// Returns true when the attribute is one of the 32 generic (user-defined) attributes.
bool IsGeneric(Attribute attribute) noexcept {
    // Cover the entire generic range, including the Y/Z/W components of Generic31.
    // The previous upper bound of Generic31X wrongly excluded Generic31Y/Z/W while
    // every other generic slot's Y/Z/W components were included.
    return attribute >= Attribute::Generic0X && attribute <= Attribute::Generic31W;
}
/// Returns the generic slot index [0, 32) of a generic attribute.
/// @throws InvalidArgument when the attribute is not generic.
u32 GenericAttributeIndex(Attribute attribute) {
    if (!IsGeneric(attribute)) {
        throw InvalidArgument("Attribute is not generic {}", attribute);
    }
    // Each generic slot spans four consecutive components (X, Y, Z, W)
    const u32 base{static_cast<u32>(Attribute::Generic0X)};
    const u32 value{static_cast<u32>(attribute)};
    return (value - base) / 4u;
}
/// Returns the component index (0=X, 1=Y, 2=Z, 3=W) of a generic attribute.
/// @throws InvalidArgument when the attribute is not generic.
u32 GenericAttributeElement(Attribute attribute) {
    if (!IsGeneric(attribute)) {
        throw InvalidArgument("Attribute is not generic {}", attribute);
    }
    // Components are laid out contiguously, so the low two bits select X/Y/Z/W
    const u32 value{static_cast<u32>(attribute)};
    return value % 4u;
}
std::string NameOf(Attribute attribute) {
switch (attribute) {
case Attribute::PrimitiveId:
return "PrimitiveId";
case Attribute::Layer:
return "Layer";
case Attribute::ViewportIndex:
return "ViewportIndex";
case Attribute::PointSize:
return "PointSize";
case Attribute::PositionX:
return "Position.X";
case Attribute::PositionY:
return "Position.Y";
case Attribute::PositionZ:
return "Position.Z";
case Attribute::PositionW:
return "Position.W";
case Attribute::Generic0X:
return "Generic[0].X";
case Attribute::Generic0Y:
return "Generic[0].Y";
case Attribute::Generic0Z:
return "Generic[0].Z";
case Attribute::Generic0W:
return "Generic[0].W";
case Attribute::Generic1X:
return "Generic[1].X";
case Attribute::Generic1Y:
return "Generic[1].Y";
case Attribute::Generic1Z:
return "Generic[1].Z";
case Attribute::Generic1W:
return "Generic[1].W";
case Attribute::Generic2X:
return "Generic[2].X";
case Attribute::Generic2Y:
return "Generic[2].Y";
case Attribute::Generic2Z:
return "Generic[2].Z";
case Attribute::Generic2W:
return "Generic[2].W";
case Attribute::Generic3X:
return "Generic[3].X";
case Attribute::Generic3Y:
return "Generic[3].Y";
case Attribute::Generic3Z:
return "Generic[3].Z";
case Attribute::Generic3W:
return "Generic[3].W";
case Attribute::Generic4X:
return "Generic[4].X";
case Attribute::Generic4Y:
return "Generic[4].Y";
case Attribute::Generic4Z:
return "Generic[4].Z";
case Attribute::Generic4W:
return "Generic[4].W";
case Attribute::Generic5X:
return "Generic[5].X";
case Attribute::Generic5Y:
return "Generic[5].Y";
case Attribute::Generic5Z:
return "Generic[5].Z";
case Attribute::Generic5W:
return "Generic[5].W";
case Attribute::Generic6X:
return "Generic[6].X";
case Attribute::Generic6Y:
return "Generic[6].Y";
case Attribute::Generic6Z:
return "Generic[6].Z";
case Attribute::Generic6W:
return "Generic[6].W";
case Attribute::Generic7X:
return "Generic[7].X";
case Attribute::Generic7Y:
return "Generic[7].Y";
case Attribute::Generic7Z:
return "Generic[7].Z";
case Attribute::Generic7W:
return "Generic[7].W";
case Attribute::Generic8X:
return "Generic[8].X";
case Attribute::Generic8Y:
return "Generic[8].Y";
case Attribute::Generic8Z:
return "Generic[8].Z";
case Attribute::Generic8W:
return "Generic[8].W";
case Attribute::Generic9X:
return "Generic[9].X";
case Attribute::Generic9Y:
return "Generic[9].Y";
case Attribute::Generic9Z:
return "Generic[9].Z";
case Attribute::Generic9W:
return "Generic[9].W";
case Attribute::Generic10X:
return "Generic[10].X";
case Attribute::Generic10Y:
return "Generic[10].Y";
case Attribute::Generic10Z:
return "Generic[10].Z";
case Attribute::Generic10W:
return "Generic[10].W";
case Attribute::Generic11X:
return "Generic[11].X";
case Attribute::Generic11Y:
return "Generic[11].Y";
case Attribute::Generic11Z:
return "Generic[11].Z";
case Attribute::Generic11W:
return "Generic[11].W";
case Attribute::Generic12X:
return "Generic[12].X";
case Attribute::Generic12Y:
return "Generic[12].Y";
case Attribute::Generic12Z:
return "Generic[12].Z";
case Attribute::Generic12W:
return "Generic[12].W";
case Attribute::Generic13X:
return "Generic[13].X";
case Attribute::Generic13Y:
return "Generic[13].Y";
case Attribute::Generic13Z:
return "Generic[13].Z";
case Attribute::Generic13W:
return "Generic[13].W";
case Attribute::Generic14X:
return "Generic[14].X";
case Attribute::Generic14Y:
return "Generic[14].Y";
case Attribute::Generic14Z:
return "Generic[14].Z";
case Attribute::Generic14W:
return "Generic[14].W";
case Attribute::Generic15X:
return "Generic[15].X";
case Attribute::Generic15Y:
return "Generic[15].Y";
case Attribute::Generic15Z:
return "Generic[15].Z";
case Attribute::Generic15W:
return "Generic[15].W";
case Attribute::Generic16X:
return "Generic[16].X";
case Attribute::Generic16Y:
return "Generic[16].Y";
case Attribute::Generic16Z:
return "Generic[16].Z";
case Attribute::Generic16W:
return "Generic[16].W";
case Attribute::Generic17X:
return "Generic[17].X";
case Attribute::Generic17Y:
return "Generic[17].Y";
case Attribute::Generic17Z:
return "Generic[17].Z";
case Attribute::Generic17W:
return "Generic[17].W";
case Attribute::Generic18X:
return "Generic[18].X";
case Attribute::Generic18Y:
return "Generic[18].Y";
case Attribute::Generic18Z:
return "Generic[18].Z";
case Attribute::Generic18W:
return "Generic[18].W";
case Attribute::Generic19X:
return "Generic[19].X";
case Attribute::Generic19Y:
return "Generic[19].Y";
case Attribute::Generic19Z:
return "Generic[19].Z";
case Attribute::Generic19W:
return "Generic[19].W";
case Attribute::Generic20X:
return "Generic[20].X";
case Attribute::Generic20Y:
return "Generic[20].Y";
case Attribute::Generic20Z:
return "Generic[20].Z";
case Attribute::Generic20W:
return "Generic[20].W";
case Attribute::Generic21X:
return "Generic[21].X";
case Attribute::Generic21Y:
return "Generic[21].Y";
case Attribute::Generic21Z:
return "Generic[21].Z";
case Attribute::Generic21W:
return "Generic[21].W";
case Attribute::Generic22X:
return "Generic[22].X";
case Attribute::Generic22Y:
return "Generic[22].Y";
case Attribute::Generic22Z:
return "Generic[22].Z";
case Attribute::Generic22W:
return "Generic[22].W";
case Attribute::Generic23X:
return "Generic[23].X";
case Attribute::Generic23Y:
return "Generic[23].Y";
case Attribute::Generic23Z:
return "Generic[23].Z";
case Attribute::Generic23W:
return "Generic[23].W";
case Attribute::Generic24X:
return "Generic[24].X";
case Attribute::Generic24Y:
return "Generic[24].Y";
case Attribute::Generic24Z:
return "Generic[24].Z";
case Attribute::Generic24W:
return "Generic[24].W";
case Attribute::Generic25X:
return "Generic[25].X";
case Attribute::Generic25Y:
return "Generic[25].Y";
case Attribute::Generic25Z:
return "Generic[25].Z";
case Attribute::Generic25W:
return "Generic[25].W";
case Attribute::Generic26X:
return "Generic[26].X";
case Attribute::Generic26Y:
return "Generic[26].Y";
case Attribute::Generic26Z:
return "Generic[26].Z";
case Attribute::Generic26W:
return "Generic[26].W";
case Attribute::Generic27X:
return "Generic[27].X";
case Attribute::Generic27Y:
return "Generic[27].Y";
case Attribute::Generic27Z:
return "Generic[27].Z";
case Attribute::Generic27W:
return "Generic[27].W";
case Attribute::Generic28X:
return "Generic[28].X";
case Attribute::Generic28Y:
return "Generic[28].Y";
case Attribute::Generic28Z:
return "Generic[28].Z";
case Attribute::Generic28W:
return "Generic[28].W";
case Attribute::Generic29X:
return "Generic[29].X";
case Attribute::Generic29Y:
return "Generic[29].Y";
case Attribute::Generic29Z:
return "Generic[29].Z";
case Attribute::Generic29W:
return "Generic[29].W";
case Attribute::Generic30X:
return "Generic[30].X";
case Attribute::Generic30Y:
return "Generic[30].Y";
case Attribute::Generic30Z:
return "Generic[30].Z";
case Attribute::Generic30W:
return "Generic[30].W";
case Attribute::Generic31X:
return "Generic[31].X";
case Attribute::Generic31Y:
return "Generic[31].Y";
case Attribute::Generic31Z:
return "Generic[31].Z";
case Attribute::Generic31W:
return "Generic[31].W";
case Attribute::ColorFrontDiffuseR:
return "ColorFrontDiffuse.R";
case Attribute::ColorFrontDiffuseG:
return "ColorFrontDiffuse.G";
case Attribute::ColorFrontDiffuseB:
return "ColorFrontDiffuse.B";
case Attribute::ColorFrontDiffuseA:
return "ColorFrontDiffuse.A";
case Attribute::ColorFrontSpecularR:
return "ColorFrontSpecular.R";
case Attribute::ColorFrontSpecularG:
return "ColorFrontSpecular.G";
case Attribute::ColorFrontSpecularB:
return "ColorFrontSpecular.B";
case Attribute::ColorFrontSpecularA:
return "ColorFrontSpecular.A";
case Attribute::ColorBackDiffuseR:
return "ColorBackDiffuse.R";
case Attribute::ColorBackDiffuseG:
return "ColorBackDiffuse.G";
case Attribute::ColorBackDiffuseB:
return "ColorBackDiffuse.B";
case Attribute::ColorBackDiffuseA:
return "ColorBackDiffuse.A";
case Attribute::ColorBackSpecularR:
return "ColorBackSpecular.R";
case Attribute::ColorBackSpecularG:
return "ColorBackSpecular.G";
case Attribute::ColorBackSpecularB:
return "ColorBackSpecular.B";
case Attribute::ColorBackSpecularA:
return "ColorBackSpecular.A";
case Attribute::ClipDistance0:
return "ClipDistance[0]";
case Attribute::ClipDistance1:
return "ClipDistance[1]";
case Attribute::ClipDistance2:
return "ClipDistance[2]";
case Attribute::ClipDistance3:
return "ClipDistance[3]";
case Attribute::ClipDistance4:
return "ClipDistance[4]";
case Attribute::ClipDistance5:
return "ClipDistance[5]";
case Attribute::ClipDistance6:
return "ClipDistance[6]";
case Attribute::ClipDistance7:
return "ClipDistance[7]";
case Attribute::PointSpriteS:
return "PointSprite.S";
case Attribute::PointSpriteT:
return "PointSprite.T";
case Attribute::FogCoordinate:
return "FogCoordinate";
case Attribute::TessellationEvaluationPointU:
return "TessellationEvaluationPoint.U";
case Attribute::TessellationEvaluationPointV:
return "TessellationEvaluationPoint.V";
case Attribute::InstanceId:
return "InstanceId";
case Attribute::VertexId:
return "VertexId";
case Attribute::FixedFncTexture0S:
return "FixedFncTexture[0].S";
case Attribute::FixedFncTexture0T:
return "FixedFncTexture[0].T";
case Attribute::FixedFncTexture0R:
return "FixedFncTexture[0].R";
case Attribute::FixedFncTexture0Q:
return "FixedFncTexture[0].Q";
case Attribute::FixedFncTexture1S:
return "FixedFncTexture[1].S";
case Attribute::FixedFncTexture1T:
return "FixedFncTexture[1].T";
case Attribute::FixedFncTexture1R:
return "FixedFncTexture[1].R";
case Attribute::FixedFncTexture1Q:
return "FixedFncTexture[1].Q";
case Attribute::FixedFncTexture2S:
return "FixedFncTexture[2].S";
case Attribute::FixedFncTexture2T:
return "FixedFncTexture[2].T";
case Attribute::FixedFncTexture2R:
return "FixedFncTexture[2].R";
case Attribute::FixedFncTexture2Q:
return "FixedFncTexture[2].Q";
case Attribute::FixedFncTexture3S:
return "FixedFncTexture[3].S";
case Attribute::FixedFncTexture3T:
return "FixedFncTexture[3].T";
case Attribute::FixedFncTexture3R:
return "FixedFncTexture[3].R";
case Attribute::FixedFncTexture3Q:
return "FixedFncTexture[3].Q";
case Attribute::FixedFncTexture4S:
return "FixedFncTexture[4].S";
case Attribute::FixedFncTexture4T:
return "FixedFncTexture[4].T";
case Attribute::FixedFncTexture4R:
return "FixedFncTexture[4].R";
case Attribute::FixedFncTexture4Q:
return "FixedFncTexture[4].Q";
case Attribute::FixedFncTexture5S:
return "FixedFncTexture[5].S";
case Attribute::FixedFncTexture5T:
return "FixedFncTexture[5].T";
case Attribute::FixedFncTexture5R:
return "FixedFncTexture[5].R";
case Attribute::FixedFncTexture5Q:
return "FixedFncTexture[5].Q";
case Attribute::FixedFncTexture6S:
return "FixedFncTexture[6].S";
case Attribute::FixedFncTexture6T:
return "FixedFncTexture[6].T";
case Attribute::FixedFncTexture6R:
return "FixedFncTexture[6].R";
case Attribute::FixedFncTexture6Q:
return "FixedFncTexture[6].Q";
case Attribute::FixedFncTexture7S:
return "FixedFncTexture[7].S";
case Attribute::FixedFncTexture7T:
return "FixedFncTexture[7].T";
case Attribute::FixedFncTexture7R:
return "FixedFncTexture[7].R";
case Attribute::FixedFncTexture7Q:
return "FixedFncTexture[7].Q";
case Attribute::FixedFncTexture8S:
return "FixedFncTexture[8].S";
case Attribute::FixedFncTexture8T:
return "FixedFncTexture[8].T";
case Attribute::FixedFncTexture8R:
return "FixedFncTexture[8].R";
case Attribute::FixedFncTexture8Q:
return "FixedFncTexture[8].Q";
case Attribute::FixedFncTexture9S:
return "FixedFncTexture[9].S";
case Attribute::FixedFncTexture9T:
return "FixedFncTexture[9].T";
case Attribute::FixedFncTexture9R:
return "FixedFncTexture[9].R";
case Attribute::FixedFncTexture9Q:
return "FixedFncTexture[9].Q";
case Attribute::ViewportMask:
return "ViewportMask";
case Attribute::FrontFace:
return "FrontFace";
}
return fmt::format("<reserved attribute {}>", static_cast<int>(attribute));
}
} // namespace Shader::IR

View File

@@ -0,0 +1,250 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <fmt/format.h>
#include "common/common_types.h"
namespace Shader::IR {
// Hardware shader attribute indices. Values below 24 (and the holes such as
// 187 and 233-254) are reserved and have no named entry.
enum class Attribute : u64 {
    // System-generated values
    PrimitiveId = 24,
    Layer = 25,
    ViewportIndex = 26,
    PointSize = 27,
    // Clip-space position, one component per attribute
    PositionX = 28,
    PositionY = 29,
    PositionZ = 30,
    PositionW = 31,
    // 32 generic (user-defined) slots, four contiguous components (X/Y/Z/W) each
    Generic0X = 32,
    Generic0Y = 33,
    Generic0Z = 34,
    Generic0W = 35,
    Generic1X = 36,
    Generic1Y = 37,
    Generic1Z = 38,
    Generic1W = 39,
    Generic2X = 40,
    Generic2Y = 41,
    Generic2Z = 42,
    Generic2W = 43,
    Generic3X = 44,
    Generic3Y = 45,
    Generic3Z = 46,
    Generic3W = 47,
    Generic4X = 48,
    Generic4Y = 49,
    Generic4Z = 50,
    Generic4W = 51,
    Generic5X = 52,
    Generic5Y = 53,
    Generic5Z = 54,
    Generic5W = 55,
    Generic6X = 56,
    Generic6Y = 57,
    Generic6Z = 58,
    Generic6W = 59,
    Generic7X = 60,
    Generic7Y = 61,
    Generic7Z = 62,
    Generic7W = 63,
    Generic8X = 64,
    Generic8Y = 65,
    Generic8Z = 66,
    Generic8W = 67,
    Generic9X = 68,
    Generic9Y = 69,
    Generic9Z = 70,
    Generic9W = 71,
    Generic10X = 72,
    Generic10Y = 73,
    Generic10Z = 74,
    Generic10W = 75,
    Generic11X = 76,
    Generic11Y = 77,
    Generic11Z = 78,
    Generic11W = 79,
    Generic12X = 80,
    Generic12Y = 81,
    Generic12Z = 82,
    Generic12W = 83,
    Generic13X = 84,
    Generic13Y = 85,
    Generic13Z = 86,
    Generic13W = 87,
    Generic14X = 88,
    Generic14Y = 89,
    Generic14Z = 90,
    Generic14W = 91,
    Generic15X = 92,
    Generic15Y = 93,
    Generic15Z = 94,
    Generic15W = 95,
    Generic16X = 96,
    Generic16Y = 97,
    Generic16Z = 98,
    Generic16W = 99,
    Generic17X = 100,
    Generic17Y = 101,
    Generic17Z = 102,
    Generic17W = 103,
    Generic18X = 104,
    Generic18Y = 105,
    Generic18Z = 106,
    Generic18W = 107,
    Generic19X = 108,
    Generic19Y = 109,
    Generic19Z = 110,
    Generic19W = 111,
    Generic20X = 112,
    Generic20Y = 113,
    Generic20Z = 114,
    Generic20W = 115,
    Generic21X = 116,
    Generic21Y = 117,
    Generic21Z = 118,
    Generic21W = 119,
    Generic22X = 120,
    Generic22Y = 121,
    Generic22Z = 122,
    Generic22W = 123,
    Generic23X = 124,
    Generic23Y = 125,
    Generic23Z = 126,
    Generic23W = 127,
    Generic24X = 128,
    Generic24Y = 129,
    Generic24Z = 130,
    Generic24W = 131,
    Generic25X = 132,
    Generic25Y = 133,
    Generic25Z = 134,
    Generic25W = 135,
    Generic26X = 136,
    Generic26Y = 137,
    Generic26Z = 138,
    Generic26W = 139,
    Generic27X = 140,
    Generic27Y = 141,
    Generic27Z = 142,
    Generic27W = 143,
    Generic28X = 144,
    Generic28Y = 145,
    Generic28Z = 146,
    Generic28W = 147,
    Generic29X = 148,
    Generic29Y = 149,
    Generic29Z = 150,
    Generic29W = 151,
    Generic30X = 152,
    Generic30Y = 153,
    Generic30Z = 154,
    Generic30W = 155,
    Generic31X = 156,
    Generic31Y = 157,
    Generic31Z = 158,
    Generic31W = 159,
    // Legacy fixed-function colors, four components (R/G/B/A) per slot
    ColorFrontDiffuseR = 160,
    ColorFrontDiffuseG = 161,
    ColorFrontDiffuseB = 162,
    ColorFrontDiffuseA = 163,
    ColorFrontSpecularR = 164,
    ColorFrontSpecularG = 165,
    ColorFrontSpecularB = 166,
    ColorFrontSpecularA = 167,
    ColorBackDiffuseR = 168,
    ColorBackDiffuseG = 169,
    ColorBackDiffuseB = 170,
    ColorBackDiffuseA = 171,
    ColorBackSpecularR = 172,
    ColorBackSpecularG = 173,
    ColorBackSpecularB = 174,
    ColorBackSpecularA = 175,
    // User clip distances
    ClipDistance0 = 176,
    ClipDistance1 = 177,
    ClipDistance2 = 178,
    ClipDistance3 = 179,
    ClipDistance4 = 180,
    ClipDistance5 = 181,
    ClipDistance6 = 182,
    ClipDistance7 = 183,
    PointSpriteS = 184,
    PointSpriteT = 185,
    FogCoordinate = 186,
    // Note: 187 is a reserved hole
    TessellationEvaluationPointU = 188,
    TessellationEvaluationPointV = 189,
    InstanceId = 190,
    VertexId = 191,
    // Legacy fixed-function texture coordinates, four components (S/T/R/Q) per slot
    FixedFncTexture0S = 192,
    FixedFncTexture0T = 193,
    FixedFncTexture0R = 194,
    FixedFncTexture0Q = 195,
    FixedFncTexture1S = 196,
    FixedFncTexture1T = 197,
    FixedFncTexture1R = 198,
    FixedFncTexture1Q = 199,
    FixedFncTexture2S = 200,
    FixedFncTexture2T = 201,
    FixedFncTexture2R = 202,
    FixedFncTexture2Q = 203,
    FixedFncTexture3S = 204,
    FixedFncTexture3T = 205,
    FixedFncTexture3R = 206,
    FixedFncTexture3Q = 207,
    FixedFncTexture4S = 208,
    FixedFncTexture4T = 209,
    FixedFncTexture4R = 210,
    FixedFncTexture4Q = 211,
    FixedFncTexture5S = 212,
    FixedFncTexture5T = 213,
    FixedFncTexture5R = 214,
    FixedFncTexture5Q = 215,
    FixedFncTexture6S = 216,
    FixedFncTexture6T = 217,
    FixedFncTexture6R = 218,
    FixedFncTexture6Q = 219,
    FixedFncTexture7S = 220,
    FixedFncTexture7T = 221,
    FixedFncTexture7R = 222,
    FixedFncTexture7Q = 223,
    FixedFncTexture8S = 224,
    FixedFncTexture8T = 225,
    FixedFncTexture8R = 226,
    FixedFncTexture8Q = 227,
    FixedFncTexture9S = 228,
    FixedFncTexture9T = 229,
    FixedFncTexture9R = 230,
    FixedFncTexture9Q = 231,
    ViewportMask = 232,
    FrontFace = 255,
};
/// Number of generic attribute slots (Generic0 through Generic31).
constexpr size_t NUM_GENERICS = 32;

/// Returns true when the attribute is one of the generic (user-defined) attributes.
[[nodiscard]] bool IsGeneric(Attribute attribute) noexcept;

/// Returns the generic slot index [0, NUM_GENERICS); throws InvalidArgument if not generic.
[[nodiscard]] u32 GenericAttributeIndex(Attribute attribute);

/// Returns the component index (0=X .. 3=W) of a generic attribute; throws if not generic.
[[nodiscard]] u32 GenericAttributeElement(Attribute attribute);

/// Returns a human-readable name of the attribute for IR dumps and logs.
[[nodiscard]] std::string NameOf(Attribute attribute);

/// Advances an attribute by 'value' steps; components of a slot are laid out contiguously.
[[nodiscard]] constexpr IR::Attribute operator+(IR::Attribute attribute, size_t value) noexcept {
    return static_cast<IR::Attribute>(static_cast<size_t>(attribute) + value);
}
} // namespace Shader::IR
// fmt formatter so attributes can be used directly in format strings;
// delegates to Shader::IR::NameOf.
template <>
struct fmt::formatter<Shader::IR::Attribute> {
    constexpr auto parse(format_parse_context& ctx) {
        return ctx.begin();
    }
    template <typename FormatContext>
    auto format(const Shader::IR::Attribute& attribute, FormatContext& ctx) {
        return fmt::format_to(ctx.out(), "{}", Shader::IR::NameOf(attribute));
    }
};

View File

@@ -0,0 +1,149 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <algorithm>
#include <initializer_list>
#include <map>
#include <memory>
#include "common/bit_cast.h"
#include "common/common_types.h"
#include "shader_recompiler/frontend/ir/basic_block.h"
#include "shader_recompiler/frontend/ir/value.h"
namespace Shader::IR {
// Blocks do not own their instructions' storage; instructions are allocated
// from the externally owned pool and linked into the intrusive list.
Block::Block(ObjectPool<Inst>& inst_pool_) : inst_pool{&inst_pool_} {}

Block::~Block() = default;

void Block::AppendNewInst(Opcode op, std::initializer_list<Value> args) {
    // Appending is prepending at the end() sentinel
    PrependNewInst(end(), op, args);
}
/// Creates a new instruction and inserts it before the given iterator.
/// @param insertion_point Position the instruction is inserted before.
/// @param op              Opcode of the new instruction.
/// @param args            Arguments; must match the opcode's argument count.
/// @param flags           Raw instruction flags.
/// @returns Iterator to the newly inserted instruction.
/// @throws InvalidArgument when the argument count does not match the opcode.
Block::iterator Block::PrependNewInst(iterator insertion_point, Opcode op,
                                      std::initializer_list<Value> args, u32 flags) {
    Inst* const inst{inst_pool->Create(op, flags)};
    // Validate before touching the instruction list so a thrown exception does
    // not leave a half-initialized instruction linked into the block.
    if (inst->NumArgs() != args.size()) {
        throw InvalidArgument("Invalid number of arguments {} in {}", args.size(), op);
    }
    size_t index{0};
    for (const Value& arg : args) {
        inst->SetArg(index, arg);
        ++index;
    }
    return instructions.insert(insertion_point, *inst);
}
/// Links this block to a successor, updating both edge lists.
/// @throws LogicError when the edge already exists in either direction.
void Block::AddBranch(Block* block) {
    const bool is_known_successor{std::ranges::find(imm_successors, block) !=
                                  imm_successors.end()};
    if (is_known_successor) {
        throw LogicError("Successor already inserted");
    }
    auto& their_preds{block->imm_predecessors};
    const bool is_known_predecessor{std::ranges::find(their_preds, this) != their_preds.end()};
    if (is_known_predecessor) {
        throw LogicError("Predecessor already inserted");
    }
    // Keep both directions of the CFG edge in sync
    imm_successors.push_back(block);
    their_preds.push_back(this);
}
/// Formats a block reference as "{Block $N}" when indexed, or as its raw
/// address when the block is unknown to the dump.
static std::string BlockToIndex(const std::map<const Block*, size_t>& block_to_index,
                                Block* block) {
    const auto it{block_to_index.find(block)};
    if (it == block_to_index.end()) {
        return fmt::format("$<unknown block {:016x}>", reinterpret_cast<u64>(block));
    }
    return fmt::format("{{Block ${}}}", it->second);
}
/// Returns a stable 1-based index for an instruction, assigning the next free
/// index on first sight and reusing it afterwards.
static size_t InstIndex(std::map<const Inst*, size_t>& inst_to_index, size_t& inst_index,
                        const Inst* inst) {
    const auto insertion{inst_to_index.emplace(inst, inst_index + 1)};
    const bool first_time_seen{insertion.second};
    if (first_time_seen) {
        ++inst_index;
    }
    return insertion.first->second;
}
/// Formats one instruction argument: "%N" for values produced by instructions,
/// "#literal" for immediates, and the enum's own formatter for reg/pred/attribute.
static std::string ArgToIndex(std::map<const Inst*, size_t>& inst_to_index, size_t& inst_index,
                              const Value& arg) {
    if (arg.IsEmpty()) {
        return "<null>";
    }
    // Identities are followed to the instruction that defines them
    if (!arg.IsImmediate() || arg.IsIdentity()) {
        return fmt::format("%{}", InstIndex(inst_to_index, inst_index, arg.Inst()));
    }
    switch (arg.Type()) {
    case Type::U1:
        return fmt::format("#{}", arg.U1() ? "true" : "false");
    case Type::U8:
        return fmt::format("#{}", arg.U8());
    case Type::U16:
        return fmt::format("#{}", arg.U16());
    case Type::U32:
        return fmt::format("#{}", arg.U32());
    case Type::U64:
        return fmt::format("#{}", arg.U64());
    case Type::F32:
        return fmt::format("#{}", arg.F32());
    case Type::Reg:
        return fmt::format("{}", arg.Reg());
    case Type::Pred:
        return fmt::format("{}", arg.Pred());
    case Type::Attribute:
        return fmt::format("{}", arg.Attribute());
    default:
        return "<unknown immediate type>";
    }
}
/// Convenience overload that dumps a single block with fresh numbering state.
std::string DumpBlock(const Block& block) {
    std::map<const Inst*, size_t> inst_to_index;
    size_t inst_index{0};
    // No block indices are available when dumping a lone block
    return DumpBlock(block, std::map<const Block*, size_t>{}, inst_to_index, inst_index);
}
/// Dumps a block to text, sharing instruction/block numbering state with the
/// caller so multiple blocks of one function number consistently.
std::string DumpBlock(const Block& block, const std::map<const Block*, size_t>& block_to_index,
                      std::map<const Inst*, size_t>& inst_to_index, size_t& inst_index) {
    std::string ret{"Block"};
    if (const auto it{block_to_index.find(&block)}; it != block_to_index.end()) {
        ret += fmt::format(" ${}", it->second);
    }
    ret += '\n';
    for (const Inst& inst : block) {
        const Opcode op{inst.GetOpcode()};
        ret += fmt::format("[{:016x}] ", reinterpret_cast<u64>(&inst));
        if (TypeOf(op) != Type::Void) {
            // Value-producing instructions get a "%N =" prefix
            ret += fmt::format("%{:<5} = {}", InstIndex(inst_to_index, inst_index, &inst), op);
        } else {
            ret += fmt::format("         {}", op); // '%00000 = ' -> 1 + 5 + 3 = 9 spaces
        }
        const size_t arg_count{inst.NumArgs()};
        for (size_t arg_index = 0; arg_index < arg_count; ++arg_index) {
            const Value arg{inst.Arg(arg_index)};
            const std::string arg_str{ArgToIndex(inst_to_index, inst_index, arg)};
            ret += arg_index != 0 ? ", " : " ";
            if (op == Opcode::Phi) {
                // Phi arguments are printed with their incoming block
                ret += fmt::format("[ {}, {} ]", arg_str,
                                   BlockToIndex(block_to_index, inst.PhiBlock(arg_index)));
            } else {
                ret += arg_str;
            }
            if (op != Opcode::Phi) {
                // Phi argument types are resolved later, so only non-phi args are checked
                const Type actual_type{arg.Type()};
                const Type expected_type{ArgTypeOf(op, arg_index)};
                if (!AreTypesCompatible(actual_type, expected_type)) {
                    ret += fmt::format("<type error: {} != {}>", actual_type, expected_type);
                }
            }
        }
        if (TypeOf(op) != Type::Void) {
            ret += fmt::format(" (uses: {})\n", inst.UseCount());
        } else {
            ret += '\n';
        }
    }
    return ret;
}
} // namespace Shader::IR

View File

@@ -0,0 +1,185 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <initializer_list>
#include <map>
#include <span>
#include <vector>
#include <boost/intrusive/list.hpp>
#include "common/bit_cast.h"
#include "common/common_types.h"
#include "shader_recompiler/frontend/ir/condition.h"
#include "shader_recompiler/frontend/ir/value.h"
#include "shader_recompiler/object_pool.h"
namespace Shader::IR {
// Basic block of the IR. Holds an intrusive list of instructions allocated
// from an external pool, the CFG edges to/from other blocks, SSA construction
// state, and an opaque backend "definition" tag.
class Block {
public:
    using InstructionList = boost::intrusive::list<Inst>;
    using size_type = InstructionList::size_type;
    using iterator = InstructionList::iterator;
    using const_iterator = InstructionList::const_iterator;
    using reverse_iterator = InstructionList::reverse_iterator;
    using const_reverse_iterator = InstructionList::const_reverse_iterator;

    explicit Block(ObjectPool<Inst>& inst_pool_);
    ~Block();

    // Non-copyable: blocks are referenced by pointer from other blocks
    Block(const Block&) = delete;
    Block& operator=(const Block&) = delete;

    Block(Block&&) = default;
    Block& operator=(Block&&) = default;

    /// Appends a new instruction to the end of this basic block.
    void AppendNewInst(Opcode op, std::initializer_list<Value> args);

    /// Prepends a new instruction to this basic block before the insertion point.
    iterator PrependNewInst(iterator insertion_point, Opcode op,
                            std::initializer_list<Value> args = {}, u32 flags = 0);

    /// Adds a new branch to this basic block.
    void AddBranch(Block* block);

    /// Gets a mutable reference to the instruction list for this basic block.
    [[nodiscard]] InstructionList& Instructions() noexcept {
        return instructions;
    }
    /// Gets an immutable reference to the instruction list for this basic block.
    [[nodiscard]] const InstructionList& Instructions() const noexcept {
        return instructions;
    }

    /// Gets an immutable span to the immediate predecessors.
    [[nodiscard]] std::span<Block* const> ImmPredecessors() const noexcept {
        return imm_predecessors;
    }
    /// Gets an immutable span to the immediate successors.
    [[nodiscard]] std::span<Block* const> ImmSuccessors() const noexcept {
        return imm_successors;
    }

    /// Intrusively store the host definition of this instruction.
    template <typename DefinitionType>
    void SetDefinition(DefinitionType def) {
        definition = Common::BitCast<u32>(def);
    }

    /// Return the intrusively stored host definition of this instruction.
    template <typename DefinitionType>
    [[nodiscard]] DefinitionType Definition() const noexcept {
        return Common::BitCast<DefinitionType>(definition);
    }

    /// Records the SSA value currently bound to a register in this block.
    void SetSsaRegValue(IR::Reg reg, const Value& value) noexcept {
        ssa_reg_values[RegIndex(reg)] = value;
    }
    /// Returns the SSA value currently bound to a register in this block.
    const Value& SsaRegValue(IR::Reg reg) const noexcept {
        return ssa_reg_values[RegIndex(reg)];
    }

    /// Marks the block as sealed for the SSA construction pass.
    void SsaSeal() noexcept {
        is_ssa_sealed = true;
    }
    [[nodiscard]] bool IsSsaSealed() const noexcept {
        return is_ssa_sealed;
    }

    // Container interface forwarding to the underlying instruction list
    [[nodiscard]] bool empty() const {
        return instructions.empty();
    }
    [[nodiscard]] size_type size() const {
        return instructions.size();
    }

    [[nodiscard]] Inst& front() {
        return instructions.front();
    }
    [[nodiscard]] const Inst& front() const {
        return instructions.front();
    }

    [[nodiscard]] Inst& back() {
        return instructions.back();
    }
    [[nodiscard]] const Inst& back() const {
        return instructions.back();
    }

    [[nodiscard]] iterator begin() {
        return instructions.begin();
    }
    [[nodiscard]] const_iterator begin() const {
        return instructions.begin();
    }
    [[nodiscard]] iterator end() {
        return instructions.end();
    }
    [[nodiscard]] const_iterator end() const {
        return instructions.end();
    }

    [[nodiscard]] reverse_iterator rbegin() {
        return instructions.rbegin();
    }
    [[nodiscard]] const_reverse_iterator rbegin() const {
        return instructions.rbegin();
    }
    [[nodiscard]] reverse_iterator rend() {
        return instructions.rend();
    }
    [[nodiscard]] const_reverse_iterator rend() const {
        return instructions.rend();
    }

    [[nodiscard]] const_iterator cbegin() const {
        return instructions.cbegin();
    }
    [[nodiscard]] const_iterator cend() const {
        return instructions.cend();
    }

    [[nodiscard]] const_reverse_iterator crbegin() const {
        return instructions.crbegin();
    }
    [[nodiscard]] const_reverse_iterator crend() const {
        return instructions.crend();
    }

private:
    /// Memory pool for instruction list
    ObjectPool<Inst>* inst_pool;
    /// List of instructions in this block
    InstructionList instructions;
    /// Block immediate predecessors
    std::vector<Block*> imm_predecessors;
    /// Block immediate successors
    std::vector<Block*> imm_successors;
    /// Intrusively store the value of a register in the block.
    std::array<Value, NUM_REGS> ssa_reg_values;
    /// Intrusively store if the block is sealed in the SSA pass.
    bool is_ssa_sealed{false};
    /// Intrusively stored host definition of this block.
    u32 definition{};
};

using BlockList = std::vector<Block*>;

/// Dumps a lone block with fresh numbering state.
[[nodiscard]] std::string DumpBlock(const Block& block);
/// Dumps a block, sharing numbering state across blocks of the same function.
[[nodiscard]] std::string DumpBlock(const Block& block,
                                    const std::map<const Block*, size_t>& block_to_index,
                                    std::map<const Inst*, size_t>& inst_to_index,
                                    size_t& inst_index);
} // namespace Shader::IR

View File

@@ -0,0 +1,56 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <algorithm>
#include <optional>
#include <queue>
#include <type_traits>
#include <boost/container/small_vector.hpp>
#include "shader_recompiler/frontend/ir/value.h"
namespace Shader::IR {
/// Walks the SSA producers of 'value' breadth-first, visiting the right-most
/// arguments first, and returns the first non-empty result of 'pred'.
/// Returns std::nullopt when no instruction satisfies the predicate.
template <typename Pred>
auto BreadthFirstSearch(const Value& value, Pred&& pred)
    -> std::invoke_result_t<Pred, const Inst*> {
    if (value.IsImmediate()) {
        // Immediates have no defining instruction to inspect
        return std::nullopt;
    }
    // Small vector size has been determined from shaders in Super Smash Bros. Ultimate
    boost::container::small_vector<const Inst*, 2> seen;
    std::queue<const Inst*> to_visit;
    to_visit.push(value.InstRecursive());
    while (!to_visit.empty()) {
        const Inst* const current{to_visit.front()};
        to_visit.pop();
        if (const std::optional result = pred(current)) {
            // Found an instruction satisfying the predicate
            return result;
        }
        // Enqueue producers from the last argument down to the first
        for (size_t arg_index = current->NumArgs(); arg_index-- > 0;) {
            const Value arg{current->Arg(arg_index)};
            if (arg.IsImmediate()) {
                continue;
            }
            const Inst* const producer{arg.InstRecursive()};
            const bool not_yet_seen{std::ranges::find(seen, producer) == seen.end()};
            if (not_yet_seen) {
                seen.push_back(producer);
                to_visit.push(producer);
            }
        }
    }
    // The whole SSA tree was traversed without a match
    return std::nullopt;
}
} // namespace Shader::IR

View File

@@ -0,0 +1,29 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <string>
#include <fmt/format.h>
#include "shader_recompiler/frontend/ir/condition.h"
namespace Shader::IR {
/// Formats a condition as "[flow_test&][!]pred", omitting the flow test when
/// it is trivially true (FlowTest::T).
std::string NameOf(Condition condition) {
    std::string name;
    if (const FlowTest flow_test{condition.GetFlowTest()}; flow_test != FlowTest::T) {
        name = fmt::to_string(flow_test);
        name += '&';
    }
    const auto [pred, is_negated]{condition.GetPred()};
    if (is_negated) {
        name += '!';
    }
    name += fmt::to_string(pred);
    return name;
}
} // namespace Shader::IR

View File

@@ -0,0 +1,60 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <compare>
#include <string>
#include <fmt/format.h>
#include "common/common_types.h"
#include "shader_recompiler/frontend/ir/flow_test.h"
#include "shader_recompiler/frontend/ir/pred.h"
namespace Shader::IR {
// Packed branch condition: a hardware flow test combined with an optionally
// negated predicate register. Fields are stored as raw integers so the
// defaulted three-way comparison can compare conditions member-wise.
class Condition {
public:
    Condition() noexcept = default;

    explicit Condition(FlowTest flow_test_, Pred pred_, bool pred_negated_ = false) noexcept
        : flow_test{static_cast<u16>(flow_test_)}, pred{static_cast<u8>(pred_)},
          pred_negated{pred_negated_ ? u8{1} : u8{0}} {}

    explicit Condition(Pred pred_, bool pred_negated_ = false) noexcept
        : Condition(FlowTest::T, pred_, pred_negated_) {}

    // Condition{true} is "always taken" (PT, not negated); Condition{false} negates PT
    explicit Condition(bool value) : Condition(Pred::PT, !value) {}

    auto operator<=>(const Condition&) const noexcept = default;

    [[nodiscard]] IR::FlowTest GetFlowTest() const noexcept {
        return static_cast<IR::FlowTest>(flow_test);
    }

    [[nodiscard]] std::pair<IR::Pred, bool> GetPred() const noexcept {
        return {static_cast<IR::Pred>(pred), pred_negated != 0};
    }

private:
    // Default-constructed conditions are "always true". Member initializers
    // fix the previous default constructor, which left these fields
    // uninitialized and made the defaulted operator<=> read indeterminate values.
    u16 flow_test{static_cast<u16>(FlowTest::T)};
    u8 pred{static_cast<u8>(Pred::PT)};
    u8 pred_negated{0};
};

/// Formats a condition as "[flow_test&][!]pred".
std::string NameOf(Condition condition);
} // namespace Shader::IR
// fmt formatter so conditions can be used directly in format strings;
// delegates to Shader::IR::NameOf.
template <>
struct fmt::formatter<Shader::IR::Condition> {
    constexpr auto parse(format_parse_context& ctx) {
        return ctx.begin();
    }
    template <typename FormatContext>
    auto format(const Shader::IR::Condition& cond, FormatContext& ctx) {
        return fmt::format_to(ctx.out(), "{}", Shader::IR::NameOf(cond));
    }
};

View File

@@ -0,0 +1,83 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <string>
#include <fmt/format.h>
#include "shader_recompiler/frontend/ir/flow_test.h"
namespace Shader::IR {
// Returns the mnemonic of a flow test, or a formatted "<invalid flow test N>"
// string for out-of-range values.
std::string NameOf(FlowTest flow_test) {
    // Table indexed by the enumerator's underlying value; the FlowTest
    // enumerators are declared consecutively starting at zero, so the table
    // order mirrors the declaration order (note NaN is spelled "NAN").
    static constexpr const char* NAMES[]{
        "F",       "LT",      "EQ",      "LE",      "GT",      "NE",
        "GE",      "NUM",     "NAN",     "LTU",     "EQU",     "LEU",
        "GTU",     "NEU",     "GEU",     "T",       "OFF",     "LO",
        "SFF",     "LS",      "HI",      "SFT",     "HS",      "OFT",
        "CSM_TA",  "CSM_TR",  "CSM_MX",  "FCSM_TA", "FCSM_TR", "FCSM_MX",
        "RLE",     "RGT",
    };
    const u64 index{static_cast<u64>(flow_test)};
    if (index < sizeof(NAMES) / sizeof(NAMES[0])) {
        return NAMES[index];
    }
    return fmt::format("<invalid flow test {}>", static_cast<int>(flow_test));
}
} // namespace Shader::IR

View File

@@ -0,0 +1,62 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <string>
#include <fmt/format.h>
#include "common/common_types.h"
namespace Shader::IR {
// Flow-test conditions used by IR::Condition. Enumerators are declared
// consecutively starting at zero; the underlying type is u64 — presumably so
// values can be taken straight from 64-bit instruction words (TODO confirm).
enum class FlowTest : u64 {
    F,
    LT,
    EQ,
    LE,
    GT,
    NE,
    GE,
    NUM,
    NaN,
    LTU,
    EQU,
    LEU,
    GTU,
    NEU,
    GEU,
    T,
    OFF,
    LO,
    SFF,
    LS,
    HI,
    SFT,
    HS,
    OFT,
    CSM_TA,
    CSM_TR,
    CSM_MX,
    FCSM_TA,
    FCSM_TR,
    FCSM_MX,
    RLE,
    RGT,
};
[[nodiscard]] std::string NameOf(FlowTest flow_test);
} // namespace Shader::IR
// fmt formatter specialization so IR::FlowTest can be used directly in
// fmt::format strings; delegates to Shader::IR::NameOf for the mnemonic.
template <>
struct fmt::formatter<Shader::IR::FlowTest> {
    constexpr auto parse(format_parse_context& ctx) {
        return ctx.begin();
    }
    template <typename FormatContext>
    auto format(const Shader::IR::FlowTest& flow_test, FormatContext& ctx) {
        return fmt::format_to(ctx.out(), "{}", Shader::IR::NameOf(flow_test));
    }
};

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,413 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <cstring>
#include <type_traits>
#include "shader_recompiler/frontend/ir/attribute.h"
#include "shader_recompiler/frontend/ir/basic_block.h"
#include "shader_recompiler/frontend/ir/modifiers.h"
#include "shader_recompiler/frontend/ir/value.h"
namespace Shader::IR {
/// Helper to build IR instructions inside a basic block. Instructions are
/// inserted before `insertion_point` (block end by default). Each member
/// function emits one opcode and returns its typed result value.
class IREmitter {
public:
    explicit IREmitter(Block& block_) : block{&block_}, insertion_point{block->end()} {}
    explicit IREmitter(Block& block_, Block::iterator insertion_point_)
        : block{&block_}, insertion_point{insertion_point_} {}

    Block* block;

    // Typed immediate constructors
    [[nodiscard]] U1 Imm1(bool value) const;
    [[nodiscard]] U8 Imm8(u8 value) const;
    [[nodiscard]] U16 Imm16(u16 value) const;
    [[nodiscard]] U32 Imm32(u32 value) const;
    [[nodiscard]] U32 Imm32(s32 value) const;
    [[nodiscard]] F32 Imm32(f32 value) const;
    [[nodiscard]] U64 Imm64(u64 value) const;
    [[nodiscard]] U64 Imm64(s64 value) const;
    [[nodiscard]] F64 Imm64(f64 value) const;

    // Control-flow and structural markers
    U1 ConditionRef(const U1& value);
    void Reference(const Value& value);

    void PhiMove(IR::Inst& phi, const Value& value);

    void Prologue();
    void Epilogue();
    void DemoteToHelperInvocation();
    void EmitVertex(const U32& stream);
    void EndPrimitive(const U32& stream);

    // Guest register and predicate access
    [[nodiscard]] U32 GetReg(IR::Reg reg);
    void SetReg(IR::Reg reg, const U32& value);

    [[nodiscard]] U1 GetPred(IR::Pred pred, bool is_negated = false);
    void SetPred(IR::Pred pred, const U1& value);

    // Structurizer bookkeeping variables
    [[nodiscard]] U1 GetGotoVariable(u32 id);
    void SetGotoVariable(u32 id, const U1& value);

    [[nodiscard]] U32 GetIndirectBranchVariable();
    void SetIndirectBranchVariable(const U32& value);

    // Constant buffer reads
    [[nodiscard]] U32 GetCbuf(const U32& binding, const U32& byte_offset);
    [[nodiscard]] Value GetCbuf(const U32& binding, const U32& byte_offset, size_t bitsize,
                                bool is_signed);
    [[nodiscard]] F32 GetFloatCbuf(const U32& binding, const U32& byte_offset);

    // Condition-code flags
    [[nodiscard]] U1 GetZFlag();
    [[nodiscard]] U1 GetSFlag();
    [[nodiscard]] U1 GetCFlag();
    [[nodiscard]] U1 GetOFlag();

    void SetZFlag(const U1& value);
    void SetSFlag(const U1& value);
    void SetCFlag(const U1& value);
    void SetOFlag(const U1& value);

    [[nodiscard]] U1 Condition(IR::Condition cond);
    [[nodiscard]] U1 GetFlowTestResult(FlowTest test);

    // Shader input/output attributes and patches
    [[nodiscard]] F32 GetAttribute(IR::Attribute attribute);
    [[nodiscard]] F32 GetAttribute(IR::Attribute attribute, const U32& vertex);
    void SetAttribute(IR::Attribute attribute, const F32& value, const U32& vertex);

    [[nodiscard]] F32 GetAttributeIndexed(const U32& phys_address);
    [[nodiscard]] F32 GetAttributeIndexed(const U32& phys_address, const U32& vertex);
    void SetAttributeIndexed(const U32& phys_address, const F32& value, const U32& vertex);

    [[nodiscard]] F32 GetPatch(Patch patch);
    void SetPatch(Patch patch, const F32& value);

    // Fragment outputs
    void SetFragColor(u32 index, u32 component, const F32& value);
    void SetSampleMask(const U32& value);
    void SetFragDepth(const F32& value);

    // Compute/system values
    [[nodiscard]] U32 WorkgroupIdX();
    [[nodiscard]] U32 WorkgroupIdY();
    [[nodiscard]] U32 WorkgroupIdZ();

    [[nodiscard]] Value LocalInvocationId();
    [[nodiscard]] U32 LocalInvocationIdX();
    [[nodiscard]] U32 LocalInvocationIdY();
    [[nodiscard]] U32 LocalInvocationIdZ();

    [[nodiscard]] U32 InvocationId();
    [[nodiscard]] U32 SampleId();
    [[nodiscard]] U1 IsHelperInvocation();
    [[nodiscard]] F32 YDirection();

    [[nodiscard]] U32 LaneId();

    // Global memory accesses (64-bit addressed)
    [[nodiscard]] U32 LoadGlobalU8(const U64& address);
    [[nodiscard]] U32 LoadGlobalS8(const U64& address);
    [[nodiscard]] U32 LoadGlobalU16(const U64& address);
    [[nodiscard]] U32 LoadGlobalS16(const U64& address);
    [[nodiscard]] U32 LoadGlobal32(const U64& address);
    [[nodiscard]] Value LoadGlobal64(const U64& address);
    [[nodiscard]] Value LoadGlobal128(const U64& address);

    void WriteGlobalU8(const U64& address, const U32& value);
    void WriteGlobalS8(const U64& address, const U32& value);
    void WriteGlobalU16(const U64& address, const U32& value);
    void WriteGlobalS16(const U64& address, const U32& value);
    void WriteGlobal32(const U64& address, const U32& value);
    void WriteGlobal64(const U64& address, const IR::Value& vector);
    void WriteGlobal128(const U64& address, const IR::Value& vector);

    // Local and shared memory accesses
    [[nodiscard]] U32 LoadLocal(const U32& word_offset);
    void WriteLocal(const U32& word_offset, const U32& value);

    [[nodiscard]] Value LoadShared(int bit_size, bool is_signed, const U32& offset);
    void WriteShared(int bit_size, const U32& offset, const Value& value);

    // Pseudo-op accessors for implicit results of a previous instruction
    [[nodiscard]] U1 GetZeroFromOp(const Value& op);
    [[nodiscard]] U1 GetSignFromOp(const Value& op);
    [[nodiscard]] U1 GetCarryFromOp(const Value& op);
    [[nodiscard]] U1 GetOverflowFromOp(const Value& op);
    [[nodiscard]] U1 GetSparseFromOp(const Value& op);
    [[nodiscard]] U1 GetInBoundsFromOp(const Value& op);

    // Composite (vector) construction and element access
    [[nodiscard]] Value CompositeConstruct(const Value& e1, const Value& e2);
    [[nodiscard]] Value CompositeConstruct(const Value& e1, const Value& e2, const Value& e3);
    [[nodiscard]] Value CompositeConstruct(const Value& e1, const Value& e2, const Value& e3,
                                           const Value& e4);
    [[nodiscard]] Value CompositeExtract(const Value& vector, size_t element);
    [[nodiscard]] Value CompositeInsert(const Value& vector, const Value& object, size_t element);

    [[nodiscard]] Value Select(const U1& condition, const Value& true_value,
                               const Value& false_value);

    // Synchronization barriers
    void Barrier();
    void WorkgroupMemoryBarrier();
    void DeviceMemoryBarrier();

    // Bit casts and packing/unpacking
    template <typename Dest, typename Source>
    [[nodiscard]] Dest BitCast(const Source& value);

    [[nodiscard]] U64 PackUint2x32(const Value& vector);
    [[nodiscard]] Value UnpackUint2x32(const U64& value);

    [[nodiscard]] U32 PackFloat2x16(const Value& vector);
    [[nodiscard]] Value UnpackFloat2x16(const U32& value);

    [[nodiscard]] U32 PackHalf2x16(const Value& vector);
    [[nodiscard]] Value UnpackHalf2x16(const U32& value);

    [[nodiscard]] F64 PackDouble2x32(const Value& vector);
    [[nodiscard]] Value UnpackDouble2x32(const F64& value);

    // Floating-point arithmetic
    [[nodiscard]] F16F32F64 FPAdd(const F16F32F64& a, const F16F32F64& b, FpControl control = {});
    [[nodiscard]] F16F32F64 FPMul(const F16F32F64& a, const F16F32F64& b, FpControl control = {});
    [[nodiscard]] F16F32F64 FPFma(const F16F32F64& a, const F16F32F64& b, const F16F32F64& c,
                                  FpControl control = {});

    [[nodiscard]] F16F32F64 FPAbs(const F16F32F64& value);
    [[nodiscard]] F16F32F64 FPNeg(const F16F32F64& value);
    [[nodiscard]] F16F32F64 FPAbsNeg(const F16F32F64& value, bool abs, bool neg);

    [[nodiscard]] F32 FPCos(const F32& value);
    [[nodiscard]] F32 FPSin(const F32& value);
    [[nodiscard]] F32 FPExp2(const F32& value);
    [[nodiscard]] F32 FPLog2(const F32& value);

    [[nodiscard]] F32F64 FPRecip(const F32F64& value);
    [[nodiscard]] F32F64 FPRecipSqrt(const F32F64& value);
    [[nodiscard]] F32 FPSqrt(const F32& value);
    [[nodiscard]] F16F32F64 FPSaturate(const F16F32F64& value);
    [[nodiscard]] F16F32F64 FPClamp(const F16F32F64& value, const F16F32F64& min_value,
                                    const F16F32F64& max_value);
    [[nodiscard]] F16F32F64 FPRoundEven(const F16F32F64& value, FpControl control = {});
    [[nodiscard]] F16F32F64 FPFloor(const F16F32F64& value, FpControl control = {});
    [[nodiscard]] F16F32F64 FPCeil(const F16F32F64& value, FpControl control = {});
    [[nodiscard]] F16F32F64 FPTrunc(const F16F32F64& value, FpControl control = {});

    // Floating-point comparisons (ordered/unordered)
    [[nodiscard]] U1 FPEqual(const F16F32F64& lhs, const F16F32F64& rhs, FpControl control = {},
                             bool ordered = true);
    [[nodiscard]] U1 FPNotEqual(const F16F32F64& lhs, const F16F32F64& rhs, FpControl control = {},
                                bool ordered = true);
    [[nodiscard]] U1 FPLessThan(const F16F32F64& lhs, const F16F32F64& rhs, FpControl control = {},
                                bool ordered = true);
    [[nodiscard]] U1 FPGreaterThan(const F16F32F64& lhs, const F16F32F64& rhs,
                                   FpControl control = {}, bool ordered = true);
    [[nodiscard]] U1 FPLessThanEqual(const F16F32F64& lhs, const F16F32F64& rhs,
                                     FpControl control = {}, bool ordered = true);
    [[nodiscard]] U1 FPGreaterThanEqual(const F16F32F64& lhs, const F16F32F64& rhs,
                                        FpControl control = {}, bool ordered = true);
    [[nodiscard]] U1 FPIsNan(const F16F32F64& value);
    [[nodiscard]] U1 FPOrdered(const F16F32F64& lhs, const F16F32F64& rhs);
    [[nodiscard]] U1 FPUnordered(const F16F32F64& lhs, const F16F32F64& rhs);
    [[nodiscard]] F32F64 FPMax(const F32F64& lhs, const F32F64& rhs, FpControl control = {});
    [[nodiscard]] F32F64 FPMin(const F32F64& lhs, const F32F64& rhs, FpControl control = {});

    // Integer arithmetic, shifts, and bit manipulation
    [[nodiscard]] U32U64 IAdd(const U32U64& a, const U32U64& b);
    [[nodiscard]] U32U64 ISub(const U32U64& a, const U32U64& b);
    [[nodiscard]] U32 IMul(const U32& a, const U32& b);
    [[nodiscard]] U32U64 INeg(const U32U64& value);
    [[nodiscard]] U32 IAbs(const U32& value);
    [[nodiscard]] U32U64 ShiftLeftLogical(const U32U64& base, const U32& shift);
    [[nodiscard]] U32U64 ShiftRightLogical(const U32U64& base, const U32& shift);
    [[nodiscard]] U32U64 ShiftRightArithmetic(const U32U64& base, const U32& shift);
    [[nodiscard]] U32 BitwiseAnd(const U32& a, const U32& b);
    [[nodiscard]] U32 BitwiseOr(const U32& a, const U32& b);
    [[nodiscard]] U32 BitwiseXor(const U32& a, const U32& b);
    [[nodiscard]] U32 BitFieldInsert(const U32& base, const U32& insert, const U32& offset,
                                     const U32& count);
    [[nodiscard]] U32 BitFieldExtract(const U32& base, const U32& offset, const U32& count,
                                      bool is_signed = false);
    [[nodiscard]] U32 BitReverse(const U32& value);
    [[nodiscard]] U32 BitCount(const U32& value);
    [[nodiscard]] U32 BitwiseNot(const U32& value);

    [[nodiscard]] U32 FindSMsb(const U32& value);
    [[nodiscard]] U32 FindUMsb(const U32& value);
    [[nodiscard]] U32 SMin(const U32& a, const U32& b);
    [[nodiscard]] U32 UMin(const U32& a, const U32& b);
    [[nodiscard]] U32 IMin(const U32& a, const U32& b, bool is_signed);
    [[nodiscard]] U32 SMax(const U32& a, const U32& b);
    [[nodiscard]] U32 UMax(const U32& a, const U32& b);
    [[nodiscard]] U32 IMax(const U32& a, const U32& b, bool is_signed);
    [[nodiscard]] U32 SClamp(const U32& value, const U32& min, const U32& max);
    [[nodiscard]] U32 UClamp(const U32& value, const U32& min, const U32& max);

    // Integer comparisons
    [[nodiscard]] U1 ILessThan(const U32& lhs, const U32& rhs, bool is_signed);
    [[nodiscard]] U1 IEqual(const U32U64& lhs, const U32U64& rhs);
    [[nodiscard]] U1 ILessThanEqual(const U32& lhs, const U32& rhs, bool is_signed);
    [[nodiscard]] U1 IGreaterThan(const U32& lhs, const U32& rhs, bool is_signed);
    [[nodiscard]] U1 INotEqual(const U32& lhs, const U32& rhs);
    [[nodiscard]] U1 IGreaterThanEqual(const U32& lhs, const U32& rhs, bool is_signed);

    // Shared memory atomics
    [[nodiscard]] U32 SharedAtomicIAdd(const U32& pointer_offset, const U32& value);
    [[nodiscard]] U32 SharedAtomicSMin(const U32& pointer_offset, const U32& value);
    [[nodiscard]] U32 SharedAtomicUMin(const U32& pointer_offset, const U32& value);
    [[nodiscard]] U32 SharedAtomicIMin(const U32& pointer_offset, const U32& value, bool is_signed);
    [[nodiscard]] U32 SharedAtomicSMax(const U32& pointer_offset, const U32& value);
    [[nodiscard]] U32 SharedAtomicUMax(const U32& pointer_offset, const U32& value);
    [[nodiscard]] U32 SharedAtomicIMax(const U32& pointer_offset, const U32& value, bool is_signed);
    [[nodiscard]] U32 SharedAtomicInc(const U32& pointer_offset, const U32& value);
    [[nodiscard]] U32 SharedAtomicDec(const U32& pointer_offset, const U32& value);
    [[nodiscard]] U32 SharedAtomicAnd(const U32& pointer_offset, const U32& value);
    [[nodiscard]] U32 SharedAtomicOr(const U32& pointer_offset, const U32& value);
    [[nodiscard]] U32 SharedAtomicXor(const U32& pointer_offset, const U32& value);
    [[nodiscard]] U32U64 SharedAtomicExchange(const U32& pointer_offset, const U32U64& value);

    // Global memory atomics
    [[nodiscard]] U32U64 GlobalAtomicIAdd(const U64& pointer_offset, const U32U64& value);
    [[nodiscard]] U32U64 GlobalAtomicSMin(const U64& pointer_offset, const U32U64& value);
    [[nodiscard]] U32U64 GlobalAtomicUMin(const U64& pointer_offset, const U32U64& value);
    [[nodiscard]] U32U64 GlobalAtomicIMin(const U64& pointer_offset, const U32U64& value,
                                          bool is_signed);
    [[nodiscard]] U32U64 GlobalAtomicSMax(const U64& pointer_offset, const U32U64& value);
    [[nodiscard]] U32U64 GlobalAtomicUMax(const U64& pointer_offset, const U32U64& value);
    [[nodiscard]] U32U64 GlobalAtomicIMax(const U64& pointer_offset, const U32U64& value,
                                          bool is_signed);
    [[nodiscard]] U32 GlobalAtomicInc(const U64& pointer_offset, const U32& value);
    [[nodiscard]] U32 GlobalAtomicDec(const U64& pointer_offset, const U32& value);
    [[nodiscard]] U32U64 GlobalAtomicAnd(const U64& pointer_offset, const U32U64& value);
    [[nodiscard]] U32U64 GlobalAtomicOr(const U64& pointer_offset, const U32U64& value);
    [[nodiscard]] U32U64 GlobalAtomicXor(const U64& pointer_offset, const U32U64& value);
    [[nodiscard]] U32U64 GlobalAtomicExchange(const U64& pointer_offset, const U32U64& value);

    [[nodiscard]] F32 GlobalAtomicF32Add(const U64& pointer_offset, const Value& value,
                                         const FpControl control = {});
    [[nodiscard]] Value GlobalAtomicF16x2Add(const U64& pointer_offset, const Value& value,
                                             const FpControl control = {});
    [[nodiscard]] Value GlobalAtomicF16x2Min(const U64& pointer_offset, const Value& value,
                                             const FpControl control = {});
    [[nodiscard]] Value GlobalAtomicF16x2Max(const U64& pointer_offset, const Value& value,
                                             const FpControl control = {});

    // Boolean logic
    [[nodiscard]] U1 LogicalOr(const U1& a, const U1& b);
    [[nodiscard]] U1 LogicalAnd(const U1& a, const U1& b);
    [[nodiscard]] U1 LogicalXor(const U1& a, const U1& b);
    [[nodiscard]] U1 LogicalNot(const U1& value);

    // Numeric conversions
    [[nodiscard]] U32U64 ConvertFToS(size_t bitsize, const F16F32F64& value);
    [[nodiscard]] U32U64 ConvertFToU(size_t bitsize, const F16F32F64& value);
    [[nodiscard]] U32U64 ConvertFToI(size_t bitsize, bool is_signed, const F16F32F64& value);
    [[nodiscard]] F16F32F64 ConvertSToF(size_t dest_bitsize, size_t src_bitsize, const Value& value,
                                        FpControl control = {});
    [[nodiscard]] F16F32F64 ConvertUToF(size_t dest_bitsize, size_t src_bitsize, const Value& value,
                                        FpControl control = {});
    [[nodiscard]] F16F32F64 ConvertIToF(size_t dest_bitsize, size_t src_bitsize, bool is_signed,
                                        const Value& value, FpControl control = {});

    [[nodiscard]] U32U64 UConvert(size_t result_bitsize, const U32U64& value);
    [[nodiscard]] F16F32F64 FPConvert(size_t result_bitsize, const F16F32F64& value,
                                      FpControl control = {});

    // Texture/image sampling and queries
    [[nodiscard]] Value ImageSampleImplicitLod(const Value& handle, const Value& coords,
                                               const F32& bias, const Value& offset,
                                               const F32& lod_clamp, TextureInstInfo info);
    [[nodiscard]] Value ImageSampleExplicitLod(const Value& handle, const Value& coords,
                                               const F32& lod, const Value& offset,
                                               TextureInstInfo info);
    [[nodiscard]] F32 ImageSampleDrefImplicitLod(const Value& handle, const Value& coords,
                                                 const F32& dref, const F32& bias,
                                                 const Value& offset, const F32& lod_clamp,
                                                 TextureInstInfo info);
    [[nodiscard]] F32 ImageSampleDrefExplicitLod(const Value& handle, const Value& coords,
                                                 const F32& dref, const F32& lod,
                                                 const Value& offset, TextureInstInfo info);
    [[nodiscard]] Value ImageQueryDimension(const Value& handle, const IR::U32& lod);
    [[nodiscard]] Value ImageQueryLod(const Value& handle, const Value& coords,
                                      TextureInstInfo info);
    [[nodiscard]] Value ImageGather(const Value& handle, const Value& coords, const Value& offset,
                                    const Value& offset2, TextureInstInfo info);
    [[nodiscard]] Value ImageGatherDref(const Value& handle, const Value& coords,
                                        const Value& offset, const Value& offset2, const F32& dref,
                                        TextureInstInfo info);
    [[nodiscard]] Value ImageFetch(const Value& handle, const Value& coords, const Value& offset,
                                   const U32& lod, const U32& multisampling, TextureInstInfo info);
    [[nodiscard]] Value ImageGradient(const Value& handle, const Value& coords,
                                      const Value& derivates, const Value& offset,
                                      const F32& lod_clamp, TextureInstInfo info);
    [[nodiscard]] Value ImageRead(const Value& handle, const Value& coords, TextureInstInfo info);
    // Fix: [[nodiscard]] removed — the attribute is meaningless on a function
    // returning void and triggers compiler warnings.
    void ImageWrite(const Value& handle, const Value& coords, const Value& color,
                    TextureInstInfo info);

    // Image atomics
    [[nodiscard]] Value ImageAtomicIAdd(const Value& handle, const Value& coords,
                                        const Value& value, TextureInstInfo info);
    [[nodiscard]] Value ImageAtomicSMin(const Value& handle, const Value& coords,
                                        const Value& value, TextureInstInfo info);
    [[nodiscard]] Value ImageAtomicUMin(const Value& handle, const Value& coords,
                                        const Value& value, TextureInstInfo info);
    [[nodiscard]] Value ImageAtomicIMin(const Value& handle, const Value& coords,
                                        const Value& value, bool is_signed, TextureInstInfo info);
    [[nodiscard]] Value ImageAtomicSMax(const Value& handle, const Value& coords,
                                        const Value& value, TextureInstInfo info);
    [[nodiscard]] Value ImageAtomicUMax(const Value& handle, const Value& coords,
                                        const Value& value, TextureInstInfo info);
    [[nodiscard]] Value ImageAtomicIMax(const Value& handle, const Value& coords,
                                        const Value& value, bool is_signed, TextureInstInfo info);
    [[nodiscard]] Value ImageAtomicInc(const Value& handle, const Value& coords, const Value& value,
                                       TextureInstInfo info);
    [[nodiscard]] Value ImageAtomicDec(const Value& handle, const Value& coords, const Value& value,
                                       TextureInstInfo info);
    [[nodiscard]] Value ImageAtomicAnd(const Value& handle, const Value& coords, const Value& value,
                                       TextureInstInfo info);
    [[nodiscard]] Value ImageAtomicOr(const Value& handle, const Value& coords, const Value& value,
                                      TextureInstInfo info);
    [[nodiscard]] Value ImageAtomicXor(const Value& handle, const Value& coords, const Value& value,
                                       TextureInstInfo info);
    [[nodiscard]] Value ImageAtomicExchange(const Value& handle, const Value& coords,
                                            const Value& value, TextureInstInfo info);

    // Subgroup votes, ballots, and shuffles
    [[nodiscard]] U1 VoteAll(const U1& value);
    [[nodiscard]] U1 VoteAny(const U1& value);
    [[nodiscard]] U1 VoteEqual(const U1& value);
    [[nodiscard]] U32 SubgroupBallot(const U1& value);
    [[nodiscard]] U32 SubgroupEqMask();
    [[nodiscard]] U32 SubgroupLtMask();
    [[nodiscard]] U32 SubgroupLeMask();
    [[nodiscard]] U32 SubgroupGtMask();
    [[nodiscard]] U32 SubgroupGeMask();
    [[nodiscard]] U32 ShuffleIndex(const IR::U32& value, const IR::U32& index, const IR::U32& clamp,
                                   const IR::U32& seg_mask);
    [[nodiscard]] U32 ShuffleUp(const IR::U32& value, const IR::U32& index, const IR::U32& clamp,
                                const IR::U32& seg_mask);
    [[nodiscard]] U32 ShuffleDown(const IR::U32& value, const IR::U32& index, const IR::U32& clamp,
                                  const IR::U32& seg_mask);
    [[nodiscard]] U32 ShuffleButterfly(const IR::U32& value, const IR::U32& index,
                                       const IR::U32& clamp, const IR::U32& seg_mask);
    [[nodiscard]] F32 FSwizzleAdd(const F32& a, const F32& b, const U32& swizzle,
                                  FpControl control = {});

    // Screen-space derivatives
    [[nodiscard]] F32 DPdxFine(const F32& a);
    [[nodiscard]] F32 DPdyFine(const F32& a);
    [[nodiscard]] F32 DPdxCoarse(const F32& a);
    [[nodiscard]] F32 DPdyCoarse(const F32& a);

private:
    IR::Block::iterator insertion_point;

    // Emits an instruction with the given opcode and arguments, returning its
    // result wrapped in the requested typed value.
    template <typename T = Value, typename... Args>
    T Inst(Opcode op, Args... args) {
        auto it{block->PrependNewInst(insertion_point, op, {Value{args}...})};
        return T{Value{&*it}};
    }

    // Wrapper marking a trivially-copyable value to be stored in the
    // instruction's 32-bit flags word instead of as an argument.
    template <typename T>
    requires(sizeof(T) <= sizeof(u32) && std::is_trivially_copyable_v<T>) struct Flags {
        Flags() = default;
        Flags(T proxy_) : proxy{proxy_} {}

        T proxy;
    };

    // Emits an instruction carrying extra flag bits (e.g. FpControl).
    template <typename T = Value, typename FlagType, typename... Args>
    T Inst(Opcode op, Flags<FlagType> flags, Args... args) {
        u32 raw_flags{};
        std::memcpy(&raw_flags, &flags.proxy, sizeof(flags.proxy));
        auto it{block->PrependNewInst(insertion_point, op, {Value{args}...}, raw_flags)};
        return T{Value{&*it}};
    }
};
} // namespace Shader::IR

View File

@@ -0,0 +1,411 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <algorithm>
#include <memory>
#include "shader_recompiler/exception.h"
#include "shader_recompiler/frontend/ir/type.h"
#include "shader_recompiler/frontend/ir/value.h"
namespace Shader::IR {
namespace {
void CheckPseudoInstruction(IR::Inst* inst, IR::Opcode opcode) {
if (inst && inst->GetOpcode() != opcode) {
throw LogicError("Invalid pseudo-instruction");
}
}
void SetPseudoInstruction(IR::Inst*& dest_inst, IR::Inst* pseudo_inst) {
if (dest_inst) {
throw LogicError("Only one of each type of pseudo-op allowed");
}
dest_inst = pseudo_inst;
}
void RemovePseudoInstruction(IR::Inst*& inst, IR::Opcode expected_opcode) {
if (inst->GetOpcode() != expected_opcode) {
throw LogicError("Undoing use of invalid pseudo-op");
}
inst = nullptr;
}
void AllocAssociatedInsts(std::unique_ptr<AssociatedInsts>& associated_insts) {
if (!associated_insts) {
associated_insts = std::make_unique<AssociatedInsts>();
}
}
} // Anonymous namespace
// Argument storage is a union: Phi instructions keep a dynamic operand list,
// every other opcode uses the fixed-size argument array. Construct the member
// matching the opcode.
Inst::Inst(IR::Opcode op_, u32 flags_) noexcept : op{op_}, flags{flags_} {
    if (op != Opcode::Phi) {
        std::construct_at(&args);
    } else {
        std::construct_at(&phi_args);
    }
}
// Destroy whichever union member the constructor created for this opcode.
Inst::~Inst() {
    if (op != Opcode::Phi) {
        std::destroy_at(&args);
    } else {
        std::destroy_at(&phi_args);
    }
}
// Returns true for instructions that must not be dead-code eliminated:
// structural markers, barriers, geometry emission, output/memory writes,
// atomics, and image writes/atomics.
// Consistency fix: all enumerators now use the unqualified Opcode:: spelling
// (we are inside namespace Shader::IR); previously half used IR::Opcode::.
bool Inst::MayHaveSideEffects() const noexcept {
    switch (op) {
    case Opcode::ConditionRef:
    case Opcode::Reference:
    case Opcode::PhiMove:
    case Opcode::Prologue:
    case Opcode::Epilogue:
    case Opcode::Join:
    case Opcode::DemoteToHelperInvocation:
    case Opcode::Barrier:
    case Opcode::WorkgroupMemoryBarrier:
    case Opcode::DeviceMemoryBarrier:
    case Opcode::EmitVertex:
    case Opcode::EndPrimitive:
    case Opcode::SetAttribute:
    case Opcode::SetAttributeIndexed:
    case Opcode::SetPatch:
    case Opcode::SetFragColor:
    case Opcode::SetSampleMask:
    case Opcode::SetFragDepth:
    case Opcode::WriteGlobalU8:
    case Opcode::WriteGlobalS8:
    case Opcode::WriteGlobalU16:
    case Opcode::WriteGlobalS16:
    case Opcode::WriteGlobal32:
    case Opcode::WriteGlobal64:
    case Opcode::WriteGlobal128:
    case Opcode::WriteStorageU8:
    case Opcode::WriteStorageS8:
    case Opcode::WriteStorageU16:
    case Opcode::WriteStorageS16:
    case Opcode::WriteStorage32:
    case Opcode::WriteStorage64:
    case Opcode::WriteStorage128:
    case Opcode::WriteLocal:
    case Opcode::WriteSharedU8:
    case Opcode::WriteSharedU16:
    case Opcode::WriteSharedU32:
    case Opcode::WriteSharedU64:
    case Opcode::WriteSharedU128:
    case Opcode::SharedAtomicIAdd32:
    case Opcode::SharedAtomicSMin32:
    case Opcode::SharedAtomicUMin32:
    case Opcode::SharedAtomicSMax32:
    case Opcode::SharedAtomicUMax32:
    case Opcode::SharedAtomicInc32:
    case Opcode::SharedAtomicDec32:
    case Opcode::SharedAtomicAnd32:
    case Opcode::SharedAtomicOr32:
    case Opcode::SharedAtomicXor32:
    case Opcode::SharedAtomicExchange32:
    case Opcode::SharedAtomicExchange64:
    case Opcode::GlobalAtomicIAdd32:
    case Opcode::GlobalAtomicSMin32:
    case Opcode::GlobalAtomicUMin32:
    case Opcode::GlobalAtomicSMax32:
    case Opcode::GlobalAtomicUMax32:
    case Opcode::GlobalAtomicInc32:
    case Opcode::GlobalAtomicDec32:
    case Opcode::GlobalAtomicAnd32:
    case Opcode::GlobalAtomicOr32:
    case Opcode::GlobalAtomicXor32:
    case Opcode::GlobalAtomicExchange32:
    case Opcode::GlobalAtomicIAdd64:
    case Opcode::GlobalAtomicSMin64:
    case Opcode::GlobalAtomicUMin64:
    case Opcode::GlobalAtomicSMax64:
    case Opcode::GlobalAtomicUMax64:
    case Opcode::GlobalAtomicAnd64:
    case Opcode::GlobalAtomicOr64:
    case Opcode::GlobalAtomicXor64:
    case Opcode::GlobalAtomicExchange64:
    case Opcode::GlobalAtomicAddF32:
    case Opcode::GlobalAtomicAddF16x2:
    case Opcode::GlobalAtomicAddF32x2:
    case Opcode::GlobalAtomicMinF16x2:
    case Opcode::GlobalAtomicMinF32x2:
    case Opcode::GlobalAtomicMaxF16x2:
    case Opcode::GlobalAtomicMaxF32x2:
    case Opcode::StorageAtomicIAdd32:
    case Opcode::StorageAtomicSMin32:
    case Opcode::StorageAtomicUMin32:
    case Opcode::StorageAtomicSMax32:
    case Opcode::StorageAtomicUMax32:
    case Opcode::StorageAtomicInc32:
    case Opcode::StorageAtomicDec32:
    case Opcode::StorageAtomicAnd32:
    case Opcode::StorageAtomicOr32:
    case Opcode::StorageAtomicXor32:
    case Opcode::StorageAtomicExchange32:
    case Opcode::StorageAtomicIAdd64:
    case Opcode::StorageAtomicSMin64:
    case Opcode::StorageAtomicUMin64:
    case Opcode::StorageAtomicSMax64:
    case Opcode::StorageAtomicUMax64:
    case Opcode::StorageAtomicAnd64:
    case Opcode::StorageAtomicOr64:
    case Opcode::StorageAtomicXor64:
    case Opcode::StorageAtomicExchange64:
    case Opcode::StorageAtomicAddF32:
    case Opcode::StorageAtomicAddF16x2:
    case Opcode::StorageAtomicAddF32x2:
    case Opcode::StorageAtomicMinF16x2:
    case Opcode::StorageAtomicMinF32x2:
    case Opcode::StorageAtomicMaxF16x2:
    case Opcode::StorageAtomicMaxF32x2:
    case Opcode::BindlessImageWrite:
    case Opcode::BoundImageWrite:
    case Opcode::ImageWrite:
    case Opcode::BindlessImageAtomicIAdd32:
    case Opcode::BindlessImageAtomicSMin32:
    case Opcode::BindlessImageAtomicUMin32:
    case Opcode::BindlessImageAtomicSMax32:
    case Opcode::BindlessImageAtomicUMax32:
    case Opcode::BindlessImageAtomicInc32:
    case Opcode::BindlessImageAtomicDec32:
    case Opcode::BindlessImageAtomicAnd32:
    case Opcode::BindlessImageAtomicOr32:
    case Opcode::BindlessImageAtomicXor32:
    case Opcode::BindlessImageAtomicExchange32:
    case Opcode::BoundImageAtomicIAdd32:
    case Opcode::BoundImageAtomicSMin32:
    case Opcode::BoundImageAtomicUMin32:
    case Opcode::BoundImageAtomicSMax32:
    case Opcode::BoundImageAtomicUMax32:
    case Opcode::BoundImageAtomicInc32:
    case Opcode::BoundImageAtomicDec32:
    case Opcode::BoundImageAtomicAnd32:
    case Opcode::BoundImageAtomicOr32:
    case Opcode::BoundImageAtomicXor32:
    case Opcode::BoundImageAtomicExchange32:
    case Opcode::ImageAtomicIAdd32:
    case Opcode::ImageAtomicSMin32:
    case Opcode::ImageAtomicUMin32:
    case Opcode::ImageAtomicSMax32:
    case Opcode::ImageAtomicUMax32:
    case Opcode::ImageAtomicInc32:
    case Opcode::ImageAtomicDec32:
    case Opcode::ImageAtomicAnd32:
    case Opcode::ImageAtomicOr32:
    case Opcode::ImageAtomicXor32:
    case Opcode::ImageAtomicExchange32:
        return true;
    default:
        return false;
    }
}
// Pseudo-instructions read an implicit result (flag bits, sparse residency,
// bounds) produced by a previously emitted instruction.
bool Inst::IsPseudoInstruction() const noexcept {
    return op == Opcode::GetZeroFromOp || op == Opcode::GetSignFromOp ||
           op == Opcode::GetCarryFromOp || op == Opcode::GetOverflowFromOp ||
           op == Opcode::GetSparseFromOp || op == Opcode::GetInBoundsFromOp;
}
bool Inst::AreAllArgsImmediates() const {
if (op == Opcode::Phi) {
throw LogicError("Testing for all arguments are immediates on phi instruction");
}
return std::all_of(args.begin(), args.begin() + NumArgs(),
[](const IR::Value& value) { return value.IsImmediate(); });
}
// Returns the pseudo-instruction of the given kind attached to this
// instruction, or nullptr when none exists. Throws on non-pseudo opcodes.
Inst* Inst::GetAssociatedPseudoOperation(IR::Opcode opcode) {
    if (!associated_insts) {
        return nullptr;
    }
    // Validate the stored slot before handing it out.
    const auto checked{[](Inst* inst, Opcode expected) {
        CheckPseudoInstruction(inst, expected);
        return inst;
    }};
    switch (opcode) {
    case Opcode::GetZeroFromOp:
        return checked(associated_insts->zero_inst, Opcode::GetZeroFromOp);
    case Opcode::GetSignFromOp:
        return checked(associated_insts->sign_inst, Opcode::GetSignFromOp);
    case Opcode::GetCarryFromOp:
        return checked(associated_insts->carry_inst, Opcode::GetCarryFromOp);
    case Opcode::GetOverflowFromOp:
        return checked(associated_insts->overflow_inst, Opcode::GetOverflowFromOp);
    case Opcode::GetSparseFromOp:
        return checked(associated_insts->sparse_inst, Opcode::GetSparseFromOp);
    case Opcode::GetInBoundsFromOp:
        return checked(associated_insts->in_bounds_inst, Opcode::GetInBoundsFromOp);
    default:
        throw InvalidArgument("{} is not a pseudo-instruction", opcode);
    }
}
// Returns the result type of this instruction, derived from its opcode.
IR::Type Inst::Type() const {
    return TypeOf(op);
}
void Inst::SetArg(size_t index, Value value) {
if (index >= NumArgs()) {
throw InvalidArgument("Out of bounds argument index {} in opcode {}", index, op);
}
const IR::Value arg{Arg(index)};
if (!arg.IsImmediate()) {
UndoUse(arg);
}
if (!value.IsImmediate()) {
Use(value);
}
if (op == Opcode::Phi) {
phi_args[index].second = value;
} else {
args[index] = value;
}
}
// Returns the predecessor block of the phi operand at `index`.
// @throws LogicError when this is not a Phi instruction.
// @throws InvalidArgument when `index` is out of bounds.
Block* Inst::PhiBlock(size_t index) const {
    if (op != Opcode::Phi) {
        throw LogicError("{} is not a Phi instruction", op);
    }
    if (index >= phi_args.size()) {
        // Fix: pass `index` so the "{}" placeholder is actually filled;
        // previously the format string had a placeholder but no argument.
        throw InvalidArgument("Out of bounds argument index {} in phi instruction", index);
    }
    return phi_args[index].first;
}
// Appends a (predecessor block, value) operand to this phi instruction.
// Non-immediate values have their use recorded first.
void Inst::AddPhiOperand(Block* predecessor, const Value& value) {
    if (!value.IsImmediate()) {
        Use(value);
    }
    phi_args.emplace_back(predecessor, value);
}
// Drops all argument uses and turns this instruction into a Void no-op.
void Inst::Invalidate() {
    ClearArgs();
    ReplaceOpcode(Opcode::Void);
}
// Releases every argument use and resets the argument storage to null.
void Inst::ClearArgs() {
    if (op == Opcode::Phi) {
        for (auto& [pred, value] : phi_args) {
            if (!value.IsImmediate()) {
                UndoUse(value);
            }
        }
        phi_args.clear();
        return;
    }
    for (auto& value : args) {
        if (!value.IsImmediate()) {
            UndoUse(value);
        }
    }
    // Reset arguments to null
    // std::memset was measured to be faster on MSVC than std::ranges:fill
    std::memset(reinterpret_cast<char*>(&args), 0, sizeof(args));
}
// Rewrites this instruction into an Identity of `replacement`, so existing
// users transparently resolve to the replacement value.
void Inst::ReplaceUsesWith(Value replacement) {
    // Invalidate first to drop current argument uses, then become Identity.
    Invalidate();
    ReplaceOpcode(Opcode::Identity);
    if (!replacement.IsImmediate()) {
        Use(replacement);
    }
    args[0] = replacement;
}
// Changes this instruction's opcode in place. Transitioning into Phi is
// forbidden; transitioning out of Phi swaps the active union member from the
// dynamic phi operand list to the fixed argument array.
void Inst::ReplaceOpcode(IR::Opcode opcode) {
    if (opcode == IR::Opcode::Phi) {
        throw LogicError("Cannot transition into Phi");
    }
    if (op == Opcode::Phi) {
        // Transition out of phi arguments into non-phi
        std::destroy_at(&phi_args);
        std::construct_at(&args);
    }
    op = opcode;
}
void Inst::Use(const Value& value) {
Inst* const inst{value.Inst()};
++inst->use_count;
std::unique_ptr<AssociatedInsts>& assoc_inst{inst->associated_insts};
switch (op) {
case Opcode::GetZeroFromOp:
AllocAssociatedInsts(assoc_inst);
SetPseudoInstruction(assoc_inst->zero_inst, this);
break;
case Opcode::GetSignFromOp:
AllocAssociatedInsts(assoc_inst);
SetPseudoInstruction(assoc_inst->sign_inst, this);
break;
case Opcode::GetCarryFromOp:
AllocAssociatedInsts(assoc_inst);
SetPseudoInstruction(assoc_inst->carry_inst, this);
break;
case Opcode::GetOverflowFromOp:
AllocAssociatedInsts(assoc_inst);
SetPseudoInstruction(assoc_inst->overflow_inst, this);
break;
case Opcode::GetSparseFromOp:
AllocAssociatedInsts(assoc_inst);
SetPseudoInstruction(assoc_inst->sparse_inst, this);
break;
case Opcode::GetInBoundsFromOp:
AllocAssociatedInsts(assoc_inst);
SetPseudoInstruction(assoc_inst->in_bounds_inst, this);
break;
default:
break;
}
}
void Inst::UndoUse(const Value& value) {
Inst* const inst{value.Inst()};
--inst->use_count;
std::unique_ptr<AssociatedInsts>& assoc_inst{inst->associated_insts};
switch (op) {
case Opcode::GetZeroFromOp:
AllocAssociatedInsts(assoc_inst);
RemovePseudoInstruction(assoc_inst->zero_inst, Opcode::GetZeroFromOp);
break;
case Opcode::GetSignFromOp:
AllocAssociatedInsts(assoc_inst);
RemovePseudoInstruction(assoc_inst->sign_inst, Opcode::GetSignFromOp);
break;
case Opcode::GetCarryFromOp:
AllocAssociatedInsts(assoc_inst);
RemovePseudoInstruction(assoc_inst->carry_inst, Opcode::GetCarryFromOp);
break;
case Opcode::GetOverflowFromOp:
AllocAssociatedInsts(assoc_inst);
RemovePseudoInstruction(assoc_inst->overflow_inst, Opcode::GetOverflowFromOp);
break;
case Opcode::GetSparseFromOp:
AllocAssociatedInsts(assoc_inst);
RemovePseudoInstruction(assoc_inst->sparse_inst, Opcode::GetSparseFromOp);
break;
case Opcode::GetInBoundsFromOp:
AllocAssociatedInsts(assoc_inst);
RemovePseudoInstruction(assoc_inst->in_bounds_inst, Opcode::GetInBoundsFromOp);
break;
default:
break;
}
}
} // namespace Shader::IR

View File

@@ -0,0 +1,49 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include "common/bit_field.h"
#include "common/common_types.h"
#include "shader_recompiler/shader_info.h"
namespace Shader::IR {
/// Denormal-flush and NaN behavior requested for a floating-point operation
enum class FmzMode : u8 {
    DontCare, // Not specified for this instruction
    FTZ,      // Flush denorms to zero, NAN is propagated (D3D11, NVN, GL, VK)
    FMZ,      // Flush denorms to zero, x * 0 == 0 (D3D9)
    None,     // Denorms are not flushed, NAN is propagated (nouveau)
};
/// IEEE-754 rounding mode requested for a floating-point operation
enum class FpRounding : u8 {
    DontCare, // Not specified for this instruction
    RN,       // Round to nearest even
    RM,       // Round towards negative infinity
    RP,       // Round towards positive infinity
    RZ,       // Round towards zero
};
/// Floating-point behavior modifiers attached to an instruction.
/// Kept within 32 bits (see static_assert) so it packs into instruction flags.
struct FpControl {
    bool no_contraction{false};
    FpRounding rounding{FpRounding::DontCare};
    FmzMode fmz_mode{FmzMode::DontCare};
};
static_assert(sizeof(FpControl) <= sizeof(u32));
/// Per-instruction metadata for texture operations, bit-packed into 32 bits
/// (see static_assert below)
union TextureInstInfo {
    u32 raw;
    BitField<0, 16, u32> descriptor_index;
    BitField<16, 3, TextureType> type;
    BitField<19, 1, u32> is_depth;
    BitField<20, 1, u32> has_bias;
    BitField<21, 1, u32> has_lod_clamp;
    BitField<22, 1, u32> relaxed_precision;
    BitField<23, 2, u32> gather_component;
    // NOTE(review): "num_derivates" spelling (sic) kept; renaming the field
    // would break existing users
    BitField<25, 2, u32> num_derivates;
    BitField<27, 3, ImageFormat> image_format;
};
static_assert(sizeof(TextureInstInfo) <= sizeof(u32));
} // namespace Shader::IR

View File

@@ -0,0 +1,15 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <string_view>
#include "shader_recompiler/frontend/ir/opcodes.h"
namespace Shader::IR {
/// Returns the printable name of an opcode from the generated metadata table
std::string_view NameOf(Opcode op) {
    const auto index{static_cast<size_t>(op)};
    return Detail::META_TABLE[index].name;
}
} // namespace Shader::IR

View File

@@ -0,0 +1,109 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <algorithm>
#include <array>
#include <string_view>
#include <fmt/format.h>
#include "shader_recompiler/frontend/ir/type.h"
namespace Shader::IR {
/// Every IR opcode, generated by expanding the master list in opcodes.inc
enum class Opcode {
#define OPCODE(name, ...) name,
#include "opcodes.inc"
#undef OPCODE
};
namespace Detail {
/// Static description of one opcode: printable name, result type, and the
/// types of up to five arguments (unused trailing slots hold Type::Void)
struct OpcodeMeta {
    std::string_view name;
    Type type;
    std::array<Type, 5> arg_types;
};
// Shorthand aliases so the opcodes.inc table can name types without the
// Type:: qualifier; stand-in for C++20 `using enum Type;`
constexpr Type Void{Type::Void};
constexpr Type Opaque{Type::Opaque};
constexpr Type Reg{Type::Reg};
constexpr Type Pred{Type::Pred};
constexpr Type Attribute{Type::Attribute};
constexpr Type Patch{Type::Patch};
constexpr Type U1{Type::U1};
constexpr Type U8{Type::U8};
constexpr Type U16{Type::U16};
constexpr Type U32{Type::U32};
constexpr Type U64{Type::U64};
constexpr Type F16{Type::F16};
constexpr Type F32{Type::F32};
constexpr Type F64{Type::F64};
constexpr Type U32x2{Type::U32x2};
constexpr Type U32x3{Type::U32x3};
constexpr Type U32x4{Type::U32x4};
constexpr Type F16x2{Type::F16x2};
constexpr Type F16x3{Type::F16x3};
constexpr Type F16x4{Type::F16x4};
constexpr Type F32x2{Type::F32x2};
constexpr Type F32x3{Type::F32x3};
constexpr Type F32x4{Type::F32x4};
constexpr Type F64x2{Type::F64x2};
constexpr Type F64x3{Type::F64x3};
constexpr Type F64x4{Type::F64x4};
// Compile-time metadata for every opcode, indexed by the opcode's enumerator
// value; generated by expanding the OPCODE() rows in opcodes.inc
constexpr OpcodeMeta META_TABLE[]{
#define OPCODE(name_token, type_token, ...) \
    { \
        .name{#name_token}, \
        .type = type_token, \
        .arg_types{__VA_ARGS__}, \
    },
#include "opcodes.inc"
#undef OPCODE
};
/// Counts the arguments of an opcode: argument slots are filled from the
/// front of arg_types, and the first Type::Void entry marks the end.
constexpr size_t CalculateNumArgsOf(Opcode op) {
    const auto& arg_types{META_TABLE[static_cast<size_t>(op)].arg_types};
    size_t num_args{0};
    while (num_args < arg_types.size() && arg_types[num_args] != Type::Void) {
        ++num_args;
    }
    return num_args;
}
// Argument count of every opcode, precomputed at compile time from META_TABLE
constexpr u8 NUM_ARGS[]{
#define OPCODE(name_token, type_token, ...) static_cast<u8>(CalculateNumArgsOf(Opcode::name_token)),
#include "opcodes.inc"
#undef OPCODE
};
} // namespace Detail
/// Get return type of an opcode
[[nodiscard]] inline Type TypeOf(Opcode op) noexcept {
    const auto& meta{Detail::META_TABLE[static_cast<size_t>(op)]};
    return meta.type;
}
/// Get the number of arguments an opcode accepts
[[nodiscard]] inline size_t NumArgsOf(Opcode op) noexcept {
    const auto index{static_cast<size_t>(op)};
    return static_cast<size_t>(Detail::NUM_ARGS[index]);
}
/// Get the required type of an argument of an opcode.
/// arg_index must be below NumArgsOf(op); no bounds checking is performed.
[[nodiscard]] inline Type ArgTypeOf(Opcode op, size_t arg_index) noexcept {
    const auto& meta{Detail::META_TABLE[static_cast<size_t>(op)]};
    return meta.arg_types[arg_index];
}
/// Get the name of an opcode
[[nodiscard]] std::string_view NameOf(Opcode op);
} // namespace Shader::IR
/// Allows IR opcodes to appear directly in fmt format strings (prints the name)
template <>
struct fmt::formatter<Shader::IR::Opcode> {
    constexpr auto parse(format_parse_context& ctx) {
        return ctx.begin();
    }
    template <typename FormatContext>
    auto format(const Shader::IR::Opcode& op, FormatContext& ctx) {
        return format_to(ctx.out(), "{}", Shader::IR::NameOf(op));
    }
};

View File

@@ -0,0 +1,550 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
// opcode name, return type, arg1 type, arg2 type, arg3 type, arg4 type, arg5 type, ...
OPCODE(Phi, Opaque, )
OPCODE(Identity, Opaque, Opaque, )
OPCODE(Void, Void, )
OPCODE(ConditionRef, U1, U1, )
OPCODE(Reference, Void, Opaque, )
OPCODE(PhiMove, Void, Opaque, Opaque, )
// Special operations
OPCODE(Prologue, Void, )
OPCODE(Epilogue, Void, )
OPCODE(Join, Void, )
OPCODE(DemoteToHelperInvocation, Void, )
OPCODE(EmitVertex, Void, U32, )
OPCODE(EndPrimitive, Void, U32, )
// Barriers
OPCODE(Barrier, Void, )
OPCODE(WorkgroupMemoryBarrier, Void, )
OPCODE(DeviceMemoryBarrier, Void, )
// Context getters/setters
OPCODE(GetRegister, U32, Reg, )
OPCODE(SetRegister, Void, Reg, U32, )
OPCODE(GetPred, U1, Pred, )
OPCODE(SetPred, Void, Pred, U1, )
OPCODE(GetGotoVariable, U1, U32, )
OPCODE(SetGotoVariable, Void, U32, U1, )
OPCODE(GetIndirectBranchVariable, U32, )
OPCODE(SetIndirectBranchVariable, Void, U32, )
OPCODE(GetCbufU8, U32, U32, U32, )
OPCODE(GetCbufS8, U32, U32, U32, )
OPCODE(GetCbufU16, U32, U32, U32, )
OPCODE(GetCbufS16, U32, U32, U32, )
OPCODE(GetCbufU32, U32, U32, U32, )
OPCODE(GetCbufF32, F32, U32, U32, )
OPCODE(GetCbufU32x2, U32x2, U32, U32, )
OPCODE(GetAttribute, F32, Attribute, U32, )
OPCODE(SetAttribute, Void, Attribute, F32, U32, )
OPCODE(GetAttributeIndexed, F32, U32, U32, )
OPCODE(SetAttributeIndexed, Void, U32, F32, U32, )
OPCODE(GetPatch, F32, Patch, )
OPCODE(SetPatch, Void, Patch, F32, )
OPCODE(SetFragColor, Void, U32, U32, F32, )
OPCODE(SetSampleMask, Void, U32, )
OPCODE(SetFragDepth, Void, F32, )
OPCODE(GetZFlag, U1, Void, )
OPCODE(GetSFlag, U1, Void, )
OPCODE(GetCFlag, U1, Void, )
OPCODE(GetOFlag, U1, Void, )
OPCODE(SetZFlag, Void, U1, )
OPCODE(SetSFlag, Void, U1, )
OPCODE(SetCFlag, Void, U1, )
OPCODE(SetOFlag, Void, U1, )
OPCODE(WorkgroupId, U32x3, )
OPCODE(LocalInvocationId, U32x3, )
OPCODE(InvocationId, U32, )
OPCODE(SampleId, U32, )
OPCODE(IsHelperInvocation, U1, )
OPCODE(YDirection, F32, )
// Undefined
OPCODE(UndefU1, U1, )
OPCODE(UndefU8, U8, )
OPCODE(UndefU16, U16, )
OPCODE(UndefU32, U32, )
OPCODE(UndefU64, U64, )
// Memory operations
OPCODE(LoadGlobalU8, U32, Opaque, )
OPCODE(LoadGlobalS8, U32, Opaque, )
OPCODE(LoadGlobalU16, U32, Opaque, )
OPCODE(LoadGlobalS16, U32, Opaque, )
OPCODE(LoadGlobal32, U32, Opaque, )
OPCODE(LoadGlobal64, U32x2, Opaque, )
OPCODE(LoadGlobal128, U32x4, Opaque, )
OPCODE(WriteGlobalU8, Void, Opaque, U32, )
OPCODE(WriteGlobalS8, Void, Opaque, U32, )
OPCODE(WriteGlobalU16, Void, Opaque, U32, )
OPCODE(WriteGlobalS16, Void, Opaque, U32, )
OPCODE(WriteGlobal32, Void, Opaque, U32, )
OPCODE(WriteGlobal64, Void, Opaque, U32x2, )
OPCODE(WriteGlobal128, Void, Opaque, U32x4, )
// Storage buffer operations
OPCODE(LoadStorageU8, U32, U32, U32, )
OPCODE(LoadStorageS8, U32, U32, U32, )
OPCODE(LoadStorageU16, U32, U32, U32, )
OPCODE(LoadStorageS16, U32, U32, U32, )
OPCODE(LoadStorage32, U32, U32, U32, )
OPCODE(LoadStorage64, U32x2, U32, U32, )
OPCODE(LoadStorage128, U32x4, U32, U32, )
OPCODE(WriteStorageU8, Void, U32, U32, U32, )
OPCODE(WriteStorageS8, Void, U32, U32, U32, )
OPCODE(WriteStorageU16, Void, U32, U32, U32, )
OPCODE(WriteStorageS16, Void, U32, U32, U32, )
OPCODE(WriteStorage32, Void, U32, U32, U32, )
OPCODE(WriteStorage64, Void, U32, U32, U32x2, )
OPCODE(WriteStorage128, Void, U32, U32, U32x4, )
// Local memory operations
OPCODE(LoadLocal, U32, U32, )
OPCODE(WriteLocal, Void, U32, U32, )
// Shared memory operations
OPCODE(LoadSharedU8, U32, U32, )
OPCODE(LoadSharedS8, U32, U32, )
OPCODE(LoadSharedU16, U32, U32, )
OPCODE(LoadSharedS16, U32, U32, )
OPCODE(LoadSharedU32, U32, U32, )
OPCODE(LoadSharedU64, U32x2, U32, )
OPCODE(LoadSharedU128, U32x4, U32, )
OPCODE(WriteSharedU8, Void, U32, U32, )
OPCODE(WriteSharedU16, Void, U32, U32, )
OPCODE(WriteSharedU32, Void, U32, U32, )
OPCODE(WriteSharedU64, Void, U32, U32x2, )
OPCODE(WriteSharedU128, Void, U32, U32x4, )
// Vector utility
OPCODE(CompositeConstructU32x2, U32x2, U32, U32, )
OPCODE(CompositeConstructU32x3, U32x3, U32, U32, U32, )
OPCODE(CompositeConstructU32x4, U32x4, U32, U32, U32, U32, )
OPCODE(CompositeExtractU32x2, U32, U32x2, U32, )
OPCODE(CompositeExtractU32x3, U32, U32x3, U32, )
OPCODE(CompositeExtractU32x4, U32, U32x4, U32, )
OPCODE(CompositeInsertU32x2, U32x2, U32x2, U32, U32, )
OPCODE(CompositeInsertU32x3, U32x3, U32x3, U32, U32, )
OPCODE(CompositeInsertU32x4, U32x4, U32x4, U32, U32, )
OPCODE(CompositeConstructF16x2, F16x2, F16, F16, )
OPCODE(CompositeConstructF16x3, F16x3, F16, F16, F16, )
OPCODE(CompositeConstructF16x4, F16x4, F16, F16, F16, F16, )
OPCODE(CompositeExtractF16x2, F16, F16x2, U32, )
OPCODE(CompositeExtractF16x3, F16, F16x3, U32, )
OPCODE(CompositeExtractF16x4, F16, F16x4, U32, )
OPCODE(CompositeInsertF16x2, F16x2, F16x2, F16, U32, )
OPCODE(CompositeInsertF16x3, F16x3, F16x3, F16, U32, )
OPCODE(CompositeInsertF16x4, F16x4, F16x4, F16, U32, )
OPCODE(CompositeConstructF32x2, F32x2, F32, F32, )
OPCODE(CompositeConstructF32x3, F32x3, F32, F32, F32, )
OPCODE(CompositeConstructF32x4, F32x4, F32, F32, F32, F32, )
OPCODE(CompositeExtractF32x2, F32, F32x2, U32, )
OPCODE(CompositeExtractF32x3, F32, F32x3, U32, )
OPCODE(CompositeExtractF32x4, F32, F32x4, U32, )
OPCODE(CompositeInsertF32x2, F32x2, F32x2, F32, U32, )
OPCODE(CompositeInsertF32x3, F32x3, F32x3, F32, U32, )
OPCODE(CompositeInsertF32x4, F32x4, F32x4, F32, U32, )
OPCODE(CompositeConstructF64x2, F64x2, F64, F64, )
OPCODE(CompositeConstructF64x3, F64x3, F64, F64, F64, )
OPCODE(CompositeConstructF64x4, F64x4, F64, F64, F64, F64, )
OPCODE(CompositeExtractF64x2, F64, F64x2, U32, )
OPCODE(CompositeExtractF64x3, F64, F64x3, U32, )
OPCODE(CompositeExtractF64x4, F64, F64x4, U32, )
OPCODE(CompositeInsertF64x2, F64x2, F64x2, F64, U32, )
OPCODE(CompositeInsertF64x3, F64x3, F64x3, F64, U32, )
OPCODE(CompositeInsertF64x4, F64x4, F64x4, F64, U32, )
// Select operations
OPCODE(SelectU1, U1, U1, U1, U1, )
OPCODE(SelectU8, U8, U1, U8, U8, )
OPCODE(SelectU16, U16, U1, U16, U16, )
OPCODE(SelectU32, U32, U1, U32, U32, )
OPCODE(SelectU64, U64, U1, U64, U64, )
OPCODE(SelectF16, F16, U1, F16, F16, )
OPCODE(SelectF32, F32, U1, F32, F32, )
OPCODE(SelectF64, F64, U1, F64, F64, )
// Bitwise conversions
OPCODE(BitCastU16F16, U16, F16, )
OPCODE(BitCastU32F32, U32, F32, )
OPCODE(BitCastU64F64, U64, F64, )
OPCODE(BitCastF16U16, F16, U16, )
OPCODE(BitCastF32U32, F32, U32, )
OPCODE(BitCastF64U64, F64, U64, )
OPCODE(PackUint2x32, U64, U32x2, )
OPCODE(UnpackUint2x32, U32x2, U64, )
OPCODE(PackFloat2x16, U32, F16x2, )
OPCODE(UnpackFloat2x16, F16x2, U32, )
OPCODE(PackHalf2x16, U32, F32x2, )
OPCODE(UnpackHalf2x16, F32x2, U32, )
OPCODE(PackDouble2x32, F64, U32x2, )
OPCODE(UnpackDouble2x32, U32x2, F64, )
// Pseudo-operation, handled specially at final emit
OPCODE(GetZeroFromOp, U1, Opaque, )
OPCODE(GetSignFromOp, U1, Opaque, )
OPCODE(GetCarryFromOp, U1, Opaque, )
OPCODE(GetOverflowFromOp, U1, Opaque, )
OPCODE(GetSparseFromOp, U1, Opaque, )
OPCODE(GetInBoundsFromOp, U1, Opaque, )
// Floating-point operations
OPCODE(FPAbs16, F16, F16, )
OPCODE(FPAbs32, F32, F32, )
OPCODE(FPAbs64, F64, F64, )
OPCODE(FPAdd16, F16, F16, F16, )
OPCODE(FPAdd32, F32, F32, F32, )
OPCODE(FPAdd64, F64, F64, F64, )
OPCODE(FPFma16, F16, F16, F16, F16, )
OPCODE(FPFma32, F32, F32, F32, F32, )
OPCODE(FPFma64, F64, F64, F64, F64, )
OPCODE(FPMax32, F32, F32, F32, )
OPCODE(FPMax64, F64, F64, F64, )
OPCODE(FPMin32, F32, F32, F32, )
OPCODE(FPMin64, F64, F64, F64, )
OPCODE(FPMul16, F16, F16, F16, )
OPCODE(FPMul32, F32, F32, F32, )
OPCODE(FPMul64, F64, F64, F64, )
OPCODE(FPNeg16, F16, F16, )
OPCODE(FPNeg32, F32, F32, )
OPCODE(FPNeg64, F64, F64, )
OPCODE(FPRecip32, F32, F32, )
OPCODE(FPRecip64, F64, F64, )
OPCODE(FPRecipSqrt32, F32, F32, )
OPCODE(FPRecipSqrt64, F64, F64, )
OPCODE(FPSqrt, F32, F32, )
OPCODE(FPSin, F32, F32, )
OPCODE(FPExp2, F32, F32, )
OPCODE(FPCos, F32, F32, )
OPCODE(FPLog2, F32, F32, )
OPCODE(FPSaturate16, F16, F16, )
OPCODE(FPSaturate32, F32, F32, )
OPCODE(FPSaturate64, F64, F64, )
OPCODE(FPClamp16, F16, F16, F16, F16, )
OPCODE(FPClamp32, F32, F32, F32, F32, )
OPCODE(FPClamp64, F64, F64, F64, F64, )
OPCODE(FPRoundEven16, F16, F16, )
OPCODE(FPRoundEven32, F32, F32, )
OPCODE(FPRoundEven64, F64, F64, )
OPCODE(FPFloor16, F16, F16, )
OPCODE(FPFloor32, F32, F32, )
OPCODE(FPFloor64, F64, F64, )
OPCODE(FPCeil16, F16, F16, )
OPCODE(FPCeil32, F32, F32, )
OPCODE(FPCeil64, F64, F64, )
OPCODE(FPTrunc16, F16, F16, )
OPCODE(FPTrunc32, F32, F32, )
OPCODE(FPTrunc64, F64, F64, )
OPCODE(FPOrdEqual16, U1, F16, F16, )
OPCODE(FPOrdEqual32, U1, F32, F32, )
OPCODE(FPOrdEqual64, U1, F64, F64, )
OPCODE(FPUnordEqual16, U1, F16, F16, )
OPCODE(FPUnordEqual32, U1, F32, F32, )
OPCODE(FPUnordEqual64, U1, F64, F64, )
OPCODE(FPOrdNotEqual16, U1, F16, F16, )
OPCODE(FPOrdNotEqual32, U1, F32, F32, )
OPCODE(FPOrdNotEqual64, U1, F64, F64, )
OPCODE(FPUnordNotEqual16, U1, F16, F16, )
OPCODE(FPUnordNotEqual32, U1, F32, F32, )
OPCODE(FPUnordNotEqual64, U1, F64, F64, )
OPCODE(FPOrdLessThan16, U1, F16, F16, )
OPCODE(FPOrdLessThan32, U1, F32, F32, )
OPCODE(FPOrdLessThan64, U1, F64, F64, )
OPCODE(FPUnordLessThan16, U1, F16, F16, )
OPCODE(FPUnordLessThan32, U1, F32, F32, )
OPCODE(FPUnordLessThan64, U1, F64, F64, )
OPCODE(FPOrdGreaterThan16, U1, F16, F16, )
OPCODE(FPOrdGreaterThan32, U1, F32, F32, )
OPCODE(FPOrdGreaterThan64, U1, F64, F64, )
OPCODE(FPUnordGreaterThan16, U1, F16, F16, )
OPCODE(FPUnordGreaterThan32, U1, F32, F32, )
OPCODE(FPUnordGreaterThan64, U1, F64, F64, )
OPCODE(FPOrdLessThanEqual16, U1, F16, F16, )
OPCODE(FPOrdLessThanEqual32, U1, F32, F32, )
OPCODE(FPOrdLessThanEqual64, U1, F64, F64, )
OPCODE(FPUnordLessThanEqual16, U1, F16, F16, )
OPCODE(FPUnordLessThanEqual32, U1, F32, F32, )
OPCODE(FPUnordLessThanEqual64, U1, F64, F64, )
OPCODE(FPOrdGreaterThanEqual16, U1, F16, F16, )
OPCODE(FPOrdGreaterThanEqual32, U1, F32, F32, )
OPCODE(FPOrdGreaterThanEqual64, U1, F64, F64, )
OPCODE(FPUnordGreaterThanEqual16, U1, F16, F16, )
OPCODE(FPUnordGreaterThanEqual32, U1, F32, F32, )
OPCODE(FPUnordGreaterThanEqual64, U1, F64, F64, )
OPCODE(FPIsNan16, U1, F16, )
OPCODE(FPIsNan32, U1, F32, )
OPCODE(FPIsNan64, U1, F64, )
// Integer operations
OPCODE(IAdd32, U32, U32, U32, )
OPCODE(IAdd64, U64, U64, U64, )
OPCODE(ISub32, U32, U32, U32, )
OPCODE(ISub64, U64, U64, U64, )
OPCODE(IMul32, U32, U32, U32, )
OPCODE(INeg32, U32, U32, )
OPCODE(INeg64, U64, U64, )
OPCODE(IAbs32, U32, U32, )
OPCODE(ShiftLeftLogical32, U32, U32, U32, )
OPCODE(ShiftLeftLogical64, U64, U64, U32, )
OPCODE(ShiftRightLogical32, U32, U32, U32, )
OPCODE(ShiftRightLogical64, U64, U64, U32, )
OPCODE(ShiftRightArithmetic32, U32, U32, U32, )
OPCODE(ShiftRightArithmetic64, U64, U64, U32, )
OPCODE(BitwiseAnd32, U32, U32, U32, )
OPCODE(BitwiseOr32, U32, U32, U32, )
OPCODE(BitwiseXor32, U32, U32, U32, )
OPCODE(BitFieldInsert, U32, U32, U32, U32, U32, )
OPCODE(BitFieldSExtract, U32, U32, U32, U32, )
OPCODE(BitFieldUExtract, U32, U32, U32, U32, )
OPCODE(BitReverse32, U32, U32, )
OPCODE(BitCount32, U32, U32, )
OPCODE(BitwiseNot32, U32, U32, )
OPCODE(FindSMsb32, U32, U32, )
OPCODE(FindUMsb32, U32, U32, )
OPCODE(SMin32, U32, U32, U32, )
OPCODE(UMin32, U32, U32, U32, )
OPCODE(SMax32, U32, U32, U32, )
OPCODE(UMax32, U32, U32, U32, )
OPCODE(SClamp32, U32, U32, U32, U32, )
OPCODE(UClamp32, U32, U32, U32, U32, )
OPCODE(SLessThan, U1, U32, U32, )
OPCODE(ULessThan, U1, U32, U32, )
OPCODE(IEqual, U1, U32, U32, )
OPCODE(SLessThanEqual, U1, U32, U32, )
OPCODE(ULessThanEqual, U1, U32, U32, )
OPCODE(SGreaterThan, U1, U32, U32, )
OPCODE(UGreaterThan, U1, U32, U32, )
OPCODE(INotEqual, U1, U32, U32, )
OPCODE(SGreaterThanEqual, U1, U32, U32, )
OPCODE(UGreaterThanEqual, U1, U32, U32, )
// Atomic operations
OPCODE(SharedAtomicIAdd32, U32, U32, U32, )
OPCODE(SharedAtomicSMin32, U32, U32, U32, )
OPCODE(SharedAtomicUMin32, U32, U32, U32, )
OPCODE(SharedAtomicSMax32, U32, U32, U32, )
OPCODE(SharedAtomicUMax32, U32, U32, U32, )
OPCODE(SharedAtomicInc32, U32, U32, U32, )
OPCODE(SharedAtomicDec32, U32, U32, U32, )
OPCODE(SharedAtomicAnd32, U32, U32, U32, )
OPCODE(SharedAtomicOr32, U32, U32, U32, )
OPCODE(SharedAtomicXor32, U32, U32, U32, )
OPCODE(SharedAtomicExchange32, U32, U32, U32, )
OPCODE(SharedAtomicExchange64, U64, U32, U64, )
OPCODE(GlobalAtomicIAdd32, U32, U64, U32, )
OPCODE(GlobalAtomicSMin32, U32, U64, U32, )
OPCODE(GlobalAtomicUMin32, U32, U64, U32, )
OPCODE(GlobalAtomicSMax32, U32, U64, U32, )
OPCODE(GlobalAtomicUMax32, U32, U64, U32, )
OPCODE(GlobalAtomicInc32, U32, U64, U32, )
OPCODE(GlobalAtomicDec32, U32, U64, U32, )
OPCODE(GlobalAtomicAnd32, U32, U64, U32, )
OPCODE(GlobalAtomicOr32, U32, U64, U32, )
OPCODE(GlobalAtomicXor32, U32, U64, U32, )
OPCODE(GlobalAtomicExchange32, U32, U64, U32, )
OPCODE(GlobalAtomicIAdd64, U64, U64, U64, )
OPCODE(GlobalAtomicSMin64, U64, U64, U64, )
OPCODE(GlobalAtomicUMin64, U64, U64, U64, )
OPCODE(GlobalAtomicSMax64, U64, U64, U64, )
OPCODE(GlobalAtomicUMax64, U64, U64, U64, )
OPCODE(GlobalAtomicAnd64, U64, U64, U64, )
OPCODE(GlobalAtomicOr64, U64, U64, U64, )
OPCODE(GlobalAtomicXor64, U64, U64, U64, )
OPCODE(GlobalAtomicExchange64, U64, U64, U64, )
OPCODE(GlobalAtomicAddF32, F32, U64, F32, )
OPCODE(GlobalAtomicAddF16x2, U32, U64, F16x2, )
OPCODE(GlobalAtomicAddF32x2, U32, U64, F32x2, )
OPCODE(GlobalAtomicMinF16x2, U32, U64, F16x2, )
OPCODE(GlobalAtomicMinF32x2, U32, U64, F32x2, )
OPCODE(GlobalAtomicMaxF16x2, U32, U64, F16x2, )
OPCODE(GlobalAtomicMaxF32x2, U32, U64, F32x2, )
OPCODE(StorageAtomicIAdd32, U32, U32, U32, U32, )
OPCODE(StorageAtomicSMin32, U32, U32, U32, U32, )
OPCODE(StorageAtomicUMin32, U32, U32, U32, U32, )
OPCODE(StorageAtomicSMax32, U32, U32, U32, U32, )
OPCODE(StorageAtomicUMax32, U32, U32, U32, U32, )
OPCODE(StorageAtomicInc32, U32, U32, U32, U32, )
OPCODE(StorageAtomicDec32, U32, U32, U32, U32, )
OPCODE(StorageAtomicAnd32, U32, U32, U32, U32, )
OPCODE(StorageAtomicOr32, U32, U32, U32, U32, )
OPCODE(StorageAtomicXor32, U32, U32, U32, U32, )
OPCODE(StorageAtomicExchange32, U32, U32, U32, U32, )
OPCODE(StorageAtomicIAdd64, U64, U32, U32, U64, )
OPCODE(StorageAtomicSMin64, U64, U32, U32, U64, )
OPCODE(StorageAtomicUMin64, U64, U32, U32, U64, )
OPCODE(StorageAtomicSMax64, U64, U32, U32, U64, )
OPCODE(StorageAtomicUMax64, U64, U32, U32, U64, )
OPCODE(StorageAtomicAnd64, U64, U32, U32, U64, )
OPCODE(StorageAtomicOr64, U64, U32, U32, U64, )
OPCODE(StorageAtomicXor64, U64, U32, U32, U64, )
OPCODE(StorageAtomicExchange64, U64, U32, U32, U64, )
OPCODE(StorageAtomicAddF32, F32, U32, U32, F32, )
OPCODE(StorageAtomicAddF16x2, U32, U32, U32, F16x2, )
OPCODE(StorageAtomicAddF32x2, U32, U32, U32, F32x2, )
OPCODE(StorageAtomicMinF16x2, U32, U32, U32, F16x2, )
OPCODE(StorageAtomicMinF32x2, U32, U32, U32, F32x2, )
OPCODE(StorageAtomicMaxF16x2, U32, U32, U32, F16x2, )
OPCODE(StorageAtomicMaxF32x2, U32, U32, U32, F32x2, )
// Logical operations
OPCODE(LogicalOr, U1, U1, U1, )
OPCODE(LogicalAnd, U1, U1, U1, )
OPCODE(LogicalXor, U1, U1, U1, )
OPCODE(LogicalNot, U1, U1, )
// Conversion operations
OPCODE(ConvertS16F16, U32, F16, )
OPCODE(ConvertS16F32, U32, F32, )
OPCODE(ConvertS16F64, U32, F64, )
OPCODE(ConvertS32F16, U32, F16, )
OPCODE(ConvertS32F32, U32, F32, )
OPCODE(ConvertS32F64, U32, F64, )
OPCODE(ConvertS64F16, U64, F16, )
OPCODE(ConvertS64F32, U64, F32, )
OPCODE(ConvertS64F64, U64, F64, )
OPCODE(ConvertU16F16, U32, F16, )
OPCODE(ConvertU16F32, U32, F32, )
OPCODE(ConvertU16F64, U32, F64, )
OPCODE(ConvertU32F16, U32, F16, )
OPCODE(ConvertU32F32, U32, F32, )
OPCODE(ConvertU32F64, U32, F64, )
OPCODE(ConvertU64F16, U64, F16, )
OPCODE(ConvertU64F32, U64, F32, )
OPCODE(ConvertU64F64, U64, F64, )
OPCODE(ConvertU64U32, U64, U32, )
OPCODE(ConvertU32U64, U32, U64, )
OPCODE(ConvertF16F32, F16, F32, )
OPCODE(ConvertF32F16, F32, F16, )
OPCODE(ConvertF32F64, F32, F64, )
OPCODE(ConvertF64F32, F64, F32, )
OPCODE(ConvertF16S8, F16, U32, )
OPCODE(ConvertF16S16, F16, U32, )
OPCODE(ConvertF16S32, F16, U32, )
OPCODE(ConvertF16S64, F16, U64, )
OPCODE(ConvertF16U8, F16, U32, )
OPCODE(ConvertF16U16, F16, U32, )
OPCODE(ConvertF16U32, F16, U32, )
OPCODE(ConvertF16U64, F16, U64, )
OPCODE(ConvertF32S8, F32, U32, )
OPCODE(ConvertF32S16, F32, U32, )
OPCODE(ConvertF32S32, F32, U32, )
OPCODE(ConvertF32S64, F32, U64, )
OPCODE(ConvertF32U8, F32, U32, )
OPCODE(ConvertF32U16, F32, U32, )
OPCODE(ConvertF32U32, F32, U32, )
OPCODE(ConvertF32U64, F32, U64, )
OPCODE(ConvertF64S8, F64, U32, )
OPCODE(ConvertF64S16, F64, U32, )
OPCODE(ConvertF64S32, F64, U32, )
OPCODE(ConvertF64S64, F64, U64, )
OPCODE(ConvertF64U8, F64, U32, )
OPCODE(ConvertF64U16, F64, U32, )
OPCODE(ConvertF64U32, F64, U32, )
OPCODE(ConvertF64U64, F64, U64, )
// Image operations
OPCODE(BindlessImageSampleImplicitLod, F32x4, U32, Opaque, Opaque, Opaque, )
OPCODE(BindlessImageSampleExplicitLod, F32x4, U32, Opaque, Opaque, Opaque, )
OPCODE(BindlessImageSampleDrefImplicitLod, F32, U32, Opaque, F32, Opaque, Opaque, )
OPCODE(BindlessImageSampleDrefExplicitLod, F32, U32, Opaque, F32, Opaque, Opaque, )
OPCODE(BindlessImageGather, F32x4, U32, Opaque, Opaque, Opaque, )
OPCODE(BindlessImageGatherDref, F32x4, U32, Opaque, Opaque, Opaque, F32, )
OPCODE(BindlessImageFetch, F32x4, U32, Opaque, Opaque, U32, Opaque, )
OPCODE(BindlessImageQueryDimensions, U32x4, U32, U32, )
OPCODE(BindlessImageQueryLod, F32x4, U32, Opaque, )
OPCODE(BindlessImageGradient, F32x4, U32, Opaque, Opaque, Opaque, Opaque, )
OPCODE(BindlessImageRead, U32x4, U32, Opaque, )
OPCODE(BindlessImageWrite, Void, U32, Opaque, U32x4, )
OPCODE(BoundImageSampleImplicitLod, F32x4, U32, Opaque, Opaque, Opaque, )
OPCODE(BoundImageSampleExplicitLod, F32x4, U32, Opaque, Opaque, Opaque, )
OPCODE(BoundImageSampleDrefImplicitLod, F32, U32, Opaque, F32, Opaque, Opaque, )
OPCODE(BoundImageSampleDrefExplicitLod, F32, U32, Opaque, F32, Opaque, Opaque, )
OPCODE(BoundImageGather, F32x4, U32, Opaque, Opaque, Opaque, )
OPCODE(BoundImageGatherDref, F32x4, U32, Opaque, Opaque, Opaque, F32, )
OPCODE(BoundImageFetch, F32x4, U32, Opaque, Opaque, U32, Opaque, )
OPCODE(BoundImageQueryDimensions, U32x4, U32, U32, )
OPCODE(BoundImageQueryLod, F32x4, U32, Opaque, )
OPCODE(BoundImageGradient, F32x4, U32, Opaque, Opaque, Opaque, Opaque, )
OPCODE(BoundImageRead, U32x4, U32, Opaque, )
OPCODE(BoundImageWrite, Void, U32, Opaque, U32x4, )
OPCODE(ImageSampleImplicitLod, F32x4, Opaque, Opaque, Opaque, Opaque, )
OPCODE(ImageSampleExplicitLod, F32x4, Opaque, Opaque, Opaque, Opaque, )
OPCODE(ImageSampleDrefImplicitLod, F32, Opaque, Opaque, F32, Opaque, Opaque, )
OPCODE(ImageSampleDrefExplicitLod, F32, Opaque, Opaque, F32, Opaque, Opaque, )
OPCODE(ImageGather, F32x4, Opaque, Opaque, Opaque, Opaque, )
OPCODE(ImageGatherDref, F32x4, Opaque, Opaque, Opaque, Opaque, F32, )
OPCODE(ImageFetch, F32x4, Opaque, Opaque, Opaque, U32, Opaque, )
OPCODE(ImageQueryDimensions, U32x4, Opaque, U32, )
OPCODE(ImageQueryLod, F32x4, Opaque, Opaque, )
OPCODE(ImageGradient, F32x4, Opaque, Opaque, Opaque, Opaque, Opaque, )
OPCODE(ImageRead, U32x4, Opaque, Opaque, )
OPCODE(ImageWrite, Void, Opaque, Opaque, U32x4, )
// Atomic Image operations
OPCODE(BindlessImageAtomicIAdd32, U32, U32, Opaque, U32, )
OPCODE(BindlessImageAtomicSMin32, U32, U32, Opaque, U32, )
OPCODE(BindlessImageAtomicUMin32, U32, U32, Opaque, U32, )
OPCODE(BindlessImageAtomicSMax32, U32, U32, Opaque, U32, )
OPCODE(BindlessImageAtomicUMax32, U32, U32, Opaque, U32, )
OPCODE(BindlessImageAtomicInc32, U32, U32, Opaque, U32, )
OPCODE(BindlessImageAtomicDec32, U32, U32, Opaque, U32, )
OPCODE(BindlessImageAtomicAnd32, U32, U32, Opaque, U32, )
OPCODE(BindlessImageAtomicOr32, U32, U32, Opaque, U32, )
OPCODE(BindlessImageAtomicXor32, U32, U32, Opaque, U32, )
OPCODE(BindlessImageAtomicExchange32, U32, U32, Opaque, U32, )
OPCODE(BoundImageAtomicIAdd32, U32, U32, Opaque, U32, )
OPCODE(BoundImageAtomicSMin32, U32, U32, Opaque, U32, )
OPCODE(BoundImageAtomicUMin32, U32, U32, Opaque, U32, )
OPCODE(BoundImageAtomicSMax32, U32, U32, Opaque, U32, )
OPCODE(BoundImageAtomicUMax32, U32, U32, Opaque, U32, )
OPCODE(BoundImageAtomicInc32, U32, U32, Opaque, U32, )
OPCODE(BoundImageAtomicDec32, U32, U32, Opaque, U32, )
OPCODE(BoundImageAtomicAnd32, U32, U32, Opaque, U32, )
OPCODE(BoundImageAtomicOr32, U32, U32, Opaque, U32, )
OPCODE(BoundImageAtomicXor32, U32, U32, Opaque, U32, )
OPCODE(BoundImageAtomicExchange32, U32, U32, Opaque, U32, )
OPCODE(ImageAtomicIAdd32, U32, Opaque, Opaque, U32, )
OPCODE(ImageAtomicSMin32, U32, Opaque, Opaque, U32, )
OPCODE(ImageAtomicUMin32, U32, Opaque, Opaque, U32, )
OPCODE(ImageAtomicSMax32, U32, Opaque, Opaque, U32, )
OPCODE(ImageAtomicUMax32, U32, Opaque, Opaque, U32, )
OPCODE(ImageAtomicInc32, U32, Opaque, Opaque, U32, )
OPCODE(ImageAtomicDec32, U32, Opaque, Opaque, U32, )
OPCODE(ImageAtomicAnd32, U32, Opaque, Opaque, U32, )
OPCODE(ImageAtomicOr32, U32, Opaque, Opaque, U32, )
OPCODE(ImageAtomicXor32, U32, Opaque, Opaque, U32, )
OPCODE(ImageAtomicExchange32, U32, Opaque, Opaque, U32, )
// Warp operations
OPCODE(LaneId, U32, )
OPCODE(VoteAll, U1, U1, )
OPCODE(VoteAny, U1, U1, )
OPCODE(VoteEqual, U1, U1, )
OPCODE(SubgroupBallot, U32, U1, )
OPCODE(SubgroupEqMask, U32, )
OPCODE(SubgroupLtMask, U32, )
OPCODE(SubgroupLeMask, U32, )
OPCODE(SubgroupGtMask, U32, )
OPCODE(SubgroupGeMask, U32, )
OPCODE(ShuffleIndex, U32, U32, U32, U32, U32, )
OPCODE(ShuffleUp, U32, U32, U32, U32, U32, )
OPCODE(ShuffleDown, U32, U32, U32, U32, U32, )
OPCODE(ShuffleButterfly, U32, U32, U32, U32, U32, )
OPCODE(FSwizzleAdd, F32, F32, F32, U32, )
OPCODE(DPdxFine, F32, F32, )
OPCODE(DPdyFine, F32, F32, )
OPCODE(DPdxCoarse, F32, F32, )
OPCODE(DPdyCoarse, F32, F32, )

View File

@@ -0,0 +1,28 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "shader_recompiler/exception.h"
#include "shader_recompiler/frontend/ir/patch.h"
namespace Shader::IR {
/// True when the patch addresses a generic component (Component0..Component119)
bool IsGeneric(Patch patch) noexcept {
    const auto raw{static_cast<u64>(patch)};
    return raw >= static_cast<u64>(Patch::Component0) &&
           raw <= static_cast<u64>(Patch::Component119);
}
/// Index of the vec4 generic attribute containing this patch component.
/// Throws InvalidArgument when the patch is not a generic component.
u32 GenericPatchIndex(Patch patch) {
    if (!IsGeneric(patch)) {
        throw InvalidArgument("Patch {} is not generic", patch);
    }
    const u32 offset{static_cast<u32>(patch) - static_cast<u32>(Patch::Component0)};
    return offset / 4;
}
/// Component element (0-3) within its vec4 generic attribute.
/// Throws InvalidArgument when the patch is not a generic component.
u32 GenericPatchElement(Patch patch) {
    if (!IsGeneric(patch)) {
        throw InvalidArgument("Patch {} is not generic", patch);
    }
    const u32 offset{static_cast<u32>(patch) - static_cast<u32>(Patch::Component0)};
    return offset % 4;
}
} // namespace Shader::IR

View File

@@ -0,0 +1,149 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include "common/common_types.h"
namespace Shader::IR {
/// Patch attribute addressed by tessellation I/O.
/// The first entries are the fixed tessellation LOD values (plus padding so
/// Component0 starts at offset 8); Component0..Component119 are the generic
/// patch attributes, addressed per 32-bit component (30 vec4s, see
/// GenericPatchIndex / GenericPatchElement).
enum class Patch : u64 {
    TessellationLodLeft,
    TessellationLodTop,
    TessellationLodRight,
    TessellationLodBottom,
    TessellationLodInteriorU,
    TessellationLodInteriorV,
    ComponentPadding0,
    ComponentPadding1,
    // Generic components: 30 vec4 attributes, four 32-bit components each
    Component0,
    Component1,
    Component2,
    Component3,
    Component4,
    Component5,
    Component6,
    Component7,
    Component8,
    Component9,
    Component10,
    Component11,
    Component12,
    Component13,
    Component14,
    Component15,
    Component16,
    Component17,
    Component18,
    Component19,
    Component20,
    Component21,
    Component22,
    Component23,
    Component24,
    Component25,
    Component26,
    Component27,
    Component28,
    Component29,
    Component30,
    Component31,
    Component32,
    Component33,
    Component34,
    Component35,
    Component36,
    Component37,
    Component38,
    Component39,
    Component40,
    Component41,
    Component42,
    Component43,
    Component44,
    Component45,
    Component46,
    Component47,
    Component48,
    Component49,
    Component50,
    Component51,
    Component52,
    Component53,
    Component54,
    Component55,
    Component56,
    Component57,
    Component58,
    Component59,
    Component60,
    Component61,
    Component62,
    Component63,
    Component64,
    Component65,
    Component66,
    Component67,
    Component68,
    Component69,
    Component70,
    Component71,
    Component72,
    Component73,
    Component74,
    Component75,
    Component76,
    Component77,
    Component78,
    Component79,
    Component80,
    Component81,
    Component82,
    Component83,
    Component84,
    Component85,
    Component86,
    Component87,
    Component88,
    Component89,
    Component90,
    Component91,
    Component92,
    Component93,
    Component94,
    Component95,
    Component96,
    Component97,
    Component98,
    Component99,
    Component100,
    Component101,
    Component102,
    Component103,
    Component104,
    Component105,
    Component106,
    Component107,
    Component108,
    Component109,
    Component110,
    Component111,
    Component112,
    Component113,
    Component114,
    Component115,
    Component116,
    Component117,
    Component118,
    Component119,
};
// Component0 starts at offset 8, so the last of 120 components lands on 127
static_assert(static_cast<u64>(Patch::Component119) == 127);
/// True when the patch addresses a generic component (Component0..Component119)
[[nodiscard]] bool IsGeneric(Patch patch) noexcept;
/// vec4 attribute index of a generic component; throws for non-generic patches
[[nodiscard]] u32 GenericPatchIndex(Patch patch);
/// Element (0-3) within the vec4 attribute; throws for non-generic patches
[[nodiscard]] u32 GenericPatchElement(Patch patch);
} // namespace Shader::IR

View File

@@ -0,0 +1,46 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <algorithm>
#include <boost/container/flat_set.hpp>
#include <boost/container/small_vector.hpp>
#include "shader_recompiler/frontend/ir/basic_block.h"
#include "shader_recompiler/frontend/ir/post_order.h"
namespace Shader::IR {
/// Computes a post-order traversal of the control flow graph rooted at the
/// first block of the abstract syntax list, using an iterative DFS: a block is
/// emitted only after all of its successors have been emitted.
BlockList PostOrder(const AbstractSyntaxNode& root) {
    boost::container::small_vector<Block*, 16> block_stack;
    boost::container::flat_set<Block*> visited;
    BlockList post_order_blocks;
    if (root.type != AbstractSyntaxNode::Type::Block) {
        throw LogicError("First node in abstract syntax list root is not a block");
    }
    Block* const first_block{root.data.block};
    visited.insert(first_block);
    block_stack.push_back(first_block);
    while (!block_stack.empty()) {
        Block* const block{block_stack.back()};
        // Returns true (descend) only for a successor not yet visited. The
        // current block is re-pushed beneath the successor so its remaining
        // successors are re-examined after that subtree completes.
        const auto visit{[&](Block* branch) {
            if (!visited.insert(branch).second) {
                return false;
            }
            // Calling push_back twice is faster than insert on MSVC
            block_stack.push_back(block);
            block_stack.push_back(branch);
            return true;
        }};
        block_stack.pop_back();
        // none_of short-circuits on the first unvisited successor; only when
        // every successor has already been visited is the block emitted
        if (std::ranges::none_of(block->ImmSuccessors(), visit)) {
            post_order_blocks.push_back(block);
        }
    }
    return post_order_blocks;
}
} // namespace Shader::IR

View File

@@ -0,0 +1,14 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include "shader_recompiler/frontend/ir/abstract_syntax_list.h"
#include "shader_recompiler/frontend/ir/basic_block.h"
namespace Shader::IR {
BlockList PostOrder(const AbstractSyntaxNode& root);
} // namespace Shader::IR

View File

@@ -0,0 +1,44 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <fmt/format.h>
namespace Shader::IR {
// Hardware predicate registers. P0-P6 are user predicates; PT is the
// always-true predicate (see the formatter below, which prints it as "PT").
enum class Pred : u64 {
    P0,
    P1,
    P2,
    P3,
    P4,
    P5,
    P6,
    PT,
};
// Writable user predicates only (P0-P6)
constexpr size_t NUM_USER_PREDS = 7;
// All predicates, including PT
constexpr size_t NUM_PREDS = 8;
/// Index of a predicate suitable for addressing a NUM_PREDS-sized table.
[[nodiscard]] constexpr size_t PredIndex(Pred pred) noexcept {
    return static_cast<size_t>(pred);
}
} // namespace Shader::IR
/// Formats predicates as "P0".."P6", or "PT" for the true predicate.
template <>
struct fmt::formatter<Shader::IR::Pred> {
    constexpr auto parse(format_parse_context& ctx) {
        return ctx.begin();
    }
    template <typename FormatContext>
    auto format(const Shader::IR::Pred& pred, FormatContext& ctx) {
        if (pred != Shader::IR::Pred::PT) {
            return fmt::format_to(ctx.out(), "P{}", static_cast<int>(pred));
        }
        return fmt::format_to(ctx.out(), "PT");
    }
};

View File

@@ -0,0 +1,32 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <map>
#include <string>
#include <fmt/format.h>
#include "shader_recompiler/frontend/ir/basic_block.h"
#include "shader_recompiler/frontend/ir/program.h"
#include "shader_recompiler/frontend/ir/value.h"
namespace Shader::IR {
std::string DumpProgram(const Program& program) {
size_t index{0};
std::map<const IR::Inst*, size_t> inst_to_index;
std::map<const IR::Block*, size_t> block_to_index;
for (const IR::Block* const block : program.blocks) {
block_to_index.emplace(block, index);
++index;
}
std::string ret;
for (const auto& block : program.blocks) {
ret += IR::DumpBlock(*block, block_to_index, inst_to_index, index) + '\n';
}
return ret;
}
} // namespace Shader::IR

View File

@@ -0,0 +1,35 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <array>
#include <string>
#include "shader_recompiler/frontend/ir/abstract_syntax_list.h"
#include "shader_recompiler/frontend/ir/basic_block.h"
#include "shader_recompiler/program_header.h"
#include "shader_recompiler/shader_info.h"
#include "shader_recompiler/stage.h"
namespace Shader::IR {
/// A fully translated shader program: its IR plus the metadata backends need.
struct Program {
    /// Structured control flow nodes describing the program
    AbstractSyntaxList syntax_list;
    /// Basic blocks in program order
    BlockList blocks;
    /// Basic blocks in post order, for analysis passes
    BlockList post_order_blocks;
    /// Resource usage and capability information gathered during translation
    Info info;
    /// Pipeline stage this program runs on
    Stage stage{};
    /// Compute workgroup dimensions (x, y, z)
    std::array<u32, 3> workgroup_size{};
    /// Geometry shader output topology
    OutputTopology output_topology{};
    /// Geometry shader maximum output vertex count
    u32 output_vertices{};
    /// Geometry shader invocation count
    u32 invocations{};
    /// Local memory size used by the program
    u32 local_memory_size{};
    /// Shared (workgroup) memory size used by the program
    u32 shared_memory_size{};
    /// True when this is a passthrough geometry shader
    bool is_geometry_passthrough{};
};
[[nodiscard]] std::string DumpProgram(const Program& program);
} // namespace Shader::IR

View File

@@ -0,0 +1,332 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <fmt/format.h>
#include "common/common_types.h"
#include "shader_recompiler/exception.h"
namespace Shader::IR {
// General purpose registers. R0-R254 are user registers; RZ is the zero
// register, encoded as raw value 255 (see static_assert below).
enum class Reg : u64 {
    R0,
    R1,
    R2,
    R3,
    R4,
    R5,
    R6,
    R7,
    R8,
    R9,
    R10,
    R11,
    R12,
    R13,
    R14,
    R15,
    R16,
    R17,
    R18,
    R19,
    R20,
    R21,
    R22,
    R23,
    R24,
    R25,
    R26,
    R27,
    R28,
    R29,
    R30,
    R31,
    R32,
    R33,
    R34,
    R35,
    R36,
    R37,
    R38,
    R39,
    R40,
    R41,
    R42,
    R43,
    R44,
    R45,
    R46,
    R47,
    R48,
    R49,
    R50,
    R51,
    R52,
    R53,
    R54,
    R55,
    R56,
    R57,
    R58,
    R59,
    R60,
    R61,
    R62,
    R63,
    R64,
    R65,
    R66,
    R67,
    R68,
    R69,
    R70,
    R71,
    R72,
    R73,
    R74,
    R75,
    R76,
    R77,
    R78,
    R79,
    R80,
    R81,
    R82,
    R83,
    R84,
    R85,
    R86,
    R87,
    R88,
    R89,
    R90,
    R91,
    R92,
    R93,
    R94,
    R95,
    R96,
    R97,
    R98,
    R99,
    R100,
    R101,
    R102,
    R103,
    R104,
    R105,
    R106,
    R107,
    R108,
    R109,
    R110,
    R111,
    R112,
    R113,
    R114,
    R115,
    R116,
    R117,
    R118,
    R119,
    R120,
    R121,
    R122,
    R123,
    R124,
    R125,
    R126,
    R127,
    R128,
    R129,
    R130,
    R131,
    R132,
    R133,
    R134,
    R135,
    R136,
    R137,
    R138,
    R139,
    R140,
    R141,
    R142,
    R143,
    R144,
    R145,
    R146,
    R147,
    R148,
    R149,
    R150,
    R151,
    R152,
    R153,
    R154,
    R155,
    R156,
    R157,
    R158,
    R159,
    R160,
    R161,
    R162,
    R163,
    R164,
    R165,
    R166,
    R167,
    R168,
    R169,
    R170,
    R171,
    R172,
    R173,
    R174,
    R175,
    R176,
    R177,
    R178,
    R179,
    R180,
    R181,
    R182,
    R183,
    R184,
    R185,
    R186,
    R187,
    R188,
    R189,
    R190,
    R191,
    R192,
    R193,
    R194,
    R195,
    R196,
    R197,
    R198,
    R199,
    R200,
    R201,
    R202,
    R203,
    R204,
    R205,
    R206,
    R207,
    R208,
    R209,
    R210,
    R211,
    R212,
    R213,
    R214,
    R215,
    R216,
    R217,
    R218,
    R219,
    R220,
    R221,
    R222,
    R223,
    R224,
    R225,
    R226,
    R227,
    R228,
    R229,
    R230,
    R231,
    R232,
    R233,
    R234,
    R235,
    R236,
    R237,
    R238,
    R239,
    R240,
    R241,
    R242,
    R243,
    R244,
    R245,
    R246,
    R247,
    R248,
    R249,
    R250,
    R251,
    R252,
    R253,
    R254,
    RZ,
};
// The arithmetic below relies on RZ having the highest raw value
static_assert(static_cast<int>(Reg::RZ) == 255);
// Allocatable user registers (R0-R254)
constexpr size_t NUM_USER_REGS = 255;
// All registers, including RZ
constexpr size_t NUM_REGS = 256;
/// Offsets a register index by `num`, throwing when the result would leave the
/// user register range. RZ is absorbing: RZ plus any offset stays RZ.
[[nodiscard]] constexpr Reg operator+(Reg reg, int num) {
    if (reg == Reg::RZ) {
        // Adding or subtracting registers from RZ yields RZ
        return Reg::RZ;
    }
    const int raw{static_cast<int>(reg) + num};
    if (raw < 0) {
        throw LogicError("Underflow on register arithmetic");
    }
    if (raw >= static_cast<int>(Reg::RZ)) {
        throw LogicError("Overflow on register arithmetic");
    }
    return static_cast<Reg>(raw);
}
/// Subtraction is addition of the negated offset; inherits operator+'s checks.
[[nodiscard]] constexpr Reg operator-(Reg reg, int num) {
    return reg + (-num);
}

/// Pre-increment: advance to the next register and return it.
constexpr Reg operator++(Reg& reg) {
    reg = reg + 1;
    return reg;
}

/// Post-increment: advance to the next register, returning the previous one.
constexpr Reg operator++(Reg& reg, int) {
    const Reg previous{reg};
    ++reg;
    return previous;
}

/// Raw index of a register, suitable for addressing a NUM_REGS-sized table.
[[nodiscard]] constexpr size_t RegIndex(Reg reg) noexcept {
    return static_cast<size_t>(reg);
}

/// Returns true when the register index is a multiple of align. RZ is always
/// considered aligned.
[[nodiscard]] constexpr bool IsAligned(Reg reg, size_t align) {
    return reg == Reg::RZ || RegIndex(reg) % align == 0;
}
} // namespace Shader::IR
/// Formats registers as "R0".."R254", or "RZ" for the zero register.
/// Out-of-range raw values are rejected with a LogicError.
template <>
struct fmt::formatter<Shader::IR::Reg> {
    constexpr auto parse(format_parse_context& ctx) {
        return ctx.begin();
    }
    template <typename FormatContext>
    auto format(const Shader::IR::Reg& reg, FormatContext& ctx) {
        if (reg == Shader::IR::Reg::RZ) {
            return fmt::format_to(ctx.out(), "RZ");
        }
        const int raw{static_cast<int>(reg)};
        if (raw < 0 || raw >= 255) {
            throw Shader::LogicError("Invalid register with raw value {}", raw);
        }
        return fmt::format_to(ctx.out(), "R{}", raw);
    }
};

View File

@@ -0,0 +1,38 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <array>
#include <string>
#include "shader_recompiler/frontend/ir/type.h"
namespace Shader::IR {
/// Builds a human-readable name for a type bitmask, joining the names of all
/// set bits with '|'. A zero mask is "Void".
///
/// Fix: the name table was stale — it still contained "Label" (which no longer
/// exists in the Type enum) and lacked "Patch", shifting the names of Reg,
/// Pred, Attribute and Patch (bits 1-4 of the enum) off by one.
std::string NameOf(Type type) {
    // Index i names bit (1 << i) of the Type enum; keep in sync with type.h
    static constexpr std::array names{
        "Opaque", "Reg",   "Pred",  "Attribute", "Patch", "U1",    "U8",    "U16",   "U32",
        "U64",    "F16",   "F32",   "F64",       "U32x2", "U32x3", "U32x4", "F16x2", "F16x3",
        "F16x4",  "F32x2", "F32x3", "F32x4",     "F64x2", "F64x3", "F64x4",
    };
    const size_t bits{static_cast<size_t>(type)};
    if (bits == 0) {
        return "Void";
    }
    std::string result;
    for (size_t i = 0; i < names.size(); i++) {
        if ((bits & (size_t{1} << i)) != 0) {
            if (!result.empty()) {
                result += '|';
            }
            result += names[i];
        }
    }
    return result;
}
/// Two types are compatible when they are equal, or when either side is
/// Opaque (an untyped instruction reference matches anything).
bool AreTypesCompatible(Type lhs, Type rhs) noexcept {
    if (lhs == Type::Opaque || rhs == Type::Opaque) {
        return true;
    }
    return lhs == rhs;
}
} // namespace Shader::IR

View File

@@ -0,0 +1,61 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <string>
#include <fmt/format.h>
#include "common/common_funcs.h"
#include "shader_recompiler/exception.h"
namespace Shader::IR {
// Type of an IR value. Declared as a bitmask so that sets of acceptable types
// can be expressed and tested with the flag operators declared below
// (e.g. TypedValue<Type::U32 | Type::U64>).
enum class Type {
    Void = 0,
    Opaque = 1 << 0,
    Reg = 1 << 1,
    Pred = 1 << 2,
    Attribute = 1 << 3,
    Patch = 1 << 4,
    U1 = 1 << 5,
    U8 = 1 << 6,
    U16 = 1 << 7,
    U32 = 1 << 8,
    U64 = 1 << 9,
    F16 = 1 << 10,
    F32 = 1 << 11,
    F64 = 1 << 12,
    U32x2 = 1 << 13,
    U32x3 = 1 << 14,
    U32x4 = 1 << 15,
    F16x2 = 1 << 16,
    F16x3 = 1 << 17,
    F16x4 = 1 << 18,
    F32x2 = 1 << 19,
    F32x3 = 1 << 20,
    F32x4 = 1 << 21,
    F64x2 = 1 << 22,
    F64x3 = 1 << 23,
    F64x4 = 1 << 24,
};
DECLARE_ENUM_FLAG_OPERATORS(Type)
[[nodiscard]] std::string NameOf(Type type);
[[nodiscard]] bool AreTypesCompatible(Type lhs, Type rhs) noexcept;
} // namespace Shader::IR
/// Formats a type bitmask through NameOf, e.g. "U32|U64".
template <>
struct fmt::formatter<Shader::IR::Type> {
    constexpr auto parse(format_parse_context& ctx) {
        return ctx.begin();
    }
    template <typename FormatContext>
    auto format(const Shader::IR::Type& type, FormatContext& ctx) {
        const std::string name{NameOf(type)};
        return fmt::format_to(ctx.out(), "{}", name);
    }
};

View File

@@ -0,0 +1,99 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "shader_recompiler/frontend/ir/opcodes.h"
#include "shader_recompiler/frontend/ir/value.h"
namespace Shader::IR {
// Each constructor tags the value with the matching IR::Type and stores the
// payload in the corresponding union member. F16 has no dedicated constructor;
// half immediates are stored through their bit pattern elsewhere.
Value::Value(IR::Inst* value) noexcept : type{Type::Opaque}, inst{value} {}
Value::Value(IR::Reg value) noexcept : type{Type::Reg}, reg{value} {}
Value::Value(IR::Pred value) noexcept : type{Type::Pred}, pred{value} {}
Value::Value(IR::Attribute value) noexcept : type{Type::Attribute}, attribute{value} {}
Value::Value(IR::Patch value) noexcept : type{Type::Patch}, patch{value} {}
Value::Value(bool value) noexcept : type{Type::U1}, imm_u1{value} {}
Value::Value(u8 value) noexcept : type{Type::U8}, imm_u8{value} {}
Value::Value(u16 value) noexcept : type{Type::U16}, imm_u16{value} {}
Value::Value(u32 value) noexcept : type{Type::U32}, imm_u32{value} {}
Value::Value(f32 value) noexcept : type{Type::F32}, imm_f32{value} {}
Value::Value(u64 value) noexcept : type{Type::U64}, imm_u64{value} {}
Value::Value(f64 value) noexcept : type{Type::F64}, imm_f64{value} {}
// Resolves the effective type of this value. The check order matters: a phi is
// also tagged Opaque, so it must be recognized before the generic Opaque case.
IR::Type Value::Type() const noexcept {
    if (IsPhi()) {
        // The type of a phi node is stored in its flags
        return inst->Flags<IR::Type>();
    }
    if (IsIdentity()) {
        // An identity forwards the type of the value it wraps
        return inst->Arg(0).Type();
    }
    if (type == Type::Opaque) {
        // Plain instruction reference: report the instruction's result type
        return inst->Type();
    }
    return type;
}
// Compares two values. Instruction references compare by identity; immediates
// compare by their raw bit pattern, so e.g. F32 uses integer equality (NaN
// payloads compare equal only when bitwise identical).
bool Value::operator==(const Value& other) const {
    if (type != other.type) {
        return false;
    }
    switch (type) {
    case Type::Void:
        return true;
    case Type::Opaque:
        return inst == other.inst;
    case Type::Reg:
        return reg == other.reg;
    case Type::Pred:
        return pred == other.pred;
    case Type::Attribute:
        return attribute == other.attribute;
    case Type::Patch:
        return patch == other.patch;
    case Type::U1:
        return imm_u1 == other.imm_u1;
    case Type::U8:
        return imm_u8 == other.imm_u8;
    case Type::U16:
    case Type::F16:
        // Same-width immediates share a union member
        return imm_u16 == other.imm_u16;
    case Type::U32:
    case Type::F32:
        return imm_u32 == other.imm_u32;
    case Type::U64:
    case Type::F64:
        return imm_u64 == other.imm_u64;
    case Type::U32x2:
    case Type::U32x3:
    case Type::U32x4:
    case Type::F16x2:
    case Type::F16x3:
    case Type::F16x4:
    case Type::F32x2:
    case Type::F32x3:
    case Type::F32x4:
    case Type::F64x2:
    case Type::F64x3:
    case Type::F64x4:
        // Vector types never appear as immediate values; fall through to throw
        break;
    }
    throw LogicError("Invalid type {}", type);
}
/// Negation of operator==.
bool Value::operator!=(const Value& other) const {
    return !(*this == other);
}
} // namespace Shader::IR

View File

@@ -0,0 +1,398 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <array>
#include <cstring>
#include <memory>
#include <type_traits>
#include <utility>
#include <vector>
#include <boost/container/small_vector.hpp>
#include <boost/intrusive/list.hpp>
#include "common/assert.h"
#include "common/bit_cast.h"
#include "common/common_types.h"
#include "shader_recompiler/exception.h"
#include "shader_recompiler/frontend/ir/attribute.h"
#include "shader_recompiler/frontend/ir/opcodes.h"
#include "shader_recompiler/frontend/ir/patch.h"
#include "shader_recompiler/frontend/ir/pred.h"
#include "shader_recompiler/frontend/ir/reg.h"
#include "shader_recompiler/frontend/ir/type.h"
#include "shader_recompiler/frontend/ir/value.h"
namespace Shader::IR {
class Block;
class Inst;
struct AssociatedInsts;
// Tagged union representing an IR value: either a reference to an instruction
// (Type::Opaque), a register/predicate/attribute/patch handle, or an immediate.
// The typed accessors assert that the stored tag matches.
class Value {
public:
    Value() noexcept = default;
    explicit Value(IR::Inst* value) noexcept;
    explicit Value(IR::Reg value) noexcept;
    explicit Value(IR::Pred value) noexcept;
    explicit Value(IR::Attribute value) noexcept;
    explicit Value(IR::Patch value) noexcept;
    explicit Value(bool value) noexcept;
    explicit Value(u8 value) noexcept;
    explicit Value(u16 value) noexcept;
    explicit Value(u32 value) noexcept;
    explicit Value(f32 value) noexcept;
    explicit Value(u64 value) noexcept;
    explicit Value(f64 value) noexcept;
    [[nodiscard]] bool IsIdentity() const noexcept;
    [[nodiscard]] bool IsPhi() const noexcept;
    [[nodiscard]] bool IsEmpty() const noexcept;
    [[nodiscard]] bool IsImmediate() const noexcept;
    [[nodiscard]] IR::Type Type() const noexcept;
    [[nodiscard]] IR::Inst* Inst() const;
    [[nodiscard]] IR::Inst* InstRecursive() const;
    [[nodiscard]] IR::Value Resolve() const;
    [[nodiscard]] IR::Reg Reg() const;
    [[nodiscard]] IR::Pred Pred() const;
    [[nodiscard]] IR::Attribute Attribute() const;
    [[nodiscard]] IR::Patch Patch() const;
    [[nodiscard]] bool U1() const;
    [[nodiscard]] u8 U8() const;
    [[nodiscard]] u16 U16() const;
    [[nodiscard]] u32 U32() const;
    [[nodiscard]] f32 F32() const;
    [[nodiscard]] u64 U64() const;
    [[nodiscard]] f64 F64() const;
    [[nodiscard]] bool operator==(const Value& other) const;
    [[nodiscard]] bool operator!=(const Value& other) const;
private:
    // Discriminant for the union below; Type::Void means "empty value"
    IR::Type type{};
    union {
        IR::Inst* inst{};
        IR::Reg reg;
        IR::Pred pred;
        IR::Attribute attribute;
        IR::Patch patch;
        bool imm_u1;
        u8 imm_u8;
        u16 imm_u16;
        u32 imm_u32;
        f32 imm_f32;
        u64 imm_u64;
        f64 imm_f64;
    };
};
static_assert(static_cast<u32>(IR::Type::Void) == 0, "memset relies on IR::Type being zero");
static_assert(std::is_trivially_copyable_v<Value>);
// A Value restricted at compile time to the type set in type_.
// Conversion from another TypedValue is implicit when the two type sets
// intersect; construction from an untyped Value checks the type at runtime.
template <IR::Type type_>
class TypedValue : public Value {
public:
    TypedValue() = default;
    template <IR::Type other_type>
    requires((other_type & type_) != IR::Type::Void) explicit(false)
    TypedValue(const TypedValue<other_type>& value)
        : Value(value) {}
    explicit TypedValue(const Value& value) : Value(value) {
        if ((value.Type() & type_) == IR::Type::Void) {
            throw InvalidArgument("Incompatible types {} and {}", type_, value.Type());
        }
    }
    explicit TypedValue(IR::Inst* inst_) : TypedValue(Value(inst_)) {}
};
/// A single IR microinstruction. Instructions live in intrusive lists inside
/// basic blocks and keep a reference count of their uses. Phi instructions
/// store a variable-length argument list; all other opcodes use a fixed array.
///
/// Fix: removed the [[nodiscard]] attribute from the void-returning SetFlags,
/// where it is meaningless (there is no result to discard) and triggers
/// ignored-attribute warnings on conforming compilers.
class Inst : public boost::intrusive::list_base_hook<> {
public:
    explicit Inst(IR::Opcode op_, u32 flags_) noexcept;
    ~Inst();
    Inst& operator=(const Inst&) = delete;
    Inst(const Inst&) = delete;
    Inst& operator=(Inst&&) = delete;
    Inst(Inst&&) = delete;
    /// Get the number of uses this instruction has.
    [[nodiscard]] int UseCount() const noexcept {
        return use_count;
    }
    /// Determines whether this instruction has uses or not.
    [[nodiscard]] bool HasUses() const noexcept {
        return use_count > 0;
    }
    /// Get the opcode this microinstruction represents.
    [[nodiscard]] IR::Opcode GetOpcode() const noexcept {
        return op;
    }
    /// Determines if there is a pseudo-operation associated with this instruction.
    [[nodiscard]] bool HasAssociatedPseudoOperation() const noexcept {
        return associated_insts != nullptr;
    }
    /// Determines whether or not this instruction may have side effects.
    [[nodiscard]] bool MayHaveSideEffects() const noexcept;
    /// Determines whether or not this instruction is a pseudo-instruction.
    /// Pseudo-instructions depend on their parent instructions for their semantics.
    [[nodiscard]] bool IsPseudoInstruction() const noexcept;
    /// Determines if all arguments of this instruction are immediates.
    [[nodiscard]] bool AreAllArgsImmediates() const;
    /// Gets a pseudo-operation associated with this instruction
    [[nodiscard]] Inst* GetAssociatedPseudoOperation(IR::Opcode opcode);
    /// Get the type this instruction returns.
    [[nodiscard]] IR::Type Type() const;
    /// Get the number of arguments this instruction has.
    [[nodiscard]] size_t NumArgs() const {
        return op == IR::Opcode::Phi ? phi_args.size() : NumArgsOf(op);
    }
    /// Get the value of a given argument index.
    [[nodiscard]] Value Arg(size_t index) const noexcept {
        if (op == IR::Opcode::Phi) {
            return phi_args[index].second;
        } else {
            return args[index];
        }
    }
    /// Set the value of a given argument index.
    void SetArg(size_t index, Value value);
    /// Get a pointer to the block of a phi argument.
    [[nodiscard]] Block* PhiBlock(size_t index) const;
    /// Add phi operand to a phi instruction.
    void AddPhiOperand(Block* predecessor, const Value& value);
    /// Clear this instruction's arguments and mark it as unusable.
    void Invalidate();
    /// Drop all argument references held by this instruction.
    void ClearArgs();
    /// Redirect every use of this instruction to `replacement`.
    void ReplaceUsesWith(Value replacement);
    /// Swap this instruction's opcode, keeping its arguments.
    void ReplaceOpcode(IR::Opcode opcode);
    /// Read the intrusively stored flags reinterpreted as FlagsType.
    template <typename FlagsType>
    requires(sizeof(FlagsType) <= sizeof(u32) && std::is_trivially_copyable_v<FlagsType>)
    [[nodiscard]] FlagsType Flags() const noexcept {
        FlagsType ret;
        std::memcpy(reinterpret_cast<char*>(&ret), &flags, sizeof(ret));
        return ret;
    }
    /// Overwrite the intrusively stored flags with the bits of `value`.
    template <typename FlagsType>
    requires(sizeof(FlagsType) <= sizeof(u32) && std::is_trivially_copyable_v<FlagsType>)
    void SetFlags(FlagsType value) noexcept {
        std::memcpy(&flags, &value, sizeof(value));
    }
    /// Intrusively store the host definition of this instruction.
    template <typename DefinitionType>
    void SetDefinition(DefinitionType def) {
        definition = Common::BitCast<u32>(def);
    }
    /// Return the intrusively stored host definition of this instruction.
    template <typename DefinitionType>
    [[nodiscard]] DefinitionType Definition() const noexcept {
        return Common::BitCast<DefinitionType>(definition);
    }
    /// Destructively remove one reference count from the instruction
    /// Useful for register allocation
    void DestructiveRemoveUsage() {
        --use_count;
    }
    /// Destructively add usages to the instruction
    /// Useful for register allocation
    void DestructiveAddUsage(int count) {
        use_count += count;
    }
private:
    // Keeps the union below constructible without activating any member
    struct NonTriviallyDummy {
        NonTriviallyDummy() noexcept {}
    };
    void Use(const Value& value);
    void UndoUse(const Value& value);
    IR::Opcode op{};
    int use_count{};
    u32 flags{};
    u32 definition{};
    union {
        NonTriviallyDummy dummy{};
        boost::container::small_vector<std::pair<Block*, Value>, 2> phi_args;
        std::array<Value, 5> args;
    };
    std::unique_ptr<AssociatedInsts> associated_insts;
};
static_assert(sizeof(Inst) <= 128, "Inst size unintentionally increased");
// Pseudo-instructions attached to a parent instruction. The first three are
// mutually exclusive and therefore share a union slot.
struct AssociatedInsts {
    union {
        Inst* in_bounds_inst;
        Inst* sparse_inst;
        Inst* zero_inst{};
    };
    Inst* sign_inst{};
    Inst* carry_inst{};
    Inst* overflow_inst{};
};
// Convenience aliases for values constrained to one type or to a small set of
// interchangeable types.
using U1 = TypedValue<Type::U1>;
using U8 = TypedValue<Type::U8>;
using U16 = TypedValue<Type::U16>;
using U32 = TypedValue<Type::U32>;
using U64 = TypedValue<Type::U64>;
using F16 = TypedValue<Type::F16>;
using F32 = TypedValue<Type::F32>;
using F64 = TypedValue<Type::F64>;
using U32U64 = TypedValue<Type::U32 | Type::U64>;
using F32F64 = TypedValue<Type::F32 | Type::F64>;
using U16U32U64 = TypedValue<Type::U16 | Type::U32 | Type::U64>;
using F16F32F64 = TypedValue<Type::F16 | Type::F32 | Type::F64>;
using UAny = TypedValue<Type::U8 | Type::U16 | Type::U32 | Type::U64>;
// True when this value wraps an Identity instruction (a forwarding alias).
inline bool Value::IsIdentity() const noexcept {
    return type == Type::Opaque && inst->GetOpcode() == Opcode::Identity;
}
// True when this value wraps a Phi instruction.
inline bool Value::IsPhi() const noexcept {
    return type == Type::Opaque && inst->GetOpcode() == Opcode::Phi;
}
// True for a default-constructed (void) value.
inline bool Value::IsEmpty() const noexcept {
    return type == Type::Void;
}
// Follows chains of Identity instructions; the value is an immediate when the
// chain ends in anything other than an instruction reference.
inline bool Value::IsImmediate() const noexcept {
    IR::Type current_type{type};
    const IR::Inst* current_inst{inst};
    while (current_type == Type::Opaque && current_inst->GetOpcode() == Opcode::Identity) {
        const Value& arg{current_inst->Arg(0)};
        current_type = arg.type;
        current_inst = arg.inst;
    }
    return current_type != Type::Opaque;
}
// Direct instruction accessor; only valid for instruction references.
inline IR::Inst* Value::Inst() const {
    DEBUG_ASSERT(type == Type::Opaque);
    return inst;
}
// Instruction accessor that skips over Identity chains.
inline IR::Inst* Value::InstRecursive() const {
    DEBUG_ASSERT(type == Type::Opaque);
    if (IsIdentity()) {
        return inst->Arg(0).InstRecursive();
    }
    return inst;
}
// Returns the underlying value after collapsing any Identity chain.
inline IR::Value Value::Resolve() const {
    if (IsIdentity()) {
        return inst->Arg(0).Resolve();
    }
    return *this;
}
// Handle accessors; each asserts the union tag before reading its member.
inline IR::Reg Value::Reg() const {
    DEBUG_ASSERT(type == Type::Reg);
    return reg;
}
inline IR::Pred Value::Pred() const {
    DEBUG_ASSERT(type == Type::Pred);
    return pred;
}
inline IR::Attribute Value::Attribute() const {
    DEBUG_ASSERT(type == Type::Attribute);
    return attribute;
}
inline IR::Patch Value::Patch() const {
    DEBUG_ASSERT(type == Type::Patch);
    return patch;
}
// Immediate accessors. Each resolves Identity chains first, then asserts the
// union tag before reading the immediate member.
inline bool Value::U1() const {
    if (IsIdentity()) {
        return inst->Arg(0).U1();
    }
    DEBUG_ASSERT(type == Type::U1);
    return imm_u1;
}
inline u8 Value::U8() const {
    if (IsIdentity()) {
        return inst->Arg(0).U8();
    }
    DEBUG_ASSERT(type == Type::U8);
    return imm_u8;
}
inline u16 Value::U16() const {
    if (IsIdentity()) {
        return inst->Arg(0).U16();
    }
    DEBUG_ASSERT(type == Type::U16);
    return imm_u16;
}
inline u32 Value::U32() const {
    if (IsIdentity()) {
        return inst->Arg(0).U32();
    }
    DEBUG_ASSERT(type == Type::U32);
    return imm_u32;
}
inline f32 Value::F32() const {
    if (IsIdentity()) {
        return inst->Arg(0).F32();
    }
    DEBUG_ASSERT(type == Type::F32);
    return imm_f32;
}
inline u64 Value::U64() const {
    if (IsIdentity()) {
        return inst->Arg(0).U64();
    }
    DEBUG_ASSERT(type == Type::U64);
    return imm_u64;
}
inline f64 Value::F64() const {
    if (IsIdentity()) {
        return inst->Arg(0).F64();
    }
    DEBUG_ASSERT(type == Type::F64);
    return imm_f64;
}
[[nodiscard]] inline bool IsPhi(const Inst& inst) {
return inst.GetOpcode() == Opcode::Phi;
}
} // namespace Shader::IR

View File

@@ -0,0 +1,643 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <algorithm>
#include <array>
#include <optional>
#include <ranges>
#include <string>
#include <utility>
#include <fmt/format.h>
#include "shader_recompiler/exception.h"
#include "shader_recompiler/frontend/maxwell/control_flow.h"
#include "shader_recompiler/frontend/maxwell/decode.h"
#include "shader_recompiler/frontend/maxwell/indirect_branch_table_track.h"
#include "shader_recompiler/frontend/maxwell/location.h"
namespace Shader::Maxwell::Flow {
namespace {
// Orders blocks by their starting address. The mixed Block/Location overloads
// make the comparator transparent, allowing lookups by Location into
// block sets without constructing a Block.
struct Compare {
    bool operator()(const Block& lhs, Location rhs) const noexcept {
        return lhs.begin < rhs;
    }
    bool operator()(Location lhs, const Block& rhs) const noexcept {
        return lhs < rhs.begin;
    }
    bool operator()(const Block& lhs, const Block& rhs) const noexcept {
        return lhs.begin < rhs.begin;
    }
};
/// Absolute target of a relative branch: the encoded offset is relative to the
/// address of the following instruction (pc + 8).
u32 BranchOffset(Location pc, Instruction inst) {
    const u32 next_pc{pc.Offset() + 8u};
    return next_pc + static_cast<u32>(inst.branch.Offset());
}
// Splits old_block at pc: new_block takes over everything from pc onwards
// (end, terminator, branches), while old_block is reduced to [begin, pc)
// ending in an unconditional fall-through into new_block.
void Split(Block* old_block, Block* new_block, Location pc) {
    if (pc <= old_block->begin || pc >= old_block->end) {
        throw InvalidArgument("Invalid address to split={}", pc);
    }
    *new_block = Block{};
    new_block->begin = pc;
    new_block->end = old_block->end;
    new_block->end_class = old_block->end_class;
    new_block->cond = old_block->cond;
    // Both halves keep the same control-flow stack (copied here, saved below)
    new_block->stack = old_block->stack;
    new_block->branch_true = old_block->branch_true;
    new_block->branch_false = old_block->branch_false;
    new_block->function_call = old_block->function_call;
    new_block->return_block = old_block->return_block;
    new_block->branch_reg = old_block->branch_reg;
    new_block->branch_offset = old_block->branch_offset;
    new_block->indirect_branches = std::move(old_block->indirect_branches);
    // Save what the reset below would destroy
    const Location old_begin{old_block->begin};
    Stack old_stack{std::move(old_block->stack)};
    *old_block = Block{};
    old_block->begin = old_begin;
    old_block->end = pc;
    old_block->end_class = EndClass::Branch;
    old_block->cond = IR::Condition(true);
    old_block->stack = old_stack;
    old_block->branch_true = new_block;
    old_block->branch_false = nullptr;
}
/// Maps a control-flow opcode to the stack token it pushes or consumes.
/// A push instruction and its consuming counterpart must map to the same
/// token so Stack::Pop/Peek can find the matching entry.
///
/// Fix: PCNT/CONT previously returned Token::PBK, conflating the continue
/// stack with the break stack; a CONT would consume a PBK entry instead of
/// the address pushed by PCNT.
Token OpcodeToken(Opcode opcode) {
    switch (opcode) {
    case Opcode::PBK:
    case Opcode::BRK:
        return Token::PBK;
    case Opcode::PCNT:
    case Opcode::CONT:
        // CONT consumes the address pushed by PCNT, not by PBK
        return Token::PCNT;
    case Opcode::PEXIT:
    case Opcode::EXIT:
        return Token::PEXIT;
    case Opcode::PLONGJMP:
    case Opcode::LONGJMP:
        return Token::PLONGJMP;
    case Opcode::PRET:
    case Opcode::RET:
    case Opcode::CAL:
        return Token::PRET;
    case Opcode::SSY:
    case Opcode::SYNC:
        return Token::SSY;
    default:
        throw InvalidArgument("{}", opcode);
    }
}
/// Returns true for the jump variants whose encoding holds an absolute target
/// address rather than a pc-relative offset.
bool IsAbsoluteJump(Opcode opcode) {
    return opcode == Opcode::JCAL || opcode == Opcode::JMP || opcode == Opcode::JMX;
}
/// Whether a branch opcode's encoding carries a flow-condition test.
/// Throws for opcodes that are not branches at all.
bool HasFlowTest(Opcode opcode) {
    switch (opcode) {
    case Opcode::CAL:
    case Opcode::JCAL:
        return false;
    case Opcode::BRA:
    case Opcode::BRX:
    case Opcode::EXIT:
    case Opcode::JMP:
    case Opcode::JMX:
    case Opcode::KIL:
    case Opcode::BRK:
    case Opcode::CONT:
    case Opcode::LONGJMP:
    case Opcode::RET:
    case Opcode::SYNC:
        return true;
    default:
        throw InvalidArgument("Invalid branch {}", opcode);
    }
}
/// Quoted display name of a block for dot/debug output; virtual blocks are
/// prefixed so they can be told apart from real addresses.
std::string NameOf(const Block& block) {
    const char* const prefix{block.begin.IsVirtual() ? "Virtual " : ""};
    return fmt::format("\"{}{}\"", prefix, block.begin);
}
} // Anonymous namespace
// Pushes a new entry for `token` with the given target on top of the stack.
void Stack::Push(Token token, Location target) {
    entries.push_back({.token = token, .target{target}});
}
// Pops the most recent entry pushed with `token`, returning its target and a
// stack with that entry (and everything pushed after it) removed.
// Throws when no matching entry exists.
std::pair<Location, Stack> Stack::Pop(Token token) const {
    const std::optional<Location> pc{Peek(token)};
    if (!pc) {
        throw LogicError("Token could not be found");
    }
    return {*pc, Remove(token)};
}
// Returns the target of the most recent entry pushed with `token`, searching
// from the top of the stack; nullopt when no entry matches.
std::optional<Location> Stack::Peek(Token token) const {
    const auto reverse_entries{entries | std::views::reverse};
    const auto it{std::ranges::find(reverse_entries, token, &StackEntry::token)};
    if (it == reverse_entries.end()) {
        return std::nullopt;
    }
    return it->target;
}
// Returns a copy of the stack with the most recent entry matching `token`
// removed, along with every entry that was pushed on top of it.
Stack Stack::Remove(Token token) const {
    const auto reverse_entries{entries | std::views::reverse};
    const auto it{std::ranges::find(reverse_entries, token, &StackEntry::token)};
    // Number of entries above the match, counted from the top of the stack
    const auto pos{std::distance(reverse_entries.begin(), it)};
    Stack result;
    // Keep only the entries strictly below the matched one
    result.entries.insert(result.entries.end(), entries.begin(), entries.end() - pos - 1);
    return result;
}
/// A block covers the half-open address range [begin, end).
bool Block::Contains(Location pc) const noexcept {
    return begin <= pc && pc < end;
}
// Creates a function whose single pending label points at an empty block at
// the entry point; AnalyzeLabel later grows the block with real instructions.
Function::Function(ObjectPool<Block>& block_pool, Location start_address)
    : entrypoint{start_address} {
    Label& label{labels.emplace_back()};
    label.address = start_address;
    label.block = block_pool.Create(Block{});
    label.block->begin = start_address;
    label.block->end = start_address;
    label.block->end_class = EndClass::Branch;
    label.block->cond = IR::Condition(true);
    label.block->branch_true = nullptr;
    label.block->branch_false = nullptr;
}
// Builds the control flow graph starting at start_address. When
// exits_to_dispatcher is set, a synthetic exit block is created and placed
// one instruction past the last analyzed block of the entry function.
CFG::CFG(Environment& env_, ObjectPool<Block>& block_pool_, Location start_address,
         bool exits_to_dispatcher_)
    : env{env_}, block_pool{block_pool_}, program_start{start_address}, exits_to_dispatcher{
                                                                            exits_to_dispatcher_} {
    if (exits_to_dispatcher) {
        dispatch_block = block_pool.Create(Block{});
        dispatch_block->begin = {};
        dispatch_block->end = {};
        dispatch_block->end_class = EndClass::Exit;
        dispatch_block->cond = IR::Condition(true);
        dispatch_block->stack = {};
        dispatch_block->branch_true = nullptr;
        dispatch_block->branch_false = nullptr;
    }
    functions.emplace_back(block_pool, start_address);
    // Analysis can append labels and discover new functions (through CAL), so
    // iterate by index rather than by iterator.
    for (FunctionId function_id = 0; function_id < functions.size(); ++function_id) {
        while (!functions[function_id].labels.empty()) {
            Function& function{functions[function_id]};
            Label label{function.labels.back()};
            function.labels.pop_back();
            AnalyzeLabel(function_id, label);
        }
    }
    if (exits_to_dispatcher) {
        // Place the dispatch block right after the highest analyzed address
        const auto last_block{functions[0].blocks.rbegin()};
        dispatch_block->begin = last_block->end + 1;
        dispatch_block->end = last_block->end + 1;
        functions[0].blocks.insert(*dispatch_block);
    }
}
// Analyzes instructions starting at `label` until a branch is found or an
// already analyzed block is reached, then inserts the resulting block into
// the function's block set.
void CFG::AnalyzeLabel(FunctionId function_id, Label& label) {
    if (InspectVisitedBlocks(function_id, label)) {
        // Label address has been visited
        return;
    }
    // Try to find the next block
    Function* const function{&functions[function_id]};
    Location pc{label.address};
    const auto next_it{function->blocks.upper_bound(pc, Compare{})};
    const bool is_last{next_it == function->blocks.end()};
    Block* const next{is_last ? nullptr : &*next_it};
    // Insert before the next block
    Block* const block{label.block};
    // Analyze instructions until it reaches an already visited block or there's a branch
    bool is_branch{false};
    while (!next || pc < next->begin) {
        is_branch = AnalyzeInst(block, function_id, pc) == AnalysisState::Branch;
        if (is_branch) {
            break;
        }
        ++pc;
    }
    if (!is_branch) {
        // If the block finished without a branch,
        // it means that the next instruction is already visited, jump to it
        block->end = pc;
        block->cond = IR::Condition{true};
        block->branch_true = next;
        block->branch_false = nullptr;
    }
    // Function's pointer might be invalid, resolve it again
    // Insert the new block
    functions[function_id].blocks.insert(*block);
}
// Returns true when the label's address lies inside an already analyzed block;
// in that case the existing block is split at the label and the label's block
// becomes the upper half.
bool CFG::InspectVisitedBlocks(FunctionId function_id, const Label& label) {
    const Location pc{label.address};
    Function& function{functions[function_id]};
    const auto it{
        std::ranges::find_if(function.blocks, [pc](auto& block) { return block.Contains(pc); })};
    if (it == function.blocks.end()) {
        // Address has not been visited
        return false;
    }
    Block* const visited_block{&*it};
    if (visited_block->begin == pc) {
        // A block starting exactly at a pending label should not exist
        throw LogicError("Dangling block");
    }
    Block* const new_block{label.block};
    Split(visited_block, new_block, pc);
    function.blocks.insert(it, *new_block);
    return true;
}
// Decodes and analyzes the instruction at pc, updating the block's
// control-flow information. Returns Branch when the instruction terminates
// the current block, Continue otherwise.
CFG::AnalysisState CFG::AnalyzeInst(Block* block, FunctionId function_id, Location pc) {
    const Instruction inst{env.ReadInstruction(pc.Offset())};
    const Opcode opcode{Decode(inst.raw)};
    switch (opcode) {
    case Opcode::BRA:
    case Opcode::JMP:
    case Opcode::RET:
        if (!AnalyzeBranch(block, function_id, pc, inst, opcode)) {
            return AnalysisState::Continue;
        }
        switch (opcode) {
        case Opcode::BRA:
        case Opcode::JMP:
            AnalyzeBRA(block, function_id, pc, inst, IsAbsoluteJump(opcode));
            break;
        case Opcode::RET:
            block->end_class = EndClass::Return;
            break;
        default:
            break;
        }
        block->end = pc;
        return AnalysisState::Branch;
    case Opcode::BRK:
    case Opcode::CONT:
    case Opcode::LONGJMP:
    case Opcode::SYNC: {
        // Stack-consuming branches: pop the matching token to find the target
        if (!AnalyzeBranch(block, function_id, pc, inst, opcode)) {
            return AnalysisState::Continue;
        }
        const auto [stack_pc, new_stack]{block->stack.Pop(OpcodeToken(opcode))};
        block->branch_true = AddLabel(block, new_stack, stack_pc, function_id);
        block->end = pc;
        return AnalysisState::Branch;
    }
    case Opcode::KIL: {
        const Predicate pred{inst.Pred()};
        const auto ir_pred{static_cast<IR::Pred>(pred.index)};
        const IR::Condition cond{inst.branch.flow_test, ir_pred, pred.negated};
        AnalyzeCondInst(block, function_id, pc, EndClass::Kill, cond);
        return AnalysisState::Branch;
    }
    case Opcode::PBK:
    case Opcode::PCNT:
    case Opcode::PEXIT:
    case Opcode::PLONGJMP:
    case Opcode::SSY:
        // Stack-pushing instructions record a target for a later consumer
        block->stack.Push(OpcodeToken(opcode), BranchOffset(pc, inst));
        return AnalysisState::Continue;
    case Opcode::BRX:
    case Opcode::JMX:
        return AnalyzeBRX(block, pc, inst, IsAbsoluteJump(opcode), function_id);
    case Opcode::EXIT:
        return AnalyzeEXIT(block, function_id, pc, inst);
    case Opcode::PRET:
        throw NotImplementedException("PRET flow analysis");
    case Opcode::CAL:
    case Opcode::JCAL: {
        const bool is_absolute{IsAbsoluteJump(opcode)};
        const Location cal_pc{is_absolute ? inst.branch.Absolute() : BranchOffset(pc, inst)};
        // Technically CAL pushes into PRET, but that's implicit in the function call for us
        // Insert the function into the list if it doesn't exist
        const auto it{std::ranges::find(functions, cal_pc, &Function::entrypoint)};
        const bool exists{it != functions.end()};
        const FunctionId call_id{exists ? static_cast<size_t>(std::distance(functions.begin(), it))
                                        : functions.size()};
        if (!exists) {
            functions.emplace_back(block_pool, cal_pc);
        }
        block->end_class = EndClass::Call;
        block->function_call = call_id;
        block->return_block = AddLabel(block, block->stack, pc + 1, function_id);
        block->end = pc;
        return AnalysisState::Branch;
    }
    default:
        break;
    }
    // Any other instruction only ends the block when it is predicated on a
    // non-constant predicate, which must be expressed as control flow
    const Predicate pred{inst.Pred()};
    if (pred == Predicate{true} || pred == Predicate{false}) {
        return AnalysisState::Continue;
    }
    const IR::Condition cond{static_cast<IR::Pred>(pred.index), pred.negated};
    AnalyzeCondInst(block, function_id, pc, EndClass::Branch, cond);
    return AnalysisState::Branch;
}
// Turns a conditionally executed instruction at pc into explicit control flow:
// a virtual block tests the condition and branches either into a one-
// instruction conditional block or past it.
void CFG::AnalyzeCondInst(Block* block, FunctionId function_id, Location pc,
                          EndClass insn_end_class, IR::Condition cond) {
    if (block->begin != pc) {
        // If the block doesn't start in the conditional instruction
        // mark it as a label to visit it later
        block->end = pc;
        block->cond = IR::Condition{true};
        block->branch_true = AddLabel(block, block->stack, pc, function_id);
        block->branch_false = nullptr;
        return;
    }
    // Create a virtual block and a conditional block
    Block* const conditional_block{block_pool.Create()};
    Block virtual_block{};
    virtual_block.begin = block->begin.Virtual();
    virtual_block.end = block->begin.Virtual();
    virtual_block.end_class = EndClass::Branch;
    virtual_block.stack = block->stack;
    virtual_block.cond = cond;
    virtual_block.branch_true = conditional_block;
    virtual_block.branch_false = nullptr;
    // Save the contents of the visited block in the conditional block
    *conditional_block = std::move(*block);
    // Impersonate the visited block with a virtual block
    *block = std::move(virtual_block);
    // Set the end properties of the conditional instruction
    conditional_block->end = pc + 1;
    conditional_block->end_class = insn_end_class;
    // Add a label to the instruction after the conditional instruction
    Block* const endif_block{AddLabel(conditional_block, block->stack, pc + 1, function_id)};
    // Branch to the next instruction from the virtual block
    block->branch_false = endif_block;
    // And branch to it from the conditional instruction if it is a branch or a kill instruction
    // Kill instructions are considered a branch because they demote to a helper invocation and
    // execution may continue.
    if (insn_end_class == EndClass::Branch || insn_end_class == EndClass::Kill) {
        conditional_block->cond = IR::Condition{true};
        conditional_block->branch_true = endif_block;
        conditional_block->branch_false = nullptr;
    }
    // Finally insert the condition block into the list of blocks
    functions[function_id].blocks.insert(*conditional_block);
}
/// Common handling for predicated branch instructions.
/// Sets up the block's condition and fall-through edge.
/// @return True when the branch can actually be taken; false when the
///         predicate statically discards it.
bool CFG::AnalyzeBranch(Block* block, FunctionId function_id, Location pc, Instruction inst,
                        Opcode opcode) {
    if (inst.branch.is_cbuf) {
        throw NotImplementedException("Branch with constant buffer offset");
    }
    const Predicate branch_pred{inst.Pred()};
    if (branch_pred == Predicate{false}) {
        // Never taken, the caller can treat this as a regular instruction
        return false;
    }
    IR::FlowTest test{IR::FlowTest::T};
    if (HasFlowTest(opcode)) {
        test = inst.branch.flow_test.Value();
    }
    const bool always_taken{branch_pred == Predicate{true} && test == IR::FlowTest::T};
    if (always_taken) {
        block->cond = IR::Condition{true};
    } else {
        // Conditionally taken: fall through to the next instruction otherwise
        block->cond = IR::Condition(test, static_cast<IR::Pred>(branch_pred.index),
                                    branch_pred.negated);
        block->branch_false = AddLabel(block, block->stack, pc + 1, function_id);
    }
    return true;
}
/// Resolves the destination of a BRA (absolute or PC-relative) and labels it
/// as the taken edge of the current block.
void CFG::AnalyzeBRA(Block* block, FunctionId function_id, Location pc, Instruction inst,
                     bool is_absolute) {
    if (is_absolute) {
        block->branch_true = AddLabel(block, block->stack, inst.branch.Absolute(), function_id);
    } else {
        block->branch_true = AddLabel(block, block->stack, BranchOffset(pc, inst), function_id);
    }
}
/// Analyzes an indirect branch (BRX/JMX): recovers the constant-buffer branch
/// table it indexes, reads every entry, and records the resulting targets.
/// @throws NotImplementedException when the table cannot be tracked or the
///         branch is conditional.
CFG::AnalysisState CFG::AnalyzeBRX(Block* block, Location pc, Instruction inst, bool is_absolute,
                                   FunctionId function_id) {
    const std::optional brx_table{TrackIndirectBranchTable(env, pc, program_start)};
    if (!brx_table) {
        // NOTE(review): the original code called TrackIndirectBranchTable a
        // second time here before throwing; that call had no effect and has
        // been removed as leftover debugging code.
        throw NotImplementedException("Failed to track indirect branch");
    }
    const IR::FlowTest flow_test{inst.branch.flow_test};
    const Predicate pred{inst.Pred()};
    if (flow_test != IR::FlowTest::T || pred != Predicate{true}) {
        throw NotImplementedException("Conditional indirect branch");
    }
    // Read every table entry from the constant buffer and compute its target
    std::vector<u32> targets;
    targets.reserve(brx_table->num_entries);
    for (u32 i = 0; i < brx_table->num_entries; ++i) {
        u32 target{env.ReadCbufValue(brx_table->cbuf_index, brx_table->cbuf_offset + i * 4)};
        if (!is_absolute) {
            target += pc.Offset();
        }
        target += static_cast<u32>(brx_table->branch_offset);
        // Bias by one instruction (8 bytes), matching branch_offset + 8 below
        target += 8;
        targets.push_back(target);
    }
    // Deduplicate targets so each one is labeled only once
    std::ranges::sort(targets);
    targets.erase(std::unique(targets.begin(), targets.end()), targets.end());
    block->indirect_branches.reserve(targets.size());
    for (const u32 target : targets) {
        Block* const branch{AddLabel(block, block->stack, target, function_id)};
        block->indirect_branches.push_back({
            .block = branch,
            .address = target,
        });
    }
    block->cond = IR::Condition{true};
    block->end = pc + 1;
    block->end_class = EndClass::IndirectBranch;
    block->branch_reg = brx_table->branch_reg;
    block->branch_offset = brx_table->branch_offset + 8;
    if (!is_absolute) {
        block->branch_offset += pc.Offset();
    }
    return AnalysisState::Branch;
}
// Analyzes an EXIT instruction, handling the statically-dead, conditional,
// PEXIT-redirected and dispatcher cases.
CFG::AnalysisState CFG::AnalyzeEXIT(Block* block, FunctionId function_id, Location pc,
                                    Instruction inst) {
    const IR::FlowTest flow_test{inst.branch.flow_test};
    const Predicate pred{inst.Pred()};
    if (pred == Predicate{false} || flow_test == IR::FlowTest::F) {
        // EXIT will never be taken
        return AnalysisState::Continue;
    }
    if (exits_to_dispatcher && function_id != 0) {
        throw NotImplementedException("Dispatch EXIT on external function");
    }
    if (pred != Predicate{true} || flow_test != IR::FlowTest::T) {
        // Conditionally taken EXIT
        if (block->stack.Peek(Token::PEXIT).has_value()) {
            throw NotImplementedException("Conditional EXIT with PEXIT token");
        }
        const IR::Condition cond{flow_test, static_cast<IR::Pred>(pred.index), pred.negated};
        if (exits_to_dispatcher) {
            // Branch to the dispatcher when taken, fall through otherwise
            block->end = pc;
            block->end_class = EndClass::Branch;
            block->cond = cond;
            block->branch_true = dispatch_block;
            block->branch_false = AddLabel(block, block->stack, pc + 1, function_id);
            return AnalysisState::Branch;
        }
        AnalyzeCondInst(block, function_id, pc, EndClass::Exit, cond);
        return AnalysisState::Branch;
    }
    // Unconditional EXIT with a pending PEXIT token branches to the token's
    // target instead of ending the shader
    if (const std::optional<Location> exit_pc{block->stack.Peek(Token::PEXIT)}) {
        const Stack popped_stack{block->stack.Remove(Token::PEXIT)};
        block->cond = IR::Condition{true};
        block->branch_true = AddLabel(block, popped_stack, *exit_pc, function_id);
        block->branch_false = nullptr;
        return AnalysisState::Branch;
    }
    if (exits_to_dispatcher) {
        // Unconditional EXIT jumps straight to the dispatcher block
        block->cond = IR::Condition{true};
        block->end = pc;
        block->end_class = EndClass::Branch;
        block->branch_true = dispatch_block;
        block->branch_false = nullptr;
        return AnalysisState::Branch;
    }
    block->end = pc + 1;
    block->end_class = EndClass::Exit;
    return AnalysisState::Branch;
}
// Returns the block that owns the given address, creating a new label (and
// block) when the address has not been seen yet.
Block* CFG::AddLabel(Block* block, Stack stack, Location pc, FunctionId function_id) {
    Function& function{functions[function_id]};
    if (block->begin == pc) {
        // Jumps to itself
        return block;
    }
    if (const auto it{function.blocks.find(pc, Compare{})}; it != function.blocks.end()) {
        // Block already exists and it has been visited
        if (function.blocks.begin() != it) {
            // Check if the previous node is the virtual variant of the label
            // This won't exist if a virtual node is not needed or it hasn't been visited
            // If it hasn't been visited and a virtual node is needed, this will still behave as
            // expected because the node impersonated with its virtual node.
            const auto prev{std::prev(it)};
            if (it->begin.Virtual() == prev->begin) {
                return &*prev;
            }
        }
        return &*it;
    }
    // Make sure we don't insert the same layer twice
    const auto label_it{std::ranges::find(function.labels, pc, &Label::address)};
    if (label_it != function.labels.end()) {
        return label_it->block;
    }
    // Unvisited address: allocate a fresh block and queue it as a label
    Block* const new_block{block_pool.Create()};
    new_block->begin = pc;
    new_block->end = pc;
    new_block->end_class = EndClass::Branch;
    new_block->cond = IR::Condition(true);
    new_block->stack = stack;
    new_block->branch_true = nullptr;
    new_block->branch_false = nullptr;
    function.labels.push_back(Label{
        .address{pc},
        .block = new_block,
        .stack{std::move(stack)},
    });
    return new_block;
}
/// Renders the control flow graph in Graphviz dot format for debugging.
/// @return A complete "digraph" document, one cluster per function.
std::string CFG::Dot() const {
    int node_uid{0};
    std::string dot{"digraph shader {\n"};
    for (const Function& function : functions) {
        dot += fmt::format("\tsubgraph cluster_{} {{\n", function.entrypoint);
        dot += fmt::format("\t\tnode [style=filled];\n");
        for (const Block& block : function.blocks) {
            const std::string name{NameOf(block)};
            // Emit one edge; optionally label it with the block's condition
            const auto add_branch = [&](Block* branch, bool add_label) {
                dot += fmt::format("\t\t{}->{}", name, NameOf(*branch));
                if (add_label && block.cond != IR::Condition{true} &&
                    block.cond != IR::Condition{false}) {
                    dot += fmt::format(" [label=\"{}\"]", block.cond);
                }
                dot += '\n';
            };
            dot += fmt::format("\t\t{};\n", name);
            switch (block.end_class) {
            case EndClass::Branch:
                if (block.cond != IR::Condition{false}) {
                    add_branch(block.branch_true, true);
                }
                if (block.cond != IR::Condition{true}) {
                    add_branch(block.branch_false, false);
                }
                break;
            case EndClass::IndirectBranch:
                for (const IndirectBranch& branch : block.indirect_branches) {
                    add_branch(branch.block, false);
                }
                break;
            // Terminators are drawn as synthetic square "N<uid>" nodes.
            // Note: the valid Graphviz style name is "striped"; the previous
            // "stripped" was silently ignored by dot.
            case EndClass::Call:
                dot += fmt::format("\t\t{}->N{};\n", name, node_uid);
                dot += fmt::format("\t\tN{}->{};\n", node_uid, NameOf(*block.return_block));
                dot += fmt::format("\t\tN{} [label=\"Call {}\"][shape=square][style=striped];\n",
                                   node_uid, block.function_call);
                dot += '\n';
                ++node_uid;
                break;
            case EndClass::Exit:
                dot += fmt::format("\t\t{}->N{};\n", name, node_uid);
                dot += fmt::format("\t\tN{} [label=\"Exit\"][shape=square][style=striped];\n",
                                   node_uid);
                ++node_uid;
                break;
            case EndClass::Return:
                dot += fmt::format("\t\t{}->N{};\n", name, node_uid);
                dot += fmt::format("\t\tN{} [label=\"Return\"][shape=square][style=striped];\n",
                                   node_uid);
                ++node_uid;
                break;
            case EndClass::Kill:
                dot += fmt::format("\t\t{}->N{};\n", name, node_uid);
                dot += fmt::format("\t\tN{} [label=\"Kill\"][shape=square][style=striped];\n",
                                   node_uid);
                ++node_uid;
                break;
            }
        }
        // The first function's entrypoint is the aligned program start
        // (offset 8), presumed to be the shader's main — TODO confirm
        if (function.entrypoint == 8) {
            dot += fmt::format("\t\tlabel = \"main\";\n");
        } else {
            dot += fmt::format("\t\tlabel = \"Function {}\";\n", function.entrypoint);
        }
        dot += "\t}\n";
    }
    if (!functions.empty()) {
        auto& function{functions.front()};
        if (function.blocks.empty()) {
            dot += "Start;\n";
        } else {
            dot += fmt::format("\tStart -> {};\n", NameOf(*function.blocks.begin()));
        }
        dot += fmt::format("\tStart [shape=diamond];\n");
    }
    dot += "}\n";
    return dot;
}
} // namespace Shader::Maxwell::Flow

View File

@@ -0,0 +1,170 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <compare>
#include <optional>
#include <span>
#include <string>
#include <vector>
#include <boost/container/small_vector.hpp>
#include <boost/intrusive/set.hpp>
#include "shader_recompiler/environment.h"
#include "shader_recompiler/frontend/ir/condition.h"
#include "shader_recompiler/frontend/maxwell/instruction.h"
#include "shader_recompiler/frontend/maxwell/location.h"
#include "shader_recompiler/frontend/maxwell/opcodes.h"
#include "shader_recompiler/object_pool.h"
namespace Shader::Maxwell::Flow {
struct Block;
using FunctionId = size_t;
// How a basic block terminates.
enum class EndClass {
    Branch,         // Direct branch, conditional or unconditional
    IndirectBranch, // BRX/JMX through a constant-buffer branch table
    Call,           // CAL into another function
    Exit,           // Shader EXIT
    Return,         // Return from a called function
    Kill,           // Kill; demotes to a helper invocation, execution may continue
};

// Hardware control-flow stack token classes.
enum class Token {
    SSY,
    PBK,
    PEXIT,
    PRET,
    PCNT,
    PLONGJMP,
};

// One entry of the emulated control-flow stack.
struct StackEntry {
    auto operator<=>(const StackEntry&) const noexcept = default;

    Token token;     // Token class that produced this entry
    Location target; // Address associated with the token
};
// Emulated control-flow stack. Query operations (Pop/Peek/Remove) are
// non-mutating and return modified copies, so different CFG analysis paths
// can share a stack state.
class Stack {
public:
    /// Push an entry of the given token class pointing at target.
    void Push(Token token, Location target);
    /// Return the target of a matching entry together with the stack that
    /// remains after removing it.
    [[nodiscard]] std::pair<Location, Stack> Pop(Token token) const;
    /// Return the target of a matching entry, if any, without removing it.
    [[nodiscard]] std::optional<Location> Peek(Token token) const;
    /// Return a copy of this stack with a matching entry removed.
    [[nodiscard]] Stack Remove(Token token) const;

private:
    boost::container::small_vector<StackEntry, 3> entries;
};
// One resolved target of an indirect branch.
struct IndirectBranch {
    Block* block; // Destination block
    u32 address;  // Destination address in bytes
};

// Basic block of the control flow graph, keyed by its begin address inside a
// per-function intrusive set.
struct Block : boost::intrusive::set_base_hook<
                   // Normal link is ~2.5% faster compared to safe link
                   boost::intrusive::link_mode<boost::intrusive::normal_link>> {
    [[nodiscard]] bool Contains(Location pc) const noexcept;

    bool operator<(const Block& rhs) const noexcept {
        return begin < rhs.begin;
    }

    Location begin;          // First instruction of the block
    Location end;            // One past the last analyzed instruction
    EndClass end_class{};    // How the block terminates
    IR::Condition cond{};    // Condition guarding branch_true
    Stack stack;             // Control-flow stack state on entry
    Block* branch_true{};    // Taken edge
    Block* branch_false{};   // Fall-through edge (null when unconditional)
    FunctionId function_call{}; // Callee for EndClass::Call blocks
    Block* return_block{};   // Block resumed after the call returns
    IR::Reg branch_reg{};    // Register indexed by an indirect branch
    s32 branch_offset{};     // Bias applied to indirect branch targets
    std::vector<IndirectBranch> indirect_branches; // Resolved BRX/JMX targets
};

// An address queued for analysis together with the state needed to visit it.
struct Label {
    Location address;
    Block* block;
    Stack stack;
};

// One function of the shader: its entry point, pending labels and the set of
// blocks discovered so far.
struct Function {
    explicit Function(ObjectPool<Block>& block_pool, Location start_address);

    Location entrypoint;
    boost::container::small_vector<Label, 16> labels;
    boost::intrusive::set<Block> blocks;
};
// Builds the control flow graph of a Maxwell shader program: one Function per
// entry point (main plus every CAL target discovered during analysis).
class CFG {
    enum class AnalysisState {
        Branch,   // The current block ended in a branch
        Continue, // Keep scanning instructions in the current block
    };

public:
    explicit CFG(Environment& env, ObjectPool<Block>& block_pool, Location start_address,
                 bool exits_to_dispatcher = false);

    CFG& operator=(const CFG&) = delete;
    CFG(const CFG&) = delete;

    CFG& operator=(CFG&&) = delete;
    CFG(CFG&&) = delete;

    /// Render the graph in Graphviz dot format for debugging.
    [[nodiscard]] std::string Dot() const;

    [[nodiscard]] std::span<const Function> Functions() const noexcept {
        return std::span(functions.data(), functions.size());
    }

    [[nodiscard]] std::span<Function> Functions() noexcept {
        return std::span(functions.data(), functions.size());
    }

    /// True when EXIT branches to a dispatcher block instead of ending the shader.
    [[nodiscard]] bool ExitsToDispatcher() const {
        return exits_to_dispatcher;
    }

private:
    void AnalyzeLabel(FunctionId function_id, Label& label);

    /// Inspect already visited blocks.
    /// Return true when the block has already been visited
    bool InspectVisitedBlocks(FunctionId function_id, const Label& label);

    AnalysisState AnalyzeInst(Block* block, FunctionId function_id, Location pc);

    void AnalyzeCondInst(Block* block, FunctionId function_id, Location pc, EndClass insn_end_class,
                         IR::Condition cond);

    /// Return true when the branch instruction is confirmed to be a branch
    bool AnalyzeBranch(Block* block, FunctionId function_id, Location pc, Instruction inst,
                       Opcode opcode);

    void AnalyzeBRA(Block* block, FunctionId function_id, Location pc, Instruction inst,
                    bool is_absolute);
    AnalysisState AnalyzeBRX(Block* block, Location pc, Instruction inst, bool is_absolute,
                             FunctionId function_id);
    AnalysisState AnalyzeEXIT(Block* block, FunctionId function_id, Location pc, Instruction inst);

    /// Return the branch target block id
    Block* AddLabel(Block* block, Stack stack, Location pc, FunctionId function_id);

    Environment& env;              // Instruction and constant-buffer read access
    ObjectPool<Block>& block_pool; // Owns the storage of every Block
    boost::container::small_vector<Function, 1> functions;
    FunctionId current_function_id{0};
    Location program_start;
    bool exits_to_dispatcher{}; // See ExitsToDispatcher()
    Block* dispatch_block{};    // EXIT target when exits_to_dispatcher is set
};
} // namespace Shader::Maxwell::Flow

View File

@@ -0,0 +1,149 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <algorithm>
#include <array>
#include <bit>
#include <memory>
#include <string_view>
#include "common/common_types.h"
#include "shader_recompiler/exception.h"
#include "shader_recompiler/frontend/maxwell/decode.h"
#include "shader_recompiler/frontend/maxwell/opcodes.h"
namespace Shader::Maxwell {
namespace {
/// Bit mask and expected value extracted from a textual instruction encoding.
struct MaskValue {
    u64 mask;  ///< Bits fixed by the encoding
    u64 value; ///< Expected value of the fixed bits
};

/// Parses an encoding string such as "1110 0010 0100 ----" into a mask/value
/// pair. '0' and '1' fix a bit, '-' leaves it free, spaces are separators.
/// @throws LogicError on any other character.
constexpr MaskValue MaskValueFromEncoding(const char* encoding) {
    u64 mask{};
    u64 value{};
    u64 bit{u64(1) << 63};
    while (*encoding) {
        const char c{*encoding};
        if (c == '0') {
            mask |= bit;
        } else if (c == '1') {
            mask |= bit;
            value |= bit;
        } else if (c != '-' && c != ' ') {
            throw LogicError("Invalid encoding character '{}'", c);
        }
        ++encoding;
        // Separators consume no bit position: only move to the next bit when
        // the upcoming character is not a space
        if (*encoding != ' ') {
            bit >>= 1;
        }
    }
    return MaskValue{.mask = mask, .value = value};
}
// Associates a decoded mask/value pair with the opcode it identifies.
struct InstEncoding {
    MaskValue mask_value;
    Opcode opcode;
};

// Every known encoding, in maxwell.inc declaration order.
constexpr std::array UNORDERED_ENCODINGS{
#define INST(name, cute, encode)                                                                   \
    InstEncoding{                                                                                  \
        .mask_value{MaskValueFromEncoding(encode)},                                                \
        .opcode = Opcode::name,                                                                    \
    },
#include "maxwell.inc"
#undef INST
};

// Sorts encodings by descending mask popcount so that more specific
// encodings are tested before less specific ones.
constexpr auto SortedEncodings() {
    std::array encodings{UNORDERED_ENCODINGS};
    std::ranges::sort(encodings, [](const InstEncoding& lhs, const InstEncoding& rhs) {
        return std::popcount(lhs.mask_value.mask) > std::popcount(rhs.mask_value.mask);
    });
    return encodings;
}
constexpr auto ENCODINGS{SortedEncodings()};

// Number of high bits that carry encoding information in at least one mask.
constexpr int WidestLeftBits() {
    int bits{64};
    for (const InstEncoding& encoding : ENCODINGS) {
        bits = std::min(bits, std::countr_zero(encoding.mask_value.mask));
    }
    return 64 - bits;
}
constexpr int WIDEST_LEFT_BITS{WidestLeftBits()};
constexpr int MASK_SHIFT{64 - WIDEST_LEFT_BITS};

// Fast-table index: the significant high bits of a raw instruction.
constexpr size_t ToFastLookupIndex(u64 value) {
    return static_cast<size_t>(value >> MASK_SHIFT);
}

// One past the largest fast-lookup index any encoding mask can produce.
constexpr size_t FastLookupSize() {
    size_t max_width{};
    for (const InstEncoding& encoding : ENCODINGS) {
        max_width = std::max(max_width, ToFastLookupIndex(encoding.mask_value.mask));
    }
    return max_width + 1;
}
constexpr size_t FAST_LOOKUP_SIZE{FastLookupSize()};

// Compressed encoding entry: only the significant high mask/value bits are
// stored; Mask()/Value() expand them back to 64 bits.
struct InstInfo {
    [[nodiscard]] u64 Mask() const noexcept {
        return static_cast<u64>(high_mask) << MASK_SHIFT;
    }

    [[nodiscard]] u64 Value() const noexcept {
        return static_cast<u64>(high_value) << MASK_SHIFT;
    }

    u16 high_mask;
    u16 high_value;
    Opcode opcode;
};
// Builds the candidate list for one fast-lookup index. The table stores up to
// two candidates per index; .at() throws at build time if more collide.
constexpr auto MakeFastLookupTableIndex(size_t index) {
    std::array<InstInfo, 2> encodings{};
    size_t element{};
    for (const auto& encoding : ENCODINGS) {
        const size_t mask{ToFastLookupIndex(encoding.mask_value.mask)};
        const size_t value{ToFastLookupIndex(encoding.mask_value.value)};
        if ((index & mask) == value) {
            encodings.at(element) = InstInfo{
                .high_mask = static_cast<u16>(encoding.mask_value.mask >> MASK_SHIFT),
                .high_value = static_cast<u16>(encoding.mask_value.value >> MASK_SHIFT),
                .opcode = encoding.opcode,
            };
            ++element;
        }
    }
    return encodings;
}

// Heap-allocated table built at runtime. Left non-constexpr (note the
// commented-out keyword) — presumably to keep compile times and binary size
// reasonable; TODO confirm.
/*constexpr*/ auto MakeFastLookupTable() {
    auto encodings{std::make_unique<std::array<std::array<InstInfo, 2>, FAST_LOOKUP_SIZE>>()};
    for (size_t index = 0; index < FAST_LOOKUP_SIZE; ++index) {
        (*encodings)[index] = MakeFastLookupTableIndex(index);
    }
    return encodings;
}
const auto FAST_LOOKUP_TABLE{MakeFastLookupTable()};

} // Anonymous namespace
/// Decodes the opcode of a raw 64-bit Maxwell instruction.
/// @throws NotImplementedException when the instruction matches no encoding.
Opcode Decode(u64 insn) {
    const auto& table{(*FAST_LOOKUP_TABLE)[ToFastLookupIndex(insn)]};
    const auto it{std::ranges::find_if(table, [insn](const InstInfo& info) {
        // Zero-initialized padding entries have an all-zero mask and would
        // otherwise match ANY instruction, silently decoding unknown
        // instructions as opcode 0; skip them so they reach the throw below.
        // Every real encoding has a non-zero high_mask.
        return info.high_mask != 0 && (insn & info.Mask()) == info.Value();
    })};
    if (it == table.end()) {
        throw NotImplementedException("Instruction 0x{:016x} is unknown / unimplemented", insn);
    }
    return it->opcode;
}
} // namespace Shader::Maxwell

View File

@@ -0,0 +1,14 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include "common/common_types.h"
#include "shader_recompiler/frontend/maxwell/opcodes.h"
namespace Shader::Maxwell {

/// Decode the opcode of a raw 64-bit Maxwell instruction.
[[nodiscard]] Opcode Decode(u64 insn);

} // namespace Shader::Maxwell

View File

@@ -0,0 +1,108 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <optional>
#include "common/common_types.h"
#include "shader_recompiler/exception.h"
#include "shader_recompiler/frontend/maxwell/decode.h"
#include "shader_recompiler/frontend/maxwell/indirect_branch_table_track.h"
#include "shader_recompiler/frontend/maxwell/opcodes.h"
#include "shader_recompiler/frontend/maxwell/translate/impl/load_constant.h"
namespace Shader::Maxwell {
namespace {
// Field views over the instructions involved in indirect-branch tracking
// (SHL_imm, IMNMX_imm and the BRX/JMX itself).
union Encoding {
    u64 raw;
    BitField<0, 8, IR::Reg> dest_reg;
    BitField<8, 8, IR::Reg> src_reg;
    BitField<20, 19, u64> immediate;
    BitField<56, 1, u64> is_negative;
    BitField<20, 24, s64> brx_offset;
};

// Scans instructions backwards from `pos` down to `block_begin` and returns
// the first one for which func(insn, opcode) is true. `pos` is updated so
// consecutive calls continue the scan where the previous one stopped.
template <typename Callable>
std::optional<u64> Track(Environment& env, Location block_begin, Location& pos, Callable&& func) {
    while (pos >= block_begin) {
        const u64 insn{env.ReadInstruction(pos.Offset())};
        --pos;
        if (func(insn, Decode(insn))) {
            return insn;
        }
    }
    return std::nullopt;
}
// Finds the LDC that loaded the branch register: a 32-bit default-mode
// constant-buffer load whose destination is the register BRX/JMX consumes.
std::optional<u64> TrackLDC(Environment& env, Location block_begin, Location& pos,
                            IR::Reg brx_reg) {
    return Track(env, block_begin, pos, [brx_reg](u64 insn, Opcode opcode) {
        const LDC::Encoding ldc{insn};
        return opcode == Opcode::LDC && ldc.dest_reg == brx_reg && ldc.size == LDC::Size::B32 &&
               ldc.mode == LDC::Mode::Default;
    });
}

// Finds the SHL_imm writing the register the LDC reads its offset from.
std::optional<u64> TrackSHL(Environment& env, Location block_begin, Location& pos,
                            IR::Reg ldc_reg) {
    return Track(env, block_begin, pos, [ldc_reg](u64 insn, Opcode opcode) {
        const Encoding shl{insn};
        return opcode == Opcode::SHL_imm && shl.dest_reg == ldc_reg;
    });
}

// Finds the IMNMX_imm feeding the SHL; its immediate bounds the table index.
std::optional<u64> TrackIMNMX(Environment& env, Location block_begin, Location& pos,
                              IR::Reg shl_reg) {
    return Track(env, block_begin, pos, [shl_reg](u64 insn, Opcode opcode) {
        const Encoding imnmx{insn};
        return opcode == Opcode::IMNMX_imm && imnmx.dest_reg == shl_reg;
    });
}
} // Anonymous namespace
// Walks backwards from a BRX/JMX and pattern-matches the producing chain
// IMNMX_imm -> SHL_imm -> LDC to reconstruct the constant-buffer branch table
// the instruction indexes. Returns nullopt when the pattern is not found.
std::optional<IndirectBranchTableInfo> TrackIndirectBranchTable(Environment& env, Location brx_pos,
                                                                Location block_begin) {
    const u64 brx_insn{env.ReadInstruction(brx_pos.Offset())};
    const Opcode brx_opcode{Decode(brx_insn)};
    if (brx_opcode != Opcode::BRX && brx_opcode != Opcode::JMX) {
        throw LogicError("Tracked instruction is not BRX or JMX");
    }
    const IR::Reg brx_reg{Encoding{brx_insn}.src_reg};
    const s32 brx_offset{static_cast<s32>(Encoding{brx_insn}.brx_offset)};
    // Scan backwards for the instructions that produced the branch register
    Location pos{brx_pos};
    const std::optional<u64> ldc_insn{TrackLDC(env, block_begin, pos, brx_reg)};
    if (!ldc_insn) {
        return std::nullopt;
    }
    const LDC::Encoding ldc{*ldc_insn};
    const u32 cbuf_index{static_cast<u32>(ldc.index)};
    const u32 cbuf_offset{static_cast<u32>(static_cast<s32>(ldc.offset.Value()))};
    const IR::Reg ldc_reg{ldc.src_reg};
    const std::optional<u64> shl_insn{TrackSHL(env, block_begin, pos, ldc_reg)};
    if (!shl_insn) {
        return std::nullopt;
    }
    const Encoding shl{*shl_insn};
    const IR::Reg shl_reg{shl.src_reg};
    const std::optional<u64> imnmx_insn{TrackIMNMX(env, block_begin, pos, shl_reg)};
    if (!imnmx_insn) {
        return std::nullopt;
    }
    const Encoding imnmx{*imnmx_insn};
    if (imnmx.is_negative != 0) {
        // A negative bound cannot describe a table size
        return std::nullopt;
    }
    // The IMNMX immediate presumably clamps the index to [0, immediate],
    // giving immediate + 1 table entries — TODO confirm against hardware
    const u32 imnmx_immediate{static_cast<u32>(imnmx.immediate.Value())};
    return IndirectBranchTableInfo{
        .cbuf_index = cbuf_index,
        .cbuf_offset = cbuf_offset,
        .num_entries = imnmx_immediate + 1,
        .branch_offset = brx_offset,
        .branch_reg = brx_reg,
    };
}
} // namespace Shader::Maxwell

View File

@@ -0,0 +1,28 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <optional>
#include "common/bit_field.h"
#include "common/common_types.h"
#include "shader_recompiler/environment.h"
#include "shader_recompiler/frontend/ir/reg.h"
#include "shader_recompiler/frontend/maxwell/location.h"
namespace Shader::Maxwell {
// Description of a constant-buffer branch table recovered from a BRX/JMX.
struct IndirectBranchTableInfo {
    u32 cbuf_index{};    // Constant buffer holding the table
    u32 cbuf_offset{};   // Byte offset of the first entry
    u32 num_entries{};   // Number of table entries
    s32 branch_offset{}; // Offset baked into the branch instruction
    IR::Reg branch_reg{}; // Register the branch indexes the table with
};

/// Walk backwards from a BRX/JMX at brx_pos (not before block_begin) and try
/// to reconstruct the branch table it indexes.
std::optional<IndirectBranchTableInfo> TrackIndirectBranchTable(Environment& env, Location brx_pos,
                                                                Location block_begin);
} // namespace Shader::Maxwell

View File

@@ -0,0 +1,63 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include "common/bit_field.h"
#include "common/common_types.h"
#include "shader_recompiler/frontend/ir/flow_test.h"
#include "shader_recompiler/frontend/ir/reg.h"
namespace Shader::Maxwell {
/// Reference to a Maxwell predicate register with an optional negation.
/// Index 7 is the always-true predicate, so Predicate{true} is (7, false)
/// and Predicate{false} is (7, true).
struct Predicate {
    Predicate() = default;
    Predicate(unsigned index_, bool negated_ = false) : index{index_}, negated{negated_} {}
    Predicate(bool value) : Predicate{7u, !value} {}
    // Raw 4-bit field: low 3 bits are the index, bit 3 is the negation flag
    Predicate(u64 raw) : Predicate{static_cast<unsigned>(raw % 8), (raw & 8) != 0} {}

    unsigned index; ///< Predicate register index
    bool negated;   ///< True when the predicate result is inverted
};

inline bool operator==(const Predicate& lhs, const Predicate& rhs) noexcept {
    return lhs.negated == rhs.negated && lhs.index == rhs.index;
}

inline bool operator!=(const Predicate& lhs, const Predicate& rhs) noexcept {
    return !operator==(lhs, rhs);
}
// Raw 64-bit Maxwell instruction with field views shared by the control-flow
// instructions the CFG analyzer cares about.
union Instruction {
    Instruction(u64 raw_) : raw{raw_} {}

    u64 raw;

    union {
        BitField<5, 1, u64> is_cbuf; // Branch target read from a constant buffer
        BitField<0, 5, IR::FlowTest> flow_test;

        /// Absolute branch target
        [[nodiscard]] u32 Absolute() const noexcept {
            return static_cast<u32>(absolute);
        }

        /// Signed relative branch displacement
        [[nodiscard]] s32 Offset() const noexcept {
            return static_cast<s32>(offset);
        }

    private:
        BitField<20, 24, s64> offset;
        BitField<20, 32, u64> absolute;
    } branch;

    /// Execution predicate of the instruction (bits 16-19)
    [[nodiscard]] Predicate Pred() const noexcept {
        return Predicate{pred};
    }

private:
    BitField<16, 4, u64> pred;
};
static_assert(std::is_trivially_copyable_v<Instruction>);
} // namespace Shader::Maxwell

View File

@@ -0,0 +1,112 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <compare>
#include <iterator>
#include <fmt/format.h>
#include "common/common_types.h"
#include "shader_recompiler/exception.h"
namespace Shader::Maxwell {
// Schedule-aware Maxwell program counter. Instructions are 8 bytes and every
// 32-byte bundle begins with a control word, so Step/Back skip one slot out
// of every four.
class Location {
    static constexpr u32 VIRTUAL_BIAS{4};

public:
    constexpr Location() = default;

    /// @throws InvalidArgument when the offset is not 8-byte aligned.
    constexpr Location(u32 initial_offset) : offset{initial_offset} {
        if (initial_offset % 8 != 0) {
            throw InvalidArgument("initial_offset={} is not a multiple of 8", initial_offset);
        }
        Align();
    }

    // A virtual location sits at offset - 4, between real locations, so the
    // CFG can order a synthetic block right before a real one.
    constexpr Location Virtual() const noexcept {
        Location virtual_location;
        virtual_location.offset = offset - VIRTUAL_BIAS;
        return virtual_location;
    }

    [[nodiscard]] constexpr u32 Offset() const noexcept {
        return offset;
    }

    [[nodiscard]] constexpr bool IsVirtual() const {
        return offset % 8 == VIRTUAL_BIAS;
    }

    constexpr auto operator<=>(const Location&) const noexcept = default;

    // NOTE(review): the pre/post increment and decrement overloads previously
    // had swapped semantics (pre-increment returned the old value, post-
    // increment the new one). All call sites in this module discard the
    // result; the operators now follow the C++ convention.

    // Pre-increment: step, then return the updated location.
    constexpr Location& operator++() noexcept {
        Step();
        return *this;
    }

    // Post-increment: step, but return the location before stepping.
    constexpr Location operator++(int) noexcept {
        const Location copy{*this};
        Step();
        return copy;
    }

    // Pre-decrement: step back, then return the updated location.
    constexpr Location& operator--() noexcept {
        Back();
        return *this;
    }

    // Post-decrement: step back, but return the location before stepping.
    constexpr Location operator--(int) noexcept {
        const Location copy{*this};
        Back();
        return copy;
    }

    // Advance or rewind by a number of instruction slots.
    constexpr Location operator+(int number) const {
        Location new_pc{*this};
        while (number > 0) {
            --number;
            ++new_pc;
        }
        while (number < 0) {
            ++number;
            --new_pc;
        }
        return new_pc;
    }

    constexpr Location operator-(int number) const {
        return operator+(-number);
    }

private:
    // Skip the control word at the start of a 32-byte bundle.
    constexpr void Align() {
        offset += offset % 32 == 0 ? 8 : 0;
    }

    // Move to the next instruction, skipping the next bundle's control word.
    constexpr void Step() {
        offset += 8 + (offset % 32 == 24 ? 8 : 0);
    }

    // Move to the previous instruction, skipping a control word going back.
    constexpr void Back() {
        offset -= 8 + (offset % 32 == 8 ? 8 : 0);
    }

    u32 offset{0xcccccccc}; // Poison value to catch use of uninitialized locations
};
} // namespace Shader::Maxwell
// Lets Location be used directly in fmt format strings; prints the raw
// offset as 4-digit zero-padded hexadecimal.
template <>
struct fmt::formatter<Shader::Maxwell::Location> {
    constexpr auto parse(format_parse_context& ctx) {
        return ctx.begin();
    }
    template <typename FormatContext>
    auto format(const Shader::Maxwell::Location& location, FormatContext& ctx) {
        return fmt::format_to(ctx.out(), "{:04x}", location.Offset());
    }
};

View File

@@ -0,0 +1,286 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
// Decoding table for the Maxwell shader ISA.
// Each entry is INST(enum_name, display_name, encoding). '0' and '1' are fixed
// bits and '-' marks a don't-care (wildcard) bit.
// NOTE(review): the 16-character pattern appears to correspond to the most
// significant bits of the 64-bit instruction word - confirm against the decoder.
// The including file must define INST(name, cute, encode) before including this
// file and #undef it afterwards (see opcodes.h / opcodes.cpp).
INST(AL2P,          "AL2P",          "1110 1111 1010 0---")
INST(ALD,           "ALD",           "1110 1111 1101 1---")
INST(AST,           "AST",           "1110 1111 1111 0---")
INST(ATOM_cas,      "ATOM (cas)",    "1110 1110 1111 ----")
INST(ATOM,          "ATOM",          "1110 1101 ---- ----")
INST(ATOMS_cas,     "ATOMS (cas)",   "1110 1110 ---- ----")
INST(ATOMS,         "ATOMS",         "1110 1100 ---- ----")
INST(B2R,           "B2R",           "1111 0000 1011 1---")
INST(BAR,           "BAR",           "1111 0000 1010 1---")
INST(BFE_reg,       "BFE (reg)",     "0101 1100 0000 0---")
INST(BFE_cbuf,      "BFE (cbuf)",    "0100 1100 0000 0---")
INST(BFE_imm,       "BFE (imm)",     "0011 100- 0000 0---")
INST(BFI_reg,       "BFI (reg)",     "0101 1011 1111 0---")
INST(BFI_rc,        "BFI (rc)",      "0101 0011 1111 0---")
INST(BFI_cr,        "BFI (cr)",      "0100 1011 1111 0---")
INST(BFI_imm,       "BFI (imm)",     "0011 011- 1111 0---")
INST(BPT,           "BPT",           "1110 0011 1010 ----")
INST(BRA,           "BRA",           "1110 0010 0100 ----")
INST(BRK,           "BRK",           "1110 0011 0100 ----")
INST(BRX,           "BRX",           "1110 0010 0101 ----")
INST(CAL,           "CAL",           "1110 0010 0110 ----")
INST(CCTL,          "CCTL",          "1110 1111 011- ----")
INST(CCTLL,         "CCTLL",         "1110 1111 100- ----")
INST(CONT,          "CONT",          "1110 0011 0101 ----")
INST(CS2R,          "CS2R",          "0101 0000 1100 1---")
INST(CSET,          "CSET",          "0101 0000 1001 1---")
INST(CSETP,         "CSETP",         "0101 0000 1010 0---")
INST(DADD_reg,      "DADD (reg)",    "0101 1100 0111 0---")
INST(DADD_cbuf,     "DADD (cbuf)",   "0100 1100 0111 0---")
INST(DADD_imm,      "DADD (imm)",    "0011 100- 0111 0---")
INST(DEPBAR,        "DEPBAR",        "1111 0000 1111 0---")
INST(DFMA_reg,      "DFMA (reg)",    "0101 1011 0111 ----")
INST(DFMA_rc,       "DFMA (rc)",     "0101 0011 0111 ----")
INST(DFMA_cr,       "DFMA (cr)",     "0100 1011 0111 ----")
INST(DFMA_imm,      "DFMA (imm)",    "0011 011- 0111 ----")
INST(DMNMX_reg,     "DMNMX (reg)",   "0101 1100 0101 0---")
INST(DMNMX_cbuf,    "DMNMX (cbuf)",  "0100 1100 0101 0---")
INST(DMNMX_imm,     "DMNMX (imm)",   "0011 100- 0101 0---")
INST(DMUL_reg,      "DMUL (reg)",    "0101 1100 1000 0---")
INST(DMUL_cbuf,     "DMUL (cbuf)",   "0100 1100 1000 0---")
INST(DMUL_imm,      "DMUL (imm)",    "0011 100- 1000 0---")
INST(DSET_reg,      "DSET (reg)",    "0101 1001 0--- ----")
INST(DSET_cbuf,     "DSET (cbuf)",   "0100 1001 0--- ----")
INST(DSET_imm,      "DSET (imm)",    "0011 001- 0--- ----")
INST(DSETP_reg,     "DSETP (reg)",   "0101 1011 1000 ----")
INST(DSETP_cbuf,    "DSETP (cbuf)",  "0100 1011 1000 ----")
INST(DSETP_imm,     "DSETP (imm)",   "0011 011- 1000 ----")
INST(EXIT,          "EXIT",          "1110 0011 0000 ----")
INST(F2F_reg,       "F2F (reg)",     "0101 1100 1010 1---")
INST(F2F_cbuf,      "F2F (cbuf)",    "0100 1100 1010 1---")
INST(F2F_imm,       "F2F (imm)",     "0011 100- 1010 1---")
INST(F2I_reg,       "F2I (reg)",     "0101 1100 1011 0---")
INST(F2I_cbuf,      "F2I (cbuf)",    "0100 1100 1011 0---")
INST(F2I_imm,       "F2I (imm)",     "0011 100- 1011 0---")
INST(FADD_reg,      "FADD (reg)",    "0101 1100 0101 1---")
INST(FADD_cbuf,     "FADD (cbuf)",   "0100 1100 0101 1---")
INST(FADD_imm,      "FADD (imm)",    "0011 100- 0101 1---")
INST(FADD32I,       "FADD32I",       "0000 10-- ---- ----")
INST(FCHK_reg,      "FCHK (reg)",    "0101 1100 1000 1---")
INST(FCHK_cbuf,     "FCHK (cbuf)",   "0100 1100 1000 1---")
INST(FCHK_imm,      "FCHK (imm)",    "0011 100- 1000 1---")
INST(FCMP_reg,      "FCMP (reg)",    "0101 1011 1010 ----")
INST(FCMP_rc,       "FCMP (rc)",     "0101 0011 1010 ----")
INST(FCMP_cr,       "FCMP (cr)",     "0100 1011 1010 ----")
INST(FCMP_imm,      "FCMP (imm)",    "0011 011- 1010 ----")
INST(FFMA_reg,      "FFMA (reg)",    "0101 1001 1--- ----")
INST(FFMA_rc,       "FFMA (rc)",     "0101 0001 1--- ----")
INST(FFMA_cr,       "FFMA (cr)",     "0100 1001 1--- ----")
INST(FFMA_imm,      "FFMA (imm)",    "0011 001- 1--- ----")
INST(FFMA32I,       "FFMA32I",       "0000 11-- ---- ----")
INST(FLO_reg,       "FLO (reg)",     "0101 1100 0011 0---")
INST(FLO_cbuf,      "FLO (cbuf)",    "0100 1100 0011 0---")
INST(FLO_imm,       "FLO (imm)",     "0011 100- 0011 0---")
INST(FMNMX_reg,     "FMNMX (reg)",   "0101 1100 0110 0---")
INST(FMNMX_cbuf,    "FMNMX (cbuf)",  "0100 1100 0110 0---")
INST(FMNMX_imm,     "FMNMX (imm)",   "0011 100- 0110 0---")
INST(FMUL_reg,      "FMUL (reg)",    "0101 1100 0110 1---")
INST(FMUL_cbuf,     "FMUL (cbuf)",   "0100 1100 0110 1---")
INST(FMUL_imm,      "FMUL (imm)",    "0011 100- 0110 1---")
INST(FMUL32I,       "FMUL32I",       "0001 1110 ---- ----")
INST(FSET_reg,      "FSET (reg)",    "0101 1000 ---- ----")
INST(FSET_cbuf,     "FSET (cbuf)",   "0100 1000 ---- ----")
INST(FSET_imm,      "FSET (imm)",    "0011 000- ---- ----")
INST(FSETP_reg,     "FSETP (reg)",   "0101 1011 1011 ----")
INST(FSETP_cbuf,    "FSETP (cbuf)",  "0100 1011 1011 ----")
INST(FSETP_imm,     "FSETP (imm)",   "0011 011- 1011 ----")
INST(FSWZADD,       "FSWZADD",       "0101 0000 1111 1---")
INST(GETCRSPTR,     "GETCRSPTR",     "1110 0010 1100 ----")
INST(GETLMEMBASE,   "GETLMEMBASE",   "1110 0010 1101 ----")
INST(HADD2_reg,     "HADD2 (reg)",   "0101 1101 0001 0---")
INST(HADD2_cbuf,    "HADD2 (cbuf)",  "0111 101- 1--- ----")
INST(HADD2_imm,     "HADD2 (imm)",   "0111 101- 0--- ----")
INST(HADD2_32I,     "HADD2_32I",     "0010 110- ---- ----")
INST(HFMA2_reg,     "HFMA2 (reg)",   "0101 1101 0000 0---")
INST(HFMA2_rc,      "HFMA2 (rc)",    "0110 0--- 1--- ----")
INST(HFMA2_cr,      "HFMA2 (cr)",    "0111 0--- 1--- ----")
INST(HFMA2_imm,     "HFMA2 (imm)",   "0111 0--- 0--- ----")
INST(HFMA2_32I,     "HFMA2_32I",     "0010 100- ---- ----")
INST(HMUL2_reg,     "HMUL2 (reg)",   "0101 1101 0000 1---")
INST(HMUL2_cbuf,    "HMUL2 (cbuf)",  "0111 100- 1--- ----")
INST(HMUL2_imm,     "HMUL2 (imm)",   "0111 100- 0--- ----")
INST(HMUL2_32I,     "HMUL2_32I",     "0010 101- ---- ----")
INST(HSET2_reg,     "HSET2 (reg)",   "0101 1101 0001 1---")
INST(HSET2_cbuf,    "HSET2 (cbuf)",  "0111 110- 1--- ----")
INST(HSET2_imm,     "HSET2 (imm)",   "0111 110- 0--- ----")
INST(HSETP2_reg,    "HSETP2 (reg)",  "0101 1101 0010 0---")
INST(HSETP2_cbuf,   "HSETP2 (cbuf)", "0111 111- 1--- ----")
INST(HSETP2_imm,    "HSETP2 (imm)",  "0111 111- 0--- ----")
INST(I2F_reg,       "I2F (reg)",     "0101 1100 1011 1---")
INST(I2F_cbuf,      "I2F (cbuf)",    "0100 1100 1011 1---")
INST(I2F_imm,       "I2F (imm)",     "0011 100- 1011 1---")
INST(I2I_reg,       "I2I (reg)",     "0101 1100 1110 0---")
INST(I2I_cbuf,      "I2I (cbuf)",    "0100 1100 1110 0---")
INST(I2I_imm,       "I2I (imm)",     "0011 100- 1110 0---")
INST(IADD_reg,      "IADD (reg)",    "0101 1100 0001 0---")
INST(IADD_cbuf,     "IADD (cbuf)",   "0100 1100 0001 0---")
INST(IADD_imm,      "IADD (imm)",    "0011 100- 0001 0---")
INST(IADD3_reg,     "IADD3 (reg)",   "0101 1100 1100 ----")
INST(IADD3_cbuf,    "IADD3 (cbuf)",  "0100 1100 1100 ----")
INST(IADD3_imm,     "IADD3 (imm)",   "0011 100- 1100 ----")
INST(IADD32I,       "IADD32I",       "0001 110- ---- ----")
INST(ICMP_reg,      "ICMP (reg)",    "0101 1011 0100 ----")
INST(ICMP_rc,       "ICMP (rc)",     "0101 0011 0100 ----")
INST(ICMP_cr,       "ICMP (cr)",     "0100 1011 0100 ----")
INST(ICMP_imm,      "ICMP (imm)",    "0011 011- 0100 ----")
INST(IDE,           "IDE",           "1110 0011 1001 ----")
INST(IDP_reg,       "IDP (reg)",     "0101 0011 1111 1---")
INST(IDP_imm,       "IDP (imm)",     "0101 0011 1101 1---")
INST(IMAD_reg,      "IMAD (reg)",    "0101 1010 0--- ----")
INST(IMAD_rc,       "IMAD (rc)",     "0101 0010 0--- ----")
INST(IMAD_cr,       "IMAD (cr)",     "0100 1010 0--- ----")
INST(IMAD_imm,      "IMAD (imm)",    "0011 010- 0--- ----")
INST(IMAD32I,       "IMAD32I",       "1000 00-- ---- ----")
INST(IMADSP_reg,    "IMADSP (reg)",  "0101 1010 1--- ----")
INST(IMADSP_rc,     "IMADSP (rc)",   "0101 0010 1--- ----")
INST(IMADSP_cr,     "IMADSP (cr)",   "0100 1010 1--- ----")
INST(IMADSP_imm,    "IMADSP (imm)",  "0011 010- 1--- ----")
INST(IMNMX_reg,     "IMNMX (reg)",   "0101 1100 0010 0---")
INST(IMNMX_cbuf,    "IMNMX (cbuf)",  "0100 1100 0010 0---")
INST(IMNMX_imm,     "IMNMX (imm)",   "0011 100- 0010 0---")
INST(IMUL_reg,      "IMUL (reg)",    "0101 1100 0011 1---")
INST(IMUL_cbuf,     "IMUL (cbuf)",   "0100 1100 0011 1---")
INST(IMUL_imm,      "IMUL (imm)",    "0011 100- 0011 1---")
INST(IMUL32I,       "IMUL32I",       "0001 1111 ---- ----")
INST(IPA,           "IPA",           "1110 0000 ---- ----")
INST(ISBERD,        "ISBERD",        "1110 1111 1101 0---")
INST(ISCADD_reg,    "ISCADD (reg)",  "0101 1100 0001 1---")
INST(ISCADD_cbuf,   "ISCADD (cbuf)", "0100 1100 0001 1---")
INST(ISCADD_imm,    "ISCADD (imm)",  "0011 100- 0001 1---")
INST(ISCADD32I,     "ISCADD32I",     "0001 01-- ---- ----")
INST(ISET_reg,      "ISET (reg)",    "0101 1011 0101 ----")
INST(ISET_cbuf,     "ISET (cbuf)",   "0100 1011 0101 ----")
INST(ISET_imm,      "ISET (imm)",    "0011 011- 0101 ----")
INST(ISETP_reg,     "ISETP (reg)",   "0101 1011 0110 ----")
INST(ISETP_cbuf,    "ISETP (cbuf)",  "0100 1011 0110 ----")
INST(ISETP_imm,     "ISETP (imm)",   "0011 011- 0110 ----")
INST(JCAL,          "JCAL",          "1110 0010 0010 ----")
INST(JMP,           "JMP",           "1110 0010 0001 ----")
INST(JMX,           "JMX",           "1110 0010 0000 ----")
INST(KIL,           "KIL",           "1110 0011 0011 ----")
INST(LD,            "LD",            "100- ---- ---- ----")
INST(LDC,           "LDC",           "1110 1111 1001 0---")
INST(LDG,           "LDG",           "1110 1110 1101 0---")
INST(LDL,           "LDL",           "1110 1111 0100 0---")
INST(LDS,           "LDS",           "1110 1111 0100 1---")
INST(LEA_hi_reg,    "LEA (hi reg)",  "0101 1011 1101 1---")
INST(LEA_hi_cbuf,   "LEA (hi cbuf)", "0001 10-- ---- ----")
INST(LEA_lo_reg,    "LEA (lo reg)",  "0101 1011 1101 0---")
INST(LEA_lo_cbuf,   "LEA (lo cbuf)", "0100 1011 1101 ----")
INST(LEA_lo_imm,    "LEA (lo imm)",  "0011 011- 1101 0---")
INST(LEPC,          "LEPC",          "0101 0000 1101 0---")
INST(LONGJMP,       "LONGJMP",       "1110 0011 0001 ----")
INST(LOP_reg,       "LOP (reg)",     "0101 1100 0100 0---")
INST(LOP_cbuf,      "LOP (cbuf)",    "0100 1100 0100 0---")
INST(LOP_imm,       "LOP (imm)",     "0011 100- 0100 0---")
INST(LOP3_reg,      "LOP3 (reg)",    "0101 1011 1110 0---")
INST(LOP3_cbuf,     "LOP3 (cbuf)",   "0000 001- ---- ----")
INST(LOP3_imm,      "LOP3 (imm)",    "0011 11-- ---- ----")
INST(LOP32I,        "LOP32I",        "0000 01-- ---- ----")
INST(MEMBAR,        "MEMBAR",        "1110 1111 1001 1---")
INST(MOV_reg,       "MOV (reg)",     "0101 1100 1001 1---")
INST(MOV_cbuf,      "MOV (cbuf)",    "0100 1100 1001 1---")
INST(MOV_imm,       "MOV (imm)",     "0011 100- 1001 1---")
INST(MOV32I,        "MOV32I",        "0000 0001 0000 ----")
INST(MUFU,          "MUFU",          "0101 0000 1000 0---")
INST(NOP,           "NOP",           "0101 0000 1011 0---")
INST(OUT_reg,       "OUT (reg)",     "1111 1011 1110 0---")
INST(OUT_cbuf,      "OUT (cbuf)",    "1110 1011 1110 0---")
INST(OUT_imm,       "OUT (imm)",     "1111 011- 1110 0---")
INST(P2R_reg,       "P2R (reg)",     "0101 1100 1110 1---")
INST(P2R_cbuf,      "P2R (cbuf)",    "0100 1100 1110 1---")
INST(P2R_imm,       "P2R (imm)",     "0011 1000 1110 1---")
INST(PBK,           "PBK",           "1110 0010 1010 ----")
INST(PCNT,          "PCNT",          "1110 0010 1011 ----")
INST(PEXIT,         "PEXIT",         "1110 0010 0011 ----")
INST(PIXLD,         "PIXLD",         "1110 1111 1110 1---")
INST(PLONGJMP,      "PLONGJMP",      "1110 0010 1000 ----")
INST(POPC_reg,      "POPC (reg)",    "0101 1100 0000 1---")
INST(POPC_cbuf,     "POPC (cbuf)",   "0100 1100 0000 1---")
INST(POPC_imm,      "POPC (imm)",    "0011 100- 0000 1---")
INST(PRET,          "PRET",          "1110 0010 0111 ----")
INST(PRMT_reg,      "PRMT (reg)",    "0101 1011 1100 ----")
INST(PRMT_rc,       "PRMT (rc)",     "0101 0011 1100 ----")
INST(PRMT_cr,       "PRMT (cr)",     "0100 1011 1100 ----")
INST(PRMT_imm,      "PRMT (imm)",    "0011 011- 1100 ----")
INST(PSET,          "PSET",          "0101 0000 1000 1---")
INST(PSETP,         "PSETP",         "0101 0000 1001 0---")
INST(R2B,           "R2B",           "1111 0000 1100 0---")
INST(R2P_reg,       "R2P (reg)",     "0101 1100 1111 0---")
INST(R2P_cbuf,      "R2P (cbuf)",    "0100 1100 1111 0---")
INST(R2P_imm,       "R2P (imm)",     "0011 100- 1111 0---")
INST(RAM,           "RAM",           "1110 0011 1000 ----")
INST(RED,           "RED",           "1110 1011 1111 1---")
INST(RET,           "RET",           "1110 0011 0010 ----")
INST(RRO_reg,       "RRO (reg)",     "0101 1100 1001 0---")
INST(RRO_cbuf,      "RRO (cbuf)",    "0100 1100 1001 0---")
INST(RRO_imm,       "RRO (imm)",     "0011 100- 1001 0---")
INST(RTT,           "RTT",           "1110 0011 0110 ----")
INST(S2R,           "S2R",           "1111 0000 1100 1---")
INST(SAM,           "SAM",           "1110 0011 0111 ----")
INST(SEL_reg,       "SEL (reg)",     "0101 1100 1010 0---")
INST(SEL_cbuf,      "SEL (cbuf)",    "0100 1100 1010 0---")
INST(SEL_imm,       "SEL (imm)",     "0011 100- 1010 0---")
INST(SETCRSPTR,     "SETCRSPTR",     "1110 0010 1110 ----")
INST(SETLMEMBASE,   "SETLMEMBASE",   "1110 0010 1111 ----")
INST(SHF_l_reg,     "SHF (l reg)",   "0101 1011 1111 1---")
INST(SHF_l_imm,     "SHF (l imm)",   "0011 011- 1111 1---")
INST(SHF_r_reg,     "SHF (r reg)",   "0101 1100 1111 1---")
INST(SHF_r_imm,     "SHF (r imm)",   "0011 100- 1111 1---")
INST(SHFL,          "SHFL",          "1110 1111 0001 0---")
INST(SHL_reg,       "SHL (reg)",     "0101 1100 0100 1---")
INST(SHL_cbuf,      "SHL (cbuf)",    "0100 1100 0100 1---")
INST(SHL_imm,       "SHL (imm)",     "0011 100- 0100 1---")
INST(SHR_reg,       "SHR (reg)",     "0101 1100 0010 1---")
INST(SHR_cbuf,      "SHR (cbuf)",    "0100 1100 0010 1---")
INST(SHR_imm,       "SHR (imm)",     "0011 100- 0010 1---")
INST(SSY,           "SSY",           "1110 0010 1001 ----")
INST(ST,            "ST",            "101- ---- ---- ----")
INST(STG,           "STG",           "1110 1110 1101 1---")
INST(STL,           "STL",           "1110 1111 0101 0---")
INST(STP,           "STP",           "1110 1110 1010 0---")
INST(STS,           "STS",           "1110 1111 0101 1---")
INST(SUATOM,        "SUATOM",        "1110 1010 0--- ----")
INST(SUATOM_cas,    "SUATOM_cas",    "1110 1010 1--- ----")
INST(SULD,          "SULD",          "1110 1011 000- ----")
INST(SURED,         "SURED",         "1110 1011 010- ----")
INST(SUST,          "SUST",          "1110 1011 001- ----")
INST(SYNC,          "SYNC",          "1111 0000 1111 1---")
INST(TEX,           "TEX",           "1100 0--- ---- ----")
INST(TEX_b,         "TEX (b)",       "1101 1110 10-- ----")
INST(TEXS,          "TEXS",          "1101 -00- ---- ----")
INST(TLD,           "TLD",           "1101 1100 ---- ----")
INST(TLD_b,         "TLD (b)",       "1101 1101 ---- ----")
INST(TLD4,          "TLD4",          "1100 10-- ---- ----")
INST(TLD4_b,        "TLD4 (b)",      "1101 1110 11-- ----")
INST(TLD4S,         "TLD4S",         "1101 1111 -0-- ----")
INST(TLDS,          "TLDS",          "1101 -01- ---- ----")
INST(TMML,          "TMML",          "1101 1111 0101 1---")
INST(TMML_b,        "TMML (b)",      "1101 1111 0110 0---")
INST(TXA,           "TXA",           "1101 1111 0100 0---")
INST(TXD,           "TXD",           "1101 1110 00-- ----")
INST(TXD_b,         "TXD (b)",       "1101 1110 01-- ----")
INST(TXQ,           "TXQ",           "1101 1111 0100 1---")
INST(TXQ_b,         "TXQ (b)",       "1101 1111 0101 0---")
INST(VABSDIFF,      "VABSDIFF",      "0101 0100 ---- ----")
INST(VABSDIFF4,     "VABSDIFF4",     "0101 0000 0--- ----")
INST(VADD,          "VADD",          "0010 00-- ---- ----")
INST(VMAD,          "VMAD",          "0101 1111 ---- ----")
INST(VMNMX,         "VMNMX",         "0011 101- ---- ----")
INST(VOTE,          "VOTE",          "0101 0000 1101 1---")
INST(VOTE_vtg,      "VOTE (vtg)",    "0101 0000 1110 0---")
INST(VSET,          "VSET",          "0100 000- ---- ----")
INST(VSETP,         "VSETP",         "0101 0000 1111 0---")
INST(VSHL,          "VSHL",          "0101 0111 ---- ----")
INST(VSHR,          "VSHR",          "0101 0110 ---- ----")
INST(XMAD_reg,      "XMAD (reg)",    "0101 1011 00-- ----")
INST(XMAD_rc,       "XMAD (rc)",     "0101 0001 0--- ----")
INST(XMAD_cr,       "XMAD (cr)",     "0100 111- ---- ----")
INST(XMAD_imm,      "XMAD (imm)",    "0011 011- 00-- ----")
// Removed due to its weird formatting making fast tables larger
// INST(CCTLT, "CCTLT", "1110 1011 1111 0--0")

View File

@@ -0,0 +1,26 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <array>
#include "shader_recompiler/exception.h"
#include "shader_recompiler/frontend/maxwell/opcodes.h"
namespace Shader::Maxwell {
namespace {
// Human-readable instruction names, generated from maxwell.inc.
// Indexed by the underlying value of Opcode; both the enum and this table are
// expanded from the same INST list, so their ordering matches by construction.
constexpr std::array NAME_TABLE{
#define INST(name, cute, encode) cute,
#include "maxwell.inc"
#undef INST
};
} // Anonymous namespace
/// Returns the human-readable name of the given opcode.
/// @throws InvalidArgument when the opcode does not map to a table entry.
const char* NameOf(Opcode opcode) {
    const size_t index{static_cast<size_t>(opcode)};
    if (index >= NAME_TABLE.size()) {
        throw InvalidArgument("Invalid opcode with raw value {}", static_cast<int>(opcode));
    }
    return NAME_TABLE[index];
}
} // namespace Shader::Maxwell

View File

@@ -0,0 +1,30 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <fmt/format.h>
namespace Shader::Maxwell {
/// Every Maxwell shader instruction opcode, expanded from the INST list in
/// maxwell.inc (one enumerator per table entry, in table order).
enum class Opcode {
#define INST(name, cute, encode) name,
#include "maxwell.inc"
#undef INST
};
const char* NameOf(Opcode opcode);
} // namespace Shader::Maxwell
/// Formats an Opcode as its human-readable name.
template <>
struct fmt::formatter<Shader::Maxwell::Opcode> {
    constexpr auto parse(format_parse_context& ctx) {
        return ctx.begin();
    }
    template <typename FormatContext>
    auto format(const Shader::Maxwell::Opcode& opcode, FormatContext& ctx) {
        // Qualified call: relying on an ADL-found format_to is deprecated in
        // newer fmt releases, and this matches the project's other formatters.
        return fmt::format_to(ctx.out(), "{}", NameOf(opcode));
    }
};

View File

@@ -0,0 +1,888 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <algorithm>
#include <memory>
#include <ranges>
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>
#include <version>
#include <fmt/format.h>
#include <boost/intrusive/list.hpp>
#include "shader_recompiler/environment.h"
#include "shader_recompiler/frontend/ir/basic_block.h"
#include "shader_recompiler/frontend/ir/ir_emitter.h"
#include "shader_recompiler/frontend/maxwell/decode.h"
#include "shader_recompiler/frontend/maxwell/structured_control_flow.h"
#include "shader_recompiler/frontend/maxwell/translate/translate.h"
#include "shader_recompiler/object_pool.h"
namespace Shader::Maxwell {
namespace {
struct Statement;
// Use normal_link because we are not guaranteed to destroy the tree in order
using ListBaseHook =
boost::intrusive::list_base_hook<boost::intrusive::link_mode<boost::intrusive::normal_link>>;
using Tree = boost::intrusive::list<Statement,
// Allow using Statement without a definition
boost::intrusive::base_hook<ListBaseHook>,
// Avoid linear complexity on splice, size is never called
boost::intrusive::constant_time_size<false>>;
using Node = Tree::iterator;
/// Discriminates which union members of Statement are active.
enum class StatementType {
    Code,                      ///< A basic block taken from the control flow graph
    Goto,                      ///< Conditional jump to a label
    Label,                     ///< Jump target of one or more gotos
    If,                        ///< Conditional statement owning child statements
    Loop,                      ///< do-while loop owning child statements
    Break,                     ///< Conditional break out of the enclosing loop
    Return,                    ///< Return from the program
    Kill,                      ///< Kill statement (from Flow::EndClass::Kill)
    Unreachable,               ///< Marks control flow that can never be reached
    Function,                  ///< Root statement owning the whole tree
    Identity,                  ///< Expression: wraps a guest IR condition
    Not,                       ///< Expression: logical negation of another expression
    Or,                        ///< Expression: logical or of two expressions
    SetVariable,               ///< Assigns a goto state variable
    SetIndirectBranchVariable, ///< Records the indirect branch register and offset
    Variable,                  ///< Expression: reads a goto state variable
    IndirectBranchCond,        ///< Expression: compares the indirect branch variable
};
// Returns true when statements of this type own a child statement tree
bool HasChildren(StatementType type) {
    return type == StatementType::If || type == StatementType::Loop ||
           type == StatementType::Function;
}
// Empty tag types used to select the matching overloaded Statement constructor
struct Goto {};
struct Label {};
struct If {};
struct Loop {};
struct Break {};
struct Return {};
struct Kill {};
struct Unreachable {};
struct FunctionTag {};
struct Identity {};
struct Not {};
struct Or {};
struct SetVariable {};
struct SetIndirectBranchVariable {};
struct Variable {};
struct IndirectBranchCond {};
#ifdef _MSC_VER
#pragma warning(push)
#pragma warning(disable : 26495) // Always initialize a member variable, expected in Statement
#endif
/// Node of the structured control flow tree. Each constructor activates a
/// specific combination of the two unions below, discriminated by `type`.
/// Members are intentionally left uninitialized by some constructors (hence the
/// suppressed MSVC warning) because only the active union members are ever read.
struct Statement : ListBaseHook {
    Statement(const Flow::Block* block_, Statement* up_)
        : block{block_}, up{up_}, type{StatementType::Code} {}
    Statement(Goto, Statement* cond_, Node label_, Statement* up_)
        : label{label_}, cond{cond_}, up{up_}, type{StatementType::Goto} {}
    Statement(Label, u32 id_, Statement* up_) : id{id_}, up{up_}, type{StatementType::Label} {}
    Statement(If, Statement* cond_, Tree&& children_, Statement* up_)
        : children{std::move(children_)}, cond{cond_}, up{up_}, type{StatementType::If} {}
    Statement(Loop, Statement* cond_, Tree&& children_, Statement* up_)
        : children{std::move(children_)}, cond{cond_}, up{up_}, type{StatementType::Loop} {}
    Statement(Break, Statement* cond_, Statement* up_)
        : cond{cond_}, up{up_}, type{StatementType::Break} {}
    Statement(Return, Statement* up_) : up{up_}, type{StatementType::Return} {}
    Statement(Kill, Statement* up_) : up{up_}, type{StatementType::Kill} {}
    Statement(Unreachable, Statement* up_) : up{up_}, type{StatementType::Unreachable} {}
    Statement(FunctionTag) : children{}, type{StatementType::Function} {}
    Statement(Identity, IR::Condition cond_, Statement* up_)
        : guest_cond{cond_}, up{up_}, type{StatementType::Identity} {}
    Statement(Not, Statement* op_, Statement* up_) : op{op_}, up{up_}, type{StatementType::Not} {}
    Statement(Or, Statement* op_a_, Statement* op_b_, Statement* up_)
        : op_a{op_a_}, op_b{op_b_}, up{up_}, type{StatementType::Or} {}
    Statement(SetVariable, u32 id_, Statement* op_, Statement* up_)
        : op{op_}, id{id_}, up{up_}, type{StatementType::SetVariable} {}
    Statement(SetIndirectBranchVariable, IR::Reg branch_reg_, s32 branch_offset_, Statement* up_)
        : branch_offset{branch_offset_},
          branch_reg{branch_reg_}, up{up_}, type{StatementType::SetIndirectBranchVariable} {}
    Statement(Variable, u32 id_, Statement* up_)
        : id{id_}, up{up_}, type{StatementType::Variable} {}
    Statement(IndirectBranchCond, u32 location_, Statement* up_)
        : location{location_}, up{up_}, type{StatementType::IndirectBranchCond} {}

    // Only If/Loop/Function own a child tree; it must be destroyed manually
    // because union members have no implicit destructor.
    ~Statement() {
        if (HasChildren(type)) {
            std::destroy_at(&children);
        }
    }

    // First payload, selected by `type`:
    //   Code -> block, Goto -> label, If/Loop/Function -> children,
    //   Identity -> guest_cond, Not/SetVariable -> op, Or -> op_a,
    //   IndirectBranchCond -> location, SetIndirectBranchVariable -> branch_offset
    union {
        const Flow::Block* block;
        Node label;
        Tree children;
        IR::Condition guest_cond;
        Statement* op;
        Statement* op_a;
        u32 location;
        s32 branch_offset;
    };
    // Second payload, selected by `type`:
    //   Goto/If/Loop/Break -> cond, Or -> op_b,
    //   Label/SetVariable/Variable -> id, SetIndirectBranchVariable -> branch_reg
    union {
        Statement* cond;
        Statement* op_b;
        u32 id;
        IR::Reg branch_reg;
    };
    Statement* up{}; ///< Parent statement, null only for the root
    StatementType type;
};
#ifdef _MSC_VER
#pragma warning(pop)
#endif
std::string DumpExpr(const Statement* stmt) {
switch (stmt->type) {
case StatementType::Identity:
return fmt::format("{}", stmt->guest_cond);
case StatementType::Not:
return fmt::format("!{}", DumpExpr(stmt->op));
case StatementType::Or:
return fmt::format("{} || {}", DumpExpr(stmt->op_a), DumpExpr(stmt->op_b));
case StatementType::Variable:
return fmt::format("goto_L{}", stmt->id);
case StatementType::IndirectBranchCond:
return fmt::format("(indirect_branch == {:x})", stmt->location);
default:
return "<invalid type>";
}
}
/// Renders a statement tree as pseudo-code for debugging.
/// Expression-only statement types are not valid at tree level and throw.
std::string DumpTree(const Tree& tree, u32 indentation = 0) {
    std::string ret;
    std::string indent(indentation, ' ');
    for (auto stmt = tree.begin(); stmt != tree.end(); ++stmt) {
        switch (stmt->type) {
        case StatementType::Code:
            ret += fmt::format("{}    Block {:04x} -> {:04x} (0x{:016x});\n", indent,
                               stmt->block->begin.Offset(), stmt->block->end.Offset(),
                               reinterpret_cast<uintptr_t>(stmt->block));
            break;
        case StatementType::Goto:
            ret += fmt::format("{}    if ({}) goto L{};\n", indent, DumpExpr(stmt->cond),
                               stmt->label->id);
            break;
        case StatementType::Label:
            ret += fmt::format("{}L{}:\n", indent, stmt->id);
            break;
        case StatementType::If:
            // Children are printed with a deeper indentation level
            ret += fmt::format("{}    if ({}) {{\n", indent, DumpExpr(stmt->cond));
            ret += DumpTree(stmt->children, indentation + 4);
            ret += fmt::format("{}    }}\n", indent);
            break;
        case StatementType::Loop:
            ret += fmt::format("{}    do {{\n", indent);
            ret += DumpTree(stmt->children, indentation + 4);
            ret += fmt::format("{}    }} while ({});\n", indent, DumpExpr(stmt->cond));
            break;
        case StatementType::Break:
            ret += fmt::format("{}    if ({}) break;\n", indent, DumpExpr(stmt->cond));
            break;
        case StatementType::Return:
            ret += fmt::format("{}    return;\n", indent);
            break;
        case StatementType::Kill:
            ret += fmt::format("{}    kill;\n", indent);
            break;
        case StatementType::Unreachable:
            ret += fmt::format("{}    unreachable;\n", indent);
            break;
        case StatementType::SetVariable:
            ret += fmt::format("{}    goto_L{} = {};\n", indent, stmt->id, DumpExpr(stmt->op));
            break;
        case StatementType::SetIndirectBranchVariable:
            ret += fmt::format("{}    indirect_branch = {} + {};\n", indent, stmt->branch_reg,
                               stmt->branch_offset);
            break;
        case StatementType::Function:
        case StatementType::Identity:
        case StatementType::Not:
        case StatementType::Or:
        case StatementType::Variable:
        case StatementType::IndirectBranchCond:
            // These only appear as sub-expressions, never as tree statements
            throw LogicError("Statement can't be printed");
        }
    }
    return ret;
}
void SanitizeNoBreaks(const Tree& tree) {
if (std::ranges::find(tree, StatementType::Break, &Statement::type) != tree.end()) {
throw NotImplementedException("Capturing statement with break nodes");
}
}
// Returns the nesting depth of a statement: the number of ancestors above it
size_t Level(Node stmt) {
    size_t depth{0};
    for (const Statement* ancestor = stmt->up; ancestor != nullptr; ancestor = ancestor->up) {
        ++depth;
    }
    return depth;
}
// Two statements are directly related when, after walking the deeper one up to
// the shallower one's nesting level, both share the same parent.
bool IsDirectlyRelated(Node goto_stmt, Node label_stmt) {
    size_t deep_level{Level(goto_stmt)};
    size_t shallow_level{Level(label_stmt)};
    Node deep{goto_stmt};
    Node shallow{label_stmt};
    if (deep_level < shallow_level) {
        std::swap(deep_level, shallow_level);
        std::swap(deep, shallow);
    }
    // Lift the deeper node until both sit at the same level
    while (deep_level > shallow_level) {
        --deep_level;
        deep = deep->up;
    }
    return deep->up == shallow->up;
}
// Related nodes that are neither siblings nor directly related
bool IsIndirectlyRelated(Node goto_stmt, Node label_stmt) {
    if (goto_stmt->up == label_stmt->up) {
        return false;
    }
    return !IsDirectlyRelated(goto_stmt, label_stmt);
}
/// Returns true when label_stmt lives in the same child list as goto_stmt.
/// Scans backwards from goto_stmt to the front of the list, then forward from
/// the front to the end (the forward pass re-visits already scanned nodes, which
/// is harmless; this helper is debug-only and invoked from commented-out code).
[[maybe_unused]] bool AreSiblings(Node goto_stmt, Node label_stmt) noexcept {
    Node it{goto_stmt};
    do {
        if (it == label_stmt) {
            return true;
        }
        --it;
    } while (it != goto_stmt->up->children.begin());
    // After the loop `it` sits at begin(); scan the whole list forward
    while (it != goto_stmt->up->children.end()) {
        if (it == label_stmt) {
            return true;
        }
        ++it;
    }
    return false;
}
// Returns the direct sibling of `uncle` that contains (or is) `nephew`, by
// walking nephew's ancestor chain until reaching a child of uncle's parent.
Node SiblingFromNephew(Node uncle, Node nephew) noexcept {
    Statement* const target_parent{uncle->up};
    Statement* walker{&*nephew};
    while (walker->up != target_parent) {
        walker = walker->up;
    }
    return Tree::s_iterator_to(*walker);
}
// Returns true when left_sibling appears before right_sibling within their
// shared parent's child list.
bool AreOrdered(Node left_sibling, Node right_sibling) noexcept {
    const Node end{right_sibling->up->children.end()};
    Node cursor{right_sibling};
    while (cursor != end) {
        if (cursor == left_sibling) {
            // left_sibling was found at or after right_sibling
            return false;
        }
        ++cursor;
    }
    return true;
}
// A goto requires the lifting transformation when the sibling subtree that
// holds the label precedes the goto in their shared parent's child list.
bool NeedsLift(Node goto_stmt, Node label_stmt) noexcept {
    const Node label_sibling{SiblingFromNephew(goto_stmt, label_stmt)};
    return AreOrdered(label_sibling, goto_stmt);
}
class GotoPass {
public:
/// Builds a statement tree from the control flow graph and removes every goto,
/// processing them in reverse creation order.
explicit GotoPass(Flow::CFG& cfg, ObjectPool<IR::Inst>& inst_pool_,
                  ObjectPool<IR::Block>& block_pool_, ObjectPool<Statement>& stmt_pool)
    : inst_pool{inst_pool_}, block_pool{block_pool_}, pool{stmt_pool} {
    std::vector gotos{BuildTree(cfg)};
    for (const Node& goto_stmt : gotos | std::views::reverse) {
        RemoveGoto(goto_stmt);
    }
}
/// Returns the function statement that owns the whole structured tree.
Statement& RootStatement() noexcept {
    return root_stmt;
}
private:
/// Eliminates a single goto by first normalizing its position relative to its
/// label (outward moves, lifting, inward moves) until both are siblings, then
/// replacing the goto with an equivalent if or do-while construct.
void RemoveGoto(Node goto_stmt) {
    // Force goto_stmt and label_stmt to be directly related
    const Node label_stmt{goto_stmt->label};
    if (IsIndirectlyRelated(goto_stmt, label_stmt)) {
        // Move goto_stmt out using outward-movement transformation until it becomes
        // directly related to label_stmt
        while (!IsDirectlyRelated(goto_stmt, label_stmt)) {
            goto_stmt = MoveOutward(goto_stmt);
        }
    }
    // Force goto_stmt and label_stmt to be siblings
    if (IsDirectlyRelated(goto_stmt, label_stmt)) {
        const size_t label_level{Level(label_stmt)};
        size_t goto_level{Level(goto_stmt)};
        if (goto_level > label_level) {
            // Move goto_stmt out of its level using outward-movement transformations
            while (goto_level > label_level) {
                goto_stmt = MoveOutward(goto_stmt);
                --goto_level;
            }
        } else { // Level(goto_stmt) < Level(label_stmt)
            if (NeedsLift(goto_stmt, label_stmt)) {
                // Lift goto_stmt to above stmt containing label_stmt using goto-lifting
                // transformations
                goto_stmt = Lift(goto_stmt);
            }
            // Move goto_stmt into label_stmt's level using inward-movement transformation
            while (goto_level < label_level) {
                goto_stmt = MoveInward(goto_stmt);
                ++goto_level;
            }
        }
    }
    // Expensive operation:
    // if (!AreSiblings(goto_stmt, label_stmt)) {
    //    throw LogicError("Goto is not a sibling with the label");
    // }
    // goto_stmt and label_stmt are guaranteed to be siblings, eliminate
    if (std::next(goto_stmt) == label_stmt) {
        // Simply eliminate the goto if the label is next to it
        goto_stmt->up->children.erase(goto_stmt);
    } else if (AreOrdered(goto_stmt, label_stmt)) {
        // Eliminate goto_stmt with a conditional
        EliminateAsConditional(goto_stmt, label_stmt);
    } else {
        // Eliminate goto_stmt with a loop
        EliminateAsLoop(goto_stmt, label_stmt);
    }
}
/// Builds the flat statement list for the CFG's entry function (and, through
/// recursion, every called function) and returns all created goto nodes.
std::vector<Node> BuildTree(Flow::CFG& cfg) {
    u32 label_id{0};
    std::vector<Node> gotos;
    Flow::Function& first_function{cfg.Functions().front()};
    BuildTree(cfg, first_function, label_id, gotos, root_stmt.children.end(), std::nullopt);
    return gotos;
}
/// Emits, into the root statement list, a label plus code for every block of
/// `function`, translating each block terminator into gotos/returns/kills.
/// Calls recurse with the callee's blocks inserted before `function_insert_point`
/// and `return_label` pointing back at the block after the call.
void BuildTree(Flow::CFG& cfg, Flow::Function& function, u32& label_id,
               std::vector<Node>& gotos, Node function_insert_point,
               std::optional<Node> return_label) {
    // Shared constant-false expression used to reset goto state variables
    Statement* const false_stmt{pool.Create(Identity{}, IR::Condition{false}, &root_stmt)};
    Tree& root{root_stmt.children};
    std::unordered_map<Flow::Block*, Node> local_labels;
    local_labels.reserve(function.blocks.size());
    // First pass: create one label per block so forward branches can resolve
    for (Flow::Block& block : function.blocks) {
        Statement* const label{pool.Create(Label{}, label_id, &root_stmt)};
        const Node label_it{root.insert(function_insert_point, *label)};
        local_labels.emplace(&block, label_it);
        ++label_id;
    }
    // Second pass: emit each block's code and its terminator statements
    for (Flow::Block& block : function.blocks) {
        const Node label{local_labels.at(&block)};
        // Insertion point
        const Node ip{std::next(label)};
        // Reset goto variables before the first block and after its respective label
        const auto make_reset_variable{[&]() -> Statement& {
            return *pool.Create(SetVariable{}, label->id, false_stmt, &root_stmt);
        }};
        root.push_front(make_reset_variable());
        root.insert(ip, make_reset_variable());
        root.insert(ip, *pool.Create(&block, &root_stmt));
        switch (block.end_class) {
        case Flow::EndClass::Branch: {
            // Unconditional edges use a constant-true goto; two-way branches
            // emit a conditional goto followed by an unconditional fallback
            Statement* const always_cond{
                pool.Create(Identity{}, IR::Condition{true}, &root_stmt)};
            if (block.cond == IR::Condition{true}) {
                const Node true_label{local_labels.at(block.branch_true)};
                gotos.push_back(
                    root.insert(ip, *pool.Create(Goto{}, always_cond, true_label, &root_stmt)));
            } else if (block.cond == IR::Condition{false}) {
                const Node false_label{local_labels.at(block.branch_false)};
                gotos.push_back(root.insert(
                    ip, *pool.Create(Goto{}, always_cond, false_label, &root_stmt)));
            } else {
                const Node true_label{local_labels.at(block.branch_true)};
                const Node false_label{local_labels.at(block.branch_false)};
                Statement* const true_cond{pool.Create(Identity{}, block.cond, &root_stmt)};
                gotos.push_back(
                    root.insert(ip, *pool.Create(Goto{}, true_cond, true_label, &root_stmt)));
                gotos.push_back(root.insert(
                    ip, *pool.Create(Goto{}, always_cond, false_label, &root_stmt)));
            }
            break;
        }
        case Flow::EndClass::IndirectBranch:
            // Record the branch target, then emit one conditional goto per
            // known destination; anything else is unreachable
            root.insert(ip, *pool.Create(SetIndirectBranchVariable{}, block.branch_reg,
                                         block.branch_offset, &root_stmt));
            for (const Flow::IndirectBranch& indirect : block.indirect_branches) {
                const Node indirect_label{local_labels.at(indirect.block)};
                Statement* cond{
                    pool.Create(IndirectBranchCond{}, indirect.address, &root_stmt)};
                Statement* goto_stmt{pool.Create(Goto{}, cond, indirect_label, &root_stmt)};
                gotos.push_back(root.insert(ip, *goto_stmt));
            }
            root.insert(ip, *pool.Create(Unreachable{}, &root_stmt));
            break;
        case Flow::EndClass::Call: {
            // Inline the callee's blocks; its returns jump back to return_block
            Flow::Function& call{cfg.Functions()[block.function_call]};
            const Node call_return_label{local_labels.at(block.return_block)};
            BuildTree(cfg, call, label_id, gotos, ip, call_return_label);
            break;
        }
        case Flow::EndClass::Exit:
            root.insert(ip, *pool.Create(Return{}, &root_stmt));
            break;
        case Flow::EndClass::Return: {
            // Conditional return from a called function: goto the caller's label
            Statement* const always_cond{pool.Create(Identity{}, block.cond, &root_stmt)};
            auto goto_stmt{pool.Create(Goto{}, always_cond, return_label.value(), &root_stmt)};
            gotos.push_back(root.insert(ip, *goto_stmt));
            break;
        }
        case Flow::EndClass::Kill:
            root.insert(ip, *pool.Create(Kill{}, &root_stmt));
            break;
        }
    }
}
// Repoints the `up` link of every direct child of `tree` at `tree` itself,
// used after splicing children into a newly created statement.
void UpdateTreeUp(Statement* tree) {
    for (Statement& child : tree->children) {
        child.up = tree;
    }
}
/// Replaces a forward goto with a conditional: every statement between the goto
/// and its label is wrapped into "if (!cond) { ... }", then the goto is erased.
void EliminateAsConditional(Node goto_stmt, Node label_stmt) {
    Tree& body{goto_stmt->up->children};
    Tree if_body;
    // Move (goto, label) exclusive range into the new if body
    if_body.splice(if_body.begin(), body, std::next(goto_stmt), label_stmt);
    Statement* const cond{pool.Create(Not{}, goto_stmt->cond, &root_stmt)};
    Statement* const if_stmt{pool.Create(If{}, cond, std::move(if_body), goto_stmt->up)};
    UpdateTreeUp(if_stmt);
    body.insert(goto_stmt, *if_stmt);
    body.erase(goto_stmt);
}
/// Replaces a backward goto with a loop: every statement from the label up to
/// the goto becomes "do { ... } while (cond)", then the goto is erased.
void EliminateAsLoop(Node goto_stmt, Node label_stmt) {
    Tree& body{goto_stmt->up->children};
    Tree loop_body;
    // Move [label, goto) range into the new loop body
    loop_body.splice(loop_body.begin(), body, label_stmt, goto_stmt);
    Statement* const cond{goto_stmt->cond};
    Statement* const loop{pool.Create(Loop{}, cond, std::move(loop_body), goto_stmt->up)};
    UpdateTreeUp(loop);
    body.insert(goto_stmt, *loop);
    body.erase(goto_stmt);
}
// Dispatches the outward-movement transformation based on the type of the
// statement enclosing the goto; only if and loop parents are movable.
[[nodiscard]] Node MoveOutward(Node goto_stmt) {
    const StatementType parent_type{goto_stmt->up->type};
    if (parent_type == StatementType::If) {
        return MoveOutwardIf(goto_stmt);
    }
    if (parent_type == StatementType::Loop) {
        return MoveOutwardLoop(goto_stmt);
    }
    throw LogicError("Invalid outward movement");
}
/// Inward-movement transformation: pushes the goto one level deeper, into the
/// sibling statement that contains its label. The goto's condition is captured
/// in a state variable, intervening statements are guarded by its negation, and
/// a fresh goto reading the variable is planted at the head of the nested tree.
[[nodiscard]] Node MoveInward(Node goto_stmt) {
    Statement* const parent{goto_stmt->up};
    Tree& body{parent->children};
    const Node label{goto_stmt->label};
    const Node label_nested_stmt{SiblingFromNephew(goto_stmt, label)};
    const u32 label_id{label->id};

    // goto_L<id> = <original condition>;
    Statement* const goto_cond{goto_stmt->cond};
    Statement* const set_var{pool.Create(SetVariable{}, label_id, goto_cond, parent)};
    body.insert(goto_stmt, *set_var);

    // Guard the statements between the goto and the label's subtree with
    // if (!goto_L<id>) { ... } so they are skipped when the branch is taken
    Tree if_body;
    if_body.splice(if_body.begin(), body, std::next(goto_stmt), label_nested_stmt);
    Statement* const variable{pool.Create(Variable{}, label_id, &root_stmt)};
    Statement* const neg_var{pool.Create(Not{}, variable, &root_stmt)};
    if (!if_body.empty()) {
        Statement* const if_stmt{pool.Create(If{}, neg_var, std::move(if_body), parent)};
        UpdateTreeUp(if_stmt);
        body.insert(goto_stmt, *if_stmt);
    }
    body.erase(goto_stmt);

    switch (label_nested_stmt->type) {
    case StatementType::If:
        // Update nested if condition
        label_nested_stmt->cond =
            pool.Create(Or{}, variable, label_nested_stmt->cond, &root_stmt);
        break;
    case StatementType::Loop:
        break;
    default:
        throw LogicError("Invalid inward movement");
    }
    // Plant the replacement goto at the head of the nested statement's children
    Tree& nested_tree{label_nested_stmt->children};
    Statement* const new_goto{pool.Create(Goto{}, variable, label, &*label_nested_stmt)};
    return nested_tree.insert(nested_tree.begin(), *new_goto);
}
/// Goto-lifting transformation: when the label's subtree precedes the goto, the
/// range [label subtree, goto) is wrapped into "do { ... } while (goto_L<id>)"
/// and a replacement goto reading the state variable is placed at the loop head.
[[nodiscard]] Node Lift(Node goto_stmt) {
    Statement* const parent{goto_stmt->up};
    Tree& body{parent->children};
    const Node label{goto_stmt->label};
    const u32 label_id{label->id};
    const Node label_nested_stmt{SiblingFromNephew(goto_stmt, label)};

    // Move everything from the label's subtree up to the goto into a new loop
    Tree loop_body;
    loop_body.splice(loop_body.begin(), body, label_nested_stmt, goto_stmt);
    SanitizeNoBreaks(loop_body);
    Statement* const variable{pool.Create(Variable{}, label_id, &root_stmt)};
    Statement* const loop_stmt{pool.Create(Loop{}, variable, std::move(loop_body), parent)};
    UpdateTreeUp(loop_stmt);
    body.insert(goto_stmt, *loop_stmt);

    // Replacement goto at the loop head, plus the variable update at its tail
    Statement* const new_goto{pool.Create(Goto{}, variable, label, loop_stmt)};
    loop_stmt->children.push_front(*new_goto);
    const Node new_goto_node{loop_stmt->children.begin()};
    Statement* const set_var{pool.Create(SetVariable{}, label_id, goto_stmt->cond, loop_stmt)};
    loop_stmt->children.push_back(*set_var);
    body.erase(goto_stmt);
    return new_goto_node;
}
// Moves a goto out of an enclosing If. The goto condition is saved in a state
// variable, the statements following the goto inside the If are wrapped in a
// guard so they are skipped when the goto fires, and a new goto conditioned on
// the variable is inserted right after the If in the grandparent's tree.
Node MoveOutwardIf(Node goto_stmt) {
    const Node parent{Tree::s_iterator_to(*goto_stmt->up)};
    Tree& body{parent->children};
    const u32 label_id{goto_stmt->label->id};
    Statement* const goto_cond{goto_stmt->cond};
    // Save the goto condition into a variable keyed by the label id
    Statement* const set_goto_var{pool.Create(SetVariable{}, label_id, goto_cond, &*parent)};
    body.insert(goto_stmt, *set_goto_var);
    Tree if_body;
    if_body.splice(if_body.begin(), body, std::next(goto_stmt), body.end());
    // NOTE(review): the splice starts after the goto, yet pop_front removes
    // the first spliced statement, which is not re-inserted anywhere visible
    // here — confirm this element is guaranteed to be droppable (e.g. the
    // goto's own label) in this call path
    if_body.pop_front();
    Statement* const cond{pool.Create(Variable{}, label_id, &root_stmt)};
    Statement* const neg_cond{pool.Create(Not{}, cond, &root_stmt)};
    Statement* const if_stmt{pool.Create(If{}, neg_cond, std::move(if_body), &*parent)};
    UpdateTreeUp(if_stmt);
    body.insert(goto_stmt, *if_stmt);
    // NOTE: erase only unlinks; goto_stmt's fields remain readable below
    body.erase(goto_stmt);
    // Re-emit the goto right after the enclosing If, one level up
    Statement* const new_cond{pool.Create(Variable{}, label_id, &root_stmt)};
    Statement* const new_goto{pool.Create(Goto{}, new_cond, goto_stmt->label, parent->up)};
    Tree& parent_tree{parent->up->children};
    return parent_tree.insert(std::next(parent), *new_goto);
}
// Moves a goto out of an enclosing Loop. Inside the loop the goto becomes a
// "set variable + conditional break" pair; a new goto conditioned on the
// variable is then inserted right after the loop in the grandparent's tree.
Node MoveOutwardLoop(Node goto_stmt) {
    Statement* const parent{goto_stmt->up};
    Tree& body{parent->children};
    const u32 label_id{goto_stmt->label->id};
    Statement* const goto_cond{goto_stmt->cond};
    Statement* const set_goto_var{pool.Create(SetVariable{}, label_id, goto_cond, parent)};
    Statement* const cond{pool.Create(Variable{}, label_id, &root_stmt)};
    Statement* const break_stmt{pool.Create(Break{}, cond, parent)};
    body.insert(goto_stmt, *set_goto_var);
    body.insert(goto_stmt, *break_stmt);
    // NOTE: erase only unlinks goto_stmt; its up/label fields stay readable
    body.erase(goto_stmt);
    // Re-emit the goto right after the loop, one level up
    const Node loop{Tree::s_iterator_to(*goto_stmt->up)};
    Statement* const new_goto_cond{pool.Create(Variable{}, label_id, &root_stmt)};
    Statement* const new_goto{pool.Create(Goto{}, new_goto_cond, goto_stmt->label, loop->up)};
    Tree& parent_tree{loop->up->children};
    return parent_tree.insert(std::next(loop), *new_goto);
}
// Pool used to allocate IR instructions for blocks created by this pass
ObjectPool<IR::Inst>& inst_pool;
// Pool used to allocate IR basic blocks
ObjectPool<IR::Block>& block_pool;
// Pool used to allocate statement nodes owned by this pass
ObjectPool<Statement>& pool;
// Root of the statement tree; parent of all top-level statements
Statement root_stmt{FunctionTag{}};
};
// Searches forward among the siblings following 'stmt' for the next code
// statement, stopping at the first statement that has children or at the end
// of the parent's list. Returns nullptr when no such block exists.
[[nodiscard]] Statement* TryFindForwardBlock(Statement& stmt) {
    Tree& siblings{stmt.up->children};
    const Node last{siblings.end()};
    for (Node node = std::next(Tree::s_iterator_to(stmt)); node != last; ++node) {
        if (HasChildren(node->type)) {
            break;
        }
        if (node->type == StatementType::Code) {
            return &*node;
        }
    }
    return nullptr;
}
// Recursively lowers a condition statement tree into an IR boolean value
// using the provided emitter.
[[nodiscard]] IR::U1 VisitExpr(IR::IREmitter& ir, const Statement& stmt) {
    const StatementType type{stmt.type};
    if (type == StatementType::Identity) {
        return ir.Condition(stmt.guest_cond);
    }
    if (type == StatementType::Not) {
        return ir.LogicalNot(IR::U1{VisitExpr(ir, *stmt.op)});
    }
    if (type == StatementType::Or) {
        return ir.LogicalOr(VisitExpr(ir, *stmt.op_a), VisitExpr(ir, *stmt.op_b));
    }
    if (type == StatementType::Variable) {
        return ir.GetGotoVariable(stmt.id);
    }
    if (type == StatementType::IndirectBranchCond) {
        return ir.IEqual(ir.GetIndirectBranchVariable(), ir.Imm32(stmt.location));
    }
    throw NotImplementedException("Statement type {}", type);
}
// Lowers the goto-free statement tree produced by the goto-elimination pass
// into IR basic blocks, while recording the structured control flow as a
// linear abstract syntax list (Block/If/Loop/Break/Return/... nodes).
class TranslatePass {
public:
    TranslatePass(ObjectPool<IR::Inst>& inst_pool_, ObjectPool<IR::Block>& block_pool_,
                  ObjectPool<Statement>& stmt_pool_, Environment& env_, Statement& root_stmt,
                  IR::AbstractSyntaxList& syntax_list_)
        : stmt_pool{stmt_pool_}, inst_pool{inst_pool_}, block_pool{block_pool_}, env{env_},
          syntax_list{syntax_list_} {
        Visit(root_stmt, nullptr, nullptr);

        // Insert the function prologue at the start of the first emitted block
        IR::Block& first_block{*syntax_list.front().data.block};
        IR::IREmitter ir(first_block, first_block.begin());
        ir.Prologue();
    }

private:
    // Lowers all children of 'parent'. 'break_block' is where a taken Break
    // branches to (the innermost loop's merge block) and 'fallthrough_block'
    // is where control continues when the child list runs off the end.
    void Visit(Statement& parent, IR::Block* break_block, IR::Block* fallthrough_block) {
        IR::Block* current_block{};
        // Lazily creates a basic block and appends it to the syntax list;
        // code accumulates into it until a control flow statement ends it
        const auto ensure_block{[&] {
            if (current_block) {
                return;
            }
            current_block = block_pool.Create(inst_pool);
            auto& node{syntax_list.emplace_back()};
            node.type = IR::AbstractSyntaxNode::Type::Block;
            node.data.block = current_block;
        }};
        Tree& tree{parent.children};
        for (auto it = tree.begin(); it != tree.end(); ++it) {
            Statement& stmt{*it};
            switch (stmt.type) {
            case StatementType::Label:
                // Labels can be ignored
                break;
            case StatementType::Code: {
                // Translate this flow block's guest instruction range
                ensure_block();
                Translate(env, current_block, stmt.block->begin.Offset(), stmt.block->end.Offset());
                break;
            }
            case StatementType::SetVariable: {
                ensure_block();
                IR::IREmitter ir{*current_block};
                ir.SetGotoVariable(stmt.id, VisitExpr(ir, *stmt.op));
                break;
            }
            case StatementType::SetIndirectBranchVariable: {
                ensure_block();
                IR::IREmitter ir{*current_block};
                IR::U32 address{ir.IAdd(ir.GetReg(stmt.branch_reg), ir.Imm32(stmt.branch_offset))};
                ir.SetIndirectBranchVariable(address);
                break;
            }
            case StatementType::If: {
                ensure_block();
                IR::Block* const merge_block{MergeBlock(parent, stmt)};
                // Implement if header block
                IR::IREmitter ir{*current_block};
                const IR::U1 cond{ir.ConditionRef(VisitExpr(ir, *stmt.cond))};
                // Reserve the If node now and fill it after visiting the body
                // (a reference would be invalidated if the vector grows)
                const size_t if_node_index{syntax_list.size()};
                syntax_list.emplace_back();
                // Visit children
                const size_t then_block_index{syntax_list.size()};
                Visit(stmt, break_block, merge_block);
                IR::Block* const then_block{syntax_list.at(then_block_index).data.block};
                current_block->AddBranch(then_block);
                current_block->AddBranch(merge_block);
                current_block = merge_block;
                auto& if_node{syntax_list[if_node_index]};
                if_node.type = IR::AbstractSyntaxNode::Type::If;
                if_node.data.if_node.cond = cond;
                if_node.data.if_node.body = then_block;
                if_node.data.if_node.merge = merge_block;
                auto& endif_node{syntax_list.emplace_back()};
                endif_node.type = IR::AbstractSyntaxNode::Type::EndIf;
                endif_node.data.end_if.merge = merge_block;
                auto& merge{syntax_list.emplace_back()};
                merge.type = IR::AbstractSyntaxNode::Type::Block;
                merge.data.block = merge_block;
                break;
            }
            case StatementType::Loop: {
                IR::Block* const loop_header_block{block_pool.Create(inst_pool)};
                if (current_block) {
                    current_block->AddBranch(loop_header_block);
                }
                auto& header_node{syntax_list.emplace_back()};
                header_node.type = IR::AbstractSyntaxNode::Type::Block;
                header_node.data.block = loop_header_block;
                IR::Block* const continue_block{block_pool.Create(inst_pool)};
                IR::Block* const merge_block{MergeBlock(parent, stmt)};
                // Reserve the Loop node; filled in after visiting the body
                const size_t loop_node_index{syntax_list.size()};
                syntax_list.emplace_back();
                // Visit children; Break targets the merge block, falling off
                // the end of the body continues into the continue block
                const size_t body_block_index{syntax_list.size()};
                Visit(stmt, merge_block, continue_block);
                // The continue block is located at the end of the loop
                IR::IREmitter ir{*continue_block};
                const IR::U1 cond{ir.ConditionRef(VisitExpr(ir, *stmt.cond))};
                IR::Block* const body_block{syntax_list.at(body_block_index).data.block};
                loop_header_block->AddBranch(body_block);
                continue_block->AddBranch(loop_header_block);
                continue_block->AddBranch(merge_block);
                current_block = merge_block;
                auto& loop{syntax_list[loop_node_index]};
                loop.type = IR::AbstractSyntaxNode::Type::Loop;
                loop.data.loop.body = body_block;
                loop.data.loop.continue_block = continue_block;
                loop.data.loop.merge = merge_block;
                auto& continue_block_node{syntax_list.emplace_back()};
                continue_block_node.type = IR::AbstractSyntaxNode::Type::Block;
                continue_block_node.data.block = continue_block;
                auto& repeat{syntax_list.emplace_back()};
                repeat.type = IR::AbstractSyntaxNode::Type::Repeat;
                repeat.data.repeat.cond = cond;
                repeat.data.repeat.loop_header = loop_header_block;
                repeat.data.repeat.merge = merge_block;
                auto& merge{syntax_list.emplace_back()};
                merge.type = IR::AbstractSyntaxNode::Type::Block;
                merge.data.block = merge_block;
                break;
            }
            case StatementType::Break: {
                ensure_block();
                IR::Block* const skip_block{MergeBlock(parent, stmt)};
                IR::IREmitter ir{*current_block};
                const IR::U1 cond{ir.ConditionRef(VisitExpr(ir, *stmt.cond))};
                current_block->AddBranch(break_block);
                current_block->AddBranch(skip_block);
                current_block = skip_block;
                auto& break_node{syntax_list.emplace_back()};
                break_node.type = IR::AbstractSyntaxNode::Type::Break;
                break_node.data.break_node.cond = cond;
                break_node.data.break_node.merge = break_block;
                break_node.data.break_node.skip = skip_block;
                auto& merge{syntax_list.emplace_back()};
                merge.type = IR::AbstractSyntaxNode::Type::Block;
                merge.data.block = skip_block;
                break;
            }
            case StatementType::Return: {
                // Emit the epilogue and terminate the current block
                ensure_block();
                IR::IREmitter{*current_block}.Epilogue();
                current_block = nullptr;
                syntax_list.emplace_back().type = IR::AbstractSyntaxNode::Type::Return;
                break;
            }
            case StatementType::Kill: {
                // Demote to helper invocation, then continue in a fresh block
                ensure_block();
                IR::Block* demote_block{MergeBlock(parent, stmt)};
                IR::IREmitter{*current_block}.DemoteToHelperInvocation();
                current_block->AddBranch(demote_block);
                current_block = demote_block;
                auto& merge{syntax_list.emplace_back()};
                merge.type = IR::AbstractSyntaxNode::Type::Block;
                merge.data.block = demote_block;
                break;
            }
            case StatementType::Unreachable: {
                ensure_block();
                current_block = nullptr;
                syntax_list.emplace_back().type = IR::AbstractSyntaxNode::Type::Unreachable;
                break;
            }
            default:
                throw NotImplementedException("Statement type {}", stmt.type);
            }
        }
        // Close the trailing block: either fall through or mark unreachable
        if (current_block) {
            if (fallthrough_block) {
                current_block->AddBranch(fallthrough_block);
            } else {
                syntax_list.emplace_back().type = IR::AbstractSyntaxNode::Type::Unreachable;
            }
        }
    }

    // Returns a fresh IR block to merge control flow into after 'stmt'. Also
    // guarantees a forward code statement exists after 'stmt' (inserting a
    // dummy flow block if necessary) so the merge block gets visited.
    IR::Block* MergeBlock(Statement& parent, Statement& stmt) {
        Statement* merge_stmt{TryFindForwardBlock(stmt)};
        if (!merge_stmt) {
            // Create a merge block we can visit later
            merge_stmt = stmt_pool.Create(&dummy_flow_block, &parent);
            parent.children.insert(std::next(Tree::s_iterator_to(stmt)), *merge_stmt);
        }
        return block_pool.Create(inst_pool);
    }

    ObjectPool<Statement>& stmt_pool;
    ObjectPool<IR::Inst>& inst_pool;
    ObjectPool<IR::Block>& block_pool;
    Environment& env;
    // Output list of abstract syntax nodes, appended to in visitation order
    IR::AbstractSyntaxList& syntax_list;
    u32 loop_id{};

    // TODO: C++20 Remove this when all compilers support constexpr std::vector
#if __cpp_lib_constexpr_vector >= 201907
    static constexpr Flow::Block dummy_flow_block;
#else
    const Flow::Block dummy_flow_block;
#endif
};
} // Anonymous namespace
// Structurizes a control flow graph: first removes gotos from the statement
// tree, then translates the structured tree into IR basic blocks and an
// abstract syntax list describing the control flow.
IR::AbstractSyntaxList BuildASL(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Block>& block_pool,
                                Environment& env, Flow::CFG& cfg) {
    ObjectPool<Statement> statement_pool{64};
    GotoPass remove_gotos{cfg, inst_pool, block_pool, statement_pool};
    Statement& tree_root{remove_gotos.RootStatement()};
    IR::AbstractSyntaxList result;
    TranslatePass{inst_pool, block_pool, statement_pool, env, tree_root, result};
    return result;
}
} // namespace Shader::Maxwell

View File

@@ -0,0 +1,20 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include "shader_recompiler/environment.h"
#include "shader_recompiler/frontend/ir/abstract_syntax_list.h"
#include "shader_recompiler/frontend/ir/basic_block.h"
#include "shader_recompiler/frontend/ir/value.h"
#include "shader_recompiler/frontend/maxwell/control_flow.h"
#include "shader_recompiler/object_pool.h"
namespace Shader::Maxwell {

// Structurizes the control flow graph into an abstract syntax list, emitting
// IR basic blocks allocated from the given pools.
[[nodiscard]] IR::AbstractSyntaxList BuildASL(ObjectPool<IR::Inst>& inst_pool,
                                              ObjectPool<IR::Block>& block_pool, Environment& env,
                                              Flow::CFG& cfg);

} // namespace Shader::Maxwell

View File

@@ -0,0 +1,214 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "common/bit_field.h"
#include "common/common_types.h"
#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
namespace Shader::Maxwell {
namespace {
// Atomic operation selector encoded in ATOM/RED
enum class AtomOp : u64 {
    ADD,
    MIN,
    MAX,
    INC,
    DEC,
    AND,
    OR,
    XOR,
    EXCH,
    SAFEADD,
};

// Operand size/type selector encoded in ATOM/RED
enum class AtomSize : u64 {
    U32,
    S32,
    U64,
    F32,
    F16x2,
    S64,
};
// Emits the IR global atomic matching an integer atomic operation, returning
// the previous memory value. Throws for operations with no integer lowering.
IR::U32U64 ApplyIntegerAtomOp(IR::IREmitter& ir, const IR::U32U64& offset, const IR::U32U64& op_b,
                              AtomOp op, bool is_signed) {
    switch (op) {
    case AtomOp::ADD:
        return ir.GlobalAtomicIAdd(offset, op_b);
    case AtomOp::MIN:
        return ir.GlobalAtomicIMin(offset, op_b, is_signed);
    case AtomOp::MAX:
        return ir.GlobalAtomicIMax(offset, op_b, is_signed);
    case AtomOp::INC:
        return ir.GlobalAtomicInc(offset, op_b);
    case AtomOp::DEC:
        return ir.GlobalAtomicDec(offset, op_b);
    case AtomOp::AND:
        return ir.GlobalAtomicAnd(offset, op_b);
    case AtomOp::OR:
        return ir.GlobalAtomicOr(offset, op_b);
    case AtomOp::XOR:
        return ir.GlobalAtomicXor(offset, op_b);
    case AtomOp::EXCH:
        return ir.GlobalAtomicExchange(offset, op_b);
    default:
        throw NotImplementedException("Integer Atom Operation {}", op);
    }
}
// Emits the IR global atomic for a floating-point atomic operation. Only ADD
// is emitted for F32; MIN/MAX are emitted on the F16x2 path only.
IR::Value ApplyFpAtomOp(IR::IREmitter& ir, const IR::U64& offset, const IR::Value& op_b, AtomOp op,
                        AtomSize size) {
    // Rounding and flush behavior for the half-precision path
    static constexpr IR::FpControl f16_control{
        .no_contraction{false},
        .rounding{IR::FpRounding::RN},
        .fmz_mode{IR::FmzMode::DontCare},
    };
    // Single-precision path flushes denorms to zero
    static constexpr IR::FpControl f32_control{
        .no_contraction{false},
        .rounding{IR::FpRounding::RN},
        .fmz_mode{IR::FmzMode::FTZ},
    };
    switch (op) {
    case AtomOp::ADD:
        return size == AtomSize::F32 ? ir.GlobalAtomicF32Add(offset, op_b, f32_control)
                                     : ir.GlobalAtomicF16x2Add(offset, op_b, f16_control);
    case AtomOp::MIN:
        return ir.GlobalAtomicF16x2Min(offset, op_b, f16_control);
    case AtomOp::MAX:
        return ir.GlobalAtomicF16x2Max(offset, op_b, f16_control);
    default:
        throw NotImplementedException("FP Atom Operation {}", op);
    }
}
// Decodes the 64-bit global memory address for ATOM/RED. When the extended
// (e) bit is clear, the 32-bit address register is converted unsigned to
// 64 bits; with RZ as the address register the immediate is an absolute
// address rather than a signed displacement.
IR::U64 AtomOffset(TranslatorVisitor& v, u64 insn) {
    union {
        u64 raw;
        BitField<8, 8, IR::Reg> addr_reg;
        BitField<28, 20, s64> addr_offset;
        BitField<28, 20, u64> rz_addr_offset;
        BitField<48, 1, u64> e;
    } const mem{insn};
    const IR::U64 address{[&]() -> IR::U64 {
        if (mem.e == 0) {
            // 32-bit addressing: zero-extend the register to 64 bits
            return v.ir.UConvert(64, v.X(mem.addr_reg));
        }
        return v.L(mem.addr_reg);
    }()};
    const u64 addr_offset{[&]() -> u64 {
        if (mem.addr_reg == IR::Reg::RZ) {
            // When RZ is used, the address is an absolute address
            return static_cast<u64>(mem.rz_addr_offset.Value());
        } else {
            return static_cast<u64>(mem.addr_offset.Value());
        }
    }()};
    return v.ir.IAdd(address, v.ir.Imm64(addr_offset));
}
// Returns true when the given size/operation pair has no atomic lowering and
// must fall back to a plain global load instead.
bool AtomOpNotApplicable(AtomSize size, AtomOp op) {
    // TODO: SAFEADD
    if (size == AtomSize::S32 || size == AtomSize::U64) {
        return op == AtomOp::INC || op == AtomOp::DEC;
    }
    if (size == AtomSize::S64) {
        return op != AtomOp::MIN && op != AtomOp::MAX;
    }
    if (size == AtomSize::F32) {
        return op != AtomOp::ADD;
    }
    if (size == AtomSize::F16x2) {
        return op != AtomOp::ADD && op != AtomOp::MIN && op != AtomOp::MAX;
    }
    return false;
}
// Loads the current value from global memory without modifying it; used when
// the size/op combination cannot be expressed as an atomic. 64-bit loads are
// packed from two 32-bit halves.
IR::U32U64 LoadGlobal(IR::IREmitter& ir, const IR::U64& offset, AtomSize size) {
    switch (size) {
    case AtomSize::U32:
    case AtomSize::S32:
    case AtomSize::F32:
    case AtomSize::F16x2:
        return ir.LoadGlobal32(offset);
    case AtomSize::U64:
    case AtomSize::S64:
        return ir.PackUint2x32(ir.LoadGlobal64(offset));
    default:
        throw NotImplementedException("Atom Size {}", size);
    }
}
// Writes the atomic's previous value to the destination register: 32-bit
// results (including packed F16x2) go to a GPR, 64-bit results to a register
// pair, F32 to a float register. Unhandled sizes store nothing.
void StoreResult(TranslatorVisitor& v, IR::Reg dest_reg, const IR::Value& result, AtomSize size) {
    switch (size) {
    case AtomSize::U32:
    case AtomSize::S32:
    case AtomSize::F16x2:
        return v.X(dest_reg, IR::U32{result});
    case AtomSize::U64:
    case AtomSize::S64:
        return v.L(dest_reg, IR::U64{result});
    case AtomSize::F32:
        return v.F(dest_reg, IR::F32{result});
    default:
        break;
    }
}
// Dispatches the atomic operation according to the operand size, reading the
// source operand from the appropriate register view (GPR, pair, float, or
// unpacked half2).
IR::Value ApplyAtomOp(TranslatorVisitor& v, IR::Reg operand_reg, const IR::U64& offset,
                      AtomSize size, AtomOp op) {
    switch (size) {
    case AtomSize::U32:
    case AtomSize::S32:
        return ApplyIntegerAtomOp(v.ir, offset, v.X(operand_reg), op, size == AtomSize::S32);
    case AtomSize::U64:
    case AtomSize::S64:
        return ApplyIntegerAtomOp(v.ir, offset, v.L(operand_reg), op, size == AtomSize::S64);
    case AtomSize::F32:
        return ApplyFpAtomOp(v.ir, offset, v.F(operand_reg), op, size);
    case AtomSize::F16x2: {
        return ApplyFpAtomOp(v.ir, offset, v.ir.UnpackFloat2x16(v.X(operand_reg)), op, size);
    }
    default:
        throw NotImplementedException("Atom Size {}", size);
    }
}
// Common implementation for ATOM and RED: performs the atomic operation (or a
// plain load when the size/op pair is not applicable) and optionally writes
// the previous value to the destination register.
void GlobalAtomic(TranslatorVisitor& v, IR::Reg dest_reg, IR::Reg operand_reg,
                  const IR::U64& offset, AtomSize size, AtomOp op, bool write_dest) {
    IR::Value result;
    if (AtomOpNotApplicable(size, op)) {
        result = LoadGlobal(v.ir, offset, size);
    } else {
        result = ApplyAtomOp(v, operand_reg, offset, size, op);
    }
    if (write_dest) {
        StoreResult(v, dest_reg, result, size);
    }
}
} // Anonymous namespace
// ATOM: global memory atomic operation that returns the previous memory value
// in the destination register.
void TranslatorVisitor::ATOM(u64 insn) {
    union {
        u64 raw;
        BitField<0, 8, IR::Reg> dest_reg;
        BitField<20, 8, IR::Reg> operand_reg;
        BitField<49, 3, AtomSize> size;
        BitField<52, 4, AtomOp> op;
    } const atom{insn};
    const IR::U64 offset{AtomOffset(*this, insn)};
    GlobalAtomic(*this, atom.dest_reg, atom.operand_reg, offset, atom.size, atom.op, true);
}
// RED: global memory reduction; like ATOM but with no destination register.
// Note the size/op fields sit at different bit positions than in ATOM.
void TranslatorVisitor::RED(u64 insn) {
    union {
        u64 raw;
        BitField<0, 8, IR::Reg> operand_reg;
        BitField<20, 3, AtomSize> size;
        BitField<23, 3, AtomOp> op;
    } const red{insn};
    const IR::U64 offset{AtomOffset(*this, insn)};
    // NOTE(review): write_dest is true with RZ as the destination — this
    // relies on RZ writes being discarded downstream; confirm, otherwise
    // this should pass false.
    GlobalAtomic(*this, IR::Reg::RZ, red.operand_reg, offset, red.size, red.op, true);
}
} // namespace Shader::Maxwell

View File

@@ -0,0 +1,110 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "common/bit_field.h"
#include "common/common_types.h"
#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
namespace Shader::Maxwell {
namespace {
// Atomic operation selector encoded in ATOMS (no SAFEADD, unlike ATOM)
enum class AtomOp : u64 {
    ADD,
    MIN,
    MAX,
    INC,
    DEC,
    AND,
    OR,
    XOR,
    EXCH,
};

// Operand size/type selector encoded in ATOMS
enum class AtomsSize : u64 {
    U32,
    S32,
    U64,
};
// Emits the IR shared memory atomic for an integer atomic operation,
// returning the previous value held in shared memory.
IR::U32U64 ApplyAtomsOp(IR::IREmitter& ir, const IR::U32& offset, const IR::U32U64& op_b, AtomOp op,
                        bool is_signed) {
    switch (op) {
    case AtomOp::ADD:
        return ir.SharedAtomicIAdd(offset, op_b);
    case AtomOp::MIN:
        return ir.SharedAtomicIMin(offset, op_b, is_signed);
    case AtomOp::MAX:
        return ir.SharedAtomicIMax(offset, op_b, is_signed);
    case AtomOp::INC:
        return ir.SharedAtomicInc(offset, op_b);
    case AtomOp::DEC:
        return ir.SharedAtomicDec(offset, op_b);
    case AtomOp::AND:
        return ir.SharedAtomicAnd(offset, op_b);
    case AtomOp::OR:
        return ir.SharedAtomicOr(offset, op_b);
    case AtomOp::XOR:
        return ir.SharedAtomicXor(offset, op_b);
    case AtomOp::EXCH:
        return ir.SharedAtomicExchange(offset, op_b);
    default:
        throw NotImplementedException("Integer Atoms Operation {}", op);
    }
}
// Decodes the shared memory byte offset for ATOMS. With RZ as the offset
// register the 22-bit immediate is absolute; otherwise a signed relative
// immediate is added to the register. Both immediates are scaled by 4 (<< 2).
IR::U32 AtomsOffset(TranslatorVisitor& v, u64 insn) {
    union {
        u64 raw;
        BitField<8, 8, IR::Reg> offset_reg;
        BitField<30, 22, u64> absolute_offset;
        BitField<30, 22, s64> relative_offset;
    } const encoding{insn};
    if (encoding.offset_reg == IR::Reg::RZ) {
        return v.ir.Imm32(static_cast<u32>(encoding.absolute_offset << 2));
    } else {
        const s32 relative{static_cast<s32>(encoding.relative_offset << 2)};
        return v.ir.IAdd(v.X(encoding.offset_reg), v.ir.Imm32(relative));
    }
}
// Writes the previous shared memory value to the destination: 32-bit values
// to a GPR, 64-bit values to a register pair. Unhandled sizes store nothing.
void StoreResult(TranslatorVisitor& v, IR::Reg dest_reg, const IR::Value& result, AtomsSize size) {
    switch (size) {
    case AtomsSize::U32:
    case AtomsSize::S32:
        return v.X(dest_reg, IR::U32{result});
    case AtomsSize::U64:
        return v.L(dest_reg, IR::U64{result});
    default:
        break;
    }
}
} // Anonymous namespace
// ATOMS: atomic operation on shared memory, returning the previous value in
// the destination register.
void TranslatorVisitor::ATOMS(u64 insn) {
    union {
        u64 raw;
        BitField<0, 8, IR::Reg> dest_reg;
        BitField<8, 8, IR::Reg> addr_reg;
        BitField<20, 8, IR::Reg> src_reg_b;
        BitField<28, 2, AtomsSize> size;
        BitField<52, 4, AtomOp> op;
    } const atoms{insn};
    const bool size_64{atoms.size == AtomsSize::U64};
    // Only EXCH is implemented for 64-bit operands
    if (size_64 && atoms.op != AtomOp::EXCH) {
        throw NotImplementedException("64-bit Atoms Operation {}", atoms.op.Value());
    }
    const bool is_signed{atoms.size == AtomsSize::S32};
    const IR::U32 offset{AtomsOffset(*this, insn)};
    IR::Value result;
    if (size_64) {
        // 64-bit operand read from a register pair
        result = ApplyAtomsOp(ir, offset, L(atoms.src_reg_b), atoms.op, is_signed);
    } else {
        result = ApplyAtomsOp(ir, offset, X(atoms.src_reg_b), atoms.op, is_signed);
    }
    StoreResult(*this, atoms.dest_reg, result, atoms.size);
}
} // namespace Shader::Maxwell

View File

@@ -0,0 +1,35 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "common/bit_field.h"
#include "common/common_types.h"
#include "shader_recompiler/frontend/maxwell/opcodes.h"
#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
namespace Shader::Maxwell {
// Attribute access size encoded in AL2P; only B32 is handled below
enum class BitSize : u64 {
    B32,
    B64,
    B96,
    B128,
};
// AL2P: Attribute Load to Physical. Computes a physical attribute address by
// adding the signed immediate offset to the indexing register and stores it
// in the result register. Only 32-bit accesses are implemented.
void TranslatorVisitor::AL2P(u64 inst) {
    union {
        u64 raw;
        BitField<0, 8, IR::Reg> result_register;
        BitField<8, 8, IR::Reg> indexing_register;
        BitField<20, 11, s64> offset;
        BitField<47, 2, BitSize> bitsize;
    } const al2p{inst}; // const for consistency with the other decode unions
    if (al2p.bitsize != BitSize::B32) {
        throw NotImplementedException("BitSize {}", al2p.bitsize.Value());
    }
    // The signed offset wraps correctly through the unsigned IAdd
    const IR::U32 converted_offset{ir.Imm32(static_cast<u32>(al2p.offset.Value()))};
    const IR::U32 result{ir.IAdd(X(al2p.indexing_register), converted_offset)};
    X(al2p.result_register, result);
}
} // namespace Shader::Maxwell

View File

@@ -0,0 +1,96 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "common/bit_field.h"
#include "common/common_types.h"
#include "shader_recompiler/frontend/ir/modifiers.h"
#include "shader_recompiler/frontend/maxwell/opcodes.h"
#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
namespace Shader::Maxwell {
namespace {
// Memory barrier scope encoded in MEMBAR.
// Seems to be in CUDA terminology.
enum class LocalScope : u64 {
    CTA,
    GL,
    SYS,
    VC,
};
} // Anonymous namespace
// MEMBAR: memory barrier. CTA scope emits a workgroup barrier; every wider
// scope (GL, SYS, VC) is lowered to a device memory barrier.
void TranslatorVisitor::MEMBAR(u64 inst) {
    union {
        u64 raw;
        BitField<8, 2, LocalScope> scope;
    } const membar{inst};
    if (membar.scope == LocalScope::CTA) {
        ir.WorkgroupMemoryBarrier();
    } else {
        ir.DeviceMemoryBarrier();
    }
}
// DEPBAR: scoreboard dependency barrier; treated as a no-op here.
void TranslatorVisitor::DEPBAR() {
    // DEPBAR is a no-op
}
// BAR: thread barrier synchronization. Only the plain BAR.SYNC form with
// zero immediate operands and a true predicate is implemented; it is lowered
// to an IR barrier.
void TranslatorVisitor::BAR(u64 insn) {
    enum class Mode {
        RedPopc,
        Scan,
        RedAnd,
        RedOr,
        Sync,
        Arrive,
    };
    union {
        u64 raw;
        BitField<43, 1, u64> is_a_imm;
        BitField<44, 1, u64> is_b_imm;
        BitField<8, 8, u64> imm_a;
        BitField<20, 12, u64> imm_b;
        BitField<42, 1, u64> neg_pred;
        BitField<39, 3, IR::Pred> pred;
    } const bar{insn};
    // The operating mode is encoded in scattered opcode bits (mask 0x9B << 32)
    const Mode mode{[insn] {
        switch (insn & 0x0000009B00000000ULL) {
        case 0x0000000200000000ULL:
            return Mode::RedPopc;
        case 0x0000000300000000ULL:
            return Mode::Scan;
        case 0x0000000A00000000ULL:
            return Mode::RedAnd;
        case 0x0000001200000000ULL:
            return Mode::RedOr;
        case 0x0000008000000000ULL:
            return Mode::Sync;
        case 0x0000008100000000ULL:
            return Mode::Arrive;
        }
        throw NotImplementedException("Invalid encoding");
    }()};
    if (mode != Mode::Sync) {
        throw NotImplementedException("BAR mode {}", mode);
    }
    // Only the immediate-zero barrier/thread-count form is supported
    if (bar.is_a_imm == 0) {
        throw NotImplementedException("Non-immediate input A");
    }
    if (bar.imm_a != 0) {
        throw NotImplementedException("Non-zero input A");
    }
    if (bar.is_b_imm == 0) {
        throw NotImplementedException("Non-immediate input B");
    }
    if (bar.imm_b != 0) {
        throw NotImplementedException("Non-zero input B");
    }
    // The barrier must not be predicated off
    if (bar.pred != IR::Pred::PT && bar.neg_pred != 0) {
        throw NotImplementedException("Non-true input predicate");
    }
    ir.Barrier();
}
} // namespace Shader::Maxwell

View File

@@ -0,0 +1,74 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "common/bit_field.h"
#include "common/common_types.h"
#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
namespace Shader::Maxwell {
namespace {
// Implements BFE (bit field extract). 'src' packs the bit offset in bits 0-7
// and the field length in bits 8-15 of operand B. Handles the hardware edge
// cases: a zero length yields 0, and for signed extracts an offset or
// offset+length reaching past bit 31 replicates/preserves the sign bit.
void BFE(TranslatorVisitor& v, u64 insn, const IR::U32& src) {
    union {
        u64 insn;
        BitField<0, 8, IR::Reg> dest_reg;
        BitField<8, 8, IR::Reg> offset_reg;
        BitField<40, 1, u64> brev;
        BitField<47, 1, u64> cc;
        BitField<48, 1, u64> is_signed;
    } const bfe{insn};
    // offset = src[7:0], count = src[15:8]
    const IR::U32 offset{v.ir.BitFieldExtract(src, v.ir.Imm32(0), v.ir.Imm32(8), false)};
    const IR::U32 count{v.ir.BitFieldExtract(src, v.ir.Imm32(8), v.ir.Imm32(8), false)};
    // Common constants
    const IR::U32 zero{v.ir.Imm32(0)};
    const IR::U32 one{v.ir.Imm32(1)};
    const IR::U32 max_size{v.ir.Imm32(32)};
    // Edge case conditions
    const IR::U1 zero_count{v.ir.IEqual(count, zero)};
    const IR::U1 exceed_count{v.ir.IGreaterThanEqual(v.ir.IAdd(offset, count), max_size, false)};
    const IR::U1 replicate{v.ir.IGreaterThanEqual(offset, max_size, false)};
    IR::U32 base{v.X(bfe.offset_reg)};
    if (bfe.brev != 0) {
        // BFE.BREV extracts from the bit-reversed source
        base = v.ir.BitReverse(base);
    }
    IR::U32 result{v.ir.BitFieldExtract(base, offset, count, bfe.is_signed != 0)};
    if (bfe.is_signed != 0) {
        const IR::U1 is_negative{v.ir.ILessThan(base, zero, true)};
        const IR::U32 replicated_bit{v.ir.Select(is_negative, v.ir.Imm32(-1), zero)};
        const IR::U32 exceed_bit{v.ir.BitFieldExtract(base, v.ir.Imm32(31), one, false)};
        // Replicate condition
        result = IR::U32{v.ir.Select(replicate, replicated_bit, result)};
        // Exceeding condition
        const IR::U32 exceed_result{v.ir.BitFieldInsert(result, exceed_bit, v.ir.Imm32(31), one)};
        result = IR::U32{v.ir.Select(exceed_count, exceed_result, result)};
    }
    // Zero count condition
    result = IR::U32{v.ir.Select(zero_count, zero, result)};
    v.X(bfe.dest_reg, result);
    if (bfe.cc != 0) {
        // Update condition codes from the final result
        v.SetZFlag(v.ir.IEqual(result, zero));
        v.SetSFlag(v.ir.ILessThan(result, zero, true));
        v.ResetCFlag();
        v.ResetOFlag();
    }
}
} // Anonymous namespace
// BFE with a register source for the packed offset/count operand
void TranslatorVisitor::BFE_reg(u64 insn) {
    BFE(*this, insn, GetReg20(insn));
}

// BFE with a constant buffer source
void TranslatorVisitor::BFE_cbuf(u64 insn) {
    BFE(*this, insn, GetCbuf(insn));
}

// BFE with an immediate source
void TranslatorVisitor::BFE_imm(u64 insn) {
    BFE(*this, insn, GetImm20(insn));
}
} // namespace Shader::Maxwell

View File

@@ -0,0 +1,62 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "common/bit_field.h"
#include "common/common_types.h"
#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
namespace Shader::Maxwell {
namespace {
// Implements BFI (bit field insert). 'src_a' packs the insertion offset in
// bits 0-7 and the field width in bits 8-15; that field of the insert
// register is written into 'base'. Offsets past bit 31 leave the base
// unmodified; widths greater than 32 are clamped to the remaining bits.
void BFI(TranslatorVisitor& v, u64 insn, const IR::U32& src_a, const IR::U32& base) {
    union {
        u64 insn;
        BitField<0, 8, IR::Reg> dest_reg;
        BitField<8, 8, IR::Reg> insert_reg;
        BitField<47, 1, u64> cc;
    } const bfi{insn};
    const IR::U32 zero{v.ir.Imm32(0)};
    // offset = src_a[7:0], count = src_a[15:8]
    const IR::U32 offset{v.ir.BitFieldExtract(src_a, zero, v.ir.Imm32(8), false)};
    const IR::U32 unsafe_count{v.ir.BitFieldExtract(src_a, v.ir.Imm32(8), v.ir.Imm32(8), false)};
    const IR::U32 max_size{v.ir.Imm32(32)};
    // Edge case conditions
    const IR::U1 exceed_offset{v.ir.IGreaterThanEqual(offset, max_size, false)};
    const IR::U1 exceed_count{v.ir.IGreaterThan(unsafe_count, max_size, false)};
    const IR::U32 remaining_size{v.ir.ISub(max_size, offset)};
    const IR::U32 safe_count{v.ir.Select(exceed_count, remaining_size, unsafe_count)};
    const IR::U32 insert{v.X(bfi.insert_reg)};
    IR::U32 result{v.ir.BitFieldInsert(base, insert, offset, safe_count)};
    // An out-of-range offset keeps the base unchanged
    result = IR::U32{v.ir.Select(exceed_offset, base, result)};
    v.X(bfi.dest_reg, result);
    if (bfi.cc != 0) {
        // Update condition codes from the final result
        v.SetZFlag(v.ir.IEqual(result, zero));
        v.SetSFlag(v.ir.ILessThan(result, zero, true));
        v.ResetCFlag();
        v.ResetOFlag();
    }
}
} // Anonymous namespace
// BFI with register offset/count and register base
void TranslatorVisitor::BFI_reg(u64 insn) {
    BFI(*this, insn, GetReg20(insn), GetReg39(insn));
}

// BFI with register offset/count and constant buffer base
void TranslatorVisitor::BFI_rc(u64 insn) {
    BFI(*this, insn, GetReg39(insn), GetCbuf(insn));
}

// BFI with constant buffer offset/count and register base
void TranslatorVisitor::BFI_cr(u64 insn) {
    BFI(*this, insn, GetCbuf(insn), GetReg39(insn));
}

// BFI with immediate offset/count and register base
void TranslatorVisitor::BFI_imm(u64 insn) {
    BFI(*this, insn, GetImm20(insn), GetReg39(insn));
}
} // namespace Shader::Maxwell

View File

@@ -0,0 +1,36 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "common/bit_field.h"
#include "common/common_types.h"
#include "shader_recompiler/exception.h"
#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
namespace Shader::Maxwell {
namespace {
// Validates the shared BRX/JMX encoding, rejecting unimplemented features.
void Check(u64 insn) {
    union {
        u64 raw;
        BitField<5, 1, u64> cbuf_mode;
        BitField<6, 1, u64> lmt;
    } const encoding{insn};
    if (encoding.cbuf_mode != 0) {
        throw NotImplementedException("Constant buffer mode");
    }
    if (encoding.lmt != 0) {
        throw NotImplementedException("LMT");
    }
}
} // Anonymous namespace
// BRX: indexed branch. Actual branch handling occurs elsewhere; here the
// encoding is only validated.
void TranslatorVisitor::BRX(u64 insn) {
    Check(insn);
}

// JMX: indexed jump. Validation only, as with BRX.
void TranslatorVisitor::JMX(u64 insn) {
    Check(insn);
}
} // namespace Shader::Maxwell

View File

@@ -0,0 +1,57 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include "common/common_types.h"
#include "shader_recompiler/exception.h"
#include "shader_recompiler/frontend/ir/modifiers.h"
#include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h"
namespace Shader::Maxwell {
// Floating-point rounding mode as encoded in Maxwell instructions
enum class FpRounding : u64 {
    RN,
    RM,
    RP,
    RZ,
};

// Denormal flush mode as encoded in Maxwell instructions
enum class FmzMode : u64 {
    None,
    FTZ,
    FMZ,
    INVALIDFMZ3,
};
// Converts an encoded floating-point rounding mode to its IR equivalent,
// throwing for values outside the encodable range.
inline IR::FpRounding CastFpRounding(FpRounding fp_rounding) {
    if (fp_rounding == FpRounding::RN) {
        return IR::FpRounding::RN;
    }
    if (fp_rounding == FpRounding::RM) {
        return IR::FpRounding::RM;
    }
    if (fp_rounding == FpRounding::RP) {
        return IR::FpRounding::RP;
    }
    if (fp_rounding == FpRounding::RZ) {
        return IR::FpRounding::RZ;
    }
    throw NotImplementedException("Invalid floating-point rounding {}", fp_rounding);
}
// Converts an encoded FMZ mode to the IR representation. FMZ is manually
// handled by each instruction, so it maps to FTZ here; INVALIDFMZ3 throws.
inline IR::FmzMode CastFmzMode(FmzMode fmz_mode) {
    if (fmz_mode == FmzMode::None) {
        return IR::FmzMode::None;
    }
    if (fmz_mode == FmzMode::FTZ || fmz_mode == FmzMode::FMZ) {
        return IR::FmzMode::FTZ;
    }
    throw NotImplementedException("Invalid FMZ mode {}", fmz_mode);
}
} // namespace Shader::Maxwell

View File

@@ -0,0 +1,110 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h"
namespace Shader::Maxwell {
// Maps a Maxwell integer comparison operation to the equivalent IR compare,
// returning the boolean result. False/True produce constants.
IR::U1 IntegerCompare(IR::IREmitter& ir, const IR::U32& operand_1, const IR::U32& operand_2,
                      CompareOp compare_op, bool is_signed) {
    switch (compare_op) {
    case CompareOp::False:
        return ir.Imm1(false);
    case CompareOp::LessThan:
        return ir.ILessThan(operand_1, operand_2, is_signed);
    case CompareOp::Equal:
        return ir.IEqual(operand_1, operand_2);
    case CompareOp::LessThanEqual:
        return ir.ILessThanEqual(operand_1, operand_2, is_signed);
    case CompareOp::GreaterThan:
        return ir.IGreaterThan(operand_1, operand_2, is_signed);
    case CompareOp::NotEqual:
        return ir.INotEqual(operand_1, operand_2);
    case CompareOp::GreaterThanEqual:
        return ir.IGreaterThanEqual(operand_1, operand_2, is_signed);
    case CompareOp::True:
        return ir.Imm1(true);
    default:
        throw NotImplementedException("Invalid compare op {}", compare_op);
    }
}
// Combines two predicate values with the requested boolean operation.
IR::U1 PredicateCombine(IR::IREmitter& ir, const IR::U1& predicate_1, const IR::U1& predicate_2,
                        BooleanOp bop) {
    if (bop == BooleanOp::AND) {
        return ir.LogicalAnd(predicate_1, predicate_2);
    }
    if (bop == BooleanOp::OR) {
        return ir.LogicalOr(predicate_1, predicate_2);
    }
    if (bop == BooleanOp::XOR) {
        return ir.LogicalXor(predicate_1, predicate_2);
    }
    throw NotImplementedException("Invalid bop {}", bop);
}
// Evaluates a predicate operation against an integer result value.
IR::U1 PredicateOperation(IR::IREmitter& ir, const IR::U32& result, PredicateOp op) {
    if (op == PredicateOp::False) {
        return ir.Imm1(false);
    }
    if (op == PredicateOp::True) {
        return ir.Imm1(true);
    }
    if (op == PredicateOp::Zero) {
        return ir.IEqual(result, ir.Imm32(0));
    }
    if (op == PredicateOp::NonZero) {
        return ir.INotEqual(result, ir.Imm32(0));
    }
    throw NotImplementedException("Invalid Predicate operation {}", op);
}
// Returns true when the comparison is ordered, i.e. it is not one of the
// unordered (*U) variants.
bool IsCompareOpOrdered(FPCompareOp op) {
    return op != FPCompareOp::LTU && op != FPCompareOp::EQU && op != FPCompareOp::LEU &&
           op != FPCompareOp::GTU && op != FPCompareOp::NEU && op != FPCompareOp::GEU;
}
// Maps a Maxwell floating-point comparison to the equivalent IR compare. The
// ordered flag (false for the *U variants) is forwarded to the IR compares;
// NUM and Nan test orderedness itself.
IR::U1 FloatingPointCompare(IR::IREmitter& ir, const IR::F16F32F64& operand_1,
                            const IR::F16F32F64& operand_2, FPCompareOp compare_op,
                            IR::FpControl control) {
    const bool ordered{IsCompareOpOrdered(compare_op)};
    switch (compare_op) {
    case FPCompareOp::F:
        return ir.Imm1(false);
    case FPCompareOp::LT:
    case FPCompareOp::LTU:
        return ir.FPLessThan(operand_1, operand_2, control, ordered);
    case FPCompareOp::EQ:
    case FPCompareOp::EQU:
        return ir.FPEqual(operand_1, operand_2, control, ordered);
    case FPCompareOp::LE:
    case FPCompareOp::LEU:
        return ir.FPLessThanEqual(operand_1, operand_2, control, ordered);
    case FPCompareOp::GT:
    case FPCompareOp::GTU:
        return ir.FPGreaterThan(operand_1, operand_2, control, ordered);
    case FPCompareOp::NE:
    case FPCompareOp::NEU:
        return ir.FPNotEqual(operand_1, operand_2, control, ordered);
    case FPCompareOp::GE:
    case FPCompareOp::GEU:
        return ir.FPGreaterThanEqual(operand_1, operand_2, control, ordered);
    case FPCompareOp::NUM:
        return ir.FPOrdered(operand_1, operand_2);
    case FPCompareOp::Nan:
        return ir.FPUnordered(operand_1, operand_2);
    case FPCompareOp::T:
        return ir.Imm1(true);
    default:
        throw NotImplementedException("Invalid FP compare op {}", compare_op);
    }
}
} // namespace Shader::Maxwell

View File

@@ -0,0 +1,24 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include "common/common_types.h"
#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
namespace Shader::Maxwell {
// Emits the IR comparison for a Maxwell integer compare operation.
[[nodiscard]] IR::U1 IntegerCompare(IR::IREmitter& ir, const IR::U32& operand_1,
                                    const IR::U32& operand_2, CompareOp compare_op, bool is_signed);
// Combines two predicates with the boolean operation encoded in an instruction.
[[nodiscard]] IR::U1 PredicateCombine(IR::IREmitter& ir, const IR::U1& predicate_1,
                                      const IR::U1& predicate_2, BooleanOp bop);
// Derives a predicate from an integer result (constant true/false, zero test).
[[nodiscard]] IR::U1 PredicateOperation(IR::IREmitter& ir, const IR::U32& result, PredicateOp op);
// Returns true unless the compare op is one of the unordered (*U) variants.
[[nodiscard]] bool IsCompareOpOrdered(FPCompareOp op);
// Emits the IR comparison for a Maxwell floating-point compare operation.
[[nodiscard]] IR::U1 FloatingPointCompare(IR::IREmitter& ir, const IR::F16F32F64& operand_1,
                                          const IR::F16F32F64& operand_2, FPCompareOp compare_op,
                                          IR::FpControl control = {});
} // namespace Shader::Maxwell

View File

@@ -0,0 +1,66 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "common/bit_field.h"
#include "common/common_types.h"
#include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h"
#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
namespace Shader::Maxwell {
// CSET: write an all-ones mask (or 1.0f when BF is set) to a register if the
// condition-code flow test combined with a predicate passes, zero otherwise.
void TranslatorVisitor::CSET(u64 insn) {
    union {
        u64 raw;
        BitField<0, 8, IR::Reg> dest_reg;
        BitField<8, 5, IR::FlowTest> cc_test;
        BitField<39, 3, IR::Pred> bop_pred;
        BitField<42, 1, u64> neg_bop_pred;
        BitField<44, 1, u64> bf;
        BitField<45, 2, BooleanOp> bop;
        BitField<47, 1, u64> cc;
    } const cset{insn};
    // BF=0 passes an integer mask (0xFFFFFFFF), BF=1 passes float 1.0f.
    const IR::U32 one_mask{ir.Imm32(-1)};
    const IR::U32 fp_one{ir.Imm32(0x3f800000)};
    const IR::U32 zero{ir.Imm32(0)};
    const IR::U32 pass_result{cset.bf == 0 ? one_mask : fp_one};
    const IR::U1 cc_test_result{ir.GetFlowTestResult(cset.cc_test)};
    const IR::U1 bop_pred{ir.GetPred(cset.bop_pred, cset.neg_bop_pred != 0)};
    const IR::U1 pred_result{PredicateCombine(ir, cc_test_result, bop_pred, cset.bop)};
    const IR::U32 result{ir.Select(pred_result, pass_result, zero)};
    X(cset.dest_reg, result);
    if (cset.cc != 0) {
        // Update condition codes from the written result.
        const IR::U1 is_zero{ir.IEqual(result, zero)};
        SetZFlag(is_zero);
        if (cset.bf != 0) {
            ResetSFlag();
        } else {
            SetSFlag(ir.LogicalNot(is_zero));
        }
        ResetOFlag();
        ResetCFlag();
    }
}
// CSETP: combine a condition-code flow test with a predicate and write the
// result - and the result of combining the negated test - to two predicates.
void TranslatorVisitor::CSETP(u64 insn) {
    union {
        u64 raw;
        BitField<0, 3, IR::Pred> dest_pred_b;
        BitField<3, 3, IR::Pred> dest_pred_a;
        BitField<8, 5, IR::FlowTest> cc_test;
        BitField<39, 3, IR::Pred> bop_pred;
        BitField<42, 1, u64> neg_bop_pred;
        BitField<45, 2, BooleanOp> bop;
    } const csetp{insn};
    const BooleanOp boolean_op{csetp.bop};
    const IR::U1 combine_pred{ir.GetPred(csetp.bop_pred, csetp.neg_bop_pred != 0)};
    const IR::U1 test_passed{ir.GetFlowTestResult(csetp.cc_test)};
    const IR::U1 pred_a{PredicateCombine(ir, test_passed, combine_pred, boolean_op)};
    const IR::U1 pred_b{PredicateCombine(ir, ir.LogicalNot(test_passed), combine_pred, boolean_op)};
    ir.SetPred(csetp.dest_pred_a, pred_a);
    ir.SetPred(csetp.dest_pred_b, pred_b);
}
} // namespace Shader::Maxwell

View File

@@ -0,0 +1,55 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "common/common_types.h"
#include "shader_recompiler/exception.h"
#include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h"
#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
namespace Shader::Maxwell {
namespace {
// Common DADD path: src_b has already been fetched from a register, constant
// buffer, or immediate by the caller.
void DADD(TranslatorVisitor& v, u64 insn, const IR::F64& src_b) {
    union {
        u64 raw;
        BitField<0, 8, IR::Reg> dest_reg;
        BitField<8, 8, IR::Reg> src_a_reg;
        BitField<39, 2, FpRounding> fp_rounding;
        BitField<45, 1, u64> neg_b;
        BitField<46, 1, u64> abs_a;
        BitField<47, 1, u64> cc;
        BitField<48, 1, u64> neg_a;
        BitField<49, 1, u64> abs_b;
    } const dadd{insn};
    if (dadd.cc != 0) {
        throw NotImplementedException("DADD CC");
    }
    // Apply the encoded absolute-value and negation modifiers to each operand.
    const IR::F64 lhs{v.ir.FPAbsNeg(v.D(dadd.src_a_reg), dadd.abs_a != 0, dadd.neg_a != 0)};
    const IR::F64 rhs{v.ir.FPAbsNeg(src_b, dadd.abs_b != 0, dadd.neg_b != 0)};
    // No contraction, no flush-to-zero; rounding comes from the encoding.
    const IR::FpControl control{
        .no_contraction = true,
        .rounding = CastFpRounding(dadd.fp_rounding),
        .fmz_mode = IR::FmzMode::None,
    };
    v.D(dadd.dest_reg, v.ir.FPAdd(lhs, rhs, control));
}
} // Anonymous namespace
// DADD with a register src_b.
void TranslatorVisitor::DADD_reg(u64 insn) {
    DADD(*this, insn, GetDoubleReg20(insn));
}
// DADD with a constant buffer src_b.
void TranslatorVisitor::DADD_cbuf(u64 insn) {
    DADD(*this, insn, GetDoubleCbuf(insn));
}
// DADD with an immediate src_b.
void TranslatorVisitor::DADD_imm(u64 insn) {
    DADD(*this, insn, GetDoubleImm20(insn));
}
} // namespace Shader::Maxwell

View File

@@ -0,0 +1,72 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "common/bit_field.h"
#include "common/common_types.h"
#include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h"
#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
namespace Shader::Maxwell {
namespace {
// DSET: compare two F64 operands, combine the comparison with a predicate, and
// write an all-ones mask (or 1.0f when BF is set) to the destination register.
void DSET(TranslatorVisitor& v, u64 insn, const IR::F64& src_b) {
    union {
        u64 insn;
        BitField<0, 8, IR::Reg> dest_reg;
        BitField<8, 8, IR::Reg> src_a_reg;
        BitField<39, 3, IR::Pred> pred;
        BitField<42, 1, u64> neg_pred;
        BitField<43, 1, u64> negate_a;
        BitField<44, 1, u64> abs_b;
        BitField<45, 2, BooleanOp> bop;
        BitField<47, 1, u64> cc;
        BitField<48, 4, FPCompareOp> compare_op;
        BitField<52, 1, u64> bf;
        BitField<53, 1, u64> negate_b;
        BitField<54, 1, u64> abs_a;
    } const dset{insn};
    // Apply the encoded absolute-value/negation modifiers to both operands.
    const IR::F64 op_a{v.ir.FPAbsNeg(v.D(dset.src_a_reg), dset.abs_a != 0, dset.negate_a != 0)};
    const IR::F64 op_b{v.ir.FPAbsNeg(src_b, dset.abs_b != 0, dset.negate_b != 0)};
    IR::U1 pred{v.ir.GetPred(dset.pred)};
    if (dset.neg_pred != 0) {
        pred = v.ir.LogicalNot(pred);
    }
    const IR::U1 cmp_result{FloatingPointCompare(v.ir, op_a, op_b, dset.compare_op)};
    const IR::U1 bop_result{PredicateCombine(v.ir, cmp_result, pred, dset.bop)};
    // BF=0 writes an integer mask (0xFFFFFFFF), BF=1 writes float 1.0f on pass.
    const IR::U32 one_mask{v.ir.Imm32(-1)};
    const IR::U32 fp_one{v.ir.Imm32(0x3f800000)};
    const IR::U32 zero{v.ir.Imm32(0)};
    const IR::U32 pass_result{dset.bf == 0 ? one_mask : fp_one};
    const IR::U32 result{v.ir.Select(bop_result, pass_result, zero)};
    v.X(dset.dest_reg, result);
    if (dset.cc != 0) {
        // Update condition codes from the integer result.
        const IR::U1 is_zero{v.ir.IEqual(result, zero)};
        v.SetZFlag(is_zero);
        if (dset.bf != 0) {
            v.ResetSFlag();
        } else {
            v.SetSFlag(v.ir.LogicalNot(is_zero));
        }
        v.ResetCFlag();
        v.ResetOFlag();
    }
}
} // Anonymous namespace
// DSET with a register src_b.
void TranslatorVisitor::DSET_reg(u64 insn) {
    DSET(*this, insn, GetDoubleReg20(insn));
}
// DSET with a constant buffer src_b.
void TranslatorVisitor::DSET_cbuf(u64 insn) {
    DSET(*this, insn, GetDoubleCbuf(insn));
}
// DSET with an immediate src_b.
void TranslatorVisitor::DSET_imm(u64 insn) {
    DSET(*this, insn, GetDoubleImm20(insn));
}
} // namespace Shader::Maxwell

View File

@@ -0,0 +1,58 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "common/common_types.h"
#include "shader_recompiler/exception.h"
#include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h"
#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
namespace Shader::Maxwell {
namespace {
// Common DFMA path: computes src_a * src_b + src_c with negation modifiers.
void DFMA(TranslatorVisitor& v, u64 insn, const IR::F64& src_b, const IR::F64& src_c) {
    union {
        u64 raw;
        BitField<0, 8, IR::Reg> dest_reg;
        BitField<8, 8, IR::Reg> src_a_reg;
        BitField<50, 2, FpRounding> fp_rounding;
        BitField<47, 1, u64> cc;
        BitField<48, 1, u64> neg_b;
        BitField<49, 1, u64> neg_c;
    } const dfma{insn};
    if (dfma.cc != 0) {
        throw NotImplementedException("DFMA CC");
    }
    const IR::F64 multiplicand{v.D(dfma.src_a_reg)};
    const IR::F64 multiplier{v.ir.FPAbsNeg(src_b, false, dfma.neg_b != 0)};
    const IR::F64 addend{v.ir.FPAbsNeg(src_c, false, dfma.neg_c != 0)};
    // No contraction, no flush-to-zero; rounding comes from the encoding.
    const IR::FpControl control{
        .no_contraction = true,
        .rounding = CastFpRounding(dfma.fp_rounding),
        .fmz_mode = IR::FmzMode::None,
    };
    v.D(dfma.dest_reg, v.ir.FPFma(multiplicand, multiplier, addend, control));
}
} // Anonymous namespace
// DFMA with register src_b and src_c.
void TranslatorVisitor::DFMA_reg(u64 insn) {
    DFMA(*this, insn, GetDoubleReg20(insn), GetDoubleReg39(insn));
}
// DFMA with constant buffer src_b and register src_c.
void TranslatorVisitor::DFMA_cr(u64 insn) {
    DFMA(*this, insn, GetDoubleCbuf(insn), GetDoubleReg39(insn));
}
// DFMA with register src_b and constant buffer src_c.
void TranslatorVisitor::DFMA_rc(u64 insn) {
    DFMA(*this, insn, GetDoubleReg39(insn), GetDoubleCbuf(insn));
}
// DFMA with immediate src_b and register src_c.
void TranslatorVisitor::DFMA_imm(u64 insn) {
    DFMA(*this, insn, GetDoubleImm20(insn), GetDoubleReg39(insn));
}
} // namespace Shader::Maxwell

View File

@@ -0,0 +1,55 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "common/bit_field.h"
#include "common/common_types.h"
#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
namespace Shader::Maxwell {
namespace {
// DMNMX: select either the minimum or maximum of two F64 operands based on a
// predicate.
void DMNMX(TranslatorVisitor& v, u64 insn, const IR::F64& src_b) {
    union {
        u64 insn;
        BitField<0, 8, IR::Reg> dest_reg;
        BitField<8, 8, IR::Reg> src_a_reg;
        BitField<39, 3, IR::Pred> pred;
        BitField<42, 1, u64> neg_pred;
        BitField<45, 1, u64> negate_b;
        BitField<46, 1, u64> abs_a;
        BitField<47, 1, u64> cc;
        BitField<48, 1, u64> negate_a;
        BitField<49, 1, u64> abs_b;
    } const dmnmx{insn};
    if (dmnmx.cc != 0) {
        throw NotImplementedException("DMNMX CC");
    }
    const IR::U1 predicate{v.ir.GetPred(dmnmx.pred)};
    const IR::F64 lhs{v.ir.FPAbsNeg(v.D(dmnmx.src_a_reg), dmnmx.abs_a != 0, dmnmx.negate_a != 0)};
    const IR::F64 rhs{v.ir.FPAbsNeg(src_b, dmnmx.abs_b != 0, dmnmx.negate_b != 0)};
    IR::F64 maximum{v.ir.FPMax(lhs, rhs)};
    IR::F64 minimum{v.ir.FPMin(lhs, rhs)};
    // A negated predicate is implemented by swapping the selection operands
    // rather than negating the predicate itself.
    if (dmnmx.neg_pred != 0) {
        std::swap(minimum, maximum);
    }
    v.D(dmnmx.dest_reg, IR::F64{v.ir.Select(predicate, minimum, maximum)});
}
} // Anonymous namespace
// DMNMX with a register src_b.
void TranslatorVisitor::DMNMX_reg(u64 insn) {
    DMNMX(*this, insn, GetDoubleReg20(insn));
}
// DMNMX with a constant buffer src_b.
void TranslatorVisitor::DMNMX_cbuf(u64 insn) {
    DMNMX(*this, insn, GetDoubleCbuf(insn));
}
// DMNMX with an immediate src_b.
void TranslatorVisitor::DMNMX_imm(u64 insn) {
    DMNMX(*this, insn, GetDoubleImm20(insn));
}
} // namespace Shader::Maxwell

View File

@@ -0,0 +1,50 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "common/common_types.h"
#include "shader_recompiler/exception.h"
#include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h"
#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
namespace Shader::Maxwell {
namespace {
// Common DMUL path: src_a may be negated; the result honors the encoded
// rounding mode with no contraction or flush-to-zero.
void DMUL(TranslatorVisitor& v, u64 insn, const IR::F64& src_b) {
    union {
        u64 raw;
        BitField<0, 8, IR::Reg> dest_reg;
        BitField<8, 8, IR::Reg> src_a_reg;
        BitField<39, 2, FpRounding> fp_rounding;
        BitField<47, 1, u64> cc;
        BitField<48, 1, u64> neg;
    } const dmul{insn};
    if (dmul.cc != 0) {
        throw NotImplementedException("DMUL CC");
    }
    const IR::F64 lhs{v.ir.FPAbsNeg(v.D(dmul.src_a_reg), false, dmul.neg != 0)};
    const IR::FpControl control{
        .no_contraction = true,
        .rounding = CastFpRounding(dmul.fp_rounding),
        .fmz_mode = IR::FmzMode::None,
    };
    v.D(dmul.dest_reg, v.ir.FPMul(lhs, src_b, control));
}
} // Anonymous namespace
// DMUL with a register src_b.
void TranslatorVisitor::DMUL_reg(u64 insn) {
    DMUL(*this, insn, GetDoubleReg20(insn));
}
// DMUL with a constant buffer src_b.
void TranslatorVisitor::DMUL_cbuf(u64 insn) {
    DMUL(*this, insn, GetDoubleCbuf(insn));
}
// DMUL with an immediate src_b.
void TranslatorVisitor::DMUL_imm(u64 insn) {
    DMUL(*this, insn, GetDoubleImm20(insn));
}
} // namespace Shader::Maxwell

View File

@@ -0,0 +1,54 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "common/bit_field.h"
#include "common/common_types.h"
#include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h"
#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
namespace Shader::Maxwell {
namespace {
// DSETP: compare two F64 operands, combine with a predicate, and write the
// combined result and its negated-comparison counterpart to two predicates.
void DSETP(TranslatorVisitor& v, u64 insn, const IR::F64& src_b) {
    union {
        u64 insn;
        BitField<0, 3, IR::Pred> dest_pred_b;
        BitField<3, 3, IR::Pred> dest_pred_a;
        BitField<6, 1, u64> negate_b;
        BitField<7, 1, u64> abs_a;
        BitField<8, 8, IR::Reg> src_a_reg;
        BitField<39, 3, IR::Pred> bop_pred;
        BitField<42, 1, u64> neg_bop_pred;
        BitField<43, 1, u64> negate_a;
        BitField<44, 1, u64> abs_b;
        BitField<45, 2, BooleanOp> bop;
        BitField<48, 4, FPCompareOp> compare_op;
    } const dsetp{insn};
    const IR::F64 lhs{v.ir.FPAbsNeg(v.D(dsetp.src_a_reg), dsetp.abs_a != 0, dsetp.negate_a != 0)};
    const IR::F64 rhs{v.ir.FPAbsNeg(src_b, dsetp.abs_b != 0, dsetp.negate_b != 0)};
    const IR::U1 cmp{FloatingPointCompare(v.ir, lhs, rhs, dsetp.compare_op)};
    const IR::U1 combine_pred{v.ir.GetPred(dsetp.bop_pred, dsetp.neg_bop_pred != 0)};
    const BooleanOp boolean_op{dsetp.bop};
    const IR::U1 pred_a{PredicateCombine(v.ir, cmp, combine_pred, boolean_op)};
    const IR::U1 pred_b{PredicateCombine(v.ir, v.ir.LogicalNot(cmp), combine_pred, boolean_op)};
    v.ir.SetPred(dsetp.dest_pred_a, pred_a);
    v.ir.SetPred(dsetp.dest_pred_b, pred_b);
}
} // Anonymous namespace
// DSETP with a register src_b.
void TranslatorVisitor::DSETP_reg(u64 insn) {
    DSETP(*this, insn, GetDoubleReg20(insn));
}
// DSETP with a constant buffer src_b.
void TranslatorVisitor::DSETP_cbuf(u64 insn) {
    DSETP(*this, insn, GetDoubleCbuf(insn));
}
// DSETP with an immediate src_b.
void TranslatorVisitor::DSETP_imm(u64 insn) {
    DSETP(*this, insn, GetDoubleImm20(insn));
}
} // namespace Shader::Maxwell

View File

@@ -0,0 +1,43 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "common/common_types.h"
#include "shader_recompiler/exception.h"
#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
namespace Shader::Maxwell {
namespace {
// Fragment shader epilogue: copy output registers into render target colors,
// and optionally the sample mask and fragment depth, as described by the
// shader program header (SPH).
void ExitFragment(TranslatorVisitor& v) {
    const ProgramHeader sph{v.env.SPH()};
    IR::Reg src_reg{IR::Reg::R0};
    // Each enabled component of each render target consumes one register,
    // starting from R0.
    for (u32 render_target = 0; render_target < 8; ++render_target) {
        const std::array<bool, 4> mask{sph.ps.EnabledOutputComponents(render_target)};
        for (u32 component = 0; component < 4; ++component) {
            if (!mask[component]) {
                continue;
            }
            v.ir.SetFragColor(render_target, component, v.F(src_reg));
            ++src_reg;
        }
    }
    if (sph.ps.omap.sample_mask != 0) {
        v.ir.SetSampleMask(v.X(src_reg));
    }
    if (sph.ps.omap.depth != 0) {
        // NOTE(review): depth is read from the register after the sample mask
        // slot even when no sample mask is written - confirm against hardware.
        v.ir.SetFragDepth(v.F(src_reg + 1));
    }
}
} // Anonymous namespace
// EXIT: only fragment shaders perform epilogue work here; all other stages
// need no special handling at this point.
void TranslatorVisitor::EXIT() {
    if (env.ShaderStage() == Stage::Fragment) {
        ExitFragment(*this);
    }
}
} // namespace Shader::Maxwell

View File

@@ -0,0 +1,47 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "common/bit_field.h"
#include "common/common_types.h"
#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
namespace Shader::Maxwell {
namespace {
// FLO: find the most significant set bit (optionally of the complemented
// source), optionally mirrored into a shift amount.
void FLO(TranslatorVisitor& v, u64 insn, IR::U32 src) {
    union {
        u64 insn;
        BitField<0, 8, IR::Reg> dest_reg;
        BitField<40, 1, u64> tilde;
        BitField<41, 1, u64> shift;
        BitField<47, 1, u64> cc;
        BitField<48, 1, u64> is_signed;
    } const flo{insn};
    if (flo.cc != 0) {
        throw NotImplementedException("CC");
    }
    if (flo.tilde != 0) {
        src = v.ir.BitwiseNot(src);
    }
    IR::U32 msb_index{flo.is_signed != 0 ? v.ir.FindSMsb(src) : v.ir.FindUMsb(src)};
    if (flo.shift != 0) {
        // XOR with 31 mirrors the index across the register width; the
        // not-found sentinel (-1) is passed through unchanged.
        const IR::U1 not_found{v.ir.IEqual(msb_index, v.ir.Imm32(-1))};
        msb_index =
            IR::U32{v.ir.Select(not_found, msb_index, v.ir.BitwiseXor(msb_index, v.ir.Imm32(31)))};
    }
    v.X(flo.dest_reg, msb_index);
}
} // Anonymous namespace
// FLO with a register source.
void TranslatorVisitor::FLO_reg(u64 insn) {
    FLO(*this, insn, GetReg20(insn));
}
// FLO with a constant buffer source.
void TranslatorVisitor::FLO_cbuf(u64 insn) {
    FLO(*this, insn, GetCbuf(insn));
}
// FLO with an immediate source.
void TranslatorVisitor::FLO_imm(u64 insn) {
    FLO(*this, insn, GetImm20(insn));
}
} // namespace Shader::Maxwell

View File

@@ -0,0 +1,82 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "common/common_types.h"
#include "shader_recompiler/exception.h"
#include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h"
#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
namespace Shader::Maxwell {
namespace {
// Shared FADD worker used by every encoding variant; all modifiers have been
// decoded by the caller.
void FADD(TranslatorVisitor& v, u64 insn, bool sat, bool cc, bool ftz, FpRounding fp_rounding,
          const IR::F32& src_b, bool abs_a, bool neg_a, bool abs_b, bool neg_b) {
    union {
        u64 raw;
        BitField<0, 8, IR::Reg> dest_reg;
        BitField<8, 8, IR::Reg> src_a;
    } const fadd{insn};
    if (cc) {
        throw NotImplementedException("FADD CC");
    }
    const IR::F32 lhs{v.ir.FPAbsNeg(v.F(fadd.src_a), abs_a, neg_a)};
    const IR::F32 rhs{v.ir.FPAbsNeg(src_b, abs_b, neg_b)};
    const IR::FpControl control{
        .no_contraction = true,
        .rounding = CastFpRounding(fp_rounding),
        .fmz_mode = (ftz ? IR::FmzMode::FTZ : IR::FmzMode::None),
    };
    IR::F32 result{v.ir.FPAdd(lhs, rhs, control)};
    if (sat) {
        result = v.ir.FPSaturate(result);
    }
    v.F(fadd.dest_reg, result);
}
// Decodes the modifier bits of the common FADD encoding and forwards them to
// the worker above.
void FADD(TranslatorVisitor& v, u64 insn, const IR::F32& src_b) {
    union {
        u64 raw;
        BitField<39, 2, FpRounding> fp_rounding;
        BitField<44, 1, u64> ftz;
        BitField<45, 1, u64> neg_b;
        BitField<46, 1, u64> abs_a;
        BitField<47, 1, u64> cc;
        BitField<48, 1, u64> neg_a;
        BitField<49, 1, u64> abs_b;
        BitField<50, 1, u64> sat;
    } const encoding{insn};
    FADD(v, insn, encoding.sat != 0, encoding.cc != 0, encoding.ftz != 0, encoding.fp_rounding,
         src_b, encoding.abs_a != 0, encoding.neg_a != 0, encoding.abs_b != 0,
         encoding.neg_b != 0);
}
} // Anonymous namespace
// FADD with a register src_b.
void TranslatorVisitor::FADD_reg(u64 insn) {
    FADD(*this, insn, GetFloatReg20(insn));
}
// FADD with a constant buffer src_b.
void TranslatorVisitor::FADD_cbuf(u64 insn) {
    FADD(*this, insn, GetFloatCbuf(insn));
}
// FADD with an immediate src_b.
void TranslatorVisitor::FADD_imm(u64 insn) {
    FADD(*this, insn, GetFloatImm20(insn));
}
// FADD with a 32-bit float immediate: saturation is not encoded and rounding
// is fixed to round-to-nearest-even.
void TranslatorVisitor::FADD32I(u64 insn) {
    union {
        u64 raw;
        BitField<52, 1, u64> cc;
        BitField<53, 1, u64> neg_b;
        BitField<54, 1, u64> abs_a;
        BitField<55, 1, u64> ftz;
        BitField<56, 1, u64> neg_a;
        BitField<57, 1, u64> abs_b;
    } const fadd32i{insn};
    FADD(*this, insn, false, fadd32i.cc != 0, fadd32i.ftz != 0, FpRounding::RN, GetFloatImm32(insn),
         fadd32i.abs_a != 0, fadd32i.neg_a != 0, fadd32i.abs_b != 0, fadd32i.neg_b != 0);
}
} // namespace Shader::Maxwell

View File

@@ -0,0 +1,55 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "common/bit_field.h"
#include "common/common_types.h"
#include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h"
#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
namespace Shader::Maxwell {
namespace {
// FCMP: compare `operand` against zero and select between the source register
// and src_a accordingly.
void FCMP(TranslatorVisitor& v, u64 insn, const IR::U32& src_a, const IR::F32& operand) {
    union {
        u64 insn;
        BitField<0, 8, IR::Reg> dest_reg;
        BitField<8, 8, IR::Reg> src_reg;
        BitField<47, 1, u64> ftz;
        BitField<48, 4, FPCompareOp> compare_op;
    } const fcmp{insn};
    const IR::F32 zero_value{v.ir.Imm32(0.0f)};
    const IR::FpControl control{.fmz_mode = (fcmp.ftz != 0 ? IR::FmzMode::FTZ : IR::FmzMode::None)};
    const IR::U1 comparison{FloatingPointCompare(v.ir, operand, zero_value, fcmp.compare_op, control)};
    const IR::U32 reg_value{v.X(fcmp.src_reg)};
    v.X(fcmp.dest_reg, IR::U32{v.ir.Select(comparison, reg_value, src_a)});
}
} // Anonymous namespace
// FCMP with register src_a and register operand.
void TranslatorVisitor::FCMP_reg(u64 insn) {
    FCMP(*this, insn, GetReg20(insn), GetFloatReg39(insn));
}
// FCMP with register src_a and constant buffer operand.
void TranslatorVisitor::FCMP_rc(u64 insn) {
    FCMP(*this, insn, GetReg39(insn), GetFloatCbuf(insn));
}
// FCMP with constant buffer src_a and register operand.
void TranslatorVisitor::FCMP_cr(u64 insn) {
    FCMP(*this, insn, GetCbuf(insn), GetFloatReg39(insn));
}
// FCMP with a 19-bit immediate: the immediate forms the upper mantissa and
// exponent bits of a float, with a separately encoded sign bit.
void TranslatorVisitor::FCMP_imm(u64 insn) {
    union {
        u64 raw;
        BitField<20, 19, u64> value;
        BitField<56, 1, u64> is_negative;
    } const fcmp{insn};
    const u32 base{static_cast<u32>(fcmp.value) << 12};
    const u32 sign_bit{fcmp.is_negative != 0 ? (1U << 31) : 0};
    FCMP(*this, insn, ir.Imm32(base | sign_bit), GetFloatReg39(insn));
}
} // namespace Shader::Maxwell

View File

@@ -0,0 +1,78 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "common/bit_field.h"
#include "common/common_types.h"
#include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h"
#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
namespace Shader::Maxwell {
namespace {
// FSET: compare two F32 operands, combine the comparison with a predicate, and
// write an all-ones mask (or 1.0f when BF is set) to the destination register.
void FSET(TranslatorVisitor& v, u64 insn, const IR::F32& src_b) {
    union {
        u64 insn;
        BitField<0, 8, IR::Reg> dest_reg;
        BitField<8, 8, IR::Reg> src_a_reg;
        BitField<39, 3, IR::Pred> pred;
        BitField<42, 1, u64> neg_pred;
        BitField<43, 1, u64> negate_a;
        BitField<44, 1, u64> abs_b;
        BitField<45, 2, BooleanOp> bop;
        BitField<47, 1, u64> cc;
        BitField<48, 4, FPCompareOp> compare_op;
        BitField<52, 1, u64> bf;
        BitField<53, 1, u64> negate_b;
        BitField<54, 1, u64> abs_a;
        BitField<55, 1, u64> ftz;
    } const fset{insn};
    // Apply the encoded absolute-value/negation modifiers to both operands.
    const IR::F32 op_a{v.ir.FPAbsNeg(v.F(fset.src_a_reg), fset.abs_a != 0, fset.negate_a != 0)};
    const IR::F32 op_b = v.ir.FPAbsNeg(src_b, fset.abs_b != 0, fset.negate_b != 0);
    const IR::FpControl control{
        .no_contraction = false,
        .rounding = IR::FpRounding::DontCare,
        .fmz_mode = (fset.ftz != 0 ? IR::FmzMode::FTZ : IR::FmzMode::None),
    };
    IR::U1 pred{v.ir.GetPred(fset.pred)};
    if (fset.neg_pred != 0) {
        pred = v.ir.LogicalNot(pred);
    }
    const IR::U1 cmp_result{FloatingPointCompare(v.ir, op_a, op_b, fset.compare_op, control)};
    const IR::U1 bop_result{PredicateCombine(v.ir, cmp_result, pred, fset.bop)};
    // BF=0 writes an integer mask (0xFFFFFFFF), BF=1 writes float 1.0f on pass.
    const IR::U32 one_mask{v.ir.Imm32(-1)};
    const IR::U32 fp_one{v.ir.Imm32(0x3f800000)};
    const IR::U32 zero{v.ir.Imm32(0)};
    const IR::U32 pass_result{fset.bf == 0 ? one_mask : fp_one};
    const IR::U32 result{v.ir.Select(bop_result, pass_result, zero)};
    v.X(fset.dest_reg, result);
    if (fset.cc != 0) {
        // Update condition codes from the integer result.
        const IR::U1 is_zero{v.ir.IEqual(result, zero)};
        v.SetZFlag(is_zero);
        if (fset.bf != 0) {
            v.ResetSFlag();
        } else {
            v.SetSFlag(v.ir.LogicalNot(is_zero));
        }
        v.ResetCFlag();
        v.ResetOFlag();
    }
}
} // Anonymous namespace
// FSET with a register src_b.
void TranslatorVisitor::FSET_reg(u64 insn) {
    FSET(*this, insn, GetFloatReg20(insn));
}
// FSET with a constant buffer src_b.
void TranslatorVisitor::FSET_cbuf(u64 insn) {
    FSET(*this, insn, GetFloatCbuf(insn));
}
// FSET with an immediate src_b.
void TranslatorVisitor::FSET_imm(u64 insn) {
    FSET(*this, insn, GetFloatImm20(insn));
}
} // namespace Shader::Maxwell

View File

@@ -0,0 +1,214 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h"
#include "shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.h"
namespace Shader::Maxwell {
namespace {
// Source/destination floating-point width encoded in the F2F instruction.
enum class FloatFormat : u64 {
    F16 = 1,
    F32 = 2,
    F64 = 3,
};
// Rounding operation applied when source and destination formats are equal.
enum class RoundingOp : u64 {
    None = 0,
    Pass = 3,
    Round = 8,
    Floor = 9,
    Ceil = 10,
    Trunc = 11,
};
// Maps a FloatFormat to its bit width (used as the FPConvert target width).
[[nodiscard]] u32 WidthSize(FloatFormat width) {
    if (width == FloatFormat::F16) {
        return 16;
    }
    if (width == FloatFormat::F32) {
        return 32;
    }
    if (width == FloatFormat::F64) {
        return 64;
    }
    throw NotImplementedException("Invalid width {}", width);
}
// Common F2F implementation: applies modifiers, converts across formats or
// applies a rounding operation within a format, optionally saturates, and
// writes the result back in the destination format.
void F2F(TranslatorVisitor& v, u64 insn, const IR::F16F32F64& src_a, bool abs) {
    union {
        u64 insn;
        BitField<0, 8, IR::Reg> dest_reg;
        BitField<44, 1, u64> ftz;
        BitField<45, 1, u64> neg;
        BitField<47, 1, u64> cc;
        BitField<50, 1, u64> sat;
        BitField<39, 4, u64> rounding_op;
        BitField<39, 2, FpRounding> rounding;
        BitField<10, 2, FloatFormat> src_size;
        BitField<8, 2, FloatFormat> dst_size;

        [[nodiscard]] RoundingOp RoundingOperation() const {
            constexpr u64 rounding_mask = 0x0B;
            return static_cast<RoundingOp>(rounding_op.Value() & rounding_mask);
        }
    } const f2f{insn};
    if (f2f.cc != 0) {
        throw NotImplementedException("F2F CC");
    }
    IR::F16F32F64 input{v.ir.FPAbsNeg(src_a, abs, f2f.neg != 0)};

    const bool any_fp64{f2f.src_size == FloatFormat::F64 || f2f.dst_size == FloatFormat::F64};
    IR::FpControl fp_control{
        .no_contraction = false,
        .rounding = IR::FpRounding::DontCare,
        // Flush-to-zero is only honored when no 64-bit operand is involved.
        .fmz_mode = (f2f.ftz != 0 && !any_fp64 ? IR::FmzMode::FTZ : IR::FmzMode::None),
    };
    if (f2f.src_size != f2f.dst_size) {
        // Cross-format conversion: the encoded rounding mode applies.
        fp_control.rounding = CastFpRounding(f2f.rounding);
        input = v.ir.FPConvert(WidthSize(f2f.dst_size), input, fp_control);
    } else {
        // Same-format operation: the 4-bit field selects a rounding operation.
        switch (f2f.RoundingOperation()) {
        case RoundingOp::None:
        case RoundingOp::Pass:
            // Make sure NANs are handled properly
            switch (f2f.src_size) {
            case FloatFormat::F16:
                input = v.ir.FPAdd(input, v.ir.FPConvert(16, v.ir.Imm32(0.0f)), fp_control);
                break;
            case FloatFormat::F32:
                input = v.ir.FPAdd(input, v.ir.Imm32(0.0f), fp_control);
                break;
            case FloatFormat::F64:
                input = v.ir.FPAdd(input, v.ir.Imm64(0.0), fp_control);
                break;
            }
            break;
        case RoundingOp::Round:
            input = v.ir.FPRoundEven(input, fp_control);
            break;
        case RoundingOp::Floor:
            input = v.ir.FPFloor(input, fp_control);
            break;
        case RoundingOp::Ceil:
            input = v.ir.FPCeil(input, fp_control);
            break;
        case RoundingOp::Trunc:
            input = v.ir.FPTrunc(input, fp_control);
            break;
        default:
            // Report the raw 4-bit field that was actually switched on; the
            // 2-bit `rounding` alias cannot identify the unhandled operation.
            throw NotImplementedException("Unimplemented rounding op {}", f2f.rounding_op.Value());
        }
    }
    if (f2f.sat != 0 && !any_fp64) {
        input = v.ir.FPSaturate(input);
    }

    switch (f2f.dst_size) {
    case FloatFormat::F16: {
        // Pair the result with a zero half and pack into a 32-bit register.
        const IR::F16 imm{v.ir.FPConvert(16, v.ir.Imm32(0.0f))};
        v.X(f2f.dest_reg, v.ir.PackFloat2x16(v.ir.CompositeConstruct(input, imm)));
        break;
    }
    case FloatFormat::F32:
        v.F(f2f.dest_reg, input);
        break;
    case FloatFormat::F64:
        v.D(f2f.dest_reg, input);
        break;
    default:
        throw NotImplementedException("Invalid dest format {}", f2f.dst_size.Value());
    }
}
} // Anonymous namespace
// F2F with a register source; decodes the source width and half selector.
void TranslatorVisitor::F2F_reg(u64 insn) {
    union {
        u64 insn;
        BitField<49, 1, u64> abs;
        BitField<10, 2, FloatFormat> src_size;
        BitField<41, 1, u64> selector;
    } const f2f{insn};
    IR::F16F32F64 src_a;
    switch (f2f.src_size) {
    case FloatFormat::F16: {
        // The selector picks which packed 16-bit half of the register is used.
        auto [lhs_a, rhs_a]{Extract(ir, GetReg20(insn), Swizzle::H1_H0)};
        src_a = f2f.selector != 0 ? rhs_a : lhs_a;
        break;
    }
    case FloatFormat::F32:
        src_a = GetFloatReg20(insn);
        break;
    case FloatFormat::F64:
        src_a = GetDoubleReg20(insn);
        break;
    default:
        // This switch validates the source format, not the destination.
        throw NotImplementedException("Invalid source format {}", f2f.src_size.Value());
    }
    F2F(*this, insn, src_a, f2f.abs != 0);
}
// F2F with a constant buffer source; decodes the source width and selector.
void TranslatorVisitor::F2F_cbuf(u64 insn) {
    union {
        u64 insn;
        BitField<49, 1, u64> abs;
        BitField<10, 2, FloatFormat> src_size;
        BitField<41, 1, u64> selector;
    } const f2f{insn};
    IR::F16F32F64 src_a;
    switch (f2f.src_size) {
    case FloatFormat::F16: {
        // The selector picks which packed 16-bit half of the word is used.
        auto [lhs_a, rhs_a]{Extract(ir, GetCbuf(insn), Swizzle::H1_H0)};
        src_a = f2f.selector != 0 ? rhs_a : lhs_a;
        break;
    }
    case FloatFormat::F32:
        src_a = GetFloatCbuf(insn);
        break;
    case FloatFormat::F64:
        src_a = GetDoubleCbuf(insn);
        break;
    default:
        // This switch validates the source format, not the destination.
        throw NotImplementedException("Invalid source format {}", f2f.src_size.Value());
    }
    F2F(*this, insn, src_a, f2f.abs != 0);
}
void TranslatorVisitor::F2F_imm([[maybe_unused]] u64 insn) {
union {
u64 insn;
BitField<49, 1, u64> abs;
BitField<10, 2, FloatFormat> src_size;
BitField<41, 1, u64> selector;
BitField<20, 19, u64> imm;
BitField<56, 1, u64> imm_neg;
} const f2f{insn};
IR::F16F32F64 src_a;
switch (f2f.src_size) {
case FloatFormat::F16: {
const u32 imm{static_cast<u32>(f2f.imm & 0x0000ffff)};
const IR::Value vector{ir.UnpackFloat2x16(ir.Imm32(imm | (imm << 16)))};
src_a = IR::F16{ir.CompositeExtract(vector, f2f.selector != 0 ? 0 : 1)};
if (f2f.imm_neg != 0) {
throw NotImplementedException("Neg bit on F16");
}
break;
}
case FloatFormat::F32:
src_a = GetFloatImm20(insn);
break;
case FloatFormat::F64:
src_a = GetDoubleImm20(insn);
break;
default:
throw NotImplementedException("Invalid dest format {}", f2f.src_size.Value());
}
F2F(*this, insn, src_a, f2f.abs != 0);
}
} // namespace Shader::Maxwell

View File

@@ -0,0 +1,253 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <limits>
#include "common/common_types.h"
#include "shader_recompiler/exception.h"
#include "shader_recompiler/frontend/maxwell/opcodes.h"
#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
namespace Shader::Maxwell {
namespace {
// Destination integer width of F2I.
enum class DestFormat : u64 {
    Invalid,
    I16,
    I32,
    I64,
};
// Source floating-point width of F2I.
enum class SrcFormat : u64 {
    Invalid,
    F16,
    F32,
    F64,
};
// Rounding mode applied before the float-to-integer conversion.
enum class Rounding : u64 {
    Round,
    Floor,
    Ceil,
    Trunc,
};
// Decoded F2I instruction word.
union F2I {
    u64 raw;
    BitField<0, 8, IR::Reg> dest_reg;
    BitField<8, 2, DestFormat> dest_format;
    BitField<10, 2, SrcFormat> src_format;
    BitField<12, 1, u64> is_signed;
    BitField<39, 2, Rounding> rounding;
    BitField<41, 1, u64> half;
    BitField<44, 1, u64> ftz;
    BitField<45, 1, u64> abs;
    BitField<47, 1, u64> cc;
    BitField<49, 1, u64> neg;
};
// Bit width of the destination integer format.
size_t BitSize(DestFormat dest_format) {
    if (dest_format == DestFormat::I16) {
        return 16;
    }
    if (dest_format == DestFormat::I32) {
        return 32;
    }
    if (dest_format == DestFormat::I64) {
        return 64;
    }
    throw NotImplementedException("Invalid destination format {}", dest_format);
}
// Returns the {max, min} saturation bounds, widened to f64, for a destination
// integer format in its signed or unsigned flavor.
std::pair<f64, f64> ClampBounds(DestFormat format, bool is_signed) {
    switch (format) {
    case DestFormat::I16:
        if (is_signed) {
            return {static_cast<f64>(std::numeric_limits<s16>::max()),
                    static_cast<f64>(std::numeric_limits<s16>::min())};
        }
        return {static_cast<f64>(std::numeric_limits<u16>::max()),
                static_cast<f64>(std::numeric_limits<u16>::min())};
    case DestFormat::I32:
        if (is_signed) {
            return {static_cast<f64>(std::numeric_limits<s32>::max()),
                    static_cast<f64>(std::numeric_limits<s32>::min())};
        }
        return {static_cast<f64>(std::numeric_limits<u32>::max()),
                static_cast<f64>(std::numeric_limits<u32>::min())};
    case DestFormat::I64:
        if (is_signed) {
            return {static_cast<f64>(std::numeric_limits<s64>::max()),
                    static_cast<f64>(std::numeric_limits<s64>::min())};
        }
        return {static_cast<f64>(std::numeric_limits<u64>::max()),
                static_cast<f64>(std::numeric_limits<u64>::min())};
    default:
        break;
    }
    throw NotImplementedException("Invalid destination format {}", format);
}
// Builds the F64 constant buffer operand of F2I after validating the binding
// and the (word-aligned, even) offset encoded in the instruction.
IR::F64 UnpackCbuf(TranslatorVisitor& v, u64 insn) {
    union {
        u64 raw;
        BitField<20, 14, s64> offset;
        BitField<34, 5, u64> binding;
    } const cbuf{insn};
    if (cbuf.binding >= 18) {
        throw NotImplementedException("Out of bounds constant buffer binding {}", cbuf.binding);
    }
    if (cbuf.offset >= 0x4'000 || cbuf.offset < 0) {
        throw NotImplementedException("Out of bounds constant buffer offset {}", cbuf.offset * 4);
    }
    if (cbuf.offset % 2 != 0) {
        throw NotImplementedException("Unaligned F64 constant buffer offset {}", cbuf.offset * 4);
    }
    const IR::U32 binding{v.ir.Imm32(static_cast<u32>(cbuf.binding))};
    // Reads the word one past the aligned offset and packs it with a zero word
    // into an F64. NOTE(review): assumes the constant buffer holds only the
    // upper 32 bits of the double operand - confirm against hardware behavior.
    const IR::U32 byte_offset{v.ir.Imm32(static_cast<u32>(cbuf.offset) * 4 + 4)};
    const IR::U32 cbuf_data{v.ir.GetCbuf(binding, byte_offset)};
    const IR::Value vector{v.ir.CompositeConstruct(v.ir.Imm32(0U), cbuf_data)};
    return v.ir.PackDouble2x32(vector);
}
/// Shared implementation for all F2I variants: applies abs/neg to the source, rounds it,
/// clamps to the destination integer range, converts, and patches NaN results where needed.
void TranslateF2I(TranslatorVisitor& v, u64 insn, const IR::F16F32F64& src_a) {
    // F2I is used to convert from a floating point value to an integer
    const F2I f2i{insn};
    // Denormal flushing only applies to F32 sources with 16/32-bit integer destinations.
    const bool denorm_cares{f2i.src_format != SrcFormat::F16 && f2i.src_format != SrcFormat::F64 &&
                            f2i.dest_format != DestFormat::I64};
    IR::FmzMode fmz_mode{IR::FmzMode::DontCare};
    if (denorm_cares) {
        fmz_mode = f2i.ftz != 0 ? IR::FmzMode::FTZ : IR::FmzMode::None;
    }
    const IR::FpControl fp_control{
        .no_contraction = true,
        .rounding = IR::FpRounding::DontCare,
        .fmz_mode = fmz_mode,
    };
    const IR::F16F32F64 op_a{v.ir.FPAbsNeg(src_a, f2i.abs != 0, f2i.neg != 0)};
    const IR::F16F32F64 rounded_value{[&] {
        switch (f2i.rounding) {
        case Rounding::Round:
            return v.ir.FPRoundEven(op_a, fp_control);
        case Rounding::Floor:
            return v.ir.FPFloor(op_a, fp_control);
        case Rounding::Ceil:
            return v.ir.FPCeil(op_a, fp_control);
        case Rounding::Trunc:
            return v.ir.FPTrunc(op_a, fp_control);
        default:
            throw NotImplementedException("Invalid F2I rounding {}", f2i.rounding.Value());
        }
    }()};
    const bool is_signed{f2i.is_signed != 0};
    const auto [max_bound, min_bound] = ClampBounds(f2i.dest_format, is_signed);
    // Clamp to the destination range while still in the source precision.
    IR::F16F32F64 intermediate;
    switch (f2i.src_format) {
    case SrcFormat::F16: {
        const IR::F16 max_val{v.ir.FPConvert(16, v.ir.Imm32(static_cast<f32>(max_bound)))};
        const IR::F16 min_val{v.ir.FPConvert(16, v.ir.Imm32(static_cast<f32>(min_bound)))};
        intermediate = v.ir.FPClamp(rounded_value, min_val, max_val);
        break;
    }
    case SrcFormat::F32: {
        const IR::F32 max_val{v.ir.Imm32(static_cast<f32>(max_bound))};
        const IR::F32 min_val{v.ir.Imm32(static_cast<f32>(min_bound))};
        intermediate = v.ir.FPClamp(rounded_value, min_val, max_val);
        break;
    }
    case SrcFormat::F64: {
        const IR::F64 max_val{v.ir.Imm64(max_bound)};
        const IR::F64 min_val{v.ir.Imm64(min_bound)};
        intermediate = v.ir.FPClamp(rounded_value, min_val, max_val);
        break;
    }
    default:
        // This switch dispatches on the source format, so report that field (the previous
        // message incorrectly named the destination format here).
        throw NotImplementedException("Invalid F2I source format {}", f2i.src_format.Value());
    }
    // Conversions to 16-bit destinations are performed at 32-bit width.
    const size_t bitsize{std::max<size_t>(32, BitSize(f2i.dest_format))};
    IR::U16U32U64 result{v.ir.ConvertFToI(bitsize, is_signed, intermediate)};
    bool handled_special_case = false;
    // NOTE(review): when exactly one of {F64 source, I64 destination} is present, NaN is
    // mapped to the minimum signed value; presumably mirrors hardware behavior - confirm.
    const bool special_nan_cases =
        (f2i.src_format == SrcFormat::F64) != (f2i.dest_format == DestFormat::I64);
    if (special_nan_cases) {
        if (f2i.dest_format == DestFormat::I32) {
            handled_special_case = true;
            result = IR::U32{v.ir.Select(v.ir.FPIsNan(op_a), v.ir.Imm32(0x8000'0000U), result)};
        } else if (f2i.dest_format == DestFormat::I64) {
            handled_special_case = true;
            result = IR::U64{
                v.ir.Select(v.ir.FPIsNan(op_a), v.ir.Imm64(0x8000'0000'0000'0000UL), result)};
        }
    }
    if (!handled_special_case && is_signed) {
        // All remaining signed conversions map NaN to zero.
        if (bitsize != 64) {
            result = IR::U32{v.ir.Select(v.ir.FPIsNan(op_a), v.ir.Imm32(0U), result)};
        } else {
            result = IR::U64{v.ir.Select(v.ir.FPIsNan(op_a), v.ir.Imm64(u64{0}), result)};
        }
    }
    if (bitsize == 64) {
        v.L(f2i.dest_reg, result);
    } else {
        v.X(f2i.dest_reg, result);
    }
    if (f2i.cc != 0) {
        throw NotImplementedException("F2I CC");
    }
}
} // Anonymous namespace
/// F2I with a register source operand.
void TranslatorVisitor::F2I_reg(u64 insn) {
    union {
        u64 raw;
        F2I base;
        BitField<20, 8, IR::Reg> src_reg;
    } const f2i{insn};
    const auto read_source{[&]() -> IR::F16F32F64 {
        switch (f2i.base.src_format) {
        case SrcFormat::F16:
            // Pick one of the two packed halves according to the half selector.
            return IR::F16{ir.CompositeExtract(ir.UnpackFloat2x16(X(f2i.src_reg)), f2i.base.half)};
        case SrcFormat::F32:
            return F(f2i.src_reg);
        case SrcFormat::F64:
            // Doubles are read from two consecutive registers.
            return ir.PackDouble2x32(ir.CompositeConstruct(X(f2i.src_reg), X(f2i.src_reg + 1)));
        default:
            throw NotImplementedException("Invalid F2I source format {}",
                                          f2i.base.src_format.Value());
        }
    }};
    TranslateF2I(*this, insn, read_source());
}
/// F2I with a constant buffer source operand.
void TranslatorVisitor::F2I_cbuf(u64 insn) {
    const F2I f2i{insn};
    const auto read_source{[&]() -> IR::F16F32F64 {
        switch (f2i.src_format) {
        case SrcFormat::F16:
            // Pick one of the two packed halves according to the half selector.
            return IR::F16{ir.CompositeExtract(ir.UnpackFloat2x16(GetCbuf(insn)), f2i.half)};
        case SrcFormat::F32:
            return GetFloatCbuf(insn);
        case SrcFormat::F64:
            return UnpackCbuf(*this, insn);
        default:
            throw NotImplementedException("Invalid F2I source format {}", f2i.src_format.Value());
        }
    }};
    TranslateF2I(*this, insn, read_source());
}
/// F2I with an immediate source operand is not implemented.
void TranslatorVisitor::F2I_imm(u64) {
    throw NotImplementedException("{}", Opcode::F2I_imm);
}
} // namespace Shader::Maxwell

View File

@@ -0,0 +1,94 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "common/common_types.h"
#include "shader_recompiler/exception.h"
#include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h"
#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
namespace Shader::Maxwell {
namespace {
/// Shared implementation for all FFMA variants: computes src_a * src_b + src_c with the given
/// sign, saturation, rounding and denormal-flush modifiers.
void FFMA(TranslatorVisitor& v, u64 insn, const IR::F32& src_b, const IR::F32& src_c, bool neg_a,
          bool neg_b, bool neg_c, bool sat, bool cc, FmzMode fmz_mode, FpRounding fp_rounding) {
    union {
        u64 raw;
        BitField<0, 8, IR::Reg> dest_reg;
        BitField<8, 8, IR::Reg> src_a;
    } const ffma{insn};
    // Condition-code writeback is not implemented.
    if (cc) {
        throw NotImplementedException("FFMA CC");
    }
    const IR::F32 op_a{v.ir.FPAbsNeg(v.F(ffma.src_a), false, neg_a)};
    const IR::F32 op_b{v.ir.FPAbsNeg(src_b, false, neg_b)};
    const IR::F32 op_c{v.ir.FPAbsNeg(src_c, false, neg_c)};
    const IR::FpControl fp_control{
        .no_contraction = true,
        .rounding = CastFpRounding(fp_rounding),
        .fmz_mode = CastFmzMode(fmz_mode),
    };
    IR::F32 value{v.ir.FPFma(op_a, op_b, op_c, fp_control)};
    if (fmz_mode == FmzMode::FMZ && !sat) {
        // Do not implement FMZ if SAT is enabled, as it does the logic for us.
        // On D3D9 mode, anything * 0 is zero, even NAN and infinity
        const IR::F32 zero{v.ir.Imm32(0.0f)};
        const IR::U1 zero_a{v.ir.FPEqual(op_a, zero)};
        const IR::U1 zero_b{v.ir.FPEqual(op_b, zero)};
        const IR::U1 any_zero{v.ir.LogicalOr(zero_a, zero_b)};
        // If either multiplicand is zero the result collapses to the addend.
        value = IR::F32{v.ir.Select(any_zero, op_c, value)};
    }
    if (sat) {
        value = v.ir.FPSaturate(value);
    }
    v.F(ffma.dest_reg, value);
}
/// Decodes the modifier bits common to the non-32I FFMA encodings and forwards them to the
/// full implementation above.
void FFMA(TranslatorVisitor& v, u64 insn, const IR::F32& src_b, const IR::F32& src_c) {
    union {
        u64 raw;
        BitField<47, 1, u64> cc;
        BitField<48, 1, u64> neg_b;
        BitField<49, 1, u64> neg_c;
        BitField<50, 1, u64> sat;
        BitField<51, 2, FpRounding> fp_rounding;
        BitField<53, 2, FmzMode> fmz_mode;
    } const mods{insn};
    const bool negate_b{mods.neg_b != 0};
    const bool negate_c{mods.neg_c != 0};
    FFMA(v, insn, src_b, src_c, false, negate_b, negate_c, mods.sat != 0, mods.cc != 0,
         mods.fmz_mode, mods.fp_rounding);
}
} // Anonymous namespace
/// FFMA with register operands for both multiplier and addend.
void TranslatorVisitor::FFMA_reg(u64 insn) {
    FFMA(*this, insn, GetFloatReg20(insn), GetFloatReg39(insn));
}
/// FFMA with a register multiplier and a constant buffer addend.
void TranslatorVisitor::FFMA_rc(u64 insn) {
    FFMA(*this, insn, GetFloatReg39(insn), GetFloatCbuf(insn));
}
/// FFMA with a constant buffer multiplier and a register addend.
void TranslatorVisitor::FFMA_cr(u64 insn) {
    FFMA(*this, insn, GetFloatCbuf(insn), GetFloatReg39(insn));
}
/// FFMA with an immediate multiplier and a register addend.
void TranslatorVisitor::FFMA_imm(u64 insn) {
    FFMA(*this, insn, GetFloatImm20(insn), GetFloatReg39(insn));
}
/// FFMA with a 32-bit immediate multiplier; rounding is fixed to RN.
void TranslatorVisitor::FFMA32I(u64 insn) {
    union {
        u64 raw;
        BitField<0, 8, IR::Reg> src_c; // FFMA32I mirrors the destination and addition register
        BitField<52, 1, u64> cc;
        BitField<53, 2, FmzMode> fmz_mode;
        BitField<55, 1, u64> sat;
        BitField<56, 1, u64> neg_a;
        BitField<57, 1, u64> neg_c;
    } const ffma32i{insn};
    FFMA(*this, insn, GetFloatImm32(insn), F(ffma32i.src_c), ffma32i.neg_a != 0, false,
         ffma32i.neg_c != 0, ffma32i.sat != 0, ffma32i.cc != 0, ffma32i.fmz_mode, FpRounding::RN);
}
} // namespace Shader::Maxwell

View File

@@ -0,0 +1,62 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "common/bit_field.h"
#include "common/common_types.h"
#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
namespace Shader::Maxwell {
namespace {
/// FMNMX: writes the minimum or the maximum of the two operands, selected by a predicate
/// (optionally inverted via neg_pred).
void FMNMX(TranslatorVisitor& v, u64 insn, const IR::F32& src_b) {
    union {
        u64 insn;
        BitField<0, 8, IR::Reg> dest_reg;
        BitField<8, 8, IR::Reg> src_a_reg;
        BitField<39, 3, IR::Pred> pred;
        BitField<42, 1, u64> neg_pred;
        BitField<44, 1, u64> ftz;
        BitField<45, 1, u64> negate_b;
        BitField<46, 1, u64> abs_a;
        BitField<47, 1, u64> cc;
        BitField<48, 1, u64> negate_a;
        BitField<49, 1, u64> abs_b;
    } const fmnmx{insn};
    // Condition-code writeback is not implemented.
    if (fmnmx.cc) {
        throw NotImplementedException("FMNMX CC");
    }
    const IR::F32 lhs{v.ir.FPAbsNeg(v.F(fmnmx.src_a_reg), fmnmx.abs_a != 0, fmnmx.negate_a != 0)};
    const IR::F32 rhs{v.ir.FPAbsNeg(src_b, fmnmx.abs_b != 0, fmnmx.negate_b != 0)};
    const IR::FpControl control{
        .no_contraction = false,
        .rounding = IR::FpRounding::DontCare,
        .fmz_mode = (fmnmx.ftz != 0 ? IR::FmzMode::FTZ : IR::FmzMode::None),
    };
    // When the predicate holds the minimum is written; neg_pred flips the two outcomes.
    IR::F32 on_true{v.ir.FPMin(lhs, rhs, control)};
    IR::F32 on_false{v.ir.FPMax(lhs, rhs, control)};
    if (fmnmx.neg_pred != 0) {
        std::swap(on_true, on_false);
    }
    const IR::U1 pred{v.ir.GetPred(fmnmx.pred)};
    v.F(fmnmx.dest_reg, IR::F32{v.ir.Select(pred, on_true, on_false)});
}
} // Anonymous namespace
/// FMNMX with a register second operand.
void TranslatorVisitor::FMNMX_reg(u64 insn) {
    FMNMX(*this, insn, GetFloatReg20(insn));
}
/// FMNMX with a constant buffer second operand.
void TranslatorVisitor::FMNMX_cbuf(u64 insn) {
    FMNMX(*this, insn, GetFloatCbuf(insn));
}
/// FMNMX with an immediate second operand.
void TranslatorVisitor::FMNMX_imm(u64 insn) {
    FMNMX(*this, insn, GetFloatImm20(insn));
}
} // namespace Shader::Maxwell

View File

@@ -0,0 +1,71 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "common/bit_field.h"
#include "common/common_types.h"
#include "shader_recompiler/exception.h"
#include "shader_recompiler/frontend/maxwell/opcodes.h"
#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
namespace Shader::Maxwell {
namespace {
// Special function selected by the MUFU instruction.
enum class Operation : u64 {
    Cos = 0,
    Sin = 1,
    Ex2 = 2,    // Base 2 exponent
    Lg2 = 3,    // Base 2 logarithm
    Rcp = 4,    // Reciprocal
    Rsq = 5,    // Reciprocal square root
    Rcp64H = 6, // 64-bit reciprocal
    Rsq64H = 7, // 64-bit reciprocal square root
    Sqrt = 8,
};
} // Anonymous namespace
/// MUFU: evaluates one of the multi-function-unit special functions (see Operation) on an
/// optionally abs/negated F32 source, with optional saturation of the result.
void TranslatorVisitor::MUFU(u64 insn) {
    // MUFU is used to implement a bunch of special functions. See Operation.
    union {
        u64 raw;
        BitField<0, 8, IR::Reg> dest_reg;
        BitField<8, 8, IR::Reg> src_reg;
        BitField<20, 4, Operation> operation;
        BitField<46, 1, u64> abs;
        BitField<48, 1, u64> neg;
        BitField<50, 1, u64> sat;
    } const mufu{insn};
    const IR::F32 op_a{ir.FPAbsNeg(F(mufu.src_reg), mufu.abs != 0, mufu.neg != 0)};
    IR::F32 value{[&]() -> IR::F32 {
        switch (mufu.operation) {
        case Operation::Cos:
            return ir.FPCos(op_a);
        case Operation::Sin:
            return ir.FPSin(op_a);
        case Operation::Ex2:
            return ir.FPExp2(op_a);
        case Operation::Lg2:
            return ir.FPLog2(op_a);
        case Operation::Rcp:
            return ir.FPRecip(op_a);
        case Operation::Rsq:
            return ir.FPRecipSqrt(op_a);
        case Operation::Rcp64H:
            // 64-bit variants are not implemented.
            throw NotImplementedException("MUFU.RCP64H");
        case Operation::Rsq64H:
            throw NotImplementedException("MUFU.RSQ64H");
        case Operation::Sqrt:
            return ir.FPSqrt(op_a);
        default:
            throw NotImplementedException("Invalid MUFU operation {}", mufu.operation.Value());
        }
    }()};
    if (mufu.sat) {
        value = ir.FPSaturate(value);
    }
    F(mufu.dest_reg, value);
}
} // namespace Shader::Maxwell

View File

@@ -0,0 +1,127 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "common/bit_field.h"
#include "common/common_types.h"
#include "shader_recompiler/frontend/ir/ir_emitter.h"
#include "shader_recompiler/frontend/ir/modifiers.h"
#include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h"
#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
namespace Shader::Maxwell {
namespace {
// Constant scale applied to the first FMUL operand (see ScaleFactor for the values).
enum class Scale : u64 {
    None,
    D2, // Divide by 2
    D4, // Divide by 4
    D8, // Divide by 8
    M8, // Multiply by 8
    M4, // Multiply by 4
    M2, // Multiply by 2
    INVALIDSCALE37,
};
/// Translates the FMUL scale field into the constant factor applied to the first operand.
float ScaleFactor(Scale scale) {
    switch (scale) {
    case Scale::None:
        return 1.0f;
    case Scale::D2:
        return 0.5f;
    case Scale::D4:
        return 0.25f;
    case Scale::D8:
        return 0.125f;
    case Scale::M8:
        return 8.0f;
    case Scale::M4:
        return 4.0f;
    case Scale::M2:
        return 2.0f;
    case Scale::INVALIDSCALE37:
        break;
    }
    throw NotImplementedException("Invalid FMUL scale {}", scale);
}
/// Shared implementation for all FMUL variants: multiplies the register operand (optionally
/// pre-scaled) by src_b with the given rounding, denormal-flush, saturation and sign modifiers.
void FMUL(TranslatorVisitor& v, u64 insn, const IR::F32& src_b, FmzMode fmz_mode,
          FpRounding fp_rounding, Scale scale, bool sat, bool cc, bool neg_b) {
    union {
        u64 raw;
        BitField<0, 8, IR::Reg> dest_reg;
        BitField<8, 8, IR::Reg> src_a;
    } const fmul{insn};
    // Condition-code writeback is not implemented.
    if (cc) {
        throw NotImplementedException("FMUL CC");
    }
    IR::F32 op_a{v.F(fmul.src_a)};
    if (scale != Scale::None) {
        // Scaling is only handled in the FTZ + RN configuration.
        if (fmz_mode != FmzMode::FTZ || fp_rounding != FpRounding::RN) {
            throw NotImplementedException("FMUL scale with non-FMZ or non-RN modifiers");
        }
        // Pre-multiply the first operand by the constant factor encoded in the instruction.
        op_a = v.ir.FPMul(op_a, v.ir.Imm32(ScaleFactor(scale)));
    }
    const IR::F32 op_b{v.ir.FPAbsNeg(src_b, false, neg_b)};
    const IR::FpControl fp_control{
        .no_contraction = true,
        .rounding = CastFpRounding(fp_rounding),
        .fmz_mode = CastFmzMode(fmz_mode),
    };
    IR::F32 value{v.ir.FPMul(op_a, op_b, fp_control)};
    if (fmz_mode == FmzMode::FMZ && !sat) {
        // Do not implement FMZ if SAT is enabled, as it does the logic for us.
        // On D3D9 mode, anything * 0 is zero, even NAN and infinity
        const IR::F32 zero{v.ir.Imm32(0.0f)};
        const IR::U1 zero_a{v.ir.FPEqual(op_a, zero)};
        const IR::U1 zero_b{v.ir.FPEqual(op_b, zero)};
        const IR::U1 any_zero{v.ir.LogicalOr(zero_a, zero_b)};
        value = IR::F32{v.ir.Select(any_zero, zero, value)};
    }
    if (sat) {
        value = v.ir.FPSaturate(value);
    }
    v.F(fmul.dest_reg, value);
}
/// Decodes the modifier bits common to the non-32I FMUL encodings and forwards them to the
/// full implementation above.
void FMUL(TranslatorVisitor& v, u64 insn, const IR::F32& src_b) {
    union {
        u64 raw;
        BitField<39, 2, FpRounding> fp_rounding;
        BitField<41, 3, Scale> scale;
        BitField<44, 2, FmzMode> fmz;
        BitField<47, 1, u64> cc;
        BitField<48, 1, u64> neg_b;
        BitField<50, 1, u64> sat;
    } const mods{insn};
    FMUL(v, insn, src_b, mods.fmz, mods.fp_rounding, mods.scale, mods.sat != 0, mods.cc != 0,
         mods.neg_b != 0);
}
} // Anonymous namespace
/// FMUL with a register second operand.
void TranslatorVisitor::FMUL_reg(u64 insn) {
    return FMUL(*this, insn, GetFloatReg20(insn));
}
/// FMUL with a constant buffer second operand.
void TranslatorVisitor::FMUL_cbuf(u64 insn) {
    return FMUL(*this, insn, GetFloatCbuf(insn));
}
/// FMUL with an immediate second operand.
void TranslatorVisitor::FMUL_imm(u64 insn) {
    return FMUL(*this, insn, GetFloatImm20(insn));
}
/// FMUL with a 32-bit immediate second operand; rounding is fixed to RN and no scale applies.
void TranslatorVisitor::FMUL32I(u64 insn) {
    union {
        u64 raw;
        BitField<52, 1, u64> cc;
        BitField<53, 2, FmzMode> fmz;
        BitField<55, 1, u64> sat;
    } const mods{insn};
    const IR::F32 immediate{GetFloatImm32(insn)};
    FMUL(*this, insn, immediate, mods.fmz, FpRounding::RN, Scale::None, mods.sat != 0,
         mods.cc != 0, false);
}
} // namespace Shader::Maxwell

View File

@@ -0,0 +1,41 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "common/bit_field.h"
#include "common/common_types.h"
#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
namespace Shader::Maxwell {
namespace {
// Range reduction mode encoded in the instruction; decoded below but currently unused.
enum class Mode : u64 {
    SINCOS,
    EX2,
};
// RRO (range reduction operator): this implementation only forwards the operand with abs/neg
// applied. NOTE(review): presumably the MUFU implementation accepts unreduced inputs, making
// the reduction step a no-op here - confirm.
void RRO(TranslatorVisitor& v, u64 insn, const IR::F32& src) {
    union {
        u64 raw;
        BitField<0, 8, IR::Reg> dest_reg;
        BitField<39, 1, Mode> mode;
        BitField<45, 1, u64> neg;
        BitField<49, 1, u64> abs;
    } const rro{insn};
    v.F(rro.dest_reg, v.ir.FPAbsNeg(src, rro.abs != 0, rro.neg != 0));
}
} // Anonymous namespace
/// RRO with a register source operand.
void TranslatorVisitor::RRO_reg(u64 insn) {
    RRO(*this, insn, GetFloatReg20(insn));
}
/// RRO with a constant buffer source operand.
void TranslatorVisitor::RRO_cbuf(u64 insn) {
    RRO(*this, insn, GetFloatCbuf(insn));
}
/// RRO with an immediate source operand is not implemented.
void TranslatorVisitor::RRO_imm(u64) {
    throw NotImplementedException("RRO (imm)");
}
} // namespace Shader::Maxwell

View File

@@ -0,0 +1,60 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "common/bit_field.h"
#include "common/common_types.h"
#include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h"
#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
namespace Shader::Maxwell {
namespace {
/// FSETP: compares op_a against src_b, then combines the comparison result (and its
/// complement) with an auxiliary predicate, writing two destination predicates.
void FSETP(TranslatorVisitor& v, u64 insn, const IR::F32& src_b) {
    union {
        u64 insn;
        BitField<0, 3, IR::Pred> dest_pred_b;
        BitField<3, 3, IR::Pred> dest_pred_a;
        BitField<6, 1, u64> negate_b;
        BitField<7, 1, u64> abs_a;
        BitField<8, 8, IR::Reg> src_a_reg;
        BitField<39, 3, IR::Pred> bop_pred;
        BitField<42, 1, u64> neg_bop_pred;
        BitField<43, 1, u64> negate_a;
        BitField<44, 1, u64> abs_b;
        BitField<45, 2, BooleanOp> bop;
        BitField<47, 1, u64> ftz;
        BitField<48, 4, FPCompareOp> compare_op;
    } const fsetp{insn};
    const IR::F32 op_a{v.ir.FPAbsNeg(v.F(fsetp.src_a_reg), fsetp.abs_a != 0, fsetp.negate_a != 0)};
    const IR::F32 op_b = v.ir.FPAbsNeg(src_b, fsetp.abs_b != 0, fsetp.negate_b != 0);
    const IR::FpControl control{
        .no_contraction = false,
        .rounding = IR::FpRounding::DontCare,
        .fmz_mode = (fsetp.ftz != 0 ? IR::FmzMode::FTZ : IR::FmzMode::None),
    };
    const BooleanOp bop{fsetp.bop};
    const FPCompareOp compare_op{fsetp.compare_op};
    const IR::U1 comparison{FloatingPointCompare(v.ir, op_a, op_b, compare_op, control)};
    const IR::U1 bop_pred{v.ir.GetPred(fsetp.bop_pred, fsetp.neg_bop_pred != 0)};
    // dest_pred_a gets (comparison BOP pred); dest_pred_b gets (!comparison BOP pred).
    const IR::U1 result_a{PredicateCombine(v.ir, comparison, bop_pred, bop)};
    const IR::U1 result_b{PredicateCombine(v.ir, v.ir.LogicalNot(comparison), bop_pred, bop)};
    v.ir.SetPred(fsetp.dest_pred_a, result_a);
    v.ir.SetPred(fsetp.dest_pred_b, result_b);
}
} // Anonymous namespace
/// FSETP with a register second operand.
void TranslatorVisitor::FSETP_reg(u64 insn) {
    FSETP(*this, insn, GetFloatReg20(insn));
}
/// FSETP with a constant buffer second operand.
void TranslatorVisitor::FSETP_cbuf(u64 insn) {
    FSETP(*this, insn, GetFloatCbuf(insn));
}
/// FSETP with an immediate second operand.
void TranslatorVisitor::FSETP_imm(u64 insn) {
    FSETP(*this, insn, GetFloatImm20(insn));
}
} // namespace Shader::Maxwell

View File

@@ -0,0 +1,44 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "common/common_types.h"
#include "shader_recompiler/exception.h"
#include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h"
#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
namespace Shader::Maxwell {
/// FSWZADD: floating-point swizzled add; the 8-bit swizzle immediate is forwarded to the
/// FSwizzleAdd IR operation together with the rounding and denormal-flush modifiers.
void TranslatorVisitor::FSWZADD(u64 insn) {
    union {
        u64 raw;
        BitField<0, 8, IR::Reg> dest_reg;
        BitField<28, 8, u64> swizzle;
        BitField<38, 1, u64> ndv;
        BitField<39, 2, FpRounding> round;
        BitField<44, 1, u64> ftz;
        BitField<47, 1, u64> cc;
    } const fswzadd{insn};
    // NDV is not implemented.
    if (fswzadd.ndv != 0) {
        throw NotImplementedException("FSWZADD NDV");
    }
    const IR::F32 src_a{GetFloatReg8(insn)};
    const IR::F32 src_b{GetFloatReg20(insn)};
    const IR::U32 swizzle{ir.Imm32(static_cast<u32>(fswzadd.swizzle))};
    const IR::FpControl fp_control{
        .no_contraction = false,
        .rounding = CastFpRounding(fswzadd.round),
        .fmz_mode = (fswzadd.ftz != 0 ? IR::FmzMode::FTZ : IR::FmzMode::None),
    };
    const IR::F32 result{ir.FSwizzleAdd(src_a, src_b, swizzle, fp_control)};
    F(fswzadd.dest_reg, result);
    // Condition-code writeback is not implemented.
    if (fswzadd.cc != 0) {
        throw NotImplementedException("FSWZADD CC");
    }
}
} // namespace Shader::Maxwell

View File

@@ -0,0 +1,125 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.h"
namespace Shader::Maxwell {
namespace {
/// Shared implementation for all HADD2 variants: adds two component pairs extracted from the
/// operands, with optional f16->f32 promotion when the operand types mismatch, then merges
/// both results back into the destination register.
void HADD2(TranslatorVisitor& v, u64 insn, Merge merge, bool ftz, bool sat, bool abs_a, bool neg_a,
           Swizzle swizzle_a, bool abs_b, bool neg_b, Swizzle swizzle_b, const IR::U32& src_b) {
    union {
        u64 raw;
        BitField<0, 8, IR::Reg> dest_reg;
        BitField<8, 8, IR::Reg> src_a;
    } const hadd2{insn};
    auto [lhs_a, rhs_a]{Extract(v.ir, v.X(hadd2.src_a), swizzle_a)};
    auto [lhs_b, rhs_b]{Extract(v.ir, src_b, swizzle_b)};
    // If one operand is f16 and the other f32, compute in f32 and convert back at the end.
    const bool promotion{lhs_a.Type() != lhs_b.Type()};
    if (promotion) {
        if (lhs_a.Type() == IR::Type::F16) {
            lhs_a = v.ir.FPConvert(32, lhs_a);
            rhs_a = v.ir.FPConvert(32, rhs_a);
        }
        if (lhs_b.Type() == IR::Type::F16) {
            lhs_b = v.ir.FPConvert(32, lhs_b);
            rhs_b = v.ir.FPConvert(32, rhs_b);
        }
    }
    // abs/neg apply uniformly to both components of an operand.
    lhs_a = v.ir.FPAbsNeg(lhs_a, abs_a, neg_a);
    rhs_a = v.ir.FPAbsNeg(rhs_a, abs_a, neg_a);
    lhs_b = v.ir.FPAbsNeg(lhs_b, abs_b, neg_b);
    rhs_b = v.ir.FPAbsNeg(rhs_b, abs_b, neg_b);
    const IR::FpControl fp_control{
        .no_contraction = true,
        .rounding = IR::FpRounding::DontCare,
        .fmz_mode = (ftz ? IR::FmzMode::FTZ : IR::FmzMode::None),
    };
    IR::F16F32F64 lhs{v.ir.FPAdd(lhs_a, lhs_b, fp_control)};
    IR::F16F32F64 rhs{v.ir.FPAdd(rhs_a, rhs_b, fp_control)};
    if (sat) {
        lhs = v.ir.FPSaturate(lhs);
        rhs = v.ir.FPSaturate(rhs);
    }
    if (promotion) {
        lhs = v.ir.FPConvert(16, lhs);
        rhs = v.ir.FPConvert(16, rhs);
    }
    v.X(hadd2.dest_reg, MergeResult(v.ir, hadd2.dest_reg, lhs, rhs, merge));
}
/// Decodes the modifier bits shared by the non-32I HADD2 forms and forwards them to the
/// full implementation above.
void HADD2(TranslatorVisitor& v, u64 insn, bool sat, bool abs_b, bool neg_b, Swizzle swizzle_b,
           const IR::U32& src_b) {
    union {
        u64 raw;
        BitField<39, 1, u64> ftz;
        BitField<43, 1, u64> neg_a;
        BitField<44, 1, u64> abs_a;
        BitField<47, 2, Swizzle> swizzle_a;
        BitField<49, 2, Merge> merge;
    } const common{insn};
    HADD2(v, insn, common.merge, common.ftz != 0, sat, common.abs_a != 0, common.neg_a != 0,
          common.swizzle_a, abs_b, neg_b, swizzle_b, src_b);
}
} // Anonymous namespace
/// HADD2 with a register second operand.
void TranslatorVisitor::HADD2_reg(u64 insn) {
    union {
        u64 raw;
        BitField<32, 1, u64> sat;
        BitField<31, 1, u64> neg_b;
        BitField<30, 1, u64> abs_b;
        BitField<28, 2, Swizzle> swizzle_b;
    } const hadd2{insn};
    HADD2(*this, insn, hadd2.sat != 0, hadd2.abs_b != 0, hadd2.neg_b != 0, hadd2.swizzle_b,
          GetReg20(insn));
}
/// HADD2 with a constant buffer second operand (always treated as a full f32).
void TranslatorVisitor::HADD2_cbuf(u64 insn) {
    union {
        u64 raw;
        BitField<52, 1, u64> sat;
        BitField<56, 1, u64> neg_b;
        BitField<54, 1, u64> abs_b;
    } const hadd2{insn};
    HADD2(*this, insn, hadd2.sat != 0, hadd2.abs_b != 0, hadd2.neg_b != 0, Swizzle::F32,
          GetCbuf(insn));
}
/// HADD2 with a pair of 9-bit-encoded f16 immediates.
void TranslatorVisitor::HADD2_imm(u64 insn) {
    union {
        u64 raw;
        BitField<52, 1, u64> sat;
        BitField<56, 1, u64> neg_high;
        BitField<30, 9, u64> high;
        BitField<29, 1, u64> neg_low;
        BitField<20, 9, u64> low;
    } const hadd2{insn};
    // Rebuild two packed f16 values: each 9-bit field is shifted into the upper bits of a
    // half (low 6 bits zero) and the neg bit becomes the sign bit (bit 15 / bit 31).
    const u32 imm{
        static_cast<u32>(hadd2.low << 6) | static_cast<u32>((hadd2.neg_low != 0 ? 1 : 0) << 15) |
        static_cast<u32>(hadd2.high << 22) | static_cast<u32>((hadd2.neg_high != 0 ? 1 : 0) << 31)};
    HADD2(*this, insn, hadd2.sat != 0, false, false, Swizzle::H1_H0, ir.Imm32(imm));
}
/// HADD2 with a 32-bit immediate holding both packed f16 components.
void TranslatorVisitor::HADD2_32I(u64 insn) {
    union {
        u64 raw;
        BitField<55, 1, u64> ftz;
        BitField<52, 1, u64> sat;
        BitField<56, 1, u64> neg_a;
        BitField<53, 2, Swizzle> swizzle_a;
        BitField<20, 32, u64> imm32;
    } const hadd2{insn};
    const u32 imm{static_cast<u32>(hadd2.imm32)};
    HADD2(*this, insn, Merge::H1_H0, hadd2.ftz != 0, hadd2.sat != 0, false, hadd2.neg_a != 0,
          hadd2.swizzle_a, false, false, Swizzle::H1_H0, ir.Imm32(imm));
}
} // namespace Shader::Maxwell

View File

@@ -0,0 +1,169 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.h"
namespace Shader::Maxwell {
namespace {
/// Shared implementation for all HFMA2 variants: computes a*b+c on two component pairs, with
/// optional f16->f32 promotion on type mismatch, FMZ zero-collapse handling, saturation, and
/// merges both results back into the destination register.
void HFMA2(TranslatorVisitor& v, u64 insn, Merge merge, Swizzle swizzle_a, bool neg_b, bool neg_c,
           Swizzle swizzle_b, Swizzle swizzle_c, const IR::U32& src_b, const IR::U32& src_c,
           bool sat, HalfPrecision precision) {
    union {
        u64 raw;
        BitField<0, 8, IR::Reg> dest_reg;
        BitField<8, 8, IR::Reg> src_a;
    } const hfma2{insn};
    auto [lhs_a, rhs_a]{Extract(v.ir, v.X(hfma2.src_a), swizzle_a)};
    auto [lhs_b, rhs_b]{Extract(v.ir, src_b, swizzle_b)};
    auto [lhs_c, rhs_c]{Extract(v.ir, src_c, swizzle_c)};
    // If any operand type differs, compute in f32 and convert back at the end.
    const bool promotion{lhs_a.Type() != lhs_b.Type() || lhs_a.Type() != lhs_c.Type()};
    if (promotion) {
        if (lhs_a.Type() == IR::Type::F16) {
            lhs_a = v.ir.FPConvert(32, lhs_a);
            rhs_a = v.ir.FPConvert(32, rhs_a);
        }
        if (lhs_b.Type() == IR::Type::F16) {
            lhs_b = v.ir.FPConvert(32, lhs_b);
            rhs_b = v.ir.FPConvert(32, rhs_b);
        }
        if (lhs_c.Type() == IR::Type::F16) {
            lhs_c = v.ir.FPConvert(32, lhs_c);
            rhs_c = v.ir.FPConvert(32, rhs_c);
        }
    }
    // Only negation (no abs) is encoded for the b and c operands.
    lhs_b = v.ir.FPAbsNeg(lhs_b, false, neg_b);
    rhs_b = v.ir.FPAbsNeg(rhs_b, false, neg_b);
    lhs_c = v.ir.FPAbsNeg(lhs_c, false, neg_c);
    rhs_c = v.ir.FPAbsNeg(rhs_c, false, neg_c);
    const IR::FpControl fp_control{
        .no_contraction = true,
        .rounding = IR::FpRounding::DontCare,
        .fmz_mode = HalfPrecision2FmzMode(precision),
    };
    IR::F16F32F64 lhs{v.ir.FPFma(lhs_a, lhs_b, lhs_c, fp_control)};
    IR::F16F32F64 rhs{v.ir.FPFma(rhs_a, rhs_b, rhs_c, fp_control)};
    if (precision == HalfPrecision::FMZ && !sat) {
        // Do not implement FMZ if SAT is enabled, as it does the logic for us.
        // On D3D9 mode, anything * 0 is zero, even NAN and infinity
        const IR::F32 zero{v.ir.Imm32(0.0f)};
        const IR::U1 lhs_zero_a{v.ir.FPEqual(lhs_a, zero)};
        const IR::U1 lhs_zero_b{v.ir.FPEqual(lhs_b, zero)};
        const IR::U1 lhs_any_zero{v.ir.LogicalOr(lhs_zero_a, lhs_zero_b)};
        // If either multiplicand is zero the result collapses to the addend.
        lhs = IR::F16F32F64{v.ir.Select(lhs_any_zero, lhs_c, lhs)};
        const IR::U1 rhs_zero_a{v.ir.FPEqual(rhs_a, zero)};
        const IR::U1 rhs_zero_b{v.ir.FPEqual(rhs_b, zero)};
        const IR::U1 rhs_any_zero{v.ir.LogicalOr(rhs_zero_a, rhs_zero_b)};
        rhs = IR::F16F32F64{v.ir.Select(rhs_any_zero, rhs_c, rhs)};
    }
    if (sat) {
        lhs = v.ir.FPSaturate(lhs);
        rhs = v.ir.FPSaturate(rhs);
    }
    if (promotion) {
        lhs = v.ir.FPConvert(16, lhs);
        rhs = v.ir.FPConvert(16, rhs);
    }
    v.X(hfma2.dest_reg, MergeResult(v.ir, hfma2.dest_reg, lhs, rhs, merge));
}
/// Decodes the swizzle/merge bits common to most HFMA2 encodings and forwards them to the
/// full implementation above.
void HFMA2(TranslatorVisitor& v, u64 insn, bool neg_b, bool neg_c, Swizzle swizzle_b,
           Swizzle swizzle_c, const IR::U32& src_b, const IR::U32& src_c, bool sat,
           HalfPrecision precision) {
    union {
        u64 raw;
        BitField<47, 2, Swizzle> swizzle_a;
        BitField<49, 2, Merge> merge;
    } const common{insn};
    HFMA2(v, insn, common.merge, common.swizzle_a, neg_b, neg_c, swizzle_b, swizzle_c, src_b,
          src_c, sat, precision);
}
} // Anonymous namespace
/// HFMA2 with register operands for both multiplier and addend.
void TranslatorVisitor::HFMA2_reg(u64 insn) {
    union {
        u64 raw;
        BitField<28, 2, Swizzle> swizzle_b;
        BitField<32, 1, u64> saturate;
        BitField<31, 1, u64> neg_b;
        BitField<30, 1, u64> neg_c;
        BitField<35, 2, Swizzle> swizzle_c;
        BitField<37, 2, HalfPrecision> precision;
    } const hfma2{insn};
    HFMA2(*this, insn, hfma2.neg_b != 0, hfma2.neg_c != 0, hfma2.swizzle_b, hfma2.swizzle_c,
          GetReg20(insn), GetReg39(insn), hfma2.saturate != 0, hfma2.precision);
}
/// HFMA2 with a register multiplier and a constant buffer addend (cbuf read as full f32).
void TranslatorVisitor::HFMA2_rc(u64 insn) {
    union {
        u64 raw;
        BitField<51, 1, u64> neg_c;
        BitField<52, 1, u64> saturate;
        BitField<53, 2, Swizzle> swizzle_b;
        BitField<56, 1, u64> neg_b;
        BitField<57, 2, HalfPrecision> precision;
    } const hfma2{insn};
    HFMA2(*this, insn, hfma2.neg_b != 0, hfma2.neg_c != 0, hfma2.swizzle_b, Swizzle::F32,
          GetReg39(insn), GetCbuf(insn), hfma2.saturate != 0, hfma2.precision);
}
/// HFMA2 with a constant buffer multiplier (read as full f32) and a register addend.
void TranslatorVisitor::HFMA2_cr(u64 insn) {
    union {
        u64 raw;
        BitField<51, 1, u64> neg_c;
        BitField<52, 1, u64> saturate;
        BitField<53, 2, Swizzle> swizzle_c;
        BitField<56, 1, u64> neg_b;
        BitField<57, 2, HalfPrecision> precision;
    } const hfma2{insn};
    HFMA2(*this, insn, hfma2.neg_b != 0, hfma2.neg_c != 0, Swizzle::F32, hfma2.swizzle_c,
          GetCbuf(insn), GetReg39(insn), hfma2.saturate != 0, hfma2.precision);
}
/// HFMA2 with a pair of 9-bit-encoded f16 immediates as the multiplier.
void TranslatorVisitor::HFMA2_imm(u64 insn) {
    union {
        u64 raw;
        BitField<51, 1, u64> neg_c;
        BitField<52, 1, u64> saturate;
        BitField<53, 2, Swizzle> swizzle_c;
        BitField<56, 1, u64> neg_high;
        BitField<30, 9, u64> high;
        BitField<29, 1, u64> neg_low;
        BitField<20, 9, u64> low;
        BitField<57, 2, HalfPrecision> precision;
    } const hfma2{insn};
    // Rebuild two packed f16 values: each 9-bit field is shifted into the upper bits of a
    // half (low 6 bits zero) and the neg bit becomes the sign bit (bit 15 / bit 31).
    const u32 imm{
        static_cast<u32>(hfma2.low << 6) | static_cast<u32>((hfma2.neg_low != 0 ? 1 : 0) << 15) |
        static_cast<u32>(hfma2.high << 22) | static_cast<u32>((hfma2.neg_high != 0 ? 1 : 0) << 31)};
    HFMA2(*this, insn, false, hfma2.neg_c != 0, Swizzle::H1_H0, hfma2.swizzle_c, ir.Imm32(imm),
          GetReg39(insn), hfma2.saturate != 0, hfma2.precision);
}
/// HFMA2 with a 32-bit immediate multiplier; the addend register doubles as the destination.
void TranslatorVisitor::HFMA2_32I(u64 insn) {
    union {
        u64 raw;
        BitField<0, 8, IR::Reg> src_c;
        BitField<20, 32, u64> imm32;
        BitField<52, 1, u64> neg_c;
        BitField<53, 2, Swizzle> swizzle_a;
        BitField<55, 2, HalfPrecision> precision;
    } const hfma2{insn};
    const u32 imm{static_cast<u32>(hfma2.imm32)};
    HFMA2(*this, insn, Merge::H1_H0, hfma2.swizzle_a, false, hfma2.neg_c != 0, Swizzle::H1_H0,
          Swizzle::H1_H0, ir.Imm32(imm), X(hfma2.src_c), false, hfma2.precision);
}
} // namespace Shader::Maxwell

View File

@@ -0,0 +1,62 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.h"
namespace Shader::Maxwell {
/// Maps the instruction's half-precision control field onto the IR's FMZ mode.
IR::FmzMode HalfPrecision2FmzMode(HalfPrecision precision) {
    if (precision == HalfPrecision::None) {
        return IR::FmzMode::None;
    }
    if (precision == HalfPrecision::FTZ) {
        return IR::FmzMode::FTZ;
    }
    if (precision == HalfPrecision::FMZ) {
        return IR::FmzMode::FMZ;
    }
    // Any other encoding leaves the mode unconstrained.
    return IR::FmzMode::DontCare;
}
/// Splits a 32-bit register value into a pair of floating-point operands according to the
/// swizzle: f16 swizzles unpack (and possibly replicate) the packed components, while F32
/// reinterprets the whole register and returns the same scalar for both slots.
std::pair<IR::F16F32F64, IR::F16F32F64> Extract(IR::IREmitter& ir, IR::U32 value, Swizzle swizzle) {
    switch (swizzle) {
    case Swizzle::H1_H0: {
        // Both packed components, in (component 0, component 1) order.
        const IR::Value vector{ir.UnpackFloat2x16(value)};
        return {IR::F16{ir.CompositeExtract(vector, 0)}, IR::F16{ir.CompositeExtract(vector, 1)}};
    }
    case Swizzle::H0_H0: {
        // Component 0 replicated into both slots.
        const IR::F16 scalar{ir.CompositeExtract(ir.UnpackFloat2x16(value), 0)};
        return {scalar, scalar};
    }
    case Swizzle::H1_H1: {
        // Component 1 replicated into both slots.
        const IR::F16 scalar{ir.CompositeExtract(ir.UnpackFloat2x16(value), 1)};
        return {scalar, scalar};
    }
    case Swizzle::F32: {
        // Whole register reinterpreted as a single f32.
        const IR::F32 scalar{ir.BitCast<IR::F32>(value)};
        return {scalar, scalar};
    }
    }
    throw InvalidArgument("Invalid swizzle {}", swizzle);
}
/// Packs the two per-component results back into a 32-bit register value according to the
/// merge mode; the MRG_* modes preserve the untouched component of the current destination.
IR::U32 MergeResult(IR::IREmitter& ir, IR::Reg dest, const IR::F16& lhs, const IR::F16& rhs,
                    Merge merge) {
    switch (merge) {
    case Merge::H1_H0:
        return ir.PackFloat2x16(ir.CompositeConstruct(lhs, rhs));
    case Merge::F32:
        // Only the first result is kept, widened to f32.
        return ir.BitCast<IR::U32, IR::F32>(ir.FPConvert(32, lhs));
    case Merge::MRG_H0:
    case Merge::MRG_H1: {
        // Insert one converted result into the existing destination register contents.
        const IR::Value vector{ir.UnpackFloat2x16(ir.GetReg(dest))};
        const bool is_h0{merge == Merge::MRG_H0};
        const IR::F16 insert{ir.FPConvert(16, is_h0 ? lhs : rhs)};
        return ir.PackFloat2x16(ir.CompositeInsert(vector, insert, is_h0 ? 0 : 1));
    }
    }
    throw InvalidArgument("Invalid merge {}", merge);
}
} // namespace Shader::Maxwell

View File

@@ -0,0 +1,42 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include "common/common_types.h"
#include "shader_recompiler/exception.h"
#include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h"
#include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h"
#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
namespace Shader::Maxwell {
// How HADD2/HMUL2/HFMA2 write their two per-component results back to the register
// (see MergeResult in the implementation file).
enum class Merge : u64 {
    H1_H0,  // Pack both results as two f16 components
    F32,    // Write the first result widened to f32
    MRG_H0, // Insert the first result into component 0 of the destination, keep component 1
    MRG_H1, // Insert the second result into component 1 of the destination, keep component 0
};
// How a 32-bit source register is split into a pair of floating-point operands
// (see Extract in the implementation file).
enum class Swizzle : u64 {
    H1_H0, // Both packed f16 components
    F32,   // Whole register reinterpreted as one f32, used for both slots
    H0_H0, // Component 0 replicated
    H1_H1, // Component 1 replicated
};
// Denormal handling requested by the instruction, mapped to IR::FmzMode below.
enum class HalfPrecision : u64 {
    None = 0,
    FTZ = 1,
    FMZ = 2,
};
// Maps HalfPrecision onto the IR's FMZ mode; unknown encodings map to DontCare.
IR::FmzMode HalfPrecision2FmzMode(HalfPrecision precision);
// Splits 'value' into a pair of operands according to 'swizzle'.
std::pair<IR::F16F32F64, IR::F16F32F64> Extract(IR::IREmitter& ir, IR::U32 value, Swizzle swizzle);
// Packs 'lhs'/'rhs' back into a register value according to 'merge'; 'dest' provides the
// preserved component for the MRG_* modes.
IR::U32 MergeResult(IR::IREmitter& ir, IR::Reg dest, const IR::F16& lhs, const IR::F16& rhs,
                    Merge merge);
} // namespace Shader::Maxwell

View File

@@ -0,0 +1,143 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.h"
namespace Shader::Maxwell {
namespace {
/// Shared implementation for all HMUL2 variants: multiplies two component pairs extracted
/// from the operands, with optional f16->f32 promotion on type mismatch, FMZ zero-collapse
/// handling, saturation, and merges both results back into the destination register.
void HMUL2(TranslatorVisitor& v, u64 insn, Merge merge, bool sat, bool abs_a, bool neg_a,
           Swizzle swizzle_a, bool abs_b, bool neg_b, Swizzle swizzle_b, const IR::U32& src_b,
           HalfPrecision precision) {
    union {
        u64 raw;
        BitField<0, 8, IR::Reg> dest_reg;
        BitField<8, 8, IR::Reg> src_a;
    } const hmul2{insn};
    auto [lhs_a, rhs_a]{Extract(v.ir, v.X(hmul2.src_a), swizzle_a)};
    auto [lhs_b, rhs_b]{Extract(v.ir, src_b, swizzle_b)};
    // If one operand is f16 and the other f32, compute in f32 and convert back at the end.
    const bool promotion{lhs_a.Type() != lhs_b.Type()};
    if (promotion) {
        if (lhs_a.Type() == IR::Type::F16) {
            lhs_a = v.ir.FPConvert(32, lhs_a);
            rhs_a = v.ir.FPConvert(32, rhs_a);
        }
        if (lhs_b.Type() == IR::Type::F16) {
            lhs_b = v.ir.FPConvert(32, lhs_b);
            rhs_b = v.ir.FPConvert(32, rhs_b);
        }
    }
    // abs/neg apply uniformly to both components of an operand.
    lhs_a = v.ir.FPAbsNeg(lhs_a, abs_a, neg_a);
    rhs_a = v.ir.FPAbsNeg(rhs_a, abs_a, neg_a);
    lhs_b = v.ir.FPAbsNeg(lhs_b, abs_b, neg_b);
    rhs_b = v.ir.FPAbsNeg(rhs_b, abs_b, neg_b);
    const IR::FpControl fp_control{
        .no_contraction = true,
        .rounding = IR::FpRounding::DontCare,
        .fmz_mode = HalfPrecision2FmzMode(precision),
    };
    IR::F16F32F64 lhs{v.ir.FPMul(lhs_a, lhs_b, fp_control)};
    IR::F16F32F64 rhs{v.ir.FPMul(rhs_a, rhs_b, fp_control)};
    if (precision == HalfPrecision::FMZ && !sat) {
        // Do not implement FMZ if SAT is enabled, as it does the logic for us.
        // On D3D9 mode, anything * 0 is zero, even NAN and infinity
        const IR::F32 zero{v.ir.Imm32(0.0f)};
        const IR::U1 lhs_zero_a{v.ir.FPEqual(lhs_a, zero)};
        const IR::U1 lhs_zero_b{v.ir.FPEqual(lhs_b, zero)};
        const IR::U1 lhs_any_zero{v.ir.LogicalOr(lhs_zero_a, lhs_zero_b)};
        lhs = IR::F16F32F64{v.ir.Select(lhs_any_zero, zero, lhs)};
        const IR::U1 rhs_zero_a{v.ir.FPEqual(rhs_a, zero)};
        const IR::U1 rhs_zero_b{v.ir.FPEqual(rhs_b, zero)};
        const IR::U1 rhs_any_zero{v.ir.LogicalOr(rhs_zero_a, rhs_zero_b)};
        rhs = IR::F16F32F64{v.ir.Select(rhs_any_zero, zero, rhs)};
    }
    if (sat) {
        lhs = v.ir.FPSaturate(lhs);
        rhs = v.ir.FPSaturate(rhs);
    }
    if (promotion) {
        lhs = v.ir.FPConvert(16, lhs);
        rhs = v.ir.FPConvert(16, rhs);
    }
    v.X(hmul2.dest_reg, MergeResult(v.ir, hmul2.dest_reg, lhs, rhs, merge));
}
// Decode overload: extracts merge mode, operand-A swizzle and precision from the
// instruction word, then forwards to the shared HMUL2 implementation above.
void HMUL2(TranslatorVisitor& v, u64 insn, bool sat, bool abs_a, bool neg_a, bool abs_b, bool neg_b,
           Swizzle swizzle_b, const IR::U32& src_b) {
    union {
        u64 raw;
        BitField<49, 2, Merge> merge;
        BitField<47, 2, Swizzle> swizzle_a;
        BitField<39, 2, HalfPrecision> precision;
    } const hmul2{insn};
    HMUL2(v, insn, hmul2.merge, sat, abs_a, neg_a, hmul2.swizzle_a, abs_b, neg_b, swizzle_b, src_b,
          hmul2.precision);
}
} // Anonymous namespace
// HMUL2 with a register operand B. Note the encoding carries no neg_a bit, so
// operand A negation is passed as false.
void TranslatorVisitor::HMUL2_reg(u64 insn) {
    union {
        u64 raw;
        BitField<32, 1, u64> sat;
        BitField<31, 1, u64> neg_b;
        BitField<30, 1, u64> abs_b;
        BitField<44, 1, u64> abs_a;
        BitField<28, 2, Swizzle> swizzle_b;
    } const hmul2{insn};
    HMUL2(*this, insn, hmul2.sat != 0, hmul2.abs_a != 0, false, hmul2.abs_b != 0, hmul2.neg_b != 0,
          hmul2.swizzle_b, GetReg20(insn));
}
// HMUL2 with a constant-buffer operand B. A cbuf read is a full 32-bit scalar, so
// operand B uses Swizzle::F32 and has no neg_b modifier in this encoding.
void TranslatorVisitor::HMUL2_cbuf(u64 insn) {
    union {
        u64 raw;
        BitField<52, 1, u64> sat;
        BitField<54, 1, u64> abs_b;
        BitField<43, 1, u64> neg_a;
        BitField<44, 1, u64> abs_a;
    } const hmul2{insn};
    HMUL2(*this, insn, hmul2.sat != 0, hmul2.abs_a != 0, hmul2.neg_a != 0, hmul2.abs_b != 0, false,
          Swizzle::F32, GetCbuf(insn));
}
// HMUL2 with a packed immediate operand B. Each 16-bit half is rebuilt from a 9-bit
// payload (shifted to bits [6..14] / [22..30]) plus a sign bit at bit 15 / 31;
// presumably this reconstructs two truncated f16 constants -- matches HSET2_imm.
void TranslatorVisitor::HMUL2_imm(u64 insn) {
    union {
        u64 raw;
        BitField<52, 1, u64> sat;
        BitField<56, 1, u64> neg_high;
        BitField<30, 9, u64> high;
        BitField<29, 1, u64> neg_low;
        BitField<20, 9, u64> low;
        BitField<43, 1, u64> neg_a;
        BitField<44, 1, u64> abs_a;
    } const hmul2{insn};
    const u32 imm{
        static_cast<u32>(hmul2.low << 6) | static_cast<u32>((hmul2.neg_low != 0 ? 1 : 0) << 15) |
        static_cast<u32>(hmul2.high << 22) | static_cast<u32>((hmul2.neg_high != 0 ? 1 : 0) << 31)};
    HMUL2(*this, insn, hmul2.sat != 0, hmul2.abs_a != 0, hmul2.neg_a != 0, false, false,
          Swizzle::H1_H0, ir.Imm32(imm));
}
// HMUL2 with a full 32-bit immediate operand B (two packed f16 values). This form
// fixes the merge mode to H1_H0 and carries no abs/neg modifiers.
void TranslatorVisitor::HMUL2_32I(u64 insn) {
    union {
        u64 raw;
        BitField<55, 2, HalfPrecision> precision;
        BitField<52, 1, u64> sat;
        BitField<53, 2, Swizzle> swizzle_a;
        BitField<20, 32, u64> imm32;
    } const hmul2{insn};
    const u32 imm{static_cast<u32>(hmul2.imm32)};
    HMUL2(*this, insn, Merge::H1_H0, hmul2.sat != 0, false, false, hmul2.swizzle_a, false, false,
          Swizzle::H1_H0, ir.Imm32(imm), hmul2.precision);
}
} // namespace Shader::Maxwell

View File

@@ -0,0 +1,117 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.h"
namespace Shader::Maxwell {
namespace {
// Shared HSET2 implementation: compares two packed half-precision component pairs,
// combines each result with a predicate via the boolean op, and writes a packed
// per-half boolean result (0xffff mask or 0x3c00 = f16 1.0 when BF is set).
void HSET2(TranslatorVisitor& v, u64 insn, const IR::U32& src_b, bool bf, bool ftz, bool neg_b,
           bool abs_b, FPCompareOp compare_op, Swizzle swizzle_b) {
    union {
        u64 insn;
        BitField<0, 8, IR::Reg> dest_reg;
        BitField<8, 8, IR::Reg> src_a_reg;
        BitField<39, 3, IR::Pred> pred;
        BitField<42, 1, u64> neg_pred;
        BitField<43, 1, u64> neg_a;
        BitField<45, 2, BooleanOp> bop;
        BitField<44, 1, u64> abs_a;
        BitField<47, 2, Swizzle> swizzle_a;
    } const hset2{insn};
    auto [lhs_a, rhs_a]{Extract(v.ir, v.X(hset2.src_a_reg), hset2.swizzle_a)};
    auto [lhs_b, rhs_b]{Extract(v.ir, src_b, swizzle_b)};
    // Promote mixed f16/f32 component pairs to f32 so the comparison types match
    if (lhs_a.Type() != lhs_b.Type()) {
        if (lhs_a.Type() == IR::Type::F16) {
            lhs_a = v.ir.FPConvert(32, lhs_a);
            rhs_a = v.ir.FPConvert(32, rhs_a);
        }
        if (lhs_b.Type() == IR::Type::F16) {
            lhs_b = v.ir.FPConvert(32, lhs_b);
            rhs_b = v.ir.FPConvert(32, rhs_b);
        }
    }
    lhs_a = v.ir.FPAbsNeg(lhs_a, hset2.abs_a != 0, hset2.neg_a != 0);
    rhs_a = v.ir.FPAbsNeg(rhs_a, hset2.abs_a != 0, hset2.neg_a != 0);
    lhs_b = v.ir.FPAbsNeg(lhs_b, abs_b, neg_b);
    rhs_b = v.ir.FPAbsNeg(rhs_b, abs_b, neg_b);
    const IR::FpControl control{
        .no_contraction = false,
        .rounding = IR::FpRounding::DontCare,
        .fmz_mode = (ftz ? IR::FmzMode::FTZ : IR::FmzMode::None),
    };
    IR::U1 pred{v.ir.GetPred(hset2.pred)};
    if (hset2.neg_pred != 0) {
        pred = v.ir.LogicalNot(pred);
    }
    const IR::U1 cmp_result_lhs{FloatingPointCompare(v.ir, lhs_a, lhs_b, compare_op, control)};
    const IR::U1 cmp_result_rhs{FloatingPointCompare(v.ir, rhs_a, rhs_b, compare_op, control)};
    const IR::U1 bop_result_lhs{PredicateCombine(v.ir, cmp_result_lhs, pred, hset2.bop)};
    const IR::U1 bop_result_rhs{PredicateCombine(v.ir, cmp_result_rhs, pred, hset2.bop)};
    // Low half's "true" value goes to bits [15:0], high half's to bits [31:16]
    const u32 true_value = bf ? 0x3c00 : 0xffff;
    const IR::U32 true_val_lhs{v.ir.Imm32(true_value)};
    const IR::U32 true_val_rhs{v.ir.Imm32(true_value << 16)};
    const IR::U32 fail_result{v.ir.Imm32(0)};
    const IR::U32 result_lhs{v.ir.Select(bop_result_lhs, true_val_lhs, fail_result)};
    const IR::U32 result_rhs{v.ir.Select(bop_result_rhs, true_val_rhs, fail_result)};
    v.X(hset2.dest_reg, IR::U32{v.ir.BitwiseOr(result_lhs, result_rhs)});
}
} // Anonymous namespace
// HSET2 with a register operand B.
void TranslatorVisitor::HSET2_reg(u64 insn) {
    union {
        u64 insn;
        BitField<30, 1, u64> abs_b;
        BitField<49, 1, u64> bf;
        BitField<31, 1, u64> neg_b;
        BitField<50, 1, u64> ftz;
        BitField<35, 4, FPCompareOp> compare_op;
        BitField<28, 2, Swizzle> swizzle_b;
    } const hset2{insn};
    HSET2(*this, insn, GetReg20(insn), hset2.bf != 0, hset2.ftz != 0, hset2.neg_b != 0,
          hset2.abs_b != 0, hset2.compare_op, hset2.swizzle_b);
}
// HSET2 with a constant-buffer operand B; a cbuf read is a 32-bit scalar, hence
// Swizzle::F32 and no abs_b modifier in this encoding.
void TranslatorVisitor::HSET2_cbuf(u64 insn) {
    union {
        u64 insn;
        BitField<53, 1, u64> bf;
        BitField<56, 1, u64> neg_b;
        BitField<54, 1, u64> ftz;
        BitField<49, 4, FPCompareOp> compare_op;
    } const hset2{insn};
    HSET2(*this, insn, GetCbuf(insn), hset2.bf != 0, hset2.ftz != 0, hset2.neg_b != 0, false,
          hset2.compare_op, Swizzle::F32);
}
// HSET2 with a packed immediate operand B; the two halves are rebuilt from 9-bit
// payloads plus sign bits (same packing as HMUL2_imm).
void TranslatorVisitor::HSET2_imm(u64 insn) {
    union {
        u64 insn;
        BitField<53, 1, u64> bf;
        BitField<54, 1, u64> ftz;
        BitField<49, 4, FPCompareOp> compare_op;
        BitField<56, 1, u64> neg_high;
        BitField<30, 9, u64> high;
        BitField<29, 1, u64> neg_low;
        BitField<20, 9, u64> low;
    } const hset2{insn};
    const u32 imm{
        static_cast<u32>(hset2.low << 6) | static_cast<u32>((hset2.neg_low != 0 ? 1 : 0) << 15) |
        static_cast<u32>(hset2.high << 22) | static_cast<u32>((hset2.neg_high != 0 ? 1 : 0) << 31)};
    HSET2(*this, insn, ir.Imm32(imm), hset2.bf != 0, hset2.ftz != 0, false, false, hset2.compare_op,
          Swizzle::H1_H0);
}
} // namespace Shader::Maxwell

View File

@@ -0,0 +1,118 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.h"
namespace Shader::Maxwell {
namespace {
// Shared HSETP2 implementation: compares two packed half-precision component pairs
// and writes the combined results to two destination predicates. With H_AND the
// two comparisons are ANDed into dest_pred_a and its negation into dest_pred_b.
void HSETP2(TranslatorVisitor& v, u64 insn, const IR::U32& src_b, bool neg_b, bool abs_b,
            Swizzle swizzle_b, FPCompareOp compare_op, bool h_and) {
    union {
        u64 insn;
        BitField<8, 8, IR::Reg> src_a_reg;
        BitField<3, 3, IR::Pred> dest_pred_a;
        BitField<0, 3, IR::Pred> dest_pred_b;
        BitField<39, 3, IR::Pred> pred;
        BitField<42, 1, u64> neg_pred;
        BitField<43, 1, u64> neg_a;
        BitField<45, 2, BooleanOp> bop;
        BitField<44, 1, u64> abs_a;
        BitField<6, 1, u64> ftz;
        BitField<47, 2, Swizzle> swizzle_a;
    } const hsetp2{insn};
    auto [lhs_a, rhs_a]{Extract(v.ir, v.X(hsetp2.src_a_reg), hsetp2.swizzle_a)};
    auto [lhs_b, rhs_b]{Extract(v.ir, src_b, swizzle_b)};
    // Promote mixed f16/f32 component pairs to f32 so the comparison types match
    if (lhs_a.Type() != lhs_b.Type()) {
        if (lhs_a.Type() == IR::Type::F16) {
            lhs_a = v.ir.FPConvert(32, lhs_a);
            rhs_a = v.ir.FPConvert(32, rhs_a);
        }
        if (lhs_b.Type() == IR::Type::F16) {
            lhs_b = v.ir.FPConvert(32, lhs_b);
            rhs_b = v.ir.FPConvert(32, rhs_b);
        }
    }
    lhs_a = v.ir.FPAbsNeg(lhs_a, hsetp2.abs_a != 0, hsetp2.neg_a != 0);
    rhs_a = v.ir.FPAbsNeg(rhs_a, hsetp2.abs_a != 0, hsetp2.neg_a != 0);
    lhs_b = v.ir.FPAbsNeg(lhs_b, abs_b, neg_b);
    rhs_b = v.ir.FPAbsNeg(rhs_b, abs_b, neg_b);
    const IR::FpControl control{
        .no_contraction = false,
        .rounding = IR::FpRounding::DontCare,
        .fmz_mode = (hsetp2.ftz != 0 ? IR::FmzMode::FTZ : IR::FmzMode::None),
    };
    IR::U1 pred{v.ir.GetPred(hsetp2.pred)};
    if (hsetp2.neg_pred != 0) {
        pred = v.ir.LogicalNot(pred);
    }
    const IR::U1 cmp_result_lhs{FloatingPointCompare(v.ir, lhs_a, lhs_b, compare_op, control)};
    const IR::U1 cmp_result_rhs{FloatingPointCompare(v.ir, rhs_a, rhs_b, compare_op, control)};
    const IR::U1 bop_result_lhs{PredicateCombine(v.ir, cmp_result_lhs, pred, hsetp2.bop)};
    const IR::U1 bop_result_rhs{PredicateCombine(v.ir, cmp_result_rhs, pred, hsetp2.bop)};
    if (h_and) {
        auto result = v.ir.LogicalAnd(bop_result_lhs, bop_result_rhs);
        v.ir.SetPred(hsetp2.dest_pred_a, result);
        v.ir.SetPred(hsetp2.dest_pred_b, v.ir.LogicalNot(result));
    } else {
        v.ir.SetPred(hsetp2.dest_pred_a, bop_result_lhs);
        v.ir.SetPred(hsetp2.dest_pred_b, bop_result_rhs);
    }
}
} // Anonymous namespace
// HSETP2 with a register operand B.
void TranslatorVisitor::HSETP2_reg(u64 insn) {
    union {
        u64 insn;
        BitField<30, 1, u64> abs_b;
        BitField<49, 1, u64> h_and;
        BitField<31, 1, u64> neg_b;
        BitField<35, 4, FPCompareOp> compare_op;
        BitField<28, 2, Swizzle> swizzle_b;
    } const hsetp2{insn};
    HSETP2(*this, insn, GetReg20(insn), hsetp2.neg_b != 0, hsetp2.abs_b != 0, hsetp2.swizzle_b,
           hsetp2.compare_op, hsetp2.h_and != 0);
}
// HSETP2 with a constant-buffer operand B (32-bit scalar, hence Swizzle::F32).
void TranslatorVisitor::HSETP2_cbuf(u64 insn) {
    union {
        u64 insn;
        BitField<53, 1, u64> h_and;
        BitField<54, 1, u64> abs_b;
        BitField<56, 1, u64> neg_b;
        BitField<49, 4, FPCompareOp> compare_op;
    } const hsetp2{insn};
    HSETP2(*this, insn, GetCbuf(insn), hsetp2.neg_b != 0, hsetp2.abs_b != 0, Swizzle::F32,
           hsetp2.compare_op, hsetp2.h_and != 0);
}
// HSETP2 with a packed immediate operand B; halves rebuilt from 9-bit payloads plus
// sign bits, matching HSET2_imm/HMUL2_imm. NOTE(review): the ftz field is decoded
// here but not forwarded -- the shared HSETP2 re-reads ftz from bit 6 instead;
// confirm against the hardware encoding.
void TranslatorVisitor::HSETP2_imm(u64 insn) {
    union {
        u64 insn;
        BitField<53, 1, u64> h_and;
        BitField<54, 1, u64> ftz;
        BitField<49, 4, FPCompareOp> compare_op;
        BitField<56, 1, u64> neg_high;
        BitField<30, 9, u64> high;
        BitField<29, 1, u64> neg_low;
        BitField<20, 9, u64> low;
    } const hsetp2{insn};
    const u32 imm{static_cast<u32>(hsetp2.low << 6) |
                  static_cast<u32>((hsetp2.neg_low != 0 ? 1 : 0) << 15) |
                  static_cast<u32>(hsetp2.high << 22) |
                  static_cast<u32>((hsetp2.neg_high != 0 ? 1 : 0) << 31)};
    HSETP2(*this, insn, ir.Imm32(imm), false, false, Swizzle::H1_H0, hsetp2.compare_op,
           hsetp2.h_and != 0);
}
} // namespace Shader::Maxwell

View File

@@ -0,0 +1,272 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "common/bit_field.h"
#include "shader_recompiler/frontend/ir/ir_emitter.h"
#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
namespace Shader::Maxwell {
namespace {
// Reads the low word of a 64-bit constant buffer value, or returns zero when the
// access is unaligned and those bits are not available.
[[nodiscard]] IR::U32 CbufLowerBits(IR::IREmitter& ir, bool unaligned, const IR::U32& binding,
                                    u32 offset) {
    return unaligned ? ir.Imm32(0) : ir.GetCbuf(binding, IR::U32{IR::Value{offset}});
}
} // Anonymous namespace
// Reads a 32-bit register as an integer value.
IR::U32 TranslatorVisitor::X(IR::Reg reg) {
    return ir.GetReg(reg);
}
// Reads an even-aligned register pair as a packed 64-bit integer.
IR::U64 TranslatorVisitor::L(IR::Reg reg) {
    if (!IR::IsAligned(reg, 2)) {
        throw NotImplementedException("Unaligned source register {}", reg);
    }
    return IR::U64{ir.PackUint2x32(ir.CompositeConstruct(X(reg), X(reg + 1)))};
}
// Reads a 32-bit register reinterpreted as a single-precision float.
IR::F32 TranslatorVisitor::F(IR::Reg reg) {
    return ir.BitCast<IR::F32>(X(reg));
}
// Reads an even-aligned register pair as a double-precision float.
IR::F64 TranslatorVisitor::D(IR::Reg reg) {
    if (!IR::IsAligned(reg, 2)) {
        throw NotImplementedException("Unaligned source register {}", reg);
    }
    return IR::F64{ir.PackDouble2x32(ir.CompositeConstruct(X(reg), X(reg + 1)))};
}
// Writes a 32-bit integer value to a register.
void TranslatorVisitor::X(IR::Reg dest_reg, const IR::U32& value) {
    ir.SetReg(dest_reg, value);
}
// Writes a 64-bit value to an even-aligned register pair (low word first).
void TranslatorVisitor::L(IR::Reg dest_reg, const IR::U64& value) {
    if (!IR::IsAligned(dest_reg, 2)) {
        throw NotImplementedException("Unaligned destination register {}", dest_reg);
    }
    const IR::Value result{ir.UnpackUint2x32(value)};
    for (int i = 0; i < 2; i++) {
        X(dest_reg + i, IR::U32{ir.CompositeExtract(result, static_cast<size_t>(i))});
    }
}
// Writes a single-precision float to a register via bit cast.
void TranslatorVisitor::F(IR::Reg dest_reg, const IR::F32& value) {
    X(dest_reg, ir.BitCast<IR::U32>(value));
}
// Writes a double-precision float to an even-aligned register pair.
void TranslatorVisitor::D(IR::Reg dest_reg, const IR::F64& value) {
    if (!IR::IsAligned(dest_reg, 2)) {
        throw NotImplementedException("Unaligned destination register {}", dest_reg);
    }
    const IR::Value result{ir.UnpackDouble2x32(value)};
    for (int i = 0; i < 2; i++) {
        X(dest_reg + i, IR::U32{ir.CompositeExtract(result, static_cast<size_t>(i))});
    }
}
// The GetRegN helpers read the register whose index is encoded at bit N of the
// instruction word (8-bit register index fields).
IR::U32 TranslatorVisitor::GetReg8(u64 insn) {
    union {
        u64 raw;
        BitField<8, 8, IR::Reg> index;
    } const reg{insn};
    return X(reg.index);
}
IR::U32 TranslatorVisitor::GetReg20(u64 insn) {
    union {
        u64 raw;
        BitField<20, 8, IR::Reg> index;
    } const reg{insn};
    return X(reg.index);
}
IR::U32 TranslatorVisitor::GetReg39(u64 insn) {
    union {
        u64 raw;
        BitField<39, 8, IR::Reg> index;
    } const reg{insn};
    return X(reg.index);
}
// Float variants of the GetRegN helpers: same register reads, bit cast to f32.
IR::F32 TranslatorVisitor::GetFloatReg8(u64 insn) {
    return ir.BitCast<IR::F32>(GetReg8(insn));
}
IR::F32 TranslatorVisitor::GetFloatReg20(u64 insn) {
    return ir.BitCast<IR::F32>(GetReg20(insn));
}
IR::F32 TranslatorVisitor::GetFloatReg39(u64 insn) {
    return ir.BitCast<IR::F32>(GetReg39(insn));
}
// Double variants: read an even-aligned register pair starting at the index encoded
// at bit 20 / bit 39.
IR::F64 TranslatorVisitor::GetDoubleReg20(u64 insn) {
    union {
        u64 raw;
        BitField<20, 8, IR::Reg> index;
    } const reg{insn};
    return D(reg.index);
}
IR::F64 TranslatorVisitor::GetDoubleReg39(u64 insn) {
    union {
        u64 raw;
        BitField<39, 8, IR::Reg> index;
    } const reg{insn};
    return D(reg.index);
}
// Decodes the constant buffer binding and byte offset of a cbuf operand.
// Returns {binding, byte_offset} as immediate IR values.
static std::pair<IR::U32, IR::U32> CbufAddr(u64 insn) {
    union {
        u64 raw;
        BitField<20, 14, u64> offset;
        BitField<34, 5, u64> binding;
    } const cbuf{insn};
    if (cbuf.binding >= 18) {
        throw NotImplementedException("Out of bounds constant buffer binding {}", cbuf.binding);
    }
    // NOTE(review): offset is a 14-bit field (max 0x3fff), so this bound can never
    // trigger on the raw offset; presumably it was meant for the byte offset -- confirm.
    if (cbuf.offset >= 0x10'000) {
        throw NotImplementedException("Out of bounds constant buffer offset {}", cbuf.offset);
    }
    const IR::Value binding{static_cast<u32>(cbuf.binding)};
    // The encoded offset is in 32-bit words; scale to bytes
    const IR::Value byte_offset{static_cast<u32>(cbuf.offset) * 4};
    return {IR::U32{binding}, IR::U32{byte_offset}};
}
// Reads a 32-bit constant buffer operand as an integer / float value.
IR::U32 TranslatorVisitor::GetCbuf(u64 insn) {
    const auto [binding, byte_offset]{CbufAddr(insn)};
    return ir.GetCbuf(binding, byte_offset);
}
IR::F32 TranslatorVisitor::GetFloatCbuf(u64 insn) {
    const auto [binding, byte_offset]{CbufAddr(insn)};
    return ir.GetFloatCbuf(binding, byte_offset);
}
// Reads a 64-bit constant buffer operand as a double. The high word is read from the
// 8-byte-aligned slot (or offset | 4 when the unaligned bit is set); the low word is
// only read for aligned accesses (see CbufLowerBits), otherwise it is zero.
IR::F64 TranslatorVisitor::GetDoubleCbuf(u64 insn) {
    union {
        u64 raw;
        BitField<20, 1, u64> unaligned;
    } const cbuf{insn};
    const auto [binding, offset_value]{CbufAddr(insn)};
    const bool unaligned{cbuf.unaligned != 0};
    const u32 offset{offset_value.U32()};
    const IR::Value addr{unaligned ? offset | 4u : (offset & ~7u) | 4u};
    const IR::U32 value{ir.GetCbuf(binding, IR::U32{addr})};
    const IR::U32 lower_bits{CbufLowerBits(ir, unaligned, binding, offset)};
    return ir.PackDouble2x32(ir.CompositeConstruct(lower_bits, value));
}
// Reads two consecutive 32-bit constant buffer words and packs them into a u64
// (low word at the decoded offset, high word 4 bytes after).
IR::U64 TranslatorVisitor::GetPackedCbuf(u64 insn) {
    union {
        u64 raw;
        BitField<20, 1, u64> unaligned;
    } const cbuf{insn};
    if (cbuf.unaligned != 0) {
        throw NotImplementedException("Unaligned packed constant buffer read");
    }
    const auto [binding, lower_offset]{CbufAddr(insn)};
    const IR::U32 upper_offset{ir.Imm32(lower_offset.U32() + 4)};
    const IR::U32 lower_value{ir.GetCbuf(binding, lower_offset)};
    const IR::U32 upper_value{ir.GetCbuf(binding, upper_offset)};
    return ir.PackUint2x32(ir.CompositeConstruct(lower_value, upper_value));
}
// Decodes the signed 20-bit immediate (19-bit magnitude at bit 20, sign at bit 56)
// into a sign-extended 32-bit immediate value.
IR::U32 TranslatorVisitor::GetImm20(u64 insn) {
    union {
        u64 raw;
        BitField<20, 19, u64> value;
        BitField<56, 1, u64> is_negative;
    } const imm{insn};
    const u64 magnitude{imm.value};
    if (imm.is_negative == 0) {
        return ir.Imm32(static_cast<u32>(magnitude));
    }
    // Negative case: the field holds the low 19 bits of a two's complement value
    const s64 signed_value{static_cast<s64>(magnitude) - (1LL << 19)};
    return ir.Imm32(static_cast<s32>(signed_value));
}
// Decodes the 20-bit float immediate: the 19-bit payload forms the upper mantissa
// and exponent bits (shifted by 12), with the sign taken from bit 56.
IR::F32 TranslatorVisitor::GetFloatImm20(u64 insn) {
    union {
        u64 raw;
        BitField<20, 19, u64> value;
        BitField<56, 1, u64> is_negative;
    } const imm{insn};
    const u32 sign_bit{static_cast<u32>(imm.is_negative != 0 ? (1ULL << 31) : 0)};
    const u32 value{static_cast<u32>(imm.value) << 12};
    return ir.Imm32(Common::BitCast<f32>(value | sign_bit));
}
// Decodes the 20-bit double immediate: the 19-bit payload forms the top of the f64
// mantissa/exponent (shifted by 44), with the sign taken from bit 56.
IR::F64 TranslatorVisitor::GetDoubleImm20(u64 insn) {
    union {
        u64 raw;
        BitField<20, 19, u64> value;
        BitField<56, 1, u64> is_negative;
    } const imm{insn};
    const u64 sign_bit{imm.is_negative != 0 ? (1ULL << 63) : 0};
    const u64 value{imm.value << 44};
    return ir.Imm64(Common::BitCast<f64>(value | sign_bit));
}
// Decodes the 20-bit immediate and places it in the upper 32 bits of a u64 value.
IR::U64 TranslatorVisitor::GetPackedImm20(u64 insn) {
    const u64 upper{static_cast<u64>(GetImm20(insn).U32())};
    return ir.Imm64(upper << 32);
}
// Decodes the full 32-bit immediate stored at bit 20, as an integer value.
IR::U32 TranslatorVisitor::GetImm32(u64 insn) {
    union {
        u64 raw;
        BitField<20, 32, u64> value;
    } const imm{insn};
    return ir.Imm32(static_cast<u32>(imm.value));
}
// Same 32-bit immediate, reinterpreted as a single-precision float.
IR::F32 TranslatorVisitor::GetFloatImm32(u64 insn) {
    union {
        u64 raw;
        BitField<20, 32, u64> value;
    } const imm{insn};
    return ir.Imm32(Common::BitCast<f32>(static_cast<u32>(imm.value)));
}
// Thin wrappers over the IR emitter's condition-code flag setters (Z/S/C/O).
void TranslatorVisitor::SetZFlag(const IR::U1& value) {
    ir.SetZFlag(value);
}
void TranslatorVisitor::SetSFlag(const IR::U1& value) {
    ir.SetSFlag(value);
}
void TranslatorVisitor::SetCFlag(const IR::U1& value) {
    ir.SetCFlag(value);
}
void TranslatorVisitor::SetOFlag(const IR::U1& value) {
    ir.SetOFlag(value);
}
// Reset helpers clear the corresponding flag to a constant false.
void TranslatorVisitor::ResetZero() {
    SetZFlag(ir.Imm1(false));
}
void TranslatorVisitor::ResetSFlag() {
    SetSFlag(ir.Imm1(false));
}
void TranslatorVisitor::ResetCFlag() {
    SetCFlag(ir.Imm1(false));
}
void TranslatorVisitor::ResetOFlag() {
    SetOFlag(ir.Imm1(false));
}
} // namespace Shader::Maxwell

View File

@@ -0,0 +1,387 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include "shader_recompiler/environment.h"
#include "shader_recompiler/frontend/ir/basic_block.h"
#include "shader_recompiler/frontend/ir/ir_emitter.h"
#include "shader_recompiler/frontend/maxwell/instruction.h"
namespace Shader::Maxwell {
// Integer comparison operations as encoded in Maxwell instructions.
enum class CompareOp : u64 {
    False,
    LessThan,
    Equal,
    LessThanEqual,
    GreaterThan,
    NotEqual,
    GreaterThanEqual,
    True,
};
// Boolean combine operation applied between a comparison result and a predicate.
enum class BooleanOp : u64 {
    AND,
    OR,
    XOR,
};
// Predicate test operations.
enum class PredicateOp : u64 {
    False,
    True,
    Zero,
    NonZero,
};
// Floating-point comparison operations; the *U variants appear to be the
// unordered (NaN-accepting) counterparts of the first group.
enum class FPCompareOp : u64 {
    F,
    LT,
    EQ,
    LE,
    GT,
    NE,
    GE,
    NUM,
    Nan,
    LTU,
    EQU,
    LEU,
    GTU,
    NEU,
    GEU,
    T,
};
// Visitor that translates one Maxwell shader instruction at a time into the
// intermediate representation, emitting into the given block. Each handler below
// receives the raw 64-bit instruction word for its encoding.
class TranslatorVisitor {
public:
    explicit TranslatorVisitor(Environment& env_, IR::Block& block) : env{env_}, ir(block) {}

    Environment& env;
    IR::IREmitter ir;

    // One handler per instruction opcode/operand-source encoding
    void AL2P(u64 insn);
    void ALD(u64 insn);
    void AST(u64 insn);
    void ATOM_cas(u64 insn);
    void ATOM(u64 insn);
    void ATOMS_cas(u64 insn);
    void ATOMS(u64 insn);
    void B2R(u64 insn);
    void BAR(u64 insn);
    void BFE_reg(u64 insn);
    void BFE_cbuf(u64 insn);
    void BFE_imm(u64 insn);
    void BFI_reg(u64 insn);
    void BFI_rc(u64 insn);
    void BFI_cr(u64 insn);
    void BFI_imm(u64 insn);
    void BPT(u64 insn);
    void BRA(u64 insn);
    void BRK(u64 insn);
    void BRX(u64 insn);
    void CAL();
    void CCTL(u64 insn);
    void CCTLL(u64 insn);
    void CONT(u64 insn);
    void CS2R(u64 insn);
    void CSET(u64 insn);
    void CSETP(u64 insn);
    void DADD_reg(u64 insn);
    void DADD_cbuf(u64 insn);
    void DADD_imm(u64 insn);
    void DEPBAR();
    void DFMA_reg(u64 insn);
    void DFMA_rc(u64 insn);
    void DFMA_cr(u64 insn);
    void DFMA_imm(u64 insn);
    void DMNMX_reg(u64 insn);
    void DMNMX_cbuf(u64 insn);
    void DMNMX_imm(u64 insn);
    void DMUL_reg(u64 insn);
    void DMUL_cbuf(u64 insn);
    void DMUL_imm(u64 insn);
    void DSET_reg(u64 insn);
    void DSET_cbuf(u64 insn);
    void DSET_imm(u64 insn);
    void DSETP_reg(u64 insn);
    void DSETP_cbuf(u64 insn);
    void DSETP_imm(u64 insn);
    void EXIT();
    void F2F_reg(u64 insn);
    void F2F_cbuf(u64 insn);
    void F2F_imm(u64 insn);
    void F2I_reg(u64 insn);
    void F2I_cbuf(u64 insn);
    void F2I_imm(u64 insn);
    void FADD_reg(u64 insn);
    void FADD_cbuf(u64 insn);
    void FADD_imm(u64 insn);
    void FADD32I(u64 insn);
    void FCHK_reg(u64 insn);
    void FCHK_cbuf(u64 insn);
    void FCHK_imm(u64 insn);
    void FCMP_reg(u64 insn);
    void FCMP_rc(u64 insn);
    void FCMP_cr(u64 insn);
    void FCMP_imm(u64 insn);
    void FFMA_reg(u64 insn);
    void FFMA_rc(u64 insn);
    void FFMA_cr(u64 insn);
    void FFMA_imm(u64 insn);
    void FFMA32I(u64 insn);
    void FLO_reg(u64 insn);
    void FLO_cbuf(u64 insn);
    void FLO_imm(u64 insn);
    void FMNMX_reg(u64 insn);
    void FMNMX_cbuf(u64 insn);
    void FMNMX_imm(u64 insn);
    void FMUL_reg(u64 insn);
    void FMUL_cbuf(u64 insn);
    void FMUL_imm(u64 insn);
    void FMUL32I(u64 insn);
    void FSET_reg(u64 insn);
    void FSET_cbuf(u64 insn);
    void FSET_imm(u64 insn);
    void FSETP_reg(u64 insn);
    void FSETP_cbuf(u64 insn);
    void FSETP_imm(u64 insn);
    void FSWZADD(u64 insn);
    void GETCRSPTR(u64 insn);
    void GETLMEMBASE(u64 insn);
    void HADD2_reg(u64 insn);
    void HADD2_cbuf(u64 insn);
    void HADD2_imm(u64 insn);
    void HADD2_32I(u64 insn);
    void HFMA2_reg(u64 insn);
    void HFMA2_rc(u64 insn);
    void HFMA2_cr(u64 insn);
    void HFMA2_imm(u64 insn);
    void HFMA2_32I(u64 insn);
    void HMUL2_reg(u64 insn);
    void HMUL2_cbuf(u64 insn);
    void HMUL2_imm(u64 insn);
    void HMUL2_32I(u64 insn);
    void HSET2_reg(u64 insn);
    void HSET2_cbuf(u64 insn);
    void HSET2_imm(u64 insn);
    void HSETP2_reg(u64 insn);
    void HSETP2_cbuf(u64 insn);
    void HSETP2_imm(u64 insn);
    void I2F_reg(u64 insn);
    void I2F_cbuf(u64 insn);
    void I2F_imm(u64 insn);
    void I2I_reg(u64 insn);
    void I2I_cbuf(u64 insn);
    void I2I_imm(u64 insn);
    void IADD_reg(u64 insn);
    void IADD_cbuf(u64 insn);
    void IADD_imm(u64 insn);
    void IADD3_reg(u64 insn);
    void IADD3_cbuf(u64 insn);
    void IADD3_imm(u64 insn);
    void IADD32I(u64 insn);
    void ICMP_reg(u64 insn);
    void ICMP_rc(u64 insn);
    void ICMP_cr(u64 insn);
    void ICMP_imm(u64 insn);
    void IDE(u64 insn);
    void IDP_reg(u64 insn);
    void IDP_imm(u64 insn);
    void IMAD_reg(u64 insn);
    void IMAD_rc(u64 insn);
    void IMAD_cr(u64 insn);
    void IMAD_imm(u64 insn);
    void IMAD32I(u64 insn);
    void IMADSP_reg(u64 insn);
    void IMADSP_rc(u64 insn);
    void IMADSP_cr(u64 insn);
    void IMADSP_imm(u64 insn);
    void IMNMX_reg(u64 insn);
    void IMNMX_cbuf(u64 insn);
    void IMNMX_imm(u64 insn);
    void IMUL_reg(u64 insn);
    void IMUL_cbuf(u64 insn);
    void IMUL_imm(u64 insn);
    void IMUL32I(u64 insn);
    void IPA(u64 insn);
    void ISBERD(u64 insn);
    void ISCADD_reg(u64 insn);
    void ISCADD_cbuf(u64 insn);
    void ISCADD_imm(u64 insn);
    void ISCADD32I(u64 insn);
    void ISET_reg(u64 insn);
    void ISET_cbuf(u64 insn);
    void ISET_imm(u64 insn);
    void ISETP_reg(u64 insn);
    void ISETP_cbuf(u64 insn);
    void ISETP_imm(u64 insn);
    void JCAL(u64 insn);
    void JMP(u64 insn);
    void JMX(u64 insn);
    void KIL();
    void LD(u64 insn);
    void LDC(u64 insn);
    void LDG(u64 insn);
    void LDL(u64 insn);
    void LDS(u64 insn);
    void LEA_hi_reg(u64 insn);
    void LEA_hi_cbuf(u64 insn);
    void LEA_lo_reg(u64 insn);
    void LEA_lo_cbuf(u64 insn);
    void LEA_lo_imm(u64 insn);
    void LEPC(u64 insn);
    void LONGJMP(u64 insn);
    void LOP_reg(u64 insn);
    void LOP_cbuf(u64 insn);
    void LOP_imm(u64 insn);
    void LOP3_reg(u64 insn);
    void LOP3_cbuf(u64 insn);
    void LOP3_imm(u64 insn);
    void LOP32I(u64 insn);
    void MEMBAR(u64 insn);
    void MOV_reg(u64 insn);
    void MOV_cbuf(u64 insn);
    void MOV_imm(u64 insn);
    void MOV32I(u64 insn);
    void MUFU(u64 insn);
    void NOP(u64 insn);
    void OUT_reg(u64 insn);
    void OUT_cbuf(u64 insn);
    void OUT_imm(u64 insn);
    void P2R_reg(u64 insn);
    void P2R_cbuf(u64 insn);
    void P2R_imm(u64 insn);
    void PBK();
    void PCNT();
    void PEXIT(u64 insn);
    void PIXLD(u64 insn);
    void PLONGJMP(u64 insn);
    void POPC_reg(u64 insn);
    void POPC_cbuf(u64 insn);
    void POPC_imm(u64 insn);
    void PRET(u64 insn);
    void PRMT_reg(u64 insn);
    void PRMT_rc(u64 insn);
    void PRMT_cr(u64 insn);
    void PRMT_imm(u64 insn);
    void PSET(u64 insn);
    void PSETP(u64 insn);
    void R2B(u64 insn);
    void R2P_reg(u64 insn);
    void R2P_cbuf(u64 insn);
    void R2P_imm(u64 insn);
    void RAM(u64 insn);
    void RED(u64 insn);
    void RET(u64 insn);
    void RRO_reg(u64 insn);
    void RRO_cbuf(u64 insn);
    void RRO_imm(u64 insn);
    void RTT(u64 insn);
    void S2R(u64 insn);
    void SAM(u64 insn);
    void SEL_reg(u64 insn);
    void SEL_cbuf(u64 insn);
    void SEL_imm(u64 insn);
    void SETCRSPTR(u64 insn);
    void SETLMEMBASE(u64 insn);
    void SHF_l_reg(u64 insn);
    void SHF_l_imm(u64 insn);
    void SHF_r_reg(u64 insn);
    void SHF_r_imm(u64 insn);
    void SHFL(u64 insn);
    void SHL_reg(u64 insn);
    void SHL_cbuf(u64 insn);
    void SHL_imm(u64 insn);
    void SHR_reg(u64 insn);
    void SHR_cbuf(u64 insn);
    void SHR_imm(u64 insn);
    void SSY();
    void ST(u64 insn);
    void STG(u64 insn);
    void STL(u64 insn);
    void STP(u64 insn);
    void STS(u64 insn);
    void SUATOM(u64 insn);
    void SUATOM_cas(u64 insn);
    void SULD(u64 insn);
    void SURED(u64 insn);
    void SUST(u64 insn);
    void SYNC(u64 insn);
    void TEX(u64 insn);
    void TEX_b(u64 insn);
    void TEXS(u64 insn);
    void TLD(u64 insn);
    void TLD_b(u64 insn);
    void TLD4(u64 insn);
    void TLD4_b(u64 insn);
    void TLD4S(u64 insn);
    void TLDS(u64 insn);
    void TMML(u64 insn);
    void TMML_b(u64 insn);
    void TXA(u64 insn);
    void TXD(u64 insn);
    void TXD_b(u64 insn);
    void TXQ(u64 insn);
    void TXQ_b(u64 insn);
    void VABSDIFF(u64 insn);
    void VABSDIFF4(u64 insn);
    void VADD(u64 insn);
    void VMAD(u64 insn);
    void VMNMX(u64 insn);
    void VOTE(u64 insn);
    void VOTE_vtg(u64 insn);
    void VSET(u64 insn);
    void VSETP(u64 insn);
    void VSHL(u64 insn);
    void VSHR(u64 insn);
    void XMAD_reg(u64 insn);
    void XMAD_rc(u64 insn);
    void XMAD_cr(u64 insn);
    void XMAD_imm(u64 insn);

    // Register and operand access helpers (see impl.cpp for semantics)
    [[nodiscard]] IR::U32 X(IR::Reg reg);
    [[nodiscard]] IR::U64 L(IR::Reg reg);
    [[nodiscard]] IR::F32 F(IR::Reg reg);
    [[nodiscard]] IR::F64 D(IR::Reg reg);

    void X(IR::Reg dest_reg, const IR::U32& value);
    void L(IR::Reg dest_reg, const IR::U64& value);
    void F(IR::Reg dest_reg, const IR::F32& value);
    void D(IR::Reg dest_reg, const IR::F64& value);

    [[nodiscard]] IR::U32 GetReg8(u64 insn);
    [[nodiscard]] IR::U32 GetReg20(u64 insn);
    [[nodiscard]] IR::U32 GetReg39(u64 insn);
    [[nodiscard]] IR::F32 GetFloatReg8(u64 insn);
    [[nodiscard]] IR::F32 GetFloatReg20(u64 insn);
    [[nodiscard]] IR::F32 GetFloatReg39(u64 insn);
    [[nodiscard]] IR::F64 GetDoubleReg20(u64 insn);
    [[nodiscard]] IR::F64 GetDoubleReg39(u64 insn);

    [[nodiscard]] IR::U32 GetCbuf(u64 insn);
    [[nodiscard]] IR::F32 GetFloatCbuf(u64 insn);
    [[nodiscard]] IR::F64 GetDoubleCbuf(u64 insn);
    [[nodiscard]] IR::U64 GetPackedCbuf(u64 insn);

    [[nodiscard]] IR::U32 GetImm20(u64 insn);
    [[nodiscard]] IR::F32 GetFloatImm20(u64 insn);
    [[nodiscard]] IR::F64 GetDoubleImm20(u64 insn);
    [[nodiscard]] IR::U64 GetPackedImm20(u64 insn);
    [[nodiscard]] IR::U32 GetImm32(u64 insn);
    [[nodiscard]] IR::F32 GetFloatImm32(u64 insn);

    // Condition-code flag helpers
    void SetZFlag(const IR::U1& value);
    void SetSFlag(const IR::U1& value);
    void SetCFlag(const IR::U1& value);
    void SetOFlag(const IR::U1& value);

    void ResetZero();
    void ResetSFlag();
    void ResetCFlag();
    void ResetOFlag();
};
} // namespace Shader::Maxwell

View File

@@ -0,0 +1,105 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "common/bit_field.h"
#include "common/common_types.h"
#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
namespace Shader::Maxwell {
namespace {
// Shared IADD implementation: dest = (neg_a ? -A : A) + B, with optional extended
// carry-in (X), plus-one (PO) and condition-code write-back (CC). SAT and the
// X+PO combination are not implemented.
void IADD(TranslatorVisitor& v, u64 insn, const IR::U32 op_b, bool neg_a, bool po, bool sat, bool x,
          bool cc) {
    union {
        u64 raw;
        BitField<0, 8, IR::Reg> dest_reg;
        BitField<8, 8, IR::Reg> src_a;
    } const iadd{insn};
    if (sat) {
        throw NotImplementedException("IADD SAT");
    }
    if (x && po) {
        throw NotImplementedException("IADD X+PO");
    }
    // Operand A is always read from here, negated if needed
    IR::U32 op_a{v.X(iadd.src_a)};
    if (neg_a) {
        op_a = v.ir.INeg(op_a);
    }
    // Add both operands
    IR::U32 result{v.ir.IAdd(op_a, op_b)};
    if (x) {
        // X mode adds the previous carry flag to the sum
        const IR::U32 carry{v.ir.Select(v.ir.GetCFlag(), v.ir.Imm32(1), v.ir.Imm32(0))};
        result = v.ir.IAdd(result, carry);
    }
    if (po) {
        // .PO adds one to the result
        result = v.ir.IAdd(result, v.ir.Imm32(1));
    }
    if (cc) {
        // Store flags
        // TODO: Does this grab the result pre-PO or after?
        if (po) {
            throw NotImplementedException("IADD CC+PO");
        }
        // TODO: How does CC behave when X is set?
        if (x) {
            throw NotImplementedException("IADD X+CC");
        }
        v.SetZFlag(v.ir.GetZeroFromOp(result));
        v.SetSFlag(v.ir.GetSignFromOp(result));
        v.SetCFlag(v.ir.GetCarryFromOp(result));
        v.SetOFlag(v.ir.GetOverflowFromOp(result));
    }
    // Store result
    v.X(iadd.dest_reg, result);
}
// Decode overload for the reg/cbuf/imm forms. When bits [49:48] are both set the
// encoding means PO (plus one) and operand B is not negated; otherwise bit 48 is
// the neg_b modifier.
void IADD(TranslatorVisitor& v, u64 insn, IR::U32 op_b) {
    union {
        u64 insn;
        BitField<43, 1, u64> x;
        BitField<47, 1, u64> cc;
        BitField<48, 2, u64> three_for_po;
        BitField<48, 1, u64> neg_b;
        BitField<49, 1, u64> neg_a;
        BitField<50, 1, u64> sat;
    } const iadd{insn};
    const bool po{iadd.three_for_po == 3};
    if (!po && iadd.neg_b != 0) {
        op_b = v.ir.INeg(op_b);
    }
    IADD(v, insn, op_b, iadd.neg_a != 0, po, iadd.sat != 0, iadd.x != 0, iadd.cc != 0);
}
} // Anonymous namespace
// Entry points for the three operand-B sources: register, constant buffer, immediate.
void TranslatorVisitor::IADD_reg(u64 insn) {
    IADD(*this, insn, GetReg20(insn));
}
void TranslatorVisitor::IADD_cbuf(u64 insn) {
    IADD(*this, insn, GetCbuf(insn));
}
void TranslatorVisitor::IADD_imm(u64 insn) {
    IADD(*this, insn, GetImm20(insn));
}
// IADD with a full 32-bit immediate. As in the 20-bit forms, bits [56:55] both set
// means PO, in which case the neg_a bit is ignored.
void TranslatorVisitor::IADD32I(u64 insn) {
    union {
        u64 raw;
        BitField<52, 1, u64> cc;
        BitField<53, 1, u64> x;
        BitField<54, 1, u64> sat;
        BitField<55, 2, u64> three_for_po;
        BitField<56, 1, u64> neg_a;
    } const iadd32i{insn};
    const bool po{iadd32i.three_for_po == 3};
    const bool neg_a{!po && iadd32i.neg_a != 0};
    IADD(*this, insn, GetImm32(insn), neg_a, po, iadd32i.sat != 0, iadd32i.x != 0, iadd32i.cc != 0);
}
} // namespace Shader::Maxwell

View File

@@ -0,0 +1,122 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "common/bit_field.h"
#include "common/common_types.h"
#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
namespace Shader::Maxwell {
namespace {
// Optional 16-bit shift applied between IADD3's first and second additions.
enum class Shift : u64 {
    None,
    Right,
    Left,
};
// Selects which 16-bit half of an operand register participates in the add.
enum class Half : u64 {
    All,
    Lower,
    Upper,
};
// Extracts the half of 'value' selected by 'half': the full word, the low 16 bits,
// or the high 16 bits (always zero-extended).
[[nodiscard]] IR::U32 IntegerHalf(IR::IREmitter& ir, const IR::U32& value, Half half) {
    constexpr bool is_signed{false};
    if (half == Half::All) {
        return value;
    }
    if (half == Half::Lower || half == Half::Upper) {
        const IR::U32 base{ir.Imm32(half == Half::Lower ? 0 : 16)};
        return ir.BitFieldExtract(value, base, ir.Imm32(16), is_signed);
    }
    throw NotImplementedException("Invalid half");
}
// Applies IADD3's intermediate shift stage. The right shift models the hardware's
// 33-bit intermediate: when the preceding add carried out, the lost 33rd bit is
// re-injected at bit 16 after the shift.
[[nodiscard]] IR::U32 IntegerShift(IR::IREmitter& ir, const IR::U32& value, Shift shift) {
    switch (shift) {
    case Shift::None:
        return value;
    case Shift::Right: {
        // 33-bit RS IADD3 edge case
        const IR::U1 edge_case{ir.GetCarryFromOp(value)};
        const IR::U32 shifted{ir.ShiftRightLogical(value, ir.Imm32(16))};
        return IR::U32{ir.Select(edge_case, ir.IAdd(shifted, ir.Imm32(0x10000)), shifted)};
    }
    case Shift::Left:
        return ir.ShiftLeftLogical(value, ir.Imm32(16));
    }
    throw NotImplementedException("Invalid shift");
}
// Shared IADD3 implementation: dest = shift(A + B) + C, with per-operand negation,
// an optional 16-bit shift between the adds, X (extended carry-in) and CC
// (condition-code write-back) modes.
void IADD3(TranslatorVisitor& v, u64 insn, IR::U32 op_a, IR::U32 op_b, IR::U32 op_c,
           Shift shift = Shift::None) {
    union {
        u64 insn;
        BitField<0, 8, IR::Reg> dest_reg;
        BitField<47, 1, u64> cc;
        BitField<48, 1, u64> x;
        BitField<49, 1, u64> neg_c;
        BitField<50, 1, u64> neg_b;
        BitField<51, 1, u64> neg_a;
    } const iadd3{insn}; // const for consistency with the other instruction decoders
    if (iadd3.neg_a != 0) {
        op_a = v.ir.INeg(op_a);
    }
    if (iadd3.neg_b != 0) {
        op_b = v.ir.INeg(op_b);
    }
    if (iadd3.neg_c != 0) {
        op_c = v.ir.INeg(op_c);
    }
    // First addition; X adds the previous carry flag into this partial sum
    IR::U32 lhs_1{v.ir.IAdd(op_a, op_b)};
    if (iadd3.x != 0) {
        // TODO: How does RS behave when X is set?
        if (shift == Shift::Right) {
            throw NotImplementedException("IADD3 X+RS");
        }
        const IR::U32 carry{v.ir.Select(v.ir.GetCFlag(), v.ir.Imm32(1), v.ir.Imm32(0))};
        lhs_1 = v.ir.IAdd(lhs_1, carry);
    }
    // Optional intermediate shift, then the second addition
    const IR::U32 lhs_2{IntegerShift(v.ir, lhs_1, shift)};
    const IR::U32 result{v.ir.IAdd(lhs_2, op_c)};
    v.X(iadd3.dest_reg, result);
    if (iadd3.cc != 0) {
        // TODO: How does CC behave when X is set?
        if (iadd3.x != 0) {
            throw NotImplementedException("IADD3 X+CC");
        }
        v.SetZFlag(v.ir.GetZeroFromOp(result));
        v.SetSFlag(v.ir.GetSignFromOp(result));
        v.SetCFlag(v.ir.GetCarryFromOp(result));
        // Overflow may come from either addition; fold in wrap-around of the first add
        const IR::U1 of_1{v.ir.ILessThan(lhs_1, op_a, false)};
        v.SetOFlag(v.ir.LogicalOr(v.ir.GetOverflowFromOp(result), of_1));
    }
}
} // Anonymous namespace
// IADD3 register form: the only form carrying per-operand half selectors and the
// intermediate shift mode.
void TranslatorVisitor::IADD3_reg(u64 insn) {
    union {
        u64 insn;
        BitField<37, 2, Shift> shift;
        BitField<35, 2, Half> half_a;
        BitField<33, 2, Half> half_b;
        BitField<31, 2, Half> half_c;
    } const iadd3{insn};
    const auto op_a{IntegerHalf(ir, GetReg8(insn), iadd3.half_a)};
    const auto op_b{IntegerHalf(ir, GetReg20(insn), iadd3.half_b)};
    const auto op_c{IntegerHalf(ir, GetReg39(insn), iadd3.half_c)};
    IADD3(*this, insn, op_a, op_b, op_c, iadd3.shift);
}
// IADD3 constant-buffer and immediate forms: full-width operands, no shift stage.
void TranslatorVisitor::IADD3_cbuf(u64 insn) {
    IADD3(*this, insn, GetReg8(insn), GetCbuf(insn), GetReg39(insn));
}
void TranslatorVisitor::IADD3_imm(u64 insn) {
    IADD3(*this, insn, GetReg8(insn), GetImm20(insn), GetReg39(insn));
}
} // namespace Shader::Maxwell

View File

@@ -0,0 +1,48 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "common/bit_field.h"
#include "common/common_types.h"
#include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h"
#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
namespace Shader::Maxwell {
namespace {
// ICMP: writes src_reg to the destination when `operand` compares true against
// zero, otherwise writes src_a.
void ICMP(TranslatorVisitor& v, u64 insn, const IR::U32& src_a, const IR::U32& operand) {
    union {
        u64 raw;
        BitField<0, 8, IR::Reg> dest_reg;
        BitField<8, 8, IR::Reg> src_reg;
        BitField<48, 1, u64> is_signed;
        BitField<49, 3, CompareOp> compare_op;
    } const icmp{insn};
    const IR::U32 zero{v.ir.Imm32(0)};
    const IR::U1 take_src_reg{
        IntegerCompare(v.ir, operand, zero, icmp.compare_op, icmp.is_signed != 0)};
    const IR::U32 if_true{v.X(icmp.src_reg)};
    v.X(icmp.dest_reg, IR::U32{v.ir.Select(take_src_reg, if_true, src_a)});
}
} // Anonymous namespace
void TranslatorVisitor::ICMP_reg(u64 insn) {
    const IR::U32 src_a{GetReg20(insn)};
    const IR::U32 operand{GetReg39(insn)};
    ICMP(*this, insn, src_a, operand);
}

void TranslatorVisitor::ICMP_rc(u64 insn) {
    const IR::U32 src_a{GetReg39(insn)};
    const IR::U32 operand{GetCbuf(insn)};
    ICMP(*this, insn, src_a, operand);
}

void TranslatorVisitor::ICMP_cr(u64 insn) {
    const IR::U32 src_a{GetCbuf(insn)};
    const IR::U32 operand{GetReg39(insn)};
    ICMP(*this, insn, src_a, operand);
}

void TranslatorVisitor::ICMP_imm(u64 insn) {
    const IR::U32 src_a{GetImm20(insn)};
    const IR::U32 operand{GetReg39(insn)};
    ICMP(*this, insn, src_a, operand);
}
} // namespace Shader::Maxwell

View File

@@ -0,0 +1,123 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "common/bit_field.h"
#include "common/common_types.h"
#include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h"
#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
namespace Shader::Maxwell {
namespace {
// Extended (X) integer compare: chains a previous operation's condition codes.
// `intermediate` is operand_1 + ~operand_2 + carry, i.e. a subtraction with
// borrow taken from the carry flag, and the previous zero flag participates in
// the equality-related comparisons.
IR::U1 ExtendedIntegerCompare(IR::IREmitter& ir, const IR::U32& operand_1, const IR::U32& operand_2,
                              CompareOp compare_op, bool is_signed) {
    const IR::U32 zero{ir.Imm32(0)};
    const IR::U32 carry{ir.Select(ir.GetCFlag(), ir.Imm32(1), zero)};
    const IR::U1 z_flag{ir.GetZFlag()};
    const IR::U32 intermediate{ir.IAdd(ir.IAdd(operand_1, ir.BitwiseNot(operand_2)), carry)};
    // For unsigned compares the sign-based ordering test must be flipped when
    // the operands' sign bits differ (signed less-than of each operand vs 0).
    const IR::U1 flip_logic{is_signed ? ir.Imm1(false)
                                      : ir.LogicalXor(ir.ILessThan(operand_1, zero, true),
                                                      ir.ILessThan(operand_2, zero, true))};
    switch (compare_op) {
    case CompareOp::False:
        return ir.Imm1(false);
    case CompareOp::LessThan:
        return IR::U1{ir.Select(flip_logic, ir.IGreaterThanEqual(intermediate, zero, true),
                                ir.ILessThan(intermediate, zero, true))};
    case CompareOp::Equal:
        // Equal only when this subtraction is zero AND the previous Z flag held.
        return ir.LogicalAnd(ir.IEqual(intermediate, zero), z_flag);
    case CompareOp::LessThanEqual: {
        const IR::U1 base_cmp{ir.Select(flip_logic, ir.IGreaterThanEqual(intermediate, zero, true),
                                        ir.ILessThan(intermediate, zero, true))};
        return ir.LogicalOr(base_cmp, ir.LogicalAnd(ir.IEqual(intermediate, zero), z_flag));
    }
    case CompareOp::GreaterThan: {
        const IR::U1 base_cmp{ir.Select(flip_logic, ir.ILessThanEqual(intermediate, zero, true),
                                        ir.IGreaterThan(intermediate, zero, true))};
        const IR::U1 not_z{ir.LogicalNot(z_flag)};
        return ir.LogicalOr(base_cmp, ir.LogicalAnd(ir.IEqual(intermediate, zero), not_z));
    }
    case CompareOp::NotEqual:
        return ir.LogicalOr(ir.INotEqual(intermediate, zero),
                            ir.LogicalAnd(ir.IEqual(intermediate, zero), ir.LogicalNot(z_flag)));
    case CompareOp::GreaterThanEqual: {
        const IR::U1 base_cmp{ir.Select(flip_logic, ir.ILessThan(intermediate, zero, true),
                                        ir.IGreaterThanEqual(intermediate, zero, true))};
        return ir.LogicalOr(base_cmp, ir.LogicalAnd(ir.IEqual(intermediate, zero), z_flag));
    }
    case CompareOp::True:
        return ir.Imm1(true);
    default:
        throw NotImplementedException("Invalid compare op {}", compare_op);
    }
}
// Dispatches to the extended (X) compare when requested, otherwise to the
// plain integer compare.
IR::U1 IsetCompare(IR::IREmitter& ir, const IR::U32& operand_1, const IR::U32& operand_2,
                   CompareOp compare_op, bool is_signed, bool x) {
    if (x) {
        return ExtendedIntegerCompare(ir, operand_1, operand_2, compare_op, is_signed);
    }
    return IntegerCompare(ir, operand_1, operand_2, compare_op, is_signed);
}
// ISET: compares src_a (register) against src_b, combines the result with a
// predicate through a boolean op, and writes either all ones (or 1.0f with
// .BF) or zero to the destination register.
void ISET(TranslatorVisitor& v, u64 insn, const IR::U32& src_b) {
    union {
        u64 insn;
        BitField<0, 8, IR::Reg> dest_reg;
        BitField<8, 8, IR::Reg> src_reg;
        BitField<39, 3, IR::Pred> pred;
        BitField<42, 1, u64> neg_pred;
        BitField<43, 1, u64> x;  // extended compare chaining previous CC flags
        BitField<44, 1, u64> bf; // "boolean float": pass value is 1.0f instead of ~0
        BitField<45, 2, BooleanOp> bop;
        BitField<47, 1, u64> cc; // update condition codes from the result
        BitField<48, 1, u64> is_signed;
        BitField<49, 3, CompareOp> compare_op;
    } const iset{insn};
    const IR::U32 src_a{v.X(iset.src_reg)};
    const bool is_signed{iset.is_signed != 0};
    const IR::U32 zero{v.ir.Imm32(0)};
    const bool x{iset.x != 0};
    const IR::U1 cmp_result{IsetCompare(v.ir, src_a, src_b, iset.compare_op, is_signed, x)};
    IR::U1 pred{v.ir.GetPred(iset.pred)};
    if (iset.neg_pred != 0) {
        pred = v.ir.LogicalNot(pred);
    }
    // Combine the comparison with the (possibly negated) predicate.
    const IR::U1 bop_result{PredicateCombine(v.ir, cmp_result, pred, iset.bop)};
    const IR::U32 one_mask{v.ir.Imm32(-1)};
    const IR::U32 fp_one{v.ir.Imm32(0x3f800000)}; // 1.0f as raw bits
    const IR::U32 pass_result{iset.bf == 0 ? one_mask : fp_one};
    const IR::U32 result{v.ir.Select(bop_result, pass_result, zero)};
    v.X(iset.dest_reg, result);
    if (iset.cc != 0) {
        if (x) {
            throw NotImplementedException("ISET.CC + X");
        }
        const IR::U1 is_zero{v.ir.IEqual(result, zero)};
        v.SetZFlag(is_zero);
        if (iset.bf != 0) {
            // Both possible .BF results (1.0f and 0) have a clear sign bit.
            v.ResetSFlag();
        } else {
            v.SetSFlag(v.ir.LogicalNot(is_zero));
        }
        v.ResetCFlag();
        v.ResetOFlag();
    }
}
} // Anonymous namespace
void TranslatorVisitor::ISET_reg(u64 insn) {
    const IR::U32 src_b{GetReg20(insn)};
    ISET(*this, insn, src_b);
}

void TranslatorVisitor::ISET_cbuf(u64 insn) {
    const IR::U32 src_b{GetCbuf(insn)};
    ISET(*this, insn, src_b);
}

void TranslatorVisitor::ISET_imm(u64 insn) {
    const IR::U32 src_b{GetImm20(insn)};
    ISET(*this, insn, src_b);
}
} // namespace Shader::Maxwell

View File

@@ -0,0 +1,180 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "common/bit_field.h"
#include "common/common_types.h"
#include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h"
#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
namespace Shader::Maxwell {
namespace {
// Destination floating-point format (bits 8-9).
enum class FloatFormat : u64 {
    F16 = 1,
    F32 = 2,
    F64 = 3,
};
// Source integer format (bits 10-11).
enum class IntFormat : u64 {
    U8 = 0,
    U16 = 1,
    U32 = 2,
    U64 = 3,
};
// Common I2F instruction encoding shared by the reg/cbuf/imm forms.
union Encoding {
    u64 raw;
    BitField<0, 8, IR::Reg> dest_reg;
    BitField<8, 2, FloatFormat> float_format;
    BitField<10, 2, IntFormat> int_format;
    BitField<13, 1, u64> is_signed;
    BitField<39, 2, FpRounding> fp_rounding;
    BitField<41, 2, u64> selector; // which byte/halfword of the source to convert
    BitField<47, 1, u64> cc;
    BitField<45, 1, u64> neg;
    BitField<49, 1, u64> abs;
};
// True when the source integer is 64 bits wide.
bool Is64(u64 insn) {
    const Encoding encoding{insn};
    return encoding.int_format == IntFormat::U64;
}
// Returns the bit width of a floating-point destination format.
int BitSize(FloatFormat format) {
    switch (format) {
    case FloatFormat::F16:
        return 16;
    case FloatFormat::F32:
        return 32;
    case FloatFormat::F64:
        return 64;
    default:
        throw NotImplementedException("Invalid float format {}", format);
    }
}
// Branchless absolute value for a narrow (8/16-bit) integer held in a 32-bit
// value. The most negative representable value is returned unchanged, since it
// has no positive counterpart in two's complement.
IR::U32 SmallAbs(TranslatorVisitor& v, const IR::U32& value, int bitsize) {
    const IR::U32 minimum{v.ir.Imm32(-(1 << (bitsize - 1)))};
    const IR::U32 sign_mask{v.ir.ShiftRightArithmetic(value, v.ir.Imm32(bitsize - 1))};
    const IR::U32 flipped{v.ir.BitwiseXor(v.ir.IAdd(value, sign_mask), sign_mask)};
    const IR::U1 is_minimum{v.ir.IEqual(value, minimum)};
    return IR::U32{v.ir.Select(is_minimum, value, flipped)};
}
// I2F: converts an integer (8/16/32/64-bit, signed or unsigned) to a
// floating-point value (F16/F32/F64), honoring .ABS/.NEG modifiers and the
// encoded rounding mode.
void I2F(TranslatorVisitor& v, u64 insn, IR::U32U64 src) {
    const Encoding i2f{insn};
    if (i2f.cc != 0) {
        throw NotImplementedException("I2F CC");
    }
    const bool is_signed{i2f.is_signed != 0};
    int src_bitsize{};
    switch (i2f.int_format) {
    case IntFormat::U8:
        // The selector picks which byte of the 32-bit source to convert.
        src = v.ir.BitFieldExtract(src, v.ir.Imm32(static_cast<u32>(i2f.selector) * 8),
                                   v.ir.Imm32(8), is_signed);
        if (i2f.abs != 0) {
            src = SmallAbs(v, src, 8);
        }
        src_bitsize = 8;
        break;
    case IntFormat::U16:
        // The selector addresses bytes; only even offsets yield a valid halfword.
        if (i2f.selector == 1 || i2f.selector == 3) {
            throw NotImplementedException("Invalid U16 selector {}", i2f.selector.Value());
        }
        src = v.ir.BitFieldExtract(src, v.ir.Imm32(static_cast<u32>(i2f.selector) * 8),
                                   v.ir.Imm32(16), is_signed);
        if (i2f.abs != 0) {
            src = SmallAbs(v, src, 16);
        }
        src_bitsize = 16;
        break;
    case IntFormat::U32:
    case IntFormat::U64:
        if (i2f.selector != 0) {
            throw NotImplementedException("Unexpected selector {}", i2f.selector.Value());
        }
        if (i2f.abs != 0 && is_signed) {
            src = v.ir.IAbs(src);
        }
        src_bitsize = i2f.int_format == IntFormat::U64 ? 64 : 32;
        break;
    }
    // 8/16-bit sources were extracted into a 32-bit value, so convert from 32 bits.
    const int conversion_src_bitsize{i2f.int_format == IntFormat::U64 ? 64 : 32};
    const int dst_bitsize{BitSize(i2f.float_format)};
    const IR::FpControl fp_control{
        .no_contraction = false,
        .rounding = CastFpRounding(i2f.fp_rounding),
        .fmz_mode = IR::FmzMode::DontCare,
    };
    auto value{v.ir.ConvertIToF(dst_bitsize, conversion_src_bitsize, is_signed, src, fp_control)};
    if (i2f.neg != 0) {
        if (i2f.abs != 0 || !is_signed) {
            // We know the value is positive
            value = v.ir.FPNeg(value);
        } else {
            // Only negate if the input isn't the lowest value
            IR::U1 is_least;
            if (src_bitsize == 64) {
                is_least = v.ir.IEqual(src, v.ir.Imm64(std::numeric_limits<s64>::min()));
            } else if (src_bitsize == 32) {
                is_least = v.ir.IEqual(src, v.ir.Imm32(std::numeric_limits<s32>::min()));
            } else {
                const IR::U32 least_value{v.ir.Imm32(-(1 << (src_bitsize - 1)))};
                is_least = v.ir.IEqual(src, least_value);
            }
            value = IR::F16F32F64{v.ir.Select(is_least, value, v.ir.FPNeg(value))};
        }
    }
    switch (i2f.float_format) {
    case FloatFormat::F16: {
        // Pack the converted half together with a zero half into one register.
        const IR::F16 zero{v.ir.FPConvert(16, v.ir.Imm32(0.0f))};
        v.X(i2f.dest_reg, v.ir.PackFloat2x16(v.ir.CompositeConstruct(value, zero)));
        break;
    }
    case FloatFormat::F32:
        v.F(i2f.dest_reg, value);
        break;
    case FloatFormat::F64: {
        // Doubles occupy an aligned register pair.
        if (!IR::IsAligned(i2f.dest_reg, 2)) {
            throw NotImplementedException("Unaligned destination {}", i2f.dest_reg.Value());
        }
        const IR::Value vector{v.ir.UnpackDouble2x32(value)};
        for (int i = 0; i < 2; ++i) {
            v.X(i2f.dest_reg + i, IR::U32{v.ir.CompositeExtract(vector, static_cast<size_t>(i))});
        }
        break;
    }
    default:
        throw NotImplementedException("Invalid float format {}", i2f.float_format.Value());
    }
}
} // Anonymous namespace
void TranslatorVisitor::I2F_reg(u64 insn) {
    if (!Is64(insn)) {
        I2F(*this, insn, GetReg20(insn));
        return;
    }
    union {
        u64 raw;
        BitField<20, 8, IR::Reg> reg;
    } const value{insn};
    // A 64-bit source spans a register pair: pack {Rn, Rn+1} into one U64.
    const IR::Value pair{ir.CompositeConstruct(ir.GetReg(value.reg), ir.GetReg(value.reg + 1))};
    I2F(*this, insn, ir.PackUint2x32(pair));
}

void TranslatorVisitor::I2F_cbuf(u64 insn) {
    if (!Is64(insn)) {
        I2F(*this, insn, GetCbuf(insn));
        return;
    }
    I2F(*this, insn, GetPackedCbuf(insn));
}

void TranslatorVisitor::I2F_imm(u64 insn) {
    if (!Is64(insn)) {
        I2F(*this, insn, GetImm20(insn));
        return;
    }
    I2F(*this, insn, GetPackedImm20(insn));
}
} // namespace Shader::Maxwell

View File

@@ -0,0 +1,82 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "common/bit_field.h"
#include "common/common_types.h"
#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
namespace Shader::Maxwell {
namespace {
// Width/signedness selector for the funnel shift (bits 37-38).
enum class MaxShift : u64 {
    U32,       // shift amount limited against 32
    Undefined, // rejected encoding
    U64,       // 64-bit logical shift
    S64,       // 64-bit arithmetic (sign-extending) right shift
};
// Applies the selected 64-bit shift to the packed operand.
IR::U64 PackedShift(IR::IREmitter& ir, const IR::U64& packed_int, const IR::U32& safe_shift,
                    bool right_shift, bool is_signed) {
    if (right_shift) {
        return is_signed ? ir.ShiftRightArithmetic(packed_int, safe_shift)
                         : ir.ShiftRightLogical(packed_int, safe_shift);
    }
    return ir.ShiftLeftLogical(packed_int, safe_shift);
}
// SHF: funnel shift. Concatenates {high_bits, low_bits} into a 64-bit value,
// shifts it, and writes back one 32-bit half of the result.
void SHF(TranslatorVisitor& v, u64 insn, const IR::U32& shift, const IR::U32& high_bits,
         bool right_shift) {
    union {
        u64 insn;
        BitField<0, 8, IR::Reg> dest_reg;
        BitField<0, 8, IR::Reg> lo_bits_reg;
        BitField<37, 2, MaxShift> max_shift;
        BitField<47, 1, u64> cc;
        BitField<48, 2, u64> x_mode;
        BitField<50, 1, u64> wrap;
    } const shf{insn};
    if (shf.cc != 0) {
        throw NotImplementedException("SHF CC");
    }
    if (shf.x_mode != 0) {
        throw NotImplementedException("SHF X Mode");
    }
    if (shf.max_shift == MaxShift::Undefined) {
        throw NotImplementedException("SHF Use of undefined MaxShift value");
    }
    const IR::U32 low_bits{v.X(shf.lo_bits_reg)};
    const IR::U64 packed_int{v.ir.PackUint2x32(v.ir.CompositeConstruct(low_bits, high_bits))};
    const IR::U32 max_shift{shf.max_shift == MaxShift::U32 ? v.ir.Imm32(32) : v.ir.Imm32(63)};
    // Wrap masks with max_shift - 1, clamp takes the minimum against max_shift.
    // NOTE(review): for the 64-bit modes this masks with 63 - 1 == 62 rather
    // than 63 -- confirm this matches hardware wrapping behavior.
    const IR::U32 safe_shift{shf.wrap != 0
                                 ? v.ir.BitwiseAnd(shift, v.ir.ISub(max_shift, v.ir.Imm32(1)))
                                 : v.ir.UMin(shift, max_shift)};
    const bool is_signed{shf.max_shift == MaxShift::S64};
    const IR::U64 shifted_value{PackedShift(v.ir, packed_int, safe_shift, right_shift, is_signed)};
    const IR::Value unpacked_value{v.ir.UnpackUint2x32(shifted_value)};
    // Right shifts keep the low half of the 64-bit result, left shifts the high half.
    const IR::U32 result{v.ir.CompositeExtract(unpacked_value, right_shift ? 0 : 1)};
    v.X(shf.dest_reg, result);
}
} // Anonymous namespace
void TranslatorVisitor::SHF_l_reg(u64 insn) {
    const IR::U32 shift{GetReg20(insn)};
    const IR::U32 high_bits{GetReg39(insn)};
    SHF(*this, insn, shift, high_bits, false);
}

void TranslatorVisitor::SHF_l_imm(u64 insn) {
    const IR::U32 shift{GetImm20(insn)};
    const IR::U32 high_bits{GetReg39(insn)};
    SHF(*this, insn, shift, high_bits, false);
}

void TranslatorVisitor::SHF_r_reg(u64 insn) {
    const IR::U32 shift{GetReg20(insn)};
    const IR::U32 high_bits{GetReg39(insn)};
    SHF(*this, insn, shift, high_bits, true);
}

void TranslatorVisitor::SHF_r_imm(u64 insn) {
    const IR::U32 shift{GetImm20(insn)};
    const IR::U32 high_bits{GetReg39(insn)};
    SHF(*this, insn, shift, high_bits, true);
}
} // namespace Shader::Maxwell

View File

@@ -0,0 +1,64 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "common/bit_field.h"
#include "common/common_types.h"
#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
namespace Shader::Maxwell {
namespace {
// IMNMX: writes min(op_a, op_b) or max(op_a, op_b) depending on a predicate;
// the neg_pred bit flips which of the two the predicate selects.
void IMNMX(TranslatorVisitor& v, u64 insn, const IR::U32& op_b) {
    union {
        u64 raw;
        BitField<0, 8, IR::Reg> dest_reg;
        BitField<8, 8, IR::Reg> src_reg;
        BitField<39, 3, IR::Pred> pred;
        BitField<42, 1, u64> neg_pred;
        BitField<43, 2, u64> mode;
        BitField<47, 1, u64> cc;
        BitField<48, 1, u64> is_signed;
    } const imnmx{insn};
    if (imnmx.cc != 0) {
        throw NotImplementedException("IMNMX CC");
    }
    if (imnmx.mode != 0) {
        throw NotImplementedException("IMNMX.MODE");
    }
    const IR::U1 pred{v.ir.GetPred(imnmx.pred)};
    const IR::U32 op_a{v.X(imnmx.src_reg)};
    const bool is_signed{imnmx.is_signed != 0};
    IR::U32 lo{is_signed ? IR::U32{v.ir.SMin(op_a, op_b)} : IR::U32{v.ir.UMin(op_a, op_b)}};
    IR::U32 hi{is_signed ? IR::U32{v.ir.SMax(op_a, op_b)} : IR::U32{v.ir.UMax(op_a, op_b)}};
    if (imnmx.neg_pred != 0) {
        std::swap(lo, hi);
    }
    v.X(imnmx.dest_reg, IR::U32{v.ir.Select(pred, lo, hi)});
}
} // Anonymous namespace
void TranslatorVisitor::IMNMX_reg(u64 insn) {
    const IR::U32 op_b{GetReg20(insn)};
    IMNMX(*this, insn, op_b);
}

void TranslatorVisitor::IMNMX_cbuf(u64 insn) {
    const IR::U32 op_b{GetCbuf(insn)};
    IMNMX(*this, insn, op_b);
}

void TranslatorVisitor::IMNMX_imm(u64 insn) {
    const IR::U32 op_b{GetImm20(insn)};
    IMNMX(*this, insn, op_b);
}
} // namespace Shader::Maxwell

View File

@@ -0,0 +1,36 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "common/bit_field.h"
#include "common/common_types.h"
#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
namespace Shader::Maxwell {
namespace {
// POPC: population count of the (optionally complemented) source.
void POPC(TranslatorVisitor& v, u64 insn, const IR::U32& src) {
    union {
        u64 raw;
        BitField<0, 8, IR::Reg> dest_reg;
        BitField<40, 1, u64> tilde;
    } const popc{insn};
    // The tilde bit complements the source before counting bits.
    const IR::U32 value{popc.tilde == 0 ? src : v.ir.BitwiseNot(src)};
    v.X(popc.dest_reg, IR::U32{v.ir.BitCount(value)});
}
} // Anonymous namespace
void TranslatorVisitor::POPC_reg(u64 insn) {
    const IR::U32 src{GetReg20(insn)};
    POPC(*this, insn, src);
}

void TranslatorVisitor::POPC_cbuf(u64 insn) {
    const IR::U32 src{GetCbuf(insn)};
    POPC(*this, insn, src);
}

void TranslatorVisitor::POPC_imm(u64 insn) {
    const IR::U32 src{GetImm20(insn)};
    POPC(*this, insn, src);
}
} // namespace Shader::Maxwell

View File

@@ -0,0 +1,86 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "common/bit_field.h"
#include "common/common_types.h"
#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
namespace Shader::Maxwell {
namespace {
// ISCADD: scaled add, computes (op_a << scale_imm) + op_b. When both negate
// bits are set (.PO), neither operand is negated; instead one is added to op_b.
void ISCADD(TranslatorVisitor& v, u64 insn, IR::U32 op_b, bool cc, bool neg_a, bool neg_b,
            u64 scale_imm) {
    union {
        u64 raw;
        BitField<0, 8, IR::Reg> dest_reg;
        BitField<8, 8, IR::Reg> op_a;
    } const iscadd{insn};
    const bool po{neg_a && neg_b};
    IR::U32 op_a{v.X(iscadd.op_a)};
    if (po) {
        // When PO is present, add one
        op_b = v.ir.IAdd(op_b, v.ir.Imm32(1));
    } else {
        // When PO is not present, the bits are interpreted as negation
        if (neg_a) {
            op_a = v.ir.INeg(op_a);
        }
        if (neg_b) {
            op_b = v.ir.INeg(op_b);
        }
    }
    // With the operands already processed, scale A
    const IR::U32 scale{v.ir.Imm32(static_cast<u32>(scale_imm))};
    const IR::U32 scaled_a{v.ir.ShiftLeftLogical(op_a, scale)};
    const IR::U32 result{v.ir.IAdd(scaled_a, op_c)};
    v.X(iscadd.dest_reg, result);
    if (cc) {
        v.SetZFlag(v.ir.GetZeroFromOp(result));
        v.SetSFlag(v.ir.GetSignFromOp(result));
        const IR::U1 carry{v.ir.GetCarryFromOp(result)};
        const IR::U1 overflow{v.ir.GetOverflowFromOp(result)};
        // With PO, the extra +1 applied to op_b can itself carry/overflow, so
        // merge the flags of that earlier add into the final CC result.
        v.SetCFlag(po ? v.ir.LogicalOr(carry, v.ir.GetCarryFromOp(op_b)) : carry);
        v.SetOFlag(po ? v.ir.LogicalOr(overflow, v.ir.GetOverflowFromOp(op_b)) : overflow);
    }
}
// Decodes the common ISCADD fields and forwards to the full implementation.
void ISCADD(TranslatorVisitor& v, u64 insn, IR::U32 op_b) {
    union {
        u64 raw;
        BitField<39, 5, u64> scale;
        BitField<47, 1, u64> cc;
        BitField<48, 1, u64> neg_b;
        BitField<49, 1, u64> neg_a;
    } const iscadd{insn};
    const bool cc{iscadd.cc != 0};
    const bool neg_a{iscadd.neg_a != 0};
    const bool neg_b{iscadd.neg_b != 0};
    ISCADD(v, insn, op_b, cc, neg_a, neg_b, iscadd.scale);
}
} // Anonymous namespace
void TranslatorVisitor::ISCADD_reg(u64 insn) {
    const IR::U32 op_b{GetReg20(insn)};
    ISCADD(*this, insn, op_b);
}

void TranslatorVisitor::ISCADD_cbuf(u64 insn) {
    const IR::U32 op_b{GetCbuf(insn)};
    ISCADD(*this, insn, op_b);
}

void TranslatorVisitor::ISCADD_imm(u64 insn) {
    const IR::U32 op_b{GetImm20(insn)};
    ISCADD(*this, insn, op_b);
}

void TranslatorVisitor::ISCADD32I(u64 insn) {
    union {
        u64 raw;
        BitField<52, 1, u64> cc;
        BitField<53, 5, u64> scale;
    } const iscadd{insn};
    // The 32-bit immediate form has no negate bits.
    ISCADD(*this, insn, GetImm32(insn), iscadd.cc != 0, false, false, iscadd.scale);
}
} // namespace Shader::Maxwell

View File

@@ -0,0 +1,49 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "common/bit_field.h"
#include "common/common_types.h"
#include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h"
#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
namespace Shader::Maxwell {
namespace {
// ISETP: integer compare producing two predicates. dest_pred_a receives the
// comparison combined with a source predicate; dest_pred_b receives the
// negated comparison combined the same way.
void ISETP(TranslatorVisitor& v, u64 insn, const IR::U32& op_b) {
    union {
        u64 raw;
        BitField<0, 3, IR::Pred> dest_pred_b;
        BitField<3, 3, IR::Pred> dest_pred_a;
        BitField<8, 8, IR::Reg> src_reg_a;
        BitField<39, 3, IR::Pred> bop_pred;
        BitField<42, 1, u64> neg_bop_pred;
        BitField<45, 2, BooleanOp> bop;
        BitField<48, 1, u64> is_signed;
        BitField<49, 3, CompareOp> compare_op;
    } const isetp{insn};
    const IR::U32 op_a{v.X(isetp.src_reg_a)};
    const IR::U1 comparison{
        IntegerCompare(v.ir, op_a, op_b, isetp.compare_op, isetp.is_signed != 0)};
    const IR::U1 combiner{v.ir.GetPred(isetp.bop_pred, isetp.neg_bop_pred != 0)};
    const IR::U1 result_a{PredicateCombine(v.ir, comparison, combiner, isetp.bop)};
    const IR::U1 result_b{PredicateCombine(v.ir, v.ir.LogicalNot(comparison), combiner, isetp.bop)};
    v.ir.SetPred(isetp.dest_pred_a, result_a);
    v.ir.SetPred(isetp.dest_pred_b, result_b);
}
} // Anonymous namespace
void TranslatorVisitor::ISETP_reg(u64 insn) {
    const IR::U32 op_b{GetReg20(insn)};
    ISETP(*this, insn, op_b);
}

void TranslatorVisitor::ISETP_cbuf(u64 insn) {
    const IR::U32 op_b{GetCbuf(insn)};
    ISETP(*this, insn, op_b);
}

void TranslatorVisitor::ISETP_imm(u64 insn) {
    const IR::U32 op_b{GetImm20(insn)};
    ISETP(*this, insn, op_b);
}
} // namespace Shader::Maxwell

View File

@@ -0,0 +1,71 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "common/bit_field.h"
#include "common/common_types.h"
#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
namespace Shader::Maxwell {
namespace {
// SHL: logical left shift with either wrapped (mod 32) or clamped semantics
// for the shift amount.
void SHL(TranslatorVisitor& v, u64 insn, const IR::U32& unsafe_shift) {
    union {
        u64 raw;
        BitField<0, 8, IR::Reg> dest_reg;
        BitField<8, 8, IR::Reg> src_reg_a;
        BitField<39, 1, u64> w;
        BitField<43, 1, u64> x;
        BitField<47, 1, u64> cc;
    } const shl{insn};
    if (shl.x != 0) {
        throw NotImplementedException("SHL.X");
    }
    if (shl.cc != 0) {
        throw NotImplementedException("SHL.CC");
    }
    const IR::U32 base{v.X(shl.src_reg_a)};
    if (shl.w != 0) {
        // .W wraps the shift amount: only its low five bits are honored.
        const IR::U32 wrapped{v.ir.BitwiseAnd(unsafe_shift, v.ir.Imm32(31))};
        v.X(shl.dest_reg, IR::U32{v.ir.ShiftLeftLogical(base, wrapped)});
        return;
    }
    // Without .W, shifts of 32 or more must produce zero. It is safe to emit
    // the shift unconditionally and discard the value when out of range:
    // both SPIR-V (OpShiftLeftLogical) and GLASM (NV_gpu_program4 "<<")
    // specify an undefined *result* for out-of-range shifts -- in contrast to
    // undefined behavior -- so evaluating it never harms the program.
    const IR::U1 in_range{v.ir.ILessThan(unsafe_shift, v.ir.Imm32(32), false)};
    const IR::U32 raw_result{v.ir.ShiftLeftLogical(base, unsafe_shift)};
    v.X(shl.dest_reg, IR::U32{v.ir.Select(in_range, raw_result, v.ir.Imm32(0))});
}
} // Anonymous namespace
void TranslatorVisitor::SHL_reg(u64 insn) {
    const IR::U32 shift{GetReg20(insn)};
    SHL(*this, insn, shift);
}

void TranslatorVisitor::SHL_cbuf(u64 insn) {
    const IR::U32 shift{GetCbuf(insn)};
    SHL(*this, insn, shift);
}

void TranslatorVisitor::SHL_imm(u64 insn) {
    const IR::U32 shift{GetImm20(insn)};
    SHL(*this, insn, shift);
}
} // namespace Shader::Maxwell

View File

@@ -0,0 +1,66 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "common/bit_field.h"
#include "common/common_types.h"
#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
namespace Shader::Maxwell {
namespace {
// SHR: right shift, logical or arithmetic, with optional bit reversal of the
// operand. Without wrapping, out-of-range shift amounts saturate the result:
// negative values become all ones, everything else becomes zero.
void SHR(TranslatorVisitor& v, u64 insn, const IR::U32& shift) {
    union {
        u64 raw;
        BitField<0, 8, IR::Reg> dest_reg;
        BitField<8, 8, IR::Reg> src_reg_a;
        BitField<39, 1, u64> is_wrapped;
        BitField<40, 1, u64> brev;
        BitField<43, 1, u64> xmode;
        BitField<47, 1, u64> cc;
        BitField<48, 1, u64> is_signed;
    } const shr{insn};
    if (shr.xmode != 0) {
        throw NotImplementedException("SHR.XMODE");
    }
    if (shr.cc != 0) {
        throw NotImplementedException("SHR.CC");
    }
    IR::U32 operand{v.X(shr.src_reg_a)};
    if (shr.brev == 1) {
        // .BREV reverses the operand's bits before shifting.
        operand = v.ir.BitReverse(operand);
    }
    IR::U32 safe_shift{shift};
    if (shr.is_wrapped != 0) {
        safe_shift = v.ir.BitwiseAnd(shift, v.ir.Imm32(31));
    }
    IR::U32 shifted;
    if (shr.is_signed == 1) {
        shifted = IR::U32{v.ir.ShiftRightArithmetic(operand, safe_shift)};
    } else {
        shifted = IR::U32{v.ir.ShiftRightLogical(operand, safe_shift)};
    }
    if (shr.is_wrapped == 0) {
        const IR::U32 zero{v.ir.Imm32(0)};
        const IR::U32 bit_count{v.ir.Imm32(32)};
        const IR::U1 is_negative{v.ir.ILessThan(shifted, zero, true)};
        const IR::U1 in_range{v.ir.ILessThan(shift, bit_count, false)};
        const IR::U32 saturated{v.ir.Select(is_negative, v.ir.Imm32(-1), zero)};
        shifted = IR::U32{v.ir.Select(in_range, shifted, saturated)};
    }
    v.X(shr.dest_reg, shifted);
}
} // Anonymous namespace
void TranslatorVisitor::SHR_reg(u64 insn) {
    const IR::U32 shift{GetReg20(insn)};
    SHR(*this, insn, shift);
}

void TranslatorVisitor::SHR_cbuf(u64 insn) {
    const IR::U32 shift{GetCbuf(insn)};
    SHR(*this, insn, shift);
}

void TranslatorVisitor::SHR_imm(u64 insn) {
    const IR::U32 shift{GetImm20(insn)};
    SHR(*this, insn, shift);
}
} // namespace Shader::Maxwell

View File

@@ -0,0 +1,135 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "common/bit_field.h"
#include "common/common_types.h"
#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
namespace Shader::Maxwell {
namespace {
// Selects how the XMAD addend (operand C) is formed.
enum class SelectMode : u64 {
    Default, // use operand C as-is
    CLO,     // low 16 bits of operand C, zero-extended
    CHI,     // high 16 bits of operand C, zero-extended
    CSFU,    // not implemented
    CBCC,    // (src_b << 16) + src_c
};
enum class Half : u64 {
    H0, // Least-significant bits (15:0)
    H1, // Most-significant bits (31:16)
};
// Extracts one 16-bit half of `src`, optionally sign-extending it to 32 bits.
IR::U32 ExtractHalf(TranslatorVisitor& v, const IR::U32& src, Half half, bool is_signed) {
    const IR::U32 bit_offset{v.ir.Imm32(half == Half::H1 ? 16 : 0)};
    return v.ir.BitFieldExtract(src, bit_offset, v.ir.Imm32(16), is_signed);
}
// XMAD: 16x16 -> 32-bit multiply-add. Multiplies selected 16-bit halves of A
// and B, optionally shifts the product left by 16 (.PSL), adds an operand
// derived from C per the select mode, and may merge B's low half into the
// result's high half (.MRG).
void XMAD(TranslatorVisitor& v, u64 insn, const IR::U32& src_b, const IR::U32& src_c,
          SelectMode select_mode, Half half_b, bool psl, bool mrg, bool x) {
    union {
        u64 raw;
        BitField<0, 8, IR::Reg> dest_reg;
        BitField<8, 8, IR::Reg> src_reg_a;
        BitField<47, 1, u64> cc;
        BitField<48, 1, u64> is_a_signed;
        BitField<49, 1, u64> is_b_signed;
        BitField<53, 1, Half> half_a;
    } const xmad{insn};
    if (x) {
        throw NotImplementedException("XMAD X");
    }
    const IR::U32 op_a{ExtractHalf(v, v.X(xmad.src_reg_a), xmad.half_a, xmad.is_a_signed != 0)};
    const IR::U32 op_b{ExtractHalf(v, src_b, half_b, xmad.is_b_signed != 0)};
    IR::U32 product{v.ir.IMul(op_a, op_b)};
    if (psl) {
        // .PSL shifts the product 16 bits
        product = v.ir.ShiftLeftLogical(product, v.ir.Imm32(16));
    }
    // Form the addend from operand C according to the select mode.
    const IR::U32 op_c{[&]() -> IR::U32 {
        switch (select_mode) {
        case SelectMode::Default:
            return src_c;
        case SelectMode::CLO:
            return ExtractHalf(v, src_c, Half::H0, false);
        case SelectMode::CHI:
            return ExtractHalf(v, src_c, Half::H1, false);
        case SelectMode::CBCC:
            return v.ir.IAdd(v.ir.ShiftLeftLogical(src_b, v.ir.Imm32(16)), src_c);
        case SelectMode::CSFU:
            throw NotImplementedException("XMAD CSFU");
        }
        throw NotImplementedException("Invalid XMAD select mode {}", select_mode);
    }()};
    IR::U32 result{v.ir.IAdd(product, op_c)};
    if (mrg) {
        // .MRG inserts src_b [15:0] into result's [31:16].
        const IR::U32 lsb_b{ExtractHalf(v, src_b, Half::H0, false)};
        result = v.ir.BitFieldInsert(result, lsb_b, v.ir.Imm32(16), v.ir.Imm32(16));
    }
    if (xmad.cc) {
        throw NotImplementedException("XMAD CC");
    }
    // Store result
    v.X(xmad.dest_reg, result);
}
} // Anonymous namespace
void TranslatorVisitor::XMAD_reg(u64 insn) {
    union {
        u64 raw;
        BitField<35, 1, Half> half_b;
        BitField<36, 1, u64> psl;
        BitField<37, 1, u64> mrg;
        BitField<38, 1, u64> x;
        BitField<50, 3, SelectMode> select_mode;
    } const encoding{insn};
    const IR::U32 src_b{GetReg20(insn)};
    const IR::U32 src_c{GetReg39(insn)};
    XMAD(*this, insn, src_b, src_c, encoding.select_mode, encoding.half_b, encoding.psl != 0,
         encoding.mrg != 0, encoding.x != 0);
}

void TranslatorVisitor::XMAD_rc(u64 insn) {
    union {
        u64 raw;
        BitField<50, 2, SelectMode> select_mode;
        BitField<52, 1, Half> half_b;
        BitField<54, 1, u64> x;
    } const encoding{insn};
    const IR::U32 src_b{GetReg39(insn)};
    const IR::U32 src_c{GetCbuf(insn)};
    // This form encodes neither .PSL nor .MRG.
    XMAD(*this, insn, src_b, src_c, encoding.select_mode, encoding.half_b, false, false,
         encoding.x != 0);
}

void TranslatorVisitor::XMAD_cr(u64 insn) {
    union {
        u64 raw;
        BitField<50, 2, SelectMode> select_mode;
        BitField<52, 1, Half> half_b;
        BitField<54, 1, u64> x;
        BitField<55, 1, u64> psl;
        BitField<56, 1, u64> mrg;
    } const encoding{insn};
    const IR::U32 src_b{GetCbuf(insn)};
    const IR::U32 src_c{GetReg39(insn)};
    XMAD(*this, insn, src_b, src_c, encoding.select_mode, encoding.half_b, encoding.psl != 0,
         encoding.mrg != 0, encoding.x != 0);
}

void TranslatorVisitor::XMAD_imm(u64 insn) {
    union {
        u64 raw;
        BitField<20, 16, u64> src_b;
        BitField<36, 1, u64> psl;
        BitField<37, 1, u64> mrg;
        BitField<38, 1, u64> x;
        BitField<50, 3, SelectMode> select_mode;
    } const encoding{insn};
    // The immediate form's B operand is a 16-bit literal; it always uses H0.
    const IR::U32 src_b{ir.Imm32(static_cast<u32>(encoding.src_b))};
    XMAD(*this, insn, src_b, GetReg39(insn), encoding.select_mode, Half::H0, encoding.psl != 0,
         encoding.mrg != 0, encoding.x != 0);
}
} // namespace Shader::Maxwell

View File

@@ -0,0 +1,126 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "common/bit_field.h"
#include "common/common_types.h"
#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
namespace Shader::Maxwell {
namespace {
// Operand width for I2I conversions.
enum class IntegerWidth : u64 {
    Byte,  // 8 bits
    Short, // 16 bits
    Word,  // 32 bits
};
// Maps an IntegerWidth to its size in bits as an IR immediate.
[[nodiscard]] IR::U32 WidthSize(IR::IREmitter& ir, IntegerWidth width) {
    if (width == IntegerWidth::Byte) {
        return ir.Imm32(8);
    }
    if (width == IntegerWidth::Short) {
        return ir.Imm32(16);
    }
    if (width == IntegerWidth::Word) {
        return ir.Imm32(32);
    }
    throw NotImplementedException("Invalid width {}", width);
}
// Truncates `src` to the destination width by extracting its low bits
// (zero-extended into the 32-bit result).
[[nodiscard]] IR::U32 ConvertInteger(IR::IREmitter& ir, const IR::U32& src,
                                     IntegerWidth dst_width) {
    const IR::U32 start_bit{ir.Imm32(0)};
    const IR::U32 bit_count{WidthSize(ir, dst_width)};
    return ir.BitFieldExtract(src, start_bit, bit_count, false);
}
// Clamps `src` to the numeric range of the destination width. The clamp is
// signed only when both source and destination are signed; a signed source
// headed to an unsigned destination is first clamped below at zero.
[[nodiscard]] IR::U32 SaturateInteger(IR::IREmitter& ir, const IR::U32& src, IntegerWidth dst_width,
                                      bool dst_signed, bool src_signed) {
    const IR::U32 zero{ir.Imm32(0)};
    const bool signed_clamp{dst_signed && src_signed};
    IR::U32 lower_bound{};
    IR::U32 upper_bound{};
    switch (dst_width) {
    case IntegerWidth::Byte:
        lower_bound = signed_clamp ? ir.Imm32(0xffffff80) : zero;
        upper_bound = dst_signed ? ir.Imm32(0x7f) : ir.Imm32(0xff);
        break;
    case IntegerWidth::Short:
        lower_bound = signed_clamp ? ir.Imm32(0xffff8000) : zero;
        upper_bound = dst_signed ? ir.Imm32(0x7fff) : ir.Imm32(0xffff);
        break;
    case IntegerWidth::Word:
        lower_bound = signed_clamp ? ir.Imm32(0x80000000) : zero;
        upper_bound = dst_signed ? ir.Imm32(0x7fffffff) : ir.Imm32(0xffffffff);
        break;
    default:
        throw NotImplementedException("Invalid width {}", dst_width);
    }
    const IR::U32 value{!dst_signed && src_signed ? ir.SMax(zero, src) : src};
    return signed_clamp ? ir.SClamp(value, lower_bound, upper_bound)
                        : ir.UClamp(value, lower_bound, upper_bound);
}
// I2I: integer-to-integer conversion with byte/halfword selection, optional
// absolute value/negation, and optional saturation to the destination width.
void I2I(TranslatorVisitor& v, u64 insn, const IR::U32& src_a) {
    union {
        u64 insn;
        BitField<0, 8, IR::Reg> dest_reg;
        BitField<8, 2, IntegerWidth> dst_fmt;
        BitField<12, 1, u64> dst_fmt_sign;
        BitField<10, 2, IntegerWidth> src_fmt;
        BitField<13, 1, u64> src_fmt_sign;
        BitField<41, 3, u64> selector; // byte offset of the source field
        BitField<45, 1, u64> neg;
        BitField<47, 1, u64> cc;
        BitField<49, 1, u64> abs;
        BitField<50, 1, u64> sat;
    } const i2i{insn};
    // The selector addresses bytes; halfword sources need an even offset and
    // word sources allow no offset at all.
    if (i2i.src_fmt == IntegerWidth::Short && (i2i.selector == 1 || i2i.selector == 3)) {
        throw NotImplementedException("16-bit source format incompatible with selector {}",
                                      i2i.selector);
    }
    if (i2i.src_fmt == IntegerWidth::Word && i2i.selector != 0) {
        throw NotImplementedException("32-bit source format incompatible with selector {}",
                                      i2i.selector);
    }
    const s32 selector{static_cast<s32>(i2i.selector)};
    const IR::U32 offset{v.ir.Imm32(selector * 8)};
    const IR::U32 count{WidthSize(v.ir, i2i.src_fmt)};
    const bool src_signed{i2i.src_fmt_sign != 0};
    const bool dst_signed{i2i.dst_fmt_sign != 0};
    const bool sat{i2i.sat != 0};
    IR::U32 src_values{v.ir.BitFieldExtract(src_a, offset, count, src_signed)};
    if (i2i.abs != 0) {
        src_values = v.ir.IAbs(src_values);
    }
    // NEG is applied after ABS, so .ABS.NEG yields -|x|.
    if (i2i.neg != 0) {
        src_values = v.ir.INeg(src_values);
    }
    const IR::U32 result{
        sat ? SaturateInteger(v.ir, src_values, i2i.dst_fmt, dst_signed, src_signed)
            : ConvertInteger(v.ir, src_values, i2i.dst_fmt)};
    v.X(i2i.dest_reg, result);
    if (i2i.cc != 0) {
        v.SetZFlag(v.ir.GetZeroFromOp(result));
        v.SetSFlag(v.ir.GetSignFromOp(result));
        v.ResetCFlag();
        v.ResetOFlag();
    }
}
} // Anonymous namespace
void TranslatorVisitor::I2I_reg(u64 insn) {
    const IR::U32 src{GetReg20(insn)};
    I2I(*this, insn, src);
}

void TranslatorVisitor::I2I_cbuf(u64 insn) {
    const IR::U32 src{GetCbuf(insn)};
    I2I(*this, insn, src);
}

void TranslatorVisitor::I2I_imm(u64 insn) {
    const IR::U32 src{GetImm20(insn)};
    I2I(*this, insn, src);
}
} // namespace Shader::Maxwell

View File

@@ -0,0 +1,53 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "common/bit_field.h"
#include "common/common_types.h"
#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
namespace Shader::Maxwell {
namespace {
// ISBERD addressing mode (bits 33-34); only Default is accepted below.
enum class Mode : u64 {
    Default,
    Patch,
    Prim,
    Attr,
};
// Shift applied to the read value (bits 47-48); only Default is accepted below.
enum class Shift : u64 {
    Default,
    U16,
    B32,
};
} // Anonymous namespace
// Stub: after rejecting every non-default encoding, simply copies the source
// register into the destination.
void TranslatorVisitor::ISBERD(u64 insn) {
    union {
        u64 raw;
        BitField<0, 8, IR::Reg> dest_reg;
        BitField<8, 8, IR::Reg> src_reg;
        BitField<31, 1, u64> skew;
        BitField<32, 1, u64> o;
        BitField<33, 2, Mode> mode;
        BitField<47, 2, Shift> shift;
    } const encoding{insn};
    if (encoding.skew != 0) {
        throw NotImplementedException("SKEW");
    }
    if (encoding.o != 0) {
        throw NotImplementedException("O");
    }
    if (encoding.mode != Mode::Default) {
        throw NotImplementedException("Mode {}", encoding.mode.Value());
    }
    if (encoding.shift != Shift::Default) {
        throw NotImplementedException("Shift {}", encoding.shift.Value());
    }
    LOG_WARNING(Shader, "(STUBBED) called");
    X(encoding.dest_reg, X(encoding.src_reg));
}
} // namespace Shader::Maxwell

View File

@@ -0,0 +1,62 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "common/bit_field.h"
#include "common/common_types.h"
#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
#include "shader_recompiler/frontend/maxwell/translate/impl/load_constant.h"
namespace Shader::Maxwell {
using namespace LDC;
namespace {
// Resolves the constant-buffer (bank index, byte offset) pair for an LDC access.
// Only the default addressing mode is supported; any other mode throws.
std::pair<IR::U32, IR::U32> Slot(IR::IREmitter& ir, Mode mode, const IR::U32& imm_index,
                                 const IR::U32& reg, const IR::U32& imm) {
    if (mode == Mode::Default) {
        // Bank comes straight from the immediate index; offset is register + immediate.
        return {imm_index, ir.IAdd(reg, imm)};
    }
    throw NotImplementedException("Mode {}", mode);
}
} // Anonymous namespace
// Load Constant: reads 8, 16, 32 or 64 bits from a constant buffer into the
// destination register (pair, for 64-bit loads).
void TranslatorVisitor::LDC(u64 insn) {
    const Encoding ldc{insn};
    const IR::U32 imm_index{ir.Imm32(static_cast<u32>(ldc.index))};
    const IR::U32 reg{X(ldc.src_reg)};
    const IR::U32 imm{ir.Imm32(static_cast<s32>(ldc.offset))};
    const auto [index, offset]{Slot(ir, ldc.mode, imm_index, reg, imm)};
    if (ldc.size == Size::B64) {
        // 64-bit loads write a register pair, so the destination must be even.
        if (!IR::IsAligned(ldc.dest_reg, 2)) {
            throw NotImplementedException("Unaligned destination register");
        }
        const IR::Value vector{ir.GetCbuf(index, offset, 64, false)};
        for (int i = 0; i < 2; ++i) {
            X(ldc.dest_reg + i, IR::U32{ir.CompositeExtract(vector, static_cast<size_t>(i))});
        }
        return;
    }
    // Scalar sizes differ only in bit width and signedness of the load.
    u32 bit_size{};
    bool is_signed{};
    switch (ldc.size) {
    case Size::U8:
        bit_size = 8;
        is_signed = false;
        break;
    case Size::S8:
        bit_size = 8;
        is_signed = true;
        break;
    case Size::U16:
        bit_size = 16;
        is_signed = false;
        break;
    case Size::S16:
        bit_size = 16;
        is_signed = true;
        break;
    case Size::B32:
        bit_size = 32;
        is_signed = false;
        break;
    default:
        throw NotImplementedException("Invalid size {}", ldc.size.Value());
    }
    X(ldc.dest_reg, IR::U32{ir.GetCbuf(index, offset, bit_size, is_signed)});
}
} // namespace Shader::Maxwell

View File

@@ -0,0 +1,39 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include "common/bit_field.h"
#include "common/common_types.h"
#include "shader_recompiler/frontend/ir/reg.h"
namespace Shader::Maxwell::LDC {
// LDC addressing mode field (instruction bits 44-45).
enum class Mode : u64 {
    Default,
    IL,
    IS,
    ISL,
};
// LDC access size field (instruction bits 48-50): width in bits and signedness.
enum class Size : u64 {
    U8,
    S8,
    U16,
    S16,
    B32,
    B64,
};
// Raw bitfield layout of the 64-bit LDC instruction word.
union Encoding {
    u64 raw;
    BitField<0, 8, IR::Reg> dest_reg;  // destination register
    BitField<8, 8, IR::Reg> src_reg;   // register added to the immediate offset
    BitField<20, 16, s64> offset;      // signed 16-bit immediate byte offset
    BitField<36, 5, u64> index;        // immediate constant buffer index
    BitField<44, 2, Mode> mode;        // addressing mode
    BitField<48, 3, Size> size;        // access width/signedness
};
} // namespace Shader::Maxwell::LDC

View File

@@ -0,0 +1,108 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "common/bit_field.h"
#include "common/common_types.h"
#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
namespace Shader::Maxwell {
namespace {
// LEA high half: dest = base + low32(({offset_hi, offset_lo} optionally negated) >> (32 - scale)).
void LEA_hi(TranslatorVisitor& v, u64 insn, const IR::U32& base, IR::U32 offset_hi, u64 scale,
            bool neg, bool x) {
    union {
        u64 insn;
        BitField<0, 8, IR::Reg> dest_reg;
        BitField<8, 8, IR::Reg> offset_lo_reg;
        BitField<47, 1, u64> cc;
        BitField<48, 3, IR::Pred> pred;
    } const lea{insn};
    // Unsupported encoding features are rejected before emitting any IR.
    if (x) {
        throw NotImplementedException("LEA.HI X");
    }
    if (lea.pred != IR::Pred::PT) {
        throw NotImplementedException("LEA.HI Pred");
    }
    if (lea.cc != 0) {
        throw NotImplementedException("LEA.HI CC");
    }
    // Combine the two 32-bit halves into one 64-bit offset.
    const IR::U32 low_bits{v.X(lea.offset_lo_reg)};
    const IR::U64 combined{v.ir.PackUint2x32(v.ir.CompositeConstruct(low_bits, offset_hi))};
    const IR::U64 addend{neg ? IR::U64{v.ir.INeg(combined)} : combined};
    // Shifting right by (32 - scale) yields the scaled high word in the low half.
    const s32 shift_amount{32 - static_cast<s32>(scale)};
    const IR::U64 shifted{v.ir.ShiftRightLogical(addend, v.ir.Imm32(shift_amount))};
    const IR::U32 low_word{v.ir.CompositeExtract(v.ir.UnpackUint2x32(shifted), 0)};
    v.X(lea.dest_reg, v.ir.IAdd(base, low_word));
}
// LEA low half: dest = base + ((optionally negated offset_lo) << scale).
void LEA_lo(TranslatorVisitor& v, u64 insn, const IR::U32& base) {
    union {
        u64 insn;
        BitField<0, 8, IR::Reg> dest_reg;
        BitField<8, 8, IR::Reg> offset_lo_reg;
        BitField<39, 5, u64> scale;
        BitField<45, 1, u64> neg;
        BitField<46, 1, u64> x;
        BitField<47, 1, u64> cc;
        BitField<48, 3, IR::Pred> pred;
    } const lea{insn};
    // Unsupported encoding features are rejected before emitting any IR.
    if (lea.x != 0) {
        throw NotImplementedException("LEA.LO X");
    }
    if (lea.pred != IR::Pred::PT) {
        throw NotImplementedException("LEA.LO Pred");
    }
    if (lea.cc != 0) {
        throw NotImplementedException("LEA.LO CC");
    }
    const IR::U32 unscaled{v.X(lea.offset_lo_reg)};
    const s32 shift_amount{static_cast<s32>(lea.scale)};
    const IR::U32 addend{lea.neg != 0 ? IR::U32{v.ir.INeg(unscaled)} : unscaled};
    const IR::U32 scaled{v.ir.ShiftLeftLogical(addend, v.ir.Imm32(shift_amount))};
    v.X(lea.dest_reg, v.ir.IAdd(base, scaled));
}
} // Anonymous namespace
// LEA.HI with the base in a register; scale/neg/x live at different bit
// positions than in the cbuf form, so they are decoded here.
void TranslatorVisitor::LEA_hi_reg(u64 insn) {
    union {
        u64 insn;
        BitField<28, 5, u64> scale;
        BitField<37, 1, u64> neg;
        BitField<38, 1, u64> x;
    } const hi{insn};
    LEA_hi(*this, insn, GetReg20(insn), GetReg39(insn), hi.scale, hi.neg != 0, hi.x != 0);
}
// LEA.HI with the base read from a constant buffer; scale/neg/x live at
// different bit positions than in the register form, so they are decoded here.
void TranslatorVisitor::LEA_hi_cbuf(u64 insn) {
    union {
        u64 insn;
        BitField<51, 5, u64> scale;
        BitField<56, 1, u64> neg;
        BitField<57, 1, u64> x;
    } const hi{insn};
    LEA_hi(*this, insn, GetCbuf(insn), GetReg39(insn), hi.scale, hi.neg != 0, hi.x != 0);
}
// LEA.LO with the base taken from a register (bits 20-27).
void TranslatorVisitor::LEA_lo_reg(u64 insn) {
    LEA_lo(*this, insn, GetReg20(insn));
}
// LEA.LO with the base read from a constant buffer.
void TranslatorVisitor::LEA_lo_cbuf(u64 insn) {
    LEA_lo(*this, insn, GetCbuf(insn));
}
// LEA.LO with the base taken from the 20-bit immediate field.
void TranslatorVisitor::LEA_lo_imm(u64 insn) {
    LEA_lo(*this, insn, GetImm20(insn));
}
} // namespace Shader::Maxwell

Some files were not shown because too many files have changed in this diff Show More