early-access version 1592

commit 8233a5069a (parent e839de5014)
Author: pineappleEA
Date:   2021-04-13 02:50:20 +02:00

68 changed files with 5589 additions and 781 deletions

View File

@@ -491,6 +491,9 @@ private:
const Registry& registry;
const ShaderType stage;
std::shared_ptr<ShaderFunctionIR> context_func;
u32 ast_var_base{};
std::size_t num_temporaries = 0;
std::size_t max_temporaries = 0;
@@ -807,13 +810,33 @@ ARBDecompiler::ARBDecompiler(const Device& device_, const ShaderIR& ir_, const R
: device{device_}, ir{ir_}, registry{registry_}, stage{stage_} {
DefineGlobalMemory();
context_func = ir.GetMainFunction();
ast_var_base = 0;
AddLine("TEMP RC;");
AddLine("TEMP FSWZA[4];");
AddLine("TEMP FSWZB[4];");
if (ir.IsDecompiled()) {
InitializeVariables();
AddLine("main:");
if (context_func->IsDecompiled()) {
DecompileAST();
} else {
DecompileBranchMode();
AddLine("RET;");
}
const auto& subfunctions = ir.GetSubFunctions();
auto it = subfunctions.begin();
while (it != subfunctions.end()) {
context_func = *it;
AddLine("func_{}:", context_func->GetId());
if (context_func->IsDecompiled()) {
DecompileAST();
} else {
DecompileBranchMode();
AddLine("RET;");
}
it++;
}
AddLine("END");
@@ -1060,41 +1083,38 @@ void ARBDecompiler::InitializeVariables() {
}
void ARBDecompiler::DecompileAST() {
const u32 num_flow_variables = ir.GetASTNumVariables();
const u32 num_flow_variables = context_func->GetASTNumVariables();
for (u32 i = 0; i < num_flow_variables; ++i) {
AddLine("TEMP F{};", i);
AddLine("TEMP F{};", i + ast_var_base);
}
for (u32 i = 0; i < num_flow_variables; ++i) {
AddLine("MOV.U F{}, {{0, 0, 0, 0}};", i);
AddLine("MOV.U F{}, {{0, 0, 0, 0}};", i + ast_var_base);
}
InitializeVariables();
VisitAST(ir.GetASTProgram());
VisitAST(context_func->GetASTProgram());
ast_var_base += num_flow_variables;
}
void ARBDecompiler::DecompileBranchMode() {
static constexpr u32 FLOW_STACK_SIZE = 20;
if (!ir.IsFlowStackDisabled()) {
if (!context_func->IsFlowStackDisabled()) {
AddLine("TEMP SSY[{}];", FLOW_STACK_SIZE);
AddLine("TEMP PBK[{}];", FLOW_STACK_SIZE);
AddLine("TEMP SSY_TOP;");
AddLine("TEMP PBK_TOP;");
}
AddLine("TEMP PC;");
AddLine("TEMP PC{};", context_func->GetId());
if (!ir.IsFlowStackDisabled()) {
if (!context_func->IsFlowStackDisabled()) {
AddLine("MOV.U SSY_TOP.x, 0;");
AddLine("MOV.U PBK_TOP.x, 0;");
}
InitializeVariables();
const auto basic_block_end = ir.GetBasicBlocks().end();
auto basic_block_it = ir.GetBasicBlocks().begin();
const auto basic_block_end = context_func->GetBasicBlocks().end();
auto basic_block_it = context_func->GetBasicBlocks().begin();
const u32 first_address = basic_block_it->first;
AddLine("MOV.U PC.x, {};", first_address);
AddLine("MOV.U PC{}.x, {};", context_func->GetId(), first_address);
AddLine("REP;");
@@ -1103,7 +1123,7 @@ void ARBDecompiler::DecompileBranchMode() {
const auto& [address, bb] = *basic_block_it;
++num_blocks;
AddLine("SEQ.S.CC RC.x, PC.x, {};", address);
AddLine("SEQ.S.CC RC.x, PC{}.x, {};", context_func->GetId(), address);
AddLine("IF NE.x;");
VisitBlock(bb);
@@ -1114,7 +1134,7 @@ void ARBDecompiler::DecompileBranchMode() {
const auto op = std::get_if<OperationNode>(&*bb[bb.size() - 1]);
if (!op || op->GetCode() != OperationCode::Branch) {
const u32 next_address = basic_block_it->first;
AddLine("MOV.U PC.x, {};", next_address);
AddLine("MOV.U PC{}.x, {};", context_func->GetId(), next_address);
AddLine("CONT;");
}
}
@@ -1152,7 +1172,8 @@ void ARBDecompiler::VisitAST(const ASTNode& node) {
} else if (const auto decoded = std::get_if<ASTBlockDecoded>(&*node->GetInnerData())) {
VisitBlock(decoded->nodes);
} else if (const auto var_set = std::get_if<ASTVarSet>(&*node->GetInnerData())) {
AddLine("MOV.U F{}, {};", var_set->index, VisitExpression(var_set->condition));
AddLine("MOV.U F{}, {};", var_set->index + ast_var_base,
VisitExpression(var_set->condition));
ResetTemporaries();
} else if (const auto do_while = std::get_if<ASTDoWhile>(&*node->GetInnerData())) {
const std::string condition = VisitExpression(do_while->condition);
@@ -1172,7 +1193,11 @@ void ARBDecompiler::VisitAST(const ASTNode& node) {
ResetTemporaries();
}
if (ast_return->kills) {
AddLine("KIL TR;");
if (stage == ShaderType::Fragment) {
AddLine("KIL TR;");
} else {
AddLine("RET;");
}
} else {
Exit();
}
@@ -1219,7 +1244,7 @@ std::string ARBDecompiler::VisitExpression(const Expr& node) {
return Visit(ir.GetConditionCode(expr->cc));
}
if (const auto expr = std::get_if<ExprVar>(&*node)) {
return fmt::format("F{}.x", expr->var_index);
return fmt::format("F{}.x", expr->var_index + ast_var_base);
}
if (const auto expr = std::get_if<ExprBoolean>(&*node)) {
return expr->value ? "0xffffffff" : "0";
@@ -1406,6 +1431,11 @@ std::string ARBDecompiler::Visit(const Node& node) {
return {};
}
if (const auto func_call = std::get_if<FunctionCallNode>(&*node)) {
AddLine("CAL func_{};", func_call->GetFuncId());
return {};
}
if ([[maybe_unused]] const auto cmt = std::get_if<CommentNode>(&*node)) {
// Uncommenting this will generate invalid code. GLASM lacks comments.
// AddLine("// {}", cmt->GetText());
@@ -1479,7 +1509,7 @@ std::string ARBDecompiler::GlobalMemoryPointer(const GmemNode& gmem) {
}
void ARBDecompiler::Exit() {
if (stage != ShaderType::Fragment) {
if (!context_func->IsMain() || stage != ShaderType::Fragment) {
AddLine("RET;");
return;
}
@@ -2021,13 +2051,13 @@ std::string ARBDecompiler::ImageStore(Operation operation) {
std::string ARBDecompiler::Branch(Operation operation) {
const auto target = std::get<ImmediateNode>(*operation[0]);
AddLine("MOV.U PC.x, {};", target.GetValue());
AddLine("MOV.U PC{}.x, {};", context_func->GetId(), target.GetValue());
AddLine("CONT;");
return {};
}
std::string ARBDecompiler::BranchIndirect(Operation operation) {
AddLine("MOV.U PC.x, {};", Visit(operation[0]));
AddLine("MOV.U PC{}.x, {};", context_func->GetId(), Visit(operation[0]));
AddLine("CONT;");
return {};
}
@@ -2045,7 +2075,7 @@ std::string ARBDecompiler::PopFlowStack(Operation operation) {
const auto stack = std::get<MetaStackClass>(operation.GetMeta());
const std::string_view stack_name = StackName(stack);
AddLine("SUB.S {}_TOP.x, {}_TOP.x, 1;", stack_name, stack_name);
AddLine("MOV.U PC.x, {}[{}_TOP.x].x;", stack_name, stack_name);
AddLine("MOV.U PC{}.x, {}[{}_TOP.x].x;", context_func->GetId(), stack_name, stack_name);
AddLine("CONT;");
return {};
}
@@ -2056,6 +2086,10 @@ std::string ARBDecompiler::Exit(Operation) {
}
std::string ARBDecompiler::Discard(Operation) {
if (stage != ShaderType::Fragment) {
AddLine("RET;");
return {};
}
AddLine("KIL TR;");
return {};
}
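Taken together, the ARB decompiler changes above emit one labelled body per IR function: the main body under main:, every subfunction under func_<id>:, a dedicated PC<id> temporary per function in branch mode, and FunctionCallNode lowered to CAL func_<id>;. A minimal sketch of that emission order, using hypothetical simplified types rather than the commit's real classes:

// Sketch only: simplified stand-ins for ShaderIR/ShaderFunctionIR, not the commit's code.
#include <memory>
#include <string>
#include <vector>

struct FunctionIR {
    unsigned id = 0;
};

std::string EmitSkeleton(const std::vector<std::shared_ptr<FunctionIR>>& subfunctions) {
    std::string out;
    out += "main:\n";
    out += "  ...\n";            // main body: AST or branch mode driving its own PC temporary
    out += "  RET;\n";
    for (const auto& func : subfunctions) {
        // Every subfunction gets its own label so a FunctionCallNode becomes "CAL func_<id>;".
        out += "func_" + std::to_string(func->id) + ":\n";
        out += "  ...\n";        // subfunction body
        out += "  RET;\n";
    }
    out += "END\n";
    return out;
}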

View File

@@ -544,6 +544,26 @@ void RasterizerOpenGL::FlushRegion(VAddr addr, u64 size) {
query_cache.FlushRegion(addr, size);
}
void RasterizerOpenGL::InvalidateExceptTextureCache(VAddr addr, u64 size) {
if (addr == 0 || size == 0) {
return;
}
shader_cache.InvalidateRegion(addr, size);
{
std::scoped_lock lock{buffer_cache.mutex};
buffer_cache.WriteMemory(addr, size);
}
query_cache.InvalidateRegion(addr, size);
}
void RasterizerOpenGL::InvalidateTextureCache(VAddr addr, u64 size) {
if (addr == 0 || size == 0) {
return;
}
std::scoped_lock lock{texture_cache.mutex};
texture_cache.UnmapMemory(addr, size);
}
bool RasterizerOpenGL::MustFlushRegion(VAddr addr, u64 size) {
std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex};
if (!Settings::IsGPULevelHigh()) {
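The two new rasterizer entry points above split the old full invalidation into a texture-cache part and an everything-else part. A hypothetical caller, not taken from this commit and assuming yuzu's RasterizerOpenGL, VAddr and u64 types are in scope, might use them like this:

// Hypothetical usage sketch (not part of this commit): a caller that knows the written
// region cannot hold texture data could skip the texture-cache unmap entirely.
void InvalidateGuestRegion(RasterizerOpenGL& rasterizer, VAddr addr, u64 size,
                           bool may_contain_textures) {
    rasterizer.InvalidateExceptTextureCache(addr, size); // shader, buffer and query caches
    if (may_contain_textures) {
        rasterizer.InvalidateTextureCache(addr, size);   // additionally unmap the texture cache
    }
}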

View File

@@ -74,6 +74,8 @@ public:
void BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, u32 size) override;
void FlushAll() override;
void FlushRegion(VAddr addr, u64 size) override;
void InvalidateExceptTextureCache(VAddr addr, u64 size) override;
void InvalidateTextureCache(VAddr addr, u64 size) override;
bool MustFlushRegion(VAddr addr, u64 size) override;
void InvalidateRegion(VAddr addr, u64 size) override;
void OnCPUWrite(VAddr addr, u64 size) override;

View File

@@ -79,6 +79,11 @@ const float fswzadd_modifiers_a[] = float[4](-1.0f, 1.0f, -1.0f, 0.0f );
const float fswzadd_modifiers_b[] = float[4](-1.0f, -1.0f, 1.0f, -1.0f );
)";
enum class HelperFunction {
SignedAtomic = 0,
Total,
};
class ShaderWriter final {
public:
void AddExpression(std::string_view text) {
@@ -434,6 +439,28 @@ public:
DeclareInternalFlags();
DeclareCustomVariables();
DeclarePhysicalAttributeReader();
DeclareHelpersForward();
const auto& subfunctions = ir.GetSubFunctions();
auto it = subfunctions.rbegin();
while (it != subfunctions.rend()) {
context_func = *it;
code.AddLine("void func_{}() {{", context_func->GetId());
++code.scope;
if (context_func->IsDecompiled()) {
DecompileAST();
} else {
DecompileBranchMode();
}
--code.scope;
code.AddLine("}}");
it++;
}
context_func = ir.GetMainFunction();
code.AddLine("void main() {{");
++code.scope;
@@ -442,7 +469,7 @@ public:
code.AddLine("gl_Position = vec4(0.0f, 0.0f, 0.0f, 1.0f);");
}
if (ir.IsDecompiled()) {
if (context_func->IsDecompiled()) {
DecompileAST();
} else {
DecompileBranchMode();
@@ -450,6 +477,9 @@ public:
--code.scope;
code.AddLine("}}");
code.AddNewLine();
DeclareHelpers();
}
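With the helper forward declaration emitted up front, the subfunctions emitted before main, and DeclareHelpers() emitted after it, the generated GLSL now has roughly the following layout. This is a hypothetical skeleton with invented identifiers, not actual decompiler output:

// Hypothetical skeleton of the generated GLSL after this change (identifiers invented).
constexpr const char* kExampleGeneratedLayout = R"GLSL(
int Helpers_AtomicShared(uint offset, int value, bool is_min);

void func_1() {
    // subfunction body
}

void main() {
    // main body; may contain func_1();
}

int Helpers_AtomicShared(uint offset, int value, bool is_min) {
    // body emitted by DeclareHelpers() only when the signed-atomic helper was used
}
)GLSL";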
std::string GetResult() {
@@ -462,13 +492,13 @@ private:
void DecompileBranchMode() {
// VM's program counter
const auto first_address = ir.GetBasicBlocks().begin()->first;
const auto first_address = context_func->GetBasicBlocks().begin()->first;
code.AddLine("uint jmp_to = {}U;", first_address);
// TODO(Subv): Figure out the actual depth of the flow stack, for now it seems
// unlikely that shaders will use 20 nested SSYs and PBKs.
constexpr u32 FLOW_STACK_SIZE = 20;
if (!ir.IsFlowStackDisabled()) {
if (!context_func->IsFlowStackDisabled()) {
for (const auto stack : std::array{MetaStackClass::Ssy, MetaStackClass::Pbk}) {
code.AddLine("uint {}[{}];", FlowStackName(stack), FLOW_STACK_SIZE);
code.AddLine("uint {} = 0U;", FlowStackTopName(stack));
@@ -480,7 +510,7 @@ private:
code.AddLine("switch (jmp_to) {{");
for (const auto& pair : ir.GetBasicBlocks()) {
for (const auto& pair : context_func->GetBasicBlocks()) {
const auto& [address, bb] = pair;
code.AddLine("case 0x{:X}U: {{", address);
++code.scope;
@@ -599,7 +629,7 @@ private:
size = limit;
}
code.AddLine("shared uint smem[{}];", size / 4);
code.AddLine("shared uint {}[{}];", GetSharedMemory(), size / 4);
code.AddNewLine();
}
code.AddLine("layout (local_size_x = {}, local_size_y = {}, local_size_z = {}) in;",
@@ -983,6 +1013,27 @@ private:
}
}
void DeclareHelpersForward() {
code.AddLine("int Helpers_AtomicShared(uint offset, int value, bool is_min);");
code.AddNewLine();
}
void DeclareHelpers() {
if (IsHelperEnabled(HelperFunction::SignedAtomic)) {
code.AddLine(
R"(int Helpers_AtomicShared(uint offset, int value, bool is_min) {{
uint oldValue, newValue;
do {{
oldValue = {}[offset];
newValue = is_min ? uint(min(int(oldValue), value)) : uint(max(int(oldValue), value));
}} while (atomicCompSwap({}[offset], newValue, oldValue) != oldValue);
return int(oldValue);
}})",
GetSharedMemory(), GetSharedMemory());
code.AddNewLine();
}
}
void VisitBlock(const NodeBlock& bb) {
for (const auto& node : bb) {
Visit(node).CheckVoid();
@@ -1109,7 +1160,9 @@ private:
}
if (const auto smem = std::get_if<SmemNode>(&*node)) {
return {fmt::format("smem[{} >> 2]", Visit(smem->GetAddress()).AsUint()), Type::Uint};
return {
fmt::format("{}[{} >> 2]", GetSharedMemory(), Visit(smem->GetAddress()).AsUint()),
Type::Uint};
}
if (const auto internal_flag = std::get_if<InternalFlagNode>(&*node)) {
@@ -1131,6 +1184,11 @@ private:
return {};
}
if (const auto func_call = std::get_if<FunctionCallNode>(&*node)) {
code.AddLine("func_{}();", func_call->GetFuncId());
return {};
}
if (const auto comment = std::get_if<CommentNode>(&*node)) {
code.AddLine("// " + comment->GetText());
return {};
@@ -1598,7 +1656,9 @@ private:
Type::Uint};
} else if (const auto smem = std::get_if<SmemNode>(&*dest)) {
ASSERT(stage == ShaderType::Compute);
target = {fmt::format("smem[{} >> 2]", Visit(smem->GetAddress()).AsUint()), Type::Uint};
target = {
fmt::format("{}[{} >> 2]", GetSharedMemory(), Visit(smem->GetAddress()).AsUint()),
Type::Uint};
} else if (const auto gmem = std::get_if<GmemNode>(&*dest)) {
const std::string real = Visit(gmem->GetRealAddress()).AsUint();
const std::string base = Visit(gmem->GetBaseAddress()).AsUint();
@@ -2115,7 +2175,14 @@ private:
UNIMPLEMENTED_IF(meta->sampler.is_array);
const std::size_t count = operation.GetOperandsCount();
std::string expr = "texelFetch(";
std::string expr = "texelFetch";
if (!meta->aoffi.empty()) {
expr += "Offset";
}
expr += '(';
expr += GetSampler(meta->sampler);
expr += ", ";
@@ -2137,6 +2204,20 @@ private:
expr += ", ";
expr += Visit(meta->lod).AsInt();
}
if (!meta->aoffi.empty()) {
expr += ", ";
expr += constructors.at(meta->aoffi.size() - 1);
expr += '(';
for (size_t i = 0; i < meta->aoffi.size(); ++i) {
if (i > 0) {
expr += ", ";
}
expr += Visit(meta->aoffi[i]).AsInt();
}
expr += ')';
}
expr += ')';
expr += GetSwizzle(meta->element);
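For illustration, once meta->aoffi is non-empty the expression built here switches to the texelFetchOffset overload and appends the offsets as an ivec constructor after the lod argument. A hypothetical resulting expression string (operand names and offsets invented):

// Hypothetical example of the built expression (operands invented for illustration).
constexpr const char* kExampleTexelFetch =
    "texelFetchOffset(sampler_2d_0, ivec2(coord_x, coord_y), lod_value, ivec2(2, -1)).x";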
@@ -2183,8 +2264,11 @@ private:
template <const std::string_view& opname, Type type>
Expression Atomic(Operation operation) {
if ((opname == Func::Min || opname == Func::Max) && type == Type::Int) {
UNIMPLEMENTED_MSG("Unimplemented Min & Max for atomic operations");
return {};
// Use a helper as a workaround due to memory being uint
SetHelperEnabled(HelperFunction::SignedAtomic, true);
return {fmt::format("Helpers_AtomicShared({}, {}, {})", Visit(operation[0]).AsInt(),
Visit(operation[1]).AsInt(), opname == Func::Min),
Type::Int};
}
return {fmt::format("atomic{}({}, {})", opname, Visit(operation[0]).GetCode(),
Visit(operation[1]).AsUint()),
@@ -2267,7 +2351,9 @@ private:
}
Expression Exit(Operation operation) {
PreExit();
if (context_func->IsMain()) {
PreExit();
}
code.AddLine("return;");
return {};
}
@@ -2277,7 +2363,11 @@ private:
// about unexecuted instructions that may follow this.
code.AddLine("if (true) {{");
++code.scope;
code.AddLine("discard;");
if (stage != ShaderType::Fragment) {
code.AddLine("return;");
} else {
code.AddLine("discard;");
}
--code.scope;
code.AddLine("}}");
return {};
@@ -2388,7 +2478,7 @@ private:
}
Expression Barrier(Operation) {
if (!ir.IsDecompiled()) {
if (!context_func->IsDecompiled()) {
LOG_ERROR(Render_OpenGL, "barrier() used but shader is not decompiled");
return {};
}
@@ -2705,6 +2795,10 @@ private:
}
}
constexpr std::string_view GetSharedMemory() const {
return "shared_mem";
}
std::string GetInternalFlag(InternalFlag flag) const {
constexpr std::array InternalFlagNames = {"zero_flag", "sign_flag", "carry_flag",
"overflow_flag"};
@@ -2746,6 +2840,14 @@ private:
return std::min<u32>(device.GetMaxVaryings(), Maxwell::NumVaryings);
}
void SetHelperEnabled(HelperFunction hf, bool enabled) {
helper_functions_enabled[static_cast<size_t>(hf)] = enabled;
}
bool IsHelperEnabled(HelperFunction hf) const {
return helper_functions_enabled[static_cast<size_t>(hf)];
}
const Device& device;
const ShaderIR& ir;
const Registry& registry;
@@ -2755,9 +2857,13 @@ private:
const Header header;
std::unordered_map<u8, VaryingTFB> transform_feedback;
std::shared_ptr<ShaderFunctionIR> context_func;
ShaderWriter code;
std::optional<u32> max_input_vertices;
std::array<bool, static_cast<size_t>(HelperFunction::Total)> helper_functions_enabled{};
};
std::string GetFlowVariable(u32 index) {
@@ -2902,9 +3008,15 @@ public:
decomp.code.scope++;
}
if (ast.kills) {
decomp.code.AddLine("discard;");
if (decomp.stage != ShaderType::Fragment) {
decomp.code.AddLine("return;");
} else {
decomp.code.AddLine("discard;");
}
} else {
decomp.PreExit();
if (decomp.context_func->IsMain()) {
decomp.PreExit();
}
decomp.code.AddLine("return;");
}
if (!is_true) {
@@ -2937,13 +3049,13 @@ private:
};
void GLSLDecompiler::DecompileAST() {
const u32 num_flow_variables = ir.GetASTNumVariables();
const u32 num_flow_variables = context_func->GetASTNumVariables();
for (u32 i = 0; i < num_flow_variables; i++) {
code.AddLine("bool {} = false;", GetFlowVariable(i));
}
ASTDecompiler decompiler{*this};
decompiler.Visit(ir.GetASTProgram());
decompiler.Visit(context_func->GetASTProgram());
}
} // Anonymous namespace
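Finally, the Helpers_AtomicShared helper added above works around the lack of signed atomicMin/atomicMax on the uint-typed shared memory array by retrying a compare-and-swap. A standalone C++ sketch of the same retry pattern, not part of the commit, with std::atomic standing in for shared memory:

#include <algorithm>
#include <atomic>

// Sketch of the compare-and-swap retry pattern behind Helpers_AtomicShared:
// emulate a signed min/max atomic on storage that is only accessible as unsigned words.
int AtomicSignedMinMax(std::atomic<unsigned>& word, int value, bool is_min) {
    unsigned old_value = word.load();
    unsigned new_value;
    do {
        const int old_signed = static_cast<int>(old_value);
        new_value = static_cast<unsigned>(is_min ? std::min(old_signed, value)
                                                 : std::max(old_signed, value));
        // On failure, compare_exchange_weak reloads old_value and the loop retries,
        // mirroring the atomicCompSwap retry loop in the generated shader code.
    } while (!word.compare_exchange_weak(old_value, new_value));
    return static_cast<int>(old_value);
}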